]>
Commit | Line | Data |
---|---|---|
76aaf220 DV |
1 | /* |
2 | * Copyright © 2010 Daniel Vetter | |
c4ac524c | 3 | * Copyright © 2011-2014 Intel Corporation |
76aaf220 DV |
4 | * |
5 | * Permission is hereby granted, free of charge, to any person obtaining a | |
6 | * copy of this software and associated documentation files (the "Software"), | |
7 | * to deal in the Software without restriction, including without limitation | |
8 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, | |
9 | * and/or sell copies of the Software, and to permit persons to whom the | |
10 | * Software is furnished to do so, subject to the following conditions: | |
11 | * | |
12 | * The above copyright notice and this permission notice (including the next | |
13 | * paragraph) shall be included in all copies or substantial portions of the | |
14 | * Software. | |
15 | * | |
16 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |
17 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |
18 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL | |
19 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |
20 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | |
21 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS | |
22 | * IN THE SOFTWARE. | |
23 | * | |
24 | */ | |
25 | ||
aae4a3d8 CW |
26 | #include <linux/slab.h> /* fault-inject.h is not standalone! */ |
27 | ||
28 | #include <linux/fault-inject.h> | |
e007b19d | 29 | #include <linux/log2.h> |
606fec95 | 30 | #include <linux/random.h> |
0e46ce2e | 31 | #include <linux/seq_file.h> |
5bab6f60 | 32 | #include <linux/stop_machine.h> |
e007b19d | 33 | |
ed3ba079 | 34 | #include <asm/set_memory.h> |
78387745 | 35 | #include <asm/smp.h> |
ed3ba079 | 36 | |
760285e7 | 37 | #include <drm/i915_drm.h> |
e007b19d | 38 | |
df0566a6 | 39 | #include "display/intel_frontbuffer.h" |
eaf522f6 | 40 | #include "gt/intel_gt.h" |
df0566a6 | 41 | |
76aaf220 | 42 | #include "i915_drv.h" |
37d63f8f | 43 | #include "i915_scatterlist.h" |
76aaf220 | 44 | #include "i915_trace.h" |
37d63f8f | 45 | #include "i915_vgpu.h" |
76aaf220 | 46 | |
1abb70f5 | 47 | #define I915_GFP_ALLOW_FAIL (GFP_KERNEL | __GFP_RETRY_MAYFAIL | __GFP_NOWARN) |
bb8f9cff | 48 | |
4c2be3c5 CW |
49 | #if IS_ENABLED(CONFIG_DRM_I915_TRACE_GTT) |
50 | #define DBG(...) trace_printk(__VA_ARGS__) | |
51 | #else | |
52 | #define DBG(...) | |
53 | #endif | |
54 | ||
45f8f69a TU |
55 | /** |
56 | * DOC: Global GTT views | |
57 | * | |
58 | * Background and previous state | |
59 | * | |
60 | * Historically objects could exists (be bound) in global GTT space only as | |
61 | * singular instances with a view representing all of the object's backing pages | |
62 | * in a linear fashion. This view will be called a normal view. | |
63 | * | |
64 | * To support multiple views of the same object, where the number of mapped | |
65 | * pages is not equal to the backing store, or where the layout of the pages | |
66 | * is not linear, concept of a GGTT view was added. | |
67 | * | |
68 | * One example of an alternative view is a stereo display driven by a single | |
69 | * image. In this case we would have a framebuffer looking like this | |
70 | * (2x2 pages): | |
71 | * | |
72 | * 12 | |
73 | * 34 | |
74 | * | |
75 | * Above would represent a normal GGTT view as normally mapped for GPU or CPU | |
76 | * rendering. In contrast, fed to the display engine would be an alternative | |
77 | * view which could look something like this: | |
78 | * | |
79 | * 1212 | |
80 | * 3434 | |
81 | * | |
82 | * In this example both the size and layout of pages in the alternative view is | |
83 | * different from the normal view. | |
84 | * | |
85 | * Implementation and usage | |
86 | * | |
87 | * GGTT views are implemented using VMAs and are distinguished via enum | |
88 | * i915_ggtt_view_type and struct i915_ggtt_view. | |
89 | * | |
90 | * A new flavour of core GEM functions which work with GGTT bound objects were | |
ec7adb6e JL |
91 | * added with the _ggtt_ infix, and sometimes with _view postfix to avoid |
92 | * renaming in large amounts of code. They take the struct i915_ggtt_view | |
93 | * parameter encapsulating all metadata required to implement a view. | |
45f8f69a TU |
94 | * |
95 | * As a helper for callers which are only interested in the normal view, | |
96 | * globally const i915_ggtt_view_normal singleton instance exists. All old core | |
97 | * GEM API functions, the ones not taking the view parameter, are operating on, | |
98 | * or with the normal GGTT view. | |
99 | * | |
100 | * Code wanting to add or use a new GGTT view needs to: | |
101 | * | |
102 | * 1. Add a new enum with a suitable name. | |
103 | * 2. Extend the metadata in the i915_ggtt_view structure if required. | |
104 | * 3. Add support to i915_get_vma_pages(). | |
105 | * | |
106 | * New views are required to build a scatter-gather table from within the | |
107 | * i915_get_vma_pages function. This table is stored in the vma.ggtt_view and | |
108 | * exists for the lifetime of an VMA. | |
109 | * | |
110 | * Core API is designed to have copy semantics which means that passed in | |
111 | * struct i915_ggtt_view does not need to be persistent (left around after | |
112 | * calling the core API functions). | |
113 | * | |
114 | */ | |
115 | ||
/* A page directory embeds a page table (pt) as its first member. */
#define as_pd(x) container_of((x), typeof(struct i915_page_directory), pt)

static int
i915_get_ggtt_vma_pages(struct i915_vma *vma);
759e4a74 | 121 | static void gen6_ggtt_invalidate(struct i915_ggtt *ggtt) |
7c3f86b6 | 122 | { |
6ac689d2 | 123 | struct intel_uncore *uncore = ggtt->vm.gt->uncore; |
59ec84ec | 124 | |
ca6acc25 MK |
125 | /* |
126 | * Note that as an uncached mmio write, this will flush the | |
7c3f86b6 CW |
127 | * WCB of the writes into the GGTT before it triggers the invalidate. |
128 | */ | |
59ec84ec | 129 | intel_uncore_write_fw(uncore, GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN); |
7c3f86b6 CW |
130 | } |
131 | ||
759e4a74 | 132 | static void guc_ggtt_invalidate(struct i915_ggtt *ggtt) |
7c3f86b6 | 133 | { |
6ac689d2 | 134 | struct intel_uncore *uncore = ggtt->vm.gt->uncore; |
59ec84ec | 135 | |
759e4a74 | 136 | gen6_ggtt_invalidate(ggtt); |
59ec84ec | 137 | intel_uncore_write_fw(uncore, GEN8_GTCR, GEN8_GTCR_INVALIDATE); |
7c3f86b6 CW |
138 | } |
139 | ||
759e4a74 | 140 | static void gmch_ggtt_invalidate(struct i915_ggtt *ggtt) |
7c3f86b6 CW |
141 | { |
142 | intel_gtt_chipset_flush(); | |
143 | } | |
144 | ||
549fe88b CW |
145 | static int ppgtt_bind_vma(struct i915_vma *vma, |
146 | enum i915_cache_level cache_level, | |
147 | u32 unused) | |
47552659 | 148 | { |
ff685975 | 149 | u32 pte_flags; |
549fe88b CW |
150 | int err; |
151 | ||
152 | if (!(vma->flags & I915_VMA_LOCAL_BIND)) { | |
153 | err = vma->vm->allocate_va_range(vma->vm, | |
154 | vma->node.start, vma->size); | |
155 | if (err) | |
156 | return err; | |
157 | } | |
47552659 | 158 | |
250f8c81 | 159 | /* Applicable to VLV, and gen8+ */ |
ff685975 | 160 | pte_flags = 0; |
3e977ac6 | 161 | if (i915_gem_object_is_readonly(vma->obj)) |
47552659 DV |
162 | pte_flags |= PTE_READ_ONLY; |
163 | ||
4a234c5f | 164 | vma->vm->insert_entries(vma->vm, vma, cache_level, pte_flags); |
70b9f6f8 DV |
165 | |
166 | return 0; | |
47552659 DV |
167 | } |
168 | ||
169 | static void ppgtt_unbind_vma(struct i915_vma *vma) | |
170 | { | |
ff685975 | 171 | vma->vm->clear_range(vma->vm, vma->node.start, vma->size); |
47552659 | 172 | } |
6f65e29a | 173 | |
fa3f46af MA |
174 | static int ppgtt_set_pages(struct i915_vma *vma) |
175 | { | |
176 | GEM_BUG_ON(vma->pages); | |
177 | ||
178 | vma->pages = vma->obj->mm.pages; | |
179 | ||
7464284b MA |
180 | vma->page_sizes = vma->obj->mm.page_sizes; |
181 | ||
fa3f46af MA |
182 | return 0; |
183 | } | |
184 | ||
185 | static void clear_pages(struct i915_vma *vma) | |
186 | { | |
187 | GEM_BUG_ON(!vma->pages); | |
188 | ||
189 | if (vma->pages != vma->obj->mm.pages) { | |
190 | sg_free_table(vma->pages); | |
191 | kfree(vma->pages); | |
192 | } | |
193 | vma->pages = NULL; | |
7464284b MA |
194 | |
195 | memset(&vma->page_sizes, 0, sizeof(vma->page_sizes)); | |
fa3f46af MA |
196 | } |
197 | ||
daf3dc0f CW |
198 | static u64 gen8_pte_encode(dma_addr_t addr, |
199 | enum i915_cache_level level, | |
200 | u32 flags) | |
94ec8f61 | 201 | { |
25dda4da JB |
202 | gen8_pte_t pte = addr | _PAGE_PRESENT | _PAGE_RW; |
203 | ||
204 | if (unlikely(flags & PTE_READ_ONLY)) | |
205 | pte &= ~_PAGE_RW; | |
63c42e56 BW |
206 | |
207 | switch (level) { | |
208 | case I915_CACHE_NONE: | |
c095b97c | 209 | pte |= PPAT_UNCACHED; |
63c42e56 BW |
210 | break; |
211 | case I915_CACHE_WT: | |
c095b97c | 212 | pte |= PPAT_DISPLAY_ELLC; |
63c42e56 BW |
213 | break; |
214 | default: | |
c095b97c | 215 | pte |= PPAT_CACHED; |
63c42e56 BW |
216 | break; |
217 | } | |
218 | ||
94ec8f61 BW |
219 | return pte; |
220 | } | |
221 | ||
f20f272f MK |
222 | static u64 gen8_pde_encode(const dma_addr_t addr, |
223 | const enum i915_cache_level level) | |
b1fe6673 | 224 | { |
f20f272f | 225 | u64 pde = _PAGE_PRESENT | _PAGE_RW; |
b1fe6673 BW |
226 | pde |= addr; |
227 | if (level != I915_CACHE_NONE) | |
c095b97c | 228 | pde |= PPAT_CACHED_PDE; |
b1fe6673 | 229 | else |
c095b97c | 230 | pde |= PPAT_UNCACHED; |
b1fe6673 BW |
231 | return pde; |
232 | } | |
233 | ||
daf3dc0f CW |
234 | static u64 snb_pte_encode(dma_addr_t addr, |
235 | enum i915_cache_level level, | |
236 | u32 flags) | |
54d12527 | 237 | { |
4fb84d99 | 238 | gen6_pte_t pte = GEN6_PTE_VALID; |
54d12527 | 239 | pte |= GEN6_PTE_ADDR_ENCODE(addr); |
e7210c3c BW |
240 | |
241 | switch (level) { | |
350ec881 CW |
242 | case I915_CACHE_L3_LLC: |
243 | case I915_CACHE_LLC: | |
244 | pte |= GEN6_PTE_CACHE_LLC; | |
245 | break; | |
246 | case I915_CACHE_NONE: | |
247 | pte |= GEN6_PTE_UNCACHED; | |
248 | break; | |
249 | default: | |
5f77eeb0 | 250 | MISSING_CASE(level); |
350ec881 CW |
251 | } |
252 | ||
253 | return pte; | |
254 | } | |
255 | ||
daf3dc0f CW |
256 | static u64 ivb_pte_encode(dma_addr_t addr, |
257 | enum i915_cache_level level, | |
258 | u32 flags) | |
350ec881 | 259 | { |
4fb84d99 | 260 | gen6_pte_t pte = GEN6_PTE_VALID; |
350ec881 CW |
261 | pte |= GEN6_PTE_ADDR_ENCODE(addr); |
262 | ||
263 | switch (level) { | |
264 | case I915_CACHE_L3_LLC: | |
265 | pte |= GEN7_PTE_CACHE_L3_LLC; | |
e7210c3c BW |
266 | break; |
267 | case I915_CACHE_LLC: | |
268 | pte |= GEN6_PTE_CACHE_LLC; | |
269 | break; | |
270 | case I915_CACHE_NONE: | |
9119708c | 271 | pte |= GEN6_PTE_UNCACHED; |
e7210c3c BW |
272 | break; |
273 | default: | |
5f77eeb0 | 274 | MISSING_CASE(level); |
e7210c3c BW |
275 | } |
276 | ||
54d12527 BW |
277 | return pte; |
278 | } | |
279 | ||
daf3dc0f CW |
280 | static u64 byt_pte_encode(dma_addr_t addr, |
281 | enum i915_cache_level level, | |
282 | u32 flags) | |
93c34e70 | 283 | { |
4fb84d99 | 284 | gen6_pte_t pte = GEN6_PTE_VALID; |
93c34e70 KG |
285 | pte |= GEN6_PTE_ADDR_ENCODE(addr); |
286 | ||
24f3a8cf AG |
287 | if (!(flags & PTE_READ_ONLY)) |
288 | pte |= BYT_PTE_WRITEABLE; | |
93c34e70 KG |
289 | |
290 | if (level != I915_CACHE_NONE) | |
291 | pte |= BYT_PTE_SNOOPED_BY_CPU_CACHES; | |
292 | ||
293 | return pte; | |
294 | } | |
295 | ||
daf3dc0f CW |
296 | static u64 hsw_pte_encode(dma_addr_t addr, |
297 | enum i915_cache_level level, | |
298 | u32 flags) | |
9119708c | 299 | { |
4fb84d99 | 300 | gen6_pte_t pte = GEN6_PTE_VALID; |
0d8ff15e | 301 | pte |= HSW_PTE_ADDR_ENCODE(addr); |
9119708c KG |
302 | |
303 | if (level != I915_CACHE_NONE) | |
87a6b688 | 304 | pte |= HSW_WB_LLC_AGE3; |
9119708c KG |
305 | |
306 | return pte; | |
307 | } | |
308 | ||
daf3dc0f CW |
309 | static u64 iris_pte_encode(dma_addr_t addr, |
310 | enum i915_cache_level level, | |
311 | u32 flags) | |
4d15c145 | 312 | { |
4fb84d99 | 313 | gen6_pte_t pte = GEN6_PTE_VALID; |
4d15c145 BW |
314 | pte |= HSW_PTE_ADDR_ENCODE(addr); |
315 | ||
651d794f CW |
316 | switch (level) { |
317 | case I915_CACHE_NONE: | |
318 | break; | |
319 | case I915_CACHE_WT: | |
c51e9701 | 320 | pte |= HSW_WT_ELLC_LLC_AGE3; |
651d794f CW |
321 | break; |
322 | default: | |
c51e9701 | 323 | pte |= HSW_WB_ELLC_LLC_AGE3; |
651d794f CW |
324 | break; |
325 | } | |
4d15c145 BW |
326 | |
327 | return pte; | |
328 | } | |
329 | ||
63fd659f CW |
330 | static void stash_init(struct pagestash *stash) |
331 | { | |
332 | pagevec_init(&stash->pvec); | |
333 | spin_lock_init(&stash->lock); | |
334 | } | |
335 | ||
336 | static struct page *stash_pop_page(struct pagestash *stash) | |
337 | { | |
338 | struct page *page = NULL; | |
339 | ||
340 | spin_lock(&stash->lock); | |
341 | if (likely(stash->pvec.nr)) | |
342 | page = stash->pvec.pages[--stash->pvec.nr]; | |
343 | spin_unlock(&stash->lock); | |
344 | ||
345 | return page; | |
346 | } | |
347 | ||
348 | static void stash_push_pagevec(struct pagestash *stash, struct pagevec *pvec) | |
349 | { | |
7f5f2280 | 350 | unsigned int nr; |
63fd659f CW |
351 | |
352 | spin_lock_nested(&stash->lock, SINGLE_DEPTH_NESTING); | |
353 | ||
7f5f2280 | 354 | nr = min_t(typeof(nr), pvec->nr, pagevec_space(&stash->pvec)); |
63fd659f CW |
355 | memcpy(stash->pvec.pages + stash->pvec.nr, |
356 | pvec->pages + pvec->nr - nr, | |
357 | sizeof(pvec->pages[0]) * nr); | |
358 | stash->pvec.nr += nr; | |
359 | ||
360 | spin_unlock(&stash->lock); | |
361 | ||
362 | pvec->nr -= nr; | |
363 | } | |
364 | ||
8448661d | 365 | static struct page *vm_alloc_page(struct i915_address_space *vm, gfp_t gfp) |
678d96fb | 366 | { |
63fd659f CW |
367 | struct pagevec stack; |
368 | struct page *page; | |
678d96fb | 369 | |
8448661d CW |
370 | if (I915_SELFTEST_ONLY(should_fail(&vm->fault_attr, 1))) |
371 | i915_gem_shrink_all(vm->i915); | |
aae4a3d8 | 372 | |
63fd659f CW |
373 | page = stash_pop_page(&vm->free_pages); |
374 | if (page) | |
375 | return page; | |
66df1014 CW |
376 | |
377 | if (!vm->pt_kmap_wc) | |
378 | return alloc_page(gfp); | |
379 | ||
66df1014 | 380 | /* Look in our global stash of WC pages... */ |
63fd659f CW |
381 | page = stash_pop_page(&vm->i915->mm.wc_stash); |
382 | if (page) | |
383 | return page; | |
66df1014 | 384 | |
073cd781 | 385 | /* |
63fd659f | 386 | * Otherwise batch allocate pages to amortize cost of set_pages_wc. |
073cd781 CW |
387 | * |
388 | * We have to be careful as page allocation may trigger the shrinker | |
389 | * (via direct reclaim) which will fill up the WC stash underneath us. | |
390 | * So we add our WB pages into a temporary pvec on the stack and merge | |
391 | * them into the WC stash after all the allocations are complete. | |
392 | */ | |
63fd659f | 393 | pagevec_init(&stack); |
66df1014 CW |
394 | do { |
395 | struct page *page; | |
8448661d | 396 | |
66df1014 CW |
397 | page = alloc_page(gfp); |
398 | if (unlikely(!page)) | |
399 | break; | |
400 | ||
63fd659f CW |
401 | stack.pages[stack.nr++] = page; |
402 | } while (pagevec_space(&stack)); | |
66df1014 | 403 | |
63fd659f CW |
404 | if (stack.nr && !set_pages_array_wc(stack.pages, stack.nr)) { |
405 | page = stack.pages[--stack.nr]; | |
8448661d | 406 | |
63fd659f | 407 | /* Merge spare WC pages to the global stash */ |
7f5f2280 CW |
408 | if (stack.nr) |
409 | stash_push_pagevec(&vm->i915->mm.wc_stash, &stack); | |
073cd781 | 410 | |
63fd659f CW |
411 | /* Push any surplus WC pages onto the local VM stash */ |
412 | if (stack.nr) | |
413 | stash_push_pagevec(&vm->free_pages, &stack); | |
073cd781 | 414 | } |
8448661d | 415 | |
63fd659f CW |
416 | /* Return unwanted leftovers */ |
417 | if (unlikely(stack.nr)) { | |
418 | WARN_ON_ONCE(set_pages_array_wb(stack.pages, stack.nr)); | |
419 | __pagevec_release(&stack); | |
420 | } | |
421 | ||
422 | return page; | |
8448661d CW |
423 | } |
424 | ||
66df1014 CW |
425 | static void vm_free_pages_release(struct i915_address_space *vm, |
426 | bool immediate) | |
8448661d | 427 | { |
63fd659f CW |
428 | struct pagevec *pvec = &vm->free_pages.pvec; |
429 | struct pagevec stack; | |
66df1014 | 430 | |
63fd659f | 431 | lockdep_assert_held(&vm->free_pages.lock); |
66df1014 | 432 | GEM_BUG_ON(!pagevec_count(pvec)); |
8448661d | 433 | |
66df1014 | 434 | if (vm->pt_kmap_wc) { |
63fd659f CW |
435 | /* |
436 | * When we use WC, first fill up the global stash and then | |
66df1014 CW |
437 | * only if full immediately free the overflow. |
438 | */ | |
63fd659f | 439 | stash_push_pagevec(&vm->i915->mm.wc_stash, pvec); |
8448661d | 440 | |
63fd659f CW |
441 | /* |
442 | * As we have made some room in the VM's free_pages, | |
443 | * we can wait for it to fill again. Unless we are | |
444 | * inside i915_address_space_fini() and must | |
445 | * immediately release the pages! | |
446 | */ | |
447 | if (pvec->nr <= (immediate ? 0 : PAGEVEC_SIZE - 1)) | |
448 | return; | |
66df1014 | 449 | |
63fd659f CW |
450 | /* |
451 | * We have to drop the lock to allow ourselves to sleep, | |
452 | * so take a copy of the pvec and clear the stash for | |
453 | * others to use it as we sleep. | |
454 | */ | |
455 | stack = *pvec; | |
456 | pagevec_reinit(pvec); | |
457 | spin_unlock(&vm->free_pages.lock); | |
458 | ||
459 | pvec = &stack; | |
66df1014 | 460 | set_pages_array_wb(pvec->pages, pvec->nr); |
63fd659f CW |
461 | |
462 | spin_lock(&vm->free_pages.lock); | |
66df1014 CW |
463 | } |
464 | ||
465 | __pagevec_release(pvec); | |
8448661d CW |
466 | } |
467 | ||
468 | static void vm_free_page(struct i915_address_space *vm, struct page *page) | |
469 | { | |
15e4cda9 CW |
470 | /* |
471 | * On !llc, we need to change the pages back to WB. We only do so | |
472 | * in bulk, so we rarely need to change the page attributes here, | |
473 | * but doing so requires a stop_machine() from deep inside arch/x86/mm. | |
474 | * To make detection of the possible sleep more likely, use an | |
475 | * unconditional might_sleep() for everybody. | |
476 | */ | |
477 | might_sleep(); | |
63fd659f | 478 | spin_lock(&vm->free_pages.lock); |
7f5f2280 | 479 | while (!pagevec_space(&vm->free_pages.pvec)) |
66df1014 | 480 | vm_free_pages_release(vm, false); |
7f5f2280 CW |
481 | GEM_BUG_ON(pagevec_count(&vm->free_pages.pvec) >= PAGEVEC_SIZE); |
482 | pagevec_add(&vm->free_pages.pvec, page); | |
63fd659f CW |
483 | spin_unlock(&vm->free_pages.lock); |
484 | } | |
485 | ||
b32fa811 CW |
486 | static void i915_address_space_fini(struct i915_address_space *vm) |
487 | { | |
488 | spin_lock(&vm->free_pages.lock); | |
489 | if (pagevec_count(&vm->free_pages.pvec)) | |
490 | vm_free_pages_release(vm, true); | |
491 | GEM_BUG_ON(pagevec_count(&vm->free_pages.pvec)); | |
492 | spin_unlock(&vm->free_pages.lock); | |
493 | ||
494 | drm_mm_takedown(&vm->mm); | |
495 | ||
496 | mutex_destroy(&vm->mutex); | |
497 | } | |
498 | ||
499 | static void ppgtt_destroy_vma(struct i915_address_space *vm) | |
500 | { | |
501 | struct list_head *phases[] = { | |
502 | &vm->bound_list, | |
503 | &vm->unbound_list, | |
504 | NULL, | |
505 | }, **phase; | |
506 | ||
507 | mutex_lock(&vm->i915->drm.struct_mutex); | |
508 | for (phase = phases; *phase; phase++) { | |
509 | struct i915_vma *vma, *vn; | |
510 | ||
511 | list_for_each_entry_safe(vma, vn, *phase, vm_link) | |
512 | i915_vma_destroy(vma); | |
513 | } | |
514 | mutex_unlock(&vm->i915->drm.struct_mutex); | |
515 | } | |
516 | ||
517 | static void __i915_vm_release(struct work_struct *work) | |
518 | { | |
519 | struct i915_address_space *vm = | |
520 | container_of(work, struct i915_address_space, rcu.work); | |
521 | ||
522 | ppgtt_destroy_vma(vm); | |
523 | ||
524 | GEM_BUG_ON(!list_empty(&vm->bound_list)); | |
525 | GEM_BUG_ON(!list_empty(&vm->unbound_list)); | |
526 | ||
527 | vm->cleanup(vm); | |
528 | i915_address_space_fini(vm); | |
529 | ||
530 | kfree(vm); | |
531 | } | |
532 | ||
533 | void i915_vm_release(struct kref *kref) | |
534 | { | |
535 | struct i915_address_space *vm = | |
536 | container_of(kref, struct i915_address_space, ref); | |
537 | ||
538 | GEM_BUG_ON(i915_is_ggtt(vm)); | |
539 | trace_i915_ppgtt_release(vm); | |
540 | ||
541 | vm->closed = true; | |
542 | queue_rcu_work(vm->i915->wq, &vm->rcu); | |
543 | } | |
544 | ||
305dc3f9 | 545 | static void i915_address_space_init(struct i915_address_space *vm, int subclass) |
63fd659f | 546 | { |
e568ac38 | 547 | kref_init(&vm->ref); |
b32fa811 | 548 | INIT_RCU_WORK(&vm->rcu, __i915_vm_release); |
e568ac38 | 549 | |
19bb33c7 CW |
550 | /* |
551 | * The vm->mutex must be reclaim safe (for use in the shrinker). | |
552 | * Do a dummy acquire now under fs_reclaim so that any allocation | |
553 | * attempt holding the lock is immediately reported by lockdep. | |
554 | */ | |
555 | mutex_init(&vm->mutex); | |
305dc3f9 | 556 | lockdep_set_subclass(&vm->mutex, subclass); |
d25f71a1 | 557 | i915_gem_shrinker_taints_mutex(vm->i915, &vm->mutex); |
19bb33c7 | 558 | |
63fd659f CW |
559 | GEM_BUG_ON(!vm->total); |
560 | drm_mm_init(&vm->mm, 0, vm->total); | |
561 | vm->mm.head_node.color = I915_COLOR_UNEVICTABLE; | |
562 | ||
563 | stash_init(&vm->free_pages); | |
564 | ||
63fd659f | 565 | INIT_LIST_HEAD(&vm->unbound_list); |
499197dc | 566 | INIT_LIST_HEAD(&vm->bound_list); |
63fd659f CW |
567 | } |
568 | ||
8448661d CW |
569 | static int __setup_page_dma(struct i915_address_space *vm, |
570 | struct i915_page_dma *p, | |
571 | gfp_t gfp) | |
572 | { | |
1abb70f5 | 573 | p->page = vm_alloc_page(vm, gfp | I915_GFP_ALLOW_FAIL); |
8448661d CW |
574 | if (unlikely(!p->page)) |
575 | return -ENOMEM; | |
678d96fb | 576 | |
58174eac CW |
577 | p->daddr = dma_map_page_attrs(vm->dma, |
578 | p->page, 0, PAGE_SIZE, | |
579 | PCI_DMA_BIDIRECTIONAL, | |
66daec6b | 580 | DMA_ATTR_SKIP_CPU_SYNC | |
58174eac | 581 | DMA_ATTR_NO_WARN); |
8448661d CW |
582 | if (unlikely(dma_mapping_error(vm->dma, p->daddr))) { |
583 | vm_free_page(vm, p->page); | |
584 | return -ENOMEM; | |
44159ddb | 585 | } |
1266cdb1 MT |
586 | |
587 | return 0; | |
678d96fb BW |
588 | } |
589 | ||
8448661d | 590 | static int setup_page_dma(struct i915_address_space *vm, |
275a991c | 591 | struct i915_page_dma *p) |
c114f76a | 592 | { |
1abb70f5 | 593 | return __setup_page_dma(vm, p, __GFP_HIGHMEM); |
c114f76a MK |
594 | } |
595 | ||
8448661d | 596 | static void cleanup_page_dma(struct i915_address_space *vm, |
275a991c | 597 | struct i915_page_dma *p) |
06fda602 | 598 | { |
8448661d CW |
599 | dma_unmap_page(vm->dma, p->daddr, PAGE_SIZE, PCI_DMA_BIDIRECTIONAL); |
600 | vm_free_page(vm, p->page); | |
44159ddb MK |
601 | } |
602 | ||
9231da70 | 603 | #define kmap_atomic_px(px) kmap_atomic(px_base(px)->page) |
d1c54acd | 604 | |
6eebfe8a CW |
605 | static void |
606 | fill_page_dma(const struct i915_page_dma *p, const u64 val, unsigned int count) | |
d1c54acd | 607 | { |
6eebfe8a | 608 | kunmap_atomic(memset64(kmap_atomic(p->page), val, count)); |
d1c54acd MK |
609 | } |
610 | ||
6eebfe8a CW |
611 | #define fill_px(px, v) fill_page_dma(px_base(px), (v), PAGE_SIZE / sizeof(u64)) |
612 | #define fill32_px(px, v) do { \ | |
613 | u64 v__ = lower_32_bits(v); \ | |
614 | fill_px((px), v__ << 32 | v__); \ | |
615 | } while (0) | |
73eeea53 | 616 | |
8bcdd0f7 | 617 | static int |
8448661d | 618 | setup_scratch_page(struct i915_address_space *vm, gfp_t gfp) |
4ad2af1e | 619 | { |
7fb9ee5d | 620 | unsigned long size; |
66df1014 | 621 | |
aa095871 MA |
622 | /* |
623 | * In order to utilize 64K pages for an object with a size < 2M, we will | |
624 | * need to support a 64K scratch page, given that every 16th entry for a | |
625 | * page-table operating in 64K mode must point to a properly aligned 64K | |
626 | * region, including any PTEs which happen to point to scratch. | |
627 | * | |
628 | * This is only relevant for the 48b PPGTT where we support | |
b379e306 CW |
629 | * huge-gtt-pages, see also i915_vma_insert(). However, as we share the |
630 | * scratch (read-only) between all vm, we create one 64k scratch page | |
631 | * for all. | |
aa095871 | 632 | */ |
7fb9ee5d | 633 | size = I915_GTT_PAGE_SIZE_4K; |
a9fe9ca4 | 634 | if (i915_vm_is_4lvl(vm) && |
aa095871 | 635 | HAS_PAGE_SIZES(vm->i915, I915_GTT_PAGE_SIZE_64K)) { |
7fb9ee5d CW |
636 | size = I915_GTT_PAGE_SIZE_64K; |
637 | gfp |= __GFP_NOWARN; | |
aa095871 | 638 | } |
7fb9ee5d CW |
639 | gfp |= __GFP_ZERO | __GFP_RETRY_MAYFAIL; |
640 | ||
641 | do { | |
c03cbe4c | 642 | unsigned int order = get_order(size); |
7fb9ee5d CW |
643 | struct page *page; |
644 | dma_addr_t addr; | |
66df1014 | 645 | |
7fb9ee5d | 646 | page = alloc_pages(gfp, order); |
aa095871 | 647 | if (unlikely(!page)) |
7fb9ee5d | 648 | goto skip; |
aa095871 | 649 | |
58174eac CW |
650 | addr = dma_map_page_attrs(vm->dma, |
651 | page, 0, size, | |
652 | PCI_DMA_BIDIRECTIONAL, | |
66daec6b | 653 | DMA_ATTR_SKIP_CPU_SYNC | |
58174eac | 654 | DMA_ATTR_NO_WARN); |
7fb9ee5d CW |
655 | if (unlikely(dma_mapping_error(vm->dma, addr))) |
656 | goto free_page; | |
66df1014 | 657 | |
7fb9ee5d CW |
658 | if (unlikely(!IS_ALIGNED(addr, size))) |
659 | goto unmap_page; | |
aa095871 | 660 | |
c03cbe4c CW |
661 | vm->scratch[0].base.page = page; |
662 | vm->scratch[0].base.daddr = addr; | |
a2ac437b | 663 | vm->scratch_order = order; |
7fb9ee5d CW |
664 | return 0; |
665 | ||
666 | unmap_page: | |
667 | dma_unmap_page(vm->dma, addr, size, PCI_DMA_BIDIRECTIONAL); | |
668 | free_page: | |
669 | __free_pages(page, order); | |
670 | skip: | |
671 | if (size == I915_GTT_PAGE_SIZE_4K) | |
672 | return -ENOMEM; | |
673 | ||
674 | size = I915_GTT_PAGE_SIZE_4K; | |
675 | gfp &= ~__GFP_NOWARN; | |
676 | } while (1); | |
4ad2af1e MK |
677 | } |
678 | ||
8448661d | 679 | static void cleanup_scratch_page(struct i915_address_space *vm) |
4ad2af1e | 680 | { |
c03cbe4c CW |
681 | struct i915_page_dma *p = px_base(&vm->scratch[0]); |
682 | unsigned int order = vm->scratch_order; | |
66df1014 | 683 | |
a2ac437b | 684 | dma_unmap_page(vm->dma, p->daddr, BIT(order) << PAGE_SHIFT, |
aa095871 | 685 | PCI_DMA_BIDIRECTIONAL); |
a2ac437b | 686 | __free_pages(p->page, order); |
4ad2af1e MK |
687 | } |
688 | ||
6eebfe8a CW |
689 | static void free_scratch(struct i915_address_space *vm) |
690 | { | |
c03cbe4c CW |
691 | int i; |
692 | ||
693 | if (!px_dma(&vm->scratch[0])) /* set to 0 on clones */ | |
6eebfe8a CW |
694 | return; |
695 | ||
c03cbe4c CW |
696 | for (i = 1; i <= vm->top; i++) { |
697 | if (!px_dma(&vm->scratch[i])) | |
698 | break; | |
699 | cleanup_page_dma(vm, px_base(&vm->scratch[i])); | |
700 | } | |
6eebfe8a CW |
701 | |
702 | cleanup_scratch_page(vm); | |
703 | } | |
704 | ||
8448661d | 705 | static struct i915_page_table *alloc_pt(struct i915_address_space *vm) |
06fda602 | 706 | { |
ec565b3c | 707 | struct i915_page_table *pt; |
06fda602 | 708 | |
1abb70f5 | 709 | pt = kmalloc(sizeof(*pt), I915_GFP_ALLOW_FAIL); |
dd19674b | 710 | if (unlikely(!pt)) |
06fda602 BW |
711 | return ERR_PTR(-ENOMEM); |
712 | ||
72230b87 | 713 | if (unlikely(setup_page_dma(vm, &pt->base))) { |
dd19674b CW |
714 | kfree(pt); |
715 | return ERR_PTR(-ENOMEM); | |
716 | } | |
06fda602 | 717 | |
b5b7bef9 | 718 | atomic_set(&pt->used, 0); |
06fda602 BW |
719 | return pt; |
720 | } | |
721 | ||
1eda701e | 722 | static struct i915_page_directory *__alloc_pd(size_t sz) |
06fda602 | 723 | { |
ec565b3c | 724 | struct i915_page_directory *pd; |
06fda602 | 725 | |
1eda701e | 726 | pd = kzalloc(sz, I915_GFP_ALLOW_FAIL); |
b5b7bef9 MK |
727 | if (unlikely(!pd)) |
728 | return NULL; | |
729 | ||
b5b7bef9 | 730 | spin_lock_init(&pd->lock); |
b5b7bef9 MK |
731 | return pd; |
732 | } | |
733 | ||
734 | static struct i915_page_directory *alloc_pd(struct i915_address_space *vm) | |
735 | { | |
736 | struct i915_page_directory *pd; | |
737 | ||
1eda701e | 738 | pd = __alloc_pd(sizeof(*pd)); |
fe52e37f | 739 | if (unlikely(!pd)) |
06fda602 BW |
740 | return ERR_PTR(-ENOMEM); |
741 | ||
57a7e305 | 742 | if (unlikely(setup_page_dma(vm, px_base(pd)))) { |
fe52e37f CW |
743 | kfree(pd); |
744 | return ERR_PTR(-ENOMEM); | |
745 | } | |
e5815a2e | 746 | |
06fda602 BW |
747 | return pd; |
748 | } | |
749 | ||
/* Release a page directory (or table, via free_px) and its backing page. */
static void free_pd(struct i915_address_space *vm, struct i915_page_dma *pd)
{
	cleanup_page_dma(vm, pd);
	kfree(pd);
}

#define free_px(vm, px) free_pd(vm, px_base(px))
f20f272f MK |
758 | static inline void |
759 | write_dma_entry(struct i915_page_dma * const pdma, | |
6239901c | 760 | const unsigned short idx, |
f20f272f MK |
761 | const u64 encoded_entry) |
762 | { | |
763 | u64 * const vaddr = kmap_atomic(pdma->page); | |
764 | ||
6239901c | 765 | vaddr[idx] = encoded_entry; |
f20f272f MK |
766 | kunmap_atomic(vaddr); |
767 | } | |
768 | ||
769 | static inline void | |
770 | __set_pd_entry(struct i915_page_directory * const pd, | |
6239901c | 771 | const unsigned short idx, |
f20f272f MK |
772 | struct i915_page_dma * const to, |
773 | u64 (*encode)(const dma_addr_t, const enum i915_cache_level)) | |
2e906bea | 774 | { |
6acbe9f6 CW |
775 | /* Each thread pre-pins the pd, and we may have a thread per pde. */ |
776 | GEM_BUG_ON(atomic_read(px_used(pd)) > 2 * ARRAY_SIZE(pd->entry)); | |
f20f272f | 777 | |
57a7e305 | 778 | atomic_inc(px_used(pd)); |
6239901c CW |
779 | pd->entry[idx] = to; |
780 | write_dma_entry(px_base(pd), idx, encode(to->daddr, I915_CACHE_LLC)); | |
6ac18502 MT |
781 | } |
782 | ||
6239901c CW |
783 | #define set_pd_entry(pd, idx, to) \ |
784 | __set_pd_entry((pd), (idx), px_base(to), gen8_pde_encode) | |
c03cbe4c | 785 | |
f20f272f | 786 | static inline void |
c03cbe4c | 787 | clear_pd_entry(struct i915_page_directory * const pd, |
6239901c CW |
788 | const unsigned short idx, |
789 | const struct i915_page_scratch * const scratch) | |
69ab76fd | 790 | { |
57a7e305 | 791 | GEM_BUG_ON(atomic_read(px_used(pd)) == 0); |
9ee72503 | 792 | |
6239901c CW |
793 | write_dma_entry(px_base(pd), idx, scratch->encode); |
794 | pd->entry[idx] = NULL; | |
57a7e305 | 795 | atomic_dec(px_used(pd)); |
6ac18502 MT |
796 | } |
797 | ||
50b38bc4 MK |
798 | static bool |
799 | release_pd_entry(struct i915_page_directory * const pd, | |
6239901c | 800 | const unsigned short idx, |
57a7e305 | 801 | struct i915_page_table * const pt, |
6239901c | 802 | const struct i915_page_scratch * const scratch) |
50b38bc4 MK |
803 | { |
804 | bool free = false; | |
805 | ||
4c2be3c5 CW |
806 | if (atomic_add_unless(&pt->used, -1, 1)) |
807 | return false; | |
808 | ||
50b38bc4 | 809 | spin_lock(&pd->lock); |
57a7e305 | 810 | if (atomic_dec_and_test(&pt->used)) { |
6239901c | 811 | clear_pd_entry(pd, idx, scratch); |
50b38bc4 MK |
812 | free = true; |
813 | } | |
814 | spin_unlock(&pd->lock); | |
815 | ||
816 | return free; | |
817 | } | |
f20f272f | 818 | |
fb251a72 CW |
819 | /* |
820 | * PDE TLBs are a pain to invalidate on GEN8+. When we modify | |
fce93755 MK |
821 | * the page table structures, we mark them dirty so that |
822 | * context switching/execlist queuing code takes extra steps | |
823 | * to ensure that tlbs are flushed. | |
824 | */ | |
ab53497b | 825 | static void mark_tlbs_dirty(struct i915_ppgtt *ppgtt) |
fce93755 | 826 | { |
fb251a72 | 827 | ppgtt->pd_dirty_engines = ALL_ENGINES; |
fce93755 MK |
828 | } |
829 | ||
/*
 * Tell the host (when running as a vGPU guest) that this ppgtt has been
 * created or is about to be destroyed, handing over the dma addresses of
 * the top-level page directory (4lvl) or each of the four PDPs (3lvl)
 * through the vgtif mailbox registers.
 *
 * While registered with the host, the top-level pd holds an extra
 * px_used reference so it is never reaped.
 */
static void gen8_ppgtt_notify_vgt(struct i915_ppgtt *ppgtt, bool create)
{
	struct drm_i915_private *dev_priv = ppgtt->vm.i915;
	enum vgt_g2v_type msg;
	int i;

	if (create)
		atomic_inc(px_used(ppgtt->pd)); /* never remove */
	else
		atomic_dec(px_used(ppgtt->pd));

	/* Serialise all g2v message sequences against each other. */
	mutex_lock(&dev_priv->vgpu.lock);

	if (i915_vm_is_4lvl(&ppgtt->vm)) {
		const u64 daddr = px_dma(ppgtt->pd);

		I915_WRITE(vgtif_reg(pdp[0].lo), lower_32_bits(daddr));
		I915_WRITE(vgtif_reg(pdp[0].hi), upper_32_bits(daddr));

		msg = (create ? VGT_G2V_PPGTT_L4_PAGE_TABLE_CREATE :
				VGT_G2V_PPGTT_L4_PAGE_TABLE_DESTROY);
	} else {
		for (i = 0; i < GEN8_3LVL_PDPES; i++) {
			const u64 daddr = i915_page_dir_dma_addr(ppgtt, i);

			I915_WRITE(vgtif_reg(pdp[i].lo), lower_32_bits(daddr));
			I915_WRITE(vgtif_reg(pdp[i].hi), upper_32_bits(daddr));
		}

		msg = (create ? VGT_G2V_PPGTT_L3_PAGE_TABLE_CREATE :
				VGT_G2V_PPGTT_L3_PAGE_TABLE_DESTROY);
	}

	/* g2v_notify atomically (via hv trap) consumes the message packet. */
	I915_WRITE(vgtif_reg(g2v_notify), msg);

	mutex_unlock(&dev_priv->vgpu.lock);
}
868 | ||
27763264 CW |
/* Index shifts into the pagetable are offset by GEN8_PTE_SHIFT [12] */
#define GEN8_PAGE_SIZE (SZ_4K) /* page and page-directory sizes are the same */
#define GEN8_PTE_SHIFT (ilog2(GEN8_PAGE_SIZE))
/* One page of u64 entries => 512 entries per directory/table level */
#define GEN8_PDES (GEN8_PAGE_SIZE / sizeof(u64))
/* Address bits (in page units) consumed below a level-lvl entry */
#define gen8_pd_shift(lvl) ((lvl) * ilog2(GEN8_PDES))
/* Index into the level-lvl directory for a page-granular address i */
#define gen8_pd_index(i, lvl) i915_pde_index((i), gen8_pd_shift(lvl))
/* As above, but starting from a byte address rather than a page index */
#define __gen8_pte_shift(lvl) (GEN8_PTE_SHIFT + gen8_pd_shift(lvl))
#define __gen8_pte_index(a, lvl) i915_pde_index((a), __gen8_pte_shift(lvl))
877 | ||
/*
 * Compute the span of entries inside a single level-@lvl directory that
 * is touched by [start, end) (addresses in page units). The index of the
 * first entry is stored in *@idx and the number of entries is returned.
 * If the range extends past the end of this directory, the count is
 * clamped to the directory boundary (GEN8_PDES - *idx) and the caller is
 * expected to continue into the next directory.
 */
static inline unsigned int
gen8_pd_range(u64 start, u64 end, int lvl, unsigned int *idx)
{
	const int shift = gen8_pd_shift(lvl);
	const u64 mask = ~0ull << gen8_pd_shift(lvl + 1);

	GEM_BUG_ON(start >= end);
	/* Bias end upwards so a partially covered final entry is counted. */
	end += ~mask >> gen8_pd_shift(1);

	*idx = i915_pde_index(start, shift);
	if ((start ^ end) & mask)
		return GEN8_PDES - *idx; /* spills into the next directory */
	else
		return i915_pde_index(end, shift) - *idx;
}
893 | ||
894 | static inline bool gen8_pd_contains(u64 start, u64 end, int lvl) | |
895 | { | |
896 | const u64 mask = ~0ull << gen8_pd_shift(lvl + 1); | |
897 | ||
898 | GEM_BUG_ON(start >= end); | |
899 | return (start ^ end) & mask && (start & ~mask) == 0; | |
900 | } | |
901 | ||
902 | static inline unsigned int gen8_pt_count(u64 start, u64 end) | |
903 | { | |
904 | GEM_BUG_ON(start >= end); | |
905 | if ((start ^ end) >> gen8_pd_shift(1)) | |
906 | return GEN8_PDES - (start & (GEN8_PDES - 1)); | |
907 | else | |
908 | return end - start; | |
909 | } | |
910 | ||
04364138 CW |
911 | static inline unsigned int gen8_pd_top_count(const struct i915_address_space *vm) |
912 | { | |
913 | unsigned int shift = __gen8_pte_shift(vm->top); | |
914 | return (vm->total + (1ull << shift) - 1) >> shift; | |
915 | } | |
916 | ||
eb7c022d MK |
917 | static inline struct i915_page_directory * |
918 | gen8_pdp_for_page_index(struct i915_address_space * const vm, const u64 idx) | |
919 | { | |
920 | struct i915_ppgtt * const ppgtt = i915_vm_to_ppgtt(vm); | |
921 | ||
922 | if (vm->top == 2) | |
923 | return ppgtt->pd; | |
924 | else | |
925 | return i915_pd_entry(ppgtt->pd, gen8_pd_index(idx, vm->top)); | |
926 | } | |
927 | ||
928 | static inline struct i915_page_directory * | |
929 | gen8_pdp_for_page_address(struct i915_address_space * const vm, const u64 addr) | |
930 | { | |
931 | return gen8_pdp_for_page_index(vm, addr >> GEN8_PTE_SHIFT); | |
932 | } | |
933 | ||
1eda701e CW |
934 | static void __gen8_ppgtt_cleanup(struct i915_address_space *vm, |
935 | struct i915_page_directory *pd, | |
936 | int count, int lvl) | |
a9abea97 | 937 | { |
1eda701e CW |
938 | if (lvl) { |
939 | void **pde = pd->entry; | |
a9abea97 | 940 | |
1eda701e CW |
941 | do { |
942 | if (!*pde) | |
943 | continue; | |
a9abea97 | 944 | |
1eda701e CW |
945 | __gen8_ppgtt_cleanup(vm, *pde, GEN8_PDES, lvl - 1); |
946 | } while (pde++, --count); | |
a9abea97 CW |
947 | } |
948 | ||
1eda701e | 949 | free_px(vm, pd); |
a9abea97 CW |
950 | } |
951 | ||
/*
 * Tear down a gen8+ ppgtt: tell the host first (vGPU only), then free
 * the whole page-directory tree and finally the scratch pages.
 */
static void gen8_ppgtt_cleanup(struct i915_address_space *vm)
{
	struct i915_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);

	/* Drop the host's reference before dismantling the tree. */
	if (intel_vgpu_active(vm->i915))
		gen8_ppgtt_notify_vgt(ppgtt, false);

	__gen8_ppgtt_cleanup(vm, ppgtt->pd, gen8_pd_top_count(vm), vm->top);
	free_scratch(vm);
}
962 | ||
4c2be3c5 CW |
/*
 * Reset the PTEs for [start, end) (page units) beneath @pd at level @lvl
 * back to scratch, freeing any page tables/directories that become empty
 * along the way.
 *
 * Returns the page index at which the walk stopped (== end when the
 * whole range was processed); gen8_ppgtt_alloc() also uses this to
 * unwind a partially completed allocation.
 */
static u64 __gen8_ppgtt_clear(struct i915_address_space * const vm,
			      struct i915_page_directory * const pd,
			      u64 start, const u64 end, int lvl)
{
	const struct i915_page_scratch * const scratch = &vm->scratch[lvl];
	unsigned int idx, len;

	GEM_BUG_ON(end > vm->total >> GEN8_PTE_SHIFT);

	len = gen8_pd_range(start, end, lvl--, &idx);
	DBG("%s(%p):{ lvl:%d, start:%llx, end:%llx, idx:%d, len:%d, used:%d }\n",
	    __func__, vm, lvl + 1, start, end,
	    idx, len, atomic_read(px_used(pd)));
	GEM_BUG_ON(!len || len >= atomic_read(px_used(pd)));

	do {
		struct i915_page_table *pt = pd->entry[idx];

		/*
		 * Take a temporary reference on pt (the fetch_inc) so it
		 * cannot vanish beneath us; if the old count was already
		 * saturated (presumably meaning the child is fully
		 * populated -- TODO confirm) and the range covers the
		 * entire entry, drop the whole subtree in one go.
		 */
		if (atomic_fetch_inc(&pt->used) >> gen8_pd_shift(1) &&
		    gen8_pd_contains(start, end, lvl)) {
			DBG("%s(%p):{ lvl:%d, idx:%d, start:%llx, end:%llx } removing pd\n",
			    __func__, vm, lvl + 1, idx, start, end);
			clear_pd_entry(pd, idx, scratch);
			__gen8_ppgtt_cleanup(vm, as_pd(pt), I915_PDES, lvl);
			start += (u64)I915_PDES << gen8_pd_shift(lvl);
			continue;
		}

		if (lvl) {
			/* Recurse into the child directory. */
			start = __gen8_ppgtt_clear(vm, as_pd(pt),
						   start, end, lvl);
		} else {
			unsigned int count;
			u64 *vaddr;

			count = gen8_pt_count(start, end);
			DBG("%s(%p):{ lvl:%d, start:%llx, end:%llx, idx:%d, len:%d, used:%d } removing pte\n",
			    __func__, vm, lvl, start, end,
			    gen8_pd_index(start, 0), count,
			    atomic_read(&pt->used));
			GEM_BUG_ON(!count || count >= atomic_read(&pt->used));

			/* Rewrite the PTEs to point at the scratch page. */
			vaddr = kmap_atomic_px(pt);
			memset64(vaddr + gen8_pd_index(start, 0),
				 vm->scratch[0].encode,
				 count);
			kunmap_atomic(vaddr);

			atomic_sub(count, &pt->used);
			start += count;
		}

		/* Drop our temporary reference; free pt if now unused. */
		if (release_pd_entry(pd, idx, pt, scratch))
			free_px(vm, pt);
	} while (idx++, --len);

	return start;
}
2ce5179f | 1021 | |
4c2be3c5 CW |
1022 | static void gen8_ppgtt_clear(struct i915_address_space *vm, |
1023 | u64 start, u64 length) | |
d209b9c3 | 1024 | { |
4c2be3c5 CW |
1025 | GEM_BUG_ON(!IS_ALIGNED(start, BIT_ULL(GEN8_PTE_SHIFT))); |
1026 | GEM_BUG_ON(!IS_ALIGNED(length, BIT_ULL(GEN8_PTE_SHIFT))); | |
c71ccbe2 | 1027 | GEM_BUG_ON(range_overflows(start, length, vm->total)); |
06fda602 | 1028 | |
4c2be3c5 CW |
1029 | start >>= GEN8_PTE_SHIFT; |
1030 | length >>= GEN8_PTE_SHIFT; | |
1031 | GEM_BUG_ON(length == 0); | |
1032 | ||
1033 | __gen8_ppgtt_clear(vm, i915_vm_to_ppgtt(vm)->pd, | |
1034 | start, start + length, vm->top); | |
d209b9c3 | 1035 | } |
459108b8 | 1036 | |
8a98e839 CW |
/*
 * Populate the page-table tree beneath @pd at level @lvl so that every
 * page in [*start, end) (page units) has a PTE slot. *start is advanced
 * as leaves are filled in, so on error the caller can unwind exactly
 * the range that was completed.
 *
 * pd->lock protects pd->entry[]; it is dropped around allocations and
 * recursion. A freshly allocated child that loses the re-population race
 * is stashed in @alloc for reuse on the next iteration (and freed at
 * the end if still unused).
 *
 * Returns 0 on success or a negative errno from a failed allocation.
 */
static int __gen8_ppgtt_alloc(struct i915_address_space * const vm,
			      struct i915_page_directory * const pd,
			      u64 * const start, const u64 end, int lvl)
{
	const struct i915_page_scratch * const scratch = &vm->scratch[lvl];
	struct i915_page_table *alloc = NULL;
	unsigned int idx, len;
	int ret = 0;

	GEM_BUG_ON(end > vm->total >> GEN8_PTE_SHIFT);

	len = gen8_pd_range(*start, end, lvl--, &idx);
	DBG("%s(%p):{ lvl:%d, start:%llx, end:%llx, idx:%d, len:%d, used:%d }\n",
	    __func__, vm, lvl + 1, *start, end,
	    idx, len, atomic_read(px_used(pd)));
	GEM_BUG_ON(!len || (idx + len - 1) >> gen8_pd_shift(1));

	spin_lock(&pd->lock);
	GEM_BUG_ON(!atomic_read(px_used(pd))); /* Must be pinned! */
	do {
		struct i915_page_table *pt = pd->entry[idx];

		if (!pt) {
			/* Allocation may sleep; drop the spinlock. */
			spin_unlock(&pd->lock);

			DBG("%s(%p):{ lvl:%d, idx:%d } allocating new tree\n",
			    __func__, vm, lvl + 1, idx);

			/* Reuse a child left over from a lost race. */
			pt = fetch_and_zero(&alloc);
			if (lvl) {
				if (!pt) {
					pt = &alloc_pd(vm)->pt;
					if (IS_ERR(pt)) {
						ret = PTR_ERR(pt);
						goto out;
					}
				}

				/* New directory: all entries -> scratch. */
				fill_px(pt, vm->scratch[lvl].encode);
			} else {
				if (!pt) {
					pt = alloc_pt(vm);
					if (IS_ERR(pt)) {
						ret = PTR_ERR(pt);
						goto out;
					}
				}

				/*
				 * Prefill with scratch unless we are about
				 * to overwrite every PTE anyway.
				 */
				if (intel_vgpu_active(vm->i915) ||
				    gen8_pt_count(*start, end) < I915_PDES)
					fill_px(pt, vm->scratch[lvl].encode);
			}

			/* Re-check: someone may have beaten us to it. */
			spin_lock(&pd->lock);
			if (likely(!pd->entry[idx]))
				set_pd_entry(pd, idx, pt);
			else
				alloc = pt, pt = pd->entry[idx];
		}

		if (lvl) {
			/* Pin the child across the unlocked recursion. */
			atomic_inc(&pt->used);
			spin_unlock(&pd->lock);

			ret = __gen8_ppgtt_alloc(vm, as_pd(pt),
						 start, end, lvl);
			if (unlikely(ret)) {
				if (release_pd_entry(pd, idx, pt, scratch))
					free_px(vm, pt);
				goto out;
			}

			spin_lock(&pd->lock);
			atomic_dec(&pt->used);
			GEM_BUG_ON(!atomic_read(&pt->used));
		} else {
			unsigned int count = gen8_pt_count(*start, end);

			DBG("%s(%p):{ lvl:%d, start:%llx, end:%llx, idx:%d, len:%d, used:%d } inserting pte\n",
			    __func__, vm, lvl, *start, end,
			    gen8_pd_index(*start, 0), count,
			    atomic_read(&pt->used));

			atomic_add(count, &pt->used);
			/* All other pdes may be simultaneously removed */
			GEM_BUG_ON(atomic_read(&pt->used) > 2 * I915_PDES);
			*start += count;
		}
	} while (idx++, --len);
	spin_unlock(&pd->lock);
out:
	/* Free a speculative allocation that was never installed. */
	if (alloc)
		free_px(vm, alloc);
	return ret;
}
1132 | ||
8a98e839 CW |
1133 | static int gen8_ppgtt_alloc(struct i915_address_space *vm, |
1134 | u64 start, u64 length) | |
a9abea97 | 1135 | { |
6b5f3cb1 | 1136 | u64 from; |
8a98e839 | 1137 | int err; |
a9abea97 | 1138 | |
8a98e839 CW |
1139 | GEM_BUG_ON(!IS_ALIGNED(start, BIT_ULL(GEN8_PTE_SHIFT))); |
1140 | GEM_BUG_ON(!IS_ALIGNED(length, BIT_ULL(GEN8_PTE_SHIFT))); | |
c71ccbe2 | 1141 | GEM_BUG_ON(range_overflows(start, length, vm->total)); |
a9abea97 | 1142 | |
8a98e839 CW |
1143 | start >>= GEN8_PTE_SHIFT; |
1144 | length >>= GEN8_PTE_SHIFT; | |
1145 | GEM_BUG_ON(length == 0); | |
6b5f3cb1 | 1146 | from = start; |
a9abea97 | 1147 | |
8a98e839 CW |
1148 | err = __gen8_ppgtt_alloc(vm, i915_vm_to_ppgtt(vm)->pd, |
1149 | &start, start + length, vm->top); | |
5cad0ddf | 1150 | if (unlikely(err && from != start)) |
8a98e839 CW |
1151 | __gen8_ppgtt_clear(vm, i915_vm_to_ppgtt(vm)->pd, |
1152 | from, start, vm->top); | |
a9abea97 | 1153 | |
8a98e839 | 1154 | return err; |
a9abea97 CW |
1155 | } |
1156 | ||
/*
 * Cursor over a vma's backing scatterlist: the current sg element and
 * the [dma, max) window of dma addresses it provides. The struct is
 * declared inside the function's return type so both stay private to
 * this file.
 */
static inline struct sgt_dma {
	struct scatterlist *sg;
	dma_addr_t dma, max;
} sgt_dma(struct i915_vma *vma) {
	struct scatterlist *sg = vma->pages->sgl;
	dma_addr_t addr = sg_dma_address(sg);
	return (struct sgt_dma) { sg, addr, addr + sg->length };
}
1165 | ||
/*
 * Write 4K PTEs for the pages of @iter starting at page index @idx under
 * the given @pdp, walking across page tables and page directories as the
 * index rolls over. Stops when the scatterlist is exhausted (returns 0)
 * or when the pdp boundary is reached (returns the next index, so the
 * caller can continue in the following pdp).
 */
static __always_inline u64
gen8_ppgtt_insert_pte(struct i915_ppgtt *ppgtt,
		      struct i915_page_directory *pdp,
		      struct sgt_dma *iter,
		      u64 idx,
		      enum i915_cache_level cache_level,
		      u32 flags)
{
	struct i915_page_directory *pd;
	const gen8_pte_t pte_encode = gen8_pte_encode(0, cache_level, flags);
	gen8_pte_t *vaddr;

	pd = i915_pd_entry(pdp, gen8_pd_index(idx, 2));
	vaddr = kmap_atomic_px(i915_pt_entry(pd, gen8_pd_index(idx, 1)));
	do {
		GEM_BUG_ON(iter->sg->length < I915_GTT_PAGE_SIZE);
		vaddr[gen8_pd_index(idx, 0)] = pte_encode | iter->dma;

		iter->dma += I915_GTT_PAGE_SIZE;
		if (iter->dma >= iter->max) {
			/* Current sg element exhausted; advance. */
			iter->sg = __sg_next(iter->sg);
			if (!iter->sg) {
				idx = 0; /* signal completion */
				break;
			}

			iter->dma = sg_dma_address(iter->sg);
			iter->max = iter->dma + iter->sg->length;
		}

		/* Crossed into the next page table? Remap the kmap window. */
		if (gen8_pd_index(++idx, 0) == 0) {
			if (gen8_pd_index(idx, 1) == 0) {
				/* Limited by sg length for 3lvl */
				if (gen8_pd_index(idx, 2) == 0)
					break;

				pd = pdp->entry[gen8_pd_index(idx, 2)];
			}

			kunmap_atomic(vaddr);
			vaddr = kmap_atomic_px(i915_pt_entry(pd, gen8_pd_index(idx, 1)));
		}
	} while (1);
	kunmap_atomic(vaddr);

	return idx;
}
1213 | ||
eb7c022d MK |
/*
 * Insert PTEs for a vma whose backing store allows huge GTT pages.
 * For each chunk we pick the largest page size the alignment permits:
 * a 2M PDE when the dma address, remaining length and GTT offset all
 * allow it; otherwise 4K PTEs, opportunistically promoting a fully
 * eligible page table to 64K mode (GEN8_PDE_IPS_64K) afterwards.
 * Requires a 4-level ppgtt.
 */
static void gen8_ppgtt_insert_huge(struct i915_vma *vma,
				   struct sgt_dma *iter,
				   enum i915_cache_level cache_level,
				   u32 flags)
{
	const gen8_pte_t pte_encode = gen8_pte_encode(0, cache_level, flags);
	u64 start = vma->node.start;
	dma_addr_t rem = iter->sg->length;

	GEM_BUG_ON(!i915_vm_is_4lvl(vma->vm));

	do {
		struct i915_page_directory * const pdp =
			gen8_pdp_for_page_address(vma->vm, start);
		struct i915_page_directory * const pd =
			i915_pd_entry(pdp, __gen8_pte_index(start, 2));
		gen8_pte_t encode = pte_encode;
		unsigned int maybe_64K = -1; /* pde index eligible for 64K */
		unsigned int page_size;
		gen8_pte_t *vaddr;
		u16 index;

		/* 2M page: needs dma/offset alignment and enough length. */
		if (vma->page_sizes.sg & I915_GTT_PAGE_SIZE_2M &&
		    IS_ALIGNED(iter->dma, I915_GTT_PAGE_SIZE_2M) &&
		    rem >= I915_GTT_PAGE_SIZE_2M &&
		    !__gen8_pte_index(start, 0)) {
			index = __gen8_pte_index(start, 1);
			encode |= GEN8_PDE_PS_2M;
			page_size = I915_GTT_PAGE_SIZE_2M;

			/* Write the PDE itself, not a PTE. */
			vaddr = kmap_atomic_px(pd);
		} else {
			struct i915_page_table *pt =
				i915_pt_entry(pd, __gen8_pte_index(start, 1));

			index = __gen8_pte_index(start, 0);
			page_size = I915_GTT_PAGE_SIZE;

			/*
			 * Starting at a page-table boundary with 64K-worthy
			 * alignment? Remember the pde for later promotion.
			 */
			if (!index &&
			    vma->page_sizes.sg & I915_GTT_PAGE_SIZE_64K &&
			    IS_ALIGNED(iter->dma, I915_GTT_PAGE_SIZE_64K) &&
			    (IS_ALIGNED(rem, I915_GTT_PAGE_SIZE_64K) ||
			     rem >= (I915_PDES - index) * I915_GTT_PAGE_SIZE))
				maybe_64K = __gen8_pte_index(start, 1);

			vaddr = kmap_atomic_px(pt);
		}

		do {
			GEM_BUG_ON(iter->sg->length < page_size);
			vaddr[index++] = encode | iter->dma;

			start += page_size;
			iter->dma += page_size;
			rem -= page_size;
			if (iter->dma >= iter->max) {
				iter->sg = __sg_next(iter->sg);
				if (!iter->sg)
					break;

				rem = iter->sg->length;
				iter->dma = sg_dma_address(iter->sg);
				iter->max = iter->dma + rem;

				/* New chunk breaks 64K eligibility? */
				if (maybe_64K != -1 && index < I915_PDES &&
				    !(IS_ALIGNED(iter->dma, I915_GTT_PAGE_SIZE_64K) &&
				      (IS_ALIGNED(rem, I915_GTT_PAGE_SIZE_64K) ||
				       rem >= (I915_PDES - index) * I915_GTT_PAGE_SIZE)))
					maybe_64K = -1;

				/* Lost the alignment for this page size. */
				if (unlikely(!IS_ALIGNED(iter->dma, page_size)))
					break;
			}
		} while (rem >= page_size && index < I915_PDES);

		kunmap_atomic(vaddr);

		/*
		 * Is it safe to mark the 2M block as 64K? -- Either we have
		 * filled whole page-table with 64K entries, or filled part of
		 * it and have reached the end of the sg table and we have
		 * enough padding.
		 */
		if (maybe_64K != -1 &&
		    (index == I915_PDES ||
		     (i915_vm_has_scratch_64K(vma->vm) &&
		      !iter->sg && IS_ALIGNED(vma->node.start +
					      vma->node.size,
					      I915_GTT_PAGE_SIZE_2M)))) {
			vaddr = kmap_atomic_px(pd);
			vaddr[maybe_64K] |= GEN8_PDE_IPS_64K;
			kunmap_atomic(vaddr);
			page_size = I915_GTT_PAGE_SIZE_64K;

			/*
			 * We write all 4K page entries, even when using 64K
			 * pages. In order to verify that the HW isn't cheating
			 * by using the 4K PTE instead of the 64K PTE, we want
			 * to remove all the surplus entries. If the HW skipped
			 * the 64K PTE, it will read/write into the scratch page
			 * instead - which we detect as missing results during
			 * selftests.
			 */
			if (I915_SELFTEST_ONLY(vma->vm->scrub_64K)) {
				u16 i;

				encode = vma->vm->scratch[0].encode;
				vaddr = kmap_atomic_px(i915_pt_entry(pd, maybe_64K));

				for (i = 1; i < index; i += 16)
					memset64(vaddr + i, encode, 15);

				kunmap_atomic(vaddr);
			}
		}

		vma->page_sizes.gtt |= page_size;
	} while (iter->sg);
}
1333 | ||
eb7c022d MK |
1334 | static void gen8_ppgtt_insert(struct i915_address_space *vm, |
1335 | struct i915_vma *vma, | |
1336 | enum i915_cache_level cache_level, | |
1337 | u32 flags) | |
894ccebe | 1338 | { |
eb7c022d | 1339 | struct i915_ppgtt * const ppgtt = i915_vm_to_ppgtt(vm); |
5684514b | 1340 | struct sgt_dma iter = sgt_dma(vma); |
de5ba8eb | 1341 | |
0a03852e | 1342 | if (vma->page_sizes.sg > I915_GTT_PAGE_SIZE) { |
eb7c022d MK |
1343 | gen8_ppgtt_insert_huge(vma, &iter, cache_level, flags); |
1344 | } else { | |
bea5faf7 | 1345 | u64 idx = vma->node.start >> GEN8_PTE_SHIFT; |
0a03852e | 1346 | |
eb7c022d MK |
1347 | do { |
1348 | struct i915_page_directory * const pdp = | |
1349 | gen8_pdp_for_page_index(vm, idx); | |
1350 | ||
1351 | idx = gen8_ppgtt_insert_pte(ppgtt, pdp, &iter, idx, | |
1352 | cache_level, flags); | |
1353 | } while (idx); | |
d9ec12f8 MA |
1354 | |
1355 | vma->page_sizes.gtt = I915_GTT_PAGE_SIZE; | |
0a03852e | 1356 | } |
f9b5b782 MT |
1357 | } |
1358 | ||
8776f02b MK |
/*
 * Set up the chain of scratch pages for a gen8+ vm: scratch[0] is the
 * data page every unused PTE points at, and scratch[i] (1 <= i <= top)
 * is a directory page whose entries all point at scratch[i - 1].
 *
 * Returns 0 on success, or a negative errno.
 */
static int gen8_init_scratch(struct i915_address_space *vm)
{
	int ret;
	int i;

	/*
	 * If everybody agrees to not to write into the scratch page,
	 * we can reuse it for all vm, keeping contexts and processes separate.
	 */
	if (vm->has_read_only &&
	    vm->i915->kernel_context &&
	    vm->i915->kernel_context->vm) {
		struct i915_address_space *clone = vm->i915->kernel_context->vm;

		GEM_BUG_ON(!clone->has_read_only);

		vm->scratch_order = clone->scratch_order;
		memcpy(vm->scratch, clone->scratch, sizeof(vm->scratch));
		px_dma(&vm->scratch[0]) = 0; /* no xfer of ownership */
		return 0;
	}

	ret = setup_scratch_page(vm, __GFP_HIGHMEM);
	if (ret)
		return ret;

	vm->scratch[0].encode =
		gen8_pte_encode(px_dma(&vm->scratch[0]),
				I915_CACHE_LLC, vm->has_read_only);

	/* Build each directory level pointing at the level below. */
	for (i = 1; i <= vm->top; i++) {
		if (unlikely(setup_page_dma(vm, px_base(&vm->scratch[i]))))
			goto free_scratch;

		fill_px(&vm->scratch[i], vm->scratch[i - 1].encode);
		vm->scratch[i].encode =
			gen8_pde_encode(px_dma(&vm->scratch[i]),
					I915_CACHE_LLC);
	}

	return 0;

free_scratch:
	/* Releases all levels set up so far, including scratch[0]. */
	free_scratch(vm);
	return -ENOMEM;
}
1405 | ||
/*
 * For a 3-level ppgtt under vGPU, the host expects all four top-level
 * PDP entries to exist up front; allocate and pin each one, pointed at
 * scratch. On mid-loop failure the already-installed entries are left
 * in place for the caller's cleanup path to free.
 */
static int gen8_preallocate_top_level_pdp(struct i915_ppgtt *ppgtt)
{
	struct i915_address_space *vm = &ppgtt->vm;
	struct i915_page_directory *pd = ppgtt->pd;
	unsigned int idx;

	GEM_BUG_ON(vm->top != 2);
	GEM_BUG_ON(gen8_pd_top_count(vm) != GEN8_3LVL_PDPES);

	for (idx = 0; idx < GEN8_3LVL_PDPES; idx++) {
		struct i915_page_directory *pde;

		pde = alloc_pd(vm);
		if (IS_ERR(pde))
			return PTR_ERR(pde);

		fill_px(pde, vm->scratch[1].encode);
		set_pd_entry(pd, idx, pde);
		atomic_inc(px_used(pde)); /* keep pinned */
	}

	return 0;
}
1429 | ||
/*
 * Common initialisation shared by all ppgtt flavours: wire the vm to its
 * gt/device, size the address space, register it, and install the
 * generic vma binding ops. Note i915_address_space_init() is called
 * after the identity fields it consumes have been set.
 */
static void ppgtt_init(struct i915_ppgtt *ppgtt, struct intel_gt *gt)
{
	struct drm_i915_private *i915 = gt->i915;

	ppgtt->vm.gt = gt;
	ppgtt->vm.i915 = i915;
	ppgtt->vm.dma = &i915->drm.pdev->dev;
	ppgtt->vm.total = BIT_ULL(INTEL_INFO(i915)->ppgtt_size);

	i915_address_space_init(&ppgtt->vm, VM_CLASS_PPGTT);

	ppgtt->vm.vma_ops.bind_vma    = ppgtt_bind_vma;
	ppgtt->vm.vma_ops.unbind_vma  = ppgtt_unbind_vma;
	ppgtt->vm.vma_ops.set_pages   = ppgtt_set_pages;
	ppgtt->vm.vma_ops.clear_pages = clear_pages;
}
1446 | ||
73a8fdef MK |
/*
 * Allocate the top-level page directory, sized to exactly the number of
 * entries needed to span vm->total, with every entry initialised to the
 * top-level scratch encoding. The pd is returned pinned (px_used
 * elevated) so it is never reaped by release_pd_entry().
 *
 * Returns the new pd or an ERR_PTR on allocation failure.
 */
static struct i915_page_directory *
gen8_alloc_top_pd(struct i915_address_space *vm)
{
	const unsigned int count = gen8_pd_top_count(vm);
	struct i915_page_directory *pd;

	GEM_BUG_ON(count > ARRAY_SIZE(pd->entry));

	/* Trailing flexible array: only allocate the slots we need. */
	pd = __alloc_pd(offsetof(typeof(*pd), entry[count]));
	if (unlikely(!pd))
		return ERR_PTR(-ENOMEM);

	if (unlikely(setup_page_dma(vm, px_base(pd)))) {
		kfree(pd);
		return ERR_PTR(-ENOMEM);
	}

	fill_page_dma(px_base(pd), vm->scratch[vm->top].encode, count);
	atomic_inc(px_used(pd)); /* mark as pinned */
	return pd;
}
1468 | ||
/*
 * GEN8 legacy ppgtt programming is accomplished through a max 4 PDP registers
 * with a net effect resembling a 2-level page table in normal x86 terms. Each
 * PDP represents 1GB of memory 4 * 512 * 512 * 4096 = 4GB legacy 32b address
 * space.
 *
 * Constructs and returns a new gen8+ full ppgtt, or an ERR_PTR on
 * failure; error paths unwind in reverse order of construction.
 */
static struct i915_ppgtt *gen8_ppgtt_create(struct drm_i915_private *i915)
{
	struct i915_ppgtt *ppgtt;
	int err;

	ppgtt = kzalloc(sizeof(*ppgtt), GFP_KERNEL);
	if (!ppgtt)
		return ERR_PTR(-ENOMEM);

	ppgtt_init(ppgtt, &i915->gt);
	/* 4 levels (top == 3) when the hw supports a 48b address space. */
	ppgtt->vm.top = i915_vm_is_4lvl(&ppgtt->vm) ? 3 : 2;

	/*
	 * From bdw, there is hw support for read-only pages in the PPGTT.
	 *
	 * Gen11 has HSDES#:1807136187 unresolved. Disable ro support
	 * for now.
	 */
	ppgtt->vm.has_read_only = INTEL_GEN(i915) != 11;

	/* There are only few exceptions for gen >=6. chv and bxt.
	 * And we are not sure about the latter so play safe for now.
	 */
	if (IS_CHERRYVIEW(i915) || IS_BROXTON(i915))
		ppgtt->vm.pt_kmap_wc = true;

	err = gen8_init_scratch(&ppgtt->vm);
	if (err)
		goto err_free;

	ppgtt->pd = gen8_alloc_top_pd(&ppgtt->vm);
	if (IS_ERR(ppgtt->pd)) {
		err = PTR_ERR(ppgtt->pd);
		goto err_free_scratch;
	}

	/* vGPU hosts require all 4 PDPs of a 3lvl ppgtt to pre-exist. */
	if (!i915_vm_is_4lvl(&ppgtt->vm)) {
		if (intel_vgpu_active(i915)) {
			err = gen8_preallocate_top_level_pdp(ppgtt);
			if (err)
				goto err_free_pd;
		}
	}

	ppgtt->vm.insert_entries = gen8_ppgtt_insert;
	ppgtt->vm.allocate_va_range = gen8_ppgtt_alloc;
	ppgtt->vm.clear_range = gen8_ppgtt_clear;

	if (intel_vgpu_active(i915))
		gen8_ppgtt_notify_vgt(ppgtt, true);

	ppgtt->vm.cleanup = gen8_ppgtt_cleanup;

	return ppgtt;

err_free_pd:
	__gen8_ppgtt_cleanup(&ppgtt->vm, ppgtt->pd,
			     gen8_pd_top_count(&ppgtt->vm), ppgtt->vm.top);
err_free_scratch:
	free_scratch(&ppgtt->vm);
err_free:
	kfree(ppgtt);
	return ERR_PTR(err);
}
1540 | ||
/* Write pde (index) from the page directory @pd to the page table @pt */
static inline void gen6_write_pde(const struct gen6_ppgtt *ppgtt,
				  const unsigned int pde,
				  const struct i915_page_table *pt)
{
	/* Caller needs to make sure the write completes if necessary */
	iowrite32(GEN6_PDE_ADDR_ENCODE(px_dma(pt)) | GEN6_PDE_VALID,
		  ppgtt->pd_addr + pde);
}
6197349b | 1550 | |
/*
 * Enable PPGTT on gen7 (IVB/HSW): set the ECO/ECOCHK caching bits for
 * the platform, then flip the per-engine GFX_PPGTT_ENABLE bit.
 */
static void gen7_ppgtt_enable(struct intel_gt *gt)
{
	struct drm_i915_private *i915 = gt->i915;
	struct intel_uncore *uncore = gt->uncore;
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	u32 ecochk;

	intel_uncore_rmw(uncore, GAC_ECO_BITS, 0, ECOBITS_PPGTT_CACHE64B);

	ecochk = intel_uncore_read(uncore, GAM_ECOCHK);
	if (IS_HASWELL(i915)) {
		ecochk |= ECOCHK_PPGTT_WB_HSW;
	} else {
		ecochk |= ECOCHK_PPGTT_LLC_IVB;
		ecochk &= ~ECOCHK_PPGTT_GFDT_IVB;
	}
	intel_uncore_write(uncore, GAM_ECOCHK, ecochk);

	for_each_engine(engine, i915, id) {
		/* GFX_MODE is per-ring on gen7+ */
		ENGINE_WRITE(engine,
			     RING_MODE_GEN7,
			     _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE));
	}
}
6197349b | 1577 | |
/*
 * Enable PPGTT on gen6 (SNB): configure the ECO/GAB/ECOCHK bits, then
 * set the global GFX_PPGTT_ENABLE (a single register on gen6, unlike
 * the per-engine version on gen7+).
 */
static void gen6_ppgtt_enable(struct intel_gt *gt)
{
	struct intel_uncore *uncore = gt->uncore;

	intel_uncore_rmw(uncore,
			 GAC_ECO_BITS,
			 0,
			 ECOBITS_SNB_BIT | ECOBITS_PPGTT_CACHE64B);

	intel_uncore_rmw(uncore,
			 GAB_CTL,
			 0,
			 GAB_CTL_CONT_AFTER_PAGEFAULT);

	intel_uncore_rmw(uncore,
			 GAM_ECOCHK,
			 0,
			 ECOCHK_SNB_BIT | ECOCHK_PPGTT_CACHE64B);

	if (HAS_PPGTT(uncore->i915)) /* may be disabled for VT-d */
		intel_uncore_write(uncore,
				   GFX_MODE,
				   _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE));
}
1602 | ||
/* PPGTT support for Sandybdrige/Gen6 and later */
/*
 * vm->clear_range for gen6/7: rewrite the PTEs covering
 * [start, start + length) to the scratch page, table by table, and
 * flag the ppgtt for a later sweep if any table's use count hits zero.
 */
static void gen6_ppgtt_clear_range(struct i915_address_space *vm,
				   u64 start, u64 length)
{
	struct gen6_ppgtt * const ppgtt = to_gen6_ppgtt(i915_vm_to_ppgtt(vm));
	const unsigned int first_entry = start / I915_GTT_PAGE_SIZE;
	const gen6_pte_t scratch_pte = vm->scratch[0].encode;
	unsigned int pde = first_entry / GEN6_PTES;
	unsigned int pte = first_entry % GEN6_PTES;
	unsigned int num_entries = length / I915_GTT_PAGE_SIZE;

	while (num_entries) {
		struct i915_page_table * const pt =
			i915_pt_entry(ppgtt->base.pd, pde++);
		/* Clamp to the end of the current page table. */
		const unsigned int count = min(num_entries, GEN6_PTES - pte);
		gen6_pte_t *vaddr;

		/* Must never clear through the shared scratch pt. */
		GEM_BUG_ON(px_base(pt) == px_base(&vm->scratch[1]));

		num_entries -= count;

		GEM_BUG_ON(count > atomic_read(&pt->used));
		if (!atomic_sub_return(count, &pt->used))
			ppgtt->scan_for_unused_pt = true;

		/*
		 * Note that the hw doesn't support removing PDE on the fly
		 * (they are cached inside the context with no means to
		 * invalidate the cache), so we can only reset the PTE
		 * entries back to scratch.
		 */

		vaddr = kmap_atomic_px(pt);
		memset32(vaddr + pte, scratch_pte, count);
		kunmap_atomic(vaddr);

		pte = 0; /* subsequent tables are cleared from the start */
	}
}
1642 | ||
/*
 * vm->insert_entries for gen6/7: write a 4K PTE for every backing page
 * of @vma, remapping the kmap window whenever the walk crosses into the
 * next page table. Only 4K GTT pages are supported on these platforms.
 */
static void gen6_ppgtt_insert_entries(struct i915_address_space *vm,
				      struct i915_vma *vma,
				      enum i915_cache_level cache_level,
				      u32 flags)
{
	struct i915_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
	struct i915_page_directory * const pd = ppgtt->pd;
	unsigned first_entry = vma->node.start / I915_GTT_PAGE_SIZE;
	unsigned act_pt = first_entry / GEN6_PTES;
	unsigned act_pte = first_entry % GEN6_PTES;
	const u32 pte_encode = vm->pte_encode(0, cache_level, flags);
	struct sgt_dma iter = sgt_dma(vma);
	gen6_pte_t *vaddr;

	/* Must not write through the shared scratch pt. */
	GEM_BUG_ON(pd->entry[act_pt] == &vm->scratch[1]);

	vaddr = kmap_atomic_px(i915_pt_entry(pd, act_pt));
	do {
		GEM_BUG_ON(iter.sg->length < I915_GTT_PAGE_SIZE);
		vaddr[act_pte] = pte_encode | GEN6_PTE_ADDR_ENCODE(iter.dma);

		iter.dma += I915_GTT_PAGE_SIZE;
		if (iter.dma == iter.max) {
			/* Current sg element exhausted; advance. */
			iter.sg = __sg_next(iter.sg);
			if (!iter.sg)
				break;

			iter.dma = sg_dma_address(iter.sg);
			iter.max = iter.dma + iter.sg->length;
		}

		/* Rolled over into the next page table? Remap. */
		if (++act_pte == GEN6_PTES) {
			kunmap_atomic(vaddr);
			vaddr = kmap_atomic_px(i915_pt_entry(pd, ++act_pt));
			act_pte = 0;
		}
	} while (1);
	kunmap_atomic(vaddr);

	vma->page_sizes.gtt = I915_GTT_PAGE_SIZE;
}
1684 | ||
/*
 * Ensure page tables exist for the range [start, start + length) of a Gen6
 * PPGTT, allocating any that are still backed by the scratch table.
 * @vm: the PPGTT address space
 * @start: byte offset of the range start
 * @length: length of the range in bytes
 *
 * Returns 0 on success or a negative errno; on failure any partially
 * allocated portion of the range is cleared back to scratch.
 *
 * Locking: pd->lock protects the pd->entry[] array.  Allocation must drop
 * the spinlock (alloc_pt may sleep), so after reacquiring it we must
 * re-check whether a concurrent caller already installed a table for the
 * same pde; if so our freshly allocated table is kept aside in 'alloc' for
 * reuse or release at the end.
 */
static int gen6_alloc_va_range(struct i915_address_space *vm,
			       u64 start, u64 length)
{
	struct gen6_ppgtt *ppgtt = to_gen6_ppgtt(i915_vm_to_ppgtt(vm));
	struct i915_page_directory * const pd = ppgtt->base.pd;
	struct i915_page_table *pt, *alloc = NULL;
	intel_wakeref_t wakeref;
	u64 from = start;	/* remembered for unwind on failure */
	unsigned int pde;
	bool flush = false;
	int ret = 0;

	/* PDE writes may touch the GGTT, so keep the device awake. */
	wakeref = intel_runtime_pm_get(&vm->i915->runtime_pm);

	spin_lock(&pd->lock);
	gen6_for_each_pde(pt, pd, start, length, pde) {
		const unsigned int count = gen6_pte_count(start, length);

		if (px_base(pt) == px_base(&vm->scratch[1])) {
			spin_unlock(&pd->lock);

			/* Reuse a spare table from a lost race, if any. */
			pt = fetch_and_zero(&alloc);
			if (!pt)
				pt = alloc_pt(vm);
			if (IS_ERR(pt)) {
				ret = PTR_ERR(pt);
				goto unwind_out;
			}

			fill32_px(pt, vm->scratch[0].encode);

			spin_lock(&pd->lock);
			if (pd->entry[pde] == &vm->scratch[1]) {
				pd->entry[pde] = pt;
				if (i915_vma_is_bound(ppgtt->vma,
						      I915_VMA_GLOBAL_BIND)) {
					/* PD is live: publish the new PDE. */
					gen6_write_pde(ppgtt, pde, pt);
					flush = true;
				}
			} else {
				/* Lost the race: keep ours as the spare. */
				alloc = pt;
				pt = pd->entry[pde];
			}
		}

		atomic_add(count, &pt->used);
	}
	spin_unlock(&pd->lock);

	if (flush) {
		mark_tlbs_dirty(&ppgtt->base);
		gen6_ggtt_invalidate(vm->gt->ggtt);
	}

	goto out;

unwind_out:
	/* Release the use counts taken so far for this range. */
	gen6_ppgtt_clear_range(vm, from, start - from);
out:
	if (alloc)
		free_px(vm, alloc);
	intel_runtime_pm_put(&vm->i915->runtime_pm, wakeref);
	return ret;
}
1749 | ||
/*
 * Set up the Gen6 PPGTT scratch hierarchy: a scratch page (scratch[0])
 * plus a scratch page table (scratch[1]) whose PTEs all point at it, and
 * point every PDE slot at the scratch table.
 * @ppgtt: the Gen6 PPGTT being initialised
 *
 * Returns 0 on success or a negative errno (scratch page is cleaned up on
 * the -ENOMEM path).
 */
static int gen6_ppgtt_init_scratch(struct gen6_ppgtt *ppgtt)
{
	struct i915_address_space * const vm = &ppgtt->base.vm;
	struct i915_page_directory * const pd = ppgtt->base.pd;
	int ret;

	ret = setup_scratch_page(vm, __GFP_HIGHMEM);
	if (ret)
		return ret;

	/* Scratch is mapped read-only so stray writes cannot land in it. */
	vm->scratch[0].encode =
		vm->pte_encode(px_dma(&vm->scratch[0]),
			       I915_CACHE_NONE, PTE_READ_ONLY);

	if (unlikely(setup_page_dma(vm, px_base(&vm->scratch[1])))) {
		cleanup_scratch_page(vm);
		return -ENOMEM;
	}

	/* Every PTE of the scratch table targets the scratch page ... */
	fill32_px(&vm->scratch[1], vm->scratch[0].encode);
	/* ... and every PDE starts out pointing at the scratch table. */
	memset_p(pd->entry, &vm->scratch[1], I915_PDES);

	return 0;
}
1774 | ||
/*
 * Free every real (non-scratch) page table still attached to the Gen6
 * page directory.
 */
static void gen6_ppgtt_free_pd(struct gen6_ppgtt *ppgtt)
{
	struct i915_page_directory * const pd = ppgtt->base.pd;
	struct i915_page_dma * const scratch =
		px_base(&ppgtt->base.vm.scratch[1]);
	struct i915_page_table *pt;
	u32 pde;

	gen6_for_all_pdes(pt, pd, pde)
		if (px_base(pt) != scratch)
			free_px(&ppgtt->base.vm, pt);
}

/*
 * Tear down a Gen6 PPGTT: destroy the page-directory vma, release all
 * page tables, the scratch hierarchy and the page directory itself.
 * Called via vm->cleanup when the address space is destroyed.
 */
static void gen6_ppgtt_cleanup(struct i915_address_space *vm)
{
	struct gen6_ppgtt *ppgtt = to_gen6_ppgtt(i915_vm_to_ppgtt(vm));
	struct drm_i915_private *i915 = vm->i915;

	/* FIXME remove the struct_mutex to bring the locking under control */
	mutex_lock(&i915->drm.struct_mutex);
	i915_vma_destroy(ppgtt->vma);
	mutex_unlock(&i915->drm.struct_mutex);

	gen6_ppgtt_free_pd(ppgtt);
	free_scratch(vm);
	kfree(ppgtt->base.pd);
}
1802 | ||
/*
 * The Gen6 page directory lives in the GGTT via a special "pd vma" that
 * has no backing GEM object.  The ops below implement the i915_vma_ops
 * contract for it.
 */

/* No real backing pages: mark vma->pages with a sentinel error pointer. */
static int pd_vma_set_pages(struct i915_vma *vma)
{
	vma->pages = ERR_PTR(-ENODEV);
	return 0;
}

static void pd_vma_clear_pages(struct i915_vma *vma)
{
	GEM_BUG_ON(!vma->pages);

	vma->pages = NULL;
}

/*
 * Bind callback: once the pd vma has a GGTT address, record where the
 * PDEs live, write out every PDE, and invalidate the GGTT TLBs.
 */
static int pd_vma_bind(struct i915_vma *vma,
		       enum i915_cache_level cache_level,
		       u32 unused)
{
	struct i915_ggtt *ggtt = i915_vm_to_ggtt(vma->vm);
	struct gen6_ppgtt *ppgtt = vma->private;
	u32 ggtt_offset = i915_ggtt_offset(vma) / I915_GTT_PAGE_SIZE;
	struct i915_page_table *pt;
	unsigned int pde;

	px_base(ppgtt->base.pd)->ggtt_offset = ggtt_offset * sizeof(gen6_pte_t);
	/* CPU-visible location of the PDEs inside the mapped GGTT (gsm). */
	ppgtt->pd_addr = (gen6_pte_t __iomem *)ggtt->gsm + ggtt_offset;

	gen6_for_all_pdes(pt, ppgtt->base.pd, pde)
		gen6_write_pde(ppgtt, pde, pt);

	mark_tlbs_dirty(&ppgtt->base);
	gen6_ggtt_invalidate(ggtt);

	return 0;
}

/*
 * Unbind callback: opportunistically reclaim page tables whose use count
 * dropped to zero (flagged by gen6_ppgtt_clear_range), resetting their
 * PDE slots back to the scratch table.
 */
static void pd_vma_unbind(struct i915_vma *vma)
{
	struct gen6_ppgtt *ppgtt = vma->private;
	struct i915_page_directory * const pd = ppgtt->base.pd;
	struct i915_page_dma * const scratch =
		px_base(&ppgtt->base.vm.scratch[1]);
	struct i915_page_table *pt;
	unsigned int pde;

	if (!ppgtt->scan_for_unused_pt)
		return;

	/* Free all no longer used page tables */
	gen6_for_all_pdes(pt, ppgtt->base.pd, pde) {
		if (px_base(pt) == scratch || atomic_read(&pt->used))
			continue;

		free_px(&ppgtt->base.vm, pt);
		pd->entry[pde] = scratch;
	}

	ppgtt->scan_for_unused_pt = false;
}

static const struct i915_vma_ops pd_vma_ops = {
	.set_pages = pd_vma_set_pages,
	.clear_pages = pd_vma_clear_pages,
	.bind_vma = pd_vma_bind,
	.unbind_vma = pd_vma_unbind,
};
1868 | ||
/*
 * Create the object-less GGTT vma that holds a Gen6 PPGTT's page
 * directory.
 * @ppgtt: the owning Gen6 PPGTT (stored in vma->private)
 * @size: size of the PD allocation in bytes (page aligned)
 *
 * Returns the new vma or an ERR_PTR.  The vma is added to the GGTT's
 * unbound list; actual PDE setup happens at bind time (pd_vma_bind).
 */
static struct i915_vma *pd_vma_create(struct gen6_ppgtt *ppgtt, int size)
{
	struct drm_i915_private *i915 = ppgtt->base.vm.i915;
	struct i915_ggtt *ggtt = ppgtt->base.vm.gt->ggtt;
	struct i915_vma *vma;

	GEM_BUG_ON(!IS_ALIGNED(size, I915_GTT_PAGE_SIZE));
	GEM_BUG_ON(size > ggtt->vm.total);

	vma = i915_vma_alloc();
	if (!vma)
		return ERR_PTR(-ENOMEM);

	i915_active_init(i915, &vma->active, NULL, NULL);

	vma->vm = &ggtt->vm;
	vma->ops = &pd_vma_ops;
	vma->private = ppgtt;

	vma->size = size;
	vma->fence_size = size;
	vma->flags = I915_VMA_GGTT;
	vma->ggtt_view.type = I915_GGTT_VIEW_ROTATED; /* prevent fencing */

	INIT_LIST_HEAD(&vma->obj_link);
	INIT_LIST_HEAD(&vma->closed_link);

	mutex_lock(&vma->vm->mutex);
	list_add(&vma->vm_link, &vma->vm->unbound_list);
	mutex_unlock(&vma->vm->mutex);

	return vma;
}
1d2a314c | 1902 | |
/*
 * Pin the Gen6 PPGTT's page directory into the GGTT.
 * @base: the PPGTT (must not be closed)
 *
 * Reference counted: only the first pin actually places the PD vma into
 * the GGTT.  Returns 0 on success or the error from i915_vma_pin, in
 * which case the pin count is reset to zero.
 */
int gen6_ppgtt_pin(struct i915_ppgtt *base)
{
	struct gen6_ppgtt *ppgtt = to_gen6_ppgtt(base);
	int err;

	GEM_BUG_ON(ppgtt->base.vm.closed);

	/*
	 * Workaround the limited maximum vma->pin_count and the aliasing_ppgtt
	 * which will be pinned into every active context.
	 * (When vma->pin_count becomes atomic, I expect we will naturally
	 * need a larger, unpacked, type and kill this redundancy.)
	 */
	if (ppgtt->pin_count++)
		return 0;

	/*
	 * PPGTT PDEs reside in the GGTT and consists of 512 entries. The
	 * allocator works in address space sizes, so it's multiplied by page
	 * size. We allocate at the top of the GTT to avoid fragmentation.
	 */
	err = i915_vma_pin(ppgtt->vma,
			   0, GEN6_PD_ALIGN,
			   PIN_GLOBAL | PIN_HIGH);
	if (err)
		goto unpin;

	return 0;

unpin:
	ppgtt->pin_count = 0;
	return err;
}
1936 | ||
ab53497b | 1937 | void gen6_ppgtt_unpin(struct i915_ppgtt *base) |
a2bbf714 | 1938 | { |
ab53497b | 1939 | struct gen6_ppgtt *ppgtt = to_gen6_ppgtt(base); |
a2bbf714 CW |
1940 | |
1941 | GEM_BUG_ON(!ppgtt->pin_count); | |
1942 | if (--ppgtt->pin_count) | |
1943 | return; | |
1944 | ||
1945 | i915_vma_unpin(ppgtt->vma); | |
1946 | } | |
1947 | ||
ab53497b | 1948 | void gen6_ppgtt_unpin_all(struct i915_ppgtt *base) |
e0695db7 | 1949 | { |
ab53497b | 1950 | struct gen6_ppgtt *ppgtt = to_gen6_ppgtt(base); |
e0695db7 CW |
1951 | |
1952 | if (!ppgtt->pin_count) | |
1953 | return; | |
1954 | ||
1955 | ppgtt->pin_count = 0; | |
1956 | i915_vma_unpin(ppgtt->vma); | |
1957 | } | |
1958 | ||
/*
 * Allocate and initialise a Gen6/Gen7 PPGTT.
 * @i915: the device
 *
 * Wires up the gen6 vm ops, borrows the GGTT's pte_encode, allocates the
 * page directory, the scratch hierarchy and the PD vma.  Returns the
 * embedded i915_ppgtt or an ERR_PTR; partial state is unwound in reverse
 * order on failure.
 */
static struct i915_ppgtt *gen6_ppgtt_create(struct drm_i915_private *i915)
{
	struct i915_ggtt * const ggtt = &i915->ggtt;
	struct gen6_ppgtt *ppgtt;
	int err;

	ppgtt = kzalloc(sizeof(*ppgtt), GFP_KERNEL);
	if (!ppgtt)
		return ERR_PTR(-ENOMEM);

	ppgtt_init(&ppgtt->base, &i915->gt);
	ppgtt->base.vm.top = 1;	/* single level of directory above the PTs */

	ppgtt->base.vm.allocate_va_range = gen6_alloc_va_range;
	ppgtt->base.vm.clear_range = gen6_ppgtt_clear_range;
	ppgtt->base.vm.insert_entries = gen6_ppgtt_insert_entries;
	ppgtt->base.vm.cleanup = gen6_ppgtt_cleanup;

	/* PTE encoding is shared with the global GTT on gen6/7. */
	ppgtt->base.vm.pte_encode = ggtt->vm.pte_encode;

	ppgtt->base.pd = __alloc_pd(sizeof(*ppgtt->base.pd));
	if (!ppgtt->base.pd) {
		err = -ENOMEM;
		goto err_free;
	}

	err = gen6_ppgtt_init_scratch(ppgtt);
	if (err)
		goto err_pd;

	ppgtt->vma = pd_vma_create(ppgtt, GEN6_PD_SIZE);
	if (IS_ERR(ppgtt->vma)) {
		err = PTR_ERR(ppgtt->vma);
		goto err_scratch;
	}

	return &ppgtt->base;

err_scratch:
	free_scratch(&ppgtt->base.vm);
err_pd:
	kfree(ppgtt->base.pd);
err_free:
	kfree(ppgtt);
	return ERR_PTR(err);
}
c114f76a | 2005 | |
/*
 * Apply GTT-related register workarounds for the given GT.
 * @gt: the graphics tile
 */
static void gtt_write_workarounds(struct intel_gt *gt)
{
	struct drm_i915_private *i915 = gt->i915;
	struct intel_uncore *uncore = gt->uncore;

	/* This function is for gtt related workarounds. This function is
	 * called on driver load and after a GPU reset, so you can place
	 * workarounds here even if they get overwritten by GPU reset.
	 */
	/* WaIncreaseDefaultTLBEntries:chv,bdw,skl,bxt,kbl,glk,cfl,cnl,icl */
	if (IS_BROADWELL(i915))
		intel_uncore_write(uncore,
				   GEN8_L3_LRA_1_GPGPU,
				   GEN8_L3_LRA_1_GPGPU_DEFAULT_VALUE_BDW);
	else if (IS_CHERRYVIEW(i915))
		intel_uncore_write(uncore,
				   GEN8_L3_LRA_1_GPGPU,
				   GEN8_L3_LRA_1_GPGPU_DEFAULT_VALUE_CHV);
	else if (IS_GEN9_LP(i915))
		intel_uncore_write(uncore,
				   GEN8_L3_LRA_1_GPGPU,
				   GEN9_L3_LRA_1_GPGPU_DEFAULT_VALUE_BXT);
	else if (INTEL_GEN(i915) >= 9)
		intel_uncore_write(uncore,
				   GEN8_L3_LRA_1_GPGPU,
				   GEN9_L3_LRA_1_GPGPU_DEFAULT_VALUE_SKL);

	/*
	 * To support 64K PTEs we need to first enable the use of the
	 * Intermediate-Page-Size(IPS) bit of the PDE field via some magical
	 * mmio, otherwise the page-walker will simply ignore the IPS bit. This
	 * shouldn't be needed after GEN10.
	 *
	 * 64K pages were first introduced from BDW+, although technically they
	 * only *work* from gen9+. For pre-BDW we instead have the option for
	 * 32K pages, but we don't currently have any support for it in our
	 * driver.
	 */
	if (HAS_PAGE_SIZES(i915, I915_GTT_PAGE_SIZE_64K) &&
	    INTEL_GEN(i915) <= 10)
		intel_uncore_rmw(uncore,
				 GEN8_GAMW_ECO_DEV_RW_IA,
				 0,
				 GAMW_ECO_ENABLE_64K_IPS_FIELD);

	if (IS_GEN_RANGE(i915, 8, 11)) {
		bool can_use_gtt_cache = true;

		/*
		 * According to the BSpec if we use 2M/1G pages then we also
		 * need to disable the GTT cache. At least on BDW we can see
		 * visual corruption when using 2M pages, and not disabling the
		 * GTT cache.
		 */
		if (HAS_PAGE_SIZES(i915, I915_GTT_PAGE_SIZE_2M))
			can_use_gtt_cache = false;

		/* WaGttCachingOffByDefault */
		intel_uncore_write(uncore,
				   HSW_GTT_CACHE_EN,
				   can_use_gtt_cache ? GTT_CACHE_EN_ALL : 0);
		/* Read back to verify the write stuck. */
		WARN_ON_ONCE(can_use_gtt_cache &&
			     intel_uncore_read(uncore,
					       HSW_GTT_CACHE_EN) == 0);
	}
}
2072 | ||
acb56d97 | 2073 | int i915_ppgtt_init_hw(struct intel_gt *gt) |
82460d97 | 2074 | { |
acb56d97 TU |
2075 | struct drm_i915_private *i915 = gt->i915; |
2076 | ||
2077 | gtt_write_workarounds(gt); | |
d5165ebd | 2078 | |
acb56d97 TU |
2079 | if (IS_GEN(i915, 6)) |
2080 | gen6_ppgtt_enable(gt); | |
2081 | else if (IS_GEN(i915, 7)) | |
2082 | gen7_ppgtt_enable(gt); | |
82460d97 | 2083 | |
4ad2fd88 JH |
2084 | return 0; |
2085 | } | |
1d2a314c | 2086 | |
/*
 * Select the PPGTT backend for this device: the gen8 implementation for
 * gen8 and newer, the gen6 implementation otherwise.
 */
static struct i915_ppgtt *
__ppgtt_create(struct drm_i915_private *i915)
{
	return INTEL_GEN(i915) >= 8 ?
	       gen8_ppgtt_create(i915) :
	       gen6_ppgtt_create(i915);
}
2095 | ||
ab53497b | 2096 | struct i915_ppgtt * |
3aa9945a | 2097 | i915_ppgtt_create(struct drm_i915_private *i915) |
4d884705 | 2098 | { |
ab53497b | 2099 | struct i915_ppgtt *ppgtt; |
4d884705 | 2100 | |
ab53497b | 2101 | ppgtt = __ppgtt_create(i915); |
17f297b4 CW |
2102 | if (IS_ERR(ppgtt)) |
2103 | return ppgtt; | |
4d884705 | 2104 | |
82ad6443 | 2105 | trace_i915_ppgtt_create(&ppgtt->vm); |
198c974d | 2106 | |
4d884705 DV |
2107 | return ppgtt; |
2108 | } | |
2109 | ||
a81cc00c BW |
/* Certain Gen5 chipsets require idling the GPU before
 * unmapping anything from the GTT when VT-d is enabled.
 */
static bool needs_idle_maps(struct drm_i915_private *dev_priv)
{
	/* Query intel_iommu to see if we need the workaround. Presumably that
	 * was loaded first.
	 */
	return IS_GEN(dev_priv, 5) && IS_MOBILE(dev_priv) && intel_vtd_active();
}
2120 | ||
/*
 * On suspend, clear all GGTT mappings (gen6+) so nothing stale survives
 * into resume; pending faults are cleared first and the TLBs invalidated.
 */
static void ggtt_suspend_mappings(struct i915_ggtt *ggtt)
{
	struct drm_i915_private *i915 = ggtt->vm.i915;

	/* Don't bother messing with faults pre GEN6 as we have little
	 * documentation supporting that it's a good idea.
	 */
	if (INTEL_GEN(i915) < 6)
		return;

	intel_gt_check_and_clear_faults(ggtt->vm.gt);

	ggtt->vm.clear_range(&ggtt->vm, 0, ggtt->vm.total);

	ggtt->invalidate(ggtt);
}

/* Public suspend entry point: operates on the device's single GGTT. */
void i915_gem_suspend_gtt_mappings(struct drm_i915_private *i915)
{
	ggtt_suspend_mappings(&i915->ggtt);
}
2142 | ||
03ac84f1 CW |
/*
 * DMA-map an object's backing pages for device access.
 * @obj: the GEM object that owns @pages
 * @pages: the sg_table of backing pages to map
 *
 * Returns 0 on success or -ENOSPC once the shrinker can reclaim nothing
 * further.  Mapping failures (e.g. a full swiotlb) are retried after
 * shrinking other bound/unbound objects to free remapping-table space.
 */
int i915_gem_gtt_prepare_pages(struct drm_i915_gem_object *obj,
			       struct sg_table *pages)
{
	do {
		/* NO_WARN: failure here is expected and handled below. */
		if (dma_map_sg_attrs(&obj->base.dev->pdev->dev,
				     pages->sgl, pages->nents,
				     PCI_DMA_BIDIRECTIONAL,
				     DMA_ATTR_NO_WARN))
			return 0;

		/*
		 * If the DMA remap fails, one cause can be that we have
		 * too many objects pinned in a small remapping table,
		 * such as swiotlb. Incrementally purge all other objects and
		 * try again - if there are no more pages to remove from
		 * the DMA remapper, i915_gem_shrink will return 0.
		 */
		GEM_BUG_ON(obj->mm.pages == pages);
	} while (i915_gem_shrink(to_i915(obj->base.dev),
				 obj->base.size >> PAGE_SHIFT, NULL,
				 I915_SHRINK_BOUND |
				 I915_SHRINK_UNBOUND));

	return -ENOSPC;
}
2168 | ||
/* Write a single 64-bit gen8 PTE to its (iomem) slot. */
static void gen8_set_pte(void __iomem *addr, gen8_pte_t pte)
{
	writeq(pte, addr);
}
2173 | ||
d6473f56 CW |
/*
 * Write a single gen8 GGTT PTE for one page and invalidate the GGTT
 * TLBs.  @unused: flags argument of the vm op, ignored on gen8.
 */
static void gen8_ggtt_insert_page(struct i915_address_space *vm,
				  dma_addr_t addr,
				  u64 offset,
				  enum i915_cache_level level,
				  u32 unused)
{
	struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
	gen8_pte_t __iomem *pte =
		(gen8_pte_t __iomem *)ggtt->gsm + offset / I915_GTT_PAGE_SIZE;

	gen8_set_pte(pte, gen8_pte_encode(addr, level, 0));

	ggtt->invalidate(ggtt);
}

/*
 * Write gen8 GGTT PTEs for every DMA page backing @vma, then invalidate
 * the GGTT TLBs once at the end.
 */
static void gen8_ggtt_insert_entries(struct i915_address_space *vm,
				     struct i915_vma *vma,
				     enum i915_cache_level level,
				     u32 flags)
{
	struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
	struct sgt_iter sgt_iter;
	gen8_pte_t __iomem *gtt_entries;
	/* Address-independent PTE template; the dma address is OR'ed in. */
	const gen8_pte_t pte_encode = gen8_pte_encode(0, level, 0);
	dma_addr_t addr;

	/*
	 * Note that we ignore PTE_READ_ONLY here. The caller must be careful
	 * not to allow the user to override access to a read only page.
	 */

	gtt_entries = (gen8_pte_t __iomem *)ggtt->gsm;
	gtt_entries += vma->node.start / I915_GTT_PAGE_SIZE;
	for_each_sgt_dma(addr, sgt_iter, vma->pages)
		gen8_set_pte(gtt_entries++, pte_encode | addr);

	/*
	 * We want to flush the TLBs only after we're certain all the PTE
	 * updates have finished.
	 */
	ggtt->invalidate(ggtt);
}
2216 | ||
d6473f56 CW |
/*
 * Write a single gen6 GGTT PTE for one page and invalidate the GGTT
 * TLBs.
 */
static void gen6_ggtt_insert_page(struct i915_address_space *vm,
				  dma_addr_t addr,
				  u64 offset,
				  enum i915_cache_level level,
				  u32 flags)
{
	struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
	gen6_pte_t __iomem *pte =
		(gen6_pte_t __iomem *)ggtt->gsm + offset / I915_GTT_PAGE_SIZE;

	iowrite32(vm->pte_encode(addr, level, flags), pte);

	ggtt->invalidate(ggtt);
}

/*
 * Binds an object into the global gtt with the specified cache level. The object
 * will be accessible to the GPU via commands whose operands reference offsets
 * within the global GTT as well as accessible by the GPU through the GMADR
 * mapped BAR (dev_priv->mm.gtt->gtt).
 */
static void gen6_ggtt_insert_entries(struct i915_address_space *vm,
				     struct i915_vma *vma,
				     enum i915_cache_level level,
				     u32 flags)
{
	struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
	gen6_pte_t __iomem *entries = (gen6_pte_t __iomem *)ggtt->gsm;
	unsigned int i = vma->node.start / I915_GTT_PAGE_SIZE;
	struct sgt_iter iter;
	dma_addr_t addr;

	for_each_sgt_dma(addr, iter, vma->pages)
		iowrite32(vm->pte_encode(addr, level, flags), &entries[i++]);

	/*
	 * We want to flush the TLBs only after we're certain all the PTE
	 * updates have finished.
	 */
	ggtt->invalidate(ggtt);
}
2257 | ||
/* Deliberate no-op clear_range, used where clearing is unnecessary. */
static void nop_clear_range(struct i915_address_space *vm,
			    u64 start, u64 length)
{
}

/*
 * Point every gen8 GGTT PTE in [start, start + length) back at the
 * scratch page.  The range is clamped (with a WARN) to the GGTT size.
 * Note: no TLB invalidate here; callers flush as needed.
 */
static void gen8_ggtt_clear_range(struct i915_address_space *vm,
				  u64 start, u64 length)
{
	struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
	unsigned first_entry = start / I915_GTT_PAGE_SIZE;
	unsigned num_entries = length / I915_GTT_PAGE_SIZE;
	const gen8_pte_t scratch_pte = vm->scratch[0].encode;
	gen8_pte_t __iomem *gtt_base =
		(gen8_pte_t __iomem *)ggtt->gsm + first_entry;
	const int max_entries = ggtt_total_entries(ggtt) - first_entry;
	int i;

	if (WARN(num_entries > max_entries,
		 "First entry = %d; Num entries = %d (max=%d)\n",
		 first_entry, num_entries, max_entries))
		num_entries = max_entries;

	for (i = 0; i < num_entries; i++)
		gen8_set_pte(&gtt_base[i], scratch_pte);
}
2283 | ||
0ef34ad6 JB |
static void bxt_vtd_ggtt_wa(struct i915_address_space *vm)
{
	struct drm_i915_private *dev_priv = vm->i915;

	/*
	 * Make sure the internal GAM fifo has been cleared of all GTT
	 * writes before exiting stop_machine(). This guarantees that
	 * any aperture accesses waiting to start in another process
	 * cannot back up behind the GTT writes causing a hang.
	 * The register can be any arbitrary GAM register.
	 */
	POSTING_READ(GFX_FLSH_CNTL_GEN6);
}

/*
 * The __BKL variants below wrap the regular gen8 GGTT updates inside
 * stop_machine() and follow them with bxt_vtd_ggtt_wa(); each one
 * marshals its arguments through a small struct for the callback.
 */

/* Argument pack for bxt_vtd_ggtt_insert_page__cb. */
struct insert_page {
	struct i915_address_space *vm;
	dma_addr_t addr;
	u64 offset;
	enum i915_cache_level level;
};

static int bxt_vtd_ggtt_insert_page__cb(void *_arg)
{
	struct insert_page *arg = _arg;

	gen8_ggtt_insert_page(arg->vm, arg->addr, arg->offset, arg->level, 0);
	bxt_vtd_ggtt_wa(arg->vm);

	return 0;
}

static void bxt_vtd_ggtt_insert_page__BKL(struct i915_address_space *vm,
					  dma_addr_t addr,
					  u64 offset,
					  enum i915_cache_level level,
					  u32 unused)
{
	struct insert_page arg = { vm, addr, offset, level };

	stop_machine(bxt_vtd_ggtt_insert_page__cb, &arg, NULL);
}

/* Argument pack for bxt_vtd_ggtt_insert_entries__cb. */
struct insert_entries {
	struct i915_address_space *vm;
	struct i915_vma *vma;
	enum i915_cache_level level;
	u32 flags;
};

static int bxt_vtd_ggtt_insert_entries__cb(void *_arg)
{
	struct insert_entries *arg = _arg;

	gen8_ggtt_insert_entries(arg->vm, arg->vma, arg->level, arg->flags);
	bxt_vtd_ggtt_wa(arg->vm);

	return 0;
}

static void bxt_vtd_ggtt_insert_entries__BKL(struct i915_address_space *vm,
					     struct i915_vma *vma,
					     enum i915_cache_level level,
					     u32 flags)
{
	struct insert_entries arg = { vm, vma, level, flags };

	stop_machine(bxt_vtd_ggtt_insert_entries__cb, &arg, NULL);
}

/* Argument pack for bxt_vtd_ggtt_clear_range__cb. */
struct clear_range {
	struct i915_address_space *vm;
	u64 start;
	u64 length;
};

static int bxt_vtd_ggtt_clear_range__cb(void *_arg)
{
	struct clear_range *arg = _arg;

	gen8_ggtt_clear_range(arg->vm, arg->start, arg->length);
	bxt_vtd_ggtt_wa(arg->vm);

	return 0;
}

static void bxt_vtd_ggtt_clear_range__BKL(struct i915_address_space *vm,
					  u64 start,
					  u64 length)
{
	struct clear_range arg = { vm, start, length };

	stop_machine(bxt_vtd_ggtt_clear_range__cb, &arg, NULL);
}
2377 | ||
/*
 * Point every gen6 GGTT PTE in [start, start + length) back at the
 * scratch page.  The range is clamped (with a WARN) to the GGTT size.
 */
static void gen6_ggtt_clear_range(struct i915_address_space *vm,
				  u64 start, u64 length)
{
	struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
	unsigned first_entry = start / I915_GTT_PAGE_SIZE;
	unsigned num_entries = length / I915_GTT_PAGE_SIZE;
	gen6_pte_t scratch_pte, __iomem *gtt_base =
		(gen6_pte_t __iomem *)ggtt->gsm + first_entry;
	const int max_entries = ggtt_total_entries(ggtt) - first_entry;
	int i;

	if (WARN(num_entries > max_entries,
		 "First entry = %d; Num entries = %d (max=%d)\n",
		 first_entry, num_entries, max_entries))
		num_entries = max_entries;

	scratch_pte = vm->scratch[0].encode;
	for (i = 0; i < num_entries; i++)
		iowrite32(scratch_pte, &gtt_base[i]);
}
2398 | ||
d6473f56 CW |
/*
 * Pre-gen6 GGTT ops: delegate to the intel-gtt (AGP) layer, translating
 * the cache level into AGP memory flags.
 */
static void i915_ggtt_insert_page(struct i915_address_space *vm,
				  dma_addr_t addr,
				  u64 offset,
				  enum i915_cache_level cache_level,
				  u32 unused)
{
	unsigned int flags = (cache_level == I915_CACHE_NONE) ?
		AGP_USER_MEMORY : AGP_USER_CACHED_MEMORY;

	intel_gtt_insert_page(addr, offset >> PAGE_SHIFT, flags);
}

static void i915_ggtt_insert_entries(struct i915_address_space *vm,
				     struct i915_vma *vma,
				     enum i915_cache_level cache_level,
				     u32 unused)
{
	unsigned int flags = (cache_level == I915_CACHE_NONE) ?
		AGP_USER_MEMORY : AGP_USER_CACHED_MEMORY;

	intel_gtt_insert_sg_entries(vma->pages, vma->node.start >> PAGE_SHIFT,
				    flags);
}

static void i915_ggtt_clear_range(struct i915_address_space *vm,
				  u64 start, u64 length)
{
	intel_gtt_clear_range(start >> PAGE_SHIFT, length >> PAGE_SHIFT);
}
2428 | ||
70b9f6f8 DV |
/*
 * Bind a vma into the global GTT (non-aliasing case).
 * @vma: the vma to bind
 * @cache_level: cacheability for the PTEs
 * @flags: bind flags (unused here beyond the vm op contract)
 *
 * Always returns 0; the PTE writes are performed under a runtime-pm
 * wakeref since they touch the device.
 */
static int ggtt_bind_vma(struct i915_vma *vma,
			 enum i915_cache_level cache_level,
			 u32 flags)
{
	struct drm_i915_private *i915 = vma->vm->i915;
	struct drm_i915_gem_object *obj = vma->obj;
	intel_wakeref_t wakeref;
	u32 pte_flags;

	/* Applicable to VLV (gen8+ do not support RO in the GGTT) */
	pte_flags = 0;
	if (i915_gem_object_is_readonly(obj))
		pte_flags |= PTE_READ_ONLY;

	with_intel_runtime_pm(&i915->runtime_pm, wakeref)
		vma->vm->insert_entries(vma->vm, vma, cache_level, pte_flags);

	/* The GGTT only uses 4KiB pages. */
	vma->page_sizes.gtt = I915_GTT_PAGE_SIZE;

	/*
	 * Without aliasing PPGTT there's no difference between
	 * GLOBAL/LOCAL_BIND, it's all the same ptes. Hence unconditionally
	 * upgrade to both bound if we bind either to avoid double-binding.
	 */
	vma->flags |= I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND;

	return 0;
}
2457 | ||
cbc4e9e6 CW |
/*
 * ggtt_unbind_vma - revert a vma's GGTT range to the scratch page, holding
 * a runtime-pm wakeref for the hardware access.
 */
static void ggtt_unbind_vma(struct i915_vma *vma)
{
	struct drm_i915_private *i915 = vma->vm->i915;
	intel_wakeref_t wakeref;

	with_intel_runtime_pm(&i915->runtime_pm, wakeref)
		vma->vm->clear_range(vma->vm, vma->node.start, vma->size);
}
2466 | ||
0a878716 DV |
/*
 * aliasing_gtt_bind_vma - GGTT binding when an aliasing PPGTT is present:
 * LOCAL_BIND goes into the alias PPGTT (allocating its page tables on first
 * bind), GLOBAL_BIND goes into the real GGTT under a runtime-pm wakeref.
 */
static int aliasing_gtt_bind_vma(struct i915_vma *vma,
				 enum i915_cache_level cache_level,
				 u32 flags)
{
	struct drm_i915_private *i915 = vma->vm->i915;
	u32 pte_flags;
	int ret;

	/* Currently applicable only to VLV */
	pte_flags = 0;
	if (i915_gem_object_is_readonly(vma->obj))
		pte_flags |= PTE_READ_ONLY;

	if (flags & I915_VMA_LOCAL_BIND) {
		struct i915_ppgtt *alias = i915_vm_to_ggtt(vma->vm)->alias;

		/* Only allocate va range on the first local bind. */
		if (!(vma->flags & I915_VMA_LOCAL_BIND)) {
			ret = alias->vm.allocate_va_range(&alias->vm,
							  vma->node.start,
							  vma->size);
			if (ret)
				return ret;
		}

		alias->vm.insert_entries(&alias->vm, vma,
					 cache_level, pte_flags);
	}

	if (flags & I915_VMA_GLOBAL_BIND) {
		intel_wakeref_t wakeref;

		with_intel_runtime_pm(&i915->runtime_pm, wakeref) {
			vma->vm->insert_entries(vma->vm, vma,
						cache_level, pte_flags);
		}
	}

	return 0;
}
2506 | ||
/*
 * aliasing_gtt_unbind_vma - undo aliasing_gtt_bind_vma: clear the global
 * GGTT range (needs the device awake) and/or the alias PPGTT range,
 * depending on which binds are recorded in vma->flags.
 */
static void aliasing_gtt_unbind_vma(struct i915_vma *vma)
{
	struct drm_i915_private *i915 = vma->vm->i915;

	if (vma->flags & I915_VMA_GLOBAL_BIND) {
		struct i915_address_space *vm = vma->vm;
		intel_wakeref_t wakeref;

		with_intel_runtime_pm(&i915->runtime_pm, wakeref)
			vm->clear_range(vm, vma->node.start, vma->size);
	}

	if (vma->flags & I915_VMA_LOCAL_BIND) {
		struct i915_address_space *vm =
			&i915_vm_to_ggtt(vma->vm)->alias->vm;

		vm->clear_range(vm, vma->node.start, vma->size);
	}
}
2526 | ||
03ac84f1 CW |
/*
 * i915_gem_gtt_finish_pages - unmap an object's backing pages from the DMA
 * API. On VT-d-afflicted platforms (do_idle_maps) the GPU is idled first,
 * as concurrent GTT traffic during unmap is known to hang the device.
 */
void i915_gem_gtt_finish_pages(struct drm_i915_gem_object *obj,
			       struct sg_table *pages)
{
	struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
	struct device *kdev = &dev_priv->drm.pdev->dev;
	struct i915_ggtt *ggtt = &dev_priv->ggtt;

	if (unlikely(ggtt->do_idle_maps)) {
		if (i915_gem_wait_for_idle(dev_priv, 0, MAX_SCHEDULE_TIMEOUT)) {
			DRM_ERROR("Failed to wait for idle; VT'd may hang.\n");
			/* Wait a bit, in hopes it avoids the hang */
			udelay(10);
		}
	}

	dma_unmap_sg(kdev, pages->sgl, pages->nents, PCI_DMA_BIDIRECTIONAL);
}
644ec02b | 2544 | |
fa3f46af MA |
2545 | static int ggtt_set_pages(struct i915_vma *vma) |
2546 | { | |
2547 | int ret; | |
2548 | ||
2549 | GEM_BUG_ON(vma->pages); | |
2550 | ||
2551 | ret = i915_get_ggtt_vma_pages(vma); | |
2552 | if (ret) | |
2553 | return ret; | |
2554 | ||
7464284b MA |
2555 | vma->page_sizes = vma->obj->mm.page_sizes; |
2556 | ||
fa3f46af MA |
2557 | return 0; |
2558 | } | |
2559 | ||
/*
 * i915_gtt_color_adjust - drm_mm colouring callback used to keep a one-page
 * guard between nodes of different "colour" (cache domain), shrinking the
 * usable hole on either side by one GTT page as needed.
 */
static void i915_gtt_color_adjust(const struct drm_mm_node *node,
				  unsigned long color,
				  u64 *start,
				  u64 *end)
{
	if (node->allocated && node->color != color)
		*start += I915_GTT_PAGE_SIZE;

	/* Also leave a space between the unallocated reserved node after the
	 * GTT and any objects within the GTT, i.e. we use the color adjustment
	 * to insert a guard page to prevent prefetches crossing over the
	 * GTT boundary.
	 */
	node = list_next_entry(node, node_list);
	if (node->color != color)
		*end -= I915_GTT_PAGE_SIZE;
}
fbe5d36e | 2577 | |
/*
 * init_aliasing_ppgtt - create the single PPGTT that aliases the whole GGTT
 * (for hardware without real per-context PPGTT), preallocate its page
 * directories, and switch the GGTT vma ops to the aliasing variants.
 */
static int init_aliasing_ppgtt(struct i915_ggtt *ggtt)
{
	struct i915_ppgtt *ppgtt;
	int err;

	ppgtt = i915_ppgtt_create(ggtt->vm.i915);
	if (IS_ERR(ppgtt))
		return PTR_ERR(ppgtt);

	/* The alias must be able to cover every GGTT address. */
	if (GEM_WARN_ON(ppgtt->vm.total < ggtt->vm.total)) {
		err = -ENODEV;
		goto err_ppgtt;
	}

	/*
	 * Note we only pre-allocate as far as the end of the global
	 * GTT. On 48b / 4-level page-tables, the difference is very,
	 * very significant! We have to preallocate as GVT/vgpu does
	 * not like the page directory disappearing.
	 */
	err = ppgtt->vm.allocate_va_range(&ppgtt->vm, 0, ggtt->vm.total);
	if (err)
		goto err_ppgtt;

	ggtt->alias = ppgtt;

	GEM_BUG_ON(ggtt->vm.vma_ops.bind_vma != ggtt_bind_vma);
	ggtt->vm.vma_ops.bind_vma = aliasing_gtt_bind_vma;

	GEM_BUG_ON(ggtt->vm.vma_ops.unbind_vma != ggtt_unbind_vma);
	ggtt->vm.vma_ops.unbind_vma = aliasing_gtt_unbind_vma;

	return 0;

err_ppgtt:
	i915_vm_put(&ppgtt->vm);
	return err;
}
2616 | ||
/*
 * fini_aliasing_ppgtt - drop the aliasing PPGTT (if one was created) and
 * restore the plain GGTT vma ops; serialised by struct_mutex.
 */
static void fini_aliasing_ppgtt(struct i915_ggtt *ggtt)
{
	struct drm_i915_private *i915 = ggtt->vm.i915;
	struct i915_ppgtt *ppgtt;

	mutex_lock(&i915->drm.struct_mutex);

	/* Atomically claim the alias so it is released exactly once. */
	ppgtt = fetch_and_zero(&ggtt->alias);
	if (!ppgtt)
		goto out;

	i915_vm_put(&ppgtt->vm);

	ggtt->vm.vma_ops.bind_vma = ggtt_bind_vma;
	ggtt->vm.vma_ops.unbind_vma = ggtt_unbind_vma;

out:
	mutex_unlock(&i915->drm.struct_mutex);
}
2636 | ||
09a32cb7 TU |
/*
 * ggtt_reserve_guc_top - reserve the GGTT range above GUC_GGTT_TOP (which
 * GuC cannot access) so it is only used for firmware images; no-op when
 * GuC is not in use.
 */
static int ggtt_reserve_guc_top(struct i915_ggtt *ggtt)
{
	u64 size;
	int ret;

	if (!USES_GUC(ggtt->vm.i915))
		return 0;

	GEM_BUG_ON(ggtt->vm.total <= GUC_GGTT_TOP);
	size = ggtt->vm.total - GUC_GGTT_TOP;

	ret = i915_gem_gtt_reserve(&ggtt->vm, &ggtt->uc_fw, size,
				   GUC_GGTT_TOP, I915_COLOR_UNEVICTABLE,
				   PIN_NOEVICT);
	if (ret)
		DRM_DEBUG_DRIVER("Failed to reserve top of GGTT for GuC\n");

	return ret;
}
2656 | ||
/* Release the GuC firmware reservation made by ggtt_reserve_guc_top(). */
static void ggtt_release_guc_top(struct i915_ggtt *ggtt)
{
	if (drm_mm_node_allocated(&ggtt->uc_fw))
		drm_mm_remove_node(&ggtt->uc_fw);
}
2662 | ||
1d66377a TU |
/* Undo the reservations made by init_ggtt() (GuC top + error capture). */
static void cleanup_init_ggtt(struct i915_ggtt *ggtt)
{
	ggtt_release_guc_top(ggtt);
	drm_mm_remove_node(&ggtt->error_capture);
}
2668 | ||
/*
 * init_ggtt - carve up the global GTT: vgpu balloon, error-capture slot,
 * GuC-inaccessible top, then scrub all remaining holes and the guard page.
 */
static int init_ggtt(struct i915_ggtt *ggtt)
{
	/* Let GEM Manage all of the aperture.
	 *
	 * However, leave one page at the end still bound to the scratch page.
	 * There are a number of places where the hardware apparently prefetches
	 * past the end of the object, and we've seen multiple hangs with the
	 * GPU head pointer stuck in a batchbuffer bound at the last page of the
	 * aperture. One page should be enough to keep any prefetching inside
	 * of the aperture.
	 */
	unsigned long hole_start, hole_end;
	struct drm_mm_node *entry;
	int ret;

	/*
	 * GuC requires all resources that we're sharing with it to be placed in
	 * non-WOPCM memory. If GuC is not present or not in use we still need a
	 * small bias as ring wraparound at offset 0 sometimes hangs. No idea
	 * why.
	 */
	ggtt->pin_bias = max_t(u32, I915_GTT_PAGE_SIZE,
			       intel_wopcm_guc_size(&ggtt->vm.i915->wopcm));

	ret = intel_vgt_balloon(ggtt);
	if (ret)
		return ret;

	/* Reserve a mappable slot for our lockless error capture */
	ret = drm_mm_insert_node_in_range(&ggtt->vm.mm, &ggtt->error_capture,
					  PAGE_SIZE, 0, I915_COLOR_UNEVICTABLE,
					  0, ggtt->mappable_end,
					  DRM_MM_INSERT_LOW);
	if (ret)
		return ret;

	/*
	 * The upper portion of the GuC address space has a sizeable hole
	 * (several MB) that is inaccessible by GuC. Reserve this range within
	 * GGTT as it can comfortably hold GuC/HuC firmware images.
	 */
	ret = ggtt_reserve_guc_top(ggtt);
	if (ret)
		goto err;

	/* Clear any non-preallocated blocks */
	drm_mm_for_each_hole(entry, &ggtt->vm.mm, hole_start, hole_end) {
		DRM_DEBUG_KMS("clearing unused GTT space: [%lx, %lx]\n",
			      hole_start, hole_end);
		ggtt->vm.clear_range(&ggtt->vm, hole_start,
				     hole_end - hole_start);
	}

	/* And finally clear the reserved guard page */
	ggtt->vm.clear_range(&ggtt->vm, ggtt->vm.total - PAGE_SIZE, PAGE_SIZE);

	return 0;

err:
	cleanup_init_ggtt(ggtt);
	return ret;
}
2731 | ||
/*
 * i915_init_ggtt - finish GGTT setup, then (on aliasing-only hardware)
 * create and hook up the aliasing PPGTT.
 */
int i915_init_ggtt(struct drm_i915_private *i915)
{
	int ret;

	ret = init_ggtt(&i915->ggtt);
	if (ret)
		return ret;

	if (INTEL_PPGTT(i915) == INTEL_PPGTT_ALIASING) {
		ret = init_aliasing_ppgtt(&i915->ggtt);
		if (ret)
			cleanup_init_ggtt(&i915->ggtt);
		/*
		 * NOTE(review): on aliasing-PPGTT failure the init_ggtt
		 * reservations are torn down yet we still return 0, so the
		 * driver loads without the error-capture/GuC-top nodes.
		 * Confirm this graceful degradation is intended rather than
		 * a missed "return ret".
		 */
	}

	return 0;
}
2748 | ||
/*
 * ggtt_cleanup_hw - tear down the GGTT: close the vm, flush deferred vm
 * releases, unbind every vma, drop reservations/balloon, and release the
 * iomap/MTRR. Unbinding is serialised by struct_mutex.
 */
static void ggtt_cleanup_hw(struct i915_ggtt *ggtt)
{
	struct drm_i915_private *i915 = ggtt->vm.i915;
	struct i915_vma *vma, *vn;

	ggtt->vm.closed = true;

	rcu_barrier(); /* flush the RCU'ed__i915_vm_release */
	flush_workqueue(i915->wq);

	mutex_lock(&i915->drm.struct_mutex);

	list_for_each_entry_safe(vma, vn, &ggtt->vm.bound_list, vm_link)
		WARN_ON(i915_vma_unbind(vma));

	if (drm_mm_node_allocated(&ggtt->error_capture))
		drm_mm_remove_node(&ggtt->error_capture);

	ggtt_release_guc_top(ggtt);

	if (drm_mm_initialized(&ggtt->vm.mm)) {
		intel_vgt_deballoon(ggtt);
		i915_address_space_fini(&ggtt->vm);
	}

	ggtt->vm.cleanup(&ggtt->vm);

	mutex_unlock(&i915->drm.struct_mutex);

	arch_phys_wc_del(ggtt->mtrr);
	io_mapping_fini(&ggtt->iomap);
}
2781 | ||
/**
 * i915_ggtt_driver_release - Clean up GGTT hardware initialization
 * @i915: i915 device
 *
 * Drops the aliasing PPGTT, tears down the GGTT itself, returns the WC
 * page stash to the normal (WB) state, and releases stolen memory.
 */
void i915_ggtt_driver_release(struct drm_i915_private *i915)
{
	struct pagevec *pvec;

	fini_aliasing_ppgtt(&i915->ggtt);

	ggtt_cleanup_hw(&i915->ggtt);

	/* Restore cached stash pages to write-back before freeing them. */
	pvec = &i915->mm.wc_stash.pvec;
	if (pvec->nr) {
		set_pages_array_wb(pvec->pages, pvec->nr);
		__pagevec_release(pvec);
	}

	i915_gem_cleanup_stolen(i915);
}
70e32544 | 2802 | |
2c642b07 | 2803 | static unsigned int gen6_get_total_gtt_size(u16 snb_gmch_ctl) |
e76e9aeb BW |
2804 | { |
2805 | snb_gmch_ctl >>= SNB_GMCH_GGMS_SHIFT; | |
2806 | snb_gmch_ctl &= SNB_GMCH_GGMS_MASK; | |
2807 | return snb_gmch_ctl << 20; | |
2808 | } | |
2809 | ||
/*
 * Decode the gen8 (BDW+) GMCH control word into the GTT size in bytes;
 * the field encodes a power-of-two number of MiB.
 */
static unsigned int gen8_get_total_gtt_size(u16 bdw_gmch_ctl)
{
	bdw_gmch_ctl >>= BDW_GMCH_GGMS_SHIFT;
	bdw_gmch_ctl &= BDW_GMCH_GGMS_MASK;
	if (bdw_gmch_ctl)
		bdw_gmch_ctl = 1 << bdw_gmch_ctl;

#ifdef CONFIG_X86_32
	/* Limit 32b platforms to a 2GB GGTT: 4 << 20 / pte size * I915_GTT_PAGE_SIZE */
	if (bdw_gmch_ctl > 4)
		bdw_gmch_ctl = 4;
#endif

	return bdw_gmch_ctl << 20;
}
2825 | ||
2c642b07 | 2826 | static unsigned int chv_get_total_gtt_size(u16 gmch_ctrl) |
d7f25f23 DL |
2827 | { |
2828 | gmch_ctrl >>= SNB_GMCH_GGMS_SHIFT; | |
2829 | gmch_ctrl &= SNB_GMCH_GGMS_MASK; | |
2830 | ||
2831 | if (gmch_ctrl) | |
2832 | return 1 << (20 + gmch_ctrl); | |
2833 | ||
2834 | return 0; | |
2835 | } | |
2836 | ||
/*
 * ggtt_probe_common - shared gen6+/gen8+ probe tail: map the GTT page-table
 * half of BAR0, set up the scratch page, and precompute its PTE encoding.
 */
static int ggtt_probe_common(struct i915_ggtt *ggtt, u64 size)
{
	struct drm_i915_private *dev_priv = ggtt->vm.i915;
	struct pci_dev *pdev = dev_priv->drm.pdev;
	phys_addr_t phys_addr;
	int ret;

	/* For Modern GENs the PTEs and register space are split in the BAR */
	phys_addr = pci_resource_start(pdev, 0) + pci_resource_len(pdev, 0) / 2;

	/*
	 * On BXT+/CNL+ writes larger than 64 bit to the GTT pagetable range
	 * will be dropped. For WC mappings in general we have 64 byte burst
	 * writes when the WC buffer is flushed, so we can't use it, but have to
	 * resort to an uncached mapping. The WC issue is easily caught by the
	 * readback check when writing GTT PTE entries.
	 */
	if (IS_GEN9_LP(dev_priv) || INTEL_GEN(dev_priv) >= 10)
		ggtt->gsm = ioremap_nocache(phys_addr, size);
	else
		ggtt->gsm = ioremap_wc(phys_addr, size);
	if (!ggtt->gsm) {
		DRM_ERROR("Failed to map the ggtt page table\n");
		return -ENOMEM;
	}

	ret = setup_scratch_page(&ggtt->vm, GFP_DMA32);
	if (ret) {
		DRM_ERROR("Scratch setup failed\n");
		/* iounmap will also get called at remove, but meh */
		iounmap(ggtt->gsm);
		return ret;
	}

	/* Cache the scratch PTE so clear_range can reuse it cheaply. */
	ggtt->vm.scratch[0].encode =
		ggtt->vm.pte_encode(px_dma(&ggtt->vm.scratch[0]),
				    I915_CACHE_NONE, 0);

	return 0;
}
2877 | ||
b41e63d8 MT |
2878 | static void tgl_setup_private_ppat(struct drm_i915_private *dev_priv) |
2879 | { | |
2880 | /* TGL doesn't support LLC or AGE settings */ | |
2881 | I915_WRITE(GEN12_PAT_INDEX(0), GEN8_PPAT_WB); | |
2882 | I915_WRITE(GEN12_PAT_INDEX(1), GEN8_PPAT_WC); | |
2883 | I915_WRITE(GEN12_PAT_INDEX(2), GEN8_PPAT_WT); | |
2884 | I915_WRITE(GEN12_PAT_INDEX(3), GEN8_PPAT_UC); | |
2885 | I915_WRITE(GEN12_PAT_INDEX(4), GEN8_PPAT_WB); | |
2886 | I915_WRITE(GEN12_PAT_INDEX(5), GEN8_PPAT_WB); | |
2887 | I915_WRITE(GEN12_PAT_INDEX(6), GEN8_PPAT_WB); | |
2888 | I915_WRITE(GEN12_PAT_INDEX(7), GEN8_PPAT_WB); | |
2889 | } | |
2890 | ||
/* Program the gen10 (CNL) private PAT via the per-index registers. */
static void cnl_setup_private_ppat(struct drm_i915_private *dev_priv)
{
	I915_WRITE(GEN10_PAT_INDEX(0), GEN8_PPAT_WB | GEN8_PPAT_LLC);
	I915_WRITE(GEN10_PAT_INDEX(1), GEN8_PPAT_WC | GEN8_PPAT_LLCELLC);
	I915_WRITE(GEN10_PAT_INDEX(2), GEN8_PPAT_WT | GEN8_PPAT_LLCELLC);
	I915_WRITE(GEN10_PAT_INDEX(3), GEN8_PPAT_UC);
	I915_WRITE(GEN10_PAT_INDEX(4), GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(0));
	I915_WRITE(GEN10_PAT_INDEX(5), GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(1));
	I915_WRITE(GEN10_PAT_INDEX(6), GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(2));
	I915_WRITE(GEN10_PAT_INDEX(7), GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(3));
}
2902 | ||
fbe5d36e BW |
/* The GGTT and PPGTT need a private PPAT setup in order to handle cacheability
 * bits. When using advanced contexts each context stores its own PAT, but
 * writing this data shouldn't be harmful even in those cases. */
static void bdw_setup_private_ppat(struct drm_i915_private *dev_priv)
{
	u64 pat;

	pat = GEN8_PPAT(0, GEN8_PPAT_WB | GEN8_PPAT_LLC) |	/* for normal objects, no eLLC */
	      GEN8_PPAT(1, GEN8_PPAT_WC | GEN8_PPAT_LLCELLC) |	/* for something pointing to ptes? */
	      GEN8_PPAT(2, GEN8_PPAT_WT | GEN8_PPAT_LLCELLC) |	/* for scanout with eLLC */
	      GEN8_PPAT(3, GEN8_PPAT_UC) |			/* Uncached objects, mostly for scanout */
	      GEN8_PPAT(4, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(0)) |
	      GEN8_PPAT(5, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(1)) |
	      GEN8_PPAT(6, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(2)) |
	      GEN8_PPAT(7, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(3));

	/* BDW exposes the whole 64-bit PAT through a lo/hi register pair. */
	I915_WRITE(GEN8_PRIVATE_PAT_LO, lower_32_bits(pat));
	I915_WRITE(GEN8_PRIVATE_PAT_HI, upper_32_bits(pat));
}
2922 | ||
/* Program the CHV private PAT, where only the snoop bit is meaningful. */
static void chv_setup_private_ppat(struct drm_i915_private *dev_priv)
{
	u64 pat;

	/*
	 * Map WB on BDW to snooped on CHV.
	 *
	 * Only the snoop bit has meaning for CHV, the rest is
	 * ignored.
	 *
	 * The hardware will never snoop for certain types of accesses:
	 * - CPU GTT (GMADR->GGTT->no snoop->memory)
	 * - PPGTT page tables
	 * - some other special cycles
	 *
	 * As with BDW, we also need to consider the following for GT accesses:
	 * "For GGTT, there is NO pat_sel[2:0] from the entry,
	 * so RTL will always use the value corresponding to
	 * pat_sel = 000".
	 * Which means we must set the snoop bit in PAT entry 0
	 * in order to keep the global status page working.
	 */

	pat = GEN8_PPAT(0, CHV_PPAT_SNOOP) |
	      GEN8_PPAT(1, 0) |
	      GEN8_PPAT(2, 0) |
	      GEN8_PPAT(3, 0) |
	      GEN8_PPAT(4, CHV_PPAT_SNOOP) |
	      GEN8_PPAT(5, CHV_PPAT_SNOOP) |
	      GEN8_PPAT(6, CHV_PPAT_SNOOP) |
	      GEN8_PPAT(7, CHV_PPAT_SNOOP);

	I915_WRITE(GEN8_PRIVATE_PAT_LO, lower_32_bits(pat));
	I915_WRITE(GEN8_PRIVATE_PAT_HI, upper_32_bits(pat));
}
2958 | ||
34c998b4 CW |
/* vm.cleanup for gen6+: unmap the GSM and free the scratch page. */
static void gen6_gmch_remove(struct i915_address_space *vm)
{
	struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);

	iounmap(ggtt->gsm);
	cleanup_scratch_page(vm);
}
2966 | ||
36e16c49 ZW |
/* Pick and program the platform-specific private PAT (gen8+ only). */
static void setup_private_pat(struct drm_i915_private *dev_priv)
{
	GEM_BUG_ON(INTEL_GEN(dev_priv) < 8);

	if (INTEL_GEN(dev_priv) >= 12)
		tgl_setup_private_ppat(dev_priv);
	else if (INTEL_GEN(dev_priv) >= 10)
		cnl_setup_private_ppat(dev_priv);
	else if (IS_CHERRYVIEW(dev_priv) || IS_GEN9_LP(dev_priv))
		chv_setup_private_ppat(dev_priv);
	else
		bdw_setup_private_ppat(dev_priv);
}
2980 | ||
/*
 * gen8_gmch_probe - size the gen8+ GGTT from PCI config space and install
 * the gen8 vm/vma operations (with BXT/CHV VT-d serialisation quirks).
 */
static int gen8_gmch_probe(struct i915_ggtt *ggtt)
{
	struct drm_i915_private *dev_priv = ggtt->vm.i915;
	struct pci_dev *pdev = dev_priv->drm.pdev;
	unsigned int size;
	u16 snb_gmch_ctl;
	int err;

	/* TODO: We're not aware of mappable constraints on gen8 yet */
	ggtt->gmadr =
		(struct resource) DEFINE_RES_MEM(pci_resource_start(pdev, 2),
						 pci_resource_len(pdev, 2));
	ggtt->mappable_end = resource_size(&ggtt->gmadr);

	/* DMA mask failure is non-fatal; we merely log and carry on. */
	err = pci_set_dma_mask(pdev, DMA_BIT_MASK(39));
	if (!err)
		err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(39));
	if (err)
		DRM_ERROR("Can't set DMA mask/consistent mask (%d)\n", err);

	pci_read_config_word(pdev, SNB_GMCH_CTRL, &snb_gmch_ctl);
	if (IS_CHERRYVIEW(dev_priv))
		size = chv_get_total_gtt_size(snb_gmch_ctl);
	else
		size = gen8_get_total_gtt_size(snb_gmch_ctl);

	ggtt->vm.total = (size / sizeof(gen8_pte_t)) * I915_GTT_PAGE_SIZE;
	ggtt->vm.cleanup = gen6_gmch_remove;
	ggtt->vm.insert_page = gen8_ggtt_insert_page;
	ggtt->vm.clear_range = nop_clear_range;
	if (intel_scanout_needs_vtd_wa(dev_priv))
		ggtt->vm.clear_range = gen8_ggtt_clear_range;

	ggtt->vm.insert_entries = gen8_ggtt_insert_entries;

	/* Serialize GTT updates with aperture access on BXT if VT-d is on. */
	if (intel_ggtt_update_needs_vtd_wa(dev_priv) ||
	    IS_CHERRYVIEW(dev_priv) /* fails with concurrent use/update */) {
		ggtt->vm.insert_entries = bxt_vtd_ggtt_insert_entries__BKL;
		ggtt->vm.insert_page = bxt_vtd_ggtt_insert_page__BKL;
		if (ggtt->vm.clear_range != nop_clear_range)
			ggtt->vm.clear_range = bxt_vtd_ggtt_clear_range__BKL;
	}

	ggtt->invalidate = gen6_ggtt_invalidate;

	ggtt->vm.vma_ops.bind_vma = ggtt_bind_vma;
	ggtt->vm.vma_ops.unbind_vma = ggtt_unbind_vma;
	ggtt->vm.vma_ops.set_pages = ggtt_set_pages;
	ggtt->vm.vma_ops.clear_pages = clear_pages;

	ggtt->vm.pte_encode = gen8_pte_encode;

	setup_private_pat(dev_priv);

	return ggtt_probe_common(ggtt, size);
}
3038 | ||
/*
 * gen6_gmch_probe - size the gen6/7 GGTT from PCI config space and install
 * the gen6 vm/vma operations, choosing the platform PTE encoder.
 */
static int gen6_gmch_probe(struct i915_ggtt *ggtt)
{
	struct drm_i915_private *dev_priv = ggtt->vm.i915;
	struct pci_dev *pdev = dev_priv->drm.pdev;
	unsigned int size;
	u16 snb_gmch_ctl;
	int err;

	ggtt->gmadr =
		(struct resource) DEFINE_RES_MEM(pci_resource_start(pdev, 2),
						 pci_resource_len(pdev, 2));
	ggtt->mappable_end = resource_size(&ggtt->gmadr);

	/* 64/512MB is the current min/max we actually know of, but this is just
	 * a coarse sanity check.
	 */
	if (ggtt->mappable_end < (64<<20) || ggtt->mappable_end > (512<<20)) {
		DRM_ERROR("Unknown GMADR size (%pa)\n", &ggtt->mappable_end);
		return -ENXIO;
	}

	/* DMA mask failure is non-fatal; we merely log and carry on. */
	err = pci_set_dma_mask(pdev, DMA_BIT_MASK(40));
	if (!err)
		err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(40));
	if (err)
		DRM_ERROR("Can't set DMA mask/consistent mask (%d)\n", err);
	pci_read_config_word(pdev, SNB_GMCH_CTRL, &snb_gmch_ctl);

	size = gen6_get_total_gtt_size(snb_gmch_ctl);
	ggtt->vm.total = (size / sizeof(gen6_pte_t)) * I915_GTT_PAGE_SIZE;

	ggtt->vm.clear_range = nop_clear_range;
	if (!HAS_FULL_PPGTT(dev_priv) || intel_scanout_needs_vtd_wa(dev_priv))
		ggtt->vm.clear_range = gen6_ggtt_clear_range;
	ggtt->vm.insert_page = gen6_ggtt_insert_page;
	ggtt->vm.insert_entries = gen6_ggtt_insert_entries;
	ggtt->vm.cleanup = gen6_gmch_remove;

	ggtt->invalidate = gen6_ggtt_invalidate;

	/* Select the PTE encoding matching the platform's cache topology. */
	if (HAS_EDRAM(dev_priv))
		ggtt->vm.pte_encode = iris_pte_encode;
	else if (IS_HASWELL(dev_priv))
		ggtt->vm.pte_encode = hsw_pte_encode;
	else if (IS_VALLEYVIEW(dev_priv))
		ggtt->vm.pte_encode = byt_pte_encode;
	else if (INTEL_GEN(dev_priv) >= 7)
		ggtt->vm.pte_encode = ivb_pte_encode;
	else
		ggtt->vm.pte_encode = snb_pte_encode;

	ggtt->vm.vma_ops.bind_vma = ggtt_bind_vma;
	ggtt->vm.vma_ops.unbind_vma = ggtt_unbind_vma;
	ggtt->vm.vma_ops.set_pages = ggtt_set_pages;
	ggtt->vm.vma_ops.clear_pages = clear_pages;

	return ggtt_probe_common(ggtt, size);
}
3097 | ||
/* vm.cleanup for legacy GMCH platforms: hand back to the intel-gtt layer. */
static void i915_gmch_remove(struct i915_address_space *vm)
{
	intel_gmch_remove();
}
baa09f5f | 3102 | |
/*
 * i915_gmch_probe - probe the legacy (pre-gen6) GGTT via the intel-gtt
 * helper module and install the legacy vm/vma operations.
 */
static int i915_gmch_probe(struct i915_ggtt *ggtt)
{
	struct drm_i915_private *dev_priv = ggtt->vm.i915;
	phys_addr_t gmadr_base;
	int ret;

	/* intel_gmch_probe() returns nonzero on success. */
	ret = intel_gmch_probe(dev_priv->bridge_dev, dev_priv->drm.pdev, NULL);
	if (!ret) {
		DRM_ERROR("failed to set up gmch\n");
		return -EIO;
	}

	intel_gtt_get(&ggtt->vm.total, &gmadr_base, &ggtt->mappable_end);

	ggtt->gmadr =
		(struct resource) DEFINE_RES_MEM(gmadr_base,
						 ggtt->mappable_end);

	ggtt->do_idle_maps = needs_idle_maps(dev_priv);
	ggtt->vm.insert_page = i915_ggtt_insert_page;
	ggtt->vm.insert_entries = i915_ggtt_insert_entries;
	ggtt->vm.clear_range = i915_ggtt_clear_range;
	ggtt->vm.cleanup = i915_gmch_remove;

	ggtt->invalidate = gmch_ggtt_invalidate;

	ggtt->vm.vma_ops.bind_vma = ggtt_bind_vma;
	ggtt->vm.vma_ops.unbind_vma = ggtt_unbind_vma;
	ggtt->vm.vma_ops.set_pages = ggtt_set_pages;
	ggtt->vm.vma_ops.clear_pages = clear_pages;

	if (unlikely(ggtt->do_idle_maps))
		dev_notice(dev_priv->drm.dev,
			   "Applying Ironlake quirks for intel_iommu\n");

	return 0;
}
3140 | ||
/*
 * ggtt_probe_hw - dispatch to the per-generation GGTT probe, then sanity
 * check and clamp the discovered sizes (GGTT must fit in 32 bits; the
 * mappable aperture must not exceed the GGTT).
 */
static int ggtt_probe_hw(struct i915_ggtt *ggtt, struct intel_gt *gt)
{
	struct drm_i915_private *i915 = gt->i915;
	int ret;

	ggtt->vm.gt = gt;
	ggtt->vm.i915 = i915;
	ggtt->vm.dma = &i915->drm.pdev->dev;

	if (INTEL_GEN(i915) <= 5)
		ret = i915_gmch_probe(ggtt);
	else if (INTEL_GEN(i915) < 8)
		ret = gen6_gmch_probe(ggtt);
	else
		ret = gen8_gmch_probe(ggtt);
	if (ret)
		return ret;

	if ((ggtt->vm.total - 1) >> 32) {
		DRM_ERROR("We never expected a Global GTT with more than 32bits"
			  " of address space! Found %lldM!\n",
			  ggtt->vm.total >> 20);
		ggtt->vm.total = 1ULL << 32;
		ggtt->mappable_end =
			min_t(u64, ggtt->mappable_end, ggtt->vm.total);
	}

	if (ggtt->mappable_end > ggtt->vm.total) {
		DRM_ERROR("mappable aperture extends past end of GGTT,"
			  " aperture=%pa, total=%llx\n",
			  &ggtt->mappable_end, ggtt->vm.total);
		ggtt->mappable_end = ggtt->vm.total;
	}

	/* GMADR is the PCI mmio aperture into the global GTT. */
	DRM_DEBUG_DRIVER("GGTT size = %lluM\n", ggtt->vm.total >> 20);
	DRM_DEBUG_DRIVER("GMADR size = %lluM\n", (u64)ggtt->mappable_end >> 20);
	DRM_DEBUG_DRIVER("DSM size = %lluM\n",
			 (u64)resource_size(&intel_graphics_stolen_res) >> 20);

	return 0;
}
3183 | ||
/**
 * i915_ggtt_probe_hw - Probe GGTT hardware location
 * @i915: i915 device
 *
 * Return: 0 on success, negative errno from the per-generation probe.
 */
int i915_ggtt_probe_hw(struct drm_i915_private *i915)
{
	int ret;

	ret = ggtt_probe_hw(&i915->ggtt, &i915->gt);
	if (ret)
		return ret;

	if (intel_vtd_active())
		dev_info(i915->drm.dev, "VT-d active for gfx access\n");

	return 0;
}
3201 | ||
8b5342f5 TU |
3202 | static int ggtt_init_hw(struct i915_ggtt *ggtt) |
3203 | { | |
3204 | struct drm_i915_private *i915 = ggtt->vm.i915; | |
3205 | int ret = 0; | |
3206 | ||
3207 | mutex_lock(&i915->drm.struct_mutex); | |
63fd659f | 3208 | |
305dc3f9 | 3209 | i915_address_space_init(&ggtt->vm, VM_CLASS_GGTT); |
250f8c81 | 3210 | |
48e90504 TU |
3211 | ggtt->vm.is_ggtt = true; |
3212 | ||
250f8c81 | 3213 | /* Only VLV supports read-only GGTT mappings */ |
8b5342f5 | 3214 | ggtt->vm.has_read_only = IS_VALLEYVIEW(i915); |
250f8c81 | 3215 | |
8b5342f5 | 3216 | if (!HAS_LLC(i915) && !HAS_PPGTT(i915)) |
82ad6443 | 3217 | ggtt->vm.mm.color_adjust = i915_gtt_color_adjust; |
f6b9d5ca | 3218 | |
8b5342f5 TU |
3219 | if (!io_mapping_init_wc(&ggtt->iomap, |
3220 | ggtt->gmadr.start, | |
3221 | ggtt->mappable_end)) { | |
3b896628 | 3222 | ggtt->vm.cleanup(&ggtt->vm); |
f6b9d5ca | 3223 | ret = -EIO; |
8b5342f5 | 3224 | goto out; |
f6b9d5ca CW |
3225 | } |
3226 | ||
73ebd503 | 3227 | ggtt->mtrr = arch_phys_wc_add(ggtt->gmadr.start, ggtt->mappable_end); |
f6b9d5ca | 3228 | |
0cf289bd CW |
3229 | i915_ggtt_init_fences(ggtt); |
3230 | ||
8b5342f5 TU |
3231 | out: |
3232 | mutex_unlock(&i915->drm.struct_mutex); | |
3233 | ||
3234 | return ret; | |
3235 | } | |
3236 | ||
3237 | /** | |
3238 | * i915_ggtt_init_hw - Initialize GGTT hardware | |
3239 | * @dev_priv: i915 device | |
3240 | */ | |
3241 | int i915_ggtt_init_hw(struct drm_i915_private *dev_priv) | |
3242 | { | |
3243 | int ret; | |
3244 | ||
3245 | stash_init(&dev_priv->mm.wc_stash); | |
3246 | ||
3247 | /* Note that we use page colouring to enforce a guard page at the | |
3248 | * end of the address space. This is required as the CS may prefetch | |
3249 | * beyond the end of the batch buffer, across the page boundary, | |
3250 | * and beyond the end of the GTT if we do not provide a guard. | |
3251 | */ | |
3252 | ret = ggtt_init_hw(&dev_priv->ggtt); | |
3253 | if (ret) | |
3254 | return ret; | |
3255 | ||
0088e522 CW |
3256 | /* |
3257 | * Initialise stolen early so that we may reserve preallocated | |
3258 | * objects for the BIOS to KMS transition. | |
3259 | */ | |
7ace3d30 | 3260 | ret = i915_gem_init_stolen(dev_priv); |
0088e522 CW |
3261 | if (ret) |
3262 | goto out_gtt_cleanup; | |
3263 | ||
3264 | return 0; | |
a4eba47b ID |
3265 | |
3266 | out_gtt_cleanup: | |
3b896628 | 3267 | dev_priv->ggtt.vm.cleanup(&dev_priv->ggtt.vm); |
a4eba47b | 3268 | return ret; |
baa09f5f | 3269 | } |
6f65e29a | 3270 | |
97d6d7ab | 3271 | int i915_ggtt_enable_hw(struct drm_i915_private *dev_priv) |
ac840ae5 | 3272 | { |
97d6d7ab | 3273 | if (INTEL_GEN(dev_priv) < 6 && !intel_enable_gtt()) |
ac840ae5 VS |
3274 | return -EIO; |
3275 | ||
3276 | return 0; | |
3277 | } | |
3278 | ||
84b1ca2f | 3279 | void i915_ggtt_enable_guc(struct i915_ggtt *ggtt) |
7c3f86b6 | 3280 | { |
759e4a74 | 3281 | GEM_BUG_ON(ggtt->invalidate != gen6_ggtt_invalidate); |
aeb950bd | 3282 | |
759e4a74 TU |
3283 | ggtt->invalidate = guc_ggtt_invalidate; |
3284 | ||
3285 | ggtt->invalidate(ggtt); | |
7c3f86b6 CW |
3286 | } |
3287 | ||
84b1ca2f | 3288 | void i915_ggtt_disable_guc(struct i915_ggtt *ggtt) |
7c3f86b6 | 3289 | { |
35e90081 | 3290 | /* XXX Temporary pardon for error unload */ |
759e4a74 | 3291 | if (ggtt->invalidate == gen6_ggtt_invalidate) |
35e90081 CW |
3292 | return; |
3293 | ||
04f7b24e | 3294 | /* We should only be called after i915_ggtt_enable_guc() */ |
759e4a74 | 3295 | GEM_BUG_ON(ggtt->invalidate != guc_ggtt_invalidate); |
04f7b24e | 3296 | |
759e4a74 | 3297 | ggtt->invalidate = gen6_ggtt_invalidate; |
aeb950bd | 3298 | |
759e4a74 | 3299 | ggtt->invalidate(ggtt); |
7c3f86b6 CW |
3300 | } |
3301 | ||
68c754b8 | 3302 | static void ggtt_restore_mappings(struct i915_ggtt *ggtt) |
fa42331b | 3303 | { |
74479985 | 3304 | struct i915_vma *vma, *vn; |
64b95df9 | 3305 | bool flush = false; |
fa42331b | 3306 | |
68c754b8 | 3307 | intel_gt_check_and_clear_faults(ggtt->vm.gt); |
fa42331b | 3308 | |
09d7e46b CW |
3309 | mutex_lock(&ggtt->vm.mutex); |
3310 | ||
fa42331b | 3311 | /* First fill our portion of the GTT with scratch pages */ |
82ad6443 | 3312 | ggtt->vm.clear_range(&ggtt->vm, 0, ggtt->vm.total); |
82ad6443 | 3313 | ggtt->vm.closed = true; /* skip rewriting PTE on VMA unbind */ |
fbb30a5c CW |
3314 | |
3315 | /* clflush objects bound into the GGTT and rebind them. */ | |
499197dc | 3316 | list_for_each_entry_safe(vma, vn, &ggtt->vm.bound_list, vm_link) { |
74479985 | 3317 | struct drm_i915_gem_object *obj = vma->obj; |
fbb30a5c | 3318 | |
74479985 CW |
3319 | if (!(vma->flags & I915_VMA_GLOBAL_BIND)) |
3320 | continue; | |
fbb30a5c | 3321 | |
09d7e46b CW |
3322 | mutex_unlock(&ggtt->vm.mutex); |
3323 | ||
74479985 | 3324 | if (!i915_vma_unbind(vma)) |
09d7e46b | 3325 | goto lock; |
2c3d9984 | 3326 | |
520ea7c5 CW |
3327 | WARN_ON(i915_vma_bind(vma, |
3328 | obj ? obj->cache_level : 0, | |
3329 | PIN_UPDATE)); | |
64b95df9 CW |
3330 | if (obj) { /* only used during resume => exclusive access */ |
3331 | flush |= fetch_and_zero(&obj->write_domain); | |
3332 | obj->read_domains |= I915_GEM_DOMAIN_GTT; | |
6951e589 | 3333 | } |
09d7e46b CW |
3334 | |
3335 | lock: | |
3336 | mutex_lock(&ggtt->vm.mutex); | |
2c3d9984 | 3337 | } |
fa42331b | 3338 | |
82ad6443 | 3339 | ggtt->vm.closed = false; |
759e4a74 | 3340 | ggtt->invalidate(ggtt); |
fbb30a5c | 3341 | |
09d7e46b | 3342 | mutex_unlock(&ggtt->vm.mutex); |
64b95df9 CW |
3343 | |
3344 | if (flush) | |
3345 | wbinvd_on_all_cpus(); | |
68c754b8 TU |
3346 | } |
3347 | ||
3348 | void i915_gem_restore_gtt_mappings(struct drm_i915_private *i915) | |
3349 | { | |
3350 | ggtt_restore_mappings(&i915->ggtt); | |
09d7e46b | 3351 | |
a3389c14 MW |
3352 | if (INTEL_GEN(i915) >= 8) |
3353 | setup_private_pat(i915); | |
fa42331b DV |
3354 | } |
3355 | ||
804beb4b | 3356 | static struct scatterlist * |
73f522ba | 3357 | rotate_pages(struct drm_i915_gem_object *obj, unsigned int offset, |
804beb4b | 3358 | unsigned int width, unsigned int height, |
87130255 | 3359 | unsigned int stride, |
804beb4b | 3360 | struct sg_table *st, struct scatterlist *sg) |
50470bb0 TU |
3361 | { |
3362 | unsigned int column, row; | |
3363 | unsigned int src_idx; | |
50470bb0 | 3364 | |
50470bb0 | 3365 | for (column = 0; column < width; column++) { |
73f522ba | 3366 | src_idx = stride * (height - 1) + column + offset; |
50470bb0 TU |
3367 | for (row = 0; row < height; row++) { |
3368 | st->nents++; | |
3369 | /* We don't need the pages, but need to initialize | |
3370 | * the entries so the sg list can be happily traversed. | |
3371 | * The only thing we need are DMA addresses. | |
3372 | */ | |
f6e35cda | 3373 | sg_set_page(sg, NULL, I915_GTT_PAGE_SIZE, 0); |
73f522ba VS |
3374 | sg_dma_address(sg) = |
3375 | i915_gem_object_get_dma_address(obj, src_idx); | |
f6e35cda | 3376 | sg_dma_len(sg) = I915_GTT_PAGE_SIZE; |
50470bb0 | 3377 | sg = sg_next(sg); |
87130255 | 3378 | src_idx -= stride; |
50470bb0 TU |
3379 | } |
3380 | } | |
804beb4b TU |
3381 | |
3382 | return sg; | |
50470bb0 TU |
3383 | } |
3384 | ||
ba7a5741 CW |
3385 | static noinline struct sg_table * |
3386 | intel_rotate_pages(struct intel_rotation_info *rot_info, | |
3387 | struct drm_i915_gem_object *obj) | |
50470bb0 | 3388 | { |
6687c906 | 3389 | unsigned int size = intel_rotation_info_size(rot_info); |
50470bb0 | 3390 | struct sg_table *st; |
89e3e142 | 3391 | struct scatterlist *sg; |
1d00dad5 | 3392 | int ret = -ENOMEM; |
73f522ba | 3393 | int i; |
50470bb0 TU |
3394 | |
3395 | /* Allocate target SG list. */ | |
3396 | st = kmalloc(sizeof(*st), GFP_KERNEL); | |
3397 | if (!st) | |
3398 | goto err_st_alloc; | |
3399 | ||
6687c906 | 3400 | ret = sg_alloc_table(st, size, GFP_KERNEL); |
50470bb0 TU |
3401 | if (ret) |
3402 | goto err_sg_alloc; | |
3403 | ||
11f20322 VS |
3404 | st->nents = 0; |
3405 | sg = st->sgl; | |
3406 | ||
6687c906 | 3407 | for (i = 0 ; i < ARRAY_SIZE(rot_info->plane); i++) { |
73f522ba | 3408 | sg = rotate_pages(obj, rot_info->plane[i].offset, |
6687c906 VS |
3409 | rot_info->plane[i].width, rot_info->plane[i].height, |
3410 | rot_info->plane[i].stride, st, sg); | |
89e3e142 TU |
3411 | } |
3412 | ||
50470bb0 TU |
3413 | return st; |
3414 | ||
3415 | err_sg_alloc: | |
3416 | kfree(st); | |
3417 | err_st_alloc: | |
50470bb0 | 3418 | |
62d0fe45 CW |
3419 | DRM_DEBUG_DRIVER("Failed to create rotated mapping for object size %zu! (%ux%u tiles, %u pages)\n", |
3420 | obj->base.size, rot_info->plane[0].width, rot_info->plane[0].height, size); | |
6687c906 | 3421 | |
50470bb0 TU |
3422 | return ERR_PTR(ret); |
3423 | } | |
ec7adb6e | 3424 | |
1a74fc0b VS |
3425 | static struct scatterlist * |
3426 | remap_pages(struct drm_i915_gem_object *obj, unsigned int offset, | |
3427 | unsigned int width, unsigned int height, | |
3428 | unsigned int stride, | |
3429 | struct sg_table *st, struct scatterlist *sg) | |
3430 | { | |
3431 | unsigned int row; | |
3432 | ||
3433 | for (row = 0; row < height; row++) { | |
3434 | unsigned int left = width * I915_GTT_PAGE_SIZE; | |
3435 | ||
3436 | while (left) { | |
3437 | dma_addr_t addr; | |
3438 | unsigned int length; | |
3439 | ||
3440 | /* We don't need the pages, but need to initialize | |
3441 | * the entries so the sg list can be happily traversed. | |
3442 | * The only thing we need are DMA addresses. | |
3443 | */ | |
3444 | ||
3445 | addr = i915_gem_object_get_dma_address_len(obj, offset, &length); | |
3446 | ||
3447 | length = min(left, length); | |
3448 | ||
3449 | st->nents++; | |
3450 | ||
3451 | sg_set_page(sg, NULL, length, 0); | |
3452 | sg_dma_address(sg) = addr; | |
3453 | sg_dma_len(sg) = length; | |
3454 | sg = sg_next(sg); | |
3455 | ||
3456 | offset += length / I915_GTT_PAGE_SIZE; | |
3457 | left -= length; | |
3458 | } | |
3459 | ||
3460 | offset += stride - width; | |
3461 | } | |
3462 | ||
3463 | return sg; | |
3464 | } | |
3465 | ||
3466 | static noinline struct sg_table * | |
3467 | intel_remap_pages(struct intel_remapped_info *rem_info, | |
3468 | struct drm_i915_gem_object *obj) | |
3469 | { | |
3470 | unsigned int size = intel_remapped_info_size(rem_info); | |
3471 | struct sg_table *st; | |
3472 | struct scatterlist *sg; | |
3473 | int ret = -ENOMEM; | |
3474 | int i; | |
3475 | ||
3476 | /* Allocate target SG list. */ | |
3477 | st = kmalloc(sizeof(*st), GFP_KERNEL); | |
3478 | if (!st) | |
3479 | goto err_st_alloc; | |
3480 | ||
3481 | ret = sg_alloc_table(st, size, GFP_KERNEL); | |
3482 | if (ret) | |
3483 | goto err_sg_alloc; | |
3484 | ||
3485 | st->nents = 0; | |
3486 | sg = st->sgl; | |
3487 | ||
3488 | for (i = 0 ; i < ARRAY_SIZE(rem_info->plane); i++) { | |
3489 | sg = remap_pages(obj, rem_info->plane[i].offset, | |
3490 | rem_info->plane[i].width, rem_info->plane[i].height, | |
3491 | rem_info->plane[i].stride, st, sg); | |
3492 | } | |
3493 | ||
3494 | i915_sg_trim(st); | |
3495 | ||
3496 | return st; | |
3497 | ||
3498 | err_sg_alloc: | |
3499 | kfree(st); | |
3500 | err_st_alloc: | |
3501 | ||
3502 | DRM_DEBUG_DRIVER("Failed to create remapped mapping for object size %zu! (%ux%u tiles, %u pages)\n", | |
3503 | obj->base.size, rem_info->plane[0].width, rem_info->plane[0].height, size); | |
3504 | ||
3505 | return ERR_PTR(ret); | |
3506 | } | |
3507 | ||
ba7a5741 | 3508 | static noinline struct sg_table * |
8bd7ef16 JL |
3509 | intel_partial_pages(const struct i915_ggtt_view *view, |
3510 | struct drm_i915_gem_object *obj) | |
3511 | { | |
3512 | struct sg_table *st; | |
d2a84a76 | 3513 | struct scatterlist *sg, *iter; |
8bab1193 | 3514 | unsigned int count = view->partial.size; |
d2a84a76 | 3515 | unsigned int offset; |
8bd7ef16 JL |
3516 | int ret = -ENOMEM; |
3517 | ||
3518 | st = kmalloc(sizeof(*st), GFP_KERNEL); | |
3519 | if (!st) | |
3520 | goto err_st_alloc; | |
3521 | ||
d2a84a76 | 3522 | ret = sg_alloc_table(st, count, GFP_KERNEL); |
8bd7ef16 JL |
3523 | if (ret) |
3524 | goto err_sg_alloc; | |
3525 | ||
8bab1193 | 3526 | iter = i915_gem_object_get_sg(obj, view->partial.offset, &offset); |
d2a84a76 CW |
3527 | GEM_BUG_ON(!iter); |
3528 | ||
8bd7ef16 JL |
3529 | sg = st->sgl; |
3530 | st->nents = 0; | |
d2a84a76 CW |
3531 | do { |
3532 | unsigned int len; | |
8bd7ef16 | 3533 | |
d2a84a76 CW |
3534 | len = min(iter->length - (offset << PAGE_SHIFT), |
3535 | count << PAGE_SHIFT); | |
3536 | sg_set_page(sg, NULL, len, 0); | |
3537 | sg_dma_address(sg) = | |
3538 | sg_dma_address(iter) + (offset << PAGE_SHIFT); | |
3539 | sg_dma_len(sg) = len; | |
8bd7ef16 | 3540 | |
8bd7ef16 | 3541 | st->nents++; |
d2a84a76 CW |
3542 | count -= len >> PAGE_SHIFT; |
3543 | if (count == 0) { | |
3544 | sg_mark_end(sg); | |
f8e57863 TU |
3545 | i915_sg_trim(st); /* Drop any unused tail entries. */ |
3546 | ||
d2a84a76 CW |
3547 | return st; |
3548 | } | |
8bd7ef16 | 3549 | |
d2a84a76 CW |
3550 | sg = __sg_next(sg); |
3551 | iter = __sg_next(iter); | |
3552 | offset = 0; | |
3553 | } while (1); | |
8bd7ef16 JL |
3554 | |
3555 | err_sg_alloc: | |
3556 | kfree(st); | |
3557 | err_st_alloc: | |
3558 | return ERR_PTR(ret); | |
3559 | } | |
3560 | ||
70b9f6f8 | 3561 | static int |
50470bb0 | 3562 | i915_get_ggtt_vma_pages(struct i915_vma *vma) |
fe14d5f4 | 3563 | { |
ba7a5741 | 3564 | int ret; |
50470bb0 | 3565 | |
2c3a3f44 CW |
3566 | /* The vma->pages are only valid within the lifespan of the borrowed |
3567 | * obj->mm.pages. When the obj->mm.pages sg_table is regenerated, so | |
3568 | * must be the vma->pages. A simple rule is that vma->pages must only | |
3569 | * be accessed when the obj->mm.pages are pinned. | |
3570 | */ | |
3571 | GEM_BUG_ON(!i915_gem_object_has_pinned_pages(vma->obj)); | |
3572 | ||
ba7a5741 | 3573 | switch (vma->ggtt_view.type) { |
62d4028f CW |
3574 | default: |
3575 | GEM_BUG_ON(vma->ggtt_view.type); | |
3576 | /* fall through */ | |
ba7a5741 CW |
3577 | case I915_GGTT_VIEW_NORMAL: |
3578 | vma->pages = vma->obj->mm.pages; | |
fe14d5f4 TU |
3579 | return 0; |
3580 | ||
ba7a5741 | 3581 | case I915_GGTT_VIEW_ROTATED: |
247177dd | 3582 | vma->pages = |
ba7a5741 CW |
3583 | intel_rotate_pages(&vma->ggtt_view.rotated, vma->obj); |
3584 | break; | |
3585 | ||
1a74fc0b VS |
3586 | case I915_GGTT_VIEW_REMAPPED: |
3587 | vma->pages = | |
3588 | intel_remap_pages(&vma->ggtt_view.remapped, vma->obj); | |
3589 | break; | |
3590 | ||
ba7a5741 | 3591 | case I915_GGTT_VIEW_PARTIAL: |
247177dd | 3592 | vma->pages = intel_partial_pages(&vma->ggtt_view, vma->obj); |
ba7a5741 | 3593 | break; |
ba7a5741 | 3594 | } |
fe14d5f4 | 3595 | |
ba7a5741 | 3596 | ret = 0; |
772b5408 | 3597 | if (IS_ERR(vma->pages)) { |
247177dd CW |
3598 | ret = PTR_ERR(vma->pages); |
3599 | vma->pages = NULL; | |
50470bb0 TU |
3600 | DRM_ERROR("Failed to get pages for VMA view type %u (%d)!\n", |
3601 | vma->ggtt_view.type, ret); | |
fe14d5f4 | 3602 | } |
50470bb0 | 3603 | return ret; |
fe14d5f4 TU |
3604 | } |
3605 | ||
625d988a CW |
3606 | /** |
3607 | * i915_gem_gtt_reserve - reserve a node in an address_space (GTT) | |
a4dbf7cf CW |
3608 | * @vm: the &struct i915_address_space |
3609 | * @node: the &struct drm_mm_node (typically i915_vma.mode) | |
3610 | * @size: how much space to allocate inside the GTT, | |
3611 | * must be #I915_GTT_PAGE_SIZE aligned | |
3612 | * @offset: where to insert inside the GTT, | |
3613 | * must be #I915_GTT_MIN_ALIGNMENT aligned, and the node | |
3614 | * (@offset + @size) must fit within the address space | |
3615 | * @color: color to apply to node, if this node is not from a VMA, | |
3616 | * color must be #I915_COLOR_UNEVICTABLE | |
3617 | * @flags: control search and eviction behaviour | |
625d988a CW |
3618 | * |
3619 | * i915_gem_gtt_reserve() tries to insert the @node at the exact @offset inside | |
3620 | * the address space (using @size and @color). If the @node does not fit, it | |
3621 | * tries to evict any overlapping nodes from the GTT, including any | |
3622 | * neighbouring nodes if the colors do not match (to ensure guard pages between | |
3623 | * differing domains). See i915_gem_evict_for_node() for the gory details | |
3624 | * on the eviction algorithm. #PIN_NONBLOCK may used to prevent waiting on | |
3625 | * evicting active overlapping objects, and any overlapping node that is pinned | |
3626 | * or marked as unevictable will also result in failure. | |
3627 | * | |
3628 | * Returns: 0 on success, -ENOSPC if no suitable hole is found, -EINTR if | |
3629 | * asked to wait for eviction and interrupted. | |
3630 | */ | |
3631 | int i915_gem_gtt_reserve(struct i915_address_space *vm, | |
3632 | struct drm_mm_node *node, | |
3633 | u64 size, u64 offset, unsigned long color, | |
3634 | unsigned int flags) | |
3635 | { | |
3636 | int err; | |
3637 | ||
3638 | GEM_BUG_ON(!size); | |
3639 | GEM_BUG_ON(!IS_ALIGNED(size, I915_GTT_PAGE_SIZE)); | |
3640 | GEM_BUG_ON(!IS_ALIGNED(offset, I915_GTT_MIN_ALIGNMENT)); | |
3641 | GEM_BUG_ON(range_overflows(offset, size, vm->total)); | |
c082afac | 3642 | GEM_BUG_ON(vm == &vm->i915->ggtt.alias->vm); |
9734ad13 | 3643 | GEM_BUG_ON(drm_mm_node_allocated(node)); |
625d988a CW |
3644 | |
3645 | node->size = size; | |
3646 | node->start = offset; | |
3647 | node->color = color; | |
3648 | ||
3649 | err = drm_mm_reserve_node(&vm->mm, node); | |
3650 | if (err != -ENOSPC) | |
3651 | return err; | |
3652 | ||
616d9cee CW |
3653 | if (flags & PIN_NOEVICT) |
3654 | return -ENOSPC; | |
3655 | ||
625d988a CW |
3656 | err = i915_gem_evict_for_node(vm, node, flags); |
3657 | if (err == 0) | |
3658 | err = drm_mm_reserve_node(&vm->mm, node); | |
3659 | ||
3660 | return err; | |
3661 | } | |
3662 | ||
606fec95 CW |
3663 | static u64 random_offset(u64 start, u64 end, u64 len, u64 align) |
3664 | { | |
3665 | u64 range, addr; | |
3666 | ||
3667 | GEM_BUG_ON(range_overflows(start, len, end)); | |
3668 | GEM_BUG_ON(round_up(start, align) > round_down(end - len, align)); | |
3669 | ||
3670 | range = round_down(end - len, align) - round_up(start, align); | |
3671 | if (range) { | |
3672 | if (sizeof(unsigned long) == sizeof(u64)) { | |
3673 | addr = get_random_long(); | |
3674 | } else { | |
3675 | addr = get_random_int(); | |
3676 | if (range > U32_MAX) { | |
3677 | addr <<= 32; | |
3678 | addr |= get_random_int(); | |
3679 | } | |
3680 | } | |
3681 | div64_u64_rem(addr, range, &addr); | |
3682 | start += addr; | |
3683 | } | |
3684 | ||
3685 | return round_up(start, align); | |
3686 | } | |
3687 | ||
e007b19d CW |
3688 | /** |
3689 | * i915_gem_gtt_insert - insert a node into an address_space (GTT) | |
a4dbf7cf CW |
3690 | * @vm: the &struct i915_address_space |
3691 | * @node: the &struct drm_mm_node (typically i915_vma.node) | |
3692 | * @size: how much space to allocate inside the GTT, | |
3693 | * must be #I915_GTT_PAGE_SIZE aligned | |
3694 | * @alignment: required alignment of starting offset, may be 0 but | |
3695 | * if specified, this must be a power-of-two and at least | |
3696 | * #I915_GTT_MIN_ALIGNMENT | |
3697 | * @color: color to apply to node | |
3698 | * @start: start of any range restriction inside GTT (0 for all), | |
e007b19d | 3699 | * must be #I915_GTT_PAGE_SIZE aligned |
a4dbf7cf CW |
3700 | * @end: end of any range restriction inside GTT (U64_MAX for all), |
3701 | * must be #I915_GTT_PAGE_SIZE aligned if not U64_MAX | |
3702 | * @flags: control search and eviction behaviour | |
e007b19d CW |
3703 | * |
3704 | * i915_gem_gtt_insert() first searches for an available hole into which | |
3705 | * is can insert the node. The hole address is aligned to @alignment and | |
3706 | * its @size must then fit entirely within the [@start, @end] bounds. The | |
3707 | * nodes on either side of the hole must match @color, or else a guard page | |
3708 | * will be inserted between the two nodes (or the node evicted). If no | |
606fec95 CW |
3709 | * suitable hole is found, first a victim is randomly selected and tested |
3710 | * for eviction, otherwise then the LRU list of objects within the GTT | |
e007b19d CW |
3711 | * is scanned to find the first set of replacement nodes to create the hole. |
3712 | * Those old overlapping nodes are evicted from the GTT (and so must be | |
3713 | * rebound before any future use). Any node that is currently pinned cannot | |
3714 | * be evicted (see i915_vma_pin()). Similar if the node's VMA is currently | |
3715 | * active and #PIN_NONBLOCK is specified, that node is also skipped when | |
3716 | * searching for an eviction candidate. See i915_gem_evict_something() for | |
3717 | * the gory details on the eviction algorithm. | |
3718 | * | |
3719 | * Returns: 0 on success, -ENOSPC if no suitable hole is found, -EINTR if | |
3720 | * asked to wait for eviction and interrupted. | |
3721 | */ | |
3722 | int i915_gem_gtt_insert(struct i915_address_space *vm, | |
3723 | struct drm_mm_node *node, | |
3724 | u64 size, u64 alignment, unsigned long color, | |
3725 | u64 start, u64 end, unsigned int flags) | |
3726 | { | |
4e64e553 | 3727 | enum drm_mm_insert_mode mode; |
606fec95 | 3728 | u64 offset; |
e007b19d CW |
3729 | int err; |
3730 | ||
3731 | lockdep_assert_held(&vm->i915->drm.struct_mutex); | |
3732 | GEM_BUG_ON(!size); | |
3733 | GEM_BUG_ON(!IS_ALIGNED(size, I915_GTT_PAGE_SIZE)); | |
3734 | GEM_BUG_ON(alignment && !is_power_of_2(alignment)); | |
3735 | GEM_BUG_ON(alignment && !IS_ALIGNED(alignment, I915_GTT_MIN_ALIGNMENT)); | |
3736 | GEM_BUG_ON(start >= end); | |
3737 | GEM_BUG_ON(start > 0 && !IS_ALIGNED(start, I915_GTT_PAGE_SIZE)); | |
3738 | GEM_BUG_ON(end < U64_MAX && !IS_ALIGNED(end, I915_GTT_PAGE_SIZE)); | |
c082afac | 3739 | GEM_BUG_ON(vm == &vm->i915->ggtt.alias->vm); |
9734ad13 | 3740 | GEM_BUG_ON(drm_mm_node_allocated(node)); |
e007b19d CW |
3741 | |
3742 | if (unlikely(range_overflows(start, size, end))) | |
3743 | return -ENOSPC; | |
3744 | ||
3745 | if (unlikely(round_up(start, alignment) > round_down(end - size, alignment))) | |
3746 | return -ENOSPC; | |
3747 | ||
4e64e553 CW |
3748 | mode = DRM_MM_INSERT_BEST; |
3749 | if (flags & PIN_HIGH) | |
eb479f86 | 3750 | mode = DRM_MM_INSERT_HIGHEST; |
4e64e553 CW |
3751 | if (flags & PIN_MAPPABLE) |
3752 | mode = DRM_MM_INSERT_LOW; | |
e007b19d CW |
3753 | |
3754 | /* We only allocate in PAGE_SIZE/GTT_PAGE_SIZE (4096) chunks, | |
3755 | * so we know that we always have a minimum alignment of 4096. | |
3756 | * The drm_mm range manager is optimised to return results | |
3757 | * with zero alignment, so where possible use the optimal | |
3758 | * path. | |
3759 | */ | |
3760 | BUILD_BUG_ON(I915_GTT_MIN_ALIGNMENT > I915_GTT_PAGE_SIZE); | |
3761 | if (alignment <= I915_GTT_MIN_ALIGNMENT) | |
3762 | alignment = 0; | |
3763 | ||
4e64e553 CW |
3764 | err = drm_mm_insert_node_in_range(&vm->mm, node, |
3765 | size, alignment, color, | |
3766 | start, end, mode); | |
e007b19d CW |
3767 | if (err != -ENOSPC) |
3768 | return err; | |
3769 | ||
eb479f86 CW |
3770 | if (mode & DRM_MM_INSERT_ONCE) { |
3771 | err = drm_mm_insert_node_in_range(&vm->mm, node, | |
3772 | size, alignment, color, | |
3773 | start, end, | |
3774 | DRM_MM_INSERT_BEST); | |
3775 | if (err != -ENOSPC) | |
3776 | return err; | |
3777 | } | |
3778 | ||
616d9cee CW |
3779 | if (flags & PIN_NOEVICT) |
3780 | return -ENOSPC; | |
3781 | ||
6846895f CW |
3782 | /* |
3783 | * No free space, pick a slot at random. | |
606fec95 CW |
3784 | * |
3785 | * There is a pathological case here using a GTT shared between | |
3786 | * mmap and GPU (i.e. ggtt/aliasing_ppgtt but not full-ppgtt): | |
3787 | * | |
3788 | * |<-- 256 MiB aperture -->||<-- 1792 MiB unmappable -->| | |
3789 | * (64k objects) (448k objects) | |
3790 | * | |
3791 | * Now imagine that the eviction LRU is ordered top-down (just because | |
3792 | * pathology meets real life), and that we need to evict an object to | |
3793 | * make room inside the aperture. The eviction scan then has to walk | |
3794 | * the 448k list before it finds one within range. And now imagine that | |
3795 | * it has to search for a new hole between every byte inside the memcpy, | |
3796 | * for several simultaneous clients. | |
3797 | * | |
3798 | * On a full-ppgtt system, if we have run out of available space, there | |
3799 | * will be lots and lots of objects in the eviction list! Again, | |
3800 | * searching that LRU list may be slow if we are also applying any | |
3801 | * range restrictions (e.g. restriction to low 4GiB) and so, for | |
3802 | * simplicity and similarilty between different GTT, try the single | |
3803 | * random replacement first. | |
3804 | */ | |
3805 | offset = random_offset(start, end, | |
3806 | size, alignment ?: I915_GTT_MIN_ALIGNMENT); | |
3807 | err = i915_gem_gtt_reserve(vm, node, size, offset, color, flags); | |
3808 | if (err != -ENOSPC) | |
3809 | return err; | |
3810 | ||
6846895f CW |
3811 | if (flags & PIN_NOSEARCH) |
3812 | return -ENOSPC; | |
3813 | ||
606fec95 | 3814 | /* Randomly selected placement is pinned, do a search */ |
e007b19d CW |
3815 | err = i915_gem_evict_something(vm, size, alignment, color, |
3816 | start, end, flags); | |
3817 | if (err) | |
3818 | return err; | |
3819 | ||
4e64e553 CW |
3820 | return drm_mm_insert_node_in_range(&vm->mm, node, |
3821 | size, alignment, color, | |
3822 | start, end, DRM_MM_INSERT_EVICT); | |
e007b19d | 3823 | } |
3b5bb0a3 CW |
3824 | |
3825 | #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) | |
3826 | #include "selftests/mock_gtt.c" | |
1c42819a | 3827 | #include "selftests/i915_gem_gtt.c" |
3b5bb0a3 | 3828 | #endif |