]> git.proxmox.com Git - mirror_ubuntu-bionic-kernel.git/blame - drivers/gpu/drm/i915/i915_gem_gtt.c
drm/i915: Add debug module option for VTd validation
[mirror_ubuntu-bionic-kernel.git] / drivers / gpu / drm / i915 / i915_gem_gtt.c
CommitLineData
76aaf220
DV
1/*
2 * Copyright © 2010 Daniel Vetter
c4ac524c 3 * Copyright © 2011-2014 Intel Corporation
76aaf220
DV
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9 * and/or sell copies of the Software, and to permit persons to whom the
10 * Software is furnished to do so, subject to the following conditions:
11 *
12 * The above copyright notice and this permission notice (including the next
13 * paragraph) shall be included in all copies or substantial portions of the
14 * Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
22 * IN THE SOFTWARE.
23 *
24 */
25
0e46ce2e 26#include <linux/seq_file.h>
760285e7
DH
27#include <drm/drmP.h>
28#include <drm/i915_drm.h>
76aaf220
DV
29#include "i915_drv.h"
30#include "i915_trace.h"
31#include "intel_drv.h"
32
a2319c08
BW
33static void gen8_setup_private_ppat(struct drm_i915_private *dev_priv);
34
93a25a9e
DV
35bool intel_enable_ppgtt(struct drm_device *dev, bool full)
36{
37 if (i915.enable_ppgtt == 0 || !HAS_ALIASING_PPGTT(dev))
38 return false;
39
40 if (i915.enable_ppgtt == 1 && full)
41 return false;
42
43#ifdef CONFIG_INTEL_IOMMU
44 /* Disable ppgtt on SNB if VT-d is on. */
45 if (INTEL_INFO(dev)->gen == 6 && intel_iommu_gfx_mapped) {
46 DRM_INFO("Disabling PPGTT because VT-d is on\n");
47 return false;
48 }
49#endif
50
51 /* Full ppgtt disabled by default for now due to issues. */
52 if (full)
8d214b7d 53 return HAS_PPGTT(dev) && (i915.enable_ppgtt == 2);
93a25a9e
DV
54 else
55 return HAS_ALIASING_PPGTT(dev);
56}
57
fbe5d36e 58
6f65e29a
BW
59static void ppgtt_bind_vma(struct i915_vma *vma,
60 enum i915_cache_level cache_level,
61 u32 flags);
62static void ppgtt_unbind_vma(struct i915_vma *vma);
eeb9488e 63static int gen8_ppgtt_enable(struct i915_hw_ppgtt *ppgtt);
6f65e29a 64
94ec8f61
BW
65static inline gen8_gtt_pte_t gen8_pte_encode(dma_addr_t addr,
66 enum i915_cache_level level,
67 bool valid)
68{
69 gen8_gtt_pte_t pte = valid ? _PAGE_PRESENT | _PAGE_RW : 0;
70 pte |= addr;
fbe5d36e
BW
71 if (level != I915_CACHE_NONE)
72 pte |= PPAT_CACHED_INDEX;
73 else
74 pte |= PPAT_UNCACHED_INDEX;
94ec8f61
BW
75 return pte;
76}
77
b1fe6673
BW
78static inline gen8_ppgtt_pde_t gen8_pde_encode(struct drm_device *dev,
79 dma_addr_t addr,
80 enum i915_cache_level level)
81{
82 gen8_ppgtt_pde_t pde = _PAGE_PRESENT | _PAGE_RW;
83 pde |= addr;
84 if (level != I915_CACHE_NONE)
85 pde |= PPAT_CACHED_PDE_INDEX;
86 else
87 pde |= PPAT_UNCACHED_INDEX;
88 return pde;
89}
90
350ec881 91static gen6_gtt_pte_t snb_pte_encode(dma_addr_t addr,
b35b380e
BW
92 enum i915_cache_level level,
93 bool valid)
54d12527 94{
b35b380e 95 gen6_gtt_pte_t pte = valid ? GEN6_PTE_VALID : 0;
54d12527 96 pte |= GEN6_PTE_ADDR_ENCODE(addr);
e7210c3c
BW
97
98 switch (level) {
350ec881
CW
99 case I915_CACHE_L3_LLC:
100 case I915_CACHE_LLC:
101 pte |= GEN6_PTE_CACHE_LLC;
102 break;
103 case I915_CACHE_NONE:
104 pte |= GEN6_PTE_UNCACHED;
105 break;
106 default:
107 WARN_ON(1);
108 }
109
110 return pte;
111}
112
113static gen6_gtt_pte_t ivb_pte_encode(dma_addr_t addr,
b35b380e
BW
114 enum i915_cache_level level,
115 bool valid)
350ec881 116{
b35b380e 117 gen6_gtt_pte_t pte = valid ? GEN6_PTE_VALID : 0;
350ec881
CW
118 pte |= GEN6_PTE_ADDR_ENCODE(addr);
119
120 switch (level) {
121 case I915_CACHE_L3_LLC:
122 pte |= GEN7_PTE_CACHE_L3_LLC;
e7210c3c
BW
123 break;
124 case I915_CACHE_LLC:
125 pte |= GEN6_PTE_CACHE_LLC;
126 break;
127 case I915_CACHE_NONE:
9119708c 128 pte |= GEN6_PTE_UNCACHED;
e7210c3c
BW
129 break;
130 default:
350ec881 131 WARN_ON(1);
e7210c3c
BW
132 }
133
54d12527
BW
134 return pte;
135}
136
80a74f7f 137static gen6_gtt_pte_t byt_pte_encode(dma_addr_t addr,
b35b380e
BW
138 enum i915_cache_level level,
139 bool valid)
93c34e70 140{
b35b380e 141 gen6_gtt_pte_t pte = valid ? GEN6_PTE_VALID : 0;
93c34e70
KG
142 pte |= GEN6_PTE_ADDR_ENCODE(addr);
143
144 /* Mark the page as writeable. Other platforms don't have a
145 * setting for read-only/writable, so this matches that behavior.
146 */
147 pte |= BYT_PTE_WRITEABLE;
148
149 if (level != I915_CACHE_NONE)
150 pte |= BYT_PTE_SNOOPED_BY_CPU_CACHES;
151
152 return pte;
153}
154
80a74f7f 155static gen6_gtt_pte_t hsw_pte_encode(dma_addr_t addr,
b35b380e
BW
156 enum i915_cache_level level,
157 bool valid)
9119708c 158{
b35b380e 159 gen6_gtt_pte_t pte = valid ? GEN6_PTE_VALID : 0;
0d8ff15e 160 pte |= HSW_PTE_ADDR_ENCODE(addr);
9119708c
KG
161
162 if (level != I915_CACHE_NONE)
87a6b688 163 pte |= HSW_WB_LLC_AGE3;
9119708c
KG
164
165 return pte;
166}
167
4d15c145 168static gen6_gtt_pte_t iris_pte_encode(dma_addr_t addr,
b35b380e
BW
169 enum i915_cache_level level,
170 bool valid)
4d15c145 171{
b35b380e 172 gen6_gtt_pte_t pte = valid ? GEN6_PTE_VALID : 0;
4d15c145
BW
173 pte |= HSW_PTE_ADDR_ENCODE(addr);
174
651d794f
CW
175 switch (level) {
176 case I915_CACHE_NONE:
177 break;
178 case I915_CACHE_WT:
c51e9701 179 pte |= HSW_WT_ELLC_LLC_AGE3;
651d794f
CW
180 break;
181 default:
c51e9701 182 pte |= HSW_WB_ELLC_LLC_AGE3;
651d794f
CW
183 break;
184 }
4d15c145
BW
185
186 return pte;
187}
188
94e409c1
BW
189/* Broadwell Page Directory Pointer Descriptors */
190static int gen8_write_pdp(struct intel_ring_buffer *ring, unsigned entry,
e178f705 191 uint64_t val, bool synchronous)
94e409c1 192{
e178f705 193 struct drm_i915_private *dev_priv = ring->dev->dev_private;
94e409c1
BW
194 int ret;
195
196 BUG_ON(entry >= 4);
197
e178f705
BW
198 if (synchronous) {
199 I915_WRITE(GEN8_RING_PDP_UDW(ring, entry), val >> 32);
200 I915_WRITE(GEN8_RING_PDP_LDW(ring, entry), (u32)val);
201 return 0;
202 }
203
94e409c1
BW
204 ret = intel_ring_begin(ring, 6);
205 if (ret)
206 return ret;
207
208 intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
209 intel_ring_emit(ring, GEN8_RING_PDP_UDW(ring, entry));
210 intel_ring_emit(ring, (u32)(val >> 32));
211 intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
212 intel_ring_emit(ring, GEN8_RING_PDP_LDW(ring, entry));
213 intel_ring_emit(ring, (u32)(val));
214 intel_ring_advance(ring);
215
216 return 0;
217}
218
eeb9488e
BW
219static int gen8_mm_switch(struct i915_hw_ppgtt *ppgtt,
220 struct intel_ring_buffer *ring,
221 bool synchronous)
94e409c1 222{
eeb9488e 223 int i, ret;
94e409c1
BW
224
225 /* bit of a hack to find the actual last used pd */
226 int used_pd = ppgtt->num_pd_entries / GEN8_PDES_PER_PAGE;
227
94e409c1
BW
228 for (i = used_pd - 1; i >= 0; i--) {
229 dma_addr_t addr = ppgtt->pd_dma_addr[i];
eeb9488e
BW
230 ret = gen8_write_pdp(ring, i, addr, synchronous);
231 if (ret)
232 return ret;
94e409c1 233 }
d595bd4b 234
eeb9488e 235 return 0;
94e409c1
BW
236}
237
459108b8 238static void gen8_ppgtt_clear_range(struct i915_address_space *vm,
782f1495
BW
239 uint64_t start,
240 uint64_t length,
459108b8
BW
241 bool use_scratch)
242{
243 struct i915_hw_ppgtt *ppgtt =
244 container_of(vm, struct i915_hw_ppgtt, base);
245 gen8_gtt_pte_t *pt_vaddr, scratch_pte;
7ad47cf2
BW
246 unsigned pdpe = start >> GEN8_PDPE_SHIFT & GEN8_PDPE_MASK;
247 unsigned pde = start >> GEN8_PDE_SHIFT & GEN8_PDE_MASK;
248 unsigned pte = start >> GEN8_PTE_SHIFT & GEN8_PTE_MASK;
782f1495 249 unsigned num_entries = length >> PAGE_SHIFT;
459108b8
BW
250 unsigned last_pte, i;
251
252 scratch_pte = gen8_pte_encode(ppgtt->base.scratch.addr,
253 I915_CACHE_LLC, use_scratch);
254
255 while (num_entries) {
7ad47cf2 256 struct page *page_table = ppgtt->gen8_pt_pages[pdpe][pde];
459108b8 257
7ad47cf2 258 last_pte = pte + num_entries;
459108b8
BW
259 if (last_pte > GEN8_PTES_PER_PAGE)
260 last_pte = GEN8_PTES_PER_PAGE;
261
262 pt_vaddr = kmap_atomic(page_table);
263
7ad47cf2 264 for (i = pte; i < last_pte; i++) {
459108b8 265 pt_vaddr[i] = scratch_pte;
7ad47cf2
BW
266 num_entries--;
267 }
459108b8
BW
268
269 kunmap_atomic(pt_vaddr);
270
7ad47cf2
BW
271 pte = 0;
272 if (++pde == GEN8_PDES_PER_PAGE) {
273 pdpe++;
274 pde = 0;
275 }
459108b8
BW
276 }
277}
278
9df15b49
BW
279static void gen8_ppgtt_insert_entries(struct i915_address_space *vm,
280 struct sg_table *pages,
782f1495 281 uint64_t start,
9df15b49
BW
282 enum i915_cache_level cache_level)
283{
284 struct i915_hw_ppgtt *ppgtt =
285 container_of(vm, struct i915_hw_ppgtt, base);
286 gen8_gtt_pte_t *pt_vaddr;
7ad47cf2
BW
287 unsigned pdpe = start >> GEN8_PDPE_SHIFT & GEN8_PDPE_MASK;
288 unsigned pde = start >> GEN8_PDE_SHIFT & GEN8_PDE_MASK;
289 unsigned pte = start >> GEN8_PTE_SHIFT & GEN8_PTE_MASK;
9df15b49
BW
290 struct sg_page_iter sg_iter;
291
6f1cc993 292 pt_vaddr = NULL;
7ad47cf2 293
9df15b49 294 for_each_sg_page(pages->sgl, &sg_iter, pages->nents, 0) {
7ad47cf2
BW
295 if (WARN_ON(pdpe >= GEN8_LEGACY_PDPS))
296 break;
297
6f1cc993 298 if (pt_vaddr == NULL)
7ad47cf2 299 pt_vaddr = kmap_atomic(ppgtt->gen8_pt_pages[pdpe][pde]);
9df15b49 300
7ad47cf2 301 pt_vaddr[pte] =
6f1cc993
CW
302 gen8_pte_encode(sg_page_iter_dma_address(&sg_iter),
303 cache_level, true);
7ad47cf2 304 if (++pte == GEN8_PTES_PER_PAGE) {
9df15b49 305 kunmap_atomic(pt_vaddr);
6f1cc993 306 pt_vaddr = NULL;
7ad47cf2
BW
307 if (++pde == GEN8_PDES_PER_PAGE) {
308 pdpe++;
309 pde = 0;
310 }
311 pte = 0;
9df15b49
BW
312 }
313 }
6f1cc993
CW
314 if (pt_vaddr)
315 kunmap_atomic(pt_vaddr);
9df15b49
BW
316}
317
7ad47cf2
BW
318static void gen8_free_page_tables(struct page **pt_pages)
319{
320 int i;
321
322 if (pt_pages == NULL)
323 return;
324
325 for (i = 0; i < GEN8_PDES_PER_PAGE; i++)
326 if (pt_pages[i])
327 __free_pages(pt_pages[i], 0);
328}
329
330static void gen8_ppgtt_free(const struct i915_hw_ppgtt *ppgtt)
b45a6715
BW
331{
332 int i;
333
7ad47cf2
BW
334 for (i = 0; i < ppgtt->num_pd_pages; i++) {
335 gen8_free_page_tables(ppgtt->gen8_pt_pages[i]);
336 kfree(ppgtt->gen8_pt_pages[i]);
b45a6715 337 kfree(ppgtt->gen8_pt_dma_addr[i]);
7ad47cf2 338 }
b45a6715 339
b45a6715
BW
340 __free_pages(ppgtt->pd_pages, get_order(ppgtt->num_pd_pages << PAGE_SHIFT));
341}
342
343static void gen8_ppgtt_unmap_pages(struct i915_hw_ppgtt *ppgtt)
344{
f3a964b9 345 struct pci_dev *hwdev = ppgtt->base.dev->pdev;
b45a6715
BW
346 int i, j;
347
348 for (i = 0; i < ppgtt->num_pd_pages; i++) {
349 /* TODO: In the future we'll support sparse mappings, so this
350 * will have to change. */
351 if (!ppgtt->pd_dma_addr[i])
352 continue;
353
f3a964b9
BW
354 pci_unmap_page(hwdev, ppgtt->pd_dma_addr[i], PAGE_SIZE,
355 PCI_DMA_BIDIRECTIONAL);
b45a6715
BW
356
357 for (j = 0; j < GEN8_PDES_PER_PAGE; j++) {
358 dma_addr_t addr = ppgtt->gen8_pt_dma_addr[i][j];
359 if (addr)
f3a964b9
BW
360 pci_unmap_page(hwdev, addr, PAGE_SIZE,
361 PCI_DMA_BIDIRECTIONAL);
b45a6715
BW
362 }
363 }
364}
365
37aca44a
BW
366static void gen8_ppgtt_cleanup(struct i915_address_space *vm)
367{
368 struct i915_hw_ppgtt *ppgtt =
369 container_of(vm, struct i915_hw_ppgtt, base);
37aca44a 370
7e0d96bc 371 list_del(&vm->global_link);
686e1f6f
BW
372 drm_mm_takedown(&vm->mm);
373
b45a6715
BW
374 gen8_ppgtt_unmap_pages(ppgtt);
375 gen8_ppgtt_free(ppgtt);
37aca44a
BW
376}
377
7ad47cf2
BW
378static struct page **__gen8_alloc_page_tables(void)
379{
380 struct page **pt_pages;
381 int i;
382
383 pt_pages = kcalloc(GEN8_PDES_PER_PAGE, sizeof(struct page *), GFP_KERNEL);
384 if (!pt_pages)
385 return ERR_PTR(-ENOMEM);
386
387 for (i = 0; i < GEN8_PDES_PER_PAGE; i++) {
388 pt_pages[i] = alloc_page(GFP_KERNEL);
389 if (!pt_pages[i])
390 goto bail;
391 }
392
393 return pt_pages;
394
395bail:
396 gen8_free_page_tables(pt_pages);
397 kfree(pt_pages);
398 return ERR_PTR(-ENOMEM);
399}
400
bf2b4ed2
BW
401static int gen8_ppgtt_allocate_page_tables(struct i915_hw_ppgtt *ppgtt,
402 const int max_pdp)
403{
7ad47cf2 404 struct page **pt_pages[GEN8_LEGACY_PDPS];
7ad47cf2 405 int i, ret;
bf2b4ed2 406
7ad47cf2
BW
407 for (i = 0; i < max_pdp; i++) {
408 pt_pages[i] = __gen8_alloc_page_tables();
409 if (IS_ERR(pt_pages[i])) {
410 ret = PTR_ERR(pt_pages[i]);
411 goto unwind_out;
412 }
413 }
414
415 /* NB: Avoid touching gen8_pt_pages until last to keep the allocation,
416 * "atomic" - for cleanup purposes.
417 */
418 for (i = 0; i < max_pdp; i++)
419 ppgtt->gen8_pt_pages[i] = pt_pages[i];
bf2b4ed2 420
bf2b4ed2 421 return 0;
7ad47cf2
BW
422
423unwind_out:
424 while (i--) {
425 gen8_free_page_tables(pt_pages[i]);
426 kfree(pt_pages[i]);
427 }
428
429 return ret;
bf2b4ed2
BW
430}
431
432static int gen8_ppgtt_allocate_dma(struct i915_hw_ppgtt *ppgtt)
433{
434 int i;
435
436 for (i = 0; i < ppgtt->num_pd_pages; i++) {
437 ppgtt->gen8_pt_dma_addr[i] = kcalloc(GEN8_PDES_PER_PAGE,
438 sizeof(dma_addr_t),
439 GFP_KERNEL);
440 if (!ppgtt->gen8_pt_dma_addr[i])
441 return -ENOMEM;
442 }
443
444 return 0;
445}
446
447static int gen8_ppgtt_allocate_page_directories(struct i915_hw_ppgtt *ppgtt,
448 const int max_pdp)
449{
450 ppgtt->pd_pages = alloc_pages(GFP_KERNEL, get_order(max_pdp << PAGE_SHIFT));
451 if (!ppgtt->pd_pages)
452 return -ENOMEM;
453
454 ppgtt->num_pd_pages = 1 << get_order(max_pdp << PAGE_SHIFT);
455 BUG_ON(ppgtt->num_pd_pages > GEN8_LEGACY_PDPS);
456
457 return 0;
458}
459
460static int gen8_ppgtt_alloc(struct i915_hw_ppgtt *ppgtt,
461 const int max_pdp)
462{
463 int ret;
464
465 ret = gen8_ppgtt_allocate_page_directories(ppgtt, max_pdp);
466 if (ret)
467 return ret;
468
469 ret = gen8_ppgtt_allocate_page_tables(ppgtt, max_pdp);
470 if (ret) {
471 __free_pages(ppgtt->pd_pages, get_order(max_pdp << PAGE_SHIFT));
472 return ret;
473 }
474
475 ppgtt->num_pd_entries = max_pdp * GEN8_PDES_PER_PAGE;
476
477 ret = gen8_ppgtt_allocate_dma(ppgtt);
478 if (ret)
479 gen8_ppgtt_free(ppgtt);
480
481 return ret;
482}
483
484static int gen8_ppgtt_setup_page_directories(struct i915_hw_ppgtt *ppgtt,
485 const int pd)
486{
487 dma_addr_t pd_addr;
488 int ret;
489
490 pd_addr = pci_map_page(ppgtt->base.dev->pdev,
491 &ppgtt->pd_pages[pd], 0,
492 PAGE_SIZE, PCI_DMA_BIDIRECTIONAL);
493
494 ret = pci_dma_mapping_error(ppgtt->base.dev->pdev, pd_addr);
495 if (ret)
496 return ret;
497
498 ppgtt->pd_dma_addr[pd] = pd_addr;
499
500 return 0;
501}
502
503static int gen8_ppgtt_setup_page_tables(struct i915_hw_ppgtt *ppgtt,
504 const int pd,
505 const int pt)
506{
507 dma_addr_t pt_addr;
508 struct page *p;
509 int ret;
510
7ad47cf2 511 p = ppgtt->gen8_pt_pages[pd][pt];
bf2b4ed2
BW
512 pt_addr = pci_map_page(ppgtt->base.dev->pdev,
513 p, 0, PAGE_SIZE, PCI_DMA_BIDIRECTIONAL);
514 ret = pci_dma_mapping_error(ppgtt->base.dev->pdev, pt_addr);
515 if (ret)
516 return ret;
517
518 ppgtt->gen8_pt_dma_addr[pd][pt] = pt_addr;
519
520 return 0;
521}
522
37aca44a 523/**
f3a964b9
BW
524 * GEN8 legacy ppgtt programming is accomplished through a max 4 PDP registers
525 * with a net effect resembling a 2-level page table in normal x86 terms. Each
526 * PDP represents 1GB of memory 4 * 512 * 512 * 4096 = 4GB legacy 32b address
527 * space.
37aca44a 528 *
f3a964b9
BW
529 * FIXME: split allocation into smaller pieces. For now we only ever do this
530 * once, but with full PPGTT, the multiple contiguous allocations will be bad.
37aca44a 531 * TODO: Do something with the size parameter
f3a964b9 532 */
37aca44a
BW
533static int gen8_ppgtt_init(struct i915_hw_ppgtt *ppgtt, uint64_t size)
534{
37aca44a 535 const int max_pdp = DIV_ROUND_UP(size, 1 << 30);
bf2b4ed2 536 const int min_pt_pages = GEN8_PDES_PER_PAGE * max_pdp;
f3a964b9 537 int i, j, ret;
37aca44a
BW
538
539 if (size % (1<<30))
540 DRM_INFO("Pages will be wasted unless GTT size (%llu) is divisible by 1GB\n", size);
541
bf2b4ed2
BW
542 /* 1. Do all our allocations for page directories and page tables. */
543 ret = gen8_ppgtt_alloc(ppgtt, max_pdp);
544 if (ret)
545 return ret;
f3a964b9 546
37aca44a 547 /*
bf2b4ed2 548 * 2. Create DMA mappings for the page directories and page tables.
37aca44a
BW
549 */
550 for (i = 0; i < max_pdp; i++) {
bf2b4ed2 551 ret = gen8_ppgtt_setup_page_directories(ppgtt, i);
f3a964b9
BW
552 if (ret)
553 goto bail;
37aca44a 554
37aca44a 555 for (j = 0; j < GEN8_PDES_PER_PAGE; j++) {
bf2b4ed2 556 ret = gen8_ppgtt_setup_page_tables(ppgtt, i, j);
f3a964b9
BW
557 if (ret)
558 goto bail;
37aca44a
BW
559 }
560 }
561
f3a964b9
BW
562 /*
563 * 3. Map all the page directory entires to point to the page tables
564 * we've allocated.
565 *
566 * For now, the PPGTT helper functions all require that the PDEs are
b1fe6673 567 * plugged in correctly. So we do that now/here. For aliasing PPGTT, we
f3a964b9
BW
568 * will never need to touch the PDEs again.
569 */
b1fe6673
BW
570 for (i = 0; i < max_pdp; i++) {
571 gen8_ppgtt_pde_t *pd_vaddr;
572 pd_vaddr = kmap_atomic(&ppgtt->pd_pages[i]);
573 for (j = 0; j < GEN8_PDES_PER_PAGE; j++) {
574 dma_addr_t addr = ppgtt->gen8_pt_dma_addr[i][j];
575 pd_vaddr[j] = gen8_pde_encode(ppgtt->base.dev, addr,
576 I915_CACHE_LLC);
577 }
578 kunmap_atomic(pd_vaddr);
579 }
580
f3a964b9
BW
581 ppgtt->enable = gen8_ppgtt_enable;
582 ppgtt->switch_mm = gen8_mm_switch;
583 ppgtt->base.clear_range = gen8_ppgtt_clear_range;
584 ppgtt->base.insert_entries = gen8_ppgtt_insert_entries;
585 ppgtt->base.cleanup = gen8_ppgtt_cleanup;
586 ppgtt->base.start = 0;
5abbcca3 587 ppgtt->base.total = ppgtt->num_pd_entries * GEN8_PTES_PER_PAGE * PAGE_SIZE;
f3a964b9 588
5abbcca3 589 ppgtt->base.clear_range(&ppgtt->base, 0, ppgtt->base.total, true);
459108b8 590
37aca44a
BW
591 DRM_DEBUG_DRIVER("Allocated %d pages for page directories (%d wasted)\n",
592 ppgtt->num_pd_pages, ppgtt->num_pd_pages - max_pdp);
593 DRM_DEBUG_DRIVER("Allocated %d pages for page tables (%lld wasted)\n",
5abbcca3
BW
594 ppgtt->num_pd_entries,
595 (ppgtt->num_pd_entries - min_pt_pages) + size % (1<<30));
28cf5415 596 return 0;
37aca44a 597
f3a964b9
BW
598bail:
599 gen8_ppgtt_unmap_pages(ppgtt);
600 gen8_ppgtt_free(ppgtt);
37aca44a
BW
601 return ret;
602}
603
87d60b63
BW
604static void gen6_dump_ppgtt(struct i915_hw_ppgtt *ppgtt, struct seq_file *m)
605{
606 struct drm_i915_private *dev_priv = ppgtt->base.dev->dev_private;
607 struct i915_address_space *vm = &ppgtt->base;
608 gen6_gtt_pte_t __iomem *pd_addr;
609 gen6_gtt_pte_t scratch_pte;
610 uint32_t pd_entry;
611 int pte, pde;
612
613 scratch_pte = vm->pte_encode(vm->scratch.addr, I915_CACHE_LLC, true);
614
615 pd_addr = (gen6_gtt_pte_t __iomem *)dev_priv->gtt.gsm +
616 ppgtt->pd_offset / sizeof(gen6_gtt_pte_t);
617
618 seq_printf(m, " VM %p (pd_offset %x-%x):\n", vm,
619 ppgtt->pd_offset, ppgtt->pd_offset + ppgtt->num_pd_entries);
620 for (pde = 0; pde < ppgtt->num_pd_entries; pde++) {
621 u32 expected;
622 gen6_gtt_pte_t *pt_vaddr;
623 dma_addr_t pt_addr = ppgtt->pt_dma_addr[pde];
624 pd_entry = readl(pd_addr + pde);
625 expected = (GEN6_PDE_ADDR_ENCODE(pt_addr) | GEN6_PDE_VALID);
626
627 if (pd_entry != expected)
628 seq_printf(m, "\tPDE #%d mismatch: Actual PDE: %x Expected PDE: %x\n",
629 pde,
630 pd_entry,
631 expected);
632 seq_printf(m, "\tPDE: %x\n", pd_entry);
633
634 pt_vaddr = kmap_atomic(ppgtt->pt_pages[pde]);
635 for (pte = 0; pte < I915_PPGTT_PT_ENTRIES; pte+=4) {
636 unsigned long va =
637 (pde * PAGE_SIZE * I915_PPGTT_PT_ENTRIES) +
638 (pte * PAGE_SIZE);
639 int i;
640 bool found = false;
641 for (i = 0; i < 4; i++)
642 if (pt_vaddr[pte + i] != scratch_pte)
643 found = true;
644 if (!found)
645 continue;
646
647 seq_printf(m, "\t\t0x%lx [%03d,%04d]: =", va, pde, pte);
648 for (i = 0; i < 4; i++) {
649 if (pt_vaddr[pte + i] != scratch_pte)
650 seq_printf(m, " %08x", pt_vaddr[pte + i]);
651 else
652 seq_puts(m, " SCRATCH ");
653 }
654 seq_puts(m, "\n");
655 }
656 kunmap_atomic(pt_vaddr);
657 }
658}
659
3e302542 660static void gen6_write_pdes(struct i915_hw_ppgtt *ppgtt)
6197349b 661{
853ba5d2 662 struct drm_i915_private *dev_priv = ppgtt->base.dev->dev_private;
6197349b
BW
663 gen6_gtt_pte_t __iomem *pd_addr;
664 uint32_t pd_entry;
665 int i;
666
0a732870 667 WARN_ON(ppgtt->pd_offset & 0x3f);
6197349b
BW
668 pd_addr = (gen6_gtt_pte_t __iomem*)dev_priv->gtt.gsm +
669 ppgtt->pd_offset / sizeof(gen6_gtt_pte_t);
670 for (i = 0; i < ppgtt->num_pd_entries; i++) {
671 dma_addr_t pt_addr;
672
673 pt_addr = ppgtt->pt_dma_addr[i];
674 pd_entry = GEN6_PDE_ADDR_ENCODE(pt_addr);
675 pd_entry |= GEN6_PDE_VALID;
676
677 writel(pd_entry, pd_addr + i);
678 }
679 readl(pd_addr);
3e302542
BW
680}
681
b4a74e3a 682static uint32_t get_pd_offset(struct i915_hw_ppgtt *ppgtt)
3e302542 683{
b4a74e3a
BW
684 BUG_ON(ppgtt->pd_offset & 0x3f);
685
686 return (ppgtt->pd_offset / 64) << 16;
687}
688
90252e5c
BW
689static int hsw_mm_switch(struct i915_hw_ppgtt *ppgtt,
690 struct intel_ring_buffer *ring,
691 bool synchronous)
692{
693 struct drm_device *dev = ppgtt->base.dev;
694 struct drm_i915_private *dev_priv = dev->dev_private;
695 int ret;
696
697 /* If we're in reset, we can assume the GPU is sufficiently idle to
698 * manually frob these bits. Ideally we could use the ring functions,
699 * except our error handling makes it quite difficult (can't use
700 * intel_ring_begin, ring->flush, or intel_ring_advance)
701 *
702 * FIXME: We should try not to special case reset
703 */
704 if (synchronous ||
705 i915_reset_in_progress(&dev_priv->gpu_error)) {
706 WARN_ON(ppgtt != dev_priv->mm.aliasing_ppgtt);
707 I915_WRITE(RING_PP_DIR_DCLV(ring), PP_DIR_DCLV_2G);
708 I915_WRITE(RING_PP_DIR_BASE(ring), get_pd_offset(ppgtt));
709 POSTING_READ(RING_PP_DIR_BASE(ring));
710 return 0;
711 }
712
713 /* NB: TLBs must be flushed and invalidated before a switch */
714 ret = ring->flush(ring, I915_GEM_GPU_DOMAINS, I915_GEM_GPU_DOMAINS);
715 if (ret)
716 return ret;
717
718 ret = intel_ring_begin(ring, 6);
719 if (ret)
720 return ret;
721
722 intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(2));
723 intel_ring_emit(ring, RING_PP_DIR_DCLV(ring));
724 intel_ring_emit(ring, PP_DIR_DCLV_2G);
725 intel_ring_emit(ring, RING_PP_DIR_BASE(ring));
726 intel_ring_emit(ring, get_pd_offset(ppgtt));
727 intel_ring_emit(ring, MI_NOOP);
728 intel_ring_advance(ring);
729
730 return 0;
731}
732
48a10389
BW
733static int gen7_mm_switch(struct i915_hw_ppgtt *ppgtt,
734 struct intel_ring_buffer *ring,
735 bool synchronous)
736{
737 struct drm_device *dev = ppgtt->base.dev;
738 struct drm_i915_private *dev_priv = dev->dev_private;
739 int ret;
740
741 /* If we're in reset, we can assume the GPU is sufficiently idle to
742 * manually frob these bits. Ideally we could use the ring functions,
743 * except our error handling makes it quite difficult (can't use
744 * intel_ring_begin, ring->flush, or intel_ring_advance)
745 *
746 * FIXME: We should try not to special case reset
747 */
748 if (synchronous ||
749 i915_reset_in_progress(&dev_priv->gpu_error)) {
750 WARN_ON(ppgtt != dev_priv->mm.aliasing_ppgtt);
751 I915_WRITE(RING_PP_DIR_DCLV(ring), PP_DIR_DCLV_2G);
752 I915_WRITE(RING_PP_DIR_BASE(ring), get_pd_offset(ppgtt));
753 POSTING_READ(RING_PP_DIR_BASE(ring));
754 return 0;
755 }
756
757 /* NB: TLBs must be flushed and invalidated before a switch */
758 ret = ring->flush(ring, I915_GEM_GPU_DOMAINS, I915_GEM_GPU_DOMAINS);
759 if (ret)
760 return ret;
761
762 ret = intel_ring_begin(ring, 6);
763 if (ret)
764 return ret;
765
766 intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(2));
767 intel_ring_emit(ring, RING_PP_DIR_DCLV(ring));
768 intel_ring_emit(ring, PP_DIR_DCLV_2G);
769 intel_ring_emit(ring, RING_PP_DIR_BASE(ring));
770 intel_ring_emit(ring, get_pd_offset(ppgtt));
771 intel_ring_emit(ring, MI_NOOP);
772 intel_ring_advance(ring);
773
90252e5c
BW
774 /* XXX: RCS is the only one to auto invalidate the TLBs? */
775 if (ring->id != RCS) {
776 ret = ring->flush(ring, I915_GEM_GPU_DOMAINS, I915_GEM_GPU_DOMAINS);
777 if (ret)
778 return ret;
779 }
780
48a10389
BW
781 return 0;
782}
783
eeb9488e
BW
784static int gen6_mm_switch(struct i915_hw_ppgtt *ppgtt,
785 struct intel_ring_buffer *ring,
786 bool synchronous)
787{
788 struct drm_device *dev = ppgtt->base.dev;
789 struct drm_i915_private *dev_priv = dev->dev_private;
790
48a10389
BW
791 if (!synchronous)
792 return 0;
793
eeb9488e
BW
794 I915_WRITE(RING_PP_DIR_DCLV(ring), PP_DIR_DCLV_2G);
795 I915_WRITE(RING_PP_DIR_BASE(ring), get_pd_offset(ppgtt));
796
797 POSTING_READ(RING_PP_DIR_DCLV(ring));
798
799 return 0;
800}
801
802static int gen8_ppgtt_enable(struct i915_hw_ppgtt *ppgtt)
803{
804 struct drm_device *dev = ppgtt->base.dev;
805 struct drm_i915_private *dev_priv = dev->dev_private;
3e302542 806 struct intel_ring_buffer *ring;
eeb9488e 807 int j, ret;
3e302542 808
eeb9488e
BW
809 for_each_ring(ring, dev_priv, j) {
810 I915_WRITE(RING_MODE_GEN7(ring),
811 _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE));
3e302542 812
d2ff7192
BW
813 /* We promise to do a switch later with FULL PPGTT. If this is
814 * aliasing, this is the one and only switch we'll do */
815 if (USES_FULL_PPGTT(dev))
816 continue;
6197349b 817
eeb9488e
BW
818 ret = ppgtt->switch_mm(ppgtt, ring, true);
819 if (ret)
820 goto err_out;
821 }
6197349b 822
eeb9488e 823 return 0;
6197349b 824
eeb9488e
BW
825err_out:
826 for_each_ring(ring, dev_priv, j)
827 I915_WRITE(RING_MODE_GEN7(ring),
828 _MASKED_BIT_DISABLE(GFX_PPGTT_ENABLE));
829 return ret;
830}
6197349b 831
b4a74e3a 832static int gen7_ppgtt_enable(struct i915_hw_ppgtt *ppgtt)
3e302542 833{
a3d67d23 834 struct drm_device *dev = ppgtt->base.dev;
50227e1c 835 struct drm_i915_private *dev_priv = dev->dev_private;
3e302542 836 struct intel_ring_buffer *ring;
b4a74e3a 837 uint32_t ecochk, ecobits;
3e302542 838 int i;
6197349b 839
b4a74e3a
BW
840 ecobits = I915_READ(GAC_ECO_BITS);
841 I915_WRITE(GAC_ECO_BITS, ecobits | ECOBITS_PPGTT_CACHE64B);
a65c2fcd 842
b4a74e3a
BW
843 ecochk = I915_READ(GAM_ECOCHK);
844 if (IS_HASWELL(dev)) {
845 ecochk |= ECOCHK_PPGTT_WB_HSW;
846 } else {
847 ecochk |= ECOCHK_PPGTT_LLC_IVB;
848 ecochk &= ~ECOCHK_PPGTT_GFDT_IVB;
849 }
850 I915_WRITE(GAM_ECOCHK, ecochk);
a65c2fcd 851
b4a74e3a 852 for_each_ring(ring, dev_priv, i) {
eeb9488e 853 int ret;
6197349b 854 /* GFX_MODE is per-ring on gen7+ */
b4a74e3a
BW
855 I915_WRITE(RING_MODE_GEN7(ring),
856 _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE));
d2ff7192
BW
857
858 /* We promise to do a switch later with FULL PPGTT. If this is
859 * aliasing, this is the one and only switch we'll do */
860 if (USES_FULL_PPGTT(dev))
861 continue;
862
eeb9488e
BW
863 ret = ppgtt->switch_mm(ppgtt, ring, true);
864 if (ret)
865 return ret;
6197349b
BW
866 }
867
b4a74e3a
BW
868 return 0;
869}
6197349b 870
b4a74e3a
BW
871static int gen6_ppgtt_enable(struct i915_hw_ppgtt *ppgtt)
872{
873 struct drm_device *dev = ppgtt->base.dev;
50227e1c 874 struct drm_i915_private *dev_priv = dev->dev_private;
b4a74e3a
BW
875 struct intel_ring_buffer *ring;
876 uint32_t ecochk, gab_ctl, ecobits;
877 int i;
a65c2fcd 878
b4a74e3a
BW
879 ecobits = I915_READ(GAC_ECO_BITS);
880 I915_WRITE(GAC_ECO_BITS, ecobits | ECOBITS_SNB_BIT |
881 ECOBITS_PPGTT_CACHE64B);
6197349b 882
b4a74e3a
BW
883 gab_ctl = I915_READ(GAB_CTL);
884 I915_WRITE(GAB_CTL, gab_ctl | GAB_CTL_CONT_AFTER_PAGEFAULT);
885
886 ecochk = I915_READ(GAM_ECOCHK);
887 I915_WRITE(GAM_ECOCHK, ecochk | ECOCHK_SNB_BIT | ECOCHK_PPGTT_CACHE64B);
888
889 I915_WRITE(GFX_MODE, _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE));
6197349b 890
b4a74e3a 891 for_each_ring(ring, dev_priv, i) {
eeb9488e
BW
892 int ret = ppgtt->switch_mm(ppgtt, ring, true);
893 if (ret)
894 return ret;
6197349b 895 }
b4a74e3a 896
b7c36d25 897 return 0;
6197349b
BW
898}
899
1d2a314c 900/* PPGTT support for Sandybridge/Gen6 and later */
853ba5d2 901static void gen6_ppgtt_clear_range(struct i915_address_space *vm,
782f1495
BW
902 uint64_t start,
903 uint64_t length,
828c7908 904 bool use_scratch)
1d2a314c 905{
853ba5d2
BW
906 struct i915_hw_ppgtt *ppgtt =
907 container_of(vm, struct i915_hw_ppgtt, base);
e7c2b58b 908 gen6_gtt_pte_t *pt_vaddr, scratch_pte;
782f1495
BW
909 unsigned first_entry = start >> PAGE_SHIFT;
910 unsigned num_entries = length >> PAGE_SHIFT;
a15326a5 911 unsigned act_pt = first_entry / I915_PPGTT_PT_ENTRIES;
7bddb01f
DV
912 unsigned first_pte = first_entry % I915_PPGTT_PT_ENTRIES;
913 unsigned last_pte, i;
1d2a314c 914
b35b380e 915 scratch_pte = vm->pte_encode(vm->scratch.addr, I915_CACHE_LLC, true);
1d2a314c 916
7bddb01f
DV
917 while (num_entries) {
918 last_pte = first_pte + num_entries;
919 if (last_pte > I915_PPGTT_PT_ENTRIES)
920 last_pte = I915_PPGTT_PT_ENTRIES;
921
a15326a5 922 pt_vaddr = kmap_atomic(ppgtt->pt_pages[act_pt]);
1d2a314c 923
7bddb01f
DV
924 for (i = first_pte; i < last_pte; i++)
925 pt_vaddr[i] = scratch_pte;
1d2a314c
DV
926
927 kunmap_atomic(pt_vaddr);
1d2a314c 928
7bddb01f
DV
929 num_entries -= last_pte - first_pte;
930 first_pte = 0;
a15326a5 931 act_pt++;
7bddb01f 932 }
1d2a314c
DV
933}
934
853ba5d2 935static void gen6_ppgtt_insert_entries(struct i915_address_space *vm,
def886c3 936 struct sg_table *pages,
782f1495 937 uint64_t start,
def886c3
DV
938 enum i915_cache_level cache_level)
939{
853ba5d2
BW
940 struct i915_hw_ppgtt *ppgtt =
941 container_of(vm, struct i915_hw_ppgtt, base);
e7c2b58b 942 gen6_gtt_pte_t *pt_vaddr;
782f1495 943 unsigned first_entry = start >> PAGE_SHIFT;
a15326a5 944 unsigned act_pt = first_entry / I915_PPGTT_PT_ENTRIES;
6e995e23
ID
945 unsigned act_pte = first_entry % I915_PPGTT_PT_ENTRIES;
946 struct sg_page_iter sg_iter;
947
cc79714f 948 pt_vaddr = NULL;
6e995e23 949 for_each_sg_page(pages->sgl, &sg_iter, pages->nents, 0) {
cc79714f
CW
950 if (pt_vaddr == NULL)
951 pt_vaddr = kmap_atomic(ppgtt->pt_pages[act_pt]);
6e995e23 952
cc79714f
CW
953 pt_vaddr[act_pte] =
954 vm->pte_encode(sg_page_iter_dma_address(&sg_iter),
955 cache_level, true);
6e995e23
ID
956 if (++act_pte == I915_PPGTT_PT_ENTRIES) {
957 kunmap_atomic(pt_vaddr);
cc79714f 958 pt_vaddr = NULL;
a15326a5 959 act_pt++;
6e995e23 960 act_pte = 0;
def886c3 961 }
def886c3 962 }
cc79714f
CW
963 if (pt_vaddr)
964 kunmap_atomic(pt_vaddr);
def886c3
DV
965}
966
a00d825d 967static void gen6_ppgtt_unmap_pages(struct i915_hw_ppgtt *ppgtt)
1d2a314c 968{
3440d265
DV
969 int i;
970
971 if (ppgtt->pt_dma_addr) {
972 for (i = 0; i < ppgtt->num_pd_entries; i++)
853ba5d2 973 pci_unmap_page(ppgtt->base.dev->pdev,
3440d265
DV
974 ppgtt->pt_dma_addr[i],
975 4096, PCI_DMA_BIDIRECTIONAL);
976 }
a00d825d
BW
977}
978
979static void gen6_ppgtt_free(struct i915_hw_ppgtt *ppgtt)
980{
981 int i;
3440d265
DV
982
983 kfree(ppgtt->pt_dma_addr);
984 for (i = 0; i < ppgtt->num_pd_entries; i++)
985 __free_page(ppgtt->pt_pages[i]);
986 kfree(ppgtt->pt_pages);
3440d265
DV
987}
988
a00d825d
BW
989static void gen6_ppgtt_cleanup(struct i915_address_space *vm)
990{
991 struct i915_hw_ppgtt *ppgtt =
992 container_of(vm, struct i915_hw_ppgtt, base);
993
994 list_del(&vm->global_link);
995 drm_mm_takedown(&ppgtt->base.mm);
996 drm_mm_remove_node(&ppgtt->node);
997
998 gen6_ppgtt_unmap_pages(ppgtt);
999 gen6_ppgtt_free(ppgtt);
1000}
1001
b146520f 1002static int gen6_ppgtt_allocate_page_directories(struct i915_hw_ppgtt *ppgtt)
3440d265 1003{
853ba5d2 1004 struct drm_device *dev = ppgtt->base.dev;
1d2a314c 1005 struct drm_i915_private *dev_priv = dev->dev_private;
e3cc1995 1006 bool retried = false;
b146520f 1007 int ret;
1d2a314c 1008
c8d4c0d6
BW
1009 /* PPGTT PDEs reside in the GGTT and consists of 512 entries. The
1010 * allocator works in address space sizes, so it's multiplied by page
1011 * size. We allocate at the top of the GTT to avoid fragmentation.
1012 */
1013 BUG_ON(!drm_mm_initialized(&dev_priv->gtt.base.mm));
e3cc1995 1014alloc:
c8d4c0d6
BW
1015 ret = drm_mm_insert_node_in_range_generic(&dev_priv->gtt.base.mm,
1016 &ppgtt->node, GEN6_PD_SIZE,
1017 GEN6_PD_ALIGN, 0,
1018 0, dev_priv->gtt.base.total,
1019 DRM_MM_SEARCH_DEFAULT);
e3cc1995
BW
1020 if (ret == -ENOSPC && !retried) {
1021 ret = i915_gem_evict_something(dev, &dev_priv->gtt.base,
1022 GEN6_PD_SIZE, GEN6_PD_ALIGN,
d47c3ea2 1023 I915_CACHE_NONE, 0);
e3cc1995
BW
1024 if (ret)
1025 return ret;
1026
1027 retried = true;
1028 goto alloc;
1029 }
c8d4c0d6
BW
1030
1031 if (ppgtt->node.start < dev_priv->gtt.mappable_end)
1032 DRM_DEBUG("Forced to use aperture for PDEs\n");
1d2a314c 1033
6670a5a5 1034 ppgtt->num_pd_entries = GEN6_PPGTT_PD_ENTRIES;
b146520f
BW
1035 return ret;
1036}
1037
1038static int gen6_ppgtt_allocate_page_tables(struct i915_hw_ppgtt *ppgtt)
1039{
1040 int i;
1041
a1e22653 1042 ppgtt->pt_pages = kcalloc(ppgtt->num_pd_entries, sizeof(struct page *),
1d2a314c 1043 GFP_KERNEL);
b146520f
BW
1044
1045 if (!ppgtt->pt_pages)
3440d265 1046 return -ENOMEM;
1d2a314c
DV
1047
1048 for (i = 0; i < ppgtt->num_pd_entries; i++) {
1049 ppgtt->pt_pages[i] = alloc_page(GFP_KERNEL);
b146520f
BW
1050 if (!ppgtt->pt_pages[i]) {
1051 gen6_ppgtt_free(ppgtt);
1052 return -ENOMEM;
1053 }
1054 }
1055
1056 return 0;
1057}
1058
1059static int gen6_ppgtt_alloc(struct i915_hw_ppgtt *ppgtt)
1060{
1061 int ret;
1062
1063 ret = gen6_ppgtt_allocate_page_directories(ppgtt);
1064 if (ret)
1065 return ret;
1066
1067 ret = gen6_ppgtt_allocate_page_tables(ppgtt);
1068 if (ret) {
1069 drm_mm_remove_node(&ppgtt->node);
1070 return ret;
1d2a314c
DV
1071 }
1072
a1e22653 1073 ppgtt->pt_dma_addr = kcalloc(ppgtt->num_pd_entries, sizeof(dma_addr_t),
8d2e6308 1074 GFP_KERNEL);
b146520f
BW
1075 if (!ppgtt->pt_dma_addr) {
1076 drm_mm_remove_node(&ppgtt->node);
1077 gen6_ppgtt_free(ppgtt);
1078 return -ENOMEM;
1079 }
1080
1081 return 0;
1082}
1083
1084static int gen6_ppgtt_setup_page_tables(struct i915_hw_ppgtt *ppgtt)
1085{
1086 struct drm_device *dev = ppgtt->base.dev;
1087 int i;
1d2a314c 1088
8d2e6308
BW
1089 for (i = 0; i < ppgtt->num_pd_entries; i++) {
1090 dma_addr_t pt_addr;
211c568b 1091
8d2e6308
BW
1092 pt_addr = pci_map_page(dev->pdev, ppgtt->pt_pages[i], 0, 4096,
1093 PCI_DMA_BIDIRECTIONAL);
1d2a314c 1094
8d2e6308 1095 if (pci_dma_mapping_error(dev->pdev, pt_addr)) {
b146520f
BW
1096 gen6_ppgtt_unmap_pages(ppgtt);
1097 return -EIO;
211c568b 1098 }
b146520f 1099
8d2e6308 1100 ppgtt->pt_dma_addr[i] = pt_addr;
1d2a314c 1101 }
1d2a314c 1102
b146520f
BW
1103 return 0;
1104}
1105
1106static int gen6_ppgtt_init(struct i915_hw_ppgtt *ppgtt)
1107{
1108 struct drm_device *dev = ppgtt->base.dev;
1109 struct drm_i915_private *dev_priv = dev->dev_private;
1110 int ret;
1111
1112 ppgtt->base.pte_encode = dev_priv->gtt.base.pte_encode;
1113 if (IS_GEN6(dev)) {
1114 ppgtt->enable = gen6_ppgtt_enable;
1115 ppgtt->switch_mm = gen6_mm_switch;
1116 } else if (IS_HASWELL(dev)) {
1117 ppgtt->enable = gen7_ppgtt_enable;
1118 ppgtt->switch_mm = hsw_mm_switch;
1119 } else if (IS_GEN7(dev)) {
1120 ppgtt->enable = gen7_ppgtt_enable;
1121 ppgtt->switch_mm = gen7_mm_switch;
1122 } else
1123 BUG();
1124
1125 ret = gen6_ppgtt_alloc(ppgtt);
1126 if (ret)
1127 return ret;
1128
1129 ret = gen6_ppgtt_setup_page_tables(ppgtt);
1130 if (ret) {
1131 gen6_ppgtt_free(ppgtt);
1132 return ret;
1133 }
1134
1135 ppgtt->base.clear_range = gen6_ppgtt_clear_range;
1136 ppgtt->base.insert_entries = gen6_ppgtt_insert_entries;
1137 ppgtt->base.cleanup = gen6_ppgtt_cleanup;
b146520f 1138 ppgtt->base.start = 0;
5a6c93fe 1139 ppgtt->base.total = ppgtt->num_pd_entries * I915_PPGTT_PT_ENTRIES * PAGE_SIZE;
87d60b63 1140 ppgtt->debug_dump = gen6_dump_ppgtt;
1d2a314c 1141
c8d4c0d6
BW
1142 ppgtt->pd_offset =
1143 ppgtt->node.start / PAGE_SIZE * sizeof(gen6_gtt_pte_t);
1d2a314c 1144
b146520f 1145 ppgtt->base.clear_range(&ppgtt->base, 0, ppgtt->base.total, true);
1d2a314c 1146
b146520f
BW
1147 DRM_DEBUG_DRIVER("Allocated pde space (%ldM) at GTT entry: %lx\n",
1148 ppgtt->node.size >> 20,
1149 ppgtt->node.start / PAGE_SIZE);
3440d265 1150
b146520f 1151 return 0;
3440d265
DV
1152}
1153
246cbfb5 1154int i915_gem_init_ppgtt(struct drm_device *dev, struct i915_hw_ppgtt *ppgtt)
3440d265
DV
1155{
1156 struct drm_i915_private *dev_priv = dev->dev_private;
d6660add 1157 int ret = 0;
3440d265 1158
853ba5d2 1159 ppgtt->base.dev = dev;
8407bb91 1160 ppgtt->base.scratch = dev_priv->gtt.base.scratch;
3440d265 1161
3ed124b2
BW
1162 if (INTEL_INFO(dev)->gen < 8)
1163 ret = gen6_ppgtt_init(ppgtt);
8fe6bd23 1164 else if (IS_GEN8(dev))
37aca44a 1165 ret = gen8_ppgtt_init(ppgtt, dev_priv->gtt.base.total);
3ed124b2
BW
1166 else
1167 BUG();
1168
c7c48dfd 1169 if (!ret) {
7e0d96bc 1170 struct drm_i915_private *dev_priv = dev->dev_private;
c7c48dfd 1171 kref_init(&ppgtt->ref);
93bd8649
BW
1172 drm_mm_init(&ppgtt->base.mm, ppgtt->base.start,
1173 ppgtt->base.total);
7e0d96bc
BW
1174 i915_init_vm(dev_priv, &ppgtt->base);
1175 if (INTEL_INFO(dev)->gen < 8) {
9f273d48 1176 gen6_write_pdes(ppgtt);
7e0d96bc
BW
1177 DRM_DEBUG("Adding PPGTT at offset %x\n",
1178 ppgtt->pd_offset << 10);
1179 }
93bd8649 1180 }
1d2a314c
DV
1181
1182 return ret;
1183}
1184
7e0d96bc 1185static void
6f65e29a
BW
1186ppgtt_bind_vma(struct i915_vma *vma,
1187 enum i915_cache_level cache_level,
1188 u32 flags)
1d2a314c 1189{
782f1495
BW
1190 vma->vm->insert_entries(vma->vm, vma->obj->pages, vma->node.start,
1191 cache_level);
1d2a314c
DV
1192}
1193
7e0d96bc 1194static void ppgtt_unbind_vma(struct i915_vma *vma)
7bddb01f 1195{
6f65e29a 1196 vma->vm->clear_range(vma->vm,
782f1495
BW
1197 vma->node.start,
1198 vma->obj->base.size,
6f65e29a 1199 true);
7bddb01f
DV
1200}
1201
a81cc00c
BW
1202extern int intel_iommu_gfx_mapped;
1203/* Certain Gen5 chipsets require require idling the GPU before
1204 * unmapping anything from the GTT when VT-d is enabled.
1205 */
1206static inline bool needs_idle_maps(struct drm_device *dev)
1207{
1208#ifdef CONFIG_INTEL_IOMMU
1209 /* Query intel_iommu to see if we need the workaround. Presumably that
1210 * was loaded first.
1211 */
1212 if (IS_GEN5(dev) && IS_MOBILE(dev) && intel_iommu_gfx_mapped)
1213 return true;
1214#endif
1215 return false;
1216}
1217
5c042287
BW
1218static bool do_idling(struct drm_i915_private *dev_priv)
1219{
1220 bool ret = dev_priv->mm.interruptible;
1221
a81cc00c 1222 if (unlikely(dev_priv->gtt.do_idle_maps)) {
5c042287 1223 dev_priv->mm.interruptible = false;
b2da9fe5 1224 if (i915_gpu_idle(dev_priv->dev)) {
5c042287
BW
1225 DRM_ERROR("Couldn't idle GPU\n");
1226 /* Wait a bit, in hopes it avoids the hang */
1227 udelay(10);
1228 }
1229 }
1230
1231 return ret;
1232}
1233
1234static void undo_idling(struct drm_i915_private *dev_priv, bool interruptible)
1235{
a81cc00c 1236 if (unlikely(dev_priv->gtt.do_idle_maps))
5c042287
BW
1237 dev_priv->mm.interruptible = interruptible;
1238}
1239
828c7908
BW
1240void i915_check_and_clear_faults(struct drm_device *dev)
1241{
1242 struct drm_i915_private *dev_priv = dev->dev_private;
1243 struct intel_ring_buffer *ring;
1244 int i;
1245
1246 if (INTEL_INFO(dev)->gen < 6)
1247 return;
1248
1249 for_each_ring(ring, dev_priv, i) {
1250 u32 fault_reg;
1251 fault_reg = I915_READ(RING_FAULT_REG(ring));
1252 if (fault_reg & RING_FAULT_VALID) {
1253 DRM_DEBUG_DRIVER("Unexpected fault\n"
1254 "\tAddr: 0x%08lx\\n"
1255 "\tAddress space: %s\n"
1256 "\tSource ID: %d\n"
1257 "\tType: %d\n",
1258 fault_reg & PAGE_MASK,
1259 fault_reg & RING_FAULT_GTTSEL_MASK ? "GGTT" : "PPGTT",
1260 RING_FAULT_SRCID(fault_reg),
1261 RING_FAULT_FAULT_TYPE(fault_reg));
1262 I915_WRITE(RING_FAULT_REG(ring),
1263 fault_reg & ~RING_FAULT_VALID);
1264 }
1265 }
1266 POSTING_READ(RING_FAULT_REG(&dev_priv->ring[RCS]));
1267}
1268
1269void i915_gem_suspend_gtt_mappings(struct drm_device *dev)
1270{
1271 struct drm_i915_private *dev_priv = dev->dev_private;
1272
1273 /* Don't bother messing with faults pre GEN6 as we have little
1274 * documentation supporting that it's a good idea.
1275 */
1276 if (INTEL_INFO(dev)->gen < 6)
1277 return;
1278
1279 i915_check_and_clear_faults(dev);
1280
1281 dev_priv->gtt.base.clear_range(&dev_priv->gtt.base,
782f1495
BW
1282 dev_priv->gtt.base.start,
1283 dev_priv->gtt.base.total,
e568af1c 1284 true);
828c7908
BW
1285}
1286
76aaf220
DV
1287void i915_gem_restore_gtt_mappings(struct drm_device *dev)
1288{
1289 struct drm_i915_private *dev_priv = dev->dev_private;
05394f39 1290 struct drm_i915_gem_object *obj;
80da2161 1291 struct i915_address_space *vm;
76aaf220 1292
828c7908
BW
1293 i915_check_and_clear_faults(dev);
1294
bee4a186 1295 /* First fill our portion of the GTT with scratch pages */
853ba5d2 1296 dev_priv->gtt.base.clear_range(&dev_priv->gtt.base,
782f1495
BW
1297 dev_priv->gtt.base.start,
1298 dev_priv->gtt.base.total,
828c7908 1299 true);
bee4a186 1300
35c20a60 1301 list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) {
6f65e29a
BW
1302 struct i915_vma *vma = i915_gem_obj_to_vma(obj,
1303 &dev_priv->gtt.base);
1304 if (!vma)
1305 continue;
1306
2c22569b 1307 i915_gem_clflush_object(obj, obj->pin_display);
6f65e29a
BW
1308 /* The bind_vma code tries to be smart about tracking mappings.
1309 * Unfortunately above, we've just wiped out the mappings
1310 * without telling our object about it. So we need to fake it.
1311 */
1312 obj->has_global_gtt_mapping = 0;
1313 vma->bind_vma(vma, obj->cache_level, GLOBAL_BIND);
76aaf220
DV
1314 }
1315
80da2161 1316
a2319c08
BW
1317 if (INTEL_INFO(dev)->gen >= 8) {
1318 gen8_setup_private_ppat(dev_priv);
80da2161 1319 return;
a2319c08 1320 }
80da2161
BW
1321
1322 list_for_each_entry(vm, &dev_priv->vm_list, global_link) {
1323 /* TODO: Perhaps it shouldn't be gen6 specific */
1324 if (i915_is_ggtt(vm)) {
1325 if (dev_priv->mm.aliasing_ppgtt)
1326 gen6_write_pdes(dev_priv->mm.aliasing_ppgtt);
1327 continue;
1328 }
1329
1330 gen6_write_pdes(container_of(vm, struct i915_hw_ppgtt, base));
76aaf220
DV
1331 }
1332
e76e9aeb 1333 i915_gem_chipset_flush(dev);
76aaf220 1334}
7c2e6fdf 1335
74163907 1336int i915_gem_gtt_prepare_object(struct drm_i915_gem_object *obj)
7c2e6fdf 1337{
9da3da66 1338 if (obj->has_dma_mapping)
74163907 1339 return 0;
9da3da66
CW
1340
1341 if (!dma_map_sg(&obj->base.dev->pdev->dev,
1342 obj->pages->sgl, obj->pages->nents,
1343 PCI_DMA_BIDIRECTIONAL))
1344 return -ENOSPC;
1345
1346 return 0;
7c2e6fdf
DV
1347}
1348
94ec8f61
BW
1349static inline void gen8_set_pte(void __iomem *addr, gen8_gtt_pte_t pte)
1350{
1351#ifdef writeq
1352 writeq(pte, addr);
1353#else
1354 iowrite32((u32)pte, addr);
1355 iowrite32(pte >> 32, addr + 4);
1356#endif
1357}
1358
1359static void gen8_ggtt_insert_entries(struct i915_address_space *vm,
1360 struct sg_table *st,
782f1495 1361 uint64_t start,
94ec8f61
BW
1362 enum i915_cache_level level)
1363{
1364 struct drm_i915_private *dev_priv = vm->dev->dev_private;
782f1495 1365 unsigned first_entry = start >> PAGE_SHIFT;
94ec8f61
BW
1366 gen8_gtt_pte_t __iomem *gtt_entries =
1367 (gen8_gtt_pte_t __iomem *)dev_priv->gtt.gsm + first_entry;
1368 int i = 0;
1369 struct sg_page_iter sg_iter;
1370 dma_addr_t addr;
1371
1372 for_each_sg_page(st->sgl, &sg_iter, st->nents, 0) {
1373 addr = sg_dma_address(sg_iter.sg) +
1374 (sg_iter.sg_pgoffset << PAGE_SHIFT);
1375 gen8_set_pte(&gtt_entries[i],
1376 gen8_pte_encode(addr, level, true));
1377 i++;
1378 }
1379
1380 /*
1381 * XXX: This serves as a posting read to make sure that the PTE has
1382 * actually been updated. There is some concern that even though
1383 * registers and PTEs are within the same BAR that they are potentially
1384 * of NUMA access patterns. Therefore, even with the way we assume
1385 * hardware should work, we must keep this posting read for paranoia.
1386 */
1387 if (i != 0)
1388 WARN_ON(readq(&gtt_entries[i-1])
1389 != gen8_pte_encode(addr, level, true));
1390
94ec8f61
BW
1391 /* This next bit makes the above posting read even more important. We
1392 * want to flush the TLBs only after we're certain all the PTE updates
1393 * have finished.
1394 */
1395 I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN);
1396 POSTING_READ(GFX_FLSH_CNTL_GEN6);
94ec8f61
BW
1397}
1398
e76e9aeb
BW
1399/*
1400 * Binds an object into the global gtt with the specified cache level. The object
1401 * will be accessible to the GPU via commands whose operands reference offsets
1402 * within the global GTT as well as accessible by the GPU through the GMADR
1403 * mapped BAR (dev_priv->mm.gtt->gtt).
1404 */
853ba5d2 1405static void gen6_ggtt_insert_entries(struct i915_address_space *vm,
7faf1ab2 1406 struct sg_table *st,
782f1495 1407 uint64_t start,
7faf1ab2 1408 enum i915_cache_level level)
e76e9aeb 1409{
853ba5d2 1410 struct drm_i915_private *dev_priv = vm->dev->dev_private;
782f1495 1411 unsigned first_entry = start >> PAGE_SHIFT;
e7c2b58b
BW
1412 gen6_gtt_pte_t __iomem *gtt_entries =
1413 (gen6_gtt_pte_t __iomem *)dev_priv->gtt.gsm + first_entry;
6e995e23
ID
1414 int i = 0;
1415 struct sg_page_iter sg_iter;
e76e9aeb
BW
1416 dma_addr_t addr;
1417
6e995e23 1418 for_each_sg_page(st->sgl, &sg_iter, st->nents, 0) {
2db76d7c 1419 addr = sg_page_iter_dma_address(&sg_iter);
b35b380e 1420 iowrite32(vm->pte_encode(addr, level, true), &gtt_entries[i]);
6e995e23 1421 i++;
e76e9aeb
BW
1422 }
1423
e76e9aeb
BW
1424 /* XXX: This serves as a posting read to make sure that the PTE has
1425 * actually been updated. There is some concern that even though
1426 * registers and PTEs are within the same BAR that they are potentially
1427 * of NUMA access patterns. Therefore, even with the way we assume
1428 * hardware should work, we must keep this posting read for paranoia.
1429 */
1430 if (i != 0)
853ba5d2 1431 WARN_ON(readl(&gtt_entries[i-1]) !=
b35b380e 1432 vm->pte_encode(addr, level, true));
0f9b91c7
BW
1433
1434 /* This next bit makes the above posting read even more important. We
1435 * want to flush the TLBs only after we're certain all the PTE updates
1436 * have finished.
1437 */
1438 I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN);
1439 POSTING_READ(GFX_FLSH_CNTL_GEN6);
e76e9aeb
BW
1440}
1441
94ec8f61 1442static void gen8_ggtt_clear_range(struct i915_address_space *vm,
782f1495
BW
1443 uint64_t start,
1444 uint64_t length,
94ec8f61
BW
1445 bool use_scratch)
1446{
1447 struct drm_i915_private *dev_priv = vm->dev->dev_private;
782f1495
BW
1448 unsigned first_entry = start >> PAGE_SHIFT;
1449 unsigned num_entries = length >> PAGE_SHIFT;
94ec8f61
BW
1450 gen8_gtt_pte_t scratch_pte, __iomem *gtt_base =
1451 (gen8_gtt_pte_t __iomem *) dev_priv->gtt.gsm + first_entry;
1452 const int max_entries = gtt_total_entries(dev_priv->gtt) - first_entry;
1453 int i;
1454
1455 if (WARN(num_entries > max_entries,
1456 "First entry = %d; Num entries = %d (max=%d)\n",
1457 first_entry, num_entries, max_entries))
1458 num_entries = max_entries;
1459
1460 scratch_pte = gen8_pte_encode(vm->scratch.addr,
1461 I915_CACHE_LLC,
1462 use_scratch);
1463 for (i = 0; i < num_entries; i++)
1464 gen8_set_pte(&gtt_base[i], scratch_pte);
1465 readl(gtt_base);
1466}
1467
853ba5d2 1468static void gen6_ggtt_clear_range(struct i915_address_space *vm,
782f1495
BW
1469 uint64_t start,
1470 uint64_t length,
828c7908 1471 bool use_scratch)
7faf1ab2 1472{
853ba5d2 1473 struct drm_i915_private *dev_priv = vm->dev->dev_private;
782f1495
BW
1474 unsigned first_entry = start >> PAGE_SHIFT;
1475 unsigned num_entries = length >> PAGE_SHIFT;
e7c2b58b
BW
1476 gen6_gtt_pte_t scratch_pte, __iomem *gtt_base =
1477 (gen6_gtt_pte_t __iomem *) dev_priv->gtt.gsm + first_entry;
a54c0c27 1478 const int max_entries = gtt_total_entries(dev_priv->gtt) - first_entry;
7faf1ab2
DV
1479 int i;
1480
1481 if (WARN(num_entries > max_entries,
1482 "First entry = %d; Num entries = %d (max=%d)\n",
1483 first_entry, num_entries, max_entries))
1484 num_entries = max_entries;
1485
828c7908
BW
1486 scratch_pte = vm->pte_encode(vm->scratch.addr, I915_CACHE_LLC, use_scratch);
1487
7faf1ab2
DV
1488 for (i = 0; i < num_entries; i++)
1489 iowrite32(scratch_pte, &gtt_base[i]);
1490 readl(gtt_base);
1491}
1492
6f65e29a
BW
1493
1494static void i915_ggtt_bind_vma(struct i915_vma *vma,
1495 enum i915_cache_level cache_level,
1496 u32 unused)
7faf1ab2 1497{
6f65e29a 1498 const unsigned long entry = vma->node.start >> PAGE_SHIFT;
7faf1ab2
DV
1499 unsigned int flags = (cache_level == I915_CACHE_NONE) ?
1500 AGP_USER_MEMORY : AGP_USER_CACHED_MEMORY;
1501
6f65e29a
BW
1502 BUG_ON(!i915_is_ggtt(vma->vm));
1503 intel_gtt_insert_sg_entries(vma->obj->pages, entry, flags);
1504 vma->obj->has_global_gtt_mapping = 1;
7faf1ab2
DV
1505}
1506
853ba5d2 1507static void i915_ggtt_clear_range(struct i915_address_space *vm,
782f1495
BW
1508 uint64_t start,
1509 uint64_t length,
828c7908 1510 bool unused)
7faf1ab2 1511{
782f1495
BW
1512 unsigned first_entry = start >> PAGE_SHIFT;
1513 unsigned num_entries = length >> PAGE_SHIFT;
7faf1ab2
DV
1514 intel_gtt_clear_range(first_entry, num_entries);
1515}
1516
6f65e29a
BW
1517static void i915_ggtt_unbind_vma(struct i915_vma *vma)
1518{
1519 const unsigned int first = vma->node.start >> PAGE_SHIFT;
1520 const unsigned int size = vma->obj->base.size >> PAGE_SHIFT;
7faf1ab2 1521
6f65e29a
BW
1522 BUG_ON(!i915_is_ggtt(vma->vm));
1523 vma->obj->has_global_gtt_mapping = 0;
1524 intel_gtt_clear_range(first, size);
1525}
7faf1ab2 1526
6f65e29a
BW
1527static void ggtt_bind_vma(struct i915_vma *vma,
1528 enum i915_cache_level cache_level,
1529 u32 flags)
d5bd1449 1530{
6f65e29a 1531 struct drm_device *dev = vma->vm->dev;
7faf1ab2 1532 struct drm_i915_private *dev_priv = dev->dev_private;
6f65e29a 1533 struct drm_i915_gem_object *obj = vma->obj;
7faf1ab2 1534
6f65e29a
BW
1535 /* If there is no aliasing PPGTT, or the caller needs a global mapping,
1536 * or we have a global mapping already but the cacheability flags have
1537 * changed, set the global PTEs.
1538 *
1539 * If there is an aliasing PPGTT it is anecdotally faster, so use that
1540 * instead if none of the above hold true.
1541 *
1542 * NB: A global mapping should only be needed for special regions like
1543 * "gtt mappable", SNB errata, or if specified via special execbuf
1544 * flags. At all other times, the GPU will use the aliasing PPGTT.
1545 */
1546 if (!dev_priv->mm.aliasing_ppgtt || flags & GLOBAL_BIND) {
1547 if (!obj->has_global_gtt_mapping ||
1548 (cache_level != obj->cache_level)) {
782f1495
BW
1549 vma->vm->insert_entries(vma->vm, obj->pages,
1550 vma->node.start,
6f65e29a
BW
1551 cache_level);
1552 obj->has_global_gtt_mapping = 1;
1553 }
1554 }
d5bd1449 1555
6f65e29a
BW
1556 if (dev_priv->mm.aliasing_ppgtt &&
1557 (!obj->has_aliasing_ppgtt_mapping ||
1558 (cache_level != obj->cache_level))) {
1559 struct i915_hw_ppgtt *appgtt = dev_priv->mm.aliasing_ppgtt;
1560 appgtt->base.insert_entries(&appgtt->base,
782f1495
BW
1561 vma->obj->pages,
1562 vma->node.start,
1563 cache_level);
6f65e29a
BW
1564 vma->obj->has_aliasing_ppgtt_mapping = 1;
1565 }
d5bd1449
CW
1566}
1567
6f65e29a 1568static void ggtt_unbind_vma(struct i915_vma *vma)
74163907 1569{
6f65e29a 1570 struct drm_device *dev = vma->vm->dev;
7faf1ab2 1571 struct drm_i915_private *dev_priv = dev->dev_private;
6f65e29a 1572 struct drm_i915_gem_object *obj = vma->obj;
6f65e29a
BW
1573
1574 if (obj->has_global_gtt_mapping) {
782f1495
BW
1575 vma->vm->clear_range(vma->vm,
1576 vma->node.start,
1577 obj->base.size,
6f65e29a
BW
1578 true);
1579 obj->has_global_gtt_mapping = 0;
1580 }
74898d7e 1581
6f65e29a
BW
1582 if (obj->has_aliasing_ppgtt_mapping) {
1583 struct i915_hw_ppgtt *appgtt = dev_priv->mm.aliasing_ppgtt;
1584 appgtt->base.clear_range(&appgtt->base,
782f1495
BW
1585 vma->node.start,
1586 obj->base.size,
6f65e29a
BW
1587 true);
1588 obj->has_aliasing_ppgtt_mapping = 0;
1589 }
74163907
DV
1590}
1591
1592void i915_gem_gtt_finish_object(struct drm_i915_gem_object *obj)
7c2e6fdf 1593{
5c042287
BW
1594 struct drm_device *dev = obj->base.dev;
1595 struct drm_i915_private *dev_priv = dev->dev_private;
1596 bool interruptible;
1597
1598 interruptible = do_idling(dev_priv);
1599
9da3da66
CW
1600 if (!obj->has_dma_mapping)
1601 dma_unmap_sg(&dev->pdev->dev,
1602 obj->pages->sgl, obj->pages->nents,
1603 PCI_DMA_BIDIRECTIONAL);
5c042287
BW
1604
1605 undo_idling(dev_priv, interruptible);
7c2e6fdf 1606}
644ec02b 1607
42d6ab48
CW
1608static void i915_gtt_color_adjust(struct drm_mm_node *node,
1609 unsigned long color,
1610 unsigned long *start,
1611 unsigned long *end)
1612{
1613 if (node->color != color)
1614 *start += 4096;
1615
1616 if (!list_empty(&node->node_list)) {
1617 node = list_entry(node->node_list.next,
1618 struct drm_mm_node,
1619 node_list);
1620 if (node->allocated && node->color != color)
1621 *end -= 4096;
1622 }
1623}
fbe5d36e 1624
d7e5008f
BW
1625void i915_gem_setup_global_gtt(struct drm_device *dev,
1626 unsigned long start,
1627 unsigned long mappable_end,
1628 unsigned long end)
644ec02b 1629{
e78891ca
BW
1630 /* Let GEM Manage all of the aperture.
1631 *
1632 * However, leave one page at the end still bound to the scratch page.
1633 * There are a number of places where the hardware apparently prefetches
1634 * past the end of the object, and we've seen multiple hangs with the
1635 * GPU head pointer stuck in a batchbuffer bound at the last page of the
1636 * aperture. One page should be enough to keep any prefetching inside
1637 * of the aperture.
1638 */
40d74980
BW
1639 struct drm_i915_private *dev_priv = dev->dev_private;
1640 struct i915_address_space *ggtt_vm = &dev_priv->gtt.base;
ed2f3452
CW
1641 struct drm_mm_node *entry;
1642 struct drm_i915_gem_object *obj;
1643 unsigned long hole_start, hole_end;
644ec02b 1644
35451cb6
BW
1645 BUG_ON(mappable_end > end);
1646
ed2f3452 1647 /* Subtract the guard page ... */
40d74980 1648 drm_mm_init(&ggtt_vm->mm, start, end - start - PAGE_SIZE);
42d6ab48 1649 if (!HAS_LLC(dev))
93bd8649 1650 dev_priv->gtt.base.mm.color_adjust = i915_gtt_color_adjust;
644ec02b 1651
ed2f3452 1652 /* Mark any preallocated objects as occupied */
35c20a60 1653 list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) {
40d74980 1654 struct i915_vma *vma = i915_gem_obj_to_vma(obj, ggtt_vm);
b3a070cc 1655 int ret;
edd41a87 1656 DRM_DEBUG_KMS("reserving preallocated space: %lx + %zx\n",
c6cfb325
BW
1657 i915_gem_obj_ggtt_offset(obj), obj->base.size);
1658
1659 WARN_ON(i915_gem_obj_ggtt_bound(obj));
40d74980 1660 ret = drm_mm_reserve_node(&ggtt_vm->mm, &vma->node);
c6cfb325 1661 if (ret)
b3a070cc 1662 DRM_DEBUG_KMS("Reservation failed\n");
ed2f3452
CW
1663 obj->has_global_gtt_mapping = 1;
1664 }
1665
853ba5d2
BW
1666 dev_priv->gtt.base.start = start;
1667 dev_priv->gtt.base.total = end - start;
644ec02b 1668
ed2f3452 1669 /* Clear any non-preallocated blocks */
40d74980 1670 drm_mm_for_each_hole(entry, &ggtt_vm->mm, hole_start, hole_end) {
ed2f3452
CW
1671 DRM_DEBUG_KMS("clearing unused GTT space: [%lx, %lx]\n",
1672 hole_start, hole_end);
782f1495
BW
1673 ggtt_vm->clear_range(ggtt_vm, hole_start,
1674 hole_end - hole_start, true);
ed2f3452
CW
1675 }
1676
1677 /* And finally clear the reserved guard page */
782f1495 1678 ggtt_vm->clear_range(ggtt_vm, end - PAGE_SIZE, PAGE_SIZE, true);
e76e9aeb
BW
1679}
1680
d7e5008f
BW
1681void i915_gem_init_global_gtt(struct drm_device *dev)
1682{
1683 struct drm_i915_private *dev_priv = dev->dev_private;
1684 unsigned long gtt_size, mappable_size;
d7e5008f 1685
853ba5d2 1686 gtt_size = dev_priv->gtt.base.total;
93d18799 1687 mappable_size = dev_priv->gtt.mappable_end;
d7e5008f 1688
e78891ca 1689 i915_gem_setup_global_gtt(dev, 0, mappable_size, gtt_size);
e76e9aeb
BW
1690}
1691
1692static int setup_scratch_page(struct drm_device *dev)
1693{
1694 struct drm_i915_private *dev_priv = dev->dev_private;
1695 struct page *page;
1696 dma_addr_t dma_addr;
1697
1698 page = alloc_page(GFP_KERNEL | GFP_DMA32 | __GFP_ZERO);
1699 if (page == NULL)
1700 return -ENOMEM;
1701 get_page(page);
1702 set_pages_uc(page, 1);
1703
1704#ifdef CONFIG_INTEL_IOMMU
1705 dma_addr = pci_map_page(dev->pdev, page, 0, PAGE_SIZE,
1706 PCI_DMA_BIDIRECTIONAL);
1707 if (pci_dma_mapping_error(dev->pdev, dma_addr))
1708 return -EINVAL;
1709#else
1710 dma_addr = page_to_phys(page);
1711#endif
853ba5d2
BW
1712 dev_priv->gtt.base.scratch.page = page;
1713 dev_priv->gtt.base.scratch.addr = dma_addr;
e76e9aeb
BW
1714
1715 return 0;
1716}
1717
1718static void teardown_scratch_page(struct drm_device *dev)
1719{
1720 struct drm_i915_private *dev_priv = dev->dev_private;
853ba5d2
BW
1721 struct page *page = dev_priv->gtt.base.scratch.page;
1722
1723 set_pages_wb(page, 1);
1724 pci_unmap_page(dev->pdev, dev_priv->gtt.base.scratch.addr,
e76e9aeb 1725 PAGE_SIZE, PCI_DMA_BIDIRECTIONAL);
853ba5d2
BW
1726 put_page(page);
1727 __free_page(page);
e76e9aeb
BW
1728}
1729
1730static inline unsigned int gen6_get_total_gtt_size(u16 snb_gmch_ctl)
1731{
1732 snb_gmch_ctl >>= SNB_GMCH_GGMS_SHIFT;
1733 snb_gmch_ctl &= SNB_GMCH_GGMS_MASK;
1734 return snb_gmch_ctl << 20;
1735}
1736
9459d252
BW
1737static inline unsigned int gen8_get_total_gtt_size(u16 bdw_gmch_ctl)
1738{
1739 bdw_gmch_ctl >>= BDW_GMCH_GGMS_SHIFT;
1740 bdw_gmch_ctl &= BDW_GMCH_GGMS_MASK;
1741 if (bdw_gmch_ctl)
1742 bdw_gmch_ctl = 1 << bdw_gmch_ctl;
1743 return bdw_gmch_ctl << 20;
1744}
1745
baa09f5f 1746static inline size_t gen6_get_stolen_size(u16 snb_gmch_ctl)
e76e9aeb
BW
1747{
1748 snb_gmch_ctl >>= SNB_GMCH_GMS_SHIFT;
1749 snb_gmch_ctl &= SNB_GMCH_GMS_MASK;
1750 return snb_gmch_ctl << 25; /* 32 MB units */
1751}
1752
9459d252
BW
1753static inline size_t gen8_get_stolen_size(u16 bdw_gmch_ctl)
1754{
1755 bdw_gmch_ctl >>= BDW_GMCH_GMS_SHIFT;
1756 bdw_gmch_ctl &= BDW_GMCH_GMS_MASK;
1757 return bdw_gmch_ctl << 25; /* 32 MB units */
1758}
1759
63340133
BW
1760static int ggtt_probe_common(struct drm_device *dev,
1761 size_t gtt_size)
1762{
1763 struct drm_i915_private *dev_priv = dev->dev_private;
21c34607 1764 phys_addr_t gtt_phys_addr;
63340133
BW
1765 int ret;
1766
1767 /* For Modern GENs the PTEs and register space are split in the BAR */
21c34607 1768 gtt_phys_addr = pci_resource_start(dev->pdev, 0) +
63340133
BW
1769 (pci_resource_len(dev->pdev, 0) / 2);
1770
21c34607 1771 dev_priv->gtt.gsm = ioremap_wc(gtt_phys_addr, gtt_size);
63340133
BW
1772 if (!dev_priv->gtt.gsm) {
1773 DRM_ERROR("Failed to map the gtt page table\n");
1774 return -ENOMEM;
1775 }
1776
1777 ret = setup_scratch_page(dev);
1778 if (ret) {
1779 DRM_ERROR("Scratch setup failed\n");
1780 /* iounmap will also get called at remove, but meh */
1781 iounmap(dev_priv->gtt.gsm);
1782 }
1783
1784 return ret;
1785}
1786
fbe5d36e
BW
1787/* The GGTT and PPGTT need a private PPAT setup in order to handle cacheability
1788 * bits. When using advanced contexts each context stores its own PAT, but
1789 * writing this data shouldn't be harmful even in those cases. */
1790static void gen8_setup_private_ppat(struct drm_i915_private *dev_priv)
1791{
fbe5d36e
BW
1792 uint64_t pat;
1793
1794 pat = GEN8_PPAT(0, GEN8_PPAT_WB | GEN8_PPAT_LLC) | /* for normal objects, no eLLC */
1795 GEN8_PPAT(1, GEN8_PPAT_WC | GEN8_PPAT_LLCELLC) | /* for something pointing to ptes? */
1796 GEN8_PPAT(2, GEN8_PPAT_WT | GEN8_PPAT_LLCELLC) | /* for scanout with eLLC */
1797 GEN8_PPAT(3, GEN8_PPAT_UC) | /* Uncached objects, mostly for scanout */
1798 GEN8_PPAT(4, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(0)) |
1799 GEN8_PPAT(5, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(1)) |
1800 GEN8_PPAT(6, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(2)) |
1801 GEN8_PPAT(7, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(3));
1802
1803 /* XXX: spec defines this as 2 distinct registers. It's unclear if a 64b
1804 * write would work. */
1805 I915_WRITE(GEN8_PRIVATE_PAT, pat);
1806 I915_WRITE(GEN8_PRIVATE_PAT + 4, pat >> 32);
1807}
1808
63340133
BW
1809static int gen8_gmch_probe(struct drm_device *dev,
1810 size_t *gtt_total,
1811 size_t *stolen,
1812 phys_addr_t *mappable_base,
1813 unsigned long *mappable_end)
1814{
1815 struct drm_i915_private *dev_priv = dev->dev_private;
1816 unsigned int gtt_size;
1817 u16 snb_gmch_ctl;
1818 int ret;
1819
1820 /* TODO: We're not aware of mappable constraints on gen8 yet */
1821 *mappable_base = pci_resource_start(dev->pdev, 2);
1822 *mappable_end = pci_resource_len(dev->pdev, 2);
1823
1824 if (!pci_set_dma_mask(dev->pdev, DMA_BIT_MASK(39)))
1825 pci_set_consistent_dma_mask(dev->pdev, DMA_BIT_MASK(39));
1826
1827 pci_read_config_word(dev->pdev, SNB_GMCH_CTRL, &snb_gmch_ctl);
1828
1829 *stolen = gen8_get_stolen_size(snb_gmch_ctl);
1830
1831 gtt_size = gen8_get_total_gtt_size(snb_gmch_ctl);
d31eb10e 1832 *gtt_total = (gtt_size / sizeof(gen8_gtt_pte_t)) << PAGE_SHIFT;
63340133 1833
fbe5d36e
BW
1834 gen8_setup_private_ppat(dev_priv);
1835
63340133
BW
1836 ret = ggtt_probe_common(dev, gtt_size);
1837
94ec8f61
BW
1838 dev_priv->gtt.base.clear_range = gen8_ggtt_clear_range;
1839 dev_priv->gtt.base.insert_entries = gen8_ggtt_insert_entries;
63340133
BW
1840
1841 return ret;
1842}
1843
baa09f5f
BW
1844static int gen6_gmch_probe(struct drm_device *dev,
1845 size_t *gtt_total,
41907ddc
BW
1846 size_t *stolen,
1847 phys_addr_t *mappable_base,
1848 unsigned long *mappable_end)
e76e9aeb
BW
1849{
1850 struct drm_i915_private *dev_priv = dev->dev_private;
baa09f5f 1851 unsigned int gtt_size;
e76e9aeb 1852 u16 snb_gmch_ctl;
e76e9aeb
BW
1853 int ret;
1854
41907ddc
BW
1855 *mappable_base = pci_resource_start(dev->pdev, 2);
1856 *mappable_end = pci_resource_len(dev->pdev, 2);
1857
baa09f5f
BW
1858 /* 64/512MB is the current min/max we actually know of, but this is just
1859 * a coarse sanity check.
e76e9aeb 1860 */
41907ddc 1861 if ((*mappable_end < (64<<20) || (*mappable_end > (512<<20)))) {
baa09f5f
BW
1862 DRM_ERROR("Unknown GMADR size (%lx)\n",
1863 dev_priv->gtt.mappable_end);
1864 return -ENXIO;
e76e9aeb
BW
1865 }
1866
e76e9aeb
BW
1867 if (!pci_set_dma_mask(dev->pdev, DMA_BIT_MASK(40)))
1868 pci_set_consistent_dma_mask(dev->pdev, DMA_BIT_MASK(40));
e76e9aeb 1869 pci_read_config_word(dev->pdev, SNB_GMCH_CTRL, &snb_gmch_ctl);
e76e9aeb 1870
c4ae25ec 1871 *stolen = gen6_get_stolen_size(snb_gmch_ctl);
a93e4161 1872
63340133
BW
1873 gtt_size = gen6_get_total_gtt_size(snb_gmch_ctl);
1874 *gtt_total = (gtt_size / sizeof(gen6_gtt_pte_t)) << PAGE_SHIFT;
e76e9aeb 1875
63340133 1876 ret = ggtt_probe_common(dev, gtt_size);
e76e9aeb 1877
853ba5d2
BW
1878 dev_priv->gtt.base.clear_range = gen6_ggtt_clear_range;
1879 dev_priv->gtt.base.insert_entries = gen6_ggtt_insert_entries;
7faf1ab2 1880
e76e9aeb
BW
1881 return ret;
1882}
1883
853ba5d2 1884static void gen6_gmch_remove(struct i915_address_space *vm)
e76e9aeb 1885{
853ba5d2
BW
1886
1887 struct i915_gtt *gtt = container_of(vm, struct i915_gtt, base);
5ed16782
BW
1888
1889 drm_mm_takedown(&vm->mm);
853ba5d2
BW
1890 iounmap(gtt->gsm);
1891 teardown_scratch_page(vm->dev);
644ec02b 1892}
baa09f5f
BW
1893
1894static int i915_gmch_probe(struct drm_device *dev,
1895 size_t *gtt_total,
41907ddc
BW
1896 size_t *stolen,
1897 phys_addr_t *mappable_base,
1898 unsigned long *mappable_end)
baa09f5f
BW
1899{
1900 struct drm_i915_private *dev_priv = dev->dev_private;
1901 int ret;
1902
baa09f5f
BW
1903 ret = intel_gmch_probe(dev_priv->bridge_dev, dev_priv->dev->pdev, NULL);
1904 if (!ret) {
1905 DRM_ERROR("failed to set up gmch\n");
1906 return -EIO;
1907 }
1908
41907ddc 1909 intel_gtt_get(gtt_total, stolen, mappable_base, mappable_end);
baa09f5f
BW
1910
1911 dev_priv->gtt.do_idle_maps = needs_idle_maps(dev_priv->dev);
853ba5d2 1912 dev_priv->gtt.base.clear_range = i915_ggtt_clear_range;
baa09f5f 1913
c0a7f818
CW
1914 if (unlikely(dev_priv->gtt.do_idle_maps))
1915 DRM_INFO("applying Ironlake quirks for intel_iommu\n");
1916
baa09f5f
BW
1917 return 0;
1918}
1919
/* Cleanup hook for the intel-gtt based global GTT; @vm is not needed, the
 * work is delegated entirely to intel_gmch_remove().
 */
static void i915_gmch_remove(struct i915_address_space *vm)
{
	(void)vm;

	intel_gmch_remove();
}
1924
1925int i915_gem_gtt_init(struct drm_device *dev)
1926{
1927 struct drm_i915_private *dev_priv = dev->dev_private;
1928 struct i915_gtt *gtt = &dev_priv->gtt;
baa09f5f
BW
1929 int ret;
1930
baa09f5f 1931 if (INTEL_INFO(dev)->gen <= 5) {
b2f21b4d 1932 gtt->gtt_probe = i915_gmch_probe;
853ba5d2 1933 gtt->base.cleanup = i915_gmch_remove;
63340133 1934 } else if (INTEL_INFO(dev)->gen < 8) {
b2f21b4d 1935 gtt->gtt_probe = gen6_gmch_probe;
853ba5d2 1936 gtt->base.cleanup = gen6_gmch_remove;
4d15c145 1937 if (IS_HASWELL(dev) && dev_priv->ellc_size)
853ba5d2 1938 gtt->base.pte_encode = iris_pte_encode;
4d15c145 1939 else if (IS_HASWELL(dev))
853ba5d2 1940 gtt->base.pte_encode = hsw_pte_encode;
b2f21b4d 1941 else if (IS_VALLEYVIEW(dev))
853ba5d2 1942 gtt->base.pte_encode = byt_pte_encode;
350ec881
CW
1943 else if (INTEL_INFO(dev)->gen >= 7)
1944 gtt->base.pte_encode = ivb_pte_encode;
b2f21b4d 1945 else
350ec881 1946 gtt->base.pte_encode = snb_pte_encode;
63340133
BW
1947 } else {
1948 dev_priv->gtt.gtt_probe = gen8_gmch_probe;
1949 dev_priv->gtt.base.cleanup = gen6_gmch_remove;
baa09f5f
BW
1950 }
1951
853ba5d2 1952 ret = gtt->gtt_probe(dev, &gtt->base.total, &gtt->stolen_size,
b2f21b4d 1953 &gtt->mappable_base, &gtt->mappable_end);
a54c0c27 1954 if (ret)
baa09f5f 1955 return ret;
baa09f5f 1956
853ba5d2
BW
1957 gtt->base.dev = dev;
1958
baa09f5f 1959 /* GMADR is the PCI mmio aperture into the global GTT. */
853ba5d2
BW
1960 DRM_INFO("Memory usable by graphics device = %zdM\n",
1961 gtt->base.total >> 20);
b2f21b4d
BW
1962 DRM_DEBUG_DRIVER("GMADR size = %ldM\n", gtt->mappable_end >> 20);
1963 DRM_DEBUG_DRIVER("GTT stolen size = %zdM\n", gtt->stolen_size >> 20);
baa09f5f
BW
1964
1965 return 0;
1966}
6f65e29a
BW
1967
1968static struct i915_vma *__i915_gem_vma_create(struct drm_i915_gem_object *obj,
1969 struct i915_address_space *vm)
1970{
1971 struct i915_vma *vma = kzalloc(sizeof(*vma), GFP_KERNEL);
1972 if (vma == NULL)
1973 return ERR_PTR(-ENOMEM);
1974
1975 INIT_LIST_HEAD(&vma->vma_link);
1976 INIT_LIST_HEAD(&vma->mm_list);
1977 INIT_LIST_HEAD(&vma->exec_list);
1978 vma->vm = vm;
1979 vma->obj = obj;
1980
1981 switch (INTEL_INFO(vm->dev)->gen) {
1982 case 8:
1983 case 7:
1984 case 6:
7e0d96bc
BW
1985 if (i915_is_ggtt(vm)) {
1986 vma->unbind_vma = ggtt_unbind_vma;
1987 vma->bind_vma = ggtt_bind_vma;
1988 } else {
1989 vma->unbind_vma = ppgtt_unbind_vma;
1990 vma->bind_vma = ppgtt_bind_vma;
1991 }
6f65e29a
BW
1992 break;
1993 case 5:
1994 case 4:
1995 case 3:
1996 case 2:
1997 BUG_ON(!i915_is_ggtt(vm));
1998 vma->unbind_vma = i915_ggtt_unbind_vma;
1999 vma->bind_vma = i915_ggtt_bind_vma;
2000 break;
2001 default:
2002 BUG();
2003 }
2004
2005 /* Keep GGTT vmas first to make debug easier */
2006 if (i915_is_ggtt(vm))
2007 list_add(&vma->vma_link, &obj->vma_list);
2008 else
2009 list_add_tail(&vma->vma_link, &obj->vma_list);
2010
2011 return vma;
2012}
2013
/*
 * Return the VMA that i915_gem_obj_to_vma() finds for @obj in @vm, or
 * create one with __i915_gem_vma_create() when none is found.
 *
 * The result of the creation path is passed back unchanged, so it may be
 * an ERR_PTR() value.
 */
struct i915_vma *
i915_gem_obj_lookup_or_create_vma(struct drm_i915_gem_object *obj,
				  struct i915_address_space *vm)
{
	struct i915_vma *existing = i915_gem_obj_to_vma(obj, vm);

	if (existing)
		return existing;

	return __i915_gem_vma_create(obj, vm);
}