/*
 * drivers/staging/omapdrm/omap_gem.c
 *
 * Copyright (C) 2011 Texas Instruments
 * Author: Rob Clark <rob.clark@linaro.org>
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 as published by
 * the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program.  If not, see <http://www.gnu.org/licenses/>.
 */


#include <linux/spinlock.h>
#include <linux/shmem_fs.h>

#include "omap_drv.h"
#include "omap_dmm_tiler.h"

/* remove these once drm core helpers are merged */
struct page **_drm_gem_get_pages(struct drm_gem_object *obj, gfp_t gfpmask);
void _drm_gem_put_pages(struct drm_gem_object *obj, struct page **pages,
		bool dirty, bool accessed);
int _drm_gem_create_mmap_offset_size(struct drm_gem_object *obj, size_t size);

/*
 * GEM buffer object implementation.
 */

#define to_omap_bo(x) container_of(x, struct omap_gem_object, base)

/* note: we use upper 8 bits of flags for driver-internal flags: */
#define OMAP_BO_DMA		0x01000000	/* actually is physically contiguous */
#define OMAP_BO_EXT_SYNC	0x02000000	/* externally allocated sync object */
#define OMAP_BO_EXT_MEM		0x04000000	/* externally allocated memory */


struct omap_gem_object {
	struct drm_gem_object base;

	struct list_head mm_list;

	uint32_t flags;

	/** width/height for tiled formats (rounded up to slot boundaries) */
	uint16_t width, height;

	/** roll applied when mapping to DMM */
	uint32_t roll;

	/**
	 * If buffer is allocated physically contiguous, the OMAP_BO_DMA flag
	 * is set and the paddr is valid.  Also if the buffer is remapped in
	 * TILER and paddr_cnt > 0, then paddr is valid.  But if you are using
	 * the physical address and OMAP_BO_DMA is not set, then you should
	 * be going thru omap_gem_{get,put}_paddr() to ensure the mapping is
	 * not removed from under your feet.
	 *
	 * Note that OMAP_BO_SCANOUT is only a hint from userspace that a
	 * DMA-capable buffer is desired; it does not guarantee one.  Use the
	 * OMAP_BO_DMA flag to determine whether the buffer actually has a
	 * DMA-capable physical address.
	 */
	dma_addr_t paddr;

	/**
	 * # of users of paddr
	 */
	uint32_t paddr_cnt;

	/**
	 * tiler block used when buffer is remapped in DMM/TILER.
	 */
	struct tiler_block *block;

	/**
	 * Array of backing pages, if allocated.  Note that pages are never
	 * allocated for buffers originally allocated from contiguous memory.
	 */
	struct page **pages;

	/** addresses corresponding to pages in above array */
	dma_addr_t *addrs;

	/**
	 * Virtual address, if mapped.
	 */
	void *vaddr;

	/**
	 * sync-object allocated on demand (if needed)
	 *
	 * Per-buffer sync-object for tracking pending and completed hw/dma
	 * read and write operations.  The layout in memory is dictated by
	 * the SGX firmware, which uses this information to stall the command
	 * stream if a surface is not ready yet.
	 *
	 * Note that when buffer is used by SGX, the sync-object needs to be
	 * allocated from a special heap of sync-objects.  This way many sync
	 * objects can be packed in a page, and not waste GPU virtual address
	 * space.  Because of this we have to have a omap_gem_set_sync_object()
	 * API to allow replacement of the syncobj after it has (potentially)
	 * already been allocated.  A bit ugly but I haven't thought of a
	 * better alternative.
	 */
	struct {
		uint32_t write_pending;
		uint32_t write_complete;
		uint32_t read_pending;
		uint32_t read_complete;
	} *sync;
};

static int get_pages(struct drm_gem_object *obj, struct page ***pages);
static uint64_t mmap_offset(struct drm_gem_object *obj);

/* To deal with userspace mmap'ings of 2d tiled buffers, which (a) are
 * not necessarily pinned in TILER all the time, and (b) when they are
 * they are not necessarily page aligned, we reserve one or more small
 * regions in each of the 2d containers to use as a user-GART where we
 * can create a second page-aligned mapping of parts of the buffer
 * being accessed from userspace.
 *
 * Note that we could optimize slightly when we know that multiple
 * tiler containers are backed by the same PAT.. but I'll leave that
 * for later..
 */
#define NUM_USERGART_ENTRIES 2
struct usergart_entry {
	struct tiler_block *block;	/* the reserved tiler block */
	dma_addr_t paddr;
	struct drm_gem_object *obj;	/* the current pinned obj */
	pgoff_t obj_pgoff;		/* page offset of obj currently
					   mapped in */
};
static struct {
	struct usergart_entry entry[NUM_USERGART_ENTRIES];
	int height;		/* height in rows */
	int height_shift;	/* ilog2(height in rows) */
	int slot_shift;		/* ilog2(width per slot) */
	int stride_pfn;		/* stride in pages */
	int last;		/* index of last used entry */
} *usergart;

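/*
 * Illustrative sketch of the scheme above (assumed numbers, not from
 * the original sources): with 4kb pages and a slot that is 4kb wide by
 * 64 rows tall, each usergart entry covers one slot-column, so a CPU
 * fault at page offset 'pgoff' into the buffer selects the slot-row
 * pgoff / 64, and the whole 4kb x 64-row window around the fault is
 * remapped through the entry's reserved tiler block.  The real
 * geometry comes from tiler_align()/tiler_stride() in omap_gem_init()
 * below.
 */
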
static void evict_entry(struct drm_gem_object *obj,
		enum tiler_fmt fmt, struct usergart_entry *entry)
{
	if (obj->dev->dev_mapping) {
		struct omap_gem_object *omap_obj = to_omap_bo(obj);
		int n = usergart[fmt].height;
		size_t size = PAGE_SIZE * n;
		loff_t off = mmap_offset(obj) +
				(entry->obj_pgoff << PAGE_SHIFT);
		const int m = 1 + ((omap_obj->width << fmt) / PAGE_SIZE);
		if (m > 1) {
			int i;
			/* if stride > PAGE_SIZE then sparse mapping: */
			for (i = n; i > 0; i--) {
				unmap_mapping_range(obj->dev->dev_mapping,
						off, PAGE_SIZE, 1);
				off += PAGE_SIZE * m;
			}
		} else {
			unmap_mapping_range(obj->dev->dev_mapping, off, size, 1);
		}
	}

	entry->obj = NULL;
}

/* Evict a buffer from usergart, if it is mapped there */
static void evict(struct drm_gem_object *obj)
{
	struct omap_gem_object *omap_obj = to_omap_bo(obj);

	if (omap_obj->flags & OMAP_BO_TILED) {
		enum tiler_fmt fmt = gem2fmt(omap_obj->flags);
		int i;

		if (!usergart)
			return;

		for (i = 0; i < NUM_USERGART_ENTRIES; i++) {
			struct usergart_entry *entry = &usergart[fmt].entry[i];
			if (entry->obj == obj)
				evict_entry(obj, fmt, entry);
		}
	}
}

/* GEM objects can either be allocated from contiguous memory (in which
 * case obj->filp==NULL), or w/ shmem backing (obj->filp!=NULL).  But non
 * contiguous buffers can be remapped in TILER/DMM if they need to be
 * contiguous... but we don't do this all the time to reduce pressure
 * on TILER/DMM space when we know at allocation time that the buffer
 * will need to be scanned out.
 */
static inline bool is_shmem(struct drm_gem_object *obj)
{
	return obj->filp != NULL;
}

/**
 * shmem buffers that are mapped cached can simulate coherency via using
 * page faulting to keep track of dirty pages
 */
static inline bool is_cached_coherent(struct drm_gem_object *obj)
{
	struct omap_gem_object *omap_obj = to_omap_bo(obj);
	return is_shmem(obj) &&
		((omap_obj->flags & OMAP_BO_CACHE_MASK) == OMAP_BO_CACHED);
}

static DEFINE_SPINLOCK(sync_lock);

/** ensure backing pages are allocated */
static int omap_gem_attach_pages(struct drm_gem_object *obj)
{
	struct drm_device *dev = obj->dev;
	struct omap_gem_object *omap_obj = to_omap_bo(obj);
	struct page **pages;
	int npages = obj->size >> PAGE_SHIFT;
	int i, ret;
	dma_addr_t *addrs;

	WARN_ON(omap_obj->pages);

	/* TODO: __GFP_DMA32 .. but somehow GFP_HIGHMEM is coming from the
	 * mapping_gfp_mask(mapping) which conflicts w/ GFP_DMA32.. probably
	 * we actually want CMA memory for it all anyways..
	 */
	pages = _drm_gem_get_pages(obj, GFP_KERNEL);
	if (IS_ERR(pages)) {
		dev_err(obj->dev->dev, "could not get pages: %ld\n", PTR_ERR(pages));
		return PTR_ERR(pages);
	}

	/* for non-cached buffers, ensure the new pages are clean because
	 * DSS, GPU, etc. are not cache coherent:
	 */
	if (omap_obj->flags & (OMAP_BO_WC|OMAP_BO_UNCACHED)) {
		/* note: sizeof(*addrs), not sizeof(addrs), so the array is
		 * sized for dma_addr_t elements rather than pointers:
		 */
		addrs = kmalloc(npages * sizeof(*addrs), GFP_KERNEL);
		if (!addrs) {
			ret = -ENOMEM;
			goto free_pages;
		}

		for (i = 0; i < npages; i++) {
			addrs[i] = dma_map_page(dev->dev, pages[i],
					0, PAGE_SIZE, DMA_BIDIRECTIONAL);
		}
	} else {
		addrs = kzalloc(npages * sizeof(*addrs), GFP_KERNEL);
		if (!addrs) {
			ret = -ENOMEM;
			goto free_pages;
		}
	}

	omap_obj->addrs = addrs;
	omap_obj->pages = pages;

	return 0;

free_pages:
	_drm_gem_put_pages(obj, pages, true, false);

	return ret;
}

/** release backing pages */
static void omap_gem_detach_pages(struct drm_gem_object *obj)
{
	struct omap_gem_object *omap_obj = to_omap_bo(obj);

	/* for non-cached buffers, ensure the new pages are clean because
	 * DSS, GPU, etc. are not cache coherent:
	 */
	if (omap_obj->flags & (OMAP_BO_WC|OMAP_BO_UNCACHED)) {
		int i, npages = obj->size >> PAGE_SHIFT;
		for (i = 0; i < npages; i++) {
			dma_unmap_page(obj->dev->dev, omap_obj->addrs[i],
					PAGE_SIZE, DMA_BIDIRECTIONAL);
		}
	}

	kfree(omap_obj->addrs);
	omap_obj->addrs = NULL;

	_drm_gem_put_pages(obj, omap_obj->pages, true, false);
	omap_obj->pages = NULL;
}

/* get buffer flags */
uint32_t omap_gem_flags(struct drm_gem_object *obj)
{
	return to_omap_bo(obj)->flags;
}

/** get mmap offset */
static uint64_t mmap_offset(struct drm_gem_object *obj)
{
	struct drm_device *dev = obj->dev;

	WARN_ON(!mutex_is_locked(&dev->struct_mutex));

	if (!obj->map_list.map) {
		/* Make it mmapable */
		size_t size = omap_gem_mmap_size(obj);
		int ret = _drm_gem_create_mmap_offset_size(obj, size);

		if (ret) {
			dev_err(dev->dev, "could not allocate mmap offset\n");
			return 0;
		}
	}

	return (uint64_t)obj->map_list.hash.key << PAGE_SHIFT;
}

uint64_t omap_gem_mmap_offset(struct drm_gem_object *obj)
{
	uint64_t offset;
	mutex_lock(&obj->dev->struct_mutex);
	offset = mmap_offset(obj);
	mutex_unlock(&obj->dev->struct_mutex);
	return offset;
}

/** get mmap size */
size_t omap_gem_mmap_size(struct drm_gem_object *obj)
{
	struct omap_gem_object *omap_obj = to_omap_bo(obj);
	size_t size = obj->size;

	if (omap_obj->flags & OMAP_BO_TILED) {
		/* for tiled buffers, the virtual size has stride rounded up
		 * to 4kb.. (to hide the fact that row n+1 might start 16kb or
		 * 32kb later!).  But we don't back the entire buffer with
		 * pages, only the valid picture part.. so need to adjust for
		 * this in the size used to mmap and generate mmap offset
		 */
		size = tiler_vsize(gem2fmt(omap_obj->flags),
				omap_obj->width, omap_obj->height);
	}

	return size;
}

/* get tiled size, returns -EINVAL if not tiled buffer */
int omap_gem_tiled_size(struct drm_gem_object *obj, uint16_t *w, uint16_t *h)
{
	struct omap_gem_object *omap_obj = to_omap_bo(obj);
	if (omap_obj->flags & OMAP_BO_TILED) {
		*w = omap_obj->width;
		*h = omap_obj->height;
		return 0;
	}
	return -EINVAL;
}

/* Normal handling for the case of faulting in non-tiled buffers */
static int fault_1d(struct drm_gem_object *obj,
		struct vm_area_struct *vma, struct vm_fault *vmf)
{
	struct omap_gem_object *omap_obj = to_omap_bo(obj);
	unsigned long pfn;
	pgoff_t pgoff;

	/* We don't use vmf->pgoff since that has the fake offset: */
	pgoff = ((unsigned long)vmf->virtual_address -
			vma->vm_start) >> PAGE_SHIFT;

	if (omap_obj->pages) {
		omap_gem_cpu_sync(obj, pgoff);
		pfn = page_to_pfn(omap_obj->pages[pgoff]);
	} else {
		BUG_ON(!(omap_obj->flags & OMAP_BO_DMA));
		pfn = (omap_obj->paddr >> PAGE_SHIFT) + pgoff;
	}

	VERB("Inserting %p pfn %lx, pa %lx", vmf->virtual_address,
			pfn, pfn << PAGE_SHIFT);

	return vm_insert_mixed(vma, (unsigned long)vmf->virtual_address, pfn);
}

/* Special handling for the case of faulting in 2d tiled buffers */
static int fault_2d(struct drm_gem_object *obj,
		struct vm_area_struct *vma, struct vm_fault *vmf)
{
	struct omap_gem_object *omap_obj = to_omap_bo(obj);
	struct usergart_entry *entry;
	enum tiler_fmt fmt = gem2fmt(omap_obj->flags);
	struct page *pages[64];  /* XXX is this too much to have on stack? */
	unsigned long pfn;
	pgoff_t pgoff, base_pgoff;
	void __user *vaddr;
	int i, ret, slots;

	/*
	 * Note the height of the slot is also equal to the number of pages
	 * that need to be mapped in to fill a 4kb wide CPU page.  If the
	 * slot height is 64, then 64 pages fill a 4kb wide by 64 row region.
	 */
	const int n = usergart[fmt].height;
	const int n_shift = usergart[fmt].height_shift;

	/*
	 * If buffer width in bytes > PAGE_SIZE then the virtual stride is
	 * rounded up to the next multiple of PAGE_SIZE.. this needs to be
	 * taken into account in some of the math, so figure out virtual
	 * stride in pages
	 */
	const int m = 1 + ((omap_obj->width << fmt) / PAGE_SIZE);

	/* We don't use vmf->pgoff since that has the fake offset: */
	pgoff = ((unsigned long)vmf->virtual_address -
			vma->vm_start) >> PAGE_SHIFT;

	/*
	 * Actual address we start mapping at is rounded down to previous slot
	 * boundary in the y direction:
	 */
	base_pgoff = round_down(pgoff, m << n_shift);

	/* figure out buffer width in slots */
	slots = omap_obj->width >> usergart[fmt].slot_shift;

	vaddr = vmf->virtual_address - ((pgoff - base_pgoff) << PAGE_SHIFT);

	entry = &usergart[fmt].entry[usergart[fmt].last];

	/* evict previous buffer using this usergart entry, if any: */
	if (entry->obj)
		evict_entry(entry->obj, fmt, entry);

	entry->obj = obj;
	entry->obj_pgoff = base_pgoff;

	/* now convert base_pgoff to phys offset from virt offset: */
	base_pgoff = (base_pgoff >> n_shift) * slots;

	/* for wider-than 4k.. figure out which part of the slot-row we want: */
	if (m > 1) {
		int off = pgoff % m;
		entry->obj_pgoff += off;
		base_pgoff /= m;
		slots = min(slots - (off << n_shift), n);
		base_pgoff += off << n_shift;
		vaddr += off << PAGE_SHIFT;
	}

	/*
	 * Map in pages.  Beyond the valid pixel part of the buffer, we set
	 * pages[i] to NULL to get a dummy page mapped in.. if someone
	 * reads/writes it they will get random/undefined content, but at
	 * least it won't be corrupting whatever other random page used to
	 * be mapped in, or other undefined behavior.
	 */
	memcpy(pages, &omap_obj->pages[base_pgoff],
			sizeof(struct page *) * slots);
	memset(pages + slots, 0,
			sizeof(struct page *) * (n - slots));

	ret = tiler_pin(entry->block, pages, ARRAY_SIZE(pages), 0, true);
	if (ret) {
		dev_err(obj->dev->dev, "failed to pin: %d\n", ret);
		return ret;
	}

	pfn = entry->paddr >> PAGE_SHIFT;

	VERB("Inserting %p pfn %lx, pa %lx", vmf->virtual_address,
			pfn, pfn << PAGE_SHIFT);

	for (i = n; i > 0; i--) {
		vm_insert_mixed(vma, (unsigned long)vaddr, pfn);
		pfn += usergart[fmt].stride_pfn;
		vaddr += PAGE_SIZE * m;
	}

	/* simple round-robin: */
	usergart[fmt].last = (usergart[fmt].last + 1) % NUM_USERGART_ENTRIES;

	return 0;
}

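/*
 * Worked example for fault_2d() (hypothetical numbers, illustration
 * only): with n = 64, m = 2 (buffer two pages wide) and a fault at
 * pgoff = 130, base_pgoff rounds down to 128 (one slot-row of
 * m << n_shift = 128 pages), the fault falls in column off = 0, and
 * the loop above inserts 64 pfns, stepping the pfn by stride_pfn per
 * row and the user address by m pages per row.
 */
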
/**
 * omap_gem_fault - pagefault handler for GEM objects
 * @vma: the VMA of the GEM object
 * @vmf: fault detail
 *
 * Invoked when a fault occurs on an mmap of a GEM managed area.  GEM
 * does most of the work for us including the actual map/unmap calls
 * but we need to do the actual page work.
 *
 * The VMA was set up by GEM.  In doing so it also ensured that the
 * vma->vm_private_data points to the GEM object that is backing this
 * mapping.
 */
int omap_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
{
	struct drm_gem_object *obj = vma->vm_private_data;
	struct omap_gem_object *omap_obj = to_omap_bo(obj);
	struct drm_device *dev = obj->dev;
	struct page **pages;
	int ret;

	/* Make sure we don't parallel update on a fault, nor move or remove
	 * something from beneath our feet
	 */
	mutex_lock(&dev->struct_mutex);

	/* if a shmem backed object, make sure we have pages attached now */
	ret = get_pages(obj, &pages);
	if (ret) {
		goto fail;
	}

	/* where should we do corresponding put_pages().. we are mapping
	 * the original page, rather than thru a GART, so we can't rely
	 * on eviction to trigger this.  But munmap() or all mappings should
	 * probably trigger put_pages()?
	 */

	if (omap_obj->flags & OMAP_BO_TILED)
		ret = fault_2d(obj, vma, vmf);
	else
		ret = fault_1d(obj, vma, vmf);


fail:
	mutex_unlock(&dev->struct_mutex);
	switch (ret) {
	case 0:
	case -ERESTARTSYS:
	case -EINTR:
		return VM_FAULT_NOPAGE;
	case -ENOMEM:
		return VM_FAULT_OOM;
	default:
		return VM_FAULT_SIGBUS;
	}
}

/** We override mainly to fix up some of the vm mapping flags.. */
int omap_gem_mmap(struct file *filp, struct vm_area_struct *vma)
{
	int ret;

	ret = drm_gem_mmap(filp, vma);
	if (ret) {
		DBG("mmap failed: %d", ret);
		return ret;
	}

	return omap_gem_mmap_obj(vma->vm_private_data, vma);
}

int omap_gem_mmap_obj(struct drm_gem_object *obj,
		struct vm_area_struct *vma)
{
	struct omap_gem_object *omap_obj = to_omap_bo(obj);

	vma->vm_flags &= ~VM_PFNMAP;
	vma->vm_flags |= VM_MIXEDMAP;

	if (omap_obj->flags & OMAP_BO_WC) {
		vma->vm_page_prot = pgprot_writecombine(vm_get_page_prot(vma->vm_flags));
	} else if (omap_obj->flags & OMAP_BO_UNCACHED) {
		vma->vm_page_prot = pgprot_noncached(vm_get_page_prot(vma->vm_flags));
	} else {
		/*
		 * We do have some private objects, at least for scanout buffers
		 * on hardware without DMM/TILER.  But these are allocated write-
		 * combine
		 */
		if (WARN_ON(!obj->filp))
			return -EINVAL;

		/*
		 * Shunt off cached objs to shmem file so they have their own
		 * address_space (so unmap_mapping_range does what we want,
		 * in particular in the case of mmap'd dmabufs)
		 */
		fput(vma->vm_file);
		vma->vm_pgoff = 0;
		vma->vm_file  = get_file(obj->filp);

		vma->vm_page_prot = vm_get_page_prot(vma->vm_flags);
	}

	return 0;
}


/**
 * omap_gem_dumb_create - create a dumb buffer
 * @file: our client file
 * @dev: our device
 * @args: the requested arguments copied from userspace
 *
 * Allocate a buffer suitable for use for a frame buffer of the
 * form described by user space.  Give userspace a handle by which
 * to reference it.
 */
int omap_gem_dumb_create(struct drm_file *file, struct drm_device *dev,
		struct drm_mode_create_dumb *args)
{
	union omap_gem_size gsize;

	/* in case someone tries to feed us a completely bogus stride: */
	args->pitch = align_pitch(args->pitch, args->width, args->bpp);
	args->size = PAGE_ALIGN(args->pitch * args->height);

	gsize = (union omap_gem_size){
		.bytes = args->size,
	};

	return omap_gem_new_handle(dev, file, gsize,
			OMAP_BO_SCANOUT | OMAP_BO_WC, &args->handle);
}

/**
 * omap_gem_dumb_destroy - destroy a dumb buffer
 * @file: client file
 * @dev: our DRM device
 * @handle: the object handle
 *
 * Destroy a handle that was created via omap_gem_dumb_create.
 */
int omap_gem_dumb_destroy(struct drm_file *file, struct drm_device *dev,
		uint32_t handle)
{
	/* No special work needed, drop the reference and see what falls out */
	return drm_gem_handle_delete(file, handle);
}

/**
 * omap_gem_dumb_map_offset - buffer mapping for dumb interface
 * @file: our drm client file
 * @dev: drm device
 * @handle: GEM handle to the object (from dumb_create)
 * @offset: returned fake mmap offset
 *
 * Do the necessary setup to allow the mapping of the frame buffer
 * into user memory.  We don't have to do much here at the moment.
 */
int omap_gem_dumb_map_offset(struct drm_file *file, struct drm_device *dev,
		uint32_t handle, uint64_t *offset)
{
	struct drm_gem_object *obj;
	int ret = 0;

	/* GEM does all our handle to object mapping */
	obj = drm_gem_object_lookup(dev, file, handle);
	if (obj == NULL) {
		ret = -ENOENT;
		goto fail;
	}

	*offset = omap_gem_mmap_offset(obj);

	drm_gem_object_unreference_unlocked(obj);

fail:
	return ret;
}

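/*
 * Userspace usage sketch (illustrative only, not part of this file):
 * the fake offset filled in above is what a client passes to mmap()
 * on the drm fd, roughly:
 *
 *	struct drm_mode_map_dumb req = { .handle = handle };
 *	ioctl(fd, DRM_IOCTL_MODE_MAP_DUMB, &req);
 *	ptr = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED,
 *			fd, req.offset);
 *
 * which then lands in omap_gem_mmap()/omap_gem_fault() above.
 */
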
/* Set scrolling position.  This allows us to implement fast scrolling
 * for console.
 *
 * Call only from non-atomic contexts.
 */
int omap_gem_roll(struct drm_gem_object *obj, uint32_t roll)
{
	struct omap_gem_object *omap_obj = to_omap_bo(obj);
	uint32_t npages = obj->size >> PAGE_SHIFT;
	int ret = 0;

	if (roll > npages) {
		dev_err(obj->dev->dev, "invalid roll: %d\n", roll);
		return -EINVAL;
	}

	omap_obj->roll = roll;

	mutex_lock(&obj->dev->struct_mutex);

	/* if we aren't mapped yet, we don't need to do anything */
	if (omap_obj->block) {
		struct page **pages;
		ret = get_pages(obj, &pages);
		if (ret)
			goto fail;
		ret = tiler_pin(omap_obj->block, pages, npages, roll, true);
		if (ret)
			dev_err(obj->dev->dev, "could not repin: %d\n", ret);
	}

fail:
	mutex_unlock(&obj->dev->struct_mutex);

	return ret;
}

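/*
 * Usage sketch (assumed numbers, illustration only): to scroll a
 * console by one text row backed by, say, 16 pages worth of scanlines,
 * a caller would advance the roll and let tiler_pin() repin the same
 * pages at the new offset, with no pixel copying:
 *
 *	omap_gem_roll(obj, old_roll + 16);
 */
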
/* Sync the buffer for CPU access.. note pages should already be
 * attached, ie. omap_gem_get_pages()
 */
void omap_gem_cpu_sync(struct drm_gem_object *obj, int pgoff)
{
	struct drm_device *dev = obj->dev;
	struct omap_gem_object *omap_obj = to_omap_bo(obj);

	if (is_cached_coherent(obj) && omap_obj->addrs[pgoff]) {
		dma_unmap_page(dev->dev, omap_obj->addrs[pgoff],
				PAGE_SIZE, DMA_BIDIRECTIONAL);
		omap_obj->addrs[pgoff] = 0;
	}
}

/* sync the buffer for DMA access */
void omap_gem_dma_sync(struct drm_gem_object *obj,
		enum dma_data_direction dir)
{
	struct drm_device *dev = obj->dev;
	struct omap_gem_object *omap_obj = to_omap_bo(obj);

	if (is_cached_coherent(obj)) {
		int i, npages = obj->size >> PAGE_SHIFT;
		struct page **pages = omap_obj->pages;
		bool dirty = false;

		for (i = 0; i < npages; i++) {
			if (!omap_obj->addrs[i]) {
				omap_obj->addrs[i] = dma_map_page(dev->dev, pages[i], 0,
						PAGE_SIZE, DMA_BIDIRECTIONAL);
				dirty = true;
			}
		}

		if (dirty) {
			unmap_mapping_range(obj->filp->f_mapping, 0,
					omap_gem_mmap_size(obj), 1);
		}
	}
}

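/*
 * Summary of how the two helpers above cooperate for cached shmem
 * buffers: a page is CPU-owned while addrs[pgoff] == 0 and DMA-owned
 * while it holds a dma_addr_t.  omap_gem_dma_sync() maps every
 * unmapped page and zaps the userspace mapping, so the next CPU touch
 * faults into fault_1d(), which calls omap_gem_cpu_sync() to unmap
 * just that page and hand it back to the CPU.
 */
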
/* Get physical address for DMA.. if 'remap' is true, and the buffer is not
 * already contiguous, remap it to pin in physically contiguous memory.. (ie.
 * map in TILER)
 */
int omap_gem_get_paddr(struct drm_gem_object *obj,
		dma_addr_t *paddr, bool remap)
{
	struct omap_drm_private *priv = obj->dev->dev_private;
	struct omap_gem_object *omap_obj = to_omap_bo(obj);
	int ret = 0;

	mutex_lock(&obj->dev->struct_mutex);

	if (remap && is_shmem(obj) && priv->has_dmm) {
		if (omap_obj->paddr_cnt == 0) {
			struct page **pages;
			uint32_t npages = obj->size >> PAGE_SHIFT;
			enum tiler_fmt fmt = gem2fmt(omap_obj->flags);
			struct tiler_block *block;

			BUG_ON(omap_obj->block);

			ret = get_pages(obj, &pages);
			if (ret)
				goto fail;

			if (omap_obj->flags & OMAP_BO_TILED) {
				block = tiler_reserve_2d(fmt,
						omap_obj->width,
						omap_obj->height, 0);
			} else {
				block = tiler_reserve_1d(obj->size);
			}

			if (IS_ERR(block)) {
				ret = PTR_ERR(block);
				dev_err(obj->dev->dev,
					"could not remap: %d (%d)\n", ret, fmt);
				goto fail;
			}

			/* TODO: enable async refill.. */
			ret = tiler_pin(block, pages, npages,
					omap_obj->roll, true);
			if (ret) {
				tiler_release(block);
				dev_err(obj->dev->dev,
						"could not pin: %d\n", ret);
				goto fail;
			}

			omap_obj->paddr = tiler_ssptr(block);
			omap_obj->block = block;

			DBG("got paddr: %08x", omap_obj->paddr);
		}

		omap_obj->paddr_cnt++;

		*paddr = omap_obj->paddr;
	} else if (omap_obj->flags & OMAP_BO_DMA) {
		*paddr = omap_obj->paddr;
	} else {
		ret = -EINVAL;
		goto fail;
	}

fail:
	mutex_unlock(&obj->dev->struct_mutex);

	return ret;
}

/* Release physical address, when DMA is no longer being performed.. this
 * could potentially unpin and unmap buffers from TILER
 */
int omap_gem_put_paddr(struct drm_gem_object *obj)
{
	struct omap_gem_object *omap_obj = to_omap_bo(obj);
	int ret = 0;

	mutex_lock(&obj->dev->struct_mutex);
	if (omap_obj->paddr_cnt > 0) {
		omap_obj->paddr_cnt--;
		if (omap_obj->paddr_cnt == 0) {
			ret = tiler_unpin(omap_obj->block);
			if (ret) {
				dev_err(obj->dev->dev,
					"could not unpin pages: %d\n", ret);
				goto fail;
			}
			ret = tiler_release(omap_obj->block);
			if (ret) {
				dev_err(obj->dev->dev,
					"could not release unmap: %d\n", ret);
			}
			omap_obj->block = NULL;
		}
	}
fail:
	mutex_unlock(&obj->dev->struct_mutex);
	return ret;
}

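/*
 * Usage sketch for the get/put pair above (illustrative only): a
 * caller needing a contiguous view of a shmem buffer for the duration
 * of a DMA operation brackets the access like:
 *
 *	dma_addr_t paddr;
 *	int ret = omap_gem_get_paddr(obj, &paddr, true);
 *	if (!ret) {
 *		... program the hw with paddr ...
 *		omap_gem_put_paddr(obj);
 *	}
 *
 * paddr_cnt keeps the TILER pin alive across nested users; only the
 * last put actually unpins and releases the block.
 */
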
/* Get rotated scanout address (only valid if already pinned), at the
 * specified orientation and x,y offset from top-left corner of buffer
 * (only valid for tiled 2d buffers)
 */
int omap_gem_rotated_paddr(struct drm_gem_object *obj, uint32_t orient,
		int x, int y, dma_addr_t *paddr)
{
	struct omap_gem_object *omap_obj = to_omap_bo(obj);
	int ret = -EINVAL;

	mutex_lock(&obj->dev->struct_mutex);
	if ((omap_obj->paddr_cnt > 0) && omap_obj->block &&
			(omap_obj->flags & OMAP_BO_TILED)) {
		*paddr = tiler_tsptr(omap_obj->block, orient, x, y);
		ret = 0;
	}
	mutex_unlock(&obj->dev->struct_mutex);
	return ret;
}

/* Get tiler stride for the buffer (only valid for 2d tiled buffers) */
int omap_gem_tiled_stride(struct drm_gem_object *obj, uint32_t orient)
{
	struct omap_gem_object *omap_obj = to_omap_bo(obj);
	int ret = -EINVAL;
	if (omap_obj->flags & OMAP_BO_TILED)
		ret = tiler_stride(gem2fmt(omap_obj->flags), orient);
	return ret;
}

/* acquire pages when needed (for example, for DMA where a physically
 * contiguous buffer is not required)
 */
static int get_pages(struct drm_gem_object *obj, struct page ***pages)
{
	struct omap_gem_object *omap_obj = to_omap_bo(obj);
	int ret = 0;

	if (is_shmem(obj) && !omap_obj->pages) {
		ret = omap_gem_attach_pages(obj);
		if (ret) {
			dev_err(obj->dev->dev, "could not attach pages\n");
			return ret;
		}
	}

	/* TODO: even phys-contig.. we should have a list of pages? */
	*pages = omap_obj->pages;

	return 0;
}

/* if !remap, and we don't have pages backing, then fail, rather than
 * increasing the pin count (which we don't really do yet anyways,
 * because we don't support swapping pages back out).  And 'remap'
 * might not be quite the right name, but I wanted to keep it working
 * similarly to omap_gem_get_paddr().  Note though that mutex is not
 * acquired if !remap (because this can be called in atomic ctxt),
 * but probably omap_gem_get_paddr() should be changed to work in the
 * same way.  If !remap, a matching omap_gem_put_pages() call is not
 * required (and should not be made).
 */
int omap_gem_get_pages(struct drm_gem_object *obj, struct page ***pages,
		bool remap)
{
	int ret;
	if (!remap) {
		struct omap_gem_object *omap_obj = to_omap_bo(obj);
		if (!omap_obj->pages)
			return -ENOMEM;
		*pages = omap_obj->pages;
		return 0;
	}
	mutex_lock(&obj->dev->struct_mutex);
	ret = get_pages(obj, pages);
	mutex_unlock(&obj->dev->struct_mutex);
	return ret;
}

/* release pages when DMA no longer being performed */
int omap_gem_put_pages(struct drm_gem_object *obj)
{
	/* do something here if we dynamically attach/detach pages.. at
	 * least they would no longer need to be pinned if everyone has
	 * released the pages..
	 */
	return 0;
}

/* Get kernel virtual address for CPU access.. this more or less only
 * exists for omap_fbdev.  This should be called with struct_mutex
 * held.
 */
void *omap_gem_vaddr(struct drm_gem_object *obj)
{
	struct omap_gem_object *omap_obj = to_omap_bo(obj);
	WARN_ON(!mutex_is_locked(&obj->dev->struct_mutex));
	if (!omap_obj->vaddr) {
		struct page **pages;
		int ret = get_pages(obj, &pages);
		if (ret)
			return ERR_PTR(ret);
		omap_obj->vaddr = vmap(pages, obj->size >> PAGE_SHIFT,
				VM_MAP, pgprot_writecombine(PAGE_KERNEL));
	}
	return omap_obj->vaddr;
}

#ifdef CONFIG_DEBUG_FS
void omap_gem_describe(struct drm_gem_object *obj, struct seq_file *m)
{
	struct drm_device *dev = obj->dev;
	struct omap_gem_object *omap_obj = to_omap_bo(obj);
	uint64_t off = 0;

	WARN_ON(!mutex_is_locked(&dev->struct_mutex));

	if (obj->map_list.map)
		off = (uint64_t)obj->map_list.hash.key;

	seq_printf(m, "%08x: %2d (%2d) %08llx %08Zx (%2d) %p %4d",
			omap_obj->flags, obj->name, obj->refcount.refcount.counter,
			off, omap_obj->paddr, omap_obj->paddr_cnt,
			omap_obj->vaddr, omap_obj->roll);

	if (omap_obj->flags & OMAP_BO_TILED) {
		seq_printf(m, " %dx%d", omap_obj->width, omap_obj->height);
		if (omap_obj->block) {
			struct tcm_area *area = &omap_obj->block->area;
			seq_printf(m, " (%dx%d, %dx%d)",
					area->p0.x, area->p0.y,
					area->p1.x, area->p1.y);
		}
	} else {
		seq_printf(m, " %zu", obj->size);
	}

	seq_printf(m, "\n");
}

void omap_gem_describe_objects(struct list_head *list, struct seq_file *m)
{
	struct omap_gem_object *omap_obj;
	int count = 0;
	size_t size = 0;

	list_for_each_entry(omap_obj, list, mm_list) {
		struct drm_gem_object *obj = &omap_obj->base;
		seq_printf(m, " ");
		omap_gem_describe(obj, m);
		count++;
		size += obj->size;
	}

	seq_printf(m, "Total %d objects, %zu bytes\n", count, size);
}
#endif

/* Buffer Synchronization:
 */

struct omap_gem_sync_waiter {
	struct list_head list;
	struct omap_gem_object *omap_obj;
	enum omap_gem_op op;
	uint32_t read_target, write_target;
	/* notify called w/ sync_lock held */
	void (*notify)(void *arg);
	void *arg;
};

/* list of omap_gem_sync_waiter.. the notify fxn gets called back when
 * the read and/or write target count is achieved, which can call a user
 * callback (ex. to kick 3d and/or 2d), wakeup blocked task (prep for
 * cpu access), etc.
 */
static LIST_HEAD(waiters);

static inline bool is_waiting(struct omap_gem_sync_waiter *waiter)
{
	struct omap_gem_object *omap_obj = waiter->omap_obj;
	if ((waiter->op & OMAP_GEM_READ) &&
			(omap_obj->sync->read_complete < waiter->read_target))
		return true;
	if ((waiter->op & OMAP_GEM_WRITE) &&
			(omap_obj->sync->write_complete < waiter->write_target))
		return true;
	return false;
}

/* macro for sync debug.. */
#define SYNCDBG 0
#define SYNC(fmt, ...) do { if (SYNCDBG) \
		printk(KERN_ERR "%s:%d: "fmt"\n", \
				__func__, __LINE__, ##__VA_ARGS__); \
	} while (0)


static void sync_op_update(void)
{
	struct omap_gem_sync_waiter *waiter, *n;
	list_for_each_entry_safe(waiter, n, &waiters, list) {
		if (!is_waiting(waiter)) {
			list_del(&waiter->list);
			SYNC("notify: %p", waiter);
			waiter->notify(waiter->arg);
			kfree(waiter);
		}
	}
}

static inline int sync_op(struct drm_gem_object *obj,
		enum omap_gem_op op, bool start)
{
	struct omap_gem_object *omap_obj = to_omap_bo(obj);
	int ret = 0;

	spin_lock(&sync_lock);

	if (!omap_obj->sync) {
		omap_obj->sync = kzalloc(sizeof(*omap_obj->sync), GFP_ATOMIC);
		if (!omap_obj->sync) {
			ret = -ENOMEM;
			goto unlock;
		}
	}

	if (start) {
		if (op & OMAP_GEM_READ)
			omap_obj->sync->read_pending++;
		if (op & OMAP_GEM_WRITE)
			omap_obj->sync->write_pending++;
	} else {
		if (op & OMAP_GEM_READ)
			omap_obj->sync->read_complete++;
		if (op & OMAP_GEM_WRITE)
			omap_obj->sync->write_complete++;
		sync_op_update();
	}

unlock:
	spin_unlock(&sync_lock);

	return ret;
}

/* it is a bit lame to handle updates in this sort of polling way, but
 * in case of PVR, the GPU can directly update read/write complete
 * values, and not really tell us which ones it updated.. this also
 * means that sync_lock is not quite sufficient.  So we'll need to
 * do something a bit better when it comes time to add support for
 * separate 2d hw..
 */
void omap_gem_op_update(void)
{
	spin_lock(&sync_lock);
	sync_op_update();
	spin_unlock(&sync_lock);
}

/* mark the start of read and/or write operation */
int omap_gem_op_start(struct drm_gem_object *obj, enum omap_gem_op op)
{
	return sync_op(obj, op, true);
}

int omap_gem_op_finish(struct drm_gem_object *obj, enum omap_gem_op op)
{
	return sync_op(obj, op, false);
}

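/*
 * Usage sketch (illustrative, assumed flow): a driver kicking off a
 * hw write into a buffer, with the CPU later waiting on the result:
 *
 *	omap_gem_op_start(obj, OMAP_GEM_WRITE);
 *	... submit hw operation ...
 *	// from the hw completion path:
 *	omap_gem_op_finish(obj, OMAP_GEM_WRITE);
 *
 *	// elsewhere, before the CPU reads the buffer:
 *	omap_gem_op_sync(obj, OMAP_GEM_WRITE);
 *
 * omap_gem_op_sync() (below) blocks until the pending count sampled at
 * call time has been matched by completions.
 */
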
static DECLARE_WAIT_QUEUE_HEAD(sync_event);

static void sync_notify(void *arg)
{
	struct task_struct **waiter_task = arg;
	*waiter_task = NULL;
	wake_up_all(&sync_event);
}

int omap_gem_op_sync(struct drm_gem_object *obj, enum omap_gem_op op)
{
	struct omap_gem_object *omap_obj = to_omap_bo(obj);
	int ret = 0;
	if (omap_obj->sync) {
		struct task_struct *waiter_task = current;
		struct omap_gem_sync_waiter *waiter =
				kzalloc(sizeof(*waiter), GFP_KERNEL);

		if (!waiter) {
			return -ENOMEM;
		}

		waiter->omap_obj = omap_obj;
		waiter->op = op;
		waiter->read_target = omap_obj->sync->read_pending;
		waiter->write_target = omap_obj->sync->write_pending;
		waiter->notify = sync_notify;
		waiter->arg = &waiter_task;

		spin_lock(&sync_lock);
		if (is_waiting(waiter)) {
			SYNC("waited: %p", waiter);
			list_add_tail(&waiter->list, &waiters);
			spin_unlock(&sync_lock);
			ret = wait_event_interruptible(sync_event,
					(waiter_task == NULL));
			spin_lock(&sync_lock);
			if (waiter_task) {
				SYNC("interrupted: %p", waiter);
				/* we were interrupted */
				list_del(&waiter->list);
				waiter_task = NULL;
			} else {
				/* freed in sync_op_update() */
				waiter = NULL;
			}
		}
		spin_unlock(&sync_lock);

		if (waiter) {
			kfree(waiter);
		}
	}
	return ret;
}

/* call fxn(arg), either synchronously or asynchronously if the op
 * is currently blocked.. fxn() can be called from any context
 *
 * (TODO for now fxn is called back from whichever context calls
 * omap_gem_op_update().. but this could be better defined later
 * if needed)
 *
 * TODO more code in common w/ _sync()..
 */
int omap_gem_op_async(struct drm_gem_object *obj, enum omap_gem_op op,
		void (*fxn)(void *arg), void *arg)
{
	struct omap_gem_object *omap_obj = to_omap_bo(obj);
	if (omap_obj->sync) {
		struct omap_gem_sync_waiter *waiter =
				kzalloc(sizeof(*waiter), GFP_ATOMIC);

		if (!waiter) {
			return -ENOMEM;
		}

		waiter->omap_obj = omap_obj;
		waiter->op = op;
		waiter->read_target = omap_obj->sync->read_pending;
		waiter->write_target = omap_obj->sync->write_pending;
		waiter->notify = fxn;
		waiter->arg = arg;

		spin_lock(&sync_lock);
		if (is_waiting(waiter)) {
			SYNC("waited: %p", waiter);
			list_add_tail(&waiter->list, &waiters);
			spin_unlock(&sync_lock);
			return 0;
		}

		spin_unlock(&sync_lock);

		/* not waiting: free the unused waiter before the
		 * synchronous callback, so it doesn't leak:
		 */
		kfree(waiter);
	}

	/* no waiting.. */
	fxn(arg);

	return 0;
}

/* special API so PVR can update the buffer to use a sync-object allocated
 * from its sync-obj heap.  Only used for a newly allocated (from PVR's
 * perspective) sync-object, so we overwrite the new syncobj w/ values
 * from the already allocated syncobj (if there is one)
 */
int omap_gem_set_sync_object(struct drm_gem_object *obj, void *syncobj)
{
	struct omap_gem_object *omap_obj = to_omap_bo(obj);
	int ret = 0;

	spin_lock(&sync_lock);

	if ((omap_obj->flags & OMAP_BO_EXT_SYNC) && !syncobj) {
		/* clearing a previously set syncobj */
		syncobj = kzalloc(sizeof(*omap_obj->sync), GFP_ATOMIC);
		if (!syncobj) {
			ret = -ENOMEM;
			goto unlock;
		}
		memcpy(syncobj, omap_obj->sync, sizeof(*omap_obj->sync));
		omap_obj->flags &= ~OMAP_BO_EXT_SYNC;
		omap_obj->sync = syncobj;
	} else if (syncobj && !(omap_obj->flags & OMAP_BO_EXT_SYNC)) {
		/* replacing an existing syncobj */
		if (omap_obj->sync) {
			memcpy(syncobj, omap_obj->sync, sizeof(*omap_obj->sync));
			kfree(omap_obj->sync);
		}
		omap_obj->flags |= OMAP_BO_EXT_SYNC;
		omap_obj->sync = syncobj;
	}

unlock:
	spin_unlock(&sync_lock);
	return ret;
}

int omap_gem_init_object(struct drm_gem_object *obj)
{
	return -EINVAL;		/* unused */
}

/* don't call directly.. called from GEM core when it is time to actually
 * free the object..
 */
void omap_gem_free_object(struct drm_gem_object *obj)
{
	struct drm_device *dev = obj->dev;
	struct omap_gem_object *omap_obj = to_omap_bo(obj);

	evict(obj);

	WARN_ON(!mutex_is_locked(&dev->struct_mutex));

	list_del(&omap_obj->mm_list);

	if (obj->map_list.map) {
		drm_gem_free_mmap_offset(obj);
	}

	/* this means the object is still pinned.. which really should
	 * not happen.  I think..
	 */
	WARN_ON(omap_obj->paddr_cnt > 0);

	/* don't free externally allocated backing memory */
	if (!(omap_obj->flags & OMAP_BO_EXT_MEM)) {
		if (omap_obj->pages) {
			omap_gem_detach_pages(obj);
		}
		if (!is_shmem(obj)) {
			dma_free_writecombine(dev->dev, obj->size,
					omap_obj->vaddr, omap_obj->paddr);
		} else if (omap_obj->vaddr) {
			vunmap(omap_obj->vaddr);
		}
	}

	/* don't free externally allocated syncobj */
	if (!(omap_obj->flags & OMAP_BO_EXT_SYNC)) {
		kfree(omap_obj->sync);
	}

	drm_gem_object_release(obj);

	kfree(obj);
}

/* convenience method to construct a GEM buffer object, and userspace handle */
int omap_gem_new_handle(struct drm_device *dev, struct drm_file *file,
		union omap_gem_size gsize, uint32_t flags, uint32_t *handle)
{
	struct drm_gem_object *obj;
	int ret;

	obj = omap_gem_new(dev, gsize, flags);
	if (!obj)
		return -ENOMEM;

	ret = drm_gem_handle_create(file, obj, handle);
	if (ret) {
		drm_gem_object_release(obj);
		kfree(obj);	/* TODO isn't there a dtor to call? just copying i915 */
		return ret;
	}

	/* drop reference from allocate - handle holds it now */
	drm_gem_object_unreference_unlocked(obj);

	return 0;
}

1343 | ||
1344 | /* GEM buffer object constructor */ | |
1345 | struct drm_gem_object *omap_gem_new(struct drm_device *dev, | |
1346 | union omap_gem_size gsize, uint32_t flags) | |
1347 | { | |
a6a91827 | 1348 | struct omap_drm_private *priv = dev->dev_private; |
cd5351f4 RC |
1349 | struct omap_gem_object *omap_obj; |
1350 | struct drm_gem_object *obj = NULL; | |
1351 | size_t size; | |
1352 | int ret; | |
1353 | ||
1354 | if (flags & OMAP_BO_TILED) { | |
f7f9f453 RC |
1355 | if (!usergart) { |
1356 | dev_err(dev->dev, "Tiled buffers require DMM\n"); | |
1357 | goto fail; | |
1358 | } | |
1359 | ||
1360 | /* tiled buffers are always shmem paged backed.. when they are | |
1361 | * scanned out, they are remapped into DMM/TILER | |
1362 | */ | |
1363 | flags &= ~OMAP_BO_SCANOUT; | |
1364 | ||
1365 | /* currently don't allow cached buffers.. there is some caching | |
1366 | * stuff that needs to be handled better | |
1367 | */ | |
1368 | flags &= ~(OMAP_BO_CACHED|OMAP_BO_UNCACHED); | |
1369 | flags |= OMAP_BO_WC; | |
cd5351f4 | 1370 | |
f7f9f453 RC |
1371 | /* align dimensions to slot boundaries... */ |
1372 | tiler_align(gem2fmt(flags), | |
1373 | &gsize.tiled.width, &gsize.tiled.height); | |
1374 | ||
1375 | /* ...and calculate size based on aligned dimensions */ | |
1376 | size = tiler_size(gem2fmt(flags), | |
1377 | gsize.tiled.width, gsize.tiled.height); | |
1378 | } else { | |
1379 | size = PAGE_ALIGN(gsize.bytes); | |
1380 | } | |
cd5351f4 RC |
1381 | |
1382 | omap_obj = kzalloc(sizeof(*omap_obj), GFP_KERNEL); | |
1383 | if (!omap_obj) { | |
1384 | dev_err(dev->dev, "could not allocate GEM object\n"); | |
1385 | goto fail; | |
1386 | } | |
1387 | ||
f6b6036e RC |
1388 | list_add(&omap_obj->mm_list, &priv->obj_list); |
1389 | ||
cd5351f4 RC |
1390 | obj = &omap_obj->base; |
1391 | ||
a6a91827 RC |
1392 | if ((flags & OMAP_BO_SCANOUT) && !priv->has_dmm) { |
1393 | /* attempt to allocate contiguous memory if we don't | |
1394 | * have DMM for remappign discontiguous buffers | |
1395 | */ | |
cd5351f4 RC |
1396 | omap_obj->vaddr = dma_alloc_writecombine(dev->dev, size, |
1397 | &omap_obj->paddr, GFP_KERNEL); | |
1398 | if (omap_obj->vaddr) { | |
1399 | flags |= OMAP_BO_DMA; | |
1400 | } | |
1401 | } | |
1402 | ||
1403 | omap_obj->flags = flags; | |
1404 | ||
f7f9f453 RC |
1405 | if (flags & OMAP_BO_TILED) { |
1406 | omap_obj->width = gsize.tiled.width; | |
1407 | omap_obj->height = gsize.tiled.height; | |
1408 | } | |
1409 | ||
cd5351f4 RC |
1410 | if (flags & (OMAP_BO_DMA|OMAP_BO_EXT_MEM)) { |
1411 | ret = drm_gem_private_object_init(dev, obj, size); | |
1412 | } else { | |
1413 | ret = drm_gem_object_init(dev, obj, size); | |
1414 | } | |
1415 | ||
1416 | if (ret) { | |
1417 | goto fail; | |
1418 | } | |
1419 | ||
1420 | return obj; | |
1421 | ||
1422 | fail: | |
1423 | if (obj) { | |
1424 | omap_gem_free_object(obj); | |
1425 | } | |
1426 | return NULL; | |
1427 | } | |

/* init/cleanup.. if DMM is used, we need to set some stuff up.. */
void omap_gem_init(struct drm_device *dev)
{
	struct omap_drm_private *priv = dev->dev_private;
	const enum tiler_fmt fmts[] = {
			TILFMT_8BIT, TILFMT_16BIT, TILFMT_32BIT
	};
	int i, j;

	if (!dmm_is_available()) {
		/* DMM only supported on OMAP4 and later, so this isn't fatal */
		dev_warn(dev->dev, "DMM not available, disabling DMM support\n");
		return;
	}

	usergart = kzalloc(3 * sizeof(*usergart), GFP_KERNEL);
	if (!usergart) {
		dev_warn(dev->dev, "could not allocate usergart\n");
		return;
	}

	/* reserve 4k aligned/wide regions for userspace mappings: */
	for (i = 0; i < ARRAY_SIZE(fmts); i++) {
		uint16_t h = 1, w = PAGE_SIZE >> i;
		tiler_align(fmts[i], &w, &h);
		/* note: since each region is 1 4kb page wide, and minimum
		 * number of rows, the height ends up being the same as the
		 * # of pages in the region
		 */
		usergart[i].height = h;
		usergart[i].height_shift = ilog2(h);
		usergart[i].stride_pfn = tiler_stride(fmts[i], 0) >> PAGE_SHIFT;
		usergart[i].slot_shift = ilog2((PAGE_SIZE / h) >> i);
		for (j = 0; j < NUM_USERGART_ENTRIES; j++) {
			struct usergart_entry *entry = &usergart[i].entry[j];
			struct tiler_block *block =
					tiler_reserve_2d(fmts[i], w, h,
							PAGE_SIZE);
			if (IS_ERR(block)) {
				dev_err(dev->dev,
						"reserve failed: %d, %d, %ld\n",
						i, j, PTR_ERR(block));
				return;
			}
			entry->paddr = tiler_ssptr(block);
			entry->block = block;

			DBG("%d:%d: %dx%d: paddr=%08x stride=%d", i, j, w, h,
					entry->paddr,
					usergart[i].stride_pfn << PAGE_SHIFT);
		}
	}

	priv->has_dmm = true;
}

void omap_gem_deinit(struct drm_device *dev)
{
	/* I believe we can rely on there being no more outstanding GEM
	 * objects which could depend on usergart/dmm at this point.
	 */
	kfree(usergart);
}