/*
 * arch/tile/kernel/pci-dma.c
 *
 * Copyright 2010 Tilera Corporation. All Rights Reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation, version 2.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
 * NON INFRINGEMENT.  See the GNU General Public License for
 * more details.
 */

#include <linux/mm.h>
#include <linux/dma-mapping.h>
#include <linux/vmalloc.h>
#include <linux/export.h>
#include <asm/tlbflush.h>
#include <asm/homecache.h>

/* Generic DMA mapping functions: */

/*
 * Allocate what Linux calls "coherent" memory.  On TILEPro this is
 * uncached memory; on TILE-Gx it is hash-for-home memory.
 */
#ifdef __tilepro__
#define PAGE_HOME_DMA PAGE_HOME_UNCACHED
#else
#define PAGE_HOME_DMA PAGE_HOME_HASH
#endif

void *dma_alloc_coherent(struct device *dev,
			 size_t size,
			 dma_addr_t *dma_handle,
			 gfp_t gfp)
{
	u64 dma_mask = dev->coherent_dma_mask ?: DMA_BIT_MASK(32);
	int node = dev_to_node(dev);
	int order = get_order(size);
	struct page *pg;
	dma_addr_t addr;

	gfp |= __GFP_ZERO;

	/*
	 * By forcing NUMA node 0 for 32-bit masks we ensure that the
	 * high 32 bits of the resulting PA will be zero.  If the mask
	 * size is, e.g., 24, we may still not be able to guarantee a
	 * suitable memory address, in which case we will return NULL.
	 * But such devices are uncommon.
	 */
	if (dma_mask <= DMA_BIT_MASK(32))
		node = 0;

	pg = homecache_alloc_pages_node(node, gfp, order, PAGE_HOME_DMA);
	if (pg == NULL)
		return NULL;

	addr = page_to_phys(pg);
	if (addr + size > dma_mask) {
		__homecache_free_pages(pg, order);
		return NULL;
	}

	*dma_handle = addr;
	return page_address(pg);
}
EXPORT_SYMBOL(dma_alloc_coherent);

/*
 * Free memory that was allocated with dma_alloc_coherent.
 */
void dma_free_coherent(struct device *dev, size_t size,
		       void *vaddr, dma_addr_t dma_handle)
{
	homecache_free_pages((unsigned long)vaddr, get_order(size));
}
EXPORT_SYMBOL(dma_free_coherent);

/*
 * The map routines "map" the specified address range for DMA
 * accesses.  The memory belongs to the device after this call is
 * issued, until it is unmapped with dma_unmap_single.
 *
 * We don't need to do any mapping, we just flush the address range
 * out of the cache and return a DMA address.
 *
 * The unmap routines do whatever is necessary before the processor
 * accesses the memory again, and must be called before the driver
 * touches the memory.  We can get away with a cache invalidate if we
 * can count on nothing having been touched.
 */

/* Set up a single page for DMA access. */
static void __dma_prep_page(struct page *page, unsigned long offset,
			    size_t size, enum dma_data_direction direction)
{
	/*
	 * Flush the page from cache if necessary.
	 * On tilegx, data is delivered to hash-for-home L3; on tilepro,
	 * data is delivered direct to memory.
	 *
	 * NOTE: If we were just doing DMA_TO_DEVICE we could optimize
	 * this to be a "flush" not a "finv" and keep some of the
	 * state in cache across the DMA operation, but it doesn't seem
	 * worth creating the necessary flush_buffer_xxx() infrastructure.
	 */
	int home = page_home(page);
	switch (home) {
	case PAGE_HOME_HASH:
#ifdef __tilegx__
		return;
#endif
		break;
	case PAGE_HOME_UNCACHED:
#ifdef __tilepro__
		return;
#endif
		break;
	case PAGE_HOME_IMMUTABLE:
		/* Should be going to the device only. */
		BUG_ON(direction == DMA_FROM_DEVICE ||
		       direction == DMA_BIDIRECTIONAL);
		return;
	case PAGE_HOME_INCOHERENT:
		/* Incoherent anyway, so no need to work hard here. */
		return;
	default:
		BUG_ON(home < 0 || home >= NR_CPUS);
		break;
	}
	homecache_finv_page(page);

#ifdef DEBUG_ALIGNMENT
	/* Warn if the region isn't cacheline aligned. */
	if (offset & (L2_CACHE_BYTES - 1) || (size & (L2_CACHE_BYTES - 1)))
		pr_warn("Unaligned DMA to non-hfh memory: PA %#llx/%#lx\n",
			PFN_PHYS(page_to_pfn(page)) + offset, size);
#endif
}

/* Make the page ready to be read by the core. */
static void __dma_complete_page(struct page *page, unsigned long offset,
				size_t size, enum dma_data_direction direction)
{
#ifdef __tilegx__
	switch (page_home(page)) {
	case PAGE_HOME_HASH:
		/* I/O device delivered data the way the cpu wanted it. */
		break;
	case PAGE_HOME_INCOHERENT:
		/* Incoherent anyway, so no need to work hard here. */
		break;
	case PAGE_HOME_IMMUTABLE:
		/* Extra read-only copies are not a problem. */
		break;
	default:
		/* Flush the bogus hash-for-home I/O entries to memory. */
		homecache_finv_map_page(page, PAGE_HOME_HASH);
		break;
	}
#endif
}

static void __dma_prep_pa_range(dma_addr_t dma_addr, size_t size,
				enum dma_data_direction direction)
{
	struct page *page = pfn_to_page(PFN_DOWN(dma_addr));
	unsigned long offset = dma_addr & (PAGE_SIZE - 1);
	size_t bytes = min(size, (size_t)(PAGE_SIZE - offset));

	while (size != 0) {
		__dma_prep_page(page, offset, bytes, direction);
		size -= bytes;
		++page;
		offset = 0;
		bytes = min((size_t)PAGE_SIZE, size);
	}
}

static void __dma_complete_pa_range(dma_addr_t dma_addr, size_t size,
				    enum dma_data_direction direction)
{
	struct page *page = pfn_to_page(PFN_DOWN(dma_addr));
	unsigned long offset = dma_addr & (PAGE_SIZE - 1);
	size_t bytes = min(size, (size_t)(PAGE_SIZE - offset));

	while (size != 0) {
		__dma_complete_page(page, offset, bytes, direction);
		size -= bytes;
		++page;
		offset = 0;
		bytes = min((size_t)PAGE_SIZE, size);
	}
}

/*
 * dma_map_single can be passed any memory address, and there appear
 * to be no alignment constraints.
 *
 * There is a chance that the start of the buffer will share a cache
 * line with some other data that has been touched in the meantime.
 */
dma_addr_t dma_map_single(struct device *dev, void *ptr, size_t size,
			  enum dma_data_direction direction)
{
	dma_addr_t dma_addr = __pa(ptr);

	BUG_ON(!valid_dma_direction(direction));
	WARN_ON(size == 0);

	__dma_prep_pa_range(dma_addr, size, direction);

	return dma_addr;
}
EXPORT_SYMBOL(dma_map_single);

void dma_unmap_single(struct device *dev, dma_addr_t dma_addr, size_t size,
		      enum dma_data_direction direction)
{
	BUG_ON(!valid_dma_direction(direction));
	__dma_complete_pa_range(dma_addr, size, direction);
}
EXPORT_SYMBOL(dma_unmap_single);
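
/*
 * Usage sketch (illustrative only, not part of the original file): a
 * hypothetical transmit path built on dma_map_single()/dma_unmap_single().
 * The "start_tx" callback stands in for whatever descriptor or doorbell
 * write a real driver would perform; it is not a real API.
 */
static inline void example_tx_one(struct device *dev, void *buf, size_t len,
				  void (*start_tx)(dma_addr_t bus, size_t len))
{
	dma_addr_t bus;

	/* Push the buffer out of the caches and get a device-visible PA. */
	bus = dma_map_single(dev, buf, len, DMA_TO_DEVICE);

	/* The buffer now belongs to the device; the cpu must not touch it. */
	start_tx(bus, len);

	/*
	 * After the device signals completion (not shown), return the
	 * buffer to the cpu before it is read or reused.
	 */
	dma_unmap_single(dev, bus, len, DMA_TO_DEVICE);
}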

int dma_map_sg(struct device *dev, struct scatterlist *sglist, int nents,
	       enum dma_data_direction direction)
{
	struct scatterlist *sg;
	int i;

	BUG_ON(!valid_dma_direction(direction));

	WARN_ON(nents == 0 || sglist->length == 0);

	for_each_sg(sglist, sg, nents, i) {
		sg->dma_address = sg_phys(sg);
		__dma_prep_pa_range(sg->dma_address, sg->length, direction);
	}

	return nents;
}
EXPORT_SYMBOL(dma_map_sg);

void dma_unmap_sg(struct device *dev, struct scatterlist *sglist, int nents,
		  enum dma_data_direction direction)
{
	struct scatterlist *sg;
	int i;

	BUG_ON(!valid_dma_direction(direction));
	for_each_sg(sglist, sg, nents, i) {
		sg->dma_address = sg_phys(sg);
		__dma_complete_pa_range(sg->dma_address, sg->length,
					direction);
	}
}
EXPORT_SYMBOL(dma_unmap_sg);
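
/*
 * Usage sketch (illustrative only, not part of the original file): mapping
 * a one-entry scatterlist with dma_map_sg() and unmapping it afterwards.
 * "example_map_one_sg" is a made-up name; sg_init_one() is the generic
 * scatterlist helper.
 */
static inline void example_map_one_sg(struct device *dev, void *buf,
				      unsigned int len)
{
	struct scatterlist sg;

	sg_init_one(&sg, buf, len);

	/* Flushes the covered pages and fills in sg.dma_address. */
	if (dma_map_sg(dev, &sg, 1, DMA_TO_DEVICE) == 0)
		return;

	/* ... program the device from sg_dma_address()/sg_dma_len() ... */

	dma_unmap_sg(dev, &sg, 1, DMA_TO_DEVICE);
}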

dma_addr_t dma_map_page(struct device *dev, struct page *page,
			unsigned long offset, size_t size,
			enum dma_data_direction direction)
{
	BUG_ON(!valid_dma_direction(direction));

	BUG_ON(offset + size > PAGE_SIZE);
	__dma_prep_page(page, offset, size, direction);
	return page_to_pa(page) + offset;
}
EXPORT_SYMBOL(dma_map_page);

void dma_unmap_page(struct device *dev, dma_addr_t dma_address, size_t size,
		    enum dma_data_direction direction)
{
	BUG_ON(!valid_dma_direction(direction));
	__dma_complete_page(pfn_to_page(PFN_DOWN(dma_address)),
			    dma_address & (PAGE_SIZE - 1), size, direction);
}
EXPORT_SYMBOL(dma_unmap_page);
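
/*
 * Usage sketch (illustrative only, not part of the original file): a
 * hypothetical receive path that hands a whole page to the device with
 * dma_map_page() and reclaims it with dma_unmap_page().  The helper names
 * are made up for this sketch.
 */
static inline struct page *example_rx_post_page(struct device *dev,
						dma_addr_t *bus_addr)
{
	struct page *page = alloc_page(GFP_KERNEL);

	if (page == NULL)
		return NULL;

	/* Whole page: offset 0, length PAGE_SIZE. */
	*bus_addr = dma_map_page(dev, page, 0, PAGE_SIZE, DMA_FROM_DEVICE);
	return page;
}

static inline void example_rx_reclaim_page(struct device *dev,
					   dma_addr_t bus_addr)
{
	/* Once the device has filled the page, give it back to the cpu. */
	dma_unmap_page(dev, bus_addr, PAGE_SIZE, DMA_FROM_DEVICE);
}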

void dma_sync_single_for_cpu(struct device *dev, dma_addr_t dma_handle,
			     size_t size, enum dma_data_direction direction)
{
	BUG_ON(!valid_dma_direction(direction));
	__dma_complete_pa_range(dma_handle, size, direction);
}
EXPORT_SYMBOL(dma_sync_single_for_cpu);

void dma_sync_single_for_device(struct device *dev, dma_addr_t dma_handle,
				size_t size, enum dma_data_direction direction)
{
	__dma_prep_pa_range(dma_handle, size, direction);
}
EXPORT_SYMBOL(dma_sync_single_for_device);

void dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sglist,
			 int nelems, enum dma_data_direction direction)
{
	struct scatterlist *sg;
	int i;

	BUG_ON(!valid_dma_direction(direction));
	WARN_ON(nelems == 0 || sglist->length == 0);

	for_each_sg(sglist, sg, nelems, i) {
		dma_sync_single_for_cpu(dev, sg->dma_address,
					sg_dma_len(sg), direction);
	}
}
EXPORT_SYMBOL(dma_sync_sg_for_cpu);

void dma_sync_sg_for_device(struct device *dev, struct scatterlist *sglist,
			    int nelems, enum dma_data_direction direction)
{
	struct scatterlist *sg;
	int i;

	BUG_ON(!valid_dma_direction(direction));
	WARN_ON(nelems == 0 || sglist->length == 0);

	for_each_sg(sglist, sg, nelems, i) {
		dma_sync_single_for_device(dev, sg->dma_address,
					   sg_dma_len(sg), direction);
	}
}
EXPORT_SYMBOL(dma_sync_sg_for_device);

void dma_sync_single_range_for_cpu(struct device *dev, dma_addr_t dma_handle,
				   unsigned long offset, size_t size,
				   enum dma_data_direction direction)
{
	dma_sync_single_for_cpu(dev, dma_handle + offset, size, direction);
}
EXPORT_SYMBOL(dma_sync_single_range_for_cpu);

void dma_sync_single_range_for_device(struct device *dev,
				      dma_addr_t dma_handle,
				      unsigned long offset, size_t size,
				      enum dma_data_direction direction)
{
	dma_sync_single_for_device(dev, dma_handle + offset, size, direction);
}
EXPORT_SYMBOL(dma_sync_single_range_for_device);
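
/*
 * Usage sketch (illustrative only, not part of the original file): reusing
 * a single long-lived streaming mapping across transfers by passing
 * ownership back and forth with the sync calls instead of remapping the
 * buffer each time.  "example_reuse_mapping" is a made-up name.
 */
static inline void example_reuse_mapping(struct device *dev, dma_addr_t bus,
					 void *buf, size_t len)
{
	/* Give the buffer to the cpu so it can read what the device wrote. */
	dma_sync_single_for_cpu(dev, bus, len, DMA_FROM_DEVICE);

	/* ... inspect buf here ... */
	(void)buf;

	/* Hand it back to the device before starting the next transfer. */
	dma_sync_single_for_device(dev, bus, len, DMA_FROM_DEVICE);
}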

/*
 * dma_alloc_noncoherent() is #defined to return coherent memory,
 * so there's no need to do any flushing here.
 */
void dma_cache_sync(struct device *dev, void *vaddr, size_t size,
		    enum dma_data_direction direction)
{
}
EXPORT_SYMBOL(dma_cache_sync);