/*
 * SWIOTLB-based DMA API implementation
 *
 * Copyright (C) 2012 ARM Ltd.
 * Author: Catalin Marinas <catalin.marinas@arm.com>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */

#include <linux/gfp.h>
#include <linux/acpi.h>
#include <linux/memblock.h>
#include <linux/cache.h>
#include <linux/export.h>
#include <linux/slab.h>
#include <linux/genalloc.h>
#include <linux/dma-direct.h>
#include <linux/dma-noncoherent.h>
#include <linux/dma-contiguous.h>
#include <linux/vmalloc.h>
#include <linux/swiotlb.h>
#include <linux/pci.h>

#include <asm/cacheflush.h>
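
/*
 * Non-coherent devices, and any mapping explicitly requested as
 * write-combined, must not be exposed to userspace through a cacheable
 * mapping; demote the protection bits accordingly.
 */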
pgprot_t arch_dma_mmap_pgprot(struct device *dev, pgprot_t prot,
		unsigned long attrs)
{
	if (!dev_is_dma_coherent(dev) || (attrs & DMA_ATTR_WRITE_COMBINE))
		return pgprot_writecombine(prot);
	return prot;
}
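
/*
 * Cache maintenance hooks for streaming DMA: __dma_map_area() makes CPU
 * writes visible to the device before a transfer, and __dma_unmap_area()
 * makes device writes visible to the CPU afterwards, both via the kernel
 * linear-map alias of the buffer.
 */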
void arch_sync_dma_for_device(struct device *dev, phys_addr_t paddr,
		size_t size, enum dma_data_direction dir)
{
	__dma_map_area(phys_to_virt(paddr), size, dir);
}

void arch_sync_dma_for_cpu(struct device *dev, phys_addr_t paddr,
		size_t size, enum dma_data_direction dir)
{
	__dma_unmap_area(phys_to_virt(paddr), size, dir);
}
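
/*
 * A buffer about to become a coherent allocation may still have dirty
 * cache lines from a previous owner; flush them so its non-cacheable
 * alias observes consistent data.
 */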
void arch_dma_prep_coherent(struct page *page, size_t size)
{
	__dma_flush_area(page_address(page), size);
}

#ifdef CONFIG_IOMMU_DMA
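/*
 * Helpers for the mmap/get_sgtable paths below, covering buffers that are
 * physically contiguous and hence describable by one scatterlist entry or
 * one remap_pfn_range() call.
 */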
static int __swiotlb_get_sgtable_page(struct sg_table *sgt,
				      struct page *page, size_t size)
{
	int ret = sg_alloc_table(sgt, 1, GFP_KERNEL);

	if (!ret)
		sg_set_page(sgt->sgl, page, PAGE_ALIGN(size), 0);

	return ret;
}
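
/*
 * Honour a partial mapping request via vma->vm_pgoff, but refuse to map
 * anything beyond the end of the buffer.
 */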
static int __swiotlb_mmap_pfn(struct vm_area_struct *vma,
			      unsigned long pfn, size_t size)
{
	int ret = -ENXIO;
	unsigned long nr_vma_pages = vma_pages(vma);
	unsigned long nr_pages = PAGE_ALIGN(size) >> PAGE_SHIFT;
	unsigned long off = vma->vm_pgoff;

	if (off < nr_pages && nr_vma_pages <= (nr_pages - off)) {
		ret = remap_pfn_range(vma, vma->vm_start,
				      pfn + off,
				      vma->vm_end - vma->vm_start,
				      vma->vm_page_prot);
	}

	return ret;
}
#endif /* CONFIG_IOMMU_DMA */
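
/*
 * ARCH_DMA_MINALIGN is a build-time constant, but the CPU's real cache
 * writeback granule (CTR_EL0.CWG, as reported by cache_line_size()) is only
 * known at boot, so taint the kernel if the hardware exceeds our assumption.
 */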
static int __init arm64_dma_init(void)
{
	WARN_TAINT(ARCH_DMA_MINALIGN < cache_line_size(),
		   TAINT_CPU_OUT_OF_SPEC,
		   "ARCH_DMA_MINALIGN smaller than CTR_EL0.CWG (%d < %d)",
		   ARCH_DMA_MINALIGN, cache_line_size());
	return dma_atomic_pool_init(GFP_DMA32, __pgprot(PROT_NORMAL_NC));
}
arch_initcall(arm64_dma_init);

#ifdef CONFIG_IOMMU_DMA
#include <linux/dma-iommu.h>
#include <linux/platform_device.h>
#include <linux/amba/bus.h>

/* Thankfully, all cache ops are by VA so we can ignore phys here */
static void flush_page(struct device *dev, const void *virt, phys_addr_t phys)
{
	__dma_flush_area(virt, PAGE_SIZE);
}

static void *__iommu_alloc_attrs(struct device *dev, size_t size,
				 dma_addr_t *handle, gfp_t gfp,
				 unsigned long attrs)
{
	bool coherent = dev_is_dma_coherent(dev);
	int ioprot = dma_info_to_prot(DMA_BIDIRECTIONAL, coherent, attrs);
	size_t iosize = size;
	void *addr;

	if (WARN(!dev, "cannot create IOMMU mapping for unknown device\n"))
		return NULL;

	size = PAGE_ALIGN(size);

	/*
	 * Some drivers rely on this, and we probably don't want the
	 * possibility of stale kernel data being read by devices anyway.
	 */
	gfp |= __GFP_ZERO;

	if (!gfpflags_allow_blocking(gfp)) {
		struct page *page;
		/*
		 * In atomic context we can't remap anything, so we'll only
		 * get the virtually contiguous buffer we need by way of a
		 * physically contiguous allocation.
		 */
		if (coherent) {
			page = alloc_pages(gfp, get_order(size));
			addr = page ? page_address(page) : NULL;
		} else {
			addr = dma_alloc_from_pool(size, &page, gfp);
		}
		if (!addr)
			return NULL;

		*handle = iommu_dma_map_page(dev, page, 0, iosize, ioprot);
		if (*handle == DMA_MAPPING_ERROR) {
			if (coherent)
				__free_pages(page, get_order(size));
			else
				dma_free_from_pool(addr, size);
			addr = NULL;
		}
	} else if (attrs & DMA_ATTR_FORCE_CONTIGUOUS) {
		pgprot_t prot = arch_dma_mmap_pgprot(dev, PAGE_KERNEL, attrs);
		struct page *page;

		page = dma_alloc_from_contiguous(dev, size >> PAGE_SHIFT,
						 get_order(size),
						 gfp & __GFP_NOWARN);
		if (!page)
			return NULL;

		*handle = iommu_dma_map_page(dev, page, 0, iosize, ioprot);
		if (*handle == DMA_MAPPING_ERROR) {
			dma_release_from_contiguous(dev, page,
						    size >> PAGE_SHIFT);
			return NULL;
		}
		addr = dma_common_contiguous_remap(page, size, VM_USERMAP,
						   prot,
						   __builtin_return_address(0));
		if (addr) {
			if (!coherent)
				__dma_flush_area(page_to_virt(page), iosize);
			memset(addr, 0, size);
		} else {
			iommu_dma_unmap_page(dev, *handle, iosize, 0, attrs);
			dma_release_from_contiguous(dev, page,
						    size >> PAGE_SHIFT);
		}
	} else {
		pgprot_t prot = arch_dma_mmap_pgprot(dev, PAGE_KERNEL, attrs);
		struct page **pages;

		pages = iommu_dma_alloc(dev, iosize, gfp, attrs, ioprot,
					handle, flush_page);
		if (!pages)
			return NULL;

		addr = dma_common_pages_remap(pages, size, VM_USERMAP, prot,
					      __builtin_return_address(0));
		if (!addr)
			iommu_dma_free(dev, pages, iosize, handle);
	}
	return addr;
}
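
/*
 * Illustrative only, not part of this file: a driver reaches the paths
 * above through the generic DMA API, e.g.
 *
 *	dma_addr_t iova;
 *	void *cpu_addr = dma_alloc_attrs(dev, SZ_64K, &iova, GFP_KERNEL,
 *					 DMA_ATTR_FORCE_CONTIGUOUS);
 *
 * which selects the CMA branch of __iommu_alloc_attrs(); a GFP_ATOMIC
 * caller would take the atomic-pool branch instead.
 */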

static void __iommu_free_attrs(struct device *dev, size_t size, void *cpu_addr,
			       dma_addr_t handle, unsigned long attrs)
{
	size_t iosize = size;

	size = PAGE_ALIGN(size);
	/*
	 * @cpu_addr will be one of 4 things depending on how it was allocated:
	 * - A remapped array of pages for contiguous allocations.
	 * - A remapped array of pages from iommu_dma_alloc(), for all
	 *   non-atomic allocations.
	 * - A non-cacheable alias from the atomic pool, for atomic
	 *   allocations by non-coherent devices.
	 * - A normal lowmem address, for atomic allocations by
	 *   coherent devices.
	 * Hence how dodgy the below logic looks...
	 */
	if (dma_in_atomic_pool(cpu_addr, size)) {
		iommu_dma_unmap_page(dev, handle, iosize, 0, 0);
		dma_free_from_pool(cpu_addr, size);
	} else if (attrs & DMA_ATTR_FORCE_CONTIGUOUS) {
		struct page *page = vmalloc_to_page(cpu_addr);

		iommu_dma_unmap_page(dev, handle, iosize, 0, attrs);
		dma_release_from_contiguous(dev, page, size >> PAGE_SHIFT);
		dma_common_free_remap(cpu_addr, size, VM_USERMAP);
	} else if (is_vmalloc_addr(cpu_addr)) {
		struct vm_struct *area = find_vm_area(cpu_addr);

		if (WARN_ON(!area || !area->pages))
			return;
		iommu_dma_free(dev, area->pages, iosize, &handle);
		dma_common_free_remap(cpu_addr, size, VM_USERMAP);
	} else {
		iommu_dma_unmap_page(dev, handle, iosize, 0, 0);
		__free_pages(virt_to_page(cpu_addr), get_order(size));
	}
}

static int __iommu_mmap_attrs(struct device *dev, struct vm_area_struct *vma,
			      void *cpu_addr, dma_addr_t dma_addr, size_t size,
			      unsigned long attrs)
{
	struct vm_struct *area;
	int ret;

	vma->vm_page_prot = arch_dma_mmap_pgprot(dev, vma->vm_page_prot, attrs);

	if (dma_mmap_from_dev_coherent(dev, vma, cpu_addr, size, &ret))
		return ret;

	if (attrs & DMA_ATTR_FORCE_CONTIGUOUS) {
		/*
		 * DMA_ATTR_FORCE_CONTIGUOUS allocations are always remapped,
		 * hence in the vmalloc space.
		 */
		unsigned long pfn = vmalloc_to_pfn(cpu_addr);
		return __swiotlb_mmap_pfn(vma, pfn, size);
	}

	area = find_vm_area(cpu_addr);
	if (WARN_ON(!area || !area->pages))
		return -ENXIO;

	return iommu_dma_mmap(area->pages, size, vma);
}

static int __iommu_get_sgtable(struct device *dev, struct sg_table *sgt,
			       void *cpu_addr, dma_addr_t dma_addr,
			       size_t size, unsigned long attrs)
{
	unsigned int count = PAGE_ALIGN(size) >> PAGE_SHIFT;
	struct vm_struct *area = find_vm_area(cpu_addr);

	if (attrs & DMA_ATTR_FORCE_CONTIGUOUS) {
		/*
		 * DMA_ATTR_FORCE_CONTIGUOUS allocations are always remapped,
		 * hence in the vmalloc space.
		 */
		struct page *page = vmalloc_to_page(cpu_addr);
		return __swiotlb_get_sgtable_page(sgt, page, size);
	}

	if (WARN_ON(!area || !area->pages))
		return -ENXIO;

	return sg_alloc_table_from_pages(sgt, area->pages, count, 0, size,
					 GFP_KERNEL);
}

static void __iommu_sync_single_for_cpu(struct device *dev,
					dma_addr_t dev_addr, size_t size,
					enum dma_data_direction dir)
{
	phys_addr_t phys;

	if (dev_is_dma_coherent(dev))
		return;

	phys = iommu_iova_to_phys(iommu_get_dma_domain(dev), dev_addr);
	arch_sync_dma_for_cpu(dev, phys, size, dir);
}

static void __iommu_sync_single_for_device(struct device *dev,
					   dma_addr_t dev_addr, size_t size,
					   enum dma_data_direction dir)
{
	phys_addr_t phys;

	if (dev_is_dma_coherent(dev))
		return;

	phys = iommu_iova_to_phys(iommu_get_dma_domain(dev), dev_addr);
	arch_sync_dma_for_device(dev, phys, size, dir);
}
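
/*
 * Streaming map/unmap wrappers: establish or tear down the IOVA mapping,
 * and bracket it with CPU cache maintenance unless the caller opted out
 * with DMA_ATTR_SKIP_CPU_SYNC.
 */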
static dma_addr_t __iommu_map_page(struct device *dev, struct page *page,
				   unsigned long offset, size_t size,
				   enum dma_data_direction dir,
				   unsigned long attrs)
{
	bool coherent = dev_is_dma_coherent(dev);
	int prot = dma_info_to_prot(dir, coherent, attrs);
	dma_addr_t dev_addr = iommu_dma_map_page(dev, page, offset, size, prot);

	if (!coherent && !(attrs & DMA_ATTR_SKIP_CPU_SYNC) &&
	    dev_addr != DMA_MAPPING_ERROR)
		__dma_map_area(page_address(page) + offset, size, dir);

	return dev_addr;
}

static void __iommu_unmap_page(struct device *dev, dma_addr_t dev_addr,
			       size_t size, enum dma_data_direction dir,
			       unsigned long attrs)
{
	if ((attrs & DMA_ATTR_SKIP_CPU_SYNC) == 0)
		__iommu_sync_single_for_cpu(dev, dev_addr, size, dir);

	iommu_dma_unmap_page(dev, dev_addr, size, dir, attrs);
}

static void __iommu_sync_sg_for_cpu(struct device *dev,
				    struct scatterlist *sgl, int nelems,
				    enum dma_data_direction dir)
{
	struct scatterlist *sg;
	int i;

	if (dev_is_dma_coherent(dev))
		return;

	for_each_sg(sgl, sg, nelems, i)
		arch_sync_dma_for_cpu(dev, sg_phys(sg), sg->length, dir);
}

static void __iommu_sync_sg_for_device(struct device *dev,
				       struct scatterlist *sgl, int nelems,
				       enum dma_data_direction dir)
{
	struct scatterlist *sg;
	int i;

	if (dev_is_dma_coherent(dev))
		return;

	for_each_sg(sgl, sg, nelems, i)
		arch_sync_dma_for_device(dev, sg_phys(sg), sg->length, dir);
}

static int __iommu_map_sg_attrs(struct device *dev, struct scatterlist *sgl,
				int nelems, enum dma_data_direction dir,
				unsigned long attrs)
{
	bool coherent = dev_is_dma_coherent(dev);

	if ((attrs & DMA_ATTR_SKIP_CPU_SYNC) == 0)
		__iommu_sync_sg_for_device(dev, sgl, nelems, dir);

	return iommu_dma_map_sg(dev, sgl, nelems,
				dma_info_to_prot(dir, coherent, attrs));
}

static void __iommu_unmap_sg_attrs(struct device *dev,
				   struct scatterlist *sgl, int nelems,
				   enum dma_data_direction dir,
				   unsigned long attrs)
{
	if ((attrs & DMA_ATTR_SKIP_CPU_SYNC) == 0)
		__iommu_sync_sg_for_cpu(dev, sgl, nelems, dir);

	iommu_dma_unmap_sg(dev, sgl, nelems, dir, attrs);
}

static const struct dma_map_ops iommu_dma_ops = {
	.alloc = __iommu_alloc_attrs,
	.free = __iommu_free_attrs,
	.mmap = __iommu_mmap_attrs,
	.get_sgtable = __iommu_get_sgtable,
	.map_page = __iommu_map_page,
	.unmap_page = __iommu_unmap_page,
	.map_sg = __iommu_map_sg_attrs,
	.unmap_sg = __iommu_unmap_sg_attrs,
	.sync_single_for_cpu = __iommu_sync_single_for_cpu,
	.sync_single_for_device = __iommu_sync_single_for_device,
	.sync_sg_for_cpu = __iommu_sync_sg_for_cpu,
	.sync_sg_for_device = __iommu_sync_sg_for_device,
	.map_resource = iommu_dma_map_resource,
	.unmap_resource = iommu_dma_unmap_resource,
};
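
/*
 * Sketch of how these ops are reached (illustrative only): once
 * __iommu_setup_dma_ops() has installed iommu_dma_ops for a device,
 * an ordinary streaming mapping such as
 *
 *	dma_addr_t iova = dma_map_single(dev, buf, len, DMA_TO_DEVICE);
 *
 * resolves to __iommu_map_page(), which both programs the IOMMU and
 * performs cache maintenance for non-coherent devices.
 */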

static int __init __iommu_dma_init(void)
{
	return iommu_dma_init();
}
arch_initcall(__iommu_dma_init);
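
/*
 * Install iommu_dma_ops for a device if, and only if, the IOMMU core has
 * attached it to a default DMA domain that the dma-iommu layer can drive.
 */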
static void __iommu_setup_dma_ops(struct device *dev, u64 dma_base, u64 size,
				  const struct iommu_ops *ops)
{
	struct iommu_domain *domain;

	if (!ops)
		return;

	/*
	 * The IOMMU core code allocates the default DMA domain, which the
	 * underlying IOMMU driver needs to support via the dma-iommu layer.
	 */
	domain = iommu_get_domain_for_dev(dev);

	if (!domain)
		goto out_err;

	if (domain->type == IOMMU_DOMAIN_DMA) {
		if (iommu_dma_init_domain(domain, dma_base, size, dev))
			goto out_err;

		dev->dma_ops = &iommu_dma_ops;
	}

	return;

out_err:
	pr_warn("Failed to set up IOMMU for device %s; retaining platform DMA ops\n",
		dev_name(dev));
}

void arch_teardown_dma_ops(struct device *dev)
{
	dev->dma_ops = NULL;
}

#else

static void __iommu_setup_dma_ops(struct device *dev, u64 dma_base, u64 size,
				  const struct iommu_ops *iommu)
{ }

#endif /* CONFIG_IOMMU_DMA */

void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size,
			const struct iommu_ops *iommu, bool coherent)
{
	dev->dma_coherent = coherent;
	__iommu_setup_dma_ops(dev, dma_base, size, iommu);

#ifdef CONFIG_XEN
	if (xen_initial_domain())
		dev->dma_ops = xen_dma_ops;
#endif
}