/*
 * VFIO: IOMMU DMA mapping support for TCE on POWER
 *
 * Copyright (C) 2013 IBM Corp.  All rights reserved.
 *     Author: Alexey Kardashevskiy <aik@ozlabs.ru>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 *
 * Derived from original vfio_iommu_type1.c:
 * Copyright (C) 2012 Red Hat, Inc.  All rights reserved.
 *     Author: Alex Williamson <alex.williamson@redhat.com>
 */

#include <linux/module.h>
#include <linux/pci.h>
#include <linux/slab.h>
#include <linux/uaccess.h>
#include <linux/err.h>
#include <linux/vfio.h>
#include <asm/iommu.h>
#include <asm/tce.h>

#define DRIVER_VERSION  "0.1"
#define DRIVER_AUTHOR   "aik@ozlabs.ru"
#define DRIVER_DESC     "VFIO IOMMU SPAPR TCE"

static void tce_iommu_detach_group(void *iommu_data,
                struct iommu_group *iommu_group);

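/*
 * Charge @npages of locked memory against the current task's
 * RLIMIT_MEMLOCK.  Fails with -ENOMEM if the limit would be exceeded
 * and the task lacks CAP_IPC_LOCK.  Takes mmap_sem itself.
 */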
static long try_increment_locked_vm(long npages)
{
        long ret = 0, locked, lock_limit;

        if (!current || !current->mm)
                return -ESRCH; /* process exited */

        if (!npages)
                return 0;

        down_write(&current->mm->mmap_sem);
        locked = current->mm->locked_vm + npages;
        lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
        if (locked > lock_limit && !capable(CAP_IPC_LOCK))
                ret = -ENOMEM;
        else
                current->mm->locked_vm += npages;

        pr_debug("[%d] RLIMIT_MEMLOCK +%ld %ld/%ld%s\n", current->pid,
                        npages << PAGE_SHIFT,
                        current->mm->locked_vm << PAGE_SHIFT,
                        rlimit(RLIMIT_MEMLOCK),
                        ret ? " - exceeded" : "");

        up_write(&current->mm->mmap_sem);

        return ret;
}

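/* Undo a previous try_increment_locked_vm() charge. */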
static void decrement_locked_vm(long npages)
{
        if (!current || !current->mm || !npages)
                return; /* process exited */

        down_write(&current->mm->mmap_sem);
        if (WARN_ON_ONCE(npages > current->mm->locked_vm))
                npages = current->mm->locked_vm;
        current->mm->locked_vm -= npages;
        pr_debug("[%d] RLIMIT_MEMLOCK -%ld %ld/%ld\n", current->pid,
                        npages << PAGE_SHIFT,
                        current->mm->locked_vm << PAGE_SHIFT,
                        rlimit(RLIMIT_MEMLOCK));
        up_write(&current->mm->mmap_sem);
}

/*
 * VFIO IOMMU fd for SPAPR_TCE IOMMU implementation
 *
 * This code handles mapping and unmapping of user data buffers
 * into DMA'ble space using the IOMMU
 */

/*
 * The container descriptor supports only a single group per container.
 * This is required by the API because the container is not supplied with
 * an IOMMU group at the moment of initialization.
 */
struct tce_container {
        struct mutex lock;
        struct iommu_table *tbl;
        bool enabled;
        unsigned long locked_pages;
};

static bool tce_page_is_contained(struct page *page, unsigned page_shift)
{
        /*
         * Check that the TCE table granularity is not bigger than the size of
         * a page we just found. Otherwise the hardware can get access to
         * a bigger memory chunk than it should.
         */
        return (PAGE_SHIFT + compound_order(compound_head(page))) >= page_shift;
}

static int tce_iommu_enable(struct tce_container *container)
{
        int ret = 0;
        unsigned long locked;
        struct iommu_table *tbl = container->tbl;

        if (!container->tbl)
                return -ENXIO;

        if (!current->mm)
                return -ESRCH; /* process exited */

        if (container->enabled)
                return -EBUSY;

        /*
         * When userspace pages are mapped into the IOMMU, they are effectively
         * locked memory, so, theoretically, we need to update the accounting
         * of locked pages on each map and unmap. For powerpc, the map/unmap
         * paths can be very hot, though, and the accounting would kill
         * performance, especially since it would be difficult or impossible
         * to handle the accounting in real mode only.
         *
         * To address that, rather than precisely accounting every page, we
         * instead account for a worst case on locked memory when the iommu is
         * enabled and disabled. The worst case upper bound on locked memory
         * is the size of the whole iommu window, which is usually relatively
         * small (compared to total memory sizes) on POWER hardware.
         *
         * Also, we don't have a nice way to fail an H_PUT_TCE because of
         * ulimits: that would effectively kill the guest at random points,
         * so it is much better to enforce the limit based on the maximum
         * that the guest can map.
         *
         * Unfortunately at the moment it counts whole tables, no matter how
         * much memory the guest has. I.e. for a 4GB guest and 4 IOMMU groups,
         * each with a 2GB DMA window, 8GB will be counted here. The reason
         * for this is that we cannot tell here the amount of RAM used by the
         * guest as this information is only available from KVM and VFIO is
         * KVM agnostic.
         */
        locked = (tbl->it_size << tbl->it_page_shift) >> PAGE_SHIFT;
        ret = try_increment_locked_vm(locked);
        if (ret)
                return ret;

        container->locked_pages = locked;

        container->enabled = true;

        return ret;
}

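/* Disable the container and release the locked-memory charge taken at enable time. */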
static void tce_iommu_disable(struct tce_container *container)
{
        if (!container->enabled)
                return;

        container->enabled = false;

        if (!current->mm)
                return;

        decrement_locked_vm(container->locked_pages);
}

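/*
 * VFIO IOMMU driver "open" callback: allocate a container when userspace
 * selects this backend via VFIO_SET_IOMMU with VFIO_SPAPR_TCE_IOMMU.
 */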
static void *tce_iommu_open(unsigned long arg)
{
        struct tce_container *container;

        if (arg != VFIO_SPAPR_TCE_IOMMU) {
                pr_err("tce_vfio: Wrong IOMMU type\n");
                return ERR_PTR(-EINVAL);
        }

        container = kzalloc(sizeof(*container), GFP_KERNEL);
        if (!container)
                return ERR_PTR(-ENOMEM);

        mutex_init(&container->lock);

        return container;
}

static void tce_iommu_release(void *iommu_data)
{
        struct tce_container *container = iommu_data;

        WARN_ON(container->tbl && !container->tbl->it_group);
        tce_iommu_disable(container);

        if (container->tbl && container->tbl->it_group)
                tce_iommu_detach_group(iommu_data, container->tbl->it_group);

        mutex_destroy(&container->lock);

        kfree(container);
}

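/*
 * Clear @pages TCE entries starting at @entry and release the pages they
 * referenced, marking a page dirty if the device was allowed to write to it.
 */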
static int tce_iommu_clear(struct tce_container *container,
                struct iommu_table *tbl,
                unsigned long entry, unsigned long pages)
{
        unsigned long oldtce;
        struct page *page;

        for ( ; pages; --pages, ++entry) {
                oldtce = iommu_clear_tce(tbl, entry);
                if (!oldtce)
                        continue;

                page = pfn_to_page(oldtce >> PAGE_SHIFT);
                WARN_ON(!page);
                if (page) {
                        if (oldtce & TCE_PCI_WRITE)
                                SetPageDirty(page);
                        put_page(page);
                }
        }

        return 0;
}

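/*
 * Pin @pages pages of the userspace buffer at @tce and program the matching
 * TCE entries starting at @entry.  On failure, entries already programmed
 * are torn down again via tce_iommu_clear().
 */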
static long tce_iommu_build(struct tce_container *container,
                struct iommu_table *tbl,
                unsigned long entry, unsigned long tce, unsigned long pages)
{
        long i, ret = 0;
        struct page *page = NULL;
        unsigned long hva;
        enum dma_data_direction direction = iommu_tce_direction(tce);

        for (i = 0; i < pages; ++i) {
                unsigned long offset = tce & IOMMU_PAGE_MASK(tbl) & ~PAGE_MASK;

                ret = get_user_pages_fast(tce & PAGE_MASK, 1,
                                direction != DMA_TO_DEVICE, &page);
                if (unlikely(ret != 1)) {
                        ret = -EFAULT;
                        break;
                }

                if (!tce_page_is_contained(page, tbl->it_page_shift)) {
                        ret = -EPERM;
                        break;
                }

                hva = (unsigned long) page_address(page) + offset;

                ret = iommu_tce_build(tbl, entry + i, hva, direction);
                if (ret) {
                        put_page(page);
                        pr_err("iommu_tce: %s failed ioba=%lx, tce=%lx, ret=%ld\n",
                                        __func__, entry << tbl->it_page_shift,
                                        tce, ret);
                        break;
                }
                tce += IOMMU_PAGE_SIZE(tbl);
        }

        if (ret)
                tce_iommu_clear(container, tbl, entry, i);

        return ret;
}

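/*
 * Container ioctl handler.  The typical userspace sequence (see
 * Documentation/vfio.txt) is: VFIO_GROUP_SET_CONTAINER to attach a group,
 * VFIO_SET_IOMMU with VFIO_SPAPR_TCE_IOMMU, VFIO_IOMMU_SPAPR_TCE_GET_INFO
 * to read the DMA32 window, VFIO_IOMMU_ENABLE to account the locked memory,
 * then VFIO_IOMMU_MAP_DMA/VFIO_IOMMU_UNMAP_DMA to manage mappings.
 */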
static long tce_iommu_ioctl(void *iommu_data,
                unsigned int cmd, unsigned long arg)
{
        struct tce_container *container = iommu_data;
        unsigned long minsz;
        long ret;

        switch (cmd) {
        case VFIO_CHECK_EXTENSION:
                switch (arg) {
                case VFIO_SPAPR_TCE_IOMMU:
                        ret = 1;
                        break;
                default:
                        ret = vfio_spapr_iommu_eeh_ioctl(NULL, cmd, arg);
                        break;
                }

                return (ret < 0) ? 0 : ret;

        case VFIO_IOMMU_SPAPR_TCE_GET_INFO: {
                struct vfio_iommu_spapr_tce_info info;
                struct iommu_table *tbl = container->tbl;

                if (WARN_ON(!tbl))
                        return -ENXIO;

                minsz = offsetofend(struct vfio_iommu_spapr_tce_info,
                                dma32_window_size);

                if (copy_from_user(&info, (void __user *)arg, minsz))
                        return -EFAULT;

                if (info.argsz < minsz)
                        return -EINVAL;

                info.dma32_window_start = tbl->it_offset << tbl->it_page_shift;
                info.dma32_window_size = tbl->it_size << tbl->it_page_shift;
                info.flags = 0;

                if (copy_to_user((void __user *)arg, &info, minsz))
                        return -EFAULT;

                return 0;
        }
        case VFIO_IOMMU_MAP_DMA: {
                struct vfio_iommu_type1_dma_map param;
                struct iommu_table *tbl = container->tbl;
                unsigned long tce;

                if (!container->enabled)
                        return -EPERM;

                if (!tbl)
                        return -ENXIO;

                BUG_ON(!tbl->it_group);

                minsz = offsetofend(struct vfio_iommu_type1_dma_map, size);

                if (copy_from_user(&param, (void __user *)arg, minsz))
                        return -EFAULT;

                if (param.argsz < minsz)
                        return -EINVAL;

                if (param.flags & ~(VFIO_DMA_MAP_FLAG_READ |
                                VFIO_DMA_MAP_FLAG_WRITE))
                        return -EINVAL;

                if ((param.size & ~IOMMU_PAGE_MASK(tbl)) ||
                                (param.vaddr & ~IOMMU_PAGE_MASK(tbl)))
                        return -EINVAL;

                /* iova is checked by the IOMMU API */
                tce = param.vaddr;
                if (param.flags & VFIO_DMA_MAP_FLAG_READ)
                        tce |= TCE_PCI_READ;
                if (param.flags & VFIO_DMA_MAP_FLAG_WRITE)
                        tce |= TCE_PCI_WRITE;

                ret = iommu_tce_put_param_check(tbl, param.iova, tce);
                if (ret)
                        return ret;

                ret = tce_iommu_build(container, tbl,
                                param.iova >> tbl->it_page_shift,
                                tce, param.size >> tbl->it_page_shift);

                iommu_flush_tce(tbl);

                return ret;
        }
        case VFIO_IOMMU_UNMAP_DMA: {
                struct vfio_iommu_type1_dma_unmap param;
                struct iommu_table *tbl = container->tbl;

                if (!container->enabled)
                        return -EPERM;

                if (WARN_ON(!tbl))
                        return -ENXIO;

                minsz = offsetofend(struct vfio_iommu_type1_dma_unmap,
                                size);

                if (copy_from_user(&param, (void __user *)arg, minsz))
                        return -EFAULT;

                if (param.argsz < minsz)
                        return -EINVAL;

                /* No flags are supported yet */
                if (param.flags)
                        return -EINVAL;

                if (param.size & ~IOMMU_PAGE_MASK(tbl))
                        return -EINVAL;

                ret = iommu_tce_clear_param_check(tbl, param.iova, 0,
                                param.size >> tbl->it_page_shift);
                if (ret)
                        return ret;

                ret = tce_iommu_clear(container, tbl,
                                param.iova >> tbl->it_page_shift,
                                param.size >> tbl->it_page_shift);
                iommu_flush_tce(tbl);

                return ret;
        }
        case VFIO_IOMMU_ENABLE:
                mutex_lock(&container->lock);
                ret = tce_iommu_enable(container);
                mutex_unlock(&container->lock);
                return ret;

        case VFIO_IOMMU_DISABLE:
                mutex_lock(&container->lock);
                tce_iommu_disable(container);
                mutex_unlock(&container->lock);
                return 0;

        case VFIO_EEH_PE_OP:
                if (!container->tbl || !container->tbl->it_group)
                        return -ENODEV;

                return vfio_spapr_iommu_eeh_ioctl(container->tbl->it_group,
                                cmd, arg);
        }

        return -ENOTTY;
}

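/*
 * Take exclusive ownership of the group's TCE table.  Only one group per
 * container is supported, and a group cannot be attached while the
 * container is enabled.
 */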
static int tce_iommu_attach_group(void *iommu_data,
                struct iommu_group *iommu_group)
{
        int ret;
        struct tce_container *container = iommu_data;
        struct iommu_table *tbl = iommu_group_get_iommudata(iommu_group);

        BUG_ON(!tbl);
        mutex_lock(&container->lock);

        /* pr_debug("tce_vfio: Attaching group #%u to iommu %p\n",
                        iommu_group_id(iommu_group), iommu_group); */
        if (container->tbl) {
                pr_warn("tce_vfio: Only one group per IOMMU container is allowed, existing id=%d, attaching id=%d\n",
                                iommu_group_id(container->tbl->it_group),
                                iommu_group_id(iommu_group));
                ret = -EBUSY;
        } else if (container->enabled) {
                pr_err("tce_vfio: attaching group #%u to enabled container\n",
                                iommu_group_id(iommu_group));
                ret = -EBUSY;
        } else {
                ret = iommu_take_ownership(tbl);
                if (!ret)
                        container->tbl = tbl;
        }

        mutex_unlock(&container->lock);

        return ret;
}

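/*
 * Detach the group: force-disable the container if needed, clear any
 * remaining TCE entries and return table ownership to the platform code.
 */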
static void tce_iommu_detach_group(void *iommu_data,
                struct iommu_group *iommu_group)
{
        struct tce_container *container = iommu_data;
        struct iommu_table *tbl = iommu_group_get_iommudata(iommu_group);

        BUG_ON(!tbl);
        mutex_lock(&container->lock);
        if (tbl != container->tbl) {
                pr_warn("tce_vfio: detaching group #%u, expected group is #%u\n",
                                iommu_group_id(iommu_group),
                                iommu_group_id(tbl->it_group));
        } else {
                if (container->enabled) {
                        pr_warn("tce_vfio: detaching group #%u from enabled container, forcing disable\n",
                                        iommu_group_id(tbl->it_group));
                        tce_iommu_disable(container);
                }

                /* pr_debug("tce_vfio: detaching group #%u from iommu %p\n",
                                iommu_group_id(iommu_group), iommu_group); */
                container->tbl = NULL;
                tce_iommu_clear(container, tbl, tbl->it_offset, tbl->it_size);
                iommu_release_ownership(tbl);
        }
        mutex_unlock(&container->lock);
}

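/* Callbacks registered with the VFIO core at module load time. */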
const struct vfio_iommu_driver_ops tce_iommu_driver_ops = {
        .name           = "iommu-vfio-powerpc",
        .owner          = THIS_MODULE,
        .open           = tce_iommu_open,
        .release        = tce_iommu_release,
        .ioctl          = tce_iommu_ioctl,
        .attach_group   = tce_iommu_attach_group,
        .detach_group   = tce_iommu_detach_group,
};

static int __init tce_iommu_init(void)
{
        return vfio_register_iommu_driver(&tce_iommu_driver_ops);
}

static void __exit tce_iommu_cleanup(void)
{
        vfio_unregister_iommu_driver(&tce_iommu_driver_ops);
}

module_init(tce_iommu_init);
module_exit(tce_iommu_cleanup);

MODULE_VERSION(DRIVER_VERSION);
MODULE_LICENSE("GPL v2");
MODULE_AUTHOR(DRIVER_AUTHOR);
MODULE_DESCRIPTION(DRIVER_DESC);