drivers/vfio/vfio.c
1 /*
2 * VFIO core
3 *
4 * Copyright (C) 2012 Red Hat, Inc. All rights reserved.
5 * Author: Alex Williamson <alex.williamson@redhat.com>
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License version 2 as
9 * published by the Free Software Foundation.
10 *
11 * Derived from original vfio:
12 * Copyright 2010 Cisco Systems, Inc. All rights reserved.
13 * Author: Tom Lyon, pugs@cisco.com
14 */
15
16 #include <linux/cdev.h>
17 #include <linux/compat.h>
18 #include <linux/device.h>
19 #include <linux/file.h>
20 #include <linux/anon_inodes.h>
21 #include <linux/fs.h>
22 #include <linux/idr.h>
23 #include <linux/iommu.h>
24 #include <linux/list.h>
25 #include <linux/miscdevice.h>
26 #include <linux/module.h>
27 #include <linux/mutex.h>
28 #include <linux/pci.h>
29 #include <linux/rwsem.h>
30 #include <linux/sched.h>
31 #include <linux/slab.h>
32 #include <linux/stat.h>
33 #include <linux/string.h>
34 #include <linux/uaccess.h>
35 #include <linux/vfio.h>
36 #include <linux/wait.h>
37
38 #define DRIVER_VERSION "0.3"
39 #define DRIVER_AUTHOR "Alex Williamson <alex.williamson@redhat.com>"
40 #define DRIVER_DESC "VFIO - User Level meta-driver"
41
42 static struct vfio {
43 struct class *class;
44 struct list_head iommu_drivers_list;
45 struct mutex iommu_drivers_lock;
46 struct list_head group_list;
47 struct idr group_idr;
48 struct mutex group_lock;
49 struct cdev group_cdev;
50 dev_t group_devt;
51 wait_queue_head_t release_q;
52 } vfio;
53
54 struct vfio_iommu_driver {
55 const struct vfio_iommu_driver_ops *ops;
56 struct list_head vfio_next;
57 };
58
59 struct vfio_container {
60 struct kref kref;
61 struct list_head group_list;
62 struct rw_semaphore group_lock;
63 struct vfio_iommu_driver *iommu_driver;
64 void *iommu_data;
65 bool noiommu;
66 };
67
68 struct vfio_unbound_dev {
69 struct device *dev;
70 struct list_head unbound_next;
71 };
72
73 struct vfio_group {
74 struct kref kref;
75 int minor;
76 atomic_t container_users;
77 struct iommu_group *iommu_group;
78 struct vfio_container *container;
79 struct list_head device_list;
80 struct mutex device_lock;
81 struct device *dev;
82 struct notifier_block nb;
83 struct list_head vfio_next;
84 struct list_head container_next;
85 struct list_head unbound_list;
86 struct mutex unbound_lock;
87 atomic_t opened;
88 bool noiommu;
89 struct kvm *kvm;
90 struct blocking_notifier_head notifier;
91 };
92
93 struct vfio_device {
94 struct kref kref;
95 struct device *dev;
96 const struct vfio_device_ops *ops;
97 struct vfio_group *group;
98 struct list_head group_next;
99 void *device_data;
100 };
101
102 #ifdef CONFIG_VFIO_NOIOMMU
103 static bool noiommu __read_mostly;
104 module_param_named(enable_unsafe_noiommu_mode,
105 noiommu, bool, S_IRUGO | S_IWUSR);
106 MODULE_PARM_DESC(enable_unsafe_noiommu_mode, "Enable UNSAFE, no-IOMMU mode. This mode provides no device isolation, no DMA translation, no host kernel protection, cannot be used for device assignment to virtual machines, requires RAWIO permissions, and will taint the kernel. If you do not know what this is for, step away. (default: false)");
107 #endif
108
109 /*
110 * vfio_iommu_group_{get,put} are only intended for VFIO bus driver probe
111 * and remove functions; any use cases other than acquiring the first
112 * reference for the purpose of calling vfio_add_group_dev() or removing
113 * that symmetric reference after vfio_del_group_dev() should use the raw
114 * iommu_group_{get,put} functions. In particular, vfio_iommu_group_put()
115 * removes the device from the dummy group and cannot be nested.
116 */
117 struct iommu_group *vfio_iommu_group_get(struct device *dev)
118 {
119 struct iommu_group *group;
120 int __maybe_unused ret;
121
122 group = iommu_group_get(dev);
123
124 #ifdef CONFIG_VFIO_NOIOMMU
125 /*
126 * With noiommu enabled, an IOMMU group will be created for a device
127 * that doesn't already have one and doesn't have iommu_ops on its
128 * bus. We set iommudata simply to be able to identify these groups
129 * as special use and for reclamation later.
130 */
131 if (group || !noiommu || iommu_present(dev->bus))
132 return group;
133
134 group = iommu_group_alloc();
135 if (IS_ERR(group))
136 return NULL;
137
138 iommu_group_set_name(group, "vfio-noiommu");
139 iommu_group_set_iommudata(group, &noiommu, NULL);
140 ret = iommu_group_add_device(group, dev);
141 iommu_group_put(group);
142 if (ret)
143 return NULL;
144
145 /*
146 * Where to taint? At this point we've added an IOMMU group for a
147 * device that is not backed by iommu_ops, therefore any iommu_
148 * callback using iommu_ops can legitimately Oops. So, while we may
149 * be about to give a DMA capable device to a user without IOMMU
150 * protection, which is clearly taint-worthy, let's go ahead and do
151 * it here.
152 */
153 add_taint(TAINT_USER, LOCKDEP_STILL_OK);
154 dev_warn(dev, "Adding kernel taint for vfio-noiommu group on device\n");
155 #endif
156
157 return group;
158 }
159 EXPORT_SYMBOL_GPL(vfio_iommu_group_get);
160
161 void vfio_iommu_group_put(struct iommu_group *group, struct device *dev)
162 {
163 #ifdef CONFIG_VFIO_NOIOMMU
164 if (iommu_group_get_iommudata(group) == &noiommu)
165 iommu_group_remove_device(dev);
166 #endif
167
168 iommu_group_put(group);
169 }
170 EXPORT_SYMBOL_GPL(vfio_iommu_group_put);
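
/*
 * Editor's illustration (not part of the original source): a minimal sketch
 * of the probe-side pairing described in the comment above, as a hypothetical
 * VFIO bus driver might use it.  example_probe, example_vfio_ops and
 * example_data are placeholder names, not real symbols.
 */
#if 0
static int example_probe(struct device *dev, void *example_data)
{
	struct iommu_group *group;
	int ret;

	/* First reference; may create a "vfio-noiommu" group if enabled */
	group = vfio_iommu_group_get(dev);
	if (!group)
		return -EINVAL;

	ret = vfio_add_group_dev(dev, &example_vfio_ops, example_data);
	if (ret)
		vfio_iommu_group_put(group, dev);	/* undo on failure */

	return ret;
}
#endif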
171
172 #ifdef CONFIG_VFIO_NOIOMMU
173 static void *vfio_noiommu_open(unsigned long arg)
174 {
175 if (arg != VFIO_NOIOMMU_IOMMU)
176 return ERR_PTR(-EINVAL);
177 if (!capable(CAP_SYS_RAWIO))
178 return ERR_PTR(-EPERM);
179
180 return NULL;
181 }
182
183 static void vfio_noiommu_release(void *iommu_data)
184 {
185 }
186
187 static long vfio_noiommu_ioctl(void *iommu_data,
188 unsigned int cmd, unsigned long arg)
189 {
190 if (cmd == VFIO_CHECK_EXTENSION)
191 return noiommu && (arg == VFIO_NOIOMMU_IOMMU) ? 1 : 0;
192
193 return -ENOTTY;
194 }
195
196 static int vfio_noiommu_attach_group(void *iommu_data,
197 struct iommu_group *iommu_group)
198 {
199 return iommu_group_get_iommudata(iommu_group) == &noiommu ? 0 : -EINVAL;
200 }
201
202 static void vfio_noiommu_detach_group(void *iommu_data,
203 struct iommu_group *iommu_group)
204 {
205 }
206
207 static const struct vfio_iommu_driver_ops vfio_noiommu_ops = {
208 .name = "vfio-noiommu",
209 .owner = THIS_MODULE,
210 .open = vfio_noiommu_open,
211 .release = vfio_noiommu_release,
212 .ioctl = vfio_noiommu_ioctl,
213 .attach_group = vfio_noiommu_attach_group,
214 .detach_group = vfio_noiommu_detach_group,
215 };
216 #endif
217
218
219 /**
220 * IOMMU driver registration
221 */
222 int vfio_register_iommu_driver(const struct vfio_iommu_driver_ops *ops)
223 {
224 struct vfio_iommu_driver *driver, *tmp;
225
226 driver = kzalloc(sizeof(*driver), GFP_KERNEL);
227 if (!driver)
228 return -ENOMEM;
229
230 driver->ops = ops;
231
232 mutex_lock(&vfio.iommu_drivers_lock);
233
234 /* Check for duplicates */
235 list_for_each_entry(tmp, &vfio.iommu_drivers_list, vfio_next) {
236 if (tmp->ops == ops) {
237 mutex_unlock(&vfio.iommu_drivers_lock);
238 kfree(driver);
239 return -EINVAL;
240 }
241 }
242
243 list_add(&driver->vfio_next, &vfio.iommu_drivers_list);
244
245 mutex_unlock(&vfio.iommu_drivers_lock);
246
247 return 0;
248 }
249 EXPORT_SYMBOL_GPL(vfio_register_iommu_driver);
250
251 void vfio_unregister_iommu_driver(const struct vfio_iommu_driver_ops *ops)
252 {
253 struct vfio_iommu_driver *driver;
254
255 mutex_lock(&vfio.iommu_drivers_lock);
256 list_for_each_entry(driver, &vfio.iommu_drivers_list, vfio_next) {
257 if (driver->ops == ops) {
258 list_del(&driver->vfio_next);
259 mutex_unlock(&vfio.iommu_drivers_lock);
260 kfree(driver);
261 return;
262 }
263 }
264 mutex_unlock(&vfio.iommu_drivers_lock);
265 }
266 EXPORT_SYMBOL_GPL(vfio_unregister_iommu_driver);
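
/*
 * Editor's illustration (not part of the original source): how an IOMMU
 * backend module would typically plug into the registration calls above.
 * The example_* symbols are placeholders; vfio_iommu_type1 and
 * vfio_iommu_spapr_tce are the in-tree users of this interface.
 */
#if 0
static const struct vfio_iommu_driver_ops example_iommu_ops = {
	.name		= "vfio-example",
	.owner		= THIS_MODULE,
	.open		= example_iommu_open,
	.release	= example_iommu_release,
	.ioctl		= example_iommu_ioctl,
	.attach_group	= example_attach_group,
	.detach_group	= example_detach_group,
};

static int __init example_iommu_init(void)
{
	return vfio_register_iommu_driver(&example_iommu_ops);
}

static void __exit example_iommu_exit(void)
{
	vfio_unregister_iommu_driver(&example_iommu_ops);
}
#endif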
267
268 /**
269 * Group minor allocation/free - both called with vfio.group_lock held
270 */
271 static int vfio_alloc_group_minor(struct vfio_group *group)
272 {
273 return idr_alloc(&vfio.group_idr, group, 0, MINORMASK + 1, GFP_KERNEL);
274 }
275
276 static void vfio_free_group_minor(int minor)
277 {
278 idr_remove(&vfio.group_idr, minor);
279 }
280
281 static int vfio_iommu_group_notifier(struct notifier_block *nb,
282 unsigned long action, void *data);
283 static void vfio_group_get(struct vfio_group *group);
284
285 /**
286 * Container objects - containers are created when /dev/vfio/vfio is
287 * opened, but their lifecycle extends until the last user is done, so
288 * it's freed via kref. Must support container/group/device being
289 * closed in any order.
290 */
291 static void vfio_container_get(struct vfio_container *container)
292 {
293 kref_get(&container->kref);
294 }
295
296 static void vfio_container_release(struct kref *kref)
297 {
298 struct vfio_container *container;
299 container = container_of(kref, struct vfio_container, kref);
300
301 kfree(container);
302 }
303
304 static void vfio_container_put(struct vfio_container *container)
305 {
306 kref_put(&container->kref, vfio_container_release);
307 }
308
309 static void vfio_group_unlock_and_free(struct vfio_group *group)
310 {
311 mutex_unlock(&vfio.group_lock);
312 /*
313 * Unregister outside of lock. A spurious callback is harmless now
314 * that the group is no longer in vfio.group_list.
315 */
316 iommu_group_unregister_notifier(group->iommu_group, &group->nb);
317 kfree(group);
318 }
319
320 /**
321 * Group objects - create, release, get, put, search
322 */
323 static struct vfio_group *vfio_create_group(struct iommu_group *iommu_group)
324 {
325 struct vfio_group *group, *tmp;
326 struct device *dev;
327 int ret, minor;
328
329 group = kzalloc(sizeof(*group), GFP_KERNEL);
330 if (!group)
331 return ERR_PTR(-ENOMEM);
332
333 kref_init(&group->kref);
334 INIT_LIST_HEAD(&group->device_list);
335 mutex_init(&group->device_lock);
336 INIT_LIST_HEAD(&group->unbound_list);
337 mutex_init(&group->unbound_lock);
338 atomic_set(&group->container_users, 0);
339 atomic_set(&group->opened, 0);
340 group->iommu_group = iommu_group;
341 #ifdef CONFIG_VFIO_NOIOMMU
342 group->noiommu = (iommu_group_get_iommudata(iommu_group) == &noiommu);
343 #endif
344 BLOCKING_INIT_NOTIFIER_HEAD(&group->notifier);
345
346 group->nb.notifier_call = vfio_iommu_group_notifier;
347
348 /*
349 * blocking notifiers acquire a rwsem around registering and hold
350 * it around callback. Therefore, need to register outside of
351 * vfio.group_lock to avoid A-B/B-A contention. Our callback won't
352 * do anything unless it can find the group in vfio.group_list, so
353 * no harm in registering early.
354 */
355 ret = iommu_group_register_notifier(iommu_group, &group->nb);
356 if (ret) {
357 kfree(group);
358 return ERR_PTR(ret);
359 }
360
361 mutex_lock(&vfio.group_lock);
362
363 /* Did we race creating this group? */
364 list_for_each_entry(tmp, &vfio.group_list, vfio_next) {
365 if (tmp->iommu_group == iommu_group) {
366 vfio_group_get(tmp);
367 vfio_group_unlock_and_free(group);
368 return tmp;
369 }
370 }
371
372 minor = vfio_alloc_group_minor(group);
373 if (minor < 0) {
374 vfio_group_unlock_and_free(group);
375 return ERR_PTR(minor);
376 }
377
378 dev = device_create(vfio.class, NULL,
379 MKDEV(MAJOR(vfio.group_devt), minor),
380 group, "%s%d", group->noiommu ? "noiommu-" : "",
381 iommu_group_id(iommu_group));
382 if (IS_ERR(dev)) {
383 vfio_free_group_minor(minor);
384 vfio_group_unlock_and_free(group);
385 return (struct vfio_group *)dev; /* ERR_PTR */
386 }
387
388 group->minor = minor;
389 group->dev = dev;
390
391 list_add(&group->vfio_next, &vfio.group_list);
392
393 mutex_unlock(&vfio.group_lock);
394
395 return group;
396 }
397
398 /* called with vfio.group_lock held */
399 static void vfio_group_release(struct kref *kref)
400 {
401 struct vfio_group *group = container_of(kref, struct vfio_group, kref);
402 struct vfio_unbound_dev *unbound, *tmp;
403 struct iommu_group *iommu_group = group->iommu_group;
404
405 WARN_ON(!list_empty(&group->device_list));
406 WARN_ON(group->notifier.head);
407
408 list_for_each_entry_safe(unbound, tmp,
409 &group->unbound_list, unbound_next) {
410 list_del(&unbound->unbound_next);
411 kfree(unbound);
412 }
413
414 device_destroy(vfio.class, MKDEV(MAJOR(vfio.group_devt), group->minor));
415 list_del(&group->vfio_next);
416 vfio_free_group_minor(group->minor);
417 vfio_group_unlock_and_free(group);
418 iommu_group_put(iommu_group);
419 }
420
421 static void vfio_group_put(struct vfio_group *group)
422 {
423 kref_put_mutex(&group->kref, vfio_group_release, &vfio.group_lock);
424 }
425
426 /* Assume group_lock or group reference is held */
427 static void vfio_group_get(struct vfio_group *group)
428 {
429 kref_get(&group->kref);
430 }
431
432 /*
433 * Not really a try as we will sleep for mutex, but we need to make
434 * sure the group pointer is valid under lock and get a reference.
435 */
436 static struct vfio_group *vfio_group_try_get(struct vfio_group *group)
437 {
438 struct vfio_group *target = group;
439
440 mutex_lock(&vfio.group_lock);
441 list_for_each_entry(group, &vfio.group_list, vfio_next) {
442 if (group == target) {
443 vfio_group_get(group);
444 mutex_unlock(&vfio.group_lock);
445 return group;
446 }
447 }
448 mutex_unlock(&vfio.group_lock);
449
450 return NULL;
451 }
452
453 static
454 struct vfio_group *vfio_group_get_from_iommu(struct iommu_group *iommu_group)
455 {
456 struct vfio_group *group;
457
458 mutex_lock(&vfio.group_lock);
459 list_for_each_entry(group, &vfio.group_list, vfio_next) {
460 if (group->iommu_group == iommu_group) {
461 vfio_group_get(group);
462 mutex_unlock(&vfio.group_lock);
463 return group;
464 }
465 }
466 mutex_unlock(&vfio.group_lock);
467
468 return NULL;
469 }
470
471 static struct vfio_group *vfio_group_get_from_minor(int minor)
472 {
473 struct vfio_group *group;
474
475 mutex_lock(&vfio.group_lock);
476 group = idr_find(&vfio.group_idr, minor);
477 if (!group) {
478 mutex_unlock(&vfio.group_lock);
479 return NULL;
480 }
481 vfio_group_get(group);
482 mutex_unlock(&vfio.group_lock);
483
484 return group;
485 }
486
487 static struct vfio_group *vfio_group_get_from_dev(struct device *dev)
488 {
489 struct iommu_group *iommu_group;
490 struct vfio_group *group;
491
492 iommu_group = iommu_group_get(dev);
493 if (!iommu_group)
494 return NULL;
495
496 group = vfio_group_get_from_iommu(iommu_group);
497 iommu_group_put(iommu_group);
498
499 return group;
500 }
501
502 /**
503 * Device objects - create, release, get, put, search
504 */
505 static
506 struct vfio_device *vfio_group_create_device(struct vfio_group *group,
507 struct device *dev,
508 const struct vfio_device_ops *ops,
509 void *device_data)
510 {
511 struct vfio_device *device;
512
513 device = kzalloc(sizeof(*device), GFP_KERNEL);
514 if (!device)
515 return ERR_PTR(-ENOMEM);
516
517 kref_init(&device->kref);
518 device->dev = dev;
519 device->group = group;
520 device->ops = ops;
521 device->device_data = device_data;
522 dev_set_drvdata(dev, device);
523
524 /* No need to get group_lock, caller has group reference */
525 vfio_group_get(group);
526
527 mutex_lock(&group->device_lock);
528 list_add(&device->group_next, &group->device_list);
529 mutex_unlock(&group->device_lock);
530
531 return device;
532 }
533
534 static void vfio_device_release(struct kref *kref)
535 {
536 struct vfio_device *device = container_of(kref,
537 struct vfio_device, kref);
538 struct vfio_group *group = device->group;
539
540 list_del(&device->group_next);
541 mutex_unlock(&group->device_lock);
542
543 dev_set_drvdata(device->dev, NULL);
544
545 kfree(device);
546
547 /* vfio_del_group_dev may be waiting for this device */
548 wake_up(&vfio.release_q);
549 }
550
551 /* Device reference always implies a group reference */
552 void vfio_device_put(struct vfio_device *device)
553 {
554 struct vfio_group *group = device->group;
555 kref_put_mutex(&device->kref, vfio_device_release, &group->device_lock);
556 vfio_group_put(group);
557 }
558 EXPORT_SYMBOL_GPL(vfio_device_put);
559
560 static void vfio_device_get(struct vfio_device *device)
561 {
562 vfio_group_get(device->group);
563 kref_get(&device->kref);
564 }
565
566 static struct vfio_device *vfio_group_get_device(struct vfio_group *group,
567 struct device *dev)
568 {
569 struct vfio_device *device;
570
571 mutex_lock(&group->device_lock);
572 list_for_each_entry(device, &group->device_list, group_next) {
573 if (device->dev == dev) {
574 vfio_device_get(device);
575 mutex_unlock(&group->device_lock);
576 return device;
577 }
578 }
579 mutex_unlock(&group->device_lock);
580 return NULL;
581 }
582
583 /*
584 * Some drivers, like pci-stub, are only used to prevent other drivers from
585 * claiming a device and are therefore perfectly legitimate for a user owned
586 * group. The pci-stub driver has no dependencies on DMA or the IOVA mapping
587 * of the device, but it does prevent the user from having direct access to
588 * the device, which is useful in some circumstances.
589 *
590 * We also assume that we can include PCI interconnect devices, ie. bridges.
591 * IOMMU grouping on PCI necessitates that if we lack isolation on a bridge
592 * then all of the downstream devices will be part of the same IOMMU group as
593 * the bridge. Thus, if placing the bridge into the user owned IOVA space
594 * breaks anything, it only does so for user owned devices downstream. Note
595 * that error notification via MSI can be affected for platforms that handle
596 * MSI within the same IOVA space as DMA.
597 */
598 static const char * const vfio_driver_whitelist[] = { "pci-stub" };
599
600 static bool vfio_dev_whitelisted(struct device *dev, struct device_driver *drv)
601 {
602 int i;
603
604 if (dev_is_pci(dev)) {
605 struct pci_dev *pdev = to_pci_dev(dev);
606
607 if (pdev->hdr_type != PCI_HEADER_TYPE_NORMAL)
608 return true;
609 }
610
611 for (i = 0; i < ARRAY_SIZE(vfio_driver_whitelist); i++) {
612 if (!strcmp(drv->name, vfio_driver_whitelist[i]))
613 return true;
614 }
615
616 return false;
617 }
618
619 /*
620 * A vfio group is viable for use by userspace if all devices are in
621 * one of the following states:
622 * - driver-less
623 * - bound to a vfio driver
624 * - bound to a whitelisted driver
625 * - a PCI interconnect device
626 *
627 * We use two methods to determine whether a device is bound to a vfio
628 * driver. The first is to test whether the device exists in the vfio
629 * group. The second is to test if the device exists on the group
630 * unbound_list, indicating it's in the middle of transitioning from
631 * a vfio driver to driver-less.
632 */
633 static int vfio_dev_viable(struct device *dev, void *data)
634 {
635 struct vfio_group *group = data;
636 struct vfio_device *device;
637 struct device_driver *drv = ACCESS_ONCE(dev->driver);
638 struct vfio_unbound_dev *unbound;
639 int ret = -EINVAL;
640
641 mutex_lock(&group->unbound_lock);
642 list_for_each_entry(unbound, &group->unbound_list, unbound_next) {
643 if (dev == unbound->dev) {
644 ret = 0;
645 break;
646 }
647 }
648 mutex_unlock(&group->unbound_lock);
649
650 if (!ret || !drv || vfio_dev_whitelisted(dev, drv))
651 return 0;
652
653 device = vfio_group_get_device(group, dev);
654 if (device) {
655 vfio_device_put(device);
656 return 0;
657 }
658
659 return ret;
660 }
661
662 /**
663 * Async device support
664 */
665 static int vfio_group_nb_add_dev(struct vfio_group *group, struct device *dev)
666 {
667 struct vfio_device *device;
668
669 /* Do we already know about it? We shouldn't */
670 device = vfio_group_get_device(group, dev);
671 if (WARN_ON_ONCE(device)) {
672 vfio_device_put(device);
673 return 0;
674 }
675
676 /* Nothing to do for idle groups */
677 if (!atomic_read(&group->container_users))
678 return 0;
679
680 /* TODO Prevent device auto probing */
681 WARN(1, "Device %s added to live group %d!\n", dev_name(dev),
682 iommu_group_id(group->iommu_group));
683
684 return 0;
685 }
686
687 static int vfio_group_nb_verify(struct vfio_group *group, struct device *dev)
688 {
689 /* We don't care what happens when the group isn't in use */
690 if (!atomic_read(&group->container_users))
691 return 0;
692
693 return vfio_dev_viable(dev, group);
694 }
695
696 static int vfio_iommu_group_notifier(struct notifier_block *nb,
697 unsigned long action, void *data)
698 {
699 struct vfio_group *group = container_of(nb, struct vfio_group, nb);
700 struct device *dev = data;
701 struct vfio_unbound_dev *unbound;
702
703 /*
704 * Need to go through a group_lock lookup to get a reference or we
705 * risk racing a group being removed. Ignore spurious notifies.
706 */
707 group = vfio_group_try_get(group);
708 if (!group)
709 return NOTIFY_OK;
710
711 switch (action) {
712 case IOMMU_GROUP_NOTIFY_ADD_DEVICE:
713 vfio_group_nb_add_dev(group, dev);
714 break;
715 case IOMMU_GROUP_NOTIFY_DEL_DEVICE:
716 /*
717 * Nothing to do here. If the device is in use, then the
718 * vfio sub-driver should block the remove callback until
719 * it is unused. If the device is unused or attached to a
720 * stub driver, then it should be released and we don't
721 * care that it will be going away.
722 */
723 break;
724 case IOMMU_GROUP_NOTIFY_BIND_DRIVER:
725 pr_debug("%s: Device %s, group %d binding to driver\n",
726 __func__, dev_name(dev),
727 iommu_group_id(group->iommu_group));
728 break;
729 case IOMMU_GROUP_NOTIFY_BOUND_DRIVER:
730 pr_debug("%s: Device %s, group %d bound to driver %s\n",
731 __func__, dev_name(dev),
732 iommu_group_id(group->iommu_group), dev->driver->name);
733 BUG_ON(vfio_group_nb_verify(group, dev));
734 break;
735 case IOMMU_GROUP_NOTIFY_UNBIND_DRIVER:
736 pr_debug("%s: Device %s, group %d unbinding from driver %s\n",
737 __func__, dev_name(dev),
738 iommu_group_id(group->iommu_group), dev->driver->name);
739 break;
740 case IOMMU_GROUP_NOTIFY_UNBOUND_DRIVER:
741 pr_debug("%s: Device %s, group %d unbound from driver\n",
742 __func__, dev_name(dev),
743 iommu_group_id(group->iommu_group));
744 /*
745 * XXX An unbound device in a live group is ok, but we'd
746 * really like to avoid the above BUG_ON by preventing other
747 * drivers from binding to it. Once that occurs, we have to
748 * stop the system to maintain isolation. At a minimum, we'd
749 * want a toggle to disable driver auto probe for this device.
750 */
751
752 mutex_lock(&group->unbound_lock);
753 list_for_each_entry(unbound,
754 &group->unbound_list, unbound_next) {
755 if (dev == unbound->dev) {
756 list_del(&unbound->unbound_next);
757 kfree(unbound);
758 break;
759 }
760 }
761 mutex_unlock(&group->unbound_lock);
762 break;
763 }
764
765 vfio_group_put(group);
766 return NOTIFY_OK;
767 }
768
769 /**
770 * VFIO driver API
771 */
772 int vfio_add_group_dev(struct device *dev,
773 const struct vfio_device_ops *ops, void *device_data)
774 {
775 struct iommu_group *iommu_group;
776 struct vfio_group *group;
777 struct vfio_device *device;
778
779 iommu_group = iommu_group_get(dev);
780 if (!iommu_group)
781 return -EINVAL;
782
783 group = vfio_group_get_from_iommu(iommu_group);
784 if (!group) {
785 group = vfio_create_group(iommu_group);
786 if (IS_ERR(group)) {
787 iommu_group_put(iommu_group);
788 return PTR_ERR(group);
789 }
790 } else {
791 /*
792 * A found vfio_group already holds a reference to the
793 * iommu_group. A created vfio_group keeps the reference.
794 */
795 iommu_group_put(iommu_group);
796 }
797
798 device = vfio_group_get_device(group, dev);
799 if (device) {
800 WARN(1, "Device %s already exists on group %d\n",
801 dev_name(dev), iommu_group_id(iommu_group));
802 vfio_device_put(device);
803 vfio_group_put(group);
804 return -EBUSY;
805 }
806
807 device = vfio_group_create_device(group, dev, ops, device_data);
808 if (IS_ERR(device)) {
809 vfio_group_put(group);
810 return PTR_ERR(device);
811 }
812
813 /*
814 * Drop all but the vfio_device reference. The vfio_device holds
815 * a reference to the vfio_group, which holds a reference to the
816 * iommu_group.
817 */
818 vfio_group_put(group);
819
820 return 0;
821 }
822 EXPORT_SYMBOL_GPL(vfio_add_group_dev);
823
824 /**
825 * Get a reference to the vfio_device for a device. Even if the
826 * caller thinks they own the device, they could be racing with a
827 * release call path, so we can't trust drvdata for the shortcut.
828 * Go the long way around, from the iommu_group to the vfio_group
829 * to the vfio_device.
830 */
831 struct vfio_device *vfio_device_get_from_dev(struct device *dev)
832 {
833 struct vfio_group *group;
834 struct vfio_device *device;
835
836 group = vfio_group_get_from_dev(dev);
837 if (!group)
838 return NULL;
839
840 device = vfio_group_get_device(group, dev);
841 vfio_group_put(group);
842
843 return device;
844 }
845 EXPORT_SYMBOL_GPL(vfio_device_get_from_dev);
846
847 static struct vfio_device *vfio_device_get_from_name(struct vfio_group *group,
848 char *buf)
849 {
850 struct vfio_device *it, *device = NULL;
851
852 mutex_lock(&group->device_lock);
853 list_for_each_entry(it, &group->device_list, group_next) {
854 if (!strcmp(dev_name(it->dev), buf)) {
855 device = it;
856 vfio_device_get(device);
857 break;
858 }
859 }
860 mutex_unlock(&group->device_lock);
861
862 return device;
863 }
864
865 /*
866 * Caller must hold a reference to the vfio_device
867 */
868 void *vfio_device_data(struct vfio_device *device)
869 {
870 return device->device_data;
871 }
872 EXPORT_SYMBOL_GPL(vfio_device_data);
873
874 /* Given a referenced group, check if it contains the device */
875 static bool vfio_dev_present(struct vfio_group *group, struct device *dev)
876 {
877 struct vfio_device *device;
878
879 device = vfio_group_get_device(group, dev);
880 if (!device)
881 return false;
882
883 vfio_device_put(device);
884 return true;
885 }
886
887 /*
888 * Decrement the device reference count and wait for the device to be
889 * removed. Open file descriptors for the device... */
890 void *vfio_del_group_dev(struct device *dev)
891 {
892 struct vfio_device *device = dev_get_drvdata(dev);
893 struct vfio_group *group = device->group;
894 void *device_data = device->device_data;
895 struct vfio_unbound_dev *unbound;
896 unsigned int i = 0;
897 long ret;
898 bool interrupted = false;
899
900 /*
901 * The group exists so long as we have a device reference. Get
902 * a group reference and use it to scan for the device going away.
903 */
904 vfio_group_get(group);
905
906 /*
907 * When the device is removed from the group, the group suddenly
908 * becomes non-viable; the device has a driver (until the unbind
909 * completes), but it's not present in the group. This is bad news
910 * for any external users that need to re-acquire a group reference
911 * in order to match and release their existing reference. To
912 * solve this, we track such devices on the unbound_list to bridge
913 * the gap until they're fully unbound.
914 */
915 unbound = kzalloc(sizeof(*unbound), GFP_KERNEL);
916 if (unbound) {
917 unbound->dev = dev;
918 mutex_lock(&group->unbound_lock);
919 list_add(&unbound->unbound_next, &group->unbound_list);
920 mutex_unlock(&group->unbound_lock);
921 }
922 WARN_ON(!unbound);
923
924 vfio_device_put(device);
925
926 /*
927 * If the device is still present in the group after the above
928 * 'put', then it is in use and we need to request it from the
929 * bus driver. The driver may in turn need to request the
930 * device from the user. We send the request on an arbitrary
931 * interval with counter to allow the driver to take escalating
932 * measures to release the device if it has the ability to do so.
933 */
934 do {
935 device = vfio_group_get_device(group, dev);
936 if (!device)
937 break;
938
939 if (device->ops->request)
940 device->ops->request(device_data, i++);
941
942 vfio_device_put(device);
943
944 if (interrupted) {
945 ret = wait_event_timeout(vfio.release_q,
946 !vfio_dev_present(group, dev), HZ * 10);
947 } else {
948 ret = wait_event_interruptible_timeout(vfio.release_q,
949 !vfio_dev_present(group, dev), HZ * 10);
950 if (ret == -ERESTARTSYS) {
951 interrupted = true;
952 dev_warn(dev,
953 "Device is currently in use, task"
954 " \"%s\" (%d) "
955 "blocked until device is released",
956 current->comm, task_pid_nr(current));
957 }
958 }
959 } while (ret <= 0);
960
961 vfio_group_put(group);
962
963 return device_data;
964 }
965 EXPORT_SYMBOL_GPL(vfio_del_group_dev);
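
/*
 * Editor's illustration (not part of the original source): the remove-side
 * counterpart to the earlier probe sketch, showing the symmetric teardown a
 * hypothetical bus driver would perform.  vfio-pci follows this same shape;
 * example_remove and example_data are placeholder names.
 */
#if 0
static void example_remove(struct device *dev)
{
	void *example_data;

	/* Blocks (with periodic ops->request callbacks) until users let go */
	example_data = vfio_del_group_dev(dev);

	/* Drop the probe-time reference taken via vfio_iommu_group_get() */
	vfio_iommu_group_put(dev->iommu_group, dev);

	kfree(example_data);
}
#endif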
966
967 /**
968 * VFIO base fd, /dev/vfio/vfio
969 */
970 static long vfio_ioctl_check_extension(struct vfio_container *container,
971 unsigned long arg)
972 {
973 struct vfio_iommu_driver *driver;
974 long ret = 0;
975
976 down_read(&container->group_lock);
977
978 driver = container->iommu_driver;
979
980 switch (arg) {
981 /* No base extensions yet */
982 default:
983 /*
984 * If no driver is set, poll all registered drivers for
985 * extensions and return the first positive result. If
986 * a driver is already set, further queries will be passed
987 * only to that driver.
988 */
989 if (!driver) {
990 mutex_lock(&vfio.iommu_drivers_lock);
991 list_for_each_entry(driver, &vfio.iommu_drivers_list,
992 vfio_next) {
993
994 #ifdef CONFIG_VFIO_NOIOMMU
995 if (!list_empty(&container->group_list) &&
996 (container->noiommu !=
997 (driver->ops == &vfio_noiommu_ops)))
998 continue;
999 #endif
1000
1001 if (!try_module_get(driver->ops->owner))
1002 continue;
1003
1004 ret = driver->ops->ioctl(NULL,
1005 VFIO_CHECK_EXTENSION,
1006 arg);
1007 module_put(driver->ops->owner);
1008 if (ret > 0)
1009 break;
1010 }
1011 mutex_unlock(&vfio.iommu_drivers_lock);
1012 } else
1013 ret = driver->ops->ioctl(container->iommu_data,
1014 VFIO_CHECK_EXTENSION, arg);
1015 }
1016
1017 up_read(&container->group_lock);
1018
1019 return ret;
1020 }
1021
1022 /* hold write lock on container->group_lock */
1023 static int __vfio_container_attach_groups(struct vfio_container *container,
1024 struct vfio_iommu_driver *driver,
1025 void *data)
1026 {
1027 struct vfio_group *group;
1028 int ret = -ENODEV;
1029
1030 list_for_each_entry(group, &container->group_list, container_next) {
1031 ret = driver->ops->attach_group(data, group->iommu_group);
1032 if (ret)
1033 goto unwind;
1034 }
1035
1036 return ret;
1037
1038 unwind:
1039 list_for_each_entry_continue_reverse(group, &container->group_list,
1040 container_next) {
1041 driver->ops->detach_group(data, group->iommu_group);
1042 }
1043
1044 return ret;
1045 }
1046
1047 static long vfio_ioctl_set_iommu(struct vfio_container *container,
1048 unsigned long arg)
1049 {
1050 struct vfio_iommu_driver *driver;
1051 long ret = -ENODEV;
1052
1053 down_write(&container->group_lock);
1054
1055 /*
1056 * The container is designed to be an unprivileged interface while
1057 * the group can be assigned to specific users. Therefore, only by
1058 * adding a group to a container does the user get the privilege of
1059 * enabling the iommu, which may allocate finite resources. There
1060 * is no unset_iommu, but by removing all the groups from a container,
1061 * the container is deprivileged and returns to an unset state.
1062 */
1063 if (list_empty(&container->group_list) || container->iommu_driver) {
1064 up_write(&container->group_lock);
1065 return -EINVAL;
1066 }
1067
1068 mutex_lock(&vfio.iommu_drivers_lock);
1069 list_for_each_entry(driver, &vfio.iommu_drivers_list, vfio_next) {
1070 void *data;
1071
1072 #ifdef CONFIG_VFIO_NOIOMMU
1073 /*
1074 * Only noiommu containers can use vfio-noiommu and noiommu
1075 * containers can only use vfio-noiommu.
1076 */
1077 if (container->noiommu != (driver->ops == &vfio_noiommu_ops))
1078 continue;
1079 #endif
1080
1081 if (!try_module_get(driver->ops->owner))
1082 continue;
1083
1084 /*
1085 * The arg magic for SET_IOMMU is the same as CHECK_EXTENSION,
1086 * so test which iommu driver reported support for this
1087 * extension and call open on them. We also pass them the
1088 * magic, allowing a single driver to support multiple
1089 * interfaces if they'd like.
1090 */
1091 if (driver->ops->ioctl(NULL, VFIO_CHECK_EXTENSION, arg) <= 0) {
1092 module_put(driver->ops->owner);
1093 continue;
1094 }
1095
1096 data = driver->ops->open(arg);
1097 if (IS_ERR(data)) {
1098 ret = PTR_ERR(data);
1099 module_put(driver->ops->owner);
1100 continue;
1101 }
1102
1103 ret = __vfio_container_attach_groups(container, driver, data);
1104 if (ret) {
1105 driver->ops->release(data);
1106 module_put(driver->ops->owner);
1107 continue;
1108 }
1109
1110 container->iommu_driver = driver;
1111 container->iommu_data = data;
1112 break;
1113 }
1114
1115 mutex_unlock(&vfio.iommu_drivers_lock);
1116 up_write(&container->group_lock);
1117
1118 return ret;
1119 }
1120
1121 static long vfio_fops_unl_ioctl(struct file *filep,
1122 unsigned int cmd, unsigned long arg)
1123 {
1124 struct vfio_container *container = filep->private_data;
1125 struct vfio_iommu_driver *driver;
1126 void *data;
1127 long ret = -EINVAL;
1128
1129 if (!container)
1130 return ret;
1131
1132 switch (cmd) {
1133 case VFIO_GET_API_VERSION:
1134 ret = VFIO_API_VERSION;
1135 break;
1136 case VFIO_CHECK_EXTENSION:
1137 ret = vfio_ioctl_check_extension(container, arg);
1138 break;
1139 case VFIO_SET_IOMMU:
1140 ret = vfio_ioctl_set_iommu(container, arg);
1141 break;
1142 default:
1143 down_read(&container->group_lock);
1144
1145 driver = container->iommu_driver;
1146 data = container->iommu_data;
1147
1148 if (driver) /* passthrough all unrecognized ioctls */
1149 ret = driver->ops->ioctl(data, cmd, arg);
1150
1151 up_read(&container->group_lock);
1152 }
1153
1154 return ret;
1155 }
1156
1157 #ifdef CONFIG_COMPAT
1158 static long vfio_fops_compat_ioctl(struct file *filep,
1159 unsigned int cmd, unsigned long arg)
1160 {
1161 arg = (unsigned long)compat_ptr(arg);
1162 return vfio_fops_unl_ioctl(filep, cmd, arg);
1163 }
1164 #endif /* CONFIG_COMPAT */
1165
1166 static int vfio_fops_open(struct inode *inode, struct file *filep)
1167 {
1168 struct vfio_container *container;
1169
1170 container = kzalloc(sizeof(*container), GFP_KERNEL);
1171 if (!container)
1172 return -ENOMEM;
1173
1174 INIT_LIST_HEAD(&container->group_list);
1175 init_rwsem(&container->group_lock);
1176 kref_init(&container->kref);
1177
1178 filep->private_data = container;
1179
1180 return 0;
1181 }
1182
1183 static int vfio_fops_release(struct inode *inode, struct file *filep)
1184 {
1185 struct vfio_container *container = filep->private_data;
1186
1187 filep->private_data = NULL;
1188
1189 vfio_container_put(container);
1190
1191 return 0;
1192 }
1193
1194 /*
1195 * Once an iommu driver is set, we optionally pass read/write/mmap
1196 * on to the driver, allowing management interfaces beyond ioctl.
1197 */
1198 static ssize_t vfio_fops_read(struct file *filep, char __user *buf,
1199 size_t count, loff_t *ppos)
1200 {
1201 struct vfio_container *container = filep->private_data;
1202 struct vfio_iommu_driver *driver;
1203 ssize_t ret = -EINVAL;
1204
1205 down_read(&container->group_lock);
1206
1207 driver = container->iommu_driver;
1208 if (likely(driver && driver->ops->read))
1209 ret = driver->ops->read(container->iommu_data,
1210 buf, count, ppos);
1211
1212 up_read(&container->group_lock);
1213
1214 return ret;
1215 }
1216
1217 static ssize_t vfio_fops_write(struct file *filep, const char __user *buf,
1218 size_t count, loff_t *ppos)
1219 {
1220 struct vfio_container *container = filep->private_data;
1221 struct vfio_iommu_driver *driver;
1222 ssize_t ret = -EINVAL;
1223
1224 down_read(&container->group_lock);
1225
1226 driver = container->iommu_driver;
1227 if (likely(driver && driver->ops->write))
1228 ret = driver->ops->write(container->iommu_data,
1229 buf, count, ppos);
1230
1231 up_read(&container->group_lock);
1232
1233 return ret;
1234 }
1235
1236 static int vfio_fops_mmap(struct file *filep, struct vm_area_struct *vma)
1237 {
1238 struct vfio_container *container = filep->private_data;
1239 struct vfio_iommu_driver *driver;
1240 int ret = -EINVAL;
1241
1242 down_read(&container->group_lock);
1243
1244 driver = container->iommu_driver;
1245 if (likely(driver && driver->ops->mmap))
1246 ret = driver->ops->mmap(container->iommu_data, vma);
1247
1248 up_read(&container->group_lock);
1249
1250 return ret;
1251 }
1252
1253 static const struct file_operations vfio_fops = {
1254 .owner = THIS_MODULE,
1255 .open = vfio_fops_open,
1256 .release = vfio_fops_release,
1257 .read = vfio_fops_read,
1258 .write = vfio_fops_write,
1259 .unlocked_ioctl = vfio_fops_unl_ioctl,
1260 #ifdef CONFIG_COMPAT
1261 .compat_ioctl = vfio_fops_compat_ioctl,
1262 #endif
1263 .mmap = vfio_fops_mmap,
1264 };
1265
1266 /**
1267 * VFIO Group fd, /dev/vfio/$GROUP
1268 */
1269 static void __vfio_group_unset_container(struct vfio_group *group)
1270 {
1271 struct vfio_container *container = group->container;
1272 struct vfio_iommu_driver *driver;
1273
1274 down_write(&container->group_lock);
1275
1276 driver = container->iommu_driver;
1277 if (driver)
1278 driver->ops->detach_group(container->iommu_data,
1279 group->iommu_group);
1280
1281 group->container = NULL;
1282 list_del(&group->container_next);
1283
1284 /* Detaching the last group deprivileges a container, remove iommu */
1285 if (driver && list_empty(&container->group_list)) {
1286 driver->ops->release(container->iommu_data);
1287 module_put(driver->ops->owner);
1288 container->iommu_driver = NULL;
1289 container->iommu_data = NULL;
1290 }
1291
1292 up_write(&container->group_lock);
1293
1294 vfio_container_put(container);
1295 }
1296
1297 /*
1298 * VFIO_GROUP_UNSET_CONTAINER should fail if there are other users or
1299 * if there was no container to unset. Since the ioctl is called on
1300 * the group, we know that still exists, therefore the only valid
1301 * transition here is 1->0.
1302 */
1303 static int vfio_group_unset_container(struct vfio_group *group)
1304 {
1305 int users = atomic_cmpxchg(&group->container_users, 1, 0);
1306
1307 if (!users)
1308 return -EINVAL;
1309 if (users != 1)
1310 return -EBUSY;
1311
1312 __vfio_group_unset_container(group);
1313
1314 return 0;
1315 }
1316
1317 /*
1318 * When removing container users, anything that removes the last user
1319 * implicitly removes the group from the container. That is, if the
1320 * group file descriptor is closed, as well as any device file descriptors,
1321 * the group is free.
1322 */
1323 static void vfio_group_try_dissolve_container(struct vfio_group *group)
1324 {
1325 if (0 == atomic_dec_if_positive(&group->container_users))
1326 __vfio_group_unset_container(group);
1327 }
1328
1329 static int vfio_group_set_container(struct vfio_group *group, int container_fd)
1330 {
1331 struct fd f;
1332 struct vfio_container *container;
1333 struct vfio_iommu_driver *driver;
1334 int ret = 0;
1335
1336 if (atomic_read(&group->container_users))
1337 return -EINVAL;
1338
1339 if (group->noiommu && !capable(CAP_SYS_RAWIO))
1340 return -EPERM;
1341
1342 f = fdget(container_fd);
1343 if (!f.file)
1344 return -EBADF;
1345
1346 /* Sanity check, is this really our fd? */
1347 if (f.file->f_op != &vfio_fops) {
1348 fdput(f);
1349 return -EINVAL;
1350 }
1351
1352 container = f.file->private_data;
1353 WARN_ON(!container); /* fget ensures we don't race vfio_release */
1354
1355 down_write(&container->group_lock);
1356
1357 /* Real groups and fake groups cannot mix */
1358 if (!list_empty(&container->group_list) &&
1359 container->noiommu != group->noiommu) {
1360 ret = -EPERM;
1361 goto unlock_out;
1362 }
1363
1364 driver = container->iommu_driver;
1365 if (driver) {
1366 ret = driver->ops->attach_group(container->iommu_data,
1367 group->iommu_group);
1368 if (ret)
1369 goto unlock_out;
1370 }
1371
1372 group->container = container;
1373 container->noiommu = group->noiommu;
1374 list_add(&group->container_next, &container->group_list);
1375
1376 /* Get a reference on the container and mark a user within the group */
1377 vfio_container_get(container);
1378 atomic_inc(&group->container_users);
1379
1380 unlock_out:
1381 up_write(&container->group_lock);
1382 fdput(f);
1383 return ret;
1384 }
1385
1386 static bool vfio_group_viable(struct vfio_group *group)
1387 {
1388 return (iommu_group_for_each_dev(group->iommu_group,
1389 group, vfio_dev_viable) == 0);
1390 }
1391
1392 static int vfio_group_add_container_user(struct vfio_group *group)
1393 {
1394 if (!atomic_inc_not_zero(&group->container_users))
1395 return -EINVAL;
1396
1397 if (group->noiommu) {
1398 atomic_dec(&group->container_users);
1399 return -EPERM;
1400 }
1401 if (!group->container->iommu_driver || !vfio_group_viable(group)) {
1402 atomic_dec(&group->container_users);
1403 return -EINVAL;
1404 }
1405
1406 return 0;
1407 }
1408
1409 static const struct file_operations vfio_device_fops;
1410
1411 static int vfio_group_get_device_fd(struct vfio_group *group, char *buf)
1412 {
1413 struct vfio_device *device;
1414 struct file *filep;
1415 int ret;
1416
1417 if (0 == atomic_read(&group->container_users) ||
1418 !group->container->iommu_driver || !vfio_group_viable(group))
1419 return -EINVAL;
1420
1421 if (group->noiommu && !capable(CAP_SYS_RAWIO))
1422 return -EPERM;
1423
1424 device = vfio_device_get_from_name(group, buf);
1425 if (!device)
1426 return -ENODEV;
1427
1428 ret = device->ops->open(device->device_data);
1429 if (ret) {
1430 vfio_device_put(device);
1431 return ret;
1432 }
1433
1434 /*
1435 * We can't use anon_inode_getfd() because we need to modify
1436 * the f_mode flags directly to allow more than just ioctls
1437 */
1438 ret = get_unused_fd_flags(O_CLOEXEC);
1439 if (ret < 0) {
1440 device->ops->release(device->device_data);
1441 vfio_device_put(device);
1442 return ret;
1443 }
1444
1445 filep = anon_inode_getfile("[vfio-device]", &vfio_device_fops,
1446 device, O_RDWR);
1447 if (IS_ERR(filep)) {
1448 put_unused_fd(ret);
1449 ret = PTR_ERR(filep);
1450 device->ops->release(device->device_data);
1451 vfio_device_put(device);
1452 return ret;
1453 }
1454
1455 /*
1456 * TODO: add an anon_inode interface to do this.
1457 * Appears to be missing by lack of need rather than
1458 * explicitly prevented. Now there's need.
1459 */
1460 filep->f_mode |= (FMODE_LSEEK | FMODE_PREAD | FMODE_PWRITE);
1461
1462 atomic_inc(&group->container_users);
1463
1464 fd_install(ret, filep);
1465
1466 if (group->noiommu)
1467 dev_warn(device->dev, "vfio-noiommu device opened by user "
1468 "(%s:%d)\n", current->comm, task_pid_nr(current));
1469
1470 return ret;
1471 }
1472
1473 static long vfio_group_fops_unl_ioctl(struct file *filep,
1474 unsigned int cmd, unsigned long arg)
1475 {
1476 struct vfio_group *group = filep->private_data;
1477 long ret = -ENOTTY;
1478
1479 switch (cmd) {
1480 case VFIO_GROUP_GET_STATUS:
1481 {
1482 struct vfio_group_status status;
1483 unsigned long minsz;
1484
1485 minsz = offsetofend(struct vfio_group_status, flags);
1486
1487 if (copy_from_user(&status, (void __user *)arg, minsz))
1488 return -EFAULT;
1489
1490 if (status.argsz < minsz)
1491 return -EINVAL;
1492
1493 status.flags = 0;
1494
1495 if (vfio_group_viable(group))
1496 status.flags |= VFIO_GROUP_FLAGS_VIABLE;
1497
1498 if (group->container)
1499 status.flags |= VFIO_GROUP_FLAGS_CONTAINER_SET;
1500
1501 if (copy_to_user((void __user *)arg, &status, minsz))
1502 return -EFAULT;
1503
1504 ret = 0;
1505 break;
1506 }
1507 case VFIO_GROUP_SET_CONTAINER:
1508 {
1509 int fd;
1510
1511 if (get_user(fd, (int __user *)arg))
1512 return -EFAULT;
1513
1514 if (fd < 0)
1515 return -EINVAL;
1516
1517 ret = vfio_group_set_container(group, fd);
1518 break;
1519 }
1520 case VFIO_GROUP_UNSET_CONTAINER:
1521 ret = vfio_group_unset_container(group);
1522 break;
1523 case VFIO_GROUP_GET_DEVICE_FD:
1524 {
1525 char *buf;
1526
1527 buf = strndup_user((const char __user *)arg, PAGE_SIZE);
1528 if (IS_ERR(buf))
1529 return PTR_ERR(buf);
1530
1531 ret = vfio_group_get_device_fd(group, buf);
1532 kfree(buf);
1533 break;
1534 }
1535 }
1536
1537 return ret;
1538 }
1539
1540 #ifdef CONFIG_COMPAT
1541 static long vfio_group_fops_compat_ioctl(struct file *filep,
1542 unsigned int cmd, unsigned long arg)
1543 {
1544 arg = (unsigned long)compat_ptr(arg);
1545 return vfio_group_fops_unl_ioctl(filep, cmd, arg);
1546 }
1547 #endif /* CONFIG_COMPAT */
1548
1549 static int vfio_group_fops_open(struct inode *inode, struct file *filep)
1550 {
1551 struct vfio_group *group;
1552 int opened;
1553
1554 group = vfio_group_get_from_minor(iminor(inode));
1555 if (!group)
1556 return -ENODEV;
1557
1558 if (group->noiommu && !capable(CAP_SYS_RAWIO)) {
1559 vfio_group_put(group);
1560 return -EPERM;
1561 }
1562
1563 /* Do we need multiple instances of the group open? Seems not. */
1564 opened = atomic_cmpxchg(&group->opened, 0, 1);
1565 if (opened) {
1566 vfio_group_put(group);
1567 return -EBUSY;
1568 }
1569
1570 /* Is something still in use from a previous open? */
1571 if (group->container) {
1572 atomic_dec(&group->opened);
1573 vfio_group_put(group);
1574 return -EBUSY;
1575 }
1576
1577 /* Warn if previous user didn't cleanup and re-init to drop them */
1578 if (WARN_ON(group->notifier.head))
1579 BLOCKING_INIT_NOTIFIER_HEAD(&group->notifier);
1580
1581 filep->private_data = group;
1582
1583 return 0;
1584 }
1585
1586 static int vfio_group_fops_release(struct inode *inode, struct file *filep)
1587 {
1588 struct vfio_group *group = filep->private_data;
1589
1590 filep->private_data = NULL;
1591
1592 vfio_group_try_dissolve_container(group);
1593
1594 atomic_dec(&group->opened);
1595
1596 vfio_group_put(group);
1597
1598 return 0;
1599 }
1600
1601 static const struct file_operations vfio_group_fops = {
1602 .owner = THIS_MODULE,
1603 .unlocked_ioctl = vfio_group_fops_unl_ioctl,
1604 #ifdef CONFIG_COMPAT
1605 .compat_ioctl = vfio_group_fops_compat_ioctl,
1606 #endif
1607 .open = vfio_group_fops_open,
1608 .release = vfio_group_fops_release,
1609 };
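
/*
 * Editor's illustration (not part of the original source): the character
 * device flow implemented by vfio_fops and vfio_group_fops above, as seen
 * from a hypothetical userspace client.  The sequence mirrors
 * Documentation/vfio.txt; group "26" and device "0000:06:0d.0" are example
 * names only, and error checking is omitted for brevity.
 */
#if 0
	int container, group, device;
	struct vfio_group_status status = { .argsz = sizeof(status) };

	container = open("/dev/vfio/vfio", O_RDWR);
	ioctl(container, VFIO_GET_API_VERSION);		/* == VFIO_API_VERSION */

	group = open("/dev/vfio/26", O_RDWR);
	ioctl(group, VFIO_GROUP_GET_STATUS, &status);	/* check VIABLE flag */
	ioctl(group, VFIO_GROUP_SET_CONTAINER, &container);
	ioctl(container, VFIO_SET_IOMMU, VFIO_TYPE1_IOMMU);

	device = ioctl(group, VFIO_GROUP_GET_DEVICE_FD, "0000:06:0d.0");
#endif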
1610
1611 /**
1612 * VFIO Device fd
1613 */
1614 static int vfio_device_fops_release(struct inode *inode, struct file *filep)
1615 {
1616 struct vfio_device *device = filep->private_data;
1617
1618 device->ops->release(device->device_data);
1619
1620 vfio_group_try_dissolve_container(device->group);
1621
1622 vfio_device_put(device);
1623
1624 return 0;
1625 }
1626
1627 static long vfio_device_fops_unl_ioctl(struct file *filep,
1628 unsigned int cmd, unsigned long arg)
1629 {
1630 struct vfio_device *device = filep->private_data;
1631
1632 if (unlikely(!device->ops->ioctl))
1633 return -EINVAL;
1634
1635 return device->ops->ioctl(device->device_data, cmd, arg);
1636 }
1637
1638 static ssize_t vfio_device_fops_read(struct file *filep, char __user *buf,
1639 size_t count, loff_t *ppos)
1640 {
1641 struct vfio_device *device = filep->private_data;
1642
1643 if (unlikely(!device->ops->read))
1644 return -EINVAL;
1645
1646 return device->ops->read(device->device_data, buf, count, ppos);
1647 }
1648
1649 static ssize_t vfio_device_fops_write(struct file *filep,
1650 const char __user *buf,
1651 size_t count, loff_t *ppos)
1652 {
1653 struct vfio_device *device = filep->private_data;
1654
1655 if (unlikely(!device->ops->write))
1656 return -EINVAL;
1657
1658 return device->ops->write(device->device_data, buf, count, ppos);
1659 }
1660
1661 static int vfio_device_fops_mmap(struct file *filep, struct vm_area_struct *vma)
1662 {
1663 struct vfio_device *device = filep->private_data;
1664
1665 if (unlikely(!device->ops->mmap))
1666 return -EINVAL;
1667
1668 return device->ops->mmap(device->device_data, vma);
1669 }
1670
1671 #ifdef CONFIG_COMPAT
1672 static long vfio_device_fops_compat_ioctl(struct file *filep,
1673 unsigned int cmd, unsigned long arg)
1674 {
1675 arg = (unsigned long)compat_ptr(arg);
1676 return vfio_device_fops_unl_ioctl(filep, cmd, arg);
1677 }
1678 #endif /* CONFIG_COMPAT */
1679
1680 static const struct file_operations vfio_device_fops = {
1681 .owner = THIS_MODULE,
1682 .release = vfio_device_fops_release,
1683 .read = vfio_device_fops_read,
1684 .write = vfio_device_fops_write,
1685 .unlocked_ioctl = vfio_device_fops_unl_ioctl,
1686 #ifdef CONFIG_COMPAT
1687 .compat_ioctl = vfio_device_fops_compat_ioctl,
1688 #endif
1689 .mmap = vfio_device_fops_mmap,
1690 };
1691
1692 /**
1693 * External user API, exported by symbols to be linked dynamically.
1694 *
1695 * The protocol includes:
1696 * 1. do normal VFIO init operation:
1697 * - opening a new container;
1698 * - attaching group(s) to it;
1699 * - setting an IOMMU driver for a container.
1700 * When IOMMU is set for a container, all groups in it are
1701 * considered ready to use by an external user.
1702 *
1703 * 2. User space passes a group fd to an external user.
1704 * The external user calls vfio_group_get_external_user()
1705 * to verify that:
1706 * - the group is initialized;
1707 * - IOMMU is set for it.
1708 * If both checks passed, vfio_group_get_external_user()
1709 * increments the container user counter to prevent
1710 * the VFIO group from disposal before KVM exits.
1711 *
1712 * 3. The external user calls vfio_external_user_iommu_id()
1713 * to know an IOMMU ID.
1714 *
1715 * 4. When the external KVM finishes, it calls
1716 * vfio_group_put_external_user() to release the VFIO group.
1717 * This call decrements the container user counter.
1718 */
1719 struct vfio_group *vfio_group_get_external_user(struct file *filep)
1720 {
1721 struct vfio_group *group = filep->private_data;
1722 int ret;
1723
1724 if (filep->f_op != &vfio_group_fops)
1725 return ERR_PTR(-EINVAL);
1726
1727 ret = vfio_group_add_container_user(group);
1728 if (ret)
1729 return ERR_PTR(ret);
1730
1731 vfio_group_get(group);
1732
1733 return group;
1734 }
1735 EXPORT_SYMBOL_GPL(vfio_group_get_external_user);
1736
1737 void vfio_group_put_external_user(struct vfio_group *group)
1738 {
1739 vfio_group_try_dissolve_container(group);
1740 vfio_group_put(group);
1741 }
1742 EXPORT_SYMBOL_GPL(vfio_group_put_external_user);
1743
1744 int vfio_external_user_iommu_id(struct vfio_group *group)
1745 {
1746 return iommu_group_id(group->iommu_group);
1747 }
1748 EXPORT_SYMBOL_GPL(vfio_external_user_iommu_id);
1749
1750 long vfio_external_check_extension(struct vfio_group *group, unsigned long arg)
1751 {
1752 return vfio_ioctl_check_extension(group->container, arg);
1753 }
1754 EXPORT_SYMBOL_GPL(vfio_external_check_extension);
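
/*
 * Editor's illustration (not part of the original source): the external-user
 * protocol above as a hypothetical in-kernel consumer would follow it, given
 * a VFIO group fd from userspace (KVM's kvm-vfio device is the in-tree
 * example, which resolves these exports via symbol_get()).
 */
#if 0
	struct fd f = fdget(group_fd);
	struct vfio_group *group;

	if (!f.file)
		return -EBADF;

	group = vfio_group_get_external_user(f.file);
	fdput(f);
	if (IS_ERR(group))
		return PTR_ERR(group);

	/* e.g. record the IOMMU group ID for isolation bookkeeping */
	pr_info("external user holds iommu group %d\n",
		vfio_external_user_iommu_id(group));

	/* ... later, when the external user is done with the group ... */
	vfio_group_put_external_user(group);
#endif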
1755
1756 /**
1757 * Sub-module support
1758 */
1759 /*
1760 * Helper for managing a buffer of info chain capabilities, allocate or
1761 * reallocate a buffer with additional @size, filling in @id and @version
1762 * of the capability. A pointer to the new capability is returned.
1763 *
1764 * NB. The chain is based at the head of the buffer, so new entries are
1765 * added to the tail, vfio_info_cap_shift() should be called to fixup the
1766 * next offsets prior to copying to the user buffer.
1767 */
1768 struct vfio_info_cap_header *vfio_info_cap_add(struct vfio_info_cap *caps,
1769 size_t size, u16 id, u16 version)
1770 {
1771 void *buf;
1772 struct vfio_info_cap_header *header, *tmp;
1773
1774 buf = krealloc(caps->buf, caps->size + size, GFP_KERNEL);
1775 if (!buf) {
1776 kfree(caps->buf);
1777 caps->size = 0;
1778 return ERR_PTR(-ENOMEM);
1779 }
1780
1781 caps->buf = buf;
1782 header = buf + caps->size;
1783
1784 /* Eventually copied to user buffer, zero */
1785 memset(header, 0, size);
1786
1787 header->id = id;
1788 header->version = version;
1789
1790 /* Add to the end of the capability chain */
1791 for (tmp = buf; tmp->next; tmp = buf + tmp->next)
1792 ; /* nothing */
1793
1794 tmp->next = caps->size;
1795 caps->size += size;
1796
1797 return header;
1798 }
1799 EXPORT_SYMBOL_GPL(vfio_info_cap_add);
1800
1801 void vfio_info_cap_shift(struct vfio_info_cap *caps, size_t offset)
1802 {
1803 struct vfio_info_cap_header *tmp;
1804 void *buf = (void *)caps->buf;
1805
1806 for (tmp = buf; tmp->next; tmp = buf + tmp->next - offset)
1807 tmp->next += offset;
1808 }
1809 EXPORT_SYMBOL(vfio_info_cap_shift);
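
/*
 * Editor's illustration (not part of the original source): the intended use
 * of the capability-chain helpers above and the vfio_info_add_capability()
 * wrapper below, roughly as a sub-driver's *_GET_REGION_INFO ioctl handler
 * might apply them (vfio-pci follows this shape).  "info", "sparse" and "arg"
 * stand in for the handler's vfio_region_info, a filled-in sparse-mmap
 * capability and the ioctl argument.
 */
#if 0
	struct vfio_info_cap caps = { .buf = NULL, .size = 0 };
	int ret;

	ret = vfio_info_add_capability(&caps, VFIO_REGION_INFO_CAP_SPARSE_MMAP,
				       sparse);
	if (ret)
		return ret;

	if (caps.size) {
		info.flags |= VFIO_REGION_INFO_FLAG_CAPS;
		if (info.argsz < sizeof(info) + caps.size) {
			/* Tell userspace how much room to provide next time */
			info.argsz = sizeof(info) + caps.size;
			info.cap_offset = 0;
		} else {
			/* Chain offsets are buffer-relative; rebase past info */
			vfio_info_cap_shift(&caps, sizeof(info));
			if (copy_to_user((void __user *)arg + sizeof(info),
					 caps.buf, caps.size)) {
				kfree(caps.buf);
				return -EFAULT;
			}
			info.cap_offset = sizeof(info);
		}
		kfree(caps.buf);
	}
#endif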
1810
1811 static int sparse_mmap_cap(struct vfio_info_cap *caps, void *cap_type)
1812 {
1813 struct vfio_info_cap_header *header;
1814 struct vfio_region_info_cap_sparse_mmap *sparse_cap, *sparse = cap_type;
1815 size_t size;
1816
1817 size = sizeof(*sparse) + sparse->nr_areas * sizeof(*sparse->areas);
1818 header = vfio_info_cap_add(caps, size,
1819 VFIO_REGION_INFO_CAP_SPARSE_MMAP, 1);
1820 if (IS_ERR(header))
1821 return PTR_ERR(header);
1822
1823 sparse_cap = container_of(header,
1824 struct vfio_region_info_cap_sparse_mmap, header);
1825 sparse_cap->nr_areas = sparse->nr_areas;
1826 memcpy(sparse_cap->areas, sparse->areas,
1827 sparse->nr_areas * sizeof(*sparse->areas));
1828 return 0;
1829 }
1830
1831 static int region_type_cap(struct vfio_info_cap *caps, void *cap_type)
1832 {
1833 struct vfio_info_cap_header *header;
1834 struct vfio_region_info_cap_type *type_cap, *cap = cap_type;
1835
1836 header = vfio_info_cap_add(caps, sizeof(*cap),
1837 VFIO_REGION_INFO_CAP_TYPE, 1);
1838 if (IS_ERR(header))
1839 return PTR_ERR(header);
1840
1841 type_cap = container_of(header, struct vfio_region_info_cap_type,
1842 header);
1843 type_cap->type = cap->type;
1844 type_cap->subtype = cap->subtype;
1845 return 0;
1846 }
1847
1848 int vfio_info_add_capability(struct vfio_info_cap *caps, int cap_type_id,
1849 void *cap_type)
1850 {
1851 int ret = -EINVAL;
1852
1853 if (!cap_type)
1854 return 0;
1855
1856 switch (cap_type_id) {
1857 case VFIO_REGION_INFO_CAP_SPARSE_MMAP:
1858 ret = sparse_mmap_cap(caps, cap_type);
1859 break;
1860
1861 case VFIO_REGION_INFO_CAP_TYPE:
1862 ret = region_type_cap(caps, cap_type);
1863 break;
1864 }
1865
1866 return ret;
1867 }
1868 EXPORT_SYMBOL(vfio_info_add_capability);
1869
1870 int vfio_set_irqs_validate_and_prepare(struct vfio_irq_set *hdr, int num_irqs,
1871 int max_irq_type, size_t *data_size)
1872 {
1873 unsigned long minsz;
1874 size_t size;
1875
1876 minsz = offsetofend(struct vfio_irq_set, count);
1877
1878 if ((hdr->argsz < minsz) || (hdr->index >= max_irq_type) ||
1879 (hdr->count >= (U32_MAX - hdr->start)) ||
1880 (hdr->flags & ~(VFIO_IRQ_SET_DATA_TYPE_MASK |
1881 VFIO_IRQ_SET_ACTION_TYPE_MASK)))
1882 return -EINVAL;
1883
1884 if (data_size)
1885 *data_size = 0;
1886
1887 if (hdr->start >= num_irqs || hdr->start + hdr->count > num_irqs)
1888 return -EINVAL;
1889
1890 switch (hdr->flags & VFIO_IRQ_SET_DATA_TYPE_MASK) {
1891 case VFIO_IRQ_SET_DATA_NONE:
1892 size = 0;
1893 break;
1894 case VFIO_IRQ_SET_DATA_BOOL:
1895 size = sizeof(uint8_t);
1896 break;
1897 case VFIO_IRQ_SET_DATA_EVENTFD:
1898 size = sizeof(int32_t);
1899 break;
1900 default:
1901 return -EINVAL;
1902 }
1903
1904 if (size) {
1905 if (hdr->argsz - minsz < hdr->count * size)
1906 return -EINVAL;
1907
1908 if (!data_size)
1909 return -EINVAL;
1910
1911 *data_size = hdr->count * size;
1912 }
1913
1914 return 0;
1915 }
1916 EXPORT_SYMBOL(vfio_set_irqs_validate_and_prepare);
1917
1918 /*
1919 * Pin a set of guest PFNs and return their associated host PFNs for local
1920 * domain only.
1921 * @dev [in] : device
1922 * @user_pfn [in]: array of user/guest PFNs to be pinned.
1923 * @npage [in] : count of elements in user_pfn array. This count should not
1924 * be greater than VFIO_PIN_PAGES_MAX_ENTRIES.
1925 * @prot [in] : protection flags
1926 * @phys_pfn[out]: array of host PFNs
1927 * Return error or number of pages pinned.
1928 */
1929 int vfio_pin_pages(struct device *dev, unsigned long *user_pfn, int npage,
1930 int prot, unsigned long *phys_pfn)
1931 {
1932 struct vfio_container *container;
1933 struct vfio_group *group;
1934 struct vfio_iommu_driver *driver;
1935 int ret;
1936
1937 if (!dev || !user_pfn || !phys_pfn || !npage)
1938 return -EINVAL;
1939
1940 if (npage > VFIO_PIN_PAGES_MAX_ENTRIES)
1941 return -E2BIG;
1942
1943 group = vfio_group_get_from_dev(dev);
1944 if (!group)
1945 return -ENODEV;
1946
1947 ret = vfio_group_add_container_user(group);
1948 if (ret)
1949 goto err_pin_pages;
1950
1951 container = group->container;
1952 down_read(&container->group_lock);
1953
1954 driver = container->iommu_driver;
1955 if (likely(driver && driver->ops->pin_pages))
1956 ret = driver->ops->pin_pages(container->iommu_data, user_pfn,
1957 npage, prot, phys_pfn);
1958 else
1959 ret = -ENOTTY;
1960
1961 up_read(&container->group_lock);
1962 vfio_group_try_dissolve_container(group);
1963
1964 err_pin_pages:
1965 vfio_group_put(group);
1966 return ret;
1967 }
1968 EXPORT_SYMBOL(vfio_pin_pages);
1969
1970 /*
1971 * Unpin set of host PFNs for local domain only.
1972 * @dev [in] : device
1973 * @user_pfn [in]: array of user/guest PFNs to be unpinned. Number of user/guest
1974 * PFNs should not be greater than VFIO_PIN_PAGES_MAX_ENTRIES.
1975 * @npage [in] : count of elements in user_pfn array. This count should not
1976 * be greater than VFIO_PIN_PAGES_MAX_ENTRIES.
1977 * Return error or number of pages unpinned.
1978 */
1979 int vfio_unpin_pages(struct device *dev, unsigned long *user_pfn, int npage)
1980 {
1981 struct vfio_container *container;
1982 struct vfio_group *group;
1983 struct vfio_iommu_driver *driver;
1984 int ret;
1985
1986 if (!dev || !user_pfn || !npage)
1987 return -EINVAL;
1988
1989 if (npage > VFIO_PIN_PAGES_MAX_ENTRIES)
1990 return -E2BIG;
1991
1992 group = vfio_group_get_from_dev(dev);
1993 if (!group)
1994 return -ENODEV;
1995
1996 ret = vfio_group_add_container_user(group);
1997 if (ret)
1998 goto err_unpin_pages;
1999
2000 container = group->container;
2001 down_read(&container->group_lock);
2002
2003 driver = container->iommu_driver;
2004 if (likely(driver && driver->ops->unpin_pages))
2005 ret = driver->ops->unpin_pages(container->iommu_data, user_pfn,
2006 npage);
2007 else
2008 ret = -ENOTTY;
2009
2010 up_read(&container->group_lock);
2011 vfio_group_try_dissolve_container(group);
2012
2013 err_unpin_pages:
2014 vfio_group_put(group);
2015 return ret;
2016 }
2017 EXPORT_SYMBOL(vfio_unpin_pages);
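/*
 * Example (illustrative sketch, continuing the vfio_pin_pages() sketch
 * above): the vendor driver releases the pins by passing back the same
 * user/guest PFN array once its DMA has completed.
 *
 *	ret = vfio_unpin_pages(mdev_dev(mdev), user_pfn, NR);
 *	WARN_ON(ret != NR);
 *
 * The return value is the number of pages unpinned or a negative errno.
 */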
2018
2019 static int vfio_register_iommu_notifier(struct vfio_group *group,
2020 unsigned long *events,
2021 struct notifier_block *nb)
2022 {
2023 struct vfio_container *container;
2024 struct vfio_iommu_driver *driver;
2025 int ret;
2026
2027 ret = vfio_group_add_container_user(group);
2028 if (ret)
2029 return -EINVAL;
2030
2031 container = group->container;
2032 down_read(&container->group_lock);
2033
2034 driver = container->iommu_driver;
2035 if (likely(driver && driver->ops->register_notifier))
2036 ret = driver->ops->register_notifier(container->iommu_data,
2037 events, nb);
2038 else
2039 ret = -ENOTTY;
2040
2041 up_read(&container->group_lock);
2042 vfio_group_try_dissolve_container(group);
2043
2044 return ret;
2045 }
2046
2047 static int vfio_unregister_iommu_notifier(struct vfio_group *group,
2048 struct notifier_block *nb)
2049 {
2050 struct vfio_container *container;
2051 struct vfio_iommu_driver *driver;
2052 int ret;
2053
2054 ret = vfio_group_add_container_user(group);
2055 if (ret)
2056 return -EINVAL;
2057
2058 container = group->container;
2059 down_read(&container->group_lock);
2060
2061 driver = container->iommu_driver;
2062 if (likely(driver && driver->ops->unregister_notifier))
2063 ret = driver->ops->unregister_notifier(container->iommu_data,
2064 nb);
2065 else
2066 ret = -ENOTTY;
2067
2068 up_read(&container->group_lock);
2069 vfio_group_try_dissolve_container(group);
2070
2071 return ret;
2072 }
2073
2074 void vfio_group_set_kvm(struct vfio_group *group, struct kvm *kvm)
2075 {
2076 group->kvm = kvm;
2077 blocking_notifier_call_chain(&group->notifier,
2078 VFIO_GROUP_NOTIFY_SET_KVM, kvm);
2079 }
2080 EXPORT_SYMBOL_GPL(vfio_group_set_kvm);
2081
2082 static int vfio_register_group_notifier(struct vfio_group *group,
2083 unsigned long *events,
2084 struct notifier_block *nb)
2085 {
2086 struct vfio_container *container;
2087 int ret;
2088 bool set_kvm = false;
2089
2090 if (*events & VFIO_GROUP_NOTIFY_SET_KVM)
2091 set_kvm = true;
2092
2093 /* clear known events */
2094 *events &= ~VFIO_GROUP_NOTIFY_SET_KVM;
2095
2096 /* refuse to continue if any unknown events remain */
2097 if (*events)
2098 return -EINVAL;
2099
2100 ret = vfio_group_add_container_user(group);
2101 if (ret)
2102 return -EINVAL;
2103
2104 container = group->container;
2105 down_read(&container->group_lock);
2106
2107 ret = blocking_notifier_chain_register(&group->notifier, nb);
2108
2109 /*
2110 * The attaching of kvm and vfio_group might have already happened, so
2111 * replay the KVM-set event here once upon registration.
2112 */
2113 if (!ret && set_kvm && group->kvm)
2114 blocking_notifier_call_chain(&group->notifier,
2115 VFIO_GROUP_NOTIFY_SET_KVM, group->kvm);
2116
2117 up_read(&container->group_lock);
2118 vfio_group_try_dissolve_container(group);
2119
2120 return ret;
2121 }
2122
2123 static int vfio_unregister_group_notifier(struct vfio_group *group,
2124 struct notifier_block *nb)
2125 {
2126 struct vfio_container *container;
2127 int ret;
2128
2129 ret = vfio_group_add_container_user(group);
2130 if (ret)
2131 return -EINVAL;
2132
2133 container = group->container;
2134 down_read(&container->group_lock);
2135
2136 ret = blocking_notifier_chain_unregister(&group->notifier, nb);
2137
2138 up_read(&container->group_lock);
2139 vfio_group_try_dissolve_container(group);
2140
2141 return ret;
2142 }
2143
2144 int vfio_register_notifier(struct device *dev, enum vfio_notify_type type,
2145 unsigned long *events, struct notifier_block *nb)
2146 {
2147 struct vfio_group *group;
2148 int ret;
2149
2150 if (!dev || !nb || !events || (*events == 0))
2151 return -EINVAL;
2152
2153 group = vfio_group_get_from_dev(dev);
2154 if (!group)
2155 return -ENODEV;
2156
2157 switch (type) {
2158 case VFIO_IOMMU_NOTIFY:
2159 ret = vfio_register_iommu_notifier(group, events, nb);
2160 break;
2161 case VFIO_GROUP_NOTIFY:
2162 ret = vfio_register_group_notifier(group, events, nb);
2163 break;
2164 default:
2165 ret = -EINVAL;
2166 }
2167
2168 vfio_group_put(group);
2169 return ret;
2170 }
2171 EXPORT_SYMBOL(vfio_register_notifier);
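/*
 * Example (illustrative sketch, not part of this file): a vendor driver
 * that pins pages will usually also register for DMA unmap notifications
 * so it can drop its pins when userspace unmaps the IOVA range.  The
 * "vdev" embedding and the callback name below are hypothetical.
 *
 *	unsigned long events = VFIO_IOMMU_NOTIFY_DMA_UNMAP;
 *	int ret;
 *
 *	vdev->iommu_nb.notifier_call = my_dma_unmap_notifier;
 *	ret = vfio_register_notifier(mdev_dev(mdev), VFIO_IOMMU_NOTIFY,
 *				     &events, &vdev->iommu_nb);
 *
 * A matching vfio_unregister_notifier() call (below) must be made before
 * the device is released.
 */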
2172
2173 int vfio_unregister_notifier(struct device *dev, enum vfio_notify_type type,
2174 struct notifier_block *nb)
2175 {
2176 struct vfio_group *group;
2177 int ret;
2178
2179 if (!dev || !nb)
2180 return -EINVAL;
2181
2182 group = vfio_group_get_from_dev(dev);
2183 if (!group)
2184 return -ENODEV;
2185
2186 switch (type) {
2187 case VFIO_IOMMU_NOTIFY:
2188 ret = vfio_unregister_iommu_notifier(group, nb);
2189 break;
2190 case VFIO_GROUP_NOTIFY:
2191 ret = vfio_unregister_group_notifier(group, nb);
2192 break;
2193 default:
2194 ret = -EINVAL;
2195 }
2196
2197 vfio_group_put(group);
2198 return ret;
2199 }
2200 EXPORT_SYMBOL(vfio_unregister_notifier);
2201
2202 /**
2203 * Module/class support
2204 */
2205 static char *vfio_devnode(struct device *dev, umode_t *mode)
2206 {
2207 return kasprintf(GFP_KERNEL, "vfio/%s", dev_name(dev));
2208 }
2209
2210 static struct miscdevice vfio_dev = {
2211 .minor = VFIO_MINOR,
2212 .name = "vfio",
2213 .fops = &vfio_fops,
2214 .nodename = "vfio/vfio",
2215 .mode = S_IRUGO | S_IWUGO,
2216 };
2217
2218 static int __init vfio_init(void)
2219 {
2220 int ret;
2221
2222 idr_init(&vfio.group_idr);
2223 mutex_init(&vfio.group_lock);
2224 mutex_init(&vfio.iommu_drivers_lock);
2225 INIT_LIST_HEAD(&vfio.group_list);
2226 INIT_LIST_HEAD(&vfio.iommu_drivers_list);
2227 init_waitqueue_head(&vfio.release_q);
2228
2229 ret = misc_register(&vfio_dev);
2230 if (ret) {
2231 pr_err("vfio: misc device register failed\n");
2232 return ret;
2233 }
2234
2235 /* /dev/vfio/$GROUP */
2236 vfio.class = class_create(THIS_MODULE, "vfio");
2237 if (IS_ERR(vfio.class)) {
2238 ret = PTR_ERR(vfio.class);
2239 goto err_class;
2240 }
2241
2242 vfio.class->devnode = vfio_devnode;
2243
2244 ret = alloc_chrdev_region(&vfio.group_devt, 0, MINORMASK, "vfio");
2245 if (ret)
2246 goto err_alloc_chrdev;
2247
2248 cdev_init(&vfio.group_cdev, &vfio_group_fops);
2249 ret = cdev_add(&vfio.group_cdev, vfio.group_devt, MINORMASK);
2250 if (ret)
2251 goto err_cdev_add;
2252
2253 pr_info(DRIVER_DESC " version: " DRIVER_VERSION "\n");
2254
2255 /*
2256 * Attempt to load known iommu-drivers. This gives us a working
2257 * environment without the user needing to explicitly load iommu
2258 * drivers.
2259 */
2260 request_module_nowait("vfio_iommu_type1");
2261 request_module_nowait("vfio_iommu_spapr_tce");
2262
2263 #ifdef CONFIG_VFIO_NOIOMMU
2264 vfio_register_iommu_driver(&vfio_noiommu_ops);
2265 #endif
2266 return 0;
2267
2268 err_cdev_add:
2269 unregister_chrdev_region(vfio.group_devt, MINORMASK);
2270 err_alloc_chrdev:
2271 class_destroy(vfio.class);
2272 vfio.class = NULL;
2273 err_class:
2274 misc_deregister(&vfio_dev);
2275 return ret;
2276 }
2277
2278 static void __exit vfio_cleanup(void)
2279 {
2280 WARN_ON(!list_empty(&vfio.group_list));
2281
2282 #ifdef CONFIG_VFIO_NOIOMMU
2283 vfio_unregister_iommu_driver(&vfio_noiommu_ops);
2284 #endif
2285 idr_destroy(&vfio.group_idr);
2286 cdev_del(&vfio.group_cdev);
2287 unregister_chrdev_region(vfio.group_devt, MINORMASK);
2288 class_destroy(vfio.class);
2289 vfio.class = NULL;
2290 misc_deregister(&vfio_dev);
2291 }
2292
2293 module_init(vfio_init);
2294 module_exit(vfio_cleanup);
2295
2296 MODULE_VERSION(DRIVER_VERSION);
2297 MODULE_LICENSE("GPL v2");
2298 MODULE_AUTHOR(DRIVER_AUTHOR);
2299 MODULE_DESCRIPTION(DRIVER_DESC);
2300 MODULE_ALIAS_MISCDEV(VFIO_MINOR);
2301 MODULE_ALIAS("devname:vfio/vfio");