git.proxmox.com Git - mirror_qemu.git/blob - hw/vfio/platform.c
Merge remote-tracking branch 'remotes/mst/tags/for_upstream' into staging
[mirror_qemu.git] / hw / vfio / platform.c
1 /*
2 * vfio based device assignment support - platform devices
3 *
4 * Copyright Linaro Limited, 2014
5 *
6 * Authors:
7 * Kim Phillips <kim.phillips@linaro.org>
8 * Eric Auger <eric.auger@linaro.org>
9 *
10 * This work is licensed under the terms of the GNU GPL, version 2. See
11 * the COPYING file in the top-level directory.
12 *
13 * Based on vfio based PCI device assignment support:
14 * Copyright Red Hat, Inc. 2012
15 */
16
17 #include <linux/vfio.h>
18 #include <sys/ioctl.h>
19
20 #include "hw/vfio/vfio-platform.h"
21 #include "qemu/error-report.h"
22 #include "qemu/range.h"
23 #include "sysemu/sysemu.h"
24 #include "exec/memory.h"
25 #include "qemu/queue.h"
26 #include "hw/sysbus.h"
27 #include "trace.h"
28 #include "hw/platform-bus.h"
29
30 /*
31 * Functions used whatever the injection method
32 */
33
34 /**
35 * vfio_init_intp - allocate, initialize the IRQ struct pointer
36 * and add it into the list of IRQs
37 * @vbasedev: the VFIO device handle
38 * @info: irq info struct retrieved from VFIO driver
39 */
40 static VFIOINTp *vfio_init_intp(VFIODevice *vbasedev,
41 struct vfio_irq_info info)
42 {
43 int ret;
44 VFIOPlatformDevice *vdev =
45 container_of(vbasedev, VFIOPlatformDevice, vbasedev);
46 SysBusDevice *sbdev = SYS_BUS_DEVICE(vdev);
47 VFIOINTp *intp;
48
49 intp = g_malloc0(sizeof(*intp));
50 intp->vdev = vdev;
51 intp->pin = info.index;
52 intp->flags = info.flags;
53 intp->state = VFIO_IRQ_INACTIVE;
54
55 sysbus_init_irq(sbdev, &intp->qemuirq);
56
57 /* Get an eventfd for trigger */
58 ret = event_notifier_init(&intp->interrupt, 0);
59 if (ret) {
60 g_free(intp);
61 error_report("vfio: Error: trigger event_notifier_init failed ");
62 return NULL;
63 }
64
65 QLIST_INSERT_HEAD(&vdev->intp_list, intp, next);
66 return intp;
67 }
68
69 /**
70 * vfio_set_trigger_eventfd - set VFIO eventfd handling
71 *
72 * @intp: IRQ struct handle
73 * @handler: handler to be called on eventfd signaling
74 *
75 * Setup VFIO signaling and attach an optional user-side handler
76 * to the eventfd
77 */
78 static int vfio_set_trigger_eventfd(VFIOINTp *intp,
79 eventfd_user_side_handler_t handler)
80 {
81 VFIODevice *vbasedev = &intp->vdev->vbasedev;
82 struct vfio_irq_set *irq_set;
83 int argsz, ret;
84 int32_t *pfd;
85
86 argsz = sizeof(*irq_set) + sizeof(*pfd);
87 irq_set = g_malloc0(argsz);
88 irq_set->argsz = argsz;
89 irq_set->flags = VFIO_IRQ_SET_DATA_EVENTFD | VFIO_IRQ_SET_ACTION_TRIGGER;
90 irq_set->index = intp->pin;
91 irq_set->start = 0;
92 irq_set->count = 1;
93 pfd = (int32_t *)&irq_set->data;
94 *pfd = event_notifier_get_fd(&intp->interrupt);
95 qemu_set_fd_handler(*pfd, (IOHandler *)handler, NULL, intp);
96 ret = ioctl(vbasedev->fd, VFIO_DEVICE_SET_IRQS, irq_set);
97 g_free(irq_set);
98 if (ret < 0) {
99 error_report("vfio: Failed to set trigger eventfd: %m");
100 qemu_set_fd_handler(*pfd, NULL, NULL, NULL);
101 }
102 return ret;
103 }
104
105 /*
106 * Functions only used when eventfds are handled on user-side
107 * ie. without irqfd
108 */
109
110 /**
111 * vfio_mmap_set_enabled - enable/disable the fast path mode
112 * @vdev: the VFIO platform device
113 * @enabled: the target mmap state
114 *
115 * enabled = true ~ fast path = MMIO region is mmaped (no KVM TRAP);
116 * enabled = false ~ slow path = MMIO region is trapped and region callbacks
117 * are called; slow path enables to trap the device IRQ status register reset
118 */
119
120 static void vfio_mmap_set_enabled(VFIOPlatformDevice *vdev, bool enabled)
121 {
122 int i;
123
124 trace_vfio_platform_mmap_set_enabled(enabled);
125
126 for (i = 0; i < vdev->vbasedev.num_regions; i++) {
127 VFIORegion *region = vdev->regions[i];
128
129 memory_region_set_enabled(&region->mmap_mem, enabled);
130 }
131 }
132
133 /**
134 * vfio_intp_mmap_enable - timer function, restores the fast path
135 * if there is no more active IRQ
136 * @opaque: actually points to the VFIO platform device
137 *
138 * Called on mmap timer timout, this function checks whether the
139 * IRQ is still active and if not, restores the fast path.
140 * by construction a single eventfd is handled at a time.
141 * if the IRQ is still active, the timer is re-programmed.
142 */
143 static void vfio_intp_mmap_enable(void *opaque)
144 {
145 VFIOINTp *tmp;
146 VFIOPlatformDevice *vdev = (VFIOPlatformDevice *)opaque;
147
148 qemu_mutex_lock(&vdev->intp_mutex);
149 QLIST_FOREACH(tmp, &vdev->intp_list, next) {
150 if (tmp->state == VFIO_IRQ_ACTIVE) {
151 trace_vfio_platform_intp_mmap_enable(tmp->pin);
152 /* re-program the timer to check active status later */
153 timer_mod(vdev->mmap_timer,
154 qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) +
155 vdev->mmap_timeout);
156 qemu_mutex_unlock(&vdev->intp_mutex);
157 return;
158 }
159 }
160 vfio_mmap_set_enabled(vdev, true);
161 qemu_mutex_unlock(&vdev->intp_mutex);
162 }
163
164 /**
165 * vfio_intp_inject_pending_lockheld - Injects a pending IRQ
166 * @opaque: opaque pointer, in practice the VFIOINTp handle
167 *
168 * The function is called on a previous IRQ completion, from
169 * vfio_platform_eoi, while the intp_mutex is locked.
170 * Also in such situation, the slow path already is set and
171 * the mmap timer was already programmed.
172 */
173 static void vfio_intp_inject_pending_lockheld(VFIOINTp *intp)
174 {
175 trace_vfio_platform_intp_inject_pending_lockheld(intp->pin,
176 event_notifier_get_fd(&intp->interrupt));
177
178 intp->state = VFIO_IRQ_ACTIVE;
179
180 /* trigger the virtual IRQ */
181 qemu_set_irq(intp->qemuirq, 1);
182 }
183
184 /**
185 * vfio_intp_interrupt - The user-side eventfd handler
186 * @opaque: opaque pointer which in practice is the VFIOINTp handle
187 *
188 * the function is entered in event handler context:
189 * the vIRQ is injected into the guest if there is no other active
190 * or pending IRQ.
191 */
192 static void vfio_intp_interrupt(VFIOINTp *intp)
193 {
194 int ret;
195 VFIOINTp *tmp;
196 VFIOPlatformDevice *vdev = intp->vdev;
197 bool delay_handling = false;
198
199 qemu_mutex_lock(&vdev->intp_mutex);
200 if (intp->state == VFIO_IRQ_INACTIVE) {
201 QLIST_FOREACH(tmp, &vdev->intp_list, next) {
202 if (tmp->state == VFIO_IRQ_ACTIVE ||
203 tmp->state == VFIO_IRQ_PENDING) {
204 delay_handling = true;
205 break;
206 }
207 }
208 }
209 if (delay_handling) {
210 /*
211 * the new IRQ gets a pending status and is pushed in
212 * the pending queue
213 */
214 intp->state = VFIO_IRQ_PENDING;
215 trace_vfio_intp_interrupt_set_pending(intp->pin);
216 QSIMPLEQ_INSERT_TAIL(&vdev->pending_intp_queue,
217 intp, pqnext);
218 ret = event_notifier_test_and_clear(&intp->interrupt);
219 qemu_mutex_unlock(&vdev->intp_mutex);
220 return;
221 }
222
223 trace_vfio_platform_intp_interrupt(intp->pin,
224 event_notifier_get_fd(&intp->interrupt));
225
226 ret = event_notifier_test_and_clear(&intp->interrupt);
227 if (!ret) {
228 error_report("Error when clearing fd=%d (ret = %d)\n",
229 event_notifier_get_fd(&intp->interrupt), ret);
230 }
231
232 intp->state = VFIO_IRQ_ACTIVE;
233
234 /* sets slow path */
235 vfio_mmap_set_enabled(vdev, false);
236
237 /* trigger the virtual IRQ */
238 qemu_set_irq(intp->qemuirq, 1);
239
240 /*
241 * Schedule the mmap timer which will restore fastpath when no IRQ
242 * is active anymore
243 */
244 if (vdev->mmap_timeout) {
245 timer_mod(vdev->mmap_timer,
246 qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) +
247 vdev->mmap_timeout);
248 }
249 qemu_mutex_unlock(&vdev->intp_mutex);
250 }
251
252 /**
253 * vfio_platform_eoi - IRQ completion routine
254 * @vbasedev: the VFIO device handle
255 *
256 * De-asserts the active virtual IRQ and unmasks the physical IRQ
257 * (effective for level sensitive IRQ auto-masked by the VFIO driver).
258 * Then it handles next pending IRQ if any.
259 * eoi function is called on the first access to any MMIO region
260 * after an IRQ was triggered, trapped since slow path was set.
261 * It is assumed this access corresponds to the IRQ status
262 * register reset. With such a mechanism, a single IRQ can be
263 * handled at a time since there is no way to know which IRQ
264 * was completed by the guest (we would need additional details
265 * about the IRQ status register mask).
266 */
267 static void vfio_platform_eoi(VFIODevice *vbasedev)
268 {
269 VFIOINTp *intp;
270 VFIOPlatformDevice *vdev =
271 container_of(vbasedev, VFIOPlatformDevice, vbasedev);
272
273 qemu_mutex_lock(&vdev->intp_mutex);
274 QLIST_FOREACH(intp, &vdev->intp_list, next) {
275 if (intp->state == VFIO_IRQ_ACTIVE) {
276 trace_vfio_platform_eoi(intp->pin,
277 event_notifier_get_fd(&intp->interrupt));
278 intp->state = VFIO_IRQ_INACTIVE;
279
280 /* deassert the virtual IRQ */
281 qemu_set_irq(intp->qemuirq, 0);
282
283 if (intp->flags & VFIO_IRQ_INFO_AUTOMASKED) {
284 /* unmasks the physical level-sensitive IRQ */
285 vfio_unmask_single_irqindex(vbasedev, intp->pin);
286 }
287
288 /* a single IRQ can be active at a time */
289 break;
290 }
291 }
292 /* in case there are pending IRQs, handle the first one */
293 if (!QSIMPLEQ_EMPTY(&vdev->pending_intp_queue)) {
294 intp = QSIMPLEQ_FIRST(&vdev->pending_intp_queue);
295 vfio_intp_inject_pending_lockheld(intp);
296 QSIMPLEQ_REMOVE_HEAD(&vdev->pending_intp_queue, pqnext);
297 }
298 qemu_mutex_unlock(&vdev->intp_mutex);
299 }
300
301 /**
302 * vfio_start_eventfd_injection - starts the virtual IRQ injection using
303 * user-side handled eventfds
304 * @intp: the IRQ struct pointer
305 */
306
307 static int vfio_start_eventfd_injection(VFIOINTp *intp)
308 {
309 int ret;
310
311 ret = vfio_set_trigger_eventfd(intp, vfio_intp_interrupt);
312 if (ret) {
313 error_report("vfio: Error: Failed to pass IRQ fd to the driver: %m");
314 }
315 return ret;
316 }
317
318 /* VFIO skeleton */
319
320 static void vfio_platform_compute_needs_reset(VFIODevice *vbasedev)
321 {
322 vbasedev->needs_reset = true;
323 }
324
325 /* not implemented yet */
326 static int vfio_platform_hot_reset_multi(VFIODevice *vbasedev)
327 {
328 return -1;
329 }
330
331 /**
332 * vfio_populate_device - Allocate and populate MMIO region
333 * and IRQ structs according to driver returned information
334 * @vbasedev: the VFIO device handle
335 *
336 */
337 static int vfio_populate_device(VFIODevice *vbasedev)
338 {
339 VFIOINTp *intp, *tmp;
340 int i, ret = -1;
341 VFIOPlatformDevice *vdev =
342 container_of(vbasedev, VFIOPlatformDevice, vbasedev);
343
344 if (!(vbasedev->flags & VFIO_DEVICE_FLAGS_PLATFORM)) {
345 error_report("vfio: Um, this isn't a platform device");
346 return ret;
347 }
348
349 vdev->regions = g_new0(VFIORegion *, vbasedev->num_regions);
350
351 for (i = 0; i < vbasedev->num_regions; i++) {
352 struct vfio_region_info reg_info = { .argsz = sizeof(reg_info) };
353 VFIORegion *ptr;
354
355 vdev->regions[i] = g_malloc0(sizeof(VFIORegion));
356 ptr = vdev->regions[i];
357 reg_info.index = i;
358 ret = ioctl(vbasedev->fd, VFIO_DEVICE_GET_REGION_INFO, &reg_info);
359 if (ret) {
360 error_report("vfio: Error getting region %d info: %m", i);
361 goto reg_error;
362 }
363 ptr->flags = reg_info.flags;
364 ptr->size = reg_info.size;
365 ptr->fd_offset = reg_info.offset;
366 ptr->nr = i;
367 ptr->vbasedev = vbasedev;
368
369 trace_vfio_platform_populate_regions(ptr->nr,
370 (unsigned long)ptr->flags,
371 (unsigned long)ptr->size,
372 ptr->vbasedev->fd,
373 (unsigned long)ptr->fd_offset);
374 }
375
376 vdev->mmap_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL,
377 vfio_intp_mmap_enable, vdev);
378
379 QSIMPLEQ_INIT(&vdev->pending_intp_queue);
380
381 for (i = 0; i < vbasedev->num_irqs; i++) {
382 struct vfio_irq_info irq = { .argsz = sizeof(irq) };
383
384 irq.index = i;
385 ret = ioctl(vbasedev->fd, VFIO_DEVICE_GET_IRQ_INFO, &irq);
386 if (ret) {
387 error_printf("vfio: error getting device %s irq info",
388 vbasedev->name);
389 goto irq_err;
390 } else {
391 trace_vfio_platform_populate_interrupts(irq.index,
392 irq.count,
393 irq.flags);
394 intp = vfio_init_intp(vbasedev, irq);
395 if (!intp) {
396 error_report("vfio: Error installing IRQ %d up", i);
397 goto irq_err;
398 }
399 }
400 }
401 return 0;
402 irq_err:
403 timer_del(vdev->mmap_timer);
404 QLIST_FOREACH_SAFE(intp, &vdev->intp_list, next, tmp) {
405 QLIST_REMOVE(intp, next);
406 g_free(intp);
407 }
408 reg_error:
409 for (i = 0; i < vbasedev->num_regions; i++) {
410 g_free(vdev->regions[i]);
411 }
412 g_free(vdev->regions);
413 return ret;
414 }
415
416 /* specialized functions for VFIO Platform devices */
417 static VFIODeviceOps vfio_platform_ops = {
418 .vfio_compute_needs_reset = vfio_platform_compute_needs_reset,
419 .vfio_hot_reset_multi = vfio_platform_hot_reset_multi,
420 .vfio_eoi = vfio_platform_eoi,
421 };
422
423 /**
424 * vfio_base_device_init - perform preliminary VFIO setup
425 * @vbasedev: the VFIO device handle
426 *
427 * Implement the VFIO command sequence that allows to discover
428 * assigned device resources: group extraction, device
429 * fd retrieval, resource query.
430 * Precondition: the device name must be initialized
431 */
432 static int vfio_base_device_init(VFIODevice *vbasedev)
433 {
434 VFIOGroup *group;
435 VFIODevice *vbasedev_iter;
436 char path[PATH_MAX], iommu_group_path[PATH_MAX], *group_name;
437 ssize_t len;
438 struct stat st;
439 int groupid;
440 int ret;
441
442 /* name must be set prior to the call */
443 if (!vbasedev->name || strchr(vbasedev->name, '/')) {
444 return -EINVAL;
445 }
446
447 /* Check that the host device exists */
448 g_snprintf(path, sizeof(path), "/sys/bus/platform/devices/%s/",
449 vbasedev->name);
450
451 if (stat(path, &st) < 0) {
452 error_report("vfio: error: no such host device: %s", path);
453 return -errno;
454 }
455
456 g_strlcat(path, "iommu_group", sizeof(path));
457 len = readlink(path, iommu_group_path, sizeof(iommu_group_path));
458 if (len < 0 || len >= sizeof(iommu_group_path)) {
459 error_report("vfio: error no iommu_group for device");
460 return len < 0 ? -errno : -ENAMETOOLONG;
461 }
462
463 iommu_group_path[len] = 0;
464 group_name = basename(iommu_group_path);
465
466 if (sscanf(group_name, "%d", &groupid) != 1) {
467 error_report("vfio: error reading %s: %m", path);
468 return -errno;
469 }
470
471 trace_vfio_platform_base_device_init(vbasedev->name, groupid);
472
473 group = vfio_get_group(groupid, &address_space_memory);
474 if (!group) {
475 error_report("vfio: failed to get group %d", groupid);
476 return -ENOENT;
477 }
478
479 g_snprintf(path, sizeof(path), "%s", vbasedev->name);
480
481 QLIST_FOREACH(vbasedev_iter, &group->device_list, next) {
482 if (strcmp(vbasedev_iter->name, vbasedev->name) == 0) {
483 error_report("vfio: error: device %s is already attached", path);
484 vfio_put_group(group);
485 return -EBUSY;
486 }
487 }
488 ret = vfio_get_device(group, path, vbasedev);
489 if (ret) {
490 error_report("vfio: failed to get device %s", path);
491 vfio_put_group(group);
492 return ret;
493 }
494
495 ret = vfio_populate_device(vbasedev);
496 if (ret) {
497 error_report("vfio: failed to populate device %s", path);
498 vfio_put_group(group);
499 }
500
501 return ret;
502 }
503
504 /**
505 * vfio_map_region - initialize the 2 memory regions for a given
506 * MMIO region index
507 * @vdev: the VFIO platform device handle
508 * @nr: the index of the region
509 *
510 * Init the top memory region and the mmapped memory region beneath
511 * VFIOPlatformDevice is used since VFIODevice is not a QOM Object
512 * and could not be passed to memory region functions
513 */
514 static void vfio_map_region(VFIOPlatformDevice *vdev, int nr)
515 {
516 VFIORegion *region = vdev->regions[nr];
517 uint64_t size = region->size;
518 char name[64];
519
520 if (!size) {
521 return;
522 }
523
524 g_snprintf(name, sizeof(name), "VFIO %s region %d",
525 vdev->vbasedev.name, nr);
526
527 /* A "slow" read/write mapping underlies all regions */
528 memory_region_init_io(&region->mem, OBJECT(vdev), &vfio_region_ops,
529 region, name, size);
530
531 g_strlcat(name, " mmap", sizeof(name));
532
533 if (vfio_mmap_region(OBJECT(vdev), region, &region->mem,
534 &region->mmap_mem, &region->mmap, size, 0, name)) {
535 error_report("%s unsupported. Performance may be slow", name);
536 }
537 }
538
539 /**
540 * vfio_platform_realize - the device realize function
541 * @dev: device state pointer
542 * @errp: error
543 *
544 * initialize the device, its memory regions and IRQ structures
545 * IRQ are started separately
546 */
547 static void vfio_platform_realize(DeviceState *dev, Error **errp)
548 {
549 VFIOPlatformDevice *vdev = VFIO_PLATFORM_DEVICE(dev);
550 SysBusDevice *sbdev = SYS_BUS_DEVICE(dev);
551 VFIODevice *vbasedev = &vdev->vbasedev;
552 VFIOINTp *intp;
553 int i, ret;
554
555 vbasedev->type = VFIO_DEVICE_TYPE_PLATFORM;
556 vbasedev->ops = &vfio_platform_ops;
557
558 trace_vfio_platform_realize(vbasedev->name, vdev->compat);
559
560 ret = vfio_base_device_init(vbasedev);
561 if (ret) {
562 error_setg(errp, "vfio: vfio_base_device_init failed for %s",
563 vbasedev->name);
564 return;
565 }
566
567 for (i = 0; i < vbasedev->num_regions; i++) {
568 vfio_map_region(vdev, i);
569 sysbus_init_mmio(sbdev, &vdev->regions[i]->mem);
570 }
571
572 QLIST_FOREACH(intp, &vdev->intp_list, next) {
573 vfio_start_eventfd_injection(intp);
574 }
575 }
576
577 static const VMStateDescription vfio_platform_vmstate = {
578 .name = TYPE_VFIO_PLATFORM,
579 .unmigratable = 1,
580 };
581
582 static Property vfio_platform_dev_properties[] = {
583 DEFINE_PROP_STRING("host", VFIOPlatformDevice, vbasedev.name),
584 DEFINE_PROP_BOOL("x-mmap", VFIOPlatformDevice, vbasedev.allow_mmap, true),
585 DEFINE_PROP_UINT32("mmap-timeout-ms", VFIOPlatformDevice,
586 mmap_timeout, 1100),
587 DEFINE_PROP_END_OF_LIST(),
588 };
589
590 static void vfio_platform_class_init(ObjectClass *klass, void *data)
591 {
592 DeviceClass *dc = DEVICE_CLASS(klass);
593
594 dc->realize = vfio_platform_realize;
595 dc->props = vfio_platform_dev_properties;
596 dc->vmsd = &vfio_platform_vmstate;
597 dc->desc = "VFIO-based platform device assignment";
598 set_bit(DEVICE_CATEGORY_MISC, dc->categories);
599 }
600
601 static const TypeInfo vfio_platform_dev_info = {
602 .name = TYPE_VFIO_PLATFORM,
603 .parent = TYPE_SYS_BUS_DEVICE,
604 .instance_size = sizeof(VFIOPlatformDevice),
605 .class_init = vfio_platform_class_init,
606 .class_size = sizeof(VFIOPlatformDeviceClass),
607 .abstract = true,
608 };
609
610 static void register_vfio_platform_dev_type(void)
611 {
612 type_register_static(&vfio_platform_dev_info);
613 }
614
615 type_init(register_vfio_platform_dev_type)