]> git.proxmox.com Git - mirror_qemu.git/blame - hw/vfio/ccw.c
vfio/ccw/pci: Allow devices to opt-in for ballooning
[mirror_qemu.git] / hw / vfio / ccw.c
CommitLineData
1dcac3e1
XFR
1/*
2 * vfio based subchannel assignment support
3 *
4 * Copyright 2017 IBM Corp.
5 * Author(s): Dong Jia Shi <bjsdjshi@linux.vnet.ibm.com>
6 * Xiao Feng Ren <renxiaof@linux.vnet.ibm.com>
7 * Pierre Morel <pmorel@linux.vnet.ibm.com>
8 *
08b824aa
CH
9 * This work is licensed under the terms of the GNU GPL, version 2 or (at
10 * your option) any later version. See the COPYING file in the top-level
1dcac3e1
XFR
11 * directory.
12 */
13
e9808d09 14#include "qemu/osdep.h"
1dcac3e1 15#include <linux/vfio.h>
c14e706c 16#include <linux/vfio_ccw.h>
1dcac3e1
XFR
17#include <sys/ioctl.h>
18
1dcac3e1
XFR
19#include "qapi/error.h"
20#include "hw/sysbus.h"
21#include "hw/vfio/vfio.h"
22#include "hw/vfio/vfio-common.h"
23#include "hw/s390x/s390-ccw.h"
24#include "hw/s390x/ccw-device.h"
d791937f 25#include "exec/address-spaces.h"
4886b3e9 26#include "qemu/error-report.h"
1dcac3e1
XFR
27
28#define TYPE_VFIO_CCW "vfio-ccw"
29typedef struct VFIOCCWDevice {
30 S390CCWDevice cdev;
31 VFIODevice vdev;
c14e706c
DJS
32 uint64_t io_region_size;
33 uint64_t io_region_offset;
34 struct ccw_io_region *io_region;
4886b3e9 35 EventNotifier io_notifier;
9a51c9ee
HP
36 bool force_orb_pfch;
37 bool warned_orb_pfch;
1dcac3e1
XFR
38} VFIOCCWDevice;
39
9a51c9ee
HP
/*
 * Print a warning via warn_vreport() at most once per @warned flag.
 *
 * Nothing is printed when @warned is NULL or already true; otherwise the
 * flag is set before the message is emitted.
 */
static inline void warn_once(bool *warned, const char *fmt, ...)
{
    va_list args;

    if (warned && !*warned) {
        *warned = true;
        va_start(args, fmt);
        warn_vreport(fmt, args);
        va_end(args);
    }
}
52
53static inline void warn_once_pfch(VFIOCCWDevice *vcdev, SubchDev *sch,
54 const char *msg)
55{
56 warn_once(&vcdev->warned_orb_pfch, "vfio-ccw (devno %x.%x.%04x): %s",
57 sch->cssid, sch->ssid, sch->devno, msg);
58}
59
1dcac3e1
XFR
60static void vfio_ccw_compute_needs_reset(VFIODevice *vdev)
61{
62 vdev->needs_reset = false;
63}
64
65/*
66 * We don't need vfio_hot_reset_multi and vfio_eoi operations for
67 * vfio_ccw device now.
68 */
69struct VFIODeviceOps vfio_ccw_ops = {
70 .vfio_compute_needs_reset = vfio_ccw_compute_needs_reset,
71};
72
66dc50f7 73static IOInstEnding vfio_ccw_handle_request(SubchDev *sch)
8ca2b376 74{
66dc50f7 75 S390CCWDevice *cdev = sch->driver_data;
8ca2b376
XFR
76 VFIOCCWDevice *vcdev = DO_UPCAST(VFIOCCWDevice, cdev, cdev);
77 struct ccw_io_region *region = vcdev->io_region;
78 int ret;
79
9a51c9ee
HP
80 if (!(sch->orb.ctrl0 & ORB_CTRL0_MASK_PFCH)) {
81 if (!(vcdev->force_orb_pfch)) {
82 warn_once_pfch(vcdev, sch, "requires PFCH flag set");
83 sch_gen_unit_exception(sch);
84 css_inject_io_interrupt(sch);
85 return IOINST_CC_EXPECTED;
86 } else {
87 sch->orb.ctrl0 |= ORB_CTRL0_MASK_PFCH;
88 warn_once_pfch(vcdev, sch, "PFCH flag forced");
89 }
90 }
91
8ca2b376
XFR
92 QEMU_BUILD_BUG_ON(sizeof(region->orb_area) != sizeof(ORB));
93 QEMU_BUILD_BUG_ON(sizeof(region->scsw_area) != sizeof(SCSW));
94 QEMU_BUILD_BUG_ON(sizeof(region->irb_area) != sizeof(IRB));
95
96 memset(region, 0, sizeof(*region));
97
66dc50f7
HP
98 memcpy(region->orb_area, &sch->orb, sizeof(ORB));
99 memcpy(region->scsw_area, &sch->curr_status.scsw, sizeof(SCSW));
8ca2b376
XFR
100
101again:
102 ret = pwrite(vcdev->vdev.fd, region,
103 vcdev->io_region_size, vcdev->io_region_offset);
104 if (ret != vcdev->io_region_size) {
105 if (errno == EAGAIN) {
106 goto again;
107 }
108 error_report("vfio-ccw: wirte I/O region failed with errno=%d", errno);
66dc50f7
HP
109 ret = -errno;
110 } else {
111 ret = region->ret_code;
112 }
113 switch (ret) {
114 case 0:
115 return IOINST_CC_EXPECTED;
116 case -EBUSY:
117 return IOINST_CC_BUSY;
118 case -ENODEV:
119 case -EACCES:
120 return IOINST_CC_NOT_OPERATIONAL;
121 case -EFAULT:
122 default:
123 sch_gen_unit_exception(sch);
124 css_inject_io_interrupt(sch);
125 return IOINST_CC_EXPECTED;
8ca2b376 126 }
8ca2b376
XFR
127}
128
1dcac3e1
XFR
129static void vfio_ccw_reset(DeviceState *dev)
130{
131 CcwDevice *ccw_dev = DO_UPCAST(CcwDevice, parent_obj, dev);
132 S390CCWDevice *cdev = DO_UPCAST(S390CCWDevice, parent_obj, ccw_dev);
133 VFIOCCWDevice *vcdev = DO_UPCAST(VFIOCCWDevice, cdev, cdev);
134
135 ioctl(vcdev->vdev.fd, VFIO_DEVICE_RESET);
136}
137
4886b3e9
DJS
138static void vfio_ccw_io_notifier_handler(void *opaque)
139{
140 VFIOCCWDevice *vcdev = opaque;
8ca2b376
XFR
141 struct ccw_io_region *region = vcdev->io_region;
142 S390CCWDevice *cdev = S390_CCW_DEVICE(vcdev);
143 CcwDevice *ccw_dev = CCW_DEVICE(cdev);
144 SubchDev *sch = ccw_dev->sch;
145 SCSW *s = &sch->curr_status.scsw;
334e7685 146 PMCW *p = &sch->curr_status.pmcw;
8ca2b376
XFR
147 IRB irb;
148 int size;
4886b3e9
DJS
149
150 if (!event_notifier_test_and_clear(&vcdev->io_notifier)) {
151 return;
152 }
8ca2b376
XFR
153
154 size = pread(vcdev->vdev.fd, region, vcdev->io_region_size,
155 vcdev->io_region_offset);
156 if (size == -1) {
157 switch (errno) {
158 case ENODEV:
159 /* Generate a deferred cc 3 condition. */
160 s->flags |= SCSW_FLAGS_MASK_CC;
161 s->ctrl &= ~SCSW_CTRL_MASK_STCTL;
162 s->ctrl |= (SCSW_STCTL_ALERT | SCSW_STCTL_STATUS_PEND);
163 goto read_err;
164 case EFAULT:
165 /* Memory problem, generate channel data check. */
166 s->ctrl &= ~SCSW_ACTL_START_PEND;
167 s->cstat = SCSW_CSTAT_DATA_CHECK;
168 s->ctrl &= ~SCSW_CTRL_MASK_STCTL;
169 s->ctrl |= SCSW_STCTL_PRIMARY | SCSW_STCTL_SECONDARY |
170 SCSW_STCTL_ALERT | SCSW_STCTL_STATUS_PEND;
171 goto read_err;
172 default:
173 /* Error, generate channel program check. */
174 s->ctrl &= ~SCSW_ACTL_START_PEND;
175 s->cstat = SCSW_CSTAT_PROG_CHECK;
176 s->ctrl &= ~SCSW_CTRL_MASK_STCTL;
177 s->ctrl |= SCSW_STCTL_PRIMARY | SCSW_STCTL_SECONDARY |
178 SCSW_STCTL_ALERT | SCSW_STCTL_STATUS_PEND;
179 goto read_err;
180 }
181 } else if (size != vcdev->io_region_size) {
182 /* Information transfer error, generate channel-control check. */
183 s->ctrl &= ~SCSW_ACTL_START_PEND;
184 s->cstat = SCSW_CSTAT_CHN_CTRL_CHK;
185 s->ctrl &= ~SCSW_CTRL_MASK_STCTL;
186 s->ctrl |= SCSW_STCTL_PRIMARY | SCSW_STCTL_SECONDARY |
187 SCSW_STCTL_ALERT | SCSW_STCTL_STATUS_PEND;
188 goto read_err;
189 }
190
191 memcpy(&irb, region->irb_area, sizeof(IRB));
192
193 /* Update control block via irb. */
194 copy_scsw_to_guest(s, &irb.scsw);
195
334e7685
DJS
196 /* If a uint check is pending, copy sense data. */
197 if ((s->dstat & SCSW_DSTAT_UNIT_CHECK) &&
198 (p->chars & PMCW_CHARS_MASK_CSENSE)) {
199 memcpy(sch->sense_data, irb.ecw, sizeof(irb.ecw));
200 }
201
8ca2b376
XFR
202read_err:
203 css_inject_io_interrupt(sch);
4886b3e9
DJS
204}
205
206static void vfio_ccw_register_io_notifier(VFIOCCWDevice *vcdev, Error **errp)
207{
208 VFIODevice *vdev = &vcdev->vdev;
209 struct vfio_irq_info *irq_info;
210 struct vfio_irq_set *irq_set;
211 size_t argsz;
212 int32_t *pfd;
213
214 if (vdev->num_irqs < VFIO_CCW_IO_IRQ_INDEX + 1) {
215 error_setg(errp, "vfio: unexpected number of io irqs %u",
216 vdev->num_irqs);
217 return;
218 }
219
28e22d4b 220 argsz = sizeof(*irq_info);
4886b3e9
DJS
221 irq_info = g_malloc0(argsz);
222 irq_info->index = VFIO_CCW_IO_IRQ_INDEX;
223 irq_info->argsz = argsz;
224 if (ioctl(vdev->fd, VFIO_DEVICE_GET_IRQ_INFO,
225 irq_info) < 0 || irq_info->count < 1) {
226 error_setg_errno(errp, errno, "vfio: Error getting irq info");
227 goto out_free_info;
228 }
229
230 if (event_notifier_init(&vcdev->io_notifier, 0)) {
231 error_setg_errno(errp, errno,
232 "vfio: Unable to init event notifier for IO");
233 goto out_free_info;
234 }
235
236 argsz = sizeof(*irq_set) + sizeof(*pfd);
237 irq_set = g_malloc0(argsz);
238 irq_set->argsz = argsz;
239 irq_set->flags = VFIO_IRQ_SET_DATA_EVENTFD |
240 VFIO_IRQ_SET_ACTION_TRIGGER;
241 irq_set->index = VFIO_CCW_IO_IRQ_INDEX;
242 irq_set->start = 0;
243 irq_set->count = 1;
244 pfd = (int32_t *) &irq_set->data;
245
246 *pfd = event_notifier_get_fd(&vcdev->io_notifier);
247 qemu_set_fd_handler(*pfd, vfio_ccw_io_notifier_handler, NULL, vcdev);
248 if (ioctl(vdev->fd, VFIO_DEVICE_SET_IRQS, irq_set)) {
249 error_setg(errp, "vfio: Failed to set up io notification");
250 qemu_set_fd_handler(*pfd, NULL, NULL, vcdev);
251 event_notifier_cleanup(&vcdev->io_notifier);
252 }
253
254 g_free(irq_set);
255
256out_free_info:
257 g_free(irq_info);
258}
259
260static void vfio_ccw_unregister_io_notifier(VFIOCCWDevice *vcdev)
261{
262 struct vfio_irq_set *irq_set;
263 size_t argsz;
264 int32_t *pfd;
265
266 argsz = sizeof(*irq_set) + sizeof(*pfd);
267 irq_set = g_malloc0(argsz);
268 irq_set->argsz = argsz;
269 irq_set->flags = VFIO_IRQ_SET_DATA_EVENTFD |
270 VFIO_IRQ_SET_ACTION_TRIGGER;
271 irq_set->index = VFIO_CCW_IO_IRQ_INDEX;
272 irq_set->start = 0;
273 irq_set->count = 1;
274 pfd = (int32_t *) &irq_set->data;
275 *pfd = -1;
276
277 if (ioctl(vcdev->vdev.fd, VFIO_DEVICE_SET_IRQS, irq_set)) {
278 error_report("vfio: Failed to de-assign device io fd: %m");
279 }
280
281 qemu_set_fd_handler(event_notifier_get_fd(&vcdev->io_notifier),
282 NULL, NULL, vcdev);
283 event_notifier_cleanup(&vcdev->io_notifier);
284
285 g_free(irq_set);
286}
287
c14e706c
DJS
288static void vfio_ccw_get_region(VFIOCCWDevice *vcdev, Error **errp)
289{
290 VFIODevice *vdev = &vcdev->vdev;
291 struct vfio_region_info *info;
292 int ret;
293
294 /* Sanity check device */
295 if (!(vdev->flags & VFIO_DEVICE_FLAGS_CCW)) {
296 error_setg(errp, "vfio: Um, this isn't a vfio-ccw device");
297 return;
298 }
299
300 if (vdev->num_regions < VFIO_CCW_CONFIG_REGION_INDEX + 1) {
301 error_setg(errp, "vfio: Unexpected number of the I/O region %u",
302 vdev->num_regions);
303 return;
304 }
305
306 ret = vfio_get_region_info(vdev, VFIO_CCW_CONFIG_REGION_INDEX, &info);
307 if (ret) {
308 error_setg_errno(errp, -ret, "vfio: Error getting config info");
309 return;
310 }
311
312 vcdev->io_region_size = info->size;
313 if (sizeof(*vcdev->io_region) != vcdev->io_region_size) {
314 error_setg(errp, "vfio: Unexpected size of the I/O region");
315 g_free(info);
316 return;
317 }
318
319 vcdev->io_region_offset = info->offset;
320 vcdev->io_region = g_malloc0(info->size);
321
322 g_free(info);
323}
324
325static void vfio_ccw_put_region(VFIOCCWDevice *vcdev)
326{
327 g_free(vcdev->io_region);
328}
329
c96f2c2a 330static void vfio_ccw_put_device(VFIOCCWDevice *vcdev)
1dcac3e1
XFR
331{
332 g_free(vcdev->vdev.name);
333 vfio_put_base_device(&vcdev->vdev);
334}
335
c96f2c2a
GK
336static void vfio_ccw_get_device(VFIOGroup *group, VFIOCCWDevice *vcdev,
337 Error **errp)
338{
339 char *name = g_strdup_printf("%x.%x.%04x", vcdev->cdev.hostid.cssid,
340 vcdev->cdev.hostid.ssid,
341 vcdev->cdev.hostid.devid);
342 VFIODevice *vbasedev;
343
344 QLIST_FOREACH(vbasedev, &group->device_list, next) {
345 if (strcmp(vbasedev->name, name) == 0) {
346 error_setg(errp, "vfio: subchannel %s has already been attached",
347 name);
348 goto out_err;
349 }
350 }
238e9172
AW
351
352 /*
353 * All vfio-ccw devices are believed to operate in a way compatible with
354 * memory ballooning, ie. pages pinned in the host are in the current
355 * working set of the guest driver and therefore never overlap with pages
356 * available to the guest balloon driver. This needs to be set before
357 * vfio_get_device() for vfio common to handle the balloon inhibitor.
358 */
359 vcdev->vdev.balloon_allowed = true;
c96f2c2a
GK
360
361 if (vfio_get_device(group, vcdev->cdev.mdevid, &vcdev->vdev, errp)) {
362 goto out_err;
363 }
364
365 vcdev->vdev.ops = &vfio_ccw_ops;
366 vcdev->vdev.type = VFIO_DEVICE_TYPE_CCW;
367 vcdev->vdev.name = name;
368 vcdev->vdev.dev = &vcdev->cdev.parent_obj.parent_obj;
369
370 return;
371
372out_err:
373 g_free(name);
374}
375
1dcac3e1
XFR
376static VFIOGroup *vfio_ccw_get_group(S390CCWDevice *cdev, Error **errp)
377{
378 char *tmp, group_path[PATH_MAX];
379 ssize_t len;
380 int groupid;
381
382 tmp = g_strdup_printf("/sys/bus/css/devices/%x.%x.%04x/%s/iommu_group",
383 cdev->hostid.cssid, cdev->hostid.ssid,
384 cdev->hostid.devid, cdev->mdevid);
385 len = readlink(tmp, group_path, sizeof(group_path));
386 g_free(tmp);
387
388 if (len <= 0 || len >= sizeof(group_path)) {
389 error_setg(errp, "vfio: no iommu_group found");
390 return NULL;
391 }
392
393 group_path[len] = 0;
394
395 if (sscanf(basename(group_path), "%d", &groupid) != 1) {
396 error_setg(errp, "vfio: failed to read %s", group_path);
397 return NULL;
398 }
399
400 return vfio_get_group(groupid, &address_space_memory, errp);
401}
402
403static void vfio_ccw_realize(DeviceState *dev, Error **errp)
404{
1dcac3e1
XFR
405 VFIOGroup *group;
406 CcwDevice *ccw_dev = DO_UPCAST(CcwDevice, parent_obj, dev);
407 S390CCWDevice *cdev = DO_UPCAST(S390CCWDevice, parent_obj, ccw_dev);
408 VFIOCCWDevice *vcdev = DO_UPCAST(VFIOCCWDevice, cdev, cdev);
409 S390CCWDeviceClass *cdc = S390_CCW_DEVICE_GET_CLASS(cdev);
410 Error *err = NULL;
411
412 /* Call the class init function for subchannel. */
413 if (cdc->realize) {
414 cdc->realize(cdev, vcdev->vdev.sysfsdev, &err);
415 if (err) {
416 goto out_err_propagate;
417 }
418 }
419
420 group = vfio_ccw_get_group(cdev, &err);
421 if (!group) {
422 goto out_group_err;
423 }
424
c96f2c2a
GK
425 vfio_ccw_get_device(group, vcdev, &err);
426 if (err) {
1dcac3e1
XFR
427 goto out_device_err;
428 }
429
c14e706c
DJS
430 vfio_ccw_get_region(vcdev, &err);
431 if (err) {
432 goto out_region_err;
433 }
434
4886b3e9
DJS
435 vfio_ccw_register_io_notifier(vcdev, &err);
436 if (err) {
437 goto out_notifier_err;
438 }
439
1dcac3e1
XFR
440 return;
441
4886b3e9
DJS
442out_notifier_err:
443 vfio_ccw_put_region(vcdev);
c14e706c 444out_region_err:
c96f2c2a 445 vfio_ccw_put_device(vcdev);
1dcac3e1
XFR
446out_device_err:
447 vfio_put_group(group);
448out_group_err:
449 if (cdc->unrealize) {
450 cdc->unrealize(cdev, NULL);
451 }
452out_err_propagate:
453 error_propagate(errp, err);
454}
455
456static void vfio_ccw_unrealize(DeviceState *dev, Error **errp)
457{
458 CcwDevice *ccw_dev = DO_UPCAST(CcwDevice, parent_obj, dev);
459 S390CCWDevice *cdev = DO_UPCAST(S390CCWDevice, parent_obj, ccw_dev);
460 VFIOCCWDevice *vcdev = DO_UPCAST(VFIOCCWDevice, cdev, cdev);
461 S390CCWDeviceClass *cdc = S390_CCW_DEVICE_GET_CLASS(cdev);
462 VFIOGroup *group = vcdev->vdev.group;
463
4886b3e9 464 vfio_ccw_unregister_io_notifier(vcdev);
c14e706c 465 vfio_ccw_put_region(vcdev);
c96f2c2a 466 vfio_ccw_put_device(vcdev);
1dcac3e1
XFR
467 vfio_put_group(group);
468
469 if (cdc->unrealize) {
470 cdc->unrealize(cdev, errp);
471 }
472}
473
474static Property vfio_ccw_properties[] = {
475 DEFINE_PROP_STRING("sysfsdev", VFIOCCWDevice, vdev.sysfsdev),
9a51c9ee 476 DEFINE_PROP_BOOL("force-orb-pfch", VFIOCCWDevice, force_orb_pfch, false),
1dcac3e1
XFR
477 DEFINE_PROP_END_OF_LIST(),
478};
479
480static const VMStateDescription vfio_ccw_vmstate = {
481 .name = TYPE_VFIO_CCW,
482 .unmigratable = 1,
483};
484
485static void vfio_ccw_class_init(ObjectClass *klass, void *data)
486{
487 DeviceClass *dc = DEVICE_CLASS(klass);
8ca2b376 488 S390CCWDeviceClass *cdc = S390_CCW_DEVICE_CLASS(klass);
1dcac3e1
XFR
489
490 dc->props = vfio_ccw_properties;
491 dc->vmsd = &vfio_ccw_vmstate;
492 dc->desc = "VFIO-based subchannel assignment";
bd2aef10 493 set_bit(DEVICE_CATEGORY_MISC, dc->categories);
1dcac3e1
XFR
494 dc->realize = vfio_ccw_realize;
495 dc->unrealize = vfio_ccw_unrealize;
496 dc->reset = vfio_ccw_reset;
8ca2b376
XFR
497
498 cdc->handle_request = vfio_ccw_handle_request;
1dcac3e1
XFR
499}
500
501static const TypeInfo vfio_ccw_info = {
502 .name = TYPE_VFIO_CCW,
503 .parent = TYPE_S390_CCW,
504 .instance_size = sizeof(VFIOCCWDevice),
505 .class_init = vfio_ccw_class_init,
506};
507
508static void register_vfio_ccw_type(void)
509{
510 type_register_static(&vfio_ccw_info);
511}
512
513type_init(register_vfio_ccw_type)