git.proxmox.com Git - mirror_qemu.git/blob - hw/virtio/vhost-user.c
Merge remote-tracking branch 'remotes/lalrae/tags/mips-20160729' into staging
[mirror_qemu.git] / hw / virtio / vhost-user.c
1 /*
2 * vhost-user
3 *
4 * Copyright (c) 2013 Virtual Open Systems Sarl.
5 *
6 * This work is licensed under the terms of the GNU GPL, version 2 or later.
7 * See the COPYING file in the top-level directory.
8 *
9 */
10
11 #include "qemu/osdep.h"
12 #include "qapi/error.h"
13 #include "hw/virtio/vhost.h"
14 #include "hw/virtio/vhost-backend.h"
15 #include "hw/virtio/virtio-net.h"
16 #include "sysemu/char.h"
17 #include "sysemu/kvm.h"
18 #include "qemu/error-report.h"
19 #include "qemu/sockets.h"
20 #include "migration/migration.h"
21
22 #include <sys/ioctl.h>
23 #include <sys/socket.h>
24 #include <sys/un.h>
25 #include <linux/vhost.h>
26
/* Capacity of the region table (and of the fd arrays) used by this file. */
#define VHOST_MEMORY_MAX_NREGIONS 8

/* Bit number, in the virtio feature word, announcing protocol features. */
#define VHOST_USER_F_PROTOCOL_FEATURES 30

/*
 * Bit numbers inside the vhost-user protocol feature word.
 * VHOST_USER_PROTOCOL_F_MAX is a count, not a feature: it is one past the
 * highest bit listed here and is used to build the mask below.
 */
enum VhostUserProtocolFeature {
    VHOST_USER_PROTOCOL_F_MQ = 0,
    VHOST_USER_PROTOCOL_F_LOG_SHMFD = 1,
    VHOST_USER_PROTOCOL_F_RARP = 2,

    VHOST_USER_PROTOCOL_F_MAX
};

/* All protocol feature bits listed in the enum above, as a bit mask. */
#define VHOST_USER_PROTOCOL_FEATURE_MASK ((1 << VHOST_USER_PROTOCOL_F_MAX) - 1)
39
/*
 * Request codes placed in VhostUserMsg.request.
 * The numeric values are spelled out explicitly because they are written to
 * the character device as-is; VHOST_USER_MAX is one past the last code.
 */
typedef enum VhostUserRequest {
    VHOST_USER_NONE = 0,

    /* feature words and ownership */
    VHOST_USER_GET_FEATURES = 1,
    VHOST_USER_SET_FEATURES = 2,
    VHOST_USER_SET_OWNER = 3,
    VHOST_USER_RESET_OWNER = 4,

    /* memory table and dirty log */
    VHOST_USER_SET_MEM_TABLE = 5,
    VHOST_USER_SET_LOG_BASE = 6,
    VHOST_USER_SET_LOG_FD = 7,

    /* per-vring setup */
    VHOST_USER_SET_VRING_NUM = 8,
    VHOST_USER_SET_VRING_ADDR = 9,
    VHOST_USER_SET_VRING_BASE = 10,
    VHOST_USER_GET_VRING_BASE = 11,
    VHOST_USER_SET_VRING_KICK = 12,
    VHOST_USER_SET_VRING_CALL = 13,
    VHOST_USER_SET_VRING_ERR = 14,

    /* protocol feature negotiation and later additions */
    VHOST_USER_GET_PROTOCOL_FEATURES = 15,
    VHOST_USER_SET_PROTOCOL_FEATURES = 16,
    VHOST_USER_GET_QUEUE_NUM = 17,
    VHOST_USER_SET_VRING_ENABLE = 18,
    VHOST_USER_SEND_RARP = 19,

    VHOST_USER_MAX
} VhostUserRequest;
63
/*
 * One entry of the memory table sent with VHOST_USER_SET_MEM_TABLE.
 * The first three fields are copied from the matching vhost_memory_region;
 * mmap_offset is the offset reported by memory_region_from_host() for the
 * region's userspace address.
 */
typedef struct VhostUserMemoryRegion {
    uint64_t guest_phys_addr;   /* copied from vhost_memory_region */
    uint64_t memory_size;       /* copied from vhost_memory_region */
    uint64_t userspace_addr;    /* copied from vhost_memory_region */
    uint64_t mmap_offset;       /* offset from memory_region_from_host() */
} VhostUserMemoryRegion;
70
71 typedef struct VhostUserMemory {
72 uint32_t nregions;
73 uint32_t padding;
74 VhostUserMemoryRegion regions[VHOST_MEMORY_MAX_NREGIONS];
75 } VhostUserMemory;
76
/*
 * Payload of VHOST_USER_SET_LOG_BASE.
 * The sender in this file sets mmap_size to the log size in bytes and
 * mmap_offset to 0.
 */
typedef struct VhostUserLog {
    uint64_t mmap_size;     /* size of the log area, in bytes */
    uint64_t mmap_offset;   /* offset of the log area; always 0 here */
} VhostUserLog;
81
82 typedef struct VhostUserMsg {
83 VhostUserRequest request;
84
85 #define VHOST_USER_VERSION_MASK (0x3)
86 #define VHOST_USER_REPLY_MASK (0x1<<2)
87 uint32_t flags;
88 uint32_t size; /* the following payload size */
89 union {
90 #define VHOST_USER_VRING_IDX_MASK (0xff)
91 #define VHOST_USER_VRING_NOFD_MASK (0x1<<8)
92 uint64_t u64;
93 struct vhost_vring_state state;
94 struct vhost_vring_addr addr;
95 VhostUserMemory memory;
96 VhostUserLog log;
97 } payload;
98 } QEMU_PACKED VhostUserMsg;
99
100 static VhostUserMsg m __attribute__ ((unused));
101 #define VHOST_USER_HDR_SIZE (sizeof(m.request) \
102 + sizeof(m.flags) \
103 + sizeof(m.size))
104
105 #define VHOST_USER_PAYLOAD_SIZE (sizeof(m) - VHOST_USER_HDR_SIZE)
106
107 /* The version of the protocol we support */
108 #define VHOST_USER_VERSION (0x1)
109
/*
 * Returns true only when both kvm_enabled() and kvm_eventfds_enabled()
 * report true; the second is not evaluated if the first is false.
 */
static bool ioeventfd_enabled(void)
{
    if (!kvm_enabled()) {
        return false;
    }

    return kvm_eventfds_enabled();
}
114
115 static int vhost_user_read(struct vhost_dev *dev, VhostUserMsg *msg)
116 {
117 CharDriverState *chr = dev->opaque;
118 uint8_t *p = (uint8_t *) msg;
119 int r, size = VHOST_USER_HDR_SIZE;
120
121 r = qemu_chr_fe_read_all(chr, p, size);
122 if (r != size) {
123 error_report("Failed to read msg header. Read %d instead of %d."
124 " Original request %d.", r, size, msg->request);
125 goto fail;
126 }
127
128 /* validate received flags */
129 if (msg->flags != (VHOST_USER_REPLY_MASK | VHOST_USER_VERSION)) {
130 error_report("Failed to read msg header."
131 " Flags 0x%x instead of 0x%x.", msg->flags,
132 VHOST_USER_REPLY_MASK | VHOST_USER_VERSION);
133 goto fail;
134 }
135
136 /* validate message size is sane */
137 if (msg->size > VHOST_USER_PAYLOAD_SIZE) {
138 error_report("Failed to read msg header."
139 " Size %d exceeds the maximum %zu.", msg->size,
140 VHOST_USER_PAYLOAD_SIZE);
141 goto fail;
142 }
143
144 if (msg->size) {
145 p += VHOST_USER_HDR_SIZE;
146 size = msg->size;
147 r = qemu_chr_fe_read_all(chr, p, size);
148 if (r != size) {
149 error_report("Failed to read msg payload."
150 " Read %d instead of %d.", r, msg->size);
151 goto fail;
152 }
153 }
154
155 return 0;
156
157 fail:
158 return -1;
159 }
160
161 static bool vhost_user_one_time_request(VhostUserRequest request)
162 {
163 switch (request) {
164 case VHOST_USER_SET_OWNER:
165 case VHOST_USER_RESET_OWNER:
166 case VHOST_USER_SET_MEM_TABLE:
167 case VHOST_USER_GET_QUEUE_NUM:
168 return true;
169 default:
170 return false;
171 }
172 }
173
174 /* most non-init callers ignore the error */
175 static int vhost_user_write(struct vhost_dev *dev, VhostUserMsg *msg,
176 int *fds, int fd_num)
177 {
178 CharDriverState *chr = dev->opaque;
179 int ret, size = VHOST_USER_HDR_SIZE + msg->size;
180
181 /*
182 * For non-vring specific requests, like VHOST_USER_SET_MEM_TABLE,
183 * we just need send it once in the first time. For later such
184 * request, we just ignore it.
185 */
186 if (vhost_user_one_time_request(msg->request) && dev->vq_index != 0) {
187 return 0;
188 }
189
190 if (qemu_chr_fe_set_msgfds(chr, fds, fd_num) < 0) {
191 error_report("Failed to set msg fds.");
192 return -1;
193 }
194
195 ret = qemu_chr_fe_write_all(chr, (const uint8_t *) msg, size);
196 if (ret != size) {
197 error_report("Failed to write msg."
198 " Wrote %d instead of %d.", ret, size);
199 return -1;
200 }
201
202 return 0;
203 }
204
205 static int vhost_user_set_log_base(struct vhost_dev *dev, uint64_t base,
206 struct vhost_log *log)
207 {
208 int fds[VHOST_MEMORY_MAX_NREGIONS];
209 size_t fd_num = 0;
210 bool shmfd = virtio_has_feature(dev->protocol_features,
211 VHOST_USER_PROTOCOL_F_LOG_SHMFD);
212 VhostUserMsg msg = {
213 .request = VHOST_USER_SET_LOG_BASE,
214 .flags = VHOST_USER_VERSION,
215 .payload.log.mmap_size = log->size * sizeof(*(log->log)),
216 .payload.log.mmap_offset = 0,
217 .size = sizeof(msg.payload.log),
218 };
219
220 if (shmfd && log->fd != -1) {
221 fds[fd_num++] = log->fd;
222 }
223
224 if (vhost_user_write(dev, &msg, fds, fd_num) < 0) {
225 return -1;
226 }
227
228 if (shmfd) {
229 msg.size = 0;
230 if (vhost_user_read(dev, &msg) < 0) {
231 return -1;
232 }
233
234 if (msg.request != VHOST_USER_SET_LOG_BASE) {
235 error_report("Received unexpected msg type. "
236 "Expected %d received %d",
237 VHOST_USER_SET_LOG_BASE, msg.request);
238 return -1;
239 }
240 }
241
242 return 0;
243 }
244
245 static int vhost_user_set_mem_table(struct vhost_dev *dev,
246 struct vhost_memory *mem)
247 {
248 int fds[VHOST_MEMORY_MAX_NREGIONS];
249 int i, fd;
250 size_t fd_num = 0;
251 VhostUserMsg msg = {
252 .request = VHOST_USER_SET_MEM_TABLE,
253 .flags = VHOST_USER_VERSION,
254 };
255
256 for (i = 0; i < dev->mem->nregions; ++i) {
257 struct vhost_memory_region *reg = dev->mem->regions + i;
258 ram_addr_t offset;
259 MemoryRegion *mr;
260
261 assert((uintptr_t)reg->userspace_addr == reg->userspace_addr);
262 mr = memory_region_from_host((void *)(uintptr_t)reg->userspace_addr,
263 &offset);
264 fd = memory_region_get_fd(mr);
265 if (fd > 0) {
266 msg.payload.memory.regions[fd_num].userspace_addr = reg->userspace_addr;
267 msg.payload.memory.regions[fd_num].memory_size = reg->memory_size;
268 msg.payload.memory.regions[fd_num].guest_phys_addr = reg->guest_phys_addr;
269 msg.payload.memory.regions[fd_num].mmap_offset = offset;
270 assert(fd_num < VHOST_MEMORY_MAX_NREGIONS);
271 fds[fd_num++] = fd;
272 }
273 }
274
275 msg.payload.memory.nregions = fd_num;
276
277 if (!fd_num) {
278 error_report("Failed initializing vhost-user memory map, "
279 "consider using -object memory-backend-file share=on");
280 return -1;
281 }
282
283 msg.size = sizeof(msg.payload.memory.nregions);
284 msg.size += sizeof(msg.payload.memory.padding);
285 msg.size += fd_num * sizeof(VhostUserMemoryRegion);
286
287 if (vhost_user_write(dev, &msg, fds, fd_num) < 0) {
288 return -1;
289 }
290
291 return 0;
292 }
293
294 static int vhost_user_set_vring_addr(struct vhost_dev *dev,
295 struct vhost_vring_addr *addr)
296 {
297 VhostUserMsg msg = {
298 .request = VHOST_USER_SET_VRING_ADDR,
299 .flags = VHOST_USER_VERSION,
300 .payload.addr = *addr,
301 .size = sizeof(msg.payload.addr),
302 };
303
304 if (vhost_user_write(dev, &msg, NULL, 0) < 0) {
305 return -1;
306 }
307
308 return 0;
309 }
310
/*
 * Not implemented for vhost-user: nothing is sent, an error is reported and
 * -1 is always returned. Neither argument is used.
 */
static int vhost_user_set_vring_endian(struct vhost_dev *dev,
                                       struct vhost_vring_state *ring)
{
    error_report("vhost-user trying to send unhandled ioctl");

    return -1;
}
317
318 static int vhost_set_vring(struct vhost_dev *dev,
319 unsigned long int request,
320 struct vhost_vring_state *ring)
321 {
322 VhostUserMsg msg = {
323 .request = request,
324 .flags = VHOST_USER_VERSION,
325 .payload.state = *ring,
326 .size = sizeof(msg.payload.state),
327 };
328
329 if (vhost_user_write(dev, &msg, NULL, 0) < 0) {
330 return -1;
331 }
332
333 return 0;
334 }
335
336 static int vhost_user_set_vring_num(struct vhost_dev *dev,
337 struct vhost_vring_state *ring)
338 {
339 return vhost_set_vring(dev, VHOST_USER_SET_VRING_NUM, ring);
340 }
341
342 static int vhost_user_set_vring_base(struct vhost_dev *dev,
343 struct vhost_vring_state *ring)
344 {
345 return vhost_set_vring(dev, VHOST_USER_SET_VRING_BASE, ring);
346 }
347
348 static int vhost_user_set_vring_enable(struct vhost_dev *dev, int enable)
349 {
350 int i;
351
352 if (!virtio_has_feature(dev->features, VHOST_USER_F_PROTOCOL_FEATURES)) {
353 return -1;
354 }
355
356 for (i = 0; i < dev->nvqs; ++i) {
357 struct vhost_vring_state state = {
358 .index = dev->vq_index + i,
359 .num = enable,
360 };
361
362 vhost_set_vring(dev, VHOST_USER_SET_VRING_ENABLE, &state);
363 }
364
365 return 0;
366 }
367
368 static int vhost_user_get_vring_base(struct vhost_dev *dev,
369 struct vhost_vring_state *ring)
370 {
371 VhostUserMsg msg = {
372 .request = VHOST_USER_GET_VRING_BASE,
373 .flags = VHOST_USER_VERSION,
374 .payload.state = *ring,
375 .size = sizeof(msg.payload.state),
376 };
377
378 if (vhost_user_write(dev, &msg, NULL, 0) < 0) {
379 return -1;
380 }
381
382 if (vhost_user_read(dev, &msg) < 0) {
383 return -1;
384 }
385
386 if (msg.request != VHOST_USER_GET_VRING_BASE) {
387 error_report("Received unexpected msg type. Expected %d received %d",
388 VHOST_USER_GET_VRING_BASE, msg.request);
389 return -1;
390 }
391
392 if (msg.size != sizeof(msg.payload.state)) {
393 error_report("Received bad msg size.");
394 return -1;
395 }
396
397 *ring = msg.payload.state;
398
399 return 0;
400 }
401
402 static int vhost_set_vring_file(struct vhost_dev *dev,
403 VhostUserRequest request,
404 struct vhost_vring_file *file)
405 {
406 int fds[VHOST_MEMORY_MAX_NREGIONS];
407 size_t fd_num = 0;
408 VhostUserMsg msg = {
409 .request = request,
410 .flags = VHOST_USER_VERSION,
411 .payload.u64 = file->index & VHOST_USER_VRING_IDX_MASK,
412 .size = sizeof(msg.payload.u64),
413 };
414
415 if (ioeventfd_enabled() && file->fd > 0) {
416 fds[fd_num++] = file->fd;
417 } else {
418 msg.payload.u64 |= VHOST_USER_VRING_NOFD_MASK;
419 }
420
421 if (vhost_user_write(dev, &msg, fds, fd_num) < 0) {
422 return -1;
423 }
424
425 return 0;
426 }
427
428 static int vhost_user_set_vring_kick(struct vhost_dev *dev,
429 struct vhost_vring_file *file)
430 {
431 return vhost_set_vring_file(dev, VHOST_USER_SET_VRING_KICK, file);
432 }
433
434 static int vhost_user_set_vring_call(struct vhost_dev *dev,
435 struct vhost_vring_file *file)
436 {
437 return vhost_set_vring_file(dev, VHOST_USER_SET_VRING_CALL, file);
438 }
439
440 static int vhost_user_set_u64(struct vhost_dev *dev, int request, uint64_t u64)
441 {
442 VhostUserMsg msg = {
443 .request = request,
444 .flags = VHOST_USER_VERSION,
445 .payload.u64 = u64,
446 .size = sizeof(msg.payload.u64),
447 };
448
449 if (vhost_user_write(dev, &msg, NULL, 0) < 0) {
450 return -1;
451 }
452
453 return 0;
454 }
455
456 static int vhost_user_set_features(struct vhost_dev *dev,
457 uint64_t features)
458 {
459 return vhost_user_set_u64(dev, VHOST_USER_SET_FEATURES, features);
460 }
461
462 static int vhost_user_set_protocol_features(struct vhost_dev *dev,
463 uint64_t features)
464 {
465 return vhost_user_set_u64(dev, VHOST_USER_SET_PROTOCOL_FEATURES, features);
466 }
467
468 static int vhost_user_get_u64(struct vhost_dev *dev, int request, uint64_t *u64)
469 {
470 VhostUserMsg msg = {
471 .request = request,
472 .flags = VHOST_USER_VERSION,
473 };
474
475 if (vhost_user_one_time_request(request) && dev->vq_index != 0) {
476 return 0;
477 }
478
479 if (vhost_user_write(dev, &msg, NULL, 0) < 0) {
480 return -1;
481 }
482
483 if (vhost_user_read(dev, &msg) < 0) {
484 return -1;
485 }
486
487 if (msg.request != request) {
488 error_report("Received unexpected msg type. Expected %d received %d",
489 request, msg.request);
490 return -1;
491 }
492
493 if (msg.size != sizeof(msg.payload.u64)) {
494 error_report("Received bad msg size.");
495 return -1;
496 }
497
498 *u64 = msg.payload.u64;
499
500 return 0;
501 }
502
503 static int vhost_user_get_features(struct vhost_dev *dev, uint64_t *features)
504 {
505 return vhost_user_get_u64(dev, VHOST_USER_GET_FEATURES, features);
506 }
507
508 static int vhost_user_set_owner(struct vhost_dev *dev)
509 {
510 VhostUserMsg msg = {
511 .request = VHOST_USER_SET_OWNER,
512 .flags = VHOST_USER_VERSION,
513 };
514
515 if (vhost_user_write(dev, &msg, NULL, 0) < 0) {
516 return -1;
517 }
518
519 return 0;
520 }
521
522 static int vhost_user_reset_device(struct vhost_dev *dev)
523 {
524 VhostUserMsg msg = {
525 .request = VHOST_USER_RESET_OWNER,
526 .flags = VHOST_USER_VERSION,
527 };
528
529 if (vhost_user_write(dev, &msg, NULL, 0) < 0) {
530 return -1;
531 }
532
533 return 0;
534 }
535
536 static int vhost_user_init(struct vhost_dev *dev, void *opaque)
537 {
538 uint64_t features;
539 int err;
540
541 assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER);
542
543 dev->opaque = opaque;
544
545 err = vhost_user_get_features(dev, &features);
546 if (err < 0) {
547 return err;
548 }
549
550 if (virtio_has_feature(features, VHOST_USER_F_PROTOCOL_FEATURES)) {
551 dev->backend_features |= 1ULL << VHOST_USER_F_PROTOCOL_FEATURES;
552
553 err = vhost_user_get_u64(dev, VHOST_USER_GET_PROTOCOL_FEATURES,
554 &features);
555 if (err < 0) {
556 return err;
557 }
558
559 dev->protocol_features = features & VHOST_USER_PROTOCOL_FEATURE_MASK;
560 err = vhost_user_set_protocol_features(dev, dev->protocol_features);
561 if (err < 0) {
562 return err;
563 }
564
565 /* query the max queues we support if backend supports Multiple Queue */
566 if (dev->protocol_features & (1ULL << VHOST_USER_PROTOCOL_F_MQ)) {
567 err = vhost_user_get_u64(dev, VHOST_USER_GET_QUEUE_NUM,
568 &dev->max_queues);
569 if (err < 0) {
570 return err;
571 }
572 }
573 }
574
575 if (dev->migration_blocker == NULL &&
576 !virtio_has_feature(dev->protocol_features,
577 VHOST_USER_PROTOCOL_F_LOG_SHMFD)) {
578 error_setg(&dev->migration_blocker,
579 "Migration disabled: vhost-user backend lacks "
580 "VHOST_USER_PROTOCOL_F_LOG_SHMFD feature.");
581 }
582
583 return 0;
584 }
585
586 static int vhost_user_cleanup(struct vhost_dev *dev)
587 {
588 assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER);
589
590 dev->opaque = 0;
591
592 return 0;
593 }
594
595 static int vhost_user_get_vq_index(struct vhost_dev *dev, int idx)
596 {
597 assert(idx >= dev->vq_index && idx < dev->vq_index + dev->nvqs);
598
599 return idx;
600 }
601
602 static int vhost_user_memslots_limit(struct vhost_dev *dev)
603 {
604 return VHOST_MEMORY_MAX_NREGIONS;
605 }
606
607 static bool vhost_user_requires_shm_log(struct vhost_dev *dev)
608 {
609 assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER);
610
611 return virtio_has_feature(dev->protocol_features,
612 VHOST_USER_PROTOCOL_F_LOG_SHMFD);
613 }
614
615 static int vhost_user_migration_done(struct vhost_dev *dev, char* mac_addr)
616 {
617 VhostUserMsg msg = { 0 };
618
619 assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER);
620
621 /* If guest supports GUEST_ANNOUNCE do nothing */
622 if (virtio_has_feature(dev->acked_features, VIRTIO_NET_F_GUEST_ANNOUNCE)) {
623 return 0;
624 }
625
626 /* if backend supports VHOST_USER_PROTOCOL_F_RARP ask it to send the RARP */
627 if (virtio_has_feature(dev->protocol_features,
628 VHOST_USER_PROTOCOL_F_RARP)) {
629 msg.request = VHOST_USER_SEND_RARP;
630 msg.flags = VHOST_USER_VERSION;
631 memcpy((char *)&msg.payload.u64, mac_addr, 6);
632 msg.size = sizeof(msg.payload.u64);
633
634 return vhost_user_write(dev, &msg, NULL, 0);
635 }
636 return -1;
637 }
638
639 static bool vhost_user_can_merge(struct vhost_dev *dev,
640 uint64_t start1, uint64_t size1,
641 uint64_t start2, uint64_t size2)
642 {
643 ram_addr_t offset;
644 int mfd, rfd;
645 MemoryRegion *mr;
646
647 mr = memory_region_from_host((void *)(uintptr_t)start1, &offset);
648 mfd = memory_region_get_fd(mr);
649
650 mr = memory_region_from_host((void *)(uintptr_t)start2, &offset);
651 rfd = memory_region_get_fd(mr);
652
653 return mfd == rfd;
654 }
655
656 const VhostOps user_ops = {
657 .backend_type = VHOST_BACKEND_TYPE_USER,
658 .vhost_backend_init = vhost_user_init,
659 .vhost_backend_cleanup = vhost_user_cleanup,
660 .vhost_backend_memslots_limit = vhost_user_memslots_limit,
661 .vhost_set_log_base = vhost_user_set_log_base,
662 .vhost_set_mem_table = vhost_user_set_mem_table,
663 .vhost_set_vring_addr = vhost_user_set_vring_addr,
664 .vhost_set_vring_endian = vhost_user_set_vring_endian,
665 .vhost_set_vring_num = vhost_user_set_vring_num,
666 .vhost_set_vring_base = vhost_user_set_vring_base,
667 .vhost_get_vring_base = vhost_user_get_vring_base,
668 .vhost_set_vring_kick = vhost_user_set_vring_kick,
669 .vhost_set_vring_call = vhost_user_set_vring_call,
670 .vhost_set_features = vhost_user_set_features,
671 .vhost_get_features = vhost_user_get_features,
672 .vhost_set_owner = vhost_user_set_owner,
673 .vhost_reset_device = vhost_user_reset_device,
674 .vhost_get_vq_index = vhost_user_get_vq_index,
675 .vhost_set_vring_enable = vhost_user_set_vring_enable,
676 .vhost_requires_shm_log = vhost_user_requires_shm_log,
677 .vhost_migration_done = vhost_user_migration_done,
678 .vhost_backend_can_merge = vhost_user_can_merge,
679 };