]> git.proxmox.com Git - mirror_qemu.git/blame - hw/virtio/vhost-user.c
vhost user: add rarp sending after live migration for legacy guest
[mirror_qemu.git] / hw / virtio / vhost-user.c
CommitLineData
5f6f6664
NN
1/*
2 * vhost-user
3 *
4 * Copyright (c) 2013 Virtual Open Systems Sarl.
5 *
6 * This work is licensed under the terms of the GNU GPL, version 2 or later.
7 * See the COPYING file in the top-level directory.
8 *
9 */
10
11#include "hw/virtio/vhost.h"
12#include "hw/virtio/vhost-backend.h"
3e866365 13#include "hw/virtio/virtio-net.h"
5f6f6664
NN
14#include "sysemu/char.h"
15#include "sysemu/kvm.h"
16#include "qemu/error-report.h"
17#include "qemu/sockets.h"
3fd74b84 18#include "exec/ram_addr.h"
d2fc4402 19#include "migration/migration.h"
5f6f6664
NN
20
21#include <fcntl.h>
22#include <unistd.h>
23#include <sys/ioctl.h>
24#include <sys/socket.h>
25#include <sys/un.h>
26#include <linux/vhost.h>
27
/* Capacity of the region table carried by a memory-table message. */
#define VHOST_MEMORY_MAX_NREGIONS 8

/* Virtio feature bit that gates the protocol-feature negotiation. */
#define VHOST_USER_F_PROTOCOL_FEATURES 30

/* Bit positions of the individual protocol features. */
#define VHOST_USER_PROTOCOL_F_MQ 0
#define VHOST_USER_PROTOCOL_F_LOG_SHMFD 1
#define VHOST_USER_PROTOCOL_F_RARP 2

/* All protocol feature bits known to this file (evaluates to 0x7ULL). */
#define VHOST_USER_PROTOCOL_FEATURE_MASK \
    ((1ULL << VHOST_USER_PROTOCOL_F_MQ) | \
     (1ULL << VHOST_USER_PROTOCOL_F_LOG_SHMFD) | \
     (1ULL << VHOST_USER_PROTOCOL_F_RARP))
5f6f6664
NN
35
/*
 * Request codes stored in VhostUserMsg.request.
 *
 * The numeric values are spelled out explicitly; VHOST_USER_MAX is one
 * past the highest defined request.
 */
typedef enum VhostUserRequest {
    VHOST_USER_NONE = 0,

    /* features, ownership, reset */
    VHOST_USER_GET_FEATURES = 1,
    VHOST_USER_SET_FEATURES = 2,
    VHOST_USER_SET_OWNER = 3,
    VHOST_USER_RESET_DEVICE = 4,

    /* memory table and dirty log */
    VHOST_USER_SET_MEM_TABLE = 5,
    VHOST_USER_SET_LOG_BASE = 6,
    VHOST_USER_SET_LOG_FD = 7,

    /* per-vring setup */
    VHOST_USER_SET_VRING_NUM = 8,
    VHOST_USER_SET_VRING_ADDR = 9,
    VHOST_USER_SET_VRING_BASE = 10,
    VHOST_USER_GET_VRING_BASE = 11,
    VHOST_USER_SET_VRING_KICK = 12,
    VHOST_USER_SET_VRING_CALL = 13,
    VHOST_USER_SET_VRING_ERR = 14,

    /* protocol features and the requests they gate */
    VHOST_USER_GET_PROTOCOL_FEATURES = 15,
    VHOST_USER_SET_PROTOCOL_FEATURES = 16,
    VHOST_USER_GET_QUEUE_NUM = 17,
    VHOST_USER_SET_VRING_ENABLE = 18,
    VHOST_USER_SEND_RARP = 19,

    VHOST_USER_MAX
} VhostUserRequest;
59
/* One entry of the memory table sent with VHOST_USER_SET_MEM_TABLE. */
typedef struct VhostUserMemoryRegion {
    uint64_t guest_phys_addr;   /* copied from the vhost region */
    uint64_t memory_size;       /* copied from the vhost region */
    uint64_t userspace_addr;    /* copied from the vhost region */
    uint64_t mmap_offset;       /* userspace_addr minus the RAM block host pointer */
} VhostUserMemoryRegion;
66
67typedef struct VhostUserMemory {
68 uint32_t nregions;
69 uint32_t padding;
70 VhostUserMemoryRegion regions[VHOST_MEMORY_MAX_NREGIONS];
71} VhostUserMemory;
72
73typedef struct VhostUserMsg {
74 VhostUserRequest request;
75
76#define VHOST_USER_VERSION_MASK (0x3)
77#define VHOST_USER_REPLY_MASK (0x1<<2)
78 uint32_t flags;
79 uint32_t size; /* the following payload size */
80 union {
81#define VHOST_USER_VRING_IDX_MASK (0xff)
82#define VHOST_USER_VRING_NOFD_MASK (0x1<<8)
83 uint64_t u64;
84 struct vhost_vring_state state;
85 struct vhost_vring_addr addr;
86 VhostUserMemory memory;
87 };
88} QEMU_PACKED VhostUserMsg;
89
90static VhostUserMsg m __attribute__ ((unused));
91#define VHOST_USER_HDR_SIZE (sizeof(m.request) \
92 + sizeof(m.flags) \
93 + sizeof(m.size))
94
95#define VHOST_USER_PAYLOAD_SIZE (sizeof(m) - VHOST_USER_HDR_SIZE)
96
97/* The version of the protocol we support */
98#define VHOST_USER_VERSION (0x1)
99
/* True only when KVM is enabled and KVM eventfds are enabled. */
static bool ioeventfd_enabled(void)
{
    if (!kvm_enabled()) {
        return false;
    }

    return kvm_eventfds_enabled();
}
104
5f6f6664
NN
105static int vhost_user_read(struct vhost_dev *dev, VhostUserMsg *msg)
106{
107 CharDriverState *chr = dev->opaque;
108 uint8_t *p = (uint8_t *) msg;
109 int r, size = VHOST_USER_HDR_SIZE;
110
111 r = qemu_chr_fe_read_all(chr, p, size);
112 if (r != size) {
ab7c5aaf 113 error_report("Failed to read msg header. Read %d instead of %d.", r,
5f6f6664
NN
114 size);
115 goto fail;
116 }
117
118 /* validate received flags */
119 if (msg->flags != (VHOST_USER_REPLY_MASK | VHOST_USER_VERSION)) {
120 error_report("Failed to read msg header."
ab7c5aaf 121 " Flags 0x%x instead of 0x%x.", msg->flags,
5f6f6664
NN
122 VHOST_USER_REPLY_MASK | VHOST_USER_VERSION);
123 goto fail;
124 }
125
126 /* validate message size is sane */
127 if (msg->size > VHOST_USER_PAYLOAD_SIZE) {
128 error_report("Failed to read msg header."
ab7c5aaf 129 " Size %d exceeds the maximum %zu.", msg->size,
5f6f6664
NN
130 VHOST_USER_PAYLOAD_SIZE);
131 goto fail;
132 }
133
134 if (msg->size) {
135 p += VHOST_USER_HDR_SIZE;
136 size = msg->size;
137 r = qemu_chr_fe_read_all(chr, p, size);
138 if (r != size) {
139 error_report("Failed to read msg payload."
ab7c5aaf 140 " Read %d instead of %d.", r, msg->size);
5f6f6664
NN
141 goto fail;
142 }
143 }
144
145 return 0;
146
147fail:
148 return -1;
149}
150
21e70425
MAL
151static bool vhost_user_one_time_request(VhostUserRequest request)
152{
153 switch (request) {
154 case VHOST_USER_SET_OWNER:
155 case VHOST_USER_RESET_DEVICE:
156 case VHOST_USER_SET_MEM_TABLE:
157 case VHOST_USER_GET_QUEUE_NUM:
158 return true;
159 default:
160 return false;
161 }
162}
163
164/* most non-init callers ignore the error */
5f6f6664
NN
165static int vhost_user_write(struct vhost_dev *dev, VhostUserMsg *msg,
166 int *fds, int fd_num)
167{
168 CharDriverState *chr = dev->opaque;
169 int size = VHOST_USER_HDR_SIZE + msg->size;
170
21e70425
MAL
171 /*
172 * For non-vring specific requests, like VHOST_USER_SET_MEM_TABLE,
173 * we just need send it once in the first time. For later such
174 * request, we just ignore it.
175 */
176 if (vhost_user_one_time_request(msg->request) && dev->vq_index != 0) {
177 return 0;
178 }
179
5f6f6664
NN
180 if (fd_num) {
181 qemu_chr_fe_set_msgfds(chr, fds, fd_num);
182 }
183
184 return qemu_chr_fe_write_all(chr, (const uint8_t *) msg, size) == size ?
185 0 : -1;
186}
187
21e70425
MAL
188static int vhost_user_set_log_base(struct vhost_dev *dev, uint64_t base,
189 struct vhost_log *log)
b931bfbf 190{
21e70425
MAL
191 int fds[VHOST_MEMORY_MAX_NREGIONS];
192 size_t fd_num = 0;
193 bool shmfd = virtio_has_feature(dev->protocol_features,
194 VHOST_USER_PROTOCOL_F_LOG_SHMFD);
195 VhostUserMsg msg = {
196 .request = VHOST_USER_SET_LOG_BASE,
197 .flags = VHOST_USER_VERSION,
198 .u64 = base,
199 .size = sizeof(m.u64),
200 };
201
202 if (shmfd && log->fd != -1) {
203 fds[fd_num++] = log->fd;
204 }
205
206 vhost_user_write(dev, &msg, fds, fd_num);
207
208 if (shmfd) {
209 msg.size = 0;
210 if (vhost_user_read(dev, &msg) < 0) {
211 return 0;
212 }
213
214 if (msg.request != VHOST_USER_SET_LOG_BASE) {
215 error_report("Received unexpected msg type. "
216 "Expected %d received %d",
217 VHOST_USER_SET_LOG_BASE, msg.request);
218 return -1;
219 }
b931bfbf 220 }
21e70425
MAL
221
222 return 0;
b931bfbf
CO
223}
224
21e70425
MAL
225static int vhost_user_set_mem_table(struct vhost_dev *dev,
226 struct vhost_memory *mem)
5f6f6664 227{
5f6f6664 228 int fds[VHOST_MEMORY_MAX_NREGIONS];
3fd74b84 229 int i, fd;
5f6f6664 230 size_t fd_num = 0;
21e70425
MAL
231 VhostUserMsg msg = {
232 .request = VHOST_USER_SET_MEM_TABLE,
233 .flags = VHOST_USER_VERSION,
234 };
5f6f6664 235
21e70425
MAL
236 for (i = 0; i < dev->mem->nregions; ++i) {
237 struct vhost_memory_region *reg = dev->mem->regions + i;
238 ram_addr_t ram_addr;
239
240 assert((uintptr_t)reg->userspace_addr == reg->userspace_addr);
241 qemu_ram_addr_from_host((void *)(uintptr_t)reg->userspace_addr,
242 &ram_addr);
243 fd = qemu_get_ram_fd(ram_addr);
244 if (fd > 0) {
245 msg.memory.regions[fd_num].userspace_addr = reg->userspace_addr;
246 msg.memory.regions[fd_num].memory_size = reg->memory_size;
247 msg.memory.regions[fd_num].guest_phys_addr = reg->guest_phys_addr;
248 msg.memory.regions[fd_num].mmap_offset = reg->userspace_addr -
249 (uintptr_t) qemu_get_ram_block_host_ptr(ram_addr);
250 assert(fd_num < VHOST_MEMORY_MAX_NREGIONS);
251 fds[fd_num++] = fd;
252 }
7305483a
YL
253 }
254
21e70425
MAL
255 msg.memory.nregions = fd_num;
256
257 if (!fd_num) {
258 error_report("Failed initializing vhost-user memory map, "
259 "consider using -object memory-backend-file share=on");
260 return -1;
b931bfbf
CO
261 }
262
21e70425
MAL
263 msg.size = sizeof(m.memory.nregions);
264 msg.size += sizeof(m.memory.padding);
265 msg.size += fd_num * sizeof(VhostUserMemoryRegion);
5f6f6664 266
21e70425 267 vhost_user_write(dev, &msg, fds, fd_num);
5f6f6664 268
21e70425
MAL
269 return 0;
270}
5f6f6664 271
21e70425
MAL
272static int vhost_user_set_vring_addr(struct vhost_dev *dev,
273 struct vhost_vring_addr *addr)
274{
275 VhostUserMsg msg = {
276 .request = VHOST_USER_SET_VRING_ADDR,
277 .flags = VHOST_USER_VERSION,
278 .addr = *addr,
279 .size = sizeof(*addr),
280 };
5f6f6664 281
21e70425 282 vhost_user_write(dev, &msg, NULL, 0);
5f6f6664 283
21e70425
MAL
284 return 0;
285}
5f6f6664 286
21e70425
MAL
/*
 * Not implemented for vhost-user: reports an error and returns -1
 * without sending anything. Both arguments are ignored.
 */
static int vhost_user_set_vring_endian(struct vhost_dev *dev,
                                       struct vhost_vring_state *ring)
{
    const int unsupported = -1;

    error_report("vhost-user trying to send unhandled ioctl");

    return unsupported;
}
5f6f6664 293
21e70425
MAL
294static int vhost_set_vring(struct vhost_dev *dev,
295 unsigned long int request,
296 struct vhost_vring_state *ring)
297{
298 VhostUserMsg msg = {
299 .request = request,
300 .flags = VHOST_USER_VERSION,
301 .state = *ring,
302 .size = sizeof(*ring),
303 };
304
305 vhost_user_write(dev, &msg, NULL, 0);
306
307 return 0;
308}
309
310static int vhost_user_set_vring_num(struct vhost_dev *dev,
311 struct vhost_vring_state *ring)
312{
313 return vhost_set_vring(dev, VHOST_USER_SET_VRING_NUM, ring);
314}
315
316static int vhost_user_set_vring_base(struct vhost_dev *dev,
317 struct vhost_vring_state *ring)
318{
319 return vhost_set_vring(dev, VHOST_USER_SET_VRING_BASE, ring);
320}
321
322static int vhost_user_set_vring_enable(struct vhost_dev *dev, int enable)
323{
324 struct vhost_vring_state state = {
325 .index = dev->vq_index,
326 .num = enable,
327 };
328
329 if (!(dev->protocol_features & (1ULL << VHOST_USER_PROTOCOL_F_MQ))) {
5f6f6664 330 return -1;
5f6f6664
NN
331 }
332
21e70425
MAL
333 return vhost_set_vring(dev, VHOST_USER_SET_VRING_ENABLE, &state);
334}
335
336
337static int vhost_user_get_vring_base(struct vhost_dev *dev,
338 struct vhost_vring_state *ring)
339{
340 VhostUserMsg msg = {
341 .request = VHOST_USER_GET_VRING_BASE,
342 .flags = VHOST_USER_VERSION,
343 .state = *ring,
344 .size = sizeof(*ring),
345 };
346
347 vhost_user_write(dev, &msg, NULL, 0);
348
349 if (vhost_user_read(dev, &msg) < 0) {
5f6f6664
NN
350 return 0;
351 }
352
21e70425
MAL
353 if (msg.request != VHOST_USER_GET_VRING_BASE) {
354 error_report("Received unexpected msg type. Expected %d received %d",
355 VHOST_USER_GET_VRING_BASE, msg.request);
356 return -1;
357 }
5f6f6664 358
21e70425
MAL
359 if (msg.size != sizeof(m.state)) {
360 error_report("Received bad msg size.");
361 return -1;
5f6f6664
NN
362 }
363
21e70425
MAL
364 *ring = msg.state;
365
5f6f6664
NN
366 return 0;
367}
368
21e70425
MAL
369static int vhost_set_vring_file(struct vhost_dev *dev,
370 VhostUserRequest request,
371 struct vhost_vring_file *file)
c2bea314 372{
9a78a5dd
MAL
373 int fds[VHOST_MEMORY_MAX_NREGIONS];
374 size_t fd_num = 0;
c2bea314 375 VhostUserMsg msg = {
21e70425 376 .request = request,
c2bea314 377 .flags = VHOST_USER_VERSION,
21e70425 378 .u64 = file->index & VHOST_USER_VRING_IDX_MASK,
c2bea314
MAL
379 .size = sizeof(m.u64),
380 };
381
21e70425
MAL
382 if (ioeventfd_enabled() && file->fd > 0) {
383 fds[fd_num++] = file->fd;
384 } else {
385 msg.u64 |= VHOST_USER_VRING_NOFD_MASK;
9a78a5dd
MAL
386 }
387
388 vhost_user_write(dev, &msg, fds, fd_num);
389
21e70425
MAL
390 return 0;
391}
9a78a5dd 392
21e70425
MAL
393static int vhost_user_set_vring_kick(struct vhost_dev *dev,
394 struct vhost_vring_file *file)
395{
396 return vhost_set_vring_file(dev, VHOST_USER_SET_VRING_KICK, file);
397}
398
399static int vhost_user_set_vring_call(struct vhost_dev *dev,
400 struct vhost_vring_file *file)
401{
402 return vhost_set_vring_file(dev, VHOST_USER_SET_VRING_CALL, file);
403}
404
405static int vhost_user_set_u64(struct vhost_dev *dev, int request, uint64_t u64)
406{
407 VhostUserMsg msg = {
408 .request = request,
409 .flags = VHOST_USER_VERSION,
410 .u64 = u64,
411 .size = sizeof(m.u64),
412 };
413
414 vhost_user_write(dev, &msg, NULL, 0);
415
416 return 0;
417}
418
419static int vhost_user_set_features(struct vhost_dev *dev,
420 uint64_t features)
421{
422 return vhost_user_set_u64(dev, VHOST_USER_SET_FEATURES, features);
423}
424
425static int vhost_user_set_protocol_features(struct vhost_dev *dev,
426 uint64_t features)
427{
428 return vhost_user_set_u64(dev, VHOST_USER_SET_PROTOCOL_FEATURES, features);
429}
430
431static int vhost_user_get_u64(struct vhost_dev *dev, int request, uint64_t *u64)
432{
433 VhostUserMsg msg = {
434 .request = request,
435 .flags = VHOST_USER_VERSION,
436 };
437
438 if (vhost_user_one_time_request(request) && dev->vq_index != 0) {
439 return 0;
9a78a5dd 440 }
c2bea314 441
21e70425
MAL
442 vhost_user_write(dev, &msg, NULL, 0);
443
444 if (vhost_user_read(dev, &msg) < 0) {
445 return 0;
446 }
447
448 if (msg.request != request) {
449 error_report("Received unexpected msg type. Expected %d received %d",
450 request, msg.request);
451 return -1;
452 }
453
454 if (msg.size != sizeof(m.u64)) {
455 error_report("Received bad msg size.");
456 return -1;
457 }
458
459 *u64 = msg.u64;
460
461 return 0;
462}
463
464static int vhost_user_get_features(struct vhost_dev *dev, uint64_t *features)
465{
466 return vhost_user_get_u64(dev, VHOST_USER_GET_FEATURES, features);
467}
468
469static int vhost_user_set_owner(struct vhost_dev *dev)
470{
471 VhostUserMsg msg = {
472 .request = VHOST_USER_SET_OWNER,
473 .flags = VHOST_USER_VERSION,
474 };
475
476 vhost_user_write(dev, &msg, NULL, 0);
477
478 return 0;
479}
480
481static int vhost_user_reset_device(struct vhost_dev *dev)
482{
483 VhostUserMsg msg = {
484 .request = VHOST_USER_RESET_DEVICE,
485 .flags = VHOST_USER_VERSION,
486 };
487
488 vhost_user_write(dev, &msg, NULL, 0);
489
c2bea314
MAL
490 return 0;
491}
492
5f6f6664
NN
493static int vhost_user_init(struct vhost_dev *dev, void *opaque)
494{
21e70425 495 uint64_t features;
dcb10c00
MT
496 int err;
497
5f6f6664
NN
498 assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER);
499
500 dev->opaque = opaque;
501
21e70425 502 err = vhost_user_get_features(dev, &features);
dcb10c00
MT
503 if (err < 0) {
504 return err;
505 }
506
507 if (virtio_has_feature(features, VHOST_USER_F_PROTOCOL_FEATURES)) {
508 dev->backend_features |= 1ULL << VHOST_USER_F_PROTOCOL_FEATURES;
509
21e70425
MAL
510 err = vhost_user_get_u64(dev, VHOST_USER_GET_PROTOCOL_FEATURES,
511 &features);
dcb10c00
MT
512 if (err < 0) {
513 return err;
514 }
515
516 dev->protocol_features = features & VHOST_USER_PROTOCOL_FEATURE_MASK;
21e70425 517 err = vhost_user_set_protocol_features(dev, dev->protocol_features);
dcb10c00
MT
518 if (err < 0) {
519 return err;
520 }
e2051e9e
YL
521
522 /* query the max queues we support if backend supports Multiple Queue */
523 if (dev->protocol_features & (1ULL << VHOST_USER_PROTOCOL_F_MQ)) {
21e70425
MAL
524 err = vhost_user_get_u64(dev, VHOST_USER_GET_QUEUE_NUM,
525 &dev->max_queues);
e2051e9e
YL
526 if (err < 0) {
527 return err;
528 }
529 }
dcb10c00
MT
530 }
531
d2fc4402
MAL
532 if (dev->migration_blocker == NULL &&
533 !virtio_has_feature(dev->protocol_features,
534 VHOST_USER_PROTOCOL_F_LOG_SHMFD)) {
535 error_setg(&dev->migration_blocker,
536 "Migration disabled: vhost-user backend lacks "
537 "VHOST_USER_PROTOCOL_F_LOG_SHMFD feature.");
538 }
539
5f6f6664
NN
540 return 0;
541}
542
543static int vhost_user_cleanup(struct vhost_dev *dev)
544{
545 assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER);
546
547 dev->opaque = 0;
548
549 return 0;
550}
551
fc57fd99
YL
552static int vhost_user_get_vq_index(struct vhost_dev *dev, int idx)
553{
554 assert(idx >= dev->vq_index && idx < dev->vq_index + dev->nvqs);
555
556 return idx;
557}
558
2ce68e4c
IM
559static int vhost_user_memslots_limit(struct vhost_dev *dev)
560{
561 return VHOST_MEMORY_MAX_NREGIONS;
562}
563
1be0ac21
MAL
564static bool vhost_user_requires_shm_log(struct vhost_dev *dev)
565{
566 assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER);
567
568 return virtio_has_feature(dev->protocol_features,
569 VHOST_USER_PROTOCOL_F_LOG_SHMFD);
570}
571
3e866365
TC
572static int vhost_user_migration_done(struct vhost_dev *dev, char* mac_addr)
573{
574 VhostUserMsg msg = { 0 };
575 int err;
576
577 assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER);
578
579 /* If guest supports GUEST_ANNOUNCE do nothing */
580 if (virtio_has_feature(dev->acked_features, VIRTIO_NET_F_GUEST_ANNOUNCE)) {
581 return 0;
582 }
583
584 /* if backend supports VHOST_USER_PROTOCOL_F_RARP ask it to send the RARP */
585 if (virtio_has_feature(dev->protocol_features,
586 VHOST_USER_PROTOCOL_F_RARP)) {
587 msg.request = VHOST_USER_SEND_RARP;
588 msg.flags = VHOST_USER_VERSION;
589 memcpy((char *)&msg.u64, mac_addr, 6);
590 msg.size = sizeof(m.u64);
591
592 err = vhost_user_write(dev, &msg, NULL, 0);
593 return err;
594 }
595 return -1;
596}
597
5f6f6664
NN
598const VhostOps user_ops = {
599 .backend_type = VHOST_BACKEND_TYPE_USER,
5f6f6664 600 .vhost_backend_init = vhost_user_init,
fc57fd99 601 .vhost_backend_cleanup = vhost_user_cleanup,
2ce68e4c 602 .vhost_backend_memslots_limit = vhost_user_memslots_limit,
21e70425
MAL
603 .vhost_set_log_base = vhost_user_set_log_base,
604 .vhost_set_mem_table = vhost_user_set_mem_table,
605 .vhost_set_vring_addr = vhost_user_set_vring_addr,
606 .vhost_set_vring_endian = vhost_user_set_vring_endian,
607 .vhost_set_vring_num = vhost_user_set_vring_num,
608 .vhost_set_vring_base = vhost_user_set_vring_base,
609 .vhost_get_vring_base = vhost_user_get_vring_base,
610 .vhost_set_vring_kick = vhost_user_set_vring_kick,
611 .vhost_set_vring_call = vhost_user_set_vring_call,
612 .vhost_set_features = vhost_user_set_features,
613 .vhost_get_features = vhost_user_get_features,
614 .vhost_set_owner = vhost_user_set_owner,
615 .vhost_reset_device = vhost_user_reset_device,
616 .vhost_get_vq_index = vhost_user_get_vq_index,
617 .vhost_set_vring_enable = vhost_user_set_vring_enable,
1be0ac21 618 .vhost_requires_shm_log = vhost_user_requires_shm_log,
3e866365 619 .vhost_migration_done = vhost_user_migration_done,
fc57fd99 620};