hw/virtio/vhost-user.c
1 /*
2 * vhost-user
3 *
4 * Copyright (c) 2013 Virtual Open Systems Sarl.
5 *
6 * This work is licensed under the terms of the GNU GPL, version 2 or later.
7 * See the COPYING file in the top-level directory.
8 *
9 */
10
11 #include "qemu/osdep.h"
12 #include "qapi/error.h"
13 #include "hw/virtio/vhost.h"
14 #include "hw/virtio/virtio-crypto.h"
15 #include "hw/virtio/vhost-user.h"
16 #include "hw/virtio/vhost-backend.h"
17 #include "hw/virtio/virtio.h"
18 #include "hw/virtio/virtio-net.h"
19 #include "chardev/char-fe.h"
20 #include "io/channel-socket.h"
21 #include "sysemu/kvm.h"
22 #include "qemu/error-report.h"
23 #include "qemu/main-loop.h"
24 #include "qemu/sockets.h"
25 #include "sysemu/runstate.h"
26 #include "sysemu/cryptodev.h"
27 #include "migration/migration.h"
28 #include "migration/postcopy-ram.h"
29 #include "trace.h"
30 #include "exec/ramblock.h"
31
32 #include <sys/ioctl.h>
33 #include <sys/socket.h>
34 #include <sys/un.h>
35
36 #include "standard-headers/linux/vhost_types.h"
37
38 #ifdef CONFIG_LINUX
39 #include <linux/userfaultfd.h>
40 #endif
41
42 #define VHOST_MEMORY_BASELINE_NREGIONS 8
43 #define VHOST_USER_F_PROTOCOL_FEATURES 30
44 #define VHOST_USER_BACKEND_MAX_FDS 8
45
46 #if defined(TARGET_PPC) || defined(TARGET_PPC64)
47 #include "hw/ppc/spapr.h"
48 #define VHOST_USER_MAX_RAM_SLOTS SPAPR_MAX_RAM_SLOTS
49
50 #else
51 #define VHOST_USER_MAX_RAM_SLOTS 512
52 #endif
53
54 /*
55 * Maximum size of virtio device config space
56 */
57 #define VHOST_USER_MAX_CONFIG_SIZE 256
58
59 enum VhostUserProtocolFeature {
60 VHOST_USER_PROTOCOL_F_MQ = 0,
61 VHOST_USER_PROTOCOL_F_LOG_SHMFD = 1,
62 VHOST_USER_PROTOCOL_F_RARP = 2,
63 VHOST_USER_PROTOCOL_F_REPLY_ACK = 3,
64 VHOST_USER_PROTOCOL_F_NET_MTU = 4,
65 VHOST_USER_PROTOCOL_F_BACKEND_REQ = 5,
66 VHOST_USER_PROTOCOL_F_CROSS_ENDIAN = 6,
67 VHOST_USER_PROTOCOL_F_CRYPTO_SESSION = 7,
68 VHOST_USER_PROTOCOL_F_PAGEFAULT = 8,
69 VHOST_USER_PROTOCOL_F_CONFIG = 9,
70 VHOST_USER_PROTOCOL_F_BACKEND_SEND_FD = 10,
71 VHOST_USER_PROTOCOL_F_HOST_NOTIFIER = 11,
72 VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD = 12,
73 VHOST_USER_PROTOCOL_F_RESET_DEVICE = 13,
74 /* Feature 14 reserved for VHOST_USER_PROTOCOL_F_INBAND_NOTIFICATIONS. */
75 VHOST_USER_PROTOCOL_F_CONFIGURE_MEM_SLOTS = 15,
76 VHOST_USER_PROTOCOL_F_STATUS = 16,
77 VHOST_USER_PROTOCOL_F_MAX
78 };
79
80 #define VHOST_USER_PROTOCOL_FEATURE_MASK ((1 << VHOST_USER_PROTOCOL_F_MAX) - 1)
81
82 typedef enum VhostUserRequest {
83 VHOST_USER_NONE = 0,
84 VHOST_USER_GET_FEATURES = 1,
85 VHOST_USER_SET_FEATURES = 2,
86 VHOST_USER_SET_OWNER = 3,
87 VHOST_USER_RESET_OWNER = 4,
88 VHOST_USER_SET_MEM_TABLE = 5,
89 VHOST_USER_SET_LOG_BASE = 6,
90 VHOST_USER_SET_LOG_FD = 7,
91 VHOST_USER_SET_VRING_NUM = 8,
92 VHOST_USER_SET_VRING_ADDR = 9,
93 VHOST_USER_SET_VRING_BASE = 10,
94 VHOST_USER_GET_VRING_BASE = 11,
95 VHOST_USER_SET_VRING_KICK = 12,
96 VHOST_USER_SET_VRING_CALL = 13,
97 VHOST_USER_SET_VRING_ERR = 14,
98 VHOST_USER_GET_PROTOCOL_FEATURES = 15,
99 VHOST_USER_SET_PROTOCOL_FEATURES = 16,
100 VHOST_USER_GET_QUEUE_NUM = 17,
101 VHOST_USER_SET_VRING_ENABLE = 18,
102 VHOST_USER_SEND_RARP = 19,
103 VHOST_USER_NET_SET_MTU = 20,
104 VHOST_USER_SET_BACKEND_REQ_FD = 21,
105 VHOST_USER_IOTLB_MSG = 22,
106 VHOST_USER_SET_VRING_ENDIAN = 23,
107 VHOST_USER_GET_CONFIG = 24,
108 VHOST_USER_SET_CONFIG = 25,
109 VHOST_USER_CREATE_CRYPTO_SESSION = 26,
110 VHOST_USER_CLOSE_CRYPTO_SESSION = 27,
111 VHOST_USER_POSTCOPY_ADVISE = 28,
112 VHOST_USER_POSTCOPY_LISTEN = 29,
113 VHOST_USER_POSTCOPY_END = 30,
114 VHOST_USER_GET_INFLIGHT_FD = 31,
115 VHOST_USER_SET_INFLIGHT_FD = 32,
116 VHOST_USER_GPU_SET_SOCKET = 33,
117 VHOST_USER_RESET_DEVICE = 34,
118 /* Message number 35 reserved for VHOST_USER_VRING_KICK. */
119 VHOST_USER_GET_MAX_MEM_SLOTS = 36,
120 VHOST_USER_ADD_MEM_REG = 37,
121 VHOST_USER_REM_MEM_REG = 38,
122 VHOST_USER_SET_STATUS = 39,
123 VHOST_USER_GET_STATUS = 40,
124 VHOST_USER_MAX
125 } VhostUserRequest;
126
127 typedef enum VhostUserBackendRequest {
128 VHOST_USER_BACKEND_NONE = 0,
129 VHOST_USER_BACKEND_IOTLB_MSG = 1,
130 VHOST_USER_BACKEND_CONFIG_CHANGE_MSG = 2,
131 VHOST_USER_BACKEND_VRING_HOST_NOTIFIER_MSG = 3,
132 VHOST_USER_BACKEND_MAX
133 } VhostUserBackendRequest;
134
135 typedef struct VhostUserMemoryRegion {
136 uint64_t guest_phys_addr;
137 uint64_t memory_size;
138 uint64_t userspace_addr;
139 uint64_t mmap_offset;
140 } VhostUserMemoryRegion;
141
142 typedef struct VhostUserMemory {
143 uint32_t nregions;
144 uint32_t padding;
145 VhostUserMemoryRegion regions[VHOST_MEMORY_BASELINE_NREGIONS];
146 } VhostUserMemory;
147
148 typedef struct VhostUserMemRegMsg {
149 uint64_t padding;
150 VhostUserMemoryRegion region;
151 } VhostUserMemRegMsg;
152
153 typedef struct VhostUserLog {
154 uint64_t mmap_size;
155 uint64_t mmap_offset;
156 } VhostUserLog;
157
158 typedef struct VhostUserConfig {
159 uint32_t offset;
160 uint32_t size;
161 uint32_t flags;
162 uint8_t region[VHOST_USER_MAX_CONFIG_SIZE];
163 } VhostUserConfig;
164
165 #define VHOST_CRYPTO_SYM_HMAC_MAX_KEY_LEN 512
166 #define VHOST_CRYPTO_SYM_CIPHER_MAX_KEY_LEN 64
167 #define VHOST_CRYPTO_ASYM_MAX_KEY_LEN 1024
168
169 typedef struct VhostUserCryptoSession {
170 uint64_t op_code;
171 union {
172 struct {
173 CryptoDevBackendSymSessionInfo session_setup_data;
174 uint8_t key[VHOST_CRYPTO_SYM_CIPHER_MAX_KEY_LEN];
175 uint8_t auth_key[VHOST_CRYPTO_SYM_HMAC_MAX_KEY_LEN];
176 } sym;
177 struct {
178 CryptoDevBackendAsymSessionInfo session_setup_data;
179 uint8_t key[VHOST_CRYPTO_ASYM_MAX_KEY_LEN];
180 } asym;
181 } u;
182
183 /* session id for success, -1 on errors */
184 int64_t session_id;
185 } VhostUserCryptoSession;
186
187 static VhostUserConfig c __attribute__ ((unused));
188 #define VHOST_USER_CONFIG_HDR_SIZE (sizeof(c.offset) \
189 + sizeof(c.size) \
190 + sizeof(c.flags))
191
192 typedef struct VhostUserVringArea {
193 uint64_t u64;
194 uint64_t size;
195 uint64_t offset;
196 } VhostUserVringArea;
197
198 typedef struct VhostUserInflight {
199 uint64_t mmap_size;
200 uint64_t mmap_offset;
201 uint16_t num_queues;
202 uint16_t queue_size;
203 } VhostUserInflight;
204
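/*
 * Every vhost-user message on the wire is this packed header followed by
 * hdr.size bytes of payload (one of the VhostUserPayload variants below).
 */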
205 typedef struct {
206 VhostUserRequest request;
207
208 #define VHOST_USER_VERSION_MASK (0x3)
209 #define VHOST_USER_REPLY_MASK (0x1 << 2)
210 #define VHOST_USER_NEED_REPLY_MASK (0x1 << 3)
211 uint32_t flags;
212 uint32_t size; /* the following payload size */
213 } QEMU_PACKED VhostUserHeader;
214
215 typedef union {
216 #define VHOST_USER_VRING_IDX_MASK (0xff)
217 #define VHOST_USER_VRING_NOFD_MASK (0x1 << 8)
218 uint64_t u64;
219 struct vhost_vring_state state;
220 struct vhost_vring_addr addr;
221 VhostUserMemory memory;
222 VhostUserMemRegMsg mem_reg;
223 VhostUserLog log;
224 struct vhost_iotlb_msg iotlb;
225 VhostUserConfig config;
226 VhostUserCryptoSession session;
227 VhostUserVringArea area;
228 VhostUserInflight inflight;
229 } VhostUserPayload;
230
231 typedef struct VhostUserMsg {
232 VhostUserHeader hdr;
233 VhostUserPayload payload;
234 } QEMU_PACKED VhostUserMsg;
235
236 static VhostUserMsg m __attribute__ ((unused));
237 #define VHOST_USER_HDR_SIZE (sizeof(VhostUserHeader))
238
239 #define VHOST_USER_PAYLOAD_SIZE (sizeof(VhostUserPayload))
240
241 /* The version of the protocol we support */
242 #define VHOST_USER_VERSION (0x1)
243
244 struct vhost_user {
245 struct vhost_dev *dev;
246 /* Shared between vhost devs of the same virtio device */
247 VhostUserState *user;
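/* Channel and watch source for backend-initiated requests */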
248 QIOChannel *backend_ioc;
249 GSource *backend_src;
250 NotifierWithReturn postcopy_notifier;
251 struct PostCopyFD postcopy_fd;
252 uint64_t postcopy_client_bases[VHOST_USER_MAX_RAM_SLOTS];
253 /* Length of the region_rb and region_rb_offset arrays */
254 size_t region_rb_len;
255 /* RAMBlock associated with a given region */
256 RAMBlock **region_rb;
257 /*
258 * The offset from the start of the RAMBlock to the start of the
259 * vhost region.
260 */
261 ram_addr_t *region_rb_offset;
262
263 /* True once we've entered postcopy_listen */
264 bool postcopy_listen;
265
266 /* Our current regions */
267 int num_shadow_regions;
268 struct vhost_memory_region shadow_regions[VHOST_USER_MAX_RAM_SLOTS];
269 };
270
271 struct scrub_regions {
272 struct vhost_memory_region *region;
273 int reg_idx;
274 int fd_idx;
275 };
276
277 static bool ioeventfd_enabled(void)
278 {
279 return !kvm_enabled() || kvm_eventfds_enabled();
280 }
281
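/*
 * Read the fixed-size header of a reply from the backend and validate
 * its version and REPLY flag.
 */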
282 static int vhost_user_read_header(struct vhost_dev *dev, VhostUserMsg *msg)
283 {
284 struct vhost_user *u = dev->opaque;
285 CharBackend *chr = u->user->chr;
286 uint8_t *p = (uint8_t *) msg;
287 int r, size = VHOST_USER_HDR_SIZE;
288
289 r = qemu_chr_fe_read_all(chr, p, size);
290 if (r != size) {
291 int saved_errno = errno;
292 error_report("Failed to read msg header. Read %d instead of %d."
293 " Original request %d.", r, size, msg->hdr.request);
294 return r < 0 ? -saved_errno : -EIO;
295 }
296
297 /* validate received flags */
298 if (msg->hdr.flags != (VHOST_USER_REPLY_MASK | VHOST_USER_VERSION)) {
299 error_report("Failed to read msg header."
300 " Flags 0x%x instead of 0x%x.", msg->hdr.flags,
301 VHOST_USER_REPLY_MASK | VHOST_USER_VERSION);
302 return -EPROTO;
303 }
304
305 trace_vhost_user_read(msg->hdr.request, msg->hdr.flags);
306
307 return 0;
308 }
309
310 static int vhost_user_read(struct vhost_dev *dev, VhostUserMsg *msg)
311 {
312 struct vhost_user *u = dev->opaque;
313 CharBackend *chr = u->user->chr;
314 uint8_t *p = (uint8_t *) msg;
315 int r, size;
316
317 r = vhost_user_read_header(dev, msg);
318 if (r < 0) {
319 return r;
320 }
321
322 /* validate message size is sane */
323 if (msg->hdr.size > VHOST_USER_PAYLOAD_SIZE) {
324 error_report("Failed to read msg header."
325 " Size %d exceeds the maximum %zu.", msg->hdr.size,
326 VHOST_USER_PAYLOAD_SIZE);
327 return -EPROTO;
328 }
329
330 if (msg->hdr.size) {
331 p += VHOST_USER_HDR_SIZE;
332 size = msg->hdr.size;
333 r = qemu_chr_fe_read_all(chr, p, size);
334 if (r != size) {
335 int saved_errno = errno;
336 error_report("Failed to read msg payload."
337 " Read %d instead of %d.", r, msg->hdr.size);
338 return r < 0 ? -saved_errno : -EIO;
339 }
340 }
341
342 return 0;
343 }
344
345 static int process_message_reply(struct vhost_dev *dev,
346 const VhostUserMsg *msg)
347 {
348 int ret;
349 VhostUserMsg msg_reply;
350
351 if ((msg->hdr.flags & VHOST_USER_NEED_REPLY_MASK) == 0) {
352 return 0;
353 }
354
355 ret = vhost_user_read(dev, &msg_reply);
356 if (ret < 0) {
357 return ret;
358 }
359
360 if (msg_reply.hdr.request != msg->hdr.request) {
361 error_report("Received unexpected msg type. "
362 "Expected %d received %d",
363 msg->hdr.request, msg_reply.hdr.request);
364 return -EPROTO;
365 }
366
367 return msg_reply.payload.u64 ? -EIO : 0;
368 }
369
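/*
 * Requests that apply to the whole device rather than to an individual
 * vring; vhost_user_write() only sends these for the first queue pair.
 */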
370 static bool vhost_user_one_time_request(VhostUserRequest request)
371 {
372 switch (request) {
373 case VHOST_USER_SET_OWNER:
374 case VHOST_USER_RESET_OWNER:
375 case VHOST_USER_SET_MEM_TABLE:
376 case VHOST_USER_GET_QUEUE_NUM:
377 case VHOST_USER_NET_SET_MTU:
378 case VHOST_USER_ADD_MEM_REG:
379 case VHOST_USER_REM_MEM_REG:
380 return true;
381 default:
382 return false;
383 }
384 }
385
386 /* most non-init callers ignore the error */
387 static int vhost_user_write(struct vhost_dev *dev, VhostUserMsg *msg,
388 int *fds, int fd_num)
389 {
390 struct vhost_user *u = dev->opaque;
391 CharBackend *chr = u->user->chr;
392 int ret, size = VHOST_USER_HDR_SIZE + msg->hdr.size;
393
394 /*
395 * For non-vring specific requests, like VHOST_USER_SET_MEM_TABLE,
396 * we only need to send it once, the first time. Later such
397 * requests are simply ignored.
398 */
399 if (vhost_user_one_time_request(msg->hdr.request) && dev->vq_index != 0) {
400 msg->hdr.flags &= ~VHOST_USER_NEED_REPLY_MASK;
401 return 0;
402 }
403
404 if (qemu_chr_fe_set_msgfds(chr, fds, fd_num) < 0) {
405 error_report("Failed to set msg fds.");
406 return -EINVAL;
407 }
408
409 ret = qemu_chr_fe_write_all(chr, (const uint8_t *) msg, size);
410 if (ret != size) {
411 int saved_errno = errno;
412 error_report("Failed to write msg."
413 " Wrote %d instead of %d.", ret, size);
414 return ret < 0 ? -saved_errno : -EIO;
415 }
416
417 trace_vhost_user_write(msg->hdr.request, msg->hdr.flags);
418
419 return 0;
420 }
421
422 int vhost_user_gpu_set_socket(struct vhost_dev *dev, int fd)
423 {
424 VhostUserMsg msg = {
425 .hdr.request = VHOST_USER_GPU_SET_SOCKET,
426 .hdr.flags = VHOST_USER_VERSION,
427 };
428
429 return vhost_user_write(dev, &msg, &fd, 1);
430 }
431
432 static int vhost_user_set_log_base(struct vhost_dev *dev, uint64_t base,
433 struct vhost_log *log)
434 {
435 int fds[VHOST_USER_MAX_RAM_SLOTS];
436 size_t fd_num = 0;
437 bool shmfd = virtio_has_feature(dev->protocol_features,
438 VHOST_USER_PROTOCOL_F_LOG_SHMFD);
439 int ret;
440 VhostUserMsg msg = {
441 .hdr.request = VHOST_USER_SET_LOG_BASE,
442 .hdr.flags = VHOST_USER_VERSION,
443 .payload.log.mmap_size = log->size * sizeof(*(log->log)),
444 .payload.log.mmap_offset = 0,
445 .hdr.size = sizeof(msg.payload.log),
446 };
447
448 /* Send only once with first queue pair */
449 if (dev->vq_index != 0) {
450 return 0;
451 }
452
453 if (shmfd && log->fd != -1) {
454 fds[fd_num++] = log->fd;
455 }
456
457 ret = vhost_user_write(dev, &msg, fds, fd_num);
458 if (ret < 0) {
459 return ret;
460 }
461
462 if (shmfd) {
463 msg.hdr.size = 0;
464 ret = vhost_user_read(dev, &msg);
465 if (ret < 0) {
466 return ret;
467 }
468
469 if (msg.hdr.request != VHOST_USER_SET_LOG_BASE) {
470 error_report("Received unexpected msg type. "
471 "Expected %d received %d",
472 VHOST_USER_SET_LOG_BASE, msg.hdr.request);
473 return -EPROTO;
474 }
475 }
476
477 return 0;
478 }
479
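/*
 * Map a QEMU userspace address back to its MemoryRegion and return the
 * backing file descriptor plus the offset of the address within that fd.
 */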
480 static MemoryRegion *vhost_user_get_mr_data(uint64_t addr, ram_addr_t *offset,
481 int *fd)
482 {
483 MemoryRegion *mr;
484
485 assert((uintptr_t)addr == addr);
486 mr = memory_region_from_host((void *)(uintptr_t)addr, offset);
487 *fd = memory_region_get_fd(mr);
488 *offset += mr->ram_block->fd_offset;
489
490 return mr;
491 }
492
493 static void vhost_user_fill_msg_region(VhostUserMemoryRegion *dst,
494 struct vhost_memory_region *src,
495 uint64_t mmap_offset)
496 {
497 assert(src != NULL && dst != NULL);
498 dst->userspace_addr = src->userspace_addr;
499 dst->memory_size = src->memory_size;
500 dst->guest_phys_addr = src->guest_phys_addr;
501 dst->mmap_offset = mmap_offset;
502 }
503
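/*
 * Build a VHOST_USER_SET_MEM_TABLE message from the device's current
 * memory map, collecting one fd per fd-backed region into @fds.
 */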
504 static int vhost_user_fill_set_mem_table_msg(struct vhost_user *u,
505 struct vhost_dev *dev,
506 VhostUserMsg *msg,
507 int *fds, size_t *fd_num,
508 bool track_ramblocks)
509 {
510 int i, fd;
511 ram_addr_t offset;
512 MemoryRegion *mr;
513 struct vhost_memory_region *reg;
514 VhostUserMemoryRegion region_buffer;
515
516 msg->hdr.request = VHOST_USER_SET_MEM_TABLE;
517
518 for (i = 0; i < dev->mem->nregions; ++i) {
519 reg = dev->mem->regions + i;
520
521 mr = vhost_user_get_mr_data(reg->userspace_addr, &offset, &fd);
522 if (fd > 0) {
523 if (track_ramblocks) {
524 assert(*fd_num < VHOST_MEMORY_BASELINE_NREGIONS);
525 trace_vhost_user_set_mem_table_withfd(*fd_num, mr->name,
526 reg->memory_size,
527 reg->guest_phys_addr,
528 reg->userspace_addr,
529 offset);
530 u->region_rb_offset[i] = offset;
531 u->region_rb[i] = mr->ram_block;
532 } else if (*fd_num == VHOST_MEMORY_BASELINE_NREGIONS) {
533 error_report("Failed preparing vhost-user memory table msg");
534 return -ENOBUFS;
535 }
536 vhost_user_fill_msg_region(&region_buffer, reg, offset);
537 msg->payload.memory.regions[*fd_num] = region_buffer;
538 fds[(*fd_num)++] = fd;
539 } else if (track_ramblocks) {
540 u->region_rb_offset[i] = 0;
541 u->region_rb[i] = NULL;
542 }
543 }
544
545 msg->payload.memory.nregions = *fd_num;
546
547 if (!*fd_num) {
548 error_report("Failed initializing vhost-user memory map, "
549 "consider using -object memory-backend-file share=on");
550 return -EINVAL;
551 }
552
553 msg->hdr.size = sizeof(msg->payload.memory.nregions);
554 msg->hdr.size += sizeof(msg->payload.memory.padding);
555 msg->hdr.size += *fd_num * sizeof(VhostUserMemoryRegion);
556
557 return 0;
558 }
559
560 static inline bool reg_equal(struct vhost_memory_region *shadow_reg,
561 struct vhost_memory_region *vdev_reg)
562 {
563 return shadow_reg->guest_phys_addr == vdev_reg->guest_phys_addr &&
564 shadow_reg->userspace_addr == vdev_reg->userspace_addr &&
565 shadow_reg->memory_size == vdev_reg->memory_size;
566 }
567
568 static void scrub_shadow_regions(struct vhost_dev *dev,
569 struct scrub_regions *add_reg,
570 int *nr_add_reg,
571 struct scrub_regions *rem_reg,
572 int *nr_rem_reg, uint64_t *shadow_pcb,
573 bool track_ramblocks)
574 {
575 struct vhost_user *u = dev->opaque;
576 bool found[VHOST_USER_MAX_RAM_SLOTS] = {};
577 struct vhost_memory_region *reg, *shadow_reg;
578 int i, j, fd, add_idx = 0, rm_idx = 0, fd_num = 0;
579 ram_addr_t offset;
580 MemoryRegion *mr;
581 bool matching;
582
583 /*
584 * Find memory regions present in our shadow state which are not in
585 * the device's current memory state.
586 *
587 * Mark regions in both the shadow and device state as "found".
588 */
589 for (i = 0; i < u->num_shadow_regions; i++) {
590 shadow_reg = &u->shadow_regions[i];
591 matching = false;
592
593 for (j = 0; j < dev->mem->nregions; j++) {
594 reg = &dev->mem->regions[j];
595
596 mr = vhost_user_get_mr_data(reg->userspace_addr, &offset, &fd);
597
598 if (reg_equal(shadow_reg, reg)) {
599 matching = true;
600 found[j] = true;
601 if (track_ramblocks) {
602 /*
603 * Reset postcopy client bases, region_rb, and
604 * region_rb_offset in case regions are removed.
605 */
606 if (fd > 0) {
607 u->region_rb_offset[j] = offset;
608 u->region_rb[j] = mr->ram_block;
609 shadow_pcb[j] = u->postcopy_client_bases[i];
610 } else {
611 u->region_rb_offset[j] = 0;
612 u->region_rb[j] = NULL;
613 }
614 }
615 break;
616 }
617 }
618
619 /*
620 * If the region was not found in the current device memory state
621 * create an entry for it in the removed list.
622 */
623 if (!matching) {
624 rem_reg[rm_idx].region = shadow_reg;
625 rem_reg[rm_idx++].reg_idx = i;
626 }
627 }
628
629 /*
630 * For regions not marked "found", create entries in the added list.
631 *
632 * Note their indexes in the device memory state and the indexes of their
633 * file descriptors.
634 */
635 for (i = 0; i < dev->mem->nregions; i++) {
636 reg = &dev->mem->regions[i];
637 vhost_user_get_mr_data(reg->userspace_addr, &offset, &fd);
638 if (fd > 0) {
639 ++fd_num;
640 }
641
642 /*
643 * If the region was in both the shadow and device state we don't
644 * need to send a VHOST_USER_ADD_MEM_REG message for it.
645 */
646 if (found[i]) {
647 continue;
648 }
649
650 add_reg[add_idx].region = reg;
651 add_reg[add_idx].reg_idx = i;
652 add_reg[add_idx++].fd_idx = fd_num;
653 }
654 *nr_rem_reg = rm_idx;
655 *nr_add_reg = add_idx;
656
657 return;
658 }
659
660 static int send_remove_regions(struct vhost_dev *dev,
661 struct scrub_regions *remove_reg,
662 int nr_rem_reg, VhostUserMsg *msg,
663 bool reply_supported)
664 {
665 struct vhost_user *u = dev->opaque;
666 struct vhost_memory_region *shadow_reg;
667 int i, fd, shadow_reg_idx, ret;
668 ram_addr_t offset;
669 VhostUserMemoryRegion region_buffer;
670
671 /*
672 * The regions in remove_reg appear in the same order they do in the
673 * shadow table. Therefore we can minimize memory copies by iterating
674 * through remove_reg backwards.
675 */
676 for (i = nr_rem_reg - 1; i >= 0; i--) {
677 shadow_reg = remove_reg[i].region;
678 shadow_reg_idx = remove_reg[i].reg_idx;
679
680 vhost_user_get_mr_data(shadow_reg->userspace_addr, &offset, &fd);
681
682 if (fd > 0) {
683 msg->hdr.request = VHOST_USER_REM_MEM_REG;
684 vhost_user_fill_msg_region(&region_buffer, shadow_reg, 0);
685 msg->payload.mem_reg.region = region_buffer;
686
687 ret = vhost_user_write(dev, msg, NULL, 0);
688 if (ret < 0) {
689 return ret;
690 }
691
692 if (reply_supported) {
693 ret = process_message_reply(dev, msg);
694 if (ret) {
695 return ret;
696 }
697 }
698 }
699
700 /*
701 * At this point we know the backend has unmapped the region. It is now
702 * safe to remove it from the shadow table.
703 */
704 memmove(&u->shadow_regions[shadow_reg_idx],
705 &u->shadow_regions[shadow_reg_idx + 1],
706 sizeof(struct vhost_memory_region) *
707 (u->num_shadow_regions - shadow_reg_idx - 1));
708 u->num_shadow_regions--;
709 }
710
711 return 0;
712 }
713
714 static int send_add_regions(struct vhost_dev *dev,
715 struct scrub_regions *add_reg, int nr_add_reg,
716 VhostUserMsg *msg, uint64_t *shadow_pcb,
717 bool reply_supported, bool track_ramblocks)
718 {
719 struct vhost_user *u = dev->opaque;
720 int i, fd, ret, reg_idx, reg_fd_idx;
721 struct vhost_memory_region *reg;
722 MemoryRegion *mr;
723 ram_addr_t offset;
724 VhostUserMsg msg_reply;
725 VhostUserMemoryRegion region_buffer;
726
727 for (i = 0; i < nr_add_reg; i++) {
728 reg = add_reg[i].region;
729 reg_idx = add_reg[i].reg_idx;
730 reg_fd_idx = add_reg[i].fd_idx;
731
732 mr = vhost_user_get_mr_data(reg->userspace_addr, &offset, &fd);
733
734 if (fd > 0) {
735 if (track_ramblocks) {
736 trace_vhost_user_set_mem_table_withfd(reg_fd_idx, mr->name,
737 reg->memory_size,
738 reg->guest_phys_addr,
739 reg->userspace_addr,
740 offset);
741 u->region_rb_offset[reg_idx] = offset;
742 u->region_rb[reg_idx] = mr->ram_block;
743 }
744 msg->hdr.request = VHOST_USER_ADD_MEM_REG;
745 vhost_user_fill_msg_region(&region_buffer, reg, offset);
746 msg->payload.mem_reg.region = region_buffer;
747
748 ret = vhost_user_write(dev, msg, &fd, 1);
749 if (ret < 0) {
750 return ret;
751 }
752
753 if (track_ramblocks) {
754 uint64_t reply_gpa;
755
756 ret = vhost_user_read(dev, &msg_reply);
757 if (ret < 0) {
758 return ret;
759 }
760
761 reply_gpa = msg_reply.payload.mem_reg.region.guest_phys_addr;
762
763 if (msg_reply.hdr.request != VHOST_USER_ADD_MEM_REG) {
764 error_report("%s: Received unexpected msg type."
765 "Expected %d received %d", __func__,
766 VHOST_USER_ADD_MEM_REG,
767 msg_reply.hdr.request);
768 return -EPROTO;
769 }
770
771 /*
772 * We're using the same structure, just reusing one of the
773 * fields, so it should be the same size.
774 */
775 if (msg_reply.hdr.size != msg->hdr.size) {
776 error_report("%s: Unexpected size for postcopy reply "
777 "%d vs %d", __func__, msg_reply.hdr.size,
778 msg->hdr.size);
779 return -EPROTO;
780 }
781
782 /* Get the postcopy client base from the backend's reply. */
783 if (reply_gpa == dev->mem->regions[reg_idx].guest_phys_addr) {
784 shadow_pcb[reg_idx] =
785 msg_reply.payload.mem_reg.region.userspace_addr;
786 trace_vhost_user_set_mem_table_postcopy(
787 msg_reply.payload.mem_reg.region.userspace_addr,
788 msg->payload.mem_reg.region.userspace_addr,
789 reg_fd_idx, reg_idx);
790 } else {
791 error_report("%s: invalid postcopy reply for region. "
792 "Got guest physical address %" PRIX64 ", expected "
793 "%" PRIX64, __func__, reply_gpa,
794 dev->mem->regions[reg_idx].guest_phys_addr);
795 return -EPROTO;
796 }
797 } else if (reply_supported) {
798 ret = process_message_reply(dev, msg);
799 if (ret) {
800 return ret;
801 }
802 }
803 } else if (track_ramblocks) {
804 u->region_rb_offset[reg_idx] = 0;
805 u->region_rb[reg_idx] = NULL;
806 }
807
808 /*
809 * At this point, we know the backend has mapped in the new
810 * region, if the region has a valid file descriptor.
811 *
812 * The region should now be added to the shadow table.
813 */
814 u->shadow_regions[u->num_shadow_regions].guest_phys_addr =
815 reg->guest_phys_addr;
816 u->shadow_regions[u->num_shadow_regions].userspace_addr =
817 reg->userspace_addr;
818 u->shadow_regions[u->num_shadow_regions].memory_size =
819 reg->memory_size;
820 u->num_shadow_regions++;
821 }
822
823 return 0;
824 }
825
826 static int vhost_user_add_remove_regions(struct vhost_dev *dev,
827 VhostUserMsg *msg,
828 bool reply_supported,
829 bool track_ramblocks)
830 {
831 struct vhost_user *u = dev->opaque;
832 struct scrub_regions add_reg[VHOST_USER_MAX_RAM_SLOTS];
833 struct scrub_regions rem_reg[VHOST_USER_MAX_RAM_SLOTS];
834 uint64_t shadow_pcb[VHOST_USER_MAX_RAM_SLOTS] = {};
835 int nr_add_reg, nr_rem_reg;
836 int ret;
837
838 msg->hdr.size = sizeof(msg->payload.mem_reg);
839
840 /* Find the regions which need to be removed or added. */
841 scrub_shadow_regions(dev, add_reg, &nr_add_reg, rem_reg, &nr_rem_reg,
842 shadow_pcb, track_ramblocks);
843
844 if (nr_rem_reg) {
845 ret = send_remove_regions(dev, rem_reg, nr_rem_reg, msg,
846 reply_supported);
847 if (ret < 0) {
848 goto err;
849 }
850 }
851
852 if (nr_add_reg) {
853 ret = send_add_regions(dev, add_reg, nr_add_reg, msg, shadow_pcb,
854 reply_supported, track_ramblocks);
855 if (ret < 0) {
856 goto err;
857 }
858 }
859
860 if (track_ramblocks) {
861 memcpy(u->postcopy_client_bases, shadow_pcb,
862 sizeof(uint64_t) * VHOST_USER_MAX_RAM_SLOTS);
863 /*
864 * Now we've registered this with the postcopy code, we ack to the
865 * client, because now we're in a position to deal with
866 * any faults it generates.
867 */
868 /* TODO: Use this for failure cases as well with a bad value. */
869 msg->hdr.size = sizeof(msg->payload.u64);
870 msg->payload.u64 = 0; /* OK */
871
872 ret = vhost_user_write(dev, msg, NULL, 0);
873 if (ret < 0) {
874 return ret;
875 }
876 }
877
878 return 0;
879
880 err:
881 if (track_ramblocks) {
882 memcpy(u->postcopy_client_bases, shadow_pcb,
883 sizeof(uint64_t) * VHOST_USER_MAX_RAM_SLOTS);
884 }
885
886 return ret;
887 }
888
889 static int vhost_user_set_mem_table_postcopy(struct vhost_dev *dev,
890 struct vhost_memory *mem,
891 bool reply_supported,
892 bool config_mem_slots)
893 {
894 struct vhost_user *u = dev->opaque;
895 int fds[VHOST_MEMORY_BASELINE_NREGIONS];
896 size_t fd_num = 0;
897 VhostUserMsg msg_reply;
898 int region_i, msg_i;
899 int ret;
900
901 VhostUserMsg msg = {
902 .hdr.flags = VHOST_USER_VERSION,
903 };
904
905 if (u->region_rb_len < dev->mem->nregions) {
906 u->region_rb = g_renew(RAMBlock*, u->region_rb, dev->mem->nregions);
907 u->region_rb_offset = g_renew(ram_addr_t, u->region_rb_offset,
908 dev->mem->nregions);
909 memset(&(u->region_rb[u->region_rb_len]), '\0',
910 sizeof(RAMBlock *) * (dev->mem->nregions - u->region_rb_len));
911 memset(&(u->region_rb_offset[u->region_rb_len]), '\0',
912 sizeof(ram_addr_t) * (dev->mem->nregions - u->region_rb_len));
913 u->region_rb_len = dev->mem->nregions;
914 }
915
916 if (config_mem_slots) {
917 ret = vhost_user_add_remove_regions(dev, &msg, reply_supported, true);
918 if (ret < 0) {
919 return ret;
920 }
921 } else {
922 ret = vhost_user_fill_set_mem_table_msg(u, dev, &msg, fds, &fd_num,
923 true);
924 if (ret < 0) {
925 return ret;
926 }
927
928 ret = vhost_user_write(dev, &msg, fds, fd_num);
929 if (ret < 0) {
930 return ret;
931 }
932
933 ret = vhost_user_read(dev, &msg_reply);
934 if (ret < 0) {
935 return ret;
936 }
937
938 if (msg_reply.hdr.request != VHOST_USER_SET_MEM_TABLE) {
939 error_report("%s: Received unexpected msg type."
940 "Expected %d received %d", __func__,
941 VHOST_USER_SET_MEM_TABLE, msg_reply.hdr.request);
942 return -EPROTO;
943 }
944
945 /*
946 * We're using the same structure, just reusing one of the
947 * fields, so it should be the same size.
948 */
949 if (msg_reply.hdr.size != msg.hdr.size) {
950 error_report("%s: Unexpected size for postcopy reply "
951 "%d vs %d", __func__, msg_reply.hdr.size,
952 msg.hdr.size);
953 return -EPROTO;
954 }
955
956 memset(u->postcopy_client_bases, 0,
957 sizeof(uint64_t) * VHOST_USER_MAX_RAM_SLOTS);
958
959 /*
960 * They're in the same order as the regions that were sent
961 * but some of the regions were skipped (above) if they
962 * didn't have fds.
963 */
964 for (msg_i = 0, region_i = 0;
965 region_i < dev->mem->nregions;
966 region_i++) {
967 if (msg_i < fd_num &&
968 msg_reply.payload.memory.regions[msg_i].guest_phys_addr ==
969 dev->mem->regions[region_i].guest_phys_addr) {
970 u->postcopy_client_bases[region_i] =
971 msg_reply.payload.memory.regions[msg_i].userspace_addr;
972 trace_vhost_user_set_mem_table_postcopy(
973 msg_reply.payload.memory.regions[msg_i].userspace_addr,
974 msg.payload.memory.regions[msg_i].userspace_addr,
975 msg_i, region_i);
976 msg_i++;
977 }
978 }
979 if (msg_i != fd_num) {
980 error_report("%s: postcopy reply not fully consumed "
981 "%d vs %zd",
982 __func__, msg_i, fd_num);
983 return -EIO;
984 }
985
986 /*
987 * Now we've registered this with the postcopy code, we ack to the
988 * client, because now we're in a position to deal
989 * with any faults it generates.
990 */
991 /* TODO: Use this for failure cases as well with a bad value. */
992 msg.hdr.size = sizeof(msg.payload.u64);
993 msg.payload.u64 = 0; /* OK */
994 ret = vhost_user_write(dev, &msg, NULL, 0);
995 if (ret < 0) {
996 return ret;
997 }
998 }
999
1000 return 0;
1001 }
1002
1003 static int vhost_user_set_mem_table(struct vhost_dev *dev,
1004 struct vhost_memory *mem)
1005 {
1006 struct vhost_user *u = dev->opaque;
1007 int fds[VHOST_MEMORY_BASELINE_NREGIONS];
1008 size_t fd_num = 0;
1009 bool do_postcopy = u->postcopy_listen && u->postcopy_fd.handler;
1010 bool reply_supported = virtio_has_feature(dev->protocol_features,
1011 VHOST_USER_PROTOCOL_F_REPLY_ACK);
1012 bool config_mem_slots =
1013 virtio_has_feature(dev->protocol_features,
1014 VHOST_USER_PROTOCOL_F_CONFIGURE_MEM_SLOTS);
1015 int ret;
1016
1017 if (do_postcopy) {
1018 /*
1019 * Postcopy has enough differences that it's best done in its own
1020 * version.
1021 */
1022 return vhost_user_set_mem_table_postcopy(dev, mem, reply_supported,
1023 config_mem_slots);
1024 }
1025
1026 VhostUserMsg msg = {
1027 .hdr.flags = VHOST_USER_VERSION,
1028 };
1029
1030 if (reply_supported) {
1031 msg.hdr.flags |= VHOST_USER_NEED_REPLY_MASK;
1032 }
1033
1034 if (config_mem_slots) {
1035 ret = vhost_user_add_remove_regions(dev, &msg, reply_supported, false);
1036 if (ret < 0) {
1037 return ret;
1038 }
1039 } else {
1040 ret = vhost_user_fill_set_mem_table_msg(u, dev, &msg, fds, &fd_num,
1041 false);
1042 if (ret < 0) {
1043 return ret;
1044 }
1045
1046 ret = vhost_user_write(dev, &msg, fds, fd_num);
1047 if (ret < 0) {
1048 return ret;
1049 }
1050
1051 if (reply_supported) {
1052 return process_message_reply(dev, &msg);
1053 }
1054 }
1055
1056 return 0;
1057 }
1058
1059 static int vhost_user_set_vring_endian(struct vhost_dev *dev,
1060 struct vhost_vring_state *ring)
1061 {
1062 bool cross_endian = virtio_has_feature(dev->protocol_features,
1063 VHOST_USER_PROTOCOL_F_CROSS_ENDIAN);
1064 VhostUserMsg msg = {
1065 .hdr.request = VHOST_USER_SET_VRING_ENDIAN,
1066 .hdr.flags = VHOST_USER_VERSION,
1067 .payload.state = *ring,
1068 .hdr.size = sizeof(msg.payload.state),
1069 };
1070
1071 if (!cross_endian) {
1072 error_report("vhost-user trying to send unhandled ioctl");
1073 return -ENOTSUP;
1074 }
1075
1076 return vhost_user_write(dev, &msg, NULL, 0);
1077 }
1078
1079 static int vhost_set_vring(struct vhost_dev *dev,
1080 unsigned long int request,
1081 struct vhost_vring_state *ring)
1082 {
1083 VhostUserMsg msg = {
1084 .hdr.request = request,
1085 .hdr.flags = VHOST_USER_VERSION,
1086 .payload.state = *ring,
1087 .hdr.size = sizeof(msg.payload.state),
1088 };
1089
1090 return vhost_user_write(dev, &msg, NULL, 0);
1091 }
1092
1093 static int vhost_user_set_vring_num(struct vhost_dev *dev,
1094 struct vhost_vring_state *ring)
1095 {
1096 return vhost_set_vring(dev, VHOST_USER_SET_VRING_NUM, ring);
1097 }
1098
1099 static void vhost_user_host_notifier_free(VhostUserHostNotifier *n)
1100 {
1101 assert(n && n->unmap_addr);
1102 munmap(n->unmap_addr, qemu_real_host_page_size());
1103 n->unmap_addr = NULL;
1104 }
1105
1106 /*
1107 * Clean-up function for a notifier; the structure is finally freed
1108 * under RCU.
1109 */
1110 static void vhost_user_host_notifier_remove(VhostUserHostNotifier *n,
1111 VirtIODevice *vdev)
1112 {
1113 if (n->addr) {
1114 if (vdev) {
1115 virtio_queue_set_host_notifier_mr(vdev, n->idx, &n->mr, false);
1116 }
1117 assert(!n->unmap_addr);
1118 n->unmap_addr = n->addr;
1119 n->addr = NULL;
1120 call_rcu(n, vhost_user_host_notifier_free, rcu);
1121 }
1122 }
1123
1124 static int vhost_user_set_vring_base(struct vhost_dev *dev,
1125 struct vhost_vring_state *ring)
1126 {
1127 return vhost_set_vring(dev, VHOST_USER_SET_VRING_BASE, ring);
1128 }
1129
1130 static int vhost_user_set_vring_enable(struct vhost_dev *dev, int enable)
1131 {
1132 int i;
1133
1134 if (!virtio_has_feature(dev->features, VHOST_USER_F_PROTOCOL_FEATURES)) {
1135 return -EINVAL;
1136 }
1137
1138 for (i = 0; i < dev->nvqs; ++i) {
1139 int ret;
1140 struct vhost_vring_state state = {
1141 .index = dev->vq_index + i,
1142 .num = enable,
1143 };
1144
1145 ret = vhost_set_vring(dev, VHOST_USER_SET_VRING_ENABLE, &state);
1146 if (ret < 0) {
1147 /*
1148 * Restoring the previous state is likely infeasible, as well as
1149 * proceeding regardless of the error, so just bail out and hope for
1150 * the device-level recovery.
1151 */
1152 return ret;
1153 }
1154 }
1155
1156 return 0;
1157 }
1158
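/* Return the host notifier for @idx if one has been set up, or NULL. */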
1159 static VhostUserHostNotifier *fetch_notifier(VhostUserState *u,
1160 int idx)
1161 {
1162 if (idx >= u->notifiers->len) {
1163 return NULL;
1164 }
1165 return g_ptr_array_index(u->notifiers, idx);
1166 }
1167
1168 static int vhost_user_get_vring_base(struct vhost_dev *dev,
1169 struct vhost_vring_state *ring)
1170 {
1171 int ret;
1172 VhostUserMsg msg = {
1173 .hdr.request = VHOST_USER_GET_VRING_BASE,
1174 .hdr.flags = VHOST_USER_VERSION,
1175 .payload.state = *ring,
1176 .hdr.size = sizeof(msg.payload.state),
1177 };
1178 struct vhost_user *u = dev->opaque;
1179
1180 VhostUserHostNotifier *n = fetch_notifier(u->user, ring->index);
1181 if (n) {
1182 vhost_user_host_notifier_remove(n, dev->vdev);
1183 }
1184
1185 ret = vhost_user_write(dev, &msg, NULL, 0);
1186 if (ret < 0) {
1187 return ret;
1188 }
1189
1190 ret = vhost_user_read(dev, &msg);
1191 if (ret < 0) {
1192 return ret;
1193 }
1194
1195 if (msg.hdr.request != VHOST_USER_GET_VRING_BASE) {
1196 error_report("Received unexpected msg type. Expected %d received %d",
1197 VHOST_USER_GET_VRING_BASE, msg.hdr.request);
1198 return -EPROTO;
1199 }
1200
1201 if (msg.hdr.size != sizeof(msg.payload.state)) {
1202 error_report("Received bad msg size.");
1203 return -EPROTO;
1204 }
1205
1206 *ring = msg.payload.state;
1207
1208 return 0;
1209 }
1210
1211 static int vhost_set_vring_file(struct vhost_dev *dev,
1212 VhostUserRequest request,
1213 struct vhost_vring_file *file)
1214 {
1215 int fds[VHOST_USER_MAX_RAM_SLOTS];
1216 size_t fd_num = 0;
1217 VhostUserMsg msg = {
1218 .hdr.request = request,
1219 .hdr.flags = VHOST_USER_VERSION,
1220 .payload.u64 = file->index & VHOST_USER_VRING_IDX_MASK,
1221 .hdr.size = sizeof(msg.payload.u64),
1222 };
1223
1224 if (ioeventfd_enabled() && file->fd > 0) {
1225 fds[fd_num++] = file->fd;
1226 } else {
1227 msg.payload.u64 |= VHOST_USER_VRING_NOFD_MASK;
1228 }
1229
1230 return vhost_user_write(dev, &msg, fds, fd_num);
1231 }
1232
1233 static int vhost_user_set_vring_kick(struct vhost_dev *dev,
1234 struct vhost_vring_file *file)
1235 {
1236 return vhost_set_vring_file(dev, VHOST_USER_SET_VRING_KICK, file);
1237 }
1238
1239 static int vhost_user_set_vring_call(struct vhost_dev *dev,
1240 struct vhost_vring_file *file)
1241 {
1242 return vhost_set_vring_file(dev, VHOST_USER_SET_VRING_CALL, file);
1243 }
1244
1245 static int vhost_user_set_vring_err(struct vhost_dev *dev,
1246 struct vhost_vring_file *file)
1247 {
1248 return vhost_set_vring_file(dev, VHOST_USER_SET_VRING_ERR, file);
1249 }
1250
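/* Generic request/reply helper for messages that return a single u64. */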
1251 static int vhost_user_get_u64(struct vhost_dev *dev, int request, uint64_t *u64)
1252 {
1253 int ret;
1254 VhostUserMsg msg = {
1255 .hdr.request = request,
1256 .hdr.flags = VHOST_USER_VERSION,
1257 };
1258
1259 if (vhost_user_one_time_request(request) && dev->vq_index != 0) {
1260 return 0;
1261 }
1262
1263 ret = vhost_user_write(dev, &msg, NULL, 0);
1264 if (ret < 0) {
1265 return ret;
1266 }
1267
1268 ret = vhost_user_read(dev, &msg);
1269 if (ret < 0) {
1270 return ret;
1271 }
1272
1273 if (msg.hdr.request != request) {
1274 error_report("Received unexpected msg type. Expected %d received %d",
1275 request, msg.hdr.request);
1276 return -EPROTO;
1277 }
1278
1279 if (msg.hdr.size != sizeof(msg.payload.u64)) {
1280 error_report("Received bad msg size.");
1281 return -EPROTO;
1282 }
1283
1284 *u64 = msg.payload.u64;
1285
1286 return 0;
1287 }
1288
1289 static int vhost_user_get_features(struct vhost_dev *dev, uint64_t *features)
1290 {
1291 if (vhost_user_get_u64(dev, VHOST_USER_GET_FEATURES, features) < 0) {
1292 return -EPROTO;
1293 }
1294
1295 return 0;
1296 }
1297
1298 static int enforce_reply(struct vhost_dev *dev,
1299 const VhostUserMsg *msg)
1300 {
1301 uint64_t dummy;
1302
1303 if (msg->hdr.flags & VHOST_USER_NEED_REPLY_MASK) {
1304 return process_message_reply(dev, msg);
1305 }
1306
1307 /*
1308 * We need to wait for a reply but the backend does not
1309 * support replies for the command we just sent.
1310 * Send VHOST_USER_GET_FEATURES which makes all backends
1311 * send a reply.
1312 */
1313 return vhost_user_get_features(dev, &dummy);
1314 }
1315
1316 static int vhost_user_set_vring_addr(struct vhost_dev *dev,
1317 struct vhost_vring_addr *addr)
1318 {
1319 int ret;
1320 VhostUserMsg msg = {
1321 .hdr.request = VHOST_USER_SET_VRING_ADDR,
1322 .hdr.flags = VHOST_USER_VERSION,
1323 .payload.addr = *addr,
1324 .hdr.size = sizeof(msg.payload.addr),
1325 };
1326
1327 bool reply_supported = virtio_has_feature(dev->protocol_features,
1328 VHOST_USER_PROTOCOL_F_REPLY_ACK);
1329
1330 /*
1331 * Wait for a reply if logging is enabled, to make sure the
1332 * backend is actually logging changes.
1333 */
1334 bool wait_for_reply = addr->flags & (1 << VHOST_VRING_F_LOG);
1335
1336 if (reply_supported && wait_for_reply) {
1337 msg.hdr.flags |= VHOST_USER_NEED_REPLY_MASK;
1338 }
1339
1340 ret = vhost_user_write(dev, &msg, NULL, 0);
1341 if (ret < 0) {
1342 return ret;
1343 }
1344
1345 if (wait_for_reply) {
1346 return enforce_reply(dev, &msg);
1347 }
1348
1349 return 0;
1350 }
1351
1352 static int vhost_user_set_u64(struct vhost_dev *dev, int request, uint64_t u64,
1353 bool wait_for_reply)
1354 {
1355 VhostUserMsg msg = {
1356 .hdr.request = request,
1357 .hdr.flags = VHOST_USER_VERSION,
1358 .payload.u64 = u64,
1359 .hdr.size = sizeof(msg.payload.u64),
1360 };
1361 int ret;
1362
1363 if (wait_for_reply) {
1364 bool reply_supported = virtio_has_feature(dev->protocol_features,
1365 VHOST_USER_PROTOCOL_F_REPLY_ACK);
1366 if (reply_supported) {
1367 msg.hdr.flags |= VHOST_USER_NEED_REPLY_MASK;
1368 }
1369 }
1370
1371 ret = vhost_user_write(dev, &msg, NULL, 0);
1372 if (ret < 0) {
1373 return ret;
1374 }
1375
1376 if (wait_for_reply) {
1377 return enforce_reply(dev, &msg);
1378 }
1379
1380 return 0;
1381 }
1382
1383 static int vhost_user_set_status(struct vhost_dev *dev, uint8_t status)
1384 {
1385 return vhost_user_set_u64(dev, VHOST_USER_SET_STATUS, status, false);
1386 }
1387
1388 static int vhost_user_get_status(struct vhost_dev *dev, uint8_t *status)
1389 {
1390 uint64_t value;
1391 int ret;
1392
1393 ret = vhost_user_get_u64(dev, VHOST_USER_GET_STATUS, &value);
1394 if (ret < 0) {
1395 return ret;
1396 }
1397 *status = value;
1398
1399 return 0;
1400 }
1401
1402 static int vhost_user_add_status(struct vhost_dev *dev, uint8_t status)
1403 {
1404 uint8_t s;
1405 int ret;
1406
1407 ret = vhost_user_get_status(dev, &s);
1408 if (ret < 0) {
1409 return ret;
1410 }
1411
1412 if ((s & status) == status) {
1413 return 0;
1414 }
1415 s |= status;
1416
1417 return vhost_user_set_status(dev, s);
1418 }
1419
1420 static int vhost_user_set_features(struct vhost_dev *dev,
1421 uint64_t features)
1422 {
1423 /*
1424 * Wait for a reply if logging is enabled, to make sure the
1425 * backend is actually logging changes.
1426 */
1427 bool log_enabled = features & (0x1ULL << VHOST_F_LOG_ALL);
1428 int ret;
1429
1430 /*
1431 * We need to include any extra backend only feature bits that
1432 * might be needed by our device. Currently this includes the
1433 * VHOST_USER_F_PROTOCOL_FEATURES bit for enabling protocol
1434 * features.
1435 */
1436 ret = vhost_user_set_u64(dev, VHOST_USER_SET_FEATURES,
1437 features | dev->backend_features,
1438 log_enabled);
1439
1440 if (virtio_has_feature(dev->protocol_features,
1441 VHOST_USER_PROTOCOL_F_STATUS)) {
1442 if (!ret) {
1443 return vhost_user_add_status(dev, VIRTIO_CONFIG_S_FEATURES_OK);
1444 }
1445 }
1446
1447 return ret;
1448 }
1449
1450 static int vhost_user_set_protocol_features(struct vhost_dev *dev,
1451 uint64_t features)
1452 {
1453 return vhost_user_set_u64(dev, VHOST_USER_SET_PROTOCOL_FEATURES, features,
1454 false);
1455 }
1456
1457 static int vhost_user_set_owner(struct vhost_dev *dev)
1458 {
1459 VhostUserMsg msg = {
1460 .hdr.request = VHOST_USER_SET_OWNER,
1461 .hdr.flags = VHOST_USER_VERSION,
1462 };
1463
1464 return vhost_user_write(dev, &msg, NULL, 0);
1465 }
1466
1467 static int vhost_user_get_max_memslots(struct vhost_dev *dev,
1468 uint64_t *max_memslots)
1469 {
1470 uint64_t backend_max_memslots;
1471 int err;
1472
1473 err = vhost_user_get_u64(dev, VHOST_USER_GET_MAX_MEM_SLOTS,
1474 &backend_max_memslots);
1475 if (err < 0) {
1476 return err;
1477 }
1478
1479 *max_memslots = backend_max_memslots;
1480
1481 return 0;
1482 }
1483
1484 static int vhost_user_reset_device(struct vhost_dev *dev)
1485 {
1486 VhostUserMsg msg = {
1487 .hdr.flags = VHOST_USER_VERSION,
1488 };
1489
1490 msg.hdr.request = virtio_has_feature(dev->protocol_features,
1491 VHOST_USER_PROTOCOL_F_RESET_DEVICE)
1492 ? VHOST_USER_RESET_DEVICE
1493 : VHOST_USER_RESET_OWNER;
1494
1495 return vhost_user_write(dev, &msg, NULL, 0);
1496 }
1497
1498 static int vhost_user_backend_handle_config_change(struct vhost_dev *dev)
1499 {
1500 if (!dev->config_ops || !dev->config_ops->vhost_dev_config_notifier) {
1501 return -ENOSYS;
1502 }
1503
1504 return dev->config_ops->vhost_dev_config_notifier(dev);
1505 }
1506
1507 /*
1508 * Fetch or create the notifier for a given idx. Newly created
1509 * notifiers are added to the pointer array that tracks them.
1510 */
1511 static VhostUserHostNotifier *fetch_or_create_notifier(VhostUserState *u,
1512 int idx)
1513 {
1514 VhostUserHostNotifier *n = NULL;
1515 if (idx >= u->notifiers->len) {
1516 g_ptr_array_set_size(u->notifiers, idx + 1);
1517 }
1518
1519 n = g_ptr_array_index(u->notifiers, idx);
1520 if (!n) {
1521 /*
1522 * In case notifications arrive out of order,
1523 * make room for the current index.
1524 */
1525 g_ptr_array_remove_index(u->notifiers, idx);
1526 n = g_new0(VhostUserHostNotifier, 1);
1527 n->idx = idx;
1528 g_ptr_array_insert(u->notifiers, idx, n);
1529 trace_vhost_user_create_notifier(idx, n);
1530 }
1531
1532 return n;
1533 }
1534
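/*
 * Handle VHOST_USER_BACKEND_VRING_HOST_NOTIFIER_MSG: mmap the page the
 * backend passed in and install it as the host notifier region for the
 * given queue, replacing any previous mapping.
 */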
1535 static int vhost_user_backend_handle_vring_host_notifier(struct vhost_dev *dev,
1536 VhostUserVringArea *area,
1537 int fd)
1538 {
1539 int queue_idx = area->u64 & VHOST_USER_VRING_IDX_MASK;
1540 size_t page_size = qemu_real_host_page_size();
1541 struct vhost_user *u = dev->opaque;
1542 VhostUserState *user = u->user;
1543 VirtIODevice *vdev = dev->vdev;
1544 VhostUserHostNotifier *n;
1545 void *addr;
1546 char *name;
1547
1548 if (!virtio_has_feature(dev->protocol_features,
1549 VHOST_USER_PROTOCOL_F_HOST_NOTIFIER) ||
1550 vdev == NULL || queue_idx >= virtio_get_num_queues(vdev)) {
1551 return -EINVAL;
1552 }
1553
1554 /*
1555 * Fetch notifier and invalidate any old data before setting up
1556 * new mapped address.
1557 */
1558 n = fetch_or_create_notifier(user, queue_idx);
1559 vhost_user_host_notifier_remove(n, vdev);
1560
1561 if (area->u64 & VHOST_USER_VRING_NOFD_MASK) {
1562 return 0;
1563 }
1564
1565 /* Sanity check. */
1566 if (area->size != page_size) {
1567 return -EINVAL;
1568 }
1569
1570 addr = mmap(NULL, page_size, PROT_READ | PROT_WRITE, MAP_SHARED,
1571 fd, area->offset);
1572 if (addr == MAP_FAILED) {
1573 return -EFAULT;
1574 }
1575
1576 name = g_strdup_printf("vhost-user/host-notifier@%p mmaps[%d]",
1577 user, queue_idx);
1578 if (!n->mr.ram) { /* Don't init again after suspend. */
1579 memory_region_init_ram_device_ptr(&n->mr, OBJECT(vdev), name,
1580 page_size, addr);
1581 } else {
1582 n->mr.ram_block->host = addr;
1583 }
1584 g_free(name);
1585
1586 if (virtio_queue_set_host_notifier_mr(vdev, queue_idx, &n->mr, true)) {
1587 object_unparent(OBJECT(&n->mr));
1588 munmap(addr, page_size);
1589 return -ENXIO;
1590 }
1591
1592 n->addr = addr;
1593
1594 return 0;
1595 }
1596
1597 static void close_backend_channel(struct vhost_user *u)
1598 {
1599 g_source_destroy(u->backend_src);
1600 g_source_unref(u->backend_src);
1601 u->backend_src = NULL;
1602 object_unref(OBJECT(u->backend_ioc));
1603 u->backend_ioc = NULL;
1604 }
1605
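/*
 * GSource callback for requests initiated by the backend over the
 * channel established by vhost_setup_backend_channel().
 */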
1606 static gboolean backend_read(QIOChannel *ioc, GIOCondition condition,
1607 gpointer opaque)
1608 {
1609 struct vhost_dev *dev = opaque;
1610 struct vhost_user *u = dev->opaque;
1611 VhostUserHeader hdr = { 0, };
1612 VhostUserPayload payload = { 0, };
1613 Error *local_err = NULL;
1614 gboolean rc = G_SOURCE_CONTINUE;
1615 int ret = 0;
1616 struct iovec iov;
1617 g_autofree int *fd = NULL;
1618 size_t fdsize = 0;
1619 int i;
1620
1621 /* Read header */
1622 iov.iov_base = &hdr;
1623 iov.iov_len = VHOST_USER_HDR_SIZE;
1624
1625 if (qio_channel_readv_full_all(ioc, &iov, 1, &fd, &fdsize, &local_err)) {
1626 error_report_err(local_err);
1627 goto err;
1628 }
1629
1630 if (hdr.size > VHOST_USER_PAYLOAD_SIZE) {
1631 error_report("Failed to read msg header."
1632 " Size %d exceeds the maximum %zu.", hdr.size,
1633 VHOST_USER_PAYLOAD_SIZE);
1634 goto err;
1635 }
1636
1637 /* Read payload */
1638 if (qio_channel_read_all(ioc, (char *) &payload, hdr.size, &local_err)) {
1639 error_report_err(local_err);
1640 goto err;
1641 }
1642
1643 switch (hdr.request) {
1644 case VHOST_USER_BACKEND_IOTLB_MSG:
1645 ret = vhost_backend_handle_iotlb_msg(dev, &payload.iotlb);
1646 break;
1647 case VHOST_USER_BACKEND_CONFIG_CHANGE_MSG:
1648 ret = vhost_user_backend_handle_config_change(dev);
1649 break;
1650 case VHOST_USER_BACKEND_VRING_HOST_NOTIFIER_MSG:
1651 ret = vhost_user_backend_handle_vring_host_notifier(dev, &payload.area,
1652 fd ? fd[0] : -1);
1653 break;
1654 default:
1655 error_report("Received unexpected msg type: %d.", hdr.request);
1656 ret = -EINVAL;
1657 }
1658
1659 /*
1660 * REPLY_ACK feature handling. Other reply types have to be managed
1661 * directly in their request handlers.
1662 */
1663 if (hdr.flags & VHOST_USER_NEED_REPLY_MASK) {
1664 struct iovec iovec[2];
1665
1666
1667 hdr.flags &= ~VHOST_USER_NEED_REPLY_MASK;
1668 hdr.flags |= VHOST_USER_REPLY_MASK;
1669
1670 payload.u64 = !!ret;
1671 hdr.size = sizeof(payload.u64);
1672
1673 iovec[0].iov_base = &hdr;
1674 iovec[0].iov_len = VHOST_USER_HDR_SIZE;
1675 iovec[1].iov_base = &payload;
1676 iovec[1].iov_len = hdr.size;
1677
1678 if (qio_channel_writev_all(ioc, iovec, ARRAY_SIZE(iovec), &local_err)) {
1679 error_report_err(local_err);
1680 goto err;
1681 }
1682 }
1683
1684 goto fdcleanup;
1685
1686 err:
1687 close_backend_channel(u);
1688 rc = G_SOURCE_REMOVE;
1689
1690 fdcleanup:
1691 if (fd) {
1692 for (i = 0; i < fdsize; i++) {
1693 close(fd[i]);
1694 }
1695 }
1696 return rc;
1697 }
1698
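/*
 * Create a socketpair for backend-initiated requests: keep one end locally,
 * wrapped in a QIOChannel with a read watch, and hand the other end to the
 * backend via VHOST_USER_SET_BACKEND_REQ_FD.
 */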
1699 static int vhost_setup_backend_channel(struct vhost_dev *dev)
1700 {
1701 VhostUserMsg msg = {
1702 .hdr.request = VHOST_USER_SET_BACKEND_REQ_FD,
1703 .hdr.flags = VHOST_USER_VERSION,
1704 };
1705 struct vhost_user *u = dev->opaque;
1706 int sv[2], ret = 0;
1707 bool reply_supported = virtio_has_feature(dev->protocol_features,
1708 VHOST_USER_PROTOCOL_F_REPLY_ACK);
1709 Error *local_err = NULL;
1710 QIOChannel *ioc;
1711
1712 if (!virtio_has_feature(dev->protocol_features,
1713 VHOST_USER_PROTOCOL_F_BACKEND_REQ)) {
1714 return 0;
1715 }
1716
1717 if (qemu_socketpair(PF_UNIX, SOCK_STREAM, 0, sv) == -1) {
1718 int saved_errno = errno;
1719 error_report("socketpair() failed");
1720 return -saved_errno;
1721 }
1722
1723 ioc = QIO_CHANNEL(qio_channel_socket_new_fd(sv[0], &local_err));
1724 if (!ioc) {
1725 error_report_err(local_err);
1726 return -ECONNREFUSED;
1727 }
1728 u->backend_ioc = ioc;
1729 u->backend_src = qio_channel_add_watch_source(u->backend_ioc,
1730 G_IO_IN | G_IO_HUP,
1731 backend_read, dev, NULL, NULL);
1732
1733 if (reply_supported) {
1734 msg.hdr.flags |= VHOST_USER_NEED_REPLY_MASK;
1735 }
1736
1737 ret = vhost_user_write(dev, &msg, &sv[1], 1);
1738 if (ret) {
1739 goto out;
1740 }
1741
1742 if (reply_supported) {
1743 ret = process_message_reply(dev, &msg);
1744 }
1745
1746 out:
1747 close(sv[1]);
1748 if (ret) {
1749 close_backend_channel(u);
1750 }
1751
1752 return ret;
1753 }
1754
1755 #ifdef CONFIG_LINUX
1756 /*
1757 * Called back from the postcopy fault thread when a fault is received on our
1758 * ufd.
1759 * TODO: This is Linux specific
1760 */
1761 static int vhost_user_postcopy_fault_handler(struct PostCopyFD *pcfd,
1762 void *ufd)
1763 {
1764 struct vhost_dev *dev = pcfd->data;
1765 struct vhost_user *u = dev->opaque;
1766 struct uffd_msg *msg = ufd;
1767 uint64_t faultaddr = msg->arg.pagefault.address;
1768 RAMBlock *rb = NULL;
1769 uint64_t rb_offset;
1770 int i;
1771
1772 trace_vhost_user_postcopy_fault_handler(pcfd->idstr, faultaddr,
1773 dev->mem->nregions);
1774 for (i = 0; i < MIN(dev->mem->nregions, u->region_rb_len); i++) {
1775 trace_vhost_user_postcopy_fault_handler_loop(i,
1776 u->postcopy_client_bases[i], dev->mem->regions[i].memory_size);
1777 if (faultaddr >= u->postcopy_client_bases[i]) {
1778 /* Offset of the fault address in the vhost region */
1779 uint64_t region_offset = faultaddr - u->postcopy_client_bases[i];
1780 if (region_offset < dev->mem->regions[i].memory_size) {
1781 rb_offset = region_offset + u->region_rb_offset[i];
1782 trace_vhost_user_postcopy_fault_handler_found(i,
1783 region_offset, rb_offset);
1784 rb = u->region_rb[i];
1785 return postcopy_request_shared_page(pcfd, rb, faultaddr,
1786 rb_offset);
1787 }
1788 }
1789 }
1790 error_report("%s: Failed to find region for fault %" PRIx64,
1791 __func__, faultaddr);
1792 return -1;
1793 }
1794
1795 static int vhost_user_postcopy_waker(struct PostCopyFD *pcfd, RAMBlock *rb,
1796 uint64_t offset)
1797 {
1798 struct vhost_dev *dev = pcfd->data;
1799 struct vhost_user *u = dev->opaque;
1800 int i;
1801
1802 trace_vhost_user_postcopy_waker(qemu_ram_get_idstr(rb), offset);
1803
1804 if (!u) {
1805 return 0;
1806 }
1807 /* Translate the offset into an address in the client's address space */
1808 for (i = 0; i < MIN(dev->mem->nregions, u->region_rb_len); i++) {
1809 if (u->region_rb[i] == rb &&
1810 offset >= u->region_rb_offset[i] &&
1811 offset < (u->region_rb_offset[i] +
1812 dev->mem->regions[i].memory_size)) {
1813 uint64_t client_addr = (offset - u->region_rb_offset[i]) +
1814 u->postcopy_client_bases[i];
1815 trace_vhost_user_postcopy_waker_found(client_addr);
1816 return postcopy_wake_shared(pcfd, client_addr, rb);
1817 }
1818 }
1819
1820 trace_vhost_user_postcopy_waker_nomatch(qemu_ram_get_idstr(rb), offset);
1821 return 0;
1822 }
1823 #endif
1824
1825 /*
1826 * Called at the start of an inbound postcopy on reception of the
1827 * 'advise' command.
1828 */
1829 static int vhost_user_postcopy_advise(struct vhost_dev *dev, Error **errp)
1830 {
1831 #ifdef CONFIG_LINUX
1832 struct vhost_user *u = dev->opaque;
1833 CharBackend *chr = u->user->chr;
1834 int ufd;
1835 int ret;
1836 VhostUserMsg msg = {
1837 .hdr.request = VHOST_USER_POSTCOPY_ADVISE,
1838 .hdr.flags = VHOST_USER_VERSION,
1839 };
1840
1841 ret = vhost_user_write(dev, &msg, NULL, 0);
1842 if (ret < 0) {
1843 error_setg(errp, "Failed to send postcopy_advise to vhost");
1844 return ret;
1845 }
1846
1847 ret = vhost_user_read(dev, &msg);
1848 if (ret < 0) {
1849 error_setg(errp, "Failed to get postcopy_advise reply from vhost");
1850 return ret;
1851 }
1852
1853 if (msg.hdr.request != VHOST_USER_POSTCOPY_ADVISE) {
1854 error_setg(errp, "Unexpected msg type. Expected %d received %d",
1855 VHOST_USER_POSTCOPY_ADVISE, msg.hdr.request);
1856 return -EPROTO;
1857 }
1858
1859 if (msg.hdr.size) {
1860 error_setg(errp, "Received bad msg size.");
1861 return -EPROTO;
1862 }
1863 ufd = qemu_chr_fe_get_msgfd(chr);
1864 if (ufd < 0) {
1865 error_setg(errp, "%s: Failed to get ufd", __func__);
1866 return -EIO;
1867 }
1868 qemu_socket_set_nonblock(ufd);
1869
1870 /* register ufd with userfault thread */
1871 u->postcopy_fd.fd = ufd;
1872 u->postcopy_fd.data = dev;
1873 u->postcopy_fd.handler = vhost_user_postcopy_fault_handler;
1874 u->postcopy_fd.waker = vhost_user_postcopy_waker;
1875 u->postcopy_fd.idstr = "vhost-user"; /* Need to find unique name */
1876 postcopy_register_shared_ufd(&u->postcopy_fd);
1877 return 0;
1878 #else
1879 error_setg(errp, "Postcopy not supported on non-Linux systems");
1880 return -ENOSYS;
1881 #endif
1882 }
1883
1884 /*
1885 * Called at the switch to postcopy on reception of the 'listen' command.
1886 */
1887 static int vhost_user_postcopy_listen(struct vhost_dev *dev, Error **errp)
1888 {
1889 struct vhost_user *u = dev->opaque;
1890 int ret;
1891 VhostUserMsg msg = {
1892 .hdr.request = VHOST_USER_POSTCOPY_LISTEN,
1893 .hdr.flags = VHOST_USER_VERSION | VHOST_USER_NEED_REPLY_MASK,
1894 };
1895 u->postcopy_listen = true;
1896
1897 trace_vhost_user_postcopy_listen();
1898
1899 ret = vhost_user_write(dev, &msg, NULL, 0);
1900 if (ret < 0) {
1901 error_setg(errp, "Failed to send postcopy_listen to vhost");
1902 return ret;
1903 }
1904
1905 ret = process_message_reply(dev, &msg);
1906 if (ret) {
1907 error_setg(errp, "Failed to receive reply to postcopy_listen");
1908 return ret;
1909 }
1910
1911 return 0;
1912 }
1913
1914 /*
1915 * Called at the end of postcopy
1916 */
1917 static int vhost_user_postcopy_end(struct vhost_dev *dev, Error **errp)
1918 {
1919 VhostUserMsg msg = {
1920 .hdr.request = VHOST_USER_POSTCOPY_END,
1921 .hdr.flags = VHOST_USER_VERSION | VHOST_USER_NEED_REPLY_MASK,
1922 };
1923 int ret;
1924 struct vhost_user *u = dev->opaque;
1925
1926 trace_vhost_user_postcopy_end_entry();
1927
1928 ret = vhost_user_write(dev, &msg, NULL, 0);
1929 if (ret < 0) {
1930 error_setg(errp, "Failed to send postcopy_end to vhost");
1931 return ret;
1932 }
1933
1934 ret = process_message_reply(dev, &msg);
1935 if (ret) {
1936 error_setg(errp, "Failed to receive reply to postcopy_end");
1937 return ret;
1938 }
1939 postcopy_unregister_shared_ufd(&u->postcopy_fd);
1940 close(u->postcopy_fd.fd);
1941 u->postcopy_fd.handler = NULL;
1942
1943 trace_vhost_user_postcopy_end_exit();
1944
1945 return 0;
1946 }
1947
1948 static int vhost_user_postcopy_notifier(NotifierWithReturn *notifier,
1949 void *opaque)
1950 {
1951 struct PostcopyNotifyData *pnd = opaque;
1952 struct vhost_user *u = container_of(notifier, struct vhost_user,
1953 postcopy_notifier);
1954 struct vhost_dev *dev = u->dev;
1955
1956 switch (pnd->reason) {
1957 case POSTCOPY_NOTIFY_PROBE:
1958 if (!virtio_has_feature(dev->protocol_features,
1959 VHOST_USER_PROTOCOL_F_PAGEFAULT)) {
1960 /* TODO: Get the device name into this error somehow */
1961 error_setg(pnd->errp,
1962 "vhost-user backend not capable of postcopy");
1963 return -ENOENT;
1964 }
1965 break;
1966
1967 case POSTCOPY_NOTIFY_INBOUND_ADVISE:
1968 return vhost_user_postcopy_advise(dev, pnd->errp);
1969
1970 case POSTCOPY_NOTIFY_INBOUND_LISTEN:
1971 return vhost_user_postcopy_listen(dev, pnd->errp);
1972
1973 case POSTCOPY_NOTIFY_INBOUND_END:
1974 return vhost_user_postcopy_end(dev, pnd->errp);
1975
1976 default:
1977 /* We ignore notifications we don't know about */
1978 break;
1979 }
1980
1981 return 0;
1982 }
1983
1984 static int vhost_user_backend_init(struct vhost_dev *dev, void *opaque,
1985 Error **errp)
1986 {
1987 uint64_t features, ram_slots;
1988 struct vhost_user *u;
1989 VhostUserState *vus = (VhostUserState *) opaque;
1990 int err;
1991
1992 assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER);
1993
1994 u = g_new0(struct vhost_user, 1);
1995 u->user = vus;
1996 u->dev = dev;
1997 dev->opaque = u;
1998
1999 err = vhost_user_get_features(dev, &features);
2000 if (err < 0) {
2001 error_setg_errno(errp, -err, "vhost_backend_init failed");
2002 return err;
2003 }
2004
2005 if (virtio_has_feature(features, VHOST_USER_F_PROTOCOL_FEATURES)) {
2006 bool supports_f_config = vus->supports_config ||
2007 (dev->config_ops && dev->config_ops->vhost_dev_config_notifier);
2008 uint64_t protocol_features;
2009
2010 dev->backend_features |= 1ULL << VHOST_USER_F_PROTOCOL_FEATURES;
2011
2012 err = vhost_user_get_u64(dev, VHOST_USER_GET_PROTOCOL_FEATURES,
2013 &protocol_features);
2014 if (err < 0) {
2015 error_setg_errno(errp, EPROTO, "vhost_backend_init failed");
2016 return -EPROTO;
2017 }
2018
2019 /*
2020 * We will use all the protocol features we support - although
2021 * we suppress F_CONFIG if we know QEMU's internal code cannot
2022 * support it.
2023 */
2024 protocol_features &= VHOST_USER_PROTOCOL_FEATURE_MASK;
2025
2026 if (supports_f_config) {
2027 if (!virtio_has_feature(protocol_features,
2028 VHOST_USER_PROTOCOL_F_CONFIG)) {
2029 error_setg(errp, "vhost-user device expects "
2030 "VHOST_USER_PROTOCOL_F_CONFIG but the vhost-user backend does "
2031 "not support it.");
2032 return -EPROTO;
2033 }
2034 } else {
2035 if (virtio_has_feature(protocol_features,
2036 VHOST_USER_PROTOCOL_F_CONFIG)) {
2037 warn_report("vhost-user backend supports "
2038 "VHOST_USER_PROTOCOL_F_CONFIG but QEMU does not.");
2039 protocol_features &= ~(1ULL << VHOST_USER_PROTOCOL_F_CONFIG);
2040 }
2041 }
2042
2043 /* final set of protocol features */
2044 dev->protocol_features = protocol_features;
2045 err = vhost_user_set_protocol_features(dev, dev->protocol_features);
2046 if (err < 0) {
2047 error_setg_errno(errp, EPROTO, "vhost_backend_init failed");
2048 return -EPROTO;
2049 }
2050
2051 /* query the maximum number of queues if the backend supports multiqueue */
2052 if (dev->protocol_features & (1ULL << VHOST_USER_PROTOCOL_F_MQ)) {
2053 err = vhost_user_get_u64(dev, VHOST_USER_GET_QUEUE_NUM,
2054 &dev->max_queues);
2055 if (err < 0) {
2056 error_setg_errno(errp, EPROTO, "vhost_backend_init failed");
2057 return -EPROTO;
2058 }
2059 } else {
2060 dev->max_queues = 1;
2061 }
2062
2063 if (dev->num_queues && dev->max_queues < dev->num_queues) {
2064 error_setg(errp, "The maximum number of queues supported by the "
2065 "backend is %" PRIu64, dev->max_queues);
2066 return -EINVAL;
2067 }
2068
2069 if (virtio_has_feature(features, VIRTIO_F_IOMMU_PLATFORM) &&
2070 !(virtio_has_feature(dev->protocol_features,
2071 VHOST_USER_PROTOCOL_F_BACKEND_REQ) &&
2072 virtio_has_feature(dev->protocol_features,
2073 VHOST_USER_PROTOCOL_F_REPLY_ACK))) {
2074 error_setg(errp, "IOMMU support requires reply-ack and "
2075 "backend-req protocol features.");
2076 return -EINVAL;
2077 }
2078
2079 /* get max memory regions if backend supports configurable RAM slots */
2080 if (!virtio_has_feature(dev->protocol_features,
2081 VHOST_USER_PROTOCOL_F_CONFIGURE_MEM_SLOTS)) {
2082 u->user->memory_slots = VHOST_MEMORY_BASELINE_NREGIONS;
2083 } else {
2084 err = vhost_user_get_max_memslots(dev, &ram_slots);
2085 if (err < 0) {
2086 error_setg_errno(errp, EPROTO, "vhost_backend_init failed");
2087 return -EPROTO;
2088 }
2089
2090 if (ram_slots < u->user->memory_slots) {
2091 error_setg(errp, "The backend specified a max ram slots limit "
2092 "of %" PRIu64", when the prior validated limit was "
2093 "%d. This limit should never decrease.", ram_slots,
2094 u->user->memory_slots);
2095 return -EINVAL;
2096 }
2097
2098 u->user->memory_slots = MIN(ram_slots, VHOST_USER_MAX_RAM_SLOTS);
2099 }
2100 }
2101
2102 if (dev->migration_blocker == NULL &&
2103 !virtio_has_feature(dev->protocol_features,
2104 VHOST_USER_PROTOCOL_F_LOG_SHMFD)) {
2105 error_setg(&dev->migration_blocker,
2106 "Migration disabled: vhost-user backend lacks "
2107 "VHOST_USER_PROTOCOL_F_LOG_SHMFD feature.");
2108 }
2109
2110 if (dev->vq_index == 0) {
2111 err = vhost_setup_backend_channel(dev);
2112 if (err < 0) {
2113 error_setg_errno(errp, EPROTO, "vhost_backend_init failed");
2114 return -EPROTO;
2115 }
2116 }
2117
2118 u->postcopy_notifier.notify = vhost_user_postcopy_notifier;
2119 postcopy_add_notifier(&u->postcopy_notifier);
2120
2121 return 0;
2122 }
2123
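/*
 * Tear down everything set up by vhost_user_backend_init: the postcopy
 * notifier and userfault fd, the backend channel and the per-region
 * RAMBlock tracking arrays.
 */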
2124 static int vhost_user_backend_cleanup(struct vhost_dev *dev)
2125 {
2126 struct vhost_user *u;
2127
2128 assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER);
2129
2130 u = dev->opaque;
2131 if (u->postcopy_notifier.notify) {
2132 postcopy_remove_notifier(&u->postcopy_notifier);
2133 u->postcopy_notifier.notify = NULL;
2134 }
2135 u->postcopy_listen = false;
2136 if (u->postcopy_fd.handler) {
2137 postcopy_unregister_shared_ufd(&u->postcopy_fd);
2138 close(u->postcopy_fd.fd);
2139 u->postcopy_fd.handler = NULL;
2140 }
2141 if (u->backend_ioc) {
2142 close_backend_channel(u);
2143 }
2144 g_free(u->region_rb);
2145 u->region_rb = NULL;
2146 g_free(u->region_rb_offset);
2147 u->region_rb_offset = NULL;
2148 u->region_rb_len = 0;
2149 g_free(u);
2150 dev->opaque = NULL;
2151
2152 return 0;
2153 }
2154
2155 static int vhost_user_get_vq_index(struct vhost_dev *dev, int idx)
2156 {
2157 assert(idx >= dev->vq_index && idx < dev->vq_index + dev->nvqs);
2158
2159 return idx;
2160 }
2161
2162 static int vhost_user_memslots_limit(struct vhost_dev *dev)
2163 {
2164 struct vhost_user *u = dev->opaque;
2165
2166 return u->user->memory_slots;
2167 }
2168
2169 static bool vhost_user_requires_shm_log(struct vhost_dev *dev)
2170 {
2171 assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER);
2172
2173 return virtio_has_feature(dev->protocol_features,
2174 VHOST_USER_PROTOCOL_F_LOG_SHMFD);
2175 }
2176
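/*
 * Called once migration of the device is complete.  If the guest does not
 * announce itself (VIRTIO_NET_F_GUEST_ANNOUNCE), ask a backend that
 * supports VHOST_USER_PROTOCOL_F_RARP to broadcast a RARP frame with the
 * given MAC address so that network peers learn the new location.
 */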
2177 static int vhost_user_migration_done(struct vhost_dev *dev, char* mac_addr)
2178 {
2179 VhostUserMsg msg = { };
2180
2181 assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER);
2182
2183 /* If the guest supports GUEST_ANNOUNCE, do nothing */
2184 if (virtio_has_feature(dev->acked_features, VIRTIO_NET_F_GUEST_ANNOUNCE)) {
2185 return 0;
2186 }
2187
2188 /* if the backend supports VHOST_USER_PROTOCOL_F_RARP, ask it to send the RARP */
2189 if (virtio_has_feature(dev->protocol_features,
2190 VHOST_USER_PROTOCOL_F_RARP)) {
2191 msg.hdr.request = VHOST_USER_SEND_RARP;
2192 msg.hdr.flags = VHOST_USER_VERSION;
2193 memcpy((char *)&msg.payload.u64, mac_addr, 6);
2194 msg.hdr.size = sizeof(msg.payload.u64);
2195
2196 return vhost_user_write(dev, &msg, NULL, 0);
2197 }
2198 return -ENOTSUP;
2199 }
2200
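/*
 * Two adjacent memory regions can only be merged into a single vhost
 * memory-table entry if they are backed by the same file descriptor.
 */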
2201 static bool vhost_user_can_merge(struct vhost_dev *dev,
2202 uint64_t start1, uint64_t size1,
2203 uint64_t start2, uint64_t size2)
2204 {
2205 ram_addr_t offset;
2206 int mfd, rfd;
2207
2208 (void)vhost_user_get_mr_data(start1, &offset, &mfd);
2209 (void)vhost_user_get_mr_data(start2, &offset, &rfd);
2210
2211 return mfd == rfd;
2212 }
2213
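/*
 * Forward the host-configured MTU to the backend.  This is a no-op if the
 * backend does not advertise VHOST_USER_PROTOCOL_F_NET_MTU; with REPLY_ACK
 * negotiated the backend must confirm that the MTU is valid.
 */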
2214 static int vhost_user_net_set_mtu(struct vhost_dev *dev, uint16_t mtu)
2215 {
2216 VhostUserMsg msg;
2217 bool reply_supported = virtio_has_feature(dev->protocol_features,
2218 VHOST_USER_PROTOCOL_F_REPLY_ACK);
2219 int ret;
2220
2221 if (!(dev->protocol_features & (1ULL << VHOST_USER_PROTOCOL_F_NET_MTU))) {
2222 return 0;
2223 }
2224
2225 msg.hdr.request = VHOST_USER_NET_SET_MTU;
2226 msg.payload.u64 = mtu;
2227 msg.hdr.size = sizeof(msg.payload.u64);
2228 msg.hdr.flags = VHOST_USER_VERSION;
2229 if (reply_supported) {
2230 msg.hdr.flags |= VHOST_USER_NEED_REPLY_MASK;
2231 }
2232
2233 ret = vhost_user_write(dev, &msg, NULL, 0);
2234 if (ret < 0) {
2235 return ret;
2236 }
2237
2238 /* If reply_ack is supported, the backend must acknowledge that the specified MTU is valid */
2239 if (reply_supported) {
2240 return process_message_reply(dev, &msg);
2241 }
2242
2243 return 0;
2244 }
2245
2246 static int vhost_user_send_device_iotlb_msg(struct vhost_dev *dev,
2247 struct vhost_iotlb_msg *imsg)
2248 {
2249 int ret;
2250 VhostUserMsg msg = {
2251 .hdr.request = VHOST_USER_IOTLB_MSG,
2252 .hdr.size = sizeof(msg.payload.iotlb),
2253 .hdr.flags = VHOST_USER_VERSION | VHOST_USER_NEED_REPLY_MASK,
2254 .payload.iotlb = *imsg,
2255 };
2256
2257 ret = vhost_user_write(dev, &msg, NULL, 0);
2258 if (ret < 0) {
2259 return ret;
2260 }
2261
2262 return process_message_reply(dev, &msg);
2263 }
2264
2265
2266 static void vhost_user_set_iotlb_callback(struct vhost_dev *dev, int enabled)
2267 {
2268 /* No-op as the receive channel is not dedicated to IOTLB messages. */
2269 }
2270
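/*
 * Read config_len bytes of the virtio device config space from the
 * backend with VHOST_USER_GET_CONFIG and copy the reply into 'config'.
 */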
2271 static int vhost_user_get_config(struct vhost_dev *dev, uint8_t *config,
2272 uint32_t config_len, Error **errp)
2273 {
2274 int ret;
2275 VhostUserMsg msg = {
2276 .hdr.request = VHOST_USER_GET_CONFIG,
2277 .hdr.flags = VHOST_USER_VERSION,
2278 .hdr.size = VHOST_USER_CONFIG_HDR_SIZE + config_len,
2279 };
2280
2281 if (!virtio_has_feature(dev->protocol_features,
2282 VHOST_USER_PROTOCOL_F_CONFIG)) {
2283 error_setg(errp, "VHOST_USER_PROTOCOL_F_CONFIG not supported");
2284 return -EINVAL;
2285 }
2286
2287 assert(config_len <= VHOST_USER_MAX_CONFIG_SIZE);
2288
2289 msg.payload.config.offset = 0;
2290 msg.payload.config.size = config_len;
2291 ret = vhost_user_write(dev, &msg, NULL, 0);
2292 if (ret < 0) {
2293 error_setg_errno(errp, -ret, "vhost_get_config failed");
2294 return ret;
2295 }
2296
2297 ret = vhost_user_read(dev, &msg);
2298 if (ret < 0) {
2299 error_setg_errno(errp, -ret, "vhost_get_config failed");
2300 return ret;
2301 }
2302
2303 if (msg.hdr.request != VHOST_USER_GET_CONFIG) {
2304 error_setg(errp,
2305 "Received unexpected msg type. Expected %d received %d",
2306 VHOST_USER_GET_CONFIG, msg.hdr.request);
2307 return -EPROTO;
2308 }
2309
2310 if (msg.hdr.size != VHOST_USER_CONFIG_HDR_SIZE + config_len) {
2311 error_setg(errp, "Received bad msg size.");
2312 return -EPROTO;
2313 }
2314
2315 memcpy(config, msg.payload.config.region, config_len);
2316
2317 return 0;
2318 }
2319
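/*
 * Write 'size' bytes of the virtio device config space at 'offset' to the
 * backend with VHOST_USER_SET_CONFIG, waiting for an ack if REPLY_ACK was
 * negotiated.
 */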
2320 static int vhost_user_set_config(struct vhost_dev *dev, const uint8_t *data,
2321 uint32_t offset, uint32_t size, uint32_t flags)
2322 {
2323 int ret;
2324 uint8_t *p;
2325 bool reply_supported = virtio_has_feature(dev->protocol_features,
2326 VHOST_USER_PROTOCOL_F_REPLY_ACK);
2327
2328 VhostUserMsg msg = {
2329 .hdr.request = VHOST_USER_SET_CONFIG,
2330 .hdr.flags = VHOST_USER_VERSION,
2331 .hdr.size = VHOST_USER_CONFIG_HDR_SIZE + size,
2332 };
2333
2334 if (!virtio_has_feature(dev->protocol_features,
2335 VHOST_USER_PROTOCOL_F_CONFIG)) {
2336 return -ENOTSUP;
2337 }
2338
2339 if (reply_supported) {
2340 msg.hdr.flags |= VHOST_USER_NEED_REPLY_MASK;
2341 }
2342
2343 if (size > VHOST_USER_MAX_CONFIG_SIZE) {
2344 return -EINVAL;
2345 }
2346
2347 msg.payload.config.offset = offset;
2348 msg.payload.config.size = size;
2349 msg.payload.config.flags = flags;
2350 p = msg.payload.config.region;
2351 memcpy(p, data, size);
2352
2353 ret = vhost_user_write(dev, &msg, NULL, 0);
2354 if (ret < 0) {
2355 return ret;
2356 }
2357
2358 if (reply_supported) {
2359 return process_message_reply(dev, &msg);
2360 }
2361
2362 return 0;
2363 }
2364
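/*
 * Forward a cryptodev session request (symmetric or asymmetric, including
 * any key material) to the backend and return the session id it allocates.
 */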
2365 static int vhost_user_crypto_create_session(struct vhost_dev *dev,
2366 void *session_info,
2367 uint64_t *session_id)
2368 {
2369 int ret;
2370 bool crypto_session = virtio_has_feature(dev->protocol_features,
2371 VHOST_USER_PROTOCOL_F_CRYPTO_SESSION);
2372 CryptoDevBackendSessionInfo *backend_info = session_info;
2373 VhostUserMsg msg = {
2374 .hdr.request = VHOST_USER_CREATE_CRYPTO_SESSION,
2375 .hdr.flags = VHOST_USER_VERSION,
2376 .hdr.size = sizeof(msg.payload.session),
2377 };
2378
2379 assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER);
2380
2381 if (!crypto_session) {
2382 error_report("vhost-user trying to send unhandled ioctl");
2383 return -ENOTSUP;
2384 }
2385
2386 if (backend_info->op_code == VIRTIO_CRYPTO_AKCIPHER_CREATE_SESSION) {
2387 CryptoDevBackendAsymSessionInfo *sess = &backend_info->u.asym_sess_info;
2388 size_t keylen;
2389
2390 memcpy(&msg.payload.session.u.asym.session_setup_data, sess,
2391 sizeof(CryptoDevBackendAsymSessionInfo));
2392 if (sess->keylen) {
2393 keylen = sizeof(msg.payload.session.u.asym.key);
2394 if (sess->keylen > keylen) {
2395 error_report("Unsupported asymmetric key size");
2396 return -ENOTSUP;
2397 }
2398
2399 memcpy(&msg.payload.session.u.asym.key, sess->key,
2400 sess->keylen);
2401 }
2402 } else {
2403 CryptoDevBackendSymSessionInfo *sess = &backend_info->u.sym_sess_info;
2404 size_t keylen;
2405
2406 memcpy(&msg.payload.session.u.sym.session_setup_data, sess,
2407 sizeof(CryptoDevBackendSymSessionInfo));
2408 if (sess->key_len) {
2409 keylen = sizeof(msg.payload.session.u.sym.key);
2410 if (sess->key_len > keylen) {
2411 error_report("Unsupported cipher key size");
2412 return -ENOTSUP;
2413 }
2414
2415 memcpy(&msg.payload.session.u.sym.key, sess->cipher_key,
2416 sess->key_len);
2417 }
2418
2419 if (sess->auth_key_len > 0) {
2420 keylen = sizeof(msg.payload.session.u.sym.auth_key);
2421 if (sess->auth_key_len > keylen) {
2422 error_report("Unsupported auth key size");
2423 return -ENOTSUP;
2424 }
2425
2426 memcpy(&msg.payload.session.u.sym.auth_key, sess->auth_key,
2427 sess->auth_key_len);
2428 }
2429 }
2430
2431 msg.payload.session.op_code = backend_info->op_code;
2432 msg.payload.session.session_id = backend_info->session_id;
2433 ret = vhost_user_write(dev, &msg, NULL, 0);
2434 if (ret < 0) {
2435 error_report("vhost_user_write() returned %d, create session failed",
2436 ret);
2437 return ret;
2438 }
2439
2440 ret = vhost_user_read(dev, &msg);
2441 if (ret < 0) {
2442 error_report("vhost_user_read() returned %d, create session failed",
2443 ret);
2444 return ret;
2445 }
2446
2447 if (msg.hdr.request != VHOST_USER_CREATE_CRYPTO_SESSION) {
2448 error_report("Received unexpected msg type. Expected %d received %d",
2449 VHOST_USER_CREATE_CRYPTO_SESSION, msg.hdr.request);
2450 return -EPROTO;
2451 }
2452
2453 if (msg.hdr.size != sizeof(msg.payload.session)) {
2454 error_report("Received bad msg size.");
2455 return -EPROTO;
2456 }
2457
2458 if (msg.payload.session.session_id < 0) {
2459 error_report("Bad session id: %" PRId64 "",
2460 msg.payload.session.session_id);
2461 return -EINVAL;
2462 }
2463 *session_id = msg.payload.session.session_id;
2464
2465 return 0;
2466 }
2467
2468 static int
2469 vhost_user_crypto_close_session(struct vhost_dev *dev, uint64_t session_id)
2470 {
2471 int ret;
2472 bool crypto_session = virtio_has_feature(dev->protocol_features,
2473 VHOST_USER_PROTOCOL_F_CRYPTO_SESSION);
2474 VhostUserMsg msg = {
2475 .hdr.request = VHOST_USER_CLOSE_CRYPTO_SESSION,
2476 .hdr.flags = VHOST_USER_VERSION,
2477 .hdr.size = sizeof(msg.payload.u64),
2478 };
2479 msg.payload.u64 = session_id;
2480
2481 if (!crypto_session) {
2482 error_report("vhost-user trying to send unhandled ioctl");
2483 return -ENOTSUP;
2484 }
2485
2486 ret = vhost_user_write(dev, &msg, NULL, 0);
2487 if (ret < 0) {
2488 error_report("vhost_user_write() returned %d, close session failed",
2489 ret);
2490 return ret;
2491 }
2492
2493 return 0;
2494 }
2495
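/*
 * Only memory sections backed by a file descriptor (e.g. memfd or
 * file-backed RAM) can be shared with a vhost-user backend.
 */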
2496 static bool vhost_user_mem_section_filter(struct vhost_dev *dev,
2497 MemoryRegionSection *section)
2498 {
2499 return memory_region_get_fd(section->mr) >= 0;
2500 }
2501
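/*
 * Ask the backend for the shared memory region used to track in-flight
 * requests (VHOST_USER_GET_INFLIGHT_FD) and mmap it, so that pending
 * requests can be resubmitted if the backend reconnects.
 */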
2502 static int vhost_user_get_inflight_fd(struct vhost_dev *dev,
2503 uint16_t queue_size,
2504 struct vhost_inflight *inflight)
2505 {
2506 void *addr;
2507 int fd;
2508 int ret;
2509 struct vhost_user *u = dev->opaque;
2510 CharBackend *chr = u->user->chr;
2511 VhostUserMsg msg = {
2512 .hdr.request = VHOST_USER_GET_INFLIGHT_FD,
2513 .hdr.flags = VHOST_USER_VERSION,
2514 .payload.inflight.num_queues = dev->nvqs,
2515 .payload.inflight.queue_size = queue_size,
2516 .hdr.size = sizeof(msg.payload.inflight),
2517 };
2518
2519 if (!virtio_has_feature(dev->protocol_features,
2520 VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD)) {
2521 return 0;
2522 }
2523
2524 ret = vhost_user_write(dev, &msg, NULL, 0);
2525 if (ret < 0) {
2526 return ret;
2527 }
2528
2529 ret = vhost_user_read(dev, &msg);
2530 if (ret < 0) {
2531 return ret;
2532 }
2533
2534 if (msg.hdr.request != VHOST_USER_GET_INFLIGHT_FD) {
2535 error_report("Received unexpected msg type. "
2536 "Expected %d received %d",
2537 VHOST_USER_GET_INFLIGHT_FD, msg.hdr.request);
2538 return -EPROTO;
2539 }
2540
2541 if (msg.hdr.size != sizeof(msg.payload.inflight)) {
2542 error_report("Received bad msg size.");
2543 return -EPROTO;
2544 }
2545
2546 if (!msg.payload.inflight.mmap_size) {
2547 return 0;
2548 }
2549
2550 fd = qemu_chr_fe_get_msgfd(chr);
2551 if (fd < 0) {
2552 error_report("Failed to get mem fd");
2553 return -EIO;
2554 }
2555
2556 addr = mmap(0, msg.payload.inflight.mmap_size, PROT_READ | PROT_WRITE,
2557 MAP_SHARED, fd, msg.payload.inflight.mmap_offset);
2558
2559 if (addr == MAP_FAILED) {
2560 error_report("Failed to mmap mem fd");
2561 close(fd);
2562 return -EFAULT;
2563 }
2564
2565 inflight->addr = addr;
2566 inflight->fd = fd;
2567 inflight->size = msg.payload.inflight.mmap_size;
2568 inflight->offset = msg.payload.inflight.mmap_offset;
2569 inflight->queue_size = queue_size;
2570
2571 return 0;
2572 }
2573
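/*
 * Pass the in-flight tracking region (and its fd) back to the backend via
 * VHOST_USER_SET_INFLIGHT_FD so it can recover any pending requests.
 */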
2574 static int vhost_user_set_inflight_fd(struct vhost_dev *dev,
2575 struct vhost_inflight *inflight)
2576 {
2577 VhostUserMsg msg = {
2578 .hdr.request = VHOST_USER_SET_INFLIGHT_FD,
2579 .hdr.flags = VHOST_USER_VERSION,
2580 .payload.inflight.mmap_size = inflight->size,
2581 .payload.inflight.mmap_offset = inflight->offset,
2582 .payload.inflight.num_queues = dev->nvqs,
2583 .payload.inflight.queue_size = inflight->queue_size,
2584 .hdr.size = sizeof(msg.payload.inflight),
2585 };
2586
2587 if (!virtio_has_feature(dev->protocol_features,
2588 VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD)) {
2589 return 0;
2590 }
2591
2592 return vhost_user_write(dev, &msg, &inflight->fd, 1);
2593 }
2594
2595 static void vhost_user_state_destroy(gpointer data)
2596 {
2597 VhostUserHostNotifier *n = (VhostUserHostNotifier *) data;
2598 if (n) {
2599 vhost_user_host_notifier_remove(n, NULL);
2600 object_unparent(OBJECT(&n->mr));
2601 /*
2602 * We can't free until vhost_user_host_notifier_remove has
2603 * done its thing, so schedule the free with RCU.
2604 */
2605 g_free_rcu(n, rcu);
2606 }
2607 }
2608
2609 bool vhost_user_init(VhostUserState *user, CharBackend *chr, Error **errp)
2610 {
2611 if (user->chr) {
2612 error_setg(errp, "Cannot initialize vhost-user state");
2613 return false;
2614 }
2615 user->chr = chr;
2616 user->memory_slots = 0;
2617 user->notifiers = g_ptr_array_new_full(VIRTIO_QUEUE_MAX / 4,
2618 &vhost_user_state_destroy);
2619 return true;
2620 }
2621
2622 void vhost_user_cleanup(VhostUserState *user)
2623 {
2624 if (!user->chr) {
2625 return;
2626 }
2627 memory_region_transaction_begin();
2628 user->notifiers = (GPtrArray *) g_ptr_array_free(user->notifiers, true);
2629 memory_region_transaction_commit();
2630 user->chr = NULL;
2631 }
2632
2633
2634 typedef struct {
2635 vu_async_close_fn cb;
2636 DeviceState *dev;
2637 CharBackend *cd;
2638 struct vhost_dev *vhost;
2639 } VhostAsyncCallback;
2640
2641 static void vhost_user_async_close_bh(void *opaque)
2642 {
2643 VhostAsyncCallback *data = opaque;
2644 struct vhost_dev *vhost = data->vhost;
2645
2646 /*
2647 * If the vhost_dev has been cleared in the meantime there is
2648 * nothing left to do as some other path has completed the
2649 * cleanup.
2650 */
2651 if (vhost->vdev) {
2652 data->cb(data->dev);
2653 }
2654
2655 g_free(data);
2656 }
2657
2658 /*
2659 * We only schedule the work if the machine is running. If suspended
2660 * we want to keep all the in-flight data as is for migration
2661 * purposes.
2662 */
2663 void vhost_user_async_close(DeviceState *d,
2664 CharBackend *chardev, struct vhost_dev *vhost,
2665 vu_async_close_fn cb)
2666 {
2667 if (!runstate_check(RUN_STATE_SHUTDOWN)) {
2668 /*
2669 * A close event may happen during a read/write, but vhost
2670 * code assumes the vhost_dev remains setup, so delay the
2671 * stop & clear.
2672 */
2673 AioContext *ctx = qemu_get_current_aio_context();
2674 VhostAsyncCallback *data = g_new0(VhostAsyncCallback, 1);
2675
2676 /* Save data for the callback */
2677 data->cb = cb;
2678 data->dev = d;
2679 data->cd = chardev;
2680 data->vhost = vhost;
2681
2682 /* Disable any further notifications on the chardev */
2683 qemu_chr_fe_set_handlers(chardev,
2684 NULL, NULL, NULL, NULL, NULL, NULL,
2685 false);
2686
2687 aio_bh_schedule_oneshot(ctx, vhost_user_async_close_bh, data);
2688
2689 /*
2690 * Move the vhost device to the stopped state. The vhost-user
2691 * device will be cleaned up and disconnected in the BH. This can
2692 * be useful in the vhost migration code: if a disconnect is
2693 * caught, the generic vhost code can query the device state
2694 * without knowing its type (in this case vhost-user).
2695 *
2696 * Note that if the vhost device is fully cleared by the time we
2697 * execute the bottom half, we won't continue with the cleanup.
2698 */
2699 vhost->started = false;
2700 }
2701 }
2702
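/*
 * If the backend supports VHOST_USER_PROTOCOL_F_STATUS, push the
 * ACKNOWLEDGE/DRIVER/DRIVER_OK status bits once the last queue pair of
 * the device has been started.
 */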
2703 static int vhost_user_dev_start(struct vhost_dev *dev, bool started)
2704 {
2705 if (!virtio_has_feature(dev->protocol_features,
2706 VHOST_USER_PROTOCOL_F_STATUS)) {
2707 return 0;
2708 }
2709
2710 /* Set device status only for last queue pair */
2711 if (dev->vq_index + dev->nvqs != dev->vq_index_end) {
2712 return 0;
2713 }
2714
2715 if (started) {
2716 return vhost_user_add_status(dev, VIRTIO_CONFIG_S_ACKNOWLEDGE |
2717 VIRTIO_CONFIG_S_DRIVER |
2718 VIRTIO_CONFIG_S_DRIVER_OK);
2719 } else {
2720 return 0;
2721 }
2722 }
2723
2724 static void vhost_user_reset_status(struct vhost_dev *dev)
2725 {
2726 /* Set device status only for last queue pair */
2727 if (dev->vq_index + dev->nvqs != dev->vq_index_end) {
2728 return;
2729 }
2730
2731 if (virtio_has_feature(dev->protocol_features,
2732 VHOST_USER_PROTOCOL_F_STATUS)) {
2733 vhost_user_set_status(dev, 0);
2734 }
2735 }
2736
2737 const VhostOps user_ops = {
2738 .backend_type = VHOST_BACKEND_TYPE_USER,
2739 .vhost_backend_init = vhost_user_backend_init,
2740 .vhost_backend_cleanup = vhost_user_backend_cleanup,
2741 .vhost_backend_memslots_limit = vhost_user_memslots_limit,
2742 .vhost_set_log_base = vhost_user_set_log_base,
2743 .vhost_set_mem_table = vhost_user_set_mem_table,
2744 .vhost_set_vring_addr = vhost_user_set_vring_addr,
2745 .vhost_set_vring_endian = vhost_user_set_vring_endian,
2746 .vhost_set_vring_num = vhost_user_set_vring_num,
2747 .vhost_set_vring_base = vhost_user_set_vring_base,
2748 .vhost_get_vring_base = vhost_user_get_vring_base,
2749 .vhost_set_vring_kick = vhost_user_set_vring_kick,
2750 .vhost_set_vring_call = vhost_user_set_vring_call,
2751 .vhost_set_vring_err = vhost_user_set_vring_err,
2752 .vhost_set_features = vhost_user_set_features,
2753 .vhost_get_features = vhost_user_get_features,
2754 .vhost_set_owner = vhost_user_set_owner,
2755 .vhost_reset_device = vhost_user_reset_device,
2756 .vhost_get_vq_index = vhost_user_get_vq_index,
2757 .vhost_set_vring_enable = vhost_user_set_vring_enable,
2758 .vhost_requires_shm_log = vhost_user_requires_shm_log,
2759 .vhost_migration_done = vhost_user_migration_done,
2760 .vhost_backend_can_merge = vhost_user_can_merge,
2761 .vhost_net_set_mtu = vhost_user_net_set_mtu,
2762 .vhost_set_iotlb_callback = vhost_user_set_iotlb_callback,
2763 .vhost_send_device_iotlb_msg = vhost_user_send_device_iotlb_msg,
2764 .vhost_get_config = vhost_user_get_config,
2765 .vhost_set_config = vhost_user_set_config,
2766 .vhost_crypto_create_session = vhost_user_crypto_create_session,
2767 .vhost_crypto_close_session = vhost_user_crypto_close_session,
2768 .vhost_backend_mem_section_filter = vhost_user_mem_section_filter,
2769 .vhost_get_inflight_fd = vhost_user_get_inflight_fd,
2770 .vhost_set_inflight_fd = vhost_user_set_inflight_fd,
2771 .vhost_dev_start = vhost_user_dev_start,
2772 .vhost_reset_status = vhost_user_reset_status,
2773 };