hw/virtio/vhost-user.c
1 /*
2 * vhost-user
3 *
4 * Copyright (c) 2013 Virtual Open Systems Sarl.
5 *
6 * This work is licensed under the terms of the GNU GPL, version 2 or later.
7 * See the COPYING file in the top-level directory.
8 *
9 */
10
11 #include "qemu/osdep.h"
12 #include "qapi/error.h"
13 #include "hw/virtio/vhost.h"
14 #include "hw/virtio/virtio-crypto.h"
15 #include "hw/virtio/vhost-user.h"
16 #include "hw/virtio/vhost-backend.h"
17 #include "hw/virtio/virtio.h"
18 #include "hw/virtio/virtio-net.h"
19 #include "chardev/char-fe.h"
20 #include "io/channel-socket.h"
21 #include "sysemu/kvm.h"
22 #include "qemu/error-report.h"
23 #include "qemu/main-loop.h"
24 #include "qemu/sockets.h"
25 #include "sysemu/runstate.h"
26 #include "sysemu/cryptodev.h"
27 #include "migration/migration.h"
28 #include "migration/postcopy-ram.h"
29 #include "trace.h"
30 #include "exec/ramblock.h"
31
32 #include <sys/ioctl.h>
33 #include <sys/socket.h>
34 #include <sys/un.h>
35
36 #include "standard-headers/linux/vhost_types.h"
37
38 #ifdef CONFIG_LINUX
39 #include <linux/userfaultfd.h>
40 #endif
41
42 #define VHOST_MEMORY_BASELINE_NREGIONS 8
43 #define VHOST_USER_F_PROTOCOL_FEATURES 30
44 #define VHOST_USER_BACKEND_MAX_FDS 8
45
46 #if defined(TARGET_PPC) || defined(TARGET_PPC64)
47 #include "hw/ppc/spapr.h"
48 #define VHOST_USER_MAX_RAM_SLOTS SPAPR_MAX_RAM_SLOTS
49
50 #else
51 #define VHOST_USER_MAX_RAM_SLOTS 512
52 #endif
53
54 /*
55 * Maximum size of virtio device config space
56 */
57 #define VHOST_USER_MAX_CONFIG_SIZE 256
58
59 enum VhostUserProtocolFeature {
60 VHOST_USER_PROTOCOL_F_MQ = 0,
61 VHOST_USER_PROTOCOL_F_LOG_SHMFD = 1,
62 VHOST_USER_PROTOCOL_F_RARP = 2,
63 VHOST_USER_PROTOCOL_F_REPLY_ACK = 3,
64 VHOST_USER_PROTOCOL_F_NET_MTU = 4,
65 VHOST_USER_PROTOCOL_F_BACKEND_REQ = 5,
66 VHOST_USER_PROTOCOL_F_CROSS_ENDIAN = 6,
67 VHOST_USER_PROTOCOL_F_CRYPTO_SESSION = 7,
68 VHOST_USER_PROTOCOL_F_PAGEFAULT = 8,
69 VHOST_USER_PROTOCOL_F_CONFIG = 9,
70 VHOST_USER_PROTOCOL_F_BACKEND_SEND_FD = 10,
71 VHOST_USER_PROTOCOL_F_HOST_NOTIFIER = 11,
72 VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD = 12,
73 VHOST_USER_PROTOCOL_F_RESET_DEVICE = 13,
74 /* Feature 14 reserved for VHOST_USER_PROTOCOL_F_INBAND_NOTIFICATIONS. */
75 VHOST_USER_PROTOCOL_F_CONFIGURE_MEM_SLOTS = 15,
76 VHOST_USER_PROTOCOL_F_STATUS = 16,
77 VHOST_USER_PROTOCOL_F_MAX
78 };
79
80 #define VHOST_USER_PROTOCOL_FEATURE_MASK ((1 << VHOST_USER_PROTOCOL_F_MAX) - 1)
81
82 typedef enum VhostUserRequest {
83 VHOST_USER_NONE = 0,
84 VHOST_USER_GET_FEATURES = 1,
85 VHOST_USER_SET_FEATURES = 2,
86 VHOST_USER_SET_OWNER = 3,
87 VHOST_USER_RESET_OWNER = 4,
88 VHOST_USER_SET_MEM_TABLE = 5,
89 VHOST_USER_SET_LOG_BASE = 6,
90 VHOST_USER_SET_LOG_FD = 7,
91 VHOST_USER_SET_VRING_NUM = 8,
92 VHOST_USER_SET_VRING_ADDR = 9,
93 VHOST_USER_SET_VRING_BASE = 10,
94 VHOST_USER_GET_VRING_BASE = 11,
95 VHOST_USER_SET_VRING_KICK = 12,
96 VHOST_USER_SET_VRING_CALL = 13,
97 VHOST_USER_SET_VRING_ERR = 14,
98 VHOST_USER_GET_PROTOCOL_FEATURES = 15,
99 VHOST_USER_SET_PROTOCOL_FEATURES = 16,
100 VHOST_USER_GET_QUEUE_NUM = 17,
101 VHOST_USER_SET_VRING_ENABLE = 18,
102 VHOST_USER_SEND_RARP = 19,
103 VHOST_USER_NET_SET_MTU = 20,
104 VHOST_USER_SET_BACKEND_REQ_FD = 21,
105 VHOST_USER_IOTLB_MSG = 22,
106 VHOST_USER_SET_VRING_ENDIAN = 23,
107 VHOST_USER_GET_CONFIG = 24,
108 VHOST_USER_SET_CONFIG = 25,
109 VHOST_USER_CREATE_CRYPTO_SESSION = 26,
110 VHOST_USER_CLOSE_CRYPTO_SESSION = 27,
111 VHOST_USER_POSTCOPY_ADVISE = 28,
112 VHOST_USER_POSTCOPY_LISTEN = 29,
113 VHOST_USER_POSTCOPY_END = 30,
114 VHOST_USER_GET_INFLIGHT_FD = 31,
115 VHOST_USER_SET_INFLIGHT_FD = 32,
116 VHOST_USER_GPU_SET_SOCKET = 33,
117 VHOST_USER_RESET_DEVICE = 34,
118 /* Message number 35 reserved for VHOST_USER_VRING_KICK. */
119 VHOST_USER_GET_MAX_MEM_SLOTS = 36,
120 VHOST_USER_ADD_MEM_REG = 37,
121 VHOST_USER_REM_MEM_REG = 38,
122 VHOST_USER_SET_STATUS = 39,
123 VHOST_USER_GET_STATUS = 40,
124 VHOST_USER_MAX
125 } VhostUserRequest;
126
127 typedef enum VhostUserBackendRequest {
128 VHOST_USER_BACKEND_NONE = 0,
129 VHOST_USER_BACKEND_IOTLB_MSG = 1,
130 VHOST_USER_BACKEND_CONFIG_CHANGE_MSG = 2,
131 VHOST_USER_BACKEND_VRING_HOST_NOTIFIER_MSG = 3,
132 VHOST_USER_BACKEND_MAX
133 } VhostUserBackendRequest;
134
135 typedef struct VhostUserMemoryRegion {
136 uint64_t guest_phys_addr;
137 uint64_t memory_size;
138 uint64_t userspace_addr;
139 uint64_t mmap_offset;
140 } VhostUserMemoryRegion;
141
142 typedef struct VhostUserMemory {
143 uint32_t nregions;
144 uint32_t padding;
145 VhostUserMemoryRegion regions[VHOST_MEMORY_BASELINE_NREGIONS];
146 } VhostUserMemory;
147
148 typedef struct VhostUserMemRegMsg {
149 uint64_t padding;
150 VhostUserMemoryRegion region;
151 } VhostUserMemRegMsg;
152
153 typedef struct VhostUserLog {
154 uint64_t mmap_size;
155 uint64_t mmap_offset;
156 } VhostUserLog;
157
158 typedef struct VhostUserConfig {
159 uint32_t offset;
160 uint32_t size;
161 uint32_t flags;
162 uint8_t region[VHOST_USER_MAX_CONFIG_SIZE];
163 } VhostUserConfig;
164
165 #define VHOST_CRYPTO_SYM_HMAC_MAX_KEY_LEN 512
166 #define VHOST_CRYPTO_SYM_CIPHER_MAX_KEY_LEN 64
167 #define VHOST_CRYPTO_ASYM_MAX_KEY_LEN 1024
168
169 typedef struct VhostUserCryptoSession {
170 uint64_t op_code;
171 union {
172 struct {
173 CryptoDevBackendSymSessionInfo session_setup_data;
174 uint8_t key[VHOST_CRYPTO_SYM_CIPHER_MAX_KEY_LEN];
175 uint8_t auth_key[VHOST_CRYPTO_SYM_HMAC_MAX_KEY_LEN];
176 } sym;
177 struct {
178 CryptoDevBackendAsymSessionInfo session_setup_data;
179 uint8_t key[VHOST_CRYPTO_ASYM_MAX_KEY_LEN];
180 } asym;
181 } u;
182
183 /* session id on success, -1 on error */
184 int64_t session_id;
185 } VhostUserCryptoSession;
186
187 static VhostUserConfig c __attribute__ ((unused));
188 #define VHOST_USER_CONFIG_HDR_SIZE (sizeof(c.offset) \
189 + sizeof(c.size) \
190 + sizeof(c.flags))
191
192 typedef struct VhostUserVringArea {
193 uint64_t u64;
194 uint64_t size;
195 uint64_t offset;
196 } VhostUserVringArea;
197
198 typedef struct VhostUserInflight {
199 uint64_t mmap_size;
200 uint64_t mmap_offset;
201 uint16_t num_queues;
202 uint16_t queue_size;
203 } VhostUserInflight;
204
205 typedef struct {
206 VhostUserRequest request;
207
208 #define VHOST_USER_VERSION_MASK (0x3)
209 #define VHOST_USER_REPLY_MASK (0x1 << 2)
210 #define VHOST_USER_NEED_REPLY_MASK (0x1 << 3)
211 uint32_t flags;
212 uint32_t size; /* the following payload size */
213 } QEMU_PACKED VhostUserHeader;
214
215 typedef union {
216 #define VHOST_USER_VRING_IDX_MASK (0xff)
217 #define VHOST_USER_VRING_NOFD_MASK (0x1 << 8)
218 uint64_t u64;
219 struct vhost_vring_state state;
220 struct vhost_vring_addr addr;
221 VhostUserMemory memory;
222 VhostUserMemRegMsg mem_reg;
223 VhostUserLog log;
224 struct vhost_iotlb_msg iotlb;
225 VhostUserConfig config;
226 VhostUserCryptoSession session;
227 VhostUserVringArea area;
228 VhostUserInflight inflight;
229 } VhostUserPayload;
230
231 typedef struct VhostUserMsg {
232 VhostUserHeader hdr;
233 VhostUserPayload payload;
234 } QEMU_PACKED VhostUserMsg;
235
236 static VhostUserMsg m __attribute__ ((unused));
237 #define VHOST_USER_HDR_SIZE (sizeof(VhostUserHeader))
238
239 #define VHOST_USER_PAYLOAD_SIZE (sizeof(VhostUserPayload))
240
241 /* The version of the protocol we support */
242 #define VHOST_USER_VERSION (0x1)
243
244 struct vhost_user {
245 struct vhost_dev *dev;
246 /* Shared between vhost devs of the same virtio device */
247 VhostUserState *user;
248 QIOChannel *backend_ioc;
249 GSource *backend_src;
250 NotifierWithReturn postcopy_notifier;
251 struct PostCopyFD postcopy_fd;
252 uint64_t postcopy_client_bases[VHOST_USER_MAX_RAM_SLOTS];
253 /* Length of the region_rb and region_rb_offset arrays */
254 size_t region_rb_len;
255 /* RAMBlock associated with a given region */
256 RAMBlock **region_rb;
257 /*
258 * The offset from the start of the RAMBlock to the start of the
259 * vhost region.
260 */
261 ram_addr_t *region_rb_offset;
262
263 /* True once we've entered postcopy_listen */
264 bool postcopy_listen;
265
266 /* Our current regions */
267 int num_shadow_regions;
268 struct vhost_memory_region shadow_regions[VHOST_USER_MAX_RAM_SLOTS];
269 };
270
271 struct scrub_regions {
272 struct vhost_memory_region *region;
273 int reg_idx;
274 int fd_idx;
275 };
276
277 static bool ioeventfd_enabled(void)
278 {
279 return !kvm_enabled() || kvm_eventfds_enabled();
280 }
281
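/*
 * Read and validate the fixed-size message header from the backend
 * chardev. Checks that the reply carries the expected reply flag and
 * protocol version. Returns 0 on success, negative errno on failure.
 */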
282 static int vhost_user_read_header(struct vhost_dev *dev, VhostUserMsg *msg)
283 {
284 struct vhost_user *u = dev->opaque;
285 CharBackend *chr = u->user->chr;
286 uint8_t *p = (uint8_t *) msg;
287 int r, size = VHOST_USER_HDR_SIZE;
288
289 r = qemu_chr_fe_read_all(chr, p, size);
290 if (r != size) {
291 int saved_errno = errno;
292 error_report("Failed to read msg header. Read %d instead of %d."
293 " Original request %d.", r, size, msg->hdr.request);
294 return r < 0 ? -saved_errno : -EIO;
295 }
296
297 /* validate received flags */
298 if (msg->hdr.flags != (VHOST_USER_REPLY_MASK | VHOST_USER_VERSION)) {
299 error_report("Failed to read msg header."
300 " Flags 0x%x instead of 0x%x.", msg->hdr.flags,
301 VHOST_USER_REPLY_MASK | VHOST_USER_VERSION);
302 return -EPROTO;
303 }
304
305 trace_vhost_user_read(msg->hdr.request, msg->hdr.flags);
306
307 return 0;
308 }
309
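/*
 * Read a complete reply: the header via vhost_user_read_header(),
 * then, if hdr.size is non-zero, the payload that follows it.
 * The payload size is bounded by VHOST_USER_PAYLOAD_SIZE.
 */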
310 static int vhost_user_read(struct vhost_dev *dev, VhostUserMsg *msg)
311 {
312 struct vhost_user *u = dev->opaque;
313 CharBackend *chr = u->user->chr;
314 uint8_t *p = (uint8_t *) msg;
315 int r, size;
316
317 r = vhost_user_read_header(dev, msg);
318 if (r < 0) {
319 return r;
320 }
321
322 /* validate message size is sane */
323 if (msg->hdr.size > VHOST_USER_PAYLOAD_SIZE) {
324 error_report("Failed to read msg header."
325 " Size %d exceeds the maximum %zu.", msg->hdr.size,
326 VHOST_USER_PAYLOAD_SIZE);
327 return -EPROTO;
328 }
329
330 if (msg->hdr.size) {
331 p += VHOST_USER_HDR_SIZE;
332 size = msg->hdr.size;
333 r = qemu_chr_fe_read_all(chr, p, size);
334 if (r != size) {
335 int saved_errno = errno;
336 error_report("Failed to read msg payload."
337 " Read %d instead of %d.", r, msg->hdr.size);
338 return r < 0 ? -saved_errno : -EIO;
339 }
340 }
341
342 return 0;
343 }
344
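/*
 * If the request was sent with VHOST_USER_NEED_REPLY_MASK set, wait for
 * the REPLY_ACK, check that its type matches the request, and map a
 * non-zero payload (failure reported by the backend) to -EIO. Requests
 * sent without the flag return 0 immediately.
 */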
345 static int process_message_reply(struct vhost_dev *dev,
346 const VhostUserMsg *msg)
347 {
348 int ret;
349 VhostUserMsg msg_reply;
350
351 if ((msg->hdr.flags & VHOST_USER_NEED_REPLY_MASK) == 0) {
352 return 0;
353 }
354
355 ret = vhost_user_read(dev, &msg_reply);
356 if (ret < 0) {
357 return ret;
358 }
359
360 if (msg_reply.hdr.request != msg->hdr.request) {
361 error_report("Received unexpected msg type. "
362 "Expected %d received %d",
363 msg->hdr.request, msg_reply.hdr.request);
364 return -EPROTO;
365 }
366
367 return msg_reply.payload.u64 ? -EIO : 0;
368 }
369
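/*
 * Requests that affect the whole device rather than an individual
 * vring; these are only sent from the vhost_dev with vq_index == 0
 * (see vhost_user_write() and vhost_user_get_u64()).
 */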
370 static bool vhost_user_per_device_request(VhostUserRequest request)
371 {
372 switch (request) {
373 case VHOST_USER_SET_OWNER:
374 case VHOST_USER_RESET_OWNER:
375 case VHOST_USER_SET_MEM_TABLE:
376 case VHOST_USER_GET_QUEUE_NUM:
377 case VHOST_USER_NET_SET_MTU:
378 case VHOST_USER_RESET_DEVICE:
379 case VHOST_USER_ADD_MEM_REG:
380 case VHOST_USER_REM_MEM_REG:
381 return true;
382 default:
383 return false;
384 }
385 }
386
387 /* most non-init callers ignore the error */
388 static int vhost_user_write(struct vhost_dev *dev, VhostUserMsg *msg,
389 int *fds, int fd_num)
390 {
391 struct vhost_user *u = dev->opaque;
392 CharBackend *chr = u->user->chr;
393 int ret, size = VHOST_USER_HDR_SIZE + msg->hdr.size;
394
395 /*
396 * Some devices, like virtio-scsi, are implemented as a single vhost_dev,
397 * while others, like virtio-net, contain multiple vhost_devs. For
398 * operations such as configuring device memory mappings or issuing device
399 * resets, which affect the whole device instead of individual VQs,
400 * vhost-user messages should only be sent once.
401 *
402 * Devices with multiple vhost_devs are given an associated dev->vq_index
403 * so per_device requests are only sent if vq_index is 0.
404 */
405 if (vhost_user_per_device_request(msg->hdr.request)
406 && dev->vq_index != 0) {
407 msg->hdr.flags &= ~VHOST_USER_NEED_REPLY_MASK;
408 return 0;
409 }
410
411 if (qemu_chr_fe_set_msgfds(chr, fds, fd_num) < 0) {
412 error_report("Failed to set msg fds.");
413 return -EINVAL;
414 }
415
416 ret = qemu_chr_fe_write_all(chr, (const uint8_t *) msg, size);
417 if (ret != size) {
418 int saved_errno = errno;
419 error_report("Failed to write msg."
420 " Wrote %d instead of %d.", ret, size);
421 return ret < 0 ? -saved_errno : -EIO;
422 }
423
424 trace_vhost_user_write(msg->hdr.request, msg->hdr.flags);
425
426 return 0;
427 }
428
429 int vhost_user_gpu_set_socket(struct vhost_dev *dev, int fd)
430 {
431 VhostUserMsg msg = {
432 .hdr.request = VHOST_USER_GPU_SET_SOCKET,
433 .hdr.flags = VHOST_USER_VERSION,
434 };
435
436 return vhost_user_write(dev, &msg, &fd, 1);
437 }
438
439 static int vhost_user_set_log_base(struct vhost_dev *dev, uint64_t base,
440 struct vhost_log *log)
441 {
442 int fds[VHOST_USER_MAX_RAM_SLOTS];
443 size_t fd_num = 0;
444 bool shmfd = virtio_has_feature(dev->protocol_features,
445 VHOST_USER_PROTOCOL_F_LOG_SHMFD);
446 int ret;
447 VhostUserMsg msg = {
448 .hdr.request = VHOST_USER_SET_LOG_BASE,
449 .hdr.flags = VHOST_USER_VERSION,
450 .payload.log.mmap_size = log->size * sizeof(*(log->log)),
451 .payload.log.mmap_offset = 0,
452 .hdr.size = sizeof(msg.payload.log),
453 };
454
455 /* Send only once with first queue pair */
456 if (dev->vq_index != 0) {
457 return 0;
458 }
459
460 if (shmfd && log->fd != -1) {
461 fds[fd_num++] = log->fd;
462 }
463
464 ret = vhost_user_write(dev, &msg, fds, fd_num);
465 if (ret < 0) {
466 return ret;
467 }
468
469 if (shmfd) {
470 msg.hdr.size = 0;
471 ret = vhost_user_read(dev, &msg);
472 if (ret < 0) {
473 return ret;
474 }
475
476 if (msg.hdr.request != VHOST_USER_SET_LOG_BASE) {
477 error_report("Received unexpected msg type. "
478 "Expected %d received %d",
479 VHOST_USER_SET_LOG_BASE, msg.hdr.request);
480 return -EPROTO;
481 }
482 }
483
484 return 0;
485 }
486
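/*
 * Translate a vhost userspace address into its MemoryRegion, the offset
 * of that address within the region's backing file (memory region
 * offset plus the RAMBlock's fd_offset), and the file descriptor
 * backing the region (-1 if there is none).
 */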
487 static MemoryRegion *vhost_user_get_mr_data(uint64_t addr, ram_addr_t *offset,
488 int *fd)
489 {
490 MemoryRegion *mr;
491
492 assert((uintptr_t)addr == addr);
493 mr = memory_region_from_host((void *)(uintptr_t)addr, offset);
494 *fd = memory_region_get_fd(mr);
495 *offset += mr->ram_block->fd_offset;
496
497 return mr;
498 }
499
500 static void vhost_user_fill_msg_region(VhostUserMemoryRegion *dst,
501 struct vhost_memory_region *src,
502 uint64_t mmap_offset)
503 {
504 assert(src != NULL && dst != NULL);
505 dst->userspace_addr = src->userspace_addr;
506 dst->memory_size = src->memory_size;
507 dst->guest_phys_addr = src->guest_phys_addr;
508 dst->mmap_offset = mmap_offset;
509 }
510
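/*
 * Build a VHOST_USER_SET_MEM_TABLE message from the device's current
 * memory map. Only regions backed by an fd (shareable memory) are
 * included; their fds are collected into @fds. With @track_ramblocks
 * the RAMBlock and offset of each region are recorded for postcopy.
 */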
511 static int vhost_user_fill_set_mem_table_msg(struct vhost_user *u,
512 struct vhost_dev *dev,
513 VhostUserMsg *msg,
514 int *fds, size_t *fd_num,
515 bool track_ramblocks)
516 {
517 int i, fd;
518 ram_addr_t offset;
519 MemoryRegion *mr;
520 struct vhost_memory_region *reg;
521 VhostUserMemoryRegion region_buffer;
522
523 msg->hdr.request = VHOST_USER_SET_MEM_TABLE;
524
525 for (i = 0; i < dev->mem->nregions; ++i) {
526 reg = dev->mem->regions + i;
527
528 mr = vhost_user_get_mr_data(reg->userspace_addr, &offset, &fd);
529 if (fd > 0) {
530 if (track_ramblocks) {
531 assert(*fd_num < VHOST_MEMORY_BASELINE_NREGIONS);
532 trace_vhost_user_set_mem_table_withfd(*fd_num, mr->name,
533 reg->memory_size,
534 reg->guest_phys_addr,
535 reg->userspace_addr,
536 offset);
537 u->region_rb_offset[i] = offset;
538 u->region_rb[i] = mr->ram_block;
539 } else if (*fd_num == VHOST_MEMORY_BASELINE_NREGIONS) {
540 error_report("Failed preparing vhost-user memory table msg");
541 return -ENOBUFS;
542 }
543 vhost_user_fill_msg_region(&region_buffer, reg, offset);
544 msg->payload.memory.regions[*fd_num] = region_buffer;
545 fds[(*fd_num)++] = fd;
546 } else if (track_ramblocks) {
547 u->region_rb_offset[i] = 0;
548 u->region_rb[i] = NULL;
549 }
550 }
551
552 msg->payload.memory.nregions = *fd_num;
553
554 if (!*fd_num) {
555 error_report("Failed initializing vhost-user memory map, "
556 "consider using -object memory-backend-file share=on");
557 return -EINVAL;
558 }
559
560 msg->hdr.size = sizeof(msg->payload.memory.nregions);
561 msg->hdr.size += sizeof(msg->payload.memory.padding);
562 msg->hdr.size += *fd_num * sizeof(VhostUserMemoryRegion);
563
564 return 0;
565 }
566
567 static inline bool reg_equal(struct vhost_memory_region *shadow_reg,
568 struct vhost_memory_region *vdev_reg)
569 {
570 return shadow_reg->guest_phys_addr == vdev_reg->guest_phys_addr &&
571 shadow_reg->userspace_addr == vdev_reg->userspace_addr &&
572 shadow_reg->memory_size == vdev_reg->memory_size;
573 }
574
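/*
 * Compare the shadow region table against the device's current memory
 * map: regions present only in the shadow table are queued for removal
 * in @rem_reg, regions present only in the device map are queued for
 * addition in @add_reg. With @track_ramblocks the postcopy bookkeeping
 * (region_rb, region_rb_offset, client bases) is refreshed as well.
 */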
575 static void scrub_shadow_regions(struct vhost_dev *dev,
576 struct scrub_regions *add_reg,
577 int *nr_add_reg,
578 struct scrub_regions *rem_reg,
579 int *nr_rem_reg, uint64_t *shadow_pcb,
580 bool track_ramblocks)
581 {
582 struct vhost_user *u = dev->opaque;
583 bool found[VHOST_USER_MAX_RAM_SLOTS] = {};
584 struct vhost_memory_region *reg, *shadow_reg;
585 int i, j, fd, add_idx = 0, rm_idx = 0, fd_num = 0;
586 ram_addr_t offset;
587 MemoryRegion *mr;
588 bool matching;
589
590 /*
591 * Find memory regions present in our shadow state which are not in
592 * the device's current memory state.
593 *
594 * Mark regions in both the shadow and device state as "found".
595 */
596 for (i = 0; i < u->num_shadow_regions; i++) {
597 shadow_reg = &u->shadow_regions[i];
598 matching = false;
599
600 for (j = 0; j < dev->mem->nregions; j++) {
601 reg = &dev->mem->regions[j];
602
603 mr = vhost_user_get_mr_data(reg->userspace_addr, &offset, &fd);
604
605 if (reg_equal(shadow_reg, reg)) {
606 matching = true;
607 found[j] = true;
608 if (track_ramblocks) {
609 /*
610 * Reset postcopy client bases, region_rb, and
611 * region_rb_offset in case regions are removed.
612 */
613 if (fd > 0) {
614 u->region_rb_offset[j] = offset;
615 u->region_rb[j] = mr->ram_block;
616 shadow_pcb[j] = u->postcopy_client_bases[i];
617 } else {
618 u->region_rb_offset[j] = 0;
619 u->region_rb[j] = NULL;
620 }
621 }
622 break;
623 }
624 }
625
626 /*
627 * If the region was not found in the current device memory state
628 * create an entry for it in the removed list.
629 */
630 if (!matching) {
631 rem_reg[rm_idx].region = shadow_reg;
632 rem_reg[rm_idx++].reg_idx = i;
633 }
634 }
635
636 /*
637 * For regions not marked "found", create entries in the added list.
638 *
639 * Note their indexes in the device memory state and the indexes of their
640 * file descriptors.
641 */
642 for (i = 0; i < dev->mem->nregions; i++) {
643 reg = &dev->mem->regions[i];
644 vhost_user_get_mr_data(reg->userspace_addr, &offset, &fd);
645 if (fd > 0) {
646 ++fd_num;
647 }
648
649 /*
650 * If the region was in both the shadow and device state we don't
651 * need to send a VHOST_USER_ADD_MEM_REG message for it.
652 */
653 if (found[i]) {
654 continue;
655 }
656
657 add_reg[add_idx].region = reg;
658 add_reg[add_idx].reg_idx = i;
659 add_reg[add_idx++].fd_idx = fd_num;
660 }
661 *nr_rem_reg = rm_idx;
662 *nr_add_reg = add_idx;
663
664 return;
665 }
666
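/*
 * Send a VHOST_USER_REM_MEM_REG message for each fd-backed region
 * queued for removal (waiting for REPLY_ACK when supported) and drop
 * the entry from the shadow table.
 */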
667 static int send_remove_regions(struct vhost_dev *dev,
668 struct scrub_regions *remove_reg,
669 int nr_rem_reg, VhostUserMsg *msg,
670 bool reply_supported)
671 {
672 struct vhost_user *u = dev->opaque;
673 struct vhost_memory_region *shadow_reg;
674 int i, fd, shadow_reg_idx, ret;
675 ram_addr_t offset;
676 VhostUserMemoryRegion region_buffer;
677
678 /*
679 * The regions in remove_reg appear in the same order they do in the
680 * shadow table. Therefore we can minimize memory copies by iterating
681 * through remove_reg backwards.
682 */
683 for (i = nr_rem_reg - 1; i >= 0; i--) {
684 shadow_reg = remove_reg[i].region;
685 shadow_reg_idx = remove_reg[i].reg_idx;
686
687 vhost_user_get_mr_data(shadow_reg->userspace_addr, &offset, &fd);
688
689 if (fd > 0) {
690 msg->hdr.request = VHOST_USER_REM_MEM_REG;
691 vhost_user_fill_msg_region(&region_buffer, shadow_reg, 0);
692 msg->payload.mem_reg.region = region_buffer;
693
694 ret = vhost_user_write(dev, msg, NULL, 0);
695 if (ret < 0) {
696 return ret;
697 }
698
699 if (reply_supported) {
700 ret = process_message_reply(dev, msg);
701 if (ret) {
702 return ret;
703 }
704 }
705 }
706
707 /*
708 * At this point we know the backend has unmapped the region. It is now
709 * safe to remove it from the shadow table.
710 */
711 memmove(&u->shadow_regions[shadow_reg_idx],
712 &u->shadow_regions[shadow_reg_idx + 1],
713 sizeof(struct vhost_memory_region) *
714 (u->num_shadow_regions - shadow_reg_idx - 1));
715 u->num_shadow_regions--;
716 }
717
718 return 0;
719 }
720
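/*
 * Send a VHOST_USER_ADD_MEM_REG message (carrying the region's fd) for
 * each fd-backed region queued for addition, then append the region to
 * the shadow table. During postcopy (@track_ramblocks) the backend's
 * reply carries the address at which it mapped the region, which is
 * recorded as that region's postcopy client base.
 */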
721 static int send_add_regions(struct vhost_dev *dev,
722 struct scrub_regions *add_reg, int nr_add_reg,
723 VhostUserMsg *msg, uint64_t *shadow_pcb,
724 bool reply_supported, bool track_ramblocks)
725 {
726 struct vhost_user *u = dev->opaque;
727 int i, fd, ret, reg_idx, reg_fd_idx;
728 struct vhost_memory_region *reg;
729 MemoryRegion *mr;
730 ram_addr_t offset;
731 VhostUserMsg msg_reply;
732 VhostUserMemoryRegion region_buffer;
733
734 for (i = 0; i < nr_add_reg; i++) {
735 reg = add_reg[i].region;
736 reg_idx = add_reg[i].reg_idx;
737 reg_fd_idx = add_reg[i].fd_idx;
738
739 mr = vhost_user_get_mr_data(reg->userspace_addr, &offset, &fd);
740
741 if (fd > 0) {
742 if (track_ramblocks) {
743 trace_vhost_user_set_mem_table_withfd(reg_fd_idx, mr->name,
744 reg->memory_size,
745 reg->guest_phys_addr,
746 reg->userspace_addr,
747 offset);
748 u->region_rb_offset[reg_idx] = offset;
749 u->region_rb[reg_idx] = mr->ram_block;
750 }
751 msg->hdr.request = VHOST_USER_ADD_MEM_REG;
752 vhost_user_fill_msg_region(&region_buffer, reg, offset);
753 msg->payload.mem_reg.region = region_buffer;
754
755 ret = vhost_user_write(dev, msg, &fd, 1);
756 if (ret < 0) {
757 return ret;
758 }
759
760 if (track_ramblocks) {
761 uint64_t reply_gpa;
762
763 ret = vhost_user_read(dev, &msg_reply);
764 if (ret < 0) {
765 return ret;
766 }
767
768 reply_gpa = msg_reply.payload.mem_reg.region.guest_phys_addr;
769
770 if (msg_reply.hdr.request != VHOST_USER_ADD_MEM_REG) {
771 error_report("%s: Received unexpected msg type."
772 "Expected %d received %d", __func__,
773 VHOST_USER_ADD_MEM_REG,
774 msg_reply.hdr.request);
775 return -EPROTO;
776 }
777
778 /*
779 * We're using the same structure, just reusing one of the
780 * fields, so it should be the same size.
781 */
782 if (msg_reply.hdr.size != msg->hdr.size) {
783 error_report("%s: Unexpected size for postcopy reply "
784 "%d vs %d", __func__, msg_reply.hdr.size,
785 msg->hdr.size);
786 return -EPROTO;
787 }
788
789 /* Get the postcopy client base from the backend's reply. */
790 if (reply_gpa == dev->mem->regions[reg_idx].guest_phys_addr) {
791 shadow_pcb[reg_idx] =
792 msg_reply.payload.mem_reg.region.userspace_addr;
793 trace_vhost_user_set_mem_table_postcopy(
794 msg_reply.payload.mem_reg.region.userspace_addr,
795 msg->payload.mem_reg.region.userspace_addr,
796 reg_fd_idx, reg_idx);
797 } else {
798 error_report("%s: invalid postcopy reply for region. "
799 "Got guest physical address %" PRIX64 ", expected "
800 "%" PRIX64, __func__, reply_gpa,
801 dev->mem->regions[reg_idx].guest_phys_addr);
802 return -EPROTO;
803 }
804 } else if (reply_supported) {
805 ret = process_message_reply(dev, msg);
806 if (ret) {
807 return ret;
808 }
809 }
810 } else if (track_ramblocks) {
811 u->region_rb_offset[reg_idx] = 0;
812 u->region_rb[reg_idx] = NULL;
813 }
814
815 /*
816 * At this point, we know the backend has mapped in the new
817 * region, if the region has a valid file descriptor.
818 *
819 * The region should now be added to the shadow table.
820 */
821 u->shadow_regions[u->num_shadow_regions].guest_phys_addr =
822 reg->guest_phys_addr;
823 u->shadow_regions[u->num_shadow_regions].userspace_addr =
824 reg->userspace_addr;
825 u->shadow_regions[u->num_shadow_regions].memory_size =
826 reg->memory_size;
827 u->num_shadow_regions++;
828 }
829
830 return 0;
831 }
832
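/*
 * Update the backend's memory map incrementally with
 * VHOST_USER_REM_MEM_REG / VHOST_USER_ADD_MEM_REG, as negotiated via
 * VHOST_USER_PROTOCOL_F_CONFIGURE_MEM_SLOTS, instead of resending the
 * whole table. During postcopy (@track_ramblocks) the collected client
 * bases are stored and a final ack is sent to the backend.
 */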
833 static int vhost_user_add_remove_regions(struct vhost_dev *dev,
834 VhostUserMsg *msg,
835 bool reply_supported,
836 bool track_ramblocks)
837 {
838 struct vhost_user *u = dev->opaque;
839 struct scrub_regions add_reg[VHOST_USER_MAX_RAM_SLOTS];
840 struct scrub_regions rem_reg[VHOST_USER_MAX_RAM_SLOTS];
841 uint64_t shadow_pcb[VHOST_USER_MAX_RAM_SLOTS] = {};
842 int nr_add_reg, nr_rem_reg;
843 int ret;
844
845 msg->hdr.size = sizeof(msg->payload.mem_reg);
846
847 /* Find the regions which need to be removed or added. */
848 scrub_shadow_regions(dev, add_reg, &nr_add_reg, rem_reg, &nr_rem_reg,
849 shadow_pcb, track_ramblocks);
850
851 if (nr_rem_reg) {
852 ret = send_remove_regions(dev, rem_reg, nr_rem_reg, msg,
853 reply_supported);
854 if (ret < 0) {
855 goto err;
856 }
857 }
858
859 if (nr_add_reg) {
860 ret = send_add_regions(dev, add_reg, nr_add_reg, msg, shadow_pcb,
861 reply_supported, track_ramblocks);
862 if (ret < 0) {
863 goto err;
864 }
865 }
866
867 if (track_ramblocks) {
868 memcpy(u->postcopy_client_bases, shadow_pcb,
869 sizeof(uint64_t) * VHOST_USER_MAX_RAM_SLOTS);
870 /*
871 * Now we've registered this with the postcopy code, we ack to the
872 * client, because now we're in a position to deal with any faults
873 * it generates.
874 */
875 /* TODO: Use this for failure cases as well with a bad value. */
876 msg->hdr.size = sizeof(msg->payload.u64);
877 msg->payload.u64 = 0; /* OK */
878
879 ret = vhost_user_write(dev, msg, NULL, 0);
880 if (ret < 0) {
881 return ret;
882 }
883 }
884
885 return 0;
886
887 err:
888 if (track_ramblocks) {
889 memcpy(u->postcopy_client_bases, shadow_pcb,
890 sizeof(uint64_t) * VHOST_USER_MAX_RAM_SLOTS);
891 }
892
893 return ret;
894 }
895
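/*
 * Postcopy variant of vhost_user_set_mem_table(): besides sending the
 * memory map, it records, for every region, the address at which the
 * backend mapped it (the "postcopy client base") so that userfaults
 * from the backend can later be resolved to RAMBlock offsets.
 */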
896 static int vhost_user_set_mem_table_postcopy(struct vhost_dev *dev,
897 struct vhost_memory *mem,
898 bool reply_supported,
899 bool config_mem_slots)
900 {
901 struct vhost_user *u = dev->opaque;
902 int fds[VHOST_MEMORY_BASELINE_NREGIONS];
903 size_t fd_num = 0;
904 VhostUserMsg msg_reply;
905 int region_i, msg_i;
906 int ret;
907
908 VhostUserMsg msg = {
909 .hdr.flags = VHOST_USER_VERSION,
910 };
911
912 if (u->region_rb_len < dev->mem->nregions) {
913 u->region_rb = g_renew(RAMBlock*, u->region_rb, dev->mem->nregions);
914 u->region_rb_offset = g_renew(ram_addr_t, u->region_rb_offset,
915 dev->mem->nregions);
916 memset(&(u->region_rb[u->region_rb_len]), '\0',
917 sizeof(RAMBlock *) * (dev->mem->nregions - u->region_rb_len));
918 memset(&(u->region_rb_offset[u->region_rb_len]), '\0',
919 sizeof(ram_addr_t) * (dev->mem->nregions - u->region_rb_len));
920 u->region_rb_len = dev->mem->nregions;
921 }
922
923 if (config_mem_slots) {
924 ret = vhost_user_add_remove_regions(dev, &msg, reply_supported, true);
925 if (ret < 0) {
926 return ret;
927 }
928 } else {
929 ret = vhost_user_fill_set_mem_table_msg(u, dev, &msg, fds, &fd_num,
930 true);
931 if (ret < 0) {
932 return ret;
933 }
934
935 ret = vhost_user_write(dev, &msg, fds, fd_num);
936 if (ret < 0) {
937 return ret;
938 }
939
940 ret = vhost_user_read(dev, &msg_reply);
941 if (ret < 0) {
942 return ret;
943 }
944
945 if (msg_reply.hdr.request != VHOST_USER_SET_MEM_TABLE) {
946 error_report("%s: Received unexpected msg type."
947 "Expected %d received %d", __func__,
948 VHOST_USER_SET_MEM_TABLE, msg_reply.hdr.request);
949 return -EPROTO;
950 }
951
952 /*
953 * We're using the same structure, just reusing one of the
954 * fields, so it should be the same size.
955 */
956 if (msg_reply.hdr.size != msg.hdr.size) {
957 error_report("%s: Unexpected size for postcopy reply "
958 "%d vs %d", __func__, msg_reply.hdr.size,
959 msg.hdr.size);
960 return -EPROTO;
961 }
962
963 memset(u->postcopy_client_bases, 0,
964 sizeof(uint64_t) * VHOST_USER_MAX_RAM_SLOTS);
965
966 /*
967 * They're in the same order as the regions that were sent,
968 * but some of the regions were skipped (above) if they
969 * didn't have fds.
970 */
971 for (msg_i = 0, region_i = 0;
972 region_i < dev->mem->nregions;
973 region_i++) {
974 if (msg_i < fd_num &&
975 msg_reply.payload.memory.regions[msg_i].guest_phys_addr ==
976 dev->mem->regions[region_i].guest_phys_addr) {
977 u->postcopy_client_bases[region_i] =
978 msg_reply.payload.memory.regions[msg_i].userspace_addr;
979 trace_vhost_user_set_mem_table_postcopy(
980 msg_reply.payload.memory.regions[msg_i].userspace_addr,
981 msg.payload.memory.regions[msg_i].userspace_addr,
982 msg_i, region_i);
983 msg_i++;
984 }
985 }
986 if (msg_i != fd_num) {
987 error_report("%s: postcopy reply not fully consumed "
988 "%d vs %zd",
989 __func__, msg_i, fd_num);
990 return -EIO;
991 }
992
993 /*
994 * Now we've registered this with the postcopy code, we ack to the
995 * client, because now we're in a position to deal with any faults
996 * it generates.
997 */
998 /* TODO: Use this for failure cases as well with a bad value. */
999 msg.hdr.size = sizeof(msg.payload.u64);
1000 msg.payload.u64 = 0; /* OK */
1001 ret = vhost_user_write(dev, &msg, NULL, 0);
1002 if (ret < 0) {
1003 return ret;
1004 }
1005 }
1006
1007 return 0;
1008 }
1009
1010 static int vhost_user_set_mem_table(struct vhost_dev *dev,
1011 struct vhost_memory *mem)
1012 {
1013 struct vhost_user *u = dev->opaque;
1014 int fds[VHOST_MEMORY_BASELINE_NREGIONS];
1015 size_t fd_num = 0;
1016 bool do_postcopy = u->postcopy_listen && u->postcopy_fd.handler;
1017 bool reply_supported = virtio_has_feature(dev->protocol_features,
1018 VHOST_USER_PROTOCOL_F_REPLY_ACK);
1019 bool config_mem_slots =
1020 virtio_has_feature(dev->protocol_features,
1021 VHOST_USER_PROTOCOL_F_CONFIGURE_MEM_SLOTS);
1022 int ret;
1023
1024 if (do_postcopy) {
1025 /*
1026 * Postcopy has enough differences that it's best done in its own
1027 * version.
1028 */
1029 return vhost_user_set_mem_table_postcopy(dev, mem, reply_supported,
1030 config_mem_slots);
1031 }
1032
1033 VhostUserMsg msg = {
1034 .hdr.flags = VHOST_USER_VERSION,
1035 };
1036
1037 if (reply_supported) {
1038 msg.hdr.flags |= VHOST_USER_NEED_REPLY_MASK;
1039 }
1040
1041 if (config_mem_slots) {
1042 ret = vhost_user_add_remove_regions(dev, &msg, reply_supported, false);
1043 if (ret < 0) {
1044 return ret;
1045 }
1046 } else {
1047 ret = vhost_user_fill_set_mem_table_msg(u, dev, &msg, fds, &fd_num,
1048 false);
1049 if (ret < 0) {
1050 return ret;
1051 }
1052
1053 ret = vhost_user_write(dev, &msg, fds, fd_num);
1054 if (ret < 0) {
1055 return ret;
1056 }
1057
1058 if (reply_supported) {
1059 return process_message_reply(dev, &msg);
1060 }
1061 }
1062
1063 return 0;
1064 }
1065
1066 static int vhost_user_set_vring_endian(struct vhost_dev *dev,
1067 struct vhost_vring_state *ring)
1068 {
1069 bool cross_endian = virtio_has_feature(dev->protocol_features,
1070 VHOST_USER_PROTOCOL_F_CROSS_ENDIAN);
1071 VhostUserMsg msg = {
1072 .hdr.request = VHOST_USER_SET_VRING_ENDIAN,
1073 .hdr.flags = VHOST_USER_VERSION,
1074 .payload.state = *ring,
1075 .hdr.size = sizeof(msg.payload.state),
1076 };
1077
1078 if (!cross_endian) {
1079 error_report("vhost-user trying to send unhandled ioctl");
1080 return -ENOTSUP;
1081 }
1082
1083 return vhost_user_write(dev, &msg, NULL, 0);
1084 }
1085
1086 static int vhost_set_vring(struct vhost_dev *dev,
1087 unsigned long int request,
1088 struct vhost_vring_state *ring)
1089 {
1090 VhostUserMsg msg = {
1091 .hdr.request = request,
1092 .hdr.flags = VHOST_USER_VERSION,
1093 .payload.state = *ring,
1094 .hdr.size = sizeof(msg.payload.state),
1095 };
1096
1097 return vhost_user_write(dev, &msg, NULL, 0);
1098 }
1099
1100 static int vhost_user_set_vring_num(struct vhost_dev *dev,
1101 struct vhost_vring_state *ring)
1102 {
1103 return vhost_set_vring(dev, VHOST_USER_SET_VRING_NUM, ring);
1104 }
1105
1106 static void vhost_user_host_notifier_free(VhostUserHostNotifier *n)
1107 {
1108 assert(n && n->unmap_addr);
1109 munmap(n->unmap_addr, qemu_real_host_page_size());
1110 n->unmap_addr = NULL;
1111 }
1112
1113 /*
1114 * Clean-up function for the notifier; it will finally free the
1115 * structure under RCU.
1116 */
1117 static void vhost_user_host_notifier_remove(VhostUserHostNotifier *n,
1118 VirtIODevice *vdev)
1119 {
1120 if (n->addr) {
1121 if (vdev) {
1122 virtio_queue_set_host_notifier_mr(vdev, n->idx, &n->mr, false);
1123 }
1124 assert(!n->unmap_addr);
1125 n->unmap_addr = n->addr;
1126 n->addr = NULL;
1127 call_rcu(n, vhost_user_host_notifier_free, rcu);
1128 }
1129 }
1130
1131 static int vhost_user_set_vring_base(struct vhost_dev *dev,
1132 struct vhost_vring_state *ring)
1133 {
1134 return vhost_set_vring(dev, VHOST_USER_SET_VRING_BASE, ring);
1135 }
1136
1137 static int vhost_user_set_vring_enable(struct vhost_dev *dev, int enable)
1138 {
1139 int i;
1140
1141 if (!virtio_has_feature(dev->features, VHOST_USER_F_PROTOCOL_FEATURES)) {
1142 return -EINVAL;
1143 }
1144
1145 for (i = 0; i < dev->nvqs; ++i) {
1146 int ret;
1147 struct vhost_vring_state state = {
1148 .index = dev->vq_index + i,
1149 .num = enable,
1150 };
1151
1152 ret = vhost_set_vring(dev, VHOST_USER_SET_VRING_ENABLE, &state);
1153 if (ret < 0) {
1154 /*
1155 * Restoring the previous state is likely infeasible, as is
1156 * proceeding regardless of the error, so just bail out and hope
1157 * for device-level recovery.
1158 */
1159 return ret;
1160 }
1161 }
1162
1163 return 0;
1164 }
1165
1166 static VhostUserHostNotifier *fetch_notifier(VhostUserState *u,
1167 int idx)
1168 {
1169 if (idx >= u->notifiers->len) {
1170 return NULL;
1171 }
1172 return g_ptr_array_index(u->notifiers, idx);
1173 }
1174
1175 static int vhost_user_get_vring_base(struct vhost_dev *dev,
1176 struct vhost_vring_state *ring)
1177 {
1178 int ret;
1179 VhostUserMsg msg = {
1180 .hdr.request = VHOST_USER_GET_VRING_BASE,
1181 .hdr.flags = VHOST_USER_VERSION,
1182 .payload.state = *ring,
1183 .hdr.size = sizeof(msg.payload.state),
1184 };
1185 struct vhost_user *u = dev->opaque;
1186
1187 VhostUserHostNotifier *n = fetch_notifier(u->user, ring->index);
1188 if (n) {
1189 vhost_user_host_notifier_remove(n, dev->vdev);
1190 }
1191
1192 ret = vhost_user_write(dev, &msg, NULL, 0);
1193 if (ret < 0) {
1194 return ret;
1195 }
1196
1197 ret = vhost_user_read(dev, &msg);
1198 if (ret < 0) {
1199 return ret;
1200 }
1201
1202 if (msg.hdr.request != VHOST_USER_GET_VRING_BASE) {
1203 error_report("Received unexpected msg type. Expected %d received %d",
1204 VHOST_USER_GET_VRING_BASE, msg.hdr.request);
1205 return -EPROTO;
1206 }
1207
1208 if (msg.hdr.size != sizeof(msg.payload.state)) {
1209 error_report("Received bad msg size.");
1210 return -EPROTO;
1211 }
1212
1213 *ring = msg.payload.state;
1214
1215 return 0;
1216 }
1217
1218 static int vhost_set_vring_file(struct vhost_dev *dev,
1219 VhostUserRequest request,
1220 struct vhost_vring_file *file)
1221 {
1222 int fds[VHOST_USER_MAX_RAM_SLOTS];
1223 size_t fd_num = 0;
1224 VhostUserMsg msg = {
1225 .hdr.request = request,
1226 .hdr.flags = VHOST_USER_VERSION,
1227 .payload.u64 = file->index & VHOST_USER_VRING_IDX_MASK,
1228 .hdr.size = sizeof(msg.payload.u64),
1229 };
1230
1231 if (ioeventfd_enabled() && file->fd > 0) {
1232 fds[fd_num++] = file->fd;
1233 } else {
1234 msg.payload.u64 |= VHOST_USER_VRING_NOFD_MASK;
1235 }
1236
1237 return vhost_user_write(dev, &msg, fds, fd_num);
1238 }
1239
1240 static int vhost_user_set_vring_kick(struct vhost_dev *dev,
1241 struct vhost_vring_file *file)
1242 {
1243 return vhost_set_vring_file(dev, VHOST_USER_SET_VRING_KICK, file);
1244 }
1245
1246 static int vhost_user_set_vring_call(struct vhost_dev *dev,
1247 struct vhost_vring_file *file)
1248 {
1249 return vhost_set_vring_file(dev, VHOST_USER_SET_VRING_CALL, file);
1250 }
1251
1252 static int vhost_user_set_vring_err(struct vhost_dev *dev,
1253 struct vhost_vring_file *file)
1254 {
1255 return vhost_set_vring_file(dev, VHOST_USER_SET_VRING_ERR, file);
1256 }
1257
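/*
 * Send a request with no payload and read back a u64 reply.
 * Per-device requests are only issued from the vhost_dev with
 * vq_index == 0; other vhost_devs return 0 without sending anything.
 */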
1258 static int vhost_user_get_u64(struct vhost_dev *dev, int request, uint64_t *u64)
1259 {
1260 int ret;
1261 VhostUserMsg msg = {
1262 .hdr.request = request,
1263 .hdr.flags = VHOST_USER_VERSION,
1264 };
1265
1266 if (vhost_user_per_device_request(request) && dev->vq_index != 0) {
1267 return 0;
1268 }
1269
1270 ret = vhost_user_write(dev, &msg, NULL, 0);
1271 if (ret < 0) {
1272 return ret;
1273 }
1274
1275 ret = vhost_user_read(dev, &msg);
1276 if (ret < 0) {
1277 return ret;
1278 }
1279
1280 if (msg.hdr.request != request) {
1281 error_report("Received unexpected msg type. Expected %d received %d",
1282 request, msg.hdr.request);
1283 return -EPROTO;
1284 }
1285
1286 if (msg.hdr.size != sizeof(msg.payload.u64)) {
1287 error_report("Received bad msg size.");
1288 return -EPROTO;
1289 }
1290
1291 *u64 = msg.payload.u64;
1292
1293 return 0;
1294 }
1295
1296 static int vhost_user_get_features(struct vhost_dev *dev, uint64_t *features)
1297 {
1298 if (vhost_user_get_u64(dev, VHOST_USER_GET_FEATURES, features) < 0) {
1299 return -EPROTO;
1300 }
1301
1302 return 0;
1303 }
1304
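/*
 * Make sure the backend has processed @msg: either consume its
 * REPLY_ACK, or, if the message was sent without NEED_REPLY, issue a
 * VHOST_USER_GET_FEATURES round trip, which every backend must answer.
 */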
1305 static int enforce_reply(struct vhost_dev *dev,
1306 const VhostUserMsg *msg)
1307 {
1308 uint64_t dummy;
1309
1310 if (msg->hdr.flags & VHOST_USER_NEED_REPLY_MASK) {
1311 return process_message_reply(dev, msg);
1312 }
1313
1314 /*
1315 * We need to wait for a reply but the backend does not
1316 * support replies for the command we just sent.
1317 * Send VHOST_USER_GET_FEATURES which makes all backends
1318 * send a reply.
1319 */
1320 return vhost_user_get_features(dev, &dummy);
1321 }
1322
1323 static int vhost_user_set_vring_addr(struct vhost_dev *dev,
1324 struct vhost_vring_addr *addr)
1325 {
1326 int ret;
1327 VhostUserMsg msg = {
1328 .hdr.request = VHOST_USER_SET_VRING_ADDR,
1329 .hdr.flags = VHOST_USER_VERSION,
1330 .payload.addr = *addr,
1331 .hdr.size = sizeof(msg.payload.addr),
1332 };
1333
1334 bool reply_supported = virtio_has_feature(dev->protocol_features,
1335 VHOST_USER_PROTOCOL_F_REPLY_ACK);
1336
1337 /*
1338 * wait for a reply if logging is enabled to make sure the
1339 * backend is actually logging changes
1340 */
1341 bool wait_for_reply = addr->flags & (1 << VHOST_VRING_F_LOG);
1342
1343 if (reply_supported && wait_for_reply) {
1344 msg.hdr.flags |= VHOST_USER_NEED_REPLY_MASK;
1345 }
1346
1347 ret = vhost_user_write(dev, &msg, NULL, 0);
1348 if (ret < 0) {
1349 return ret;
1350 }
1351
1352 if (wait_for_reply) {
1353 return enforce_reply(dev, &msg);
1354 }
1355
1356 return 0;
1357 }
1358
1359 static int vhost_user_set_u64(struct vhost_dev *dev, int request, uint64_t u64,
1360 bool wait_for_reply)
1361 {
1362 VhostUserMsg msg = {
1363 .hdr.request = request,
1364 .hdr.flags = VHOST_USER_VERSION,
1365 .payload.u64 = u64,
1366 .hdr.size = sizeof(msg.payload.u64),
1367 };
1368 int ret;
1369
1370 if (wait_for_reply) {
1371 bool reply_supported = virtio_has_feature(dev->protocol_features,
1372 VHOST_USER_PROTOCOL_F_REPLY_ACK);
1373 if (reply_supported) {
1374 msg.hdr.flags |= VHOST_USER_NEED_REPLY_MASK;
1375 }
1376 }
1377
1378 ret = vhost_user_write(dev, &msg, NULL, 0);
1379 if (ret < 0) {
1380 return ret;
1381 }
1382
1383 if (wait_for_reply) {
1384 return enforce_reply(dev, &msg);
1385 }
1386
1387 return 0;
1388 }
1389
1390 static int vhost_user_set_status(struct vhost_dev *dev, uint8_t status)
1391 {
1392 return vhost_user_set_u64(dev, VHOST_USER_SET_STATUS, status, false);
1393 }
1394
1395 static int vhost_user_get_status(struct vhost_dev *dev, uint8_t *status)
1396 {
1397 uint64_t value;
1398 int ret;
1399
1400 ret = vhost_user_get_u64(dev, VHOST_USER_GET_STATUS, &value);
1401 if (ret < 0) {
1402 return ret;
1403 }
1404 *status = value;
1405
1406 return 0;
1407 }
1408
1409 static int vhost_user_add_status(struct vhost_dev *dev, uint8_t status)
1410 {
1411 uint8_t s;
1412 int ret;
1413
1414 ret = vhost_user_get_status(dev, &s);
1415 if (ret < 0) {
1416 return ret;
1417 }
1418
1419 if ((s & status) == status) {
1420 return 0;
1421 }
1422 s |= status;
1423
1424 return vhost_user_set_status(dev, s);
1425 }
1426
1427 static int vhost_user_set_features(struct vhost_dev *dev,
1428 uint64_t features)
1429 {
1430 /*
1431 * wait for a reply if logging is enabled to make sure the
1432 * backend is actually logging changes
1433 */
1434 bool log_enabled = features & (0x1ULL << VHOST_F_LOG_ALL);
1435 int ret;
1436
1437 /*
1438 * We need to include any extra backend-only feature bits that
1439 * might be needed by our device. Currently this includes the
1440 * VHOST_USER_F_PROTOCOL_FEATURES bit for enabling protocol
1441 * features.
1442 */
1443 ret = vhost_user_set_u64(dev, VHOST_USER_SET_FEATURES,
1444 features | dev->backend_features,
1445 log_enabled);
1446
1447 if (virtio_has_feature(dev->protocol_features,
1448 VHOST_USER_PROTOCOL_F_STATUS)) {
1449 if (!ret) {
1450 return vhost_user_add_status(dev, VIRTIO_CONFIG_S_FEATURES_OK);
1451 }
1452 }
1453
1454 return ret;
1455 }
1456
1457 static int vhost_user_set_protocol_features(struct vhost_dev *dev,
1458 uint64_t features)
1459 {
1460 return vhost_user_set_u64(dev, VHOST_USER_SET_PROTOCOL_FEATURES, features,
1461 false);
1462 }
1463
1464 static int vhost_user_set_owner(struct vhost_dev *dev)
1465 {
1466 VhostUserMsg msg = {
1467 .hdr.request = VHOST_USER_SET_OWNER,
1468 .hdr.flags = VHOST_USER_VERSION,
1469 };
1470
1471 return vhost_user_write(dev, &msg, NULL, 0);
1472 }
1473
1474 static int vhost_user_get_max_memslots(struct vhost_dev *dev,
1475 uint64_t *max_memslots)
1476 {
1477 uint64_t backend_max_memslots;
1478 int err;
1479
1480 err = vhost_user_get_u64(dev, VHOST_USER_GET_MAX_MEM_SLOTS,
1481 &backend_max_memslots);
1482 if (err < 0) {
1483 return err;
1484 }
1485
1486 *max_memslots = backend_max_memslots;
1487
1488 return 0;
1489 }
1490
1491 static int vhost_user_reset_device(struct vhost_dev *dev)
1492 {
1493 VhostUserMsg msg = {
1494 .hdr.flags = VHOST_USER_VERSION,
1495 };
1496
1497 msg.hdr.request = virtio_has_feature(dev->protocol_features,
1498 VHOST_USER_PROTOCOL_F_RESET_DEVICE)
1499 ? VHOST_USER_RESET_DEVICE
1500 : VHOST_USER_RESET_OWNER;
1501
1502 return vhost_user_write(dev, &msg, NULL, 0);
1503 }
1504
1505 static int vhost_user_backend_handle_config_change(struct vhost_dev *dev)
1506 {
1507 if (!dev->config_ops || !dev->config_ops->vhost_dev_config_notifier) {
1508 return -ENOSYS;
1509 }
1510
1511 return dev->config_ops->vhost_dev_config_notifier(dev);
1512 }
1513
1514 /*
1515 * Fetch or create the notifier for a given idx. Newly created
1516 * notifiers are added to the pointer array that tracks them.
1517 */
1518 static VhostUserHostNotifier *fetch_or_create_notifier(VhostUserState *u,
1519 int idx)
1520 {
1521 VhostUserHostNotifier *n = NULL;
1522 if (idx >= u->notifiers->len) {
1523 g_ptr_array_set_size(u->notifiers, idx + 1);
1524 }
1525
1526 n = g_ptr_array_index(u->notifiers, idx);
1527 if (!n) {
1528 /*
1529 * In case notifications arrive out-of-order,
1530 * make room for the current index.
1531 */
1532 g_ptr_array_remove_index(u->notifiers, idx);
1533 n = g_new0(VhostUserHostNotifier, 1);
1534 n->idx = idx;
1535 g_ptr_array_insert(u->notifiers, idx, n);
1536 trace_vhost_user_create_notifier(idx, n);
1537 }
1538
1539 return n;
1540 }
1541
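/*
 * Handle VHOST_USER_BACKEND_VRING_HOST_NOTIFIER_MSG: mmap the area
 * supplied by the backend and install it as the host notifier memory
 * region for the given queue, replacing any previous mapping.
 */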
1542 static int vhost_user_backend_handle_vring_host_notifier(struct vhost_dev *dev,
1543 VhostUserVringArea *area,
1544 int fd)
1545 {
1546 int queue_idx = area->u64 & VHOST_USER_VRING_IDX_MASK;
1547 size_t page_size = qemu_real_host_page_size();
1548 struct vhost_user *u = dev->opaque;
1549 VhostUserState *user = u->user;
1550 VirtIODevice *vdev = dev->vdev;
1551 VhostUserHostNotifier *n;
1552 void *addr;
1553 char *name;
1554
1555 if (!virtio_has_feature(dev->protocol_features,
1556 VHOST_USER_PROTOCOL_F_HOST_NOTIFIER) ||
1557 vdev == NULL || queue_idx >= virtio_get_num_queues(vdev)) {
1558 return -EINVAL;
1559 }
1560
1561 /*
1562 * Fetch notifier and invalidate any old data before setting up
1563 * new mapped address.
1564 */
1565 n = fetch_or_create_notifier(user, queue_idx);
1566 vhost_user_host_notifier_remove(n, vdev);
1567
1568 if (area->u64 & VHOST_USER_VRING_NOFD_MASK) {
1569 return 0;
1570 }
1571
1572 /* Sanity check. */
1573 if (area->size != page_size) {
1574 return -EINVAL;
1575 }
1576
1577 addr = mmap(NULL, page_size, PROT_READ | PROT_WRITE, MAP_SHARED,
1578 fd, area->offset);
1579 if (addr == MAP_FAILED) {
1580 return -EFAULT;
1581 }
1582
1583 name = g_strdup_printf("vhost-user/host-notifier@%p mmaps[%d]",
1584 user, queue_idx);
1585 if (!n->mr.ram) { /* Don't init again after suspend. */
1586 memory_region_init_ram_device_ptr(&n->mr, OBJECT(vdev), name,
1587 page_size, addr);
1588 } else {
1589 n->mr.ram_block->host = addr;
1590 }
1591 g_free(name);
1592
1593 if (virtio_queue_set_host_notifier_mr(vdev, queue_idx, &n->mr, true)) {
1594 object_unparent(OBJECT(&n->mr));
1595 munmap(addr, page_size);
1596 return -ENXIO;
1597 }
1598
1599 n->addr = addr;
1600
1601 return 0;
1602 }
1603
1604 static void close_backend_channel(struct vhost_user *u)
1605 {
1606 g_source_destroy(u->backend_src);
1607 g_source_unref(u->backend_src);
1608 u->backend_src = NULL;
1609 object_unref(OBJECT(u->backend_ioc));
1610 u->backend_ioc = NULL;
1611 }
1612
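/*
 * GSource callback for the backend channel: read one backend request
 * (header, optional fds, payload), dispatch it, and send a REPLY_ACK
 * with the result if the backend asked for one. On error the channel
 * is torn down and the watch removed; received fds are closed before
 * returning.
 */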
1613 static gboolean backend_read(QIOChannel *ioc, GIOCondition condition,
1614 gpointer opaque)
1615 {
1616 struct vhost_dev *dev = opaque;
1617 struct vhost_user *u = dev->opaque;
1618 VhostUserHeader hdr = { 0, };
1619 VhostUserPayload payload = { 0, };
1620 Error *local_err = NULL;
1621 gboolean rc = G_SOURCE_CONTINUE;
1622 int ret = 0;
1623 struct iovec iov;
1624 g_autofree int *fd = NULL;
1625 size_t fdsize = 0;
1626 int i;
1627
1628 /* Read header */
1629 iov.iov_base = &hdr;
1630 iov.iov_len = VHOST_USER_HDR_SIZE;
1631
1632 if (qio_channel_readv_full_all(ioc, &iov, 1, &fd, &fdsize, &local_err)) {
1633 error_report_err(local_err);
1634 goto err;
1635 }
1636
1637 if (hdr.size > VHOST_USER_PAYLOAD_SIZE) {
1638 error_report("Failed to read msg header."
1639 " Size %d exceeds the maximum %zu.", hdr.size,
1640 VHOST_USER_PAYLOAD_SIZE);
1641 goto err;
1642 }
1643
1644 /* Read payload */
1645 if (qio_channel_read_all(ioc, (char *) &payload, hdr.size, &local_err)) {
1646 error_report_err(local_err);
1647 goto err;
1648 }
1649
1650 switch (hdr.request) {
1651 case VHOST_USER_BACKEND_IOTLB_MSG:
1652 ret = vhost_backend_handle_iotlb_msg(dev, &payload.iotlb);
1653 break;
1654 case VHOST_USER_BACKEND_CONFIG_CHANGE_MSG:
1655 ret = vhost_user_backend_handle_config_change(dev);
1656 break;
1657 case VHOST_USER_BACKEND_VRING_HOST_NOTIFIER_MSG:
1658 ret = vhost_user_backend_handle_vring_host_notifier(dev, &payload.area,
1659 fd ? fd[0] : -1);
1660 break;
1661 default:
1662 error_report("Received unexpected msg type: %d.", hdr.request);
1663 ret = -EINVAL;
1664 }
1665
1666 /*
1667 * REPLY_ACK feature handling. Other reply types have to be managed
1668 * directly in their request handlers.
1669 */
1670 if (hdr.flags & VHOST_USER_NEED_REPLY_MASK) {
1671 struct iovec iovec[2];
1672
1673
1674 hdr.flags &= ~VHOST_USER_NEED_REPLY_MASK;
1675 hdr.flags |= VHOST_USER_REPLY_MASK;
1676
1677 payload.u64 = !!ret;
1678 hdr.size = sizeof(payload.u64);
1679
1680 iovec[0].iov_base = &hdr;
1681 iovec[0].iov_len = VHOST_USER_HDR_SIZE;
1682 iovec[1].iov_base = &payload;
1683 iovec[1].iov_len = hdr.size;
1684
1685 if (qio_channel_writev_all(ioc, iovec, ARRAY_SIZE(iovec), &local_err)) {
1686 error_report_err(local_err);
1687 goto err;
1688 }
1689 }
1690
1691 goto fdcleanup;
1692
1693 err:
1694 close_backend_channel(u);
1695 rc = G_SOURCE_REMOVE;
1696
1697 fdcleanup:
1698 if (fd) {
1699 for (i = 0; i < fdsize; i++) {
1700 close(fd[i]);
1701 }
1702 }
1703 return rc;
1704 }
1705
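/*
 * If the backend negotiated VHOST_USER_PROTOCOL_F_BACKEND_REQ, create a
 * socketpair, hand one end to the backend with
 * VHOST_USER_SET_BACKEND_REQ_FD, and watch the other end with
 * backend_read() so the backend can send requests back to QEMU.
 */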
1706 static int vhost_setup_backend_channel(struct vhost_dev *dev)
1707 {
1708 VhostUserMsg msg = {
1709 .hdr.request = VHOST_USER_SET_BACKEND_REQ_FD,
1710 .hdr.flags = VHOST_USER_VERSION,
1711 };
1712 struct vhost_user *u = dev->opaque;
1713 int sv[2], ret = 0;
1714 bool reply_supported = virtio_has_feature(dev->protocol_features,
1715 VHOST_USER_PROTOCOL_F_REPLY_ACK);
1716 Error *local_err = NULL;
1717 QIOChannel *ioc;
1718
1719 if (!virtio_has_feature(dev->protocol_features,
1720 VHOST_USER_PROTOCOL_F_BACKEND_REQ)) {
1721 return 0;
1722 }
1723
1724 if (qemu_socketpair(PF_UNIX, SOCK_STREAM, 0, sv) == -1) {
1725 int saved_errno = errno;
1726 error_report("socketpair() failed");
1727 return -saved_errno;
1728 }
1729
1730 ioc = QIO_CHANNEL(qio_channel_socket_new_fd(sv[0], &local_err));
1731 if (!ioc) {
1732 error_report_err(local_err);
1733 return -ECONNREFUSED;
1734 }
1735 u->backend_ioc = ioc;
1736 u->backend_src = qio_channel_add_watch_source(u->backend_ioc,
1737 G_IO_IN | G_IO_HUP,
1738 backend_read, dev, NULL, NULL);
1739
1740 if (reply_supported) {
1741 msg.hdr.flags |= VHOST_USER_NEED_REPLY_MASK;
1742 }
1743
1744 ret = vhost_user_write(dev, &msg, &sv[1], 1);
1745 if (ret) {
1746 goto out;
1747 }
1748
1749 if (reply_supported) {
1750 ret = process_message_reply(dev, &msg);
1751 }
1752
1753 out:
1754 close(sv[1]);
1755 if (ret) {
1756 close_backend_channel(u);
1757 }
1758
1759 return ret;
1760 }
1761
1762 #ifdef CONFIG_LINUX
1763 /*
1764 * Called back from the postcopy fault thread when a fault is received on our
1765 * ufd.
1766 * TODO: This is Linux specific
1767 */
1768 static int vhost_user_postcopy_fault_handler(struct PostCopyFD *pcfd,
1769 void *ufd)
1770 {
1771 struct vhost_dev *dev = pcfd->data;
1772 struct vhost_user *u = dev->opaque;
1773 struct uffd_msg *msg = ufd;
1774 uint64_t faultaddr = msg->arg.pagefault.address;
1775 RAMBlock *rb = NULL;
1776 uint64_t rb_offset;
1777 int i;
1778
1779 trace_vhost_user_postcopy_fault_handler(pcfd->idstr, faultaddr,
1780 dev->mem->nregions);
1781 for (i = 0; i < MIN(dev->mem->nregions, u->region_rb_len); i++) {
1782 trace_vhost_user_postcopy_fault_handler_loop(i,
1783 u->postcopy_client_bases[i], dev->mem->regions[i].memory_size);
1784 if (faultaddr >= u->postcopy_client_bases[i]) {
1785 /* Offset of the fault address in the vhost region */
1786 uint64_t region_offset = faultaddr - u->postcopy_client_bases[i];
1787 if (region_offset < dev->mem->regions[i].memory_size) {
1788 rb_offset = region_offset + u->region_rb_offset[i];
1789 trace_vhost_user_postcopy_fault_handler_found(i,
1790 region_offset, rb_offset);
1791 rb = u->region_rb[i];
1792 return postcopy_request_shared_page(pcfd, rb, faultaddr,
1793 rb_offset);
1794 }
1795 }
1796 }
1797 error_report("%s: Failed to find region for fault %" PRIx64,
1798 __func__, faultaddr);
1799 return -1;
1800 }
1801
1802 static int vhost_user_postcopy_waker(struct PostCopyFD *pcfd, RAMBlock *rb,
1803 uint64_t offset)
1804 {
1805 struct vhost_dev *dev = pcfd->data;
1806 struct vhost_user *u = dev->opaque;
1807 int i;
1808
1809 trace_vhost_user_postcopy_waker(qemu_ram_get_idstr(rb), offset);
1810
1811 if (!u) {
1812 return 0;
1813 }
1814 /* Translate the offset into an address in the client's address space */
1815 for (i = 0; i < MIN(dev->mem->nregions, u->region_rb_len); i++) {
1816 if (u->region_rb[i] == rb &&
1817 offset >= u->region_rb_offset[i] &&
1818 offset < (u->region_rb_offset[i] +
1819 dev->mem->regions[i].memory_size)) {
1820 uint64_t client_addr = (offset - u->region_rb_offset[i]) +
1821 u->postcopy_client_bases[i];
1822 trace_vhost_user_postcopy_waker_found(client_addr);
1823 return postcopy_wake_shared(pcfd, client_addr, rb);
1824 }
1825 }
1826
1827 trace_vhost_user_postcopy_waker_nomatch(qemu_ram_get_idstr(rb), offset);
1828 return 0;
1829 }
1830 #endif
1831
1832 /*
1833 * Called at the start of an inbound postcopy on reception of the
1834 * 'advise' command.
1835 */
1836 static int vhost_user_postcopy_advise(struct vhost_dev *dev, Error **errp)
1837 {
1838 #ifdef CONFIG_LINUX
1839 struct vhost_user *u = dev->opaque;
1840 CharBackend *chr = u->user->chr;
1841 int ufd;
1842 int ret;
1843 VhostUserMsg msg = {
1844 .hdr.request = VHOST_USER_POSTCOPY_ADVISE,
1845 .hdr.flags = VHOST_USER_VERSION,
1846 };
1847
1848 ret = vhost_user_write(dev, &msg, NULL, 0);
1849 if (ret < 0) {
1850 error_setg(errp, "Failed to send postcopy_advise to vhost");
1851 return ret;
1852 }
1853
1854 ret = vhost_user_read(dev, &msg);
1855 if (ret < 0) {
1856 error_setg(errp, "Failed to get postcopy_advise reply from vhost");
1857 return ret;
1858 }
1859
1860 if (msg.hdr.request != VHOST_USER_POSTCOPY_ADVISE) {
1861 error_setg(errp, "Unexpected msg type. Expected %d received %d",
1862 VHOST_USER_POSTCOPY_ADVISE, msg.hdr.request);
1863 return -EPROTO;
1864 }
1865
1866 if (msg.hdr.size) {
1867 error_setg(errp, "Received bad msg size.");
1868 return -EPROTO;
1869 }
1870 ufd = qemu_chr_fe_get_msgfd(chr);
1871 if (ufd < 0) {
1872 error_setg(errp, "%s: Failed to get ufd", __func__);
1873 return -EIO;
1874 }
1875 qemu_socket_set_nonblock(ufd);
1876
1877 /* register ufd with userfault thread */
1878 u->postcopy_fd.fd = ufd;
1879 u->postcopy_fd.data = dev;
1880 u->postcopy_fd.handler = vhost_user_postcopy_fault_handler;
1881 u->postcopy_fd.waker = vhost_user_postcopy_waker;
1882 u->postcopy_fd.idstr = "vhost-user"; /* Need to find unique name */
1883 postcopy_register_shared_ufd(&u->postcopy_fd);
1884 return 0;
1885 #else
1886 error_setg(errp, "Postcopy not supported on non-Linux systems");
1887 return -ENOSYS;
1888 #endif
1889 }
1890
1891 /*
1892 * Called at the switch to postcopy on reception of the 'listen' command.
1893 */
1894 static int vhost_user_postcopy_listen(struct vhost_dev *dev, Error **errp)
1895 {
1896 struct vhost_user *u = dev->opaque;
1897 int ret;
1898 VhostUserMsg msg = {
1899 .hdr.request = VHOST_USER_POSTCOPY_LISTEN,
1900 .hdr.flags = VHOST_USER_VERSION | VHOST_USER_NEED_REPLY_MASK,
1901 };
1902 u->postcopy_listen = true;
1903
1904 trace_vhost_user_postcopy_listen();
1905
1906 ret = vhost_user_write(dev, &msg, NULL, 0);
1907 if (ret < 0) {
1908 error_setg(errp, "Failed to send postcopy_listen to vhost");
1909 return ret;
1910 }
1911
1912 ret = process_message_reply(dev, &msg);
1913 if (ret) {
1914 error_setg(errp, "Failed to receive reply to postcopy_listen");
1915 return ret;
1916 }
1917
1918 return 0;
1919 }
1920
1921 /*
1922 * Called at the end of postcopy
1923 */
1924 static int vhost_user_postcopy_end(struct vhost_dev *dev, Error **errp)
1925 {
1926 VhostUserMsg msg = {
1927 .hdr.request = VHOST_USER_POSTCOPY_END,
1928 .hdr.flags = VHOST_USER_VERSION | VHOST_USER_NEED_REPLY_MASK,
1929 };
1930 int ret;
1931 struct vhost_user *u = dev->opaque;
1932
1933 trace_vhost_user_postcopy_end_entry();
1934
1935 ret = vhost_user_write(dev, &msg, NULL, 0);
1936 if (ret < 0) {
1937 error_setg(errp, "Failed to send postcopy_end to vhost");
1938 return ret;
1939 }
1940
1941 ret = process_message_reply(dev, &msg);
1942 if (ret) {
1943 error_setg(errp, "Failed to receive reply to postcopy_end");
1944 return ret;
1945 }
1946 postcopy_unregister_shared_ufd(&u->postcopy_fd);
1947 close(u->postcopy_fd.fd);
1948 u->postcopy_fd.handler = NULL;
1949
1950 trace_vhost_user_postcopy_end_exit();
1951
1952 return 0;
1953 }
1954
1955 static int vhost_user_postcopy_notifier(NotifierWithReturn *notifier,
1956 void *opaque)
1957 {
1958 struct PostcopyNotifyData *pnd = opaque;
1959 struct vhost_user *u = container_of(notifier, struct vhost_user,
1960 postcopy_notifier);
1961 struct vhost_dev *dev = u->dev;
1962
1963 switch (pnd->reason) {
1964 case POSTCOPY_NOTIFY_PROBE:
1965 if (!virtio_has_feature(dev->protocol_features,
1966 VHOST_USER_PROTOCOL_F_PAGEFAULT)) {
1967 /* TODO: Get the device name into this error somehow */
1968 error_setg(pnd->errp,
1969 "vhost-user backend not capable of postcopy");
1970 return -ENOENT;
1971 }
1972 break;
1973
1974 case POSTCOPY_NOTIFY_INBOUND_ADVISE:
1975 return vhost_user_postcopy_advise(dev, pnd->errp);
1976
1977 case POSTCOPY_NOTIFY_INBOUND_LISTEN:
1978 return vhost_user_postcopy_listen(dev, pnd->errp);
1979
1980 case POSTCOPY_NOTIFY_INBOUND_END:
1981 return vhost_user_postcopy_end(dev, pnd->errp);
1982
1983 default:
1984 /* We ignore notifications we don't know */
1985 break;
1986 }
1987
1988 return 0;
1989 }
1990
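/*
 * Backend init: allocate the vhost_user state, negotiate device
 * features and protocol features with the backend, validate queue-count
 * and memory-slot limits, and add a migration blocker when the backend
 * cannot log dirty pages (no LOG_SHMFD).
 */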
1991 static int vhost_user_backend_init(struct vhost_dev *dev, void *opaque,
1992 Error **errp)
1993 {
1994 uint64_t features, ram_slots;
1995 struct vhost_user *u;
1996 VhostUserState *vus = (VhostUserState *) opaque;
1997 int err;
1998
1999 assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER);
2000
2001 u = g_new0(struct vhost_user, 1);
2002 u->user = vus;
2003 u->dev = dev;
2004 dev->opaque = u;
2005
2006 err = vhost_user_get_features(dev, &features);
2007 if (err < 0) {
2008 error_setg_errno(errp, -err, "vhost_backend_init failed");
2009 return err;
2010 }
2011
2012 if (virtio_has_feature(features, VHOST_USER_F_PROTOCOL_FEATURES)) {
2013 bool supports_f_config = vus->supports_config ||
2014 (dev->config_ops && dev->config_ops->vhost_dev_config_notifier);
2015 uint64_t protocol_features;
2016
2017 dev->backend_features |= 1ULL << VHOST_USER_F_PROTOCOL_FEATURES;
2018
2019 err = vhost_user_get_u64(dev, VHOST_USER_GET_PROTOCOL_FEATURES,
2020 &protocol_features);
2021 if (err < 0) {
2022 error_setg_errno(errp, EPROTO, "vhost_backend_init failed");
2023 return -EPROTO;
2024 }
2025
2026 /*
2027 * We will use all the protocol features we support, although
2028 * we suppress F_CONFIG if we know QEMU's internal code cannot
2029 * support it.
2030 */
2031 protocol_features &= VHOST_USER_PROTOCOL_FEATURE_MASK;
2032
2033 if (supports_f_config) {
2034 if (!virtio_has_feature(protocol_features,
2035 VHOST_USER_PROTOCOL_F_CONFIG)) {
2036 error_setg(errp, "vhost-user device expecting "
2037 "VHOST_USER_PROTOCOL_F_CONFIG but the vhost-user backend does "
2038 "not support it.");
2039 return -EPROTO;
2040 }
2041 } else {
2042 if (virtio_has_feature(protocol_features,
2043 VHOST_USER_PROTOCOL_F_CONFIG)) {
2044 warn_report("vhost-user backend supports "
2045 "VHOST_USER_PROTOCOL_F_CONFIG but QEMU does not.");
2046 protocol_features &= ~(1ULL << VHOST_USER_PROTOCOL_F_CONFIG);
2047 }
2048 }
2049
2050 /* final set of protocol features */
2051 dev->protocol_features = protocol_features;
2052 err = vhost_user_set_protocol_features(dev, dev->protocol_features);
2053 if (err < 0) {
2054 error_setg_errno(errp, EPROTO, "vhost_backend_init failed");
2055 return -EPROTO;
2056 }
2057
2058 /* Query the maximum number of queues if the backend supports multiqueue */
2059 if (dev->protocol_features & (1ULL << VHOST_USER_PROTOCOL_F_MQ)) {
2060 err = vhost_user_get_u64(dev, VHOST_USER_GET_QUEUE_NUM,
2061 &dev->max_queues);
2062 if (err < 0) {
2063 error_setg_errno(errp, EPROTO, "vhost_backend_init failed");
2064 return -EPROTO;
2065 }
2066 } else {
2067 dev->max_queues = 1;
2068 }
2069
2070 if (dev->num_queues && dev->max_queues < dev->num_queues) {
2071 error_setg(errp, "The maximum number of queues supported by the "
2072 "backend is %" PRIu64, dev->max_queues);
2073 return -EINVAL;
2074 }
2075
2076 if (virtio_has_feature(features, VIRTIO_F_IOMMU_PLATFORM) &&
2077 !(virtio_has_feature(dev->protocol_features,
2078 VHOST_USER_PROTOCOL_F_BACKEND_REQ) &&
2079 virtio_has_feature(dev->protocol_features,
2080 VHOST_USER_PROTOCOL_F_REPLY_ACK))) {
2081 error_setg(errp, "IOMMU support requires reply-ack and "
2082 "backend-req protocol features.");
2083 return -EINVAL;
2084 }
2085
2086 /* get max memory regions if backend supports configurable RAM slots */
2087 if (!virtio_has_feature(dev->protocol_features,
2088 VHOST_USER_PROTOCOL_F_CONFIGURE_MEM_SLOTS)) {
2089 u->user->memory_slots = VHOST_MEMORY_BASELINE_NREGIONS;
2090 } else {
2091 err = vhost_user_get_max_memslots(dev, &ram_slots);
2092 if (err < 0) {
2093 error_setg_errno(errp, EPROTO, "vhost_backend_init failed");
2094 return -EPROTO;
2095 }
2096
2097 if (ram_slots < u->user->memory_slots) {
2098 error_setg(errp, "The backend specified a max ram slots limit "
2099 "of %" PRIu64", when the prior validated limit was "
2100 "%d. This limit should never decrease.", ram_slots,
2101 u->user->memory_slots);
2102 return -EINVAL;
2103 }
2104
2105 u->user->memory_slots = MIN(ram_slots, VHOST_USER_MAX_RAM_SLOTS);
2106 }
2107 }
2108
2109 if (dev->migration_blocker == NULL &&
2110 !virtio_has_feature(dev->protocol_features,
2111 VHOST_USER_PROTOCOL_F_LOG_SHMFD)) {
2112 error_setg(&dev->migration_blocker,
2113 "Migration disabled: vhost-user backend lacks "
2114 "VHOST_USER_PROTOCOL_F_LOG_SHMFD feature.");
2115 }
2116
2117 if (dev->vq_index == 0) {
2118 err = vhost_setup_backend_channel(dev);
2119 if (err < 0) {
2120 error_setg_errno(errp, EPROTO, "vhost_backend_init failed");
2121 return -EPROTO;
2122 }
2123 }
2124
2125 u->postcopy_notifier.notify = vhost_user_postcopy_notifier;
2126 postcopy_add_notifier(&u->postcopy_notifier);
2127
2128 return 0;
2129 }
2130
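/*
 * Undo vhost_user_backend_init(): remove the postcopy notifier and
 * userfault fd, close the backend channel and free the per-device state.
 */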
2131 static int vhost_user_backend_cleanup(struct vhost_dev *dev)
2132 {
2133 struct vhost_user *u;
2134
2135 assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER);
2136
2137 u = dev->opaque;
2138 if (u->postcopy_notifier.notify) {
2139 postcopy_remove_notifier(&u->postcopy_notifier);
2140 u->postcopy_notifier.notify = NULL;
2141 }
2142 u->postcopy_listen = false;
2143 if (u->postcopy_fd.handler) {
2144 postcopy_unregister_shared_ufd(&u->postcopy_fd);
2145 close(u->postcopy_fd.fd);
2146 u->postcopy_fd.handler = NULL;
2147 }
2148 if (u->backend_ioc) {
2149 close_backend_channel(u);
2150 }
2151 g_free(u->region_rb);
2152 u->region_rb = NULL;
2153 g_free(u->region_rb_offset);
2154 u->region_rb_offset = NULL;
2155 u->region_rb_len = 0;
2156 g_free(u);
2157 dev->opaque = NULL;
2158
2159 return 0;
2160 }
2161
2162 static int vhost_user_get_vq_index(struct vhost_dev *dev, int idx)
2163 {
2164 assert(idx >= dev->vq_index && idx < dev->vq_index + dev->nvqs);
2165
2166 return idx;
2167 }
2168
2169 static int vhost_user_memslots_limit(struct vhost_dev *dev)
2170 {
2171 struct vhost_user *u = dev->opaque;
2172
2173 return u->user->memory_slots;
2174 }
2175
2176 static bool vhost_user_requires_shm_log(struct vhost_dev *dev)
2177 {
2178 assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER);
2179
2180 return virtio_has_feature(dev->protocol_features,
2181 VHOST_USER_PROTOCOL_F_LOG_SHMFD);
2182 }
2183
2184 static int vhost_user_migration_done(struct vhost_dev *dev, char* mac_addr)
2185 {
2186 VhostUserMsg msg = { };
2187
2188 assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER);
2189
2190 /* If the guest supports GUEST_ANNOUNCE, do nothing */
2191 if (virtio_has_feature(dev->acked_features, VIRTIO_NET_F_GUEST_ANNOUNCE)) {
2192 return 0;
2193 }
2194
2195 /* If the backend supports VHOST_USER_PROTOCOL_F_RARP, ask it to send the RARP */
2196 if (virtio_has_feature(dev->protocol_features,
2197 VHOST_USER_PROTOCOL_F_RARP)) {
2198 msg.hdr.request = VHOST_USER_SEND_RARP;
2199 msg.hdr.flags = VHOST_USER_VERSION;
2200 memcpy((char *)&msg.payload.u64, mac_addr, 6);
2201 msg.hdr.size = sizeof(msg.payload.u64);
2202
2203 return vhost_user_write(dev, &msg, NULL, 0);
2204 }
2205 return -ENOTSUP;
2206 }
2207
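/*
 * Two memory sections can only be merged into a single vhost-user
 * region if they are backed by the same file descriptor.
 */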
2208 static bool vhost_user_can_merge(struct vhost_dev *dev,
2209 uint64_t start1, uint64_t size1,
2210 uint64_t start2, uint64_t size2)
2211 {
2212 ram_addr_t offset;
2213 int mfd, rfd;
2214
2215 (void)vhost_user_get_mr_data(start1, &offset, &mfd);
2216 (void)vhost_user_get_mr_data(start2, &offset, &rfd);
2217
2218 return mfd == rfd;
2219 }
2220
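/*
 * Tell the backend the negotiated MTU. If REPLY_ACK was negotiated,
 * the backend must acknowledge that it can handle the requested MTU.
 */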
2221 static int vhost_user_net_set_mtu(struct vhost_dev *dev, uint16_t mtu)
2222 {
2223 VhostUserMsg msg;
2224 bool reply_supported = virtio_has_feature(dev->protocol_features,
2225 VHOST_USER_PROTOCOL_F_REPLY_ACK);
2226 int ret;
2227
2228 if (!(dev->protocol_features & (1ULL << VHOST_USER_PROTOCOL_F_NET_MTU))) {
2229 return 0;
2230 }
2231
2232 msg.hdr.request = VHOST_USER_NET_SET_MTU;
2233 msg.payload.u64 = mtu;
2234 msg.hdr.size = sizeof(msg.payload.u64);
2235 msg.hdr.flags = VHOST_USER_VERSION;
2236 if (reply_supported) {
2237 msg.hdr.flags |= VHOST_USER_NEED_REPLY_MASK;
2238 }
2239
2240 ret = vhost_user_write(dev, &msg, NULL, 0);
2241 if (ret < 0) {
2242 return ret;
2243 }
2244
2245 /* If reply_ack is supported, the backend must ack that the specified MTU is valid */
2246 if (reply_supported) {
2247 return process_message_reply(dev, &msg);
2248 }
2249
2250 return 0;
2251 }
2252
2253 static int vhost_user_send_device_iotlb_msg(struct vhost_dev *dev,
2254 struct vhost_iotlb_msg *imsg)
2255 {
2256 int ret;
2257 VhostUserMsg msg = {
2258 .hdr.request = VHOST_USER_IOTLB_MSG,
2259 .hdr.size = sizeof(msg.payload.iotlb),
2260 .hdr.flags = VHOST_USER_VERSION | VHOST_USER_NEED_REPLY_MASK,
2261 .payload.iotlb = *imsg,
2262 };
2263
2264 ret = vhost_user_write(dev, &msg, NULL, 0);
2265 if (ret < 0) {
2266 return ret;
2267 }
2268
2269 return process_message_reply(dev, &msg);
2270 }
2271
2272
2273 static void vhost_user_set_iotlb_callback(struct vhost_dev *dev, int enabled)
2274 {
2275 /* No-op as the receive channel is not dedicated to IOTLB messages. */
2276 }
2277
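/*
 * Read the device config space from the backend; requires the
 * VHOST_USER_PROTOCOL_F_CONFIG protocol feature.
 */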
2278 static int vhost_user_get_config(struct vhost_dev *dev, uint8_t *config,
2279 uint32_t config_len, Error **errp)
2280 {
2281 int ret;
2282 VhostUserMsg msg = {
2283 .hdr.request = VHOST_USER_GET_CONFIG,
2284 .hdr.flags = VHOST_USER_VERSION,
2285 .hdr.size = VHOST_USER_CONFIG_HDR_SIZE + config_len,
2286 };
2287
2288 if (!virtio_has_feature(dev->protocol_features,
2289 VHOST_USER_PROTOCOL_F_CONFIG)) {
2290 error_setg(errp, "VHOST_USER_PROTOCOL_F_CONFIG not supported");
2291 return -EINVAL;
2292 }
2293
2294 assert(config_len <= VHOST_USER_MAX_CONFIG_SIZE);
2295
2296 msg.payload.config.offset = 0;
2297 msg.payload.config.size = config_len;
2298 ret = vhost_user_write(dev, &msg, NULL, 0);
2299 if (ret < 0) {
2300 error_setg_errno(errp, -ret, "vhost_get_config failed");
2301 return ret;
2302 }
2303
2304 ret = vhost_user_read(dev, &msg);
2305 if (ret < 0) {
2306 error_setg_errno(errp, -ret, "vhost_get_config failed");
2307 return ret;
2308 }
2309
2310 if (msg.hdr.request != VHOST_USER_GET_CONFIG) {
2311 error_setg(errp,
2312 "Received unexpected msg type. Expected %d received %d",
2313 VHOST_USER_GET_CONFIG, msg.hdr.request);
2314 return -EPROTO;
2315 }
2316
2317 if (msg.hdr.size != VHOST_USER_CONFIG_HDR_SIZE + config_len) {
2318 error_setg(errp, "Received bad msg size.");
2319 return -EPROTO;
2320 }
2321
2322 memcpy(config, msg.payload.config.region, config_len);
2323
2324 return 0;
2325 }
2326
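/*
 * Write (part of) the device config space to the backend; requires the
 * VHOST_USER_PROTOCOL_F_CONFIG protocol feature.
 */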
2327 static int vhost_user_set_config(struct vhost_dev *dev, const uint8_t *data,
2328 uint32_t offset, uint32_t size, uint32_t flags)
2329 {
2330 int ret;
2331 uint8_t *p;
2332 bool reply_supported = virtio_has_feature(dev->protocol_features,
2333 VHOST_USER_PROTOCOL_F_REPLY_ACK);
2334
2335 VhostUserMsg msg = {
2336 .hdr.request = VHOST_USER_SET_CONFIG,
2337 .hdr.flags = VHOST_USER_VERSION,
2338 .hdr.size = VHOST_USER_CONFIG_HDR_SIZE + size,
2339 };
2340
2341 if (!virtio_has_feature(dev->protocol_features,
2342 VHOST_USER_PROTOCOL_F_CONFIG)) {
2343 return -ENOTSUP;
2344 }
2345
2346 if (reply_supported) {
2347 msg.hdr.flags |= VHOST_USER_NEED_REPLY_MASK;
2348 }
2349
2350 if (size > VHOST_USER_MAX_CONFIG_SIZE) {
2351 return -EINVAL;
2352 }
2353
2354 msg.payload.config.offset = offset;
2355 msg.payload.config.size = size;
2356 msg.payload.config.flags = flags;
2357 p = msg.payload.config.region;
2358 memcpy(p, data, size);
2359
2360 ret = vhost_user_write(dev, &msg, NULL, 0);
2361 if (ret < 0) {
2362 return ret;
2363 }
2364
2365 if (reply_supported) {
2366 return process_message_reply(dev, &msg);
2367 }
2368
2369 return 0;
2370 }
2371
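/*
 * Forward a cryptodev session-create request (symmetric or asymmetric)
 * to the backend and return the session id it allocated.
 */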
2372 static int vhost_user_crypto_create_session(struct vhost_dev *dev,
2373 void *session_info,
2374 uint64_t *session_id)
2375 {
2376 int ret;
2377 bool crypto_session = virtio_has_feature(dev->protocol_features,
2378 VHOST_USER_PROTOCOL_F_CRYPTO_SESSION);
2379 CryptoDevBackendSessionInfo *backend_info = session_info;
2380 VhostUserMsg msg = {
2381 .hdr.request = VHOST_USER_CREATE_CRYPTO_SESSION,
2382 .hdr.flags = VHOST_USER_VERSION,
2383 .hdr.size = sizeof(msg.payload.session),
2384 };
2385
2386 assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER);
2387
2388 if (!crypto_session) {
2389 error_report("vhost-user trying to send unhandled ioctl");
2390 return -ENOTSUP;
2391 }
2392
2393 if (backend_info->op_code == VIRTIO_CRYPTO_AKCIPHER_CREATE_SESSION) {
2394 CryptoDevBackendAsymSessionInfo *sess = &backend_info->u.asym_sess_info;
2395 size_t keylen;
2396
2397 memcpy(&msg.payload.session.u.asym.session_setup_data, sess,
2398 sizeof(CryptoDevBackendAsymSessionInfo));
2399 if (sess->keylen) {
2400 keylen = sizeof(msg.payload.session.u.asym.key);
2401 if (sess->keylen > keylen) {
2402 error_report("Unsupported asymmetric key size");
2403 return -ENOTSUP;
2404 }
2405
2406 memcpy(&msg.payload.session.u.asym.key, sess->key,
2407 sess->keylen);
2408 }
2409 } else {
2410 CryptoDevBackendSymSessionInfo *sess = &backend_info->u.sym_sess_info;
2411 size_t keylen;
2412
2413 memcpy(&msg.payload.session.u.sym.session_setup_data, sess,
2414 sizeof(CryptoDevBackendSymSessionInfo));
2415 if (sess->key_len) {
2416 keylen = sizeof(msg.payload.session.u.sym.key);
2417 if (sess->key_len > keylen) {
2418 error_report("Unsupported cipher key size");
2419 return -ENOTSUP;
2420 }
2421
2422 memcpy(&msg.payload.session.u.sym.key, sess->cipher_key,
2423 sess->key_len);
2424 }
2425
2426 if (sess->auth_key_len > 0) {
2427 keylen = sizeof(msg.payload.session.u.sym.auth_key);
2428 if (sess->auth_key_len > keylen) {
2429 error_report("Unsupported auth key size");
2430 return -ENOTSUP;
2431 }
2432
2433 memcpy(&msg.payload.session.u.sym.auth_key, sess->auth_key,
2434 sess->auth_key_len);
2435 }
2436 }
2437
2438 msg.payload.session.op_code = backend_info->op_code;
2439 msg.payload.session.session_id = backend_info->session_id;
2440 ret = vhost_user_write(dev, &msg, NULL, 0);
2441 if (ret < 0) {
2442 error_report("vhost_user_write() returned %d, create session failed",
2443 ret);
2444 return ret;
2445 }
2446
2447 ret = vhost_user_read(dev, &msg);
2448 if (ret < 0) {
2449 error_report("vhost_user_read() returned %d, create session failed",
2450 ret);
2451 return ret;
2452 }
2453
2454 if (msg.hdr.request != VHOST_USER_CREATE_CRYPTO_SESSION) {
2455 error_report("Received unexpected msg type. Expected %d received %d",
2456 VHOST_USER_CREATE_CRYPTO_SESSION, msg.hdr.request);
2457 return -EPROTO;
2458 }
2459
2460 if (msg.hdr.size != sizeof(msg.payload.session)) {
2461 error_report("Received bad msg size.");
2462 return -EPROTO;
2463 }
2464
2465 if (msg.payload.session.session_id < 0) {
2466 error_report("Bad session id: %" PRId64 "",
2467 msg.payload.session.session_id);
2468 return -EINVAL;
2469 }
2470 *session_id = msg.payload.session.session_id;
2471
2472 return 0;
2473 }
2474
2475 static int
2476 vhost_user_crypto_close_session(struct vhost_dev *dev, uint64_t session_id)
2477 {
2478 int ret;
2479 bool crypto_session = virtio_has_feature(dev->protocol_features,
2480 VHOST_USER_PROTOCOL_F_CRYPTO_SESSION);
2481 VhostUserMsg msg = {
2482 .hdr.request = VHOST_USER_CLOSE_CRYPTO_SESSION,
2483 .hdr.flags = VHOST_USER_VERSION,
2484 .hdr.size = sizeof(msg.payload.u64),
2485 };
2486 msg.payload.u64 = session_id;
2487
2488 if (!crypto_session) {
2489 error_report("vhost-user trying to send unhandled ioctl");
2490 return -ENOTSUP;
2491 }
2492
2493 ret = vhost_user_write(dev, &msg, NULL, 0);
2494 if (ret < 0) {
2495 error_report("vhost_user_write() returned %d, close session failed",
2496 ret);
2497 return ret;
2498 }
2499
2500 return 0;
2501 }
2502
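/*
 * Only memory sections backed by a file descriptor can be mapped by
 * the vhost-user backend.
 */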
2503 static bool vhost_user_mem_section_filter(struct vhost_dev *dev,
2504 MemoryRegionSection *section)
2505 {
2506 return memory_region_get_fd(section->mr) >= 0;
2507 }
2508
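/*
 * Obtain the backend's inflight I/O tracking area and mmap it, so that
 * in-flight requests can be resubmitted after a backend reconnect.
 */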
2509 static int vhost_user_get_inflight_fd(struct vhost_dev *dev,
2510 uint16_t queue_size,
2511 struct vhost_inflight *inflight)
2512 {
2513 void *addr;
2514 int fd;
2515 int ret;
2516 struct vhost_user *u = dev->opaque;
2517 CharBackend *chr = u->user->chr;
2518 VhostUserMsg msg = {
2519 .hdr.request = VHOST_USER_GET_INFLIGHT_FD,
2520 .hdr.flags = VHOST_USER_VERSION,
2521 .payload.inflight.num_queues = dev->nvqs,
2522 .payload.inflight.queue_size = queue_size,
2523 .hdr.size = sizeof(msg.payload.inflight),
2524 };
2525
2526 if (!virtio_has_feature(dev->protocol_features,
2527 VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD)) {
2528 return 0;
2529 }
2530
2531 ret = vhost_user_write(dev, &msg, NULL, 0);
2532 if (ret < 0) {
2533 return ret;
2534 }
2535
2536 ret = vhost_user_read(dev, &msg);
2537 if (ret < 0) {
2538 return ret;
2539 }
2540
2541 if (msg.hdr.request != VHOST_USER_GET_INFLIGHT_FD) {
2542 error_report("Received unexpected msg type. "
2543 "Expected %d received %d",
2544 VHOST_USER_GET_INFLIGHT_FD, msg.hdr.request);
2545 return -EPROTO;
2546 }
2547
2548 if (msg.hdr.size != sizeof(msg.payload.inflight)) {
2549 error_report("Received bad msg size.");
2550 return -EPROTO;
2551 }
2552
2553 if (!msg.payload.inflight.mmap_size) {
2554 return 0;
2555 }
2556
2557 fd = qemu_chr_fe_get_msgfd(chr);
2558 if (fd < 0) {
2559 error_report("Failed to get mem fd");
2560 return -EIO;
2561 }
2562
2563 addr = mmap(0, msg.payload.inflight.mmap_size, PROT_READ | PROT_WRITE,
2564 MAP_SHARED, fd, msg.payload.inflight.mmap_offset);
2565
2566 if (addr == MAP_FAILED) {
2567 error_report("Failed to mmap mem fd");
2568 close(fd);
2569 return -EFAULT;
2570 }
2571
2572 inflight->addr = addr;
2573 inflight->fd = fd;
2574 inflight->size = msg.payload.inflight.mmap_size;
2575 inflight->offset = msg.payload.inflight.mmap_offset;
2576 inflight->queue_size = queue_size;
2577
2578 return 0;
2579 }
2580
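/*
 * Hand a previously obtained inflight tracking area back to the
 * backend, e.g. after a reconnect.
 */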
2581 static int vhost_user_set_inflight_fd(struct vhost_dev *dev,
2582 struct vhost_inflight *inflight)
2583 {
2584 VhostUserMsg msg = {
2585 .hdr.request = VHOST_USER_SET_INFLIGHT_FD,
2586 .hdr.flags = VHOST_USER_VERSION,
2587 .payload.inflight.mmap_size = inflight->size,
2588 .payload.inflight.mmap_offset = inflight->offset,
2589 .payload.inflight.num_queues = dev->nvqs,
2590 .payload.inflight.queue_size = inflight->queue_size,
2591 .hdr.size = sizeof(msg.payload.inflight),
2592 };
2593
2594 if (!virtio_has_feature(dev->protocol_features,
2595 VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD)) {
2596 return 0;
2597 }
2598
2599 return vhost_user_write(dev, &msg, &inflight->fd, 1);
2600 }
2601
2602 static void vhost_user_state_destroy(gpointer data)
2603 {
2604 VhostUserHostNotifier *n = (VhostUserHostNotifier *) data;
2605 if (n) {
2606 vhost_user_host_notifier_remove(n, NULL);
2607 object_unparent(OBJECT(&n->mr));
2608 /*
2609 * We can't free until vhost_user_host_notifier_remove has
2610 * done its thing, so schedule the free with RCU.
2611 */
2612 g_free_rcu(n, rcu);
2613 }
2614 }
2615
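/*
 * Initialize the shared VhostUserState; fails if the state is already
 * bound to a chardev.
 */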
2616 bool vhost_user_init(VhostUserState *user, CharBackend *chr, Error **errp)
2617 {
2618 if (user->chr) {
2619 error_setg(errp, "Cannot initialize vhost-user state");
2620 return false;
2621 }
2622 user->chr = chr;
2623 user->memory_slots = 0;
2624 user->notifiers = g_ptr_array_new_full(VIRTIO_QUEUE_MAX / 4,
2625 &vhost_user_state_destroy);
2626 return true;
2627 }
2628
2629 void vhost_user_cleanup(VhostUserState *user)
2630 {
2631 if (!user->chr) {
2632 return;
2633 }
2634 memory_region_transaction_begin();
2635 user->notifiers = (GPtrArray *) g_ptr_array_free(user->notifiers, true);
2636 memory_region_transaction_commit();
2637 user->chr = NULL;
2638 }
2639
2640
2641 typedef struct {
2642 vu_async_close_fn cb;
2643 DeviceState *dev;
2644 CharBackend *cd;
2645 struct vhost_dev *vhost;
2646 } VhostAsyncCallback;
2647
2648 static void vhost_user_async_close_bh(void *opaque)
2649 {
2650 VhostAsyncCallback *data = opaque;
2651 struct vhost_dev *vhost = data->vhost;
2652
2653 /*
2654 * If the vhost_dev has been cleared in the meantime there is
2655 * nothing left to do as some other path has completed the
2656 * cleanup.
2657 */
2658 if (vhost->vdev) {
2659 data->cb(data->dev);
2660 }
2661
2662 g_free(data);
2663 }
2664
2665 /*
2666 * We only schedule the work if the machine is running. If suspended,
2667 * we want to keep all the in-flight data as-is for migration
2668 * purposes.
2669 */
2670 void vhost_user_async_close(DeviceState *d,
2671 CharBackend *chardev, struct vhost_dev *vhost,
2672 vu_async_close_fn cb)
2673 {
2674 if (!runstate_check(RUN_STATE_SHUTDOWN)) {
2675 /*
2676 * A close event may happen during a read/write, but vhost
2677 * code assumes the vhost_dev remains setup, so delay the
2678 * stop & clear.
2679 */
2680 AioContext *ctx = qemu_get_current_aio_context();
2681 VhostAsyncCallback *data = g_new0(VhostAsyncCallback, 1);
2682
2683 /* Save data for the callback */
2684 data->cb = cb;
2685 data->dev = d;
2686 data->cd = chardev;
2687 data->vhost = vhost;
2688
2689 /* Disable any further notifications on the chardev */
2690 qemu_chr_fe_set_handlers(chardev,
2691 NULL, NULL, NULL, NULL, NULL, NULL,
2692 false);
2693
2694 aio_bh_schedule_oneshot(ctx, vhost_user_async_close_bh, data);
2695
2696 /*
2697 * Move the vhost device to the stopped state. The vhost-user
2698 * device will be cleaned up and disconnected in the BH. This can
2699 * be useful in the vhost migration code: if a disconnect is
2700 * caught, the general vhost code can get the device state
2701 * without knowing its type (in this case vhost-user).
2702 *
2703 * Note that if the vhost device is fully cleared by the time we
2704 * execute the bottom half, we won't continue with the cleanup.
2705 */
2706 vhost->started = false;
2707 }
2708 }
2709
2710 static int vhost_user_dev_start(struct vhost_dev *dev, bool started)
2711 {
2712 if (!virtio_has_feature(dev->protocol_features,
2713 VHOST_USER_PROTOCOL_F_STATUS)) {
2714 return 0;
2715 }
2716
2717 /* Set device status only for last queue pair */
2718 if (dev->vq_index + dev->nvqs != dev->vq_index_end) {
2719 return 0;
2720 }
2721
2722 if (started) {
2723 return vhost_user_add_status(dev, VIRTIO_CONFIG_S_ACKNOWLEDGE |
2724 VIRTIO_CONFIG_S_DRIVER |
2725 VIRTIO_CONFIG_S_DRIVER_OK);
2726 } else {
2727 return 0;
2728 }
2729 }
2730
2731 static void vhost_user_reset_status(struct vhost_dev *dev)
2732 {
2733 /* Set device status only for last queue pair */
2734 if (dev->vq_index + dev->nvqs != dev->vq_index_end) {
2735 return;
2736 }
2737
2738 if (virtio_has_feature(dev->protocol_features,
2739 VHOST_USER_PROTOCOL_F_STATUS)) {
2740 vhost_user_set_status(dev, 0);
2741 }
2742 }
2743
2744 const VhostOps user_ops = {
2745 .backend_type = VHOST_BACKEND_TYPE_USER,
2746 .vhost_backend_init = vhost_user_backend_init,
2747 .vhost_backend_cleanup = vhost_user_backend_cleanup,
2748 .vhost_backend_memslots_limit = vhost_user_memslots_limit,
2749 .vhost_set_log_base = vhost_user_set_log_base,
2750 .vhost_set_mem_table = vhost_user_set_mem_table,
2751 .vhost_set_vring_addr = vhost_user_set_vring_addr,
2752 .vhost_set_vring_endian = vhost_user_set_vring_endian,
2753 .vhost_set_vring_num = vhost_user_set_vring_num,
2754 .vhost_set_vring_base = vhost_user_set_vring_base,
2755 .vhost_get_vring_base = vhost_user_get_vring_base,
2756 .vhost_set_vring_kick = vhost_user_set_vring_kick,
2757 .vhost_set_vring_call = vhost_user_set_vring_call,
2758 .vhost_set_vring_err = vhost_user_set_vring_err,
2759 .vhost_set_features = vhost_user_set_features,
2760 .vhost_get_features = vhost_user_get_features,
2761 .vhost_set_owner = vhost_user_set_owner,
2762 .vhost_reset_device = vhost_user_reset_device,
2763 .vhost_get_vq_index = vhost_user_get_vq_index,
2764 .vhost_set_vring_enable = vhost_user_set_vring_enable,
2765 .vhost_requires_shm_log = vhost_user_requires_shm_log,
2766 .vhost_migration_done = vhost_user_migration_done,
2767 .vhost_backend_can_merge = vhost_user_can_merge,
2768 .vhost_net_set_mtu = vhost_user_net_set_mtu,
2769 .vhost_set_iotlb_callback = vhost_user_set_iotlb_callback,
2770 .vhost_send_device_iotlb_msg = vhost_user_send_device_iotlb_msg,
2771 .vhost_get_config = vhost_user_get_config,
2772 .vhost_set_config = vhost_user_set_config,
2773 .vhost_crypto_create_session = vhost_user_crypto_create_session,
2774 .vhost_crypto_close_session = vhost_user_crypto_close_session,
2775 .vhost_backend_mem_section_filter = vhost_user_mem_section_filter,
2776 .vhost_get_inflight_fd = vhost_user_get_inflight_fd,
2777 .vhost_set_inflight_fd = vhost_user_set_inflight_fd,
2778 .vhost_dev_start = vhost_user_dev_start,
2779 .vhost_reset_status = vhost_user_reset_status,
2780 };