1 /*
2 * vhost-user
3 *
4 * Copyright (c) 2013 Virtual Open Systems Sarl.
5 *
6 * This work is licensed under the terms of the GNU GPL, version 2 or later.
7 * See the COPYING file in the top-level directory.
8 *
9 */
10
11 #include "qemu/osdep.h"
12 #include "qapi/error.h"
13 #include "hw/virtio/vhost.h"
14 #include "hw/virtio/vhost-user.h"
15 #include "hw/virtio/vhost-backend.h"
16 #include "hw/virtio/virtio.h"
17 #include "hw/virtio/virtio-net.h"
18 #include "chardev/char-fe.h"
19 #include "io/channel-socket.h"
20 #include "sysemu/kvm.h"
21 #include "qemu/error-report.h"
22 #include "qemu/main-loop.h"
23 #include "qemu/sockets.h"
24 #include "sysemu/runstate.h"
25 #include "sysemu/cryptodev.h"
26 #include "migration/migration.h"
27 #include "migration/postcopy-ram.h"
28 #include "trace.h"
29 #include "exec/ramblock.h"
30
31 #include <sys/ioctl.h>
32 #include <sys/socket.h>
33 #include <sys/un.h>
34
35 #include "standard-headers/linux/vhost_types.h"
36
37 #ifdef CONFIG_LINUX
38 #include <linux/userfaultfd.h>
39 #endif
40
41 #define VHOST_MEMORY_BASELINE_NREGIONS 8
42 #define VHOST_USER_F_PROTOCOL_FEATURES 30
43 #define VHOST_USER_BACKEND_MAX_FDS 8
44
45 #if defined(TARGET_PPC) || defined(TARGET_PPC64)
46 #include "hw/ppc/spapr.h"
47 #define VHOST_USER_MAX_RAM_SLOTS SPAPR_MAX_RAM_SLOTS
48
49 #else
50 #define VHOST_USER_MAX_RAM_SLOTS 512
51 #endif
52
53 /*
54 * Maximum size of virtio device config space
55 */
56 #define VHOST_USER_MAX_CONFIG_SIZE 256
57
58 enum VhostUserProtocolFeature {
59 VHOST_USER_PROTOCOL_F_MQ = 0,
60 VHOST_USER_PROTOCOL_F_LOG_SHMFD = 1,
61 VHOST_USER_PROTOCOL_F_RARP = 2,
62 VHOST_USER_PROTOCOL_F_REPLY_ACK = 3,
63 VHOST_USER_PROTOCOL_F_NET_MTU = 4,
64 VHOST_USER_PROTOCOL_F_BACKEND_REQ = 5,
65 VHOST_USER_PROTOCOL_F_CROSS_ENDIAN = 6,
66 VHOST_USER_PROTOCOL_F_CRYPTO_SESSION = 7,
67 VHOST_USER_PROTOCOL_F_PAGEFAULT = 8,
68 VHOST_USER_PROTOCOL_F_CONFIG = 9,
69 VHOST_USER_PROTOCOL_F_BACKEND_SEND_FD = 10,
70 VHOST_USER_PROTOCOL_F_HOST_NOTIFIER = 11,
71 VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD = 12,
72 VHOST_USER_PROTOCOL_F_RESET_DEVICE = 13,
73 /* Feature 14 reserved for VHOST_USER_PROTOCOL_F_INBAND_NOTIFICATIONS. */
74 VHOST_USER_PROTOCOL_F_CONFIGURE_MEM_SLOTS = 15,
75 VHOST_USER_PROTOCOL_F_STATUS = 16,
76 VHOST_USER_PROTOCOL_F_MAX
77 };
78
79 #define VHOST_USER_PROTOCOL_FEATURE_MASK ((1 << VHOST_USER_PROTOCOL_F_MAX) - 1)
80
81 typedef enum VhostUserRequest {
82 VHOST_USER_NONE = 0,
83 VHOST_USER_GET_FEATURES = 1,
84 VHOST_USER_SET_FEATURES = 2,
85 VHOST_USER_SET_OWNER = 3,
86 VHOST_USER_RESET_OWNER = 4,
87 VHOST_USER_SET_MEM_TABLE = 5,
88 VHOST_USER_SET_LOG_BASE = 6,
89 VHOST_USER_SET_LOG_FD = 7,
90 VHOST_USER_SET_VRING_NUM = 8,
91 VHOST_USER_SET_VRING_ADDR = 9,
92 VHOST_USER_SET_VRING_BASE = 10,
93 VHOST_USER_GET_VRING_BASE = 11,
94 VHOST_USER_SET_VRING_KICK = 12,
95 VHOST_USER_SET_VRING_CALL = 13,
96 VHOST_USER_SET_VRING_ERR = 14,
97 VHOST_USER_GET_PROTOCOL_FEATURES = 15,
98 VHOST_USER_SET_PROTOCOL_FEATURES = 16,
99 VHOST_USER_GET_QUEUE_NUM = 17,
100 VHOST_USER_SET_VRING_ENABLE = 18,
101 VHOST_USER_SEND_RARP = 19,
102 VHOST_USER_NET_SET_MTU = 20,
103 VHOST_USER_SET_BACKEND_REQ_FD = 21,
104 VHOST_USER_IOTLB_MSG = 22,
105 VHOST_USER_SET_VRING_ENDIAN = 23,
106 VHOST_USER_GET_CONFIG = 24,
107 VHOST_USER_SET_CONFIG = 25,
108 VHOST_USER_CREATE_CRYPTO_SESSION = 26,
109 VHOST_USER_CLOSE_CRYPTO_SESSION = 27,
110 VHOST_USER_POSTCOPY_ADVISE = 28,
111 VHOST_USER_POSTCOPY_LISTEN = 29,
112 VHOST_USER_POSTCOPY_END = 30,
113 VHOST_USER_GET_INFLIGHT_FD = 31,
114 VHOST_USER_SET_INFLIGHT_FD = 32,
115 VHOST_USER_GPU_SET_SOCKET = 33,
116 VHOST_USER_RESET_DEVICE = 34,
117 /* Message number 35 reserved for VHOST_USER_VRING_KICK. */
118 VHOST_USER_GET_MAX_MEM_SLOTS = 36,
119 VHOST_USER_ADD_MEM_REG = 37,
120 VHOST_USER_REM_MEM_REG = 38,
121 VHOST_USER_SET_STATUS = 39,
122 VHOST_USER_GET_STATUS = 40,
123 VHOST_USER_MAX
124 } VhostUserRequest;
125
126 typedef enum VhostUserSlaveRequest {
127 VHOST_USER_BACKEND_NONE = 0,
128 VHOST_USER_BACKEND_IOTLB_MSG = 1,
129 VHOST_USER_BACKEND_CONFIG_CHANGE_MSG = 2,
130 VHOST_USER_BACKEND_VRING_HOST_NOTIFIER_MSG = 3,
131 VHOST_USER_BACKEND_MAX
132 } VhostUserSlaveRequest;
133
134 typedef struct VhostUserMemoryRegion {
135 uint64_t guest_phys_addr;
136 uint64_t memory_size;
137 uint64_t userspace_addr;
138 uint64_t mmap_offset;
139 } VhostUserMemoryRegion;
140
141 typedef struct VhostUserMemory {
142 uint32_t nregions;
143 uint32_t padding;
144 VhostUserMemoryRegion regions[VHOST_MEMORY_BASELINE_NREGIONS];
145 } VhostUserMemory;
146
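/*
 * Single-region payload used by VHOST_USER_ADD_MEM_REG and
 * VHOST_USER_REM_MEM_REG when the backend has negotiated
 * VHOST_USER_PROTOCOL_F_CONFIGURE_MEM_SLOTS.
 */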
147 typedef struct VhostUserMemRegMsg {
148 uint64_t padding;
149 VhostUserMemoryRegion region;
150 } VhostUserMemRegMsg;
151
152 typedef struct VhostUserLog {
153 uint64_t mmap_size;
154 uint64_t mmap_offset;
155 } VhostUserLog;
156
157 typedef struct VhostUserConfig {
158 uint32_t offset;
159 uint32_t size;
160 uint32_t flags;
161 uint8_t region[VHOST_USER_MAX_CONFIG_SIZE];
162 } VhostUserConfig;
163
164 #define VHOST_CRYPTO_SYM_HMAC_MAX_KEY_LEN 512
165 #define VHOST_CRYPTO_SYM_CIPHER_MAX_KEY_LEN 64
166
167 typedef struct VhostUserCryptoSession {
168 /* session id for success, -1 on errors */
169 int64_t session_id;
170 CryptoDevBackendSymSessionInfo session_setup_data;
171 uint8_t key[VHOST_CRYPTO_SYM_CIPHER_MAX_KEY_LEN];
172 uint8_t auth_key[VHOST_CRYPTO_SYM_HMAC_MAX_KEY_LEN];
173 } VhostUserCryptoSession;
174
175 static VhostUserConfig c __attribute__ ((unused));
176 #define VHOST_USER_CONFIG_HDR_SIZE (sizeof(c.offset) \
177 + sizeof(c.size) \
178 + sizeof(c.flags))
179
180 typedef struct VhostUserVringArea {
181 uint64_t u64;
182 uint64_t size;
183 uint64_t offset;
184 } VhostUserVringArea;
185
186 typedef struct VhostUserInflight {
187 uint64_t mmap_size;
188 uint64_t mmap_offset;
189 uint16_t num_queues;
190 uint16_t queue_size;
191 } VhostUserInflight;
192
193 typedef struct {
194 VhostUserRequest request;
195
196 #define VHOST_USER_VERSION_MASK (0x3)
197 #define VHOST_USER_REPLY_MASK (0x1 << 2)
198 #define VHOST_USER_NEED_REPLY_MASK (0x1 << 3)
199 uint32_t flags;
200 uint32_t size; /* the following payload size */
201 } QEMU_PACKED VhostUserHeader;
202
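/*
 * On the wire a message is the packed header immediately followed by
 * hdr.size bytes of the payload union below.
 */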
203 typedef union {
204 #define VHOST_USER_VRING_IDX_MASK (0xff)
205 #define VHOST_USER_VRING_NOFD_MASK (0x1 << 8)
206 uint64_t u64;
207 struct vhost_vring_state state;
208 struct vhost_vring_addr addr;
209 VhostUserMemory memory;
210 VhostUserMemRegMsg mem_reg;
211 VhostUserLog log;
212 struct vhost_iotlb_msg iotlb;
213 VhostUserConfig config;
214 VhostUserCryptoSession session;
215 VhostUserVringArea area;
216 VhostUserInflight inflight;
217 } VhostUserPayload;
218
219 typedef struct VhostUserMsg {
220 VhostUserHeader hdr;
221 VhostUserPayload payload;
222 } QEMU_PACKED VhostUserMsg;
223
224 static VhostUserMsg m __attribute__ ((unused));
225 #define VHOST_USER_HDR_SIZE (sizeof(VhostUserHeader))
226
227 #define VHOST_USER_PAYLOAD_SIZE (sizeof(VhostUserPayload))
228
229 /* The version of the protocol we support */
230 #define VHOST_USER_VERSION (0x1)
231
232 struct vhost_user {
233 struct vhost_dev *dev;
234 /* Shared between vhost devs of the same virtio device */
235 VhostUserState *user;
236 QIOChannel *slave_ioc;
237 GSource *slave_src;
238 NotifierWithReturn postcopy_notifier;
239 struct PostCopyFD postcopy_fd;
240 uint64_t postcopy_client_bases[VHOST_USER_MAX_RAM_SLOTS];
241 /* Length of the region_rb and region_rb_offset arrays */
242 size_t region_rb_len;
243 /* RAMBlock associated with a given region */
244 RAMBlock **region_rb;
245 /*
246 * The offset from the start of the RAMBlock to the start of the
247 * vhost region.
248 */
249 ram_addr_t *region_rb_offset;
250
251 /* True once we've entered postcopy_listen */
252 bool postcopy_listen;
253
254 /* Our current regions */
255 int num_shadow_regions;
256 struct vhost_memory_region shadow_regions[VHOST_USER_MAX_RAM_SLOTS];
257 };
258
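/*
 * A region to add to or remove from the backend, together with its index
 * in the device memory table and the index of its file descriptor, as
 * computed by scrub_shadow_regions().
 */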
259 struct scrub_regions {
260 struct vhost_memory_region *region;
261 int reg_idx;
262 int fd_idx;
263 };
264
265 static bool ioeventfd_enabled(void)
266 {
267 return !kvm_enabled() || kvm_eventfds_enabled();
268 }
269
270 static int vhost_user_read_header(struct vhost_dev *dev, VhostUserMsg *msg)
271 {
272 struct vhost_user *u = dev->opaque;
273 CharBackend *chr = u->user->chr;
274 uint8_t *p = (uint8_t *) msg;
275 int r, size = VHOST_USER_HDR_SIZE;
276
277 r = qemu_chr_fe_read_all(chr, p, size);
278 if (r != size) {
279 int saved_errno = errno;
280 error_report("Failed to read msg header. Read %d instead of %d."
281 " Original request %d.", r, size, msg->hdr.request);
282 return r < 0 ? -saved_errno : -EIO;
283 }
284
285 /* validate received flags */
286 if (msg->hdr.flags != (VHOST_USER_REPLY_MASK | VHOST_USER_VERSION)) {
287 error_report("Failed to read msg header."
288 " Flags 0x%x instead of 0x%x.", msg->hdr.flags,
289 VHOST_USER_REPLY_MASK | VHOST_USER_VERSION);
290 return -EPROTO;
291 }
292
293 trace_vhost_user_read(msg->hdr.request, msg->hdr.flags);
294
295 return 0;
296 }
297
298 static int vhost_user_read(struct vhost_dev *dev, VhostUserMsg *msg)
299 {
300 struct vhost_user *u = dev->opaque;
301 CharBackend *chr = u->user->chr;
302 uint8_t *p = (uint8_t *) msg;
303 int r, size;
304
305 r = vhost_user_read_header(dev, msg);
306 if (r < 0) {
307 return r;
308 }
309
310 /* validate message size is sane */
311 if (msg->hdr.size > VHOST_USER_PAYLOAD_SIZE) {
312 error_report("Failed to read msg header."
313 " Size %d exceeds the maximum %zu.", msg->hdr.size,
314 VHOST_USER_PAYLOAD_SIZE);
315 return -EPROTO;
316 }
317
318 if (msg->hdr.size) {
319 p += VHOST_USER_HDR_SIZE;
320 size = msg->hdr.size;
321 r = qemu_chr_fe_read_all(chr, p, size);
322 if (r != size) {
323 int saved_errno = errno;
324 error_report("Failed to read msg payload."
325 " Read %d instead of %d.", r, msg->hdr.size);
326 return r < 0 ? -saved_errno : -EIO;
327 }
328 }
329
330 return 0;
331 }
332
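/*
 * If the message was sent with VHOST_USER_NEED_REPLY_MASK set, read the
 * backend's reply and treat a non-zero u64 payload as failure (REPLY_ACK
 * semantics); otherwise there is nothing to wait for.
 */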
333 static int process_message_reply(struct vhost_dev *dev,
334 const VhostUserMsg *msg)
335 {
336 int ret;
337 VhostUserMsg msg_reply;
338
339 if ((msg->hdr.flags & VHOST_USER_NEED_REPLY_MASK) == 0) {
340 return 0;
341 }
342
343 ret = vhost_user_read(dev, &msg_reply);
344 if (ret < 0) {
345 return ret;
346 }
347
348 if (msg_reply.hdr.request != msg->hdr.request) {
349 error_report("Received unexpected msg type. "
350 "Expected %d received %d",
351 msg->hdr.request, msg_reply.hdr.request);
352 return -EPROTO;
353 }
354
355 return msg_reply.payload.u64 ? -EIO : 0;
356 }
357
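/*
 * These requests apply to the device as a whole rather than to a single
 * vring, so they are only sent for the first queue pair (vq_index == 0).
 */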
358 static bool vhost_user_one_time_request(VhostUserRequest request)
359 {
360 switch (request) {
361 case VHOST_USER_SET_OWNER:
362 case VHOST_USER_RESET_OWNER:
363 case VHOST_USER_SET_MEM_TABLE:
364 case VHOST_USER_GET_QUEUE_NUM:
365 case VHOST_USER_NET_SET_MTU:
366 case VHOST_USER_ADD_MEM_REG:
367 case VHOST_USER_REM_MEM_REG:
368 return true;
369 default:
370 return false;
371 }
372 }
373
374 /* most non-init callers ignore the error */
375 static int vhost_user_write(struct vhost_dev *dev, VhostUserMsg *msg,
376 int *fds, int fd_num)
377 {
378 struct vhost_user *u = dev->opaque;
379 CharBackend *chr = u->user->chr;
380 int ret, size = VHOST_USER_HDR_SIZE + msg->hdr.size;
381
382 /*
383  * For non-vring specific requests, like VHOST_USER_SET_MEM_TABLE,
384  * we only need to send it once, the first time. Any later such
385  * request is simply ignored.
386 */
387 if (vhost_user_one_time_request(msg->hdr.request) && dev->vq_index != 0) {
388 msg->hdr.flags &= ~VHOST_USER_NEED_REPLY_MASK;
389 return 0;
390 }
391
392 if (qemu_chr_fe_set_msgfds(chr, fds, fd_num) < 0) {
393 error_report("Failed to set msg fds.");
394 return -EINVAL;
395 }
396
397 ret = qemu_chr_fe_write_all(chr, (const uint8_t *) msg, size);
398 if (ret != size) {
399 int saved_errno = errno;
400 error_report("Failed to write msg."
401 " Wrote %d instead of %d.", ret, size);
402 return ret < 0 ? -saved_errno : -EIO;
403 }
404
405 trace_vhost_user_write(msg->hdr.request, msg->hdr.flags);
406
407 return 0;
408 }
409
410 int vhost_user_gpu_set_socket(struct vhost_dev *dev, int fd)
411 {
412 VhostUserMsg msg = {
413 .hdr.request = VHOST_USER_GPU_SET_SOCKET,
414 .hdr.flags = VHOST_USER_VERSION,
415 };
416
417 return vhost_user_write(dev, &msg, &fd, 1);
418 }
419
420 static int vhost_user_set_log_base(struct vhost_dev *dev, uint64_t base,
421 struct vhost_log *log)
422 {
423 int fds[VHOST_USER_MAX_RAM_SLOTS];
424 size_t fd_num = 0;
425 bool shmfd = virtio_has_feature(dev->protocol_features,
426 VHOST_USER_PROTOCOL_F_LOG_SHMFD);
427 int ret;
428 VhostUserMsg msg = {
429 .hdr.request = VHOST_USER_SET_LOG_BASE,
430 .hdr.flags = VHOST_USER_VERSION,
431 .payload.log.mmap_size = log->size * sizeof(*(log->log)),
432 .payload.log.mmap_offset = 0,
433 .hdr.size = sizeof(msg.payload.log),
434 };
435
436 /* Send only once with first queue pair */
437 if (dev->vq_index != 0) {
438 return 0;
439 }
440
441 if (shmfd && log->fd != -1) {
442 fds[fd_num++] = log->fd;
443 }
444
445 ret = vhost_user_write(dev, &msg, fds, fd_num);
446 if (ret < 0) {
447 return ret;
448 }
449
450 if (shmfd) {
451 msg.hdr.size = 0;
452 ret = vhost_user_read(dev, &msg);
453 if (ret < 0) {
454 return ret;
455 }
456
457 if (msg.hdr.request != VHOST_USER_SET_LOG_BASE) {
458 error_report("Received unexpected msg type. "
459 "Expected %d received %d",
460 VHOST_USER_SET_LOG_BASE, msg.hdr.request);
461 return -EPROTO;
462 }
463 }
464
465 return 0;
466 }
467
468 static MemoryRegion *vhost_user_get_mr_data(uint64_t addr, ram_addr_t *offset,
469 int *fd)
470 {
471 MemoryRegion *mr;
472
473 assert((uintptr_t)addr == addr);
474 mr = memory_region_from_host((void *)(uintptr_t)addr, offset);
475 *fd = memory_region_get_fd(mr);
476 *offset += mr->ram_block->fd_offset;
477
478 return mr;
479 }
480
481 static void vhost_user_fill_msg_region(VhostUserMemoryRegion *dst,
482 struct vhost_memory_region *src,
483 uint64_t mmap_offset)
484 {
485 assert(src != NULL && dst != NULL);
486 dst->userspace_addr = src->userspace_addr;
487 dst->memory_size = src->memory_size;
488 dst->guest_phys_addr = src->guest_phys_addr;
489 dst->mmap_offset = mmap_offset;
490 }
491
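/*
 * Build a VHOST_USER_SET_MEM_TABLE message from the device memory table:
 * every region backed by a file descriptor is added to the payload (up to
 * VHOST_MEMORY_BASELINE_NREGIONS) and its fd is collected for transmission
 * as ancillary data. With track_ramblocks, the RAMBlock and offset of each
 * region are also recorded for postcopy.
 */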
492 static int vhost_user_fill_set_mem_table_msg(struct vhost_user *u,
493 struct vhost_dev *dev,
494 VhostUserMsg *msg,
495 int *fds, size_t *fd_num,
496 bool track_ramblocks)
497 {
498 int i, fd;
499 ram_addr_t offset;
500 MemoryRegion *mr;
501 struct vhost_memory_region *reg;
502 VhostUserMemoryRegion region_buffer;
503
504 msg->hdr.request = VHOST_USER_SET_MEM_TABLE;
505
506 for (i = 0; i < dev->mem->nregions; ++i) {
507 reg = dev->mem->regions + i;
508
509 mr = vhost_user_get_mr_data(reg->userspace_addr, &offset, &fd);
510 if (fd > 0) {
511 if (track_ramblocks) {
512 assert(*fd_num < VHOST_MEMORY_BASELINE_NREGIONS);
513 trace_vhost_user_set_mem_table_withfd(*fd_num, mr->name,
514 reg->memory_size,
515 reg->guest_phys_addr,
516 reg->userspace_addr,
517 offset);
518 u->region_rb_offset[i] = offset;
519 u->region_rb[i] = mr->ram_block;
520 } else if (*fd_num == VHOST_MEMORY_BASELINE_NREGIONS) {
521 error_report("Failed preparing vhost-user memory table msg");
522 return -ENOBUFS;
523 }
524 vhost_user_fill_msg_region(&region_buffer, reg, offset);
525 msg->payload.memory.regions[*fd_num] = region_buffer;
526 fds[(*fd_num)++] = fd;
527 } else if (track_ramblocks) {
528 u->region_rb_offset[i] = 0;
529 u->region_rb[i] = NULL;
530 }
531 }
532
533 msg->payload.memory.nregions = *fd_num;
534
535 if (!*fd_num) {
536 error_report("Failed initializing vhost-user memory map, "
537 "consider using -object memory-backend-file share=on");
538 return -EINVAL;
539 }
540
541 msg->hdr.size = sizeof(msg->payload.memory.nregions);
542 msg->hdr.size += sizeof(msg->payload.memory.padding);
543 msg->hdr.size += *fd_num * sizeof(VhostUserMemoryRegion);
544
545 return 0;
546 }
547
548 static inline bool reg_equal(struct vhost_memory_region *shadow_reg,
549 struct vhost_memory_region *vdev_reg)
550 {
551 return shadow_reg->guest_phys_addr == vdev_reg->guest_phys_addr &&
552 shadow_reg->userspace_addr == vdev_reg->userspace_addr &&
553 shadow_reg->memory_size == vdev_reg->memory_size;
554 }
555
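/*
 * Compare the shadow region table against the device's current memory
 * table and build the lists of regions that must be removed from and
 * added to the backend.
 */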
556 static void scrub_shadow_regions(struct vhost_dev *dev,
557 struct scrub_regions *add_reg,
558 int *nr_add_reg,
559 struct scrub_regions *rem_reg,
560 int *nr_rem_reg, uint64_t *shadow_pcb,
561 bool track_ramblocks)
562 {
563 struct vhost_user *u = dev->opaque;
564 bool found[VHOST_USER_MAX_RAM_SLOTS] = {};
565 struct vhost_memory_region *reg, *shadow_reg;
566 int i, j, fd, add_idx = 0, rm_idx = 0, fd_num = 0;
567 ram_addr_t offset;
568 MemoryRegion *mr;
569 bool matching;
570
571 /*
572 * Find memory regions present in our shadow state which are not in
573 * the device's current memory state.
574 *
575 * Mark regions in both the shadow and device state as "found".
576 */
577 for (i = 0; i < u->num_shadow_regions; i++) {
578 shadow_reg = &u->shadow_regions[i];
579 matching = false;
580
581 for (j = 0; j < dev->mem->nregions; j++) {
582 reg = &dev->mem->regions[j];
583
584 mr = vhost_user_get_mr_data(reg->userspace_addr, &offset, &fd);
585
586 if (reg_equal(shadow_reg, reg)) {
587 matching = true;
588 found[j] = true;
589 if (track_ramblocks) {
590 /*
591 * Reset postcopy client bases, region_rb, and
592 * region_rb_offset in case regions are removed.
593 */
594 if (fd > 0) {
595 u->region_rb_offset[j] = offset;
596 u->region_rb[j] = mr->ram_block;
597 shadow_pcb[j] = u->postcopy_client_bases[i];
598 } else {
599 u->region_rb_offset[j] = 0;
600 u->region_rb[j] = NULL;
601 }
602 }
603 break;
604 }
605 }
606
607 /*
608 * If the region was not found in the current device memory state
609 * create an entry for it in the removed list.
610 */
611 if (!matching) {
612 rem_reg[rm_idx].region = shadow_reg;
613 rem_reg[rm_idx++].reg_idx = i;
614 }
615 }
616
617 /*
618 * For regions not marked "found", create entries in the added list.
619 *
620 * Note their indexes in the device memory state and the indexes of their
621 * file descriptors.
622 */
623 for (i = 0; i < dev->mem->nregions; i++) {
624 reg = &dev->mem->regions[i];
625 vhost_user_get_mr_data(reg->userspace_addr, &offset, &fd);
626 if (fd > 0) {
627 ++fd_num;
628 }
629
630 /*
631 * If the region was in both the shadow and device state we don't
632 * need to send a VHOST_USER_ADD_MEM_REG message for it.
633 */
634 if (found[i]) {
635 continue;
636 }
637
638 add_reg[add_idx].region = reg;
639 add_reg[add_idx].reg_idx = i;
640 add_reg[add_idx++].fd_idx = fd_num;
641 }
642 *nr_rem_reg = rm_idx;
643 *nr_add_reg = add_idx;
644
645 return;
646 }
647
648 static int send_remove_regions(struct vhost_dev *dev,
649 struct scrub_regions *remove_reg,
650 int nr_rem_reg, VhostUserMsg *msg,
651 bool reply_supported)
652 {
653 struct vhost_user *u = dev->opaque;
654 struct vhost_memory_region *shadow_reg;
655 int i, fd, shadow_reg_idx, ret;
656 ram_addr_t offset;
657 VhostUserMemoryRegion region_buffer;
658
659 /*
660 * The regions in remove_reg appear in the same order they do in the
661 * shadow table. Therefore we can minimize memory copies by iterating
662 * through remove_reg backwards.
663 */
664 for (i = nr_rem_reg - 1; i >= 0; i--) {
665 shadow_reg = remove_reg[i].region;
666 shadow_reg_idx = remove_reg[i].reg_idx;
667
668 vhost_user_get_mr_data(shadow_reg->userspace_addr, &offset, &fd);
669
670 if (fd > 0) {
671 msg->hdr.request = VHOST_USER_REM_MEM_REG;
672 vhost_user_fill_msg_region(&region_buffer, shadow_reg, 0);
673 msg->payload.mem_reg.region = region_buffer;
674
675 ret = vhost_user_write(dev, msg, NULL, 0);
676 if (ret < 0) {
677 return ret;
678 }
679
680 if (reply_supported) {
681 ret = process_message_reply(dev, msg);
682 if (ret) {
683 return ret;
684 }
685 }
686 }
687
688 /*
689 * At this point we know the backend has unmapped the region. It is now
690 * safe to remove it from the shadow table.
691 */
692 memmove(&u->shadow_regions[shadow_reg_idx],
693 &u->shadow_regions[shadow_reg_idx + 1],
694 sizeof(struct vhost_memory_region) *
695 (u->num_shadow_regions - shadow_reg_idx - 1));
696 u->num_shadow_regions--;
697 }
698
699 return 0;
700 }
701
702 static int send_add_regions(struct vhost_dev *dev,
703 struct scrub_regions *add_reg, int nr_add_reg,
704 VhostUserMsg *msg, uint64_t *shadow_pcb,
705 bool reply_supported, bool track_ramblocks)
706 {
707 struct vhost_user *u = dev->opaque;
708 int i, fd, ret, reg_idx, reg_fd_idx;
709 struct vhost_memory_region *reg;
710 MemoryRegion *mr;
711 ram_addr_t offset;
712 VhostUserMsg msg_reply;
713 VhostUserMemoryRegion region_buffer;
714
715 for (i = 0; i < nr_add_reg; i++) {
716 reg = add_reg[i].region;
717 reg_idx = add_reg[i].reg_idx;
718 reg_fd_idx = add_reg[i].fd_idx;
719
720 mr = vhost_user_get_mr_data(reg->userspace_addr, &offset, &fd);
721
722 if (fd > 0) {
723 if (track_ramblocks) {
724 trace_vhost_user_set_mem_table_withfd(reg_fd_idx, mr->name,
725 reg->memory_size,
726 reg->guest_phys_addr,
727 reg->userspace_addr,
728 offset);
729 u->region_rb_offset[reg_idx] = offset;
730 u->region_rb[reg_idx] = mr->ram_block;
731 }
732 msg->hdr.request = VHOST_USER_ADD_MEM_REG;
733 vhost_user_fill_msg_region(&region_buffer, reg, offset);
734 msg->payload.mem_reg.region = region_buffer;
735
736 ret = vhost_user_write(dev, msg, &fd, 1);
737 if (ret < 0) {
738 return ret;
739 }
740
741 if (track_ramblocks) {
742 uint64_t reply_gpa;
743
744 ret = vhost_user_read(dev, &msg_reply);
745 if (ret < 0) {
746 return ret;
747 }
748
749 reply_gpa = msg_reply.payload.mem_reg.region.guest_phys_addr;
750
751 if (msg_reply.hdr.request != VHOST_USER_ADD_MEM_REG) {
752                     error_report("%s: Received unexpected msg type. "
753 "Expected %d received %d", __func__,
754 VHOST_USER_ADD_MEM_REG,
755 msg_reply.hdr.request);
756 return -EPROTO;
757 }
758
759 /*
760 * We're using the same structure, just reusing one of the
761 * fields, so it should be the same size.
762 */
763 if (msg_reply.hdr.size != msg->hdr.size) {
764 error_report("%s: Unexpected size for postcopy reply "
765 "%d vs %d", __func__, msg_reply.hdr.size,
766 msg->hdr.size);
767 return -EPROTO;
768 }
769
770 /* Get the postcopy client base from the backend's reply. */
771 if (reply_gpa == dev->mem->regions[reg_idx].guest_phys_addr) {
772 shadow_pcb[reg_idx] =
773 msg_reply.payload.mem_reg.region.userspace_addr;
774 trace_vhost_user_set_mem_table_postcopy(
775 msg_reply.payload.mem_reg.region.userspace_addr,
776 msg->payload.mem_reg.region.userspace_addr,
777 reg_fd_idx, reg_idx);
778 } else {
779 error_report("%s: invalid postcopy reply for region. "
780 "Got guest physical address %" PRIX64 ", expected "
781 "%" PRIX64, __func__, reply_gpa,
782 dev->mem->regions[reg_idx].guest_phys_addr);
783 return -EPROTO;
784 }
785 } else if (reply_supported) {
786 ret = process_message_reply(dev, msg);
787 if (ret) {
788 return ret;
789 }
790 }
791 } else if (track_ramblocks) {
792 u->region_rb_offset[reg_idx] = 0;
793 u->region_rb[reg_idx] = NULL;
794 }
795
796 /*
797 * At this point, we know the backend has mapped in the new
798 * region, if the region has a valid file descriptor.
799 *
800 * The region should now be added to the shadow table.
801 */
802 u->shadow_regions[u->num_shadow_regions].guest_phys_addr =
803 reg->guest_phys_addr;
804 u->shadow_regions[u->num_shadow_regions].userspace_addr =
805 reg->userspace_addr;
806 u->shadow_regions[u->num_shadow_regions].memory_size =
807 reg->memory_size;
808 u->num_shadow_regions++;
809 }
810
811 return 0;
812 }
813
814 static int vhost_user_add_remove_regions(struct vhost_dev *dev,
815 VhostUserMsg *msg,
816 bool reply_supported,
817 bool track_ramblocks)
818 {
819 struct vhost_user *u = dev->opaque;
820 struct scrub_regions add_reg[VHOST_USER_MAX_RAM_SLOTS];
821 struct scrub_regions rem_reg[VHOST_USER_MAX_RAM_SLOTS];
822 uint64_t shadow_pcb[VHOST_USER_MAX_RAM_SLOTS] = {};
823 int nr_add_reg, nr_rem_reg;
824 int ret;
825
826 msg->hdr.size = sizeof(msg->payload.mem_reg);
827
828 /* Find the regions which need to be removed or added. */
829 scrub_shadow_regions(dev, add_reg, &nr_add_reg, rem_reg, &nr_rem_reg,
830 shadow_pcb, track_ramblocks);
831
832 if (nr_rem_reg) {
833 ret = send_remove_regions(dev, rem_reg, nr_rem_reg, msg,
834 reply_supported);
835 if (ret < 0) {
836 goto err;
837 }
838 }
839
840 if (nr_add_reg) {
841 ret = send_add_regions(dev, add_reg, nr_add_reg, msg, shadow_pcb,
842 reply_supported, track_ramblocks);
843 if (ret < 0) {
844 goto err;
845 }
846 }
847
848 if (track_ramblocks) {
849 memcpy(u->postcopy_client_bases, shadow_pcb,
850 sizeof(uint64_t) * VHOST_USER_MAX_RAM_SLOTS);
851 /*
852          * Now that we've registered this with the postcopy code, ack to the
853          * client, because we are now in a position to deal with any faults
854          * it generates.
855 */
856 /* TODO: Use this for failure cases as well with a bad value. */
857 msg->hdr.size = sizeof(msg->payload.u64);
858 msg->payload.u64 = 0; /* OK */
859
860 ret = vhost_user_write(dev, msg, NULL, 0);
861 if (ret < 0) {
862 return ret;
863 }
864 }
865
866 return 0;
867
868 err:
869 if (track_ramblocks) {
870 memcpy(u->postcopy_client_bases, shadow_pcb,
871 sizeof(uint64_t) * VHOST_USER_MAX_RAM_SLOTS);
872 }
873
874 return ret;
875 }
876
877 static int vhost_user_set_mem_table_postcopy(struct vhost_dev *dev,
878 struct vhost_memory *mem,
879 bool reply_supported,
880 bool config_mem_slots)
881 {
882 struct vhost_user *u = dev->opaque;
883 int fds[VHOST_MEMORY_BASELINE_NREGIONS];
884 size_t fd_num = 0;
885 VhostUserMsg msg_reply;
886 int region_i, msg_i;
887 int ret;
888
889 VhostUserMsg msg = {
890 .hdr.flags = VHOST_USER_VERSION,
891 };
892
893 if (u->region_rb_len < dev->mem->nregions) {
894 u->region_rb = g_renew(RAMBlock*, u->region_rb, dev->mem->nregions);
895 u->region_rb_offset = g_renew(ram_addr_t, u->region_rb_offset,
896 dev->mem->nregions);
897 memset(&(u->region_rb[u->region_rb_len]), '\0',
898 sizeof(RAMBlock *) * (dev->mem->nregions - u->region_rb_len));
899 memset(&(u->region_rb_offset[u->region_rb_len]), '\0',
900 sizeof(ram_addr_t) * (dev->mem->nregions - u->region_rb_len));
901 u->region_rb_len = dev->mem->nregions;
902 }
903
904 if (config_mem_slots) {
905 ret = vhost_user_add_remove_regions(dev, &msg, reply_supported, true);
906 if (ret < 0) {
907 return ret;
908 }
909 } else {
910 ret = vhost_user_fill_set_mem_table_msg(u, dev, &msg, fds, &fd_num,
911 true);
912 if (ret < 0) {
913 return ret;
914 }
915
916 ret = vhost_user_write(dev, &msg, fds, fd_num);
917 if (ret < 0) {
918 return ret;
919 }
920
921 ret = vhost_user_read(dev, &msg_reply);
922 if (ret < 0) {
923 return ret;
924 }
925
926 if (msg_reply.hdr.request != VHOST_USER_SET_MEM_TABLE) {
927             error_report("%s: Received unexpected msg type. "
928 "Expected %d received %d", __func__,
929 VHOST_USER_SET_MEM_TABLE, msg_reply.hdr.request);
930 return -EPROTO;
931 }
932
933 /*
934 * We're using the same structure, just reusing one of the
935 * fields, so it should be the same size.
936 */
937 if (msg_reply.hdr.size != msg.hdr.size) {
938 error_report("%s: Unexpected size for postcopy reply "
939 "%d vs %d", __func__, msg_reply.hdr.size,
940 msg.hdr.size);
941 return -EPROTO;
942 }
943
944 memset(u->postcopy_client_bases, 0,
945 sizeof(uint64_t) * VHOST_USER_MAX_RAM_SLOTS);
946
947 /*
948 * They're in the same order as the regions that were sent
949 * but some of the regions were skipped (above) if they
950          * didn't have fds.
951 */
952 for (msg_i = 0, region_i = 0;
953 region_i < dev->mem->nregions;
954 region_i++) {
955 if (msg_i < fd_num &&
956 msg_reply.payload.memory.regions[msg_i].guest_phys_addr ==
957 dev->mem->regions[region_i].guest_phys_addr) {
958 u->postcopy_client_bases[region_i] =
959 msg_reply.payload.memory.regions[msg_i].userspace_addr;
960 trace_vhost_user_set_mem_table_postcopy(
961 msg_reply.payload.memory.regions[msg_i].userspace_addr,
962 msg.payload.memory.regions[msg_i].userspace_addr,
963 msg_i, region_i);
964 msg_i++;
965 }
966 }
967 if (msg_i != fd_num) {
968 error_report("%s: postcopy reply not fully consumed "
969 "%d vs %zd",
970 __func__, msg_i, fd_num);
971 return -EIO;
972 }
973
974 /*
975          * Now that we've registered this with the postcopy code, ack to the
976          * client, because we are now in a position to deal with any faults
977          * it generates.
978 */
979 /* TODO: Use this for failure cases as well with a bad value. */
980 msg.hdr.size = sizeof(msg.payload.u64);
981 msg.payload.u64 = 0; /* OK */
982 ret = vhost_user_write(dev, &msg, NULL, 0);
983 if (ret < 0) {
984 return ret;
985 }
986 }
987
988 return 0;
989 }
990
991 static int vhost_user_set_mem_table(struct vhost_dev *dev,
992 struct vhost_memory *mem)
993 {
994 struct vhost_user *u = dev->opaque;
995 int fds[VHOST_MEMORY_BASELINE_NREGIONS];
996 size_t fd_num = 0;
997 bool do_postcopy = u->postcopy_listen && u->postcopy_fd.handler;
998 bool reply_supported = virtio_has_feature(dev->protocol_features,
999 VHOST_USER_PROTOCOL_F_REPLY_ACK);
1000 bool config_mem_slots =
1001 virtio_has_feature(dev->protocol_features,
1002 VHOST_USER_PROTOCOL_F_CONFIGURE_MEM_SLOTS);
1003 int ret;
1004
1005 if (do_postcopy) {
1006 /*
1007          * Postcopy has enough differences that it's best done in its own
1008          * version.
1009 */
1010 return vhost_user_set_mem_table_postcopy(dev, mem, reply_supported,
1011 config_mem_slots);
1012 }
1013
1014 VhostUserMsg msg = {
1015 .hdr.flags = VHOST_USER_VERSION,
1016 };
1017
1018 if (reply_supported) {
1019 msg.hdr.flags |= VHOST_USER_NEED_REPLY_MASK;
1020 }
1021
1022 if (config_mem_slots) {
1023 ret = vhost_user_add_remove_regions(dev, &msg, reply_supported, false);
1024 if (ret < 0) {
1025 return ret;
1026 }
1027 } else {
1028 ret = vhost_user_fill_set_mem_table_msg(u, dev, &msg, fds, &fd_num,
1029 false);
1030 if (ret < 0) {
1031 return ret;
1032 }
1033
1034 ret = vhost_user_write(dev, &msg, fds, fd_num);
1035 if (ret < 0) {
1036 return ret;
1037 }
1038
1039 if (reply_supported) {
1040 return process_message_reply(dev, &msg);
1041 }
1042 }
1043
1044 return 0;
1045 }
1046
1047 static int vhost_user_set_vring_endian(struct vhost_dev *dev,
1048 struct vhost_vring_state *ring)
1049 {
1050 bool cross_endian = virtio_has_feature(dev->protocol_features,
1051 VHOST_USER_PROTOCOL_F_CROSS_ENDIAN);
1052 VhostUserMsg msg = {
1053 .hdr.request = VHOST_USER_SET_VRING_ENDIAN,
1054 .hdr.flags = VHOST_USER_VERSION,
1055 .payload.state = *ring,
1056 .hdr.size = sizeof(msg.payload.state),
1057 };
1058
1059 if (!cross_endian) {
1060 error_report("vhost-user trying to send unhandled ioctl");
1061 return -ENOTSUP;
1062 }
1063
1064 return vhost_user_write(dev, &msg, NULL, 0);
1065 }
1066
1067 static int vhost_set_vring(struct vhost_dev *dev,
1068 unsigned long int request,
1069 struct vhost_vring_state *ring)
1070 {
1071 VhostUserMsg msg = {
1072 .hdr.request = request,
1073 .hdr.flags = VHOST_USER_VERSION,
1074 .payload.state = *ring,
1075 .hdr.size = sizeof(msg.payload.state),
1076 };
1077
1078 return vhost_user_write(dev, &msg, NULL, 0);
1079 }
1080
1081 static int vhost_user_set_vring_num(struct vhost_dev *dev,
1082 struct vhost_vring_state *ring)
1083 {
1084 return vhost_set_vring(dev, VHOST_USER_SET_VRING_NUM, ring);
1085 }
1086
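/* RCU callback that unmaps a notifier's old host-notifier page. */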
1087 static void vhost_user_host_notifier_free(VhostUserHostNotifier *n)
1088 {
1089 assert(n && n->unmap_addr);
1090 munmap(n->unmap_addr, qemu_real_host_page_size());
1091 n->unmap_addr = NULL;
1092 }
1093
1094 /*
1095 * clean-up function for notifier, will finally free the structure
1096 * under rcu.
1097 */
1098 static void vhost_user_host_notifier_remove(VhostUserHostNotifier *n,
1099 VirtIODevice *vdev)
1100 {
1101 if (n->addr) {
1102 if (vdev) {
1103 virtio_queue_set_host_notifier_mr(vdev, n->idx, &n->mr, false);
1104 }
1105 assert(!n->unmap_addr);
1106 n->unmap_addr = n->addr;
1107 n->addr = NULL;
1108 call_rcu(n, vhost_user_host_notifier_free, rcu);
1109 }
1110 }
1111
1112 static int vhost_user_set_vring_base(struct vhost_dev *dev,
1113 struct vhost_vring_state *ring)
1114 {
1115 return vhost_set_vring(dev, VHOST_USER_SET_VRING_BASE, ring);
1116 }
1117
1118 static int vhost_user_set_vring_enable(struct vhost_dev *dev, int enable)
1119 {
1120 int i;
1121
1122 if (!virtio_has_feature(dev->features, VHOST_USER_F_PROTOCOL_FEATURES)) {
1123 return -EINVAL;
1124 }
1125
1126 for (i = 0; i < dev->nvqs; ++i) {
1127 int ret;
1128 struct vhost_vring_state state = {
1129 .index = dev->vq_index + i,
1130 .num = enable,
1131 };
1132
1133 ret = vhost_set_vring(dev, VHOST_USER_SET_VRING_ENABLE, &state);
1134 if (ret < 0) {
1135 /*
1136              * Restoring the previous state is likely infeasible, as is
1137              * proceeding regardless of the error, so just bail out and hope
1138              * for device-level recovery.
1139 */
1140 return ret;
1141 }
1142 }
1143
1144 return 0;
1145 }
1146
1147 static VhostUserHostNotifier *fetch_notifier(VhostUserState *u,
1148 int idx)
1149 {
1150 if (idx >= u->notifiers->len) {
1151 return NULL;
1152 }
1153 return g_ptr_array_index(u->notifiers, idx);
1154 }
1155
1156 static int vhost_user_get_vring_base(struct vhost_dev *dev,
1157 struct vhost_vring_state *ring)
1158 {
1159 int ret;
1160 VhostUserMsg msg = {
1161 .hdr.request = VHOST_USER_GET_VRING_BASE,
1162 .hdr.flags = VHOST_USER_VERSION,
1163 .payload.state = *ring,
1164 .hdr.size = sizeof(msg.payload.state),
1165 };
1166 struct vhost_user *u = dev->opaque;
1167
1168 VhostUserHostNotifier *n = fetch_notifier(u->user, ring->index);
1169 if (n) {
1170 vhost_user_host_notifier_remove(n, dev->vdev);
1171 }
1172
1173 ret = vhost_user_write(dev, &msg, NULL, 0);
1174 if (ret < 0) {
1175 return ret;
1176 }
1177
1178 ret = vhost_user_read(dev, &msg);
1179 if (ret < 0) {
1180 return ret;
1181 }
1182
1183 if (msg.hdr.request != VHOST_USER_GET_VRING_BASE) {
1184 error_report("Received unexpected msg type. Expected %d received %d",
1185 VHOST_USER_GET_VRING_BASE, msg.hdr.request);
1186 return -EPROTO;
1187 }
1188
1189 if (msg.hdr.size != sizeof(msg.payload.state)) {
1190 error_report("Received bad msg size.");
1191 return -EPROTO;
1192 }
1193
1194 *ring = msg.payload.state;
1195
1196 return 0;
1197 }
1198
1199 static int vhost_set_vring_file(struct vhost_dev *dev,
1200 VhostUserRequest request,
1201 struct vhost_vring_file *file)
1202 {
1203 int fds[VHOST_USER_MAX_RAM_SLOTS];
1204 size_t fd_num = 0;
1205 VhostUserMsg msg = {
1206 .hdr.request = request,
1207 .hdr.flags = VHOST_USER_VERSION,
1208 .payload.u64 = file->index & VHOST_USER_VRING_IDX_MASK,
1209 .hdr.size = sizeof(msg.payload.u64),
1210 };
1211
1212 if (ioeventfd_enabled() && file->fd > 0) {
1213 fds[fd_num++] = file->fd;
1214 } else {
1215 msg.payload.u64 |= VHOST_USER_VRING_NOFD_MASK;
1216 }
1217
1218 return vhost_user_write(dev, &msg, fds, fd_num);
1219 }
1220
1221 static int vhost_user_set_vring_kick(struct vhost_dev *dev,
1222 struct vhost_vring_file *file)
1223 {
1224 return vhost_set_vring_file(dev, VHOST_USER_SET_VRING_KICK, file);
1225 }
1226
1227 static int vhost_user_set_vring_call(struct vhost_dev *dev,
1228 struct vhost_vring_file *file)
1229 {
1230 return vhost_set_vring_file(dev, VHOST_USER_SET_VRING_CALL, file);
1231 }
1232
1233 static int vhost_user_set_vring_err(struct vhost_dev *dev,
1234 struct vhost_vring_file *file)
1235 {
1236 return vhost_set_vring_file(dev, VHOST_USER_SET_VRING_ERR, file);
1237 }
1238
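/*
 * Send a request with an empty payload and read a single u64 back from
 * the backend's reply.
 */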
1239 static int vhost_user_get_u64(struct vhost_dev *dev, int request, uint64_t *u64)
1240 {
1241 int ret;
1242 VhostUserMsg msg = {
1243 .hdr.request = request,
1244 .hdr.flags = VHOST_USER_VERSION,
1245 };
1246
1247 if (vhost_user_one_time_request(request) && dev->vq_index != 0) {
1248 return 0;
1249 }
1250
1251 ret = vhost_user_write(dev, &msg, NULL, 0);
1252 if (ret < 0) {
1253 return ret;
1254 }
1255
1256 ret = vhost_user_read(dev, &msg);
1257 if (ret < 0) {
1258 return ret;
1259 }
1260
1261 if (msg.hdr.request != request) {
1262 error_report("Received unexpected msg type. Expected %d received %d",
1263 request, msg.hdr.request);
1264 return -EPROTO;
1265 }
1266
1267 if (msg.hdr.size != sizeof(msg.payload.u64)) {
1268 error_report("Received bad msg size.");
1269 return -EPROTO;
1270 }
1271
1272 *u64 = msg.payload.u64;
1273
1274 return 0;
1275 }
1276
1277 static int vhost_user_get_features(struct vhost_dev *dev, uint64_t *features)
1278 {
1279 if (vhost_user_get_u64(dev, VHOST_USER_GET_FEATURES, features) < 0) {
1280 return -EPROTO;
1281 }
1282
1283 return 0;
1284 }
1285
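/*
 * Make sure the backend has processed the previous message: either consume
 * its REPLY_ACK reply, or fall back to a VHOST_USER_GET_FEATURES round
 * trip, which every backend answers.
 */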
1286 static int enforce_reply(struct vhost_dev *dev,
1287 const VhostUserMsg *msg)
1288 {
1289 uint64_t dummy;
1290
1291 if (msg->hdr.flags & VHOST_USER_NEED_REPLY_MASK) {
1292 return process_message_reply(dev, msg);
1293 }
1294
1295 /*
1296 * We need to wait for a reply but the backend does not
1297 * support replies for the command we just sent.
1298 * Send VHOST_USER_GET_FEATURES which makes all backends
1299 * send a reply.
1300 */
1301 return vhost_user_get_features(dev, &dummy);
1302 }
1303
1304 static int vhost_user_set_vring_addr(struct vhost_dev *dev,
1305 struct vhost_vring_addr *addr)
1306 {
1307 int ret;
1308 VhostUserMsg msg = {
1309 .hdr.request = VHOST_USER_SET_VRING_ADDR,
1310 .hdr.flags = VHOST_USER_VERSION,
1311 .payload.addr = *addr,
1312 .hdr.size = sizeof(msg.payload.addr),
1313 };
1314
1315 bool reply_supported = virtio_has_feature(dev->protocol_features,
1316 VHOST_USER_PROTOCOL_F_REPLY_ACK);
1317
1318 /*
1319      * Wait for a reply if logging is enabled, to make sure the
1320      * backend is actually logging changes.
1321 */
1322 bool wait_for_reply = addr->flags & (1 << VHOST_VRING_F_LOG);
1323
1324 if (reply_supported && wait_for_reply) {
1325 msg.hdr.flags |= VHOST_USER_NEED_REPLY_MASK;
1326 }
1327
1328 ret = vhost_user_write(dev, &msg, NULL, 0);
1329 if (ret < 0) {
1330 return ret;
1331 }
1332
1333 if (wait_for_reply) {
1334 return enforce_reply(dev, &msg);
1335 }
1336
1337 return 0;
1338 }
1339
1340 static int vhost_user_set_u64(struct vhost_dev *dev, int request, uint64_t u64,
1341 bool wait_for_reply)
1342 {
1343 VhostUserMsg msg = {
1344 .hdr.request = request,
1345 .hdr.flags = VHOST_USER_VERSION,
1346 .payload.u64 = u64,
1347 .hdr.size = sizeof(msg.payload.u64),
1348 };
1349 int ret;
1350
1351 if (wait_for_reply) {
1352 bool reply_supported = virtio_has_feature(dev->protocol_features,
1353 VHOST_USER_PROTOCOL_F_REPLY_ACK);
1354 if (reply_supported) {
1355 msg.hdr.flags |= VHOST_USER_NEED_REPLY_MASK;
1356 }
1357 }
1358
1359 ret = vhost_user_write(dev, &msg, NULL, 0);
1360 if (ret < 0) {
1361 return ret;
1362 }
1363
1364 if (wait_for_reply) {
1365 return enforce_reply(dev, &msg);
1366 }
1367
1368 return 0;
1369 }
1370
1371 static int vhost_user_set_status(struct vhost_dev *dev, uint8_t status)
1372 {
1373 return vhost_user_set_u64(dev, VHOST_USER_SET_STATUS, status, false);
1374 }
1375
1376 static int vhost_user_get_status(struct vhost_dev *dev, uint8_t *status)
1377 {
1378 uint64_t value;
1379 int ret;
1380
1381 ret = vhost_user_get_u64(dev, VHOST_USER_GET_STATUS, &value);
1382 if (ret < 0) {
1383 return ret;
1384 }
1385 *status = value;
1386
1387 return 0;
1388 }
1389
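/*
 * Read the current device status from the backend and set any bits of
 * 'status' that are not already set.
 */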
1390 static int vhost_user_add_status(struct vhost_dev *dev, uint8_t status)
1391 {
1392 uint8_t s;
1393 int ret;
1394
1395 ret = vhost_user_get_status(dev, &s);
1396 if (ret < 0) {
1397 return ret;
1398 }
1399
1400 if ((s & status) == status) {
1401 return 0;
1402 }
1403 s |= status;
1404
1405 return vhost_user_set_status(dev, s);
1406 }
1407
1408 static int vhost_user_set_features(struct vhost_dev *dev,
1409 uint64_t features)
1410 {
1411 /*
1412      * Wait for a reply if logging is enabled, to make sure the
1413      * backend is actually logging changes.
1414 */
1415 bool log_enabled = features & (0x1ULL << VHOST_F_LOG_ALL);
1416 int ret;
1417
1418 /*
1419 * We need to include any extra backend only feature bits that
1420 * might be needed by our device. Currently this includes the
1421 * VHOST_USER_F_PROTOCOL_FEATURES bit for enabling protocol
1422 * features.
1423 */
1424 ret = vhost_user_set_u64(dev, VHOST_USER_SET_FEATURES,
1425 features | dev->backend_features,
1426 log_enabled);
1427
1428 if (virtio_has_feature(dev->protocol_features,
1429 VHOST_USER_PROTOCOL_F_STATUS)) {
1430 if (!ret) {
1431 return vhost_user_add_status(dev, VIRTIO_CONFIG_S_FEATURES_OK);
1432 }
1433 }
1434
1435 return ret;
1436 }
1437
1438 static int vhost_user_set_protocol_features(struct vhost_dev *dev,
1439 uint64_t features)
1440 {
1441 return vhost_user_set_u64(dev, VHOST_USER_SET_PROTOCOL_FEATURES, features,
1442 false);
1443 }
1444
1445 static int vhost_user_set_owner(struct vhost_dev *dev)
1446 {
1447 VhostUserMsg msg = {
1448 .hdr.request = VHOST_USER_SET_OWNER,
1449 .hdr.flags = VHOST_USER_VERSION,
1450 };
1451
1452 return vhost_user_write(dev, &msg, NULL, 0);
1453 }
1454
1455 static int vhost_user_get_max_memslots(struct vhost_dev *dev,
1456 uint64_t *max_memslots)
1457 {
1458 uint64_t backend_max_memslots;
1459 int err;
1460
1461 err = vhost_user_get_u64(dev, VHOST_USER_GET_MAX_MEM_SLOTS,
1462 &backend_max_memslots);
1463 if (err < 0) {
1464 return err;
1465 }
1466
1467 *max_memslots = backend_max_memslots;
1468
1469 return 0;
1470 }
1471
1472 static int vhost_user_reset_device(struct vhost_dev *dev)
1473 {
1474 VhostUserMsg msg = {
1475 .hdr.flags = VHOST_USER_VERSION,
1476 };
1477
1478 msg.hdr.request = virtio_has_feature(dev->protocol_features,
1479 VHOST_USER_PROTOCOL_F_RESET_DEVICE)
1480 ? VHOST_USER_RESET_DEVICE
1481 : VHOST_USER_RESET_OWNER;
1482
1483 return vhost_user_write(dev, &msg, NULL, 0);
1484 }
1485
1486 static int vhost_user_slave_handle_config_change(struct vhost_dev *dev)
1487 {
1488 if (!dev->config_ops || !dev->config_ops->vhost_dev_config_notifier) {
1489 return -ENOSYS;
1490 }
1491
1492 return dev->config_ops->vhost_dev_config_notifier(dev);
1493 }
1494
1495 /*
1496 * Fetch or create the notifier for a given idx. Newly created
1497 * notifiers are added to the pointer array that tracks them.
1498 */
1499 static VhostUserHostNotifier *fetch_or_create_notifier(VhostUserState *u,
1500 int idx)
1501 {
1502 VhostUserHostNotifier *n = NULL;
1503 if (idx >= u->notifiers->len) {
1504 g_ptr_array_set_size(u->notifiers, idx + 1);
1505 }
1506
1507 n = g_ptr_array_index(u->notifiers, idx);
1508 if (!n) {
1509 /*
1510          * In case notifications arrive out of order,
1511          * make room for the current index.
1512 */
1513 g_ptr_array_remove_index(u->notifiers, idx);
1514 n = g_new0(VhostUserHostNotifier, 1);
1515 n->idx = idx;
1516 g_ptr_array_insert(u->notifiers, idx, n);
1517 trace_vhost_user_create_notifier(idx, n);
1518 }
1519
1520 return n;
1521 }
1522
1523 static int vhost_user_slave_handle_vring_host_notifier(struct vhost_dev *dev,
1524 VhostUserVringArea *area,
1525 int fd)
1526 {
1527 int queue_idx = area->u64 & VHOST_USER_VRING_IDX_MASK;
1528 size_t page_size = qemu_real_host_page_size();
1529 struct vhost_user *u = dev->opaque;
1530 VhostUserState *user = u->user;
1531 VirtIODevice *vdev = dev->vdev;
1532 VhostUserHostNotifier *n;
1533 void *addr;
1534 char *name;
1535
1536 if (!virtio_has_feature(dev->protocol_features,
1537 VHOST_USER_PROTOCOL_F_HOST_NOTIFIER) ||
1538 vdev == NULL || queue_idx >= virtio_get_num_queues(vdev)) {
1539 return -EINVAL;
1540 }
1541
1542 /*
1543 * Fetch notifier and invalidate any old data before setting up
1544 * new mapped address.
1545 */
1546 n = fetch_or_create_notifier(user, queue_idx);
1547 vhost_user_host_notifier_remove(n, vdev);
1548
1549 if (area->u64 & VHOST_USER_VRING_NOFD_MASK) {
1550 return 0;
1551 }
1552
1553 /* Sanity check. */
1554 if (area->size != page_size) {
1555 return -EINVAL;
1556 }
1557
1558 addr = mmap(NULL, page_size, PROT_READ | PROT_WRITE, MAP_SHARED,
1559 fd, area->offset);
1560 if (addr == MAP_FAILED) {
1561 return -EFAULT;
1562 }
1563
1564 name = g_strdup_printf("vhost-user/host-notifier@%p mmaps[%d]",
1565 user, queue_idx);
1566 if (!n->mr.ram) { /* Don't init again after suspend. */
1567 memory_region_init_ram_device_ptr(&n->mr, OBJECT(vdev), name,
1568 page_size, addr);
1569 } else {
1570 n->mr.ram_block->host = addr;
1571 }
1572 g_free(name);
1573
1574 if (virtio_queue_set_host_notifier_mr(vdev, queue_idx, &n->mr, true)) {
1575 object_unparent(OBJECT(&n->mr));
1576 munmap(addr, page_size);
1577 return -ENXIO;
1578 }
1579
1580 n->addr = addr;
1581
1582 return 0;
1583 }
1584
1585 static void close_slave_channel(struct vhost_user *u)
1586 {
1587 g_source_destroy(u->slave_src);
1588 g_source_unref(u->slave_src);
1589 u->slave_src = NULL;
1590 object_unref(OBJECT(u->slave_ioc));
1591 u->slave_ioc = NULL;
1592 }
1593
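/*
 * Handler for requests arriving on the backend (slave) channel: read the
 * header and payload, dispatch on the request type, and if the backend
 * asked for a reply, send back a u64 that is 0 on success and non-zero
 * on failure.
 */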
1594 static gboolean slave_read(QIOChannel *ioc, GIOCondition condition,
1595 gpointer opaque)
1596 {
1597 struct vhost_dev *dev = opaque;
1598 struct vhost_user *u = dev->opaque;
1599 VhostUserHeader hdr = { 0, };
1600 VhostUserPayload payload = { 0, };
1601 Error *local_err = NULL;
1602 gboolean rc = G_SOURCE_CONTINUE;
1603 int ret = 0;
1604 struct iovec iov;
1605 g_autofree int *fd = NULL;
1606 size_t fdsize = 0;
1607 int i;
1608
1609 /* Read header */
1610 iov.iov_base = &hdr;
1611 iov.iov_len = VHOST_USER_HDR_SIZE;
1612
1613 if (qio_channel_readv_full_all(ioc, &iov, 1, &fd, &fdsize, &local_err)) {
1614 error_report_err(local_err);
1615 goto err;
1616 }
1617
1618 if (hdr.size > VHOST_USER_PAYLOAD_SIZE) {
1619 error_report("Failed to read msg header."
1620 " Size %d exceeds the maximum %zu.", hdr.size,
1621 VHOST_USER_PAYLOAD_SIZE);
1622 goto err;
1623 }
1624
1625 /* Read payload */
1626 if (qio_channel_read_all(ioc, (char *) &payload, hdr.size, &local_err)) {
1627 error_report_err(local_err);
1628 goto err;
1629 }
1630
1631 switch (hdr.request) {
1632 case VHOST_USER_BACKEND_IOTLB_MSG:
1633 ret = vhost_backend_handle_iotlb_msg(dev, &payload.iotlb);
1634 break;
1635 case VHOST_USER_BACKEND_CONFIG_CHANGE_MSG:
1636 ret = vhost_user_slave_handle_config_change(dev);
1637 break;
1638 case VHOST_USER_BACKEND_VRING_HOST_NOTIFIER_MSG:
1639 ret = vhost_user_slave_handle_vring_host_notifier(dev, &payload.area,
1640 fd ? fd[0] : -1);
1641 break;
1642 default:
1643 error_report("Received unexpected msg type: %d.", hdr.request);
1644 ret = -EINVAL;
1645 }
1646
1647 /*
1648      * REPLY_ACK feature handling. Other reply types have to be managed
1649 * directly in their request handlers.
1650 */
1651 if (hdr.flags & VHOST_USER_NEED_REPLY_MASK) {
1652 struct iovec iovec[2];
1653
1654
1655 hdr.flags &= ~VHOST_USER_NEED_REPLY_MASK;
1656 hdr.flags |= VHOST_USER_REPLY_MASK;
1657
1658 payload.u64 = !!ret;
1659 hdr.size = sizeof(payload.u64);
1660
1661 iovec[0].iov_base = &hdr;
1662 iovec[0].iov_len = VHOST_USER_HDR_SIZE;
1663 iovec[1].iov_base = &payload;
1664 iovec[1].iov_len = hdr.size;
1665
1666 if (qio_channel_writev_all(ioc, iovec, ARRAY_SIZE(iovec), &local_err)) {
1667 error_report_err(local_err);
1668 goto err;
1669 }
1670 }
1671
1672 goto fdcleanup;
1673
1674 err:
1675 close_slave_channel(u);
1676 rc = G_SOURCE_REMOVE;
1677
1678 fdcleanup:
1679 if (fd) {
1680 for (i = 0; i < fdsize; i++) {
1681 close(fd[i]);
1682 }
1683 }
1684 return rc;
1685 }
1686
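/*
 * Create a socketpair, hand one end to the backend with
 * VHOST_USER_SET_BACKEND_REQ_FD and watch the other end for
 * backend-initiated requests.
 */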
1687 static int vhost_setup_slave_channel(struct vhost_dev *dev)
1688 {
1689 VhostUserMsg msg = {
1690 .hdr.request = VHOST_USER_SET_BACKEND_REQ_FD,
1691 .hdr.flags = VHOST_USER_VERSION,
1692 };
1693 struct vhost_user *u = dev->opaque;
1694 int sv[2], ret = 0;
1695 bool reply_supported = virtio_has_feature(dev->protocol_features,
1696 VHOST_USER_PROTOCOL_F_REPLY_ACK);
1697 Error *local_err = NULL;
1698 QIOChannel *ioc;
1699
1700 if (!virtio_has_feature(dev->protocol_features,
1701 VHOST_USER_PROTOCOL_F_BACKEND_REQ)) {
1702 return 0;
1703 }
1704
1705 if (qemu_socketpair(PF_UNIX, SOCK_STREAM, 0, sv) == -1) {
1706 int saved_errno = errno;
1707 error_report("socketpair() failed");
1708 return -saved_errno;
1709 }
1710
1711 ioc = QIO_CHANNEL(qio_channel_socket_new_fd(sv[0], &local_err));
1712 if (!ioc) {
1713 error_report_err(local_err);
1714 return -ECONNREFUSED;
1715 }
1716 u->slave_ioc = ioc;
1717 u->slave_src = qio_channel_add_watch_source(u->slave_ioc,
1718 G_IO_IN | G_IO_HUP,
1719 slave_read, dev, NULL, NULL);
1720
1721 if (reply_supported) {
1722 msg.hdr.flags |= VHOST_USER_NEED_REPLY_MASK;
1723 }
1724
1725 ret = vhost_user_write(dev, &msg, &sv[1], 1);
1726 if (ret) {
1727 goto out;
1728 }
1729
1730 if (reply_supported) {
1731 ret = process_message_reply(dev, &msg);
1732 }
1733
1734 out:
1735 close(sv[1]);
1736 if (ret) {
1737 close_slave_channel(u);
1738 }
1739
1740 return ret;
1741 }
1742
1743 #ifdef CONFIG_LINUX
1744 /*
1745 * Called back from the postcopy fault thread when a fault is received on our
1746 * ufd.
1747 * TODO: This is Linux specific
1748 */
1749 static int vhost_user_postcopy_fault_handler(struct PostCopyFD *pcfd,
1750 void *ufd)
1751 {
1752 struct vhost_dev *dev = pcfd->data;
1753 struct vhost_user *u = dev->opaque;
1754 struct uffd_msg *msg = ufd;
1755 uint64_t faultaddr = msg->arg.pagefault.address;
1756 RAMBlock *rb = NULL;
1757 uint64_t rb_offset;
1758 int i;
1759
1760 trace_vhost_user_postcopy_fault_handler(pcfd->idstr, faultaddr,
1761 dev->mem->nregions);
1762 for (i = 0; i < MIN(dev->mem->nregions, u->region_rb_len); i++) {
1763 trace_vhost_user_postcopy_fault_handler_loop(i,
1764 u->postcopy_client_bases[i], dev->mem->regions[i].memory_size);
1765 if (faultaddr >= u->postcopy_client_bases[i]) {
1766             /* Offset of the fault address in the vhost region */
1767 uint64_t region_offset = faultaddr - u->postcopy_client_bases[i];
1768 if (region_offset < dev->mem->regions[i].memory_size) {
1769 rb_offset = region_offset + u->region_rb_offset[i];
1770 trace_vhost_user_postcopy_fault_handler_found(i,
1771 region_offset, rb_offset);
1772 rb = u->region_rb[i];
1773 return postcopy_request_shared_page(pcfd, rb, faultaddr,
1774 rb_offset);
1775 }
1776 }
1777 }
1778 error_report("%s: Failed to find region for fault %" PRIx64,
1779 __func__, faultaddr);
1780 return -1;
1781 }
1782
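/*
 * A page of RAMBlock 'rb' at 'offset' has arrived; translate it back into
 * the backend's address space and wake any userfault waiters there.
 */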
1783 static int vhost_user_postcopy_waker(struct PostCopyFD *pcfd, RAMBlock *rb,
1784 uint64_t offset)
1785 {
1786 struct vhost_dev *dev = pcfd->data;
1787 struct vhost_user *u = dev->opaque;
1788 int i;
1789
1790 trace_vhost_user_postcopy_waker(qemu_ram_get_idstr(rb), offset);
1791
1792 if (!u) {
1793 return 0;
1794 }
1795     /* Translate the offset into an address in the client's address space */
1796 for (i = 0; i < MIN(dev->mem->nregions, u->region_rb_len); i++) {
1797 if (u->region_rb[i] == rb &&
1798 offset >= u->region_rb_offset[i] &&
1799 offset < (u->region_rb_offset[i] +
1800 dev->mem->regions[i].memory_size)) {
1801 uint64_t client_addr = (offset - u->region_rb_offset[i]) +
1802 u->postcopy_client_bases[i];
1803 trace_vhost_user_postcopy_waker_found(client_addr);
1804 return postcopy_wake_shared(pcfd, client_addr, rb);
1805 }
1806 }
1807
1808 trace_vhost_user_postcopy_waker_nomatch(qemu_ram_get_idstr(rb), offset);
1809 return 0;
1810 }
1811 #endif
1812
1813 /*
1814 * Called at the start of an inbound postcopy on reception of the
1815 * 'advise' command.
1816 */
1817 static int vhost_user_postcopy_advise(struct vhost_dev *dev, Error **errp)
1818 {
1819 #ifdef CONFIG_LINUX
1820 struct vhost_user *u = dev->opaque;
1821 CharBackend *chr = u->user->chr;
1822 int ufd;
1823 int ret;
1824 VhostUserMsg msg = {
1825 .hdr.request = VHOST_USER_POSTCOPY_ADVISE,
1826 .hdr.flags = VHOST_USER_VERSION,
1827 };
1828
1829 ret = vhost_user_write(dev, &msg, NULL, 0);
1830 if (ret < 0) {
1831 error_setg(errp, "Failed to send postcopy_advise to vhost");
1832 return ret;
1833 }
1834
1835 ret = vhost_user_read(dev, &msg);
1836 if (ret < 0) {
1837 error_setg(errp, "Failed to get postcopy_advise reply from vhost");
1838 return ret;
1839 }
1840
1841 if (msg.hdr.request != VHOST_USER_POSTCOPY_ADVISE) {
1842 error_setg(errp, "Unexpected msg type. Expected %d received %d",
1843 VHOST_USER_POSTCOPY_ADVISE, msg.hdr.request);
1844 return -EPROTO;
1845 }
1846
1847 if (msg.hdr.size) {
1848 error_setg(errp, "Received bad msg size.");
1849 return -EPROTO;
1850 }
1851 ufd = qemu_chr_fe_get_msgfd(chr);
1852 if (ufd < 0) {
1853 error_setg(errp, "%s: Failed to get ufd", __func__);
1854 return -EIO;
1855 }
1856 qemu_socket_set_nonblock(ufd);
1857
1858 /* register ufd with userfault thread */
1859 u->postcopy_fd.fd = ufd;
1860 u->postcopy_fd.data = dev;
1861 u->postcopy_fd.handler = vhost_user_postcopy_fault_handler;
1862 u->postcopy_fd.waker = vhost_user_postcopy_waker;
1863 u->postcopy_fd.idstr = "vhost-user"; /* Need to find unique name */
1864 postcopy_register_shared_ufd(&u->postcopy_fd);
1865 return 0;
1866 #else
1867 error_setg(errp, "Postcopy not supported on non-Linux systems");
1868 return -ENOSYS;
1869 #endif
1870 }
1871
1872 /*
1873 * Called at the switch to postcopy on reception of the 'listen' command.
1874 */
1875 static int vhost_user_postcopy_listen(struct vhost_dev *dev, Error **errp)
1876 {
1877 struct vhost_user *u = dev->opaque;
1878 int ret;
1879 VhostUserMsg msg = {
1880 .hdr.request = VHOST_USER_POSTCOPY_LISTEN,
1881 .hdr.flags = VHOST_USER_VERSION | VHOST_USER_NEED_REPLY_MASK,
1882 };
1883 u->postcopy_listen = true;
1884
1885 trace_vhost_user_postcopy_listen();
1886
1887 ret = vhost_user_write(dev, &msg, NULL, 0);
1888 if (ret < 0) {
1889 error_setg(errp, "Failed to send postcopy_listen to vhost");
1890 return ret;
1891 }
1892
1893 ret = process_message_reply(dev, &msg);
1894 if (ret) {
1895 error_setg(errp, "Failed to receive reply to postcopy_listen");
1896 return ret;
1897 }
1898
1899 return 0;
1900 }
1901
1902 /*
1903 * Called at the end of postcopy
1904 */
1905 static int vhost_user_postcopy_end(struct vhost_dev *dev, Error **errp)
1906 {
1907 VhostUserMsg msg = {
1908 .hdr.request = VHOST_USER_POSTCOPY_END,
1909 .hdr.flags = VHOST_USER_VERSION | VHOST_USER_NEED_REPLY_MASK,
1910 };
1911 int ret;
1912 struct vhost_user *u = dev->opaque;
1913
1914 trace_vhost_user_postcopy_end_entry();
1915
1916 ret = vhost_user_write(dev, &msg, NULL, 0);
1917 if (ret < 0) {
1918 error_setg(errp, "Failed to send postcopy_end to vhost");
1919 return ret;
1920 }
1921
1922 ret = process_message_reply(dev, &msg);
1923 if (ret) {
1924 error_setg(errp, "Failed to receive reply to postcopy_end");
1925 return ret;
1926 }
1927 postcopy_unregister_shared_ufd(&u->postcopy_fd);
1928 close(u->postcopy_fd.fd);
1929 u->postcopy_fd.handler = NULL;
1930
1931 trace_vhost_user_postcopy_end_exit();
1932
1933 return 0;
1934 }
1935
1936 static int vhost_user_postcopy_notifier(NotifierWithReturn *notifier,
1937 void *opaque)
1938 {
1939 struct PostcopyNotifyData *pnd = opaque;
1940 struct vhost_user *u = container_of(notifier, struct vhost_user,
1941 postcopy_notifier);
1942 struct vhost_dev *dev = u->dev;
1943
1944 switch (pnd->reason) {
1945 case POSTCOPY_NOTIFY_PROBE:
1946 if (!virtio_has_feature(dev->protocol_features,
1947 VHOST_USER_PROTOCOL_F_PAGEFAULT)) {
1948 /* TODO: Get the device name into this error somehow */
1949 error_setg(pnd->errp,
1950 "vhost-user backend not capable of postcopy");
1951 return -ENOENT;
1952 }
1953 break;
1954
1955 case POSTCOPY_NOTIFY_INBOUND_ADVISE:
1956 return vhost_user_postcopy_advise(dev, pnd->errp);
1957
1958 case POSTCOPY_NOTIFY_INBOUND_LISTEN:
1959 return vhost_user_postcopy_listen(dev, pnd->errp);
1960
1961 case POSTCOPY_NOTIFY_INBOUND_END:
1962 return vhost_user_postcopy_end(dev, pnd->errp);
1963
1964 default:
1965         /* We ignore notifications we don't know about */
1966 break;
1967 }
1968
1969 return 0;
1970 }
1971
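/*
 * Backend initialisation: negotiate device and protocol features, query the
 * maximum number of queues and memory slots, set up the backend request
 * channel (for the first queue pair) and register the postcopy notifier.
 */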
1972 static int vhost_user_backend_init(struct vhost_dev *dev, void *opaque,
1973 Error **errp)
1974 {
1975 uint64_t features, ram_slots;
1976 struct vhost_user *u;
1977 VhostUserState *vus = (VhostUserState *) opaque;
1978 int err;
1979
1980 assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER);
1981
1982 u = g_new0(struct vhost_user, 1);
1983 u->user = vus;
1984 u->dev = dev;
1985 dev->opaque = u;
1986
1987 err = vhost_user_get_features(dev, &features);
1988 if (err < 0) {
1989 error_setg_errno(errp, -err, "vhost_backend_init failed");
1990 return err;
1991 }
1992
1993 if (virtio_has_feature(features, VHOST_USER_F_PROTOCOL_FEATURES)) {
1994 bool supports_f_config = vus->supports_config ||
1995 (dev->config_ops && dev->config_ops->vhost_dev_config_notifier);
1996 uint64_t protocol_features;
1997
1998 dev->backend_features |= 1ULL << VHOST_USER_F_PROTOCOL_FEATURES;
1999
2000 err = vhost_user_get_u64(dev, VHOST_USER_GET_PROTOCOL_FEATURES,
2001 &protocol_features);
2002 if (err < 0) {
2003 error_setg_errno(errp, EPROTO, "vhost_backend_init failed");
2004 return -EPROTO;
2005 }
2006
2007 /*
2008 * We will use all the protocol features we support - although
2009          * we suppress F_CONFIG if we know QEMU's internal code cannot
2010          * support it.
2011 */
2012 protocol_features &= VHOST_USER_PROTOCOL_FEATURE_MASK;
2013
2014 if (supports_f_config) {
2015 if (!virtio_has_feature(protocol_features,
2016 VHOST_USER_PROTOCOL_F_CONFIG)) {
2017 error_setg(errp, "vhost-user device expecting "
2018 "VHOST_USER_PROTOCOL_F_CONFIG but the vhost-user backend does "
2019 "not support it.");
2020 return -EPROTO;
2021 }
2022 } else {
2023 if (virtio_has_feature(protocol_features,
2024 VHOST_USER_PROTOCOL_F_CONFIG)) {
2025 warn_report("vhost-user backend supports "
2026 "VHOST_USER_PROTOCOL_F_CONFIG but QEMU does not.");
2027 protocol_features &= ~(1ULL << VHOST_USER_PROTOCOL_F_CONFIG);
2028 }
2029 }
2030
2031 /* final set of protocol features */
2032 dev->protocol_features = protocol_features;
2033 err = vhost_user_set_protocol_features(dev, dev->protocol_features);
2034 if (err < 0) {
2035 error_setg_errno(errp, EPROTO, "vhost_backend_init failed");
2036 return -EPROTO;
2037 }
2038
2039 /* query the max queues we support if the backend supports multiqueue */
2040 if (dev->protocol_features & (1ULL << VHOST_USER_PROTOCOL_F_MQ)) {
2041 err = vhost_user_get_u64(dev, VHOST_USER_GET_QUEUE_NUM,
2042 &dev->max_queues);
2043 if (err < 0) {
2044 error_setg_errno(errp, EPROTO, "vhost_backend_init failed");
2045 return -EPROTO;
2046 }
2047 } else {
2048 dev->max_queues = 1;
2049 }
2050
2051 if (dev->num_queues && dev->max_queues < dev->num_queues) {
2052 error_setg(errp, "The maximum number of queues supported by the "
2053 "backend is %" PRIu64, dev->max_queues);
2054 return -EINVAL;
2055 }
2056
2057 if (virtio_has_feature(features, VIRTIO_F_IOMMU_PLATFORM) &&
2058 !(virtio_has_feature(dev->protocol_features,
2059 VHOST_USER_PROTOCOL_F_BACKEND_REQ) &&
2060 virtio_has_feature(dev->protocol_features,
2061 VHOST_USER_PROTOCOL_F_REPLY_ACK))) {
2062 error_setg(errp, "IOMMU support requires reply-ack and "
2063 "slave-req protocol features.");
2064 return -EINVAL;
2065 }
2066
2067 /* get max memory regions if backend supports configurable RAM slots */
2068 if (!virtio_has_feature(dev->protocol_features,
2069 VHOST_USER_PROTOCOL_F_CONFIGURE_MEM_SLOTS)) {
2070 u->user->memory_slots = VHOST_MEMORY_BASELINE_NREGIONS;
2071 } else {
2072 err = vhost_user_get_max_memslots(dev, &ram_slots);
2073 if (err < 0) {
2074 error_setg_errno(errp, EPROTO, "vhost_backend_init failed");
2075 return -EPROTO;
2076 }
2077
2078 if (ram_slots < u->user->memory_slots) {
2079 error_setg(errp, "The backend specified a max ram slots limit "
2080 "of %" PRIu64", when the prior validated limit was "
2081 "%d. This limit should never decrease.", ram_slots,
2082 u->user->memory_slots);
2083 return -EINVAL;
2084 }
2085
2086 u->user->memory_slots = MIN(ram_slots, VHOST_USER_MAX_RAM_SLOTS);
2087 }
2088 }
2089
2090 if (dev->migration_blocker == NULL &&
2091 !virtio_has_feature(dev->protocol_features,
2092 VHOST_USER_PROTOCOL_F_LOG_SHMFD)) {
2093 error_setg(&dev->migration_blocker,
2094 "Migration disabled: vhost-user backend lacks "
2095 "VHOST_USER_PROTOCOL_F_LOG_SHMFD feature.");
2096 }
2097
2098 if (dev->vq_index == 0) {
2099 err = vhost_setup_slave_channel(dev);
2100 if (err < 0) {
2101 error_setg_errno(errp, EPROTO, "vhost_backend_init failed");
2102 return -EPROTO;
2103 }
2104 }
2105
2106 u->postcopy_notifier.notify = vhost_user_postcopy_notifier;
2107 postcopy_add_notifier(&u->postcopy_notifier);
2108
2109 return 0;
2110 }
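/*
 * A condensed view of the handshake performed by vhost_user_backend_init()
 * above, useful when reading backend traces; this restates the order of the
 * requests already issued by the code:
 *
 *   VHOST_USER_GET_FEATURES
 *   if VHOST_USER_F_PROTOCOL_FEATURES:
 *       VHOST_USER_GET_PROTOCOL_FEATURES
 *       VHOST_USER_SET_PROTOCOL_FEATURES   (masked, F_CONFIG possibly dropped)
 *       VHOST_USER_GET_QUEUE_NUM           (only with F_MQ)
 *       vhost_user_get_max_memslots()      (only with F_CONFIGURE_MEM_SLOTS)
 *   vhost_setup_slave_channel()            (only for vq_index == 0)
 *   register the postcopy notifier
 */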
2111
2112 static int vhost_user_backend_cleanup(struct vhost_dev *dev)
2113 {
2114 struct vhost_user *u;
2115
2116 assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER);
2117
2118 u = dev->opaque;
2119 if (u->postcopy_notifier.notify) {
2120 postcopy_remove_notifier(&u->postcopy_notifier);
2121 u->postcopy_notifier.notify = NULL;
2122 }
2123 u->postcopy_listen = false;
2124 if (u->postcopy_fd.handler) {
2125 postcopy_unregister_shared_ufd(&u->postcopy_fd);
2126 close(u->postcopy_fd.fd);
2127 u->postcopy_fd.handler = NULL;
2128 }
2129 if (u->slave_ioc) {
2130 close_slave_channel(u);
2131 }
2132 g_free(u->region_rb);
2133 u->region_rb = NULL;
2134 g_free(u->region_rb_offset);
2135 u->region_rb_offset = NULL;
2136 u->region_rb_len = 0;
2137 g_free(u);
2138 dev->opaque = NULL;
2139
2140 return 0;
2141 }
2142
2143 static int vhost_user_get_vq_index(struct vhost_dev *dev, int idx)
2144 {
2145 assert(idx >= dev->vq_index && idx < dev->vq_index + dev->nvqs);
2146
2147 return idx;
2148 }
2149
2150 static int vhost_user_memslots_limit(struct vhost_dev *dev)
2151 {
2152 struct vhost_user *u = dev->opaque;
2153
2154 return u->user->memory_slots;
2155 }
2156
2157 static bool vhost_user_requires_shm_log(struct vhost_dev *dev)
2158 {
2159 assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER);
2160
2161 return virtio_has_feature(dev->protocol_features,
2162 VHOST_USER_PROTOCOL_F_LOG_SHMFD);
2163 }
2164
2165 static int vhost_user_migration_done(struct vhost_dev *dev, char* mac_addr)
2166 {
2167 VhostUserMsg msg = { };
2168
2169 assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER);
2170
2171 /* If the guest supports GUEST_ANNOUNCE, do nothing */
2172 if (virtio_has_feature(dev->acked_features, VIRTIO_NET_F_GUEST_ANNOUNCE)) {
2173 return 0;
2174 }
2175
2176 /* If the backend supports VHOST_USER_PROTOCOL_F_RARP, ask it to send the RARP */
2177 if (virtio_has_feature(dev->protocol_features,
2178 VHOST_USER_PROTOCOL_F_RARP)) {
2179 msg.hdr.request = VHOST_USER_SEND_RARP;
2180 msg.hdr.flags = VHOST_USER_VERSION;
2181 memcpy((char *)&msg.payload.u64, mac_addr, 6);
2182 msg.hdr.size = sizeof(msg.payload.u64);
2183
2184 return vhost_user_write(dev, &msg, NULL, 0);
2185 }
2186 return -ENOTSUP;
2187 }
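/*
 * The RARP payload above is the guest's MAC address packed into the first
 * six bytes of payload.u64; the backend is expected to announce the guest
 * on the network after migration when the guest itself does not support
 * VIRTIO_NET_F_GUEST_ANNOUNCE.
 */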
2188
2189 static bool vhost_user_can_merge(struct vhost_dev *dev,
2190 uint64_t start1, uint64_t size1,
2191 uint64_t start2, uint64_t size2)
2192 {
2193 ram_addr_t offset;
2194 int mfd, rfd;
2195
2196 (void)vhost_user_get_mr_data(start1, &offset, &mfd);
2197 (void)vhost_user_get_mr_data(start2, &offset, &rfd);
2198
2199 return mfd == rfd;
2200 }
2201
2202 static int vhost_user_net_set_mtu(struct vhost_dev *dev, uint16_t mtu)
2203 {
2204 VhostUserMsg msg;
2205 bool reply_supported = virtio_has_feature(dev->protocol_features,
2206 VHOST_USER_PROTOCOL_F_REPLY_ACK);
2207 int ret;
2208
2209 if (!(dev->protocol_features & (1ULL << VHOST_USER_PROTOCOL_F_NET_MTU))) {
2210 return 0;
2211 }
2212
2213 msg.hdr.request = VHOST_USER_NET_SET_MTU;
2214 msg.payload.u64 = mtu;
2215 msg.hdr.size = sizeof(msg.payload.u64);
2216 msg.hdr.flags = VHOST_USER_VERSION;
2217 if (reply_supported) {
2218 msg.hdr.flags |= VHOST_USER_NEED_REPLY_MASK;
2219 }
2220
2221 ret = vhost_user_write(dev, &msg, NULL, 0);
2222 if (ret < 0) {
2223 return ret;
2224 }
2225
2226 /* If reply_ack is supported, the backend has to ack that the specified MTU is valid */
2227 if (reply_supported) {
2228 return process_message_reply(dev, &msg);
2229 }
2230
2231 return 0;
2232 }
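/*
 * vhost_user_net_set_mtu() follows the common "optional reply-ack" pattern
 * also used by SET_CONFIG below: when VHOST_USER_PROTOCOL_F_REPLY_ACK was
 * negotiated, the request carries VHOST_USER_NEED_REPLY_MASK and the sender
 * waits for the backend's acknowledgement. A minimal sketch of the pattern:
 *
 *     if (reply_supported) {
 *         msg.hdr.flags |= VHOST_USER_NEED_REPLY_MASK;
 *     }
 *     ret = vhost_user_write(dev, &msg, NULL, 0);
 *     if (ret < 0) {
 *         return ret;
 *     }
 *     return reply_supported ? process_message_reply(dev, &msg) : 0;
 *
 * VHOST_USER_IOTLB_MSG below sets the flag unconditionally and always waits
 * for the reply.
 */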
2233
2234 static int vhost_user_send_device_iotlb_msg(struct vhost_dev *dev,
2235 struct vhost_iotlb_msg *imsg)
2236 {
2237 int ret;
2238 VhostUserMsg msg = {
2239 .hdr.request = VHOST_USER_IOTLB_MSG,
2240 .hdr.size = sizeof(msg.payload.iotlb),
2241 .hdr.flags = VHOST_USER_VERSION | VHOST_USER_NEED_REPLY_MASK,
2242 .payload.iotlb = *imsg,
2243 };
2244
2245 ret = vhost_user_write(dev, &msg, NULL, 0);
2246 if (ret < 0) {
2247 return ret;
2248 }
2249
2250 return process_message_reply(dev, &msg);
2251 }
2252
2253
2254 static void vhost_user_set_iotlb_callback(struct vhost_dev *dev, int enabled)
2255 {
2256 /* No-op as the receive channel is not dedicated to IOTLB messages. */
2257 }
2258
2259 static int vhost_user_get_config(struct vhost_dev *dev, uint8_t *config,
2260 uint32_t config_len, Error **errp)
2261 {
2262 int ret;
2263 VhostUserMsg msg = {
2264 .hdr.request = VHOST_USER_GET_CONFIG,
2265 .hdr.flags = VHOST_USER_VERSION,
2266 .hdr.size = VHOST_USER_CONFIG_HDR_SIZE + config_len,
2267 };
2268
2269 if (!virtio_has_feature(dev->protocol_features,
2270 VHOST_USER_PROTOCOL_F_CONFIG)) {
2271 error_setg(errp, "VHOST_USER_PROTOCOL_F_CONFIG not supported");
2272 return -EINVAL;
2273 }
2274
2275 assert(config_len <= VHOST_USER_MAX_CONFIG_SIZE);
2276
2277 msg.payload.config.offset = 0;
2278 msg.payload.config.size = config_len;
2279 ret = vhost_user_write(dev, &msg, NULL, 0);
2280 if (ret < 0) {
2281 error_setg_errno(errp, -ret, "vhost_get_config failed");
2282 return ret;
2283 }
2284
2285 ret = vhost_user_read(dev, &msg);
2286 if (ret < 0) {
2287 error_setg_errno(errp, -ret, "vhost_get_config failed");
2288 return ret;
2289 }
2290
2291 if (msg.hdr.request != VHOST_USER_GET_CONFIG) {
2292 error_setg(errp,
2293 "Received unexpected msg type. Expected %d received %d",
2294 VHOST_USER_GET_CONFIG, msg.hdr.request);
2295 return -EPROTO;
2296 }
2297
2298 if (msg.hdr.size != VHOST_USER_CONFIG_HDR_SIZE + config_len) {
2299 error_setg(errp, "Received bad msg size.");
2300 return -EPROTO;
2301 }
2302
2303 memcpy(config, msg.payload.config.region, config_len);
2304
2305 return 0;
2306 }
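/*
 * The config-space messages used above and in vhost_user_set_config() below
 * share one payload layout (the VhostUserConfig payload defined earlier in
 * this file): an offset/size/flags header followed by up to
 * VHOST_USER_MAX_CONFIG_SIZE bytes of device config data, with
 * hdr.size = VHOST_USER_CONFIG_HDR_SIZE + the number of config bytes.
 * GET_CONFIG sends the requested window and expects the backend to echo the
 * same header back with region[] filled in; SET_CONFIG carries the new
 * bytes in region[] and optionally requests a reply-ack.
 */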
2307
2308 static int vhost_user_set_config(struct vhost_dev *dev, const uint8_t *data,
2309 uint32_t offset, uint32_t size, uint32_t flags)
2310 {
2311 int ret;
2312 uint8_t *p;
2313 bool reply_supported = virtio_has_feature(dev->protocol_features,
2314 VHOST_USER_PROTOCOL_F_REPLY_ACK);
2315
2316 VhostUserMsg msg = {
2317 .hdr.request = VHOST_USER_SET_CONFIG,
2318 .hdr.flags = VHOST_USER_VERSION,
2319 .hdr.size = VHOST_USER_CONFIG_HDR_SIZE + size,
2320 };
2321
2322 if (!virtio_has_feature(dev->protocol_features,
2323 VHOST_USER_PROTOCOL_F_CONFIG)) {
2324 return -ENOTSUP;
2325 }
2326
2327 if (reply_supported) {
2328 msg.hdr.flags |= VHOST_USER_NEED_REPLY_MASK;
2329 }
2330
2331 if (size > VHOST_USER_MAX_CONFIG_SIZE) {
2332 return -EINVAL;
2333 }
2334
2335 msg.payload.config.offset = offset;
2336 msg.payload.config.size = size;
2337 msg.payload.config.flags = flags;
2338 p = msg.payload.config.region;
2339 memcpy(p, data, size);
2340
2341 ret = vhost_user_write(dev, &msg, NULL, 0);
2342 if (ret < 0) {
2343 return ret;
2344 }
2345
2346 if (reply_supported) {
2347 return process_message_reply(dev, &msg);
2348 }
2349
2350 return 0;
2351 }
2352
2353 static int vhost_user_crypto_create_session(struct vhost_dev *dev,
2354 void *session_info,
2355 uint64_t *session_id)
2356 {
2357 int ret;
2358 bool crypto_session = virtio_has_feature(dev->protocol_features,
2359 VHOST_USER_PROTOCOL_F_CRYPTO_SESSION);
2360 CryptoDevBackendSymSessionInfo *sess_info = session_info;
2361 VhostUserMsg msg = {
2362 .hdr.request = VHOST_USER_CREATE_CRYPTO_SESSION,
2363 .hdr.flags = VHOST_USER_VERSION,
2364 .hdr.size = sizeof(msg.payload.session),
2365 };
2366
2367 assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER);
2368
2369 if (!crypto_session) {
2370 error_report("vhost-user trying to send unhandled ioctl");
2371 return -ENOTSUP;
2372 }
2373
2374 memcpy(&msg.payload.session.session_setup_data, sess_info,
2375 sizeof(CryptoDevBackendSymSessionInfo));
2376 if (sess_info->key_len) {
2377 memcpy(&msg.payload.session.key, sess_info->cipher_key,
2378 sess_info->key_len);
2379 }
2380 if (sess_info->auth_key_len > 0) {
2381 memcpy(&msg.payload.session.auth_key, sess_info->auth_key,
2382 sess_info->auth_key_len);
2383 }
2384 ret = vhost_user_write(dev, &msg, NULL, 0);
2385 if (ret < 0) {
2386 error_report("vhost_user_write() return %d, create session failed",
2387 ret);
2388 return ret;
2389 }
2390
2391 ret = vhost_user_read(dev, &msg);
2392 if (ret < 0) {
2393 error_report("vhost_user_read() return %d, create session failed",
2394 ret);
2395 return ret;
2396 }
2397
2398 if (msg.hdr.request != VHOST_USER_CREATE_CRYPTO_SESSION) {
2399 error_report("Received unexpected msg type. Expected %d received %d",
2400 VHOST_USER_CREATE_CRYPTO_SESSION, msg.hdr.request);
2401 return -EPROTO;
2402 }
2403
2404 if (msg.hdr.size != sizeof(msg.payload.session)) {
2405 error_report("Received bad msg size.");
2406 return -EPROTO;
2407 }
2408
2409 if (msg.payload.session.session_id < 0) {
2410 error_report("Bad session id: %" PRId64 "",
2411 msg.payload.session.session_id);
2412 return -EINVAL;
2413 }
2414 *session_id = msg.payload.session.session_id;
2415
2416 return 0;
2417 }
2418
2419 static int
2420 vhost_user_crypto_close_session(struct vhost_dev *dev, uint64_t session_id)
2421 {
2422 int ret;
2423 bool crypto_session = virtio_has_feature(dev->protocol_features,
2424 VHOST_USER_PROTOCOL_F_CRYPTO_SESSION);
2425 VhostUserMsg msg = {
2426 .hdr.request = VHOST_USER_CLOSE_CRYPTO_SESSION,
2427 .hdr.flags = VHOST_USER_VERSION,
2428 .hdr.size = sizeof(msg.payload.u64),
2429 };
2430 msg.payload.u64 = session_id;
2431
2432 if (!crypto_session) {
2433 error_report("vhost-user trying to send unhandled ioctl");
2434 return -ENOTSUP;
2435 }
2436
2437 ret = vhost_user_write(dev, &msg, NULL, 0);
2438 if (ret < 0) {
2439 error_report("vhost_user_write() return %d, close session failed",
2440 ret);
2441 return ret;
2442 }
2443
2444 return 0;
2445 }
2446
2447 static bool vhost_user_mem_section_filter(struct vhost_dev *dev,
2448 MemoryRegionSection *section)
2449 {
2450 return memory_region_get_fd(section->mr) >= 0;
2451 }
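/*
 * Only fd-backed memory (for example memory-backend-file or
 * memory-backend-memfd) can be passed to the backend over the socket, so
 * regions without an associated fd are skipped when the memory table for
 * VHOST_USER_SET_MEM_TABLE is assembled.
 */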
2452
2453 static int vhost_user_get_inflight_fd(struct vhost_dev *dev,
2454 uint16_t queue_size,
2455 struct vhost_inflight *inflight)
2456 {
2457 void *addr;
2458 int fd;
2459 int ret;
2460 struct vhost_user *u = dev->opaque;
2461 CharBackend *chr = u->user->chr;
2462 VhostUserMsg msg = {
2463 .hdr.request = VHOST_USER_GET_INFLIGHT_FD,
2464 .hdr.flags = VHOST_USER_VERSION,
2465 .payload.inflight.num_queues = dev->nvqs,
2466 .payload.inflight.queue_size = queue_size,
2467 .hdr.size = sizeof(msg.payload.inflight),
2468 };
2469
2470 if (!virtio_has_feature(dev->protocol_features,
2471 VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD)) {
2472 return 0;
2473 }
2474
2475 ret = vhost_user_write(dev, &msg, NULL, 0);
2476 if (ret < 0) {
2477 return ret;
2478 }
2479
2480 ret = vhost_user_read(dev, &msg);
2481 if (ret < 0) {
2482 return ret;
2483 }
2484
2485 if (msg.hdr.request != VHOST_USER_GET_INFLIGHT_FD) {
2486 error_report("Received unexpected msg type. "
2487 "Expected %d received %d",
2488 VHOST_USER_GET_INFLIGHT_FD, msg.hdr.request);
2489 return -EPROTO;
2490 }
2491
2492 if (msg.hdr.size != sizeof(msg.payload.inflight)) {
2493 error_report("Received bad msg size.");
2494 return -EPROTO;
2495 }
2496
2497 if (!msg.payload.inflight.mmap_size) {
2498 return 0;
2499 }
2500
2501 fd = qemu_chr_fe_get_msgfd(chr);
2502 if (fd < 0) {
2503 error_report("Failed to get mem fd");
2504 return -EIO;
2505 }
2506
2507 addr = mmap(0, msg.payload.inflight.mmap_size, PROT_READ | PROT_WRITE,
2508 MAP_SHARED, fd, msg.payload.inflight.mmap_offset);
2509
2510 if (addr == MAP_FAILED) {
2511 error_report("Failed to mmap mem fd");
2512 close(fd);
2513 return -EFAULT;
2514 }
2515
2516 inflight->addr = addr;
2517 inflight->fd = fd;
2518 inflight->size = msg.payload.inflight.mmap_size;
2519 inflight->offset = msg.payload.inflight.mmap_offset;
2520 inflight->queue_size = queue_size;
2521
2522 return 0;
2523 }
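/*
 * Sketch of the inflight negotiation above: QEMU asks the backend for a
 * shared memory area describing in-flight requests.  The backend replies
 * with mmap_size/mmap_offset in the payload and passes the file descriptor
 * as ancillary data on the chardev socket (picked up here with
 * qemu_chr_fe_get_msgfd()).  QEMU maps it MAP_SHARED and later hands the
 * same fd back with VHOST_USER_SET_INFLIGHT_FD below, so the area can be
 * used to recover requests that were still in flight, e.g. across a
 * backend reconnect.
 */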
2524
2525 static int vhost_user_set_inflight_fd(struct vhost_dev *dev,
2526 struct vhost_inflight *inflight)
2527 {
2528 VhostUserMsg msg = {
2529 .hdr.request = VHOST_USER_SET_INFLIGHT_FD,
2530 .hdr.flags = VHOST_USER_VERSION,
2531 .payload.inflight.mmap_size = inflight->size,
2532 .payload.inflight.mmap_offset = inflight->offset,
2533 .payload.inflight.num_queues = dev->nvqs,
2534 .payload.inflight.queue_size = inflight->queue_size,
2535 .hdr.size = sizeof(msg.payload.inflight),
2536 };
2537
2538 if (!virtio_has_feature(dev->protocol_features,
2539 VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD)) {
2540 return 0;
2541 }
2542
2543 return vhost_user_write(dev, &msg, &inflight->fd, 1);
2544 }
2545
2546 static void vhost_user_state_destroy(gpointer data)
2547 {
2548 VhostUserHostNotifier *n = (VhostUserHostNotifier *) data;
2549 if (n) {
2550 vhost_user_host_notifier_remove(n, NULL);
2551 object_unparent(OBJECT(&n->mr));
2552 /*
2553 * We can't free until vhost_user_host_notifier_remove has
2554 * done its thing, so schedule the free with RCU.
2555 */
2556 g_free_rcu(n, rcu);
2557 }
2558 }
2559
2560 bool vhost_user_init(VhostUserState *user, CharBackend *chr, Error **errp)
2561 {
2562 if (user->chr) {
2563 error_setg(errp, "Cannot initialize vhost-user state");
2564 return false;
2565 }
2566 user->chr = chr;
2567 user->memory_slots = 0;
2568 user->notifiers = g_ptr_array_new_full(VIRTIO_QUEUE_MAX / 4,
2569 &vhost_user_state_destroy);
2570 return true;
2571 }
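/*
 * vhost_user_init()/vhost_user_cleanup() are the public entry points used
 * by device frontends to tie a VhostUserState to a chardev before the vhost
 * backend is initialised.  A minimal, hypothetical usage sketch (the names
 * of the frontend state and chardev are illustrative only):
 *
 *     static VhostUserState vhost_user;
 *
 *     if (!vhost_user_init(&vhost_user, &frontend->chardev, errp)) {
 *         return;            // errp already set
 *     }
 *     ...                    // vhost_dev_init() etc. happen here
 *     vhost_user_cleanup(&vhost_user);
 */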
2572
2573 void vhost_user_cleanup(VhostUserState *user)
2574 {
2575 if (!user->chr) {
2576 return;
2577 }
2578 memory_region_transaction_begin();
2579 user->notifiers = (GPtrArray *) g_ptr_array_free(user->notifiers, true);
2580 memory_region_transaction_commit();
2581 user->chr = NULL;
2582 }
2583
2584
2585 typedef struct {
2586 vu_async_close_fn cb;
2587 DeviceState *dev;
2588 CharBackend *cd;
2589 struct vhost_dev *vhost;
2590 } VhostAsyncCallback;
2591
2592 static void vhost_user_async_close_bh(void *opaque)
2593 {
2594 VhostAsyncCallback *data = opaque;
2595 struct vhost_dev *vhost = data->vhost;
2596
2597 /*
2598 * If the vhost_dev has been cleared in the meantime there is
2599 * nothing left to do as some other path has completed the
2600 * cleanup.
2601 */
2602 if (vhost->vdev) {
2603 data->cb(data->dev);
2604 }
2605
2606 g_free(data);
2607 }
2608
2609 /*
2610 * We only schedule the work if the machine is running. If suspended
2611 * we want to keep all the in-flight data as is for migration
2612 * purposes.
2613 */
2614 void vhost_user_async_close(DeviceState *d,
2615 CharBackend *chardev, struct vhost_dev *vhost,
2616 vu_async_close_fn cb)
2617 {
2618 if (!runstate_check(RUN_STATE_SHUTDOWN)) {
2619 /*
2620 * A close event may happen during a read/write, but vhost
2621 * code assumes the vhost_dev remains set up, so delay the
2622 * stop & clear.
2623 */
2624 AioContext *ctx = qemu_get_current_aio_context();
2625 VhostAsyncCallback *data = g_new0(VhostAsyncCallback, 1);
2626
2627 /* Save data for the callback */
2628 data->cb = cb;
2629 data->dev = d;
2630 data->cd = chardev;
2631 data->vhost = vhost;
2632
2633 /* Disable any further notifications on the chardev */
2634 qemu_chr_fe_set_handlers(chardev,
2635 NULL, NULL, NULL, NULL, NULL, NULL,
2636 false);
2637
2638 aio_bh_schedule_oneshot(ctx, vhost_user_async_close_bh, data);
2639
2640 /*
2641 * Move the vhost device to the stopped state. The vhost-user device
2642 * will be cleaned up and disconnected in the BH. This can be useful in
2643 * the vhost migration code: if the disconnect is caught there, the
2644 * general vhost code has a way to get the device state without
2645 * knowing its type (in this case vhost-user).
2646 *
2647 * Note if the vhost device is fully cleared by the time we
2648 * execute the bottom half we won't continue with the cleanup.
2649 */
2650 vhost->started = false;
2651 }
2652 }
2653
2654 static int vhost_user_dev_start(struct vhost_dev *dev, bool started)
2655 {
2656 if (!virtio_has_feature(dev->protocol_features,
2657 VHOST_USER_PROTOCOL_F_STATUS)) {
2658 return 0;
2659 }
2660
2661 /* Set device status only for the last queue pair */
2662 if (dev->vq_index + dev->nvqs != dev->vq_index_end) {
2663 return 0;
2664 }
2665
2666 if (started) {
2667 return vhost_user_add_status(dev, VIRTIO_CONFIG_S_ACKNOWLEDGE |
2668 VIRTIO_CONFIG_S_DRIVER |
2669 VIRTIO_CONFIG_S_DRIVER_OK);
2670 } else {
2671 return 0;
2672 }
2673 }
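/*
 * When VHOST_USER_PROTOCOL_F_STATUS has been negotiated, starting the last
 * queue pair also pushes ACKNOWLEDGE | DRIVER | DRIVER_OK to the backend
 * via vhost_user_add_status() (defined earlier in this file); stopping is
 * handled separately by vhost_user_reset_status() below, which clears the
 * status.
 */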
2674
2675 static void vhost_user_reset_status(struct vhost_dev *dev)
2676 {
2677 /* Set device status only for the last queue pair */
2678 if (dev->vq_index + dev->nvqs != dev->vq_index_end) {
2679 return;
2680 }
2681
2682 if (virtio_has_feature(dev->protocol_features,
2683 VHOST_USER_PROTOCOL_F_STATUS)) {
2684 vhost_user_set_status(dev, 0);
2685 }
2686 }
2687
2688 const VhostOps user_ops = {
2689 .backend_type = VHOST_BACKEND_TYPE_USER,
2690 .vhost_backend_init = vhost_user_backend_init,
2691 .vhost_backend_cleanup = vhost_user_backend_cleanup,
2692 .vhost_backend_memslots_limit = vhost_user_memslots_limit,
2693 .vhost_set_log_base = vhost_user_set_log_base,
2694 .vhost_set_mem_table = vhost_user_set_mem_table,
2695 .vhost_set_vring_addr = vhost_user_set_vring_addr,
2696 .vhost_set_vring_endian = vhost_user_set_vring_endian,
2697 .vhost_set_vring_num = vhost_user_set_vring_num,
2698 .vhost_set_vring_base = vhost_user_set_vring_base,
2699 .vhost_get_vring_base = vhost_user_get_vring_base,
2700 .vhost_set_vring_kick = vhost_user_set_vring_kick,
2701 .vhost_set_vring_call = vhost_user_set_vring_call,
2702 .vhost_set_vring_err = vhost_user_set_vring_err,
2703 .vhost_set_features = vhost_user_set_features,
2704 .vhost_get_features = vhost_user_get_features,
2705 .vhost_set_owner = vhost_user_set_owner,
2706 .vhost_reset_device = vhost_user_reset_device,
2707 .vhost_get_vq_index = vhost_user_get_vq_index,
2708 .vhost_set_vring_enable = vhost_user_set_vring_enable,
2709 .vhost_requires_shm_log = vhost_user_requires_shm_log,
2710 .vhost_migration_done = vhost_user_migration_done,
2711 .vhost_backend_can_merge = vhost_user_can_merge,
2712 .vhost_net_set_mtu = vhost_user_net_set_mtu,
2713 .vhost_set_iotlb_callback = vhost_user_set_iotlb_callback,
2714 .vhost_send_device_iotlb_msg = vhost_user_send_device_iotlb_msg,
2715 .vhost_get_config = vhost_user_get_config,
2716 .vhost_set_config = vhost_user_set_config,
2717 .vhost_crypto_create_session = vhost_user_crypto_create_session,
2718 .vhost_crypto_close_session = vhost_user_crypto_close_session,
2719 .vhost_backend_mem_section_filter = vhost_user_mem_section_filter,
2720 .vhost_get_inflight_fd = vhost_user_get_inflight_fd,
2721 .vhost_set_inflight_fd = vhost_user_set_inflight_fd,
2722 .vhost_dev_start = vhost_user_dev_start,
2723 .vhost_reset_status = vhost_user_reset_status,
2724 };
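/*
 * How this table is typically consumed (a hedged sketch; exact call sites
 * and argument lists vary between QEMU versions): a device frontend opens a
 * vhost-user chardev, initialises a VhostUserState with vhost_user_init()
 * and then calls vhost_dev_init() with VHOST_BACKEND_TYPE_USER, passing the
 * VhostUserState as the opaque pointer.  The generic vhost layer selects
 * user_ops for that backend type, and vhost_user_backend_init() above casts
 * the opaque pointer back to VhostUserState.
 */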