]> git.proxmox.com Git - mirror_qemu.git/blob - hw/virtio/vhost-user.c
vhost-user: Support transferring inflight buffer between qemu and backend
[mirror_qemu.git] / hw / virtio / vhost-user.c
1 /*
2 * vhost-user
3 *
4 * Copyright (c) 2013 Virtual Open Systems Sarl.
5 *
6 * This work is licensed under the terms of the GNU GPL, version 2 or later.
7 * See the COPYING file in the top-level directory.
8 *
9 */
10
11 #include "qemu/osdep.h"
12 #include "qapi/error.h"
13 #include "hw/virtio/vhost.h"
14 #include "hw/virtio/vhost-user.h"
15 #include "hw/virtio/vhost-backend.h"
16 #include "hw/virtio/virtio.h"
17 #include "hw/virtio/virtio-net.h"
18 #include "chardev/char-fe.h"
19 #include "sysemu/kvm.h"
20 #include "qemu/error-report.h"
21 #include "qemu/sockets.h"
22 #include "sysemu/cryptodev.h"
23 #include "migration/migration.h"
24 #include "migration/postcopy-ram.h"
25 #include "trace.h"
26
27 #include <sys/ioctl.h>
28 #include <sys/socket.h>
29 #include <sys/un.h>
30
31 #include "standard-headers/linux/vhost_types.h"
32
33 #ifdef CONFIG_LINUX
34 #include <linux/userfaultfd.h>
35 #endif
36
/* Number of entries in VhostUserMemory.regions and in the local fds[] arrays */
#define VHOST_MEMORY_MAX_NREGIONS 8
/* Bit number checked in dev->features before VRING_ENABLE messages are sent */
#define VHOST_USER_F_PROTOCOL_FEATURES 30
/* Number of descriptor slots slave_read() reserves for one incoming message */
#define VHOST_USER_SLAVE_MAX_FDS 8

/*
 * Maximum size of virtio device config space
 */
#define VHOST_USER_MAX_CONFIG_SIZE 256
45
/*
 * Bit numbers of the vhost-user protocol features.  Code in this file tests
 * them against dev->protocol_features with virtio_has_feature().
 * VHOST_USER_PROTOCOL_F_MAX is one past the highest defined bit.
 */
enum VhostUserProtocolFeature {
    VHOST_USER_PROTOCOL_F_MQ = 0,
    VHOST_USER_PROTOCOL_F_LOG_SHMFD = 1,
    VHOST_USER_PROTOCOL_F_RARP = 2,
    VHOST_USER_PROTOCOL_F_REPLY_ACK = 3,
    VHOST_USER_PROTOCOL_F_NET_MTU = 4,
    VHOST_USER_PROTOCOL_F_SLAVE_REQ = 5,
    VHOST_USER_PROTOCOL_F_CROSS_ENDIAN = 6,
    VHOST_USER_PROTOCOL_F_CRYPTO_SESSION = 7,
    VHOST_USER_PROTOCOL_F_PAGEFAULT = 8,
    VHOST_USER_PROTOCOL_F_CONFIG = 9,
    VHOST_USER_PROTOCOL_F_SLAVE_SEND_FD = 10,
    VHOST_USER_PROTOCOL_F_HOST_NOTIFIER = 11,
    VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD = 12,
    VHOST_USER_PROTOCOL_F_MAX
};

/* One bit set for every feature number below VHOST_USER_PROTOCOL_F_MAX */
#define VHOST_USER_PROTOCOL_FEATURE_MASK ((1 << VHOST_USER_PROTOCOL_F_MAX) - 1)
64
/*
 * Request codes stored in VhostUserHeader.request for messages that this
 * file sends to the backend through vhost_user_write().  Replies read back
 * with vhost_user_read() are expected to carry the same code.
 * VHOST_USER_MAX is one past the highest defined request.
 */
typedef enum VhostUserRequest {
    VHOST_USER_NONE = 0,
    VHOST_USER_GET_FEATURES = 1,
    VHOST_USER_SET_FEATURES = 2,
    VHOST_USER_SET_OWNER = 3,
    VHOST_USER_RESET_OWNER = 4,
    VHOST_USER_SET_MEM_TABLE = 5,
    VHOST_USER_SET_LOG_BASE = 6,
    VHOST_USER_SET_LOG_FD = 7,
    VHOST_USER_SET_VRING_NUM = 8,
    VHOST_USER_SET_VRING_ADDR = 9,
    VHOST_USER_SET_VRING_BASE = 10,
    VHOST_USER_GET_VRING_BASE = 11,
    VHOST_USER_SET_VRING_KICK = 12,
    VHOST_USER_SET_VRING_CALL = 13,
    VHOST_USER_SET_VRING_ERR = 14,
    VHOST_USER_GET_PROTOCOL_FEATURES = 15,
    VHOST_USER_SET_PROTOCOL_FEATURES = 16,
    VHOST_USER_GET_QUEUE_NUM = 17,
    VHOST_USER_SET_VRING_ENABLE = 18,
    VHOST_USER_SEND_RARP = 19,
    VHOST_USER_NET_SET_MTU = 20,
    VHOST_USER_SET_SLAVE_REQ_FD = 21,
    VHOST_USER_IOTLB_MSG = 22,
    VHOST_USER_SET_VRING_ENDIAN = 23,
    VHOST_USER_GET_CONFIG = 24,
    VHOST_USER_SET_CONFIG = 25,
    VHOST_USER_CREATE_CRYPTO_SESSION = 26,
    VHOST_USER_CLOSE_CRYPTO_SESSION = 27,
    VHOST_USER_POSTCOPY_ADVISE = 28,
    VHOST_USER_POSTCOPY_LISTEN = 29,
    VHOST_USER_POSTCOPY_END = 30,
    VHOST_USER_GET_INFLIGHT_FD = 31,
    VHOST_USER_SET_INFLIGHT_FD = 32,
    VHOST_USER_MAX
} VhostUserRequest;
101
/*
 * Request codes of messages arriving on the slave fd; slave_read() switches
 * on them and reports any other value as an unexpected message type.
 */
typedef enum VhostUserSlaveRequest {
    VHOST_USER_SLAVE_NONE = 0,
    VHOST_USER_SLAVE_IOTLB_MSG = 1,
    VHOST_USER_SLAVE_CONFIG_CHANGE_MSG = 2,
    VHOST_USER_SLAVE_VRING_HOST_NOTIFIER_MSG = 3,
    VHOST_USER_SLAVE_MAX
} VhostUserSlaveRequest;
109
/*
 * One entry of the VHOST_USER_SET_MEM_TABLE payload.  The set_mem_table
 * functions fill the first three fields from the matching
 * struct vhost_memory_region.
 */
typedef struct VhostUserMemoryRegion {
    uint64_t guest_phys_addr;
    uint64_t memory_size;
    uint64_t userspace_addr;
    /* Offset reported by memory_region_from_host() for userspace_addr */
    uint64_t mmap_offset;
} VhostUserMemoryRegion;
116
/* Payload of VHOST_USER_SET_MEM_TABLE */
typedef struct VhostUserMemory {
    /* Number of leading entries of regions[] that are filled in */
    uint32_t nregions;
    uint32_t padding;
    VhostUserMemoryRegion regions[VHOST_MEMORY_MAX_NREGIONS];
} VhostUserMemory;
122
/* Payload of VHOST_USER_SET_LOG_BASE, filled in by vhost_user_set_log_base() */
typedef struct VhostUserLog {
    /* Set to log->size * sizeof(*(log->log)) */
    uint64_t mmap_size;
    /* Always sent as 0 by vhost_user_set_log_base() */
    uint64_t mmap_offset;
} VhostUserLog;
127
/*
 * Config-space payload member of VhostUserPayload.
 * NOTE(review): the functions that fill this in are not in the visible part
 * of the file; presumably the GET_CONFIG/SET_CONFIG handlers - confirm.
 */
typedef struct VhostUserConfig {
    uint32_t offset;
    uint32_t size;
    uint32_t flags;
    /* Data area; holds at most VHOST_USER_MAX_CONFIG_SIZE bytes */
    uint8_t region[VHOST_USER_MAX_CONFIG_SIZE];
} VhostUserConfig;
134
/* Sizes of the auth_key[] and key[] arrays in VhostUserCryptoSession */
#define VHOST_CRYPTO_SYM_HMAC_MAX_KEY_LEN 512
#define VHOST_CRYPTO_SYM_CIPHER_MAX_KEY_LEN 64

/*
 * Crypto-session payload member of VhostUserPayload.
 * NOTE(review): its users are outside the visible part of the file;
 * presumably the CREATE/CLOSE_CRYPTO_SESSION requests - confirm.
 */
typedef struct VhostUserCryptoSession {
    /* session id for success, -1 on errors */
    int64_t session_id;
    CryptoDevBackendSymSessionInfo session_setup_data;
    uint8_t key[VHOST_CRYPTO_SYM_CIPHER_MAX_KEY_LEN];
    uint8_t auth_key[VHOST_CRYPTO_SYM_HMAC_MAX_KEY_LEN];
} VhostUserCryptoSession;
145
/* Dummy object; only ever used as a sizeof() operand by the macro below */
static VhostUserConfig c __attribute__ ((unused));
/* Combined size of the offset, size and flags fields of VhostUserConfig */
#define VHOST_USER_CONFIG_HDR_SIZE (sizeof(c.offset) \
                                   + sizeof(c.size) \
                                   + sizeof(c.flags))
150
/*
 * Payload of VHOST_USER_SLAVE_VRING_HOST_NOTIFIER_MSG, consumed by
 * vhost_user_slave_handle_vring_host_notifier().
 */
typedef struct VhostUserVringArea {
    /*
     * Queue index in the bits covered by VHOST_USER_VRING_IDX_MASK, plus the
     * VHOST_USER_VRING_NOFD_MASK flag.
     */
    uint64_t u64;
    /* The handler rejects any value other than the host page size */
    uint64_t size;
    /* Passed to mmap() as the offset into the received fd */
    uint64_t offset;
} VhostUserVringArea;
156
/*
 * Inflight-buffer payload member of VhostUserPayload.
 * NOTE(review): its users are outside the visible part of the file;
 * presumably the GET/SET_INFLIGHT_FD requests - confirm.
 */
typedef struct VhostUserInflight {
    uint64_t mmap_size;
    uint64_t mmap_offset;
    uint16_t num_queues;
    uint16_t queue_size;
} VhostUserInflight;
163
/*
 * Fixed-size header that precedes every message; VHOST_USER_HDR_SIZE is its
 * sizeof().  The struct is packed.
 */
typedef struct {
    /* Request code of the message */
    VhostUserRequest request;

/* Bits of flags; the low bits carry VHOST_USER_VERSION */
#define VHOST_USER_VERSION_MASK (0x3)
/* Expected, together with the version, in every header read from the backend */
#define VHOST_USER_REPLY_MASK (0x1<<2)
/* Set by the sender when it wants a reply to be read/sent for this message */
#define VHOST_USER_NEED_REPLY_MASK (0x1 << 3)
    uint32_t flags;
    uint32_t size; /* the following payload size */
} QEMU_PACKED VhostUserHeader;
173
/*
 * Message body.  Which member is valid depends on the request in the
 * header; VHOST_USER_PAYLOAD_SIZE is the sizeof() of this union and is the
 * upper bound accepted for VhostUserHeader.size.
 */
typedef union {
/* Layout of u64 when it carries a vring index (see vhost_set_vring_file()) */
#define VHOST_USER_VRING_IDX_MASK (0xff)
/* Set in u64 when no file descriptor accompanies the message */
#define VHOST_USER_VRING_NOFD_MASK (0x1<<8)
    uint64_t u64;
    struct vhost_vring_state state;
    struct vhost_vring_addr addr;
    VhostUserMemory memory;
    VhostUserLog log;
    struct vhost_iotlb_msg iotlb;
    VhostUserConfig config;
    VhostUserCryptoSession session;
    VhostUserVringArea area;
    VhostUserInflight inflight;
} VhostUserPayload;
188
/*
 * A complete message: header immediately followed by the payload (packed).
 * vhost_user_write() sends the header plus hdr.size payload bytes.
 */
typedef struct VhostUserMsg {
    VhostUserHeader hdr;
    VhostUserPayload payload;
} QEMU_PACKED VhostUserMsg;
193
/* Dummy object; not referenced by the code visible in this part of the file */
static VhostUserMsg m __attribute__ ((unused));
/* Number of bytes occupied by the message header */
#define VHOST_USER_HDR_SIZE (sizeof(VhostUserHeader))

/* Largest payload size accepted when reading a message */
#define VHOST_USER_PAYLOAD_SIZE (sizeof(VhostUserPayload))

/* The version of the protocol we support */
#define VHOST_USER_VERSION (0x1)
201
/* Per-vhost_dev state of this backend; reached through dev->opaque */
struct vhost_user {
    struct vhost_dev *dev;
    /* Shared between vhost devs of the same virtio device */
    VhostUserState *user;
    /*
     * QEMU's end of the socketpair created by vhost_setup_slave_channel();
     * set to -1 once that end has been closed.
     */
    int slave_fd;
    NotifierWithReturn postcopy_notifier;
    struct PostCopyFD postcopy_fd;
    /*
     * Per-region userspace_addr values taken from the backend's reply in
     * vhost_user_set_mem_table_postcopy().
     */
    uint64_t postcopy_client_bases[VHOST_MEMORY_MAX_NREGIONS];
    /* Length of the region_rb and region_rb_offset arrays */
    size_t region_rb_len;
    /* RAMBlock associated with a given region */
    RAMBlock **region_rb;
    /* The offset from the start of the RAMBlock to the start of the
     * vhost region.
     */
    ram_addr_t *region_rb_offset;

    /* True once we've entered postcopy_listen */
    bool postcopy_listen;
};
222
/*
 * Returns true when kvm_enabled() is false; otherwise returns whatever
 * kvm_eventfds_enabled() reports.
 */
static bool ioeventfd_enabled(void)
{
    if (!kvm_enabled()) {
        return true;
    }

    return kvm_eventfds_enabled();
}
227
228 static int vhost_user_read_header(struct vhost_dev *dev, VhostUserMsg *msg)
229 {
230 struct vhost_user *u = dev->opaque;
231 CharBackend *chr = u->user->chr;
232 uint8_t *p = (uint8_t *) msg;
233 int r, size = VHOST_USER_HDR_SIZE;
234
235 r = qemu_chr_fe_read_all(chr, p, size);
236 if (r != size) {
237 error_report("Failed to read msg header. Read %d instead of %d."
238 " Original request %d.", r, size, msg->hdr.request);
239 return -1;
240 }
241
242 /* validate received flags */
243 if (msg->hdr.flags != (VHOST_USER_REPLY_MASK | VHOST_USER_VERSION)) {
244 error_report("Failed to read msg header."
245 " Flags 0x%x instead of 0x%x.", msg->hdr.flags,
246 VHOST_USER_REPLY_MASK | VHOST_USER_VERSION);
247 return -1;
248 }
249
250 return 0;
251 }
252
253 static int vhost_user_read(struct vhost_dev *dev, VhostUserMsg *msg)
254 {
255 struct vhost_user *u = dev->opaque;
256 CharBackend *chr = u->user->chr;
257 uint8_t *p = (uint8_t *) msg;
258 int r, size;
259
260 if (vhost_user_read_header(dev, msg) < 0) {
261 return -1;
262 }
263
264 /* validate message size is sane */
265 if (msg->hdr.size > VHOST_USER_PAYLOAD_SIZE) {
266 error_report("Failed to read msg header."
267 " Size %d exceeds the maximum %zu.", msg->hdr.size,
268 VHOST_USER_PAYLOAD_SIZE);
269 return -1;
270 }
271
272 if (msg->hdr.size) {
273 p += VHOST_USER_HDR_SIZE;
274 size = msg->hdr.size;
275 r = qemu_chr_fe_read_all(chr, p, size);
276 if (r != size) {
277 error_report("Failed to read msg payload."
278 " Read %d instead of %d.", r, msg->hdr.size);
279 return -1;
280 }
281 }
282
283 return 0;
284 }
285
286 static int process_message_reply(struct vhost_dev *dev,
287 const VhostUserMsg *msg)
288 {
289 VhostUserMsg msg_reply;
290
291 if ((msg->hdr.flags & VHOST_USER_NEED_REPLY_MASK) == 0) {
292 return 0;
293 }
294
295 if (vhost_user_read(dev, &msg_reply) < 0) {
296 return -1;
297 }
298
299 if (msg_reply.hdr.request != msg->hdr.request) {
300 error_report("Received unexpected msg type."
301 "Expected %d received %d",
302 msg->hdr.request, msg_reply.hdr.request);
303 return -1;
304 }
305
306 return msg_reply.payload.u64 ? -1 : 0;
307 }
308
309 static bool vhost_user_one_time_request(VhostUserRequest request)
310 {
311 switch (request) {
312 case VHOST_USER_SET_OWNER:
313 case VHOST_USER_RESET_OWNER:
314 case VHOST_USER_SET_MEM_TABLE:
315 case VHOST_USER_GET_QUEUE_NUM:
316 case VHOST_USER_NET_SET_MTU:
317 return true;
318 default:
319 return false;
320 }
321 }
322
323 /* most non-init callers ignore the error */
324 static int vhost_user_write(struct vhost_dev *dev, VhostUserMsg *msg,
325 int *fds, int fd_num)
326 {
327 struct vhost_user *u = dev->opaque;
328 CharBackend *chr = u->user->chr;
329 int ret, size = VHOST_USER_HDR_SIZE + msg->hdr.size;
330
331 /*
332 * For non-vring specific requests, like VHOST_USER_SET_MEM_TABLE,
333 * we just need send it once in the first time. For later such
334 * request, we just ignore it.
335 */
336 if (vhost_user_one_time_request(msg->hdr.request) && dev->vq_index != 0) {
337 msg->hdr.flags &= ~VHOST_USER_NEED_REPLY_MASK;
338 return 0;
339 }
340
341 if (qemu_chr_fe_set_msgfds(chr, fds, fd_num) < 0) {
342 error_report("Failed to set msg fds.");
343 return -1;
344 }
345
346 ret = qemu_chr_fe_write_all(chr, (const uint8_t *) msg, size);
347 if (ret != size) {
348 error_report("Failed to write msg."
349 " Wrote %d instead of %d.", ret, size);
350 return -1;
351 }
352
353 return 0;
354 }
355
356 static int vhost_user_set_log_base(struct vhost_dev *dev, uint64_t base,
357 struct vhost_log *log)
358 {
359 int fds[VHOST_MEMORY_MAX_NREGIONS];
360 size_t fd_num = 0;
361 bool shmfd = virtio_has_feature(dev->protocol_features,
362 VHOST_USER_PROTOCOL_F_LOG_SHMFD);
363 VhostUserMsg msg = {
364 .hdr.request = VHOST_USER_SET_LOG_BASE,
365 .hdr.flags = VHOST_USER_VERSION,
366 .payload.log.mmap_size = log->size * sizeof(*(log->log)),
367 .payload.log.mmap_offset = 0,
368 .hdr.size = sizeof(msg.payload.log),
369 };
370
371 if (shmfd && log->fd != -1) {
372 fds[fd_num++] = log->fd;
373 }
374
375 if (vhost_user_write(dev, &msg, fds, fd_num) < 0) {
376 return -1;
377 }
378
379 if (shmfd) {
380 msg.hdr.size = 0;
381 if (vhost_user_read(dev, &msg) < 0) {
382 return -1;
383 }
384
385 if (msg.hdr.request != VHOST_USER_SET_LOG_BASE) {
386 error_report("Received unexpected msg type. "
387 "Expected %d received %d",
388 VHOST_USER_SET_LOG_BASE, msg.hdr.request);
389 return -1;
390 }
391 }
392
393 return 0;
394 }
395
396 static int vhost_user_set_mem_table_postcopy(struct vhost_dev *dev,
397 struct vhost_memory *mem)
398 {
399 struct vhost_user *u = dev->opaque;
400 int fds[VHOST_MEMORY_MAX_NREGIONS];
401 int i, fd;
402 size_t fd_num = 0;
403 VhostUserMsg msg_reply;
404 int region_i, msg_i;
405
406 VhostUserMsg msg = {
407 .hdr.request = VHOST_USER_SET_MEM_TABLE,
408 .hdr.flags = VHOST_USER_VERSION,
409 };
410
411 if (u->region_rb_len < dev->mem->nregions) {
412 u->region_rb = g_renew(RAMBlock*, u->region_rb, dev->mem->nregions);
413 u->region_rb_offset = g_renew(ram_addr_t, u->region_rb_offset,
414 dev->mem->nregions);
415 memset(&(u->region_rb[u->region_rb_len]), '\0',
416 sizeof(RAMBlock *) * (dev->mem->nregions - u->region_rb_len));
417 memset(&(u->region_rb_offset[u->region_rb_len]), '\0',
418 sizeof(ram_addr_t) * (dev->mem->nregions - u->region_rb_len));
419 u->region_rb_len = dev->mem->nregions;
420 }
421
422 for (i = 0; i < dev->mem->nregions; ++i) {
423 struct vhost_memory_region *reg = dev->mem->regions + i;
424 ram_addr_t offset;
425 MemoryRegion *mr;
426
427 assert((uintptr_t)reg->userspace_addr == reg->userspace_addr);
428 mr = memory_region_from_host((void *)(uintptr_t)reg->userspace_addr,
429 &offset);
430 fd = memory_region_get_fd(mr);
431 if (fd > 0) {
432 trace_vhost_user_set_mem_table_withfd(fd_num, mr->name,
433 reg->memory_size,
434 reg->guest_phys_addr,
435 reg->userspace_addr, offset);
436 u->region_rb_offset[i] = offset;
437 u->region_rb[i] = mr->ram_block;
438 msg.payload.memory.regions[fd_num].userspace_addr =
439 reg->userspace_addr;
440 msg.payload.memory.regions[fd_num].memory_size = reg->memory_size;
441 msg.payload.memory.regions[fd_num].guest_phys_addr =
442 reg->guest_phys_addr;
443 msg.payload.memory.regions[fd_num].mmap_offset = offset;
444 assert(fd_num < VHOST_MEMORY_MAX_NREGIONS);
445 fds[fd_num++] = fd;
446 } else {
447 u->region_rb_offset[i] = 0;
448 u->region_rb[i] = NULL;
449 }
450 }
451
452 msg.payload.memory.nregions = fd_num;
453
454 if (!fd_num) {
455 error_report("Failed initializing vhost-user memory map, "
456 "consider using -object memory-backend-file share=on");
457 return -1;
458 }
459
460 msg.hdr.size = sizeof(msg.payload.memory.nregions);
461 msg.hdr.size += sizeof(msg.payload.memory.padding);
462 msg.hdr.size += fd_num * sizeof(VhostUserMemoryRegion);
463
464 if (vhost_user_write(dev, &msg, fds, fd_num) < 0) {
465 return -1;
466 }
467
468 if (vhost_user_read(dev, &msg_reply) < 0) {
469 return -1;
470 }
471
472 if (msg_reply.hdr.request != VHOST_USER_SET_MEM_TABLE) {
473 error_report("%s: Received unexpected msg type."
474 "Expected %d received %d", __func__,
475 VHOST_USER_SET_MEM_TABLE, msg_reply.hdr.request);
476 return -1;
477 }
478 /* We're using the same structure, just reusing one of the
479 * fields, so it should be the same size.
480 */
481 if (msg_reply.hdr.size != msg.hdr.size) {
482 error_report("%s: Unexpected size for postcopy reply "
483 "%d vs %d", __func__, msg_reply.hdr.size, msg.hdr.size);
484 return -1;
485 }
486
487 memset(u->postcopy_client_bases, 0,
488 sizeof(uint64_t) * VHOST_MEMORY_MAX_NREGIONS);
489
490 /* They're in the same order as the regions that were sent
491 * but some of the regions were skipped (above) if they
492 * didn't have fd's
493 */
494 for (msg_i = 0, region_i = 0;
495 region_i < dev->mem->nregions;
496 region_i++) {
497 if (msg_i < fd_num &&
498 msg_reply.payload.memory.regions[msg_i].guest_phys_addr ==
499 dev->mem->regions[region_i].guest_phys_addr) {
500 u->postcopy_client_bases[region_i] =
501 msg_reply.payload.memory.regions[msg_i].userspace_addr;
502 trace_vhost_user_set_mem_table_postcopy(
503 msg_reply.payload.memory.regions[msg_i].userspace_addr,
504 msg.payload.memory.regions[msg_i].userspace_addr,
505 msg_i, region_i);
506 msg_i++;
507 }
508 }
509 if (msg_i != fd_num) {
510 error_report("%s: postcopy reply not fully consumed "
511 "%d vs %zd",
512 __func__, msg_i, fd_num);
513 return -1;
514 }
515 /* Now we've registered this with the postcopy code, we ack to the client,
516 * because now we're in the position to be able to deal with any faults
517 * it generates.
518 */
519 /* TODO: Use this for failure cases as well with a bad value */
520 msg.hdr.size = sizeof(msg.payload.u64);
521 msg.payload.u64 = 0; /* OK */
522 if (vhost_user_write(dev, &msg, NULL, 0) < 0) {
523 return -1;
524 }
525
526 return 0;
527 }
528
529 static int vhost_user_set_mem_table(struct vhost_dev *dev,
530 struct vhost_memory *mem)
531 {
532 struct vhost_user *u = dev->opaque;
533 int fds[VHOST_MEMORY_MAX_NREGIONS];
534 int i, fd;
535 size_t fd_num = 0;
536 bool do_postcopy = u->postcopy_listen && u->postcopy_fd.handler;
537 bool reply_supported = virtio_has_feature(dev->protocol_features,
538 VHOST_USER_PROTOCOL_F_REPLY_ACK);
539
540 if (do_postcopy) {
541 /* Postcopy has enough differences that it's best done in it's own
542 * version
543 */
544 return vhost_user_set_mem_table_postcopy(dev, mem);
545 }
546
547 VhostUserMsg msg = {
548 .hdr.request = VHOST_USER_SET_MEM_TABLE,
549 .hdr.flags = VHOST_USER_VERSION,
550 };
551
552 if (reply_supported) {
553 msg.hdr.flags |= VHOST_USER_NEED_REPLY_MASK;
554 }
555
556 for (i = 0; i < dev->mem->nregions; ++i) {
557 struct vhost_memory_region *reg = dev->mem->regions + i;
558 ram_addr_t offset;
559 MemoryRegion *mr;
560
561 assert((uintptr_t)reg->userspace_addr == reg->userspace_addr);
562 mr = memory_region_from_host((void *)(uintptr_t)reg->userspace_addr,
563 &offset);
564 fd = memory_region_get_fd(mr);
565 if (fd > 0) {
566 if (fd_num == VHOST_MEMORY_MAX_NREGIONS) {
567 error_report("Failed preparing vhost-user memory table msg");
568 return -1;
569 }
570 msg.payload.memory.regions[fd_num].userspace_addr =
571 reg->userspace_addr;
572 msg.payload.memory.regions[fd_num].memory_size = reg->memory_size;
573 msg.payload.memory.regions[fd_num].guest_phys_addr =
574 reg->guest_phys_addr;
575 msg.payload.memory.regions[fd_num].mmap_offset = offset;
576 fds[fd_num++] = fd;
577 }
578 }
579
580 msg.payload.memory.nregions = fd_num;
581
582 if (!fd_num) {
583 error_report("Failed initializing vhost-user memory map, "
584 "consider using -object memory-backend-file share=on");
585 return -1;
586 }
587
588 msg.hdr.size = sizeof(msg.payload.memory.nregions);
589 msg.hdr.size += sizeof(msg.payload.memory.padding);
590 msg.hdr.size += fd_num * sizeof(VhostUserMemoryRegion);
591
592 if (vhost_user_write(dev, &msg, fds, fd_num) < 0) {
593 return -1;
594 }
595
596 if (reply_supported) {
597 return process_message_reply(dev, &msg);
598 }
599
600 return 0;
601 }
602
603 static int vhost_user_set_vring_addr(struct vhost_dev *dev,
604 struct vhost_vring_addr *addr)
605 {
606 VhostUserMsg msg = {
607 .hdr.request = VHOST_USER_SET_VRING_ADDR,
608 .hdr.flags = VHOST_USER_VERSION,
609 .payload.addr = *addr,
610 .hdr.size = sizeof(msg.payload.addr),
611 };
612
613 if (vhost_user_write(dev, &msg, NULL, 0) < 0) {
614 return -1;
615 }
616
617 return 0;
618 }
619
620 static int vhost_user_set_vring_endian(struct vhost_dev *dev,
621 struct vhost_vring_state *ring)
622 {
623 bool cross_endian = virtio_has_feature(dev->protocol_features,
624 VHOST_USER_PROTOCOL_F_CROSS_ENDIAN);
625 VhostUserMsg msg = {
626 .hdr.request = VHOST_USER_SET_VRING_ENDIAN,
627 .hdr.flags = VHOST_USER_VERSION,
628 .payload.state = *ring,
629 .hdr.size = sizeof(msg.payload.state),
630 };
631
632 if (!cross_endian) {
633 error_report("vhost-user trying to send unhandled ioctl");
634 return -1;
635 }
636
637 if (vhost_user_write(dev, &msg, NULL, 0) < 0) {
638 return -1;
639 }
640
641 return 0;
642 }
643
644 static int vhost_set_vring(struct vhost_dev *dev,
645 unsigned long int request,
646 struct vhost_vring_state *ring)
647 {
648 VhostUserMsg msg = {
649 .hdr.request = request,
650 .hdr.flags = VHOST_USER_VERSION,
651 .payload.state = *ring,
652 .hdr.size = sizeof(msg.payload.state),
653 };
654
655 if (vhost_user_write(dev, &msg, NULL, 0) < 0) {
656 return -1;
657 }
658
659 return 0;
660 }
661
662 static int vhost_user_set_vring_num(struct vhost_dev *dev,
663 struct vhost_vring_state *ring)
664 {
665 return vhost_set_vring(dev, VHOST_USER_SET_VRING_NUM, ring);
666 }
667
668 static void vhost_user_host_notifier_restore(struct vhost_dev *dev,
669 int queue_idx)
670 {
671 struct vhost_user *u = dev->opaque;
672 VhostUserHostNotifier *n = &u->user->notifier[queue_idx];
673 VirtIODevice *vdev = dev->vdev;
674
675 if (n->addr && !n->set) {
676 virtio_queue_set_host_notifier_mr(vdev, queue_idx, &n->mr, true);
677 n->set = true;
678 }
679 }
680
681 static void vhost_user_host_notifier_remove(struct vhost_dev *dev,
682 int queue_idx)
683 {
684 struct vhost_user *u = dev->opaque;
685 VhostUserHostNotifier *n = &u->user->notifier[queue_idx];
686 VirtIODevice *vdev = dev->vdev;
687
688 if (n->addr && n->set) {
689 virtio_queue_set_host_notifier_mr(vdev, queue_idx, &n->mr, false);
690 n->set = false;
691 }
692 }
693
694 static int vhost_user_set_vring_base(struct vhost_dev *dev,
695 struct vhost_vring_state *ring)
696 {
697 vhost_user_host_notifier_restore(dev, ring->index);
698
699 return vhost_set_vring(dev, VHOST_USER_SET_VRING_BASE, ring);
700 }
701
702 static int vhost_user_set_vring_enable(struct vhost_dev *dev, int enable)
703 {
704 int i;
705
706 if (!virtio_has_feature(dev->features, VHOST_USER_F_PROTOCOL_FEATURES)) {
707 return -1;
708 }
709
710 for (i = 0; i < dev->nvqs; ++i) {
711 struct vhost_vring_state state = {
712 .index = dev->vq_index + i,
713 .num = enable,
714 };
715
716 vhost_set_vring(dev, VHOST_USER_SET_VRING_ENABLE, &state);
717 }
718
719 return 0;
720 }
721
722 static int vhost_user_get_vring_base(struct vhost_dev *dev,
723 struct vhost_vring_state *ring)
724 {
725 VhostUserMsg msg = {
726 .hdr.request = VHOST_USER_GET_VRING_BASE,
727 .hdr.flags = VHOST_USER_VERSION,
728 .payload.state = *ring,
729 .hdr.size = sizeof(msg.payload.state),
730 };
731
732 vhost_user_host_notifier_remove(dev, ring->index);
733
734 if (vhost_user_write(dev, &msg, NULL, 0) < 0) {
735 return -1;
736 }
737
738 if (vhost_user_read(dev, &msg) < 0) {
739 return -1;
740 }
741
742 if (msg.hdr.request != VHOST_USER_GET_VRING_BASE) {
743 error_report("Received unexpected msg type. Expected %d received %d",
744 VHOST_USER_GET_VRING_BASE, msg.hdr.request);
745 return -1;
746 }
747
748 if (msg.hdr.size != sizeof(msg.payload.state)) {
749 error_report("Received bad msg size.");
750 return -1;
751 }
752
753 *ring = msg.payload.state;
754
755 return 0;
756 }
757
758 static int vhost_set_vring_file(struct vhost_dev *dev,
759 VhostUserRequest request,
760 struct vhost_vring_file *file)
761 {
762 int fds[VHOST_MEMORY_MAX_NREGIONS];
763 size_t fd_num = 0;
764 VhostUserMsg msg = {
765 .hdr.request = request,
766 .hdr.flags = VHOST_USER_VERSION,
767 .payload.u64 = file->index & VHOST_USER_VRING_IDX_MASK,
768 .hdr.size = sizeof(msg.payload.u64),
769 };
770
771 if (ioeventfd_enabled() && file->fd > 0) {
772 fds[fd_num++] = file->fd;
773 } else {
774 msg.payload.u64 |= VHOST_USER_VRING_NOFD_MASK;
775 }
776
777 if (vhost_user_write(dev, &msg, fds, fd_num) < 0) {
778 return -1;
779 }
780
781 return 0;
782 }
783
784 static int vhost_user_set_vring_kick(struct vhost_dev *dev,
785 struct vhost_vring_file *file)
786 {
787 return vhost_set_vring_file(dev, VHOST_USER_SET_VRING_KICK, file);
788 }
789
790 static int vhost_user_set_vring_call(struct vhost_dev *dev,
791 struct vhost_vring_file *file)
792 {
793 return vhost_set_vring_file(dev, VHOST_USER_SET_VRING_CALL, file);
794 }
795
796 static int vhost_user_set_u64(struct vhost_dev *dev, int request, uint64_t u64)
797 {
798 VhostUserMsg msg = {
799 .hdr.request = request,
800 .hdr.flags = VHOST_USER_VERSION,
801 .payload.u64 = u64,
802 .hdr.size = sizeof(msg.payload.u64),
803 };
804
805 if (vhost_user_write(dev, &msg, NULL, 0) < 0) {
806 return -1;
807 }
808
809 return 0;
810 }
811
812 static int vhost_user_set_features(struct vhost_dev *dev,
813 uint64_t features)
814 {
815 return vhost_user_set_u64(dev, VHOST_USER_SET_FEATURES, features);
816 }
817
818 static int vhost_user_set_protocol_features(struct vhost_dev *dev,
819 uint64_t features)
820 {
821 return vhost_user_set_u64(dev, VHOST_USER_SET_PROTOCOL_FEATURES, features);
822 }
823
824 static int vhost_user_get_u64(struct vhost_dev *dev, int request, uint64_t *u64)
825 {
826 VhostUserMsg msg = {
827 .hdr.request = request,
828 .hdr.flags = VHOST_USER_VERSION,
829 };
830
831 if (vhost_user_one_time_request(request) && dev->vq_index != 0) {
832 return 0;
833 }
834
835 if (vhost_user_write(dev, &msg, NULL, 0) < 0) {
836 return -1;
837 }
838
839 if (vhost_user_read(dev, &msg) < 0) {
840 return -1;
841 }
842
843 if (msg.hdr.request != request) {
844 error_report("Received unexpected msg type. Expected %d received %d",
845 request, msg.hdr.request);
846 return -1;
847 }
848
849 if (msg.hdr.size != sizeof(msg.payload.u64)) {
850 error_report("Received bad msg size.");
851 return -1;
852 }
853
854 *u64 = msg.payload.u64;
855
856 return 0;
857 }
858
859 static int vhost_user_get_features(struct vhost_dev *dev, uint64_t *features)
860 {
861 return vhost_user_get_u64(dev, VHOST_USER_GET_FEATURES, features);
862 }
863
864 static int vhost_user_set_owner(struct vhost_dev *dev)
865 {
866 VhostUserMsg msg = {
867 .hdr.request = VHOST_USER_SET_OWNER,
868 .hdr.flags = VHOST_USER_VERSION,
869 };
870
871 if (vhost_user_write(dev, &msg, NULL, 0) < 0) {
872 return -1;
873 }
874
875 return 0;
876 }
877
878 static int vhost_user_reset_device(struct vhost_dev *dev)
879 {
880 VhostUserMsg msg = {
881 .hdr.request = VHOST_USER_RESET_OWNER,
882 .hdr.flags = VHOST_USER_VERSION,
883 };
884
885 if (vhost_user_write(dev, &msg, NULL, 0) < 0) {
886 return -1;
887 }
888
889 return 0;
890 }
891
892 static int vhost_user_slave_handle_config_change(struct vhost_dev *dev)
893 {
894 int ret = -1;
895
896 if (!dev->config_ops) {
897 return -1;
898 }
899
900 if (dev->config_ops->vhost_dev_config_notifier) {
901 ret = dev->config_ops->vhost_dev_config_notifier(dev);
902 }
903
904 return ret;
905 }
906
907 static int vhost_user_slave_handle_vring_host_notifier(struct vhost_dev *dev,
908 VhostUserVringArea *area,
909 int fd)
910 {
911 int queue_idx = area->u64 & VHOST_USER_VRING_IDX_MASK;
912 size_t page_size = qemu_real_host_page_size;
913 struct vhost_user *u = dev->opaque;
914 VhostUserState *user = u->user;
915 VirtIODevice *vdev = dev->vdev;
916 VhostUserHostNotifier *n;
917 void *addr;
918 char *name;
919
920 if (!virtio_has_feature(dev->protocol_features,
921 VHOST_USER_PROTOCOL_F_HOST_NOTIFIER) ||
922 vdev == NULL || queue_idx >= virtio_get_num_queues(vdev)) {
923 return -1;
924 }
925
926 n = &user->notifier[queue_idx];
927
928 if (n->addr) {
929 virtio_queue_set_host_notifier_mr(vdev, queue_idx, &n->mr, false);
930 object_unparent(OBJECT(&n->mr));
931 munmap(n->addr, page_size);
932 n->addr = NULL;
933 }
934
935 if (area->u64 & VHOST_USER_VRING_NOFD_MASK) {
936 return 0;
937 }
938
939 /* Sanity check. */
940 if (area->size != page_size) {
941 return -1;
942 }
943
944 addr = mmap(NULL, page_size, PROT_READ | PROT_WRITE, MAP_SHARED,
945 fd, area->offset);
946 if (addr == MAP_FAILED) {
947 return -1;
948 }
949
950 name = g_strdup_printf("vhost-user/host-notifier@%p mmaps[%d]",
951 user, queue_idx);
952 memory_region_init_ram_device_ptr(&n->mr, OBJECT(vdev), name,
953 page_size, addr);
954 g_free(name);
955
956 if (virtio_queue_set_host_notifier_mr(vdev, queue_idx, &n->mr, true)) {
957 munmap(addr, page_size);
958 return -1;
959 }
960
961 n->addr = addr;
962 n->set = true;
963
964 return 0;
965 }
966
967 static void slave_read(void *opaque)
968 {
969 struct vhost_dev *dev = opaque;
970 struct vhost_user *u = dev->opaque;
971 VhostUserHeader hdr = { 0, };
972 VhostUserPayload payload = { 0, };
973 int size, ret = 0;
974 struct iovec iov;
975 struct msghdr msgh;
976 int fd[VHOST_USER_SLAVE_MAX_FDS];
977 char control[CMSG_SPACE(sizeof(fd))];
978 struct cmsghdr *cmsg;
979 int i, fdsize = 0;
980
981 memset(&msgh, 0, sizeof(msgh));
982 msgh.msg_iov = &iov;
983 msgh.msg_iovlen = 1;
984 msgh.msg_control = control;
985 msgh.msg_controllen = sizeof(control);
986
987 memset(fd, -1, sizeof(fd));
988
989 /* Read header */
990 iov.iov_base = &hdr;
991 iov.iov_len = VHOST_USER_HDR_SIZE;
992
993 do {
994 size = recvmsg(u->slave_fd, &msgh, 0);
995 } while (size < 0 && (errno == EINTR || errno == EAGAIN));
996
997 if (size != VHOST_USER_HDR_SIZE) {
998 error_report("Failed to read from slave.");
999 goto err;
1000 }
1001
1002 if (msgh.msg_flags & MSG_CTRUNC) {
1003 error_report("Truncated message.");
1004 goto err;
1005 }
1006
1007 for (cmsg = CMSG_FIRSTHDR(&msgh); cmsg != NULL;
1008 cmsg = CMSG_NXTHDR(&msgh, cmsg)) {
1009 if (cmsg->cmsg_level == SOL_SOCKET &&
1010 cmsg->cmsg_type == SCM_RIGHTS) {
1011 fdsize = cmsg->cmsg_len - CMSG_LEN(0);
1012 memcpy(fd, CMSG_DATA(cmsg), fdsize);
1013 break;
1014 }
1015 }
1016
1017 if (hdr.size > VHOST_USER_PAYLOAD_SIZE) {
1018 error_report("Failed to read msg header."
1019 " Size %d exceeds the maximum %zu.", hdr.size,
1020 VHOST_USER_PAYLOAD_SIZE);
1021 goto err;
1022 }
1023
1024 /* Read payload */
1025 do {
1026 size = read(u->slave_fd, &payload, hdr.size);
1027 } while (size < 0 && (errno == EINTR || errno == EAGAIN));
1028
1029 if (size != hdr.size) {
1030 error_report("Failed to read payload from slave.");
1031 goto err;
1032 }
1033
1034 switch (hdr.request) {
1035 case VHOST_USER_SLAVE_IOTLB_MSG:
1036 ret = vhost_backend_handle_iotlb_msg(dev, &payload.iotlb);
1037 break;
1038 case VHOST_USER_SLAVE_CONFIG_CHANGE_MSG :
1039 ret = vhost_user_slave_handle_config_change(dev);
1040 break;
1041 case VHOST_USER_SLAVE_VRING_HOST_NOTIFIER_MSG:
1042 ret = vhost_user_slave_handle_vring_host_notifier(dev, &payload.area,
1043 fd[0]);
1044 break;
1045 default:
1046 error_report("Received unexpected msg type.");
1047 ret = -EINVAL;
1048 }
1049
1050 /* Close the remaining file descriptors. */
1051 for (i = 0; i < fdsize; i++) {
1052 if (fd[i] != -1) {
1053 close(fd[i]);
1054 }
1055 }
1056
1057 /*
1058 * REPLY_ACK feature handling. Other reply types has to be managed
1059 * directly in their request handlers.
1060 */
1061 if (hdr.flags & VHOST_USER_NEED_REPLY_MASK) {
1062 struct iovec iovec[2];
1063
1064
1065 hdr.flags &= ~VHOST_USER_NEED_REPLY_MASK;
1066 hdr.flags |= VHOST_USER_REPLY_MASK;
1067
1068 payload.u64 = !!ret;
1069 hdr.size = sizeof(payload.u64);
1070
1071 iovec[0].iov_base = &hdr;
1072 iovec[0].iov_len = VHOST_USER_HDR_SIZE;
1073 iovec[1].iov_base = &payload;
1074 iovec[1].iov_len = hdr.size;
1075
1076 do {
1077 size = writev(u->slave_fd, iovec, ARRAY_SIZE(iovec));
1078 } while (size < 0 && (errno == EINTR || errno == EAGAIN));
1079
1080 if (size != VHOST_USER_HDR_SIZE + hdr.size) {
1081 error_report("Failed to send msg reply to slave.");
1082 goto err;
1083 }
1084 }
1085
1086 return;
1087
1088 err:
1089 qemu_set_fd_handler(u->slave_fd, NULL, NULL, NULL);
1090 close(u->slave_fd);
1091 u->slave_fd = -1;
1092 for (i = 0; i < fdsize; i++) {
1093 if (fd[i] != -1) {
1094 close(fd[i]);
1095 }
1096 }
1097 return;
1098 }
1099
1100 static int vhost_setup_slave_channel(struct vhost_dev *dev)
1101 {
1102 VhostUserMsg msg = {
1103 .hdr.request = VHOST_USER_SET_SLAVE_REQ_FD,
1104 .hdr.flags = VHOST_USER_VERSION,
1105 };
1106 struct vhost_user *u = dev->opaque;
1107 int sv[2], ret = 0;
1108 bool reply_supported = virtio_has_feature(dev->protocol_features,
1109 VHOST_USER_PROTOCOL_F_REPLY_ACK);
1110
1111 if (!virtio_has_feature(dev->protocol_features,
1112 VHOST_USER_PROTOCOL_F_SLAVE_REQ)) {
1113 return 0;
1114 }
1115
1116 if (socketpair(PF_UNIX, SOCK_STREAM, 0, sv) == -1) {
1117 error_report("socketpair() failed");
1118 return -1;
1119 }
1120
1121 u->slave_fd = sv[0];
1122 qemu_set_fd_handler(u->slave_fd, slave_read, NULL, dev);
1123
1124 if (reply_supported) {
1125 msg.hdr.flags |= VHOST_USER_NEED_REPLY_MASK;
1126 }
1127
1128 ret = vhost_user_write(dev, &msg, &sv[1], 1);
1129 if (ret) {
1130 goto out;
1131 }
1132
1133 if (reply_supported) {
1134 ret = process_message_reply(dev, &msg);
1135 }
1136
1137 out:
1138 close(sv[1]);
1139 if (ret) {
1140 qemu_set_fd_handler(u->slave_fd, NULL, NULL, NULL);
1141 close(u->slave_fd);
1142 u->slave_fd = -1;
1143 }
1144
1145 return ret;
1146 }
1147
1148 #ifdef CONFIG_LINUX
1149 /*
1150 * Called back from the postcopy fault thread when a fault is received on our
1151 * ufd.
1152 * TODO: This is Linux specific
1153 */
1154 static int vhost_user_postcopy_fault_handler(struct PostCopyFD *pcfd,
1155 void *ufd)
1156 {
1157 struct vhost_dev *dev = pcfd->data;
1158 struct vhost_user *u = dev->opaque;
1159 struct uffd_msg *msg = ufd;
1160 uint64_t faultaddr = msg->arg.pagefault.address;
1161 RAMBlock *rb = NULL;
1162 uint64_t rb_offset;
1163 int i;
1164
1165 trace_vhost_user_postcopy_fault_handler(pcfd->idstr, faultaddr,
1166 dev->mem->nregions);
1167 for (i = 0; i < MIN(dev->mem->nregions, u->region_rb_len); i++) {
1168 trace_vhost_user_postcopy_fault_handler_loop(i,
1169 u->postcopy_client_bases[i], dev->mem->regions[i].memory_size);
1170 if (faultaddr >= u->postcopy_client_bases[i]) {
1171 /* Ofset of the fault address in the vhost region */
1172 uint64_t region_offset = faultaddr - u->postcopy_client_bases[i];
1173 if (region_offset < dev->mem->regions[i].memory_size) {
1174 rb_offset = region_offset + u->region_rb_offset[i];
1175 trace_vhost_user_postcopy_fault_handler_found(i,
1176 region_offset, rb_offset);
1177 rb = u->region_rb[i];
1178 return postcopy_request_shared_page(pcfd, rb, faultaddr,
1179 rb_offset);
1180 }
1181 }
1182 }
1183 error_report("%s: Failed to find region for fault %" PRIx64,
1184 __func__, faultaddr);
1185 return -1;
1186 }
1187
1188 static int vhost_user_postcopy_waker(struct PostCopyFD *pcfd, RAMBlock *rb,
1189 uint64_t offset)
1190 {
1191 struct vhost_dev *dev = pcfd->data;
1192 struct vhost_user *u = dev->opaque;
1193 int i;
1194
1195 trace_vhost_user_postcopy_waker(qemu_ram_get_idstr(rb), offset);
1196
1197 if (!u) {
1198 return 0;
1199 }
1200 /* Translate the offset into an address in the clients address space */
1201 for (i = 0; i < MIN(dev->mem->nregions, u->region_rb_len); i++) {
1202 if (u->region_rb[i] == rb &&
1203 offset >= u->region_rb_offset[i] &&
1204 offset < (u->region_rb_offset[i] +
1205 dev->mem->regions[i].memory_size)) {
1206 uint64_t client_addr = (offset - u->region_rb_offset[i]) +
1207 u->postcopy_client_bases[i];
1208 trace_vhost_user_postcopy_waker_found(client_addr);
1209 return postcopy_wake_shared(pcfd, client_addr, rb);
1210 }
1211 }
1212
1213 trace_vhost_user_postcopy_waker_nomatch(qemu_ram_get_idstr(rb), offset);
1214 return 0;
1215 }
1216 #endif
1217
1218 /*
1219 * Called at the start of an inbound postcopy on reception of the
1220 * 'advise' command.
1221 */
1222 static int vhost_user_postcopy_advise(struct vhost_dev *dev, Error **errp)
1223 {
1224 #ifdef CONFIG_LINUX
1225 struct vhost_user *u = dev->opaque;
1226 CharBackend *chr = u->user->chr;
1227 int ufd;
1228 VhostUserMsg msg = {
1229 .hdr.request = VHOST_USER_POSTCOPY_ADVISE,
1230 .hdr.flags = VHOST_USER_VERSION,
1231 };
1232
1233 if (vhost_user_write(dev, &msg, NULL, 0) < 0) {
1234 error_setg(errp, "Failed to send postcopy_advise to vhost");
1235 return -1;
1236 }
1237
1238 if (vhost_user_read(dev, &msg) < 0) {
1239 error_setg(errp, "Failed to get postcopy_advise reply from vhost");
1240 return -1;
1241 }
1242
1243 if (msg.hdr.request != VHOST_USER_POSTCOPY_ADVISE) {
1244 error_setg(errp, "Unexpected msg type. Expected %d received %d",
1245 VHOST_USER_POSTCOPY_ADVISE, msg.hdr.request);
1246 return -1;
1247 }
1248
1249 if (msg.hdr.size) {
1250 error_setg(errp, "Received bad msg size.");
1251 return -1;
1252 }
1253 ufd = qemu_chr_fe_get_msgfd(chr);
1254 if (ufd < 0) {
1255 error_setg(errp, "%s: Failed to get ufd", __func__);
1256 return -1;
1257 }
1258 qemu_set_nonblock(ufd);
1259
1260 /* register ufd with userfault thread */
1261 u->postcopy_fd.fd = ufd;
1262 u->postcopy_fd.data = dev;
1263 u->postcopy_fd.handler = vhost_user_postcopy_fault_handler;
1264 u->postcopy_fd.waker = vhost_user_postcopy_waker;
1265 u->postcopy_fd.idstr = "vhost-user"; /* Need to find unique name */
1266 postcopy_register_shared_ufd(&u->postcopy_fd);
1267 return 0;
1268 #else
1269 error_setg(errp, "Postcopy not supported on non-Linux systems");
1270 return -1;
1271 #endif
1272 }
1273
1274 /*
1275 * Called at the switch to postcopy on reception of the 'listen' command.
1276 */
1277 static int vhost_user_postcopy_listen(struct vhost_dev *dev, Error **errp)
1278 {
1279 struct vhost_user *u = dev->opaque;
1280 int ret;
1281 VhostUserMsg msg = {
1282 .hdr.request = VHOST_USER_POSTCOPY_LISTEN,
1283 .hdr.flags = VHOST_USER_VERSION | VHOST_USER_NEED_REPLY_MASK,
1284 };
1285 u->postcopy_listen = true;
1286 trace_vhost_user_postcopy_listen();
1287 if (vhost_user_write(dev, &msg, NULL, 0) < 0) {
1288 error_setg(errp, "Failed to send postcopy_listen to vhost");
1289 return -1;
1290 }
1291
1292 ret = process_message_reply(dev, &msg);
1293 if (ret) {
1294 error_setg(errp, "Failed to receive reply to postcopy_listen");
1295 return ret;
1296 }
1297
1298 return 0;
1299 }
1300
1301 /*
1302 * Called at the end of postcopy
1303 */
1304 static int vhost_user_postcopy_end(struct vhost_dev *dev, Error **errp)
1305 {
1306 VhostUserMsg msg = {
1307 .hdr.request = VHOST_USER_POSTCOPY_END,
1308 .hdr.flags = VHOST_USER_VERSION | VHOST_USER_NEED_REPLY_MASK,
1309 };
1310 int ret;
1311 struct vhost_user *u = dev->opaque;
1312
1313 trace_vhost_user_postcopy_end_entry();
1314 if (vhost_user_write(dev, &msg, NULL, 0) < 0) {
1315 error_setg(errp, "Failed to send postcopy_end to vhost");
1316 return -1;
1317 }
1318
1319 ret = process_message_reply(dev, &msg);
1320 if (ret) {
1321 error_setg(errp, "Failed to receive reply to postcopy_end");
1322 return ret;
1323 }
1324 postcopy_unregister_shared_ufd(&u->postcopy_fd);
1325 close(u->postcopy_fd.fd);
1326 u->postcopy_fd.handler = NULL;
1327
1328 trace_vhost_user_postcopy_end_exit();
1329
1330 return 0;
1331 }
1332
1333 static int vhost_user_postcopy_notifier(NotifierWithReturn *notifier,
1334 void *opaque)
1335 {
1336 struct PostcopyNotifyData *pnd = opaque;
1337 struct vhost_user *u = container_of(notifier, struct vhost_user,
1338 postcopy_notifier);
1339 struct vhost_dev *dev = u->dev;
1340
1341 switch (pnd->reason) {
1342 case POSTCOPY_NOTIFY_PROBE:
1343 if (!virtio_has_feature(dev->protocol_features,
1344 VHOST_USER_PROTOCOL_F_PAGEFAULT)) {
1345 /* TODO: Get the device name into this error somehow */
1346 error_setg(pnd->errp,
1347 "vhost-user backend not capable of postcopy");
1348 return -ENOENT;
1349 }
1350 break;
1351
1352 case POSTCOPY_NOTIFY_INBOUND_ADVISE:
1353 return vhost_user_postcopy_advise(dev, pnd->errp);
1354
1355 case POSTCOPY_NOTIFY_INBOUND_LISTEN:
1356 return vhost_user_postcopy_listen(dev, pnd->errp);
1357
1358 case POSTCOPY_NOTIFY_INBOUND_END:
1359 return vhost_user_postcopy_end(dev, pnd->errp);
1360
1361 default:
1362 /* We ignore notifications we don't know */
1363 break;
1364 }
1365
1366 return 0;
1367 }
1368
1369 static int vhost_user_backend_init(struct vhost_dev *dev, void *opaque)
1370 {
1371 uint64_t features, protocol_features;
1372 struct vhost_user *u;
1373 int err;
1374
1375 assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER);
1376
1377 u = g_new0(struct vhost_user, 1);
1378 u->user = opaque;
1379 u->slave_fd = -1;
1380 u->dev = dev;
1381 dev->opaque = u;
1382
1383 err = vhost_user_get_features(dev, &features);
1384 if (err < 0) {
1385 return err;
1386 }
1387
1388 if (virtio_has_feature(features, VHOST_USER_F_PROTOCOL_FEATURES)) {
1389 dev->backend_features |= 1ULL << VHOST_USER_F_PROTOCOL_FEATURES;
1390
1391 err = vhost_user_get_u64(dev, VHOST_USER_GET_PROTOCOL_FEATURES,
1392 &protocol_features);
1393 if (err < 0) {
1394 return err;
1395 }
1396
1397 dev->protocol_features =
1398 protocol_features & VHOST_USER_PROTOCOL_FEATURE_MASK;
1399
1400 if (!dev->config_ops || !dev->config_ops->vhost_dev_config_notifier) {
1401 /* Don't acknowledge CONFIG feature if device doesn't support it */
1402 dev->protocol_features &= ~(1ULL << VHOST_USER_PROTOCOL_F_CONFIG);
1403 } else if (!(protocol_features &
1404 (1ULL << VHOST_USER_PROTOCOL_F_CONFIG))) {
1405 error_report("Device expects VHOST_USER_PROTOCOL_F_CONFIG "
1406 "but backend does not support it.");
1407 return -1;
1408 }
1409
1410 err = vhost_user_set_protocol_features(dev, dev->protocol_features);
1411 if (err < 0) {
1412 return err;
1413 }
1414
1415 /* query the max queues we support if backend supports Multiple Queue */
1416 if (dev->protocol_features & (1ULL << VHOST_USER_PROTOCOL_F_MQ)) {
1417 err = vhost_user_get_u64(dev, VHOST_USER_GET_QUEUE_NUM,
1418 &dev->max_queues);
1419 if (err < 0) {
1420 return err;
1421 }
1422 }
1423
1424 if (virtio_has_feature(features, VIRTIO_F_IOMMU_PLATFORM) &&
1425 !(virtio_has_feature(dev->protocol_features,
1426 VHOST_USER_PROTOCOL_F_SLAVE_REQ) &&
1427 virtio_has_feature(dev->protocol_features,
1428 VHOST_USER_PROTOCOL_F_REPLY_ACK))) {
1429 error_report("IOMMU support requires reply-ack and "
1430 "slave-req protocol features.");
1431 return -1;
1432 }
1433 }
1434
1435 if (dev->migration_blocker == NULL &&
1436 !virtio_has_feature(dev->protocol_features,
1437 VHOST_USER_PROTOCOL_F_LOG_SHMFD)) {
1438 error_setg(&dev->migration_blocker,
1439 "Migration disabled: vhost-user backend lacks "
1440 "VHOST_USER_PROTOCOL_F_LOG_SHMFD feature.");
1441 }
1442
1443 err = vhost_setup_slave_channel(dev);
1444 if (err < 0) {
1445 return err;
1446 }
1447
1448 u->postcopy_notifier.notify = vhost_user_postcopy_notifier;
1449 postcopy_add_notifier(&u->postcopy_notifier);
1450
1451 return 0;
1452 }
1453
1454 static int vhost_user_backend_cleanup(struct vhost_dev *dev)
1455 {
1456 struct vhost_user *u;
1457
1458 assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER);
1459
1460 u = dev->opaque;
1461 if (u->postcopy_notifier.notify) {
1462 postcopy_remove_notifier(&u->postcopy_notifier);
1463 u->postcopy_notifier.notify = NULL;
1464 }
1465 u->postcopy_listen = false;
1466 if (u->postcopy_fd.handler) {
1467 postcopy_unregister_shared_ufd(&u->postcopy_fd);
1468 close(u->postcopy_fd.fd);
1469 u->postcopy_fd.handler = NULL;
1470 }
1471 if (u->slave_fd >= 0) {
1472 qemu_set_fd_handler(u->slave_fd, NULL, NULL, NULL);
1473 close(u->slave_fd);
1474 u->slave_fd = -1;
1475 }
1476 g_free(u->region_rb);
1477 u->region_rb = NULL;
1478 g_free(u->region_rb_offset);
1479 u->region_rb_offset = NULL;
1480 u->region_rb_len = 0;
1481 g_free(u);
1482 dev->opaque = 0;
1483
1484 return 0;
1485 }
1486
1487 static int vhost_user_get_vq_index(struct vhost_dev *dev, int idx)
1488 {
1489 assert(idx >= dev->vq_index && idx < dev->vq_index + dev->nvqs);
1490
1491 return idx;
1492 }
1493
1494 static int vhost_user_memslots_limit(struct vhost_dev *dev)
1495 {
1496 return VHOST_MEMORY_MAX_NREGIONS;
1497 }
1498
1499 static bool vhost_user_requires_shm_log(struct vhost_dev *dev)
1500 {
1501 assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER);
1502
1503 return virtio_has_feature(dev->protocol_features,
1504 VHOST_USER_PROTOCOL_F_LOG_SHMFD);
1505 }
1506
1507 static int vhost_user_migration_done(struct vhost_dev *dev, char* mac_addr)
1508 {
1509 VhostUserMsg msg = { };
1510
1511 assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER);
1512
1513 /* If guest supports GUEST_ANNOUNCE do nothing */
1514 if (virtio_has_feature(dev->acked_features, VIRTIO_NET_F_GUEST_ANNOUNCE)) {
1515 return 0;
1516 }
1517
1518 /* if backend supports VHOST_USER_PROTOCOL_F_RARP ask it to send the RARP */
1519 if (virtio_has_feature(dev->protocol_features,
1520 VHOST_USER_PROTOCOL_F_RARP)) {
1521 msg.hdr.request = VHOST_USER_SEND_RARP;
1522 msg.hdr.flags = VHOST_USER_VERSION;
1523 memcpy((char *)&msg.payload.u64, mac_addr, 6);
1524 msg.hdr.size = sizeof(msg.payload.u64);
1525
1526 return vhost_user_write(dev, &msg, NULL, 0);
1527 }
1528 return -1;
1529 }
1530
1531 static bool vhost_user_can_merge(struct vhost_dev *dev,
1532 uint64_t start1, uint64_t size1,
1533 uint64_t start2, uint64_t size2)
1534 {
1535 ram_addr_t offset;
1536 int mfd, rfd;
1537 MemoryRegion *mr;
1538
1539 mr = memory_region_from_host((void *)(uintptr_t)start1, &offset);
1540 mfd = memory_region_get_fd(mr);
1541
1542 mr = memory_region_from_host((void *)(uintptr_t)start2, &offset);
1543 rfd = memory_region_get_fd(mr);
1544
1545 return mfd == rfd;
1546 }
1547
1548 static int vhost_user_net_set_mtu(struct vhost_dev *dev, uint16_t mtu)
1549 {
1550 VhostUserMsg msg;
1551 bool reply_supported = virtio_has_feature(dev->protocol_features,
1552 VHOST_USER_PROTOCOL_F_REPLY_ACK);
1553
1554 if (!(dev->protocol_features & (1ULL << VHOST_USER_PROTOCOL_F_NET_MTU))) {
1555 return 0;
1556 }
1557
1558 msg.hdr.request = VHOST_USER_NET_SET_MTU;
1559 msg.payload.u64 = mtu;
1560 msg.hdr.size = sizeof(msg.payload.u64);
1561 msg.hdr.flags = VHOST_USER_VERSION;
1562 if (reply_supported) {
1563 msg.hdr.flags |= VHOST_USER_NEED_REPLY_MASK;
1564 }
1565
1566 if (vhost_user_write(dev, &msg, NULL, 0) < 0) {
1567 return -1;
1568 }
1569
1570 /* If reply_ack supported, slave has to ack specified MTU is valid */
1571 if (reply_supported) {
1572 return process_message_reply(dev, &msg);
1573 }
1574
1575 return 0;
1576 }
1577
1578 static int vhost_user_send_device_iotlb_msg(struct vhost_dev *dev,
1579 struct vhost_iotlb_msg *imsg)
1580 {
1581 VhostUserMsg msg = {
1582 .hdr.request = VHOST_USER_IOTLB_MSG,
1583 .hdr.size = sizeof(msg.payload.iotlb),
1584 .hdr.flags = VHOST_USER_VERSION | VHOST_USER_NEED_REPLY_MASK,
1585 .payload.iotlb = *imsg,
1586 };
1587
1588 if (vhost_user_write(dev, &msg, NULL, 0) < 0) {
1589 return -EFAULT;
1590 }
1591
1592 return process_message_reply(dev, &msg);
1593 }
1594
1595
/*
 * IOTLB callback enable/disable hook; intentionally empty for vhost-user.
 */
static void vhost_user_set_iotlb_callback(struct vhost_dev *dev, int enabled)
{
    /* No-op as the receive channel is not dedicated to IOTLB messages. */
    (void)dev;
    (void)enabled;
}
1600
1601 static int vhost_user_get_config(struct vhost_dev *dev, uint8_t *config,
1602 uint32_t config_len)
1603 {
1604 VhostUserMsg msg = {
1605 .hdr.request = VHOST_USER_GET_CONFIG,
1606 .hdr.flags = VHOST_USER_VERSION,
1607 .hdr.size = VHOST_USER_CONFIG_HDR_SIZE + config_len,
1608 };
1609
1610 if (!virtio_has_feature(dev->protocol_features,
1611 VHOST_USER_PROTOCOL_F_CONFIG)) {
1612 return -1;
1613 }
1614
1615 if (config_len > VHOST_USER_MAX_CONFIG_SIZE) {
1616 return -1;
1617 }
1618
1619 msg.payload.config.offset = 0;
1620 msg.payload.config.size = config_len;
1621 if (vhost_user_write(dev, &msg, NULL, 0) < 0) {
1622 return -1;
1623 }
1624
1625 if (vhost_user_read(dev, &msg) < 0) {
1626 return -1;
1627 }
1628
1629 if (msg.hdr.request != VHOST_USER_GET_CONFIG) {
1630 error_report("Received unexpected msg type. Expected %d received %d",
1631 VHOST_USER_GET_CONFIG, msg.hdr.request);
1632 return -1;
1633 }
1634
1635 if (msg.hdr.size != VHOST_USER_CONFIG_HDR_SIZE + config_len) {
1636 error_report("Received bad msg size.");
1637 return -1;
1638 }
1639
1640 memcpy(config, msg.payload.config.region, config_len);
1641
1642 return 0;
1643 }
1644
1645 static int vhost_user_set_config(struct vhost_dev *dev, const uint8_t *data,
1646 uint32_t offset, uint32_t size, uint32_t flags)
1647 {
1648 uint8_t *p;
1649 bool reply_supported = virtio_has_feature(dev->protocol_features,
1650 VHOST_USER_PROTOCOL_F_REPLY_ACK);
1651
1652 VhostUserMsg msg = {
1653 .hdr.request = VHOST_USER_SET_CONFIG,
1654 .hdr.flags = VHOST_USER_VERSION,
1655 .hdr.size = VHOST_USER_CONFIG_HDR_SIZE + size,
1656 };
1657
1658 if (!virtio_has_feature(dev->protocol_features,
1659 VHOST_USER_PROTOCOL_F_CONFIG)) {
1660 return -1;
1661 }
1662
1663 if (reply_supported) {
1664 msg.hdr.flags |= VHOST_USER_NEED_REPLY_MASK;
1665 }
1666
1667 if (size > VHOST_USER_MAX_CONFIG_SIZE) {
1668 return -1;
1669 }
1670
1671 msg.payload.config.offset = offset,
1672 msg.payload.config.size = size,
1673 msg.payload.config.flags = flags,
1674 p = msg.payload.config.region;
1675 memcpy(p, data, size);
1676
1677 if (vhost_user_write(dev, &msg, NULL, 0) < 0) {
1678 return -1;
1679 }
1680
1681 if (reply_supported) {
1682 return process_message_reply(dev, &msg);
1683 }
1684
1685 return 0;
1686 }
1687
1688 static int vhost_user_crypto_create_session(struct vhost_dev *dev,
1689 void *session_info,
1690 uint64_t *session_id)
1691 {
1692 bool crypto_session = virtio_has_feature(dev->protocol_features,
1693 VHOST_USER_PROTOCOL_F_CRYPTO_SESSION);
1694 CryptoDevBackendSymSessionInfo *sess_info = session_info;
1695 VhostUserMsg msg = {
1696 .hdr.request = VHOST_USER_CREATE_CRYPTO_SESSION,
1697 .hdr.flags = VHOST_USER_VERSION,
1698 .hdr.size = sizeof(msg.payload.session),
1699 };
1700
1701 assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER);
1702
1703 if (!crypto_session) {
1704 error_report("vhost-user trying to send unhandled ioctl");
1705 return -1;
1706 }
1707
1708 memcpy(&msg.payload.session.session_setup_data, sess_info,
1709 sizeof(CryptoDevBackendSymSessionInfo));
1710 if (sess_info->key_len) {
1711 memcpy(&msg.payload.session.key, sess_info->cipher_key,
1712 sess_info->key_len);
1713 }
1714 if (sess_info->auth_key_len > 0) {
1715 memcpy(&msg.payload.session.auth_key, sess_info->auth_key,
1716 sess_info->auth_key_len);
1717 }
1718 if (vhost_user_write(dev, &msg, NULL, 0) < 0) {
1719 error_report("vhost_user_write() return -1, create session failed");
1720 return -1;
1721 }
1722
1723 if (vhost_user_read(dev, &msg) < 0) {
1724 error_report("vhost_user_read() return -1, create session failed");
1725 return -1;
1726 }
1727
1728 if (msg.hdr.request != VHOST_USER_CREATE_CRYPTO_SESSION) {
1729 error_report("Received unexpected msg type. Expected %d received %d",
1730 VHOST_USER_CREATE_CRYPTO_SESSION, msg.hdr.request);
1731 return -1;
1732 }
1733
1734 if (msg.hdr.size != sizeof(msg.payload.session)) {
1735 error_report("Received bad msg size.");
1736 return -1;
1737 }
1738
1739 if (msg.payload.session.session_id < 0) {
1740 error_report("Bad session id: %" PRId64 "",
1741 msg.payload.session.session_id);
1742 return -1;
1743 }
1744 *session_id = msg.payload.session.session_id;
1745
1746 return 0;
1747 }
1748
1749 static int
1750 vhost_user_crypto_close_session(struct vhost_dev *dev, uint64_t session_id)
1751 {
1752 bool crypto_session = virtio_has_feature(dev->protocol_features,
1753 VHOST_USER_PROTOCOL_F_CRYPTO_SESSION);
1754 VhostUserMsg msg = {
1755 .hdr.request = VHOST_USER_CLOSE_CRYPTO_SESSION,
1756 .hdr.flags = VHOST_USER_VERSION,
1757 .hdr.size = sizeof(msg.payload.u64),
1758 };
1759 msg.payload.u64 = session_id;
1760
1761 if (!crypto_session) {
1762 error_report("vhost-user trying to send unhandled ioctl");
1763 return -1;
1764 }
1765
1766 if (vhost_user_write(dev, &msg, NULL, 0) < 0) {
1767 error_report("vhost_user_write() return -1, close session failed");
1768 return -1;
1769 }
1770
1771 return 0;
1772 }
1773
1774 static bool vhost_user_mem_section_filter(struct vhost_dev *dev,
1775 MemoryRegionSection *section)
1776 {
1777 bool result;
1778
1779 result = memory_region_get_fd(section->mr) >= 0;
1780
1781 return result;
1782 }
1783
1784 static int vhost_user_get_inflight_fd(struct vhost_dev *dev,
1785 uint16_t queue_size,
1786 struct vhost_inflight *inflight)
1787 {
1788 void *addr;
1789 int fd;
1790 struct vhost_user *u = dev->opaque;
1791 CharBackend *chr = u->user->chr;
1792 VhostUserMsg msg = {
1793 .hdr.request = VHOST_USER_GET_INFLIGHT_FD,
1794 .hdr.flags = VHOST_USER_VERSION,
1795 .payload.inflight.num_queues = dev->nvqs,
1796 .payload.inflight.queue_size = queue_size,
1797 .hdr.size = sizeof(msg.payload.inflight),
1798 };
1799
1800 if (!virtio_has_feature(dev->protocol_features,
1801 VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD)) {
1802 return 0;
1803 }
1804
1805 if (vhost_user_write(dev, &msg, NULL, 0) < 0) {
1806 return -1;
1807 }
1808
1809 if (vhost_user_read(dev, &msg) < 0) {
1810 return -1;
1811 }
1812
1813 if (msg.hdr.request != VHOST_USER_GET_INFLIGHT_FD) {
1814 error_report("Received unexpected msg type. "
1815 "Expected %d received %d",
1816 VHOST_USER_GET_INFLIGHT_FD, msg.hdr.request);
1817 return -1;
1818 }
1819
1820 if (msg.hdr.size != sizeof(msg.payload.inflight)) {
1821 error_report("Received bad msg size.");
1822 return -1;
1823 }
1824
1825 if (!msg.payload.inflight.mmap_size) {
1826 return 0;
1827 }
1828
1829 fd = qemu_chr_fe_get_msgfd(chr);
1830 if (fd < 0) {
1831 error_report("Failed to get mem fd");
1832 return -1;
1833 }
1834
1835 addr = mmap(0, msg.payload.inflight.mmap_size, PROT_READ | PROT_WRITE,
1836 MAP_SHARED, fd, msg.payload.inflight.mmap_offset);
1837
1838 if (addr == MAP_FAILED) {
1839 error_report("Failed to mmap mem fd");
1840 close(fd);
1841 return -1;
1842 }
1843
1844 inflight->addr = addr;
1845 inflight->fd = fd;
1846 inflight->size = msg.payload.inflight.mmap_size;
1847 inflight->offset = msg.payload.inflight.mmap_offset;
1848 inflight->queue_size = queue_size;
1849
1850 return 0;
1851 }
1852
1853 static int vhost_user_set_inflight_fd(struct vhost_dev *dev,
1854 struct vhost_inflight *inflight)
1855 {
1856 VhostUserMsg msg = {
1857 .hdr.request = VHOST_USER_SET_INFLIGHT_FD,
1858 .hdr.flags = VHOST_USER_VERSION,
1859 .payload.inflight.mmap_size = inflight->size,
1860 .payload.inflight.mmap_offset = inflight->offset,
1861 .payload.inflight.num_queues = dev->nvqs,
1862 .payload.inflight.queue_size = inflight->queue_size,
1863 .hdr.size = sizeof(msg.payload.inflight),
1864 };
1865
1866 if (!virtio_has_feature(dev->protocol_features,
1867 VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD)) {
1868 return 0;
1869 }
1870
1871 if (vhost_user_write(dev, &msg, &inflight->fd, 1) < 0) {
1872 return -1;
1873 }
1874
1875 return 0;
1876 }
1877
1878 bool vhost_user_init(VhostUserState *user, CharBackend *chr, Error **errp)
1879 {
1880 if (user->chr) {
1881 error_setg(errp, "Cannot initialize vhost-user state");
1882 return false;
1883 }
1884 user->chr = chr;
1885 return true;
1886 }
1887
1888 void vhost_user_cleanup(VhostUserState *user)
1889 {
1890 int i;
1891
1892 if (!user->chr) {
1893 return;
1894 }
1895
1896 for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
1897 if (user->notifier[i].addr) {
1898 object_unparent(OBJECT(&user->notifier[i].mr));
1899 munmap(user->notifier[i].addr, qemu_real_host_page_size);
1900 user->notifier[i].addr = NULL;
1901 }
1902 }
1903 user->chr = NULL;
1904 }
1905
1906 const VhostOps user_ops = {
1907 .backend_type = VHOST_BACKEND_TYPE_USER,
1908 .vhost_backend_init = vhost_user_backend_init,
1909 .vhost_backend_cleanup = vhost_user_backend_cleanup,
1910 .vhost_backend_memslots_limit = vhost_user_memslots_limit,
1911 .vhost_set_log_base = vhost_user_set_log_base,
1912 .vhost_set_mem_table = vhost_user_set_mem_table,
1913 .vhost_set_vring_addr = vhost_user_set_vring_addr,
1914 .vhost_set_vring_endian = vhost_user_set_vring_endian,
1915 .vhost_set_vring_num = vhost_user_set_vring_num,
1916 .vhost_set_vring_base = vhost_user_set_vring_base,
1917 .vhost_get_vring_base = vhost_user_get_vring_base,
1918 .vhost_set_vring_kick = vhost_user_set_vring_kick,
1919 .vhost_set_vring_call = vhost_user_set_vring_call,
1920 .vhost_set_features = vhost_user_set_features,
1921 .vhost_get_features = vhost_user_get_features,
1922 .vhost_set_owner = vhost_user_set_owner,
1923 .vhost_reset_device = vhost_user_reset_device,
1924 .vhost_get_vq_index = vhost_user_get_vq_index,
1925 .vhost_set_vring_enable = vhost_user_set_vring_enable,
1926 .vhost_requires_shm_log = vhost_user_requires_shm_log,
1927 .vhost_migration_done = vhost_user_migration_done,
1928 .vhost_backend_can_merge = vhost_user_can_merge,
1929 .vhost_net_set_mtu = vhost_user_net_set_mtu,
1930 .vhost_set_iotlb_callback = vhost_user_set_iotlb_callback,
1931 .vhost_send_device_iotlb_msg = vhost_user_send_device_iotlb_msg,
1932 .vhost_get_config = vhost_user_get_config,
1933 .vhost_set_config = vhost_user_set_config,
1934 .vhost_crypto_create_session = vhost_user_crypto_create_session,
1935 .vhost_crypto_close_session = vhost_user_crypto_close_session,
1936 .vhost_backend_mem_section_filter = vhost_user_mem_section_filter,
1937 .vhost_get_inflight_fd = vhost_user_get_inflight_fd,
1938 .vhost_set_inflight_fd = vhost_user_set_inflight_fd,
1939 };