]> git.proxmox.com Git - mirror_qemu.git/blame - hw/virtio/vhost-user.c
virtio-scsi: avoid dangling host notifier in ->ioeventfd_stop()
[mirror_qemu.git] / hw / virtio / vhost-user.c
CommitLineData
5f6f6664
NN
1/*
2 * vhost-user
3 *
4 * Copyright (c) 2013 Virtual Open Systems Sarl.
5 *
6 * This work is licensed under the terms of the GNU GPL, version 2 or later.
7 * See the COPYING file in the top-level directory.
8 *
9 */
10
9b8bfe21 11#include "qemu/osdep.h"
da34e65c 12#include "qapi/error.h"
5f6f6664 13#include "hw/virtio/vhost.h"
5c33f978 14#include "hw/virtio/virtio-crypto.h"
4d0cf552 15#include "hw/virtio/vhost-user.h"
5f6f6664 16#include "hw/virtio/vhost-backend.h"
44866521 17#include "hw/virtio/virtio.h"
3e866365 18#include "hw/virtio/virtio-net.h"
4d43a603 19#include "chardev/char-fe.h"
57dc0217 20#include "io/channel-socket.h"
5f6f6664
NN
21#include "sysemu/kvm.h"
22#include "qemu/error-report.h"
db725815 23#include "qemu/main-loop.h"
5f6f6664 24#include "qemu/sockets.h"
71e076a0 25#include "sysemu/runstate.h"
efbfeb81 26#include "sysemu/cryptodev.h"
9ccbfe14
DDAG
27#include "migration/migration.h"
28#include "migration/postcopy-ram.h"
6864a7b5 29#include "trace.h"
0b0af4d6 30#include "exec/ramblock.h"
5f6f6664 31
5f6f6664
NN
32#include <sys/ioctl.h>
33#include <sys/socket.h>
34#include <sys/un.h>
18658a3c
PB
35
36#include "standard-headers/linux/vhost_types.h"
37
38#ifdef CONFIG_LINUX
375318d0 39#include <linux/userfaultfd.h>
18658a3c 40#endif
5f6f6664 41
27598393 42#define VHOST_MEMORY_BASELINE_NREGIONS 8
dcb10c00 43#define VHOST_USER_F_PROTOCOL_FEATURES 30
a84ec993 44#define VHOST_USER_BACKEND_MAX_FDS 8
e2051e9e 45
bab10530 46#if defined(TARGET_PPC) || defined(TARGET_PPC64)
27598393
RN
47#include "hw/ppc/spapr.h"
48#define VHOST_USER_MAX_RAM_SLOTS SPAPR_MAX_RAM_SLOTS
49
50#else
51#define VHOST_USER_MAX_RAM_SLOTS 512
52#endif
53
4c3e257b
CL
54/*
55 * Maximum size of virtio device config space
56 */
57#define VHOST_USER_MAX_CONFIG_SIZE 256
58
de1372d4
TC
59enum VhostUserProtocolFeature {
60 VHOST_USER_PROTOCOL_F_MQ = 0,
61 VHOST_USER_PROTOCOL_F_LOG_SHMFD = 1,
62 VHOST_USER_PROTOCOL_F_RARP = 2,
ca525ce5 63 VHOST_USER_PROTOCOL_F_REPLY_ACK = 3,
c5f048d8 64 VHOST_USER_PROTOCOL_F_NET_MTU = 4,
a84ec993 65 VHOST_USER_PROTOCOL_F_BACKEND_REQ = 5,
5df04f17 66 VHOST_USER_PROTOCOL_F_CROSS_ENDIAN = 6,
efbfeb81 67 VHOST_USER_PROTOCOL_F_CRYPTO_SESSION = 7,
9ccbfe14 68 VHOST_USER_PROTOCOL_F_PAGEFAULT = 8,
1c3e5a26 69 VHOST_USER_PROTOCOL_F_CONFIG = 9,
a84ec993 70 VHOST_USER_PROTOCOL_F_BACKEND_SEND_FD = 10,
44866521 71 VHOST_USER_PROTOCOL_F_HOST_NOTIFIER = 11,
5ad204bf 72 VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD = 12,
d91d57e6 73 VHOST_USER_PROTOCOL_F_RESET_DEVICE = 13,
6b0eff1a
RN
74 /* Feature 14 reserved for VHOST_USER_PROTOCOL_F_INBAND_NOTIFICATIONS. */
75 VHOST_USER_PROTOCOL_F_CONFIGURE_MEM_SLOTS = 15,
923b8921 76 VHOST_USER_PROTOCOL_F_STATUS = 16,
de1372d4
TC
77 VHOST_USER_PROTOCOL_F_MAX
78};
79
80#define VHOST_USER_PROTOCOL_FEATURE_MASK ((1 << VHOST_USER_PROTOCOL_F_MAX) - 1)
5f6f6664
NN
/*
 * Master-to-backend request codes carried in VhostUserHeader.request.
 * The numeric values are part of the vhost-user wire protocol and must
 * never be renumbered.
 */
typedef enum VhostUserRequest {
    VHOST_USER_NONE = 0,
    VHOST_USER_GET_FEATURES = 1,
    VHOST_USER_SET_FEATURES = 2,
    VHOST_USER_SET_OWNER = 3,
    VHOST_USER_RESET_OWNER = 4,
    VHOST_USER_SET_MEM_TABLE = 5,
    VHOST_USER_SET_LOG_BASE = 6,
    VHOST_USER_SET_LOG_FD = 7,
    VHOST_USER_SET_VRING_NUM = 8,
    VHOST_USER_SET_VRING_ADDR = 9,
    VHOST_USER_SET_VRING_BASE = 10,
    VHOST_USER_GET_VRING_BASE = 11,
    VHOST_USER_SET_VRING_KICK = 12,
    VHOST_USER_SET_VRING_CALL = 13,
    VHOST_USER_SET_VRING_ERR = 14,
    VHOST_USER_GET_PROTOCOL_FEATURES = 15,
    VHOST_USER_SET_PROTOCOL_FEATURES = 16,
    VHOST_USER_GET_QUEUE_NUM = 17,
    VHOST_USER_SET_VRING_ENABLE = 18,
    VHOST_USER_SEND_RARP = 19,
    VHOST_USER_NET_SET_MTU = 20,
    VHOST_USER_SET_BACKEND_REQ_FD = 21,
    VHOST_USER_IOTLB_MSG = 22,
    VHOST_USER_SET_VRING_ENDIAN = 23,
    VHOST_USER_GET_CONFIG = 24,
    VHOST_USER_SET_CONFIG = 25,
    VHOST_USER_CREATE_CRYPTO_SESSION = 26,
    VHOST_USER_CLOSE_CRYPTO_SESSION = 27,
    VHOST_USER_POSTCOPY_ADVISE = 28,
    VHOST_USER_POSTCOPY_LISTEN = 29,
    VHOST_USER_POSTCOPY_END = 30,
    VHOST_USER_GET_INFLIGHT_FD = 31,
    VHOST_USER_SET_INFLIGHT_FD = 32,
    VHOST_USER_GPU_SET_SOCKET = 33,
    VHOST_USER_RESET_DEVICE = 34,
    /* Message number 35 reserved for VHOST_USER_VRING_KICK. */
    VHOST_USER_GET_MAX_MEM_SLOTS = 36,
    VHOST_USER_ADD_MEM_REG = 37,
    VHOST_USER_REM_MEM_REG = 38,
    VHOST_USER_SET_STATUS = 39,
    VHOST_USER_GET_STATUS = 40,
    VHOST_USER_MAX
} VhostUserRequest;
126
/*
 * Backend-to-master request codes, sent over the backend (slave)
 * channel.  Wire-protocol values; do not renumber.
 */
typedef enum VhostUserSlaveRequest {
    VHOST_USER_BACKEND_NONE = 0,
    VHOST_USER_BACKEND_IOTLB_MSG = 1,
    VHOST_USER_BACKEND_CONFIG_CHANGE_MSG = 2,
    VHOST_USER_BACKEND_VRING_HOST_NOTIFIER_MSG = 3,
    VHOST_USER_BACKEND_MAX
} VhostUserSlaveRequest;
134
5f6f6664
NN
135typedef struct VhostUserMemoryRegion {
136 uint64_t guest_phys_addr;
137 uint64_t memory_size;
138 uint64_t userspace_addr;
3fd74b84 139 uint64_t mmap_offset;
5f6f6664
NN
140} VhostUserMemoryRegion;
141
142typedef struct VhostUserMemory {
143 uint32_t nregions;
144 uint32_t padding;
27598393 145 VhostUserMemoryRegion regions[VHOST_MEMORY_BASELINE_NREGIONS];
5f6f6664
NN
146} VhostUserMemory;
147
f1aeb14b 148typedef struct VhostUserMemRegMsg {
3009edff 149 uint64_t padding;
f1aeb14b
RN
150 VhostUserMemoryRegion region;
151} VhostUserMemRegMsg;
152
2b8819c6
VK
153typedef struct VhostUserLog {
154 uint64_t mmap_size;
155 uint64_t mmap_offset;
156} VhostUserLog;
157
4c3e257b
CL
158typedef struct VhostUserConfig {
159 uint32_t offset;
160 uint32_t size;
161 uint32_t flags;
162 uint8_t region[VHOST_USER_MAX_CONFIG_SIZE];
163} VhostUserConfig;
164
efbfeb81
GA
165#define VHOST_CRYPTO_SYM_HMAC_MAX_KEY_LEN 512
166#define VHOST_CRYPTO_SYM_CIPHER_MAX_KEY_LEN 64
5c33f978 167#define VHOST_CRYPTO_ASYM_MAX_KEY_LEN 1024
efbfeb81
GA
168
169typedef struct VhostUserCryptoSession {
5c33f978
GM
170 uint64_t op_code;
171 union {
172 struct {
173 CryptoDevBackendSymSessionInfo session_setup_data;
174 uint8_t key[VHOST_CRYPTO_SYM_CIPHER_MAX_KEY_LEN];
175 uint8_t auth_key[VHOST_CRYPTO_SYM_HMAC_MAX_KEY_LEN];
176 } sym;
177 struct {
178 CryptoDevBackendAsymSessionInfo session_setup_data;
179 uint8_t key[VHOST_CRYPTO_ASYM_MAX_KEY_LEN];
180 } asym;
181 } u;
182
efbfeb81
GA
183 /* session id for success, -1 on errors */
184 int64_t session_id;
efbfeb81
GA
185} VhostUserCryptoSession;
186
4c3e257b
CL
187static VhostUserConfig c __attribute__ ((unused));
188#define VHOST_USER_CONFIG_HDR_SIZE (sizeof(c.offset) \
189 + sizeof(c.size) \
190 + sizeof(c.flags))
191
44866521
TB
192typedef struct VhostUserVringArea {
193 uint64_t u64;
194 uint64_t size;
195 uint64_t offset;
196} VhostUserVringArea;
197
5ad204bf
XY
198typedef struct VhostUserInflight {
199 uint64_t mmap_size;
200 uint64_t mmap_offset;
201 uint16_t num_queues;
202 uint16_t queue_size;
203} VhostUserInflight;
204
24e34754 205typedef struct {
5f6f6664
NN
206 VhostUserRequest request;
207
208#define VHOST_USER_VERSION_MASK (0x3)
c97c76b3 209#define VHOST_USER_REPLY_MASK (0x1 << 2)
ca525ce5 210#define VHOST_USER_NEED_REPLY_MASK (0x1 << 3)
5f6f6664
NN
211 uint32_t flags;
212 uint32_t size; /* the following payload size */
24e34754
MT
213} QEMU_PACKED VhostUserHeader;
214
215typedef union {
5f6f6664 216#define VHOST_USER_VRING_IDX_MASK (0xff)
c97c76b3 217#define VHOST_USER_VRING_NOFD_MASK (0x1 << 8)
5f6f6664
NN
218 uint64_t u64;
219 struct vhost_vring_state state;
220 struct vhost_vring_addr addr;
221 VhostUserMemory memory;
f1aeb14b 222 VhostUserMemRegMsg mem_reg;
2b8819c6 223 VhostUserLog log;
6dcdd06e 224 struct vhost_iotlb_msg iotlb;
4c3e257b 225 VhostUserConfig config;
efbfeb81 226 VhostUserCryptoSession session;
44866521 227 VhostUserVringArea area;
5ad204bf 228 VhostUserInflight inflight;
24e34754
MT
229} VhostUserPayload;
230
231typedef struct VhostUserMsg {
232 VhostUserHeader hdr;
233 VhostUserPayload payload;
5f6f6664
NN
234} QEMU_PACKED VhostUserMsg;
235
236static VhostUserMsg m __attribute__ ((unused));
24e34754 237#define VHOST_USER_HDR_SIZE (sizeof(VhostUserHeader))
5f6f6664 238
24e34754 239#define VHOST_USER_PAYLOAD_SIZE (sizeof(VhostUserPayload))
5f6f6664
NN
240
241/* The version of the protocol we support */
242#define VHOST_USER_VERSION (0x1)
243
2152f3fe 244struct vhost_user {
9ccbfe14 245 struct vhost_dev *dev;
4d0cf552
TB
246 /* Shared between vhost devs of the same virtio device */
247 VhostUserState *user;
57dc0217
GK
248 QIOChannel *slave_ioc;
249 GSource *slave_src;
9ccbfe14 250 NotifierWithReturn postcopy_notifier;
f82c1116 251 struct PostCopyFD postcopy_fd;
27598393 252 uint64_t postcopy_client_bases[VHOST_USER_MAX_RAM_SLOTS];
905125d0
DDAG
253 /* Length of the region_rb and region_rb_offset arrays */
254 size_t region_rb_len;
255 /* RAMBlock associated with a given region */
256 RAMBlock **region_rb;
c97c76b3
AB
257 /*
258 * The offset from the start of the RAMBlock to the start of the
905125d0
DDAG
259 * vhost region.
260 */
261 ram_addr_t *region_rb_offset;
262
6864a7b5
DDAG
263 /* True once we've entered postcopy_listen */
264 bool postcopy_listen;
f1aeb14b
RN
265
266 /* Our current regions */
267 int num_shadow_regions;
27598393 268 struct vhost_memory_region shadow_regions[VHOST_USER_MAX_RAM_SLOTS];
f1aeb14b
RN
269};
270
271struct scrub_regions {
272 struct vhost_memory_region *region;
273 int reg_idx;
274 int fd_idx;
2152f3fe
MAL
275};
276
5f6f6664
NN
277static bool ioeventfd_enabled(void)
278{
b0aa77d3 279 return !kvm_enabled() || kvm_eventfds_enabled();
5f6f6664
NN
280}
281
9af84c02 282static int vhost_user_read_header(struct vhost_dev *dev, VhostUserMsg *msg)
5f6f6664 283{
2152f3fe 284 struct vhost_user *u = dev->opaque;
4d0cf552 285 CharBackend *chr = u->user->chr;
5f6f6664
NN
286 uint8_t *p = (uint8_t *) msg;
287 int r, size = VHOST_USER_HDR_SIZE;
288
289 r = qemu_chr_fe_read_all(chr, p, size);
290 if (r != size) {
025faa87 291 int saved_errno = errno;
5421f318 292 error_report("Failed to read msg header. Read %d instead of %d."
24e34754 293 " Original request %d.", r, size, msg->hdr.request);
025faa87 294 return r < 0 ? -saved_errno : -EIO;
5f6f6664
NN
295 }
296
297 /* validate received flags */
24e34754 298 if (msg->hdr.flags != (VHOST_USER_REPLY_MASK | VHOST_USER_VERSION)) {
5f6f6664 299 error_report("Failed to read msg header."
24e34754 300 " Flags 0x%x instead of 0x%x.", msg->hdr.flags,
5f6f6664 301 VHOST_USER_REPLY_MASK | VHOST_USER_VERSION);
025faa87 302 return -EPROTO;
9af84c02
MAL
303 }
304
643a9435
AB
305 trace_vhost_user_read(msg->hdr.request, msg->hdr.flags);
306
9af84c02
MAL
307 return 0;
308}
309
4382138f 310static int vhost_user_read(struct vhost_dev *dev, VhostUserMsg *msg)
9af84c02
MAL
311{
312 struct vhost_user *u = dev->opaque;
313 CharBackend *chr = u->user->chr;
314 uint8_t *p = (uint8_t *) msg;
315 int r, size;
316
025faa87
RK
317 r = vhost_user_read_header(dev, msg);
318 if (r < 0) {
4382138f 319 return r;
5f6f6664
NN
320 }
321
322 /* validate message size is sane */
24e34754 323 if (msg->hdr.size > VHOST_USER_PAYLOAD_SIZE) {
5f6f6664 324 error_report("Failed to read msg header."
24e34754 325 " Size %d exceeds the maximum %zu.", msg->hdr.size,
5f6f6664 326 VHOST_USER_PAYLOAD_SIZE);
4382138f 327 return -EPROTO;
5f6f6664
NN
328 }
329
24e34754 330 if (msg->hdr.size) {
5f6f6664 331 p += VHOST_USER_HDR_SIZE;
24e34754 332 size = msg->hdr.size;
5f6f6664
NN
333 r = qemu_chr_fe_read_all(chr, p, size);
334 if (r != size) {
025faa87 335 int saved_errno = errno;
5f6f6664 336 error_report("Failed to read msg payload."
24e34754 337 " Read %d instead of %d.", r, msg->hdr.size);
4382138f 338 return r < 0 ? -saved_errno : -EIO;
5f6f6664
NN
339 }
340 }
341
4382138f 342 return 0;
5f6f6664
NN
343}
344
ca525ce5 345static int process_message_reply(struct vhost_dev *dev,
3cf7daf8 346 const VhostUserMsg *msg)
ca525ce5 347{
025faa87 348 int ret;
60cd1102 349 VhostUserMsg msg_reply;
ca525ce5 350
24e34754 351 if ((msg->hdr.flags & VHOST_USER_NEED_REPLY_MASK) == 0) {
60cd1102
ZY
352 return 0;
353 }
354
025faa87
RK
355 ret = vhost_user_read(dev, &msg_reply);
356 if (ret < 0) {
357 return ret;
ca525ce5
PS
358 }
359
24e34754 360 if (msg_reply.hdr.request != msg->hdr.request) {
edb40732 361 error_report("Received unexpected msg type. "
ca525ce5 362 "Expected %d received %d",
24e34754 363 msg->hdr.request, msg_reply.hdr.request);
025faa87 364 return -EPROTO;
ca525ce5
PS
365 }
366
025faa87 367 return msg_reply.payload.u64 ? -EIO : 0;
ca525ce5
PS
368}
369
21e70425
MAL
370static bool vhost_user_one_time_request(VhostUserRequest request)
371{
372 switch (request) {
373 case VHOST_USER_SET_OWNER:
60915dc4 374 case VHOST_USER_RESET_OWNER:
21e70425
MAL
375 case VHOST_USER_SET_MEM_TABLE:
376 case VHOST_USER_GET_QUEUE_NUM:
c5f048d8 377 case VHOST_USER_NET_SET_MTU:
920c184f
MY
378 case VHOST_USER_ADD_MEM_REG:
379 case VHOST_USER_REM_MEM_REG:
21e70425
MAL
380 return true;
381 default:
382 return false;
383 }
384}
385
386/* most non-init callers ignore the error */
5f6f6664
NN
387static int vhost_user_write(struct vhost_dev *dev, VhostUserMsg *msg,
388 int *fds, int fd_num)
389{
2152f3fe 390 struct vhost_user *u = dev->opaque;
4d0cf552 391 CharBackend *chr = u->user->chr;
24e34754 392 int ret, size = VHOST_USER_HDR_SIZE + msg->hdr.size;
5f6f6664 393
21e70425
MAL
394 /*
395 * For non-vring specific requests, like VHOST_USER_SET_MEM_TABLE,
396 * we just need send it once in the first time. For later such
397 * request, we just ignore it.
398 */
24e34754
MT
399 if (vhost_user_one_time_request(msg->hdr.request) && dev->vq_index != 0) {
400 msg->hdr.flags &= ~VHOST_USER_NEED_REPLY_MASK;
21e70425
MAL
401 return 0;
402 }
403
6fab2f3f 404 if (qemu_chr_fe_set_msgfds(chr, fds, fd_num) < 0) {
f6b85710 405 error_report("Failed to set msg fds.");
025faa87 406 return -EINVAL;
6fab2f3f 407 }
5f6f6664 408
f6b85710
MAL
409 ret = qemu_chr_fe_write_all(chr, (const uint8_t *) msg, size);
410 if (ret != size) {
025faa87 411 int saved_errno = errno;
f6b85710
MAL
412 error_report("Failed to write msg."
413 " Wrote %d instead of %d.", ret, size);
025faa87 414 return ret < 0 ? -saved_errno : -EIO;
f6b85710
MAL
415 }
416
6ca6d8ee
AB
417 trace_vhost_user_write(msg->hdr.request, msg->hdr.flags);
418
f6b85710 419 return 0;
5f6f6664
NN
420}
421
bd2e44fe
MAL
422int vhost_user_gpu_set_socket(struct vhost_dev *dev, int fd)
423{
424 VhostUserMsg msg = {
425 .hdr.request = VHOST_USER_GPU_SET_SOCKET,
426 .hdr.flags = VHOST_USER_VERSION,
427 };
428
429 return vhost_user_write(dev, &msg, &fd, 1);
430}
431
21e70425
MAL
432static int vhost_user_set_log_base(struct vhost_dev *dev, uint64_t base,
433 struct vhost_log *log)
b931bfbf 434{
27598393 435 int fds[VHOST_USER_MAX_RAM_SLOTS];
21e70425
MAL
436 size_t fd_num = 0;
437 bool shmfd = virtio_has_feature(dev->protocol_features,
438 VHOST_USER_PROTOCOL_F_LOG_SHMFD);
025faa87 439 int ret;
21e70425 440 VhostUserMsg msg = {
24e34754
MT
441 .hdr.request = VHOST_USER_SET_LOG_BASE,
442 .hdr.flags = VHOST_USER_VERSION,
48854f57 443 .payload.log.mmap_size = log->size * sizeof(*(log->log)),
2b8819c6 444 .payload.log.mmap_offset = 0,
24e34754 445 .hdr.size = sizeof(msg.payload.log),
21e70425
MAL
446 };
447
c98ac64c
YW
448 /* Send only once with first queue pair */
449 if (dev->vq_index != 0) {
450 return 0;
451 }
452
21e70425
MAL
453 if (shmfd && log->fd != -1) {
454 fds[fd_num++] = log->fd;
455 }
456
025faa87
RK
457 ret = vhost_user_write(dev, &msg, fds, fd_num);
458 if (ret < 0) {
459 return ret;
c4843a45 460 }
21e70425
MAL
461
462 if (shmfd) {
24e34754 463 msg.hdr.size = 0;
025faa87
RK
464 ret = vhost_user_read(dev, &msg);
465 if (ret < 0) {
466 return ret;
21e70425
MAL
467 }
468
24e34754 469 if (msg.hdr.request != VHOST_USER_SET_LOG_BASE) {
21e70425
MAL
470 error_report("Received unexpected msg type. "
471 "Expected %d received %d",
24e34754 472 VHOST_USER_SET_LOG_BASE, msg.hdr.request);
025faa87 473 return -EPROTO;
21e70425 474 }
b931bfbf 475 }
21e70425
MAL
476
477 return 0;
b931bfbf
CO
478}
479
23374a84
RN
480static MemoryRegion *vhost_user_get_mr_data(uint64_t addr, ram_addr_t *offset,
481 int *fd)
482{
483 MemoryRegion *mr;
484
485 assert((uintptr_t)addr == addr);
486 mr = memory_region_from_host((void *)(uintptr_t)addr, offset);
487 *fd = memory_region_get_fd(mr);
4b870dc4 488 *offset += mr->ram_block->fd_offset;
23374a84
RN
489
490 return mr;
491}
492
ece99091 493static void vhost_user_fill_msg_region(VhostUserMemoryRegion *dst,
8d193715
RN
494 struct vhost_memory_region *src,
495 uint64_t mmap_offset)
ece99091
RN
496{
497 assert(src != NULL && dst != NULL);
498 dst->userspace_addr = src->userspace_addr;
499 dst->memory_size = src->memory_size;
500 dst->guest_phys_addr = src->guest_phys_addr;
8d193715 501 dst->mmap_offset = mmap_offset;
ece99091
RN
502}
503
2d9da9df
RN
/*
 * Build a VHOST_USER_SET_MEM_TABLE message from the device's current
 * memory map, collecting the backing fds of fd-backed regions into
 * fds[]/*fd_num.  With track_ramblocks, also record each region's
 * RAMBlock and offset in u->region_rb[]/u->region_rb_offset[] for
 * postcopy.  Returns 0, -ENOBUFS if more regions than the message can
 * carry, or -EINVAL if no region is fd-backed at all.
 */
static int vhost_user_fill_set_mem_table_msg(struct vhost_user *u,
                                             struct vhost_dev *dev,
                                             VhostUserMsg *msg,
                                             int *fds, size_t *fd_num,
                                             bool track_ramblocks)
{
    int i, fd;
    ram_addr_t offset;
    MemoryRegion *mr;
    struct vhost_memory_region *reg;
    VhostUserMemoryRegion region_buffer;

    msg->hdr.request = VHOST_USER_SET_MEM_TABLE;

    for (i = 0; i < dev->mem->nregions; ++i) {
        reg = dev->mem->regions + i;

        mr = vhost_user_get_mr_data(reg->userspace_addr, &offset, &fd);
        if (fd > 0) {
            if (track_ramblocks) {
                assert(*fd_num < VHOST_MEMORY_BASELINE_NREGIONS);
                trace_vhost_user_set_mem_table_withfd(*fd_num, mr->name,
                                                      reg->memory_size,
                                                      reg->guest_phys_addr,
                                                      reg->userspace_addr,
                                                      offset);
                u->region_rb_offset[i] = offset;
                u->region_rb[i] = mr->ram_block;
            } else if (*fd_num == VHOST_MEMORY_BASELINE_NREGIONS) {
                error_report("Failed preparing vhost-user memory table msg");
                return -ENOBUFS;
            }
            vhost_user_fill_msg_region(&region_buffer, reg, offset);
            /* Message slot and fds[] slot must stay in lockstep. */
            msg->payload.memory.regions[*fd_num] = region_buffer;
            fds[(*fd_num)++] = fd;
        } else if (track_ramblocks) {
            /* Region without an fd: clear any stale tracking state. */
            u->region_rb_offset[i] = 0;
            u->region_rb[i] = NULL;
        }
    }

    msg->payload.memory.nregions = *fd_num;

    if (!*fd_num) {
        error_report("Failed initializing vhost-user memory map, "
                     "consider using -object memory-backend-file share=on");
        return -EINVAL;
    }

    /* Only the populated part of the region table goes on the wire. */
    msg->hdr.size = sizeof(msg->payload.memory.nregions);
    msg->hdr.size += sizeof(msg->payload.memory.padding);
    msg->hdr.size += *fd_num * sizeof(VhostUserMemoryRegion);

    return 0;
}
559
f1aeb14b
RN
560static inline bool reg_equal(struct vhost_memory_region *shadow_reg,
561 struct vhost_memory_region *vdev_reg)
562{
563 return shadow_reg->guest_phys_addr == vdev_reg->guest_phys_addr &&
564 shadow_reg->userspace_addr == vdev_reg->userspace_addr &&
565 shadow_reg->memory_size == vdev_reg->memory_size;
566}
567
/*
 * Diff the backend's shadow region table against the device's current
 * memory map.  Regions only in the shadow table go into rem_reg
 * (*nr_rem_reg entries); regions only in the device map go into add_reg
 * (*nr_add_reg entries).  With track_ramblocks, postcopy client bases
 * for surviving regions are carried over into shadow_pcb, indexed by the
 * region's position in the *device* table.
 */
static void scrub_shadow_regions(struct vhost_dev *dev,
                                 struct scrub_regions *add_reg,
                                 int *nr_add_reg,
                                 struct scrub_regions *rem_reg,
                                 int *nr_rem_reg, uint64_t *shadow_pcb,
                                 bool track_ramblocks)
{
    struct vhost_user *u = dev->opaque;
    bool found[VHOST_USER_MAX_RAM_SLOTS] = {};
    struct vhost_memory_region *reg, *shadow_reg;
    int i, j, fd, add_idx = 0, rm_idx = 0, fd_num = 0;
    ram_addr_t offset;
    MemoryRegion *mr;
    bool matching;

    /*
     * Find memory regions present in our shadow state which are not in
     * the device's current memory state.
     *
     * Mark regions in both the shadow and device state as "found".
     */
    for (i = 0; i < u->num_shadow_regions; i++) {
        shadow_reg = &u->shadow_regions[i];
        matching = false;

        for (j = 0; j < dev->mem->nregions; j++) {
            reg = &dev->mem->regions[j];

            mr = vhost_user_get_mr_data(reg->userspace_addr, &offset, &fd);

            if (reg_equal(shadow_reg, reg)) {
                matching = true;
                found[j] = true;
                if (track_ramblocks) {
                    /*
                     * Reset postcopy client bases, region_rb, and
                     * region_rb_offset in case regions are removed.
                     */
                    if (fd > 0) {
                        u->region_rb_offset[j] = offset;
                        u->region_rb[j] = mr->ram_block;
                        /* Shadow index i -> device index j translation. */
                        shadow_pcb[j] = u->postcopy_client_bases[i];
                    } else {
                        u->region_rb_offset[j] = 0;
                        u->region_rb[j] = NULL;
                    }
                }
                break;
            }
        }

        /*
         * If the region was not found in the current device memory state
         * create an entry for it in the removed list.
         */
        if (!matching) {
            rem_reg[rm_idx].region = shadow_reg;
            rem_reg[rm_idx++].reg_idx = i;
        }
    }

    /*
     * For regions not marked "found", create entries in the added list.
     *
     * Note their indexes in the device memory state and the indexes of their
     * file descriptors.
     */
    for (i = 0; i < dev->mem->nregions; i++) {
        reg = &dev->mem->regions[i];
        vhost_user_get_mr_data(reg->userspace_addr, &offset, &fd);
        if (fd > 0) {
            /* fd_num counts every fd-backed region, found or not. */
            ++fd_num;
        }

        /*
         * If the region was in both the shadow and device state we don't
         * need to send a VHOST_USER_ADD_MEM_REG message for it.
         */
        if (found[i]) {
            continue;
        }

        add_reg[add_idx].region = reg;
        add_reg[add_idx].reg_idx = i;
        add_reg[add_idx++].fd_idx = fd_num;
    }
    *nr_rem_reg = rm_idx;
    *nr_add_reg = add_idx;

    return;
}
659
/*
 * Send a VHOST_USER_REM_MEM_REG message for each fd-backed region in
 * remove_reg and drop the region from the shadow table afterwards.
 * Returns 0 on success or a negative errno-style code; on error the
 * shadow table reflects only the removals that completed.
 */
static int send_remove_regions(struct vhost_dev *dev,
                               struct scrub_regions *remove_reg,
                               int nr_rem_reg, VhostUserMsg *msg,
                               bool reply_supported)
{
    struct vhost_user *u = dev->opaque;
    struct vhost_memory_region *shadow_reg;
    int i, fd, shadow_reg_idx, ret;
    ram_addr_t offset;
    VhostUserMemoryRegion region_buffer;

    /*
     * The regions in remove_reg appear in the same order they do in the
     * shadow table. Therefore we can minimize memory copies by iterating
     * through remove_reg backwards.
     */
    for (i = nr_rem_reg - 1; i >= 0; i--) {
        shadow_reg = remove_reg[i].region;
        shadow_reg_idx = remove_reg[i].reg_idx;

        vhost_user_get_mr_data(shadow_reg->userspace_addr, &offset, &fd);

        if (fd > 0) {
            msg->hdr.request = VHOST_USER_REM_MEM_REG;
            /* mmap_offset is irrelevant for removal; send 0. */
            vhost_user_fill_msg_region(&region_buffer, shadow_reg, 0);
            msg->payload.mem_reg.region = region_buffer;

            ret = vhost_user_write(dev, msg, NULL, 0);
            if (ret < 0) {
                return ret;
            }

            if (reply_supported) {
                ret = process_message_reply(dev, msg);
                if (ret) {
                    return ret;
                }
            }
        }

        /*
         * At this point we know the backend has unmapped the region. It is now
         * safe to remove it from the shadow table.
         */
        memmove(&u->shadow_regions[shadow_reg_idx],
                &u->shadow_regions[shadow_reg_idx + 1],
                sizeof(struct vhost_memory_region) *
                (u->num_shadow_regions - shadow_reg_idx - 1));
        u->num_shadow_regions--;
    }

    return 0;
}
713
/*
 * Send a VHOST_USER_ADD_MEM_REG message (with the region's backing fd)
 * for each region in add_reg and append the region to the shadow table.
 * With track_ramblocks (postcopy), each message is answered by the
 * backend with the client's mapping address, which is stored into
 * shadow_pcb.  Returns 0 on success or a negative errno-style code.
 */
static int send_add_regions(struct vhost_dev *dev,
                            struct scrub_regions *add_reg, int nr_add_reg,
                            VhostUserMsg *msg, uint64_t *shadow_pcb,
                            bool reply_supported, bool track_ramblocks)
{
    struct vhost_user *u = dev->opaque;
    int i, fd, ret, reg_idx, reg_fd_idx;
    struct vhost_memory_region *reg;
    MemoryRegion *mr;
    ram_addr_t offset;
    VhostUserMsg msg_reply;
    VhostUserMemoryRegion region_buffer;

    for (i = 0; i < nr_add_reg; i++) {
        reg = add_reg[i].region;
        reg_idx = add_reg[i].reg_idx;
        reg_fd_idx = add_reg[i].fd_idx;

        mr = vhost_user_get_mr_data(reg->userspace_addr, &offset, &fd);

        if (fd > 0) {
            if (track_ramblocks) {
                trace_vhost_user_set_mem_table_withfd(reg_fd_idx, mr->name,
                                                      reg->memory_size,
                                                      reg->guest_phys_addr,
                                                      reg->userspace_addr,
                                                      offset);
                u->region_rb_offset[reg_idx] = offset;
                u->region_rb[reg_idx] = mr->ram_block;
            }
            msg->hdr.request = VHOST_USER_ADD_MEM_REG;
            vhost_user_fill_msg_region(&region_buffer, reg, offset);
            msg->payload.mem_reg.region = region_buffer;

            ret = vhost_user_write(dev, msg, &fd, 1);
            if (ret < 0) {
                return ret;
            }

            if (track_ramblocks) {
                uint64_t reply_gpa;

                ret = vhost_user_read(dev, &msg_reply);
                if (ret < 0) {
                    return ret;
                }

                reply_gpa = msg_reply.payload.mem_reg.region.guest_phys_addr;

                if (msg_reply.hdr.request != VHOST_USER_ADD_MEM_REG) {
                    error_report("%s: Received unexpected msg type."
                                 "Expected %d received %d", __func__,
                                 VHOST_USER_ADD_MEM_REG,
                                 msg_reply.hdr.request);
                    return -EPROTO;
                }

                /*
                 * We're using the same structure, just reusing one of the
                 * fields, so it should be the same size.
                 */
                if (msg_reply.hdr.size != msg->hdr.size) {
                    error_report("%s: Unexpected size for postcopy reply "
                                 "%d vs %d", __func__, msg_reply.hdr.size,
                                 msg->hdr.size);
                    return -EPROTO;
                }

                /* Get the postcopy client base from the backend's reply. */
                if (reply_gpa == dev->mem->regions[reg_idx].guest_phys_addr) {
                    shadow_pcb[reg_idx] =
                        msg_reply.payload.mem_reg.region.userspace_addr;
                    trace_vhost_user_set_mem_table_postcopy(
                        msg_reply.payload.mem_reg.region.userspace_addr,
                        msg->payload.mem_reg.region.userspace_addr,
                        reg_fd_idx, reg_idx);
                } else {
                    error_report("%s: invalid postcopy reply for region. "
                                 "Got guest physical address %" PRIX64 ", expected "
                                 "%" PRIX64, __func__, reply_gpa,
                                 dev->mem->regions[reg_idx].guest_phys_addr);
                    return -EPROTO;
                }
            } else if (reply_supported) {
                ret = process_message_reply(dev, msg);
                if (ret) {
                    return ret;
                }
            }
        } else if (track_ramblocks) {
            /* No backing fd: make sure no stale tracking state remains. */
            u->region_rb_offset[reg_idx] = 0;
            u->region_rb[reg_idx] = NULL;
        }

        /*
         * At this point, we know the backend has mapped in the new
         * region, if the region has a valid file descriptor.
         *
         * The region should now be added to the shadow table.
         */
        u->shadow_regions[u->num_shadow_regions].guest_phys_addr =
            reg->guest_phys_addr;
        u->shadow_regions[u->num_shadow_regions].userspace_addr =
            reg->userspace_addr;
        u->shadow_regions[u->num_shadow_regions].memory_size =
            reg->memory_size;
        u->num_shadow_regions++;
    }

    return 0;
}
825
/*
 * Incremental memory-table update for backends supporting
 * CONFIGURE_MEM_SLOTS: compute the region diff against the shadow
 * table, send the removals then the additions, and (for postcopy,
 * track_ramblocks) publish the new client bases and ack the backend.
 * Returns 0 on success or a negative errno-style code.
 */
static int vhost_user_add_remove_regions(struct vhost_dev *dev,
                                         VhostUserMsg *msg,
                                         bool reply_supported,
                                         bool track_ramblocks)
{
    struct vhost_user *u = dev->opaque;
    struct scrub_regions add_reg[VHOST_USER_MAX_RAM_SLOTS];
    struct scrub_regions rem_reg[VHOST_USER_MAX_RAM_SLOTS];
    uint64_t shadow_pcb[VHOST_USER_MAX_RAM_SLOTS] = {};
    int nr_add_reg, nr_rem_reg;
    int ret;

    /* ADD/REM_MEM_REG messages carry a single-region payload. */
    msg->hdr.size = sizeof(msg->payload.mem_reg);

    /* Find the regions which need to be removed or added. */
    scrub_shadow_regions(dev, add_reg, &nr_add_reg, rem_reg, &nr_rem_reg,
                         shadow_pcb, track_ramblocks);

    if (nr_rem_reg) {
        ret = send_remove_regions(dev, rem_reg, nr_rem_reg, msg,
                                  reply_supported);
        if (ret < 0) {
            goto err;
        }
    }

    if (nr_add_reg) {
        ret = send_add_regions(dev, add_reg, nr_add_reg, msg, shadow_pcb,
                               reply_supported, track_ramblocks);
        if (ret < 0) {
            goto err;
        }
    }

    if (track_ramblocks) {
        memcpy(u->postcopy_client_bases, shadow_pcb,
               sizeof(uint64_t) * VHOST_USER_MAX_RAM_SLOTS);
        /*
         * Now we've registered this with the postcopy code, we ack to the
         * client, because now we're in the position to be able to deal with
         * any faults it generates.
         */
        /* TODO: Use this for failure cases as well with a bad value. */
        msg->hdr.size = sizeof(msg->payload.u64);
        msg->payload.u64 = 0; /* OK */

        ret = vhost_user_write(dev, msg, NULL, 0);
        if (ret < 0) {
            return ret;
        }
    }

    return 0;

err:
    /*
     * NOTE(review): on error the bases gathered so far are still stored,
     * presumably so postcopy state matches whatever the backend mapped.
     */
    if (track_ramblocks) {
        memcpy(u->postcopy_client_bases, shadow_pcb,
               sizeof(uint64_t) * VHOST_USER_MAX_RAM_SLOTS);
    }

    return ret;
}
888
/*
 * Postcopy variant of vhost_user_set_mem_table().  In addition to
 * sending the memory map, it records each region's RAMBlock/offset and
 * harvests the backend's mapping addresses (postcopy client bases) from
 * the reply, then acks so the backend may start generating faults.
 * Returns 0 on success or a negative errno-style code.
 */
static int vhost_user_set_mem_table_postcopy(struct vhost_dev *dev,
                                             struct vhost_memory *mem,
                                             bool reply_supported,
                                             bool config_mem_slots)
{
    struct vhost_user *u = dev->opaque;
    int fds[VHOST_MEMORY_BASELINE_NREGIONS];
    size_t fd_num = 0;
    VhostUserMsg msg_reply;
    int region_i, msg_i;
    int ret;

    VhostUserMsg msg = {
        .hdr.flags = VHOST_USER_VERSION,
    };

    /* Grow (never shrink) the RAMBlock tracking arrays, zeroing new slots. */
    if (u->region_rb_len < dev->mem->nregions) {
        u->region_rb = g_renew(RAMBlock*, u->region_rb, dev->mem->nregions);
        u->region_rb_offset = g_renew(ram_addr_t, u->region_rb_offset,
                                      dev->mem->nregions);
        memset(&(u->region_rb[u->region_rb_len]), '\0',
               sizeof(RAMBlock *) * (dev->mem->nregions - u->region_rb_len));
        memset(&(u->region_rb_offset[u->region_rb_len]), '\0',
               sizeof(ram_addr_t) * (dev->mem->nregions - u->region_rb_len));
        u->region_rb_len = dev->mem->nregions;
    }

    if (config_mem_slots) {
        ret = vhost_user_add_remove_regions(dev, &msg, reply_supported, true);
        if (ret < 0) {
            return ret;
        }
    } else {
        ret = vhost_user_fill_set_mem_table_msg(u, dev, &msg, fds, &fd_num,
                                                true);
        if (ret < 0) {
            return ret;
        }

        ret = vhost_user_write(dev, &msg, fds, fd_num);
        if (ret < 0) {
            return ret;
        }

        ret = vhost_user_read(dev, &msg_reply);
        if (ret < 0) {
            return ret;
        }

        if (msg_reply.hdr.request != VHOST_USER_SET_MEM_TABLE) {
            error_report("%s: Received unexpected msg type."
                         "Expected %d received %d", __func__,
                         VHOST_USER_SET_MEM_TABLE, msg_reply.hdr.request);
            return -EPROTO;
        }

        /*
         * We're using the same structure, just reusing one of the
         * fields, so it should be the same size.
         */
        if (msg_reply.hdr.size != msg.hdr.size) {
            error_report("%s: Unexpected size for postcopy reply "
                         "%d vs %d", __func__, msg_reply.hdr.size,
                         msg.hdr.size);
            return -EPROTO;
        }

        memset(u->postcopy_client_bases, 0,
               sizeof(uint64_t) * VHOST_USER_MAX_RAM_SLOTS);

        /*
         * They're in the same order as the regions that were sent
         * but some of the regions were skipped (above) if they
         * didn't have fd's
         */
        for (msg_i = 0, region_i = 0;
             region_i < dev->mem->nregions;
             region_i++) {
            if (msg_i < fd_num &&
                msg_reply.payload.memory.regions[msg_i].guest_phys_addr ==
                dev->mem->regions[region_i].guest_phys_addr) {
                u->postcopy_client_bases[region_i] =
                    msg_reply.payload.memory.regions[msg_i].userspace_addr;
                trace_vhost_user_set_mem_table_postcopy(
                    msg_reply.payload.memory.regions[msg_i].userspace_addr,
                    msg.payload.memory.regions[msg_i].userspace_addr,
                    msg_i, region_i);
                msg_i++;
            }
        }
        if (msg_i != fd_num) {
            error_report("%s: postcopy reply not fully consumed "
                         "%d vs %zd",
                         __func__, msg_i, fd_num);
            return -EIO;
        }

        /*
         * Now we've registered this with the postcopy code, we ack to the
         * client, because now we're in the position to be able to deal
         * with any faults it generates.
         */
        /* TODO: Use this for failure cases as well with a bad value. */
        msg.hdr.size = sizeof(msg.payload.u64);
        msg.payload.u64 = 0; /* OK */
        ret = vhost_user_write(dev, &msg, NULL, 0);
        if (ret < 0) {
            return ret;
        }
    }

    return 0;
}
1002
94c9cb31
MT
/*
 * Program the guest memory layout into the backend.
 *
 * Dispatches to the postcopy variant when a postcopy listen is active,
 * uses the incremental ADD/REM_MEM_REG protocol when the backend
 * negotiated F_CONFIGURE_MEM_SLOTS, and otherwise sends a single
 * SET_MEM_TABLE message with one fd per region.
 *
 * Returns 0 on success, negative errno on failure.
 */
static int vhost_user_set_mem_table(struct vhost_dev *dev,
                                    struct vhost_memory *mem)
{
    struct vhost_user *u = dev->opaque;
    int fds[VHOST_MEMORY_BASELINE_NREGIONS];
    size_t fd_num = 0;
    bool do_postcopy = u->postcopy_listen && u->postcopy_fd.handler;
    bool reply_supported = virtio_has_feature(dev->protocol_features,
                                              VHOST_USER_PROTOCOL_F_REPLY_ACK);
    bool config_mem_slots =
        virtio_has_feature(dev->protocol_features,
                           VHOST_USER_PROTOCOL_F_CONFIGURE_MEM_SLOTS);
    int ret;

    if (do_postcopy) {
        /*
         * Postcopy has enough differences that it's best done in its own
         * version
         */
        return vhost_user_set_mem_table_postcopy(dev, mem, reply_supported,
                                                 config_mem_slots);
    }

    VhostUserMsg msg = {
        .hdr.flags = VHOST_USER_VERSION,
    };

    if (reply_supported) {
        msg.hdr.flags |= VHOST_USER_NEED_REPLY_MASK;
    }

    if (config_mem_slots) {
        /* Incremental add/remove of individual regions. */
        ret = vhost_user_add_remove_regions(dev, &msg, reply_supported, false);
        if (ret < 0) {
            return ret;
        }
    } else {
        /* Whole-table update in one message, fds carried out-of-band. */
        ret = vhost_user_fill_set_mem_table_msg(u, dev, &msg, fds, &fd_num,
                                                false);
        if (ret < 0) {
            return ret;
        }

        ret = vhost_user_write(dev, &msg, fds, fd_num);
        if (ret < 0) {
            return ret;
        }

        if (reply_supported) {
            return process_message_reply(dev, &msg);
        }
    }

    return 0;
}
1058
21e70425
MAL
1059static int vhost_user_set_vring_endian(struct vhost_dev *dev,
1060 struct vhost_vring_state *ring)
1061{
5df04f17
FF
1062 bool cross_endian = virtio_has_feature(dev->protocol_features,
1063 VHOST_USER_PROTOCOL_F_CROSS_ENDIAN);
1064 VhostUserMsg msg = {
24e34754
MT
1065 .hdr.request = VHOST_USER_SET_VRING_ENDIAN,
1066 .hdr.flags = VHOST_USER_VERSION,
5df04f17 1067 .payload.state = *ring,
24e34754 1068 .hdr.size = sizeof(msg.payload.state),
5df04f17
FF
1069 };
1070
1071 if (!cross_endian) {
1072 error_report("vhost-user trying to send unhandled ioctl");
025faa87 1073 return -ENOTSUP;
5df04f17
FF
1074 }
1075
025faa87 1076 return vhost_user_write(dev, &msg, NULL, 0);
21e70425 1077}
5f6f6664 1078
21e70425
MAL
1079static int vhost_set_vring(struct vhost_dev *dev,
1080 unsigned long int request,
1081 struct vhost_vring_state *ring)
1082{
1083 VhostUserMsg msg = {
24e34754
MT
1084 .hdr.request = request,
1085 .hdr.flags = VHOST_USER_VERSION,
7f4a930e 1086 .payload.state = *ring,
24e34754 1087 .hdr.size = sizeof(msg.payload.state),
21e70425
MAL
1088 };
1089
025faa87 1090 return vhost_user_write(dev, &msg, NULL, 0);
21e70425
MAL
1091}
1092
1093static int vhost_user_set_vring_num(struct vhost_dev *dev,
1094 struct vhost_vring_state *ring)
1095{
1096 return vhost_set_vring(dev, VHOST_USER_SET_VRING_NUM, ring);
1097}
1098
0b0af4d6 1099static void vhost_user_host_notifier_free(VhostUserHostNotifier *n)
44866521 1100{
0b0af4d6 1101 assert(n && n->unmap_addr);
8e3b0cbb 1102 munmap(n->unmap_addr, qemu_real_host_page_size());
0b0af4d6
XL
1103 n->unmap_addr = NULL;
1104}
1105
503e3554
AB
1106/*
1107 * clean-up function for notifier, will finally free the structure
1108 * under rcu.
1109 */
1110static void vhost_user_host_notifier_remove(VhostUserHostNotifier *n,
1111 VirtIODevice *vdev)
0b0af4d6 1112{
e867144b 1113 if (n->addr) {
0b0af4d6 1114 if (vdev) {
503e3554 1115 virtio_queue_set_host_notifier_mr(vdev, n->idx, &n->mr, false);
0b0af4d6
XL
1116 }
1117 assert(!n->unmap_addr);
1118 n->unmap_addr = n->addr;
1119 n->addr = NULL;
1120 call_rcu(n, vhost_user_host_notifier_free, rcu);
44866521
TB
1121 }
1122}
1123
21e70425
MAL
1124static int vhost_user_set_vring_base(struct vhost_dev *dev,
1125 struct vhost_vring_state *ring)
1126{
1127 return vhost_set_vring(dev, VHOST_USER_SET_VRING_BASE, ring);
1128}
1129
1130static int vhost_user_set_vring_enable(struct vhost_dev *dev, int enable)
1131{
dc3db6ad 1132 int i;
21e70425 1133
923e2d98 1134 if (!virtio_has_feature(dev->features, VHOST_USER_F_PROTOCOL_FEATURES)) {
025faa87 1135 return -EINVAL;
5f6f6664
NN
1136 }
1137
dc3db6ad 1138 for (i = 0; i < dev->nvqs; ++i) {
025faa87 1139 int ret;
dc3db6ad
MT
1140 struct vhost_vring_state state = {
1141 .index = dev->vq_index + i,
1142 .num = enable,
1143 };
1144
025faa87
RK
1145 ret = vhost_set_vring(dev, VHOST_USER_SET_VRING_ENABLE, &state);
1146 if (ret < 0) {
1147 /*
1148 * Restoring the previous state is likely infeasible, as well as
1149 * proceeding regardless the error, so just bail out and hope for
1150 * the device-level recovery.
1151 */
1152 return ret;
1153 }
dc3db6ad 1154 }
21e70425 1155
dc3db6ad
MT
1156 return 0;
1157}
21e70425 1158
503e3554
AB
1159static VhostUserHostNotifier *fetch_notifier(VhostUserState *u,
1160 int idx)
1161{
1162 if (idx >= u->notifiers->len) {
1163 return NULL;
1164 }
1165 return g_ptr_array_index(u->notifiers, idx);
1166}
1167
21e70425
MAL
/*
 * Fetch the current base (used index) of @ring->index from the backend
 * via a GET_VRING_BASE request/reply round-trip.  The queue's host
 * notifier mapping (if any) is torn down first.
 *
 * Returns 0 and fills *ring on success, negative errno on failure.
 */
static int vhost_user_get_vring_base(struct vhost_dev *dev,
                                     struct vhost_vring_state *ring)
{
    int ret;
    VhostUserMsg msg = {
        .hdr.request = VHOST_USER_GET_VRING_BASE,
        .hdr.flags = VHOST_USER_VERSION,
        .payload.state = *ring,
        .hdr.size = sizeof(msg.payload.state),
    };
    struct vhost_user *u = dev->opaque;

    /* Drop the queue's host notifier (if any) before querying the ring. */
    VhostUserHostNotifier *n = fetch_notifier(u->user, ring->index);
    if (n) {
        vhost_user_host_notifier_remove(n, dev->vdev);
    }

    ret = vhost_user_write(dev, &msg, NULL, 0);
    if (ret < 0) {
        return ret;
    }

    ret = vhost_user_read(dev, &msg);
    if (ret < 0) {
        return ret;
    }

    /* Validate the reply: it must echo our request type ... */
    if (msg.hdr.request != VHOST_USER_GET_VRING_BASE) {
        error_report("Received unexpected msg type. Expected %d received %d",
                     VHOST_USER_GET_VRING_BASE, msg.hdr.request);
        return -EPROTO;
    }

    /* ... and carry exactly a vhost_vring_state payload. */
    if (msg.hdr.size != sizeof(msg.payload.state)) {
        error_report("Received bad msg size.");
        return -EPROTO;
    }

    *ring = msg.payload.state;

    return 0;
}
1210
21e70425
MAL
1211static int vhost_set_vring_file(struct vhost_dev *dev,
1212 VhostUserRequest request,
1213 struct vhost_vring_file *file)
c2bea314 1214{
27598393 1215 int fds[VHOST_USER_MAX_RAM_SLOTS];
9a78a5dd 1216 size_t fd_num = 0;
c2bea314 1217 VhostUserMsg msg = {
24e34754
MT
1218 .hdr.request = request,
1219 .hdr.flags = VHOST_USER_VERSION,
7f4a930e 1220 .payload.u64 = file->index & VHOST_USER_VRING_IDX_MASK,
24e34754 1221 .hdr.size = sizeof(msg.payload.u64),
c2bea314
MAL
1222 };
1223
21e70425
MAL
1224 if (ioeventfd_enabled() && file->fd > 0) {
1225 fds[fd_num++] = file->fd;
1226 } else {
7f4a930e 1227 msg.payload.u64 |= VHOST_USER_VRING_NOFD_MASK;
9a78a5dd
MAL
1228 }
1229
025faa87 1230 return vhost_user_write(dev, &msg, fds, fd_num);
21e70425 1231}
9a78a5dd 1232
21e70425
MAL
1233static int vhost_user_set_vring_kick(struct vhost_dev *dev,
1234 struct vhost_vring_file *file)
1235{
1236 return vhost_set_vring_file(dev, VHOST_USER_SET_VRING_KICK, file);
1237}
1238
1239static int vhost_user_set_vring_call(struct vhost_dev *dev,
1240 struct vhost_vring_file *file)
1241{
1242 return vhost_set_vring_file(dev, VHOST_USER_SET_VRING_CALL, file);
1243}
1244
60dc3c5b
KK
1245static int vhost_user_set_vring_err(struct vhost_dev *dev,
1246 struct vhost_vring_file *file)
1247{
1248 return vhost_set_vring_file(dev, VHOST_USER_SET_VRING_ERR, file);
1249}
21e70425
MAL
1250
/*
 * Send @request and read back a single u64 into *u64.
 *
 * One-time requests are only issued by the vhost_dev owning virtqueue 0;
 * other queue groups return success without touching the socket.
 *
 * Returns 0 on success, negative errno on transport or protocol errors.
 */
static int vhost_user_get_u64(struct vhost_dev *dev, int request, uint64_t *u64)
{
    int ret;
    VhostUserMsg msg = {
        .hdr.request = request,
        .hdr.flags = VHOST_USER_VERSION,
    };

    if (vhost_user_one_time_request(request) && dev->vq_index != 0) {
        return 0;
    }

    ret = vhost_user_write(dev, &msg, NULL, 0);
    if (ret < 0) {
        return ret;
    }

    ret = vhost_user_read(dev, &msg);
    if (ret < 0) {
        return ret;
    }

    /* The reply must echo the request type ... */
    if (msg.hdr.request != request) {
        error_report("Received unexpected msg type. Expected %d received %d",
                     request, msg.hdr.request);
        return -EPROTO;
    }

    /* ... and carry exactly a u64 payload. */
    if (msg.hdr.size != sizeof(msg.payload.u64)) {
        error_report("Received bad msg size.");
        return -EPROTO;
    }

    *u64 = msg.payload.u64;

    return 0;
}
1288
1289static int vhost_user_get_features(struct vhost_dev *dev, uint64_t *features)
1290{
f2a6e6c4
KW
1291 if (vhost_user_get_u64(dev, VHOST_USER_GET_FEATURES, features) < 0) {
1292 return -EPROTO;
1293 }
1294
1295 return 0;
21e70425
MAL
1296}
1297
699f2e53
DP
1298static int enforce_reply(struct vhost_dev *dev,
1299 const VhostUserMsg *msg)
1300{
1301 uint64_t dummy;
1302
1303 if (msg->hdr.flags & VHOST_USER_NEED_REPLY_MASK) {
1304 return process_message_reply(dev, msg);
1305 }
1306
1307 /*
1308 * We need to wait for a reply but the backend does not
1309 * support replies for the command we just sent.
1310 * Send VHOST_USER_GET_FEATURES which makes all backends
1311 * send a reply.
1312 */
1313 return vhost_user_get_features(dev, &dummy);
1314}
1315
1316static int vhost_user_set_vring_addr(struct vhost_dev *dev,
1317 struct vhost_vring_addr *addr)
1318{
025faa87 1319 int ret;
699f2e53
DP
1320 VhostUserMsg msg = {
1321 .hdr.request = VHOST_USER_SET_VRING_ADDR,
1322 .hdr.flags = VHOST_USER_VERSION,
1323 .payload.addr = *addr,
1324 .hdr.size = sizeof(msg.payload.addr),
1325 };
1326
1327 bool reply_supported = virtio_has_feature(dev->protocol_features,
1328 VHOST_USER_PROTOCOL_F_REPLY_ACK);
1329
1330 /*
1331 * wait for a reply if logging is enabled to make sure
1332 * backend is actually logging changes
1333 */
1334 bool wait_for_reply = addr->flags & (1 << VHOST_VRING_F_LOG);
1335
1336 if (reply_supported && wait_for_reply) {
1337 msg.hdr.flags |= VHOST_USER_NEED_REPLY_MASK;
1338 }
1339
025faa87
RK
1340 ret = vhost_user_write(dev, &msg, NULL, 0);
1341 if (ret < 0) {
1342 return ret;
699f2e53
DP
1343 }
1344
1345 if (wait_for_reply) {
1346 return enforce_reply(dev, &msg);
1347 }
1348
1349 return 0;
1350}
1351
1352static int vhost_user_set_u64(struct vhost_dev *dev, int request, uint64_t u64,
1353 bool wait_for_reply)
1354{
1355 VhostUserMsg msg = {
1356 .hdr.request = request,
1357 .hdr.flags = VHOST_USER_VERSION,
1358 .payload.u64 = u64,
1359 .hdr.size = sizeof(msg.payload.u64),
1360 };
025faa87 1361 int ret;
699f2e53
DP
1362
1363 if (wait_for_reply) {
1364 bool reply_supported = virtio_has_feature(dev->protocol_features,
1365 VHOST_USER_PROTOCOL_F_REPLY_ACK);
1366 if (reply_supported) {
1367 msg.hdr.flags |= VHOST_USER_NEED_REPLY_MASK;
1368 }
1369 }
1370
025faa87
RK
1371 ret = vhost_user_write(dev, &msg, NULL, 0);
1372 if (ret < 0) {
1373 return ret;
699f2e53
DP
1374 }
1375
1376 if (wait_for_reply) {
1377 return enforce_reply(dev, &msg);
1378 }
1379
1380 return 0;
1381}
1382
923b8921
YW
1383static int vhost_user_set_status(struct vhost_dev *dev, uint8_t status)
1384{
1385 return vhost_user_set_u64(dev, VHOST_USER_SET_STATUS, status, false);
1386}
1387
1388static int vhost_user_get_status(struct vhost_dev *dev, uint8_t *status)
1389{
1390 uint64_t value;
1391 int ret;
1392
1393 ret = vhost_user_get_u64(dev, VHOST_USER_GET_STATUS, &value);
1394 if (ret < 0) {
1395 return ret;
1396 }
1397 *status = value;
1398
1399 return 0;
1400}
1401
1402static int vhost_user_add_status(struct vhost_dev *dev, uint8_t status)
1403{
1404 uint8_t s;
1405 int ret;
1406
1407 ret = vhost_user_get_status(dev, &s);
1408 if (ret < 0) {
1409 return ret;
1410 }
1411
1412 if ((s & status) == status) {
1413 return 0;
1414 }
1415 s |= status;
1416
1417 return vhost_user_set_status(dev, s);
1418}
1419
699f2e53
DP
/*
 * Negotiate the virtio feature bits with the backend.  Backend-only
 * bits (currently VHOST_USER_F_PROTOCOL_FEATURES) are merged in, and
 * when the STATUS protocol feature is present, FEATURES_OK is set in
 * the device status on success.
 */
static int vhost_user_set_features(struct vhost_dev *dev,
                                   uint64_t features)
{
    /*
     * wait for a reply if logging is enabled to make sure
     * backend is actually logging changes
     */
    bool log_enabled = features & (0x1ULL << VHOST_F_LOG_ALL);
    int ret;

    /*
     * We need to include any extra backend only feature bits that
     * might be needed by our device. Currently this includes the
     * VHOST_USER_F_PROTOCOL_FEATURES bit for enabling protocol
     * features.
     */
    ret = vhost_user_set_u64(dev, VHOST_USER_SET_FEATURES,
                             features | dev->backend_features,
                             log_enabled);

    /* Mirror a successful negotiation into the device status register. */
    if (virtio_has_feature(dev->protocol_features,
                           VHOST_USER_PROTOCOL_F_STATUS)) {
        if (!ret) {
            return vhost_user_add_status(dev, VIRTIO_CONFIG_S_FEATURES_OK);
        }
    }

    return ret;
}
1449
1450static int vhost_user_set_protocol_features(struct vhost_dev *dev,
1451 uint64_t features)
1452{
1453 return vhost_user_set_u64(dev, VHOST_USER_SET_PROTOCOL_FEATURES, features,
1454 false);
1455}
1456
21e70425
MAL
1457static int vhost_user_set_owner(struct vhost_dev *dev)
1458{
1459 VhostUserMsg msg = {
24e34754
MT
1460 .hdr.request = VHOST_USER_SET_OWNER,
1461 .hdr.flags = VHOST_USER_VERSION,
21e70425
MAL
1462 };
1463
025faa87 1464 return vhost_user_write(dev, &msg, NULL, 0);
21e70425
MAL
1465}
1466
6b0eff1a
RN
1467static int vhost_user_get_max_memslots(struct vhost_dev *dev,
1468 uint64_t *max_memslots)
1469{
1470 uint64_t backend_max_memslots;
1471 int err;
1472
1473 err = vhost_user_get_u64(dev, VHOST_USER_GET_MAX_MEM_SLOTS,
1474 &backend_max_memslots);
1475 if (err < 0) {
1476 return err;
1477 }
1478
1479 *max_memslots = backend_max_memslots;
1480
1481 return 0;
1482}
1483
21e70425
MAL
1484static int vhost_user_reset_device(struct vhost_dev *dev)
1485{
1486 VhostUserMsg msg = {
24e34754 1487 .hdr.flags = VHOST_USER_VERSION,
21e70425
MAL
1488 };
1489
d91d57e6
RN
1490 msg.hdr.request = virtio_has_feature(dev->protocol_features,
1491 VHOST_USER_PROTOCOL_F_RESET_DEVICE)
1492 ? VHOST_USER_RESET_DEVICE
1493 : VHOST_USER_RESET_OWNER;
1494
025faa87 1495 return vhost_user_write(dev, &msg, NULL, 0);
c2bea314
MAL
1496}
1497
4c3e257b
CL
1498static int vhost_user_slave_handle_config_change(struct vhost_dev *dev)
1499{
025faa87
RK
1500 if (!dev->config_ops || !dev->config_ops->vhost_dev_config_notifier) {
1501 return -ENOSYS;
4c3e257b
CL
1502 }
1503
025faa87 1504 return dev->config_ops->vhost_dev_config_notifier(dev);
4c3e257b
CL
1505}
1506
503e3554
AB
/*
 * Fetch or create the notifier for a given idx. Newly created
 * notifiers are added to the pointer array that tracks them.
 *
 * The array is grown (NULL-filled) on demand, so notifications that
 * arrive out of order still land in their own slot.
 */
static VhostUserHostNotifier *fetch_or_create_notifier(VhostUserState *u,
                                                       int idx)
{
    VhostUserHostNotifier *n = NULL;
    if (idx >= u->notifiers->len) {
        /* Grow the array so idx is addressable; new slots are NULL. */
        g_ptr_array_set_size(u->notifiers, idx + 1);
    }

    n = g_ptr_array_index(u->notifiers, idx);
    if (!n) {
        /*
         * In case notifications arrive out-of-order,
         * make room for the current index: drop the NULL placeholder
         * and insert the freshly allocated notifier in its place.
         */
        g_ptr_array_remove_index(u->notifiers, idx);
        n = g_new0(VhostUserHostNotifier, 1);
        n->idx = idx;
        g_ptr_array_insert(u->notifiers, idx, n);
        trace_vhost_user_create_notifier(idx, n);
    }

    return n;
}
1534
44866521
TB
/*
 * Handle a backend request to (un)install a host notifier for one
 * virtqueue: map the page the backend passed via @fd and expose it to
 * the guest as a ram-device MemoryRegion, or tear the old mapping down
 * when the NOFD flag is set.
 *
 * Returns 0 on success, negative errno on failure.
 */
static int vhost_user_slave_handle_vring_host_notifier(struct vhost_dev *dev,
                                                       VhostUserVringArea *area,
                                                       int fd)
{
    int queue_idx = area->u64 & VHOST_USER_VRING_IDX_MASK;
    size_t page_size = qemu_real_host_page_size();
    struct vhost_user *u = dev->opaque;
    VhostUserState *user = u->user;
    VirtIODevice *vdev = dev->vdev;
    VhostUserHostNotifier *n;
    void *addr;
    char *name;

    if (!virtio_has_feature(dev->protocol_features,
                            VHOST_USER_PROTOCOL_F_HOST_NOTIFIER) ||
        vdev == NULL || queue_idx >= virtio_get_num_queues(vdev)) {
        return -EINVAL;
    }

    /*
     * Fetch notifier and invalidate any old data before setting up
     * new mapped address.
     */
    n = fetch_or_create_notifier(user, queue_idx);
    vhost_user_host_notifier_remove(n, vdev);

    /* NOFD means "remove the notifier" — the teardown above is all. */
    if (area->u64 & VHOST_USER_VRING_NOFD_MASK) {
        return 0;
    }

    /* Sanity check. */
    if (area->size != page_size) {
        return -EINVAL;
    }

    addr = mmap(NULL, page_size, PROT_READ | PROT_WRITE, MAP_SHARED,
                fd, area->offset);
    if (addr == MAP_FAILED) {
        return -EFAULT;
    }

    name = g_strdup_printf("vhost-user/host-notifier@%p mmaps[%d]",
                           user, queue_idx);
    if (!n->mr.ram) { /* Don't init again after suspend. */
        memory_region_init_ram_device_ptr(&n->mr, OBJECT(vdev), name,
                                          page_size, addr);
    } else {
        /* Reuse the existing MemoryRegion, just repoint its backing. */
        n->mr.ram_block->host = addr;
    }
    g_free(name);

    if (virtio_queue_set_host_notifier_mr(vdev, queue_idx, &n->mr, true)) {
        /* Installation failed: undo the region and the mapping. */
        object_unparent(OBJECT(&n->mr));
        munmap(addr, page_size);
        return -ENXIO;
    }

    n->addr = addr;

    return 0;
}
1596
de62e494
GK
/*
 * Tear down the backend ("slave") request channel.  The watch source is
 * destroyed before its reference is dropped; the channel object
 * reference is released last.
 */
static void close_slave_channel(struct vhost_user *u)
{
    g_source_destroy(u->slave_src);
    g_source_unref(u->slave_src);
    u->slave_src = NULL;
    object_unref(OBJECT(u->slave_ioc));
    u->slave_ioc = NULL;
}
1605
57dc0217
GK
/*
 * GSource callback for the backend ("slave") request channel.  Reads one
 * message (header, optional fds, payload), dispatches it, and — when the
 * backend asked via NEED_REPLY — writes back a u64 ack carrying the
 * handler's success/failure.  On any transport error the channel is
 * closed and the source removed.  Received fds are always closed before
 * returning; handlers that need one (e.g. the host-notifier handler)
 * must dup/mmap it during dispatch.
 */
static gboolean slave_read(QIOChannel *ioc, GIOCondition condition,
                           gpointer opaque)
{
    struct vhost_dev *dev = opaque;
    struct vhost_user *u = dev->opaque;
    VhostUserHeader hdr = { 0, };
    VhostUserPayload payload = { 0, };
    Error *local_err = NULL;
    gboolean rc = G_SOURCE_CONTINUE;
    int ret = 0;
    struct iovec iov;
    g_autofree int *fd = NULL;
    size_t fdsize = 0;
    int i;

    /* Read header */
    iov.iov_base = &hdr;
    iov.iov_len = VHOST_USER_HDR_SIZE;

    if (qio_channel_readv_full_all(ioc, &iov, 1, &fd, &fdsize, &local_err)) {
        error_report_err(local_err);
        goto err;
    }

    /* Reject payloads larger than our union before reading them. */
    if (hdr.size > VHOST_USER_PAYLOAD_SIZE) {
        error_report("Failed to read msg header."
                " Size %d exceeds the maximum %zu.", hdr.size,
                VHOST_USER_PAYLOAD_SIZE);
        goto err;
    }

    /* Read payload */
    if (qio_channel_read_all(ioc, (char *) &payload, hdr.size, &local_err)) {
        error_report_err(local_err);
        goto err;
    }

    switch (hdr.request) {
    case VHOST_USER_BACKEND_IOTLB_MSG:
        ret = vhost_backend_handle_iotlb_msg(dev, &payload.iotlb);
        break;
    case VHOST_USER_BACKEND_CONFIG_CHANGE_MSG:
        ret = vhost_user_slave_handle_config_change(dev);
        break;
    case VHOST_USER_BACKEND_VRING_HOST_NOTIFIER_MSG:
        ret = vhost_user_slave_handle_vring_host_notifier(dev, &payload.area,
                                                          fd ? fd[0] : -1);
        break;
    default:
        error_report("Received unexpected msg type: %d.", hdr.request);
        ret = -EINVAL;
    }

    /*
     * REPLY_ACK feature handling. Other reply types have to be managed
     * directly in their request handlers.
     */
    if (hdr.flags & VHOST_USER_NEED_REPLY_MASK) {
        struct iovec iovec[2];


        hdr.flags &= ~VHOST_USER_NEED_REPLY_MASK;
        hdr.flags |= VHOST_USER_REPLY_MASK;

        /* Ack payload: 0 on success, 1 on failure. */
        payload.u64 = !!ret;
        hdr.size = sizeof(payload.u64);

        iovec[0].iov_base = &hdr;
        iovec[0].iov_len = VHOST_USER_HDR_SIZE;
        iovec[1].iov_base = &payload;
        iovec[1].iov_len = hdr.size;

        if (qio_channel_writev_all(ioc, iovec, ARRAY_SIZE(iovec), &local_err)) {
            error_report_err(local_err);
            goto err;
        }
    }

    goto fdcleanup;

err:
    close_slave_channel(u);
    rc = G_SOURCE_REMOVE;

fdcleanup:
    /* We own every fd the kernel handed us; close them all. */
    if (fd) {
        for (i = 0; i < fdsize; i++) {
            close(fd[i]);
        }
    }
    return rc;
}
1698
/*
 * Create the backend-to-QEMU request channel: make a socketpair, keep
 * one end wrapped in a QIOChannel watched by slave_read(), and pass the
 * other end to the backend with SET_BACKEND_REQ_FD.  A no-op when the
 * backend did not negotiate F_BACKEND_REQ.
 *
 * Returns 0 on success, negative errno on failure.
 */
static int vhost_setup_slave_channel(struct vhost_dev *dev)
{
    VhostUserMsg msg = {
        .hdr.request = VHOST_USER_SET_BACKEND_REQ_FD,
        .hdr.flags = VHOST_USER_VERSION,
    };
    struct vhost_user *u = dev->opaque;
    int sv[2], ret = 0;
    bool reply_supported = virtio_has_feature(dev->protocol_features,
                                              VHOST_USER_PROTOCOL_F_REPLY_ACK);
    Error *local_err = NULL;
    QIOChannel *ioc;

    if (!virtio_has_feature(dev->protocol_features,
                            VHOST_USER_PROTOCOL_F_BACKEND_REQ)) {
        return 0;
    }

    if (qemu_socketpair(PF_UNIX, SOCK_STREAM, 0, sv) == -1) {
        /* error_report() may clobber errno; save it first. */
        int saved_errno = errno;
        error_report("socketpair() failed");
        return -saved_errno;
    }

    ioc = QIO_CHANNEL(qio_channel_socket_new_fd(sv[0], &local_err));
    if (!ioc) {
        /* NOTE(review): sv[1] (and possibly sv[0]) appear to leak on this
         * path — confirm whether qio_channel_socket_new_fd() closes the fd
         * on failure. */
        error_report_err(local_err);
        return -ECONNREFUSED;
    }
    u->slave_ioc = ioc;
    u->slave_src = qio_channel_add_watch_source(u->slave_ioc,
                                                G_IO_IN | G_IO_HUP,
                                                slave_read, dev, NULL, NULL);

    if (reply_supported) {
        msg.hdr.flags |= VHOST_USER_NEED_REPLY_MASK;
    }

    /* Ship the backend's end of the socketpair as ancillary data. */
    ret = vhost_user_write(dev, &msg, &sv[1], 1);
    if (ret) {
        goto out;
    }

    if (reply_supported) {
        ret = process_message_reply(dev, &msg);
    }

out:
    /* Our copy of the backend's end is no longer needed. */
    close(sv[1]);
    if (ret) {
        close_slave_channel(u);
    }

    return ret;
}
1754
18658a3c 1755#ifdef CONFIG_LINUX
f82c1116
DDAG
/*
 * Called back from the postcopy fault thread when a fault is received on our
 * ufd.
 * Scans the device's memory regions for one containing the faulting
 * client address and asks the postcopy code to fetch the shared page.
 * TODO: This is Linux specific
 */
static int vhost_user_postcopy_fault_handler(struct PostCopyFD *pcfd,
                                             void *ufd)
{
    struct vhost_dev *dev = pcfd->data;
    struct vhost_user *u = dev->opaque;
    struct uffd_msg *msg = ufd;
    uint64_t faultaddr = msg->arg.pagefault.address;
    RAMBlock *rb = NULL;
    uint64_t rb_offset;
    int i;

    trace_vhost_user_postcopy_fault_handler(pcfd->idstr, faultaddr,
                                            dev->mem->nregions);
    for (i = 0; i < MIN(dev->mem->nregions, u->region_rb_len); i++) {
        trace_vhost_user_postcopy_fault_handler_loop(i,
                u->postcopy_client_bases[i], dev->mem->regions[i].memory_size);
        if (faultaddr >= u->postcopy_client_bases[i]) {
            /* Offset of the fault address in the vhost region */
            uint64_t region_offset = faultaddr - u->postcopy_client_bases[i];
            if (region_offset < dev->mem->regions[i].memory_size) {
                /* Translate into an offset within the backing RAMBlock. */
                rb_offset = region_offset + u->region_rb_offset[i];
                trace_vhost_user_postcopy_fault_handler_found(i,
                        region_offset, rb_offset);
                rb = u->region_rb[i];
                return postcopy_request_shared_page(pcfd, rb, faultaddr,
                                                    rb_offset);
            }
        }
    }
    error_report("%s: Failed to find region for fault %" PRIx64,
                 __func__, faultaddr);
    return -1;
}
1794
c07e3615
DDAG
/*
 * Wake any thread blocked on (@rb, @offset): translate the RAMBlock
 * offset back into the client's address space and notify the postcopy
 * code.  Returns 0 when no matching region is found (nothing to wake).
 */
static int vhost_user_postcopy_waker(struct PostCopyFD *pcfd, RAMBlock *rb,
                                     uint64_t offset)
{
    struct vhost_dev *dev = pcfd->data;
    struct vhost_user *u = dev->opaque;
    int i;

    trace_vhost_user_postcopy_waker(qemu_ram_get_idstr(rb), offset);

    if (!u) {
        return 0;
    }
    /* Translate the offset into an address in the clients address space */
    for (i = 0; i < MIN(dev->mem->nregions, u->region_rb_len); i++) {
        if (u->region_rb[i] == rb &&
            offset >= u->region_rb_offset[i] &&
            offset < (u->region_rb_offset[i] +
                      dev->mem->regions[i].memory_size)) {
            uint64_t client_addr = (offset - u->region_rb_offset[i]) +
                                   u->postcopy_client_bases[i];
            trace_vhost_user_postcopy_waker_found(client_addr);
            return postcopy_wake_shared(pcfd, client_addr, rb);
        }
    }

    trace_vhost_user_postcopy_waker_nomatch(qemu_ram_get_idstr(rb), offset);
    return 0;
}
18658a3c 1823#endif
c07e3615 1824
d3dff7a5
DDAG
/*
 * Called at the start of an inbound postcopy on reception of the
 * 'advise' command.
 *
 * Performs the POSTCOPY_ADVISE round-trip with the backend, receives
 * the backend's userfaultfd over the control channel and registers it
 * with the postcopy fault-handling machinery.
 */
static int vhost_user_postcopy_advise(struct vhost_dev *dev, Error **errp)
{
#ifdef CONFIG_LINUX
    struct vhost_user *u = dev->opaque;
    CharBackend *chr = u->user->chr;
    int ufd;
    int ret;
    VhostUserMsg msg = {
        .hdr.request = VHOST_USER_POSTCOPY_ADVISE,
        .hdr.flags = VHOST_USER_VERSION,
    };

    ret = vhost_user_write(dev, &msg, NULL, 0);
    if (ret < 0) {
        error_setg(errp, "Failed to send postcopy_advise to vhost");
        return ret;
    }

    ret = vhost_user_read(dev, &msg);
    if (ret < 0) {
        error_setg(errp, "Failed to get postcopy_advise reply from vhost");
        return ret;
    }

    /* The reply must echo the request type and carry no payload. */
    if (msg.hdr.request != VHOST_USER_POSTCOPY_ADVISE) {
        error_setg(errp, "Unexpected msg type. Expected %d received %d",
                   VHOST_USER_POSTCOPY_ADVISE, msg.hdr.request);
        return -EPROTO;
    }

    if (msg.hdr.size) {
        error_setg(errp, "Received bad msg size.");
        return -EPROTO;
    }
    /* The backend's userfaultfd arrives as ancillary data. */
    ufd = qemu_chr_fe_get_msgfd(chr);
    if (ufd < 0) {
        error_setg(errp, "%s: Failed to get ufd", __func__);
        return -EIO;
    }
    qemu_socket_set_nonblock(ufd);

    /* register ufd with userfault thread */
    u->postcopy_fd.fd = ufd;
    u->postcopy_fd.data = dev;
    u->postcopy_fd.handler = vhost_user_postcopy_fault_handler;
    u->postcopy_fd.waker = vhost_user_postcopy_waker;
    u->postcopy_fd.idstr = "vhost-user"; /* Need to find unique name */
    postcopy_register_shared_ufd(&u->postcopy_fd);
    return 0;
#else
    error_setg(errp, "Postcopy not supported on non-Linux systems");
    return -ENOSYS;
#endif
}
1883
6864a7b5
DDAG
/*
 * Called at the switch to postcopy on reception of the 'listen' command.
 *
 * Marks the connection as being in postcopy-listen mode (which routes
 * subsequent SET_MEM_TABLE calls through the postcopy variant), then
 * performs an acknowledged POSTCOPY_LISTEN round-trip with the backend.
 */
static int vhost_user_postcopy_listen(struct vhost_dev *dev, Error **errp)
{
    struct vhost_user *u = dev->opaque;
    int ret;
    VhostUserMsg msg = {
        .hdr.request = VHOST_USER_POSTCOPY_LISTEN,
        .hdr.flags = VHOST_USER_VERSION | VHOST_USER_NEED_REPLY_MASK,
    };
    u->postcopy_listen = true;

    trace_vhost_user_postcopy_listen();

    ret = vhost_user_write(dev, &msg, NULL, 0);
    if (ret < 0) {
        error_setg(errp, "Failed to send postcopy_listen to vhost");
        return ret;
    }

    ret = process_message_reply(dev, &msg);
    if (ret) {
        error_setg(errp, "Failed to receive reply to postcopy_listen");
        return ret;
    }

    return 0;
}
1913
46343570
DDAG
/*
 * Called at the end of postcopy
 *
 * Performs an acknowledged POSTCOPY_END round-trip and then unregisters
 * and closes the backend's userfaultfd.
 */
static int vhost_user_postcopy_end(struct vhost_dev *dev, Error **errp)
{
    VhostUserMsg msg = {
        .hdr.request = VHOST_USER_POSTCOPY_END,
        .hdr.flags = VHOST_USER_VERSION | VHOST_USER_NEED_REPLY_MASK,
    };
    int ret;
    struct vhost_user *u = dev->opaque;

    trace_vhost_user_postcopy_end_entry();

    ret = vhost_user_write(dev, &msg, NULL, 0);
    if (ret < 0) {
        error_setg(errp, "Failed to send postcopy_end to vhost");
        return ret;
    }

    ret = process_message_reply(dev, &msg);
    if (ret) {
        error_setg(errp, "Failed to receive reply to postcopy_end");
        return ret;
    }
    /* Done with the backend's userfaultfd; detach and close it. */
    postcopy_unregister_shared_ufd(&u->postcopy_fd);
    close(u->postcopy_fd.fd);
    u->postcopy_fd.handler = NULL;

    trace_vhost_user_postcopy_end_exit();

    return 0;
}
1947
9ccbfe14
DDAG
1948static int vhost_user_postcopy_notifier(NotifierWithReturn *notifier,
1949 void *opaque)
1950{
1951 struct PostcopyNotifyData *pnd = opaque;
1952 struct vhost_user *u = container_of(notifier, struct vhost_user,
1953 postcopy_notifier);
1954 struct vhost_dev *dev = u->dev;
1955
1956 switch (pnd->reason) {
1957 case POSTCOPY_NOTIFY_PROBE:
1958 if (!virtio_has_feature(dev->protocol_features,
1959 VHOST_USER_PROTOCOL_F_PAGEFAULT)) {
1960 /* TODO: Get the device name into this error somehow */
1961 error_setg(pnd->errp,
1962 "vhost-user backend not capable of postcopy");
1963 return -ENOENT;
1964 }
1965 break;
1966
d3dff7a5
DDAG
1967 case POSTCOPY_NOTIFY_INBOUND_ADVISE:
1968 return vhost_user_postcopy_advise(dev, pnd->errp);
1969
6864a7b5
DDAG
1970 case POSTCOPY_NOTIFY_INBOUND_LISTEN:
1971 return vhost_user_postcopy_listen(dev, pnd->errp);
1972
46343570
DDAG
1973 case POSTCOPY_NOTIFY_INBOUND_END:
1974 return vhost_user_postcopy_end(dev, pnd->errp);
1975
9ccbfe14
DDAG
1976 default:
1977 /* We ignore notifications we don't know */
1978 break;
1979 }
1980
1981 return 0;
1982}
1983
28770ff9
KW
/*
 * Initialize the vhost-user backend for @dev.
 *
 * Allocates the per-device struct vhost_user, negotiates virtio and
 * vhost-user protocol features with the backend over the chardev, and
 * validates backend-reported limits (queue count, RAM slot count).
 *
 * @opaque is the VhostUserState shared by all vhost_devs on the same
 * connection. On failure an error is set in @errp and a negative errno
 * is returned; the core vhost code is expected to invoke
 * vhost_backend_cleanup() to release what was allocated here.
 */
static int vhost_user_backend_init(struct vhost_dev *dev, void *opaque,
                                   Error **errp)
{
    uint64_t features, ram_slots;
    struct vhost_user *u;
    VhostUserState *vus = (VhostUserState *) opaque;
    int err;

    assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER);

    u = g_new0(struct vhost_user, 1);
    u->user = vus;
    u->dev = dev;
    dev->opaque = u;

    err = vhost_user_get_features(dev, &features);
    if (err < 0) {
        error_setg_errno(errp, -err, "vhost_backend_init failed");
        return err;
    }

    if (virtio_has_feature(features, VHOST_USER_F_PROTOCOL_FEATURES)) {
        /*
         * F_CONFIG handling is possible either when the VhostUserState
         * opted in or when the device registered a config-change notifier.
         */
        bool supports_f_config = vus->supports_config ||
            (dev->config_ops && dev->config_ops->vhost_dev_config_notifier);
        uint64_t protocol_features;

        dev->backend_features |= 1ULL << VHOST_USER_F_PROTOCOL_FEATURES;

        err = vhost_user_get_u64(dev, VHOST_USER_GET_PROTOCOL_FEATURES,
                                 &protocol_features);
        if (err < 0) {
            error_setg_errno(errp, EPROTO, "vhost_backend_init failed");
            return -EPROTO;
        }

        /*
         * We will use all the protocol features we support - although
         * we suppress F_CONFIG if we know QEMUs internal code can not support
         * it.
         */
        protocol_features &= VHOST_USER_PROTOCOL_FEATURE_MASK;

        if (supports_f_config) {
            /* The device needs config access; the backend must offer it. */
            if (!virtio_has_feature(protocol_features,
                                    VHOST_USER_PROTOCOL_F_CONFIG)) {
                error_setg(errp, "vhost-user device expecting "
                           "VHOST_USER_PROTOCOL_F_CONFIG but the vhost-user backend does "
                           "not support it.");
                return -EPROTO;
            }
        } else {
            /* Backend offers F_CONFIG but this device can't use it: mask it. */
            if (virtio_has_feature(protocol_features,
                                   VHOST_USER_PROTOCOL_F_CONFIG)) {
                warn_report("vhost-user backend supports "
                            "VHOST_USER_PROTOCOL_F_CONFIG but QEMU does not.");
                protocol_features &= ~(1ULL << VHOST_USER_PROTOCOL_F_CONFIG);
            }
        }

        /* final set of protocol features */
        dev->protocol_features = protocol_features;
        err = vhost_user_set_protocol_features(dev, dev->protocol_features);
        if (err < 0) {
            error_setg_errno(errp, EPROTO, "vhost_backend_init failed");
            return -EPROTO;
        }

        /* query the max queues we support if backend supports Multiple Queue */
        if (dev->protocol_features & (1ULL << VHOST_USER_PROTOCOL_F_MQ)) {
            err = vhost_user_get_u64(dev, VHOST_USER_GET_QUEUE_NUM,
                                     &dev->max_queues);
            if (err < 0) {
                error_setg_errno(errp, EPROTO, "vhost_backend_init failed");
                return -EPROTO;
            }
        } else {
            /* Without F_MQ a single queue (pair) is all the backend handles. */
            dev->max_queues = 1;
        }

        if (dev->num_queues && dev->max_queues < dev->num_queues) {
            error_setg(errp, "The maximum number of queues supported by the "
                       "backend is %" PRIu64, dev->max_queues);
            return -EINVAL;
        }

        /*
         * vIOMMU support relies on the backend-request channel plus
         * acknowledged replies, so both features are mandatory here.
         */
        if (virtio_has_feature(features, VIRTIO_F_IOMMU_PLATFORM) &&
                !(virtio_has_feature(dev->protocol_features,
                        VHOST_USER_PROTOCOL_F_BACKEND_REQ) &&
                 virtio_has_feature(dev->protocol_features,
                        VHOST_USER_PROTOCOL_F_REPLY_ACK))) {
            error_setg(errp, "IOMMU support requires reply-ack and "
                       "slave-req protocol features.");
            return -EINVAL;
        }

        /* get max memory regions if backend supports configurable RAM slots */
        if (!virtio_has_feature(dev->protocol_features,
                                VHOST_USER_PROTOCOL_F_CONFIGURE_MEM_SLOTS)) {
            u->user->memory_slots = VHOST_MEMORY_BASELINE_NREGIONS;
        } else {
            err = vhost_user_get_max_memslots(dev, &ram_slots);
            if (err < 0) {
                error_setg_errno(errp, EPROTO, "vhost_backend_init failed");
                return -EPROTO;
            }

            /*
             * The limit is shared per connection; a later device reporting
             * fewer slots than an earlier one would invalidate prior setup.
             */
            if (ram_slots < u->user->memory_slots) {
                error_setg(errp, "The backend specified a max ram slots limit "
                           "of %" PRIu64", when the prior validated limit was "
                           "%d. This limit should never decrease.", ram_slots,
                           u->user->memory_slots);
                return -EINVAL;
            }

            u->user->memory_slots = MIN(ram_slots, VHOST_USER_MAX_RAM_SLOTS);
        }
    }

    /* No shared-memory dirty log means migration cannot be supported. */
    if (dev->migration_blocker == NULL &&
        !virtio_has_feature(dev->protocol_features,
                            VHOST_USER_PROTOCOL_F_LOG_SHMFD)) {
        error_setg(&dev->migration_blocker,
                   "Migration disabled: vhost-user backend lacks "
                   "VHOST_USER_PROTOCOL_F_LOG_SHMFD feature.");
    }

    /* The backend-request channel is set up once, by the first queue pair. */
    if (dev->vq_index == 0) {
        err = vhost_setup_slave_channel(dev);
        if (err < 0) {
            error_setg_errno(errp, EPROTO, "vhost_backend_init failed");
            return -EPROTO;
        }
    }

    u->postcopy_notifier.notify = vhost_user_postcopy_notifier;
    postcopy_add_notifier(&u->postcopy_notifier);

    return 0;
}
2123
/*
 * Tear down the per-device vhost-user state created by
 * vhost_user_backend_init(): unregister the postcopy notifier, close the
 * postcopy userfault fd, close the backend-request channel and free the
 * region bookkeeping arrays before releasing the struct vhost_user itself.
 */
static int vhost_user_backend_cleanup(struct vhost_dev *dev)
{
    struct vhost_user *u;

    assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER);

    u = dev->opaque;
    /* Only registered if init got far enough to set ->notify. */
    if (u->postcopy_notifier.notify) {
        postcopy_remove_notifier(&u->postcopy_notifier);
        u->postcopy_notifier.notify = NULL;
    }
    u->postcopy_listen = false;
    if (u->postcopy_fd.handler) {
        postcopy_unregister_shared_ufd(&u->postcopy_fd);
        close(u->postcopy_fd.fd);
        u->postcopy_fd.handler = NULL;
    }
    if (u->slave_ioc) {
        close_slave_channel(u);
    }
    /* g_free(NULL) is a no-op, so these are safe even if never allocated. */
    g_free(u->region_rb);
    u->region_rb = NULL;
    g_free(u->region_rb_offset);
    u->region_rb_offset = NULL;
    u->region_rb_len = 0;
    g_free(u);
    dev->opaque = 0;

    return 0;
}
2154
fc57fd99
YL
2155static int vhost_user_get_vq_index(struct vhost_dev *dev, int idx)
2156{
2157 assert(idx >= dev->vq_index && idx < dev->vq_index + dev->nvqs);
2158
2159 return idx;
2160}
2161
2ce68e4c
IM
2162static int vhost_user_memslots_limit(struct vhost_dev *dev)
2163{
6b0eff1a
RN
2164 struct vhost_user *u = dev->opaque;
2165
2166 return u->user->memory_slots;
2ce68e4c
IM
2167}
2168
1be0ac21
MAL
2169static bool vhost_user_requires_shm_log(struct vhost_dev *dev)
2170{
2171 assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER);
2172
2173 return virtio_has_feature(dev->protocol_features,
2174 VHOST_USER_PROTOCOL_F_LOG_SHMFD);
2175}
2176
3e866365
TC
2177static int vhost_user_migration_done(struct vhost_dev *dev, char* mac_addr)
2178{
ebf2a499 2179 VhostUserMsg msg = { };
3e866365
TC
2180
2181 assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER);
2182
2183 /* If guest supports GUEST_ANNOUNCE do nothing */
2184 if (virtio_has_feature(dev->acked_features, VIRTIO_NET_F_GUEST_ANNOUNCE)) {
2185 return 0;
2186 }
2187
2188 /* if backend supports VHOST_USER_PROTOCOL_F_RARP ask it to send the RARP */
2189 if (virtio_has_feature(dev->protocol_features,
2190 VHOST_USER_PROTOCOL_F_RARP)) {
24e34754
MT
2191 msg.hdr.request = VHOST_USER_SEND_RARP;
2192 msg.hdr.flags = VHOST_USER_VERSION;
7f4a930e 2193 memcpy((char *)&msg.payload.u64, mac_addr, 6);
24e34754 2194 msg.hdr.size = sizeof(msg.payload.u64);
3e866365 2195
c4843a45 2196 return vhost_user_write(dev, &msg, NULL, 0);
3e866365 2197 }
025faa87 2198 return -ENOTSUP;
3e866365
TC
2199}
2200
ffe42cc1
MT
2201static bool vhost_user_can_merge(struct vhost_dev *dev,
2202 uint64_t start1, uint64_t size1,
2203 uint64_t start2, uint64_t size2)
2204{
07bdaa41 2205 ram_addr_t offset;
ffe42cc1 2206 int mfd, rfd;
ffe42cc1 2207
23374a84
RN
2208 (void)vhost_user_get_mr_data(start1, &offset, &mfd);
2209 (void)vhost_user_get_mr_data(start2, &offset, &rfd);
ffe42cc1
MT
2210
2211 return mfd == rfd;
2212}
2213
c5f048d8
MC
2214static int vhost_user_net_set_mtu(struct vhost_dev *dev, uint16_t mtu)
2215{
2216 VhostUserMsg msg;
2217 bool reply_supported = virtio_has_feature(dev->protocol_features,
2218 VHOST_USER_PROTOCOL_F_REPLY_ACK);
025faa87 2219 int ret;
c5f048d8
MC
2220
2221 if (!(dev->protocol_features & (1ULL << VHOST_USER_PROTOCOL_F_NET_MTU))) {
2222 return 0;
2223 }
2224
24e34754 2225 msg.hdr.request = VHOST_USER_NET_SET_MTU;
c5f048d8 2226 msg.payload.u64 = mtu;
24e34754
MT
2227 msg.hdr.size = sizeof(msg.payload.u64);
2228 msg.hdr.flags = VHOST_USER_VERSION;
c5f048d8 2229 if (reply_supported) {
24e34754 2230 msg.hdr.flags |= VHOST_USER_NEED_REPLY_MASK;
c5f048d8
MC
2231 }
2232
025faa87
RK
2233 ret = vhost_user_write(dev, &msg, NULL, 0);
2234 if (ret < 0) {
2235 return ret;
c5f048d8
MC
2236 }
2237
2238 /* If reply_ack supported, slave has to ack specified MTU is valid */
2239 if (reply_supported) {
3cf7daf8 2240 return process_message_reply(dev, &msg);
c5f048d8
MC
2241 }
2242
2243 return 0;
2244}
2245
6dcdd06e
MC
2246static int vhost_user_send_device_iotlb_msg(struct vhost_dev *dev,
2247 struct vhost_iotlb_msg *imsg)
2248{
025faa87 2249 int ret;
6dcdd06e 2250 VhostUserMsg msg = {
24e34754
MT
2251 .hdr.request = VHOST_USER_IOTLB_MSG,
2252 .hdr.size = sizeof(msg.payload.iotlb),
2253 .hdr.flags = VHOST_USER_VERSION | VHOST_USER_NEED_REPLY_MASK,
6dcdd06e
MC
2254 .payload.iotlb = *imsg,
2255 };
2256
025faa87
RK
2257 ret = vhost_user_write(dev, &msg, NULL, 0);
2258 if (ret < 0) {
2259 return ret;
6dcdd06e
MC
2260 }
2261
2262 return process_message_reply(dev, &msg);
2263}
2264
2265
/*
 * Intentionally empty: vhost-user receives IOTLB miss requests on the
 * shared backend channel, so there is no dedicated callback to toggle.
 */
static void vhost_user_set_iotlb_callback(struct vhost_dev *dev, int enabled)
{
    /* No-op as the receive channel is not dedicated to IOTLB messages. */
}
2270
/*
 * Read @config_len bytes of the device config space from the backend.
 *
 * Sends VHOST_USER_GET_CONFIG and validates the reply's type and size
 * before copying the payload into @config. Requires the F_CONFIG
 * protocol feature; fails with -EINVAL/-EPROTO otherwise.
 */
static int vhost_user_get_config(struct vhost_dev *dev, uint8_t *config,
                                 uint32_t config_len, Error **errp)
{
    int ret;
    VhostUserMsg msg = {
        .hdr.request = VHOST_USER_GET_CONFIG,
        .hdr.flags = VHOST_USER_VERSION,
        .hdr.size = VHOST_USER_CONFIG_HDR_SIZE + config_len,
    };

    if (!virtio_has_feature(dev->protocol_features,
                            VHOST_USER_PROTOCOL_F_CONFIG)) {
        error_setg(errp, "VHOST_USER_PROTOCOL_F_CONFIG not supported");
        return -EINVAL;
    }

    /* Callers must never request more than the protocol's config window. */
    assert(config_len <= VHOST_USER_MAX_CONFIG_SIZE);

    msg.payload.config.offset = 0;
    msg.payload.config.size = config_len;
    ret = vhost_user_write(dev, &msg, NULL, 0);
    if (ret < 0) {
        error_setg_errno(errp, -ret, "vhost_get_config failed");
        return ret;
    }

    /* The reply reuses msg; the backend echoes the request type back. */
    ret = vhost_user_read(dev, &msg);
    if (ret < 0) {
        error_setg_errno(errp, -ret, "vhost_get_config failed");
        return ret;
    }

    if (msg.hdr.request != VHOST_USER_GET_CONFIG) {
        error_setg(errp,
                   "Received unexpected msg type. Expected %d received %d",
                   VHOST_USER_GET_CONFIG, msg.hdr.request);
        return -EPROTO;
    }

    /* Size must match exactly so the memcpy below stays in bounds. */
    if (msg.hdr.size != VHOST_USER_CONFIG_HDR_SIZE + config_len) {
        error_setg(errp, "Received bad msg size.");
        return -EPROTO;
    }

    memcpy(config, msg.payload.config.region, config_len);

    return 0;
}
2319
2320static int vhost_user_set_config(struct vhost_dev *dev, const uint8_t *data,
2321 uint32_t offset, uint32_t size, uint32_t flags)
2322{
025faa87 2323 int ret;
4c3e257b
CL
2324 uint8_t *p;
2325 bool reply_supported = virtio_has_feature(dev->protocol_features,
2326 VHOST_USER_PROTOCOL_F_REPLY_ACK);
2327
2328 VhostUserMsg msg = {
24e34754
MT
2329 .hdr.request = VHOST_USER_SET_CONFIG,
2330 .hdr.flags = VHOST_USER_VERSION,
2331 .hdr.size = VHOST_USER_CONFIG_HDR_SIZE + size,
4c3e257b
CL
2332 };
2333
1c3e5a26
MC
2334 if (!virtio_has_feature(dev->protocol_features,
2335 VHOST_USER_PROTOCOL_F_CONFIG)) {
025faa87 2336 return -ENOTSUP;
1c3e5a26
MC
2337 }
2338
4c3e257b 2339 if (reply_supported) {
24e34754 2340 msg.hdr.flags |= VHOST_USER_NEED_REPLY_MASK;
4c3e257b
CL
2341 }
2342
2343 if (size > VHOST_USER_MAX_CONFIG_SIZE) {
025faa87 2344 return -EINVAL;
4c3e257b
CL
2345 }
2346
2347 msg.payload.config.offset = offset,
2348 msg.payload.config.size = size,
2349 msg.payload.config.flags = flags,
2350 p = msg.payload.config.region;
2351 memcpy(p, data, size);
2352
025faa87
RK
2353 ret = vhost_user_write(dev, &msg, NULL, 0);
2354 if (ret < 0) {
2355 return ret;
4c3e257b
CL
2356 }
2357
2358 if (reply_supported) {
2359 return process_message_reply(dev, &msg);
2360 }
2361
2362 return 0;
2363}
2364
efbfeb81
GA
/*
 * Ask the backend to create a crypto session.
 *
 * @session_info is a CryptoDevBackendSessionInfo describing either a
 * symmetric or an asymmetric session; the relevant setup data and keys
 * are marshalled into the message payload. On success the backend's
 * session id is stored in @session_id.
 *
 * Requires the F_CRYPTO_SESSION protocol feature; returns a negative
 * errno on any failure.
 */
static int vhost_user_crypto_create_session(struct vhost_dev *dev,
                                            void *session_info,
                                            uint64_t *session_id)
{
    int ret;
    bool crypto_session = virtio_has_feature(dev->protocol_features,
                                       VHOST_USER_PROTOCOL_F_CRYPTO_SESSION);
    CryptoDevBackendSessionInfo *backend_info = session_info;
    VhostUserMsg msg = {
        .hdr.request = VHOST_USER_CREATE_CRYPTO_SESSION,
        .hdr.flags = VHOST_USER_VERSION,
        .hdr.size = sizeof(msg.payload.session),
    };

    assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER);

    if (!crypto_session) {
        error_report("vhost-user trying to send unhandled ioctl");
        return -ENOTSUP;
    }

    if (backend_info->op_code == VIRTIO_CRYPTO_AKCIPHER_CREATE_SESSION) {
        /* Asymmetric session: copy setup data, then the key if present. */
        CryptoDevBackendAsymSessionInfo *sess = &backend_info->u.asym_sess_info;
        size_t keylen;

        memcpy(&msg.payload.session.u.asym.session_setup_data, sess,
               sizeof(CryptoDevBackendAsymSessionInfo));
        if (sess->keylen) {
            /* The wire format has a fixed-size key buffer; never overflow it. */
            keylen = sizeof(msg.payload.session.u.asym.key);
            if (sess->keylen > keylen) {
                error_report("Unsupported asymmetric key size");
                return -ENOTSUP;
            }

            memcpy(&msg.payload.session.u.asym.key, sess->key,
                   sess->keylen);
        }
    } else {
        /* Symmetric session: cipher key plus optional auth key. */
        CryptoDevBackendSymSessionInfo *sess = &backend_info->u.sym_sess_info;
        size_t keylen;

        memcpy(&msg.payload.session.u.sym.session_setup_data, sess,
               sizeof(CryptoDevBackendSymSessionInfo));
        if (sess->key_len) {
            keylen = sizeof(msg.payload.session.u.sym.key);
            if (sess->key_len > keylen) {
                error_report("Unsupported cipher key size");
                return -ENOTSUP;
            }

            memcpy(&msg.payload.session.u.sym.key, sess->cipher_key,
                   sess->key_len);
        }

        if (sess->auth_key_len > 0) {
            keylen = sizeof(msg.payload.session.u.sym.auth_key);
            if (sess->auth_key_len > keylen) {
                error_report("Unsupported auth key size");
                return -ENOTSUP;
            }

            memcpy(&msg.payload.session.u.sym.auth_key, sess->auth_key,
                   sess->auth_key_len);
        }
    }

    msg.payload.session.op_code = backend_info->op_code;
    msg.payload.session.session_id = backend_info->session_id;
    ret = vhost_user_write(dev, &msg, NULL, 0);
    if (ret < 0) {
        error_report("vhost_user_write() return %d, create session failed",
                     ret);
        return ret;
    }

    ret = vhost_user_read(dev, &msg);
    if (ret < 0) {
        error_report("vhost_user_read() return %d, create session failed",
                     ret);
        return ret;
    }

    /* Validate the reply before trusting the returned session id. */
    if (msg.hdr.request != VHOST_USER_CREATE_CRYPTO_SESSION) {
        error_report("Received unexpected msg type. Expected %d received %d",
                     VHOST_USER_CREATE_CRYPTO_SESSION, msg.hdr.request);
        return -EPROTO;
    }

    if (msg.hdr.size != sizeof(msg.payload.session)) {
        error_report("Received bad msg size.");
        return -EPROTO;
    }

    /* A negative id is the backend's way of reporting failure. */
    if (msg.payload.session.session_id < 0) {
        error_report("Bad session id: %" PRId64 "",
                     msg.payload.session.session_id);
        return -EINVAL;
    }
    *session_id = msg.payload.session.session_id;

    return 0;
}
2467
2468static int
2469vhost_user_crypto_close_session(struct vhost_dev *dev, uint64_t session_id)
2470{
025faa87 2471 int ret;
efbfeb81
GA
2472 bool crypto_session = virtio_has_feature(dev->protocol_features,
2473 VHOST_USER_PROTOCOL_F_CRYPTO_SESSION);
2474 VhostUserMsg msg = {
2475 .hdr.request = VHOST_USER_CLOSE_CRYPTO_SESSION,
2476 .hdr.flags = VHOST_USER_VERSION,
2477 .hdr.size = sizeof(msg.payload.u64),
2478 };
2479 msg.payload.u64 = session_id;
2480
2481 if (!crypto_session) {
2482 error_report("vhost-user trying to send unhandled ioctl");
025faa87 2483 return -ENOTSUP;
efbfeb81
GA
2484 }
2485
025faa87
RK
2486 ret = vhost_user_write(dev, &msg, NULL, 0);
2487 if (ret < 0) {
2488 error_report("vhost_user_write() return %d, close session failed",
2489 ret);
2490 return ret;
efbfeb81
GA
2491 }
2492
2493 return 0;
2494}
2495
988a2775
TB
2496static bool vhost_user_mem_section_filter(struct vhost_dev *dev,
2497 MemoryRegionSection *section)
2498{
66997c42 2499 return memory_region_get_fd(section->mr) >= 0;
988a2775
TB
2500}
2501
5ad204bf
XY
/*
 * Fetch the inflight-I/O tracking region from the backend.
 *
 * Sends VHOST_USER_GET_INFLIGHT_FD; the backend replies with the region
 * geometry and passes a memory fd over the socket, which is mmap'ed and
 * recorded in @inflight. A backend without F_INFLIGHT_SHMFD, or one
 * reporting a zero-sized region, leaves @inflight untouched (returns 0).
 */
static int vhost_user_get_inflight_fd(struct vhost_dev *dev,
                                      uint16_t queue_size,
                                      struct vhost_inflight *inflight)
{
    void *addr;
    int fd;
    int ret;
    struct vhost_user *u = dev->opaque;
    CharBackend *chr = u->user->chr;
    VhostUserMsg msg = {
        .hdr.request = VHOST_USER_GET_INFLIGHT_FD,
        .hdr.flags = VHOST_USER_VERSION,
        .payload.inflight.num_queues = dev->nvqs,
        .payload.inflight.queue_size = queue_size,
        .hdr.size = sizeof(msg.payload.inflight),
    };

    if (!virtio_has_feature(dev->protocol_features,
                            VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD)) {
        return 0;
    }

    ret = vhost_user_write(dev, &msg, NULL, 0);
    if (ret < 0) {
        return ret;
    }

    ret = vhost_user_read(dev, &msg);
    if (ret < 0) {
        return ret;
    }

    /* Validate the reply before touching any ancillary fd. */
    if (msg.hdr.request != VHOST_USER_GET_INFLIGHT_FD) {
        error_report("Received unexpected msg type. "
                     "Expected %d received %d",
                     VHOST_USER_GET_INFLIGHT_FD, msg.hdr.request);
        return -EPROTO;
    }

    if (msg.hdr.size != sizeof(msg.payload.inflight)) {
        error_report("Received bad msg size.");
        return -EPROTO;
    }

    /* A zero-sized region means the backend has nothing to share. */
    if (!msg.payload.inflight.mmap_size) {
        return 0;
    }

    /* The region's fd arrives as SCM_RIGHTS ancillary data. */
    fd = qemu_chr_fe_get_msgfd(chr);
    if (fd < 0) {
        error_report("Failed to get mem fd");
        return -EIO;
    }

    addr = mmap(0, msg.payload.inflight.mmap_size, PROT_READ | PROT_WRITE,
                MAP_SHARED, fd, msg.payload.inflight.mmap_offset);

    if (addr == MAP_FAILED) {
        error_report("Failed to mmap mem fd");
        close(fd);
        return -EFAULT;
    }

    /* Ownership of fd and mapping passes to @inflight. */
    inflight->addr = addr;
    inflight->fd = fd;
    inflight->size = msg.payload.inflight.mmap_size;
    inflight->offset = msg.payload.inflight.mmap_offset;
    inflight->queue_size = queue_size;

    return 0;
}
2573
2574static int vhost_user_set_inflight_fd(struct vhost_dev *dev,
2575 struct vhost_inflight *inflight)
2576{
2577 VhostUserMsg msg = {
2578 .hdr.request = VHOST_USER_SET_INFLIGHT_FD,
2579 .hdr.flags = VHOST_USER_VERSION,
2580 .payload.inflight.mmap_size = inflight->size,
2581 .payload.inflight.mmap_offset = inflight->offset,
2582 .payload.inflight.num_queues = dev->nvqs,
2583 .payload.inflight.queue_size = inflight->queue_size,
2584 .hdr.size = sizeof(msg.payload.inflight),
2585 };
2586
2587 if (!virtio_has_feature(dev->protocol_features,
2588 VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD)) {
2589 return 0;
2590 }
2591
025faa87 2592 return vhost_user_write(dev, &msg, &inflight->fd, 1);
5ad204bf
XY
2593}
2594
503e3554
AB
/*
 * GDestroyNotify for entries of VhostUserState->notifiers: detach the
 * host notifier and release its resources.
 */
static void vhost_user_state_destroy(gpointer data)
{
    VhostUserHostNotifier *n = (VhostUserHostNotifier *) data;
    if (n) {
        vhost_user_host_notifier_remove(n, NULL);
        object_unparent(OBJECT(&n->mr));
        /*
         * We can't free until vhost_user_host_notifier_remove has
         * done it's thing so schedule the free with RCU.
         */
        g_free_rcu(n, rcu);
    }
}
2608
/*
 * Initialize a VhostUserState with its chardev backend.
 *
 * Fails (returns false, sets @errp) if @user was already initialized —
 * ->chr doubles as the "initialized" flag. The notifier array owns its
 * elements via vhost_user_state_destroy.
 */
bool vhost_user_init(VhostUserState *user, CharBackend *chr, Error **errp)
{
    if (user->chr) {
        error_setg(errp, "Cannot initialize vhost-user state");
        return false;
    }
    user->chr = chr;
    user->memory_slots = 0;
    user->notifiers = g_ptr_array_new_full(VIRTIO_QUEUE_MAX / 4,
                                           &vhost_user_state_destroy);
    return true;
}
2621
/*
 * Release a VhostUserState. Freeing the notifier array runs
 * vhost_user_state_destroy on every entry, which unparents memory
 * regions — batch those updates in a single MR transaction. Safe to
 * call on a never-initialized state (->chr == NULL).
 */
void vhost_user_cleanup(VhostUserState *user)
{
    if (!user->chr) {
        return;
    }
    memory_region_transaction_begin();
    user->notifiers = (GPtrArray *) g_ptr_array_free(user->notifiers, true);
    memory_region_transaction_commit();
    user->chr = NULL;
}
2632
71e076a0
AB
2633
/* State carried from vhost_user_async_close() to its bottom half. */
typedef struct {
    vu_async_close_fn cb;       /* device callback to run in the BH */
    DeviceState *dev;           /* device passed to @cb */
    CharBackend *cd;            /* chardev whose handlers were disabled */
    struct vhost_dev *vhost;    /* used to detect concurrent teardown */
} VhostAsyncCallback;

/*
 * Bottom half scheduled by vhost_user_async_close(): invoke the
 * device's close callback unless the vhost_dev was already torn down
 * by another path in the meantime.
 */
static void vhost_user_async_close_bh(void *opaque)
{
    VhostAsyncCallback *data = opaque;
    struct vhost_dev *vhost = data->vhost;

    /*
     * If the vhost_dev has been cleared in the meantime there is
     * nothing left to do as some other path has completed the
     * cleanup.
     */
    if (vhost->vdev) {
        data->cb(data->dev);
    }

    g_free(data);
}
2657
/*
 * We only schedule the work if the machine is running. If suspended
 * we want to keep all the in-flight data as is for migration
 * purposes.
 */
/*
 * Defer handling of a chardev close event: disable further chardev
 * notifications, mark the vhost device stopped, and run @cb from a
 * bottom half so the close is not processed in the middle of a
 * read/write on the same chardev.
 */
void vhost_user_async_close(DeviceState *d,
                            CharBackend *chardev, struct vhost_dev *vhost,
                            vu_async_close_fn cb)
{
    if (!runstate_check(RUN_STATE_SHUTDOWN)) {
        /*
         * A close event may happen during a read/write, but vhost
         * code assumes the vhost_dev remains setup, so delay the
         * stop & clear.
         */
        AioContext *ctx = qemu_get_current_aio_context();
        VhostAsyncCallback *data = g_new0(VhostAsyncCallback, 1);

        /* Save data for the callback */
        data->cb = cb;
        data->dev = d;
        data->cd = chardev;
        data->vhost = vhost;

        /* Disable any further notifications on the chardev */
        qemu_chr_fe_set_handlers(chardev,
                                 NULL, NULL, NULL, NULL, NULL, NULL,
                                 false);

        aio_bh_schedule_oneshot(ctx, vhost_user_async_close_bh, data);

        /*
         * Move vhost device to the stopped state. The vhost-user device
         * will be clean up and disconnected in BH. This can be useful in
         * the vhost migration code. If disconnect was caught there is an
         * option for the general vhost code to get the dev state without
         * knowing its type (in this case vhost-user).
         *
         * Note if the vhost device is fully cleared by the time we
         * execute the bottom half we won't continue with the cleanup.
         */
        vhost->started = false;
    }
}
2702
923b8921
YW
2703static int vhost_user_dev_start(struct vhost_dev *dev, bool started)
2704{
2705 if (!virtio_has_feature(dev->protocol_features,
2706 VHOST_USER_PROTOCOL_F_STATUS)) {
2707 return 0;
2708 }
2709
2710 /* Set device status only for last queue pair */
2711 if (dev->vq_index + dev->nvqs != dev->vq_index_end) {
2712 return 0;
2713 }
2714
2715 if (started) {
2716 return vhost_user_add_status(dev, VIRTIO_CONFIG_S_ACKNOWLEDGE |
2717 VIRTIO_CONFIG_S_DRIVER |
2718 VIRTIO_CONFIG_S_DRIVER_OK);
2719 } else {
6f8be29e
SH
2720 return 0;
2721 }
2722}
2723
2724static void vhost_user_reset_status(struct vhost_dev *dev)
2725{
2726 /* Set device status only for last queue pair */
2727 if (dev->vq_index + dev->nvqs != dev->vq_index_end) {
2728 return;
2729 }
2730
2731 if (virtio_has_feature(dev->protocol_features,
2732 VHOST_USER_PROTOCOL_F_STATUS)) {
2733 vhost_user_set_status(dev, 0);
923b8921
YW
2734 }
2735}
2736
5f6f6664
NN
/*
 * Vtable binding the generic vhost core to the vhost-user transport.
 * Any op left unset here falls back to the core's default behavior.
 */
const VhostOps user_ops = {
        .backend_type = VHOST_BACKEND_TYPE_USER,
        /* lifecycle */
        .vhost_backend_init = vhost_user_backend_init,
        .vhost_backend_cleanup = vhost_user_backend_cleanup,
        .vhost_backend_memslots_limit = vhost_user_memslots_limit,
        /* memory and logging */
        .vhost_set_log_base = vhost_user_set_log_base,
        .vhost_set_mem_table = vhost_user_set_mem_table,
        /* virtqueue setup */
        .vhost_set_vring_addr = vhost_user_set_vring_addr,
        .vhost_set_vring_endian = vhost_user_set_vring_endian,
        .vhost_set_vring_num = vhost_user_set_vring_num,
        .vhost_set_vring_base = vhost_user_set_vring_base,
        .vhost_get_vring_base = vhost_user_get_vring_base,
        .vhost_set_vring_kick = vhost_user_set_vring_kick,
        .vhost_set_vring_call = vhost_user_set_vring_call,
        .vhost_set_vring_err = vhost_user_set_vring_err,
        /* feature negotiation and ownership */
        .vhost_set_features = vhost_user_set_features,
        .vhost_get_features = vhost_user_get_features,
        .vhost_set_owner = vhost_user_set_owner,
        .vhost_reset_device = vhost_user_reset_device,
        .vhost_get_vq_index = vhost_user_get_vq_index,
        .vhost_set_vring_enable = vhost_user_set_vring_enable,
        /* migration */
        .vhost_requires_shm_log = vhost_user_requires_shm_log,
        .vhost_migration_done = vhost_user_migration_done,
        .vhost_backend_can_merge = vhost_user_can_merge,
        /* net / IOTLB / config space */
        .vhost_net_set_mtu = vhost_user_net_set_mtu,
        .vhost_set_iotlb_callback = vhost_user_set_iotlb_callback,
        .vhost_send_device_iotlb_msg = vhost_user_send_device_iotlb_msg,
        .vhost_get_config = vhost_user_get_config,
        .vhost_set_config = vhost_user_set_config,
        /* crypto sessions */
        .vhost_crypto_create_session = vhost_user_crypto_create_session,
        .vhost_crypto_close_session = vhost_user_crypto_close_session,
        .vhost_backend_mem_section_filter = vhost_user_mem_section_filter,
        /* inflight I/O tracking and device status */
        .vhost_get_inflight_fd = vhost_user_get_inflight_fd,
        .vhost_set_inflight_fd = vhost_user_set_inflight_fd,
        .vhost_dev_start = vhost_user_dev_start,
        .vhost_reset_status = vhost_user_reset_status,
};