hw/virtio/vhost-user.c

   1 /*
   2  * vhost-user
   3  *
   4  * Copyright (c) 2013 Virtual Open Systems Sarl.
   5  *
   6  * This work is licensed under the terms of the GNU GPL, version 2 or later.
   7  * See the COPYING file in the top-level directory.
   8  *
   9  */
  10
  11 #include "qemu/osdep.h"
  12 #include "qapi/error.h"
  13 #include "hw/virtio/vhost.h"
  14 #include "hw/virtio/vhost-user.h"
  15 #include "hw/virtio/vhost-backend.h"
  16 #include "hw/virtio/virtio.h"
  17 #include "hw/virtio/virtio-net.h"
  18 #include "chardev/char-fe.h"
  19 #include "io/channel-socket.h"
  20 #include "sysemu/kvm.h"
  21 #include "qemu/error-report.h"
  22 #include "qemu/main-loop.h"
  23 #include "qemu/sockets.h"
  24 #include "sysemu/runstate.h"
  25 #include "sysemu/cryptodev.h"
  26 #include "migration/migration.h"
  27 #include "migration/postcopy-ram.h"
  28 #include "trace.h"
  29 #include "exec/ramblock.h"
  30
  31 #include <sys/ioctl.h>
  32 #include <sys/socket.h>
  33 #include <sys/un.h>
  34
  35 #include "standard-headers/linux/vhost_types.h"
  36
  37 #ifdef CONFIG_LINUX
  38 #include <linux/userfaultfd.h>
  39 #endif
  40
  41 #define VHOST_MEMORY_BASELINE_NREGIONS    8
  42 #define VHOST_USER_F_PROTOCOL_FEATURES 30
  43 #define VHOST_USER_BACKEND_MAX_FDS     8
  44
  45 #if defined(TARGET_PPC) || defined(TARGET_PPC64)
  46 #include "hw/ppc/spapr.h"
  47 #define VHOST_USER_MAX_RAM_SLOTS SPAPR_MAX_RAM_SLOTS
  48
  49 #else
  50 #define VHOST_USER_MAX_RAM_SLOTS 512
  51 #endif
  52
  53 /*
  54  * Maximum size of virtio device config space
  55  */
  56 #define VHOST_USER_MAX_CONFIG_SIZE 256
  57
  58 enum VhostUserProtocolFeature {
  59     VHOST_USER_PROTOCOL_F_MQ = 0,
  60     VHOST_USER_PROTOCOL_F_LOG_SHMFD = 1,
  61     VHOST_USER_PROTOCOL_F_RARP = 2,
  62     VHOST_USER_PROTOCOL_F_REPLY_ACK = 3,
  63     VHOST_USER_PROTOCOL_F_NET_MTU = 4,
  64     VHOST_USER_PROTOCOL_F_BACKEND_REQ = 5,
  65     VHOST_USER_PROTOCOL_F_CROSS_ENDIAN = 6,
  66     VHOST_USER_PROTOCOL_F_CRYPTO_SESSION = 7,
  67     VHOST_USER_PROTOCOL_F_PAGEFAULT = 8,
  68     VHOST_USER_PROTOCOL_F_CONFIG = 9,
  69     VHOST_USER_PROTOCOL_F_BACKEND_SEND_FD = 10,
  70     VHOST_USER_PROTOCOL_F_HOST_NOTIFIER = 11,
  71     VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD = 12,
  72     VHOST_USER_PROTOCOL_F_RESET_DEVICE = 13,
  73     /* Feature 14 reserved for VHOST_USER_PROTOCOL_F_INBAND_NOTIFICATIONS. */
  74     VHOST_USER_PROTOCOL_F_CONFIGURE_MEM_SLOTS = 15,
  75     VHOST_USER_PROTOCOL_F_STATUS = 16,
  76     VHOST_USER_PROTOCOL_F_MAX
  77 };
  78
  79 #define VHOST_USER_PROTOCOL_FEATURE_MASK ((1 << VHOST_USER_PROTOCOL_F_MAX) - 1)
  80
  81 typedef enum VhostUserRequest {
  82     VHOST_USER_NONE = 0,
  83     VHOST_USER_GET_FEATURES = 1,
  84     VHOST_USER_SET_FEATURES = 2,
  85     VHOST_USER_SET_OWNER = 3,
  86     VHOST_USER_RESET_OWNER = 4,
  87     VHOST_USER_SET_MEM_TABLE = 5,
  88     VHOST_USER_SET_LOG_BASE = 6,
  89     VHOST_USER_SET_LOG_FD = 7,
  90     VHOST_USER_SET_VRING_NUM = 8,
  91     VHOST_USER_SET_VRING_ADDR = 9,
  92     VHOST_USER_SET_VRING_BASE = 10,
  93     VHOST_USER_GET_VRING_BASE = 11,
  94     VHOST_USER_SET_VRING_KICK = 12,
  95     VHOST_USER_SET_VRING_CALL = 13,
  96     VHOST_USER_SET_VRING_ERR = 14,
  97     VHOST_USER_GET_PROTOCOL_FEATURES = 15,
  98     VHOST_USER_SET_PROTOCOL_FEATURES = 16,
  99     VHOST_USER_GET_QUEUE_NUM = 17,
 100     VHOST_USER_SET_VRING_ENABLE = 18,
 101     VHOST_USER_SEND_RARP = 19,
 102     VHOST_USER_NET_SET_MTU = 20,
 103     VHOST_USER_SET_BACKEND_REQ_FD = 21,
 104     VHOST_USER_IOTLB_MSG = 22,
 105     VHOST_USER_SET_VRING_ENDIAN = 23,
 106     VHOST_USER_GET_CONFIG = 24,
 107     VHOST_USER_SET_CONFIG = 25,
 108     VHOST_USER_CREATE_CRYPTO_SESSION = 26,
 109     VHOST_USER_CLOSE_CRYPTO_SESSION = 27,
 110     VHOST_USER_POSTCOPY_ADVISE  = 28,
 111     VHOST_USER_POSTCOPY_LISTEN  = 29,
 112     VHOST_USER_POSTCOPY_END     = 30,
 113     VHOST_USER_GET_INFLIGHT_FD = 31,
 114     VHOST_USER_SET_INFLIGHT_FD = 32,
 115     VHOST_USER_GPU_SET_SOCKET = 33,
 116     VHOST_USER_RESET_DEVICE = 34,
 117     /* Message number 35 reserved for VHOST_USER_VRING_KICK. */
 118     VHOST_USER_GET_MAX_MEM_SLOTS = 36,
 119     VHOST_USER_ADD_MEM_REG = 37,
 120     VHOST_USER_REM_MEM_REG = 38,
 121     VHOST_USER_SET_STATUS = 39,
 122     VHOST_USER_GET_STATUS = 40,
 123     VHOST_USER_MAX
 124 } VhostUserRequest;
 125
 126 typedef enum VhostUserSlaveRequest {
 127     VHOST_USER_BACKEND_NONE = 0,
 128     VHOST_USER_BACKEND_IOTLB_MSG = 1,
 129     VHOST_USER_BACKEND_CONFIG_CHANGE_MSG = 2,
 130     VHOST_USER_BACKEND_VRING_HOST_NOTIFIER_MSG = 3,
 131     VHOST_USER_BACKEND_MAX
 132 }  VhostUserSlaveRequest;
 133
 134 typedef struct VhostUserMemoryRegion {
 135     uint64_t guest_phys_addr;
 136     uint64_t memory_size;
 137     uint64_t userspace_addr;
 138     uint64_t mmap_offset;
 139 } VhostUserMemoryRegion;
 140
 141 typedef struct VhostUserMemory {
 142     uint32_t nregions;
 143     uint32_t padding;
 144     VhostUserMemoryRegion regions[VHOST_MEMORY_BASELINE_NREGIONS];
 145 } VhostUserMemory;
 146
 147 typedef struct VhostUserMemRegMsg {
 148     uint64_t padding;
 149     VhostUserMemoryRegion region;
 150 } VhostUserMemRegMsg;
 151
 152 typedef struct VhostUserLog {
 153     uint64_t mmap_size;
 154     uint64_t mmap_offset;
 155 } VhostUserLog;
 156
 157 typedef struct VhostUserConfig {
 158     uint32_t offset;
 159     uint32_t size;
 160     uint32_t flags;
 161     uint8_t region[VHOST_USER_MAX_CONFIG_SIZE];
 162 } VhostUserConfig;
 163
 164 #define VHOST_CRYPTO_SYM_HMAC_MAX_KEY_LEN    512
 165 #define VHOST_CRYPTO_SYM_CIPHER_MAX_KEY_LEN  64
 166
 167 typedef struct VhostUserCryptoSession {
 168     /* session id for success, -1 on errors */
 169     int64_t session_id;
 170     CryptoDevBackendSymSessionInfo session_setup_data;
 171     uint8_t key[VHOST_CRYPTO_SYM_CIPHER_MAX_KEY_LEN];
 172     uint8_t auth_key[VHOST_CRYPTO_SYM_HMAC_MAX_KEY_LEN];
 173 } VhostUserCryptoSession;
 174
 175 static VhostUserConfig c __attribute__ ((unused));
 176 #define VHOST_USER_CONFIG_HDR_SIZE (sizeof(c.offset) \
 177                                    + sizeof(c.size) \
 178                                    + sizeof(c.flags))
 179
 180 typedef struct VhostUserVringArea {
 181     uint64_t u64;
 182     uint64_t size;
 183     uint64_t offset;
 184 } VhostUserVringArea;
 185
 186 typedef struct VhostUserInflight {
 187     uint64_t mmap_size;
 188     uint64_t mmap_offset;
 189     uint16_t num_queues;
 190     uint16_t queue_size;
 191 } VhostUserInflight;
 192
 193 typedef struct {
 194     VhostUserRequest request;
 195
 196 #define VHOST_USER_VERSION_MASK     (0x3)
 197 #define VHOST_USER_REPLY_MASK       (0x1 << 2)
 198 #define VHOST_USER_NEED_REPLY_MASK  (0x1 << 3)
 199     uint32_t flags;
 200     uint32_t size; /* the following payload size */
 201 } QEMU_PACKED VhostUserHeader;
 202
 203 typedef union {
 204 #define VHOST_USER_VRING_IDX_MASK   (0xff)
 205 #define VHOST_USER_VRING_NOFD_MASK  (0x1 << 8)
 206         uint64_t u64;
 207         struct vhost_vring_state state;
 208         struct vhost_vring_addr addr;
 209         VhostUserMemory memory;
 210         VhostUserMemRegMsg mem_reg;
 211         VhostUserLog log;
 212         struct vhost_iotlb_msg iotlb;
 213         VhostUserConfig config;
 214         VhostUserCryptoSession session;
 215         VhostUserVringArea area;
 216         VhostUserInflight inflight;
 217 } VhostUserPayload;
 218
 219 typedef struct VhostUserMsg {
 220     VhostUserHeader hdr;
 221     VhostUserPayload payload;
 222 } QEMU_PACKED VhostUserMsg;
 223
 224 static VhostUserMsg m __attribute__ ((unused));
 225 #define VHOST_USER_HDR_SIZE (sizeof(VhostUserHeader))
 226
 227 #define VHOST_USER_PAYLOAD_SIZE (sizeof(VhostUserPayload))
 228
 229 /* The version of the protocol we support */
 230 #define VHOST_USER_VERSION    (0x1)
 231
 232 struct vhost_user {
 233     struct vhost_dev *dev;
 234     /* Shared between vhost devs of the same virtio device */
 235     VhostUserState *user;
 236     QIOChannel *slave_ioc;
 237     GSource *slave_src;
 238     NotifierWithReturn postcopy_notifier;
 239     struct PostCopyFD  postcopy_fd;
 240     uint64_t           postcopy_client_bases[VHOST_USER_MAX_RAM_SLOTS];
 241     /* Length of the region_rb and region_rb_offset arrays */
 242     size_t             region_rb_len;
 243     /* RAMBlock associated with a given region */
 244     RAMBlock         **region_rb;
 245     /*
 246      * The offset from the start of the RAMBlock to the start of the
 247      * vhost region.
 248      */
 249     ram_addr_t        *region_rb_offset;
 250
 251     /* True once we've entered postcopy_listen */
 252     bool               postcopy_listen;
 253
 254     /* Our current regions */
 255     int num_shadow_regions;
 256     struct vhost_memory_region shadow_regions[VHOST_USER_MAX_RAM_SLOTS];
 257 };
 258
 259 struct scrub_regions {
 260     struct vhost_memory_region *region;
 261     int reg_idx;
 262     int fd_idx;
 263 };
 264
 265 static bool ioeventfd_enabled(void)
 266 {
 267     return !kvm_enabled() || kvm_eventfds_enabled();
 268 }
 269
 270 static int vhost_user_read_header(struct vhost_dev *dev, VhostUserMsg *msg)
 271 {
 272     struct vhost_user *u = dev->opaque;
 273     CharBackend *chr = u->user->chr;
 274     uint8_t *p = (uint8_t *) msg;
 275     int r, size = VHOST_USER_HDR_SIZE;
 276
 277     r = qemu_chr_fe_read_all(chr, p, size);
 278     if (r != size) {
 279         int saved_errno = errno;
 280         error_report("Failed to read msg header. Read %d instead of %d."
 281                      " Original request %d.", r, size, msg->hdr.request);
 282         return r < 0 ? -saved_errno : -EIO;
 283     }
 284
 285     /* validate received flags */
 286     if (msg->hdr.flags != (VHOST_USER_REPLY_MASK | VHOST_USER_VERSION)) {
 287         error_report("Failed to read msg header."
 288                 " Flags 0x%x instead of 0x%x.", msg->hdr.flags,
 289                 VHOST_USER_REPLY_MASK | VHOST_USER_VERSION);
 290         return -EPROTO;
 291     }
 292
 293     trace_vhost_user_read(msg->hdr.request, msg->hdr.flags);
 294
 295     return 0;
 296 }
 297
 298 static int vhost_user_read(struct vhost_dev *dev, VhostUserMsg *msg)
 299 {
 300     struct vhost_user *u = dev->opaque;
 301     CharBackend *chr = u->user->chr;
 302     uint8_t *p = (uint8_t *) msg;
 303     int r, size;
 304
 305     r = vhost_user_read_header(dev, msg);
 306     if (r < 0) {
 307         return r;
 308     }
 309
 310     /* validate message size is sane */
 311     if (msg->hdr.size > VHOST_USER_PAYLOAD_SIZE) {
 312         error_report("Failed to read msg header."
 313                 " Size %d exceeds the maximum %zu.", msg->hdr.size,
 314                 VHOST_USER_PAYLOAD_SIZE);
 315         return -EPROTO;
 316     }
 317
 318     if (msg->hdr.size) {
 319         p += VHOST_USER_HDR_SIZE;
 320         size = msg->hdr.size;
 321         r = qemu_chr_fe_read_all(chr, p, size);
 322         if (r != size) {
 323             int saved_errno = errno;
 324             error_report("Failed to read msg payload."
 325                          " Read %d instead of %d.", r, msg->hdr.size);
 326             return r < 0 ? -saved_errno : -EIO;
 327         }
 328     }
 329
 330     return 0;
 331 }
 332
 333 static int process_message_reply(struct vhost_dev *dev,
 334                                  const VhostUserMsg *msg)
 335 {
 336     int ret;
 337     VhostUserMsg msg_reply;
 338
 339     if ((msg->hdr.flags & VHOST_USER_NEED_REPLY_MASK) == 0) {
 340         return 0;
 341     }
 342
 343     ret = vhost_user_read(dev, &msg_reply);
 344     if (ret < 0) {
 345         return ret;
 346     }
 347
 348     if (msg_reply.hdr.request != msg->hdr.request) {
 349         error_report("Received unexpected msg type. "
 350                      "Expected %d received %d",
 351                      msg->hdr.request, msg_reply.hdr.request);
 352         return -EPROTO;
 353     }
 354
 355     return msg_reply.payload.u64 ? -EIO : 0;
 356 }
 357
 358 static bool vhost_user_one_time_request(VhostUserRequest request)
 359 {
 360     switch (request) {
 361     case VHOST_USER_SET_OWNER:
 362     case VHOST_USER_RESET_OWNER:
 363     case VHOST_USER_SET_MEM_TABLE:
 364     case VHOST_USER_GET_QUEUE_NUM:
 365     case VHOST_USER_NET_SET_MTU:
 366     case VHOST_USER_ADD_MEM_REG:
 367     case VHOST_USER_REM_MEM_REG:
 368         return true;
 369     default:
 370         return false;
 371     }
 372 }
 373
 374 /* most non-init callers ignore the error */
 375 static int vhost_user_write(struct vhost_dev *dev, VhostUserMsg *msg,
 376                             int *fds, int fd_num)
 377 {
 378     struct vhost_user *u = dev->opaque;
 379     CharBackend *chr = u->user->chr;
 380     int ret, size = VHOST_USER_HDR_SIZE + msg->hdr.size;
 381
 382     /*
 383      * For non-vring specific requests, like VHOST_USER_SET_MEM_TABLE,
 384      * we just need send it once in the first time. For later such
 385      * request, we just ignore it.
 386      */
 387     if (vhost_user_one_time_request(msg->hdr.request) && dev->vq_index != 0) {
 388         msg->hdr.flags &= ~VHOST_USER_NEED_REPLY_MASK;
 389         return 0;
 390     }
 391
 392     if (qemu_chr_fe_set_msgfds(chr, fds, fd_num) < 0) {
 393         error_report("Failed to set msg fds.");
 394         return -EINVAL;
 395     }
 396
 397     ret = qemu_chr_fe_write_all(chr, (const uint8_t *) msg, size);
 398     if (ret != size) {
 399         int saved_errno = errno;
 400         error_report("Failed to write msg."
 401                      " Wrote %d instead of %d.", ret, size);
 402         return ret < 0 ? -saved_errno : -EIO;
 403     }
 404
 405     trace_vhost_user_write(msg->hdr.request, msg->hdr.flags);
 406
 407     return 0;
 408 }
 409
 410 int vhost_user_gpu_set_socket(struct vhost_dev *dev, int fd)
 411 {
 412     VhostUserMsg msg = {
 413         .hdr.request = VHOST_USER_GPU_SET_SOCKET,
 414         .hdr.flags = VHOST_USER_VERSION,
 415     };
 416
 417     return vhost_user_write(dev, &msg, &fd, 1);
 418 }
 419
 420 static int vhost_user_set_log_base(struct vhost_dev *dev, uint64_t base,
 421                                    struct vhost_log *log)
 422 {
 423     int fds[VHOST_USER_MAX_RAM_SLOTS];
 424     size_t fd_num = 0;
 425     bool shmfd = virtio_has_feature(dev->protocol_features,
 426                                     VHOST_USER_PROTOCOL_F_LOG_SHMFD);
 427     int ret;
 428     VhostUserMsg msg = {
 429         .hdr.request = VHOST_USER_SET_LOG_BASE,
 430         .hdr.flags = VHOST_USER_VERSION,
 431         .payload.log.mmap_size = log->size * sizeof(*(log->log)),
 432         .payload.log.mmap_offset = 0,
 433         .hdr.size = sizeof(msg.payload.log),
 434     };
 435
 436     /* Send only once with first queue pair */
 437     if (dev->vq_index != 0) {
 438         return 0;
 439     }
 440
 441     if (shmfd && log->fd != -1) {
 442         fds[fd_num++] = log->fd;
 443     }
 444
 445     ret = vhost_user_write(dev, &msg, fds, fd_num);
 446     if (ret < 0) {
 447         return ret;
 448     }
 449
 450     if (shmfd) {
 451         msg.hdr.size = 0;
 452         ret = vhost_user_read(dev, &msg);
 453         if (ret < 0) {
 454             return ret;
 455         }
 456
 457         if (msg.hdr.request != VHOST_USER_SET_LOG_BASE) {
 458             error_report("Received unexpected msg type. "
 459                          "Expected %d received %d",
 460                          VHOST_USER_SET_LOG_BASE, msg.hdr.request);
 461             return -EPROTO;
 462         }
 463     }
 464
 465     return 0;
 466 }
 467
 468 static MemoryRegion *vhost_user_get_mr_data(uint64_t addr, ram_addr_t *offset,
 469                                             int *fd)
 470 {
 471     MemoryRegion *mr;
 472
 473     assert((uintptr_t)addr == addr);
 474     mr = memory_region_from_host((void *)(uintptr_t)addr, offset);
 475     *fd = memory_region_get_fd(mr);
 476     *offset += mr->ram_block->fd_offset;
 477
 478     return mr;
 479 }
 480
 481 static void vhost_user_fill_msg_region(VhostUserMemoryRegion *dst,
 482                                        struct vhost_memory_region *src,
 483                                        uint64_t mmap_offset)
 484 {
 485     assert(src != NULL && dst != NULL);
 486     dst->userspace_addr = src->userspace_addr;
 487     dst->memory_size = src->memory_size;
 488     dst->guest_phys_addr = src->guest_phys_addr;
 489     dst->mmap_offset = mmap_offset;
 490 }
 491
 492 static int vhost_user_fill_set_mem_table_msg(struct vhost_user *u,
 493                                              struct vhost_dev *dev,
 494                                              VhostUserMsg *msg,
 495                                              int *fds, size_t *fd_num,
 496                                              bool track_ramblocks)
 497 {
 498     int i, fd;
 499     ram_addr_t offset;
 500     MemoryRegion *mr;
 501     struct vhost_memory_region *reg;
 502     VhostUserMemoryRegion region_buffer;
 503
 504     msg->hdr.request = VHOST_USER_SET_MEM_TABLE;
 505
 506     for (i = 0; i < dev->mem->nregions; ++i) {
 507         reg = dev->mem->regions + i;
 508
 509         mr = vhost_user_get_mr_data(reg->userspace_addr, &offset, &fd);
 510         if (fd > 0) {
 511             if (track_ramblocks) {
 512                 assert(*fd_num < VHOST_MEMORY_BASELINE_NREGIONS);
 513                 trace_vhost_user_set_mem_table_withfd(*fd_num, mr->name,
 514                                                       reg->memory_size,
 515                                                       reg->guest_phys_addr,
 516                                                       reg->userspace_addr,
 517                                                       offset);
 518                 u->region_rb_offset[i] = offset;
 519                 u->region_rb[i] = mr->ram_block;
 520             } else if (*fd_num == VHOST_MEMORY_BASELINE_NREGIONS) {
 521                 error_report("Failed preparing vhost-user memory table msg");
 522                 return -ENOBUFS;
 523             }
 524             vhost_user_fill_msg_region(&region_buffer, reg, offset);
 525             msg->payload.memory.regions[*fd_num] = region_buffer;
 526             fds[(*fd_num)++] = fd;
 527         } else if (track_ramblocks) {
 528             u->region_rb_offset[i] = 0;
 529             u->region_rb[i] = NULL;
 530         }
 531     }
 532
 533     msg->payload.memory.nregions = *fd_num;
 534
 535     if (!*fd_num) {
 536         error_report("Failed initializing vhost-user memory map, "
 537                      "consider using -object memory-backend-file share=on");
 538         return -EINVAL;
 539     }
 540
 541     msg->hdr.size = sizeof(msg->payload.memory.nregions);
 542     msg->hdr.size += sizeof(msg->payload.memory.padding);
 543     msg->hdr.size += *fd_num * sizeof(VhostUserMemoryRegion);
 544
 545     return 0;
 546 }
 547
 548 static inline bool reg_equal(struct vhost_memory_region *shadow_reg,
 549                              struct vhost_memory_region *vdev_reg)
 550 {
 551     return shadow_reg->guest_phys_addr == vdev_reg->guest_phys_addr &&
 552         shadow_reg->userspace_addr == vdev_reg->userspace_addr &&
 553         shadow_reg->memory_size == vdev_reg->memory_size;
 554 }
 555
 556 static void scrub_shadow_regions(struct vhost_dev *dev,
 557                                  struct scrub_regions *add_reg,
 558                                  int *nr_add_reg,
 559                                  struct scrub_regions *rem_reg,
 560                                  int *nr_rem_reg, uint64_t *shadow_pcb,
 561                                  bool track_ramblocks)
 562 {
 563     struct vhost_user *u = dev->opaque;
 564     bool found[VHOST_USER_MAX_RAM_SLOTS] = {};
 565     struct vhost_memory_region *reg, *shadow_reg;
 566     int i, j, fd, add_idx = 0, rm_idx = 0, fd_num = 0;
 567     ram_addr_t offset;
 568     MemoryRegion *mr;
 569     bool matching;
 570
 571     /*
 572      * Find memory regions present in our shadow state which are not in
 573      * the device's current memory state.
 574      *
 575      * Mark regions in both the shadow and device state as "found".
 576      */
 577     for (i = 0; i < u->num_shadow_regions; i++) {
 578         shadow_reg = &u->shadow_regions[i];
 579         matching = false;
 580
 581         for (j = 0; j < dev->mem->nregions; j++) {
 582             reg = &dev->mem->regions[j];
 583
 584             mr = vhost_user_get_mr_data(reg->userspace_addr, &offset, &fd);
 585
 586             if (reg_equal(shadow_reg, reg)) {
 587                 matching = true;
 588                 found[j] = true;
 589                 if (track_ramblocks) {
 590                     /*
 591                      * Reset postcopy client bases, region_rb, and
 592                      * region_rb_offset in case regions are removed.
 593                      */
 594                     if (fd > 0) {
 595                         u->region_rb_offset[j] = offset;
 596                         u->region_rb[j] = mr->ram_block;
 597                         shadow_pcb[j] = u->postcopy_client_bases[i];
 598                     } else {
 599                         u->region_rb_offset[j] = 0;
 600                         u->region_rb[j] = NULL;
 601                     }
 602                 }
 603                 break;
 604             }
 605         }
 606
 607         /*
 608          * If the region was not found in the current device memory state
 609          * create an entry for it in the removed list.
 610          */
 611         if (!matching) {
 612             rem_reg[rm_idx].region = shadow_reg;
 613             rem_reg[rm_idx++].reg_idx = i;
 614         }
 615     }
 616
 617     /*
 618      * For regions not marked "found", create entries in the added list.
 619      *
 620      * Note their indexes in the device memory state and the indexes of their
 621      * file descriptors.
 622      */
 623     for (i = 0; i < dev->mem->nregions; i++) {
 624         reg = &dev->mem->regions[i];
 625         vhost_user_get_mr_data(reg->userspace_addr, &offset, &fd);
 626         if (fd > 0) {
 627             ++fd_num;
 628         }
 629
 630         /*
 631          * If the region was in both the shadow and device state we don't
 632          * need to send a VHOST_USER_ADD_MEM_REG message for it.
 633          */
 634         if (found[i]) {
 635             continue;
 636         }
 637
 638         add_reg[add_idx].region = reg;
 639         add_reg[add_idx].reg_idx = i;
 640         add_reg[add_idx++].fd_idx = fd_num;
 641     }
 642     *nr_rem_reg = rm_idx;
 643     *nr_add_reg = add_idx;
 644
 645     return;
 646 }
 647
 648 static int send_remove_regions(struct vhost_dev *dev,
 649                                struct scrub_regions *remove_reg,
 650                                int nr_rem_reg, VhostUserMsg *msg,
 651                                bool reply_supported)
 652 {
 653     struct vhost_user *u = dev->opaque;
 654     struct vhost_memory_region *shadow_reg;
 655     int i, fd, shadow_reg_idx, ret;
 656     ram_addr_t offset;
 657     VhostUserMemoryRegion region_buffer;
 658
 659     /*
 660      * The regions in remove_reg appear in the same order they do in the
 661      * shadow table. Therefore we can minimize memory copies by iterating
 662      * through remove_reg backwards.
 663      */
 664     for (i = nr_rem_reg - 1; i >= 0; i--) {
 665         shadow_reg = remove_reg[i].region;
 666         shadow_reg_idx = remove_reg[i].reg_idx;
 667
 668         vhost_user_get_mr_data(shadow_reg->userspace_addr, &offset, &fd);
 669
 670         if (fd > 0) {
 671             msg->hdr.request = VHOST_USER_REM_MEM_REG;
 672             vhost_user_fill_msg_region(&region_buffer, shadow_reg, 0);
 673             msg->payload.mem_reg.region = region_buffer;
 674
 675             ret = vhost_user_write(dev, msg, NULL, 0);
 676             if (ret < 0) {
 677                 return ret;
 678             }
 679
 680             if (reply_supported) {
 681                 ret = process_message_reply(dev, msg);
 682                 if (ret) {
 683                     return ret;
 684                 }
 685             }
 686         }
 687
 688         /*
 689          * At this point we know the backend has unmapped the region. It is now
 690          * safe to remove it from the shadow table.
 691          */
 692         memmove(&u->shadow_regions[shadow_reg_idx],
 693                 &u->shadow_regions[shadow_reg_idx + 1],
 694                 sizeof(struct vhost_memory_region) *
 695                 (u->num_shadow_regions - shadow_reg_idx - 1));
 696         u->num_shadow_regions--;
 697     }
 698
 699     return 0;
 700 }
 701
 702 static int send_add_regions(struct vhost_dev *dev,
 703                             struct scrub_regions *add_reg, int nr_add_reg,
 704                             VhostUserMsg *msg, uint64_t *shadow_pcb,
 705                             bool reply_supported, bool track_ramblocks)
 706 {
 707     struct vhost_user *u = dev->opaque;
 708     int i, fd, ret, reg_idx, reg_fd_idx;
 709     struct vhost_memory_region *reg;
 710     MemoryRegion *mr;
 711     ram_addr_t offset;
 712     VhostUserMsg msg_reply;
 713     VhostUserMemoryRegion region_buffer;
 714
 715     for (i = 0; i < nr_add_reg; i++) {
 716         reg = add_reg[i].region;
 717         reg_idx = add_reg[i].reg_idx;
 718         reg_fd_idx = add_reg[i].fd_idx;
 719
 720         mr = vhost_user_get_mr_data(reg->userspace_addr, &offset, &fd);
 721
 722         if (fd > 0) {
 723             if (track_ramblocks) {
 724                 trace_vhost_user_set_mem_table_withfd(reg_fd_idx, mr->name,
 725                                                       reg->memory_size,
 726                                                       reg->guest_phys_addr,
 727                                                       reg->userspace_addr,
 728                                                       offset);
 729                 u->region_rb_offset[reg_idx] = offset;
 730                 u->region_rb[reg_idx] = mr->ram_block;
 731             }
 732             msg->hdr.request = VHOST_USER_ADD_MEM_REG;
 733             vhost_user_fill_msg_region(&region_buffer, reg, offset);
 734             msg->payload.mem_reg.region = region_buffer;
 735
 736             ret = vhost_user_write(dev, msg, &fd, 1);
 737             if (ret < 0) {
 738                 return ret;
 739             }
 740
 741             if (track_ramblocks) {
 742                 uint64_t reply_gpa;
 743
 744                 ret = vhost_user_read(dev, &msg_reply);
 745                 if (ret < 0) {
 746                     return ret;
 747                 }
 748
 749                 reply_gpa = msg_reply.payload.mem_reg.region.guest_phys_addr;
 750
 751                 if (msg_reply.hdr.request != VHOST_USER_ADD_MEM_REG) {
 752                     error_report("%s: Received unexpected msg type."
 753                                  "Expected %d received %d", __func__,
 754                                  VHOST_USER_ADD_MEM_REG,
 755                                  msg_reply.hdr.request);
 756                     return -EPROTO;
 757                 }
 758
 759                 /*
 760                  * We're using the same structure, just reusing one of the
 761                  * fields, so it should be the same size.
 762                  */
 763                 if (msg_reply.hdr.size != msg->hdr.size) {
 764                     error_report("%s: Unexpected size for postcopy reply "
 765                                  "%d vs %d", __func__, msg_reply.hdr.size,
 766                                  msg->hdr.size);
 767                     return -EPROTO;
 768                 }
 769
 770                 /* Get the postcopy client base from the backend's reply. */
 771                 if (reply_gpa == dev->mem->regions[reg_idx].guest_phys_addr) {
 772                     shadow_pcb[reg_idx] =
 773                         msg_reply.payload.mem_reg.region.userspace_addr;
 774                     trace_vhost_user_set_mem_table_postcopy(
 775                         msg_reply.payload.mem_reg.region.userspace_addr,
 776                         msg->payload.mem_reg.region.userspace_addr,
 777                         reg_fd_idx, reg_idx);
 778                 } else {
 779                     error_report("%s: invalid postcopy reply for region. "
 780                                  "Got guest physical address %" PRIX64 ", expected "
 781                                  "%" PRIX64, __func__, reply_gpa,
 782                                  dev->mem->regions[reg_idx].guest_phys_addr);
 783                     return -EPROTO;
 784                 }
 785             } else if (reply_supported) {
 786                 ret = process_message_reply(dev, msg);
 787                 if (ret) {
 788                     return ret;
 789                 }
 790             }
 791         } else if (track_ramblocks) {
 792             u->region_rb_offset[reg_idx] = 0;
 793             u->region_rb[reg_idx] = NULL;
 794         }
 795
 796         /*
 797          * At this point, we know the backend has mapped in the new
 798          * region, if the region has a valid file descriptor.
 799          *
 800          * The region should now be added to the shadow table.
 801          */
 802         u->shadow_regions[u->num_shadow_regions].guest_phys_addr =
 803             reg->guest_phys_addr;
 804         u->shadow_regions[u->num_shadow_regions].userspace_addr =
 805             reg->userspace_addr;
 806         u->shadow_regions[u->num_shadow_regions].memory_size =
 807             reg->memory_size;
 808         u->num_shadow_regions++;
 809     }
 810
 811     return 0;
 812 }
 813
 814 static int vhost_user_add_remove_regions(struct vhost_dev *dev,
 815                                          VhostUserMsg *msg,
 816                                          bool reply_supported,
 817                                          bool track_ramblocks)
 818 {
 819     struct vhost_user *u = dev->opaque;
 820     struct scrub_regions add_reg[VHOST_USER_MAX_RAM_SLOTS];
 821     struct scrub_regions rem_reg[VHOST_USER_MAX_RAM_SLOTS];
 822     uint64_t shadow_pcb[VHOST_USER_MAX_RAM_SLOTS] = {};
 823     int nr_add_reg, nr_rem_reg;
 824     int ret;
 825
 826     msg->hdr.size = sizeof(msg->payload.mem_reg);
 827
 828     /* Find the regions which need to be removed or added. */
 829     scrub_shadow_regions(dev, add_reg, &nr_add_reg, rem_reg, &nr_rem_reg,
 830                          shadow_pcb, track_ramblocks);
 831
 832     if (nr_rem_reg) {
 833         ret = send_remove_regions(dev, rem_reg, nr_rem_reg, msg,
 834                                   reply_supported);
 835         if (ret < 0) {
 836             goto err;
 837         }
 838     }
 839
 840     if (nr_add_reg) {
 841         ret = send_add_regions(dev, add_reg, nr_add_reg, msg, shadow_pcb,
 842                                reply_supported, track_ramblocks);
 843         if (ret < 0) {
 844             goto err;
 845         }
 846     }
 847
 848     if (track_ramblocks) {
 849         memcpy(u->postcopy_client_bases, shadow_pcb,
 850                sizeof(uint64_t) * VHOST_USER_MAX_RAM_SLOTS);
 851         /*
 852          * Now we've registered this with the postcopy code, we ack to the
 853          * client, because now we're in the position to be able to deal with
 854          * any faults it generates.
 855          */
 856         /* TODO: Use this for failure cases as well with a bad value. */
 857         msg->hdr.size = sizeof(msg->payload.u64);
 858         msg->payload.u64 = 0; /* OK */
 859
 860         ret = vhost_user_write(dev, msg, NULL, 0);
 861         if (ret < 0) {
 862             return ret;
 863         }
 864     }
 865
 866     return 0;
 867
 868 err:
 869     if (track_ramblocks) {
 870         memcpy(u->postcopy_client_bases, shadow_pcb,
 871                sizeof(uint64_t) * VHOST_USER_MAX_RAM_SLOTS);
 872     }
 873
 874     return ret;
 875 }
 876
 877 static int vhost_user_set_mem_table_postcopy(struct vhost_dev *dev,
 878                                              struct vhost_memory *mem,
 879                                              bool reply_supported,
 880                                              bool config_mem_slots)
 881 {
 882     struct vhost_user *u = dev->opaque;
 883     int fds[VHOST_MEMORY_BASELINE_NREGIONS];
 884     size_t fd_num = 0;
 885     VhostUserMsg msg_reply;
 886     int region_i, msg_i;
 887     int ret;
 888
 889     VhostUserMsg msg = {
 890         .hdr.flags = VHOST_USER_VERSION,
 891     };
 892
 893     if (u->region_rb_len < dev->mem->nregions) {
 894         u->region_rb = g_renew(RAMBlock*, u->region_rb, dev->mem->nregions);
 895         u->region_rb_offset = g_renew(ram_addr_t, u->region_rb_offset,
 896                                       dev->mem->nregions);
 897         memset(&(u->region_rb[u->region_rb_len]), '\0',
 898                sizeof(RAMBlock *) * (dev->mem->nregions - u->region_rb_len));
 899         memset(&(u->region_rb_offset[u->region_rb_len]), '\0',
 900                sizeof(ram_addr_t) * (dev->mem->nregions - u->region_rb_len));
 901         u->region_rb_len = dev->mem->nregions;
 902     }
 903
 904     if (config_mem_slots) {
 905         ret = vhost_user_add_remove_regions(dev, &msg, reply_supported, true);
 906         if (ret < 0) {
 907             return ret;
 908         }
 909     } else {
 910         ret = vhost_user_fill_set_mem_table_msg(u, dev, &msg, fds, &fd_num,
 911                                                 true);
 912         if (ret < 0) {
 913             return ret;
 914         }
 915
 916         ret = vhost_user_write(dev, &msg, fds, fd_num);
 917         if (ret < 0) {
 918             return ret;
 919         }
 920
 921         ret = vhost_user_read(dev, &msg_reply);
 922         if (ret < 0) {
 923             return ret;
 924         }
 925
 926         if (msg_reply.hdr.request != VHOST_USER_SET_MEM_TABLE) {
 927             error_report("%s: Received unexpected msg type."
 928                          "Expected %d received %d", __func__,
 929                          VHOST_USER_SET_MEM_TABLE, msg_reply.hdr.request);
 930             return -EPROTO;
 931         }
 932
 933         /*
 934          * We're using the same structure, just reusing one of the
 935          * fields, so it should be the same size.
 936          */
 937         if (msg_reply.hdr.size != msg.hdr.size) {
 938             error_report("%s: Unexpected size for postcopy reply "
 939                          "%d vs %d", __func__, msg_reply.hdr.size,
 940                          msg.hdr.size);
 941             return -EPROTO;
 942         }
 943
 944         memset(u->postcopy_client_bases, 0,
 945                sizeof(uint64_t) * VHOST_USER_MAX_RAM_SLOTS);
 946
 947         /*
 948          * They're in the same order as the regions that were sent
 949          * but some of the regions were skipped (above) if they
 950          * didn't have fd's
 951          */
 952         for (msg_i = 0, region_i = 0;
 953              region_i < dev->mem->nregions;
 954              region_i++) {
 955             if (msg_i < fd_num &&
 956                 msg_reply.payload.memory.regions[msg_i].guest_phys_addr ==
 957                 dev->mem->regions[region_i].guest_phys_addr) {
 958                 u->postcopy_client_bases[region_i] =
 959                     msg_reply.payload.memory.regions[msg_i].userspace_addr;
 960                 trace_vhost_user_set_mem_table_postcopy(
 961                     msg_reply.payload.memory.regions[msg_i].userspace_addr,
 962                     msg.payload.memory.regions[msg_i].userspace_addr,
 963                     msg_i, region_i);
 964                 msg_i++;
 965             }
 966         }
 967         if (msg_i != fd_num) {
 968             error_report("%s: postcopy reply not fully consumed "
 969                          "%d vs %zd",
 970                          __func__, msg_i, fd_num);
 971             return -EIO;
 972         }
 973
 974         /*
 975          * Now we've registered this with the postcopy code, we ack to the
 976          * client, because now we're in the position to be able to deal
 977          * with any faults it generates.
 978          */
 979         /* TODO: Use this for failure cases as well with a bad value. */
 980         msg.hdr.size = sizeof(msg.payload.u64);
 981         msg.payload.u64 = 0; /* OK */
 982         ret = vhost_user_write(dev, &msg, NULL, 0);
 983         if (ret < 0) {
 984             return ret;
 985         }
 986     }
 987
 988     return 0;
 989 }
 990
 991 static int vhost_user_set_mem_table(struct vhost_dev *dev,
 992                                     struct vhost_memory *mem)
 993 {
 994     struct vhost_user *u = dev->opaque;
 995     int fds[VHOST_MEMORY_BASELINE_NREGIONS];
 996     size_t fd_num = 0;
 997     bool do_postcopy = u->postcopy_listen && u->postcopy_fd.handler;
 998     bool reply_supported = virtio_has_feature(dev->protocol_features,
 999                                               VHOST_USER_PROTOCOL_F_REPLY_ACK);
1000     bool config_mem_slots =
1001         virtio_has_feature(dev->protocol_features,
1002                            VHOST_USER_PROTOCOL_F_CONFIGURE_MEM_SLOTS);
1003     int ret;
1004
1005     if (do_postcopy) {
1006         /*
1007          * Postcopy has enough differences that it's best done in it's own
1008          * version
1009          */
1010         return vhost_user_set_mem_table_postcopy(dev, mem, reply_supported,
1011                                                  config_mem_slots);
1012     }
1013
1014     VhostUserMsg msg = {
1015         .hdr.flags = VHOST_USER_VERSION,
1016     };
1017
1018     if (reply_supported) {
1019         msg.hdr.flags |= VHOST_USER_NEED_REPLY_MASK;
1020     }
1021
1022     if (config_mem_slots) {
1023         ret = vhost_user_add_remove_regions(dev, &msg, reply_supported, false);
1024         if (ret < 0) {
1025             return ret;
1026         }
1027     } else {
1028         ret = vhost_user_fill_set_mem_table_msg(u, dev, &msg, fds, &fd_num,
1029                                                 false);
1030         if (ret < 0) {
1031             return ret;
1032         }
1033
1034         ret = vhost_user_write(dev, &msg, fds, fd_num);
1035         if (ret < 0) {
1036             return ret;
1037         }
1038
1039         if (reply_supported) {
1040             return process_message_reply(dev, &msg);
1041         }
1042     }
1043
1044     return 0;
1045 }
1046
1047 static int vhost_user_set_vring_endian(struct vhost_dev *dev,
1048                                        struct vhost_vring_state *ring)
1049 {
1050     bool cross_endian = virtio_has_feature(dev->protocol_features,
1051                                            VHOST_USER_PROTOCOL_F_CROSS_ENDIAN);
1052     VhostUserMsg msg = {
1053         .hdr.request = VHOST_USER_SET_VRING_ENDIAN,
1054         .hdr.flags = VHOST_USER_VERSION,
1055         .payload.state = *ring,
1056         .hdr.size = sizeof(msg.payload.state),
1057     };
1058
1059     if (!cross_endian) {
1060         error_report("vhost-user trying to send unhandled ioctl");
1061         return -ENOTSUP;
1062     }
1063
1064     return vhost_user_write(dev, &msg, NULL, 0);
1065 }
1066
1067 static int vhost_set_vring(struct vhost_dev *dev,
1068                            unsigned long int request,
1069                            struct vhost_vring_state *ring)
1070 {
1071     VhostUserMsg msg = {
1072         .hdr.request = request,
1073         .hdr.flags = VHOST_USER_VERSION,
1074         .payload.state = *ring,
1075         .hdr.size = sizeof(msg.payload.state),
1076     };
1077
1078     return vhost_user_write(dev, &msg, NULL, 0);
1079 }
1080
1081 static int vhost_user_set_vring_num(struct vhost_dev *dev,
1082                                     struct vhost_vring_state *ring)
1083 {
1084     return vhost_set_vring(dev, VHOST_USER_SET_VRING_NUM, ring);
1085 }
1086
1087 static void vhost_user_host_notifier_free(VhostUserHostNotifier *n)
1088 {
1089     assert(n && n->unmap_addr);
1090     munmap(n->unmap_addr, qemu_real_host_page_size());
1091     n->unmap_addr = NULL;
1092 }
1093
1094 /*
1095  * clean-up function for notifier, will finally free the structure
1096  * under rcu.
1097  */
1098 static void vhost_user_host_notifier_remove(VhostUserHostNotifier *n,
1099                                             VirtIODevice *vdev)
1100 {
1101     if (n->addr) {
1102         if (vdev) {
1103             virtio_queue_set_host_notifier_mr(vdev, n->idx, &n->mr, false);
1104         }
1105         assert(!n->unmap_addr);
1106         n->unmap_addr = n->addr;
1107         n->addr = NULL;
1108         call_rcu(n, vhost_user_host_notifier_free, rcu);
1109     }
1110 }
1111
1112 static int vhost_user_set_vring_base(struct vhost_dev *dev,
1113                                      struct vhost_vring_state *ring)
1114 {
1115     return vhost_set_vring(dev, VHOST_USER_SET_VRING_BASE, ring);
1116 }
1117
1118 static int vhost_user_set_vring_enable(struct vhost_dev *dev, int enable)
1119 {
1120     int i;
1121
1122     if (!virtio_has_feature(dev->features, VHOST_USER_F_PROTOCOL_FEATURES)) {
1123         return -EINVAL;
1124     }
1125
1126     for (i = 0; i < dev->nvqs; ++i) {
1127         int ret;
1128         struct vhost_vring_state state = {
1129             .index = dev->vq_index + i,
1130             .num   = enable,
1131         };
1132
1133         ret = vhost_set_vring(dev, VHOST_USER_SET_VRING_ENABLE, &state);
1134         if (ret < 0) {
1135             /*
1136              * Restoring the previous state is likely infeasible, as well as
1137              * proceeding regardless the error, so just bail out and hope for
1138              * the device-level recovery.
1139              */
1140             return ret;
1141         }
1142     }
1143
1144     return 0;
1145 }
1146
1147 static VhostUserHostNotifier *fetch_notifier(VhostUserState *u,
1148                                              int idx)
1149 {
1150     if (idx >= u->notifiers->len) {
1151         return NULL;
1152     }
1153     return g_ptr_array_index(u->notifiers, idx);
1154 }
1155
1156 static int vhost_user_get_vring_base(struct vhost_dev *dev,
1157                                      struct vhost_vring_state *ring)
1158 {
1159     int ret;
1160     VhostUserMsg msg = {
1161         .hdr.request = VHOST_USER_GET_VRING_BASE,
1162         .hdr.flags = VHOST_USER_VERSION,
1163         .payload.state = *ring,
1164         .hdr.size = sizeof(msg.payload.state),
1165     };
1166     struct vhost_user *u = dev->opaque;
1167
1168     VhostUserHostNotifier *n = fetch_notifier(u->user, ring->index);
1169     if (n) {
1170         vhost_user_host_notifier_remove(n, dev->vdev);
1171     }
1172
1173     ret = vhost_user_write(dev, &msg, NULL, 0);
1174     if (ret < 0) {
1175         return ret;
1176     }
1177
1178     ret = vhost_user_read(dev, &msg);
1179     if (ret < 0) {
1180         return ret;
1181     }
1182
1183     if (msg.hdr.request != VHOST_USER_GET_VRING_BASE) {
1184         error_report("Received unexpected msg type. Expected %d received %d",
1185                      VHOST_USER_GET_VRING_BASE, msg.hdr.request);
1186         return -EPROTO;
1187     }
1188
1189     if (msg.hdr.size != sizeof(msg.payload.state)) {
1190         error_report("Received bad msg size.");
1191         return -EPROTO;
1192     }
1193
1194     *ring = msg.payload.state;
1195
1196     return 0;
1197 }
1198
1199 static int vhost_set_vring_file(struct vhost_dev *dev,
1200                                 VhostUserRequest request,
1201                                 struct vhost_vring_file *file)
1202 {
1203     int fds[VHOST_USER_MAX_RAM_SLOTS];
1204     size_t fd_num = 0;
1205     VhostUserMsg msg = {
1206         .hdr.request = request,
1207         .hdr.flags = VHOST_USER_VERSION,
1208         .payload.u64 = file->index & VHOST_USER_VRING_IDX_MASK,
1209         .hdr.size = sizeof(msg.payload.u64),
1210     };
1211
1212     if (ioeventfd_enabled() && file->fd > 0) {
1213         fds[fd_num++] = file->fd;
1214     } else {
1215         msg.payload.u64 |= VHOST_USER_VRING_NOFD_MASK;
1216     }
1217
1218     return vhost_user_write(dev, &msg, fds, fd_num);
1219 }
1220
1221 static int vhost_user_set_vring_kick(struct vhost_dev *dev,
1222                                      struct vhost_vring_file *file)
1223 {
1224     return vhost_set_vring_file(dev, VHOST_USER_SET_VRING_KICK, file);
1225 }
1226
1227 static int vhost_user_set_vring_call(struct vhost_dev *dev,
1228                                      struct vhost_vring_file *file)
1229 {
1230     return vhost_set_vring_file(dev, VHOST_USER_SET_VRING_CALL, file);
1231 }
1232
1233 static int vhost_user_set_vring_err(struct vhost_dev *dev,
1234                                     struct vhost_vring_file *file)
1235 {
1236     return vhost_set_vring_file(dev, VHOST_USER_SET_VRING_ERR, file);
1237 }
1238
1239 static int vhost_user_get_u64(struct vhost_dev *dev, int request, uint64_t *u64)
1240 {
1241     int ret;
1242     VhostUserMsg msg = {
1243         .hdr.request = request,
1244         .hdr.flags = VHOST_USER_VERSION,
1245     };
1246
1247     if (vhost_user_one_time_request(request) && dev->vq_index != 0) {
1248         return 0;
1249     }
1250
1251     ret = vhost_user_write(dev, &msg, NULL, 0);
1252     if (ret < 0) {
1253         return ret;
1254     }
1255
1256     ret = vhost_user_read(dev, &msg);
1257     if (ret < 0) {
1258         return ret;
1259     }
1260
1261     if (msg.hdr.request != request) {
1262         error_report("Received unexpected msg type. Expected %d received %d",
1263                      request, msg.hdr.request);
1264         return -EPROTO;
1265     }
1266
1267     if (msg.hdr.size != sizeof(msg.payload.u64)) {
1268         error_report("Received bad msg size.");
1269         return -EPROTO;
1270     }
1271
1272     *u64 = msg.payload.u64;
1273
1274     return 0;
1275 }
1276
1277 static int vhost_user_get_features(struct vhost_dev *dev, uint64_t *features)
1278 {
1279     if (vhost_user_get_u64(dev, VHOST_USER_GET_FEATURES, features) < 0) {
1280         return -EPROTO;
1281     }
1282
1283     return 0;
1284 }
1285
1286 static int enforce_reply(struct vhost_dev *dev,
1287                          const VhostUserMsg *msg)
1288 {
1289     uint64_t dummy;
1290
1291     if (msg->hdr.flags & VHOST_USER_NEED_REPLY_MASK) {
1292         return process_message_reply(dev, msg);
1293     }
1294
1295    /*
1296     * We need to wait for a reply but the backend does not
1297     * support replies for the command we just sent.
1298     * Send VHOST_USER_GET_FEATURES which makes all backends
1299     * send a reply.
1300     */
1301     return vhost_user_get_features(dev, &dummy);
1302 }
1303
1304 static int vhost_user_set_vring_addr(struct vhost_dev *dev,
1305                                      struct vhost_vring_addr *addr)
1306 {
1307     int ret;
1308     VhostUserMsg msg = {
1309         .hdr.request = VHOST_USER_SET_VRING_ADDR,
1310         .hdr.flags = VHOST_USER_VERSION,
1311         .payload.addr = *addr,
1312         .hdr.size = sizeof(msg.payload.addr),
1313     };
1314
1315     bool reply_supported = virtio_has_feature(dev->protocol_features,
1316                                               VHOST_USER_PROTOCOL_F_REPLY_ACK);
1317
1318     /*
1319      * wait for a reply if logging is enabled to make sure
1320      * backend is actually logging changes
1321      */
1322     bool wait_for_reply = addr->flags & (1 << VHOST_VRING_F_LOG);
1323
1324     if (reply_supported && wait_for_reply) {
1325         msg.hdr.flags |= VHOST_USER_NEED_REPLY_MASK;
1326     }
1327
1328     ret = vhost_user_write(dev, &msg, NULL, 0);
1329     if (ret < 0) {
1330         return ret;
1331     }
1332
1333     if (wait_for_reply) {
1334         return enforce_reply(dev, &msg);
1335     }
1336
1337     return 0;
1338 }
1339
1340 static int vhost_user_set_u64(struct vhost_dev *dev, int request, uint64_t u64,
1341                               bool wait_for_reply)
1342 {
1343     VhostUserMsg msg = {
1344         .hdr.request = request,
1345         .hdr.flags = VHOST_USER_VERSION,
1346         .payload.u64 = u64,
1347         .hdr.size = sizeof(msg.payload.u64),
1348     };
1349     int ret;
1350
1351     if (wait_for_reply) {
1352         bool reply_supported = virtio_has_feature(dev->protocol_features,
1353                                           VHOST_USER_PROTOCOL_F_REPLY_ACK);
1354         if (reply_supported) {
1355             msg.hdr.flags |= VHOST_USER_NEED_REPLY_MASK;
1356         }
1357     }
1358
1359     ret = vhost_user_write(dev, &msg, NULL, 0);
1360     if (ret < 0) {
1361         return ret;
1362     }
1363
1364     if (wait_for_reply) {
1365         return enforce_reply(dev, &msg);
1366     }
1367
1368     return 0;
1369 }
1370
1371 static int vhost_user_set_status(struct vhost_dev *dev, uint8_t status)
1372 {
1373     return vhost_user_set_u64(dev, VHOST_USER_SET_STATUS, status, false);
1374 }
1375
1376 static int vhost_user_get_status(struct vhost_dev *dev, uint8_t *status)
1377 {
1378     uint64_t value;
1379     int ret;
1380
1381     ret = vhost_user_get_u64(dev, VHOST_USER_GET_STATUS, &value);
1382     if (ret < 0) {
1383         return ret;
1384     }
1385     *status = value;
1386
1387     return 0;
1388 }
1389
1390 static int vhost_user_add_status(struct vhost_dev *dev, uint8_t status)
1391 {
1392     uint8_t s;
1393     int ret;
1394
1395     ret = vhost_user_get_status(dev, &s);
1396     if (ret < 0) {
1397         return ret;
1398     }
1399
1400     if ((s & status) == status) {
1401         return 0;
1402     }
1403     s |= status;
1404
1405     return vhost_user_set_status(dev, s);
1406 }
1407
1408 static int vhost_user_set_features(struct vhost_dev *dev,
1409                                    uint64_t features)
1410 {
1411     /*
1412      * wait for a reply if logging is enabled to make sure
1413      * backend is actually logging changes
1414      */
1415     bool log_enabled = features & (0x1ULL << VHOST_F_LOG_ALL);
1416     int ret;
1417
1418     /*
1419      * We need to include any extra backend only feature bits that
1420      * might be needed by our device. Currently this includes the
1421      * VHOST_USER_F_PROTOCOL_FEATURES bit for enabling protocol
1422      * features.
1423      */
1424     ret = vhost_user_set_u64(dev, VHOST_USER_SET_FEATURES,
1425                               features | dev->backend_features,
1426                               log_enabled);
1427
1428     if (virtio_has_feature(dev->protocol_features,
1429                            VHOST_USER_PROTOCOL_F_STATUS)) {
1430         if (!ret) {
1431             return vhost_user_add_status(dev, VIRTIO_CONFIG_S_FEATURES_OK);
1432         }
1433     }
1434
1435     return ret;
1436 }
1437
1438 static int vhost_user_set_protocol_features(struct vhost_dev *dev,
1439                                             uint64_t features)
1440 {
1441     return vhost_user_set_u64(dev, VHOST_USER_SET_PROTOCOL_FEATURES, features,
1442                               false);
1443 }
1444
1445 static int vhost_user_set_owner(struct vhost_dev *dev)
1446 {
1447     VhostUserMsg msg = {
1448         .hdr.request = VHOST_USER_SET_OWNER,
1449         .hdr.flags = VHOST_USER_VERSION,
1450     };
1451
1452     return vhost_user_write(dev, &msg, NULL, 0);
1453 }
1454
1455 static int vhost_user_get_max_memslots(struct vhost_dev *dev,
1456                                        uint64_t *max_memslots)
1457 {
1458     uint64_t backend_max_memslots;
1459     int err;
1460
1461     err = vhost_user_get_u64(dev, VHOST_USER_GET_MAX_MEM_SLOTS,
1462                              &backend_max_memslots);
1463     if (err < 0) {
1464         return err;
1465     }
1466
1467     *max_memslots = backend_max_memslots;
1468
1469     return 0;
1470 }
1471
1472 static int vhost_user_reset_device(struct vhost_dev *dev)
1473 {
1474     VhostUserMsg msg = {
1475         .hdr.flags = VHOST_USER_VERSION,
1476     };
1477
1478     msg.hdr.request = virtio_has_feature(dev->protocol_features,
1479                                          VHOST_USER_PROTOCOL_F_RESET_DEVICE)
1480         ? VHOST_USER_RESET_DEVICE
1481         : VHOST_USER_RESET_OWNER;
1482
1483     return vhost_user_write(dev, &msg, NULL, 0);
1484 }
1485
1486 static int vhost_user_slave_handle_config_change(struct vhost_dev *dev)
1487 {
1488     if (!dev->config_ops || !dev->config_ops->vhost_dev_config_notifier) {
1489         return -ENOSYS;
1490     }
1491
1492     return dev->config_ops->vhost_dev_config_notifier(dev);
1493 }
1494
1495 /*
1496  * Fetch or create the notifier for a given idx. Newly created
1497  * notifiers are added to the pointer array that tracks them.
1498  */
1499 static VhostUserHostNotifier *fetch_or_create_notifier(VhostUserState *u,
1500                                                        int idx)
1501 {
1502     VhostUserHostNotifier *n = NULL;
1503     if (idx >= u->notifiers->len) {
1504         g_ptr_array_set_size(u->notifiers, idx + 1);
1505     }
1506
1507     n = g_ptr_array_index(u->notifiers, idx);
1508     if (!n) {
1509         /*
1510          * In case notification arrive out-of-order,
1511          * make room for current index.
1512          */
1513         g_ptr_array_remove_index(u->notifiers, idx);
1514         n = g_new0(VhostUserHostNotifier, 1);
1515         n->idx = idx;
1516         g_ptr_array_insert(u->notifiers, idx, n);
1517         trace_vhost_user_create_notifier(idx, n);
1518     }
1519
1520     return n;
1521 }
1522
1523 static int vhost_user_slave_handle_vring_host_notifier(struct vhost_dev *dev,
1524                                                        VhostUserVringArea *area,
1525                                                        int fd)
1526 {
1527     int queue_idx = area->u64 & VHOST_USER_VRING_IDX_MASK;
1528     size_t page_size = qemu_real_host_page_size();
1529     struct vhost_user *u = dev->opaque;
1530     VhostUserState *user = u->user;
1531     VirtIODevice *vdev = dev->vdev;
1532     VhostUserHostNotifier *n;
1533     void *addr;
1534     char *name;
1535
1536     if (!virtio_has_feature(dev->protocol_features,
1537                             VHOST_USER_PROTOCOL_F_HOST_NOTIFIER) ||
1538         vdev == NULL || queue_idx >= virtio_get_num_queues(vdev)) {
1539         return -EINVAL;
1540     }
1541
1542     /*
1543      * Fetch notifier and invalidate any old data before setting up
1544      * new mapped address.
1545      */
1546     n = fetch_or_create_notifier(user, queue_idx);
1547     vhost_user_host_notifier_remove(n, vdev);
1548
1549     if (area->u64 & VHOST_USER_VRING_NOFD_MASK) {
1550         return 0;
1551     }
1552
1553     /* Sanity check. */
1554     if (area->size != page_size) {
1555         return -EINVAL;
1556     }
1557
1558     addr = mmap(NULL, page_size, PROT_READ | PROT_WRITE, MAP_SHARED,
1559                 fd, area->offset);
1560     if (addr == MAP_FAILED) {
1561         return -EFAULT;
1562     }
1563
1564     name = g_strdup_printf("vhost-user/host-notifier@%p mmaps[%d]",
1565                            user, queue_idx);
1566     if (!n->mr.ram) { /* Don't init again after suspend. */
1567         memory_region_init_ram_device_ptr(&n->mr, OBJECT(vdev), name,
1568                                           page_size, addr);
1569     } else {
1570         n->mr.ram_block->host = addr;
1571     }
1572     g_free(name);
1573
1574     if (virtio_queue_set_host_notifier_mr(vdev, queue_idx, &n->mr, true)) {
1575         object_unparent(OBJECT(&n->mr));
1576         munmap(addr, page_size);
1577         return -ENXIO;
1578     }
1579
1580     n->addr = addr;
1581
1582     return 0;
1583 }
1584
1585 static void close_slave_channel(struct vhost_user *u)
1586 {
1587     g_source_destroy(u->slave_src);
1588     g_source_unref(u->slave_src);
1589     u->slave_src = NULL;
1590     object_unref(OBJECT(u->slave_ioc));
1591     u->slave_ioc = NULL;
1592 }
1593
1594 static gboolean slave_read(QIOChannel *ioc, GIOCondition condition,
1595                            gpointer opaque)
1596 {
1597     struct vhost_dev *dev = opaque;
1598     struct vhost_user *u = dev->opaque;
1599     VhostUserHeader hdr = { 0, };
1600     VhostUserPayload payload = { 0, };
1601     Error *local_err = NULL;
1602     gboolean rc = G_SOURCE_CONTINUE;
1603     int ret = 0;
1604     struct iovec iov;
1605     g_autofree int *fd = NULL;
1606     size_t fdsize = 0;
1607     int i;
1608
1609     /* Read header */
1610     iov.iov_base = &hdr;
1611     iov.iov_len = VHOST_USER_HDR_SIZE;
1612
1613     if (qio_channel_readv_full_all(ioc, &iov, 1, &fd, &fdsize, &local_err)) {
1614         error_report_err(local_err);
1615         goto err;
1616     }
1617
1618     if (hdr.size > VHOST_USER_PAYLOAD_SIZE) {
1619         error_report("Failed to read msg header."
1620                 " Size %d exceeds the maximum %zu.", hdr.size,
1621                 VHOST_USER_PAYLOAD_SIZE);
1622         goto err;
1623     }
1624
1625     /* Read payload */
1626     if (qio_channel_read_all(ioc, (char *) &payload, hdr.size, &local_err)) {
1627         error_report_err(local_err);
1628         goto err;
1629     }
1630
1631     switch (hdr.request) {
1632     case VHOST_USER_BACKEND_IOTLB_MSG:
1633         ret = vhost_backend_handle_iotlb_msg(dev, &payload.iotlb);
1634         break;
1635     case VHOST_USER_BACKEND_CONFIG_CHANGE_MSG:
1636         ret = vhost_user_slave_handle_config_change(dev);
1637         break;
1638     case VHOST_USER_BACKEND_VRING_HOST_NOTIFIER_MSG:
1639         ret = vhost_user_slave_handle_vring_host_notifier(dev, &payload.area,
1640                                                           fd ? fd[0] : -1);
1641         break;
1642     default:
1643         error_report("Received unexpected msg type: %d.", hdr.request);
1644         ret = -EINVAL;
1645     }
1646
1647     /*
1648      * REPLY_ACK feature handling. Other reply types has to be managed
1649      * directly in their request handlers.
1650      */
1651     if (hdr.flags & VHOST_USER_NEED_REPLY_MASK) {
1652         struct iovec iovec[2];
1653
1654
1655         hdr.flags &= ~VHOST_USER_NEED_REPLY_MASK;
1656         hdr.flags |= VHOST_USER_REPLY_MASK;
1657
1658         payload.u64 = !!ret;
1659         hdr.size = sizeof(payload.u64);
1660
1661         iovec[0].iov_base = &hdr;
1662         iovec[0].iov_len = VHOST_USER_HDR_SIZE;
1663         iovec[1].iov_base = &payload;
1664         iovec[1].iov_len = hdr.size;
1665
1666         if (qio_channel_writev_all(ioc, iovec, ARRAY_SIZE(iovec), &local_err)) {
1667             error_report_err(local_err);
1668             goto err;
1669         }
1670     }
1671
1672     goto fdcleanup;
1673
1674 err:
1675     close_slave_channel(u);
1676     rc = G_SOURCE_REMOVE;
1677
1678 fdcleanup:
1679     if (fd) {
1680         for (i = 0; i < fdsize; i++) {
1681             close(fd[i]);
1682         }
1683     }
1684     return rc;
1685 }
1686
1687 static int vhost_setup_slave_channel(struct vhost_dev *dev)
1688 {
1689     VhostUserMsg msg = {
1690         .hdr.request = VHOST_USER_SET_BACKEND_REQ_FD,
1691         .hdr.flags = VHOST_USER_VERSION,
1692     };
1693     struct vhost_user *u = dev->opaque;
1694     int sv[2], ret = 0;
1695     bool reply_supported = virtio_has_feature(dev->protocol_features,
1696                                               VHOST_USER_PROTOCOL_F_REPLY_ACK);
1697     Error *local_err = NULL;
1698     QIOChannel *ioc;
1699
1700     if (!virtio_has_feature(dev->protocol_features,
1701                             VHOST_USER_PROTOCOL_F_BACKEND_REQ)) {
1702         return 0;
1703     }
1704
1705     if (qemu_socketpair(PF_UNIX, SOCK_STREAM, 0, sv) == -1) {
1706         int saved_errno = errno;
1707         error_report("socketpair() failed");
1708         return -saved_errno;
1709     }
1710
1711     ioc = QIO_CHANNEL(qio_channel_socket_new_fd(sv[0], &local_err));
1712     if (!ioc) {
1713         error_report_err(local_err);
1714         return -ECONNREFUSED;
1715     }
1716     u->slave_ioc = ioc;
1717     u->slave_src = qio_channel_add_watch_source(u->slave_ioc,
1718                                                 G_IO_IN | G_IO_HUP,
1719                                                 slave_read, dev, NULL, NULL);
1720
1721     if (reply_supported) {
1722         msg.hdr.flags |= VHOST_USER_NEED_REPLY_MASK;
1723     }
1724
1725     ret = vhost_user_write(dev, &msg, &sv[1], 1);
1726     if (ret) {
1727         goto out;
1728     }
1729
1730     if (reply_supported) {
1731         ret = process_message_reply(dev, &msg);
1732     }
1733
1734 out:
1735     close(sv[1]);
1736     if (ret) {
1737         close_slave_channel(u);
1738     }
1739
1740     return ret;
1741 }
1742
1743 #ifdef CONFIG_LINUX
1744 /*
1745  * Called back from the postcopy fault thread when a fault is received on our
1746  * ufd.
1747  * TODO: This is Linux specific
1748  */
1749 static int vhost_user_postcopy_fault_handler(struct PostCopyFD *pcfd,
1750                                              void *ufd)
1751 {
1752     struct vhost_dev *dev = pcfd->data;
1753     struct vhost_user *u = dev->opaque;
1754     struct uffd_msg *msg = ufd;
1755     uint64_t faultaddr = msg->arg.pagefault.address;
1756     RAMBlock *rb = NULL;
1757     uint64_t rb_offset;
1758     int i;
1759
1760     trace_vhost_user_postcopy_fault_handler(pcfd->idstr, faultaddr,
1761                                             dev->mem->nregions);
1762     for (i = 0; i < MIN(dev->mem->nregions, u->region_rb_len); i++) {
1763         trace_vhost_user_postcopy_fault_handler_loop(i,
1764                 u->postcopy_client_bases[i], dev->mem->regions[i].memory_size);
1765         if (faultaddr >= u->postcopy_client_bases[i]) {
1766             /* Ofset of the fault address in the vhost region */
1767             uint64_t region_offset = faultaddr - u->postcopy_client_bases[i];
1768             if (region_offset < dev->mem->regions[i].memory_size) {
1769                 rb_offset = region_offset + u->region_rb_offset[i];
1770                 trace_vhost_user_postcopy_fault_handler_found(i,
1771                         region_offset, rb_offset);
1772                 rb = u->region_rb[i];
1773                 return postcopy_request_shared_page(pcfd, rb, faultaddr,
1774                                                     rb_offset);
1775             }
1776         }
1777     }
1778     error_report("%s: Failed to find region for fault %" PRIx64,
1779                  __func__, faultaddr);
1780     return -1;
1781 }
1782
1783 static int vhost_user_postcopy_waker(struct PostCopyFD *pcfd, RAMBlock *rb,
1784                                      uint64_t offset)
1785 {
1786     struct vhost_dev *dev = pcfd->data;
1787     struct vhost_user *u = dev->opaque;
1788     int i;
1789
1790     trace_vhost_user_postcopy_waker(qemu_ram_get_idstr(rb), offset);
1791
1792     if (!u) {
1793         return 0;
1794     }
1795     /* Translate the offset into an address in the clients address space */
1796     for (i = 0; i < MIN(dev->mem->nregions, u->region_rb_len); i++) {
1797         if (u->region_rb[i] == rb &&
1798             offset >= u->region_rb_offset[i] &&
1799             offset < (u->region_rb_offset[i] +
1800                       dev->mem->regions[i].memory_size)) {
1801             uint64_t client_addr = (offset - u->region_rb_offset[i]) +
1802                                    u->postcopy_client_bases[i];
1803             trace_vhost_user_postcopy_waker_found(client_addr);
1804             return postcopy_wake_shared(pcfd, client_addr, rb);
1805         }
1806     }
1807
1808     trace_vhost_user_postcopy_waker_nomatch(qemu_ram_get_idstr(rb), offset);
1809     return 0;
1810 }
1811 #endif
1812
1813 /*
1814  * Called at the start of an inbound postcopy on reception of the
1815  * 'advise' command.
1816  */
1817 static int vhost_user_postcopy_advise(struct vhost_dev *dev, Error **errp)
1818 {
1819 #ifdef CONFIG_LINUX
1820     struct vhost_user *u = dev->opaque;
1821     CharBackend *chr = u->user->chr;
1822     int ufd;
1823     int ret;
1824     VhostUserMsg msg = {
1825         .hdr.request = VHOST_USER_POSTCOPY_ADVISE,
1826         .hdr.flags = VHOST_USER_VERSION,
1827     };
1828
1829     ret = vhost_user_write(dev, &msg, NULL, 0);
1830     if (ret < 0) {
1831         error_setg(errp, "Failed to send postcopy_advise to vhost");
1832         return ret;
1833     }
1834
1835     ret = vhost_user_read(dev, &msg);
1836     if (ret < 0) {
1837         error_setg(errp, "Failed to get postcopy_advise reply from vhost");
1838         return ret;
1839     }
1840
1841     if (msg.hdr.request != VHOST_USER_POSTCOPY_ADVISE) {
1842         error_setg(errp, "Unexpected msg type. Expected %d received %d",
1843                      VHOST_USER_POSTCOPY_ADVISE, msg.hdr.request);
1844         return -EPROTO;
1845     }
1846
1847     if (msg.hdr.size) {
1848         error_setg(errp, "Received bad msg size.");
1849         return -EPROTO;
1850     }
1851     ufd = qemu_chr_fe_get_msgfd(chr);
1852     if (ufd < 0) {
1853         error_setg(errp, "%s: Failed to get ufd", __func__);
1854         return -EIO;
1855     }
1856     qemu_socket_set_nonblock(ufd);
1857
1858     /* register ufd with userfault thread */
1859     u->postcopy_fd.fd = ufd;
1860     u->postcopy_fd.data = dev;
1861     u->postcopy_fd.handler = vhost_user_postcopy_fault_handler;
1862     u->postcopy_fd.waker = vhost_user_postcopy_waker;
1863     u->postcopy_fd.idstr = "vhost-user"; /* Need to find unique name */
1864     postcopy_register_shared_ufd(&u->postcopy_fd);
1865     return 0;
1866 #else
1867     error_setg(errp, "Postcopy not supported on non-Linux systems");
1868     return -ENOSYS;
1869 #endif
1870 }
1871
1872 /*
1873  * Called at the switch to postcopy on reception of the 'listen' command.
1874  */
1875 static int vhost_user_postcopy_listen(struct vhost_dev *dev, Error **errp)
1876 {
1877     struct vhost_user *u = dev->opaque;
1878     int ret;
1879     VhostUserMsg msg = {
1880         .hdr.request = VHOST_USER_POSTCOPY_LISTEN,
1881         .hdr.flags = VHOST_USER_VERSION | VHOST_USER_NEED_REPLY_MASK,
1882     };
1883     u->postcopy_listen = true;
1884
1885     trace_vhost_user_postcopy_listen();
1886
1887     ret = vhost_user_write(dev, &msg, NULL, 0);
1888     if (ret < 0) {
1889         error_setg(errp, "Failed to send postcopy_listen to vhost");
1890         return ret;
1891     }
1892
1893     ret = process_message_reply(dev, &msg);
1894     if (ret) {
1895         error_setg(errp, "Failed to receive reply to postcopy_listen");
1896         return ret;
1897     }
1898
1899     return 0;
1900 }
1901
1902 /*
1903  * Called at the end of postcopy
1904  */
1905 static int vhost_user_postcopy_end(struct vhost_dev *dev, Error **errp)
1906 {
1907     VhostUserMsg msg = {
1908         .hdr.request = VHOST_USER_POSTCOPY_END,
1909         .hdr.flags = VHOST_USER_VERSION | VHOST_USER_NEED_REPLY_MASK,
1910     };
1911     int ret;
1912     struct vhost_user *u = dev->opaque;
1913
1914     trace_vhost_user_postcopy_end_entry();
1915
1916     ret = vhost_user_write(dev, &msg, NULL, 0);
1917     if (ret < 0) {
1918         error_setg(errp, "Failed to send postcopy_end to vhost");
1919         return ret;
1920     }
1921
1922     ret = process_message_reply(dev, &msg);
1923     if (ret) {
1924         error_setg(errp, "Failed to receive reply to postcopy_end");
1925         return ret;
1926     }
1927     postcopy_unregister_shared_ufd(&u->postcopy_fd);
1928     close(u->postcopy_fd.fd);
1929     u->postcopy_fd.handler = NULL;
1930
1931     trace_vhost_user_postcopy_end_exit();
1932
1933     return 0;
1934 }
1935
1936 static int vhost_user_postcopy_notifier(NotifierWithReturn *notifier,
1937                                         void *opaque)
1938 {
1939     struct PostcopyNotifyData *pnd = opaque;
1940     struct vhost_user *u = container_of(notifier, struct vhost_user,
1941                                          postcopy_notifier);
1942     struct vhost_dev *dev = u->dev;
1943
1944     switch (pnd->reason) {
1945     case POSTCOPY_NOTIFY_PROBE:
1946         if (!virtio_has_feature(dev->protocol_features,
1947                                 VHOST_USER_PROTOCOL_F_PAGEFAULT)) {
1948             /* TODO: Get the device name into this error somehow */
1949             error_setg(pnd->errp,
1950                        "vhost-user backend not capable of postcopy");
1951             return -ENOENT;
1952         }
1953         break;
1954
1955     case POSTCOPY_NOTIFY_INBOUND_ADVISE:
1956         return vhost_user_postcopy_advise(dev, pnd->errp);
1957
1958     case POSTCOPY_NOTIFY_INBOUND_LISTEN:
1959         return vhost_user_postcopy_listen(dev, pnd->errp);
1960
1961     case POSTCOPY_NOTIFY_INBOUND_END:
1962         return vhost_user_postcopy_end(dev, pnd->errp);
1963
1964     default:
1965         /* We ignore notifications we don't know */
1966         break;
1967     }
1968
1969     return 0;
1970 }
1971
1972 static int vhost_user_backend_init(struct vhost_dev *dev, void *opaque,
1973                                    Error **errp)
1974 {
1975     uint64_t features, ram_slots;
1976     struct vhost_user *u;
1977     VhostUserState *vus = (VhostUserState *) opaque;
1978     int err;
1979
1980     assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER);
1981
1982     u = g_new0(struct vhost_user, 1);
1983     u->user = vus;
1984     u->dev = dev;
1985     dev->opaque = u;
1986
1987     err = vhost_user_get_features(dev, &features);
1988     if (err < 0) {
1989         error_setg_errno(errp, -err, "vhost_backend_init failed");
1990         return err;
1991     }
1992
1993     if (virtio_has_feature(features, VHOST_USER_F_PROTOCOL_FEATURES)) {
1994         bool supports_f_config = vus->supports_config ||
1995             (dev->config_ops && dev->config_ops->vhost_dev_config_notifier);
1996         uint64_t protocol_features;
1997
1998         dev->backend_features |= 1ULL << VHOST_USER_F_PROTOCOL_FEATURES;
1999
2000         err = vhost_user_get_u64(dev, VHOST_USER_GET_PROTOCOL_FEATURES,
2001                                  &protocol_features);
2002         if (err < 0) {
2003             error_setg_errno(errp, EPROTO, "vhost_backend_init failed");
2004             return -EPROTO;
2005         }
2006
2007         /*
2008          * We will use all the protocol features we support - although
2009          * we suppress F_CONFIG if we know QEMUs internal code can not support
2010          * it.
2011          */
2012         protocol_features &= VHOST_USER_PROTOCOL_FEATURE_MASK;
2013
2014         if (supports_f_config) {
2015             if (!virtio_has_feature(protocol_features,
2016                                     VHOST_USER_PROTOCOL_F_CONFIG)) {
2017                 error_setg(errp, "vhost-user device expecting "
2018                            "VHOST_USER_PROTOCOL_F_CONFIG but the vhost-user backend does "
2019                            "not support it.");
2020                 return -EPROTO;
2021             }
2022         } else {
2023             if (virtio_has_feature(protocol_features,
2024                                    VHOST_USER_PROTOCOL_F_CONFIG)) {
2025                 warn_report("vhost-user backend supports "
2026                             "VHOST_USER_PROTOCOL_F_CONFIG but QEMU does not.");
2027                 protocol_features &= ~(1ULL << VHOST_USER_PROTOCOL_F_CONFIG);
2028             }
2029         }
2030
2031         /* final set of protocol features */
2032         dev->protocol_features = protocol_features;
2033         err = vhost_user_set_protocol_features(dev, dev->protocol_features);
2034         if (err < 0) {
2035             error_setg_errno(errp, EPROTO, "vhost_backend_init failed");
2036             return -EPROTO;
2037         }
2038
2039         /* query the max queues we support if backend supports Multiple Queue */
2040         if (dev->protocol_features & (1ULL << VHOST_USER_PROTOCOL_F_MQ)) {
2041             err = vhost_user_get_u64(dev, VHOST_USER_GET_QUEUE_NUM,
2042                                      &dev->max_queues);
2043             if (err < 0) {
2044                 error_setg_errno(errp, EPROTO, "vhost_backend_init failed");
2045                 return -EPROTO;
2046             }
2047         } else {
2048             dev->max_queues = 1;
2049         }
2050
2051         if (dev->num_queues && dev->max_queues < dev->num_queues) {
2052             error_setg(errp, "The maximum number of queues supported by the "
2053                        "backend is %" PRIu64, dev->max_queues);
2054             return -EINVAL;
2055         }
2056
2057         if (virtio_has_feature(features, VIRTIO_F_IOMMU_PLATFORM) &&
2058                 !(virtio_has_feature(dev->protocol_features,
2059                     VHOST_USER_PROTOCOL_F_BACKEND_REQ) &&
2060                  virtio_has_feature(dev->protocol_features,
2061                     VHOST_USER_PROTOCOL_F_REPLY_ACK))) {
2062             error_setg(errp, "IOMMU support requires reply-ack and "
2063                        "slave-req protocol features.");
2064             return -EINVAL;
2065         }
2066
2067         /* get max memory regions if backend supports configurable RAM slots */
2068         if (!virtio_has_feature(dev->protocol_features,
2069                                 VHOST_USER_PROTOCOL_F_CONFIGURE_MEM_SLOTS)) {
2070             u->user->memory_slots = VHOST_MEMORY_BASELINE_NREGIONS;
2071         } else {
2072             err = vhost_user_get_max_memslots(dev, &ram_slots);
2073             if (err < 0) {
2074                 error_setg_errno(errp, EPROTO, "vhost_backend_init failed");
2075                 return -EPROTO;
2076             }
2077
2078             if (ram_slots < u->user->memory_slots) {
2079                 error_setg(errp, "The backend specified a max ram slots limit "
2080                            "of %" PRIu64", when the prior validated limit was "
2081                            "%d. This limit should never decrease.", ram_slots,
2082                            u->user->memory_slots);
2083                 return -EINVAL;
2084             }
2085
2086             u->user->memory_slots = MIN(ram_slots, VHOST_USER_MAX_RAM_SLOTS);
2087         }
2088     }
2089
2090     if (dev->migration_blocker == NULL &&
2091         !virtio_has_feature(dev->protocol_features,
2092                             VHOST_USER_PROTOCOL_F_LOG_SHMFD)) {
2093         error_setg(&dev->migration_blocker,
2094                    "Migration disabled: vhost-user backend lacks "
2095                    "VHOST_USER_PROTOCOL_F_LOG_SHMFD feature.");
2096     }
2097
2098     if (dev->vq_index == 0) {
2099         err = vhost_setup_slave_channel(dev);
2100         if (err < 0) {
2101             error_setg_errno(errp, EPROTO, "vhost_backend_init failed");
2102             return -EPROTO;
2103         }
2104     }
2105
2106     u->postcopy_notifier.notify = vhost_user_postcopy_notifier;
2107     postcopy_add_notifier(&u->postcopy_notifier);
2108
2109     return 0;
2110 }
2111
2112 static int vhost_user_backend_cleanup(struct vhost_dev *dev)
2113 {
2114     struct vhost_user *u;
2115
2116     assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER);
2117
2118     u = dev->opaque;
2119     if (u->postcopy_notifier.notify) {
2120         postcopy_remove_notifier(&u->postcopy_notifier);
2121         u->postcopy_notifier.notify = NULL;
2122     }
2123     u->postcopy_listen = false;
2124     if (u->postcopy_fd.handler) {
2125         postcopy_unregister_shared_ufd(&u->postcopy_fd);
2126         close(u->postcopy_fd.fd);
2127         u->postcopy_fd.handler = NULL;
2128     }
2129     if (u->slave_ioc) {
2130         close_slave_channel(u);
2131     }
2132     g_free(u->region_rb);
2133     u->region_rb = NULL;
2134     g_free(u->region_rb_offset);
2135     u->region_rb_offset = NULL;
2136     u->region_rb_len = 0;
2137     g_free(u);
2138     dev->opaque = 0;
2139
2140     return 0;
2141 }
2142
2143 static int vhost_user_get_vq_index(struct vhost_dev *dev, int idx)
2144 {
2145     assert(idx >= dev->vq_index && idx < dev->vq_index + dev->nvqs);
2146
2147     return idx;
2148 }
2149
2150 static int vhost_user_memslots_limit(struct vhost_dev *dev)
2151 {
2152     struct vhost_user *u = dev->opaque;
2153
2154     return u->user->memory_slots;
2155 }
2156
2157 static bool vhost_user_requires_shm_log(struct vhost_dev *dev)
2158 {
2159     assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER);
2160
2161     return virtio_has_feature(dev->protocol_features,
2162                               VHOST_USER_PROTOCOL_F_LOG_SHMFD);
2163 }
2164
2165 static int vhost_user_migration_done(struct vhost_dev *dev, char* mac_addr)
2166 {
2167     VhostUserMsg msg = { };
2168
2169     assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER);
2170
2171     /* If guest supports GUEST_ANNOUNCE do nothing */
2172     if (virtio_has_feature(dev->acked_features, VIRTIO_NET_F_GUEST_ANNOUNCE)) {
2173         return 0;
2174     }
2175
2176     /* if backend supports VHOST_USER_PROTOCOL_F_RARP ask it to send the RARP */
2177     if (virtio_has_feature(dev->protocol_features,
2178                            VHOST_USER_PROTOCOL_F_RARP)) {
2179         msg.hdr.request = VHOST_USER_SEND_RARP;
2180         msg.hdr.flags = VHOST_USER_VERSION;
2181         memcpy((char *)&msg.payload.u64, mac_addr, 6);
2182         msg.hdr.size = sizeof(msg.payload.u64);
2183
2184         return vhost_user_write(dev, &msg, NULL, 0);
2185     }
2186     return -ENOTSUP;
2187 }
2188
2189 static bool vhost_user_can_merge(struct vhost_dev *dev,
2190                                  uint64_t start1, uint64_t size1,
2191                                  uint64_t start2, uint64_t size2)
2192 {
2193     ram_addr_t offset;
2194     int mfd, rfd;
2195
2196     (void)vhost_user_get_mr_data(start1, &offset, &mfd);
2197     (void)vhost_user_get_mr_data(start2, &offset, &rfd);
2198
2199     return mfd == rfd;
2200 }
2201
2202 static int vhost_user_net_set_mtu(struct vhost_dev *dev, uint16_t mtu)
2203 {
2204     VhostUserMsg msg;
2205     bool reply_supported = virtio_has_feature(dev->protocol_features,
2206                                               VHOST_USER_PROTOCOL_F_REPLY_ACK);
2207     int ret;
2208
2209     if (!(dev->protocol_features & (1ULL << VHOST_USER_PROTOCOL_F_NET_MTU))) {
2210         return 0;
2211     }
2212
2213     msg.hdr.request = VHOST_USER_NET_SET_MTU;
2214     msg.payload.u64 = mtu;
2215     msg.hdr.size = sizeof(msg.payload.u64);
2216     msg.hdr.flags = VHOST_USER_VERSION;
2217     if (reply_supported) {
2218         msg.hdr.flags |= VHOST_USER_NEED_REPLY_MASK;
2219     }
2220
2221     ret = vhost_user_write(dev, &msg, NULL, 0);
2222     if (ret < 0) {
2223         return ret;
2224     }
2225
2226     /* If reply_ack supported, slave has to ack specified MTU is valid */
2227     if (reply_supported) {
2228         return process_message_reply(dev, &msg);
2229     }
2230
2231     return 0;
2232 }
2233
2234 static int vhost_user_send_device_iotlb_msg(struct vhost_dev *dev,
2235                                             struct vhost_iotlb_msg *imsg)
2236 {
2237     int ret;
2238     VhostUserMsg msg = {
2239         .hdr.request = VHOST_USER_IOTLB_MSG,
2240         .hdr.size = sizeof(msg.payload.iotlb),
2241         .hdr.flags = VHOST_USER_VERSION | VHOST_USER_NEED_REPLY_MASK,
2242         .payload.iotlb = *imsg,
2243     };
2244
2245     ret = vhost_user_write(dev, &msg, NULL, 0);
2246     if (ret < 0) {
2247         return ret;
2248     }
2249
2250     return process_message_reply(dev, &msg);
2251 }
2252
2253
2254 static void vhost_user_set_iotlb_callback(struct vhost_dev *dev, int enabled)
2255 {
2256     /* No-op as the receive channel is not dedicated to IOTLB messages. */
2257 }
2258
2259 static int vhost_user_get_config(struct vhost_dev *dev, uint8_t *config,
2260                                  uint32_t config_len, Error **errp)
2261 {
2262     int ret;
2263     VhostUserMsg msg = {
2264         .hdr.request = VHOST_USER_GET_CONFIG,
2265         .hdr.flags = VHOST_USER_VERSION,
2266         .hdr.size = VHOST_USER_CONFIG_HDR_SIZE + config_len,
2267     };
2268
2269     if (!virtio_has_feature(dev->protocol_features,
2270                 VHOST_USER_PROTOCOL_F_CONFIG)) {
2271         error_setg(errp, "VHOST_USER_PROTOCOL_F_CONFIG not supported");
2272         return -EINVAL;
2273     }
2274
2275     assert(config_len <= VHOST_USER_MAX_CONFIG_SIZE);
2276
2277     msg.payload.config.offset = 0;
2278     msg.payload.config.size = config_len;
2279     ret = vhost_user_write(dev, &msg, NULL, 0);
2280     if (ret < 0) {
2281         error_setg_errno(errp, -ret, "vhost_get_config failed");
2282         return ret;
2283     }
2284
2285     ret = vhost_user_read(dev, &msg);
2286     if (ret < 0) {
2287         error_setg_errno(errp, -ret, "vhost_get_config failed");
2288         return ret;
2289     }
2290
2291     if (msg.hdr.request != VHOST_USER_GET_CONFIG) {
2292         error_setg(errp,
2293                    "Received unexpected msg type. Expected %d received %d",
2294                    VHOST_USER_GET_CONFIG, msg.hdr.request);
2295         return -EPROTO;
2296     }
2297
2298     if (msg.hdr.size != VHOST_USER_CONFIG_HDR_SIZE + config_len) {
2299         error_setg(errp, "Received bad msg size.");
2300         return -EPROTO;
2301     }
2302
2303     memcpy(config, msg.payload.config.region, config_len);
2304
2305     return 0;
2306 }
2307
2308 static int vhost_user_set_config(struct vhost_dev *dev, const uint8_t *data,
2309                                  uint32_t offset, uint32_t size, uint32_t flags)
2310 {
2311     int ret;
2312     uint8_t *p;
2313     bool reply_supported = virtio_has_feature(dev->protocol_features,
2314                                               VHOST_USER_PROTOCOL_F_REPLY_ACK);
2315
2316     VhostUserMsg msg = {
2317         .hdr.request = VHOST_USER_SET_CONFIG,
2318         .hdr.flags = VHOST_USER_VERSION,
2319         .hdr.size = VHOST_USER_CONFIG_HDR_SIZE + size,
2320     };
2321
2322     if (!virtio_has_feature(dev->protocol_features,
2323                 VHOST_USER_PROTOCOL_F_CONFIG)) {
2324         return -ENOTSUP;
2325     }
2326
2327     if (reply_supported) {
2328         msg.hdr.flags |= VHOST_USER_NEED_REPLY_MASK;
2329     }
2330
2331     if (size > VHOST_USER_MAX_CONFIG_SIZE) {
2332         return -EINVAL;
2333     }
2334
2335     msg.payload.config.offset = offset,
2336     msg.payload.config.size = size,
2337     msg.payload.config.flags = flags,
2338     p = msg.payload.config.region;
2339     memcpy(p, data, size);
2340
2341     ret = vhost_user_write(dev, &msg, NULL, 0);
2342     if (ret < 0) {
2343         return ret;
2344     }
2345
2346     if (reply_supported) {
2347         return process_message_reply(dev, &msg);
2348     }
2349
2350     return 0;
2351 }
2352
2353 static int vhost_user_crypto_create_session(struct vhost_dev *dev,
2354                                             void *session_info,
2355                                             uint64_t *session_id)
2356 {
2357     int ret;
2358     bool crypto_session = virtio_has_feature(dev->protocol_features,
2359                                        VHOST_USER_PROTOCOL_F_CRYPTO_SESSION);
2360     CryptoDevBackendSymSessionInfo *sess_info = session_info;
2361     VhostUserMsg msg = {
2362         .hdr.request = VHOST_USER_CREATE_CRYPTO_SESSION,
2363         .hdr.flags = VHOST_USER_VERSION,
2364         .hdr.size = sizeof(msg.payload.session),
2365     };
2366
2367     assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER);
2368
2369     if (!crypto_session) {
2370         error_report("vhost-user trying to send unhandled ioctl");
2371         return -ENOTSUP;
2372     }
2373
2374     memcpy(&msg.payload.session.session_setup_data, sess_info,
2375               sizeof(CryptoDevBackendSymSessionInfo));
2376     if (sess_info->key_len) {
2377         memcpy(&msg.payload.session.key, sess_info->cipher_key,
2378                sess_info->key_len);
2379     }
2380     if (sess_info->auth_key_len > 0) {
2381         memcpy(&msg.payload.session.auth_key, sess_info->auth_key,
2382                sess_info->auth_key_len);
2383     }
2384     ret = vhost_user_write(dev, &msg, NULL, 0);
2385     if (ret < 0) {
2386         error_report("vhost_user_write() return %d, create session failed",
2387                      ret);
2388         return ret;
2389     }
2390
2391     ret = vhost_user_read(dev, &msg);
2392     if (ret < 0) {
2393         error_report("vhost_user_read() return %d, create session failed",
2394                      ret);
2395         return ret;
2396     }
2397
2398     if (msg.hdr.request != VHOST_USER_CREATE_CRYPTO_SESSION) {
2399         error_report("Received unexpected msg type. Expected %d received %d",
2400                      VHOST_USER_CREATE_CRYPTO_SESSION, msg.hdr.request);
2401         return -EPROTO;
2402     }
2403
2404     if (msg.hdr.size != sizeof(msg.payload.session)) {
2405         error_report("Received bad msg size.");
2406         return -EPROTO;
2407     }
2408
2409     if (msg.payload.session.session_id < 0) {
2410         error_report("Bad session id: %" PRId64 "",
2411                               msg.payload.session.session_id);
2412         return -EINVAL;
2413     }
2414     *session_id = msg.payload.session.session_id;
2415
2416     return 0;
2417 }
2418
2419 static int
2420 vhost_user_crypto_close_session(struct vhost_dev *dev, uint64_t session_id)
2421 {
2422     int ret;
2423     bool crypto_session = virtio_has_feature(dev->protocol_features,
2424                                        VHOST_USER_PROTOCOL_F_CRYPTO_SESSION);
2425     VhostUserMsg msg = {
2426         .hdr.request = VHOST_USER_CLOSE_CRYPTO_SESSION,
2427         .hdr.flags = VHOST_USER_VERSION,
2428         .hdr.size = sizeof(msg.payload.u64),
2429     };
2430     msg.payload.u64 = session_id;
2431
2432     if (!crypto_session) {
2433         error_report("vhost-user trying to send unhandled ioctl");
2434         return -ENOTSUP;
2435     }
2436
2437     ret = vhost_user_write(dev, &msg, NULL, 0);
2438     if (ret < 0) {
2439         error_report("vhost_user_write() return %d, close session failed",
2440                      ret);
2441         return ret;
2442     }
2443
2444     return 0;
2445 }
2446
2447 static bool vhost_user_mem_section_filter(struct vhost_dev *dev,
2448                                           MemoryRegionSection *section)
2449 {
2450     return memory_region_get_fd(section->mr) >= 0;
2451 }
2452
2453 static int vhost_user_get_inflight_fd(struct vhost_dev *dev,
2454                                       uint16_t queue_size,
2455                                       struct vhost_inflight *inflight)
2456 {
2457     void *addr;
2458     int fd;
2459     int ret;
2460     struct vhost_user *u = dev->opaque;
2461     CharBackend *chr = u->user->chr;
2462     VhostUserMsg msg = {
2463         .hdr.request = VHOST_USER_GET_INFLIGHT_FD,
2464         .hdr.flags = VHOST_USER_VERSION,
2465         .payload.inflight.num_queues = dev->nvqs,
2466         .payload.inflight.queue_size = queue_size,
2467         .hdr.size = sizeof(msg.payload.inflight),
2468     };
2469
2470     if (!virtio_has_feature(dev->protocol_features,
2471                             VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD)) {
2472         return 0;
2473     }
2474
2475     ret = vhost_user_write(dev, &msg, NULL, 0);
2476     if (ret < 0) {
2477         return ret;
2478     }
2479
2480     ret = vhost_user_read(dev, &msg);
2481     if (ret < 0) {
2482         return ret;
2483     }
2484
2485     if (msg.hdr.request != VHOST_USER_GET_INFLIGHT_FD) {
2486         error_report("Received unexpected msg type. "
2487                      "Expected %d received %d",
2488                      VHOST_USER_GET_INFLIGHT_FD, msg.hdr.request);
2489         return -EPROTO;
2490     }
2491
2492     if (msg.hdr.size != sizeof(msg.payload.inflight)) {
2493         error_report("Received bad msg size.");
2494         return -EPROTO;
2495     }
2496
2497     if (!msg.payload.inflight.mmap_size) {
2498         return 0;
2499     }
2500
2501     fd = qemu_chr_fe_get_msgfd(chr);
2502     if (fd < 0) {
2503         error_report("Failed to get mem fd");
2504         return -EIO;
2505     }
2506
2507     addr = mmap(0, msg.payload.inflight.mmap_size, PROT_READ | PROT_WRITE,
2508                 MAP_SHARED, fd, msg.payload.inflight.mmap_offset);
2509
2510     if (addr == MAP_FAILED) {
2511         error_report("Failed to mmap mem fd");
2512         close(fd);
2513         return -EFAULT;
2514     }
2515
2516     inflight->addr = addr;
2517     inflight->fd = fd;
2518     inflight->size = msg.payload.inflight.mmap_size;
2519     inflight->offset = msg.payload.inflight.mmap_offset;
2520     inflight->queue_size = queue_size;
2521
2522     return 0;
2523 }
2524
2525 static int vhost_user_set_inflight_fd(struct vhost_dev *dev,
2526                                       struct vhost_inflight *inflight)
2527 {
2528     VhostUserMsg msg = {
2529         .hdr.request = VHOST_USER_SET_INFLIGHT_FD,
2530         .hdr.flags = VHOST_USER_VERSION,
2531         .payload.inflight.mmap_size = inflight->size,
2532         .payload.inflight.mmap_offset = inflight->offset,
2533         .payload.inflight.num_queues = dev->nvqs,
2534         .payload.inflight.queue_size = inflight->queue_size,
2535         .hdr.size = sizeof(msg.payload.inflight),
2536     };
2537
2538     if (!virtio_has_feature(dev->protocol_features,
2539                             VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD)) {
2540         return 0;
2541     }
2542
2543     return vhost_user_write(dev, &msg, &inflight->fd, 1);
2544 }
2545
2546 static void vhost_user_state_destroy(gpointer data)
2547 {
2548     VhostUserHostNotifier *n = (VhostUserHostNotifier *) data;
2549     if (n) {
2550         vhost_user_host_notifier_remove(n, NULL);
2551         object_unparent(OBJECT(&n->mr));
2552         /*
2553          * We can't free until vhost_user_host_notifier_remove has
2554          * done it's thing so schedule the free with RCU.
2555          */
2556         g_free_rcu(n, rcu);
2557     }
2558 }
2559
2560 bool vhost_user_init(VhostUserState *user, CharBackend *chr, Error **errp)
2561 {
2562     if (user->chr) {
2563         error_setg(errp, "Cannot initialize vhost-user state");
2564         return false;
2565     }
2566     user->chr = chr;
2567     user->memory_slots = 0;
2568     user->notifiers = g_ptr_array_new_full(VIRTIO_QUEUE_MAX / 4,
2569                                            &vhost_user_state_destroy);
2570     return true;
2571 }
2572
2573 void vhost_user_cleanup(VhostUserState *user)
2574 {
2575     if (!user->chr) {
2576         return;
2577     }
2578     memory_region_transaction_begin();
2579     user->notifiers = (GPtrArray *) g_ptr_array_free(user->notifiers, true);
2580     memory_region_transaction_commit();
2581     user->chr = NULL;
2582 }
2583
2584
2585 typedef struct {
2586     vu_async_close_fn cb;
2587     DeviceState *dev;
2588     CharBackend *cd;
2589     struct vhost_dev *vhost;
2590 } VhostAsyncCallback;
2591
2592 static void vhost_user_async_close_bh(void *opaque)
2593 {
2594     VhostAsyncCallback *data = opaque;
2595     struct vhost_dev *vhost = data->vhost;
2596
2597     /*
2598      * If the vhost_dev has been cleared in the meantime there is
2599      * nothing left to do as some other path has completed the
2600      * cleanup.
2601      */
2602     if (vhost->vdev) {
2603         data->cb(data->dev);
2604     }
2605
2606     g_free(data);
2607 }
2608
2609 /*
2610  * We only schedule the work if the machine is running. If suspended
2611  * we want to keep all the in-flight data as is for migration
2612  * purposes.
2613  */
2614 void vhost_user_async_close(DeviceState *d,
2615                             CharBackend *chardev, struct vhost_dev *vhost,
2616                             vu_async_close_fn cb)
2617 {
2618     if (!runstate_check(RUN_STATE_SHUTDOWN)) {
2619         /*
2620          * A close event may happen during a read/write, but vhost
2621          * code assumes the vhost_dev remains setup, so delay the
2622          * stop & clear.
2623          */
2624         AioContext *ctx = qemu_get_current_aio_context();
2625         VhostAsyncCallback *data = g_new0(VhostAsyncCallback, 1);
2626
2627         /* Save data for the callback */
2628         data->cb = cb;
2629         data->dev = d;
2630         data->cd = chardev;
2631         data->vhost = vhost;
2632
2633         /* Disable any further notifications on the chardev */
2634         qemu_chr_fe_set_handlers(chardev,
2635                                  NULL, NULL, NULL, NULL, NULL, NULL,
2636                                  false);
2637
2638         aio_bh_schedule_oneshot(ctx, vhost_user_async_close_bh, data);
2639
2640         /*
2641          * Move vhost device to the stopped state. The vhost-user device
2642          * will be clean up and disconnected in BH. This can be useful in
2643          * the vhost migration code. If disconnect was caught there is an
2644          * option for the general vhost code to get the dev state without
2645          * knowing its type (in this case vhost-user).
2646          *
2647          * Note if the vhost device is fully cleared by the time we
2648          * execute the bottom half we won't continue with the cleanup.
2649          */
2650         vhost->started = false;
2651     }
2652 }
2653
2654 static int vhost_user_dev_start(struct vhost_dev *dev, bool started)
2655 {
2656     if (!virtio_has_feature(dev->protocol_features,
2657                             VHOST_USER_PROTOCOL_F_STATUS)) {
2658         return 0;
2659     }
2660
2661     /* Set device status only for last queue pair */
2662     if (dev->vq_index + dev->nvqs != dev->vq_index_end) {
2663         return 0;
2664     }
2665
2666     if (started) {
2667         return vhost_user_add_status(dev, VIRTIO_CONFIG_S_ACKNOWLEDGE |
2668                                           VIRTIO_CONFIG_S_DRIVER |
2669                                           VIRTIO_CONFIG_S_DRIVER_OK);
2670     } else {
2671         return 0;
2672     }
2673 }
2674
2675 static void vhost_user_reset_status(struct vhost_dev *dev)
2676 {
2677     /* Set device status only for last queue pair */
2678     if (dev->vq_index + dev->nvqs != dev->vq_index_end) {
2679         return;
2680     }
2681
2682     if (virtio_has_feature(dev->protocol_features,
2683                            VHOST_USER_PROTOCOL_F_STATUS)) {
2684         vhost_user_set_status(dev, 0);
2685     }
2686 }
2687
2688 const VhostOps user_ops = {
2689         .backend_type = VHOST_BACKEND_TYPE_USER,
2690         .vhost_backend_init = vhost_user_backend_init,
2691         .vhost_backend_cleanup = vhost_user_backend_cleanup,
2692         .vhost_backend_memslots_limit = vhost_user_memslots_limit,
2693         .vhost_set_log_base = vhost_user_set_log_base,
2694         .vhost_set_mem_table = vhost_user_set_mem_table,
2695         .vhost_set_vring_addr = vhost_user_set_vring_addr,
2696         .vhost_set_vring_endian = vhost_user_set_vring_endian,
2697         .vhost_set_vring_num = vhost_user_set_vring_num,
2698         .vhost_set_vring_base = vhost_user_set_vring_base,
2699         .vhost_get_vring_base = vhost_user_get_vring_base,
2700         .vhost_set_vring_kick = vhost_user_set_vring_kick,
2701         .vhost_set_vring_call = vhost_user_set_vring_call,
2702         .vhost_set_vring_err = vhost_user_set_vring_err,
2703         .vhost_set_features = vhost_user_set_features,
2704         .vhost_get_features = vhost_user_get_features,
2705         .vhost_set_owner = vhost_user_set_owner,
2706         .vhost_reset_device = vhost_user_reset_device,
2707         .vhost_get_vq_index = vhost_user_get_vq_index,
2708         .vhost_set_vring_enable = vhost_user_set_vring_enable,
2709         .vhost_requires_shm_log = vhost_user_requires_shm_log,
2710         .vhost_migration_done = vhost_user_migration_done,
2711         .vhost_backend_can_merge = vhost_user_can_merge,
2712         .vhost_net_set_mtu = vhost_user_net_set_mtu,
2713         .vhost_set_iotlb_callback = vhost_user_set_iotlb_callback,
2714         .vhost_send_device_iotlb_msg = vhost_user_send_device_iotlb_msg,
2715         .vhost_get_config = vhost_user_get_config,
2716         .vhost_set_config = vhost_user_set_config,
2717         .vhost_crypto_create_session = vhost_user_crypto_create_session,
2718         .vhost_crypto_close_session = vhost_user_crypto_close_session,
2719         .vhost_backend_mem_section_filter = vhost_user_mem_section_filter,
2720         .vhost_get_inflight_fd = vhost_user_get_inflight_fd,
2721         .vhost_set_inflight_fd = vhost_user_set_inflight_fd,
2722         .vhost_dev_start = vhost_user_dev_start,
2723         .vhost_reset_status = vhost_user_reset_status,
2724 };