/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2010-2018 Intel Corporation
 */

/* The vhost-user protocol connection is an external interface, so it must be
 * robust against invalid inputs.
 *
 * This is important because the vhost-user master is only one step removed
 * from the guest. Malicious guests that have escaped will then launch further
 * attacks from the vhost-user master.
 *
 * Even in deployments where guests are trusted, a bug in the vhost-user master
 * can still cause invalid messages to be sent. Such messages must not
 * compromise the stability of the DPDK application by causing crashes, memory
 * corruption, or other problematic behavior.
 *
 * Do not assume received VhostUserMsg fields contain sensible values!
 */
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <errno.h>
#include <fcntl.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/syscall.h>
#ifdef RTE_LIBRTE_VHOST_NUMA
#include <numaif.h>
#endif
#ifdef RTE_LIBRTE_VHOST_POSTCOPY
#include <linux/userfaultfd.h>
#endif

#include <rte_common.h>
#include <rte_malloc.h>
#include <rte_log.h>

#include "iotlb.h"
#include "vhost.h"
#include "vhost_user.h"
#define VIRTIO_MIN_MTU 68
#define VIRTIO_MAX_MTU 65535
static const char *vhost_message_str[VHOST_USER_MAX] = {
	[VHOST_USER_NONE] = "VHOST_USER_NONE",
	[VHOST_USER_GET_FEATURES] = "VHOST_USER_GET_FEATURES",
	[VHOST_USER_SET_FEATURES] = "VHOST_USER_SET_FEATURES",
	[VHOST_USER_SET_OWNER] = "VHOST_USER_SET_OWNER",
	[VHOST_USER_RESET_OWNER] = "VHOST_USER_RESET_OWNER",
	[VHOST_USER_SET_MEM_TABLE] = "VHOST_USER_SET_MEM_TABLE",
	[VHOST_USER_SET_LOG_BASE] = "VHOST_USER_SET_LOG_BASE",
	[VHOST_USER_SET_LOG_FD] = "VHOST_USER_SET_LOG_FD",
	[VHOST_USER_SET_VRING_NUM] = "VHOST_USER_SET_VRING_NUM",
	[VHOST_USER_SET_VRING_ADDR] = "VHOST_USER_SET_VRING_ADDR",
	[VHOST_USER_SET_VRING_BASE] = "VHOST_USER_SET_VRING_BASE",
	[VHOST_USER_GET_VRING_BASE] = "VHOST_USER_GET_VRING_BASE",
	[VHOST_USER_SET_VRING_KICK] = "VHOST_USER_SET_VRING_KICK",
	[VHOST_USER_SET_VRING_CALL] = "VHOST_USER_SET_VRING_CALL",
	[VHOST_USER_SET_VRING_ERR] = "VHOST_USER_SET_VRING_ERR",
	[VHOST_USER_GET_PROTOCOL_FEATURES] = "VHOST_USER_GET_PROTOCOL_FEATURES",
	[VHOST_USER_SET_PROTOCOL_FEATURES] = "VHOST_USER_SET_PROTOCOL_FEATURES",
	[VHOST_USER_GET_QUEUE_NUM] = "VHOST_USER_GET_QUEUE_NUM",
	[VHOST_USER_SET_VRING_ENABLE] = "VHOST_USER_SET_VRING_ENABLE",
	[VHOST_USER_SEND_RARP] = "VHOST_USER_SEND_RARP",
	[VHOST_USER_NET_SET_MTU] = "VHOST_USER_NET_SET_MTU",
	[VHOST_USER_SET_SLAVE_REQ_FD] = "VHOST_USER_SET_SLAVE_REQ_FD",
	[VHOST_USER_IOTLB_MSG] = "VHOST_USER_IOTLB_MSG",
	[VHOST_USER_CRYPTO_CREATE_SESS] = "VHOST_USER_CRYPTO_CREATE_SESS",
	[VHOST_USER_CRYPTO_CLOSE_SESS] = "VHOST_USER_CRYPTO_CLOSE_SESS",
	[VHOST_USER_POSTCOPY_ADVISE] = "VHOST_USER_POSTCOPY_ADVISE",
	[VHOST_USER_POSTCOPY_LISTEN] = "VHOST_USER_POSTCOPY_LISTEN",
	[VHOST_USER_POSTCOPY_END] = "VHOST_USER_POSTCOPY_END",
};
static int send_vhost_reply(int sockfd, struct VhostUserMsg *msg);
static int read_vhost_message(int sockfd, struct VhostUserMsg *msg);
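/*
 * Return the block size reported by fstat() for 'fd'. For a hugetlbfs
 * backed file this is the hugepage size, which vhost_user_set_mem_table()
 * uses below as the required mmap() length alignment.
 */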
static uint64_t
get_blk_size(int fd)
{
	struct stat stat;
	int ret;

	ret = fstat(fd, &stat);
	return ret == -1 ? (uint64_t)-1 : (uint64_t)stat.st_blksize;
}
/*
 * Reclaim all the outstanding zmbufs for a virtqueue.
 */
static void
drain_zmbuf_list(struct vhost_virtqueue *vq)
{
	struct zcopy_mbuf *zmbuf, *next;

	for (zmbuf = TAILQ_FIRST(&vq->zmbuf_list);
	     zmbuf != NULL; zmbuf = next) {
		next = TAILQ_NEXT(zmbuf, next);

		while (!mbuf_is_consumed(zmbuf->mbuf))
			usleep(1000);

		TAILQ_REMOVE(&vq->zmbuf_list, zmbuf, next);
		restore_mbuf(zmbuf->mbuf);
		rte_pktmbuf_free(zmbuf->mbuf);
	}
}
static void
free_mem_region(struct virtio_net *dev)
{
	uint32_t i;
	struct rte_vhost_mem_region *reg;
	struct vhost_virtqueue *vq;

	if (!dev || !dev->mem)
		return;

	if (dev->dequeue_zero_copy) {
		for (i = 0; i < dev->nr_vring; i++) {
			vq = dev->virtqueue[i];
			if (vq)
				drain_zmbuf_list(vq);
		}
	}

	for (i = 0; i < dev->mem->nregions; i++) {
		reg = &dev->mem->regions[i];
		if (reg->host_user_addr) {
			munmap(reg->mmap_addr, reg->mmap_size);
			close(reg->fd);
		}
	}
}
static void
vhost_backend_cleanup(struct virtio_net *dev)
{
	if (dev->mem) {
		free_mem_region(dev);
		rte_free(dev->mem);
		dev->mem = NULL;
	}

	free(dev->guest_pages);
	dev->guest_pages = NULL;

	if (dev->log_addr) {
		munmap((void *)(uintptr_t)dev->log_addr, dev->log_size);
		dev->log_addr = 0;
	}

	if (dev->slave_req_fd >= 0) {
		close(dev->slave_req_fd);
		dev->slave_req_fd = -1;
	}

	if (dev->postcopy_ufd >= 0) {
		close(dev->postcopy_ufd);
		dev->postcopy_ufd = -1;
	}

	dev->postcopy_listening = 0;
}
/*
 * This function just returns success at the moment unless
 * the device hasn't been initialised.
 */
static int
vhost_user_set_owner(struct virtio_net **pdev __rte_unused,
			struct VhostUserMsg *msg __rte_unused,
			int main_fd __rte_unused)
{
	return RTE_VHOST_MSG_RESULT_OK;
}
static int
vhost_user_reset_owner(struct virtio_net **pdev,
			struct VhostUserMsg *msg __rte_unused,
			int main_fd __rte_unused)
{
	struct virtio_net *dev = *pdev;

	vhost_destroy_device_notify(dev);

	cleanup_device(dev, 0);
	reset_device(dev);
	return RTE_VHOST_MSG_RESULT_OK;
}
/*
 * The features that we support are requested.
 */
static int
vhost_user_get_features(struct virtio_net **pdev, struct VhostUserMsg *msg,
			int main_fd __rte_unused)
{
	struct virtio_net *dev = *pdev;
	uint64_t features = 0;

	rte_vhost_driver_get_features(dev->ifname, &features);

	msg->payload.u64 = features;
	msg->size = sizeof(msg->payload.u64);
	msg->fd_num = 0;

	return RTE_VHOST_MSG_RESULT_REPLY;
}
/*
 * The queue number that we support is requested.
 */
static int
vhost_user_get_queue_num(struct virtio_net **pdev, struct VhostUserMsg *msg,
			int main_fd __rte_unused)
{
	struct virtio_net *dev = *pdev;
	uint32_t queue_num = 0;

	rte_vhost_driver_get_queue_num(dev->ifname, &queue_num);

	msg->payload.u64 = (uint64_t)queue_num;
	msg->size = sizeof(msg->payload.u64);
	msg->fd_num = 0;

	return RTE_VHOST_MSG_RESULT_REPLY;
}
/*
 * We receive the negotiated features supported by us and the virtio device.
 */
static int
vhost_user_set_features(struct virtio_net **pdev, struct VhostUserMsg *msg,
			int main_fd __rte_unused)
{
	struct virtio_net *dev = *pdev;
	uint64_t features = msg->payload.u64;
	uint64_t vhost_features = 0;
	struct rte_vdpa_device *vdpa_dev;
	int did = -1;

	rte_vhost_driver_get_features(dev->ifname, &vhost_features);
	if (features & ~vhost_features) {
		RTE_LOG(ERR, VHOST_CONFIG,
			"(%d) received invalid negotiated features.\n",
			dev->vid);
		return RTE_VHOST_MSG_RESULT_ERR;
	}

	if (dev->flags & VIRTIO_DEV_RUNNING) {
		if (dev->features == features)
			return RTE_VHOST_MSG_RESULT_OK;

		/*
		 * Error out if master tries to change features while device is
		 * in running state. The exception being VHOST_F_LOG_ALL, which
		 * is enabled when the live-migration starts.
		 */
		if ((dev->features ^ features) & ~(1ULL << VHOST_F_LOG_ALL)) {
			RTE_LOG(ERR, VHOST_CONFIG,
				"(%d) features changed while device is running.\n",
				dev->vid);
			return RTE_VHOST_MSG_RESULT_ERR;
		}

		if (dev->notify_ops->features_changed)
			dev->notify_ops->features_changed(dev->vid, features);
	}

	dev->features = features;
	if (dev->features &
		((1 << VIRTIO_NET_F_MRG_RXBUF) | (1ULL << VIRTIO_F_VERSION_1))) {
		dev->vhost_hlen = sizeof(struct virtio_net_hdr_mrg_rxbuf);
	} else {
		dev->vhost_hlen = sizeof(struct virtio_net_hdr);
	}
	VHOST_LOG_DEBUG(VHOST_CONFIG,
		"(%d) mergeable RX buffers %s, virtio 1 %s\n",
		dev->vid,
		(dev->features & (1 << VIRTIO_NET_F_MRG_RXBUF)) ? "on" : "off",
		(dev->features & (1ULL << VIRTIO_F_VERSION_1)) ? "on" : "off");

	if ((dev->flags & VIRTIO_DEV_BUILTIN_VIRTIO_NET) &&
	    !(dev->features & (1ULL << VIRTIO_NET_F_MQ))) {
		/*
		 * Remove all but first queue pair if MQ hasn't been
		 * negotiated. This is safe because the device is not
		 * running at this stage.
		 */
		while (dev->nr_vring > 2) {
			struct vhost_virtqueue *vq;

			vq = dev->virtqueue[--dev->nr_vring];
			if (!vq)
				continue;

			dev->virtqueue[dev->nr_vring] = NULL;
			cleanup_vq(vq, 1);
			free_vq(dev, vq);
		}
	}

	did = dev->vdpa_dev_id;
	vdpa_dev = rte_vdpa_get_device(did);
	if (vdpa_dev && vdpa_dev->ops->set_features)
		vdpa_dev->ops->set_features(dev->vid);

	return RTE_VHOST_MSG_RESULT_OK;
}
/*
 * The virtio device sends us the size of the descriptor ring.
 */
static int
vhost_user_set_vring_num(struct virtio_net **pdev,
			struct VhostUserMsg *msg,
			int main_fd __rte_unused)
{
	struct virtio_net *dev = *pdev;
	struct vhost_virtqueue *vq = dev->virtqueue[msg->payload.state.index];

	vq->size = msg->payload.state.num;

	/* VIRTIO 1.0, 2.4 Virtqueues says:
	 *
	 *   Queue Size value is always a power of 2. The maximum Queue Size
	 *   value is 32768.
	 */
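	/*
	 * x & (x - 1) clears the lowest set bit of x, so the expression is
	 * zero only when vq->size is a power of two; the single test below
	 * therefore enforces both spec constraints at once.
	 */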
	if ((vq->size & (vq->size - 1)) || vq->size > 32768) {
		RTE_LOG(ERR, VHOST_CONFIG,
			"invalid virtqueue size %u\n", vq->size);
		return RTE_VHOST_MSG_RESULT_ERR;
	}

	if (dev->dequeue_zero_copy) {
		vq->nr_zmbuf = 0;
		vq->last_zmbuf_idx = 0;
		vq->zmbuf_size = vq->size;
		vq->zmbufs = rte_zmalloc(NULL, vq->zmbuf_size *
					 sizeof(struct zcopy_mbuf), 0);
		if (vq->zmbufs == NULL) {
			RTE_LOG(WARNING, VHOST_CONFIG,
				"failed to allocate mem for zero copy; "
				"zero copy is force disabled\n");
			dev->dequeue_zero_copy = 0;
		}
		TAILQ_INIT(&vq->zmbuf_list);
	}

	if (vq_is_packed(dev)) {
		vq->shadow_used_packed = rte_malloc(NULL,
				vq->size *
				sizeof(struct vring_used_elem_packed),
				RTE_CACHE_LINE_SIZE);
		if (!vq->shadow_used_packed) {
			RTE_LOG(ERR, VHOST_CONFIG,
				"failed to allocate memory for shadow used ring.\n");
			return RTE_VHOST_MSG_RESULT_ERR;
		}
	} else {
		vq->shadow_used_split = rte_malloc(NULL,
				vq->size * sizeof(struct vring_used_elem),
				RTE_CACHE_LINE_SIZE);
		if (!vq->shadow_used_split) {
			RTE_LOG(ERR, VHOST_CONFIG,
				"failed to allocate memory for shadow used ring.\n");
			return RTE_VHOST_MSG_RESULT_ERR;
		}
	}

	vq->batch_copy_elems = rte_malloc(NULL,
				vq->size * sizeof(struct batch_copy_elem),
				RTE_CACHE_LINE_SIZE);
	if (!vq->batch_copy_elems) {
		RTE_LOG(ERR, VHOST_CONFIG,
			"failed to allocate memory for batching copy.\n");
		return RTE_VHOST_MSG_RESULT_ERR;
	}

	return RTE_VHOST_MSG_RESULT_OK;
}
/*
 * Reallocate virtio_dev and vhost_virtqueue data structures to place them on
 * the same NUMA node as the memory of the vring descriptor.
 */
#ifdef RTE_LIBRTE_VHOST_NUMA
static struct virtio_net*
numa_realloc(struct virtio_net *dev, int index)
{
	int oldnode, newnode;
	struct virtio_net *old_dev;
	struct vhost_virtqueue *old_vq, *vq;
	struct zcopy_mbuf *new_zmbuf;
	struct vring_used_elem *new_shadow_used_split;
	struct vring_used_elem_packed *new_shadow_used_packed;
	struct batch_copy_elem *new_batch_copy_elems;
	int ret;

	old_dev = dev;
	vq = old_vq = dev->virtqueue[index];

	ret = get_mempolicy(&newnode, NULL, 0, old_vq->desc,
			    MPOL_F_NODE | MPOL_F_ADDR);

	/* check if we need to reallocate vq */
	ret |= get_mempolicy(&oldnode, NULL, 0, old_vq,
			     MPOL_F_NODE | MPOL_F_ADDR);
	if (ret) {
		RTE_LOG(ERR, VHOST_CONFIG,
			"Unable to get vq numa information.\n");
		return dev;
	}
	if (oldnode != newnode) {
		RTE_LOG(INFO, VHOST_CONFIG,
			"reallocate vq from %d to %d node\n", oldnode, newnode);
		vq = rte_malloc_socket(NULL, sizeof(*vq), 0, newnode);
		if (!vq)
			return dev;

		memcpy(vq, old_vq, sizeof(*vq));
		TAILQ_INIT(&vq->zmbuf_list);

		if (dev->dequeue_zero_copy) {
			new_zmbuf = rte_malloc_socket(NULL, vq->zmbuf_size *
					sizeof(struct zcopy_mbuf), 0, newnode);
			if (new_zmbuf) {
				rte_free(vq->zmbufs);
				vq->zmbufs = new_zmbuf;
			}
		}

		if (vq_is_packed(dev)) {
			new_shadow_used_packed = rte_malloc_socket(NULL,
					vq->size *
					sizeof(struct vring_used_elem_packed),
					RTE_CACHE_LINE_SIZE,
					newnode);
			if (new_shadow_used_packed) {
				rte_free(vq->shadow_used_packed);
				vq->shadow_used_packed = new_shadow_used_packed;
			}
		} else {
			new_shadow_used_split = rte_malloc_socket(NULL,
					vq->size *
					sizeof(struct vring_used_elem),
					RTE_CACHE_LINE_SIZE,
					newnode);
			if (new_shadow_used_split) {
				rte_free(vq->shadow_used_split);
				vq->shadow_used_split = new_shadow_used_split;
			}
		}

		new_batch_copy_elems = rte_malloc_socket(NULL,
			vq->size * sizeof(struct batch_copy_elem),
			RTE_CACHE_LINE_SIZE,
			newnode);
		if (new_batch_copy_elems) {
			rte_free(vq->batch_copy_elems);
			vq->batch_copy_elems = new_batch_copy_elems;
		}

		rte_free(old_vq);
	}

	/* check if we need to reallocate dev */
	ret = get_mempolicy(&oldnode, NULL, 0, old_dev,
			    MPOL_F_NODE | MPOL_F_ADDR);
	if (ret) {
		RTE_LOG(ERR, VHOST_CONFIG,
			"Unable to get dev numa information.\n");
		goto out;
	}
	if (oldnode != newnode) {
		RTE_LOG(INFO, VHOST_CONFIG,
			"reallocate dev from %d to %d node\n",
			oldnode, newnode);
		dev = rte_malloc_socket(NULL, sizeof(*dev), 0, newnode);
		if (!dev) {
			dev = old_dev;
			goto out;
		}

		memcpy(dev, old_dev, sizeof(*dev));
		rte_free(old_dev);
	}

out:
	dev->virtqueue[index] = vq;
	vhost_devices[dev->vid] = dev;

	if (old_vq != vq)
		vhost_user_iotlb_init(dev, index);

	return dev;
}
#else
static struct virtio_net*
numa_realloc(struct virtio_net *dev, int index __rte_unused)
{
	return dev;
}
#endif
/* Converts QEMU virtual address to Vhost virtual address. */
static uint64_t
qva_to_vva(struct virtio_net *dev, uint64_t qva, uint64_t *len)
{
	struct rte_vhost_mem_region *r;
	uint32_t i;

	if (unlikely(!dev || !dev->mem))
		goto out_error;

	/* Find the region where the address lives. */
	for (i = 0; i < dev->mem->nregions; i++) {
		r = &dev->mem->regions[i];

		if (qva >= r->guest_user_addr &&
		    qva <  r->guest_user_addr + r->size) {

			if (unlikely(*len > r->guest_user_addr + r->size - qva))
				*len = r->guest_user_addr + r->size - qva;

			return qva - r->guest_user_addr +
			       r->host_user_addr;
		}
	}
out_error:
	*len = 0;

	return 0;
}
/*
 * Converts ring address to Vhost virtual address.
 * If IOMMU is enabled, the ring address is a guest IO virtual address,
 * else it is a QEMU virtual address.
 */
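/*
 * On an IOTLB cache miss, 0 is returned after requesting the missing
 * translation from the master via vhost_user_iotlb_miss(); callers treat
 * the zero address as "not mapped yet" and retry on a later message.
 */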
static uint64_t
ring_addr_to_vva(struct virtio_net *dev, struct vhost_virtqueue *vq,
		uint64_t ra, uint64_t *size)
{
	if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM)) {
		uint64_t vva;

		vva = vhost_user_iotlb_cache_find(vq, ra,
					size, VHOST_ACCESS_RW);
		if (!vva)
			vhost_user_iotlb_miss(dev, ra, VHOST_ACCESS_RW);

		return vva;
	}

	return qva_to_vva(dev, ra, size);
}
static struct virtio_net *
translate_ring_addresses(struct virtio_net *dev, int vq_index)
{
	struct vhost_virtqueue *vq = dev->virtqueue[vq_index];
	struct vhost_vring_addr *addr = &vq->ring_addrs;
	uint64_t len, expected_len;

	if (vq_is_packed(dev)) {
		len = sizeof(struct vring_packed_desc) * vq->size;
		vq->desc_packed = (struct vring_packed_desc *)(uintptr_t)
			ring_addr_to_vva(dev, vq, addr->desc_user_addr, &len);
		vq->log_guest_addr = 0;
		if (vq->desc_packed == NULL ||
				len != sizeof(struct vring_packed_desc) *
				vq->size) {
			RTE_LOG(DEBUG, VHOST_CONFIG,
				"(%d) failed to map desc_packed ring.\n",
				dev->vid);
			return dev;
		}

		dev = numa_realloc(dev, vq_index);
		vq = dev->virtqueue[vq_index];
		addr = &vq->ring_addrs;

		len = sizeof(struct vring_packed_desc_event);
		vq->driver_event = (struct vring_packed_desc_event *)
					(uintptr_t)ring_addr_to_vva(dev,
					vq, addr->avail_user_addr, &len);
		if (vq->driver_event == NULL ||
				len != sizeof(struct vring_packed_desc_event)) {
			RTE_LOG(DEBUG, VHOST_CONFIG,
				"(%d) failed to find driver area address.\n",
				dev->vid);
			return dev;
		}

		len = sizeof(struct vring_packed_desc_event);
		vq->device_event = (struct vring_packed_desc_event *)
					(uintptr_t)ring_addr_to_vva(dev,
					vq, addr->used_user_addr, &len);
		if (vq->device_event == NULL ||
				len != sizeof(struct vring_packed_desc_event)) {
			RTE_LOG(DEBUG, VHOST_CONFIG,
				"(%d) failed to find device area address.\n",
				dev->vid);
			return dev;
		}

		return dev;
	}

	/* The addresses are converted from QEMU virtual to Vhost virtual. */
	if (vq->desc && vq->avail && vq->used)
		return dev;

	len = sizeof(struct vring_desc) * vq->size;
	vq->desc = (struct vring_desc *)(uintptr_t)ring_addr_to_vva(dev,
			vq, addr->desc_user_addr, &len);
	if (vq->desc == 0 || len != sizeof(struct vring_desc) * vq->size) {
		RTE_LOG(DEBUG, VHOST_CONFIG,
			"(%d) failed to map desc ring.\n",
			dev->vid);
		return dev;
	}

	dev = numa_realloc(dev, vq_index);
	vq = dev->virtqueue[vq_index];
	addr = &vq->ring_addrs;

	len = sizeof(struct vring_avail) + sizeof(uint16_t) * vq->size;
	if (dev->features & (1ULL << VIRTIO_RING_F_EVENT_IDX))
		len += sizeof(uint16_t);
	expected_len = len;
	vq->avail = (struct vring_avail *)(uintptr_t)ring_addr_to_vva(dev,
			vq, addr->avail_user_addr, &len);
	if (vq->avail == 0 || len != expected_len) {
		RTE_LOG(DEBUG, VHOST_CONFIG,
			"(%d) failed to map avail ring.\n",
			dev->vid);
		return dev;
	}

	len = sizeof(struct vring_used) +
		sizeof(struct vring_used_elem) * vq->size;
	if (dev->features & (1ULL << VIRTIO_RING_F_EVENT_IDX))
		len += sizeof(uint16_t);
	expected_len = len;
	vq->used = (struct vring_used *)(uintptr_t)ring_addr_to_vva(dev,
			vq, addr->used_user_addr, &len);
	if (vq->used == 0 || len != expected_len) {
		RTE_LOG(DEBUG, VHOST_CONFIG,
			"(%d) failed to map used ring.\n",
			dev->vid);
		return dev;
	}

	if (vq->last_used_idx != vq->used->idx) {
		RTE_LOG(WARNING, VHOST_CONFIG,
			"last_used_idx (%u) and vq->used->idx (%u) mismatches; "
			"some packets maybe resent for Tx and dropped for Rx\n",
			vq->last_used_idx, vq->used->idx);
		vq->last_used_idx  = vq->used->idx;
		vq->last_avail_idx = vq->used->idx;
	}

	vq->log_guest_addr = addr->log_guest_addr;

	VHOST_LOG_DEBUG(VHOST_CONFIG, "(%d) mapped address desc: %p\n",
			dev->vid, vq->desc);
	VHOST_LOG_DEBUG(VHOST_CONFIG, "(%d) mapped address avail: %p\n",
			dev->vid, vq->avail);
	VHOST_LOG_DEBUG(VHOST_CONFIG, "(%d) mapped address used: %p\n",
			dev->vid, vq->used);
	VHOST_LOG_DEBUG(VHOST_CONFIG, "(%d) log_guest_addr: %" PRIx64 "\n",
			dev->vid, vq->log_guest_addr);

	return dev;
}
/*
 * The virtio device sends us the desc, used and avail ring addresses.
 * This function then converts these to our address space.
 */
static int
vhost_user_set_vring_addr(struct virtio_net **pdev, struct VhostUserMsg *msg,
			int main_fd __rte_unused)
{
	struct virtio_net *dev = *pdev;
	struct vhost_virtqueue *vq;
	struct vhost_vring_addr *addr = &msg->payload.addr;

	if (dev->mem == NULL)
		return RTE_VHOST_MSG_RESULT_ERR;

	/* addr->index refers to the queue index. The txq is 1, rxq is 0. */
	vq = dev->virtqueue[msg->payload.addr.index];

	/*
	 * Ring addresses should not be interpreted as long as the ring is not
	 * started and enabled.
	 */
	memcpy(&vq->ring_addrs, addr, sizeof(*addr));

	vring_invalidate(dev, vq);

	if (vq->enabled && (dev->features &
				(1ULL << VHOST_USER_F_PROTOCOL_FEATURES))) {
		dev = translate_ring_addresses(dev, msg->payload.addr.index);
		if (!dev)
			return RTE_VHOST_MSG_RESULT_ERR;

		*pdev = dev;
	}

	return RTE_VHOST_MSG_RESULT_OK;
}
/*
 * The virtio device sends us the available ring last used index.
 */
static int
vhost_user_set_vring_base(struct virtio_net **pdev,
			struct VhostUserMsg *msg,
			int main_fd __rte_unused)
{
	struct virtio_net *dev = *pdev;
	struct vhost_virtqueue *vq = dev->virtqueue[msg->payload.state.index];
	uint64_t val = msg->payload.state.num;

	if (vq_is_packed(dev)) {
		/*
		 * Bit[0:14]: avail index
		 * Bit[15]: avail wrap counter
		 */
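		/*
		 * The same 16-bit encoding is used in the reply direction:
		 * vhost_user_get_vring_base() packs last_avail_idx and the
		 * wrap counter back into a single value.
		 */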
		vq->last_avail_idx = val & 0x7fff;
		vq->avail_wrap_counter = !!(val & (0x1 << 15));
		/*
		 * Set used index to same value as available one, as
		 * their values should be the same since ring processing
		 * was stopped at get time.
		 */
		vq->last_used_idx = vq->last_avail_idx;
		vq->used_wrap_counter = vq->avail_wrap_counter;
	} else {
		vq->last_used_idx = msg->payload.state.num;
		vq->last_avail_idx = msg->payload.state.num;
	}

	return RTE_VHOST_MSG_RESULT_OK;
}
static int
add_one_guest_page(struct virtio_net *dev, uint64_t guest_phys_addr,
		   uint64_t host_phys_addr, uint64_t size)
{
	struct guest_page *page, *last_page;
	struct guest_page *old_pages;

	if (dev->nr_guest_pages == dev->max_guest_pages) {
		dev->max_guest_pages *= 2;
		old_pages = dev->guest_pages;
		dev->guest_pages = realloc(dev->guest_pages,
					dev->max_guest_pages * sizeof(*page));
		if (!dev->guest_pages) {
			RTE_LOG(ERR, VHOST_CONFIG, "cannot realloc guest_pages\n");
			free(old_pages);
			return -1;
		}
	}

	if (dev->nr_guest_pages > 0) {
		last_page = &dev->guest_pages[dev->nr_guest_pages - 1];
		/* merge if the two pages are contiguous */
		if (host_phys_addr == last_page->host_phys_addr +
				      last_page->size) {
			last_page->size += size;
			return 0;
		}
	}

	page = &dev->guest_pages[dev->nr_guest_pages++];
	page->guest_phys_addr = guest_phys_addr;
	page->host_phys_addr  = host_phys_addr;
	page->size = size;

	return 0;
}
static int
add_guest_pages(struct virtio_net *dev, struct rte_vhost_mem_region *reg,
		uint64_t page_size)
{
	uint64_t reg_size = reg->size;
	uint64_t host_user_addr  = reg->host_user_addr;
	uint64_t guest_phys_addr = reg->guest_phys_addr;
	uint64_t host_phys_addr;
	uint64_t size;

	host_phys_addr = rte_mem_virt2iova((void *)(uintptr_t)host_user_addr);
	size = page_size - (guest_phys_addr & (page_size - 1));
	size = RTE_MIN(size, reg_size);

	if (add_one_guest_page(dev, guest_phys_addr, host_phys_addr, size) < 0)
		return -1;

	host_user_addr  += size;
	guest_phys_addr += size;
	reg_size -= size;

	while (reg_size > 0) {
		size = RTE_MIN(reg_size, page_size);
		host_phys_addr = rte_mem_virt2iova((void *)(uintptr_t)
						   host_user_addr);
		if (add_one_guest_page(dev, guest_phys_addr, host_phys_addr,
				size) < 0)
			return -1;

		host_user_addr  += size;
		guest_phys_addr += size;
		reg_size -= size;
	}

	return 0;
}
#ifdef RTE_LIBRTE_VHOST_DEBUG
/* TODO: enable it only in debug mode? */
static void
dump_guest_pages(struct virtio_net *dev)
{
	uint32_t i;
	struct guest_page *page;

	for (i = 0; i < dev->nr_guest_pages; i++) {
		page = &dev->guest_pages[i];

		RTE_LOG(INFO, VHOST_CONFIG,
			"guest physical page region %u\n"
			"\t guest_phys_addr: %" PRIx64 "\n"
			"\t host_phys_addr : %" PRIx64 "\n"
			"\t size           : %" PRIx64 "\n",
			i,
			page->guest_phys_addr,
			page->host_phys_addr,
			page->size);
	}
}
#else
#define dump_guest_pages(dev)
#endif
static bool
vhost_memory_changed(struct VhostUserMemory *new,
		     struct rte_vhost_memory *old)
{
	uint32_t i;

	if (new->nregions != old->nregions)
		return true;

	for (i = 0; i < new->nregions; ++i) {
		VhostUserMemoryRegion *new_r = &new->regions[i];
		struct rte_vhost_mem_region *old_r = &old->regions[i];

		if (new_r->guest_phys_addr != old_r->guest_phys_addr)
			return true;
		if (new_r->memory_size != old_r->size)
			return true;
		if (new_r->userspace_addr != old_r->guest_user_addr)
			return true;
	}

	return false;
}
static int
vhost_user_set_mem_table(struct virtio_net **pdev, struct VhostUserMsg *msg,
			int main_fd)
{
	struct virtio_net *dev = *pdev;
	struct VhostUserMemory *memory = &msg->payload.memory;
	struct rte_vhost_mem_region *reg;
	void *mmap_addr;
	uint64_t mmap_size;
	uint64_t mmap_offset;
	uint64_t alignment;
	uint32_t i;
	int populate;
	int fd;

	if (memory->nregions > VHOST_MEMORY_MAX_NREGIONS) {
		RTE_LOG(ERR, VHOST_CONFIG,
			"too many memory regions (%u)\n", memory->nregions);
		return RTE_VHOST_MSG_RESULT_ERR;
	}

	if (dev->mem && !vhost_memory_changed(memory, dev->mem)) {
		RTE_LOG(INFO, VHOST_CONFIG,
			"(%d) memory regions not changed\n", dev->vid);

		for (i = 0; i < memory->nregions; i++)
			close(msg->fds[i]);

		return RTE_VHOST_MSG_RESULT_OK;
	}

	if (dev->mem) {
		free_mem_region(dev);
		rte_free(dev->mem);
		dev->mem = NULL;
	}

	/* Flush IOTLB cache as previous HVAs are now invalid */
	if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))
		for (i = 0; i < dev->nr_vring; i++)
			vhost_user_iotlb_flush_all(dev->virtqueue[i]);

	dev->nr_guest_pages = 0;
	if (!dev->guest_pages) {
		dev->max_guest_pages = 8;
		dev->guest_pages = malloc(dev->max_guest_pages *
						sizeof(struct guest_page));
		if (dev->guest_pages == NULL) {
			RTE_LOG(ERR, VHOST_CONFIG,
				"(%d) failed to allocate memory "
				"for dev->guest_pages\n",
				dev->vid);
			return RTE_VHOST_MSG_RESULT_ERR;
		}
	}

	dev->mem = rte_zmalloc("vhost-mem-table", sizeof(struct rte_vhost_memory) +
		sizeof(struct rte_vhost_mem_region) * memory->nregions, 0);
	if (dev->mem == NULL) {
		RTE_LOG(ERR, VHOST_CONFIG,
			"(%d) failed to allocate memory for dev->mem\n",
			dev->vid);
		return RTE_VHOST_MSG_RESULT_ERR;
	}
	dev->mem->nregions = memory->nregions;

	for (i = 0; i < memory->nregions; i++) {
		fd  = msg->fds[i];
		reg = &dev->mem->regions[i];

		reg->guest_phys_addr = memory->regions[i].guest_phys_addr;
		reg->guest_user_addr = memory->regions[i].userspace_addr;
		reg->size            = memory->regions[i].memory_size;
		reg->fd              = fd;

		mmap_offset = memory->regions[i].mmap_offset;

		/* Check for memory_size + mmap_offset overflow */
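		/*
		 * Both operands are uint64_t, so -reg->size wraps to
		 * 2^64 - reg->size: the condition below is true exactly when
		 * mmap_offset + reg->size would overflow a 64-bit value.
		 */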
		if (mmap_offset >= -reg->size) {
			RTE_LOG(ERR, VHOST_CONFIG,
				"mmap_offset (%#"PRIx64") and memory_size "
				"(%#"PRIx64") overflow\n",
				mmap_offset, reg->size);
			goto err_mmap;
		}

		mmap_size = reg->size + mmap_offset;

		/* mmap() without flag of MAP_ANONYMOUS, should be called
		 * with length argument aligned with hugepagesz at older
		 * longterm version Linux, like 2.6.32 and 3.2.72, or
		 * mmap() will fail with EINVAL.
		 *
		 * To avoid failure, make sure in caller to keep length
		 * aligned.
		 */
		alignment = get_blk_size(fd);
		if (alignment == (uint64_t)-1) {
			RTE_LOG(ERR, VHOST_CONFIG,
				"couldn't get hugepage size through fstat\n");
			goto err_mmap;
		}
		mmap_size = RTE_ALIGN_CEIL(mmap_size, alignment);

		populate = (dev->dequeue_zero_copy) ? MAP_POPULATE : 0;
		mmap_addr = mmap(NULL, mmap_size, PROT_READ | PROT_WRITE,
				 MAP_SHARED | populate, fd, 0);

		if (mmap_addr == MAP_FAILED) {
			RTE_LOG(ERR, VHOST_CONFIG,
				"mmap region %u failed.\n", i);
			goto err_mmap;
		}

		reg->mmap_addr = mmap_addr;
		reg->mmap_size = mmap_size;
		reg->host_user_addr = (uint64_t)(uintptr_t)mmap_addr +
				      mmap_offset;

		if (dev->dequeue_zero_copy)
			if (add_guest_pages(dev, reg, alignment) < 0) {
				RTE_LOG(ERR, VHOST_CONFIG,
					"adding guest pages to region %u failed.\n",
					i);
				goto err_mmap;
			}

		RTE_LOG(INFO, VHOST_CONFIG,
			"guest memory region %u, size: 0x%" PRIx64 "\n"
			"\t guest physical addr: 0x%" PRIx64 "\n"
			"\t guest virtual  addr: 0x%" PRIx64 "\n"
			"\t host  virtual  addr: 0x%" PRIx64 "\n"
			"\t mmap addr : 0x%" PRIx64 "\n"
			"\t mmap size : 0x%" PRIx64 "\n"
			"\t mmap align: 0x%" PRIx64 "\n"
			"\t mmap off  : 0x%" PRIx64 "\n",
			i, reg->size,
			reg->guest_phys_addr,
			reg->guest_user_addr,
			reg->host_user_addr,
			(uint64_t)(uintptr_t)mmap_addr,
			mmap_size,
			alignment,
			mmap_offset);

		if (dev->postcopy_listening) {
			/*
			 * We haven't a better way right now than sharing
			 * DPDK's virtual address with Qemu, so that Qemu can
			 * retrieve the region offset when handling userfaults.
			 */
			memory->regions[i].userspace_addr =
				reg->host_user_addr;
		}
	}
	if (dev->postcopy_listening) {
		/* Send the addresses back to qemu */
		msg->fd_num = 0;
		send_vhost_reply(main_fd, msg);

		/* Wait for qemu to acknowledge that it has received the
		 * addresses; we must wait before we are allowed to generate
		 * faults.
		 */
		VhostUserMsg ack_msg;
		if (read_vhost_message(main_fd, &ack_msg) <= 0) {
			RTE_LOG(ERR, VHOST_CONFIG,
				"Failed to read qemu ack on postcopy set-mem-table\n");
			goto err_mmap;
		}
		if (ack_msg.request.master != VHOST_USER_SET_MEM_TABLE) {
			RTE_LOG(ERR, VHOST_CONFIG,
				"Bad qemu ack on postcopy set-mem-table (%d)\n",
				ack_msg.request.master);
			goto err_mmap;
		}

		/* Now userfault register and we can use the memory */
		for (i = 0; i < memory->nregions; i++) {
#ifdef RTE_LIBRTE_VHOST_POSTCOPY
			reg = &dev->mem->regions[i];
			struct uffdio_register reg_struct;

			/*
			 * Let's register all the mmap'ed area to ensure
			 * alignment on page boundary.
			 */
			reg_struct.range.start =
				(uint64_t)(uintptr_t)reg->mmap_addr;
			reg_struct.range.len = reg->mmap_size;
			reg_struct.mode = UFFDIO_REGISTER_MODE_MISSING;

			if (ioctl(dev->postcopy_ufd, UFFDIO_REGISTER,
						&reg_struct)) {
				RTE_LOG(ERR, VHOST_CONFIG,
					"Failed to register ufd for region %d: (ufd = %d) %s\n",
					i, dev->postcopy_ufd,
					strerror(errno));
				goto err_mmap;
			}
			RTE_LOG(INFO, VHOST_CONFIG,
				"\t userfaultfd registered for range : %llx - %llx\n",
				reg_struct.range.start,
				reg_struct.range.start +
				reg_struct.range.len - 1);
#else
			goto err_mmap;
#endif
		}
	}

	for (i = 0; i < dev->nr_vring; i++) {
		struct vhost_virtqueue *vq = dev->virtqueue[i];

		if (vq->desc || vq->avail || vq->used) {
			/*
			 * If the memory table got updated, the ring addresses
			 * need to be translated again as virtual addresses
			 * have changed.
			 */
			vring_invalidate(dev, vq);

			dev = translate_ring_addresses(dev, i);
			if (!dev) {
				dev = *pdev;
				goto err_mmap;
			}

			*pdev = dev;
		}
	}

	dump_guest_pages(dev);

	return RTE_VHOST_MSG_RESULT_OK;

err_mmap:
	free_mem_region(dev);
	rte_free(dev->mem);
	dev->mem = NULL;
	return RTE_VHOST_MSG_RESULT_ERR;
}
static int
vq_is_ready(struct virtio_net *dev, struct vhost_virtqueue *vq)
{
	bool rings_ok;

	if (!vq)
		return 0;

	if (vq_is_packed(dev))
		rings_ok = !!vq->desc_packed;
	else
		rings_ok = vq->desc && vq->avail && vq->used;

	return rings_ok &&
	       vq->kickfd != VIRTIO_UNINITIALIZED_EVENTFD &&
	       vq->callfd != VIRTIO_UNINITIALIZED_EVENTFD;
}

static int
virtio_is_ready(struct virtio_net *dev)
{
	struct vhost_virtqueue *vq;
	uint32_t i;

	if (dev->nr_vring == 0)
		return 0;

	for (i = 0; i < dev->nr_vring; i++) {
		vq = dev->virtqueue[i];

		if (!vq_is_ready(dev, vq))
			return 0;
	}

	RTE_LOG(INFO, VHOST_CONFIG,
		"virtio is now ready for processing.\n");
	return 1;
}
static int
vhost_user_set_vring_call(struct virtio_net **pdev, struct VhostUserMsg *msg,
			int main_fd __rte_unused)
{
	struct virtio_net *dev = *pdev;
	struct vhost_vring_file file;
	struct vhost_virtqueue *vq;

	file.index = msg->payload.u64 & VHOST_USER_VRING_IDX_MASK;
	if (msg->payload.u64 & VHOST_USER_VRING_NOFD_MASK)
		file.fd = VIRTIO_INVALID_EVENTFD;
	else
		file.fd = msg->fds[0];
	RTE_LOG(INFO, VHOST_CONFIG,
		"vring call idx:%d file:%d\n", file.index, file.fd);

	vq = dev->virtqueue[file.index];
	if (vq->callfd >= 0)
		close(vq->callfd);

	vq->callfd = file.fd;

	return RTE_VHOST_MSG_RESULT_OK;
}
static int vhost_user_set_vring_err(struct virtio_net **pdev __rte_unused,
			struct VhostUserMsg *msg,
			int main_fd __rte_unused)
{
	if (!(msg->payload.u64 & VHOST_USER_VRING_NOFD_MASK))
		close(msg->fds[0]);
	RTE_LOG(INFO, VHOST_CONFIG, "not implemented\n");

	return RTE_VHOST_MSG_RESULT_OK;
}
static int
vhost_user_set_vring_kick(struct virtio_net **pdev, struct VhostUserMsg *msg,
			int main_fd __rte_unused)
{
	struct virtio_net *dev = *pdev;
	struct vhost_vring_file file;
	struct vhost_virtqueue *vq;

	file.index = msg->payload.u64 & VHOST_USER_VRING_IDX_MASK;
	if (msg->payload.u64 & VHOST_USER_VRING_NOFD_MASK)
		file.fd = VIRTIO_INVALID_EVENTFD;
	else
		file.fd = msg->fds[0];
	RTE_LOG(INFO, VHOST_CONFIG,
		"vring kick idx:%d file:%d\n", file.index, file.fd);

	/* Interpret ring addresses only when ring is started. */
	dev = translate_ring_addresses(dev, file.index);
	if (!dev)
		return RTE_VHOST_MSG_RESULT_ERR;

	*pdev = dev;

	vq = dev->virtqueue[file.index];

	/*
	 * When VHOST_USER_F_PROTOCOL_FEATURES is not negotiated,
	 * the ring starts already enabled. Otherwise, it is enabled via
	 * the SET_VRING_ENABLE message.
	 */
	if (!(dev->features & (1ULL << VHOST_USER_F_PROTOCOL_FEATURES))) {
		vq->enabled = 1;
		if (dev->notify_ops->vring_state_changed)
			dev->notify_ops->vring_state_changed(
				dev->vid, file.index, 1);
	}

	if (vq->kickfd >= 0)
		close(vq->kickfd);
	vq->kickfd = file.fd;

	return RTE_VHOST_MSG_RESULT_OK;
}
static void
free_zmbufs(struct vhost_virtqueue *vq)
{
	drain_zmbuf_list(vq);

	rte_free(vq->zmbufs);
}
/*
 * When virtio is stopped, qemu will send us the GET_VRING_BASE message.
 */
static int
vhost_user_get_vring_base(struct virtio_net **pdev,
			struct VhostUserMsg *msg,
			int main_fd __rte_unused)
{
	struct virtio_net *dev = *pdev;
	struct vhost_virtqueue *vq = dev->virtqueue[msg->payload.state.index];
	uint64_t val;

	/* We have to stop the queue (virtio) if it is running. */
	vhost_destroy_device_notify(dev);

	dev->flags &= ~VIRTIO_DEV_READY;
	dev->flags &= ~VIRTIO_DEV_VDPA_CONFIGURED;

	/* Here we are safe to get the indexes */
	if (vq_is_packed(dev)) {
		/*
		 * Bit[0:14]: avail index
		 * Bit[15]: avail wrap counter
		 */
		val = vq->last_avail_idx & 0x7fff;
		val |= vq->avail_wrap_counter << 15;
		msg->payload.state.num = val;
	} else {
		msg->payload.state.num = vq->last_avail_idx;
	}

	RTE_LOG(INFO, VHOST_CONFIG,
		"vring base idx:%d file:%d\n", msg->payload.state.index,
		msg->payload.state.num);
	/*
	 * Based on current qemu vhost-user implementation, this message is
	 * sent and only sent in vhost_vring_stop.
	 * TODO: cleanup the vring, it isn't usable from this point on.
	 */
	if (vq->kickfd >= 0)
		close(vq->kickfd);

	vq->kickfd = VIRTIO_UNINITIALIZED_EVENTFD;

	if (vq->callfd >= 0)
		close(vq->callfd);

	vq->callfd = VIRTIO_UNINITIALIZED_EVENTFD;

	vq->signalled_used_valid = false;

	if (dev->dequeue_zero_copy)
		free_zmbufs(vq);
	if (vq_is_packed(dev)) {
		rte_free(vq->shadow_used_packed);
		vq->shadow_used_packed = NULL;
	} else {
		rte_free(vq->shadow_used_split);
		vq->shadow_used_split = NULL;
	}

	rte_free(vq->batch_copy_elems);
	vq->batch_copy_elems = NULL;

	msg->size = sizeof(msg->payload.state);
	msg->fd_num = 0;

	return RTE_VHOST_MSG_RESULT_REPLY;
}
/*
 * When virtio queues are ready to work, qemu will send us a message to
 * enable the virtio queue pair.
 */
static int
vhost_user_set_vring_enable(struct virtio_net **pdev,
			struct VhostUserMsg *msg,
			int main_fd __rte_unused)
{
	struct virtio_net *dev = *pdev;
	int enable = (int)msg->payload.state.num;
	int index = (int)msg->payload.state.index;
	struct rte_vdpa_device *vdpa_dev;
	int did = -1;

	RTE_LOG(INFO, VHOST_CONFIG,
		"set queue enable: %d to qp idx: %d\n",
		enable, index);

	did = dev->vdpa_dev_id;
	vdpa_dev = rte_vdpa_get_device(did);
	if (vdpa_dev && vdpa_dev->ops->set_vring_state)
		vdpa_dev->ops->set_vring_state(dev->vid, index, enable);

	if (dev->notify_ops->vring_state_changed)
		dev->notify_ops->vring_state_changed(dev->vid,
				index, enable);

	/* On disable, processing of the rings has to be stopped. */
	if (!enable && dev->dequeue_zero_copy)
		drain_zmbuf_list(dev->virtqueue[index]);

	dev->virtqueue[index]->enabled = enable;

	return RTE_VHOST_MSG_RESULT_OK;
}
static int
vhost_user_get_protocol_features(struct virtio_net **pdev,
			struct VhostUserMsg *msg,
			int main_fd __rte_unused)
{
	struct virtio_net *dev = *pdev;
	uint64_t features, protocol_features;

	rte_vhost_driver_get_features(dev->ifname, &features);
	rte_vhost_driver_get_protocol_features(dev->ifname, &protocol_features);

	/*
	 * The REPLY_ACK protocol feature is for now only mandatory for the
	 * IOMMU feature. If IOMMU is explicitly disabled by the application,
	 * also disable the REPLY_ACK feature for older buggy Qemu versions
	 * (from v2.7.0 to v2.9.0).
	 */
	if (!(features & (1ULL << VIRTIO_F_IOMMU_PLATFORM)))
		protocol_features &= ~(1ULL << VHOST_USER_PROTOCOL_F_REPLY_ACK);

	msg->payload.u64 = protocol_features;
	msg->size = sizeof(msg->payload.u64);
	msg->fd_num = 0;

	return RTE_VHOST_MSG_RESULT_REPLY;
}
static int
vhost_user_set_protocol_features(struct virtio_net **pdev,
			struct VhostUserMsg *msg,
			int main_fd __rte_unused)
{
	struct virtio_net *dev = *pdev;
	uint64_t protocol_features = msg->payload.u64;
	uint64_t slave_protocol_features = 0;

	rte_vhost_driver_get_protocol_features(dev->ifname,
			&slave_protocol_features);
	if (protocol_features & ~slave_protocol_features) {
		RTE_LOG(ERR, VHOST_CONFIG,
			"(%d) received invalid protocol features.\n",
			dev->vid);
		return RTE_VHOST_MSG_RESULT_ERR;
	}

	dev->protocol_features = protocol_features;

	return RTE_VHOST_MSG_RESULT_OK;
}
static int
vhost_user_set_log_base(struct virtio_net **pdev, struct VhostUserMsg *msg,
			int main_fd __rte_unused)
{
	struct virtio_net *dev = *pdev;
	int fd = msg->fds[0];
	uint64_t size, off;
	void *addr;

	if (fd < 0) {
		RTE_LOG(ERR, VHOST_CONFIG, "invalid log fd: %d\n", fd);
		return RTE_VHOST_MSG_RESULT_ERR;
	}

	if (msg->size != sizeof(VhostUserLog)) {
		RTE_LOG(ERR, VHOST_CONFIG,
			"invalid log base msg size: %"PRId32" != %d\n",
			msg->size, (int)sizeof(VhostUserLog));
		return RTE_VHOST_MSG_RESULT_ERR;
	}

	size = msg->payload.log.mmap_size;
	off  = msg->payload.log.mmap_offset;

	/* Don't allow mmap_offset to point outside the mmap region */
	if (off > size) {
		RTE_LOG(ERR, VHOST_CONFIG,
			"log offset %#"PRIx64" exceeds log size %#"PRIx64"\n",
			off, size);
		return RTE_VHOST_MSG_RESULT_ERR;
	}

	RTE_LOG(INFO, VHOST_CONFIG,
		"log mmap size: %"PRId64", offset: %"PRId64"\n",
		size, off);

	/*
	 * mmap from 0 to workaround a hugepage mmap bug: mmap will
	 * fail when offset is not page size aligned.
	 */
	addr = mmap(0, size + off, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
	close(fd);
	if (addr == MAP_FAILED) {
		RTE_LOG(ERR, VHOST_CONFIG, "mmap log base failed!\n");
		return RTE_VHOST_MSG_RESULT_ERR;
	}

	/*
	 * Free any previously mapped log memory, since
	 * VHOST_USER_SET_LOG_BASE can occasionally be received multiple
	 * times.
	 */
	if (dev->log_addr) {
		munmap((void *)(uintptr_t)dev->log_addr, dev->log_size);
	}
	dev->log_addr = (uint64_t)(uintptr_t)addr;
	dev->log_base = dev->log_addr + off;
	dev->log_size = size;

	/*
	 * The spec is not clear about it (yet), but QEMU doesn't expect
	 * any payload in the reply.
	 */
	msg->size = 0;
	msg->fd_num = 0;

	return RTE_VHOST_MSG_RESULT_REPLY;
}
static int vhost_user_set_log_fd(struct virtio_net **pdev __rte_unused,
			struct VhostUserMsg *msg,
			int main_fd __rte_unused)
{
	close(msg->fds[0]);
	RTE_LOG(INFO, VHOST_CONFIG, "not implemented.\n");

	return RTE_VHOST_MSG_RESULT_OK;
}
/*
 * An rarp packet is constructed and broadcast to notify switches about
 * the new location of the migrated VM, so that packets from outside will
 * not be lost after migration.
 *
 * However, we don't actually "send" a rarp packet here; instead, we set
 * a flag 'broadcast_rarp' to let rte_vhost_dequeue_burst() inject it.
 */
static int
vhost_user_send_rarp(struct virtio_net **pdev, struct VhostUserMsg *msg,
			int main_fd __rte_unused)
{
	struct virtio_net *dev = *pdev;
	uint8_t *mac = (uint8_t *)&msg->payload.u64;
	struct rte_vdpa_device *vdpa_dev;
	int did = -1;

	RTE_LOG(DEBUG, VHOST_CONFIG,
		":: mac: %02x:%02x:%02x:%02x:%02x:%02x\n",
		mac[0], mac[1], mac[2], mac[3], mac[4], mac[5]);
	memcpy(dev->mac.addr_bytes, mac, 6);

	/*
	 * Set the flag to inject a RARP broadcast packet at
	 * rte_vhost_dequeue_burst().
	 *
	 * rte_smp_wmb() is for making sure the mac is copied
	 * before the flag is set.
	 */
	rte_smp_wmb();
	rte_atomic16_set(&dev->broadcast_rarp, 1);
	did = dev->vdpa_dev_id;
	vdpa_dev = rte_vdpa_get_device(did);
	if (vdpa_dev && vdpa_dev->ops->migration_done)
		vdpa_dev->ops->migration_done(dev->vid);

	return RTE_VHOST_MSG_RESULT_OK;
}
static int
vhost_user_net_set_mtu(struct virtio_net **pdev, struct VhostUserMsg *msg,
			int main_fd __rte_unused)
{
	struct virtio_net *dev = *pdev;

	if (msg->payload.u64 < VIRTIO_MIN_MTU ||
			msg->payload.u64 > VIRTIO_MAX_MTU) {
		RTE_LOG(ERR, VHOST_CONFIG, "Invalid MTU size (%"PRIu64")\n",
				msg->payload.u64);

		return RTE_VHOST_MSG_RESULT_ERR;
	}

	dev->mtu = msg->payload.u64;

	return RTE_VHOST_MSG_RESULT_OK;
}
static int
vhost_user_set_req_fd(struct virtio_net **pdev, struct VhostUserMsg *msg,
			int main_fd __rte_unused)
{
	struct virtio_net *dev = *pdev;
	int fd = msg->fds[0];

	if (fd < 0) {
		RTE_LOG(ERR, VHOST_CONFIG,
			"Invalid file descriptor for slave channel (%d)\n",
			fd);
		return RTE_VHOST_MSG_RESULT_ERR;
	}

	dev->slave_req_fd = fd;

	return RTE_VHOST_MSG_RESULT_OK;
}
static int
is_vring_iotlb_update(struct vhost_virtqueue *vq, struct vhost_iotlb_msg *imsg)
{
	struct vhost_vring_addr *ra;
	uint64_t start, end;

	start = imsg->iova;
	end = start + imsg->size;

	ra = &vq->ring_addrs;
	if (ra->desc_user_addr >= start && ra->desc_user_addr < end)
		return 1;
	if (ra->avail_user_addr >= start && ra->avail_user_addr < end)
		return 1;
	if (ra->used_user_addr >= start && ra->used_user_addr < end)
		return 1;

	return 0;
}
static int
is_vring_iotlb_invalidate(struct vhost_virtqueue *vq,
				struct vhost_iotlb_msg *imsg)
{
	uint64_t istart, iend, vstart, vend;

	istart = imsg->iova;
	iend = istart + imsg->size - 1;
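	/*
	 * Two closed intervals [istart, iend] and [vstart, vend] overlap
	 * iff each one starts no later than the other ends, hence the
	 * repeated "vstart <= iend && istart <= vend" test below.
	 */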
	vstart = (uintptr_t)vq->desc;
	vend = vstart + sizeof(struct vring_desc) * vq->size - 1;
	if (vstart <= iend && istart <= vend)
		return 1;

	vstart = (uintptr_t)vq->avail;
	vend = vstart + sizeof(struct vring_avail);
	vend += sizeof(uint16_t) * vq->size - 1;
	if (vstart <= iend && istart <= vend)
		return 1;

	vstart = (uintptr_t)vq->used;
	vend = vstart + sizeof(struct vring_used);
	vend += sizeof(struct vring_used_elem) * vq->size - 1;
	if (vstart <= iend && istart <= vend)
		return 1;

	return 0;
}
static int
vhost_user_iotlb_msg(struct virtio_net **pdev, struct VhostUserMsg *msg,
			int main_fd __rte_unused)
{
	struct virtio_net *dev = *pdev;
	struct vhost_iotlb_msg *imsg = &msg->payload.iotlb;
	uint16_t i;
	uint64_t vva, len;

	switch (imsg->type) {
	case VHOST_IOTLB_UPDATE:
		len = imsg->size;
		vva = qva_to_vva(dev, imsg->uaddr, &len);
		if (!vva)
			return RTE_VHOST_MSG_RESULT_ERR;

		for (i = 0; i < dev->nr_vring; i++) {
			struct vhost_virtqueue *vq = dev->virtqueue[i];

			vhost_user_iotlb_cache_insert(vq, imsg->iova, vva,
					len, imsg->perm);

			if (is_vring_iotlb_update(vq, imsg))
				*pdev = dev = translate_ring_addresses(dev, i);
		}
		break;
	case VHOST_IOTLB_INVALIDATE:
		for (i = 0; i < dev->nr_vring; i++) {
			struct vhost_virtqueue *vq = dev->virtqueue[i];

			vhost_user_iotlb_cache_remove(vq, imsg->iova,
					imsg->size);

			if (is_vring_iotlb_invalidate(vq, imsg))
				vring_invalidate(dev, vq);
		}
		break;
	default:
		RTE_LOG(ERR, VHOST_CONFIG, "Invalid IOTLB message type (%d)\n",
				imsg->type);
		return RTE_VHOST_MSG_RESULT_ERR;
	}

	return RTE_VHOST_MSG_RESULT_OK;
}
static int
vhost_user_set_postcopy_advise(struct virtio_net **pdev,
			struct VhostUserMsg *msg,
			int main_fd __rte_unused)
{
	struct virtio_net *dev = *pdev;
#ifdef RTE_LIBRTE_VHOST_POSTCOPY
	struct uffdio_api api_struct;

	dev->postcopy_ufd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK);

	if (dev->postcopy_ufd == -1) {
		RTE_LOG(ERR, VHOST_CONFIG, "Userfaultfd not available: %s\n",
			strerror(errno));
		return RTE_VHOST_MSG_RESULT_ERR;
	}
	api_struct.api = UFFD_API;
	api_struct.features = 0;
	if (ioctl(dev->postcopy_ufd, UFFDIO_API, &api_struct)) {
		RTE_LOG(ERR, VHOST_CONFIG, "UFFDIO_API ioctl failure: %s\n",
			strerror(errno));
		close(dev->postcopy_ufd);
		dev->postcopy_ufd = -1;
		return RTE_VHOST_MSG_RESULT_ERR;
	}
	msg->fds[0] = dev->postcopy_ufd;
	msg->fd_num = 1;

	return RTE_VHOST_MSG_RESULT_REPLY;
#else
	dev->postcopy_ufd = -1;
	msg->fd_num = 0;

	return RTE_VHOST_MSG_RESULT_ERR;
#endif
}
static int
vhost_user_set_postcopy_listen(struct virtio_net **pdev,
			struct VhostUserMsg *msg __rte_unused,
			int main_fd __rte_unused)
{
	struct virtio_net *dev = *pdev;

	if (dev->mem && dev->mem->nregions) {
		RTE_LOG(ERR, VHOST_CONFIG,
			"Regions already registered at postcopy-listen\n");
		return RTE_VHOST_MSG_RESULT_ERR;
	}
	dev->postcopy_listening = 1;

	return RTE_VHOST_MSG_RESULT_OK;
}
static int
vhost_user_postcopy_end(struct virtio_net **pdev, struct VhostUserMsg *msg,
			int main_fd __rte_unused)
{
	struct virtio_net *dev = *pdev;

	dev->postcopy_listening = 0;
	if (dev->postcopy_ufd >= 0) {
		close(dev->postcopy_ufd);
		dev->postcopy_ufd = -1;
	}

	msg->payload.u64 = 0;
	msg->size = sizeof(msg->payload.u64);
	msg->fd_num = 0;

	return RTE_VHOST_MSG_RESULT_REPLY;
}
typedef int (*vhost_message_handler_t)(struct virtio_net **pdev,
					struct VhostUserMsg *msg,
					int main_fd);
static vhost_message_handler_t vhost_message_handlers[VHOST_USER_MAX] = {
	[VHOST_USER_NONE] = NULL,
	[VHOST_USER_GET_FEATURES] = vhost_user_get_features,
	[VHOST_USER_SET_FEATURES] = vhost_user_set_features,
	[VHOST_USER_SET_OWNER] = vhost_user_set_owner,
	[VHOST_USER_RESET_OWNER] = vhost_user_reset_owner,
	[VHOST_USER_SET_MEM_TABLE] = vhost_user_set_mem_table,
	[VHOST_USER_SET_LOG_BASE] = vhost_user_set_log_base,
	[VHOST_USER_SET_LOG_FD] = vhost_user_set_log_fd,
	[VHOST_USER_SET_VRING_NUM] = vhost_user_set_vring_num,
	[VHOST_USER_SET_VRING_ADDR] = vhost_user_set_vring_addr,
	[VHOST_USER_SET_VRING_BASE] = vhost_user_set_vring_base,
	[VHOST_USER_GET_VRING_BASE] = vhost_user_get_vring_base,
	[VHOST_USER_SET_VRING_KICK] = vhost_user_set_vring_kick,
	[VHOST_USER_SET_VRING_CALL] = vhost_user_set_vring_call,
	[VHOST_USER_SET_VRING_ERR] = vhost_user_set_vring_err,
	[VHOST_USER_GET_PROTOCOL_FEATURES] = vhost_user_get_protocol_features,
	[VHOST_USER_SET_PROTOCOL_FEATURES] = vhost_user_set_protocol_features,
	[VHOST_USER_GET_QUEUE_NUM] = vhost_user_get_queue_num,
	[VHOST_USER_SET_VRING_ENABLE] = vhost_user_set_vring_enable,
	[VHOST_USER_SEND_RARP] = vhost_user_send_rarp,
	[VHOST_USER_NET_SET_MTU] = vhost_user_net_set_mtu,
	[VHOST_USER_SET_SLAVE_REQ_FD] = vhost_user_set_req_fd,
	[VHOST_USER_IOTLB_MSG] = vhost_user_iotlb_msg,
	[VHOST_USER_POSTCOPY_ADVISE] = vhost_user_set_postcopy_advise,
	[VHOST_USER_POSTCOPY_LISTEN] = vhost_user_set_postcopy_listen,
	[VHOST_USER_POSTCOPY_END] = vhost_user_postcopy_end,
};
/* Return the number of bytes read on success, or a negative value on failure. */
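/*
 * msg->size comes from the (untrusted) master, so it is checked against the
 * payload union size before the body is read; see the security note at the
 * top of this file.
 */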
static int
read_vhost_message(int sockfd, struct VhostUserMsg *msg)
{
	int ret;

	ret = read_fd_message(sockfd, (char *)msg, VHOST_USER_HDR_SIZE,
		msg->fds, VHOST_MEMORY_MAX_NREGIONS, &msg->fd_num);
	if (ret <= 0)
		return ret;

	if (msg->size) {
		if (msg->size > sizeof(msg->payload)) {
			RTE_LOG(ERR, VHOST_CONFIG,
				"invalid msg size: %d\n", msg->size);
			return -1;
		}
		ret = read(sockfd, &msg->payload, msg->size);
		if (ret <= 0)
			return 0;
		if (ret != (int)msg->size) {
			RTE_LOG(ERR, VHOST_CONFIG,
				"read control message failed\n");
			return -1;
		}
	}

	return ret;
}
static int
send_vhost_message(int sockfd, struct VhostUserMsg *msg)
{
	if (!msg)
		return 0;

	return send_fd_message(sockfd, (char *)msg,
		VHOST_USER_HDR_SIZE + msg->size, msg->fds, msg->fd_num);
}
static int
send_vhost_reply(int sockfd, struct VhostUserMsg *msg)
{
	if (!msg)
		return 0;

	msg->flags &= ~VHOST_USER_VERSION_MASK;
	msg->flags &= ~VHOST_USER_NEED_REPLY;
	msg->flags |= VHOST_USER_VERSION;
	msg->flags |= VHOST_USER_REPLY_MASK;

	return send_vhost_message(sockfd, msg);
}
static int
send_vhost_slave_message(struct virtio_net *dev, struct VhostUserMsg *msg)
{
	int ret;

	if (msg->flags & VHOST_USER_NEED_REPLY)
		rte_spinlock_lock(&dev->slave_req_lock);

	ret = send_vhost_message(dev->slave_req_fd, msg);
	if (ret < 0 && (msg->flags & VHOST_USER_NEED_REPLY))
		rte_spinlock_unlock(&dev->slave_req_lock);

	return ret;
}
/*
 * Allocate a queue pair if it hasn't been allocated yet.
 */
static int
vhost_user_check_and_alloc_queue_pair(struct virtio_net *dev,
			struct VhostUserMsg *msg)
{
	uint16_t vring_idx;

	switch (msg->request.master) {
	case VHOST_USER_SET_VRING_KICK:
	case VHOST_USER_SET_VRING_CALL:
	case VHOST_USER_SET_VRING_ERR:
		vring_idx = msg->payload.u64 & VHOST_USER_VRING_IDX_MASK;
		break;
	case VHOST_USER_SET_VRING_NUM:
	case VHOST_USER_SET_VRING_BASE:
	case VHOST_USER_SET_VRING_ENABLE:
		vring_idx = msg->payload.state.index;
		break;
	case VHOST_USER_SET_VRING_ADDR:
		vring_idx = msg->payload.addr.index;
		break;
	default:
		return 0;
	}

	if (vring_idx >= VHOST_MAX_VRING) {
		RTE_LOG(ERR, VHOST_CONFIG,
			"invalid vring index: %u\n", vring_idx);
		return -1;
	}

	if (dev->virtqueue[vring_idx])
		return 0;

	return alloc_vring_queue(dev, vring_idx);
}
static void
vhost_user_lock_all_queue_pairs(struct virtio_net *dev)
{
	unsigned int i = 0;
	unsigned int vq_num = 0;

	while (vq_num < dev->nr_vring) {
		struct vhost_virtqueue *vq = dev->virtqueue[i];

		if (vq) {
			rte_spinlock_lock(&vq->access_lock);
			vq_num++;
		}
		i++;
	}
}

static void
vhost_user_unlock_all_queue_pairs(struct virtio_net *dev)
{
	unsigned int i = 0;
	unsigned int vq_num = 0;

	while (vq_num < dev->nr_vring) {
		struct vhost_virtqueue *vq = dev->virtqueue[i];

		if (vq) {
			rte_spinlock_unlock(&vq->access_lock);
			vq_num++;
		}
		i++;
	}
}
int
vhost_user_msg_handler(int vid, int fd)
{
	struct virtio_net *dev;
	struct VhostUserMsg msg;
	struct rte_vdpa_device *vdpa_dev;
	int did = -1;
	int ret;
	int unlock_required = 0;
	bool handled;
	int request;

	dev = get_device(vid);
	if (dev == NULL)
		return -1;

	if (!dev->notify_ops) {
		dev->notify_ops = vhost_driver_callback_get(dev->ifname);
		if (!dev->notify_ops) {
			RTE_LOG(ERR, VHOST_CONFIG,
				"failed to get callback ops for driver %s\n",
				dev->ifname);
			return -1;
		}
	}

	ret = read_vhost_message(fd, &msg);
	if (ret <= 0) {
		if (ret < 0)
			RTE_LOG(ERR, VHOST_CONFIG,
				"vhost read message failed\n");
		else
			RTE_LOG(INFO, VHOST_CONFIG,
				"vhost peer closed\n");

		return -1;
	}

	ret = 0;
	request = msg.request.master;
	if (request > VHOST_USER_NONE && request < VHOST_USER_MAX &&
			vhost_message_str[request]) {
		if (request != VHOST_USER_IOTLB_MSG)
			RTE_LOG(INFO, VHOST_CONFIG, "read message %s\n",
				vhost_message_str[request]);
		else
			RTE_LOG(DEBUG, VHOST_CONFIG, "read message %s\n",
				vhost_message_str[request]);
	} else {
		RTE_LOG(DEBUG, VHOST_CONFIG, "External request %d\n", request);
	}

	ret = vhost_user_check_and_alloc_queue_pair(dev, &msg);
	if (ret < 0) {
		RTE_LOG(ERR, VHOST_CONFIG,
			"failed to alloc queue\n");
		return -1;
	}

	/*
	 * Note: we don't lock all queues on VHOST_USER_GET_VRING_BASE
	 * and VHOST_USER_RESET_OWNER, since it is sent when virtio stops
	 * and the device is destroyed. destroy_device waits for queues to be
	 * inactive, so it is safe. Otherwise taking the access_lock
	 * would cause a deadlock.
	 */
	switch (request) {
	case VHOST_USER_SET_FEATURES:
	case VHOST_USER_SET_PROTOCOL_FEATURES:
	case VHOST_USER_SET_OWNER:
	case VHOST_USER_SET_MEM_TABLE:
	case VHOST_USER_SET_LOG_BASE:
	case VHOST_USER_SET_LOG_FD:
	case VHOST_USER_SET_VRING_NUM:
	case VHOST_USER_SET_VRING_ADDR:
	case VHOST_USER_SET_VRING_BASE:
	case VHOST_USER_SET_VRING_KICK:
	case VHOST_USER_SET_VRING_CALL:
	case VHOST_USER_SET_VRING_ERR:
	case VHOST_USER_SET_VRING_ENABLE:
	case VHOST_USER_SEND_RARP:
	case VHOST_USER_NET_SET_MTU:
	case VHOST_USER_SET_SLAVE_REQ_FD:
		vhost_user_lock_all_queue_pairs(dev);
		unlock_required = 1;
		break;
	default:
		break;
	}

	handled = false;
	if (dev->extern_ops.pre_msg_handle) {
		ret = (*dev->extern_ops.pre_msg_handle)(dev->vid,
				(void *)&msg);
		switch (ret) {
		case RTE_VHOST_MSG_RESULT_REPLY:
			send_vhost_reply(fd, &msg);
			/* Fall-through */
		case RTE_VHOST_MSG_RESULT_ERR:
		case RTE_VHOST_MSG_RESULT_OK:
			handled = true;
			goto skip_to_post_handle;
		case RTE_VHOST_MSG_RESULT_NOT_HANDLED:
		default:
			break;
		}
	}

	if (request > VHOST_USER_NONE && request < VHOST_USER_MAX) {
		if (!vhost_message_handlers[request])
			goto skip_to_post_handle;
		ret = vhost_message_handlers[request](&dev, &msg, fd);

		switch (ret) {
		case RTE_VHOST_MSG_RESULT_ERR:
			RTE_LOG(ERR, VHOST_CONFIG,
				"Processing %s failed.\n",
				vhost_message_str[request]);
			handled = true;
			break;
		case RTE_VHOST_MSG_RESULT_OK:
			RTE_LOG(DEBUG, VHOST_CONFIG,
				"Processing %s succeeded.\n",
				vhost_message_str[request]);
			handled = true;
			break;
		case RTE_VHOST_MSG_RESULT_REPLY:
			RTE_LOG(DEBUG, VHOST_CONFIG,
				"Processing %s succeeded and needs reply.\n",
				vhost_message_str[request]);
			send_vhost_reply(fd, &msg);
			handled = true;
			break;
		default:
			break;
		}
	}

skip_to_post_handle:
	if (ret != RTE_VHOST_MSG_RESULT_ERR &&
			dev->extern_ops.post_msg_handle) {
		ret = (*dev->extern_ops.post_msg_handle)(dev->vid,
				(void *)&msg);
		switch (ret) {
		case RTE_VHOST_MSG_RESULT_REPLY:
			send_vhost_reply(fd, &msg);
			/* Fall-through */
		case RTE_VHOST_MSG_RESULT_ERR:
		case RTE_VHOST_MSG_RESULT_OK:
			handled = true;
			break;
		case RTE_VHOST_MSG_RESULT_NOT_HANDLED:
		default:
			break;
		}
	}

	if (unlock_required)
		vhost_user_unlock_all_queue_pairs(dev);

	/* If the message was not handled at this stage, treat it as an error */
	if (!handled) {
		RTE_LOG(ERR, VHOST_CONFIG,
			"vhost message (req: %d) was not handled.\n", request);
		ret = RTE_VHOST_MSG_RESULT_ERR;
	}

	/*
	 * If the request required a reply that was already sent,
	 * this optional reply-ack won't be sent as the
	 * VHOST_USER_NEED_REPLY was cleared in send_vhost_reply().
	 */
	if (msg.flags & VHOST_USER_NEED_REPLY) {
		msg.payload.u64 = ret == RTE_VHOST_MSG_RESULT_ERR;
		msg.size = sizeof(msg.payload.u64);
		msg.fd_num = 0;
		send_vhost_reply(fd, &msg);
	} else if (ret == RTE_VHOST_MSG_RESULT_ERR) {
		RTE_LOG(ERR, VHOST_CONFIG,
			"vhost message handling failed.\n");
		return -1;
	}

	if (!(dev->flags & VIRTIO_DEV_RUNNING) && virtio_is_ready(dev)) {
		dev->flags |= VIRTIO_DEV_READY;

		if (!(dev->flags & VIRTIO_DEV_RUNNING)) {
			if (dev->dequeue_zero_copy) {
				RTE_LOG(INFO, VHOST_CONFIG,
						"dequeue zero copy is enabled\n");
			}

			if (dev->notify_ops->new_device(dev->vid) == 0)
				dev->flags |= VIRTIO_DEV_RUNNING;
		}
	}

	did = dev->vdpa_dev_id;
	vdpa_dev = rte_vdpa_get_device(did);
	if (vdpa_dev && virtio_is_ready(dev) &&
			!(dev->flags & VIRTIO_DEV_VDPA_CONFIGURED) &&
			msg.request.master == VHOST_USER_SET_VRING_CALL) {
		if (vdpa_dev->ops->dev_conf)
			vdpa_dev->ops->dev_conf(dev->vid);
		dev->flags |= VIRTIO_DEV_VDPA_CONFIGURED;
	}

	return 0;
}
static int process_slave_message_reply(struct virtio_net *dev,
				       const struct VhostUserMsg *msg)
{
	struct VhostUserMsg msg_reply;
	int ret;

	if ((msg->flags & VHOST_USER_NEED_REPLY) == 0)
		return 0;

	if (read_vhost_message(dev->slave_req_fd, &msg_reply) < 0) {
		ret = -1;
		goto out;
	}

	if (msg_reply.request.slave != msg->request.slave) {
		RTE_LOG(ERR, VHOST_CONFIG,
			"Received unexpected msg type (%u), expected %u\n",
			msg_reply.request.slave, msg->request.slave);
		ret = -1;
		goto out;
	}

	ret = msg_reply.payload.u64 ? -1 : 0;

out:
	rte_spinlock_unlock(&dev->slave_req_lock);
	return ret;
}
int
vhost_user_iotlb_miss(struct virtio_net *dev, uint64_t iova, uint8_t perm)
{
	int ret;
	struct VhostUserMsg msg = {
		.request.slave = VHOST_USER_SLAVE_IOTLB_MSG,
		.flags = VHOST_USER_VERSION,
		.size = sizeof(msg.payload.iotlb),
		.payload.iotlb = {
			.iova = iova,
			.perm = perm,
			.type = VHOST_IOTLB_MISS,
		},
	};

	ret = send_vhost_message(dev->slave_req_fd, &msg);
	if (ret < 0) {
		RTE_LOG(ERR, VHOST_CONFIG,
			"Failed to send IOTLB miss message (%d)\n",
			ret);
		return ret;
	}

	return 0;
}
static int vhost_user_slave_set_vring_host_notifier(struct virtio_net *dev,
						    int index, int fd,
						    uint64_t offset,
						    uint64_t size)
{
	int ret;
	struct VhostUserMsg msg = {
		.request.slave = VHOST_USER_SLAVE_VRING_HOST_NOTIFIER_MSG,
		.flags = VHOST_USER_VERSION | VHOST_USER_NEED_REPLY,
		.size = sizeof(msg.payload.area),
		.payload.area = {
			.u64 = index & VHOST_USER_VRING_IDX_MASK,
			.size = size,
			.offset = offset,
		},
	};

	if (fd < 0)
		msg.payload.area.u64 |= VHOST_USER_VRING_NOFD_MASK;
	else {
		msg.fds[0] = fd;
		msg.fd_num = 1;
	}

	ret = send_vhost_slave_message(dev, &msg);
	if (ret < 0) {
		RTE_LOG(ERR, VHOST_CONFIG,
			"Failed to set host notifier (%d)\n", ret);
		return ret;
	}

	return process_slave_message_reply(dev, &msg);
}
2209 int rte_vhost_host_notifier_ctrl(int vid
, bool enable
)
2211 struct virtio_net
*dev
;
2212 struct rte_vdpa_device
*vdpa_dev
;
2213 int vfio_device_fd
, did
, ret
= 0;
2214 uint64_t offset
, size
;
2217 dev
= get_device(vid
);
2221 did
= dev
->vdpa_dev_id
;
2225 if (!(dev
->features
& (1ULL << VIRTIO_F_VERSION_1
)) ||
2226 !(dev
->features
& (1ULL << VHOST_USER_F_PROTOCOL_FEATURES
)) ||
2227 !(dev
->protocol_features
&
2228 (1ULL << VHOST_USER_PROTOCOL_F_SLAVE_REQ
)) ||
2229 !(dev
->protocol_features
&
2230 (1ULL << VHOST_USER_PROTOCOL_F_SLAVE_SEND_FD
)) ||
2231 !(dev
->protocol_features
&
2232 (1ULL << VHOST_USER_PROTOCOL_F_HOST_NOTIFIER
)))
2235 vdpa_dev
= rte_vdpa_get_device(did
);
2239 RTE_FUNC_PTR_OR_ERR_RET(vdpa_dev
->ops
->get_vfio_device_fd
, -ENOTSUP
);
2240 RTE_FUNC_PTR_OR_ERR_RET(vdpa_dev
->ops
->get_notify_area
, -ENOTSUP
);
2242 vfio_device_fd
= vdpa_dev
->ops
->get_vfio_device_fd(vid
);
2243 if (vfio_device_fd
< 0)
2247 for (i
= 0; i
< dev
->nr_vring
; i
++) {
2248 if (vdpa_dev
->ops
->get_notify_area(vid
, i
, &offset
,
2254 if (vhost_user_slave_set_vring_host_notifier(dev
, i
,
2255 vfio_device_fd
, offset
, size
) < 0) {
2262 for (i
= 0; i
< dev
->nr_vring
; i
++) {
2263 vhost_user_slave_set_vring_host_notifier(dev
, i
, -1,