1 /* SPDX-License-Identifier: BSD-3-Clause
2 * Copyright(c) 2010-2017 Intel Corporation
10 * Interface to vhost-user
14 #include <sys/eventfd.h>
16 #include <rte_memory.h>
17 #include <rte_mempool.h>
23 /* These are not C++-aware. */
24 #include <linux/vhost.h>
25 #include <linux/virtio_ring.h>
/* Single-bit, 64-bit wide vhost-user flag values; they can be OR-ed together. */
#define RTE_VHOST_USER_CLIENT		(1ULL << 0)
#define RTE_VHOST_USER_NO_RECONNECT	(1ULL << 1)
#define RTE_VHOST_USER_DEQUEUE_ZERO_COPY	(1ULL << 2)
#define RTE_VHOST_USER_IOMMU_SUPPORT	(1ULL << 3)
#define RTE_VHOST_USER_POSTCOPY_SUPPORT	(1ULL << 4)
/**
 * Protocol features.
 *
 * Each value is a bit position. Every definition is guarded so that a value
 * already provided by a previously included header takes precedence.
 */
#ifndef VHOST_USER_PROTOCOL_F_MQ
#define VHOST_USER_PROTOCOL_F_MQ	0
#endif

#ifndef VHOST_USER_PROTOCOL_F_LOG_SHMFD
#define VHOST_USER_PROTOCOL_F_LOG_SHMFD	1
#endif

#ifndef VHOST_USER_PROTOCOL_F_RARP
#define VHOST_USER_PROTOCOL_F_RARP	2
#endif

#ifndef VHOST_USER_PROTOCOL_F_REPLY_ACK
#define VHOST_USER_PROTOCOL_F_REPLY_ACK	3
#endif

#ifndef VHOST_USER_PROTOCOL_F_NET_MTU
#define VHOST_USER_PROTOCOL_F_NET_MTU	4
#endif

#ifndef VHOST_USER_PROTOCOL_F_SLAVE_REQ
#define VHOST_USER_PROTOCOL_F_SLAVE_REQ	5
#endif

#ifndef VHOST_USER_PROTOCOL_F_CRYPTO_SESSION
#define VHOST_USER_PROTOCOL_F_CRYPTO_SESSION 7
#endif

#ifndef VHOST_USER_PROTOCOL_F_PAGEFAULT
#define VHOST_USER_PROTOCOL_F_PAGEFAULT 8
#endif

#ifndef VHOST_USER_PROTOCOL_F_SLAVE_SEND_FD
#define VHOST_USER_PROTOCOL_F_SLAVE_SEND_FD 10
#endif

#ifndef VHOST_USER_PROTOCOL_F_HOST_NOTIFIER
#define VHOST_USER_PROTOCOL_F_HOST_NOTIFIER 11
#endif
/**
 * Indicate whether protocol features negotiation is supported.
 * The value is a bit position; an earlier definition takes precedence.
 */
#ifndef VHOST_USER_F_PROTOCOL_FEATURES
#define VHOST_USER_F_PROTOCOL_FEATURES	30
#endif
80 * Information relating to memory regions including offsets to
81 * addresses in QEMUs memory file.
83 struct rte_vhost_mem_region
{
84 uint64_t guest_phys_addr
;
85 uint64_t guest_user_addr
;
86 uint64_t host_user_addr
;
94 * Memory structure includes region and mapping information.
96 struct rte_vhost_memory
{
98 struct rte_vhost_mem_region regions
[];
101 struct rte_vhost_vring
{
102 struct vring_desc
*desc
;
103 struct vring_avail
*avail
;
104 struct vring_used
*used
;
105 uint64_t log_guest_addr
;
107 /** Deprecated, use rte_vhost_vring_call() instead. */
/**
 * Possible results of the vhost user message handling callbacks
 */
enum rte_vhost_msg_result {
	/* Message handling failed */
	RTE_VHOST_MSG_RESULT_ERR = -1,
	/* Message handling successful */
	RTE_VHOST_MSG_RESULT_OK = 0,
	/* Message handling successful and reply prepared */
	RTE_VHOST_MSG_RESULT_REPLY = 1,
	/* Message not handled (no explicit value: follows REPLY, i.e. 2) */
	RTE_VHOST_MSG_RESULT_NOT_HANDLED,
};
/**
 * Function prototype for the vhost backend to handle specific vhost user
 * messages.
 *
 * @param vid
 *  vhost device ID
 * @param msg
 *  the message to handle (opaque pointer; its layout is not defined here)
 * @return
 *  RTE_VHOST_MSG_RESULT_OK on success,
 *  RTE_VHOST_MSG_RESULT_REPLY on success with reply,
 *  RTE_VHOST_MSG_RESULT_ERR on failure,
 *  RTE_VHOST_MSG_RESULT_NOT_HANDLED if message was not handled.
 */
typedef enum rte_vhost_msg_result (*rte_vhost_msg_handle)(int vid, void *msg);
145 * Optional vhost user message handlers.
147 struct rte_vhost_user_extern_ops
{
148 /* Called prior to the master message handling. */
149 rte_vhost_msg_handle pre_msg_handle
;
150 /* Called after the master message handling. */
151 rte_vhost_msg_handle post_msg_handle
;
/**
 * Device and vring operations.
 *
 * A table of callbacks; every callback receives the device ID as @c vid.
 */
struct vhost_device_ops {
	int (*new_device)(int vid);		/**< Add device. */
	void (*destroy_device)(int vid);	/**< Remove device. */

	int (*vring_state_changed)(int vid, uint16_t queue_id, int enable);	/**< triggered when a vring is enabled or disabled */

	/**
	 * Features could be changed after the feature negotiation.
	 * For example, VHOST_F_LOG_ALL will be set/cleared at the
	 * start/end of live migration, respectively. This callback
	 * is used to inform the application on such change.
	 */
	int (*features_changed)(int vid, uint64_t features);

	int (*new_connection)(int vid);
	void (*destroy_connection)(int vid);

	void *reserved[2]; /**< Reserved for future extension */
};
178 * Convert guest physical address to host virtual address
180 * This function is deprecated because unsafe.
181 * New rte_vhost_va_from_guest_pa() should be used instead to ensure
182 * guest physical ranges are fully and contiguously mapped into
183 * process virtual address space.
186 * the guest memory regions
188 * the guest physical address for querying
190 * the host virtual address on success, 0 on failure
193 static __rte_always_inline
uint64_t
194 rte_vhost_gpa_to_vva(struct rte_vhost_memory
*mem
, uint64_t gpa
)
196 struct rte_vhost_mem_region
*reg
;
199 for (i
= 0; i
< mem
->nregions
; i
++) {
200 reg
= &mem
->regions
[i
];
201 if (gpa
>= reg
->guest_phys_addr
&&
202 gpa
< reg
->guest_phys_addr
+ reg
->size
) {
203 return gpa
- reg
->guest_phys_addr
+
212 * Convert guest physical address to host virtual address safely
214 * This variant of rte_vhost_gpa_to_vva() takes care all the
215 * requested length is mapped and contiguous in process address
219 * the guest memory regions
221 * the guest physical address for querying
223 * the size of the requested area to map, updated with actual size mapped
225 * the host virtual address on success, 0 on failure
227 static __rte_always_inline
uint64_t
228 rte_vhost_va_from_guest_pa(struct rte_vhost_memory
*mem
,
229 uint64_t gpa
, uint64_t *len
)
231 struct rte_vhost_mem_region
*r
;
234 for (i
= 0; i
< mem
->nregions
; i
++) {
235 r
= &mem
->regions
[i
];
236 if (gpa
>= r
->guest_phys_addr
&&
237 gpa
< r
->guest_phys_addr
+ r
->size
) {
239 if (unlikely(*len
> r
->guest_phys_addr
+ r
->size
- gpa
))
240 *len
= r
->guest_phys_addr
+ r
->size
- gpa
;
242 return gpa
- r
->guest_phys_addr
+
/*
 * Non-zero when the VHOST_F_LOG_ALL bit is set in @p features.
 * VHOST_F_LOG_ALL itself must be provided by an included header.
 */
#define RTE_VHOST_NEED_LOG(features)	((features) & (1ULL << VHOST_F_LOG_ALL))
/**
 * Log the memory write start with given address.
 *
 * This function only needs to be invoked when live migration starts, so
 * most of the time it need not be called at all. To keep the performance
 * impact minimal, it is suggested to do a check before calling it:
 *
 *        if (unlikely(RTE_VHOST_NEED_LOG(features)))
 *                rte_vhost_log_write(vid, addr, len);
 *
 * @param vid
 *  vhost device ID
 * @param addr
 *  the starting address for write
 * @param len
 *  the length to write
 */
void rte_vhost_log_write(int vid, uint64_t addr, uint64_t len);
/**
 * Log the used ring update start at given offset.
 *
 * Same as rte_vhost_log_write, it's suggested to do a check before
 * calling it:
 *
 *        if (unlikely(RTE_VHOST_NEED_LOG(features)))
 *                rte_vhost_log_used_vring(vid, vring_idx, offset, len);
 *
 * @param vid
 *  vhost device ID
 * @param vring_idx
 *  the vring index
 * @param offset
 *  the offset inside the used ring
 * @param len
 *  the length to write
 */
void rte_vhost_log_used_vring(int vid, uint16_t vring_idx,
			      uint64_t offset, uint64_t len);
/*
 * Enable or disable guest notification on one queue of a device.
 * NOTE(review): the meaning of the return value and of non-0/1 values of
 * 'enable' is not documented in this header - confirm with the implementation.
 */
int rte_vhost_enable_guest_notification(int vid, uint16_t queue_id, int enable);
/**
 * Register vhost driver. A different path may be given for each
 * registration.
 * NOTE(review): 'flags' is presumably an OR of the RTE_VHOST_USER_* bits,
 * and the return-value convention is not stated here - confirm both.
 */
int rte_vhost_driver_register(const char *path, uint64_t flags);
/* Unregister vhost driver. This is only meaningful to vhost user. */
int rte_vhost_driver_unregister(const char *path);
306 * Set the vdpa device id, enforce single connection per socket
309 * The vhost-user socket file path
313 * 0 on success, -1 on failure
315 int __rte_experimental
316 rte_vhost_driver_attach_vdpa_device(const char *path
, int did
);
319 * Unset the vdpa device id
322 * The vhost-user socket file path
324 * 0 on success, -1 on failure
326 int __rte_experimental
327 rte_vhost_driver_detach_vdpa_device(const char *path
);
333 * The vhost-user socket file path
335 * Device id, -1 on failure
337 int __rte_experimental
338 rte_vhost_driver_get_vdpa_device_id(const char *path
);
/**
 * Set the feature bits the vhost-user driver supports.
 *
 * @param path
 *  The vhost-user socket file path
 * @param features
 *  The supported feature bits
 * @return
 *  0 on success, -1 on failure
 */
int rte_vhost_driver_set_features(const char *path, uint64_t features);
/**
 * Enable vhost-user driver features.
 *
 * Note:
 * - the param features should be a subset of the feature bits provided
 *   by rte_vhost_driver_set_features().
 * - it must be invoked before vhost-user negotiation starts.
 *
 * @param path
 *  The vhost-user socket file path
 * @param features
 *  Features to enable
 * @return
 *  0 on success, -1 on failure
 */
int rte_vhost_driver_enable_features(const char *path, uint64_t features);
/**
 * Disable vhost-user driver features.
 *
 * The two notes at rte_vhost_driver_enable_features() also apply here.
 *
 * @param path
 *  The vhost-user socket file path
 * @param features
 *  Features to disable
 * @return
 *  0 on success, -1 on failure
 */
int rte_vhost_driver_disable_features(const char *path, uint64_t features);
/**
 * Get the feature bits before feature negotiation.
 *
 * @param path
 *  The vhost-user socket file path
 * @param features
 *  A pointer to store the queried feature bits
 * @return
 *  0 on success, -1 on failure
 */
int rte_vhost_driver_get_features(const char *path, uint64_t *features);
396 * Set the protocol feature bits before feature negotiation.
399 * The vhost-user socket file path
400 * @param protocol_features
401 * Supported protocol features
403 * 0 on success, -1 on failure
405 int __rte_experimental
406 rte_vhost_driver_set_protocol_features(const char *path
,
407 uint64_t protocol_features
);
410 * Get the protocol feature bits before feature negotiation.
413 * The vhost-user socket file path
414 * @param protocol_features
415 * A pointer to store the queried protocol feature bits
417 * 0 on success, -1 on failure
419 int __rte_experimental
420 rte_vhost_driver_get_protocol_features(const char *path
,
421 uint64_t *protocol_features
);
424 * Get the queue number bits before feature negotiation.
427 * The vhost-user socket file path
429 * A pointer to store the queried queue number bits
431 * 0 on success, -1 on failure
433 int __rte_experimental
434 rte_vhost_driver_get_queue_num(const char *path
, uint32_t *queue_num
);
/**
 * Get the feature bits after negotiation
 *
 * @param vid
 *  vhost device ID
 * @param features
 *  A pointer to store the queried feature bits
 * @return
 *  0 on success, -1 on failure
 */
int rte_vhost_get_negotiated_features(int vid, uint64_t *features);
/*
 * Register callbacks.
 * The ops table is taken through a const pointer, so it is not modified
 * through this interface.
 */
int rte_vhost_driver_callback_register(const char *path,
	struct vhost_device_ops const * const ops);
/**
 * Start the vhost-user driver.
 *
 * This function triggers the vhost-user negotiation.
 *
 * @param path
 *  The vhost-user socket file path
 * @return
 *  0 on success, -1 on failure
 */
int rte_vhost_driver_start(const char *path);
/**
 * Get the MTU value of the device if set in QEMU.
 *
 * @param vid
 *  virtio-net device ID
 * @param mtu
 *  The variable to store the MTU value
 * @return
 *  -EAGAIN: device not yet started
 *  -ENOTSUP: device does not support MTU feature
 *  NOTE(review): the success value (presumably 0) is not stated in the
 *  visible documentation - confirm.
 */
int rte_vhost_get_mtu(int vid, uint16_t *mtu);
/**
 * Get the numa node from which the virtio net device's memory
 * is allocated.
 *
 * @param vid
 *  vhost device ID
 * @return
 *  The numa node, -1 on failure
 */
int rte_vhost_get_numa_node(int vid);
/**
 * Get the number of queues the device supports.
 *
 * Note this function is deprecated, as it returns a queue pair number,
 * which is vhost specific. Instead, rte_vhost_get_vring_num should
 * be used.
 *
 * @param vid
 *  vhost device ID
 * @return
 *  The number of queues, 0 on failure
 */
uint32_t rte_vhost_get_queue_num(int vid);
/**
 * Get the number of vrings the device supports.
 *
 * @param vid
 *  vhost device ID
 * @return
 *  The number of vrings, 0 on failure
 */
uint16_t rte_vhost_get_vring_num(int vid);
/**
 * Get the virtio net device's ifname, which is the vhost-user socket
 * file path.
 *
 * @param vid
 *  vhost device ID
 * @param buf
 *  The buffer to store the queried ifname
 * @param len
 *  The length of buf
 * @return
 *  0 on success, -1 on failure
 */
int rte_vhost_get_ifname(int vid, char *buf, size_t len);
/**
 * Get how many avail entries are left in the queue
 *
 * @param vid
 *  vhost device ID
 * @param queue_id
 *  virtio queue index
 * @return
 *  num of avail entries left
 */
uint16_t rte_vhost_avail_entries(int vid, uint16_t queue_id);
/**
 * This function adds buffers to the virtio devices RX virtqueue. Buffers can
 * be received from the physical port or from another virtual device. A packet
 * count is returned to indicate the number of packets that were successfully
 * added to the RX queue.
 *
 * @param vid
 *  vhost device ID
 * @param queue_id
 *  virtio queue index in mq case
 * @param pkts
 *  array to contain packets to be enqueued
 * @param count
 *  packets num to be enqueued
 * @return
 *  num of packets enqueued
 */
uint16_t rte_vhost_enqueue_burst(int vid, uint16_t queue_id,
	struct rte_mbuf **pkts, uint16_t count);
/**
 * This function gets guest buffers from the virtio device TX virtqueue,
 * constructs host mbufs, copies guest buffer content to host mbufs and
 * stores them in pkts to be processed.
 *
 * @param vid
 *  vhost device ID
 * @param queue_id
 *  virtio queue index in mq case
 * @param mbuf_pool
 *  mbuf_pool where host mbuf is allocated.
 * @param pkts
 *  array to contain packets to be dequeued
 * @param count
 *  packets num to be dequeued
 * @return
 *  num of packets dequeued
 */
uint16_t rte_vhost_dequeue_burst(int vid, uint16_t queue_id,
	struct rte_mempool *mbuf_pool, struct rte_mbuf **pkts, uint16_t count);
/**
 * Get guest mem table: a list of memory regions.
 *
 * An rte_vhost_memory object will be allocated internally, to hold the
 * guest memory regions. Application should free it at destroy_device()
 * callback.
 *
 * @param vid
 *  vhost device ID
 * @param mem
 *  To store the returned mem regions
 * @return
 *  0 on success, -1 on failure
 */
int rte_vhost_get_mem_table(int vid, struct rte_vhost_memory **mem);
/**
 * Get guest vring info, including the vring address, vring size, etc.
 *
 * @param vid
 *  vhost device ID
 * @param vring_idx
 *  vring index
 * @param vring
 *  the structure to hold the requested vring info
 * @return
 *  0 on success, -1 on failure
 */
int rte_vhost_get_vhost_vring(int vid, uint16_t vring_idx,
			      struct rte_vhost_vring *vring);
/**
 * Notify the guest that used descriptors have been added to the vring. This
 * function acts as a memory barrier.
 *
 * @param vid
 *  vhost device ID
 * @param vring_idx
 *  vring index
 * @return
 *  0 on success, -1 on failure
 */
int rte_vhost_vring_call(int vid, uint16_t vring_idx);
/**
 * Get vhost RX queue avail count.
 *
 * @param vid
 *  vhost device ID
 * @param qid
 *  virtio queue index in mq case
 * @return
 *  num of desc available
 */
uint32_t rte_vhost_rx_queue_count(int vid, uint16_t qid);
647 * Get log base and log size of the vhost device
656 * 0 on success, -1 on failure
658 int __rte_experimental
659 rte_vhost_get_log_base(int vid
, uint64_t *log_base
, uint64_t *log_size
);
662 * Get last_avail/used_idx of the vhost virtqueue
668 * @param last_avail_idx
669 * vhost last_avail_idx to get
670 * @param last_used_idx
671 * vhost last_used_idx to get
673 * 0 on success, -1 on failure
675 int __rte_experimental
676 rte_vhost_get_vring_base(int vid
, uint16_t queue_id
,
677 uint16_t *last_avail_idx
, uint16_t *last_used_idx
);
680 * Set last_avail/used_idx of the vhost virtqueue
686 * @param last_avail_idx
687 * last_avail_idx to set
688 * @param last_used_idx
689 * last_used_idx to set
691 * 0 on success, -1 on failure
693 int __rte_experimental
694 rte_vhost_set_vring_base(int vid
, uint16_t queue_id
,
695 uint16_t last_avail_idx
, uint16_t last_used_idx
);
698 * Register external message handling callbacks
703 * virtio external callbacks to register
705 * additional context passed to the callbacks
707 * 0 on success, -1 on failure
709 int __rte_experimental
710 rte_vhost_extern_callback_register(int vid
,
711 struct rte_vhost_user_extern_ops
const * const ops
, void *ctx
);
714 * Get vdpa device id for vhost device.
721 int __rte_experimental
722 rte_vhost_get_vdpa_device_id(int vid
);
728 #endif /* _RTE_VHOST_H_ */