/*-
 *   BSD LICENSE
 *
 *   Copyright(c) 2010-2017 Intel Corporation. All rights reserved.
 *
 *   Redistribution and use in source and binary forms, with or without
 *   modification, are permitted provided that the following conditions
 *   are met:
 *
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in
 *       the documentation and/or other materials provided with the
 *       distribution.
 *     * Neither the name of Intel Corporation nor the names of its
 *       contributors may be used to endorse or promote products derived
 *       from this software without specific prior written permission.
 *
 *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

/**
 * @file
 * Interface to vhost-user
 */
/* Guard opener restored: the trailing "#endif / * _RTE_VHOST_H_ * /" at the
 * bottom of this header is otherwise unbalanced.
 */
#ifndef _RTE_VHOST_H_
#define _RTE_VHOST_H_

#include <stddef.h>
#include <stdint.h>

#include <linux/vhost.h>
#include <linux/virtio_ring.h>

#include <sys/eventfd.h>

#include <rte_config.h>
#include <rte_memory.h>
#include <rte_mempool.h>
/* Flags accepted by rte_vhost_driver_register(). */
#define RTE_VHOST_USER_CLIENT			(1ULL << 0)
#define RTE_VHOST_USER_NO_RECONNECT		(1ULL << 1)
#define RTE_VHOST_USER_DEQUEUE_ZERO_COPY	(1ULL << 2)
/**
 * Information relating to memory regions including offsets to
 * addresses in QEMUs memory file.
 */
struct rte_vhost_mem_region {
	uint64_t guest_phys_addr;	/**< guest physical start address */
	uint64_t guest_user_addr;	/**< guest virtual start address */
	uint64_t host_user_addr;	/**< host virtual address of the mapping */
	/* 'size' is required: rte_vhost_gpa_to_vva() and
	 * rte_vhost_va_from_guest_pa() below read reg->size. */
	uint64_t size;			/**< region length in bytes */
	/* NOTE(review): upstream rte_vhost.h also carries the fields below;
	 * confirm against the vhost implementation before changing layout. */
	void	 *mmap_addr;		/**< address the file is mmap'ed at */
	uint64_t mmap_size;		/**< length of the mmap'ed area */
	int	 fd;			/**< file descriptor of the memory file */
};
70 * Memory structure includes region and mapping information.
72 struct rte_vhost_memory
{
74 struct rte_vhost_mem_region regions
[0];
/**
 * A virtqueue as exposed to the application: split-ring pointers plus
 * the guest address used for dirty logging of the used ring.
 */
struct rte_vhost_vring {
	struct vring_desc	*desc;		/**< descriptor table */
	struct vring_avail	*avail;		/**< available ring */
	struct vring_used	*used;		/**< used ring */
	uint64_t		log_guest_addr;	/**< guest addr for used-ring logging */
	/* NOTE(review): upstream rte_vhost.h additionally declares callfd,
	 * kickfd and size here; restored per upstream — confirm against the
	 * vhost implementation that fills this struct. */
	int			callfd;		/**< call eventfd */
	int			kickfd;		/**< kick eventfd */
	uint16_t		size;		/**< number of descriptors */
};
/**
 * Device and vring operations.
 */
struct vhost_device_ops {
	int (*new_device)(int vid);		/**< Add device. */
	void (*destroy_device)(int vid);	/**< Remove device. */

	/** Triggered when a vring is enabled or disabled. */
	int (*vring_state_changed)(int vid, uint16_t queue_id, int enable);

	/**
	 * Features could be changed after the feature negotiation.
	 * For example, VHOST_F_LOG_ALL will be set/cleared at the
	 * start/end of live migration, respectively. This callback
	 * is used to inform the application on such change.
	 */
	int (*features_changed)(int vid, uint64_t features);

	/* vhost-nvme extensions: pass NVMe admin commands and queue/BAR
	 * setup through to the application. */
	int (*vhost_nvme_admin_passthrough)(int vid, void *cmd, void *cqe, void *buf);
	int (*vhost_nvme_set_cq_call)(int vid, uint16_t qid, int fd);
	int (*vhost_nvme_set_bar_mr)(int vid, void *bar_addr, uint64_t bar_size);
	int (*vhost_nvme_get_cap)(int vid, uint64_t *cap);

	/* Socket-connection lifecycle (before/after device negotiation). */
	int (*new_connection)(int vid);
	void (*destroy_connection)(int vid);

	/* Device config space access (VHOST_USER_GET/SET_CONFIG). */
	int (*get_config)(int vid, uint8_t *config, uint32_t config_len);
	int (*set_config)(int vid, uint8_t *config, uint32_t offset,
			  uint32_t len, uint32_t flags);

	void *reserved[2];	/**< Reserved for future extension */
};
120 * Convert guest physical address to host virtual address
123 * the guest memory regions
125 * the guest physical address for querying
127 * the host virtual address on success, 0 on failure
129 static inline uint64_t __attribute__((always_inline
))
130 rte_vhost_gpa_to_vva(struct rte_vhost_memory
*mem
, uint64_t gpa
)
132 struct rte_vhost_mem_region
*reg
;
135 for (i
= 0; i
< mem
->nregions
; i
++) {
136 reg
= &mem
->regions
[i
];
137 if (gpa
>= reg
->guest_phys_addr
&&
138 gpa
< reg
->guest_phys_addr
+ reg
->size
) {
139 return gpa
- reg
->guest_phys_addr
+
148 * Convert guest physical address to host virtual address safely
150 * This variant of rte_vhost_gpa_to_vva() takes care all the
151 * requested length is mapped and contiguous in process address
155 * the guest memory regions
157 * the guest physical address for querying
159 * the size of the requested area to map,
160 * updated with actual size mapped
162 * the host virtual address on success, 0 on failure */
163 static inline uint64_t
164 rte_vhost_va_from_guest_pa(struct rte_vhost_memory
*mem
,
165 uint64_t gpa
, uint64_t *len
)
167 struct rte_vhost_mem_region
*r
;
170 for (i
= 0; i
< mem
->nregions
; i
++) {
171 r
= &mem
->regions
[i
];
172 if (gpa
>= r
->guest_phys_addr
&&
173 gpa
< r
->guest_phys_addr
+ r
->size
) {
175 if (unlikely(*len
> r
->guest_phys_addr
+ r
->size
- gpa
))
176 *len
= r
->guest_phys_addr
+ r
->size
- gpa
;
178 return gpa
- r
->guest_phys_addr
+
/** True when VHOST_F_LOG_ALL is negotiated, i.e. dirty-page logging for
 *  live migration is required; use to guard rte_vhost_log_write() calls. */
#define RTE_VHOST_NEED_LOG(features)	((features) & (1ULL << VHOST_F_LOG_ALL))
/**
 * Log the memory write start with given address.
 *
 * This function only need be invoked when the live migration starts.
 * Therefore, we won't need call it at all in the most of time. For
 * making the performance impact be minimum, it's suggested to do a
 * check before calling it:
 *
 *        if (unlikely(RTE_VHOST_NEED_LOG(features)))
 *                rte_vhost_log_write(vid, addr, len);
 *
 * @param vid
 *  vhost device ID
 * @param addr
 *  the starting address for write
 * @param len
 *  the length to write
 */
void rte_vhost_log_write(int vid, uint64_t addr, uint64_t len);

/**
 * Log the used ring update start at given offset.
 *
 * Same as rte_vhost_log_write, it's suggested to do a check before
 * calling it:
 *
 *        if (unlikely(RTE_VHOST_NEED_LOG(features)))
 *                rte_vhost_log_used_vring(vid, vring_idx, offset, len);
 *
 * @param vid
 *  vhost device ID
 * @param vring_idx
 *  the vring index
 * @param offset
 *  the offset inside the used ring
 * @param len
 *  the length to write
 */
void rte_vhost_log_used_vring(int vid, uint16_t vring_idx,
			      uint64_t offset, uint64_t len);

/* NOTE(review): semantics inferred from the name — presumably toggles guest
 * interrupt suppression for the given queue; confirm against implementation. */
int rte_vhost_enable_guest_notification(int vid, uint16_t queue_id, int enable);
/**
 * Register vhost driver. path could be different for multiple
 * instance support.
 *
 * @param path
 *  The vhost-user socket file path
 * @param flags
 *  RTE_VHOST_USER_* flags
 * @return
 *  0 on success, -1 on failure
 */
int rte_vhost_driver_register(const char *path, uint64_t flags);

/* Unregister vhost driver. This is only meaningful to vhost user. */
int rte_vhost_driver_unregister(const char *path);

/**
 * Set the feature bits the vhost-user driver supports.
 *
 * @param path
 *  The vhost-user socket file path
 * @param features
 *  The feature bits to advertise
 * @return
 *  0 on success, -1 on failure
 */
int rte_vhost_driver_set_features(const char *path, uint64_t features);

/**
 * Enable vhost-user driver features.
 *
 * Note that:
 * - the param @features should be a subset of the feature bits provided
 *   by rte_vhost_driver_set_features().
 * - it must be invoked before vhost-user negotiation starts.
 *
 * @param path
 *  The vhost-user socket file path
 * @param features
 *  Features to enable
 * @return
 *  0 on success, -1 on failure
 */
int rte_vhost_driver_enable_features(const char *path, uint64_t features);

/**
 * Disable vhost-user driver features.
 *
 * The two notes at rte_vhost_driver_enable_features() also apply here.
 *
 * @param path
 *  The vhost-user socket file path
 * @param features
 *  Features to disable
 * @return
 *  0 on success, -1 on failure
 */
int rte_vhost_driver_disable_features(const char *path, uint64_t features);

/**
 * Get the feature bits before feature negotiation.
 *
 * @param path
 *  The vhost-user socket file path
 * @param features
 *  A pointer to store the queried feature bits
 * @return
 *  0 on success, -1 on failure
 */
int rte_vhost_driver_get_features(const char *path, uint64_t *features);
/**
 * Get the feature bits after negotiation
 *
 * @param vid
 *  vhost device ID
 * @param features
 *  A pointer to store the queried feature bits
 * @return
 *  0 on success, -1 on failure
 */
int rte_vhost_get_negotiated_features(int vid, uint64_t *features);

/* Register callbacks. */
int rte_vhost_driver_callback_register(const char *path,
				       struct vhost_device_ops const * const ops);

/**
 * Start the vhost-user driver.
 *
 * This function triggers the vhost-user negotiation.
 *
 * @param path
 *  The vhost-user socket file path
 * @return
 *  0 on success, -1 on failure
 */
int rte_vhost_driver_start(const char *path);
/**
 * Get the MTU value of the device if set in QEMU.
 *
 * @param vid
 *  virtio-net device ID
 * @param mtu
 *  The variable to store the MTU value
 * @return
 *  0: success
 *  -EAGAIN: device not yet started
 *  -ENOTSUP: device does not support MTU feature
 */
int rte_vhost_get_mtu(int vid, uint16_t *mtu);

/**
 * Get the numa node from which the virtio net device's memory
 * is allocated.
 *
 * @param vid
 *  vhost device ID
 * @return
 *  The numa node, -1 on failure
 */
int rte_vhost_get_numa_node(int vid);

/**
 * Get the virtio net device's ifname, which is the vhost-user socket
 * file path.
 *
 * @param vid
 *  vhost device ID
 * @param buf
 *  The buffer to stored the queried ifname
 * @param len
 *  The length of buf
 * @return
 *  0 on success, -1 on failure
 */
int rte_vhost_get_ifname(int vid, char *buf, size_t len);

/**
 * Get how many avail entries are left in the queue
 *
 * @param vid
 *  vhost device ID
 * @param queue_id
 *  virtio queue index
 * @return
 *  num of avail entries left
 */
uint16_t rte_vhost_avail_entries(int vid, uint16_t queue_id);
/**
 * This function adds buffers to the virtio devices RX virtqueue. Buffers can
 * be received from the physical port or from another virtual device. A packet
 * count is returned to indicate the number of packets that were successfully
 * added to the RX queue.
 *
 * @param vid
 *  vhost device ID
 * @param queue_id
 *  virtio queue index in mq case
 * @param pkts
 *  array to contain packets to be enqueued
 * @param count
 *  packets num to be enqueued
 * @return
 *  num of packets enqueued
 */
uint16_t rte_vhost_enqueue_burst(int vid, uint16_t queue_id,
				 struct rte_mbuf **pkts, uint16_t count);

/**
 * This function gets guest buffers from the virtio device TX virtqueue,
 * construct host mbufs, copies guest buffer content to host mbufs and
 * store them in pkts to be processed.
 *
 * @param vid
 *  vhost device ID
 * @param queue_id
 *  virtio queue index in mq case
 * @param mbuf_pool
 *  mbuf_pool where host mbuf is allocated.
 * @param pkts
 *  array to contain packets to be dequeued
 * @param count
 *  packets num to be dequeued
 * @return
 *  num of packets dequeued
 */
uint16_t rte_vhost_dequeue_burst(int vid, uint16_t queue_id,
				 struct rte_mempool *mbuf_pool,
				 struct rte_mbuf **pkts, uint16_t count);
/**
 * Get guest mem table: a list of memory regions.
 *
 * An rte_vhost_memory object will be allocated internally, to hold the
 * guest memory regions. Application should free it at destroy_device()
 * callback.
 *
 * @param vid
 *  vhost device ID
 * @param mem
 *  To store the returned mem regions
 * @return
 *  0 on success, -1 on failure
 */
int rte_vhost_get_mem_table(int vid, struct rte_vhost_memory **mem);

/**
 * Get guest vring info, including the vring address, vring size, etc.
 *
 * @param vid
 *  vhost device ID
 * @param vring_idx
 *  vring index
 * @param vring
 *  the structure to hold the requested vring info
 * @return
 *  0 on success, -1 on failure
 */
int rte_vhost_get_vhost_vring(int vid, uint16_t vring_idx,
			      struct rte_vhost_vring *vring);
/**
 * Set id of the last descriptors in avail and used guest vrings.
 *
 * In case user application operates directly on buffers, it should use this
 * function on device destruction to retrieve the same values later on in device
 * creation via rte_vhost_get_vhost_vring(int, uint16_t, struct rte_vhost_vring *)
 *
 * @param vid
 *  vhost device ID
 * @param queue_id
 *  virtio queue index
 * @param last_avail_idx
 *  id of the last descriptor in avail ring to be set
 * @param last_used_idx
 *  id of the last descriptor in used ring to be set
 * @return
 *  0 on success, -1 on failure
 */
int rte_vhost_set_vring_base(int vid, uint16_t queue_id,
			     uint16_t last_avail_idx, uint16_t last_used_idx);

/* NOTE(review): counterpart of rte_vhost_set_vring_base() — presumably reads
 * back the last avail/used indices; confirm against implementation. */
int rte_vhost_get_vring_base(int vid, uint16_t queue_id,
			     uint16_t *last_avail_idx, uint16_t *last_used_idx);

/**
 * Notify the guest that used descriptors have been added to the vring.
 *
 * @param vid
 *  vhost device ID
 * @param vring_idx
 *  vring index
 * @return
 *  0 on success, -1 on failure
 */
int rte_vhost_vring_call(int vid, uint16_t vring_idx);
487 #endif /* _RTE_VHOST_H_ */