/*-
 *   BSD LICENSE
 *
 *   Copyright(c) 2010-2017 Intel Corporation. All rights reserved.
 *   All rights reserved.
 *
 *   Redistribution and use in source and binary forms, with or without
 *   modification, are permitted provided that the following conditions
 *   are met:
 *
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in
 *       the documentation and/or other materials provided with the
 *       distribution.
 *     * Neither the name of Intel Corporation nor the names of its
 *       contributors may be used to endorse or promote products derived
 *       from this software without specific prior written permission.
 *
 *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

/**
 * @file
 * Interface to vhost-user
 */
#include <stdint.h>
#include <stddef.h>

#include <linux/vhost.h>
#include <linux/virtio_ring.h>

#include <sys/eventfd.h>

#include <rte_config.h>
#include <rte_memory.h>
#include <rte_mempool.h>
/* Flags accepted by rte_vhost_driver_register(). */
#define RTE_VHOST_USER_CLIENT			(1ULL << 0)
#define RTE_VHOST_USER_NO_RECONNECT		(1ULL << 1)
#define RTE_VHOST_USER_DEQUEUE_ZERO_COPY	(1ULL << 2)
/**
 * Information relating to memory regions including offsets to
 * addresses in QEMUs memory file.
 */
struct rte_vhost_mem_region {
	uint64_t guest_phys_addr;	/**< guest-physical base of the region */
	uint64_t guest_user_addr;	/**< region base in the guest (QEMU) process VA */
	uint64_t host_user_addr;	/**< region base in the local (host) mapping */
	uint64_t size;			/**< region length in bytes */
	/* NOTE(review): the three members below are restored from upstream
	 * rte_vhost.h (lines dropped in this copy). This struct is shared
	 * with the vhost library ABI — confirm the layout against the
	 * project's reference copy before relying on it. */
	void	 *mmap_addr;
	uint64_t mmap_size;
	int fd;
};
70 * Memory structure includes region and mapping information.
72 struct rte_vhost_memory
{
74 struct rte_vhost_mem_region regions
[0];
/**
 * Snapshot of a single vhost virtqueue (vring), as returned by
 * rte_vhost_get_vhost_vring().
 */
struct rte_vhost_vring {
	struct vring_desc	*desc;		/**< descriptor table */
	struct vring_avail	*avail;		/**< available ring */
	struct vring_used	*used;		/**< used ring */
	uint64_t		log_guest_addr;	/**< guest address for dirty logging */

	/* NOTE(review): callfd, kickfd and size are restored from upstream
	 * rte_vhost.h (lines dropped in this copy) — confirm against the
	 * project's reference copy, since this layout is part of the ABI. */
	int			callfd;
	int			kickfd;
	uint16_t		size;

	uint16_t		last_avail_idx;	/**< next index to read in avail ring */
	uint16_t		last_used_idx;	/**< next index to write in used ring */
};
/**
 * Device and vring operations.
 *
 * Callbacks the application registers via
 * rte_vhost_driver_callback_register().
 */
struct vhost_device_ops {
	int (*new_device)(int vid);		/**< Add device. */
	void (*destroy_device)(int vid);	/**< Remove device. */

	/** Triggered when a vring is enabled or disabled. */
	int (*vring_state_changed)(int vid, uint16_t queue_id, int enable);

	/**
	 * Features could be changed after the feature negotiation.
	 * For example, VHOST_F_LOG_ALL will be set/cleared at the
	 * start/end of live migration, respectively. This callback
	 * is used to inform the application on such change.
	 */
	int (*features_changed)(int vid, uint64_t features);

	/* vhost-nvme hooks — semantics defined by the application;
	 * cmd/cqe/buf are opaque NVMe command/completion/data buffers. */
	int (*vhost_nvme_admin_passthrough)(int vid, void *cmd, void *cqe, void *buf);
	int (*vhost_nvme_set_cq_call)(int vid, uint16_t qid, int fd);
	int (*vhost_nvme_get_cap)(int vid, uint64_t *cap);

	int (*new_connection)(int vid);		/**< New socket connection opened. */
	void (*destroy_connection)(int vid);	/**< Socket connection closed. */

	/** Read device config space into config (up to config_len bytes). */
	int (*get_config)(int vid, uint8_t *config, uint32_t config_len);
	/** Write len bytes at offset of the device config space. */
	int (*set_config)(int vid, uint8_t *config, uint32_t offset,
			  uint32_t len, uint32_t flags);

	void *reserved[2];	/**< Reserved for future extension */
};
122 * Convert guest physical address to host virtual address
125 * the guest memory regions
127 * the guest physical address for querying
129 * the host virtual address on success, 0 on failure
131 static inline uint64_t __attribute__((always_inline
))
132 rte_vhost_gpa_to_vva(struct rte_vhost_memory
*mem
, uint64_t gpa
)
134 struct rte_vhost_mem_region
*reg
;
137 for (i
= 0; i
< mem
->nregions
; i
++) {
138 reg
= &mem
->regions
[i
];
139 if (gpa
>= reg
->guest_phys_addr
&&
140 gpa
< reg
->guest_phys_addr
+ reg
->size
) {
141 return gpa
- reg
->guest_phys_addr
+
150 * Convert guest physical address to host virtual address safely
152 * This variant of rte_vhost_gpa_to_vva() takes care all the
153 * requested length is mapped and contiguous in process address
157 * the guest memory regions
159 * the guest physical address for querying
161 * the size of the requested area to map,
162 * updated with actual size mapped
164 * the host virtual address on success, 0 on failure */
165 static inline uint64_t
166 rte_vhost_va_from_guest_pa(struct rte_vhost_memory
*mem
,
167 uint64_t gpa
, uint64_t *len
)
169 struct rte_vhost_mem_region
*r
;
172 for (i
= 0; i
< mem
->nregions
; i
++) {
173 r
= &mem
->regions
[i
];
174 if (gpa
>= r
->guest_phys_addr
&&
175 gpa
< r
->guest_phys_addr
+ r
->size
) {
177 if (unlikely(*len
> r
->guest_phys_addr
+ r
->size
- gpa
))
178 *len
= r
->guest_phys_addr
+ r
->size
- gpa
;
180 return gpa
- r
->guest_phys_addr
+
/* True when the negotiated features require dirty-page logging. */
#define RTE_VHOST_NEED_LOG(features)	((features) & (1ULL << VHOST_F_LOG_ALL))
/**
 * Log the memory write start with given address.
 *
 * This function only need be invoked when the live migration starts.
 * Therefore, we won't need call it at all in the most of time. For
 * making the performance impact be minimum, it's suggested to do a
 * check before calling it:
 *
 *        if (unlikely(RTE_VHOST_NEED_LOG(features)))
 *                rte_vhost_log_write(vid, addr, len);
 *
 * @param vid
 *  vhost device ID
 * @param addr
 *  the starting address for write
 * @param len
 *  the length to write
 */
void rte_vhost_log_write(int vid, uint64_t addr, uint64_t len);

/**
 * Log the used ring update start at given offset.
 *
 * Same as rte_vhost_log_write, it's suggested to do a check before
 * calling it:
 *
 *        if (unlikely(RTE_VHOST_NEED_LOG(features)))
 *                rte_vhost_log_used_vring(vid, vring_idx, offset, len);
 *
 * @param vid
 *  vhost device ID
 * @param vring_idx
 *  the vring index
 * @param offset
 *  the offset inside the used ring
 * @param len
 *  the length to write
 */
void rte_vhost_log_used_vring(int vid, uint16_t vring_idx,
			      uint64_t offset, uint64_t len);

/**
 * Enable or disable guest->host notifications for the given queue.
 *
 * @param vid
 *  vhost device ID
 * @param queue_id
 *  virtio queue index
 * @param enable
 *  enable (non-zero) or disable (zero) flag
 */
int rte_vhost_enable_guest_notification(int vid, uint16_t queue_id, int enable);
/**
 * Register vhost driver. path could be different for multiple
 * instance support.
 *
 * @param path
 *  The vhost-user socket file path
 * @param flags
 *  RTE_VHOST_USER_* flags controlling the socket behaviour
 */
int rte_vhost_driver_register(const char *path, uint64_t flags);

/* Unregister vhost driver. This is only meaningful to vhost user. */
int rte_vhost_driver_unregister(const char *path);

/**
 * Set the feature bits the vhost-user driver supports.
 *
 * @param path
 *  The vhost-user socket file path
 * @param features
 *  Supported feature bits
 * @return
 *  0 on success, -1 on failure
 */
int rte_vhost_driver_set_features(const char *path, uint64_t features);

/**
 * Enable vhost-user driver features.
 *
 * Note that:
 * - the param features should be a subset of the feature bits provided
 *   by rte_vhost_driver_set_features().
 * - it must be invoked before vhost-user negotiation starts.
 *
 * @param path
 *  The vhost-user socket file path
 * @param features
 *  Features to enable
 * @return
 *  0 on success, -1 on failure
 */
int rte_vhost_driver_enable_features(const char *path, uint64_t features);

/**
 * Disable vhost-user driver features.
 *
 * The two notes at rte_vhost_driver_enable_features() also apply here.
 *
 * @param path
 *  The vhost-user socket file path
 * @param features
 *  Features to disable
 * @return
 *  0 on success, -1 on failure
 */
int rte_vhost_driver_disable_features(const char *path, uint64_t features);

/**
 * Get the feature bits before feature negotiation.
 *
 * @param path
 *  The vhost-user socket file path
 * @param features
 *  A pointer to store the queried feature bits
 * @return
 *  0 on success, -1 on failure
 */
int rte_vhost_driver_get_features(const char *path, uint64_t *features);

/**
 * Get the feature bits after negotiation.
 *
 * @param vid
 *  vhost device ID
 * @param features
 *  A pointer to store the queried feature bits
 * @return
 *  0 on success, -1 on failure
 */
int rte_vhost_get_negotiated_features(int vid, uint64_t *features);

/* Register callbacks. */
int rte_vhost_driver_callback_register(const char *path,
				       struct vhost_device_ops const * const ops);

/**
 * Start the vhost-user driver.
 *
 * This function triggers the vhost-user negotiation.
 *
 * @param path
 *  The vhost-user socket file path
 * @return
 *  0 on success, -1 on failure
 */
int rte_vhost_driver_start(const char *path);
/**
 * Get the MTU value of the device if set in QEMU.
 *
 * @param vid
 *  virtio-net device ID
 * @param mtu
 *  The variable to store the MTU value
 * @return
 *  0: success
 *  -EAGAIN: device not yet started
 *  -ENOTSUP: device does not support MTU feature
 */
int rte_vhost_get_mtu(int vid, uint16_t *mtu);

/**
 * Get the numa node from which the virtio net device's memory
 * is allocated.
 *
 * @param vid
 *  vhost device ID
 * @return
 *  The numa node, -1 on failure
 */
int rte_vhost_get_numa_node(int vid);

/**
 * Get the virtio net device's ifname, which is the vhost-user socket
 * file path.
 *
 * @param vid
 *  vhost device ID
 * @param buf
 *  The buffer to stored the queried ifname
 * @param len
 *  The length of the buffer
 * @return
 *  0 on success, -1 on failure
 */
int rte_vhost_get_ifname(int vid, char *buf, size_t len);

/**
 * Get how many avail entries are left in the queue.
 *
 * @param vid
 *  vhost device ID
 * @param queue_id
 *  virtio queue index
 * @return
 *  num of avail entries left
 */
uint16_t rte_vhost_avail_entries(int vid, uint16_t queue_id);
/**
 * This function adds buffers to the virtio devices RX virtqueue. Buffers can
 * be received from the physical port or from another virtual device. A packet
 * count is returned to indicate the number of packets that were successfully
 * added to the RX queue.
 *
 * @param vid
 *  vhost device ID
 * @param queue_id
 *  virtio queue index in mq case
 * @param pkts
 *  array to contain packets to be enqueued
 * @param count
 *  packets num to be enqueued
 * @return
 *  num of packets enqueued
 */
uint16_t rte_vhost_enqueue_burst(int vid, uint16_t queue_id,
				 struct rte_mbuf **pkts, uint16_t count);

/**
 * This function gets guest buffers from the virtio device TX virtqueue,
 * construct host mbufs, copies guest buffer content to host mbufs and
 * store them in pkts to be processed.
 *
 * @param vid
 *  vhost device ID
 * @param queue_id
 *  virtio queue index in mq case
 * @param mbuf_pool
 *  mbuf_pool where host mbuf is allocated.
 * @param pkts
 *  array to contain packets to be dequeued
 * @param count
 *  packets num to be dequeued
 * @return
 *  num of packets dequeued
 */
uint16_t rte_vhost_dequeue_burst(int vid, uint16_t queue_id,
				 struct rte_mempool *mbuf_pool,
				 struct rte_mbuf **pkts, uint16_t count);
423 * Get guest mem table: a list of memory regions.
425 * An rte_vhost_vhost_memory object will be allocated internaly, to hold the
426 * guest memory regions. Application should free it at destroy_device()
432 * To store the returned mem regions
434 * 0 on success, -1 on failure
436 int rte_vhost_get_mem_table(int vid
, struct rte_vhost_memory
**mem
);
439 * Get guest vring info, including the vring address, vring size, etc.
446 * the structure to hold the requested vring info
448 * 0 on success, -1 on failure
450 int rte_vhost_get_vhost_vring(int vid
, uint16_t vring_idx
,
451 struct rte_vhost_vring
*vring
);
454 * Set id of the last descriptors in avail and used guest vrings.
456 * In case user application operates directly on buffers, it should use this
457 * function on device destruction to retrieve the same values later on in device
458 * creation via rte_vhost_get_vhost_vring(int, uint16_t, struct rte_vhost_vring *)
464 * @param last_avail_idx
465 * id of the last descriptor in avail ring to be set
466 * @param last_used_idx
467 * id of the last descriptor in used ring to be set
469 * 0 on success, -1 on failure
471 int rte_vhost_set_vhost_vring_last_idx(int vid
, uint16_t vring_idx
,
472 uint16_t last_avail_idx
, uint16_t last_used_idx
);
474 #endif /* _RTE_VHOST_H_ */