/*-
 *   BSD LICENSE
 *
 *   Copyright(c) 2010-2017 Intel Corporation. All rights reserved.
 *   All rights reserved.
 *
 *   Redistribution and use in source and binary forms, with or without
 *   modification, are permitted provided that the following conditions
 *   are met:
 *
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in
 *       the documentation and/or other materials provided with the
 *       distribution.
 *     * Neither the name of Intel Corporation nor the names of its
 *       contributors may be used to endorse or promote products derived
 *       from this software without specific prior written permission.
 *
 *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#ifndef _RTE_VHOST_H_
#define _RTE_VHOST_H_

/**
 * @file
 * Interface to vhost-user
 */

#include <stdint.h>
#include <linux/vhost.h>
#include <linux/virtio_ring.h>
#include <sys/eventfd.h>

#include <rte_config.h>
#include <rte_memory.h>
#include <rte_mempool.h>

#define RTE_VHOST_USER_CLIENT			(1ULL << 0)
#define RTE_VHOST_USER_NO_RECONNECT		(1ULL << 1)
#define RTE_VHOST_USER_DEQUEUE_ZERO_COPY	(1ULL << 2)

/**
 * Information relating to memory regions including offsets to
 * addresses in QEMU's memory file.
 */
struct rte_vhost_mem_region {
	uint64_t guest_phys_addr;
	uint64_t guest_user_addr;
	uint64_t host_user_addr;
	uint64_t size;
	void	 *mmap_addr;
	uint64_t mmap_size;
	int fd;
};

/**
 * Memory structure includes region and mapping information.
 */
struct rte_vhost_memory {
	uint32_t nregions;
	struct rte_vhost_mem_region regions[0];
};

struct rte_vhost_vring {
	struct vring_desc *desc;
	struct vring_avail *avail;
	struct vring_used *used;
	uint64_t log_guest_addr;

	int callfd;
	int kickfd;
	uint16_t size;

	uint16_t last_avail_idx;
	uint16_t last_used_idx;
};

/**
 * Device and vring operations.
 */
struct vhost_device_ops {
	int (*new_device)(int vid);		/**< Add device. */
	void (*destroy_device)(int vid);	/**< Remove device. */

	/** Triggered when a vring is enabled or disabled. */
	int (*vring_state_changed)(int vid, uint16_t queue_id, int enable);

	/**
	 * Features could be changed after the feature negotiation.
	 * For example, VHOST_F_LOG_ALL will be set/cleared at the
	 * start/end of live migration, respectively. This callback
	 * is used to inform the application of such a change.
	 */
	int (*features_changed)(int vid, uint64_t features);
	int (*vhost_nvme_admin_passthrough)(int vid, void *cmd, void *cqe, void *buf);
	int (*vhost_nvme_set_cq_call)(int vid, uint16_t qid, int fd);
	int (*vhost_nvme_get_cap)(int vid, uint64_t *cap);

	int (*new_connection)(int vid);
	void (*destroy_connection)(int vid);

	int (*get_config)(int vid, uint8_t *config, uint32_t config_len);
	int (*set_config)(int vid, uint8_t *config, uint32_t offset,
			  uint32_t len, uint32_t flags);

	void *reserved[2]; /**< Reserved for future extension */
};
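
/*
 * Example (illustrative sketch only, not part of this API): an
 * application typically implements a subset of these callbacks and
 * registers them via rte_vhost_driver_callback_register(), declared
 * below. The callback and structure names here are hypothetical.
 *
 *	static int my_new_device(int vid) { return 0; }
 *	static void my_destroy_device(int vid) { }
 *
 *	static const struct vhost_device_ops my_ops = {
 *		.new_device = my_new_device,
 *		.destroy_device = my_destroy_device,
 *	};
 */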

/**
 * Convert guest physical address to host virtual address
 *
 * @param mem
 *  the guest memory regions
 * @param gpa
 *  the guest physical address for querying
 * @return
 *  the host virtual address on success, 0 on failure
 */
static inline uint64_t __attribute__((always_inline))
rte_vhost_gpa_to_vva(struct rte_vhost_memory *mem, uint64_t gpa)
{
	struct rte_vhost_mem_region *reg;
	uint32_t i;

	for (i = 0; i < mem->nregions; i++) {
		reg = &mem->regions[i];
		if (gpa >= reg->guest_phys_addr &&
		    gpa <  reg->guest_phys_addr + reg->size) {
			return gpa - reg->guest_phys_addr +
			       reg->host_user_addr;
		}
	}

	return 0;
}

/**
 * Convert guest physical address to host virtual address safely
 *
 * This variant of rte_vhost_gpa_to_vva() additionally checks that the
 * requested length is mapped and contiguous in the process address
 * space; if it is not, *len is shrunk to the part that is.
 *
 * @param mem
 *  the guest memory regions
 * @param gpa
 *  the guest physical address for querying
 * @param len
 *  the size of the requested area to map,
 *  updated with the actual size mapped
 * @return
 *  the host virtual address on success, 0 on failure
 */
static inline uint64_t
rte_vhost_va_from_guest_pa(struct rte_vhost_memory *mem,
			   uint64_t gpa, uint64_t *len)
{
	struct rte_vhost_mem_region *r;
	uint32_t i;

	for (i = 0; i < mem->nregions; i++) {
		r = &mem->regions[i];
		if (gpa >= r->guest_phys_addr &&
		    gpa <  r->guest_phys_addr + r->size) {

			if (unlikely(*len > r->guest_phys_addr + r->size - gpa))
				*len = r->guest_phys_addr + r->size - gpa;

			return gpa - r->guest_phys_addr +
			       r->host_user_addr;
		}
	}
	*len = 0;

	return 0;
}
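
/*
 * Usage sketch (illustrative only): since *len may be shrunk to the
 * bytes that are actually contiguous in the current region, a caller
 * needing the full range mapped should check the updated length.
 * Here mem, gpa and requested_len are assumed to be supplied by the
 * caller.
 *
 *	uint64_t len = requested_len;
 *	uint64_t vva = rte_vhost_va_from_guest_pa(mem, gpa, &len);
 *
 *	if (vva == 0 || len < requested_len) {
 *		// range is unmapped, or not fully contiguous here
 *	}
 */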

#define RTE_VHOST_NEED_LOG(features)	((features) & (1ULL << VHOST_F_LOG_ALL))

/**
 * Log the memory write start with given address.
 *
 * This function only needs to be invoked when live migration starts.
 * Therefore, we won't need to call it at all most of the time. To keep
 * the performance impact minimal, it's suggested to do a check before
 * calling it:
 *
 *        if (unlikely(RTE_VHOST_NEED_LOG(features)))
 *                rte_vhost_log_write(vid, addr, len);
 *
 * @param vid
 *  vhost device ID
 * @param addr
 *  the starting address for write
 * @param len
 *  the length to write
 */
void rte_vhost_log_write(int vid, uint64_t addr, uint64_t len);

/**
 * Log the used ring update, starting at the given offset.
 *
 * As with rte_vhost_log_write(), it's suggested to do a check before
 * calling it:
 *
 *        if (unlikely(RTE_VHOST_NEED_LOG(features)))
 *                rte_vhost_log_used_vring(vid, vring_idx, offset, len);
 *
 * @param vid
 *  vhost device ID
 * @param vring_idx
 *  the vring index
 * @param offset
 *  the offset inside the used ring
 * @param len
 *  the length to write
 */
void rte_vhost_log_used_vring(int vid, uint16_t vring_idx,
			      uint64_t offset, uint64_t len);

int rte_vhost_enable_guest_notification(int vid, uint16_t queue_id, int enable);

/**
 * Register the vhost driver. The path can differ to support multiple
 * instances.
 */
int rte_vhost_driver_register(const char *path, uint64_t flags);

/* Unregister the vhost driver. This is only meaningful for vhost-user. */
int rte_vhost_driver_unregister(const char *path);

/**
 * Set the feature bits the vhost-user driver supports.
 *
 * @param path
 *  The vhost-user socket file path
 * @param features
 *  Supported features
 * @return
 *  0 on success, -1 on failure
 */
int rte_vhost_driver_set_features(const char *path, uint64_t features);

/**
 * Enable vhost-user driver features.
 *
 * Note that
 * - the param @features should be a subset of the feature bits provided
 *   by rte_vhost_driver_set_features().
 * - it must be invoked before vhost-user negotiation starts.
 *
 * @param path
 *  The vhost-user socket file path
 * @param features
 *  Features to enable
 * @return
 *  0 on success, -1 on failure
 */
int rte_vhost_driver_enable_features(const char *path, uint64_t features);

/**
 * Disable vhost-user driver features.
 *
 * The two notes at rte_vhost_driver_enable_features() also apply here.
 *
 * @param path
 *  The vhost-user socket file path
 * @param features
 *  Features to disable
 * @return
 *  0 on success, -1 on failure
 */
int rte_vhost_driver_disable_features(const char *path, uint64_t features);
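
/*
 * Example (illustrative sketch only): before negotiation starts, an
 * application could first advertise a full feature set and then mask
 * out individual bits. VHOST_F_LOG_ALL comes from <linux/vhost.h>,
 * included above; path and my_features are placeholders.
 *
 *	rte_vhost_driver_set_features(path, my_features);
 *	// e.g. drop dirty-page logging if live migration is not wanted
 *	rte_vhost_driver_disable_features(path, 1ULL << VHOST_F_LOG_ALL);
 */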

/**
 * Get the feature bits before feature negotiation.
 *
 * @param path
 *  The vhost-user socket file path
 * @param features
 *  A pointer to store the queried feature bits
 * @return
 *  0 on success, -1 on failure
 */
int rte_vhost_driver_get_features(const char *path, uint64_t *features);

/**
 * Get the feature bits after negotiation
 *
 * @param vid
 *  Vhost device ID
 * @param features
 *  A pointer to store the queried feature bits
 * @return
 *  0 on success, -1 on failure
 */
int rte_vhost_get_negotiated_features(int vid, uint64_t *features);

/* Register callbacks. */
int rte_vhost_driver_callback_register(const char *path,
		struct vhost_device_ops const * const ops);

/**
 * Start the vhost-user driver.
 *
 * This function triggers the vhost-user negotiation.
 *
 * @param path
 *  The vhost-user socket file path
 * @return
 *  0 on success, -1 on failure
 */
int rte_vhost_driver_start(const char *path);
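
/*
 * Example (illustrative sketch only): a typical bring-up sequence for
 * one vhost-user socket, using only functions declared in this file.
 * The socket path and the my_ops structure (see the sketch above) are
 * placeholders supplied by the application.
 *
 *	const char *path = "/tmp/vhost.0";
 *
 *	if (rte_vhost_driver_register(path, 0) != 0)
 *		return -1;
 *	if (rte_vhost_driver_callback_register(path, &my_ops) != 0)
 *		return -1;
 *	if (rte_vhost_driver_start(path) != 0)
 *		return -1;
 */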

/**
 * Get the MTU value of the device if set in QEMU.
 *
 * @param vid
 *  virtio-net device ID
 * @param mtu
 *  The variable to store the MTU value
 *
 * @return
 *  0: success
 *  -EAGAIN: device not yet started
 *  -ENOTSUP: device does not support MTU feature
 */
int rte_vhost_get_mtu(int vid, uint16_t *mtu);

/**
 * Get the NUMA node from which the virtio-net device's memory
 * is allocated.
 *
 * @param vid
 *  vhost device ID
 *
 * @return
 *  The NUMA node, -1 on failure
 */
int rte_vhost_get_numa_node(int vid);

/**
 * Get the virtio-net device's ifname, which is the vhost-user socket
 * file path.
 *
 * @param vid
 *  vhost device ID
 * @param buf
 *  The buffer to store the queried ifname
 * @param len
 *  The length of buf
 *
 * @return
 *  0 on success, -1 on failure
 */
int rte_vhost_get_ifname(int vid, char *buf, size_t len);

/**
 * Get how many avail entries are left in the queue
 *
 * @param vid
 *  vhost device ID
 * @param queue_id
 *  virtio queue index
 *
 * @return
 *  num of avail entries left
 */
uint16_t rte_vhost_avail_entries(int vid, uint16_t queue_id);

struct rte_mbuf;
struct rte_mempool;
/**
 * This function adds buffers to the virtio device's RX virtqueue. Buffers
 * can be received from the physical port or from another virtual device.
 * A packet count is returned to indicate the number of packets that were
 * successfully added to the RX queue.
 * @param vid
 *  vhost device ID
 * @param queue_id
 *  virtio queue index in mq case
 * @param pkts
 *  array to contain packets to be enqueued
 * @param count
 *  packets num to be enqueued
 * @return
 *  num of packets enqueued
 */
uint16_t rte_vhost_enqueue_burst(int vid, uint16_t queue_id,
		struct rte_mbuf **pkts, uint16_t count);

/**
 * This function gets guest buffers from the virtio device TX virtqueue,
 * constructs host mbufs, copies guest buffer content to host mbufs and
 * stores them in pkts to be processed.
 * @param vid
 *  vhost device ID
 * @param queue_id
 *  virtio queue index in mq case
 * @param mbuf_pool
 *  mbuf_pool where host mbuf is allocated.
 * @param pkts
 *  array to contain packets to be dequeued
 * @param count
 *  packets num to be dequeued
 * @return
 *  num of packets dequeued
 */
uint16_t rte_vhost_dequeue_burst(int vid, uint16_t queue_id,
		struct rte_mempool *mbuf_pool, struct rte_mbuf **pkts, uint16_t count);
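
/*
 * Example (illustrative sketch only): a minimal polling step that
 * echoes packets from the guest's TX queue back into its RX queue,
 * assuming the default copy mode and a single-queue virtio-net device
 * (queue 1 = guest TX, queue 0 = guest RX). mbuf_pool is a placeholder
 * created by the application, and rte_pktmbuf_free() comes from
 * <rte_mbuf.h>, which the application would include itself.
 *
 *	struct rte_mbuf *pkts[32];
 *	uint16_t nb_rx, i;
 *
 *	nb_rx = rte_vhost_dequeue_burst(vid, 1, mbuf_pool, pkts, 32);
 *	rte_vhost_enqueue_burst(vid, 0, pkts, nb_rx);
 *	// enqueue copies the data; the caller still owns the mbufs
 *	for (i = 0; i < nb_rx; i++)
 *		rte_pktmbuf_free(pkts[i]);
 */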

/**
 * Get guest mem table: a list of memory regions.
 *
 * An rte_vhost_memory object will be allocated internally, to hold the
 * guest memory regions. The application should free it at the
 * destroy_device() callback.
 *
 * @param vid
 *  vhost device ID
 * @param mem
 *  To store the returned mem regions
 * @return
 *  0 on success, -1 on failure
 */
int rte_vhost_get_mem_table(int vid, struct rte_vhost_memory **mem);
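
/*
 * Usage sketch (illustrative only): fetching the memory table and
 * translating a guest physical address with rte_vhost_gpa_to_vva()
 * defined above. gpa is a placeholder; free() is assumed here since
 * the table is allocated internally and, per the note above, released
 * by the application.
 *
 *	struct rte_vhost_memory *mem;
 *	uint64_t vva;
 *
 *	if (rte_vhost_get_mem_table(vid, &mem) != 0)
 *		return -1;
 *	vva = rte_vhost_gpa_to_vva(mem, gpa);
 *	free(mem);
 */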

/**
 * Get guest vring info, including the vring address, vring size, etc.
 *
 * @param vid
 *  vhost device ID
 * @param vring_idx
 *  vring index
 * @param vring
 *  the structure to hold the requested vring info
 * @return
 *  0 on success, -1 on failure
 */
int rte_vhost_get_vhost_vring(int vid, uint16_t vring_idx,
			      struct rte_vhost_vring *vring);

/**
 * Set the id of the last descriptor in the avail and used guest vrings.
 *
 * In case the user application operates directly on buffers, it should
 * use this function on device destruction to retrieve the same values
 * later on in device creation via
 * rte_vhost_get_vhost_vring(int, uint16_t, struct rte_vhost_vring *).
 *
 * @param vid
 *  vhost device ID
 * @param vring_idx
 *  vring index
 * @param last_avail_idx
 *  id of the last descriptor in the avail ring to be set
 * @param last_used_idx
 *  id of the last descriptor in the used ring to be set
 * @return
 *  0 on success, -1 on failure
 */
int rte_vhost_set_vhost_vring_last_idx(int vid, uint16_t vring_idx,
		uint16_t last_avail_idx, uint16_t last_used_idx);
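
/*
 * Usage sketch (illustrative only): saving the ring indices when a
 * device goes away and restoring them when it reappears, via
 * rte_vhost_get_vhost_vring() declared above. saved_avail and
 * saved_used are placeholders for application state.
 *
 *	// in destroy_device(): remember where processing stopped
 *	struct rte_vhost_vring vring;
 *	rte_vhost_get_vhost_vring(vid, 0, &vring);
 *	saved_avail = vring.last_avail_idx;
 *	saved_used = vring.last_used_idx;
 *
 *	// in new_device(): resume from the saved position
 *	rte_vhost_set_vhost_vring_last_idx(vid, 0, saved_avail, saved_used);
 */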

#endif /* _RTE_VHOST_H_ */