/*
 * Vendored from DPDK: lib/librte_vhost/rte_vhost.h
 * (ceph/src/seastar/dpdk; Ceph 15.2.0 Octopus import, via git.proxmox.com)
 */
1 /* SPDX-License-Identifier: BSD-3-Clause
2 * Copyright(c) 2010-2017 Intel Corporation
3 */
4
5 #ifndef _RTE_VHOST_H_
6 #define _RTE_VHOST_H_
7
8 /**
9 * @file
10 * Interface to vhost-user
11 */
12
13 #include <stdint.h>
14 #include <sys/eventfd.h>
15
16 #include <rte_memory.h>
17 #include <rte_mempool.h>
18
19 #ifdef __cplusplus
20 extern "C" {
21 #endif
22
23 /* These are not C++-aware. */
24 #include <linux/vhost.h>
25 #include <linux/virtio_ring.h>
26
27 #define RTE_VHOST_USER_CLIENT (1ULL << 0)
28 #define RTE_VHOST_USER_NO_RECONNECT (1ULL << 1)
29 #define RTE_VHOST_USER_DEQUEUE_ZERO_COPY (1ULL << 2)
30 #define RTE_VHOST_USER_IOMMU_SUPPORT (1ULL << 3)
31 #define RTE_VHOST_USER_POSTCOPY_SUPPORT (1ULL << 4)
32
33 /** Protocol features. */
34 #ifndef VHOST_USER_PROTOCOL_F_MQ
35 #define VHOST_USER_PROTOCOL_F_MQ 0
36 #endif
37
38 #ifndef VHOST_USER_PROTOCOL_F_LOG_SHMFD
39 #define VHOST_USER_PROTOCOL_F_LOG_SHMFD 1
40 #endif
41
42 #ifndef VHOST_USER_PROTOCOL_F_RARP
43 #define VHOST_USER_PROTOCOL_F_RARP 2
44 #endif
45
46 #ifndef VHOST_USER_PROTOCOL_F_REPLY_ACK
47 #define VHOST_USER_PROTOCOL_F_REPLY_ACK 3
48 #endif
49
50 #ifndef VHOST_USER_PROTOCOL_F_NET_MTU
51 #define VHOST_USER_PROTOCOL_F_NET_MTU 4
52 #endif
53
54 #ifndef VHOST_USER_PROTOCOL_F_SLAVE_REQ
55 #define VHOST_USER_PROTOCOL_F_SLAVE_REQ 5
56 #endif
57
58 #ifndef VHOST_USER_PROTOCOL_F_CRYPTO_SESSION
59 #define VHOST_USER_PROTOCOL_F_CRYPTO_SESSION 7
60 #endif
61
62 #ifndef VHOST_USER_PROTOCOL_F_PAGEFAULT
63 #define VHOST_USER_PROTOCOL_F_PAGEFAULT 8
64 #endif
65
66 #ifndef VHOST_USER_PROTOCOL_F_SLAVE_SEND_FD
67 #define VHOST_USER_PROTOCOL_F_SLAVE_SEND_FD 10
68 #endif
69
70 #ifndef VHOST_USER_PROTOCOL_F_HOST_NOTIFIER
71 #define VHOST_USER_PROTOCOL_F_HOST_NOTIFIER 11
72 #endif
73
74 /** Indicate whether protocol features negotiation is supported. */
75 #ifndef VHOST_USER_F_PROTOCOL_FEATURES
76 #define VHOST_USER_F_PROTOCOL_FEATURES 30
77 #endif
78
/**
 * Information relating to memory regions including offsets to
 * addresses in QEMU's memory file.
 */
struct rte_vhost_mem_region {
	uint64_t guest_phys_addr;	/**< Guest physical start address of the region. */
	uint64_t guest_user_addr;	/**< Region start address in the master's (QEMU's) address space — NOTE(review): inferred from vhost-user conventions, confirm. */
	uint64_t host_user_addr;	/**< Host virtual address the region is mapped at (used for GPA->VVA translation). */
	uint64_t size;			/**< Region length in bytes. */
	void	 *mmap_addr;		/**< Start of the local mmap covering this region. */
	uint64_t mmap_size;		/**< Length of the local mmap in bytes. */
	int fd;				/**< File descriptor backing the region — presumably the shared-memory fd received from the master. */
};
92
/**
 * Memory structure includes region and mapping information.
 */
struct rte_vhost_memory {
	uint32_t nregions;	/**< Number of valid entries in regions[]. */
	/** Flexible array member; must be allocated with room for nregions entries. */
	struct rte_vhost_mem_region regions[];
};
100
/** Guest vring addresses and notification file descriptors. */
struct rte_vhost_vring {
	struct vring_desc *desc;	/**< Descriptor table. */
	struct vring_avail *avail;	/**< Available ring. */
	struct vring_used *used;	/**< Used ring. */
	/** Guest address used when logging used-ring updates — see rte_vhost_log_used_vring(). */
	uint64_t log_guest_addr;

	/** Deprecated, use rte_vhost_vring_call() instead. */
	int callfd;

	int kickfd;	/**< Eventfd the frontend signals to kick this queue — NOTE(review): semantics per vhost convention, confirm. */
	uint16_t size;	/**< Number of descriptors in the vring. */
};
113
/**
 * Possible results of the vhost user message handling callbacks
 */
enum rte_vhost_msg_result {
	/** Message handling failed */
	RTE_VHOST_MSG_RESULT_ERR = -1,
	/** Message handling successful */
	RTE_VHOST_MSG_RESULT_OK = 0,
	/** Message handling successful and reply prepared */
	RTE_VHOST_MSG_RESULT_REPLY = 1,
	/** Message not handled */
	RTE_VHOST_MSG_RESULT_NOT_HANDLED,
};
127
128 /**
129 * Function prototype for the vhost backend to handle specific vhost user
130 * messages.
131 *
132 * @param vid
133 * vhost device id
134 * @param msg
135 * Message pointer.
136 * @return
137 * RTE_VHOST_MSG_RESULT_OK on success,
138 * RTE_VHOST_MSG_RESULT_REPLY on success with reply,
139 * RTE_VHOST_MSG_RESULT_ERR on failure,
140 * RTE_VHOST_MSG_RESULT_NOT_HANDLED if message was not handled.
141 */
142 typedef enum rte_vhost_msg_result (*rte_vhost_msg_handle)(int vid, void *msg);
143
/**
 * Optional vhost user message handlers, registered via
 * rte_vhost_extern_callback_register().
 */
struct rte_vhost_user_extern_ops {
	/** Called prior to the master message handling. */
	rte_vhost_msg_handle pre_msg_handle;
	/** Called after the master message handling. */
	rte_vhost_msg_handle post_msg_handle;
};
153
/**
 * Device and vring operations.
 */
struct vhost_device_ops {
	int (*new_device)(int vid);		/**< Add device. */
	void (*destroy_device)(int vid);	/**< Remove device. */

	/** Triggered when a vring is enabled or disabled. */
	int (*vring_state_changed)(int vid, uint16_t queue_id, int enable);

	/**
	 * Features could be changed after the feature negotiation.
	 * For example, VHOST_F_LOG_ALL will be set/cleared at the
	 * start/end of live migration, respectively. This callback
	 * is used to inform the application on such change.
	 */
	int (*features_changed)(int vid, uint64_t features);

	/** Called on a new vhost-user connection — NOTE(review): inferred from name, confirm against vhost lib. */
	int (*new_connection)(int vid);
	/** Called when a vhost-user connection is torn down — NOTE(review): inferred from name, confirm. */
	void (*destroy_connection)(int vid);

	void *reserved[2]; /**< Reserved for future extension */
};
176
177 /**
178 * Convert guest physical address to host virtual address
179 *
180 * This function is deprecated because unsafe.
181 * New rte_vhost_va_from_guest_pa() should be used instead to ensure
182 * guest physical ranges are fully and contiguously mapped into
183 * process virtual address space.
184 *
185 * @param mem
186 * the guest memory regions
187 * @param gpa
188 * the guest physical address for querying
189 * @return
190 * the host virtual address on success, 0 on failure
191 */
192 __rte_deprecated
193 static __rte_always_inline uint64_t
194 rte_vhost_gpa_to_vva(struct rte_vhost_memory *mem, uint64_t gpa)
195 {
196 struct rte_vhost_mem_region *reg;
197 uint32_t i;
198
199 for (i = 0; i < mem->nregions; i++) {
200 reg = &mem->regions[i];
201 if (gpa >= reg->guest_phys_addr &&
202 gpa < reg->guest_phys_addr + reg->size) {
203 return gpa - reg->guest_phys_addr +
204 reg->host_user_addr;
205 }
206 }
207
208 return 0;
209 }
210
211 /**
212 * Convert guest physical address to host virtual address safely
213 *
214 * This variant of rte_vhost_gpa_to_vva() takes care all the
215 * requested length is mapped and contiguous in process address
216 * space.
217 *
218 * @param mem
219 * the guest memory regions
220 * @param gpa
221 * the guest physical address for querying
222 * @param len
223 * the size of the requested area to map, updated with actual size mapped
224 * @return
225 * the host virtual address on success, 0 on failure
226 */
227 static __rte_always_inline uint64_t
228 rte_vhost_va_from_guest_pa(struct rte_vhost_memory *mem,
229 uint64_t gpa, uint64_t *len)
230 {
231 struct rte_vhost_mem_region *r;
232 uint32_t i;
233
234 for (i = 0; i < mem->nregions; i++) {
235 r = &mem->regions[i];
236 if (gpa >= r->guest_phys_addr &&
237 gpa < r->guest_phys_addr + r->size) {
238
239 if (unlikely(*len > r->guest_phys_addr + r->size - gpa))
240 *len = r->guest_phys_addr + r->size - gpa;
241
242 return gpa - r->guest_phys_addr +
243 r->host_user_addr;
244 }
245 }
246 *len = 0;
247
248 return 0;
249 }
250
251 #define RTE_VHOST_NEED_LOG(features) ((features) & (1ULL << VHOST_F_LOG_ALL))
252
253 /**
254 * Log the memory write start with given address.
255 *
256 * This function only need be invoked when the live migration starts.
257 * Therefore, we won't need call it at all in the most of time. For
258 * making the performance impact be minimum, it's suggested to do a
259 * check before calling it:
260 *
261 * if (unlikely(RTE_VHOST_NEED_LOG(features)))
262 * rte_vhost_log_write(vid, addr, len);
263 *
264 * @param vid
265 * vhost device ID
266 * @param addr
267 * the starting address for write
268 * @param len
269 * the length to write
270 */
271 void rte_vhost_log_write(int vid, uint64_t addr, uint64_t len);
272
273 /**
274 * Log the used ring update start at given offset.
275 *
276 * Same as rte_vhost_log_write, it's suggested to do a check before
277 * calling it:
278 *
279 * if (unlikely(RTE_VHOST_NEED_LOG(features)))
280 * rte_vhost_log_used_vring(vid, vring_idx, offset, len);
281 *
282 * @param vid
283 * vhost device ID
284 * @param vring_idx
285 * the vring index
286 * @param offset
287 * the offset inside the used ring
288 * @param len
289 * the length to write
290 */
291 void rte_vhost_log_used_vring(int vid, uint16_t vring_idx,
292 uint64_t offset, uint64_t len);
293
/**
 * Enable or disable guest notification for the given queue.
 *
 * @param vid
 *  vhost device ID
 * @param queue_id
 *  virtio queue index
 * @param enable
 *  nonzero to enable notification, 0 to disable — NOTE(review):
 *  polarity inferred from the parameter name; confirm against the
 *  vhost implementation.
 * @return
 *  presumably 0 on success, -1 on failure, matching the convention
 *  of neighbouring APIs in this header — verify.
 */
int rte_vhost_enable_guest_notification(int vid, uint16_t queue_id, int enable);
295
296 /**
297 * Register vhost driver. path could be different for multiple
298 * instance support.
299 */
300 int rte_vhost_driver_register(const char *path, uint64_t flags);
301
302 /* Unregister vhost driver. This is only meaningful to vhost user. */
303 int rte_vhost_driver_unregister(const char *path);
304
305 /**
306 * Set the vdpa device id, enforce single connection per socket
307 *
308 * @param path
309 * The vhost-user socket file path
310 * @param did
311 * Device id
312 * @return
313 * 0 on success, -1 on failure
314 */
315 int __rte_experimental
316 rte_vhost_driver_attach_vdpa_device(const char *path, int did);
317
318 /**
319 * Unset the vdpa device id
320 *
321 * @param path
322 * The vhost-user socket file path
323 * @return
324 * 0 on success, -1 on failure
325 */
326 int __rte_experimental
327 rte_vhost_driver_detach_vdpa_device(const char *path);
328
329 /**
330 * Get the device id
331 *
332 * @param path
333 * The vhost-user socket file path
334 * @return
335 * Device id, -1 on failure
336 */
337 int __rte_experimental
338 rte_vhost_driver_get_vdpa_device_id(const char *path);
339
340 /**
341 * Set the feature bits the vhost-user driver supports.
342 *
343 * @param path
344 * The vhost-user socket file path
345 * @param features
346 * Supported features
347 * @return
348 * 0 on success, -1 on failure
349 */
350 int rte_vhost_driver_set_features(const char *path, uint64_t features);
351
352 /**
353 * Enable vhost-user driver features.
354 *
355 * Note that
356 * - the param features should be a subset of the feature bits provided
357 * by rte_vhost_driver_set_features().
358 * - it must be invoked before vhost-user negotiation starts.
359 *
360 * @param path
361 * The vhost-user socket file path
362 * @param features
363 * Features to enable
364 * @return
365 * 0 on success, -1 on failure
366 */
367 int rte_vhost_driver_enable_features(const char *path, uint64_t features);
368
369 /**
370 * Disable vhost-user driver features.
371 *
372 * The two notes at rte_vhost_driver_enable_features() also apply here.
373 *
374 * @param path
375 * The vhost-user socket file path
376 * @param features
377 * Features to disable
378 * @return
379 * 0 on success, -1 on failure
380 */
381 int rte_vhost_driver_disable_features(const char *path, uint64_t features);
382
383 /**
384 * Get the feature bits before feature negotiation.
385 *
386 * @param path
387 * The vhost-user socket file path
388 * @param features
389 * A pointer to store the queried feature bits
390 * @return
391 * 0 on success, -1 on failure
392 */
393 int rte_vhost_driver_get_features(const char *path, uint64_t *features);
394
395 /**
396 * Set the protocol feature bits before feature negotiation.
397 *
398 * @param path
399 * The vhost-user socket file path
400 * @param protocol_features
401 * Supported protocol features
402 * @return
403 * 0 on success, -1 on failure
404 */
405 int __rte_experimental
406 rte_vhost_driver_set_protocol_features(const char *path,
407 uint64_t protocol_features);
408
409 /**
410 * Get the protocol feature bits before feature negotiation.
411 *
412 * @param path
413 * The vhost-user socket file path
414 * @param protocol_features
415 * A pointer to store the queried protocol feature bits
416 * @return
417 * 0 on success, -1 on failure
418 */
419 int __rte_experimental
420 rte_vhost_driver_get_protocol_features(const char *path,
421 uint64_t *protocol_features);
422
423 /**
424 * Get the queue number bits before feature negotiation.
425 *
426 * @param path
427 * The vhost-user socket file path
428 * @param queue_num
429 * A pointer to store the queried queue number bits
430 * @return
431 * 0 on success, -1 on failure
432 */
433 int __rte_experimental
434 rte_vhost_driver_get_queue_num(const char *path, uint32_t *queue_num);
435
436 /**
437 * Get the feature bits after negotiation
438 *
439 * @param vid
440 * Vhost device ID
441 * @param features
442 * A pointer to store the queried feature bits
443 * @return
444 * 0 on success, -1 on failure
445 */
446 int rte_vhost_get_negotiated_features(int vid, uint64_t *features);
447
448 /* Register callbacks. */
449 int rte_vhost_driver_callback_register(const char *path,
450 struct vhost_device_ops const * const ops);
451
452 /**
453 *
454 * Start the vhost-user driver.
455 *
456 * This function triggers the vhost-user negotiation.
457 *
458 * @param path
459 * The vhost-user socket file path
460 * @return
461 * 0 on success, -1 on failure
462 */
463 int rte_vhost_driver_start(const char *path);
464
465 /**
466 * Get the MTU value of the device if set in QEMU.
467 *
468 * @param vid
469 * virtio-net device ID
470 * @param mtu
471 * The variable to store the MTU value
472 *
473 * @return
474 * 0: success
475 * -EAGAIN: device not yet started
476 * -ENOTSUP: device does not support MTU feature
477 */
478 int rte_vhost_get_mtu(int vid, uint16_t *mtu);
479
480 /**
481 * Get the numa node from which the virtio net device's memory
482 * is allocated.
483 *
484 * @param vid
485 * vhost device ID
486 *
487 * @return
488 * The numa node, -1 on failure
489 */
490 int rte_vhost_get_numa_node(int vid);
491
492 /**
493 * @deprecated
494 * Get the number of queues the device supports.
495 *
496 * Note this function is deprecated, as it returns a queue pair number,
497 * which is vhost specific. Instead, rte_vhost_get_vring_num should
498 * be used.
499 *
500 * @param vid
501 * vhost device ID
502 *
503 * @return
504 * The number of queues, 0 on failure
505 */
506 __rte_deprecated
507 uint32_t rte_vhost_get_queue_num(int vid);
508
509 /**
510 * Get the number of vrings the device supports.
511 *
512 * @param vid
513 * vhost device ID
514 *
515 * @return
516 * The number of vrings, 0 on failure
517 */
518 uint16_t rte_vhost_get_vring_num(int vid);
519
520 /**
521 * Get the virtio net device's ifname, which is the vhost-user socket
522 * file path.
523 *
524 * @param vid
525 * vhost device ID
526 * @param buf
527 * The buffer to stored the queried ifname
528 * @param len
529 * The length of buf
530 *
531 * @return
532 * 0 on success, -1 on failure
533 */
534 int rte_vhost_get_ifname(int vid, char *buf, size_t len);
535
536 /**
537 * Get how many avail entries are left in the queue
538 *
539 * @param vid
540 * vhost device ID
541 * @param queue_id
542 * virtio queue index
543 *
544 * @return
545 * num of avail entries left
546 */
547 uint16_t rte_vhost_avail_entries(int vid, uint16_t queue_id);
548
549 struct rte_mbuf;
550 struct rte_mempool;
551 /**
 * This function adds buffers to the virtio device's RX virtqueue. Buffers can
553 * be received from the physical port or from another virtual device. A packet
554 * count is returned to indicate the number of packets that were successfully
555 * added to the RX queue.
556 * @param vid
557 * vhost device ID
558 * @param queue_id
559 * virtio queue index in mq case
560 * @param pkts
561 * array to contain packets to be enqueued
562 * @param count
563 * packets num to be enqueued
564 * @return
565 * num of packets enqueued
566 */
567 uint16_t rte_vhost_enqueue_burst(int vid, uint16_t queue_id,
568 struct rte_mbuf **pkts, uint16_t count);
569
570 /**
571 * This function gets guest buffers from the virtio device TX virtqueue,
572 * construct host mbufs, copies guest buffer content to host mbufs and
573 * store them in pkts to be processed.
574 * @param vid
575 * vhost device ID
576 * @param queue_id
577 * virtio queue index in mq case
578 * @param mbuf_pool
579 * mbuf_pool where host mbuf is allocated.
580 * @param pkts
581 * array to contain packets to be dequeued
582 * @param count
583 * packets num to be dequeued
584 * @return
585 * num of packets dequeued
586 */
587 uint16_t rte_vhost_dequeue_burst(int vid, uint16_t queue_id,
588 struct rte_mempool *mbuf_pool, struct rte_mbuf **pkts, uint16_t count);
589
590 /**
591 * Get guest mem table: a list of memory regions.
592 *
 * An rte_vhost_memory object will be allocated internally, to hold the
594 * guest memory regions. Application should free it at destroy_device()
595 * callback.
596 *
597 * @param vid
598 * vhost device ID
599 * @param mem
600 * To store the returned mem regions
601 * @return
602 * 0 on success, -1 on failure
603 */
604 int rte_vhost_get_mem_table(int vid, struct rte_vhost_memory **mem);
605
606 /**
607 * Get guest vring info, including the vring address, vring size, etc.
608 *
609 * @param vid
610 * vhost device ID
611 * @param vring_idx
612 * vring index
613 * @param vring
614 * the structure to hold the requested vring info
615 * @return
616 * 0 on success, -1 on failure
617 */
618 int rte_vhost_get_vhost_vring(int vid, uint16_t vring_idx,
619 struct rte_vhost_vring *vring);
620
621 /**
622 * Notify the guest that used descriptors have been added to the vring. This
623 * function acts as a memory barrier.
624 *
625 * @param vid
626 * vhost device ID
627 * @param vring_idx
628 * vring index
629 * @return
630 * 0 on success, -1 on failure
631 */
632 int rte_vhost_vring_call(int vid, uint16_t vring_idx);
633
634 /**
635 * Get vhost RX queue avail count.
636 *
637 * @param vid
638 * vhost device ID
639 * @param qid
640 * virtio queue index in mq case
641 * @return
642 * num of desc available
643 */
644 uint32_t rte_vhost_rx_queue_count(int vid, uint16_t qid);
645
646 /**
647 * Get log base and log size of the vhost device
648 *
649 * @param vid
650 * vhost device ID
651 * @param log_base
652 * vhost log base
653 * @param log_size
654 * vhost log size
655 * @return
656 * 0 on success, -1 on failure
657 */
658 int __rte_experimental
659 rte_vhost_get_log_base(int vid, uint64_t *log_base, uint64_t *log_size);
660
661 /**
662 * Get last_avail/used_idx of the vhost virtqueue
663 *
664 * @param vid
665 * vhost device ID
666 * @param queue_id
667 * vhost queue index
668 * @param last_avail_idx
669 * vhost last_avail_idx to get
670 * @param last_used_idx
671 * vhost last_used_idx to get
672 * @return
673 * 0 on success, -1 on failure
674 */
675 int __rte_experimental
676 rte_vhost_get_vring_base(int vid, uint16_t queue_id,
677 uint16_t *last_avail_idx, uint16_t *last_used_idx);
678
679 /**
680 * Set last_avail/used_idx of the vhost virtqueue
681 *
682 * @param vid
683 * vhost device ID
684 * @param queue_id
685 * vhost queue index
686 * @param last_avail_idx
687 * last_avail_idx to set
688 * @param last_used_idx
689 * last_used_idx to set
690 * @return
691 * 0 on success, -1 on failure
692 */
693 int __rte_experimental
694 rte_vhost_set_vring_base(int vid, uint16_t queue_id,
695 uint16_t last_avail_idx, uint16_t last_used_idx);
696
697 /**
698 * Register external message handling callbacks
699 *
700 * @param vid
701 * vhost device ID
702 * @param ops
703 * virtio external callbacks to register
704 * @param ctx
705 * additional context passed to the callbacks
706 * @return
707 * 0 on success, -1 on failure
708 */
709 int __rte_experimental
710 rte_vhost_extern_callback_register(int vid,
711 struct rte_vhost_user_extern_ops const * const ops, void *ctx);
712
713 /**
714 * Get vdpa device id for vhost device.
715 *
716 * @param vid
717 * vhost device id
718 * @return
719 * device id
720 */
721 int __rte_experimental
722 rte_vhost_get_vdpa_device_id(int vid);
723
724 #ifdef __cplusplus
725 }
726 #endif
727
728 #endif /* _RTE_VHOST_H_ */