/*
 * Source: QEMU mirror (git.proxmox.com mirror_qemu.git)
 * Path:   contrib/libvhost-user/libvhost-user.h
 */
1 /*
2 * Vhost User library
3 *
4 * Copyright (c) 2016 Red Hat, Inc.
5 *
6 * Authors:
7 * Victor Kaplansky <victork@redhat.com>
8 * Marc-André Lureau <mlureau@redhat.com>
9 *
10 * This work is licensed under the terms of the GNU GPL, version 2 or
11 * later. See the COPYING file in the top-level directory.
12 */
13
14 #ifndef LIBVHOST_USER_H
15 #define LIBVHOST_USER_H
16
17 #include <stdint.h>
18 #include <stdbool.h>
19 #include <stddef.h>
20 #include <sys/poll.h>
21 #include <linux/vhost.h>
22 #include "standard-headers/linux/virtio_ring.h"
23
/* Based on qemu/hw/virtio/vhost-user.c */

/* Feature bit advertising support for protocol-feature negotiation. */
#define VHOST_USER_F_PROTOCOL_FEATURES 30
/* Granularity (in bytes) of the dirty-log bitmap pages. */
#define VHOST_LOG_PAGE 4096

/* Maximum number of virtqueues a VuDev can hold (see VuDev.vq). */
#define VHOST_MAX_NR_VIRTQUEUE 8
/* Maximum number of descriptors in a single virtqueue. */
#define VIRTQUEUE_MAX_SIZE 1024

/* Maximum number of memory regions in a SET_MEM_TABLE message. */
#define VHOST_MEMORY_MAX_NREGIONS 8
32
/* Origin of a SET_CONFIG request. */
typedef enum VhostSetConfigType {
    VHOST_SET_CONFIG_TYPE_MASTER = 0,    /* regular update from the master */
    VHOST_SET_CONFIG_TYPE_MIGRATION = 1, /* config restored by migration */
} VhostSetConfigType;

/*
 * Maximum size of virtio device config space
 */
#define VHOST_USER_MAX_CONFIG_SIZE 256
42
/*
 * Optional vhost-user protocol feature bits, negotiated through
 * GET/SET_PROTOCOL_FEATURES once VHOST_USER_F_PROTOCOL_FEATURES
 * has been offered.
 */
enum VhostUserProtocolFeature {
    VHOST_USER_PROTOCOL_F_MQ = 0,
    VHOST_USER_PROTOCOL_F_LOG_SHMFD = 1,
    VHOST_USER_PROTOCOL_F_RARP = 2,
    VHOST_USER_PROTOCOL_F_REPLY_ACK = 3,
    VHOST_USER_PROTOCOL_F_NET_MTU = 4,
    VHOST_USER_PROTOCOL_F_SLAVE_REQ = 5,
    VHOST_USER_PROTOCOL_F_CROSS_ENDIAN = 6,
    VHOST_USER_PROTOCOL_F_CRYPTO_SESSION = 7,
    VHOST_USER_PROTOCOL_F_PAGEFAULT = 8,
    VHOST_USER_PROTOCOL_F_CONFIG = 9,
    VHOST_USER_PROTOCOL_F_SLAVE_SEND_FD = 10,
    VHOST_USER_PROTOCOL_F_HOST_NOTIFIER = 11,
    VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD = 12,

    VHOST_USER_PROTOCOL_F_MAX
};

/* Bitmask covering every protocol feature bit defined above. */
#define VHOST_USER_PROTOCOL_FEATURE_MASK ((1 << VHOST_USER_PROTOCOL_F_MAX) - 1)
62
/*
 * Requests sent by the master to the slave over the vhost-user socket.
 * Values are part of the wire protocol and must not be renumbered.
 */
typedef enum VhostUserRequest {
    VHOST_USER_NONE = 0,
    VHOST_USER_GET_FEATURES = 1,
    VHOST_USER_SET_FEATURES = 2,
    VHOST_USER_SET_OWNER = 3,
    VHOST_USER_RESET_OWNER = 4,
    VHOST_USER_SET_MEM_TABLE = 5,
    VHOST_USER_SET_LOG_BASE = 6,
    VHOST_USER_SET_LOG_FD = 7,
    VHOST_USER_SET_VRING_NUM = 8,
    VHOST_USER_SET_VRING_ADDR = 9,
    VHOST_USER_SET_VRING_BASE = 10,
    VHOST_USER_GET_VRING_BASE = 11,
    VHOST_USER_SET_VRING_KICK = 12,
    VHOST_USER_SET_VRING_CALL = 13,
    VHOST_USER_SET_VRING_ERR = 14,
    VHOST_USER_GET_PROTOCOL_FEATURES = 15,
    VHOST_USER_SET_PROTOCOL_FEATURES = 16,
    VHOST_USER_GET_QUEUE_NUM = 17,
    VHOST_USER_SET_VRING_ENABLE = 18,
    VHOST_USER_SEND_RARP = 19,
    VHOST_USER_NET_SET_MTU = 20,
    VHOST_USER_SET_SLAVE_REQ_FD = 21,
    VHOST_USER_IOTLB_MSG = 22,
    VHOST_USER_SET_VRING_ENDIAN = 23,
    VHOST_USER_GET_CONFIG = 24,
    VHOST_USER_SET_CONFIG = 25,
    VHOST_USER_CREATE_CRYPTO_SESSION = 26,
    VHOST_USER_CLOSE_CRYPTO_SESSION = 27,
    VHOST_USER_POSTCOPY_ADVISE = 28,
    VHOST_USER_POSTCOPY_LISTEN = 29,
    VHOST_USER_POSTCOPY_END = 30,
    VHOST_USER_GET_INFLIGHT_FD = 31,
    VHOST_USER_SET_INFLIGHT_FD = 32,
    VHOST_USER_GPU_SET_SOCKET = 33,
    VHOST_USER_MAX
} VhostUserRequest;
100
/*
 * Requests sent by the slave to the master over the slave channel
 * (see VHOST_USER_SET_SLAVE_REQ_FD). Wire-protocol values.
 */
typedef enum VhostUserSlaveRequest {
    VHOST_USER_SLAVE_NONE = 0,
    VHOST_USER_SLAVE_IOTLB_MSG = 1,
    VHOST_USER_SLAVE_CONFIG_CHANGE_MSG = 2,
    VHOST_USER_SLAVE_VRING_HOST_NOTIFIER_MSG = 3,
    VHOST_USER_SLAVE_MAX
} VhostUserSlaveRequest;
108
/* One guest memory region as described in a SET_MEM_TABLE payload. */
typedef struct VhostUserMemoryRegion {
    uint64_t guest_phys_addr; /* guest physical start address */
    uint64_t memory_size;     /* region size in bytes */
    uint64_t userspace_addr;  /* master's (QEMU's) virtual address */
    uint64_t mmap_offset;     /* offset into the region's fd for mmap() */
} VhostUserMemoryRegion;

/* SET_MEM_TABLE payload: the guest memory layout. */
typedef struct VhostUserMemory {
    uint32_t nregions; /* number of valid entries in regions[] */
    uint32_t padding;
    VhostUserMemoryRegion regions[VHOST_MEMORY_MAX_NREGIONS];
} VhostUserMemory;
121
/* SET_LOG_BASE payload: describes the dirty-log shared memory area. */
typedef struct VhostUserLog {
    uint64_t mmap_size;
    uint64_t mmap_offset;
} VhostUserLog;

/* GET/SET_CONFIG payload: a window into the device config space. */
typedef struct VhostUserConfig {
    uint32_t offset; /* offset into config space */
    uint32_t size;   /* number of valid bytes in region[] */
    uint32_t flags;
    uint8_t region[VHOST_USER_MAX_CONFIG_SIZE];
} VhostUserConfig;

/*
 * Dummy instance used only so the macro below can take sizeof of the
 * individual header fields (offset/size/flags, i.e. everything before
 * the variable-length region[]).
 */
static VhostUserConfig c __attribute__ ((unused));
#define VHOST_USER_CONFIG_HDR_SIZE (sizeof(c.offset) \
                                   + sizeof(c.size) \
                                   + sizeof(c.flags))
138
/* VRING_HOST_NOTIFIER payload: area of a notifier fd to map. */
typedef struct VhostUserVringArea {
    uint64_t u64;    /* vring index and flags */
    uint64_t size;   /* size of the area */
    uint64_t offset; /* offset into the passed fd */
} VhostUserVringArea;

/* GET/SET_INFLIGHT_FD payload: inflight-tracking shared memory. */
typedef struct VhostUserInflight {
    uint64_t mmap_size;   /* total size of the inflight area */
    uint64_t mmap_offset; /* offset of the area within the fd */
    uint16_t num_queues;  /* number of queues tracked */
    uint16_t queue_size;  /* descriptors per queue */
} VhostUserInflight;
151
/*
 * On x86 Windows, GCC defaults to the MS struct layout; force
 * gcc_struct so the packed message layout matches other platforms.
 */
#if defined(_WIN32) && (defined(__x86_64__) || defined(__i386__))
# define VU_PACKED __attribute__((gcc_struct, packed))
#else
# define VU_PACKED __attribute__((packed))
#endif

/*
 * A vhost-user message. The request/flags/size header and the payload
 * union match the wire format; keep the layout exactly as-is.
 */
typedef struct VhostUserMsg {
    int request; /* a VhostUserRequest value */

#define VHOST_USER_VERSION_MASK     (0x3)
#define VHOST_USER_REPLY_MASK       (0x1 << 2)
#define VHOST_USER_NEED_REPLY_MASK  (0x1 << 3)
    uint32_t flags;
    uint32_t size; /* the following payload size */

    union {
#define VHOST_USER_VRING_IDX_MASK   (0xff)
#define VHOST_USER_VRING_NOFD_MASK  (0x1 << 8)
        uint64_t u64;
        struct vhost_vring_state state;
        struct vhost_vring_addr addr;
        VhostUserMemory memory;
        VhostUserLog log;
        VhostUserConfig config;
        VhostUserVringArea area;
        VhostUserInflight inflight;
    } payload;

    /*
     * NOTE(review): fds/fd_num/data appear to be local bookkeeping for
     * ancillary data (SCM_RIGHTS fds) and out-of-band payloads rather
     * than part of the wire header — confirm against libvhost-user.c.
     */
    int fds[VHOST_MEMORY_MAX_NREGIONS];
    int fd_num;
    uint8_t *data;
} VU_PACKED VhostUserMsg;
184
/* A guest memory region as mapped into this (slave) process. */
typedef struct VuDevRegion {
    /* Guest Physical address. */
    uint64_t gpa;
    /* Memory region size. */
    uint64_t size;
    /* QEMU virtual address (userspace). */
    uint64_t qva;
    /* Starting offset in our mmaped space. */
    uint64_t mmap_offset;
    /* Start address of mmaped space. */
    uint64_t mmap_addr;
} VuDevRegion;
197
typedef struct VuDev VuDev;

/* Return the device feature bitmask. */
typedef uint64_t (*vu_get_features_cb) (VuDev *dev);
/* Apply a feature bitmask negotiated by the master. */
typedef void (*vu_set_features_cb) (VuDev *dev, uint64_t features);
/* Optionally handle a message; set *do_reply and return non-zero to
 * skip the library's default processing. */
typedef int (*vu_process_msg_cb) (VuDev *dev, VhostUserMsg *vmsg,
                                  int *do_reply);
/* Notify that queue @qidx was started or stopped. */
typedef void (*vu_queue_set_started_cb) (VuDev *dev, int qidx, bool started);
/* Report whether queue @qidx is processed in order. */
typedef bool (*vu_queue_is_processed_in_order_cb) (VuDev *dev, int qidx);
/* Copy up to @len bytes of device config space into @config. */
typedef int (*vu_get_config_cb) (VuDev *dev, uint8_t *config, uint32_t len);
/* Write @size bytes of @data at @offset into device config space. */
typedef int (*vu_set_config_cb) (VuDev *dev, const uint8_t *data,
                                 uint32_t offset, uint32_t size,
                                 uint32_t flags);
210
/*
 * Device-specific callbacks supplied by the user of this library
 * (see vu_init()). Only process_msg and queue_is_processed_in_order
 * may be left NULL judging by typical usage — NOTE(review): confirm
 * which members are optional against libvhost-user.c.
 */
typedef struct VuDevIface {
    /* called by VHOST_USER_GET_FEATURES to get the features bitmask */
    vu_get_features_cb get_features;
    /* enable vhost implementation features */
    vu_set_features_cb set_features;
    /* get the protocol feature bitmask from the underlying vhost
     * implementation */
    vu_get_features_cb get_protocol_features;
    /* enable protocol features in the underlying vhost implementation. */
    vu_set_features_cb set_protocol_features;
    /* process_msg is called for each vhost-user message received */
    /* skip libvhost-user processing if return value != 0 */
    vu_process_msg_cb process_msg;
    /* tells when queues can be processed */
    vu_queue_set_started_cb queue_set_started;
    /*
     * If the queue is processed in order, in which case it will be
     * resumed to vring.used->idx. This can help to support resuming
     * on unmanaged exit/crash.
     */
    vu_queue_is_processed_in_order_cb queue_is_processed_in_order;
    /* get the config space of the device */
    vu_get_config_cb get_config;
    /* set the config space of the device */
    vu_set_config_cb set_config;
} VuDevIface;
237
/* Called when queue @qidx has work to process (kick received). */
typedef void (*vu_queue_handler_cb) (VuDev *dev, int qidx);

/* Split-virtqueue ring, with its three parts mapped into this process. */
typedef struct VuRing {
    unsigned int num;          /* ring size (number of descriptors) */
    struct vring_desc *desc;   /* descriptor table */
    struct vring_avail *avail; /* available ring */
    struct vring_used *used;   /* used ring */
    uint64_t log_guest_addr;   /* guest address for dirty logging */
    uint32_t flags;
} VuRing;
248
/* Per-descriptor inflight state for a split virtqueue. */
typedef struct VuDescStateSplit {
    /* Indicate whether this descriptor is inflight or not.
     * Only available for head-descriptor. */
    uint8_t inflight;

    /* Padding */
    uint8_t padding[5];

    /* Maintain a list for the last batch of used descriptors.
     * Only available when batching is used for submitting */
    uint16_t next;

    /* Used to preserve the order of fetching available descriptors.
     * Only available for head-descriptor. */
    uint64_t counter;
} VuDescStateSplit;
265
266 typedef struct VuVirtqInflight {
267 /* The feature flags of this region. Now it's initialized to 0. */
268 uint64_t features;
269
270 /* The version of this region. It's 1 currently.
271 * Zero value indicates a vm reset happened. */
272 uint16_t version;
273
274 /* The size of VuDescStateSplit array. It's equal to the virtqueue
275 * size. Slave could get it from queue size field of VhostUserInflight. */
276 uint16_t desc_num;
277
278 /* The head of list that track the last batch of used descriptors. */
279 uint16_t last_batch_head;
280
281 /* Storing the idx value of used ring */
282 uint16_t used_idx;
283
284 /* Used to track the state of each descriptor in descriptor table */
285 VuDescStateSplit desc[0];
286 } VuVirtqInflight;
287
/* One entry of the resubmit list built from the inflight region. */
typedef struct VuVirtqInflightDesc {
    uint16_t index;   /* descriptor index to resubmit */
    uint64_t counter; /* original fetch-order counter */
} VuVirtqInflightDesc;
292
/* Runtime state of one virtqueue. */
typedef struct VuVirtq {
    VuRing vring;

    /* Mapped inflight-tracking region for this queue (may be NULL). */
    VuVirtqInflight *inflight;

    /* Descriptors to resubmit after a crash/restart, sorted for replay. */
    VuVirtqInflightDesc *resubmit_list;

    uint16_t resubmit_num;

    uint64_t counter;

    /* Next head to pop */
    uint16_t last_avail_idx;

    /* Last avail_idx read from VQ. */
    uint16_t shadow_avail_idx;

    uint16_t used_idx;

    /* Last used index value we have signalled on */
    uint16_t signalled_used;

    /* Whether signalled_used is valid (cleared until first signal) */
    bool signalled_used_valid;

    /* Notification enabled? */
    bool notification;

    /* Number of popped-but-not-yet-pushed elements. */
    int inuse;

    vu_queue_handler_cb handler;

    int call_fd; /* eventfd used to signal the master */
    int kick_fd; /* eventfd the master kicks to notify us */
    int err_fd;  /* eventfd for reporting errors */
    unsigned int enable;
    bool started;
} VuVirtq;
331
/*
 * Conditions passed to vu_set_watch_cb, expressed as poll(2) events.
 * NOTE(review): the tag misspells "Condition"; kept as-is since
 * renaming it would break any caller referring to the enum by tag.
 */
enum VuWatchCondtion {
    VU_WATCH_IN = POLLIN,
    VU_WATCH_OUT = POLLOUT,
    VU_WATCH_PRI = POLLPRI,
    VU_WATCH_ERR = POLLERR,
    VU_WATCH_HUP = POLLHUP,
};
339
/* Report an unrecoverable error; @err is a human-readable message. */
typedef void (*vu_panic_cb) (VuDev *dev, const char *err);
/* Invoked when a watched fd meets its condition. */
typedef void (*vu_watch_cb) (VuDev *dev, int condition, void *data);
/* Add or update @fd in the caller's event loop; call @cb with @data
 * when @condition (VuWatchCondtion bits) is met. */
typedef void (*vu_set_watch_cb) (VuDev *dev, int fd, int condition,
                                 vu_watch_cb cb, void *data);
/* Remove @fd from the caller's event loop. */
typedef void (*vu_remove_watch_cb) (VuDev *dev, int fd);

/* Bookkeeping for the mmap'ed inflight region shared with the master. */
typedef struct VuDevInflightInfo {
    int fd;        /* fd backing the region, or -1 */
    void *addr;    /* mmap'ed address */
    uint64_t size; /* mapping size in bytes */
} VuDevInflightInfo;
351
/* Per-device context; initialize with vu_init(). */
struct VuDev {
    int sock;          /* vhost-user socket to the master */
    uint32_t nregions; /* number of valid entries in regions[] */
    VuDevRegion regions[VHOST_MEMORY_MAX_NREGIONS];
    VuVirtq vq[VHOST_MAX_NR_VIRTQUEUE];
    VuDevInflightInfo inflight_info;
    int log_call_fd;   /* eventfd to signal after dirty-log updates */
    int slave_fd;      /* slave channel fd (SET_SLAVE_REQ_FD), or -1 */
    uint64_t log_size; /* size of the dirty-log bitmap */
    uint8_t *log_table;
    uint64_t features;          /* negotiated device features */
    uint64_t protocol_features; /* negotiated protocol features */
    bool broken;       /* set after a fatal protocol error */

    /* @set_watch: add or update the given fd to the watch set,
     * call cb when condition is met */
    vu_set_watch_cb set_watch;

    /* @remove_watch: remove the given fd from the watch set */
    vu_remove_watch_cb remove_watch;

    /* @panic: encountered an unrecoverable error, you may try to
     * re-initialize */
    vu_panic_cb panic;
    const VuDevIface *iface;

    /* Postcopy data */
    int postcopy_ufd;        /* userfaultfd for postcopy, or -1 */
    bool postcopy_listening; /* between POSTCOPY_LISTEN and POSTCOPY_END */
};
382
/*
 * A popped virtqueue element: the head index plus its scatter-gather
 * lists (in = device-writable, out = device-readable).
 */
typedef struct VuVirtqElement {
    unsigned int index;   /* head descriptor index */
    unsigned int out_num; /* number of entries in out_sg */
    unsigned int in_num;  /* number of entries in in_sg */
    struct iovec *in_sg;
    struct iovec *out_sg;
} VuVirtqElement;
390
/**
 * vu_init:
 * @dev: a VuDev context
 * @socket: the socket connected to vhost-user master
 * @panic: a panic callback
 * @set_watch: a set_watch callback
 * @remove_watch: a remove_watch callback
 * @iface: a VuDevIface structure with vhost-user device callbacks
 *
 * Initializes a VuDev vhost-user context.
 **/
void vu_init(VuDev *dev,
             int socket,
             vu_panic_cb panic,
             vu_set_watch_cb set_watch,
             vu_remove_watch_cb remove_watch,
             const VuDevIface *iface);
408
409
/**
 * vu_deinit:
 * @dev: a VuDev context
 *
 * Cleans up the VuDev context
 */
void vu_deinit(VuDev *dev);

/**
 * vu_dispatch:
 * @dev: a VuDev context
 *
 * Process one vhost-user message.
 *
 * Returns: TRUE on success, FALSE on failure.
 */
bool vu_dispatch(VuDev *dev);

/**
 * vu_gpa_to_va:
 * @dev: a VuDev context
 * @plen: in: bytes requested; out: bytes actually contiguous
 * @guest_addr: guest address
 *
 * Translate a guest address to a pointer. Returns NULL on failure.
 */
void *vu_gpa_to_va(VuDev *dev, uint64_t *plen, uint64_t guest_addr);

/**
 * vu_get_queue:
 * @dev: a VuDev context
 * @qidx: queue index
 *
 * Returns the queue number @qidx.
 */
VuVirtq *vu_get_queue(VuDev *dev, int qidx);
446
/**
 * vu_set_queue_handler:
 * @dev: a VuDev context
 * @vq: a VuVirtq queue
 * @handler: the queue handler callback
 *
 * Set the queue handler. This function may be called several times
 * for the same queue. If called with NULL @handler, the handler is
 * removed.
 */
void vu_set_queue_handler(VuDev *dev, VuVirtq *vq,
                          vu_queue_handler_cb handler);

/**
 * vu_set_queue_host_notifier:
 * @dev: a VuDev context
 * @vq: a VuVirtq queue
 * @fd: a file descriptor
 * @size: host page size
 * @offset: notifier offset in @fd file
 *
 * Set queue's host notifier. This function may be called several
 * times for the same queue. If called with -1 @fd, the notifier
 * is removed.
 */
bool vu_set_queue_host_notifier(VuDev *dev, VuVirtq *vq, int fd,
                                int size, int offset);
474
/**
 * vu_queue_set_notification:
 * @dev: a VuDev context
 * @vq: a VuVirtq queue
 * @enable: state
 *
 * Set whether the queue notifies (via event index or interrupt)
 */
void vu_queue_set_notification(VuDev *dev, VuVirtq *vq, int enable);

/**
 * vu_queue_enabled:
 * @dev: a VuDev context
 * @vq: a VuVirtq queue
 *
 * Returns: whether the queue is enabled.
 */
bool vu_queue_enabled(VuDev *dev, VuVirtq *vq);

/**
 * vu_queue_started:
 * @dev: a VuDev context
 * @vq: a VuVirtq queue
 *
 * Returns: whether the queue is started.
 */
bool vu_queue_started(const VuDev *dev, const VuVirtq *vq);

/**
 * vu_queue_empty:
 * @dev: a VuDev context
 * @vq: a VuVirtq queue
 *
 * Returns: true if the queue is empty or not ready.
 */
bool vu_queue_empty(VuDev *dev, VuVirtq *vq);

/**
 * vu_queue_notify:
 * @dev: a VuDev context
 * @vq: a VuVirtq queue
 *
 * Request to notify the queue via callfd (skipped if unnecessary)
 */
void vu_queue_notify(VuDev *dev, VuVirtq *vq);
520
/**
 * vu_queue_pop:
 * @dev: a VuDev context
 * @vq: a VuVirtq queue
 * @sz: the size of struct to return (must be >= VuVirtqElement)
 *
 * Returns: a VuVirtqElement filled from the queue or NULL. The
 * returned element must be free()-d by the caller.
 */
void *vu_queue_pop(VuDev *dev, VuVirtq *vq, size_t sz);


/**
 * vu_queue_unpop:
 * @dev: a VuDev context
 * @vq: a VuVirtq queue
 * @elem: The #VuVirtqElement
 * @len: number of bytes written
 *
 * Pretend the most recent element wasn't popped from the virtqueue. The next
 * call to vu_queue_pop() will refetch the element.
 */
void vu_queue_unpop(VuDev *dev, VuVirtq *vq, VuVirtqElement *elem,
                    size_t len);

/**
 * vu_queue_rewind:
 * @dev: a VuDev context
 * @vq: a VuVirtq queue
 * @num: number of elements to push back
 *
 * Pretend that elements weren't popped from the virtqueue. The next
 * virtqueue_pop() will refetch the oldest element.
 *
 * Returns: true on success, false if @num is greater than the number of in use
 * elements.
 */
bool vu_queue_rewind(VuDev *dev, VuVirtq *vq, unsigned int num);
559
/**
 * vu_queue_fill:
 * @dev: a VuDev context
 * @vq: a VuVirtq queue
 * @elem: a VuVirtqElement
 * @len: length in bytes to write
 * @idx: optional offset for the used ring index (0 in general)
 *
 * Fill the used ring with @elem element.
 */
void vu_queue_fill(VuDev *dev, VuVirtq *vq,
                   const VuVirtqElement *elem,
                   unsigned int len, unsigned int idx);

/**
 * vu_queue_push:
 * @dev: a VuDev context
 * @vq: a VuVirtq queue
 * @elem: a VuVirtqElement
 * @len: length in bytes to write
 *
 * Helper that combines vu_queue_fill() with a vu_queue_flush().
 */
void vu_queue_push(VuDev *dev, VuVirtq *vq,
                   const VuVirtqElement *elem, unsigned int len);

/**
 * vu_queue_flush:
 * @dev: a VuDev context
 * @vq: a VuVirtq queue
 * @num: number of elements to flush
 *
 * Mark the last number of elements as done (used.idx is updated by
 * num elements).
 */
void vu_queue_flush(VuDev *dev, VuVirtq *vq, unsigned int num);
596
/**
 * vu_queue_get_avail_bytes:
 * @vdev: a VuDev context
 * @vq: a VuVirtq queue
 * @in_bytes: in bytes
 * @out_bytes: out bytes
 * @max_in_bytes: stop counting after max_in_bytes
 * @max_out_bytes: stop counting after max_out_bytes
 *
 * Count the number of available bytes, up to max_in_bytes/max_out_bytes.
 */
void vu_queue_get_avail_bytes(VuDev *vdev, VuVirtq *vq, unsigned int *in_bytes,
                              unsigned int *out_bytes,
                              unsigned max_in_bytes, unsigned max_out_bytes);

/**
 * vu_queue_avail_bytes:
 * @dev: a VuDev context
 * @vq: a VuVirtq queue
 * @in_bytes: expected in bytes
 * @out_bytes: expected out bytes
 *
 * Returns: true if in_bytes <= in_total && out_bytes <= out_total
 */
bool vu_queue_avail_bytes(VuDev *dev, VuVirtq *vq, unsigned int in_bytes,
                          unsigned int out_bytes);
623
624 #endif /* LIBVHOST_USER_H */