]> git.proxmox.com Git - mirror_qemu.git/blame - contrib/libvhost-user/libvhost-user.h
nbd/server: Trace client noncompliance on unaligned requests
[mirror_qemu.git] / contrib / libvhost-user / libvhost-user.h
CommitLineData
7b2e5c65
MAL
1/*
2 * Vhost User library
3 *
4 * Copyright (c) 2016 Red Hat, Inc.
5 *
6 * Authors:
7 * Victor Kaplansky <victork@redhat.com>
8 * Marc-André Lureau <mlureau@redhat.com>
9 *
10 * This work is licensed under the terms of the GNU GPL, version 2 or
11 * later. See the COPYING file in the top-level directory.
12 */
13
14#ifndef LIBVHOST_USER_H
15#define LIBVHOST_USER_H
16
17#include <stdint.h>
18#include <stdbool.h>
19#include <stddef.h>
49cc0340 20#include <sys/poll.h>
7b2e5c65
MAL
21#include <linux/vhost.h>
22#include "standard-headers/linux/virtio_ring.h"
23
/* Based on qemu/hw/virtio/vhost-user.c */
#define VHOST_USER_F_PROTOCOL_FEATURES 30
#define VHOST_LOG_PAGE 4096

/* Number of VuVirtq slots in the vq[] array of struct VuDev. */
#define VHOST_MAX_NR_VIRTQUEUE 8
#define VIRTQUEUE_MAX_SIZE 1024

/*
 * Capacity of the memory-region arrays (VhostUserMemory.regions,
 * VuDev.regions) and of the VhostUserMsg.fds array.
 */
#define VHOST_MEMORY_MAX_NREGIONS 8
32
0bc24d83
CL
/*
 * Kind of config-space update.
 * NOTE(review): presumably the value carried in the `flags` argument of
 * vu_set_config_cb; confirm against the protocol specification.
 */
typedef enum VhostSetConfigType {
    VHOST_SET_CONFIG_TYPE_MASTER = 0,
    VHOST_SET_CONFIG_TYPE_MIGRATION = 1,
} VhostSetConfigType;
37
/*
 * Maximum size of virtio device config space, in bytes.  This is also
 * the capacity of the region[] byte array in VhostUserConfig.
 */
#define VHOST_USER_MAX_CONFIG_SIZE 256
42
7b2e5c65
MAL
/*
 * Bit numbers of the vhost-user protocol features.
 * VHOST_USER_PROTOCOL_F_MAX is one past the highest bit number defined
 * here and must stay last.
 */
enum VhostUserProtocolFeature {
    VHOST_USER_PROTOCOL_F_MQ = 0,
    VHOST_USER_PROTOCOL_F_LOG_SHMFD = 1,
    VHOST_USER_PROTOCOL_F_RARP = 2,
    VHOST_USER_PROTOCOL_F_REPLY_ACK = 3,
    VHOST_USER_PROTOCOL_F_NET_MTU = 4,
    VHOST_USER_PROTOCOL_F_SLAVE_REQ = 5,
    VHOST_USER_PROTOCOL_F_CROSS_ENDIAN = 6,
    VHOST_USER_PROTOCOL_F_CRYPTO_SESSION = 7,
    VHOST_USER_PROTOCOL_F_PAGEFAULT = 8,
    VHOST_USER_PROTOCOL_F_CONFIG = 9,
    VHOST_USER_PROTOCOL_F_SLAVE_SEND_FD = 10,
    VHOST_USER_PROTOCOL_F_HOST_NOTIFIER = 11,
    VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD = 12,

    VHOST_USER_PROTOCOL_F_MAX
};

/*
 * Mask with one bit set for every feature number below
 * VHOST_USER_PROTOCOL_F_MAX.
 *
 * The shift is performed on an unsigned 64-bit constant: protocol
 * feature words are uint64_t throughout this header, and a plain
 * `1 << n` is an int shift that becomes undefined once n reaches the
 * sign bit.
 */
#define VHOST_USER_PROTOCOL_FEATURE_MASK \
    ((1ULL << VHOST_USER_PROTOCOL_F_MAX) - 1)
62
/*
 * Request codes of vhost-user messages.  The values are explicit and
 * must not be renumbered; VHOST_USER_MAX is one past the last defined
 * request.
 */
typedef enum VhostUserRequest {
    VHOST_USER_NONE = 0,
    VHOST_USER_GET_FEATURES = 1,
    VHOST_USER_SET_FEATURES = 2,
    VHOST_USER_SET_OWNER = 3,
    VHOST_USER_RESET_OWNER = 4,
    VHOST_USER_SET_MEM_TABLE = 5,
    VHOST_USER_SET_LOG_BASE = 6,
    VHOST_USER_SET_LOG_FD = 7,
    VHOST_USER_SET_VRING_NUM = 8,
    VHOST_USER_SET_VRING_ADDR = 9,
    VHOST_USER_SET_VRING_BASE = 10,
    VHOST_USER_GET_VRING_BASE = 11,
    VHOST_USER_SET_VRING_KICK = 12,
    VHOST_USER_SET_VRING_CALL = 13,
    VHOST_USER_SET_VRING_ERR = 14,
    VHOST_USER_GET_PROTOCOL_FEATURES = 15,
    VHOST_USER_SET_PROTOCOL_FEATURES = 16,
    VHOST_USER_GET_QUEUE_NUM = 17,
    VHOST_USER_SET_VRING_ENABLE = 18,
    VHOST_USER_SEND_RARP = 19,
    VHOST_USER_NET_SET_MTU = 20,
    VHOST_USER_SET_SLAVE_REQ_FD = 21,
    VHOST_USER_IOTLB_MSG = 22,
    VHOST_USER_SET_VRING_ENDIAN = 23,
    VHOST_USER_GET_CONFIG = 24,
    VHOST_USER_SET_CONFIG = 25,
    VHOST_USER_CREATE_CRYPTO_SESSION = 26,
    VHOST_USER_CLOSE_CRYPTO_SESSION = 27,
    VHOST_USER_POSTCOPY_ADVISE = 28,
    VHOST_USER_POSTCOPY_LISTEN = 29,
    VHOST_USER_POSTCOPY_END = 30,
    VHOST_USER_GET_INFLIGHT_FD = 31,
    VHOST_USER_SET_INFLIGHT_FD = 32,
    VHOST_USER_MAX
} VhostUserRequest;
99
d84599f5
TB
/*
 * Request codes for slave-initiated messages.  VHOST_USER_SLAVE_MAX is
 * one past the last defined request.
 */
typedef enum VhostUserSlaveRequest {
    VHOST_USER_SLAVE_NONE = 0,
    VHOST_USER_SLAVE_IOTLB_MSG = 1,
    VHOST_USER_SLAVE_CONFIG_CHANGE_MSG = 2,
    VHOST_USER_SLAVE_VRING_HOST_NOTIFIER_MSG = 3,
    VHOST_USER_SLAVE_MAX
} VhostUserSlaveRequest;
107
7b2e5c65
MAL
/*
 * Description of one memory region as carried in a VhostUserMemory
 * payload.  Four 64-bit members, no padding.
 */
typedef struct VhostUserMemoryRegion {
    uint64_t guest_phys_addr;
    uint64_t memory_size;
    uint64_t userspace_addr;
    uint64_t mmap_offset;
} VhostUserMemoryRegion;
114
115typedef struct VhostUserMemory {
116 uint32_t nregions;
117 uint32_t padding;
118 VhostUserMemoryRegion regions[VHOST_MEMORY_MAX_NREGIONS];
119} VhostUserMemory;
120
/* Log payload: size and offset of the area to mmap. */
typedef struct VhostUserLog {
    uint64_t mmap_size;
    uint64_t mmap_offset;
} VhostUserLog;
125
0bc24d83
CL
126typedef struct VhostUserConfig {
127 uint32_t offset;
128 uint32_t size;
129 uint32_t flags;
130 uint8_t region[VHOST_USER_MAX_CONFIG_SIZE];
131} VhostUserConfig;
132
/*
 * Number of bytes in VhostUserConfig that precede region[], i.e. the
 * combined size of the offset, size and flags members.  The three
 * members are uint32_t and region[] is a byte array, so there is no
 * padding and the offset of region[] equals the sum of their sizes.
 *
 * Computed with offsetof() (from <stddef.h>, already included by this
 * header) so that the header no longer defines a dummy static object in
 * every translation unit that includes it.
 */
#define VHOST_USER_CONFIG_HDR_SIZE (offsetof(VhostUserConfig, region))
137
d84599f5
TB
/*
 * Vring-area payload: three 64-bit members.
 * NOTE(review): u64 presumably uses the same index/flag encoding as the
 * u64 member of the VhostUserMsg payload; confirm in the .c file.
 */
typedef struct VhostUserVringArea {
    uint64_t u64;
    uint64_t size;
    uint64_t offset;
} VhostUserVringArea;
143
5f9ff1ef
XY
/*
 * Inflight payload: size and offset of the area to mmap, plus the
 * number of queues and the size of each queue.
 */
typedef struct VhostUserInflight {
    uint64_t mmap_size;
    uint64_t mmap_offset;
    uint16_t num_queues;
    uint16_t queue_size;
} VhostUserInflight;
150
7b2e5c65
MAL
/*
 * Attribute that removes padding between the members of a structure.
 * Windows builds additionally request the gcc_struct layout.
 */
#if defined(_WIN32)
# define VU_PACKED __attribute__((gcc_struct, packed))
#else
# define VU_PACKED __attribute__((packed))
#endif
156
157typedef struct VhostUserMsg {
ba275e9d 158 int request;
7b2e5c65
MAL
159
160#define VHOST_USER_VERSION_MASK (0x3)
161#define VHOST_USER_REPLY_MASK (0x1 << 2)
d84599f5 162#define VHOST_USER_NEED_REPLY_MASK (0x1 << 3)
7b2e5c65
MAL
163 uint32_t flags;
164 uint32_t size; /* the following payload size */
165
166 union {
167#define VHOST_USER_VRING_IDX_MASK (0xff)
168#define VHOST_USER_VRING_NOFD_MASK (0x1 << 8)
169 uint64_t u64;
170 struct vhost_vring_state state;
171 struct vhost_vring_addr addr;
172 VhostUserMemory memory;
173 VhostUserLog log;
0bc24d83 174 VhostUserConfig config;
d84599f5 175 VhostUserVringArea area;
5f9ff1ef 176 VhostUserInflight inflight;
7b2e5c65
MAL
177 } payload;
178
179 int fds[VHOST_MEMORY_MAX_NREGIONS];
180 int fd_num;
181 uint8_t *data;
182} VU_PACKED VhostUserMsg;
183
/* One guest memory region as tracked in the regions[] array of VuDev. */
typedef struct VuDevRegion {
    /* Guest Physical address. */
    uint64_t gpa;
    /* Memory region size. */
    uint64_t size;
    /* QEMU virtual address (userspace). */
    uint64_t qva;
    /* Starting offset in our mmaped space. */
    uint64_t mmap_offset;
    /* Start address of mmaped space. */
    uint64_t mmap_addr;
} VuDevRegion;
196
197typedef struct VuDev VuDev;
198
199typedef uint64_t (*vu_get_features_cb) (VuDev *dev);
200typedef void (*vu_set_features_cb) (VuDev *dev, uint64_t features);
201typedef int (*vu_process_msg_cb) (VuDev *dev, VhostUserMsg *vmsg,
202 int *do_reply);
203typedef void (*vu_queue_set_started_cb) (VuDev *dev, int qidx, bool started);
35480cbf 204typedef bool (*vu_queue_is_processed_in_order_cb) (VuDev *dev, int qidx);
0bc24d83
CL
205typedef int (*vu_get_config_cb) (VuDev *dev, uint8_t *config, uint32_t len);
206typedef int (*vu_set_config_cb) (VuDev *dev, const uint8_t *data,
207 uint32_t offset, uint32_t size,
208 uint32_t flags);
7b2e5c65
MAL
209
210typedef struct VuDevIface {
211 /* called by VHOST_USER_GET_FEATURES to get the features bitmask */
212 vu_get_features_cb get_features;
213 /* enable vhost implementation features */
214 vu_set_features_cb set_features;
215 /* get the protocol feature bitmask from the underlying vhost
216 * implementation */
217 vu_get_features_cb get_protocol_features;
218 /* enable protocol features in the underlying vhost implementation. */
219 vu_set_features_cb set_protocol_features;
220 /* process_msg is called for each vhost-user message received */
221 /* skip libvhost-user processing if return value != 0 */
222 vu_process_msg_cb process_msg;
223 /* tells when queues can be processed */
224 vu_queue_set_started_cb queue_set_started;
35480cbf
MAL
225 /*
226 * If the queue is processed in order, in which case it will be
227 * resumed to vring.used->idx. This can help to support resuming
228 * on unmanaged exit/crash.
229 */
230 vu_queue_is_processed_in_order_cb queue_is_processed_in_order;
0bc24d83
CL
231 /* get the config space of the device */
232 vu_get_config_cb get_config;
233 /* set the config space of the device */
234 vu_set_config_cb set_config;
7b2e5c65
MAL
235} VuDevIface;
236
237typedef void (*vu_queue_handler_cb) (VuDev *dev, int qidx);
238
/*
 * Pointers to the three parts of a vring (descriptor table, available
 * ring, used ring) together with its size, log address and flags.
 */
typedef struct VuRing {
    unsigned int num;
    struct vring_desc *desc;
    struct vring_avail *avail;
    struct vring_used *used;
    uint64_t log_guest_addr;
    uint32_t flags;
} VuRing;
247
5f9ff1ef
XY
/*
 * Per-descriptor inflight state (split virtqueue layout).  The explicit
 * padding makes the structure exactly 16 bytes, with `counter` at
 * offset 8.
 */
typedef struct VuDescStateSplit {
    /*
     * Indicate whether this descriptor is inflight or not.
     * Only available for head-descriptor.
     */
    uint8_t inflight;

    /* Padding */
    uint8_t padding[5];

    /*
     * Maintain a list for the last batch of used descriptors.
     * Only available when batching is used for submitting
     */
    uint16_t next;

    /*
     * Used to preserve the order of fetching available descriptors.
     * Only available for head-descriptor.
     */
    uint64_t counter;
} VuDescStateSplit;

/*
 * Inflight region of one virtqueue: a 16-byte header followed by
 * `desc_num` VuDescStateSplit entries.
 */
typedef struct VuVirtqInflight {
    /* The feature flags of this region. Now it's initialized to 0. */
    uint64_t features;

    /*
     * The version of this region. It's 1 currently.
     * Zero value indicates a vm reset happened.
     */
    uint16_t version;

    /*
     * The size of VuDescStateSplit array. It's equal to the virtqueue
     * size. Slave could get it from queue size field of VhostUserInflight.
     */
    uint16_t desc_num;

    /* The head of list that track the last batch of used descriptors. */
    uint16_t last_batch_head;

    /* Storing the idx value of used ring */
    uint16_t used_idx;

    /*
     * Used to track the state of each descriptor in descriptor table.
     * Declared as a C99 flexible array member rather than the GNU
     * zero-length array `desc[0]`; size and member offsets are the same.
     */
    VuDescStateSplit desc[];
} VuVirtqInflight;
286
/*
 * Entry of a virtqueue's resubmit list (VuVirtq.resubmit_list): a
 * descriptor index paired with a counter value.
 */
typedef struct VuVirtqInflightDesc {
    uint16_t index;
    uint64_t counter;
} VuVirtqInflightDesc;
291
7b2e5c65
MAL
292typedef struct VuVirtq {
293 VuRing vring;
294
5f9ff1ef
XY
295 VuVirtqInflight *inflight;
296
297 VuVirtqInflightDesc *resubmit_list;
298
299 uint16_t resubmit_num;
300
301 uint64_t counter;
302
7b2e5c65
MAL
303 /* Next head to pop */
304 uint16_t last_avail_idx;
305
306 /* Last avail_idx read from VQ. */
307 uint16_t shadow_avail_idx;
308
309 uint16_t used_idx;
310
311 /* Last used index value we have signalled on */
312 uint16_t signalled_used;
313
314 /* Last used index value we have signalled on */
315 bool signalled_used_valid;
316
317 /* Notification enabled? */
318 bool notification;
319
320 int inuse;
321
322 vu_queue_handler_cb handler;
323
324 int call_fd;
325 int kick_fd;
326 int err_fd;
327 unsigned int enable;
328 bool started;
329} VuVirtq;
330
/*
 * Watch conditions, defined as the corresponding poll() event bits.
 * The tag is spelled "Condtion"; it is part of the public interface and
 * is kept as is.
 */
enum VuWatchCondtion {
    VU_WATCH_IN = POLLIN,
    VU_WATCH_OUT = POLLOUT,
    VU_WATCH_PRI = POLLPRI,
    VU_WATCH_ERR = POLLERR,
    VU_WATCH_HUP = POLLHUP,
};
338
339typedef void (*vu_panic_cb) (VuDev *dev, const char *err);
340typedef void (*vu_watch_cb) (VuDev *dev, int condition, void *data);
341typedef void (*vu_set_watch_cb) (VuDev *dev, int fd, int condition,
342 vu_watch_cb cb, void *data);
343typedef void (*vu_remove_watch_cb) (VuDev *dev, int fd);
344
5f9ff1ef
XY
/*
 * Inflight area of a device: file descriptor, address and size.
 * NOTE(review): presumably `addr` is where `fd` is mapped; confirm in
 * the .c file.
 */
typedef struct VuDevInflightInfo {
    int fd;
    void *addr;
    uint64_t size;
} VuDevInflightInfo;
350
7b2e5c65
MAL
351struct VuDev {
352 int sock;
353 uint32_t nregions;
354 VuDevRegion regions[VHOST_MEMORY_MAX_NREGIONS];
355 VuVirtq vq[VHOST_MAX_NR_VIRTQUEUE];
5f9ff1ef 356 VuDevInflightInfo inflight_info;
7b2e5c65 357 int log_call_fd;
13384f15 358 int slave_fd;
7b2e5c65
MAL
359 uint64_t log_size;
360 uint8_t *log_table;
361 uint64_t features;
362 uint64_t protocol_features;
363 bool broken;
364
365 /* @set_watch: add or update the given fd to the watch set,
366 * call cb when condition is met */
367 vu_set_watch_cb set_watch;
368
369 /* @remove_watch: remove the given fd from the watch set */
370 vu_remove_watch_cb remove_watch;
371
372 /* @panic: encountered an unrecoverable error, you may try to
373 * re-initialize */
374 vu_panic_cb panic;
375 const VuDevIface *iface;
2a84ffc0
DDAG
376
377 /* Postcopy data */
378 int postcopy_ufd;
6864a7b5 379 bool postcopy_listening;
7b2e5c65
MAL
380};
381
/*
 * Element returned by vu_queue_pop().
 * NOTE(review): presumably in_sg/out_sg hold in_num/out_num iovec
 * entries respectively; confirm in the .c file.
 */
typedef struct VuVirtqElement {
    unsigned int index;
    unsigned int out_num;
    unsigned int in_num;
    struct iovec *in_sg;
    struct iovec *out_sg;
} VuVirtqElement;
389
390/**
391 * vu_init:
392 * @dev: a VuDev context
393 * @socket: the socket connected to vhost-user master
394 * @panic: a panic callback
395 * @set_watch: a set_watch callback
396 * @remove_watch: a remove_watch callback
397 * @iface: a VuDevIface structure with vhost-user device callbacks
398 *
399 * Intializes a VuDev vhost-user context.
400 **/
401void vu_init(VuDev *dev,
402 int socket,
403 vu_panic_cb panic,
404 vu_set_watch_cb set_watch,
405 vu_remove_watch_cb remove_watch,
406 const VuDevIface *iface);
407
408
409/**
410 * vu_deinit:
411 * @dev: a VuDev context
412 *
413 * Cleans up the VuDev context
414 */
415void vu_deinit(VuDev *dev);
416
417/**
418 * vu_dispatch:
419 * @dev: a VuDev context
420 *
421 * Process one vhost-user message.
422 *
423 * Returns: TRUE on success, FALSE on failure.
424 */
425bool vu_dispatch(VuDev *dev);
426
427/**
428 * vu_gpa_to_va:
429 * @dev: a VuDev context
293084a7 430 * @plen: guest memory size
7b2e5c65
MAL
431 * @guest_addr: guest address
432 *
433 * Translate a guest address to a pointer. Returns NULL on failure.
434 */
293084a7 435void *vu_gpa_to_va(VuDev *dev, uint64_t *plen, uint64_t guest_addr);
7b2e5c65
MAL
436
437/**
438 * vu_get_queue:
439 * @dev: a VuDev context
440 * @qidx: queue index
441 *
442 * Returns the queue number @qidx.
443 */
444VuVirtq *vu_get_queue(VuDev *dev, int qidx);
445
446/**
447 * vu_set_queue_handler:
448 * @dev: a VuDev context
449 * @vq: a VuVirtq queue
450 * @handler: the queue handler callback
451 *
452 * Set the queue handler. This function may be called several times
453 * for the same queue. If called with NULL @handler, the handler is
454 * removed.
455 */
456void vu_set_queue_handler(VuDev *dev, VuVirtq *vq,
457 vu_queue_handler_cb handler);
458
d84599f5
TB
459/**
460 * vu_set_queue_host_notifier:
461 * @dev: a VuDev context
462 * @vq: a VuVirtq queue
463 * @fd: a file descriptor
464 * @size: host page size
465 * @offset: notifier offset in @fd file
466 *
467 * Set queue's host notifier. This function may be called several
468 * times for the same queue. If called with -1 @fd, the notifier
469 * is removed.
470 */
471bool vu_set_queue_host_notifier(VuDev *dev, VuVirtq *vq, int fd,
472 int size, int offset);
7b2e5c65
MAL
473
474/**
475 * vu_queue_set_notification:
476 * @dev: a VuDev context
477 * @vq: a VuVirtq queue
478 * @enable: state
479 *
480 * Set whether the queue notifies (via event index or interrupt)
481 */
482void vu_queue_set_notification(VuDev *dev, VuVirtq *vq, int enable);
483
484/**
485 * vu_queue_enabled:
486 * @dev: a VuDev context
487 * @vq: a VuVirtq queue
488 *
489 * Returns: whether the queue is enabled.
490 */
491bool vu_queue_enabled(VuDev *dev, VuVirtq *vq);
492
bcf0836d
DDAG
493/**
494 * vu_queue_started:
495 * @dev: a VuDev context
496 * @vq: a VuVirtq queue
497 *
498 * Returns: whether the queue is started.
499 */
500bool vu_queue_started(const VuDev *dev, const VuVirtq *vq);
501
7b2e5c65 502/**
640601c7 503 * vu_queue_empty:
7b2e5c65
MAL
504 * @dev: a VuDev context
505 * @vq: a VuVirtq queue
506 *
640601c7 507 * Returns: true if the queue is empty or not ready.
7b2e5c65 508 */
640601c7 509bool vu_queue_empty(VuDev *dev, VuVirtq *vq);
7b2e5c65
MAL
510
511/**
512 * vu_queue_notify:
513 * @dev: a VuDev context
514 * @vq: a VuVirtq queue
515 *
516 * Request to notify the queue via callfd (skipped if unnecessary)
517 */
518void vu_queue_notify(VuDev *dev, VuVirtq *vq);
519
520/**
521 * vu_queue_pop:
522 * @dev: a VuDev context
523 * @vq: a VuVirtq queue
524 * @sz: the size of struct to return (must be >= VuVirtqElement)
525 *
19409df8
MAL
526 * Returns: a VuVirtqElement filled from the queue or NULL. The
527 * returned element must be free()-d by the caller.
7b2e5c65
MAL
528 */
529void *vu_queue_pop(VuDev *dev, VuVirtq *vq, size_t sz);
530
b13919ab
MAL
531
532/**
533 * vu_queue_unpop:
534 * @dev: a VuDev context
535 * @vq: a VuVirtq queue
536 * @elem: The #VuVirtqElement
537 * @len: number of bytes written
538 *
539 * Pretend the most recent element wasn't popped from the virtqueue. The next
540 * call to vu_queue_pop() will refetch the element.
541 */
542void vu_queue_unpop(VuDev *dev, VuVirtq *vq, VuVirtqElement *elem,
543 size_t len);
544
7b2e5c65
MAL
545/**
546 * vu_queue_rewind:
547 * @dev: a VuDev context
548 * @vq: a VuVirtq queue
549 * @num: number of elements to push back
550 *
551 * Pretend that elements weren't popped from the virtqueue. The next
552 * virtqueue_pop() will refetch the oldest element.
553 *
554 * Returns: true on success, false if @num is greater than the number of in use
555 * elements.
556 */
557bool vu_queue_rewind(VuDev *dev, VuVirtq *vq, unsigned int num);
558
559/**
560 * vu_queue_fill:
561 * @dev: a VuDev context
562 * @vq: a VuVirtq queue
563 * @elem: a VuVirtqElement
564 * @len: length in bytes to write
565 * @idx: optional offset for the used ring index (0 in general)
566 *
567 * Fill the used ring with @elem element.
568 */
569void vu_queue_fill(VuDev *dev, VuVirtq *vq,
570 const VuVirtqElement *elem,
571 unsigned int len, unsigned int idx);
572
573/**
574 * vu_queue_push:
575 * @dev: a VuDev context
576 * @vq: a VuVirtq queue
577 * @elem: a VuVirtqElement
578 * @len: length in bytes to write
579 *
580 * Helper that combines vu_queue_fill() with a vu_queue_flush().
581 */
582void vu_queue_push(VuDev *dev, VuVirtq *vq,
583 const VuVirtqElement *elem, unsigned int len);
584
585/**
586 * vu_queue_flush:
587 * @dev: a VuDev context
588 * @vq: a VuVirtq queue
589 * @num: number of elements to flush
590 *
591 * Mark the last number of elements as done (used.idx is updated by
592 * num elements).
593*/
594void vu_queue_flush(VuDev *dev, VuVirtq *vq, unsigned int num);
595
596/**
597 * vu_queue_get_avail_bytes:
598 * @dev: a VuDev context
599 * @vq: a VuVirtq queue
600 * @in_bytes: in bytes
601 * @out_bytes: out bytes
602 * @max_in_bytes: stop counting after max_in_bytes
603 * @max_out_bytes: stop counting after max_out_bytes
604 *
605 * Count the number of available bytes, up to max_in_bytes/max_out_bytes.
606 */
607void vu_queue_get_avail_bytes(VuDev *vdev, VuVirtq *vq, unsigned int *in_bytes,
608 unsigned int *out_bytes,
609 unsigned max_in_bytes, unsigned max_out_bytes);
610
611/**
612 * vu_queue_avail_bytes:
613 * @dev: a VuDev context
614 * @vq: a VuVirtq queue
615 * @in_bytes: expected in bytes
616 * @out_bytes: expected out bytes
617 *
618 * Returns: true if in_bytes <= in_total && out_bytes <= out_total
619 */
620bool vu_queue_avail_bytes(VuDev *dev, VuVirtq *vq, unsigned int in_bytes,
621 unsigned int out_bytes);
622
623#endif /* LIBVHOST_USER_H */