/*-
 *   BSD LICENSE
 *
 *   Copyright(c) 2010-2017 Intel Corporation. All rights reserved.
 *   All rights reserved.
 *
 *   Redistribution and use in source and binary forms, with or without
 *   modification, are permitted provided that the following conditions
 *   are met:
 *
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in
 *       the documentation and/or other materials provided with the
 *       distribution.
 *     * Neither the name of Intel Corporation nor the names of its
 *       contributors may be used to endorse or promote products derived
 *       from this software without specific prior written permission.
 *
 *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#ifndef _RTE_VHOST_H_
#define _RTE_VHOST_H_

/**
 * @file
 * Interface to vhost-user
 */

#include <stdint.h>
#include <linux/vhost.h>
#include <linux/virtio_ring.h>
#include <sys/eventfd.h>

#include <rte_config.h>
#include <rte_memory.h>
#include <rte_mempool.h>

#define RTE_VHOST_USER_CLIENT			(1ULL << 0)
#define RTE_VHOST_USER_NO_RECONNECT		(1ULL << 1)
#define RTE_VHOST_USER_DEQUEUE_ZERO_COPY	(1ULL << 2)

/**
 * Information relating to memory regions, including offsets to
 * addresses in QEMU's memory file.
 */
struct rte_vhost_mem_region {
	uint64_t guest_phys_addr;
	uint64_t guest_user_addr;
	uint64_t host_user_addr;
	uint64_t size;
	void	 *mmap_addr;
	uint64_t mmap_size;
	int	 fd;
};

/**
 * Memory structure includes region and mapping information.
 */
struct rte_vhost_memory {
	uint32_t nregions;
	struct rte_vhost_mem_region regions[0];
};

struct rte_vhost_vring {
	struct vring_desc	*desc;
	struct vring_avail	*avail;
	struct vring_used	*used;
	uint64_t		log_guest_addr;

	int			callfd;
	int			kickfd;
	uint16_t		size;
};

/**
 * Device and vring operations.
 */
struct vhost_device_ops {
	int (*new_device)(int vid);		/**< Add device. */
	void (*destroy_device)(int vid);	/**< Remove device. */

	int (*vring_state_changed)(int vid, uint16_t queue_id, int enable);	/**< Triggered when a vring is enabled or disabled. */

	/**
	 * Features could change after feature negotiation. For example,
	 * VHOST_F_LOG_ALL is set/cleared at the start/end of live
	 * migration, respectively. This callback is used to inform the
	 * application of such a change.
	 */
	int (*features_changed)(int vid, uint64_t features);
	int (*vhost_nvme_admin_passthrough)(int vid, void *cmd, void *cqe, void *buf);
	int (*vhost_nvme_set_cq_call)(int vid, uint16_t qid, int fd);
	int (*vhost_nvme_set_bar_mr)(int vid, void *bar_addr, uint64_t bar_size);
	int (*vhost_nvme_get_cap)(int vid, uint64_t *cap);

	int (*new_connection)(int vid);
	void (*destroy_connection)(int vid);

	int (*get_config)(int vid, uint8_t *config, uint32_t config_len);
	int (*set_config)(int vid, uint8_t *config, uint32_t offset,
			  uint32_t len, uint32_t flags);

	void *reserved[2];	/**< Reserved for future extension. */
};
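
/*
 * Example (illustrative sketch, not part of this API): a minimal
 * callback table, suitable for passing to
 * rte_vhost_driver_callback_register() below. The callback names and
 * bodies are hypothetical.
 *
 *	static int my_new_device(int vid)
 *	{
 *		// device is ready: start processing its virtqueues
 *		return 0;
 *	}
 *
 *	static void my_destroy_device(int vid)
 *	{
 *		// quiesce all processing before the device goes away
 *	}
 *
 *	static const struct vhost_device_ops my_ops = {
 *		.new_device = my_new_device,
 *		.destroy_device = my_destroy_device,
 *	};
 */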

/**
 * Convert guest physical address to host virtual address
 *
 * @param mem
 *  the guest memory regions
 * @param gpa
 *  the guest physical address for querying
 * @return
 *  the host virtual address on success, 0 on failure
 */
static inline uint64_t __attribute__((always_inline))
rte_vhost_gpa_to_vva(struct rte_vhost_memory *mem, uint64_t gpa)
{
	struct rte_vhost_mem_region *reg;
	uint32_t i;

	for (i = 0; i < mem->nregions; i++) {
		reg = &mem->regions[i];
		if (gpa >= reg->guest_phys_addr &&
		    gpa <  reg->guest_phys_addr + reg->size) {
			return gpa - reg->guest_phys_addr +
			       reg->host_user_addr;
		}
	}

	return 0;
}

/**
 * Convert guest physical address to host virtual address safely.
 *
 * This variant of rte_vhost_gpa_to_vva() ensures that the entire
 * requested length is mapped and contiguous in the process address
 * space.
 *
 * @param mem
 *  the guest memory regions
 * @param gpa
 *  the guest physical address for querying
 * @param len
 *  the size of the requested area to map, updated with the actual
 *  size mapped
 * @return
 *  the host virtual address on success, 0 on failure
 */
static inline uint64_t
rte_vhost_va_from_guest_pa(struct rte_vhost_memory *mem,
			   uint64_t gpa, uint64_t *len)
{
	struct rte_vhost_mem_region *r;
	uint32_t i;

	for (i = 0; i < mem->nregions; i++) {
		r = &mem->regions[i];
		if (gpa >= r->guest_phys_addr &&
		    gpa <  r->guest_phys_addr + r->size) {

			if (unlikely(*len > r->guest_phys_addr + r->size - gpa))
				*len = r->guest_phys_addr + r->size - gpa;

			return gpa - r->guest_phys_addr +
			       r->host_user_addr;
		}
	}
	*len = 0;

	return 0;
}
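
/*
 * Example (illustrative sketch): translating a descriptor's guest
 * physical address while checking that the whole range is mapped
 * contiguously. The desc_gpa, desc_len, and dst variables are
 * hypothetical.
 *
 *	uint64_t len = desc_len;
 *	uint64_t vva = rte_vhost_va_from_guest_pa(mem, desc_gpa, &len);
 *
 *	if (vva == 0 || len < desc_len)
 *		return -1;	// unmapped, or split across regions
 *	memcpy(dst, (void *)(uintptr_t)vva, desc_len);
 */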

#define RTE_VHOST_NEED_LOG(features)	((features) & (1ULL << VHOST_F_LOG_ALL))

/**
 * Log the start of a memory write at the given address.
 *
 * This function only needs to be invoked while live migration is in
 * progress, so most of the time it need not be called at all. To keep
 * the performance impact minimal, it is suggested to guard the call
 * with a feature check:
 *
 *	if (unlikely(RTE_VHOST_NEED_LOG(features)))
 *		rte_vhost_log_write(vid, addr, len);
 *
 * @param vid
 *  vhost device ID
 * @param addr
 *  the starting address for write
 * @param len
 *  the length to write
 */
void rte_vhost_log_write(int vid, uint64_t addr, uint64_t len);

/**
 * Log a used ring update starting at the given offset.
 *
 * As with rte_vhost_log_write(), it is suggested to guard the call
 * with a feature check:
 *
 *	if (unlikely(RTE_VHOST_NEED_LOG(features)))
 *		rte_vhost_log_used_vring(vid, vring_idx, offset, len);
 *
 * @param vid
 *  vhost device ID
 * @param vring_idx
 *  the vring index
 * @param offset
 *  the offset inside the used ring
 * @param len
 *  the length to write
 */
void rte_vhost_log_used_vring(int vid, uint16_t vring_idx,
			      uint64_t offset, uint64_t len);
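
/*
 * Example (illustrative sketch): dirty logging around a write into guest
 * memory during live migration. "features" is assumed to hold the bits
 * returned by rte_vhost_get_negotiated_features(); dst_vva, dst_gpa,
 * src, and copy_len are hypothetical.
 *
 *	memcpy((void *)(uintptr_t)dst_vva, src, copy_len);
 *	if (unlikely(RTE_VHOST_NEED_LOG(features)))
 *		rte_vhost_log_write(vid, dst_gpa, copy_len);
 */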

/**
 * Enable or disable guest notifications for the given queue.
 *
 * @param vid
 *  vhost device ID
 * @param queue_id
 *  virtio queue index
 * @param enable
 *  1 to enable, 0 to disable
 * @return
 *  0 on success, -1 on failure
 */
int rte_vhost_enable_guest_notification(int vid, uint16_t queue_id, int enable);

/**
 * Register the vhost driver. The path identifies the vhost-user socket
 * and may differ between calls to support multiple driver instances.
 */
int rte_vhost_driver_register(const char *path, uint64_t flags);

/* Unregister the vhost driver. This is only meaningful for vhost-user. */
int rte_vhost_driver_unregister(const char *path);

/**
 * Set the feature bits the vhost-user driver supports.
 *
 * @param path
 *  The vhost-user socket file path
 * @param features
 *  The supported feature bits
 * @return
 *  0 on success, -1 on failure
 */
int rte_vhost_driver_set_features(const char *path, uint64_t features);

/**
 * Enable vhost-user driver features.
 *
 * Note that
 * - the features argument should be a subset of the feature bits
 *   provided by rte_vhost_driver_set_features().
 * - it must be invoked before vhost-user negotiation starts.
 *
 * @param path
 *  The vhost-user socket file path
 * @param features
 *  Features to enable
 * @return
 *  0 on success, -1 on failure
 */
int rte_vhost_driver_enable_features(const char *path, uint64_t features);

/**
 * Disable vhost-user driver features.
 *
 * The two notes at rte_vhost_driver_enable_features() also apply here.
 *
 * @param path
 *  The vhost-user socket file path
 * @param features
 *  Features to disable
 * @return
 *  0 on success, -1 on failure
 */
int rte_vhost_driver_disable_features(const char *path, uint64_t features);
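
/*
 * Example (illustrative sketch): advertising a feature set, then masking
 * one bit out again before negotiation. VHOST_F_LOG_ALL comes from
 * <linux/vhost.h>; the socket path and my_other_features are
 * hypothetical.
 *
 *	const char *path = "/tmp/vhost.sock";
 *	uint64_t features = (1ULL << VHOST_F_LOG_ALL) | my_other_features;
 *
 *	rte_vhost_driver_set_features(path, features);
 *	rte_vhost_driver_disable_features(path, 1ULL << VHOST_F_LOG_ALL);
 */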

/**
 * Get the feature bits before feature negotiation.
 *
 * @param path
 *  The vhost-user socket file path
 * @param features
 *  A pointer to store the queried feature bits
 * @return
 *  0 on success, -1 on failure
 */
int rte_vhost_driver_get_features(const char *path, uint64_t *features);

/**
 * Get the feature bits after negotiation.
 *
 * @param vid
 *  Vhost device ID
 * @param features
 *  A pointer to store the queried feature bits
 * @return
 *  0 on success, -1 on failure
 */
int rte_vhost_get_negotiated_features(int vid, uint64_t *features);

/* Register callbacks. */
int rte_vhost_driver_callback_register(const char *path,
	struct vhost_device_ops const * const ops);

/**
 * Start the vhost-user driver.
 *
 * This function triggers the vhost-user negotiation.
 *
 * @param path
 *  The vhost-user socket file path
 * @return
 *  0 on success, -1 on failure
 */
int rte_vhost_driver_start(const char *path);
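
/*
 * Example (illustrative sketch): a typical bring-up sequence for one
 * vhost-user socket. Error handling is omitted; my_features and my_ops
 * are assumed to be defined by the application (see the callback table
 * sketch near the top of this file).
 *
 *	const char *path = "/tmp/vhost.sock";
 *
 *	rte_vhost_driver_register(path, 0);
 *	rte_vhost_driver_set_features(path, my_features);
 *	rte_vhost_driver_callback_register(path, &my_ops);
 *	rte_vhost_driver_start(path);
 */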

/**
 * Get the MTU value of the device if set in QEMU.
 *
 * @param vid
 *  virtio-net device ID
 * @param mtu
 *  The variable to store the MTU value
 *
 * @return
 *  0: success
 *  -EAGAIN: device not yet started
 *  -ENOTSUP: device does not support MTU feature
 */
int rte_vhost_get_mtu(int vid, uint16_t *mtu);

/**
 * Get the NUMA node from which the virtio-net device's memory
 * is allocated.
 *
 * @param vid
 *  vhost device ID
 *
 * @return
 *  The NUMA node, -1 on failure
 */
int rte_vhost_get_numa_node(int vid);

/**
 * Get the virtio-net device's ifname, which is the vhost-user socket
 * file path.
 *
 * @param vid
 *  vhost device ID
 * @param buf
 *  The buffer to store the queried ifname
 * @param len
 *  The length of buf
 *
 * @return
 *  0 on success, -1 on failure
 */
int rte_vhost_get_ifname(int vid, char *buf, size_t len);

/**
 * Get how many available entries are left in the queue.
 *
 * @param vid
 *  vhost device ID
 * @param queue_id
 *  virtio queue index
 *
 * @return
 *  number of available entries left
 */
uint16_t rte_vhost_avail_entries(int vid, uint16_t queue_id);

struct rte_mbuf;
struct rte_mempool;
/**
 * This function adds buffers to the virtio device's RX virtqueue. Buffers
 * can be received from the physical port or from another virtual device.
 * A packet count is returned to indicate the number of packets that were
 * successfully added to the RX queue.
 * @param vid
 *  vhost device ID
 * @param queue_id
 *  virtio queue index in the multi-queue case
 * @param pkts
 *  array containing the packets to be enqueued
 * @param count
 *  number of packets to enqueue
 * @return
 *  number of packets enqueued
 */
uint16_t rte_vhost_enqueue_burst(int vid, uint16_t queue_id,
	struct rte_mbuf **pkts, uint16_t count);

/**
 * This function gets guest buffers from the virtio device TX virtqueue,
 * constructs host mbufs, copies the guest buffer contents into them,
 * and stores them in pkts for processing.
 * @param vid
 *  vhost device ID
 * @param queue_id
 *  virtio queue index in the multi-queue case
 * @param mbuf_pool
 *  mbuf_pool where the host mbufs are allocated
 * @param pkts
 *  array to hold the dequeued packets
 * @param count
 *  number of packets to dequeue
 * @return
 *  number of packets dequeued
 */
uint16_t rte_vhost_dequeue_burst(int vid, uint16_t queue_id,
	struct rte_mempool *mbuf_pool, struct rte_mbuf **pkts, uint16_t count);
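
/*
 * Example (illustrative sketch): echoing guest traffic back to the
 * guest. Queue 1 is assumed to be the guest TX ring and queue 0 the
 * guest RX ring (the usual virtio-net layout); mbuf_pool is a
 * pre-created pool. rte_vhost_enqueue_burst() copies the packets, so the
 * caller still owns the mbufs and frees them afterwards
 * (rte_pktmbuf_free() is from <rte_mbuf.h>).
 *
 *	struct rte_mbuf *pkts[32];
 *	uint16_t n, i;
 *
 *	n = rte_vhost_dequeue_burst(vid, 1, mbuf_pool, pkts, 32);
 *	rte_vhost_enqueue_burst(vid, 0, pkts, n);
 *	for (i = 0; i < n; i++)
 *		rte_pktmbuf_free(pkts[i]);
 */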

/**
 * Get the guest memory table: a list of memory regions.
 *
 * An rte_vhost_memory object will be allocated internally to hold the
 * guest memory regions. The application should free it in the
 * destroy_device() callback.
 *
 * @param vid
 *  vhost device ID
 * @param mem
 *  To store the returned mem regions
 * @return
 *  0 on success, -1 on failure
 */
int rte_vhost_get_mem_table(int vid, struct rte_vhost_memory **mem);
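
/*
 * Example (illustrative sketch): dumping the guest memory layout. The
 * returned object is assumed to be released with free(), per the note
 * above about freeing it in destroy_device(); PRIx64 is from
 * <inttypes.h>.
 *
 *	struct rte_vhost_memory *mem;
 *	uint32_t i;
 *
 *	if (rte_vhost_get_mem_table(vid, &mem) == 0) {
 *		for (i = 0; i < mem->nregions; i++)
 *			printf("region %u: gpa 0x%" PRIx64 " size 0x%" PRIx64 "\n",
 *			       i, mem->regions[i].guest_phys_addr,
 *			       mem->regions[i].size);
 *		free(mem);
 *	}
 */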

/**
 * Get guest vring info, including the vring address, vring size, etc.
 *
 * @param vid
 *  vhost device ID
 * @param vring_idx
 *  vring index
 * @param vring
 *  the structure to hold the requested vring info
 * @return
 *  0 on success, -1 on failure
 */
int rte_vhost_get_vhost_vring(int vid, uint16_t vring_idx,
	struct rte_vhost_vring *vring);
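
/*
 * Example (illustrative sketch): inspecting a vring and signalling the
 * guest directly through its call eventfd. rte_vhost_vring_call() below
 * is the higher-level way to send the same notification.
 *
 *	struct rte_vhost_vring vring;
 *
 *	if (rte_vhost_get_vhost_vring(vid, 0, &vring) == 0) {
 *		// vring.size descriptors; vring.used->idx is the used index
 *		eventfd_write(vring.callfd, (eventfd_t)1);
 *	}
 */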

/**
 * Set the indexes of the last descriptors in the avail and used guest
 * vrings.
 *
 * In case the user application operates directly on the buffers, it
 * should use this function on device destruction to store its current
 * ring positions, so that the same values can be retrieved later on
 * device creation via rte_vhost_get_vring_base().
 *
 * @param vid
 *  vhost device ID
 * @param queue_id
 *  vring index
 * @param last_avail_idx
 *  index of the last descriptor in the avail ring to be set
 * @param last_used_idx
 *  index of the last descriptor in the used ring to be set
 * @return
 *  0 on success, -1 on failure
 */
int rte_vhost_set_vring_base(int vid, uint16_t queue_id,
	uint16_t last_avail_idx, uint16_t last_used_idx);

/**
 * Get the indexes of the last descriptors in the avail and used guest
 * vrings, the counterpart of rte_vhost_set_vring_base().
 *
 * @param vid
 *  vhost device ID
 * @param queue_id
 *  vring index
 * @param last_avail_idx
 *  where to store the index of the last descriptor in the avail ring
 * @param last_used_idx
 *  where to store the index of the last descriptor in the used ring
 * @return
 *  0 on success, -1 on failure
 */
int rte_vhost_get_vring_base(int vid, uint16_t queue_id,
	uint16_t *last_avail_idx, uint16_t *last_used_idx);
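
/*
 * Example (illustrative sketch): preserving ring positions across a
 * device restart when the application processes the rings itself, as the
 * note above suggests. cur_avail and cur_used are hypothetical
 * application state.
 *
 *	// on destroy_device(): push our positions back into the library
 *	rte_vhost_set_vring_base(vid, 0, cur_avail, cur_used);
 *
 *	// on new_device(): resume where the previous incarnation stopped
 *	rte_vhost_get_vring_base(vid, 0, &cur_avail, &cur_used);
 */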

/**
 * Notify the guest that used descriptors have been added to the vring.
 *
 * @param vid
 *  vhost device ID
 * @param vring_idx
 *  vring index
 * @return
 *  0 on success, -1 on failure
 */
int rte_vhost_vring_call(int vid, uint16_t vring_idx);

#endif /* _RTE_VHOST_H_ */