/*-
 *   BSD LICENSE
 *
 *   Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
 *   All rights reserved.
 *
 *   Redistribution and use in source and binary forms, with or without
 *   modification, are permitted provided that the following conditions
 *   are met:
 *
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in
 *       the documentation and/or other materials provided with the
 *       distribution.
 *     * Neither the name of Intel Corporation nor the names of its
 *       contributors may be used to endorse or promote products derived
 *       from this software without specific prior written permission.
 *
 *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#ifndef SPDK_VIRTIO_H
#define SPDK_VIRTIO_H

#include "spdk/stdinc.h"

#include <linux/virtio_ring.h>
#include <linux/virtio_pci.h>
#include <linux/virtio_config.h>

#include "spdk_internal/log.h"
#include "spdk/likely.h"
#include "spdk/queue.h"
#include "spdk/json.h"
#include "spdk/thread.h"
#include "spdk/pci_ids.h"
#include "spdk/env.h"

#ifndef VHOST_USER_F_PROTOCOL_FEATURES
#define VHOST_USER_F_PROTOCOL_FEATURES 30
#endif

/**
 * The maximum virtqueue size is 2^15. Use that value as the
 * end-of-descriptor-chain terminator, since it will never be a valid
 * index into the descriptor table. This is also used to verify that
 * vq_free_cnt is handled correctly.
 */
#define VQ_RING_DESC_CHAIN_END 32768

#define SPDK_VIRTIO_MAX_VIRTQUEUES 0x100

/* Extra status define for readability */
#define VIRTIO_CONFIG_S_RESET 0

struct virtio_dev_ops;

struct virtio_dev {
        struct virtqueue **vqs;

        /** Name of this virtio dev set by backend */
        char *name;

        /** Fixed number of backend-specific non-I/O virtqueues. */
        uint16_t fixed_queues_num;

        /** Max number of virtqueues the host supports. */
        uint16_t max_queues;

        /** Common device & guest features. */
        uint64_t negotiated_features;

        /** Whether this device is backed by PCI hardware (as opposed to vhost-user). */
        int is_hw;

        /** Modern/legacy virtio device flag. */
        uint8_t modern;

        /** Mutex for asynchronous virtqueue-changing operations. */
        pthread_mutex_t mutex;

        /** Backend-specific callbacks. */
        const struct virtio_dev_ops *backend_ops;

        /** Context for the backend ops */
        void *ctx;
};

struct virtio_dev_ops {
        int (*read_dev_cfg)(struct virtio_dev *hw, size_t offset,
                            void *dst, int len);
        int (*write_dev_cfg)(struct virtio_dev *hw, size_t offset,
                             const void *src, int len);
        uint8_t (*get_status)(struct virtio_dev *hw);
        void (*set_status)(struct virtio_dev *hw, uint8_t status);

        /**
         * Get device features. The features might already be
         * negotiated with the driver (guest) features.
         */
        uint64_t (*get_features)(struct virtio_dev *vdev);

        /**
         * Negotiate and set device features.
         * The negotiation can fail with return code -1.
         * This function should also set the vdev->negotiated_features field.
         */
        int (*set_features)(struct virtio_dev *vdev, uint64_t features);

        /** Destruct virtio device */
        void (*destruct_dev)(struct virtio_dev *vdev);

        uint16_t (*get_queue_size)(struct virtio_dev *vdev, uint16_t queue_id);
        int (*setup_queue)(struct virtio_dev *hw, struct virtqueue *vq);
        void (*del_queue)(struct virtio_dev *hw, struct virtqueue *vq);
        void (*notify_queue)(struct virtio_dev *hw, struct virtqueue *vq);

        void (*dump_json_info)(struct virtio_dev *hw, struct spdk_json_write_ctx *w);
        void (*write_json_config)(struct virtio_dev *hw, struct spdk_json_write_ctx *w);
};

struct vq_desc_extra {
        void *cookie;
        uint16_t ndescs;
};

struct virtqueue {
        struct virtio_dev *vdev; /**< owner of this virtqueue */
        struct vring vq_ring;    /**< vring keeping desc, used and avail */
        /**
         * Last consumed descriptor in the used table,
         * trails vq_ring.used->idx.
         */
        uint16_t vq_used_cons_idx;
        uint16_t vq_nentries;  /**< number of vring descriptors */
        uint16_t vq_free_cnt;  /**< number of free descriptors */
        uint16_t vq_avail_idx; /**< sync until needed */

        void *vq_ring_virt_mem; /**< virtual address of the vring */
        unsigned int vq_ring_size;

        uint64_t vq_ring_mem; /**< physical address of the vring */

        /**
         * Head of the free chain in the descriptor table. If
         * there are no free descriptors, this will be set to
         * VQ_RING_DESC_CHAIN_END.
         */
        uint16_t vq_desc_head_idx;

        /**
         * Tail of the free chain in the descriptor table. If
         * there are no free descriptors, this will be set to
         * VQ_RING_DESC_CHAIN_END.
         */
        uint16_t vq_desc_tail_idx;
        uint16_t vq_queue_index; /**< PCI queue index */
        uint16_t *notify_addr;

        /** Thread that's polling this queue. */
        struct spdk_thread *owner_thread;

        uint16_t req_start;
        uint16_t req_end;
        uint16_t reqs_finished;

        /** Per-descriptor extra state, allocated inline after this struct. */
        struct vq_desc_extra vq_descx[0];
};

enum spdk_virtio_desc_type {
        SPDK_VIRTIO_DESC_RO = 0,                  /**< Read only */
        SPDK_VIRTIO_DESC_WR = VRING_DESC_F_WRITE, /**< Write only */
        /* TODO VIRTIO_DESC_INDIRECT */
};

/** Context for creating PCI virtio_devs */
struct virtio_pci_ctx;

/**
 * Callback for creating virtio_dev from a PCI device.
 * \param pci_ctx PCI context to be associated with a virtio_dev
 * \param ctx context provided by the user
 * \return 0 on success, -1 on error.
 */
typedef int (*virtio_pci_create_cb)(struct virtio_pci_ctx *pci_ctx, void *ctx);

/**
 * Poll a virtqueue for completed requests.
 *
 * \param vq virtio queue
 * \param io array to be filled with the cookies of completed requests
 * \param len array to be filled with the lengths of completed requests
 * \param io_cnt maximum number of completed requests to return
 * \return number of completed requests filled into `io` and `len`
 */
uint16_t virtio_recv_pkts(struct virtqueue *vq, void **io, uint32_t *len, uint16_t io_cnt);
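
/*
 * Illustrative sketch (not part of the API): drain up to 32 completed
 * requests from a queue and pass each completion cookie to a caller
 * provided handler. The handler signature is hypothetical.
 */
static inline void
spdk_virtio_example_poll(struct virtqueue *vq,
                         void (*handle_req)(void *cookie, uint32_t len))
{
        void *io[32];
        uint32_t len[32];
        uint16_t i, cnt;

        cnt = virtio_recv_pkts(vq, io, len, 32);
        for (i = 0; i < cnt; i++) {
                /* io[i] is the cookie passed to virtqueue_req_start(). */
                handle_req(io[i], len[i]);
        }
}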

/**
 * Start a new request on the current vring head position and associate it
 * with an opaque cookie object. The previous request in the given vq will
 * be made visible to the device in hopes it can be processed early, but
 * there's no guarantee it will be until the device is notified with \c
 * virtqueue_req_flush. This behavior is simply an optimization and
 * virtqueues must always be flushed. Empty requests (with no descriptors
 * added) will be ignored. The device owning the given virtqueue must be
 * started.
 *
 * \param vq virtio queue
 * \param cookie opaque object to associate with this request. Once the
 * request is sent, processed and a response is received, the same object
 * will be returned to the user after calling the virtio poll API.
 * \param iovcnt number of required iovectors for the request. This can be
 * higher than the actual number of iovectors to be added.
 * \return 0 on success or negative errno otherwise. If `iovcnt` is
 * greater than the virtqueue depth, -EINVAL is returned. If simply not
 * enough iovectors are available, -ENOMEM is returned.
 */
int virtqueue_req_start(struct virtqueue *vq, void *cookie, int iovcnt);

/**
 * Flush a virtqueue. This will notify the device if it's required.
 * The device owning the given virtqueue must be started.
 *
 * \param vq virtio queue
 */
void virtqueue_req_flush(struct virtqueue *vq);

/**
 * Abort the very last request in a virtqueue. This will restore virtqueue
 * state to the point before the last request was created. Note that this
 * is only effective if the queue hasn't been flushed yet. The device
 * owning the given virtqueue must be started.
 *
 * \param vq virtio queue
 */
void virtqueue_req_abort(struct virtqueue *vq);

/**
 * Add an iovec chain to the last created request. This call does not
 * provide any error checking. The caller must not add more iovs than
 * specified during request creation. The device owning the given
 * virtqueue must be started.
 *
 * \param vq virtio queue
 * \param iovs iovec array
 * \param iovcnt number of iovs in the iovec array
 * \param desc_type type of all given iovectors
 */
void virtqueue_req_add_iovs(struct virtqueue *vq, struct iovec *iovs, uint16_t iovcnt,
                            enum spdk_virtio_desc_type desc_type);
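
/*
 * Illustrative sketch (not part of the API): submit a single buffer as
 * one device-readable request and flush the queue. The cookie is whatever
 * object the caller wants back from the poll API on completion.
 */
static inline int
spdk_virtio_example_submit(struct virtqueue *vq, void *cookie,
                           void *buf, size_t buf_len)
{
        struct iovec iov;
        int rc;

        iov.iov_base = buf;
        iov.iov_len = buf_len;

        /* Reserve descriptors for a request with a single iovec. */
        rc = virtqueue_req_start(vq, cookie, 1);
        if (rc != 0) {
                /* -EINVAL or -ENOMEM; the request was not started. */
                return rc;
        }

        /* Attach the buffer as a read-only (device-readable) descriptor. */
        virtqueue_req_add_iovs(vq, &iov, 1, SPDK_VIRTIO_DESC_RO);

        /* Make the request visible and notify the device if required. */
        virtqueue_req_flush(vq);
        return 0;
}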

/**
 * Construct a virtio device. The device will be in stopped state by
 * default. Before doing any I/O, it has to be manually brought up via
 * \c virtio_dev_reset and \c virtio_dev_start.
 *
 * \param vdev memory for virtio device, must be zeroed
 * \param name name for the virtio device
 * \param ops backend callbacks
 * \param ops_ctx argument for the backend callbacks
 * \return zero on success, or negative error code otherwise
 */
int virtio_dev_construct(struct virtio_dev *vdev, const char *name,
                         const struct virtio_dev_ops *ops, void *ops_ctx);

/**
 * Reset the device and prepare it to be `virtio_dev_start`ed. This call
 * will also renegotiate feature flags.
 *
 * \param vdev virtio device
 * \param req_features features this driver supports. A VIRTIO_F_VERSION_1
 * flag will be automatically appended, as legacy devices are not supported.
 * \return 0 on success, negative errno otherwise
 */
int virtio_dev_reset(struct virtio_dev *vdev, uint64_t req_features);

/**
 * Notify the host to start processing this virtio device. This is
 * a blocking call that won't return until the host has started.
 * This will also allocate virtqueues.
 *
 * \param vdev virtio device
 * \param max_queues number of queues to allocate. The max number of
 * usable I/O queues is also limited by the host device. `vdev` will be
 * started successfully even if the host supports fewer queues than
 * requested.
 * \param fixed_queues_num number of queues preceding the first
 * request queue. For Virtio-SCSI this is equal to 2, as there are
 * additional event and control queues.
 * \return 0 on success, negative errno otherwise
 */
int virtio_dev_start(struct virtio_dev *vdev, uint16_t max_queues,
                     uint16_t fixed_queues_num);

/**
 * Stop the host from processing the device. This is a blocking call
 * that won't return until all outstanding I/O has been processed on
 * the host (virtio device) side. In order to re-start the device, it
 * has to be reset via `virtio_dev_reset` first.
 *
 * \param vdev virtio device
 */
void virtio_dev_stop(struct virtio_dev *vdev);

/**
 * Destruct a virtio device. Note that it must be in the stopped state.
 * The virtio_dev should be manually freed afterwards.
 *
 * \param vdev virtio device
 */
void virtio_dev_destruct(struct virtio_dev *vdev);
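
/*
 * Illustrative sketch (not part of the API): a full device lifecycle
 * under assumed inputs. `my_ops` stands for any backend implementation
 * of virtio_dev_ops; the feature mask and queue counts are placeholders.
 */
static inline int
spdk_virtio_example_lifecycle(struct virtio_dev *vdev, const char *name,
                              const struct virtio_dev_ops *my_ops, void *ctx)
{
        int rc;

        rc = virtio_dev_construct(vdev, name, my_ops, ctx);
        if (rc != 0) {
                return rc;
        }

        /* Negotiate features; VIRTIO_F_VERSION_1 is appended automatically. */
        rc = virtio_dev_reset(vdev, 1ULL << VHOST_USER_F_PROTOCOL_FEATURES);
        if (rc == 0) {
                /* Ask for 3 queues total, 2 of them fixed (event + control). */
                rc = virtio_dev_start(vdev, 3, 2);
        }

        if (rc != 0) {
                virtio_dev_destruct(vdev);
                return rc;
        }

        /* ... I/O would happen here ... */

        virtio_dev_stop(vdev);
        virtio_dev_destruct(vdev);
        return 0;
}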

/**
 * Bind a virtqueue with the given index to the current thread.
 *
 * This function is thread-safe.
 *
 * \param vdev vhost device
 * \param index virtqueue index
 * \return 0 on success, -1 in case a virtqueue with the given index
 * either does not exist or is already acquired.
 */
int virtio_dev_acquire_queue(struct virtio_dev *vdev, uint16_t index);

/**
 * Look for an unused queue and bind it to the current thread. This will
 * scan the queues in the range from *start_index* (inclusive) up to
 * vdev->max_queues (exclusive).
 *
 * This function is thread-safe.
 *
 * \param vdev vhost device
 * \param start_index virtqueue index to start looking from
 * \return index of the acquired queue, or -1 in case no unused queue in
 * the given range has been found
 */
int32_t virtio_dev_find_and_acquire_queue(struct virtio_dev *vdev, uint16_t start_index);

/**
 * Get the thread that acquired the given virtqueue.
 *
 * This function is thread-safe.
 *
 * \param vdev vhost device
 * \param index index of virtqueue
 * \return thread that acquired the given virtqueue. If the queue is
 * unused or doesn't exist, NULL is returned.
 */
struct spdk_thread *virtio_dev_queue_get_thread(struct virtio_dev *vdev, uint16_t index);

/**
 * Check if the virtqueue with the given index is acquired.
 *
 * This function is thread-safe.
 *
 * \param vdev vhost device
 * \param index index of virtqueue
 * \return virtqueue acquire status. In case of an invalid index, *false*
 * is returned.
 */
bool virtio_dev_queue_is_acquired(struct virtio_dev *vdev, uint16_t index);

/**
 * Release a previously acquired queue.
 *
 * This function must be called from the thread that acquired the queue.
 *
 * \param vdev vhost device
 * \param index index of virtqueue to release
 */
void virtio_dev_release_queue(struct virtio_dev *vdev, uint16_t index);
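
/*
 * Illustrative sketch (not part of the API): claim the first unused I/O
 * queue (skipping the fixed non-I/O queues), use it from this thread,
 * then release it. The `do_io` callback is hypothetical.
 */
static inline int
spdk_virtio_example_with_io_queue(struct virtio_dev *vdev,
                                  void (*do_io)(struct virtqueue *vq))
{
        int32_t queue_idx;

        /* I/O queues start right after the fixed (non-I/O) queues. */
        queue_idx = virtio_dev_find_and_acquire_queue(vdev, vdev->fixed_queues_num);
        if (queue_idx < 0) {
                return -1;
        }

        do_io(vdev->vqs[queue_idx]);

        /* Must be released from the same thread that acquired it. */
        virtio_dev_release_queue(vdev, (uint16_t)queue_idx);
        return 0;
}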

/**
 * Get the Virtio status flags.
 *
 * \param vdev virtio device
 * \return the device status byte
 */
uint8_t virtio_dev_get_status(struct virtio_dev *vdev);

/**
 * Set a Virtio status flag. The flags have to be set in a very specific
 * order defined in the VIRTIO 1.0 spec section 3.1.1. To unset the flags,
 * stop the device or set the \c VIRTIO_CONFIG_S_RESET status flag. There
 * is no way to unset only particular flags.
 *
 * \param vdev virtio device
 * \param flag flag to set
 */
void virtio_dev_set_status(struct virtio_dev *vdev, uint8_t flag);
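
/*
 * Illustrative sketch (not part of the API): the spec-mandated order of
 * status flags (VIRTIO 1.0, section 3.1.1) when bringing a device up by
 * hand. This sequence is normally driven internally by
 * virtio_dev_reset()/virtio_dev_start(); feature negotiation would
 * happen between the DRIVER and FEATURES_OK steps.
 */
static inline void
spdk_virtio_example_status_sequence(struct virtio_dev *vdev)
{
        virtio_dev_set_status(vdev, VIRTIO_CONFIG_S_ACKNOWLEDGE);
        virtio_dev_set_status(vdev, VIRTIO_CONFIG_S_DRIVER);
        /* ... negotiate features here ... */
        virtio_dev_set_status(vdev, VIRTIO_CONFIG_S_FEATURES_OK);
        virtio_dev_set_status(vdev, VIRTIO_CONFIG_S_DRIVER_OK);
}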

/**
 * Write raw data into the device config at the given offset. This call
 * does not provide any error checking.
 *
 * \param vdev virtio device
 * \param offset offset in bytes
 * \param src pointer to data to copy from
 * \param len length of data to copy in bytes
 * \return 0 on success, negative errno otherwise
 */
int virtio_dev_write_dev_config(struct virtio_dev *vdev, size_t offset, const void *src, int len);

/**
 * Read raw data from the device config at the given offset. This call
 * does not provide any error checking.
 *
 * \param vdev virtio device
 * \param offset offset in bytes
 * \param dst pointer to the buffer to copy data into
 * \param len length of data to copy in bytes
 * \return 0 on success, negative errno otherwise
 */
int virtio_dev_read_dev_config(struct virtio_dev *vdev, size_t offset, void *dst, int len);
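
/*
 * Illustrative sketch (not part of the API): read one 32-bit field out
 * of the device-specific config space. The offset is a placeholder; real
 * offsets come from the config layout of the device type in use.
 */
static inline int
spdk_virtio_example_read_cfg_u32(struct virtio_dev *vdev, size_t offset,
                                 uint32_t *value)
{
        /* No bounds checking is done here; the caller picks a valid offset. */
        return virtio_dev_read_dev_config(vdev, offset, value, sizeof(*value));
}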

/**
 * Get backend-specific ops for the given device.
 *
 * \param vdev virtio device
 */
const struct virtio_dev_ops *virtio_dev_backend_ops(struct virtio_dev *vdev);

/**
 * Check if the device has negotiated the given feature bit.
 *
 * \param vdev virtio device
 * \param bit feature bit
 */
static inline bool
virtio_dev_has_feature(struct virtio_dev *vdev, uint64_t bit)
{
        return !!(vdev->negotiated_features & (1ULL << bit));
}
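
/*
 * Illustrative sketch (not part of the API): gate behavior on a
 * negotiated feature. VHOST_USER_F_PROTOCOL_FEATURES is used here only
 * because it is a bit number already defined in this header.
 */
static inline bool
spdk_virtio_example_has_protocol_features(struct virtio_dev *vdev)
{
        return virtio_dev_has_feature(vdev, VHOST_USER_F_PROTOCOL_FEATURES);
}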

/**
 * Dump all device-specific information into the given json stream.
 *
 * \param vdev virtio device
 * \param w json stream
 */
void virtio_dev_dump_json_info(struct virtio_dev *vdev, struct spdk_json_write_ctx *w);

/**
 * Enumerate all PCI Virtio devices of a given type on the system.
 *
 * \param enum_cb a function to be called for each valid PCI device.
 * If a virtio_dev has been created, the callback should return 0.
 * Returning any other value will cause the PCI context to be freed,
 * making it unusable.
 * \param enum_ctx additional opaque context to be passed into `enum_cb`
 * \param pci_device_id PCI Device ID of devices to iterate through
 */
int virtio_pci_dev_enumerate(virtio_pci_create_cb enum_cb, void *enum_ctx,
                             uint16_t pci_device_id);

/**
 * Attach a PCI Virtio device of a given type.
 *
 * \param create_cb callback to create a virtio_dev.
 * If a virtio_dev has been created, the callback should return 0.
 * Returning any other value will cause the PCI context to be freed,
 * making it unusable.
 * \param enum_ctx additional opaque context to be passed into `create_cb`
 * \param pci_device_id PCI Device ID of the device to attach
 * \param pci_addr PCI address of the device to attach
 */
int virtio_pci_dev_attach(virtio_pci_create_cb create_cb, void *enum_ctx,
                          uint16_t pci_device_id, struct spdk_pci_addr *pci_addr);

/**
 * Connect to a vhost-user device and init the corresponding virtio_dev
 * struct. The virtio_dev will have to be freed with \c virtio_dev_destruct.
 *
 * \param vdev preallocated vhost device struct to operate on
 * \param name name of this virtio device
 * \param path path to the Unix domain socket of the vhost-user device
 * \param queue_size size of each of the queues
 * \return 0 on success, -1 on error.
 */
int virtio_user_dev_init(struct virtio_dev *vdev, const char *name, const char *path,
                         uint32_t queue_size);
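
/*
 * Illustrative sketch (not part of the API): set up a vhost-user backed
 * device. The name, socket path and queue size below are placeholders.
 */
static inline int
spdk_virtio_example_user_init(struct virtio_dev *vdev)
{
        /* The socket must already be served by a vhost-user target. */
        return virtio_user_dev_init(vdev, "VirtioUser0",
                                    "/var/tmp/vhost.0", 512);
}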

/**
 * Initialize virtio_dev for a given PCI device.
 * The virtio_dev has to be freed with \c virtio_dev_destruct.
 *
 * \param vdev preallocated vhost device struct to operate on
 * \param name name of this virtio device
 * \param pci_ctx context of the PCI device
 * \return 0 on success, -1 on error.
 */
int virtio_pci_dev_init(struct virtio_dev *vdev, const char *name,
                        struct virtio_pci_ctx *pci_ctx);
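
/*
 * Illustrative sketch (not part of the API): a virtio_pci_create_cb that
 * binds each matching PCI device to a freshly allocated virtio_dev. The
 * device name is a placeholder, and a real callback would also store
 * `vdev` somewhere reachable, e.g. in a list passed through `ctx`. It
 * could be handed to virtio_pci_dev_enumerate() together with a device
 * ID such as 0x1048 (modern virtio-scsi: 0x1040 + device type 8).
 */
static inline int
spdk_virtio_example_pci_create_cb(struct virtio_pci_ctx *pci_ctx, void *ctx)
{
        struct virtio_dev *vdev;

        vdev = calloc(1, sizeof(*vdev));
        if (vdev == NULL) {
                /* A non-zero return frees the PCI context. */
                return -1;
        }

        if (virtio_pci_dev_init(vdev, "VirtioExamplePci", pci_ctx) != 0) {
                free(vdev);
                return -1;
        }

        (void)ctx; /* unused in this sketch */
        return 0;
}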

#endif /* SPDK_VIRTIO_H */