]> git.proxmox.com Git - ceph.git/blob - ceph/src/spdk/lib/vhost/vhost_internal.h
import 15.2.0 Octopus source
[ceph.git] / ceph / src / spdk / lib / vhost / vhost_internal.h
1 /*-
2 * BSD LICENSE
3 *
4 * Copyright (c) Intel Corporation.
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 *
11 * * Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * * Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in
15 * the documentation and/or other materials provided with the
16 * distribution.
17 * * Neither the name of Intel Corporation nor the names of its
18 * contributors may be used to endorse or promote products derived
19 * from this software without specific prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32 */
33
34 #ifndef SPDK_VHOST_INTERNAL_H
35 #define SPDK_VHOST_INTERNAL_H
36
37 #include "spdk/stdinc.h"
38
39 #include <rte_vhost.h>
40
41 #include "spdk_internal/log.h"
42 #include "spdk/event.h"
43 #include "spdk/rpc.h"
44 #include "spdk/config.h"
45
/* Cache line size used by this library; taken from RTE_CACHE_LINE_SIZE. */
#define SPDK_CACHE_LINE_SIZE		RTE_CACHE_LINE_SIZE

/*
 * Fallback feature-bit numbers. Each one is defined here only when the
 * headers included above have not already provided it.
 */
#ifndef VHOST_USER_F_PROTOCOL_FEATURES
#define VHOST_USER_F_PROTOCOL_FEATURES	30
#endif

#ifndef VIRTIO_F_VERSION_1
#define VIRTIO_F_VERSION_1		32
#endif

#ifndef VIRTIO_BLK_F_MQ
#define VIRTIO_BLK_F_MQ			12	/* support more than one vq */
#endif

#ifndef VIRTIO_BLK_F_CONFIG_WCE
#define VIRTIO_BLK_F_CONFIG_WCE		11
#endif
63
/* Number of virtqueue slots reserved in every struct spdk_vhost_session. */
#define SPDK_VHOST_MAX_VQUEUES			256
#define SPDK_VHOST_MAX_VQ_SIZE			1024

#define SPDK_VHOST_SCSI_CTRLR_MAX_DEVS		8

#define SPDK_VHOST_IOVS_MAX			129

/* Rate (in milliseconds) at which stats are checked for interrupt coalescing. */
#define SPDK_VHOST_STATS_CHECK_INTERVAL_MS	10

/* Default threshold at which interrupts start to be coalesced. */
#define SPDK_VHOST_VQ_IOPS_COALESCING_THRESHOLD	60000

/*
 * Coalescing is currently not used by default. Setting this to a
 * value > 0, either here or by RPC, enables coalescing.
 */
#define SPDK_VHOST_COALESCING_DELAY_BASE_US	0
85
86
/* Bitmask of the generic vhost/virtio feature bits listed below. */
#define SPDK_VHOST_FEATURES \
	((1ULL << VHOST_F_LOG_ALL) | \
	 (1ULL << VHOST_USER_F_PROTOCOL_FEATURES) | \
	 (1ULL << VIRTIO_F_VERSION_1) | \
	 (1ULL << VIRTIO_F_NOTIFY_ON_EMPTY) | \
	 (1ULL << VIRTIO_RING_F_EVENT_IDX) | \
	 (1ULL << VIRTIO_RING_F_INDIRECT_DESC))

/*
 * Bitmask of feature bits that are disabled. Both bits also appear in
 * SPDK_VHOST_FEATURES.
 * NOTE(review): how the two masks are combined is decided by the users
 * of this header - confirm against the .c files.
 */
#define SPDK_VHOST_DISABLED_FEATURES \
	((1ULL << VIRTIO_RING_F_EVENT_IDX) | \
	 (1ULL << VIRTIO_F_NOTIFY_ON_EMPTY))
96
97 struct spdk_vhost_virtqueue {
98 struct rte_vhost_vring vring;
99 uint16_t last_avail_idx;
100 uint16_t last_used_idx;
101
102 void *tasks;
103
104 /* Request count from last stats check */
105 uint32_t req_cnt;
106
107 /* Request count from last event */
108 uint16_t used_req_cnt;
109
110 /* How long interrupt is delayed */
111 uint32_t irq_delay_time;
112
113 /* Next time when we need to send event */
114 uint64_t next_event_time;
115
116 /* Associated vhost_virtqueue in the virtio device's virtqueue list */
117 uint32_t vring_idx;
118 } __attribute((aligned(SPDK_CACHE_LINE_SIZE)));
119
120 struct spdk_vhost_session {
121 struct spdk_vhost_dev *vdev;
122
123 /* rte_vhost connection ID. */
124 int vid;
125
126 /* Unique session ID. */
127 unsigned id;
128
129 int32_t lcore;
130
131 bool initialized;
132 bool started;
133 bool needs_restart;
134 bool forced_polling;
135
136 struct rte_vhost_memory *mem;
137
138 int task_cnt;
139
140 uint16_t max_queues;
141
142 uint64_t negotiated_features;
143
144 /* Local copy of device coalescing settings. */
145 uint32_t coalescing_delay_time_base;
146 uint32_t coalescing_io_rate_threshold;
147
148 /* Next time when stats for event coalescing will be checked. */
149 uint64_t next_stats_check_time;
150
151 /* Interval used for event coalescing checking. */
152 uint64_t stats_check_interval;
153
154 struct spdk_vhost_virtqueue virtqueue[SPDK_VHOST_MAX_VQUEUES];
155
156 TAILQ_ENTRY(spdk_vhost_session) tailq;
157
158 struct spdk_vhost_session_fn_ctx *event_ctx;
159 };
160
/**
 * A vhost device (controller) together with its currently connected
 * sessions.
 */
struct spdk_vhost_dev {
	char *name;
	char *path;

	struct spdk_cpuset *cpumask;
	bool registered;

	/* Callbacks and feature masks of the backend driving this device. */
	const struct spdk_vhost_dev_backend *backend;

	/*
	 * Original values used to set up coalescing, saved to avoid
	 * integer rounding issues during save/load config.
	 */
	uint32_t coalescing_delay_us;
	uint32_t coalescing_iops_threshold;

	/* Current connections to the device. */
	TAILQ_HEAD(, spdk_vhost_session) vsessions;

	/* Session counter; it is only ever incremented. */
	uint64_t vsessions_num;

	/* Number of started and actively polled sessions. */
	uint32_t active_session_num;

	/* Number of pending asynchronous operations. */
	uint32_t pending_async_op_num;

	TAILQ_ENTRY(spdk_vhost_dev) tailq;
};
190
/**
 * Callback type for synchronized vhost session events, used for
 * backend callbacks.
 *
 * \param vdev vhost device. If the device has been deleted in the
 * meantime, the callback is invoked one last time with vdev == NULL.
 * \param vsession vhost session. Once all sessions have been iterated
 * through, the callback is invoked one last time with vsession == NULL.
 * \param arg user-provided parameter.
 *
 * \return a negative value breaks the foreach call, meaning the
 * callback won't be invoked again. Zero and positive return codes
 * don't have any effect.
 */
typedef int (*spdk_vhost_session_fn)(
	struct spdk_vhost_dev *vdev,
	struct spdk_vhost_session *vsession,
	void *arg);
209
/**
 * Backend description of a vhost device: its feature masks, the size
 * of its per-session context and its callbacks.
 */
struct spdk_vhost_dev_backend {
	uint64_t virtio_features;
	uint64_t disabled_features;

	/**
	 * Size of the additional per-session context data that is
	 * allocated whenever a new client connects.
	 */
	size_t session_ctx_size;

	/* Session lifecycle callbacks. */
	int (*start_session)(struct spdk_vhost_session *vsession);
	int (*stop_session)(struct spdk_vhost_session *vsession);

	/* Device config space accessors. */
	int (*vhost_get_config)(struct spdk_vhost_dev *vdev,
				uint8_t *config, uint32_t len);
	int (*vhost_set_config)(struct spdk_vhost_dev *vdev,
				uint8_t *config, uint32_t offset,
				uint32_t size, uint32_t flags);

	/* JSON output callbacks. */
	void (*dump_info_json)(struct spdk_vhost_dev *vdev,
			       struct spdk_json_write_ctx *w);
	void (*write_config_json)(struct spdk_vhost_dev *vdev,
				  struct spdk_json_write_ctx *w);

	int (*remove_device)(struct spdk_vhost_dev *vdev);
};
231
/*
 * NOTE(review): judging by the name, this translates a guest physical
 * address range (addr, len) into a vhost virtual address - confirm
 * against the implementation.
 */
void *spdk_vhost_gpa_to_vva(struct spdk_vhost_session *vsession,
			    uint64_t addr,
			    uint64_t len);

/*
 * NOTE(review): presumably stores up to reqs_len entries from the
 * avail ring of vq into reqs and returns how many were stored -
 * confirm against the implementation.
 */
uint16_t spdk_vhost_vq_avail_ring_get(struct spdk_vhost_virtqueue *vq,
				      uint16_t *reqs,
				      uint16_t reqs_len);
236
/**
 * Get the virtio descriptor at the given index in the given virtqueue.
 *
 * The descriptor provides access to the entire descriptor chain. The
 * subsequent descriptors are accessible via
 * \c spdk_vhost_vring_desc_get_next.
 *
 * \param vsession vhost session
 * \param vq virtqueue
 * \param req_idx descriptor index
 * \param desc pointer to be set to the descriptor
 * \param desc_table descriptor table to be used with
 * \c spdk_vhost_vring_desc_get_next. This might be either the default
 * virtqueue descriptor table or a per-chain indirect table.
 * \param desc_table_size size of the *desc_table*
 * \return 0 on success, -1 if the given index is invalid.
 * If -1 is returned, the content of the params is undefined.
 */
int spdk_vhost_vq_get_desc(struct spdk_vhost_session *vsession,
			   struct spdk_vhost_virtqueue *vq,
			   uint16_t req_idx,
			   struct vring_desc **desc,
			   struct vring_desc **desc_table,
			   uint32_t *desc_table_size);
257
/**
 * Send an IRQ/call to the client for \c vq, if one is pending.
 *
 * \param vsession vhost session
 * \param vq virtqueue
 * \return
 *   0 - if no interrupt was signalled
 *   1 - if interrupt was signalled
 */
int spdk_vhost_vq_used_signal(struct spdk_vhost_session *vsession,
			      struct spdk_vhost_virtqueue *vq);
267
268
/**
 * Send IRQs for all queues of the session that need to be signaled.
 *
 * \param vsession vhost session
 */
void spdk_vhost_session_used_signal(struct spdk_vhost_session *vsession);
275
/*
 * NOTE(review): presumably places the entry (id, len) on the used ring
 * of vq - confirm against the implementation.
 */
void spdk_vhost_vq_used_ring_enqueue(struct spdk_vhost_session *vsession,
				     struct spdk_vhost_virtqueue *vq,
				     uint16_t id,
				     uint32_t len);
279
/**
 * Get the subsequent descriptor from the given table.
 *
 * \param desc current descriptor; will be set to the next descriptor
 * (NULL in case this is the last descriptor in the chain or the next
 * desc is invalid)
 * \param desc_table descriptor table
 * \param desc_table_size size of the *desc_table*
 * \return 0 on success, -1 if the given index is invalid.
 * The *desc* param will be set regardless of the return value.
 */
int spdk_vhost_vring_desc_get_next(struct vring_desc **desc,
				   struct vring_desc *desc_table,
				   uint32_t desc_table_size);

/*
 * NOTE(review): presumably reports whether cur_desc is flagged as
 * writable - confirm against the implementation.
 */
bool spdk_vhost_vring_desc_is_wr(struct vring_desc *cur_desc);
294
/*
 * NOTE(review): presumably fills entries of iov from desc and advances
 * *iov_index accordingly - confirm against the implementation.
 */
int spdk_vhost_vring_desc_to_iov(struct spdk_vhost_session *vsession,
				 struct iovec *iov,
				 uint16_t *iov_index,
				 const struct vring_desc *desc);
297
298 static inline bool __attribute__((always_inline))
299 spdk_vhost_dev_has_feature(struct spdk_vhost_session *vsession, unsigned feature_id)
300 {
301 return vsession->negotiated_features & (1ULL << feature_id);
302 }
303
/* Registration and removal of a vhost device with the given backend. */
int spdk_vhost_dev_register(struct spdk_vhost_dev *vdev,
			    const char *name,
			    const char *mask_str,
			    const struct spdk_vhost_dev_backend *backend);
int spdk_vhost_dev_unregister(struct spdk_vhost_dev *vdev);

/* Per-backend controller construction entry points. */
int spdk_vhost_scsi_controller_construct(void);
int spdk_vhost_blk_controller_construct(void);

void spdk_vhost_dump_info_json(struct spdk_vhost_dev *vdev,
			       struct spdk_json_write_ctx *w);
311
312 /*
313 * Call function for each active session on the provided
314 * vhost device. The function will be called one-by-one
315 * on each session's thread.
316 *
317 * \param vdev vhost device
318 * \param fn function to call
319 * \param arg additional argument to \c fn
320 */
321 void spdk_vhost_dev_foreach_session(struct spdk_vhost_dev *dev,
322 spdk_vhost_session_fn fn, void *arg);
323
324 /**
325 * Call a function on the provided lcore and block until either
326 * spdk_vhost_session_start_done() or spdk_vhost_session_stop_done()
327 * is called.
328 *
329 * This must be called under the global vhost mutex, which this function
330 * will unlock for the time it's waiting. It's meant to be called only
331 * from start/stop session callbacks.
332 *
333 * \param lcore target session's lcore
334 * \param vsession vhost session
335 * \param cb_fn the function to call. The void *arg parameter in cb_fn
336 * is always NULL.
337 * \param timeout_sec timeout in seconds. This function will still
338 * block after the timeout expires, but will print the provided errmsg.
339 * \param errmsg error message to print once the timeout expires
340 * \return return the code passed to spdk_vhost_session_event_done().
341 */
342 int spdk_vhost_session_send_event(int32_t lcore, struct spdk_vhost_session *vsession,
343 spdk_vhost_session_fn cb_fn, unsigned timeout_sec,
344 const char *errmsg);
345
/**
 * Finish a blocking spdk_vhost_session_send_event() call and finally
 * start the session.
 *
 * This must be called on the target lcore, which will now receive all
 * session-related messages (e.g. from
 * spdk_vhost_dev_foreach_session()).
 *
 * Must be called under the global vhost lock.
 *
 * \param vsession vhost session
 * \param response return code
 */
void spdk_vhost_session_start_done(struct spdk_vhost_session *vsession,
				   int response);
358
/**
 * Finish a blocking spdk_vhost_session_send_event() call and finally
 * stop the session.
 *
 * This must be called on the session's lcore, which used to receive
 * all session-related messages (e.g. from
 * spdk_vhost_dev_foreach_session()). After this call, the
 * session-related messages will once again be processed by any
 * arbitrary thread.
 *
 * Must be called under the global vhost mutex.
 *
 * \param vsession vhost session
 * \param response return code
 */
void spdk_vhost_session_stop_done(struct spdk_vhost_session *vsession,
				  int response);
374
/* Session lookup by rte_vhost connection ID. */
struct spdk_vhost_session *spdk_vhost_session_find_by_vid(int vid);

/* rte_vhost compatibility hooks for sessions and devices. */
void spdk_vhost_session_install_rte_compat_hooks(struct spdk_vhost_session *vsession);
void spdk_vhost_dev_install_rte_compat_hooks(struct spdk_vhost_dev *vdev);

/* Reactor (lcore) bookkeeping. */
void spdk_vhost_free_reactor(uint32_t lcore);
uint32_t spdk_vhost_allocate_reactor(struct spdk_cpuset *cpumask);

int spdk_remove_vhost_controller(struct spdk_vhost_dev *vdev);
383
/* vhost-nvme entry points; declared only with SPDK_CONFIG_VHOST_INTERNAL_LIB. */
#ifdef SPDK_CONFIG_VHOST_INTERNAL_LIB
int spdk_vhost_nvme_admin_passthrough(int vid, void *cmd, void *cqe, void *buf);
int spdk_vhost_nvme_set_cq_call(int vid, uint16_t qid, int fd);
int spdk_vhost_nvme_set_bar_mr(int vid, void *bar_addr, uint64_t bar_size);
int spdk_vhost_nvme_get_cap(int vid, uint64_t *cap);
int spdk_vhost_nvme_controller_construct(void);
int spdk_vhost_nvme_dev_construct(const char *name,
				  const char *cpumask,
				  uint32_t io_queues);
int spdk_vhost_nvme_dev_remove(struct spdk_vhost_dev *vdev);
int spdk_vhost_nvme_dev_add_ns(struct spdk_vhost_dev *vdev,
			       const char *bdev_name);
#endif /* SPDK_CONFIG_VHOST_INTERNAL_LIB */
395
396 #endif /* SPDK_VHOST_INTERNAL_H */