4 * Copyright (c) Intel Corporation.
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
11 * * Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * * Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in
15 * the documentation and/or other materials provided with the
17 * * Neither the name of Intel Corporation nor the names of its
18 * contributors may be used to endorse or promote products derived
19 * from this software without specific prior written permission.
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
34 #include "spdk/stdinc.h"
36 #include "vbdev_delay.h"
39 #include "spdk/conf.h"
40 #include "spdk/endian.h"
41 #include "spdk/string.h"
42 #include "spdk/thread.h"
43 #include "spdk/util.h"
45 #include "spdk/bdev_module.h"
46 #include "spdk_internal/log.h"
/* Forward declarations of the module-level entry points wired into delay_if. */
static int vbdev_delay_init(void);
static int vbdev_delay_get_ctx_size(void);
static void vbdev_delay_examine(struct spdk_bdev *bdev);
static void vbdev_delay_finish(void);
static int vbdev_delay_config_json(struct spdk_json_write_ctx *w);
55 static struct spdk_bdev_module delay_if
= {
57 .module_init
= vbdev_delay_init
,
59 .get_ctx_size
= vbdev_delay_get_ctx_size
,
60 .examine_config
= vbdev_delay_examine
,
61 .module_fini
= vbdev_delay_finish
,
62 .config_json
= vbdev_delay_config_json
65 SPDK_BDEV_MODULE_REGISTER(delay
, &delay_if
)
67 /* Associative list to be used in examine */
68 struct bdev_association
{
71 uint64_t avg_read_latency
;
72 uint64_t p99_read_latency
;
73 uint64_t avg_write_latency
;
74 uint64_t p99_write_latency
;
75 TAILQ_ENTRY(bdev_association
) link
;
77 static TAILQ_HEAD(, bdev_association
) g_bdev_associations
= TAILQ_HEAD_INITIALIZER(
80 /* List of virtual bdevs and associated info for each. */
82 struct spdk_bdev
*base_bdev
; /* the thing we're attaching to */
83 struct spdk_bdev_desc
*base_desc
; /* its descriptor we get from open */
84 struct spdk_bdev delay_bdev
; /* the delay virtual bdev */
85 uint64_t average_read_latency_ticks
; /* the average read delay */
86 uint64_t p99_read_latency_ticks
; /* the p99 read delay */
87 uint64_t average_write_latency_ticks
; /* the average write delay */
88 uint64_t p99_write_latency_ticks
; /* the p99 write delay */
89 TAILQ_ENTRY(vbdev_delay
) link
;
90 struct spdk_thread
*thread
; /* thread where base device is opened */
92 static TAILQ_HEAD(, vbdev_delay
) g_delay_nodes
= TAILQ_HEAD_INITIALIZER(g_delay_nodes
);
94 struct delay_bdev_io
{
97 uint64_t completion_tick
;
99 enum delay_io_type type
;
101 struct spdk_io_channel
*ch
;
103 struct spdk_bdev_io_wait_entry bdev_io_wait
;
105 STAILQ_ENTRY(delay_bdev_io
) link
;
/* Per-channel state of a delay bdev: the base channel, one queue of delayed
 * I/O per latency class, the poller that drains those queues and the seed
 * used to pick the latency class of each I/O.
 */
struct delay_io_channel {
	struct spdk_io_channel	*base_ch; /* IO channel of base device */
	STAILQ_HEAD(, delay_bdev_io) avg_read_io;
	STAILQ_HEAD(, delay_bdev_io) p99_read_io;
	STAILQ_HEAD(, delay_bdev_io) avg_write_io;
	STAILQ_HEAD(, delay_bdev_io) p99_write_io;
	struct spdk_poller *io_poller;
	unsigned int rand_seed;
};
/* Declared early because vbdev_delay_resubmit_io() calls it before its definition. */
static void
vbdev_delay_submit_request(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io);
122 /* Callback for unregistering the IO device. */
124 _device_unregister_cb(void *io_device
)
126 struct vbdev_delay
*delay_node
= io_device
;
128 /* Done with this delay_node. */
129 free(delay_node
->delay_bdev
.name
);
/* Thread message handler: closes the base bdev descriptor passed as ctx.
 * Sent by vbdev_delay_destruct() when it runs on a thread other than the
 * one recorded in delay_node->thread.
 */
static void
_vbdev_delay_destruct(void *ctx)
{
	struct spdk_bdev_desc *desc = ctx;

	spdk_bdev_close(desc);
}
142 vbdev_delay_destruct(void *ctx
)
144 struct vbdev_delay
*delay_node
= (struct vbdev_delay
*)ctx
;
146 /* It is important to follow this exact sequence of steps for destroying
150 TAILQ_REMOVE(&g_delay_nodes
, delay_node
, link
);
152 /* Unclaim the underlying bdev. */
153 spdk_bdev_module_release_bdev(delay_node
->base_bdev
);
155 /* Close the underlying bdev on its same opened thread. */
156 if (delay_node
->thread
&& delay_node
->thread
!= spdk_get_thread()) {
157 spdk_thread_send_msg(delay_node
->thread
, _vbdev_delay_destruct
, delay_node
->base_desc
);
159 spdk_bdev_close(delay_node
->base_desc
);
162 /* Unregister the io_device. */
163 spdk_io_device_unregister(delay_node
, _device_unregister_cb
);
169 _process_io_stailq(void *arg
, uint64_t ticks
)
171 STAILQ_HEAD(, delay_bdev_io
) *head
= arg
;
172 struct delay_bdev_io
*io_ctx
, *tmp
;
175 STAILQ_FOREACH_SAFE(io_ctx
, head
, link
, tmp
) {
176 if (io_ctx
->completion_tick
<= ticks
) {
177 STAILQ_REMOVE(head
, io_ctx
, delay_bdev_io
, link
);
178 spdk_bdev_io_complete(spdk_bdev_io_from_ctx(io_ctx
), io_ctx
->status
);
181 /* In the general case, I/O will become ready in an fifo order. When timeouts are dynamically
182 * changed, this is not necessarily the case. However, the normal behavior will be restored
183 * after the outstanding I/O at the time of the change have been completed.
184 * This essentially means that moving from a high to low latency creates a dam for the new I/O
185 * submitted after the latency change. This is considered desirable behavior for the use case where
186 * we are trying to trigger a pre-defined timeout on an initiator.
196 _delay_finish_io(void *arg
)
198 struct delay_io_channel
*delay_ch
= arg
;
199 uint64_t ticks
= spdk_get_ticks();
202 completions
+= _process_io_stailq(&delay_ch
->avg_read_io
, ticks
);
203 completions
+= _process_io_stailq(&delay_ch
->avg_write_io
, ticks
);
204 completions
+= _process_io_stailq(&delay_ch
->p99_read_io
, ticks
);
205 completions
+= _process_io_stailq(&delay_ch
->p99_write_io
, ticks
);
207 return completions
== 0 ? SPDK_POLLER_IDLE
: SPDK_POLLER_BUSY
;
210 /* Completion callback for IO that were issued from this bdev. The original bdev_io
211 * is passed in as an arg so we'll complete that one with the appropriate status
212 * and then free the one that this module issued.
215 _delay_complete_io(struct spdk_bdev_io
*bdev_io
, bool success
, void *cb_arg
)
217 struct spdk_bdev_io
*orig_io
= cb_arg
;
218 struct vbdev_delay
*delay_node
= SPDK_CONTAINEROF(orig_io
->bdev
, struct vbdev_delay
, delay_bdev
);
219 struct delay_bdev_io
*io_ctx
= (struct delay_bdev_io
*)orig_io
->driver_ctx
;
220 struct delay_io_channel
*delay_ch
= spdk_io_channel_get_ctx(io_ctx
->ch
);
222 io_ctx
->status
= success
? SPDK_BDEV_IO_STATUS_SUCCESS
: SPDK_BDEV_IO_STATUS_FAILED
;
223 spdk_bdev_free_io(bdev_io
);
225 /* Put the I/O into the proper list for processing by the channel poller. */
226 switch (io_ctx
->type
) {
228 io_ctx
->completion_tick
= spdk_get_ticks() + delay_node
->average_read_latency_ticks
;
229 STAILQ_INSERT_TAIL(&delay_ch
->avg_read_io
, io_ctx
, link
);
231 case DELAY_AVG_WRITE
:
232 io_ctx
->completion_tick
= spdk_get_ticks() + delay_node
->average_write_latency_ticks
;
233 STAILQ_INSERT_TAIL(&delay_ch
->avg_write_io
, io_ctx
, link
);
236 io_ctx
->completion_tick
= spdk_get_ticks() + delay_node
->p99_read_latency_ticks
;
237 STAILQ_INSERT_TAIL(&delay_ch
->p99_read_io
, io_ctx
, link
);
239 case DELAY_P99_WRITE
:
240 io_ctx
->completion_tick
= spdk_get_ticks() + delay_node
->p99_write_latency_ticks
;
241 STAILQ_INSERT_TAIL(&delay_ch
->p99_write_io
, io_ctx
, link
);
245 spdk_bdev_io_complete(orig_io
, io_ctx
->status
);
251 vbdev_delay_resubmit_io(void *arg
)
253 struct spdk_bdev_io
*bdev_io
= (struct spdk_bdev_io
*)arg
;
254 struct delay_bdev_io
*io_ctx
= (struct delay_bdev_io
*)bdev_io
->driver_ctx
;
256 vbdev_delay_submit_request(io_ctx
->ch
, bdev_io
);
260 vbdev_delay_queue_io(struct spdk_bdev_io
*bdev_io
)
262 struct delay_bdev_io
*io_ctx
= (struct delay_bdev_io
*)bdev_io
->driver_ctx
;
263 struct delay_io_channel
*delay_ch
= spdk_io_channel_get_ctx(io_ctx
->ch
);
266 io_ctx
->bdev_io_wait
.bdev
= bdev_io
->bdev
;
267 io_ctx
->bdev_io_wait
.cb_fn
= vbdev_delay_resubmit_io
;
268 io_ctx
->bdev_io_wait
.cb_arg
= bdev_io
;
270 rc
= spdk_bdev_queue_io_wait(bdev_io
->bdev
, delay_ch
->base_ch
, &io_ctx
->bdev_io_wait
);
272 SPDK_ERRLOG("Queue io failed in vbdev_delay_queue_io, rc=%d.\n", rc
);
273 spdk_bdev_io_complete(bdev_io
, SPDK_BDEV_IO_STATUS_FAILED
);
278 delay_read_get_buf_cb(struct spdk_io_channel
*ch
, struct spdk_bdev_io
*bdev_io
, bool success
)
280 struct vbdev_delay
*delay_node
= SPDK_CONTAINEROF(bdev_io
->bdev
, struct vbdev_delay
,
282 struct delay_io_channel
*delay_ch
= spdk_io_channel_get_ctx(ch
);
286 spdk_bdev_io_complete(bdev_io
, SPDK_BDEV_IO_STATUS_FAILED
);
290 rc
= spdk_bdev_readv_blocks(delay_node
->base_desc
, delay_ch
->base_ch
, bdev_io
->u
.bdev
.iovs
,
291 bdev_io
->u
.bdev
.iovcnt
, bdev_io
->u
.bdev
.offset_blocks
,
292 bdev_io
->u
.bdev
.num_blocks
, _delay_complete_io
,
296 SPDK_ERRLOG("No memory, start to queue io for delay.\n");
297 vbdev_delay_queue_io(bdev_io
);
298 } else if (rc
!= 0) {
299 SPDK_ERRLOG("ERROR on bdev_io submission!\n");
300 spdk_bdev_io_complete(bdev_io
, SPDK_BDEV_IO_STATUS_FAILED
);
305 vbdev_delay_reset_dev(struct spdk_io_channel_iter
*i
, int status
)
307 struct spdk_bdev_io
*bdev_io
= spdk_io_channel_iter_get_ctx(i
);
308 struct spdk_io_channel
*ch
= spdk_io_channel_iter_get_channel(i
);
309 struct delay_io_channel
*delay_ch
= spdk_io_channel_get_ctx(ch
);
310 struct vbdev_delay
*delay_node
= spdk_io_channel_iter_get_io_device(i
);
313 rc
= spdk_bdev_reset(delay_node
->base_desc
, delay_ch
->base_ch
,
314 _delay_complete_io
, bdev_io
);
317 SPDK_ERRLOG("No memory, start to queue io for delay.\n");
318 vbdev_delay_queue_io(bdev_io
);
319 } else if (rc
!= 0) {
320 SPDK_ERRLOG("ERROR on bdev_io submission!\n");
321 spdk_bdev_io_complete(bdev_io
, SPDK_BDEV_IO_STATUS_FAILED
);
326 _abort_all_delayed_io(void *arg
)
328 STAILQ_HEAD(, delay_bdev_io
) *head
= arg
;
329 struct delay_bdev_io
*io_ctx
, *tmp
;
331 STAILQ_FOREACH_SAFE(io_ctx
, head
, link
, tmp
) {
332 STAILQ_REMOVE(head
, io_ctx
, delay_bdev_io
, link
);
333 spdk_bdev_io_complete(spdk_bdev_io_from_ctx(io_ctx
), SPDK_BDEV_IO_STATUS_ABORTED
);
338 vbdev_delay_reset_channel(struct spdk_io_channel_iter
*i
)
340 struct spdk_io_channel
*ch
= spdk_io_channel_iter_get_channel(i
);
341 struct delay_io_channel
*delay_ch
= spdk_io_channel_get_ctx(ch
);
343 _abort_all_delayed_io(&delay_ch
->avg_read_io
);
344 _abort_all_delayed_io(&delay_ch
->avg_write_io
);
345 _abort_all_delayed_io(&delay_ch
->p99_read_io
);
346 _abort_all_delayed_io(&delay_ch
->p99_write_io
);
348 spdk_for_each_channel_continue(i
, 0);
352 abort_delayed_io(void *_head
, struct spdk_bdev_io
*bio_to_abort
)
354 STAILQ_HEAD(, delay_bdev_io
) *head
= _head
;
355 struct delay_bdev_io
*io_ctx_to_abort
= (struct delay_bdev_io
*)bio_to_abort
->driver_ctx
;
356 struct delay_bdev_io
*io_ctx
;
358 STAILQ_FOREACH(io_ctx
, head
, link
) {
359 if (io_ctx
== io_ctx_to_abort
) {
360 STAILQ_REMOVE(head
, io_ctx_to_abort
, delay_bdev_io
, link
);
361 spdk_bdev_io_complete(bio_to_abort
, SPDK_BDEV_IO_STATUS_ABORTED
);
370 vbdev_delay_abort(struct vbdev_delay
*delay_node
, struct delay_io_channel
*delay_ch
,
371 struct spdk_bdev_io
*bdev_io
)
373 struct spdk_bdev_io
*bio_to_abort
= bdev_io
->u
.abort
.bio_to_abort
;
375 if (abort_delayed_io(&delay_ch
->avg_read_io
, bio_to_abort
) ||
376 abort_delayed_io(&delay_ch
->avg_write_io
, bio_to_abort
) ||
377 abort_delayed_io(&delay_ch
->p99_read_io
, bio_to_abort
) ||
378 abort_delayed_io(&delay_ch
->p99_write_io
, bio_to_abort
)) {
379 spdk_bdev_io_complete(bdev_io
, SPDK_BDEV_IO_STATUS_SUCCESS
);
383 return spdk_bdev_abort(delay_node
->base_desc
, delay_ch
->base_ch
, bio_to_abort
,
384 _delay_complete_io
, bdev_io
);
388 vbdev_delay_submit_request(struct spdk_io_channel
*ch
, struct spdk_bdev_io
*bdev_io
)
390 struct vbdev_delay
*delay_node
= SPDK_CONTAINEROF(bdev_io
->bdev
, struct vbdev_delay
, delay_bdev
);
391 struct delay_io_channel
*delay_ch
= spdk_io_channel_get_ctx(ch
);
392 struct delay_bdev_io
*io_ctx
= (struct delay_bdev_io
*)bdev_io
->driver_ctx
;
396 is_p99
= rand_r(&delay_ch
->rand_seed
) % 100 == 0 ? true : false;
399 io_ctx
->type
= DELAY_NONE
;
401 switch (bdev_io
->type
) {
402 case SPDK_BDEV_IO_TYPE_READ
:
403 io_ctx
->type
= is_p99
? DELAY_P99_READ
: DELAY_AVG_READ
;
404 spdk_bdev_io_get_buf(bdev_io
, delay_read_get_buf_cb
,
405 bdev_io
->u
.bdev
.num_blocks
* bdev_io
->bdev
->blocklen
);
407 case SPDK_BDEV_IO_TYPE_WRITE
:
408 io_ctx
->type
= is_p99
? DELAY_P99_WRITE
: DELAY_AVG_WRITE
;
409 rc
= spdk_bdev_writev_blocks(delay_node
->base_desc
, delay_ch
->base_ch
, bdev_io
->u
.bdev
.iovs
,
410 bdev_io
->u
.bdev
.iovcnt
, bdev_io
->u
.bdev
.offset_blocks
,
411 bdev_io
->u
.bdev
.num_blocks
, _delay_complete_io
,
414 case SPDK_BDEV_IO_TYPE_WRITE_ZEROES
:
415 rc
= spdk_bdev_write_zeroes_blocks(delay_node
->base_desc
, delay_ch
->base_ch
,
416 bdev_io
->u
.bdev
.offset_blocks
,
417 bdev_io
->u
.bdev
.num_blocks
,
418 _delay_complete_io
, bdev_io
);
420 case SPDK_BDEV_IO_TYPE_UNMAP
:
421 rc
= spdk_bdev_unmap_blocks(delay_node
->base_desc
, delay_ch
->base_ch
,
422 bdev_io
->u
.bdev
.offset_blocks
,
423 bdev_io
->u
.bdev
.num_blocks
,
424 _delay_complete_io
, bdev_io
);
426 case SPDK_BDEV_IO_TYPE_FLUSH
:
427 rc
= spdk_bdev_flush_blocks(delay_node
->base_desc
, delay_ch
->base_ch
,
428 bdev_io
->u
.bdev
.offset_blocks
,
429 bdev_io
->u
.bdev
.num_blocks
,
430 _delay_complete_io
, bdev_io
);
432 case SPDK_BDEV_IO_TYPE_RESET
:
433 /* During reset, the generic bdev layer aborts all new I/Os and queues all new resets.
434 * Hence we can simply abort all I/Os delayed to complete.
436 spdk_for_each_channel(delay_node
, vbdev_delay_reset_channel
, bdev_io
,
437 vbdev_delay_reset_dev
);
439 case SPDK_BDEV_IO_TYPE_ABORT
:
440 rc
= vbdev_delay_abort(delay_node
, delay_ch
, bdev_io
);
443 SPDK_ERRLOG("delay: unknown I/O type %d\n", bdev_io
->type
);
444 spdk_bdev_io_complete(bdev_io
, SPDK_BDEV_IO_STATUS_FAILED
);
449 SPDK_ERRLOG("No memory, start to queue io for delay.\n");
450 vbdev_delay_queue_io(bdev_io
);
451 } else if (rc
!= 0) {
452 SPDK_ERRLOG("ERROR on bdev_io submission!\n");
453 spdk_bdev_io_complete(bdev_io
, SPDK_BDEV_IO_STATUS_FAILED
);
458 vbdev_delay_io_type_supported(void *ctx
, enum spdk_bdev_io_type io_type
)
460 struct vbdev_delay
*delay_node
= (struct vbdev_delay
*)ctx
;
462 if (io_type
== SPDK_BDEV_IO_TYPE_ZCOPY
) {
465 return spdk_bdev_io_type_supported(delay_node
->base_bdev
, io_type
);
/* fn_table get_io_channel entry point: returns a channel for the io_device
 * registered with the vbdev_delay node passed as ctx.
 */
static struct spdk_io_channel *
vbdev_delay_get_io_channel(void *ctx)
{
	struct vbdev_delay *delay_node = (struct vbdev_delay *)ctx;

	return spdk_get_io_channel(delay_node);
}
481 _delay_write_conf_values(struct vbdev_delay
*delay_node
, struct spdk_json_write_ctx
*w
)
483 spdk_json_write_named_string(w
, "name", spdk_bdev_get_name(&delay_node
->delay_bdev
));
484 spdk_json_write_named_string(w
, "base_bdev_name", spdk_bdev_get_name(delay_node
->base_bdev
));
485 spdk_json_write_named_int64(w
, "avg_read_latency",
486 delay_node
->average_read_latency_ticks
* SPDK_SEC_TO_USEC
/ spdk_get_ticks_hz());
487 spdk_json_write_named_int64(w
, "p99_read_latency",
488 delay_node
->p99_read_latency_ticks
* SPDK_SEC_TO_USEC
/ spdk_get_ticks_hz());
489 spdk_json_write_named_int64(w
, "avg_write_latency",
490 delay_node
->average_write_latency_ticks
* SPDK_SEC_TO_USEC
/ spdk_get_ticks_hz());
491 spdk_json_write_named_int64(w
, "p99_write_latency",
492 delay_node
->p99_write_latency_ticks
* SPDK_SEC_TO_USEC
/ spdk_get_ticks_hz());
/* fn_table dump_info_json entry point: emits a "delay" object holding the
 * configuration values of the node passed as ctx. Returns 0.
 */
static int
vbdev_delay_dump_info_json(void *ctx, struct spdk_json_write_ctx *w)
{
	struct vbdev_delay *delay_node = (struct vbdev_delay *)ctx;

	spdk_json_write_name(w, "delay");
	spdk_json_write_object_begin(w);
	_delay_write_conf_values(delay_node, w);
	spdk_json_write_object_end(w);

	return 0;
}
508 /* This is used to generate JSON that can configure this module to its current state. */
510 vbdev_delay_config_json(struct spdk_json_write_ctx
*w
)
512 struct vbdev_delay
*delay_node
;
514 TAILQ_FOREACH(delay_node
, &g_delay_nodes
, link
) {
515 spdk_json_write_object_begin(w
);
516 spdk_json_write_named_string(w
, "method", "bdev_delay_create");
517 spdk_json_write_named_object_begin(w
, "params");
518 _delay_write_conf_values(delay_node
, w
);
519 spdk_json_write_object_end(w
);
524 /* We provide this callback for the SPDK channel code to create a channel using
525 * the channel struct we provided in our module get_io_channel() entry point. Here
526 * we get and save off an underlying base channel of the device below us so that
527 * we can communicate with the base bdev on a per channel basis. If we needed
528 * our own poller for this vbdev, we'd register it here.
531 delay_bdev_ch_create_cb(void *io_device
, void *ctx_buf
)
533 struct delay_io_channel
*delay_ch
= ctx_buf
;
534 struct vbdev_delay
*delay_node
= io_device
;
536 STAILQ_INIT(&delay_ch
->avg_read_io
);
537 STAILQ_INIT(&delay_ch
->p99_read_io
);
538 STAILQ_INIT(&delay_ch
->avg_write_io
);
539 STAILQ_INIT(&delay_ch
->p99_write_io
);
541 delay_ch
->io_poller
= SPDK_POLLER_REGISTER(_delay_finish_io
, delay_ch
, 0);
542 delay_ch
->base_ch
= spdk_bdev_get_io_channel(delay_node
->base_desc
);
543 delay_ch
->rand_seed
= time(NULL
);
548 /* We provide this callback for the SPDK channel code to destroy a channel
549 * created with our create callback. We just need to undo anything we did
550 * when we created. If this bdev used its own poller, we'd unregsiter it here.
553 delay_bdev_ch_destroy_cb(void *io_device
, void *ctx_buf
)
555 struct delay_io_channel
*delay_ch
= ctx_buf
;
557 spdk_poller_unregister(&delay_ch
->io_poller
);
558 spdk_put_io_channel(delay_ch
->base_ch
);
561 /* Create the delay association from the bdev and vbdev name and insert
562 * on the global list. */
564 vbdev_delay_insert_association(const char *bdev_name
, const char *vbdev_name
,
565 uint64_t avg_read_latency
, uint64_t p99_read_latency
,
566 uint64_t avg_write_latency
, uint64_t p99_write_latency
)
568 struct bdev_association
*assoc
;
570 TAILQ_FOREACH(assoc
, &g_bdev_associations
, link
) {
571 if (strcmp(vbdev_name
, assoc
->vbdev_name
) == 0) {
572 SPDK_ERRLOG("delay bdev %s already exists\n", vbdev_name
);
577 assoc
= calloc(1, sizeof(struct bdev_association
));
579 SPDK_ERRLOG("could not allocate bdev_association\n");
583 assoc
->bdev_name
= strdup(bdev_name
);
584 if (!assoc
->bdev_name
) {
585 SPDK_ERRLOG("could not allocate assoc->bdev_name\n");
590 assoc
->vbdev_name
= strdup(vbdev_name
);
591 if (!assoc
->vbdev_name
) {
592 SPDK_ERRLOG("could not allocate assoc->vbdev_name\n");
593 free(assoc
->bdev_name
);
598 assoc
->avg_read_latency
= avg_read_latency
;
599 assoc
->p99_read_latency
= p99_read_latency
;
600 assoc
->avg_write_latency
= avg_write_latency
;
601 assoc
->p99_write_latency
= p99_write_latency
;
603 TAILQ_INSERT_TAIL(&g_bdev_associations
, assoc
, link
);
609 vbdev_delay_update_latency_value(char *delay_name
, uint64_t latency_us
, enum delay_io_type type
)
611 struct spdk_bdev
*delay_bdev
;
612 struct vbdev_delay
*delay_node
;
613 uint64_t ticks_mhz
= spdk_get_ticks_hz() / SPDK_SEC_TO_USEC
;
615 delay_bdev
= spdk_bdev_get_by_name(delay_name
);
616 if (delay_bdev
== NULL
) {
618 } else if (delay_bdev
->module
!= &delay_if
) {
622 delay_node
= SPDK_CONTAINEROF(delay_bdev
, struct vbdev_delay
, delay_bdev
);
626 delay_node
->average_read_latency_ticks
= ticks_mhz
* latency_us
;
628 case DELAY_AVG_WRITE
:
629 delay_node
->average_write_latency_ticks
= ticks_mhz
* latency_us
;
632 delay_node
->p99_read_latency_ticks
= ticks_mhz
* latency_us
;
634 case DELAY_P99_WRITE
:
635 delay_node
->p99_write_latency_ticks
= ticks_mhz
* latency_us
;
/* Module init entry point. There is nothing to set up; always returns 0. */
static int
vbdev_delay_init(void)
{
	/* Not allowing for .ini style configuration. */
	return 0;
}
652 vbdev_delay_finish(void)
654 struct bdev_association
*assoc
;
656 while ((assoc
= TAILQ_FIRST(&g_bdev_associations
))) {
657 TAILQ_REMOVE(&g_bdev_associations
, assoc
, link
);
658 free(assoc
->bdev_name
);
659 free(assoc
->vbdev_name
);
665 vbdev_delay_get_ctx_size(void)
667 return sizeof(struct delay_bdev_io
);
/* fn_table write_config_json entry point. Intentionally empty: configuration
 * is emitted for the whole module by vbdev_delay_config_json().
 */
static void
vbdev_delay_write_config_json(struct spdk_bdev *bdev, struct spdk_json_write_ctx *w)
{
	/* No config per bdev needed */
}
676 /* When we register our bdev this is how we specify our entry points. */
677 static const struct spdk_bdev_fn_table vbdev_delay_fn_table
= {
678 .destruct
= vbdev_delay_destruct
,
679 .submit_request
= vbdev_delay_submit_request
,
680 .io_type_supported
= vbdev_delay_io_type_supported
,
681 .get_io_channel
= vbdev_delay_get_io_channel
,
682 .dump_info_json
= vbdev_delay_dump_info_json
,
683 .write_config_json
= vbdev_delay_write_config_json
,
686 /* Called when the underlying base bdev goes away. */
688 vbdev_delay_base_bdev_hotremove_cb(void *ctx
)
690 struct vbdev_delay
*delay_node
, *tmp
;
691 struct spdk_bdev
*bdev_find
= ctx
;
693 TAILQ_FOREACH_SAFE(delay_node
, &g_delay_nodes
, link
, tmp
) {
694 if (bdev_find
== delay_node
->base_bdev
) {
695 spdk_bdev_unregister(&delay_node
->delay_bdev
, NULL
, NULL
);
700 /* Create and register the delay vbdev if we find it in our list of bdev names.
701 * This can be called either by the examine path or RPC method.
704 vbdev_delay_register(struct spdk_bdev
*bdev
)
706 struct bdev_association
*assoc
;
707 struct vbdev_delay
*delay_node
;
708 uint64_t ticks_mhz
= spdk_get_ticks_hz() / SPDK_SEC_TO_USEC
;
711 /* Check our list of names from config versus this bdev and if
712 * there's a match, create the delay_node & bdev accordingly.
714 TAILQ_FOREACH(assoc
, &g_bdev_associations
, link
) {
715 if (strcmp(assoc
->bdev_name
, bdev
->name
) != 0) {
719 delay_node
= calloc(1, sizeof(struct vbdev_delay
));
722 SPDK_ERRLOG("could not allocate delay_node\n");
726 /* The base bdev that we're attaching to. */
727 delay_node
->base_bdev
= bdev
;
728 delay_node
->delay_bdev
.name
= strdup(assoc
->vbdev_name
);
729 if (!delay_node
->delay_bdev
.name
) {
731 SPDK_ERRLOG("could not allocate delay_bdev name\n");
735 delay_node
->delay_bdev
.product_name
= "delay";
737 delay_node
->delay_bdev
.write_cache
= bdev
->write_cache
;
738 delay_node
->delay_bdev
.required_alignment
= bdev
->required_alignment
;
739 delay_node
->delay_bdev
.optimal_io_boundary
= bdev
->optimal_io_boundary
;
740 delay_node
->delay_bdev
.blocklen
= bdev
->blocklen
;
741 delay_node
->delay_bdev
.blockcnt
= bdev
->blockcnt
;
743 delay_node
->delay_bdev
.ctxt
= delay_node
;
744 delay_node
->delay_bdev
.fn_table
= &vbdev_delay_fn_table
;
745 delay_node
->delay_bdev
.module
= &delay_if
;
747 /* Store the number of ticks you need to add to get the I/O expiration time. */
748 delay_node
->average_read_latency_ticks
= ticks_mhz
* assoc
->avg_read_latency
;
749 delay_node
->p99_read_latency_ticks
= ticks_mhz
* assoc
->p99_read_latency
;
750 delay_node
->average_write_latency_ticks
= ticks_mhz
* assoc
->avg_write_latency
;
751 delay_node
->p99_write_latency_ticks
= ticks_mhz
* assoc
->p99_write_latency
;
753 spdk_io_device_register(delay_node
, delay_bdev_ch_create_cb
, delay_bdev_ch_destroy_cb
,
754 sizeof(struct delay_io_channel
),
757 rc
= spdk_bdev_open(bdev
, true, vbdev_delay_base_bdev_hotremove_cb
,
758 bdev
, &delay_node
->base_desc
);
760 SPDK_ERRLOG("could not open bdev %s\n", spdk_bdev_get_name(bdev
));
761 goto error_unregister
;
764 /* Save the thread where the base device is opened */
765 delay_node
->thread
= spdk_get_thread();
767 rc
= spdk_bdev_module_claim_bdev(bdev
, delay_node
->base_desc
, delay_node
->delay_bdev
.module
);
769 SPDK_ERRLOG("could not claim bdev %s\n", spdk_bdev_get_name(bdev
));
773 rc
= spdk_bdev_register(&delay_node
->delay_bdev
);
775 SPDK_ERRLOG("could not register delay_bdev\n");
776 spdk_bdev_module_release_bdev(delay_node
->base_bdev
);
780 TAILQ_INSERT_TAIL(&g_delay_nodes
, delay_node
, link
);
786 spdk_bdev_close(delay_node
->base_desc
);
788 spdk_io_device_unregister(delay_node
, NULL
);
789 free(delay_node
->delay_bdev
.name
);
795 create_delay_disk(const char *bdev_name
, const char *vbdev_name
, uint64_t avg_read_latency
,
796 uint64_t p99_read_latency
, uint64_t avg_write_latency
, uint64_t p99_write_latency
)
798 struct spdk_bdev
*bdev
= NULL
;
801 if (p99_read_latency
< avg_read_latency
|| p99_write_latency
< avg_write_latency
) {
802 SPDK_ERRLOG("Unable to create a delay bdev where p99 latency is less than average latency.\n");
806 rc
= vbdev_delay_insert_association(bdev_name
, vbdev_name
, avg_read_latency
, p99_read_latency
,
807 avg_write_latency
, p99_write_latency
);
812 bdev
= spdk_bdev_get_by_name(bdev_name
);
817 return vbdev_delay_register(bdev
);
821 delete_delay_disk(struct spdk_bdev
*bdev
, spdk_bdev_unregister_cb cb_fn
, void *cb_arg
)
823 struct bdev_association
*assoc
;
825 if (!bdev
|| bdev
->module
!= &delay_if
) {
826 cb_fn(cb_arg
, -ENODEV
);
830 TAILQ_FOREACH(assoc
, &g_bdev_associations
, link
) {
831 if (strcmp(assoc
->vbdev_name
, bdev
->name
) == 0) {
832 TAILQ_REMOVE(&g_bdev_associations
, assoc
, link
);
833 free(assoc
->bdev_name
);
834 free(assoc
->vbdev_name
);
840 spdk_bdev_unregister(bdev
, cb_fn
, cb_arg
);
844 vbdev_delay_examine(struct spdk_bdev
*bdev
)
846 vbdev_delay_register(bdev
);
848 spdk_bdev_module_examine_done(&delay_if
);
851 SPDK_LOG_REGISTER_COMPONENT("vbdev_delay", SPDK_LOG_VBDEV_DELAY
)