/*-
 *   BSD LICENSE
 *
 *   Copyright (C) 2008-2012 Daisuke Aoyama <aoyama@peach.ne.jp>.
 *   Copyright (c) Intel Corporation.
 *   All rights reserved.
 *
 *   Redistribution and use in source and binary forms, with or without
 *   modification, are permitted provided that the following conditions
 *   are met:
 *
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in
 *       the documentation and/or other materials provided with the
 *       distribution.
 *     * Neither the name of Intel Corporation nor the names of its
 *       contributors may be used to endorse or promote products derived
 *       from this software without specific prior written permission.
 *
 *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
#include <assert.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

#include <rte_config.h>
#include <rte_mempool.h>
#include <rte_lcore.h>

#include "spdk/bdev.h"
#include "spdk/copy_engine.h"
#include "spdk/endian.h"
#include "spdk/event.h"
#include "spdk/io_channel.h"
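
/*
 * Per-I/O context drawn from task_pool for every outstanding I/O.
 * It records the owning target, the data buffer and disk offset used for
 * the I/O, and a single unmap descriptor for the unmap workload.
 */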
struct bdevperf_task {
	struct iovec			iov;
	struct io_target		*target;
	void				*buf;
	uint64_t			offset;
	struct spdk_scsi_unmap_bdesc	bdesc;
};
static int g_io_size = 0;
/* initialize to invalid value so we can detect if user overrides it. */
static int g_rw_percentage = -1;
static int g_is_random;
static bool g_verify = false;
static bool g_reset = false;
static bool g_unmap = false;
static int g_queue_depth;
static int g_time_in_sec;
static int g_show_performance_real_time = 0;
static bool g_run_failed = false;
static bool g_zcopy = true;

static struct spdk_poller *g_perf_timer = NULL;
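
/*
 * The globals above hold the test configuration: they are filled in from the
 * command-line options parsed in main() and then read by the submission and
 * completion paths on every core.
 */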
static void bdevperf_submit_single(struct io_target *target);

#include "../common.c"
struct io_target {
	struct spdk_bdev	*bdev;
	struct spdk_io_channel	*ch;
	struct io_target	*next;
	unsigned		lcore;
	uint64_t		io_completed;
	int			current_queue_depth;
	uint64_t		size_in_ios;
	uint64_t		offset_in_ios;
	bool			is_draining;
	struct spdk_poller	*run_timer;
	struct spdk_poller	*reset_timer;
};

struct io_target *head[RTE_MAX_LCORE];
static int g_target_count = 0;
/*
 * Used to determine how the I/O buffers should be aligned.
 * This alignment will be bumped up for blockdevs that
 * require alignment based on block length.
 */
static uint32_t g_min_alignment = 8;
static void
blockdev_heads_init(void)
{
	int i;

	for (i = 0; i < RTE_MAX_LCORE; i++) {
		head[i] = NULL;
	}
}
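
/*
 * Walk all registered blockdevs, claim each usable one, and hang an io_target
 * on the per-lcore list in head[]; targets are distributed round-robin across
 * the cores reported by spdk_env_get_core_count().
 */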
static void
bdevperf_construct_targets(void)
{
	int index = 0;
	struct spdk_bdev *bdev;
	struct io_target *target;

	bdev = spdk_bdev_first();
	while (bdev != NULL) {
		if (!spdk_bdev_claim(bdev, NULL, NULL)) {
			bdev = spdk_bdev_next(bdev);
			continue;
		}

		if (g_unmap && !bdev->thin_provisioning) {
			printf("Skipping %s because it does not support unmap\n", bdev->name);
			bdev = spdk_bdev_next(bdev);
			continue;
		}

		target = malloc(sizeof(struct io_target));
		if (!target) {
			fprintf(stderr, "Unable to allocate memory for new target.\n");
			/* Return immediately because all mallocs will presumably fail after this */
			return;
		}

		target->bdev = bdev;
		/* Mapping each target to lcore */
		index = g_target_count % spdk_env_get_core_count();
		target->next = head[index];
		target->lcore = index;
		target->io_completed = 0;
		target->current_queue_depth = 0;
		target->offset_in_ios = 0;
		target->size_in_ios = (bdev->blockcnt * bdev->blocklen) / g_io_size;
		if (bdev->need_aligned_buffer && g_min_alignment < bdev->blocklen) {
			g_min_alignment = bdev->blocklen;
		}

		target->is_draining = false;
		target->run_timer = NULL;
		target->reset_timer = NULL;

		head[index] = target;
		g_target_count++;

		bdev = spdk_bdev_next(bdev);
	}
}
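
/*
 * end_run() executes on the master lcore (scheduled from bdevperf_complete)
 * once a target has drained all of its outstanding I/O; when the last target
 * finishes, the run is torn down.
 */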
static void
end_run(void *arg1, void *arg2)
{
	struct io_target *target = arg1;

	spdk_put_io_channel(target->ch);
	spdk_bdev_unclaim(target->bdev);
	if (--g_target_count == 0) {
		if (g_show_performance_real_time) {
			spdk_poller_unregister(&g_perf_timer, NULL);
		}
		if (g_run_failed) {
			spdk_app_stop(1);
		} else {
			spdk_app_stop(0);
		}
	}
}
struct rte_mempool *task_pool;
static void
bdevperf_complete(struct spdk_bdev_io *bdev_io, enum spdk_bdev_io_status status, void *cb_arg)
{
	struct io_target	*target;
	struct bdevperf_task	*task = cb_arg;
	struct spdk_event	*complete;

	target = task->target;

	if (status != SPDK_BDEV_IO_STATUS_SUCCESS) {
		g_run_failed = true;
		target->is_draining = true;
	} else if (g_verify || g_reset || g_unmap) {
		assert(bdev_io->u.read.iovcnt == 1);
		if (memcmp(task->buf, bdev_io->u.read.iov.iov_base, g_io_size) != 0) {
			printf("Buffer mismatch! Disk Offset: %lu\n", task->offset);
			g_run_failed = true;
			target->is_draining = true;
		}
	}

	target->current_queue_depth--;
	target->io_completed++;

	bdev_io->caller_ctx = NULL;
	rte_mempool_put(task_pool, task);

	spdk_bdev_free_io(bdev_io);

	/*
	 * is_draining indicates when time has expired for the test run
	 * and we are just waiting for the previously submitted I/O
	 * to complete. In this case, do not submit a new I/O to replace
	 * the one just completed.
	 */
	if (!target->is_draining) {
		bdevperf_submit_single(target);
	} else if (target->current_queue_depth == 0) {
		complete = spdk_event_allocate(rte_get_master_lcore(), end_run, target, NULL);
		spdk_event_call(complete);
	}
}
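
/*
 * Completion path for the unmap workload: after the unmap finishes, the
 * expected buffer is zeroed and the same range is read back so that
 * bdevperf_complete() can compare it against all-zero data.
 */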
static void
bdevperf_unmap_complete(struct spdk_bdev_io *bdev_io, enum spdk_bdev_io_status status, void *cb_arg)
{
	struct io_target	*target;
	struct bdevperf_task	*task = cb_arg;

	target = task->target;

	/* Set the expected buffer to 0. */
	memset(task->buf, 0, g_io_size);

	/* Read the data back in */
	spdk_bdev_read(target->bdev, target->ch, NULL, task->offset, g_io_size,
		       bdevperf_complete, task);

	spdk_bdev_free_io(bdev_io);
}
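
/*
 * Completion path for the initial write of the verify/reset/unmap workloads:
 * either unmap the just-written range (unmap workload) or read it back
 * immediately so the data can be compared against task->buf.
 */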
static void
bdevperf_verify_write_complete(struct spdk_bdev_io *bdev_io, enum spdk_bdev_io_status status,
			       void *cb_arg)
{
	struct io_target	*target;
	struct bdevperf_task	*task = cb_arg;

	target = task->target;

	if (g_unmap) {
		/* Unmap the written region, then read it back expecting zeroes. */
		to_be64(&task->bdesc.lba, task->offset / target->bdev->blocklen);
		to_be32(&task->bdesc.block_count, g_io_size / target->bdev->blocklen);

		spdk_bdev_unmap(target->bdev, target->ch, &task->bdesc, 1, bdevperf_unmap_complete,
				task);
	} else {
		/* Read the data back in */
		spdk_bdev_read(target->bdev, target->ch, NULL,
			       task->offset, g_io_size,
			       bdevperf_complete, task);
	}

	spdk_bdev_free_io(bdev_io);
}
static void
task_ctor(struct rte_mempool *mp, void *arg, void *__task, unsigned id)
{
	struct bdevperf_task *task = __task;

	task->buf = spdk_zmalloc(g_io_size, g_min_alignment, NULL);
}
static __thread unsigned int seed = 0;
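
/*
 * Submit one I/O to the given target. For verify-style workloads a random
 * pattern is written first; otherwise the read/write mix is decided by
 * g_rw_percentage, and reads may pass a NULL buffer so the bdev layer
 * allocates one when zero copy is in use.
 */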
static void
bdevperf_submit_single(struct io_target *target)
{
	struct spdk_bdev	*bdev;
	struct spdk_io_channel	*ch;
	struct bdevperf_task	*task = NULL;
	uint64_t		offset_in_ios;
	void			*rbuf;

	bdev = target->bdev;
	ch = target->ch;

	if (rte_mempool_get(task_pool, (void **)&task) != 0 || task == NULL) {
		printf("Task pool allocation failed\n");
		abort();
	}

	task->target = target;

	if (g_is_random) {
		offset_in_ios = rand_r(&seed) % target->size_in_ios;
	} else {
		offset_in_ios = target->offset_in_ios++;
		if (target->offset_in_ios == target->size_in_ios) {
			target->offset_in_ios = 0;
		}
	}

	task->offset = offset_in_ios * g_io_size;
	if (g_verify || g_reset || g_unmap) {
		memset(task->buf, rand_r(&seed) % 256, g_io_size);
		task->iov.iov_base = task->buf;
		task->iov.iov_len = g_io_size;
		spdk_bdev_writev(bdev, ch, &task->iov, 1, task->offset, g_io_size,
				 bdevperf_verify_write_complete, task);
	} else if ((g_rw_percentage == 100) ||
		   (g_rw_percentage != 0 && ((rand_r(&seed) % 100) < g_rw_percentage))) {
		rbuf = g_zcopy ? NULL : task->buf;
		spdk_bdev_read(bdev, ch, rbuf, task->offset, g_io_size,
			       bdevperf_complete, task);
	} else {
		task->iov.iov_base = task->buf;
		task->iov.iov_len = g_io_size;
		spdk_bdev_writev(bdev, ch, &task->iov, 1, task->offset, g_io_size,
				 bdevperf_complete, task);
	}

	target->current_queue_depth++;
}
static void
bdevperf_submit_io(struct io_target *target, int queue_depth)
{
	while (queue_depth-- > 0) {
		bdevperf_submit_single(target);
	}
}
static void
end_target(void *arg)
{
	struct io_target *target = arg;

	spdk_poller_unregister(&target->run_timer, NULL);
	if (g_reset) {
		spdk_poller_unregister(&target->reset_timer, NULL);
	}

	target->is_draining = true;
}
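
/*
 * Reset workload support: reset_target() issues a blockdev reset roughly
 * every 10 seconds (re-armed from reset_cb) while the normal verify I/O
 * keeps running.
 */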
static void reset_target(void *arg);
static void
reset_cb(struct spdk_bdev_io *bdev_io, enum spdk_bdev_io_status status, void *cb_arg)
{
	struct bdevperf_task	*task = cb_arg;
	struct io_target	*target = task->target;

	if (status != SPDK_BDEV_IO_STATUS_SUCCESS) {
		printf("Reset blockdev=%s failed\n", target->bdev->name);
		target->is_draining = true;
		g_run_failed = true;
	}

	rte_mempool_put(task_pool, task);

	spdk_poller_register(&target->reset_timer, reset_target, target, target->lcore,
			     10 * 1000000);
}
static void
reset_target(void *arg)
{
	struct io_target *target = arg;
	struct bdevperf_task	*task = NULL;

	spdk_poller_unregister(&target->reset_timer, NULL);

	/* Do a blockdev reset; reset_cb re-arms this timer when it completes. */
	rte_mempool_get(task_pool, (void **)&task);
	task->target = target;
	spdk_bdev_reset(target->bdev, SPDK_BDEV_RESET_SOFT,
			reset_cb, task);
}
static void
bdevperf_submit_on_core(void *arg1, void *arg2)
{
	struct io_target *target = arg1;

	/* Submit initial I/O for each block device. Each time one
	 * completes, another will be submitted. */
	while (target != NULL) {
		target->ch = spdk_bdev_get_io_channel(target->bdev, SPDK_IO_PRIORITY_DEFAULT);

		/* Start a timer to stop this I/O chain when the run is over */
		spdk_poller_register(&target->run_timer, end_target, target, target->lcore,
				     g_time_in_sec * 1000000);
		if (g_reset) {
			spdk_poller_register(&target->reset_timer, reset_target, target,
					     target->lcore, 10 * 1000000);
		}
		bdevperf_submit_io(target, g_queue_depth);
		target = target->next;
	}
}
static void usage(char *program_name)
{
	printf("%s options\n", program_name);
	printf("\t[-c configuration file]\n");
	printf("\t[-m core mask for distributing I/O submission/completion work\n");
	printf("\t\t(default: 0x1 - use core 0 only)]\n");
	printf("\t[-q io depth]\n");
	printf("\t[-s io size in bytes]\n");
	printf("\t[-w io pattern type, must be one of\n");
	printf("\t\t(read, write, randread, randwrite, rw, randrw, verify, reset)]\n");
	printf("\t[-M rwmixread (100 for reads, 0 for writes)]\n");
	printf("\t[-t time in seconds]\n");
	printf("\t[-S Show performance result in real time]\n");
}
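
/*
 * Illustrative invocation (not part of the original source): run a 60 second
 * random read test at queue depth 128 with 4 KiB I/O:
 *
 *     ./bdevperf -c bdev.conf -q 128 -s 4096 -w randread -t 60
 *
 * The configuration file name above is only an example.
 */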
static void
performance_dump(int io_time)
{
	int index;
	unsigned lcore_id;
	float io_per_second, mb_per_second;
	float total_io_per_second, total_mb_per_second;
	struct io_target *target;

	total_io_per_second = 0;
	total_mb_per_second = 0;
	for (index = 0; index < spdk_env_get_core_count(); index++) {
		target = head[index];
		if (target != NULL) {
			lcore_id = target->lcore;
			printf("\r Logical core: %u\n", lcore_id);
		}
		while (target != NULL) {
			io_per_second = (float)target->io_completed /
					io_time;
			mb_per_second = io_per_second * g_io_size /
					(1024 * 1024);
			printf("\r %-20s: %10.2f IO/s %10.2f MB/s\n",
			       target->bdev->name, io_per_second,
			       mb_per_second);
			total_io_per_second += io_per_second;
			total_mb_per_second += mb_per_second;
			target = target->next;
		}
	}

	printf("\r =====================================================\n");
	printf("\r %-20s: %10.2f IO/s %10.2f MB/s\n",
	       "Total", total_io_per_second, total_mb_per_second);
	fflush(stdout);
}
static void
performance_statistics_thread(void *arg)
{
	/* This poller fires once per second (see bdevperf_run). */
	performance_dump(1);
}
static void
bdevperf_run(void *arg1, void *arg2)
{
	uint32_t i;
	struct io_target *target;
	struct spdk_event *event;

	printf("Running I/O for %d seconds...\n", g_time_in_sec);
	fflush(stdout);

	/* Start a timer to dump performance numbers */
	if (g_show_performance_real_time) {
		spdk_poller_register(&g_perf_timer, performance_statistics_thread, NULL,
				     spdk_env_get_current_core(), 1000000);
	}

	/* Send events to start all I/O */
	SPDK_ENV_FOREACH_CORE(i) {
		target = head[i];
		if (target != NULL) {
			event = spdk_event_allocate(target->lcore, bdevperf_submit_on_core,
						    target, NULL);
			spdk_event_call(event);
		}
	}
}
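
/*
 * main() parses the options described in usage(), validates the workload
 * type, initializes the SPDK/DPDK environment, builds the per-core target
 * lists, creates the task mempool, and then runs the test via
 * spdk_app_start() before dumping the final performance numbers.
 */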
int
main(int argc, char **argv)
{
	const char *config_file;
	const char *core_mask;
	const char *workload_type;
	int op;
	bool mix_specified;

	/* default values */
	config_file = NULL;
	core_mask = NULL;
	workload_type = NULL;
	g_queue_depth = 0;
	g_io_size = 0;
	g_time_in_sec = 0;
	mix_specified = false;

	while ((op = getopt(argc, argv, "c:m:q:s:t:w:M:S")) != -1) {
		switch (op) {
		case 'c':
			config_file = optarg;
			break;
		case 'm':
			core_mask = optarg;
			break;
		case 'q':
			g_queue_depth = atoi(optarg);
			break;
		case 's':
			g_io_size = atoi(optarg);
			break;
		case 't':
			g_time_in_sec = atoi(optarg);
			break;
		case 'w':
			workload_type = optarg;
			break;
		case 'M':
			g_rw_percentage = atoi(optarg);
			mix_specified = true;
			break;
		case 'S':
			g_show_performance_real_time = 1;
			break;
		default:
			usage(argv[0]);
			exit(1);
		}
	}

	if (g_queue_depth <= 0) {
		usage(argv[0]);
		exit(1);
	}
	if (g_io_size <= 0) {
		usage(argv[0]);
		exit(1);
	}
	if (!workload_type) {
		usage(argv[0]);
		exit(1);
	}
	if (g_time_in_sec <= 0) {
		usage(argv[0]);
		exit(1);
	}

	if (strcmp(workload_type, "read") &&
	    strcmp(workload_type, "write") &&
	    strcmp(workload_type, "randread") &&
	    strcmp(workload_type, "randwrite") &&
	    strcmp(workload_type, "rw") &&
	    strcmp(workload_type, "randrw") &&
	    strcmp(workload_type, "verify") &&
	    strcmp(workload_type, "reset") &&
	    strcmp(workload_type, "unmap")) {
		fprintf(stderr,
			"io pattern type must be one of\n"
			"(read, write, randread, randwrite, rw, randrw, verify, reset, unmap)\n");
		usage(argv[0]);
		exit(1);
	}

	if (!strcmp(workload_type, "read") ||
	    !strcmp(workload_type, "randread")) {
		g_rw_percentage = 100;
	}

	if (!strcmp(workload_type, "write") ||
	    !strcmp(workload_type, "randwrite")) {
		g_rw_percentage = 0;
	}

	if (!strcmp(workload_type, "verify") ||
	    !strcmp(workload_type, "reset") ||
	    !strcmp(workload_type, "unmap")) {
		g_rw_percentage = 50;
		if (g_io_size > SPDK_BDEV_LARGE_RBUF_MAX_SIZE) {
			fprintf(stderr, "Unable to exceed max I/O size of %d for verify. (%d provided).\n",
				SPDK_BDEV_LARGE_RBUF_MAX_SIZE, g_io_size);
			exit(1);
		}
		if (core_mask) {
			fprintf(stderr, "Ignoring -m option. Verify can only run with a single core.\n");
			core_mask = NULL;
		}
		g_verify = true;
		if (!strcmp(workload_type, "reset")) {
			g_reset = true;
		}
		if (!strcmp(workload_type, "unmap")) {
			g_unmap = true;
		}
	}

	if (!strcmp(workload_type, "read") ||
	    !strcmp(workload_type, "randread") ||
	    !strcmp(workload_type, "write") ||
	    !strcmp(workload_type, "randwrite") ||
	    !strcmp(workload_type, "verify") ||
	    !strcmp(workload_type, "reset") ||
	    !strcmp(workload_type, "unmap")) {
		if (mix_specified) {
			fprintf(stderr, "Ignoring -M option... Please use -M option"
				" only when using rw or randrw.\n");
		}
	}

	if (!strcmp(workload_type, "rw") ||
	    !strcmp(workload_type, "randrw")) {
		if (g_rw_percentage < 0 || g_rw_percentage > 100) {
			fprintf(stderr,
				"-M must be specified with a value from 0 to 100 "
				"for rw or randrw.\n");
			exit(1);
		}
	}

	if (!strcmp(workload_type, "read") ||
	    !strcmp(workload_type, "write") ||
	    !strcmp(workload_type, "rw") ||
	    !strcmp(workload_type, "verify") ||
	    !strcmp(workload_type, "reset") ||
	    !strcmp(workload_type, "unmap")) {
		g_is_random = 0;
	} else {
		g_is_random = 1;
	}

	if (g_io_size > SPDK_BDEV_LARGE_RBUF_MAX_SIZE) {
		fprintf(stdout, "I/O size of %d is greater than zero copy threshold (%d).\n",
			g_io_size, SPDK_BDEV_LARGE_RBUF_MAX_SIZE);
		fprintf(stdout, "Zero copy mechanism will not be used.\n");
		g_zcopy = false;
	}

	optind = 1; /* reset the optind */

	rte_set_log_level(RTE_LOG_ERR);

	blockdev_heads_init();

	bdevtest_init(config_file, core_mask);

	bdevperf_construct_targets();

	task_pool = rte_mempool_create("task_pool", 4096 * spdk_env_get_core_count(),
				       sizeof(struct bdevperf_task),
				       64, 0, NULL, NULL, task_ctor, NULL,
				       SOCKET_ID_ANY, 0);

	spdk_app_start(bdevperf_run, NULL, NULL);

	performance_dump(g_time_in_sec);

	spdk_app_fini();

	printf("done.\n");
	return 0;
}