/*-
 *   BSD LICENSE
 *
 *   Copyright (C) 2008-2012 Daisuke Aoyama <aoyama@peach.ne.jp>.
 *   Copyright (c) Intel Corporation.
 *   All rights reserved.
 *
 *   Redistribution and use in source and binary forms, with or without
 *   modification, are permitted provided that the following conditions
 *   are met:
 *
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in
 *       the documentation and/or other materials provided with the
 *       distribution.
 *     * Neither the name of Intel Corporation nor the names of its
 *       contributors may be used to endorse or promote products derived
 *       from this software without specific prior written permission.
 *
 *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <assert.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdbool.h>
#include <unistd.h>

#include <rte_config.h>
#include <rte_mempool.h>
#include <rte_lcore.h>

#include "spdk/bdev.h"
#include "spdk/copy_engine.h"
#include "spdk/endian.h"
#include "spdk/env.h"
#include "spdk/event.h"
#include "spdk/log.h"
#include "spdk/io_channel.h"

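/* Per-I/O context, carved from a DPDK mempool in bdevperf_submit_single().
 * Carries the data buffer and disk offset for one outstanding I/O so the
 * completion callbacks can verify the data and resubmit. */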
struct bdevperf_task {
        struct iovec                    iov;
        struct io_target                *target;
        void                            *buf;
        uint64_t                        offset;
        struct spdk_scsi_unmap_bdesc    bdesc;
};

static int g_io_size = 0;
/* initialize to invalid value so we can detect if user overrides it. */
static int g_rw_percentage = -1;
static int g_is_random;
static bool g_verify = false;
static bool g_reset = false;
static bool g_unmap = false;
static int g_queue_depth;
static int g_time_in_sec;
static int g_show_performance_real_time = 0;
static bool g_run_failed = false;
static bool g_zcopy = true;

static struct spdk_poller *g_perf_timer = NULL;

static void bdevperf_submit_single(struct io_target *target);

#include "../common.c"

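/* Per-bdev test state. Targets are kept on one singly linked list per
 * lcore (head[] below) so that each core drives only its own devices. */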
struct io_target {
        struct spdk_bdev        *bdev;
        struct spdk_io_channel  *ch;
        struct io_target        *next;
        unsigned                lcore;
        int                     io_completed;
        int                     current_queue_depth;
        uint64_t                size_in_ios;
        uint64_t                offset_in_ios;
        bool                    is_draining;
        struct spdk_poller      *run_timer;
        struct spdk_poller      *reset_timer;
};

struct io_target *head[RTE_MAX_LCORE];
static int g_target_count = 0;

/*
 * Used to determine how the I/O buffers should be aligned.
 * This alignment will be bumped up for blockdevs that
 * require alignment based on block length - for example,
 * AIO blockdevs.
 */
static uint32_t g_min_alignment = 8;

static void
blockdev_heads_init(void)
{
        int i;

        for (i = 0; i < RTE_MAX_LCORE; i++) {
                head[i] = NULL;
        }
}

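/* Claim every bdev in the configuration and wrap each one in an io_target,
 * assigning targets to lcores round-robin. Bdevs that cannot be claimed, or
 * that lack unmap support when -w unmap was requested, are skipped. */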
static void
bdevperf_construct_targets(void)
{
        int index = 0;
        struct spdk_bdev *bdev;
        struct io_target *target;

        bdev = spdk_bdev_first();
        while (bdev != NULL) {
                if (!spdk_bdev_claim(bdev, NULL, NULL)) {
                        bdev = spdk_bdev_next(bdev);
                        continue;
                }

                if (g_unmap && !bdev->thin_provisioning) {
                        printf("Skipping %s because it does not support unmap\n", bdev->name);
                        bdev = spdk_bdev_next(bdev);
                        continue;
                }

                target = malloc(sizeof(struct io_target));
                if (!target) {
                        fprintf(stderr, "Unable to allocate memory for new target.\n");
                        /* Return immediately because all mallocs will presumably fail after this */
                        return;
                }
                target->bdev = bdev;
                /* Map each target to an lcore, round-robin */
                index = g_target_count % spdk_env_get_core_count();
                target->next = head[index];
                target->lcore = index;
                target->io_completed = 0;
                target->current_queue_depth = 0;
                target->offset_in_ios = 0;
                target->size_in_ios = (bdev->blockcnt * bdev->blocklen) / g_io_size;
                if (bdev->need_aligned_buffer && g_min_alignment < bdev->blocklen) {
                        g_min_alignment = bdev->blocklen;
                }

                target->is_draining = false;
                target->run_timer = NULL;
                target->reset_timer = NULL;

                head[index] = target;
                g_target_count++;

                bdev = spdk_bdev_next(bdev);
        }
}

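/* Runs on the master lcore once a target has drained all of its outstanding
 * I/O. Releases the target's channel and claim, and stops the app when the
 * last target finishes. */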
static void
end_run(void *arg1, void *arg2)
{
        struct io_target *target = arg1;

        spdk_put_io_channel(target->ch);
        spdk_bdev_unclaim(target->bdev);
        if (--g_target_count == 0) {
                if (g_show_performance_real_time) {
                        spdk_poller_unregister(&g_perf_timer, NULL);
                }
                if (g_run_failed) {
                        spdk_app_stop(1);
                } else {
                        spdk_app_stop(0);
                }
        }
}

struct rte_mempool *task_pool;

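/* Completion callback for reads and writes. On verify-style workloads
 * (verify, reset, unmap) the data read back is compared against the buffer
 * that was written. A new I/O is submitted in place of the completed one
 * unless the target is draining. */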
static void
bdevperf_complete(struct spdk_bdev_io *bdev_io, enum spdk_bdev_io_status status, void *cb_arg)
{
        struct io_target        *target;
        struct bdevperf_task    *task = cb_arg;
        struct spdk_event       *complete;

        target = task->target;

        if (status != SPDK_BDEV_IO_STATUS_SUCCESS) {
                if (!g_reset) {
                        target->is_draining = true;
                        g_run_failed = true;
                }
        } else if (g_verify || g_reset || g_unmap) {
                assert(bdev_io->u.read.iovcnt == 1);
                if (memcmp(task->buf, bdev_io->u.read.iov.iov_base, g_io_size) != 0) {
                        printf("Buffer mismatch! Disk Offset: %lu\n", task->offset);
                        target->is_draining = true;
                        g_run_failed = true;
                }
        }

        target->current_queue_depth--;
        target->io_completed++;

        bdev_io->caller_ctx = NULL;
        rte_mempool_put(task_pool, task);

        spdk_bdev_free_io(bdev_io);

        /*
         * is_draining indicates when time has expired for the test run
         * and we are just waiting for the previously submitted I/O
         * to complete. In this case, do not submit a new I/O to replace
         * the one just completed.
         */
        if (!target->is_draining) {
                bdevperf_submit_single(target);
        } else if (target->current_queue_depth == 0) {
                complete = spdk_event_allocate(rte_get_master_lcore(), end_run, target, NULL);
                spdk_event_call(complete);
        }
}

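/* After an unmap completes, read the unmapped region back; it is expected
 * to come back zero-filled, so the compare buffer is zeroed first. */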
static void
bdevperf_unmap_complete(struct spdk_bdev_io *bdev_io, enum spdk_bdev_io_status status, void *cb_arg)
{
        struct io_target        *target;
        struct bdevperf_task    *task = cb_arg;

        target = task->target;

        /* Set the expected buffer to 0. */
        memset(task->buf, 0, g_io_size);

        /* Read the data back in */
        spdk_bdev_read(target->bdev, target->ch, NULL, task->offset, g_io_size,
                       bdevperf_complete, task);

        spdk_bdev_free_io(bdev_io);
}

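/* After the verify pattern has been written: either unmap the just-written
 * range (unmap workload) or read it straight back for comparison. */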
static void
bdevperf_verify_write_complete(struct spdk_bdev_io *bdev_io, enum spdk_bdev_io_status status,
                               void *cb_arg)
{
        struct io_target        *target;
        struct bdevperf_task    *task = cb_arg;

        target = task->target;

        if (g_unmap) {
                /* Unmap the data */
                to_be64(&task->bdesc.lba, task->offset / target->bdev->blocklen);
                to_be32(&task->bdesc.block_count, g_io_size / target->bdev->blocklen);

                spdk_bdev_unmap(target->bdev, target->ch, &task->bdesc, 1, bdevperf_unmap_complete,
                                task);
        } else {
                /* Read the data back in */
                spdk_bdev_read(target->bdev, target->ch, NULL,
                               task->offset,
                               g_io_size,
                               bdevperf_complete, task);
        }

        spdk_bdev_free_io(bdev_io);
}

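/* Mempool element constructor: allocate each task's data buffer once, up
 * front, using the strictest alignment any claimed bdev requires. */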
static void
task_ctor(struct rte_mempool *mp, void *arg, void *__task, unsigned id)
{
        struct bdevperf_task *task = __task;

        task->buf = spdk_zmalloc(g_io_size, g_min_alignment, NULL);
}

static __thread unsigned int seed = 0;

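/* Pick the next offset (sequential or random) and submit one I/O: a pattern
 * write for verify-style workloads, otherwise a read or a write chosen
 * according to g_rw_percentage. */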
static void
bdevperf_submit_single(struct io_target *target)
{
        struct spdk_bdev        *bdev;
        struct spdk_io_channel  *ch;
        struct bdevperf_task    *task = NULL;
        uint64_t                offset_in_ios;
        void                    *rbuf;

        bdev = target->bdev;
        ch = target->ch;

        if (rte_mempool_get(task_pool, (void **)&task) != 0 || task == NULL) {
                printf("Task pool allocation failed\n");
                abort();
        }

        task->target = target;

        if (g_is_random) {
                offset_in_ios = rand_r(&seed) % target->size_in_ios;
        } else {
                offset_in_ios = target->offset_in_ios++;
                if (target->offset_in_ios == target->size_in_ios) {
                        target->offset_in_ios = 0;
                }
        }

        task->offset = offset_in_ios * g_io_size;
        if (g_verify || g_reset || g_unmap) {
                memset(task->buf, rand_r(&seed) % 256, g_io_size);
                task->iov.iov_base = task->buf;
                task->iov.iov_len = g_io_size;
                spdk_bdev_writev(bdev, ch, &task->iov, 1, task->offset, g_io_size,
                                 bdevperf_verify_write_complete, task);
        } else if ((g_rw_percentage == 100) ||
                   (g_rw_percentage != 0 && ((rand_r(&seed) % 100) < g_rw_percentage))) {
                /* A NULL rbuf lets the bdev layer supply the buffer (zero copy) */
                rbuf = g_zcopy ? NULL : task->buf;
                spdk_bdev_read(bdev, ch, rbuf, task->offset, g_io_size,
                               bdevperf_complete, task);
        } else {
                task->iov.iov_base = task->buf;
                task->iov.iov_len = g_io_size;
                spdk_bdev_writev(bdev, ch, &task->iov, 1, task->offset, g_io_size,
                                 bdevperf_complete, task);
        }

        target->current_queue_depth++;
}

static void
bdevperf_submit_io(struct io_target *target, int queue_depth)
{
        while (queue_depth-- > 0) {
                bdevperf_submit_single(target);
        }
}

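/* Poller fired when the run time elapses: stop this target's timers and
 * let its outstanding I/O drain. */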
static void
end_target(void *arg)
{
        struct io_target *target = arg;

        spdk_poller_unregister(&target->run_timer, NULL);
        if (g_reset) {
                spdk_poller_unregister(&target->reset_timer, NULL);
        }

        target->is_draining = true;
}

static void reset_target(void *arg);

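/* Completion callback for the periodic reset: record a failure if the reset
 * itself failed, then re-arm the 10-second reset timer. */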
static void
reset_cb(struct spdk_bdev_io *bdev_io, enum spdk_bdev_io_status status, void *cb_arg)
{
        struct bdevperf_task    *task = cb_arg;
        struct io_target        *target = task->target;

        if (status != SPDK_BDEV_IO_STATUS_SUCCESS) {
                printf("Reset blockdev=%s failed\n", target->bdev->name);
                target->is_draining = true;
                g_run_failed = true;
        }

        rte_mempool_put(task_pool, task);

        spdk_poller_register(&target->reset_timer, reset_target, target, target->lcore,
                             10 * 1000000);
}

static void
reset_target(void *arg)
{
        struct io_target *target = arg;
        struct bdevperf_task *task = NULL;

        spdk_poller_unregister(&target->reset_timer, NULL);

        /* Do reset. */
        if (rte_mempool_get(task_pool, (void **)&task) != 0 || task == NULL) {
                printf("Task pool allocation failed\n");
                abort();
        }
        task->target = target;
        spdk_bdev_reset(target->bdev, SPDK_BDEV_RESET_SOFT,
                        reset_cb, task);
}

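/* Event handler run once on each lcore: open an I/O channel for every
 * target owned by this core, arm the stop timer (and the reset timer for
 * -w reset), and prime each queue to the requested depth. */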
static void
bdevperf_submit_on_core(void *arg1, void *arg2)
{
        struct io_target *target = arg1;

        /* Submit initial I/O for each block device. Each time one
         * completes, another will be submitted. */
        while (target != NULL) {
                target->ch = spdk_bdev_get_io_channel(target->bdev, SPDK_IO_PRIORITY_DEFAULT);

                /* Start a timer to stop this I/O chain when the run is over */
                spdk_poller_register(&target->run_timer, end_target, target, target->lcore,
                                     g_time_in_sec * 1000000);
                if (g_reset) {
                        spdk_poller_register(&target->reset_timer, reset_target, target,
                                             target->lcore, 10 * 1000000);
                }
                bdevperf_submit_io(target, g_queue_depth);
                target = target->next;
        }
}

static void usage(char *program_name)
{
        printf("%s options\n", program_name);
        printf("\t[-c configuration file]\n");
        printf("\t[-m core mask for distributing I/O submission/completion work\n");
        printf("\t\t(default: 0x1 - use core 0 only)]\n");
        printf("\t[-q io depth]\n");
        printf("\t[-s io size in bytes]\n");
        printf("\t[-w io pattern type, must be one of\n");
        printf("\t\t(read, write, randread, randwrite, rw, randrw, verify, reset, unmap)]\n");
        printf("\t[-M rwmixread (100 for reads, 0 for writes)]\n");
        printf("\t[-t time in seconds]\n");
        printf("\t[-S Show performance result in real time]\n");
}

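/* Print per-target and aggregate throughput, derived from the I/O count
 * each target completed over io_time seconds. */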
static void
performance_dump(int io_time)
{
        uint32_t index;
        unsigned lcore_id;
        float io_per_second, mb_per_second;
        float total_io_per_second, total_mb_per_second;
        struct io_target *target;

        total_io_per_second = 0;
        total_mb_per_second = 0;
        for (index = 0; index < spdk_env_get_core_count(); index++) {
                target = head[index];
                if (target != NULL) {
                        lcore_id = target->lcore;
                        printf("\r Logical core: %u\n", lcore_id);
                }
                while (target != NULL) {
                        io_per_second = (float)target->io_completed / io_time;
                        mb_per_second = io_per_second * g_io_size / (1024 * 1024);
                        printf("\r %-20s: %10.2f IO/s %10.2f MB/s\n",
                               target->bdev->name, io_per_second, mb_per_second);
                        total_io_per_second += io_per_second;
                        total_mb_per_second += mb_per_second;
                        target = target->next;
                }
        }

        printf("\r =====================================================\n");
        printf("\r %-20s: %10.2f IO/s %10.2f MB/s\n",
               "Total", total_io_per_second, total_mb_per_second);
        fflush(stdout);
}

static void
performance_statistics_thread(void *arg)
{
        performance_dump(1);
}

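/* Application start callback: optionally arm the once-per-second stats
 * poller, then send an event to each core that owns targets to begin
 * submitting I/O. */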
static void
bdevperf_run(void *arg1, void *arg2)
{
        uint32_t i;
        struct io_target *target;
        struct spdk_event *event;

        printf("Running I/O for %d seconds...\n", g_time_in_sec);
        fflush(stdout);

        /* Start a timer to dump performance numbers */
        if (g_show_performance_real_time) {
                spdk_poller_register(&g_perf_timer, performance_statistics_thread, NULL,
                                     spdk_env_get_current_core(), 1000000);
        }

        /* Send events to start all I/O */
        SPDK_ENV_FOREACH_CORE(i) {
                target = head[i];
                if (target != NULL) {
                        event = spdk_event_allocate(target->lcore, bdevperf_submit_on_core,
                                                    target, NULL);
                        spdk_event_call(event);
                }
        }
}

int
main(int argc, char **argv)
{
        const char *config_file;
        const char *core_mask;
        const char *workload_type;
        int op;
        bool mix_specified;

        /* default values */
        config_file = NULL;
        g_queue_depth = 0;
        g_io_size = 0;
        workload_type = NULL;
        g_time_in_sec = 0;
        mix_specified = false;
        core_mask = NULL;

        while ((op = getopt(argc, argv, "c:m:q:s:t:w:M:S")) != -1) {
                switch (op) {
                case 'c':
                        config_file = optarg;
                        break;
                case 'm':
                        core_mask = optarg;
                        break;
                case 'q':
                        g_queue_depth = atoi(optarg);
                        break;
                case 's':
                        g_io_size = atoi(optarg);
                        break;
                case 't':
                        g_time_in_sec = atoi(optarg);
                        break;
                case 'w':
                        workload_type = optarg;
                        break;
                case 'M':
                        g_rw_percentage = atoi(optarg);
                        mix_specified = true;
                        break;
                case 'S':
                        g_show_performance_real_time = 1;
                        break;
                default:
                        usage(argv[0]);
                        exit(1);
                }
        }

        if (!config_file) {
                usage(argv[0]);
                exit(1);
        }
        if (g_queue_depth <= 0) {
                usage(argv[0]);
                exit(1);
        }
        if (g_io_size <= 0) {
                usage(argv[0]);
                exit(1);
        }
        if (!workload_type) {
                usage(argv[0]);
                exit(1);
        }
        if (g_time_in_sec <= 0) {
                usage(argv[0]);
                exit(1);
        }

        if (strcmp(workload_type, "read") &&
            strcmp(workload_type, "write") &&
            strcmp(workload_type, "randread") &&
            strcmp(workload_type, "randwrite") &&
            strcmp(workload_type, "rw") &&
            strcmp(workload_type, "randrw") &&
            strcmp(workload_type, "verify") &&
            strcmp(workload_type, "reset") &&
            strcmp(workload_type, "unmap")) {
                fprintf(stderr,
                        "io pattern type must be one of\n"
                        "(read, write, randread, randwrite, rw, randrw, verify, reset, unmap)\n");
                exit(1);
        }

        if (!strcmp(workload_type, "read") ||
            !strcmp(workload_type, "randread")) {
                g_rw_percentage = 100;
        }

        if (!strcmp(workload_type, "write") ||
            !strcmp(workload_type, "randwrite")) {
                g_rw_percentage = 0;
        }

        if (!strcmp(workload_type, "verify") ||
            !strcmp(workload_type, "reset") ||
            !strcmp(workload_type, "unmap")) {
                g_rw_percentage = 50;
                if (g_io_size > SPDK_BDEV_LARGE_RBUF_MAX_SIZE) {
                        fprintf(stderr, "Unable to exceed max I/O size of %d for verify. (%d provided).\n",
                                SPDK_BDEV_LARGE_RBUF_MAX_SIZE, g_io_size);
                        exit(1);
                }
                if (core_mask) {
                        fprintf(stderr, "Ignoring -m option. Verify can only run with a single core.\n");
                        core_mask = NULL;
                }
                g_verify = true;
                if (!strcmp(workload_type, "reset")) {
                        g_reset = true;
                }
                if (!strcmp(workload_type, "unmap")) {
                        g_unmap = true;
                }
        }

        if (!strcmp(workload_type, "read") ||
            !strcmp(workload_type, "randread") ||
            !strcmp(workload_type, "write") ||
            !strcmp(workload_type, "randwrite") ||
            !strcmp(workload_type, "verify") ||
            !strcmp(workload_type, "reset") ||
            !strcmp(workload_type, "unmap")) {
                if (mix_specified) {
                        fprintf(stderr, "Ignoring -M option. -M is only valid with the "
                                "rw and randrw workloads.\n");
                }
        }

        if (!strcmp(workload_type, "rw") ||
            !strcmp(workload_type, "randrw")) {
                if (g_rw_percentage < 0 || g_rw_percentage > 100) {
                        fprintf(stderr,
                                "-M must be a value from 0 to 100 "
                                "for rw or randrw.\n");
                        exit(1);
                }
        }

        if (!strcmp(workload_type, "read") ||
            !strcmp(workload_type, "write") ||
            !strcmp(workload_type, "rw") ||
            !strcmp(workload_type, "verify") ||
            !strcmp(workload_type, "reset") ||
            !strcmp(workload_type, "unmap")) {
                g_is_random = 0;
        } else {
                g_is_random = 1;
        }

        if (g_io_size > SPDK_BDEV_LARGE_RBUF_MAX_SIZE) {
                fprintf(stdout, "I/O size of %d is greater than zero copy threshold (%d).\n",
                        g_io_size, SPDK_BDEV_LARGE_RBUF_MAX_SIZE);
                fprintf(stdout, "Zero copy mechanism will not be used.\n");
                g_zcopy = false;
        }

        optind = 1; /* reset optind */

        rte_set_log_level(RTE_LOG_ERR);

        blockdev_heads_init();

        bdevtest_init(config_file, core_mask);

        bdevperf_construct_targets();

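        /* One shared pool of preallocated tasks; task_ctor allocates each
         * element's data buffer at pool-creation time. */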
        task_pool = rte_mempool_create("task_pool", 4096 * spdk_env_get_core_count(),
                                       sizeof(struct bdevperf_task),
                                       64, 0, NULL, NULL, task_ctor, NULL,
                                       SOCKET_ID_ANY, 0);
        if (task_pool == NULL) {
                fprintf(stderr, "Could not allocate task pool.\n");
                exit(1);
        }

        spdk_app_start(bdevperf_run, NULL, NULL);

        performance_dump(g_time_in_sec);
        spdk_app_fini();
        printf("done.\n");
        return g_run_failed;
}