]> git.proxmox.com Git - ceph.git/blob - ceph/src/spdk/test/lib/nvme/reset/reset.c
add subtree-ish sources for 12.0.3
[ceph.git] / ceph / src / spdk / test / lib / nvme / reset / reset.c
1 /*-
2 * BSD LICENSE
3 *
4 * Copyright (c) Intel Corporation.
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 *
11 * * Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * * Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in
15 * the documentation and/or other materials provided with the
16 * distribution.
17 * * Neither the name of Intel Corporation nor the names of its
18 * contributors may be used to endorse or promote products derived
19 * from this software without specific prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32 */
33
#include <inttypes.h>
#include <stdbool.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

#include <rte_config.h>
#include <rte_mempool.h>
#include <rte_lcore.h>

#include "spdk/nvme.h"
#include "spdk/env.h"
#include "spdk/string.h"
46
/* Linked-list node tracking one attached NVMe controller so it can be
 * detached and freed at shutdown (list head: g_controllers). */
struct ctrlr_entry {
	struct spdk_nvme_ctrlr *ctrlr;	/* controller handle from attach_cb() */
	struct ctrlr_entry *next;
	char name[1024];		/* NOTE(review): never written in this file — confirm if needed */
};
52
/* One test target per active namespace, across all controllers
 * (list head: g_namespaces). */
struct ns_entry {
	struct spdk_nvme_ns *ns;
	struct spdk_nvme_ctrlr *ctrlr;	/* owning controller, used for qpair allocation */
	struct ns_entry *next;
	uint32_t io_size_blocks;	/* one I/O expressed in namespace sectors */
	uint64_t size_in_ios;		/* namespace capacity in I/O-sized units */
	char name[1024];		/* "model (serial)" string for log messages */
};
61
/* Per-worker, per-namespace I/O state and statistics. */
struct ns_worker_ctx {
	struct ns_entry *entry;		/* namespace this context drives */
	struct spdk_nvme_qpair *qpair;	/* I/O queue pair, allocated in work_fn() */
	uint64_t io_completed;		/* successful completions */
	uint64_t io_completed_error;	/* completions with error status */
	uint64_t io_submitted;		/* total submissions attempted */
	uint64_t current_queue_depth;	/* I/O currently in flight */
	uint64_t offset_in_ios;		/* next sequential offset (non-random mode) */
	bool is_draining;		/* true once test time expired; stop resubmitting */

	struct ns_worker_ctx *next;
};
74
/* Mempool element carried through each I/O as the completion context. */
struct reset_task {
	struct ns_worker_ctx *ns_ctx;	/* context to credit on completion */
	void *buf;			/* I/O data buffer, allocated once in task_ctor() */
};
79
/* A worker thread and the list of namespace contexts it services. */
struct worker_thread {
	struct ns_worker_ctx *ns_ctx;	/* head of this worker's ns context list */
	unsigned lcore;			/* DPDK lcore the worker runs on */
};
84
/* Pool of reset_task objects; buffers are pre-allocated by task_ctor(). */
static struct rte_mempool *task_pool;

static struct ctrlr_entry *g_controllers = NULL;	/* all attached controllers */
static struct ns_entry *g_namespaces = NULL;		/* all registered namespaces */
static int g_num_namespaces = 0;
static struct worker_thread *g_workers = NULL;		/* single worker on the master lcore */

static uint64_t g_tsc_rate;	/* TSC ticks per second, for converting seconds to ticks */

/* Test parameters, filled in by parse_args(). */
static int g_io_size_bytes;
static int g_rw_percentage;	/* read percentage: 100 = all reads, 0 = all writes */
static int g_is_random;
static int g_queue_depth;
static int g_time_in_sec;
99
100 static void
101 register_ns(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_ns *ns)
102 {
103 struct ns_entry *entry;
104 const struct spdk_nvme_ctrlr_data *cdata;
105
106 if (!spdk_nvme_ns_is_active(ns)) {
107 printf("Skipping inactive NS %u\n", spdk_nvme_ns_get_id(ns));
108 return;
109 }
110
111 entry = malloc(sizeof(struct ns_entry));
112 if (entry == NULL) {
113 perror("ns_entry malloc");
114 exit(1);
115 }
116
117 cdata = spdk_nvme_ctrlr_get_data(ctrlr);
118
119 entry->ns = ns;
120 entry->ctrlr = ctrlr;
121 entry->size_in_ios = spdk_nvme_ns_get_size(ns) /
122 g_io_size_bytes;
123 entry->io_size_blocks = g_io_size_bytes / spdk_nvme_ns_get_sector_size(ns);
124
125 snprintf(entry->name, 44, "%-20.20s (%-20.20s)", cdata->mn, cdata->sn);
126
127 g_num_namespaces++;
128 entry->next = g_namespaces;
129 g_namespaces = entry;
130 }
131
132 static void
133 register_ctrlr(struct spdk_nvme_ctrlr *ctrlr)
134 {
135 int nsid, num_ns;
136 struct spdk_nvme_ns *ns;
137 struct ctrlr_entry *entry = malloc(sizeof(struct ctrlr_entry));
138
139 if (entry == NULL) {
140 perror("ctrlr_entry malloc");
141 exit(1);
142 }
143
144 entry->ctrlr = ctrlr;
145 entry->next = g_controllers;
146 g_controllers = entry;
147
148 num_ns = spdk_nvme_ctrlr_get_num_ns(ctrlr);
149 for (nsid = 1; nsid <= num_ns; nsid++) {
150 ns = spdk_nvme_ctrlr_get_ns(ctrlr, nsid);
151 if (ns == NULL) {
152 continue;
153 }
154 register_ns(ctrlr, ns);
155 }
156 }
157
158 static void task_ctor(struct rte_mempool *mp, void *arg, void *__task, unsigned id)
159 {
160 struct reset_task *task = __task;
161
162 task->buf = spdk_zmalloc(g_io_size_bytes, 0x200, NULL);
163 if (task->buf == NULL) {
164 fprintf(stderr, "task->buf spdk_zmalloc failed\n");
165 exit(1);
166 }
167 }
168
static void io_complete(void *ctx, const struct spdk_nvme_cpl *completion);

/* Per-thread PRNG state for rand_r(); thread-local so workers never share it. */
static __thread unsigned int seed = 0;
172
173 static void
174 submit_single_io(struct ns_worker_ctx *ns_ctx)
175 {
176 struct reset_task *task = NULL;
177 uint64_t offset_in_ios;
178 int rc;
179 struct ns_entry *entry = ns_ctx->entry;
180
181 if (rte_mempool_get(task_pool, (void **)&task) != 0) {
182 fprintf(stderr, "task_pool rte_mempool_get failed\n");
183 exit(1);
184 }
185
186 task->ns_ctx = ns_ctx;
187 task->ns_ctx->io_submitted++;
188
189 if (g_is_random) {
190 offset_in_ios = rand_r(&seed) % entry->size_in_ios;
191 } else {
192 offset_in_ios = ns_ctx->offset_in_ios++;
193 if (ns_ctx->offset_in_ios == entry->size_in_ios) {
194 ns_ctx->offset_in_ios = 0;
195 }
196 }
197
198 if ((g_rw_percentage == 100) ||
199 (g_rw_percentage != 0 && ((rand_r(&seed) % 100) < g_rw_percentage))) {
200 rc = spdk_nvme_ns_cmd_read(entry->ns, ns_ctx->qpair, task->buf,
201 offset_in_ios * entry->io_size_blocks,
202 entry->io_size_blocks, io_complete, task, 0);
203 } else {
204 rc = spdk_nvme_ns_cmd_write(entry->ns, ns_ctx->qpair, task->buf,
205 offset_in_ios * entry->io_size_blocks,
206 entry->io_size_blocks, io_complete, task, 0);
207 }
208
209 if (rc != 0) {
210 fprintf(stderr, "starting I/O failed\n");
211 }
212
213 ns_ctx->current_queue_depth++;
214 }
215
216 static void
217 task_complete(struct reset_task *task, const struct spdk_nvme_cpl *completion)
218 {
219 struct ns_worker_ctx *ns_ctx;
220
221 ns_ctx = task->ns_ctx;
222 ns_ctx->current_queue_depth--;
223
224 if (spdk_nvme_cpl_is_error(completion)) {
225 ns_ctx->io_completed_error++;
226 } else {
227 ns_ctx->io_completed++;
228 }
229
230 rte_mempool_put(task_pool, task);
231
232 /*
233 * is_draining indicates when time has expired for the test run
234 * and we are just waiting for the previously submitted I/O
235 * to complete. In this case, do not submit a new I/O to replace
236 * the one just completed.
237 */
238 if (!ns_ctx->is_draining) {
239 submit_single_io(ns_ctx);
240 }
241 }
242
/* spdk_nvme_cmd_cb adapter: the callback context is the reset_task. */
static void
io_complete(void *ctx, const struct spdk_nvme_cpl *completion)
{
	struct reset_task *task = ctx;

	task_complete(task, completion);
}
248
/* Poll the qpair once; 0 means reap as many completions as are ready.
 * io_complete() fires synchronously for each reaped I/O. */
static void
check_io(struct ns_worker_ctx *ns_ctx)
{
	spdk_nvme_qpair_process_completions(ns_ctx->qpair, 0);
}
254
/* Prime the namespace with queue_depth concurrent I/O. */
static void
submit_io(struct ns_worker_ctx *ns_ctx, int queue_depth)
{
	int i;

	for (i = 0; i < queue_depth; i++) {
		submit_single_io(ns_ctx);
	}
}
262
263 static void
264 drain_io(struct ns_worker_ctx *ns_ctx)
265 {
266 ns_ctx->is_draining = true;
267 while (ns_ctx->current_queue_depth > 0) {
268 check_io(ns_ctx);
269 }
270 }
271
/*
 * Worker main loop: allocate a qpair per namespace, keep g_queue_depth I/O
 * in flight on each, issue one controller reset per namespace while more
 * than half the test time remains, then drain and free the qpairs.
 * Returns 0 on success, -1 on qpair-allocation or reset failure.
 */
static int
work_fn(void *arg)
{
	uint64_t tsc_end = spdk_get_ticks() + g_time_in_sec * g_tsc_rate;
	struct worker_thread *worker = (struct worker_thread *)arg;
	struct ns_worker_ctx *ns_ctx = NULL;
	bool did_reset = false;

	printf("Starting thread on core %u\n", worker->lcore);

	/* Submit initial I/O for each namespace. */
	ns_ctx = worker->ns_ctx;
	while (ns_ctx != NULL) {
		/* NOTE(review): if allocation fails partway, earlier qpairs are
		 * not freed here — acceptable for a test tool, but worth confirming. */
		ns_ctx->qpair = spdk_nvme_ctrlr_alloc_io_qpair(ns_ctx->entry->ctrlr, 0);
		if (ns_ctx->qpair == NULL) {
			fprintf(stderr, "spdk_nvme_ctrlr_alloc_io_qpair() failed on core %u\n", worker->lcore);
			return -1;
		}
		submit_io(ns_ctx, g_queue_depth);
		ns_ctx = ns_ctx->next;
	}

	while (1) {
		/*
		 * Check for completed I/O for each controller. A new
		 * I/O will be submitted in the io_complete callback
		 * to replace each I/O that is completed.
		 */
		ns_ctx = worker->ns_ctx;
		while (ns_ctx != NULL) {
			check_io(ns_ctx);
			ns_ctx = ns_ctx->next;
		}

		/* Fire one reset per controller while remaining time exceeds half
		 * the total, i.e. during the first half of the run.
		 * NOTE(review): tsc_end - spdk_get_ticks() underflows (unsigned)
		 * once the deadline passes; harmless here because did_reset is
		 * normally set on the first iteration — confirm if repurposing. */
		if (!did_reset && ((tsc_end - spdk_get_ticks()) / g_tsc_rate) > (uint64_t)g_time_in_sec / 2) {
			ns_ctx = worker->ns_ctx;
			while (ns_ctx != NULL) {
				if (spdk_nvme_ctrlr_reset(ns_ctx->entry->ctrlr) < 0) {
					fprintf(stderr, "nvme reset failed.\n");
					return -1;
				}
				ns_ctx = ns_ctx->next;
			}
			did_reset = true;
		}

		if (spdk_get_ticks() > tsc_end) {
			break;
		}
	}

	/* Time expired: wait out the in-flight I/O and release the qpairs. */
	ns_ctx = worker->ns_ctx;
	while (ns_ctx != NULL) {
		drain_io(ns_ctx);
		spdk_nvme_ctrlr_free_io_qpair(ns_ctx->qpair);
		ns_ctx = ns_ctx->next;
	}

	return 0;
}
332
/* Print command-line help for this tool. */
static void usage(char *program_name)
{
	printf("%s options", program_name);
	printf("\n"
	       "\t[-q io depth]\n"
	       "\t[-s io size in bytes]\n"
	       "\t[-w io pattern type, must be one of\n"
	       "\t\t(read, write, randread, randwrite, rw, randrw)]\n"
	       "\t[-M rwmixread (100 for reads, 0 for writes)]\n"
	       "\t[-t time in seconds(should be larger than 15 seconds)]\n"
	       "\t[-m max completions per poll]\n"
	       "\t\t(default:0 - unlimited)\n");
}
346
347 static int
348 print_stats(void)
349 {
350 uint64_t io_completed, io_submitted, io_completed_error;
351 uint64_t total_completed_io, total_submitted_io, total_completed_err_io;
352 struct worker_thread *worker;
353 struct ns_worker_ctx *ns_ctx;
354
355 total_completed_io = 0;
356 total_submitted_io = 0;
357 total_completed_err_io = 0;
358
359 worker = g_workers;
360 ns_ctx = worker->ns_ctx;
361 while (ns_ctx) {
362 io_completed = ns_ctx->io_completed;
363 io_submitted = ns_ctx->io_submitted;
364 io_completed_error = ns_ctx->io_completed_error;
365 total_completed_io += io_completed;
366 total_submitted_io += io_submitted;
367 total_completed_err_io += io_completed_error;
368 ns_ctx = ns_ctx->next;
369 }
370
371 printf("========================================================\n");
372 printf("%16lu IO completed successfully\n", total_completed_io);
373 printf("%16lu IO completed with error\n", total_completed_err_io);
374 printf("--------------------------------------------------------\n");
375 printf("%16lu IO completed total\n", total_completed_io + total_completed_err_io);
376 printf("%16lu IO submitted\n", total_submitted_io);
377
378 if (total_submitted_io != (total_completed_io + total_completed_err_io)) {
379 fprintf(stderr, "Some IO are missing......\n");
380 return -1;
381 }
382
383 return 0;
384 }
385
386 static int
387 parse_args(int argc, char **argv)
388 {
389 const char *workload_type;
390 int op;
391 bool mix_specified = false;
392
393 /* default value*/
394 g_queue_depth = 0;
395 g_io_size_bytes = 0;
396 workload_type = NULL;
397 g_time_in_sec = 0;
398 g_rw_percentage = -1;
399
400 while ((op = getopt(argc, argv, "m:q:s:t:w:M:")) != -1) {
401 switch (op) {
402 case 'q':
403 g_queue_depth = atoi(optarg);
404 break;
405 case 's':
406 g_io_size_bytes = atoi(optarg);
407 break;
408 case 't':
409 g_time_in_sec = atoi(optarg);
410 break;
411 case 'w':
412 workload_type = optarg;
413 break;
414 case 'M':
415 g_rw_percentage = atoi(optarg);
416 mix_specified = true;
417 break;
418 default:
419 usage(argv[0]);
420 return 1;
421 }
422 }
423
424 if (!g_queue_depth) {
425 usage(argv[0]);
426 return 1;
427 }
428 if (!g_io_size_bytes) {
429 usage(argv[0]);
430 return 1;
431 }
432 if (!workload_type) {
433 usage(argv[0]);
434 return 1;
435 }
436 if (!g_time_in_sec) {
437 usage(argv[0]);
438 return 1;
439 }
440
441 if (strcmp(workload_type, "read") &&
442 strcmp(workload_type, "write") &&
443 strcmp(workload_type, "randread") &&
444 strcmp(workload_type, "randwrite") &&
445 strcmp(workload_type, "rw") &&
446 strcmp(workload_type, "randrw")) {
447 fprintf(stderr,
448 "io pattern type must be one of\n"
449 "(read, write, randread, randwrite, rw, randrw)\n");
450 return 1;
451 }
452
453 if (!strcmp(workload_type, "read") ||
454 !strcmp(workload_type, "randread")) {
455 g_rw_percentage = 100;
456 }
457
458 if (!strcmp(workload_type, "write") ||
459 !strcmp(workload_type, "randwrite")) {
460 g_rw_percentage = 0;
461 }
462
463 if (!strcmp(workload_type, "read") ||
464 !strcmp(workload_type, "randread") ||
465 !strcmp(workload_type, "write") ||
466 !strcmp(workload_type, "randwrite")) {
467 if (mix_specified) {
468 fprintf(stderr, "Ignoring -M option... Please use -M option"
469 " only when using rw or randrw.\n");
470 }
471 }
472
473 if (!strcmp(workload_type, "rw") ||
474 !strcmp(workload_type, "randrw")) {
475 if (g_rw_percentage < 0 || g_rw_percentage > 100) {
476 fprintf(stderr,
477 "-M must be specified to value from 0 to 100 "
478 "for rw or randrw.\n");
479 return 1;
480 }
481 }
482
483 if (!strcmp(workload_type, "read") ||
484 !strcmp(workload_type, "write") ||
485 !strcmp(workload_type, "rw")) {
486 g_is_random = 0;
487 } else {
488 g_is_random = 1;
489 }
490
491 optind = 1;
492 return 0;
493 }
494
495 static int
496 register_workers(void)
497 {
498 struct worker_thread *worker;
499
500 worker = malloc(sizeof(struct worker_thread));
501 if (worker == NULL) {
502 perror("worker_thread malloc");
503 return -1;
504 }
505
506 memset(worker, 0, sizeof(struct worker_thread));
507 worker->lcore = rte_get_master_lcore();
508
509 g_workers = worker;
510
511 return 0;
512 }
513
514
/* Probe callback: attach to every controller found. */
static bool
probe_cb(void *cb_ctx, const struct spdk_nvme_transport_id *trid,
	 struct spdk_nvme_ctrlr_opts *opts)
{
	return true;
}
521
/* Attach callback: record the controller and its namespaces. */
static void
attach_cb(void *cb_ctx, const struct spdk_nvme_transport_id *trid,
	  struct spdk_nvme_ctrlr *ctrlr, const struct spdk_nvme_ctrlr_opts *opts)
{
	register_ctrlr(ctrlr);
}
528
529 static int
530 register_controllers(void)
531 {
532 printf("Initializing NVMe Controllers\n");
533
534 if (spdk_nvme_probe(NULL, NULL, probe_cb, attach_cb, NULL) != 0) {
535 fprintf(stderr, "spdk_nvme_probe() failed\n");
536 return 1;
537 }
538
539 return 0;
540 }
541
542 static void
543 unregister_controllers(void)
544 {
545 struct ctrlr_entry *entry = g_controllers;
546
547 while (entry) {
548 struct ctrlr_entry *next = entry->next;
549 spdk_nvme_detach(entry->ctrlr);
550 free(entry);
551 entry = next;
552 }
553 }
554
555 static int
556 associate_workers_with_ns(void)
557 {
558 struct ns_entry *entry = g_namespaces;
559 struct worker_thread *worker = g_workers;
560 struct ns_worker_ctx *ns_ctx;
561 int i, count;
562
563 count = g_num_namespaces;
564
565 for (i = 0; i < count; i++) {
566 if (entry == NULL) {
567 break;
568 }
569 ns_ctx = malloc(sizeof(struct ns_worker_ctx));
570 if (!ns_ctx) {
571 return -1;
572 }
573 memset(ns_ctx, 0, sizeof(*ns_ctx));
574
575 printf("Associating %s with lcore %d\n", entry->name, worker->lcore);
576 ns_ctx->entry = entry;
577 ns_ctx->next = worker->ns_ctx;
578 worker->ns_ctx = ns_ctx;
579
580 worker = g_workers;
581
582 entry = entry->next;
583 if (entry == NULL) {
584 entry = g_namespaces;
585 }
586 }
587
588 return 0;
589 }
590
591 static int
592 run_nvme_reset_cycle(int retry_count)
593 {
594 struct worker_thread *worker;
595 struct ns_worker_ctx *ns_ctx;
596
597 spdk_nvme_retry_count = retry_count;
598
599 if (work_fn(g_workers) != 0) {
600 return -1;
601 }
602
603 if (print_stats() != 0) {
604 return -1;
605 }
606
607 worker = g_workers;
608 ns_ctx = worker->ns_ctx;
609 while (ns_ctx != NULL) {
610 ns_ctx->io_completed = 0;
611 ns_ctx->io_completed_error = 0;
612 ns_ctx->io_submitted = 0;
613 ns_ctx->is_draining = false;
614 ns_ctx = ns_ctx->next;
615 }
616
617 return 0;
618 }
619
620 int main(int argc, char **argv)
621 {
622 int rc;
623 int i;
624 struct spdk_env_opts opts;
625
626 spdk_env_opts_init(&opts);
627 opts.name = "reset";
628 opts.core_mask = "0x1";
629 spdk_env_init(&opts);
630
631 rc = parse_args(argc, argv);
632 if (rc != 0) {
633 return rc;
634 }
635
636 task_pool = rte_mempool_create("task_pool", 8192,
637 sizeof(struct reset_task),
638 64, 0, NULL, NULL, task_ctor, NULL,
639 SOCKET_ID_ANY, 0);
640
641 g_tsc_rate = spdk_get_ticks_hz();
642
643 if (register_workers() != 0) {
644 return 1;
645 }
646
647 if (register_controllers() != 0) {
648 return 1;
649 }
650
651 if (associate_workers_with_ns() != 0) {
652 rc = 1;
653 goto cleanup;
654 }
655
656 printf("Initialization complete. Launching workers.\n");
657
658 for (i = 2; i >= 0; i--) {
659 rc = run_nvme_reset_cycle(i);
660 if (rc != 0) {
661 goto cleanup;
662 }
663 }
664
665 cleanup:
666 unregister_controllers();
667
668 if (rc != 0) {
669 fprintf(stderr, "%s: errors occured\n", argv[0]);
670 }
671
672 return rc;
673 }