]> git.proxmox.com Git - ceph.git/blob - ceph/src/spdk/test/lib/nvme/reset/reset.c
add subtree-ish sources for 12.0.3
[ceph.git] / ceph / src / spdk / test / lib / nvme / reset / reset.c
1 /*-
2 * BSD LICENSE
3 *
4 * Copyright (c) Intel Corporation.
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 *
11 * * Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * * Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in
15 * the documentation and/or other materials provided with the
16 * distribution.
17 * * Neither the name of Intel Corporation nor the names of its
18 * contributors may be used to endorse or promote products derived
19 * from this software without specific prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32 */
33
#include <inttypes.h>
#include <stdbool.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

#include <rte_config.h>
#include <rte_mempool.h>
#include <rte_lcore.h>

#include "spdk/nvme.h"
#include "spdk/env.h"
#include "spdk/string.h"
46
/* Linked-list node tracking one attached NVMe controller so it can be
 * detached and freed at shutdown (list head: g_controllers). */
struct ctrlr_entry {
	struct spdk_nvme_ctrlr *ctrlr;	/* controller handle from attach_cb() */
	struct ctrlr_entry *next;
	char name[1024];		/* NOTE(review): never written in this file — confirm if needed */
};
52
/* One test target per active namespace, across all controllers
 * (list head: g_namespaces). */
struct ns_entry {
	struct spdk_nvme_ns *ns;
	struct spdk_nvme_ctrlr *ctrlr;	/* owning controller, used for qpair allocation */
	struct ns_entry *next;
	uint32_t io_size_blocks;	/* one I/O expressed in namespace sectors */
	uint64_t size_in_ios;		/* namespace capacity in I/O-sized units */
	char name[1024];		/* "model (serial)" string for log messages */
};
61
/* Per-worker, per-namespace I/O state and statistics. */
struct ns_worker_ctx {
	struct ns_entry *entry;		/* namespace this context drives */
	struct spdk_nvme_qpair *qpair;	/* I/O queue pair, allocated in work_fn() */
	uint64_t io_completed;		/* successful completions */
	uint64_t io_completed_error;	/* completions with error status */
	uint64_t io_submitted;		/* total submissions attempted */
	uint64_t current_queue_depth;	/* I/O currently in flight */
	uint64_t offset_in_ios;		/* next sequential offset (non-random mode) */
	bool is_draining;		/* true once test time expired; stop resubmitting */

	struct ns_worker_ctx *next;
};
74
/* Mempool element carried through each I/O as the completion context. */
struct reset_task {
	struct ns_worker_ctx *ns_ctx;	/* context to credit on completion */
	void *buf;			/* I/O data buffer, allocated once in task_ctor() */
};
79
/* A worker thread and the list of namespace contexts it services. */
struct worker_thread {
	struct ns_worker_ctx *ns_ctx;	/* head of this worker's ns context list */
	unsigned lcore;			/* DPDK lcore the worker runs on */
};
84
/* Pool of reset_task objects; buffers are pre-allocated by task_ctor(). */
static struct rte_mempool *task_pool;

static struct ctrlr_entry *g_controllers = NULL;	/* all attached controllers */
static struct ns_entry *g_namespaces = NULL;		/* all registered namespaces */
static int g_num_namespaces = 0;
static struct worker_thread *g_workers = NULL;		/* single worker on the master lcore */

static uint64_t g_tsc_rate;	/* TSC ticks per second, for converting seconds to ticks */

/* Test parameters, filled in by parse_args(). */
static int g_io_size_bytes;
static int g_rw_percentage;	/* read percentage: 100 = all reads, 0 = all writes */
static int g_is_random;
static int g_queue_depth;
static int g_time_in_sec;
99
100 static void
101 register_ns(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_ns *ns)
102 {
103 struct ns_entry *entry;
104 const struct spdk_nvme_ctrlr_data *cdata;
105
106 if (!spdk_nvme_ns_is_active(ns)) {
107 printf("Skipping inactive NS %u\n", spdk_nvme_ns_get_id(ns));
108 return;
109 }
110
111 entry = malloc(sizeof(struct ns_entry));
112 if (entry == NULL) {
113 perror("ns_entry malloc");
114 exit(1);
115 }
116
117 cdata = spdk_nvme_ctrlr_get_data(ctrlr);
118
119 entry->ns = ns;
120 entry->ctrlr = ctrlr;
121 entry->size_in_ios = spdk_nvme_ns_get_size(ns) /
122 g_io_size_bytes;
123 entry->io_size_blocks = g_io_size_bytes / spdk_nvme_ns_get_sector_size(ns);
124
125 snprintf(entry->name, 44, "%-20.20s (%-20.20s)", cdata->mn, cdata->sn);
126
127 g_num_namespaces++;
128 entry->next = g_namespaces;
129 g_namespaces = entry;
130 }
131
132 static void
133 register_ctrlr(struct spdk_nvme_ctrlr *ctrlr)
134 {
135 int nsid, num_ns;
136 struct spdk_nvme_ns *ns;
137 struct ctrlr_entry *entry = malloc(sizeof(struct ctrlr_entry));
138
139 if (entry == NULL) {
140 perror("ctrlr_entry malloc");
141 exit(1);
142 }
143
144 entry->ctrlr = ctrlr;
145 entry->next = g_controllers;
146 g_controllers = entry;
147
148 num_ns = spdk_nvme_ctrlr_get_num_ns(ctrlr);
149 for (nsid = 1; nsid <= num_ns; nsid++) {
150 ns = spdk_nvme_ctrlr_get_ns(ctrlr, nsid);
151 if (ns == NULL) {
152 continue;
153 }
154 register_ns(ctrlr, ns);
155 }
156 }
157
158 static void task_ctor(struct rte_mempool *mp, void *arg, void *__task, unsigned id)
159 {
160 struct reset_task *task = __task;
161
162 task->buf = spdk_zmalloc(g_io_size_bytes, 0x200, NULL);
163 if (task->buf == NULL) {
164 fprintf(stderr, "task->buf spdk_zmalloc failed\n");
165 exit(1);
166 }
167 }
168
static void io_complete(void *ctx, const struct spdk_nvme_cpl *completion);

/* Per-thread PRNG state for rand_r(); thread-local so workers never share it. */
static __thread unsigned int seed = 0;
172
173 static void
174 submit_single_io(struct ns_worker_ctx *ns_ctx)
175 {
176 struct reset_task *task = NULL;
177 uint64_t offset_in_ios;
178 int rc;
179 struct ns_entry *entry = ns_ctx->entry;
180
181 if (rte_mempool_get(task_pool, (void **)&task) != 0) {
182 fprintf(stderr, "task_pool rte_mempool_get failed\n");
183 exit(1);
184 }
185
186 task->ns_ctx = ns_ctx;
187 task->ns_ctx->io_submitted++;
188
189 if (g_is_random) {
190 offset_in_ios = rand_r(&seed) % entry->size_in_ios;
191 } else {
192 offset_in_ios = ns_ctx->offset_in_ios++;
193 if (ns_ctx->offset_in_ios == entry->size_in_ios) {
194 ns_ctx->offset_in_ios = 0;
195 }
196 }
197
198 if ((g_rw_percentage == 100) ||
199 (g_rw_percentage != 0 && ((rand_r(&seed) % 100) < g_rw_percentage))) {
200 rc = spdk_nvme_ns_cmd_read(entry->ns, ns_ctx->qpair, task->buf,
201 offset_in_ios * entry->io_size_blocks,
202 entry->io_size_blocks, io_complete, task, 0);
203 } else {
204 rc = spdk_nvme_ns_cmd_write(entry->ns, ns_ctx->qpair, task->buf,
205 offset_in_ios * entry->io_size_blocks,
206 entry->io_size_blocks, io_complete, task, 0);
207 }
208
209 if (rc != 0) {
210 fprintf(stderr, "starting I/O failed\n");
211 }
212
213 ns_ctx->current_queue_depth++;
214 }
215
216 static void
217 task_complete(struct reset_task *task, const struct spdk_nvme_cpl *completion)
218 {
219 struct ns_worker_ctx *ns_ctx;
220
221 ns_ctx = task->ns_ctx;
222 ns_ctx->current_queue_depth--;
223
224 if (spdk_nvme_cpl_is_error(completion)) {
225 ns_ctx->io_completed_error++;
226 } else {
227 ns_ctx->io_completed++;
228 }
229
230 rte_mempool_put(task_pool, task);
231
232 /*
233 * is_draining indicates when time has expired for the test run
234 * and we are just waiting for the previously submitted I/O
235 * to complete. In this case, do not submit a new I/O to replace
236 * the one just completed.
237 */
238 if (!ns_ctx->is_draining) {
239 submit_single_io(ns_ctx);
240 }
241 }
242
/* spdk_nvme_cmd_cb adapter: the callback context is the reset_task. */
static void
io_complete(void *ctx, const struct spdk_nvme_cpl *completion)
{
	struct reset_task *task = ctx;

	task_complete(task, completion);
}
248
/* Poll the qpair once; 0 means reap as many completions as are ready.
 * io_complete() fires synchronously for each reaped I/O. */
static void
check_io(struct ns_worker_ctx *ns_ctx)
{
	spdk_nvme_qpair_process_completions(ns_ctx->qpair, 0);
}
254
/* Prime the namespace with queue_depth concurrent I/O. */
static void
submit_io(struct ns_worker_ctx *ns_ctx, int queue_depth)
{
	int i;

	for (i = 0; i < queue_depth; i++) {
		submit_single_io(ns_ctx);
	}
}
262
263 static void
264 drain_io(struct ns_worker_ctx *ns_ctx)
265 {
266 ns_ctx->is_draining = true;
267 while (ns_ctx->current_queue_depth > 0) {
268 check_io(ns_ctx);
269 }
270 }
271
/*
 * Worker main loop: allocate a qpair per namespace, keep g_queue_depth I/O
 * in flight on each, issue one controller reset per namespace while more
 * than half the test time remains, then drain and free the qpairs.
 * Returns 0 on success, -1 on qpair-allocation or reset failure.
 */
static int
work_fn(void *arg)
{
	uint64_t tsc_end = spdk_get_ticks() + g_time_in_sec * g_tsc_rate;
	struct worker_thread *worker = (struct worker_thread *)arg;
	struct ns_worker_ctx *ns_ctx = NULL;
	bool did_reset = false;

	printf("Starting thread on core %u\n", worker->lcore);

	/* Submit initial I/O for each namespace. */
	ns_ctx = worker->ns_ctx;
	while (ns_ctx != NULL) {
		/* NOTE(review): if allocation fails partway, earlier qpairs are
		 * not freed here — acceptable for a test tool, but worth confirming. */
		ns_ctx->qpair = spdk_nvme_ctrlr_alloc_io_qpair(ns_ctx->entry->ctrlr, 0);
		if (ns_ctx->qpair == NULL) {
			fprintf(stderr, "spdk_nvme_ctrlr_alloc_io_qpair() failed on core %u\n", worker->lcore);
			return -1;
		}
		submit_io(ns_ctx, g_queue_depth);
		ns_ctx = ns_ctx->next;
	}

	while (1) {
		/*
		 * Check for completed I/O for each controller. A new
		 * I/O will be submitted in the io_complete callback
		 * to replace each I/O that is completed.
		 */
		ns_ctx = worker->ns_ctx;
		while (ns_ctx != NULL) {
			check_io(ns_ctx);
			ns_ctx = ns_ctx->next;
		}

		/* Fire one reset per controller while remaining time exceeds half
		 * the total, i.e. during the first half of the run.
		 * NOTE(review): tsc_end - spdk_get_ticks() underflows (unsigned)
		 * once the deadline passes; harmless here because did_reset is
		 * normally set on the first iteration — confirm if repurposing. */
		if (!did_reset && ((tsc_end - spdk_get_ticks()) / g_tsc_rate) > (uint64_t)g_time_in_sec / 2) {
			ns_ctx = worker->ns_ctx;
			while (ns_ctx != NULL) {
				if (spdk_nvme_ctrlr_reset(ns_ctx->entry->ctrlr) < 0) {
					fprintf(stderr, "nvme reset failed.\n");
					return -1;
				}
				ns_ctx = ns_ctx->next;
			}
			did_reset = true;
		}

		if (spdk_get_ticks() > tsc_end) {
			break;
		}
	}

	/* Time expired: wait out the in-flight I/O and release the qpairs. */
	ns_ctx = worker->ns_ctx;
	while (ns_ctx != NULL) {
		drain_io(ns_ctx);
		spdk_nvme_ctrlr_free_io_qpair(ns_ctx->qpair);
		ns_ctx = ns_ctx->next;
	}

	return 0;
}
332
/* Print command-line help for this tool. */
static void usage(char *program_name)
{
	printf("%s options", program_name);
	printf("\n"
	       "\t[-q io depth]\n"
	       "\t[-s io size in bytes]\n"
	       "\t[-w io pattern type, must be one of\n"
	       "\t\t(read, write, randread, randwrite, rw, randrw)]\n"
	       "\t[-M rwmixread (100 for reads, 0 for writes)]\n"
	       "\t[-t time in seconds(should be larger than 15 seconds)]\n"
	       "\t[-m max completions per poll]\n"
	       "\t\t(default:0 - unlimited)\n");
}
346
347 static int
348 print_stats(void)
349 {
350 uint64_t io_completed, io_submitted, io_completed_error;
351 uint64_t total_completed_io, total_submitted_io, total_completed_err_io;
352 struct worker_thread *worker;
353 struct ns_worker_ctx *ns_ctx;
354
355 total_completed_io = 0;
356 total_submitted_io = 0;
357 total_completed_err_io = 0;
358
359 worker = g_workers;
360 ns_ctx = worker->ns_ctx;
361 while (ns_ctx) {
362 io_completed = ns_ctx->io_completed;
363 io_submitted = ns_ctx->io_submitted;
364 io_completed_error = ns_ctx->io_completed_error;
365 total_completed_io += io_completed;
366 total_submitted_io += io_submitted;
367 total_completed_err_io += io_completed_error;
368 ns_ctx = ns_ctx->next;
369 }
370
371 printf("========================================================\n");
372 printf("%16lu IO completed successfully\n", total_completed_io);
373 printf("%16lu IO completed with error\n", total_completed_err_io);
374 printf("--------------------------------------------------------\n");
375 printf("%16lu IO completed total\n", total_completed_io + total_completed_err_io);
376 printf("%16lu IO submitted\n", total_submitted_io);
377
378 if (total_submitted_io != (total_completed_io + total_completed_err_io)) {
379 fprintf(stderr, "Some IO are missing......\n");
380 return -1;
381 }
382
383 return 0;
384 }
385
386 static int
387 parse_args(int argc, char **argv)
388 {
389 const char *workload_type;
390 int op;
391 bool mix_specified = false;
392
393 /* default value*/
394 g_queue_depth = 0;
395 g_io_size_bytes = 0;
396 workload_type = NULL;
397 g_time_in_sec = 0;
398 g_rw_percentage = -1;
399
400 while ((op = getopt(argc, argv, "m:q:s:t:w:M:")) != -1) {
401 switch (op) {
402 case 'q':
403 g_queue_depth = atoi(optarg);
404 break;
405 case 's':
406 g_io_size_bytes = atoi(optarg);
407 break;
408 case 't':
409 g_time_in_sec = atoi(optarg);
410 break;
411 case 'w':
412 workload_type = optarg;
413 break;
414 case 'M':
415 g_rw_percentage = atoi(optarg);
416 mix_specified = true;
417 break;
418 default:
419 usage(argv[0]);
420 return 1;
421 }
422 }
423
424 if (!g_queue_depth) {
425 usage(argv[0]);
426 return 1;
427 }
428 if (!g_io_size_bytes) {
429 usage(argv[0]);
430 return 1;
431 }
432 if (!workload_type) {
433 usage(argv[0]);
434 return 1;
435 }
436 if (!g_time_in_sec) {
437 usage(argv[0]);
438 return 1;
439 }
440
441 if (strcmp(workload_type, "read") &&
442 strcmp(workload_type, "write") &&
443 strcmp(workload_type, "randread") &&
444 strcmp(workload_type, "randwrite") &&
445 strcmp(workload_type, "rw") &&
446 strcmp(workload_type, "randrw")) {
447 fprintf(stderr,
448 "io pattern type must be one of\n"
449 "(read, write, randread, randwrite, rw, randrw)\n");
450 return 1;
451 }
452
453 if (!strcmp(workload_type, "read") ||
454 !strcmp(workload_type, "randread")) {
455 g_rw_percentage = 100;
456 }
457
458 if (!strcmp(workload_type, "write") ||
459 !strcmp(workload_type, "randwrite")) {
460 g_rw_percentage = 0;
461 }
462
463 if (!strcmp(workload_type, "read") ||
464 !strcmp(workload_type, "randread") ||
465 !strcmp(workload_type, "write") ||
466 !strcmp(workload_type, "randwrite")) {
467 if (mix_specified) {
468 fprintf(stderr, "Ignoring -M option... Please use -M option"
469 " only when using rw or randrw.\n");
470 }
471 }
472
473 if (!strcmp(workload_type, "rw") ||
474 !strcmp(workload_type, "randrw")) {
475 if (g_rw_percentage < 0 || g_rw_percentage > 100) {
476 fprintf(stderr,
477 "-M must be specified to value from 0 to 100 "
478 "for rw or randrw.\n");
479 return 1;
480 }
481 }
482
483 if (!strcmp(workload_type, "read") ||
484 !strcmp(workload_type, "write") ||
485 !strcmp(workload_type, "rw")) {
486 g_is_random = 0;
487 } else {
488 g_is_random = 1;
489 }
490
491 optind = 1;
492 return 0;
493 }
494
495 static int
496 register_workers(void)
497 {
498 struct worker_thread *worker;
499
500 worker = malloc(sizeof(struct worker_thread));
501 if (worker == NULL) {
502 perror("worker_thread malloc");
503 return -1;
504 }
505
506 memset(worker, 0, sizeof(struct worker_thread));
507 worker->lcore = rte_get_master_lcore();
508
509 g_workers = worker;
510
511 return 0;
512 }
513
514
/* Probe callback: attach to every controller found. */
static bool
probe_cb(void *cb_ctx, const struct spdk_nvme_transport_id *trid,
	 struct spdk_nvme_ctrlr_opts *opts)
{
	return true;
}
521
/* Attach callback: record the controller and its namespaces. */
static void
attach_cb(void *cb_ctx, const struct spdk_nvme_transport_id *trid,
	  struct spdk_nvme_ctrlr *ctrlr, const struct spdk_nvme_ctrlr_opts *opts)
{
	register_ctrlr(ctrlr);
}
528
529 static int
530 register_controllers(void)
531 {
532 printf("Initializing NVMe Controllers\n");
533
534 if (spdk_nvme_probe(NULL, NULL, probe_cb, attach_cb, NULL) != 0) {
535 fprintf(stderr, "spdk_nvme_probe() failed\n");
536 return 1;
537 }
538
539 return 0;
540 }
541
542 static void
543 unregister_controllers(void)
544 {
545 struct ctrlr_entry *entry = g_controllers;
546
547 while (entry) {
548 struct ctrlr_entry *next = entry->next;
549 spdk_nvme_detach(entry->ctrlr);
550 free(entry);
551 entry = next;
552 }
553 }
554
555 static int
556 associate_workers_with_ns(void)
557 {
558 struct ns_entry *entry = g_namespaces;
559 struct worker_thread *worker = g_workers;
560 struct ns_worker_ctx *ns_ctx;
561 int i, count;
562
563 count = g_num_namespaces;
564
565 for (i = 0; i < count; i++) {
566 if (entry == NULL) {
567 break;
568 }
569 ns_ctx = malloc(sizeof(struct ns_worker_ctx));
570 if (!ns_ctx) {
571 return -1;
572 }
573 memset(ns_ctx, 0, sizeof(*ns_ctx));
574
575 printf("Associating %s with lcore %d\n", entry->name, worker->lcore);
576 ns_ctx->entry = entry;
577 ns_ctx->next = worker->ns_ctx;
578 worker->ns_ctx = ns_ctx;
579
580 worker = g_workers;
581
582 entry = entry->next;
583 if (entry == NULL) {
584 entry = g_namespaces;
585 }
586 }
587
588 return 0;
589 }
590
591 static int
592 run_nvme_reset_cycle(int retry_count)
593 {
594 struct worker_thread *worker;
595 struct ns_worker_ctx *ns_ctx;
596
597 spdk_nvme_retry_count = retry_count;
598
599 if (work_fn(g_workers) != 0) {
600 return -1;
601 }
602
603 if (print_stats() != 0) {
604 return -1;
605 }
606
607 worker = g_workers;
608 ns_ctx = worker->ns_ctx;
609 while (ns_ctx != NULL) {
610 ns_ctx->io_completed = 0;
611 ns_ctx->io_completed_error = 0;
612 ns_ctx->io_submitted = 0;
613 ns_ctx->is_draining = false;
614 ns_ctx = ns_ctx->next;
615 }
616
617 return 0;
618 }
619
620 int main(int argc, char **argv)
621 {
622 int rc;
623 int i;
624 struct spdk_env_opts opts;
625
626 spdk_env_opts_init(&opts);
627 opts.name = "reset";
628 opts.core_mask = "0x1";
629 spdk_env_init(&opts);
630
631 rc = parse_args(argc, argv);
632 if (rc != 0) {
633 return rc;
634 }
635
636 task_pool = rte_mempool_create("task_pool", 8192,
637 sizeof(struct reset_task),
638 64, 0, NULL, NULL, task_ctor, NULL,
639 SOCKET_ID_ANY, 0);
640
641 g_tsc_rate = spdk_get_ticks_hz();
642
643 if (register_workers() != 0) {
644 return 1;
645 }
646
647 if (register_controllers() != 0) {
648 return 1;
649 }
650
651 if (associate_workers_with_ns() != 0) {
652 rc = 1;
653 goto cleanup;
654 }
655
656 printf("Initialization complete. Launching workers.\n");
657
658 for (i = 2; i >= 0; i--) {
659 rc = run_nvme_reset_cycle(i);
660 if (rc != 0) {
661 goto cleanup;
662 }
663 }
664
665 cleanup:
666 unregister_controllers();
667
668 if (rc != 0) {
669 fprintf(stderr, "%s: errors occured\n", argv[0]);
670 }
671
672 return rc;
673 }