/*-
 *   BSD LICENSE
 *
 *   Copyright (c) Intel Corporation.
 *   All rights reserved.
 *
 *   Redistribution and use in source and binary forms, with or without
 *   modification, are permitted provided that the following conditions
 *   are met:
 *
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in
 *       the documentation and/or other materials provided with the
 *       distribution.
 *     * Neither the name of Intel Corporation nor the names of its
 *       contributors may be used to endorse or promote products derived
 *       from this software without specific prior written permission.
 *
 *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include "spdk/stdinc.h"

#include "spdk/env.h"
#include "spdk/log.h"
#include "spdk/nvme.h"
#include "spdk/queue.h"
#include "spdk/string.h"
#include "spdk/util.h"
#include "spdk/likely.h"

struct ctrlr_entry {
	struct spdk_nvme_ctrlr *ctrlr;
	enum spdk_nvme_transport_type trtype;

	struct ctrlr_entry *next;
	char name[1024];
};

struct ns_entry {
	struct spdk_nvme_ctrlr *ctrlr;
	struct spdk_nvme_ns *ns;

	struct ns_entry *next;
	uint32_t io_size_blocks;
	uint32_t num_io_requests;
	uint64_t size_in_ios;
	uint32_t block_size;
	char name[1024];
};

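/*
 * Per-controller context owned by the master core. The mutex guards
 * current_queue_depth and the abort counters, which are updated both by
 * abort submissions from the I/O workers and by admin-queue polling on
 * the master core.
 */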
struct ctrlr_worker_ctx {
	pthread_mutex_t mutex;
	struct ctrlr_entry *entry;
	uint64_t abort_submitted;
	uint64_t abort_submit_failed;
	uint64_t successful_abort;
	uint64_t unsuccessful_abort;
	uint64_t abort_failed;
	uint64_t current_queue_depth;
	struct spdk_nvme_ctrlr *ctrlr;
	struct ctrlr_worker_ctx *next;
};

struct ns_worker_ctx {
	struct ns_entry *entry;
	uint64_t io_submitted;
	uint64_t io_completed;
	uint64_t io_aborted;
	uint64_t io_failed;
	uint64_t current_queue_depth;
	uint64_t offset_in_ios;
	bool is_draining;
	struct spdk_nvme_qpair *qpair;
	struct ctrlr_worker_ctx *ctrlr_ctx;
	struct ns_worker_ctx *next;
};

struct perf_task {
	struct ns_worker_ctx *ns_ctx;
	void *buf;
};

struct worker_thread {
	struct ns_worker_ctx *ns_ctx;
	struct ctrlr_worker_ctx *ctrlr_ctx;
	struct worker_thread *next;
	unsigned lcore;
};

static const char *g_workload_type = "read";
static struct ctrlr_entry *g_controllers;
static struct ns_entry *g_namespaces;
static int g_num_namespaces;
static struct worker_thread *g_workers;
static int g_num_workers;
static uint32_t g_master_core;

static int g_abort_interval = 1;

static uint64_t g_tsc_rate;

static uint32_t g_io_size_bytes = 131072;
static uint32_t g_max_io_size_blocks;
static int g_rw_percentage = -1;
static int g_is_random;
static int g_queue_depth = 128;
static int g_time_in_sec = 3;
static int g_dpdk_mem;
static int g_shm_id = -1;
static bool g_no_pci;
static bool g_warn;
static bool g_mix_specified;

static const char *g_core_mask;

struct trid_entry {
	struct spdk_nvme_transport_id trid;
	uint16_t nsid;
	TAILQ_ENTRY(trid_entry) tailq;
};

static TAILQ_HEAD(, trid_entry) g_trid_list = TAILQ_HEAD_INITIALIZER(g_trid_list);

static void io_complete(void *ctx, const struct spdk_nvme_cpl *cpl);

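/*
 * Format a human-readable controller name from its transport ID, e.g.
 * "PCIE (0000:04:00.0)". Returns the snprintf() result, or 0 for an
 * unknown transport type.
 */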
static int
build_nvme_name(char *name, size_t length, struct spdk_nvme_ctrlr *ctrlr)
{
	const struct spdk_nvme_transport_id *trid;
	int res = 0;

	trid = spdk_nvme_ctrlr_get_transport_id(ctrlr);

	switch (trid->trtype) {
	case SPDK_NVME_TRANSPORT_PCIE:
		res = snprintf(name, length, "PCIE (%s)", trid->traddr);
		break;
	case SPDK_NVME_TRANSPORT_RDMA:
		res = snprintf(name, length, "RDMA (addr:%s subnqn:%s)", trid->traddr, trid->subnqn);
		break;
	case SPDK_NVME_TRANSPORT_TCP:
		res = snprintf(name, length, "TCP (addr:%s subnqn:%s)", trid->traddr, trid->subnqn);
		break;
	default:
		fprintf(stderr, "Unknown transport type %d\n", trid->trtype);
		break;
	}
	return res;
}

static void
build_nvme_ns_name(char *name, size_t length, struct spdk_nvme_ctrlr *ctrlr, uint32_t nsid)
{
	int res = 0;

	res = build_nvme_name(name, length, ctrlr);
	if (res > 0) {
		snprintf(name + res, length - res, " NSID %u", nsid);
	}
}

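/*
 * Validate a namespace against the configured I/O size and, if usable, add
 * an ns_entry for it to the global list. num_io_requests is sized so the
 * qpair holds enough request objects for g_queue_depth I/Os even when the
 * driver splits them into child requests.
 */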
static void
register_ns(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_ns *ns)
{
	struct ns_entry *entry;
	const struct spdk_nvme_ctrlr_data *cdata;
	uint32_t max_xfer_size, entries, sector_size;
	uint64_t ns_size;
	struct spdk_nvme_io_qpair_opts opts;

	cdata = spdk_nvme_ctrlr_get_data(ctrlr);

	if (!spdk_nvme_ns_is_active(ns)) {
		printf("Controller %-20.20s (%-20.20s): Skipping inactive NS %u\n",
		       cdata->mn, cdata->sn,
		       spdk_nvme_ns_get_id(ns));
		g_warn = true;
		return;
	}

	ns_size = spdk_nvme_ns_get_size(ns);
	sector_size = spdk_nvme_ns_get_sector_size(ns);

	if (ns_size < g_io_size_bytes || sector_size > g_io_size_bytes) {
		printf("WARNING: controller %-20.20s (%-20.20s) ns %u has invalid "
		       "ns size %" PRIu64 " / block size %u for I/O size %u\n",
		       cdata->mn, cdata->sn, spdk_nvme_ns_get_id(ns),
		       ns_size, spdk_nvme_ns_get_sector_size(ns), g_io_size_bytes);
		g_warn = true;
		return;
	}

	max_xfer_size = spdk_nvme_ns_get_max_io_xfer_size(ns);
	spdk_nvme_ctrlr_get_default_io_qpair_opts(ctrlr, &opts, sizeof(opts));
	/* The NVMe driver may add additional entries based on the stripe size and
	 * maximum transfer size; we assume one more entry is used for the stripe.
	 */
	entries = (g_io_size_bytes - 1) / max_xfer_size + 2;
	if ((g_queue_depth * entries) > opts.io_queue_size) {
		printf("controller IO queue size %u is less than required\n",
		       opts.io_queue_size);
		printf("Consider using a lower queue depth or a smaller IO size, because "
		       "IO requests may be queued at the NVMe driver.\n");
	}
	/* For requests which have child requests, the parent request itself
	 * also occupies one entry.
	 */
	entries += 1;

	entry = calloc(1, sizeof(struct ns_entry));
	if (entry == NULL) {
		perror("ns_entry calloc");
		exit(1);
	}

	entry->ctrlr = ctrlr;
	entry->ns = ns;
	entry->num_io_requests = g_queue_depth * entries;

	entry->size_in_ios = ns_size / g_io_size_bytes;
	entry->io_size_blocks = g_io_size_bytes / sector_size;

	entry->block_size = spdk_nvme_ns_get_sector_size(ns);

	if (g_max_io_size_blocks < entry->io_size_blocks) {
		g_max_io_size_blocks = entry->io_size_blocks;
	}

	build_nvme_ns_name(entry->name, sizeof(entry->name), ctrlr, spdk_nvme_ns_get_id(ns));

	g_num_namespaces++;
	entry->next = g_namespaces;
	g_namespaces = entry;
}

static void
unregister_namespaces(void)
{
	struct ns_entry *entry = g_namespaces;

	while (entry) {
		struct ns_entry *next = entry->next;
		free(entry);
		entry = next;
	}
}

static void
register_ctrlr(struct spdk_nvme_ctrlr *ctrlr, struct trid_entry *trid_entry)
{
	struct spdk_nvme_ns *ns;
	struct ctrlr_entry *entry = malloc(sizeof(struct ctrlr_entry));
	uint32_t nsid;

	if (entry == NULL) {
		perror("ctrlr_entry malloc");
		exit(1);
	}

	build_nvme_name(entry->name, sizeof(entry->name), ctrlr);

	entry->ctrlr = ctrlr;
	entry->trtype = trid_entry->trid.trtype;
	entry->next = g_controllers;
	g_controllers = entry;

	if (trid_entry->nsid == 0) {
		for (nsid = spdk_nvme_ctrlr_get_first_active_ns(ctrlr);
		     nsid != 0; nsid = spdk_nvme_ctrlr_get_next_active_ns(ctrlr, nsid)) {
			ns = spdk_nvme_ctrlr_get_ns(ctrlr, nsid);
			if (ns == NULL) {
				continue;
			}
			register_ns(ctrlr, ns);
		}
	} else {
		ns = spdk_nvme_ctrlr_get_ns(ctrlr, trid_entry->nsid);
		if (!ns) {
			fprintf(stderr, "Namespace does not exist.\n");
			exit(1);
		}

		register_ns(ctrlr, ns);
	}
}

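/*
 * Completion callback for the Abort admin command. Per the NVMe
 * specification, bit 0 of completion dword 0 is cleared to 0 if the
 * target command was successfully aborted, and set to 1 otherwise.
 */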
static void
abort_complete(void *ctx, const struct spdk_nvme_cpl *cpl)
{
	struct ctrlr_worker_ctx *ctrlr_ctx = ctx;

	ctrlr_ctx->current_queue_depth--;
	if (spdk_unlikely(spdk_nvme_cpl_is_error(cpl))) {
		ctrlr_ctx->abort_failed++;
	} else if ((cpl->cdw0 & 0x1) == 0) {
		ctrlr_ctx->successful_abort++;
	} else {
		ctrlr_ctx->unsuccessful_abort++;
	}
}

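/*
 * Submit an Abort command on the admin queue targeting the given task's
 * outstanding I/O. spdk_nvme_ctrlr_cmd_abort_ext() identifies the command
 * to abort by its completion callback argument (the task pointer).
 */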
static void
abort_task(struct perf_task *task)
{
	struct ns_worker_ctx *ns_ctx = task->ns_ctx;
	struct ctrlr_worker_ctx *ctrlr_ctx = ns_ctx->ctrlr_ctx;
	int rc;

	/* Hold mutex to guard ctrlr_ctx->current_queue_depth. */
	pthread_mutex_lock(&ctrlr_ctx->mutex);

	rc = spdk_nvme_ctrlr_cmd_abort_ext(ctrlr_ctx->ctrlr, ns_ctx->qpair, task, abort_complete,
					   ctrlr_ctx);

	if (spdk_unlikely(rc != 0)) {
		ctrlr_ctx->abort_submit_failed++;
	} else {
		ctrlr_ctx->current_queue_depth++;
		ctrlr_ctx->abort_submitted++;
	}

	pthread_mutex_unlock(&ctrlr_ctx->mutex);
}

static __thread unsigned int seed = 0;

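/*
 * Pick the next LBA (randomly or sequentially), issue a read or write
 * according to the configured mix, and submit an abort for every
 * g_abort_interval-th I/O that is successfully submitted.
 */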
static inline void
submit_single_io(struct perf_task *task)
{
	uint64_t offset_in_ios, lba;
	int rc;
	struct ns_worker_ctx *ns_ctx = task->ns_ctx;
	struct ns_entry *entry = ns_ctx->entry;

	if (g_is_random) {
		offset_in_ios = rand_r(&seed) % entry->size_in_ios;
	} else {
		offset_in_ios = ns_ctx->offset_in_ios++;
		if (ns_ctx->offset_in_ios == entry->size_in_ios) {
			ns_ctx->offset_in_ios = 0;
		}
	}

	lba = offset_in_ios * entry->io_size_blocks;

	if ((g_rw_percentage == 100) ||
	    (g_rw_percentage != 0 && (rand_r(&seed) % 100) < g_rw_percentage)) {
		rc = spdk_nvme_ns_cmd_read(entry->ns, ns_ctx->qpair, task->buf,
					   lba, entry->io_size_blocks, io_complete, task, 0);
	} else {
		rc = spdk_nvme_ns_cmd_write(entry->ns, ns_ctx->qpair, task->buf,
					    lba, entry->io_size_blocks, io_complete, task, 0);
	}

	if (spdk_unlikely(rc != 0)) {
		fprintf(stderr, "I/O submission failed\n");
	} else {
		ns_ctx->current_queue_depth++;
		ns_ctx->io_submitted++;

		if ((ns_ctx->io_submitted % g_abort_interval) == 0) {
			abort_task(task);
		}
	}
}

static void
io_complete(void *ctx, const struct spdk_nvme_cpl *cpl)
{
	struct perf_task *task = ctx;
	struct ns_worker_ctx *ns_ctx = task->ns_ctx;

	ns_ctx->current_queue_depth--;
	if (spdk_unlikely(spdk_nvme_cpl_is_error(cpl))) {
		ns_ctx->io_failed++;
	} else {
		ns_ctx->io_completed++;
	}

	/* is_draining indicates when time has expired for the test run and we are
	 * just waiting for the previously submitted I/O to complete. In this case,
	 * do not submit a new I/O to replace the one just completed.
	 */
	if (spdk_unlikely(ns_ctx->is_draining)) {
		spdk_dma_free(task->buf);
		free(task);
	} else {
		submit_single_io(task);
	}
}

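/*
 * Allocate a task together with a DMA-safe data buffer. The buffer is
 * allocated with spdk_dma_zmalloc() at 512-byte (0x200) alignment so the
 * NVMe driver can use it directly for the data transfer.
 */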
static struct perf_task *
allocate_task(struct ns_worker_ctx *ns_ctx)
{
	struct perf_task *task;

	task = calloc(1, sizeof(*task));
	if (task == NULL) {
		fprintf(stderr, "Failed to allocate task\n");
		exit(1);
	}

	task->buf = spdk_dma_zmalloc(g_io_size_bytes, 0x200, NULL);
	if (task->buf == NULL) {
		free(task);
		fprintf(stderr, "Failed to allocate task->buf\n");
		exit(1);
	}

	task->ns_ctx = ns_ctx;

	return task;
}

static void
submit_io(struct ns_worker_ctx *ns_ctx, int queue_depth)
{
	struct perf_task *task;

	while (queue_depth-- > 0) {
		task = allocate_task(ns_ctx);
		submit_single_io(task);
	}
}

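/*
 * Main loop for each worker core: allocate one qpair per assigned namespace,
 * fill every qpair to g_queue_depth, then poll I/O completions (and, on the
 * master core, admin completions for the abort commands) until the test time
 * expires, and finally drain all outstanding commands.
 */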
static int
work_fn(void *arg)
{
	struct worker_thread *worker = (struct worker_thread *)arg;
	struct ns_worker_ctx *ns_ctx;
	struct ctrlr_worker_ctx *ctrlr_ctx;
	struct ns_entry *ns_entry;
	struct spdk_nvme_io_qpair_opts opts;
	uint64_t tsc_end;
	uint32_t unfinished_ctx;

	/* Allocate queue pair for each namespace. */
	ns_ctx = worker->ns_ctx;
	while (ns_ctx != NULL) {
		ns_entry = ns_ctx->entry;

		spdk_nvme_ctrlr_get_default_io_qpair_opts(ns_entry->ctrlr, &opts, sizeof(opts));
		if (opts.io_queue_requests < ns_entry->num_io_requests) {
			opts.io_queue_requests = ns_entry->num_io_requests;
		}

		ns_ctx->qpair = spdk_nvme_ctrlr_alloc_io_qpair(ns_entry->ctrlr, &opts, sizeof(opts));
		if (ns_ctx->qpair == NULL) {
			fprintf(stderr, "spdk_nvme_ctrlr_alloc_io_qpair failed\n");
			return 1;
		}

		ns_ctx = ns_ctx->next;
	}

	tsc_end = spdk_get_ticks() + g_time_in_sec * g_tsc_rate;

	/* Submit initial I/O for each namespace. */
	ns_ctx = worker->ns_ctx;
	while (ns_ctx != NULL) {
		submit_io(ns_ctx, g_queue_depth);
		ns_ctx = ns_ctx->next;
	}

	while (1) {
		ns_ctx = worker->ns_ctx;
		while (ns_ctx != NULL) {
			spdk_nvme_qpair_process_completions(ns_ctx->qpair, 0);
			ns_ctx = ns_ctx->next;
		}

		if (worker->lcore == g_master_core) {
			ctrlr_ctx = worker->ctrlr_ctx;
			while (ctrlr_ctx) {
				/* Hold mutex to guard ctrlr_ctx->current_queue_depth. */
				pthread_mutex_lock(&ctrlr_ctx->mutex);
				spdk_nvme_ctrlr_process_admin_completions(ctrlr_ctx->ctrlr);
				pthread_mutex_unlock(&ctrlr_ctx->mutex);
				ctrlr_ctx = ctrlr_ctx->next;
			}
		}

		if (spdk_get_ticks() > tsc_end) {
			break;
		}
	}

	do {
		unfinished_ctx = 0;

		ns_ctx = worker->ns_ctx;
		while (ns_ctx != NULL) {
			if (!ns_ctx->is_draining) {
				ns_ctx->is_draining = true;
			}
			if (ns_ctx->current_queue_depth > 0) {
				spdk_nvme_qpair_process_completions(ns_ctx->qpair, 0);
				if (ns_ctx->current_queue_depth == 0) {
					spdk_nvme_ctrlr_free_io_qpair(ns_ctx->qpair);
				} else {
					unfinished_ctx++;
				}
			}
			ns_ctx = ns_ctx->next;
		}
	} while (unfinished_ctx > 0);

	if (worker->lcore == g_master_core) {
		do {
			unfinished_ctx = 0;

			ctrlr_ctx = worker->ctrlr_ctx;
			while (ctrlr_ctx != NULL) {
				pthread_mutex_lock(&ctrlr_ctx->mutex);
				if (ctrlr_ctx->current_queue_depth > 0) {
					spdk_nvme_ctrlr_process_admin_completions(ctrlr_ctx->ctrlr);
					if (ctrlr_ctx->current_queue_depth > 0) {
						unfinished_ctx++;
					}
				}
				pthread_mutex_unlock(&ctrlr_ctx->mutex);
				ctrlr_ctx = ctrlr_ctx->next;
			}
		} while (unfinished_ctx > 0);
	}

	return 0;
}

static void
usage(char *program_name)
{
	printf("%s options", program_name);

	printf("\n");
	printf("\t[-q io depth]\n");
	printf("\t[-o io size in bytes]\n");
	printf("\t[-w io pattern type, must be one of\n");
	printf("\t\t(read, write, randread, randwrite, rw, randrw)]\n");
	printf("\t[-M rwmixread (100 for reads, 0 for writes)]\n");
	printf("\t[-t time in seconds]\n");
	printf("\t[-c core mask for I/O submission/completion.]\n");
	printf("\t\t(default: 1)\n");
	printf("\t[-r Transport ID for local PCIe NVMe or NVMeoF]\n");
	printf("\t Format: 'key:value [key:value] ...'\n");
	printf("\t Keys:\n");
	printf("\t  trtype      Transport type (e.g. PCIe, RDMA)\n");
	printf("\t  adrfam      Address family (e.g. IPv4, IPv6)\n");
	printf("\t  traddr      Transport address (e.g. 0000:04:00.0 for PCIe or 192.168.100.8 for RDMA)\n");
	printf("\t  trsvcid     Transport service identifier (e.g. 4420)\n");
	printf("\t  subnqn      Subsystem NQN (default: %s)\n", SPDK_NVMF_DISCOVERY_NQN);
	printf("\t Example: -r 'trtype:PCIe traddr:0000:04:00.0' for PCIe or\n");
	printf("\t          -r 'trtype:RDMA adrfam:IPv4 traddr:192.168.100.8 trsvcid:4420' for NVMeoF\n");
	printf("\t[-s DPDK huge memory size in MB.]\n");
	printf("\t[-i shared memory group ID]\n");
	printf("\t[-a abort interval.]\n");
	printf("\t");
	spdk_log_usage(stdout, "-T");
#ifdef DEBUG
	printf("\t[-G enable debug logging]\n");
#else
	printf("\t[-G enable debug logging (flag disabled, must reconfigure with --enable-debug)]\n");
#endif
}

static void
unregister_trids(void)
{
	struct trid_entry *trid_entry, *tmp;

	TAILQ_FOREACH_SAFE(trid_entry, &g_trid_list, tailq, tmp) {
		TAILQ_REMOVE(&g_trid_list, trid_entry, tailq);
		free(trid_entry);
	}
}

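/*
 * Parse one -r transport ID string into a trid_entry. An optional
 * "ns:<id>" key in the same string selects a single namespace; nsid 0
 * (the default) means "use every active namespace".
 */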
static int
add_trid(const char *trid_str)
{
	struct trid_entry *trid_entry;
	struct spdk_nvme_transport_id *trid;
	char *ns;

	trid_entry = calloc(1, sizeof(*trid_entry));
	if (trid_entry == NULL) {
		return -1;
	}

	trid = &trid_entry->trid;
	trid->trtype = SPDK_NVME_TRANSPORT_PCIE;
	snprintf(trid->subnqn, sizeof(trid->subnqn), "%s", SPDK_NVMF_DISCOVERY_NQN);

	if (spdk_nvme_transport_id_parse(trid, trid_str) != 0) {
		fprintf(stderr, "Invalid transport ID format '%s'\n", trid_str);
		free(trid_entry);
		return 1;
	}

	spdk_nvme_transport_id_populate_trstring(trid,
			spdk_nvme_transport_id_trtype_str(trid->trtype));

	ns = strcasestr(trid_str, "ns:");
	if (ns) {
		char nsid_str[6]; /* 5 digits maximum in an nsid */
		int len;
		int nsid;

		ns += 3;

		len = strcspn(ns, " \t\n");
		if (len > 5) {
			fprintf(stderr, "NVMe namespace IDs must be 5 digits or less\n");
			free(trid_entry);
			return 1;
		}

		memcpy(nsid_str, ns, len);
		nsid_str[len] = '\0';

		nsid = spdk_strtol(nsid_str, 10);
		if (nsid <= 0 || nsid > 65535) {
			fprintf(stderr, "NVMe namespace IDs must be less than 65536 and greater than 0\n");
			free(trid_entry);
			return 1;
		}

		trid_entry->nsid = (uint16_t)nsid;
	}

	TAILQ_INSERT_TAIL(&g_trid_list, trid_entry, tailq);
	return 0;
}

static int
parse_args(int argc, char **argv)
{
	int op;
	long int val;
	int rc;

	while ((op = getopt(argc, argv, "a:c:Gi:o:q:r:s:t:w:M:T:")) != -1) {
		switch (op) {
		case 'a':
		case 'i':
		case 'o':
		case 'q':
		case 's':
		case 't':
		case 'M':
			val = spdk_strtol(optarg, 10);
			if (val < 0) {
				fprintf(stderr, "Converting a string to an integer failed\n");
				return val;
			}
			switch (op) {
			case 'a':
				g_abort_interval = val;
				break;
			case 'i':
				g_shm_id = val;
				break;
			case 'o':
				g_io_size_bytes = val;
				break;
			case 'q':
				g_queue_depth = val;
				break;
			case 's':
				g_dpdk_mem = val;
				break;
			case 't':
				g_time_in_sec = val;
				break;
			case 'M':
				g_rw_percentage = val;
				g_mix_specified = true;
				break;
			}
			break;
		case 'c':
			g_core_mask = optarg;
			break;
		case 'r':
			if (add_trid(optarg)) {
				usage(argv[0]);
				return 1;
			}
			break;
		case 'w':
			g_workload_type = optarg;
			break;
		case 'G':
#ifndef DEBUG
			fprintf(stderr, "%s must be configured with --enable-debug for -G flag\n",
				argv[0]);
			usage(argv[0]);
			return 1;
#else
			spdk_log_set_flag("nvme");
			spdk_log_set_print_level(SPDK_LOG_DEBUG);
			break;
#endif
		case 'T':
			rc = spdk_log_set_flag(optarg);
			if (rc < 0) {
				fprintf(stderr, "unknown flag\n");
				usage(argv[0]);
				exit(EXIT_FAILURE);
			}
			spdk_log_set_print_level(SPDK_LOG_DEBUG);
#ifndef DEBUG
			fprintf(stderr, "%s must be rebuilt with CONFIG_DEBUG=y for -T flag.\n",
				argv[0]);
			usage(argv[0]);
			return 0;
#endif
			break;
		default:
			usage(argv[0]);
			return 1;
		}
	}

	if (!g_queue_depth) {
		fprintf(stderr, "missing -q (queue depth) operand\n");
		usage(argv[0]);
		return 1;
	}
	if (!g_io_size_bytes) {
		fprintf(stderr, "missing -o (io size) operand\n");
		usage(argv[0]);
		return 1;
	}
	if (!g_workload_type) {
		fprintf(stderr, "missing -w (io pattern type) operand\n");
		usage(argv[0]);
		return 1;
	}

	if (!g_time_in_sec) {
		usage(argv[0]);
		return 1;
	}

	if (strncmp(g_workload_type, "rand", 4) == 0) {
		g_is_random = 1;
		g_workload_type = &g_workload_type[4];
	}

	if (strcmp(g_workload_type, "read") == 0 || strcmp(g_workload_type, "write") == 0) {
		g_rw_percentage = strcmp(g_workload_type, "read") == 0 ? 100 : 0;
		if (g_mix_specified) {
			fprintf(stderr, "Ignoring -M option... Please use the -M option"
				" only with rw or randrw.\n");
		}
	} else if (strcmp(g_workload_type, "rw") == 0) {
		if (g_rw_percentage < 0 || g_rw_percentage > 100) {
			fprintf(stderr,
				"-M must be set to a value from 0 to 100 "
				"for rw or randrw.\n");
			return 1;
		}
	} else {
		fprintf(stderr,
			"io pattern type must be one of\n"
			"(read, write, randread, randwrite, rw, randrw)\n");
		return 1;
	}

	if (TAILQ_EMPTY(&g_trid_list)) {
		/* If no transport IDs were specified, default to enumerating all local PCIe devices. */
		add_trid("trtype:PCIe");
	} else {
		struct trid_entry *trid_entry, *trid_entry_tmp;

		g_no_pci = true;
		/* Check whether any of the given transport IDs is a local PCIe type. */
		TAILQ_FOREACH_SAFE(trid_entry, &g_trid_list, tailq, trid_entry_tmp) {
			if (trid_entry->trid.trtype == SPDK_NVME_TRANSPORT_PCIE) {
				g_no_pci = false;
				break;
			}
		}
	}

	return 0;
}

static int
register_workers(void)
{
	uint32_t i;
	struct worker_thread *worker;

	g_workers = NULL;
	g_num_workers = 0;

	SPDK_ENV_FOREACH_CORE(i) {
		worker = calloc(1, sizeof(*worker));
		if (worker == NULL) {
			fprintf(stderr, "Unable to allocate worker\n");
			return -1;
		}

		worker->lcore = i;
		worker->next = g_workers;
		g_workers = worker;
		g_num_workers++;
	}

	return 0;
}

static void
unregister_workers(void)
{
	struct worker_thread *worker = g_workers;

	/* Free the namespace contexts, controller contexts, and worker threads. */
	while (worker) {
		struct worker_thread *next_worker = worker->next;
		struct ns_worker_ctx *ns_ctx = worker->ns_ctx;
		struct ctrlr_worker_ctx *ctrlr_ctx = worker->ctrlr_ctx;

		while (ns_ctx) {
			struct ns_worker_ctx *next_ns_ctx = ns_ctx->next;

			printf("NS: %s I/O completed: %" PRIu64 ", failed: %" PRIu64 "\n",
			       ns_ctx->entry->name, ns_ctx->io_completed, ns_ctx->io_failed);
			free(ns_ctx);
			ns_ctx = next_ns_ctx;
		}

		while (ctrlr_ctx) {
			struct ctrlr_worker_ctx *next_ctrlr_ctx = ctrlr_ctx->next;

			printf("CTRLR: %s abort submitted %" PRIu64 ", failed to submit %" PRIu64 "\n",
			       ctrlr_ctx->entry->name, ctrlr_ctx->abort_submitted,
			       ctrlr_ctx->abort_submit_failed);
			printf("\t successful %" PRIu64 ", unsuccessful %" PRIu64 ", failed %" PRIu64 "\n",
			       ctrlr_ctx->successful_abort, ctrlr_ctx->unsuccessful_abort,
			       ctrlr_ctx->abort_failed);
			free(ctrlr_ctx);
			ctrlr_ctx = next_ctrlr_ctx;
		}

		free(worker);
		worker = next_worker;
	}
}

static bool
probe_cb(void *cb_ctx, const struct spdk_nvme_transport_id *trid,
	 struct spdk_nvme_ctrlr_opts *opts)
{
	return true;
}

static void
attach_cb(void *cb_ctx, const struct spdk_nvme_transport_id *trid,
	  struct spdk_nvme_ctrlr *ctrlr, const struct spdk_nvme_ctrlr_opts *opts)
{
	struct trid_entry *trid_entry = cb_ctx;
	struct spdk_pci_addr pci_addr;
	struct spdk_pci_device *pci_dev;
	struct spdk_pci_id pci_id;

	if (trid->trtype != SPDK_NVME_TRANSPORT_PCIE) {
		printf("Attached to NVMe over Fabrics controller at %s:%s: %s\n",
		       trid->traddr, trid->trsvcid,
		       trid->subnqn);
	} else {
		if (spdk_pci_addr_parse(&pci_addr, trid->traddr)) {
			return;
		}

		pci_dev = spdk_nvme_ctrlr_get_pci_device(ctrlr);
		if (!pci_dev) {
			return;
		}

		pci_id = spdk_pci_device_get_id(pci_dev);

		printf("Attached to NVMe Controller at %s [%04x:%04x]\n",
		       trid->traddr,
		       pci_id.vendor_id, pci_id.device_id);
	}

	register_ctrlr(ctrlr, trid_entry);
}

static int
register_controllers(void)
{
	struct trid_entry *trid_entry;

	printf("Initializing NVMe Controllers\n");

	TAILQ_FOREACH(trid_entry, &g_trid_list, tailq) {
		if (spdk_nvme_probe(&trid_entry->trid, trid_entry, probe_cb, attach_cb, NULL) != 0) {
			fprintf(stderr, "spdk_nvme_probe() failed for transport address '%s'\n",
				trid_entry->trid.traddr);
			return -1;
		}
	}

	return 0;
}

static void
unregister_controllers(void)
{
	struct ctrlr_entry *entry = g_controllers;

	while (entry) {
		struct ctrlr_entry *next = entry->next;

		spdk_nvme_detach(entry->ctrlr);
		free(entry);
		entry = next;
	}
}

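/*
 * Attach a ctrlr_worker_ctx for every registered controller to the master
 * core's worker. Abort commands go through the admin queue, which is polled
 * only on the master core, so all controller contexts live there.
 */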
static int
associate_master_worker_with_ctrlr(void)
{
	struct ctrlr_entry *entry = g_controllers;
	struct worker_thread *worker = g_workers;
	struct ctrlr_worker_ctx *ctrlr_ctx;

	while (worker) {
		if (worker->lcore == g_master_core) {
			break;
		}
		worker = worker->next;
	}

	if (!worker) {
		return -1;
	}

	while (entry) {
		ctrlr_ctx = calloc(1, sizeof(struct ctrlr_worker_ctx));
		if (!ctrlr_ctx) {
			return -1;
		}

		pthread_mutex_init(&ctrlr_ctx->mutex, NULL);
		ctrlr_ctx->entry = entry;
		ctrlr_ctx->ctrlr = entry->ctrlr;
		ctrlr_ctx->next = worker->ctrlr_ctx;
		worker->ctrlr_ctx = ctrlr_ctx;

		entry = entry->next;
	}

	return 0;
}

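/*
 * Look up the master worker's ctrlr_worker_ctx for the given controller, so
 * that a namespace context can route its abort submissions to it.
 */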
static struct ctrlr_worker_ctx *
get_ctrlr_worker_ctx(struct spdk_nvme_ctrlr *ctrlr)
{
	struct worker_thread *worker = g_workers;
	struct ctrlr_worker_ctx *ctrlr_ctx;

	while (worker != NULL) {
		if (worker->lcore == g_master_core) {
			break;
		}
		worker = worker->next;
	}

	if (!worker) {
		return NULL;
	}

	ctrlr_ctx = worker->ctrlr_ctx;

	while (ctrlr_ctx != NULL) {
		if (ctrlr_ctx->ctrlr == ctrlr) {
			return ctrlr_ctx;
		}
		ctrlr_ctx = ctrlr_ctx->next;
	}

	return NULL;
}

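/*
 * Assign namespaces to workers round-robin: walk both lists in parallel and
 * wrap whichever runs out first, so every namespace gets a worker and every
 * worker gets at least one namespace where possible.
 */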
static int
associate_workers_with_ns(void)
{
	struct ns_entry *entry = g_namespaces;
	struct worker_thread *worker = g_workers;
	struct ns_worker_ctx *ns_ctx;
	int i, count;

	count = g_num_namespaces > g_num_workers ? g_num_namespaces : g_num_workers;

	for (i = 0; i < count; i++) {
		if (entry == NULL) {
			break;
		}

		ns_ctx = calloc(1, sizeof(struct ns_worker_ctx));
		if (!ns_ctx) {
			return -1;
		}

		printf("Associating %s with lcore %u\n", entry->name, worker->lcore);
		ns_ctx->entry = entry;
		ns_ctx->ctrlr_ctx = get_ctrlr_worker_ctx(entry->ctrlr);
		if (!ns_ctx->ctrlr_ctx) {
			free(ns_ctx);
			return -1;
		}

		ns_ctx->next = worker->ns_ctx;
		worker->ns_ctx = ns_ctx;

		worker = worker->next;
		if (worker == NULL) {
			worker = g_workers;
		}

		entry = entry->next;
		if (entry == NULL) {
			entry = g_namespaces;
		}
	}

	return 0;
}

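/*
 * Example invocation (illustrative; the PCIe address below is hypothetical):
 *
 *   ./abort -q 128 -o 4096 -w randread -t 10 -a 4 \
 *           -r 'trtype:PCIe traddr:0000:04:00.0'
 *
 * This runs 10 seconds of 4 KiB random reads at queue depth 128 and submits
 * an abort for every 4th I/O.
 */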
int main(int argc, char **argv)
{
	int rc;
	struct worker_thread *worker, *master_worker;
	struct spdk_env_opts opts;

	rc = parse_args(argc, argv);
	if (rc != 0) {
		return rc;
	}

	spdk_env_opts_init(&opts);
	opts.name = "abort";
	opts.shm_id = g_shm_id;
	if (g_core_mask) {
		opts.core_mask = g_core_mask;
	}

	if (g_dpdk_mem) {
		opts.mem_size = g_dpdk_mem;
	}
	if (g_no_pci) {
		opts.no_pci = g_no_pci;
	}
	if (spdk_env_init(&opts) < 0) {
		fprintf(stderr, "Unable to initialize SPDK env\n");
		rc = -1;
		goto cleanup;
	}

	g_tsc_rate = spdk_get_ticks_hz();

	/* Record the master core up front; the per-controller abort contexts
	 * created below are tied to the master core's worker.
	 */
	g_master_core = spdk_env_get_current_core();

	if (register_workers() != 0) {
		rc = -1;
		goto cleanup;
	}

	if (register_controllers() != 0) {
		rc = -1;
		goto cleanup;
	}

	if (g_warn) {
		printf("WARNING: Some requested NVMe devices were skipped\n");
	}

	if (g_num_namespaces == 0) {
		fprintf(stderr, "No valid NVMe controllers found\n");
		rc = -1;
		goto cleanup;
	}

	if (associate_master_worker_with_ctrlr() != 0) {
		rc = -1;
		goto cleanup;
	}

	if (associate_workers_with_ns() != 0) {
		rc = -1;
		goto cleanup;
	}

	printf("Initialization complete. Launching workers.\n");

	/* Launch all of the slave workers */
	master_worker = NULL;
	worker = g_workers;
	while (worker != NULL) {
		if (worker->lcore != g_master_core) {
			spdk_env_thread_launch_pinned(worker->lcore, work_fn, worker);
		} else {
			assert(master_worker == NULL);
			master_worker = worker;
		}
		worker = worker->next;
	}

	assert(master_worker != NULL);
	rc = work_fn(master_worker);

	spdk_env_thread_wait_all();

cleanup:
	unregister_trids();
	unregister_workers();
	unregister_namespaces();
	unregister_controllers();

	if (rc != 0) {
		fprintf(stderr, "%s: errors occurred\n", argv[0]);
	}

	return rc;
}