]> git.proxmox.com Git - ceph.git/blob - ceph/src/spdk/examples/nvme/arbitration/arbitration.c
update sources to ceph Nautilus 14.2.1
[ceph.git] / ceph / src / spdk / examples / nvme / arbitration / arbitration.c
1 /*-
2 * BSD LICENSE
3 *
4 * Copyright (c) Intel Corporation.
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 *
11 * * Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * * Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in
15 * the documentation and/or other materials provided with the
16 * distribution.
17 * * Neither the name of Intel Corporation nor the names of its
18 * contributors may be used to endorse or promote products derived
19 * from this software without specific prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32 */
33
34 #include "spdk/stdinc.h"
35
36 #include "spdk/nvme.h"
37 #include "spdk/env.h"
38 #include "spdk/string.h"
39 #include "spdk/nvme_intel.h"
40
/*
 * Per-controller bookkeeping node (singly linked via g_controllers):
 * the controller handle, a buffer for the Intel vendor-specific
 * read/write latency log page, and a printable "model (serial)" name.
 */
struct ctrlr_entry {
	struct spdk_nvme_ctrlr *ctrlr;
	struct spdk_nvme_intel_rw_latency_page latency_page;
	struct ctrlr_entry *next;
	char name[1024];
};
47
/*
 * Per-namespace bookkeeping node (singly linked via g_namespaces).
 * Sizes are pre-computed in register_ns() so the hot I/O path only
 * multiplies/compares.
 */
struct ns_entry {
	struct {
		struct spdk_nvme_ctrlr *ctrlr;
		struct spdk_nvme_ns *ns;
	} nvme;

	struct ns_entry *next;
	uint32_t io_size_blocks;	/* configured I/O size expressed in sectors */
	uint64_t size_in_ios;		/* namespace capacity in I/O-size units */
	char name[1024];
};
59
/*
 * Per-(worker, namespace) I/O state. Each worker thread owns a list of
 * these; each holds the qpair it submits on and its in-flight accounting.
 */
struct ns_worker_ctx {
	struct ns_entry *entry;
	uint64_t io_completed;		/* total completions observed so far */
	uint64_t current_queue_depth;	/* I/Os currently in flight */
	uint64_t offset_in_ios;		/* next offset for sequential workloads */
	bool is_draining;		/* set once the timed run has ended */
	struct spdk_nvme_qpair *qpair;
	struct ns_worker_ctx *next;
};
69
/* One outstanding I/O: its owning context plus the DMA-able data buffer. */
struct arb_task {
	struct ns_worker_ctx *ns_ctx;
	void *buf;
};
74
/*
 * One polling thread pinned to an lcore. All qpairs a worker allocates
 * use the same queue priority (meaningful only under WRR arbitration).
 */
struct worker_thread {
	struct ns_worker_ctx *ns_ctx;	/* namespaces this worker drives */
	struct worker_thread *next;
	unsigned lcore;
	enum spdk_nvme_qprio qprio;
};
81
/*
 * Global run configuration and shared counters; defaults live in
 * g_arbitration below and are overridden by parse_args().
 */
struct arb_context {
	int shm_id;			/* SPDK shared-memory group ID (-i) */
	int outstanding_commands;	/* in-flight admin commands being polled */
	int num_namespaces;
	int num_workers;
	int rw_percentage;		/* read percentage for mixed workloads (-M) */
	int is_random;			/* nonzero for randread/randwrite/randrw */
	int queue_depth;		/* per-namespace queue depth (-q) */
	int time_in_sec;		/* run duration (-t) */
	int io_count;			/* I/O count used for reporting (-n) */
	uint8_t latency_tracking_enable;	/* Intel latency tracking (-l) */
	uint8_t arbitration_mechanism;		/* CC.AMS encoding (-a) */
	uint8_t arbitration_config;		/* user WRR weights enabled (-b) */
	uint32_t io_size_bytes;		/* per-I/O size (-s) */
	uint32_t max_completions;	/* completions reaped per poll, 0 = unlimited */
	uint64_t tsc_rate;		/* ticks per second, from spdk_get_ticks_hz() */
	const char *core_mask;		/* lcore mask for workers (-c) */
	const char *workload_type;	/* read/write/randread/... (-w) */
};
101
/* Cached result of a Get Features admin command, indexed by feature ID. */
struct feature {
	uint32_t result;	/* completion dword 0 returned by the controller */
	bool valid;		/* true once a successful completion arrived */
};
106
/* Pool of arb_task objects shared by all workers. */
static struct spdk_mempool *task_pool = NULL;

/* Singly linked lists built during discovery / worker registration. */
static struct ctrlr_entry *g_controllers = NULL;
static struct ns_entry *g_namespaces = NULL;
static struct worker_thread *g_workers = NULL;

/* Get/Set Features results, indexed by NVMe feature identifier (0-255). */
static struct feature features[256];

/* Run configuration defaults; see parse_args() for the matching options. */
static struct arb_context g_arbitration = {
	.shm_id = -1,
	.outstanding_commands = 0,
	.num_workers = 0,
	.num_namespaces = 0,
	.rw_percentage = 50,
	.queue_depth = 64,
	.time_in_sec = 60,
	.io_count = 100000,
	.latency_tracking_enable = 0,
	.arbitration_mechanism = SPDK_NVME_CC_AMS_RR,
	.arbitration_config = 0,
	.io_size_bytes = 131072,
	.max_completions = 0,
	/* Default 4 cores for urgent/high/medium/low */
	.core_mask = "0xf",
	.workload_type = "randrw",
};
133
/*
 * For weighted round robin arbitration mechanism, the smaller value between
 * weight and burst will be picked to execute the commands in one queue.
 */
#define USER_SPECIFIED_HIGH_PRIORITY_WEIGHT 32
#define USER_SPECIFIED_MEDIUM_PRIORITY_WEIGHT 16
#define USER_SPECIFIED_LOW_PRIORITY_WEIGHT 8
#define USER_SPECIFIED_ARBITRATION_BURST 7 /* No limit */

/*
 * Description of dword for priority weight and arbitration burst
 * ------------------------------------------------------------------------------
 * 31 : 24 | 23 : 16 | 15 : 08 | 07 : 03 | 02 : 00
 * ------------------------------------------------------------------------------
 * High Prio Weight | Medium Prio Weight | Low Prio Weight | Reserved | Arb Burst
 * ------------------------------------------------------------------------------
 *
 * The priority weights are zero based value.
 */
#define SPDK_NVME_HIGH_PRIO_WEIGHT_SHIFT 24
#define SPDK_NVME_MED_PRIO_WEIGHT_SHIFT 16
#define SPDK_NVME_LOW_PRIO_WEIGHT_SHIFT 8
#define SPDK_NVME_PRIO_WEIGHT_MASK 0xFF
#define SPDK_NVME_ARB_BURST_MASK 0x7

/* Number of distinct queue priorities; used to round-robin worker qprio. */
#define SPDK_NVME_QPRIO_MAX (SPDK_NVME_QPRIO_LOW + 1)
160
161 static void task_complete(struct arb_task *task);
162
163 static void io_complete(void *ctx, const struct spdk_nvme_cpl *completion);
164
165 static void get_arb_feature(struct spdk_nvme_ctrlr *ctrlr);
166
167 static int set_arb_feature(struct spdk_nvme_ctrlr *ctrlr);
168
169 static const char *print_qprio(enum spdk_nvme_qprio);
170
171
172 static void
173 register_ns(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_ns *ns)
174 {
175 struct ns_entry *entry;
176 const struct spdk_nvme_ctrlr_data *cdata;
177
178 cdata = spdk_nvme_ctrlr_get_data(ctrlr);
179
180 if (!spdk_nvme_ns_is_active(ns)) {
181 printf("Controller %-20.20s (%-20.20s): Skipping inactive NS %u\n",
182 cdata->mn, cdata->sn,
183 spdk_nvme_ns_get_id(ns));
184 return;
185 }
186
187 if (spdk_nvme_ns_get_size(ns) < g_arbitration.io_size_bytes ||
188 spdk_nvme_ns_get_sector_size(ns) > g_arbitration.io_size_bytes) {
189 printf("WARNING: controller %-20.20s (%-20.20s) ns %u has invalid "
190 "ns size %" PRIu64 " / block size %u for I/O size %u\n",
191 cdata->mn, cdata->sn, spdk_nvme_ns_get_id(ns),
192 spdk_nvme_ns_get_size(ns), spdk_nvme_ns_get_sector_size(ns),
193 g_arbitration.io_size_bytes);
194 return;
195 }
196
197 entry = malloc(sizeof(struct ns_entry));
198 if (entry == NULL) {
199 perror("ns_entry malloc");
200 exit(1);
201 }
202
203 entry->nvme.ctrlr = ctrlr;
204 entry->nvme.ns = ns;
205
206 entry->size_in_ios = spdk_nvme_ns_get_size(ns) / g_arbitration.io_size_bytes;
207 entry->io_size_blocks = g_arbitration.io_size_bytes / spdk_nvme_ns_get_sector_size(ns);
208
209 snprintf(entry->name, 44, "%-20.20s (%-20.20s)", cdata->mn, cdata->sn);
210
211 g_arbitration.num_namespaces++;
212 entry->next = g_namespaces;
213 g_namespaces = entry;
214 }
215
216 static void
217 enable_latency_tracking_complete(void *cb_arg, const struct spdk_nvme_cpl *cpl)
218 {
219 if (spdk_nvme_cpl_is_error(cpl)) {
220 printf("enable_latency_tracking_complete failed\n");
221 }
222 g_arbitration.outstanding_commands--;
223 }
224
225 static void
226 set_latency_tracking_feature(struct spdk_nvme_ctrlr *ctrlr, bool enable)
227 {
228 int res;
229 union spdk_nvme_intel_feat_latency_tracking latency_tracking;
230
231 if (enable) {
232 latency_tracking.bits.enable = 0x01;
233 } else {
234 latency_tracking.bits.enable = 0x00;
235 }
236
237 res = spdk_nvme_ctrlr_cmd_set_feature(ctrlr, SPDK_NVME_INTEL_FEAT_LATENCY_TRACKING,
238 latency_tracking.raw, 0, NULL, 0, enable_latency_tracking_complete, NULL);
239 if (res) {
240 printf("fail to allocate nvme request.\n");
241 return;
242 }
243 g_arbitration.outstanding_commands++;
244
245 while (g_arbitration.outstanding_commands) {
246 spdk_nvme_ctrlr_process_admin_completions(ctrlr);
247 }
248 }
249
250 static void
251 register_ctrlr(struct spdk_nvme_ctrlr *ctrlr)
252 {
253 int nsid, num_ns;
254 struct spdk_nvme_ns *ns;
255 struct ctrlr_entry *entry = calloc(1, sizeof(struct ctrlr_entry));
256 const struct spdk_nvme_ctrlr_data *cdata = spdk_nvme_ctrlr_get_data(ctrlr);
257
258 if (entry == NULL) {
259 perror("ctrlr_entry malloc");
260 exit(1);
261 }
262
263 snprintf(entry->name, sizeof(entry->name), "%-20.20s (%-20.20s)", cdata->mn, cdata->sn);
264
265 entry->ctrlr = ctrlr;
266 entry->next = g_controllers;
267 g_controllers = entry;
268
269 if ((g_arbitration.latency_tracking_enable != 0) &&
270 spdk_nvme_ctrlr_is_feature_supported(ctrlr, SPDK_NVME_INTEL_FEAT_LATENCY_TRACKING)) {
271 set_latency_tracking_feature(ctrlr, true);
272 }
273
274 num_ns = spdk_nvme_ctrlr_get_num_ns(ctrlr);
275 for (nsid = 1; nsid <= num_ns; nsid++) {
276 ns = spdk_nvme_ctrlr_get_ns(ctrlr, nsid);
277 if (ns == NULL) {
278 continue;
279 }
280 register_ns(ctrlr, ns);
281 }
282
283 if (g_arbitration.arbitration_mechanism == SPDK_NVME_CAP_AMS_WRR) {
284 get_arb_feature(ctrlr);
285
286 if (g_arbitration.arbitration_config != 0) {
287 set_arb_feature(ctrlr);
288 get_arb_feature(ctrlr);
289 }
290 }
291 }
292
293 static __thread unsigned int seed = 0;
294
295 static void
296 submit_single_io(struct ns_worker_ctx *ns_ctx)
297 {
298 struct arb_task *task = NULL;
299 uint64_t offset_in_ios;
300 int rc;
301 struct ns_entry *entry = ns_ctx->entry;
302
303 task = spdk_mempool_get(task_pool);
304 if (!task) {
305 fprintf(stderr, "Failed to get task from task_pool\n");
306 exit(1);
307 }
308
309 task->buf = spdk_dma_zmalloc(g_arbitration.io_size_bytes, 0x200, NULL);
310 if (!task->buf) {
311 spdk_mempool_put(task_pool, task);
312 fprintf(stderr, "task->buf spdk_dma_zmalloc failed\n");
313 exit(1);
314 }
315
316 task->ns_ctx = ns_ctx;
317
318 if (g_arbitration.is_random) {
319 offset_in_ios = rand_r(&seed) % entry->size_in_ios;
320 } else {
321 offset_in_ios = ns_ctx->offset_in_ios++;
322 if (ns_ctx->offset_in_ios == entry->size_in_ios) {
323 ns_ctx->offset_in_ios = 0;
324 }
325 }
326
327 if ((g_arbitration.rw_percentage == 100) ||
328 (g_arbitration.rw_percentage != 0 &&
329 ((rand_r(&seed) % 100) < g_arbitration.rw_percentage))) {
330 rc = spdk_nvme_ns_cmd_read(entry->nvme.ns, ns_ctx->qpair, task->buf,
331 offset_in_ios * entry->io_size_blocks,
332 entry->io_size_blocks, io_complete, task, 0);
333 } else {
334 rc = spdk_nvme_ns_cmd_write(entry->nvme.ns, ns_ctx->qpair, task->buf,
335 offset_in_ios * entry->io_size_blocks,
336 entry->io_size_blocks, io_complete, task, 0);
337 }
338
339 if (rc != 0) {
340 fprintf(stderr, "starting I/O failed\n");
341 }
342
343 ns_ctx->current_queue_depth++;
344 }
345
346 static void
347 task_complete(struct arb_task *task)
348 {
349 struct ns_worker_ctx *ns_ctx;
350
351 ns_ctx = task->ns_ctx;
352 ns_ctx->current_queue_depth--;
353 ns_ctx->io_completed++;
354
355 spdk_dma_free(task->buf);
356 spdk_mempool_put(task_pool, task);
357
358 /*
359 * is_draining indicates when time has expired for the test run
360 * and we are just waiting for the previously submitted I/O
361 * to complete. In this case, do not submit a new I/O to replace
362 * the one just completed.
363 */
364 if (!ns_ctx->is_draining) {
365 submit_single_io(ns_ctx);
366 }
367 }
368
/* NVMe completion callback: the cb_arg is the arb_task being retired. */
static void
io_complete(void *ctx, const struct spdk_nvme_cpl *completion)
{
	struct arb_task *task = ctx;

	task_complete(task);
}
374
375 static void
376 check_io(struct ns_worker_ctx *ns_ctx)
377 {
378 spdk_nvme_qpair_process_completions(ns_ctx->qpair, g_arbitration.max_completions);
379 }
380
/* Prime the context with queue_depth initial I/Os. */
static void
submit_io(struct ns_worker_ctx *ns_ctx, int queue_depth)
{
	int i;

	for (i = 0; i < queue_depth; i++) {
		submit_single_io(ns_ctx);
	}
}
388
389 static void
390 drain_io(struct ns_worker_ctx *ns_ctx)
391 {
392 ns_ctx->is_draining = true;
393 while (ns_ctx->current_queue_depth > 0) {
394 check_io(ns_ctx);
395 }
396 }
397
398 static int
399 init_ns_worker_ctx(struct ns_worker_ctx *ns_ctx, enum spdk_nvme_qprio qprio)
400 {
401 struct spdk_nvme_ctrlr *ctrlr = ns_ctx->entry->nvme.ctrlr;
402 struct spdk_nvme_io_qpair_opts opts;
403
404 spdk_nvme_ctrlr_get_default_io_qpair_opts(ctrlr, &opts, sizeof(opts));
405 opts.qprio = qprio;
406
407 ns_ctx->qpair = spdk_nvme_ctrlr_alloc_io_qpair(ctrlr, &opts, sizeof(opts));
408 if (!ns_ctx->qpair) {
409 printf("ERROR: spdk_nvme_ctrlr_alloc_io_qpair failed\n");
410 return 1;
411 }
412
413 return 0;
414 }
415
/* Release the I/O qpair allocated by init_ns_worker_ctx(). */
static void
cleanup_ns_worker_ctx(struct ns_worker_ctx *ns_ctx)
{
	spdk_nvme_ctrlr_free_io_qpair(ns_ctx->qpair);
}
421
422 static void
423 cleanup(uint32_t task_count)
424 {
425 struct ns_entry *entry = g_namespaces;
426 struct ns_entry *next_entry = NULL;
427 struct worker_thread *worker = g_workers;
428 struct worker_thread *next_worker = NULL;
429
430 while (entry) {
431 next_entry = entry->next;
432 free(entry);
433 entry = next_entry;
434 };
435
436 while (worker) {
437 next_worker = worker->next;
438 free(worker->ns_ctx);
439 free(worker);
440 worker = next_worker;
441 };
442
443 if (spdk_mempool_count(task_pool) != (size_t)task_count) {
444 fprintf(stderr, "task_pool count is %zu but should be %u\n",
445 spdk_mempool_count(task_pool), task_count);
446 }
447 spdk_mempool_free(task_pool);
448 }
449
/*
 * Worker-thread entry point (runs pinned to worker->lcore).
 * Lifecycle: allocate a qpair per namespace at this worker's priority,
 * prime each namespace to the configured queue depth, poll for the
 * configured duration (completions re-submit via task_complete), then
 * drain in-flight I/O and free the qpairs.
 * Returns 0 on success, 1 if any qpair allocation failed.
 * NOTE(review): on the early error return, qpairs already allocated for
 * earlier contexts in the list are not freed — confirm acceptable for an
 * example program.
 */
static int
work_fn(void *arg)
{
	uint64_t tsc_end;
	struct worker_thread *worker = (struct worker_thread *)arg;
	struct ns_worker_ctx *ns_ctx = NULL;

	printf("Starting thread on core %u with %s\n", worker->lcore, print_qprio(worker->qprio));

	/* Allocate a queue pair for each namespace. */
	ns_ctx = worker->ns_ctx;
	while (ns_ctx != NULL) {
		if (init_ns_worker_ctx(ns_ctx, worker->qprio) != 0) {
			printf("ERROR: init_ns_worker_ctx() failed\n");
			return 1;
		}
		ns_ctx = ns_ctx->next;
	}

	/* End-of-run deadline in TSC ticks. */
	tsc_end = spdk_get_ticks() + g_arbitration.time_in_sec * g_arbitration.tsc_rate;

	/* Submit initial I/O for each namespace. */
	ns_ctx = worker->ns_ctx;

	while (ns_ctx != NULL) {
		submit_io(ns_ctx, g_arbitration.queue_depth);
		ns_ctx = ns_ctx->next;
	}

	while (1) {
		/*
		 * Check for completed I/O for each controller. A new
		 * I/O will be submitted in the io_complete callback
		 * to replace each I/O that is completed.
		 */
		ns_ctx = worker->ns_ctx;
		while (ns_ctx != NULL) {
			check_io(ns_ctx);
			ns_ctx = ns_ctx->next;
		}

		if (spdk_get_ticks() > tsc_end) {
			break;
		}
	}

	/* Wait out in-flight I/O and release each namespace's qpair. */
	ns_ctx = worker->ns_ctx;
	while (ns_ctx != NULL) {
		drain_io(ns_ctx);
		cleanup_ns_worker_ctx(ns_ctx);
		ns_ctx = ns_ctx->next;
	}

	return 0;
}
505
/* Print command-line help; option letters match the getopt string in
 * parse_args(). */
static void
usage(char *program_name)
{
	printf("%s options", program_name);
	printf("\n");
	printf("\t[-q io depth]\n");
	printf("\t[-s io size in bytes]\n");
	printf("\t[-w io pattern type, must be one of\n");
	printf("\t\t(read, write, randread, randwrite, rw, randrw)]\n");
	printf("\t[-M rwmixread (100 for reads, 0 for writes)]\n");
	printf("\t[-l enable latency tracking, default: disabled]\n");
	printf("\t\t(0 - disabled; 1 - enabled)\n");
	printf("\t[-t time in seconds]\n");
	printf("\t[-c core mask for I/O submission/completion.]\n");
	printf("\t\t(default: 0xf - 4 cores)]\n");
	printf("\t[-m max completions per poll]\n");
	printf("\t\t(default: 0 - unlimited)\n");
	printf("\t[-a arbitration mechanism, must be one of below]\n");
	printf("\t\t(0, 1, 2)]\n");
	printf("\t\t(0: default round robin mechanism)]\n");
	printf("\t\t(1: weighted round robin mechanism)]\n");
	printf("\t\t(2: vendor specific mechanism)]\n");
	printf("\t[-b enable arbitration user configuration, default: disabled]\n");
	printf("\t\t(0 - disabled; 1 - enabled)\n");
	printf("\t[-n subjected IOs for performance comparison]\n");
	printf("\t[-i shared memory group ID]\n");
}
533
534 static const char *
535 print_qprio(enum spdk_nvme_qprio qprio)
536 {
537 switch (qprio) {
538 case SPDK_NVME_QPRIO_URGENT:
539 return "urgent priority queue";
540 case SPDK_NVME_QPRIO_HIGH:
541 return "high priority queue";
542 case SPDK_NVME_QPRIO_MEDIUM:
543 return "medium priority queue";
544 case SPDK_NVME_QPRIO_LOW:
545 return "low priority queue";
546 default:
547 return "invalid priority queue";
548 }
549 }
550
551
552 static void
553 print_configuration(char *program_name)
554 {
555 printf("%s run with configuration:\n", program_name);
556 printf("%s -q %d -s %d -w %s -M %d -l %d -t %d -c %s -m %d -a %d -b %d -n %d -i %d\n",
557 program_name,
558 g_arbitration.queue_depth,
559 g_arbitration.io_size_bytes,
560 g_arbitration.workload_type,
561 g_arbitration.rw_percentage,
562 g_arbitration.latency_tracking_enable,
563 g_arbitration.time_in_sec,
564 g_arbitration.core_mask,
565 g_arbitration.max_completions,
566 g_arbitration.arbitration_mechanism,
567 g_arbitration.arbitration_config,
568 g_arbitration.io_count,
569 g_arbitration.shm_id);
570 }
571
572
573 static void
574 print_performance(void)
575 {
576 float io_per_second, sent_all_io_in_secs;
577 struct worker_thread *worker;
578 struct ns_worker_ctx *ns_ctx;
579
580 worker = g_workers;
581 while (worker) {
582 ns_ctx = worker->ns_ctx;
583 while (ns_ctx) {
584 io_per_second = (float)ns_ctx->io_completed / g_arbitration.time_in_sec;
585 sent_all_io_in_secs = g_arbitration.io_count / io_per_second;
586 printf("%-43.43s core %u: %8.2f IO/s %8.2f secs/%d ios\n",
587 ns_ctx->entry->name, worker->lcore,
588 io_per_second, sent_all_io_in_secs, g_arbitration.io_count);
589 ns_ctx = ns_ctx->next;
590 }
591 worker = worker->next;
592 }
593 printf("========================================================\n");
594
595 printf("\n");
596 }
597
598 static void
599 print_latency_page(struct ctrlr_entry *entry)
600 {
601 int i;
602
603 printf("\n");
604 printf("%s\n", entry->name);
605 printf("--------------------------------------------------------\n");
606
607 for (i = 0; i < 32; i++) {
608 if (entry->latency_page.buckets_32us[i])
609 printf("Bucket %dus - %dus: %d\n", i * 32, (i + 1) * 32,
610 entry->latency_page.buckets_32us[i]);
611 }
612 for (i = 0; i < 31; i++) {
613 if (entry->latency_page.buckets_1ms[i])
614 printf("Bucket %dms - %dms: %d\n", i + 1, i + 2,
615 entry->latency_page.buckets_1ms[i]);
616 }
617 for (i = 0; i < 31; i++) {
618 if (entry->latency_page.buckets_32ms[i])
619 printf("Bucket %dms - %dms: %d\n", (i + 1) * 32, (i + 2) * 32,
620 entry->latency_page.buckets_32ms[i]);
621 }
622 }
623
/*
 * Fetch and print the Intel latency log page from every controller that
 * supports it. Phase 1 issues all Get Log Page commands (exiting the
 * process if a submission fails), phase 2 busy-polls every controller's
 * admin queue until all commands complete, phase 3 prints the pages.
 */
static void
print_latency_statistics(const char *op_name, enum spdk_nvme_intel_log_page log_page)
{
	struct ctrlr_entry *ctrlr;

	printf("%s Latency Statistics:\n", op_name);
	printf("========================================================\n");
	ctrlr = g_controllers;
	while (ctrlr) {
		if (spdk_nvme_ctrlr_is_log_page_supported(ctrlr->ctrlr, log_page)) {
			if (spdk_nvme_ctrlr_cmd_get_log_page(
				    ctrlr->ctrlr, log_page,
				    SPDK_NVME_GLOBAL_NS_TAG,
				    &ctrlr->latency_page,
				    sizeof(struct spdk_nvme_intel_rw_latency_page),
				    0,
				    enable_latency_tracking_complete,
				    NULL)) {
				printf("nvme_ctrlr_cmd_get_log_page() failed\n");
				exit(1);
			}

			g_arbitration.outstanding_commands++;
		} else {
			printf("Controller %s: %s latency statistics not supported\n",
			       ctrlr->name, op_name);
		}
		ctrlr = ctrlr->next;
	}

	/* Poll every controller until all outstanding log-page reads finish. */
	while (g_arbitration.outstanding_commands) {
		ctrlr = g_controllers;
		while (ctrlr) {
			spdk_nvme_ctrlr_process_admin_completions(ctrlr->ctrlr);
			ctrlr = ctrlr->next;
		}
	}

	ctrlr = g_controllers;
	while (ctrlr) {
		if (spdk_nvme_ctrlr_is_log_page_supported(ctrlr->ctrlr, log_page)) {
			print_latency_page(ctrlr);
		}
		ctrlr = ctrlr->next;
	}
	printf("\n");
}
671
672 static void
673 print_stats(void)
674 {
675 print_performance();
676 if (g_arbitration.latency_tracking_enable) {
677 if (g_arbitration.rw_percentage != 0) {
678 print_latency_statistics("Read", SPDK_NVME_INTEL_LOG_READ_CMD_LATENCY);
679 }
680 if (g_arbitration.rw_percentage != 100) {
681 print_latency_statistics("Write", SPDK_NVME_INTEL_LOG_WRITE_CMD_LATENCY);
682 }
683 }
684 }
685
/*
 * Parse command-line options into g_arbitration and validate the
 * combination. Returns 0 on success, 1 on any invalid input (after
 * printing a diagnostic, or usage for -h / unknown options).
 */
static int
parse_args(int argc, char **argv)
{
	const char *workload_type = NULL;
	int op = 0;
	bool mix_specified = false;

	while ((op = getopt(argc, argv, "c:l:i:m:q:s:t:w:M:a:b:n:h")) != -1) {
		switch (op) {
		case 'c':
			g_arbitration.core_mask = optarg;
			break;
		case 'i':
			g_arbitration.shm_id = atoi(optarg);
			break;
		case 'l':
			g_arbitration.latency_tracking_enable = atoi(optarg);
			break;
		case 'm':
			g_arbitration.max_completions = atoi(optarg);
			break;
		case 'q':
			g_arbitration.queue_depth = atoi(optarg);
			break;
		case 's':
			g_arbitration.io_size_bytes = atoi(optarg);
			break;
		case 't':
			g_arbitration.time_in_sec = atoi(optarg);
			break;
		case 'w':
			g_arbitration.workload_type = optarg;
			break;
		case 'M':
			g_arbitration.rw_percentage = atoi(optarg);
			mix_specified = true;
			break;
		case 'a':
			g_arbitration.arbitration_mechanism = atoi(optarg);
			break;
		case 'b':
			g_arbitration.arbitration_config = atoi(optarg);
			break;
		case 'n':
			g_arbitration.io_count = atoi(optarg);
			break;
		case 'h':
		default:
			usage(argv[0]);
			return 1;
		}
	}

	workload_type = g_arbitration.workload_type;

	/* -w must name one of the six supported patterns. */
	if (strcmp(workload_type, "read") &&
	    strcmp(workload_type, "write") &&
	    strcmp(workload_type, "randread") &&
	    strcmp(workload_type, "randwrite") &&
	    strcmp(workload_type, "rw") &&
	    strcmp(workload_type, "randrw")) {
		fprintf(stderr,
			"io pattern type must be one of\n"
			"(read, write, randread, randwrite, rw, randrw)\n");
		return 1;
	}

	/* Pure-read / pure-write patterns force the mix ratio. */
	if (!strcmp(workload_type, "read") ||
	    !strcmp(workload_type, "randread")) {
		g_arbitration.rw_percentage = 100;
	}

	if (!strcmp(workload_type, "write") ||
	    !strcmp(workload_type, "randwrite")) {
		g_arbitration.rw_percentage = 0;
	}

	/* -M only makes sense for mixed workloads; warn but continue. */
	if (!strcmp(workload_type, "read") ||
	    !strcmp(workload_type, "randread") ||
	    !strcmp(workload_type, "write") ||
	    !strcmp(workload_type, "randwrite")) {
		if (mix_specified) {
			fprintf(stderr, "Ignoring -M option... Please use -M option"
				" only when using rw or randrw.\n");
		}
	}

	if (!strcmp(workload_type, "rw") ||
	    !strcmp(workload_type, "randrw")) {
		if (g_arbitration.rw_percentage < 0 || g_arbitration.rw_percentage > 100) {
			fprintf(stderr,
				"-M must be specified to value from 0 to 100 "
				"for rw or randrw.\n");
			return 1;
		}
	}

	/* The "rand" prefix selects random offsets; the rest are sequential. */
	if (!strcmp(workload_type, "read") ||
	    !strcmp(workload_type, "write") ||
	    !strcmp(workload_type, "rw")) {
		g_arbitration.is_random = 0;
	} else {
		g_arbitration.is_random = 1;
	}

	if (g_arbitration.latency_tracking_enable != 0 &&
	    g_arbitration.latency_tracking_enable != 1) {
		fprintf(stderr,
			"-l must be specified to value 0 or 1.\n");
		return 1;
	}

	/* -a takes the CC.AMS encoding: 0 = RR, 1 = WRR, 7 = vendor specific. */
	switch (g_arbitration.arbitration_mechanism) {
	case SPDK_NVME_CC_AMS_RR:
	case SPDK_NVME_CC_AMS_WRR:
	case SPDK_NVME_CC_AMS_VS:
		break;
	default:
		fprintf(stderr,
			"-a must be specified to value 0, 1, or 7.\n");
		return 1;
	}

	/* User-specified weights (-b 1) require WRR arbitration (-a 1). */
	if (g_arbitration.arbitration_config != 0 &&
	    g_arbitration.arbitration_config != 1) {
		fprintf(stderr,
			"-b must be specified to value 0 or 1.\n");
		return 1;
	} else if (g_arbitration.arbitration_config == 1 &&
		   g_arbitration.arbitration_mechanism != SPDK_NVME_CC_AMS_WRR) {
		fprintf(stderr,
			"-a must be specified to 1 (WRR) together.\n");
		return 1;
	}

	return 0;
}
823
824 static int
825 register_workers(void)
826 {
827 uint32_t i;
828 struct worker_thread *worker;
829 enum spdk_nvme_qprio qprio = SPDK_NVME_QPRIO_URGENT;
830
831 g_workers = NULL;
832 g_arbitration.num_workers = 0;
833
834 SPDK_ENV_FOREACH_CORE(i) {
835 worker = calloc(1, sizeof(*worker));
836 if (worker == NULL) {
837 fprintf(stderr, "Unable to allocate worker\n");
838 return -1;
839 }
840
841 worker->lcore = i;
842 worker->next = g_workers;
843 g_workers = worker;
844 g_arbitration.num_workers++;
845
846 if (g_arbitration.arbitration_mechanism == SPDK_NVME_CAP_AMS_WRR) {
847 qprio++;
848 }
849
850 worker->qprio = qprio % SPDK_NVME_QPRIO_MAX;
851 }
852
853 return 0;
854 }
855
856 static bool
857 probe_cb(void *cb_ctx, const struct spdk_nvme_transport_id *trid,
858 struct spdk_nvme_ctrlr_opts *opts)
859 {
860 /* Update with user specified arbitration configuration */
861 opts->arb_mechanism = g_arbitration.arbitration_mechanism;
862
863 printf("Attaching to %s\n", trid->traddr);
864
865 return true;
866 }
867
868 static void
869 attach_cb(void *cb_ctx, const struct spdk_nvme_transport_id *trid,
870 struct spdk_nvme_ctrlr *ctrlr, const struct spdk_nvme_ctrlr_opts *opts)
871 {
872 printf("Attached to %s\n", trid->traddr);
873
874 /* Update with actual arbitration configuration in use */
875 g_arbitration.arbitration_mechanism = opts->arb_mechanism;
876
877 register_ctrlr(ctrlr);
878 }
879
880 static int
881 register_controllers(void)
882 {
883 printf("Initializing NVMe Controllers\n");
884
885 if (spdk_nvme_probe(NULL, NULL, probe_cb, attach_cb, NULL) != 0) {
886 fprintf(stderr, "spdk_nvme_probe() failed\n");
887 return 1;
888 }
889
890 if (g_arbitration.num_namespaces == 0) {
891 fprintf(stderr, "No valid namespaces to continue IO testing\n");
892 return 1;
893 }
894
895 return 0;
896 }
897
898 static void
899 unregister_controllers(void)
900 {
901 struct ctrlr_entry *entry = g_controllers;
902
903 while (entry) {
904 struct ctrlr_entry *next = entry->next;
905 if (g_arbitration.latency_tracking_enable &&
906 spdk_nvme_ctrlr_is_feature_supported(entry->ctrlr, SPDK_NVME_INTEL_FEAT_LATENCY_TRACKING)) {
907 set_latency_tracking_feature(entry->ctrlr, false);
908 }
909 spdk_nvme_detach(entry->ctrlr);
910 free(entry);
911 entry = next;
912 }
913 }
914
915 static int
916 associate_workers_with_ns(void)
917 {
918 struct ns_entry *entry = g_namespaces;
919 struct worker_thread *worker = g_workers;
920 struct ns_worker_ctx *ns_ctx;
921 int i, count;
922
923 count = g_arbitration.num_namespaces > g_arbitration.num_workers ?
924 g_arbitration.num_namespaces : g_arbitration.num_workers;
925
926 for (i = 0; i < count; i++) {
927 if (entry == NULL) {
928 break;
929 }
930
931 ns_ctx = malloc(sizeof(struct ns_worker_ctx));
932 if (!ns_ctx) {
933 return 1;
934 }
935 memset(ns_ctx, 0, sizeof(*ns_ctx));
936
937 printf("Associating %s with lcore %d\n", entry->name, worker->lcore);
938 ns_ctx->entry = entry;
939 ns_ctx->next = worker->ns_ctx;
940 worker->ns_ctx = ns_ctx;
941
942 worker = worker->next;
943 if (worker == NULL) {
944 worker = g_workers;
945 }
946
947 entry = entry->next;
948 if (entry == NULL) {
949 entry = g_namespaces;
950 }
951
952 }
953
954 return 0;
955 }
956
957 static void
958 get_feature_completion(void *cb_arg, const struct spdk_nvme_cpl *cpl)
959 {
960 struct feature *feature = cb_arg;
961 int fid = feature - features;
962
963 if (spdk_nvme_cpl_is_error(cpl)) {
964 printf("get_feature(0x%02X) failed\n", fid);
965 } else {
966 feature->result = cpl->cdw0;
967 feature->valid = true;
968 }
969
970 g_arbitration.outstanding_commands--;
971 }
972
973 static int
974 get_feature(struct spdk_nvme_ctrlr *ctrlr, uint8_t fid)
975 {
976 struct spdk_nvme_cmd cmd = {};
977
978 cmd.opc = SPDK_NVME_OPC_GET_FEATURES;
979 cmd.cdw10 = fid;
980
981 return spdk_nvme_ctrlr_cmd_admin_raw(ctrlr, &cmd, NULL, 0, get_feature_completion, &features[fid]);
982 }
983
/*
 * Read the controller's Arbitration feature, busy-polling the admin queue
 * until the command completes, then print the decoded burst and weights.
 * The weight fields are zero-based in the dword, hence the "+ 1" when
 * converting to the effective weight.
 */
static void
get_arb_feature(struct spdk_nvme_ctrlr *ctrlr)
{
	get_feature(ctrlr, SPDK_NVME_FEAT_ARBITRATION);

	g_arbitration.outstanding_commands++;

	while (g_arbitration.outstanding_commands) {
		spdk_nvme_ctrlr_process_admin_completions(ctrlr);
	}

	if (features[SPDK_NVME_FEAT_ARBITRATION].valid) {
		uint32_t arb = features[SPDK_NVME_FEAT_ARBITRATION].result;
		unsigned ab, lpw, mpw, hpw;

		/* Unpack CDW0 per the dword layout documented above the
		 * SPDK_NVME_*_SHIFT macros. */
		ab = arb & SPDK_NVME_ARB_BURST_MASK;
		lpw = ((arb >> SPDK_NVME_LOW_PRIO_WEIGHT_SHIFT) & SPDK_NVME_PRIO_WEIGHT_MASK) + 1;
		mpw = ((arb >> SPDK_NVME_MED_PRIO_WEIGHT_SHIFT) & SPDK_NVME_PRIO_WEIGHT_MASK) + 1;
		hpw = ((arb >> SPDK_NVME_HIGH_PRIO_WEIGHT_SHIFT) & SPDK_NVME_PRIO_WEIGHT_MASK) + 1;

		printf("Current Arbitration Configuration\n");
		printf("===========\n");
		printf("Arbitration Burst: ");
		if (ab == SPDK_NVME_ARB_BURST_MASK) {
			/* 0x7 is the spec's "no limit" encoding. */
			printf("no limit\n");
		} else {
			/* Burst is encoded as a power of two. */
			printf("%u\n", 1u << ab);
		}

		printf("Low Priority Weight: %u\n", lpw);
		printf("Medium Priority Weight: %u\n", mpw);
		printf("High Priority Weight: %u\n", hpw);
		printf("\n");
	}
}
1019
1020 static void
1021 set_feature_completion(void *cb_arg, const struct spdk_nvme_cpl *cpl)
1022 {
1023 struct feature *feature = cb_arg;
1024 int fid = feature - features;
1025
1026 if (spdk_nvme_cpl_is_error(cpl)) {
1027 printf("set_feature(0x%02X) failed\n", fid);
1028 feature->valid = false;
1029 } else {
1030 printf("Set Arbitration Feature Successfully\n");
1031 }
1032
1033 g_arbitration.outstanding_commands--;
1034 }
1035
/*
 * Program the Arbitration feature with the user-specified burst and
 * per-priority weights, then busy-poll the admin queue until completion.
 * Returns 1 if the command could not be submitted, 0 otherwise (including
 * the case where the controller rejected the values and the defaults
 * remain in effect).
 */
static int
set_arb_feature(struct spdk_nvme_ctrlr *ctrlr)
{
	int ret;
	struct spdk_nvme_cmd cmd = {};
	uint32_t arb = 0;
	unsigned ab, lpw, mpw, hpw;

	cmd.opc = SPDK_NVME_OPC_SET_FEATURES;
	cmd.cdw10 = SPDK_NVME_FEAT_ARBITRATION;

	g_arbitration.outstanding_commands = 0;

	/* Only build a non-zero CDW11 if the preceding Get Features in
	 * register_ctrlr() succeeded; otherwise CDW11 stays 0. */
	if (features[SPDK_NVME_FEAT_ARBITRATION].valid) {
		ab = USER_SPECIFIED_ARBITRATION_BURST & SPDK_NVME_ARB_BURST_MASK;
		/* NOTE(review): the dword comment above says weights are
		 * zero-based, but the USER_SPECIFIED_* values are programmed
		 * unadjusted, making the effective weights value + 1 —
		 * confirm this is intended. */
		hpw = USER_SPECIFIED_HIGH_PRIORITY_WEIGHT << SPDK_NVME_HIGH_PRIO_WEIGHT_SHIFT;
		mpw = USER_SPECIFIED_MEDIUM_PRIORITY_WEIGHT << SPDK_NVME_MED_PRIO_WEIGHT_SHIFT;
		lpw = USER_SPECIFIED_LOW_PRIORITY_WEIGHT << SPDK_NVME_LOW_PRIO_WEIGHT_SHIFT;
		arb = hpw | mpw | lpw | ab;
		cmd.cdw11 = arb;
	}

	ret = spdk_nvme_ctrlr_cmd_admin_raw(ctrlr, &cmd, NULL, 0,
					    set_feature_completion, &features[SPDK_NVME_FEAT_ARBITRATION]);
	if (ret) {
		printf("Set Arbitration Feature: Failed 0x%x\n", ret);
		return 1;
	}

	g_arbitration.outstanding_commands++;

	/* Busy-poll until set_feature_completion() fires. */
	while (g_arbitration.outstanding_commands) {
		spdk_nvme_ctrlr_process_admin_completions(ctrlr);
	}

	if (!features[SPDK_NVME_FEAT_ARBITRATION].valid) {
		printf("Set Arbitration Feature failed and use default configuration\n");
	}

	return 0;
}
1077
1078 int
1079 main(int argc, char **argv)
1080 {
1081 int rc;
1082 struct worker_thread *worker, *master_worker;
1083 unsigned master_core;
1084 char task_pool_name[30];
1085 uint32_t task_count;
1086 struct spdk_env_opts opts;
1087
1088 rc = parse_args(argc, argv);
1089 if (rc != 0) {
1090 return rc;
1091 }
1092
1093 spdk_env_opts_init(&opts);
1094 opts.name = "arb";
1095 opts.core_mask = g_arbitration.core_mask;
1096 opts.shm_id = g_arbitration.shm_id;
1097 if (spdk_env_init(&opts) < 0) {
1098 return 1;
1099 }
1100
1101 g_arbitration.tsc_rate = spdk_get_ticks_hz();
1102
1103 if (register_workers() != 0) {
1104 return 1;
1105 }
1106
1107 if (register_controllers() != 0) {
1108 return 1;
1109 }
1110
1111 if (associate_workers_with_ns() != 0) {
1112 return 1;
1113 }
1114
1115 snprintf(task_pool_name, sizeof(task_pool_name), "task_pool_%d", getpid());
1116
1117 /*
1118 * The task_count will be dynamically calculated based on the
1119 * number of attached active namespaces, queue depth and number
1120 * of cores (workers) involved in the IO perations.
1121 */
1122 task_count = g_arbitration.num_namespaces > g_arbitration.num_workers ?
1123 g_arbitration.num_namespaces : g_arbitration.num_workers;
1124 task_count *= g_arbitration.queue_depth;
1125
1126 task_pool = spdk_mempool_create(task_pool_name, task_count,
1127 sizeof(struct arb_task), 0, SPDK_ENV_SOCKET_ID_ANY);
1128 if (task_pool == NULL) {
1129 fprintf(stderr, "could not initialize task pool\n");
1130 return 1;
1131 }
1132
1133 print_configuration(argv[0]);
1134
1135 printf("Initialization complete. Launching workers.\n");
1136
1137 /* Launch all of the slave workers */
1138 master_core = spdk_env_get_current_core();
1139 master_worker = NULL;
1140 worker = g_workers;
1141 while (worker != NULL) {
1142 if (worker->lcore != master_core) {
1143 spdk_env_thread_launch_pinned(worker->lcore, work_fn, worker);
1144 } else {
1145 assert(master_worker == NULL);
1146 master_worker = worker;
1147 }
1148 worker = worker->next;
1149 }
1150
1151 assert(master_worker != NULL);
1152 rc = work_fn(master_worker);
1153
1154 spdk_env_thread_wait_all();
1155
1156 print_stats();
1157
1158 unregister_controllers();
1159
1160 cleanup(task_count);
1161
1162 if (rc != 0) {
1163 fprintf(stderr, "%s: errors occured\n", argv[0]);
1164 }
1165
1166 return rc;
1167 }