]> git.proxmox.com Git - ceph.git/blob - ceph/src/spdk/examples/nvme/arbitration/arbitration.c
add subtree-ish sources for 12.0.3
[ceph.git] / ceph / src / spdk / examples / nvme / arbitration / arbitration.c
1 /*-
2 * BSD LICENSE
3 *
4 * Copyright (c) Intel Corporation.
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 *
11 * * Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * * Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in
15 * the documentation and/or other materials provided with the
16 * distribution.
17 * * Neither the name of Intel Corporation nor the names of its
18 * contributors may be used to endorse or promote products derived
19 * from this software without specific prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32 */
33
34 #include <stdio.h>
35 #include <stdbool.h>
36 #include <string.h>
37 #include <unistd.h>
38
39 #include <rte_config.h>
40 #include <rte_mempool.h>
41 #include <rte_lcore.h>
42
43 #include "spdk/nvme.h"
44 #include "spdk/env.h"
45 #include "spdk/string.h"
46 #include "spdk/nvme_intel.h"
47
48 struct ctrlr_entry {
49 struct spdk_nvme_ctrlr *ctrlr;
50 struct spdk_nvme_intel_rw_latency_page latency_page;
51 struct ctrlr_entry *next;
52 char name[1024];
53 };
54
/* One usable namespace; entries are chained through g_namespaces. */
struct ns_entry {
	struct {
		struct spdk_nvme_ctrlr	*ctrlr;
		struct spdk_nvme_ns	*ns;
	} nvme;

	struct ns_entry	*next;
	/* Length of a single I/O, expressed in namespace sectors. */
	uint32_t	io_size_blocks;
	/* Namespace capacity, expressed in units of one I/O. */
	uint64_t	size_in_ios;
	/* Printable "model (serial)" label of the owning controller. */
	char		name[1024];
};
66
/* Per-worker state for one namespace; chained off worker_thread.ns_ctx. */
struct ns_worker_ctx {
	struct ns_entry		*entry;
	/* Number of I/Os whose completion callback has run. */
	uint64_t		io_completed;
	/* Number of I/Os submitted and not yet completed. */
	uint64_t		current_queue_depth;
	/* Next offset (in I/O units) for the sequential workloads. */
	uint64_t		offset_in_ios;
	/* Set once the run is over: completions no longer resubmit. */
	bool			is_draining;
	struct spdk_nvme_qpair	*qpair;
	struct ns_worker_ctx	*next;
};
76
/* One in-flight I/O: the context it belongs to plus its data buffer. */
struct arb_task {
	struct ns_worker_ctx	*ns_ctx;
	/* Data buffer allocated once by task_ctor(). */
	void			*buf;
};
81
82 struct worker_thread {
83 struct ns_worker_ctx *ns_ctx;
84 struct worker_thread *next;
85 unsigned lcore;
86 enum spdk_nvme_qprio qprio;
87 };
88
/* Run-wide configuration and bookkeeping for the arbitration example. */
struct arb_context {
	/* Shared memory group ID handed to the SPDK environment (-i). */
	int		shm_id;
	/* Admin commands submitted and still awaiting their completion callback. */
	int		outstanding_commands;
	int		num_namespaces;
	int		num_workers;
	/* Percentage of I/Os issued as reads (-M); 100 = all reads, 0 = all writes. */
	int		rw_percentage;
	/* Non-zero for the "rand*" workloads. */
	int		is_random;
	int		queue_depth;
	int		time_in_sec;
	/* I/O count used for the "secs/N ios" column of the report (-n). */
	int		io_count;
	uint8_t		latency_tracking_enable;
	uint8_t		arbitration_mechanism;
	uint8_t		arbitration_config;
	uint32_t	io_size_bytes;
	/* Passed to spdk_nvme_qpair_process_completions() on every poll (-m). */
	uint32_t	max_completions;
	uint64_t	tsc_rate;
	const char	*core_mask;
	const char	*workload_type;
};
108
/* Cached outcome of a Get Features admin command. */
struct feature {
	/* Completion dword 0 of the last successful query. */
	uint32_t	result;
	/* True once a query succeeded; cleared when a Set Features fails. */
	bool		valid;
};
113
114 static struct rte_mempool *task_pool = NULL;
115
116 static struct ctrlr_entry *g_controllers = NULL;
117 static struct ns_entry *g_namespaces = NULL;
118 static struct worker_thread *g_workers = NULL;
119
120 static struct feature features[256];
121
122 static struct arb_context g_arbitration = {
123 .shm_id = -1,
124 .outstanding_commands = 0,
125 .num_workers = 0,
126 .num_namespaces = 0,
127 .rw_percentage = 50,
128 .queue_depth = 64,
129 .time_in_sec = 60,
130 .io_count = 100000,
131 .latency_tracking_enable = 0,
132 .arbitration_mechanism = SPDK_NVME_CC_AMS_RR,
133 .arbitration_config = 0,
134 .io_size_bytes = 131072,
135 .max_completions = 0,
136 /* Default 4 cores for urgent/high/medium/low */
137 .core_mask = "0xf",
138 .workload_type = "randrw",
139 };
140
/*
 * Values programmed by set_arb_feature() when user configuration (-b 1)
 * is requested.
 *
 * With weighted round robin arbitration, the smaller of the weight and the
 * burst is used to decide how many commands are executed from one queue.
 */
#define USER_SPECIFIED_HIGH_PRIORITY_WEIGHT	32
#define USER_SPECIFIED_MEDIUM_PRIORITY_WEIGHT	16
#define USER_SPECIFIED_LOW_PRIORITY_WEIGHT	8
#define USER_SPECIFIED_ARBITRATION_BURST	7 /* No limit */
149
/*
 * Layout of the arbitration dword (priority weights and arbitration burst):
 *
 * ------------------------------------------------------------------------------
 *      31 : 24     |      23 : 16       |     15 : 08     | 07 : 03  |  02 : 00
 * ------------------------------------------------------------------------------
 * High Prio Weight | Medium Prio Weight | Low Prio Weight | Reserved | Arb Burst
 * ------------------------------------------------------------------------------
 *
 * The priority weights are zero-based values.
 */
#define SPDK_NVME_HIGH_PRIO_WEIGHT_SHIFT	24
#define SPDK_NVME_MED_PRIO_WEIGHT_SHIFT		16
#define SPDK_NVME_LOW_PRIO_WEIGHT_SHIFT		8
#define SPDK_NVME_PRIO_WEIGHT_MASK		0xFF
#define SPDK_NVME_ARB_BURST_MASK		0x7

/* Number of queue priority levels; used to wrap the per-worker priority. */
#define SPDK_NVME_QPRIO_MAX			(SPDK_NVME_QPRIO_LOW + 1)
167
/* Forward declarations for functions referenced before their definitions. */
static void task_complete(struct arb_task *task);
static void io_complete(void *ctx, const struct spdk_nvme_cpl *completion);
static void get_arb_feature(struct spdk_nvme_ctrlr *ctrlr);
static int set_arb_feature(struct spdk_nvme_ctrlr *ctrlr);
static const char *print_qprio(enum spdk_nvme_qprio qprio);
177
178
179 static void
180 register_ns(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_ns *ns)
181 {
182 struct ns_entry *entry;
183 const struct spdk_nvme_ctrlr_data *cdata;
184
185 cdata = spdk_nvme_ctrlr_get_data(ctrlr);
186
187 if (!spdk_nvme_ns_is_active(ns)) {
188 printf("Controller %-20.20s (%-20.20s): Skipping inactive NS %u\n",
189 cdata->mn, cdata->sn,
190 spdk_nvme_ns_get_id(ns));
191 return;
192 }
193
194 if (spdk_nvme_ns_get_size(ns) < g_arbitration.io_size_bytes ||
195 spdk_nvme_ns_get_sector_size(ns) > g_arbitration.io_size_bytes) {
196 printf("WARNING: controller %-20.20s (%-20.20s) ns %u has invalid "
197 "ns size %" PRIu64 " / block size %u for I/O size %u\n",
198 cdata->mn, cdata->sn, spdk_nvme_ns_get_id(ns),
199 spdk_nvme_ns_get_size(ns), spdk_nvme_ns_get_sector_size(ns),
200 g_arbitration.io_size_bytes);
201 return;
202 }
203
204 entry = malloc(sizeof(struct ns_entry));
205 if (entry == NULL) {
206 perror("ns_entry malloc");
207 exit(1);
208 }
209
210 entry->nvme.ctrlr = ctrlr;
211 entry->nvme.ns = ns;
212
213 entry->size_in_ios = spdk_nvme_ns_get_size(ns) / g_arbitration.io_size_bytes;
214 entry->io_size_blocks = g_arbitration.io_size_bytes / spdk_nvme_ns_get_sector_size(ns);
215
216 snprintf(entry->name, 44, "%-20.20s (%-20.20s)", cdata->mn, cdata->sn);
217
218 g_arbitration.num_namespaces++;
219 entry->next = g_namespaces;
220 g_namespaces = entry;
221 }
222
223 static void
224 enable_latency_tracking_complete(void *cb_arg, const struct spdk_nvme_cpl *cpl)
225 {
226 if (spdk_nvme_cpl_is_error(cpl)) {
227 printf("enable_latency_tracking_complete failed\n");
228 }
229 g_arbitration.outstanding_commands--;
230 }
231
232 static void
233 set_latency_tracking_feature(struct spdk_nvme_ctrlr *ctrlr, bool enable)
234 {
235 int res;
236 union spdk_nvme_intel_feat_latency_tracking latency_tracking;
237
238 if (enable) {
239 latency_tracking.bits.enable = 0x01;
240 } else {
241 latency_tracking.bits.enable = 0x00;
242 }
243
244 res = spdk_nvme_ctrlr_cmd_set_feature(ctrlr, SPDK_NVME_INTEL_FEAT_LATENCY_TRACKING,
245 latency_tracking.raw, 0, NULL, 0, enable_latency_tracking_complete, NULL);
246 if (res) {
247 printf("fail to allocate nvme request.\n");
248 return;
249 }
250 g_arbitration.outstanding_commands++;
251
252 while (g_arbitration.outstanding_commands) {
253 spdk_nvme_ctrlr_process_admin_completions(ctrlr);
254 }
255 }
256
257 static void
258 register_ctrlr(struct spdk_nvme_ctrlr *ctrlr)
259 {
260 int nsid, num_ns;
261 struct spdk_nvme_ns *ns;
262 struct ctrlr_entry *entry = calloc(1, sizeof(struct ctrlr_entry));
263 const struct spdk_nvme_ctrlr_data *cdata = spdk_nvme_ctrlr_get_data(ctrlr);
264
265 if (entry == NULL) {
266 perror("ctrlr_entry malloc");
267 exit(1);
268 }
269
270 snprintf(entry->name, sizeof(entry->name), "%-20.20s (%-20.20s)", cdata->mn, cdata->sn);
271
272 entry->ctrlr = ctrlr;
273 entry->next = g_controllers;
274 g_controllers = entry;
275
276 if ((g_arbitration.latency_tracking_enable != 0) &&
277 spdk_nvme_ctrlr_is_feature_supported(ctrlr, SPDK_NVME_INTEL_FEAT_LATENCY_TRACKING))
278 set_latency_tracking_feature(ctrlr, true);
279
280 num_ns = spdk_nvme_ctrlr_get_num_ns(ctrlr);
281 for (nsid = 1; nsid <= num_ns; nsid++) {
282 ns = spdk_nvme_ctrlr_get_ns(ctrlr, nsid);
283 if (ns == NULL) {
284 continue;
285 }
286 register_ns(ctrlr, ns);
287 }
288
289 if (g_arbitration.arbitration_mechanism == SPDK_NVME_CAP_AMS_WRR) {
290 get_arb_feature(ctrlr);
291
292 if (g_arbitration.arbitration_config != 0) {
293 set_arb_feature(ctrlr);
294 get_arb_feature(ctrlr);
295 }
296 }
297 }
298
299 static void
300 task_ctor(struct rte_mempool *mp, void *arg, void *__task, unsigned id)
301 {
302 struct arb_task *task = __task;
303 task->buf = spdk_zmalloc(g_arbitration.io_size_bytes, 0x200, NULL);
304 if (task->buf == NULL) {
305 fprintf(stderr, "task->buf spdk_zmalloc failed\n");
306 exit(1);
307 }
308 }
309
310 static __thread unsigned int seed = 0;
311
312 static void
313 submit_single_io(struct ns_worker_ctx *ns_ctx)
314 {
315 struct arb_task *task = NULL;
316 uint64_t offset_in_ios;
317 int rc;
318 struct ns_entry *entry = ns_ctx->entry;
319
320 if (rte_mempool_get(task_pool, (void **)&task) != 0) {
321 fprintf(stderr, "task_pool rte_mempool_get failed\n");
322 exit(1);
323 }
324
325 task->ns_ctx = ns_ctx;
326
327 if (g_arbitration.is_random) {
328 offset_in_ios = rand_r(&seed) % entry->size_in_ios;
329 } else {
330 offset_in_ios = ns_ctx->offset_in_ios++;
331 if (ns_ctx->offset_in_ios == entry->size_in_ios) {
332 ns_ctx->offset_in_ios = 0;
333 }
334 }
335
336 if ((g_arbitration.rw_percentage == 100) ||
337 (g_arbitration.rw_percentage != 0 &&
338 ((rand_r(&seed) % 100) < g_arbitration.rw_percentage))) {
339 rc = spdk_nvme_ns_cmd_read(entry->nvme.ns, ns_ctx->qpair, task->buf,
340 offset_in_ios * entry->io_size_blocks,
341 entry->io_size_blocks, io_complete, task, 0);
342 } else {
343 rc = spdk_nvme_ns_cmd_write(entry->nvme.ns, ns_ctx->qpair, task->buf,
344 offset_in_ios * entry->io_size_blocks,
345 entry->io_size_blocks, io_complete, task, 0);
346 }
347
348 if (rc != 0) {
349 fprintf(stderr, "starting I/O failed\n");
350 }
351
352 ns_ctx->current_queue_depth++;
353 }
354
355 static void
356 task_complete(struct arb_task *task)
357 {
358 struct ns_worker_ctx *ns_ctx;
359
360 ns_ctx = task->ns_ctx;
361 ns_ctx->current_queue_depth--;
362 ns_ctx->io_completed++;
363
364 rte_mempool_put(task_pool, task);
365
366 /*
367 * is_draining indicates when time has expired for the test run
368 * and we are just waiting for the previously submitted I/O
369 * to complete. In this case, do not submit a new I/O to replace
370 * the one just completed.
371 */
372 if (!ns_ctx->is_draining) {
373 submit_single_io(ns_ctx);
374 }
375 }
376
/* NVMe completion callback: ctx is the arb_task passed at submission. */
static void
io_complete(void *ctx, const struct spdk_nvme_cpl *completion)
{
	struct arb_task *task = ctx;

	task_complete(task);
}
382
383 static void
384 check_io(struct ns_worker_ctx *ns_ctx)
385 {
386 spdk_nvme_qpair_process_completions(ns_ctx->qpair, g_arbitration.max_completions);
387 }
388
/* Submit queue_depth I/Os on the context (none if queue_depth <= 0). */
static void
submit_io(struct ns_worker_ctx *ns_ctx, int queue_depth)
{
	int remaining;

	for (remaining = queue_depth; remaining > 0; remaining--) {
		submit_single_io(ns_ctx);
	}
}
396
397 static void
398 drain_io(struct ns_worker_ctx *ns_ctx)
399 {
400 ns_ctx->is_draining = true;
401 while (ns_ctx->current_queue_depth > 0) {
402 check_io(ns_ctx);
403 }
404 }
405
406 static int
407 init_ns_worker_ctx(struct ns_worker_ctx *ns_ctx, enum spdk_nvme_qprio qprio)
408 {
409 ns_ctx->qpair = spdk_nvme_ctrlr_alloc_io_qpair(ns_ctx->entry->nvme.ctrlr, qprio);
410 if (!ns_ctx->qpair) {
411 printf("ERROR: spdk_nvme_ctrlr_alloc_io_qpair failed\n");
412 return 1;
413 }
414
415 return 0;
416 }
417
418 static void
419 cleanup_ns_worker_ctx(struct ns_worker_ctx *ns_ctx)
420 {
421 spdk_nvme_ctrlr_free_io_qpair(ns_ctx->qpair);
422 }
423
424 static void
425 cleanup(void)
426 {
427 struct ns_entry *entry = g_namespaces;
428 struct ns_entry *next_entry = NULL;
429 struct worker_thread *worker = g_workers;
430 struct worker_thread *next_worker = NULL;
431 struct arb_task *task = NULL;
432
433 while (entry) {
434 next_entry = entry->next;
435 free(entry);
436 entry = next_entry;
437 };
438
439 while (worker) {
440 next_worker = worker->next;
441 free(worker->ns_ctx);
442 free(worker);
443 worker = next_worker;
444 };
445
446 if (rte_mempool_get(task_pool, (void **)&task) == 0) {
447 spdk_free(task->buf);
448 }
449
450 }
451
452 static int
453 work_fn(void *arg)
454 {
455 uint64_t tsc_end;
456 struct worker_thread *worker = (struct worker_thread *)arg;
457 struct ns_worker_ctx *ns_ctx = NULL;
458
459 printf("Starting thread on core %u with %s\n", worker->lcore, print_qprio(worker->qprio));
460
461 /* Allocate a queue pair for each namespace. */
462 ns_ctx = worker->ns_ctx;
463 while (ns_ctx != NULL) {
464 if (init_ns_worker_ctx(ns_ctx, worker->qprio) != 0) {
465 printf("ERROR: init_ns_worker_ctx() failed\n");
466 return 1;
467 }
468 ns_ctx = ns_ctx->next;
469 }
470
471 tsc_end = spdk_get_ticks() + g_arbitration.time_in_sec * g_arbitration.tsc_rate;
472
473 /* Submit initial I/O for each namespace. */
474 ns_ctx = worker->ns_ctx;
475
476 while (ns_ctx != NULL) {
477 submit_io(ns_ctx, g_arbitration.queue_depth);
478 ns_ctx = ns_ctx->next;
479 }
480
481 while (1) {
482 /*
483 * Check for completed I/O for each controller. A new
484 * I/O will be submitted in the io_complete callback
485 * to replace each I/O that is completed.
486 */
487 ns_ctx = worker->ns_ctx;
488 while (ns_ctx != NULL) {
489 check_io(ns_ctx);
490 ns_ctx = ns_ctx->next;
491 }
492
493 if (spdk_get_ticks() > tsc_end) {
494 break;
495 }
496 }
497
498 ns_ctx = worker->ns_ctx;
499 while (ns_ctx != NULL) {
500 drain_io(ns_ctx);
501 cleanup_ns_worker_ctx(ns_ctx);
502 ns_ctx = ns_ctx->next;
503 }
504
505 return 0;
506 }
507
/*
 * Print the command line help.
 *
 * The values listed for -a match what parse_args() accepts (0, 1 and 7).
 */
static void
usage(char *program_name)
{
	printf("%s options", program_name);
	printf("\n");
	printf("\t[-q io depth]\n");
	printf("\t[-s io size in bytes]\n");
	printf("\t[-w io pattern type, must be one of\n");
	printf("\t\t(read, write, randread, randwrite, rw, randrw)]\n");
	printf("\t[-M rwmixread (100 for reads, 0 for writes)]\n");
	printf("\t[-l enable latency tracking, default: disabled]\n");
	printf("\t\t(0 - disabled; 1 - enabled)\n");
	printf("\t[-t time in seconds]\n");
	printf("\t[-c core mask for I/O submission/completion]\n");
	printf("\t\t(default: 0xf - 4 cores)\n");
	printf("\t[-m max completions per poll]\n");
	printf("\t\t(default: 0 - unlimited)\n");
	printf("\t[-a arbitration mechanism, must be one of below]\n");
	printf("\t\t(0, 1, 7)\n");
	printf("\t\t(0: default round robin mechanism)\n");
	printf("\t\t(1: weighted round robin mechanism)\n");
	printf("\t\t(7: vendor specific mechanism)\n");
	printf("\t[-b enable arbitration user configuration, default: disabled]\n");
	printf("\t\t(0 - disabled; 1 - enabled)\n");
	printf("\t[-n subjected IOs for performance comparison]\n");
	printf("\t[-i shared memory group ID]\n");
	printf("\t[-h show this usage]\n");
}
535
536 static const char *
537 print_qprio(enum spdk_nvme_qprio qprio)
538 {
539 switch (qprio) {
540 case SPDK_NVME_QPRIO_URGENT:
541 return "urgent priority queue";
542 case SPDK_NVME_QPRIO_HIGH:
543 return "high priority queue";
544 case SPDK_NVME_QPRIO_MEDIUM:
545 return "medium priority queue";
546 case SPDK_NVME_QPRIO_LOW:
547 return "low priority queue";
548 default:
549 return "invalid priority queue";
550 }
551 }
552
553
554 static void
555 print_configuration(char *program_name)
556 {
557 printf("%s run with configuration:\n", program_name);
558 printf("%s -q %d -s %d -w %s -M %d -l %d -t %d -c %s -m %d -a %d -b %d -i %d\n",
559 program_name,
560 g_arbitration.queue_depth,
561 g_arbitration.io_size_bytes,
562 g_arbitration.workload_type,
563 g_arbitration.rw_percentage,
564 g_arbitration.latency_tracking_enable,
565 g_arbitration.time_in_sec,
566 g_arbitration.core_mask,
567 g_arbitration.max_completions,
568 g_arbitration.arbitration_mechanism,
569 g_arbitration.arbitration_config ,
570 g_arbitration.io_count);
571 }
572
573
574 static void
575 print_performance(void)
576 {
577 float io_per_second, sent_all_io_in_secs;
578 struct worker_thread *worker;
579 struct ns_worker_ctx *ns_ctx;
580
581 worker = g_workers;
582 while (worker) {
583 ns_ctx = worker->ns_ctx;
584 while (ns_ctx) {
585 io_per_second = (float)ns_ctx->io_completed / g_arbitration.time_in_sec;
586 sent_all_io_in_secs = g_arbitration.io_count / io_per_second;
587 printf("%-43.43s core %u: %8.2f IO/s %8.2f secs/%d ios\n",
588 ns_ctx->entry->name, worker->lcore,
589 io_per_second, sent_all_io_in_secs, g_arbitration.io_count);
590 ns_ctx = ns_ctx->next;
591 }
592 worker = worker->next;
593 }
594 printf("========================================================\n");
595
596 printf("\n");
597 }
598
599 static void
600 print_latency_page(struct ctrlr_entry *entry)
601 {
602 int i;
603
604 printf("\n");
605 printf("%s\n", entry->name);
606 printf("--------------------------------------------------------\n");
607
608 for (i = 0; i < 32; i++) {
609 if (entry->latency_page.buckets_32us[i])
610 printf("Bucket %dus - %dus: %d\n", i * 32, (i + 1) * 32,
611 entry->latency_page.buckets_32us[i]);
612 }
613 for (i = 0; i < 31; i++) {
614 if (entry->latency_page.buckets_1ms[i])
615 printf("Bucket %dms - %dms: %d\n", i + 1, i + 2,
616 entry->latency_page.buckets_1ms[i]);
617 }
618 for (i = 0; i < 31; i++) {
619 if (entry->latency_page.buckets_32ms[i])
620 printf("Bucket %dms - %dms: %d\n", (i + 1) * 32, (i + 2) * 32,
621 entry->latency_page.buckets_32ms[i]);
622 }
623 }
624
625 static void
626 print_latency_statistics(const char *op_name, enum spdk_nvme_intel_log_page log_page)
627 {
628 struct ctrlr_entry *ctrlr;
629
630 printf("%s Latency Statistics:\n", op_name);
631 printf("========================================================\n");
632 ctrlr = g_controllers;
633 while (ctrlr) {
634 if (spdk_nvme_ctrlr_is_log_page_supported(ctrlr->ctrlr, log_page)) {
635 if (spdk_nvme_ctrlr_cmd_get_log_page(
636 ctrlr->ctrlr, log_page,
637 SPDK_NVME_GLOBAL_NS_TAG,
638 &ctrlr->latency_page,
639 sizeof(struct spdk_nvme_intel_rw_latency_page),
640 0,
641 enable_latency_tracking_complete,
642 NULL)) {
643 printf("nvme_ctrlr_cmd_get_log_page() failed\n");
644 exit(1);
645 }
646
647 g_arbitration.outstanding_commands++;
648 } else {
649 printf("Controller %s: %s latency statistics not supported\n",
650 ctrlr->name, op_name);
651 }
652 ctrlr = ctrlr->next;
653 }
654
655 while (g_arbitration.outstanding_commands) {
656 ctrlr = g_controllers;
657 while (ctrlr) {
658 spdk_nvme_ctrlr_process_admin_completions(ctrlr->ctrlr);
659 ctrlr = ctrlr->next;
660 }
661 }
662
663 ctrlr = g_controllers;
664 while (ctrlr) {
665 if (spdk_nvme_ctrlr_is_log_page_supported(ctrlr->ctrlr, log_page)) {
666 print_latency_page(ctrlr);
667 }
668 ctrlr = ctrlr->next;
669 }
670 printf("\n");
671 }
672
673 static void
674 print_stats(void)
675 {
676 print_performance();
677 if (g_arbitration.latency_tracking_enable) {
678 if (g_arbitration.rw_percentage != 0) {
679 print_latency_statistics("Read", SPDK_NVME_INTEL_LOG_READ_CMD_LATENCY);
680 }
681 if (g_arbitration.rw_percentage != 100) {
682 print_latency_statistics("Write", SPDK_NVME_INTEL_LOG_WRITE_CMD_LATENCY);
683 }
684 }
685 }
686
687 static int
688 parse_args(int argc, char **argv)
689 {
690 const char *workload_type = NULL;
691 int op = 0;
692 bool mix_specified = false;
693
694 while ((op = getopt(argc, argv, "c:l:i:m:q:s:t:w:M:a:b:n:h")) != -1) {
695 switch (op) {
696 case 'c':
697 g_arbitration.core_mask = optarg;
698 break;
699 case 'i':
700 g_arbitration.shm_id = atoi(optarg);
701 break;
702 case 'l':
703 g_arbitration.latency_tracking_enable = atoi(optarg);
704 break;
705 case 'm':
706 g_arbitration.max_completions = atoi(optarg);
707 break;
708 case 'q':
709 g_arbitration.queue_depth = atoi(optarg);
710 break;
711 case 's':
712 g_arbitration.io_size_bytes = atoi(optarg);
713 break;
714 case 't':
715 g_arbitration.time_in_sec = atoi(optarg);
716 break;
717 case 'w':
718 g_arbitration.workload_type = optarg;
719 break;
720 case 'M':
721 g_arbitration.rw_percentage = atoi(optarg);
722 mix_specified = true;
723 break;
724 case 'a':
725 g_arbitration.arbitration_mechanism = atoi(optarg);
726 break;
727 case 'b':
728 g_arbitration.arbitration_config = atoi(optarg);
729 break;
730 case 'n':
731 g_arbitration.io_count = atoi(optarg);
732 break;
733 case 'h':
734 default:
735 usage(argv[0]);
736 return 1;
737 }
738 }
739
740 workload_type = g_arbitration.workload_type;
741
742 if (strcmp(workload_type, "read") &&
743 strcmp(workload_type, "write") &&
744 strcmp(workload_type, "randread") &&
745 strcmp(workload_type, "randwrite") &&
746 strcmp(workload_type, "rw") &&
747 strcmp(workload_type, "randrw")) {
748 fprintf(stderr,
749 "io pattern type must be one of\n"
750 "(read, write, randread, randwrite, rw, randrw)\n");
751 return 1;
752 }
753
754 if (!strcmp(workload_type, "read") ||
755 !strcmp(workload_type, "randread")) {
756 g_arbitration.rw_percentage = 100;
757 }
758
759 if (!strcmp(workload_type, "write") ||
760 !strcmp(workload_type, "randwrite")) {
761 g_arbitration.rw_percentage = 0;
762 }
763
764 if (!strcmp(workload_type, "read") ||
765 !strcmp(workload_type, "randread") ||
766 !strcmp(workload_type, "write") ||
767 !strcmp(workload_type, "randwrite")) {
768 if (mix_specified) {
769 fprintf(stderr, "Ignoring -M option... Please use -M option"
770 " only when using rw or randrw.\n");
771 }
772 }
773
774 if (!strcmp(workload_type, "rw") ||
775 !strcmp(workload_type, "randrw")) {
776 if (g_arbitration.rw_percentage < 0 || g_arbitration.rw_percentage > 100) {
777 fprintf(stderr,
778 "-M must be specified to value from 0 to 100 "
779 "for rw or randrw.\n");
780 return 1;
781 }
782 }
783
784 if (!strcmp(workload_type, "read") ||
785 !strcmp(workload_type, "write") ||
786 !strcmp(workload_type, "rw")) {
787 g_arbitration.is_random = 0;
788 } else {
789 g_arbitration.is_random = 1;
790 }
791
792 if (g_arbitration.latency_tracking_enable != 0 &&
793 g_arbitration.latency_tracking_enable != 1) {
794 fprintf(stderr,
795 "-l must be specified to value 0 or 1.\n");
796 return 1;
797 }
798
799 switch (g_arbitration.arbitration_mechanism) {
800 case SPDK_NVME_CC_AMS_RR:
801 case SPDK_NVME_CC_AMS_WRR:
802 case SPDK_NVME_CC_AMS_VS:
803 break;
804 default:
805 fprintf(stderr,
806 "-a must be specified to value 0, 1, or 7.\n");
807 return 1;
808 }
809
810 if (g_arbitration.arbitration_config != 0 &&
811 g_arbitration.arbitration_config != 1) {
812 fprintf(stderr,
813 "-b must be specified to value 0 or 1.\n");
814 return 1;
815 } else if (g_arbitration.arbitration_config == 1 &&
816 g_arbitration.arbitration_mechanism != SPDK_NVME_CC_AMS_WRR) {
817 fprintf(stderr,
818 "-a must be specified to 1 (WRR) together.\n");
819 return 1;
820 }
821
822 return 0;
823 }
824
825 static int
826 register_workers(void)
827 {
828 uint32_t i;
829 struct worker_thread *worker;
830 enum spdk_nvme_qprio qprio = SPDK_NVME_QPRIO_URGENT;
831
832 g_workers = NULL;
833 g_arbitration.num_workers = 0;
834
835 SPDK_ENV_FOREACH_CORE(i) {
836 worker = calloc(1, sizeof(*worker));
837 if (worker == NULL) {
838 fprintf(stderr, "Unable to allocate worker\n");
839 return -1;
840 }
841
842 worker->lcore = i;
843 worker->next = g_workers;
844 g_workers = worker;
845 g_arbitration.num_workers++;
846
847 if (g_arbitration.arbitration_mechanism == SPDK_NVME_CAP_AMS_WRR) {
848 qprio++;
849 }
850
851 worker->qprio = qprio % SPDK_NVME_QPRIO_MAX;
852 }
853
854 return 0;
855 }
856
857 static bool
858 probe_cb(void *cb_ctx, const struct spdk_nvme_transport_id *trid,
859 struct spdk_nvme_ctrlr_opts *opts)
860 {
861 /* Update with user specified arbitration configuration */
862 opts->arb_mechanism = g_arbitration.arbitration_mechanism;
863
864 printf("Attaching to %s\n", trid->traddr);
865
866 return true;
867 }
868
869 static void
870 attach_cb(void *cb_ctx, const struct spdk_nvme_transport_id *trid,
871 struct spdk_nvme_ctrlr *ctrlr, const struct spdk_nvme_ctrlr_opts *opts)
872 {
873 printf("Attached to %s\n", trid->traddr);
874
875 /* Update with actual arbitration configuration in use */
876 g_arbitration.arbitration_mechanism = opts->arb_mechanism;
877
878 register_ctrlr(ctrlr);
879 }
880
881 static int
882 register_controllers(void)
883 {
884 printf("Initializing NVMe Controllers\n");
885
886 if (spdk_nvme_probe(NULL, NULL, probe_cb, attach_cb, NULL) != 0) {
887 fprintf(stderr, "spdk_nvme_probe() failed\n");
888 return 1;
889 }
890
891 if (g_arbitration.num_namespaces == 0) {
892 fprintf(stderr, "No valid namespaces to continue IO testing\n");
893 return 1;
894 }
895
896 return 0;
897 }
898
899 static void
900 unregister_controllers(void)
901 {
902 struct ctrlr_entry *entry = g_controllers;
903
904 while (entry) {
905 struct ctrlr_entry *next = entry->next;
906 if (g_arbitration.latency_tracking_enable &&
907 spdk_nvme_ctrlr_is_feature_supported(entry->ctrlr, SPDK_NVME_INTEL_FEAT_LATENCY_TRACKING))
908 set_latency_tracking_feature(entry->ctrlr, false);
909 spdk_nvme_detach(entry->ctrlr);
910 free(entry);
911 entry = next;
912 }
913 }
914
915 static int
916 associate_workers_with_ns(void)
917 {
918 struct ns_entry *entry = g_namespaces;
919 struct worker_thread *worker = g_workers;
920 struct ns_worker_ctx *ns_ctx;
921 int i, count;
922
923 count = g_arbitration.num_namespaces > g_arbitration.num_workers ?
924 g_arbitration.num_namespaces : g_arbitration.num_workers;
925
926 for (i = 0; i < count; i++) {
927 if (entry == NULL) {
928 break;
929 }
930
931 ns_ctx = malloc(sizeof(struct ns_worker_ctx));
932 if (!ns_ctx) {
933 return 1;
934 }
935 memset(ns_ctx, 0, sizeof(*ns_ctx));
936
937 printf("Associating %s with lcore %d\n", entry->name, worker->lcore);
938 ns_ctx->entry = entry;
939 ns_ctx->next = worker->ns_ctx;
940 worker->ns_ctx = ns_ctx;
941
942 worker = worker->next;
943 if (worker == NULL) {
944 worker = g_workers;
945 }
946
947 entry = entry->next;
948 if (entry == NULL) {
949 entry = g_namespaces;
950 }
951
952 }
953
954 return 0;
955 }
956
957 static void
958 get_feature_completion(void *cb_arg, const struct spdk_nvme_cpl *cpl)
959 {
960 struct feature *feature = cb_arg;
961 int fid = feature - features;
962
963 if (spdk_nvme_cpl_is_error(cpl)) {
964 printf("get_feature(0x%02X) failed\n", fid);
965 } else {
966 feature->result = cpl->cdw0;
967 feature->valid = true;
968 }
969
970 g_arbitration.outstanding_commands--;
971 }
972
973 static int
974 get_feature(struct spdk_nvme_ctrlr *ctrlr, uint8_t fid)
975 {
976 struct spdk_nvme_cmd cmd = {};
977
978 cmd.opc = SPDK_NVME_OPC_GET_FEATURES;
979 cmd.cdw10 = fid;
980
981 return spdk_nvme_ctrlr_cmd_admin_raw(ctrlr, &cmd, NULL, 0, get_feature_completion, &features[fid]);
982 }
983
984 static void
985 get_arb_feature(struct spdk_nvme_ctrlr *ctrlr)
986 {
987 get_feature(ctrlr, SPDK_NVME_FEAT_ARBITRATION);
988
989 g_arbitration.outstanding_commands++;
990
991 while (g_arbitration.outstanding_commands) {
992 spdk_nvme_ctrlr_process_admin_completions(ctrlr);
993 }
994
995 if (features[SPDK_NVME_FEAT_ARBITRATION].valid) {
996 uint32_t arb = features[SPDK_NVME_FEAT_ARBITRATION].result;
997 unsigned ab, lpw, mpw, hpw;
998
999 ab = arb & SPDK_NVME_ARB_BURST_MASK;
1000 lpw = ((arb >> SPDK_NVME_LOW_PRIO_WEIGHT_SHIFT) & SPDK_NVME_PRIO_WEIGHT_MASK) + 1;
1001 mpw = ((arb >> SPDK_NVME_MED_PRIO_WEIGHT_SHIFT) & SPDK_NVME_PRIO_WEIGHT_MASK) + 1;
1002 hpw = ((arb >> SPDK_NVME_HIGH_PRIO_WEIGHT_SHIFT) & SPDK_NVME_PRIO_WEIGHT_MASK) + 1;
1003
1004 printf("Current Arbitration Configuration\n");
1005 printf("===========\n");
1006 printf("Arbitration Burst: ");
1007 if (ab == SPDK_NVME_ARB_BURST_MASK) {
1008 printf("no limit\n");
1009 } else {
1010 printf("%u\n", 1u << ab);
1011 }
1012
1013 printf("Low Priority Weight: %u\n", lpw);
1014 printf("Medium Priority Weight: %u\n", mpw);
1015 printf("High Priority Weight: %u\n", hpw);
1016 printf("\n");
1017 }
1018 }
1019
1020 static void
1021 set_feature_completion(void *cb_arg, const struct spdk_nvme_cpl *cpl)
1022 {
1023 struct feature *feature = cb_arg;
1024 int fid = feature - features;
1025
1026 if (spdk_nvme_cpl_is_error(cpl)) {
1027 printf("set_feature(0x%02X) failed\n", fid);
1028 feature->valid = false;
1029 } else {
1030 printf("Set Arbitration Feature Successfully\n");
1031 }
1032
1033 g_arbitration.outstanding_commands--;
1034 }
1035
1036 static int
1037 set_arb_feature(struct spdk_nvme_ctrlr *ctrlr)
1038 {
1039 int ret;
1040 struct spdk_nvme_cmd cmd = {};
1041 uint32_t arb = 0;
1042 unsigned ab, lpw, mpw, hpw;
1043
1044 cmd.opc = SPDK_NVME_OPC_SET_FEATURES;
1045 cmd.cdw10 = SPDK_NVME_FEAT_ARBITRATION;
1046
1047 g_arbitration.outstanding_commands = 0;
1048
1049 if (features[SPDK_NVME_FEAT_ARBITRATION].valid) {
1050 ab = USER_SPECIFIED_ARBITRATION_BURST & SPDK_NVME_ARB_BURST_MASK;
1051 hpw = USER_SPECIFIED_HIGH_PRIORITY_WEIGHT << SPDK_NVME_HIGH_PRIO_WEIGHT_SHIFT;
1052 mpw = USER_SPECIFIED_MEDIUM_PRIORITY_WEIGHT << SPDK_NVME_MED_PRIO_WEIGHT_SHIFT;
1053 lpw = USER_SPECIFIED_LOW_PRIORITY_WEIGHT << SPDK_NVME_LOW_PRIO_WEIGHT_SHIFT;
1054 arb = hpw | mpw | lpw | ab;
1055 cmd.cdw11 = arb;
1056 }
1057
1058 ret = spdk_nvme_ctrlr_cmd_admin_raw(ctrlr, &cmd, NULL, 0,
1059 set_feature_completion, &features[SPDK_NVME_FEAT_ARBITRATION]);
1060 if (ret) {
1061 printf("Set Arbitration Feature: Failed 0x%x\n", ret);
1062 return 1;
1063 }
1064
1065 g_arbitration.outstanding_commands++;
1066
1067 while (g_arbitration.outstanding_commands) {
1068 spdk_nvme_ctrlr_process_admin_completions(ctrlr);
1069 }
1070
1071 if (!features[SPDK_NVME_FEAT_ARBITRATION].valid) {
1072 printf("Set Arbitration Feature failed and use default configuration\n");
1073 }
1074
1075 return 0;
1076 }
1077
1078 int
1079 main(int argc, char **argv)
1080 {
1081 int rc;
1082 struct worker_thread *worker, *master_worker;
1083 unsigned master_core;
1084 char task_pool_name[30];
1085 uint32_t task_count;
1086 struct spdk_env_opts opts;
1087
1088 rc = parse_args(argc, argv);
1089 if (rc != 0) {
1090 return rc;
1091 }
1092
1093 spdk_env_opts_init(&opts);
1094 opts.name = "arb";
1095 opts.core_mask = g_arbitration.core_mask;
1096 opts.shm_id = g_arbitration.shm_id;
1097 spdk_env_init(&opts);
1098
1099 g_arbitration.tsc_rate = spdk_get_ticks_hz();
1100
1101 if (register_workers() != 0) {
1102 return 1;
1103 }
1104
1105 if (register_controllers() != 0) {
1106 return 1;
1107 }
1108
1109 if (associate_workers_with_ns() != 0) {
1110 return 1;
1111 }
1112
1113 snprintf(task_pool_name, sizeof(task_pool_name), "task_pool_%d", getpid());
1114
1115 /*
1116 * The task_count will be dynamically calculated based on the
1117 * number of attached active namespaces, queue depth and number
1118 * of cores (workers) involved in the IO perations.
1119 */
1120 task_count = g_arbitration.num_namespaces > g_arbitration.num_workers ?
1121 g_arbitration.num_namespaces : g_arbitration.num_workers;
1122 task_count *= g_arbitration.queue_depth;
1123
1124 task_pool = rte_mempool_create(task_pool_name, task_count,
1125 sizeof(struct arb_task),
1126 0, 0, NULL, NULL, task_ctor, NULL,
1127 SOCKET_ID_ANY, 0);
1128 if (task_pool == NULL) {
1129 fprintf(stderr, "could not initialize task pool\n");
1130 return 1;
1131 }
1132
1133 print_configuration(argv[0]);
1134
1135 printf("Initialization complete. Launching workers.\n");
1136
1137 /* Launch all of the slave workers */
1138 master_core = rte_get_master_lcore();
1139 master_worker = NULL;
1140 worker = g_workers;
1141 while (worker != NULL) {
1142 if (worker->lcore != master_core) {
1143 rte_eal_remote_launch(work_fn, worker, worker->lcore);
1144 } else {
1145 assert(master_worker == NULL);
1146 master_worker = worker;
1147 }
1148 worker = worker->next;
1149 }
1150
1151 assert(master_worker != NULL);
1152 rc = work_fn(master_worker);
1153
1154 rte_eal_mp_wait_lcore();
1155
1156 print_stats();
1157
1158 unregister_controllers();
1159
1160 cleanup();
1161
1162 if (rc != 0) {
1163 fprintf(stderr, "%s: errors occured\n", argv[0]);
1164 }
1165
1166 return rc;
1167 }