]> git.proxmox.com Git - ceph.git/blob - ceph/src/spdk/examples/nvme/arbitration/arbitration.c
update sources to ceph Nautilus 14.2.1
[ceph.git] / ceph / src / spdk / examples / nvme / arbitration / arbitration.c
1 /*-
2 * BSD LICENSE
3 *
4 * Copyright (c) Intel Corporation.
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 *
11 * * Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * * Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in
15 * the documentation and/or other materials provided with the
16 * distribution.
17 * * Neither the name of Intel Corporation nor the names of its
18 * contributors may be used to endorse or promote products derived
19 * from this software without specific prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32 */
33
34 #include "spdk/stdinc.h"
35
36 #include "spdk/nvme.h"
37 #include "spdk/env.h"
38 #include "spdk/string.h"
39 #include "spdk/nvme_intel.h"
40
/*
 * Per-controller bookkeeping node (singly linked via g_controllers):
 * the controller handle, a buffer for the Intel vendor-specific
 * read/write latency log page, and a printable "model (serial)" name.
 */
struct ctrlr_entry {
	struct spdk_nvme_ctrlr *ctrlr;
	struct spdk_nvme_intel_rw_latency_page latency_page;
	struct ctrlr_entry *next;
	char name[1024];
};
47
/*
 * Per-namespace bookkeeping node (singly linked via g_namespaces).
 * Sizes are pre-computed in register_ns() so the hot I/O path only
 * multiplies/compares.
 */
struct ns_entry {
	struct {
		struct spdk_nvme_ctrlr *ctrlr;
		struct spdk_nvme_ns *ns;
	} nvme;

	struct ns_entry *next;
	uint32_t io_size_blocks;	/* configured I/O size expressed in sectors */
	uint64_t size_in_ios;		/* namespace capacity in I/O-size units */
	char name[1024];
};
59
/*
 * Per-(worker, namespace) I/O state. Each worker thread owns a list of
 * these; each holds the qpair it submits on and its in-flight accounting.
 */
struct ns_worker_ctx {
	struct ns_entry *entry;
	uint64_t io_completed;		/* total completions observed so far */
	uint64_t current_queue_depth;	/* I/Os currently in flight */
	uint64_t offset_in_ios;		/* next offset for sequential workloads */
	bool is_draining;		/* set once the timed run has ended */
	struct spdk_nvme_qpair *qpair;
	struct ns_worker_ctx *next;
};
69
/* One outstanding I/O: its owning context plus the DMA-able data buffer. */
struct arb_task {
	struct ns_worker_ctx *ns_ctx;
	void *buf;
};
74
/*
 * One polling thread pinned to an lcore. All qpairs a worker allocates
 * use the same queue priority (meaningful only under WRR arbitration).
 */
struct worker_thread {
	struct ns_worker_ctx *ns_ctx;	/* namespaces this worker drives */
	struct worker_thread *next;
	unsigned lcore;
	enum spdk_nvme_qprio qprio;
};
81
/*
 * Global run configuration and shared counters; defaults live in
 * g_arbitration below and are overridden by parse_args().
 */
struct arb_context {
	int shm_id;			/* SPDK shared-memory group ID (-i) */
	int outstanding_commands;	/* in-flight admin commands being polled */
	int num_namespaces;
	int num_workers;
	int rw_percentage;		/* read percentage for mixed workloads (-M) */
	int is_random;			/* nonzero for randread/randwrite/randrw */
	int queue_depth;		/* per-namespace queue depth (-q) */
	int time_in_sec;		/* run duration (-t) */
	int io_count;			/* I/O count used for reporting (-n) */
	uint8_t latency_tracking_enable;	/* Intel latency tracking (-l) */
	uint8_t arbitration_mechanism;		/* CC.AMS encoding (-a) */
	uint8_t arbitration_config;		/* user WRR weights enabled (-b) */
	uint32_t io_size_bytes;		/* per-I/O size (-s) */
	uint32_t max_completions;	/* completions reaped per poll, 0 = unlimited */
	uint64_t tsc_rate;		/* ticks per second, from spdk_get_ticks_hz() */
	const char *core_mask;		/* lcore mask for workers (-c) */
	const char *workload_type;	/* read/write/randread/... (-w) */
};
101
/* Cached result of a Get Features admin command, indexed by feature ID. */
struct feature {
	uint32_t result;	/* completion dword 0 returned by the controller */
	bool valid;		/* true once a successful completion arrived */
};
106
/* Pool of arb_task objects shared by all workers. */
static struct spdk_mempool *task_pool = NULL;

/* Singly linked lists built during discovery / worker registration. */
static struct ctrlr_entry *g_controllers = NULL;
static struct ns_entry *g_namespaces = NULL;
static struct worker_thread *g_workers = NULL;

/* Get/Set Features results, indexed by NVMe feature identifier (0-255). */
static struct feature features[256];

/* Run configuration defaults; see parse_args() for the matching options. */
static struct arb_context g_arbitration = {
	.shm_id = -1,
	.outstanding_commands = 0,
	.num_workers = 0,
	.num_namespaces = 0,
	.rw_percentage = 50,
	.queue_depth = 64,
	.time_in_sec = 60,
	.io_count = 100000,
	.latency_tracking_enable = 0,
	.arbitration_mechanism = SPDK_NVME_CC_AMS_RR,
	.arbitration_config = 0,
	.io_size_bytes = 131072,
	.max_completions = 0,
	/* Default 4 cores for urgent/high/medium/low */
	.core_mask = "0xf",
	.workload_type = "randrw",
};
133
/*
 * For weighted round robin arbitration mechanism, the smaller value between
 * weight and burst will be picked to execute the commands in one queue.
 */
#define USER_SPECIFIED_HIGH_PRIORITY_WEIGHT 32
#define USER_SPECIFIED_MEDIUM_PRIORITY_WEIGHT 16
#define USER_SPECIFIED_LOW_PRIORITY_WEIGHT 8
#define USER_SPECIFIED_ARBITRATION_BURST 7 /* No limit */

/*
 * Description of dword for priority weight and arbitration burst
 * ------------------------------------------------------------------------------
 * 31 : 24 | 23 : 16 | 15 : 08 | 07 : 03 | 02 : 00
 * ------------------------------------------------------------------------------
 * High Prio Weight | Medium Prio Weight | Low Prio Weight | Reserved | Arb Burst
 * ------------------------------------------------------------------------------
 *
 * The priority weights are zero based value.
 */
#define SPDK_NVME_HIGH_PRIO_WEIGHT_SHIFT 24
#define SPDK_NVME_MED_PRIO_WEIGHT_SHIFT 16
#define SPDK_NVME_LOW_PRIO_WEIGHT_SHIFT 8
#define SPDK_NVME_PRIO_WEIGHT_MASK 0xFF
#define SPDK_NVME_ARB_BURST_MASK 0x7

/* Number of distinct queue priorities; used to round-robin worker qprio. */
#define SPDK_NVME_QPRIO_MAX (SPDK_NVME_QPRIO_LOW + 1)
160
161 static void task_complete(struct arb_task *task);
162
163 static void io_complete(void *ctx, const struct spdk_nvme_cpl *completion);
164
165 static void get_arb_feature(struct spdk_nvme_ctrlr *ctrlr);
166
167 static int set_arb_feature(struct spdk_nvme_ctrlr *ctrlr);
168
169 static const char *print_qprio(enum spdk_nvme_qprio);
170
171
172 static void
173 register_ns(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_ns *ns)
174 {
175 struct ns_entry *entry;
176 const struct spdk_nvme_ctrlr_data *cdata;
177
178 cdata = spdk_nvme_ctrlr_get_data(ctrlr);
179
180 if (!spdk_nvme_ns_is_active(ns)) {
181 printf("Controller %-20.20s (%-20.20s): Skipping inactive NS %u\n",
182 cdata->mn, cdata->sn,
183 spdk_nvme_ns_get_id(ns));
184 return;
185 }
186
187 if (spdk_nvme_ns_get_size(ns) < g_arbitration.io_size_bytes ||
188 spdk_nvme_ns_get_sector_size(ns) > g_arbitration.io_size_bytes) {
189 printf("WARNING: controller %-20.20s (%-20.20s) ns %u has invalid "
190 "ns size %" PRIu64 " / block size %u for I/O size %u\n",
191 cdata->mn, cdata->sn, spdk_nvme_ns_get_id(ns),
192 spdk_nvme_ns_get_size(ns), spdk_nvme_ns_get_sector_size(ns),
193 g_arbitration.io_size_bytes);
194 return;
195 }
196
197 entry = malloc(sizeof(struct ns_entry));
198 if (entry == NULL) {
199 perror("ns_entry malloc");
200 exit(1);
201 }
202
203 entry->nvme.ctrlr = ctrlr;
204 entry->nvme.ns = ns;
205
206 entry->size_in_ios = spdk_nvme_ns_get_size(ns) / g_arbitration.io_size_bytes;
207 entry->io_size_blocks = g_arbitration.io_size_bytes / spdk_nvme_ns_get_sector_size(ns);
208
209 snprintf(entry->name, 44, "%-20.20s (%-20.20s)", cdata->mn, cdata->sn);
210
211 g_arbitration.num_namespaces++;
212 entry->next = g_namespaces;
213 g_namespaces = entry;
214 }
215
216 static void
217 enable_latency_tracking_complete(void *cb_arg, const struct spdk_nvme_cpl *cpl)
218 {
219 if (spdk_nvme_cpl_is_error(cpl)) {
220 printf("enable_latency_tracking_complete failed\n");
221 }
222 g_arbitration.outstanding_commands--;
223 }
224
225 static void
226 set_latency_tracking_feature(struct spdk_nvme_ctrlr *ctrlr, bool enable)
227 {
228 int res;
229 union spdk_nvme_intel_feat_latency_tracking latency_tracking;
230
231 if (enable) {
232 latency_tracking.bits.enable = 0x01;
233 } else {
234 latency_tracking.bits.enable = 0x00;
235 }
236
237 res = spdk_nvme_ctrlr_cmd_set_feature(ctrlr, SPDK_NVME_INTEL_FEAT_LATENCY_TRACKING,
238 latency_tracking.raw, 0, NULL, 0, enable_latency_tracking_complete, NULL);
239 if (res) {
240 printf("fail to allocate nvme request.\n");
241 return;
242 }
243 g_arbitration.outstanding_commands++;
244
245 while (g_arbitration.outstanding_commands) {
246 spdk_nvme_ctrlr_process_admin_completions(ctrlr);
247 }
248 }
249
250 static void
251 register_ctrlr(struct spdk_nvme_ctrlr *ctrlr)
252 {
253 int nsid, num_ns;
254 struct spdk_nvme_ns *ns;
255 struct ctrlr_entry *entry = calloc(1, sizeof(struct ctrlr_entry));
256 const struct spdk_nvme_ctrlr_data *cdata = spdk_nvme_ctrlr_get_data(ctrlr);
257
258 if (entry == NULL) {
259 perror("ctrlr_entry malloc");
260 exit(1);
261 }
262
263 snprintf(entry->name, sizeof(entry->name), "%-20.20s (%-20.20s)", cdata->mn, cdata->sn);
264
265 entry->ctrlr = ctrlr;
266 entry->next = g_controllers;
267 g_controllers = entry;
268
269 if ((g_arbitration.latency_tracking_enable != 0) &&
270 spdk_nvme_ctrlr_is_feature_supported(ctrlr, SPDK_NVME_INTEL_FEAT_LATENCY_TRACKING)) {
271 set_latency_tracking_feature(ctrlr, true);
272 }
273
274 num_ns = spdk_nvme_ctrlr_get_num_ns(ctrlr);
275 for (nsid = 1; nsid <= num_ns; nsid++) {
276 ns = spdk_nvme_ctrlr_get_ns(ctrlr, nsid);
277 if (ns == NULL) {
278 continue;
279 }
280 register_ns(ctrlr, ns);
281 }
282
283 if (g_arbitration.arbitration_mechanism == SPDK_NVME_CAP_AMS_WRR) {
284 get_arb_feature(ctrlr);
285
286 if (g_arbitration.arbitration_config != 0) {
287 set_arb_feature(ctrlr);
288 get_arb_feature(ctrlr);
289 }
290 }
291 }
292
293 static __thread unsigned int seed = 0;
294
295 static void
296 submit_single_io(struct ns_worker_ctx *ns_ctx)
297 {
298 struct arb_task *task = NULL;
299 uint64_t offset_in_ios;
300 int rc;
301 struct ns_entry *entry = ns_ctx->entry;
302
303 task = spdk_mempool_get(task_pool);
304 if (!task) {
305 fprintf(stderr, "Failed to get task from task_pool\n");
306 exit(1);
307 }
308
309 task->buf = spdk_dma_zmalloc(g_arbitration.io_size_bytes, 0x200, NULL);
310 if (!task->buf) {
311 spdk_mempool_put(task_pool, task);
312 fprintf(stderr, "task->buf spdk_dma_zmalloc failed\n");
313 exit(1);
314 }
315
316 task->ns_ctx = ns_ctx;
317
318 if (g_arbitration.is_random) {
319 offset_in_ios = rand_r(&seed) % entry->size_in_ios;
320 } else {
321 offset_in_ios = ns_ctx->offset_in_ios++;
322 if (ns_ctx->offset_in_ios == entry->size_in_ios) {
323 ns_ctx->offset_in_ios = 0;
324 }
325 }
326
327 if ((g_arbitration.rw_percentage == 100) ||
328 (g_arbitration.rw_percentage != 0 &&
329 ((rand_r(&seed) % 100) < g_arbitration.rw_percentage))) {
330 rc = spdk_nvme_ns_cmd_read(entry->nvme.ns, ns_ctx->qpair, task->buf,
331 offset_in_ios * entry->io_size_blocks,
332 entry->io_size_blocks, io_complete, task, 0);
333 } else {
334 rc = spdk_nvme_ns_cmd_write(entry->nvme.ns, ns_ctx->qpair, task->buf,
335 offset_in_ios * entry->io_size_blocks,
336 entry->io_size_blocks, io_complete, task, 0);
337 }
338
339 if (rc != 0) {
340 fprintf(stderr, "starting I/O failed\n");
341 }
342
343 ns_ctx->current_queue_depth++;
344 }
345
346 static void
347 task_complete(struct arb_task *task)
348 {
349 struct ns_worker_ctx *ns_ctx;
350
351 ns_ctx = task->ns_ctx;
352 ns_ctx->current_queue_depth--;
353 ns_ctx->io_completed++;
354
355 spdk_dma_free(task->buf);
356 spdk_mempool_put(task_pool, task);
357
358 /*
359 * is_draining indicates when time has expired for the test run
360 * and we are just waiting for the previously submitted I/O
361 * to complete. In this case, do not submit a new I/O to replace
362 * the one just completed.
363 */
364 if (!ns_ctx->is_draining) {
365 submit_single_io(ns_ctx);
366 }
367 }
368
/* NVMe completion callback: the cb_arg is the arb_task being retired. */
static void
io_complete(void *ctx, const struct spdk_nvme_cpl *completion)
{
	struct arb_task *task = ctx;

	task_complete(task);
}
374
375 static void
376 check_io(struct ns_worker_ctx *ns_ctx)
377 {
378 spdk_nvme_qpair_process_completions(ns_ctx->qpair, g_arbitration.max_completions);
379 }
380
/* Prime the context with queue_depth initial I/Os. */
static void
submit_io(struct ns_worker_ctx *ns_ctx, int queue_depth)
{
	int i;

	for (i = 0; i < queue_depth; i++) {
		submit_single_io(ns_ctx);
	}
}
388
389 static void
390 drain_io(struct ns_worker_ctx *ns_ctx)
391 {
392 ns_ctx->is_draining = true;
393 while (ns_ctx->current_queue_depth > 0) {
394 check_io(ns_ctx);
395 }
396 }
397
398 static int
399 init_ns_worker_ctx(struct ns_worker_ctx *ns_ctx, enum spdk_nvme_qprio qprio)
400 {
401 struct spdk_nvme_ctrlr *ctrlr = ns_ctx->entry->nvme.ctrlr;
402 struct spdk_nvme_io_qpair_opts opts;
403
404 spdk_nvme_ctrlr_get_default_io_qpair_opts(ctrlr, &opts, sizeof(opts));
405 opts.qprio = qprio;
406
407 ns_ctx->qpair = spdk_nvme_ctrlr_alloc_io_qpair(ctrlr, &opts, sizeof(opts));
408 if (!ns_ctx->qpair) {
409 printf("ERROR: spdk_nvme_ctrlr_alloc_io_qpair failed\n");
410 return 1;
411 }
412
413 return 0;
414 }
415
/* Release the I/O qpair allocated by init_ns_worker_ctx(). */
static void
cleanup_ns_worker_ctx(struct ns_worker_ctx *ns_ctx)
{
	spdk_nvme_ctrlr_free_io_qpair(ns_ctx->qpair);
}
421
422 static void
423 cleanup(uint32_t task_count)
424 {
425 struct ns_entry *entry = g_namespaces;
426 struct ns_entry *next_entry = NULL;
427 struct worker_thread *worker = g_workers;
428 struct worker_thread *next_worker = NULL;
429
430 while (entry) {
431 next_entry = entry->next;
432 free(entry);
433 entry = next_entry;
434 };
435
436 while (worker) {
437 next_worker = worker->next;
438 free(worker->ns_ctx);
439 free(worker);
440 worker = next_worker;
441 };
442
443 if (spdk_mempool_count(task_pool) != (size_t)task_count) {
444 fprintf(stderr, "task_pool count is %zu but should be %u\n",
445 spdk_mempool_count(task_pool), task_count);
446 }
447 spdk_mempool_free(task_pool);
448 }
449
/*
 * Worker-thread entry point (runs pinned to worker->lcore).
 * Lifecycle: allocate a qpair per namespace at this worker's priority,
 * prime each namespace to the configured queue depth, poll for the
 * configured duration (completions re-submit via task_complete), then
 * drain in-flight I/O and free the qpairs.
 * Returns 0 on success, 1 if any qpair allocation failed.
 * NOTE(review): on the early error return, qpairs already allocated for
 * earlier contexts in the list are not freed — confirm acceptable for an
 * example program.
 */
static int
work_fn(void *arg)
{
	uint64_t tsc_end;
	struct worker_thread *worker = (struct worker_thread *)arg;
	struct ns_worker_ctx *ns_ctx = NULL;

	printf("Starting thread on core %u with %s\n", worker->lcore, print_qprio(worker->qprio));

	/* Allocate a queue pair for each namespace. */
	ns_ctx = worker->ns_ctx;
	while (ns_ctx != NULL) {
		if (init_ns_worker_ctx(ns_ctx, worker->qprio) != 0) {
			printf("ERROR: init_ns_worker_ctx() failed\n");
			return 1;
		}
		ns_ctx = ns_ctx->next;
	}

	/* End-of-run deadline in TSC ticks. */
	tsc_end = spdk_get_ticks() + g_arbitration.time_in_sec * g_arbitration.tsc_rate;

	/* Submit initial I/O for each namespace. */
	ns_ctx = worker->ns_ctx;

	while (ns_ctx != NULL) {
		submit_io(ns_ctx, g_arbitration.queue_depth);
		ns_ctx = ns_ctx->next;
	}

	while (1) {
		/*
		 * Check for completed I/O for each controller. A new
		 * I/O will be submitted in the io_complete callback
		 * to replace each I/O that is completed.
		 */
		ns_ctx = worker->ns_ctx;
		while (ns_ctx != NULL) {
			check_io(ns_ctx);
			ns_ctx = ns_ctx->next;
		}

		if (spdk_get_ticks() > tsc_end) {
			break;
		}
	}

	/* Wait out in-flight I/O and release each namespace's qpair. */
	ns_ctx = worker->ns_ctx;
	while (ns_ctx != NULL) {
		drain_io(ns_ctx);
		cleanup_ns_worker_ctx(ns_ctx);
		ns_ctx = ns_ctx->next;
	}

	return 0;
}
505
/* Print command-line help; option letters match the getopt string in
 * parse_args(). */
static void
usage(char *program_name)
{
	printf("%s options", program_name);
	printf("\n");
	printf("\t[-q io depth]\n");
	printf("\t[-s io size in bytes]\n");
	printf("\t[-w io pattern type, must be one of\n");
	printf("\t\t(read, write, randread, randwrite, rw, randrw)]\n");
	printf("\t[-M rwmixread (100 for reads, 0 for writes)]\n");
	printf("\t[-l enable latency tracking, default: disabled]\n");
	printf("\t\t(0 - disabled; 1 - enabled)\n");
	printf("\t[-t time in seconds]\n");
	printf("\t[-c core mask for I/O submission/completion.]\n");
	printf("\t\t(default: 0xf - 4 cores)]\n");
	printf("\t[-m max completions per poll]\n");
	printf("\t\t(default: 0 - unlimited)\n");
	printf("\t[-a arbitration mechanism, must be one of below]\n");
	printf("\t\t(0, 1, 2)]\n");
	printf("\t\t(0: default round robin mechanism)]\n");
	printf("\t\t(1: weighted round robin mechanism)]\n");
	printf("\t\t(2: vendor specific mechanism)]\n");
	printf("\t[-b enable arbitration user configuration, default: disabled]\n");
	printf("\t\t(0 - disabled; 1 - enabled)\n");
	printf("\t[-n subjected IOs for performance comparison]\n");
	printf("\t[-i shared memory group ID]\n");
}
533
534 static const char *
535 print_qprio(enum spdk_nvme_qprio qprio)
536 {
537 switch (qprio) {
538 case SPDK_NVME_QPRIO_URGENT:
539 return "urgent priority queue";
540 case SPDK_NVME_QPRIO_HIGH:
541 return "high priority queue";
542 case SPDK_NVME_QPRIO_MEDIUM:
543 return "medium priority queue";
544 case SPDK_NVME_QPRIO_LOW:
545 return "low priority queue";
546 default:
547 return "invalid priority queue";
548 }
549 }
550
551
552 static void
553 print_configuration(char *program_name)
554 {
555 printf("%s run with configuration:\n", program_name);
556 printf("%s -q %d -s %d -w %s -M %d -l %d -t %d -c %s -m %d -a %d -b %d -n %d -i %d\n",
557 program_name,
558 g_arbitration.queue_depth,
559 g_arbitration.io_size_bytes,
560 g_arbitration.workload_type,
561 g_arbitration.rw_percentage,
562 g_arbitration.latency_tracking_enable,
563 g_arbitration.time_in_sec,
564 g_arbitration.core_mask,
565 g_arbitration.max_completions,
566 g_arbitration.arbitration_mechanism,
567 g_arbitration.arbitration_config,
568 g_arbitration.io_count,
569 g_arbitration.shm_id);
570 }
571
572
573 static void
574 print_performance(void)
575 {
576 float io_per_second, sent_all_io_in_secs;
577 struct worker_thread *worker;
578 struct ns_worker_ctx *ns_ctx;
579
580 worker = g_workers;
581 while (worker) {
582 ns_ctx = worker->ns_ctx;
583 while (ns_ctx) {
584 io_per_second = (float)ns_ctx->io_completed / g_arbitration.time_in_sec;
585 sent_all_io_in_secs = g_arbitration.io_count / io_per_second;
586 printf("%-43.43s core %u: %8.2f IO/s %8.2f secs/%d ios\n",
587 ns_ctx->entry->name, worker->lcore,
588 io_per_second, sent_all_io_in_secs, g_arbitration.io_count);
589 ns_ctx = ns_ctx->next;
590 }
591 worker = worker->next;
592 }
593 printf("========================================================\n");
594
595 printf("\n");
596 }
597
598 static void
599 print_latency_page(struct ctrlr_entry *entry)
600 {
601 int i;
602
603 printf("\n");
604 printf("%s\n", entry->name);
605 printf("--------------------------------------------------------\n");
606
607 for (i = 0; i < 32; i++) {
608 if (entry->latency_page.buckets_32us[i])
609 printf("Bucket %dus - %dus: %d\n", i * 32, (i + 1) * 32,
610 entry->latency_page.buckets_32us[i]);
611 }
612 for (i = 0; i < 31; i++) {
613 if (entry->latency_page.buckets_1ms[i])
614 printf("Bucket %dms - %dms: %d\n", i + 1, i + 2,
615 entry->latency_page.buckets_1ms[i]);
616 }
617 for (i = 0; i < 31; i++) {
618 if (entry->latency_page.buckets_32ms[i])
619 printf("Bucket %dms - %dms: %d\n", (i + 1) * 32, (i + 2) * 32,
620 entry->latency_page.buckets_32ms[i]);
621 }
622 }
623
/*
 * Fetch and print the Intel latency log page from every controller that
 * supports it. Phase 1 issues all Get Log Page commands (exiting the
 * process if a submission fails), phase 2 busy-polls every controller's
 * admin queue until all commands complete, phase 3 prints the pages.
 */
static void
print_latency_statistics(const char *op_name, enum spdk_nvme_intel_log_page log_page)
{
	struct ctrlr_entry *ctrlr;

	printf("%s Latency Statistics:\n", op_name);
	printf("========================================================\n");
	ctrlr = g_controllers;
	while (ctrlr) {
		if (spdk_nvme_ctrlr_is_log_page_supported(ctrlr->ctrlr, log_page)) {
			if (spdk_nvme_ctrlr_cmd_get_log_page(
				    ctrlr->ctrlr, log_page,
				    SPDK_NVME_GLOBAL_NS_TAG,
				    &ctrlr->latency_page,
				    sizeof(struct spdk_nvme_intel_rw_latency_page),
				    0,
				    enable_latency_tracking_complete,
				    NULL)) {
				printf("nvme_ctrlr_cmd_get_log_page() failed\n");
				exit(1);
			}

			g_arbitration.outstanding_commands++;
		} else {
			printf("Controller %s: %s latency statistics not supported\n",
			       ctrlr->name, op_name);
		}
		ctrlr = ctrlr->next;
	}

	/* Poll every controller until all outstanding log-page reads finish. */
	while (g_arbitration.outstanding_commands) {
		ctrlr = g_controllers;
		while (ctrlr) {
			spdk_nvme_ctrlr_process_admin_completions(ctrlr->ctrlr);
			ctrlr = ctrlr->next;
		}
	}

	ctrlr = g_controllers;
	while (ctrlr) {
		if (spdk_nvme_ctrlr_is_log_page_supported(ctrlr->ctrlr, log_page)) {
			print_latency_page(ctrlr);
		}
		ctrlr = ctrlr->next;
	}
	printf("\n");
}
671
672 static void
673 print_stats(void)
674 {
675 print_performance();
676 if (g_arbitration.latency_tracking_enable) {
677 if (g_arbitration.rw_percentage != 0) {
678 print_latency_statistics("Read", SPDK_NVME_INTEL_LOG_READ_CMD_LATENCY);
679 }
680 if (g_arbitration.rw_percentage != 100) {
681 print_latency_statistics("Write", SPDK_NVME_INTEL_LOG_WRITE_CMD_LATENCY);
682 }
683 }
684 }
685
/*
 * Parse command-line options into g_arbitration and validate the
 * combination. Returns 0 on success, 1 on any invalid input (after
 * printing a diagnostic, or usage for -h / unknown options).
 */
static int
parse_args(int argc, char **argv)
{
	const char *workload_type = NULL;
	int op = 0;
	bool mix_specified = false;

	while ((op = getopt(argc, argv, "c:l:i:m:q:s:t:w:M:a:b:n:h")) != -1) {
		switch (op) {
		case 'c':
			g_arbitration.core_mask = optarg;
			break;
		case 'i':
			g_arbitration.shm_id = atoi(optarg);
			break;
		case 'l':
			g_arbitration.latency_tracking_enable = atoi(optarg);
			break;
		case 'm':
			g_arbitration.max_completions = atoi(optarg);
			break;
		case 'q':
			g_arbitration.queue_depth = atoi(optarg);
			break;
		case 's':
			g_arbitration.io_size_bytes = atoi(optarg);
			break;
		case 't':
			g_arbitration.time_in_sec = atoi(optarg);
			break;
		case 'w':
			g_arbitration.workload_type = optarg;
			break;
		case 'M':
			g_arbitration.rw_percentage = atoi(optarg);
			mix_specified = true;
			break;
		case 'a':
			g_arbitration.arbitration_mechanism = atoi(optarg);
			break;
		case 'b':
			g_arbitration.arbitration_config = atoi(optarg);
			break;
		case 'n':
			g_arbitration.io_count = atoi(optarg);
			break;
		case 'h':
		default:
			usage(argv[0]);
			return 1;
		}
	}

	workload_type = g_arbitration.workload_type;

	/* -w must name one of the six supported patterns. */
	if (strcmp(workload_type, "read") &&
	    strcmp(workload_type, "write") &&
	    strcmp(workload_type, "randread") &&
	    strcmp(workload_type, "randwrite") &&
	    strcmp(workload_type, "rw") &&
	    strcmp(workload_type, "randrw")) {
		fprintf(stderr,
			"io pattern type must be one of\n"
			"(read, write, randread, randwrite, rw, randrw)\n");
		return 1;
	}

	/* Pure-read / pure-write patterns force the mix ratio. */
	if (!strcmp(workload_type, "read") ||
	    !strcmp(workload_type, "randread")) {
		g_arbitration.rw_percentage = 100;
	}

	if (!strcmp(workload_type, "write") ||
	    !strcmp(workload_type, "randwrite")) {
		g_arbitration.rw_percentage = 0;
	}

	/* -M only makes sense for mixed workloads; warn but continue. */
	if (!strcmp(workload_type, "read") ||
	    !strcmp(workload_type, "randread") ||
	    !strcmp(workload_type, "write") ||
	    !strcmp(workload_type, "randwrite")) {
		if (mix_specified) {
			fprintf(stderr, "Ignoring -M option... Please use -M option"
				" only when using rw or randrw.\n");
		}
	}

	if (!strcmp(workload_type, "rw") ||
	    !strcmp(workload_type, "randrw")) {
		if (g_arbitration.rw_percentage < 0 || g_arbitration.rw_percentage > 100) {
			fprintf(stderr,
				"-M must be specified to value from 0 to 100 "
				"for rw or randrw.\n");
			return 1;
		}
	}

	/* The "rand" prefix selects random offsets; the rest are sequential. */
	if (!strcmp(workload_type, "read") ||
	    !strcmp(workload_type, "write") ||
	    !strcmp(workload_type, "rw")) {
		g_arbitration.is_random = 0;
	} else {
		g_arbitration.is_random = 1;
	}

	if (g_arbitration.latency_tracking_enable != 0 &&
	    g_arbitration.latency_tracking_enable != 1) {
		fprintf(stderr,
			"-l must be specified to value 0 or 1.\n");
		return 1;
	}

	/* -a takes the CC.AMS encoding: 0 = RR, 1 = WRR, 7 = vendor specific. */
	switch (g_arbitration.arbitration_mechanism) {
	case SPDK_NVME_CC_AMS_RR:
	case SPDK_NVME_CC_AMS_WRR:
	case SPDK_NVME_CC_AMS_VS:
		break;
	default:
		fprintf(stderr,
			"-a must be specified to value 0, 1, or 7.\n");
		return 1;
	}

	/* User-specified weights (-b 1) require WRR arbitration (-a 1). */
	if (g_arbitration.arbitration_config != 0 &&
	    g_arbitration.arbitration_config != 1) {
		fprintf(stderr,
			"-b must be specified to value 0 or 1.\n");
		return 1;
	} else if (g_arbitration.arbitration_config == 1 &&
		   g_arbitration.arbitration_mechanism != SPDK_NVME_CC_AMS_WRR) {
		fprintf(stderr,
			"-a must be specified to 1 (WRR) together.\n");
		return 1;
	}

	return 0;
}
823
824 static int
825 register_workers(void)
826 {
827 uint32_t i;
828 struct worker_thread *worker;
829 enum spdk_nvme_qprio qprio = SPDK_NVME_QPRIO_URGENT;
830
831 g_workers = NULL;
832 g_arbitration.num_workers = 0;
833
834 SPDK_ENV_FOREACH_CORE(i) {
835 worker = calloc(1, sizeof(*worker));
836 if (worker == NULL) {
837 fprintf(stderr, "Unable to allocate worker\n");
838 return -1;
839 }
840
841 worker->lcore = i;
842 worker->next = g_workers;
843 g_workers = worker;
844 g_arbitration.num_workers++;
845
846 if (g_arbitration.arbitration_mechanism == SPDK_NVME_CAP_AMS_WRR) {
847 qprio++;
848 }
849
850 worker->qprio = qprio % SPDK_NVME_QPRIO_MAX;
851 }
852
853 return 0;
854 }
855
856 static bool
857 probe_cb(void *cb_ctx, const struct spdk_nvme_transport_id *trid,
858 struct spdk_nvme_ctrlr_opts *opts)
859 {
860 /* Update with user specified arbitration configuration */
861 opts->arb_mechanism = g_arbitration.arbitration_mechanism;
862
863 printf("Attaching to %s\n", trid->traddr);
864
865 return true;
866 }
867
868 static void
869 attach_cb(void *cb_ctx, const struct spdk_nvme_transport_id *trid,
870 struct spdk_nvme_ctrlr *ctrlr, const struct spdk_nvme_ctrlr_opts *opts)
871 {
872 printf("Attached to %s\n", trid->traddr);
873
874 /* Update with actual arbitration configuration in use */
875 g_arbitration.arbitration_mechanism = opts->arb_mechanism;
876
877 register_ctrlr(ctrlr);
878 }
879
880 static int
881 register_controllers(void)
882 {
883 printf("Initializing NVMe Controllers\n");
884
885 if (spdk_nvme_probe(NULL, NULL, probe_cb, attach_cb, NULL) != 0) {
886 fprintf(stderr, "spdk_nvme_probe() failed\n");
887 return 1;
888 }
889
890 if (g_arbitration.num_namespaces == 0) {
891 fprintf(stderr, "No valid namespaces to continue IO testing\n");
892 return 1;
893 }
894
895 return 0;
896 }
897
898 static void
899 unregister_controllers(void)
900 {
901 struct ctrlr_entry *entry = g_controllers;
902
903 while (entry) {
904 struct ctrlr_entry *next = entry->next;
905 if (g_arbitration.latency_tracking_enable &&
906 spdk_nvme_ctrlr_is_feature_supported(entry->ctrlr, SPDK_NVME_INTEL_FEAT_LATENCY_TRACKING)) {
907 set_latency_tracking_feature(entry->ctrlr, false);
908 }
909 spdk_nvme_detach(entry->ctrlr);
910 free(entry);
911 entry = next;
912 }
913 }
914
915 static int
916 associate_workers_with_ns(void)
917 {
918 struct ns_entry *entry = g_namespaces;
919 struct worker_thread *worker = g_workers;
920 struct ns_worker_ctx *ns_ctx;
921 int i, count;
922
923 count = g_arbitration.num_namespaces > g_arbitration.num_workers ?
924 g_arbitration.num_namespaces : g_arbitration.num_workers;
925
926 for (i = 0; i < count; i++) {
927 if (entry == NULL) {
928 break;
929 }
930
931 ns_ctx = malloc(sizeof(struct ns_worker_ctx));
932 if (!ns_ctx) {
933 return 1;
934 }
935 memset(ns_ctx, 0, sizeof(*ns_ctx));
936
937 printf("Associating %s with lcore %d\n", entry->name, worker->lcore);
938 ns_ctx->entry = entry;
939 ns_ctx->next = worker->ns_ctx;
940 worker->ns_ctx = ns_ctx;
941
942 worker = worker->next;
943 if (worker == NULL) {
944 worker = g_workers;
945 }
946
947 entry = entry->next;
948 if (entry == NULL) {
949 entry = g_namespaces;
950 }
951
952 }
953
954 return 0;
955 }
956
957 static void
958 get_feature_completion(void *cb_arg, const struct spdk_nvme_cpl *cpl)
959 {
960 struct feature *feature = cb_arg;
961 int fid = feature - features;
962
963 if (spdk_nvme_cpl_is_error(cpl)) {
964 printf("get_feature(0x%02X) failed\n", fid);
965 } else {
966 feature->result = cpl->cdw0;
967 feature->valid = true;
968 }
969
970 g_arbitration.outstanding_commands--;
971 }
972
973 static int
974 get_feature(struct spdk_nvme_ctrlr *ctrlr, uint8_t fid)
975 {
976 struct spdk_nvme_cmd cmd = {};
977
978 cmd.opc = SPDK_NVME_OPC_GET_FEATURES;
979 cmd.cdw10 = fid;
980
981 return spdk_nvme_ctrlr_cmd_admin_raw(ctrlr, &cmd, NULL, 0, get_feature_completion, &features[fid]);
982 }
983
/*
 * Read the controller's Arbitration feature, busy-polling the admin queue
 * until the command completes, then print the decoded burst and weights.
 * The weight fields are zero-based in the dword, hence the "+ 1" when
 * converting to the effective weight.
 */
static void
get_arb_feature(struct spdk_nvme_ctrlr *ctrlr)
{
	get_feature(ctrlr, SPDK_NVME_FEAT_ARBITRATION);

	g_arbitration.outstanding_commands++;

	while (g_arbitration.outstanding_commands) {
		spdk_nvme_ctrlr_process_admin_completions(ctrlr);
	}

	if (features[SPDK_NVME_FEAT_ARBITRATION].valid) {
		uint32_t arb = features[SPDK_NVME_FEAT_ARBITRATION].result;
		unsigned ab, lpw, mpw, hpw;

		/* Unpack CDW0 per the dword layout documented above the
		 * SPDK_NVME_*_SHIFT macros. */
		ab = arb & SPDK_NVME_ARB_BURST_MASK;
		lpw = ((arb >> SPDK_NVME_LOW_PRIO_WEIGHT_SHIFT) & SPDK_NVME_PRIO_WEIGHT_MASK) + 1;
		mpw = ((arb >> SPDK_NVME_MED_PRIO_WEIGHT_SHIFT) & SPDK_NVME_PRIO_WEIGHT_MASK) + 1;
		hpw = ((arb >> SPDK_NVME_HIGH_PRIO_WEIGHT_SHIFT) & SPDK_NVME_PRIO_WEIGHT_MASK) + 1;

		printf("Current Arbitration Configuration\n");
		printf("===========\n");
		printf("Arbitration Burst: ");
		if (ab == SPDK_NVME_ARB_BURST_MASK) {
			/* 0x7 is the spec's "no limit" encoding. */
			printf("no limit\n");
		} else {
			/* Burst is encoded as a power of two. */
			printf("%u\n", 1u << ab);
		}

		printf("Low Priority Weight: %u\n", lpw);
		printf("Medium Priority Weight: %u\n", mpw);
		printf("High Priority Weight: %u\n", hpw);
		printf("\n");
	}
}
1019
1020 static void
1021 set_feature_completion(void *cb_arg, const struct spdk_nvme_cpl *cpl)
1022 {
1023 struct feature *feature = cb_arg;
1024 int fid = feature - features;
1025
1026 if (spdk_nvme_cpl_is_error(cpl)) {
1027 printf("set_feature(0x%02X) failed\n", fid);
1028 feature->valid = false;
1029 } else {
1030 printf("Set Arbitration Feature Successfully\n");
1031 }
1032
1033 g_arbitration.outstanding_commands--;
1034 }
1035
/*
 * Program the Arbitration feature with the user-specified burst and
 * per-priority weights, then busy-poll the admin queue until completion.
 * Returns 1 if the command could not be submitted, 0 otherwise (including
 * the case where the controller rejected the values and the defaults
 * remain in effect).
 */
static int
set_arb_feature(struct spdk_nvme_ctrlr *ctrlr)
{
	int ret;
	struct spdk_nvme_cmd cmd = {};
	uint32_t arb = 0;
	unsigned ab, lpw, mpw, hpw;

	cmd.opc = SPDK_NVME_OPC_SET_FEATURES;
	cmd.cdw10 = SPDK_NVME_FEAT_ARBITRATION;

	g_arbitration.outstanding_commands = 0;

	/* Only build a non-zero CDW11 if the preceding Get Features in
	 * register_ctrlr() succeeded; otherwise CDW11 stays 0. */
	if (features[SPDK_NVME_FEAT_ARBITRATION].valid) {
		ab = USER_SPECIFIED_ARBITRATION_BURST & SPDK_NVME_ARB_BURST_MASK;
		/* NOTE(review): the dword comment above says weights are
		 * zero-based, but the USER_SPECIFIED_* values are programmed
		 * unadjusted, making the effective weights value + 1 —
		 * confirm this is intended. */
		hpw = USER_SPECIFIED_HIGH_PRIORITY_WEIGHT << SPDK_NVME_HIGH_PRIO_WEIGHT_SHIFT;
		mpw = USER_SPECIFIED_MEDIUM_PRIORITY_WEIGHT << SPDK_NVME_MED_PRIO_WEIGHT_SHIFT;
		lpw = USER_SPECIFIED_LOW_PRIORITY_WEIGHT << SPDK_NVME_LOW_PRIO_WEIGHT_SHIFT;
		arb = hpw | mpw | lpw | ab;
		cmd.cdw11 = arb;
	}

	ret = spdk_nvme_ctrlr_cmd_admin_raw(ctrlr, &cmd, NULL, 0,
					    set_feature_completion, &features[SPDK_NVME_FEAT_ARBITRATION]);
	if (ret) {
		printf("Set Arbitration Feature: Failed 0x%x\n", ret);
		return 1;
	}

	g_arbitration.outstanding_commands++;

	/* Busy-poll until set_feature_completion() fires. */
	while (g_arbitration.outstanding_commands) {
		spdk_nvme_ctrlr_process_admin_completions(ctrlr);
	}

	if (!features[SPDK_NVME_FEAT_ARBITRATION].valid) {
		printf("Set Arbitration Feature failed and use default configuration\n");
	}

	return 0;
}
1077
1078 int
1079 main(int argc, char **argv)
1080 {
1081 int rc;
1082 struct worker_thread *worker, *master_worker;
1083 unsigned master_core;
1084 char task_pool_name[30];
1085 uint32_t task_count;
1086 struct spdk_env_opts opts;
1087
1088 rc = parse_args(argc, argv);
1089 if (rc != 0) {
1090 return rc;
1091 }
1092
1093 spdk_env_opts_init(&opts);
1094 opts.name = "arb";
1095 opts.core_mask = g_arbitration.core_mask;
1096 opts.shm_id = g_arbitration.shm_id;
1097 if (spdk_env_init(&opts) < 0) {
1098 return 1;
1099 }
1100
1101 g_arbitration.tsc_rate = spdk_get_ticks_hz();
1102
1103 if (register_workers() != 0) {
1104 return 1;
1105 }
1106
1107 if (register_controllers() != 0) {
1108 return 1;
1109 }
1110
1111 if (associate_workers_with_ns() != 0) {
1112 return 1;
1113 }
1114
1115 snprintf(task_pool_name, sizeof(task_pool_name), "task_pool_%d", getpid());
1116
1117 /*
1118 * The task_count will be dynamically calculated based on the
1119 * number of attached active namespaces, queue depth and number
1120 * of cores (workers) involved in the IO perations.
1121 */
1122 task_count = g_arbitration.num_namespaces > g_arbitration.num_workers ?
1123 g_arbitration.num_namespaces : g_arbitration.num_workers;
1124 task_count *= g_arbitration.queue_depth;
1125
1126 task_pool = spdk_mempool_create(task_pool_name, task_count,
1127 sizeof(struct arb_task), 0, SPDK_ENV_SOCKET_ID_ANY);
1128 if (task_pool == NULL) {
1129 fprintf(stderr, "could not initialize task pool\n");
1130 return 1;
1131 }
1132
1133 print_configuration(argv[0]);
1134
1135 printf("Initialization complete. Launching workers.\n");
1136
1137 /* Launch all of the slave workers */
1138 master_core = spdk_env_get_current_core();
1139 master_worker = NULL;
1140 worker = g_workers;
1141 while (worker != NULL) {
1142 if (worker->lcore != master_core) {
1143 spdk_env_thread_launch_pinned(worker->lcore, work_fn, worker);
1144 } else {
1145 assert(master_worker == NULL);
1146 master_worker = worker;
1147 }
1148 worker = worker->next;
1149 }
1150
1151 assert(master_worker != NULL);
1152 rc = work_fn(master_worker);
1153
1154 spdk_env_thread_wait_all();
1155
1156 print_stats();
1157
1158 unregister_controllers();
1159
1160 cleanup(task_count);
1161
1162 if (rc != 0) {
1163 fprintf(stderr, "%s: errors occured\n", argv[0]);
1164 }
1165
1166 return rc;
1167 }