/*-
 *   BSD LICENSE
 *
 *   Copyright (c) Intel Corporation.
 *   All rights reserved.
 *
 *   Redistribution and use in source and binary forms, with or without
 *   modification, are permitted provided that the following conditions
 *   are met:
 *
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in
 *       the documentation and/or other materials provided with the
 *       distribution.
 *     * Neither the name of Intel Corporation nor the names of its
 *       contributors may be used to endorse or promote products derived
 *       from this software without specific prior written permission.
 *
 *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
#include <assert.h>
#include <inttypes.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

#include <rte_config.h>
#include <rte_mempool.h>
#include <rte_lcore.h>

#include "spdk/nvme.h"
#include "spdk/env.h"
#include "spdk/string.h"
#include "spdk/nvme_intel.h"
49 struct spdk_nvme_ctrlr
*ctrlr
;
50 struct spdk_nvme_intel_rw_latency_page latency_page
;
51 struct ctrlr_entry
*next
;
57 struct spdk_nvme_ctrlr
*ctrlr
;
58 struct spdk_nvme_ns
*ns
;
61 struct ns_entry
*next
;
62 uint32_t io_size_blocks
;
/*
 * Per-worker, per-namespace I/O context. Each worker thread owns a singly
 * linked list of these, one per namespace it drives.
 * NOTE(review): `is_draining` was dropped by the source mangling; it is
 * reconstructed from its uses in drain_io()/task_complete().
 */
struct ns_worker_ctx {
	struct ns_entry		*entry;
	uint64_t		io_completed;		/* total I/Os finished on this ctx */
	uint64_t		current_queue_depth;	/* I/Os currently in flight */
	uint64_t		offset_in_ios;		/* next sequential I/O offset */
	bool			is_draining;		/* true once the timed run ended */

	struct spdk_nvme_qpair	*qpair;
	struct ns_worker_ctx	*next;
};

/* One in-flight I/O: its data buffer plus the context that submitted it. */
struct arb_task {
	struct ns_worker_ctx	*ns_ctx;
	void			*buf;	/* spdk_zmalloc'd in task_ctor() */
};
82 struct worker_thread
{
83 struct ns_worker_ctx
*ns_ctx
;
84 struct worker_thread
*next
;
86 enum spdk_nvme_qprio qprio
;
/*
 * Global run configuration and shared counters for the arbitration test.
 * NOTE(review): several fields were dropped by the source mangling and are
 * reconstructed from their uses throughout this file (num_namespaces,
 * num_workers, rw_percentage, is_random, queue_depth, time_in_sec, io_count,
 * shm_id, tsc_rate) — confirm against upstream.
 */
struct arb_context {
	int			shm_id;
	int			outstanding_commands;	/* admin commands awaiting completion */
	int			num_namespaces;
	int			num_workers;
	int			rw_percentage;		/* % of I/Os that are reads */
	int			is_random;		/* random vs sequential offsets */
	int			queue_depth;
	int			time_in_sec;
	int			io_count;		/* reference I/O count for reporting */
	uint8_t			latency_tracking_enable;
	uint8_t			arbitration_mechanism;
	uint8_t			arbitration_config;	/* 1 => push user WRR weights */
	uint32_t		io_size_bytes;
	uint32_t		max_completions;	/* 0 => unlimited per poll */
	uint64_t		tsc_rate;
	const char		*core_mask;
	const char		*workload_type;
};

/* Cached result of a Get Features admin command, indexed by feature ID. */
struct feature {
	uint32_t		result;
	bool			valid;
};
114 static struct rte_mempool
*task_pool
= NULL
;
116 static struct ctrlr_entry
*g_controllers
= NULL
;
117 static struct ns_entry
*g_namespaces
= NULL
;
118 static struct worker_thread
*g_workers
= NULL
;
120 static struct feature features
[256];
122 static struct arb_context g_arbitration
= {
124 .outstanding_commands
= 0,
131 .latency_tracking_enable
= 0,
132 .arbitration_mechanism
= SPDK_NVME_CC_AMS_RR
,
133 .arbitration_config
= 0,
134 .io_size_bytes
= 131072,
135 .max_completions
= 0,
136 /* Default 4 cores for urgent/high/medium/low */
138 .workload_type
= "randrw",
/*
 * For weighted round robin arbitration mechanism, the smaller value between
 * weight and burst will be picked to execute the commands in one queue.
 */
#define USER_SPECIFIED_HIGH_PRIORITY_WEIGHT	32
#define USER_SPECIFIED_MEDIUM_PRIORITY_WEIGHT	16
#define USER_SPECIFIED_LOW_PRIORITY_WEIGHT	8
#define USER_SPECIFIED_ARBITRATION_BURST	7	/* No limit */

/*
 * Description of dword for priority weight and arbitration burst
 * ------------------------------------------------------------------------------
 *     31 : 24    |       23 : 16      |    15 : 08     |  07 : 03  |  02 : 00
 * ------------------------------------------------------------------------------
 * High Prio Weight | Medium Prio Weight | Low Prio Weight | Reserved | Arb Burst
 * ------------------------------------------------------------------------------
 *
 * The priority weights are zero based value.
 */
#define SPDK_NVME_HIGH_PRIO_WEIGHT_SHIFT	24
#define SPDK_NVME_MED_PRIO_WEIGHT_SHIFT		16
#define SPDK_NVME_LOW_PRIO_WEIGHT_SHIFT		8
#define SPDK_NVME_PRIO_WEIGHT_MASK		0xFF
#define SPDK_NVME_ARB_BURST_MASK		0x7

#define SPDK_NVME_QPRIO_MAX			(SPDK_NVME_QPRIO_LOW + 1)
/* Forward declarations for functions referenced before their definitions. */
static void task_complete(struct arb_task *task);

static void io_complete(void *ctx, const struct spdk_nvme_cpl *completion);

static void get_arb_feature(struct spdk_nvme_ctrlr *ctrlr);

static int set_arb_feature(struct spdk_nvme_ctrlr *ctrlr);

static const char *print_qprio(enum spdk_nvme_qprio qprio);
180 register_ns(struct spdk_nvme_ctrlr
*ctrlr
, struct spdk_nvme_ns
*ns
)
182 struct ns_entry
*entry
;
183 const struct spdk_nvme_ctrlr_data
*cdata
;
185 cdata
= spdk_nvme_ctrlr_get_data(ctrlr
);
187 if (!spdk_nvme_ns_is_active(ns
)) {
188 printf("Controller %-20.20s (%-20.20s): Skipping inactive NS %u\n",
189 cdata
->mn
, cdata
->sn
,
190 spdk_nvme_ns_get_id(ns
));
194 if (spdk_nvme_ns_get_size(ns
) < g_arbitration
.io_size_bytes
||
195 spdk_nvme_ns_get_sector_size(ns
) > g_arbitration
.io_size_bytes
) {
196 printf("WARNING: controller %-20.20s (%-20.20s) ns %u has invalid "
197 "ns size %" PRIu64
" / block size %u for I/O size %u\n",
198 cdata
->mn
, cdata
->sn
, spdk_nvme_ns_get_id(ns
),
199 spdk_nvme_ns_get_size(ns
), spdk_nvme_ns_get_sector_size(ns
),
200 g_arbitration
.io_size_bytes
);
204 entry
= malloc(sizeof(struct ns_entry
));
206 perror("ns_entry malloc");
210 entry
->nvme
.ctrlr
= ctrlr
;
213 entry
->size_in_ios
= spdk_nvme_ns_get_size(ns
) / g_arbitration
.io_size_bytes
;
214 entry
->io_size_blocks
= g_arbitration
.io_size_bytes
/ spdk_nvme_ns_get_sector_size(ns
);
216 snprintf(entry
->name
, 44, "%-20.20s (%-20.20s)", cdata
->mn
, cdata
->sn
);
218 g_arbitration
.num_namespaces
++;
219 entry
->next
= g_namespaces
;
220 g_namespaces
= entry
;
224 enable_latency_tracking_complete(void *cb_arg
, const struct spdk_nvme_cpl
*cpl
)
226 if (spdk_nvme_cpl_is_error(cpl
)) {
227 printf("enable_latency_tracking_complete failed\n");
229 g_arbitration
.outstanding_commands
--;
233 set_latency_tracking_feature(struct spdk_nvme_ctrlr
*ctrlr
, bool enable
)
236 union spdk_nvme_intel_feat_latency_tracking latency_tracking
;
239 latency_tracking
.bits
.enable
= 0x01;
241 latency_tracking
.bits
.enable
= 0x00;
244 res
= spdk_nvme_ctrlr_cmd_set_feature(ctrlr
, SPDK_NVME_INTEL_FEAT_LATENCY_TRACKING
,
245 latency_tracking
.raw
, 0, NULL
, 0, enable_latency_tracking_complete
, NULL
);
247 printf("fail to allocate nvme request.\n");
250 g_arbitration
.outstanding_commands
++;
252 while (g_arbitration
.outstanding_commands
) {
253 spdk_nvme_ctrlr_process_admin_completions(ctrlr
);
258 register_ctrlr(struct spdk_nvme_ctrlr
*ctrlr
)
261 struct spdk_nvme_ns
*ns
;
262 struct ctrlr_entry
*entry
= calloc(1, sizeof(struct ctrlr_entry
));
263 const struct spdk_nvme_ctrlr_data
*cdata
= spdk_nvme_ctrlr_get_data(ctrlr
);
266 perror("ctrlr_entry malloc");
270 snprintf(entry
->name
, sizeof(entry
->name
), "%-20.20s (%-20.20s)", cdata
->mn
, cdata
->sn
);
272 entry
->ctrlr
= ctrlr
;
273 entry
->next
= g_controllers
;
274 g_controllers
= entry
;
276 if ((g_arbitration
.latency_tracking_enable
!= 0) &&
277 spdk_nvme_ctrlr_is_feature_supported(ctrlr
, SPDK_NVME_INTEL_FEAT_LATENCY_TRACKING
))
278 set_latency_tracking_feature(ctrlr
, true);
280 num_ns
= spdk_nvme_ctrlr_get_num_ns(ctrlr
);
281 for (nsid
= 1; nsid
<= num_ns
; nsid
++) {
282 ns
= spdk_nvme_ctrlr_get_ns(ctrlr
, nsid
);
286 register_ns(ctrlr
, ns
);
289 if (g_arbitration
.arbitration_mechanism
== SPDK_NVME_CAP_AMS_WRR
) {
290 get_arb_feature(ctrlr
);
292 if (g_arbitration
.arbitration_config
!= 0) {
293 set_arb_feature(ctrlr
);
294 get_arb_feature(ctrlr
);
300 task_ctor(struct rte_mempool
*mp
, void *arg
, void *__task
, unsigned id
)
302 struct arb_task
*task
= __task
;
303 task
->buf
= spdk_zmalloc(g_arbitration
.io_size_bytes
, 0x200, NULL
);
304 if (task
->buf
== NULL
) {
305 fprintf(stderr
, "task->buf spdk_zmalloc failed\n");
/* Per-thread rand_r() state so concurrent workers don't share PRNG state. */
static __thread unsigned int seed = 0;
313 submit_single_io(struct ns_worker_ctx
*ns_ctx
)
315 struct arb_task
*task
= NULL
;
316 uint64_t offset_in_ios
;
318 struct ns_entry
*entry
= ns_ctx
->entry
;
320 if (rte_mempool_get(task_pool
, (void **)&task
) != 0) {
321 fprintf(stderr
, "task_pool rte_mempool_get failed\n");
325 task
->ns_ctx
= ns_ctx
;
327 if (g_arbitration
.is_random
) {
328 offset_in_ios
= rand_r(&seed
) % entry
->size_in_ios
;
330 offset_in_ios
= ns_ctx
->offset_in_ios
++;
331 if (ns_ctx
->offset_in_ios
== entry
->size_in_ios
) {
332 ns_ctx
->offset_in_ios
= 0;
336 if ((g_arbitration
.rw_percentage
== 100) ||
337 (g_arbitration
.rw_percentage
!= 0 &&
338 ((rand_r(&seed
) % 100) < g_arbitration
.rw_percentage
))) {
339 rc
= spdk_nvme_ns_cmd_read(entry
->nvme
.ns
, ns_ctx
->qpair
, task
->buf
,
340 offset_in_ios
* entry
->io_size_blocks
,
341 entry
->io_size_blocks
, io_complete
, task
, 0);
343 rc
= spdk_nvme_ns_cmd_write(entry
->nvme
.ns
, ns_ctx
->qpair
, task
->buf
,
344 offset_in_ios
* entry
->io_size_blocks
,
345 entry
->io_size_blocks
, io_complete
, task
, 0);
349 fprintf(stderr
, "starting I/O failed\n");
352 ns_ctx
->current_queue_depth
++;
356 task_complete(struct arb_task
*task
)
358 struct ns_worker_ctx
*ns_ctx
;
360 ns_ctx
= task
->ns_ctx
;
361 ns_ctx
->current_queue_depth
--;
362 ns_ctx
->io_completed
++;
364 rte_mempool_put(task_pool
, task
);
367 * is_draining indicates when time has expired for the test run
368 * and we are just waiting for the previously submitted I/O
369 * to complete. In this case, do not submit a new I/O to replace
370 * the one just completed.
372 if (!ns_ctx
->is_draining
) {
373 submit_single_io(ns_ctx
);
/* I/O completion callback: ctx is the arb_task passed at submission time. */
static void
io_complete(void *ctx, const struct spdk_nvme_cpl *completion)
{
	task_complete((struct arb_task *)ctx);
}
384 check_io(struct ns_worker_ctx
*ns_ctx
)
386 spdk_nvme_qpair_process_completions(ns_ctx
->qpair
, g_arbitration
.max_completions
);
/* Prime the context with queue_depth initial I/Os. */
static void
submit_io(struct ns_worker_ctx *ns_ctx, int queue_depth)
{
	while (queue_depth-- > 0) {
		submit_single_io(ns_ctx);
	}
}
398 drain_io(struct ns_worker_ctx
*ns_ctx
)
400 ns_ctx
->is_draining
= true;
401 while (ns_ctx
->current_queue_depth
> 0) {
407 init_ns_worker_ctx(struct ns_worker_ctx
*ns_ctx
, enum spdk_nvme_qprio qprio
)
409 ns_ctx
->qpair
= spdk_nvme_ctrlr_alloc_io_qpair(ns_ctx
->entry
->nvme
.ctrlr
, qprio
);
410 if (!ns_ctx
->qpair
) {
411 printf("ERROR: spdk_nvme_ctrlr_alloc_io_qpair failed\n");
419 cleanup_ns_worker_ctx(struct ns_worker_ctx
*ns_ctx
)
421 spdk_nvme_ctrlr_free_io_qpair(ns_ctx
->qpair
);
427 struct ns_entry
*entry
= g_namespaces
;
428 struct ns_entry
*next_entry
= NULL
;
429 struct worker_thread
*worker
= g_workers
;
430 struct worker_thread
*next_worker
= NULL
;
431 struct arb_task
*task
= NULL
;
434 next_entry
= entry
->next
;
440 next_worker
= worker
->next
;
441 free(worker
->ns_ctx
);
443 worker
= next_worker
;
446 if (rte_mempool_get(task_pool
, (void **)&task
) == 0) {
447 spdk_free(task
->buf
);
456 struct worker_thread
*worker
= (struct worker_thread
*)arg
;
457 struct ns_worker_ctx
*ns_ctx
= NULL
;
459 printf("Starting thread on core %u with %s\n", worker
->lcore
, print_qprio(worker
->qprio
));
461 /* Allocate a queue pair for each namespace. */
462 ns_ctx
= worker
->ns_ctx
;
463 while (ns_ctx
!= NULL
) {
464 if (init_ns_worker_ctx(ns_ctx
, worker
->qprio
) != 0) {
465 printf("ERROR: init_ns_worker_ctx() failed\n");
468 ns_ctx
= ns_ctx
->next
;
471 tsc_end
= spdk_get_ticks() + g_arbitration
.time_in_sec
* g_arbitration
.tsc_rate
;
473 /* Submit initial I/O for each namespace. */
474 ns_ctx
= worker
->ns_ctx
;
476 while (ns_ctx
!= NULL
) {
477 submit_io(ns_ctx
, g_arbitration
.queue_depth
);
478 ns_ctx
= ns_ctx
->next
;
483 * Check for completed I/O for each controller. A new
484 * I/O will be submitted in the io_complete callback
485 * to replace each I/O that is completed.
487 ns_ctx
= worker
->ns_ctx
;
488 while (ns_ctx
!= NULL
) {
490 ns_ctx
= ns_ctx
->next
;
493 if (spdk_get_ticks() > tsc_end
) {
498 ns_ctx
= worker
->ns_ctx
;
499 while (ns_ctx
!= NULL
) {
501 cleanup_ns_worker_ctx(ns_ctx
);
502 ns_ctx
= ns_ctx
->next
;
/* Print command-line usage for this tool. */
static void
usage(char *program_name)
{
	printf("%s options", program_name);
	printf("\n");
	printf("\t[-q io depth]\n");
	printf("\t[-s io size in bytes]\n");
	printf("\t[-w io pattern type, must be one of\n");
	printf("\t\t(read, write, randread, randwrite, rw, randrw)]\n");
	printf("\t[-M rwmixread (100 for reads, 0 for writes)]\n");
	printf("\t[-l enable latency tracking, default: disabled]\n");
	printf("\t\t(0 - disabled; 1 - enabled)\n");
	printf("\t[-t time in seconds]\n");
	printf("\t[-c core mask for I/O submission/completion.]\n");
	printf("\t\t(default: 0xf - 4 cores)]\n");
	printf("\t[-m max completions per poll]\n");
	printf("\t\t(default: 0 - unlimited)\n");
	printf("\t[-a arbitration mechanism, must be one of below]\n");
	printf("\t\t(0, 1, 2)]\n");
	printf("\t\t(0: default round robin mechanism)]\n");
	printf("\t\t(1: weighted round robin mechanism)]\n");
	printf("\t\t(2: vendor specific mechanism)]\n");
	printf("\t[-b enable arbitration user configuration, default: disabled]\n");
	printf("\t\t(0 - disabled; 1 - enabled)\n");
	printf("\t[-n subjected IOs for performance comparison]\n");
	printf("\t[-i shared memory group ID]\n");
}
537 print_qprio(enum spdk_nvme_qprio qprio
)
540 case SPDK_NVME_QPRIO_URGENT
:
541 return "urgent priority queue";
542 case SPDK_NVME_QPRIO_HIGH
:
543 return "high priority queue";
544 case SPDK_NVME_QPRIO_MEDIUM
:
545 return "medium priority queue";
546 case SPDK_NVME_QPRIO_LOW
:
547 return "low priority queue";
549 return "invalid priority queue";
555 print_configuration(char *program_name
)
557 printf("%s run with configuration:\n", program_name
);
558 printf("%s -q %d -s %d -w %s -M %d -l %d -t %d -c %s -m %d -a %d -b %d -i %d\n",
560 g_arbitration
.queue_depth
,
561 g_arbitration
.io_size_bytes
,
562 g_arbitration
.workload_type
,
563 g_arbitration
.rw_percentage
,
564 g_arbitration
.latency_tracking_enable
,
565 g_arbitration
.time_in_sec
,
566 g_arbitration
.core_mask
,
567 g_arbitration
.max_completions
,
568 g_arbitration
.arbitration_mechanism
,
569 g_arbitration
.arbitration_config
,
570 g_arbitration
.io_count
);
575 print_performance(void)
577 float io_per_second
, sent_all_io_in_secs
;
578 struct worker_thread
*worker
;
579 struct ns_worker_ctx
*ns_ctx
;
583 ns_ctx
= worker
->ns_ctx
;
585 io_per_second
= (float)ns_ctx
->io_completed
/ g_arbitration
.time_in_sec
;
586 sent_all_io_in_secs
= g_arbitration
.io_count
/ io_per_second
;
587 printf("%-43.43s core %u: %8.2f IO/s %8.2f secs/%d ios\n",
588 ns_ctx
->entry
->name
, worker
->lcore
,
589 io_per_second
, sent_all_io_in_secs
, g_arbitration
.io_count
);
590 ns_ctx
= ns_ctx
->next
;
592 worker
= worker
->next
;
594 printf("========================================================\n");
600 print_latency_page(struct ctrlr_entry
*entry
)
605 printf("%s\n", entry
->name
);
606 printf("--------------------------------------------------------\n");
608 for (i
= 0; i
< 32; i
++) {
609 if (entry
->latency_page
.buckets_32us
[i
])
610 printf("Bucket %dus - %dus: %d\n", i
* 32, (i
+ 1) * 32,
611 entry
->latency_page
.buckets_32us
[i
]);
613 for (i
= 0; i
< 31; i
++) {
614 if (entry
->latency_page
.buckets_1ms
[i
])
615 printf("Bucket %dms - %dms: %d\n", i
+ 1, i
+ 2,
616 entry
->latency_page
.buckets_1ms
[i
]);
618 for (i
= 0; i
< 31; i
++) {
619 if (entry
->latency_page
.buckets_32ms
[i
])
620 printf("Bucket %dms - %dms: %d\n", (i
+ 1) * 32, (i
+ 2) * 32,
621 entry
->latency_page
.buckets_32ms
[i
]);
626 print_latency_statistics(const char *op_name
, enum spdk_nvme_intel_log_page log_page
)
628 struct ctrlr_entry
*ctrlr
;
630 printf("%s Latency Statistics:\n", op_name
);
631 printf("========================================================\n");
632 ctrlr
= g_controllers
;
634 if (spdk_nvme_ctrlr_is_log_page_supported(ctrlr
->ctrlr
, log_page
)) {
635 if (spdk_nvme_ctrlr_cmd_get_log_page(
636 ctrlr
->ctrlr
, log_page
,
637 SPDK_NVME_GLOBAL_NS_TAG
,
638 &ctrlr
->latency_page
,
639 sizeof(struct spdk_nvme_intel_rw_latency_page
),
641 enable_latency_tracking_complete
,
643 printf("nvme_ctrlr_cmd_get_log_page() failed\n");
647 g_arbitration
.outstanding_commands
++;
649 printf("Controller %s: %s latency statistics not supported\n",
650 ctrlr
->name
, op_name
);
655 while (g_arbitration
.outstanding_commands
) {
656 ctrlr
= g_controllers
;
658 spdk_nvme_ctrlr_process_admin_completions(ctrlr
->ctrlr
);
663 ctrlr
= g_controllers
;
665 if (spdk_nvme_ctrlr_is_log_page_supported(ctrlr
->ctrlr
, log_page
)) {
666 print_latency_page(ctrlr
);
677 if (g_arbitration
.latency_tracking_enable
) {
678 if (g_arbitration
.rw_percentage
!= 0) {
679 print_latency_statistics("Read", SPDK_NVME_INTEL_LOG_READ_CMD_LATENCY
);
681 if (g_arbitration
.rw_percentage
!= 100) {
682 print_latency_statistics("Write", SPDK_NVME_INTEL_LOG_WRITE_CMD_LATENCY
);
688 parse_args(int argc
, char **argv
)
690 const char *workload_type
= NULL
;
692 bool mix_specified
= false;
694 while ((op
= getopt(argc
, argv
, "c:l:i:m:q:s:t:w:M:a:b:n:h")) != -1) {
697 g_arbitration
.core_mask
= optarg
;
700 g_arbitration
.shm_id
= atoi(optarg
);
703 g_arbitration
.latency_tracking_enable
= atoi(optarg
);
706 g_arbitration
.max_completions
= atoi(optarg
);
709 g_arbitration
.queue_depth
= atoi(optarg
);
712 g_arbitration
.io_size_bytes
= atoi(optarg
);
715 g_arbitration
.time_in_sec
= atoi(optarg
);
718 g_arbitration
.workload_type
= optarg
;
721 g_arbitration
.rw_percentage
= atoi(optarg
);
722 mix_specified
= true;
725 g_arbitration
.arbitration_mechanism
= atoi(optarg
);
728 g_arbitration
.arbitration_config
= atoi(optarg
);
731 g_arbitration
.io_count
= atoi(optarg
);
740 workload_type
= g_arbitration
.workload_type
;
742 if (strcmp(workload_type
, "read") &&
743 strcmp(workload_type
, "write") &&
744 strcmp(workload_type
, "randread") &&
745 strcmp(workload_type
, "randwrite") &&
746 strcmp(workload_type
, "rw") &&
747 strcmp(workload_type
, "randrw")) {
749 "io pattern type must be one of\n"
750 "(read, write, randread, randwrite, rw, randrw)\n");
754 if (!strcmp(workload_type
, "read") ||
755 !strcmp(workload_type
, "randread")) {
756 g_arbitration
.rw_percentage
= 100;
759 if (!strcmp(workload_type
, "write") ||
760 !strcmp(workload_type
, "randwrite")) {
761 g_arbitration
.rw_percentage
= 0;
764 if (!strcmp(workload_type
, "read") ||
765 !strcmp(workload_type
, "randread") ||
766 !strcmp(workload_type
, "write") ||
767 !strcmp(workload_type
, "randwrite")) {
769 fprintf(stderr
, "Ignoring -M option... Please use -M option"
770 " only when using rw or randrw.\n");
774 if (!strcmp(workload_type
, "rw") ||
775 !strcmp(workload_type
, "randrw")) {
776 if (g_arbitration
.rw_percentage
< 0 || g_arbitration
.rw_percentage
> 100) {
778 "-M must be specified to value from 0 to 100 "
779 "for rw or randrw.\n");
784 if (!strcmp(workload_type
, "read") ||
785 !strcmp(workload_type
, "write") ||
786 !strcmp(workload_type
, "rw")) {
787 g_arbitration
.is_random
= 0;
789 g_arbitration
.is_random
= 1;
792 if (g_arbitration
.latency_tracking_enable
!= 0 &&
793 g_arbitration
.latency_tracking_enable
!= 1) {
795 "-l must be specified to value 0 or 1.\n");
799 switch (g_arbitration
.arbitration_mechanism
) {
800 case SPDK_NVME_CC_AMS_RR
:
801 case SPDK_NVME_CC_AMS_WRR
:
802 case SPDK_NVME_CC_AMS_VS
:
806 "-a must be specified to value 0, 1, or 7.\n");
810 if (g_arbitration
.arbitration_config
!= 0 &&
811 g_arbitration
.arbitration_config
!= 1) {
813 "-b must be specified to value 0 or 1.\n");
815 } else if (g_arbitration
.arbitration_config
== 1 &&
816 g_arbitration
.arbitration_mechanism
!= SPDK_NVME_CC_AMS_WRR
) {
818 "-a must be specified to 1 (WRR) together.\n");
826 register_workers(void)
829 struct worker_thread
*worker
;
830 enum spdk_nvme_qprio qprio
= SPDK_NVME_QPRIO_URGENT
;
833 g_arbitration
.num_workers
= 0;
835 SPDK_ENV_FOREACH_CORE(i
) {
836 worker
= calloc(1, sizeof(*worker
));
837 if (worker
== NULL
) {
838 fprintf(stderr
, "Unable to allocate worker\n");
843 worker
->next
= g_workers
;
845 g_arbitration
.num_workers
++;
847 if (g_arbitration
.arbitration_mechanism
== SPDK_NVME_CAP_AMS_WRR
) {
851 worker
->qprio
= qprio
% SPDK_NVME_QPRIO_MAX
;
858 probe_cb(void *cb_ctx
, const struct spdk_nvme_transport_id
*trid
,
859 struct spdk_nvme_ctrlr_opts
*opts
)
861 /* Update with user specified arbitration configuration */
862 opts
->arb_mechanism
= g_arbitration
.arbitration_mechanism
;
864 printf("Attaching to %s\n", trid
->traddr
);
870 attach_cb(void *cb_ctx
, const struct spdk_nvme_transport_id
*trid
,
871 struct spdk_nvme_ctrlr
*ctrlr
, const struct spdk_nvme_ctrlr_opts
*opts
)
873 printf("Attached to %s\n", trid
->traddr
);
875 /* Update with actual arbitration configuration in use */
876 g_arbitration
.arbitration_mechanism
= opts
->arb_mechanism
;
878 register_ctrlr(ctrlr
);
882 register_controllers(void)
884 printf("Initializing NVMe Controllers\n");
886 if (spdk_nvme_probe(NULL
, NULL
, probe_cb
, attach_cb
, NULL
) != 0) {
887 fprintf(stderr
, "spdk_nvme_probe() failed\n");
891 if (g_arbitration
.num_namespaces
== 0) {
892 fprintf(stderr
, "No valid namespaces to continue IO testing\n");
900 unregister_controllers(void)
902 struct ctrlr_entry
*entry
= g_controllers
;
905 struct ctrlr_entry
*next
= entry
->next
;
906 if (g_arbitration
.latency_tracking_enable
&&
907 spdk_nvme_ctrlr_is_feature_supported(entry
->ctrlr
, SPDK_NVME_INTEL_FEAT_LATENCY_TRACKING
))
908 set_latency_tracking_feature(entry
->ctrlr
, false);
909 spdk_nvme_detach(entry
->ctrlr
);
916 associate_workers_with_ns(void)
918 struct ns_entry
*entry
= g_namespaces
;
919 struct worker_thread
*worker
= g_workers
;
920 struct ns_worker_ctx
*ns_ctx
;
923 count
= g_arbitration
.num_namespaces
> g_arbitration
.num_workers
?
924 g_arbitration
.num_namespaces
: g_arbitration
.num_workers
;
926 for (i
= 0; i
< count
; i
++) {
931 ns_ctx
= malloc(sizeof(struct ns_worker_ctx
));
935 memset(ns_ctx
, 0, sizeof(*ns_ctx
));
937 printf("Associating %s with lcore %d\n", entry
->name
, worker
->lcore
);
938 ns_ctx
->entry
= entry
;
939 ns_ctx
->next
= worker
->ns_ctx
;
940 worker
->ns_ctx
= ns_ctx
;
942 worker
= worker
->next
;
943 if (worker
== NULL
) {
949 entry
= g_namespaces
;
958 get_feature_completion(void *cb_arg
, const struct spdk_nvme_cpl
*cpl
)
960 struct feature
*feature
= cb_arg
;
961 int fid
= feature
- features
;
963 if (spdk_nvme_cpl_is_error(cpl
)) {
964 printf("get_feature(0x%02X) failed\n", fid
);
966 feature
->result
= cpl
->cdw0
;
967 feature
->valid
= true;
970 g_arbitration
.outstanding_commands
--;
974 get_feature(struct spdk_nvme_ctrlr
*ctrlr
, uint8_t fid
)
976 struct spdk_nvme_cmd cmd
= {};
978 cmd
.opc
= SPDK_NVME_OPC_GET_FEATURES
;
981 return spdk_nvme_ctrlr_cmd_admin_raw(ctrlr
, &cmd
, NULL
, 0, get_feature_completion
, &features
[fid
]);
985 get_arb_feature(struct spdk_nvme_ctrlr
*ctrlr
)
987 get_feature(ctrlr
, SPDK_NVME_FEAT_ARBITRATION
);
989 g_arbitration
.outstanding_commands
++;
991 while (g_arbitration
.outstanding_commands
) {
992 spdk_nvme_ctrlr_process_admin_completions(ctrlr
);
995 if (features
[SPDK_NVME_FEAT_ARBITRATION
].valid
) {
996 uint32_t arb
= features
[SPDK_NVME_FEAT_ARBITRATION
].result
;
997 unsigned ab
, lpw
, mpw
, hpw
;
999 ab
= arb
& SPDK_NVME_ARB_BURST_MASK
;
1000 lpw
= ((arb
>> SPDK_NVME_LOW_PRIO_WEIGHT_SHIFT
) & SPDK_NVME_PRIO_WEIGHT_MASK
) + 1;
1001 mpw
= ((arb
>> SPDK_NVME_MED_PRIO_WEIGHT_SHIFT
) & SPDK_NVME_PRIO_WEIGHT_MASK
) + 1;
1002 hpw
= ((arb
>> SPDK_NVME_HIGH_PRIO_WEIGHT_SHIFT
) & SPDK_NVME_PRIO_WEIGHT_MASK
) + 1;
1004 printf("Current Arbitration Configuration\n");
1005 printf("===========\n");
1006 printf("Arbitration Burst: ");
1007 if (ab
== SPDK_NVME_ARB_BURST_MASK
) {
1008 printf("no limit\n");
1010 printf("%u\n", 1u << ab
);
1013 printf("Low Priority Weight: %u\n", lpw
);
1014 printf("Medium Priority Weight: %u\n", mpw
);
1015 printf("High Priority Weight: %u\n", hpw
);
1021 set_feature_completion(void *cb_arg
, const struct spdk_nvme_cpl
*cpl
)
1023 struct feature
*feature
= cb_arg
;
1024 int fid
= feature
- features
;
1026 if (spdk_nvme_cpl_is_error(cpl
)) {
1027 printf("set_feature(0x%02X) failed\n", fid
);
1028 feature
->valid
= false;
1030 printf("Set Arbitration Feature Successfully\n");
1033 g_arbitration
.outstanding_commands
--;
1037 set_arb_feature(struct spdk_nvme_ctrlr
*ctrlr
)
1040 struct spdk_nvme_cmd cmd
= {};
1042 unsigned ab
, lpw
, mpw
, hpw
;
1044 cmd
.opc
= SPDK_NVME_OPC_SET_FEATURES
;
1045 cmd
.cdw10
= SPDK_NVME_FEAT_ARBITRATION
;
1047 g_arbitration
.outstanding_commands
= 0;
1049 if (features
[SPDK_NVME_FEAT_ARBITRATION
].valid
) {
1050 ab
= USER_SPECIFIED_ARBITRATION_BURST
& SPDK_NVME_ARB_BURST_MASK
;
1051 hpw
= USER_SPECIFIED_HIGH_PRIORITY_WEIGHT
<< SPDK_NVME_HIGH_PRIO_WEIGHT_SHIFT
;
1052 mpw
= USER_SPECIFIED_MEDIUM_PRIORITY_WEIGHT
<< SPDK_NVME_MED_PRIO_WEIGHT_SHIFT
;
1053 lpw
= USER_SPECIFIED_LOW_PRIORITY_WEIGHT
<< SPDK_NVME_LOW_PRIO_WEIGHT_SHIFT
;
1054 arb
= hpw
| mpw
| lpw
| ab
;
1058 ret
= spdk_nvme_ctrlr_cmd_admin_raw(ctrlr
, &cmd
, NULL
, 0,
1059 set_feature_completion
, &features
[SPDK_NVME_FEAT_ARBITRATION
]);
1061 printf("Set Arbitration Feature: Failed 0x%x\n", ret
);
1065 g_arbitration
.outstanding_commands
++;
1067 while (g_arbitration
.outstanding_commands
) {
1068 spdk_nvme_ctrlr_process_admin_completions(ctrlr
);
1071 if (!features
[SPDK_NVME_FEAT_ARBITRATION
].valid
) {
1072 printf("Set Arbitration Feature failed and use default configuration\n");
1079 main(int argc
, char **argv
)
1082 struct worker_thread
*worker
, *master_worker
;
1083 unsigned master_core
;
1084 char task_pool_name
[30];
1085 uint32_t task_count
;
1086 struct spdk_env_opts opts
;
1088 rc
= parse_args(argc
, argv
);
1093 spdk_env_opts_init(&opts
);
1095 opts
.core_mask
= g_arbitration
.core_mask
;
1096 opts
.shm_id
= g_arbitration
.shm_id
;
1097 spdk_env_init(&opts
);
1099 g_arbitration
.tsc_rate
= spdk_get_ticks_hz();
1101 if (register_workers() != 0) {
1105 if (register_controllers() != 0) {
1109 if (associate_workers_with_ns() != 0) {
1113 snprintf(task_pool_name
, sizeof(task_pool_name
), "task_pool_%d", getpid());
1116 * The task_count will be dynamically calculated based on the
1117 * number of attached active namespaces, queue depth and number
1118 * of cores (workers) involved in the IO perations.
1120 task_count
= g_arbitration
.num_namespaces
> g_arbitration
.num_workers
?
1121 g_arbitration
.num_namespaces
: g_arbitration
.num_workers
;
1122 task_count
*= g_arbitration
.queue_depth
;
1124 task_pool
= rte_mempool_create(task_pool_name
, task_count
,
1125 sizeof(struct arb_task
),
1126 0, 0, NULL
, NULL
, task_ctor
, NULL
,
1128 if (task_pool
== NULL
) {
1129 fprintf(stderr
, "could not initialize task pool\n");
1133 print_configuration(argv
[0]);
1135 printf("Initialization complete. Launching workers.\n");
1137 /* Launch all of the slave workers */
1138 master_core
= rte_get_master_lcore();
1139 master_worker
= NULL
;
1141 while (worker
!= NULL
) {
1142 if (worker
->lcore
!= master_core
) {
1143 rte_eal_remote_launch(work_fn
, worker
, worker
->lcore
);
1145 assert(master_worker
== NULL
);
1146 master_worker
= worker
;
1148 worker
= worker
->next
;
1151 assert(master_worker
!= NULL
);
1152 rc
= work_fn(master_worker
);
1154 rte_eal_mp_wait_lcore();
1158 unregister_controllers();
1163 fprintf(stderr
, "%s: errors occured\n", argv
[0]);