4 * Copyright (c) Intel Corporation.
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
11 * * Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * * Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in
15 * the documentation and/or other materials provided with the
17 * * Neither the name of Intel Corporation nor the names of its
18 * contributors may be used to endorse or promote products derived
19 * from this software without specific prior written permission.
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
39 #include <rte_config.h>
40 #include <rte_mempool.h>
41 #include <rte_lcore.h>
44 #include "spdk/nvme.h"
46 #include "spdk/queue.h"
47 #include "spdk/string.h"
48 #include "spdk/nvme_intel.h"
57 struct spdk_nvme_ctrlr
*ctrlr
;
58 struct spdk_nvme_intel_rw_latency_page
*latency_page
;
59 struct ctrlr_entry
*next
;
73 struct spdk_nvme_ctrlr
*ctrlr
;
74 struct spdk_nvme_ns
*ns
;
83 struct ns_entry
*next
;
84 uint32_t io_size_blocks
;
/*
 * Per-namespace, per-worker I/O context. One of these exists for every
 * (namespace, worker-thread) pairing; holds queue state and latency stats.
 *
 * NOTE(review): the struct header and some members were garbled in extraction;
 * members below are reconstructed from their uses elsewhere in this file
 * (total_tsc/min_tsc/max_tsc in task_complete, is_draining in drain_io,
 * u.nvme.qpair / u.aio.* in init_ns_worker_ctx) — confirm against upstream.
 */
struct ns_worker_ctx {
	struct ns_entry		*entry;			/* namespace this context drives */
	uint64_t		io_completed;		/* total I/Os completed */
	uint64_t		total_tsc;		/* sum of per-I/O latencies (ticks) */
	uint64_t		min_tsc;		/* smallest observed latency (ticks) */
	uint64_t		max_tsc;		/* largest observed latency (ticks) */
	uint64_t		current_queue_depth;	/* I/Os currently in flight */
	uint64_t		offset_in_ios;		/* next sequential I/O offset */
	bool			is_draining;		/* true once the timed run has ended */

	union {
		struct {
			struct spdk_nvme_qpair	*qpair;	/* dedicated I/O queue pair */
		} nvme;
#if HAVE_LIBAIO
		struct {
			struct io_event		*events;	/* completion event buffer */
			io_context_t		ctx;		/* kernel AIO context */
		} aio;
#endif
	} u;

	struct ns_worker_ctx	*next;			/* worker's singly-linked ctx list */
};
116 struct ns_worker_ctx
*ns_ctx
;
/*
 * One worker per participating lcore; owns a list of ns_worker_ctx.
 * NOTE(review): the lcore member is reconstructed from uses
 * (worker->lcore printed with %u and passed to rte_eal_remote_launch).
 */
struct worker_thread {
	struct ns_worker_ctx	*ns_ctx;	/* namespaces assigned to this worker */
	struct worker_thread	*next;		/* global worker list linkage */
	unsigned		lcore;		/* DPDK logical core this worker runs on */
};
130 static int g_outstanding_commands
;
132 static bool g_latency_tracking_enable
= false;
134 static struct rte_mempool
*task_pool
;
136 static struct ctrlr_entry
*g_controllers
= NULL
;
137 static struct ns_entry
*g_namespaces
= NULL
;
138 static int g_num_namespaces
= 0;
139 static struct worker_thread
*g_workers
= NULL
;
140 static int g_num_workers
= 0;
142 static uint64_t g_tsc_rate
;
144 static uint32_t g_io_align
= 0x200;
145 static uint32_t g_io_size_bytes
;
146 static int g_rw_percentage
;
147 static int g_is_random
;
148 static int g_queue_depth
;
149 static int g_time_in_sec
;
150 static uint32_t g_max_completions
;
151 static int g_dpdk_mem
;
152 static int g_shm_id
= -1;
154 static const char *g_core_mask
;
157 struct spdk_nvme_transport_id trid
;
158 TAILQ_ENTRY(trid_entry
) tailq
;
161 static TAILQ_HEAD(, trid_entry
) g_trid_list
= TAILQ_HEAD_INITIALIZER(g_trid_list
);
163 static int g_aio_optind
; /* Index of first AIO filename in argv */
166 task_complete(struct perf_task
*task
);
169 register_ns(struct spdk_nvme_ctrlr
*ctrlr
, struct spdk_nvme_ns
*ns
)
171 struct ns_entry
*entry
;
172 const struct spdk_nvme_ctrlr_data
*cdata
;
174 cdata
= spdk_nvme_ctrlr_get_data(ctrlr
);
176 if (!spdk_nvme_ns_is_active(ns
)) {
177 printf("Controller %-20.20s (%-20.20s): Skipping inactive NS %u\n",
178 cdata
->mn
, cdata
->sn
,
179 spdk_nvme_ns_get_id(ns
));
183 if (spdk_nvme_ns_get_size(ns
) < g_io_size_bytes
||
184 spdk_nvme_ns_get_sector_size(ns
) > g_io_size_bytes
) {
185 printf("WARNING: controller %-20.20s (%-20.20s) ns %u has invalid "
186 "ns size %" PRIu64
" / block size %u for I/O size %u\n",
187 cdata
->mn
, cdata
->sn
, spdk_nvme_ns_get_id(ns
),
188 spdk_nvme_ns_get_size(ns
), spdk_nvme_ns_get_sector_size(ns
), g_io_size_bytes
);
192 entry
= malloc(sizeof(struct ns_entry
));
194 perror("ns_entry malloc");
198 entry
->type
= ENTRY_TYPE_NVME_NS
;
199 entry
->u
.nvme
.ctrlr
= ctrlr
;
200 entry
->u
.nvme
.ns
= ns
;
202 entry
->size_in_ios
= spdk_nvme_ns_get_size(ns
) /
204 entry
->io_size_blocks
= g_io_size_bytes
/ spdk_nvme_ns_get_sector_size(ns
);
206 snprintf(entry
->name
, 44, "%-20.20s (%-20.20s)", cdata
->mn
, cdata
->sn
);
209 entry
->next
= g_namespaces
;
210 g_namespaces
= entry
;
214 unregister_namespaces(void)
216 struct ns_entry
*entry
= g_namespaces
;
219 struct ns_entry
*next
= entry
->next
;
226 enable_latency_tracking_complete(void *cb_arg
, const struct spdk_nvme_cpl
*cpl
)
228 if (spdk_nvme_cpl_is_error(cpl
)) {
229 printf("enable_latency_tracking_complete failed\n");
231 g_outstanding_commands
--;
235 set_latency_tracking_feature(struct spdk_nvme_ctrlr
*ctrlr
, bool enable
)
238 union spdk_nvme_intel_feat_latency_tracking latency_tracking
;
241 latency_tracking
.bits
.enable
= 0x01;
243 latency_tracking
.bits
.enable
= 0x00;
246 res
= spdk_nvme_ctrlr_cmd_set_feature(ctrlr
, SPDK_NVME_INTEL_FEAT_LATENCY_TRACKING
,
247 latency_tracking
.raw
, 0, NULL
, 0, enable_latency_tracking_complete
, NULL
);
249 printf("fail to allocate nvme request.\n");
252 g_outstanding_commands
++;
254 while (g_outstanding_commands
) {
255 spdk_nvme_ctrlr_process_admin_completions(ctrlr
);
260 register_ctrlr(struct spdk_nvme_ctrlr
*ctrlr
)
263 struct spdk_nvme_ns
*ns
;
264 struct ctrlr_entry
*entry
= malloc(sizeof(struct ctrlr_entry
));
265 const struct spdk_nvme_ctrlr_data
*cdata
= spdk_nvme_ctrlr_get_data(ctrlr
);
268 perror("ctrlr_entry malloc");
272 entry
->latency_page
= spdk_zmalloc(sizeof(struct spdk_nvme_intel_rw_latency_page
),
274 if (entry
->latency_page
== NULL
) {
275 printf("Allocation error (latency page)\n");
279 snprintf(entry
->name
, sizeof(entry
->name
), "%-20.20s (%-20.20s)", cdata
->mn
, cdata
->sn
);
281 entry
->ctrlr
= ctrlr
;
282 entry
->next
= g_controllers
;
283 g_controllers
= entry
;
285 if (g_latency_tracking_enable
&&
286 spdk_nvme_ctrlr_is_feature_supported(ctrlr
, SPDK_NVME_INTEL_FEAT_LATENCY_TRACKING
))
287 set_latency_tracking_feature(ctrlr
, true);
289 num_ns
= spdk_nvme_ctrlr_get_num_ns(ctrlr
);
290 for (nsid
= 1; nsid
<= num_ns
; nsid
++) {
291 ns
= spdk_nvme_ctrlr_get_ns(ctrlr
, nsid
);
295 register_ns(ctrlr
, ns
);
#if HAVE_LIBAIO
/*
 * Open a file/block device for Linux AIO, validate its size and block
 * length against the requested I/O size, and add it to g_namespaces.
 * Returns 0 on success, -1 on failure.
 */
static int
register_aio_file(const char *path)
{
	struct ns_entry *entry;

	int flags, fd;
	uint64_t size;
	uint32_t blklen;

	/* Open read-only / write-only when the workload allows it. */
	if (g_rw_percentage == 100) {
		flags = O_RDONLY;
	} else if (g_rw_percentage == 0) {
		flags = O_WRONLY;
	} else {
		flags = O_RDWR;
	}

	flags |= O_DIRECT;

	fd = open(path, flags);
	if (fd < 0) {
		fprintf(stderr, "Could not open AIO device %s: %s\n", path, strerror(errno));
		return -1;
	}

	size = spdk_fd_get_size(fd);
	if (size == 0) {
		fprintf(stderr, "Could not determine size of AIO device %s\n", path);
		close(fd);
		return -1;
	}

	blklen = spdk_fd_get_blocklen(fd);
	if (blklen == 0) {
		fprintf(stderr, "Could not determine block size of AIO device %s\n", path);
		close(fd);
		return -1;
	}

	/*
	 * TODO: This should really calculate the LCM of the current g_io_align and blklen.
	 * For now, it's fairly safe to just assume all block sizes are powers of 2.
	 */
	if (g_io_align < blklen) {
		g_io_align = blklen;
	}

	entry = malloc(sizeof(struct ns_entry));
	if (entry == NULL) {
		close(fd);
		perror("aio ns_entry malloc");
		return -1;
	}

	entry->type = ENTRY_TYPE_AIO_FILE;
	entry->u.aio.fd = fd;
	entry->size_in_ios = size / g_io_size_bytes;
	entry->io_size_blocks = g_io_size_bytes / blklen;

	snprintf(entry->name, sizeof(entry->name), "%s", path);

	g_num_namespaces++;
	entry->next = g_namespaces;
	g_namespaces = entry;

	return 0;
}
#endif /* HAVE_LIBAIO */
#if HAVE_LIBAIO
/*
 * Fill in and submit a single libaio iocb. cb_ctx is stashed in iocb->data
 * so aio_check_io() can recover the perf_task on completion.
 * Returns 0 on success, -1 on submit failure.
 */
static int
aio_submit(io_context_t aio_ctx, struct iocb *iocb, int fd, enum io_iocb_cmd cmd, void *buf,
	   unsigned long nbytes, uint64_t offset, void *cb_ctx)
{
	iocb->aio_fildes = fd;
	iocb->aio_reqprio = 0;
	iocb->aio_lio_opcode = cmd;
	iocb->u.c.buf = buf;
	iocb->u.c.nbytes = nbytes;
	iocb->u.c.offset = offset;
	iocb->data = cb_ctx;

	if (io_submit(aio_ctx, 1, &iocb) < 0) {
		printf("io_submit");
		return -1;
	}

	return 0;
}
#endif /* HAVE_LIBAIO */
#if HAVE_LIBAIO
/*
 * Reap completed AIO events (non-blocking: zero timeout, but waits for at
 * least one event per io_getevents semantics with min_nr = 1) and complete
 * the associated tasks.
 */
static void
aio_check_io(struct ns_worker_ctx *ns_ctx)
{
	int count, i;
	struct timespec timeout;

	timeout.tv_sec = 0;
	timeout.tv_nsec = 0;

	count = io_getevents(ns_ctx->u.aio.ctx, 1, g_queue_depth, ns_ctx->u.aio.events, &timeout);
	if (count < 0) {
		fprintf(stderr, "io_getevents error\n");
		exit(1);
	}

	for (i = 0; i < count; i++) {
		task_complete(ns_ctx->u.aio.events[i].data);
	}
}
#endif /* HAVE_LIBAIO */
410 static void task_ctor(struct rte_mempool
*mp
, void *arg
, void *__task
, unsigned id
)
412 struct perf_task
*task
= __task
;
413 task
->buf
= spdk_zmalloc(g_io_size_bytes
, g_io_align
, NULL
);
414 if (task
->buf
== NULL
) {
415 fprintf(stderr
, "task->buf spdk_zmalloc failed\n");
418 memset(task
->buf
, id
% 8, g_io_size_bytes
);
/* Forward declaration: NVMe completion callback, defined after submit_single_io(). */
static void io_complete(void *ctx, const struct spdk_nvme_cpl *completion);

/* Per-thread rand_r() state so worker threads don't share PRNG state. */
static __thread unsigned int seed = 0;
426 submit_single_io(struct ns_worker_ctx
*ns_ctx
)
428 struct perf_task
*task
= NULL
;
429 uint64_t offset_in_ios
;
431 struct ns_entry
*entry
= ns_ctx
->entry
;
433 if (rte_mempool_get(task_pool
, (void **)&task
) != 0) {
434 fprintf(stderr
, "task_pool rte_mempool_get failed\n");
438 task
->ns_ctx
= ns_ctx
;
441 offset_in_ios
= rand_r(&seed
) % entry
->size_in_ios
;
443 offset_in_ios
= ns_ctx
->offset_in_ios
++;
444 if (ns_ctx
->offset_in_ios
== entry
->size_in_ios
) {
445 ns_ctx
->offset_in_ios
= 0;
449 task
->submit_tsc
= spdk_get_ticks();
451 if ((g_rw_percentage
== 100) ||
452 (g_rw_percentage
!= 0 && ((rand_r(&seed
) % 100) < g_rw_percentage
))) {
454 if (entry
->type
== ENTRY_TYPE_AIO_FILE
) {
455 rc
= aio_submit(ns_ctx
->u
.aio
.ctx
, &task
->iocb
, entry
->u
.aio
.fd
, IO_CMD_PREAD
, task
->buf
,
456 g_io_size_bytes
, offset_in_ios
* g_io_size_bytes
, task
);
460 rc
= spdk_nvme_ns_cmd_read(entry
->u
.nvme
.ns
, ns_ctx
->u
.nvme
.qpair
, task
->buf
,
461 offset_in_ios
* entry
->io_size_blocks
,
462 entry
->io_size_blocks
, io_complete
, task
, 0);
466 if (entry
->type
== ENTRY_TYPE_AIO_FILE
) {
467 rc
= aio_submit(ns_ctx
->u
.aio
.ctx
, &task
->iocb
, entry
->u
.aio
.fd
, IO_CMD_PWRITE
, task
->buf
,
468 g_io_size_bytes
, offset_in_ios
* g_io_size_bytes
, task
);
472 rc
= spdk_nvme_ns_cmd_write(entry
->u
.nvme
.ns
, ns_ctx
->u
.nvme
.qpair
, task
->buf
,
473 offset_in_ios
* entry
->io_size_blocks
,
474 entry
->io_size_blocks
, io_complete
, task
, 0);
479 fprintf(stderr
, "starting I/O failed\n");
482 ns_ctx
->current_queue_depth
++;
486 task_complete(struct perf_task
*task
)
488 struct ns_worker_ctx
*ns_ctx
;
491 ns_ctx
= task
->ns_ctx
;
492 ns_ctx
->current_queue_depth
--;
493 ns_ctx
->io_completed
++;
494 tsc_diff
= spdk_get_ticks() - task
->submit_tsc
;
495 ns_ctx
->total_tsc
+= tsc_diff
;
496 if (ns_ctx
->min_tsc
> tsc_diff
) {
497 ns_ctx
->min_tsc
= tsc_diff
;
499 if (ns_ctx
->max_tsc
< tsc_diff
) {
500 ns_ctx
->max_tsc
= tsc_diff
;
503 rte_mempool_put(task_pool
, task
);
506 * is_draining indicates when time has expired for the test run
507 * and we are just waiting for the previously submitted I/O
508 * to complete. In this case, do not submit a new I/O to replace
509 * the one just completed.
511 if (!ns_ctx
->is_draining
) {
512 submit_single_io(ns_ctx
);
/* SPDK NVMe completion callback: ctx is the perf_task submitted earlier. */
static void
io_complete(void *ctx, const struct spdk_nvme_cpl *completion)
{
	task_complete((struct perf_task *)ctx);
}
523 check_io(struct ns_worker_ctx
*ns_ctx
)
526 if (ns_ctx
->entry
->type
== ENTRY_TYPE_AIO_FILE
) {
527 aio_check_io(ns_ctx
);
531 spdk_nvme_qpair_process_completions(ns_ctx
->u
.nvme
.qpair
, g_max_completions
);
/* Prime the queue: submit queue_depth initial I/Os for this context. */
static void
submit_io(struct ns_worker_ctx *ns_ctx, int queue_depth)
{
	while (queue_depth-- > 0) {
		submit_single_io(ns_ctx);
	}
}
544 drain_io(struct ns_worker_ctx
*ns_ctx
)
546 ns_ctx
->is_draining
= true;
547 while (ns_ctx
->current_queue_depth
> 0) {
553 init_ns_worker_ctx(struct ns_worker_ctx
*ns_ctx
)
555 if (ns_ctx
->entry
->type
== ENTRY_TYPE_AIO_FILE
) {
557 ns_ctx
->u
.aio
.events
= calloc(g_queue_depth
, sizeof(struct io_event
));
558 if (!ns_ctx
->u
.aio
.events
) {
561 ns_ctx
->u
.aio
.ctx
= 0;
562 if (io_setup(g_queue_depth
, &ns_ctx
->u
.aio
.ctx
) < 0) {
563 free(ns_ctx
->u
.aio
.events
);
570 * TODO: If a controller has multiple namespaces, they could all use the same queue.
571 * For now, give each namespace/thread combination its own queue.
573 ns_ctx
->u
.nvme
.qpair
= spdk_nvme_ctrlr_alloc_io_qpair(ns_ctx
->entry
->u
.nvme
.ctrlr
, 0);
574 if (!ns_ctx
->u
.nvme
.qpair
) {
575 printf("ERROR: spdk_nvme_ctrlr_alloc_io_qpair failed\n");
584 cleanup_ns_worker_ctx(struct ns_worker_ctx
*ns_ctx
)
586 if (ns_ctx
->entry
->type
== ENTRY_TYPE_AIO_FILE
) {
588 io_destroy(ns_ctx
->u
.aio
.ctx
);
589 free(ns_ctx
->u
.aio
.events
);
592 spdk_nvme_ctrlr_free_io_qpair(ns_ctx
->u
.nvme
.qpair
);
600 struct worker_thread
*worker
= (struct worker_thread
*)arg
;
601 struct ns_worker_ctx
*ns_ctx
= NULL
;
603 printf("Starting thread on core %u\n", worker
->lcore
);
605 /* Allocate a queue pair for each namespace. */
606 ns_ctx
= worker
->ns_ctx
;
607 while (ns_ctx
!= NULL
) {
608 if (init_ns_worker_ctx(ns_ctx
) != 0) {
609 printf("ERROR: init_ns_worker_ctx() failed\n");
612 ns_ctx
= ns_ctx
->next
;
615 tsc_end
= spdk_get_ticks() + g_time_in_sec
* g_tsc_rate
;
617 /* Submit initial I/O for each namespace. */
618 ns_ctx
= worker
->ns_ctx
;
619 while (ns_ctx
!= NULL
) {
620 submit_io(ns_ctx
, g_queue_depth
);
621 ns_ctx
= ns_ctx
->next
;
626 * Check for completed I/O for each controller. A new
627 * I/O will be submitted in the io_complete callback
628 * to replace each I/O that is completed.
630 ns_ctx
= worker
->ns_ctx
;
631 while (ns_ctx
!= NULL
) {
633 ns_ctx
= ns_ctx
->next
;
636 if (spdk_get_ticks() > tsc_end
) {
641 ns_ctx
= worker
->ns_ctx
;
642 while (ns_ctx
!= NULL
) {
644 cleanup_ns_worker_ctx(ns_ctx
);
645 ns_ctx
= ns_ctx
->next
;
651 static void usage(char *program_name
)
653 printf("%s options", program_name
);
655 printf(" [AIO device(s)]...");
658 printf("\t[-q io depth]\n");
659 printf("\t[-s io size in bytes]\n");
660 printf("\t[-w io pattern type, must be one of\n");
661 printf("\t\t(read, write, randread, randwrite, rw, randrw)]\n");
662 printf("\t[-M rwmixread (100 for reads, 0 for writes)]\n");
663 printf("\t[-l enable latency tracking, default: disabled]\n");
664 printf("\t[-t time in seconds]\n");
665 printf("\t[-c core mask for I/O submission/completion.]\n");
666 printf("\t\t(default: 1)]\n");
667 printf("\t[-r Transport ID for local PCIe NVMe or NVMeoF]\n");
668 printf("\t Format: 'key:value [key:value] ...'\n");
669 printf("\t Keys:\n");
670 printf("\t trtype Transport type (e.g. PCIe, RDMA)\n");
671 printf("\t adrfam Address family (e.g. IPv4, IPv6)\n");
672 printf("\t traddr Transport address (e.g. 0000:04:00.0 for PCIe or 192.168.100.8 for RDMA)\n");
673 printf("\t trsvcid Transport service identifier (e.g. 4420)\n");
674 printf("\t subnqn Subsystem NQN (default: %s)\n", SPDK_NVMF_DISCOVERY_NQN
);
675 printf("\t Example: -r 'trtype:PCIe traddr:0000:04:00.0' for PCIe or\n");
676 printf("\t -r 'trtype:RDMA adrfam:IPv4 traddr:192.168.100.8 trsvcid:4420' for NVMeoF\n");
677 printf("\t[-d DPDK huge memory size in MB.]\n");
678 printf("\t[-m max completions per poll]\n");
679 printf("\t\t(default: 0 - unlimited)\n");
680 printf("\t[-i shared memory group ID]\n");
684 print_performance(void)
686 uint64_t total_io_completed
;
687 float io_per_second
, mb_per_second
, average_latency
, min_latency
, max_latency
;
688 float total_io_per_second
, total_mb_per_second
;
689 float sum_ave_latency
, sum_min_latency
, sum_max_latency
;
691 struct worker_thread
*worker
;
692 struct ns_worker_ctx
*ns_ctx
;
694 total_io_per_second
= 0;
695 total_mb_per_second
= 0;
696 total_io_completed
= 0;
702 printf("========================================================\n");
703 printf("%103s\n", "Latency(us)");
704 printf("%-55s: %10s %10s %10s %10s %10s\n",
705 "Device Information", "IOPS", "MB/s", "Average", "min", "max");
709 ns_ctx
= worker
->ns_ctx
;
711 io_per_second
= (float)ns_ctx
->io_completed
/ g_time_in_sec
;
712 mb_per_second
= io_per_second
* g_io_size_bytes
/ (1024 * 1024);
713 average_latency
= (float)(ns_ctx
->total_tsc
/ ns_ctx
->io_completed
) * 1000 * 1000 / g_tsc_rate
;
714 min_latency
= (float)ns_ctx
->min_tsc
* 1000 * 1000 / g_tsc_rate
;
715 max_latency
= (float)ns_ctx
->max_tsc
* 1000 * 1000 / g_tsc_rate
;
716 printf("%-43.43s from core %u: %10.2f %10.2f %10.2f %10.2f %10.2f\n",
717 ns_ctx
->entry
->name
, worker
->lcore
,
718 io_per_second
, mb_per_second
,
719 average_latency
, min_latency
, max_latency
);
720 total_io_per_second
+= io_per_second
;
721 total_mb_per_second
+= mb_per_second
;
722 total_io_completed
+= ns_ctx
->io_completed
;
723 sum_ave_latency
+= average_latency
;
724 sum_min_latency
+= min_latency
;
725 sum_max_latency
+= max_latency
;
727 ns_ctx
= ns_ctx
->next
;
729 worker
= worker
->next
;
732 assert(ns_count
!= 0);
733 printf("========================================================\n");
734 printf("%-55s: %10.2f %10.2f %10.2f %10.2f %10.2f\n",
735 "Total", total_io_per_second
, total_mb_per_second
,
736 sum_ave_latency
/ ns_count
, sum_min_latency
/ ns_count
,
737 sum_max_latency
/ ns_count
);
742 print_latency_page(struct ctrlr_entry
*entry
)
747 printf("%s\n", entry
->name
);
748 printf("--------------------------------------------------------\n");
750 for (i
= 0; i
< 32; i
++) {
751 if (entry
->latency_page
->buckets_32us
[i
])
752 printf("Bucket %dus - %dus: %d\n", i
* 32, (i
+ 1) * 32, entry
->latency_page
->buckets_32us
[i
]);
754 for (i
= 0; i
< 31; i
++) {
755 if (entry
->latency_page
->buckets_1ms
[i
])
756 printf("Bucket %dms - %dms: %d\n", i
+ 1, i
+ 2, entry
->latency_page
->buckets_1ms
[i
]);
758 for (i
= 0; i
< 31; i
++) {
759 if (entry
->latency_page
->buckets_32ms
[i
])
760 printf("Bucket %dms - %dms: %d\n", (i
+ 1) * 32, (i
+ 2) * 32,
761 entry
->latency_page
->buckets_32ms
[i
]);
766 print_latency_statistics(const char *op_name
, enum spdk_nvme_intel_log_page log_page
)
768 struct ctrlr_entry
*ctrlr
;
770 printf("%s Latency Statistics:\n", op_name
);
771 printf("========================================================\n");
772 ctrlr
= g_controllers
;
774 if (spdk_nvme_ctrlr_is_log_page_supported(ctrlr
->ctrlr
, log_page
)) {
775 if (spdk_nvme_ctrlr_cmd_get_log_page(ctrlr
->ctrlr
, log_page
, SPDK_NVME_GLOBAL_NS_TAG
,
776 ctrlr
->latency_page
, sizeof(struct spdk_nvme_intel_rw_latency_page
), 0,
777 enable_latency_tracking_complete
,
779 printf("nvme_ctrlr_cmd_get_log_page() failed\n");
783 g_outstanding_commands
++;
785 printf("Controller %s: %s latency statistics not supported\n", ctrlr
->name
, op_name
);
790 while (g_outstanding_commands
) {
791 ctrlr
= g_controllers
;
793 spdk_nvme_ctrlr_process_admin_completions(ctrlr
->ctrlr
);
798 ctrlr
= g_controllers
;
800 if (spdk_nvme_ctrlr_is_log_page_supported(ctrlr
->ctrlr
, log_page
)) {
801 print_latency_page(ctrlr
);
812 if (g_latency_tracking_enable
) {
813 if (g_rw_percentage
!= 0) {
814 print_latency_statistics("Read", SPDK_NVME_INTEL_LOG_READ_CMD_LATENCY
);
816 if (g_rw_percentage
!= 100) {
817 print_latency_statistics("Write", SPDK_NVME_INTEL_LOG_WRITE_CMD_LATENCY
);
823 unregister_trids(void)
825 struct trid_entry
*trid_entry
, *tmp
;
827 TAILQ_FOREACH_SAFE(trid_entry
, &g_trid_list
, tailq
, tmp
) {
833 add_trid(const char *trid_str
)
835 struct trid_entry
*trid_entry
;
836 struct spdk_nvme_transport_id
*trid
;
838 trid_entry
= calloc(1, sizeof(*trid_entry
));
839 if (trid_entry
== NULL
) {
843 trid
= &trid_entry
->trid
;
844 memset(trid
, 0, sizeof(*trid
));
845 trid
->trtype
= SPDK_NVME_TRANSPORT_PCIE
;
846 snprintf(trid
->subnqn
, sizeof(trid
->subnqn
), "%s", SPDK_NVMF_DISCOVERY_NQN
);
848 if (spdk_nvme_transport_id_parse(trid
, trid_str
) != 0) {
849 fprintf(stderr
, "Invalid transport ID format '%s'\n", trid_str
);
854 TAILQ_INSERT_TAIL(&g_trid_list
, trid_entry
, tailq
);
859 parse_args(int argc
, char **argv
)
861 const char *workload_type
;
863 bool mix_specified
= false;
868 workload_type
= NULL
;
870 g_rw_percentage
= -1;
872 g_max_completions
= 0;
874 while ((op
= getopt(argc
, argv
, "c:d:i:lm:q:r:s:t:w:M:")) != -1) {
877 g_core_mask
= optarg
;
880 g_dpdk_mem
= atoi(optarg
);
883 g_shm_id
= atoi(optarg
);
886 g_latency_tracking_enable
= true;
889 g_max_completions
= atoi(optarg
);
892 g_queue_depth
= atoi(optarg
);
895 if (add_trid(optarg
)) {
901 g_io_size_bytes
= atoi(optarg
);
904 g_time_in_sec
= atoi(optarg
);
907 workload_type
= optarg
;
910 g_rw_percentage
= atoi(optarg
);
911 mix_specified
= true;
919 if (!g_queue_depth
) {
923 if (!g_io_size_bytes
) {
927 if (!workload_type
) {
931 if (!g_time_in_sec
) {
936 if (strcmp(workload_type
, "read") &&
937 strcmp(workload_type
, "write") &&
938 strcmp(workload_type
, "randread") &&
939 strcmp(workload_type
, "randwrite") &&
940 strcmp(workload_type
, "rw") &&
941 strcmp(workload_type
, "randrw")) {
943 "io pattern type must be one of\n"
944 "(read, write, randread, randwrite, rw, randrw)\n");
948 if (!strcmp(workload_type
, "read") ||
949 !strcmp(workload_type
, "randread")) {
950 g_rw_percentage
= 100;
953 if (!strcmp(workload_type
, "write") ||
954 !strcmp(workload_type
, "randwrite")) {
958 if (!strcmp(workload_type
, "read") ||
959 !strcmp(workload_type
, "randread") ||
960 !strcmp(workload_type
, "write") ||
961 !strcmp(workload_type
, "randwrite")) {
963 fprintf(stderr
, "Ignoring -M option... Please use -M option"
964 " only when using rw or randrw.\n");
968 if (!strcmp(workload_type
, "rw") ||
969 !strcmp(workload_type
, "randrw")) {
970 if (g_rw_percentage
< 0 || g_rw_percentage
> 100) {
972 "-M must be specified to value from 0 to 100 "
973 "for rw or randrw.\n");
978 if (!strcmp(workload_type
, "read") ||
979 !strcmp(workload_type
, "write") ||
980 !strcmp(workload_type
, "rw")) {
986 if (TAILQ_EMPTY(&g_trid_list
)) {
987 /* If no transport IDs specified, default to enumerating all local PCIe devices */
988 add_trid("trtype:PCIe");
991 g_aio_optind
= optind
;
997 register_workers(void)
1000 struct worker_thread
*worker
;
1005 SPDK_ENV_FOREACH_CORE(i
) {
1006 worker
= calloc(1, sizeof(*worker
));
1007 if (worker
== NULL
) {
1008 fprintf(stderr
, "Unable to allocate worker\n");
1013 worker
->next
= g_workers
;
1022 unregister_workers(void)
1024 struct worker_thread
*worker
= g_workers
;
1026 /* Free namespace context and worker thread */
1028 struct worker_thread
*next_worker
= worker
->next
;
1029 struct ns_worker_ctx
*ns_ctx
= worker
->ns_ctx
;
1032 struct ns_worker_ctx
*next_ns_ctx
= ns_ctx
->next
;
1034 ns_ctx
= next_ns_ctx
;
1038 worker
= next_worker
;
1043 probe_cb(void *cb_ctx
, const struct spdk_nvme_transport_id
*trid
,
1044 struct spdk_nvme_ctrlr_opts
*opts
)
1046 struct spdk_pci_addr pci_addr
;
1047 struct spdk_pci_device
*pci_dev
;
1048 struct spdk_pci_id pci_id
;
1050 if (trid
->trtype
!= SPDK_NVME_TRANSPORT_PCIE
) {
1051 printf("Attaching to NVMe over Fabrics controller at %s:%s: %s\n",
1052 trid
->traddr
, trid
->trsvcid
,
1055 if (spdk_pci_addr_parse(&pci_addr
, trid
->traddr
)) {
1059 pci_dev
= spdk_pci_get_device(&pci_addr
);
1064 pci_id
= spdk_pci_device_get_id(pci_dev
);
1066 printf("Attaching to NVMe Controller at %s [%04x:%04x]\n",
1068 pci_id
.vendor_id
, pci_id
.device_id
);
1071 opts
->io_queue_size
= g_queue_depth
+ 1;
1077 attach_cb(void *cb_ctx
, const struct spdk_nvme_transport_id
*trid
,
1078 struct spdk_nvme_ctrlr
*ctrlr
, const struct spdk_nvme_ctrlr_opts
*opts
)
1080 struct spdk_pci_addr pci_addr
;
1081 struct spdk_pci_device
*pci_dev
;
1082 struct spdk_pci_id pci_id
;
1084 if (trid
->trtype
!= SPDK_NVME_TRANSPORT_PCIE
) {
1085 printf("Attached to NVMe over Fabrics controller at %s:%s: %s\n",
1086 trid
->traddr
, trid
->trsvcid
,
1089 if (spdk_pci_addr_parse(&pci_addr
, trid
->traddr
)) {
1093 pci_dev
= spdk_pci_get_device(&pci_addr
);
1098 pci_id
= spdk_pci_device_get_id(pci_dev
);
1100 printf("Attached to NVMe Controller at %s [%04x:%04x]\n",
1102 pci_id
.vendor_id
, pci_id
.device_id
);
1105 register_ctrlr(ctrlr
);
1109 register_controllers(void)
1111 struct trid_entry
*trid_entry
;
1113 printf("Initializing NVMe Controllers\n");
1115 TAILQ_FOREACH(trid_entry
, &g_trid_list
, tailq
) {
1116 if (spdk_nvme_probe(&trid_entry
->trid
, NULL
, probe_cb
, attach_cb
, NULL
) != 0) {
1117 fprintf(stderr
, "spdk_nvme_probe() failed for transport address '%s'\n",
1118 trid_entry
->trid
.traddr
);
1127 unregister_controllers(void)
1129 struct ctrlr_entry
*entry
= g_controllers
;
1132 struct ctrlr_entry
*next
= entry
->next
;
1133 spdk_free(entry
->latency_page
);
1134 if (g_latency_tracking_enable
&&
1135 spdk_nvme_ctrlr_is_feature_supported(entry
->ctrlr
, SPDK_NVME_INTEL_FEAT_LATENCY_TRACKING
))
1136 set_latency_tracking_feature(entry
->ctrlr
, false);
1137 spdk_nvme_detach(entry
->ctrlr
);
#if HAVE_LIBAIO
/*
 * Register every trailing (non-option) command-line argument as an AIO file.
 * Returns 0 on success, 1 on the first registration failure.
 */
static int
register_aio_files(int argc, char **argv)
{
	int i;

	/* Treat everything after the options as files for AIO */
	for (i = g_aio_optind; i < argc; i++) {
		if (register_aio_file(argv[i]) != 0) {
			return 1;
		}
	}

	return 0;
}
#endif /* HAVE_LIBAIO */
1161 associate_workers_with_ns(void)
1163 struct ns_entry
*entry
= g_namespaces
;
1164 struct worker_thread
*worker
= g_workers
;
1165 struct ns_worker_ctx
*ns_ctx
;
1168 count
= g_num_namespaces
> g_num_workers
? g_num_namespaces
: g_num_workers
;
1170 for (i
= 0; i
< count
; i
++) {
1171 if (entry
== NULL
) {
1175 ns_ctx
= malloc(sizeof(struct ns_worker_ctx
));
1179 memset(ns_ctx
, 0, sizeof(*ns_ctx
));
1181 printf("Associating %s with lcore %d\n", entry
->name
, worker
->lcore
);
1182 ns_ctx
->min_tsc
= UINT64_MAX
;
1183 ns_ctx
->entry
= entry
;
1184 ns_ctx
->next
= worker
->ns_ctx
;
1185 worker
->ns_ctx
= ns_ctx
;
1187 worker
= worker
->next
;
1188 if (worker
== NULL
) {
1192 entry
= entry
->next
;
1193 if (entry
== NULL
) {
1194 entry
= g_namespaces
;
1202 int main(int argc
, char **argv
)
1205 struct worker_thread
*worker
, *master_worker
;
1206 unsigned master_core
;
1207 char task_pool_name
[30];
1208 uint32_t task_count
;
1209 struct spdk_env_opts opts
;
1211 rc
= parse_args(argc
, argv
);
1216 spdk_env_opts_init(&opts
);
1218 opts
.shm_id
= g_shm_id
;
1220 opts
.core_mask
= g_core_mask
;
1224 opts
.dpdk_mem_size
= g_dpdk_mem
;
1226 spdk_env_init(&opts
);
1228 g_tsc_rate
= spdk_get_ticks_hz();
1230 if (register_workers() != 0) {
1235 if (register_aio_files(argc
, argv
) != 0) {
1240 if (register_controllers() != 0) {
1245 if (associate_workers_with_ns() != 0) {
1250 snprintf(task_pool_name
, sizeof(task_pool_name
), "task_pool_%d", getpid());
1253 * The task_count will be dynamically calculated based on the
1254 * number of attached active namespaces(aio files), queue depth
1255 * and number of cores (workers) involved in the IO operations.
1257 task_count
= g_num_namespaces
> g_num_workers
? g_num_namespaces
: g_num_workers
;
1258 task_count
*= g_queue_depth
;
1260 task_pool
= rte_mempool_create(task_pool_name
, task_count
,
1261 sizeof(struct perf_task
),
1262 0, 0, NULL
, NULL
, task_ctor
, NULL
,
1264 if (task_pool
== NULL
) {
1265 fprintf(stderr
, "could not initialize task pool\n");
1270 printf("Initialization complete. Launching workers.\n");
1272 /* Launch all of the slave workers */
1273 master_core
= rte_get_master_lcore();
1274 master_worker
= NULL
;
1276 while (worker
!= NULL
) {
1277 if (worker
->lcore
!= master_core
) {
1278 rte_eal_remote_launch(work_fn
, worker
, worker
->lcore
);
1280 assert(master_worker
== NULL
);
1281 master_worker
= worker
;
1283 worker
= worker
->next
;
1286 assert(master_worker
!= NULL
);
1287 rc
= work_fn(master_worker
);
1289 rte_eal_mp_wait_lcore();
1295 unregister_namespaces();
1296 unregister_controllers();
1297 unregister_workers();
1300 fprintf(stderr
, "%s: errors occured\n", argv
[0]);