]>
git.proxmox.com Git - ceph.git/blob - ceph/src/spdk/test/lib/nvme/overhead/overhead.c
4 * Copyright (c) Intel Corporation.
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
11 * * Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * * Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in
15 * the documentation and/or other materials provided with the
17 * * Neither the name of Intel Corporation nor the names of its
18 * contributors may be used to endorse or promote products derived
19 * from this software without specific prior written permission.
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
42 #include <rte_config.h>
43 #include <rte_lcore.h>
45 #include "spdk/barrier.h"
47 #include "spdk/nvme.h"
49 #include "spdk/string.h"
50 #include "spdk/nvme_intel.h"
59 struct spdk_nvme_ctrlr
*ctrlr
;
60 struct ctrlr_entry
*next
;
74 struct spdk_nvme_ctrlr
*ctrlr
;
75 struct spdk_nvme_ns
*ns
;
76 struct spdk_nvme_qpair
*qpair
;
81 struct io_event
*events
;
87 uint32_t io_size_blocks
;
90 uint32_t current_queue_depth
;
102 static struct ctrlr_entry
*g_ctrlr
= NULL
;
103 static struct ns_entry
*g_ns
= NULL
;
105 static uint64_t g_tsc_rate
;
107 static uint32_t g_io_size_bytes
;
108 static int g_time_in_sec
;
110 static int g_aio_optind
; /* Index of first AIO filename in argv */
112 struct perf_task
*g_task
;
113 uint64_t g_tsc_submit
= 0;
114 uint64_t g_tsc_submit_min
= UINT64_MAX
;
115 uint64_t g_tsc_submit_max
= 0;
116 uint64_t g_tsc_complete
= 0;
117 uint64_t g_tsc_complete_min
= UINT64_MAX
;
118 uint64_t g_tsc_complete_max
= 0;
119 uint64_t g_io_completed
= 0;
122 register_ns(struct spdk_nvme_ctrlr
*ctrlr
, struct spdk_nvme_ns
*ns
)
124 struct ns_entry
*entry
;
125 const struct spdk_nvme_ctrlr_data
*cdata
;
127 cdata
= spdk_nvme_ctrlr_get_data(ctrlr
);
129 if (!spdk_nvme_ns_is_active(ns
)) {
130 printf("Controller %-20.20s (%-20.20s): Skipping inactive NS %u\n",
131 cdata
->mn
, cdata
->sn
,
132 spdk_nvme_ns_get_id(ns
));
136 if (spdk_nvme_ns_get_size(ns
) < g_io_size_bytes
||
137 spdk_nvme_ns_get_sector_size(ns
) > g_io_size_bytes
) {
138 printf("WARNING: controller %-20.20s (%-20.20s) ns %u has invalid "
139 "ns size %" PRIu64
" / block size %u for I/O size %u\n",
140 cdata
->mn
, cdata
->sn
, spdk_nvme_ns_get_id(ns
),
141 spdk_nvme_ns_get_size(ns
), spdk_nvme_ns_get_sector_size(ns
), g_io_size_bytes
);
145 entry
= calloc(1, sizeof(struct ns_entry
));
147 perror("ns_entry malloc");
151 entry
->type
= ENTRY_TYPE_NVME_NS
;
152 entry
->u
.nvme
.ctrlr
= ctrlr
;
153 entry
->u
.nvme
.ns
= ns
;
155 entry
->size_in_ios
= spdk_nvme_ns_get_size(ns
) /
157 entry
->io_size_blocks
= g_io_size_bytes
/ spdk_nvme_ns_get_sector_size(ns
);
159 snprintf(entry
->name
, 44, "%-20.20s (%-20.20s)", cdata
->mn
, cdata
->sn
);
165 register_ctrlr(struct spdk_nvme_ctrlr
*ctrlr
)
168 struct ctrlr_entry
*entry
= malloc(sizeof(struct ctrlr_entry
));
169 const struct spdk_nvme_ctrlr_data
*cdata
= spdk_nvme_ctrlr_get_data(ctrlr
);
172 perror("ctrlr_entry malloc");
176 snprintf(entry
->name
, sizeof(entry
->name
), "%-20.20s (%-20.20s)", cdata
->mn
, cdata
->sn
);
178 entry
->ctrlr
= ctrlr
;
181 num_ns
= spdk_nvme_ctrlr_get_num_ns(ctrlr
);
182 /* Only register the first namespace. */
184 fprintf(stderr
, "controller found with no namespaces\n");
188 register_ns(ctrlr
, spdk_nvme_ctrlr_get_ns(ctrlr
, 1));
193 register_aio_file(const char *path
)
195 struct ns_entry
*entry
;
201 fd
= open(path
, O_RDWR
| O_DIRECT
);
203 fprintf(stderr
, "Could not open AIO device %s: %s\n", path
, strerror(errno
));
207 size
= spdk_fd_get_size(fd
);
209 fprintf(stderr
, "Could not determine size of AIO device %s\n", path
);
214 blklen
= spdk_fd_get_blocklen(fd
);
216 fprintf(stderr
, "Could not determine block size of AIO device %s\n", path
);
221 entry
= calloc(1, sizeof(struct ns_entry
));
224 perror("aio ns_entry malloc");
228 entry
->type
= ENTRY_TYPE_AIO_FILE
;
229 entry
->u
.aio
.fd
= fd
;
230 entry
->size_in_ios
= size
/ g_io_size_bytes
;
231 entry
->io_size_blocks
= g_io_size_bytes
/ blklen
;
233 snprintf(entry
->name
, sizeof(entry
->name
), "%s", path
);
241 aio_submit(io_context_t aio_ctx
, struct iocb
*iocb
, int fd
, enum io_iocb_cmd cmd
, void *buf
,
242 unsigned long nbytes
, uint64_t offset
, void *cb_ctx
)
244 iocb
->aio_fildes
= fd
;
245 iocb
->aio_reqprio
= 0;
246 iocb
->aio_lio_opcode
= cmd
;
248 iocb
->u
.c
.nbytes
= nbytes
;
249 iocb
->u
.c
.offset
= offset
;
252 if (io_submit(aio_ctx
, 1, &iocb
) < 0) {
264 struct timespec timeout
;
269 count
= io_getevents(g_ns
->u
.aio
.ctx
, 1, 1, g_ns
->u
.aio
.events
, &timeout
);
271 fprintf(stderr
, "io_getevents error\n");
275 for (i
= 0; i
< count
; i
++) {
276 g_ns
->current_queue_depth
--;
279 #endif /* HAVE_LIBAIO */
281 static void io_complete(void *ctx
, const struct spdk_nvme_cpl
*completion
);
283 static __thread
unsigned int seed
= 0;
286 submit_single_io(void)
288 uint64_t offset_in_ios
;
291 struct ns_entry
*entry
= g_ns
;
294 offset_in_ios
= rand_r(&seed
) % entry
->size_in_ios
;
296 start
= spdk_get_ticks();
299 if (entry
->type
== ENTRY_TYPE_AIO_FILE
) {
300 rc
= aio_submit(g_ns
->u
.aio
.ctx
, &g_task
->iocb
, entry
->u
.aio
.fd
, IO_CMD_PREAD
, g_task
->buf
,
301 g_io_size_bytes
, offset_in_ios
* g_io_size_bytes
, g_task
);
305 rc
= spdk_nvme_ns_cmd_read(entry
->u
.nvme
.ns
, g_ns
->u
.nvme
.qpair
, g_task
->buf
,
306 offset_in_ios
* entry
->io_size_blocks
,
307 entry
->io_size_blocks
, io_complete
, g_task
, 0);
311 tsc_submit
= spdk_get_ticks() - start
;
312 g_tsc_submit
+= tsc_submit
;
313 if (tsc_submit
< g_tsc_submit_min
) {
314 g_tsc_submit_min
= tsc_submit
;
316 if (tsc_submit
> g_tsc_submit_max
) {
317 g_tsc_submit_max
= tsc_submit
;
321 fprintf(stderr
, "starting I/O failed\n");
324 g_ns
->current_queue_depth
++;
328 io_complete(void *ctx
, const struct spdk_nvme_cpl
*completion
)
330 g_ns
->current_queue_depth
--;
333 uint64_t g_complete_tsc_start
;
338 uint64_t end
, tsc_complete
;
341 if (g_ns
->type
== ENTRY_TYPE_AIO_FILE
) {
346 spdk_nvme_qpair_process_completions(g_ns
->u
.nvme
.qpair
, 0);
349 end
= spdk_get_ticks();
350 if (g_ns
->current_queue_depth
== 1) {
352 * Account for race condition in AIO case where interrupt occurs
353 * after checking for queue depth. If the timestamp capture
354 * is too big compared to the last capture, assume that an
355 * interrupt fired, and do not bump the start tsc forward. This
356 * will ensure this extra time is accounted for next time through
357 * when we see current_queue_depth drop to 0.
359 if (g_ns
->type
== ENTRY_TYPE_NVME_NS
|| (end
- g_complete_tsc_start
) < 500) {
360 g_complete_tsc_start
= end
;
363 tsc_complete
= end
- g_complete_tsc_start
;
364 g_tsc_complete
+= tsc_complete
;
365 if (tsc_complete
< g_tsc_complete_min
) {
366 g_tsc_complete_min
= tsc_complete
;
368 if (tsc_complete
> g_tsc_complete_max
) {
369 g_tsc_complete_max
= tsc_complete
;
372 if (!g_ns
->is_draining
) {
375 g_complete_tsc_start
= spdk_get_ticks();
382 g_ns
->is_draining
= true;
383 while (g_ns
->current_queue_depth
> 0) {
389 init_ns_worker_ctx(void)
391 if (g_ns
->type
== ENTRY_TYPE_AIO_FILE
) {
393 g_ns
->u
.aio
.events
= calloc(1, sizeof(struct io_event
));
394 if (!g_ns
->u
.aio
.events
) {
398 if (io_setup(1, &g_ns
->u
.aio
.ctx
) < 0) {
399 free(g_ns
->u
.aio
.events
);
406 * TODO: If a controller has multiple namespaces, they could all use the same queue.
407 * For now, give each namespace/thread combination its own queue.
409 g_ns
->u
.nvme
.qpair
= spdk_nvme_ctrlr_alloc_io_qpair(g_ns
->u
.nvme
.ctrlr
, 0);
410 if (!g_ns
->u
.nvme
.qpair
) {
411 printf("ERROR: spdk_nvme_ctrlr_alloc_io_qpair failed\n");
420 cleanup_ns_worker_ctx(void)
422 if (g_ns
->type
== ENTRY_TYPE_AIO_FILE
) {
424 io_destroy(g_ns
->u
.aio
.ctx
);
425 free(g_ns
->u
.aio
.events
);
428 spdk_nvme_ctrlr_free_io_qpair(g_ns
->u
.nvme
.qpair
);
437 printf("Starting work_fn\n");
439 /* Allocate a queue pair for each namespace. */
440 if (init_ns_worker_ctx() != 0) {
441 printf("ERROR: init_ns_worker_ctx() failed\n");
445 tsc_end
= spdk_get_ticks() + g_time_in_sec
* g_tsc_rate
;
447 /* Submit initial I/O for each namespace. */
449 g_complete_tsc_start
= spdk_get_ticks();
453 * Check for completed I/O for each controller. A new
454 * I/O will be submitted in the io_complete callback
455 * to replace each I/O that is completed.
459 if (spdk_get_ticks() > tsc_end
) {
465 cleanup_ns_worker_ctx();
470 static void usage(char *program_name
)
472 printf("%s options", program_name
);
474 printf(" [AIO device(s)]...");
477 printf("\t[-s io size in bytes]\n");
478 printf("\t[-t time in seconds]\n");
479 printf("\t\t(default: 1)]\n");
485 printf("g_tsc_submit = %ju\n", g_tsc_submit
);
486 printf("g_tsc_complete = %ju\n", g_tsc_complete
);
487 printf("g_io_completed = %ju\n", g_io_completed
);
489 printf("submit avg, min, max = %8.1f, %ju, %ju\n",
490 (float)g_tsc_submit
/ g_io_completed
, g_tsc_submit_min
, g_tsc_submit_max
);
491 printf("complete avg, min, max = %8.1f, %ju, %ju\n",
492 (float)g_tsc_complete
/ g_io_completed
, g_tsc_complete_min
, g_tsc_complete_max
);
496 parse_args(int argc
, char **argv
)
504 while ((op
= getopt(argc
, argv
, "s:t:")) != -1) {
507 g_io_size_bytes
= atoi(optarg
);
510 g_time_in_sec
= atoi(optarg
);
518 if (!g_io_size_bytes
) {
522 if (!g_time_in_sec
) {
527 g_aio_optind
= optind
;
533 probe_cb(void *cb_ctx
, const struct spdk_nvme_transport_id
*trid
,
534 struct spdk_nvme_ctrlr_opts
*opts
)
536 static uint32_t ctrlr_found
= 0;
538 if (ctrlr_found
== 1) {
539 fprintf(stderr
, "only attching to one controller, so skipping\n");
540 fprintf(stderr
, " controller at PCI address %s\n",
546 printf("Attaching to %s\n", trid
->traddr
);
552 attach_cb(void *cb_ctx
, const struct spdk_nvme_transport_id
*trid
,
553 struct spdk_nvme_ctrlr
*ctrlr
, const struct spdk_nvme_ctrlr_opts
*opts
)
555 printf("Attached to %s\n", trid
->traddr
);
557 register_ctrlr(ctrlr
);
561 register_controllers(void)
563 printf("Initializing NVMe Controllers\n");
565 if (spdk_nvme_probe(NULL
, NULL
, probe_cb
, attach_cb
, NULL
) != 0) {
566 fprintf(stderr
, "spdk_nvme_probe() failed\n");
573 int main(int argc
, char **argv
)
576 struct spdk_env_opts opts
;
578 spdk_env_opts_init(&opts
);
579 opts
.name
= "overhead";
580 opts
.core_mask
= "0x1";
581 spdk_env_init(&opts
);
583 rc
= parse_args(argc
, argv
);
588 g_task
= spdk_zmalloc(sizeof(struct perf_task
), 0, NULL
);
589 if (g_task
== NULL
) {
590 fprintf(stderr
, "g_task alloc failed\n");
594 g_task
->buf
= spdk_zmalloc(g_io_size_bytes
, 0x1000, NULL
);
595 if (g_task
->buf
== NULL
) {
596 fprintf(stderr
, "g_task->buf spdk_zmalloc failed\n");
600 g_tsc_rate
= spdk_get_ticks_hz();
603 if (g_aio_optind
< argc
) {
604 printf("Measuring overhead for AIO device %s.\n", argv
[g_aio_optind
]);
605 if (register_aio_file(argv
[g_aio_optind
]) != 0) {
612 if (register_controllers() != 0) {
618 printf("Initialization complete. Launching workers.\n");
627 spdk_nvme_detach(g_ctrlr
->ctrlr
);
632 fprintf(stderr
, "%s: errors occured\n", argv
[0]);