]> git.proxmox.com Git - ceph.git/blob - ceph/src/spdk/test/lib/nvme/overhead/overhead.c
add subtree-ish sources for 12.0.3
[ceph.git] / ceph / src / spdk / test / lib / nvme / overhead / overhead.c
1 /*-
2 * BSD LICENSE
3 *
4 * Copyright (c) Intel Corporation.
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 *
11 * * Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * * Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in
15 * the documentation and/or other materials provided with the
16 * distribution.
17 * * Neither the name of Intel Corporation nor the names of its
18 * contributors may be used to endorse or promote products derived
19 * from this software without specific prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32 */
33
34 #include <errno.h>
35 #include <inttypes.h>
36 #include <stdbool.h>
37 #include <stdio.h>
38 #include <stdlib.h>
39 #include <string.h>
40 #include <unistd.h>
41
42 #include <rte_config.h>
43 #include <rte_lcore.h>
44
45 #include "spdk/barrier.h"
46 #include "spdk/fd.h"
47 #include "spdk/nvme.h"
48 #include "spdk/env.h"
49 #include "spdk/string.h"
50 #include "spdk/nvme_intel.h"
51
52 #if HAVE_LIBAIO
53 #include <libaio.h>
54 #include <sys/stat.h>
55 #include <fcntl.h>
56 #endif
57
/* Node describing one attached NVMe controller (this tool attaches only one). */
struct ctrlr_entry {
	struct spdk_nvme_ctrlr *ctrlr;
	struct ctrlr_entry *next;   /* list link; never traversed in this file */
	char name[1024];            /* "model (serial)" display string */
};
63
/* Backend type of the I/O target under test. */
enum entry_type {
	ENTRY_TYPE_NVME_NS,   /* SPDK userspace NVMe namespace */
	ENTRY_TYPE_AIO_FILE,  /* kernel file/block device driven via libaio */
};
68
/*
 * State for the single I/O target: either an NVMe namespace or (when built
 * with libaio support) an O_DIRECT file / block device.
 */
struct ns_entry {
	enum entry_type type;   /* selects which member of 'u' is valid */

	union {
		struct {
			struct spdk_nvme_ctrlr *ctrlr;
			struct spdk_nvme_ns *ns;
			struct spdk_nvme_qpair *qpair;  /* allocated in init_ns_worker_ctx() */
		} nvme;
#if HAVE_LIBAIO
		struct {
			int fd;                    /* O_DIRECT descriptor from register_aio_file() */
			struct io_event *events;   /* single-slot event array for io_getevents() */
			io_context_t ctx;          /* libaio context from io_setup() */
		} aio;
#endif
	} u;

	uint32_t io_size_blocks;      /* I/O size expressed in device blocks */
	uint64_t size_in_ios;         /* device capacity in units of g_io_size_bytes */
	bool is_draining;             /* when set, completions stop resubmitting I/O */
	uint32_t current_queue_depth; /* outstanding I/O count (0 or 1 in this tool) */
	char name[1024];              /* display name */
};
93
/* The single reusable I/O request. */
struct perf_task {
	void *buf;            /* DMA-safe data buffer (spdk_zmalloc'd in main) */
	uint64_t submit_tsc;  /* NOTE(review): never written in this file — appears unused */
#if HAVE_LIBAIO
	struct iocb iocb;     /* control block reused for every AIO submission */
#endif
};
101
/* The single attached controller and the single namespace under test. */
static struct ctrlr_entry *g_ctrlr = NULL;
static struct ns_entry *g_ns = NULL;

static uint64_t g_tsc_rate;       /* timestamp-counter ticks per second */

static uint32_t g_io_size_bytes;  /* -s: size of each I/O in bytes */
static int g_time_in_sec;         /* -t: measurement duration in seconds */

static int g_aio_optind; /* Index of first AIO filename in argv */

/* The single in-flight task (queue depth is 1) and accumulated overhead stats. */
struct perf_task *g_task;
uint64_t g_tsc_submit = 0;                 /* total ticks spent submitting */
uint64_t g_tsc_submit_min = UINT64_MAX;
uint64_t g_tsc_submit_max = 0;
uint64_t g_tsc_complete = 0;               /* total ticks spent polling for completion */
uint64_t g_tsc_complete_min = UINT64_MAX;
uint64_t g_tsc_complete_max = 0;
uint64_t g_io_completed = 0;               /* number of I/Os completed */
121 static void
122 register_ns(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_ns *ns)
123 {
124 struct ns_entry *entry;
125 const struct spdk_nvme_ctrlr_data *cdata;
126
127 cdata = spdk_nvme_ctrlr_get_data(ctrlr);
128
129 if (!spdk_nvme_ns_is_active(ns)) {
130 printf("Controller %-20.20s (%-20.20s): Skipping inactive NS %u\n",
131 cdata->mn, cdata->sn,
132 spdk_nvme_ns_get_id(ns));
133 return;
134 }
135
136 if (spdk_nvme_ns_get_size(ns) < g_io_size_bytes ||
137 spdk_nvme_ns_get_sector_size(ns) > g_io_size_bytes) {
138 printf("WARNING: controller %-20.20s (%-20.20s) ns %u has invalid "
139 "ns size %" PRIu64 " / block size %u for I/O size %u\n",
140 cdata->mn, cdata->sn, spdk_nvme_ns_get_id(ns),
141 spdk_nvme_ns_get_size(ns), spdk_nvme_ns_get_sector_size(ns), g_io_size_bytes);
142 return;
143 }
144
145 entry = calloc(1, sizeof(struct ns_entry));
146 if (entry == NULL) {
147 perror("ns_entry malloc");
148 exit(1);
149 }
150
151 entry->type = ENTRY_TYPE_NVME_NS;
152 entry->u.nvme.ctrlr = ctrlr;
153 entry->u.nvme.ns = ns;
154
155 entry->size_in_ios = spdk_nvme_ns_get_size(ns) /
156 g_io_size_bytes;
157 entry->io_size_blocks = g_io_size_bytes / spdk_nvme_ns_get_sector_size(ns);
158
159 snprintf(entry->name, 44, "%-20.20s (%-20.20s)", cdata->mn, cdata->sn);
160
161 g_ns = entry;
162 }
163
164 static void
165 register_ctrlr(struct spdk_nvme_ctrlr *ctrlr)
166 {
167 int num_ns;
168 struct ctrlr_entry *entry = malloc(sizeof(struct ctrlr_entry));
169 const struct spdk_nvme_ctrlr_data *cdata = spdk_nvme_ctrlr_get_data(ctrlr);
170
171 if (entry == NULL) {
172 perror("ctrlr_entry malloc");
173 exit(1);
174 }
175
176 snprintf(entry->name, sizeof(entry->name), "%-20.20s (%-20.20s)", cdata->mn, cdata->sn);
177
178 entry->ctrlr = ctrlr;
179 g_ctrlr = entry;
180
181 num_ns = spdk_nvme_ctrlr_get_num_ns(ctrlr);
182 /* Only register the first namespace. */
183 if (num_ns < 1) {
184 fprintf(stderr, "controller found with no namespaces\n");
185 exit(1);
186 }
187
188 register_ns(ctrlr, spdk_nvme_ctrlr_get_ns(ctrlr, 1));
189 }
190
191 #if HAVE_LIBAIO
192 static int
193 register_aio_file(const char *path)
194 {
195 struct ns_entry *entry;
196
197 int fd;
198 uint64_t size;
199 uint32_t blklen;
200
201 fd = open(path, O_RDWR | O_DIRECT);
202 if (fd < 0) {
203 fprintf(stderr, "Could not open AIO device %s: %s\n", path, strerror(errno));
204 return -1;
205 }
206
207 size = spdk_fd_get_size(fd);
208 if (size == 0) {
209 fprintf(stderr, "Could not determine size of AIO device %s\n", path);
210 close(fd);
211 return -1;
212 }
213
214 blklen = spdk_fd_get_blocklen(fd);
215 if (blklen == 0) {
216 fprintf(stderr, "Could not determine block size of AIO device %s\n", path);
217 close(fd);
218 return -1;
219 }
220
221 entry = calloc(1, sizeof(struct ns_entry));
222 if (entry == NULL) {
223 close(fd);
224 perror("aio ns_entry malloc");
225 return -1;
226 }
227
228 entry->type = ENTRY_TYPE_AIO_FILE;
229 entry->u.aio.fd = fd;
230 entry->size_in_ios = size / g_io_size_bytes;
231 entry->io_size_blocks = g_io_size_bytes / blklen;
232
233 snprintf(entry->name, sizeof(entry->name), "%s", path);
234
235 g_ns = entry;
236
237 return 0;
238 }
239
240 static int
241 aio_submit(io_context_t aio_ctx, struct iocb *iocb, int fd, enum io_iocb_cmd cmd, void *buf,
242 unsigned long nbytes, uint64_t offset, void *cb_ctx)
243 {
244 iocb->aio_fildes = fd;
245 iocb->aio_reqprio = 0;
246 iocb->aio_lio_opcode = cmd;
247 iocb->u.c.buf = buf;
248 iocb->u.c.nbytes = nbytes;
249 iocb->u.c.offset = offset;
250 iocb->data = cb_ctx;
251
252 if (io_submit(aio_ctx, 1, &iocb) < 0) {
253 printf("io_submit");
254 return -1;
255 }
256
257 return 0;
258 }
259
260 static void
261 aio_check_io(void)
262 {
263 int count, i;
264 struct timespec timeout;
265
266 timeout.tv_sec = 0;
267 timeout.tv_nsec = 0;
268
269 count = io_getevents(g_ns->u.aio.ctx, 1, 1, g_ns->u.aio.events, &timeout);
270 if (count < 0) {
271 fprintf(stderr, "io_getevents error\n");
272 exit(1);
273 }
274
275 for (i = 0; i < count; i++) {
276 g_ns->current_queue_depth--;
277 }
278 }
279 #endif /* HAVE_LIBAIO */
280
/* Forward declaration: NVMe read-completion callback used by submit_single_io(). */
static void io_complete(void *ctx, const struct spdk_nvme_cpl *completion);

/* Per-thread PRNG state for the rand_r() offset selection below. */
static __thread unsigned int seed = 0;
284
285 static void
286 submit_single_io(void)
287 {
288 uint64_t offset_in_ios;
289 uint64_t start;
290 int rc;
291 struct ns_entry *entry = g_ns;
292 uint64_t tsc_submit;
293
294 offset_in_ios = rand_r(&seed) % entry->size_in_ios;
295
296 start = spdk_get_ticks();
297 spdk_mb();
298 #if HAVE_LIBAIO
299 if (entry->type == ENTRY_TYPE_AIO_FILE) {
300 rc = aio_submit(g_ns->u.aio.ctx, &g_task->iocb, entry->u.aio.fd, IO_CMD_PREAD, g_task->buf,
301 g_io_size_bytes, offset_in_ios * g_io_size_bytes, g_task);
302 } else
303 #endif
304 {
305 rc = spdk_nvme_ns_cmd_read(entry->u.nvme.ns, g_ns->u.nvme.qpair, g_task->buf,
306 offset_in_ios * entry->io_size_blocks,
307 entry->io_size_blocks, io_complete, g_task, 0);
308 }
309
310 spdk_mb();
311 tsc_submit = spdk_get_ticks() - start;
312 g_tsc_submit += tsc_submit;
313 if (tsc_submit < g_tsc_submit_min) {
314 g_tsc_submit_min = tsc_submit;
315 }
316 if (tsc_submit > g_tsc_submit_max) {
317 g_tsc_submit_max = tsc_submit;
318 }
319
320 if (rc != 0) {
321 fprintf(stderr, "starting I/O failed\n");
322 }
323
324 g_ns->current_queue_depth++;
325 }
326
327 static void
328 io_complete(void *ctx, const struct spdk_nvme_cpl *completion)
329 {
330 g_ns->current_queue_depth--;
331 }
332
uint64_t g_complete_tsc_start;  /* TSC at the start of the current completion-polling window */
334
/*
 * Poll once for a completion and attribute the elapsed time to the
 * completion-overhead statistics.  When an I/O completes, a replacement is
 * submitted (unless draining) so that exactly one I/O stays in flight.
 */
static void
check_io(void)
{
	uint64_t end, tsc_complete;
	spdk_mb();
#if HAVE_LIBAIO
	if (g_ns->type == ENTRY_TYPE_AIO_FILE) {
		aio_check_io();
	} else
#endif
	{
		/* Non-blocking poll; completions fire io_complete(), which
		 * decrements current_queue_depth. */
		spdk_nvme_qpair_process_completions(g_ns->u.nvme.qpair, 0);
	}
	spdk_mb();
	end = spdk_get_ticks();
	if (g_ns->current_queue_depth == 1) {
		/* Depth still 1: the outstanding I/O has not completed yet. */
		/*
		 * Account for race condition in AIO case where interrupt occurs
		 * after checking for queue depth. If the timestamp capture
		 * is too big compared to the last capture, assume that an
		 * interrupt fired, and do not bump the start tsc forward. This
		 * will ensure this extra time is accounted for next time through
		 * when we see current_queue_depth drop to 0.
		 */
		if (g_ns->type == ENTRY_TYPE_NVME_NS || (end - g_complete_tsc_start) < 500) {
			g_complete_tsc_start = end;
		}
	} else {
		/* An I/O completed: charge the time since the polling window
		 * started to completion overhead and update min/max. */
		tsc_complete = end - g_complete_tsc_start;
		g_tsc_complete += tsc_complete;
		if (tsc_complete < g_tsc_complete_min) {
			g_tsc_complete_min = tsc_complete;
		}
		if (tsc_complete > g_tsc_complete_max) {
			g_tsc_complete_max = tsc_complete;
		}
		g_io_completed++;
		if (!g_ns->is_draining) {
			/* Keep exactly one I/O in flight. */
			submit_single_io();
		}
		g_complete_tsc_start = spdk_get_ticks();
	}
}
378
379 static void
380 drain_io(void)
381 {
382 g_ns->is_draining = true;
383 while (g_ns->current_queue_depth > 0) {
384 check_io();
385 }
386 }
387
388 static int
389 init_ns_worker_ctx(void)
390 {
391 if (g_ns->type == ENTRY_TYPE_AIO_FILE) {
392 #ifdef HAVE_LIBAIO
393 g_ns->u.aio.events = calloc(1, sizeof(struct io_event));
394 if (!g_ns->u.aio.events) {
395 return -1;
396 }
397 g_ns->u.aio.ctx = 0;
398 if (io_setup(1, &g_ns->u.aio.ctx) < 0) {
399 free(g_ns->u.aio.events);
400 perror("io_setup");
401 return -1;
402 }
403 #endif
404 } else {
405 /*
406 * TODO: If a controller has multiple namespaces, they could all use the same queue.
407 * For now, give each namespace/thread combination its own queue.
408 */
409 g_ns->u.nvme.qpair = spdk_nvme_ctrlr_alloc_io_qpair(g_ns->u.nvme.ctrlr, 0);
410 if (!g_ns->u.nvme.qpair) {
411 printf("ERROR: spdk_nvme_ctrlr_alloc_io_qpair failed\n");
412 return -1;
413 }
414 }
415
416 return 0;
417 }
418
419 static void
420 cleanup_ns_worker_ctx(void)
421 {
422 if (g_ns->type == ENTRY_TYPE_AIO_FILE) {
423 #ifdef HAVE_LIBAIO
424 io_destroy(g_ns->u.aio.ctx);
425 free(g_ns->u.aio.events);
426 #endif
427 } else {
428 spdk_nvme_ctrlr_free_io_qpair(g_ns->u.nvme.qpair);
429 }
430 }
431
432 static int
433 work_fn(void)
434 {
435 uint64_t tsc_end;
436
437 printf("Starting work_fn\n");
438
439 /* Allocate a queue pair for each namespace. */
440 if (init_ns_worker_ctx() != 0) {
441 printf("ERROR: init_ns_worker_ctx() failed\n");
442 return 1;
443 }
444
445 tsc_end = spdk_get_ticks() + g_time_in_sec * g_tsc_rate;
446
447 /* Submit initial I/O for each namespace. */
448 submit_single_io();
449 g_complete_tsc_start = spdk_get_ticks();
450
451 while (1) {
452 /*
453 * Check for completed I/O for each controller. A new
454 * I/O will be submitted in the io_complete callback
455 * to replace each I/O that is completed.
456 */
457 check_io();
458
459 if (spdk_get_ticks() > tsc_end) {
460 break;
461 }
462 }
463
464 drain_io();
465 cleanup_ns_worker_ctx();
466
467 return 0;
468 }
469
/*
 * Print command-line usage.  The original claimed "(default: 1)" with a
 * stray "]" — but parse_args() treats both -s and -t as mandatory, so the
 * text was wrong on both counts.
 */
static void usage(char *program_name)
{
	printf("%s options", program_name);
#if HAVE_LIBAIO
	printf(" [AIO device(s)]...");
#endif
	printf("\n");
	printf("\t[-s io size in bytes (required)]\n");
	printf("\t[-t time in seconds (required)]\n");
}
481
482 static void
483 print_stats(void)
484 {
485 printf("g_tsc_submit = %ju\n", g_tsc_submit);
486 printf("g_tsc_complete = %ju\n", g_tsc_complete);
487 printf("g_io_completed = %ju\n", g_io_completed);
488
489 printf("submit avg, min, max = %8.1f, %ju, %ju\n",
490 (float)g_tsc_submit / g_io_completed, g_tsc_submit_min, g_tsc_submit_max);
491 printf("complete avg, min, max = %8.1f, %ju, %ju\n",
492 (float)g_tsc_complete / g_io_completed, g_tsc_complete_min, g_tsc_complete_max);
493 }
494
495 static int
496 parse_args(int argc, char **argv)
497 {
498 int op;
499
500 /* default value*/
501 g_io_size_bytes = 0;
502 g_time_in_sec = 0;
503
504 while ((op = getopt(argc, argv, "s:t:")) != -1) {
505 switch (op) {
506 case 's':
507 g_io_size_bytes = atoi(optarg);
508 break;
509 case 't':
510 g_time_in_sec = atoi(optarg);
511 break;
512 default:
513 usage(argv[0]);
514 return 1;
515 }
516 }
517
518 if (!g_io_size_bytes) {
519 usage(argv[0]);
520 return 1;
521 }
522 if (!g_time_in_sec) {
523 usage(argv[0]);
524 return 1;
525 }
526
527 g_aio_optind = optind;
528 optind = 1;
529 return 0;
530 }
531
532 static bool
533 probe_cb(void *cb_ctx, const struct spdk_nvme_transport_id *trid,
534 struct spdk_nvme_ctrlr_opts *opts)
535 {
536 static uint32_t ctrlr_found = 0;
537
538 if (ctrlr_found == 1) {
539 fprintf(stderr, "only attching to one controller, so skipping\n");
540 fprintf(stderr, " controller at PCI address %s\n",
541 trid->traddr);
542 return false;
543 }
544 ctrlr_found = 1;
545
546 printf("Attaching to %s\n", trid->traddr);
547
548 return true;
549 }
550
551 static void
552 attach_cb(void *cb_ctx, const struct spdk_nvme_transport_id *trid,
553 struct spdk_nvme_ctrlr *ctrlr, const struct spdk_nvme_ctrlr_opts *opts)
554 {
555 printf("Attached to %s\n", trid->traddr);
556
557 register_ctrlr(ctrlr);
558 }
559
560 static int
561 register_controllers(void)
562 {
563 printf("Initializing NVMe Controllers\n");
564
565 if (spdk_nvme_probe(NULL, NULL, probe_cb, attach_cb, NULL) != 0) {
566 fprintf(stderr, "spdk_nvme_probe() failed\n");
567 return 1;
568 }
569
570 return 0;
571 }
572
573 int main(int argc, char **argv)
574 {
575 int rc;
576 struct spdk_env_opts opts;
577
578 spdk_env_opts_init(&opts);
579 opts.name = "overhead";
580 opts.core_mask = "0x1";
581 spdk_env_init(&opts);
582
583 rc = parse_args(argc, argv);
584 if (rc != 0) {
585 return rc;
586 }
587
588 g_task = spdk_zmalloc(sizeof(struct perf_task), 0, NULL);
589 if (g_task == NULL) {
590 fprintf(stderr, "g_task alloc failed\n");
591 exit(1);
592 }
593
594 g_task->buf = spdk_zmalloc(g_io_size_bytes, 0x1000, NULL);
595 if (g_task->buf == NULL) {
596 fprintf(stderr, "g_task->buf spdk_zmalloc failed\n");
597 exit(1);
598 }
599
600 g_tsc_rate = spdk_get_ticks_hz();
601
602 #if HAVE_LIBAIO
603 if (g_aio_optind < argc) {
604 printf("Measuring overhead for AIO device %s.\n", argv[g_aio_optind]);
605 if (register_aio_file(argv[g_aio_optind]) != 0) {
606 rc = -1;
607 goto cleanup;
608 }
609 } else
610 #endif
611 {
612 if (register_controllers() != 0) {
613 rc = -1;
614 goto cleanup;
615 }
616 }
617
618 printf("Initialization complete. Launching workers.\n");
619
620 rc = work_fn();
621
622 print_stats();
623
624 cleanup:
625 free(g_ns);
626 if (g_ctrlr) {
627 spdk_nvme_detach(g_ctrlr->ctrlr);
628 free(g_ctrlr);
629 }
630
631 if (rc != 0) {
632 fprintf(stderr, "%s: errors occured\n", argv[0]);
633 }
634
635 return rc;
636 }