/*-
 *   BSD LICENSE
 *
 *   Copyright (c) Intel Corporation.
 *   All rights reserved.
 *
 *   Redistribution and use in source and binary forms, with or without
 *   modification, are permitted provided that the following conditions
 *   are met:
 *
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in
 *       the documentation and/or other materials provided with the
 *       distribution.
 *     * Neither the name of Intel Corporation nor the names of its
 *       contributors may be used to endorse or promote products derived
 *       from this software without specific prior written permission.
 *
 *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include "spdk/stdinc.h"

#include "spdk/env.h"
#include "spdk/log.h"
#include "spdk/nvme.h"
#include "spdk/queue.h"
#include "spdk/string.h"
#include "spdk/util.h"
#include "spdk/likely.h"

struct ctrlr_entry {
	struct spdk_nvme_ctrlr *ctrlr;
	enum spdk_nvme_transport_type trtype;

	struct ctrlr_entry *next;
	char name[1024];
};

struct ns_entry {
	struct spdk_nvme_ctrlr *ctrlr;
	struct spdk_nvme_ns *ns;

	struct ns_entry *next;
	uint32_t io_size_blocks;
	uint32_t num_io_requests;
	uint64_t size_in_ios;
	uint32_t block_size;
	char name[1024];
};

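/*
 * Per-controller context owned by the master core. The mutex guards
 * current_queue_depth and the abort counters, which are updated both by
 * abort submissions from the I/O workers and by admin-queue polling on
 * the master core.
 */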
struct ctrlr_worker_ctx {
	pthread_mutex_t mutex;
	struct ctrlr_entry *entry;
	uint64_t abort_submitted;
	uint64_t abort_submit_failed;
	uint64_t successful_abort;
	uint64_t unsuccessful_abort;
	uint64_t abort_failed;
	uint64_t current_queue_depth;
	struct spdk_nvme_ctrlr *ctrlr;
	struct ctrlr_worker_ctx *next;
};

struct ns_worker_ctx {
	struct ns_entry *entry;
	uint64_t io_submitted;
	uint64_t io_completed;
	uint64_t io_aborted;
	uint64_t io_failed;
	uint64_t current_queue_depth;
	uint64_t offset_in_ios;
	bool is_draining;
	struct spdk_nvme_qpair *qpair;
	struct ctrlr_worker_ctx *ctrlr_ctx;
	struct ns_worker_ctx *next;
};

struct perf_task {
	struct ns_worker_ctx *ns_ctx;
	void *buf;
};

struct worker_thread {
	struct ns_worker_ctx *ns_ctx;
	struct ctrlr_worker_ctx *ctrlr_ctx;
	struct worker_thread *next;
	unsigned lcore;
};

static const char *g_workload_type = "read";
static struct ctrlr_entry *g_controllers;
static struct ns_entry *g_namespaces;
static int g_num_namespaces;
static struct worker_thread *g_workers;
static int g_num_workers;
static uint32_t g_master_core;

static int g_abort_interval = 1;

static uint64_t g_tsc_rate;

static uint32_t g_io_size_bytes = 131072;
static uint32_t g_max_io_size_blocks;
static int g_rw_percentage = -1;
static int g_is_random;
static int g_queue_depth = 128;
static int g_time_in_sec = 3;
static int g_dpdk_mem;
static int g_shm_id = -1;
static bool g_no_pci;
static bool g_warn;
static bool g_mix_specified;

static const char *g_core_mask;

struct trid_entry {
	struct spdk_nvme_transport_id trid;
	uint16_t nsid;
	TAILQ_ENTRY(trid_entry) tailq;
};

static TAILQ_HEAD(, trid_entry) g_trid_list = TAILQ_HEAD_INITIALIZER(g_trid_list);

static void io_complete(void *ctx, const struct spdk_nvme_cpl *cpl);

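/*
 * Format a human-readable controller name from its transport ID, e.g.
 * "PCIE (0000:04:00.0)". Returns the snprintf() result, or 0 for an
 * unknown transport type.
 */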
static int
build_nvme_name(char *name, size_t length, struct spdk_nvme_ctrlr *ctrlr)
{
	const struct spdk_nvme_transport_id *trid;
	int res = 0;

	trid = spdk_nvme_ctrlr_get_transport_id(ctrlr);

	switch (trid->trtype) {
	case SPDK_NVME_TRANSPORT_PCIE:
		res = snprintf(name, length, "PCIE (%s)", trid->traddr);
		break;
	case SPDK_NVME_TRANSPORT_RDMA:
		res = snprintf(name, length, "RDMA (addr:%s subnqn:%s)", trid->traddr, trid->subnqn);
		break;
	case SPDK_NVME_TRANSPORT_TCP:
		res = snprintf(name, length, "TCP (addr:%s subnqn:%s)", trid->traddr, trid->subnqn);
		break;
	default:
		fprintf(stderr, "Unknown transport type %d\n", trid->trtype);
		break;
	}
	return res;
}

static void
build_nvme_ns_name(char *name, size_t length, struct spdk_nvme_ctrlr *ctrlr, uint32_t nsid)
{
	int res = 0;

	res = build_nvme_name(name, length, ctrlr);
	if (res > 0) {
		snprintf(name + res, length - res, " NSID %u", nsid);
	}
}

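/*
 * Validate a namespace against the configured I/O size and, if usable, add
 * an ns_entry for it to the global list. num_io_requests is sized so the
 * qpair holds enough request objects for g_queue_depth I/Os even when the
 * driver splits them into child requests.
 */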
static void
register_ns(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_ns *ns)
{
	struct ns_entry *entry;
	const struct spdk_nvme_ctrlr_data *cdata;
	uint32_t max_xfer_size, entries, sector_size;
	uint64_t ns_size;
	struct spdk_nvme_io_qpair_opts opts;

	cdata = spdk_nvme_ctrlr_get_data(ctrlr);

	if (!spdk_nvme_ns_is_active(ns)) {
		printf("Controller %-20.20s (%-20.20s): Skipping inactive NS %u\n",
		       cdata->mn, cdata->sn,
		       spdk_nvme_ns_get_id(ns));
		g_warn = true;
		return;
	}

	ns_size = spdk_nvme_ns_get_size(ns);
	sector_size = spdk_nvme_ns_get_sector_size(ns);

	if (ns_size < g_io_size_bytes || sector_size > g_io_size_bytes) {
		printf("WARNING: controller %-20.20s (%-20.20s) ns %u has invalid "
		       "ns size %" PRIu64 " / block size %u for I/O size %u\n",
		       cdata->mn, cdata->sn, spdk_nvme_ns_get_id(ns),
		       ns_size, spdk_nvme_ns_get_sector_size(ns), g_io_size_bytes);
		g_warn = true;
		return;
	}

	max_xfer_size = spdk_nvme_ns_get_max_io_xfer_size(ns);
	spdk_nvme_ctrlr_get_default_io_qpair_opts(ctrlr, &opts, sizeof(opts));
	/* The NVMe driver may add additional entries based on the stripe size and
	 * maximum transfer size; we assume one more entry is used for the stripe.
	 */
	entries = (g_io_size_bytes - 1) / max_xfer_size + 2;
	if ((g_queue_depth * entries) > opts.io_queue_size) {
		printf("controller IO queue size %u is less than required\n",
		       opts.io_queue_size);
		printf("Consider using a lower queue depth or a smaller IO size, because "
		       "IO requests may be queued at the NVMe driver.\n");
	}
	/* For requests which have child requests, the parent request itself
	 * also occupies one entry.
	 */
	entries += 1;

	entry = calloc(1, sizeof(struct ns_entry));
	if (entry == NULL) {
		perror("ns_entry calloc");
		exit(1);
	}

	entry->ctrlr = ctrlr;
	entry->ns = ns;
	entry->num_io_requests = g_queue_depth * entries;

	entry->size_in_ios = ns_size / g_io_size_bytes;
	entry->io_size_blocks = g_io_size_bytes / sector_size;

	entry->block_size = spdk_nvme_ns_get_sector_size(ns);

	if (g_max_io_size_blocks < entry->io_size_blocks) {
		g_max_io_size_blocks = entry->io_size_blocks;
	}

	build_nvme_ns_name(entry->name, sizeof(entry->name), ctrlr, spdk_nvme_ns_get_id(ns));

	g_num_namespaces++;
	entry->next = g_namespaces;
	g_namespaces = entry;
}

static void
unregister_namespaces(void)
{
	struct ns_entry *entry = g_namespaces;

	while (entry) {
		struct ns_entry *next = entry->next;
		free(entry);
		entry = next;
	}
}

static void
register_ctrlr(struct spdk_nvme_ctrlr *ctrlr, struct trid_entry *trid_entry)
{
	struct spdk_nvme_ns *ns;
	struct ctrlr_entry *entry = malloc(sizeof(struct ctrlr_entry));
	uint32_t nsid;

	if (entry == NULL) {
		perror("ctrlr_entry malloc");
		exit(1);
	}

	build_nvme_name(entry->name, sizeof(entry->name), ctrlr);

	entry->ctrlr = ctrlr;
	entry->trtype = trid_entry->trid.trtype;
	entry->next = g_controllers;
	g_controllers = entry;

	if (trid_entry->nsid == 0) {
		for (nsid = spdk_nvme_ctrlr_get_first_active_ns(ctrlr);
		     nsid != 0; nsid = spdk_nvme_ctrlr_get_next_active_ns(ctrlr, nsid)) {
			ns = spdk_nvme_ctrlr_get_ns(ctrlr, nsid);
			if (ns == NULL) {
				continue;
			}
			register_ns(ctrlr, ns);
		}
	} else {
		ns = spdk_nvme_ctrlr_get_ns(ctrlr, trid_entry->nsid);
		if (!ns) {
			fprintf(stderr, "Namespace does not exist.\n");
			exit(1);
		}

		register_ns(ctrlr, ns);
	}
}

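/*
 * Completion callback for the Abort admin command. Per the NVMe
 * specification, bit 0 of completion dword 0 is cleared to 0 if the
 * target command was successfully aborted, and set to 1 otherwise.
 */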
static void
abort_complete(void *ctx, const struct spdk_nvme_cpl *cpl)
{
	struct ctrlr_worker_ctx *ctrlr_ctx = ctx;

	ctrlr_ctx->current_queue_depth--;
	if (spdk_unlikely(spdk_nvme_cpl_is_error(cpl))) {
		ctrlr_ctx->abort_failed++;
	} else if ((cpl->cdw0 & 0x1) == 0) {
		ctrlr_ctx->successful_abort++;
	} else {
		ctrlr_ctx->unsuccessful_abort++;
	}
}

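/*
 * Submit an Abort command on the admin queue targeting the given task's
 * outstanding I/O. spdk_nvme_ctrlr_cmd_abort_ext() identifies the command
 * to abort by its completion callback argument (the task pointer).
 */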
static void
abort_task(struct perf_task *task)
{
	struct ns_worker_ctx *ns_ctx = task->ns_ctx;
	struct ctrlr_worker_ctx *ctrlr_ctx = ns_ctx->ctrlr_ctx;
	int rc;

	/* Hold mutex to guard ctrlr_ctx->current_queue_depth. */
	pthread_mutex_lock(&ctrlr_ctx->mutex);

	rc = spdk_nvme_ctrlr_cmd_abort_ext(ctrlr_ctx->ctrlr, ns_ctx->qpair, task, abort_complete,
					   ctrlr_ctx);

	if (spdk_unlikely(rc != 0)) {
		ctrlr_ctx->abort_submit_failed++;
	} else {
		ctrlr_ctx->current_queue_depth++;
		ctrlr_ctx->abort_submitted++;
	}

	pthread_mutex_unlock(&ctrlr_ctx->mutex);
}

static __thread unsigned int seed = 0;

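/*
 * Pick the next LBA (randomly or sequentially), issue a read or write
 * according to the configured mix, and submit an abort for every
 * g_abort_interval-th I/O that is successfully submitted.
 */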
static inline void
submit_single_io(struct perf_task *task)
{
	uint64_t offset_in_ios, lba;
	int rc;
	struct ns_worker_ctx *ns_ctx = task->ns_ctx;
	struct ns_entry *entry = ns_ctx->entry;

	if (g_is_random) {
		offset_in_ios = rand_r(&seed) % entry->size_in_ios;
	} else {
		offset_in_ios = ns_ctx->offset_in_ios++;
		if (ns_ctx->offset_in_ios == entry->size_in_ios) {
			ns_ctx->offset_in_ios = 0;
		}
	}

	lba = offset_in_ios * entry->io_size_blocks;

	if ((g_rw_percentage == 100) ||
	    (g_rw_percentage != 0 && (rand_r(&seed) % 100) < g_rw_percentage)) {
		rc = spdk_nvme_ns_cmd_read(entry->ns, ns_ctx->qpair, task->buf,
					   lba, entry->io_size_blocks, io_complete, task, 0);
	} else {
		rc = spdk_nvme_ns_cmd_write(entry->ns, ns_ctx->qpair, task->buf,
					    lba, entry->io_size_blocks, io_complete, task, 0);
	}

	if (spdk_unlikely(rc != 0)) {
		fprintf(stderr, "I/O submission failed\n");
	} else {
		ns_ctx->current_queue_depth++;
		ns_ctx->io_submitted++;

		if ((ns_ctx->io_submitted % g_abort_interval) == 0) {
			abort_task(task);
		}
	}
}

static void
io_complete(void *ctx, const struct spdk_nvme_cpl *cpl)
{
	struct perf_task *task = ctx;
	struct ns_worker_ctx *ns_ctx = task->ns_ctx;

	ns_ctx->current_queue_depth--;
	if (spdk_unlikely(spdk_nvme_cpl_is_error(cpl))) {
		ns_ctx->io_failed++;
	} else {
		ns_ctx->io_completed++;
	}

	/* is_draining indicates when time has expired for the test run and we are
	 * just waiting for the previously submitted I/O to complete. In this case,
	 * do not submit a new I/O to replace the one just completed.
	 */
	if (spdk_unlikely(ns_ctx->is_draining)) {
		spdk_dma_free(task->buf);
		free(task);
	} else {
		submit_single_io(task);
	}
}

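/*
 * Allocate a task together with a DMA-safe data buffer. The buffer is
 * allocated with spdk_dma_zmalloc() at 512-byte (0x200) alignment so the
 * NVMe driver can use it directly for the data transfer.
 */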
static struct perf_task *
allocate_task(struct ns_worker_ctx *ns_ctx)
{
	struct perf_task *task;

	task = calloc(1, sizeof(*task));
	if (task == NULL) {
		fprintf(stderr, "Failed to allocate task\n");
		exit(1);
	}

	task->buf = spdk_dma_zmalloc(g_io_size_bytes, 0x200, NULL);
	if (task->buf == NULL) {
		free(task);
		fprintf(stderr, "Failed to allocate task->buf\n");
		exit(1);
	}

	task->ns_ctx = ns_ctx;

	return task;
}

static void
submit_io(struct ns_worker_ctx *ns_ctx, int queue_depth)
{
	struct perf_task *task;

	while (queue_depth-- > 0) {
		task = allocate_task(ns_ctx);
		submit_single_io(task);
	}
}

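/*
 * Main loop for each worker core: allocate one qpair per assigned namespace,
 * fill every qpair to g_queue_depth, then poll I/O completions (and, on the
 * master core, admin completions for the abort commands) until the test time
 * expires, and finally drain all outstanding commands.
 */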
static int
work_fn(void *arg)
{
	struct worker_thread *worker = (struct worker_thread *)arg;
	struct ns_worker_ctx *ns_ctx;
	struct ctrlr_worker_ctx *ctrlr_ctx;
	struct ns_entry *ns_entry;
	struct spdk_nvme_io_qpair_opts opts;
	uint64_t tsc_end;
	uint32_t unfinished_ctx;

	/* Allocate queue pair for each namespace. */
	ns_ctx = worker->ns_ctx;
	while (ns_ctx != NULL) {
		ns_entry = ns_ctx->entry;

		spdk_nvme_ctrlr_get_default_io_qpair_opts(ns_entry->ctrlr, &opts, sizeof(opts));
		if (opts.io_queue_requests < ns_entry->num_io_requests) {
			opts.io_queue_requests = ns_entry->num_io_requests;
		}

		ns_ctx->qpair = spdk_nvme_ctrlr_alloc_io_qpair(ns_entry->ctrlr, &opts, sizeof(opts));
		if (ns_ctx->qpair == NULL) {
			fprintf(stderr, "spdk_nvme_ctrlr_alloc_io_qpair failed\n");
			return 1;
		}

		ns_ctx = ns_ctx->next;
	}

	tsc_end = spdk_get_ticks() + g_time_in_sec * g_tsc_rate;

	/* Submit initial I/O for each namespace. */
	ns_ctx = worker->ns_ctx;
	while (ns_ctx != NULL) {
		submit_io(ns_ctx, g_queue_depth);
		ns_ctx = ns_ctx->next;
	}

	while (1) {
		ns_ctx = worker->ns_ctx;
		while (ns_ctx != NULL) {
			spdk_nvme_qpair_process_completions(ns_ctx->qpair, 0);
			ns_ctx = ns_ctx->next;
		}

		if (worker->lcore == g_master_core) {
			ctrlr_ctx = worker->ctrlr_ctx;
			while (ctrlr_ctx) {
				/* Hold mutex to guard ctrlr_ctx->current_queue_depth. */
				pthread_mutex_lock(&ctrlr_ctx->mutex);
				spdk_nvme_ctrlr_process_admin_completions(ctrlr_ctx->ctrlr);
				pthread_mutex_unlock(&ctrlr_ctx->mutex);
				ctrlr_ctx = ctrlr_ctx->next;
			}
		}

		if (spdk_get_ticks() > tsc_end) {
			break;
		}
	}

	do {
		unfinished_ctx = 0;

		ns_ctx = worker->ns_ctx;
		while (ns_ctx != NULL) {
			if (!ns_ctx->is_draining) {
				ns_ctx->is_draining = true;
			}
			if (ns_ctx->current_queue_depth > 0) {
				spdk_nvme_qpair_process_completions(ns_ctx->qpair, 0);
				if (ns_ctx->current_queue_depth == 0) {
					spdk_nvme_ctrlr_free_io_qpair(ns_ctx->qpair);
				} else {
					unfinished_ctx++;
				}
			}
			ns_ctx = ns_ctx->next;
		}
	} while (unfinished_ctx > 0);

	if (worker->lcore == g_master_core) {
		do {
			unfinished_ctx = 0;

			ctrlr_ctx = worker->ctrlr_ctx;
			while (ctrlr_ctx != NULL) {
				pthread_mutex_lock(&ctrlr_ctx->mutex);
				if (ctrlr_ctx->current_queue_depth > 0) {
					spdk_nvme_ctrlr_process_admin_completions(ctrlr_ctx->ctrlr);
					if (ctrlr_ctx->current_queue_depth > 0) {
						unfinished_ctx++;
					}
				}
				pthread_mutex_unlock(&ctrlr_ctx->mutex);
				ctrlr_ctx = ctrlr_ctx->next;
			}
		} while (unfinished_ctx > 0);
	}

	return 0;
}

static void
usage(char *program_name)
{
	printf("%s options", program_name);

	printf("\n");
	printf("\t[-q io depth]\n");
	printf("\t[-o io size in bytes]\n");
	printf("\t[-w io pattern type, must be one of\n");
	printf("\t\t(read, write, randread, randwrite, rw, randrw)]\n");
	printf("\t[-M rwmixread (100 for reads, 0 for writes)]\n");
	printf("\t[-t time in seconds]\n");
	printf("\t[-c core mask for I/O submission/completion.]\n");
	printf("\t\t(default: 1)\n");
	printf("\t[-r Transport ID for local PCIe NVMe or NVMeoF]\n");
	printf("\t Format: 'key:value [key:value] ...'\n");
	printf("\t Keys:\n");
	printf("\t  trtype      Transport type (e.g. PCIe, RDMA)\n");
	printf("\t  adrfam      Address family (e.g. IPv4, IPv6)\n");
	printf("\t  traddr      Transport address (e.g. 0000:04:00.0 for PCIe or 192.168.100.8 for RDMA)\n");
	printf("\t  trsvcid     Transport service identifier (e.g. 4420)\n");
	printf("\t  subnqn      Subsystem NQN (default: %s)\n", SPDK_NVMF_DISCOVERY_NQN);
	printf("\t Example: -r 'trtype:PCIe traddr:0000:04:00.0' for PCIe or\n");
	printf("\t          -r 'trtype:RDMA adrfam:IPv4 traddr:192.168.100.8 trsvcid:4420' for NVMeoF\n");
	printf("\t[-s DPDK huge memory size in MB.]\n");
	printf("\t[-i shared memory group ID]\n");
	printf("\t[-a abort interval.]\n");
	printf("\t");
	spdk_log_usage(stdout, "-T");
#ifdef DEBUG
	printf("\t[-G enable debug logging]\n");
#else
	printf("\t[-G enable debug logging (flag disabled, must reconfigure with --enable-debug)]\n");
#endif
}

static void
unregister_trids(void)
{
	struct trid_entry *trid_entry, *tmp;

	TAILQ_FOREACH_SAFE(trid_entry, &g_trid_list, tailq, tmp) {
		TAILQ_REMOVE(&g_trid_list, trid_entry, tailq);
		free(trid_entry);
	}
}

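/*
 * Parse one -r transport ID string into a trid_entry. An optional
 * "ns:<id>" key in the same string selects a single namespace; nsid 0
 * (the default) means "use every active namespace".
 */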
static int
add_trid(const char *trid_str)
{
	struct trid_entry *trid_entry;
	struct spdk_nvme_transport_id *trid;
	char *ns;

	trid_entry = calloc(1, sizeof(*trid_entry));
	if (trid_entry == NULL) {
		return -1;
	}

	trid = &trid_entry->trid;
	trid->trtype = SPDK_NVME_TRANSPORT_PCIE;
	snprintf(trid->subnqn, sizeof(trid->subnqn), "%s", SPDK_NVMF_DISCOVERY_NQN);

	if (spdk_nvme_transport_id_parse(trid, trid_str) != 0) {
		fprintf(stderr, "Invalid transport ID format '%s'\n", trid_str);
		free(trid_entry);
		return 1;
	}

	spdk_nvme_transport_id_populate_trstring(trid,
			spdk_nvme_transport_id_trtype_str(trid->trtype));

	ns = strcasestr(trid_str, "ns:");
	if (ns) {
		char nsid_str[6]; /* 5 digits maximum in an nsid */
		int len;
		int nsid;

		ns += 3;

		len = strcspn(ns, " \t\n");
		if (len > 5) {
			fprintf(stderr, "NVMe namespace IDs must be 5 digits or less\n");
			free(trid_entry);
			return 1;
		}

		memcpy(nsid_str, ns, len);
		nsid_str[len] = '\0';

		nsid = spdk_strtol(nsid_str, 10);
		if (nsid <= 0 || nsid > 65535) {
			fprintf(stderr, "NVMe namespace IDs must be less than 65536 and greater than 0\n");
			free(trid_entry);
			return 1;
		}

		trid_entry->nsid = (uint16_t)nsid;
	}

	TAILQ_INSERT_TAIL(&g_trid_list, trid_entry, tailq);
	return 0;
}

static int
parse_args(int argc, char **argv)
{
	int op;
	long int val;
	int rc;

	while ((op = getopt(argc, argv, "a:c:Gi:o:q:r:s:t:w:M:T:")) != -1) {
		switch (op) {
		case 'a':
		case 'i':
		case 'o':
		case 'q':
		case 's':
		case 't':
		case 'M':
			val = spdk_strtol(optarg, 10);
			if (val < 0) {
				fprintf(stderr, "Converting a string to an integer failed\n");
				return val;
			}
			switch (op) {
			case 'a':
				g_abort_interval = val;
				break;
			case 'i':
				g_shm_id = val;
				break;
			case 'o':
				g_io_size_bytes = val;
				break;
			case 'q':
				g_queue_depth = val;
				break;
			case 's':
				g_dpdk_mem = val;
				break;
			case 't':
				g_time_in_sec = val;
				break;
			case 'M':
				g_rw_percentage = val;
				g_mix_specified = true;
				break;
			}
			break;
		case 'c':
			g_core_mask = optarg;
			break;
		case 'r':
			if (add_trid(optarg)) {
				usage(argv[0]);
				return 1;
			}
			break;
		case 'w':
			g_workload_type = optarg;
			break;
		case 'G':
#ifndef DEBUG
			fprintf(stderr, "%s must be configured with --enable-debug for -G flag\n",
				argv[0]);
			usage(argv[0]);
			return 1;
#else
			spdk_log_set_flag("nvme");
			spdk_log_set_print_level(SPDK_LOG_DEBUG);
			break;
#endif
		case 'T':
			rc = spdk_log_set_flag(optarg);
			if (rc < 0) {
				fprintf(stderr, "unknown flag\n");
				usage(argv[0]);
				exit(EXIT_FAILURE);
			}
			spdk_log_set_print_level(SPDK_LOG_DEBUG);
#ifndef DEBUG
			fprintf(stderr, "%s must be rebuilt with CONFIG_DEBUG=y for -T flag.\n",
				argv[0]);
			usage(argv[0]);
			return 0;
#endif
			break;
		default:
			usage(argv[0]);
			return 1;
		}
	}

	if (!g_queue_depth) {
		fprintf(stderr, "missing -q (queue depth) operand\n");
		usage(argv[0]);
		return 1;
	}
	if (!g_io_size_bytes) {
		fprintf(stderr, "missing -o (io size) operand\n");
		usage(argv[0]);
		return 1;
	}
	if (!g_workload_type) {
		fprintf(stderr, "missing -w (io pattern type) operand\n");
		usage(argv[0]);
		return 1;
	}

	if (!g_time_in_sec) {
		usage(argv[0]);
		return 1;
	}

	if (strncmp(g_workload_type, "rand", 4) == 0) {
		g_is_random = 1;
		g_workload_type = &g_workload_type[4];
	}

	if (strcmp(g_workload_type, "read") == 0 || strcmp(g_workload_type, "write") == 0) {
		g_rw_percentage = strcmp(g_workload_type, "read") == 0 ? 100 : 0;
		if (g_mix_specified) {
			fprintf(stderr, "Ignoring -M option... Please use the -M option"
				" only with rw or randrw.\n");
		}
	} else if (strcmp(g_workload_type, "rw") == 0) {
		if (g_rw_percentage < 0 || g_rw_percentage > 100) {
			fprintf(stderr,
				"-M must be set to a value from 0 to 100 "
				"for rw or randrw.\n");
			return 1;
		}
	} else {
		fprintf(stderr,
			"io pattern type must be one of\n"
			"(read, write, randread, randwrite, rw, randrw)\n");
		return 1;
	}

	if (TAILQ_EMPTY(&g_trid_list)) {
		/* If no transport IDs were specified, default to enumerating all local PCIe devices. */
		add_trid("trtype:PCIe");
	} else {
		struct trid_entry *trid_entry, *trid_entry_tmp;

		g_no_pci = true;
		/* Check whether any of the given transport IDs is a local PCIe type. */
		TAILQ_FOREACH_SAFE(trid_entry, &g_trid_list, tailq, trid_entry_tmp) {
			if (trid_entry->trid.trtype == SPDK_NVME_TRANSPORT_PCIE) {
				g_no_pci = false;
				break;
			}
		}
	}

	return 0;
}

static int
register_workers(void)
{
	uint32_t i;
	struct worker_thread *worker;

	g_workers = NULL;
	g_num_workers = 0;

	SPDK_ENV_FOREACH_CORE(i) {
		worker = calloc(1, sizeof(*worker));
		if (worker == NULL) {
			fprintf(stderr, "Unable to allocate worker\n");
			return -1;
		}

		worker->lcore = i;
		worker->next = g_workers;
		g_workers = worker;
		g_num_workers++;
	}

	return 0;
}

static void
unregister_workers(void)
{
	struct worker_thread *worker = g_workers;

	/* Free the namespace contexts, controller contexts, and worker threads. */
	while (worker) {
		struct worker_thread *next_worker = worker->next;
		struct ns_worker_ctx *ns_ctx = worker->ns_ctx;
		struct ctrlr_worker_ctx *ctrlr_ctx = worker->ctrlr_ctx;

		while (ns_ctx) {
			struct ns_worker_ctx *next_ns_ctx = ns_ctx->next;

			printf("NS: %s I/O completed: %" PRIu64 ", failed: %" PRIu64 "\n",
			       ns_ctx->entry->name, ns_ctx->io_completed, ns_ctx->io_failed);
			free(ns_ctx);
			ns_ctx = next_ns_ctx;
		}

		while (ctrlr_ctx) {
			struct ctrlr_worker_ctx *next_ctrlr_ctx = ctrlr_ctx->next;

			printf("CTRLR: %s abort submitted %" PRIu64 ", failed to submit %" PRIu64 "\n",
			       ctrlr_ctx->entry->name, ctrlr_ctx->abort_submitted,
			       ctrlr_ctx->abort_submit_failed);
			printf("\t successful %" PRIu64 ", unsuccessful %" PRIu64 ", failed %" PRIu64 "\n",
			       ctrlr_ctx->successful_abort, ctrlr_ctx->unsuccessful_abort,
			       ctrlr_ctx->abort_failed);
			free(ctrlr_ctx);
			ctrlr_ctx = next_ctrlr_ctx;
		}

		free(worker);
		worker = next_worker;
	}
}

static bool
probe_cb(void *cb_ctx, const struct spdk_nvme_transport_id *trid,
	 struct spdk_nvme_ctrlr_opts *opts)
{
	return true;
}

static void
attach_cb(void *cb_ctx, const struct spdk_nvme_transport_id *trid,
	  struct spdk_nvme_ctrlr *ctrlr, const struct spdk_nvme_ctrlr_opts *opts)
{
	struct trid_entry *trid_entry = cb_ctx;
	struct spdk_pci_addr pci_addr;
	struct spdk_pci_device *pci_dev;
	struct spdk_pci_id pci_id;

	if (trid->trtype != SPDK_NVME_TRANSPORT_PCIE) {
		printf("Attached to NVMe over Fabrics controller at %s:%s: %s\n",
		       trid->traddr, trid->trsvcid,
		       trid->subnqn);
	} else {
		if (spdk_pci_addr_parse(&pci_addr, trid->traddr)) {
			return;
		}

		pci_dev = spdk_nvme_ctrlr_get_pci_device(ctrlr);
		if (!pci_dev) {
			return;
		}

		pci_id = spdk_pci_device_get_id(pci_dev);

		printf("Attached to NVMe Controller at %s [%04x:%04x]\n",
		       trid->traddr,
		       pci_id.vendor_id, pci_id.device_id);
	}

	register_ctrlr(ctrlr, trid_entry);
}

static int
register_controllers(void)
{
	struct trid_entry *trid_entry;

	printf("Initializing NVMe Controllers\n");

	TAILQ_FOREACH(trid_entry, &g_trid_list, tailq) {
		if (spdk_nvme_probe(&trid_entry->trid, trid_entry, probe_cb, attach_cb, NULL) != 0) {
			fprintf(stderr, "spdk_nvme_probe() failed for transport address '%s'\n",
				trid_entry->trid.traddr);
			return -1;
		}
	}

	return 0;
}

static void
unregister_controllers(void)
{
	struct ctrlr_entry *entry = g_controllers;

	while (entry) {
		struct ctrlr_entry *next = entry->next;

		spdk_nvme_detach(entry->ctrlr);
		free(entry);
		entry = next;
	}
}

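/*
 * Attach a ctrlr_worker_ctx for every registered controller to the master
 * core's worker. Abort commands go through the admin queue, which is polled
 * only on the master core, so all controller contexts live there.
 */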
static int
associate_master_worker_with_ctrlr(void)
{
	struct ctrlr_entry *entry = g_controllers;
	struct worker_thread *worker = g_workers;
	struct ctrlr_worker_ctx *ctrlr_ctx;

	while (worker) {
		if (worker->lcore == g_master_core) {
			break;
		}
		worker = worker->next;
	}

	if (!worker) {
		return -1;
	}

	while (entry) {
		ctrlr_ctx = calloc(1, sizeof(struct ctrlr_worker_ctx));
		if (!ctrlr_ctx) {
			return -1;
		}

		pthread_mutex_init(&ctrlr_ctx->mutex, NULL);
		ctrlr_ctx->entry = entry;
		ctrlr_ctx->ctrlr = entry->ctrlr;
		ctrlr_ctx->next = worker->ctrlr_ctx;
		worker->ctrlr_ctx = ctrlr_ctx;

		entry = entry->next;
	}

	return 0;
}

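/*
 * Look up the master worker's ctrlr_worker_ctx for the given controller, so
 * that a namespace context can route its abort submissions to it.
 */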
static struct ctrlr_worker_ctx *
get_ctrlr_worker_ctx(struct spdk_nvme_ctrlr *ctrlr)
{
	struct worker_thread *worker = g_workers;
	struct ctrlr_worker_ctx *ctrlr_ctx;

	while (worker != NULL) {
		if (worker->lcore == g_master_core) {
			break;
		}
		worker = worker->next;
	}

	if (!worker) {
		return NULL;
	}

	ctrlr_ctx = worker->ctrlr_ctx;

	while (ctrlr_ctx != NULL) {
		if (ctrlr_ctx->ctrlr == ctrlr) {
			return ctrlr_ctx;
		}
		ctrlr_ctx = ctrlr_ctx->next;
	}

	return NULL;
}

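/*
 * Assign namespaces to workers round-robin: walk both lists in parallel and
 * wrap whichever runs out first, so every namespace gets a worker and every
 * worker gets at least one namespace where possible.
 */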
static int
associate_workers_with_ns(void)
{
	struct ns_entry *entry = g_namespaces;
	struct worker_thread *worker = g_workers;
	struct ns_worker_ctx *ns_ctx;
	int i, count;

	count = g_num_namespaces > g_num_workers ? g_num_namespaces : g_num_workers;

	for (i = 0; i < count; i++) {
		if (entry == NULL) {
			break;
		}

		ns_ctx = calloc(1, sizeof(struct ns_worker_ctx));
		if (!ns_ctx) {
			return -1;
		}

		printf("Associating %s with lcore %u\n", entry->name, worker->lcore);
		ns_ctx->entry = entry;
		ns_ctx->ctrlr_ctx = get_ctrlr_worker_ctx(entry->ctrlr);
		if (!ns_ctx->ctrlr_ctx) {
			free(ns_ctx);
			return -1;
		}

		ns_ctx->next = worker->ns_ctx;
		worker->ns_ctx = ns_ctx;

		worker = worker->next;
		if (worker == NULL) {
			worker = g_workers;
		}

		entry = entry->next;
		if (entry == NULL) {
			entry = g_namespaces;
		}
	}

	return 0;
}

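/*
 * Example invocation (illustrative; the PCIe address below is hypothetical):
 *
 *   ./abort -q 128 -o 4096 -w randread -t 10 -a 4 \
 *           -r 'trtype:PCIe traddr:0000:04:00.0'
 *
 * This runs 10 seconds of 4 KiB random reads at queue depth 128 and submits
 * an abort for every 4th I/O.
 */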
int main(int argc, char **argv)
{
	int rc;
	struct worker_thread *worker, *master_worker;
	struct spdk_env_opts opts;

	rc = parse_args(argc, argv);
	if (rc != 0) {
		return rc;
	}

	spdk_env_opts_init(&opts);
	opts.name = "abort";
	opts.shm_id = g_shm_id;
	if (g_core_mask) {
		opts.core_mask = g_core_mask;
	}

	if (g_dpdk_mem) {
		opts.mem_size = g_dpdk_mem;
	}
	if (g_no_pci) {
		opts.no_pci = g_no_pci;
	}
	if (spdk_env_init(&opts) < 0) {
		fprintf(stderr, "Unable to initialize SPDK env\n");
		rc = -1;
		goto cleanup;
	}

	g_tsc_rate = spdk_get_ticks_hz();

	/* Record the master core up front; the per-controller abort contexts
	 * created below are tied to the master core's worker.
	 */
	g_master_core = spdk_env_get_current_core();

	if (register_workers() != 0) {
		rc = -1;
		goto cleanup;
	}

	if (register_controllers() != 0) {
		rc = -1;
		goto cleanup;
	}

	if (g_warn) {
		printf("WARNING: Some requested NVMe devices were skipped\n");
	}

	if (g_num_namespaces == 0) {
		fprintf(stderr, "No valid NVMe controllers found\n");
		rc = -1;
		goto cleanup;
	}

	if (associate_master_worker_with_ctrlr() != 0) {
		rc = -1;
		goto cleanup;
	}

	if (associate_workers_with_ns() != 0) {
		rc = -1;
		goto cleanup;
	}

	printf("Initialization complete. Launching workers.\n");

	/* Launch all of the slave workers */
	master_worker = NULL;
	worker = g_workers;
	while (worker != NULL) {
		if (worker->lcore != g_master_core) {
			spdk_env_thread_launch_pinned(worker->lcore, work_fn, worker);
		} else {
			assert(master_worker == NULL);
			master_worker = worker;
		}
		worker = worker->next;
	}

	assert(master_worker != NULL);
	rc = work_fn(master_worker);

	spdk_env_thread_wait_all();

cleanup:
	unregister_trids();
	unregister_workers();
	unregister_namespaces();
	unregister_controllers();

	if (rc != 0) {
		fprintf(stderr, "%s: errors occurred\n", argv[0]);
	}

	return rc;
}