]> git.proxmox.com Git - ceph.git/blame - ceph/src/spdk/examples/nvme/fio_plugin/fio_plugin.c
update sources to ceph Nautilus 14.2.1
[ceph.git] / ceph / src / spdk / examples / nvme / fio_plugin / fio_plugin.c
CommitLineData
7c673cae
FG
1/*-
2 * BSD LICENSE
3 *
4 * Copyright (c) Intel Corporation.
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 *
11 * * Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * * Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in
15 * the documentation and/or other materials provided with the
16 * distribution.
17 * * Neither the name of Intel Corporation nor the names of its
18 * contributors may be used to endorse or promote products derived
19 * from this software without specific prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32 */
33
11fdf7f2 34#include "spdk/stdinc.h"
7c673cae
FG
35
36#include "spdk/nvme.h"
37#include "spdk/env.h"
38#include "spdk/string.h"
11fdf7f2
TL
39#include "spdk/log.h"
40#include "spdk/endian.h"
41#include "spdk/crc16.h"
7c673cae
FG
42
43#include "config-host.h"
44#include "fio.h"
45#include "optgroup.h"
46
/* Alignment, in bytes, requested for the DMA buffer allocated for fio. */
#define NVME_IO_ALIGN		4096
/* Constant application tag stored in / expected from protection information. */
#define FIO_NVME_PI_APPTAG	0x1234

/* Becomes true after spdk_env_init() succeeds, so it is only done once. */
static bool spdk_env_initialized;
/* Non-zero selects the readv/writev (SGL callback) submission path. */
static int spdk_enable_sgl = 0;
/* Flag words OR-ed into a qpair's io_flags when the namespace supports PI. */
static uint32_t spdk_pract_flag;
static uint32_t spdk_prchk_flags;
54
/*
 * Engine-specific options filled in by fio's option parser; each member is
 * addressed through offsetof() in the options[] table.
 */
struct spdk_fio_options {
	void	*pad;		/* off1 used in option descriptions may not be 0 */
	int	mem_size;	/* "mem_size_mb" option */
	int	shm_id;		/* "shm_id" option */
	int	enable_sgl;	/* "enable_sgl" option */
	char	*hostnqn;	/* "hostnqn" option, NULL when not given */
	int	pi_act;		/* "pi_act" option */
	char	*pi_chk;	/* "pi_chk" option, NULL when not given */
};
7c673cae
FG
64
/* Per-io_u engine data, created in spdk_fio_io_u_init(). */
struct spdk_fio_request {
	/* The fio I/O unit this request wraps. */
	struct io_u		*io;

	/** Offset in current iovec, fio only uses 1 vector */
	uint32_t		iov_offset;

	/** Application tag and its mask for NVMe PI */
	uint16_t		appmask;
	uint16_t		apptag;

	/* Owning fio thread; its completion array receives the finished io_u. */
	struct spdk_fio_thread	*fio_thread;
};
76
11fdf7f2
TL
77struct spdk_fio_ctrlr {
78 struct spdk_nvme_transport_id tr_id;
79 struct spdk_nvme_ctrlr_opts opts;
80 struct spdk_nvme_ctrlr *ctrlr;
81 struct spdk_fio_ctrlr *next;
7c673cae
FG
82};
83
11fdf7f2
TL
/* Head of the list of attached controllers. */
static struct spdk_fio_ctrlr *ctrlr_g;
/* Number of fio threads that have gone through setup and not yet cleanup. */
static int td_count;
/* Thread running spdk_fio_poll_ctrlrs(). */
static pthread_t g_ctrlr_thread_id = 0;
/* Taken around setup, the controller list walk in the poller, and teardown. */
static pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER;
/* Raised by attach_cb() when it fails; checked by spdk_fio_setup(). */
static bool g_error;
7c673cae 89
11fdf7f2
TL
/* Binds one fio file to an NVMe namespace and the I/O qpair used for it. */
struct spdk_fio_qpair {
	struct fio_file		*f;		/* fio file served by this qpair */
	struct spdk_nvme_qpair	*qpair;		/* I/O queue pair */
	struct spdk_nvme_ns	*ns;		/* namespace targeted by the file */
	uint32_t		io_flags;	/* PRACT/PRCHK flags sent with each command */
	bool			do_nvme_pi;	/* result of fio_do_nvme_pi_check() */
	struct spdk_fio_qpair	*next;		/* next qpair of the same thread */
	struct spdk_fio_ctrlr	*fio_ctrlr;	/* controller owning the namespace */
};
99
/* Per-fio-thread engine state, stored in td->io_ops_data. */
struct spdk_fio_thread {
	struct thread_data	*td;

	/* List of this thread's qpairs. */
	struct spdk_fio_qpair	*fio_qpair;
	/* The current fio_qpair to be handled. */
	struct spdk_fio_qpair	*fio_qpair_current;

	/* I/O completion queue. */
	struct io_u		**iocq;
	/* Number of iocq entries filled by last getevents. */
	unsigned int		iocq_count;
	/* Number of iocq entries allocated. */
	unsigned int		iocq_size;
	/* fio_file given by user. */
	struct fio_file		*current_f;

};
112
11fdf7f2
TL
113static void *
114spdk_fio_poll_ctrlrs(void *arg)
115{
116 struct spdk_fio_ctrlr *fio_ctrlr;
117 int oldstate;
118 int rc;
119
120 /* Loop until the thread is cancelled */
121 while (true) {
122 rc = pthread_setcancelstate(PTHREAD_CANCEL_DISABLE, &oldstate);
123 if (rc != 0) {
124 SPDK_ERRLOG("Unable to set cancel state disabled on g_init_thread (%d): %s\n",
125 rc, spdk_strerror(rc));
126 }
127
128 pthread_mutex_lock(&mutex);
129 fio_ctrlr = ctrlr_g;
130
131 while (fio_ctrlr) {
132 spdk_nvme_ctrlr_process_admin_completions(fio_ctrlr->ctrlr);
133 fio_ctrlr = fio_ctrlr->next;
134 }
135
136 pthread_mutex_unlock(&mutex);
137
138 rc = pthread_setcancelstate(PTHREAD_CANCEL_ENABLE, &oldstate);
139 if (rc != 0) {
140 SPDK_ERRLOG("Unable to set cancel state enabled on g_init_thread (%d): %s\n",
141 rc, spdk_strerror(rc));
142 }
143
144 /* This is a pthread cancellation point and cannot be removed. */
145 sleep(1);
146 }
147
148 return NULL;
149}
150
7c673cae
FG
151static bool
152probe_cb(void *cb_ctx, const struct spdk_nvme_transport_id *trid,
153 struct spdk_nvme_ctrlr_opts *opts)
154{
11fdf7f2
TL
155 struct thread_data *td = cb_ctx;
156 struct spdk_fio_options *fio_options = td->eo;
7c673cae 157
11fdf7f2
TL
158 if (fio_options->hostnqn) {
159 snprintf(opts->hostnqn, sizeof(opts->hostnqn), "%s", fio_options->hostnqn);
7c673cae
FG
160 }
161
11fdf7f2
TL
162 return true;
163}
164
165static struct spdk_fio_ctrlr *
166get_fio_ctrlr(const struct spdk_nvme_transport_id *trid)
167{
168 struct spdk_fio_ctrlr *fio_ctrlr = ctrlr_g;
169 while (fio_ctrlr) {
170 if (spdk_nvme_transport_id_compare(trid, &fio_ctrlr->tr_id) == 0) {
171 return fio_ctrlr;
7c673cae 172 }
11fdf7f2
TL
173
174 fio_ctrlr = fio_ctrlr->next;
175 }
176
177 return NULL;
178}
179
180static bool
181fio_do_nvme_pi_check(struct spdk_fio_qpair *fio_qpair)
182{
183 struct spdk_nvme_ns *ns = NULL;
184 const struct spdk_nvme_ns_data *nsdata;
185
186 ns = fio_qpair->ns;
187 nsdata = spdk_nvme_ns_get_data(ns);
188
189 if (!spdk_nvme_ns_supports_extended_lba(ns)) {
190 return false;
191 }
192
193 if (spdk_nvme_ns_get_pi_type(ns) ==
194 SPDK_NVME_FMT_NVM_PROTECTION_DISABLE) {
195 return false;
7c673cae
FG
196 }
197
11fdf7f2
TL
198 /* PI locates at the first 8 bytes of metadata,
199 * doesn't support now
200 */
201 if (nsdata->dps.md_start) {
202 return false;
203 }
204
205 /* Controller performs PI setup and check */
206 if (fio_qpair->io_flags & SPDK_NVME_IO_FLAGS_PRACT) {
207 return false;
208 }
209
210 /* Type3 don't support REFTAG */
211 if (spdk_nvme_ns_get_pi_type(ns) ==
212 SPDK_NVME_FMT_NVM_PROTECTION_TYPE3) {
213 return false;
214 }
215
216 return true;
7c673cae
FG
217}
218
219static void
220attach_cb(void *cb_ctx, const struct spdk_nvme_transport_id *trid,
221 struct spdk_nvme_ctrlr *ctrlr, const struct spdk_nvme_ctrlr_opts *opts)
222{
11fdf7f2 223 struct thread_data *td = cb_ctx;
7c673cae
FG
224 struct spdk_fio_thread *fio_thread = td->io_ops_data;
225 struct spdk_fio_ctrlr *fio_ctrlr;
11fdf7f2
TL
226 struct spdk_fio_qpair *fio_qpair;
227 struct spdk_nvme_ns *ns;
228 struct fio_file *f = fio_thread->current_f;
229 uint32_t ns_id;
230 char *p;
231
232 p = strstr(f->file_name, "ns=");
233 assert(p != NULL);
234 ns_id = atoi(p + 3);
235 if (!ns_id) {
236 SPDK_ERRLOG("namespace id should be >=1, but current value=0\n");
237 g_error = true;
238 return;
239 }
7c673cae 240
11fdf7f2
TL
241 fio_ctrlr = get_fio_ctrlr(trid);
242 /* it is a new ctrlr and needs to be added */
243 if (!fio_ctrlr) {
244 /* Create an fio_ctrlr and add it to the list */
245 fio_ctrlr = calloc(1, sizeof(*fio_ctrlr));
246 if (!fio_ctrlr) {
247 SPDK_ERRLOG("Cannot allocate space for fio_ctrlr\n");
248 g_error = true;
249 return;
250 }
251 fio_ctrlr->opts = *opts;
252 fio_ctrlr->ctrlr = ctrlr;
253 fio_ctrlr->tr_id = *trid;
254 fio_ctrlr->next = ctrlr_g;
255 ctrlr_g = fio_ctrlr;
256 }
7c673cae 257
11fdf7f2
TL
258 ns = spdk_nvme_ctrlr_get_ns(fio_ctrlr->ctrlr, ns_id);
259 if (ns == NULL) {
260 SPDK_ERRLOG("Cannot get namespace by ns_id=%d\n", ns_id);
261 g_error = true;
262 return;
263 }
7c673cae 264
11fdf7f2
TL
265 if (!spdk_nvme_ns_is_active(ns)) {
266 SPDK_ERRLOG("Inactive namespace by ns_id=%d\n", ns_id);
267 g_error = true;
268 return;
269 }
7c673cae 270
11fdf7f2
TL
271 fio_qpair = fio_thread->fio_qpair;
272 while (fio_qpair != NULL) {
273 if ((fio_qpair->f == f) ||
274 ((spdk_nvme_transport_id_compare(trid, &fio_qpair->fio_ctrlr->tr_id) == 0) &&
275 (spdk_nvme_ns_get_id(fio_qpair->ns) == ns_id))) {
276 /* Not the error case. Avoid duplicated connection */
277 return;
7c673cae 278 }
11fdf7f2
TL
279 fio_qpair = fio_qpair->next;
280 }
281
282 /* create a new qpair */
283 fio_qpair = calloc(1, sizeof(*fio_qpair));
284 if (!fio_qpair) {
285 g_error = true;
286 SPDK_ERRLOG("Cannot allocate space for fio_qpair\n");
287 return;
288 }
289
290 fio_qpair->qpair = spdk_nvme_ctrlr_alloc_io_qpair(fio_ctrlr->ctrlr, NULL, 0);
291 if (!fio_qpair->qpair) {
292 SPDK_ERRLOG("Cannot allocate nvme io_qpair any more\n");
293 g_error = true;
294 free(fio_qpair);
295 return;
296 }
297
298 fio_qpair->ns = ns;
299 fio_qpair->f = f;
300 fio_qpair->fio_ctrlr = fio_ctrlr;
301 fio_qpair->next = fio_thread->fio_qpair;
302 fio_thread->fio_qpair = fio_qpair;
303
304 if (spdk_nvme_ns_get_flags(ns) & SPDK_NVME_NS_DPS_PI_SUPPORTED) {
305 fio_qpair->io_flags = spdk_pract_flag | spdk_prchk_flags;
306 }
307
308 fio_qpair->do_nvme_pi = fio_do_nvme_pi_check(fio_qpair);
309
310 f->real_file_size = spdk_nvme_ns_get_size(fio_qpair->ns);
311 if (f->real_file_size <= 0) {
312 g_error = true;
313 SPDK_ERRLOG("Cannot get namespace size by ns=%p\n", ns);
314 return;
315 }
316
317 f->filetype = FIO_TYPE_BLOCK;
318 fio_file_set_size_known(f);
319}
320
321static void parse_prchk_flags(const char *prchk_str)
322{
323 if (!prchk_str) {
324 return;
325 }
326
327 if (strstr(prchk_str, "GUARD") != NULL) {
328 spdk_prchk_flags = SPDK_NVME_IO_FLAGS_PRCHK_GUARD;
329 }
330 if (strstr(prchk_str, "REFTAG") != NULL) {
331 spdk_prchk_flags |= SPDK_NVME_IO_FLAGS_PRCHK_REFTAG;
332 }
333 if (strstr(prchk_str, "APPTAG") != NULL) {
334 spdk_prchk_flags |= SPDK_NVME_IO_FLAGS_PRCHK_APPTAG;
7c673cae
FG
335 }
336}
337
338/* Called once at initialization. This is responsible for gathering the size of
339 * each "file", which in our case are in the form
11fdf7f2
TL
340 * 'key=value [key=value] ... ns=value'
341 * For example, For local PCIe NVMe device - 'trtype=PCIe traddr=0000.04.00.0 ns=1'
342 * For remote exported by NVMe-oF target, 'trtype=RDMA adrfam=IPv4 traddr=192.168.100.8 trsvcid=4420 ns=1' */
7c673cae
FG
343static int spdk_fio_setup(struct thread_data *td)
344{
345 struct spdk_fio_thread *fio_thread;
11fdf7f2 346 struct spdk_fio_options *fio_options = td->eo;
7c673cae 347 struct spdk_env_opts opts;
11fdf7f2
TL
348 struct fio_file *f;
349 char *p;
350 int rc = 0;
351 struct spdk_nvme_transport_id trid;
352 struct spdk_fio_ctrlr *fio_ctrlr;
353 char *trid_info;
354 unsigned int i;
7c673cae
FG
355
356 if (!td->o.use_thread) {
357 log_err("spdk: must set thread=1 when using spdk plugin\n");
358 return 1;
359 }
360
11fdf7f2
TL
361 pthread_mutex_lock(&mutex);
362
7c673cae
FG
363 fio_thread = calloc(1, sizeof(*fio_thread));
364 assert(fio_thread != NULL);
365
366 td->io_ops_data = fio_thread;
367 fio_thread->td = td;
368
369 fio_thread->iocq_size = td->o.iodepth;
370 fio_thread->iocq = calloc(fio_thread->iocq_size, sizeof(struct io_u *));
371 assert(fio_thread->iocq != NULL);
372
11fdf7f2
TL
373 if (!spdk_env_initialized) {
374 spdk_env_opts_init(&opts);
375 opts.name = "fio";
376 opts.mem_size = fio_options->mem_size;
377 opts.shm_id = fio_options->shm_id;
378 spdk_enable_sgl = fio_options->enable_sgl;
379 spdk_pract_flag = fio_options->pi_act;
380 parse_prchk_flags(fio_options->pi_chk);
381 if (spdk_env_init(&opts) < 0) {
382 SPDK_ERRLOG("Unable to initialize SPDK env\n");
383 free(fio_thread->iocq);
384 free(fio_thread);
385 fio_thread = NULL;
386 pthread_mutex_unlock(&mutex);
387 return 1;
388 }
389 spdk_env_initialized = true;
390 spdk_unaffinitize_thread();
7c673cae 391
11fdf7f2
TL
392 /* Spawn a thread to continue polling the controllers */
393 rc = pthread_create(&g_ctrlr_thread_id, NULL, &spdk_fio_poll_ctrlrs, NULL);
394 if (rc != 0) {
395 SPDK_ERRLOG("Unable to spawn a thread to poll admin queues. They won't be polled.\n");
396 }
7c673cae
FG
397 }
398
11fdf7f2
TL
399 for_each_file(td, f, i) {
400 memset(&trid, 0, sizeof(trid));
401
402 trid.trtype = SPDK_NVME_TRANSPORT_PCIE;
403
404 p = strstr(f->file_name, " ns=");
405 if (p == NULL) {
406 SPDK_ERRLOG("Failed to find namespace 'ns=X'\n");
407 continue;
408 }
409
410 trid_info = strndup(f->file_name, p - f->file_name);
411 if (!trid_info) {
412 SPDK_ERRLOG("Failed to allocate space for trid_info\n");
413 continue;
414 }
415
416 rc = spdk_nvme_transport_id_parse(&trid, trid_info);
417 if (rc < 0) {
418 SPDK_ERRLOG("Failed to parse given str: %s\n", trid_info);
419 free(trid_info);
420 continue;
421 }
422 free(trid_info);
423
424 if (trid.trtype == SPDK_NVME_TRANSPORT_PCIE) {
425 struct spdk_pci_addr pci_addr;
426 if (spdk_pci_addr_parse(&pci_addr, trid.traddr) < 0) {
427 SPDK_ERRLOG("Invalid traddr=%s\n", trid.traddr);
428 continue;
429 }
430 spdk_pci_addr_fmt(trid.traddr, sizeof(trid.traddr), &pci_addr);
431 } else {
432 if (trid.subnqn[0] == '\0') {
433 snprintf(trid.subnqn, sizeof(trid.subnqn), "%s",
434 SPDK_NVMF_DISCOVERY_NQN);
435 }
436 }
437
438 fio_thread->current_f = f;
439
440 fio_ctrlr = get_fio_ctrlr(&trid);
441 if (fio_ctrlr) {
442 attach_cb(td, &trid, fio_ctrlr->ctrlr, &fio_ctrlr->opts);
443 } else {
444 /* Enumerate all of the controllers */
445 if (spdk_nvme_probe(&trid, td, probe_cb, attach_cb, NULL) != 0) {
446 SPDK_ERRLOG("spdk_nvme_probe() failed\n");
447 continue;
448 }
449 }
450
451 if (g_error) {
452 log_err("Failed to initialize spdk fio plugin\n");
453 rc = 1;
454 break;
455 }
456 }
457
458 td_count++;
459
460 pthread_mutex_unlock(&mutex);
461
462 return rc;
7c673cae
FG
463}
464
/* open_file hook: there is nothing to do here, so it always reports success (0). */
static int spdk_fio_open(struct thread_data *td, struct fio_file *f)
{
	(void)td;
	(void)f;

	return 0;
}
469
/* close_file hook: there is nothing to do here, so it always reports success (0). */
static int spdk_fio_close(struct thread_data *td, struct fio_file *f)
{
	(void)td;
	(void)f;

	return 0;
}
474
475static int spdk_fio_iomem_alloc(struct thread_data *td, size_t total_mem)
476{
11fdf7f2 477 td->orig_buffer = spdk_dma_zmalloc(total_mem, NVME_IO_ALIGN, NULL);
7c673cae
FG
478 return td->orig_buffer == NULL;
479}
480
481static void spdk_fio_iomem_free(struct thread_data *td)
482{
11fdf7f2 483 spdk_dma_free(td->orig_buffer);
7c673cae
FG
484}
485
486static int spdk_fio_io_u_init(struct thread_data *td, struct io_u *io_u)
487{
488 struct spdk_fio_thread *fio_thread = td->io_ops_data;
489 struct spdk_fio_request *fio_req;
490
491 fio_req = calloc(1, sizeof(*fio_req));
492 if (fio_req == NULL) {
493 return 1;
494 }
495 fio_req->io = io_u;
496 fio_req->fio_thread = fio_thread;
497
498 io_u->engine_data = fio_req;
499
500 return 0;
501}
502
503static void spdk_fio_io_u_free(struct thread_data *td, struct io_u *io_u)
504{
505 struct spdk_fio_request *fio_req = io_u->engine_data;
506
507 if (fio_req) {
508 assert(fio_req->io == io_u);
509 free(fio_req);
510 io_u->engine_data = NULL;
511 }
512}
513
11fdf7f2
TL
514static void
515fio_extended_lba_setup_pi(struct spdk_fio_qpair *fio_qpair, struct io_u *io_u)
516{
517 struct spdk_nvme_ns *ns = NULL;
518 struct spdk_fio_request *fio_req = io_u->engine_data;
519 struct spdk_nvme_protection_info *pi;
520 uint16_t crc16;
521 uint32_t i, md_size, sector_size, pi_offset, extended_lba_size, lba_count;
522 uint64_t lba;
523
524 ns = fio_qpair->ns;
525
526 sector_size = spdk_nvme_ns_get_sector_size(ns);
527 md_size = spdk_nvme_ns_get_md_size(ns);
528 extended_lba_size = sector_size + md_size;
529 lba = io_u->offset / extended_lba_size;
530 lba_count = io_u->xfer_buflen / extended_lba_size;
531
532 if (fio_qpair->io_flags & SPDK_NVME_IO_FLAGS_PRCHK_APPTAG) {
533 /* Let's use number of lbas for application tag */
534 fio_req->appmask = 0xffff;
535 fio_req->apptag = FIO_NVME_PI_APPTAG;
536 }
537
538 for (i = 0; i < lba_count; i++) {
539 pi_offset = (extended_lba_size * (i + 1)) - 8;
540 pi = (struct spdk_nvme_protection_info *)(io_u->buf + pi_offset);
541 memset(pi, 0, sizeof(*pi));
542
543 if (io_u->ddir == DDIR_WRITE) {
544 if (fio_qpair->io_flags & SPDK_NVME_IO_FLAGS_PRCHK_GUARD) {
545 /* CRC buffer should not include PI */
546 crc16 = spdk_crc16_t10dif(io_u->buf + extended_lba_size * i,
547 extended_lba_size - 8);
548 to_be16(&pi->guard, crc16);
549 }
550 if (fio_qpair->io_flags & SPDK_NVME_IO_FLAGS_PRCHK_APPTAG) {
551 /* Let's use number of lbas for application tag */
552 to_be16(&pi->app_tag, FIO_NVME_PI_APPTAG);
553 }
554 if (fio_qpair->io_flags & SPDK_NVME_IO_FLAGS_PRCHK_REFTAG) {
555 to_be32(&pi->ref_tag, (uint32_t)lba + i);
556 }
557 }
558 }
559}
560
561static void
562fio_extended_lba_verify_pi(struct spdk_fio_qpair *fio_qpair, struct io_u *io_u)
563{
564 struct spdk_nvme_ns *ns = NULL;
565 struct spdk_nvme_protection_info *pi;
566 uint16_t crc16, guard, app_tag;
567 uint32_t i, md_size, sector_size, pi_offset, extended_lba_size, ref_tag, lba_count;
568 uint64_t lba;
569
570 ns = fio_qpair->ns;
571 sector_size = spdk_nvme_ns_get_sector_size(ns);
572 md_size = spdk_nvme_ns_get_md_size(ns);
573 extended_lba_size = sector_size + md_size;
574 lba = io_u->offset / extended_lba_size;
575 lba_count = io_u->xfer_buflen / extended_lba_size;
576
577 for (i = 0; i < lba_count; i++) {
578 pi_offset = (extended_lba_size * (i + 1)) - 8;
579 pi = (struct spdk_nvme_protection_info *)(io_u->buf + pi_offset);
580
581 if (fio_qpair->io_flags & SPDK_NVME_IO_FLAGS_PRCHK_GUARD) {
582 /* CRC buffer should not include last 8 bytes of PI */
583 crc16 = spdk_crc16_t10dif(io_u->buf + extended_lba_size * i,
584 extended_lba_size - 8);
585 to_be16(&guard, crc16);
586 if (pi->guard != guard) {
587 fprintf(stdout, "Get Guard Error LBA 0x%16.16"PRIx64","
588 " Expected 0x%04x but returned with 0x%04x,"
589 " may read the LBA without write it first\n",
590 lba + i, guard, pi->guard);
591 }
592
593 }
594 if (fio_qpair->io_flags & SPDK_NVME_IO_FLAGS_PRCHK_APPTAG) {
595 /* Previously we used the number of lbas as
596 * application tag for writes
597 */
598 to_be16(&app_tag, FIO_NVME_PI_APPTAG);
599 if (pi->app_tag != app_tag) {
600 fprintf(stdout, "Get Application Tag Error LBA 0x%16.16"PRIx64","
601 " Expected 0x%04x but returned with 0x%04x,"
602 " may read the LBA without write it first\n",
603 lba + i, app_tag, pi->app_tag);
604 }
605 }
606 if (fio_qpair->io_flags & SPDK_NVME_IO_FLAGS_PRCHK_REFTAG) {
607 to_be32(&ref_tag, (uint32_t)lba + i);
608 if (pi->ref_tag != ref_tag) {
609 fprintf(stdout, "Get Reference Tag Error LBA 0x%16.16"PRIx64","
610 " Expected 0x%08x but returned with 0x%08x,"
611 " may read the LBA without write it first\n",
612 lba + i, ref_tag, pi->ref_tag);
613 }
614 }
615 }
616}
617
7c673cae
FG
618static void spdk_fio_completion_cb(void *ctx, const struct spdk_nvme_cpl *cpl)
619{
620 struct spdk_fio_request *fio_req = ctx;
621 struct spdk_fio_thread *fio_thread = fio_req->fio_thread;
622
11fdf7f2
TL
623 if (fio_thread->fio_qpair->do_nvme_pi) {
624 fio_extended_lba_verify_pi(fio_thread->fio_qpair, fio_req->io);
625 }
626
7c673cae
FG
627 assert(fio_thread->iocq_count < fio_thread->iocq_size);
628 fio_thread->iocq[fio_thread->iocq_count++] = fio_req->io;
629}
630
11fdf7f2
TL
631static void
632spdk_nvme_io_reset_sgl(void *ref, uint32_t sgl_offset)
633{
634 struct spdk_fio_request *fio_req = (struct spdk_fio_request *)ref;
635
636 fio_req->iov_offset = sgl_offset;
637}
638
639static int
640spdk_nvme_io_next_sge(void *ref, void **address, uint32_t *length)
641{
642 struct spdk_fio_request *fio_req = (struct spdk_fio_request *)ref;
643 struct io_u *io_u = fio_req->io;
644
645 *address = io_u->buf;
646 *length = io_u->xfer_buflen;
647
648 if (fio_req->iov_offset) {
649 assert(fio_req->iov_offset <= io_u->xfer_buflen);
650 *address += fio_req->iov_offset;
651 *length -= fio_req->iov_offset;
652 }
653
654 return 0;
655}
656
/* Return type of the queue hook: plain int before ioops version 24, the enum from then on. */
#if FIO_IOOPS_VERSION < 24
typedef int fio_q_status_t;
#else
typedef enum fio_q_status fio_q_status_t;
#endif
662
663static fio_q_status_t
664spdk_fio_queue(struct thread_data *td, struct io_u *io_u)
7c673cae
FG
665{
666 int rc = 1;
667 struct spdk_fio_thread *fio_thread = td->io_ops_data;
668 struct spdk_fio_request *fio_req = io_u->engine_data;
11fdf7f2
TL
669 struct spdk_fio_qpair *fio_qpair;
670 struct spdk_nvme_ns *ns = NULL;
671 uint32_t block_size;
672 uint64_t lba;
673 uint32_t lba_count;
7c673cae
FG
674
675 /* Find the namespace that corresponds to the file in the io_u */
11fdf7f2
TL
676 fio_qpair = fio_thread->fio_qpair;
677 while (fio_qpair != NULL) {
678 if (fio_qpair->f == io_u->file) {
679 ns = fio_qpair->ns;
7c673cae
FG
680 break;
681 }
11fdf7f2 682 fio_qpair = fio_qpair->next;
7c673cae 683 }
11fdf7f2
TL
684 if (fio_qpair == NULL || ns == NULL) {
685 return -ENXIO;
7c673cae 686 }
7c673cae 687
11fdf7f2
TL
688 block_size = spdk_nvme_ns_get_extended_sector_size(ns);
689
690 lba = io_u->offset / block_size;
691 lba_count = io_u->xfer_buflen / block_size;
692
693 // TODO: considering situations that fio will randomize and verify io_u
694 if (fio_qpair->do_nvme_pi) {
695 fio_extended_lba_setup_pi(fio_qpair, io_u);
696 }
7c673cae
FG
697
698 switch (io_u->ddir) {
699 case DDIR_READ:
11fdf7f2
TL
700 if (!spdk_enable_sgl) {
701 rc = spdk_nvme_ns_cmd_read_with_md(ns, fio_qpair->qpair, io_u->buf, NULL, lba, lba_count,
702 spdk_fio_completion_cb, fio_req,
703 fio_qpair->io_flags, fio_req->appmask, fio_req->apptag);
704 } else {
705 rc = spdk_nvme_ns_cmd_readv_with_md(ns, fio_qpair->qpair, lba,
706 lba_count, spdk_fio_completion_cb, fio_req, fio_qpair->io_flags,
707 spdk_nvme_io_reset_sgl, spdk_nvme_io_next_sge, NULL,
708 fio_req->appmask, fio_req->apptag);
709 }
7c673cae
FG
710 break;
711 case DDIR_WRITE:
11fdf7f2
TL
712 if (!spdk_enable_sgl) {
713 rc = spdk_nvme_ns_cmd_write_with_md(ns, fio_qpair->qpair, io_u->buf, NULL, lba, lba_count,
714 spdk_fio_completion_cb, fio_req,
715 fio_qpair->io_flags, fio_req->appmask, fio_req->apptag);
716 } else {
717 rc = spdk_nvme_ns_cmd_writev_with_md(ns, fio_qpair->qpair, lba,
718 lba_count, spdk_fio_completion_cb, fio_req, fio_qpair->io_flags,
719 spdk_nvme_io_reset_sgl, spdk_nvme_io_next_sge, NULL,
720 fio_req->appmask, fio_req->apptag);
721 }
7c673cae
FG
722 break;
723 default:
724 assert(false);
725 break;
726 }
727
11fdf7f2
TL
728 /* NVMe read/write functions return -ENOMEM if there are no free requests. */
729 if (rc == -ENOMEM) {
730 return FIO_Q_BUSY;
731 }
7c673cae 732
11fdf7f2
TL
733 if (rc != 0) {
734 return -abs(rc);
735 }
736
737 return FIO_Q_QUEUED;
7c673cae
FG
738}
739
740static struct io_u *spdk_fio_event(struct thread_data *td, int event)
741{
742 struct spdk_fio_thread *fio_thread = td->io_ops_data;
743
744 assert(event >= 0);
745 assert((unsigned)event < fio_thread->iocq_count);
746 return fio_thread->iocq[event];
747}
748
749static int spdk_fio_getevents(struct thread_data *td, unsigned int min,
750 unsigned int max, const struct timespec *t)
751{
752 struct spdk_fio_thread *fio_thread = td->io_ops_data;
11fdf7f2 753 struct spdk_fio_qpair *fio_qpair = NULL;
7c673cae
FG
754 struct timespec t0, t1;
755 uint64_t timeout = 0;
756
757 if (t) {
758 timeout = t->tv_sec * 1000000000L + t->tv_nsec;
759 clock_gettime(CLOCK_MONOTONIC_RAW, &t0);
760 }
761
762 fio_thread->iocq_count = 0;
763
11fdf7f2
TL
764 /* fetch the next qpair */
765 if (fio_thread->fio_qpair_current) {
766 fio_qpair = fio_thread->fio_qpair_current->next;
767 }
768
7c673cae 769 for (;;) {
11fdf7f2
TL
770 if (fio_qpair == NULL) {
771 fio_qpair = fio_thread->fio_qpair;
772 }
773
774 while (fio_qpair != NULL) {
775 spdk_nvme_qpair_process_completions(fio_qpair->qpair, max - fio_thread->iocq_count);
7c673cae
FG
776
777 if (fio_thread->iocq_count >= min) {
11fdf7f2
TL
778 /* reset the currrent handling qpair */
779 fio_thread->fio_qpair_current = fio_qpair;
7c673cae
FG
780 return fio_thread->iocq_count;
781 }
782
11fdf7f2 783 fio_qpair = fio_qpair->next;
7c673cae
FG
784 }
785
786 if (t) {
11fdf7f2
TL
787 uint64_t elapse;
788
7c673cae 789 clock_gettime(CLOCK_MONOTONIC_RAW, &t1);
11fdf7f2
TL
790 elapse = ((t1.tv_sec - t0.tv_sec) * 1000000000L)
791 + t1.tv_nsec - t0.tv_nsec;
7c673cae
FG
792 if (elapse > timeout) {
793 break;
794 }
795 }
796 }
797
11fdf7f2
TL
798 /* reset the currrent handling qpair */
799 fio_thread->fio_qpair_current = fio_qpair;
7c673cae
FG
800 return fio_thread->iocq_count;
801}
802
/* invalidate hook: currently does nothing and reports success (0). */
static int spdk_fio_invalidate(struct thread_data *td, struct fio_file *f)
{
	(void)td;
	(void)f;

	/* TODO: This should probably send a flush to the device, but for now just return successful. */
	return 0;
}
808
809static void spdk_fio_cleanup(struct thread_data *td)
810{
811 struct spdk_fio_thread *fio_thread = td->io_ops_data;
11fdf7f2
TL
812 struct spdk_fio_qpair *fio_qpair, *fio_qpair_tmp;
813
814 fio_qpair = fio_thread->fio_qpair;
815 while (fio_qpair != NULL) {
816 spdk_nvme_ctrlr_free_io_qpair(fio_qpair->qpair);
817 fio_qpair_tmp = fio_qpair->next;
818 free(fio_qpair);
819 fio_qpair = fio_qpair_tmp;
7c673cae
FG
820 }
821
822 free(fio_thread);
11fdf7f2
TL
823
824 pthread_mutex_lock(&mutex);
825 td_count--;
826 if (td_count == 0) {
827 struct spdk_fio_ctrlr *fio_ctrlr, *fio_ctrlr_tmp;
828
829 fio_ctrlr = ctrlr_g;
830 while (fio_ctrlr != NULL) {
831 spdk_nvme_detach(fio_ctrlr->ctrlr);
832 fio_ctrlr_tmp = fio_ctrlr->next;
833 free(fio_ctrlr);
834 fio_ctrlr = fio_ctrlr_tmp;
835 }
836 ctrlr_g = NULL;
837 }
838 pthread_mutex_unlock(&mutex);
839 if (!ctrlr_g) {
840 if (pthread_cancel(g_ctrlr_thread_id) == 0) {
841 pthread_join(g_ctrlr_thread_id, NULL);
842 }
843 }
7c673cae
FG
844}
845
11fdf7f2
TL
846/* This function enables addition of SPDK parameters to the fio config
847 * Adding new parameters by defining them here and defining a callback
848 * function to read the parameter value. */
849static struct fio_option options[] = {
850 {
851 .name = "mem_size_mb",
852 .lname = "Memory size in MB",
853 .type = FIO_OPT_INT,
854 .off1 = offsetof(struct spdk_fio_options, mem_size),
855 .def = "512",
856 .help = "Memory Size for SPDK (MB)",
857 .category = FIO_OPT_C_ENGINE,
858 .group = FIO_OPT_G_INVALID,
859 },
860 {
861 .name = "shm_id",
862 .lname = "shared memory ID",
863 .type = FIO_OPT_INT,
864 .off1 = offsetof(struct spdk_fio_options, shm_id),
865 .def = "-1",
866 .help = "Shared Memory ID",
867 .category = FIO_OPT_C_ENGINE,
868 .group = FIO_OPT_G_INVALID,
869 },
870 {
871 .name = "enable_sgl",
872 .lname = "SGL used for I/O commands",
873 .type = FIO_OPT_INT,
874 .off1 = offsetof(struct spdk_fio_options, enable_sgl),
875 .def = "0",
876 .help = "SGL Used for I/O Commands (enable_sgl=1 or enable_sgl=0)",
877 .category = FIO_OPT_C_ENGINE,
878 .group = FIO_OPT_G_INVALID,
879 },
880 {
881 .name = "hostnqn",
882 .lname = "Host NQN to use when connecting to controllers.",
883 .type = FIO_OPT_STR_STORE,
884 .off1 = offsetof(struct spdk_fio_options, hostnqn),
885 .help = "Host NQN",
886 .category = FIO_OPT_C_ENGINE,
887 .group = FIO_OPT_G_INVALID,
888 },
889 {
890 .name = "pi_act",
891 .lname = "Protection Information Action",
892 .type = FIO_OPT_INT,
893 .off1 = offsetof(struct spdk_fio_options, pi_act),
894 .def = "1",
895 .help = "Protection Information Action bit (pi_act=1 or pi_act=0)",
896 .category = FIO_OPT_C_ENGINE,
897 .group = FIO_OPT_G_INVALID,
898 },
899 {
900 .name = "pi_chk",
901 .lname = "Protection Information Check(GUARD|REFTAG|APPTAG)",
902 .type = FIO_OPT_STR_STORE,
903 .off1 = offsetof(struct spdk_fio_options, pi_chk),
904 .def = NULL,
905 .help = "Control of Protection Information Checking (pi_chk=GUARD|REFTAG|APPTAG)",
906 .category = FIO_OPT_C_ENGINE,
907 .group = FIO_OPT_G_INVALID,
908 },
909 {
910 .name = NULL,
911 },
912};
913
7c673cae
FG
914/* FIO imports this structure using dlsym */
915struct ioengine_ops ioengine = {
11fdf7f2 916 .name = "spdk",
7c673cae
FG
917 .version = FIO_IOOPS_VERSION,
918 .queue = spdk_fio_queue,
919 .getevents = spdk_fio_getevents,
920 .event = spdk_fio_event,
921 .cleanup = spdk_fio_cleanup,
922 .open_file = spdk_fio_open,
923 .close_file = spdk_fio_close,
924 .invalidate = spdk_fio_invalidate,
925 .iomem_alloc = spdk_fio_iomem_alloc,
926 .iomem_free = spdk_fio_iomem_free,
927 .setup = spdk_fio_setup,
928 .io_u_init = spdk_fio_io_u_init,
929 .io_u_free = spdk_fio_io_u_free,
930 .flags = FIO_RAWIO | FIO_NOEXTEND | FIO_NODISKUTIL | FIO_MEMALIGN,
11fdf7f2
TL
931 .options = options,
932 .option_struct_size = sizeof(struct spdk_fio_options),
7c673cae 933};
11fdf7f2
TL
934
935static void fio_init fio_spdk_register(void)
936{
937 register_ioengine(&ioengine);
938}
939
940static void fio_exit fio_spdk_unregister(void)
941{
942 unregister_ioengine(&ioengine);
943}