]>
Commit | Line | Data |
---|---|---|
7c673cae FG |
1 | /*- |
2 | * BSD LICENSE | |
3 | * | |
4 | * Copyright (c) Intel Corporation. | |
5 | * All rights reserved. | |
6 | * | |
7 | * Redistribution and use in source and binary forms, with or without | |
8 | * modification, are permitted provided that the following conditions | |
9 | * are met: | |
10 | * | |
11 | * * Redistributions of source code must retain the above copyright | |
12 | * notice, this list of conditions and the following disclaimer. | |
13 | * * Redistributions in binary form must reproduce the above copyright | |
14 | * notice, this list of conditions and the following disclaimer in | |
15 | * the documentation and/or other materials provided with the | |
16 | * distribution. | |
17 | * * Neither the name of Intel Corporation nor the names of its | |
18 | * contributors may be used to endorse or promote products derived | |
19 | * from this software without specific prior written permission. | |
20 | * | |
21 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | |
22 | * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | |
23 | * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | |
24 | * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | |
25 | * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | |
26 | * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | |
27 | * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | |
28 | * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | |
29 | * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | |
30 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | |
31 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
32 | */ | |
33 | ||
11fdf7f2 | 34 | #include "spdk/stdinc.h" |
7c673cae FG |
35 | |
36 | #include "spdk/nvme.h" | |
37 | #include "spdk/env.h" | |
38 | #include "spdk/string.h" | |
11fdf7f2 TL |
39 | #include "spdk/log.h" |
40 | #include "spdk/endian.h" | |
41 | #include "spdk/crc16.h" | |
7c673cae FG |
42 | |
43 | #include "config-host.h" | |
44 | #include "fio.h" | |
45 | #include "optgroup.h" | |
46 | ||
/* Alignment, in bytes, requested for the I/O buffer in spdk_fio_iomem_alloc(). */
#define NVME_IO_ALIGN		4096
/* Fixed application tag written into, and expected from, protection information. */
#define FIO_NVME_PI_APPTAG	0x1234

/* Set after spdk_env_init() succeeds so the environment is initialized only once. */
static bool spdk_env_initialized;
/* Copy of the enable_sgl option; non-zero selects the readv/writev submission path. */
static int spdk_enable_sgl = 0;
/* Copy of the pi_act option; OR-ed into each qpair's io_flags in attach_cb(). */
static uint32_t spdk_pract_flag;
/* PRCHK flags derived from the pi_chk option by parse_prchk_flags(). */
static uint32_t spdk_prchk_flags;
54 | ||
/*
 * Engine-specific options. Each field is filled in through the matching
 * offsetof() entry in the options[] table.
 */
struct spdk_fio_options {
	void	*pad;		/* off1 used in option descriptions may not be 0 */
	int	mem_size;	/* "mem_size_mb" option */
	int	shm_id;		/* "shm_id" option */
	int	enable_sgl;	/* "enable_sgl" option */
	char	*hostnqn;	/* "hostnqn" option; copied into the ctrlr opts in probe_cb() */
	int	pi_act;		/* "pi_act" option */
	char	*pi_chk;	/* "pi_chk" option; parsed by parse_prchk_flags() */
};
7c673cae FG |
64 | |
/* Per-io_u engine data, allocated in spdk_fio_io_u_init(). */
struct spdk_fio_request {
	/* The fio I/O unit this request belongs to. */
	struct io_u		*io;
	/** Offset in current iovec, fio only uses 1 vector */
	uint32_t		iov_offset;

	/** Application tag and its mask for NVMe PI */
	uint16_t		appmask;
	uint16_t		apptag;

	/* Owning per-thread state; its completion queue receives this io_u. */
	struct spdk_fio_thread	*fio_thread;
};
76 | ||
11fdf7f2 TL |
77 | struct spdk_fio_ctrlr { |
78 | struct spdk_nvme_transport_id tr_id; | |
79 | struct spdk_nvme_ctrlr_opts opts; | |
80 | struct spdk_nvme_ctrlr *ctrlr; | |
81 | struct spdk_fio_ctrlr *next; | |
7c673cae FG |
82 | }; |
83 | ||
11fdf7f2 TL |
/* Head of the list of attached controllers. */
static struct spdk_fio_ctrlr *ctrlr_g;
/* Number of fio threads that completed spdk_fio_setup() and have not yet cleaned up. */
static int td_count;
/* Thread running spdk_fio_poll_ctrlrs(). */
static pthread_t g_ctrlr_thread_id = 0;
/* Taken around setup, cleanup bookkeeping and admin-queue polling. */
static pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER;
/* Set by attach_cb() when attaching a file fails; checked in spdk_fio_setup(). */
static bool g_error;
7c673cae | 89 | |
11fdf7f2 TL |
/* Binding of one fio file to a namespace and the I/O qpair used to reach it. */
struct spdk_fio_qpair {
	struct fio_file		*f;		/* fio file served by this qpair */
	struct spdk_nvme_qpair	*qpair;		/* SPDK I/O queue pair */
	struct spdk_nvme_ns	*ns;		/* namespace selected by "ns=" in the file name */
	uint32_t		io_flags;	/* PRACT/PRCHK flags passed with every I/O */
	bool			do_nvme_pi;	/* result of fio_do_nvme_pi_check() */
	struct spdk_fio_qpair	*next;		/* next qpair of the same thread, or NULL */
	struct spdk_fio_ctrlr	*fio_ctrlr;	/* controller the qpair was allocated on */
};
99 | ||
/* Per-fio-thread engine state, stored in td->io_ops_data. */
struct spdk_fio_thread {
	struct thread_data	*td;

	/* Head of this thread's qpair list. */
	struct spdk_fio_qpair	*fio_qpair;
	/* Qpair polled last by getevents; the next call resumes after it. */
	struct spdk_fio_qpair	*fio_qpair_current;

	struct io_u		**iocq;		/* io completion queue */
	unsigned int		iocq_count;	/* number of iocq entries filled by last getevents */
	unsigned int		iocq_size;	/* number of iocq entries allocated */
	struct fio_file		*current_f;	/* fio_file given by user */

};
112 | ||
11fdf7f2 TL |
113 | static void * |
114 | spdk_fio_poll_ctrlrs(void *arg) | |
115 | { | |
116 | struct spdk_fio_ctrlr *fio_ctrlr; | |
117 | int oldstate; | |
118 | int rc; | |
119 | ||
120 | /* Loop until the thread is cancelled */ | |
121 | while (true) { | |
122 | rc = pthread_setcancelstate(PTHREAD_CANCEL_DISABLE, &oldstate); | |
123 | if (rc != 0) { | |
124 | SPDK_ERRLOG("Unable to set cancel state disabled on g_init_thread (%d): %s\n", | |
125 | rc, spdk_strerror(rc)); | |
126 | } | |
127 | ||
128 | pthread_mutex_lock(&mutex); | |
129 | fio_ctrlr = ctrlr_g; | |
130 | ||
131 | while (fio_ctrlr) { | |
132 | spdk_nvme_ctrlr_process_admin_completions(fio_ctrlr->ctrlr); | |
133 | fio_ctrlr = fio_ctrlr->next; | |
134 | } | |
135 | ||
136 | pthread_mutex_unlock(&mutex); | |
137 | ||
138 | rc = pthread_setcancelstate(PTHREAD_CANCEL_ENABLE, &oldstate); | |
139 | if (rc != 0) { | |
140 | SPDK_ERRLOG("Unable to set cancel state enabled on g_init_thread (%d): %s\n", | |
141 | rc, spdk_strerror(rc)); | |
142 | } | |
143 | ||
144 | /* This is a pthread cancellation point and cannot be removed. */ | |
145 | sleep(1); | |
146 | } | |
147 | ||
148 | return NULL; | |
149 | } | |
150 | ||
7c673cae FG |
151 | static bool |
152 | probe_cb(void *cb_ctx, const struct spdk_nvme_transport_id *trid, | |
153 | struct spdk_nvme_ctrlr_opts *opts) | |
154 | { | |
11fdf7f2 TL |
155 | struct thread_data *td = cb_ctx; |
156 | struct spdk_fio_options *fio_options = td->eo; | |
7c673cae | 157 | |
11fdf7f2 TL |
158 | if (fio_options->hostnqn) { |
159 | snprintf(opts->hostnqn, sizeof(opts->hostnqn), "%s", fio_options->hostnqn); | |
7c673cae FG |
160 | } |
161 | ||
11fdf7f2 TL |
162 | return true; |
163 | } | |
164 | ||
165 | static struct spdk_fio_ctrlr * | |
166 | get_fio_ctrlr(const struct spdk_nvme_transport_id *trid) | |
167 | { | |
168 | struct spdk_fio_ctrlr *fio_ctrlr = ctrlr_g; | |
169 | while (fio_ctrlr) { | |
170 | if (spdk_nvme_transport_id_compare(trid, &fio_ctrlr->tr_id) == 0) { | |
171 | return fio_ctrlr; | |
7c673cae | 172 | } |
11fdf7f2 TL |
173 | |
174 | fio_ctrlr = fio_ctrlr->next; | |
175 | } | |
176 | ||
177 | return NULL; | |
178 | } | |
179 | ||
180 | static bool | |
181 | fio_do_nvme_pi_check(struct spdk_fio_qpair *fio_qpair) | |
182 | { | |
183 | struct spdk_nvme_ns *ns = NULL; | |
184 | const struct spdk_nvme_ns_data *nsdata; | |
185 | ||
186 | ns = fio_qpair->ns; | |
187 | nsdata = spdk_nvme_ns_get_data(ns); | |
188 | ||
189 | if (!spdk_nvme_ns_supports_extended_lba(ns)) { | |
190 | return false; | |
191 | } | |
192 | ||
193 | if (spdk_nvme_ns_get_pi_type(ns) == | |
194 | SPDK_NVME_FMT_NVM_PROTECTION_DISABLE) { | |
195 | return false; | |
7c673cae FG |
196 | } |
197 | ||
11fdf7f2 TL |
198 | /* PI locates at the first 8 bytes of metadata, |
199 | * doesn't support now | |
200 | */ | |
201 | if (nsdata->dps.md_start) { | |
202 | return false; | |
203 | } | |
204 | ||
205 | /* Controller performs PI setup and check */ | |
206 | if (fio_qpair->io_flags & SPDK_NVME_IO_FLAGS_PRACT) { | |
207 | return false; | |
208 | } | |
209 | ||
210 | /* Type3 don't support REFTAG */ | |
211 | if (spdk_nvme_ns_get_pi_type(ns) == | |
212 | SPDK_NVME_FMT_NVM_PROTECTION_TYPE3) { | |
213 | return false; | |
214 | } | |
215 | ||
216 | return true; | |
7c673cae FG |
217 | } |
218 | ||
219 | static void | |
220 | attach_cb(void *cb_ctx, const struct spdk_nvme_transport_id *trid, | |
221 | struct spdk_nvme_ctrlr *ctrlr, const struct spdk_nvme_ctrlr_opts *opts) | |
222 | { | |
11fdf7f2 | 223 | struct thread_data *td = cb_ctx; |
7c673cae FG |
224 | struct spdk_fio_thread *fio_thread = td->io_ops_data; |
225 | struct spdk_fio_ctrlr *fio_ctrlr; | |
11fdf7f2 TL |
226 | struct spdk_fio_qpair *fio_qpair; |
227 | struct spdk_nvme_ns *ns; | |
228 | struct fio_file *f = fio_thread->current_f; | |
229 | uint32_t ns_id; | |
230 | char *p; | |
231 | ||
232 | p = strstr(f->file_name, "ns="); | |
233 | assert(p != NULL); | |
234 | ns_id = atoi(p + 3); | |
235 | if (!ns_id) { | |
236 | SPDK_ERRLOG("namespace id should be >=1, but current value=0\n"); | |
237 | g_error = true; | |
238 | return; | |
239 | } | |
7c673cae | 240 | |
11fdf7f2 TL |
241 | fio_ctrlr = get_fio_ctrlr(trid); |
242 | /* it is a new ctrlr and needs to be added */ | |
243 | if (!fio_ctrlr) { | |
244 | /* Create an fio_ctrlr and add it to the list */ | |
245 | fio_ctrlr = calloc(1, sizeof(*fio_ctrlr)); | |
246 | if (!fio_ctrlr) { | |
247 | SPDK_ERRLOG("Cannot allocate space for fio_ctrlr\n"); | |
248 | g_error = true; | |
249 | return; | |
250 | } | |
251 | fio_ctrlr->opts = *opts; | |
252 | fio_ctrlr->ctrlr = ctrlr; | |
253 | fio_ctrlr->tr_id = *trid; | |
254 | fio_ctrlr->next = ctrlr_g; | |
255 | ctrlr_g = fio_ctrlr; | |
256 | } | |
7c673cae | 257 | |
11fdf7f2 TL |
258 | ns = spdk_nvme_ctrlr_get_ns(fio_ctrlr->ctrlr, ns_id); |
259 | if (ns == NULL) { | |
260 | SPDK_ERRLOG("Cannot get namespace by ns_id=%d\n", ns_id); | |
261 | g_error = true; | |
262 | return; | |
263 | } | |
7c673cae | 264 | |
11fdf7f2 TL |
265 | if (!spdk_nvme_ns_is_active(ns)) { |
266 | SPDK_ERRLOG("Inactive namespace by ns_id=%d\n", ns_id); | |
267 | g_error = true; | |
268 | return; | |
269 | } | |
7c673cae | 270 | |
11fdf7f2 TL |
271 | fio_qpair = fio_thread->fio_qpair; |
272 | while (fio_qpair != NULL) { | |
273 | if ((fio_qpair->f == f) || | |
274 | ((spdk_nvme_transport_id_compare(trid, &fio_qpair->fio_ctrlr->tr_id) == 0) && | |
275 | (spdk_nvme_ns_get_id(fio_qpair->ns) == ns_id))) { | |
276 | /* Not the error case. Avoid duplicated connection */ | |
277 | return; | |
7c673cae | 278 | } |
11fdf7f2 TL |
279 | fio_qpair = fio_qpair->next; |
280 | } | |
281 | ||
282 | /* create a new qpair */ | |
283 | fio_qpair = calloc(1, sizeof(*fio_qpair)); | |
284 | if (!fio_qpair) { | |
285 | g_error = true; | |
286 | SPDK_ERRLOG("Cannot allocate space for fio_qpair\n"); | |
287 | return; | |
288 | } | |
289 | ||
290 | fio_qpair->qpair = spdk_nvme_ctrlr_alloc_io_qpair(fio_ctrlr->ctrlr, NULL, 0); | |
291 | if (!fio_qpair->qpair) { | |
292 | SPDK_ERRLOG("Cannot allocate nvme io_qpair any more\n"); | |
293 | g_error = true; | |
294 | free(fio_qpair); | |
295 | return; | |
296 | } | |
297 | ||
298 | fio_qpair->ns = ns; | |
299 | fio_qpair->f = f; | |
300 | fio_qpair->fio_ctrlr = fio_ctrlr; | |
301 | fio_qpair->next = fio_thread->fio_qpair; | |
302 | fio_thread->fio_qpair = fio_qpair; | |
303 | ||
304 | if (spdk_nvme_ns_get_flags(ns) & SPDK_NVME_NS_DPS_PI_SUPPORTED) { | |
305 | fio_qpair->io_flags = spdk_pract_flag | spdk_prchk_flags; | |
306 | } | |
307 | ||
308 | fio_qpair->do_nvme_pi = fio_do_nvme_pi_check(fio_qpair); | |
309 | ||
310 | f->real_file_size = spdk_nvme_ns_get_size(fio_qpair->ns); | |
311 | if (f->real_file_size <= 0) { | |
312 | g_error = true; | |
313 | SPDK_ERRLOG("Cannot get namespace size by ns=%p\n", ns); | |
314 | return; | |
315 | } | |
316 | ||
317 | f->filetype = FIO_TYPE_BLOCK; | |
318 | fio_file_set_size_known(f); | |
319 | } | |
320 | ||
321 | static void parse_prchk_flags(const char *prchk_str) | |
322 | { | |
323 | if (!prchk_str) { | |
324 | return; | |
325 | } | |
326 | ||
327 | if (strstr(prchk_str, "GUARD") != NULL) { | |
328 | spdk_prchk_flags = SPDK_NVME_IO_FLAGS_PRCHK_GUARD; | |
329 | } | |
330 | if (strstr(prchk_str, "REFTAG") != NULL) { | |
331 | spdk_prchk_flags |= SPDK_NVME_IO_FLAGS_PRCHK_REFTAG; | |
332 | } | |
333 | if (strstr(prchk_str, "APPTAG") != NULL) { | |
334 | spdk_prchk_flags |= SPDK_NVME_IO_FLAGS_PRCHK_APPTAG; | |
7c673cae FG |
335 | } |
336 | } | |
337 | ||
338 | /* Called once at initialization. This is responsible for gathering the size of | |
339 | * each "file", which in our case are in the form | |
11fdf7f2 TL |
340 | * 'key=value [key=value] ... ns=value' |
341 | * For example, For local PCIe NVMe device - 'trtype=PCIe traddr=0000.04.00.0 ns=1' | |
342 | * For remote exported by NVMe-oF target, 'trtype=RDMA adrfam=IPv4 traddr=192.168.100.8 trsvcid=4420 ns=1' */ | |
7c673cae FG |
343 | static int spdk_fio_setup(struct thread_data *td) |
344 | { | |
345 | struct spdk_fio_thread *fio_thread; | |
11fdf7f2 | 346 | struct spdk_fio_options *fio_options = td->eo; |
7c673cae | 347 | struct spdk_env_opts opts; |
11fdf7f2 TL |
348 | struct fio_file *f; |
349 | char *p; | |
350 | int rc = 0; | |
351 | struct spdk_nvme_transport_id trid; | |
352 | struct spdk_fio_ctrlr *fio_ctrlr; | |
353 | char *trid_info; | |
354 | unsigned int i; | |
7c673cae FG |
355 | |
356 | if (!td->o.use_thread) { | |
357 | log_err("spdk: must set thread=1 when using spdk plugin\n"); | |
358 | return 1; | |
359 | } | |
360 | ||
11fdf7f2 TL |
361 | pthread_mutex_lock(&mutex); |
362 | ||
7c673cae FG |
363 | fio_thread = calloc(1, sizeof(*fio_thread)); |
364 | assert(fio_thread != NULL); | |
365 | ||
366 | td->io_ops_data = fio_thread; | |
367 | fio_thread->td = td; | |
368 | ||
369 | fio_thread->iocq_size = td->o.iodepth; | |
370 | fio_thread->iocq = calloc(fio_thread->iocq_size, sizeof(struct io_u *)); | |
371 | assert(fio_thread->iocq != NULL); | |
372 | ||
11fdf7f2 TL |
373 | if (!spdk_env_initialized) { |
374 | spdk_env_opts_init(&opts); | |
375 | opts.name = "fio"; | |
376 | opts.mem_size = fio_options->mem_size; | |
377 | opts.shm_id = fio_options->shm_id; | |
378 | spdk_enable_sgl = fio_options->enable_sgl; | |
379 | spdk_pract_flag = fio_options->pi_act; | |
380 | parse_prchk_flags(fio_options->pi_chk); | |
381 | if (spdk_env_init(&opts) < 0) { | |
382 | SPDK_ERRLOG("Unable to initialize SPDK env\n"); | |
383 | free(fio_thread->iocq); | |
384 | free(fio_thread); | |
385 | fio_thread = NULL; | |
386 | pthread_mutex_unlock(&mutex); | |
387 | return 1; | |
388 | } | |
389 | spdk_env_initialized = true; | |
390 | spdk_unaffinitize_thread(); | |
7c673cae | 391 | |
11fdf7f2 TL |
392 | /* Spawn a thread to continue polling the controllers */ |
393 | rc = pthread_create(&g_ctrlr_thread_id, NULL, &spdk_fio_poll_ctrlrs, NULL); | |
394 | if (rc != 0) { | |
395 | SPDK_ERRLOG("Unable to spawn a thread to poll admin queues. They won't be polled.\n"); | |
396 | } | |
7c673cae FG |
397 | } |
398 | ||
11fdf7f2 TL |
399 | for_each_file(td, f, i) { |
400 | memset(&trid, 0, sizeof(trid)); | |
401 | ||
402 | trid.trtype = SPDK_NVME_TRANSPORT_PCIE; | |
403 | ||
404 | p = strstr(f->file_name, " ns="); | |
405 | if (p == NULL) { | |
406 | SPDK_ERRLOG("Failed to find namespace 'ns=X'\n"); | |
407 | continue; | |
408 | } | |
409 | ||
410 | trid_info = strndup(f->file_name, p - f->file_name); | |
411 | if (!trid_info) { | |
412 | SPDK_ERRLOG("Failed to allocate space for trid_info\n"); | |
413 | continue; | |
414 | } | |
415 | ||
416 | rc = spdk_nvme_transport_id_parse(&trid, trid_info); | |
417 | if (rc < 0) { | |
418 | SPDK_ERRLOG("Failed to parse given str: %s\n", trid_info); | |
419 | free(trid_info); | |
420 | continue; | |
421 | } | |
422 | free(trid_info); | |
423 | ||
424 | if (trid.trtype == SPDK_NVME_TRANSPORT_PCIE) { | |
425 | struct spdk_pci_addr pci_addr; | |
426 | if (spdk_pci_addr_parse(&pci_addr, trid.traddr) < 0) { | |
427 | SPDK_ERRLOG("Invalid traddr=%s\n", trid.traddr); | |
428 | continue; | |
429 | } | |
430 | spdk_pci_addr_fmt(trid.traddr, sizeof(trid.traddr), &pci_addr); | |
431 | } else { | |
432 | if (trid.subnqn[0] == '\0') { | |
433 | snprintf(trid.subnqn, sizeof(trid.subnqn), "%s", | |
434 | SPDK_NVMF_DISCOVERY_NQN); | |
435 | } | |
436 | } | |
437 | ||
438 | fio_thread->current_f = f; | |
439 | ||
440 | fio_ctrlr = get_fio_ctrlr(&trid); | |
441 | if (fio_ctrlr) { | |
442 | attach_cb(td, &trid, fio_ctrlr->ctrlr, &fio_ctrlr->opts); | |
443 | } else { | |
444 | /* Enumerate all of the controllers */ | |
445 | if (spdk_nvme_probe(&trid, td, probe_cb, attach_cb, NULL) != 0) { | |
446 | SPDK_ERRLOG("spdk_nvme_probe() failed\n"); | |
447 | continue; | |
448 | } | |
449 | } | |
450 | ||
451 | if (g_error) { | |
452 | log_err("Failed to initialize spdk fio plugin\n"); | |
453 | rc = 1; | |
454 | break; | |
455 | } | |
456 | } | |
457 | ||
458 | td_count++; | |
459 | ||
460 | pthread_mutex_unlock(&mutex); | |
461 | ||
462 | return rc; | |
7c673cae FG |
463 | } |
464 | ||
/* open_file hook: nothing to do per file, always reports success (0). */
static int
spdk_fio_open(struct thread_data *td, struct fio_file *f)
{
	return 0;
}
469 | ||
/* close_file hook: nothing to do per file, always reports success (0). */
static int
spdk_fio_close(struct thread_data *td, struct fio_file *f)
{
	return 0;
}
474 | ||
475 | static int spdk_fio_iomem_alloc(struct thread_data *td, size_t total_mem) | |
476 | { | |
11fdf7f2 | 477 | td->orig_buffer = spdk_dma_zmalloc(total_mem, NVME_IO_ALIGN, NULL); |
7c673cae FG |
478 | return td->orig_buffer == NULL; |
479 | } | |
480 | ||
481 | static void spdk_fio_iomem_free(struct thread_data *td) | |
482 | { | |
11fdf7f2 | 483 | spdk_dma_free(td->orig_buffer); |
7c673cae FG |
484 | } |
485 | ||
486 | static int spdk_fio_io_u_init(struct thread_data *td, struct io_u *io_u) | |
487 | { | |
488 | struct spdk_fio_thread *fio_thread = td->io_ops_data; | |
489 | struct spdk_fio_request *fio_req; | |
490 | ||
491 | fio_req = calloc(1, sizeof(*fio_req)); | |
492 | if (fio_req == NULL) { | |
493 | return 1; | |
494 | } | |
495 | fio_req->io = io_u; | |
496 | fio_req->fio_thread = fio_thread; | |
497 | ||
498 | io_u->engine_data = fio_req; | |
499 | ||
500 | return 0; | |
501 | } | |
502 | ||
503 | static void spdk_fio_io_u_free(struct thread_data *td, struct io_u *io_u) | |
504 | { | |
505 | struct spdk_fio_request *fio_req = io_u->engine_data; | |
506 | ||
507 | if (fio_req) { | |
508 | assert(fio_req->io == io_u); | |
509 | free(fio_req); | |
510 | io_u->engine_data = NULL; | |
511 | } | |
512 | } | |
513 | ||
11fdf7f2 TL |
514 | static void |
515 | fio_extended_lba_setup_pi(struct spdk_fio_qpair *fio_qpair, struct io_u *io_u) | |
516 | { | |
517 | struct spdk_nvme_ns *ns = NULL; | |
518 | struct spdk_fio_request *fio_req = io_u->engine_data; | |
519 | struct spdk_nvme_protection_info *pi; | |
520 | uint16_t crc16; | |
521 | uint32_t i, md_size, sector_size, pi_offset, extended_lba_size, lba_count; | |
522 | uint64_t lba; | |
523 | ||
524 | ns = fio_qpair->ns; | |
525 | ||
526 | sector_size = spdk_nvme_ns_get_sector_size(ns); | |
527 | md_size = spdk_nvme_ns_get_md_size(ns); | |
528 | extended_lba_size = sector_size + md_size; | |
529 | lba = io_u->offset / extended_lba_size; | |
530 | lba_count = io_u->xfer_buflen / extended_lba_size; | |
531 | ||
532 | if (fio_qpair->io_flags & SPDK_NVME_IO_FLAGS_PRCHK_APPTAG) { | |
533 | /* Let's use number of lbas for application tag */ | |
534 | fio_req->appmask = 0xffff; | |
535 | fio_req->apptag = FIO_NVME_PI_APPTAG; | |
536 | } | |
537 | ||
538 | for (i = 0; i < lba_count; i++) { | |
539 | pi_offset = (extended_lba_size * (i + 1)) - 8; | |
540 | pi = (struct spdk_nvme_protection_info *)(io_u->buf + pi_offset); | |
541 | memset(pi, 0, sizeof(*pi)); | |
542 | ||
543 | if (io_u->ddir == DDIR_WRITE) { | |
544 | if (fio_qpair->io_flags & SPDK_NVME_IO_FLAGS_PRCHK_GUARD) { | |
545 | /* CRC buffer should not include PI */ | |
546 | crc16 = spdk_crc16_t10dif(io_u->buf + extended_lba_size * i, | |
547 | extended_lba_size - 8); | |
548 | to_be16(&pi->guard, crc16); | |
549 | } | |
550 | if (fio_qpair->io_flags & SPDK_NVME_IO_FLAGS_PRCHK_APPTAG) { | |
551 | /* Let's use number of lbas for application tag */ | |
552 | to_be16(&pi->app_tag, FIO_NVME_PI_APPTAG); | |
553 | } | |
554 | if (fio_qpair->io_flags & SPDK_NVME_IO_FLAGS_PRCHK_REFTAG) { | |
555 | to_be32(&pi->ref_tag, (uint32_t)lba + i); | |
556 | } | |
557 | } | |
558 | } | |
559 | } | |
560 | ||
561 | static void | |
562 | fio_extended_lba_verify_pi(struct spdk_fio_qpair *fio_qpair, struct io_u *io_u) | |
563 | { | |
564 | struct spdk_nvme_ns *ns = NULL; | |
565 | struct spdk_nvme_protection_info *pi; | |
566 | uint16_t crc16, guard, app_tag; | |
567 | uint32_t i, md_size, sector_size, pi_offset, extended_lba_size, ref_tag, lba_count; | |
568 | uint64_t lba; | |
569 | ||
570 | ns = fio_qpair->ns; | |
571 | sector_size = spdk_nvme_ns_get_sector_size(ns); | |
572 | md_size = spdk_nvme_ns_get_md_size(ns); | |
573 | extended_lba_size = sector_size + md_size; | |
574 | lba = io_u->offset / extended_lba_size; | |
575 | lba_count = io_u->xfer_buflen / extended_lba_size; | |
576 | ||
577 | for (i = 0; i < lba_count; i++) { | |
578 | pi_offset = (extended_lba_size * (i + 1)) - 8; | |
579 | pi = (struct spdk_nvme_protection_info *)(io_u->buf + pi_offset); | |
580 | ||
581 | if (fio_qpair->io_flags & SPDK_NVME_IO_FLAGS_PRCHK_GUARD) { | |
582 | /* CRC buffer should not include last 8 bytes of PI */ | |
583 | crc16 = spdk_crc16_t10dif(io_u->buf + extended_lba_size * i, | |
584 | extended_lba_size - 8); | |
585 | to_be16(&guard, crc16); | |
586 | if (pi->guard != guard) { | |
587 | fprintf(stdout, "Get Guard Error LBA 0x%16.16"PRIx64"," | |
588 | " Expected 0x%04x but returned with 0x%04x," | |
589 | " may read the LBA without write it first\n", | |
590 | lba + i, guard, pi->guard); | |
591 | } | |
592 | ||
593 | } | |
594 | if (fio_qpair->io_flags & SPDK_NVME_IO_FLAGS_PRCHK_APPTAG) { | |
595 | /* Previously we used the number of lbas as | |
596 | * application tag for writes | |
597 | */ | |
598 | to_be16(&app_tag, FIO_NVME_PI_APPTAG); | |
599 | if (pi->app_tag != app_tag) { | |
600 | fprintf(stdout, "Get Application Tag Error LBA 0x%16.16"PRIx64"," | |
601 | " Expected 0x%04x but returned with 0x%04x," | |
602 | " may read the LBA without write it first\n", | |
603 | lba + i, app_tag, pi->app_tag); | |
604 | } | |
605 | } | |
606 | if (fio_qpair->io_flags & SPDK_NVME_IO_FLAGS_PRCHK_REFTAG) { | |
607 | to_be32(&ref_tag, (uint32_t)lba + i); | |
608 | if (pi->ref_tag != ref_tag) { | |
609 | fprintf(stdout, "Get Reference Tag Error LBA 0x%16.16"PRIx64"," | |
610 | " Expected 0x%08x but returned with 0x%08x," | |
611 | " may read the LBA without write it first\n", | |
612 | lba + i, ref_tag, pi->ref_tag); | |
613 | } | |
614 | } | |
615 | } | |
616 | } | |
617 | ||
7c673cae FG |
618 | static void spdk_fio_completion_cb(void *ctx, const struct spdk_nvme_cpl *cpl) |
619 | { | |
620 | struct spdk_fio_request *fio_req = ctx; | |
621 | struct spdk_fio_thread *fio_thread = fio_req->fio_thread; | |
622 | ||
11fdf7f2 TL |
623 | if (fio_thread->fio_qpair->do_nvme_pi) { |
624 | fio_extended_lba_verify_pi(fio_thread->fio_qpair, fio_req->io); | |
625 | } | |
626 | ||
7c673cae FG |
627 | assert(fio_thread->iocq_count < fio_thread->iocq_size); |
628 | fio_thread->iocq[fio_thread->iocq_count++] = fio_req->io; | |
629 | } | |
630 | ||
11fdf7f2 TL |
631 | static void |
632 | spdk_nvme_io_reset_sgl(void *ref, uint32_t sgl_offset) | |
633 | { | |
634 | struct spdk_fio_request *fio_req = (struct spdk_fio_request *)ref; | |
635 | ||
636 | fio_req->iov_offset = sgl_offset; | |
637 | } | |
638 | ||
639 | static int | |
640 | spdk_nvme_io_next_sge(void *ref, void **address, uint32_t *length) | |
641 | { | |
642 | struct spdk_fio_request *fio_req = (struct spdk_fio_request *)ref; | |
643 | struct io_u *io_u = fio_req->io; | |
644 | ||
645 | *address = io_u->buf; | |
646 | *length = io_u->xfer_buflen; | |
647 | ||
648 | if (fio_req->iov_offset) { | |
649 | assert(fio_req->iov_offset <= io_u->xfer_buflen); | |
650 | *address += fio_req->iov_offset; | |
651 | *length -= fio_req->iov_offset; | |
652 | } | |
653 | ||
654 | return 0; | |
655 | } | |
656 | ||
/* Return type of the queue hook: plain int before ioengine API version 24, enum fio_q_status from then on. */
#if FIO_IOOPS_VERSION < 24
typedef int fio_q_status_t;
#else
typedef enum fio_q_status fio_q_status_t;
#endif
662 | ||
663 | static fio_q_status_t | |
664 | spdk_fio_queue(struct thread_data *td, struct io_u *io_u) | |
7c673cae FG |
665 | { |
666 | int rc = 1; | |
667 | struct spdk_fio_thread *fio_thread = td->io_ops_data; | |
668 | struct spdk_fio_request *fio_req = io_u->engine_data; | |
11fdf7f2 TL |
669 | struct spdk_fio_qpair *fio_qpair; |
670 | struct spdk_nvme_ns *ns = NULL; | |
671 | uint32_t block_size; | |
672 | uint64_t lba; | |
673 | uint32_t lba_count; | |
7c673cae FG |
674 | |
675 | /* Find the namespace that corresponds to the file in the io_u */ | |
11fdf7f2 TL |
676 | fio_qpair = fio_thread->fio_qpair; |
677 | while (fio_qpair != NULL) { | |
678 | if (fio_qpair->f == io_u->file) { | |
679 | ns = fio_qpair->ns; | |
7c673cae FG |
680 | break; |
681 | } | |
11fdf7f2 | 682 | fio_qpair = fio_qpair->next; |
7c673cae | 683 | } |
11fdf7f2 TL |
684 | if (fio_qpair == NULL || ns == NULL) { |
685 | return -ENXIO; | |
7c673cae | 686 | } |
7c673cae | 687 | |
11fdf7f2 TL |
688 | block_size = spdk_nvme_ns_get_extended_sector_size(ns); |
689 | ||
690 | lba = io_u->offset / block_size; | |
691 | lba_count = io_u->xfer_buflen / block_size; | |
692 | ||
693 | // TODO: considering situations that fio will randomize and verify io_u | |
694 | if (fio_qpair->do_nvme_pi) { | |
695 | fio_extended_lba_setup_pi(fio_qpair, io_u); | |
696 | } | |
7c673cae FG |
697 | |
698 | switch (io_u->ddir) { | |
699 | case DDIR_READ: | |
11fdf7f2 TL |
700 | if (!spdk_enable_sgl) { |
701 | rc = spdk_nvme_ns_cmd_read_with_md(ns, fio_qpair->qpair, io_u->buf, NULL, lba, lba_count, | |
702 | spdk_fio_completion_cb, fio_req, | |
703 | fio_qpair->io_flags, fio_req->appmask, fio_req->apptag); | |
704 | } else { | |
705 | rc = spdk_nvme_ns_cmd_readv_with_md(ns, fio_qpair->qpair, lba, | |
706 | lba_count, spdk_fio_completion_cb, fio_req, fio_qpair->io_flags, | |
707 | spdk_nvme_io_reset_sgl, spdk_nvme_io_next_sge, NULL, | |
708 | fio_req->appmask, fio_req->apptag); | |
709 | } | |
7c673cae FG |
710 | break; |
711 | case DDIR_WRITE: | |
11fdf7f2 TL |
712 | if (!spdk_enable_sgl) { |
713 | rc = spdk_nvme_ns_cmd_write_with_md(ns, fio_qpair->qpair, io_u->buf, NULL, lba, lba_count, | |
714 | spdk_fio_completion_cb, fio_req, | |
715 | fio_qpair->io_flags, fio_req->appmask, fio_req->apptag); | |
716 | } else { | |
717 | rc = spdk_nvme_ns_cmd_writev_with_md(ns, fio_qpair->qpair, lba, | |
718 | lba_count, spdk_fio_completion_cb, fio_req, fio_qpair->io_flags, | |
719 | spdk_nvme_io_reset_sgl, spdk_nvme_io_next_sge, NULL, | |
720 | fio_req->appmask, fio_req->apptag); | |
721 | } | |
7c673cae FG |
722 | break; |
723 | default: | |
724 | assert(false); | |
725 | break; | |
726 | } | |
727 | ||
11fdf7f2 TL |
728 | /* NVMe read/write functions return -ENOMEM if there are no free requests. */ |
729 | if (rc == -ENOMEM) { | |
730 | return FIO_Q_BUSY; | |
731 | } | |
7c673cae | 732 | |
11fdf7f2 TL |
733 | if (rc != 0) { |
734 | return -abs(rc); | |
735 | } | |
736 | ||
737 | return FIO_Q_QUEUED; | |
7c673cae FG |
738 | } |
739 | ||
740 | static struct io_u *spdk_fio_event(struct thread_data *td, int event) | |
741 | { | |
742 | struct spdk_fio_thread *fio_thread = td->io_ops_data; | |
743 | ||
744 | assert(event >= 0); | |
745 | assert((unsigned)event < fio_thread->iocq_count); | |
746 | return fio_thread->iocq[event]; | |
747 | } | |
748 | ||
749 | static int spdk_fio_getevents(struct thread_data *td, unsigned int min, | |
750 | unsigned int max, const struct timespec *t) | |
751 | { | |
752 | struct spdk_fio_thread *fio_thread = td->io_ops_data; | |
11fdf7f2 | 753 | struct spdk_fio_qpair *fio_qpair = NULL; |
7c673cae FG |
754 | struct timespec t0, t1; |
755 | uint64_t timeout = 0; | |
756 | ||
757 | if (t) { | |
758 | timeout = t->tv_sec * 1000000000L + t->tv_nsec; | |
759 | clock_gettime(CLOCK_MONOTONIC_RAW, &t0); | |
760 | } | |
761 | ||
762 | fio_thread->iocq_count = 0; | |
763 | ||
11fdf7f2 TL |
764 | /* fetch the next qpair */ |
765 | if (fio_thread->fio_qpair_current) { | |
766 | fio_qpair = fio_thread->fio_qpair_current->next; | |
767 | } | |
768 | ||
7c673cae | 769 | for (;;) { |
11fdf7f2 TL |
770 | if (fio_qpair == NULL) { |
771 | fio_qpair = fio_thread->fio_qpair; | |
772 | } | |
773 | ||
774 | while (fio_qpair != NULL) { | |
775 | spdk_nvme_qpair_process_completions(fio_qpair->qpair, max - fio_thread->iocq_count); | |
7c673cae FG |
776 | |
777 | if (fio_thread->iocq_count >= min) { | |
11fdf7f2 TL |
778 | /* reset the currrent handling qpair */ |
779 | fio_thread->fio_qpair_current = fio_qpair; | |
7c673cae FG |
780 | return fio_thread->iocq_count; |
781 | } | |
782 | ||
11fdf7f2 | 783 | fio_qpair = fio_qpair->next; |
7c673cae FG |
784 | } |
785 | ||
786 | if (t) { | |
11fdf7f2 TL |
787 | uint64_t elapse; |
788 | ||
7c673cae | 789 | clock_gettime(CLOCK_MONOTONIC_RAW, &t1); |
11fdf7f2 TL |
790 | elapse = ((t1.tv_sec - t0.tv_sec) * 1000000000L) |
791 | + t1.tv_nsec - t0.tv_nsec; | |
7c673cae FG |
792 | if (elapse > timeout) { |
793 | break; | |
794 | } | |
795 | } | |
796 | } | |
797 | ||
11fdf7f2 TL |
798 | /* reset the currrent handling qpair */ |
799 | fio_thread->fio_qpair_current = fio_qpair; | |
7c673cae FG |
800 | return fio_thread->iocq_count; |
801 | } | |
802 | ||
/* invalidate hook: currently a no-op that reports success (0). */
static int
spdk_fio_invalidate(struct thread_data *td, struct fio_file *f)
{
	/* TODO: This should probably send a flush to the device, but for now just return successful. */
	return 0;
}
808 | ||
809 | static void spdk_fio_cleanup(struct thread_data *td) | |
810 | { | |
811 | struct spdk_fio_thread *fio_thread = td->io_ops_data; | |
11fdf7f2 TL |
812 | struct spdk_fio_qpair *fio_qpair, *fio_qpair_tmp; |
813 | ||
814 | fio_qpair = fio_thread->fio_qpair; | |
815 | while (fio_qpair != NULL) { | |
816 | spdk_nvme_ctrlr_free_io_qpair(fio_qpair->qpair); | |
817 | fio_qpair_tmp = fio_qpair->next; | |
818 | free(fio_qpair); | |
819 | fio_qpair = fio_qpair_tmp; | |
7c673cae FG |
820 | } |
821 | ||
822 | free(fio_thread); | |
11fdf7f2 TL |
823 | |
824 | pthread_mutex_lock(&mutex); | |
825 | td_count--; | |
826 | if (td_count == 0) { | |
827 | struct spdk_fio_ctrlr *fio_ctrlr, *fio_ctrlr_tmp; | |
828 | ||
829 | fio_ctrlr = ctrlr_g; | |
830 | while (fio_ctrlr != NULL) { | |
831 | spdk_nvme_detach(fio_ctrlr->ctrlr); | |
832 | fio_ctrlr_tmp = fio_ctrlr->next; | |
833 | free(fio_ctrlr); | |
834 | fio_ctrlr = fio_ctrlr_tmp; | |
835 | } | |
836 | ctrlr_g = NULL; | |
837 | } | |
838 | pthread_mutex_unlock(&mutex); | |
839 | if (!ctrlr_g) { | |
840 | if (pthread_cancel(g_ctrlr_thread_id) == 0) { | |
841 | pthread_join(g_ctrlr_thread_id, NULL); | |
842 | } | |
843 | } | |
7c673cae FG |
844 | } |
845 | ||
11fdf7f2 TL |
846 | /* This function enables addition of SPDK parameters to the fio config |
847 | * Adding new parameters by defining them here and defining a callback | |
848 | * function to read the parameter value. */ | |
849 | static struct fio_option options[] = { | |
850 | { | |
851 | .name = "mem_size_mb", | |
852 | .lname = "Memory size in MB", | |
853 | .type = FIO_OPT_INT, | |
854 | .off1 = offsetof(struct spdk_fio_options, mem_size), | |
855 | .def = "512", | |
856 | .help = "Memory Size for SPDK (MB)", | |
857 | .category = FIO_OPT_C_ENGINE, | |
858 | .group = FIO_OPT_G_INVALID, | |
859 | }, | |
860 | { | |
861 | .name = "shm_id", | |
862 | .lname = "shared memory ID", | |
863 | .type = FIO_OPT_INT, | |
864 | .off1 = offsetof(struct spdk_fio_options, shm_id), | |
865 | .def = "-1", | |
866 | .help = "Shared Memory ID", | |
867 | .category = FIO_OPT_C_ENGINE, | |
868 | .group = FIO_OPT_G_INVALID, | |
869 | }, | |
870 | { | |
871 | .name = "enable_sgl", | |
872 | .lname = "SGL used for I/O commands", | |
873 | .type = FIO_OPT_INT, | |
874 | .off1 = offsetof(struct spdk_fio_options, enable_sgl), | |
875 | .def = "0", | |
876 | .help = "SGL Used for I/O Commands (enable_sgl=1 or enable_sgl=0)", | |
877 | .category = FIO_OPT_C_ENGINE, | |
878 | .group = FIO_OPT_G_INVALID, | |
879 | }, | |
880 | { | |
881 | .name = "hostnqn", | |
882 | .lname = "Host NQN to use when connecting to controllers.", | |
883 | .type = FIO_OPT_STR_STORE, | |
884 | .off1 = offsetof(struct spdk_fio_options, hostnqn), | |
885 | .help = "Host NQN", | |
886 | .category = FIO_OPT_C_ENGINE, | |
887 | .group = FIO_OPT_G_INVALID, | |
888 | }, | |
889 | { | |
890 | .name = "pi_act", | |
891 | .lname = "Protection Information Action", | |
892 | .type = FIO_OPT_INT, | |
893 | .off1 = offsetof(struct spdk_fio_options, pi_act), | |
894 | .def = "1", | |
895 | .help = "Protection Information Action bit (pi_act=1 or pi_act=0)", | |
896 | .category = FIO_OPT_C_ENGINE, | |
897 | .group = FIO_OPT_G_INVALID, | |
898 | }, | |
899 | { | |
900 | .name = "pi_chk", | |
901 | .lname = "Protection Information Check(GUARD|REFTAG|APPTAG)", | |
902 | .type = FIO_OPT_STR_STORE, | |
903 | .off1 = offsetof(struct spdk_fio_options, pi_chk), | |
904 | .def = NULL, | |
905 | .help = "Control of Protection Information Checking (pi_chk=GUARD|REFTAG|APPTAG)", | |
906 | .category = FIO_OPT_C_ENGINE, | |
907 | .group = FIO_OPT_G_INVALID, | |
908 | }, | |
909 | { | |
910 | .name = NULL, | |
911 | }, | |
912 | }; | |
913 | ||
7c673cae FG |
914 | /* FIO imports this structure using dlsym */ |
915 | struct ioengine_ops ioengine = { | |
11fdf7f2 | 916 | .name = "spdk", |
7c673cae FG |
917 | .version = FIO_IOOPS_VERSION, |
918 | .queue = spdk_fio_queue, | |
919 | .getevents = spdk_fio_getevents, | |
920 | .event = spdk_fio_event, | |
921 | .cleanup = spdk_fio_cleanup, | |
922 | .open_file = spdk_fio_open, | |
923 | .close_file = spdk_fio_close, | |
924 | .invalidate = spdk_fio_invalidate, | |
925 | .iomem_alloc = spdk_fio_iomem_alloc, | |
926 | .iomem_free = spdk_fio_iomem_free, | |
927 | .setup = spdk_fio_setup, | |
928 | .io_u_init = spdk_fio_io_u_init, | |
929 | .io_u_free = spdk_fio_io_u_free, | |
930 | .flags = FIO_RAWIO | FIO_NOEXTEND | FIO_NODISKUTIL | FIO_MEMALIGN, | |
11fdf7f2 TL |
931 | .options = options, |
932 | .option_struct_size = sizeof(struct spdk_fio_options), | |
7c673cae | 933 | }; |
11fdf7f2 TL |
934 | |
935 | static void fio_init fio_spdk_register(void) | |
936 | { | |
937 | register_ioengine(&ioengine); | |
938 | } | |
939 | ||
940 | static void fio_exit fio_spdk_unregister(void) | |
941 | { | |
942 | unregister_ioengine(&ioengine); | |
943 | } |