]> git.proxmox.com Git - ceph.git/blob - ceph/src/spdk/lib/nvme/nvme.c
add subtree-ish sources for 12.0.3
[ceph.git] / ceph / src / spdk / lib / nvme / nvme.c
1 /*-
2 * BSD LICENSE
3 *
4 * Copyright (c) Intel Corporation.
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 *
11 * * Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * * Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in
15 * the documentation and/or other materials provided with the
16 * distribution.
17 * * Neither the name of Intel Corporation nor the names of its
18 * contributors may be used to endorse or promote products derived
19 * from this software without specific prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32 */
33
34 #include "spdk/nvmf_spec.h"
35 #include "nvme_internal.h"
36
37 #define SPDK_NVME_DRIVER_NAME "spdk_nvme_driver"
38
39 struct nvme_driver *g_spdk_nvme_driver;
40
41 int32_t spdk_nvme_retry_count;
42
43 int
44 spdk_nvme_detach(struct spdk_nvme_ctrlr *ctrlr)
45 {
46 nvme_robust_mutex_lock(&g_spdk_nvme_driver->lock);
47
48 nvme_ctrlr_proc_put_ref(ctrlr);
49
50 if (nvme_ctrlr_get_ref_count(ctrlr) == 0) {
51 TAILQ_REMOVE(&g_spdk_nvme_driver->attached_ctrlrs, ctrlr, tailq);
52 nvme_ctrlr_destruct(ctrlr);
53 }
54
55 nvme_robust_mutex_unlock(&g_spdk_nvme_driver->lock);
56 return 0;
57 }
58
59 void
60 nvme_completion_poll_cb(void *arg, const struct spdk_nvme_cpl *cpl)
61 {
62 struct nvme_completion_poll_status *status = arg;
63
64 /*
65 * Copy status into the argument passed by the caller, so that
66 * the caller can check the status to determine if the
67 * the request passed or failed.
68 */
69 memcpy(&status->cpl, cpl, sizeof(*cpl));
70 status->done = true;
71 }
72
73 struct nvme_request *
74 nvme_allocate_request(struct spdk_nvme_qpair *qpair,
75 const struct nvme_payload *payload, uint32_t payload_size,
76 spdk_nvme_cmd_cb cb_fn, void *cb_arg)
77 {
78 struct nvme_request *req;
79
80 req = STAILQ_FIRST(&qpair->free_req);
81 if (req == NULL) {
82 return req;
83 }
84
85 STAILQ_REMOVE_HEAD(&qpair->free_req, stailq);
86
87 /*
88 * Only memset up to (but not including) the children
89 * TAILQ_ENTRY. children, and following members, are
90 * only used as part of I/O splitting so we avoid
91 * memsetting them until it is actually needed.
92 * They will be initialized in nvme_request_add_child()
93 * if the request is split.
94 */
95 memset(req, 0, offsetof(struct nvme_request, children));
96 req->cb_fn = cb_fn;
97 req->cb_arg = cb_arg;
98 req->payload = *payload;
99 req->payload_size = payload_size;
100 req->qpair = qpair;
101 req->pid = getpid();
102
103 return req;
104 }
105
106 struct nvme_request *
107 nvme_allocate_request_contig(struct spdk_nvme_qpair *qpair,
108 void *buffer, uint32_t payload_size,
109 spdk_nvme_cmd_cb cb_fn, void *cb_arg)
110 {
111 struct nvme_payload payload;
112
113 payload.type = NVME_PAYLOAD_TYPE_CONTIG;
114 payload.u.contig = buffer;
115 payload.md = NULL;
116
117 return nvme_allocate_request(qpair, &payload, payload_size, cb_fn, cb_arg);
118 }
119
120 struct nvme_request *
121 nvme_allocate_request_null(struct spdk_nvme_qpair *qpair, spdk_nvme_cmd_cb cb_fn, void *cb_arg)
122 {
123 return nvme_allocate_request_contig(qpair, NULL, 0, cb_fn, cb_arg);
124 }
125
126 static void
127 nvme_user_copy_cmd_complete(void *arg, const struct spdk_nvme_cpl *cpl)
128 {
129 struct nvme_request *req = arg;
130 enum spdk_nvme_data_transfer xfer;
131
132 if (req->user_buffer && req->payload_size) {
133 /* Copy back to the user buffer and free the contig buffer */
134 assert(req->payload.type == NVME_PAYLOAD_TYPE_CONTIG);
135 xfer = spdk_nvme_opc_get_data_transfer(req->cmd.opc);
136 if (xfer == SPDK_NVME_DATA_CONTROLLER_TO_HOST ||
137 xfer == SPDK_NVME_DATA_BIDIRECTIONAL) {
138 assert(req->pid == getpid());
139 memcpy(req->user_buffer, req->payload.u.contig, req->payload_size);
140 }
141
142 spdk_free(req->payload.u.contig);
143 }
144
145 /* Call the user's original callback now that the buffer has been copied */
146 req->user_cb_fn(req->user_cb_arg, cpl);
147 }
148
149 /**
150 * Allocate a request as well as a physically contiguous buffer to copy to/from the user's buffer.
151 *
152 * This is intended for use in non-fast-path functions (admin commands, reservations, etc.)
153 * where the overhead of a copy is not a problem.
154 */
155 struct nvme_request *
156 nvme_allocate_request_user_copy(struct spdk_nvme_qpair *qpair,
157 void *buffer, uint32_t payload_size, spdk_nvme_cmd_cb cb_fn,
158 void *cb_arg, bool host_to_controller)
159 {
160 struct nvme_request *req;
161 void *contig_buffer = NULL;
162 uint64_t phys_addr;
163
164 if (buffer && payload_size) {
165 contig_buffer = spdk_zmalloc(payload_size, 4096, &phys_addr);
166 if (!contig_buffer) {
167 return NULL;
168 }
169
170 if (host_to_controller) {
171 memcpy(contig_buffer, buffer, payload_size);
172 }
173 }
174
175 req = nvme_allocate_request_contig(qpair, contig_buffer, payload_size, nvme_user_copy_cmd_complete,
176 NULL);
177 if (!req) {
178 spdk_free(contig_buffer);
179 return NULL;
180 }
181
182 req->user_cb_fn = cb_fn;
183 req->user_cb_arg = cb_arg;
184 req->user_buffer = buffer;
185 req->cb_arg = req;
186
187 return req;
188 }
189
/*
 * Return a completed request to its owning qpair's free list.
 * Only requests with no outstanding child requests may be freed.
 */
void
nvme_free_request(struct nvme_request *req)
{
	assert(req != NULL);
	assert(req->num_children == 0);
	assert(req->qpair != NULL);

	STAILQ_INSERT_HEAD(&req->qpair->free_req, req, stailq);
}
199
/*
 * Initialize a process-shared, robust mutex suitable for use inside a
 * shared memzone accessed by multiple processes.  On FreeBSD, where these
 * attributes are unavailable, a default mutex is created instead.
 *
 * Returns 0 on success, -1 on failure.
 */
int
nvme_robust_mutex_init_shared(pthread_mutex_t *mtx)
{
	int rc = 0;

#ifdef __FreeBSD__
	/* Bug fix: the init result was previously ignored, reporting
	 * success even when the mutex was left uninitialized. */
	if (pthread_mutex_init(mtx, NULL)) {
		rc = -1;
	}
#else
	pthread_mutexattr_t attr;

	if (pthread_mutexattr_init(&attr)) {
		return -1;
	}
	/*
	 * PROCESS_SHARED lets the mutex live in shared memory used by
	 * several processes; ROBUST makes it recoverable if an owning
	 * process dies while holding it.
	 */
	if (pthread_mutexattr_setpshared(&attr, PTHREAD_PROCESS_SHARED) ||
	    pthread_mutexattr_setrobust(&attr, PTHREAD_MUTEX_ROBUST) ||
	    pthread_mutex_init(mtx, &attr)) {
		rc = -1;
	}
	pthread_mutexattr_destroy(&attr);
#endif

	return rc;
}
223
/*
 * One-time, cross-process initialization of the shared g_spdk_nvme_driver
 * state.  The primary process reserves a named shared memzone and
 * initializes it (mutex + controller lists); secondary processes merely
 * look the memzone up and spin until the primary flags it initialized.
 *
 * Returns 0 on success, -1 on failure.
 */
static int
nvme_driver_init(void)
{
	int ret = 0;
	/* Any socket ID */
	int socket_id = -1;

	/*
	 * Only one thread from one process will do this driver init work.
	 * The primary process will reserve the shared memory and do the
	 * initialization.
	 * The secondary process will lookup the existing reserved memory.
	 */
	if (spdk_process_is_primary()) {
		/* The unique named memzone already reserved. */
		if (g_spdk_nvme_driver != NULL) {
			assert(g_spdk_nvme_driver->initialized == true);

			return 0;
		} else {
			g_spdk_nvme_driver = spdk_memzone_reserve(SPDK_NVME_DRIVER_NAME,
					     sizeof(struct nvme_driver), socket_id, 0);
		}

		if (g_spdk_nvme_driver == NULL) {
			SPDK_ERRLOG("primary process failed to reserve memory\n");

			return -1;
		}
	} else {
		g_spdk_nvme_driver = spdk_memzone_lookup(SPDK_NVME_DRIVER_NAME);

		/* The unique named memzone already reserved by the primary process. */
		if (g_spdk_nvme_driver != NULL) {
			/* Wait the nvme driver to get initialized. */
			while (g_spdk_nvme_driver->initialized == false) {
				nvme_delay(1000);
			}
		} else {
			SPDK_ERRLOG("primary process is not started yet\n");

			return -1;
		}

		/* Secondary process: nothing more to initialize. */
		return 0;
	}

	/*
	 * At this moment, only one thread from the primary process will do
	 * the g_spdk_nvme_driver initialization
	 */
	assert(spdk_process_is_primary());

	ret = nvme_robust_mutex_init_shared(&g_spdk_nvme_driver->lock);
	if (ret != 0) {
		SPDK_ERRLOG("failed to initialize mutex\n");
		spdk_memzone_free(SPDK_NVME_DRIVER_NAME);
		return ret;
	}

	nvme_robust_mutex_lock(&g_spdk_nvme_driver->lock);

	/*
	 * Secondary processes spin on this flag (above); it flips to true
	 * only after the first round of controller initialization completes
	 * in nvme_init_controllers().
	 */
	g_spdk_nvme_driver->initialized = false;

	TAILQ_INIT(&g_spdk_nvme_driver->init_ctrlrs);
	TAILQ_INIT(&g_spdk_nvme_driver->attached_ctrlrs);

	nvme_robust_mutex_unlock(&g_spdk_nvme_driver->lock);

	return ret;
}
295
296 int
297 nvme_ctrlr_probe(const struct spdk_nvme_transport_id *trid, void *devhandle,
298 spdk_nvme_probe_cb probe_cb, void *cb_ctx)
299 {
300 struct spdk_nvme_ctrlr *ctrlr;
301 struct spdk_nvme_ctrlr_opts opts;
302
303 spdk_nvme_ctrlr_opts_set_defaults(&opts);
304
305 if (probe_cb(cb_ctx, trid, &opts)) {
306 ctrlr = nvme_transport_ctrlr_construct(trid, &opts, devhandle);
307 if (ctrlr == NULL) {
308 SPDK_ERRLOG("Failed to construct NVMe controller\n");
309 return -1;
310 }
311
312 TAILQ_INSERT_TAIL(&g_spdk_nvme_driver->init_ctrlrs, ctrlr, tailq);
313 return 0;
314 }
315
316 return 1;
317 }
318
/*
 * Drive every controller on the shared init_ctrlrs list through its
 * initialization state machine, moving each one to attached_ctrlrs (and
 * invoking attach_cb) once it reaches READY.  Sets the driver-wide
 * "initialized" flag when done so waiting secondary processes may proceed.
 *
 * Returns 0 on success, -1 if any controller failed to initialize.
 */
static int
nvme_init_controllers(void *cb_ctx, spdk_nvme_attach_cb attach_cb)
{
	int rc = 0;
	int start_rc;
	struct spdk_nvme_ctrlr *ctrlr, *ctrlr_tmp;

	nvme_robust_mutex_lock(&g_spdk_nvme_driver->lock);

	/* Initialize all new controllers in the init_ctrlrs list in parallel. */
	while (!TAILQ_EMPTY(&g_spdk_nvme_driver->init_ctrlrs)) {
		TAILQ_FOREACH_SAFE(ctrlr, &g_spdk_nvme_driver->init_ctrlrs, tailq, ctrlr_tmp) {
			/* Drop the driver lock while calling nvme_ctrlr_process_init()
			 * since it needs to acquire the driver lock internally when calling
			 * nvme_ctrlr_start().
			 *
			 * TODO: Rethink the locking - maybe reset should take the lock so that start() and
			 * the functions it calls (in particular nvme_ctrlr_set_num_qpairs())
			 * can assume it is held.
			 */
			nvme_robust_mutex_unlock(&g_spdk_nvme_driver->lock);
			start_rc = nvme_ctrlr_process_init(ctrlr);
			nvme_robust_mutex_lock(&g_spdk_nvme_driver->lock);

			if (start_rc) {
				/* Controller failed to initialize. */
				TAILQ_REMOVE(&g_spdk_nvme_driver->init_ctrlrs, ctrlr, tailq);
				nvme_ctrlr_destruct(ctrlr);
				rc = -1;
				break;
			}

			if (ctrlr->state == NVME_CTRLR_STATE_READY) {
				/*
				 * Controller has been initialized.
				 * Move it to the attached_ctrlrs list.
				 */
				TAILQ_REMOVE(&g_spdk_nvme_driver->init_ctrlrs, ctrlr, tailq);
				TAILQ_INSERT_TAIL(&g_spdk_nvme_driver->attached_ctrlrs, ctrlr, tailq);

				/*
				 * Increase the ref count before calling attach_cb() as the user may
				 * call nvme_detach() immediately.
				 */
				nvme_ctrlr_proc_get_ref(ctrlr);

				/*
				 * Unlock while calling attach_cb() so the user can call other functions
				 * that may take the driver lock, like nvme_detach().
				 */
				nvme_robust_mutex_unlock(&g_spdk_nvme_driver->lock);
				attach_cb(cb_ctx, &ctrlr->trid, ctrlr, &ctrlr->opts);
				nvme_robust_mutex_lock(&g_spdk_nvme_driver->lock);

				/* The lists may have changed while unlocked;
				 * restart the inner scan from the head. */
				break;
			}
		}
	}

	/* Release any secondary processes spinning in nvme_driver_init(). */
	g_spdk_nvme_driver->initialized = true;

	nvme_robust_mutex_unlock(&g_spdk_nvme_driver->lock);
	return rc;
}
383
/*
 * Enumerate NVMe controllers matching the given transport ID (defaulting
 * to a scan of local PCIe controllers when trid is NULL).  probe_cb asks
 * the caller whether to attach each discovered controller; attach_cb is
 * invoked once a controller is ready.  In a secondary process no
 * initialization is performed: a per-process reference is taken on every
 * controller the primary already attached and attach_cb is called for each.
 *
 * Returns 0 on success, non-zero on failure.
 */
int
spdk_nvme_probe(const struct spdk_nvme_transport_id *trid, void *cb_ctx,
		spdk_nvme_probe_cb probe_cb, spdk_nvme_attach_cb attach_cb,
		spdk_nvme_remove_cb remove_cb)
{
	int rc;
	struct spdk_nvme_ctrlr *ctrlr;
	struct spdk_nvme_transport_id trid_pcie;

	rc = nvme_driver_init();
	if (rc != 0) {
		return rc;
	}

	if (trid == NULL) {
		/* No transport ID given: default to scanning the PCIe bus. */
		memset(&trid_pcie, 0, sizeof(trid_pcie));
		trid_pcie.trtype = SPDK_NVME_TRANSPORT_PCIE;
		trid = &trid_pcie;
	}

	if (!spdk_nvme_transport_available(trid->trtype)) {
		SPDK_ERRLOG("NVMe trtype %u not available\n", trid->trtype);
		return -1;
	}

	nvme_robust_mutex_lock(&g_spdk_nvme_driver->lock);

	/* NOTE(review): the scan's return value is ignored here; scan
	 * failures are only observable as an empty init_ctrlrs list. */
	nvme_transport_ctrlr_scan(trid, cb_ctx, probe_cb, remove_cb);

	if (!spdk_process_is_primary()) {
		/* Secondary process: attach to what the primary already set up. */
		TAILQ_FOREACH(ctrlr, &g_spdk_nvme_driver->attached_ctrlrs, tailq) {
			nvme_ctrlr_proc_get_ref(ctrlr);

			/*
			 * Unlock while calling attach_cb() so the user can call other functions
			 * that may take the driver lock, like nvme_detach().
			 */
			nvme_robust_mutex_unlock(&g_spdk_nvme_driver->lock);
			attach_cb(cb_ctx, &ctrlr->trid, ctrlr, &ctrlr->opts);
			nvme_robust_mutex_lock(&g_spdk_nvme_driver->lock);
		}

		nvme_robust_mutex_unlock(&g_spdk_nvme_driver->lock);
		return 0;
	}

	nvme_robust_mutex_unlock(&g_spdk_nvme_driver->lock);
	/*
	 * Keep going even if one or more nvme_attach() calls failed,
	 * but maintain the value of rc to signal errors when we return.
	 */

	rc = nvme_init_controllers(cb_ctx, attach_cb);

	return rc;
}
440
441 int
442 spdk_nvme_transport_id_parse_trtype(enum spdk_nvme_transport_type *trtype, const char *str)
443 {
444 if (trtype == NULL || str == NULL) {
445 return -EINVAL;
446 }
447
448 if (strcasecmp(str, "PCIe") == 0) {
449 *trtype = SPDK_NVME_TRANSPORT_PCIE;
450 } else if (strcasecmp(str, "RDMA") == 0) {
451 *trtype = SPDK_NVME_TRANSPORT_RDMA;
452 } else {
453 return -ENOENT;
454 }
455 return 0;
456 }
457
458 int
459 spdk_nvme_transport_id_parse_adrfam(enum spdk_nvmf_adrfam *adrfam, const char *str)
460 {
461 if (adrfam == NULL || str == NULL) {
462 return -EINVAL;
463 }
464
465 if (strcasecmp(str, "IPv4") == 0) {
466 *adrfam = SPDK_NVMF_ADRFAM_IPV4;
467 } else if (strcasecmp(str, "IPv6") == 0) {
468 *adrfam = SPDK_NVMF_ADRFAM_IPV6;
469 } else if (strcasecmp(str, "IB") == 0) {
470 *adrfam = SPDK_NVMF_ADRFAM_IB;
471 } else if (strcasecmp(str, "FC") == 0) {
472 *adrfam = SPDK_NVMF_ADRFAM_FC;
473 } else {
474 return -ENOENT;
475 }
476 return 0;
477 }
478
479 int
480 spdk_nvme_transport_id_parse(struct spdk_nvme_transport_id *trid, const char *str)
481 {
482 const char *sep;
483 const char *whitespace = " \t\n";
484 size_t key_len, val_len;
485 char key[32];
486 char val[1024];
487
488 if (trid == NULL || str == NULL) {
489 return -EINVAL;
490 }
491
492 while (*str != '\0') {
493 str += strspn(str, whitespace);
494
495 sep = strchr(str, ':');
496 if (!sep) {
497 SPDK_ERRLOG("Key without : separator\n");
498 return -EINVAL;
499 }
500
501 key_len = sep - str;
502 if (key_len >= sizeof(key)) {
503 SPDK_ERRLOG("Transport key length %zu greater than maximum allowed %zu\n",
504 key_len, sizeof(key) - 1);
505 return -EINVAL;
506 }
507
508 memcpy(key, str, key_len);
509 key[key_len] = '\0';
510
511 str += key_len + 1; /* Skip key: */
512 val_len = strcspn(str, whitespace);
513 if (val_len == 0) {
514 SPDK_ERRLOG("Key without value\n");
515 return -EINVAL;
516 }
517
518 if (val_len >= sizeof(val)) {
519 SPDK_ERRLOG("Transport value length %zu greater than maximum allowed %zu\n",
520 val_len, sizeof(val) - 1);
521 return -EINVAL;
522 }
523
524 memcpy(val, str, val_len);
525 val[val_len] = '\0';
526
527 str += val_len;
528
529 if (strcasecmp(key, "trtype") == 0) {
530 if (spdk_nvme_transport_id_parse_trtype(&trid->trtype, val) != 0) {
531 SPDK_ERRLOG("Unknown trtype '%s'\n", val);
532 return -EINVAL;
533 }
534 } else if (strcasecmp(key, "adrfam") == 0) {
535 if (spdk_nvme_transport_id_parse_adrfam(&trid->adrfam, val) != 0) {
536 SPDK_ERRLOG("Unknown adrfam '%s'\n", val);
537 return -EINVAL;
538 }
539 } else if (strcasecmp(key, "traddr") == 0) {
540 if (val_len > SPDK_NVMF_TRADDR_MAX_LEN) {
541 SPDK_ERRLOG("traddr length %zu greater than maximum allowed %u\n",
542 val_len, SPDK_NVMF_TRADDR_MAX_LEN);
543 return -EINVAL;
544 }
545 memcpy(trid->traddr, val, val_len + 1);
546 } else if (strcasecmp(key, "trsvcid") == 0) {
547 if (val_len > SPDK_NVMF_TRSVCID_MAX_LEN) {
548 SPDK_ERRLOG("trsvcid length %zu greater than maximum allowed %u\n",
549 val_len, SPDK_NVMF_TRSVCID_MAX_LEN);
550 return -EINVAL;
551 }
552 memcpy(trid->trsvcid, val, val_len + 1);
553 } else if (strcasecmp(key, "subnqn") == 0) {
554 if (val_len > SPDK_NVMF_NQN_MAX_LEN) {
555 SPDK_ERRLOG("subnqn length %zu greater than maximum allowed %u\n",
556 val_len, SPDK_NVMF_NQN_MAX_LEN);
557 return -EINVAL;
558 }
559 memcpy(trid->subnqn, val, val_len + 1);
560 } else {
561 SPDK_ERRLOG("Unknown transport ID key '%s'\n", key);
562 }
563 }
564
565 return 0;
566 }
567
/*
 * Three-way compare of two ints.  Uses (a > b) - (a < b) rather than
 * "a - b", whose subtraction can overflow (signed overflow is undefined
 * behavior, e.g. INT_MAX - INT_MIN).  Returns <0, 0, or >0 like strcmp().
 */
static int
cmp_int(int a, int b)
{
	return (a > b) - (a < b);
}
573
574 int
575 spdk_nvme_transport_id_compare(const struct spdk_nvme_transport_id *trid1,
576 const struct spdk_nvme_transport_id *trid2)
577 {
578 int cmp;
579
580 cmp = cmp_int(trid1->trtype, trid2->trtype);
581 if (cmp) {
582 return cmp;
583 }
584
585 cmp = cmp_int(trid1->adrfam, trid2->adrfam);
586 if (cmp) {
587 return cmp;
588 }
589
590 cmp = strcasecmp(trid1->traddr, trid2->traddr);
591 if (cmp) {
592 return cmp;
593 }
594
595 cmp = strcasecmp(trid1->trsvcid, trid2->trsvcid);
596 if (cmp) {
597 return cmp;
598 }
599
600 cmp = strcasecmp(trid1->subnqn, trid2->subnqn);
601 if (cmp) {
602 return cmp;
603 }
604
605 return 0;
606 }
607
608 SPDK_LOG_REGISTER_TRACE_FLAG("nvme", SPDK_TRACE_NVME)