/*-
 *   BSD LICENSE
 *
 *   Copyright (c) Intel Corporation. All rights reserved.
 *   Copyright (c) 2020 Mellanox Technologies LTD. All rights reserved.
 *
 *   Redistribution and use in source and binary forms, with or without
 *   modification, are permitted provided that the following conditions
 *   are met:
 *
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in
 *       the documentation and/or other materials provided with the
 *       distribution.
 *     * Neither the name of Intel Corporation nor the names of its
 *       contributors may be used to endorse or promote products derived
 *       from this software without specific prior written permission.
 *
 *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
#include "spdk/nvmf_spec.h"
#include "spdk/string.h"
#include "nvme_internal.h"
#include "nvme_io_msg.h"
#include "nvme_uevent.h"

#define SPDK_NVME_DRIVER_NAME "spdk_nvme_driver"

struct nvme_driver	*g_spdk_nvme_driver;
pid_t			g_spdk_nvme_pid;

/* gross timeout of 180 seconds in milliseconds */
static int g_nvme_driver_timeout_ms = 3 * 60 * 1000;

/* Per-process attached controller list */
static TAILQ_HEAD(, spdk_nvme_ctrlr) g_nvme_attached_ctrlrs =
	TAILQ_HEAD_INITIALIZER(g_nvme_attached_ctrlrs);
/* Returns true if ctrlr should be stored on the multi-process shared_attached_ctrlrs list */
static bool
nvme_ctrlr_shared(const struct spdk_nvme_ctrlr *ctrlr)
{
	return ctrlr->trid.trtype == SPDK_NVME_TRANSPORT_PCIE;
}
void
nvme_ctrlr_connected(struct spdk_nvme_probe_ctx *probe_ctx,
		     struct spdk_nvme_ctrlr *ctrlr)
{
	TAILQ_INSERT_TAIL(&probe_ctx->init_ctrlrs, ctrlr, tailq);
}
int
spdk_nvme_detach(struct spdk_nvme_ctrlr *ctrlr)
{
	nvme_robust_mutex_lock(&g_spdk_nvme_driver->lock);

	nvme_ctrlr_proc_put_ref(ctrlr);

	if (nvme_ctrlr_get_ref_count(ctrlr) == 0) {
		nvme_io_msg_ctrlr_detach(ctrlr);
		if (nvme_ctrlr_shared(ctrlr)) {
			TAILQ_REMOVE(&g_spdk_nvme_driver->shared_attached_ctrlrs, ctrlr, tailq);
		} else {
			TAILQ_REMOVE(&g_nvme_attached_ctrlrs, ctrlr, tailq);
		}
		nvme_ctrlr_destruct(ctrlr);
	}

	nvme_robust_mutex_unlock(&g_spdk_nvme_driver->lock);
	return 0;
}
void
nvme_completion_poll_cb(void *arg, const struct spdk_nvme_cpl *cpl)
{
	struct nvme_completion_poll_status	*status = arg;

	if (status->timed_out) {
		/* There is no routine waiting for the completion of this request, free allocated memory */
		free(status);
		return;
	}

	/*
	 * Copy status into the argument passed by the caller, so that
	 *  the caller can check the status to determine if the
	 *  request passed or failed.
	 */
	memcpy(&status->cpl, cpl, sizeof(*cpl));
	status->done = true;
}
/**
 * Poll qpair for completions until a command completes.
 *
 * \param qpair queue to poll
 * \param status completion status. The user must fill this structure with zeroes before calling
 * this function
 * \param robust_mutex optional robust mutex to lock while polling qpair
 *
 * \return 0 if command completed without error,
 * -EIO if command completed with error,
 * -ECANCELED if command is not completed due to transport/device error
 *
 * The command to wait upon must be submitted with nvme_completion_poll_cb as the callback
 * and status as the callback argument.
 */
int
nvme_wait_for_completion_robust_lock(
	struct spdk_nvme_qpair *qpair,
	struct nvme_completion_poll_status *status,
	pthread_mutex_t *robust_mutex)
{
	int rc;

	while (status->done == false) {
		if (robust_mutex) {
			nvme_robust_mutex_lock(robust_mutex);
		}

		rc = spdk_nvme_qpair_process_completions(qpair, 0);

		if (robust_mutex) {
			nvme_robust_mutex_unlock(robust_mutex);
		}

		if (rc < 0) {
			status->cpl.status.sct = SPDK_NVME_SCT_GENERIC;
			status->cpl.status.sc = SPDK_NVME_SC_ABORTED_SQ_DELETION;
			if (status->done == false) {
				status->timed_out = true;
			}
			return -ECANCELED;
		}
	}

	return spdk_nvme_cpl_is_error(&status->cpl) ? -EIO : 0;
}
int
nvme_wait_for_completion(struct spdk_nvme_qpair *qpair,
			 struct nvme_completion_poll_status *status)
{
	return nvme_wait_for_completion_robust_lock(qpair, status, NULL);
}
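
/*
 * Typical calling pattern for these helpers (a sketch; the admin command
 * itself is a placeholder).  The status object must start zeroed, and
 * because nvme_completion_poll_cb() frees a timed-out status, callers
 * allocate it on the heap and only free it themselves when it did not
 * time out:
 *
 *	struct nvme_completion_poll_status *status;
 *
 *	status = calloc(1, sizeof(*status));
 *	if (status == NULL) {
 *		return -ENOMEM;
 *	}
 *	rc = spdk_nvme_ctrlr_cmd_admin_raw(ctrlr, &cmd, NULL, 0,
 *					   nvme_completion_poll_cb, status);
 *	if (rc == 0) {
 *		rc = nvme_wait_for_completion(ctrlr->adminq, status);
 *	}
 *	if (!status->timed_out) {
 *		free(status);
 *	}
 */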
/**
 * Poll qpair for completions until a command completes.
 *
 * \param qpair queue to poll
 * \param status completion status. The user must fill this structure with zeroes before calling
 * this function
 * \param timeout_in_secs optional timeout in seconds; pass 0 to poll without a time limit
 *
 * \return 0 if command completed without error,
 * -EIO if command completed with error,
 * -ECANCELED if command is not completed due to transport/device error or time expired
 *
 * The command to wait upon must be submitted with nvme_completion_poll_cb as the callback
 * and status as the callback argument.
 */
int
nvme_wait_for_completion_timeout(struct spdk_nvme_qpair *qpair,
				 struct nvme_completion_poll_status *status,
				 uint64_t timeout_in_secs)
{
	uint64_t timeout_tsc = 0;
	int rc = 0;

	if (timeout_in_secs) {
		timeout_tsc = spdk_get_ticks() + timeout_in_secs * spdk_get_ticks_hz();
	}

	while (status->done == false) {
		rc = spdk_nvme_qpair_process_completions(qpair, 0);

		if (rc < 0) {
			status->cpl.status.sct = SPDK_NVME_SCT_GENERIC;
			status->cpl.status.sc = SPDK_NVME_SC_ABORTED_SQ_DELETION;
			break;
		}
		if (timeout_tsc && spdk_get_ticks() > timeout_tsc) {
			break;
		}
	}

	if (status->done == false || rc < 0) {
		if (status->done == false) {
			status->timed_out = true;
		}
		return -ECANCELED;
	}

	return spdk_nvme_cpl_is_error(&status->cpl) ? -EIO : 0;
}
static void
nvme_user_copy_cmd_complete(void *arg, const struct spdk_nvme_cpl *cpl)
{
	struct nvme_request *req = arg;
	enum spdk_nvme_data_transfer xfer;

	if (req->user_buffer && req->payload_size) {
		/* Copy back to the user buffer and free the contig buffer */
		assert(nvme_payload_type(&req->payload) == NVME_PAYLOAD_TYPE_CONTIG);
		xfer = spdk_nvme_opc_get_data_transfer(req->cmd.opc);
		if (xfer == SPDK_NVME_DATA_CONTROLLER_TO_HOST ||
		    xfer == SPDK_NVME_DATA_BIDIRECTIONAL) {
			assert(req->pid == getpid());
			memcpy(req->user_buffer, req->payload.contig_or_cb_arg, req->payload_size);
		}

		spdk_free(req->payload.contig_or_cb_arg);
	}

	/* Call the user's original callback now that the buffer has been copied */
	req->user_cb_fn(req->user_cb_arg, cpl);
}
/**
 * Allocate a request as well as a DMA-capable buffer to copy to/from the user's buffer.
 *
 * This is intended for use in non-fast-path functions (admin commands, reservations, etc.)
 * where the overhead of a copy is not a problem.
 */
struct nvme_request *
nvme_allocate_request_user_copy(struct spdk_nvme_qpair *qpair,
				void *buffer, uint32_t payload_size, spdk_nvme_cmd_cb cb_fn,
				void *cb_arg, bool host_to_controller)
{
	struct nvme_request *req;
	void *dma_buffer = NULL;

	if (buffer && payload_size) {
		dma_buffer = spdk_zmalloc(payload_size, 4096, NULL,
					  SPDK_ENV_SOCKET_ID_ANY, SPDK_MALLOC_DMA);
		if (!dma_buffer) {
			return NULL;
		}

		if (host_to_controller) {
			memcpy(dma_buffer, buffer, payload_size);
		}
	}

	req = nvme_allocate_request_contig(qpair, dma_buffer, payload_size, nvme_user_copy_cmd_complete,
					   NULL);
	if (!req) {
		spdk_free(dma_buffer);
		return NULL;
	}

	req->user_cb_fn = cb_fn;
	req->user_cb_arg = cb_arg;
	req->user_buffer = buffer;
	req->cb_arg = req;

	return req;
}
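
/*
 * Illustrative sketch of how this helper is meant to be used; the opcode
 * and buffer names below are placeholders, not a specific caller from
 * this file:
 *
 *	req = nvme_allocate_request_user_copy(ctrlr->adminq, user_buf, size,
 *					      cb_fn, cb_arg, false);
 *	if (req == NULL) {
 *		return -ENOMEM;
 *	}
 *	req->cmd.opc = SPDK_NVME_OPC_GET_LOG_PAGE;  (hypothetical opcode)
 *	rc = nvme_qpair_submit_request(ctrlr->adminq, req);
 */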
/**
 * Check if a request has exceeded the controller timeout.
 *
 * \param req request to check for timeout.
 * \param cid command ID for command submitted by req (will be passed to timeout_cb_fn)
 * \param active_proc per-process data for the controller associated with req
 * \param now_tick current time from spdk_get_ticks()
 * \return 0 if requests submitted more recently than req should still be checked for timeouts, or
 * 1 if requests newer than req need not be checked.
 *
 * The request's timeout callback will be called if needed; the caller is only responsible for
 * calling this function on each outstanding request.
 */
int
nvme_request_check_timeout(struct nvme_request *req, uint16_t cid,
			   struct spdk_nvme_ctrlr_process *active_proc,
			   uint64_t now_tick)
{
	struct spdk_nvme_qpair *qpair = req->qpair;
	struct spdk_nvme_ctrlr *ctrlr = qpair->ctrlr;

	assert(active_proc->timeout_cb_fn != NULL);

	if (req->timed_out || req->submit_tick == 0) {
		return 0;
	}

	if (req->pid != g_spdk_nvme_pid) {
		return 0;
	}

	if (nvme_qpair_is_admin_queue(qpair) &&
	    req->cmd.opc == SPDK_NVME_OPC_ASYNC_EVENT_REQUEST) {
		return 0;
	}

	if (req->submit_tick + active_proc->timeout_ticks > now_tick) {
		return 1;
	}

	req->timed_out = true;

	/*
	 * We don't want to expose the admin queue to the user,
	 * so when we're timing out admin commands set the
	 * qpair to NULL.
	 */
	active_proc->timeout_cb_fn(active_proc->timeout_cb_arg, ctrlr,
				   nvme_qpair_is_admin_queue(qpair) ? NULL : qpair,
				   cid);
	return 0;
}
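
/*
 * The callback invoked above is the one applications register with
 * spdk_nvme_ctrlr_register_timeout_callback().  A minimal sketch, assuming
 * a 5 second timeout (what the handler does - log, abort, reset - is up to
 * the application):
 *
 *	static void
 *	timeout_cb(void *cb_arg, struct spdk_nvme_ctrlr *ctrlr,
 *		   struct spdk_nvme_qpair *qpair, uint16_t cid)
 *	{
 *		(qpair is NULL when an admin command timed out, see above)
 *		SPDK_ERRLOG("command %u timed out\n", cid);
 *	}
 *
 *	spdk_nvme_ctrlr_register_timeout_callback(ctrlr, 5 * 1000 * 1000,
 *						  timeout_cb, NULL);
 */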
int
nvme_robust_mutex_init_shared(pthread_mutex_t *mtx)
{
	int rc = 0;

#ifdef __FreeBSD__
	pthread_mutex_init(mtx, NULL);
#else
	pthread_mutexattr_t attr;

	if (pthread_mutexattr_init(&attr)) {
		return -1;
	}
	if (pthread_mutexattr_setpshared(&attr, PTHREAD_PROCESS_SHARED) ||
	    pthread_mutexattr_setrobust(&attr, PTHREAD_MUTEX_ROBUST) ||
	    pthread_mutex_init(mtx, &attr)) {
		rc = -1;
	}
	pthread_mutexattr_destroy(&attr);
#endif

	return rc;
}
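
/*
 * The matching nvme_robust_mutex_lock()/unlock() helpers (in
 * nvme_internal.h) are what make the robustness useful: roughly, when a
 * process dies while holding the mutex, the next pthread_mutex_lock()
 * returns EOWNERDEAD and the lock is made usable again with
 * pthread_mutex_consistent().  Sketch of that recovery path:
 *
 *	rc = pthread_mutex_lock(mtx);
 *	if (rc == EOWNERDEAD) {
 *		rc = pthread_mutex_consistent(mtx);
 *	}
 */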
int
nvme_driver_init(void)
{
	static pthread_mutex_t g_init_mutex = PTHREAD_MUTEX_INITIALIZER;
	int ret = 0;
	/* Any socket ID */
	int socket_id = -1;

	/* Use a special process-private mutex to ensure the global
	 * nvme driver object (g_spdk_nvme_driver) gets initialized by
	 * only one thread.  Once that object is established and its
	 * mutex is initialized, we can unlock this mutex and use that
	 * one instead.
	 */
	pthread_mutex_lock(&g_init_mutex);

	/* Each process needs its own pid. */
	g_spdk_nvme_pid = getpid();

	/*
	 * Only one thread from one process will do this driver init work.
	 * The primary process will reserve the shared memory and do the
	 *  initialization.
	 * The secondary process will lookup the existing reserved memory.
	 */
	if (spdk_process_is_primary()) {
		/* The unique named memzone has already been reserved. */
		if (g_spdk_nvme_driver != NULL) {
			pthread_mutex_unlock(&g_init_mutex);
			return 0;
		} else {
			g_spdk_nvme_driver = spdk_memzone_reserve(SPDK_NVME_DRIVER_NAME,
					     sizeof(struct nvme_driver), socket_id,
					     SPDK_MEMZONE_NO_IOVA_CONTIG);
		}

		if (g_spdk_nvme_driver == NULL) {
			SPDK_ERRLOG("primary process failed to reserve memory\n");
			pthread_mutex_unlock(&g_init_mutex);
			return -1;
		}
	} else {
		g_spdk_nvme_driver = spdk_memzone_lookup(SPDK_NVME_DRIVER_NAME);

		/* The unique named memzone was already reserved by the primary process. */
		if (g_spdk_nvme_driver != NULL) {
			int ms_waited = 0;

			/* Wait for the nvme driver to get initialized. */
			while ((g_spdk_nvme_driver->initialized == false) &&
			       (ms_waited < g_nvme_driver_timeout_ms)) {
				ms_waited++;
				nvme_delay(1000); /* delay 1ms */
			}
			if (g_spdk_nvme_driver->initialized == false) {
				SPDK_ERRLOG("timeout waiting for primary process to init\n");
				pthread_mutex_unlock(&g_init_mutex);
				return -1;
			}
		} else {
			SPDK_ERRLOG("primary process is not started yet\n");
			pthread_mutex_unlock(&g_init_mutex);
			return -1;
		}

		pthread_mutex_unlock(&g_init_mutex);
		return 0;
	}

	/*
	 * At this moment, only one thread from the primary process will do
	 * the g_spdk_nvme_driver initialization
	 */
	assert(spdk_process_is_primary());

	ret = nvme_robust_mutex_init_shared(&g_spdk_nvme_driver->lock);
	if (ret != 0) {
		SPDK_ERRLOG("failed to initialize mutex\n");
		spdk_memzone_free(SPDK_NVME_DRIVER_NAME);
		pthread_mutex_unlock(&g_init_mutex);
		return ret;
	}

	/* The lock in the shared g_spdk_nvme_driver object is now ready to
	 * be used - so we can unlock the g_init_mutex here.
	 */
	pthread_mutex_unlock(&g_init_mutex);
	nvme_robust_mutex_lock(&g_spdk_nvme_driver->lock);

	g_spdk_nvme_driver->initialized = false;
	g_spdk_nvme_driver->hotplug_fd = nvme_uevent_connect();
	if (g_spdk_nvme_driver->hotplug_fd < 0) {
		SPDK_DEBUGLOG(SPDK_LOG_NVME, "Failed to open uevent netlink socket\n");
	}

	TAILQ_INIT(&g_spdk_nvme_driver->shared_attached_ctrlrs);

	spdk_uuid_generate(&g_spdk_nvme_driver->default_extended_host_id);

	g_spdk_nvme_driver->initialized = true;

	nvme_robust_mutex_unlock(&g_spdk_nvme_driver->lock);

	return ret;
}
/* This function must only be called while holding g_spdk_nvme_driver->lock */
static int
nvme_ctrlr_probe(const struct spdk_nvme_transport_id *trid,
		 struct spdk_nvme_probe_ctx *probe_ctx, void *devhandle)
{
	struct spdk_nvme_ctrlr *ctrlr;
	struct spdk_nvme_ctrlr_opts opts;

	assert(trid != NULL);

	spdk_nvme_ctrlr_get_default_ctrlr_opts(&opts, sizeof(opts));

	if (!probe_ctx->probe_cb || probe_ctx->probe_cb(probe_ctx->cb_ctx, trid, &opts)) {
		ctrlr = nvme_get_ctrlr_by_trid_unsafe(trid);
		if (ctrlr) {
			/* This ctrlr already exists.
			 * Increase the ref count before calling attach_cb() as the user may
			 * call nvme_detach() immediately. */
			nvme_ctrlr_proc_get_ref(ctrlr);

			if (probe_ctx->attach_cb) {
				nvme_robust_mutex_unlock(&g_spdk_nvme_driver->lock);
				probe_ctx->attach_cb(probe_ctx->cb_ctx, &ctrlr->trid, ctrlr, &ctrlr->opts);
				nvme_robust_mutex_lock(&g_spdk_nvme_driver->lock);
			}
			return 0;
		}

		ctrlr = nvme_transport_ctrlr_construct(trid, &opts, devhandle);
		if (ctrlr == NULL) {
			SPDK_ERRLOG("Failed to construct NVMe controller for SSD: %s\n", trid->traddr);
			return -1;
		}
		ctrlr->remove_cb = probe_ctx->remove_cb;
		ctrlr->cb_ctx = probe_ctx->cb_ctx;

		if (ctrlr->quirks & NVME_QUIRK_MINIMUM_IO_QUEUE_SIZE &&
		    ctrlr->opts.io_queue_size == DEFAULT_IO_QUEUE_SIZE) {
			/* If the user specifically set an IO queue size different than the
			 * default, use that value.  Otherwise overwrite with the quirked value.
			 * This allows this quirk to be overridden when necessary.
			 * However, cap.mqes still needs to be respected.
			 */
			ctrlr->opts.io_queue_size = spdk_min(DEFAULT_IO_QUEUE_SIZE_FOR_QUIRK, ctrlr->cap.bits.mqes + 1u);
		}

		nvme_qpair_set_state(ctrlr->adminq, NVME_QPAIR_ENABLED);
		TAILQ_INSERT_TAIL(&probe_ctx->init_ctrlrs, ctrlr, tailq);
		return 0;
	}

	return 1;
}
static int
nvme_ctrlr_poll_internal(struct spdk_nvme_ctrlr *ctrlr,
			 struct spdk_nvme_probe_ctx *probe_ctx)
{
	int rc = 0;

	rc = nvme_ctrlr_process_init(ctrlr);

	if (rc) {
		/* Controller failed to initialize. */
		TAILQ_REMOVE(&probe_ctx->init_ctrlrs, ctrlr, tailq);
		SPDK_ERRLOG("Failed to initialize SSD: %s\n", ctrlr->trid.traddr);
		nvme_ctrlr_fail(ctrlr, false);
		nvme_ctrlr_destruct(ctrlr);
		return rc;
	}

	if (ctrlr->state != NVME_CTRLR_STATE_READY) {
		return 0;
	}

	STAILQ_INIT(&ctrlr->io_producers);

	/*
	 * Controller has been initialized.
	 *  Move it to the attached_ctrlrs list.
	 */
	TAILQ_REMOVE(&probe_ctx->init_ctrlrs, ctrlr, tailq);

	nvme_robust_mutex_lock(&g_spdk_nvme_driver->lock);
	if (nvme_ctrlr_shared(ctrlr)) {
		TAILQ_INSERT_TAIL(&g_spdk_nvme_driver->shared_attached_ctrlrs, ctrlr, tailq);
	} else {
		TAILQ_INSERT_TAIL(&g_nvme_attached_ctrlrs, ctrlr, tailq);
	}

	/*
	 * Increase the ref count before calling attach_cb() as the user may
	 * call nvme_detach() immediately.
	 */
	nvme_ctrlr_proc_get_ref(ctrlr);
	nvme_robust_mutex_unlock(&g_spdk_nvme_driver->lock);

	if (probe_ctx->attach_cb) {
		probe_ctx->attach_cb(probe_ctx->cb_ctx, &ctrlr->trid, ctrlr, &ctrlr->opts);
	}

	return 0;
}
static int
nvme_init_controllers(struct spdk_nvme_probe_ctx *probe_ctx)
{
	int rc = 0;

	while (true) {
		rc = spdk_nvme_probe_poll_async(probe_ctx);
		if (rc != -EAGAIN) {
			return rc;
		}
	}
}
/* This function must not be called while holding g_spdk_nvme_driver->lock */
static struct spdk_nvme_ctrlr *
nvme_get_ctrlr_by_trid(const struct spdk_nvme_transport_id *trid)
{
	struct spdk_nvme_ctrlr *ctrlr;

	nvme_robust_mutex_lock(&g_spdk_nvme_driver->lock);
	ctrlr = nvme_get_ctrlr_by_trid_unsafe(trid);
	nvme_robust_mutex_unlock(&g_spdk_nvme_driver->lock);

	return ctrlr;
}
/* This function must be called while holding g_spdk_nvme_driver->lock */
struct spdk_nvme_ctrlr *
nvme_get_ctrlr_by_trid_unsafe(const struct spdk_nvme_transport_id *trid)
{
	struct spdk_nvme_ctrlr *ctrlr;

	/* Search per-process list */
	TAILQ_FOREACH(ctrlr, &g_nvme_attached_ctrlrs, tailq) {
		if (spdk_nvme_transport_id_compare(&ctrlr->trid, trid) == 0) {
			return ctrlr;
		}
	}

	/* Search multi-process shared list */
	TAILQ_FOREACH(ctrlr, &g_spdk_nvme_driver->shared_attached_ctrlrs, tailq) {
		if (spdk_nvme_transport_id_compare(&ctrlr->trid, trid) == 0) {
			return ctrlr;
		}
	}

	return NULL;
}
/* This function must only be called while holding g_spdk_nvme_driver->lock */
static int
nvme_probe_internal(struct spdk_nvme_probe_ctx *probe_ctx,
		    bool direct_connect)
{
	int rc;
	struct spdk_nvme_ctrlr *ctrlr, *ctrlr_tmp;

	spdk_nvme_trid_populate_transport(&probe_ctx->trid, probe_ctx->trid.trtype);
	if (!spdk_nvme_transport_available_by_name(probe_ctx->trid.trstring)) {
		SPDK_ERRLOG("NVMe trtype %u not available\n", probe_ctx->trid.trtype);
		return -1;
	}

	nvme_robust_mutex_lock(&g_spdk_nvme_driver->lock);

	rc = nvme_transport_ctrlr_scan(probe_ctx, direct_connect);
	if (rc != 0) {
		SPDK_ERRLOG("NVMe ctrlr scan failed\n");
		TAILQ_FOREACH_SAFE(ctrlr, &probe_ctx->init_ctrlrs, tailq, ctrlr_tmp) {
			TAILQ_REMOVE(&probe_ctx->init_ctrlrs, ctrlr, tailq);
			nvme_transport_ctrlr_destruct(ctrlr);
		}
		nvme_robust_mutex_unlock(&g_spdk_nvme_driver->lock);
		return -1;
	}

	/*
	 * Probe controllers on the shared_attached_ctrlrs list
	 */
	if (!spdk_process_is_primary() && (probe_ctx->trid.trtype == SPDK_NVME_TRANSPORT_PCIE)) {
		TAILQ_FOREACH(ctrlr, &g_spdk_nvme_driver->shared_attached_ctrlrs, tailq) {
			/* Do not attach other ctrlrs if the user specified a valid trid */
			if ((strlen(probe_ctx->trid.traddr) != 0) &&
			    (spdk_nvme_transport_id_compare(&probe_ctx->trid, &ctrlr->trid))) {
				continue;
			}

			/* Do not attach if we failed to initialize it in this process */
			if (nvme_ctrlr_get_current_process(ctrlr) == NULL) {
				continue;
			}

			nvme_ctrlr_proc_get_ref(ctrlr);

			/*
			 * Unlock while calling attach_cb() so the user can call other functions
			 *  that may take the driver lock, like nvme_detach().
			 */
			if (probe_ctx->attach_cb) {
				nvme_robust_mutex_unlock(&g_spdk_nvme_driver->lock);
				probe_ctx->attach_cb(probe_ctx->cb_ctx, &ctrlr->trid, ctrlr, &ctrlr->opts);
				nvme_robust_mutex_lock(&g_spdk_nvme_driver->lock);
			}
		}
	}

	nvme_robust_mutex_unlock(&g_spdk_nvme_driver->lock);

	return 0;
}
static void
nvme_probe_ctx_init(struct spdk_nvme_probe_ctx *probe_ctx,
		    const struct spdk_nvme_transport_id *trid,
		    void *cb_ctx,
		    spdk_nvme_probe_cb probe_cb,
		    spdk_nvme_attach_cb attach_cb,
		    spdk_nvme_remove_cb remove_cb)
{
	probe_ctx->trid = *trid;
	probe_ctx->cb_ctx = cb_ctx;
	probe_ctx->probe_cb = probe_cb;
	probe_ctx->attach_cb = attach_cb;
	probe_ctx->remove_cb = remove_cb;
	TAILQ_INIT(&probe_ctx->init_ctrlrs);
}
int
spdk_nvme_probe(const struct spdk_nvme_transport_id *trid, void *cb_ctx,
		spdk_nvme_probe_cb probe_cb, spdk_nvme_attach_cb attach_cb,
		spdk_nvme_remove_cb remove_cb)
{
	struct spdk_nvme_transport_id trid_pcie;
	struct spdk_nvme_probe_ctx *probe_ctx;

	if (trid == NULL) {
		memset(&trid_pcie, 0, sizeof(trid_pcie));
		spdk_nvme_trid_populate_transport(&trid_pcie, SPDK_NVME_TRANSPORT_PCIE);
		trid = &trid_pcie;
	}

	probe_ctx = spdk_nvme_probe_async(trid, cb_ctx, probe_cb,
					  attach_cb, remove_cb);
	if (!probe_ctx) {
		SPDK_ERRLOG("Create probe context failed\n");
		return -1;
	}

	/*
	 * Keep going even if one or more nvme_attach() calls failed,
	 *  but maintain the value of rc to signal errors when we return.
	 */
	return nvme_init_controllers(probe_ctx);
}
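
/*
 * Example of driving the synchronous probe from an application (callback
 * names are placeholders; passing a NULL trid enumerates local PCIe
 * controllers, as implemented above):
 *
 *	static bool
 *	probe_cb(void *cb_ctx, const struct spdk_nvme_transport_id *trid,
 *		 struct spdk_nvme_ctrlr_opts *opts)
 *	{
 *		return true;	(attach to every controller found)
 *	}
 *
 *	static void
 *	attach_cb(void *cb_ctx, const struct spdk_nvme_transport_id *trid,
 *		  struct spdk_nvme_ctrlr *ctrlr,
 *		  const struct spdk_nvme_ctrlr_opts *opts)
 *	{
 *		printf("attached to %s\n", trid->traddr);
 *	}
 *
 *	rc = spdk_nvme_probe(NULL, NULL, probe_cb, attach_cb, NULL);
 */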
static bool
nvme_connect_probe_cb(void *cb_ctx, const struct spdk_nvme_transport_id *trid,
		      struct spdk_nvme_ctrlr_opts *opts)
{
	struct spdk_nvme_ctrlr_opts *requested_opts = cb_ctx;

	assert(requested_opts);
	memcpy(opts, requested_opts, sizeof(*opts));

	return true;
}
static void
nvme_ctrlr_opts_init(struct spdk_nvme_ctrlr_opts *opts,
		     const struct spdk_nvme_ctrlr_opts *opts_user,
		     size_t opts_size_user)
{
	assert(opts);
	assert(opts_user);

	spdk_nvme_ctrlr_get_default_ctrlr_opts(opts, opts_size_user);

#define FIELD_OK(field) \
	offsetof(struct spdk_nvme_ctrlr_opts, field) + sizeof(opts->field) <= (opts->opts_size)

	if (FIELD_OK(num_io_queues)) {
		opts->num_io_queues = opts_user->num_io_queues;
	}

	if (FIELD_OK(use_cmb_sqs)) {
		opts->use_cmb_sqs = opts_user->use_cmb_sqs;
	}

	if (FIELD_OK(no_shn_notification)) {
		opts->no_shn_notification = opts_user->no_shn_notification;
	}

	if (FIELD_OK(arb_mechanism)) {
		opts->arb_mechanism = opts_user->arb_mechanism;
	}

	if (FIELD_OK(arbitration_burst)) {
		opts->arbitration_burst = opts_user->arbitration_burst;
	}

	if (FIELD_OK(low_priority_weight)) {
		opts->low_priority_weight = opts_user->low_priority_weight;
	}

	if (FIELD_OK(medium_priority_weight)) {
		opts->medium_priority_weight = opts_user->medium_priority_weight;
	}

	if (FIELD_OK(high_priority_weight)) {
		opts->high_priority_weight = opts_user->high_priority_weight;
	}

	if (FIELD_OK(keep_alive_timeout_ms)) {
		opts->keep_alive_timeout_ms = opts_user->keep_alive_timeout_ms;
	}

	if (FIELD_OK(transport_retry_count)) {
		opts->transport_retry_count = opts_user->transport_retry_count;
	}

	if (FIELD_OK(io_queue_size)) {
		opts->io_queue_size = opts_user->io_queue_size;
	}

	if (FIELD_OK(hostnqn)) {
		memcpy(opts->hostnqn, opts_user->hostnqn, sizeof(opts_user->hostnqn));
	}

	if (FIELD_OK(io_queue_requests)) {
		opts->io_queue_requests = opts_user->io_queue_requests;
	}

	if (FIELD_OK(src_addr)) {
		memcpy(opts->src_addr, opts_user->src_addr, sizeof(opts_user->src_addr));
	}

	if (FIELD_OK(src_svcid)) {
		memcpy(opts->src_svcid, opts_user->src_svcid, sizeof(opts_user->src_svcid));
	}

	if (FIELD_OK(host_id)) {
		memcpy(opts->host_id, opts_user->host_id, sizeof(opts_user->host_id));
	}
	if (FIELD_OK(extended_host_id)) {
		memcpy(opts->extended_host_id, opts_user->extended_host_id,
		       sizeof(opts_user->extended_host_id));
	}

	if (FIELD_OK(command_set)) {
		opts->command_set = opts_user->command_set;
	}

	if (FIELD_OK(admin_timeout_ms)) {
		opts->admin_timeout_ms = opts_user->admin_timeout_ms;
	}

	if (FIELD_OK(header_digest)) {
		opts->header_digest = opts_user->header_digest;
	}

	if (FIELD_OK(data_digest)) {
		opts->data_digest = opts_user->data_digest;
	}

	if (FIELD_OK(disable_error_logging)) {
		opts->disable_error_logging = opts_user->disable_error_logging;
	}

	if (FIELD_OK(transport_ack_timeout)) {
		opts->transport_ack_timeout = opts_user->transport_ack_timeout;
	}

	if (FIELD_OK(admin_queue_size)) {
		opts->admin_queue_size = opts_user->admin_queue_size;
	}
#undef FIELD_OK
}
struct spdk_nvme_ctrlr *
spdk_nvme_connect(const struct spdk_nvme_transport_id *trid,
		  const struct spdk_nvme_ctrlr_opts *opts, size_t opts_size)
{
	int rc;
	struct spdk_nvme_ctrlr *ctrlr = NULL;
	struct spdk_nvme_probe_ctx *probe_ctx;
	struct spdk_nvme_ctrlr_opts *opts_local_p = NULL;
	struct spdk_nvme_ctrlr_opts opts_local;

	if (trid == NULL) {
		SPDK_ERRLOG("No transport ID specified\n");
		return NULL;
	}

	if (opts) {
		opts_local_p = &opts_local;
		nvme_ctrlr_opts_init(opts_local_p, opts, opts_size);
	}

	probe_ctx = spdk_nvme_connect_async(trid, opts_local_p, NULL);
	if (!probe_ctx) {
		SPDK_ERRLOG("Create probe context failed\n");
		return NULL;
	}

	rc = nvme_init_controllers(probe_ctx);
	if (rc != 0) {
		return NULL;
	}

	ctrlr = nvme_get_ctrlr_by_trid(trid);

	return ctrlr;
}
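
/*
 * Example of connecting to one controller with default options; the
 * fabrics address values below are placeholders:
 *
 *	struct spdk_nvme_transport_id trid = {};
 *	struct spdk_nvme_ctrlr *ctrlr;
 *
 *	spdk_nvme_transport_id_parse(&trid,
 *			"trtype:TCP adrfam:IPv4 traddr:192.168.1.10 "
 *			"trsvcid:4420 subnqn:nqn.2016-06.io.spdk:cnode1");
 *	ctrlr = spdk_nvme_connect(&trid, NULL, 0);
 */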
void
spdk_nvme_trid_populate_transport(struct spdk_nvme_transport_id *trid,
				  enum spdk_nvme_transport_type trtype)
{
	const char *trstring = "";

	trid->trtype = trtype;
	switch (trtype) {
	case SPDK_NVME_TRANSPORT_FC:
		trstring = SPDK_NVME_TRANSPORT_NAME_FC;
		break;
	case SPDK_NVME_TRANSPORT_PCIE:
		trstring = SPDK_NVME_TRANSPORT_NAME_PCIE;
		break;
	case SPDK_NVME_TRANSPORT_RDMA:
		trstring = SPDK_NVME_TRANSPORT_NAME_RDMA;
		break;
	case SPDK_NVME_TRANSPORT_TCP:
		trstring = SPDK_NVME_TRANSPORT_NAME_TCP;
		break;
	case SPDK_NVME_TRANSPORT_CUSTOM:
	default:
		SPDK_ERRLOG("don't use this for custom transports\n");
		assert(0);
		return;
	}
	snprintf(trid->trstring, SPDK_NVMF_TRSTRING_MAX_LEN, "%s", trstring);
}
int
spdk_nvme_transport_id_populate_trstring(struct spdk_nvme_transport_id *trid, const char *trstring)
{
	int len, i, rc;

	if (trstring == NULL) {
		return -EINVAL;
	}

	len = strnlen(trstring, SPDK_NVMF_TRSTRING_MAX_LEN);
	if (len == SPDK_NVMF_TRSTRING_MAX_LEN) {
		return -EINVAL;
	}

	rc = snprintf(trid->trstring, SPDK_NVMF_TRSTRING_MAX_LEN, "%s", trstring);
	if (rc < 0) {
		return rc;
	}

	/* cast official trstring to uppercase version of input. */
	for (i = 0; i < len; i++) {
		trid->trstring[i] = toupper(trid->trstring[i]);
	}
	return 0;
}
int
spdk_nvme_transport_id_parse_trtype(enum spdk_nvme_transport_type *trtype, const char *str)
{
	if (trtype == NULL || str == NULL) {
		return -EINVAL;
	}

	if (strcasecmp(str, "PCIe") == 0) {
		*trtype = SPDK_NVME_TRANSPORT_PCIE;
	} else if (strcasecmp(str, "RDMA") == 0) {
		*trtype = SPDK_NVME_TRANSPORT_RDMA;
	} else if (strcasecmp(str, "FC") == 0) {
		*trtype = SPDK_NVME_TRANSPORT_FC;
	} else if (strcasecmp(str, "TCP") == 0) {
		*trtype = SPDK_NVME_TRANSPORT_TCP;
	} else {
		*trtype = SPDK_NVME_TRANSPORT_CUSTOM;
	}
	return 0;
}
const char *
spdk_nvme_transport_id_trtype_str(enum spdk_nvme_transport_type trtype)
{
	switch (trtype) {
	case SPDK_NVME_TRANSPORT_PCIE:
		return "PCIe";
	case SPDK_NVME_TRANSPORT_RDMA:
		return "RDMA";
	case SPDK_NVME_TRANSPORT_FC:
		return "FC";
	case SPDK_NVME_TRANSPORT_TCP:
		return "TCP";
	case SPDK_NVME_TRANSPORT_CUSTOM:
		return "CUSTOM";
	default:
		return NULL;
	}
}
int
spdk_nvme_transport_id_parse_adrfam(enum spdk_nvmf_adrfam *adrfam, const char *str)
{
	if (adrfam == NULL || str == NULL) {
		return -EINVAL;
	}

	if (strcasecmp(str, "IPv4") == 0) {
		*adrfam = SPDK_NVMF_ADRFAM_IPV4;
	} else if (strcasecmp(str, "IPv6") == 0) {
		*adrfam = SPDK_NVMF_ADRFAM_IPV6;
	} else if (strcasecmp(str, "IB") == 0) {
		*adrfam = SPDK_NVMF_ADRFAM_IB;
	} else if (strcasecmp(str, "FC") == 0) {
		*adrfam = SPDK_NVMF_ADRFAM_FC;
	} else {
		return -ENOENT;
	}
	return 0;
}
const char *
spdk_nvme_transport_id_adrfam_str(enum spdk_nvmf_adrfam adrfam)
{
	switch (adrfam) {
	case SPDK_NVMF_ADRFAM_IPV4:
		return "IPv4";
	case SPDK_NVMF_ADRFAM_IPV6:
		return "IPv6";
	case SPDK_NVMF_ADRFAM_IB:
		return "IB";
	case SPDK_NVMF_ADRFAM_FC:
		return "FC";
	default:
		return NULL;
	}
}
static size_t
parse_next_key(const char **str, char *key, char *val, size_t key_buf_size, size_t val_buf_size)
{
	const char *sep, *sep1;
	const char *whitespace = " \t\n";
	size_t key_len, val_len;

	*str += strspn(*str, whitespace);

	sep = strchr(*str, ':');
	if (!sep) {
		sep = strchr(*str, '=');
		if (!sep) {
			SPDK_ERRLOG("Key without ':' or '=' separator\n");
			return 0;
		}
	} else {
		sep1 = strchr(*str, '=');
		if ((sep1 != NULL) && (sep1 < sep)) {
			sep = sep1;
		}
	}

	key_len = sep - *str;
	if (key_len >= key_buf_size) {
		SPDK_ERRLOG("Key length %zu greater than maximum allowed %zu\n",
			    key_len, key_buf_size - 1);
		return 0;
	}

	memcpy(key, *str, key_len);
	key[key_len] = '\0';

	*str += key_len + 1; /* Skip key: */
	val_len = strcspn(*str, whitespace);
	if (val_len == 0) {
		SPDK_ERRLOG("Key without value\n");
		return 0;
	}

	if (val_len >= val_buf_size) {
		SPDK_ERRLOG("Value length %zu greater than maximum allowed %zu\n",
			    val_len, val_buf_size - 1);
		return 0;
	}

	memcpy(val, *str, val_len);
	val[val_len] = '\0';

	*str += val_len;

	return val_len;
}
int
spdk_nvme_transport_id_parse(struct spdk_nvme_transport_id *trid, const char *str)
{
	size_t val_len;
	char key[32];
	char val[1024];

	if (trid == NULL || str == NULL) {
		return -EINVAL;
	}

	while (*str != '\0') {

		val_len = parse_next_key(&str, key, val, sizeof(key), sizeof(val));

		if (val_len == 0) {
			SPDK_ERRLOG("Failed to parse transport ID\n");
			return -EINVAL;
		}

		if (strcasecmp(key, "trtype") == 0) {
			if (spdk_nvme_transport_id_populate_trstring(trid, val) != 0) {
				SPDK_ERRLOG("invalid transport '%s'\n", val);
				return -EINVAL;
			}
			if (spdk_nvme_transport_id_parse_trtype(&trid->trtype, val) != 0) {
				SPDK_ERRLOG("Unknown trtype '%s'\n", val);
				return -EINVAL;
			}
		} else if (strcasecmp(key, "adrfam") == 0) {
			if (spdk_nvme_transport_id_parse_adrfam(&trid->adrfam, val) != 0) {
				SPDK_ERRLOG("Unknown adrfam '%s'\n", val);
				return -EINVAL;
			}
		} else if (strcasecmp(key, "traddr") == 0) {
			if (val_len > SPDK_NVMF_TRADDR_MAX_LEN) {
				SPDK_ERRLOG("traddr length %zu greater than maximum allowed %u\n",
					    val_len, SPDK_NVMF_TRADDR_MAX_LEN);
				return -EINVAL;
			}
			memcpy(trid->traddr, val, val_len + 1);
		} else if (strcasecmp(key, "trsvcid") == 0) {
			if (val_len > SPDK_NVMF_TRSVCID_MAX_LEN) {
				SPDK_ERRLOG("trsvcid length %zu greater than maximum allowed %u\n",
					    val_len, SPDK_NVMF_TRSVCID_MAX_LEN);
				return -EINVAL;
			}
			memcpy(trid->trsvcid, val, val_len + 1);
		} else if (strcasecmp(key, "priority") == 0) {
			if (val_len > SPDK_NVMF_PRIORITY_MAX_LEN) {
				SPDK_ERRLOG("priority length %zu greater than maximum allowed %u\n",
					    val_len, SPDK_NVMF_PRIORITY_MAX_LEN);
				return -EINVAL;
			}
			trid->priority = spdk_strtol(val, 10);
		} else if (strcasecmp(key, "subnqn") == 0) {
			if (val_len > SPDK_NVMF_NQN_MAX_LEN) {
				SPDK_ERRLOG("subnqn length %zu greater than maximum allowed %u\n",
					    val_len, SPDK_NVMF_NQN_MAX_LEN);
				return -EINVAL;
			}
			memcpy(trid->subnqn, val, val_len + 1);
		} else if (strcasecmp(key, "hostaddr") == 0) {
			continue;
		} else if (strcasecmp(key, "hostsvcid") == 0) {
			continue;
		} else if (strcasecmp(key, "ns") == 0) {
			/*
			 * Special case.  The namespace id parameter may
			 * optionally be passed in the transport id string
			 * for an SPDK application (e.g. nvme/perf)
			 * and additionally parsed therein to limit
			 * targeting a specific namespace.  For this
			 * scenario, just silently ignore this key
			 * rather than letting it default to logging
			 * it as an invalid key.
			 */
			continue;
		} else if (strcasecmp(key, "alt_traddr") == 0) {
			/*
			 * Used by applications for enabling transport ID failover.
			 * Please see the case above for more information on custom parameters.
			 */
			continue;
		} else {
			SPDK_ERRLOG("Unknown transport ID key '%s'\n", key);
		}
	}

	return 0;
}
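
/*
 * Example transport ID strings accepted by the parser above (key:value
 * pairs separated by whitespace; '=' also works as a separator):
 *
 *	trtype:PCIe traddr:0000:04:00.0
 *	trtype:RDMA adrfam:IPv4 traddr:192.168.100.8 trsvcid:4420 subnqn:nqn.2016-06.io.spdk:cnode1
 */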
int
spdk_nvme_host_id_parse(struct spdk_nvme_host_id *hostid, const char *str)
{
	size_t key_size = 32;
	size_t val_size = 1024;
	size_t val_len;
	char key[key_size];
	char val[val_size];

	if (hostid == NULL || str == NULL) {
		return -EINVAL;
	}

	while (*str != '\0') {

		val_len = parse_next_key(&str, key, val, key_size, val_size);

		if (val_len == 0) {
			SPDK_ERRLOG("Failed to parse host ID\n");
			return val_len;
		}

		/* Ignore the rest of the options from the transport ID. */
		if (strcasecmp(key, "trtype") == 0) {
			continue;
		} else if (strcasecmp(key, "adrfam") == 0) {
			continue;
		} else if (strcasecmp(key, "traddr") == 0) {
			continue;
		} else if (strcasecmp(key, "trsvcid") == 0) {
			continue;
		} else if (strcasecmp(key, "subnqn") == 0) {
			continue;
		} else if (strcasecmp(key, "priority") == 0) {
			continue;
		} else if (strcasecmp(key, "ns") == 0) {
			continue;
		} else if (strcasecmp(key, "hostaddr") == 0) {
			if (val_len > SPDK_NVMF_TRADDR_MAX_LEN) {
				SPDK_ERRLOG("hostaddr length %zu greater than maximum allowed %u\n",
					    val_len, SPDK_NVMF_TRADDR_MAX_LEN);
				return -EINVAL;
			}
			memcpy(hostid->hostaddr, val, val_len + 1);
		} else if (strcasecmp(key, "hostsvcid") == 0) {
			if (val_len > SPDK_NVMF_TRSVCID_MAX_LEN) {
				SPDK_ERRLOG("hostsvcid length %zu greater than maximum allowed %u\n",
					    val_len, SPDK_NVMF_TRSVCID_MAX_LEN);
				return -EINVAL;
			}
			memcpy(hostid->hostsvcid, val, val_len + 1);
		} else {
			SPDK_ERRLOG("Unknown transport ID key '%s'\n", key);
		}
	}

	return 0;
}
static int
cmp_int(int a, int b)
{
	return a - b;
}
int
spdk_nvme_transport_id_compare(const struct spdk_nvme_transport_id *trid1,
			       const struct spdk_nvme_transport_id *trid2)
{
	int cmp;

	if (trid1->trtype == SPDK_NVME_TRANSPORT_CUSTOM) {
		cmp = strcasecmp(trid1->trstring, trid2->trstring);
	} else {
		cmp = cmp_int(trid1->trtype, trid2->trtype);
	}

	if (cmp) {
		return cmp;
	}

	if (trid1->trtype == SPDK_NVME_TRANSPORT_PCIE) {
		struct spdk_pci_addr pci_addr1 = {};
		struct spdk_pci_addr pci_addr2 = {};

		/* Normalize PCI addresses before comparing */
		if (spdk_pci_addr_parse(&pci_addr1, trid1->traddr) < 0 ||
		    spdk_pci_addr_parse(&pci_addr2, trid2->traddr) < 0) {
			return -1;
		}

		/* PCIe transport ID only uses trtype and traddr */
		return spdk_pci_addr_compare(&pci_addr1, &pci_addr2);
	}

	cmp = strcasecmp(trid1->traddr, trid2->traddr);
	if (cmp) {
		return cmp;
	}

	cmp = cmp_int(trid1->adrfam, trid2->adrfam);
	if (cmp) {
		return cmp;
	}

	cmp = strcasecmp(trid1->trsvcid, trid2->trsvcid);
	if (cmp) {
		return cmp;
	}

	cmp = strcmp(trid1->subnqn, trid2->subnqn);
	if (cmp) {
		return cmp;
	}

	return 0;
}
int
spdk_nvme_prchk_flags_parse(uint32_t *prchk_flags, const char *str)
{
	size_t val_len;
	char key[32];
	char val[1024];

	if (prchk_flags == NULL || str == NULL) {
		return -EINVAL;
	}

	while (*str != '\0') {
		val_len = parse_next_key(&str, key, val, sizeof(key), sizeof(val));

		if (val_len == 0) {
			SPDK_ERRLOG("Failed to parse prchk\n");
			return -EINVAL;
		}

		if (strcasecmp(key, "prchk") == 0) {
			if (strcasestr(val, "reftag") != NULL) {
				*prchk_flags |= SPDK_NVME_IO_FLAGS_PRCHK_REFTAG;
			}
			if (strcasestr(val, "guard") != NULL) {
				*prchk_flags |= SPDK_NVME_IO_FLAGS_PRCHK_GUARD;
			}
		} else {
			SPDK_ERRLOG("Unknown key '%s'\n", key);
			return -EINVAL;
		}
	}

	return 0;
}
const char *
spdk_nvme_prchk_flags_str(uint32_t prchk_flags)
{
	if (prchk_flags & SPDK_NVME_IO_FLAGS_PRCHK_REFTAG) {
		if (prchk_flags & SPDK_NVME_IO_FLAGS_PRCHK_GUARD) {
			return "prchk:reftag|guard";
		} else {
			return "prchk:reftag";
		}
	} else {
		if (prchk_flags & SPDK_NVME_IO_FLAGS_PRCHK_GUARD) {
			return "prchk:guard";
		} else {
			return NULL;
		}
	}
}
*
1331 spdk_nvme_probe_async(const struct spdk_nvme_transport_id
*trid
,
1333 spdk_nvme_probe_cb probe_cb
,
1334 spdk_nvme_attach_cb attach_cb
,
1335 spdk_nvme_remove_cb remove_cb
)
1338 struct spdk_nvme_probe_ctx
*probe_ctx
;
1340 rc
= nvme_driver_init();
1345 probe_ctx
= calloc(1, sizeof(*probe_ctx
));
1350 nvme_probe_ctx_init(probe_ctx
, trid
, cb_ctx
, probe_cb
, attach_cb
, remove_cb
);
1351 rc
= nvme_probe_internal(probe_ctx
, false);
int
spdk_nvme_probe_poll_async(struct spdk_nvme_probe_ctx *probe_ctx)
{
	int rc = 0;
	struct spdk_nvme_ctrlr *ctrlr, *ctrlr_tmp;

	if (!spdk_process_is_primary() && probe_ctx->trid.trtype == SPDK_NVME_TRANSPORT_PCIE) {
		free(probe_ctx);
		return 0;
	}

	TAILQ_FOREACH_SAFE(ctrlr, &probe_ctx->init_ctrlrs, tailq, ctrlr_tmp) {
		rc = nvme_ctrlr_poll_internal(ctrlr, probe_ctx);
		if (rc != 0) {
			rc = -EIO;
			break;
		}
	}

	if (rc != 0 || TAILQ_EMPTY(&probe_ctx->init_ctrlrs)) {
		nvme_robust_mutex_lock(&g_spdk_nvme_driver->lock);
		g_spdk_nvme_driver->initialized = true;
		nvme_robust_mutex_unlock(&g_spdk_nvme_driver->lock);
		free(probe_ctx);
		return rc;
	}

	return -EAGAIN;
}
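
/*
 * Sketch of the asynchronous probe flow (probe_cb/attach_cb as in the
 * spdk_nvme_probe() example above).  The context is freed by
 * spdk_nvme_probe_poll_async() itself once it stops returning -EAGAIN:
 *
 *	struct spdk_nvme_probe_ctx *probe_ctx;
 *
 *	probe_ctx = spdk_nvme_probe_async(NULL, NULL, probe_cb, attach_cb, NULL);
 *	if (probe_ctx != NULL) {
 *		while (spdk_nvme_probe_poll_async(probe_ctx) == -EAGAIN) {
 *			(do other work between polls)
 *		}
 *	}
 */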
struct spdk_nvme_probe_ctx *
spdk_nvme_connect_async(const struct spdk_nvme_transport_id *trid,
			const struct spdk_nvme_ctrlr_opts *opts,
			spdk_nvme_attach_cb attach_cb)
{
	int rc;
	spdk_nvme_probe_cb probe_cb = NULL;
	struct spdk_nvme_probe_ctx *probe_ctx;

	rc = nvme_driver_init();
	if (rc != 0) {
		return NULL;
	}

	probe_ctx = calloc(1, sizeof(*probe_ctx));
	if (!probe_ctx) {
		return NULL;
	}

	if (opts) {
		probe_cb = nvme_connect_probe_cb;
	}

	nvme_probe_ctx_init(probe_ctx, trid, (void *)opts, probe_cb, attach_cb, NULL);
	rc = nvme_probe_internal(probe_ctx, true);
	if (rc != 0) {
		free(probe_ctx);
		return NULL;
	}

	return probe_ctx;
}
)