4 * Copyright (c) Intel Corporation.
5 * Copyright (c) 2017, IBM Corporation.
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
12 * * Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * * Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in
16 * the documentation and/or other materials provided with the
18 * * Neither the name of Intel Corporation nor the names of its
19 * contributors may be used to endorse or promote products derived
20 * from this software without specific prior written permission.
22 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
23 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
24 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
25 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
26 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
27 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
28 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
29 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
30 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
31 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
32 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 * NVMe over PCIe transport
39 #include "spdk/stdinc.h"
41 #include "spdk/likely.h"
42 #include "nvme_internal.h"
43 #include "nvme_uevent.h"
46 * Number of completion queue entries to process before ringing the
47 * completion queue doorbell.
49 #define NVME_MIN_COMPLETIONS (1)
50 #define NVME_MAX_COMPLETIONS (128)
52 #define NVME_ADMIN_ENTRIES (128)
55 * NVME_MAX_SGL_DESCRIPTORS defines the maximum number of descriptors in one SGL
58 #define NVME_MAX_SGL_DESCRIPTORS (251)
60 #define NVME_MAX_PRP_LIST_ENTRIES (505)
62 struct nvme_pcie_enum_ctx
{
63 struct spdk_nvme_probe_ctx
*probe_ctx
;
64 struct spdk_pci_addr pci_addr
;
68 /* PCIe transport extensions for spdk_nvme_ctrlr */
69 struct nvme_pcie_ctrlr
{
70 struct spdk_nvme_ctrlr ctrlr
;
72 /** NVMe MMIO register space */
73 volatile struct spdk_nvme_registers
*regs
;
75 /** NVMe MMIO register size */
78 /* BAR mapping address which contains controller memory buffer */
79 void *cmb_bar_virt_addr
;
81 /* BAR physical address which contains controller memory buffer */
82 uint64_t cmb_bar_phys_addr
;
84 /* Controller memory buffer size in Bytes */
87 /* Current offset of controller memory buffer, relative to start of BAR virt addr */
88 uint64_t cmb_current_offset
;
90 /* Last valid offset into CMB, this differs if CMB memory registration occurs or not */
91 uint64_t cmb_max_offset
;
93 void *cmb_mem_register_addr
;
94 size_t cmb_mem_register_size
;
96 bool cmb_io_data_supported
;
98 /** stride in uint32_t units between doorbell registers (1 = 4 bytes, 2 = 8 bytes, ...) */
99 uint32_t doorbell_stride_u32
;
101 /* Opaque handle to associated PCI device. */
102 struct spdk_pci_device
*devhandle
;
104 /* File descriptor returned from spdk_pci_device_claim(). Closed when ctrlr is detached. */
107 /* Flag to indicate the MMIO register has been remapped */
111 struct nvme_tracker
{
112 TAILQ_ENTRY(nvme_tracker
) tq_list
;
114 struct nvme_request
*req
;
120 spdk_nvme_cmd_cb cb_fn
;
123 uint64_t prp_sgl_bus_addr
;
126 uint64_t prp
[NVME_MAX_PRP_LIST_ENTRIES
];
127 struct spdk_nvme_sgl_descriptor sgl
[NVME_MAX_SGL_DESCRIPTORS
];
131 * struct nvme_tracker must be exactly 4K so that the prp[] array does not cross a page boundary
132 * and so that there is no padding required to meet alignment requirements.
134 SPDK_STATIC_ASSERT(sizeof(struct nvme_tracker
) == 4096, "nvme_tracker is not 4K");
135 SPDK_STATIC_ASSERT((offsetof(struct nvme_tracker
, u
.sgl
) & 7) == 0, "SGL must be Qword aligned");
137 /* PCIe transport extensions for spdk_nvme_qpair */
138 struct nvme_pcie_qpair
{
139 /* Submission queue tail doorbell */
140 volatile uint32_t *sq_tdbl
;
142 /* Completion queue head doorbell */
143 volatile uint32_t *cq_hdbl
;
145 /* Submission queue */
146 struct spdk_nvme_cmd
*cmd
;
148 /* Completion queue */
149 struct spdk_nvme_cpl
*cpl
;
151 TAILQ_HEAD(, nvme_tracker
) free_tr
;
152 TAILQ_HEAD(nvme_outstanding_tr_head
, nvme_tracker
) outstanding_tr
;
154 /* Array of trackers indexed by command ID. */
155 struct nvme_tracker
*tr
;
157 uint16_t num_entries
;
159 uint16_t max_completions_cap
;
161 uint16_t last_sq_tail
;
168 uint8_t delay_pcie_doorbell
: 1;
169 uint8_t has_shadow_doorbell
: 1;
173 * Base qpair structure.
174 * This is located after the hot data in this structure so that the important parts of
175 * nvme_pcie_qpair are in the same cache line.
177 struct spdk_nvme_qpair qpair
;
180 /* Submission queue shadow tail doorbell */
181 volatile uint32_t *sq_tdbl
;
183 /* Completion queue shadow head doorbell */
184 volatile uint32_t *cq_hdbl
;
186 /* Submission queue event index */
187 volatile uint32_t *sq_eventidx
;
189 /* Completion queue event index */
190 volatile uint32_t *cq_eventidx
;
194 * Fields below this point should not be touched on the normal I/O path.
199 uint64_t cmd_bus_addr
;
200 uint64_t cpl_bus_addr
;
203 static int nvme_pcie_ctrlr_attach(struct spdk_nvme_probe_ctx
*probe_ctx
,
204 struct spdk_pci_addr
*pci_addr
);
205 static int nvme_pcie_qpair_construct(struct spdk_nvme_qpair
*qpair
);
206 static int nvme_pcie_qpair_destroy(struct spdk_nvme_qpair
*qpair
);
208 __thread
struct nvme_pcie_ctrlr
*g_thread_mmio_ctrlr
= NULL
;
209 static volatile uint16_t g_signal_lock
;
210 static bool g_sigset
= false;
211 static int hotplug_fd
= -1;
214 nvme_sigbus_fault_sighandler(int signum
, siginfo_t
*info
, void *ctx
)
218 if (!__sync_bool_compare_and_swap(&g_signal_lock
, 0, 1)) {
222 assert(g_thread_mmio_ctrlr
!= NULL
);
224 if (!g_thread_mmio_ctrlr
->is_remapped
) {
225 map_address
= mmap((void *)g_thread_mmio_ctrlr
->regs
, g_thread_mmio_ctrlr
->regs_size
,
226 PROT_READ
| PROT_WRITE
,
227 MAP_PRIVATE
| MAP_ANONYMOUS
| MAP_FIXED
, -1, 0);
228 if (map_address
== MAP_FAILED
) {
229 SPDK_ERRLOG("mmap failed\n");
233 memset(map_address
, 0xFF, sizeof(struct spdk_nvme_registers
));
234 g_thread_mmio_ctrlr
->regs
= (volatile struct spdk_nvme_registers
*)map_address
;
235 g_thread_mmio_ctrlr
->is_remapped
= true;
242 nvme_pcie_ctrlr_setup_signal(void)
246 sa
.sa_sigaction
= nvme_sigbus_fault_sighandler
;
247 sigemptyset(&sa
.sa_mask
);
248 sa
.sa_flags
= SA_SIGINFO
;
249 sigaction(SIGBUS
, &sa
, NULL
);
252 static inline struct nvme_pcie_ctrlr
*
253 nvme_pcie_ctrlr(struct spdk_nvme_ctrlr
*ctrlr
)
255 assert(ctrlr
->trid
.trtype
== SPDK_NVME_TRANSPORT_PCIE
);
256 return SPDK_CONTAINEROF(ctrlr
, struct nvme_pcie_ctrlr
, ctrlr
);
260 _nvme_pcie_hotplug_monitor(struct spdk_nvme_probe_ctx
*probe_ctx
)
262 struct spdk_nvme_ctrlr
*ctrlr
, *tmp
;
263 struct spdk_uevent event
;
264 struct spdk_pci_addr pci_addr
;
265 union spdk_nvme_csts_register csts
;
266 struct spdk_nvme_ctrlr_process
*proc
;
268 while (spdk_get_uevent(hotplug_fd
, &event
) > 0) {
269 if (event
.subsystem
== SPDK_NVME_UEVENT_SUBSYSTEM_UIO
||
270 event
.subsystem
== SPDK_NVME_UEVENT_SUBSYSTEM_VFIO
) {
271 if (event
.action
== SPDK_NVME_UEVENT_ADD
) {
272 SPDK_DEBUGLOG(SPDK_LOG_NVME
, "add nvme address: %s\n",
274 if (spdk_process_is_primary()) {
275 if (!spdk_pci_addr_parse(&pci_addr
, event
.traddr
)) {
276 nvme_pcie_ctrlr_attach(probe_ctx
, &pci_addr
);
279 } else if (event
.action
== SPDK_NVME_UEVENT_REMOVE
) {
280 struct spdk_nvme_transport_id trid
;
282 memset(&trid
, 0, sizeof(trid
));
283 trid
.trtype
= SPDK_NVME_TRANSPORT_PCIE
;
284 snprintf(trid
.traddr
, sizeof(trid
.traddr
), "%s", event
.traddr
);
286 ctrlr
= spdk_nvme_get_ctrlr_by_trid_unsafe(&trid
);
290 SPDK_DEBUGLOG(SPDK_LOG_NVME
, "remove nvme address: %s\n",
293 nvme_ctrlr_fail(ctrlr
, true);
295 /* get the user app to clean up and stop I/O */
296 if (probe_ctx
->remove_cb
) {
297 nvme_robust_mutex_unlock(&g_spdk_nvme_driver
->lock
);
298 probe_ctx
->remove_cb(probe_ctx
->cb_ctx
, ctrlr
);
299 nvme_robust_mutex_lock(&g_spdk_nvme_driver
->lock
);
305 /* This is a work around for vfio-attached device hot remove detection. */
306 TAILQ_FOREACH_SAFE(ctrlr
, &g_spdk_nvme_driver
->shared_attached_ctrlrs
, tailq
, tmp
) {
307 bool do_remove
= false;
309 if (ctrlr
->trid
.trtype
== SPDK_NVME_TRANSPORT_PCIE
) {
310 struct nvme_pcie_ctrlr
*pctrlr
= nvme_pcie_ctrlr(ctrlr
);
312 if (spdk_pci_device_is_removed(pctrlr
->devhandle
)) {
317 /* NVMe controller BAR must be mapped in the current process before any access. */
318 proc
= spdk_nvme_ctrlr_get_current_process(ctrlr
);
320 csts
= spdk_nvme_ctrlr_get_regs_csts(ctrlr
);
321 if (csts
.raw
== 0xffffffffU
) {
327 nvme_ctrlr_fail(ctrlr
, true);
328 if (probe_ctx
->remove_cb
) {
329 nvme_robust_mutex_unlock(&g_spdk_nvme_driver
->lock
);
330 probe_ctx
->remove_cb(probe_ctx
->cb_ctx
, ctrlr
);
331 nvme_robust_mutex_lock(&g_spdk_nvme_driver
->lock
);
338 static inline struct nvme_pcie_qpair
*
339 nvme_pcie_qpair(struct spdk_nvme_qpair
*qpair
)
341 assert(qpair
->trtype
== SPDK_NVME_TRANSPORT_PCIE
);
342 return SPDK_CONTAINEROF(qpair
, struct nvme_pcie_qpair
, qpair
);
345 static volatile void *
346 nvme_pcie_reg_addr(struct spdk_nvme_ctrlr
*ctrlr
, uint32_t offset
)
348 struct nvme_pcie_ctrlr
*pctrlr
= nvme_pcie_ctrlr(ctrlr
);
350 return (volatile void *)((uintptr_t)pctrlr
->regs
+ offset
);
354 nvme_pcie_ctrlr_set_reg_4(struct spdk_nvme_ctrlr
*ctrlr
, uint32_t offset
, uint32_t value
)
356 struct nvme_pcie_ctrlr
*pctrlr
= nvme_pcie_ctrlr(ctrlr
);
358 assert(offset
<= sizeof(struct spdk_nvme_registers
) - 4);
359 g_thread_mmio_ctrlr
= pctrlr
;
360 spdk_mmio_write_4(nvme_pcie_reg_addr(ctrlr
, offset
), value
);
361 g_thread_mmio_ctrlr
= NULL
;
366 nvme_pcie_ctrlr_set_reg_8(struct spdk_nvme_ctrlr
*ctrlr
, uint32_t offset
, uint64_t value
)
368 struct nvme_pcie_ctrlr
*pctrlr
= nvme_pcie_ctrlr(ctrlr
);
370 assert(offset
<= sizeof(struct spdk_nvme_registers
) - 8);
371 g_thread_mmio_ctrlr
= pctrlr
;
372 spdk_mmio_write_8(nvme_pcie_reg_addr(ctrlr
, offset
), value
);
373 g_thread_mmio_ctrlr
= NULL
;
378 nvme_pcie_ctrlr_get_reg_4(struct spdk_nvme_ctrlr
*ctrlr
, uint32_t offset
, uint32_t *value
)
380 struct nvme_pcie_ctrlr
*pctrlr
= nvme_pcie_ctrlr(ctrlr
);
382 assert(offset
<= sizeof(struct spdk_nvme_registers
) - 4);
383 assert(value
!= NULL
);
384 g_thread_mmio_ctrlr
= pctrlr
;
385 *value
= spdk_mmio_read_4(nvme_pcie_reg_addr(ctrlr
, offset
));
386 g_thread_mmio_ctrlr
= NULL
;
387 if (~(*value
) == 0) {
395 nvme_pcie_ctrlr_get_reg_8(struct spdk_nvme_ctrlr
*ctrlr
, uint32_t offset
, uint64_t *value
)
397 struct nvme_pcie_ctrlr
*pctrlr
= nvme_pcie_ctrlr(ctrlr
);
399 assert(offset
<= sizeof(struct spdk_nvme_registers
) - 8);
400 assert(value
!= NULL
);
401 g_thread_mmio_ctrlr
= pctrlr
;
402 *value
= spdk_mmio_read_8(nvme_pcie_reg_addr(ctrlr
, offset
));
403 g_thread_mmio_ctrlr
= NULL
;
404 if (~(*value
) == 0) {
412 nvme_pcie_ctrlr_set_asq(struct nvme_pcie_ctrlr
*pctrlr
, uint64_t value
)
414 return nvme_pcie_ctrlr_set_reg_8(&pctrlr
->ctrlr
, offsetof(struct spdk_nvme_registers
, asq
),
419 nvme_pcie_ctrlr_set_acq(struct nvme_pcie_ctrlr
*pctrlr
, uint64_t value
)
421 return nvme_pcie_ctrlr_set_reg_8(&pctrlr
->ctrlr
, offsetof(struct spdk_nvme_registers
, acq
),
426 nvme_pcie_ctrlr_set_aqa(struct nvme_pcie_ctrlr
*pctrlr
, const union spdk_nvme_aqa_register
*aqa
)
428 return nvme_pcie_ctrlr_set_reg_4(&pctrlr
->ctrlr
, offsetof(struct spdk_nvme_registers
, aqa
.raw
),
433 nvme_pcie_ctrlr_get_cmbloc(struct nvme_pcie_ctrlr
*pctrlr
, union spdk_nvme_cmbloc_register
*cmbloc
)
435 return nvme_pcie_ctrlr_get_reg_4(&pctrlr
->ctrlr
, offsetof(struct spdk_nvme_registers
, cmbloc
.raw
),
440 nvme_pcie_ctrlr_get_cmbsz(struct nvme_pcie_ctrlr
*pctrlr
, union spdk_nvme_cmbsz_register
*cmbsz
)
442 return nvme_pcie_ctrlr_get_reg_4(&pctrlr
->ctrlr
, offsetof(struct spdk_nvme_registers
, cmbsz
.raw
),
447 nvme_pcie_ctrlr_get_max_xfer_size(struct spdk_nvme_ctrlr
*ctrlr
)
450 * For commands requiring more than 2 PRP entries, one PRP will be
451 * embedded in the command (prp1), and the rest of the PRP entries
452 * will be in a list pointed to by the command (prp2). This means
453 * that real max number of PRP entries we support is 506+1, which
454 * results in a max xfer size of 506*ctrlr->page_size.
456 return NVME_MAX_PRP_LIST_ENTRIES
* ctrlr
->page_size
;
460 nvme_pcie_ctrlr_get_max_sges(struct spdk_nvme_ctrlr
*ctrlr
)
462 return NVME_MAX_SGL_DESCRIPTORS
;
466 nvme_pcie_ctrlr_map_cmb(struct nvme_pcie_ctrlr
*pctrlr
)
471 union spdk_nvme_cmbsz_register cmbsz
;
472 union spdk_nvme_cmbloc_register cmbloc
;
473 uint64_t size
, unit_size
, offset
, bar_size
, bar_phys_addr
;
474 uint64_t mem_register_start
, mem_register_end
;
476 if (nvme_pcie_ctrlr_get_cmbsz(pctrlr
, &cmbsz
) ||
477 nvme_pcie_ctrlr_get_cmbloc(pctrlr
, &cmbloc
)) {
478 SPDK_ERRLOG("get registers failed\n");
482 if (!cmbsz
.bits
.sz
) {
486 bir
= cmbloc
.bits
.bir
;
487 /* Values 0 2 3 4 5 are valid for BAR */
488 if (bir
> 5 || bir
== 1) {
492 /* unit size for 4KB/64KB/1MB/16MB/256MB/4GB/64GB */
493 unit_size
= (uint64_t)1 << (12 + 4 * cmbsz
.bits
.szu
);
494 /* controller memory buffer size in Bytes */
495 size
= unit_size
* cmbsz
.bits
.sz
;
496 /* controller memory buffer offset from BAR in Bytes */
497 offset
= unit_size
* cmbloc
.bits
.ofst
;
499 rc
= spdk_pci_device_map_bar(pctrlr
->devhandle
, bir
, &addr
,
500 &bar_phys_addr
, &bar_size
);
501 if ((rc
!= 0) || addr
== NULL
) {
505 if (offset
> bar_size
) {
509 if (size
> bar_size
- offset
) {
513 pctrlr
->cmb_bar_virt_addr
= addr
;
514 pctrlr
->cmb_bar_phys_addr
= bar_phys_addr
;
515 pctrlr
->cmb_size
= size
;
516 pctrlr
->cmb_current_offset
= offset
;
517 pctrlr
->cmb_max_offset
= offset
+ size
;
519 if (!cmbsz
.bits
.sqs
) {
520 pctrlr
->ctrlr
.opts
.use_cmb_sqs
= false;
523 /* If only SQS is supported use legacy mapping */
524 if (cmbsz
.bits
.sqs
&& !(cmbsz
.bits
.wds
|| cmbsz
.bits
.rds
)) {
528 /* If CMB is less than 4MiB in size then abort CMB mapping */
529 if (pctrlr
->cmb_size
< (1ULL << 22)) {
533 mem_register_start
= _2MB_PAGE((uintptr_t)pctrlr
->cmb_bar_virt_addr
+ offset
+ VALUE_2MB
- 1);
534 mem_register_end
= _2MB_PAGE((uintptr_t)pctrlr
->cmb_bar_virt_addr
+ offset
+ pctrlr
->cmb_size
);
535 pctrlr
->cmb_mem_register_addr
= (void *)mem_register_start
;
536 pctrlr
->cmb_mem_register_size
= mem_register_end
- mem_register_start
;
538 rc
= spdk_mem_register(pctrlr
->cmb_mem_register_addr
, pctrlr
->cmb_mem_register_size
);
540 SPDK_ERRLOG("spdk_mem_register() failed\n");
543 pctrlr
->cmb_current_offset
= mem_register_start
- ((uint64_t)pctrlr
->cmb_bar_virt_addr
);
544 pctrlr
->cmb_max_offset
= mem_register_end
- ((uint64_t)pctrlr
->cmb_bar_virt_addr
);
545 pctrlr
->cmb_io_data_supported
= true;
549 pctrlr
->cmb_bar_virt_addr
= NULL
;
550 pctrlr
->ctrlr
.opts
.use_cmb_sqs
= false;
555 nvme_pcie_ctrlr_unmap_cmb(struct nvme_pcie_ctrlr
*pctrlr
)
558 union spdk_nvme_cmbloc_register cmbloc
;
559 void *addr
= pctrlr
->cmb_bar_virt_addr
;
562 if (pctrlr
->cmb_mem_register_addr
) {
563 spdk_mem_unregister(pctrlr
->cmb_mem_register_addr
, pctrlr
->cmb_mem_register_size
);
566 if (nvme_pcie_ctrlr_get_cmbloc(pctrlr
, &cmbloc
)) {
567 SPDK_ERRLOG("get_cmbloc() failed\n");
570 rc
= spdk_pci_device_unmap_bar(pctrlr
->devhandle
, cmbloc
.bits
.bir
, addr
);
576 nvme_pcie_ctrlr_alloc_cmb(struct spdk_nvme_ctrlr
*ctrlr
, uint64_t length
, uint64_t aligned
,
579 struct nvme_pcie_ctrlr
*pctrlr
= nvme_pcie_ctrlr(ctrlr
);
580 uint64_t round_offset
;
582 round_offset
= pctrlr
->cmb_current_offset
;
583 round_offset
= (round_offset
+ (aligned
- 1)) & ~(aligned
- 1);
585 /* CMB may only consume part of the BAR, calculate accordingly */
586 if (round_offset
+ length
> pctrlr
->cmb_max_offset
) {
587 SPDK_ERRLOG("Tried to allocate past valid CMB range!\n");
591 *offset
= round_offset
;
592 pctrlr
->cmb_current_offset
= round_offset
+ length
;
597 volatile struct spdk_nvme_registers
*
598 nvme_pcie_ctrlr_get_registers(struct spdk_nvme_ctrlr
*ctrlr
)
600 struct nvme_pcie_ctrlr
*pctrlr
= nvme_pcie_ctrlr(ctrlr
);
606 nvme_pcie_ctrlr_alloc_cmb_io_buffer(struct spdk_nvme_ctrlr
*ctrlr
, size_t size
)
608 struct nvme_pcie_ctrlr
*pctrlr
= nvme_pcie_ctrlr(ctrlr
);
611 if (pctrlr
->cmb_bar_virt_addr
== NULL
) {
612 SPDK_DEBUGLOG(SPDK_LOG_NVME
, "CMB not available\n");
616 if (!pctrlr
->cmb_io_data_supported
) {
617 SPDK_DEBUGLOG(SPDK_LOG_NVME
, "CMB doesn't support I/O data\n");
621 if (nvme_pcie_ctrlr_alloc_cmb(ctrlr
, size
, 4, &offset
) != 0) {
622 SPDK_DEBUGLOG(SPDK_LOG_NVME
, "%zu-byte CMB allocation failed\n", size
);
626 return pctrlr
->cmb_bar_virt_addr
+ offset
;
630 nvme_pcie_ctrlr_free_cmb_io_buffer(struct spdk_nvme_ctrlr
*ctrlr
, void *buf
, size_t size
)
633 * Do nothing for now.
634 * TODO: Track free space so buffers may be reused.
636 SPDK_ERRLOG("%s: no deallocation for CMB buffers yet!\n",
642 nvme_pcie_ctrlr_allocate_bars(struct nvme_pcie_ctrlr
*pctrlr
)
646 uint64_t phys_addr
, size
;
648 rc
= spdk_pci_device_map_bar(pctrlr
->devhandle
, 0, &addr
,
650 pctrlr
->regs
= (volatile struct spdk_nvme_registers
*)addr
;
651 if ((pctrlr
->regs
== NULL
) || (rc
!= 0)) {
652 SPDK_ERRLOG("nvme_pcicfg_map_bar failed with rc %d or bar %p\n",
657 pctrlr
->regs_size
= size
;
658 nvme_pcie_ctrlr_map_cmb(pctrlr
);
664 nvme_pcie_ctrlr_free_bars(struct nvme_pcie_ctrlr
*pctrlr
)
667 void *addr
= (void *)pctrlr
->regs
;
669 if (pctrlr
->ctrlr
.is_removed
) {
673 rc
= nvme_pcie_ctrlr_unmap_cmb(pctrlr
);
675 SPDK_ERRLOG("nvme_ctrlr_unmap_cmb failed with error code %d\n", rc
);
680 /* NOTE: addr may have been remapped here. We're relying on DPDK to call
683 rc
= spdk_pci_device_unmap_bar(pctrlr
->devhandle
, 0, addr
);
689 nvme_pcie_ctrlr_construct_admin_qpair(struct spdk_nvme_ctrlr
*ctrlr
)
691 struct nvme_pcie_qpair
*pqpair
;
694 pqpair
= spdk_zmalloc(sizeof(*pqpair
), 64, NULL
, SPDK_ENV_SOCKET_ID_ANY
, SPDK_MALLOC_SHARE
);
695 if (pqpair
== NULL
) {
699 pqpair
->num_entries
= NVME_ADMIN_ENTRIES
;
700 pqpair
->flags
.delay_pcie_doorbell
= 0;
702 ctrlr
->adminq
= &pqpair
->qpair
;
704 rc
= nvme_qpair_init(ctrlr
->adminq
,
707 SPDK_NVME_QPRIO_URGENT
,
713 return nvme_pcie_qpair_construct(ctrlr
->adminq
);
716 /* This function must only be called while holding g_spdk_nvme_driver->lock */
718 pcie_nvme_enum_cb(void *ctx
, struct spdk_pci_device
*pci_dev
)
720 struct spdk_nvme_transport_id trid
= {};
721 struct nvme_pcie_enum_ctx
*enum_ctx
= ctx
;
722 struct spdk_nvme_ctrlr
*ctrlr
;
723 struct spdk_pci_addr pci_addr
;
725 pci_addr
= spdk_pci_device_get_addr(pci_dev
);
727 trid
.trtype
= SPDK_NVME_TRANSPORT_PCIE
;
728 spdk_pci_addr_fmt(trid
.traddr
, sizeof(trid
.traddr
), &pci_addr
);
730 ctrlr
= spdk_nvme_get_ctrlr_by_trid_unsafe(&trid
);
731 if (!spdk_process_is_primary()) {
733 SPDK_ERRLOG("Controller must be constructed in the primary process first.\n");
737 return nvme_ctrlr_add_process(ctrlr
, pci_dev
);
740 /* check whether user passes the pci_addr */
741 if (enum_ctx
->has_pci_addr
&&
742 (spdk_pci_addr_compare(&pci_addr
, &enum_ctx
->pci_addr
) != 0)) {
746 return nvme_ctrlr_probe(&trid
, enum_ctx
->probe_ctx
, pci_dev
);
750 nvme_pcie_ctrlr_scan(struct spdk_nvme_probe_ctx
*probe_ctx
,
753 struct nvme_pcie_enum_ctx enum_ctx
= {};
755 enum_ctx
.probe_ctx
= probe_ctx
;
757 if (strlen(probe_ctx
->trid
.traddr
) != 0) {
758 if (spdk_pci_addr_parse(&enum_ctx
.pci_addr
, probe_ctx
->trid
.traddr
)) {
761 enum_ctx
.has_pci_addr
= true;
764 if (hotplug_fd
< 0) {
765 hotplug_fd
= spdk_uevent_connect();
766 if (hotplug_fd
< 0) {
767 SPDK_DEBUGLOG(SPDK_LOG_NVME
, "Failed to open uevent netlink socket\n");
770 _nvme_pcie_hotplug_monitor(probe_ctx
);
773 if (enum_ctx
.has_pci_addr
== false) {
774 return spdk_pci_enumerate(spdk_pci_nvme_get_driver(),
775 pcie_nvme_enum_cb
, &enum_ctx
);
777 return spdk_pci_device_attach(spdk_pci_nvme_get_driver(),
778 pcie_nvme_enum_cb
, &enum_ctx
, &enum_ctx
.pci_addr
);
783 nvme_pcie_ctrlr_attach(struct spdk_nvme_probe_ctx
*probe_ctx
, struct spdk_pci_addr
*pci_addr
)
785 struct nvme_pcie_enum_ctx enum_ctx
;
787 enum_ctx
.probe_ctx
= probe_ctx
;
788 enum_ctx
.has_pci_addr
= true;
789 enum_ctx
.pci_addr
= *pci_addr
;
791 return spdk_pci_enumerate(spdk_pci_nvme_get_driver(), pcie_nvme_enum_cb
, &enum_ctx
);
794 struct spdk_nvme_ctrlr
*nvme_pcie_ctrlr_construct(const struct spdk_nvme_transport_id
*trid
,
795 const struct spdk_nvme_ctrlr_opts
*opts
,
798 struct spdk_pci_device
*pci_dev
= devhandle
;
799 struct nvme_pcie_ctrlr
*pctrlr
;
800 union spdk_nvme_cap_register cap
;
801 union spdk_nvme_vs_register vs
;
804 struct spdk_pci_id pci_id
;
805 struct spdk_pci_addr pci_addr
;
807 if (spdk_pci_addr_parse(&pci_addr
, trid
->traddr
)) {
808 SPDK_ERRLOG("could not parse pci address\n");
812 claim_fd
= spdk_pci_device_claim(&pci_addr
);
814 SPDK_ERRLOG("could not claim device %s\n", trid
->traddr
);
818 pctrlr
= spdk_zmalloc(sizeof(struct nvme_pcie_ctrlr
), 64, NULL
,
819 SPDK_ENV_SOCKET_ID_ANY
, SPDK_MALLOC_SHARE
);
820 if (pctrlr
== NULL
) {
822 SPDK_ERRLOG("could not allocate ctrlr\n");
826 pctrlr
->is_remapped
= false;
827 pctrlr
->ctrlr
.is_removed
= false;
828 pctrlr
->ctrlr
.trid
.trtype
= SPDK_NVME_TRANSPORT_PCIE
;
829 pctrlr
->devhandle
= devhandle
;
830 pctrlr
->ctrlr
.opts
= *opts
;
831 pctrlr
->claim_fd
= claim_fd
;
832 memcpy(&pctrlr
->ctrlr
.trid
, trid
, sizeof(pctrlr
->ctrlr
.trid
));
834 rc
= nvme_pcie_ctrlr_allocate_bars(pctrlr
);
841 /* Enable PCI busmaster and disable INTx */
842 spdk_pci_device_cfg_read32(pci_dev
, &cmd_reg
, 4);
844 spdk_pci_device_cfg_write32(pci_dev
, cmd_reg
, 4);
846 if (nvme_ctrlr_get_cap(&pctrlr
->ctrlr
, &cap
)) {
847 SPDK_ERRLOG("get_cap() failed\n");
853 if (nvme_ctrlr_get_vs(&pctrlr
->ctrlr
, &vs
)) {
854 SPDK_ERRLOG("get_vs() failed\n");
860 nvme_ctrlr_init_cap(&pctrlr
->ctrlr
, &cap
, &vs
);
862 /* Doorbell stride is 2 ^ (dstrd + 2),
863 * but we want multiples of 4, so drop the + 2 */
864 pctrlr
->doorbell_stride_u32
= 1 << cap
.bits
.dstrd
;
866 rc
= nvme_ctrlr_construct(&pctrlr
->ctrlr
);
868 nvme_ctrlr_destruct(&pctrlr
->ctrlr
);
872 pci_id
= spdk_pci_device_get_id(pci_dev
);
873 pctrlr
->ctrlr
.quirks
= nvme_get_quirks(&pci_id
);
875 rc
= nvme_pcie_ctrlr_construct_admin_qpair(&pctrlr
->ctrlr
);
877 nvme_ctrlr_destruct(&pctrlr
->ctrlr
);
881 /* Construct the primary process properties */
882 rc
= nvme_ctrlr_add_process(&pctrlr
->ctrlr
, pci_dev
);
884 nvme_ctrlr_destruct(&pctrlr
->ctrlr
);
888 if (g_sigset
!= true) {
889 nvme_pcie_ctrlr_setup_signal();
893 return &pctrlr
->ctrlr
;
897 nvme_pcie_ctrlr_enable(struct spdk_nvme_ctrlr
*ctrlr
)
899 struct nvme_pcie_ctrlr
*pctrlr
= nvme_pcie_ctrlr(ctrlr
);
900 struct nvme_pcie_qpair
*padminq
= nvme_pcie_qpair(ctrlr
->adminq
);
901 union spdk_nvme_aqa_register aqa
;
903 if (nvme_pcie_ctrlr_set_asq(pctrlr
, padminq
->cmd_bus_addr
)) {
904 SPDK_ERRLOG("set_asq() failed\n");
908 if (nvme_pcie_ctrlr_set_acq(pctrlr
, padminq
->cpl_bus_addr
)) {
909 SPDK_ERRLOG("set_acq() failed\n");
914 /* acqs and asqs are 0-based. */
915 aqa
.bits
.acqs
= nvme_pcie_qpair(ctrlr
->adminq
)->num_entries
- 1;
916 aqa
.bits
.asqs
= nvme_pcie_qpair(ctrlr
->adminq
)->num_entries
- 1;
918 if (nvme_pcie_ctrlr_set_aqa(pctrlr
, &aqa
)) {
919 SPDK_ERRLOG("set_aqa() failed\n");
927 nvme_pcie_ctrlr_destruct(struct spdk_nvme_ctrlr
*ctrlr
)
929 struct nvme_pcie_ctrlr
*pctrlr
= nvme_pcie_ctrlr(ctrlr
);
930 struct spdk_pci_device
*devhandle
= nvme_ctrlr_proc_get_devhandle(ctrlr
);
932 close(pctrlr
->claim_fd
);
935 nvme_pcie_qpair_destroy(ctrlr
->adminq
);
938 nvme_ctrlr_destruct_finish(ctrlr
);
940 nvme_ctrlr_free_processes(ctrlr
);
942 nvme_pcie_ctrlr_free_bars(pctrlr
);
945 spdk_pci_device_detach(devhandle
);
954 nvme_qpair_construct_tracker(struct nvme_tracker
*tr
, uint16_t cid
, uint64_t phys_addr
)
956 tr
->prp_sgl_bus_addr
= phys_addr
+ offsetof(struct nvme_tracker
, u
.prp
);
962 nvme_pcie_qpair_reset(struct spdk_nvme_qpair
*qpair
)
964 struct nvme_pcie_qpair
*pqpair
= nvme_pcie_qpair(qpair
);
966 pqpair
->last_sq_tail
= pqpair
->sq_tail
= pqpair
->cq_head
= 0;
969 * First time through the completion queue, HW will set phase
970 * bit on completions to 1. So set this to 1 here, indicating
971 * we're looking for a 1 to know which entries have completed.
972 * we'll toggle the bit each time when the completion queue
975 pqpair
->flags
.phase
= 1;
977 memset(pqpair
->cmd
, 0,
978 pqpair
->num_entries
* sizeof(struct spdk_nvme_cmd
));
979 memset(pqpair
->cpl
, 0,
980 pqpair
->num_entries
* sizeof(struct spdk_nvme_cpl
));
986 nvme_pcie_qpair_construct(struct spdk_nvme_qpair
*qpair
)
988 struct spdk_nvme_ctrlr
*ctrlr
= qpair
->ctrlr
;
989 struct nvme_pcie_ctrlr
*pctrlr
= nvme_pcie_ctrlr(ctrlr
);
990 struct nvme_pcie_qpair
*pqpair
= nvme_pcie_qpair(qpair
);
991 struct nvme_tracker
*tr
;
993 volatile uint32_t *doorbell_base
;
995 uint16_t num_trackers
;
996 size_t page_align
= VALUE_2MB
;
997 uint32_t flags
= SPDK_MALLOC_DMA
;
1000 * Limit the maximum number of completions to return per call to prevent wraparound,
1001 * and calculate how many trackers can be submitted at once without overflowing the
1004 pqpair
->max_completions_cap
= pqpair
->num_entries
/ 4;
1005 pqpair
->max_completions_cap
= spdk_max(pqpair
->max_completions_cap
, NVME_MIN_COMPLETIONS
);
1006 pqpair
->max_completions_cap
= spdk_min(pqpair
->max_completions_cap
, NVME_MAX_COMPLETIONS
);
1007 num_trackers
= pqpair
->num_entries
- pqpair
->max_completions_cap
;
1009 SPDK_INFOLOG(SPDK_LOG_NVME
, "max_completions_cap = %" PRIu16
" num_trackers = %" PRIu16
"\n",
1010 pqpair
->max_completions_cap
, num_trackers
);
1012 assert(num_trackers
!= 0);
1014 pqpair
->sq_in_cmb
= false;
1016 if (nvme_qpair_is_admin_queue(&pqpair
->qpair
)) {
1017 flags
|= SPDK_MALLOC_SHARE
;
1020 /* cmd and cpl rings must be aligned on page size boundaries. */
1021 if (ctrlr
->opts
.use_cmb_sqs
) {
1022 if (nvme_pcie_ctrlr_alloc_cmb(ctrlr
, pqpair
->num_entries
* sizeof(struct spdk_nvme_cmd
),
1023 sysconf(_SC_PAGESIZE
), &offset
) == 0) {
1024 pqpair
->cmd
= pctrlr
->cmb_bar_virt_addr
+ offset
;
1025 pqpair
->cmd_bus_addr
= pctrlr
->cmb_bar_phys_addr
+ offset
;
1026 pqpair
->sq_in_cmb
= true;
1030 /* To ensure physical address contiguity we make each ring occupy
1031 * a single hugepage only. See MAX_IO_QUEUE_ENTRIES.
1033 if (pqpair
->sq_in_cmb
== false) {
1034 pqpair
->cmd
= spdk_zmalloc(pqpair
->num_entries
* sizeof(struct spdk_nvme_cmd
),
1036 SPDK_ENV_SOCKET_ID_ANY
, flags
);
1037 if (pqpair
->cmd
== NULL
) {
1038 SPDK_ERRLOG("alloc qpair_cmd failed\n");
1042 pqpair
->cmd_bus_addr
= spdk_vtophys(pqpair
->cmd
, NULL
);
1043 if (pqpair
->cmd_bus_addr
== SPDK_VTOPHYS_ERROR
) {
1044 SPDK_ERRLOG("spdk_vtophys(pqpair->cmd) failed\n");
1049 pqpair
->cpl
= spdk_zmalloc(pqpair
->num_entries
* sizeof(struct spdk_nvme_cpl
),
1051 SPDK_ENV_SOCKET_ID_ANY
, flags
);
1052 if (pqpair
->cpl
== NULL
) {
1053 SPDK_ERRLOG("alloc qpair_cpl failed\n");
1057 pqpair
->cpl_bus_addr
= spdk_vtophys(pqpair
->cpl
, NULL
);
1058 if (pqpair
->cpl_bus_addr
== SPDK_VTOPHYS_ERROR
) {
1059 SPDK_ERRLOG("spdk_vtophys(pqpair->cpl) failed\n");
1063 doorbell_base
= &pctrlr
->regs
->doorbell
[0].sq_tdbl
;
1064 pqpair
->sq_tdbl
= doorbell_base
+ (2 * qpair
->id
+ 0) * pctrlr
->doorbell_stride_u32
;
1065 pqpair
->cq_hdbl
= doorbell_base
+ (2 * qpair
->id
+ 1) * pctrlr
->doorbell_stride_u32
;
1068 * Reserve space for all of the trackers in a single allocation.
1069 * struct nvme_tracker must be padded so that its size is already a power of 2.
1070 * This ensures the PRP list embedded in the nvme_tracker object will not span a
1071 * 4KB boundary, while allowing access to trackers in tr[] via normal array indexing.
1073 pqpair
->tr
= spdk_zmalloc(num_trackers
* sizeof(*tr
), sizeof(*tr
), NULL
,
1074 SPDK_ENV_SOCKET_ID_ANY
, SPDK_MALLOC_SHARE
);
1075 if (pqpair
->tr
== NULL
) {
1076 SPDK_ERRLOG("nvme_tr failed\n");
1080 TAILQ_INIT(&pqpair
->free_tr
);
1081 TAILQ_INIT(&pqpair
->outstanding_tr
);
1083 for (i
= 0; i
< num_trackers
; i
++) {
1084 tr
= &pqpair
->tr
[i
];
1085 nvme_qpair_construct_tracker(tr
, i
, spdk_vtophys(tr
, NULL
));
1086 TAILQ_INSERT_HEAD(&pqpair
->free_tr
, tr
, tq_list
);
1089 nvme_pcie_qpair_reset(qpair
);
1095 nvme_pcie_copy_command(struct spdk_nvme_cmd
*dst
, const struct spdk_nvme_cmd
*src
)
1097 /* dst and src are known to be non-overlapping and 64-byte aligned. */
1098 #if defined(__SSE2__)
1099 __m128i
*d128
= (__m128i
*)dst
;
1100 const __m128i
*s128
= (const __m128i
*)src
;
1102 _mm_stream_si128(&d128
[0], _mm_load_si128(&s128
[0]));
1103 _mm_stream_si128(&d128
[1], _mm_load_si128(&s128
[1]));
1104 _mm_stream_si128(&d128
[2], _mm_load_si128(&s128
[2]));
1105 _mm_stream_si128(&d128
[3], _mm_load_si128(&s128
[3]));
1112 * Note: the ctrlr_lock must be held when calling this function.
1115 nvme_pcie_qpair_insert_pending_admin_request(struct spdk_nvme_qpair
*qpair
,
1116 struct nvme_request
*req
, struct spdk_nvme_cpl
*cpl
)
1118 struct spdk_nvme_ctrlr
*ctrlr
= qpair
->ctrlr
;
1119 struct nvme_request
*active_req
= req
;
1120 struct spdk_nvme_ctrlr_process
*active_proc
;
1123 * The admin request is from another process. Move to the per
1124 * process list for that process to handle it later.
1126 assert(nvme_qpair_is_admin_queue(qpair
));
1127 assert(active_req
->pid
!= getpid());
1129 active_proc
= spdk_nvme_ctrlr_get_process(ctrlr
, active_req
->pid
);
1131 /* Save the original completion information */
1132 memcpy(&active_req
->cpl
, cpl
, sizeof(*cpl
));
1133 STAILQ_INSERT_TAIL(&active_proc
->active_reqs
, active_req
, stailq
);
1135 SPDK_ERRLOG("The owning process (pid %d) is not found. Dropping the request.\n",
1138 nvme_free_request(active_req
);
1143 * Note: the ctrlr_lock must be held when calling this function.
1146 nvme_pcie_qpair_complete_pending_admin_request(struct spdk_nvme_qpair
*qpair
)
1148 struct spdk_nvme_ctrlr
*ctrlr
= qpair
->ctrlr
;
1149 struct nvme_request
*req
, *tmp_req
;
1150 pid_t pid
= getpid();
1151 struct spdk_nvme_ctrlr_process
*proc
;
1154 * Check whether there is any pending admin request from
1155 * other active processes.
1157 assert(nvme_qpair_is_admin_queue(qpair
));
1159 proc
= spdk_nvme_ctrlr_get_current_process(ctrlr
);
1161 SPDK_ERRLOG("the active process (pid %d) is not found for this controller.\n", pid
);
1166 STAILQ_FOREACH_SAFE(req
, &proc
->active_reqs
, stailq
, tmp_req
) {
1167 STAILQ_REMOVE(&proc
->active_reqs
, req
, nvme_request
, stailq
);
1169 assert(req
->pid
== pid
);
1171 nvme_complete_request(req
->cb_fn
, req
->cb_arg
, qpair
, req
, &req
->cpl
);
1172 nvme_free_request(req
);
static bool
nvme_pcie_qpair_need_event(uint16_t event_idx, uint16_t new_idx, uint16_t old)
{
	/*
	 * Shadow-doorbell check: an MMIO write is needed only if event_idx
	 * falls within (old, new_idx], computed with wrap-safe 16-bit
	 * distances from new_idx.
	 */
	uint16_t dist_event = (uint16_t)(new_idx - event_idx);
	uint16_t dist_old = (uint16_t)(new_idx - old);

	return dist_event <= dist_old;
}
1183 nvme_pcie_qpair_update_mmio_required(struct spdk_nvme_qpair
*qpair
, uint16_t value
,
1184 volatile uint32_t *shadow_db
,
1185 volatile uint32_t *eventidx
)
1196 if (!nvme_pcie_qpair_need_event(*eventidx
, value
, old
)) {
1204 nvme_pcie_qpair_ring_sq_doorbell(struct spdk_nvme_qpair
*qpair
)
1206 struct nvme_pcie_qpair
*pqpair
= nvme_pcie_qpair(qpair
);
1207 struct nvme_pcie_ctrlr
*pctrlr
= nvme_pcie_ctrlr(qpair
->ctrlr
);
1208 bool need_mmio
= true;
1210 if (spdk_unlikely(pqpair
->flags
.has_shadow_doorbell
)) {
1211 need_mmio
= nvme_pcie_qpair_update_mmio_required(qpair
,
1213 pqpair
->shadow_doorbell
.sq_tdbl
,
1214 pqpair
->shadow_doorbell
.sq_eventidx
);
1217 if (spdk_likely(need_mmio
)) {
1219 g_thread_mmio_ctrlr
= pctrlr
;
1220 spdk_mmio_write_4(pqpair
->sq_tdbl
, pqpair
->sq_tail
);
1221 g_thread_mmio_ctrlr
= NULL
;
1226 nvme_pcie_qpair_ring_cq_doorbell(struct spdk_nvme_qpair
*qpair
)
1228 struct nvme_pcie_qpair
*pqpair
= nvme_pcie_qpair(qpair
);
1229 struct nvme_pcie_ctrlr
*pctrlr
= nvme_pcie_ctrlr(qpair
->ctrlr
);
1230 bool need_mmio
= true;
1232 if (spdk_unlikely(pqpair
->flags
.has_shadow_doorbell
)) {
1233 need_mmio
= nvme_pcie_qpair_update_mmio_required(qpair
,
1235 pqpair
->shadow_doorbell
.cq_hdbl
,
1236 pqpair
->shadow_doorbell
.cq_eventidx
);
1239 if (spdk_likely(need_mmio
)) {
1240 g_thread_mmio_ctrlr
= pctrlr
;
1241 spdk_mmio_write_4(pqpair
->cq_hdbl
, pqpair
->cq_head
);
1242 g_thread_mmio_ctrlr
= NULL
;
1247 nvme_pcie_qpair_submit_tracker(struct spdk_nvme_qpair
*qpair
, struct nvme_tracker
*tr
)
1249 struct nvme_request
*req
;
1250 struct nvme_pcie_qpair
*pqpair
= nvme_pcie_qpair(qpair
);
1253 assert(req
!= NULL
);
1255 /* Copy the command from the tracker to the submission queue. */
1256 nvme_pcie_copy_command(&pqpair
->cmd
[pqpair
->sq_tail
], &req
->cmd
);
1258 if (spdk_unlikely(++pqpair
->sq_tail
== pqpair
->num_entries
)) {
1259 pqpair
->sq_tail
= 0;
1262 if (spdk_unlikely(pqpair
->sq_tail
== pqpair
->sq_head
)) {
1263 SPDK_ERRLOG("sq_tail is passing sq_head!\n");
1266 if (!pqpair
->flags
.delay_pcie_doorbell
) {
1267 nvme_pcie_qpair_ring_sq_doorbell(qpair
);
1272 nvme_pcie_qpair_complete_tracker(struct spdk_nvme_qpair
*qpair
, struct nvme_tracker
*tr
,
1273 struct spdk_nvme_cpl
*cpl
, bool print_on_error
)
1275 struct nvme_pcie_qpair
*pqpair
= nvme_pcie_qpair(qpair
);
1276 struct nvme_request
*req
;
1278 bool req_from_current_proc
= true;
1282 assert(req
!= NULL
);
1284 error
= spdk_nvme_cpl_is_error(cpl
);
1285 retry
= error
&& nvme_completion_is_retry(cpl
) &&
1286 req
->retries
< spdk_nvme_retry_count
;
1288 if (error
&& print_on_error
&& !qpair
->ctrlr
->opts
.disable_error_logging
) {
1289 nvme_qpair_print_command(qpair
, &req
->cmd
);
1290 nvme_qpair_print_completion(qpair
, cpl
);
1293 assert(cpl
->cid
== req
->cmd
.cid
);
1297 nvme_pcie_qpair_submit_tracker(qpair
, tr
);
1299 /* Only check admin requests from different processes. */
1300 if (nvme_qpair_is_admin_queue(qpair
) && req
->pid
!= getpid()) {
1301 req_from_current_proc
= false;
1302 nvme_pcie_qpair_insert_pending_admin_request(qpair
, req
, cpl
);
1304 nvme_complete_request(tr
->cb_fn
, tr
->cb_arg
, qpair
, req
, cpl
);
1307 if (req_from_current_proc
== true) {
1308 nvme_qpair_free_request(qpair
, req
);
1313 TAILQ_REMOVE(&pqpair
->outstanding_tr
, tr
, tq_list
);
1314 TAILQ_INSERT_HEAD(&pqpair
->free_tr
, tr
, tq_list
);
1317 * If the controller is in the middle of resetting, don't
1318 * try to submit queued requests here - let the reset logic
1319 * handle that instead.
1321 if (!STAILQ_EMPTY(&qpair
->queued_req
) &&
1322 !qpair
->ctrlr
->is_resetting
) {
1323 req
= STAILQ_FIRST(&qpair
->queued_req
);
1324 STAILQ_REMOVE_HEAD(&qpair
->queued_req
, stailq
);
1325 nvme_qpair_submit_request(qpair
, req
);
1331 nvme_pcie_qpair_manual_complete_tracker(struct spdk_nvme_qpair
*qpair
,
1332 struct nvme_tracker
*tr
, uint32_t sct
, uint32_t sc
, uint32_t dnr
,
1333 bool print_on_error
)
1335 struct spdk_nvme_cpl cpl
;
1337 memset(&cpl
, 0, sizeof(cpl
));
1338 cpl
.sqid
= qpair
->id
;
1340 cpl
.status
.sct
= sct
;
1342 cpl
.status
.dnr
= dnr
;
1343 nvme_pcie_qpair_complete_tracker(qpair
, tr
, &cpl
, print_on_error
);
1347 nvme_pcie_qpair_abort_trackers(struct spdk_nvme_qpair
*qpair
, uint32_t dnr
)
1349 struct nvme_pcie_qpair
*pqpair
= nvme_pcie_qpair(qpair
);
1350 struct nvme_tracker
*tr
, *temp
;
1352 TAILQ_FOREACH_SAFE(tr
, &pqpair
->outstanding_tr
, tq_list
, temp
) {
1353 if (!qpair
->ctrlr
->opts
.disable_error_logging
) {
1354 SPDK_ERRLOG("aborting outstanding command\n");
1356 nvme_pcie_qpair_manual_complete_tracker(qpair
, tr
, SPDK_NVME_SCT_GENERIC
,
1357 SPDK_NVME_SC_ABORTED_BY_REQUEST
, dnr
, true);
1362 nvme_pcie_admin_qpair_abort_aers(struct spdk_nvme_qpair
*qpair
)
1364 struct nvme_pcie_qpair
*pqpair
= nvme_pcie_qpair(qpair
);
1365 struct nvme_tracker
*tr
;
1367 tr
= TAILQ_FIRST(&pqpair
->outstanding_tr
);
1368 while (tr
!= NULL
) {
1369 assert(tr
->req
!= NULL
);
1370 if (tr
->req
->cmd
.opc
== SPDK_NVME_OPC_ASYNC_EVENT_REQUEST
) {
1371 nvme_pcie_qpair_manual_complete_tracker(qpair
, tr
,
1372 SPDK_NVME_SCT_GENERIC
, SPDK_NVME_SC_ABORTED_SQ_DELETION
, 0,
1374 tr
= TAILQ_FIRST(&pqpair
->outstanding_tr
);
1376 tr
= TAILQ_NEXT(tr
, tq_list
);
/* Admin-queue teardown: cancel any outstanding AER commands first. */
static void
nvme_pcie_admin_qpair_destroy(struct spdk_nvme_qpair *qpair)
{
	nvme_pcie_admin_qpair_abort_aers(qpair);
}
1388 nvme_pcie_qpair_destroy(struct spdk_nvme_qpair
*qpair
)
1390 struct nvme_pcie_qpair
*pqpair
= nvme_pcie_qpair(qpair
);
1392 if (nvme_qpair_is_admin_queue(qpair
)) {
1393 nvme_pcie_admin_qpair_destroy(qpair
);
1395 if (pqpair
->cmd
&& !pqpair
->sq_in_cmb
) {
1396 spdk_free(pqpair
->cmd
);
1399 spdk_free(pqpair
->cpl
);
1402 spdk_free(pqpair
->tr
);
1405 nvme_qpair_deinit(qpair
);
1413 nvme_pcie_qpair_abort_reqs(struct spdk_nvme_qpair
*qpair
, uint32_t dnr
)
1415 nvme_pcie_qpair_abort_trackers(qpair
, dnr
);
1419 nvme_pcie_ctrlr_cmd_create_io_cq(struct spdk_nvme_ctrlr
*ctrlr
,
1420 struct spdk_nvme_qpair
*io_que
, spdk_nvme_cmd_cb cb_fn
,
1423 struct nvme_pcie_qpair
*pqpair
= nvme_pcie_qpair(io_que
);
1424 struct nvme_request
*req
;
1425 struct spdk_nvme_cmd
*cmd
;
1427 req
= nvme_allocate_request_null(ctrlr
->adminq
, cb_fn
, cb_arg
);
1433 cmd
->opc
= SPDK_NVME_OPC_CREATE_IO_CQ
;
1436 * TODO: create a create io completion queue command data
1439 cmd
->cdw10
= ((pqpair
->num_entries
- 1) << 16) | io_que
->id
;
1441 * 0x2 = interrupts enabled
1442 * 0x1 = physically contiguous
1445 cmd
->dptr
.prp
.prp1
= pqpair
->cpl_bus_addr
;
1447 return nvme_ctrlr_submit_admin_request(ctrlr
, req
);
1451 nvme_pcie_ctrlr_cmd_create_io_sq(struct spdk_nvme_ctrlr
*ctrlr
,
1452 struct spdk_nvme_qpair
*io_que
, spdk_nvme_cmd_cb cb_fn
, void *cb_arg
)
1454 struct nvme_pcie_qpair
*pqpair
= nvme_pcie_qpair(io_que
);
1455 struct nvme_request
*req
;
1456 struct spdk_nvme_cmd
*cmd
;
1458 req
= nvme_allocate_request_null(ctrlr
->adminq
, cb_fn
, cb_arg
);
1464 cmd
->opc
= SPDK_NVME_OPC_CREATE_IO_SQ
;
1467 * TODO: create a create io submission queue command data
1470 cmd
->cdw10
= ((pqpair
->num_entries
- 1) << 16) | io_que
->id
;
1471 /* 0x1 = physically contiguous */
1472 cmd
->cdw11
= (io_que
->id
<< 16) | (io_que
->qprio
<< 1) | 0x1;
1473 cmd
->dptr
.prp
.prp1
= pqpair
->cmd_bus_addr
;
1475 return nvme_ctrlr_submit_admin_request(ctrlr
, req
);
1479 nvme_pcie_ctrlr_cmd_delete_io_cq(struct spdk_nvme_ctrlr
*ctrlr
, struct spdk_nvme_qpair
*qpair
,
1480 spdk_nvme_cmd_cb cb_fn
, void *cb_arg
)
1482 struct nvme_request
*req
;
1483 struct spdk_nvme_cmd
*cmd
;
1485 req
= nvme_allocate_request_null(ctrlr
->adminq
, cb_fn
, cb_arg
);
1491 cmd
->opc
= SPDK_NVME_OPC_DELETE_IO_CQ
;
1492 cmd
->cdw10
= qpair
->id
;
1494 return nvme_ctrlr_submit_admin_request(ctrlr
, req
);
1498 nvme_pcie_ctrlr_cmd_delete_io_sq(struct spdk_nvme_ctrlr
*ctrlr
, struct spdk_nvme_qpair
*qpair
,
1499 spdk_nvme_cmd_cb cb_fn
, void *cb_arg
)
1501 struct nvme_request
*req
;
1502 struct spdk_nvme_cmd
*cmd
;
1504 req
= nvme_allocate_request_null(ctrlr
->adminq
, cb_fn
, cb_arg
);
1510 cmd
->opc
= SPDK_NVME_OPC_DELETE_IO_SQ
;
1511 cmd
->cdw10
= qpair
->id
;
1513 return nvme_ctrlr_submit_admin_request(ctrlr
, req
);
1517 _nvme_pcie_ctrlr_create_io_qpair(struct spdk_nvme_ctrlr
*ctrlr
, struct spdk_nvme_qpair
*qpair
,
1520 struct nvme_pcie_ctrlr
*pctrlr
= nvme_pcie_ctrlr(ctrlr
);
1521 struct nvme_pcie_qpair
*pqpair
= nvme_pcie_qpair(qpair
);
1522 struct nvme_completion_poll_status status
;
1525 rc
= nvme_pcie_ctrlr_cmd_create_io_cq(ctrlr
, qpair
, nvme_completion_poll_cb
, &status
);
1530 if (spdk_nvme_wait_for_completion(ctrlr
->adminq
, &status
)) {
1531 SPDK_ERRLOG("nvme_create_io_cq failed!\n");
1535 rc
= nvme_pcie_ctrlr_cmd_create_io_sq(qpair
->ctrlr
, qpair
, nvme_completion_poll_cb
, &status
);
1540 if (spdk_nvme_wait_for_completion(ctrlr
->adminq
, &status
)) {
1541 SPDK_ERRLOG("nvme_create_io_sq failed!\n");
1542 /* Attempt to delete the completion queue */
1543 rc
= nvme_pcie_ctrlr_cmd_delete_io_cq(qpair
->ctrlr
, qpair
, nvme_completion_poll_cb
, &status
);
1547 spdk_nvme_wait_for_completion(ctrlr
->adminq
, &status
);
1551 if (ctrlr
->shadow_doorbell
) {
1552 pqpair
->shadow_doorbell
.sq_tdbl
= ctrlr
->shadow_doorbell
+ (2 * qpair
->id
+ 0) *
1553 pctrlr
->doorbell_stride_u32
;
1554 pqpair
->shadow_doorbell
.cq_hdbl
= ctrlr
->shadow_doorbell
+ (2 * qpair
->id
+ 1) *
1555 pctrlr
->doorbell_stride_u32
;
1556 pqpair
->shadow_doorbell
.sq_eventidx
= ctrlr
->eventidx
+ (2 * qpair
->id
+ 0) *
1557 pctrlr
->doorbell_stride_u32
;
1558 pqpair
->shadow_doorbell
.cq_eventidx
= ctrlr
->eventidx
+ (2 * qpair
->id
+ 1) *
1559 pctrlr
->doorbell_stride_u32
;
1560 pqpair
->flags
.has_shadow_doorbell
= 1;
1562 pqpair
->flags
.has_shadow_doorbell
= 0;
1564 nvme_pcie_qpair_reset(qpair
);
1569 struct spdk_nvme_qpair
*
1570 nvme_pcie_ctrlr_create_io_qpair(struct spdk_nvme_ctrlr
*ctrlr
, uint16_t qid
,
1571 const struct spdk_nvme_io_qpair_opts
*opts
)
1573 struct nvme_pcie_qpair
*pqpair
;
1574 struct spdk_nvme_qpair
*qpair
;
1577 assert(ctrlr
!= NULL
);
1579 pqpair
= spdk_zmalloc(sizeof(*pqpair
), 64, NULL
,
1580 SPDK_ENV_SOCKET_ID_ANY
, SPDK_MALLOC_SHARE
);
1581 if (pqpair
== NULL
) {
1585 pqpair
->num_entries
= opts
->io_queue_size
;
1586 pqpair
->flags
.delay_pcie_doorbell
= opts
->delay_pcie_doorbell
;
1588 qpair
= &pqpair
->qpair
;
1590 rc
= nvme_qpair_init(qpair
, qid
, ctrlr
, opts
->qprio
, opts
->io_queue_requests
);
1592 nvme_pcie_qpair_destroy(qpair
);
1596 rc
= nvme_pcie_qpair_construct(qpair
);
1598 nvme_pcie_qpair_destroy(qpair
);
1602 rc
= _nvme_pcie_ctrlr_create_io_qpair(ctrlr
, qpair
, qid
);
1605 SPDK_ERRLOG("I/O queue creation failed\n");
1606 nvme_pcie_qpair_destroy(qpair
);
1614 nvme_pcie_ctrlr_connect_qpair(struct spdk_nvme_ctrlr
*ctrlr
, struct spdk_nvme_qpair
*qpair
)
1616 if (nvme_qpair_is_admin_queue(qpair
)) {
1619 return _nvme_pcie_ctrlr_create_io_qpair(ctrlr
, qpair
, qpair
->id
);
/*
 * Transport disconnect hook — nothing to do for PCIe.
 * NOTE(review): body not visible in the extracted source; presumed empty — verify.
 */
void
nvme_pcie_ctrlr_disconnect_qpair(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_qpair *qpair)
{
}
1629 nvme_pcie_ctrlr_delete_io_qpair(struct spdk_nvme_ctrlr
*ctrlr
, struct spdk_nvme_qpair
*qpair
)
1631 struct nvme_completion_poll_status status
;
1634 assert(ctrlr
!= NULL
);
1636 if (ctrlr
->is_removed
) {
1640 /* Delete the I/O submission queue */
1641 rc
= nvme_pcie_ctrlr_cmd_delete_io_sq(ctrlr
, qpair
, nvme_completion_poll_cb
, &status
);
1643 SPDK_ERRLOG("Failed to send request to delete_io_sq with rc=%d\n", rc
);
1646 if (spdk_nvme_wait_for_completion(ctrlr
->adminq
, &status
)) {
1650 /* Delete the completion queue */
1651 rc
= nvme_pcie_ctrlr_cmd_delete_io_cq(ctrlr
, qpair
, nvme_completion_poll_cb
, &status
);
1653 SPDK_ERRLOG("Failed to send request to delete_io_cq with rc=%d\n", rc
);
1656 if (spdk_nvme_wait_for_completion(ctrlr
->adminq
, &status
)) {
1661 if (qpair
->no_deletion_notification_needed
== 0) {
1662 /* Abort the rest of the I/O */
1663 nvme_pcie_qpair_abort_trackers(qpair
, 1);
1666 nvme_pcie_qpair_destroy(qpair
);
1671 nvme_pcie_fail_request_bad_vtophys(struct spdk_nvme_qpair
*qpair
, struct nvme_tracker
*tr
)
1674 * Bad vtophys translation, so abort this request and return
1677 nvme_pcie_qpair_manual_complete_tracker(qpair
, tr
, SPDK_NVME_SCT_GENERIC
,
1678 SPDK_NVME_SC_INVALID_FIELD
,
1679 1 /* do not retry */, true);
1683 * Append PRP list entries to describe a virtually contiguous buffer starting at virt_addr of len bytes.
1685 * *prp_index will be updated to account for the number of PRP entries used.
1688 nvme_pcie_prp_list_append(struct nvme_tracker
*tr
, uint32_t *prp_index
, void *virt_addr
, size_t len
,
1691 struct spdk_nvme_cmd
*cmd
= &tr
->req
->cmd
;
1692 uintptr_t page_mask
= page_size
- 1;
1696 SPDK_DEBUGLOG(SPDK_LOG_NVME
, "prp_index:%u virt_addr:%p len:%u\n",
1697 *prp_index
, virt_addr
, (uint32_t)len
);
1699 if (spdk_unlikely(((uintptr_t)virt_addr
& 3) != 0)) {
1700 SPDK_ERRLOG("virt_addr %p not dword aligned\n", virt_addr
);
1709 * prp_index 0 is stored in prp1, and the rest are stored in the prp[] array,
1710 * so prp_index == count is valid.
1712 if (spdk_unlikely(i
> SPDK_COUNTOF(tr
->u
.prp
))) {
1713 SPDK_ERRLOG("out of PRP entries\n");
1717 phys_addr
= spdk_vtophys(virt_addr
, NULL
);
1718 if (spdk_unlikely(phys_addr
== SPDK_VTOPHYS_ERROR
)) {
1719 SPDK_ERRLOG("vtophys(%p) failed\n", virt_addr
);
1724 SPDK_DEBUGLOG(SPDK_LOG_NVME
, "prp1 = %p\n", (void *)phys_addr
);
1725 cmd
->dptr
.prp
.prp1
= phys_addr
;
1726 seg_len
= page_size
- ((uintptr_t)virt_addr
& page_mask
);
1728 if ((phys_addr
& page_mask
) != 0) {
1729 SPDK_ERRLOG("PRP %u not page aligned (%p)\n", i
, virt_addr
);
1733 SPDK_DEBUGLOG(SPDK_LOG_NVME
, "prp[%u] = %p\n", i
- 1, (void *)phys_addr
);
1734 tr
->u
.prp
[i
- 1] = phys_addr
;
1735 seg_len
= page_size
;
1738 seg_len
= spdk_min(seg_len
, len
);
1739 virt_addr
+= seg_len
;
1744 cmd
->psdt
= SPDK_NVME_PSDT_PRP
;
1746 cmd
->dptr
.prp
.prp2
= 0;
1747 } else if (i
== 2) {
1748 cmd
->dptr
.prp
.prp2
= tr
->u
.prp
[0];
1749 SPDK_DEBUGLOG(SPDK_LOG_NVME
, "prp2 = %p\n", (void *)cmd
->dptr
.prp
.prp2
);
1751 cmd
->dptr
.prp
.prp2
= tr
->prp_sgl_bus_addr
;
1752 SPDK_DEBUGLOG(SPDK_LOG_NVME
, "prp2 = %p (PRP list)\n", (void *)cmd
->dptr
.prp
.prp2
);
1760 * Build PRP list describing physically contiguous payload buffer.
1763 nvme_pcie_qpair_build_contig_request(struct spdk_nvme_qpair
*qpair
, struct nvme_request
*req
,
1764 struct nvme_tracker
*tr
)
1766 uint32_t prp_index
= 0;
1769 rc
= nvme_pcie_prp_list_append(tr
, &prp_index
, req
->payload
.contig_or_cb_arg
+ req
->payload_offset
,
1770 req
->payload_size
, qpair
->ctrlr
->page_size
);
1772 nvme_pcie_fail_request_bad_vtophys(qpair
, tr
);
1780 * Build SGL list describing scattered payload buffer.
1783 nvme_pcie_qpair_build_hw_sgl_request(struct spdk_nvme_qpair
*qpair
, struct nvme_request
*req
,
1784 struct nvme_tracker
*tr
)
1789 uint32_t remaining_transfer_len
, remaining_user_sge_len
, length
;
1790 struct spdk_nvme_sgl_descriptor
*sgl
;
1794 * Build scattered payloads.
1796 assert(req
->payload_size
!= 0);
1797 assert(nvme_payload_type(&req
->payload
) == NVME_PAYLOAD_TYPE_SGL
);
1798 assert(req
->payload
.reset_sgl_fn
!= NULL
);
1799 assert(req
->payload
.next_sge_fn
!= NULL
);
1800 req
->payload
.reset_sgl_fn(req
->payload
.contig_or_cb_arg
, req
->payload_offset
);
1803 req
->cmd
.psdt
= SPDK_NVME_PSDT_SGL_MPTR_CONTIG
;
1804 req
->cmd
.dptr
.sgl1
.unkeyed
.subtype
= 0;
1806 remaining_transfer_len
= req
->payload_size
;
1808 while (remaining_transfer_len
> 0) {
1809 rc
= req
->payload
.next_sge_fn(req
->payload
.contig_or_cb_arg
,
1810 &virt_addr
, &remaining_user_sge_len
);
1812 nvme_pcie_fail_request_bad_vtophys(qpair
, tr
);
1816 remaining_user_sge_len
= spdk_min(remaining_user_sge_len
, remaining_transfer_len
);
1817 remaining_transfer_len
-= remaining_user_sge_len
;
1818 while (remaining_user_sge_len
> 0) {
1819 if (nseg
>= NVME_MAX_SGL_DESCRIPTORS
) {
1820 nvme_pcie_fail_request_bad_vtophys(qpair
, tr
);
1824 phys_addr
= spdk_vtophys(virt_addr
, NULL
);
1825 if (phys_addr
== SPDK_VTOPHYS_ERROR
) {
1826 nvme_pcie_fail_request_bad_vtophys(qpair
, tr
);
1830 length
= spdk_min(remaining_user_sge_len
, VALUE_2MB
- _2MB_OFFSET(virt_addr
));
1831 remaining_user_sge_len
-= length
;
1832 virt_addr
+= length
;
1834 if (nseg
> 0 && phys_addr
==
1835 (*(sgl
- 1)).address
+ (*(sgl
- 1)).unkeyed
.length
) {
1836 /* extend previous entry */
1837 (*(sgl
- 1)).unkeyed
.length
+= length
;
1841 sgl
->unkeyed
.type
= SPDK_NVME_SGL_TYPE_DATA_BLOCK
;
1842 sgl
->unkeyed
.length
= length
;
1843 sgl
->address
= phys_addr
;
1844 sgl
->unkeyed
.subtype
= 0;
1853 * The whole transfer can be described by a single SGL descriptor.
1854 * Use the special case described by the spec where SGL1's type is Data Block.
1855 * This means the SGL in the tracker is not used at all, so copy the first (and only)
1856 * SGL element into SGL1.
1858 req
->cmd
.dptr
.sgl1
.unkeyed
.type
= SPDK_NVME_SGL_TYPE_DATA_BLOCK
;
1859 req
->cmd
.dptr
.sgl1
.address
= tr
->u
.sgl
[0].address
;
1860 req
->cmd
.dptr
.sgl1
.unkeyed
.length
= tr
->u
.sgl
[0].unkeyed
.length
;
1862 /* For now we can only support 1 SGL segment in NVMe controller */
1863 req
->cmd
.dptr
.sgl1
.unkeyed
.type
= SPDK_NVME_SGL_TYPE_LAST_SEGMENT
;
1864 req
->cmd
.dptr
.sgl1
.address
= tr
->prp_sgl_bus_addr
;
1865 req
->cmd
.dptr
.sgl1
.unkeyed
.length
= nseg
* sizeof(struct spdk_nvme_sgl_descriptor
);
1872 * Build PRP list describing scattered payload buffer.
1875 nvme_pcie_qpair_build_prps_sgl_request(struct spdk_nvme_qpair
*qpair
, struct nvme_request
*req
,
1876 struct nvme_tracker
*tr
)
1880 uint32_t remaining_transfer_len
, length
;
1881 uint32_t prp_index
= 0;
1882 uint32_t page_size
= qpair
->ctrlr
->page_size
;
1885 * Build scattered payloads.
1887 assert(nvme_payload_type(&req
->payload
) == NVME_PAYLOAD_TYPE_SGL
);
1888 assert(req
->payload
.reset_sgl_fn
!= NULL
);
1889 req
->payload
.reset_sgl_fn(req
->payload
.contig_or_cb_arg
, req
->payload_offset
);
1891 remaining_transfer_len
= req
->payload_size
;
1892 while (remaining_transfer_len
> 0) {
1893 assert(req
->payload
.next_sge_fn
!= NULL
);
1894 rc
= req
->payload
.next_sge_fn(req
->payload
.contig_or_cb_arg
, &virt_addr
, &length
);
1896 nvme_pcie_fail_request_bad_vtophys(qpair
, tr
);
1900 length
= spdk_min(remaining_transfer_len
, length
);
1903 * Any incompatible sges should have been handled up in the splitting routine,
1904 * but assert here as an additional check.
1906 * All SGEs except last must end on a page boundary.
1908 assert((length
== remaining_transfer_len
) ||
1909 _is_page_aligned((uintptr_t)virt_addr
+ length
, page_size
));
1911 rc
= nvme_pcie_prp_list_append(tr
, &prp_index
, virt_addr
, length
, page_size
);
1913 nvme_pcie_fail_request_bad_vtophys(qpair
, tr
);
1917 remaining_transfer_len
-= length
;
1924 nvme_pcie_qpair_submit_request(struct spdk_nvme_qpair
*qpair
, struct nvme_request
*req
)
1926 struct nvme_tracker
*tr
;
1929 struct spdk_nvme_ctrlr
*ctrlr
= qpair
->ctrlr
;
1930 struct nvme_pcie_qpair
*pqpair
= nvme_pcie_qpair(qpair
);
1932 if (spdk_unlikely(nvme_qpair_is_admin_queue(qpair
))) {
1933 nvme_robust_mutex_lock(&ctrlr
->ctrlr_lock
);
1936 tr
= TAILQ_FIRST(&pqpair
->free_tr
);
1940 * Put the request on the qpair's request queue to be
1941 * processed when a tracker frees up via a command
1944 STAILQ_INSERT_TAIL(&qpair
->queued_req
, req
, stailq
);
1948 TAILQ_REMOVE(&pqpair
->free_tr
, tr
, tq_list
); /* remove tr from free_tr */
1949 TAILQ_INSERT_TAIL(&pqpair
->outstanding_tr
, tr
, tq_list
);
1951 tr
->cb_fn
= req
->cb_fn
;
1952 tr
->cb_arg
= req
->cb_arg
;
1953 req
->cmd
.cid
= tr
->cid
;
1955 if (req
->payload_size
&& req
->payload
.md
) {
1956 md_payload
= req
->payload
.md
+ req
->md_offset
;
1957 tr
->req
->cmd
.mptr
= spdk_vtophys(md_payload
, NULL
);
1958 if (tr
->req
->cmd
.mptr
== SPDK_VTOPHYS_ERROR
) {
1959 nvme_pcie_fail_request_bad_vtophys(qpair
, tr
);
1965 if (req
->payload_size
== 0) {
1966 /* Null payload - leave PRP fields untouched */
1968 } else if (nvme_payload_type(&req
->payload
) == NVME_PAYLOAD_TYPE_CONTIG
) {
1969 rc
= nvme_pcie_qpair_build_contig_request(qpair
, req
, tr
);
1970 } else if (nvme_payload_type(&req
->payload
) == NVME_PAYLOAD_TYPE_SGL
) {
1971 if (ctrlr
->flags
& SPDK_NVME_CTRLR_SGL_SUPPORTED
) {
1972 rc
= nvme_pcie_qpair_build_hw_sgl_request(qpair
, req
, tr
);
1974 rc
= nvme_pcie_qpair_build_prps_sgl_request(qpair
, req
, tr
);
1978 nvme_pcie_fail_request_bad_vtophys(qpair
, tr
);
1986 nvme_pcie_qpair_submit_tracker(qpair
, tr
);
1989 if (spdk_unlikely(nvme_qpair_is_admin_queue(qpair
))) {
1990 nvme_robust_mutex_unlock(&ctrlr
->ctrlr_lock
);
1997 nvme_pcie_qpair_check_timeout(struct spdk_nvme_qpair
*qpair
)
2000 struct nvme_tracker
*tr
, *tmp
;
2001 struct nvme_pcie_qpair
*pqpair
= nvme_pcie_qpair(qpair
);
2002 struct spdk_nvme_ctrlr
*ctrlr
= qpair
->ctrlr
;
2003 struct spdk_nvme_ctrlr_process
*active_proc
;
2005 /* Don't check timeouts during controller initialization. */
2006 if (ctrlr
->state
!= NVME_CTRLR_STATE_READY
) {
2010 if (nvme_qpair_is_admin_queue(qpair
)) {
2011 active_proc
= spdk_nvme_ctrlr_get_current_process(ctrlr
);
2013 active_proc
= qpair
->active_proc
;
2016 /* Only check timeouts if the current process has a timeout callback. */
2017 if (active_proc
== NULL
|| active_proc
->timeout_cb_fn
== NULL
) {
2021 t02
= spdk_get_ticks();
2022 TAILQ_FOREACH_SAFE(tr
, &pqpair
->outstanding_tr
, tq_list
, tmp
) {
2023 assert(tr
->req
!= NULL
);
2025 if (nvme_request_check_timeout(tr
->req
, tr
->cid
, active_proc
, t02
)) {
2027 * The requests are in order, so as soon as one has not timed out,
2036 nvme_pcie_qpair_process_completions(struct spdk_nvme_qpair
*qpair
, uint32_t max_completions
)
2038 struct nvme_pcie_qpair
*pqpair
= nvme_pcie_qpair(qpair
);
2039 struct nvme_tracker
*tr
;
2040 struct spdk_nvme_cpl
*cpl
, *next_cpl
;
2041 uint32_t num_completions
= 0;
2042 struct spdk_nvme_ctrlr
*ctrlr
= qpair
->ctrlr
;
2043 uint16_t next_cq_head
;
2045 bool next_is_valid
= false;
2047 if (spdk_unlikely(nvme_qpair_is_admin_queue(qpair
))) {
2048 nvme_robust_mutex_lock(&ctrlr
->ctrlr_lock
);
2051 if (max_completions
== 0 || max_completions
> pqpair
->max_completions_cap
) {
2053 * max_completions == 0 means unlimited, but complete at most
2054 * max_completions_cap batch of I/O at a time so that the completion
2055 * queue doorbells don't wrap around.
2057 max_completions
= pqpair
->max_completions_cap
;
2061 cpl
= &pqpair
->cpl
[pqpair
->cq_head
];
2063 if (!next_is_valid
&& cpl
->status
.p
!= pqpair
->flags
.phase
) {
2067 if (spdk_likely(pqpair
->cq_head
+ 1 != pqpair
->num_entries
)) {
2068 next_cq_head
= pqpair
->cq_head
+ 1;
2069 next_phase
= pqpair
->flags
.phase
;
2072 next_phase
= !pqpair
->flags
.phase
;
2074 next_cpl
= &pqpair
->cpl
[next_cq_head
];
2075 next_is_valid
= (next_cpl
->status
.p
== next_phase
);
2076 if (next_is_valid
) {
2077 __builtin_prefetch(&pqpair
->tr
[next_cpl
->cid
]);
2082 * This memory barrier prevents reordering of:
2083 * - load after store from/to tr
2084 * - load after load cpl phase and cpl cid
2087 #elif defined(__aarch64__)
2088 __asm
volatile("dmb oshld" ::: "memory");
2091 if (spdk_unlikely(++pqpair
->cq_head
== pqpair
->num_entries
)) {
2092 pqpair
->cq_head
= 0;
2093 pqpair
->flags
.phase
= !pqpair
->flags
.phase
;
2096 tr
= &pqpair
->tr
[cpl
->cid
];
2097 /* Prefetch the req's STAILQ_ENTRY since we'll need to access it
2098 * as part of putting the req back on the qpair's free list.
2100 __builtin_prefetch(&tr
->req
->stailq
);
2101 pqpair
->sq_head
= cpl
->sqhd
;
2104 nvme_pcie_qpair_complete_tracker(qpair
, tr
, cpl
, true);
2106 SPDK_ERRLOG("cpl does not map to outstanding cmd\n");
2107 nvme_qpair_print_completion(qpair
, cpl
);
2111 if (++num_completions
== max_completions
) {
2116 if (num_completions
> 0) {
2117 nvme_pcie_qpair_ring_cq_doorbell(qpair
);
2120 if (pqpair
->flags
.delay_pcie_doorbell
) {
2121 if (pqpair
->last_sq_tail
!= pqpair
->sq_tail
) {
2122 nvme_pcie_qpair_ring_sq_doorbell(qpair
);
2123 pqpair
->last_sq_tail
= pqpair
->sq_tail
;
2127 if (spdk_unlikely(ctrlr
->timeout_enabled
)) {
2129 * User registered for timeout callback
2131 nvme_pcie_qpair_check_timeout(qpair
);
2134 /* Before returning, complete any pending admin request. */
2135 if (spdk_unlikely(nvme_qpair_is_admin_queue(qpair
))) {
2136 nvme_pcie_qpair_complete_pending_admin_request(qpair
);
2138 nvme_robust_mutex_unlock(&ctrlr
->ctrlr_lock
);
2141 return num_completions
;