]>
Commit | Line | Data |
---|---|---|
7c673cae FG |
1 | /*- |
2 | * BSD LICENSE | |
3 | * | |
4 | * Copyright (c) Intel Corporation. | |
11fdf7f2 | 5 | * Copyright (c) 2017, IBM Corporation. |
7c673cae FG |
6 | * All rights reserved. |
7 | * | |
8 | * Redistribution and use in source and binary forms, with or without | |
9 | * modification, are permitted provided that the following conditions | |
10 | * are met: | |
11 | * | |
12 | * * Redistributions of source code must retain the above copyright | |
13 | * notice, this list of conditions and the following disclaimer. | |
14 | * * Redistributions in binary form must reproduce the above copyright | |
15 | * notice, this list of conditions and the following disclaimer in | |
16 | * the documentation and/or other materials provided with the | |
17 | * distribution. | |
18 | * * Neither the name of Intel Corporation nor the names of its | |
19 | * contributors may be used to endorse or promote products derived | |
20 | * from this software without specific prior written permission. | |
21 | * | |
22 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | |
23 | * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | |
24 | * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | |
25 | * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | |
26 | * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | |
27 | * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | |
28 | * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | |
29 | * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | |
30 | * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | |
31 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | |
32 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
33 | */ | |
34 | ||
35 | /* | |
36 | * NVMe over PCIe transport | |
37 | */ | |
38 | ||
11fdf7f2 TL |
39 | #include "spdk/stdinc.h" |
40 | #include "spdk/env.h" | |
41 | #include "spdk/likely.h" | |
7c673cae FG |
42 | #include "nvme_internal.h" |
43 | #include "nvme_uevent.h" | |
44 | ||
/*
 * Number of completion queue entries to process before ringing the
 * completion queue doorbell.
 */
#define NVME_MIN_COMPLETIONS	(1)
#define NVME_MAX_COMPLETIONS	(128)

/* Number of entries in the admin submission and completion queues. */
#define NVME_ADMIN_ENTRIES	(128)

/*
 * NVME_MAX_SGL_DESCRIPTORS defines the maximum number of descriptors in one SGL
 * segment.
 */
#define NVME_MAX_SGL_DESCRIPTORS	(251)

/* Maximum number of PRP entries in one tracker's PRP list (sized so that
 * struct nvme_tracker stays exactly 4K — see the static assert below). */
#define NVME_MAX_PRP_LIST_ENTRIES	(505)
7c673cae | 61 | |
/* Context threaded through spdk_pci_enumerate()/spdk_pci_device_attach()
 * callbacks during controller probing. */
struct nvme_pcie_enum_ctx {
	struct spdk_nvme_probe_ctx *probe_ctx;
	/* Specific PCI address to probe; only meaningful when has_pci_addr is true. */
	struct spdk_pci_addr pci_addr;
	bool has_pci_addr;
};
67 | ||
/* PCIe transport extensions for spdk_nvme_ctrlr */
struct nvme_pcie_ctrlr {
	struct spdk_nvme_ctrlr ctrlr;

	/** NVMe MMIO register space */
	volatile struct spdk_nvme_registers *regs;

	/** NVMe MMIO register size */
	uint64_t regs_size;

	/* BAR mapping address which contains controller memory buffer */
	void *cmb_bar_virt_addr;

	/* BAR physical address which contains controller memory buffer */
	uint64_t cmb_bar_phys_addr;

	/* Controller memory buffer size in Bytes */
	uint64_t cmb_size;

	/* Current offset of controller memory buffer, relative to start of BAR virt addr */
	uint64_t cmb_current_offset;

	/* Last valid offset into CMB, this differs if CMB memory registration occurs or not */
	uint64_t cmb_max_offset;

	/* 2MB-aligned region of the CMB registered via spdk_mem_register()
	 * (set by nvme_pcie_ctrlr_map_cmb when I/O data in CMB is enabled). */
	void *cmb_mem_register_addr;
	size_t cmb_mem_register_size;

	/* True when the CMB may be used for I/O data buffers. */
	bool cmb_io_data_supported;

	/** stride in uint32_t units between doorbell registers (1 = 4 bytes, 2 = 8 bytes, ...) */
	uint32_t doorbell_stride_u32;

	/* Opaque handle to associated PCI device. */
	struct spdk_pci_device *devhandle;

	/* File descriptor returned from spdk_pci_device_claim().  Closed when ctrlr is detached. */
	int claim_fd;

	/* Flag to indicate the MMIO register has been remapped
	 * (by the SIGBUS handler after a hot remove). */
	bool is_remapped;
};
110 | ||
/* Per-command tracking structure; one tracker is bound to each outstanding
 * submission queue entry. */
struct nvme_tracker {
	TAILQ_ENTRY(nvme_tracker) tq_list;

	struct nvme_request *req;
	/* Command identifier; trackers live in an array indexed by cid
	 * (see nvme_pcie_qpair::tr). */
	uint16_t cid;

	/* Explicit padding to keep the layout fixed across compilers. */
	uint16_t rsvd0;
	uint32_t rsvd1;

	spdk_nvme_cmd_cb cb_fn;
	void *cb_arg;

	/* Bus address of u.prp / u.sgl below, referenced by the submitted command. */
	uint64_t prp_sgl_bus_addr;

	union {
		uint64_t prp[NVME_MAX_PRP_LIST_ENTRIES];
		struct spdk_nvme_sgl_descriptor sgl[NVME_MAX_SGL_DESCRIPTORS];
	} u;
};
/*
 * struct nvme_tracker must be exactly 4K so that the prp[] array does not cross a page boundary
 * and so that there is no padding required to meet alignment requirements.
 */
SPDK_STATIC_ASSERT(sizeof(struct nvme_tracker) == 4096, "nvme_tracker is not 4K");
SPDK_STATIC_ASSERT((offsetof(struct nvme_tracker, u.sgl) & 7) == 0, "SGL must be Qword aligned");
136 | ||
/* PCIe transport extensions for spdk_nvme_qpair */
struct nvme_pcie_qpair {
	/* Submission queue tail doorbell */
	volatile uint32_t *sq_tdbl;

	/* Completion queue head doorbell */
	volatile uint32_t *cq_hdbl;

	/* Submission queue */
	struct spdk_nvme_cmd *cmd;

	/* Completion queue */
	struct spdk_nvme_cpl *cpl;

	TAILQ_HEAD(, nvme_tracker) free_tr;
	TAILQ_HEAD(nvme_outstanding_tr_head, nvme_tracker) outstanding_tr;

	/* Array of trackers indexed by command ID. */
	struct nvme_tracker *tr;

	uint16_t num_entries;

	/* Upper bound on completions processed per poll — presumably clamped to
	 * [NVME_MIN_COMPLETIONS, NVME_MAX_COMPLETIONS]; confirm in poll path. */
	uint16_t max_completions_cap;

	/* SQ tail as of the last doorbell write — NOTE(review): appears tied to
	 * flags.delay_pcie_doorbell; verify against the submit/poll code. */
	uint16_t last_sq_tail;
	uint16_t sq_tail;
	uint16_t cq_head;
	uint16_t sq_head;

	struct {
		/* Current completion queue phase tag value. */
		uint8_t phase : 1;
		/* When set, batch doorbell writes instead of ringing per command. */
		uint8_t delay_pcie_doorbell : 1;
		/* When set, the shadow_doorbell pointers below are valid. */
		uint8_t has_shadow_doorbell : 1;
	} flags;

	/*
	 * Base qpair structure.
	 * This is located after the hot data in this structure so that the important parts of
	 * nvme_pcie_qpair are in the same cache line.
	 */
	struct spdk_nvme_qpair qpair;

	struct {
		/* Submission queue shadow tail doorbell */
		volatile uint32_t *sq_tdbl;

		/* Completion queue shadow head doorbell */
		volatile uint32_t *cq_hdbl;

		/* Submission queue event index */
		volatile uint32_t *sq_eventidx;

		/* Completion queue event index */
		volatile uint32_t *cq_eventidx;
	} shadow_doorbell;

	/*
	 * Fields below this point should not be touched on the normal I/O path.
	 */

	/* True if the submission queue was placed in the controller memory buffer. */
	bool sq_in_cmb;

	uint64_t cmd_bus_addr;
	uint64_t cpl_bus_addr;
};
202 | ||
9f95a23c | 203 | static int nvme_pcie_ctrlr_attach(struct spdk_nvme_probe_ctx *probe_ctx, |
7c673cae FG |
204 | struct spdk_pci_addr *pci_addr); |
205 | static int nvme_pcie_qpair_construct(struct spdk_nvme_qpair *qpair); | |
206 | static int nvme_pcie_qpair_destroy(struct spdk_nvme_qpair *qpair); | |
207 | ||
208 | __thread struct nvme_pcie_ctrlr *g_thread_mmio_ctrlr = NULL; | |
209 | static volatile uint16_t g_signal_lock; | |
210 | static bool g_sigset = false; | |
211 | static int hotplug_fd = -1; | |
212 | ||
213 | static void | |
214 | nvme_sigbus_fault_sighandler(int signum, siginfo_t *info, void *ctx) | |
215 | { | |
216 | void *map_address; | |
217 | ||
218 | if (!__sync_bool_compare_and_swap(&g_signal_lock, 0, 1)) { | |
219 | return; | |
220 | } | |
221 | ||
222 | assert(g_thread_mmio_ctrlr != NULL); | |
223 | ||
224 | if (!g_thread_mmio_ctrlr->is_remapped) { | |
225 | map_address = mmap((void *)g_thread_mmio_ctrlr->regs, g_thread_mmio_ctrlr->regs_size, | |
226 | PROT_READ | PROT_WRITE, | |
227 | MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, -1, 0); | |
228 | if (map_address == MAP_FAILED) { | |
229 | SPDK_ERRLOG("mmap failed\n"); | |
230 | g_signal_lock = 0; | |
231 | return; | |
232 | } | |
233 | memset(map_address, 0xFF, sizeof(struct spdk_nvme_registers)); | |
234 | g_thread_mmio_ctrlr->regs = (volatile struct spdk_nvme_registers *)map_address; | |
235 | g_thread_mmio_ctrlr->is_remapped = true; | |
236 | } | |
237 | g_signal_lock = 0; | |
238 | return; | |
239 | } | |
240 | ||
241 | static void | |
242 | nvme_pcie_ctrlr_setup_signal(void) | |
243 | { | |
244 | struct sigaction sa; | |
245 | ||
246 | sa.sa_sigaction = nvme_sigbus_fault_sighandler; | |
247 | sigemptyset(&sa.sa_mask); | |
248 | sa.sa_flags = SA_SIGINFO; | |
249 | sigaction(SIGBUS, &sa, NULL); | |
250 | } | |
251 | ||
/* Downcast a generic spdk_nvme_ctrlr to its PCIe-transport container. */
static inline struct nvme_pcie_ctrlr *
nvme_pcie_ctrlr(struct spdk_nvme_ctrlr *ctrlr)
{
	assert(ctrlr->trid.trtype == SPDK_NVME_TRANSPORT_PCIE);
	return SPDK_CONTAINEROF(ctrlr, struct nvme_pcie_ctrlr, ctrlr);
}
258 | ||
7c673cae | 259 | static int |
9f95a23c | 260 | _nvme_pcie_hotplug_monitor(struct spdk_nvme_probe_ctx *probe_ctx) |
7c673cae | 261 | { |
11fdf7f2 | 262 | struct spdk_nvme_ctrlr *ctrlr, *tmp; |
7c673cae FG |
263 | struct spdk_uevent event; |
264 | struct spdk_pci_addr pci_addr; | |
11fdf7f2 TL |
265 | union spdk_nvme_csts_register csts; |
266 | struct spdk_nvme_ctrlr_process *proc; | |
7c673cae FG |
267 | |
268 | while (spdk_get_uevent(hotplug_fd, &event) > 0) { | |
11fdf7f2 TL |
269 | if (event.subsystem == SPDK_NVME_UEVENT_SUBSYSTEM_UIO || |
270 | event.subsystem == SPDK_NVME_UEVENT_SUBSYSTEM_VFIO) { | |
7c673cae | 271 | if (event.action == SPDK_NVME_UEVENT_ADD) { |
11fdf7f2 | 272 | SPDK_DEBUGLOG(SPDK_LOG_NVME, "add nvme address: %s\n", |
7c673cae FG |
273 | event.traddr); |
274 | if (spdk_process_is_primary()) { | |
275 | if (!spdk_pci_addr_parse(&pci_addr, event.traddr)) { | |
9f95a23c | 276 | nvme_pcie_ctrlr_attach(probe_ctx, &pci_addr); |
7c673cae FG |
277 | } |
278 | } | |
279 | } else if (event.action == SPDK_NVME_UEVENT_REMOVE) { | |
11fdf7f2 | 280 | struct spdk_nvme_transport_id trid; |
7c673cae | 281 | |
11fdf7f2 TL |
282 | memset(&trid, 0, sizeof(trid)); |
283 | trid.trtype = SPDK_NVME_TRANSPORT_PCIE; | |
284 | snprintf(trid.traddr, sizeof(trid.traddr), "%s", event.traddr); | |
285 | ||
286 | ctrlr = spdk_nvme_get_ctrlr_by_trid_unsafe(&trid); | |
287 | if (ctrlr == NULL) { | |
7c673cae FG |
288 | return 0; |
289 | } | |
11fdf7f2 | 290 | SPDK_DEBUGLOG(SPDK_LOG_NVME, "remove nvme address: %s\n", |
7c673cae FG |
291 | event.traddr); |
292 | ||
293 | nvme_ctrlr_fail(ctrlr, true); | |
294 | ||
295 | /* get the user app to clean up and stop I/O */ | |
9f95a23c | 296 | if (probe_ctx->remove_cb) { |
7c673cae | 297 | nvme_robust_mutex_unlock(&g_spdk_nvme_driver->lock); |
9f95a23c | 298 | probe_ctx->remove_cb(probe_ctx->cb_ctx, ctrlr); |
7c673cae FG |
299 | nvme_robust_mutex_lock(&g_spdk_nvme_driver->lock); |
300 | } | |
301 | } | |
302 | } | |
303 | } | |
11fdf7f2 TL |
304 | |
305 | /* This is a work around for vfio-attached device hot remove detection. */ | |
306 | TAILQ_FOREACH_SAFE(ctrlr, &g_spdk_nvme_driver->shared_attached_ctrlrs, tailq, tmp) { | |
9f95a23c TL |
307 | bool do_remove = false; |
308 | ||
309 | if (ctrlr->trid.trtype == SPDK_NVME_TRANSPORT_PCIE) { | |
310 | struct nvme_pcie_ctrlr *pctrlr = nvme_pcie_ctrlr(ctrlr); | |
311 | ||
312 | if (spdk_pci_device_is_removed(pctrlr->devhandle)) { | |
313 | do_remove = true; | |
314 | } | |
315 | } | |
316 | ||
317 | /* NVMe controller BAR must be mapped in the current process before any access. */ | |
11fdf7f2 TL |
318 | proc = spdk_nvme_ctrlr_get_current_process(ctrlr); |
319 | if (proc) { | |
320 | csts = spdk_nvme_ctrlr_get_regs_csts(ctrlr); | |
321 | if (csts.raw == 0xffffffffU) { | |
9f95a23c TL |
322 | do_remove = true; |
323 | } | |
324 | } | |
325 | ||
326 | if (do_remove) { | |
327 | nvme_ctrlr_fail(ctrlr, true); | |
328 | if (probe_ctx->remove_cb) { | |
329 | nvme_robust_mutex_unlock(&g_spdk_nvme_driver->lock); | |
330 | probe_ctx->remove_cb(probe_ctx->cb_ctx, ctrlr); | |
331 | nvme_robust_mutex_lock(&g_spdk_nvme_driver->lock); | |
11fdf7f2 TL |
332 | } |
333 | } | |
334 | } | |
7c673cae FG |
335 | return 0; |
336 | } | |
337 | ||
/* Downcast a generic spdk_nvme_qpair to its PCIe-transport container. */
static inline struct nvme_pcie_qpair *
nvme_pcie_qpair(struct spdk_nvme_qpair *qpair)
{
	assert(qpair->trtype == SPDK_NVME_TRANSPORT_PCIE);
	return SPDK_CONTAINEROF(qpair, struct nvme_pcie_qpair, qpair);
}
344 | ||
345 | static volatile void * | |
346 | nvme_pcie_reg_addr(struct spdk_nvme_ctrlr *ctrlr, uint32_t offset) | |
347 | { | |
348 | struct nvme_pcie_ctrlr *pctrlr = nvme_pcie_ctrlr(ctrlr); | |
349 | ||
350 | return (volatile void *)((uintptr_t)pctrlr->regs + offset); | |
351 | } | |
352 | ||
353 | int | |
354 | nvme_pcie_ctrlr_set_reg_4(struct spdk_nvme_ctrlr *ctrlr, uint32_t offset, uint32_t value) | |
355 | { | |
356 | struct nvme_pcie_ctrlr *pctrlr = nvme_pcie_ctrlr(ctrlr); | |
357 | ||
358 | assert(offset <= sizeof(struct spdk_nvme_registers) - 4); | |
359 | g_thread_mmio_ctrlr = pctrlr; | |
360 | spdk_mmio_write_4(nvme_pcie_reg_addr(ctrlr, offset), value); | |
361 | g_thread_mmio_ctrlr = NULL; | |
362 | return 0; | |
363 | } | |
364 | ||
365 | int | |
366 | nvme_pcie_ctrlr_set_reg_8(struct spdk_nvme_ctrlr *ctrlr, uint32_t offset, uint64_t value) | |
367 | { | |
368 | struct nvme_pcie_ctrlr *pctrlr = nvme_pcie_ctrlr(ctrlr); | |
369 | ||
370 | assert(offset <= sizeof(struct spdk_nvme_registers) - 8); | |
371 | g_thread_mmio_ctrlr = pctrlr; | |
372 | spdk_mmio_write_8(nvme_pcie_reg_addr(ctrlr, offset), value); | |
373 | g_thread_mmio_ctrlr = NULL; | |
374 | return 0; | |
375 | } | |
376 | ||
377 | int | |
378 | nvme_pcie_ctrlr_get_reg_4(struct spdk_nvme_ctrlr *ctrlr, uint32_t offset, uint32_t *value) | |
379 | { | |
380 | struct nvme_pcie_ctrlr *pctrlr = nvme_pcie_ctrlr(ctrlr); | |
381 | ||
382 | assert(offset <= sizeof(struct spdk_nvme_registers) - 4); | |
383 | assert(value != NULL); | |
384 | g_thread_mmio_ctrlr = pctrlr; | |
385 | *value = spdk_mmio_read_4(nvme_pcie_reg_addr(ctrlr, offset)); | |
386 | g_thread_mmio_ctrlr = NULL; | |
387 | if (~(*value) == 0) { | |
388 | return -1; | |
389 | } | |
390 | ||
391 | return 0; | |
392 | } | |
393 | ||
394 | int | |
395 | nvme_pcie_ctrlr_get_reg_8(struct spdk_nvme_ctrlr *ctrlr, uint32_t offset, uint64_t *value) | |
396 | { | |
397 | struct nvme_pcie_ctrlr *pctrlr = nvme_pcie_ctrlr(ctrlr); | |
398 | ||
399 | assert(offset <= sizeof(struct spdk_nvme_registers) - 8); | |
400 | assert(value != NULL); | |
401 | g_thread_mmio_ctrlr = pctrlr; | |
402 | *value = spdk_mmio_read_8(nvme_pcie_reg_addr(ctrlr, offset)); | |
403 | g_thread_mmio_ctrlr = NULL; | |
404 | if (~(*value) == 0) { | |
405 | return -1; | |
406 | } | |
407 | ||
408 | return 0; | |
409 | } | |
410 | ||
411 | static int | |
412 | nvme_pcie_ctrlr_set_asq(struct nvme_pcie_ctrlr *pctrlr, uint64_t value) | |
413 | { | |
414 | return nvme_pcie_ctrlr_set_reg_8(&pctrlr->ctrlr, offsetof(struct spdk_nvme_registers, asq), | |
415 | value); | |
416 | } | |
417 | ||
418 | static int | |
419 | nvme_pcie_ctrlr_set_acq(struct nvme_pcie_ctrlr *pctrlr, uint64_t value) | |
420 | { | |
421 | return nvme_pcie_ctrlr_set_reg_8(&pctrlr->ctrlr, offsetof(struct spdk_nvme_registers, acq), | |
422 | value); | |
423 | } | |
424 | ||
425 | static int | |
426 | nvme_pcie_ctrlr_set_aqa(struct nvme_pcie_ctrlr *pctrlr, const union spdk_nvme_aqa_register *aqa) | |
427 | { | |
428 | return nvme_pcie_ctrlr_set_reg_4(&pctrlr->ctrlr, offsetof(struct spdk_nvme_registers, aqa.raw), | |
429 | aqa->raw); | |
430 | } | |
431 | ||
432 | static int | |
433 | nvme_pcie_ctrlr_get_cmbloc(struct nvme_pcie_ctrlr *pctrlr, union spdk_nvme_cmbloc_register *cmbloc) | |
434 | { | |
435 | return nvme_pcie_ctrlr_get_reg_4(&pctrlr->ctrlr, offsetof(struct spdk_nvme_registers, cmbloc.raw), | |
436 | &cmbloc->raw); | |
437 | } | |
438 | ||
439 | static int | |
440 | nvme_pcie_ctrlr_get_cmbsz(struct nvme_pcie_ctrlr *pctrlr, union spdk_nvme_cmbsz_register *cmbsz) | |
441 | { | |
442 | return nvme_pcie_ctrlr_get_reg_4(&pctrlr->ctrlr, offsetof(struct spdk_nvme_registers, cmbsz.raw), | |
443 | &cmbsz->raw); | |
444 | } | |
445 | ||
uint32_t
nvme_pcie_ctrlr_get_max_xfer_size(struct spdk_nvme_ctrlr *ctrlr)
{
	/*
	 * For commands requiring more than 2 PRP entries, one PRP will be
	 * embedded in the command (prp1), and the rest of the PRP entries
	 * will be in a list pointed to by the command (prp2).  That list
	 * lives in the tracker's prp[] array and thus holds at most
	 * NVME_MAX_PRP_LIST_ENTRIES entries, limiting one transfer to
	 * NVME_MAX_PRP_LIST_ENTRIES pages.  (An older comment here said
	 * "506" — the macro is the authoritative value.)
	 */
	return NVME_MAX_PRP_LIST_ENTRIES * ctrlr->page_size;
}
458 | ||
11fdf7f2 TL |
459 | uint16_t |
460 | nvme_pcie_ctrlr_get_max_sges(struct spdk_nvme_ctrlr *ctrlr) | |
7c673cae | 461 | { |
11fdf7f2 | 462 | return NVME_MAX_SGL_DESCRIPTORS; |
7c673cae FG |
463 | } |
464 | ||
/*
 * Discover and map the controller memory buffer (CMB), if present.
 *
 * Reads CMBSZ/CMBLOC, maps the containing BAR, and records the usable
 * [cmb_current_offset, cmb_max_offset) window.  When the controller supports
 * using the CMB for I/O data (WDS/RDS), the 2MB-aligned interior is also
 * registered with the SPDK memory map and cmb_io_data_supported is set.
 * On any failure the CMB is disabled (cmb_bar_virt_addr = NULL,
 * use_cmb_sqs = false).
 */
static void
nvme_pcie_ctrlr_map_cmb(struct nvme_pcie_ctrlr *pctrlr)
{
	int rc;
	void *addr;
	uint32_t bir;
	union spdk_nvme_cmbsz_register cmbsz;
	union spdk_nvme_cmbloc_register cmbloc;
	uint64_t size, unit_size, offset, bar_size, bar_phys_addr;
	uint64_t mem_register_start, mem_register_end;

	if (nvme_pcie_ctrlr_get_cmbsz(pctrlr, &cmbsz) ||
	    nvme_pcie_ctrlr_get_cmbloc(pctrlr, &cmbloc)) {
		SPDK_ERRLOG("get registers failed\n");
		goto exit;
	}

	/* CMBSZ.SZ == 0 means no CMB is implemented. */
	if (!cmbsz.bits.sz) {
		goto exit;
	}

	bir = cmbloc.bits.bir;
	/* Values 0 2 3 4 5 are valid for BAR */
	if (bir > 5 || bir == 1) {
		goto exit;
	}

	/* unit size for 4KB/64KB/1MB/16MB/256MB/4GB/64GB */
	unit_size = (uint64_t)1 << (12 + 4 * cmbsz.bits.szu);
	/* controller memory buffer size in Bytes */
	size = unit_size * cmbsz.bits.sz;
	/* controller memory buffer offset from BAR in Bytes */
	offset = unit_size * cmbloc.bits.ofst;

	rc = spdk_pci_device_map_bar(pctrlr->devhandle, bir, &addr,
				     &bar_phys_addr, &bar_size);
	if ((rc != 0) || addr == NULL) {
		goto exit;
	}

	/* Reject a CMB window that does not fit inside the mapped BAR.
	 * NOTE(review): the goto exit paths from here on leave the BAR
	 * mapped (cmb_bar_virt_addr is reset to NULL, so unmap_cmb will
	 * never unmap it) — possible mapping leak; confirm and fix upstream. */
	if (offset > bar_size) {
		goto exit;
	}

	if (size > bar_size - offset) {
		goto exit;
	}

	pctrlr->cmb_bar_virt_addr = addr;
	pctrlr->cmb_bar_phys_addr = bar_phys_addr;
	pctrlr->cmb_size = size;
	pctrlr->cmb_current_offset = offset;
	pctrlr->cmb_max_offset = offset + size;

	if (!cmbsz.bits.sqs) {
		pctrlr->ctrlr.opts.use_cmb_sqs = false;
	}

	/* If only SQS is supported use legacy mapping */
	if (cmbsz.bits.sqs && !(cmbsz.bits.wds || cmbsz.bits.rds)) {
		return;
	}

	/* If CMB is less than 4MiB in size then abort CMB mapping */
	if (pctrlr->cmb_size < (1ULL << 22)) {
		goto exit;
	}

	/* Round the registered region inward to 2MB boundaries, as required
	 * by the SPDK memory map. */
	mem_register_start = _2MB_PAGE((uintptr_t)pctrlr->cmb_bar_virt_addr + offset + VALUE_2MB - 1);
	mem_register_end = _2MB_PAGE((uintptr_t)pctrlr->cmb_bar_virt_addr + offset + pctrlr->cmb_size);
	pctrlr->cmb_mem_register_addr = (void *)mem_register_start;
	pctrlr->cmb_mem_register_size = mem_register_end - mem_register_start;

	rc = spdk_mem_register(pctrlr->cmb_mem_register_addr, pctrlr->cmb_mem_register_size);
	if (rc) {
		SPDK_ERRLOG("spdk_mem_register() failed\n");
		goto exit;
	}
	/* Restrict the usable window to the registered region. */
	pctrlr->cmb_current_offset = mem_register_start - ((uint64_t)pctrlr->cmb_bar_virt_addr);
	pctrlr->cmb_max_offset = mem_register_end - ((uint64_t)pctrlr->cmb_bar_virt_addr);
	pctrlr->cmb_io_data_supported = true;

	return;
exit:
	pctrlr->cmb_bar_virt_addr = NULL;
	pctrlr->ctrlr.opts.use_cmb_sqs = false;
	return;
}
553 | ||
554 | static int | |
555 | nvme_pcie_ctrlr_unmap_cmb(struct nvme_pcie_ctrlr *pctrlr) | |
556 | { | |
557 | int rc = 0; | |
558 | union spdk_nvme_cmbloc_register cmbloc; | |
559 | void *addr = pctrlr->cmb_bar_virt_addr; | |
560 | ||
561 | if (addr) { | |
11fdf7f2 TL |
562 | if (pctrlr->cmb_mem_register_addr) { |
563 | spdk_mem_unregister(pctrlr->cmb_mem_register_addr, pctrlr->cmb_mem_register_size); | |
564 | } | |
565 | ||
7c673cae FG |
566 | if (nvme_pcie_ctrlr_get_cmbloc(pctrlr, &cmbloc)) { |
567 | SPDK_ERRLOG("get_cmbloc() failed\n"); | |
568 | return -EIO; | |
569 | } | |
570 | rc = spdk_pci_device_unmap_bar(pctrlr->devhandle, cmbloc.bits.bir, addr); | |
571 | } | |
572 | return rc; | |
573 | } | |
574 | ||
575 | static int | |
576 | nvme_pcie_ctrlr_alloc_cmb(struct spdk_nvme_ctrlr *ctrlr, uint64_t length, uint64_t aligned, | |
577 | uint64_t *offset) | |
578 | { | |
579 | struct nvme_pcie_ctrlr *pctrlr = nvme_pcie_ctrlr(ctrlr); | |
580 | uint64_t round_offset; | |
581 | ||
582 | round_offset = pctrlr->cmb_current_offset; | |
583 | round_offset = (round_offset + (aligned - 1)) & ~(aligned - 1); | |
584 | ||
11fdf7f2 TL |
585 | /* CMB may only consume part of the BAR, calculate accordingly */ |
586 | if (round_offset + length > pctrlr->cmb_max_offset) { | |
587 | SPDK_ERRLOG("Tried to allocate past valid CMB range!\n"); | |
7c673cae | 588 | return -1; |
11fdf7f2 | 589 | } |
7c673cae FG |
590 | |
591 | *offset = round_offset; | |
592 | pctrlr->cmb_current_offset = round_offset + length; | |
593 | ||
594 | return 0; | |
595 | } | |
596 | ||
9f95a23c TL |
597 | volatile struct spdk_nvme_registers * |
598 | nvme_pcie_ctrlr_get_registers(struct spdk_nvme_ctrlr *ctrlr) | |
599 | { | |
600 | struct nvme_pcie_ctrlr *pctrlr = nvme_pcie_ctrlr(ctrlr); | |
601 | ||
602 | return pctrlr->regs; | |
603 | } | |
604 | ||
11fdf7f2 TL |
605 | void * |
606 | nvme_pcie_ctrlr_alloc_cmb_io_buffer(struct spdk_nvme_ctrlr *ctrlr, size_t size) | |
607 | { | |
608 | struct nvme_pcie_ctrlr *pctrlr = nvme_pcie_ctrlr(ctrlr); | |
609 | uint64_t offset; | |
610 | ||
611 | if (pctrlr->cmb_bar_virt_addr == NULL) { | |
612 | SPDK_DEBUGLOG(SPDK_LOG_NVME, "CMB not available\n"); | |
613 | return NULL; | |
614 | } | |
615 | ||
616 | if (!pctrlr->cmb_io_data_supported) { | |
617 | SPDK_DEBUGLOG(SPDK_LOG_NVME, "CMB doesn't support I/O data\n"); | |
618 | return NULL; | |
619 | } | |
620 | ||
621 | if (nvme_pcie_ctrlr_alloc_cmb(ctrlr, size, 4, &offset) != 0) { | |
622 | SPDK_DEBUGLOG(SPDK_LOG_NVME, "%zu-byte CMB allocation failed\n", size); | |
623 | return NULL; | |
624 | } | |
625 | ||
626 | return pctrlr->cmb_bar_virt_addr + offset; | |
627 | } | |
628 | ||
/* Release a CMB I/O buffer previously returned by
 * nvme_pcie_ctrlr_alloc_cmb_io_buffer().  Currently a no-op: the CMB is a
 * bump allocator with no free list, so the space is simply leaked. */
int
nvme_pcie_ctrlr_free_cmb_io_buffer(struct spdk_nvme_ctrlr *ctrlr, void *buf, size_t size)
{
	/*
	 * Do nothing for now.
	 * TODO: Track free space so buffers may be reused.
	 */
	SPDK_ERRLOG("%s: no deallocation for CMB buffers yet!\n",
		    __func__);
	return 0;
}
640 | ||
7c673cae FG |
641 | static int |
642 | nvme_pcie_ctrlr_allocate_bars(struct nvme_pcie_ctrlr *pctrlr) | |
643 | { | |
644 | int rc; | |
645 | void *addr; | |
646 | uint64_t phys_addr, size; | |
647 | ||
648 | rc = spdk_pci_device_map_bar(pctrlr->devhandle, 0, &addr, | |
649 | &phys_addr, &size); | |
650 | pctrlr->regs = (volatile struct spdk_nvme_registers *)addr; | |
651 | if ((pctrlr->regs == NULL) || (rc != 0)) { | |
652 | SPDK_ERRLOG("nvme_pcicfg_map_bar failed with rc %d or bar %p\n", | |
653 | rc, pctrlr->regs); | |
654 | return -1; | |
655 | } | |
656 | ||
657 | pctrlr->regs_size = size; | |
658 | nvme_pcie_ctrlr_map_cmb(pctrlr); | |
659 | ||
660 | return 0; | |
661 | } | |
662 | ||
663 | static int | |
664 | nvme_pcie_ctrlr_free_bars(struct nvme_pcie_ctrlr *pctrlr) | |
665 | { | |
666 | int rc = 0; | |
667 | void *addr = (void *)pctrlr->regs; | |
668 | ||
669 | if (pctrlr->ctrlr.is_removed) { | |
670 | return rc; | |
671 | } | |
672 | ||
673 | rc = nvme_pcie_ctrlr_unmap_cmb(pctrlr); | |
674 | if (rc != 0) { | |
675 | SPDK_ERRLOG("nvme_ctrlr_unmap_cmb failed with error code %d\n", rc); | |
676 | return -1; | |
677 | } | |
678 | ||
679 | if (addr) { | |
680 | /* NOTE: addr may have been remapped here. We're relying on DPDK to call | |
681 | * munmap internally. | |
682 | */ | |
683 | rc = spdk_pci_device_unmap_bar(pctrlr->devhandle, 0, addr); | |
684 | } | |
685 | return rc; | |
686 | } | |
687 | ||
688 | static int | |
689 | nvme_pcie_ctrlr_construct_admin_qpair(struct spdk_nvme_ctrlr *ctrlr) | |
690 | { | |
691 | struct nvme_pcie_qpair *pqpair; | |
692 | int rc; | |
693 | ||
11fdf7f2 | 694 | pqpair = spdk_zmalloc(sizeof(*pqpair), 64, NULL, SPDK_ENV_SOCKET_ID_ANY, SPDK_MALLOC_SHARE); |
7c673cae FG |
695 | if (pqpair == NULL) { |
696 | return -ENOMEM; | |
697 | } | |
698 | ||
699 | pqpair->num_entries = NVME_ADMIN_ENTRIES; | |
9f95a23c | 700 | pqpair->flags.delay_pcie_doorbell = 0; |
7c673cae FG |
701 | |
702 | ctrlr->adminq = &pqpair->qpair; | |
703 | ||
704 | rc = nvme_qpair_init(ctrlr->adminq, | |
705 | 0, /* qpair ID */ | |
706 | ctrlr, | |
707 | SPDK_NVME_QPRIO_URGENT, | |
708 | NVME_ADMIN_ENTRIES); | |
709 | if (rc != 0) { | |
710 | return rc; | |
711 | } | |
712 | ||
713 | return nvme_pcie_qpair_construct(ctrlr->adminq); | |
714 | } | |
715 | ||
716 | /* This function must only be called while holding g_spdk_nvme_driver->lock */ | |
717 | static int | |
718 | pcie_nvme_enum_cb(void *ctx, struct spdk_pci_device *pci_dev) | |
719 | { | |
720 | struct spdk_nvme_transport_id trid = {}; | |
721 | struct nvme_pcie_enum_ctx *enum_ctx = ctx; | |
722 | struct spdk_nvme_ctrlr *ctrlr; | |
7c673cae FG |
723 | struct spdk_pci_addr pci_addr; |
724 | ||
725 | pci_addr = spdk_pci_device_get_addr(pci_dev); | |
726 | ||
727 | trid.trtype = SPDK_NVME_TRANSPORT_PCIE; | |
728 | spdk_pci_addr_fmt(trid.traddr, sizeof(trid.traddr), &pci_addr); | |
729 | ||
11fdf7f2 | 730 | ctrlr = spdk_nvme_get_ctrlr_by_trid_unsafe(&trid); |
9f95a23c TL |
731 | if (!spdk_process_is_primary()) { |
732 | if (!ctrlr) { | |
733 | SPDK_ERRLOG("Controller must be constructed in the primary process first.\n"); | |
734 | return -1; | |
7c673cae | 735 | } |
9f95a23c TL |
736 | |
737 | return nvme_ctrlr_add_process(ctrlr, pci_dev); | |
7c673cae FG |
738 | } |
739 | ||
740 | /* check whether user passes the pci_addr */ | |
741 | if (enum_ctx->has_pci_addr && | |
742 | (spdk_pci_addr_compare(&pci_addr, &enum_ctx->pci_addr) != 0)) { | |
743 | return 1; | |
744 | } | |
745 | ||
9f95a23c | 746 | return nvme_ctrlr_probe(&trid, enum_ctx->probe_ctx, pci_dev); |
7c673cae FG |
747 | } |
748 | ||
749 | int | |
9f95a23c | 750 | nvme_pcie_ctrlr_scan(struct spdk_nvme_probe_ctx *probe_ctx, |
11fdf7f2 | 751 | bool direct_connect) |
7c673cae FG |
752 | { |
753 | struct nvme_pcie_enum_ctx enum_ctx = {}; | |
754 | ||
9f95a23c | 755 | enum_ctx.probe_ctx = probe_ctx; |
7c673cae | 756 | |
9f95a23c TL |
757 | if (strlen(probe_ctx->trid.traddr) != 0) { |
758 | if (spdk_pci_addr_parse(&enum_ctx.pci_addr, probe_ctx->trid.traddr)) { | |
7c673cae FG |
759 | return -1; |
760 | } | |
761 | enum_ctx.has_pci_addr = true; | |
762 | } | |
763 | ||
764 | if (hotplug_fd < 0) { | |
765 | hotplug_fd = spdk_uevent_connect(); | |
766 | if (hotplug_fd < 0) { | |
11fdf7f2 | 767 | SPDK_DEBUGLOG(SPDK_LOG_NVME, "Failed to open uevent netlink socket\n"); |
7c673cae FG |
768 | } |
769 | } else { | |
9f95a23c | 770 | _nvme_pcie_hotplug_monitor(probe_ctx); |
7c673cae FG |
771 | } |
772 | ||
773 | if (enum_ctx.has_pci_addr == false) { | |
9f95a23c TL |
774 | return spdk_pci_enumerate(spdk_pci_nvme_get_driver(), |
775 | pcie_nvme_enum_cb, &enum_ctx); | |
7c673cae | 776 | } else { |
9f95a23c TL |
777 | return spdk_pci_device_attach(spdk_pci_nvme_get_driver(), |
778 | pcie_nvme_enum_cb, &enum_ctx, &enum_ctx.pci_addr); | |
7c673cae FG |
779 | } |
780 | } | |
781 | ||
782 | static int | |
9f95a23c | 783 | nvme_pcie_ctrlr_attach(struct spdk_nvme_probe_ctx *probe_ctx, struct spdk_pci_addr *pci_addr) |
7c673cae FG |
784 | { |
785 | struct nvme_pcie_enum_ctx enum_ctx; | |
786 | ||
9f95a23c TL |
787 | enum_ctx.probe_ctx = probe_ctx; |
788 | enum_ctx.has_pci_addr = true; | |
789 | enum_ctx.pci_addr = *pci_addr; | |
7c673cae | 790 | |
9f95a23c | 791 | return spdk_pci_enumerate(spdk_pci_nvme_get_driver(), pcie_nvme_enum_cb, &enum_ctx); |
7c673cae FG |
792 | } |
793 | ||
794 | struct spdk_nvme_ctrlr *nvme_pcie_ctrlr_construct(const struct spdk_nvme_transport_id *trid, | |
795 | const struct spdk_nvme_ctrlr_opts *opts, | |
796 | void *devhandle) | |
797 | { | |
798 | struct spdk_pci_device *pci_dev = devhandle; | |
799 | struct nvme_pcie_ctrlr *pctrlr; | |
800 | union spdk_nvme_cap_register cap; | |
11fdf7f2 | 801 | union spdk_nvme_vs_register vs; |
7c673cae | 802 | uint32_t cmd_reg; |
11fdf7f2 | 803 | int rc, claim_fd; |
7c673cae | 804 | struct spdk_pci_id pci_id; |
11fdf7f2 TL |
805 | struct spdk_pci_addr pci_addr; |
806 | ||
807 | if (spdk_pci_addr_parse(&pci_addr, trid->traddr)) { | |
808 | SPDK_ERRLOG("could not parse pci address\n"); | |
809 | return NULL; | |
810 | } | |
811 | ||
812 | claim_fd = spdk_pci_device_claim(&pci_addr); | |
813 | if (claim_fd < 0) { | |
814 | SPDK_ERRLOG("could not claim device %s\n", trid->traddr); | |
815 | return NULL; | |
816 | } | |
7c673cae | 817 | |
11fdf7f2 TL |
818 | pctrlr = spdk_zmalloc(sizeof(struct nvme_pcie_ctrlr), 64, NULL, |
819 | SPDK_ENV_SOCKET_ID_ANY, SPDK_MALLOC_SHARE); | |
7c673cae | 820 | if (pctrlr == NULL) { |
11fdf7f2 | 821 | close(claim_fd); |
7c673cae FG |
822 | SPDK_ERRLOG("could not allocate ctrlr\n"); |
823 | return NULL; | |
824 | } | |
825 | ||
826 | pctrlr->is_remapped = false; | |
827 | pctrlr->ctrlr.is_removed = false; | |
828 | pctrlr->ctrlr.trid.trtype = SPDK_NVME_TRANSPORT_PCIE; | |
829 | pctrlr->devhandle = devhandle; | |
830 | pctrlr->ctrlr.opts = *opts; | |
11fdf7f2 | 831 | pctrlr->claim_fd = claim_fd; |
7c673cae FG |
832 | memcpy(&pctrlr->ctrlr.trid, trid, sizeof(pctrlr->ctrlr.trid)); |
833 | ||
834 | rc = nvme_pcie_ctrlr_allocate_bars(pctrlr); | |
835 | if (rc != 0) { | |
11fdf7f2 | 836 | close(claim_fd); |
7c673cae FG |
837 | spdk_free(pctrlr); |
838 | return NULL; | |
839 | } | |
840 | ||
841 | /* Enable PCI busmaster and disable INTx */ | |
842 | spdk_pci_device_cfg_read32(pci_dev, &cmd_reg, 4); | |
843 | cmd_reg |= 0x404; | |
844 | spdk_pci_device_cfg_write32(pci_dev, cmd_reg, 4); | |
845 | ||
846 | if (nvme_ctrlr_get_cap(&pctrlr->ctrlr, &cap)) { | |
847 | SPDK_ERRLOG("get_cap() failed\n"); | |
11fdf7f2 | 848 | close(claim_fd); |
7c673cae FG |
849 | spdk_free(pctrlr); |
850 | return NULL; | |
851 | } | |
852 | ||
11fdf7f2 TL |
853 | if (nvme_ctrlr_get_vs(&pctrlr->ctrlr, &vs)) { |
854 | SPDK_ERRLOG("get_vs() failed\n"); | |
855 | close(claim_fd); | |
856 | spdk_free(pctrlr); | |
857 | return NULL; | |
858 | } | |
859 | ||
860 | nvme_ctrlr_init_cap(&pctrlr->ctrlr, &cap, &vs); | |
7c673cae FG |
861 | |
862 | /* Doorbell stride is 2 ^ (dstrd + 2), | |
863 | * but we want multiples of 4, so drop the + 2 */ | |
864 | pctrlr->doorbell_stride_u32 = 1 << cap.bits.dstrd; | |
865 | ||
866 | rc = nvme_ctrlr_construct(&pctrlr->ctrlr); | |
867 | if (rc != 0) { | |
868 | nvme_ctrlr_destruct(&pctrlr->ctrlr); | |
869 | return NULL; | |
870 | } | |
871 | ||
872 | pci_id = spdk_pci_device_get_id(pci_dev); | |
873 | pctrlr->ctrlr.quirks = nvme_get_quirks(&pci_id); | |
874 | ||
875 | rc = nvme_pcie_ctrlr_construct_admin_qpair(&pctrlr->ctrlr); | |
876 | if (rc != 0) { | |
877 | nvme_ctrlr_destruct(&pctrlr->ctrlr); | |
878 | return NULL; | |
879 | } | |
880 | ||
881 | /* Construct the primary process properties */ | |
882 | rc = nvme_ctrlr_add_process(&pctrlr->ctrlr, pci_dev); | |
883 | if (rc != 0) { | |
884 | nvme_ctrlr_destruct(&pctrlr->ctrlr); | |
885 | return NULL; | |
886 | } | |
887 | ||
888 | if (g_sigset != true) { | |
889 | nvme_pcie_ctrlr_setup_signal(); | |
890 | g_sigset = true; | |
891 | } | |
892 | ||
893 | return &pctrlr->ctrlr; | |
894 | } | |
895 | ||
/*
 * Program the admin queue hardware registers so the controller knows where
 * the admin submission/completion queues live and how large they are.
 *
 * Writes ASQ (admin SQ base), ACQ (admin CQ base) and AQA (queue sizes).
 *
 * Returns 0 on success, -EIO if any register write fails.
 */
int
nvme_pcie_ctrlr_enable(struct spdk_nvme_ctrlr *ctrlr)
{
	struct nvme_pcie_ctrlr *pctrlr = nvme_pcie_ctrlr(ctrlr);
	struct nvme_pcie_qpair *padminq = nvme_pcie_qpair(ctrlr->adminq);
	union spdk_nvme_aqa_register aqa;

	/* Admin Submission Queue base address. */
	if (nvme_pcie_ctrlr_set_asq(pctrlr, padminq->cmd_bus_addr)) {
		SPDK_ERRLOG("set_asq() failed\n");
		return -EIO;
	}

	/* Admin Completion Queue base address. */
	if (nvme_pcie_ctrlr_set_acq(pctrlr, padminq->cpl_bus_addr)) {
		SPDK_ERRLOG("set_acq() failed\n");
		return -EIO;
	}

	aqa.raw = 0;
	/* acqs and asqs are 0-based. */
	aqa.bits.acqs = nvme_pcie_qpair(ctrlr->adminq)->num_entries - 1;
	aqa.bits.asqs = nvme_pcie_qpair(ctrlr->adminq)->num_entries - 1;

	if (nvme_pcie_ctrlr_set_aqa(pctrlr, &aqa)) {
		SPDK_ERRLOG("set_aqa() failed\n");
		return -EIO;
	}

	return 0;
}
925 | ||
/*
 * Tear down a PCIe-attached controller: release the device claim lock,
 * destroy the admin queue, run the generic destruct/finish path, free
 * per-process state and the mapped BARs, detach the PCI device, and
 * finally free the controller object itself.  Always returns 0.
 */
int
nvme_pcie_ctrlr_destruct(struct spdk_nvme_ctrlr *ctrlr)
{
	struct nvme_pcie_ctrlr *pctrlr = nvme_pcie_ctrlr(ctrlr);
	struct spdk_pci_device *devhandle = nvme_ctrlr_proc_get_devhandle(ctrlr);

	/* Release the advisory claim taken when the device was attached. */
	close(pctrlr->claim_fd);

	if (ctrlr->adminq) {
		nvme_pcie_qpair_destroy(ctrlr->adminq);
	}

	nvme_ctrlr_destruct_finish(ctrlr);

	nvme_ctrlr_free_processes(ctrlr);

	/* Unmap BAR0 (and CMB, if mapped) before detaching the device. */
	nvme_pcie_ctrlr_free_bars(pctrlr);

	if (devhandle) {
		spdk_pci_device_detach(devhandle);
	}

	spdk_free(pctrlr);

	return 0;
}
952 | ||
/*
 * Initialize one command tracker.  phys_addr is the physical address of the
 * tracker itself; the embedded PRP/SGL list lives at a fixed offset inside
 * the tracker, so its bus address can be precomputed here once.
 */
static void
nvme_qpair_construct_tracker(struct nvme_tracker *tr, uint16_t cid, uint64_t phys_addr)
{
	tr->prp_sgl_bus_addr = phys_addr + offsetof(struct nvme_tracker, u.prp);
	tr->cid = cid;
	tr->req = NULL;
}
960 | ||
/*
 * Reset the software state of a queue pair back to its post-creation state:
 * head/tail indices to 0, phase tag to 1, and both hardware rings zeroed.
 * Does not touch the device itself.  Always returns 0.
 */
int
nvme_pcie_qpair_reset(struct spdk_nvme_qpair *qpair)
{
	struct nvme_pcie_qpair *pqpair = nvme_pcie_qpair(qpair);

	pqpair->last_sq_tail = pqpair->sq_tail = pqpair->cq_head = 0;

	/*
	 * First time through the completion queue, HW will set phase
	 * bit on completions to 1. So set this to 1 here, indicating
	 * we're looking for a 1 to know which entries have completed.
	 * we'll toggle the bit each time when the completion queue
	 * rolls over.
	 */
	pqpair->flags.phase = 1;

	memset(pqpair->cmd, 0,
	       pqpair->num_entries * sizeof(struct spdk_nvme_cmd));
	memset(pqpair->cpl, 0,
	       pqpair->num_entries * sizeof(struct spdk_nvme_cpl));

	return 0;
}
984 | ||
/*
 * Allocate and initialize the DMA-able resources for a queue pair: the
 * submission ring (optionally placed in the controller memory buffer), the
 * completion ring, the doorbell register pointers, and the command tracker
 * pool.  On failure the caller is expected to invoke
 * nvme_pcie_qpair_destroy() to release anything already allocated.
 */
static int
nvme_pcie_qpair_construct(struct spdk_nvme_qpair *qpair)
{
	struct spdk_nvme_ctrlr *ctrlr = qpair->ctrlr;
	struct nvme_pcie_ctrlr *pctrlr = nvme_pcie_ctrlr(ctrlr);
	struct nvme_pcie_qpair *pqpair = nvme_pcie_qpair(qpair);
	struct nvme_tracker *tr;
	uint16_t i;
	volatile uint32_t *doorbell_base;
	uint64_t offset;
	uint16_t num_trackers;
	size_t page_align = VALUE_2MB;
	uint32_t flags = SPDK_MALLOC_DMA;

	/*
	 * Limit the maximum number of completions to return per call to prevent wraparound,
	 * and calculate how many trackers can be submitted at once without overflowing the
	 * completion queue.
	 */
	pqpair->max_completions_cap = pqpair->num_entries / 4;
	pqpair->max_completions_cap = spdk_max(pqpair->max_completions_cap, NVME_MIN_COMPLETIONS);
	pqpair->max_completions_cap = spdk_min(pqpair->max_completions_cap, NVME_MAX_COMPLETIONS);
	num_trackers = pqpair->num_entries - pqpair->max_completions_cap;

	SPDK_INFOLOG(SPDK_LOG_NVME, "max_completions_cap = %" PRIu16 " num_trackers = %" PRIu16 "\n",
		     pqpair->max_completions_cap, num_trackers);

	assert(num_trackers != 0);

	pqpair->sq_in_cmb = false;

	/* The admin queue must be visible to all processes sharing the controller. */
	if (nvme_qpair_is_admin_queue(&pqpair->qpair)) {
		flags |= SPDK_MALLOC_SHARE;
	}

	/* cmd and cpl rings must be aligned on page size boundaries. */
	if (ctrlr->opts.use_cmb_sqs) {
		if (nvme_pcie_ctrlr_alloc_cmb(ctrlr, pqpair->num_entries * sizeof(struct spdk_nvme_cmd),
					      sysconf(_SC_PAGESIZE), &offset) == 0) {
			pqpair->cmd = pctrlr->cmb_bar_virt_addr + offset;
			pqpair->cmd_bus_addr = pctrlr->cmb_bar_phys_addr + offset;
			pqpair->sq_in_cmb = true;
		}
	}

	/* To ensure physical address contiguity we make each ring occupy
	 * a single hugepage only. See MAX_IO_QUEUE_ENTRIES.
	 */
	if (pqpair->sq_in_cmb == false) {
		pqpair->cmd = spdk_zmalloc(pqpair->num_entries * sizeof(struct spdk_nvme_cmd),
					   page_align, NULL,
					   SPDK_ENV_SOCKET_ID_ANY, flags);
		if (pqpair->cmd == NULL) {
			SPDK_ERRLOG("alloc qpair_cmd failed\n");
			return -ENOMEM;
		}

		pqpair->cmd_bus_addr = spdk_vtophys(pqpair->cmd, NULL);
		if (pqpair->cmd_bus_addr == SPDK_VTOPHYS_ERROR) {
			SPDK_ERRLOG("spdk_vtophys(pqpair->cmd) failed\n");
			return -EFAULT;
		}
	}

	pqpair->cpl = spdk_zmalloc(pqpair->num_entries * sizeof(struct spdk_nvme_cpl),
				   page_align, NULL,
				   SPDK_ENV_SOCKET_ID_ANY, flags);
	if (pqpair->cpl == NULL) {
		SPDK_ERRLOG("alloc qpair_cpl failed\n");
		return -ENOMEM;
	}

	pqpair->cpl_bus_addr = spdk_vtophys(pqpair->cpl, NULL);
	if (pqpair->cpl_bus_addr == SPDK_VTOPHYS_ERROR) {
		SPDK_ERRLOG("spdk_vtophys(pqpair->cpl) failed\n");
		return -EFAULT;
	}

	/* Each queue pair owns two doorbells (SQ tail, CQ head) spaced by
	 * the controller's doorbell stride. */
	doorbell_base = &pctrlr->regs->doorbell[0].sq_tdbl;
	pqpair->sq_tdbl = doorbell_base + (2 * qpair->id + 0) * pctrlr->doorbell_stride_u32;
	pqpair->cq_hdbl = doorbell_base + (2 * qpair->id + 1) * pctrlr->doorbell_stride_u32;

	/*
	 * Reserve space for all of the trackers in a single allocation.
	 * struct nvme_tracker must be padded so that its size is already a power of 2.
	 * This ensures the PRP list embedded in the nvme_tracker object will not span a
	 * 4KB boundary, while allowing access to trackers in tr[] via normal array indexing.
	 */
	pqpair->tr = spdk_zmalloc(num_trackers * sizeof(*tr), sizeof(*tr), NULL,
				  SPDK_ENV_SOCKET_ID_ANY, SPDK_MALLOC_SHARE);
	if (pqpair->tr == NULL) {
		SPDK_ERRLOG("nvme_tr failed\n");
		return -ENOMEM;
	}

	TAILQ_INIT(&pqpair->free_tr);
	TAILQ_INIT(&pqpair->outstanding_tr);

	for (i = 0; i < num_trackers; i++) {
		tr = &pqpair->tr[i];
		nvme_qpair_construct_tracker(tr, i, spdk_vtophys(tr, NULL));
		TAILQ_INSERT_HEAD(&pqpair->free_tr, tr, tq_list);
	}

	nvme_pcie_qpair_reset(qpair);

	return 0;
}
1093 | ||
/*
 * Copy a 64-byte NVMe command into the submission ring.  On SSE2-capable
 * targets this uses four 16-byte non-temporal stores so the written ring
 * entry does not pollute the CPU cache; elsewhere a plain struct
 * assignment is used.
 */
static inline void
nvme_pcie_copy_command(struct spdk_nvme_cmd *dst, const struct spdk_nvme_cmd *src)
{
	/* dst and src are known to be non-overlapping and 64-byte aligned. */
#if defined(__SSE2__)
	__m128i *d128 = (__m128i *)dst;
	const __m128i *s128 = (const __m128i *)src;

	_mm_stream_si128(&d128[0], _mm_load_si128(&s128[0]));
	_mm_stream_si128(&d128[1], _mm_load_si128(&s128[1]));
	_mm_stream_si128(&d128[2], _mm_load_si128(&s128[2]));
	_mm_stream_si128(&d128[3], _mm_load_si128(&s128[3]));
#else
	*dst = *src;
#endif
}
1110 | ||
/**
 * Hand off a completed admin request that was submitted by a different
 * process: save the completion in the request and queue it on that
 * process's active_reqs list so the owner can finish it later.  If the
 * owning process no longer exists, the request is dropped and freed.
 *
 * Note: the ctrlr_lock must be held when calling this function.
 */
static void
nvme_pcie_qpair_insert_pending_admin_request(struct spdk_nvme_qpair *qpair,
		struct nvme_request *req, struct spdk_nvme_cpl *cpl)
{
	struct spdk_nvme_ctrlr *ctrlr = qpair->ctrlr;
	struct nvme_request *active_req = req;
	struct spdk_nvme_ctrlr_process *active_proc;

	/*
	 * The admin request is from another process. Move to the per
	 * process list for that process to handle it later.
	 */
	assert(nvme_qpair_is_admin_queue(qpair));
	assert(active_req->pid != getpid());

	active_proc = spdk_nvme_ctrlr_get_process(ctrlr, active_req->pid);
	if (active_proc) {
		/* Save the original completion information */
		memcpy(&active_req->cpl, cpl, sizeof(*cpl));
		STAILQ_INSERT_TAIL(&active_proc->active_reqs, active_req, stailq);
	} else {
		SPDK_ERRLOG("The owning process (pid %d) is not found. Dropping the request.\n",
			    active_req->pid);

		nvme_free_request(active_req);
	}
}
1141 | ||
/**
 * Complete any admin requests that other processes finished on our behalf:
 * drain this process's active_reqs list, invoking each request's callback
 * with the completion that was saved when the request was parked.
 *
 * Note: the ctrlr_lock must be held when calling this function.
 */
static void
nvme_pcie_qpair_complete_pending_admin_request(struct spdk_nvme_qpair *qpair)
{
	struct spdk_nvme_ctrlr *ctrlr = qpair->ctrlr;
	struct nvme_request *req, *tmp_req;
	pid_t pid = getpid();
	struct spdk_nvme_ctrlr_process *proc;

	/*
	 * Check whether there is any pending admin request from
	 * other active processes.
	 */
	assert(nvme_qpair_is_admin_queue(qpair));

	proc = spdk_nvme_ctrlr_get_current_process(ctrlr);
	if (!proc) {
		SPDK_ERRLOG("the active process (pid %d) is not found for this controller.\n", pid);
		assert(proc);
		return;
	}

	/* SAFE variant: the callback path frees req while we iterate. */
	STAILQ_FOREACH_SAFE(req, &proc->active_reqs, stailq, tmp_req) {
		STAILQ_REMOVE(&proc->active_reqs, req, nvme_request, stailq);

		assert(req->pid == pid);

		nvme_complete_request(req->cb_fn, req->cb_arg, qpair, req, &req->cpl);
		nvme_free_request(req);
	}
}
1175 | ||
11fdf7f2 TL |
1176 | static inline int |
1177 | nvme_pcie_qpair_need_event(uint16_t event_idx, uint16_t new_idx, uint16_t old) | |
1178 | { | |
1179 | return (uint16_t)(new_idx - event_idx) <= (uint16_t)(new_idx - old); | |
1180 | } | |
1181 | ||
/*
 * Decide whether a doorbell update must be written to the device via MMIO.
 * Without shadow doorbells, MMIO is always required.  With shadow doorbells,
 * store the new value into the shadow and only require MMIO when the update
 * crosses the controller's event index.
 */
static bool
nvme_pcie_qpair_update_mmio_required(struct spdk_nvme_qpair *qpair, uint16_t value,
				     volatile uint32_t *shadow_db,
				     volatile uint32_t *eventidx)
{
	uint16_t old;

	if (!shadow_db) {
		return true;
	}

	old = *shadow_db;
	*shadow_db = value;

	if (!nvme_pcie_qpair_need_event(*eventidx, value, old)) {
		return false;
	}

	return true;
}
1202 | ||
9f95a23c TL |
/*
 * Notify the controller of new submission queue entries.  With shadow
 * doorbells, the MMIO write may be elided when the controller does not need
 * an event.  g_thread_mmio_ctrlr is set around the write so the SIGBUS
 * handler can identify which controller faulted on a hot-removed device.
 */
static inline void
nvme_pcie_qpair_ring_sq_doorbell(struct spdk_nvme_qpair *qpair)
{
	struct nvme_pcie_qpair *pqpair = nvme_pcie_qpair(qpair);
	struct nvme_pcie_ctrlr *pctrlr = nvme_pcie_ctrlr(qpair->ctrlr);
	bool need_mmio = true;

	if (spdk_unlikely(pqpair->flags.has_shadow_doorbell)) {
		need_mmio = nvme_pcie_qpair_update_mmio_required(qpair,
				pqpair->sq_tail,
				pqpair->shadow_doorbell.sq_tdbl,
				pqpair->shadow_doorbell.sq_eventidx);
	}

	if (spdk_likely(need_mmio)) {
		/* Ensure the ring entry is globally visible before the doorbell write. */
		spdk_wmb();
		g_thread_mmio_ctrlr = pctrlr;
		spdk_mmio_write_4(pqpair->sq_tdbl, pqpair->sq_tail);
		g_thread_mmio_ctrlr = NULL;
	}
}
1224 | ||
/*
 * Tell the controller how far the host has consumed the completion queue.
 * With shadow doorbells the MMIO write may be elided; otherwise the new
 * cq_head is written to the CQ head doorbell register.
 */
static inline void
nvme_pcie_qpair_ring_cq_doorbell(struct spdk_nvme_qpair *qpair)
{
	struct nvme_pcie_qpair *pqpair = nvme_pcie_qpair(qpair);
	struct nvme_pcie_ctrlr *pctrlr = nvme_pcie_ctrlr(qpair->ctrlr);
	bool need_mmio = true;

	if (spdk_unlikely(pqpair->flags.has_shadow_doorbell)) {
		need_mmio = nvme_pcie_qpair_update_mmio_required(qpair,
				pqpair->cq_head,
				pqpair->shadow_doorbell.cq_hdbl,
				pqpair->shadow_doorbell.cq_eventidx);
	}

	if (spdk_likely(need_mmio)) {
		g_thread_mmio_ctrlr = pctrlr;
		spdk_mmio_write_4(pqpair->cq_hdbl, pqpair->cq_head);
		g_thread_mmio_ctrlr = NULL;
	}
}
1245 | ||
7c673cae FG |
/*
 * Place a tracker's command on the submission ring and advance the SQ tail
 * (with wraparound).  The doorbell is rung immediately unless the qpair is
 * configured to batch doorbell writes (delay_pcie_doorbell), in which case
 * the completion path rings it later.
 */
static void
nvme_pcie_qpair_submit_tracker(struct spdk_nvme_qpair *qpair, struct nvme_tracker *tr)
{
	struct nvme_request *req;
	struct nvme_pcie_qpair *pqpair = nvme_pcie_qpair(qpair);

	req = tr->req;
	assert(req != NULL);

	/* Copy the command from the tracker to the submission queue. */
	nvme_pcie_copy_command(&pqpair->cmd[pqpair->sq_tail], &req->cmd);

	if (spdk_unlikely(++pqpair->sq_tail == pqpair->num_entries)) {
		pqpair->sq_tail = 0;
	}

	/* This should never happen: the free-tracker pool bounds submissions. */
	if (spdk_unlikely(pqpair->sq_tail == pqpair->sq_head)) {
		SPDK_ERRLOG("sq_tail is passing sq_head!\n");
	}

	if (!pqpair->flags.delay_pcie_doorbell) {
		nvme_pcie_qpair_ring_sq_doorbell(qpair);
	}
}
1270 | ||
/*
 * Finish a tracker whose command has completed.  Retryable errors are
 * resubmitted (up to spdk_nvme_retry_count).  Otherwise the request is
 * completed — either locally, or parked on the owning process's list when
 * the admin request came from another process — the tracker is returned to
 * the free list, and one queued request (if any) is submitted in its place.
 */
static void
nvme_pcie_qpair_complete_tracker(struct spdk_nvme_qpair *qpair, struct nvme_tracker *tr,
				 struct spdk_nvme_cpl *cpl, bool print_on_error)
{
	struct nvme_pcie_qpair *pqpair = nvme_pcie_qpair(qpair);
	struct nvme_request *req;
	bool retry, error;
	bool req_from_current_proc = true;

	req = tr->req;

	assert(req != NULL);

	error = spdk_nvme_cpl_is_error(cpl);
	retry = error && nvme_completion_is_retry(cpl) &&
		req->retries < spdk_nvme_retry_count;

	if (error && print_on_error && !qpair->ctrlr->opts.disable_error_logging) {
		nvme_qpair_print_command(qpair, &req->cmd);
		nvme_qpair_print_completion(qpair, cpl);
	}

	assert(cpl->cid == req->cmd.cid);

	if (retry) {
		req->retries++;
		nvme_pcie_qpair_submit_tracker(qpair, tr);
	} else {
		/* Only check admin requests from different processes. */
		if (nvme_qpair_is_admin_queue(qpair) && req->pid != getpid()) {
			req_from_current_proc = false;
			nvme_pcie_qpair_insert_pending_admin_request(qpair, req, cpl);
		} else {
			nvme_complete_request(tr->cb_fn, tr->cb_arg, qpair, req, cpl);
		}

		/* A parked request is freed by its owner, not here. */
		if (req_from_current_proc == true) {
			nvme_qpair_free_request(qpair, req);
		}

		tr->req = NULL;

		TAILQ_REMOVE(&pqpair->outstanding_tr, tr, tq_list);
		TAILQ_INSERT_HEAD(&pqpair->free_tr, tr, tq_list);

		/*
		 * If the controller is in the middle of resetting, don't
		 * try to submit queued requests here - let the reset logic
		 * handle that instead.
		 */
		if (!STAILQ_EMPTY(&qpair->queued_req) &&
		    !qpair->ctrlr->is_resetting) {
			req = STAILQ_FIRST(&qpair->queued_req);
			STAILQ_REMOVE_HEAD(&qpair->queued_req, stailq);
			nvme_qpair_submit_request(qpair, req);
		}
	}
}
1329 | ||
/*
 * Complete a tracker with a synthesized completion (no hardware involved).
 * Used to abort or fail commands: builds a cpl with the given status
 * type/code/dnr and runs it through the normal completion path.
 */
static void
nvme_pcie_qpair_manual_complete_tracker(struct spdk_nvme_qpair *qpair,
					struct nvme_tracker *tr, uint32_t sct, uint32_t sc, uint32_t dnr,
					bool print_on_error)
{
	struct spdk_nvme_cpl cpl;

	memset(&cpl, 0, sizeof(cpl));
	cpl.sqid = qpair->id;
	cpl.cid = tr->cid;
	cpl.status.sct = sct;
	cpl.status.sc = sc;
	cpl.status.dnr = dnr;
	nvme_pcie_qpair_complete_tracker(qpair, tr, &cpl, print_on_error);
}
1345 | ||
/*
 * Abort every outstanding tracker on the qpair with ABORTED_BY_REQUEST
 * status.  dnr (do-not-retry) controls whether callers may resubmit.
 */
static void
nvme_pcie_qpair_abort_trackers(struct spdk_nvme_qpair *qpair, uint32_t dnr)
{
	struct nvme_pcie_qpair *pqpair = nvme_pcie_qpair(qpair);
	struct nvme_tracker *tr, *temp;

	/* SAFE variant: completing a tracker removes it from the list. */
	TAILQ_FOREACH_SAFE(tr, &pqpair->outstanding_tr, tq_list, temp) {
		if (!qpair->ctrlr->opts.disable_error_logging) {
			SPDK_ERRLOG("aborting outstanding command\n");
		}
		nvme_pcie_qpair_manual_complete_tracker(qpair, tr, SPDK_NVME_SCT_GENERIC,
							SPDK_NVME_SC_ABORTED_BY_REQUEST, dnr, true);
	}
}
1360 | ||
/*
 * Abort all outstanding Asynchronous Event Request commands on the admin
 * queue with ABORTED_SQ_DELETION status.  Completing a tracker mutates the
 * outstanding list, so the walk restarts from the head after each abort
 * instead of using TAILQ_NEXT on a removed element.
 */
void
nvme_pcie_admin_qpair_abort_aers(struct spdk_nvme_qpair *qpair)
{
	struct nvme_pcie_qpair *pqpair = nvme_pcie_qpair(qpair);
	struct nvme_tracker *tr;

	tr = TAILQ_FIRST(&pqpair->outstanding_tr);
	while (tr != NULL) {
		assert(tr->req != NULL);
		if (tr->req->cmd.opc == SPDK_NVME_OPC_ASYNC_EVENT_REQUEST) {
			nvme_pcie_qpair_manual_complete_tracker(qpair, tr,
								SPDK_NVME_SCT_GENERIC, SPDK_NVME_SC_ABORTED_SQ_DELETION, 0,
								false);
			tr = TAILQ_FIRST(&pqpair->outstanding_tr);
		} else {
			tr = TAILQ_NEXT(tr, tq_list);
		}
	}
}
1380 | ||
/*
 * Admin-queue-specific teardown: abort any outstanding AERs before the
 * common qpair destroy path releases the rings and trackers.
 */
static void
nvme_pcie_admin_qpair_destroy(struct spdk_nvme_qpair *qpair)
{
	nvme_pcie_admin_qpair_abort_aers(qpair);
}
1386 | ||
/*
 * Free all resources owned by a qpair: rings, tracker pool, and the qpair
 * object itself.  A submission ring placed in the controller memory buffer
 * is device memory and must not be passed to spdk_free().  Always returns 0.
 */
static int
nvme_pcie_qpair_destroy(struct spdk_nvme_qpair *qpair)
{
	struct nvme_pcie_qpair *pqpair = nvme_pcie_qpair(qpair);

	if (nvme_qpair_is_admin_queue(qpair)) {
		nvme_pcie_admin_qpair_destroy(qpair);
	}
	/* cmd lives in the CMB (BAR memory) when sq_in_cmb is set — not heap. */
	if (pqpair->cmd && !pqpair->sq_in_cmb) {
		spdk_free(pqpair->cmd);
	}
	if (pqpair->cpl) {
		spdk_free(pqpair->cpl);
	}
	if (pqpair->tr) {
		spdk_free(pqpair->tr);
	}

	nvme_qpair_deinit(qpair);

	spdk_free(pqpair);

	return 0;
}
1411 | ||
9f95a23c TL |
/*
 * Transport hook: abort all outstanding requests on the qpair.  For PCIe
 * this maps directly onto aborting the outstanding trackers.
 */
void
nvme_pcie_qpair_abort_reqs(struct spdk_nvme_qpair *qpair, uint32_t dnr)
{
	nvme_pcie_qpair_abort_trackers(qpair, dnr);
}
1417 | ||
/*
 * Submit a Create I/O Completion Queue admin command for io_que.  The CQ
 * size, queue id and ring physical address come from the qpair; cb_fn is
 * invoked when the admin command completes.
 *
 * Returns 0 on successful submission, -ENOMEM if no request is available.
 */
static int
nvme_pcie_ctrlr_cmd_create_io_cq(struct spdk_nvme_ctrlr *ctrlr,
				 struct spdk_nvme_qpair *io_que, spdk_nvme_cmd_cb cb_fn,
				 void *cb_arg)
{
	struct nvme_pcie_qpair *pqpair = nvme_pcie_qpair(io_que);
	struct nvme_request *req;
	struct spdk_nvme_cmd *cmd;

	req = nvme_allocate_request_null(ctrlr->adminq, cb_fn, cb_arg);
	if (req == NULL) {
		return -ENOMEM;
	}

	cmd = &req->cmd;
	cmd->opc = SPDK_NVME_OPC_CREATE_IO_CQ;

	/*
	 * TODO: create a create io completion queue command data
	 * structure.
	 */
	cmd->cdw10 = ((pqpair->num_entries - 1) << 16) | io_que->id;
	/*
	 * 0x2 = interrupts enabled
	 * 0x1 = physically contiguous
	 */
	cmd->cdw11 = 0x1;
	cmd->dptr.prp.prp1 = pqpair->cpl_bus_addr;

	return nvme_ctrlr_submit_admin_request(ctrlr, req);
}
1449 | ||
/*
 * Submit a Create I/O Submission Queue admin command for io_que, binding
 * it to the completion queue with the same id and carrying the qpair's
 * priority.  cb_fn is invoked when the admin command completes.
 *
 * Returns 0 on successful submission, -ENOMEM if no request is available.
 */
static int
nvme_pcie_ctrlr_cmd_create_io_sq(struct spdk_nvme_ctrlr *ctrlr,
				 struct spdk_nvme_qpair *io_que, spdk_nvme_cmd_cb cb_fn, void *cb_arg)
{
	struct nvme_pcie_qpair *pqpair = nvme_pcie_qpair(io_que);
	struct nvme_request *req;
	struct spdk_nvme_cmd *cmd;

	req = nvme_allocate_request_null(ctrlr->adminq, cb_fn, cb_arg);
	if (req == NULL) {
		return -ENOMEM;
	}

	cmd = &req->cmd;
	cmd->opc = SPDK_NVME_OPC_CREATE_IO_SQ;

	/*
	 * TODO: create a create io submission queue command data
	 * structure.
	 */
	cmd->cdw10 = ((pqpair->num_entries - 1) << 16) | io_que->id;
	/* 0x1 = physically contiguous */
	cmd->cdw11 = (io_que->id << 16) | (io_que->qprio << 1) | 0x1;
	cmd->dptr.prp.prp1 = pqpair->cmd_bus_addr;

	return nvme_ctrlr_submit_admin_request(ctrlr, req);
}
1477 | ||
/*
 * Submit a Delete I/O Completion Queue admin command for qpair.
 * Returns 0 on successful submission, -ENOMEM if no request is available.
 */
static int
nvme_pcie_ctrlr_cmd_delete_io_cq(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_qpair *qpair,
				 spdk_nvme_cmd_cb cb_fn, void *cb_arg)
{
	struct nvme_request *req;
	struct spdk_nvme_cmd *cmd;

	req = nvme_allocate_request_null(ctrlr->adminq, cb_fn, cb_arg);
	if (req == NULL) {
		return -ENOMEM;
	}

	cmd = &req->cmd;
	cmd->opc = SPDK_NVME_OPC_DELETE_IO_CQ;
	cmd->cdw10 = qpair->id;

	return nvme_ctrlr_submit_admin_request(ctrlr, req);
}
1496 | ||
/*
 * Submit a Delete I/O Submission Queue admin command for qpair.
 * Returns 0 on successful submission, -ENOMEM if no request is available.
 */
static int
nvme_pcie_ctrlr_cmd_delete_io_sq(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_qpair *qpair,
				 spdk_nvme_cmd_cb cb_fn, void *cb_arg)
{
	struct nvme_request *req;
	struct spdk_nvme_cmd *cmd;

	req = nvme_allocate_request_null(ctrlr->adminq, cb_fn, cb_arg);
	if (req == NULL) {
		return -ENOMEM;
	}

	cmd = &req->cmd;
	cmd->opc = SPDK_NVME_OPC_DELETE_IO_SQ;
	cmd->cdw10 = qpair->id;

	return nvme_ctrlr_submit_admin_request(ctrlr, req);
}
1515 | ||
/*
 * Create the hardware queues for an already-constructed I/O qpair: issue
 * Create I/O CQ then Create I/O SQ (polling each to completion), roll back
 * the CQ if the SQ creation fails, wire up the shadow doorbell pointers
 * when the controller supports them, and reset the qpair's ring indices.
 *
 * Returns 0 on success, a negative errno or -1 on failure.
 */
static int
_nvme_pcie_ctrlr_create_io_qpair(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_qpair *qpair,
				 uint16_t qid)
{
	struct nvme_pcie_ctrlr *pctrlr = nvme_pcie_ctrlr(ctrlr);
	struct nvme_pcie_qpair *pqpair = nvme_pcie_qpair(qpair);
	struct nvme_completion_poll_status status;
	int rc;

	/* The CQ must exist before the SQ that posts completions to it. */
	rc = nvme_pcie_ctrlr_cmd_create_io_cq(ctrlr, qpair, nvme_completion_poll_cb, &status);
	if (rc != 0) {
		return rc;
	}

	if (spdk_nvme_wait_for_completion(ctrlr->adminq, &status)) {
		SPDK_ERRLOG("nvme_create_io_cq failed!\n");
		return -1;
	}

	rc = nvme_pcie_ctrlr_cmd_create_io_sq(qpair->ctrlr, qpair, nvme_completion_poll_cb, &status);
	if (rc != 0) {
		return rc;
	}

	if (spdk_nvme_wait_for_completion(ctrlr->adminq, &status)) {
		SPDK_ERRLOG("nvme_create_io_sq failed!\n");
		/* Attempt to delete the completion queue */
		rc = nvme_pcie_ctrlr_cmd_delete_io_cq(qpair->ctrlr, qpair, nvme_completion_poll_cb, &status);
		if (rc != 0) {
			return -1;
		}
		spdk_nvme_wait_for_completion(ctrlr->adminq, &status);
		return -1;
	}

	if (ctrlr->shadow_doorbell) {
		/* Shadow doorbell / event index slots are laid out exactly like
		 * the hardware doorbells: two u32s per queue, stride apart. */
		pqpair->shadow_doorbell.sq_tdbl = ctrlr->shadow_doorbell + (2 * qpair->id + 0) *
						  pctrlr->doorbell_stride_u32;
		pqpair->shadow_doorbell.cq_hdbl = ctrlr->shadow_doorbell + (2 * qpair->id + 1) *
						  pctrlr->doorbell_stride_u32;
		pqpair->shadow_doorbell.sq_eventidx = ctrlr->eventidx + (2 * qpair->id + 0) *
						      pctrlr->doorbell_stride_u32;
		pqpair->shadow_doorbell.cq_eventidx = ctrlr->eventidx + (2 * qpair->id + 1) *
						      pctrlr->doorbell_stride_u32;
		pqpair->flags.has_shadow_doorbell = 1;
	} else {
		pqpair->flags.has_shadow_doorbell = 0;
	}
	nvme_pcie_qpair_reset(qpair);

	return 0;
}
1568 | ||
/*
 * Allocate and fully initialize an I/O queue pair: allocate the qpair
 * object (shared memory, 64-byte aligned), run generic qpair init, build
 * the PCIe rings/trackers, then create the hardware queues.  On any
 * failure the partially built qpair is destroyed and NULL is returned.
 */
struct spdk_nvme_qpair *
nvme_pcie_ctrlr_create_io_qpair(struct spdk_nvme_ctrlr *ctrlr, uint16_t qid,
				const struct spdk_nvme_io_qpair_opts *opts)
{
	struct nvme_pcie_qpair *pqpair;
	struct spdk_nvme_qpair *qpair;
	int rc;

	assert(ctrlr != NULL);

	pqpair = spdk_zmalloc(sizeof(*pqpair), 64, NULL,
			      SPDK_ENV_SOCKET_ID_ANY, SPDK_MALLOC_SHARE);
	if (pqpair == NULL) {
		return NULL;
	}

	pqpair->num_entries = opts->io_queue_size;
	pqpair->flags.delay_pcie_doorbell = opts->delay_pcie_doorbell;

	qpair = &pqpair->qpair;

	rc = nvme_qpair_init(qpair, qid, ctrlr, opts->qprio, opts->io_queue_requests);
	if (rc != 0) {
		nvme_pcie_qpair_destroy(qpair);
		return NULL;
	}

	rc = nvme_pcie_qpair_construct(qpair);
	if (rc != 0) {
		nvme_pcie_qpair_destroy(qpair);
		return NULL;
	}

	rc = _nvme_pcie_ctrlr_create_io_qpair(ctrlr, qpair, qid);

	if (rc != 0) {
		SPDK_ERRLOG("I/O queue creation failed\n");
		nvme_pcie_qpair_destroy(qpair);
		return NULL;
	}

	return qpair;
}
1612 | ||
1613 | int | |
9f95a23c TL |
1614 | nvme_pcie_ctrlr_connect_qpair(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_qpair *qpair) |
1615 | { | |
1616 | if (nvme_qpair_is_admin_queue(qpair)) { | |
1617 | return 0; | |
1618 | } else { | |
1619 | return _nvme_pcie_ctrlr_create_io_qpair(ctrlr, qpair, qpair->id); | |
1620 | } | |
1621 | } | |
1622 | ||
/*
 * Transport hook: disconnect a queue pair.  Intentionally a no-op for
 * PCIe — queue deletion is handled by nvme_pcie_ctrlr_delete_io_qpair().
 */
void
nvme_pcie_ctrlr_disconnect_qpair(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_qpair *qpair)
{
}
1627 | ||
/*
 * Delete an I/O queue pair: issue Delete I/O SQ then Delete I/O CQ admin
 * commands (skipped entirely if the device has been hot-removed), abort any
 * still-outstanding requests, and free the qpair.
 *
 * Returns 0 on success; a nonzero value if an admin command could not be
 * submitted or failed (in which case the qpair is NOT freed).
 */
int
nvme_pcie_ctrlr_delete_io_qpair(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_qpair *qpair)
{
	struct nvme_completion_poll_status status;
	int rc;

	assert(ctrlr != NULL);

	/* No point talking to hardware that is gone. */
	if (ctrlr->is_removed) {
		goto free;
	}

	/* Delete the I/O submission queue */
	rc = nvme_pcie_ctrlr_cmd_delete_io_sq(ctrlr, qpair, nvme_completion_poll_cb, &status);
	if (rc != 0) {
		SPDK_ERRLOG("Failed to send request to delete_io_sq with rc=%d\n", rc);
		return rc;
	}
	if (spdk_nvme_wait_for_completion(ctrlr->adminq, &status)) {
		return -1;
	}

	/* Delete the completion queue */
	rc = nvme_pcie_ctrlr_cmd_delete_io_cq(ctrlr, qpair, nvme_completion_poll_cb, &status);
	if (rc != 0) {
		SPDK_ERRLOG("Failed to send request to delete_io_cq with rc=%d\n", rc);
		return rc;
	}
	if (spdk_nvme_wait_for_completion(ctrlr->adminq, &status)) {
		return -1;
	}

free:
	if (qpair->no_deletion_notification_needed == 0) {
		/* Abort the rest of the I/O */
		nvme_pcie_qpair_abort_trackers(qpair, 1);
	}

	nvme_pcie_qpair_destroy(qpair);
	return 0;
}
1669 | ||
/*
 * Fail a tracker whose buffer could not be translated to a physical
 * address.  Completed with INVALID_FIELD and dnr=1 since retrying the
 * same untranslatable buffer cannot succeed.
 */
static void
nvme_pcie_fail_request_bad_vtophys(struct spdk_nvme_qpair *qpair, struct nvme_tracker *tr)
{
	/*
	 * Bad vtophys translation, so abort this request and return
	 * immediately.
	 */
	nvme_pcie_qpair_manual_complete_tracker(qpair, tr, SPDK_NVME_SCT_GENERIC,
						SPDK_NVME_SC_INVALID_FIELD,
						1 /* do not retry */, true);
}
1681 | ||
11fdf7f2 TL |
1682 | /* |
1683 | * Append PRP list entries to describe a virtually contiguous buffer starting at virt_addr of len bytes. | |
1684 | * | |
1685 | * *prp_index will be updated to account for the number of PRP entries used. | |
7c673cae | 1686 | */ |
9f95a23c | 1687 | static inline int |
11fdf7f2 TL |
1688 | nvme_pcie_prp_list_append(struct nvme_tracker *tr, uint32_t *prp_index, void *virt_addr, size_t len, |
1689 | uint32_t page_size) | |
7c673cae | 1690 | { |
11fdf7f2 TL |
1691 | struct spdk_nvme_cmd *cmd = &tr->req->cmd; |
1692 | uintptr_t page_mask = page_size - 1; | |
7c673cae | 1693 | uint64_t phys_addr; |
11fdf7f2 | 1694 | uint32_t i; |
7c673cae | 1695 | |
11fdf7f2 TL |
1696 | SPDK_DEBUGLOG(SPDK_LOG_NVME, "prp_index:%u virt_addr:%p len:%u\n", |
1697 | *prp_index, virt_addr, (uint32_t)len); | |
1698 | ||
1699 | if (spdk_unlikely(((uintptr_t)virt_addr & 3) != 0)) { | |
1700 | SPDK_ERRLOG("virt_addr %p not dword aligned\n", virt_addr); | |
1701 | return -EINVAL; | |
7c673cae FG |
1702 | } |
1703 | ||
11fdf7f2 TL |
1704 | i = *prp_index; |
1705 | while (len) { | |
1706 | uint32_t seg_len; | |
1707 | ||
1708 | /* | |
1709 | * prp_index 0 is stored in prp1, and the rest are stored in the prp[] array, | |
1710 | * so prp_index == count is valid. | |
1711 | */ | |
1712 | if (spdk_unlikely(i > SPDK_COUNTOF(tr->u.prp))) { | |
1713 | SPDK_ERRLOG("out of PRP entries\n"); | |
1714 | return -EINVAL; | |
7c673cae | 1715 | } |
7c673cae | 1716 | |
9f95a23c | 1717 | phys_addr = spdk_vtophys(virt_addr, NULL); |
11fdf7f2 TL |
1718 | if (spdk_unlikely(phys_addr == SPDK_VTOPHYS_ERROR)) { |
1719 | SPDK_ERRLOG("vtophys(%p) failed\n", virt_addr); | |
1720 | return -EINVAL; | |
1721 | } | |
1722 | ||
1723 | if (i == 0) { | |
1724 | SPDK_DEBUGLOG(SPDK_LOG_NVME, "prp1 = %p\n", (void *)phys_addr); | |
1725 | cmd->dptr.prp.prp1 = phys_addr; | |
1726 | seg_len = page_size - ((uintptr_t)virt_addr & page_mask); | |
1727 | } else { | |
1728 | if ((phys_addr & page_mask) != 0) { | |
1729 | SPDK_ERRLOG("PRP %u not page aligned (%p)\n", i, virt_addr); | |
1730 | return -EINVAL; | |
7c673cae | 1731 | } |
11fdf7f2 TL |
1732 | |
1733 | SPDK_DEBUGLOG(SPDK_LOG_NVME, "prp[%u] = %p\n", i - 1, (void *)phys_addr); | |
1734 | tr->u.prp[i - 1] = phys_addr; | |
1735 | seg_len = page_size; | |
7c673cae | 1736 | } |
11fdf7f2 TL |
1737 | |
1738 | seg_len = spdk_min(seg_len, len); | |
1739 | virt_addr += seg_len; | |
1740 | len -= seg_len; | |
1741 | i++; | |
1742 | } | |
1743 | ||
1744 | cmd->psdt = SPDK_NVME_PSDT_PRP; | |
1745 | if (i <= 1) { | |
1746 | cmd->dptr.prp.prp2 = 0; | |
1747 | } else if (i == 2) { | |
1748 | cmd->dptr.prp.prp2 = tr->u.prp[0]; | |
1749 | SPDK_DEBUGLOG(SPDK_LOG_NVME, "prp2 = %p\n", (void *)cmd->dptr.prp.prp2); | |
1750 | } else { | |
1751 | cmd->dptr.prp.prp2 = tr->prp_sgl_bus_addr; | |
1752 | SPDK_DEBUGLOG(SPDK_LOG_NVME, "prp2 = %p (PRP list)\n", (void *)cmd->dptr.prp.prp2); | |
1753 | } | |
1754 | ||
1755 | *prp_index = i; | |
1756 | return 0; | |
1757 | } | |
1758 | ||
1759 | /** | |
1760 | * Build PRP list describing physically contiguous payload buffer. | |
1761 | */ | |
1762 | static int | |
1763 | nvme_pcie_qpair_build_contig_request(struct spdk_nvme_qpair *qpair, struct nvme_request *req, | |
1764 | struct nvme_tracker *tr) | |
1765 | { | |
1766 | uint32_t prp_index = 0; | |
1767 | int rc; | |
1768 | ||
1769 | rc = nvme_pcie_prp_list_append(tr, &prp_index, req->payload.contig_or_cb_arg + req->payload_offset, | |
1770 | req->payload_size, qpair->ctrlr->page_size); | |
1771 | if (rc) { | |
1772 | nvme_pcie_fail_request_bad_vtophys(qpair, tr); | |
1773 | return rc; | |
7c673cae FG |
1774 | } |
1775 | ||
1776 | return 0; | |
1777 | } | |
1778 | ||
1779 | /** | |
1780 | * Build SGL list describing scattered payload buffer. | |
1781 | */ | |
1782 | static int | |
1783 | nvme_pcie_qpair_build_hw_sgl_request(struct spdk_nvme_qpair *qpair, struct nvme_request *req, | |
1784 | struct nvme_tracker *tr) | |
1785 | { | |
1786 | int rc; | |
1787 | void *virt_addr; | |
1788 | uint64_t phys_addr; | |
11fdf7f2 | 1789 | uint32_t remaining_transfer_len, remaining_user_sge_len, length; |
7c673cae FG |
1790 | struct spdk_nvme_sgl_descriptor *sgl; |
1791 | uint32_t nseg = 0; | |
1792 | ||
1793 | /* | |
1794 | * Build scattered payloads. | |
1795 | */ | |
1796 | assert(req->payload_size != 0); | |
11fdf7f2 TL |
1797 | assert(nvme_payload_type(&req->payload) == NVME_PAYLOAD_TYPE_SGL); |
1798 | assert(req->payload.reset_sgl_fn != NULL); | |
1799 | assert(req->payload.next_sge_fn != NULL); | |
1800 | req->payload.reset_sgl_fn(req->payload.contig_or_cb_arg, req->payload_offset); | |
7c673cae FG |
1801 | |
1802 | sgl = tr->u.sgl; | |
11fdf7f2 | 1803 | req->cmd.psdt = SPDK_NVME_PSDT_SGL_MPTR_CONTIG; |
7c673cae FG |
1804 | req->cmd.dptr.sgl1.unkeyed.subtype = 0; |
1805 | ||
1806 | remaining_transfer_len = req->payload_size; | |
1807 | ||
1808 | while (remaining_transfer_len > 0) { | |
11fdf7f2 TL |
1809 | rc = req->payload.next_sge_fn(req->payload.contig_or_cb_arg, |
1810 | &virt_addr, &remaining_user_sge_len); | |
7c673cae FG |
1811 | if (rc) { |
1812 | nvme_pcie_fail_request_bad_vtophys(qpair, tr); | |
1813 | return -1; | |
1814 | } | |
1815 | ||
11fdf7f2 TL |
1816 | remaining_user_sge_len = spdk_min(remaining_user_sge_len, remaining_transfer_len); |
1817 | remaining_transfer_len -= remaining_user_sge_len; | |
1818 | while (remaining_user_sge_len > 0) { | |
1819 | if (nseg >= NVME_MAX_SGL_DESCRIPTORS) { | |
1820 | nvme_pcie_fail_request_bad_vtophys(qpair, tr); | |
1821 | return -1; | |
1822 | } | |
7c673cae | 1823 | |
9f95a23c | 1824 | phys_addr = spdk_vtophys(virt_addr, NULL); |
11fdf7f2 TL |
1825 | if (phys_addr == SPDK_VTOPHYS_ERROR) { |
1826 | nvme_pcie_fail_request_bad_vtophys(qpair, tr); | |
1827 | return -1; | |
1828 | } | |
1829 | ||
9f95a23c | 1830 | length = spdk_min(remaining_user_sge_len, VALUE_2MB - _2MB_OFFSET(virt_addr)); |
11fdf7f2 TL |
1831 | remaining_user_sge_len -= length; |
1832 | virt_addr += length; | |
1833 | ||
1834 | if (nseg > 0 && phys_addr == | |
1835 | (*(sgl - 1)).address + (*(sgl - 1)).unkeyed.length) { | |
1836 | /* extend previous entry */ | |
1837 | (*(sgl - 1)).unkeyed.length += length; | |
1838 | continue; | |
1839 | } | |
7c673cae | 1840 | |
11fdf7f2 TL |
1841 | sgl->unkeyed.type = SPDK_NVME_SGL_TYPE_DATA_BLOCK; |
1842 | sgl->unkeyed.length = length; | |
1843 | sgl->address = phys_addr; | |
1844 | sgl->unkeyed.subtype = 0; | |
7c673cae | 1845 | |
11fdf7f2 TL |
1846 | sgl++; |
1847 | nseg++; | |
1848 | } | |
7c673cae FG |
1849 | } |
1850 | ||
1851 | if (nseg == 1) { | |
1852 | /* | |
1853 | * The whole transfer can be described by a single SGL descriptor. | |
1854 | * Use the special case described by the spec where SGL1's type is Data Block. | |
1855 | * This means the SGL in the tracker is not used at all, so copy the first (and only) | |
1856 | * SGL element into SGL1. | |
1857 | */ | |
1858 | req->cmd.dptr.sgl1.unkeyed.type = SPDK_NVME_SGL_TYPE_DATA_BLOCK; | |
1859 | req->cmd.dptr.sgl1.address = tr->u.sgl[0].address; | |
1860 | req->cmd.dptr.sgl1.unkeyed.length = tr->u.sgl[0].unkeyed.length; | |
1861 | } else { | |
1862 | /* For now we can only support 1 SGL segment in NVMe controller */ | |
1863 | req->cmd.dptr.sgl1.unkeyed.type = SPDK_NVME_SGL_TYPE_LAST_SEGMENT; | |
1864 | req->cmd.dptr.sgl1.address = tr->prp_sgl_bus_addr; | |
1865 | req->cmd.dptr.sgl1.unkeyed.length = nseg * sizeof(struct spdk_nvme_sgl_descriptor); | |
1866 | } | |
1867 | ||
1868 | return 0; | |
1869 | } | |
1870 | ||
1871 | /** | |
1872 | * Build PRP list describing scattered payload buffer. | |
1873 | */ | |
1874 | static int | |
1875 | nvme_pcie_qpair_build_prps_sgl_request(struct spdk_nvme_qpair *qpair, struct nvme_request *req, | |
1876 | struct nvme_tracker *tr) | |
1877 | { | |
1878 | int rc; | |
1879 | void *virt_addr; | |
11fdf7f2 TL |
1880 | uint32_t remaining_transfer_len, length; |
1881 | uint32_t prp_index = 0; | |
1882 | uint32_t page_size = qpair->ctrlr->page_size; | |
7c673cae FG |
1883 | |
1884 | /* | |
1885 | * Build scattered payloads. | |
1886 | */ | |
11fdf7f2 TL |
1887 | assert(nvme_payload_type(&req->payload) == NVME_PAYLOAD_TYPE_SGL); |
1888 | assert(req->payload.reset_sgl_fn != NULL); | |
1889 | req->payload.reset_sgl_fn(req->payload.contig_or_cb_arg, req->payload_offset); | |
7c673cae FG |
1890 | |
1891 | remaining_transfer_len = req->payload_size; | |
7c673cae | 1892 | while (remaining_transfer_len > 0) { |
11fdf7f2 TL |
1893 | assert(req->payload.next_sge_fn != NULL); |
1894 | rc = req->payload.next_sge_fn(req->payload.contig_or_cb_arg, &virt_addr, &length); | |
7c673cae FG |
1895 | if (rc) { |
1896 | nvme_pcie_fail_request_bad_vtophys(qpair, tr); | |
1897 | return -1; | |
1898 | } | |
1899 | ||
11fdf7f2 | 1900 | length = spdk_min(remaining_transfer_len, length); |
7c673cae FG |
1901 | |
1902 | /* | |
1903 | * Any incompatible sges should have been handled up in the splitting routine, | |
1904 | * but assert here as an additional check. | |
11fdf7f2 TL |
1905 | * |
1906 | * All SGEs except last must end on a page boundary. | |
7c673cae | 1907 | */ |
11fdf7f2 TL |
1908 | assert((length == remaining_transfer_len) || |
1909 | _is_page_aligned((uintptr_t)virt_addr + length, page_size)); | |
7c673cae | 1910 | |
11fdf7f2 TL |
1911 | rc = nvme_pcie_prp_list_append(tr, &prp_index, virt_addr, length, page_size); |
1912 | if (rc) { | |
1913 | nvme_pcie_fail_request_bad_vtophys(qpair, tr); | |
1914 | return rc; | |
7c673cae FG |
1915 | } |
1916 | ||
11fdf7f2 | 1917 | remaining_transfer_len -= length; |
7c673cae FG |
1918 | } |
1919 | ||
1920 | return 0; | |
1921 | } | |
1922 | ||
7c673cae FG |
1923 | int |
1924 | nvme_pcie_qpair_submit_request(struct spdk_nvme_qpair *qpair, struct nvme_request *req) | |
1925 | { | |
1926 | struct nvme_tracker *tr; | |
1927 | int rc = 0; | |
11fdf7f2 | 1928 | void *md_payload; |
7c673cae FG |
1929 | struct spdk_nvme_ctrlr *ctrlr = qpair->ctrlr; |
1930 | struct nvme_pcie_qpair *pqpair = nvme_pcie_qpair(qpair); | |
1931 | ||
9f95a23c | 1932 | if (spdk_unlikely(nvme_qpair_is_admin_queue(qpair))) { |
7c673cae FG |
1933 | nvme_robust_mutex_lock(&ctrlr->ctrlr_lock); |
1934 | } | |
1935 | ||
1936 | tr = TAILQ_FIRST(&pqpair->free_tr); | |
1937 | ||
9f95a23c | 1938 | if (tr == NULL) { |
7c673cae | 1939 | /* |
7c673cae FG |
1940 | * Put the request on the qpair's request queue to be |
1941 | * processed when a tracker frees up via a command | |
9f95a23c | 1942 | * completion. |
7c673cae FG |
1943 | */ |
1944 | STAILQ_INSERT_TAIL(&qpair->queued_req, req, stailq); | |
1945 | goto exit; | |
1946 | } | |
1947 | ||
1948 | TAILQ_REMOVE(&pqpair->free_tr, tr, tq_list); /* remove tr from free_tr */ | |
1949 | TAILQ_INSERT_TAIL(&pqpair->outstanding_tr, tr, tq_list); | |
1950 | tr->req = req; | |
9f95a23c TL |
1951 | tr->cb_fn = req->cb_fn; |
1952 | tr->cb_arg = req->cb_arg; | |
7c673cae FG |
1953 | req->cmd.cid = tr->cid; |
1954 | ||
11fdf7f2 TL |
1955 | if (req->payload_size && req->payload.md) { |
1956 | md_payload = req->payload.md + req->md_offset; | |
9f95a23c | 1957 | tr->req->cmd.mptr = spdk_vtophys(md_payload, NULL); |
11fdf7f2 TL |
1958 | if (tr->req->cmd.mptr == SPDK_VTOPHYS_ERROR) { |
1959 | nvme_pcie_fail_request_bad_vtophys(qpair, tr); | |
1960 | rc = -EINVAL; | |
1961 | goto exit; | |
1962 | } | |
1963 | } | |
1964 | ||
7c673cae | 1965 | if (req->payload_size == 0) { |
9f95a23c | 1966 | /* Null payload - leave PRP fields untouched */ |
7c673cae | 1967 | rc = 0; |
11fdf7f2 | 1968 | } else if (nvme_payload_type(&req->payload) == NVME_PAYLOAD_TYPE_CONTIG) { |
7c673cae | 1969 | rc = nvme_pcie_qpair_build_contig_request(qpair, req, tr); |
11fdf7f2 | 1970 | } else if (nvme_payload_type(&req->payload) == NVME_PAYLOAD_TYPE_SGL) { |
7c673cae FG |
1971 | if (ctrlr->flags & SPDK_NVME_CTRLR_SGL_SUPPORTED) { |
1972 | rc = nvme_pcie_qpair_build_hw_sgl_request(qpair, req, tr); | |
1973 | } else { | |
1974 | rc = nvme_pcie_qpair_build_prps_sgl_request(qpair, req, tr); | |
1975 | } | |
1976 | } else { | |
1977 | assert(0); | |
1978 | nvme_pcie_fail_request_bad_vtophys(qpair, tr); | |
1979 | rc = -EINVAL; | |
1980 | } | |
1981 | ||
1982 | if (rc < 0) { | |
1983 | goto exit; | |
1984 | } | |
1985 | ||
1986 | nvme_pcie_qpair_submit_tracker(qpair, tr); | |
1987 | ||
1988 | exit: | |
9f95a23c | 1989 | if (spdk_unlikely(nvme_qpair_is_admin_queue(qpair))) { |
7c673cae FG |
1990 | nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock); |
1991 | } | |
1992 | ||
1993 | return rc; | |
1994 | } | |
1995 | ||
/*
 * Invoke the active process's timeout callback for any outstanding
 * tracker whose request has exceeded the configured timeout.
 */
static void
nvme_pcie_qpair_check_timeout(struct spdk_nvme_qpair *qpair)
{
	uint64_t t02;
	struct nvme_tracker *tr, *tmp;
	struct nvme_pcie_qpair *pqpair = nvme_pcie_qpair(qpair);
	struct spdk_nvme_ctrlr *ctrlr = qpair->ctrlr;
	struct spdk_nvme_ctrlr_process *active_proc;

	/* Don't check timeouts during controller initialization. */
	if (ctrlr->state != NVME_CTRLR_STATE_READY) {
		return;
	}

	/* The admin queue is shared; resolve the calling process explicitly. */
	if (nvme_qpair_is_admin_queue(qpair)) {
		active_proc = spdk_nvme_ctrlr_get_current_process(ctrlr);
	} else {
		active_proc = qpair->active_proc;
	}

	/* Only check timeouts if the current process has a timeout callback. */
	if (active_proc == NULL || active_proc->timeout_cb_fn == NULL) {
		return;
	}

	t02 = spdk_get_ticks();
	TAILQ_FOREACH_SAFE(tr, &pqpair->outstanding_tr, tq_list, tmp) {
		assert(tr->req != NULL);

		if (nvme_request_check_timeout(tr->req, tr->cid, active_proc, t02)) {
			/*
			 * The requests are in order, so as soon as one has not timed out,
			 * stop iterating.
			 */
			break;
		}
	}
}
2034 | ||
/*
 * Reap up to max_completions entries from the qpair's completion queue,
 * completing the corresponding trackers, then ring the CQ (and, when
 * doorbell batching is enabled, the SQ) doorbells.
 *
 * Returns the number of completions processed.
 */
int32_t
nvme_pcie_qpair_process_completions(struct spdk_nvme_qpair *qpair, uint32_t max_completions)
{
	struct nvme_pcie_qpair *pqpair = nvme_pcie_qpair(qpair);
	struct nvme_tracker *tr;
	struct spdk_nvme_cpl *cpl, *next_cpl;
	uint32_t num_completions = 0;
	struct spdk_nvme_ctrlr *ctrlr = qpair->ctrlr;
	uint16_t next_cq_head;
	uint8_t next_phase;
	bool next_is_valid = false;

	/* The admin queue may be shared between processes - serialize access. */
	if (spdk_unlikely(nvme_qpair_is_admin_queue(qpair))) {
		nvme_robust_mutex_lock(&ctrlr->ctrlr_lock);
	}

	if (max_completions == 0 || max_completions > pqpair->max_completions_cap) {
		/*
		 * max_completions == 0 means unlimited, but complete at most
		 * max_completions_cap batch of I/O at a time so that the completion
		 * queue doorbells don't wrap around.
		 */
		max_completions = pqpair->max_completions_cap;
	}

	while (1) {
		cpl = &pqpair->cpl[pqpair->cq_head];

		/* A phase-bit mismatch means this entry has not been posted yet. */
		if (!next_is_valid && cpl->status.p != pqpair->flags.phase) {
			break;
		}

		/* Peek at the next entry; prefetch its tracker if it's already valid. */
		if (spdk_likely(pqpair->cq_head + 1 != pqpair->num_entries)) {
			next_cq_head = pqpair->cq_head + 1;
			next_phase = pqpair->flags.phase;
		} else {
			/* Next entry wraps to the queue head with a flipped phase. */
			next_cq_head = 0;
			next_phase = !pqpair->flags.phase;
		}
		next_cpl = &pqpair->cpl[next_cq_head];
		next_is_valid = (next_cpl->status.p == next_phase);
		if (next_is_valid) {
			__builtin_prefetch(&pqpair->tr[next_cpl->cid]);
		}

#ifdef __PPC64__
		/*
		 * This memory barrier prevents reordering of:
		 * - load after store from/to tr
		 * - load after load cpl phase and cpl cid
		 */
		spdk_mb();
#elif defined(__aarch64__)
		__asm volatile("dmb oshld" ::: "memory");
#endif

		if (spdk_unlikely(++pqpair->cq_head == pqpair->num_entries)) {
			pqpair->cq_head = 0;
			pqpair->flags.phase = !pqpair->flags.phase;
		}

		tr = &pqpair->tr[cpl->cid];
		/* Prefetch the req's STAILQ_ENTRY since we'll need to access it
		 * as part of putting the req back on the qpair's free list.
		 */
		__builtin_prefetch(&tr->req->stailq);
		pqpair->sq_head = cpl->sqhd;

		if (tr->req) {
			nvme_pcie_qpair_complete_tracker(qpair, tr, cpl, true);
		} else {
			SPDK_ERRLOG("cpl does not map to outstanding cmd\n");
			nvme_qpair_print_completion(qpair, cpl);
			assert(0);
		}

		if (++num_completions == max_completions) {
			break;
		}
	}

	if (num_completions > 0) {
		nvme_pcie_qpair_ring_cq_doorbell(qpair);
	}

	if (pqpair->flags.delay_pcie_doorbell) {
		/* Batched doorbells: only ring the SQ if new entries were queued
		 * since the last ring.
		 */
		if (pqpair->last_sq_tail != pqpair->sq_tail) {
			nvme_pcie_qpair_ring_sq_doorbell(qpair);
			pqpair->last_sq_tail = pqpair->sq_tail;
		}
	}

	if (spdk_unlikely(ctrlr->timeout_enabled)) {
		/*
		 * User registered for timeout callback
		 */
		nvme_pcie_qpair_check_timeout(qpair);
	}

	/* Before returning, complete any pending admin request. */
	if (spdk_unlikely(nvme_qpair_is_admin_queue(qpair))) {
		nvme_pcie_qpair_complete_pending_admin_request(qpair);

		nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock);
	}

	return num_completions;
}