/*-
 *   BSD LICENSE
 *
 *   Copyright (c) Intel Corporation. All rights reserved.
 *   Copyright (c) 2020 Mellanox Technologies LTD. All rights reserved.
 *
 *   Redistribution and use in source and binary forms, with or without
 *   modification, are permitted provided that the following conditions
 *   are met:
 *
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in
 *       the documentation and/or other materials provided with the
 *       distribution.
 *     * Neither the name of Intel Corporation nor the names of its
 *       contributors may be used to endorse or promote products derived
 *       from this software without specific prior written permission.
 *
 *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#ifndef __NVME_INTERNAL_H__
#define __NVME_INTERNAL_H__

#include "spdk/config.h"
#include "spdk/likely.h"
#include "spdk/stdinc.h"

#include "spdk/nvme.h"

#if defined(__i386__) || defined(__x86_64__)
#include <x86intrin.h>
#endif

#include "spdk/queue.h"
#include "spdk/barrier.h"
#include "spdk/bit_array.h"
#include "spdk/mmio.h"
#include "spdk/pci_ids.h"
#include "spdk/util.h"
#include "spdk/memory.h"
#include "spdk/nvme_intel.h"
#include "spdk/nvmf_spec.h"
#include "spdk/uuid.h"

#include "spdk_internal/assert.h"
#include "spdk_internal/log.h"

extern pid_t g_spdk_nvme_pid;

/*
 * Some Intel devices support vendor-unique read latency log page even
 * though the log page directory says otherwise.
 */
#define NVME_INTEL_QUIRK_READ_LATENCY 0x1

/*
 * Some Intel devices support vendor-unique write latency log page even
 * though the log page directory says otherwise.
 */
#define NVME_INTEL_QUIRK_WRITE_LATENCY 0x2

/*
 * The controller needs a delay before it starts checking device
 * readiness, which is done by reading the NVME_CSTS_RDY bit.
 */
#define NVME_QUIRK_DELAY_BEFORE_CHK_RDY 0x4

/*
 * The controller performs best when I/O is split on particular
 * LBA boundaries.
 */
#define NVME_INTEL_QUIRK_STRIPING 0x8

/*
 * The controller needs a delay after allocating an I/O queue pair
 * before it is ready to accept I/O commands.
 */
#define NVME_QUIRK_DELAY_AFTER_QUEUE_ALLOC 0x10

/*
 * Earlier NVMe devices do not indicate whether unmapped blocks
 * will read all zeroes or not. This define indicates that the
 * device does in fact read all zeroes after an unmap event.
 */
#define NVME_QUIRK_READ_ZERO_AFTER_DEALLOCATE 0x20

/*
 * The controller doesn't handle Identify CNS values other than 0 or 1 correctly.
 */
#define NVME_QUIRK_IDENTIFY_CNS 0x40

/*
 * The controller supports the Open Channel command set if an additional
 * condition is met, e.g. the first byte of the vendor specific bits in the
 * namespace identify structure is set to 0x1.
 */
#define NVME_QUIRK_OCSSD 0x80

/*
 * The controller has an Intel vendor ID but does not support Intel vendor-specific
 * log pages. This is primarily for QEMU emulated SSDs which report an Intel vendor
 * ID but do not support these log pages.
 */
#define NVME_INTEL_QUIRK_NO_LOG_PAGES 0x100

/*
 * The controller does not set SHST_COMPLETE in a reasonable amount of time. This
 * is primarily seen in virtual VMware NVMe SSDs. This quirk merely adds an additional
 * error message noting that on VMware NVMe SSDs the shutdown timeout may be expected.
 */
#define NVME_QUIRK_SHST_COMPLETE 0x200

/*
 * The controller requires an extra delay before starting the initialization process
 * during attach.
 */
#define NVME_QUIRK_DELAY_BEFORE_INIT 0x400

/*
 * Some SSDs exhibit poor performance with the default SPDK NVMe I/O queue size.
 * This quirk will increase the default to 1024, which matches other operating
 * systems, at the cost of some extra memory usage. Users can still override
 * the increased default by changing the spdk_nvme_io_qpair_opts when allocating
 * a new queue pair.
 */
#define NVME_QUIRK_MINIMUM_IO_QUEUE_SIZE 0x800

/**
 * The maximum access width to PCI memory space is 8 bytes; do not use AVX2 or
 * SSE instructions to optimize memory accesses (memcpy or memset) larger than
 * 8 bytes.
 */
#define NVME_QUIRK_MAXIMUM_PCI_ACCESS_WIDTH 0x1000

/**
 * The SSD does not support OPAL even though it sets the security bit in OACS.
 */
#define NVME_QUIRK_OACS_SECURITY 0x2000
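
/*
 * Editorial note (not in the original source): the quirk flags above are
 * plain bits in the 64-bit spdk_nvme_ctrlr::quirks field, so a controller
 * may carry several of them at once. A minimal sketch of how a code path
 * might test one, assuming "ctrlr" is an attached struct spdk_nvme_ctrlr:
 *
 *     if (ctrlr->quirks & NVME_QUIRK_DELAY_BEFORE_CHK_RDY) {
 *             // wait before polling CSTS.RDY
 *     }
 */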

#define NVME_MAX_ASYNC_EVENTS (8)

#define NVME_MAX_ADMIN_TIMEOUT_IN_SECS (30)

/* Maximum log page size to fetch for AERs. */
#define NVME_MAX_AER_LOG_SIZE (4096)

/*
 * NVME_MAX_IO_QUEUES in nvme_spec.h defines the 64K spec-limit, but this
 * define specifies the maximum number of queues this driver will actually
 * try to configure, if available.
 */
#define DEFAULT_MAX_IO_QUEUES (1024)
#define DEFAULT_ADMIN_QUEUE_SIZE (32)
#define DEFAULT_IO_QUEUE_SIZE (256)
#define DEFAULT_IO_QUEUE_SIZE_FOR_QUIRK (1024) /* Matches Linux kernel driver */

#define DEFAULT_IO_QUEUE_REQUESTS (512)

#define SPDK_NVME_DEFAULT_RETRY_COUNT (4)

#define SPDK_NVME_TRANSPORT_ACK_TIMEOUT_DISABLED (0)
#define SPDK_NVME_DEFAULT_TRANSPORT_ACK_TIMEOUT SPDK_NVME_TRANSPORT_ACK_TIMEOUT_DISABLED

#define MIN_KEEP_ALIVE_TIMEOUT_IN_MS (10000)

/* We want to fit submission and completion rings each in a single 2MB
 * hugepage to ensure physical address contiguity.
 */
#define MAX_IO_QUEUE_ENTRIES (VALUE_2MB / spdk_max( \
                sizeof(struct spdk_nvme_cmd), \
                sizeof(struct spdk_nvme_cpl)))
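
/*
 * Editorial note (not in the original source): with the 64-byte submission
 * queue entry (struct spdk_nvme_cmd) and the 16-byte completion queue entry
 * (struct spdk_nvme_cpl) defined by the NVMe specification, spdk_max() picks
 * 64, so MAX_IO_QUEUE_ENTRIES works out to VALUE_2MB / 64 = 32768 entries
 * per ring.
 */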

enum nvme_payload_type {
        NVME_PAYLOAD_TYPE_INVALID = 0,

        /** nvme_request::u.payload.contig_buffer is valid for this request */
        NVME_PAYLOAD_TYPE_CONTIG,

        /** nvme_request::u.sgl is valid for this request */
        NVME_PAYLOAD_TYPE_SGL,
};

/**
 * Descriptor for a request data payload.
 */
struct nvme_payload {
        /**
         * Functions for retrieving physical addresses for scattered payloads.
         */
        spdk_nvme_req_reset_sgl_cb reset_sgl_fn;
        spdk_nvme_req_next_sge_cb next_sge_fn;

        /**
         * If reset_sgl_fn == NULL, this is a contig payload, and contig_or_cb_arg contains the
         * virtual memory address of a single virtually contiguous buffer.
         *
         * If reset_sgl_fn != NULL, this is a SGL payload, and contig_or_cb_arg contains the
         * cb_arg that will be passed to the SGL callback functions.
         */
        void *contig_or_cb_arg;

        /** Virtual memory address of a single virtually contiguous metadata buffer */
        void *md;
};

#define NVME_PAYLOAD_CONTIG(contig_, md_) \
        (struct nvme_payload) { \
                .reset_sgl_fn = NULL, \
                .next_sge_fn = NULL, \
                .contig_or_cb_arg = (contig_), \
                .md = (md_), \
        }

#define NVME_PAYLOAD_SGL(reset_sgl_fn_, next_sge_fn_, cb_arg_, md_) \
        (struct nvme_payload) { \
                .reset_sgl_fn = (reset_sgl_fn_), \
                .next_sge_fn = (next_sge_fn_), \
                .contig_or_cb_arg = (cb_arg_), \
                .md = (md_), \
        }

static inline enum nvme_payload_type
nvme_payload_type(const struct nvme_payload *payload) {
        return payload->reset_sgl_fn ? NVME_PAYLOAD_TYPE_SGL : NVME_PAYLOAD_TYPE_CONTIG;
}
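
/*
 * Editorial sketch (not in the original source): building the two payload
 * flavors. "buf", "md_buf", "reset_cb", "next_sge_cb" and "sgl_ctx" below are
 * hypothetical caller-provided names.
 *
 *     struct nvme_payload contig = NVME_PAYLOAD_CONTIG(buf, md_buf);
 *     struct nvme_payload sgl = NVME_PAYLOAD_SGL(reset_cb, next_sge_cb, sgl_ctx, NULL);
 *
 *     assert(nvme_payload_type(&contig) == NVME_PAYLOAD_TYPE_CONTIG);
 *     assert(nvme_payload_type(&sgl) == NVME_PAYLOAD_TYPE_SGL);
 */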

struct nvme_error_cmd {
        bool do_not_submit;
        uint64_t timeout_tsc;
        uint32_t err_count;
        uint8_t opc;
        struct spdk_nvme_status status;
        TAILQ_ENTRY(nvme_error_cmd) link;
};

struct nvme_request {
        struct spdk_nvme_cmd cmd;

        uint8_t retries;

        uint8_t timed_out : 1;

        /**
         * True if the request is in the queued_req list.
         */
        uint8_t queued : 1;
        uint8_t reserved : 6;

        /**
         * Number of child requests still outstanding for this
         * request which was split into multiple child requests.
         */
        uint16_t num_children;

        /**
         * Offset in bytes from the beginning of payload for this request.
         * This is used for I/O commands that are split into multiple requests.
         */
        uint32_t payload_offset;
        uint32_t md_offset;

        uint32_t payload_size;

        /**
         * Timeout ticks for error injection requests; can be extended in the future
         * to support a per-request timeout feature.
         */
        uint64_t timeout_tsc;

        /**
         * Data payload for this request's command.
         */
        struct nvme_payload payload;

        spdk_nvme_cmd_cb cb_fn;
        void *cb_arg;
        STAILQ_ENTRY(nvme_request) stailq;

        struct spdk_nvme_qpair *qpair;

        /*
         * The value of spdk_get_ticks() when the request was submitted to the hardware.
         * Only set if ctrlr->timeout_enabled is true.
         */
        uint64_t submit_tick;

        /**
         * The active admin request can be moved to a per-process pending
         * list based on the saved pid to tell which process it belongs
         * to. The cpl saves the original completion information which
         * is used in the completion callback.
         * NOTE: the two fields below are only used for admin requests.
         */
        pid_t pid;
        struct spdk_nvme_cpl cpl;

        uint32_t md_size;

        /**
         * The following members should not be reordered with members
         * above. These members are only needed when splitting
         * requests, which is done rarely, and the driver is careful
         * to not touch the following fields until a split operation is
         * needed, to avoid touching an extra cacheline.
         */

        /**
         * Points to the outstanding child requests for a parent request.
         * Only valid if a request was split into multiple child
         * requests, and is not initialized for non-split requests.
         */
        TAILQ_HEAD(, nvme_request) children;

        /**
         * Linked-list pointers for a child request in its parent's list.
         */
        TAILQ_ENTRY(nvme_request) child_tailq;

        /**
         * Points to a parent request if part of a split request,
         * NULL otherwise.
         */
        struct nvme_request *parent;

        /**
         * Completion status for a parent request. Initialized to all 0's
         * (SUCCESS) before child requests are submitted. If a child
         * request completes with error, the error status is copied here,
         * to ensure that the parent request is also completed with error
         * status once all child requests are completed.
         */
        struct spdk_nvme_cpl parent_status;

        /**
         * The user_cb_fn and user_cb_arg fields are used for holding the original
         * callback data when using nvme_allocate_request_user_copy.
         */
        spdk_nvme_cmd_cb user_cb_fn;
        void *user_cb_arg;
        void *user_buffer;
};

struct nvme_completion_poll_status {
        struct spdk_nvme_cpl cpl;
        bool done;
        /* This flag indicates that the request has timed out and the memory
         * must be freed in a completion callback.
         */
        bool timed_out;
};

struct nvme_async_event_request {
        struct spdk_nvme_ctrlr *ctrlr;
        struct nvme_request *req;
        struct spdk_nvme_cpl cpl;
};

enum nvme_qpair_state {
        NVME_QPAIR_DISCONNECTED,
        NVME_QPAIR_DISCONNECTING,
        NVME_QPAIR_CONNECTING,
        NVME_QPAIR_CONNECTED,
        NVME_QPAIR_ENABLING,
        NVME_QPAIR_ENABLED,
        NVME_QPAIR_DESTROYING,
};

struct spdk_nvme_qpair {
        struct spdk_nvme_ctrlr *ctrlr;

        uint16_t id;

        uint8_t qprio;

        uint8_t state : 3;

        /*
         * Members for handling I/O qpair deletion inside of a completion context.
         * These are specifically defined as single bits, so that they do not
         * push this data structure out to another cacheline.
         */
        uint8_t in_completion_context : 1;
        uint8_t delete_after_completion_context : 1;

        /*
         * Set when no deletion notification is needed. For example, the process
         * which allocated this qpair exited unexpectedly.
         */
        uint8_t no_deletion_notification_needed : 1;

        uint8_t first_fused_submitted : 1;

        enum spdk_nvme_transport_type trtype;

        STAILQ_HEAD(, nvme_request) free_req;
        STAILQ_HEAD(, nvme_request) queued_req;
        STAILQ_HEAD(, nvme_request) aborting_queued_req;

        /* List entry for spdk_nvme_transport_poll_group::qpairs */
        STAILQ_ENTRY(spdk_nvme_qpair) poll_group_stailq;

        /** Commands whose opcode is in this list will return an error */
        TAILQ_HEAD(, nvme_error_cmd) err_cmd_head;
        /** Requests in this list will return an error */
        STAILQ_HEAD(, nvme_request) err_req_head;

        /* List entry for spdk_nvme_ctrlr::active_io_qpairs */
        TAILQ_ENTRY(spdk_nvme_qpair) tailq;

        /* List entry for spdk_nvme_ctrlr_process::allocated_io_qpairs */
        TAILQ_ENTRY(spdk_nvme_qpair) per_process_tailq;

        struct spdk_nvme_ctrlr_process *active_proc;

        struct spdk_nvme_transport_poll_group *poll_group;

        void *poll_group_tailq_head;

        void *req_buf;

        const struct spdk_nvme_transport *transport;

        uint8_t transport_failure_reason : 2;
};

struct spdk_nvme_poll_group {
        void *ctx;
        STAILQ_HEAD(, spdk_nvme_transport_poll_group) tgroups;
};

struct spdk_nvme_transport_poll_group {
        struct spdk_nvme_poll_group *group;
        const struct spdk_nvme_transport *transport;
        STAILQ_HEAD(, spdk_nvme_qpair) connected_qpairs;
        STAILQ_HEAD(, spdk_nvme_qpair) disconnected_qpairs;
        STAILQ_ENTRY(spdk_nvme_transport_poll_group) link;
        bool in_completion_context;
        uint64_t num_qpairs_to_delete;
};

struct spdk_nvme_ns {
        struct spdk_nvme_ctrlr *ctrlr;
        uint32_t sector_size;

        /*
         * Size of data transferred as part of each block,
         * including metadata if FLBAS indicates the metadata is transferred
         * as part of the data buffer at the end of each LBA.
         */
        uint32_t extended_lba_size;

        uint32_t md_size;
        uint32_t pi_type;
        uint32_t sectors_per_max_io;
        uint32_t sectors_per_stripe;
        uint32_t id;
        uint16_t flags;

        /* Namespace Identification Descriptor List (CNS = 03h) */
        uint8_t id_desc_list[4096];
};

/**
 * State of struct spdk_nvme_ctrlr (in particular, during initialization).
 */
enum nvme_ctrlr_state {
        /**
         * Wait before initializing the controller.
         */
        NVME_CTRLR_STATE_INIT_DELAY,

        /**
         * Controller has not been initialized yet.
         */
        NVME_CTRLR_STATE_INIT,

        /**
         * Waiting for CSTS.RDY to transition from 0 to 1 so that CC.EN may be set to 0.
         */
        NVME_CTRLR_STATE_DISABLE_WAIT_FOR_READY_1,

        /**
         * Waiting for CSTS.RDY to transition from 1 to 0 so that CC.EN may be set to 1.
         */
        NVME_CTRLR_STATE_DISABLE_WAIT_FOR_READY_0,

        /**
         * Enable the controller by writing CC.EN to 1.
         */
        NVME_CTRLR_STATE_ENABLE,

        /**
         * Waiting for CSTS.RDY to transition from 0 to 1 after enabling the controller.
         */
        NVME_CTRLR_STATE_ENABLE_WAIT_FOR_READY_1,

        /**
         * Reset the Admin queue of the controller.
         */
        NVME_CTRLR_STATE_RESET_ADMIN_QUEUE,

        /**
         * Identify Controller command will be sent to the controller.
         */
        NVME_CTRLR_STATE_IDENTIFY,

        /**
         * Waiting for the Identify Controller command to be completed.
         */
        NVME_CTRLR_STATE_WAIT_FOR_IDENTIFY,

        /**
         * Set Number of Queues of the controller.
         */
        NVME_CTRLR_STATE_SET_NUM_QUEUES,

        /**
         * Waiting for the Set Number of Queues command to be completed.
         */
        NVME_CTRLR_STATE_WAIT_FOR_SET_NUM_QUEUES,

        /**
         * Construct Namespace data structures of the controller.
         */
        NVME_CTRLR_STATE_CONSTRUCT_NS,

        /**
         * Get active Namespace list of the controller.
         */
        NVME_CTRLR_STATE_IDENTIFY_ACTIVE_NS,

        /**
         * Waiting for the Identify Active Namespace commands to be completed.
         */
        NVME_CTRLR_STATE_WAIT_FOR_IDENTIFY_ACTIVE_NS,

        /**
         * Get Identify Namespace Data structure for each NS.
         */
        NVME_CTRLR_STATE_IDENTIFY_NS,

        /**
         * Waiting for the Identify Namespace commands to be completed.
         */
        NVME_CTRLR_STATE_WAIT_FOR_IDENTIFY_NS,

        /**
         * Get Identify Namespace Identification Descriptors.
         */
        NVME_CTRLR_STATE_IDENTIFY_ID_DESCS,

        /**
         * Waiting for the Identify Namespace Identification
         * Descriptors to be completed.
         */
        NVME_CTRLR_STATE_WAIT_FOR_IDENTIFY_ID_DESCS,

        /**
         * Configure AER of the controller.
         */
        NVME_CTRLR_STATE_CONFIGURE_AER,

        /**
         * Waiting for the Configure AER to be completed.
         */
        NVME_CTRLR_STATE_WAIT_FOR_CONFIGURE_AER,

        /**
         * Set supported log pages of the controller.
         */
        NVME_CTRLR_STATE_SET_SUPPORTED_LOG_PAGES,

        /**
         * Set supported features of the controller.
         */
        NVME_CTRLR_STATE_SET_SUPPORTED_FEATURES,

        /**
         * Set Doorbell Buffer Config of the controller.
         */
        NVME_CTRLR_STATE_SET_DB_BUF_CFG,

        /**
         * Waiting for Doorbell Buffer Config to be completed.
         */
        NVME_CTRLR_STATE_WAIT_FOR_DB_BUF_CFG,

        /**
         * Set Keep Alive Timeout of the controller.
         */
        NVME_CTRLR_STATE_SET_KEEP_ALIVE_TIMEOUT,

        /**
         * Waiting for Set Keep Alive Timeout to be completed.
         */
        NVME_CTRLR_STATE_WAIT_FOR_KEEP_ALIVE_TIMEOUT,

        /**
         * Set Host ID of the controller.
         */
        NVME_CTRLR_STATE_SET_HOST_ID,

        /**
         * Waiting for Set Host ID to be completed.
         */
        NVME_CTRLR_STATE_WAIT_FOR_HOST_ID,

        /**
         * Controller initialization has completed and the controller is ready.
         */
        NVME_CTRLR_STATE_READY,

        /**
         * Controller initialization has an error.
         */
        NVME_CTRLR_STATE_ERROR
};

#define NVME_TIMEOUT_INFINITE 0

/*
 * Used to track properties for all processes accessing the controller.
 */
struct spdk_nvme_ctrlr_process {
        /** Whether it is the primary process */
        bool is_primary;

        /** Process ID */
        pid_t pid;

        /** Active admin requests to be completed */
        STAILQ_HEAD(, nvme_request) active_reqs;

        TAILQ_ENTRY(spdk_nvme_ctrlr_process) tailq;

        /** Per process PCI device handle */
        struct spdk_pci_device *devhandle;

        /** Reference to track the number of attachments to this controller. */
        int ref;

        /** Allocated I/O qpairs */
        TAILQ_HEAD(, spdk_nvme_qpair) allocated_io_qpairs;

        spdk_nvme_aer_cb aer_cb_fn;
        void *aer_cb_arg;

        /**
         * A function pointer to the timeout callback function.
         */
        spdk_nvme_timeout_cb timeout_cb_fn;
        void *timeout_cb_arg;
        uint64_t timeout_ticks;
};

/*
 * One of these per allocated PCI device.
 */
struct spdk_nvme_ctrlr {
        /* Hot data (accessed in I/O path) starts here. */

        /** Array of namespaces indexed by nsid - 1 */
        struct spdk_nvme_ns *ns;

        uint32_t num_ns;

        bool is_removed;

        bool is_resetting;

        bool is_failed;

        bool is_destructed;

        bool timeout_enabled;

        uint16_t max_sges;

        uint16_t cntlid;

        /** Controller support flags */
        uint64_t flags;

        /** NVMe-oF in-capsule data size in bytes */
        uint32_t ioccsz_bytes;

        /** NVMe-oF in-capsule data offset in 16 byte units */
        uint16_t icdoff;

        /* Cold data (not accessed in normal I/O path) is after this point. */

        struct spdk_nvme_transport_id trid;

        union spdk_nvme_cap_register cap;
        union spdk_nvme_vs_register vs;

        enum nvme_ctrlr_state state;
        uint64_t state_timeout_tsc;

        uint64_t next_keep_alive_tick;
        uint64_t keep_alive_interval_ticks;

        TAILQ_ENTRY(spdk_nvme_ctrlr) tailq;

        /** All the log pages supported */
        bool log_page_supported[256];

        /** All the features supported */
        bool feature_supported[256];

        /** maximum I/O size in bytes */
        uint32_t max_xfer_size;

        /** minimum page size supported by this controller in bytes */
        uint32_t min_page_size;

        /** selected memory page size for this controller in bytes */
        uint32_t page_size;

        uint32_t num_aers;
        struct nvme_async_event_request aer[NVME_MAX_ASYNC_EVENTS];

        /** guards access to the controller itself, including admin queues */
        pthread_mutex_t ctrlr_lock;

        struct spdk_nvme_qpair *adminq;

        /** shadow doorbell buffer */
        uint32_t *shadow_doorbell;
        /** eventidx buffer */
        uint32_t *eventidx;

        /**
         * Identify Controller data.
         */
        struct spdk_nvme_ctrlr_data cdata;

        /**
         * Keep track of active namespaces
         */
        uint32_t *active_ns_list;

        /**
         * Array of Identify Namespace data.
         *
         * Stored separately from ns since nsdata should not normally be accessed during I/O.
         */
        struct spdk_nvme_ns_data *nsdata;

        struct spdk_bit_array *free_io_qids;
        TAILQ_HEAD(, spdk_nvme_qpair) active_io_qpairs;

        struct spdk_nvme_ctrlr_opts opts;

        uint64_t quirks;

        /* Extra sleep time during controller initialization */
        uint64_t sleep_timeout_tsc;

        /** Track all the processes that manage this controller */
        TAILQ_HEAD(, spdk_nvme_ctrlr_process) active_procs;

        STAILQ_HEAD(, nvme_request) queued_aborts;
        uint32_t outstanding_aborts;

        /* CB to notify the user when the ctrlr is removed/failed. */
        spdk_nvme_remove_cb remove_cb;
        void *cb_ctx;

        struct spdk_nvme_qpair *external_io_msgs_qpair;
        pthread_mutex_t external_io_msgs_lock;
        struct spdk_ring *external_io_msgs;

        STAILQ_HEAD(, nvme_io_msg_producer) io_producers;
};

struct spdk_nvme_probe_ctx {
        struct spdk_nvme_transport_id trid;
        void *cb_ctx;
        spdk_nvme_probe_cb probe_cb;
        spdk_nvme_attach_cb attach_cb;
        spdk_nvme_remove_cb remove_cb;
        TAILQ_HEAD(, spdk_nvme_ctrlr) init_ctrlrs;
};

struct nvme_driver {
        pthread_mutex_t lock;

        /** Multi-process shared attached controller list */
        TAILQ_HEAD(, spdk_nvme_ctrlr) shared_attached_ctrlrs;

        bool initialized;
        struct spdk_uuid default_extended_host_id;

        /** netlink socket fd for hotplug messages */
        int hotplug_fd;
};

extern struct nvme_driver *g_spdk_nvme_driver;

int nvme_driver_init(void);

#define nvme_delay usleep

static inline bool
nvme_qpair_is_admin_queue(struct spdk_nvme_qpair *qpair)
{
        return qpair->id == 0;
}

static inline bool
nvme_qpair_is_io_queue(struct spdk_nvme_qpair *qpair)
{
        return qpair->id != 0;
}

static inline int
nvme_robust_mutex_lock(pthread_mutex_t *mtx)
{
        int rc = pthread_mutex_lock(mtx);

#ifndef __FreeBSD__
        if (rc == EOWNERDEAD) {
                rc = pthread_mutex_consistent(mtx);
        }
#endif

        return rc;
}

static inline int
nvme_robust_mutex_unlock(pthread_mutex_t *mtx)
{
        return pthread_mutex_unlock(mtx);
}
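
/*
 * Editorial sketch (not in the original source): the typical pattern around
 * these wrappers. The lock is a robust, process-shared mutex (see
 * nvme_robust_mutex_init_shared() below); if a previous owner died while
 * holding it, pthread_mutex_lock() returns EOWNERDEAD and the wrapper marks
 * the mutex consistent again so the caller can proceed.
 *
 *     nvme_robust_mutex_lock(&ctrlr->ctrlr_lock);
 *     // ... access controller state shared across processes ...
 *     nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock);
 */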

/* Poll group management functions. */
int nvme_poll_group_connect_qpair(struct spdk_nvme_qpair *qpair);
int nvme_poll_group_disconnect_qpair(struct spdk_nvme_qpair *qpair);

/* Admin functions */
int nvme_ctrlr_cmd_identify(struct spdk_nvme_ctrlr *ctrlr,
                uint8_t cns, uint16_t cntid, uint32_t nsid,
                void *payload, size_t payload_size,
                spdk_nvme_cmd_cb cb_fn, void *cb_arg);
int nvme_ctrlr_cmd_set_num_queues(struct spdk_nvme_ctrlr *ctrlr,
                uint32_t num_queues, spdk_nvme_cmd_cb cb_fn,
                void *cb_arg);
int nvme_ctrlr_cmd_get_num_queues(struct spdk_nvme_ctrlr *ctrlr,
                spdk_nvme_cmd_cb cb_fn, void *cb_arg);
int nvme_ctrlr_cmd_set_async_event_config(struct spdk_nvme_ctrlr *ctrlr,
                union spdk_nvme_feat_async_event_configuration config,
                spdk_nvme_cmd_cb cb_fn, void *cb_arg);
int nvme_ctrlr_cmd_set_host_id(struct spdk_nvme_ctrlr *ctrlr, void *host_id, uint32_t host_id_size,
                spdk_nvme_cmd_cb cb_fn, void *cb_arg);
int nvme_ctrlr_cmd_attach_ns(struct spdk_nvme_ctrlr *ctrlr, uint32_t nsid,
                struct spdk_nvme_ctrlr_list *payload, spdk_nvme_cmd_cb cb_fn, void *cb_arg);
int nvme_ctrlr_cmd_detach_ns(struct spdk_nvme_ctrlr *ctrlr, uint32_t nsid,
                struct spdk_nvme_ctrlr_list *payload, spdk_nvme_cmd_cb cb_fn, void *cb_arg);
int nvme_ctrlr_cmd_create_ns(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_ns_data *payload,
                spdk_nvme_cmd_cb cb_fn, void *cb_arg);
int nvme_ctrlr_cmd_doorbell_buffer_config(struct spdk_nvme_ctrlr *ctrlr,
                uint64_t prp1, uint64_t prp2,
                spdk_nvme_cmd_cb cb_fn, void *cb_arg);
int nvme_ctrlr_cmd_delete_ns(struct spdk_nvme_ctrlr *ctrlr, uint32_t nsid, spdk_nvme_cmd_cb cb_fn,
                void *cb_arg);
int nvme_ctrlr_cmd_format(struct spdk_nvme_ctrlr *ctrlr, uint32_t nsid,
                struct spdk_nvme_format *format, spdk_nvme_cmd_cb cb_fn, void *cb_arg);
int nvme_ctrlr_cmd_fw_commit(struct spdk_nvme_ctrlr *ctrlr,
                const struct spdk_nvme_fw_commit *fw_commit,
                spdk_nvme_cmd_cb cb_fn, void *cb_arg);
int nvme_ctrlr_cmd_fw_image_download(struct spdk_nvme_ctrlr *ctrlr,
                uint32_t size, uint32_t offset, void *payload,
                spdk_nvme_cmd_cb cb_fn, void *cb_arg);
int nvme_ctrlr_cmd_sanitize(struct spdk_nvme_ctrlr *ctrlr, uint32_t nsid,
                struct spdk_nvme_sanitize *sanitize, uint32_t cdw11,
                spdk_nvme_cmd_cb cb_fn, void *cb_arg);
void nvme_completion_poll_cb(void *arg, const struct spdk_nvme_cpl *cpl);
int nvme_wait_for_completion(struct spdk_nvme_qpair *qpair,
                struct nvme_completion_poll_status *status);
int nvme_wait_for_completion_robust_lock(struct spdk_nvme_qpair *qpair,
                struct nvme_completion_poll_status *status,
                pthread_mutex_t *robust_mutex);
int nvme_wait_for_completion_timeout(struct spdk_nvme_qpair *qpair,
                struct nvme_completion_poll_status *status,
                uint64_t timeout_in_secs);

struct spdk_nvme_ctrlr_process *nvme_ctrlr_get_process(struct spdk_nvme_ctrlr *ctrlr,
                pid_t pid);
struct spdk_nvme_ctrlr_process *nvme_ctrlr_get_current_process(struct spdk_nvme_ctrlr *ctrlr);
int nvme_ctrlr_add_process(struct spdk_nvme_ctrlr *ctrlr, void *devhandle);
void nvme_ctrlr_free_processes(struct spdk_nvme_ctrlr *ctrlr);
struct spdk_pci_device *nvme_ctrlr_proc_get_devhandle(struct spdk_nvme_ctrlr *ctrlr);

int nvme_ctrlr_probe(const struct spdk_nvme_transport_id *trid,
                struct spdk_nvme_probe_ctx *probe_ctx, void *devhandle);

int nvme_ctrlr_construct(struct spdk_nvme_ctrlr *ctrlr);
void nvme_ctrlr_destruct_finish(struct spdk_nvme_ctrlr *ctrlr);
void nvme_ctrlr_destruct(struct spdk_nvme_ctrlr *ctrlr);
void nvme_ctrlr_fail(struct spdk_nvme_ctrlr *ctrlr, bool hot_remove);
int nvme_ctrlr_reset(struct spdk_nvme_ctrlr *ctrlr);
int nvme_ctrlr_process_init(struct spdk_nvme_ctrlr *ctrlr);
void nvme_ctrlr_connected(struct spdk_nvme_probe_ctx *probe_ctx,
                struct spdk_nvme_ctrlr *ctrlr);

int nvme_ctrlr_submit_admin_request(struct spdk_nvme_ctrlr *ctrlr,
                struct nvme_request *req);
int nvme_ctrlr_get_cap(struct spdk_nvme_ctrlr *ctrlr, union spdk_nvme_cap_register *cap);
int nvme_ctrlr_get_vs(struct spdk_nvme_ctrlr *ctrlr, union spdk_nvme_vs_register *vs);
int nvme_ctrlr_get_cmbsz(struct spdk_nvme_ctrlr *ctrlr, union spdk_nvme_cmbsz_register *cmbsz);
void nvme_ctrlr_init_cap(struct spdk_nvme_ctrlr *ctrlr, const union spdk_nvme_cap_register *cap,
                const union spdk_nvme_vs_register *vs);
void nvme_ctrlr_disconnect_qpair(struct spdk_nvme_qpair *qpair);
int nvme_qpair_init(struct spdk_nvme_qpair *qpair, uint16_t id,
                struct spdk_nvme_ctrlr *ctrlr,
                enum spdk_nvme_qprio qprio,
                uint32_t num_requests);
void nvme_qpair_deinit(struct spdk_nvme_qpair *qpair);
void nvme_qpair_complete_error_reqs(struct spdk_nvme_qpair *qpair);
int nvme_qpair_submit_request(struct spdk_nvme_qpair *qpair,
                struct nvme_request *req);
void nvme_qpair_abort_reqs(struct spdk_nvme_qpair *qpair, uint32_t dnr);
uint32_t nvme_qpair_abort_queued_reqs(struct spdk_nvme_qpair *qpair, void *cmd_cb_arg);
void nvme_qpair_resubmit_requests(struct spdk_nvme_qpair *qpair, uint32_t num_requests);

int nvme_ctrlr_identify_active_ns(struct spdk_nvme_ctrlr *ctrlr);
void nvme_ns_set_identify_data(struct spdk_nvme_ns *ns);
int nvme_ns_construct(struct spdk_nvme_ns *ns, uint32_t id,
                struct spdk_nvme_ctrlr *ctrlr);
void nvme_ns_destruct(struct spdk_nvme_ns *ns);
int nvme_ns_update(struct spdk_nvme_ns *ns);

int nvme_fabric_ctrlr_set_reg_4(struct spdk_nvme_ctrlr *ctrlr, uint32_t offset, uint32_t value);
int nvme_fabric_ctrlr_set_reg_8(struct spdk_nvme_ctrlr *ctrlr, uint32_t offset, uint64_t value);
int nvme_fabric_ctrlr_get_reg_4(struct spdk_nvme_ctrlr *ctrlr, uint32_t offset, uint32_t *value);
int nvme_fabric_ctrlr_scan(struct spdk_nvme_probe_ctx *probe_ctx, bool direct_connect);
int nvme_fabric_ctrlr_get_reg_8(struct spdk_nvme_ctrlr *ctrlr, uint32_t offset, uint64_t *value);
int nvme_fabric_ctrlr_discover(struct spdk_nvme_ctrlr *ctrlr,
                struct spdk_nvme_probe_ctx *probe_ctx);
int nvme_fabric_qpair_connect(struct spdk_nvme_qpair *qpair, uint32_t num_entries);

static inline struct nvme_request *
nvme_allocate_request(struct spdk_nvme_qpair *qpair,
                const struct nvme_payload *payload, uint32_t payload_size, uint32_t md_size,
                spdk_nvme_cmd_cb cb_fn, void *cb_arg)
{
        struct nvme_request *req;

        req = STAILQ_FIRST(&qpair->free_req);
        if (req == NULL) {
                return req;
        }

        STAILQ_REMOVE_HEAD(&qpair->free_req, stailq);

        /*
         * Only memset/zero fields that need it. All other fields
         * will be initialized appropriately either later in this
         * function, or before they are needed later in the
         * submission path. For example, the children
         * TAILQ_ENTRY and following members are
         * only used as part of I/O splitting so we avoid
         * memsetting them until it is actually needed.
         * They will be initialized in nvme_request_add_child()
         * if the request is split.
         */
        memset(req, 0, offsetof(struct nvme_request, payload_size));

        req->cb_fn = cb_fn;
        req->cb_arg = cb_arg;
        req->payload = *payload;
        req->payload_size = payload_size;
        req->md_size = md_size;
        req->pid = g_spdk_nvme_pid;
        req->submit_tick = 0;

        return req;
}

static inline struct nvme_request *
nvme_allocate_request_contig(struct spdk_nvme_qpair *qpair,
                void *buffer, uint32_t payload_size,
                spdk_nvme_cmd_cb cb_fn, void *cb_arg)
{
        struct nvme_payload payload;

        payload = NVME_PAYLOAD_CONTIG(buffer, NULL);

        return nvme_allocate_request(qpair, &payload, payload_size, 0, cb_fn, cb_arg);
}

static inline struct nvme_request *
nvme_allocate_request_null(struct spdk_nvme_qpair *qpair, spdk_nvme_cmd_cb cb_fn, void *cb_arg)
{
        return nvme_allocate_request_contig(qpair, NULL, 0, cb_fn, cb_arg);
}
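
/*
 * Editorial sketch (not in the original source): a typical allocate/submit
 * sequence for a contiguous buffer, assuming "qpair", "buf", "len", "done_cb"
 * and "ctx" are provided by the caller and the command fields are filled in
 * before submission.
 *
 *     struct nvme_request *req;
 *
 *     req = nvme_allocate_request_contig(qpair, buf, len, done_cb, ctx);
 *     if (req == NULL) {
 *             return -ENOMEM;
 *     }
 *     req->cmd.opc = SPDK_NVME_OPC_READ;
 *     // ... fill in nsid, cdw10-cdw12, etc. ...
 *     return nvme_qpair_submit_request(qpair, req);
 */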

struct nvme_request *nvme_allocate_request_user_copy(struct spdk_nvme_qpair *qpair,
                void *buffer, uint32_t payload_size,
                spdk_nvme_cmd_cb cb_fn, void *cb_arg, bool host_to_controller);

static inline void
nvme_complete_request(spdk_nvme_cmd_cb cb_fn, void *cb_arg, struct spdk_nvme_qpair *qpair,
                struct nvme_request *req, struct spdk_nvme_cpl *cpl)
{
        struct spdk_nvme_cpl err_cpl;
        struct nvme_error_cmd *cmd;

        /* Error injection at the completion path;
         * only inject for successfully completed commands.
         */
        if (spdk_unlikely(!TAILQ_EMPTY(&qpair->err_cmd_head) &&
                          !spdk_nvme_cpl_is_error(cpl))) {
                TAILQ_FOREACH(cmd, &qpair->err_cmd_head, link) {

                        if (cmd->do_not_submit) {
                                continue;
                        }

                        if ((cmd->opc == req->cmd.opc) && cmd->err_count) {

                                err_cpl = *cpl;
                                err_cpl.status.sct = cmd->status.sct;
                                err_cpl.status.sc = cmd->status.sc;

                                cpl = &err_cpl;
                                cmd->err_count--;
                                break;
                        }
                }
        }

        if (cb_fn) {
                cb_fn(cb_arg, cpl);
        }
}
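
/*
 * Editorial note (not in the original source): entries reach err_cmd_head via
 * the public error injection API (spdk_nvme_qpair_add_cmd_error_injection() /
 * spdk_nvme_qpair_remove_cmd_error_injection() in spdk/nvme.h), which is how
 * tests force a given opcode to report a chosen status code a configurable
 * number of times even though the device completed it successfully.
 */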

static inline void
nvme_free_request(struct nvme_request *req)
{
        assert(req != NULL);
        assert(req->num_children == 0);
        assert(req->qpair != NULL);

        STAILQ_INSERT_HEAD(&req->qpair->free_req, req, stailq);
}

static inline void
nvme_qpair_set_state(struct spdk_nvme_qpair *qpair, enum nvme_qpair_state state)
{
        qpair->state = state;
}

static inline enum nvme_qpair_state
nvme_qpair_get_state(struct spdk_nvme_qpair *qpair) {
        return qpair->state;
}

static inline void
nvme_qpair_free_request(struct spdk_nvme_qpair *qpair, struct nvme_request *req)
{
        assert(req != NULL);
        assert(req->num_children == 0);

        STAILQ_INSERT_HEAD(&qpair->free_req, req, stailq);
}

static inline void
nvme_request_remove_child(struct nvme_request *parent, struct nvme_request *child)
{
        assert(parent != NULL);
        assert(child != NULL);
        assert(child->parent == parent);
        assert(parent->num_children != 0);

        parent->num_children--;
        child->parent = NULL;
        TAILQ_REMOVE(&parent->children, child, child_tailq);
}

static inline void
nvme_cb_complete_child(void *child_arg, const struct spdk_nvme_cpl *cpl)
{
        struct nvme_request *child = child_arg;
        struct nvme_request *parent = child->parent;

        nvme_request_remove_child(parent, child);

        if (spdk_nvme_cpl_is_error(cpl)) {
                memcpy(&parent->parent_status, cpl, sizeof(*cpl));
        }

        if (parent->num_children == 0) {
                nvme_complete_request(parent->cb_fn, parent->cb_arg, parent->qpair,
                                      parent, &parent->parent_status);
                nvme_free_request(parent);
        }
}

static inline void
nvme_request_add_child(struct nvme_request *parent, struct nvme_request *child)
{
        assert(parent->num_children != UINT16_MAX);

        if (parent->num_children == 0) {
                /*
                 * Defer initialization of the children TAILQ since it falls
                 * on a separate cacheline. This ensures we do not touch this
                 * cacheline except on request splitting cases, which are
                 * relatively rare.
                 */
                TAILQ_INIT(&parent->children);
                parent->parent = NULL;
                memset(&parent->parent_status, 0, sizeof(struct spdk_nvme_cpl));
        }

        parent->num_children++;
        TAILQ_INSERT_TAIL(&parent->children, child, child_tailq);
        child->parent = parent;
        child->cb_fn = nvme_cb_complete_child;
        child->cb_arg = child;
}
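
/*
 * Editorial note (not in the original source): how the split-request helpers
 * above fit together. When an I/O exceeds a per-command limit it is broken
 * into children; each child is registered with nvme_request_add_child(),
 * which points its callback at nvme_cb_complete_child(). As each child
 * completes it detaches itself from the parent, any error status is copied
 * into parent_status, and once num_children reaches zero the parent is
 * completed with parent_status (SUCCESS unless some child recorded an error).
 *
 *     // roughly, in the splitting path:
 *     nvme_request_add_child(parent, child);
 *     // ... child is submitted; nvme_cb_complete_child() runs on completion ...
 */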

static inline void
nvme_request_free_children(struct nvme_request *req)
{
        struct nvme_request *child, *tmp;

        if (req->num_children == 0) {
                return;
        }

        /* free all child nvme_request */
        TAILQ_FOREACH_SAFE(child, &req->children, child_tailq, tmp) {
                nvme_request_remove_child(req, child);
                nvme_request_free_children(child);
                nvme_free_request(child);
        }
}

int nvme_request_check_timeout(struct nvme_request *req, uint16_t cid,
                struct spdk_nvme_ctrlr_process *active_proc, uint64_t now_tick);
uint64_t nvme_get_quirks(const struct spdk_pci_id *id);

int nvme_robust_mutex_init_shared(pthread_mutex_t *mtx);
int nvme_robust_mutex_init_recursive_shared(pthread_mutex_t *mtx);

bool nvme_completion_is_retry(const struct spdk_nvme_cpl *cpl);

struct spdk_nvme_ctrlr *nvme_get_ctrlr_by_trid_unsafe(
                const struct spdk_nvme_transport_id *trid);

const struct spdk_nvme_transport *nvme_get_transport(const char *transport_name);
const struct spdk_nvme_transport *nvme_get_first_transport(void);
const struct spdk_nvme_transport *nvme_get_next_transport(const struct spdk_nvme_transport
                *transport);

/* Transport specific functions */
struct spdk_nvme_ctrlr *nvme_transport_ctrlr_construct(const struct spdk_nvme_transport_id *trid,
                const struct spdk_nvme_ctrlr_opts *opts,
                void *devhandle);
int nvme_transport_ctrlr_destruct(struct spdk_nvme_ctrlr *ctrlr);
int nvme_transport_ctrlr_scan(struct spdk_nvme_probe_ctx *probe_ctx, bool direct_connect);
int nvme_transport_ctrlr_enable(struct spdk_nvme_ctrlr *ctrlr);
int nvme_transport_ctrlr_set_reg_4(struct spdk_nvme_ctrlr *ctrlr, uint32_t offset, uint32_t value);
int nvme_transport_ctrlr_set_reg_8(struct spdk_nvme_ctrlr *ctrlr, uint32_t offset, uint64_t value);
int nvme_transport_ctrlr_get_reg_4(struct spdk_nvme_ctrlr *ctrlr, uint32_t offset, uint32_t *value);
int nvme_transport_ctrlr_get_reg_8(struct spdk_nvme_ctrlr *ctrlr, uint32_t offset, uint64_t *value);
uint32_t nvme_transport_ctrlr_get_max_xfer_size(struct spdk_nvme_ctrlr *ctrlr);
uint16_t nvme_transport_ctrlr_get_max_sges(struct spdk_nvme_ctrlr *ctrlr);
struct spdk_nvme_qpair *nvme_transport_ctrlr_create_io_qpair(struct spdk_nvme_ctrlr *ctrlr,
                uint16_t qid, const struct spdk_nvme_io_qpair_opts *opts);
int nvme_transport_ctrlr_reserve_cmb(struct spdk_nvme_ctrlr *ctrlr);
void *nvme_transport_ctrlr_map_cmb(struct spdk_nvme_ctrlr *ctrlr, size_t *size);
int nvme_transport_ctrlr_unmap_cmb(struct spdk_nvme_ctrlr *ctrlr);
int nvme_transport_ctrlr_delete_io_qpair(struct spdk_nvme_ctrlr *ctrlr,
                struct spdk_nvme_qpair *qpair);
int nvme_transport_ctrlr_connect_qpair(struct spdk_nvme_ctrlr *ctrlr,
                struct spdk_nvme_qpair *qpair);
void nvme_transport_ctrlr_disconnect_qpair(struct spdk_nvme_ctrlr *ctrlr,
                struct spdk_nvme_qpair *qpair);
void nvme_transport_qpair_abort_reqs(struct spdk_nvme_qpair *qpair, uint32_t dnr);
int nvme_transport_qpair_reset(struct spdk_nvme_qpair *qpair);
int nvme_transport_qpair_submit_request(struct spdk_nvme_qpair *qpair, struct nvme_request *req);
int32_t nvme_transport_qpair_process_completions(struct spdk_nvme_qpair *qpair,
                uint32_t max_completions);
void nvme_transport_admin_qpair_abort_aers(struct spdk_nvme_qpair *qpair);
int nvme_transport_qpair_iterate_requests(struct spdk_nvme_qpair *qpair,
                int (*iter_fn)(struct nvme_request *req, void *arg),
                void *arg);

struct spdk_nvme_transport_poll_group *nvme_transport_poll_group_create(
                const struct spdk_nvme_transport *transport);
int nvme_transport_poll_group_add(struct spdk_nvme_transport_poll_group *tgroup,
                struct spdk_nvme_qpair *qpair);
int nvme_transport_poll_group_remove(struct spdk_nvme_transport_poll_group *tgroup,
                struct spdk_nvme_qpair *qpair);
int nvme_transport_poll_group_disconnect_qpair(struct spdk_nvme_qpair *qpair);
int nvme_transport_poll_group_connect_qpair(struct spdk_nvme_qpair *qpair);
int64_t nvme_transport_poll_group_process_completions(struct spdk_nvme_transport_poll_group *tgroup,
                uint32_t completions_per_qpair, spdk_nvme_disconnected_qpair_cb disconnected_qpair_cb);
int nvme_transport_poll_group_destroy(struct spdk_nvme_transport_poll_group *tgroup);

/*
 * The ref related functions below must be called with the global
 * driver lock held for the multi-process condition.
 * Within these functions, the per-ctrlr ctrlr_lock is also
 * acquired for the multi-thread condition.
 */
void nvme_ctrlr_proc_get_ref(struct spdk_nvme_ctrlr *ctrlr);
void nvme_ctrlr_proc_put_ref(struct spdk_nvme_ctrlr *ctrlr);
int nvme_ctrlr_get_ref_count(struct spdk_nvme_ctrlr *ctrlr);

static inline bool
_is_page_aligned(uint64_t address, uint64_t page_size)
{
        return (address & (page_size - 1)) == 0;
}
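
/*
 * Editorial note (not in the original source): _is_page_aligned() relies on
 * page_size being a power of two, so (page_size - 1) is a mask of the offset
 * bits. For example, with page_size = 0x1000, address 0x200000 is aligned
 * (0x200000 & 0xFFF == 0) while 0x200010 is not (0x200010 & 0xFFF == 0x10).
 */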

#endif /* __NVME_INTERNAL_H__ */