/*-
 * BSD LICENSE
 *
 * Copyright (c) Intel Corporation.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 *   * Redistributions of source code must retain the above copyright
 *     notice, this list of conditions and the following disclaimer.
 *   * Redistributions in binary form must reproduce the above copyright
 *     notice, this list of conditions and the following disclaimer in
 *     the documentation and/or other materials provided with the
 *     distribution.
 *   * Neither the name of Intel Corporation nor the names of its
 *     contributors may be used to endorse or promote products derived
 *     from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#ifndef __NVME_INTERNAL_H__
#define __NVME_INTERNAL_H__

#include "spdk/config.h"
#include "spdk/likely.h"
#include "spdk/stdinc.h"

#include "spdk/nvme.h"

#if defined(__i386__) || defined(__x86_64__)
#include <x86intrin.h>
#endif

#include "spdk/queue.h"
#include "spdk/barrier.h"
#include "spdk/bit_array.h"
#include "spdk/mmio.h"
#include "spdk/pci_ids.h"
#include "spdk/util.h"
#include "spdk/nvme_intel.h"
#include "spdk/nvmf_spec.h"
#include "spdk/uuid.h"

#include "spdk_internal/assert.h"
#include "spdk_internal/log.h"
#include "spdk_internal/memory.h"

extern pid_t g_spdk_nvme_pid;
/*
 * Some Intel devices support a vendor-unique read latency log page even
 * though the log page directory says otherwise.
 */
#define NVME_INTEL_QUIRK_READ_LATENCY 0x1

/*
 * Some Intel devices support a vendor-unique write latency log page even
 * though the log page directory says otherwise.
 */
#define NVME_INTEL_QUIRK_WRITE_LATENCY 0x2

/*
 * The controller needs a delay before it starts checking the device
 * readiness, which is done by reading the NVME_CSTS_RDY bit.
 */
#define NVME_QUIRK_DELAY_BEFORE_CHK_RDY 0x4

/*
 * The controller performs best when I/O is split on particular
 * LBA boundaries.
 */
#define NVME_INTEL_QUIRK_STRIPING 0x8

/*
 * The controller needs a delay after allocating an I/O queue pair
 * before it is ready to accept I/O commands.
 */
#define NVME_QUIRK_DELAY_AFTER_QUEUE_ALLOC 0x10

/*
 * Earlier NVMe devices do not indicate whether unmapped blocks
 * will read all zeroes or not. This define indicates that the
 * device does in fact read all zeroes after an unmap event.
 */
#define NVME_QUIRK_READ_ZERO_AFTER_DEALLOCATE 0x20

/*
 * The controller doesn't handle Identify values other than 0 or 1 correctly.
 */
#define NVME_QUIRK_IDENTIFY_CNS 0x40

/*
 * The controller supports the Open Channel command set if an additional
 * condition is met, e.g. the first byte (value 0x1) of the vendor-specific
 * bits in the namespace identify structure is set.
 */
#define NVME_QUIRK_OCSSD 0x80

/*
 * The controller has an Intel vendor ID but does not support Intel vendor-specific
 * log pages. This is primarily for QEMU-emulated SSDs, which report an Intel vendor
 * ID but do not support these log pages.
 */
#define NVME_INTEL_QUIRK_NO_LOG_PAGES 0x100

/*
 * The controller does not set SHST_COMPLETE in a reasonable amount of time. This
 * is primarily seen in virtual VMware NVMe SSDs. This quirk merely adds an additional
 * error message noting that, on VMware NVMe SSDs, the shutdown timeout may be expected.
 */
#define NVME_QUIRK_SHST_COMPLETE 0x200
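
/*
 * Illustrative sketch (not part of the driver): each quirk is one bit in the
 * spdk_nvme_ctrlr::quirks bitmask declared later in this header, so a
 * controller's quirks are combined with bitwise OR and tested with bitwise
 * AND.  The pci_id variable below is hypothetical.
 *
 *	ctrlr->quirks = nvme_get_quirks(&pci_id);
 *	if (ctrlr->quirks & NVME_INTEL_QUIRK_STRIPING) {
 *		// honor the stripe size when splitting large I/O
 *	}
 */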

#define NVME_MAX_ASYNC_EVENTS (8)

#define NVME_MAX_ADMIN_TIMEOUT_IN_SECS (30)

/* Maximum log page size to fetch for AERs. */
#define NVME_MAX_AER_LOG_SIZE (4096)

/*
 * NVME_MAX_IO_QUEUES in nvme_spec.h defines the 64K spec-limit, but this
 * define specifies the maximum number of queues this driver will actually
 * try to configure, if available.
 */
#define DEFAULT_MAX_IO_QUEUES (1024)
#define DEFAULT_IO_QUEUE_SIZE (256)

#define DEFAULT_ADMIN_QUEUE_REQUESTS (32)
#define DEFAULT_IO_QUEUE_REQUESTS (512)

#define MIN_KEEP_ALIVE_TIMEOUT_IN_MS (10000)

/* We want to fit submission and completion rings each in a single 2MB
 * hugepage to ensure physical address contiguity.
 */
#define MAX_IO_QUEUE_ENTRIES (VALUE_2MB / spdk_max( \
		sizeof(struct spdk_nvme_cmd), \
		sizeof(struct spdk_nvme_cpl)))
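
/*
 * Worked example (illustrative, assuming the entry sizes fixed by the NVMe
 * spec): a submission queue entry (struct spdk_nvme_cmd) is 64 bytes and a
 * completion queue entry (struct spdk_nvme_cpl) is 16 bytes, so with
 * VALUE_2MB = 2 MiB the cap works out to 2097152 / 64 = 32768 entries per
 * I/O queue.
 */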

enum nvme_payload_type {
	NVME_PAYLOAD_TYPE_INVALID = 0,

	/** The request payload is a single contiguous buffer (nvme_payload::contig_or_cb_arg) */
	NVME_PAYLOAD_TYPE_CONTIG,

	/** The request payload is described by the nvme_payload SGL callbacks */
	NVME_PAYLOAD_TYPE_SGL,
};

/**
 * Descriptor for a request data payload.
 */
struct nvme_payload {
	/**
	 * Functions for retrieving physical addresses for scattered payloads.
	 */
	spdk_nvme_req_reset_sgl_cb reset_sgl_fn;
	spdk_nvme_req_next_sge_cb next_sge_fn;

	/**
	 * If reset_sgl_fn == NULL, this is a contig payload, and contig_or_cb_arg contains the
	 * virtual memory address of a single virtually contiguous buffer.
	 *
	 * If reset_sgl_fn != NULL, this is an SGL payload, and contig_or_cb_arg contains the
	 * cb_arg that will be passed to the SGL callback functions.
	 */
	void *contig_or_cb_arg;

	/** Virtual memory address of a single virtually contiguous metadata buffer */
	void *md;
};

#define NVME_PAYLOAD_CONTIG(contig_, md_) \
	(struct nvme_payload) { \
		.reset_sgl_fn = NULL, \
		.next_sge_fn = NULL, \
		.contig_or_cb_arg = (contig_), \
		.md = (md_), \
	}

#define NVME_PAYLOAD_SGL(reset_sgl_fn_, next_sge_fn_, cb_arg_, md_) \
	(struct nvme_payload) { \
		.reset_sgl_fn = (reset_sgl_fn_), \
		.next_sge_fn = (next_sge_fn_), \
		.contig_or_cb_arg = (cb_arg_), \
		.md = (md_), \
	}

static inline enum nvme_payload_type
nvme_payload_type(const struct nvme_payload *payload) {
	return payload->reset_sgl_fn ? NVME_PAYLOAD_TYPE_SGL : NVME_PAYLOAD_TYPE_CONTIG;
}
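
/*
 * Illustrative sketch (not part of the driver): building a contiguous payload
 * descriptor for a caller-provided buffer and checking its type.  data_buf is
 * a hypothetical virtually contiguous buffer; no metadata buffer is attached.
 *
 *	struct nvme_payload payload = NVME_PAYLOAD_CONTIG(data_buf, NULL);
 *	assert(nvme_payload_type(&payload) == NVME_PAYLOAD_TYPE_CONTIG);
 */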

struct nvme_error_cmd {
	bool do_not_submit;
	uint64_t timeout_tsc;
	uint32_t err_count;
	uint8_t opc;
	struct spdk_nvme_status status;
	TAILQ_ENTRY(nvme_error_cmd) link;
};

struct nvme_request {
	struct spdk_nvme_cmd cmd;

	uint8_t retries;

	bool timed_out;

	/**
	 * Number of child requests still outstanding for this
	 * request, which was split into multiple child requests.
	 */
	uint16_t num_children;

	/**
	 * Offset in bytes from the beginning of the payload for this request.
	 * This is used for I/O commands that are split into multiple requests.
	 */
	uint32_t payload_offset;
	uint32_t md_offset;

	uint32_t payload_size;

	/**
	 * Timeout ticks for error injection requests; can be extended in the future
	 * to support a per-request timeout feature.
	 */
	uint64_t timeout_tsc;

	/**
	 * Data payload for this request's command.
	 */
	struct nvme_payload payload;

	spdk_nvme_cmd_cb cb_fn;
	void *cb_arg;
	STAILQ_ENTRY(nvme_request) stailq;

	struct spdk_nvme_qpair *qpair;

	/*
	 * The value of spdk_get_ticks() when the request was submitted to the hardware.
	 * Only set if ctrlr->timeout_enabled is true.
	 */
	uint64_t submit_tick;

	/**
	 * The active admin request can be moved to a per-process pending
	 * list based on the saved pid to tell which process it belongs
	 * to. The cpl saves the original completion information which
	 * is used in the completion callback.
	 * NOTE: the two fields below are only used for admin requests.
	 */
	pid_t pid;
	struct spdk_nvme_cpl cpl;

	/**
	 * The following members should not be reordered with the members
	 * above. These members are only needed when splitting
	 * requests, which is done rarely, and the driver is careful
	 * to not touch the following fields until a split operation is
	 * needed, to avoid touching an extra cacheline.
	 */

	/**
	 * Points to the outstanding child requests for a parent request.
	 * Only valid if a request was split into multiple child
	 * requests, and is not initialized for non-split requests.
	 */
	TAILQ_HEAD(, nvme_request) children;

	/**
	 * Linked-list pointers for a child request in its parent's list.
	 */
	TAILQ_ENTRY(nvme_request) child_tailq;

	/**
	 * Points to a parent request if part of a split request,
	 * NULL otherwise.
	 */
	struct nvme_request *parent;

	/**
	 * Completion status for a parent request. Initialized to all 0's
	 * (SUCCESS) before child requests are submitted. If a child
	 * request completes with error, the error status is copied here,
	 * to ensure that the parent request is also completed with error
	 * status once all child requests are completed.
	 */
	struct spdk_nvme_cpl parent_status;

	/**
	 * The user_cb_fn and user_cb_arg fields are used for holding the original
	 * callback data when using nvme_allocate_request_user_copy.
	 */
	spdk_nvme_cmd_cb user_cb_fn;
	void *user_cb_arg;
	void *user_buffer;
};

struct nvme_completion_poll_status {
	struct spdk_nvme_cpl cpl;
	bool done;
};

struct nvme_async_event_request {
	struct spdk_nvme_ctrlr *ctrlr;
	struct nvme_request *req;
	struct spdk_nvme_cpl cpl;
};

struct spdk_nvme_qpair {
	struct spdk_nvme_ctrlr *ctrlr;

	uint16_t id;

	uint8_t qprio;

	uint8_t is_enabled : 1;
	uint8_t is_connecting : 1;

	/*
	 * Members for handling IO qpair deletion inside of a completion context.
	 * These are specifically defined as single bits, so that they do not
	 * push this data structure out to another cacheline.
	 */
	uint8_t in_completion_context : 1;
	uint8_t delete_after_completion_context : 1;

	/*
	 * Set when no deletion notification is needed. For example, the process
	 * which allocated this qpair exited unexpectedly.
	 */
	uint8_t no_deletion_notification_needed : 1;

	enum spdk_nvme_transport_type trtype;

	STAILQ_HEAD(, nvme_request) free_req;
	STAILQ_HEAD(, nvme_request) queued_req;

	/** Commands whose opcode is in this list will return an error */
	TAILQ_HEAD(, nvme_error_cmd) err_cmd_head;
	/** Requests in this list will return an error */
	STAILQ_HEAD(, nvme_request) err_req_head;

	/* List entry for spdk_nvme_ctrlr::active_io_qpairs */
	TAILQ_ENTRY(spdk_nvme_qpair) tailq;

	/* List entry for spdk_nvme_ctrlr_process::allocated_io_qpairs */
	TAILQ_ENTRY(spdk_nvme_qpair) per_process_tailq;

	struct spdk_nvme_ctrlr_process *active_proc;

	void *req_buf;
};

struct spdk_nvme_ns {
	struct spdk_nvme_ctrlr *ctrlr;
	uint32_t sector_size;

	/*
	 * Size of data transferred as part of each block,
	 * including metadata if FLBAS indicates the metadata is transferred
	 * as part of the data buffer at the end of each LBA.
	 */
	uint32_t extended_lba_size;

	uint32_t md_size;
	uint32_t pi_type;
	uint32_t sectors_per_max_io;
	uint32_t sectors_per_stripe;
	uint32_t id;
	uint16_t flags;

	/* Namespace Identification Descriptor List (CNS = 03h) */
	uint8_t id_desc_list[4096];
};

/**
 * State of struct spdk_nvme_ctrlr (in particular, during initialization).
 */
enum nvme_ctrlr_state {
	/**
	 * Wait before initializing the controller.
	 */
	NVME_CTRLR_STATE_INIT_DELAY,

	/**
	 * Controller has not been initialized yet.
	 */
	NVME_CTRLR_STATE_INIT,

	/**
	 * Waiting for CSTS.RDY to transition from 0 to 1 so that CC.EN may be set to 0.
	 */
	NVME_CTRLR_STATE_DISABLE_WAIT_FOR_READY_1,

	/**
	 * Waiting for CSTS.RDY to transition from 1 to 0 so that CC.EN may be set to 1.
	 */
	NVME_CTRLR_STATE_DISABLE_WAIT_FOR_READY_0,

	/**
	 * Enable the controller by writing CC.EN to 1.
	 */
	NVME_CTRLR_STATE_ENABLE,

	/**
	 * Waiting for CSTS.RDY to transition from 0 to 1 after enabling the controller.
	 */
	NVME_CTRLR_STATE_ENABLE_WAIT_FOR_READY_1,

	/**
	 * Enable the Admin queue of the controller.
	 */
	NVME_CTRLR_STATE_ENABLE_ADMIN_QUEUE,

	/**
	 * The Identify Controller command will be sent to the controller.
	 */
	NVME_CTRLR_STATE_IDENTIFY,

	/**
	 * Waiting for the Identify Controller command to be completed.
	 */
	NVME_CTRLR_STATE_WAIT_FOR_IDENTIFY,

	/**
	 * Set the Number of Queues of the controller.
	 */
	NVME_CTRLR_STATE_SET_NUM_QUEUES,

	/**
	 * Waiting for the Set Number of Queues command to be completed.
	 */
	NVME_CTRLR_STATE_WAIT_FOR_SET_NUM_QUEUES,

	/**
	 * Get the Number of Queues of the controller.
	 */
	NVME_CTRLR_STATE_GET_NUM_QUEUES,

	/**
	 * Waiting for the Get Number of Queues command to be completed.
	 */
	NVME_CTRLR_STATE_WAIT_FOR_GET_NUM_QUEUES,

	/**
	 * Construct Namespace data structures of the controller.
	 */
	NVME_CTRLR_STATE_CONSTRUCT_NS,

	/**
	 * Get the active Namespace list of the controller.
	 */
	NVME_CTRLR_STATE_IDENTIFY_ACTIVE_NS,

	/**
	 * Get the Identify Namespace Data structure for each NS.
	 */
	NVME_CTRLR_STATE_IDENTIFY_NS,

	/**
	 * Waiting for the Identify Namespace commands to be completed.
	 */
	NVME_CTRLR_STATE_WAIT_FOR_IDENTIFY_NS,

	/**
	 * Get the Identify Namespace Identification Descriptors.
	 */
	NVME_CTRLR_STATE_IDENTIFY_ID_DESCS,

	/**
	 * Waiting for the Identify Namespace Identification
	 * Descriptors to be completed.
	 */
	NVME_CTRLR_STATE_WAIT_FOR_IDENTIFY_ID_DESCS,

	/**
	 * Configure AER of the controller.
	 */
	NVME_CTRLR_STATE_CONFIGURE_AER,

	/**
	 * Waiting for the Configure AER to be completed.
	 */
	NVME_CTRLR_STATE_WAIT_FOR_CONFIGURE_AER,

	/**
	 * Set supported log pages of the controller.
	 */
	NVME_CTRLR_STATE_SET_SUPPORTED_LOG_PAGES,

	/**
	 * Set supported features of the controller.
	 */
	NVME_CTRLR_STATE_SET_SUPPORTED_FEATURES,

	/**
	 * Set the Doorbell Buffer Config of the controller.
	 */
	NVME_CTRLR_STATE_SET_DB_BUF_CFG,

	/**
	 * Waiting for the Doorbell Buffer Config to be completed.
	 */
	NVME_CTRLR_STATE_WAIT_FOR_DB_BUF_CFG,

	/**
	 * Set the Keep Alive Timeout of the controller.
	 */
	NVME_CTRLR_STATE_SET_KEEP_ALIVE_TIMEOUT,

	/**
	 * Waiting for the Set Keep Alive Timeout to be completed.
	 */
	NVME_CTRLR_STATE_WAIT_FOR_KEEP_ALIVE_TIMEOUT,

	/**
	 * Set the Host ID of the controller.
	 */
	NVME_CTRLR_STATE_SET_HOST_ID,

	/**
	 * Waiting for Set Host ID to be completed.
	 */
	NVME_CTRLR_STATE_WAIT_FOR_HOST_ID,

	/**
	 * Controller initialization has completed and the controller is ready.
	 */
	NVME_CTRLR_STATE_READY,

	/**
	 * Controller initialization has an error.
	 */
	NVME_CTRLR_STATE_ERROR
};

#define NVME_TIMEOUT_INFINITE UINT64_MAX

/*
 * Used to track properties for all processes accessing the controller.
 */
struct spdk_nvme_ctrlr_process {
	/** Whether it is the primary process */
	bool is_primary;

	/** Process ID */
	pid_t pid;

	/** Active admin requests to be completed */
	STAILQ_HEAD(, nvme_request) active_reqs;

	TAILQ_ENTRY(spdk_nvme_ctrlr_process) tailq;

	/** Per process PCI device handle */
	struct spdk_pci_device *devhandle;

	/** Reference count to track the number of attachments to this controller. */
	int ref;

	/** Allocated IO qpairs */
	TAILQ_HEAD(, spdk_nvme_qpair) allocated_io_qpairs;

	spdk_nvme_aer_cb aer_cb_fn;
	void *aer_cb_arg;

	/**
	 * A function pointer to the timeout callback function.
	 */
	spdk_nvme_timeout_cb timeout_cb_fn;
	void *timeout_cb_arg;
	uint64_t timeout_ticks;
};

/*
 * One of these per allocated PCI device.
 */
struct spdk_nvme_ctrlr {
	/* Hot data (accessed in I/O path) starts here. */

	/** Array of namespaces indexed by nsid - 1 */
	struct spdk_nvme_ns *ns;

	struct spdk_nvme_transport_id trid;

	uint32_t num_ns;

	bool is_removed;

	bool is_resetting;

	bool is_failed;

	bool is_shutdown;

	bool timeout_enabled;

	uint16_t max_sges;

	uint16_t cntlid;

	/** Controller support flags */
	uint64_t flags;

	/* Cold data (not accessed in normal I/O path) is after this point. */

	union spdk_nvme_cap_register cap;
	union spdk_nvme_vs_register vs;

	enum nvme_ctrlr_state state;
	uint64_t state_timeout_tsc;

	uint64_t next_keep_alive_tick;
	uint64_t keep_alive_interval_ticks;

	TAILQ_ENTRY(spdk_nvme_ctrlr) tailq;

	/** All the log pages supported */
	bool log_page_supported[256];

	/** All the features supported */
	bool feature_supported[256];

	/** maximum i/o size in bytes */
	uint32_t max_xfer_size;

	/** minimum page size supported by this controller in bytes */
	uint32_t min_page_size;

	/** selected memory page size for this controller in bytes */
	uint32_t page_size;

	uint32_t num_aers;
	struct nvme_async_event_request aer[NVME_MAX_ASYNC_EVENTS];

	/** guards access to the controller itself, including admin queues */
	pthread_mutex_t ctrlr_lock;

	struct spdk_nvme_qpair *adminq;

	/** shadow doorbell buffer */
	uint32_t *shadow_doorbell;
	/** eventidx buffer */
	uint32_t *eventidx;

	/**
	 * Identify Controller data.
	 */
	struct spdk_nvme_ctrlr_data cdata;

	/**
	 * Keep track of active namespaces
	 */
	uint32_t *active_ns_list;

	/**
	 * Array of Identify Namespace data.
	 *
	 * Stored separately from ns since nsdata should not normally be accessed during I/O.
	 */
	struct spdk_nvme_ns_data *nsdata;

	struct spdk_bit_array *free_io_qids;
	TAILQ_HEAD(, spdk_nvme_qpair) active_io_qpairs;

	struct spdk_nvme_ctrlr_opts opts;

	uint64_t quirks;

	/* Extra sleep time during controller initialization */
	uint64_t sleep_timeout_tsc;

	/** Track all the processes that manage this controller */
	TAILQ_HEAD(, spdk_nvme_ctrlr_process) active_procs;

	STAILQ_HEAD(, nvme_request) queued_aborts;
	uint32_t outstanding_aborts;
};

struct spdk_nvme_probe_ctx {
	struct spdk_nvme_transport_id trid;
	void *cb_ctx;
	spdk_nvme_probe_cb probe_cb;
	spdk_nvme_attach_cb attach_cb;
	spdk_nvme_remove_cb remove_cb;
	TAILQ_HEAD(, spdk_nvme_ctrlr) init_ctrlrs;
};

struct nvme_driver {
	pthread_mutex_t lock;

	/** Multi-process shared attached controller list */
	TAILQ_HEAD(, spdk_nvme_ctrlr) shared_attached_ctrlrs;

	bool initialized;
	struct spdk_uuid default_extended_host_id;
};

extern struct nvme_driver *g_spdk_nvme_driver;

int nvme_driver_init(void);

#define nvme_delay usleep

static inline bool
nvme_qpair_is_admin_queue(struct spdk_nvme_qpair *qpair)
{
	return qpair->id == 0;
}

static inline bool
nvme_qpair_is_io_queue(struct spdk_nvme_qpair *qpair)
{
	return qpair->id != 0;
}

static inline int
nvme_robust_mutex_lock(pthread_mutex_t *mtx)
{
	int rc = pthread_mutex_lock(mtx);

#ifndef __FreeBSD__
	if (rc == EOWNERDEAD) {
		rc = pthread_mutex_consistent(mtx);
	}
#endif

	return rc;
}

static inline int
nvme_robust_mutex_unlock(pthread_mutex_t *mtx)
{
	return pthread_mutex_unlock(mtx);
}
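
/*
 * Illustrative sketch (not part of the driver): the robust-mutex wrappers above
 * are typically used around state shared between processes, e.g. the
 * per-controller ctrlr_lock that guards the admin queue.  Error handling is
 * omitted and the surrounding function is hypothetical.
 *
 *	nvme_robust_mutex_lock(&ctrlr->ctrlr_lock);
 *	// touch the admin queue / shared controller state here
 *	nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock);
 */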

/* Admin functions */
int nvme_ctrlr_cmd_identify(struct spdk_nvme_ctrlr *ctrlr,
			    uint8_t cns, uint16_t cntid, uint32_t nsid,
			    void *payload, size_t payload_size,
			    spdk_nvme_cmd_cb cb_fn, void *cb_arg);
int nvme_ctrlr_cmd_set_num_queues(struct spdk_nvme_ctrlr *ctrlr,
				  uint32_t num_queues, spdk_nvme_cmd_cb cb_fn,
				  void *cb_arg);
int nvme_ctrlr_cmd_get_num_queues(struct spdk_nvme_ctrlr *ctrlr,
				  spdk_nvme_cmd_cb cb_fn, void *cb_arg);
int nvme_ctrlr_cmd_set_async_event_config(struct spdk_nvme_ctrlr *ctrlr,
		union spdk_nvme_feat_async_event_configuration config,
		spdk_nvme_cmd_cb cb_fn, void *cb_arg);
int nvme_ctrlr_cmd_set_host_id(struct spdk_nvme_ctrlr *ctrlr, void *host_id, uint32_t host_id_size,
			       spdk_nvme_cmd_cb cb_fn, void *cb_arg);
int nvme_ctrlr_cmd_attach_ns(struct spdk_nvme_ctrlr *ctrlr, uint32_t nsid,
			     struct spdk_nvme_ctrlr_list *payload, spdk_nvme_cmd_cb cb_fn, void *cb_arg);
int nvme_ctrlr_cmd_detach_ns(struct spdk_nvme_ctrlr *ctrlr, uint32_t nsid,
			     struct spdk_nvme_ctrlr_list *payload, spdk_nvme_cmd_cb cb_fn, void *cb_arg);
int nvme_ctrlr_cmd_create_ns(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_ns_data *payload,
			     spdk_nvme_cmd_cb cb_fn, void *cb_arg);
int nvme_ctrlr_cmd_doorbell_buffer_config(struct spdk_nvme_ctrlr *ctrlr,
		uint64_t prp1, uint64_t prp2,
		spdk_nvme_cmd_cb cb_fn, void *cb_arg);
int nvme_ctrlr_cmd_delete_ns(struct spdk_nvme_ctrlr *ctrlr, uint32_t nsid, spdk_nvme_cmd_cb cb_fn,
			     void *cb_arg);
int nvme_ctrlr_cmd_format(struct spdk_nvme_ctrlr *ctrlr, uint32_t nsid,
			  struct spdk_nvme_format *format, spdk_nvme_cmd_cb cb_fn, void *cb_arg);
int nvme_ctrlr_cmd_fw_commit(struct spdk_nvme_ctrlr *ctrlr,
			     const struct spdk_nvme_fw_commit *fw_commit,
			     spdk_nvme_cmd_cb cb_fn, void *cb_arg);
int nvme_ctrlr_cmd_fw_image_download(struct spdk_nvme_ctrlr *ctrlr,
				     uint32_t size, uint32_t offset, void *payload,
				     spdk_nvme_cmd_cb cb_fn, void *cb_arg);
int nvme_ctrlr_cmd_security_receive(struct spdk_nvme_ctrlr *ctrlr, uint8_t secp, uint16_t spsp,
				    uint8_t nssf, void *payload, uint32_t payload_size,
				    spdk_nvme_cmd_cb cb_fn, void *cb_arg);
int nvme_ctrlr_cmd_security_send(struct spdk_nvme_ctrlr *ctrlr, uint8_t secp,
				 uint16_t spsp, uint8_t nssf, void *payload,
				 uint32_t payload_size, spdk_nvme_cmd_cb cb_fn, void *cb_arg);
int nvme_ctrlr_cmd_sanitize(struct spdk_nvme_ctrlr *ctrlr, uint32_t nsid,
			    struct spdk_nvme_sanitize *sanitize, uint32_t cdw11,
			    spdk_nvme_cmd_cb cb_fn, void *cb_arg);
void nvme_completion_poll_cb(void *arg, const struct spdk_nvme_cpl *cpl);
int spdk_nvme_wait_for_completion(struct spdk_nvme_qpair *qpair,
				  struct nvme_completion_poll_status *status);
int spdk_nvme_wait_for_completion_robust_lock(struct spdk_nvme_qpair *qpair,
		struct nvme_completion_poll_status *status,
		pthread_mutex_t *robust_mutex);
int spdk_nvme_wait_for_completion_timeout(struct spdk_nvme_qpair *qpair,
		struct nvme_completion_poll_status *status,
		uint64_t timeout_in_secs);
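
/*
 * Illustrative sketch (not part of the driver): the usual pattern for issuing
 * an admin command synchronously is to pass nvme_completion_poll_cb with an
 * nvme_completion_poll_status, then poll the admin queue until it completes.
 * Error handling and the identify-buffer setup are omitted; the cns, nsid and
 * buf values below are placeholders.
 *
 *	struct nvme_completion_poll_status status = {};
 *
 *	nvme_ctrlr_cmd_identify(ctrlr, cns, 0, nsid, buf, sizeof(*buf),
 *				nvme_completion_poll_cb, &status);
 *	if (spdk_nvme_wait_for_completion(ctrlr->adminq, &status)) {
 *		// command failed or timed out
 *	}
 */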

struct spdk_nvme_ctrlr_process *spdk_nvme_ctrlr_get_process(struct spdk_nvme_ctrlr *ctrlr,
		pid_t pid);
struct spdk_nvme_ctrlr_process *spdk_nvme_ctrlr_get_current_process(struct spdk_nvme_ctrlr *ctrlr);
int nvme_ctrlr_add_process(struct spdk_nvme_ctrlr *ctrlr, void *devhandle);
void nvme_ctrlr_free_processes(struct spdk_nvme_ctrlr *ctrlr);
struct spdk_pci_device *nvme_ctrlr_proc_get_devhandle(struct spdk_nvme_ctrlr *ctrlr);

int nvme_ctrlr_probe(const struct spdk_nvme_transport_id *trid,
		     struct spdk_nvme_probe_ctx *probe_ctx, void *devhandle);

int nvme_ctrlr_construct(struct spdk_nvme_ctrlr *ctrlr);
void nvme_ctrlr_destruct_finish(struct spdk_nvme_ctrlr *ctrlr);
void nvme_ctrlr_destruct(struct spdk_nvme_ctrlr *ctrlr);
void nvme_ctrlr_fail(struct spdk_nvme_ctrlr *ctrlr, bool hot_remove);
int nvme_ctrlr_process_init(struct spdk_nvme_ctrlr *ctrlr);
void nvme_ctrlr_connected(struct spdk_nvme_probe_ctx *probe_ctx,
			  struct spdk_nvme_ctrlr *ctrlr);

int nvme_ctrlr_submit_admin_request(struct spdk_nvme_ctrlr *ctrlr,
				    struct nvme_request *req);
int nvme_ctrlr_get_cap(struct spdk_nvme_ctrlr *ctrlr, union spdk_nvme_cap_register *cap);
int nvme_ctrlr_get_vs(struct spdk_nvme_ctrlr *ctrlr, union spdk_nvme_vs_register *vs);
int nvme_ctrlr_get_cmbsz(struct spdk_nvme_ctrlr *ctrlr, union spdk_nvme_cmbsz_register *cmbsz);
void nvme_ctrlr_init_cap(struct spdk_nvme_ctrlr *ctrlr, const union spdk_nvme_cap_register *cap,
			 const union spdk_nvme_vs_register *vs);
int nvme_qpair_init(struct spdk_nvme_qpair *qpair, uint16_t id,
		    struct spdk_nvme_ctrlr *ctrlr,
		    enum spdk_nvme_qprio qprio,
		    uint32_t num_requests);
void nvme_qpair_deinit(struct spdk_nvme_qpair *qpair);
void nvme_qpair_enable(struct spdk_nvme_qpair *qpair);
void nvme_qpair_disable(struct spdk_nvme_qpair *qpair);
void nvme_qpair_complete_error_reqs(struct spdk_nvme_qpair *qpair);
int nvme_qpair_submit_request(struct spdk_nvme_qpair *qpair,
			      struct nvme_request *req);

int nvme_ctrlr_identify_active_ns(struct spdk_nvme_ctrlr *ctrlr);
void nvme_ns_set_identify_data(struct spdk_nvme_ns *ns);
int nvme_ns_construct(struct spdk_nvme_ns *ns, uint32_t id,
		      struct spdk_nvme_ctrlr *ctrlr);
void nvme_ns_destruct(struct spdk_nvme_ns *ns);

int nvme_fabric_ctrlr_set_reg_4(struct spdk_nvme_ctrlr *ctrlr, uint32_t offset, uint32_t value);
int nvme_fabric_ctrlr_set_reg_8(struct spdk_nvme_ctrlr *ctrlr, uint32_t offset, uint64_t value);
int nvme_fabric_ctrlr_get_reg_4(struct spdk_nvme_ctrlr *ctrlr, uint32_t offset, uint32_t *value);
int nvme_fabric_ctrlr_get_reg_8(struct spdk_nvme_ctrlr *ctrlr, uint32_t offset, uint64_t *value);
int nvme_fabric_ctrlr_discover(struct spdk_nvme_ctrlr *ctrlr,
			       struct spdk_nvme_probe_ctx *probe_ctx);
int nvme_fabric_qpair_connect(struct spdk_nvme_qpair *qpair, uint32_t num_entries);

static inline struct nvme_request *
nvme_allocate_request(struct spdk_nvme_qpair *qpair,
		      const struct nvme_payload *payload, uint32_t payload_size,
		      spdk_nvme_cmd_cb cb_fn, void *cb_arg)
{
	struct nvme_request *req;

	req = STAILQ_FIRST(&qpair->free_req);
	if (req == NULL) {
		return req;
	}

	STAILQ_REMOVE_HEAD(&qpair->free_req, stailq);

	/*
	 * Only memset/zero fields that need it. All other fields
	 * will be initialized appropriately either later in this
	 * function, or before they are needed later in the
	 * submission path. For example, the children
	 * TAILQ_ENTRY and following members are
	 * only used as part of I/O splitting so we avoid
	 * memsetting them until it is actually needed.
	 * They will be initialized in nvme_request_add_child()
	 * if the request is split.
	 */
	memset(req, 0, offsetof(struct nvme_request, payload_size));

	req->cb_fn = cb_fn;
	req->cb_arg = cb_arg;
	req->payload = *payload;
	req->payload_size = payload_size;
	req->pid = g_spdk_nvme_pid;
	req->submit_tick = 0;

	return req;
}

static inline struct nvme_request *
nvme_allocate_request_contig(struct spdk_nvme_qpair *qpair,
			     void *buffer, uint32_t payload_size,
			     spdk_nvme_cmd_cb cb_fn, void *cb_arg)
{
	struct nvme_payload payload;

	payload = NVME_PAYLOAD_CONTIG(buffer, NULL);

	return nvme_allocate_request(qpair, &payload, payload_size, cb_fn, cb_arg);
}

static inline struct nvme_request *
nvme_allocate_request_null(struct spdk_nvme_qpair *qpair, spdk_nvme_cmd_cb cb_fn, void *cb_arg)
{
	return nvme_allocate_request_contig(qpair, NULL, 0, cb_fn, cb_arg);
}
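
/*
 * Illustrative sketch (not part of the driver): allocating a request that
 * carries a contiguous 4 KiB buffer.  A NULL return means the qpair's free
 * list is exhausted; data_buf, my_completion_cb and cb_ctx are hypothetical.
 *
 *	struct nvme_request *req;
 *
 *	req = nvme_allocate_request_contig(qpair, data_buf, 4096,
 *					   my_completion_cb, cb_ctx);
 *	if (req == NULL) {
 *		// out of requests; retry later or queue the I/O
 *	}
 */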

struct nvme_request *nvme_allocate_request_user_copy(struct spdk_nvme_qpair *qpair,
		void *buffer, uint32_t payload_size,
		spdk_nvme_cmd_cb cb_fn, void *cb_arg, bool host_to_controller);

static inline void
nvme_complete_request(spdk_nvme_cmd_cb cb_fn, void *cb_arg, struct spdk_nvme_qpair *qpair,
		      struct nvme_request *req, struct spdk_nvme_cpl *cpl)
{
	struct spdk_nvme_cpl err_cpl;
	struct nvme_error_cmd *cmd;

	/* Error injection at the completion path;
	 * only inject for successfully completed commands.
	 */
	if (spdk_unlikely(!TAILQ_EMPTY(&qpair->err_cmd_head) &&
			  !spdk_nvme_cpl_is_error(cpl))) {
		TAILQ_FOREACH(cmd, &qpair->err_cmd_head, link) {

			if (cmd->do_not_submit) {
				continue;
			}

			if ((cmd->opc == req->cmd.opc) && cmd->err_count) {

				err_cpl = *cpl;
				err_cpl.status.sct = cmd->status.sct;
				err_cpl.status.sc = cmd->status.sc;

				cpl = &err_cpl;
				cmd->err_count--;
				break;
			}
		}
	}

	if (cb_fn) {
		cb_fn(cb_arg, cpl);
	}
}
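
/*
 * Illustrative sketch (not part of the driver): error injection is driven by
 * nvme_error_cmd entries queued on qpair->err_cmd_head.  An entry like the one
 * below would make the next two successful reads on the qpair complete with a
 * generic Internal Device Error status instead.  Allocation and locking are
 * omitted; the field values are placeholders.
 *
 *	struct nvme_error_cmd *entry;   // assume this was allocated
 *
 *	entry->do_not_submit = false;
 *	entry->opc = SPDK_NVME_OPC_READ;
 *	entry->err_count = 2;
 *	entry->status.sct = SPDK_NVME_SCT_GENERIC;
 *	entry->status.sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR;
 *	TAILQ_INSERT_TAIL(&qpair->err_cmd_head, entry, link);
 */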

static inline void
nvme_free_request(struct nvme_request *req)
{
	assert(req != NULL);
	assert(req->num_children == 0);
	assert(req->qpair != NULL);

	STAILQ_INSERT_HEAD(&req->qpair->free_req, req, stailq);
}

static inline void
nvme_qpair_free_request(struct spdk_nvme_qpair *qpair, struct nvme_request *req)
{
	assert(req != NULL);
	assert(req->num_children == 0);

	STAILQ_INSERT_HEAD(&qpair->free_req, req, stailq);
}

void nvme_request_remove_child(struct nvme_request *parent, struct nvme_request *child);
int nvme_request_check_timeout(struct nvme_request *req, uint16_t cid,
			       struct spdk_nvme_ctrlr_process *active_proc, uint64_t now_tick);
uint64_t nvme_get_quirks(const struct spdk_pci_id *id);

int nvme_robust_mutex_init_shared(pthread_mutex_t *mtx);
int nvme_robust_mutex_init_recursive_shared(pthread_mutex_t *mtx);

const char *spdk_nvme_cpl_get_status_string(const struct spdk_nvme_status *status);
bool nvme_completion_is_retry(const struct spdk_nvme_cpl *cpl);
void nvme_qpair_print_command(struct spdk_nvme_qpair *qpair, struct spdk_nvme_cmd *cmd);
void nvme_qpair_print_completion(struct spdk_nvme_qpair *qpair, struct spdk_nvme_cpl *cpl);

struct spdk_nvme_ctrlr *spdk_nvme_get_ctrlr_by_trid_unsafe(
	const struct spdk_nvme_transport_id *trid);

/* Transport specific functions */
#define DECLARE_TRANSPORT(name) \
	struct spdk_nvme_ctrlr *nvme_ ## name ## _ctrlr_construct(const struct spdk_nvme_transport_id *trid, const struct spdk_nvme_ctrlr_opts *opts, \
			void *devhandle); \
	int nvme_ ## name ## _ctrlr_destruct(struct spdk_nvme_ctrlr *ctrlr); \
	int nvme_ ## name ## _ctrlr_scan(struct spdk_nvme_probe_ctx *probe_ctx, bool direct_connect); \
	int nvme_ ## name ## _ctrlr_enable(struct spdk_nvme_ctrlr *ctrlr); \
	int nvme_ ## name ## _ctrlr_set_reg_4(struct spdk_nvme_ctrlr *ctrlr, uint32_t offset, uint32_t value); \
	int nvme_ ## name ## _ctrlr_set_reg_8(struct spdk_nvme_ctrlr *ctrlr, uint32_t offset, uint64_t value); \
	int nvme_ ## name ## _ctrlr_get_reg_4(struct spdk_nvme_ctrlr *ctrlr, uint32_t offset, uint32_t *value); \
	int nvme_ ## name ## _ctrlr_get_reg_8(struct spdk_nvme_ctrlr *ctrlr, uint32_t offset, uint64_t *value); \
	uint32_t nvme_ ## name ## _ctrlr_get_max_xfer_size(struct spdk_nvme_ctrlr *ctrlr); \
	uint16_t nvme_ ## name ## _ctrlr_get_max_sges(struct spdk_nvme_ctrlr *ctrlr); \
	struct spdk_nvme_qpair *nvme_ ## name ## _ctrlr_create_io_qpair(struct spdk_nvme_ctrlr *ctrlr, uint16_t qid, const struct spdk_nvme_io_qpair_opts *opts); \
	void *nvme_ ## name ## _ctrlr_alloc_cmb_io_buffer(struct spdk_nvme_ctrlr *ctrlr, size_t size); \
	int nvme_ ## name ## _ctrlr_free_cmb_io_buffer(struct spdk_nvme_ctrlr *ctrlr, void *buf, size_t size); \
	volatile struct spdk_nvme_registers *nvme_ ## name ## _ctrlr_get_registers(struct spdk_nvme_ctrlr *ctrlr); \
	int nvme_ ## name ## _ctrlr_delete_io_qpair(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_qpair *qpair); \
	int nvme_ ## name ## _ctrlr_connect_qpair(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_qpair *qpair); \
	void nvme_ ## name ## _ctrlr_disconnect_qpair(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_qpair *qpair); \
	void nvme_ ## name ## _qpair_abort_reqs(struct spdk_nvme_qpair *qpair, uint32_t dnr); \
	int nvme_ ## name ## _qpair_reset(struct spdk_nvme_qpair *qpair); \
	int nvme_ ## name ## _qpair_submit_request(struct spdk_nvme_qpair *qpair, struct nvme_request *req); \
	int32_t nvme_ ## name ## _qpair_process_completions(struct spdk_nvme_qpair *qpair, uint32_t max_completions); \
	void nvme_ ## name ## _admin_qpair_abort_aers(struct spdk_nvme_qpair *qpair); \

DECLARE_TRANSPORT(transport)	/* generic transport dispatch functions */
DECLARE_TRANSPORT(pcie)
DECLARE_TRANSPORT(tcp)
#ifdef SPDK_CONFIG_RDMA
DECLARE_TRANSPORT(rdma)
#endif

#undef DECLARE_TRANSPORT
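
/*
 * Illustrative note (not part of the driver): each DECLARE_TRANSPORT(name)
 * invocation above stamps out one prototype per transport hook by pasting the
 * transport name into the identifier.  For example, DECLARE_TRANSPORT(pcie)
 * declares, among others:
 *
 *	int nvme_pcie_qpair_submit_request(struct spdk_nvme_qpair *qpair,
 *					   struct nvme_request *req);
 *	int32_t nvme_pcie_qpair_process_completions(struct spdk_nvme_qpair *qpair,
 *			uint32_t max_completions);
 */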

/*
 * The ref-related functions below must be called with the global driver lock
 * held to handle the multi-process case. Within these functions, the
 * per-controller ctrlr_lock is also acquired to handle the multi-thread case.
 */
void nvme_ctrlr_proc_get_ref(struct spdk_nvme_ctrlr *ctrlr);
void nvme_ctrlr_proc_put_ref(struct spdk_nvme_ctrlr *ctrlr);
int nvme_ctrlr_get_ref_count(struct spdk_nvme_ctrlr *ctrlr);

static inline bool
_is_page_aligned(uint64_t address, uint64_t page_size)
{
	return (address & (page_size - 1)) == 0;
}

#endif /* __NVME_INTERNAL_H__ */