1 /*-
2 * BSD LICENSE
3 *
4 * Copyright (c) Intel Corporation.
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 *
11 * * Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * * Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in
15 * the documentation and/or other materials provided with the
16 * distribution.
17 * * Neither the name of Intel Corporation nor the names of its
18 * contributors may be used to endorse or promote products derived
19 * from this software without specific prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32 */
33
34 #include "spdk/stdinc.h"
35
36 #include "nvme_internal.h"
37
38 #include "spdk/env.h"
39 #include "spdk/string.h"
40
41 static int nvme_ctrlr_construct_and_submit_aer(struct spdk_nvme_ctrlr *ctrlr,
42 struct nvme_async_event_request *aer);
43 static int nvme_ctrlr_identify_ns_async(struct spdk_nvme_ns *ns);
44 static int nvme_ctrlr_identify_id_desc_async(struct spdk_nvme_ns *ns);
45
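/*
 * Thin wrappers around the transport layer's register accessors for the
 * CC, CSTS, CAP and VS controller registers.
 */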
46 static int
47 nvme_ctrlr_get_cc(struct spdk_nvme_ctrlr *ctrlr, union spdk_nvme_cc_register *cc)
48 {
49 return nvme_transport_ctrlr_get_reg_4(ctrlr, offsetof(struct spdk_nvme_registers, cc.raw),
50 &cc->raw);
51 }
52
53 static int
54 nvme_ctrlr_get_csts(struct spdk_nvme_ctrlr *ctrlr, union spdk_nvme_csts_register *csts)
55 {
56 return nvme_transport_ctrlr_get_reg_4(ctrlr, offsetof(struct spdk_nvme_registers, csts.raw),
57 &csts->raw);
58 }
59
60 int
61 nvme_ctrlr_get_cap(struct spdk_nvme_ctrlr *ctrlr, union spdk_nvme_cap_register *cap)
62 {
63 return nvme_transport_ctrlr_get_reg_8(ctrlr, offsetof(struct spdk_nvme_registers, cap.raw),
64 &cap->raw);
65 }
66
67 int
68 nvme_ctrlr_get_vs(struct spdk_nvme_ctrlr *ctrlr, union spdk_nvme_vs_register *vs)
69 {
70 return nvme_transport_ctrlr_get_reg_4(ctrlr, offsetof(struct spdk_nvme_registers, vs.raw),
71 &vs->raw);
72 }
73
74 static int
75 nvme_ctrlr_set_cc(struct spdk_nvme_ctrlr *ctrlr, const union spdk_nvme_cc_register *cc)
76 {
77 return nvme_transport_ctrlr_set_reg_4(ctrlr, offsetof(struct spdk_nvme_registers, cc.raw),
78 cc->raw);
79 }
80
81 void
82 spdk_nvme_ctrlr_get_default_ctrlr_opts(struct spdk_nvme_ctrlr_opts *opts, size_t opts_size)
83 {
84 char host_id_str[SPDK_UUID_STRING_LEN];
85
86 assert(opts);
87
88 memset(opts, 0, opts_size);
89
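/*
 * FIELD_OK(field) is true only if the field fits entirely within the
 * caller-provided opts_size, so callers compiled against an older, smaller
 * spdk_nvme_ctrlr_opts remain ABI compatible.
 */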
90 #define FIELD_OK(field) \
91 offsetof(struct spdk_nvme_ctrlr_opts, field) + sizeof(opts->field) <= opts_size
92
93 if (FIELD_OK(num_io_queues)) {
94 opts->num_io_queues = DEFAULT_MAX_IO_QUEUES;
95 }
96
97 if (FIELD_OK(use_cmb_sqs)) {
98 opts->use_cmb_sqs = true;
99 }
100
101 if (FIELD_OK(arb_mechanism)) {
102 opts->arb_mechanism = SPDK_NVME_CC_AMS_RR;
103 }
104
105 if (FIELD_OK(keep_alive_timeout_ms)) {
106 opts->keep_alive_timeout_ms = 10 * 1000;
107 }
108
109 if (FIELD_OK(io_queue_size)) {
110 opts->io_queue_size = DEFAULT_IO_QUEUE_SIZE;
111 }
112
113 if (FIELD_OK(io_queue_requests)) {
114 opts->io_queue_requests = DEFAULT_IO_QUEUE_REQUESTS;
115 }
116
117 if (FIELD_OK(host_id)) {
118 memset(opts->host_id, 0, sizeof(opts->host_id));
119 }
120
121 if (nvme_driver_init() == 0) {
122 if (FIELD_OK(extended_host_id)) {
123 memcpy(opts->extended_host_id, &g_spdk_nvme_driver->default_extended_host_id,
124 sizeof(opts->extended_host_id));
125 }
126
127 if (FIELD_OK(hostnqn)) {
128 spdk_uuid_fmt_lower(host_id_str, sizeof(host_id_str),
129 &g_spdk_nvme_driver->default_extended_host_id);
130 snprintf(opts->hostnqn, sizeof(opts->hostnqn), "2014-08.org.nvmexpress:uuid:%s", host_id_str);
131 }
132 }
133
134 if (FIELD_OK(src_addr)) {
135 memset(opts->src_addr, 0, sizeof(opts->src_addr));
136 }
137
138 if (FIELD_OK(src_svcid)) {
139 memset(opts->src_svcid, 0, sizeof(opts->src_svcid));
140 }
141
142 if (FIELD_OK(command_set)) {
143 opts->command_set = SPDK_NVME_CC_CSS_NVM;
144 }
145 #undef FIELD_OK
146 }
147
148 /**
149 * This function will be called when a process allocates an IO qpair.
150 * Note: the ctrlr_lock must be held when calling this function.
151 */
152 static void
153 nvme_ctrlr_proc_add_io_qpair(struct spdk_nvme_qpair *qpair)
154 {
155 struct spdk_nvme_ctrlr_process *active_proc;
156 struct spdk_nvme_ctrlr *ctrlr = qpair->ctrlr;
157
158 active_proc = spdk_nvme_ctrlr_get_current_process(ctrlr);
159 if (active_proc) {
160 TAILQ_INSERT_TAIL(&active_proc->allocated_io_qpairs, qpair, per_process_tailq);
161 qpair->active_proc = active_proc;
162 }
163 }
164
165 /**
166 * This function will be called when a process frees an IO qpair.
167 * Note: the ctrlr_lock must be held when calling this function.
168 */
169 static void
170 nvme_ctrlr_proc_remove_io_qpair(struct spdk_nvme_qpair *qpair)
171 {
172 struct spdk_nvme_ctrlr_process *active_proc;
173 struct spdk_nvme_ctrlr *ctrlr = qpair->ctrlr;
174 struct spdk_nvme_qpair *active_qpair, *tmp_qpair;
175
176 active_proc = spdk_nvme_ctrlr_get_current_process(ctrlr);
177 if (!active_proc) {
178 return;
179 }
180
181 TAILQ_FOREACH_SAFE(active_qpair, &active_proc->allocated_io_qpairs,
182 per_process_tailq, tmp_qpair) {
183 if (active_qpair == qpair) {
184 TAILQ_REMOVE(&active_proc->allocated_io_qpairs,
185 active_qpair, per_process_tailq);
186
187 break;
188 }
189 }
190 }
191
192 void
193 spdk_nvme_ctrlr_get_default_io_qpair_opts(struct spdk_nvme_ctrlr *ctrlr,
194 struct spdk_nvme_io_qpair_opts *opts,
195 size_t opts_size)
196 {
197 assert(ctrlr);
198
199 assert(opts);
200
201 memset(opts, 0, opts_size);
202
203 #define FIELD_OK(field) \
204 offsetof(struct spdk_nvme_io_qpair_opts, field) + sizeof(opts->field) <= opts_size
205
206 if (FIELD_OK(qprio)) {
207 opts->qprio = SPDK_NVME_QPRIO_URGENT;
208 }
209
210 if (FIELD_OK(io_queue_size)) {
211 opts->io_queue_size = ctrlr->opts.io_queue_size;
212 }
213
214 if (FIELD_OK(io_queue_requests)) {
215 opts->io_queue_requests = ctrlr->opts.io_queue_requests;
216 }
217
218 #undef FIELD_OK
219 }
220
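/*
 * Allocate an I/O queue pair on the controller. A minimal usage sketch
 * (hypothetical caller, error handling omitted):
 *
 *   struct spdk_nvme_io_qpair_opts opts;
 *
 *   spdk_nvme_ctrlr_get_default_io_qpair_opts(ctrlr, &opts, sizeof(opts));
 *   opts.io_queue_size = 256;
 *   qpair = spdk_nvme_ctrlr_alloc_io_qpair(ctrlr, &opts, sizeof(opts));
 */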
221 struct spdk_nvme_qpair *
222 spdk_nvme_ctrlr_alloc_io_qpair(struct spdk_nvme_ctrlr *ctrlr,
223 const struct spdk_nvme_io_qpair_opts *user_opts,
224 size_t opts_size)
225 {
226 uint32_t qid;
227 struct spdk_nvme_qpair *qpair;
228 union spdk_nvme_cc_register cc;
229 struct spdk_nvme_io_qpair_opts opts;
230
231 if (!ctrlr) {
232 return NULL;
233 }
234
235 /*
236 * Get the default options, then overwrite them with the user-provided options
237 * up to opts_size.
238 *
239 * This allows for extensions of the opts structure without breaking
240 * ABI compatibility.
241 */
242 spdk_nvme_ctrlr_get_default_io_qpair_opts(ctrlr, &opts, sizeof(opts));
243 if (user_opts) {
244 memcpy(&opts, user_opts, spdk_min(sizeof(opts), opts_size));
245 }
246
247 if (nvme_ctrlr_get_cc(ctrlr, &cc)) {
248 SPDK_ERRLOG("get_cc failed\n");
249 return NULL;
250 }
251
252 /* Only the low 2 bits (values 0, 1, 2, 3) of QPRIO are valid. */
253 if ((opts.qprio & 3) != opts.qprio) {
254 return NULL;
255 }
256
257 /*
258 * Only value SPDK_NVME_QPRIO_URGENT(0) is valid for the
259 * default round robin arbitration method.
260 */
261 if ((cc.bits.ams == SPDK_NVME_CC_AMS_RR) && (opts.qprio != SPDK_NVME_QPRIO_URGENT)) {
262 SPDK_ERRLOG("invalid queue priority for default round robin arbitration method\n");
263 return NULL;
264 }
265
266 nvme_robust_mutex_lock(&ctrlr->ctrlr_lock);
267
268 /*
269 * Get the first available I/O queue ID.
270 */
271 qid = spdk_bit_array_find_first_set(ctrlr->free_io_qids, 1);
272 if (qid > ctrlr->opts.num_io_queues) {
273 SPDK_ERRLOG("No free I/O queue IDs\n");
274 nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock);
275 return NULL;
276 }
277
278 qpair = nvme_transport_ctrlr_create_io_qpair(ctrlr, qid, &opts);
279 if (qpair == NULL) {
280 SPDK_ERRLOG("nvme_transport_ctrlr_create_io_qpair() failed\n");
281 nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock);
282 return NULL;
283 }
284 spdk_bit_array_clear(ctrlr->free_io_qids, qid);
285 TAILQ_INSERT_TAIL(&ctrlr->active_io_qpairs, qpair, tailq);
286
287 nvme_ctrlr_proc_add_io_qpair(qpair);
288
289 nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock);
290
291 if (ctrlr->quirks & NVME_QUIRK_DELAY_AFTER_QUEUE_ALLOC) {
292 spdk_delay_us(100);
293 }
294
295 return qpair;
296 }
297
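/*
 * Free an I/O queue pair. If called from within that qpair's completion
 * context, the deletion is deferred until the completion routine unwinds.
 */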
298 int
299 spdk_nvme_ctrlr_free_io_qpair(struct spdk_nvme_qpair *qpair)
300 {
301 struct spdk_nvme_ctrlr *ctrlr;
302
303 if (qpair == NULL) {
304 return 0;
305 }
306
307 ctrlr = qpair->ctrlr;
308
309 if (qpair->in_completion_context) {
310 /*
311 * There are many cases where it is convenient to delete an io qpair in the context
312 * of that qpair's completion routine. To handle this properly, set a flag here
313 * so that the completion routine will perform an actual delete after the context
314 * unwinds.
315 */
316 qpair->delete_after_completion_context = 1;
317 return 0;
318 }
319
320 nvme_robust_mutex_lock(&ctrlr->ctrlr_lock);
321
322 nvme_ctrlr_proc_remove_io_qpair(qpair);
323
324 TAILQ_REMOVE(&ctrlr->active_io_qpairs, qpair, tailq);
325 spdk_bit_array_set(ctrlr->free_io_qids, qpair->id);
326
327 if (nvme_transport_ctrlr_delete_io_qpair(ctrlr, qpair)) {
328 nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock);
329 return -1;
330 }
331
332 nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock);
333 return 0;
334 }
335
336 static void
337 nvme_ctrlr_construct_intel_support_log_page_list(struct spdk_nvme_ctrlr *ctrlr,
338 struct spdk_nvme_intel_log_page_directory *log_page_directory)
339 {
340 if (log_page_directory == NULL) {
341 return;
342 }
343
344 if (ctrlr->cdata.vid != SPDK_PCI_VID_INTEL) {
345 return;
346 }
347
348 ctrlr->log_page_supported[SPDK_NVME_INTEL_LOG_PAGE_DIRECTORY] = true;
349
350 if (log_page_directory->read_latency_log_len ||
351 (ctrlr->quirks & NVME_INTEL_QUIRK_READ_LATENCY)) {
352 ctrlr->log_page_supported[SPDK_NVME_INTEL_LOG_READ_CMD_LATENCY] = true;
353 }
354 if (log_page_directory->write_latency_log_len ||
355 (ctrlr->quirks & NVME_INTEL_QUIRK_WRITE_LATENCY)) {
356 ctrlr->log_page_supported[SPDK_NVME_INTEL_LOG_WRITE_CMD_LATENCY] = true;
357 }
358 if (log_page_directory->temperature_statistics_log_len) {
359 ctrlr->log_page_supported[SPDK_NVME_INTEL_LOG_TEMPERATURE] = true;
360 }
361 if (log_page_directory->smart_log_len) {
362 ctrlr->log_page_supported[SPDK_NVME_INTEL_LOG_SMART] = true;
363 }
364 if (log_page_directory->marketing_description_log_len) {
365 ctrlr->log_page_supported[SPDK_NVME_INTEL_MARKETING_DESCRIPTION] = true;
366 }
367 }
368
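/*
 * Synchronously read the Intel vendor-specific log page directory and record
 * which Intel log pages this controller supports.
 */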
369 static int nvme_ctrlr_set_intel_support_log_pages(struct spdk_nvme_ctrlr *ctrlr)
370 {
371 int rc = 0;
372 uint64_t phys_addr = 0;
373 struct nvme_completion_poll_status status;
374 struct spdk_nvme_intel_log_page_directory *log_page_directory;
375
376 log_page_directory = spdk_zmalloc(sizeof(struct spdk_nvme_intel_log_page_directory),
377 64, &phys_addr, SPDK_ENV_SOCKET_ID_ANY, SPDK_MALLOC_DMA);
378 if (log_page_directory == NULL) {
379 SPDK_ERRLOG("could not allocate log_page_directory\n");
380 return -ENXIO;
381 }
382
383 rc = spdk_nvme_ctrlr_cmd_get_log_page(ctrlr, SPDK_NVME_INTEL_LOG_PAGE_DIRECTORY,
384 SPDK_NVME_GLOBAL_NS_TAG, log_page_directory,
385 sizeof(struct spdk_nvme_intel_log_page_directory),
386 0, nvme_completion_poll_cb, &status);
387 if (rc != 0) {
388 spdk_free(log_page_directory);
389 return rc;
390 }
391
392 if (spdk_nvme_wait_for_completion(ctrlr->adminq, &status)) {
393 spdk_free(log_page_directory);
394 SPDK_ERRLOG("nvme_ctrlr_cmd_get_log_page failed!\n");
395 return -ENXIO;
396 }
397
398 nvme_ctrlr_construct_intel_support_log_page_list(ctrlr, log_page_directory);
399 spdk_free(log_page_directory);
400 return 0;
401 }
402
403 static int
404 nvme_ctrlr_set_supported_log_pages(struct spdk_nvme_ctrlr *ctrlr)
405 {
406 int rc = 0;
407
408 memset(ctrlr->log_page_supported, 0, sizeof(ctrlr->log_page_supported));
409 /* Mandatory pages */
410 ctrlr->log_page_supported[SPDK_NVME_LOG_ERROR] = true;
411 ctrlr->log_page_supported[SPDK_NVME_LOG_HEALTH_INFORMATION] = true;
412 ctrlr->log_page_supported[SPDK_NVME_LOG_FIRMWARE_SLOT] = true;
413 if (ctrlr->cdata.lpa.celp) {
414 ctrlr->log_page_supported[SPDK_NVME_LOG_COMMAND_EFFECTS_LOG] = true;
415 }
416 if (ctrlr->cdata.vid == SPDK_PCI_VID_INTEL && !(ctrlr->quirks & NVME_INTEL_QUIRK_NO_LOG_PAGES)) {
417 rc = nvme_ctrlr_set_intel_support_log_pages(ctrlr);
418 }
419
420 return rc;
421 }
422
423 static void
424 nvme_ctrlr_set_intel_supported_features(struct spdk_nvme_ctrlr *ctrlr)
425 {
426 ctrlr->feature_supported[SPDK_NVME_INTEL_FEAT_MAX_LBA] = true;
427 ctrlr->feature_supported[SPDK_NVME_INTEL_FEAT_NATIVE_MAX_LBA] = true;
428 ctrlr->feature_supported[SPDK_NVME_INTEL_FEAT_POWER_GOVERNOR_SETTING] = true;
429 ctrlr->feature_supported[SPDK_NVME_INTEL_FEAT_SMBUS_ADDRESS] = true;
430 ctrlr->feature_supported[SPDK_NVME_INTEL_FEAT_LED_PATTERN] = true;
431 ctrlr->feature_supported[SPDK_NVME_INTEL_FEAT_RESET_TIMED_WORKLOAD_COUNTERS] = true;
432 ctrlr->feature_supported[SPDK_NVME_INTEL_FEAT_LATENCY_TRACKING] = true;
433 }
434
435 static void
436 nvme_ctrlr_set_supported_features(struct spdk_nvme_ctrlr *ctrlr)
437 {
438 memset(ctrlr->feature_supported, 0, sizeof(ctrlr->feature_supported));
439 /* Mandatory features */
440 ctrlr->feature_supported[SPDK_NVME_FEAT_ARBITRATION] = true;
441 ctrlr->feature_supported[SPDK_NVME_FEAT_POWER_MANAGEMENT] = true;
442 ctrlr->feature_supported[SPDK_NVME_FEAT_TEMPERATURE_THRESHOLD] = true;
443 ctrlr->feature_supported[SPDK_NVME_FEAT_ERROR_RECOVERY] = true;
444 ctrlr->feature_supported[SPDK_NVME_FEAT_NUMBER_OF_QUEUES] = true;
445 ctrlr->feature_supported[SPDK_NVME_FEAT_INTERRUPT_COALESCING] = true;
446 ctrlr->feature_supported[SPDK_NVME_FEAT_INTERRUPT_VECTOR_CONFIGURATION] = true;
447 ctrlr->feature_supported[SPDK_NVME_FEAT_WRITE_ATOMICITY] = true;
448 ctrlr->feature_supported[SPDK_NVME_FEAT_ASYNC_EVENT_CONFIGURATION] = true;
449 /* Optional features */
450 if (ctrlr->cdata.vwc.present) {
451 ctrlr->feature_supported[SPDK_NVME_FEAT_VOLATILE_WRITE_CACHE] = true;
452 }
453 if (ctrlr->cdata.apsta.supported) {
454 ctrlr->feature_supported[SPDK_NVME_FEAT_AUTONOMOUS_POWER_STATE_TRANSITION] = true;
455 }
456 if (ctrlr->cdata.hmpre) {
457 ctrlr->feature_supported[SPDK_NVME_FEAT_HOST_MEM_BUFFER] = true;
458 }
459 if (ctrlr->cdata.vid == SPDK_PCI_VID_INTEL) {
460 nvme_ctrlr_set_intel_supported_features(ctrlr);
461 }
462 }
463
464 void
465 nvme_ctrlr_fail(struct spdk_nvme_ctrlr *ctrlr, bool hot_remove)
466 {
467 /*
468 * Set the flag here and leave the actual failing of the qpairs to
469 * spdk_nvme_qpair_process_completions().
470 */
471 if (hot_remove) {
472 ctrlr->is_removed = true;
473 }
474 ctrlr->is_failed = true;
475 SPDK_ERRLOG("ctrlr %s in failed state.\n", ctrlr->trid.traddr);
476 }
477
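/*
 * Request a normal shutdown by setting CC.SHN, then poll CSTS.SHST until the
 * controller reports shutdown complete or the RTD3E-based timeout expires.
 */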
478 static void
479 nvme_ctrlr_shutdown(struct spdk_nvme_ctrlr *ctrlr)
480 {
481 union spdk_nvme_cc_register cc;
482 union spdk_nvme_csts_register csts;
483 uint32_t ms_waited = 0;
484 uint32_t shutdown_timeout_ms;
485
486 if (ctrlr->is_removed) {
487 return;
488 }
489
490 if (nvme_ctrlr_get_cc(ctrlr, &cc)) {
491 SPDK_ERRLOG("get_cc() failed\n");
492 return;
493 }
494
495 cc.bits.shn = SPDK_NVME_SHN_NORMAL;
496
497 if (nvme_ctrlr_set_cc(ctrlr, &cc)) {
498 SPDK_ERRLOG("set_cc() failed\n");
499 return;
500 }
501
502 /*
503 * The NVMe specification defines RTD3E as the time from setting
504 * SHN = 1 until the controller sets SHST = 10b.
505 * If the device doesn't report RTD3 entry latency, or if it
506 * reports RTD3 entry latency less than 10 seconds, pick
507 * 10 seconds as a reasonable amount of time to
508 * wait before proceeding.
509 */
510 SPDK_DEBUGLOG(SPDK_LOG_NVME, "RTD3E = %" PRIu32 " us\n", ctrlr->cdata.rtd3e);
511 shutdown_timeout_ms = (ctrlr->cdata.rtd3e + 999) / 1000;
512 shutdown_timeout_ms = spdk_max(shutdown_timeout_ms, 10000);
513 SPDK_DEBUGLOG(SPDK_LOG_NVME, "shutdown timeout = %" PRIu32 " ms\n", shutdown_timeout_ms);
514
515 do {
516 if (nvme_ctrlr_get_csts(ctrlr, &csts)) {
517 SPDK_ERRLOG("get_csts() failed\n");
518 return;
519 }
520
521 if (csts.bits.shst == SPDK_NVME_SHST_COMPLETE) {
522 SPDK_DEBUGLOG(SPDK_LOG_NVME, "shutdown complete in %u milliseconds\n",
523 ms_waited);
524 return;
525 }
526
527 nvme_delay(1000);
528 ms_waited++;
529 } while (ms_waited < shutdown_timeout_ms);
530
531 SPDK_ERRLOG("did not shutdown within %u milliseconds\n", shutdown_timeout_ms);
532 }
533
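/*
 * Program CC (queue entry sizes, memory page size, arbitration mechanism and
 * command set) and set CC.EN = 1 to enable the controller.
 */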
534 static int
535 nvme_ctrlr_enable(struct spdk_nvme_ctrlr *ctrlr)
536 {
537 union spdk_nvme_cc_register cc;
538 int rc;
539
540 rc = nvme_transport_ctrlr_enable(ctrlr);
541 if (rc != 0) {
542 SPDK_ERRLOG("transport ctrlr_enable failed\n");
543 return rc;
544 }
545
546 if (nvme_ctrlr_get_cc(ctrlr, &cc)) {
547 SPDK_ERRLOG("get_cc() failed\n");
548 return -EIO;
549 }
550
551 if (cc.bits.en != 0) {
552 SPDK_ERRLOG("%s called with CC.EN = 1\n", __func__);
553 return -EINVAL;
554 }
555
556 cc.bits.en = 1;
557 cc.bits.css = 0;
558 cc.bits.shn = 0;
559 cc.bits.iosqes = 6; /* SQ entry size == 64 == 2^6 */
560 cc.bits.iocqes = 4; /* CQ entry size == 16 == 2^4 */
561
562 /* Page size is 2 ^ (12 + mps). */
563 cc.bits.mps = spdk_u32log2(ctrlr->page_size) - 12;
564
565 if (ctrlr->cap.bits.css == 0) {
566 SPDK_INFOLOG(SPDK_LOG_NVME,
567 "Drive reports no command sets supported. Assuming NVM is supported.\n");
568 ctrlr->cap.bits.css = SPDK_NVME_CAP_CSS_NVM;
569 }
570
571 if (!(ctrlr->cap.bits.css & (1u << ctrlr->opts.command_set))) {
572 SPDK_DEBUGLOG(SPDK_LOG_NVME, "Requested I/O command set %u but supported mask is 0x%x\n",
573 ctrlr->opts.command_set, ctrlr->cap.bits.css);
574 return -EINVAL;
575 }
576
577 cc.bits.css = ctrlr->opts.command_set;
578
579 switch (ctrlr->opts.arb_mechanism) {
580 case SPDK_NVME_CC_AMS_RR:
581 break;
582 case SPDK_NVME_CC_AMS_WRR:
583 if (SPDK_NVME_CAP_AMS_WRR & ctrlr->cap.bits.ams) {
584 break;
585 }
586 return -EINVAL;
587 case SPDK_NVME_CC_AMS_VS:
588 if (SPDK_NVME_CAP_AMS_VS & ctrlr->cap.bits.ams) {
589 break;
590 }
591 return -EINVAL;
592 default:
593 return -EINVAL;
594 }
595
596 cc.bits.ams = ctrlr->opts.arb_mechanism;
597
598 if (nvme_ctrlr_set_cc(ctrlr, &cc)) {
599 SPDK_ERRLOG("set_cc() failed\n");
600 return -EIO;
601 }
602
603 return 0;
604 }
605
606 #ifdef DEBUG
607 static const char *
608 nvme_ctrlr_state_string(enum nvme_ctrlr_state state)
609 {
610 switch (state) {
611 case NVME_CTRLR_STATE_INIT_DELAY:
612 return "delay init";
613 case NVME_CTRLR_STATE_INIT:
614 return "init";
615 case NVME_CTRLR_STATE_DISABLE_WAIT_FOR_READY_1:
616 return "disable and wait for CSTS.RDY = 1";
617 case NVME_CTRLR_STATE_DISABLE_WAIT_FOR_READY_0:
618 return "disable and wait for CSTS.RDY = 0";
619 case NVME_CTRLR_STATE_ENABLE:
620 return "enable controller by writing CC.EN = 1";
621 case NVME_CTRLR_STATE_ENABLE_WAIT_FOR_READY_1:
622 return "wait for CSTS.RDY = 1";
623 case NVME_CTRLR_STATE_ENABLE_ADMIN_QUEUE:
624 return "enable admin queue";
625 case NVME_CTRLR_STATE_IDENTIFY:
626 return "identify controller";
627 case NVME_CTRLR_STATE_WAIT_FOR_IDENTIFY:
628 return "wait for identify controller";
629 case NVME_CTRLR_STATE_SET_NUM_QUEUES:
630 return "set number of queues";
631 case NVME_CTRLR_STATE_WAIT_FOR_SET_NUM_QUEUES:
632 return "wait for set number of queues";
633 case NVME_CTRLR_STATE_GET_NUM_QUEUES:
634 return "get number of queues";
635 case NVME_CTRLR_STATE_WAIT_FOR_GET_NUM_QUEUES:
636 return "wait for get number of queues";
637 case NVME_CTRLR_STATE_CONSTRUCT_NS:
638 return "construct namespaces";
639 case NVME_CTRLR_STATE_IDENTIFY_ACTIVE_NS:
640 return "identify active ns";
641 case NVME_CTRLR_STATE_IDENTIFY_NS:
642 return "identify ns";
643 case NVME_CTRLR_STATE_WAIT_FOR_IDENTIFY_NS:
644 return "wait for identify ns";
645 case NVME_CTRLR_STATE_IDENTIFY_ID_DESCS:
646 return "identify namespace id descriptors";
647 case NVME_CTRLR_STATE_WAIT_FOR_IDENTIFY_ID_DESCS:
648 return "wait for identify namespace id descriptors";
649 case NVME_CTRLR_STATE_CONFIGURE_AER:
650 return "configure AER";
651 case NVME_CTRLR_STATE_WAIT_FOR_CONFIGURE_AER:
652 return "wait for configure aer";
653 case NVME_CTRLR_STATE_SET_SUPPORTED_LOG_PAGES:
654 return "set supported log pages";
655 case NVME_CTRLR_STATE_SET_SUPPORTED_FEATURES:
656 return "set supported features";
657 case NVME_CTRLR_STATE_SET_DB_BUF_CFG:
658 return "set doorbell buffer config";
659 case NVME_CTRLR_STATE_WAIT_FOR_DB_BUF_CFG:
660 return "wait for doorbell buffer config";
661 case NVME_CTRLR_STATE_SET_KEEP_ALIVE_TIMEOUT:
662 return "set keep alive timeout";
663 case NVME_CTRLR_STATE_WAIT_FOR_KEEP_ALIVE_TIMEOUT:
664 return "wait for set keep alive timeout";
665 case NVME_CTRLR_STATE_SET_HOST_ID:
666 return "set host ID";
667 case NVME_CTRLR_STATE_WAIT_FOR_HOST_ID:
668 return "wait for set host ID";
669 case NVME_CTRLR_STATE_READY:
670 return "ready";
671 case NVME_CTRLR_STATE_ERROR:
672 return "error";
673 }
674 return "unknown";
675 }
676 #endif /* DEBUG */
677
678 static void
679 nvme_ctrlr_set_state(struct spdk_nvme_ctrlr *ctrlr, enum nvme_ctrlr_state state,
680 uint64_t timeout_in_ms)
681 {
682 ctrlr->state = state;
683 if (timeout_in_ms == NVME_TIMEOUT_INFINITE) {
684 SPDK_DEBUGLOG(SPDK_LOG_NVME, "setting state to %s (no timeout)\n",
685 nvme_ctrlr_state_string(ctrlr->state));
686 ctrlr->state_timeout_tsc = NVME_TIMEOUT_INFINITE;
687 } else {
688 SPDK_DEBUGLOG(SPDK_LOG_NVME, "setting state to %s (timeout %" PRIu64 " ms)\n",
689 nvme_ctrlr_state_string(ctrlr->state), timeout_in_ms);
690 ctrlr->state_timeout_tsc = spdk_get_ticks() + (timeout_in_ms * spdk_get_ticks_hz()) / 1000;
691 }
692 }
693
694 static void
695 nvme_ctrlr_free_doorbell_buffer(struct spdk_nvme_ctrlr *ctrlr)
696 {
697 if (ctrlr->shadow_doorbell) {
698 spdk_dma_free(ctrlr->shadow_doorbell);
699 ctrlr->shadow_doorbell = NULL;
700 }
701
702 if (ctrlr->eventidx) {
703 spdk_dma_free(ctrlr->eventidx);
704 ctrlr->eventidx = NULL;
705 }
706 }
707
708 static void
709 nvme_ctrlr_set_doorbell_buffer_config_done(void *arg, const struct spdk_nvme_cpl *cpl)
710 {
711 struct spdk_nvme_ctrlr *ctrlr = (struct spdk_nvme_ctrlr *)arg;
712
713 if (spdk_nvme_cpl_is_error(cpl)) {
714 SPDK_WARNLOG("Doorbell buffer config failed\n");
715 } else {
716 SPDK_INFOLOG(SPDK_LOG_NVME, "NVMe controller: %s doorbell buffer config enabled\n",
717 ctrlr->trid.traddr);
718 }
719 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_SET_KEEP_ALIVE_TIMEOUT, NVME_TIMEOUT_INFINITE);
720 }
721
722 static int
723 nvme_ctrlr_set_doorbell_buffer_config(struct spdk_nvme_ctrlr *ctrlr)
724 {
725 int rc = 0;
726 uint64_t prp1, prp2;
727
728 if (!ctrlr->cdata.oacs.doorbell_buffer_config) {
729 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_SET_KEEP_ALIVE_TIMEOUT, NVME_TIMEOUT_INFINITE);
730 return 0;
731 }
732
733 if (ctrlr->trid.trtype != SPDK_NVME_TRANSPORT_PCIE) {
734 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_SET_KEEP_ALIVE_TIMEOUT, NVME_TIMEOUT_INFINITE);
735 return 0;
736 }
737
738 /* The shadow doorbell and eventidx buffers each occupy exactly one page. */
739 ctrlr->shadow_doorbell = spdk_dma_zmalloc(ctrlr->page_size, ctrlr->page_size,
740 &prp1);
741 if (ctrlr->shadow_doorbell == NULL) {
742 rc = -ENOMEM;
743 goto error;
744 }
745
746 ctrlr->eventidx = spdk_dma_zmalloc(ctrlr->page_size, ctrlr->page_size, &prp2);
747 if (ctrlr->eventidx == NULL) {
748 rc = -ENOMEM;
749 goto error;
750 }
751
752 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_WAIT_FOR_DB_BUF_CFG, NVME_TIMEOUT_INFINITE);
753
754 rc = nvme_ctrlr_cmd_doorbell_buffer_config(ctrlr, prp1, prp2,
755 nvme_ctrlr_set_doorbell_buffer_config_done, ctrlr);
756 if (rc != 0) {
757 goto error;
758 }
759
760 return 0;
761
762 error:
763 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE);
764 nvme_ctrlr_free_doorbell_buffer(ctrlr);
765 return rc;
766 }
767
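/*
 * Reset the controller: drop any queued abort requests, disable the admin and
 * I/O queues, free the doorbell buffer config, then re-run the initialization
 * state machine and reinitialize the I/O qpairs.
 */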
768 int
769 spdk_nvme_ctrlr_reset(struct spdk_nvme_ctrlr *ctrlr)
770 {
771 int rc = 0;
772 struct spdk_nvme_qpair *qpair;
773 struct nvme_request *req, *tmp;
774
775 nvme_robust_mutex_lock(&ctrlr->ctrlr_lock);
776
777 if (ctrlr->is_resetting || ctrlr->is_failed) {
778 /*
779 * Controller is already resetting or has failed. Return
780 * immediately since there is no need to kick off another
781 * reset in these cases.
782 */
783 nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock);
784 return 0;
785 }
786
787 ctrlr->is_resetting = true;
788
789 SPDK_NOTICELOG("resetting controller\n");
790
791 /* Free all of the queued abort requests */
792 STAILQ_FOREACH_SAFE(req, &ctrlr->queued_aborts, stailq, tmp) {
793 STAILQ_REMOVE_HEAD(&ctrlr->queued_aborts, stailq);
794 nvme_free_request(req);
795 ctrlr->outstanding_aborts--;
796 }
797
798 /* Disable all queues before disabling the controller hardware. */
799 nvme_qpair_disable(ctrlr->adminq);
800 TAILQ_FOREACH(qpair, &ctrlr->active_io_qpairs, tailq) {
801 nvme_qpair_disable(qpair);
802 }
803
804 /* Doorbell buffer config is invalid during reset */
805 nvme_ctrlr_free_doorbell_buffer(ctrlr);
806
807 /* Set the state back to INIT to cause a full hardware reset. */
808 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_INIT, NVME_TIMEOUT_INFINITE);
809
810 while (ctrlr->state != NVME_CTRLR_STATE_READY) {
811 if (nvme_ctrlr_process_init(ctrlr) != 0) {
812 SPDK_ERRLOG("%s: controller reinitialization failed\n", __func__);
813 nvme_ctrlr_fail(ctrlr, false);
814 rc = -1;
815 break;
816 }
817 }
818
819 if (!ctrlr->is_failed) {
820 /* Reinitialize qpairs */
821 TAILQ_FOREACH(qpair, &ctrlr->active_io_qpairs, tailq) {
822 if (nvme_transport_ctrlr_reinit_io_qpair(ctrlr, qpair) != 0) {
823 nvme_ctrlr_fail(ctrlr, false);
824 rc = -1;
825 }
826 }
827 }
828
829 ctrlr->is_resetting = false;
830
831 nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock);
832
833 return rc;
834 }
835
836 static void
837 nvme_ctrlr_identify_done(void *arg, const struct spdk_nvme_cpl *cpl)
838 {
839 struct spdk_nvme_ctrlr *ctrlr = (struct spdk_nvme_ctrlr *)arg;
840
841 if (spdk_nvme_cpl_is_error(cpl)) {
842 SPDK_ERRLOG("nvme_identify_controller failed!\n");
843 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE);
844 return;
845 }
846
847 /*
848 * Use MDTS to ensure our default max_xfer_size doesn't exceed what the
849 * controller supports.
850 */
851 ctrlr->max_xfer_size = nvme_transport_ctrlr_get_max_xfer_size(ctrlr);
852 SPDK_DEBUGLOG(SPDK_LOG_NVME, "transport max_xfer_size %u\n", ctrlr->max_xfer_size);
853 if (ctrlr->cdata.mdts > 0) {
854 ctrlr->max_xfer_size = spdk_min(ctrlr->max_xfer_size,
855 ctrlr->min_page_size * (1 << (ctrlr->cdata.mdts)));
856 SPDK_DEBUGLOG(SPDK_LOG_NVME, "MDTS max_xfer_size %u\n", ctrlr->max_xfer_size);
857 }
858
859 SPDK_DEBUGLOG(SPDK_LOG_NVME, "CNTLID 0x%04" PRIx16 "\n", ctrlr->cdata.cntlid);
860 if (ctrlr->trid.trtype == SPDK_NVME_TRANSPORT_PCIE) {
861 ctrlr->cntlid = ctrlr->cdata.cntlid;
862 } else {
863 /*
864 * Fabrics controllers should already have CNTLID from the Connect command.
865 *
866 * If CNTLID from Connect doesn't match CNTLID in the Identify Controller data,
867 * trust the one from Connect.
868 */
869 if (ctrlr->cntlid != ctrlr->cdata.cntlid) {
870 SPDK_DEBUGLOG(SPDK_LOG_NVME,
871 "Identify CNTLID 0x%04" PRIx16 " != Connect CNTLID 0x%04" PRIx16 "\n",
872 ctrlr->cdata.cntlid, ctrlr->cntlid);
873 }
874 }
875
876 if (ctrlr->cdata.sgls.supported) {
877 ctrlr->flags |= SPDK_NVME_CTRLR_SGL_SUPPORTED;
878 ctrlr->max_sges = nvme_transport_ctrlr_get_max_sges(ctrlr);
879 }
880
881 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_SET_NUM_QUEUES, NVME_TIMEOUT_INFINITE);
882 }
883
884 static int
885 nvme_ctrlr_identify(struct spdk_nvme_ctrlr *ctrlr)
886 {
887 int rc;
888
889 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_WAIT_FOR_IDENTIFY, NVME_TIMEOUT_INFINITE);
890
891 rc = nvme_ctrlr_cmd_identify(ctrlr, SPDK_NVME_IDENTIFY_CTRLR, 0, 0,
892 &ctrlr->cdata, sizeof(ctrlr->cdata),
893 nvme_ctrlr_identify_done, ctrlr);
894 if (rc != 0) {
895 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE);
896 return rc;
897 }
898
899 return 0;
900 }
901
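/*
 * Build the active namespace list. Controllers at NVMe 1.1 or later are asked
 * for the active NS list (CNS 0x02) in chunks of 1024 NSIDs; older controllers
 * get a synthesized list covering NSIDs 1..num_ns.
 */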
902 int
903 nvme_ctrlr_identify_active_ns(struct spdk_nvme_ctrlr *ctrlr)
904 {
905 struct nvme_completion_poll_status status;
906 int rc;
907 uint32_t i;
908 uint32_t num_pages;
909 uint32_t next_nsid = 0;
910 uint32_t *new_ns_list = NULL;
911
912
913 /*
914 * The allocated size must be a multiple of sizeof(struct spdk_nvme_ns_list)
915 */
916 num_pages = (ctrlr->num_ns * sizeof(new_ns_list[0]) - 1) / sizeof(struct spdk_nvme_ns_list) + 1;
917 new_ns_list = spdk_dma_zmalloc(num_pages * sizeof(struct spdk_nvme_ns_list), ctrlr->page_size,
918 NULL);
919 if (!new_ns_list) {
920 SPDK_ERRLOG("Failed to allocate active_ns_list!\n");
921 return -ENOMEM;
922 }
923
924 if (ctrlr->vs.raw >= SPDK_NVME_VERSION(1, 1, 0) && !(ctrlr->quirks & NVME_QUIRK_IDENTIFY_CNS)) {
925 /*
926 * Iterate through the pages and fetch each chunk of 1024 namespaces until
927 * there are no more active namespaces
928 */
929 for (i = 0; i < num_pages; i++) {
930 rc = nvme_ctrlr_cmd_identify(ctrlr, SPDK_NVME_IDENTIFY_ACTIVE_NS_LIST, 0, next_nsid,
931 &new_ns_list[1024 * i], sizeof(struct spdk_nvme_ns_list),
932 nvme_completion_poll_cb, &status);
933 if (rc != 0) {
934 goto fail;
935 }
936 if (spdk_nvme_wait_for_completion(ctrlr->adminq, &status)) {
937 SPDK_ERRLOG("nvme_ctrlr_cmd_identify_active_ns_list failed!\n");
938 rc = -ENXIO;
939 goto fail;
940 }
941 next_nsid = new_ns_list[1024 * i + 1023];
942 if (next_nsid == 0) {
943 /*
944 * No more active namespaces found, no need to fetch additional chunks
945 */
946 break;
947 }
948 }
949
950 } else {
951 /*
952 * Controller doesn't support active ns list CNS 0x02 so dummy up
953 * an active ns list
954 */
955 for (i = 0; i < ctrlr->num_ns; i++) {
956 new_ns_list[i] = i + 1;
957 }
958 }
959
960 /*
961 * Now that the list is properly set up, we can swap it into the ctrlr and
962 * free up the previous one.
963 */
964 spdk_dma_free(ctrlr->active_ns_list);
965 ctrlr->active_ns_list = new_ns_list;
966
967 return 0;
968 fail:
969 spdk_dma_free(new_ns_list);
970 return rc;
971 }
972
973 static void
974 nvme_ctrlr_identify_ns_async_done(void *arg, const struct spdk_nvme_cpl *cpl)
975 {
976 struct spdk_nvme_ns *ns = (struct spdk_nvme_ns *)arg;
977 struct spdk_nvme_ctrlr *ctrlr = ns->ctrlr;
978 uint32_t nsid;
979 int rc;
980
981 if (spdk_nvme_cpl_is_error(cpl)) {
982 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE);
983 return;
984 } else {
985 nvme_ns_set_identify_data(ns);
986 }
987
988 /* move on to the next active NS */
989 nsid = spdk_nvme_ctrlr_get_next_active_ns(ctrlr, ns->id);
990 ns = spdk_nvme_ctrlr_get_ns(ctrlr, nsid);
991 if (ns == NULL) {
992 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_IDENTIFY_ID_DESCS, NVME_TIMEOUT_INFINITE);
993 return;
994 }
995 ns->ctrlr = ctrlr;
996 ns->id = nsid;
997
998 rc = nvme_ctrlr_identify_ns_async(ns);
999 if (rc) {
1000 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE);
1001 }
1002 }
1003
1004 static int
1005 nvme_ctrlr_identify_ns_async(struct spdk_nvme_ns *ns)
1006 {
1007 struct spdk_nvme_ctrlr *ctrlr = ns->ctrlr;
1008 struct spdk_nvme_ns_data *nsdata;
1009
1010 nsdata = &ctrlr->nsdata[ns->id - 1];
1011
1012 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_WAIT_FOR_IDENTIFY_NS, NVME_TIMEOUT_INFINITE);
1013 return nvme_ctrlr_cmd_identify(ns->ctrlr, SPDK_NVME_IDENTIFY_NS, 0, ns->id,
1014 nsdata, sizeof(*nsdata),
1015 nvme_ctrlr_identify_ns_async_done, ns);
1016 }
1017
1018 static int
1019 nvme_ctrlr_identify_namespaces(struct spdk_nvme_ctrlr *ctrlr)
1020 {
1021 uint32_t nsid;
1022 struct spdk_nvme_ns *ns;
1023 int rc;
1024
1025 nsid = spdk_nvme_ctrlr_get_first_active_ns(ctrlr);
1026 ns = spdk_nvme_ctrlr_get_ns(ctrlr, nsid);
1027 if (ns == NULL) {
1028 /* No active NS, move on to the next state */
1029 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_CONFIGURE_AER, NVME_TIMEOUT_INFINITE);
1030 return 0;
1031 }
1032
1033 ns->ctrlr = ctrlr;
1034 ns->id = nsid;
1035
1036 rc = nvme_ctrlr_identify_ns_async(ns);
1037 if (rc) {
1038 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE);
1039 }
1040
1041 return rc;
1042 }
1043
1044 static void
1045 nvme_ctrlr_identify_id_desc_async_done(void *arg, const struct spdk_nvme_cpl *cpl)
1046 {
1047 struct spdk_nvme_ns *ns = (struct spdk_nvme_ns *)arg;
1048 struct spdk_nvme_ctrlr *ctrlr = ns->ctrlr;
1049 uint32_t nsid;
1050 int rc;
1051
1052 if (spdk_nvme_cpl_is_error(cpl)) {
1053 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_CONFIGURE_AER, NVME_TIMEOUT_INFINITE);
1054 return;
1055 }
1056
1057 /* move on to the next active NS */
1058 nsid = spdk_nvme_ctrlr_get_next_active_ns(ctrlr, ns->id);
1059 ns = spdk_nvme_ctrlr_get_ns(ctrlr, nsid);
1060 if (ns == NULL) {
1061 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_CONFIGURE_AER, NVME_TIMEOUT_INFINITE);
1062 return;
1063 }
1064
1065 rc = nvme_ctrlr_identify_id_desc_async(ns);
1066 if (rc) {
1067 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE);
1068 }
1069 }
1070
1071 static int
1072 nvme_ctrlr_identify_id_desc_async(struct spdk_nvme_ns *ns)
1073 {
1074 struct spdk_nvme_ctrlr *ctrlr = ns->ctrlr;
1075
1076 memset(ns->id_desc_list, 0, sizeof(ns->id_desc_list));
1077
1078 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_WAIT_FOR_IDENTIFY_ID_DESCS, NVME_TIMEOUT_INFINITE);
1079 return nvme_ctrlr_cmd_identify(ns->ctrlr, SPDK_NVME_IDENTIFY_NS_ID_DESCRIPTOR_LIST,
1080 0, ns->id, ns->id_desc_list, sizeof(ns->id_desc_list),
1081 nvme_ctrlr_identify_id_desc_async_done, ns);
1082 }
1083
1084 static int
1085 nvme_ctrlr_identify_id_desc_namespaces(struct spdk_nvme_ctrlr *ctrlr)
1086 {
1087 uint32_t nsid;
1088 struct spdk_nvme_ns *ns;
1089 int rc;
1090
1091 if (ctrlr->vs.raw < SPDK_NVME_VERSION(1, 3, 0) ||
1092 (ctrlr->quirks & NVME_QUIRK_IDENTIFY_CNS)) {
1093 SPDK_DEBUGLOG(SPDK_LOG_NVME, "Version < 1.3; not attempting to retrieve NS ID Descriptor List\n");
1094 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_CONFIGURE_AER, NVME_TIMEOUT_INFINITE);
1095 return 0;
1096 }
1097
1098 nsid = spdk_nvme_ctrlr_get_first_active_ns(ctrlr);
1099 ns = spdk_nvme_ctrlr_get_ns(ctrlr, nsid);
1100 if (ns == NULL) {
1101 /* No active NS, move on to the next state */
1102 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_CONFIGURE_AER, NVME_TIMEOUT_INFINITE);
1103 return 0;
1104 }
1105
1106 rc = nvme_ctrlr_identify_id_desc_async(ns);
1107 if (rc) {
1108 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE);
1109 }
1110
1111 return rc;
1112 }
1113
1114 static void
1115 nvme_ctrlr_set_num_queues_done(void *arg, const struct spdk_nvme_cpl *cpl)
1116 {
1117 struct spdk_nvme_ctrlr *ctrlr = (struct spdk_nvme_ctrlr *)arg;
1118
1119 if (spdk_nvme_cpl_is_error(cpl)) {
1120 SPDK_ERRLOG("Set Features - Number of Queues failed!\n");
1121 }
1122 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_GET_NUM_QUEUES, NVME_TIMEOUT_INFINITE);
1123 }
1124
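/*
 * Clamp the requested I/O queue count to [1, SPDK_NVME_MAX_IO_QUEUES] and
 * issue Set Features - Number of Queues.
 */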
1125 static int
1126 nvme_ctrlr_set_num_queues(struct spdk_nvme_ctrlr *ctrlr)
1127 {
1128 int rc;
1129
1130 if (ctrlr->opts.num_io_queues > SPDK_NVME_MAX_IO_QUEUES) {
1131 SPDK_NOTICELOG("Limiting requested num_io_queues %u to max %d\n",
1132 ctrlr->opts.num_io_queues, SPDK_NVME_MAX_IO_QUEUES);
1133 ctrlr->opts.num_io_queues = SPDK_NVME_MAX_IO_QUEUES;
1134 } else if (ctrlr->opts.num_io_queues < 1) {
1135 SPDK_NOTICELOG("Requested num_io_queues 0, increasing to 1\n");
1136 ctrlr->opts.num_io_queues = 1;
1137 }
1138
1139 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_WAIT_FOR_SET_NUM_QUEUES, NVME_TIMEOUT_INFINITE);
1140
1141 rc = nvme_ctrlr_cmd_set_num_queues(ctrlr, ctrlr->opts.num_io_queues,
1142 nvme_ctrlr_set_num_queues_done, ctrlr);
1143 if (rc != 0) {
1144 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE);
1145 return rc;
1146 }
1147
1148 return 0;
1149 }
1150
1151 static void
1152 nvme_ctrlr_get_num_queues_done(void *arg, const struct spdk_nvme_cpl *cpl)
1153 {
1154 uint32_t cq_allocated, sq_allocated, min_allocated, i;
1155 struct spdk_nvme_ctrlr *ctrlr = (struct spdk_nvme_ctrlr *)arg;
1156
1157 if (spdk_nvme_cpl_is_error(cpl)) {
1158 SPDK_ERRLOG("Get Features - Number of Queues failed!\n");
1159 ctrlr->opts.num_io_queues = 0;
1160 } else {
1161 /*
1162 * Data in cdw0 is 0-based.
1163 * Lower 16-bits indicate number of submission queues allocated.
1164 * Upper 16-bits indicate number of completion queues allocated.
1165 */
1166 sq_allocated = (cpl->cdw0 & 0xFFFF) + 1;
1167 cq_allocated = (cpl->cdw0 >> 16) + 1;
1168
1169 /*
1170 * For 1:1 queue mapping, set number of allocated queues to be minimum of
1171 * submission and completion queues.
1172 */
1173 min_allocated = spdk_min(sq_allocated, cq_allocated);
1174
1175 /* Set number of queues to be minimum of requested and actually allocated. */
1176 ctrlr->opts.num_io_queues = spdk_min(min_allocated, ctrlr->opts.num_io_queues);
1177 }
1178
1179 ctrlr->free_io_qids = spdk_bit_array_create(ctrlr->opts.num_io_queues + 1);
1180 if (ctrlr->free_io_qids == NULL) {
1181 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE);
1182 return;
1183 }
1184
1185 /* Initialize list of free I/O queue IDs. QID 0 is the admin queue. */
1186 spdk_bit_array_clear(ctrlr->free_io_qids, 0);
1187 for (i = 1; i <= ctrlr->opts.num_io_queues; i++) {
1188 spdk_bit_array_set(ctrlr->free_io_qids, i);
1189 }
1190 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_CONSTRUCT_NS, NVME_TIMEOUT_INFINITE);
1191 }
1192
1193 static int
1194 nvme_ctrlr_get_num_queues(struct spdk_nvme_ctrlr *ctrlr)
1195 {
1196 int rc;
1197
1198 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_WAIT_FOR_GET_NUM_QUEUES, NVME_TIMEOUT_INFINITE);
1199
1200 /* Obtain the number of queues allocated using Get Features. */
1201 rc = nvme_ctrlr_cmd_get_num_queues(ctrlr, nvme_ctrlr_get_num_queues_done, ctrlr);
1202 if (rc != 0) {
1203 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE);
1204 return rc;
1205 }
1206
1207 return 0;
1208 }
1209
1210 static void
1211 nvme_ctrlr_set_keep_alive_timeout_done(void *arg, const struct spdk_nvme_cpl *cpl)
1212 {
1213 uint32_t keep_alive_interval_ms;
1214 struct spdk_nvme_ctrlr *ctrlr = (struct spdk_nvme_ctrlr *)arg;
1215
1216 if (spdk_nvme_cpl_is_error(cpl)) {
1217 SPDK_ERRLOG("Keep alive timeout Get Feature failed: SC %x SCT %x\n",
1218 cpl->status.sc, cpl->status.sct);
1219 ctrlr->opts.keep_alive_timeout_ms = 0;
1220 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE);
1221 return;
1222 }
1223
1224 if (ctrlr->opts.keep_alive_timeout_ms != cpl->cdw0) {
1225 SPDK_DEBUGLOG(SPDK_LOG_NVME, "Controller adjusted keep alive timeout to %u ms\n",
1226 cpl->cdw0);
1227 }
1228
1229 ctrlr->opts.keep_alive_timeout_ms = cpl->cdw0;
1230
1231 keep_alive_interval_ms = ctrlr->opts.keep_alive_timeout_ms / 2;
1232 if (keep_alive_interval_ms == 0) {
1233 keep_alive_interval_ms = 1;
1234 }
1235 SPDK_DEBUGLOG(SPDK_LOG_NVME, "Sending keep alive every %u ms\n", keep_alive_interval_ms);
1236
1237 ctrlr->keep_alive_interval_ticks = (keep_alive_interval_ms * spdk_get_ticks_hz()) / UINT64_C(1000);
1238
1239 /* Schedule the first Keep Alive to be sent as soon as possible. */
1240 ctrlr->next_keep_alive_tick = spdk_get_ticks();
1241 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_SET_HOST_ID, NVME_TIMEOUT_INFINITE);
1242 }
1243
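/*
 * Configure Keep Alive. Skipped when the user requested a timeout of 0 or the
 * controller reports KAS == 0; otherwise read back the timeout the controller
 * actually granted and derive the keep alive send interval from it.
 */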
1244 static int
1245 nvme_ctrlr_set_keep_alive_timeout(struct spdk_nvme_ctrlr *ctrlr)
1246 {
1247 int rc;
1248
1249 if (ctrlr->opts.keep_alive_timeout_ms == 0) {
1250 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_SET_HOST_ID, NVME_TIMEOUT_INFINITE);
1251 return 0;
1252 }
1253
1254 if (ctrlr->cdata.kas == 0) {
1255 SPDK_DEBUGLOG(SPDK_LOG_NVME, "Controller KAS is 0 - not enabling Keep Alive\n");
1256 ctrlr->opts.keep_alive_timeout_ms = 0;
1257 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_SET_HOST_ID, NVME_TIMEOUT_INFINITE);
1258 return 0;
1259 }
1260
1261 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_WAIT_FOR_KEEP_ALIVE_TIMEOUT, NVME_TIMEOUT_INFINITE);
1262
1263 /* Retrieve actual keep alive timeout, since the controller may have adjusted it. */
1264 rc = spdk_nvme_ctrlr_cmd_get_feature(ctrlr, SPDK_NVME_FEAT_KEEP_ALIVE_TIMER, 0, NULL, 0,
1265 nvme_ctrlr_set_keep_alive_timeout_done, ctrlr);
1266 if (rc != 0) {
1267 SPDK_ERRLOG("Keep alive timeout Get Feature failed: %d\n", rc);
1268 ctrlr->opts.keep_alive_timeout_ms = 0;
1269 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE);
1270 return rc;
1271 }
1272
1273 return 0;
1274 }
1275
1276 static void
1277 nvme_ctrlr_set_host_id_done(void *arg, const struct spdk_nvme_cpl *cpl)
1278 {
1279 struct spdk_nvme_ctrlr *ctrlr = (struct spdk_nvme_ctrlr *)arg;
1280
1281 if (spdk_nvme_cpl_is_error(cpl)) {
1282 /*
1283 * Treat Set Features - Host ID failure as non-fatal, since the Host ID feature
1284 * is optional.
1285 */
1286 SPDK_WARNLOG("Set Features - Host ID failed: SC 0x%x SCT 0x%x\n",
1287 cpl->status.sc, cpl->status.sct);
1288 } else {
1289 SPDK_DEBUGLOG(SPDK_LOG_NVME, "Set Features - Host ID was successful\n");
1290 }
1291
1292 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_READY, NVME_TIMEOUT_INFINITE);
1293 }
1294
1295 static int
1296 nvme_ctrlr_set_host_id(struct spdk_nvme_ctrlr *ctrlr)
1297 {
1298 uint8_t *host_id;
1299 uint32_t host_id_size;
1300 int rc;
1301
1302 if (ctrlr->trid.trtype != SPDK_NVME_TRANSPORT_PCIE) {
1303 /*
1304 * NVMe-oF sends the host ID during Connect and doesn't allow
1305 * Set Features - Host Identifier after Connect, so we don't need to do anything here.
1306 */
1307 SPDK_DEBUGLOG(SPDK_LOG_NVME, "NVMe-oF transport - not sending Set Features - Host ID\n");
1308 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_READY, NVME_TIMEOUT_INFINITE);
1309 return 0;
1310 }
1311
1312 if (ctrlr->cdata.ctratt.host_id_exhid_supported) {
1313 SPDK_DEBUGLOG(SPDK_LOG_NVME, "Using 128-bit extended host identifier\n");
1314 host_id = ctrlr->opts.extended_host_id;
1315 host_id_size = sizeof(ctrlr->opts.extended_host_id);
1316 } else {
1317 SPDK_DEBUGLOG(SPDK_LOG_NVME, "Using 64-bit host identifier\n");
1318 host_id = ctrlr->opts.host_id;
1319 host_id_size = sizeof(ctrlr->opts.host_id);
1320 }
1321
1322 /* If the user specified an all-zeroes host identifier, don't send the command. */
1323 if (spdk_mem_all_zero(host_id, host_id_size)) {
1324 SPDK_DEBUGLOG(SPDK_LOG_NVME,
1325 "User did not specify host ID - not sending Set Features - Host ID\n");
1326 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_READY, NVME_TIMEOUT_INFINITE);
1327 return 0;
1328 }
1329
1330 SPDK_TRACEDUMP(SPDK_LOG_NVME, "host_id", host_id, host_id_size);
1331
1332 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_WAIT_FOR_HOST_ID, NVME_TIMEOUT_INFINITE);
1333
1334 rc = nvme_ctrlr_cmd_set_host_id(ctrlr, host_id, host_id_size, nvme_ctrlr_set_host_id_done, ctrlr);
1335 if (rc != 0) {
1336 SPDK_ERRLOG("Set Features - Host ID failed: %d\n", rc);
1337 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE);
1338 return rc;
1339 }
1340
1341 return 0;
1342 }
1343
1344 static void
1345 nvme_ctrlr_destruct_namespaces(struct spdk_nvme_ctrlr *ctrlr)
1346 {
1347 if (ctrlr->ns) {
1348 uint32_t i, num_ns = ctrlr->num_ns;
1349
1350 for (i = 0; i < num_ns; i++) {
1351 nvme_ns_destruct(&ctrlr->ns[i]);
1352 }
1353
1354 spdk_free(ctrlr->ns);
1355 ctrlr->ns = NULL;
1356 ctrlr->num_ns = 0;
1357 }
1358
1359 if (ctrlr->nsdata) {
1360 spdk_free(ctrlr->nsdata);
1361 ctrlr->nsdata = NULL;
1362 }
1363
1364 spdk_dma_free(ctrlr->active_ns_list);
1365 ctrlr->active_ns_list = NULL;
1366 }
1367
1368 static void
1369 nvme_ctrlr_update_namespaces(struct spdk_nvme_ctrlr *ctrlr)
1370 {
1371 uint32_t i, nn = ctrlr->cdata.nn;
1372 struct spdk_nvme_ns_data *nsdata;
1373
1374 for (i = 0; i < nn; i++) {
1375 struct spdk_nvme_ns *ns = &ctrlr->ns[i];
1376 uint32_t nsid = i + 1;
1377 nsdata = &ctrlr->nsdata[nsid - 1];
1378
1379 if ((nsdata->ncap == 0) && spdk_nvme_ctrlr_is_active_ns(ctrlr, nsid)) {
1380 if (nvme_ns_construct(ns, nsid, ctrlr) != 0) {
1381 continue;
1382 }
1383 }
1384
1385 if (nsdata->ncap && !spdk_nvme_ctrlr_is_active_ns(ctrlr, nsid)) {
1386 nvme_ns_destruct(ns);
1387 }
1388 }
1389 }
1390
1391 static int
1392 nvme_ctrlr_construct_namespaces(struct spdk_nvme_ctrlr *ctrlr)
1393 {
1394 int rc = 0;
1395 uint32_t nn = ctrlr->cdata.nn;
1396 uint64_t phys_addr = 0;
1397
1398 /* ctrlr->num_ns may be 0 (startup) or a different number of namespaces (reset),
1399 * so check if we need to reallocate.
1400 */
1401 if (nn != ctrlr->num_ns) {
1402 nvme_ctrlr_destruct_namespaces(ctrlr);
1403
1404 if (nn == 0) {
1405 SPDK_WARNLOG("controller has 0 namespaces\n");
1406 return 0;
1407 }
1408
1409 ctrlr->ns = spdk_zmalloc(nn * sizeof(struct spdk_nvme_ns), 64,
1410 &phys_addr, SPDK_ENV_SOCKET_ID_ANY, SPDK_MALLOC_SHARE);
1411 if (ctrlr->ns == NULL) {
1412 rc = -ENOMEM;
1413 goto fail;
1414 }
1415
1416 ctrlr->nsdata = spdk_zmalloc(nn * sizeof(struct spdk_nvme_ns_data), 64,
1417 &phys_addr, SPDK_ENV_SOCKET_ID_ANY, SPDK_MALLOC_SHARE | SPDK_MALLOC_DMA);
1418 if (ctrlr->nsdata == NULL) {
1419 rc = -ENOMEM;
1420 goto fail;
1421 }
1422
1423 ctrlr->num_ns = nn;
1424 }
1425
1426 return 0;
1427
1428 fail:
1429 nvme_ctrlr_destruct_namespaces(ctrlr);
1430 return rc;
1431 }
1432
1433 static void
1434 nvme_ctrlr_async_event_cb(void *arg, const struct spdk_nvme_cpl *cpl)
1435 {
1436 struct nvme_async_event_request *aer = arg;
1437 struct spdk_nvme_ctrlr *ctrlr = aer->ctrlr;
1438 struct spdk_nvme_ctrlr_process *active_proc;
1439 union spdk_nvme_async_event_completion event;
1440 int rc;
1441
1442 if (cpl->status.sct == SPDK_NVME_SCT_GENERIC &&
1443 cpl->status.sc == SPDK_NVME_SC_ABORTED_SQ_DELETION) {
1444 /*
1445 * This is simulated when controller is being shut down, to
1446 * effectively abort outstanding asynchronous event requests
1447 * and make sure all memory is freed. Do not repost the
1448 * request in this case.
1449 */
1450 return;
1451 }
1452
1453 if (cpl->status.sct == SPDK_NVME_SCT_COMMAND_SPECIFIC &&
1454 cpl->status.sc == SPDK_NVME_SC_ASYNC_EVENT_REQUEST_LIMIT_EXCEEDED) {
1455 /*
1456 * SPDK will only send as many AERs as the device says it supports,
1457 * so this status code indicates an out-of-spec device. Do not repost
1458 * the request in this case.
1459 */
1460 SPDK_ERRLOG("Controller appears out-of-spec for asynchronous event request\n"
1461 "handling. Do not repost this AER.\n");
1462 return;
1463 }
1464
1465 event.raw = cpl->cdw0;
1466 if ((event.bits.async_event_type == SPDK_NVME_ASYNC_EVENT_TYPE_NOTICE) &&
1467 (event.bits.async_event_info == SPDK_NVME_ASYNC_EVENT_NS_ATTR_CHANGED)) {
1468 rc = nvme_ctrlr_identify_active_ns(ctrlr);
1469 if (rc) {
1470 return;
1471 }
1472 nvme_ctrlr_update_namespaces(ctrlr);
1473 }
1474
1475 active_proc = spdk_nvme_ctrlr_get_current_process(ctrlr);
1476 if (active_proc && active_proc->aer_cb_fn) {
1477 active_proc->aer_cb_fn(active_proc->aer_cb_arg, cpl);
1478 }
1479
1480 /*
1481 * Repost another asynchronous event request to replace the one
1482 * that just completed.
1483 */
1484 if (nvme_ctrlr_construct_and_submit_aer(ctrlr, aer)) {
1485 /*
1486 * We can't do anything to recover from a failure here,
1487 * so just print a warning message and leave the AER unsubmitted.
1488 */
1489 SPDK_ERRLOG("resubmitting AER failed!\n");
1490 }
1491 }
1492
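/*
 * Allocate a payload-less admin request with the Asynchronous Event Request
 * opcode and submit it on the admin queue.
 */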
1493 static int
1494 nvme_ctrlr_construct_and_submit_aer(struct spdk_nvme_ctrlr *ctrlr,
1495 struct nvme_async_event_request *aer)
1496 {
1497 struct nvme_request *req;
1498
1499 aer->ctrlr = ctrlr;
1500 req = nvme_allocate_request_null(ctrlr->adminq, nvme_ctrlr_async_event_cb, aer);
1501 aer->req = req;
1502 if (req == NULL) {
1503 return -1;
1504 }
1505
1506 req->cmd.opc = SPDK_NVME_OPC_ASYNC_EVENT_REQUEST;
1507 return nvme_ctrlr_submit_admin_request(ctrlr, req);
1508 }
1509
1510 static void
1511 nvme_ctrlr_configure_aer_done(void *arg, const struct spdk_nvme_cpl *cpl)
1512 {
1513 struct nvme_async_event_request *aer;
1514 int rc;
1515 uint32_t i;
1516 struct spdk_nvme_ctrlr *ctrlr = (struct spdk_nvme_ctrlr *)arg;
1517
1518 if (spdk_nvme_cpl_is_error(cpl)) {
1519 SPDK_NOTICELOG("nvme_ctrlr_configure_aer failed!\n");
1520 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_SET_SUPPORTED_LOG_PAGES, NVME_TIMEOUT_INFINITE);
1521 return;
1522 }
1523
1524 /* aerl is a zero-based value, so we need to add 1 here. */
1525 ctrlr->num_aers = spdk_min(NVME_MAX_ASYNC_EVENTS, (ctrlr->cdata.aerl + 1));
1526
1527 for (i = 0; i < ctrlr->num_aers; i++) {
1528 aer = &ctrlr->aer[i];
1529 rc = nvme_ctrlr_construct_and_submit_aer(ctrlr, aer);
1530 if (rc) {
1531 SPDK_ERRLOG("nvme_ctrlr_construct_and_submit_aer failed!\n");
1532 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE);
1533 return;
1534 }
1535 }
1536 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_SET_SUPPORTED_LOG_PAGES, NVME_TIMEOUT_INFINITE);
1537 }
1538
1539 static int
1540 nvme_ctrlr_configure_aer(struct spdk_nvme_ctrlr *ctrlr)
1541 {
1542 union spdk_nvme_feat_async_event_configuration config;
1543 int rc;
1544
1545 config.raw = 0;
1546 config.bits.crit_warn.bits.available_spare = 1;
1547 config.bits.crit_warn.bits.temperature = 1;
1548 config.bits.crit_warn.bits.device_reliability = 1;
1549 config.bits.crit_warn.bits.read_only = 1;
1550 config.bits.crit_warn.bits.volatile_memory_backup = 1;
1551
1552 if (ctrlr->vs.raw >= SPDK_NVME_VERSION(1, 2, 0)) {
1553 if (ctrlr->cdata.oaes.ns_attribute_notices) {
1554 config.bits.ns_attr_notice = 1;
1555 }
1556 if (ctrlr->cdata.oaes.fw_activation_notices) {
1557 config.bits.fw_activation_notice = 1;
1558 }
1559 }
1560 if (ctrlr->vs.raw >= SPDK_NVME_VERSION(1, 3, 0) && ctrlr->cdata.lpa.telemetry) {
1561 config.bits.telemetry_log_notice = 1;
1562 }
1563
1564 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_WAIT_FOR_CONFIGURE_AER, NVME_TIMEOUT_INFINITE);
1565
1566 rc = nvme_ctrlr_cmd_set_async_event_config(ctrlr, config,
1567 nvme_ctrlr_configure_aer_done,
1568 ctrlr);
1569 if (rc != 0) {
1570 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE);
1571 return rc;
1572 }
1573
1574 return 0;
1575 }
1576
1577 struct spdk_nvme_ctrlr_process *
1578 spdk_nvme_ctrlr_get_process(struct spdk_nvme_ctrlr *ctrlr, pid_t pid)
1579 {
1580 struct spdk_nvme_ctrlr_process *active_proc;
1581
1582 TAILQ_FOREACH(active_proc, &ctrlr->active_procs, tailq) {
1583 if (active_proc->pid == pid) {
1584 return active_proc;
1585 }
1586 }
1587
1588 return NULL;
1589 }
1590
1591 struct spdk_nvme_ctrlr_process *
1592 spdk_nvme_ctrlr_get_current_process(struct spdk_nvme_ctrlr *ctrlr)
1593 {
1594 return spdk_nvme_ctrlr_get_process(ctrlr, getpid());
1595 }
1596
1597 /**
1598 * This function will be called when a process is using the controller.
1599 * 1. For the primary process, it is called when constructing the controller.
1600 * 2. For a secondary process, it is called when probing the controller.
1601 * Note: this checks whether the process has already been added.
1602 */
1603 int
1604 nvme_ctrlr_add_process(struct spdk_nvme_ctrlr *ctrlr, void *devhandle)
1605 {
1606 struct spdk_nvme_ctrlr_process *ctrlr_proc;
1607 pid_t pid = getpid();
1608
1609 /* Check whether the process is already added or not */
1610 if (spdk_nvme_ctrlr_get_process(ctrlr, pid)) {
1611 return 0;
1612 }
1613
1614 /* Initialize the per process properties for this ctrlr */
1615 ctrlr_proc = spdk_zmalloc(sizeof(struct spdk_nvme_ctrlr_process),
1616 64, NULL, SPDK_ENV_SOCKET_ID_ANY, SPDK_MALLOC_SHARE);
1617 if (ctrlr_proc == NULL) {
1618 SPDK_ERRLOG("failed to allocate memory to track the process props\n");
1619
1620 return -1;
1621 }
1622
1623 ctrlr_proc->is_primary = spdk_process_is_primary();
1624 ctrlr_proc->pid = pid;
1625 STAILQ_INIT(&ctrlr_proc->active_reqs);
1626 ctrlr_proc->devhandle = devhandle;
1627 ctrlr_proc->ref = 0;
1628 TAILQ_INIT(&ctrlr_proc->allocated_io_qpairs);
1629
1630 TAILQ_INSERT_TAIL(&ctrlr->active_procs, ctrlr_proc, tailq);
1631
1632 return 0;
1633 }
1634
1635 /**
1636 * This function will be called when the process detaches the controller.
1637 * Note: the ctrlr_lock must be held when calling this function.
1638 */
1639 static void
1640 nvme_ctrlr_remove_process(struct spdk_nvme_ctrlr *ctrlr,
1641 struct spdk_nvme_ctrlr_process *proc)
1642 {
1643 struct spdk_nvme_qpair *qpair, *tmp_qpair;
1644
1645 assert(STAILQ_EMPTY(&proc->active_reqs));
1646
1647 TAILQ_FOREACH_SAFE(qpair, &proc->allocated_io_qpairs, per_process_tailq, tmp_qpair) {
1648 spdk_nvme_ctrlr_free_io_qpair(qpair);
1649 }
1650
1651 TAILQ_REMOVE(&ctrlr->active_procs, proc, tailq);
1652
1653 spdk_dma_free(proc);
1654 }
1655
1656 /**
1657 * This function will be called when a process has exited unexpectedly,
1658 * in order to free any incomplete nvme requests, allocated IO qpairs,
1659 * and allocated memory.
1660 * Note: the ctrlr_lock must be held when calling this function.
1661 */
1662 static void
1663 nvme_ctrlr_cleanup_process(struct spdk_nvme_ctrlr_process *proc)
1664 {
1665 struct nvme_request *req, *tmp_req;
1666 struct spdk_nvme_qpair *qpair, *tmp_qpair;
1667
1668 STAILQ_FOREACH_SAFE(req, &proc->active_reqs, stailq, tmp_req) {
1669 STAILQ_REMOVE(&proc->active_reqs, req, nvme_request, stailq);
1670
1671 assert(req->pid == proc->pid);
1672
1673 nvme_free_request(req);
1674 }
1675
1676 TAILQ_FOREACH_SAFE(qpair, &proc->allocated_io_qpairs, per_process_tailq, tmp_qpair) {
1677 TAILQ_REMOVE(&proc->allocated_io_qpairs, qpair, per_process_tailq);
1678
1679 /*
1680 * The process may have been killed while some qpairs were in their
1681 * completion context. Clear that flag here to allow these IO
1682 * qpairs to be deleted.
1683 */
1684 qpair->in_completion_context = 0;
1685
1686 qpair->no_deletion_notification_needed = 1;
1687
1688 spdk_nvme_ctrlr_free_io_qpair(qpair);
1689 }
1690
1691 spdk_dma_free(proc);
1692 }
1693
1694 /**
1695 * This function will be called when destructing the controller.
1696 * 1. There are no more admin requests outstanding on this controller.
1697 * 2. Clean up any leftover resource allocations whose associated process is gone.
1698 */
1699 void
1700 nvme_ctrlr_free_processes(struct spdk_nvme_ctrlr *ctrlr)
1701 {
1702 struct spdk_nvme_ctrlr_process *active_proc, *tmp;
1703
1704 /* Free all the processes' properties and make sure no pending admin IOs */
1705 TAILQ_FOREACH_SAFE(active_proc, &ctrlr->active_procs, tailq, tmp) {
1706 TAILQ_REMOVE(&ctrlr->active_procs, active_proc, tailq);
1707
1708 assert(STAILQ_EMPTY(&active_proc->active_reqs));
1709
1710 spdk_free(active_proc);
1711 }
1712 }
1713
1714 /**
1715 * This function will be called when any other process attaches or
1716 * detaches the controller in order to clean up any unexpectedly
1717 * terminated processes.
1718 * Note: the ctrlr_lock must be held when calling this function.
1719 */
1720 static int
1721 nvme_ctrlr_remove_inactive_proc(struct spdk_nvme_ctrlr *ctrlr)
1722 {
1723 struct spdk_nvme_ctrlr_process *active_proc, *tmp;
1724 int active_proc_count = 0;
1725
1726 TAILQ_FOREACH_SAFE(active_proc, &ctrlr->active_procs, tailq, tmp) {
1727 if ((kill(active_proc->pid, 0) == -1) && (errno == ESRCH)) {
1728 SPDK_ERRLOG("process %d terminated unexpected\n", active_proc->pid);
1729
1730 TAILQ_REMOVE(&ctrlr->active_procs, active_proc, tailq);
1731
1732 nvme_ctrlr_cleanup_process(active_proc);
1733 } else {
1734 active_proc_count++;
1735 }
1736 }
1737
1738 return active_proc_count;
1739 }
1740
1741 void
1742 nvme_ctrlr_proc_get_ref(struct spdk_nvme_ctrlr *ctrlr)
1743 {
1744 struct spdk_nvme_ctrlr_process *active_proc;
1745
1746 nvme_robust_mutex_lock(&ctrlr->ctrlr_lock);
1747
1748 nvme_ctrlr_remove_inactive_proc(ctrlr);
1749
1750 active_proc = spdk_nvme_ctrlr_get_current_process(ctrlr);
1751 if (active_proc) {
1752 active_proc->ref++;
1753 }
1754
1755 nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock);
1756 }
1757
1758 void
1759 nvme_ctrlr_proc_put_ref(struct spdk_nvme_ctrlr *ctrlr)
1760 {
1761 struct spdk_nvme_ctrlr_process *active_proc;
1762 int proc_count;
1763
1764 nvme_robust_mutex_lock(&ctrlr->ctrlr_lock);
1765
1766 proc_count = nvme_ctrlr_remove_inactive_proc(ctrlr);
1767
1768 active_proc = spdk_nvme_ctrlr_get_current_process(ctrlr);
1769 if (active_proc) {
1770 active_proc->ref--;
1771 assert(active_proc->ref >= 0);
1772
1773 /*
1774 * The last active process will be removed at the end of
1775 * the destruction of the controller.
1776 */
1777 if (active_proc->ref == 0 && proc_count != 1) {
1778 nvme_ctrlr_remove_process(ctrlr, active_proc);
1779 }
1780 }
1781
1782 nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock);
1783 }
1784
1785 int
1786 nvme_ctrlr_get_ref_count(struct spdk_nvme_ctrlr *ctrlr)
1787 {
1788 struct spdk_nvme_ctrlr_process *active_proc;
1789 int ref = 0;
1790
1791 nvme_robust_mutex_lock(&ctrlr->ctrlr_lock);
1792
1793 nvme_ctrlr_remove_inactive_proc(ctrlr);
1794
1795 TAILQ_FOREACH(active_proc, &ctrlr->active_procs, tailq) {
1796 ref += active_proc->ref;
1797 }
1798
1799 nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock);
1800
1801 return ref;
1802 }
1803
1804 /**
1805 * Get the PCI device handle which is only visible to its associated process.
1806 */
1807 struct spdk_pci_device *
1808 nvme_ctrlr_proc_get_devhandle(struct spdk_nvme_ctrlr *ctrlr)
1809 {
1810 struct spdk_nvme_ctrlr_process *active_proc;
1811 struct spdk_pci_device *devhandle = NULL;
1812
1813 nvme_robust_mutex_lock(&ctrlr->ctrlr_lock);
1814
1815 active_proc = spdk_nvme_ctrlr_get_current_process(ctrlr);
1816 if (active_proc) {
1817 devhandle = active_proc->devhandle;
1818 }
1819
1820 nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock);
1821
1822 return devhandle;
1823 }
1824
1825 static void
1826 nvme_ctrlr_enable_admin_queue(struct spdk_nvme_ctrlr *ctrlr)
1827 {
1828 nvme_transport_qpair_reset(ctrlr->adminq);
1829 nvme_qpair_enable(ctrlr->adminq);
1830 }
1831
1832 /**
1833 * This function will be called repeatedly during initialization until the controller is ready.
1834 */
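/*
 * Editor's note: a minimal sketch (not from the original sources) of how a
 * caller is expected to drive this non-blocking state machine, assuming the
 * controller has already been constructed. NVME_CTRLR_STATE_READY and
 * NVME_CTRLR_STATE_ERROR are the terminal states used in this file.
 *
 *     int rc = 0;
 *
 *     while (ctrlr->state != NVME_CTRLR_STATE_READY) {
 *         rc = nvme_ctrlr_process_init(ctrlr);
 *         if (rc != 0) {
 *             break;
 *         }
 *     }
 *
 * A non-zero return value (or a final state of NVME_CTRLR_STATE_ERROR)
 * indicates that initialization failed or timed out.
 */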
1835 int
1836 nvme_ctrlr_process_init(struct spdk_nvme_ctrlr *ctrlr)
1837 {
1838 union spdk_nvme_cc_register cc;
1839 union spdk_nvme_csts_register csts;
1840 uint32_t ready_timeout_in_ms;
1841 int rc = 0;
1842
1843 /*
1844 * We may need to avoid accessing any register on the target controller
1845 * for a while, so return early without advancing the state machine.
1846 * (sleep_timeout_tsc > 0 is checked explicitly for the benefit of unit tests.)
1847 */
1848 if ((ctrlr->sleep_timeout_tsc > 0) &&
1849 (spdk_get_ticks() <= ctrlr->sleep_timeout_tsc)) {
1850 return 0;
1851 }
1852 ctrlr->sleep_timeout_tsc = 0;
1853
1854 if (nvme_ctrlr_get_cc(ctrlr, &cc) ||
1855 nvme_ctrlr_get_csts(ctrlr, &csts)) {
1856 if (ctrlr->state_timeout_tsc != NVME_TIMEOUT_INFINITE) {
1857 /* While a device is resetting, it may be unable to service MMIO reads
1858 * temporarily. Allow for this case.
1859 */
1860 SPDK_ERRLOG("Get registers failed while waiting for CSTS.RDY == 0\n");
1861 goto init_timeout;
1862 }
1863 SPDK_ERRLOG("Failed to read CC and CSTS in state %d\n", ctrlr->state);
1864 nvme_ctrlr_fail(ctrlr, false);
1865 return -EIO;
1866 }
1867
1868 ready_timeout_in_ms = 500 * ctrlr->cap.bits.to;
1869
1870 /*
1871 * Check if the current initialization step is done or has timed out.
1872 */
1873 switch (ctrlr->state) {
1874 case NVME_CTRLR_STATE_INIT_DELAY:
1875 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_INIT, ready_timeout_in_ms);
1876 /*
1877 * Controller may need some delay before it's enabled.
1878 *
1879 * This is a workaround for an issue where the PCIe-attached NVMe controller
1880 * is not ready after VFIO reset. We delay the initialization rather than the
1881 * enabling itself, because this is required only for the very first enabling
1882 * - directly after a VFIO reset.
1883 *
1884 * TODO: Figure out what is actually going wrong.
1885 */
1886 SPDK_DEBUGLOG(SPDK_LOG_NVME, "Adding 2 second delay before initializing the controller\n");
1887 ctrlr->sleep_timeout_tsc = spdk_get_ticks() + (2000 * spdk_get_ticks_hz() / 1000);
1888 break;
1889
1890 case NVME_CTRLR_STATE_INIT:
1891 /* Begin the hardware initialization by making sure the controller is disabled. */
1892 if (cc.bits.en) {
1893 SPDK_DEBUGLOG(SPDK_LOG_NVME, "CC.EN = 1\n");
1894 /*
1895 * Controller is currently enabled. We need to disable it to cause a reset.
1896 *
1897 * If CC.EN = 1 && CSTS.RDY = 0, the controller is in the process of becoming ready.
1898 * Wait for the ready bit to be 1 before disabling the controller.
1899 */
1900 if (csts.bits.rdy == 0) {
1901 SPDK_DEBUGLOG(SPDK_LOG_NVME, "CC.EN = 1 && CSTS.RDY = 0 - waiting for reset to complete\n");
1902 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_DISABLE_WAIT_FOR_READY_1, ready_timeout_in_ms);
1903 return 0;
1904 }
1905
1906 /* CC.EN = 1 && CSTS.RDY == 1, so we can immediately disable the controller. */
1907 SPDK_DEBUGLOG(SPDK_LOG_NVME, "Setting CC.EN = 0\n");
1908 cc.bits.en = 0;
1909 if (nvme_ctrlr_set_cc(ctrlr, &cc)) {
1910 SPDK_ERRLOG("set_cc() failed\n");
1911 nvme_ctrlr_fail(ctrlr, false);
1912 return -EIO;
1913 }
1914 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_DISABLE_WAIT_FOR_READY_0, ready_timeout_in_ms);
1915
1916 /*
1917 * If the quirk applies, wait 2.5 seconds before accessing PCI registers again.
1918 * sleep() is not used here, to avoid blocking other controllers' initialization.
1919 */
1920 if (ctrlr->quirks & NVME_QUIRK_DELAY_BEFORE_CHK_RDY) {
1921 SPDK_DEBUGLOG(SPDK_LOG_NVME, "Applying quirk: delay 2.5 seconds before reading registers\n");
1922 ctrlr->sleep_timeout_tsc = spdk_get_ticks() + (2500 * spdk_get_ticks_hz() / 1000);
1923 }
1924 return 0;
1925 } else {
1926 if (csts.bits.rdy == 1) {
1927 SPDK_DEBUGLOG(SPDK_LOG_NVME, "CC.EN = 0 && CSTS.RDY = 1 - waiting for shutdown to complete\n");
1928 }
1929
1930 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_DISABLE_WAIT_FOR_READY_0, ready_timeout_in_ms);
1931 return 0;
1932 }
1933 break;
1934
1935 case NVME_CTRLR_STATE_DISABLE_WAIT_FOR_READY_1:
1936 if (csts.bits.rdy == 1) {
1937 SPDK_DEBUGLOG(SPDK_LOG_NVME, "CC.EN = 1 && CSTS.RDY = 1 - disabling controller\n");
1938 /* CC.EN = 1 && CSTS.RDY = 1, so we can set CC.EN = 0 now. */
1939 SPDK_DEBUGLOG(SPDK_LOG_NVME, "Setting CC.EN = 0\n");
1940 cc.bits.en = 0;
1941 if (nvme_ctrlr_set_cc(ctrlr, &cc)) {
1942 SPDK_ERRLOG("set_cc() failed\n");
1943 nvme_ctrlr_fail(ctrlr, false);
1944 return -EIO;
1945 }
1946 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_DISABLE_WAIT_FOR_READY_0, ready_timeout_in_ms);
1947 return 0;
1948 }
1949 break;
1950
1951 case NVME_CTRLR_STATE_DISABLE_WAIT_FOR_READY_0:
1952 if (csts.bits.rdy == 0) {
1953 SPDK_DEBUGLOG(SPDK_LOG_NVME, "CC.EN = 0 && CSTS.RDY = 0\n");
1954 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ENABLE, ready_timeout_in_ms);
1955 /*
1956 * Delay 100us before setting CC.EN = 1. Some NVMe SSDs miss CC.EN getting
1957 * set to 1 if it is too soon after CSTS.RDY is reported as 0.
1958 */
1959 spdk_delay_us(100);
1960 return 0;
1961 }
1962 break;
1963
1964 case NVME_CTRLR_STATE_ENABLE:
1965 SPDK_DEBUGLOG(SPDK_LOG_NVME, "Setting CC.EN = 1\n");
1966 rc = nvme_ctrlr_enable(ctrlr);
1967 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ENABLE_WAIT_FOR_READY_1, ready_timeout_in_ms);
1968 return rc;
1969
1970 case NVME_CTRLR_STATE_ENABLE_WAIT_FOR_READY_1:
1971 if (csts.bits.rdy == 1) {
1972 SPDK_DEBUGLOG(SPDK_LOG_NVME, "CC.EN = 1 && CSTS.RDY = 1 - controller is ready\n");
1973 /*
1974 * The controller has been enabled.
1975 * Perform the rest of initialization serially.
1976 */
1977 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ENABLE_ADMIN_QUEUE, NVME_TIMEOUT_INFINITE);
1978 return 0;
1979 }
1980 break;
1981
1982 case NVME_CTRLR_STATE_ENABLE_ADMIN_QUEUE:
1983 nvme_ctrlr_enable_admin_queue(ctrlr);
1984 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_IDENTIFY, NVME_TIMEOUT_INFINITE);
1985 break;
1986
1987 case NVME_CTRLR_STATE_IDENTIFY:
1988 rc = nvme_ctrlr_identify(ctrlr);
1989 break;
1990
1991 case NVME_CTRLR_STATE_WAIT_FOR_IDENTIFY:
1992 spdk_nvme_qpair_process_completions(ctrlr->adminq, 0);
1993 break;
1994
1995 case NVME_CTRLR_STATE_SET_NUM_QUEUES:
1996 rc = nvme_ctrlr_set_num_queues(ctrlr);
1997 break;
1998
1999 case NVME_CTRLR_STATE_WAIT_FOR_SET_NUM_QUEUES:
2000 spdk_nvme_qpair_process_completions(ctrlr->adminq, 0);
2001 break;
2002
2003 case NVME_CTRLR_STATE_GET_NUM_QUEUES:
2004 rc = nvme_ctrlr_get_num_queues(ctrlr);
2005 break;
2006
2007 case NVME_CTRLR_STATE_WAIT_FOR_GET_NUM_QUEUES:
2008 spdk_nvme_qpair_process_completions(ctrlr->adminq, 0);
2009 break;
2010
2011 case NVME_CTRLR_STATE_CONSTRUCT_NS:
2012 rc = nvme_ctrlr_construct_namespaces(ctrlr);
2013 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_IDENTIFY_ACTIVE_NS, NVME_TIMEOUT_INFINITE);
2014 break;
2015
2016 case NVME_CTRLR_STATE_IDENTIFY_ACTIVE_NS:
2017 rc = nvme_ctrlr_identify_active_ns(ctrlr);
2018 if (rc < 0) {
2019 nvme_ctrlr_destruct_namespaces(ctrlr);
2020 }
2021 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_IDENTIFY_NS, NVME_TIMEOUT_INFINITE);
2022 break;
2023
2024 case NVME_CTRLR_STATE_IDENTIFY_NS:
2025 rc = nvme_ctrlr_identify_namespaces(ctrlr);
2026 break;
2027
2028 case NVME_CTRLR_STATE_WAIT_FOR_IDENTIFY_NS:
2029 spdk_nvme_qpair_process_completions(ctrlr->adminq, 0);
2030 break;
2031
2032 case NVME_CTRLR_STATE_IDENTIFY_ID_DESCS:
2033 rc = nvme_ctrlr_identify_id_desc_namespaces(ctrlr);
2034 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_CONFIGURE_AER, NVME_TIMEOUT_INFINITE);
2035 break;
2036
2037 case NVME_CTRLR_STATE_WAIT_FOR_IDENTIFY_ID_DESCS:
2038 spdk_nvme_qpair_process_completions(ctrlr->adminq, 0);
2039 break;
2040
2041 case NVME_CTRLR_STATE_CONFIGURE_AER:
2042 rc = nvme_ctrlr_configure_aer(ctrlr);
2043 break;
2044
2045 case NVME_CTRLR_STATE_WAIT_FOR_CONFIGURE_AER:
2046 spdk_nvme_qpair_process_completions(ctrlr->adminq, 0);
2047 break;
2048
2049 case NVME_CTRLR_STATE_SET_SUPPORTED_LOG_PAGES:
2050 rc = nvme_ctrlr_set_supported_log_pages(ctrlr);
2051 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_SET_SUPPORTED_FEATURES, NVME_TIMEOUT_INFINITE);
2052 break;
2053
2054 case NVME_CTRLR_STATE_SET_SUPPORTED_FEATURES:
2055 nvme_ctrlr_set_supported_features(ctrlr);
2056 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_SET_DB_BUF_CFG, NVME_TIMEOUT_INFINITE);
2057 break;
2058
2059 case NVME_CTRLR_STATE_SET_DB_BUF_CFG:
2060 rc = nvme_ctrlr_set_doorbell_buffer_config(ctrlr);
2061 break;
2062
2063 case NVME_CTRLR_STATE_WAIT_FOR_DB_BUF_CFG:
2064 spdk_nvme_qpair_process_completions(ctrlr->adminq, 0);
2065 break;
2066
2067 case NVME_CTRLR_STATE_SET_KEEP_ALIVE_TIMEOUT:
2068 rc = nvme_ctrlr_set_keep_alive_timeout(ctrlr);
2069 break;
2070
2071 case NVME_CTRLR_STATE_WAIT_FOR_KEEP_ALIVE_TIMEOUT:
2072 spdk_nvme_qpair_process_completions(ctrlr->adminq, 0);
2073 break;
2074
2075 case NVME_CTRLR_STATE_SET_HOST_ID:
2076 rc = nvme_ctrlr_set_host_id(ctrlr);
2077 break;
2078
2079 case NVME_CTRLR_STATE_WAIT_FOR_HOST_ID:
2080 spdk_nvme_qpair_process_completions(ctrlr->adminq, 0);
2081 break;
2082
2083 case NVME_CTRLR_STATE_READY:
2084 SPDK_DEBUGLOG(SPDK_LOG_NVME, "Ctrlr already in ready state\n");
2085 return 0;
2086
2087 case NVME_CTRLR_STATE_ERROR:
2088 SPDK_ERRLOG("Ctrlr %s is in error state\n", ctrlr->trid.traddr);
2089 return -1;
2090
2091 default:
2092 assert(0);
2093 nvme_ctrlr_fail(ctrlr, false);
2094 return -1;
2095 }
2096
2097 init_timeout:
2098 if (ctrlr->state_timeout_tsc != NVME_TIMEOUT_INFINITE &&
2099 spdk_get_ticks() > ctrlr->state_timeout_tsc) {
2100 SPDK_ERRLOG("Initialization timed out in state %d\n", ctrlr->state);
2101 nvme_ctrlr_fail(ctrlr, false);
2102 return -1;
2103 }
2104
2105 return rc;
2106 }
2107
2108 int
2109 nvme_robust_mutex_init_recursive_shared(pthread_mutex_t *mtx)
2110 {
2111 pthread_mutexattr_t attr;
2112 int rc = 0;
2113
2114 if (pthread_mutexattr_init(&attr)) {
2115 return -1;
2116 }
2117 if (pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_RECURSIVE) ||
2118 #ifndef __FreeBSD__
2119 pthread_mutexattr_setrobust(&attr, PTHREAD_MUTEX_ROBUST) ||
2120 pthread_mutexattr_setpshared(&attr, PTHREAD_PROCESS_SHARED) ||
2121 #endif
2122 pthread_mutex_init(mtx, &attr)) {
2123 rc = -1;
2124 }
2125 pthread_mutexattr_destroy(&attr);
2126 return rc;
2127 }
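
/*
 * Editor's note: a brief sketch (not part of the original sources) of why the
 * mutex is created as PTHREAD_MUTEX_ROBUST on non-FreeBSD platforms. If a
 * process dies while holding a robust process-shared mutex, the next locker
 * receives EOWNERDEAD and can repair the mutex instead of deadlocking:
 *
 *     int rc = pthread_mutex_lock(mtx);
 *     if (rc == EOWNERDEAD) {
 *         // Previous owner died while holding the lock; shared state may
 *         // need recovery here before marking the mutex consistent.
 *         pthread_mutex_consistent(mtx);
 *         rc = 0;
 *     }
 */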
2128
2129 int
2130 nvme_ctrlr_construct(struct spdk_nvme_ctrlr *ctrlr)
2131 {
2132 int rc;
2133
2134 if (ctrlr->trid.trtype == SPDK_NVME_TRANSPORT_PCIE) {
2135 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_INIT_DELAY, NVME_TIMEOUT_INFINITE);
2136 } else {
2137 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_INIT, NVME_TIMEOUT_INFINITE);
2138 }
2139
2140 ctrlr->flags = 0;
2141 ctrlr->free_io_qids = NULL;
2142 ctrlr->is_resetting = false;
2143 ctrlr->is_failed = false;
2144
2145 TAILQ_INIT(&ctrlr->active_io_qpairs);
2146 STAILQ_INIT(&ctrlr->queued_aborts);
2147 ctrlr->outstanding_aborts = 0;
2148
2149 rc = nvme_robust_mutex_init_recursive_shared(&ctrlr->ctrlr_lock);
2150 if (rc != 0) {
2151 return rc;
2152 }
2153
2154 TAILQ_INIT(&ctrlr->active_procs);
2155
2156 return rc;
2157 }
2158
2159 /* This function should be called once at ctrlr initialization to set up constant properties. */
2160 void
2161 nvme_ctrlr_init_cap(struct spdk_nvme_ctrlr *ctrlr, const union spdk_nvme_cap_register *cap,
2162 const union spdk_nvme_vs_register *vs)
2163 {
2164 ctrlr->cap = *cap;
2165 ctrlr->vs = *vs;
2166
2167 ctrlr->min_page_size = 1u << (12 + ctrlr->cap.bits.mpsmin);
2168
2169 /* For now, always select page_size == min_page_size. */
2170 ctrlr->page_size = ctrlr->min_page_size;
2171
2172 ctrlr->opts.io_queue_size = spdk_max(ctrlr->opts.io_queue_size, SPDK_NVME_IO_QUEUE_MIN_ENTRIES);
2173 ctrlr->opts.io_queue_size = spdk_min(ctrlr->opts.io_queue_size, MAX_IO_QUEUE_ENTRIES);
2174 ctrlr->opts.io_queue_size = spdk_min(ctrlr->opts.io_queue_size, ctrlr->cap.bits.mqes + 1u);
2175
2176 ctrlr->opts.io_queue_requests = spdk_max(ctrlr->opts.io_queue_requests, ctrlr->opts.io_queue_size);
2177 }
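
/*
 * Editor's note: an illustrative worked example with assumed register values
 * (not from the original sources). For a controller reporting CAP.MPSMIN = 0
 * and CAP.MQES = 1023:
 *
 *     min_page_size = 1u << (12 + 0) = 4096 bytes (4 KiB)
 *     io_queue_size = min(requested size, MAX_IO_QUEUE_ENTRIES, 1023 + 1)
 *
 * where the requested size is first raised to at least
 * SPDK_NVME_IO_QUEUE_MIN_ENTRIES, and io_queue_requests is then raised to at
 * least the resulting io_queue_size.
 */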
2178
2179 void
2180 nvme_ctrlr_destruct_finish(struct spdk_nvme_ctrlr *ctrlr)
2181 {
2182 pthread_mutex_destroy(&ctrlr->ctrlr_lock);
2183 }
2184
2185 void
2186 nvme_ctrlr_destruct(struct spdk_nvme_ctrlr *ctrlr)
2187 {
2188 struct spdk_nvme_qpair *qpair, *tmp;
2189
2190 SPDK_DEBUGLOG(SPDK_LOG_NVME, "Prepare to destruct SSD: %s\n", ctrlr->trid.traddr);
2191 TAILQ_FOREACH_SAFE(qpair, &ctrlr->active_io_qpairs, tailq, tmp) {
2192 spdk_nvme_ctrlr_free_io_qpair(qpair);
2193 }
2194
2195 nvme_ctrlr_free_doorbell_buffer(ctrlr);
2196
2197 nvme_ctrlr_shutdown(ctrlr);
2198
2199 nvme_ctrlr_destruct_namespaces(ctrlr);
2200
2201 spdk_bit_array_free(&ctrlr->free_io_qids);
2202
2203 nvme_transport_ctrlr_destruct(ctrlr);
2204 }
2205
2206 int
2207 nvme_ctrlr_submit_admin_request(struct spdk_nvme_ctrlr *ctrlr,
2208 struct nvme_request *req)
2209 {
2210 return nvme_qpair_submit_request(ctrlr->adminq, req);
2211 }
2212
2213 static void
2214 nvme_keep_alive_completion(void *cb_ctx, const struct spdk_nvme_cpl *cpl)
2215 {
2216 /* Do nothing */
2217 }
2218
2219 /*
2220 * Check if we need to send a Keep Alive command.
2221 * Caller must hold ctrlr->ctrlr_lock.
2222 */
2223 static void
2224 nvme_ctrlr_keep_alive(struct spdk_nvme_ctrlr *ctrlr)
2225 {
2226 uint64_t now;
2227 struct nvme_request *req;
2228 struct spdk_nvme_cmd *cmd;
2229 int rc;
2230
2231 now = spdk_get_ticks();
2232 if (now < ctrlr->next_keep_alive_tick) {
2233 return;
2234 }
2235
2236 req = nvme_allocate_request_null(ctrlr->adminq, nvme_keep_alive_completion, NULL);
2237 if (req == NULL) {
2238 return;
2239 }
2240
2241 cmd = &req->cmd;
2242 cmd->opc = SPDK_NVME_OPC_KEEP_ALIVE;
2243
2244 rc = nvme_ctrlr_submit_admin_request(ctrlr, req);
2245 if (rc != 0) {
2246 SPDK_ERRLOG("Submitting Keep Alive failed\n");
2247 }
2248
2249 ctrlr->next_keep_alive_tick = now + ctrlr->keep_alive_interval_ticks;
2250 }
2251
2252 int32_t
2253 spdk_nvme_ctrlr_process_admin_completions(struct spdk_nvme_ctrlr *ctrlr)
2254 {
2255 int32_t num_completions;
2256
2257 nvme_robust_mutex_lock(&ctrlr->ctrlr_lock);
2258 if (ctrlr->keep_alive_interval_ticks) {
2259 nvme_ctrlr_keep_alive(ctrlr);
2260 }
2261 num_completions = spdk_nvme_qpair_process_completions(ctrlr->adminq, 0);
2262 nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock);
2263
2264 return num_completions;
2265 }
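
/*
 * Editor's note: a minimal usage sketch (not from the original sources). An
 * application that enables keep-alive in its controller options is expected to
 * call this routine periodically from a single thread; each call both reaps
 * admin completions and, once the keep-alive interval has elapsed, queues the
 * next Keep Alive command via nvme_ctrlr_keep_alive() above.
 *
 *     for (;;) {
 *         spdk_nvme_ctrlr_process_admin_completions(ctrlr);
 *         // ... other work; poll again well within the keep-alive timeout ...
 *     }
 */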
2266
2267 const struct spdk_nvme_ctrlr_data *
2268 spdk_nvme_ctrlr_get_data(struct spdk_nvme_ctrlr *ctrlr)
2269 {
2270 return &ctrlr->cdata;
2271 }
2272
2273 union spdk_nvme_csts_register spdk_nvme_ctrlr_get_regs_csts(struct spdk_nvme_ctrlr *ctrlr)
2274 {
2275 union spdk_nvme_csts_register csts;
2276
2277 if (nvme_ctrlr_get_csts(ctrlr, &csts)) {
2278 csts.raw = 0xFFFFFFFFu;
2279 }
2280 return csts;
2281 }
2282
2283 union spdk_nvme_cap_register spdk_nvme_ctrlr_get_regs_cap(struct spdk_nvme_ctrlr *ctrlr)
2284 {
2285 return ctrlr->cap;
2286 }
2287
2288 union spdk_nvme_vs_register spdk_nvme_ctrlr_get_regs_vs(struct spdk_nvme_ctrlr *ctrlr)
2289 {
2290 return ctrlr->vs;
2291 }
2292
2293 uint32_t
2294 spdk_nvme_ctrlr_get_num_ns(struct spdk_nvme_ctrlr *ctrlr)
2295 {
2296 return ctrlr->num_ns;
2297 }
2298
2299 static int32_t
2300 spdk_nvme_ctrlr_active_ns_idx(struct spdk_nvme_ctrlr *ctrlr, uint32_t nsid)
2301 {
2302 int32_t result = -1;
2303
2304 if (ctrlr->active_ns_list == NULL || nsid == 0 || nsid > ctrlr->num_ns) {
2305 return result;
2306 }
2307
2308 int32_t lower = 0;
2309 int32_t upper = ctrlr->num_ns - 1;
2310 int32_t mid;
2311
2312 while (lower <= upper) {
2313 mid = lower + (upper - lower) / 2;
2314 if (ctrlr->active_ns_list[mid] == nsid) {
2315 result = mid;
2316 break;
2317 } else {
2318 if (ctrlr->active_ns_list[mid] != 0 && ctrlr->active_ns_list[mid] < nsid) {
2319 lower = mid + 1;
2320 } else {
2321 upper = mid - 1;
2322 }
2323
2324 }
2325 }
2326
2327 return result;
2328 }
2329
2330 bool
2331 spdk_nvme_ctrlr_is_active_ns(struct spdk_nvme_ctrlr *ctrlr, uint32_t nsid)
2332 {
2333 return spdk_nvme_ctrlr_active_ns_idx(ctrlr, nsid) != -1;
2334 }
2335
2336 uint32_t
2337 spdk_nvme_ctrlr_get_first_active_ns(struct spdk_nvme_ctrlr *ctrlr)
2338 {
2339 return ctrlr->active_ns_list ? ctrlr->active_ns_list[0] : 0;
2340 }
2341
2342 uint32_t
2343 spdk_nvme_ctrlr_get_next_active_ns(struct spdk_nvme_ctrlr *ctrlr, uint32_t prev_nsid)
2344 {
2345 int32_t nsid_idx = spdk_nvme_ctrlr_active_ns_idx(ctrlr, prev_nsid);
2346 if (ctrlr->active_ns_list && nsid_idx >= 0 && (uint32_t)nsid_idx < ctrlr->num_ns - 1) {
2347 return ctrlr->active_ns_list[nsid_idx + 1];
2348 }
2349 return 0;
2350 }
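
/*
 * Editor's note: the two helpers above form an iterator over the active
 * namespace IDs. A sketch of the conventional loop (not copied from the
 * original sources):
 *
 *     uint32_t nsid;
 *
 *     for (nsid = spdk_nvme_ctrlr_get_first_active_ns(ctrlr);
 *          nsid != 0;
 *          nsid = spdk_nvme_ctrlr_get_next_active_ns(ctrlr, nsid)) {
 *         struct spdk_nvme_ns *ns = spdk_nvme_ctrlr_get_ns(ctrlr, nsid);
 *         // ... use ns ...
 *     }
 */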
2351
2352 struct spdk_nvme_ns *
2353 spdk_nvme_ctrlr_get_ns(struct spdk_nvme_ctrlr *ctrlr, uint32_t nsid)
2354 {
2355 if (nsid < 1 || nsid > ctrlr->num_ns) {
2356 return NULL;
2357 }
2358
2359 return &ctrlr->ns[nsid - 1];
2360 }
2361
2362 struct spdk_pci_device *
2363 spdk_nvme_ctrlr_get_pci_device(struct spdk_nvme_ctrlr *ctrlr)
2364 {
2365 if (ctrlr == NULL) {
2366 return NULL;
2367 }
2368
2369 if (ctrlr->trid.trtype != SPDK_NVME_TRANSPORT_PCIE) {
2370 return NULL;
2371 }
2372
2373 return nvme_ctrlr_proc_get_devhandle(ctrlr);
2374 }
2375
2376 uint32_t
2377 spdk_nvme_ctrlr_get_max_xfer_size(const struct spdk_nvme_ctrlr *ctrlr)
2378 {
2379 return ctrlr->max_xfer_size;
2380 }
2381
2382 void
2383 spdk_nvme_ctrlr_register_aer_callback(struct spdk_nvme_ctrlr *ctrlr,
2384 spdk_nvme_aer_cb aer_cb_fn,
2385 void *aer_cb_arg)
2386 {
2387 struct spdk_nvme_ctrlr_process *active_proc;
2388
2389 nvme_robust_mutex_lock(&ctrlr->ctrlr_lock);
2390
2391 active_proc = spdk_nvme_ctrlr_get_current_process(ctrlr);
2392 if (active_proc) {
2393 active_proc->aer_cb_fn = aer_cb_fn;
2394 active_proc->aer_cb_arg = aer_cb_arg;
2395 }
2396
2397 nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock);
2398 }
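
/*
 * Editor's note: an illustrative sketch (not from the original sources) of
 * registering an AER handler. The callback signature is assumed to match the
 * spdk_nvme_aer_cb typedef from the public SPDK headers; it runs from within
 * completion processing, so it should not block.
 *
 *     static void
 *     my_aer_handler(void *arg, const struct spdk_nvme_cpl *cpl)
 *     {
 *         if (!spdk_nvme_cpl_is_error(cpl)) {
 *             // cpl->cdw0 carries the async event type, info and log page ID
 *         }
 *     }
 *
 *     spdk_nvme_ctrlr_register_aer_callback(ctrlr, my_aer_handler, NULL);
 */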
2399
2400 void
2401 spdk_nvme_ctrlr_register_timeout_callback(struct spdk_nvme_ctrlr *ctrlr,
2402 uint64_t timeout_us, spdk_nvme_timeout_cb cb_fn, void *cb_arg)
2403 {
2404 struct spdk_nvme_ctrlr_process *active_proc;
2405
2406 nvme_robust_mutex_lock(&ctrlr->ctrlr_lock);
2407
2408 active_proc = spdk_nvme_ctrlr_get_current_process(ctrlr);
2409 if (active_proc) {
2410 active_proc->timeout_ticks = timeout_us * spdk_get_ticks_hz() / 1000000ULL;
2411 active_proc->timeout_cb_fn = cb_fn;
2412 active_proc->timeout_cb_arg = cb_arg;
2413 }
2414
2415 ctrlr->timeout_enabled = true;
2416
2417 nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock);
2418 }
2419
2420 bool
2421 spdk_nvme_ctrlr_is_log_page_supported(struct spdk_nvme_ctrlr *ctrlr, uint8_t log_page)
2422 {
2423 /* No bounds check necessary, since log_page is uint8_t and log_page_supported has 256 entries */
2424 SPDK_STATIC_ASSERT(sizeof(ctrlr->log_page_supported) == 256, "log_page_supported size mismatch");
2425 return ctrlr->log_page_supported[log_page];
2426 }
2427
2428 bool
2429 spdk_nvme_ctrlr_is_feature_supported(struct spdk_nvme_ctrlr *ctrlr, uint8_t feature_code)
2430 {
2431 /* No bounds check necessary, since feature_code is uint8_t and feature_supported has 256 entries */
2432 SPDK_STATIC_ASSERT(sizeof(ctrlr->feature_supported) == 256, "feature_supported size mismatch");
2433 return ctrlr->feature_supported[feature_code];
2434 }
2435
2436 int
2437 spdk_nvme_ctrlr_attach_ns(struct spdk_nvme_ctrlr *ctrlr, uint32_t nsid,
2438 struct spdk_nvme_ctrlr_list *payload)
2439 {
2440 struct nvme_completion_poll_status status;
2441 int res;
2442 struct spdk_nvme_ns *ns;
2443
2444 res = nvme_ctrlr_cmd_attach_ns(ctrlr, nsid, payload,
2445 nvme_completion_poll_cb, &status);
2446 if (res) {
2447 return res;
2448 }
2449 if (spdk_nvme_wait_for_completion_robust_lock(ctrlr->adminq, &status, &ctrlr->ctrlr_lock)) {
2450 SPDK_ERRLOG("spdk_nvme_ctrlr_attach_ns failed!\n");
2451 return -ENXIO;
2452 }
2453
2454 res = nvme_ctrlr_identify_active_ns(ctrlr);
2455 if (res) {
2456 return res;
2457 }
2458
2459 ns = &ctrlr->ns[nsid - 1];
2460 return nvme_ns_construct(ns, nsid, ctrlr);
2461 }
2462
2463 int
2464 spdk_nvme_ctrlr_detach_ns(struct spdk_nvme_ctrlr *ctrlr, uint32_t nsid,
2465 struct spdk_nvme_ctrlr_list *payload)
2466 {
2467 struct nvme_completion_poll_status status;
2468 int res;
2469 struct spdk_nvme_ns *ns;
2470
2471 res = nvme_ctrlr_cmd_detach_ns(ctrlr, nsid, payload,
2472 nvme_completion_poll_cb, &status);
2473 if (res) {
2474 return res;
2475 }
2476 if (spdk_nvme_wait_for_completion_robust_lock(ctrlr->adminq, &status, &ctrlr->ctrlr_lock)) {
2477 SPDK_ERRLOG("spdk_nvme_ctrlr_detach_ns failed!\n");
2478 return -ENXIO;
2479 }
2480
2481 res = nvme_ctrlr_identify_active_ns(ctrlr);
2482 if (res) {
2483 return res;
2484 }
2485
2486 ns = &ctrlr->ns[nsid - 1];
2487 /* The namespace is now inactive; tear down its local state. */
2488 nvme_ns_destruct(ns);
2489
2490 return 0;
2491 }
2492
2493 uint32_t
2494 spdk_nvme_ctrlr_create_ns(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_ns_data *payload)
2495 {
2496 struct nvme_completion_poll_status status;
2497 int res;
2498 uint32_t nsid;
2499 struct spdk_nvme_ns *ns;
2500
2501 res = nvme_ctrlr_cmd_create_ns(ctrlr, payload, nvme_completion_poll_cb, &status);
2502 if (res) {
2503 return 0;
2504 }
2505 if (spdk_nvme_wait_for_completion_robust_lock(ctrlr->adminq, &status, &ctrlr->ctrlr_lock)) {
2506 SPDK_ERRLOG("spdk_nvme_ctrlr_create_ns failed!\n");
2507 return 0;
2508 }
2509
2510 nsid = status.cpl.cdw0;
2511 ns = &ctrlr->ns[nsid - 1];
2512 /* The new namespace is not attached to this controller yet, so construct it as inactive. */
2513 res = nvme_ns_construct(ns, nsid, ctrlr);
2514 if (res) {
2515 return 0;
2516 }
2517
2518 /* Return the namespace ID that was created */
2519 return nsid;
2520 }
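
/*
 * Editor's note: a hedged usage sketch (not from the original sources) of the
 * create-then-attach sequence. Field names are assumed to follow the
 * spdk_nvme_ns_data and spdk_nvme_ctrlr_list definitions in the public
 * headers; desired_num_blocks is a placeholder for the caller's sizing.
 *
 *     struct spdk_nvme_ns_data ns_data = {};
 *     struct spdk_nvme_ctrlr_list ctrlr_list = {};
 *     uint32_t nsid;
 *
 *     ns_data.nsze = desired_num_blocks;
 *     ns_data.ncap = desired_num_blocks;
 *     nsid = spdk_nvme_ctrlr_create_ns(ctrlr, &ns_data);
 *     if (nsid != 0) {
 *         ctrlr_list.ctrlr_count = 1;
 *         ctrlr_list.ctrlr_list[0] = spdk_nvme_ctrlr_get_data(ctrlr)->cntlid;
 *         spdk_nvme_ctrlr_attach_ns(ctrlr, nsid, &ctrlr_list);
 *     }
 */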
2521
2522 int
2523 spdk_nvme_ctrlr_delete_ns(struct spdk_nvme_ctrlr *ctrlr, uint32_t nsid)
2524 {
2525 struct nvme_completion_poll_status status;
2526 int res;
2527 struct spdk_nvme_ns *ns;
2528
2529 res = nvme_ctrlr_cmd_delete_ns(ctrlr, nsid, nvme_completion_poll_cb, &status);
2530 if (res) {
2531 return res;
2532 }
2533 if (spdk_nvme_wait_for_completion_robust_lock(ctrlr->adminq, &status, &ctrlr->ctrlr_lock)) {
2534 SPDK_ERRLOG("spdk_nvme_ctrlr_delete_ns failed!\n");
2535 return -ENXIO;
2536 }
2537
2538 res = nvme_ctrlr_identify_active_ns(ctrlr);
2539 if (res) {
2540 return res;
2541 }
2542
2543 ns = &ctrlr->ns[nsid - 1];
2544 nvme_ns_destruct(ns);
2545
2546 return 0;
2547 }
2548
2549 int
2550 spdk_nvme_ctrlr_format(struct spdk_nvme_ctrlr *ctrlr, uint32_t nsid,
2551 struct spdk_nvme_format *format)
2552 {
2553 struct nvme_completion_poll_status status;
2554 int res;
2555
2556 res = nvme_ctrlr_cmd_format(ctrlr, nsid, format, nvme_completion_poll_cb,
2557 &status);
2558 if (res) {
2559 return res;
2560 }
2561 if (spdk_nvme_wait_for_completion_robust_lock(ctrlr->adminq, &status, &ctrlr->ctrlr_lock)) {
2562 SPDK_ERRLOG("spdk_nvme_ctrlr_format failed!\n");
2563 return -ENXIO;
2564 }
2565
2566 return spdk_nvme_ctrlr_reset(ctrlr);
2567 }
2568
2569 int
2570 spdk_nvme_ctrlr_update_firmware(struct spdk_nvme_ctrlr *ctrlr, void *payload, uint32_t size,
2571 int slot, enum spdk_nvme_fw_commit_action commit_action, struct spdk_nvme_status *completion_status)
2572 {
2573 struct spdk_nvme_fw_commit fw_commit;
2574 struct nvme_completion_poll_status status;
2575 int res;
2576 unsigned int size_remaining;
2577 unsigned int offset;
2578 unsigned int transfer;
2579 void *p;
2580
2581 if (!completion_status) {
2582 return -EINVAL;
2583 }
2584 memset(completion_status, 0, sizeof(struct spdk_nvme_status));
2585 if (size % 4) {
2586 SPDK_ERRLOG("spdk_nvme_ctrlr_update_firmware invalid size!\n");
2587 return -1;
2588 }
2589
2590 /* Currently only SPDK_NVME_FW_COMMIT_REPLACE_IMG and
2591 * SPDK_NVME_FW_COMMIT_REPLACE_AND_ENABLE_IMG are supported.
2592 */
2593 if ((commit_action != SPDK_NVME_FW_COMMIT_REPLACE_IMG) &&
2594 (commit_action != SPDK_NVME_FW_COMMIT_REPLACE_AND_ENABLE_IMG)) {
2595 SPDK_ERRLOG("spdk_nvme_ctrlr_update_firmware invalid commit action!\n");
2596 return -1;
2597 }
2598
2599 /* Firmware download */
2600 size_remaining = size;
2601 offset = 0;
2602 p = payload;
2603
2604 while (size_remaining > 0) {
2605 transfer = spdk_min(size_remaining, ctrlr->min_page_size);
2606
2607 res = nvme_ctrlr_cmd_fw_image_download(ctrlr, transfer, offset, p,
2608 nvme_completion_poll_cb,
2609 &status);
2610 if (res) {
2611 return res;
2612 }
2613
2614 if (spdk_nvme_wait_for_completion_robust_lock(ctrlr->adminq, &status, &ctrlr->ctrlr_lock)) {
2615 SPDK_ERRLOG("spdk_nvme_ctrlr_fw_image_download failed!\n");
2616 return -ENXIO;
2617 }
2618 p += transfer;
2619 offset += transfer;
2620 size_remaining -= transfer;
2621 }
2622
2623 /* Firmware commit */
2624 memset(&fw_commit, 0, sizeof(struct spdk_nvme_fw_commit));
2625 fw_commit.fs = slot;
2626 fw_commit.ca = commit_action;
2627
2628 res = nvme_ctrlr_cmd_fw_commit(ctrlr, &fw_commit, nvme_completion_poll_cb,
2629 &status);
2630 if (res) {
2631 return res;
2632 }
2633
2634 res = spdk_nvme_wait_for_completion_robust_lock(ctrlr->adminq, &status, &ctrlr->ctrlr_lock);
2635
2636 memcpy(completion_status, &status.cpl.status, sizeof(struct spdk_nvme_status));
2637
2638 if (res) {
2639 if (status.cpl.status.sct != SPDK_NVME_SCT_COMMAND_SPECIFIC ||
2640 status.cpl.status.sc != SPDK_NVME_SC_FIRMWARE_REQ_NVM_RESET) {
2641 if (status.cpl.status.sct == SPDK_NVME_SCT_COMMAND_SPECIFIC &&
2642 status.cpl.status.sc == SPDK_NVME_SC_FIRMWARE_REQ_CONVENTIONAL_RESET) {
2643 SPDK_NOTICELOG("firmware activation requires a conventional reset to be performed\n");
2644 } else {
2645 SPDK_ERRLOG("nvme_ctrlr_cmd_fw_commit failed!\n");
2646 }
2647 return -ENXIO;
2648 }
2649 }
2650
2651 return spdk_nvme_ctrlr_reset(ctrlr);
2652 }
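
/*
 * Editor's note: a hedged usage sketch (not from the original sources). The
 * image size must be a multiple of 4 bytes, and since the download is issued
 * through admin commands the buffer is typically allocated from DMA-able
 * memory with spdk_dma_zmalloc(). fw_image and fw_image_size are placeholders
 * for the caller's data; the slot and commit action are illustrative choices.
 *
 *     struct spdk_nvme_status fw_status;
 *     void *buf = spdk_dma_zmalloc(fw_image_size, 4096, NULL);
 *
 *     memcpy(buf, fw_image, fw_image_size);
 *     if (spdk_nvme_ctrlr_update_firmware(ctrlr, buf, fw_image_size, 0,
 *             SPDK_NVME_FW_COMMIT_REPLACE_AND_ENABLE_IMG, &fw_status) != 0) {
 *         // inspect fw_status.sct / fw_status.sc for the failure reason
 *     }
 *     spdk_dma_free(buf);
 */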
2653
2654 void *
2655 spdk_nvme_ctrlr_alloc_cmb_io_buffer(struct spdk_nvme_ctrlr *ctrlr, size_t size)
2656 {
2657 void *buf;
2658
2659 if (size == 0) {
2660 return NULL;
2661 }
2662
2663 nvme_robust_mutex_lock(&ctrlr->ctrlr_lock);
2664 buf = nvme_transport_ctrlr_alloc_cmb_io_buffer(ctrlr, size);
2665 nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock);
2666
2667 return buf;
2668 }
2669
2670 void
2671 spdk_nvme_ctrlr_free_cmb_io_buffer(struct spdk_nvme_ctrlr *ctrlr, void *buf, size_t size)
2672 {
2673 if (buf && size) {
2674 nvme_robust_mutex_lock(&ctrlr->ctrlr_lock);
2675 nvme_transport_ctrlr_free_cmb_io_buffer(ctrlr, buf, size);
2676 nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock);
2677 }
2678 }