ceph/src/spdk/lib/nvme/nvme_ctrlr.c
1 /*-
2 * BSD LICENSE
3 *
4 * Copyright (c) Intel Corporation.
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 *
11 * * Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * * Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in
15 * the documentation and/or other materials provided with the
16 * distribution.
17 * * Neither the name of Intel Corporation nor the names of its
18 * contributors may be used to endorse or promote products derived
19 * from this software without specific prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32 */
33
34 #include "nvme_internal.h"
35 #include "spdk/env.h"
36 #include <signal.h>
37
38 static int nvme_ctrlr_construct_and_submit_aer(struct spdk_nvme_ctrlr *ctrlr,
39 struct nvme_async_event_request *aer);
40
41 static int
42 nvme_ctrlr_get_cc(struct spdk_nvme_ctrlr *ctrlr, union spdk_nvme_cc_register *cc)
43 {
44 return nvme_transport_ctrlr_get_reg_4(ctrlr, offsetof(struct spdk_nvme_registers, cc.raw),
45 &cc->raw);
46 }
47
48 static int
49 nvme_ctrlr_get_csts(struct spdk_nvme_ctrlr *ctrlr, union spdk_nvme_csts_register *csts)
50 {
51 return nvme_transport_ctrlr_get_reg_4(ctrlr, offsetof(struct spdk_nvme_registers, csts.raw),
52 &csts->raw);
53 }
54
55 int
56 nvme_ctrlr_get_cap(struct spdk_nvme_ctrlr *ctrlr, union spdk_nvme_cap_register *cap)
57 {
58 return nvme_transport_ctrlr_get_reg_8(ctrlr, offsetof(struct spdk_nvme_registers, cap.raw),
59 &cap->raw);
60 }
61
62 static int
63 nvme_ctrlr_get_vs(struct spdk_nvme_ctrlr *ctrlr, union spdk_nvme_vs_register *vs)
64 {
65 return nvme_transport_ctrlr_get_reg_4(ctrlr, offsetof(struct spdk_nvme_registers, vs.raw),
66 &vs->raw);
67 }
68
69 static int
70 nvme_ctrlr_set_cc(struct spdk_nvme_ctrlr *ctrlr, const union spdk_nvme_cc_register *cc)
71 {
72 return nvme_transport_ctrlr_set_reg_4(ctrlr, offsetof(struct spdk_nvme_registers, cc.raw),
73 cc->raw);
74 }
75
76 void
77 spdk_nvme_ctrlr_opts_set_defaults(struct spdk_nvme_ctrlr_opts *opts)
78 {
79 opts->num_io_queues = DEFAULT_MAX_IO_QUEUES;
80 opts->use_cmb_sqs = false;
81 opts->arb_mechanism = SPDK_NVME_CC_AMS_RR;
82 opts->keep_alive_timeout_ms = 10 * 1000;
83 opts->io_queue_size = DEFAULT_IO_QUEUE_SIZE;
84 strncpy(opts->hostnqn, DEFAULT_HOSTNQN, sizeof(opts->hostnqn));
85 opts->io_queue_requests = DEFAULT_IO_QUEUE_REQUESTS;
86 }
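/*
 * Illustrative sketch (not part of this file): an application usually tunes
 * these defaults from its probe callback, which receives the opts structure
 * after spdk_nvme_ctrlr_opts_set_defaults() has filled it in. The probe_cb
 * signature below is assumed to match spdk_nvme_probe_cb in spdk/nvme.h for
 * this SPDK version.
 *
 *	static bool
 *	probe_cb(void *cb_ctx, const struct spdk_nvme_transport_id *trid,
 *		 struct spdk_nvme_ctrlr_opts *opts)
 *	{
 *		opts->io_queue_size = 256;		 // ask for deeper I/O queues
 *		opts->keep_alive_timeout_ms = 30 * 1000; // 30 second keep alive
 *		return true;				 // attach to this controller
 *	}
 */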
87
88 /**
89  * This function will be called when a process allocates an I/O qpair.
90 * Note: the ctrlr_lock must be held when calling this function.
91 */
92 static void
93 nvme_ctrlr_proc_add_io_qpair(struct spdk_nvme_qpair *qpair)
94 {
95 struct spdk_nvme_ctrlr_process *active_proc;
96 struct spdk_nvme_ctrlr *ctrlr = qpair->ctrlr;
97 pid_t pid = getpid();
98
99 TAILQ_FOREACH(active_proc, &ctrlr->active_procs, tailq) {
100 if (active_proc->pid == pid) {
101 TAILQ_INSERT_TAIL(&active_proc->allocated_io_qpairs, qpair,
102 per_process_tailq);
103 break;
104 }
105 }
106 }
107
108 /**
109  * This function will be called when a process frees an I/O qpair.
110 * Note: the ctrlr_lock must be held when calling this function.
111 */
112 static void
113 nvme_ctrlr_proc_remove_io_qpair(struct spdk_nvme_qpair *qpair)
114 {
115 struct spdk_nvme_ctrlr_process *active_proc;
116 struct spdk_nvme_ctrlr *ctrlr = qpair->ctrlr;
117 struct spdk_nvme_qpair *active_qpair, *tmp_qpair;
118 pid_t pid = getpid();
119 bool proc_found = false;
120
121 TAILQ_FOREACH(active_proc, &ctrlr->active_procs, tailq) {
122 if (active_proc->pid == pid) {
123 proc_found = true;
124 break;
125 }
126 }
127
128 if (proc_found == false) {
129 return;
130 }
131
132 TAILQ_FOREACH_SAFE(active_qpair, &active_proc->allocated_io_qpairs,
133 per_process_tailq, tmp_qpair) {
134 if (active_qpair == qpair) {
135 TAILQ_REMOVE(&active_proc->allocated_io_qpairs,
136 active_qpair, per_process_tailq);
137
138 break;
139 }
140 }
141 }
142
143 struct spdk_nvme_qpair *
144 spdk_nvme_ctrlr_alloc_io_qpair(struct spdk_nvme_ctrlr *ctrlr,
145 enum spdk_nvme_qprio qprio)
146 {
147 uint32_t qid;
148 struct spdk_nvme_qpair *qpair;
149 union spdk_nvme_cc_register cc;
150
151 if (nvme_ctrlr_get_cc(ctrlr, &cc)) {
152 SPDK_ERRLOG("get_cc failed\n");
153 return NULL;
154 }
155
156 /* Only the low 2 bits (values 0, 1, 2, 3) of QPRIO are valid. */
157 if ((qprio & 3) != qprio) {
158 return NULL;
159 }
160
161 /*
162 * Only value SPDK_NVME_QPRIO_URGENT(0) is valid for the
163 * default round robin arbitration method.
164 */
165 if ((cc.bits.ams == SPDK_NVME_CC_AMS_RR) && (qprio != SPDK_NVME_QPRIO_URGENT)) {
166 SPDK_ERRLOG("invalid queue priority for default round robin arbitration method\n");
167 return NULL;
168 }
169
170 nvme_robust_mutex_lock(&ctrlr->ctrlr_lock);
171
172 /*
173 * Get the first available I/O queue ID.
174 */
175 qid = spdk_bit_array_find_first_set(ctrlr->free_io_qids, 1);
176 if (qid > ctrlr->opts.num_io_queues) {
177 SPDK_ERRLOG("No free I/O queue IDs\n");
178 nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock);
179 return NULL;
180 }
181
182 qpair = nvme_transport_ctrlr_create_io_qpair(ctrlr, qid, qprio);
183 if (qpair == NULL) {
184 SPDK_ERRLOG("transport->ctrlr_create_io_qpair() failed\n");
185 nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock);
186 return NULL;
187 }
188 spdk_bit_array_clear(ctrlr->free_io_qids, qid);
189 TAILQ_INSERT_TAIL(&ctrlr->active_io_qpairs, qpair, tailq);
190
191 nvme_ctrlr_proc_add_io_qpair(qpair);
192
193 nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock);
194
195 return qpair;
196 }
197
198 int
199 spdk_nvme_ctrlr_free_io_qpair(struct spdk_nvme_qpair *qpair)
200 {
201 struct spdk_nvme_ctrlr *ctrlr;
202
203 if (qpair == NULL) {
204 return 0;
205 }
206
207 ctrlr = qpair->ctrlr;
208
209 if (qpair->in_completion_context) {
210 /*
211 * There are many cases where it is convenient to delete an io qpair in the context
212 * of that qpair's completion routine. To handle this properly, set a flag here
213 * so that the completion routine will perform an actual delete after the context
214 * unwinds.
215 */
216 qpair->delete_after_completion_context = 1;
217 return 0;
218 }
219
220 nvme_robust_mutex_lock(&ctrlr->ctrlr_lock);
221
222 nvme_ctrlr_proc_remove_io_qpair(qpair);
223
224 TAILQ_REMOVE(&ctrlr->active_io_qpairs, qpair, tailq);
225 spdk_bit_array_set(ctrlr->free_io_qids, qpair->id);
226
227 spdk_free(qpair->req_buf);
228
229 if (nvme_transport_ctrlr_delete_io_qpair(ctrlr, qpair)) {
230 nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock);
231 return -1;
232 }
233
234 nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock);
235 return 0;
236 }
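/*
 * Illustrative sketch (not part of this file): allocating and releasing an I/O
 * qpair from application code. SPDK_NVME_QPRIO_URGENT is the only priority
 * accepted while the controller uses the default round robin arbitration, as
 * checked in spdk_nvme_ctrlr_alloc_io_qpair() above.
 *
 *	struct spdk_nvme_qpair *qpair;
 *
 *	qpair = spdk_nvme_ctrlr_alloc_io_qpair(ctrlr, SPDK_NVME_QPRIO_URGENT);
 *	if (qpair == NULL) {
 *		// no free queue ID, invalid priority, or transport failure
 *		return -1;
 *	}
 *	// ... submit I/O, then poll spdk_nvme_qpair_process_completions(qpair, 0) ...
 *	spdk_nvme_ctrlr_free_io_qpair(qpair);
 */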
237
238 static void
239 nvme_ctrlr_construct_intel_support_log_page_list(struct spdk_nvme_ctrlr *ctrlr,
240 struct spdk_nvme_intel_log_page_directory *log_page_directory)
241 {
242 if (log_page_directory == NULL) {
243 return;
244 }
245
246 if (ctrlr->cdata.vid != SPDK_PCI_VID_INTEL) {
247 return;
248 }
249
250 ctrlr->log_page_supported[SPDK_NVME_INTEL_LOG_PAGE_DIRECTORY] = true;
251
252 if (log_page_directory->read_latency_log_len ||
253 (ctrlr->quirks & NVME_INTEL_QUIRK_READ_LATENCY)) {
254 ctrlr->log_page_supported[SPDK_NVME_INTEL_LOG_READ_CMD_LATENCY] = true;
255 }
256 if (log_page_directory->write_latency_log_len ||
257 (ctrlr->quirks & NVME_INTEL_QUIRK_WRITE_LATENCY)) {
258 ctrlr->log_page_supported[SPDK_NVME_INTEL_LOG_WRITE_CMD_LATENCY] = true;
259 }
260 if (log_page_directory->temperature_statistics_log_len) {
261 ctrlr->log_page_supported[SPDK_NVME_INTEL_LOG_TEMPERATURE] = true;
262 }
263 if (log_page_directory->smart_log_len) {
264 ctrlr->log_page_supported[SPDK_NVME_INTEL_LOG_SMART] = true;
265 }
266 if (log_page_directory->marketing_description_log_len) {
267 ctrlr->log_page_supported[SPDK_NVME_INTEL_MARKETING_DESCRIPTION] = true;
268 }
269 }
270
271 static int nvme_ctrlr_set_intel_support_log_pages(struct spdk_nvme_ctrlr *ctrlr)
272 {
273 uint64_t phys_addr = 0;
274 struct nvme_completion_poll_status status;
275 struct spdk_nvme_intel_log_page_directory *log_page_directory;
276
277 log_page_directory = spdk_zmalloc(sizeof(struct spdk_nvme_intel_log_page_directory),
278 64, &phys_addr);
279 if (log_page_directory == NULL) {
280 SPDK_ERRLOG("could not allocate log_page_directory\n");
281 return -ENXIO;
282 }
283
284 status.done = false;
285 spdk_nvme_ctrlr_cmd_get_log_page(ctrlr, SPDK_NVME_INTEL_LOG_PAGE_DIRECTORY, SPDK_NVME_GLOBAL_NS_TAG,
286 log_page_directory, sizeof(struct spdk_nvme_intel_log_page_directory), 0,
287 nvme_completion_poll_cb,
288 &status);
289 while (status.done == false) {
290 spdk_nvme_qpair_process_completions(ctrlr->adminq, 0);
291 }
292 if (spdk_nvme_cpl_is_error(&status.cpl)) {
293 spdk_free(log_page_directory);
294 SPDK_ERRLOG("nvme_ctrlr_cmd_get_log_page failed!\n");
295 return -ENXIO;
296 }
297
298 nvme_ctrlr_construct_intel_support_log_page_list(ctrlr, log_page_directory);
299 spdk_free(log_page_directory);
300 return 0;
301 }
302
303 static void
304 nvme_ctrlr_set_supported_log_pages(struct spdk_nvme_ctrlr *ctrlr)
305 {
306 memset(ctrlr->log_page_supported, 0, sizeof(ctrlr->log_page_supported));
307 /* Mandatory pages */
308 ctrlr->log_page_supported[SPDK_NVME_LOG_ERROR] = true;
309 ctrlr->log_page_supported[SPDK_NVME_LOG_HEALTH_INFORMATION] = true;
310 ctrlr->log_page_supported[SPDK_NVME_LOG_FIRMWARE_SLOT] = true;
311 if (ctrlr->cdata.lpa.celp) {
312 ctrlr->log_page_supported[SPDK_NVME_LOG_COMMAND_EFFECTS_LOG] = true;
313 }
314 if (ctrlr->cdata.vid == SPDK_PCI_VID_INTEL) {
315 nvme_ctrlr_set_intel_support_log_pages(ctrlr);
316 }
317 }
318
319 static void
320 nvme_ctrlr_set_intel_supported_features(struct spdk_nvme_ctrlr *ctrlr)
321 {
322 ctrlr->feature_supported[SPDK_NVME_INTEL_FEAT_MAX_LBA] = true;
323 ctrlr->feature_supported[SPDK_NVME_INTEL_FEAT_NATIVE_MAX_LBA] = true;
324 ctrlr->feature_supported[SPDK_NVME_INTEL_FEAT_POWER_GOVERNOR_SETTING] = true;
325 ctrlr->feature_supported[SPDK_NVME_INTEL_FEAT_SMBUS_ADDRESS] = true;
326 ctrlr->feature_supported[SPDK_NVME_INTEL_FEAT_LED_PATTERN] = true;
327 ctrlr->feature_supported[SPDK_NVME_INTEL_FEAT_RESET_TIMED_WORKLOAD_COUNTERS] = true;
328 ctrlr->feature_supported[SPDK_NVME_INTEL_FEAT_LATENCY_TRACKING] = true;
329 }
330
331 static void
332 nvme_ctrlr_set_supported_features(struct spdk_nvme_ctrlr *ctrlr)
333 {
334 memset(ctrlr->feature_supported, 0, sizeof(ctrlr->feature_supported));
335 /* Mandatory features */
336 ctrlr->feature_supported[SPDK_NVME_FEAT_ARBITRATION] = true;
337 ctrlr->feature_supported[SPDK_NVME_FEAT_POWER_MANAGEMENT] = true;
338 ctrlr->feature_supported[SPDK_NVME_FEAT_TEMPERATURE_THRESHOLD] = true;
339 ctrlr->feature_supported[SPDK_NVME_FEAT_ERROR_RECOVERY] = true;
340 ctrlr->feature_supported[SPDK_NVME_FEAT_NUMBER_OF_QUEUES] = true;
341 ctrlr->feature_supported[SPDK_NVME_FEAT_INTERRUPT_COALESCING] = true;
342 ctrlr->feature_supported[SPDK_NVME_FEAT_INTERRUPT_VECTOR_CONFIGURATION] = true;
343 ctrlr->feature_supported[SPDK_NVME_FEAT_WRITE_ATOMICITY] = true;
344 ctrlr->feature_supported[SPDK_NVME_FEAT_ASYNC_EVENT_CONFIGURATION] = true;
345 /* Optional features */
346 if (ctrlr->cdata.vwc.present) {
347 ctrlr->feature_supported[SPDK_NVME_FEAT_VOLATILE_WRITE_CACHE] = true;
348 }
349 if (ctrlr->cdata.apsta.supported) {
350 ctrlr->feature_supported[SPDK_NVME_FEAT_AUTONOMOUS_POWER_STATE_TRANSITION] = true;
351 }
352 if (ctrlr->cdata.hmpre) {
353 ctrlr->feature_supported[SPDK_NVME_FEAT_HOST_MEM_BUFFER] = true;
354 }
355 if (ctrlr->cdata.vid == SPDK_PCI_VID_INTEL) {
356 nvme_ctrlr_set_intel_supported_features(ctrlr);
357 }
358 }
359
360 void
361 nvme_ctrlr_fail(struct spdk_nvme_ctrlr *ctrlr, bool hot_remove)
362 {
363 /*
364  * Set the flag here and leave the actual failing of the qpairs to
365 * spdk_nvme_qpair_process_completions().
366 */
367 if (hot_remove) {
368 ctrlr->is_removed = true;
369 }
370 ctrlr->is_failed = true;
371 }
372
373 static void
374 nvme_ctrlr_shutdown(struct spdk_nvme_ctrlr *ctrlr)
375 {
376 union spdk_nvme_cc_register cc;
377 union spdk_nvme_csts_register csts;
378 int ms_waited = 0;
379
380 if (ctrlr->is_removed) {
381 return;
382 }
383
384 if (nvme_ctrlr_get_cc(ctrlr, &cc)) {
385 SPDK_ERRLOG("get_cc() failed\n");
386 return;
387 }
388
389 cc.bits.shn = SPDK_NVME_SHN_NORMAL;
390
391 if (nvme_ctrlr_set_cc(ctrlr, &cc)) {
392 SPDK_ERRLOG("set_cc() failed\n");
393 return;
394 }
395
396 /*
397 * The NVMe spec does not define a timeout period
398 * for shutdown notification, so we just pick
399 * 5 seconds as a reasonable amount of time to
400 * wait before proceeding.
401 */
402 do {
403 if (nvme_ctrlr_get_csts(ctrlr, &csts)) {
404 SPDK_ERRLOG("get_csts() failed\n");
405 return;
406 }
407
408 if (csts.bits.shst == SPDK_NVME_SHST_COMPLETE) {
409 SPDK_TRACELOG(SPDK_TRACE_NVME, "shutdown complete\n");
410 return;
411 }
412
413 nvme_delay(1000);
414 ms_waited++;
415 } while (ms_waited < 5000);
416
417 SPDK_ERRLOG("did not shutdown within 5 seconds\n");
418 }
419
420 static int
421 nvme_ctrlr_enable(struct spdk_nvme_ctrlr *ctrlr)
422 {
423 union spdk_nvme_cc_register cc;
424 int rc;
425
426 rc = nvme_transport_ctrlr_enable(ctrlr);
427 if (rc != 0) {
428 SPDK_ERRLOG("transport ctrlr_enable failed\n");
429 return rc;
430 }
431
432 if (nvme_ctrlr_get_cc(ctrlr, &cc)) {
433 SPDK_ERRLOG("get_cc() failed\n");
434 return -EIO;
435 }
436
437 if (cc.bits.en != 0) {
438 SPDK_ERRLOG("%s called with CC.EN = 1\n", __func__);
439 return -EINVAL;
440 }
441
442 cc.bits.en = 1;
443 cc.bits.css = 0;
444 cc.bits.shn = 0;
445 cc.bits.iosqes = 6; /* SQ entry size == 64 == 2^6 */
446 cc.bits.iocqes = 4; /* CQ entry size == 16 == 2^4 */
447
448 /* Page size is 2 ^ (12 + mps). */
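	/* For example, with a 4 KiB PAGE_SIZE, spdk_u32log2(4096) == 12 and CC.MPS is set to 0. */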
449 cc.bits.mps = spdk_u32log2(PAGE_SIZE) - 12;
450
451 switch (ctrlr->opts.arb_mechanism) {
452 case SPDK_NVME_CC_AMS_RR:
453 break;
454 case SPDK_NVME_CC_AMS_WRR:
455 if (SPDK_NVME_CAP_AMS_WRR & ctrlr->cap.bits.ams) {
456 break;
457 }
458 return -EINVAL;
459 case SPDK_NVME_CC_AMS_VS:
460 if (SPDK_NVME_CAP_AMS_VS & ctrlr->cap.bits.ams) {
461 break;
462 }
463 return -EINVAL;
464 default:
465 return -EINVAL;
466 }
467
468 cc.bits.ams = ctrlr->opts.arb_mechanism;
469
470 if (nvme_ctrlr_set_cc(ctrlr, &cc)) {
471 SPDK_ERRLOG("set_cc() failed\n");
472 return -EIO;
473 }
474
475 return 0;
476 }
477
478 #ifdef DEBUG
479 static const char *
480 nvme_ctrlr_state_string(enum nvme_ctrlr_state state)
481 {
482 switch (state) {
483 case NVME_CTRLR_STATE_INIT:
484 return "init";
485 case NVME_CTRLR_STATE_DISABLE_WAIT_FOR_READY_1:
486 return "disable and wait for CSTS.RDY = 1";
487 case NVME_CTRLR_STATE_DISABLE_WAIT_FOR_READY_0:
488 return "disable and wait for CSTS.RDY = 0";
489 case NVME_CTRLR_STATE_ENABLE:
490 return "enable controller by writing CC.EN = 1";
491 case NVME_CTRLR_STATE_ENABLE_WAIT_FOR_READY_1:
492 return "wait for CSTS.RDY = 1";
493 case NVME_CTRLR_STATE_READY:
494 return "ready";
495 }
496 return "unknown";
497 }
498 #endif /* DEBUG */
499
500 static void
501 nvme_ctrlr_set_state(struct spdk_nvme_ctrlr *ctrlr, enum nvme_ctrlr_state state,
502 uint64_t timeout_in_ms)
503 {
504 ctrlr->state = state;
505 if (timeout_in_ms == NVME_TIMEOUT_INFINITE) {
506 SPDK_TRACELOG(SPDK_TRACE_NVME, "setting state to %s (no timeout)\n",
507 nvme_ctrlr_state_string(ctrlr->state));
508 ctrlr->state_timeout_tsc = NVME_TIMEOUT_INFINITE;
509 } else {
510 SPDK_TRACELOG(SPDK_TRACE_NVME, "setting state to %s (timeout %" PRIu64 " ms)\n",
511 nvme_ctrlr_state_string(ctrlr->state), timeout_in_ms);
512 ctrlr->state_timeout_tsc = spdk_get_ticks() + (timeout_in_ms * spdk_get_ticks_hz()) / 1000;
513 }
514 }
515
516 int
517 spdk_nvme_ctrlr_reset(struct spdk_nvme_ctrlr *ctrlr)
518 {
519 int rc = 0;
520 struct spdk_nvme_qpair *qpair;
521 struct nvme_request *req, *tmp;
522
523 nvme_robust_mutex_lock(&ctrlr->ctrlr_lock);
524
525 if (ctrlr->is_resetting || ctrlr->is_failed) {
526 /*
527 * Controller is already resetting or has failed. Return
528 * immediately since there is no need to kick off another
529 * reset in these cases.
530 */
531 nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock);
532 return 0;
533 }
534
535 ctrlr->is_resetting = true;
536
537 SPDK_NOTICELOG("resetting controller\n");
538
539 /* Free all of the queued abort requests */
540 STAILQ_FOREACH_SAFE(req, &ctrlr->queued_aborts, stailq, tmp) {
541 STAILQ_REMOVE_HEAD(&ctrlr->queued_aborts, stailq);
542 nvme_free_request(req);
543 ctrlr->outstanding_aborts--;
544 }
545
546 /* Disable all queues before disabling the controller hardware. */
547 nvme_qpair_disable(ctrlr->adminq);
548 TAILQ_FOREACH(qpair, &ctrlr->active_io_qpairs, tailq) {
549 nvme_qpair_disable(qpair);
550 }
551
552 /* Set the state back to INIT to cause a full hardware reset. */
553 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_INIT, NVME_TIMEOUT_INFINITE);
554
555 while (ctrlr->state != NVME_CTRLR_STATE_READY) {
556 if (nvme_ctrlr_process_init(ctrlr) != 0) {
557 SPDK_ERRLOG("%s: controller reinitialization failed\n", __func__);
558 nvme_ctrlr_fail(ctrlr, false);
559 rc = -1;
560 break;
561 }
562 }
563
564 if (!ctrlr->is_failed) {
565 /* Reinitialize qpairs */
566 TAILQ_FOREACH(qpair, &ctrlr->active_io_qpairs, tailq) {
567 if (nvme_transport_ctrlr_reinit_io_qpair(ctrlr, qpair) != 0) {
568 nvme_ctrlr_fail(ctrlr, false);
569 rc = -1;
570 }
571 }
572 }
573
574 ctrlr->is_resetting = false;
575
576 nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock);
577
578 return rc;
579 }
580
581 static int
582 nvme_ctrlr_identify(struct spdk_nvme_ctrlr *ctrlr)
583 {
584 struct nvme_completion_poll_status status;
585 int rc;
586
587 status.done = false;
588 rc = nvme_ctrlr_cmd_identify_controller(ctrlr, &ctrlr->cdata,
589 nvme_completion_poll_cb, &status);
590 if (rc != 0) {
591 return rc;
592 }
593
594 while (status.done == false) {
595 spdk_nvme_qpair_process_completions(ctrlr->adminq, 0);
596 }
597 if (spdk_nvme_cpl_is_error(&status.cpl)) {
598 SPDK_ERRLOG("nvme_identify_controller failed!\n");
599 return -ENXIO;
600 }
601
602 /*
603 * Use MDTS to ensure our default max_xfer_size doesn't exceed what the
604 * controller supports.
605 */
606 ctrlr->max_xfer_size = nvme_transport_ctrlr_get_max_xfer_size(ctrlr);
607 SPDK_TRACELOG(SPDK_TRACE_NVME, "transport max_xfer_size %u\n", ctrlr->max_xfer_size);
608 if (ctrlr->cdata.mdts > 0) {
609 ctrlr->max_xfer_size = spdk_min(ctrlr->max_xfer_size,
610 ctrlr->min_page_size * (1 << (ctrlr->cdata.mdts)));
611 SPDK_TRACELOG(SPDK_TRACE_NVME, "MDTS max_xfer_size %u\n", ctrlr->max_xfer_size);
612 }
613
614 return 0;
615 }
616
617 static int
618 nvme_ctrlr_set_num_qpairs(struct spdk_nvme_ctrlr *ctrlr)
619 {
620 struct nvme_completion_poll_status status;
621 int cq_allocated, sq_allocated;
622 int rc;
623 uint32_t i;
624
625 status.done = false;
626
627 if (ctrlr->opts.num_io_queues > SPDK_NVME_MAX_IO_QUEUES) {
628 SPDK_NOTICELOG("Limiting requested num_io_queues %u to max %d\n",
629 ctrlr->opts.num_io_queues, SPDK_NVME_MAX_IO_QUEUES);
630 ctrlr->opts.num_io_queues = SPDK_NVME_MAX_IO_QUEUES;
631 } else if (ctrlr->opts.num_io_queues < 1) {
632 SPDK_NOTICELOG("Requested num_io_queues 0, increasing to 1\n");
633 ctrlr->opts.num_io_queues = 1;
634 }
635
636 rc = nvme_ctrlr_cmd_set_num_queues(ctrlr, ctrlr->opts.num_io_queues,
637 nvme_completion_poll_cb, &status);
638 if (rc != 0) {
639 return rc;
640 }
641
642 while (status.done == false) {
643 spdk_nvme_qpair_process_completions(ctrlr->adminq, 0);
644 }
645 if (spdk_nvme_cpl_is_error(&status.cpl)) {
646 SPDK_ERRLOG("nvme_set_num_queues failed!\n");
647 return -ENXIO;
648 }
649
650 /*
651 * Data in cdw0 is 0-based.
652 * Lower 16-bits indicate number of submission queues allocated.
653 * Upper 16-bits indicate number of completion queues allocated.
654 */
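	/* For example, cdw0 == 0x00030003 means 4 submission queues and 4 completion queues were allocated. */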
655 sq_allocated = (status.cpl.cdw0 & 0xFFFF) + 1;
656 cq_allocated = (status.cpl.cdw0 >> 16) + 1;
657
658 ctrlr->opts.num_io_queues = spdk_min(sq_allocated, cq_allocated);
659
660 ctrlr->free_io_qids = spdk_bit_array_create(ctrlr->opts.num_io_queues + 1);
661 if (ctrlr->free_io_qids == NULL) {
662 return -ENOMEM;
663 }
664
665 /* Initialize list of free I/O queue IDs. QID 0 is the admin queue. */
666 spdk_bit_array_clear(ctrlr->free_io_qids, 0);
667 for (i = 1; i <= ctrlr->opts.num_io_queues; i++) {
668 spdk_bit_array_set(ctrlr->free_io_qids, i);
669 }
670
671 return 0;
672 }
673
674 static int
675 nvme_ctrlr_set_keep_alive_timeout(struct spdk_nvme_ctrlr *ctrlr)
676 {
677 struct nvme_completion_poll_status status;
678 uint32_t keep_alive_interval_ms;
679 int rc;
680
681 if (ctrlr->opts.keep_alive_timeout_ms == 0) {
682 return 0;
683 }
684
685 if (ctrlr->cdata.kas == 0) {
686 SPDK_TRACELOG(SPDK_TRACE_NVME, "Controller KAS is 0 - not enabling Keep Alive\n");
687 ctrlr->opts.keep_alive_timeout_ms = 0;
688 return 0;
689 }
690
691 /* Retrieve actual keep alive timeout, since the controller may have adjusted it. */
692 status.done = false;
693 rc = spdk_nvme_ctrlr_cmd_get_feature(ctrlr, SPDK_NVME_FEAT_KEEP_ALIVE_TIMER, 0, NULL, 0,
694 nvme_completion_poll_cb, &status);
695 if (rc != 0) {
696 SPDK_ERRLOG("Keep alive timeout Get Feature failed: %d\n", rc);
697 ctrlr->opts.keep_alive_timeout_ms = 0;
698 return rc;
699 }
700
701 while (status.done == false) {
702 spdk_nvme_qpair_process_completions(ctrlr->adminq, 0);
703 }
704 if (spdk_nvme_cpl_is_error(&status.cpl)) {
705 SPDK_ERRLOG("Keep alive timeout Get Feature failed: SC %x SCT %x\n",
706 status.cpl.status.sc, status.cpl.status.sct);
707 ctrlr->opts.keep_alive_timeout_ms = 0;
708 return -ENXIO;
709 }
710
711 if (ctrlr->opts.keep_alive_timeout_ms != status.cpl.cdw0) {
712 SPDK_TRACELOG(SPDK_TRACE_NVME, "Controller adjusted keep alive timeout to %u ms\n",
713 status.cpl.cdw0);
714 }
715
716 ctrlr->opts.keep_alive_timeout_ms = status.cpl.cdw0;
717
718 keep_alive_interval_ms = ctrlr->opts.keep_alive_timeout_ms / 2;
719 if (keep_alive_interval_ms == 0) {
720 keep_alive_interval_ms = 1;
721 }
722 SPDK_TRACELOG(SPDK_TRACE_NVME, "Sending keep alive every %u ms\n", keep_alive_interval_ms);
723
724 ctrlr->keep_alive_interval_ticks = (keep_alive_interval_ms * spdk_get_ticks_hz()) / UINT64_C(1000);
725
726 /* Schedule the first Keep Alive to be sent as soon as possible. */
727 ctrlr->next_keep_alive_tick = spdk_get_ticks();
728
729 return 0;
730 }
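/*
 * With the default keep_alive_timeout_ms of 10 * 1000 set in
 * spdk_nvme_ctrlr_opts_set_defaults(), and assuming the controller does not
 * adjust it, this works out to one Keep Alive command every 5000 ms. The
 * command is only generated by nvme_ctrlr_keep_alive(), which runs inside
 * spdk_nvme_ctrlr_process_admin_completions(), so the application must poll
 * the admin queue at least that often for keep alives to actually be sent.
 */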
731
732 static void
733 nvme_ctrlr_destruct_namespaces(struct spdk_nvme_ctrlr *ctrlr)
734 {
735 if (ctrlr->ns) {
736 uint32_t i, num_ns = ctrlr->num_ns;
737
738 for (i = 0; i < num_ns; i++) {
739 nvme_ns_destruct(&ctrlr->ns[i]);
740 }
741
742 spdk_free(ctrlr->ns);
743 ctrlr->ns = NULL;
744 ctrlr->num_ns = 0;
745 }
746
747 if (ctrlr->nsdata) {
748 spdk_free(ctrlr->nsdata);
749 ctrlr->nsdata = NULL;
750 }
751 }
752
753 static int
754 nvme_ctrlr_construct_namespaces(struct spdk_nvme_ctrlr *ctrlr)
755 {
756 uint32_t i, nn = ctrlr->cdata.nn;
757 uint64_t phys_addr = 0;
758
759 if (nn == 0) {
760 SPDK_ERRLOG("controller has 0 namespaces\n");
761 return -1;
762 }
763
764 /* ctrlr->num_ns may be 0 (startup) or a different number of namespaces (reset),
765 * so check if we need to reallocate.
766 */
767 if (nn != ctrlr->num_ns) {
768 nvme_ctrlr_destruct_namespaces(ctrlr);
769
770 ctrlr->ns = spdk_zmalloc(nn * sizeof(struct spdk_nvme_ns), 64,
771 &phys_addr);
772 if (ctrlr->ns == NULL) {
773 goto fail;
774 }
775
776 ctrlr->nsdata = spdk_zmalloc(nn * sizeof(struct spdk_nvme_ns_data), 64,
777 &phys_addr);
778 if (ctrlr->nsdata == NULL) {
779 goto fail;
780 }
781
782 ctrlr->num_ns = nn;
783 }
784
785 for (i = 0; i < nn; i++) {
786 struct spdk_nvme_ns *ns = &ctrlr->ns[i];
787 uint32_t nsid = i + 1;
788
789 if (nvme_ns_construct(ns, nsid, ctrlr) != 0) {
790 goto fail;
791 }
792 }
793
794 return 0;
795
796 fail:
797 nvme_ctrlr_destruct_namespaces(ctrlr);
798 return -1;
799 }
800
801 static void
802 nvme_ctrlr_async_event_cb(void *arg, const struct spdk_nvme_cpl *cpl)
803 {
804 struct nvme_async_event_request *aer = arg;
805 struct spdk_nvme_ctrlr *ctrlr = aer->ctrlr;
806
807 if (cpl->status.sc == SPDK_NVME_SC_ABORTED_SQ_DELETION) {
808 /*
809 		 * This is simulated when the controller is being shut down, to
810 * effectively abort outstanding asynchronous event requests
811 * and make sure all memory is freed. Do not repost the
812 * request in this case.
813 */
814 return;
815 }
816
817 if (ctrlr->aer_cb_fn != NULL) {
818 ctrlr->aer_cb_fn(ctrlr->aer_cb_arg, cpl);
819 }
820
821 /*
822 * Repost another asynchronous event request to replace the one
823 * that just completed.
824 */
825 if (nvme_ctrlr_construct_and_submit_aer(ctrlr, aer)) {
826 /*
827 * We can't do anything to recover from a failure here,
828 * so just print a warning message and leave the AER unsubmitted.
829 */
830 SPDK_ERRLOG("resubmitting AER failed!\n");
831 }
832 }
833
834 static int
835 nvme_ctrlr_construct_and_submit_aer(struct spdk_nvme_ctrlr *ctrlr,
836 struct nvme_async_event_request *aer)
837 {
838 struct nvme_request *req;
839
840 aer->ctrlr = ctrlr;
841 req = nvme_allocate_request_null(ctrlr->adminq, nvme_ctrlr_async_event_cb, aer);
842 aer->req = req;
843 if (req == NULL) {
844 return -1;
845 }
846
847 req->cmd.opc = SPDK_NVME_OPC_ASYNC_EVENT_REQUEST;
848 return nvme_ctrlr_submit_admin_request(ctrlr, req);
849 }
850
851 static int
852 nvme_ctrlr_configure_aer(struct spdk_nvme_ctrlr *ctrlr)
853 {
854 union spdk_nvme_critical_warning_state state;
855 struct nvme_async_event_request *aer;
856 uint32_t i;
857 struct nvme_completion_poll_status status;
858 int rc;
859
860 status.done = false;
861
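	/* Request notification for every critical warning type: set all bits in the bitmap, then clear the reserved bits below. */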
862 state.raw = 0xFF;
863 state.bits.reserved = 0;
864 rc = nvme_ctrlr_cmd_set_async_event_config(ctrlr, state, nvme_completion_poll_cb, &status);
865 if (rc != 0) {
866 return rc;
867 }
868
869 while (status.done == false) {
870 spdk_nvme_qpair_process_completions(ctrlr->adminq, 0);
871 }
872 if (spdk_nvme_cpl_is_error(&status.cpl)) {
873 SPDK_ERRLOG("nvme_ctrlr_cmd_set_async_event_config failed!\n");
874 return 0;
875 }
876
877 /* aerl is a zero-based value, so we need to add 1 here. */
878 ctrlr->num_aers = spdk_min(NVME_MAX_ASYNC_EVENTS, (ctrlr->cdata.aerl + 1));
879
880 for (i = 0; i < ctrlr->num_aers; i++) {
881 aer = &ctrlr->aer[i];
882 if (nvme_ctrlr_construct_and_submit_aer(ctrlr, aer)) {
883 SPDK_ERRLOG("nvme_ctrlr_construct_and_submit_aer failed!\n");
884 return -1;
885 }
886 }
887
888 return 0;
889 }
890
891 /**
892  * This function will be called when a process starts using the controller.
893  * 1. For the primary process, it is called when constructing the controller.
894  * 2. For a secondary process, it is called when probing the controller.
895  * Note: the process will not be added again if it has already been added.
896 */
897 int
898 nvme_ctrlr_add_process(struct spdk_nvme_ctrlr *ctrlr, void *devhandle)
899 {
900 struct spdk_nvme_ctrlr_process *ctrlr_proc, *active_proc;
901 pid_t pid = getpid();
902
903 /* Check whether the process is already added or not */
904 TAILQ_FOREACH(active_proc, &ctrlr->active_procs, tailq) {
905 if (active_proc->pid == pid) {
906 return 0;
907 }
908 }
909
910 /* Initialize the per process properties for this ctrlr */
911 ctrlr_proc = spdk_zmalloc(sizeof(struct spdk_nvme_ctrlr_process), 64, NULL);
912 if (ctrlr_proc == NULL) {
913 SPDK_ERRLOG("failed to allocate memory to track the process props\n");
914
915 return -1;
916 }
917
918 ctrlr_proc->is_primary = spdk_process_is_primary();
919 ctrlr_proc->pid = pid;
920 STAILQ_INIT(&ctrlr_proc->active_reqs);
921 ctrlr_proc->devhandle = devhandle;
922 ctrlr_proc->ref = 0;
923 TAILQ_INIT(&ctrlr_proc->allocated_io_qpairs);
924
925 TAILQ_INSERT_TAIL(&ctrlr->active_procs, ctrlr_proc, tailq);
926
927 return 0;
928 }
929
930 /**
931 * This function will be called when the process detaches the controller.
932 * Note: the ctrlr_lock must be held when calling this function.
933 */
934 static void
935 nvme_ctrlr_remove_process(struct spdk_nvme_ctrlr *ctrlr,
936 struct spdk_nvme_ctrlr_process *proc)
937 {
938 struct spdk_nvme_qpair *qpair, *tmp_qpair;
939
940 assert(STAILQ_EMPTY(&proc->active_reqs));
941
942 TAILQ_FOREACH_SAFE(qpair, &proc->allocated_io_qpairs, per_process_tailq, tmp_qpair) {
943 spdk_nvme_ctrlr_free_io_qpair(qpair);
944 }
945
946 TAILQ_REMOVE(&ctrlr->active_procs, proc, tailq);
947
948 spdk_free(proc);
949 }
950
951 /**
952  * This function will be called when a process has exited unexpectedly,
953  * in order to free any incomplete nvme requests, allocated I/O qpairs
954  * and allocated memory.
955 * Note: the ctrlr_lock must be held when calling this function.
956 */
957 static void
958 nvme_ctrlr_cleanup_process(struct spdk_nvme_ctrlr_process *proc)
959 {
960 struct nvme_request *req, *tmp_req;
961 struct spdk_nvme_qpair *qpair, *tmp_qpair;
962
963 STAILQ_FOREACH_SAFE(req, &proc->active_reqs, stailq, tmp_req) {
964 STAILQ_REMOVE(&proc->active_reqs, req, nvme_request, stailq);
965
966 assert(req->pid == proc->pid);
967
968 nvme_free_request(req);
969 }
970
971 TAILQ_FOREACH_SAFE(qpair, &proc->allocated_io_qpairs, per_process_tailq, tmp_qpair) {
972 TAILQ_REMOVE(&proc->allocated_io_qpairs, qpair, per_process_tailq);
973
974 /*
975 * The process may have been killed while some qpairs were in their
976 * completion context. Clear that flag here to allow these IO
977 * qpairs to be deleted.
978 */
979 qpair->in_completion_context = 0;
980 spdk_nvme_ctrlr_free_io_qpair(qpair);
981 }
982
983 spdk_free(proc);
984 }
985
986 /**
987 * This function will be called when destructing the controller.
988  * 1. There are no more admin requests on this controller.
989  * 2. Clean up any leftover resource allocations whose associated process is gone.
990 */
991 void
992 nvme_ctrlr_free_processes(struct spdk_nvme_ctrlr *ctrlr)
993 {
994 struct spdk_nvme_ctrlr_process *active_proc, *tmp;
995
996 	/* Free all the processes' properties and make sure there are no pending admin I/Os */
997 TAILQ_FOREACH_SAFE(active_proc, &ctrlr->active_procs, tailq, tmp) {
998 TAILQ_REMOVE(&ctrlr->active_procs, active_proc, tailq);
999
1000 assert(STAILQ_EMPTY(&active_proc->active_reqs));
1001
1002 spdk_free(active_proc);
1003 }
1004 }
1005
1006 /**
1007 * This function will be called when any other process attaches or
1008  * detaches the controller, in order to clean up any unexpectedly
1009 * terminated processes.
1010 * Note: the ctrlr_lock must be held when calling this function.
1011 */
1012 static int
1013 nvme_ctrlr_remove_inactive_proc(struct spdk_nvme_ctrlr *ctrlr)
1014 {
1015 struct spdk_nvme_ctrlr_process *active_proc, *tmp;
1016 int active_proc_count = 0;
1017
1018 TAILQ_FOREACH_SAFE(active_proc, &ctrlr->active_procs, tailq, tmp) {
1019 if ((kill(active_proc->pid, 0) == -1) && (errno == ESRCH)) {
1020 SPDK_ERRLOG("process %d terminated unexpected\n", active_proc->pid);
1021
1022 TAILQ_REMOVE(&ctrlr->active_procs, active_proc, tailq);
1023
1024 nvme_ctrlr_cleanup_process(active_proc);
1025 } else {
1026 active_proc_count++;
1027 }
1028 }
1029
1030 return active_proc_count;
1031 }
1032
1033 void
1034 nvme_ctrlr_proc_get_ref(struct spdk_nvme_ctrlr *ctrlr)
1035 {
1036 struct spdk_nvme_ctrlr_process *active_proc;
1037 pid_t pid = getpid();
1038
1039 nvme_robust_mutex_lock(&ctrlr->ctrlr_lock);
1040
1041 nvme_ctrlr_remove_inactive_proc(ctrlr);
1042
1043 TAILQ_FOREACH(active_proc, &ctrlr->active_procs, tailq) {
1044 if (active_proc->pid == pid) {
1045 active_proc->ref++;
1046 break;
1047 }
1048 }
1049
1050 nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock);
1051 }
1052
1053 void
1054 nvme_ctrlr_proc_put_ref(struct spdk_nvme_ctrlr *ctrlr)
1055 {
1056 struct spdk_nvme_ctrlr_process *active_proc, *tmp;
1057 pid_t pid = getpid();
1058 int proc_count;
1059
1060 nvme_robust_mutex_lock(&ctrlr->ctrlr_lock);
1061
1062 proc_count = nvme_ctrlr_remove_inactive_proc(ctrlr);
1063
1064 TAILQ_FOREACH_SAFE(active_proc, &ctrlr->active_procs, tailq, tmp) {
1065 if (active_proc->pid == pid) {
1066 active_proc->ref--;
1067 assert(active_proc->ref >= 0);
1068
1069 /*
1070 * The last active process will be removed at the end of
1071 * the destruction of the controller.
1072 */
1073 if (active_proc->ref == 0 && proc_count != 1) {
1074 nvme_ctrlr_remove_process(ctrlr, active_proc);
1075 }
1076
1077 break;
1078 }
1079 }
1080
1081 nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock);
1082 }
1083
1084 int
1085 nvme_ctrlr_get_ref_count(struct spdk_nvme_ctrlr *ctrlr)
1086 {
1087 struct spdk_nvme_ctrlr_process *active_proc;
1088 int ref = 0;
1089
1090 nvme_robust_mutex_lock(&ctrlr->ctrlr_lock);
1091
1092 nvme_ctrlr_remove_inactive_proc(ctrlr);
1093
1094 TAILQ_FOREACH(active_proc, &ctrlr->active_procs, tailq) {
1095 ref += active_proc->ref;
1096 }
1097
1098 nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock);
1099
1100 return ref;
1101 }
1102
1103 /**
1104 * This function will be called repeatedly during initialization until the controller is ready.
1105 */
1106 int
1107 nvme_ctrlr_process_init(struct spdk_nvme_ctrlr *ctrlr)
1108 {
1109 union spdk_nvme_cc_register cc;
1110 union spdk_nvme_csts_register csts;
1111 uint32_t ready_timeout_in_ms;
1112 int rc;
1113
1114 /*
1115 * May need to avoid accessing any register on the target controller
1116 * for a while. Return early without touching the FSM.
1117 * Check sleep_timeout_tsc > 0 for unit test.
1118 */
1119 if ((ctrlr->sleep_timeout_tsc > 0) &&
1120 (spdk_get_ticks() <= ctrlr->sleep_timeout_tsc)) {
1121 return 0;
1122 }
1123 ctrlr->sleep_timeout_tsc = 0;
1124
1125 if (nvme_ctrlr_get_cc(ctrlr, &cc) ||
1126 nvme_ctrlr_get_csts(ctrlr, &csts)) {
1127 if (ctrlr->state_timeout_tsc != NVME_TIMEOUT_INFINITE) {
1128 /* While a device is resetting, it may be unable to service MMIO reads
1129 * temporarily. Allow for this case.
1130 */
1131 SPDK_TRACELOG(SPDK_TRACE_NVME, "Get registers failed while waiting for CSTS.RDY == 0\n");
1132 goto init_timeout;
1133 }
1134 SPDK_ERRLOG("Failed to read CC and CSTS in state %d\n", ctrlr->state);
1135 nvme_ctrlr_fail(ctrlr, false);
1136 return -EIO;
1137 }
1138
1139 ready_timeout_in_ms = 500 * ctrlr->cap.bits.to;
1140
1141 /*
1142 * Check if the current initialization step is done or has timed out.
1143 */
1144 switch (ctrlr->state) {
1145 case NVME_CTRLR_STATE_INIT:
1146 /* Begin the hardware initialization by making sure the controller is disabled. */
1147 if (cc.bits.en) {
1148 SPDK_TRACELOG(SPDK_TRACE_NVME, "CC.EN = 1\n");
1149 /*
1150 * Controller is currently enabled. We need to disable it to cause a reset.
1151 *
1152 * If CC.EN = 1 && CSTS.RDY = 0, the controller is in the process of becoming ready.
1153 * Wait for the ready bit to be 1 before disabling the controller.
1154 */
1155 if (csts.bits.rdy == 0) {
1156 SPDK_TRACELOG(SPDK_TRACE_NVME, "CC.EN = 1 && CSTS.RDY = 0 - waiting for reset to complete\n");
1157 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_DISABLE_WAIT_FOR_READY_1, ready_timeout_in_ms);
1158 return 0;
1159 }
1160
1161 /* CC.EN = 1 && CSTS.RDY == 1, so we can immediately disable the controller. */
1162 SPDK_TRACELOG(SPDK_TRACE_NVME, "Setting CC.EN = 0\n");
1163 cc.bits.en = 0;
1164 if (nvme_ctrlr_set_cc(ctrlr, &cc)) {
1165 SPDK_ERRLOG("set_cc() failed\n");
1166 nvme_ctrlr_fail(ctrlr, false);
1167 return -EIO;
1168 }
1169 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_DISABLE_WAIT_FOR_READY_0, ready_timeout_in_ms);
1170
1171 /*
1172 			 * Wait 2 seconds before accessing PCI registers.
1173 			 * Do not use sleep() here, to avoid blocking other controllers' initialization.
1174 */
1175 if (ctrlr->quirks & NVME_QUIRK_DELAY_BEFORE_CHK_RDY) {
1176 SPDK_TRACELOG(SPDK_TRACE_NVME, "Applying quirk: delay 2 seconds before reading registers\n");
1177 ctrlr->sleep_timeout_tsc = spdk_get_ticks() + 2 * spdk_get_ticks_hz();
1178 }
1179 return 0;
1180 } else {
1181 if (csts.bits.rdy == 1) {
1182 SPDK_TRACELOG(SPDK_TRACE_NVME, "CC.EN = 0 && CSTS.RDY = 1 - waiting for shutdown to complete\n");
1183 }
1184
1185 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_DISABLE_WAIT_FOR_READY_0, ready_timeout_in_ms);
1186 return 0;
1187 }
1188 break;
1189
1190 case NVME_CTRLR_STATE_DISABLE_WAIT_FOR_READY_1:
1191 if (csts.bits.rdy == 1) {
1192 SPDK_TRACELOG(SPDK_TRACE_NVME, "CC.EN = 1 && CSTS.RDY = 1 - disabling controller\n");
1193 /* CC.EN = 1 && CSTS.RDY = 1, so we can set CC.EN = 0 now. */
1194 SPDK_TRACELOG(SPDK_TRACE_NVME, "Setting CC.EN = 0\n");
1195 cc.bits.en = 0;
1196 if (nvme_ctrlr_set_cc(ctrlr, &cc)) {
1197 SPDK_ERRLOG("set_cc() failed\n");
1198 nvme_ctrlr_fail(ctrlr, false);
1199 return -EIO;
1200 }
1201 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_DISABLE_WAIT_FOR_READY_0, ready_timeout_in_ms);
1202 return 0;
1203 }
1204 break;
1205
1206 case NVME_CTRLR_STATE_DISABLE_WAIT_FOR_READY_0:
1207 if (csts.bits.rdy == 0) {
1208 SPDK_TRACELOG(SPDK_TRACE_NVME, "CC.EN = 0 && CSTS.RDY = 0\n");
1209 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ENABLE, ready_timeout_in_ms);
1210
1211 if (ctrlr->quirks & NVME_QUIRK_DELAY_BEFORE_ENABLE) {
1212 SPDK_TRACELOG(SPDK_TRACE_NVME, "Applying quirk: Delay 100us before enabling.\n");
1213 ctrlr->sleep_timeout_tsc = spdk_get_ticks() + spdk_get_ticks_hz() / 10000;
1214 }
1215
1216 return 0;
1217 }
1218 break;
1219
1220 case NVME_CTRLR_STATE_ENABLE:
1221 SPDK_TRACELOG(SPDK_TRACE_NVME, "Setting CC.EN = 1\n");
1222 rc = nvme_ctrlr_enable(ctrlr);
1223 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ENABLE_WAIT_FOR_READY_1, ready_timeout_in_ms);
1224 return rc;
1225
1226 case NVME_CTRLR_STATE_ENABLE_WAIT_FOR_READY_1:
1227 if (csts.bits.rdy == 1) {
1228 SPDK_TRACELOG(SPDK_TRACE_NVME, "CC.EN = 1 && CSTS.RDY = 1 - controller is ready\n");
1229 /*
1230 * The controller has been enabled.
1231 * Perform the rest of initialization in nvme_ctrlr_start() serially.
1232 */
1233 rc = nvme_ctrlr_start(ctrlr);
1234 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_READY, NVME_TIMEOUT_INFINITE);
1235 return rc;
1236 }
1237 break;
1238
1239 default:
1240 assert(0);
1241 nvme_ctrlr_fail(ctrlr, false);
1242 return -1;
1243 }
1244
1245 init_timeout:
1246 if (ctrlr->state_timeout_tsc != NVME_TIMEOUT_INFINITE &&
1247 spdk_get_ticks() > ctrlr->state_timeout_tsc) {
1248 SPDK_ERRLOG("Initialization timed out in state %d\n", ctrlr->state);
1249 nvme_ctrlr_fail(ctrlr, false);
1250 return -1;
1251 }
1252
1253 return 0;
1254 }
1255
1256 int
1257 nvme_ctrlr_start(struct spdk_nvme_ctrlr *ctrlr)
1258 {
1259 nvme_transport_qpair_reset(ctrlr->adminq);
1260
1261 nvme_qpair_enable(ctrlr->adminq);
1262
1263 if (nvme_ctrlr_identify(ctrlr) != 0) {
1264 return -1;
1265 }
1266
1267 if (nvme_ctrlr_set_num_qpairs(ctrlr) != 0) {
1268 return -1;
1269 }
1270
1271 if (nvme_ctrlr_construct_namespaces(ctrlr) != 0) {
1272 return -1;
1273 }
1274
1275 if (nvme_ctrlr_configure_aer(ctrlr) != 0) {
1276 return -1;
1277 }
1278
1279 nvme_ctrlr_set_supported_log_pages(ctrlr);
1280 nvme_ctrlr_set_supported_features(ctrlr);
1281
1282 if (ctrlr->cdata.sgls.supported) {
1283 ctrlr->flags |= SPDK_NVME_CTRLR_SGL_SUPPORTED;
1284 }
1285
1286 if (nvme_ctrlr_set_keep_alive_timeout(ctrlr) != 0) {
1287 SPDK_ERRLOG("Setting keep alive timeout failed\n");
1288 return -1;
1289 }
1290
1291 return 0;
1292 }
1293
1294 int
1295 nvme_robust_mutex_init_recursive_shared(pthread_mutex_t *mtx)
1296 {
1297 pthread_mutexattr_t attr;
1298 int rc = 0;
1299
1300 if (pthread_mutexattr_init(&attr)) {
1301 return -1;
1302 }
1303 if (pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_RECURSIVE) ||
1304 #ifndef __FreeBSD__
1305 pthread_mutexattr_setrobust(&attr, PTHREAD_MUTEX_ROBUST) ||
1306 pthread_mutexattr_setpshared(&attr, PTHREAD_PROCESS_SHARED) ||
1307 #endif
1308 pthread_mutex_init(mtx, &attr)) {
1309 rc = -1;
1310 }
1311 pthread_mutexattr_destroy(&attr);
1312 return rc;
1313 }
1314
1315 int
1316 nvme_ctrlr_construct(struct spdk_nvme_ctrlr *ctrlr)
1317 {
1318 int rc;
1319
1320 nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_INIT, NVME_TIMEOUT_INFINITE);
1321 ctrlr->flags = 0;
1322 ctrlr->free_io_qids = NULL;
1323 ctrlr->is_resetting = false;
1324 ctrlr->is_failed = false;
1325
1326 TAILQ_INIT(&ctrlr->active_io_qpairs);
1327 STAILQ_INIT(&ctrlr->queued_aborts);
1328 ctrlr->outstanding_aborts = 0;
1329
1330 rc = nvme_robust_mutex_init_recursive_shared(&ctrlr->ctrlr_lock);
1331 if (rc != 0) {
1332 return rc;
1333 }
1334
1335 TAILQ_INIT(&ctrlr->active_procs);
1336 ctrlr->timeout_cb_fn = NULL;
1337 ctrlr->timeout_cb_arg = NULL;
1338 ctrlr->timeout_ticks = 0;
1339
1340 return rc;
1341 }
1342
1343 /* This function should be called once at ctrlr initialization to set up constant properties. */
1344 void
1345 nvme_ctrlr_init_cap(struct spdk_nvme_ctrlr *ctrlr, const union spdk_nvme_cap_register *cap)
1346 {
1347 uint32_t max_io_queue_size = nvme_transport_ctrlr_get_max_io_queue_size(ctrlr);
1348
1349 ctrlr->cap = *cap;
1350
1351 ctrlr->min_page_size = 1u << (12 + ctrlr->cap.bits.mpsmin);
1352
1353 ctrlr->opts.io_queue_size = spdk_max(ctrlr->opts.io_queue_size, SPDK_NVME_IO_QUEUE_MIN_ENTRIES);
1354 ctrlr->opts.io_queue_size = spdk_min(ctrlr->opts.io_queue_size, ctrlr->cap.bits.mqes + 1u);
1355 ctrlr->opts.io_queue_size = spdk_min(ctrlr->opts.io_queue_size, max_io_queue_size);
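	/* For example, with CAP.MQES = 127 (128-entry queues) and a requested io_queue_size of 256,
	 * io_queue_size ends up as 128 (assuming the transport limit is not smaller).
	 */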
1356
1357 ctrlr->opts.io_queue_requests = spdk_max(ctrlr->opts.io_queue_requests, ctrlr->opts.io_queue_size);
1358 }
1359
1360 void
1361 nvme_ctrlr_destruct(struct spdk_nvme_ctrlr *ctrlr)
1362 {
1363 while (!TAILQ_EMPTY(&ctrlr->active_io_qpairs)) {
1364 struct spdk_nvme_qpair *qpair = TAILQ_FIRST(&ctrlr->active_io_qpairs);
1365
1366 spdk_nvme_ctrlr_free_io_qpair(qpair);
1367 }
1368
1369 nvme_ctrlr_shutdown(ctrlr);
1370
1371 nvme_ctrlr_destruct_namespaces(ctrlr);
1372
1373 spdk_bit_array_free(&ctrlr->free_io_qids);
1374
1375 pthread_mutex_destroy(&ctrlr->ctrlr_lock);
1376
1377 nvme_transport_ctrlr_destruct(ctrlr);
1378 }
1379
1380 int
1381 nvme_ctrlr_submit_admin_request(struct spdk_nvme_ctrlr *ctrlr,
1382 struct nvme_request *req)
1383 {
1384 return nvme_qpair_submit_request(ctrlr->adminq, req);
1385 }
1386
1387 static void
1388 nvme_keep_alive_completion(void *cb_ctx, const struct spdk_nvme_cpl *cpl)
1389 {
1390 /* Do nothing */
1391 }
1392
1393 /*
1394 * Check if we need to send a Keep Alive command.
1395 * Caller must hold ctrlr->ctrlr_lock.
1396 */
1397 static void
1398 nvme_ctrlr_keep_alive(struct spdk_nvme_ctrlr *ctrlr)
1399 {
1400 uint64_t now;
1401 struct nvme_request *req;
1402 struct spdk_nvme_cmd *cmd;
1403 int rc;
1404
1405 now = spdk_get_ticks();
1406 if (now < ctrlr->next_keep_alive_tick) {
1407 return;
1408 }
1409
1410 req = nvme_allocate_request_null(ctrlr->adminq, nvme_keep_alive_completion, NULL);
1411 if (req == NULL) {
1412 return;
1413 }
1414
1415 cmd = &req->cmd;
1416 cmd->opc = SPDK_NVME_OPC_KEEP_ALIVE;
1417
1418 rc = nvme_ctrlr_submit_admin_request(ctrlr, req);
1419 if (rc != 0) {
1420 SPDK_ERRLOG("Submitting Keep Alive failed\n");
1421 }
1422
1423 ctrlr->next_keep_alive_tick = now + ctrlr->keep_alive_interval_ticks;
1424 }
1425
1426 int32_t
1427 spdk_nvme_ctrlr_process_admin_completions(struct spdk_nvme_ctrlr *ctrlr)
1428 {
1429 int32_t num_completions;
1430
1431 nvme_robust_mutex_lock(&ctrlr->ctrlr_lock);
1432 if (ctrlr->keep_alive_interval_ticks) {
1433 nvme_ctrlr_keep_alive(ctrlr);
1434 }
1435 num_completions = spdk_nvme_qpair_process_completions(ctrlr->adminq, 0);
1436 nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock);
1437
1438 return num_completions;
1439 }
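/*
 * Illustrative sketch (not part of this file): a typical application polls the
 * admin queue from its main loop so that keep alives, AER completions and
 * other admin completions get processed. spdk_delay_us() is provided by
 * spdk/env.h.
 *
 *	while (!done) {
 *		spdk_nvme_ctrlr_process_admin_completions(ctrlr);
 *		spdk_delay_us(100 * 1000);	// poll every 100 ms, well under the keep alive interval
 *	}
 */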
1440
1441 const struct spdk_nvme_ctrlr_data *
1442 spdk_nvme_ctrlr_get_data(struct spdk_nvme_ctrlr *ctrlr)
1443 {
1444 return &ctrlr->cdata;
1445 }
1446
1447 union spdk_nvme_csts_register spdk_nvme_ctrlr_get_regs_csts(struct spdk_nvme_ctrlr *ctrlr)
1448 {
1449 union spdk_nvme_csts_register csts;
1450
1451 if (nvme_ctrlr_get_csts(ctrlr, &csts)) {
1452 csts.raw = 0;
1453 }
1454 return csts;
1455 }
1456
1457 union spdk_nvme_cap_register spdk_nvme_ctrlr_get_regs_cap(struct spdk_nvme_ctrlr *ctrlr)
1458 {
1459 return ctrlr->cap;
1460 }
1461
1462 union spdk_nvme_vs_register spdk_nvme_ctrlr_get_regs_vs(struct spdk_nvme_ctrlr *ctrlr)
1463 {
1464 union spdk_nvme_vs_register vs;
1465
1466 if (nvme_ctrlr_get_vs(ctrlr, &vs)) {
1467 vs.raw = 0;
1468 }
1469 return vs;
1470 }
1471
1472 uint32_t
1473 spdk_nvme_ctrlr_get_num_ns(struct spdk_nvme_ctrlr *ctrlr)
1474 {
1475 return ctrlr->num_ns;
1476 }
1477
1478 struct spdk_nvme_ns *
1479 spdk_nvme_ctrlr_get_ns(struct spdk_nvme_ctrlr *ctrlr, uint32_t ns_id)
1480 {
1481 if (ns_id < 1 || ns_id > ctrlr->num_ns) {
1482 return NULL;
1483 }
1484
1485 return &ctrlr->ns[ns_id - 1];
1486 }
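/*
 * Illustrative sketch (not part of this file): namespace IDs are 1-based, so a
 * caller typically iterates from 1 through spdk_nvme_ctrlr_get_num_ns(). The
 * spdk_nvme_ns_* helpers used below are assumed to be available from
 * spdk/nvme.h in this SPDK version.
 *
 *	uint32_t nsid;
 *
 *	for (nsid = 1; nsid <= spdk_nvme_ctrlr_get_num_ns(ctrlr); nsid++) {
 *		struct spdk_nvme_ns *ns = spdk_nvme_ctrlr_get_ns(ctrlr, nsid);
 *
 *		if (ns == NULL || !spdk_nvme_ns_is_active(ns)) {
 *			continue;
 *		}
 *		// ... use spdk_nvme_ns_get_size(), spdk_nvme_ns_get_sector_size(), ...
 *	}
 */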
1487
1488 void
1489 spdk_nvme_ctrlr_register_aer_callback(struct spdk_nvme_ctrlr *ctrlr,
1490 spdk_nvme_aer_cb aer_cb_fn,
1491 void *aer_cb_arg)
1492 {
1493 ctrlr->aer_cb_fn = aer_cb_fn;
1494 ctrlr->aer_cb_arg = aer_cb_arg;
1495 }
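/*
 * Illustrative sketch (not part of this file): an AER callback receives the
 * raw completion; the event type is carried in cdw0. The
 * union spdk_nvme_async_event_completion decode below is assumed to match
 * spdk/nvme_spec.h for this SPDK version.
 *
 *	static void
 *	aer_cb(void *arg, const struct spdk_nvme_cpl *cpl)
 *	{
 *		union spdk_nvme_async_event_completion event;
 *
 *		if (spdk_nvme_cpl_is_error(cpl)) {
 *			return;
 *		}
 *		event.raw = cpl->cdw0;
 *		if (event.bits.async_event_type == SPDK_NVME_ASYNC_EVENT_TYPE_SMART) {
 *			// e.g. read the SMART / health log page to see which threshold fired
 *		}
 *	}
 *
 *	spdk_nvme_ctrlr_register_aer_callback(ctrlr, aer_cb, NULL);
 */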
1496
1497 void
1498 spdk_nvme_ctrlr_register_timeout_callback(struct spdk_nvme_ctrlr *ctrlr,
1499 uint32_t nvme_timeout, spdk_nvme_timeout_cb cb_fn, void *cb_arg)
1500 {
1501 ctrlr->timeout_ticks = nvme_timeout * spdk_get_ticks_hz();
1502 ctrlr->timeout_cb_fn = cb_fn;
1503 ctrlr->timeout_cb_arg = cb_arg;
1504 }
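/*
 * Illustrative sketch (not part of this file): nvme_timeout above is expressed
 * in seconds (it is multiplied by spdk_get_ticks_hz()). A common policy is to
 * reset the controller from the timeout callback; the spdk_nvme_timeout_cb
 * signature used here is an assumption and should be checked against
 * spdk/nvme.h for this SPDK version.
 *
 *	static void
 *	timeout_cb(void *cb_arg, struct spdk_nvme_ctrlr *ctrlr,
 *		   struct spdk_nvme_qpair *qpair, uint16_t cid)
 *	{
 *		SPDK_ERRLOG("command %u timed out, resetting controller\n", cid);
 *		spdk_nvme_ctrlr_reset(ctrlr);
 *	}
 *
 *	spdk_nvme_ctrlr_register_timeout_callback(ctrlr, 30, timeout_cb, NULL);
 */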
1505
1506 bool
1507 spdk_nvme_ctrlr_is_log_page_supported(struct spdk_nvme_ctrlr *ctrlr, uint8_t log_page)
1508 {
1509 /* No bounds check necessary, since log_page is uint8_t and log_page_supported has 256 entries */
1510 SPDK_STATIC_ASSERT(sizeof(ctrlr->log_page_supported) == 256, "log_page_supported size mismatch");
1511 return ctrlr->log_page_supported[log_page];
1512 }
1513
1514 bool
1515 spdk_nvme_ctrlr_is_feature_supported(struct spdk_nvme_ctrlr *ctrlr, uint8_t feature_code)
1516 {
1517 /* No bounds check necessary, since feature_code is uint8_t and feature_supported has 256 entries */
1518 SPDK_STATIC_ASSERT(sizeof(ctrlr->feature_supported) == 256, "feature_supported size mismatch");
1519 return ctrlr->feature_supported[feature_code];
1520 }
1521
1522 int
1523 spdk_nvme_ctrlr_attach_ns(struct spdk_nvme_ctrlr *ctrlr, uint32_t nsid,
1524 struct spdk_nvme_ctrlr_list *payload)
1525 {
1526 struct nvme_completion_poll_status status;
1527 int res;
1528
1529 status.done = false;
1530 res = nvme_ctrlr_cmd_attach_ns(ctrlr, nsid, payload,
1531 nvme_completion_poll_cb, &status);
1532 if (res)
1533 return res;
1534 while (status.done == false) {
1535 nvme_robust_mutex_lock(&ctrlr->ctrlr_lock);
1536 spdk_nvme_qpair_process_completions(ctrlr->adminq, 0);
1537 nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock);
1538 }
1539 if (spdk_nvme_cpl_is_error(&status.cpl)) {
1540 SPDK_ERRLOG("spdk_nvme_ctrlr_attach_ns failed!\n");
1541 return -ENXIO;
1542 }
1543
1544 return spdk_nvme_ctrlr_reset(ctrlr);
1545 }
1546
1547 int
1548 spdk_nvme_ctrlr_detach_ns(struct spdk_nvme_ctrlr *ctrlr, uint32_t nsid,
1549 struct spdk_nvme_ctrlr_list *payload)
1550 {
1551 struct nvme_completion_poll_status status;
1552 int res;
1553
1554 status.done = false;
1555 res = nvme_ctrlr_cmd_detach_ns(ctrlr, nsid, payload,
1556 nvme_completion_poll_cb, &status);
1557 if (res)
1558 return res;
1559 while (status.done == false) {
1560 nvme_robust_mutex_lock(&ctrlr->ctrlr_lock);
1561 spdk_nvme_qpair_process_completions(ctrlr->adminq, 0);
1562 nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock);
1563 }
1564 if (spdk_nvme_cpl_is_error(&status.cpl)) {
1565 SPDK_ERRLOG("spdk_nvme_ctrlr_detach_ns failed!\n");
1566 return -ENXIO;
1567 }
1568
1569 return spdk_nvme_ctrlr_reset(ctrlr);
1570 }
1571
1572 uint32_t
1573 spdk_nvme_ctrlr_create_ns(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_ns_data *payload)
1574 {
1575 struct nvme_completion_poll_status status;
1576 int res;
1577
1578 status.done = false;
1579 res = nvme_ctrlr_cmd_create_ns(ctrlr, payload, nvme_completion_poll_cb, &status);
1580 if (res)
1581 return 0;
1582 while (status.done == false) {
1583 nvme_robust_mutex_lock(&ctrlr->ctrlr_lock);
1584 spdk_nvme_qpair_process_completions(ctrlr->adminq, 0);
1585 nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock);
1586 }
1587 if (spdk_nvme_cpl_is_error(&status.cpl)) {
1588 SPDK_ERRLOG("spdk_nvme_ctrlr_create_ns failed!\n");
1589 return 0;
1590 }
1591
1592 res = spdk_nvme_ctrlr_reset(ctrlr);
1593 if (res) {
1594 return 0;
1595 }
1596
1597 /* Return the namespace ID that was created */
1598 return status.cpl.cdw0;
1599 }
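/*
 * Illustrative sketch (not part of this file): creating a namespace and then
 * attaching it to this controller. The spdk_nvme_ns_data and
 * spdk_nvme_ctrlr_list field names below are assumptions and should be checked
 * against spdk/nvme_spec.h; note that both calls end with a full controller
 * reset, as implemented above.
 *
 *	struct spdk_nvme_ns_data ns_data = {0};
 *	struct spdk_nvme_ctrlr_list ctrlr_list = {0};
 *	uint32_t nsid;
 *
 *	ns_data.nsze = 1024 * 1024;	// namespace size in logical blocks
 *	ns_data.ncap = 1024 * 1024;	// namespace capacity in logical blocks
 *	nsid = spdk_nvme_ctrlr_create_ns(ctrlr, &ns_data);
 *	if (nsid == 0) {
 *		return -1;
 *	}
 *
 *	ctrlr_list.ctrlr_count = 1;
 *	ctrlr_list.ctrlr_list[0] = spdk_nvme_ctrlr_get_data(ctrlr)->cntlid;
 *	if (spdk_nvme_ctrlr_attach_ns(ctrlr, nsid, &ctrlr_list) != 0) {
 *		return -1;
 *	}
 */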
1600
1601 int
1602 spdk_nvme_ctrlr_delete_ns(struct spdk_nvme_ctrlr *ctrlr, uint32_t nsid)
1603 {
1604 struct nvme_completion_poll_status status;
1605 int res;
1606
1607 status.done = false;
1608 res = nvme_ctrlr_cmd_delete_ns(ctrlr, nsid, nvme_completion_poll_cb, &status);
1609 if (res)
1610 return res;
1611 while (status.done == false) {
1612 nvme_robust_mutex_lock(&ctrlr->ctrlr_lock);
1613 spdk_nvme_qpair_process_completions(ctrlr->adminq, 0);
1614 nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock);
1615 }
1616 if (spdk_nvme_cpl_is_error(&status.cpl)) {
1617 SPDK_ERRLOG("spdk_nvme_ctrlr_delete_ns failed!\n");
1618 return -ENXIO;
1619 }
1620
1621 return spdk_nvme_ctrlr_reset(ctrlr);
1622 }
1623
1624 int
1625 spdk_nvme_ctrlr_format(struct spdk_nvme_ctrlr *ctrlr, uint32_t nsid,
1626 struct spdk_nvme_format *format)
1627 {
1628 struct nvme_completion_poll_status status;
1629 int res;
1630
1631 status.done = false;
1632 res = nvme_ctrlr_cmd_format(ctrlr, nsid, format, nvme_completion_poll_cb,
1633 &status);
1634 if (res)
1635 return res;
1636 while (status.done == false) {
1637 nvme_robust_mutex_lock(&ctrlr->ctrlr_lock);
1638 spdk_nvme_qpair_process_completions(ctrlr->adminq, 0);
1639 nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock);
1640 }
1641 if (spdk_nvme_cpl_is_error(&status.cpl)) {
1642 SPDK_ERRLOG("spdk_nvme_ctrlr_format failed!\n");
1643 return -ENXIO;
1644 }
1645
1646 return spdk_nvme_ctrlr_reset(ctrlr);
1647 }
1648
1649 int
1650 spdk_nvme_ctrlr_update_firmware(struct spdk_nvme_ctrlr *ctrlr, void *payload, uint32_t size,
1651 int slot)
1652 {
1653 struct spdk_nvme_fw_commit fw_commit;
1654 struct nvme_completion_poll_status status;
1655 int res;
1656 unsigned int size_remaining;
1657 unsigned int offset;
1658 unsigned int transfer;
1659 void *p;
1660
1661 if (size % 4) {
1662 SPDK_ERRLOG("spdk_nvme_ctrlr_update_firmware invalid size!\n");
1663 return -1;
1664 }
1665
1666 /* Firmware download */
1667 size_remaining = size;
1668 offset = 0;
1669 p = payload;
1670
1671 while (size_remaining > 0) {
1672 transfer = spdk_min(size_remaining, ctrlr->min_page_size);
1673 status.done = false;
1674
1675 res = nvme_ctrlr_cmd_fw_image_download(ctrlr, transfer, offset, p,
1676 nvme_completion_poll_cb,
1677 &status);
1678 if (res)
1679 return res;
1680
1681 while (status.done == false) {
1682 nvme_robust_mutex_lock(&ctrlr->ctrlr_lock);
1683 spdk_nvme_qpair_process_completions(ctrlr->adminq, 0);
1684 nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock);
1685 }
1686 if (spdk_nvme_cpl_is_error(&status.cpl)) {
1687 SPDK_ERRLOG("spdk_nvme_ctrlr_fw_image_download failed!\n");
1688 return -ENXIO;
1689 }
1690 p += transfer;
1691 offset += transfer;
1692 size_remaining -= transfer;
1693 }
1694
1695 /* Firmware commit */
1696 memset(&fw_commit, 0, sizeof(struct spdk_nvme_fw_commit));
1697 fw_commit.fs = slot;
1698 fw_commit.ca = SPDK_NVME_FW_COMMIT_REPLACE_IMG;
1699
1700 status.done = false;
1701
1702 res = nvme_ctrlr_cmd_fw_commit(ctrlr, &fw_commit, nvme_completion_poll_cb,
1703 &status);
1704 if (res)
1705 return res;
1706
1707 while (status.done == false) {
1708 nvme_robust_mutex_lock(&ctrlr->ctrlr_lock);
1709 spdk_nvme_qpair_process_completions(ctrlr->adminq, 0);
1710 nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock);
1711 }
1712 if (spdk_nvme_cpl_is_error(&status.cpl)) {
1713 SPDK_ERRLOG("nvme_ctrlr_cmd_fw_commit failed!\n");
1714 return -ENXIO;
1715 }
1716
1717 return spdk_nvme_ctrlr_reset(ctrlr);
1718 }
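/*
 * Illustrative sketch (not part of this file): a firmware update from a buffer
 * that already holds the image. The image length must be a multiple of 4
 * bytes, and the call ends with a controller reset to activate the committed
 * image. Whether the payload buffer must be DMA-safe (e.g. allocated with
 * spdk_zmalloc()) depends on the transport and is not asserted here.
 *
 *	void *fw_image;		// image bytes, length fw_size
 *	uint32_t fw_size;	// multiple of 4
 *
 *	if (spdk_nvme_ctrlr_update_firmware(ctrlr, fw_image, fw_size, 1) != 0) {
 *		SPDK_ERRLOG("firmware update to slot 1 failed\n");
 *	}
 */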