/*-
 *   BSD LICENSE
 *
 *   Copyright (c) Intel Corporation.
 *   All rights reserved.
 *
 *   Redistribution and use in source and binary forms, with or without
 *   modification, are permitted provided that the following conditions
 *   are met:
 *
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in
 *       the documentation and/or other materials provided with the
 *       distribution.
 *     * Neither the name of Intel Corporation nor the names of its
 *       contributors may be used to endorse or promote products derived
 *       from this software without specific prior written permission.
 *
 *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include "spdk/stdinc.h"

#include "bdev_nvme.h"
#include "common.h"

#include "spdk/config.h"
#include "spdk/conf.h"
#include "spdk/endian.h"
#include "spdk/bdev.h"
#include "spdk/json.h"
#include "spdk/nvme.h"
#include "spdk/nvme_ocssd.h"
#include "spdk/thread.h"
#include "spdk/string.h"
#include "spdk/likely.h"
#include "spdk/util.h"

#include "spdk/bdev_module.h"
#include "spdk_internal/log.h"

static void bdev_nvme_get_spdk_running_config(FILE *fp);
static int bdev_nvme_config_json(struct spdk_json_write_ctx *w);

struct nvme_io_channel {
	struct spdk_nvme_qpair	*qpair;
	struct spdk_poller	*poller;

	bool			collect_spin_stat;
	uint64_t		spin_ticks;
	uint64_t		start_ticks;
	uint64_t		end_ticks;
};

struct nvme_bdev_io {
	/** array of iovecs to transfer. */
	struct iovec *iovs;

	/** Number of iovecs in iovs array. */
	int iovcnt;

	/** Current iovec position. */
	int iovpos;

	/** Offset in current iovec. */
	uint32_t iov_offset;

	/** Saved status for admin passthru completion event or PI error verification. */
	struct spdk_nvme_cpl cpl;

	/** Originating thread */
	struct spdk_thread *orig_thread;
};

struct nvme_probe_ctx {
	size_t count;
	struct spdk_nvme_transport_id trids[NVME_MAX_CONTROLLERS];
	struct spdk_nvme_host_id hostids[NVME_MAX_CONTROLLERS];
	const char *names[NVME_MAX_CONTROLLERS];
	uint32_t prchk_flags[NVME_MAX_CONTROLLERS];
	const char *hostnqn;
};

struct nvme_probe_skip_entry {
	struct spdk_nvme_transport_id trid;
	TAILQ_ENTRY(nvme_probe_skip_entry) tailq;
};
/* All the controllers deleted by users via RPC are skipped by hotplug monitor */
static TAILQ_HEAD(, nvme_probe_skip_entry) g_skipped_nvme_ctrlrs = TAILQ_HEAD_INITIALIZER(
			g_skipped_nvme_ctrlrs);

static struct spdk_bdev_nvme_opts g_opts = {
	.action_on_timeout = SPDK_BDEV_NVME_TIMEOUT_ACTION_NONE,
	.timeout_us = 0,
	.retry_count = SPDK_NVME_DEFAULT_RETRY_COUNT,
	.nvme_adminq_poll_period_us = 1000000ULL,
	.nvme_ioq_poll_period_us = 0,
};

#define NVME_HOTPLUG_POLL_PERIOD_MAX		10000000ULL
#define NVME_HOTPLUG_POLL_PERIOD_DEFAULT	100000ULL

static int g_hot_insert_nvme_controller_index = 0;
static uint64_t g_nvme_hotplug_poll_period_us = NVME_HOTPLUG_POLL_PERIOD_DEFAULT;
static bool g_nvme_hotplug_enabled = false;
static struct spdk_thread *g_bdev_nvme_init_thread;
static struct spdk_poller *g_hotplug_poller;
static struct spdk_nvme_probe_ctx *g_hotplug_probe_ctx;
static char *g_nvme_hostnqn = NULL;

static void nvme_ctrlr_create_bdevs(struct nvme_bdev_ctrlr *nvme_bdev_ctrlr);
static int bdev_nvme_library_init(void);
static void bdev_nvme_library_fini(void);
static int bdev_nvme_readv(struct nvme_bdev *nbdev, struct spdk_io_channel *ch,
			   struct nvme_bdev_io *bio,
			   struct iovec *iov, int iovcnt, void *md, uint64_t lba_count, uint64_t lba);
static int bdev_nvme_no_pi_readv(struct nvme_bdev *nbdev, struct spdk_io_channel *ch,
				 struct nvme_bdev_io *bio,
				 struct iovec *iov, int iovcnt, void *md, uint64_t lba_count, uint64_t lba);
static int bdev_nvme_writev(struct nvme_bdev *nbdev, struct spdk_io_channel *ch,
			    struct nvme_bdev_io *bio,
			    struct iovec *iov, int iovcnt, void *md, uint64_t lba_count, uint64_t lba);
static int bdev_nvme_admin_passthru(struct nvme_bdev *nbdev, struct spdk_io_channel *ch,
				    struct nvme_bdev_io *bio,
				    struct spdk_nvme_cmd *cmd, void *buf, size_t nbytes);
static int bdev_nvme_io_passthru(struct nvme_bdev *nbdev, struct spdk_io_channel *ch,
				 struct nvme_bdev_io *bio,
				 struct spdk_nvme_cmd *cmd, void *buf, size_t nbytes);
static int bdev_nvme_io_passthru_md(struct nvme_bdev *nbdev, struct spdk_io_channel *ch,
				    struct nvme_bdev_io *bio,
				    struct spdk_nvme_cmd *cmd, void *buf, size_t nbytes, void *md_buf, size_t md_len);
static int nvme_ctrlr_create_bdev(struct nvme_bdev_ctrlr *nvme_bdev_ctrlr, uint32_t nsid);

struct spdk_nvme_qpair *
spdk_bdev_nvme_get_io_qpair(struct spdk_io_channel *ctrlr_io_ch)
{
	struct nvme_io_channel *nvme_ch;

	nvme_ch = spdk_io_channel_get_ctx(ctrlr_io_ch);

	return nvme_ch->qpair;
}

static int
bdev_nvme_get_ctx_size(void)
{
	return sizeof(struct nvme_bdev_io);
}

static struct spdk_bdev_module nvme_if = {
	.name = "nvme",
	.module_init = bdev_nvme_library_init,
	.module_fini = bdev_nvme_library_fini,
	.config_text = bdev_nvme_get_spdk_running_config,
	.config_json = bdev_nvme_config_json,
	.get_ctx_size = bdev_nvme_get_ctx_size,
};
SPDK_BDEV_MODULE_REGISTER(nvme, &nvme_if)

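/*
 * Per-channel I/O poller: drains completions on the channel's qpair. The
 * spin-stat bookkeeping below (enabled only for VTune builds, see
 * bdev_nvme_create_cb) accumulates ticks spent polling without finding any
 * completions. Note the return value: positive means completions were
 * reaped, and the negative return while qpair is NULL simply reports that
 * there is nothing to poll during a controller reset.
 */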
static int
bdev_nvme_poll(void *arg)
{
	struct nvme_io_channel *ch = arg;
	int32_t num_completions;

	if (ch->qpair == NULL) {
		return -1;
	}

	if (ch->collect_spin_stat && ch->start_ticks == 0) {
		ch->start_ticks = spdk_get_ticks();
	}

	num_completions = spdk_nvme_qpair_process_completions(ch->qpair, 0);

	if (ch->collect_spin_stat) {
		if (num_completions > 0) {
			if (ch->end_ticks != 0) {
				ch->spin_ticks += (ch->end_ticks - ch->start_ticks);
				ch->end_ticks = 0;
			}
			ch->start_ticks = 0;
		} else {
			ch->end_ticks = spdk_get_ticks();
		}
	}

	return num_completions;
}

static int
bdev_nvme_poll_adminq(void *arg)
{
	struct spdk_nvme_ctrlr *ctrlr = arg;

	return spdk_nvme_ctrlr_process_admin_completions(ctrlr);
}

static void
bdev_nvme_unregister_cb(void *io_device)
{
	struct spdk_nvme_ctrlr *ctrlr = io_device;

	spdk_nvme_detach(ctrlr);
}

static void
bdev_nvme_ctrlr_destruct(struct nvme_bdev_ctrlr *nvme_bdev_ctrlr)
{
	assert(nvme_bdev_ctrlr->destruct);
	pthread_mutex_lock(&g_bdev_nvme_mutex);
	TAILQ_REMOVE(&g_nvme_bdev_ctrlrs, nvme_bdev_ctrlr, tailq);
	pthread_mutex_unlock(&g_bdev_nvme_mutex);
	spdk_io_device_unregister(nvme_bdev_ctrlr->ctrlr, bdev_nvme_unregister_cb);
	spdk_poller_unregister(&nvme_bdev_ctrlr->adminq_timer_poller);
	free(nvme_bdev_ctrlr->name);
	free(nvme_bdev_ctrlr->bdevs);
	free(nvme_bdev_ctrlr);
}

static int
bdev_nvme_destruct(void *ctx)
{
	struct nvme_bdev *nvme_disk = ctx;
	struct nvme_bdev_ctrlr *nvme_bdev_ctrlr = nvme_disk->nvme_bdev_ctrlr;

	pthread_mutex_lock(&g_bdev_nvme_mutex);
	nvme_bdev_ctrlr->ref--;
	free(nvme_disk->disk.name);
	nvme_disk->active = false;
	if (nvme_bdev_ctrlr->ref == 0 && nvme_bdev_ctrlr->destruct) {
		pthread_mutex_unlock(&g_bdev_nvme_mutex);
		bdev_nvme_ctrlr_destruct(nvme_bdev_ctrlr);
		return 0;
	}

	pthread_mutex_unlock(&g_bdev_nvme_mutex);
	return 0;
}

static int
bdev_nvme_flush(struct nvme_bdev *nbdev, struct nvme_bdev_io *bio,
		uint64_t offset, uint64_t nbytes)
{
	spdk_bdev_io_complete(spdk_bdev_io_from_ctx(bio), SPDK_BDEV_IO_STATUS_SUCCESS);

	return 0;
}

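/*
 * Controller reset is a message chain that must visit every I/O channel,
 * since each channel owns a qpair on the controller being reset:
 *
 *   bdev_nvme_reset()
 *     -> _bdev_nvme_reset_destroy_qpair() on each channel (free the qpair)
 *     -> _bdev_nvme_reset()                               (reset the ctrlr)
 *     -> _bdev_nvme_reset_create_qpair() on each channel  (reallocate qpairs)
 *     -> _bdev_nvme_reset_done()                          (complete the bdev_io)
 *
 * While a channel's qpair pointer is NULL, submissions on that channel fail
 * fast in _bdev_nvme_submit_request().
 */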
static void
_bdev_nvme_reset_done(struct spdk_io_channel_iter *i, int status)
{
	void *ctx = spdk_io_channel_iter_get_ctx(i);
	int rc = SPDK_BDEV_IO_STATUS_SUCCESS;

	if (status) {
		rc = SPDK_BDEV_IO_STATUS_FAILED;
	}
	spdk_bdev_io_complete(spdk_bdev_io_from_ctx(ctx), rc);
}

static void
_bdev_nvme_reset_create_qpair(struct spdk_io_channel_iter *i)
{
	struct spdk_nvme_ctrlr *ctrlr = spdk_io_channel_iter_get_io_device(i);
	struct spdk_io_channel *_ch = spdk_io_channel_iter_get_channel(i);
	struct nvme_io_channel *nvme_ch = spdk_io_channel_get_ctx(_ch);
	struct spdk_nvme_io_qpair_opts opts;

	spdk_nvme_ctrlr_get_default_io_qpair_opts(ctrlr, &opts, sizeof(opts));
	opts.delay_pcie_doorbell = true;

	nvme_ch->qpair = spdk_nvme_ctrlr_alloc_io_qpair(ctrlr, &opts, sizeof(opts));
	if (!nvme_ch->qpair) {
		spdk_for_each_channel_continue(i, -1);
		return;
	}

	spdk_for_each_channel_continue(i, 0);
}

static void
_bdev_nvme_reset(struct spdk_io_channel_iter *i, int status)
{
	struct spdk_nvme_ctrlr *ctrlr = spdk_io_channel_iter_get_io_device(i);
	struct nvme_bdev_io *bio = spdk_io_channel_iter_get_ctx(i);
	int rc;

	if (status) {
		spdk_bdev_io_complete(spdk_bdev_io_from_ctx(bio), SPDK_BDEV_IO_STATUS_FAILED);
		return;
	}

	rc = spdk_nvme_ctrlr_reset(ctrlr);
	if (rc != 0) {
		spdk_bdev_io_complete(spdk_bdev_io_from_ctx(bio), SPDK_BDEV_IO_STATUS_FAILED);
		return;
	}

	/* Recreate all of the I/O queue pairs */
	spdk_for_each_channel(ctrlr,
			      _bdev_nvme_reset_create_qpair,
			      bio,
			      _bdev_nvme_reset_done);
}

static void
_bdev_nvme_reset_destroy_qpair(struct spdk_io_channel_iter *i)
{
	struct spdk_io_channel *ch = spdk_io_channel_iter_get_channel(i);
	struct nvme_io_channel *nvme_ch = spdk_io_channel_get_ctx(ch);
	int rc;

	rc = spdk_nvme_ctrlr_free_io_qpair(nvme_ch->qpair);
	if (!rc) {
		nvme_ch->qpair = NULL;
	}

	spdk_for_each_channel_continue(i, rc);
}

static int
bdev_nvme_reset(struct nvme_bdev *nbdev, struct nvme_bdev_io *bio)
{
	/* First, delete all NVMe I/O queue pairs. */
	spdk_for_each_channel(nbdev->nvme_bdev_ctrlr->ctrlr,
			      _bdev_nvme_reset_destroy_qpair,
			      bio,
			      _bdev_nvme_reset);

	return 0;
}

static int bdev_nvme_unmap(struct nvme_bdev *nbdev, struct spdk_io_channel *ch,
			   struct nvme_bdev_io *bio,
			   uint64_t offset_blocks,
			   uint64_t num_blocks);

static void
bdev_nvme_get_buf_cb(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io,
		     bool success)
{
	int ret;

	if (!success) {
		spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED);
		return;
	}

	ret = bdev_nvme_readv((struct nvme_bdev *)bdev_io->bdev->ctxt,
			      ch,
			      (struct nvme_bdev_io *)bdev_io->driver_ctx,
			      bdev_io->u.bdev.iovs,
			      bdev_io->u.bdev.iovcnt,
			      bdev_io->u.bdev.md_buf,
			      bdev_io->u.bdev.num_blocks,
			      bdev_io->u.bdev.offset_blocks);

	if (spdk_likely(ret == 0)) {
		return;
	} else if (ret == -ENOMEM) {
		spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_NOMEM);
	} else {
		spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED);
	}
}

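/*
 * Central dispatch for bdev I/O. Reads detour through spdk_bdev_io_get_buf()
 * so the bdev layer can supply a data buffer before the NVMe read is built;
 * every other I/O type maps directly onto one submission helper. Note that
 * WRITE_ZEROES is routed to bdev_nvme_unmap(): see
 * bdev_nvme_io_type_supported(), which only advertises WRITE_ZEROES when
 * deallocated blocks are guaranteed to read back as zeroes.
 */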
static int
_bdev_nvme_submit_request(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io)
{
	struct nvme_io_channel *nvme_ch = spdk_io_channel_get_ctx(ch);
	struct nvme_bdev *nbdev = (struct nvme_bdev *)bdev_io->bdev->ctxt;
	struct nvme_bdev_io *nbdev_io = (struct nvme_bdev_io *)bdev_io->driver_ctx;

	if (nvme_ch->qpair == NULL) {
		/* The device is currently resetting */
		return -1;
	}

	switch (bdev_io->type) {
	case SPDK_BDEV_IO_TYPE_READ:
		spdk_bdev_io_get_buf(bdev_io, bdev_nvme_get_buf_cb,
				     bdev_io->u.bdev.num_blocks * bdev_io->bdev->blocklen);
		return 0;

	case SPDK_BDEV_IO_TYPE_WRITE:
		return bdev_nvme_writev(nbdev,
					ch,
					nbdev_io,
					bdev_io->u.bdev.iovs,
					bdev_io->u.bdev.iovcnt,
					bdev_io->u.bdev.md_buf,
					bdev_io->u.bdev.num_blocks,
					bdev_io->u.bdev.offset_blocks);

	case SPDK_BDEV_IO_TYPE_WRITE_ZEROES:
		return bdev_nvme_unmap(nbdev,
				       ch,
				       nbdev_io,
				       bdev_io->u.bdev.offset_blocks,
				       bdev_io->u.bdev.num_blocks);

	case SPDK_BDEV_IO_TYPE_UNMAP:
		return bdev_nvme_unmap(nbdev,
				       ch,
				       nbdev_io,
				       bdev_io->u.bdev.offset_blocks,
				       bdev_io->u.bdev.num_blocks);

	case SPDK_BDEV_IO_TYPE_RESET:
		return bdev_nvme_reset(nbdev, nbdev_io);

	case SPDK_BDEV_IO_TYPE_FLUSH:
		return bdev_nvme_flush(nbdev,
				       nbdev_io,
				       bdev_io->u.bdev.offset_blocks,
				       bdev_io->u.bdev.num_blocks);

	case SPDK_BDEV_IO_TYPE_NVME_ADMIN:
		return bdev_nvme_admin_passthru(nbdev,
						ch,
						nbdev_io,
						&bdev_io->u.nvme_passthru.cmd,
						bdev_io->u.nvme_passthru.buf,
						bdev_io->u.nvme_passthru.nbytes);

	case SPDK_BDEV_IO_TYPE_NVME_IO:
		return bdev_nvme_io_passthru(nbdev,
					     ch,
					     nbdev_io,
					     &bdev_io->u.nvme_passthru.cmd,
					     bdev_io->u.nvme_passthru.buf,
					     bdev_io->u.nvme_passthru.nbytes);

	case SPDK_BDEV_IO_TYPE_NVME_IO_MD:
		return bdev_nvme_io_passthru_md(nbdev,
						ch,
						nbdev_io,
						&bdev_io->u.nvme_passthru.cmd,
						bdev_io->u.nvme_passthru.buf,
						bdev_io->u.nvme_passthru.nbytes,
						bdev_io->u.nvme_passthru.md_buf,
						bdev_io->u.nvme_passthru.md_len);

	default:
		return -EINVAL;
	}
	return 0;
}

static void
bdev_nvme_submit_request(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io)
{
	int rc = _bdev_nvme_submit_request(ch, bdev_io);

	if (spdk_unlikely(rc != 0)) {
		if (rc == -ENOMEM) {
			spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_NOMEM);
		} else {
			spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED);
		}
	}
}

static bool
bdev_nvme_io_type_supported(void *ctx, enum spdk_bdev_io_type io_type)
{
	struct nvme_bdev *nbdev = ctx;
	const struct spdk_nvme_ctrlr_data *cdata;

	switch (io_type) {
	case SPDK_BDEV_IO_TYPE_READ:
	case SPDK_BDEV_IO_TYPE_WRITE:
	case SPDK_BDEV_IO_TYPE_RESET:
	case SPDK_BDEV_IO_TYPE_FLUSH:
	case SPDK_BDEV_IO_TYPE_NVME_ADMIN:
	case SPDK_BDEV_IO_TYPE_NVME_IO:
		return true;

	case SPDK_BDEV_IO_TYPE_NVME_IO_MD:
		return spdk_nvme_ns_get_md_size(nbdev->ns) ? true : false;

	case SPDK_BDEV_IO_TYPE_UNMAP:
		cdata = spdk_nvme_ctrlr_get_data(nbdev->nvme_bdev_ctrlr->ctrlr);
		return cdata->oncs.dsm;

	case SPDK_BDEV_IO_TYPE_WRITE_ZEROES:
		cdata = spdk_nvme_ctrlr_get_data(nbdev->nvme_bdev_ctrlr->ctrlr);
		/*
		 * If an NVMe controller guarantees reading unallocated blocks returns zero,
		 * we can implement WRITE_ZEROES as an NVMe deallocate command.
		 */
		if (cdata->oncs.dsm &&
		    spdk_nvme_ns_get_dealloc_logical_block_read_value(nbdev->ns) == SPDK_NVME_DEALLOC_READ_00) {
			return true;
		}
		/*
		 * The NVMe controller write_zeroes function is currently not used by our driver.
		 * If a user submits an arbitrarily large write_zeroes request to the controller,
		 * the request will fail. Until this is resolved, we only claim support for
		 * write_zeroes if deallocated blocks return 0's when read.
		 */
		return false;

	default:
		return false;
	}
}

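/*
 * Per-channel construct/destruct callbacks registered via
 * spdk_io_device_register(): each SPDK thread that opens a channel to the
 * controller gets its own I/O qpair and its own poller, so submission and
 * completion stay lockless per thread. delay_pcie_doorbell lets the driver
 * defer ringing the PCIe submission doorbell so multiple commands queued
 * before the next poll can be coalesced into fewer MMIO writes.
 */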
static int
bdev_nvme_create_cb(void *io_device, void *ctx_buf)
{
	struct spdk_nvme_ctrlr *ctrlr = io_device;
	struct nvme_io_channel *ch = ctx_buf;
	struct spdk_nvme_io_qpair_opts opts;

#ifdef SPDK_CONFIG_VTUNE
	ch->collect_spin_stat = true;
#else
	ch->collect_spin_stat = false;
#endif

	spdk_nvme_ctrlr_get_default_io_qpair_opts(ctrlr, &opts, sizeof(opts));
	opts.delay_pcie_doorbell = true;

	ch->qpair = spdk_nvme_ctrlr_alloc_io_qpair(ctrlr, &opts, sizeof(opts));

	if (ch->qpair == NULL) {
		return -1;
	}

	ch->poller = spdk_poller_register(bdev_nvme_poll, ch, g_opts.nvme_ioq_poll_period_us);
	return 0;
}

static void
bdev_nvme_destroy_cb(void *io_device, void *ctx_buf)
{
	struct nvme_io_channel *ch = ctx_buf;

	spdk_nvme_ctrlr_free_io_qpair(ch->qpair);
	spdk_poller_unregister(&ch->poller);
}

static struct spdk_io_channel *
bdev_nvme_get_io_channel(void *ctx)
{
	struct nvme_bdev *nvme_bdev = ctx;

	return spdk_get_io_channel(nvme_bdev->nvme_bdev_ctrlr->ctrlr);
}

static int
bdev_nvme_dump_info_json(void *ctx, struct spdk_json_write_ctx *w)
{
	struct nvme_bdev *nvme_bdev = ctx;
	struct nvme_bdev_ctrlr *nvme_bdev_ctrlr = nvme_bdev->nvme_bdev_ctrlr;
	const struct spdk_nvme_ctrlr_data *cdata;
	struct spdk_nvme_ns *ns;
	union spdk_nvme_vs_register vs;
	union spdk_nvme_csts_register csts;
	char buf[128];

	cdata = spdk_nvme_ctrlr_get_data(nvme_bdev->nvme_bdev_ctrlr->ctrlr);
	vs = spdk_nvme_ctrlr_get_regs_vs(nvme_bdev->nvme_bdev_ctrlr->ctrlr);
	csts = spdk_nvme_ctrlr_get_regs_csts(nvme_bdev->nvme_bdev_ctrlr->ctrlr);
	ns = nvme_bdev->ns;

	spdk_json_write_named_object_begin(w, "nvme");

	if (nvme_bdev_ctrlr->trid.trtype == SPDK_NVME_TRANSPORT_PCIE) {
		spdk_json_write_named_string(w, "pci_address", nvme_bdev_ctrlr->trid.traddr);
	}

	spdk_json_write_named_object_begin(w, "trid");

	nvme_bdev_dump_trid_json(&nvme_bdev_ctrlr->trid, w);

	spdk_json_write_object_end(w);

	spdk_json_write_named_object_begin(w, "ctrlr_data");

	spdk_json_write_named_string_fmt(w, "vendor_id", "0x%04x", cdata->vid);

	snprintf(buf, sizeof(cdata->mn) + 1, "%s", cdata->mn);
	spdk_str_trim(buf);
	spdk_json_write_named_string(w, "model_number", buf);

	snprintf(buf, sizeof(cdata->sn) + 1, "%s", cdata->sn);
	spdk_str_trim(buf);
	spdk_json_write_named_string(w, "serial_number", buf);

	snprintf(buf, sizeof(cdata->fr) + 1, "%s", cdata->fr);
	spdk_str_trim(buf);
	spdk_json_write_named_string(w, "firmware_revision", buf);

	spdk_json_write_named_object_begin(w, "oacs");

	spdk_json_write_named_uint32(w, "security", cdata->oacs.security);
	spdk_json_write_named_uint32(w, "format", cdata->oacs.format);
	spdk_json_write_named_uint32(w, "firmware", cdata->oacs.firmware);
	spdk_json_write_named_uint32(w, "ns_manage", cdata->oacs.ns_manage);

	spdk_json_write_object_end(w);

	spdk_json_write_object_end(w);

	spdk_json_write_named_object_begin(w, "vs");

	spdk_json_write_name(w, "nvme_version");
	if (vs.bits.ter) {
		spdk_json_write_string_fmt(w, "%u.%u.%u", vs.bits.mjr, vs.bits.mnr, vs.bits.ter);
	} else {
		spdk_json_write_string_fmt(w, "%u.%u", vs.bits.mjr, vs.bits.mnr);
	}

	spdk_json_write_object_end(w);

	spdk_json_write_named_object_begin(w, "csts");

	spdk_json_write_named_uint32(w, "rdy", csts.bits.rdy);
	spdk_json_write_named_uint32(w, "cfs", csts.bits.cfs);

	spdk_json_write_object_end(w);

	spdk_json_write_named_object_begin(w, "ns_data");

	spdk_json_write_named_uint32(w, "id", spdk_nvme_ns_get_id(ns));

	spdk_json_write_object_end(w);

	spdk_json_write_object_end(w);

	return 0;
}

static void
bdev_nvme_write_config_json(struct spdk_bdev *bdev, struct spdk_json_write_ctx *w)
{
	/* No config per bdev needed */
}

static uint64_t
bdev_nvme_get_spin_time(struct spdk_io_channel *ch)
{
	struct nvme_io_channel *nvme_ch = spdk_io_channel_get_ctx(ch);
	uint64_t spin_time;

	if (!nvme_ch->collect_spin_stat) {
		return 0;
	}

	if (nvme_ch->end_ticks != 0) {
		nvme_ch->spin_ticks += (nvme_ch->end_ticks - nvme_ch->start_ticks);
		nvme_ch->end_ticks = 0;
	}

	spin_time = (nvme_ch->spin_ticks * 1000000ULL) / spdk_get_ticks_hz();
	nvme_ch->start_ticks = 0;
	nvme_ch->spin_ticks = 0;

	return spin_time;
}

static const struct spdk_bdev_fn_table nvmelib_fn_table = {
	.destruct = bdev_nvme_destruct,
	.submit_request = bdev_nvme_submit_request,
	.io_type_supported = bdev_nvme_io_type_supported,
	.get_io_channel = bdev_nvme_get_io_channel,
	.dump_info_json = bdev_nvme_dump_info_json,
	.write_config_json = bdev_nvme_write_config_json,
	.get_spin_time = bdev_nvme_get_spin_time,
};

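/*
 * Expose one bdev per active namespace, named "<ctrlr>n<nsid>". The bdev
 * inherits block size, capacity, and optimal I/O boundary from the
 * namespace, and picks up end-to-end protection (DIF/DIX) parameters when
 * the namespace is formatted with metadata. Each created bdev takes a
 * reference on the controller; the reference is dropped in
 * bdev_nvme_destruct(), and the controller itself is only torn down once
 * the last reference goes away during controller destruction.
 */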
static int
nvme_ctrlr_create_bdev(struct nvme_bdev_ctrlr *nvme_bdev_ctrlr, uint32_t nsid)
{
	struct spdk_nvme_ctrlr *ctrlr = nvme_bdev_ctrlr->ctrlr;
	struct nvme_bdev *bdev;
	struct spdk_nvme_ns *ns;
	const struct spdk_uuid *uuid;
	const struct spdk_nvme_ctrlr_data *cdata;
	const struct spdk_nvme_ns_data *nsdata;
	int rc;

	cdata = spdk_nvme_ctrlr_get_data(ctrlr);

	ns = spdk_nvme_ctrlr_get_ns(ctrlr, nsid);
	if (!ns) {
		SPDK_DEBUGLOG(SPDK_LOG_BDEV_NVME, "Invalid NS %d\n", nsid);
		return -EINVAL;
	}

	bdev = &nvme_bdev_ctrlr->bdevs[nsid - 1];
	bdev->id = nsid;

	bdev->nvme_bdev_ctrlr = nvme_bdev_ctrlr;
	bdev->ns = ns;
	nvme_bdev_ctrlr->ref++;

	bdev->disk.name = spdk_sprintf_alloc("%sn%d", nvme_bdev_ctrlr->name, spdk_nvme_ns_get_id(ns));
	if (!bdev->disk.name) {
		nvme_bdev_ctrlr->ref--;
		memset(bdev, 0, sizeof(*bdev));
		return -ENOMEM;
	}
	bdev->disk.product_name = "NVMe disk";

	bdev->disk.write_cache = 0;
	if (cdata->vwc.present) {
		/* Enable if the Volatile Write Cache exists */
		bdev->disk.write_cache = 1;
	}
	bdev->disk.blocklen = spdk_nvme_ns_get_extended_sector_size(ns);
	bdev->disk.blockcnt = spdk_nvme_ns_get_num_sectors(ns);
	bdev->disk.optimal_io_boundary = spdk_nvme_ns_get_optimal_io_boundary(ns);

	uuid = spdk_nvme_ns_get_uuid(ns);
	if (uuid != NULL) {
		bdev->disk.uuid = *uuid;
	}

	bdev->disk.md_len = spdk_nvme_ns_get_md_size(ns);
	if (bdev->disk.md_len != 0) {
		nsdata = spdk_nvme_ns_get_data(ns);
		bdev->disk.md_interleave = nsdata->flbas.extended;
		bdev->disk.dif_type = (enum spdk_dif_type)spdk_nvme_ns_get_pi_type(ns);
		if (bdev->disk.dif_type != SPDK_DIF_DISABLE) {
			bdev->disk.dif_is_head_of_md = nsdata->dps.md_start;
			bdev->disk.dif_check_flags = nvme_bdev_ctrlr->prchk_flags;
		}
	}

	bdev->disk.ctxt = bdev;
	bdev->disk.fn_table = &nvmelib_fn_table;
	bdev->disk.module = &nvme_if;
	rc = spdk_bdev_register(&bdev->disk);
	if (rc) {
		free(bdev->disk.name);
		nvme_bdev_ctrlr->ref--;
		memset(bdev, 0, sizeof(*bdev));
		return rc;
	}
	bdev->active = true;

	return 0;
}

static bool
hotplug_probe_cb(void *cb_ctx, const struct spdk_nvme_transport_id *trid,
		 struct spdk_nvme_ctrlr_opts *opts)
{
	struct nvme_probe_skip_entry *entry;

	TAILQ_FOREACH(entry, &g_skipped_nvme_ctrlrs, tailq) {
		if (spdk_nvme_transport_id_compare(trid, &entry->trid) == 0) {
			return false;
		}
	}

	SPDK_DEBUGLOG(SPDK_LOG_BDEV_NVME, "Attaching to %s\n", trid->traddr);

	return true;
}

static bool
probe_cb(void *cb_ctx, const struct spdk_nvme_transport_id *trid,
	 struct spdk_nvme_ctrlr_opts *opts)
{
	struct nvme_probe_ctx *ctx = cb_ctx;

	SPDK_DEBUGLOG(SPDK_LOG_BDEV_NVME, "Probing device %s\n", trid->traddr);

	if (nvme_bdev_ctrlr_get(trid)) {
		SPDK_ERRLOG("A controller with the provided trid (traddr: %s) already exists.\n",
			    trid->traddr);
		return false;
	}

	if (trid->trtype == SPDK_NVME_TRANSPORT_PCIE) {
		bool claim_device = false;
		size_t i;

		for (i = 0; i < ctx->count; i++) {
			if (spdk_nvme_transport_id_compare(trid, &ctx->trids[i]) == 0) {
				claim_device = true;
				break;
			}
		}

		if (!claim_device) {
			SPDK_DEBUGLOG(SPDK_LOG_BDEV_NVME, "Not claiming device at %s\n", trid->traddr);
			return false;
		}
	}

	if (ctx->hostnqn) {
		snprintf(opts->hostnqn, sizeof(opts->hostnqn), "%s", ctx->hostnqn);
	}

	return true;
}

static void
spdk_nvme_abort_cpl(void *ctx, const struct spdk_nvme_cpl *cpl)
{
	struct spdk_nvme_ctrlr *ctrlr = ctx;
	int rc;

	if (spdk_nvme_cpl_is_error(cpl)) {
		SPDK_WARNLOG("Abort failed. Resetting controller.\n");
		rc = spdk_nvme_ctrlr_reset(ctrlr);
		if (rc) {
			SPDK_ERRLOG("Resetting controller failed.\n");
		}
	}
}

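/*
 * Command timeout handler, registered when g_opts.timeout_us is non-zero.
 * A fatal controller status (CSTS.CFS) always forces a reset. Otherwise the
 * response escalates with the configured action: try to abort the stuck
 * command first (falling through to a reset if the abort cannot even be
 * submitted), reset the controller outright, or just log and carry on.
 */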
static void
timeout_cb(void *cb_arg, struct spdk_nvme_ctrlr *ctrlr,
	   struct spdk_nvme_qpair *qpair, uint16_t cid)
{
	int rc;
	union spdk_nvme_csts_register csts;

	SPDK_WARNLOG("Warning: Detected a timeout. ctrlr=%p qpair=%p cid=%u\n", ctrlr, qpair, cid);

	csts = spdk_nvme_ctrlr_get_regs_csts(ctrlr);
	if (csts.bits.cfs) {
		SPDK_ERRLOG("Controller Fatal Status, reset required\n");
		rc = spdk_nvme_ctrlr_reset(ctrlr);
		if (rc) {
			SPDK_ERRLOG("Resetting controller failed.\n");
		}
		return;
	}

	switch (g_opts.action_on_timeout) {
	case SPDK_BDEV_NVME_TIMEOUT_ACTION_ABORT:
		if (qpair) {
			rc = spdk_nvme_ctrlr_cmd_abort(ctrlr, qpair, cid,
						       spdk_nvme_abort_cpl, ctrlr);
			if (rc == 0) {
				return;
			}

			SPDK_ERRLOG("Unable to send abort. Resetting.\n");
		}

	/* FALLTHROUGH */
	case SPDK_BDEV_NVME_TIMEOUT_ACTION_RESET:
		rc = spdk_nvme_ctrlr_reset(ctrlr);
		if (rc) {
			SPDK_ERRLOG("Resetting controller failed.\n");
		}
		break;
	case SPDK_BDEV_NVME_TIMEOUT_ACTION_NONE:
		SPDK_DEBUGLOG(SPDK_LOG_BDEV_NVME, "No action for nvme controller timeout.\n");
		break;
	default:
		SPDK_ERRLOG("An invalid timeout action value is found.\n");
		break;
	}
}

static void
nvme_ctrlr_deactivate_bdev(struct nvme_bdev *bdev)
{
	spdk_bdev_unregister(&bdev->disk, NULL, NULL);
	bdev->active = false;
}

static void
nvme_ctrlr_update_ns_bdevs(struct nvme_bdev_ctrlr *nvme_bdev_ctrlr)
{
	struct spdk_nvme_ctrlr *ctrlr = nvme_bdev_ctrlr->ctrlr;
	uint32_t i;
	struct nvme_bdev *bdev;

	for (i = 0; i < nvme_bdev_ctrlr->num_ns; i++) {
		uint32_t nsid = i + 1;

		bdev = &nvme_bdev_ctrlr->bdevs[i];
		if (!bdev->active && spdk_nvme_ctrlr_is_active_ns(ctrlr, nsid)) {
			SPDK_NOTICELOG("NSID %u to be added\n", nsid);
			nvme_ctrlr_create_bdev(nvme_bdev_ctrlr, nsid);
		}

		if (bdev->active && !spdk_nvme_ctrlr_is_active_ns(ctrlr, nsid)) {
			SPDK_NOTICELOG("NSID %u Bdev %s is removed\n", nsid, bdev->disk.name);
			nvme_ctrlr_deactivate_bdev(bdev);
		}
	}
}

static void
aer_cb(void *arg, const struct spdk_nvme_cpl *cpl)
{
	struct nvme_bdev_ctrlr *nvme_bdev_ctrlr = arg;
	union spdk_nvme_async_event_completion event;

	if (spdk_nvme_cpl_is_error(cpl)) {
		SPDK_WARNLOG("AER request execute failed");
		return;
	}

	event.raw = cpl->cdw0;
	if ((event.bits.async_event_type == SPDK_NVME_ASYNC_EVENT_TYPE_NOTICE) &&
	    (event.bits.async_event_info == SPDK_NVME_ASYNC_EVENT_NS_ATTR_CHANGED)) {
		nvme_ctrlr_update_ns_bdevs(nvme_bdev_ctrlr);
	}
}

static int
create_ctrlr(struct spdk_nvme_ctrlr *ctrlr,
	     const char *name,
	     const struct spdk_nvme_transport_id *trid,
	     uint32_t prchk_flags)
{
	struct nvme_bdev_ctrlr *nvme_bdev_ctrlr;

	nvme_bdev_ctrlr = calloc(1, sizeof(*nvme_bdev_ctrlr));
	if (nvme_bdev_ctrlr == NULL) {
		SPDK_ERRLOG("Failed to allocate device struct\n");
		return -ENOMEM;
	}
	nvme_bdev_ctrlr->num_ns = spdk_nvme_ctrlr_get_num_ns(ctrlr);
	nvme_bdev_ctrlr->bdevs = calloc(nvme_bdev_ctrlr->num_ns, sizeof(struct nvme_bdev));
	if (!nvme_bdev_ctrlr->bdevs) {
		SPDK_ERRLOG("Failed to allocate block devices struct\n");
		free(nvme_bdev_ctrlr);
		return -ENOMEM;
	}

	nvme_bdev_ctrlr->adminq_timer_poller = NULL;
	nvme_bdev_ctrlr->ctrlr = ctrlr;
	nvme_bdev_ctrlr->ref = 0;
	nvme_bdev_ctrlr->trid = *trid;
	nvme_bdev_ctrlr->name = strdup(name);
	if (nvme_bdev_ctrlr->name == NULL) {
		free(nvme_bdev_ctrlr->bdevs);
		free(nvme_bdev_ctrlr);
		return -ENOMEM;
	}
	nvme_bdev_ctrlr->prchk_flags = prchk_flags;

	spdk_io_device_register(ctrlr, bdev_nvme_create_cb, bdev_nvme_destroy_cb,
				sizeof(struct nvme_io_channel),
				name);

	nvme_ctrlr_create_bdevs(nvme_bdev_ctrlr);

	nvme_bdev_ctrlr->adminq_timer_poller = spdk_poller_register(bdev_nvme_poll_adminq, ctrlr,
					       g_opts.nvme_adminq_poll_period_us);

	TAILQ_INSERT_TAIL(&g_nvme_bdev_ctrlrs, nvme_bdev_ctrlr, tailq);

	if (g_opts.timeout_us > 0) {
		spdk_nvme_ctrlr_register_timeout_callback(ctrlr, g_opts.timeout_us,
				timeout_cb, NULL);
	}

	spdk_nvme_ctrlr_register_aer_callback(ctrlr, aer_cb, nvme_bdev_ctrlr);

	return 0;
}

static void
attach_cb(void *cb_ctx, const struct spdk_nvme_transport_id *trid,
	  struct spdk_nvme_ctrlr *ctrlr, const struct spdk_nvme_ctrlr_opts *opts)
{
	struct nvme_probe_ctx *ctx = cb_ctx;
	char *name = NULL;
	uint32_t prchk_flags = 0;
	size_t i;

	if (ctx) {
		for (i = 0; i < ctx->count; i++) {
			if (spdk_nvme_transport_id_compare(trid, &ctx->trids[i]) == 0) {
				prchk_flags = ctx->prchk_flags[i];
				name = strdup(ctx->names[i]);
				break;
			}
		}
	} else {
		name = spdk_sprintf_alloc("HotInNvme%d", g_hot_insert_nvme_controller_index++);
	}
	if (!name) {
		SPDK_ERRLOG("Failed to assign name to NVMe device\n");
		return;
	}

	SPDK_DEBUGLOG(SPDK_LOG_BDEV_NVME, "Attached to %s (%s)\n", trid->traddr, name);

	create_ctrlr(ctrlr, name, trid, prchk_flags);

	free(name);
}

static void
remove_cb(void *cb_ctx, struct spdk_nvme_ctrlr *ctrlr)
{
	uint32_t i;
	struct nvme_bdev_ctrlr *nvme_bdev_ctrlr;
	struct nvme_bdev *nvme_bdev;

	pthread_mutex_lock(&g_bdev_nvme_mutex);
	TAILQ_FOREACH(nvme_bdev_ctrlr, &g_nvme_bdev_ctrlrs, tailq) {
		if (nvme_bdev_ctrlr->ctrlr == ctrlr) {
			/* The controller's destruction was already started */
			if (nvme_bdev_ctrlr->destruct) {
				pthread_mutex_unlock(&g_bdev_nvme_mutex);
				return;
			}
			pthread_mutex_unlock(&g_bdev_nvme_mutex);
			for (i = 0; i < nvme_bdev_ctrlr->num_ns; i++) {
				uint32_t nsid = i + 1;

				nvme_bdev = &nvme_bdev_ctrlr->bdevs[nsid - 1];
				if (nvme_bdev->active) {
					assert(nvme_bdev->id == nsid);
					spdk_bdev_unregister(&nvme_bdev->disk, NULL, NULL);
				}
			}

			pthread_mutex_lock(&g_bdev_nvme_mutex);
			nvme_bdev_ctrlr->destruct = true;
			if (nvme_bdev_ctrlr->ref == 0) {
				pthread_mutex_unlock(&g_bdev_nvme_mutex);
				bdev_nvme_ctrlr_destruct(nvme_bdev_ctrlr);
			} else {
				pthread_mutex_unlock(&g_bdev_nvme_mutex);
			}
			return;
		}
	}
	pthread_mutex_unlock(&g_bdev_nvme_mutex);
}

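/*
 * Hotplug poller. Probing is asynchronous: the first tick starts a PCIe
 * probe context, and subsequent ticks drive it with
 * spdk_nvme_probe_poll_async() until it reports anything other than -EAGAIN,
 * at which point the context is dropped and a fresh probe starts on the next
 * pass. New controllers attach via attach_cb(); controllers on the skip list
 * (deleted via RPC) are rejected by hotplug_probe_cb().
 */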
static int
bdev_nvme_hotplug(void *arg)
{
	struct spdk_nvme_transport_id trid_pcie;
	int done;

	if (!g_hotplug_probe_ctx) {
		memset(&trid_pcie, 0, sizeof(trid_pcie));
		trid_pcie.trtype = SPDK_NVME_TRANSPORT_PCIE;

		g_hotplug_probe_ctx = spdk_nvme_probe_async(&trid_pcie, NULL,
				      hotplug_probe_cb,
				      attach_cb, remove_cb);
		if (!g_hotplug_probe_ctx) {
			return -1;
		}
	}

	done = spdk_nvme_probe_poll_async(g_hotplug_probe_ctx);
	if (done != -EAGAIN) {
		g_hotplug_probe_ctx = NULL;
		return 1;
	}

	return -1;
}

void
spdk_bdev_nvme_get_opts(struct spdk_bdev_nvme_opts *opts)
{
	*opts = g_opts;
}

int
spdk_bdev_nvme_set_opts(const struct spdk_bdev_nvme_opts *opts)
{
	if (g_bdev_nvme_init_thread != NULL) {
		return -EPERM;
	}

	g_opts = *opts;

	return 0;
}

struct set_nvme_hotplug_ctx {
	uint64_t period_us;
	bool enabled;
	spdk_msg_fn fn;
	void *fn_ctx;
};

static void
set_nvme_hotplug_period_cb(void *_ctx)
{
	struct set_nvme_hotplug_ctx *ctx = _ctx;

	spdk_poller_unregister(&g_hotplug_poller);
	if (ctx->enabled) {
		g_hotplug_poller = spdk_poller_register(bdev_nvme_hotplug, NULL, ctx->period_us);
	}

	g_nvme_hotplug_poll_period_us = ctx->period_us;
	g_nvme_hotplug_enabled = ctx->enabled;
	if (ctx->fn) {
		ctx->fn(ctx->fn_ctx);
	}

	free(ctx);
}

int
spdk_bdev_nvme_set_hotplug(bool enabled, uint64_t period_us, spdk_msg_fn cb, void *cb_ctx)
{
	struct set_nvme_hotplug_ctx *ctx;

	if (enabled == true && !spdk_process_is_primary()) {
		return -EPERM;
	}

	ctx = calloc(1, sizeof(*ctx));
	if (ctx == NULL) {
		return -ENOMEM;
	}

	period_us = period_us == 0 ? NVME_HOTPLUG_POLL_PERIOD_DEFAULT : period_us;
	ctx->period_us = spdk_min(period_us, NVME_HOTPLUG_POLL_PERIOD_MAX);
	ctx->enabled = enabled;
	ctx->fn = cb;
	ctx->fn_ctx = cb_ctx;

	spdk_thread_send_msg(g_bdev_nvme_init_thread, set_nvme_hotplug_period_cb, ctx);
	return 0;
}

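/*
 * Attach-time helper for the async create path: wrap the newly connected
 * controller in a nvme_bdev_ctrlr, then report the names of the bdevs that
 * were actually created (one per active namespace). count is in/out: it caps
 * the names array on entry and holds the number of names returned on exit.
 */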
static int
bdev_nvme_create_and_get_bdev_names(struct spdk_nvme_ctrlr *ctrlr,
				    const char *base_name,
				    const char **names, size_t *count,
				    const struct spdk_nvme_transport_id *trid,
				    uint32_t prchk_flags)
{
	struct nvme_bdev_ctrlr *nvme_bdev_ctrlr;
	struct nvme_bdev *nvme_bdev;
	uint32_t i, nsid;
	size_t j;

	if (create_ctrlr(ctrlr, base_name, trid, prchk_flags)) {
		SPDK_ERRLOG("Failed to create new device\n");
		return -1;
	}

	nvme_bdev_ctrlr = nvme_bdev_ctrlr_get(trid);
	if (!nvme_bdev_ctrlr) {
		SPDK_ERRLOG("Failed to find new NVMe controller\n");
		return -1;
	}

	/*
	 * Report the new bdevs that were created in this call.
	 * There can be more than one bdev per NVMe controller since one bdev is created per namespace.
	 */
	j = 0;
	for (i = 0; i < nvme_bdev_ctrlr->num_ns; i++) {
		nsid = i + 1;
		nvme_bdev = &nvme_bdev_ctrlr->bdevs[nsid - 1];
		if (!nvme_bdev->active) {
			continue;
		}
		assert(nvme_bdev->id == nsid);
		if (j < *count) {
			names[j] = nvme_bdev->disk.name;
			j++;
		} else {
			SPDK_ERRLOG("Maximum number of namespaces supported per NVMe controller is %zu. Unable to return all names of created bdevs\n",
				    *count);
			return -1;
		}
	}

	*count = j;

	return 0;
}

struct nvme_async_probe_ctx {
	struct spdk_nvme_probe_ctx *probe_ctx;
	const char *base_name;
	const char **names;
	size_t *count;
	uint32_t prchk_flags;
	struct spdk_poller *poller;
	struct spdk_nvme_transport_id trid;
	struct spdk_nvme_ctrlr_opts opts;
	spdk_bdev_create_nvme_fn cb_fn;
	void *cb_ctx;
};

static void
connect_attach_cb(void *cb_ctx, const struct spdk_nvme_transport_id *trid,
		  struct spdk_nvme_ctrlr *ctrlr, const struct spdk_nvme_ctrlr_opts *opts)
{
	struct spdk_nvme_ctrlr_opts *user_opts = cb_ctx;
	struct nvme_async_probe_ctx *ctx;
	int rc;

	ctx = SPDK_CONTAINEROF(user_opts, struct nvme_async_probe_ctx, opts);
	rc = bdev_nvme_create_and_get_bdev_names(ctrlr,
			ctx->base_name,
			ctx->names, ctx->count,
			&ctx->trid,
			ctx->prchk_flags);

	if (ctx->cb_fn) {
		ctx->cb_fn(ctx->cb_ctx, rc);
	}
}

static int
bdev_nvme_async_poll(void *arg)
{
	struct nvme_async_probe_ctx *ctx = arg;
	int done;

	done = spdk_nvme_probe_poll_async(ctx->probe_ctx);
	/* retry again */
	if (done == -EAGAIN) {
		return 1;
	}
	spdk_poller_unregister(&ctx->poller);
	free(ctx);
	return 1;
}

int
spdk_bdev_nvme_create(struct spdk_nvme_transport_id *trid,
		      struct spdk_nvme_host_id *hostid,
		      const char *base_name,
		      const char **names, size_t *count,
		      const char *hostnqn,
		      uint32_t prchk_flags,
		      spdk_bdev_create_nvme_fn cb_fn,
		      void *cb_ctx)
{
	struct nvme_probe_skip_entry *entry, *tmp;
	struct nvme_async_probe_ctx *ctx;

	if (nvme_bdev_ctrlr_get(trid) != NULL) {
		SPDK_ERRLOG("A controller with the provided trid (traddr: %s) already exists.\n", trid->traddr);
		return -1;
	}

	if (nvme_bdev_ctrlr_get_by_name(base_name)) {
		SPDK_ERRLOG("A controller with the provided name (%s) already exists.\n", base_name);
		return -1;
	}

	if (trid->trtype == SPDK_NVME_TRANSPORT_PCIE) {
		TAILQ_FOREACH_SAFE(entry, &g_skipped_nvme_ctrlrs, tailq, tmp) {
			if (spdk_nvme_transport_id_compare(trid, &entry->trid) == 0) {
				TAILQ_REMOVE(&g_skipped_nvme_ctrlrs, entry, tailq);
				free(entry);
				break;
			}
		}
	}

	ctx = calloc(1, sizeof(*ctx));
	if (!ctx) {
		return -ENOMEM;
	}
	ctx->base_name = base_name;
	ctx->names = names;
	ctx->count = count;
	ctx->cb_fn = cb_fn;
	ctx->cb_ctx = cb_ctx;
	ctx->prchk_flags = prchk_flags;
	ctx->trid = *trid;

	spdk_nvme_ctrlr_get_default_ctrlr_opts(&ctx->opts, sizeof(ctx->opts));

	if (hostnqn) {
		snprintf(ctx->opts.hostnqn, sizeof(ctx->opts.hostnqn), "%s", hostnqn);
	}

	if (hostid->hostaddr[0] != '\0') {
		snprintf(ctx->opts.src_addr, sizeof(ctx->opts.src_addr), "%s", hostid->hostaddr);
	}

	if (hostid->hostsvcid[0] != '\0') {
		snprintf(ctx->opts.src_svcid, sizeof(ctx->opts.src_svcid), "%s", hostid->hostsvcid);
	}

	ctx->probe_ctx = spdk_nvme_connect_async(trid, &ctx->opts, connect_attach_cb);
	if (ctx->probe_ctx == NULL) {
		SPDK_ERRLOG("No controller was found with provided trid (traddr: %s)\n", trid->traddr);
		free(ctx);
		return -1;
	}
	ctx->poller = spdk_poller_register(bdev_nvme_async_poll, ctx, 1000);

	return 0;
}

int
spdk_bdev_nvme_delete(const char *name)
{
	struct nvme_bdev_ctrlr *nvme_bdev_ctrlr = NULL;
	struct nvme_probe_skip_entry *entry;

	if (name == NULL) {
		return -EINVAL;
	}

	nvme_bdev_ctrlr = nvme_bdev_ctrlr_get_by_name(name);
	if (nvme_bdev_ctrlr == NULL) {
		SPDK_ERRLOG("Failed to find NVMe controller\n");
		return -ENODEV;
	}

	if (nvme_bdev_ctrlr->trid.trtype == SPDK_NVME_TRANSPORT_PCIE) {
		entry = calloc(1, sizeof(*entry));
		if (!entry) {
			return -ENOMEM;
		}
		entry->trid = nvme_bdev_ctrlr->trid;
		TAILQ_INSERT_TAIL(&g_skipped_nvme_ctrlrs, entry, tailq);
	}

	remove_cb(NULL, nvme_bdev_ctrlr->ctrlr);
	return 0;
}

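/*
 * Module init: parse the legacy "[Nvme]" config section (retry count,
 * timeout and timeout action, admin/IO poll rates, hotplug settings,
 * HostNQN, and one "TransportID" line per controller). Fabric controllers
 * named in the section are connected synchronously right here; local PCIe
 * controllers are claimed during the spdk_nvme_probe() pass that follows.
 */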
static int
bdev_nvme_library_init(void)
{
	struct spdk_conf_section *sp;
	const char *val;
	int rc = 0;
	int64_t intval = 0;
	size_t i;
	struct nvme_probe_ctx *probe_ctx = NULL;
	int retry_count;
	uint32_t local_nvme_num = 0;
	int64_t hotplug_period;
	bool hotplug_enabled = g_nvme_hotplug_enabled;

	g_bdev_nvme_init_thread = spdk_get_thread();

	sp = spdk_conf_find_section(NULL, "Nvme");
	if (sp == NULL) {
		goto end;
	}

	probe_ctx = calloc(1, sizeof(*probe_ctx));
	if (probe_ctx == NULL) {
		SPDK_ERRLOG("Failed to allocate probe_ctx\n");
		rc = -1;
		goto end;
	}

	if ((retry_count = spdk_conf_section_get_intval(sp, "RetryCount")) < 0) {
		if ((retry_count = spdk_conf_section_get_intval(sp, "NvmeRetryCount")) < 0) {
			retry_count = SPDK_NVME_DEFAULT_RETRY_COUNT;
		} else {
			SPDK_WARNLOG("NvmeRetryCount was renamed to RetryCount\n");
			SPDK_WARNLOG("Please update your configuration file\n");
		}
	}

	g_opts.retry_count = retry_count;

	val = spdk_conf_section_get_val(sp, "TimeoutUsec");
	if (val != NULL) {
		intval = spdk_strtoll(val, 10);
		if (intval < 0) {
			SPDK_ERRLOG("Invalid TimeoutUsec value\n");
			rc = -1;
			goto end;
		}
	}

	g_opts.timeout_us = intval;

	if (g_opts.timeout_us > 0) {
		val = spdk_conf_section_get_val(sp, "ActionOnTimeout");
		if (val != NULL) {
			if (!strcasecmp(val, "Reset")) {
				g_opts.action_on_timeout = SPDK_BDEV_NVME_TIMEOUT_ACTION_RESET;
			} else if (!strcasecmp(val, "Abort")) {
				g_opts.action_on_timeout = SPDK_BDEV_NVME_TIMEOUT_ACTION_ABORT;
			}
		} else {
			/* Handle old name for backward compatibility */
			val = spdk_conf_section_get_val(sp, "ResetControllerOnTimeout");
			if (val) {
				SPDK_WARNLOG("ResetControllerOnTimeout was renamed to ActionOnTimeout\n");
				SPDK_WARNLOG("Please update your configuration file\n");

				if (spdk_conf_section_get_boolval(sp, "ResetControllerOnTimeout", false)) {
					g_opts.action_on_timeout = SPDK_BDEV_NVME_TIMEOUT_ACTION_RESET;
				}
			}
		}
	}

	intval = spdk_conf_section_get_intval(sp, "AdminPollRate");
	if (intval > 0) {
		g_opts.nvme_adminq_poll_period_us = intval;
	}

	intval = spdk_conf_section_get_intval(sp, "IOPollRate");
	if (intval > 0) {
		g_opts.nvme_ioq_poll_period_us = intval;
	}

	if (spdk_process_is_primary()) {
		hotplug_enabled = spdk_conf_section_get_boolval(sp, "HotplugEnable", false);
	}

	hotplug_period = spdk_conf_section_get_intval(sp, "HotplugPollRate");
	if (hotplug_period < 0) {
		hotplug_period = 0;
	}

	g_nvme_hostnqn = spdk_conf_section_get_val(sp, "HostNQN");
	probe_ctx->hostnqn = g_nvme_hostnqn;

	for (i = 0; i < NVME_MAX_CONTROLLERS; i++) {
		val = spdk_conf_section_get_nmval(sp, "TransportID", i, 0);
		if (val == NULL) {
			break;
		}

		rc = spdk_nvme_transport_id_parse(&probe_ctx->trids[i], val);
		if (rc < 0) {
			SPDK_ERRLOG("Unable to parse TransportID: %s\n", val);
			rc = -1;
			goto end;
		}

		rc = spdk_nvme_host_id_parse(&probe_ctx->hostids[i], val);
		if (rc < 0) {
			SPDK_ERRLOG("Unable to parse HostID: %s\n", val);
			rc = -1;
			goto end;
		}

		val = spdk_conf_section_get_nmval(sp, "TransportID", i, 1);
		if (val == NULL) {
			SPDK_ERRLOG("No name provided for TransportID\n");
			rc = -1;
			goto end;
		}

		probe_ctx->names[i] = val;

		val = spdk_conf_section_get_nmval(sp, "TransportID", i, 2);
		if (val != NULL) {
			rc = spdk_nvme_prchk_flags_parse(&probe_ctx->prchk_flags[i], val);
			if (rc < 0) {
				SPDK_ERRLOG("Unable to parse prchk: %s\n", val);
				rc = -1;
				goto end;
			}
		}

		probe_ctx->count++;

		if (probe_ctx->trids[i].trtype != SPDK_NVME_TRANSPORT_PCIE) {
			struct spdk_nvme_ctrlr *ctrlr;
			struct spdk_nvme_ctrlr_opts opts;

			if (nvme_bdev_ctrlr_get(&probe_ctx->trids[i])) {
				SPDK_ERRLOG("A controller with the provided trid (traddr: %s) already exists.\n",
					    probe_ctx->trids[i].traddr);
				rc = -1;
				goto end;
			}

			if (probe_ctx->trids[i].subnqn[0] == '\0') {
				SPDK_ERRLOG("Need to provide subsystem nqn\n");
				rc = -1;
				goto end;
			}

			spdk_nvme_ctrlr_get_default_ctrlr_opts(&opts, sizeof(opts));

			if (probe_ctx->hostnqn != NULL) {
				snprintf(opts.hostnqn, sizeof(opts.hostnqn), "%s", probe_ctx->hostnqn);
			}

			if (probe_ctx->hostids[i].hostaddr[0] != '\0') {
				snprintf(opts.src_addr, sizeof(opts.src_addr), "%s", probe_ctx->hostids[i].hostaddr);
			}

			if (probe_ctx->hostids[i].hostsvcid[0] != '\0') {
				snprintf(opts.src_svcid, sizeof(opts.src_svcid), "%s", probe_ctx->hostids[i].hostsvcid);
			}

			ctrlr = spdk_nvme_connect(&probe_ctx->trids[i], &opts, sizeof(opts));
			if (ctrlr == NULL) {
				SPDK_ERRLOG("Unable to connect to provided trid (traddr: %s)\n",
					    probe_ctx->trids[i].traddr);
				rc = -1;
				goto end;
			}

			rc = create_ctrlr(ctrlr, probe_ctx->names[i], &probe_ctx->trids[i], 0);
			if (rc) {
				goto end;
			}
		} else {
			local_nvme_num++;
		}
	}

	if (local_nvme_num > 0) {
		/* used to probe local NVMe device */
		if (spdk_nvme_probe(NULL, probe_ctx, probe_cb, attach_cb, NULL)) {
			rc = -1;
			goto end;
		}

		for (i = 0; i < probe_ctx->count; i++) {
			if (probe_ctx->trids[i].trtype != SPDK_NVME_TRANSPORT_PCIE) {
				continue;
			}

			if (!nvme_bdev_ctrlr_get(&probe_ctx->trids[i])) {
				SPDK_ERRLOG("NVMe SSD \"%s\" could not be found.\n", probe_ctx->trids[i].traddr);
				SPDK_ERRLOG("Check PCIe BDF and that it is attached to UIO/VFIO driver.\n");
			}
		}
	}

	rc = spdk_bdev_nvme_set_hotplug(hotplug_enabled, hotplug_period, NULL, NULL);
	if (rc) {
		SPDK_ERRLOG("Failed to setup hotplug (%d): %s", rc, spdk_strerror(rc));
		rc = -1;
	}
end:
	spdk_nvme_retry_count = g_opts.retry_count;

	free(probe_ctx);
	return rc;
}

static void
bdev_nvme_library_fini(void)
{
	struct nvme_bdev_ctrlr *nvme_bdev_ctrlr, *tmp;
	struct nvme_probe_skip_entry *entry, *entry_tmp;

	spdk_poller_unregister(&g_hotplug_poller);
	free(g_hotplug_probe_ctx);

	TAILQ_FOREACH_SAFE(entry, &g_skipped_nvme_ctrlrs, tailq, entry_tmp) {
		TAILQ_REMOVE(&g_skipped_nvme_ctrlrs, entry, tailq);
		free(entry);
	}

	pthread_mutex_lock(&g_bdev_nvme_mutex);
	TAILQ_FOREACH_SAFE(nvme_bdev_ctrlr, &g_nvme_bdev_ctrlrs, tailq, tmp) {
		if (nvme_bdev_ctrlr->ref > 0) {
			SPDK_ERRLOG("Controller %s is still referenced, can't destroy it\n",
				    nvme_bdev_ctrlr->name);
			continue;
		}

		if (nvme_bdev_ctrlr->destruct) {
			/* This controller's destruction was already started
			 * before the application started shutting down
			 */
			continue;
		}

		nvme_bdev_ctrlr->destruct = true;
		pthread_mutex_unlock(&g_bdev_nvme_mutex);
		bdev_nvme_ctrlr_destruct(nvme_bdev_ctrlr);
		pthread_mutex_lock(&g_bdev_nvme_mutex);
	}
	pthread_mutex_unlock(&g_bdev_nvme_mutex);
}

static void
nvme_ctrlr_create_bdevs(struct nvme_bdev_ctrlr *nvme_bdev_ctrlr)
{
	int rc;
	int bdev_created = 0;
	uint32_t nsid;

	for (nsid = spdk_nvme_ctrlr_get_first_active_ns(nvme_bdev_ctrlr->ctrlr);
	     nsid != 0; nsid = spdk_nvme_ctrlr_get_next_active_ns(nvme_bdev_ctrlr->ctrlr, nsid)) {
		rc = nvme_ctrlr_create_bdev(nvme_bdev_ctrlr, nsid);
		if (rc == 0) {
			bdev_created++;
		} else {
			SPDK_NOTICELOG("Failed to create bdev for namespace %u of %s\n", nsid, nvme_bdev_ctrlr->name);
		}
	}

	if (bdev_created == 0) {
		SPDK_NOTICELOG("No bdev is created for NVMe controller %s\n", nvme_bdev_ctrlr->name);
	}
}

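/*
 * When the controller reports a protection-information error on a read or
 * write, re-check the data in software to pinpoint the failing block: build
 * a DIF context matching the bdev's format and run spdk_dif_verify()
 * (metadata interleaved with data) or spdk_dix_verify() (separate metadata
 * buffer). This only logs the findings; the original NVMe status is still
 * returned to the bdev layer by the completion callbacks further below.
 */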
static void
bdev_nvme_verify_pi_error(struct spdk_bdev_io *bdev_io)
{
	struct spdk_bdev *bdev = bdev_io->bdev;
	struct spdk_dif_ctx dif_ctx;
	struct spdk_dif_error err_blk = {};
	int rc;

	rc = spdk_dif_ctx_init(&dif_ctx,
			       bdev->blocklen, bdev->md_len, bdev->md_interleave,
			       bdev->dif_is_head_of_md, bdev->dif_type, bdev->dif_check_flags,
			       bdev_io->u.bdev.offset_blocks, 0, 0, 0, 0);
	if (rc != 0) {
		SPDK_ERRLOG("Initialization of DIF context failed\n");
		return;
	}

	if (bdev->md_interleave) {
		rc = spdk_dif_verify(bdev_io->u.bdev.iovs, bdev_io->u.bdev.iovcnt,
				     bdev_io->u.bdev.num_blocks, &dif_ctx, &err_blk);
	} else {
		struct iovec md_iov = {
			.iov_base = bdev_io->u.bdev.md_buf,
			.iov_len = bdev_io->u.bdev.num_blocks * bdev->md_len,
		};

		rc = spdk_dix_verify(bdev_io->u.bdev.iovs, bdev_io->u.bdev.iovcnt,
				     &md_iov, bdev_io->u.bdev.num_blocks, &dif_ctx, &err_blk);
	}

	if (rc != 0) {
		SPDK_ERRLOG("DIF error detected. type=%d, offset=%" PRIu32 "\n",
			    err_blk.err_type, err_blk.err_offset);
	} else {
		SPDK_ERRLOG("Hardware reported PI error but SPDK could not find any.\n");
	}
}

static void
bdev_nvme_no_pi_readv_done(void *ref, const struct spdk_nvme_cpl *cpl)
{
	struct nvme_bdev_io *bio = ref;
	struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(bio);

	if (spdk_nvme_cpl_is_success(cpl)) {
		/* Run PI verification for read data buffer. */
		bdev_nvme_verify_pi_error(bdev_io);
	}

	/* Return original completion status */
	spdk_bdev_io_complete_nvme_status(bdev_io, bio->cpl.status.sct,
					  bio->cpl.status.sc);
}

static void
bdev_nvme_readv_done(void *ref, const struct spdk_nvme_cpl *cpl)
{
	struct nvme_bdev_io *bio = ref;
	struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(bio);
	int ret;

	if (spdk_unlikely(spdk_nvme_cpl_is_pi_error(cpl))) {
		SPDK_ERRLOG("readv completed with PI error (sct=%d, sc=%d)\n",
			    cpl->status.sct, cpl->status.sc);

		/* Save completion status to use after verifying PI error. */
		bio->cpl = *cpl;

		/* Read without PI checking to verify PI error. */
		ret = bdev_nvme_no_pi_readv((struct nvme_bdev *)bdev_io->bdev->ctxt,
					    spdk_bdev_io_get_io_channel(bdev_io),
					    bio,
					    bdev_io->u.bdev.iovs,
					    bdev_io->u.bdev.iovcnt,
					    bdev_io->u.bdev.md_buf,
					    bdev_io->u.bdev.num_blocks,
					    bdev_io->u.bdev.offset_blocks);
		if (ret == 0) {
			return;
		}
	}

	spdk_bdev_io_complete_nvme_status(bdev_io, cpl->status.sct, cpl->status.sc);
}

static void
bdev_nvme_writev_done(void *ref, const struct spdk_nvme_cpl *cpl)
{
	struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx((struct nvme_bdev_io *)ref);

	if (spdk_nvme_cpl_is_pi_error(cpl)) {
		SPDK_ERRLOG("writev completed with PI error (sct=%d, sc=%d)\n",
			    cpl->status.sct, cpl->status.sc);
		/* Run PI verification for write data buffer if PI error is detected. */
		bdev_nvme_verify_pi_error(bdev_io);
	}

	spdk_bdev_io_complete_nvme_status(bdev_io, cpl->status.sct, cpl->status.sc);
}

static void
bdev_nvme_queued_done(void *ref, const struct spdk_nvme_cpl *cpl)
{
	struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx((struct nvme_bdev_io *)ref);

	spdk_bdev_io_complete_nvme_status(bdev_io, cpl->status.sct, cpl->status.sc);
}

static void
bdev_nvme_admin_passthru_completion(void *ctx)
{
	struct nvme_bdev_io *bio = ctx;
	struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(bio);

	spdk_bdev_io_complete_nvme_status(bdev_io,
					  bio->cpl.status.sct, bio->cpl.status.sc);
}

static void
bdev_nvme_admin_passthru_done(void *ref, const struct spdk_nvme_cpl *cpl)
{
	struct nvme_bdev_io *bio = ref;

	bio->cpl = *cpl;
	spdk_thread_send_msg(bio->orig_thread, bdev_nvme_admin_passthru_completion, bio);
}

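/*
 * SGL callbacks used by the readv/writev submission paths. The NVMe driver
 * walks the request's scatter-gather list through this pair: reset_sgl seeks
 * to an absolute byte offset within the iovec array, and next_sge hands back
 * one contiguous segment at a time, advancing the cursor kept in the
 * nvme_bdev_io (iovpos plus iov_offset).
 */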
static void
bdev_nvme_queued_reset_sgl(void *ref, uint32_t sgl_offset)
{
	struct nvme_bdev_io *bio = ref;
	struct iovec *iov;

	bio->iov_offset = sgl_offset;
	for (bio->iovpos = 0; bio->iovpos < bio->iovcnt; bio->iovpos++) {
		iov = &bio->iovs[bio->iovpos];
		if (bio->iov_offset < iov->iov_len) {
			break;
		}

		bio->iov_offset -= iov->iov_len;
	}
}

static int
bdev_nvme_queued_next_sge(void *ref, void **address, uint32_t *length)
{
	struct nvme_bdev_io *bio = ref;
	struct iovec *iov;

	assert(bio->iovpos < bio->iovcnt);

	iov = &bio->iovs[bio->iovpos];

	*address = iov->iov_base;
	*length = iov->iov_len;

	if (bio->iov_offset) {
		assert(bio->iov_offset <= iov->iov_len);
		*address += bio->iov_offset;
		*length -= bio->iov_offset;
	}

	bio->iov_offset += *length;
	if (bio->iov_offset == iov->iov_len) {
		bio->iovpos++;
		bio->iov_offset = 0;
	}

	return 0;
}

1795static int
9f95a23c
TL
1796bdev_nvme_no_pi_readv(struct nvme_bdev *nbdev, struct spdk_io_channel *ch,
1797 struct nvme_bdev_io *bio, struct iovec *iov, int iovcnt,
1798 void *md, uint64_t lba_count, uint64_t lba)
11fdf7f2 1799{
9f95a23c 1800 struct nvme_io_channel *nvme_ch = spdk_io_channel_get_ctx(ch);
11fdf7f2
TL
1801 int rc;
1802
9f95a23c
TL
1803 SPDK_DEBUGLOG(SPDK_LOG_BDEV_NVME, "read %lu blocks with offset %#lx without PI check\n",
1804 lba_count, lba);
1805
11fdf7f2
TL
1806 bio->iovs = iov;
1807 bio->iovcnt = iovcnt;
1808 bio->iovpos = 0;
1809 bio->iov_offset = 0;
1810
9f95a23c
TL
1811 rc = spdk_nvme_ns_cmd_readv_with_md(nbdev->ns, nvme_ch->qpair, lba, lba_count,
1812 bdev_nvme_no_pi_readv_done, bio, 0,
1813 bdev_nvme_queued_reset_sgl, bdev_nvme_queued_next_sge,
1814 md, 0, 0);
1815
1816 if (rc != 0 && rc != -ENOMEM) {
1817 SPDK_ERRLOG("no_pi_readv failed: rc = %d\n", rc);
1818 }
1819 return rc;
1820}

static int
bdev_nvme_readv(struct nvme_bdev *nbdev, struct spdk_io_channel *ch,
		struct nvme_bdev_io *bio, struct iovec *iov, int iovcnt,
		void *md, uint64_t lba_count, uint64_t lba)
{
	struct nvme_io_channel *nvme_ch = spdk_io_channel_get_ctx(ch);
	int rc;

	SPDK_DEBUGLOG(SPDK_LOG_BDEV_NVME, "read %lu blocks with offset %#lx\n",
		      lba_count, lba);

	bio->iovs = iov;
	bio->iovcnt = iovcnt;
	bio->iovpos = 0;
	bio->iov_offset = 0;

	rc = spdk_nvme_ns_cmd_readv_with_md(nbdev->ns, nvme_ch->qpair, lba, lba_count,
					    bdev_nvme_readv_done, bio, nbdev->disk.dif_check_flags,
					    bdev_nvme_queued_reset_sgl, bdev_nvme_queued_next_sge,
					    md, 0, 0);

	if (rc != 0 && rc != -ENOMEM) {
		SPDK_ERRLOG("readv failed: rc = %d\n", rc);
	}
	return rc;
}
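
/*
 * Unlike bdev_nvme_no_pi_readv(), this passes nbdev->disk.dif_check_flags
 * as io_flags, so end-to-end protection information checks (e.g. the
 * SPDK_NVME_IO_FLAGS_PRCHK_REFTAG and SPDK_NVME_IO_FLAGS_PRCHK_GUARD bits)
 * are applied according to the bdev's configuration.
 */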

static int
bdev_nvme_writev(struct nvme_bdev *nbdev, struct spdk_io_channel *ch,
		 struct nvme_bdev_io *bio,
		 struct iovec *iov, int iovcnt, void *md, uint64_t lba_count, uint64_t lba)
{
	struct nvme_io_channel *nvme_ch = spdk_io_channel_get_ctx(ch);
	int rc;

	SPDK_DEBUGLOG(SPDK_LOG_BDEV_NVME, "write %lu blocks with offset %#lx\n",
		      lba_count, lba);

	bio->iovs = iov;
	bio->iovcnt = iovcnt;
	bio->iovpos = 0;
	bio->iov_offset = 0;

	rc = spdk_nvme_ns_cmd_writev_with_md(nbdev->ns, nvme_ch->qpair, lba, lba_count,
					     bdev_nvme_writev_done, bio, nbdev->disk.dif_check_flags,
					     bdev_nvme_queued_reset_sgl, bdev_nvme_queued_next_sge,
					     md, 0, 0);

	if (rc != 0 && rc != -ENOMEM) {
		SPDK_ERRLOG("writev failed: rc = %d\n", rc);
	}
	return rc;
}

static int
bdev_nvme_unmap(struct nvme_bdev *nbdev, struct spdk_io_channel *ch,
		struct nvme_bdev_io *bio,
		uint64_t offset_blocks,
		uint64_t num_blocks)
{
	struct nvme_io_channel *nvme_ch = spdk_io_channel_get_ctx(ch);
	struct spdk_nvme_dsm_range dsm_ranges[SPDK_NVME_DATASET_MANAGEMENT_MAX_RANGES];
	struct spdk_nvme_dsm_range *range;
	uint64_t offset, remaining;
	uint64_t num_ranges_u64;
	uint16_t num_ranges;
	int rc;

	num_ranges_u64 = (num_blocks + SPDK_NVME_DATASET_MANAGEMENT_RANGE_MAX_BLOCKS - 1) /
			 SPDK_NVME_DATASET_MANAGEMENT_RANGE_MAX_BLOCKS;
	if (num_ranges_u64 > SPDK_COUNTOF(dsm_ranges)) {
		SPDK_ERRLOG("Unmap request for %" PRIu64 " blocks is too large\n", num_blocks);
		return -EINVAL;
	}
	num_ranges = (uint16_t)num_ranges_u64;

	offset = offset_blocks;
	remaining = num_blocks;
	range = &dsm_ranges[0];

	/* Fill max-size ranges until the remaining blocks fit into one range */
	while (remaining > SPDK_NVME_DATASET_MANAGEMENT_RANGE_MAX_BLOCKS) {
		range->attributes.raw = 0;
		range->length = SPDK_NVME_DATASET_MANAGEMENT_RANGE_MAX_BLOCKS;
		range->starting_lba = offset;

		offset += SPDK_NVME_DATASET_MANAGEMENT_RANGE_MAX_BLOCKS;
		remaining -= SPDK_NVME_DATASET_MANAGEMENT_RANGE_MAX_BLOCKS;
		range++;
	}

	/* Final range describes the remaining blocks */
	range->attributes.raw = 0;
	range->length = remaining;
	range->starting_lba = offset;

	rc = spdk_nvme_ns_cmd_dataset_management(nbdev->ns, nvme_ch->qpair,
			SPDK_NVME_DSM_ATTR_DEALLOCATE,
			dsm_ranges, num_ranges,
			bdev_nvme_queued_done, bio);

	return rc;
}
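
/*
 * Worked example (with M = SPDK_NVME_DATASET_MANAGEMENT_RANGE_MAX_BLOCKS):
 * an unmap of num_blocks = 2 * M + 100 at offset 0 is split into
 * ceil(num_blocks / M) = 3 ranges, [0, M), [M, 2M) and [2M, 2M + 100),
 * all carried in a single Dataset Management (deallocate) command.
 * Requests needing more than SPDK_NVME_DATASET_MANAGEMENT_MAX_RANGES
 * ranges are rejected with -EINVAL rather than split across commands.
 */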

static int
bdev_nvme_admin_passthru(struct nvme_bdev *nbdev, struct spdk_io_channel *ch,
			 struct nvme_bdev_io *bio,
			 struct spdk_nvme_cmd *cmd, void *buf, size_t nbytes)
{
	uint32_t max_xfer_size = spdk_nvme_ctrlr_get_max_xfer_size(nbdev->nvme_bdev_ctrlr->ctrlr);

	if (nbytes > max_xfer_size) {
		SPDK_ERRLOG("nbytes is greater than MDTS %" PRIu32 ".\n", max_xfer_size);
		return -EINVAL;
	}

	bio->orig_thread = spdk_io_channel_get_thread(ch);

	return spdk_nvme_ctrlr_cmd_admin_raw(nbdev->nvme_bdev_ctrlr->ctrlr, cmd, buf,
					     (uint32_t)nbytes, bdev_nvme_admin_passthru_done, bio);
}
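
/*
 * Admin commands are submitted to the controller's admin queue rather than
 * the channel's I/O qpair, and complete on the thread that polls that
 * queue; bio->orig_thread is recorded above so that
 * bdev_nvme_admin_passthru_done() can bounce the completion back to the
 * submitting thread. Transfers are capped up front at the controller's
 * reported maximum data transfer size (MDTS).
 */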

static int
bdev_nvme_io_passthru(struct nvme_bdev *nbdev, struct spdk_io_channel *ch,
		      struct nvme_bdev_io *bio,
		      struct spdk_nvme_cmd *cmd, void *buf, size_t nbytes)
{
	struct nvme_io_channel *nvme_ch = spdk_io_channel_get_ctx(ch);
	uint32_t max_xfer_size = spdk_nvme_ctrlr_get_max_xfer_size(nbdev->nvme_bdev_ctrlr->ctrlr);

	if (nbytes > max_xfer_size) {
		SPDK_ERRLOG("nbytes is greater than MDTS %" PRIu32 ".\n", max_xfer_size);
		return -EINVAL;
	}

	/*
	 * Each NVMe bdev is a specific namespace, and all NVMe I/O commands require a nsid,
	 * so fill it out automatically.
	 */
	cmd->nsid = spdk_nvme_ns_get_id(nbdev->ns);

	return spdk_nvme_ctrlr_cmd_io_raw(nbdev->nvme_bdev_ctrlr->ctrlr, nvme_ch->qpair, cmd, buf,
					  (uint32_t)nbytes, bdev_nvme_queued_done, bio);
}
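
/*
 * Illustrative caller-side sketch (hypothetical, not part of this file):
 * a raw flush could be pushed through this path with only the opcode set,
 * since the nsid is filled in above; a command carrying no data would pass
 * a NULL buffer and nbytes = 0.
 *
 *	struct spdk_nvme_cmd cmd = {};
 *
 *	cmd.opc = SPDK_NVME_OPC_FLUSH;
 *	rc = bdev_nvme_io_passthru(nbdev, ch, bio, &cmd, NULL, 0);
 */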

static int
bdev_nvme_io_passthru_md(struct nvme_bdev *nbdev, struct spdk_io_channel *ch,
			 struct nvme_bdev_io *bio,
			 struct spdk_nvme_cmd *cmd, void *buf, size_t nbytes, void *md_buf, size_t md_len)
{
	struct nvme_io_channel *nvme_ch = spdk_io_channel_get_ctx(ch);
	size_t nr_sectors = nbytes / spdk_nvme_ns_get_extended_sector_size(nbdev->ns);
	uint32_t max_xfer_size = spdk_nvme_ctrlr_get_max_xfer_size(nbdev->nvme_bdev_ctrlr->ctrlr);

	if (nbytes > max_xfer_size) {
		SPDK_ERRLOG("nbytes is greater than MDTS %" PRIu32 ".\n", max_xfer_size);
		return -EINVAL;
	}

	if (md_len != nr_sectors * spdk_nvme_ns_get_md_size(nbdev->ns)) {
		SPDK_ERRLOG("invalid meta data buffer size\n");
		return -EINVAL;
	}

	/*
	 * Each NVMe bdev is a specific namespace, and all NVMe I/O commands require a nsid,
	 * so fill it out automatically.
	 */
	cmd->nsid = spdk_nvme_ns_get_id(nbdev->ns);

	return spdk_nvme_ctrlr_cmd_io_raw_with_md(nbdev->nvme_bdev_ctrlr->ctrlr, nvme_ch->qpair, cmd, buf,
			(uint32_t)nbytes, md_buf, bdev_nvme_queued_done, bio);
}
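
/*
 * Metadata sizing example (illustrative): on a namespace formatted with
 * 512-byte data and 8-byte metadata, the extended sector size is 520, so
 * nbytes = 520 * n gives nr_sectors = n and the check above requires
 * md_len = 8 * n.
 */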

static void
bdev_nvme_get_spdk_running_config(FILE *fp)
{
	struct nvme_bdev_ctrlr *nvme_bdev_ctrlr;

	fprintf(fp, "\n[Nvme]");
	fprintf(fp, "\n"
		"# NVMe Device Whitelist\n"
		"# Users may specify which NVMe devices to claim by their transport id.\n"
		"# See spdk_nvme_transport_id_parse() in spdk/nvme.h for the correct format.\n"
		"# The second argument is the assigned name, which can be referenced from\n"
		"# other sections in the configuration file. For NVMe devices, a namespace\n"
		"# is automatically appended to each name in the format <YourName>nY, where\n"
		"# Y is the NSID (starts at 1).\n");

	TAILQ_FOREACH(nvme_bdev_ctrlr, &g_nvme_bdev_ctrlrs, tailq) {
		const char *trtype;
		const char *prchk_flags;

		trtype = spdk_nvme_transport_id_trtype_str(nvme_bdev_ctrlr->trid.trtype);
		if (!trtype) {
			continue;
		}

		if (nvme_bdev_ctrlr->trid.trtype == SPDK_NVME_TRANSPORT_PCIE) {
			fprintf(fp, "TransportID \"trtype:%s traddr:%s\" %s\n",
				trtype,
				nvme_bdev_ctrlr->trid.traddr, nvme_bdev_ctrlr->name);
		} else {
			const char *adrfam;

			adrfam = spdk_nvme_transport_id_adrfam_str(nvme_bdev_ctrlr->trid.adrfam);
			prchk_flags = spdk_nvme_prchk_flags_str(nvme_bdev_ctrlr->prchk_flags);

			if (adrfam) {
				fprintf(fp, "TransportID \"trtype:%s adrfam:%s traddr:%s trsvcid:%s subnqn:%s\" %s",
					trtype, adrfam,
					nvme_bdev_ctrlr->trid.traddr, nvme_bdev_ctrlr->trid.trsvcid,
					nvme_bdev_ctrlr->trid.subnqn, nvme_bdev_ctrlr->name);
			} else {
				fprintf(fp, "TransportID \"trtype:%s traddr:%s trsvcid:%s subnqn:%s\" %s",
					trtype,
					nvme_bdev_ctrlr->trid.traddr, nvme_bdev_ctrlr->trid.trsvcid,
					nvme_bdev_ctrlr->trid.subnqn, nvme_bdev_ctrlr->name);
			}

			if (prchk_flags) {
				fprintf(fp, " \"%s\"\n", prchk_flags);
			} else {
				fprintf(fp, "\n");
			}
		}
	}

	fprintf(fp, "\n"
		"# The number of attempts per I/O when an I/O fails. Do not include\n"
		"# this key to get the default behavior.\n");
	fprintf(fp, "RetryCount %d\n", spdk_nvme_retry_count);
	fprintf(fp, "\n"
		"# Timeout for each command, in microseconds. If 0, don't track timeouts.\n");
	fprintf(fp, "TimeoutUsec %"PRIu64"\n", g_opts.timeout_us);

	fprintf(fp, "\n"
		"# Action to take on command timeout. Only valid when Timeout is greater\n"
		"# than 0. This may be 'Reset' to reset the controller, 'Abort' to abort\n"
		"# the command, or 'None' to just print a message but do nothing.\n"
		"# Admin command timeouts will always result in a reset.\n");
	switch (g_opts.action_on_timeout) {
	case SPDK_BDEV_NVME_TIMEOUT_ACTION_NONE:
		fprintf(fp, "ActionOnTimeout None\n");
		break;
	case SPDK_BDEV_NVME_TIMEOUT_ACTION_RESET:
		fprintf(fp, "ActionOnTimeout Reset\n");
		break;
	case SPDK_BDEV_NVME_TIMEOUT_ACTION_ABORT:
		fprintf(fp, "ActionOnTimeout Abort\n");
		break;
	}

	fprintf(fp, "\n"
		"# Set how often the admin queue is polled for asynchronous events.\n"
		"# Units in microseconds.\n");
	fprintf(fp, "AdminPollRate %"PRIu64"\n", g_opts.nvme_adminq_poll_period_us);
	fprintf(fp, "IOPollRate %" PRIu64"\n", g_opts.nvme_ioq_poll_period_us);
	fprintf(fp, "\n"
		"# Handling of hotplug (runtime insert and remove) events is disabled\n"
		"# by default. Users can set this to Yes to enable it.\n"
		"# Default: No\n");
	fprintf(fp, "HotplugEnable %s\n", g_nvme_hotplug_enabled ? "Yes" : "No");
	fprintf(fp, "\n"
		"# Set how often hotplug is processed for insert and remove events.\n"
		"# Units in microseconds.\n");
	fprintf(fp, "HotplugPollRate %"PRIu64"\n", g_nvme_hotplug_poll_period_us);
	if (g_nvme_hostnqn) {
		fprintf(fp, "HostNQN %s\n", g_nvme_hostnqn);
	}

	fprintf(fp, "\n");
}
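
/*
 * Illustrative output of the function above (example values, not
 * necessarily the defaults):
 *
 *	[Nvme]
 *	TransportID "trtype:PCIe traddr:0000:01:00.0" Nvme0
 *	RetryCount 4
 *	TimeoutUsec 0
 *	ActionOnTimeout None
 *	AdminPollRate 100000
 *	IOPollRate 0
 *	HotplugEnable No
 *	HotplugPollRate 100000
 */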

static int
bdev_nvme_config_json(struct spdk_json_write_ctx *w)
{
	struct nvme_bdev_ctrlr *nvme_bdev_ctrlr;
	struct spdk_nvme_transport_id *trid;
	const char *action;

	if (g_opts.action_on_timeout == SPDK_BDEV_NVME_TIMEOUT_ACTION_RESET) {
		action = "reset";
	} else if (g_opts.action_on_timeout == SPDK_BDEV_NVME_TIMEOUT_ACTION_ABORT) {
		action = "abort";
	} else {
		action = "none";
	}

	spdk_json_write_object_begin(w);

	spdk_json_write_named_string(w, "method", "set_bdev_nvme_options");

	spdk_json_write_named_object_begin(w, "params");
	spdk_json_write_named_string(w, "action_on_timeout", action);
	spdk_json_write_named_uint64(w, "timeout_us", g_opts.timeout_us);
	spdk_json_write_named_uint32(w, "retry_count", g_opts.retry_count);
	spdk_json_write_named_uint64(w, "nvme_adminq_poll_period_us", g_opts.nvme_adminq_poll_period_us);
	spdk_json_write_named_uint64(w, "nvme_ioq_poll_period_us", g_opts.nvme_ioq_poll_period_us);
	spdk_json_write_object_end(w);

	spdk_json_write_object_end(w);

	pthread_mutex_lock(&g_bdev_nvme_mutex);
	TAILQ_FOREACH(nvme_bdev_ctrlr, &g_nvme_bdev_ctrlrs, tailq) {
		if (spdk_nvme_ctrlr_is_ocssd_supported(nvme_bdev_ctrlr->ctrlr)) {
			continue;
		}

		trid = &nvme_bdev_ctrlr->trid;

		spdk_json_write_object_begin(w);

		spdk_json_write_named_string(w, "method", "construct_nvme_bdev");

		spdk_json_write_named_object_begin(w, "params");
		spdk_json_write_named_string(w, "name", nvme_bdev_ctrlr->name);
		nvme_bdev_dump_trid_json(trid, w);
		spdk_json_write_named_bool(w, "prchk_reftag",
					   (nvme_bdev_ctrlr->prchk_flags & SPDK_NVME_IO_FLAGS_PRCHK_REFTAG) != 0);
		spdk_json_write_named_bool(w, "prchk_guard",
					   (nvme_bdev_ctrlr->prchk_flags & SPDK_NVME_IO_FLAGS_PRCHK_GUARD) != 0);

		spdk_json_write_object_end(w);

		spdk_json_write_object_end(w);
	}

	/* Dump this as the last parameter to give all NVMe bdevs a chance to be
	 * constructed before enabling the hotplug poller.
	 */
	spdk_json_write_object_begin(w);
	spdk_json_write_named_string(w, "method", "set_bdev_nvme_hotplug");

	spdk_json_write_named_object_begin(w, "params");
	spdk_json_write_named_uint64(w, "period_us", g_nvme_hotplug_poll_period_us);
	spdk_json_write_named_bool(w, "enable", g_nvme_hotplug_enabled);
	spdk_json_write_object_end(w);

	spdk_json_write_object_end(w);

	pthread_mutex_unlock(&g_bdev_nvme_mutex);
	return 0;
}
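
/*
 * Illustrative JSON emitted by the function above (example values; the
 * transport-id fields come from nvme_bdev_dump_trid_json()):
 *
 *	{ "method": "set_bdev_nvme_options", "params": { "action_on_timeout": "none",
 *	  "timeout_us": 0, "retry_count": 4, "nvme_adminq_poll_period_us": 100000,
 *	  "nvme_ioq_poll_period_us": 0 } }
 *	{ "method": "construct_nvme_bdev", "params": { "name": "Nvme0",
 *	  "trtype": "PCIe", "traddr": "0000:01:00.0",
 *	  "prchk_reftag": false, "prchk_guard": false } }
 *	{ "method": "set_bdev_nvme_hotplug", "params": { "period_us": 100000,
 *	  "enable": false } }
 */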

struct spdk_nvme_ctrlr *
spdk_bdev_nvme_get_ctrlr(struct spdk_bdev *bdev)
{
	if (!bdev || bdev->module != &nvme_if) {
		return NULL;
	}

	return SPDK_CONTAINEROF(bdev, struct nvme_bdev, disk)->nvme_bdev_ctrlr->ctrlr;
}

SPDK_LOG_REGISTER_COMPONENT("bdev_nvme", SPDK_LOG_BDEV_NVME)