4 * Copyright (c) Intel Corporation.
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
11 * * Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * * Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in
15 * the documentation and/or other materials provided with the
17 * * Neither the name of Intel Corporation nor the names of its
18 * contributors may be used to endorse or promote products derived
19 * from this software without specific prior written permission.
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
35 * Common code for partition-like virtual bdevs.
38 #include "spdk/bdev.h"
39 #include "spdk/likely.h"
41 #include "spdk/string.h"
42 #include "spdk/thread.h"
44 #include "spdk/bdev_module.h"
46 struct spdk_bdev_part_base
{
47 struct spdk_bdev
*bdev
;
48 struct spdk_bdev_desc
*desc
;
50 uint32_t channel_size
;
51 spdk_bdev_part_base_free_fn base_free_fn
;
54 struct spdk_bdev_module
*module
;
55 struct spdk_bdev_fn_table
*fn_table
;
56 struct bdev_part_tailq
*tailq
;
57 spdk_io_channel_create_cb ch_create_cb
;
58 spdk_io_channel_destroy_cb ch_destroy_cb
;
59 struct spdk_thread
*thread
;
63 spdk_bdev_part_base_get_bdev(struct spdk_bdev_part_base
*part_base
)
65 return part_base
->bdev
;
68 struct spdk_bdev_desc
*
69 spdk_bdev_part_base_get_desc(struct spdk_bdev_part_base
*part_base
)
71 return part_base
->desc
;
74 struct bdev_part_tailq
*
75 spdk_bdev_part_base_get_tailq(struct spdk_bdev_part_base
*part_base
)
77 return part_base
->tailq
;
81 spdk_bdev_part_base_get_ctx(struct spdk_bdev_part_base
*part_base
)
83 return part_base
->ctx
;
87 spdk_bdev_part_base_get_bdev_name(struct spdk_bdev_part_base
*part_base
)
89 return part_base
->bdev
->name
;
/*
 * Message callback: close a bdev descriptor.  Sent via
 * spdk_thread_send_msg() so the close happens on the thread the
 * descriptor was originally opened on.
 */
static void
bdev_part_base_free(void *ctx)
{
	struct spdk_bdev_desc *desc = ctx;

	spdk_bdev_close(desc);
}
101 spdk_bdev_part_base_free(struct spdk_bdev_part_base
*base
)
104 /* Close the underlying bdev on its same opened thread. */
105 if (base
->thread
&& base
->thread
!= spdk_get_thread()) {
106 spdk_thread_send_msg(base
->thread
, bdev_part_base_free
, base
->desc
);
108 spdk_bdev_close(base
->desc
);
112 if (base
->base_free_fn
!= NULL
) {
113 base
->base_free_fn(base
->ctx
);
120 bdev_part_free_cb(void *io_device
)
122 struct spdk_bdev_part
*part
= io_device
;
123 struct spdk_bdev_part_base
*base
;
126 assert(part
->internal
.base
);
128 base
= part
->internal
.base
;
130 TAILQ_REMOVE(base
->tailq
, part
, tailq
);
132 if (--base
->ref
== 0) {
133 spdk_bdev_module_release_bdev(base
->bdev
);
134 spdk_bdev_part_base_free(base
);
137 spdk_bdev_destruct_done(&part
->internal
.bdev
, 0);
138 free(part
->internal
.bdev
.name
);
139 free(part
->internal
.bdev
.product_name
);
144 spdk_bdev_part_free(struct spdk_bdev_part
*part
)
146 spdk_io_device_unregister(part
, bdev_part_free_cb
);
148 /* Return 1 to indicate that this is an asynchronous operation that isn't complete
149 * until spdk_bdev_destruct_done is called */
154 spdk_bdev_part_base_hotremove(struct spdk_bdev_part_base
*part_base
, struct bdev_part_tailq
*tailq
)
156 struct spdk_bdev_part
*part
, *tmp
;
158 TAILQ_FOREACH_SAFE(part
, tailq
, tailq
, tmp
) {
159 if (part
->internal
.base
== part_base
) {
160 spdk_bdev_unregister(&part
->internal
.bdev
, NULL
, NULL
);
166 bdev_part_io_type_supported(void *_part
, enum spdk_bdev_io_type io_type
)
168 struct spdk_bdev_part
*part
= _part
;
170 /* We can't decode/modify passthrough NVMe commands, so don't report
171 * that a partition supports these io types, even if the underlying
175 case SPDK_BDEV_IO_TYPE_NVME_ADMIN
:
176 case SPDK_BDEV_IO_TYPE_NVME_IO
:
177 case SPDK_BDEV_IO_TYPE_NVME_IO_MD
:
183 return part
->internal
.base
->bdev
->fn_table
->io_type_supported(part
->internal
.base
->bdev
->ctxt
,
/* fn_table get_io_channel: the part itself is the io_device. */
static struct spdk_io_channel *
bdev_part_get_io_channel(void *_part)
{
	struct spdk_bdev_part *part = _part;

	return spdk_get_io_channel(part);
}
196 spdk_bdev_part_get_bdev(struct spdk_bdev_part
*part
)
198 return &part
->internal
.bdev
;
201 struct spdk_bdev_part_base
*
202 spdk_bdev_part_get_base(struct spdk_bdev_part
*part
)
204 return part
->internal
.base
;
208 spdk_bdev_part_get_base_bdev(struct spdk_bdev_part
*part
)
210 return part
->internal
.base
->bdev
;
214 spdk_bdev_part_get_offset_blocks(struct spdk_bdev_part
*part
)
216 return part
->internal
.offset_blocks
;
220 bdev_part_remap_dif(struct spdk_bdev_io
*bdev_io
, uint32_t offset
,
221 uint32_t remapped_offset
)
223 struct spdk_bdev
*bdev
= bdev_io
->bdev
;
224 struct spdk_dif_ctx dif_ctx
;
225 struct spdk_dif_error err_blk
= {};
228 if (spdk_likely(!(bdev
->dif_check_flags
& SPDK_DIF_FLAGS_REFTAG_CHECK
))) {
232 rc
= spdk_dif_ctx_init(&dif_ctx
,
233 bdev
->blocklen
, bdev
->md_len
, bdev
->md_interleave
,
234 bdev
->dif_is_head_of_md
, bdev
->dif_type
, bdev
->dif_check_flags
,
237 SPDK_ERRLOG("Initialization of DIF context failed\n");
241 spdk_dif_ctx_set_remapped_init_ref_tag(&dif_ctx
, remapped_offset
);
243 if (bdev
->md_interleave
) {
244 rc
= spdk_dif_remap_ref_tag(bdev_io
->u
.bdev
.iovs
, bdev_io
->u
.bdev
.iovcnt
,
245 bdev_io
->u
.bdev
.num_blocks
, &dif_ctx
, &err_blk
);
247 struct iovec md_iov
= {
248 .iov_base
= bdev_io
->u
.bdev
.md_buf
,
249 .iov_len
= bdev_io
->u
.bdev
.num_blocks
* bdev
->md_len
,
252 rc
= spdk_dix_remap_ref_tag(&md_iov
, bdev_io
->u
.bdev
.num_blocks
, &dif_ctx
, &err_blk
);
256 SPDK_ERRLOG("Remapping reference tag failed. type=%d, offset=%" PRIu32
"\n",
257 err_blk
.err_type
, err_blk
.err_offset
);
264 bdev_part_complete_read_io(struct spdk_bdev_io
*bdev_io
, bool success
, void *cb_arg
)
266 struct spdk_bdev_io
*part_io
= cb_arg
;
267 uint32_t offset
, remapped_offset
;
270 offset
= bdev_io
->u
.bdev
.offset_blocks
;
271 remapped_offset
= part_io
->u
.bdev
.offset_blocks
;
274 rc
= bdev_part_remap_dif(bdev_io
, offset
, remapped_offset
);
280 status
= success
? SPDK_BDEV_IO_STATUS_SUCCESS
: SPDK_BDEV_IO_STATUS_FAILED
;
282 spdk_bdev_io_complete(part_io
, status
);
283 spdk_bdev_free_io(bdev_io
);
287 bdev_part_complete_io(struct spdk_bdev_io
*bdev_io
, bool success
, void *cb_arg
)
289 struct spdk_bdev_io
*part_io
= cb_arg
;
290 int status
= success
? SPDK_BDEV_IO_STATUS_SUCCESS
: SPDK_BDEV_IO_STATUS_FAILED
;
292 spdk_bdev_io_complete(part_io
, status
);
293 spdk_bdev_free_io(bdev_io
);
297 bdev_part_complete_zcopy_io(struct spdk_bdev_io
*bdev_io
, bool success
, void *cb_arg
)
299 struct spdk_bdev_io
*part_io
= cb_arg
;
300 int status
= success
? SPDK_BDEV_IO_STATUS_SUCCESS
: SPDK_BDEV_IO_STATUS_FAILED
;
302 spdk_bdev_io_set_buf(part_io
, bdev_io
->u
.bdev
.iovs
[0].iov_base
, bdev_io
->u
.bdev
.iovs
[0].iov_len
);
303 spdk_bdev_io_complete(part_io
, status
);
304 spdk_bdev_free_io(bdev_io
);
308 spdk_bdev_part_submit_request(struct spdk_bdev_part_channel
*ch
, struct spdk_bdev_io
*bdev_io
)
310 struct spdk_bdev_part
*part
= ch
->part
;
311 struct spdk_io_channel
*base_ch
= ch
->base_ch
;
312 struct spdk_bdev_desc
*base_desc
= part
->internal
.base
->desc
;
313 uint64_t offset
, remapped_offset
;
316 offset
= bdev_io
->u
.bdev
.offset_blocks
;
317 remapped_offset
= offset
+ part
->internal
.offset_blocks
;
319 /* Modify the I/O to adjust for the offset within the base bdev. */
320 switch (bdev_io
->type
) {
321 case SPDK_BDEV_IO_TYPE_READ
:
322 if (bdev_io
->u
.bdev
.md_buf
== NULL
) {
323 rc
= spdk_bdev_readv_blocks(base_desc
, base_ch
, bdev_io
->u
.bdev
.iovs
,
324 bdev_io
->u
.bdev
.iovcnt
, remapped_offset
,
325 bdev_io
->u
.bdev
.num_blocks
,
326 bdev_part_complete_read_io
, bdev_io
);
328 rc
= spdk_bdev_readv_blocks_with_md(base_desc
, base_ch
,
329 bdev_io
->u
.bdev
.iovs
,
330 bdev_io
->u
.bdev
.iovcnt
,
331 bdev_io
->u
.bdev
.md_buf
, remapped_offset
,
332 bdev_io
->u
.bdev
.num_blocks
,
333 bdev_part_complete_read_io
, bdev_io
);
336 case SPDK_BDEV_IO_TYPE_WRITE
:
337 rc
= bdev_part_remap_dif(bdev_io
, offset
, remapped_offset
);
339 return SPDK_BDEV_IO_STATUS_FAILED
;
342 if (bdev_io
->u
.bdev
.md_buf
== NULL
) {
343 rc
= spdk_bdev_writev_blocks(base_desc
, base_ch
, bdev_io
->u
.bdev
.iovs
,
344 bdev_io
->u
.bdev
.iovcnt
, remapped_offset
,
345 bdev_io
->u
.bdev
.num_blocks
,
346 bdev_part_complete_io
, bdev_io
);
348 rc
= spdk_bdev_writev_blocks_with_md(base_desc
, base_ch
,
349 bdev_io
->u
.bdev
.iovs
,
350 bdev_io
->u
.bdev
.iovcnt
,
351 bdev_io
->u
.bdev
.md_buf
, remapped_offset
,
352 bdev_io
->u
.bdev
.num_blocks
,
353 bdev_part_complete_io
, bdev_io
);
356 case SPDK_BDEV_IO_TYPE_WRITE_ZEROES
:
357 rc
= spdk_bdev_write_zeroes_blocks(base_desc
, base_ch
, remapped_offset
,
358 bdev_io
->u
.bdev
.num_blocks
, bdev_part_complete_io
,
361 case SPDK_BDEV_IO_TYPE_UNMAP
:
362 rc
= spdk_bdev_unmap_blocks(base_desc
, base_ch
, remapped_offset
,
363 bdev_io
->u
.bdev
.num_blocks
, bdev_part_complete_io
,
366 case SPDK_BDEV_IO_TYPE_FLUSH
:
367 rc
= spdk_bdev_flush_blocks(base_desc
, base_ch
, remapped_offset
,
368 bdev_io
->u
.bdev
.num_blocks
, bdev_part_complete_io
,
371 case SPDK_BDEV_IO_TYPE_RESET
:
372 rc
= spdk_bdev_reset(base_desc
, base_ch
,
373 bdev_part_complete_io
, bdev_io
);
375 case SPDK_BDEV_IO_TYPE_ZCOPY
:
376 rc
= spdk_bdev_zcopy_start(base_desc
, base_ch
, remapped_offset
,
377 bdev_io
->u
.bdev
.num_blocks
, bdev_io
->u
.bdev
.zcopy
.populate
,
378 bdev_part_complete_zcopy_io
, bdev_io
);
381 SPDK_ERRLOG("unknown I/O type %d\n", bdev_io
->type
);
382 return SPDK_BDEV_IO_STATUS_FAILED
;
389 bdev_part_channel_create_cb(void *io_device
, void *ctx_buf
)
391 struct spdk_bdev_part
*part
= (struct spdk_bdev_part
*)io_device
;
392 struct spdk_bdev_part_channel
*ch
= ctx_buf
;
395 ch
->base_ch
= spdk_bdev_get_io_channel(part
->internal
.base
->desc
);
396 if (ch
->base_ch
== NULL
) {
400 if (part
->internal
.base
->ch_create_cb
) {
401 return part
->internal
.base
->ch_create_cb(io_device
, ctx_buf
);
408 bdev_part_channel_destroy_cb(void *io_device
, void *ctx_buf
)
410 struct spdk_bdev_part
*part
= (struct spdk_bdev_part
*)io_device
;
411 struct spdk_bdev_part_channel
*ch
= ctx_buf
;
413 if (part
->internal
.base
->ch_destroy_cb
) {
414 part
->internal
.base
->ch_destroy_cb(io_device
, ctx_buf
);
416 spdk_put_io_channel(ch
->base_ch
);
419 struct spdk_bdev_part_base
*
420 spdk_bdev_part_base_construct(struct spdk_bdev
*bdev
,
421 spdk_bdev_remove_cb_t remove_cb
, struct spdk_bdev_module
*module
,
422 struct spdk_bdev_fn_table
*fn_table
, struct bdev_part_tailq
*tailq
,
423 spdk_bdev_part_base_free_fn free_fn
, void *ctx
,
424 uint32_t channel_size
, spdk_io_channel_create_cb ch_create_cb
,
425 spdk_io_channel_destroy_cb ch_destroy_cb
)
428 struct spdk_bdev_part_base
*base
;
430 base
= calloc(1, sizeof(*base
));
432 SPDK_ERRLOG("Memory allocation failure\n");
435 fn_table
->get_io_channel
= bdev_part_get_io_channel
;
436 fn_table
->io_type_supported
= bdev_part_io_type_supported
;
441 base
->module
= module
;
442 base
->fn_table
= fn_table
;
444 base
->base_free_fn
= free_fn
;
446 base
->claimed
= false;
447 base
->channel_size
= channel_size
;
448 base
->ch_create_cb
= ch_create_cb
;
449 base
->ch_destroy_cb
= ch_destroy_cb
;
451 rc
= spdk_bdev_open(bdev
, false, remove_cb
, base
, &base
->desc
);
453 spdk_bdev_part_base_free(base
);
454 SPDK_ERRLOG("could not open bdev %s: %s\n", spdk_bdev_get_name(bdev
),
459 /* Save the thread where the base device is opened */
460 base
->thread
= spdk_get_thread();
466 spdk_bdev_part_construct(struct spdk_bdev_part
*part
, struct spdk_bdev_part_base
*base
,
467 char *name
, uint64_t offset_blocks
, uint64_t num_blocks
,
470 part
->internal
.bdev
.blocklen
= base
->bdev
->blocklen
;
471 part
->internal
.bdev
.blockcnt
= num_blocks
;
472 part
->internal
.offset_blocks
= offset_blocks
;
474 part
->internal
.bdev
.write_cache
= base
->bdev
->write_cache
;
475 part
->internal
.bdev
.required_alignment
= base
->bdev
->required_alignment
;
476 part
->internal
.bdev
.ctxt
= part
;
477 part
->internal
.bdev
.module
= base
->module
;
478 part
->internal
.bdev
.fn_table
= base
->fn_table
;
480 part
->internal
.bdev
.md_interleave
= base
->bdev
->md_interleave
;
481 part
->internal
.bdev
.md_len
= base
->bdev
->md_len
;
482 part
->internal
.bdev
.dif_type
= base
->bdev
->dif_type
;
483 part
->internal
.bdev
.dif_is_head_of_md
= base
->bdev
->dif_is_head_of_md
;
484 part
->internal
.bdev
.dif_check_flags
= base
->bdev
->dif_check_flags
;
486 part
->internal
.bdev
.name
= strdup(name
);
487 part
->internal
.bdev
.product_name
= strdup(product_name
);
489 if (part
->internal
.bdev
.name
== NULL
) {
490 SPDK_ERRLOG("Failed to allocate name for new part of bdev %s\n", spdk_bdev_get_name(base
->bdev
));
492 } else if (part
->internal
.bdev
.product_name
== NULL
) {
493 free(part
->internal
.bdev
.name
);
494 SPDK_ERRLOG("Failed to allocate product name for new part of bdev %s\n",
495 spdk_bdev_get_name(base
->bdev
));
500 part
->internal
.base
= base
;
502 if (!base
->claimed
) {
505 rc
= spdk_bdev_module_claim_bdev(base
->bdev
, base
->desc
, base
->module
);
507 SPDK_ERRLOG("could not claim bdev %s\n", spdk_bdev_get_name(base
->bdev
));
508 free(part
->internal
.bdev
.name
);
509 free(part
->internal
.bdev
.product_name
);
512 base
->claimed
= true;
515 spdk_io_device_register(part
, bdev_part_channel_create_cb
,
516 bdev_part_channel_destroy_cb
,
520 spdk_bdev_register(&part
->internal
.bdev
);
521 TAILQ_INSERT_TAIL(base
->tailq
, part
, tailq
);