]> git.proxmox.com Git - ceph.git/blame - ceph/src/spdk/lib/bdev/part.c
import 15.2.0 Octopus source
[ceph.git] / ceph / src / spdk / lib / bdev / part.c
CommitLineData
11fdf7f2
TL
1/*-
2 * BSD LICENSE
3 *
4 * Copyright (c) Intel Corporation.
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 *
11 * * Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * * Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in
15 * the documentation and/or other materials provided with the
16 * distribution.
17 * * Neither the name of Intel Corporation nor the names of its
18 * contributors may be used to endorse or promote products derived
19 * from this software without specific prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32 */
33
34/*
35 * Common code for partition-like virtual bdevs.
36 */
37
38#include "spdk/bdev.h"
39#include "spdk/log.h"
40#include "spdk/string.h"
41
42#include "spdk/bdev_module.h"
43
44struct spdk_bdev_part_base {
45 struct spdk_bdev *bdev;
46 struct spdk_bdev_desc *desc;
47 uint32_t ref;
48 uint32_t channel_size;
49 spdk_bdev_part_base_free_fn base_free_fn;
50 void *ctx;
51 bool claimed;
52 struct spdk_bdev_module *module;
53 struct spdk_bdev_fn_table *fn_table;
54 struct bdev_part_tailq *tailq;
55 spdk_io_channel_create_cb ch_create_cb;
56 spdk_io_channel_destroy_cb ch_destroy_cb;
57};
58
59struct spdk_bdev *
60spdk_bdev_part_base_get_bdev(struct spdk_bdev_part_base *part_base)
61{
62 return part_base->bdev;
63}
64
65struct spdk_bdev_desc *
66spdk_bdev_part_base_get_desc(struct spdk_bdev_part_base *part_base)
67{
68 return part_base->desc;
69}
70
71struct bdev_part_tailq *
72spdk_bdev_part_base_get_tailq(struct spdk_bdev_part_base *part_base)
73{
74 return part_base->tailq;
75}
76
77void *
78spdk_bdev_part_base_get_ctx(struct spdk_bdev_part_base *part_base)
79{
80 return part_base->ctx;
81}
82
83void
84spdk_bdev_part_base_free(struct spdk_bdev_part_base *base)
85{
86 if (base->desc) {
87 spdk_bdev_close(base->desc);
88 base->desc = NULL;
89 }
90
91 if (base->base_free_fn != NULL) {
92 base->base_free_fn(base->ctx);
93 }
94
95 free(base);
96}
97
98static void
99spdk_bdev_part_free_cb(void *io_device)
100{
101 struct spdk_bdev_part *part = io_device;
102 struct spdk_bdev_part_base *base;
103
104 assert(part);
105 assert(part->internal.base);
106
107 base = part->internal.base;
108
109 TAILQ_REMOVE(base->tailq, part, tailq);
110
111 if (__sync_sub_and_fetch(&base->ref, 1) == 0) {
112 spdk_bdev_module_release_bdev(base->bdev);
113 spdk_bdev_part_base_free(base);
114 }
115
116 spdk_bdev_destruct_done(&part->internal.bdev, 0);
117 free(part->internal.bdev.name);
118 free(part->internal.bdev.product_name);
119 free(part);
120}
121
122int
123spdk_bdev_part_free(struct spdk_bdev_part *part)
124{
125 spdk_io_device_unregister(part, spdk_bdev_part_free_cb);
126
127 /* Return 1 to indicate that this is an asynchronous operation that isn't complete
128 * until spdk_bdev_destruct_done is called */
129 return 1;
130}
131
132void
9f95a23c 133spdk_bdev_part_base_hotremove(struct spdk_bdev_part_base *part_base, struct bdev_part_tailq *tailq)
11fdf7f2
TL
134{
135 struct spdk_bdev_part *part, *tmp;
136
137 TAILQ_FOREACH_SAFE(part, tailq, tailq, tmp) {
9f95a23c 138 if (part->internal.base == part_base) {
11fdf7f2
TL
139 spdk_bdev_unregister(&part->internal.bdev, NULL, NULL);
140 }
141 }
142}
143
144static bool
145spdk_bdev_part_io_type_supported(void *_part, enum spdk_bdev_io_type io_type)
146{
147 struct spdk_bdev_part *part = _part;
148
9f95a23c
TL
149 /* We can't decode/modify passthrough NVMe commands, so don't report
150 * that a partition supports these io types, even if the underlying
151 * bdev does.
152 */
153 switch (io_type) {
154 case SPDK_BDEV_IO_TYPE_NVME_ADMIN:
155 case SPDK_BDEV_IO_TYPE_NVME_IO:
156 case SPDK_BDEV_IO_TYPE_NVME_IO_MD:
157 return false;
158 default:
159 break;
160 }
161
11fdf7f2
TL
162 return part->internal.base->bdev->fn_table->io_type_supported(part->internal.base->bdev->ctxt,
163 io_type);
164}
165
/*
 * get_io_channel hook installed on part bdevs: the part itself is the
 * io_device, so simply obtain a channel for it.
 */
static struct spdk_io_channel *
spdk_bdev_part_get_io_channel(void *_part)
{
	struct spdk_bdev_part *io_device = _part;
	struct spdk_io_channel *part_ch = spdk_get_io_channel(io_device);

	return part_ch;
}
173
174struct spdk_bdev *
175spdk_bdev_part_get_bdev(struct spdk_bdev_part *part)
176{
177 return &part->internal.bdev;
178}
179
180struct spdk_bdev_part_base *
181spdk_bdev_part_get_base(struct spdk_bdev_part *part)
182{
183 return part->internal.base;
184}
185
186struct spdk_bdev *
187spdk_bdev_part_get_base_bdev(struct spdk_bdev_part *part)
188{
189 return part->internal.base->bdev;
190}
191
192uint64_t
193spdk_bdev_part_get_offset_blocks(struct spdk_bdev_part *part)
194{
195 return part->internal.offset_blocks;
196}
197
198static void
199spdk_bdev_part_complete_io(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg)
200{
201 struct spdk_bdev_io *part_io = cb_arg;
202 int status = success ? SPDK_BDEV_IO_STATUS_SUCCESS : SPDK_BDEV_IO_STATUS_FAILED;
203
204 spdk_bdev_io_complete(part_io, status);
205 spdk_bdev_free_io(bdev_io);
206}
207
208int
209spdk_bdev_part_submit_request(struct spdk_bdev_part_channel *ch, struct spdk_bdev_io *bdev_io)
210{
211 struct spdk_bdev_part *part = ch->part;
212 struct spdk_io_channel *base_ch = ch->base_ch;
213 struct spdk_bdev_desc *base_desc = part->internal.base->desc;
214 uint64_t offset;
215 int rc = 0;
216
217 /* Modify the I/O to adjust for the offset within the base bdev. */
218 switch (bdev_io->type) {
219 case SPDK_BDEV_IO_TYPE_READ:
220 offset = bdev_io->u.bdev.offset_blocks + part->internal.offset_blocks;
221 rc = spdk_bdev_readv_blocks(base_desc, base_ch, bdev_io->u.bdev.iovs,
222 bdev_io->u.bdev.iovcnt, offset,
223 bdev_io->u.bdev.num_blocks, spdk_bdev_part_complete_io,
224 bdev_io);
225 break;
226 case SPDK_BDEV_IO_TYPE_WRITE:
227 offset = bdev_io->u.bdev.offset_blocks + part->internal.offset_blocks;
228 rc = spdk_bdev_writev_blocks(base_desc, base_ch, bdev_io->u.bdev.iovs,
229 bdev_io->u.bdev.iovcnt, offset,
230 bdev_io->u.bdev.num_blocks, spdk_bdev_part_complete_io,
231 bdev_io);
232 break;
233 case SPDK_BDEV_IO_TYPE_WRITE_ZEROES:
234 offset = bdev_io->u.bdev.offset_blocks + part->internal.offset_blocks;
235 rc = spdk_bdev_write_zeroes_blocks(base_desc, base_ch, offset, bdev_io->u.bdev.num_blocks,
236 spdk_bdev_part_complete_io, bdev_io);
237 break;
238 case SPDK_BDEV_IO_TYPE_UNMAP:
239 offset = bdev_io->u.bdev.offset_blocks + part->internal.offset_blocks;
240 rc = spdk_bdev_unmap_blocks(base_desc, base_ch, offset, bdev_io->u.bdev.num_blocks,
241 spdk_bdev_part_complete_io, bdev_io);
242 break;
243 case SPDK_BDEV_IO_TYPE_FLUSH:
244 offset = bdev_io->u.bdev.offset_blocks + part->internal.offset_blocks;
245 rc = spdk_bdev_flush_blocks(base_desc, base_ch, offset, bdev_io->u.bdev.num_blocks,
246 spdk_bdev_part_complete_io, bdev_io);
247 break;
248 case SPDK_BDEV_IO_TYPE_RESET:
249 rc = spdk_bdev_reset(base_desc, base_ch,
250 spdk_bdev_part_complete_io, bdev_io);
251 break;
252 default:
9f95a23c 253 SPDK_ERRLOG("unknown I/O type %d\n", bdev_io->type);
11fdf7f2
TL
254 return SPDK_BDEV_IO_STATUS_FAILED;
255 }
256
257 return rc;
258}
259
260static int
261spdk_bdev_part_channel_create_cb(void *io_device, void *ctx_buf)
262{
263 struct spdk_bdev_part *part = (struct spdk_bdev_part *)io_device;
264 struct spdk_bdev_part_channel *ch = ctx_buf;
265
266 ch->part = part;
267 ch->base_ch = spdk_bdev_get_io_channel(part->internal.base->desc);
268 if (ch->base_ch == NULL) {
269 return -1;
270 }
271
272 if (part->internal.base->ch_create_cb) {
273 return part->internal.base->ch_create_cb(io_device, ctx_buf);
274 } else {
275 return 0;
276 }
277}
278
279static void
280spdk_bdev_part_channel_destroy_cb(void *io_device, void *ctx_buf)
281{
282 struct spdk_bdev_part *part = (struct spdk_bdev_part *)io_device;
283 struct spdk_bdev_part_channel *ch = ctx_buf;
284
285 if (part->internal.base->ch_destroy_cb) {
286 part->internal.base->ch_destroy_cb(io_device, ctx_buf);
287 }
288 spdk_put_io_channel(ch->base_ch);
289}
290
291struct spdk_bdev_part_base *
292 spdk_bdev_part_base_construct(struct spdk_bdev *bdev,
293 spdk_bdev_remove_cb_t remove_cb, struct spdk_bdev_module *module,
294 struct spdk_bdev_fn_table *fn_table, struct bdev_part_tailq *tailq,
295 spdk_bdev_part_base_free_fn free_fn, void *ctx,
296 uint32_t channel_size, spdk_io_channel_create_cb ch_create_cb,
297 spdk_io_channel_destroy_cb ch_destroy_cb)
298{
299 int rc;
300 struct spdk_bdev_part_base *base;
301
302 base = calloc(1, sizeof(*base));
303 if (!base) {
304 SPDK_ERRLOG("Memory allocation failure\n");
305 return NULL;
306 }
307 fn_table->get_io_channel = spdk_bdev_part_get_io_channel;
308 fn_table->io_type_supported = spdk_bdev_part_io_type_supported;
309
310 base->bdev = bdev;
311 base->desc = NULL;
312 base->ref = 0;
313 base->module = module;
314 base->fn_table = fn_table;
315 base->tailq = tailq;
316 base->base_free_fn = free_fn;
317 base->ctx = ctx;
318 base->claimed = false;
319 base->channel_size = channel_size;
320 base->ch_create_cb = ch_create_cb;
321 base->ch_destroy_cb = ch_destroy_cb;
322
9f95a23c 323 rc = spdk_bdev_open(bdev, false, remove_cb, base, &base->desc);
11fdf7f2
TL
324 if (rc) {
325 spdk_bdev_part_base_free(base);
9f95a23c
TL
326 SPDK_ERRLOG("could not open bdev %s: %s\n", spdk_bdev_get_name(bdev),
327 spdk_strerror(-rc));
11fdf7f2
TL
328 return NULL;
329 }
330
331 return base;
332}
333
334int
335spdk_bdev_part_construct(struct spdk_bdev_part *part, struct spdk_bdev_part_base *base,
336 char *name, uint64_t offset_blocks, uint64_t num_blocks,
337 char *product_name)
338{
339 part->internal.bdev.blocklen = base->bdev->blocklen;
340 part->internal.bdev.blockcnt = num_blocks;
341 part->internal.offset_blocks = offset_blocks;
342
343 part->internal.bdev.write_cache = base->bdev->write_cache;
9f95a23c 344 part->internal.bdev.required_alignment = base->bdev->required_alignment;
11fdf7f2
TL
345 part->internal.bdev.ctxt = part;
346 part->internal.bdev.module = base->module;
347 part->internal.bdev.fn_table = base->fn_table;
348
349 part->internal.bdev.name = strdup(name);
350 part->internal.bdev.product_name = strdup(product_name);
351
352 if (part->internal.bdev.name == NULL) {
353 SPDK_ERRLOG("Failed to allocate name for new part of bdev %s\n", spdk_bdev_get_name(base->bdev));
354 return -1;
355 } else if (part->internal.bdev.product_name == NULL) {
356 free(part->internal.bdev.name);
357 SPDK_ERRLOG("Failed to allocate product name for new part of bdev %s\n",
358 spdk_bdev_get_name(base->bdev));
359 return -1;
360 }
361
362 __sync_fetch_and_add(&base->ref, 1);
363 part->internal.base = base;
364
365 if (!base->claimed) {
366 int rc;
367
368 rc = spdk_bdev_module_claim_bdev(base->bdev, base->desc, base->module);
369 if (rc) {
370 SPDK_ERRLOG("could not claim bdev %s\n", spdk_bdev_get_name(base->bdev));
371 free(part->internal.bdev.name);
372 free(part->internal.bdev.product_name);
373 return -1;
374 }
375 base->claimed = true;
376 }
377
378 spdk_io_device_register(part, spdk_bdev_part_channel_create_cb,
379 spdk_bdev_part_channel_destroy_cb,
380 base->channel_size,
381 name);
382
9f95a23c 383 spdk_bdev_register(&part->internal.bdev);
11fdf7f2
TL
384 TAILQ_INSERT_TAIL(base->tailq, part, tailq);
385
386 return 0;
387}