]> git.proxmox.com Git - ceph.git/blob - ceph/src/spdk/lib/bdev/part.c
update source to Ceph Pacific 16.2.2
[ceph.git] / ceph / src / spdk / lib / bdev / part.c
1 /*-
2 * BSD LICENSE
3 *
4 * Copyright (c) Intel Corporation.
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 *
11 * * Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * * Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in
15 * the documentation and/or other materials provided with the
16 * distribution.
17 * * Neither the name of Intel Corporation nor the names of its
18 * contributors may be used to endorse or promote products derived
19 * from this software without specific prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32 */
33
34 /*
35 * Common code for partition-like virtual bdevs.
36 */
37
38 #include "spdk/bdev.h"
39 #include "spdk/likely.h"
40 #include "spdk/log.h"
41 #include "spdk/string.h"
42 #include "spdk/thread.h"
43
44 #include "spdk/bdev_module.h"
45
/*
 * Shared state for all partitions carved out of a single base bdev.
 * One of these is created per base bdev; each spdk_bdev_part holds a
 * reference to it, and the base is torn down when the last part goes away.
 */
struct spdk_bdev_part_base {
	struct spdk_bdev *bdev;				/* the underlying base bdev */
	struct spdk_bdev_desc *desc;			/* open descriptor on the base bdev */
	uint32_t ref;					/* number of parts referencing this base */
	uint32_t channel_size;				/* per-channel ctx size passed to io_device registration */
	spdk_bdev_part_base_free_fn base_free_fn;	/* optional caller cleanup, invoked from spdk_bdev_part_base_free */
	void *ctx;					/* opaque caller context handed to base_free_fn */
	bool claimed;					/* true once the base bdev has been module-claimed */
	struct spdk_bdev_module *module;		/* module that owns the parts */
	struct spdk_bdev_fn_table *fn_table;		/* fn_table shared by all part bdevs (mutated in construct) */
	struct bdev_part_tailq *tailq;			/* caller-owned list that all parts are linked onto */
	spdk_io_channel_create_cb ch_create_cb;		/* optional per-channel create hook */
	spdk_io_channel_destroy_cb ch_destroy_cb;	/* optional per-channel destroy hook */
	struct spdk_thread *thread;			/* thread on which desc was opened; desc must close there */
};
61
62 struct spdk_bdev *
63 spdk_bdev_part_base_get_bdev(struct spdk_bdev_part_base *part_base)
64 {
65 return part_base->bdev;
66 }
67
68 struct spdk_bdev_desc *
69 spdk_bdev_part_base_get_desc(struct spdk_bdev_part_base *part_base)
70 {
71 return part_base->desc;
72 }
73
74 struct bdev_part_tailq *
75 spdk_bdev_part_base_get_tailq(struct spdk_bdev_part_base *part_base)
76 {
77 return part_base->tailq;
78 }
79
80 void *
81 spdk_bdev_part_base_get_ctx(struct spdk_bdev_part_base *part_base)
82 {
83 return part_base->ctx;
84 }
85
86 const char *
87 spdk_bdev_part_base_get_bdev_name(struct spdk_bdev_part_base *part_base)
88 {
89 return part_base->bdev->name;
90 }
91
/*
 * Thread message handler: closes the base bdev descriptor.  Sent to the
 * thread that originally opened the descriptor, since SPDK requires a
 * descriptor to be closed on its opening thread.
 */
static void
bdev_part_base_free(void *ctx)
{
	spdk_bdev_close((struct spdk_bdev_desc *)ctx);
}
99
100 void
101 spdk_bdev_part_base_free(struct spdk_bdev_part_base *base)
102 {
103 if (base->desc) {
104 /* Close the underlying bdev on its same opened thread. */
105 if (base->thread && base->thread != spdk_get_thread()) {
106 spdk_thread_send_msg(base->thread, bdev_part_base_free, base->desc);
107 } else {
108 spdk_bdev_close(base->desc);
109 }
110 }
111
112 if (base->base_free_fn != NULL) {
113 base->base_free_fn(base->ctx);
114 }
115
116 free(base);
117 }
118
/*
 * Final teardown of a part, invoked by spdk_io_device_unregister once all
 * of the part's I/O channels have been released.  Drops the part's
 * reference on the shared base, signals destruct completion to the bdev
 * layer, and frees the part's memory.
 */
static void
bdev_part_free_cb(void *io_device)
{
	struct spdk_bdev_part *part = io_device;
	struct spdk_bdev_part_base *base;

	assert(part);
	assert(part->internal.base);

	base = part->internal.base;

	TAILQ_REMOVE(base->tailq, part, tailq);

	/* Last part gone: release the module claim and free the shared base. */
	if (--base->ref == 0) {
		spdk_bdev_module_release_bdev(base->bdev);
		spdk_bdev_part_base_free(base);
	}

	/* Signal completion of the async destruct started by spdk_bdev_part_free()
	 * before freeing the part's own allocations. */
	spdk_bdev_destruct_done(&part->internal.bdev, 0);
	free(part->internal.bdev.name);
	free(part->internal.bdev.product_name);
	free(part);
}
142
143 int
144 spdk_bdev_part_free(struct spdk_bdev_part *part)
145 {
146 spdk_io_device_unregister(part, bdev_part_free_cb);
147
148 /* Return 1 to indicate that this is an asynchronous operation that isn't complete
149 * until spdk_bdev_destruct_done is called */
150 return 1;
151 }
152
153 void
154 spdk_bdev_part_base_hotremove(struct spdk_bdev_part_base *part_base, struct bdev_part_tailq *tailq)
155 {
156 struct spdk_bdev_part *part, *tmp;
157
158 TAILQ_FOREACH_SAFE(part, tailq, tailq, tmp) {
159 if (part->internal.base == part_base) {
160 spdk_bdev_unregister(&part->internal.bdev, NULL, NULL);
161 }
162 }
163 }
164
165 static bool
166 bdev_part_io_type_supported(void *_part, enum spdk_bdev_io_type io_type)
167 {
168 struct spdk_bdev_part *part = _part;
169
170 /* We can't decode/modify passthrough NVMe commands, so don't report
171 * that a partition supports these io types, even if the underlying
172 * bdev does.
173 */
174 switch (io_type) {
175 case SPDK_BDEV_IO_TYPE_NVME_ADMIN:
176 case SPDK_BDEV_IO_TYPE_NVME_IO:
177 case SPDK_BDEV_IO_TYPE_NVME_IO_MD:
178 return false;
179 default:
180 break;
181 }
182
183 return part->internal.base->bdev->fn_table->io_type_supported(part->internal.base->bdev->ctxt,
184 io_type);
185 }
186
/*
 * fn_table callback: obtain an I/O channel for the part.  The part itself
 * is registered as an io_device, so its pointer is the io_device key.
 */
static struct spdk_io_channel *
bdev_part_get_io_channel(void *_part)
{
	return spdk_get_io_channel(_part);
}
194
195 struct spdk_bdev *
196 spdk_bdev_part_get_bdev(struct spdk_bdev_part *part)
197 {
198 return &part->internal.bdev;
199 }
200
201 struct spdk_bdev_part_base *
202 spdk_bdev_part_get_base(struct spdk_bdev_part *part)
203 {
204 return part->internal.base;
205 }
206
207 struct spdk_bdev *
208 spdk_bdev_part_get_base_bdev(struct spdk_bdev_part *part)
209 {
210 return part->internal.base->bdev;
211 }
212
213 uint64_t
214 spdk_bdev_part_get_offset_blocks(struct spdk_bdev_part *part)
215 {
216 return part->internal.offset_blocks;
217 }
218
/*
 * Remap the DIF/DIX reference tags of an I/O from the part's block offset
 * to the base bdev's block offset (or vice versa for reads).  No-op unless
 * the bdev has reference-tag checking enabled.
 *
 * Returns 0 on success or a negative spdk_dif error code.
 */
static int
bdev_part_remap_dif(struct spdk_bdev_io *bdev_io, uint32_t offset,
		    uint32_t remapped_offset)
{
	struct spdk_bdev *bdev = bdev_io->bdev;
	struct spdk_dif_ctx dif_ctx;
	struct spdk_dif_error err_blk = {};
	int rc;

	/* Only the reference tag encodes the LBA; other DIF checks are offset-independent. */
	if (spdk_likely(!(bdev->dif_check_flags & SPDK_DIF_FLAGS_REFTAG_CHECK))) {
		return 0;
	}

	/* Initialize the DIF context with the I/O's original (part-relative) offset. */
	rc = spdk_dif_ctx_init(&dif_ctx,
			       bdev->blocklen, bdev->md_len, bdev->md_interleave,
			       bdev->dif_is_head_of_md, bdev->dif_type, bdev->dif_check_flags,
			       offset, 0, 0, 0, 0);
	if (rc != 0) {
		SPDK_ERRLOG("Initialization of DIF context failed\n");
		return rc;
	}

	spdk_dif_ctx_set_remapped_init_ref_tag(&dif_ctx, remapped_offset);

	if (bdev->md_interleave) {
		/* Metadata interleaved with data: remap in place within the data iovecs. */
		rc = spdk_dif_remap_ref_tag(bdev_io->u.bdev.iovs, bdev_io->u.bdev.iovcnt,
					    bdev_io->u.bdev.num_blocks, &dif_ctx, &err_blk);
	} else {
		/* Separate metadata buffer (DIX): remap in the md buffer only. */
		struct iovec md_iov = {
			.iov_base = bdev_io->u.bdev.md_buf,
			.iov_len = bdev_io->u.bdev.num_blocks * bdev->md_len,
		};

		rc = spdk_dix_remap_ref_tag(&md_iov, bdev_io->u.bdev.num_blocks, &dif_ctx, &err_blk);
	}

	if (rc != 0) {
		SPDK_ERRLOG("Remapping reference tag failed. type=%d, offset=%" PRIu32 "\n",
			    err_blk.err_type, err_blk.err_offset);
	}

	return rc;
}
262
263 static void
264 bdev_part_complete_read_io(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg)
265 {
266 struct spdk_bdev_io *part_io = cb_arg;
267 uint32_t offset, remapped_offset;
268 int rc, status;
269
270 offset = bdev_io->u.bdev.offset_blocks;
271 remapped_offset = part_io->u.bdev.offset_blocks;
272
273 if (success) {
274 rc = bdev_part_remap_dif(bdev_io, offset, remapped_offset);
275 if (rc != 0) {
276 success = false;
277 }
278 }
279
280 status = success ? SPDK_BDEV_IO_STATUS_SUCCESS : SPDK_BDEV_IO_STATUS_FAILED;
281
282 spdk_bdev_io_complete(part_io, status);
283 spdk_bdev_free_io(bdev_io);
284 }
285
286 static void
287 bdev_part_complete_io(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg)
288 {
289 struct spdk_bdev_io *part_io = cb_arg;
290 int status = success ? SPDK_BDEV_IO_STATUS_SUCCESS : SPDK_BDEV_IO_STATUS_FAILED;
291
292 spdk_bdev_io_complete(part_io, status);
293 spdk_bdev_free_io(bdev_io);
294 }
295
296 static void
297 bdev_part_complete_zcopy_io(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg)
298 {
299 struct spdk_bdev_io *part_io = cb_arg;
300 int status = success ? SPDK_BDEV_IO_STATUS_SUCCESS : SPDK_BDEV_IO_STATUS_FAILED;
301
302 spdk_bdev_io_set_buf(part_io, bdev_io->u.bdev.iovs[0].iov_base, bdev_io->u.bdev.iovs[0].iov_len);
303 spdk_bdev_io_complete(part_io, status);
304 spdk_bdev_free_io(bdev_io);
305 }
306
/*
 * Forward an I/O submitted to a part down to the base bdev, shifting the
 * block offset by the part's offset within the base.
 *
 * Returns 0 if the child I/O was submitted, a negative errno from the
 * submit call, or SPDK_BDEV_IO_STATUS_FAILED for unsupported types or a
 * DIF remap failure.
 */
int
spdk_bdev_part_submit_request(struct spdk_bdev_part_channel *ch, struct spdk_bdev_io *bdev_io)
{
	struct spdk_bdev_part *part = ch->part;
	struct spdk_io_channel *base_ch = ch->base_ch;
	struct spdk_bdev_desc *base_desc = part->internal.base->desc;
	uint64_t offset, remapped_offset;
	int rc = 0;

	offset = bdev_io->u.bdev.offset_blocks;
	remapped_offset = offset + part->internal.offset_blocks;

	/* Modify the I/O to adjust for the offset within the base bdev. */
	switch (bdev_io->type) {
	case SPDK_BDEV_IO_TYPE_READ:
		/* Reads remap DIF ref tags in the completion callback, after data arrives. */
		if (bdev_io->u.bdev.md_buf == NULL) {
			rc = spdk_bdev_readv_blocks(base_desc, base_ch, bdev_io->u.bdev.iovs,
						    bdev_io->u.bdev.iovcnt, remapped_offset,
						    bdev_io->u.bdev.num_blocks,
						    bdev_part_complete_read_io, bdev_io);
		} else {
			rc = spdk_bdev_readv_blocks_with_md(base_desc, base_ch,
							    bdev_io->u.bdev.iovs,
							    bdev_io->u.bdev.iovcnt,
							    bdev_io->u.bdev.md_buf, remapped_offset,
							    bdev_io->u.bdev.num_blocks,
							    bdev_part_complete_read_io, bdev_io);
		}
		break;
	case SPDK_BDEV_IO_TYPE_WRITE:
		/* Writes must remap DIF ref tags to the base offset before submission. */
		rc = bdev_part_remap_dif(bdev_io, offset, remapped_offset);
		if (rc != 0) {
			return SPDK_BDEV_IO_STATUS_FAILED;
		}

		if (bdev_io->u.bdev.md_buf == NULL) {
			rc = spdk_bdev_writev_blocks(base_desc, base_ch, bdev_io->u.bdev.iovs,
						     bdev_io->u.bdev.iovcnt, remapped_offset,
						     bdev_io->u.bdev.num_blocks,
						     bdev_part_complete_io, bdev_io);
		} else {
			rc = spdk_bdev_writev_blocks_with_md(base_desc, base_ch,
							     bdev_io->u.bdev.iovs,
							     bdev_io->u.bdev.iovcnt,
							     bdev_io->u.bdev.md_buf, remapped_offset,
							     bdev_io->u.bdev.num_blocks,
							     bdev_part_complete_io, bdev_io);
		}
		break;
	case SPDK_BDEV_IO_TYPE_WRITE_ZEROES:
		rc = spdk_bdev_write_zeroes_blocks(base_desc, base_ch, remapped_offset,
						   bdev_io->u.bdev.num_blocks, bdev_part_complete_io,
						   bdev_io);
		break;
	case SPDK_BDEV_IO_TYPE_UNMAP:
		rc = spdk_bdev_unmap_blocks(base_desc, base_ch, remapped_offset,
					    bdev_io->u.bdev.num_blocks, bdev_part_complete_io,
					    bdev_io);
		break;
	case SPDK_BDEV_IO_TYPE_FLUSH:
		rc = spdk_bdev_flush_blocks(base_desc, base_ch, remapped_offset,
					    bdev_io->u.bdev.num_blocks, bdev_part_complete_io,
					    bdev_io);
		break;
	case SPDK_BDEV_IO_TYPE_RESET:
		/* Resets target the whole base bdev; no offset adjustment applies. */
		rc = spdk_bdev_reset(base_desc, base_ch,
				     bdev_part_complete_io, bdev_io);
		break;
	case SPDK_BDEV_IO_TYPE_ZCOPY:
		rc = spdk_bdev_zcopy_start(base_desc, base_ch, remapped_offset,
					   bdev_io->u.bdev.num_blocks, bdev_io->u.bdev.zcopy.populate,
					   bdev_part_complete_zcopy_io, bdev_io);
		break;
	default:
		SPDK_ERRLOG("unknown I/O type %d\n", bdev_io->type);
		return SPDK_BDEV_IO_STATUS_FAILED;
	}

	return rc;
}
387
388 static int
389 bdev_part_channel_create_cb(void *io_device, void *ctx_buf)
390 {
391 struct spdk_bdev_part *part = (struct spdk_bdev_part *)io_device;
392 struct spdk_bdev_part_channel *ch = ctx_buf;
393
394 ch->part = part;
395 ch->base_ch = spdk_bdev_get_io_channel(part->internal.base->desc);
396 if (ch->base_ch == NULL) {
397 return -1;
398 }
399
400 if (part->internal.base->ch_create_cb) {
401 return part->internal.base->ch_create_cb(io_device, ctx_buf);
402 } else {
403 return 0;
404 }
405 }
406
407 static void
408 bdev_part_channel_destroy_cb(void *io_device, void *ctx_buf)
409 {
410 struct spdk_bdev_part *part = (struct spdk_bdev_part *)io_device;
411 struct spdk_bdev_part_channel *ch = ctx_buf;
412
413 if (part->internal.base->ch_destroy_cb) {
414 part->internal.base->ch_destroy_cb(io_device, ctx_buf);
415 }
416 spdk_put_io_channel(ch->base_ch);
417 }
418
/*
 * Allocate and initialize the shared base for a set of parts on top of
 * `bdev`.  Opens the bdev (read-only) and records the opening thread so
 * the descriptor can later be closed there.
 *
 * NOTE: the caller-provided fn_table is mutated in place — its
 * get_io_channel and io_type_supported entries are overwritten with the
 * part implementations; callers must pass a table dedicated to parts.
 *
 * Returns the new base, or NULL on allocation/open failure (free_fn is
 * still invoked via spdk_bdev_part_base_free on the open-failure path).
 */
struct spdk_bdev_part_base *
spdk_bdev_part_base_construct(struct spdk_bdev *bdev,
			      spdk_bdev_remove_cb_t remove_cb, struct spdk_bdev_module *module,
			      struct spdk_bdev_fn_table *fn_table, struct bdev_part_tailq *tailq,
			      spdk_bdev_part_base_free_fn free_fn, void *ctx,
			      uint32_t channel_size, spdk_io_channel_create_cb ch_create_cb,
			      spdk_io_channel_destroy_cb ch_destroy_cb)
{
	int rc;
	struct spdk_bdev_part_base *base;

	base = calloc(1, sizeof(*base));
	if (!base) {
		SPDK_ERRLOG("Memory allocation failure\n");
		return NULL;
	}
	/* Install the part-level channel/io-type hooks into the shared fn_table. */
	fn_table->get_io_channel = bdev_part_get_io_channel;
	fn_table->io_type_supported = bdev_part_io_type_supported;

	base->bdev = bdev;
	base->desc = NULL;
	base->ref = 0;
	base->module = module;
	base->fn_table = fn_table;
	base->tailq = tailq;
	base->base_free_fn = free_fn;
	base->ctx = ctx;
	base->claimed = false;
	base->channel_size = channel_size;
	base->ch_create_cb = ch_create_cb;
	base->ch_destroy_cb = ch_destroy_cb;

	/* Open read-only (write == false); parts write through this descriptor
	 * only after the module claim in spdk_bdev_part_construct. */
	rc = spdk_bdev_open(bdev, false, remove_cb, base, &base->desc);
	if (rc) {
		spdk_bdev_part_base_free(base);
		SPDK_ERRLOG("could not open bdev %s: %s\n", spdk_bdev_get_name(bdev),
			    spdk_strerror(-rc));
		return NULL;
	}

	/* Save the thread where the base device is opened */
	base->thread = spdk_get_thread();

	return base;
}
464
465 int
466 spdk_bdev_part_construct(struct spdk_bdev_part *part, struct spdk_bdev_part_base *base,
467 char *name, uint64_t offset_blocks, uint64_t num_blocks,
468 char *product_name)
469 {
470 part->internal.bdev.blocklen = base->bdev->blocklen;
471 part->internal.bdev.blockcnt = num_blocks;
472 part->internal.offset_blocks = offset_blocks;
473
474 part->internal.bdev.write_cache = base->bdev->write_cache;
475 part->internal.bdev.required_alignment = base->bdev->required_alignment;
476 part->internal.bdev.ctxt = part;
477 part->internal.bdev.module = base->module;
478 part->internal.bdev.fn_table = base->fn_table;
479
480 part->internal.bdev.md_interleave = base->bdev->md_interleave;
481 part->internal.bdev.md_len = base->bdev->md_len;
482 part->internal.bdev.dif_type = base->bdev->dif_type;
483 part->internal.bdev.dif_is_head_of_md = base->bdev->dif_is_head_of_md;
484 part->internal.bdev.dif_check_flags = base->bdev->dif_check_flags;
485
486 part->internal.bdev.name = strdup(name);
487 part->internal.bdev.product_name = strdup(product_name);
488
489 if (part->internal.bdev.name == NULL) {
490 SPDK_ERRLOG("Failed to allocate name for new part of bdev %s\n", spdk_bdev_get_name(base->bdev));
491 return -1;
492 } else if (part->internal.bdev.product_name == NULL) {
493 free(part->internal.bdev.name);
494 SPDK_ERRLOG("Failed to allocate product name for new part of bdev %s\n",
495 spdk_bdev_get_name(base->bdev));
496 return -1;
497 }
498
499 base->ref++;
500 part->internal.base = base;
501
502 if (!base->claimed) {
503 int rc;
504
505 rc = spdk_bdev_module_claim_bdev(base->bdev, base->desc, base->module);
506 if (rc) {
507 SPDK_ERRLOG("could not claim bdev %s\n", spdk_bdev_get_name(base->bdev));
508 free(part->internal.bdev.name);
509 free(part->internal.bdev.product_name);
510 return -1;
511 }
512 base->claimed = true;
513 }
514
515 spdk_io_device_register(part, bdev_part_channel_create_cb,
516 bdev_part_channel_destroy_cb,
517 base->channel_size,
518 name);
519
520 spdk_bdev_register(&part->internal.bdev);
521 TAILQ_INSERT_TAIL(base->tailq, part, tailq);
522
523 return 0;
524 }