]> git.proxmox.com Git - ceph.git/blob - ceph/src/spdk/lib/bdev/pmem/bdev_pmem.c
import 15.2.0 Octopus source
[ceph.git] / ceph / src / spdk / lib / bdev / pmem / bdev_pmem.c
1 /*-
2 * BSD LICENSE
3 *
4 * Copyright (c) Intel Corporation.
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 *
11 * * Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * * Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in
15 * the documentation and/or other materials provided with the
16 * distribution.
17 * * Neither the name of Intel Corporation nor the names of its
18 * contributors may be used to endorse or promote products derived
19 * from this software without specific prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32 */
33
34 #include "spdk/conf.h"
35 #include "spdk/string.h"
36 #include "spdk/likely.h"
37 #include "spdk/util.h"
38 #include "spdk/rpc.h"
39 #include "spdk/bdev_module.h"
40 #include "spdk_internal/log.h"
41 #include "spdk/config.h"
42
43 #include "bdev_pmem.h"
44 #include "libpmemblk.h"
45
46 struct pmem_disk {
47 struct spdk_bdev disk;
48 PMEMblkpool *pool;
49 char pmem_file[NAME_MAX];
50 TAILQ_ENTRY(pmem_disk) tailq;
51 };
52
/*
 * List of every pmem disk created by this module.  The address of the list
 * head is also the key used with spdk_io_device_register() and
 * spdk_get_io_channel().
 */
static TAILQ_HEAD(, pmem_disk) g_pmem_disks = TAILQ_HEAD_INITIALIZER(g_pmem_disks);

/* Module init/fini hooks; the definitions live near the end of the file. */
static void bdev_pmem_finish(void);
static int bdev_pmem_initialize(void);
57
58 static struct spdk_bdev_module pmem_if = {
59 .name = "pmem",
60 .module_init = bdev_pmem_initialize,
61 .module_fini = bdev_pmem_finish,
62 .async_fini = true,
63
64 };
65
66 SPDK_BDEV_MODULE_REGISTER(pmem, &pmem_if)
67
68 typedef int(*spdk_bdev_pmem_io_request)(PMEMblkpool *pbp, void *buf, long long blockno);
69
70 static int
71 _bdev_pmem_submit_io_read(PMEMblkpool *pbp, void *buf, long long blockno)
72 {
73 return pmemblk_read(pbp, buf, blockno);
74 }
75
76 static int
77 _bdev_pmem_submit_io_write(PMEMblkpool *pbp, void *buf, long long blockno)
78 {
79 return pmemblk_write(pbp, buf, blockno);
80 }
81
82 static int
83 bdev_pmem_destruct(void *ctx)
84 {
85 struct pmem_disk *pdisk = ctx;
86
87 TAILQ_REMOVE(&g_pmem_disks, pdisk, tailq);
88 free(pdisk->disk.name);
89 pmemblk_close(pdisk->pool);
90 free(pdisk);
91
92 return 0;
93 }
94
/*
 * Check that the iovec array can carry num_blocks * block_size bytes.
 *
 * Returns 0 as soon as an iovec is large enough to hold the bytes still
 * outstanding.  Returns -1 if a non-empty iovec has a NULL base, if an iovec
 * that does not finish the transfer is not a whole number of blocks, or if
 * the array ends before the byte count is covered.
 */
static int
bdev_pmem_check_iov_len(struct iovec *iovs, int iovcnt, size_t num_blocks, uint32_t block_size)
{
	size_t remaining = num_blocks * block_size;
	int idx = 0;

	while (idx < iovcnt) {
		const struct iovec *cur = &iovs[idx];

		if (spdk_unlikely(cur->iov_len != 0 && cur->iov_base == NULL)) {
			return -1;
		}

		/* This vector completes the request; later vectors are not inspected. */
		if (cur->iov_len >= remaining) {
			return 0;
		}

		/* Intermediate vectors must end on a block boundary. */
		if (spdk_unlikely((cur->iov_len % block_size) != 0)) {
			return -1;
		}

		remaining -= cur->iov_len;
		idx++;
	}

	return -1;
}
119
120 static void
121 bdev_pmem_submit_io(struct spdk_bdev_io *bdev_io, struct pmem_disk *pdisk,
122 struct spdk_io_channel *ch,
123 struct iovec *iov, int iovcnt,
124 uint64_t offset_blocks, size_t num_blocks, uint32_t block_size,
125 spdk_bdev_pmem_io_request fn)
126 {
127 int rc;
128 size_t nbytes, offset, len;
129 enum spdk_bdev_io_status status;
130
131 rc = bdev_pmem_check_iov_len(iov, iovcnt, num_blocks, block_size);
132 if (rc) {
133 status = SPDK_BDEV_IO_STATUS_FAILED;
134 goto end;
135 }
136
137 SPDK_DEBUGLOG(SPDK_LOG_BDEV_PMEM, "io %lu bytes from offset %#lx\n",
138 num_blocks, offset_blocks);
139
140 for (nbytes = num_blocks * block_size; nbytes > 0; iov++) {
141 len = spdk_min(iov->iov_len, nbytes);
142 nbytes -= len;
143
144 offset = 0;
145 while (offset != len) {
146 rc = fn(pdisk->pool, iov->iov_base + offset, offset_blocks);
147 if (rc != 0) {
148 SPDK_ERRLOG("pmemblk io failed: %d (%s)\n", errno, pmemblk_errormsg());
149 status = SPDK_BDEV_IO_STATUS_FAILED;
150 goto end;
151 }
152
153 offset += block_size;
154 offset_blocks++;
155 }
156 }
157
158 assert(num_blocks == offset_blocks - bdev_io->u.bdev.offset_blocks);
159 status = SPDK_BDEV_IO_STATUS_SUCCESS;
160 end:
161
162 spdk_bdev_io_complete(bdev_io, status);
163 }
164
165 static void
166 bdev_pmem_write_zeros(struct spdk_bdev_io *bdev_io, struct pmem_disk *pdisk,
167 struct spdk_io_channel *ch, uint64_t offset_blocks,
168 uint64_t num_blocks, uint32_t block_size)
169 {
170 int rc;
171 enum spdk_bdev_io_status status = SPDK_BDEV_IO_STATUS_SUCCESS;
172
173 while (num_blocks > 0) {
174 rc = pmemblk_set_zero(pdisk->pool, offset_blocks);
175 if (rc != 0) {
176 SPDK_ERRLOG("pmemblk_set_zero failed: %d (%s)\n", errno, pmemblk_errormsg());
177 status = SPDK_BDEV_IO_STATUS_FAILED;
178 break;
179 }
180 offset_blocks++;
181 num_blocks--;
182 }
183 spdk_bdev_io_complete(bdev_io, status);
184 }
185
186 static void
187 bdev_pmem_io_get_buf_cb(struct spdk_io_channel *channel, struct spdk_bdev_io *bdev_io,
188 bool success)
189 {
190 if (!success) {
191 spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED);
192 return;
193 }
194
195 bdev_pmem_submit_io(bdev_io,
196 bdev_io->bdev->ctxt,
197 channel,
198 bdev_io->u.bdev.iovs,
199 bdev_io->u.bdev.iovcnt,
200 bdev_io->u.bdev.offset_blocks,
201 bdev_io->u.bdev.num_blocks,
202 bdev_io->bdev->blocklen,
203 _bdev_pmem_submit_io_read);
204 }
205
206 static void
207 bdev_pmem_submit_request(struct spdk_io_channel *channel, struct spdk_bdev_io *bdev_io)
208 {
209 switch (bdev_io->type) {
210 case SPDK_BDEV_IO_TYPE_READ:
211 spdk_bdev_io_get_buf(bdev_io, bdev_pmem_io_get_buf_cb,
212 bdev_io->u.bdev.num_blocks * bdev_io->bdev->blocklen);
213 break;
214 case SPDK_BDEV_IO_TYPE_WRITE:
215 bdev_pmem_submit_io(bdev_io,
216 bdev_io->bdev->ctxt,
217 channel,
218 bdev_io->u.bdev.iovs,
219 bdev_io->u.bdev.iovcnt,
220 bdev_io->u.bdev.offset_blocks,
221 bdev_io->u.bdev.num_blocks,
222 bdev_io->bdev->blocklen,
223 _bdev_pmem_submit_io_write);
224 break;
225 case SPDK_BDEV_IO_TYPE_UNMAP:
226 case SPDK_BDEV_IO_TYPE_WRITE_ZEROES:
227 bdev_pmem_write_zeros(bdev_io,
228 bdev_io->bdev->ctxt,
229 channel,
230 bdev_io->u.bdev.offset_blocks,
231 bdev_io->u.bdev.num_blocks,
232 bdev_io->bdev->blocklen);
233 break;
234 case SPDK_BDEV_IO_TYPE_RESET:
235 spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_SUCCESS);
236 break;
237 default:
238 spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED);
239 }
240 }
241
242 static bool
243 bdev_pmem_io_type_supported(void *ctx, enum spdk_bdev_io_type io_type)
244 {
245 switch (io_type) {
246 case SPDK_BDEV_IO_TYPE_READ:
247 case SPDK_BDEV_IO_TYPE_WRITE:
248 case SPDK_BDEV_IO_TYPE_RESET:
249 case SPDK_BDEV_IO_TYPE_UNMAP:
250 case SPDK_BDEV_IO_TYPE_WRITE_ZEROES:
251 return true;
252 default:
253 return false;
254 }
255 }
256
257 static struct spdk_io_channel *
258 bdev_pmem_get_io_channel(void *ctx)
259 {
260 return spdk_get_io_channel(&g_pmem_disks);
261 }
262
263 static int
264 bdev_pmem_dump_info_json(void *ctx, struct spdk_json_write_ctx *w)
265 {
266 struct pmem_disk *pdisk = ctx;
267
268 spdk_json_write_named_object_begin(w, "pmem");
269 spdk_json_write_named_string(w, "pmem_file", pdisk->pmem_file);
270 spdk_json_write_object_end(w);
271
272 return 0;
273 }
274
/* io_device channel-create callback: there is no per-channel state, so this only reports success. */
static int
bdev_pmem_create_cb(void *io_device, void *ctx_buf)
{
	(void)io_device;
	(void)ctx_buf;

	return 0;
}
280
/* io_device channel-destroy callback: intentionally a no-op, nothing was set up at create time. */
static void
bdev_pmem_destroy_cb(void *io_device, void *ctx_buf)
{
	(void)io_device;
	(void)ctx_buf;
}
285
286 static void
287 bdev_pmem_write_config_json(struct spdk_bdev *bdev, struct spdk_json_write_ctx *w)
288 {
289 struct pmem_disk *disk = bdev->ctxt;
290
291 spdk_json_write_object_begin(w);
292
293 spdk_json_write_named_string(w, "method", "construct_pmem_bdev");
294
295 spdk_json_write_named_object_begin(w, "params");
296 spdk_json_write_named_string(w, "name", bdev->name);
297 spdk_json_write_named_string(w, "pmem_file", disk->pmem_file);
298 spdk_json_write_object_end(w);
299
300 spdk_json_write_object_end(w);
301 }
302
303 static const struct spdk_bdev_fn_table pmem_fn_table = {
304 .destruct = bdev_pmem_destruct,
305 .submit_request = bdev_pmem_submit_request,
306 .io_type_supported = bdev_pmem_io_type_supported,
307 .get_io_channel = bdev_pmem_get_io_channel,
308 .dump_info_json = bdev_pmem_dump_info_json,
309 .write_config_json = bdev_pmem_write_config_json,
310 };
311
312 int
313 spdk_create_pmem_disk(const char *pmem_file, const char *name, struct spdk_bdev **bdev)
314 {
315 uint64_t num_blocks;
316 uint32_t block_size;
317 struct pmem_disk *pdisk;
318 int rc;
319
320 *bdev = NULL;
321
322 if (name == NULL) {
323 SPDK_ERRLOG("Missing name parameter for spdk_create_pmem_disk()\n");
324 return EINVAL;
325 }
326
327 if (pmemblk_check(pmem_file, 0) != 1) {
328 SPDK_ERRLOG("Pool '%s' check failed: %s\n", pmem_file, pmemblk_errormsg());
329 return EIO;
330 }
331
332 pdisk = calloc(1, sizeof(*pdisk));
333 if (!pdisk) {
334 return ENOMEM;
335 }
336
337 snprintf(pdisk->pmem_file, sizeof(pdisk->pmem_file), "%s", pmem_file);
338 pdisk->pool = pmemblk_open(pmem_file, 0);
339 if (!pdisk->pool) {
340 SPDK_ERRLOG("Opening pmem pool '%s' failed: %d\n", pmem_file, errno);
341 free(pdisk);
342 return errno;
343 }
344
345 block_size = pmemblk_bsize(pdisk->pool);
346 num_blocks = pmemblk_nblock(pdisk->pool);
347
348 if (block_size == 0) {
349 SPDK_ERRLOG("Block size must be more than 0 bytes\n");
350 pmemblk_close(pdisk->pool);
351 free(pdisk);
352 return EINVAL;
353 }
354
355 if (num_blocks == 0) {
356 SPDK_ERRLOG("Disk must be more than 0 blocks\n");
357 pmemblk_close(pdisk->pool);
358 free(pdisk);
359 return EINVAL;
360 }
361
362 pdisk->disk.name = strdup(name);
363 if (!pdisk->disk.name) {
364 pmemblk_close(pdisk->pool);
365 free(pdisk);
366 return ENOMEM;
367 }
368
369 pdisk->disk.product_name = "pmemblk disk";
370 pdisk->disk.write_cache = 0;
371 pdisk->disk.blocklen = block_size;
372 pdisk->disk.blockcnt = num_blocks;
373
374 pdisk->disk.ctxt = pdisk;
375 pdisk->disk.fn_table = &pmem_fn_table;
376 pdisk->disk.module = &pmem_if;
377
378 rc = spdk_bdev_register(&pdisk->disk);
379 if (rc) {
380 pmemblk_close(pdisk->pool);
381 free(pdisk->disk.name);
382 free(pdisk);
383 return rc;
384 }
385
386 TAILQ_INSERT_TAIL(&g_pmem_disks, pdisk, tailq);
387
388 *bdev = &pdisk->disk;
389
390 return 0;
391 }
392
393 void
394 spdk_delete_pmem_disk(struct spdk_bdev *bdev, spdk_delete_pmem_complete cb_fn, void *cb_arg)
395 {
396 if (!bdev || bdev->module != &pmem_if) {
397 cb_fn(cb_arg, -ENODEV);
398 return;
399 }
400
401 spdk_bdev_unregister(bdev, cb_fn, cb_arg);
402 }
403
/*
 * Create one pmem bdev for every "Blk <pmem_file> <bdev_name>" entry in the
 * "Pmem" configuration section.  A missing section is not an error.  Entries
 * that lack a field, or whose bdev cannot be created, are reported and
 * skipped so that the remaining entries are still processed.
 */
static void
bdev_pmem_read_conf(void)
{
	struct spdk_conf_section *sp;
	struct spdk_bdev *bdev;
	const char *pmem_file;
	const char *bdev_name;
	int i;
	int rc;

	sp = spdk_conf_find_section(NULL, "Pmem");
	if (sp == NULL) {
		return;
	}

	for (i = 0; spdk_conf_section_get_nval(sp, "Blk", i) != NULL; i++) {
		pmem_file = spdk_conf_section_get_nmval(sp, "Blk", i, 0);
		if (pmem_file == NULL) {
			SPDK_ERRLOG("Pmem: missing filename\n");
			continue;
		}

		bdev_name = spdk_conf_section_get_nmval(sp, "Blk", i, 1);
		if (bdev_name == NULL) {
			SPDK_ERRLOG("Pmem: missing bdev name\n");
			continue;
		}

		/*
		 * Not every failure path in spdk_create_pmem_disk() logs, so
		 * report the result here instead of dropping it silently.
		 */
		rc = spdk_create_pmem_disk(pmem_file, bdev_name, &bdev);
		if (rc != 0) {
			SPDK_ERRLOG("Pmem: could not create bdev '%s' from '%s': %d\n",
				    bdev_name, pmem_file, rc);
		}
	}
}
438
439 static int
440 bdev_pmem_initialize(void)
441 {
442 const char *err = pmemblk_check_version(PMEMBLK_MAJOR_VERSION, PMEMBLK_MINOR_VERSION);
443
444 if (err != NULL) {
445 SPDK_ERRLOG("Invalid libpmemblk version (expected %d.%d): %s\n", PMEMBLK_MAJOR_VERSION,
446 PMEMBLK_MINOR_VERSION, err);
447 return -1;
448 }
449
450 #ifdef SPDK_CONFIG_DEBUG
451 setenv("PMEMBLK_LOG_LEVEL", "1", 1);
452 #endif
453 spdk_io_device_register(&g_pmem_disks, bdev_pmem_create_cb, bdev_pmem_destroy_cb, 0, "pmem_bdev");
454
455 bdev_pmem_read_conf();
456
457 return 0;
458
459 }
460
/* Unregister-complete callback for the io_device: reports that module teardown has finished. */
static void
bdev_pmem_finish_done(void *io_device)
{
	(void)io_device;

	spdk_bdev_module_finish_done();
}
466
467 static void
468 bdev_pmem_finish(void)
469 {
470 spdk_io_device_unregister(&g_pmem_disks, bdev_pmem_finish_done);
471 }
472
473 SPDK_LOG_REGISTER_COMPONENT("bdev_pmem", SPDK_LOG_BDEV_PMEM)