]> git.proxmox.com Git - ceph.git/blob - ceph/src/spdk/lib/bdev/pmem/bdev_pmem.c
update sources to ceph Nautilus 14.2.1
[ceph.git] / ceph / src / spdk / lib / bdev / pmem / bdev_pmem.c
1 /*-
2 * BSD LICENSE
3 *
4 * Copyright (c) Intel Corporation.
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 *
11 * * Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * * Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in
15 * the documentation and/or other materials provided with the
16 * distribution.
17 * * Neither the name of Intel Corporation nor the names of its
18 * contributors may be used to endorse or promote products derived
19 * from this software without specific prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32 */
33
#include <limits.h>

#include "spdk/conf.h"
#include "spdk/string.h"
#include "spdk/likely.h"
#include "spdk/util.h"
#include "spdk/rpc.h"
#include "spdk/bdev_module.h"
#include "spdk_internal/log.h"

#include "bdev_pmem.h"
#include "libpmemblk.h"
44
45 struct pmem_disk {
46 struct spdk_bdev disk;
47 PMEMblkpool *pool;
48 char pmem_file[NAME_MAX];
49 TAILQ_ENTRY(pmem_disk) tailq;
50 };
51
/* Module init/fini hooks, declared up front so the module descriptor can reference them. */
static void bdev_pmem_finish(void);
static int bdev_pmem_initialize(void);

/*
 * List of every registered pmem disk.  The address of this list head is also
 * used as the io_device handle for channel registration and lookup.
 */
static TAILQ_HEAD(, pmem_disk) g_pmem_disks = TAILQ_HEAD_INITIALIZER(g_pmem_disks);
56
57 static struct spdk_bdev_module pmem_if = {
58 .name = "pmem",
59 .module_init = bdev_pmem_initialize,
60 .module_fini = bdev_pmem_finish,
61 .async_fini = true,
62
63 };
64
65 SPDK_BDEV_MODULE_REGISTER(&pmem_if)
66
67 typedef int(*spdk_bdev_pmem_io_request)(PMEMblkpool *pbp, void *buf, long long blockno);
68
69 static int
70 _bdev_pmem_submit_io_read(PMEMblkpool *pbp, void *buf, long long blockno)
71 {
72 return pmemblk_read(pbp, buf, blockno);
73 }
74
75 static int
76 _bdev_pmem_submit_io_write(PMEMblkpool *pbp, void *buf, long long blockno)
77 {
78 return pmemblk_write(pbp, buf, blockno);
79 }
80
81 static int
82 bdev_pmem_destruct(void *ctx)
83 {
84 struct pmem_disk *pdisk = ctx;
85
86 TAILQ_REMOVE(&g_pmem_disks, pdisk, tailq);
87 free(pdisk->disk.name);
88 pmemblk_close(pdisk->pool);
89 free(pdisk);
90
91 return 0;
92 }
93
/*
 * Check that an iovec list can carry num_blocks blocks of block_size bytes.
 *
 * Walks the vector in order.  The list is accepted (0) as soon as an element
 * is large enough to hold everything still outstanding.  It is rejected (-1)
 * if an element has a NULL base with non-zero length, if an element that does
 * not finish the request is not a whole number of blocks, or if the list ends
 * before the request is covered.
 */
static int
bdev_pmem_check_iov_len(struct iovec *iovs, int iovcnt, size_t num_blocks, uint32_t block_size)
{
	size_t remaining = num_blocks * block_size;
	int idx = 0;

	while (idx < iovcnt) {
		const struct iovec *vec = &iovs[idx++];
		size_t len = vec->iov_len;

		if (spdk_unlikely(len != 0 && vec->iov_base == NULL)) {
			return -1;
		}

		/* This element finishes the request; trailing space is fine. */
		if (len >= remaining) {
			return 0;
		}

		/* Intermediate elements must end on a block boundary. */
		if (spdk_unlikely(len % block_size != 0)) {
			return -1;
		}

		remaining -= len;
	}

	/* Ran out of elements with data still outstanding. */
	return -1;
}
118
119 static void
120 bdev_pmem_submit_io(struct spdk_bdev_io *bdev_io, struct pmem_disk *pdisk,
121 struct spdk_io_channel *ch,
122 struct iovec *iov, int iovcnt,
123 uint64_t offset_blocks, size_t num_blocks, uint32_t block_size,
124 spdk_bdev_pmem_io_request fn)
125 {
126 int rc;
127 size_t nbytes, offset, len;
128 enum spdk_bdev_io_status status;
129
130 rc = bdev_pmem_check_iov_len(iov, iovcnt, num_blocks, block_size);
131 if (rc) {
132 status = SPDK_BDEV_IO_STATUS_FAILED;
133 goto end;
134 }
135
136 SPDK_DEBUGLOG(SPDK_LOG_BDEV_PMEM, "io %lu bytes from offset %#lx\n",
137 num_blocks, offset_blocks);
138
139 for (nbytes = num_blocks * block_size; nbytes > 0; iov++) {
140 len = spdk_min(iov->iov_len, nbytes);
141 nbytes -= len;
142
143 offset = 0;
144 while (offset != len) {
145 rc = fn(pdisk->pool, iov->iov_base + offset, offset_blocks);
146 if (rc != 0) {
147 SPDK_ERRLOG("pmemblk io failed: %d (%s)\n", errno, pmemblk_errormsg());
148 status = SPDK_BDEV_IO_STATUS_FAILED;
149 goto end;
150 }
151
152 offset += block_size;
153 offset_blocks++;
154 }
155 }
156
157 assert(num_blocks == offset_blocks - bdev_io->u.bdev.offset_blocks);
158 status = SPDK_BDEV_IO_STATUS_SUCCESS;
159 end:
160
161 spdk_bdev_io_complete(bdev_io, status);
162 }
163
164 static void
165 bdev_pmem_write_zeros(struct spdk_bdev_io *bdev_io, struct pmem_disk *pdisk,
166 struct spdk_io_channel *ch, uint64_t offset_blocks,
167 uint64_t num_blocks, uint32_t block_size)
168 {
169 int rc;
170 enum spdk_bdev_io_status status = SPDK_BDEV_IO_STATUS_SUCCESS;
171
172 while (num_blocks > 0) {
173 rc = pmemblk_set_zero(pdisk->pool, offset_blocks);
174 if (rc != 0) {
175 SPDK_ERRLOG("pmemblk_set_zero failed: %d (%s)\n", errno, pmemblk_errormsg());
176 status = SPDK_BDEV_IO_STATUS_FAILED;
177 break;
178 }
179 offset_blocks++;
180 num_blocks--;
181 }
182 spdk_bdev_io_complete(bdev_io, status);
183 }
184
185 static void
186 bdev_pmem_io_get_buf_cb(struct spdk_io_channel *channel, struct spdk_bdev_io *bdev_io)
187 {
188 bdev_pmem_submit_io(bdev_io,
189 bdev_io->bdev->ctxt,
190 channel,
191 bdev_io->u.bdev.iovs,
192 bdev_io->u.bdev.iovcnt,
193 bdev_io->u.bdev.offset_blocks,
194 bdev_io->u.bdev.num_blocks,
195 bdev_io->bdev->blocklen,
196 _bdev_pmem_submit_io_read);
197 }
198
199 static void
200 bdev_pmem_submit_request(struct spdk_io_channel *channel, struct spdk_bdev_io *bdev_io)
201 {
202 switch (bdev_io->type) {
203 case SPDK_BDEV_IO_TYPE_READ:
204 spdk_bdev_io_get_buf(bdev_io, bdev_pmem_io_get_buf_cb,
205 bdev_io->u.bdev.num_blocks * bdev_io->bdev->blocklen);
206 break;
207 case SPDK_BDEV_IO_TYPE_WRITE:
208 bdev_pmem_submit_io(bdev_io,
209 bdev_io->bdev->ctxt,
210 channel,
211 bdev_io->u.bdev.iovs,
212 bdev_io->u.bdev.iovcnt,
213 bdev_io->u.bdev.offset_blocks,
214 bdev_io->u.bdev.num_blocks,
215 bdev_io->bdev->blocklen,
216 _bdev_pmem_submit_io_write);
217 break;
218 case SPDK_BDEV_IO_TYPE_UNMAP:
219 case SPDK_BDEV_IO_TYPE_WRITE_ZEROES:
220 bdev_pmem_write_zeros(bdev_io,
221 bdev_io->bdev->ctxt,
222 channel,
223 bdev_io->u.bdev.offset_blocks,
224 bdev_io->u.bdev.num_blocks,
225 bdev_io->bdev->blocklen);
226 break;
227 case SPDK_BDEV_IO_TYPE_RESET:
228 spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_SUCCESS);
229 break;
230 default:
231 spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED);
232 }
233 }
234
235 static bool
236 bdev_pmem_io_type_supported(void *ctx, enum spdk_bdev_io_type io_type)
237 {
238 switch (io_type) {
239 case SPDK_BDEV_IO_TYPE_READ:
240 case SPDK_BDEV_IO_TYPE_WRITE:
241 case SPDK_BDEV_IO_TYPE_RESET:
242 case SPDK_BDEV_IO_TYPE_UNMAP:
243 case SPDK_BDEV_IO_TYPE_WRITE_ZEROES:
244 return true;
245 default:
246 return false;
247 }
248 }
249
250 static struct spdk_io_channel *
251 bdev_pmem_get_io_channel(void *ctx)
252 {
253 return spdk_get_io_channel(&g_pmem_disks);
254 }
255
256 static int
257 bdev_pmem_dump_info_json(void *ctx, struct spdk_json_write_ctx *w)
258 {
259 struct pmem_disk *pdisk = ctx;
260
261 spdk_json_write_name(w, "pmem");
262 spdk_json_write_object_begin(w);
263 spdk_json_write_name(w, "pmem_file");
264 spdk_json_write_string(w, pdisk->pmem_file);
265 spdk_json_write_object_end(w);
266
267 return 0;
268 }
269
/* io_device channel-create callback: there is nothing to set up, so report success. */
static int
bdev_pmem_create_cb(void *io_device, void *ctx_buf)
{
	(void)io_device;
	(void)ctx_buf;

	return 0;
}
275
/* io_device channel-destroy callback: intentionally a no-op. */
static void
bdev_pmem_destroy_cb(void *io_device, void *ctx_buf)
{
	(void)io_device;
	(void)ctx_buf;
}
280
281 static void
282 bdev_pmem_write_config_json(struct spdk_bdev *bdev, struct spdk_json_write_ctx *w)
283 {
284 struct pmem_disk *disk = bdev->ctxt;
285
286 spdk_json_write_object_begin(w);
287
288 spdk_json_write_named_string(w, "method", "construct_pmem_bdev");
289
290 spdk_json_write_named_object_begin(w, "params");
291 spdk_json_write_named_string(w, "name", bdev->name);
292 spdk_json_write_named_string(w, "pmem_file", disk->pmem_file);
293 spdk_json_write_object_end(w);
294
295 spdk_json_write_object_end(w);
296 }
297
298 static const struct spdk_bdev_fn_table pmem_fn_table = {
299 .destruct = bdev_pmem_destruct,
300 .submit_request = bdev_pmem_submit_request,
301 .io_type_supported = bdev_pmem_io_type_supported,
302 .get_io_channel = bdev_pmem_get_io_channel,
303 .dump_info_json = bdev_pmem_dump_info_json,
304 .write_config_json = bdev_pmem_write_config_json,
305 };
306
307 int
308 spdk_create_pmem_disk(const char *pmem_file, const char *name, struct spdk_bdev **bdev)
309 {
310 uint64_t num_blocks;
311 uint32_t block_size;
312 struct pmem_disk *pdisk;
313 int rc;
314
315 *bdev = NULL;
316
317 if (name == NULL) {
318 SPDK_ERRLOG("Missing name parameter for spdk_create_pmem_disk()\n");
319 return EINVAL;
320 }
321
322 if (pmemblk_check(pmem_file, 0) != 1) {
323 SPDK_ERRLOG("Pool '%s' check failed: %s\n", pmem_file, pmemblk_errormsg());
324 return EIO;
325 }
326
327 pdisk = calloc(1, sizeof(*pdisk));
328 if (!pdisk) {
329 return ENOMEM;
330 }
331
332 snprintf(pdisk->pmem_file, sizeof(pdisk->pmem_file), "%s", pmem_file);
333 pdisk->pool = pmemblk_open(pmem_file, 0);
334 if (!pdisk->pool) {
335 SPDK_ERRLOG("Opening pmem pool '%s' failed: %d\n", pmem_file, errno);
336 free(pdisk);
337 return errno;
338 }
339
340 block_size = pmemblk_bsize(pdisk->pool);
341 num_blocks = pmemblk_nblock(pdisk->pool);
342
343 if (block_size == 0) {
344 SPDK_ERRLOG("Block size must be more than 0 bytes\n");
345 pmemblk_close(pdisk->pool);
346 free(pdisk);
347 return EINVAL;
348 }
349
350 if (num_blocks == 0) {
351 SPDK_ERRLOG("Disk must be more than 0 blocks\n");
352 pmemblk_close(pdisk->pool);
353 free(pdisk);
354 return EINVAL;
355 }
356
357 pdisk->disk.name = strdup(name);
358 if (!pdisk->disk.name) {
359 pmemblk_close(pdisk->pool);
360 free(pdisk);
361 return ENOMEM;
362 }
363
364 pdisk->disk.product_name = "pmemblk disk";
365 pdisk->disk.write_cache = 0;
366 pdisk->disk.blocklen = block_size;
367 pdisk->disk.blockcnt = num_blocks;
368
369 pdisk->disk.ctxt = pdisk;
370 pdisk->disk.fn_table = &pmem_fn_table;
371 pdisk->disk.module = &pmem_if;
372
373 rc = spdk_bdev_register(&pdisk->disk);
374 if (rc) {
375 pmemblk_close(pdisk->pool);
376 free(pdisk->disk.name);
377 free(pdisk);
378 return rc;
379 }
380
381 TAILQ_INSERT_TAIL(&g_pmem_disks, pdisk, tailq);
382
383 *bdev = &pdisk->disk;
384
385 return 0;
386 }
387
388 void
389 spdk_delete_pmem_disk(struct spdk_bdev *bdev, spdk_delete_pmem_complete cb_fn, void *cb_arg)
390 {
391 if (!bdev || bdev->module != &pmem_if) {
392 cb_fn(cb_arg, -ENODEV);
393 return;
394 }
395
396 spdk_bdev_unregister(bdev, cb_fn, cb_arg);
397 }
398
/*
 * Create pmem bdevs from the "Pmem" section of the legacy configuration file.
 *
 * Each "Blk" entry supplies the pool file (value 0) and the bdev name
 * (value 1).  Entries are processed best-effort: an incomplete or failing
 * entry is logged and skipped, and the remaining entries are still processed.
 */
static void
bdev_pmem_read_conf(void)
{
	struct spdk_conf_section *sp;
	struct spdk_bdev *bdev;
	const char *pmem_file;
	const char *bdev_name;
	int i;
	int rc;

	sp = spdk_conf_find_section(NULL, "Pmem");
	if (sp == NULL) {
		return;
	}

	/* Iterate until there is no i-th "Blk" entry. */
	for (i = 0; ; i++) {
		if (!spdk_conf_section_get_nval(sp, "Blk", i)) {
			break;
		}

		pmem_file = spdk_conf_section_get_nmval(sp, "Blk", i, 0);
		if (pmem_file == NULL) {
			SPDK_ERRLOG("Pmem: missing filename\n");
			continue;
		}

		bdev_name = spdk_conf_section_get_nmval(sp, "Blk", i, 1);
		if (bdev_name == NULL) {
			SPDK_ERRLOG("Pmem: missing bdev name\n");
			continue;
		}

		/*
		 * Do not drop the result silently: not every failure path in
		 * spdk_create_pmem_disk() logs, so make sure each failed entry
		 * leaves a trace naming the entry.
		 */
		rc = spdk_create_pmem_disk(pmem_file, bdev_name, &bdev);
		if (rc != 0) {
			SPDK_ERRLOG("Pmem: could not create bdev '%s' from '%s' (rc=%d)\n",
				    bdev_name, pmem_file, rc);
		}
	}
}
433
434 static int
435 bdev_pmem_initialize(void)
436 {
437 const char *err = pmemblk_check_version(PMEMBLK_MAJOR_VERSION, PMEMBLK_MINOR_VERSION);
438
439 if (err != NULL) {
440 SPDK_ERRLOG("Invalid libpmemblk version (expected %d.%d): %s\n", PMEMBLK_MAJOR_VERSION,
441 PMEMBLK_MINOR_VERSION, err);
442 return -1;
443 }
444
445 spdk_io_device_register(&g_pmem_disks, bdev_pmem_create_cb, bdev_pmem_destroy_cb, 0, "pmem_bdev");
446
447 bdev_pmem_read_conf();
448
449 return 0;
450
451 }
452
/*
 * Completion callback for the io_device unregistration started in
 * bdev_pmem_finish(): tells the bdev layer this module has finished tearing down.
 */
static void
bdev_pmem_finish_done(void *io_device)
{
	(void)io_device;

	spdk_bdev_module_finish_done();
}
458
459 static void
460 bdev_pmem_finish(void)
461 {
462 spdk_io_device_unregister(&g_pmem_disks, bdev_pmem_finish_done);
463 }
464
/* Registers the "bdev_pmem" log component, referenced in this file as SPDK_LOG_BDEV_PMEM. */
SPDK_LOG_REGISTER_COMPONENT("bdev_pmem", SPDK_LOG_BDEV_PMEM)