]> git.proxmox.com Git - ceph.git/blob - ceph/src/spdk/lib/bdev/aio/blockdev_aio.c
add subtree-ish sources for 12.0.3
[ceph.git] / ceph / src / spdk / lib / bdev / aio / blockdev_aio.c
1 /*-
2 * BSD LICENSE
3 *
4 * Copyright (c) Intel Corporation.
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 *
11 * * Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * * Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in
15 * the documentation and/or other materials provided with the
16 * distribution.
17 * * Neither the name of Intel Corporation nor the names of its
18 * contributors may be used to endorse or promote products derived
19 * from this software without specific prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32 */
33
34 #include "blockdev_aio.h"
35
36 #include <errno.h>
37 #include <fcntl.h>
38 #include <stdbool.h>
39 #include <stdlib.h>
40 #include <unistd.h>
41 #include <sys/stat.h>
42 #include <sys/ioctl.h>
43
44 #include "spdk/bdev.h"
45 #include "spdk/conf.h"
46 #include "spdk/env.h"
47 #include "spdk/fd.h"
48 #include "spdk/io_channel.h"
49
50 #include "spdk_internal/log.h"
51
/*
 * Forward declarations: both functions are defined further down in this file
 * but are referenced before their definitions.
 */
static int blockdev_aio_initialize(void);
static void aio_free_disk(struct file_disk *fdisk);
54
55 static int
56 blockdev_aio_get_ctx_size(void)
57 {
58 return sizeof(struct blockdev_aio_task);
59 }
60
/*
 * Register this module with the bdev layer, handing it the init routine and
 * the context-size callback defined in this file.  The two NULL arguments
 * leave the remaining hooks unset (presumably the fini and config hooks --
 * confirm against the SPDK_BDEV_MODULE_REGISTER definition).
 */
SPDK_BDEV_MODULE_REGISTER(blockdev_aio_initialize, NULL, NULL, blockdev_aio_get_ctx_size)
62
63 static int
64 blockdev_aio_open(struct file_disk *disk)
65 {
66 int fd;
67
68 fd = open(disk->file, O_RDWR | O_DIRECT);
69 if (fd < 0) {
70 perror("open");
71 disk->fd = -1;
72 return -1;
73 }
74
75 disk->fd = fd;
76
77 return 0;
78 }
79
80 static int
81 blockdev_aio_close(struct file_disk *disk)
82 {
83 int rc;
84
85 if (disk->fd == -1) {
86 return 0;
87 }
88
89 rc = close(disk->fd);
90 if (rc < 0) {
91 perror("close");
92 return -1;
93 }
94
95 disk->fd = -1;
96
97 return 0;
98 }
99
100 static int64_t
101 blockdev_aio_readv(struct file_disk *fdisk, struct spdk_io_channel *ch,
102 struct blockdev_aio_task *aio_task,
103 struct iovec *iov, int iovcnt, uint64_t nbytes, uint64_t offset)
104 {
105 struct iocb *iocb = &aio_task->iocb;
106 struct blockdev_aio_io_channel *aio_ch = spdk_io_channel_get_ctx(ch);
107 int rc;
108
109 io_prep_preadv(iocb, fdisk->fd, iov, iovcnt, offset);
110 iocb->data = aio_task;
111 aio_task->len = nbytes;
112
113 SPDK_TRACELOG(SPDK_TRACE_AIO, "read %d iovs size %lu to off: %#lx\n",
114 iovcnt, nbytes, offset);
115
116 rc = io_submit(aio_ch->io_ctx, 1, &iocb);
117 if (rc < 0) {
118 spdk_bdev_io_complete(spdk_bdev_io_from_ctx(aio_task), SPDK_BDEV_IO_STATUS_FAILED);
119 SPDK_ERRLOG("%s: io_submit returned %d\n", __func__, rc);
120 return -1;
121 }
122
123 return nbytes;
124 }
125
126 static int64_t
127 blockdev_aio_writev(struct file_disk *fdisk, struct spdk_io_channel *ch,
128 struct blockdev_aio_task *aio_task,
129 struct iovec *iov, int iovcnt, size_t len, uint64_t offset)
130 {
131 struct iocb *iocb = &aio_task->iocb;
132 struct blockdev_aio_io_channel *aio_ch = spdk_io_channel_get_ctx(ch);
133 int rc;
134
135 io_prep_pwritev(iocb, fdisk->fd, iov, iovcnt, offset);
136 iocb->data = aio_task;
137 aio_task->len = len;
138
139 SPDK_TRACELOG(SPDK_TRACE_AIO, "write %d iovs size %lu from off: %#lx\n",
140 iovcnt, len, offset);
141
142 rc = io_submit(aio_ch->io_ctx, 1, &iocb);
143 if (rc < 0) {
144 spdk_bdev_io_complete(spdk_bdev_io_from_ctx(aio_task), SPDK_BDEV_IO_STATUS_FAILED);
145 SPDK_ERRLOG("%s: io_submit returned %d\n", __func__, rc);
146 return -1;
147 }
148
149 return len;
150 }
151
152 static void
153 blockdev_aio_flush(struct file_disk *fdisk, struct blockdev_aio_task *aio_task,
154 uint64_t offset, uint64_t nbytes)
155 {
156 int rc = fsync(fdisk->fd);
157
158 spdk_bdev_io_complete(spdk_bdev_io_from_ctx(aio_task),
159 rc == 0 ? SPDK_BDEV_IO_STATUS_SUCCESS : SPDK_BDEV_IO_STATUS_FAILED);
160 }
161
/*
 * bdev destruct hook (installed in aio_fn_table): close the backing file and
 * release the file_disk passed as ctx.
 *
 * The disk is freed even if the close fails.  Returns the result of
 * blockdev_aio_close(): 0 on success, negative on failure.
 */
static int
blockdev_aio_destruct(void *ctx)
{
	struct file_disk *disk = ctx;
	const int close_rc = blockdev_aio_close(disk);

	if (close_rc < 0) {
		SPDK_ERRLOG("blockdev_aio_close() failed\n");
	}

	aio_free_disk(disk);
	return close_rc;
}
175
176 static int
177 blockdev_aio_initialize_io_channel(struct blockdev_aio_io_channel *ch)
178 {
179 ch->queue_depth = 128;
180
181 if (io_setup(ch->queue_depth, &ch->io_ctx) < 0) {
182 SPDK_ERRLOG("async I/O context setup failure\n");
183 return -1;
184 }
185
186 ch->events = calloc(sizeof(struct io_event), ch->queue_depth);
187 if (!ch->events) {
188 io_destroy(ch->io_ctx);
189 return -1;
190 }
191
192 return 0;
193 }
194
195 static void
196 blockdev_aio_poll(void *arg)
197 {
198 struct blockdev_aio_io_channel *ch = arg;
199 int nr, i;
200 enum spdk_bdev_io_status status;
201 struct blockdev_aio_task *aio_task;
202 struct timespec timeout;
203
204 timeout.tv_sec = 0;
205 timeout.tv_nsec = 0;
206
207 nr = io_getevents(ch->io_ctx, 1, ch->queue_depth,
208 ch->events, &timeout);
209
210 if (nr < 0) {
211 SPDK_ERRLOG("%s: io_getevents returned %d\n", __func__, nr);
212 return;
213 }
214
215 for (i = 0; i < nr; i++) {
216 aio_task = ch->events[i].data;
217 if (ch->events[i].res != aio_task->len) {
218 status = SPDK_BDEV_IO_STATUS_FAILED;
219 } else {
220 status = SPDK_BDEV_IO_STATUS_SUCCESS;
221 }
222
223 spdk_bdev_io_complete(spdk_bdev_io_from_ctx(aio_task), status);
224 }
225 }
226
227 static void
228 blockdev_aio_reset(struct file_disk *fdisk, struct blockdev_aio_task *aio_task)
229 {
230 spdk_bdev_io_complete(spdk_bdev_io_from_ctx(aio_task), SPDK_BDEV_IO_STATUS_SUCCESS);
231 }
232
233 static void blockdev_aio_get_rbuf_cb(struct spdk_bdev_io *bdev_io)
234 {
235 blockdev_aio_readv((struct file_disk *)bdev_io->ctx,
236 bdev_io->ch,
237 (struct blockdev_aio_task *)bdev_io->driver_ctx,
238 bdev_io->u.read.iovs,
239 bdev_io->u.read.iovcnt,
240 bdev_io->u.read.len,
241 bdev_io->u.read.offset);
242 }
243
244 static int _blockdev_aio_submit_request(struct spdk_bdev_io *bdev_io)
245 {
246 switch (bdev_io->type) {
247 case SPDK_BDEV_IO_TYPE_READ:
248 spdk_bdev_io_get_rbuf(bdev_io, blockdev_aio_get_rbuf_cb);
249 return 0;
250
251 case SPDK_BDEV_IO_TYPE_WRITE:
252 blockdev_aio_writev((struct file_disk *)bdev_io->ctx,
253 bdev_io->ch,
254 (struct blockdev_aio_task *)bdev_io->driver_ctx,
255 bdev_io->u.write.iovs,
256 bdev_io->u.write.iovcnt,
257 bdev_io->u.write.len,
258 bdev_io->u.write.offset);
259 return 0;
260 case SPDK_BDEV_IO_TYPE_FLUSH:
261 blockdev_aio_flush((struct file_disk *)bdev_io->ctx,
262 (struct blockdev_aio_task *)bdev_io->driver_ctx,
263 bdev_io->u.flush.offset,
264 bdev_io->u.flush.length);
265 return 0;
266
267 case SPDK_BDEV_IO_TYPE_RESET:
268 blockdev_aio_reset((struct file_disk *)bdev_io->ctx,
269 (struct blockdev_aio_task *)bdev_io->driver_ctx);
270 return 0;
271 default:
272 return -1;
273 }
274 }
275
276 static void blockdev_aio_submit_request(struct spdk_bdev_io *bdev_io)
277 {
278 if (_blockdev_aio_submit_request(bdev_io) < 0) {
279 spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED);
280 }
281 }
282
283 static bool
284 blockdev_aio_io_type_supported(void *ctx, enum spdk_bdev_io_type io_type)
285 {
286 switch (io_type) {
287 case SPDK_BDEV_IO_TYPE_READ:
288 case SPDK_BDEV_IO_TYPE_WRITE:
289 case SPDK_BDEV_IO_TYPE_FLUSH:
290 case SPDK_BDEV_IO_TYPE_RESET:
291 return true;
292
293 default:
294 return false;
295 }
296 }
297
298 static int
299 blockdev_aio_create_cb(void *io_device, uint32_t priority, void *ctx_buf, void *unique_ctx)
300 {
301 struct blockdev_aio_io_channel *ch = ctx_buf;
302
303 if (blockdev_aio_initialize_io_channel(ch) != 0) {
304 return -1;
305 }
306
307 spdk_poller_register(&ch->poller, blockdev_aio_poll, ch,
308 spdk_env_get_current_core(), 0);
309 return 0;
310 }
311
312 static void
313 blockdev_aio_destroy_cb(void *io_device, void *ctx_buf)
314 {
315 struct blockdev_aio_io_channel *io_channel = ctx_buf;
316
317 io_destroy(io_channel->io_ctx);
318 free(io_channel->events);
319 spdk_poller_unregister(&io_channel->poller, NULL);
320 }
321
322 static struct spdk_io_channel *
323 blockdev_aio_get_io_channel(void *ctx, uint32_t priority)
324 {
325 struct file_disk *fdisk = ctx;
326
327 return spdk_get_io_channel(&fdisk->fd, priority, false, NULL);
328 }
329
330 static const struct spdk_bdev_fn_table aio_fn_table = {
331 .destruct = blockdev_aio_destruct,
332 .submit_request = blockdev_aio_submit_request,
333 .io_type_supported = blockdev_aio_io_type_supported,
334 .get_io_channel = blockdev_aio_get_io_channel,
335 };
336
/*
 * Release the memory of a file_disk.  Passing NULL is allowed and does
 * nothing, because free(NULL) is defined to be a no-op; no explicit guard
 * is needed.
 */
static void aio_free_disk(struct file_disk *fdisk)
{
	free(fdisk);
}
343
344 struct spdk_bdev *
345 create_aio_disk(const char *name, const char *fname)
346 {
347 struct file_disk *fdisk;
348
349 fdisk = calloc(sizeof(*fdisk), 1);
350 if (!fdisk) {
351 SPDK_ERRLOG("Unable to allocate enough memory for aio backend\n");
352 return NULL;
353 }
354
355 fdisk->file = fname;
356 if (blockdev_aio_open(fdisk)) {
357 SPDK_ERRLOG("Unable to open file %s. fd: %d errno: %d\n", fname, fdisk->fd, errno);
358 goto error_return;
359 }
360
361 fdisk->size = spdk_fd_get_size(fdisk->fd);
362
363 TAILQ_INIT(&fdisk->sync_completion_list);
364 snprintf(fdisk->disk.name, SPDK_BDEV_MAX_NAME_LENGTH, "%s", name);
365 snprintf(fdisk->disk.product_name, SPDK_BDEV_MAX_PRODUCT_NAME_LENGTH, "AIO disk");
366
367 fdisk->disk.need_aligned_buffer = 1;
368 fdisk->disk.write_cache = 1;
369 fdisk->disk.blocklen = spdk_fd_get_blocklen(fdisk->fd);
370 fdisk->disk.blockcnt = fdisk->size / fdisk->disk.blocklen;
371 fdisk->disk.ctxt = fdisk;
372
373 fdisk->disk.fn_table = &aio_fn_table;
374
375 spdk_io_device_register(&fdisk->fd, blockdev_aio_create_cb, blockdev_aio_destroy_cb,
376 sizeof(struct blockdev_aio_io_channel));
377 spdk_bdev_register(&fdisk->disk);
378 return &fdisk->disk;
379
380 error_return:
381 blockdev_aio_close(fdisk);
382 aio_free_disk(fdisk);
383 return NULL;
384 }
385
/*
 * Module init routine: create one AIO bdev per "AIO" entry in the [AIO]
 * configuration section.
 *
 * For each entry, value 0 is the backing file path and value 1 the bdev name.
 * Entries without a name, and entries whose disk cannot be created, are
 * logged and skipped.  The scan stops at the first index with no file value.
 *
 * Always returns 0, including when the section is absent.
 */
static int
blockdev_aio_initialize(void)
{
	struct spdk_conf_section *section;
	size_t idx;

	section = spdk_conf_find_section(NULL, "AIO");
	if (!section) {
		return 0;
	}

	for (idx = 0; ; idx++) {
		const char *path;
		const char *bdev_name;

		path = spdk_conf_section_get_nmval(section, "AIO", idx, 0);
		if (!path) {
			break;
		}

		bdev_name = spdk_conf_section_get_nmval(section, "AIO", idx, 1);
		if (!bdev_name) {
			SPDK_ERRLOG("No name provided for AIO disk with file %s\n", path);
			continue;
		}

		if (!create_aio_disk(bdev_name, path)) {
			SPDK_ERRLOG("Unable to create AIO bdev from file %s\n", path);
		}
	}

	return 0;
}
426
/*
 * Register the trace flag name "aio" for SPDK_TRACE_AIO, the flag used by the
 * SPDK_TRACELOG calls in the read and write submit paths of this file.
 */
SPDK_LOG_REGISTER_TRACE_FLAG("aio", SPDK_TRACE_AIO)