]> git.proxmox.com Git - ceph.git/blob - ceph/src/os/bluestore/aio.cc
bump version to 15.2.11-pve1
[ceph.git] / ceph / src / os / bluestore / aio.cc
1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 // vim: ts=8 sw=2 smarttab
3
4 #include <algorithm>
5 #include "ceph_aio.h"
6
7 std::ostream& operator<<(std::ostream& os, const aio_t& aio)
8 {
9 unsigned i = 0;
10 os << "aio: ";
11 for (auto& iov : aio.iov) {
12 os << "\n [" << i++ << "] 0x"
13 << std::hex << iov.iov_base << "~" << iov.iov_len << std::dec;
14 }
15 return os;
16 }
17
18 int aio_queue_t::submit_batch(aio_iter begin, aio_iter end,
19 uint16_t aios_size, void *priv,
20 int *retries)
21 {
22 // 2^16 * 125us = ~8 seconds, so max sleep is ~16 seconds
23 int attempts = 16;
24 int delay = 125;
25 int r;
26
27 aio_iter cur = begin;
28 struct aio_t *piocb[aios_size];
29 int left = 0;
30 while (cur != end) {
31 cur->priv = priv;
32 *(piocb+left) = &(*cur);
33 ++left;
34 ++cur;
35 }
36 ceph_assert(aios_size >= left);
37 int done = 0;
38 while (left > 0) {
39 #if defined(HAVE_LIBAIO)
40 r = io_submit(ctx, std::min(left, max_iodepth), (struct iocb**)(piocb + done));
41 #elif defined(HAVE_POSIXAIO)
42 if (piocb[done]->n_aiocb == 1) {
43 // TODO: consider batching multiple reads together with lio_listio
44 piocb[done]->aio.aiocb.aio_sigevent.sigev_notify = SIGEV_KEVENT;
45 piocb[done]->aio.aiocb.aio_sigevent.sigev_notify_kqueue = ctx;
46 piocb[done]->aio.aiocb.aio_sigevent.sigev_value.sival_ptr = piocb[done];
47 r = aio_read(&piocb[done]->aio.aiocb);
48 } else {
49 struct sigevent sev;
50 sev.sigev_notify = SIGEV_KEVENT;
51 sev.sigev_notify_kqueue = ctx;
52 sev.sigev_value.sival_ptr = piocb[done];
53 r = lio_listio(LIO_NOWAIT, &piocb[done]->aio.aiocbp, piocb[done]->n_aiocb, &sev);
54 }
55 #endif
56 if (r < 0) {
57 if (r == -EAGAIN && attempts-- > 0) {
58 usleep(delay);
59 delay *= 2;
60 (*retries)++;
61 continue;
62 }
63 return r;
64 }
65 ceph_assert(r > 0);
66 done += r;
67 left -= r;
68 attempts = 16;
69 delay = 125;
70 }
71 return done;
72 }
73
74 int aio_queue_t::get_next_completed(int timeout_ms, aio_t **paio, int max)
75 {
76 #if defined(HAVE_LIBAIO)
77 io_event events[max];
78 #elif defined(HAVE_POSIXAIO)
79 struct kevent events[max];
80 #endif
81 struct timespec t = {
82 timeout_ms / 1000,
83 (timeout_ms % 1000) * 1000 * 1000
84 };
85
86 int r = 0;
87 do {
88 #if defined(HAVE_LIBAIO)
89 r = io_getevents(ctx, 1, max, events, &t);
90 #elif defined(HAVE_POSIXAIO)
91 r = kevent(ctx, NULL, 0, events, max, &t);
92 if (r < 0)
93 r = -errno;
94 #endif
95 } while (r == -EINTR);
96
97 for (int i=0; i<r; ++i) {
98 #if defined(HAVE_LIBAIO)
99 paio[i] = (aio_t *)events[i].obj;
100 paio[i]->rval = events[i].res;
101 #else
102 paio[i] = (aio_t*)events[i].udata;
103 if (paio[i]->n_aiocb == 1) {
104 paio[i]->rval = aio_return(&paio[i]->aio.aiocb);
105 } else {
106 // Emulate the return value of pwritev. I can't find any documentation
107 // for what the value of io_event.res is supposed to be. I'm going to
108 // assume that it's just like pwritev/preadv/pwrite/pread.
109 paio[i]->rval = 0;
110 for (int j = 0; j < paio[i]->n_aiocb; j++) {
111 int res = aio_return(&paio[i]->aio.aiocbp[j]);
112 if (res < 0) {
113 paio[i]->rval = res;
114 break;
115 } else {
116 paio[i]->rval += res;
117 }
118 }
119 free(paio[i]->aio.aiocbp);
120 }
121 #endif
122 }
123 return r;
124 }