]> git.proxmox.com Git - ceph.git/blame - ceph/src/os/bluestore/aio.cc
update sources to ceph Nautilus 14.2.1
[ceph.git] / ceph / src / os / bluestore / aio.cc
CommitLineData
7c673cae
FG
1// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2// vim: ts=8 sw=2 smarttab
3
28e407b8 4#include <algorithm>
11fdf7f2 5#include "ceph_aio.h"
7c673cae 6
11fdf7f2 7std::ostream& operator<<(std::ostream& os, const aio_t& aio)
7c673cae 8{
11fdf7f2
TL
9 unsigned i = 0;
10 os << "aio: ";
11 for (auto& iov : aio.iov) {
12 os << "\n [" << i++ << "] 0x"
13 << std::hex << iov.iov_base << "~" << iov.iov_len << std::dec;
7c673cae 14 }
11fdf7f2 15 return os;
224ce89b
WB
16}
17
18int aio_queue_t::submit_batch(aio_iter begin, aio_iter end,
19 uint16_t aios_size, void *priv,
20 int *retries)
21{
22 // 2^16 * 125us = ~8 seconds, so max sleep is ~16 seconds
23 int attempts = 16;
24 int delay = 125;
11fdf7f2 25 int r;
224ce89b
WB
26
27 aio_iter cur = begin;
11fdf7f2 28 struct aio_t *piocb[aios_size];
181888fb 29 int left = 0;
224ce89b
WB
30 while (cur != end) {
31 cur->priv = priv;
11fdf7f2 32 *(piocb+left) = &(*cur);
181888fb 33 ++left;
224ce89b
WB
34 ++cur;
35 }
11fdf7f2 36 ceph_assert(aios_size >= left);
181888fb
FG
37 int done = 0;
38 while (left > 0) {
11fdf7f2
TL
39#if defined(HAVE_LIBAIO)
40 r = io_submit(ctx, std::min(left, max_iodepth), (struct iocb**)(piocb + done));
41#elif defined(HAVE_POSIXAIO)
42 if (piocb[done]->n_aiocb == 1) {
43 // TODO: consider batching multiple reads together with lio_listio
44 piocb[done]->aio.aiocb.aio_sigevent.sigev_notify = SIGEV_KEVENT;
45 piocb[done]->aio.aiocb.aio_sigevent.sigev_notify_kqueue = ctx;
46 piocb[done]->aio.aiocb.aio_sigevent.sigev_value.sival_ptr = piocb[done];
47 r = aio_read(&piocb[done]->aio.aiocb);
48 } else {
49 struct sigevent sev;
50 sev.sigev_notify = SIGEV_KEVENT;
51 sev.sigev_notify_kqueue = ctx;
52 sev.sigev_value.sival_ptr = piocb[done];
53 r = lio_listio(LIO_NOWAIT, &piocb[done]->aio.aiocbp, piocb[done]->n_aiocb, &sev);
54 }
55#endif
224ce89b
WB
56 if (r < 0) {
57 if (r == -EAGAIN && attempts-- > 0) {
58 usleep(delay);
59 delay *= 2;
60 (*retries)++;
61 continue;
62 }
181888fb 63 return r;
224ce89b 64 }
11fdf7f2 65 ceph_assert(r > 0);
181888fb
FG
66 done += r;
67 left -= r;
28e407b8
AA
68 attempts = 16;
69 delay = 125;
224ce89b 70 }
181888fb 71 return done;
7c673cae
FG
72}
73
74int aio_queue_t::get_next_completed(int timeout_ms, aio_t **paio, int max)
75{
11fdf7f2
TL
76#if defined(HAVE_LIBAIO)
77 io_event events[max];
78#elif defined(HAVE_POSIXAIO)
79 struct kevent events[max];
80#endif
7c673cae
FG
81 struct timespec t = {
82 timeout_ms / 1000,
83 (timeout_ms % 1000) * 1000 * 1000
84 };
85
86 int r = 0;
87 do {
11fdf7f2
TL
88#if defined(HAVE_LIBAIO)
89 r = io_getevents(ctx, 1, max, events, &t);
90#elif defined(HAVE_POSIXAIO)
91 r = kevent(ctx, NULL, 0, events, max, &t);
92 if (r < 0)
93 r = -errno;
94#endif
7c673cae
FG
95 } while (r == -EINTR);
96
97 for (int i=0; i<r; ++i) {
11fdf7f2
TL
98#if defined(HAVE_LIBAIO)
99 paio[i] = (aio_t *)events[i].obj;
100 paio[i]->rval = events[i].res;
101#else
102 paio[i] = (aio_t*)events[i].udata;
103 if (paio[i]->n_aiocb == 1) {
104 paio[i]->rval = aio_return(&paio[i]->aio.aiocb);
105 } else {
106 // Emulate the return value of pwritev. I can't find any documentation
107 // for what the value of io_event.res is supposed to be. I'm going to
108 // assume that it's just like pwritev/preadv/pwrite/pread.
109 paio[i]->rval = 0;
110 for (int j = 0; j < paio[i]->n_aiocb; j++) {
111 int res = aio_return(&paio[i]->aio.aiocbp[j]);
112 if (res < 0) {
113 paio[i]->rval = res;
114 break;
115 } else {
116 paio[i]->rval += res;
117 }
118 }
119 free(paio[i]->aio.aiocbp);
120 }
121#endif
7c673cae
FG
122 }
123 return r;
124}