]>
Commit | Line | Data |
---|---|---|
7c673cae FG |
1 | // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- |
2 | // vim: ts=8 sw=2 smarttab | |
3 | ||
28e407b8 | 4 | #include <algorithm> |
11fdf7f2 | 5 | #include "ceph_aio.h" |
7c673cae | 6 | |
11fdf7f2 | 7 | std::ostream& operator<<(std::ostream& os, const aio_t& aio) |
7c673cae | 8 | { |
11fdf7f2 TL |
9 | unsigned i = 0; |
10 | os << "aio: "; | |
11 | for (auto& iov : aio.iov) { | |
12 | os << "\n [" << i++ << "] 0x" | |
13 | << std::hex << iov.iov_base << "~" << iov.iov_len << std::dec; | |
7c673cae | 14 | } |
11fdf7f2 | 15 | return os; |
224ce89b WB |
16 | } |
17 | ||
18 | int aio_queue_t::submit_batch(aio_iter begin, aio_iter end, | |
19 | uint16_t aios_size, void *priv, | |
20 | int *retries) | |
21 | { | |
22 | // 2^16 * 125us = ~8 seconds, so max sleep is ~16 seconds | |
23 | int attempts = 16; | |
24 | int delay = 125; | |
11fdf7f2 | 25 | int r; |
224ce89b WB |
26 | |
27 | aio_iter cur = begin; | |
11fdf7f2 | 28 | struct aio_t *piocb[aios_size]; |
181888fb | 29 | int left = 0; |
224ce89b WB |
30 | while (cur != end) { |
31 | cur->priv = priv; | |
11fdf7f2 | 32 | *(piocb+left) = &(*cur); |
181888fb | 33 | ++left; |
224ce89b WB |
34 | ++cur; |
35 | } | |
11fdf7f2 | 36 | ceph_assert(aios_size >= left); |
181888fb FG |
37 | int done = 0; |
38 | while (left > 0) { | |
11fdf7f2 TL |
39 | #if defined(HAVE_LIBAIO) |
40 | r = io_submit(ctx, std::min(left, max_iodepth), (struct iocb**)(piocb + done)); | |
41 | #elif defined(HAVE_POSIXAIO) | |
42 | if (piocb[done]->n_aiocb == 1) { | |
43 | // TODO: consider batching multiple reads together with lio_listio | |
44 | piocb[done]->aio.aiocb.aio_sigevent.sigev_notify = SIGEV_KEVENT; | |
45 | piocb[done]->aio.aiocb.aio_sigevent.sigev_notify_kqueue = ctx; | |
46 | piocb[done]->aio.aiocb.aio_sigevent.sigev_value.sival_ptr = piocb[done]; | |
47 | r = aio_read(&piocb[done]->aio.aiocb); | |
48 | } else { | |
49 | struct sigevent sev; | |
50 | sev.sigev_notify = SIGEV_KEVENT; | |
51 | sev.sigev_notify_kqueue = ctx; | |
52 | sev.sigev_value.sival_ptr = piocb[done]; | |
53 | r = lio_listio(LIO_NOWAIT, &piocb[done]->aio.aiocbp, piocb[done]->n_aiocb, &sev); | |
54 | } | |
55 | #endif | |
224ce89b WB |
56 | if (r < 0) { |
57 | if (r == -EAGAIN && attempts-- > 0) { | |
58 | usleep(delay); | |
59 | delay *= 2; | |
60 | (*retries)++; | |
61 | continue; | |
62 | } | |
181888fb | 63 | return r; |
224ce89b | 64 | } |
11fdf7f2 | 65 | ceph_assert(r > 0); |
181888fb FG |
66 | done += r; |
67 | left -= r; | |
28e407b8 AA |
68 | attempts = 16; |
69 | delay = 125; | |
224ce89b | 70 | } |
181888fb | 71 | return done; |
7c673cae FG |
72 | } |
73 | ||
74 | int aio_queue_t::get_next_completed(int timeout_ms, aio_t **paio, int max) | |
75 | { | |
11fdf7f2 TL |
76 | #if defined(HAVE_LIBAIO) |
77 | io_event events[max]; | |
78 | #elif defined(HAVE_POSIXAIO) | |
79 | struct kevent events[max]; | |
80 | #endif | |
7c673cae FG |
81 | struct timespec t = { |
82 | timeout_ms / 1000, | |
83 | (timeout_ms % 1000) * 1000 * 1000 | |
84 | }; | |
85 | ||
86 | int r = 0; | |
87 | do { | |
11fdf7f2 TL |
88 | #if defined(HAVE_LIBAIO) |
89 | r = io_getevents(ctx, 1, max, events, &t); | |
90 | #elif defined(HAVE_POSIXAIO) | |
91 | r = kevent(ctx, NULL, 0, events, max, &t); | |
92 | if (r < 0) | |
93 | r = -errno; | |
94 | #endif | |
7c673cae FG |
95 | } while (r == -EINTR); |
96 | ||
97 | for (int i=0; i<r; ++i) { | |
11fdf7f2 TL |
98 | #if defined(HAVE_LIBAIO) |
99 | paio[i] = (aio_t *)events[i].obj; | |
100 | paio[i]->rval = events[i].res; | |
101 | #else | |
102 | paio[i] = (aio_t*)events[i].udata; | |
103 | if (paio[i]->n_aiocb == 1) { | |
104 | paio[i]->rval = aio_return(&paio[i]->aio.aiocb); | |
105 | } else { | |
106 | // Emulate the return value of pwritev. I can't find any documentation | |
107 | // for what the value of io_event.res is supposed to be. I'm going to | |
108 | // assume that it's just like pwritev/preadv/pwrite/pread. | |
109 | paio[i]->rval = 0; | |
110 | for (int j = 0; j < paio[i]->n_aiocb; j++) { | |
111 | int res = aio_return(&paio[i]->aio.aiocbp[j]); | |
112 | if (res < 0) { | |
113 | paio[i]->rval = res; | |
114 | break; | |
115 | } else { | |
116 | paio[i]->rval += res; | |
117 | } | |
118 | } | |
119 | free(paio[i]->aio.aiocbp); | |
120 | } | |
121 | #endif | |
7c673cae FG |
122 | } |
123 | return r; | |
124 | } |