]>
Commit | Line | Data |
---|---|---|
3c529d93 AL |
1 | /* |
2 | * QEMU posix-aio emulation | |
3 | * | |
4 | * Copyright IBM, Corp. 2008 | |
5 | * | |
6 | * Authors: | |
7 | * Anthony Liguori <aliguori@us.ibm.com> | |
8 | * | |
9 | * This work is licensed under the terms of the GNU GPL, version 2. See | |
10 | * the COPYING file in the top-level directory. | |
11 | * | |
12 | */ | |
13 | ||
221f715d | 14 | #include <sys/ioctl.h> |
3c529d93 AL |
15 | #include <pthread.h> |
16 | #include <unistd.h> | |
17 | #include <errno.h> | |
30525aff | 18 | #include <time.h> |
8653c015 | 19 | #include <string.h> |
20 | #include <stdlib.h> | |
21 | #include <stdio.h> | |
3c529d93 | 22 | #include "osdep.h" |
f141eafe | 23 | #include "qemu-common.h" |
3c529d93 AL |
24 | |
25 | #include "posix-aio-compat.h" | |
26 | ||
27 | static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER; | |
28 | static pthread_cond_t cond = PTHREAD_COND_INITIALIZER; | |
29 | static pthread_t thread_id; | |
a8227a5a | 30 | static pthread_attr_t attr; |
3c529d93 AL |
31 | static int max_threads = 64; |
32 | static int cur_threads = 0; | |
33 | static int idle_threads = 0; | |
34 | static TAILQ_HEAD(, qemu_paiocb) request_list; | |
35 | ||
/*
 * Print "<what> failed: <message for err>" on stderr and abort the
 * process.  Does not return.
 */
static void die2(int err, const char *what)
{
    const char *reason = strerror(err);

    fprintf(stderr, "%s failed: %s\n", what, reason);
    abort();
}
41 | ||
/*
 * Fatal-error helper for calls that report their failure through errno:
 * forwards the current errno value and `what` to die2().
 */
static void die(const char *what)
{
    const int saved_errno = errno;

    die2(saved_errno, what);
}
46 | ||
/* Lock `mutex`; any error from pthread_mutex_lock() is fatal. */
static void mutex_lock(pthread_mutex_t *mutex)
{
    const int err = pthread_mutex_lock(mutex);

    if (err != 0) {
        die2(err, "pthread_mutex_lock");
    }
}
52 | ||
/* Unlock `mutex`; any error from pthread_mutex_unlock() is fatal. */
static void mutex_unlock(pthread_mutex_t *mutex)
{
    const int err = pthread_mutex_unlock(mutex);

    if (err != 0) {
        die2(err, "pthread_mutex_unlock");
    }
}
58 | ||
/*
 * Wait on `cond` (with `mutex` held) until it is signalled or the absolute
 * time `ts` passes.
 *
 * Returns 0 or ETIMEDOUT; every other pthread_cond_timedwait() result is
 * treated as fatal.
 */
static int cond_timedwait(pthread_cond_t *cond, pthread_mutex_t *mutex,
                          struct timespec *ts)
{
    const int status = pthread_cond_timedwait(cond, mutex, ts);

    switch (status) {
    case 0:
    case ETIMEDOUT:
        break;
    default:
        die2(status, "pthread_cond_timedwait");
        break;
    }

    return status;
}
66 | ||
/* Signal `cond`; any error from pthread_cond_signal() is fatal. */
static void cond_signal(pthread_cond_t *cond)
{
    const int err = pthread_cond_signal(cond);

    if (err != 0) {
        die2(err, "pthread_cond_signal");
    }
}
72 | ||
/*
 * Start a new thread running start_routine(arg) with attributes `attr`,
 * storing its handle in *thread.  Failure to create the thread is fatal.
 */
static void thread_create(pthread_t *thread, pthread_attr_t *attr,
                          void *(*start_routine)(void*), void *arg)
{
    const int err = pthread_create(thread, attr, start_routine, arg);

    if (err != 0) {
        die2(err, "pthread_create");
    }
}
79 | ||
f141eafe AL |
80 | static size_t handle_aiocb_ioctl(struct qemu_paiocb *aiocb) |
81 | { | |
82 | int ret; | |
83 | ||
84 | ret = ioctl(aiocb->aio_fildes, aiocb->aio_ioctl_cmd, aiocb->aio_ioctl_buf); | |
85 | if (ret == -1) | |
86 | return -errno; | |
87 | return ret; | |
88 | } | |
89 | ||
90 | /* | |
91 | * Check if we need to copy the data in the aiocb into a new | |
92 | * properly aligned buffer. | |
93 | */ | |
94 | static int aiocb_needs_copy(struct qemu_paiocb *aiocb) | |
95 | { | |
96 | if (aiocb->aio_flags & QEMU_AIO_SECTOR_ALIGNED) { | |
97 | int i; | |
98 | ||
99 | for (i = 0; i < aiocb->aio_niov; i++) | |
100 | if ((uintptr_t) aiocb->aio_iov[i].iov_base % 512) | |
101 | return 1; | |
102 | } | |
103 | ||
104 | return 0; | |
105 | } | |
106 | ||
107 | static size_t handle_aiocb_rw_linear(struct qemu_paiocb *aiocb, char *buf) | |
221f715d AL |
108 | { |
109 | size_t offset = 0; | |
f141eafe | 110 | size_t len; |
221f715d AL |
111 | |
112 | while (offset < aiocb->aio_nbytes) { | |
f141eafe AL |
113 | if (aiocb->aio_type == QEMU_PAIO_WRITE) |
114 | len = pwrite(aiocb->aio_fildes, | |
115 | (const char *)buf + offset, | |
116 | aiocb->aio_nbytes - offset, | |
117 | aiocb->aio_offset + offset); | |
118 | else | |
119 | len = pread(aiocb->aio_fildes, | |
120 | buf + offset, | |
221f715d AL |
121 | aiocb->aio_nbytes - offset, |
122 | aiocb->aio_offset + offset); | |
221f715d | 123 | |
f141eafe AL |
124 | if (len == -1 && errno == EINTR) |
125 | continue; | |
126 | else if (len == -1) { | |
127 | offset = -errno; | |
128 | break; | |
129 | } else if (len == 0) | |
130 | break; | |
131 | ||
132 | offset += len; | |
221f715d AL |
133 | } |
134 | ||
135 | return offset; | |
136 | } | |
137 | ||
f141eafe | 138 | static size_t handle_aiocb_rw(struct qemu_paiocb *aiocb) |
221f715d | 139 | { |
f141eafe AL |
140 | size_t nbytes; |
141 | char *buf; | |
142 | ||
143 | if (!aiocb_needs_copy(aiocb) && aiocb->aio_niov == 1) { | |
144 | /* | |
145 | * If there is just a single buffer, and it is properly aligned | |
146 | * we can just use plain pread/pwrite without any problems. | |
147 | */ | |
148 | return handle_aiocb_rw_linear(aiocb, aiocb->aio_iov->iov_base); | |
149 | } | |
221f715d | 150 | |
f141eafe AL |
151 | /* |
152 | * Ok, we have to do it the hard way, copy all segments into | |
153 | * a single aligned buffer. | |
154 | */ | |
155 | buf = qemu_memalign(512, aiocb->aio_nbytes); | |
156 | if (aiocb->aio_type == QEMU_PAIO_WRITE) { | |
157 | char *p = buf; | |
158 | int i; | |
159 | ||
160 | for (i = 0; i < aiocb->aio_niov; ++i) { | |
161 | memcpy(p, aiocb->aio_iov[i].iov_base, aiocb->aio_iov[i].iov_len); | |
162 | p += aiocb->aio_iov[i].iov_len; | |
163 | } | |
164 | } | |
165 | ||
166 | nbytes = handle_aiocb_rw_linear(aiocb, buf); | |
167 | if (aiocb->aio_type != QEMU_PAIO_WRITE) { | |
168 | char *p = buf; | |
169 | size_t count = aiocb->aio_nbytes, copy; | |
170 | int i; | |
171 | ||
172 | for (i = 0; i < aiocb->aio_niov && count; ++i) { | |
173 | copy = count; | |
174 | if (copy > aiocb->aio_iov[i].iov_len) | |
175 | copy = aiocb->aio_iov[i].iov_len; | |
176 | memcpy(aiocb->aio_iov[i].iov_base, p, copy); | |
177 | p += copy; | |
178 | count -= copy; | |
179 | } | |
180 | } | |
181 | qemu_vfree(buf); | |
182 | ||
183 | return nbytes; | |
221f715d AL |
184 | } |
185 | ||
3c529d93 AL |
186 | static void *aio_thread(void *unused) |
187 | { | |
a8227a5a | 188 | pid_t pid; |
3c529d93 AL |
189 | sigset_t set; |
190 | ||
a8227a5a | 191 | pid = getpid(); |
192 | ||
3c529d93 | 193 | /* block all signals */ |
8653c015 | 194 | if (sigfillset(&set)) die("sigfillset"); |
195 | if (sigprocmask(SIG_BLOCK, &set, NULL)) die("sigprocmask"); | |
3c529d93 AL |
196 | |
197 | while (1) { | |
198 | struct qemu_paiocb *aiocb; | |
221f715d | 199 | size_t ret = 0; |
30525aff | 200 | qemu_timeval tv; |
201 | struct timespec ts; | |
202 | ||
203 | qemu_gettimeofday(&tv); | |
204 | ts.tv_sec = tv.tv_sec + 10; | |
205 | ts.tv_nsec = 0; | |
3c529d93 | 206 | |
8653c015 | 207 | mutex_lock(&lock); |
3c529d93 AL |
208 | |
209 | while (TAILQ_EMPTY(&request_list) && | |
210 | !(ret == ETIMEDOUT)) { | |
8653c015 | 211 | ret = cond_timedwait(&cond, &lock, &ts); |
3c529d93 AL |
212 | } |
213 | ||
514f7a27 | 214 | if (TAILQ_EMPTY(&request_list)) |
3c529d93 AL |
215 | break; |
216 | ||
217 | aiocb = TAILQ_FIRST(&request_list); | |
218 | TAILQ_REMOVE(&request_list, aiocb, node); | |
3c529d93 | 219 | aiocb->active = 1; |
3c529d93 | 220 | idle_threads--; |
8653c015 | 221 | mutex_unlock(&lock); |
3c529d93 | 222 | |
221f715d AL |
223 | switch (aiocb->aio_type) { |
224 | case QEMU_PAIO_READ: | |
225 | case QEMU_PAIO_WRITE: | |
f141eafe | 226 | ret = handle_aiocb_rw(aiocb); |
221f715d AL |
227 | break; |
228 | case QEMU_PAIO_IOCTL: | |
229 | ret = handle_aiocb_ioctl(aiocb); | |
230 | break; | |
231 | default: | |
232 | fprintf(stderr, "invalid aio request (0x%x)\n", aiocb->aio_type); | |
233 | ret = -EINVAL; | |
234 | break; | |
235 | } | |
3c529d93 | 236 | |
8653c015 | 237 | mutex_lock(&lock); |
221f715d | 238 | aiocb->ret = ret; |
3c529d93 | 239 | idle_threads++; |
8653c015 | 240 | mutex_unlock(&lock); |
3c529d93 | 241 | |
a8227a5a | 242 | if (kill(pid, aiocb->ev_signo)) die("kill failed"); |
3c529d93 AL |
243 | } |
244 | ||
245 | idle_threads--; | |
246 | cur_threads--; | |
8653c015 | 247 | mutex_unlock(&lock); |
3c529d93 AL |
248 | |
249 | return NULL; | |
250 | } | |
251 | ||
8653c015 | 252 | static void spawn_thread(void) |
3c529d93 | 253 | { |
3c529d93 AL |
254 | cur_threads++; |
255 | idle_threads++; | |
8653c015 | 256 | thread_create(&thread_id, &attr, aio_thread, NULL); |
3c529d93 AL |
257 | } |
258 | ||
259 | int qemu_paio_init(struct qemu_paioinit *aioinit) | |
260 | { | |
a8227a5a | 261 | int ret; |
262 | ||
263 | ret = pthread_attr_init(&attr); | |
264 | if (ret) die2(ret, "pthread_attr_init"); | |
265 | ||
266 | ret = pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED); | |
267 | if (ret) die2(ret, "pthread_attr_setdetachstate"); | |
268 | ||
3c529d93 AL |
269 | TAILQ_INIT(&request_list); |
270 | ||
271 | return 0; | |
272 | } | |
273 | ||
221f715d | 274 | static int qemu_paio_submit(struct qemu_paiocb *aiocb, int type) |
3c529d93 | 275 | { |
221f715d | 276 | aiocb->aio_type = type; |
3c529d93 AL |
277 | aiocb->ret = -EINPROGRESS; |
278 | aiocb->active = 0; | |
8653c015 | 279 | mutex_lock(&lock); |
3c529d93 AL |
280 | if (idle_threads == 0 && cur_threads < max_threads) |
281 | spawn_thread(); | |
282 | TAILQ_INSERT_TAIL(&request_list, aiocb, node); | |
8653c015 | 283 | mutex_unlock(&lock); |
5d47e372 | 284 | cond_signal(&cond); |
3c529d93 AL |
285 | |
286 | return 0; | |
287 | } | |
288 | ||
289 | int qemu_paio_read(struct qemu_paiocb *aiocb) | |
290 | { | |
221f715d | 291 | return qemu_paio_submit(aiocb, QEMU_PAIO_READ); |
3c529d93 AL |
292 | } |
293 | ||
294 | int qemu_paio_write(struct qemu_paiocb *aiocb) | |
295 | { | |
221f715d AL |
296 | return qemu_paio_submit(aiocb, QEMU_PAIO_WRITE); |
297 | } | |
298 | ||
299 | int qemu_paio_ioctl(struct qemu_paiocb *aiocb) | |
300 | { | |
301 | return qemu_paio_submit(aiocb, QEMU_PAIO_IOCTL); | |
3c529d93 AL |
302 | } |
303 | ||
304 | ssize_t qemu_paio_return(struct qemu_paiocb *aiocb) | |
305 | { | |
306 | ssize_t ret; | |
307 | ||
8653c015 | 308 | mutex_lock(&lock); |
3c529d93 | 309 | ret = aiocb->ret; |
8653c015 | 310 | mutex_unlock(&lock); |
3c529d93 AL |
311 | |
312 | return ret; | |
313 | } | |
314 | ||
/*
 * Report the error state of `aiocb` as a non-negative number: the negated
 * result when qemu_paio_return() is negative, otherwise 0.
 */
int qemu_paio_error(struct qemu_paiocb *aiocb)
{
    const ssize_t status = qemu_paio_return(aiocb);

    return (status < 0) ? -status : 0;
}
326 | ||
327 | int qemu_paio_cancel(int fd, struct qemu_paiocb *aiocb) | |
328 | { | |
329 | int ret; | |
330 | ||
8653c015 | 331 | mutex_lock(&lock); |
3c529d93 AL |
332 | if (!aiocb->active) { |
333 | TAILQ_REMOVE(&request_list, aiocb, node); | |
334 | aiocb->ret = -ECANCELED; | |
335 | ret = QEMU_PAIO_CANCELED; | |
336 | } else if (aiocb->ret == -EINPROGRESS) | |
337 | ret = QEMU_PAIO_NOTCANCELED; | |
338 | else | |
339 | ret = QEMU_PAIO_ALLDONE; | |
8653c015 | 340 | mutex_unlock(&lock); |
3c529d93 AL |
341 | |
342 | return ret; | |
343 | } |