]> git.proxmox.com Git - qemu.git/blame - posix-aio-compat.c
virtio-console: rename dvq to ovq
[qemu.git] / posix-aio-compat.c
CommitLineData
3c529d93
AL
1/*
2 * QEMU posix-aio emulation
3 *
4 * Copyright IBM, Corp. 2008
5 *
6 * Authors:
7 * Anthony Liguori <aliguori@us.ibm.com>
8 *
9 * This work is licensed under the terms of the GNU GPL, version 2. See
10 * the COPYING file in the top-level directory.
11 *
12 */
13
221f715d 14#include <sys/ioctl.h>
3c529d93
AL
15#include <pthread.h>
16#include <unistd.h>
17#include <errno.h>
30525aff 18#include <time.h>
8653c015 19#include <string.h>
20#include <stdlib.h>
21#include <stdio.h>
3c529d93 22#include "osdep.h"
f141eafe 23#include "qemu-common.h"
3c529d93
AL
24
25#include "posix-aio-compat.h"
26
27static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
28static pthread_cond_t cond = PTHREAD_COND_INITIALIZER;
29static pthread_t thread_id;
a8227a5a 30static pthread_attr_t attr;
3c529d93
AL
31static int max_threads = 64;
32static int cur_threads = 0;
33static int idle_threads = 0;
34static TAILQ_HEAD(, qemu_paiocb) request_list;
35
/*
 * Non-zero while preadv()/pwritev() are worth trying.  The initial value
 * mirrors the build-time CONFIG_PREADV setting.
 */
static int preadv_present =
#ifdef CONFIG_PREADV
    1;
#else
    0;
#endif
41
/*
 * Report a failed operation on stderr and abort the process.
 *
 * err:  errno-style error number, rendered with strerror()
 * what: name of the operation that failed
 */
static void die2(int err, const char *what)
{
    const char *reason = strerror(err);

    fprintf(stderr, "%s failed: %s\n", what, reason);
    abort();
}
47
/* Abort via die2(), using the current value of errno as the error number. */
static void die(const char *what)
{
    int saved_errno = errno;

    die2(saved_errno, what);
}
52
/* Lock 'mutex'; a failure is fatal and is reported through die2(). */
static void mutex_lock(pthread_mutex_t *mutex)
{
    int err;

    err = pthread_mutex_lock(mutex);
    if (err != 0) {
        die2(err, "pthread_mutex_lock");
    }
}
58
/* Unlock 'mutex'; a failure is fatal and is reported through die2(). */
static void mutex_unlock(pthread_mutex_t *mutex)
{
    int err;

    err = pthread_mutex_unlock(mutex);
    if (err != 0) {
        die2(err, "pthread_mutex_unlock");
    }
}
64
/*
 * Wait on 'cond' (with 'mutex' held) until it is signalled or the absolute
 * deadline 'ts' has passed.
 *
 * Returns 0 on wakeup or ETIMEDOUT on timeout.  Any other result from
 * pthread_cond_timedwait() is fatal and is reported through die2().
 */
static int cond_timedwait(pthread_cond_t *cond, pthread_mutex_t *mutex,
                          struct timespec *ts)
{
    int rc = pthread_cond_timedwait(cond, mutex, ts);

    switch (rc) {
    case 0:
    case ETIMEDOUT:
        break;
    default:
        die2(rc, "pthread_cond_timedwait");
        break;
    }

    return rc;
}
72
/* Wake one waiter on 'cond'; a failure is fatal and goes through die2(). */
static void cond_signal(pthread_cond_t *cond)
{
    int err;

    err = pthread_cond_signal(cond);
    if (err != 0) {
        die2(err, "pthread_cond_signal");
    }
}
78
/*
 * Create a thread running start_routine(arg) with the given attributes.
 * A pthread_create() failure is fatal and is reported through die2().
 */
static void thread_create(pthread_t *thread, pthread_attr_t *attr,
                          void *(*start_routine)(void*), void *arg)
{
    int err;

    err = pthread_create(thread, attr, start_routine, arg);
    if (err != 0) {
        die2(err, "pthread_create");
    }
}
85
f141eafe
AL
86static size_t handle_aiocb_ioctl(struct qemu_paiocb *aiocb)
87{
88 int ret;
89
90 ret = ioctl(aiocb->aio_fildes, aiocb->aio_ioctl_cmd, aiocb->aio_ioctl_buf);
91 if (ret == -1)
92 return -errno;
e7d54ae8
CH
93
94 /*
95 * This looks weird, but the aio code only consideres a request
96 * successfull if it has written the number full number of bytes.
97 *
98 * Now we overload aio_nbytes as aio_ioctl_cmd for the ioctl command,
99 * so in fact we return the ioctl command here to make posix_aio_read()
100 * happy..
101 */
102 return aiocb->aio_nbytes;
f141eafe
AL
103}
104
/*
 * Positioned scatter/gather I/O helpers.  With CONFIG_PREADV they forward
 * to preadv()/pwritev(); without it they are stubs that return -ENOSYS.
 */
#ifdef CONFIG_PREADV

static ssize_t
qemu_preadv(int fd, const struct iovec *iov, int nr_iov, off_t offset)
{
    ssize_t done = preadv(fd, iov, nr_iov, offset);

    return done;
}

static ssize_t
qemu_pwritev(int fd, const struct iovec *iov, int nr_iov, off_t offset)
{
    ssize_t done = pwritev(fd, iov, nr_iov, offset);

    return done;
}

#else

static ssize_t
qemu_preadv(int fd, const struct iovec *iov, int nr_iov, off_t offset)
{
    const ssize_t unsupported = -ENOSYS;

    return unsupported;
}

static ssize_t
qemu_pwritev(int fd, const struct iovec *iov, int nr_iov, off_t offset)
{
    const ssize_t unsupported = -ENOSYS;

    return unsupported;
}

#endif
134
f141eafe
AL
135/*
136 * Check if we need to copy the data in the aiocb into a new
137 * properly aligned buffer.
138 */
139static int aiocb_needs_copy(struct qemu_paiocb *aiocb)
140{
141 if (aiocb->aio_flags & QEMU_AIO_SECTOR_ALIGNED) {
142 int i;
143
144 for (i = 0; i < aiocb->aio_niov; i++)
145 if ((uintptr_t) aiocb->aio_iov[i].iov_base % 512)
146 return 1;
147 }
148
149 return 0;
150}
151
ceb42de8
AL
152static size_t handle_aiocb_rw_vector(struct qemu_paiocb *aiocb)
153{
154 size_t offset = 0;
155 ssize_t len;
156
157 do {
158 if (aiocb->aio_type == QEMU_PAIO_WRITE)
159 len = qemu_pwritev(aiocb->aio_fildes,
160 aiocb->aio_iov,
161 aiocb->aio_niov,
162 aiocb->aio_offset + offset);
163 else
164 len = qemu_preadv(aiocb->aio_fildes,
165 aiocb->aio_iov,
166 aiocb->aio_niov,
167 aiocb->aio_offset + offset);
168 } while (len == -1 && errno == EINTR);
169
170 if (len == -1)
171 return -errno;
172 return len;
173}
174
f141eafe 175static size_t handle_aiocb_rw_linear(struct qemu_paiocb *aiocb, char *buf)
221f715d
AL
176{
177 size_t offset = 0;
f141eafe 178 size_t len;
221f715d
AL
179
180 while (offset < aiocb->aio_nbytes) {
f141eafe
AL
181 if (aiocb->aio_type == QEMU_PAIO_WRITE)
182 len = pwrite(aiocb->aio_fildes,
183 (const char *)buf + offset,
184 aiocb->aio_nbytes - offset,
185 aiocb->aio_offset + offset);
186 else
187 len = pread(aiocb->aio_fildes,
188 buf + offset,
221f715d
AL
189 aiocb->aio_nbytes - offset,
190 aiocb->aio_offset + offset);
221f715d 191
f141eafe
AL
192 if (len == -1 && errno == EINTR)
193 continue;
194 else if (len == -1) {
195 offset = -errno;
196 break;
197 } else if (len == 0)
198 break;
199
200 offset += len;
221f715d
AL
201 }
202
203 return offset;
204}
205
f141eafe 206static size_t handle_aiocb_rw(struct qemu_paiocb *aiocb)
221f715d 207{
f141eafe
AL
208 size_t nbytes;
209 char *buf;
210
ceb42de8 211 if (!aiocb_needs_copy(aiocb)) {
f141eafe
AL
212 /*
213 * If there is just a single buffer, and it is properly aligned
214 * we can just use plain pread/pwrite without any problems.
215 */
ceb42de8
AL
216 if (aiocb->aio_niov == 1)
217 return handle_aiocb_rw_linear(aiocb, aiocb->aio_iov->iov_base);
218
219 /*
220 * We have more than one iovec, and all are properly aligned.
221 *
222 * Try preadv/pwritev first and fall back to linearizing the
223 * buffer if it's not supported.
224 */
225 if (preadv_present) {
226 nbytes = handle_aiocb_rw_vector(aiocb);
227 if (nbytes == aiocb->aio_nbytes)
228 return nbytes;
229 if (nbytes < 0 && nbytes != -ENOSYS)
230 return nbytes;
231 preadv_present = 0;
232 }
233
234 /*
235 * XXX(hch): short read/write. no easy way to handle the reminder
236 * using these interfaces. For now retry using plain
237 * pread/pwrite?
238 */
f141eafe 239 }
221f715d 240
f141eafe
AL
241 /*
242 * Ok, we have to do it the hard way, copy all segments into
243 * a single aligned buffer.
244 */
245 buf = qemu_memalign(512, aiocb->aio_nbytes);
246 if (aiocb->aio_type == QEMU_PAIO_WRITE) {
247 char *p = buf;
248 int i;
249
250 for (i = 0; i < aiocb->aio_niov; ++i) {
251 memcpy(p, aiocb->aio_iov[i].iov_base, aiocb->aio_iov[i].iov_len);
252 p += aiocb->aio_iov[i].iov_len;
253 }
254 }
255
256 nbytes = handle_aiocb_rw_linear(aiocb, buf);
257 if (aiocb->aio_type != QEMU_PAIO_WRITE) {
258 char *p = buf;
259 size_t count = aiocb->aio_nbytes, copy;
260 int i;
261
262 for (i = 0; i < aiocb->aio_niov && count; ++i) {
263 copy = count;
264 if (copy > aiocb->aio_iov[i].iov_len)
265 copy = aiocb->aio_iov[i].iov_len;
266 memcpy(aiocb->aio_iov[i].iov_base, p, copy);
267 p += copy;
268 count -= copy;
269 }
270 }
271 qemu_vfree(buf);
272
273 return nbytes;
221f715d
AL
274}
275
3c529d93
AL
276static void *aio_thread(void *unused)
277{
a8227a5a 278 pid_t pid;
3c529d93
AL
279 sigset_t set;
280
a8227a5a 281 pid = getpid();
282
3c529d93 283 /* block all signals */
8653c015 284 if (sigfillset(&set)) die("sigfillset");
285 if (sigprocmask(SIG_BLOCK, &set, NULL)) die("sigprocmask");
3c529d93
AL
286
287 while (1) {
288 struct qemu_paiocb *aiocb;
221f715d 289 size_t ret = 0;
30525aff 290 qemu_timeval tv;
291 struct timespec ts;
292
293 qemu_gettimeofday(&tv);
294 ts.tv_sec = tv.tv_sec + 10;
295 ts.tv_nsec = 0;
3c529d93 296
8653c015 297 mutex_lock(&lock);
3c529d93
AL
298
299 while (TAILQ_EMPTY(&request_list) &&
300 !(ret == ETIMEDOUT)) {
8653c015 301 ret = cond_timedwait(&cond, &lock, &ts);
3c529d93
AL
302 }
303
514f7a27 304 if (TAILQ_EMPTY(&request_list))
3c529d93
AL
305 break;
306
307 aiocb = TAILQ_FIRST(&request_list);
308 TAILQ_REMOVE(&request_list, aiocb, node);
3c529d93 309 aiocb->active = 1;
3c529d93 310 idle_threads--;
8653c015 311 mutex_unlock(&lock);
3c529d93 312
221f715d
AL
313 switch (aiocb->aio_type) {
314 case QEMU_PAIO_READ:
315 case QEMU_PAIO_WRITE:
f141eafe 316 ret = handle_aiocb_rw(aiocb);
221f715d
AL
317 break;
318 case QEMU_PAIO_IOCTL:
319 ret = handle_aiocb_ioctl(aiocb);
320 break;
321 default:
322 fprintf(stderr, "invalid aio request (0x%x)\n", aiocb->aio_type);
323 ret = -EINVAL;
324 break;
325 }
3c529d93 326
8653c015 327 mutex_lock(&lock);
221f715d 328 aiocb->ret = ret;
3c529d93 329 idle_threads++;
8653c015 330 mutex_unlock(&lock);
3c529d93 331
a8227a5a 332 if (kill(pid, aiocb->ev_signo)) die("kill failed");
3c529d93
AL
333 }
334
335 idle_threads--;
336 cur_threads--;
8653c015 337 mutex_unlock(&lock);
3c529d93
AL
338
339 return NULL;
340}
341
8653c015 342static void spawn_thread(void)
3c529d93 343{
3c529d93
AL
344 cur_threads++;
345 idle_threads++;
8653c015 346 thread_create(&thread_id, &attr, aio_thread, NULL);
3c529d93
AL
347}
348
349int qemu_paio_init(struct qemu_paioinit *aioinit)
350{
a8227a5a 351 int ret;
352
353 ret = pthread_attr_init(&attr);
354 if (ret) die2(ret, "pthread_attr_init");
355
356 ret = pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED);
357 if (ret) die2(ret, "pthread_attr_setdetachstate");
358
3c529d93
AL
359 TAILQ_INIT(&request_list);
360
361 return 0;
362}
363
221f715d 364static int qemu_paio_submit(struct qemu_paiocb *aiocb, int type)
3c529d93 365{
221f715d 366 aiocb->aio_type = type;
3c529d93
AL
367 aiocb->ret = -EINPROGRESS;
368 aiocb->active = 0;
8653c015 369 mutex_lock(&lock);
3c529d93
AL
370 if (idle_threads == 0 && cur_threads < max_threads)
371 spawn_thread();
372 TAILQ_INSERT_TAIL(&request_list, aiocb, node);
8653c015 373 mutex_unlock(&lock);
5d47e372 374 cond_signal(&cond);
3c529d93
AL
375
376 return 0;
377}
378
379int qemu_paio_read(struct qemu_paiocb *aiocb)
380{
221f715d 381 return qemu_paio_submit(aiocb, QEMU_PAIO_READ);
3c529d93
AL
382}
383
384int qemu_paio_write(struct qemu_paiocb *aiocb)
385{
221f715d
AL
386 return qemu_paio_submit(aiocb, QEMU_PAIO_WRITE);
387}
388
389int qemu_paio_ioctl(struct qemu_paiocb *aiocb)
390{
391 return qemu_paio_submit(aiocb, QEMU_PAIO_IOCTL);
3c529d93
AL
392}
393
394ssize_t qemu_paio_return(struct qemu_paiocb *aiocb)
395{
396 ssize_t ret;
397
8653c015 398 mutex_lock(&lock);
3c529d93 399 ret = aiocb->ret;
8653c015 400 mutex_unlock(&lock);
3c529d93
AL
401
402 return ret;
403}
404
/*
 * Translate the request's result into an error number.
 *
 * Returns the positive errno value when qemu_paio_return() is negative,
 * and 0 otherwise.
 */
int qemu_paio_error(struct qemu_paiocb *aiocb)
{
    ssize_t status = qemu_paio_return(aiocb);

    return (status < 0) ? -status : 0;
}
416
417int qemu_paio_cancel(int fd, struct qemu_paiocb *aiocb)
418{
419 int ret;
420
8653c015 421 mutex_lock(&lock);
3c529d93
AL
422 if (!aiocb->active) {
423 TAILQ_REMOVE(&request_list, aiocb, node);
424 aiocb->ret = -ECANCELED;
425 ret = QEMU_PAIO_CANCELED;
426 } else if (aiocb->ret == -EINPROGRESS)
427 ret = QEMU_PAIO_NOTCANCELED;
428 else
429 ret = QEMU_PAIO_ALLDONE;
8653c015 430 mutex_unlock(&lock);
3c529d93
AL
431
432 return ret;
433}