]> git.proxmox.com Git - mirror_ubuntu-hirsute-kernel.git/blame - arch/um/os-Linux/aio.c
[PATCH] uml: add host AIO support to block driver
[mirror_ubuntu-hirsute-kernel.git] / arch / um / os-Linux / aio.c
CommitLineData
75e5584c
JD
1/*
2 * Copyright (C) 2004 Jeff Dike (jdike@addtoit.com)
3 * Licensed under the GPL
4 */
5
6#include <stdlib.h>
7#include <unistd.h>
8#include <signal.h>
09ace81c 9#include <string.h>
75e5584c
JD
10#include <errno.h>
11#include <sched.h>
12#include <sys/syscall.h>
13#include "os.h"
14#include "helper.h"
15#include "aio.h"
16#include "init.h"
17#include "user.h"
18#include "mode.h"
19
75e5584c
JD
20static int aio_req_fd_r = -1;
21static int aio_req_fd_w = -1;
22
09ace81c
JD
23static int update_aio(struct aio_context *aio, int res)
24{
25 if(res < 0)
26 aio->len = res;
27 else if((res == 0) && (aio->type == AIO_READ)){
28 /* This is the EOF case - we have hit the end of the file
29 * and it ends in a partial block, so we fill the end of
30 * the block with zeros and claim success.
31 */
32 memset(aio->data, 0, aio->len);
33 aio->len = 0;
34 }
35 else if(res > 0){
36 aio->len -= res;
37 aio->data += res;
38 aio->offset += res;
39 return aio->len;
40 }
41
42 return 0;
43}
44
75e5584c
JD
45#if defined(HAVE_AIO_ABI)
46#include <linux/aio_abi.h>
47
48/* If we have the headers, we are going to build with AIO enabled.
49 * If we don't have aio in libc, we define the necessary stubs here.
50 */
51
52#if !defined(HAVE_AIO_LIBC)
53
54static long io_setup(int n, aio_context_t *ctxp)
55{
56 return syscall(__NR_io_setup, n, ctxp);
57}
58
59static long io_submit(aio_context_t ctx, long nr, struct iocb **iocbpp)
60{
61 return syscall(__NR_io_submit, ctx, nr, iocbpp);
62}
63
64static long io_getevents(aio_context_t ctx_id, long min_nr, long nr,
65 struct io_event *events, struct timespec *timeout)
66{
67 return syscall(__NR_io_getevents, ctx_id, min_nr, nr, events, timeout);
68}
69
70#endif
71
72/* The AIO_MMAP cases force the mmapped page into memory here
73 * rather than in whatever place first touches the data. I used
74 * to do this by touching the page, but that's delicate because
75 * gcc is prone to optimizing that away. So, what's done here
76 * is we read from the descriptor from which the page was
77 * mapped. The caller is required to pass an offset which is
78 * inside the page that was mapped. Thus, when the read
79 * returns, we know that the page is in the page cache, and
80 * that it now backs the mmapped area.
81 */
82
09ace81c 83static int do_aio(aio_context_t ctx, struct aio_context *aio)
75e5584c
JD
84{
85 struct iocb iocb, *iocbp = &iocb;
86 char c;
87 int err;
88
89 iocb = ((struct iocb) { .aio_data = (unsigned long) aio,
90 .aio_reqprio = 0,
09ace81c
JD
91 .aio_fildes = aio->fd,
92 .aio_buf = (unsigned long) aio->data,
93 .aio_nbytes = aio->len,
94 .aio_offset = aio->offset,
75e5584c
JD
95 .aio_reserved1 = 0,
96 .aio_reserved2 = 0,
97 .aio_reserved3 = 0 });
98
09ace81c 99 switch(aio->type){
75e5584c
JD
100 case AIO_READ:
101 iocb.aio_lio_opcode = IOCB_CMD_PREAD;
75e5584c
JD
102 break;
103 case AIO_WRITE:
104 iocb.aio_lio_opcode = IOCB_CMD_PWRITE;
75e5584c
JD
105 break;
106 case AIO_MMAP:
107 iocb.aio_lio_opcode = IOCB_CMD_PREAD;
108 iocb.aio_buf = (unsigned long) &c;
109 iocb.aio_nbytes = sizeof(c);
75e5584c
JD
110 break;
111 default:
09ace81c 112 printk("Bogus op in do_aio - %d\n", aio->type);
75e5584c 113 err = -EINVAL;
09ace81c 114 goto out;
75e5584c 115 }
09ace81c
JD
116
117 err = io_submit(ctx, 1, &iocbp);
75e5584c
JD
118 if(err > 0)
119 err = 0;
120
09ace81c 121 out:
75e5584c
JD
122 return err;
123}
124
125static aio_context_t ctx = 0;
126
127static int aio_thread(void *arg)
128{
129 struct aio_thread_reply reply;
09ace81c 130 struct aio_context *aio;
75e5584c 131 struct io_event event;
09ace81c 132 int err, n;
75e5584c
JD
133
134 signal(SIGWINCH, SIG_IGN);
135
136 while(1){
137 n = io_getevents(ctx, 1, 1, &event, NULL);
138 if(n < 0){
139 if(errno == EINTR)
140 continue;
141 printk("aio_thread - io_getevents failed, "
142 "errno = %d\n", errno);
143 }
144 else {
09ace81c
JD
145 aio = (struct aio_context *) event.data;
146 if(update_aio(aio, event.res)){
147 do_aio(ctx, aio);
148 continue;
149 }
150
75e5584c 151 reply = ((struct aio_thread_reply)
09ace81c
JD
152 { .data = aio,
153 .err = aio->len });
154 err = os_write_file(aio->reply_fd, &reply,
155 sizeof(reply));
75e5584c 156 if(err != sizeof(reply))
09ace81c
JD
157 printk("aio_thread - write failed, "
158 "fd = %d, err = %d\n", aio->reply_fd,
159 -err);
75e5584c
JD
160 }
161 }
162 return 0;
163}
164
165#endif
166
09ace81c 167static int do_not_aio(struct aio_context *aio)
75e5584c
JD
168{
169 char c;
170 int err;
171
09ace81c 172 switch(aio->type){
75e5584c 173 case AIO_READ:
09ace81c 174 err = os_seek_file(aio->fd, aio->offset);
75e5584c
JD
175 if(err)
176 goto out;
177
09ace81c 178 err = os_read_file(aio->fd, aio->data, aio->len);
75e5584c
JD
179 break;
180 case AIO_WRITE:
09ace81c 181 err = os_seek_file(aio->fd, aio->offset);
75e5584c
JD
182 if(err)
183 goto out;
184
09ace81c 185 err = os_write_file(aio->fd, aio->data, aio->len);
75e5584c
JD
186 break;
187 case AIO_MMAP:
09ace81c 188 err = os_seek_file(aio->fd, aio->offset);
75e5584c
JD
189 if(err)
190 goto out;
191
09ace81c 192 err = os_read_file(aio->fd, &c, sizeof(c));
75e5584c
JD
193 break;
194 default:
09ace81c 195 printk("do_not_aio - bad request type : %d\n", aio->type);
75e5584c
JD
196 err = -EINVAL;
197 break;
198 }
199
200 out:
201 return err;
202}
203
204static int not_aio_thread(void *arg)
205{
09ace81c 206 struct aio_context *aio;
75e5584c
JD
207 struct aio_thread_reply reply;
208 int err;
209
210 signal(SIGWINCH, SIG_IGN);
211 while(1){
09ace81c
JD
212 err = os_read_file(aio_req_fd_r, &aio, sizeof(aio));
213 if(err != sizeof(aio)){
75e5584c
JD
214 if(err < 0)
215 printk("not_aio_thread - read failed, "
216 "fd = %d, err = %d\n", aio_req_fd_r,
217 -err);
218 else {
219 printk("not_aio_thread - short read, fd = %d, "
220 "length = %d\n", aio_req_fd_r, err);
221 }
222 continue;
223 }
09ace81c
JD
224 again:
225 err = do_not_aio(aio);
226
227 if(update_aio(aio, err))
228 goto again;
229
230 reply = ((struct aio_thread_reply) { .data = aio,
231 .err = aio->len });
232 err = os_write_file(aio->reply_fd, &reply, sizeof(reply));
75e5584c
JD
233 if(err != sizeof(reply))
234 printk("not_aio_thread - write failed, fd = %d, "
235 "err = %d\n", aio_req_fd_r, -err);
236 }
237}
238
09ace81c
JD
239static int submit_aio_24(struct aio_context *aio)
240{
241 int err;
242
243 err = os_write_file(aio_req_fd_w, &aio, sizeof(aio));
244 if(err == sizeof(aio))
245 err = 0;
246
247 return err;
248}
249
75e5584c 250static int aio_pid = -1;
09ace81c 251static int (*submit_proc)(struct aio_context *aio);
75e5584c
JD
252
253static int init_aio_24(void)
254{
255 unsigned long stack;
256 int fds[2], err;
257
258 err = os_pipe(fds, 1, 1);
259 if(err)
260 goto out;
261
262 aio_req_fd_w = fds[0];
263 aio_req_fd_r = fds[1];
264 err = run_helper_thread(not_aio_thread, NULL,
265 CLONE_FILES | CLONE_VM | SIGCHLD, &stack, 0);
266 if(err < 0)
267 goto out_close_pipe;
268
269 aio_pid = err;
270 goto out;
271
272 out_close_pipe:
273 os_close_file(fds[0]);
274 os_close_file(fds[1]);
275 aio_req_fd_w = -1;
276 aio_req_fd_r = -1;
277 out:
278#ifndef HAVE_AIO_ABI
279 printk("/usr/include/linux/aio_abi.h not present during build\n");
280#endif
281 printk("2.6 host AIO support not used - falling back to I/O "
282 "thread\n");
09ace81c
JD
283
284 submit_proc = submit_aio_24;
285
75e5584c
JD
286 return 0;
287}
288
289#ifdef HAVE_AIO_ABI
290#define DEFAULT_24_AIO 0
09ace81c
JD
291static int submit_aio_26(struct aio_context *aio)
292{
293 struct aio_thread_reply reply;
294 int err;
295
296 err = do_aio(ctx, aio);
297 if(err){
298 reply = ((struct aio_thread_reply) { .data = aio,
299 .err = err });
300 err = os_write_file(aio->reply_fd, &reply, sizeof(reply));
301 if(err != sizeof(reply))
302 printk("submit_aio_26 - write failed, "
303 "fd = %d, err = %d\n", aio->reply_fd, -err);
304 else err = 0;
305 }
306
307 return err;
308}
309
75e5584c
JD
310static int init_aio_26(void)
311{
312 unsigned long stack;
313 int err;
314
315 if(io_setup(256, &ctx)){
316 printk("aio_thread failed to initialize context, err = %d\n",
317 errno);
318 return -errno;
319 }
320
321 err = run_helper_thread(aio_thread, NULL,
322 CLONE_FILES | CLONE_VM | SIGCHLD, &stack, 0);
323 if(err < 0)
324 return -errno;
325
326 aio_pid = err;
327
328 printk("Using 2.6 host AIO\n");
75e5584c 329
09ace81c 330 submit_proc = submit_aio_26;
75e5584c 331
09ace81c 332 return 0;
75e5584c
JD
333}
334
335#else
336#define DEFAULT_24_AIO 1
09ace81c 337static int submit_aio_26(struct aio_context *aio)
75e5584c
JD
338{
339 return -ENOSYS;
340}
341
09ace81c 342static int init_aio_26(void)
75e5584c 343{
09ace81c 344 submit_proc = submit_aio_26;
75e5584c
JD
345 return -ENOSYS;
346}
347#endif
348
349static int aio_24 = DEFAULT_24_AIO;
350
351static int __init set_aio_24(char *name, int *add)
352{
353 aio_24 = 1;
354 return 0;
355}
356
357__uml_setup("aio=2.4", set_aio_24,
358"aio=2.4\n"
359" This is used to force UML to use 2.4-style AIO even when 2.6 AIO is\n"
360" available. 2.4 AIO is a single thread that handles one request at a\n"
361" time, synchronously. 2.6 AIO is a thread which uses the 2.6 AIO \n"
362" interface to handle an arbitrary number of pending requests. 2.6 AIO \n"
363" is not available in tt mode, on 2.4 hosts, or when UML is built with\n"
364" /usr/include/linux/aio_abi.h not available. Many distributions don't\n"
365" include aio_abi.h, so you will need to copy it from a kernel tree to\n"
366" your /usr/include/linux in order to build an AIO-capable UML\n\n"
367);
368
369static int init_aio(void)
370{
371 int err;
372
373 CHOOSE_MODE(({
374 if(!aio_24){
375 printk("Disabling 2.6 AIO in tt mode\n");
376 aio_24 = 1;
377 } }), (void) 0);
378
379 if(!aio_24){
380 err = init_aio_26();
381 if(err && (errno == ENOSYS)){
382 printk("2.6 AIO not supported on the host - "
383 "reverting to 2.4 AIO\n");
384 aio_24 = 1;
385 }
386 else return err;
387 }
388
389 if(aio_24)
390 return init_aio_24();
391
392 return 0;
393}
394
395/* The reason for the __initcall/__uml_exitcall asymmetry is that init_aio
396 * needs to be called when the kernel is running because it calls run_helper,
397 * which needs get_free_page. exit_aio is a __uml_exitcall because the generic
398 * kernel does not run __exitcalls on shutdown, and can't because many of them
399 * break when called outside of module unloading.
400 */
401__initcall(init_aio);
402
403static void exit_aio(void)
404{
405 if(aio_pid != -1)
406 os_kill_process(aio_pid, 1);
407}
408
409__uml_exitcall(exit_aio);
410
09ace81c 411int submit_aio(struct aio_context *aio)
75e5584c 412{
09ace81c 413 return (*submit_proc)(aio);
75e5584c 414}