]>
Commit | Line | Data |
---|---|---|
75e5584c JD |
1 | /* |
2 | * Copyright (C) 2004 Jeff Dike (jdike@addtoit.com) | |
3 | * Licensed under the GPL | |
4 | */ | |
5 | ||
6 | #include <stdlib.h> | |
7 | #include <unistd.h> | |
8 | #include <signal.h> | |
09ace81c | 9 | #include <string.h> |
75e5584c JD |
10 | #include <errno.h> |
11 | #include <sched.h> | |
12 | #include <sys/syscall.h> | |
13 | #include "os.h" | |
14 | #include "helper.h" | |
15 | #include "aio.h" | |
16 | #include "init.h" | |
17 | #include "user.h" | |
18 | #include "mode.h" | |
19 | ||
75e5584c JD |
/* Request channel to the 2.4-style I/O thread.  not_aio_thread reads
 * request pointers from aio_req_fd_r and submit_aio_24 writes them to
 * aio_req_fd_w.  Both stay at -1 until init_aio_24 assigns them.
 */
static int aio_req_fd_r = -1, aio_req_fd_w = -1;
22 | ||
09ace81c JD |
23 | static int update_aio(struct aio_context *aio, int res) |
24 | { | |
25 | if(res < 0) | |
26 | aio->len = res; | |
27 | else if((res == 0) && (aio->type == AIO_READ)){ | |
28 | /* This is the EOF case - we have hit the end of the file | |
29 | * and it ends in a partial block, so we fill the end of | |
30 | * the block with zeros and claim success. | |
31 | */ | |
32 | memset(aio->data, 0, aio->len); | |
33 | aio->len = 0; | |
34 | } | |
35 | else if(res > 0){ | |
36 | aio->len -= res; | |
37 | aio->data += res; | |
38 | aio->offset += res; | |
39 | return aio->len; | |
40 | } | |
41 | ||
42 | return 0; | |
43 | } | |
44 | ||
75e5584c JD |
45 | #if defined(HAVE_AIO_ABI) |
46 | #include <linux/aio_abi.h> | |
47 | ||
48 | /* If we have the headers, we are going to build with AIO enabled. | |
49 | * If we don't have aio in libc, we define the necessary stubs here. | |
50 | */ | |
51 | ||
52 | #if !defined(HAVE_AIO_LIBC) | |
53 | ||
/* Stub for hosts whose libc lacks io_setup - invokes the system call
 * directly.  The return value is whatever syscall() hands back.
 */
static long io_setup(int n, aio_context_t *ctxp)
{
	long ret;

	ret = syscall(__NR_io_setup, n, ctxp);
	return ret;
}
58 | ||
/* Stub for hosts whose libc lacks io_submit - invokes the system call
 * directly.  The return value is whatever syscall() hands back.
 */
static long io_submit(aio_context_t ctx, long nr, struct iocb **iocbpp)
{
	long ret;

	ret = syscall(__NR_io_submit, ctx, nr, iocbpp);
	return ret;
}
63 | ||
/* Stub for hosts whose libc lacks io_getevents - invokes the system call
 * directly.  The return value is whatever syscall() hands back.
 */
static long io_getevents(aio_context_t ctx_id, long min_nr, long nr,
			 struct io_event *events, struct timespec *timeout)
{
	long ret;

	ret = syscall(__NR_io_getevents, ctx_id, min_nr, nr, events,
		      timeout);
	return ret;
}
69 | ||
70 | #endif | |
71 | ||
72 | /* The AIO_MMAP cases force the mmapped page into memory here | |
73 | * rather than in whatever place first touches the data. I used | |
74 | * to do this by touching the page, but that's delicate because | |
75 | * gcc is prone to optimizing that away. So, what's done here | |
76 | * is we read from the descriptor from which the page was | |
77 | * mapped. The caller is required to pass an offset which is | |
78 | * inside the page that was mapped. Thus, when the read | |
79 | * returns, we know that the page is in the page cache, and | |
80 | * that it now backs the mmapped area. | |
81 | */ | |
82 | ||
09ace81c | 83 | static int do_aio(aio_context_t ctx, struct aio_context *aio) |
75e5584c JD |
84 | { |
85 | struct iocb iocb, *iocbp = &iocb; | |
86 | char c; | |
87 | int err; | |
88 | ||
89 | iocb = ((struct iocb) { .aio_data = (unsigned long) aio, | |
90 | .aio_reqprio = 0, | |
09ace81c JD |
91 | .aio_fildes = aio->fd, |
92 | .aio_buf = (unsigned long) aio->data, | |
93 | .aio_nbytes = aio->len, | |
94 | .aio_offset = aio->offset, | |
75e5584c JD |
95 | .aio_reserved1 = 0, |
96 | .aio_reserved2 = 0, | |
97 | .aio_reserved3 = 0 }); | |
98 | ||
09ace81c | 99 | switch(aio->type){ |
75e5584c JD |
100 | case AIO_READ: |
101 | iocb.aio_lio_opcode = IOCB_CMD_PREAD; | |
75e5584c JD |
102 | break; |
103 | case AIO_WRITE: | |
104 | iocb.aio_lio_opcode = IOCB_CMD_PWRITE; | |
75e5584c JD |
105 | break; |
106 | case AIO_MMAP: | |
107 | iocb.aio_lio_opcode = IOCB_CMD_PREAD; | |
108 | iocb.aio_buf = (unsigned long) &c; | |
109 | iocb.aio_nbytes = sizeof(c); | |
75e5584c JD |
110 | break; |
111 | default: | |
09ace81c | 112 | printk("Bogus op in do_aio - %d\n", aio->type); |
75e5584c | 113 | err = -EINVAL; |
09ace81c | 114 | goto out; |
75e5584c | 115 | } |
09ace81c JD |
116 | |
117 | err = io_submit(ctx, 1, &iocbp); | |
75e5584c JD |
118 | if(err > 0) |
119 | err = 0; | |
120 | ||
09ace81c | 121 | out: |
75e5584c JD |
122 | return err; |
123 | } | |
124 | ||
/* Host AIO context.  Zero until init_aio_26 hands it to io_setup. */
static aio_context_t ctx;
126 | ||
127 | static int aio_thread(void *arg) | |
128 | { | |
129 | struct aio_thread_reply reply; | |
09ace81c | 130 | struct aio_context *aio; |
75e5584c | 131 | struct io_event event; |
09ace81c | 132 | int err, n; |
75e5584c JD |
133 | |
134 | signal(SIGWINCH, SIG_IGN); | |
135 | ||
136 | while(1){ | |
137 | n = io_getevents(ctx, 1, 1, &event, NULL); | |
138 | if(n < 0){ | |
139 | if(errno == EINTR) | |
140 | continue; | |
141 | printk("aio_thread - io_getevents failed, " | |
142 | "errno = %d\n", errno); | |
143 | } | |
144 | else { | |
09ace81c JD |
145 | aio = (struct aio_context *) event.data; |
146 | if(update_aio(aio, event.res)){ | |
147 | do_aio(ctx, aio); | |
148 | continue; | |
149 | } | |
150 | ||
75e5584c | 151 | reply = ((struct aio_thread_reply) |
09ace81c JD |
152 | { .data = aio, |
153 | .err = aio->len }); | |
154 | err = os_write_file(aio->reply_fd, &reply, | |
155 | sizeof(reply)); | |
75e5584c | 156 | if(err != sizeof(reply)) |
09ace81c JD |
157 | printk("aio_thread - write failed, " |
158 | "fd = %d, err = %d\n", aio->reply_fd, | |
159 | -err); | |
75e5584c JD |
160 | } |
161 | } | |
162 | return 0; | |
163 | } | |
164 | ||
165 | #endif | |
166 | ||
09ace81c | 167 | static int do_not_aio(struct aio_context *aio) |
75e5584c JD |
168 | { |
169 | char c; | |
170 | int err; | |
171 | ||
09ace81c | 172 | switch(aio->type){ |
75e5584c | 173 | case AIO_READ: |
09ace81c | 174 | err = os_seek_file(aio->fd, aio->offset); |
75e5584c JD |
175 | if(err) |
176 | goto out; | |
177 | ||
09ace81c | 178 | err = os_read_file(aio->fd, aio->data, aio->len); |
75e5584c JD |
179 | break; |
180 | case AIO_WRITE: | |
09ace81c | 181 | err = os_seek_file(aio->fd, aio->offset); |
75e5584c JD |
182 | if(err) |
183 | goto out; | |
184 | ||
09ace81c | 185 | err = os_write_file(aio->fd, aio->data, aio->len); |
75e5584c JD |
186 | break; |
187 | case AIO_MMAP: | |
09ace81c | 188 | err = os_seek_file(aio->fd, aio->offset); |
75e5584c JD |
189 | if(err) |
190 | goto out; | |
191 | ||
09ace81c | 192 | err = os_read_file(aio->fd, &c, sizeof(c)); |
75e5584c JD |
193 | break; |
194 | default: | |
09ace81c | 195 | printk("do_not_aio - bad request type : %d\n", aio->type); |
75e5584c JD |
196 | err = -EINVAL; |
197 | break; | |
198 | } | |
199 | ||
200 | out: | |
201 | return err; | |
202 | } | |
203 | ||
204 | static int not_aio_thread(void *arg) | |
205 | { | |
09ace81c | 206 | struct aio_context *aio; |
75e5584c JD |
207 | struct aio_thread_reply reply; |
208 | int err; | |
209 | ||
210 | signal(SIGWINCH, SIG_IGN); | |
211 | while(1){ | |
09ace81c JD |
212 | err = os_read_file(aio_req_fd_r, &aio, sizeof(aio)); |
213 | if(err != sizeof(aio)){ | |
75e5584c JD |
214 | if(err < 0) |
215 | printk("not_aio_thread - read failed, " | |
216 | "fd = %d, err = %d\n", aio_req_fd_r, | |
217 | -err); | |
218 | else { | |
219 | printk("not_aio_thread - short read, fd = %d, " | |
220 | "length = %d\n", aio_req_fd_r, err); | |
221 | } | |
222 | continue; | |
223 | } | |
09ace81c JD |
224 | again: |
225 | err = do_not_aio(aio); | |
226 | ||
227 | if(update_aio(aio, err)) | |
228 | goto again; | |
229 | ||
230 | reply = ((struct aio_thread_reply) { .data = aio, | |
231 | .err = aio->len }); | |
232 | err = os_write_file(aio->reply_fd, &reply, sizeof(reply)); | |
75e5584c JD |
233 | if(err != sizeof(reply)) |
234 | printk("not_aio_thread - write failed, fd = %d, " | |
235 | "err = %d\n", aio_req_fd_r, -err); | |
236 | } | |
237 | } | |
238 | ||
09ace81c JD |
239 | static int submit_aio_24(struct aio_context *aio) |
240 | { | |
241 | int err; | |
242 | ||
243 | err = os_write_file(aio_req_fd_w, &aio, sizeof(aio)); | |
244 | if(err == sizeof(aio)) | |
245 | err = 0; | |
246 | ||
247 | return err; | |
248 | } | |
249 | ||
/* Submission routine chosen at init time - submit_aio dispatches
 * through this.
 */
static int (*submit_proc)(struct aio_context *aio);

/* Pid of the helper thread, or -1 when none has been started */
static int aio_pid = -1;
75e5584c JD |
252 | |
253 | static int init_aio_24(void) | |
254 | { | |
255 | unsigned long stack; | |
256 | int fds[2], err; | |
257 | ||
258 | err = os_pipe(fds, 1, 1); | |
259 | if(err) | |
260 | goto out; | |
261 | ||
262 | aio_req_fd_w = fds[0]; | |
263 | aio_req_fd_r = fds[1]; | |
264 | err = run_helper_thread(not_aio_thread, NULL, | |
265 | CLONE_FILES | CLONE_VM | SIGCHLD, &stack, 0); | |
266 | if(err < 0) | |
267 | goto out_close_pipe; | |
268 | ||
269 | aio_pid = err; | |
270 | goto out; | |
271 | ||
272 | out_close_pipe: | |
273 | os_close_file(fds[0]); | |
274 | os_close_file(fds[1]); | |
275 | aio_req_fd_w = -1; | |
276 | aio_req_fd_r = -1; | |
277 | out: | |
278 | #ifndef HAVE_AIO_ABI | |
279 | printk("/usr/include/linux/aio_abi.h not present during build\n"); | |
280 | #endif | |
281 | printk("2.6 host AIO support not used - falling back to I/O " | |
282 | "thread\n"); | |
09ace81c JD |
283 | |
284 | submit_proc = submit_aio_24; | |
285 | ||
75e5584c JD |
286 | return 0; |
287 | } | |
288 | ||
289 | #ifdef HAVE_AIO_ABI | |
290 | #define DEFAULT_24_AIO 0 | |
09ace81c JD |
291 | static int submit_aio_26(struct aio_context *aio) |
292 | { | |
293 | struct aio_thread_reply reply; | |
294 | int err; | |
295 | ||
296 | err = do_aio(ctx, aio); | |
297 | if(err){ | |
298 | reply = ((struct aio_thread_reply) { .data = aio, | |
299 | .err = err }); | |
300 | err = os_write_file(aio->reply_fd, &reply, sizeof(reply)); | |
301 | if(err != sizeof(reply)) | |
302 | printk("submit_aio_26 - write failed, " | |
303 | "fd = %d, err = %d\n", aio->reply_fd, -err); | |
304 | else err = 0; | |
305 | } | |
306 | ||
307 | return err; | |
308 | } | |
309 | ||
75e5584c JD |
310 | static int init_aio_26(void) |
311 | { | |
312 | unsigned long stack; | |
313 | int err; | |
314 | ||
315 | if(io_setup(256, &ctx)){ | |
316 | printk("aio_thread failed to initialize context, err = %d\n", | |
317 | errno); | |
318 | return -errno; | |
319 | } | |
320 | ||
321 | err = run_helper_thread(aio_thread, NULL, | |
322 | CLONE_FILES | CLONE_VM | SIGCHLD, &stack, 0); | |
323 | if(err < 0) | |
324 | return -errno; | |
325 | ||
326 | aio_pid = err; | |
327 | ||
328 | printk("Using 2.6 host AIO\n"); | |
75e5584c | 329 | |
09ace81c | 330 | submit_proc = submit_aio_26; |
75e5584c | 331 | |
09ace81c | 332 | return 0; |
75e5584c JD |
333 | } |
334 | ||
335 | #else | |
336 | #define DEFAULT_24_AIO 1 | |
/* Placeholder used when UML is built without aio_abi.h - host AIO is not
 * compiled in, so every submission is refused with -ENOSYS.
 */
static int submit_aio_26(struct aio_context *aio)
{
	const int unsupported = -ENOSYS;

	return unsupported;
}
341 | ||
09ace81c | 342 | static int init_aio_26(void) |
75e5584c | 343 | { |
09ace81c | 344 | submit_proc = submit_aio_26; |
75e5584c JD |
345 | return -ENOSYS; |
346 | } | |
347 | #endif | |
348 | ||
349 | static int aio_24 = DEFAULT_24_AIO; | |
350 | ||
351 | static int __init set_aio_24(char *name, int *add) | |
352 | { | |
353 | aio_24 = 1; | |
354 | return 0; | |
355 | } | |
356 | ||
357 | __uml_setup("aio=2.4", set_aio_24, | |
358 | "aio=2.4\n" | |
359 | " This is used to force UML to use 2.4-style AIO even when 2.6 AIO is\n" | |
360 | " available. 2.4 AIO is a single thread that handles one request at a\n" | |
361 | " time, synchronously. 2.6 AIO is a thread which uses the 2.6 AIO \n" | |
362 | " interface to handle an arbitrary number of pending requests. 2.6 AIO \n" | |
363 | " is not available in tt mode, on 2.4 hosts, or when UML is built with\n" | |
364 | " /usr/include/linux/aio_abi.h not available. Many distributions don't\n" | |
365 | " include aio_abi.h, so you will need to copy it from a kernel tree to\n" | |
366 | " your /usr/include/linux in order to build an AIO-capable UML\n\n" | |
367 | ); | |
368 | ||
369 | static int init_aio(void) | |
370 | { | |
371 | int err; | |
372 | ||
373 | CHOOSE_MODE(({ | |
374 | if(!aio_24){ | |
375 | printk("Disabling 2.6 AIO in tt mode\n"); | |
376 | aio_24 = 1; | |
377 | } }), (void) 0); | |
378 | ||
379 | if(!aio_24){ | |
380 | err = init_aio_26(); | |
381 | if(err && (errno == ENOSYS)){ | |
382 | printk("2.6 AIO not supported on the host - " | |
383 | "reverting to 2.4 AIO\n"); | |
384 | aio_24 = 1; | |
385 | } | |
386 | else return err; | |
387 | } | |
388 | ||
389 | if(aio_24) | |
390 | return init_aio_24(); | |
391 | ||
392 | return 0; | |
393 | } | |
394 | ||
395 | /* The reason for the __initcall/__uml_exitcall asymmetry is that init_aio | |
396 | * needs to be called when the kernel is running because it calls run_helper, | |
397 | * which needs get_free_page. exit_aio is a __uml_exitcall because the generic | |
398 | * kernel does not run __exitcalls on shutdown, and can't because many of them | |
399 | * break when called outside of module unloading. | |
400 | */ | |
401 | __initcall(init_aio); | |
402 | ||
403 | static void exit_aio(void) | |
404 | { | |
405 | if(aio_pid != -1) | |
406 | os_kill_process(aio_pid, 1); | |
407 | } | |
408 | ||
409 | __uml_exitcall(exit_aio); | |
410 | ||
09ace81c | 411 | int submit_aio(struct aio_context *aio) |
75e5584c | 412 | { |
09ace81c | 413 | return (*submit_proc)(aio); |
75e5584c | 414 | } |