]> git.proxmox.com Git - qemu.git/blame - block-raw-posix.c
Add a parameter to disable host cache, by Laurent Vivier.
[qemu.git] / block-raw-posix.c
CommitLineData
83f64091 1/*
223d4670 2 * Block driver for RAW files (posix)
5fafdf24 3 *
83f64091 4 * Copyright (c) 2006 Fabrice Bellard
5fafdf24 5 *
83f64091
FB
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to deal
8 * in the Software without restriction, including without limitation the rights
9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 * copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22 * THE SOFTWARE.
23 */
faf07963 24#include "qemu-common.h"
2f726488 25#if !defined(QEMU_IMG) && !defined(QEMU_NBD)
87ecb68b 26#include "qemu-timer.h"
ae5fc450 27#include "exec-all.h"
faf07963 28#endif
83f64091
FB
29#include "block_int.h"
30#include <assert.h>
83f64091
FB
31#include <aio.h>
32
83f64091
FB
33#ifdef CONFIG_COCOA
34#include <paths.h>
35#include <sys/param.h>
36#include <IOKit/IOKitLib.h>
37#include <IOKit/IOBSD.h>
38#include <IOKit/storage/IOMediaBSDClient.h>
39#include <IOKit/storage/IOMedia.h>
40#include <IOKit/storage/IOCDMedia.h>
41//#include <IOKit/storage/IOCDTypes.h>
42#include <CoreFoundation/CoreFoundation.h>
43#endif
44
45#ifdef __sun__
2e9671da
TS
46#define _POSIX_PTHREAD_SEMANTICS 1
47#include <signal.h>
83f64091
FB
48#include <sys/dkio.h>
49#endif
19cb3738
FB
50#ifdef __linux__
51#include <sys/ioctl.h>
52#include <linux/cdrom.h>
53#include <linux/fd.h>
54#endif
1cb6c3fd
TS
55#ifdef __FreeBSD__
56#include <sys/disk.h>
57#endif
83f64091 58
19cb3738 59//#define DEBUG_FLOPPY
83f64091 60
faf07963 61//#define DEBUG_BLOCK
2f726488 62#if defined(DEBUG_BLOCK) && !defined(QEMU_IMG) && !defined(QEMU_NBD)
a50a6282 63#define DEBUG_BLOCK_PRINT(formatCstr, args...) do { if (loglevel != 0) \
2e03286b 64 { fprintf(logfile, formatCstr, ##args); fflush(logfile); } } while (0)
8c05dbf9
TS
65#else
66#define DEBUG_BLOCK_PRINT(formatCstr, args...)
67#endif
68
19cb3738
FB
69#define FTYPE_FILE 0
70#define FTYPE_CD 1
71#define FTYPE_FD 2
83f64091 72
bed5cc52
FB
73#define ALIGNED_BUFFER_SIZE (32 * 512)
74
19cb3738
FB
75/* if the FD is not accessed during that time (in ms), we try to
76 reopen it to see if the disk has been changed */
77#define FD_OPEN_TIMEOUT 1000
83f64091 78
19cb3738
FB
/* Per-device state used by both the "raw" and "host_device" drivers. */
typedef struct BDRVRawState {
    int fd;                     /* host file descriptor; set to -1 once closed */
    int type;                   /* one of the FTYPE_* constants */
    unsigned int lseek_err_cnt; /* consecutive lseek() failures; reset on success */
#if defined(__linux__)
    /* linux floppy specific */
    int fd_open_flags;          /* open(2) flags reused when the floppy is reopened */
    int64_t fd_open_time;       /* rt_clock value at the last successful fd_open() */
    int64_t fd_error_time;      /* rt_clock value at the last failed reopen */
    int fd_got_error;           /* non-zero while the last reopen attempt failed */
    int fd_media_changed;       /* latched change flag, cleared by raw_media_changed() */
#endif
#if defined(O_DIRECT) && !defined(QEMU_IMG)
    uint8_t* aligned_buf;       /* ALIGNED_BUFFER_SIZE bounce buffer, NULL unless
                                   raw_open() was given BDRV_O_DIRECT */
#endif
} BDRVRawState;
95
96static int fd_open(BlockDriverState *bs);
83f64091
FB
97
98static int raw_open(BlockDriverState *bs, const char *filename, int flags)
99{
100 BDRVRawState *s = bs->opaque;
19cb3738 101 int fd, open_flags, ret;
83f64091 102
8c05dbf9
TS
103 s->lseek_err_cnt = 0;
104
83f64091
FB
105 open_flags = O_BINARY;
106 if ((flags & BDRV_O_ACCESS) == O_RDWR) {
107 open_flags |= O_RDWR;
108 } else {
109 open_flags |= O_RDONLY;
110 bs->read_only = 1;
111 }
112 if (flags & BDRV_O_CREAT)
113 open_flags |= O_CREAT | O_TRUNC;
33f00271
AZ
114#ifdef O_DIRECT
115 if (flags & BDRV_O_DIRECT)
116 open_flags |= O_DIRECT;
117#endif
83f64091 118
19cb3738
FB
119 s->type = FTYPE_FILE;
120
83f64091 121 fd = open(filename, open_flags, 0644);
19cb3738
FB
122 if (fd < 0) {
123 ret = -errno;
124 if (ret == -EROFS)
125 ret = -EACCES;
126 return ret;
127 }
83f64091 128 s->fd = fd;
bed5cc52
FB
129#if defined(O_DIRECT) && !defined(QEMU_IMG)
130 s->aligned_buf = NULL;
131 if (flags & BDRV_O_DIRECT) {
132 s->aligned_buf = qemu_memalign(512, ALIGNED_BUFFER_SIZE);
133 if (s->aligned_buf == NULL) {
134 ret = -errno;
135 close(fd);
136 return ret;
137 }
138 }
139#endif
83f64091
FB
140 return 0;
141}
142
143/* XXX: use host sector size if necessary with:
144#ifdef DIOCGSECTORSIZE
145 {
146 unsigned int sectorsize = 512;
147 if (!ioctl(fd, DIOCGSECTORSIZE, &sectorsize) &&
148 sectorsize > bufsize)
149 bufsize = sectorsize;
150 }
151#endif
152#ifdef CONFIG_COCOA
153 u_int32_t blockSize = 512;
154 if ( !ioctl( fd, DKIOCGETBLOCKSIZE, &blockSize ) && blockSize > bufsize) {
155 bufsize = blockSize;
156 }
157#endif
158*/
159
bed5cc52
FB
160/*
161 * offset and count are in bytes, but must be multiples of 512 for files
162 * opened with O_DIRECT. buf must be aligned to 512 bytes then.
163 *
164 * This function may be called without alignment if the caller ensures
165 * that O_DIRECT is not in effect.
166 */
167static int raw_pread_aligned(BlockDriverState *bs, int64_t offset,
83f64091
FB
168 uint8_t *buf, int count)
169{
170 BDRVRawState *s = bs->opaque;
171 int ret;
3b46e624 172
19cb3738
FB
173 ret = fd_open(bs);
174 if (ret < 0)
175 return ret;
176
985a03b0 177 if (offset >= 0 && lseek(s->fd, offset, SEEK_SET) == (off_t)-1) {
8c05dbf9
TS
178 ++(s->lseek_err_cnt);
179 if(s->lseek_err_cnt <= 10) {
92868412
JM
180 DEBUG_BLOCK_PRINT("raw_pread(%d:%s, %" PRId64 ", %p, %d) [%" PRId64
181 "] lseek failed : %d = %s\n",
8c05dbf9
TS
182 s->fd, bs->filename, offset, buf, count,
183 bs->total_sectors, errno, strerror(errno));
184 }
185 return -1;
186 }
187 s->lseek_err_cnt=0;
188
83f64091 189 ret = read(s->fd, buf, count);
8c05dbf9
TS
190 if (ret == count)
191 goto label__raw_read__success;
192
92868412
JM
193 DEBUG_BLOCK_PRINT("raw_pread(%d:%s, %" PRId64 ", %p, %d) [%" PRId64
194 "] read failed %d : %d = %s\n",
8c05dbf9
TS
195 s->fd, bs->filename, offset, buf, count,
196 bs->total_sectors, ret, errno, strerror(errno));
197
198 /* Try harder for CDrom. */
199 if (bs->type == BDRV_TYPE_CDROM) {
200 lseek(s->fd, offset, SEEK_SET);
201 ret = read(s->fd, buf, count);
202 if (ret == count)
203 goto label__raw_read__success;
204 lseek(s->fd, offset, SEEK_SET);
205 ret = read(s->fd, buf, count);
206 if (ret == count)
207 goto label__raw_read__success;
208
92868412
JM
209 DEBUG_BLOCK_PRINT("raw_pread(%d:%s, %" PRId64 ", %p, %d) [%" PRId64
210 "] retry read failed %d : %d = %s\n",
8c05dbf9
TS
211 s->fd, bs->filename, offset, buf, count,
212 bs->total_sectors, ret, errno, strerror(errno));
213 }
214
8c05dbf9
TS
215label__raw_read__success:
216
83f64091
FB
217 return ret;
218}
219
bed5cc52
FB
220/*
221 * offset and count are in bytes, but must be multiples of 512 for files
222 * opened with O_DIRECT. buf must be aligned to 512 bytes then.
223 *
224 * This function may be called without alignment if the caller ensures
225 * that O_DIRECT is not in effect.
226 */
227static int raw_pwrite_aligned(BlockDriverState *bs, int64_t offset,
83f64091
FB
228 const uint8_t *buf, int count)
229{
230 BDRVRawState *s = bs->opaque;
231 int ret;
3b46e624 232
19cb3738
FB
233 ret = fd_open(bs);
234 if (ret < 0)
235 return ret;
236
985a03b0 237 if (offset >= 0 && lseek(s->fd, offset, SEEK_SET) == (off_t)-1) {
8c05dbf9
TS
238 ++(s->lseek_err_cnt);
239 if(s->lseek_err_cnt) {
92868412
JM
240 DEBUG_BLOCK_PRINT("raw_pwrite(%d:%s, %" PRId64 ", %p, %d) [%"
241 PRId64 "] lseek failed : %d = %s\n",
8c05dbf9
TS
242 s->fd, bs->filename, offset, buf, count,
243 bs->total_sectors, errno, strerror(errno));
244 }
245 return -1;
246 }
247 s->lseek_err_cnt = 0;
248
83f64091 249 ret = write(s->fd, buf, count);
8c05dbf9
TS
250 if (ret == count)
251 goto label__raw_write__success;
252
92868412
JM
253 DEBUG_BLOCK_PRINT("raw_pwrite(%d:%s, %" PRId64 ", %p, %d) [%" PRId64
254 "] write failed %d : %d = %s\n",
8c05dbf9
TS
255 s->fd, bs->filename, offset, buf, count,
256 bs->total_sectors, ret, errno, strerror(errno));
257
8c05dbf9
TS
258label__raw_write__success:
259
83f64091
FB
260 return ret;
261}
262
bed5cc52
FB
263
264#if defined(O_DIRECT) && !defined(QEMU_IMG)
265/*
266 * offset and count are in bytes and possibly not aligned. For files opened
267 * with O_DIRECT, necessary alignments are ensured before calling
268 * raw_pread_aligned to do the actual read.
269 */
270static int raw_pread(BlockDriverState *bs, int64_t offset,
271 uint8_t *buf, int count)
272{
273 BDRVRawState *s = bs->opaque;
274 int size, ret, shift, sum;
275
276 sum = 0;
277
278 if (s->aligned_buf != NULL) {
279
280 if (offset & 0x1ff) {
281 /* align offset on a 512 bytes boundary */
282
283 shift = offset & 0x1ff;
284 size = (shift + count + 0x1ff) & ~0x1ff;
285 if (size > ALIGNED_BUFFER_SIZE)
286 size = ALIGNED_BUFFER_SIZE;
287 ret = raw_pread_aligned(bs, offset - shift, s->aligned_buf, size);
288 if (ret < 0)
289 return ret;
290
291 size = 512 - shift;
292 if (size > count)
293 size = count;
294 memcpy(buf, s->aligned_buf + shift, size);
295
296 buf += size;
297 offset += size;
298 count -= size;
299 sum += size;
300
301 if (count == 0)
302 return sum;
303 }
304 if (count & 0x1ff || (uintptr_t) buf & 0x1ff) {
305
306 /* read on aligned buffer */
307
308 while (count) {
309
310 size = (count + 0x1ff) & ~0x1ff;
311 if (size > ALIGNED_BUFFER_SIZE)
312 size = ALIGNED_BUFFER_SIZE;
313
314 ret = raw_pread_aligned(bs, offset, s->aligned_buf, size);
315 if (ret < 0)
316 return ret;
317
318 size = ret;
319 if (size > count)
320 size = count;
321
322 memcpy(buf, s->aligned_buf, size);
323
324 buf += size;
325 offset += size;
326 count -= size;
327 sum += size;
328 }
329
330 return sum;
331 }
332 }
333
334 return raw_pread_aligned(bs, offset, buf, count) + sum;
335}
336
337/*
338 * offset and count are in bytes and possibly not aligned. For files opened
339 * with O_DIRECT, necessary alignments are ensured before calling
340 * raw_pwrite_aligned to do the actual write.
341 */
342static int raw_pwrite(BlockDriverState *bs, int64_t offset,
343 const uint8_t *buf, int count)
344{
345 BDRVRawState *s = bs->opaque;
346 int size, ret, shift, sum;
347
348 sum = 0;
349
350 if (s->aligned_buf != NULL) {
351
352 if (offset & 0x1ff) {
353 /* align offset on a 512 bytes boundary */
354 shift = offset & 0x1ff;
355 ret = raw_pread_aligned(bs, offset - shift, s->aligned_buf, 512);
356 if (ret < 0)
357 return ret;
358
359 size = 512 - shift;
360 if (size > count)
361 size = count;
362 memcpy(s->aligned_buf + shift, buf, size);
363
364 ret = raw_pwrite_aligned(bs, offset - shift, s->aligned_buf, 512);
365 if (ret < 0)
366 return ret;
367
368 buf += size;
369 offset += size;
370 count -= size;
371 sum += size;
372
373 if (count == 0)
374 return sum;
375 }
376 if (count & 0x1ff || (uintptr_t) buf & 0x1ff) {
377
378 while ((size = (count & ~0x1ff)) != 0) {
379
380 if (size > ALIGNED_BUFFER_SIZE)
381 size = ALIGNED_BUFFER_SIZE;
382
383 memcpy(s->aligned_buf, buf, size);
384
385 ret = raw_pwrite_aligned(bs, offset, s->aligned_buf, size);
386 if (ret < 0)
387 return ret;
388
389 buf += ret;
390 offset += ret;
391 count -= ret;
392 sum += ret;
393 }
394 /* here, count < 512 because (count & ~0x1ff) == 0 */
395 if (count) {
396 ret = raw_pread_aligned(bs, offset, s->aligned_buf, 512);
397 if (ret < 0)
398 return ret;
399 memcpy(s->aligned_buf, buf, count);
400
401 ret = raw_pwrite_aligned(bs, offset, s->aligned_buf, 512);
402 if (ret < 0)
403 return ret;
404 if (count < ret)
405 ret = count;
406
407 sum += ret;
408 }
409 return sum;
410 }
411 }
412 return raw_pwrite_aligned(bs, offset, buf, count) + sum;
413}
414
415#else
416#define raw_pread raw_pread_aligned
417#define raw_pwrite raw_pwrite_aligned
418#endif
419
420
83f64091 421/***********************************************************/
19cb3738 422/* Unix AIO using POSIX AIO */
83f64091
FB
423
/* One asynchronous request issued by this driver. */
typedef struct RawAIOCB {
    BlockDriverAIOCB common;    /* generic part; first member so that
                                   raw_aio_cancel() can cast back to RawAIOCB */
    struct aiocb aiocb;         /* POSIX AIO control block for the request */
    struct RawAIOCB *next;      /* link in the first_aio list */
    int ret;                    /* result of the synchronous fallback, handed to
                                   the callback by raw_aio_em_cb() */
} RawAIOCB;

static int aio_sig_num = SIGUSR2;   /* signal requested for AIO completion */
static RawAIOCB *first_aio; /* AIO issued */
static int aio_initialized = 0;     /* set once qemu_aio_init() has run */
83f64091 434
83f64091
FB
435static void aio_signal_handler(int signum)
436{
2f726488 437#if !defined(QEMU_IMG) && !defined(QEMU_NBD)
83f64091
FB
438 CPUState *env = cpu_single_env;
439 if (env) {
440 /* stop the currently executing cpu because a timer occured */
441 cpu_interrupt(env, CPU_INTERRUPT_EXIT);
442#ifdef USE_KQEMU
443 if (env->kqemu_enabled) {
444 kqemu_cpu_interrupt(env);
445 }
446#endif
447 }
979b67ad 448#endif
83f64091
FB
449}
450
451void qemu_aio_init(void)
452{
453 struct sigaction act;
979b67ad
FB
454
455 aio_initialized = 1;
3b46e624 456
83f64091
FB
457 sigfillset(&act.sa_mask);
458 act.sa_flags = 0; /* do not restart syscalls to interrupt select() */
459 act.sa_handler = aio_signal_handler;
460 sigaction(aio_sig_num, &act, NULL);
461
19cb3738 462#if defined(__GLIBC__) && defined(__linux__)
83f64091 463 {
19cb3738
FB
464 /* XXX: aio thread exit seems to hang on RedHat 9 and this init
465 seems to fix the problem. */
83f64091
FB
466 struct aioinit ai;
467 memset(&ai, 0, sizeof(ai));
01534fe9
FB
468 ai.aio_threads = 1;
469 ai.aio_num = 1;
83f64091
FB
470 ai.aio_idle_time = 365 * 100000;
471 aio_init(&ai);
472 }
19cb3738 473#endif
83f64091 474}
83f64091
FB
475
/*
 * Reap finished requests from the first_aio list.
 *
 * Cancelled requests are unlinked and released silently.  A finished request
 * is unlinked, its callback is invoked with 0 (all bytes transferred),
 * -EINVAL (short transfer) or the negated aio_error() code, and then the
 * scan restarts from the head of the list.
 */
void qemu_aio_poll(void)
{
    RawAIOCB *acb, **pacb;
    int ret;

    for(;;) {
        /* (re)start the scan at the list head */
        pacb = &first_aio;
        for(;;) {
            acb = *pacb;
            if (!acb)
                goto the_end;   /* reached the end without finding finished work */
            ret = aio_error(&acb->aiocb);
            if (ret == ECANCELED) {
                /* remove the request */
                *pacb = acb->next;
                qemu_aio_release(acb);
            } else if (ret != EINPROGRESS) {
                /* end of aio */
                if (ret == 0) {
                    ret = aio_return(&acb->aiocb);
                    /* anything but a complete transfer is reported as an error */
                    if (ret == acb->aiocb.aio_nbytes)
                        ret = 0;
                    else
                        ret = -EINVAL;
                } else {
                    ret = -ret;
                }
                /* remove the request */
                *pacb = acb->next;
                /* call the callback */
                acb->common.cb(acb->common.opaque, ret);
                qemu_aio_release(acb);
                /* leave the inner loop so the next pass starts from first_aio */
                break;
            } else {
                /* still in progress: keep it and look at the next one */
                pacb = &acb->next;
            }
        }
    }
 the_end: ;
}
516
6192bc37
PB
517/* Wait for all IO requests to complete. */
518void qemu_aio_flush(void)
519{
520 qemu_aio_wait_start();
521 qemu_aio_poll();
522 while (first_aio) {
523 qemu_aio_wait();
524 }
525 qemu_aio_wait_end();
526}
527
83f64091
FB
528/* wait until at least one AIO was handled */
529static sigset_t wait_oset;
530
531void qemu_aio_wait_start(void)
532{
533 sigset_t set;
979b67ad
FB
534
535 if (!aio_initialized)
536 qemu_aio_init();
83f64091
FB
537 sigemptyset(&set);
538 sigaddset(&set, aio_sig_num);
539 sigprocmask(SIG_BLOCK, &set, &wait_oset);
540}
541
542void qemu_aio_wait(void)
543{
544 sigset_t set;
545 int nb_sigs;
6eb5733a 546
2f726488 547#if !defined(QEMU_IMG) && !defined(QEMU_NBD)
6eb5733a
FB
548 if (qemu_bh_poll())
549 return;
550#endif
83f64091
FB
551 sigemptyset(&set);
552 sigaddset(&set, aio_sig_num);
553 sigwait(&set, &nb_sigs);
554 qemu_aio_poll();
555}
556
/* End a wait section: restore the signal mask saved by qemu_aio_wait_start(). */
void qemu_aio_wait_end(void)
{
    sigprocmask(SIG_SETMASK, &wait_oset, NULL);
}
561
ce1a14dc
PB
562static RawAIOCB *raw_aio_setup(BlockDriverState *bs,
563 int64_t sector_num, uint8_t *buf, int nb_sectors,
564 BlockDriverCompletionFunc *cb, void *opaque)
83f64091 565{
ce1a14dc
PB
566 BDRVRawState *s = bs->opaque;
567 RawAIOCB *acb;
568
19cb3738
FB
569 if (fd_open(bs) < 0)
570 return NULL;
571
ce1a14dc
PB
572 acb = qemu_aio_get(bs, cb, opaque);
573 if (!acb)
574 return NULL;
575 acb->aiocb.aio_fildes = s->fd;
576 acb->aiocb.aio_sigevent.sigev_signo = aio_sig_num;
577 acb->aiocb.aio_sigevent.sigev_notify = SIGEV_SIGNAL;
578 acb->aiocb.aio_buf = buf;
985a03b0
TS
579 if (nb_sectors < 0)
580 acb->aiocb.aio_nbytes = -nb_sectors;
581 else
582 acb->aiocb.aio_nbytes = nb_sectors * 512;
ce1a14dc
PB
583 acb->aiocb.aio_offset = sector_num * 512;
584 acb->next = first_aio;
585 first_aio = acb;
586 return acb;
83f64091
FB
587}
588
2f726488 589#if !defined(QEMU_IMG) && !defined(QEMU_NBD)
bed5cc52
FB
/*
 * Bottom-half callback for the synchronous fallback used by raw_aio_read()
 * and raw_aio_write(): deliver the stored result to the request's completion
 * callback, then release the AIOCB.  'opaque' is the RawAIOCB.
 */
static void raw_aio_em_cb(void* opaque)
{
    RawAIOCB *acb = opaque;
    acb->common.cb(acb->common.opaque, acb->ret);
    qemu_aio_release(acb);
}
596#endif
597
ce1a14dc
PB
598static BlockDriverAIOCB *raw_aio_read(BlockDriverState *bs,
599 int64_t sector_num, uint8_t *buf, int nb_sectors,
600 BlockDriverCompletionFunc *cb, void *opaque)
83f64091 601{
ce1a14dc 602 RawAIOCB *acb;
83f64091 603
bed5cc52
FB
604 /*
605 * If O_DIRECT is used and the buffer is not aligned fall back
606 * to synchronous IO.
607 */
2f726488 608#if defined(O_DIRECT) && !defined(QEMU_IMG) && !defined(QEMU_NBD)
bed5cc52
FB
609 BDRVRawState *s = bs->opaque;
610
611 if (unlikely(s->aligned_buf != NULL && ((uintptr_t) buf % 512))) {
612 QEMUBH *bh;
613 acb = qemu_aio_get(bs, cb, opaque);
614 acb->ret = raw_pread(bs, 512 * sector_num, buf, 512 * nb_sectors);
615 bh = qemu_bh_new(raw_aio_em_cb, acb);
616 qemu_bh_schedule(bh);
617 return &acb->common;
618 }
619#endif
620
ce1a14dc
PB
621 acb = raw_aio_setup(bs, sector_num, buf, nb_sectors, cb, opaque);
622 if (!acb)
623 return NULL;
624 if (aio_read(&acb->aiocb) < 0) {
625 qemu_aio_release(acb);
626 return NULL;
5fafdf24 627 }
ce1a14dc 628 return &acb->common;
83f64091
FB
629}
630
ce1a14dc
PB
631static BlockDriverAIOCB *raw_aio_write(BlockDriverState *bs,
632 int64_t sector_num, const uint8_t *buf, int nb_sectors,
633 BlockDriverCompletionFunc *cb, void *opaque)
83f64091 634{
ce1a14dc 635 RawAIOCB *acb;
83f64091 636
bed5cc52
FB
637 /*
638 * If O_DIRECT is used and the buffer is not aligned fall back
639 * to synchronous IO.
640 */
2f726488 641#if defined(O_DIRECT) && !defined(QEMU_IMG) && !defined(QEMU_NBD)
bed5cc52
FB
642 BDRVRawState *s = bs->opaque;
643
644 if (unlikely(s->aligned_buf != NULL && ((uintptr_t) buf % 512))) {
645 QEMUBH *bh;
646 acb = qemu_aio_get(bs, cb, opaque);
647 acb->ret = raw_pwrite(bs, 512 * sector_num, buf, 512 * nb_sectors);
648 bh = qemu_bh_new(raw_aio_em_cb, acb);
649 qemu_bh_schedule(bh);
650 return &acb->common;
651 }
652#endif
653
ce1a14dc
PB
654 acb = raw_aio_setup(bs, sector_num, (uint8_t*)buf, nb_sectors, cb, opaque);
655 if (!acb)
656 return NULL;
657 if (aio_write(&acb->aiocb) < 0) {
658 qemu_aio_release(acb);
659 return NULL;
5fafdf24 660 }
ce1a14dc 661 return &acb->common;
83f64091
FB
662}
663
ce1a14dc 664static void raw_aio_cancel(BlockDriverAIOCB *blockacb)
83f64091 665{
83f64091 666 int ret;
ce1a14dc
PB
667 RawAIOCB *acb = (RawAIOCB *)blockacb;
668 RawAIOCB **pacb;
83f64091 669
ce1a14dc 670 ret = aio_cancel(acb->aiocb.aio_fildes, &acb->aiocb);
83f64091
FB
671 if (ret == AIO_NOTCANCELED) {
672 /* fail safe: if the aio could not be canceled, we wait for
673 it */
ce1a14dc 674 while (aio_error(&acb->aiocb) == EINPROGRESS);
83f64091
FB
675 }
676
677 /* remove the callback from the queue */
678 pacb = &first_aio;
679 for(;;) {
680 if (*pacb == NULL) {
681 break;
682 } else if (*pacb == acb) {
ce1a14dc
PB
683 *pacb = acb->next;
684 qemu_aio_release(acb);
83f64091
FB
685 break;
686 }
ce1a14dc 687 pacb = &acb->next;
83f64091
FB
688 }
689}
690
83f64091
FB
691static void raw_close(BlockDriverState *bs)
692{
693 BDRVRawState *s = bs->opaque;
19cb3738
FB
694 if (s->fd >= 0) {
695 close(s->fd);
696 s->fd = -1;
bed5cc52
FB
697#if defined(O_DIRECT) && !defined(QEMU_IMG)
698 if (s->aligned_buf != NULL)
699 qemu_free(s->aligned_buf);
700#endif
19cb3738 701 }
83f64091
FB
702}
703
704static int raw_truncate(BlockDriverState *bs, int64_t offset)
705{
706 BDRVRawState *s = bs->opaque;
19cb3738
FB
707 if (s->type != FTYPE_FILE)
708 return -ENOTSUP;
83f64091
FB
709 if (ftruncate(s->fd, offset) < 0)
710 return -errno;
711 return 0;
712}
713
714static int64_t raw_getlength(BlockDriverState *bs)
715{
716 BDRVRawState *s = bs->opaque;
717 int fd = s->fd;
718 int64_t size;
719#ifdef _BSD
720 struct stat sb;
721#endif
722#ifdef __sun__
723 struct dk_minfo minfo;
724 int rv;
725#endif
19cb3738
FB
726 int ret;
727
728 ret = fd_open(bs);
729 if (ret < 0)
730 return ret;
83f64091
FB
731
732#ifdef _BSD
733 if (!fstat(fd, &sb) && (S_IFCHR & sb.st_mode)) {
734#ifdef DIOCGMEDIASIZE
735 if (ioctl(fd, DIOCGMEDIASIZE, (off_t *)&size))
736#endif
737#ifdef CONFIG_COCOA
738 size = LONG_LONG_MAX;
739#else
740 size = lseek(fd, 0LL, SEEK_END);
741#endif
742 } else
743#endif
744#ifdef __sun__
745 /*
746 * use the DKIOCGMEDIAINFO ioctl to read the size.
747 */
748 rv = ioctl ( fd, DKIOCGMEDIAINFO, &minfo );
749 if ( rv != -1 ) {
750 size = minfo.dki_lbsize * minfo.dki_capacity;
751 } else /* there are reports that lseek on some devices
752 fails, but irc discussion said that contingency
753 on contingency was overkill */
754#endif
755 {
756 size = lseek(fd, 0, SEEK_END);
757 }
83f64091
FB
758 return size;
759}
760
/*
 * Create (or truncate) 'filename' and size it to total_size sectors of
 * 512 bytes.
 *
 * Returns 0 on success, -ENOTSUP if a backing file or any flag is requested,
 * -EINVAL if total_size is negative or too large to express in bytes,
 * -EIO if the file cannot be opened, or the negated errno when the file
 * cannot be resized.
 */
static int raw_create(const char *filename, int64_t total_size,
                      const char *backing_file, int flags)
{
    int fd;
    int ret = 0;

    if (flags || backing_file)
        return -ENOTSUP;

    /* total_size * 512 must fit in an int64_t */
    if (total_size < 0 || total_size > INT64_MAX / 512)
        return -EINVAL;

    fd = open(filename, O_WRONLY | O_CREAT | O_TRUNC | O_BINARY,
              0644);
    if (fd < 0)
        return -EIO;
    /* report a failed resize instead of claiming success */
    if (ftruncate(fd, total_size * 512) < 0)
        ret = -errno;
    close(fd);
    return ret;
}
777
/* Flush the device's host descriptor with fsync(); the result is ignored. */
static void raw_flush(BlockDriverState *bs)
{
    BDRVRawState *s = bs->opaque;
    fsync(s->fd);
}
783
/*
 * Driver table for plain files ("raw" format, "file" protocol).
 * The leading entries are positional and therefore follow the member order
 * of BlockDriver, which is declared outside this file; the remaining ones
 * are designated.
 */
BlockDriver bdrv_raw = {
    "raw",
    sizeof(BDRVRawState),
    NULL, /* no probe for protocols */
    raw_open,
    NULL,
    NULL,
    raw_close,
    raw_create,
    raw_flush,

    .bdrv_aio_read = raw_aio_read,
    .bdrv_aio_write = raw_aio_write,
    .bdrv_aio_cancel = raw_aio_cancel,
    .aiocb_size = sizeof(RawAIOCB),
    .protocol_name = "file",
    /* raw_pread/raw_pwrite are the *_aligned variants when O_DIRECT
       bounce buffering is compiled out */
    .bdrv_pread = raw_pread,
    .bdrv_pwrite = raw_pwrite,
    .bdrv_truncate = raw_truncate,
    .bdrv_getlength = raw_getlength,
};
805
19cb3738
FB
806/***********************************************/
807/* host device */
808
809#ifdef CONFIG_COCOA
810static kern_return_t FindEjectableCDMedia( io_iterator_t *mediaIterator );
811static kern_return_t GetBSDPath( io_iterator_t mediaIterator, char *bsdPath, CFIndex maxPathSize );
812
/*
 * Build an IOKit iterator over ejectable CD media and store it in
 * *mediaIterator.  Returns the result of IOServiceGetMatchingServices();
 * failures of the earlier steps are only printed.
 */
kern_return_t FindEjectableCDMedia( io_iterator_t *mediaIterator )
{
    kern_return_t       kernResult;
    mach_port_t     masterPort;
    CFMutableDictionaryRef  classesToMatch;

    kernResult = IOMasterPort( MACH_PORT_NULL, &masterPort );
    if ( KERN_SUCCESS != kernResult ) {
        printf( "IOMasterPort returned %d\n", kernResult );
        /* NOTE(review): execution continues with masterPort possibly unset */
    }

    classesToMatch = IOServiceMatching( kIOCDMediaClass );
    if ( classesToMatch == NULL ) {
        printf( "IOServiceMatching returned a NULL dictionary.\n" );
        /* NOTE(review): the NULL dictionary is still passed on below */
    } else {
        /* restrict the match to media flagged as ejectable */
        CFDictionarySetValue( classesToMatch, CFSTR( kIOMediaEjectableKey ), kCFBooleanTrue );
    }
    kernResult = IOServiceGetMatchingServices( masterPort, classesToMatch, mediaIterator );
    if ( KERN_SUCCESS != kernResult )
    {
        printf( "IOServiceGetMatchingServices returned %d\n", kernResult );
    }

    return kernResult;
}
838
839kern_return_t GetBSDPath( io_iterator_t mediaIterator, char *bsdPath, CFIndex maxPathSize )
840{
841 io_object_t nextMedia;
842 kern_return_t kernResult = KERN_FAILURE;
843 *bsdPath = '\0';
844 nextMedia = IOIteratorNext( mediaIterator );
845 if ( nextMedia )
846 {
847 CFTypeRef bsdPathAsCFString;
848 bsdPathAsCFString = IORegistryEntryCreateCFProperty( nextMedia, CFSTR( kIOBSDNameKey ), kCFAllocatorDefault, 0 );
849 if ( bsdPathAsCFString ) {
850 size_t devPathLength;
851 strcpy( bsdPath, _PATH_DEV );
852 strcat( bsdPath, "r" );
853 devPathLength = strlen( bsdPath );
854 if ( CFStringGetCString( bsdPathAsCFString, bsdPath + devPathLength, maxPathSize - devPathLength, kCFStringEncodingASCII ) ) {
855 kernResult = KERN_SUCCESS;
856 }
857 CFRelease( bsdPathAsCFString );
858 }
859 IOObjectRelease( nextMedia );
860 }
3b46e624 861
19cb3738
FB
862 return kernResult;
863}
864
865#endif
866
867static int hdev_open(BlockDriverState *bs, const char *filename, int flags)
868{
869 BDRVRawState *s = bs->opaque;
870 int fd, open_flags, ret;
871
872#ifdef CONFIG_COCOA
873 if (strstart(filename, "/dev/cdrom", NULL)) {
874 kern_return_t kernResult;
875 io_iterator_t mediaIterator;
876 char bsdPath[ MAXPATHLEN ];
877 int fd;
5fafdf24 878
19cb3738
FB
879 kernResult = FindEjectableCDMedia( &mediaIterator );
880 kernResult = GetBSDPath( mediaIterator, bsdPath, sizeof( bsdPath ) );
3b46e624 881
19cb3738
FB
882 if ( bsdPath[ 0 ] != '\0' ) {
883 strcat(bsdPath,"s0");
884 /* some CDs don't have a partition 0 */
885 fd = open(bsdPath, O_RDONLY | O_BINARY | O_LARGEFILE);
886 if (fd < 0) {
887 bsdPath[strlen(bsdPath)-1] = '1';
888 } else {
889 close(fd);
890 }
891 filename = bsdPath;
892 }
3b46e624 893
19cb3738
FB
894 if ( mediaIterator )
895 IOObjectRelease( mediaIterator );
896 }
897#endif
898 open_flags = O_BINARY;
899 if ((flags & BDRV_O_ACCESS) == O_RDWR) {
900 open_flags |= O_RDWR;
901 } else {
902 open_flags |= O_RDONLY;
903 bs->read_only = 1;
904 }
33f00271
AZ
905#ifdef O_DIRECT
906 if (flags & BDRV_O_DIRECT)
907 open_flags |= O_DIRECT;
908#endif
19cb3738
FB
909
910 s->type = FTYPE_FILE;
911#if defined(__linux__)
912 if (strstart(filename, "/dev/cd", NULL)) {
913 /* open will not fail even if no CD is inserted */
914 open_flags |= O_NONBLOCK;
915 s->type = FTYPE_CD;
916 } else if (strstart(filename, "/dev/fd", NULL)) {
917 s->type = FTYPE_FD;
6dd2db52 918 s->fd_open_flags = open_flags;
19cb3738
FB
919 /* open will not fail even if no floppy is inserted */
920 open_flags |= O_NONBLOCK;
985a03b0
TS
921 } else if (strstart(filename, "/dev/sg", NULL)) {
922 bs->sg = 1;
19cb3738
FB
923 }
924#endif
925 fd = open(filename, open_flags, 0644);
926 if (fd < 0) {
927 ret = -errno;
928 if (ret == -EROFS)
929 ret = -EACCES;
930 return ret;
931 }
932 s->fd = fd;
933#if defined(__linux__)
934 /* close fd so that we can reopen it as needed */
935 if (s->type == FTYPE_FD) {
936 close(s->fd);
937 s->fd = -1;
938 s->fd_media_changed = 1;
939 }
940#endif
941 return 0;
942}
943
2f726488 944#if defined(__linux__) && !defined(QEMU_IMG) && !defined(QEMU_NBD)
19cb3738
FB
945
/* Note: we do not have a reliable method to detect if the floppy is
   present. The current method is to try to open the floppy at every
   I/O and to keep it opened during a few hundreds of ms.

   Returns 0 when the device is usable (always, for anything that is not a
   floppy) and -EIO when the floppy cannot be opened.  Sets
   s->fd_media_changed whenever the medium goes from present to absent or
   from absent to present. */
static int fd_open(BlockDriverState *bs)
{
    BDRVRawState *s = bs->opaque;
    int last_media_present;

    if (s->type != FTYPE_FD)
        return 0;
    /* an open descriptor is taken as "medium was present" */
    last_media_present = (s->fd >= 0);
    /* descriptor not refreshed for FD_OPEN_TIMEOUT ms: close it so that the
       open() below probes the drive again */
    if (s->fd >= 0 &&
        (qemu_get_clock(rt_clock) - s->fd_open_time) >= FD_OPEN_TIMEOUT) {
        close(s->fd);
        s->fd = -1;
#ifdef DEBUG_FLOPPY
        printf("Floppy closed\n");
#endif
    }
    if (s->fd < 0) {
        /* after a failed open, do not retry before FD_OPEN_TIMEOUT ms */
        if (s->fd_got_error &&
            (qemu_get_clock(rt_clock) - s->fd_error_time) < FD_OPEN_TIMEOUT) {
#ifdef DEBUG_FLOPPY
            printf("No floppy (open delayed)\n");
#endif
            return -EIO;
        }
        s->fd = open(bs->filename, s->fd_open_flags);
        if (s->fd < 0) {
            /* remember the failure time for the back-off above */
            s->fd_error_time = qemu_get_clock(rt_clock);
            s->fd_got_error = 1;
            if (last_media_present)
                s->fd_media_changed = 1;
#ifdef DEBUG_FLOPPY
            printf("No floppy\n");
#endif
            return -EIO;
        }
#ifdef DEBUG_FLOPPY
        printf("Floppy opened\n");
#endif
    }
    if (!last_media_present)
        s->fd_media_changed = 1;
    /* every successful call restarts the keep-open window */
    s->fd_open_time = qemu_get_clock(rt_clock);
    s->fd_got_error = 0;
    return 0;
}
994#else
/* Variant for builds without Linux floppy handling: nothing to reopen,
   always reports success. */
static int fd_open(BlockDriverState *bs)
{
    return 0;
}
999#endif
1000
1001#if defined(__linux__)
1002
1003static int raw_is_inserted(BlockDriverState *bs)
1004{
1005 BDRVRawState *s = bs->opaque;
1006 int ret;
1007
1008 switch(s->type) {
1009 case FTYPE_CD:
1010 ret = ioctl(s->fd, CDROM_DRIVE_STATUS, CDSL_CURRENT);
1011 if (ret == CDS_DISC_OK)
1012 return 1;
1013 else
1014 return 0;
1015 break;
1016 case FTYPE_FD:
1017 ret = fd_open(bs);
1018 return (ret >= 0);
1019 default:
1020 return 1;
1021 }
1022}
1023
1024/* currently only used by fdc.c, but a CD version would be good too */
1025static int raw_media_changed(BlockDriverState *bs)
1026{
1027 BDRVRawState *s = bs->opaque;
1028
1029 switch(s->type) {
1030 case FTYPE_FD:
1031 {
1032 int ret;
1033 /* XXX: we do not have a true media changed indication. It
1034 does not work if the floppy is changed without trying
1035 to read it */
1036 fd_open(bs);
1037 ret = s->fd_media_changed;
1038 s->fd_media_changed = 0;
1039#ifdef DEBUG_FLOPPY
1040 printf("Floppy changed=%d\n", ret);
1041#endif
1042 return ret;
1043 }
1044 default:
1045 return -ENOTSUP;
1046 }
1047}
1048
1049static int raw_eject(BlockDriverState *bs, int eject_flag)
1050{
1051 BDRVRawState *s = bs->opaque;
1052
1053 switch(s->type) {
1054 case FTYPE_CD:
1055 if (eject_flag) {
1056 if (ioctl (s->fd, CDROMEJECT, NULL) < 0)
1057 perror("CDROMEJECT");
1058 } else {
1059 if (ioctl (s->fd, CDROMCLOSETRAY, NULL) < 0)
1060 perror("CDROMEJECT");
1061 }
1062 break;
1063 case FTYPE_FD:
1064 {
1065 int fd;
1066 if (s->fd >= 0) {
1067 close(s->fd);
1068 s->fd = -1;
1069 }
6dd2db52 1070 fd = open(bs->filename, s->fd_open_flags | O_NONBLOCK);
19cb3738
FB
1071 if (fd >= 0) {
1072 if (ioctl(fd, FDEJECT, 0) < 0)
1073 perror("FDEJECT");
1074 close(fd);
1075 }
1076 }
1077 break;
1078 default:
1079 return -ENOTSUP;
1080 }
1081 return 0;
1082}
1083
1084static int raw_set_locked(BlockDriverState *bs, int locked)
1085{
1086 BDRVRawState *s = bs->opaque;
1087
1088 switch(s->type) {
1089 case FTYPE_CD:
1090 if (ioctl (s->fd, CDROM_LOCKDOOR, locked) < 0) {
1091 /* Note: an error can happen if the distribution automatically
1092 mounts the CD-ROM */
1093 // perror("CDROM_LOCKDOOR");
1094 }
1095 break;
1096 default:
1097 return -ENOTSUP;
1098 }
1099 return 0;
1100}
1101
985a03b0
TS
/* Pass request 'req' with argument 'buf' straight to ioctl() on the device's
   host descriptor and return its result. */
static int raw_ioctl(BlockDriverState *bs, unsigned long int req, void *buf)
{
    BDRVRawState *s = bs->opaque;

    return ioctl(s->fd, req, buf);
}
19cb3738
FB
1108#else
1109
/* Non-Linux fallbacks for the removable-media and ioctl hooks. */

/* A medium is always reported as present. */
static int raw_is_inserted(BlockDriverState *bs)
{
    return 1;
}

/* Media change detection is not available. */
static int raw_media_changed(BlockDriverState *bs)
{
    return -ENOTSUP;
}

/* Ejecting is not available. */
static int raw_eject(BlockDriverState *bs, int eject_flag)
{
    return -ENOTSUP;
}

/* Door locking is not available. */
static int raw_set_locked(BlockDriverState *bs, int locked)
{
    return -ENOTSUP;
}

/* Device ioctl pass-through is not available. */
static int raw_ioctl(BlockDriverState *bs, unsigned long int req, void *buf)
{
    return -ENOTSUP;
}
19cb3738
FB
1134#endif /* !linux */
1135
/*
 * Driver table for host devices.  It shares the I/O paths of bdrv_raw but
 * opens through hdev_open(), provides no create or truncate hook, and adds
 * the removable-media and ioctl hooks.
 * The leading entries are positional and therefore follow the member order
 * of BlockDriver, which is declared outside this file.
 */
BlockDriver bdrv_host_device = {
    "host_device",
    sizeof(BDRVRawState),
    NULL, /* no probe for protocols */
    hdev_open,
    NULL,
    NULL,
    raw_close,
    NULL,
    raw_flush,

    .bdrv_aio_read = raw_aio_read,
    .bdrv_aio_write = raw_aio_write,
    .bdrv_aio_cancel = raw_aio_cancel,
    .aiocb_size = sizeof(RawAIOCB),
    .bdrv_pread = raw_pread,
    .bdrv_pwrite = raw_pwrite,
    .bdrv_getlength = raw_getlength,

    /* removable device support */
    .bdrv_is_inserted = raw_is_inserted,
    .bdrv_media_changed = raw_media_changed,
    .bdrv_eject = raw_eject,
    .bdrv_set_locked = raw_set_locked,
    /* generic scsi device */
    .bdrv_ioctl = raw_ioctl,
};
19cb3738 1162};