]> git.proxmox.com Git - mirror_qemu.git/blame - block/raw-posix.c
tcg/ppc: Fix !softmmu case
[mirror_qemu.git] / block / raw-posix.c
CommitLineData
83f64091 1/*
223d4670 2 * Block driver for RAW files (posix)
5fafdf24 3 *
83f64091 4 * Copyright (c) 2006 Fabrice Bellard
5fafdf24 5 *
83f64091
FB
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to deal
8 * in the Software without restriction, including without limitation the rights
9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 * copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22 * THE SOFTWARE.
23 */
faf07963 24#include "qemu-common.h"
87ecb68b 25#include "qemu-timer.h"
baf35cb9 26#include "qemu-char.h"
0bf9e31a 27#include "qemu-log.h"
83f64091 28#include "block_int.h"
5efa9d5a 29#include "module.h"
de81a169
PB
30#include "trace.h"
31#include "thread-pool.h"
32#include "iov.h"
9f8540ec 33#include "raw-aio.h"
83f64091 34
83affaa6 35#if defined(__APPLE__) && (__MACH__)
83f64091
FB
36#include <paths.h>
37#include <sys/param.h>
38#include <IOKit/IOKitLib.h>
39#include <IOKit/IOBSD.h>
40#include <IOKit/storage/IOMediaBSDClient.h>
41#include <IOKit/storage/IOMedia.h>
42#include <IOKit/storage/IOCDMedia.h>
43//#include <IOKit/storage/IOCDTypes.h>
44#include <CoreFoundation/CoreFoundation.h>
45#endif
46
47#ifdef __sun__
2e9671da 48#define _POSIX_PTHREAD_SEMANTICS 1
83f64091
FB
49#include <sys/dkio.h>
50#endif
19cb3738 51#ifdef __linux__
343f8568
JS
52#include <sys/types.h>
53#include <sys/stat.h>
19cb3738 54#include <sys/ioctl.h>
05acda4d 55#include <sys/param.h>
19cb3738
FB
56#include <linux/cdrom.h>
57#include <linux/fd.h>
5500316d
PB
58#include <linux/fs.h>
59#endif
60#ifdef CONFIG_FIEMAP
61#include <linux/fiemap.h>
19cb3738 62#endif
a167ba50 63#if defined (__FreeBSD__) || defined(__FreeBSD_kernel__)
1cb6c3fd 64#include <sys/disk.h>
9f23011a 65#include <sys/cdio.h>
1cb6c3fd 66#endif
83f64091 67
128ab2ff
BS
68#ifdef __OpenBSD__
69#include <sys/ioctl.h>
70#include <sys/disklabel.h>
71#include <sys/dkio.h>
72#endif
73
d1f6fd8d
CE
74#ifdef __NetBSD__
75#include <sys/ioctl.h>
76#include <sys/disklabel.h>
77#include <sys/dkio.h>
78#include <sys/disk.h>
79#endif
80
c5e97233
BS
81#ifdef __DragonFly__
82#include <sys/ioctl.h>
83#include <sys/diskslice.h>
84#endif
85
dce512de
CH
86#ifdef CONFIG_XFS
87#include <xfs/xfs.h>
88#endif
89
19cb3738 90//#define DEBUG_FLOPPY
83f64091 91
faf07963 92//#define DEBUG_BLOCK
03ff3ca3 93#if defined(DEBUG_BLOCK)
001faf32
BS
94#define DEBUG_BLOCK_PRINT(formatCstr, ...) do { if (qemu_log_enabled()) \
95 { qemu_log(formatCstr, ## __VA_ARGS__); qemu_log_flush(); } } while (0)
8c05dbf9 96#else
001faf32 97#define DEBUG_BLOCK_PRINT(formatCstr, ...)
8c05dbf9
TS
98#endif
99
f6465578
AL
100/* OS X does not have O_DSYNC */
101#ifndef O_DSYNC
1c27a8b3 102#ifdef O_SYNC
7ab064d2 103#define O_DSYNC O_SYNC
1c27a8b3
JA
104#elif defined(O_FSYNC)
105#define O_DSYNC O_FSYNC
106#endif
f6465578
AL
107#endif
108
9f7965c7
AL
109/* Approximate O_DIRECT with O_DSYNC if O_DIRECT isn't available */
110#ifndef O_DIRECT
111#define O_DIRECT O_DSYNC
112#endif
113
19cb3738
FB
114#define FTYPE_FILE 0
115#define FTYPE_CD 1
116#define FTYPE_FD 2
83f64091 117
c57c846a 118/* if the FD is not accessed during that time (in ns), we try to
19cb3738 119 reopen it to see if the disk has been changed */
c57c846a 120#define FD_OPEN_TIMEOUT (1000000000)
83f64091 121
581b9e29
CH
122#define MAX_BLOCKSIZE 4096
123
19cb3738
FB
/* Per-image state of the raw POSIX block driver. */
typedef struct BDRVRawState {
    int fd;                 /* host file descriptor, -1 while closed */
    int type;               /* one of the FTYPE_* constants */
    int open_flags;         /* host flags the image was opened with */
#if defined(__linux__)
    /* linux floppy specific */
    int64_t fd_open_time;
    int64_t fd_error_time;
    int fd_got_error;
    int fd_media_changed;
#endif
#ifdef CONFIG_LINUX_AIO
    int use_aio;            /* non-zero: submit through Linux native AIO */
    void *aio_ctx;          /* context obtained from laio_init() */
#endif
#ifdef CONFIG_XFS
    bool is_xfs : 1;        /* set when platform_test_xfs_fd() accepts fd */
#endif
} BDRVRawState;
143
eeb6b45d
JC
/* Staging area filled by raw_reopen_prepare() and consumed on commit/abort. */
typedef struct BDRVRawReopenState {
    int fd;                 /* newly opened descriptor, -1 if none */
    int open_flags;         /* host flags computed for the reopened image */
#ifdef CONFIG_LINUX_AIO
    int use_aio;            /* AIO setting to apply on commit */
#endif
} BDRVRawReopenState;
151
19cb3738 152static int fd_open(BlockDriverState *bs);
22afa7b5 153static int64_t raw_getlength(BlockDriverState *bs);
83f64091 154
de81a169
PB
155typedef struct RawPosixAIOData {
156 BlockDriverState *bs;
157 int aio_fildes;
158 union {
159 struct iovec *aio_iov;
160 void *aio_ioctl_buf;
161 };
162 int aio_niov;
163 size_t aio_nbytes;
164#define aio_ioctl_cmd aio_nbytes /* for QEMU_AIO_IOCTL */
165 off_t aio_offset;
166 int aio_type;
167} RawPosixAIOData;
168
a167ba50 169#if defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
f3a5d3f8 170static int cdrom_reopen(BlockDriverState *bs);
9f23011a
BS
171#endif
172
1de1ae0a
CE
173#if defined(__NetBSD__)
174static int raw_normalize_devicepath(const char **filename)
175{
176 static char namebuf[PATH_MAX];
177 const char *dp, *fname;
178 struct stat sb;
179
180 fname = *filename;
181 dp = strrchr(fname, '/');
182 if (lstat(fname, &sb) < 0) {
183 fprintf(stderr, "%s: stat failed: %s\n",
184 fname, strerror(errno));
185 return -errno;
186 }
187
188 if (!S_ISBLK(sb.st_mode)) {
189 return 0;
190 }
191
192 if (dp == NULL) {
193 snprintf(namebuf, PATH_MAX, "r%s", fname);
194 } else {
195 snprintf(namebuf, PATH_MAX, "%.*s/r%s",
196 (int)(dp - fname), fname, dp + 1);
197 }
198 fprintf(stderr, "%s is a block device", fname);
199 *filename = namebuf;
200 fprintf(stderr, ", using %s\n", *filename);
201
202 return 0;
203}
204#else
/* No device path rewriting is needed on this platform; always succeeds. */
static int raw_normalize_devicepath(const char **filename)
{
    (void)filename;

    return 0;
}
209#endif
210
6a8dc042
JC
211static void raw_parse_flags(int bdrv_flags, int *open_flags)
212{
213 assert(open_flags != NULL);
214
215 *open_flags |= O_BINARY;
216 *open_flags &= ~O_ACCMODE;
217 if (bdrv_flags & BDRV_O_RDWR) {
218 *open_flags |= O_RDWR;
219 } else {
220 *open_flags |= O_RDONLY;
221 }
222
223 /* Use O_DSYNC for write-through caching, no flags for write-back caching,
224 * and O_DIRECT for no caching. */
225 if ((bdrv_flags & BDRV_O_NOCACHE)) {
226 *open_flags |= O_DIRECT;
227 }
6a8dc042
JC
228}
229
fc32a72d
JC
230#ifdef CONFIG_LINUX_AIO
231static int raw_set_aio(void **aio_ctx, int *use_aio, int bdrv_flags)
232{
233 int ret = -1;
234 assert(aio_ctx != NULL);
235 assert(use_aio != NULL);
236 /*
237 * Currently Linux do AIO only for files opened with O_DIRECT
238 * specified so check NOCACHE flag too
239 */
240 if ((bdrv_flags & (BDRV_O_NOCACHE|BDRV_O_NATIVE_AIO)) ==
241 (BDRV_O_NOCACHE|BDRV_O_NATIVE_AIO)) {
242
243 /* if non-NULL, laio_init() has already been run */
244 if (*aio_ctx == NULL) {
245 *aio_ctx = laio_init();
246 if (!*aio_ctx) {
247 goto error;
248 }
249 }
250 *use_aio = 1;
251 } else {
252 *use_aio = 0;
253 }
254
255 ret = 0;
256
257error:
258 return ret;
259}
260#endif
261
90babde0 262static int raw_open_common(BlockDriverState *bs, const char *filename,
19a3da7f 263 int bdrv_flags, int open_flags)
83f64091
FB
264{
265 BDRVRawState *s = bs->opaque;
0e1d8f4c 266 int fd, ret;
83f64091 267
1de1ae0a
CE
268 ret = raw_normalize_devicepath(&filename);
269 if (ret != 0) {
270 return ret;
271 }
272
6a8dc042
JC
273 s->open_flags = open_flags;
274 raw_parse_flags(bdrv_flags, &s->open_flags);
83f64091 275
90babde0 276 s->fd = -1;
40ff6d7e 277 fd = qemu_open(filename, s->open_flags, 0644);
19cb3738
FB
278 if (fd < 0) {
279 ret = -errno;
280 if (ret == -EROFS)
281 ret = -EACCES;
282 return ret;
283 }
83f64091 284 s->fd = fd;
9ef91a67 285
5c6c3a6c 286#ifdef CONFIG_LINUX_AIO
fc32a72d 287 if (raw_set_aio(&s->aio_ctx, &s->use_aio, bdrv_flags)) {
47e6b251
PB
288 qemu_close(fd);
289 return -errno;
9ef91a67 290 }
fc32a72d 291#endif
9ef91a67 292
dce512de
CH
293#ifdef CONFIG_XFS
294 if (platform_test_xfs_fd(s->fd)) {
295 s->is_xfs = 1;
296 }
297#endif
298
83f64091
FB
299 return 0;
300}
301
90babde0
CH
302static int raw_open(BlockDriverState *bs, const char *filename, int flags)
303{
304 BDRVRawState *s = bs->opaque;
305
306 s->type = FTYPE_FILE;
9a2d77ad 307 return raw_open_common(bs, filename, flags, 0);
90babde0
CH
308}
309
eeb6b45d
JC
310static int raw_reopen_prepare(BDRVReopenState *state,
311 BlockReopenQueue *queue, Error **errp)
312{
313 BDRVRawState *s;
314 BDRVRawReopenState *raw_s;
315 int ret = 0;
316
317 assert(state != NULL);
318 assert(state->bs != NULL);
319
320 s = state->bs->opaque;
321
322 state->opaque = g_malloc0(sizeof(BDRVRawReopenState));
323 raw_s = state->opaque;
324
325#ifdef CONFIG_LINUX_AIO
326 raw_s->use_aio = s->use_aio;
327
328 /* we can use s->aio_ctx instead of a copy, because the use_aio flag is
329 * valid in the 'false' condition even if aio_ctx is set, and raw_set_aio()
330 * won't override aio_ctx if aio_ctx is non-NULL */
331 if (raw_set_aio(&s->aio_ctx, &raw_s->use_aio, state->flags)) {
332 return -1;
333 }
334#endif
335
336 raw_parse_flags(state->flags, &raw_s->open_flags);
337
338 raw_s->fd = -1;
339
340 int fcntl_flags = O_APPEND | O_ASYNC | O_NONBLOCK;
341#ifdef O_NOATIME
342 fcntl_flags |= O_NOATIME;
343#endif
344
345 if ((raw_s->open_flags & ~fcntl_flags) == (s->open_flags & ~fcntl_flags)) {
346 /* dup the original fd */
347 /* TODO: use qemu fcntl wrapper */
348#ifdef F_DUPFD_CLOEXEC
349 raw_s->fd = fcntl(s->fd, F_DUPFD_CLOEXEC, 0);
350#else
351 raw_s->fd = dup(s->fd);
352 if (raw_s->fd != -1) {
353 qemu_set_cloexec(raw_s->fd);
354 }
355#endif
356 if (raw_s->fd >= 0) {
357 ret = fcntl_setfl(raw_s->fd, raw_s->open_flags);
358 if (ret) {
359 qemu_close(raw_s->fd);
360 raw_s->fd = -1;
361 }
362 }
363 }
364
365 /* If we cannot use fcntl, or fcntl failed, fall back to qemu_open() */
366 if (raw_s->fd == -1) {
367 assert(!(raw_s->open_flags & O_CREAT));
368 raw_s->fd = qemu_open(state->bs->filename, raw_s->open_flags);
369 if (raw_s->fd == -1) {
370 ret = -1;
371 }
372 }
373 return ret;
374}
375
376
377static void raw_reopen_commit(BDRVReopenState *state)
378{
379 BDRVRawReopenState *raw_s = state->opaque;
380 BDRVRawState *s = state->bs->opaque;
381
382 s->open_flags = raw_s->open_flags;
383
384 qemu_close(s->fd);
385 s->fd = raw_s->fd;
386#ifdef CONFIG_LINUX_AIO
387 s->use_aio = raw_s->use_aio;
388#endif
389
390 g_free(state->opaque);
391 state->opaque = NULL;
392}
393
394
395static void raw_reopen_abort(BDRVReopenState *state)
396{
397 BDRVRawReopenState *raw_s = state->opaque;
398
399 /* nothing to do if NULL, we didn't get far enough */
400 if (raw_s == NULL) {
401 return;
402 }
403
404 if (raw_s->fd >= 0) {
405 qemu_close(raw_s->fd);
406 raw_s->fd = -1;
407 }
408 g_free(state->opaque);
409 state->opaque = NULL;
410}
411
412
83f64091
FB
413/* XXX: use host sector size if necessary with:
414#ifdef DIOCGSECTORSIZE
415 {
416 unsigned int sectorsize = 512;
417 if (!ioctl(fd, DIOCGSECTORSIZE, &sectorsize) &&
418 sectorsize > bufsize)
419 bufsize = sectorsize;
420 }
421#endif
422#ifdef CONFIG_COCOA
2ee9fb48 423 uint32_t blockSize = 512;
83f64091
FB
424 if ( !ioctl( fd, DKIOCGETBLOCKSIZE, &blockSize ) && blockSize > bufsize) {
425 bufsize = blockSize;
426 }
427#endif
428*/
429
9ef91a67
CH
430/*
431 * Check if all memory in this vector is sector aligned.
432 */
581b9e29 433static int qiov_is_aligned(BlockDriverState *bs, QEMUIOVector *qiov)
a76bab49 434{
9ef91a67 435 int i;
83f64091 436
9ef91a67 437 for (i = 0; i < qiov->niov; i++) {
581b9e29 438 if ((uintptr_t) qiov->iov[i].iov_base % bs->buffer_alignment) {
9ef91a67 439 return 0;
c16b5a2c 440 }
c16b5a2c 441 }
c16b5a2c 442
9ef91a67 443 return 1;
c16b5a2c
CH
444}
445
de81a169
PB
446static ssize_t handle_aiocb_ioctl(RawPosixAIOData *aiocb)
447{
448 int ret;
449
450 ret = ioctl(aiocb->aio_fildes, aiocb->aio_ioctl_cmd, aiocb->aio_ioctl_buf);
451 if (ret == -1) {
452 return -errno;
453 }
454
455 /*
456 * This looks weird, but the aio code only considers a request
457 * successful if it has written the full number of bytes.
458 *
459 * Now we overload aio_nbytes as aio_ioctl_cmd for the ioctl command,
460 * so in fact we return the ioctl command here to make posix_aio_read()
461 * happy..
462 */
463 return aiocb->aio_nbytes;
464}
465
466static ssize_t handle_aiocb_flush(RawPosixAIOData *aiocb)
467{
468 int ret;
469
470 ret = qemu_fdatasync(aiocb->aio_fildes);
471 if (ret == -1) {
472 return -errno;
473 }
474 return 0;
475}
476
477#ifdef CONFIG_PREADV
478
479static bool preadv_present = true;
480
/* Thin wrapper around the host's preadv(). */
static ssize_t
qemu_preadv(int fd, const struct iovec *iov, int nr_iov, off_t offset)
{
    ssize_t got = preadv(fd, iov, nr_iov, offset);

    return got;
}
486
/* Thin wrapper around the host's pwritev(). */
static ssize_t
qemu_pwritev(int fd, const struct iovec *iov, int nr_iov, off_t offset)
{
    ssize_t put = pwritev(fd, iov, nr_iov, offset);

    return put;
}
492
493#else
494
495static bool preadv_present = false;
496
/* Fallback used when preadv() is not available: always reports -ENOSYS. */
static ssize_t
qemu_preadv(int fd, const struct iovec *iov, int nr_iov, off_t offset)
{
    (void)fd;
    (void)iov;
    (void)nr_iov;
    (void)offset;

    return -ENOSYS;
}
502
/* Fallback used when pwritev() is not available: always reports -ENOSYS. */
static ssize_t
qemu_pwritev(int fd, const struct iovec *iov, int nr_iov, off_t offset)
{
    (void)fd;
    (void)iov;
    (void)nr_iov;
    (void)offset;

    return -ENOSYS;
}
508
509#endif
510
511static ssize_t handle_aiocb_rw_vector(RawPosixAIOData *aiocb)
512{
513 ssize_t len;
514
515 do {
516 if (aiocb->aio_type & QEMU_AIO_WRITE)
517 len = qemu_pwritev(aiocb->aio_fildes,
518 aiocb->aio_iov,
519 aiocb->aio_niov,
520 aiocb->aio_offset);
521 else
522 len = qemu_preadv(aiocb->aio_fildes,
523 aiocb->aio_iov,
524 aiocb->aio_niov,
525 aiocb->aio_offset);
526 } while (len == -1 && errno == EINTR);
527
528 if (len == -1) {
529 return -errno;
530 }
531 return len;
532}
533
534/*
535 * Read/writes the data to/from a given linear buffer.
536 *
537 * Returns the number of bytes handles or -errno in case of an error. Short
538 * reads are only returned if the end of the file is reached.
539 */
540static ssize_t handle_aiocb_rw_linear(RawPosixAIOData *aiocb, char *buf)
541{
542 ssize_t offset = 0;
543 ssize_t len;
544
545 while (offset < aiocb->aio_nbytes) {
546 if (aiocb->aio_type & QEMU_AIO_WRITE) {
547 len = pwrite(aiocb->aio_fildes,
548 (const char *)buf + offset,
549 aiocb->aio_nbytes - offset,
550 aiocb->aio_offset + offset);
551 } else {
552 len = pread(aiocb->aio_fildes,
553 buf + offset,
554 aiocb->aio_nbytes - offset,
555 aiocb->aio_offset + offset);
556 }
557 if (len == -1 && errno == EINTR) {
558 continue;
559 } else if (len == -1) {
560 offset = -errno;
561 break;
562 } else if (len == 0) {
563 break;
564 }
565 offset += len;
566 }
567
568 return offset;
569}
570
571static ssize_t handle_aiocb_rw(RawPosixAIOData *aiocb)
572{
573 ssize_t nbytes;
574 char *buf;
575
576 if (!(aiocb->aio_type & QEMU_AIO_MISALIGNED)) {
577 /*
578 * If there is just a single buffer, and it is properly aligned
579 * we can just use plain pread/pwrite without any problems.
580 */
581 if (aiocb->aio_niov == 1) {
582 return handle_aiocb_rw_linear(aiocb, aiocb->aio_iov->iov_base);
583 }
584 /*
585 * We have more than one iovec, and all are properly aligned.
586 *
587 * Try preadv/pwritev first and fall back to linearizing the
588 * buffer if it's not supported.
589 */
590 if (preadv_present) {
591 nbytes = handle_aiocb_rw_vector(aiocb);
592 if (nbytes == aiocb->aio_nbytes ||
593 (nbytes < 0 && nbytes != -ENOSYS)) {
594 return nbytes;
595 }
596 preadv_present = false;
597 }
598
599 /*
600 * XXX(hch): short read/write. no easy way to handle the reminder
601 * using these interfaces. For now retry using plain
602 * pread/pwrite?
603 */
604 }
605
606 /*
607 * Ok, we have to do it the hard way, copy all segments into
608 * a single aligned buffer.
609 */
610 buf = qemu_blockalign(aiocb->bs, aiocb->aio_nbytes);
611 if (aiocb->aio_type & QEMU_AIO_WRITE) {
612 char *p = buf;
613 int i;
614
615 for (i = 0; i < aiocb->aio_niov; ++i) {
616 memcpy(p, aiocb->aio_iov[i].iov_base, aiocb->aio_iov[i].iov_len);
617 p += aiocb->aio_iov[i].iov_len;
618 }
619 }
620
621 nbytes = handle_aiocb_rw_linear(aiocb, buf);
622 if (!(aiocb->aio_type & QEMU_AIO_WRITE)) {
623 char *p = buf;
624 size_t count = aiocb->aio_nbytes, copy;
625 int i;
626
627 for (i = 0; i < aiocb->aio_niov && count; ++i) {
628 copy = count;
629 if (copy > aiocb->aio_iov[i].iov_len) {
630 copy = aiocb->aio_iov[i].iov_len;
631 }
632 memcpy(aiocb->aio_iov[i].iov_base, p, copy);
633 p += copy;
634 count -= copy;
635 }
636 }
637 qemu_vfree(buf);
638
639 return nbytes;
640}
641
642static int aio_worker(void *arg)
643{
644 RawPosixAIOData *aiocb = arg;
645 ssize_t ret = 0;
646
647 switch (aiocb->aio_type & QEMU_AIO_TYPE_MASK) {
648 case QEMU_AIO_READ:
649 ret = handle_aiocb_rw(aiocb);
650 if (ret >= 0 && ret < aiocb->aio_nbytes && aiocb->bs->growable) {
651 iov_memset(aiocb->aio_iov, aiocb->aio_niov, ret,
652 0, aiocb->aio_nbytes - ret);
653
654 ret = aiocb->aio_nbytes;
655 }
656 if (ret == aiocb->aio_nbytes) {
657 ret = 0;
658 } else if (ret >= 0 && ret < aiocb->aio_nbytes) {
659 ret = -EINVAL;
660 }
661 break;
662 case QEMU_AIO_WRITE:
663 ret = handle_aiocb_rw(aiocb);
664 if (ret == aiocb->aio_nbytes) {
665 ret = 0;
666 } else if (ret >= 0 && ret < aiocb->aio_nbytes) {
667 ret = -EINVAL;
668 }
669 break;
670 case QEMU_AIO_FLUSH:
671 ret = handle_aiocb_flush(aiocb);
672 break;
673 case QEMU_AIO_IOCTL:
674 ret = handle_aiocb_ioctl(aiocb);
675 break;
676 default:
677 fprintf(stderr, "invalid aio request (0x%x)\n", aiocb->aio_type);
678 ret = -EINVAL;
679 break;
680 }
681
682 g_slice_free(RawPosixAIOData, aiocb);
683 return ret;
684}
685
686static BlockDriverAIOCB *paio_submit(BlockDriverState *bs, int fd,
687 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
688 BlockDriverCompletionFunc *cb, void *opaque, int type)
689{
690 RawPosixAIOData *acb = g_slice_new(RawPosixAIOData);
691
692 acb->bs = bs;
693 acb->aio_type = type;
694 acb->aio_fildes = fd;
695
696 if (qiov) {
697 acb->aio_iov = qiov->iov;
698 acb->aio_niov = qiov->niov;
699 }
700 acb->aio_nbytes = nb_sectors * 512;
701 acb->aio_offset = sector_num * 512;
702
703 trace_paio_submit(acb, opaque, sector_num, nb_sectors, type);
704 return thread_pool_submit_aio(aio_worker, acb, cb, opaque);
705}
706
707static BlockDriverAIOCB *paio_ioctl(BlockDriverState *bs, int fd,
708 unsigned long int req, void *buf,
709 BlockDriverCompletionFunc *cb, void *opaque)
710{
711 RawPosixAIOData *acb = g_slice_new(RawPosixAIOData);
712
713 acb->bs = bs;
714 acb->aio_type = QEMU_AIO_IOCTL;
715 acb->aio_fildes = fd;
716 acb->aio_offset = 0;
717 acb->aio_ioctl_buf = buf;
718 acb->aio_ioctl_cmd = req;
719
720 return thread_pool_submit_aio(aio_worker, acb, cb, opaque);
721}
722
9ef91a67
CH
723static BlockDriverAIOCB *raw_aio_submit(BlockDriverState *bs,
724 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
725 BlockDriverCompletionFunc *cb, void *opaque, int type)
83f64091 726{
ce1a14dc 727 BDRVRawState *s = bs->opaque;
ce1a14dc 728
19cb3738
FB
729 if (fd_open(bs) < 0)
730 return NULL;
731
f141eafe
AL
732 /*
733 * If O_DIRECT is used the buffer needs to be aligned on a sector
c1ee7d56 734 * boundary. Check if this is the case or tell the low-level
9ef91a67 735 * driver that it needs to copy the buffer.
f141eafe 736 */
9acc5a06 737 if ((bs->open_flags & BDRV_O_NOCACHE)) {
581b9e29 738 if (!qiov_is_aligned(bs, qiov)) {
5c6c3a6c 739 type |= QEMU_AIO_MISALIGNED;
e44bd6fc 740#ifdef CONFIG_LINUX_AIO
5c6c3a6c
CH
741 } else if (s->use_aio) {
742 return laio_submit(bs, s->aio_ctx, s->fd, sector_num, qiov,
e44bd6fc
SW
743 nb_sectors, cb, opaque, type);
744#endif
5c6c3a6c 745 }
9ef91a67 746 }
f141eafe 747
1e5b9d2f 748 return paio_submit(bs, s->fd, sector_num, qiov, nb_sectors,
9ef91a67 749 cb, opaque, type);
83f64091
FB
750}
751
f141eafe
AL
752static BlockDriverAIOCB *raw_aio_readv(BlockDriverState *bs,
753 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
ce1a14dc 754 BlockDriverCompletionFunc *cb, void *opaque)
83f64091 755{
9ef91a67
CH
756 return raw_aio_submit(bs, sector_num, qiov, nb_sectors,
757 cb, opaque, QEMU_AIO_READ);
83f64091
FB
758}
759
f141eafe
AL
760static BlockDriverAIOCB *raw_aio_writev(BlockDriverState *bs,
761 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
ce1a14dc 762 BlockDriverCompletionFunc *cb, void *opaque)
83f64091 763{
9ef91a67
CH
764 return raw_aio_submit(bs, sector_num, qiov, nb_sectors,
765 cb, opaque, QEMU_AIO_WRITE);
83f64091 766}
53538725 767
b2e12bc6
CH
768static BlockDriverAIOCB *raw_aio_flush(BlockDriverState *bs,
769 BlockDriverCompletionFunc *cb, void *opaque)
770{
771 BDRVRawState *s = bs->opaque;
772
773 if (fd_open(bs) < 0)
774 return NULL;
775
1e5b9d2f 776 return paio_submit(bs, s->fd, 0, NULL, 0, cb, opaque, QEMU_AIO_FLUSH);
b2e12bc6
CH
777}
778
83f64091
FB
779static void raw_close(BlockDriverState *bs)
780{
781 BDRVRawState *s = bs->opaque;
19cb3738 782 if (s->fd >= 0) {
2e1e79da 783 qemu_close(s->fd);
19cb3738
FB
784 s->fd = -1;
785 }
83f64091
FB
786}
787
788static int raw_truncate(BlockDriverState *bs, int64_t offset)
789{
790 BDRVRawState *s = bs->opaque;
55b949c8
CH
791 struct stat st;
792
793 if (fstat(s->fd, &st)) {
83f64091 794 return -errno;
55b949c8
CH
795 }
796
797 if (S_ISREG(st.st_mode)) {
798 if (ftruncate(s->fd, offset) < 0) {
799 return -errno;
800 }
801 } else if (S_ISCHR(st.st_mode) || S_ISBLK(st.st_mode)) {
802 if (offset > raw_getlength(bs)) {
803 return -EINVAL;
804 }
805 } else {
806 return -ENOTSUP;
807 }
808
83f64091
FB
809 return 0;
810}
811
128ab2ff
BS
812#ifdef __OpenBSD__
813static int64_t raw_getlength(BlockDriverState *bs)
814{
815 BDRVRawState *s = bs->opaque;
816 int fd = s->fd;
817 struct stat st;
818
819 if (fstat(fd, &st))
820 return -1;
821 if (S_ISCHR(st.st_mode) || S_ISBLK(st.st_mode)) {
822 struct disklabel dl;
823
824 if (ioctl(fd, DIOCGDINFO, &dl))
825 return -1;
826 return (uint64_t)dl.d_secsize *
827 dl.d_partitions[DISKPART(st.st_rdev)].p_size;
828 } else
829 return st.st_size;
830}
d1f6fd8d
CE
831#elif defined(__NetBSD__)
832static int64_t raw_getlength(BlockDriverState *bs)
833{
834 BDRVRawState *s = bs->opaque;
835 int fd = s->fd;
836 struct stat st;
837
838 if (fstat(fd, &st))
839 return -1;
840 if (S_ISCHR(st.st_mode) || S_ISBLK(st.st_mode)) {
841 struct dkwedge_info dkw;
842
843 if (ioctl(fd, DIOCGWEDGEINFO, &dkw) != -1) {
844 return dkw.dkw_size * 512;
845 } else {
846 struct disklabel dl;
847
848 if (ioctl(fd, DIOCGDINFO, &dl))
849 return -1;
850 return (uint64_t)dl.d_secsize *
851 dl.d_partitions[DISKPART(st.st_rdev)].p_size;
852 }
853 } else
854 return st.st_size;
855}
50779cc2
CH
856#elif defined(__sun__)
857static int64_t raw_getlength(BlockDriverState *bs)
858{
859 BDRVRawState *s = bs->opaque;
860 struct dk_minfo minfo;
861 int ret;
862
863 ret = fd_open(bs);
864 if (ret < 0) {
865 return ret;
866 }
867
868 /*
869 * Use the DKIOCGMEDIAINFO ioctl to read the size.
870 */
871 ret = ioctl(s->fd, DKIOCGMEDIAINFO, &minfo);
872 if (ret != -1) {
873 return minfo.dki_lbsize * minfo.dki_capacity;
874 }
875
876 /*
877 * There are reports that lseek on some devices fails, but
878 * irc discussion said that contingency on contingency was overkill.
879 */
880 return lseek(s->fd, 0, SEEK_END);
881}
882#elif defined(CONFIG_BSD)
/*
 * Generic BSD (CONFIG_BSD): return the image size in bytes.
 *
 * After fd_open() succeeds, devices matching (S_IFCHR & st_mode) are sized
 * with DIOCGMEDIASIZE or DIOCGPART where those ioctls exist; otherwise, or
 * when they do not yield a size, the size is LONG_LONG_MAX on Apple/Mach
 * and the end-of-file seek position elsewhere.  Everything else is sized
 * by seeking to the end of the file.
 *
 * On FreeBSD an FTYPE_CD image that reports no media is reopened once with
 * cdrom_reopen() and the size query is repeated.
 *
 * NOTE(review): the retry reuses the local `fd` captured before
 * cdrom_reopen(); if that function replaces s->fd the second query would
 * use the old descriptor - confirm against cdrom_reopen().
 */
883static int64_t raw_getlength(BlockDriverState *bs)
83f64091
FB
884{
885 BDRVRawState *s = bs->opaque;
886 int fd = s->fd;
887 int64_t size;
83f64091 888 struct stat sb;
a167ba50 889#if defined (__FreeBSD__) || defined(__FreeBSD_kernel__)
9f23011a 890 int reopened = 0;
83f64091 891#endif
19cb3738
FB
892 int ret;
893
894 ret = fd_open(bs);
895 if (ret < 0)
896 return ret;
83f64091 897
a167ba50 898#if defined (__FreeBSD__) || defined(__FreeBSD_kernel__)
9f23011a
BS
899again:
900#endif
83f64091
FB
901 if (!fstat(fd, &sb) && (S_IFCHR & sb.st_mode)) {
902#ifdef DIOCGMEDIASIZE
903 if (ioctl(fd, DIOCGMEDIASIZE, (off_t *)&size))
c5e97233
BS
904#elif defined(DIOCGPART)
905 {
906 struct partinfo pi;
907 if (ioctl(fd, DIOCGPART, &pi) == 0)
908 size = pi.media_size;
909 else
910 size = 0;
911 }
912 if (size == 0)
83f64091 913#endif
83affaa6 914#if defined(__APPLE__) && defined(__MACH__)
83f64091
FB
915 size = LONG_LONG_MAX;
916#else
917 size = lseek(fd, 0LL, SEEK_END);
9f23011a 918#endif
a167ba50 919#if defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
9f23011a
BS
920 switch(s->type) {
921 case FTYPE_CD:
922 /* XXX FreeBSD acd returns UINT_MAX sectors for an empty drive */
923 if (size == 2048LL * (unsigned)-1)
924 size = 0;
925 /* XXX no disc? maybe we need to reopen... */
f3a5d3f8 926 if (size <= 0 && !reopened && cdrom_reopen(bs) >= 0) {
9f23011a
BS
927 reopened = 1;
928 goto again;
929 }
930 }
83f64091 931#endif
50779cc2 932 } else {
83f64091
FB
933 size = lseek(fd, 0, SEEK_END);
934 }
83f64091
FB
935 return size;
936}
50779cc2
CH
937#else
938static int64_t raw_getlength(BlockDriverState *bs)
939{
940 BDRVRawState *s = bs->opaque;
941 int ret;
942
943 ret = fd_open(bs);
944 if (ret < 0) {
945 return ret;
946 }
947
948 return lseek(s->fd, 0, SEEK_END);
949}
128ab2ff 950#endif
83f64091 951
4a1d5e1f
FZ
952static int64_t raw_get_allocated_file_size(BlockDriverState *bs)
953{
954 struct stat st;
955 BDRVRawState *s = bs->opaque;
956
957 if (fstat(s->fd, &st) < 0) {
958 return -errno;
959 }
960 return (int64_t)st.st_blocks * 512;
961}
962
0e7e1989 963static int raw_create(const char *filename, QEMUOptionParameter *options)
83f64091
FB
964{
965 int fd;
1e37d059 966 int result = 0;
0e7e1989 967 int64_t total_size = 0;
83f64091 968
0e7e1989
KW
969 /* Read out options */
970 while (options && options->name) {
971 if (!strcmp(options->name, BLOCK_OPT_SIZE)) {
9040385d 972 total_size = options->value.n / BDRV_SECTOR_SIZE;
0e7e1989
KW
973 }
974 options++;
975 }
83f64091 976
6165f4d8
CB
977 fd = qemu_open(filename, O_WRONLY | O_CREAT | O_TRUNC | O_BINARY,
978 0644);
1e37d059
SW
979 if (fd < 0) {
980 result = -errno;
981 } else {
9040385d 982 if (ftruncate(fd, total_size * BDRV_SECTOR_SIZE) != 0) {
1e37d059
SW
983 result = -errno;
984 }
2e1e79da 985 if (qemu_close(fd) != 0) {
1e37d059
SW
986 result = -errno;
987 }
988 }
989 return result;
83f64091
FB
990}
991
5500316d
PB
992/*
993 * Returns true iff the specified sector is present in the disk image. Drivers
994 * not implementing the functionality are assumed to not support backing files,
995 * hence all their sectors are reported as allocated.
996 *
997 * If 'sector_num' is beyond the end of the disk image the return value is 0
998 * and 'pnum' is set to 0.
999 *
1000 * 'pnum' is set to the number of sectors (including and immediately following
1001 * the specified sector) that are known to be in the same
1002 * allocated/unallocated state.
1003 *
1004 * 'nb_sectors' is the max value 'pnum' should be set to. If nb_sectors goes
1005 * beyond the end of the disk image it will be clamped.
1006 */
/*
 * Implementation notes:
 *  - With CONFIG_FIEMAP the FS_IOC_FIEMAP ioctl is queried for a single
 *    extent; otherwise lseek(SEEK_HOLE/SEEK_DATA) is used when available;
 *    otherwise every sector is reported as allocated.
 *  - 'data' and 'hole' are byte offsets; the final block converts them to
 *    a sector count relative to 'start'.
 *  - If the FIEMAP ioctl or the SEEK_HOLE lseek fails, the whole range is
 *    reported as allocated.
 *  - NOTE(review): errno values are positive, so `assert(errno != -ENXIO)`
 *    below can never fire; confirm whether ENXIO was intended before
 *    changing it.
 */
1007static int coroutine_fn raw_co_is_allocated(BlockDriverState *bs,
1008 int64_t sector_num,
1009 int nb_sectors, int *pnum)
1010{
5500316d
PB
1011 off_t start, data, hole;
1012 int ret;
1013
1014 ret = fd_open(bs);
1015 if (ret < 0) {
1016 return ret;
1017 }
1018
1019 start = sector_num * BDRV_SECTOR_SIZE;
94282e71 1020
5500316d 1021#ifdef CONFIG_FIEMAP
94282e71
KW
1022
1023 BDRVRawState *s = bs->opaque;
5500316d
PB
1024 struct {
1025 struct fiemap fm;
1026 struct fiemap_extent fe;
1027 } f;
94282e71 1028
5500316d
PB
1029 f.fm.fm_start = start;
1030 f.fm.fm_length = (int64_t)nb_sectors * BDRV_SECTOR_SIZE;
1031 f.fm.fm_flags = 0;
1032 f.fm.fm_extent_count = 1;
1033 f.fm.fm_reserved = 0;
1034 if (ioctl(s->fd, FS_IOC_FIEMAP, &f) == -1) {
1035 /* Assume everything is allocated. */
1036 *pnum = nb_sectors;
1037 return 1;
1038 }
1039
1040 if (f.fm.fm_mapped_extents == 0) {
1041 /* No extents found, data is beyond f.fm.fm_start + f.fm.fm_length.
1042 * f.fm.fm_start + f.fm.fm_length must be clamped to the file size!
1043 */
1044 off_t length = lseek(s->fd, 0, SEEK_END);
1045 hole = f.fm.fm_start;
1046 data = MIN(f.fm.fm_start + f.fm.fm_length, length);
1047 } else {
1048 data = f.fe.fe_logical;
1049 hole = f.fe.fe_logical + f.fe.fe_length;
1050 }
94282e71 1051
5500316d 1052#elif defined SEEK_HOLE && defined SEEK_DATA
94282e71
KW
1053
1054 BDRVRawState *s = bs->opaque;
1055
5500316d
PB
1056 hole = lseek(s->fd, start, SEEK_HOLE);
1057 if (hole == -1) {
1058 /* -ENXIO indicates that sector_num was past the end of the file.
1059 * There is a virtual hole there. */
1060 assert(errno != -ENXIO);
1061
1062 /* Most likely EINVAL. Assume everything is allocated. */
1063 *pnum = nb_sectors;
1064 return 1;
1065 }
1066
1067 if (hole > start) {
1068 data = start;
1069 } else {
1070 /* On a hole. We need another syscall to find its end. */
1071 data = lseek(s->fd, start, SEEK_DATA);
1072 if (data == -1) {
1073 data = lseek(s->fd, 0, SEEK_END);
1074 }
1075 }
1076#else
1077 *pnum = nb_sectors;
1078 return 1;
1079#endif
1080
1081 if (data <= start) {
1082 /* On a data extent, compute sectors to the end of the extent. */
1083 *pnum = MIN(nb_sectors, (hole - start) / BDRV_SECTOR_SIZE);
1084 return 1;
1085 } else {
1086 /* On a hole, compute sectors to the beginning of the next extent. */
1087 *pnum = MIN(nb_sectors, (data - start) / BDRV_SECTOR_SIZE);
1088 return 0;
1089 }
1090}
1091
dce512de
CH
1092#ifdef CONFIG_XFS
1093static int xfs_discard(BDRVRawState *s, int64_t sector_num, int nb_sectors)
1094{
1095 struct xfs_flock64 fl;
1096
1097 memset(&fl, 0, sizeof(fl));
1098 fl.l_whence = SEEK_SET;
1099 fl.l_start = sector_num << 9;
1100 fl.l_len = (int64_t)nb_sectors << 9;
1101
1102 if (xfsctl(NULL, s->fd, XFS_IOC_UNRESVSP64, &fl) < 0) {
1103 DEBUG_BLOCK_PRINT("cannot punch hole (%s)\n", strerror(errno));
1104 return -errno;
1105 }
1106
1107 return 0;
1108}
1109#endif
1110
6db39ae2
PB
1111static coroutine_fn int raw_co_discard(BlockDriverState *bs,
1112 int64_t sector_num, int nb_sectors)
dce512de
CH
1113{
1114#ifdef CONFIG_XFS
1115 BDRVRawState *s = bs->opaque;
1116
1117 if (s->is_xfs) {
1118 return xfs_discard(s, sector_num, nb_sectors);
1119 }
1120#endif
1121
1122 return 0;
1123}
0e7e1989
KW
1124
1125static QEMUOptionParameter raw_create_options[] = {
db08adf5
KW
1126 {
1127 .name = BLOCK_OPT_SIZE,
1128 .type = OPT_SIZE,
1129 .help = "Virtual disk size"
1130 },
0e7e1989
KW
1131 { NULL }
1132};
1133
84a12e66
CH
1134static BlockDriver bdrv_file = {
1135 .format_name = "file",
1136 .protocol_name = "file",
856ae5c3
BS
1137 .instance_size = sizeof(BDRVRawState),
1138 .bdrv_probe = NULL, /* no probe for protocols */
66f82cee 1139 .bdrv_file_open = raw_open,
eeb6b45d
JC
1140 .bdrv_reopen_prepare = raw_reopen_prepare,
1141 .bdrv_reopen_commit = raw_reopen_commit,
1142 .bdrv_reopen_abort = raw_reopen_abort,
856ae5c3
BS
1143 .bdrv_close = raw_close,
1144 .bdrv_create = raw_create,
6db39ae2 1145 .bdrv_co_discard = raw_co_discard,
5500316d 1146 .bdrv_co_is_allocated = raw_co_is_allocated,
3b46e624 1147
f141eafe
AL
1148 .bdrv_aio_readv = raw_aio_readv,
1149 .bdrv_aio_writev = raw_aio_writev,
b2e12bc6 1150 .bdrv_aio_flush = raw_aio_flush,
3c529d93 1151
83f64091
FB
1152 .bdrv_truncate = raw_truncate,
1153 .bdrv_getlength = raw_getlength,
4a1d5e1f
FZ
1154 .bdrv_get_allocated_file_size
1155 = raw_get_allocated_file_size,
0e7e1989
KW
1156
1157 .create_options = raw_create_options,
83f64091
FB
1158};
1159
19cb3738
FB
1160/***********************************************/
1161/* host device */
1162
83affaa6 1163#if defined(__APPLE__) && defined(__MACH__)
19cb3738
FB
1164static kern_return_t FindEjectableCDMedia( io_iterator_t *mediaIterator );
1165static kern_return_t GetBSDPath( io_iterator_t mediaIterator, char *bsdPath, CFIndex maxPathSize );
1166
1167kern_return_t FindEjectableCDMedia( io_iterator_t *mediaIterator )
1168{
5fafdf24 1169 kern_return_t kernResult;
19cb3738
FB
1170 mach_port_t masterPort;
1171 CFMutableDictionaryRef classesToMatch;
1172
1173 kernResult = IOMasterPort( MACH_PORT_NULL, &masterPort );
1174 if ( KERN_SUCCESS != kernResult ) {
1175 printf( "IOMasterPort returned %d\n", kernResult );
1176 }
3b46e624 1177
5fafdf24 1178 classesToMatch = IOServiceMatching( kIOCDMediaClass );
19cb3738
FB
1179 if ( classesToMatch == NULL ) {
1180 printf( "IOServiceMatching returned a NULL dictionary.\n" );
1181 } else {
1182 CFDictionarySetValue( classesToMatch, CFSTR( kIOMediaEjectableKey ), kCFBooleanTrue );
1183 }
1184 kernResult = IOServiceGetMatchingServices( masterPort, classesToMatch, mediaIterator );
1185 if ( KERN_SUCCESS != kernResult )
1186 {
1187 printf( "IOServiceGetMatchingServices returned %d\n", kernResult );
1188 }
3b46e624 1189
19cb3738
FB
1190 return kernResult;
1191}
1192
1193kern_return_t GetBSDPath( io_iterator_t mediaIterator, char *bsdPath, CFIndex maxPathSize )
1194{
1195 io_object_t nextMedia;
1196 kern_return_t kernResult = KERN_FAILURE;
1197 *bsdPath = '\0';
1198 nextMedia = IOIteratorNext( mediaIterator );
1199 if ( nextMedia )
1200 {
1201 CFTypeRef bsdPathAsCFString;
1202 bsdPathAsCFString = IORegistryEntryCreateCFProperty( nextMedia, CFSTR( kIOBSDNameKey ), kCFAllocatorDefault, 0 );
1203 if ( bsdPathAsCFString ) {
1204 size_t devPathLength;
1205 strcpy( bsdPath, _PATH_DEV );
1206 strcat( bsdPath, "r" );
1207 devPathLength = strlen( bsdPath );
1208 if ( CFStringGetCString( bsdPathAsCFString, bsdPath + devPathLength, maxPathSize - devPathLength, kCFStringEncodingASCII ) ) {
1209 kernResult = KERN_SUCCESS;
1210 }
1211 CFRelease( bsdPathAsCFString );
1212 }
1213 IOObjectRelease( nextMedia );
1214 }
3b46e624 1215
19cb3738
FB
1216 return kernResult;
1217}
1218
1219#endif
1220
508c7cb3
CH
/*
 * Probe score for the host_device driver.
 *
 * Returns 50 for names starting with "/dev/cdrom" (so that a dedicated
 * CD-ROM driver can win with a higher score), 100 when stat() succeeds and
 * the file is a character or block device, and 0 otherwise.
 */
static int hdev_probe_device(const char *filename)
{
    struct stat info;
    int is_device;

    /* allow a dedicated CD-ROM driver to match with a higher priority */
    if (strstart(filename, "/dev/cdrom", NULL)) {
        return 50;
    }

    if (stat(filename, &info) < 0) {
        return 0;
    }

    is_device = S_ISCHR(info.st_mode) || S_ISBLK(info.st_mode);
    return is_device ? 100 : 0;
}
1236
19cb3738
FB
1237static int hdev_open(BlockDriverState *bs, const char *filename, int flags)
1238{
1239 BDRVRawState *s = bs->opaque;
a76bab49 1240
83affaa6 1241#if defined(__APPLE__) && defined(__MACH__)
19cb3738
FB
1242 if (strstart(filename, "/dev/cdrom", NULL)) {
1243 kern_return_t kernResult;
1244 io_iterator_t mediaIterator;
1245 char bsdPath[ MAXPATHLEN ];
1246 int fd;
5fafdf24 1247
19cb3738
FB
1248 kernResult = FindEjectableCDMedia( &mediaIterator );
1249 kernResult = GetBSDPath( mediaIterator, bsdPath, sizeof( bsdPath ) );
3b46e624 1250
19cb3738
FB
1251 if ( bsdPath[ 0 ] != '\0' ) {
1252 strcat(bsdPath,"s0");
1253 /* some CDs don't have a partition 0 */
6165f4d8 1254 fd = qemu_open(bsdPath, O_RDONLY | O_BINARY | O_LARGEFILE);
19cb3738
FB
1255 if (fd < 0) {
1256 bsdPath[strlen(bsdPath)-1] = '1';
1257 } else {
2e1e79da 1258 qemu_close(fd);
19cb3738
FB
1259 }
1260 filename = bsdPath;
1261 }
3b46e624 1262
19cb3738
FB
1263 if ( mediaIterator )
1264 IOObjectRelease( mediaIterator );
1265 }
1266#endif
19cb3738
FB
1267
1268 s->type = FTYPE_FILE;
4dd75c70 1269#if defined(__linux__)
05acda4d
BK
1270 {
1271 char resolved_path[ MAXPATHLEN ], *temp;
1272
1273 temp = realpath(filename, resolved_path);
1274 if (temp && strstart(temp, "/dev/sg", NULL)) {
1275 bs->sg = 1;
1276 }
19cb3738
FB
1277 }
1278#endif
90babde0 1279
19a3da7f 1280 return raw_open_common(bs, filename, flags, 0);
19cb3738
FB
1281}
1282
03ff3ca3 1283#if defined(__linux__)
19cb3738
FB
1284/* Note: we do not have a reliable method to detect if the floppy is
1285 present. The current method is to try to open the floppy at every
1286 I/O and to keep it opened during a few hundreds of ms. */
1287static int fd_open(BlockDriverState *bs)
1288{
1289 BDRVRawState *s = bs->opaque;
1290 int last_media_present;
1291
1292 if (s->type != FTYPE_FD)
1293 return 0;
1294 last_media_present = (s->fd >= 0);
5fafdf24 1295 if (s->fd >= 0 &&
c57c846a 1296 (get_clock() - s->fd_open_time) >= FD_OPEN_TIMEOUT) {
2e1e79da 1297 qemu_close(s->fd);
19cb3738
FB
1298 s->fd = -1;
1299#ifdef DEBUG_FLOPPY
1300 printf("Floppy closed\n");
1301#endif
1302 }
1303 if (s->fd < 0) {
5fafdf24 1304 if (s->fd_got_error &&
c57c846a 1305 (get_clock() - s->fd_error_time) < FD_OPEN_TIMEOUT) {
19cb3738
FB
1306#ifdef DEBUG_FLOPPY
1307 printf("No floppy (open delayed)\n");
1308#endif
1309 return -EIO;
1310 }
6165f4d8 1311 s->fd = qemu_open(bs->filename, s->open_flags & ~O_NONBLOCK);
19cb3738 1312 if (s->fd < 0) {
c57c846a 1313 s->fd_error_time = get_clock();
19cb3738
FB
1314 s->fd_got_error = 1;
1315 if (last_media_present)
1316 s->fd_media_changed = 1;
1317#ifdef DEBUG_FLOPPY
1318 printf("No floppy\n");
1319#endif
1320 return -EIO;
1321 }
1322#ifdef DEBUG_FLOPPY
1323 printf("Floppy opened\n");
1324#endif
1325 }
1326 if (!last_media_present)
1327 s->fd_media_changed = 1;
c57c846a 1328 s->fd_open_time = get_clock();
19cb3738
FB
1329 s->fd_got_error = 0;
1330 return 0;
1331}
19cb3738 1332
63ec93db 1333static int hdev_ioctl(BlockDriverState *bs, unsigned long int req, void *buf)
985a03b0
TS
1334{
1335 BDRVRawState *s = bs->opaque;
1336
1337 return ioctl(s->fd, req, buf);
1338}
221f715d 1339
63ec93db 1340static BlockDriverAIOCB *hdev_aio_ioctl(BlockDriverState *bs,
221f715d
AL
1341 unsigned long int req, void *buf,
1342 BlockDriverCompletionFunc *cb, void *opaque)
1343{
f141eafe 1344 BDRVRawState *s = bs->opaque;
221f715d 1345
f141eafe
AL
1346 if (fd_open(bs) < 0)
1347 return NULL;
9ef91a67 1348 return paio_ioctl(bs, s->fd, req, buf, cb, opaque);
221f715d
AL
1349}
1350
a167ba50 1351#elif defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
9f23011a
BS
1352static int fd_open(BlockDriverState *bs)
1353{
1354 BDRVRawState *s = bs->opaque;
1355
1356 /* this is just to ensure s->fd is sane (its called by io ops) */
1357 if (s->fd >= 0)
1358 return 0;
1359 return -EIO;
1360}
9f23011a 1361#else /* !linux && !FreeBSD */
19cb3738 1362
08af02e2
AL
1363static int fd_open(BlockDriverState *bs)
1364{
1365 return 0;
1366}
1367
221f715d 1368#endif /* !linux && !FreeBSD */
04eeb8b6 1369
0e7e1989 1370static int hdev_create(const char *filename, QEMUOptionParameter *options)
93c65b47
AL
1371{
1372 int fd;
1373 int ret = 0;
1374 struct stat stat_buf;
0e7e1989 1375 int64_t total_size = 0;
93c65b47 1376
0e7e1989
KW
1377 /* Read out options */
1378 while (options && options->name) {
1379 if (!strcmp(options->name, "size")) {
9040385d 1380 total_size = options->value.n / BDRV_SECTOR_SIZE;
0e7e1989
KW
1381 }
1382 options++;
1383 }
93c65b47 1384
6165f4d8 1385 fd = qemu_open(filename, O_WRONLY | O_BINARY);
93c65b47 1386 if (fd < 0)
57e69b7d 1387 return -errno;
93c65b47
AL
1388
1389 if (fstat(fd, &stat_buf) < 0)
57e69b7d 1390 ret = -errno;
4099df58 1391 else if (!S_ISBLK(stat_buf.st_mode) && !S_ISCHR(stat_buf.st_mode))
57e69b7d 1392 ret = -ENODEV;
9040385d 1393 else if (lseek(fd, 0, SEEK_END) < total_size * BDRV_SECTOR_SIZE)
93c65b47
AL
1394 ret = -ENOSPC;
1395
2e1e79da 1396 qemu_close(fd);
93c65b47
AL
1397 return ret;
1398}
1399
336c1c12
KW
1400static int hdev_has_zero_init(BlockDriverState *bs)
1401{
1402 return 0;
1403}
1404
5efa9d5a 1405static BlockDriver bdrv_host_device = {
0b4ce02e 1406 .format_name = "host_device",
84a12e66 1407 .protocol_name = "host_device",
0b4ce02e
KW
1408 .instance_size = sizeof(BDRVRawState),
1409 .bdrv_probe_device = hdev_probe_device,
66f82cee 1410 .bdrv_file_open = hdev_open,
0b4ce02e 1411 .bdrv_close = raw_close,
93c65b47 1412 .bdrv_create = hdev_create,
0b4ce02e 1413 .create_options = raw_create_options,
336c1c12 1414 .bdrv_has_zero_init = hdev_has_zero_init,
3b46e624 1415
f141eafe
AL
1416 .bdrv_aio_readv = raw_aio_readv,
1417 .bdrv_aio_writev = raw_aio_writev,
b2e12bc6 1418 .bdrv_aio_flush = raw_aio_flush,
3c529d93 1419
55b949c8 1420 .bdrv_truncate = raw_truncate,
e60f469c 1421 .bdrv_getlength = raw_getlength,
4a1d5e1f
FZ
1422 .bdrv_get_allocated_file_size
1423 = raw_get_allocated_file_size,
19cb3738 1424
f3a5d3f8 1425 /* generic scsi device */
63ec93db
CH
1426#ifdef __linux__
1427 .bdrv_ioctl = hdev_ioctl,
63ec93db
CH
1428 .bdrv_aio_ioctl = hdev_aio_ioctl,
1429#endif
f3a5d3f8
CH
1430};
1431
1432#ifdef __linux__
1433static int floppy_open(BlockDriverState *bs, const char *filename, int flags)
1434{
1435 BDRVRawState *s = bs->opaque;
1436 int ret;
1437
f3a5d3f8 1438 s->type = FTYPE_FD;
f3a5d3f8 1439
19a3da7f
BS
1440 /* open will not fail even if no floppy is inserted, so add O_NONBLOCK */
1441 ret = raw_open_common(bs, filename, flags, O_NONBLOCK);
f3a5d3f8
CH
1442 if (ret)
1443 return ret;
1444
1445 /* close fd so that we can reopen it as needed */
2e1e79da 1446 qemu_close(s->fd);
f3a5d3f8
CH
1447 s->fd = -1;
1448 s->fd_media_changed = 1;
1449
1450 return 0;
1451}
1452
508c7cb3
CH
1453static int floppy_probe_device(const char *filename)
1454{
2ebf7c4b
CR
1455 int fd, ret;
1456 int prio = 0;
1457 struct floppy_struct fdparam;
343f8568 1458 struct stat st;
2ebf7c4b 1459
e1740828
CB
1460 if (strstart(filename, "/dev/fd", NULL) &&
1461 !strstart(filename, "/dev/fdset/", NULL)) {
2ebf7c4b 1462 prio = 50;
e1740828 1463 }
2ebf7c4b 1464
6165f4d8 1465 fd = qemu_open(filename, O_RDONLY | O_NONBLOCK);
2ebf7c4b
CR
1466 if (fd < 0) {
1467 goto out;
1468 }
343f8568
JS
1469 ret = fstat(fd, &st);
1470 if (ret == -1 || !S_ISBLK(st.st_mode)) {
1471 goto outc;
1472 }
2ebf7c4b
CR
1473
1474 /* Attempt to detect via a floppy specific ioctl */
1475 ret = ioctl(fd, FDGETPRM, &fdparam);
1476 if (ret >= 0)
1477 prio = 100;
1478
343f8568 1479outc:
2e1e79da 1480 qemu_close(fd);
2ebf7c4b
CR
1481out:
1482 return prio;
508c7cb3
CH
1483}
1484
1485
f3a5d3f8
CH
1486static int floppy_is_inserted(BlockDriverState *bs)
1487{
1488 return fd_open(bs) >= 0;
1489}
1490
1491static int floppy_media_changed(BlockDriverState *bs)
1492{
1493 BDRVRawState *s = bs->opaque;
1494 int ret;
1495
1496 /*
1497 * XXX: we do not have a true media changed indication.
1498 * It does not work if the floppy is changed without trying to read it.
1499 */
1500 fd_open(bs);
1501 ret = s->fd_media_changed;
1502 s->fd_media_changed = 0;
1503#ifdef DEBUG_FLOPPY
1504 printf("Floppy changed=%d\n", ret);
1505#endif
1506 return ret;
1507}
1508
f36f3949 1509static void floppy_eject(BlockDriverState *bs, bool eject_flag)
f3a5d3f8
CH
1510{
1511 BDRVRawState *s = bs->opaque;
1512 int fd;
1513
1514 if (s->fd >= 0) {
2e1e79da 1515 qemu_close(s->fd);
f3a5d3f8
CH
1516 s->fd = -1;
1517 }
6165f4d8 1518 fd = qemu_open(bs->filename, s->open_flags | O_NONBLOCK);
f3a5d3f8
CH
1519 if (fd >= 0) {
1520 if (ioctl(fd, FDEJECT, 0) < 0)
1521 perror("FDEJECT");
2e1e79da 1522 qemu_close(fd);
f3a5d3f8 1523 }
f3a5d3f8
CH
1524}
1525
1526static BlockDriver bdrv_host_floppy = {
1527 .format_name = "host_floppy",
84a12e66 1528 .protocol_name = "host_floppy",
f3a5d3f8 1529 .instance_size = sizeof(BDRVRawState),
508c7cb3 1530 .bdrv_probe_device = floppy_probe_device,
66f82cee 1531 .bdrv_file_open = floppy_open,
f3a5d3f8
CH
1532 .bdrv_close = raw_close,
1533 .bdrv_create = hdev_create,
0b4ce02e 1534 .create_options = raw_create_options,
336c1c12 1535 .bdrv_has_zero_init = hdev_has_zero_init,
f3a5d3f8 1536
f3a5d3f8
CH
1537 .bdrv_aio_readv = raw_aio_readv,
1538 .bdrv_aio_writev = raw_aio_writev,
b2e12bc6 1539 .bdrv_aio_flush = raw_aio_flush,
f3a5d3f8 1540
55b949c8 1541 .bdrv_truncate = raw_truncate,
f3a5d3f8 1542 .bdrv_getlength = raw_getlength,
4a1d5e1f
FZ
1543 .bdrv_get_allocated_file_size
1544 = raw_get_allocated_file_size,
f3a5d3f8
CH
1545
1546 /* removable device support */
1547 .bdrv_is_inserted = floppy_is_inserted,
1548 .bdrv_media_changed = floppy_media_changed,
1549 .bdrv_eject = floppy_eject,
f3a5d3f8
CH
1550};
1551
1552static int cdrom_open(BlockDriverState *bs, const char *filename, int flags)
1553{
1554 BDRVRawState *s = bs->opaque;
1555
f3a5d3f8
CH
1556 s->type = FTYPE_CD;
1557
19a3da7f
BS
1558 /* open will not fail even if no CD is inserted, so add O_NONBLOCK */
1559 return raw_open_common(bs, filename, flags, O_NONBLOCK);
f3a5d3f8
CH
1560}
1561
508c7cb3
CH
1562static int cdrom_probe_device(const char *filename)
1563{
3baf720e
CR
1564 int fd, ret;
1565 int prio = 0;
343f8568 1566 struct stat st;
3baf720e 1567
6165f4d8 1568 fd = qemu_open(filename, O_RDONLY | O_NONBLOCK);
3baf720e
CR
1569 if (fd < 0) {
1570 goto out;
1571 }
343f8568
JS
1572 ret = fstat(fd, &st);
1573 if (ret == -1 || !S_ISBLK(st.st_mode)) {
1574 goto outc;
1575 }
3baf720e
CR
1576
1577 /* Attempt to detect via a CDROM specific ioctl */
1578 ret = ioctl(fd, CDROM_DRIVE_STATUS, CDSL_CURRENT);
1579 if (ret >= 0)
1580 prio = 100;
1581
343f8568 1582outc:
2e1e79da 1583 qemu_close(fd);
3baf720e
CR
1584out:
1585 return prio;
508c7cb3
CH
1586}
1587
f3a5d3f8
CH
1588static int cdrom_is_inserted(BlockDriverState *bs)
1589{
1590 BDRVRawState *s = bs->opaque;
1591 int ret;
1592
1593 ret = ioctl(s->fd, CDROM_DRIVE_STATUS, CDSL_CURRENT);
1594 if (ret == CDS_DISC_OK)
1595 return 1;
1596 return 0;
1597}
1598
f36f3949 1599static void cdrom_eject(BlockDriverState *bs, bool eject_flag)
f3a5d3f8
CH
1600{
1601 BDRVRawState *s = bs->opaque;
1602
1603 if (eject_flag) {
1604 if (ioctl(s->fd, CDROMEJECT, NULL) < 0)
1605 perror("CDROMEJECT");
1606 } else {
1607 if (ioctl(s->fd, CDROMCLOSETRAY, NULL) < 0)
1608 perror("CDROMEJECT");
1609 }
f3a5d3f8
CH
1610}
1611
025e849a 1612static void cdrom_lock_medium(BlockDriverState *bs, bool locked)
f3a5d3f8
CH
1613{
1614 BDRVRawState *s = bs->opaque;
1615
1616 if (ioctl(s->fd, CDROM_LOCKDOOR, locked) < 0) {
1617 /*
1618 * Note: an error can happen if the distribution automatically
1619 * mounts the CD-ROM
1620 */
1621 /* perror("CDROM_LOCKDOOR"); */
1622 }
f3a5d3f8
CH
1623}
1624
1625static BlockDriver bdrv_host_cdrom = {
1626 .format_name = "host_cdrom",
84a12e66 1627 .protocol_name = "host_cdrom",
f3a5d3f8 1628 .instance_size = sizeof(BDRVRawState),
508c7cb3 1629 .bdrv_probe_device = cdrom_probe_device,
66f82cee 1630 .bdrv_file_open = cdrom_open,
f3a5d3f8
CH
1631 .bdrv_close = raw_close,
1632 .bdrv_create = hdev_create,
0b4ce02e 1633 .create_options = raw_create_options,
336c1c12 1634 .bdrv_has_zero_init = hdev_has_zero_init,
f3a5d3f8 1635
f3a5d3f8
CH
1636 .bdrv_aio_readv = raw_aio_readv,
1637 .bdrv_aio_writev = raw_aio_writev,
b2e12bc6 1638 .bdrv_aio_flush = raw_aio_flush,
f3a5d3f8 1639
55b949c8 1640 .bdrv_truncate = raw_truncate,
f3a5d3f8 1641 .bdrv_getlength = raw_getlength,
4a1d5e1f
FZ
1642 .bdrv_get_allocated_file_size
1643 = raw_get_allocated_file_size,
f3a5d3f8
CH
1644
1645 /* removable device support */
1646 .bdrv_is_inserted = cdrom_is_inserted,
1647 .bdrv_eject = cdrom_eject,
025e849a 1648 .bdrv_lock_medium = cdrom_lock_medium,
f3a5d3f8
CH
1649
1650 /* generic scsi device */
63ec93db 1651 .bdrv_ioctl = hdev_ioctl,
63ec93db 1652 .bdrv_aio_ioctl = hdev_aio_ioctl,
f3a5d3f8
CH
1653};
1654#endif /* __linux__ */
1655
a167ba50 1656#if defined (__FreeBSD__) || defined(__FreeBSD_kernel__)
f3a5d3f8
CH
1657static int cdrom_open(BlockDriverState *bs, const char *filename, int flags)
1658{
1659 BDRVRawState *s = bs->opaque;
1660 int ret;
1661
1662 s->type = FTYPE_CD;
1663
19a3da7f 1664 ret = raw_open_common(bs, filename, flags, 0);
f3a5d3f8
CH
1665 if (ret)
1666 return ret;
1667
9b2260cb 1668 /* make sure the door isn't locked at this time */
f3a5d3f8
CH
1669 ioctl(s->fd, CDIOCALLOW);
1670 return 0;
1671}
1672
508c7cb3
CH
/*
 * Probe score for the host_cdrom driver (FreeBSD): 100 for names starting
 * with "/dev/cd" or "/dev/acd", 0 for everything else.
 */
static int cdrom_probe_device(const char *filename)
{
    if (strstart(filename, "/dev/cd", NULL)) {
        return 100;
    }
    if (strstart(filename, "/dev/acd", NULL)) {
        return 100;
    }
    return 0;
}
1680
f3a5d3f8
CH
1681static int cdrom_reopen(BlockDriverState *bs)
1682{
1683 BDRVRawState *s = bs->opaque;
1684 int fd;
1685
1686 /*
1687 * Force reread of possibly changed/newly loaded disc,
1688 * FreeBSD seems to not notice sometimes...
1689 */
1690 if (s->fd >= 0)
2e1e79da 1691 qemu_close(s->fd);
6165f4d8 1692 fd = qemu_open(bs->filename, s->open_flags, 0644);
f3a5d3f8
CH
1693 if (fd < 0) {
1694 s->fd = -1;
1695 return -EIO;
1696 }
1697 s->fd = fd;
1698
9b2260cb 1699 /* make sure the door isn't locked at this time */
f3a5d3f8
CH
1700 ioctl(s->fd, CDIOCALLOW);
1701 return 0;
1702}
1703
1704static int cdrom_is_inserted(BlockDriverState *bs)
1705{
1706 return raw_getlength(bs) > 0;
1707}
1708
f36f3949 1709static void cdrom_eject(BlockDriverState *bs, bool eject_flag)
f3a5d3f8
CH
1710{
1711 BDRVRawState *s = bs->opaque;
1712
1713 if (s->fd < 0)
822e1cd1 1714 return;
f3a5d3f8
CH
1715
1716 (void) ioctl(s->fd, CDIOCALLOW);
1717
1718 if (eject_flag) {
1719 if (ioctl(s->fd, CDIOCEJECT) < 0)
1720 perror("CDIOCEJECT");
1721 } else {
1722 if (ioctl(s->fd, CDIOCCLOSE) < 0)
1723 perror("CDIOCCLOSE");
1724 }
1725
822e1cd1 1726 cdrom_reopen(bs);
f3a5d3f8
CH
1727}
1728
025e849a 1729static void cdrom_lock_medium(BlockDriverState *bs, bool locked)
f3a5d3f8
CH
1730{
1731 BDRVRawState *s = bs->opaque;
1732
1733 if (s->fd < 0)
7bf37fed 1734 return;
f3a5d3f8
CH
1735 if (ioctl(s->fd, (locked ? CDIOCPREVENT : CDIOCALLOW)) < 0) {
1736 /*
1737 * Note: an error can happen if the distribution automatically
1738 * mounts the CD-ROM
1739 */
1740 /* perror("CDROM_LOCKDOOR"); */
1741 }
f3a5d3f8
CH
1742}
1743
1744static BlockDriver bdrv_host_cdrom = {
1745 .format_name = "host_cdrom",
84a12e66 1746 .protocol_name = "host_cdrom",
f3a5d3f8 1747 .instance_size = sizeof(BDRVRawState),
508c7cb3 1748 .bdrv_probe_device = cdrom_probe_device,
66f82cee 1749 .bdrv_file_open = cdrom_open,
f3a5d3f8
CH
1750 .bdrv_close = raw_close,
1751 .bdrv_create = hdev_create,
0b4ce02e 1752 .create_options = raw_create_options,
336c1c12 1753 .bdrv_has_zero_init = hdev_has_zero_init,
f3a5d3f8 1754
f3a5d3f8
CH
1755 .bdrv_aio_readv = raw_aio_readv,
1756 .bdrv_aio_writev = raw_aio_writev,
b2e12bc6 1757 .bdrv_aio_flush = raw_aio_flush,
f3a5d3f8 1758
55b949c8 1759 .bdrv_truncate = raw_truncate,
f3a5d3f8 1760 .bdrv_getlength = raw_getlength,
4a1d5e1f
FZ
1761 .bdrv_get_allocated_file_size
1762 = raw_get_allocated_file_size,
f3a5d3f8 1763
19cb3738 1764 /* removable device support */
f3a5d3f8
CH
1765 .bdrv_is_inserted = cdrom_is_inserted,
1766 .bdrv_eject = cdrom_eject,
025e849a 1767 .bdrv_lock_medium = cdrom_lock_medium,
19cb3738 1768};
f3a5d3f8 1769#endif /* __FreeBSD__ */
5efa9d5a 1770
84a12e66 1771static void bdrv_file_init(void)
5efa9d5a 1772{
508c7cb3
CH
1773 /*
1774 * Register all the drivers. Note that order is important, the driver
1775 * registered last will get probed first.
1776 */
84a12e66 1777 bdrv_register(&bdrv_file);
5efa9d5a 1778 bdrv_register(&bdrv_host_device);
f3a5d3f8
CH
1779#ifdef __linux__
1780 bdrv_register(&bdrv_host_floppy);
1781 bdrv_register(&bdrv_host_cdrom);
1782#endif
a167ba50 1783#if defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
f3a5d3f8
CH
1784 bdrv_register(&bdrv_host_cdrom);
1785#endif
5efa9d5a
AL
1786}
1787
84a12e66 1788block_init(bdrv_file_init);