]> git.proxmox.com Git - qemu.git/blame - block/raw-posix.c
raw-posix: support discard on more filesystems
[qemu.git] / block / raw-posix.c
CommitLineData
83f64091 1/*
223d4670 2 * Block driver for RAW files (posix)
5fafdf24 3 *
83f64091 4 * Copyright (c) 2006 Fabrice Bellard
5fafdf24 5 *
83f64091
FB
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to deal
8 * in the Software without restriction, including without limitation the rights
9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 * copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22 * THE SOFTWARE.
23 */
faf07963 24#include "qemu-common.h"
1de7afc9
PB
25#include "qemu/timer.h"
26#include "qemu/log.h"
737e150e 27#include "block/block_int.h"
1de7afc9 28#include "qemu/module.h"
de81a169 29#include "trace.h"
737e150e 30#include "block/thread-pool.h"
1de7afc9 31#include "qemu/iov.h"
9f8540ec 32#include "raw-aio.h"
83f64091 33
83affaa6 34#if defined(__APPLE__) && (__MACH__)
83f64091
FB
35#include <paths.h>
36#include <sys/param.h>
37#include <IOKit/IOKitLib.h>
38#include <IOKit/IOBSD.h>
39#include <IOKit/storage/IOMediaBSDClient.h>
40#include <IOKit/storage/IOMedia.h>
41#include <IOKit/storage/IOCDMedia.h>
42//#include <IOKit/storage/IOCDTypes.h>
43#include <CoreFoundation/CoreFoundation.h>
44#endif
45
46#ifdef __sun__
2e9671da 47#define _POSIX_PTHREAD_SEMANTICS 1
83f64091
FB
48#include <sys/dkio.h>
49#endif
19cb3738 50#ifdef __linux__
343f8568
JS
51#include <sys/types.h>
52#include <sys/stat.h>
19cb3738 53#include <sys/ioctl.h>
05acda4d 54#include <sys/param.h>
19cb3738
FB
55#include <linux/cdrom.h>
56#include <linux/fd.h>
5500316d
PB
57#include <linux/fs.h>
58#endif
59#ifdef CONFIG_FIEMAP
60#include <linux/fiemap.h>
19cb3738 61#endif
3d4fa43e
KK
62#ifdef CONFIG_FALLOCATE_PUNCH_HOLE
63#include <linux/falloc.h>
64#endif
a167ba50 65#if defined (__FreeBSD__) || defined(__FreeBSD_kernel__)
1cb6c3fd 66#include <sys/disk.h>
9f23011a 67#include <sys/cdio.h>
1cb6c3fd 68#endif
83f64091 69
128ab2ff
BS
70#ifdef __OpenBSD__
71#include <sys/ioctl.h>
72#include <sys/disklabel.h>
73#include <sys/dkio.h>
74#endif
75
d1f6fd8d
CE
76#ifdef __NetBSD__
77#include <sys/ioctl.h>
78#include <sys/disklabel.h>
79#include <sys/dkio.h>
80#include <sys/disk.h>
81#endif
82
c5e97233
BS
83#ifdef __DragonFly__
84#include <sys/ioctl.h>
85#include <sys/diskslice.h>
86#endif
87
dce512de
CH
88#ifdef CONFIG_XFS
89#include <xfs/xfs.h>
90#endif
91
19cb3738 92//#define DEBUG_FLOPPY
83f64091 93
faf07963 94//#define DEBUG_BLOCK
03ff3ca3 95#if defined(DEBUG_BLOCK)
001faf32
BS
96#define DEBUG_BLOCK_PRINT(formatCstr, ...) do { if (qemu_log_enabled()) \
97 { qemu_log(formatCstr, ## __VA_ARGS__); qemu_log_flush(); } } while (0)
8c05dbf9 98#else
001faf32 99#define DEBUG_BLOCK_PRINT(formatCstr, ...)
8c05dbf9
TS
100#endif
101
f6465578
AL
102/* OS X does not have O_DSYNC */
103#ifndef O_DSYNC
1c27a8b3 104#ifdef O_SYNC
7ab064d2 105#define O_DSYNC O_SYNC
1c27a8b3 106#elif defined(O_FSYNC)
107#define O_DSYNC O_FSYNC
108#endif
f6465578
AL
109#endif
110
9f7965c7
AL
111/* Approximate O_DIRECT with O_DSYNC if O_DIRECT isn't available */
112#ifndef O_DIRECT
113#define O_DIRECT O_DSYNC
114#endif
115
19cb3738
FB
116#define FTYPE_FILE 0
117#define FTYPE_CD 1
118#define FTYPE_FD 2
83f64091 119
c57c846a 120/* if the FD is not accessed during that time (in ns), we try to
19cb3738 121 reopen it to see if the disk has been changed */
c57c846a 122#define FD_OPEN_TIMEOUT (1000000000)
83f64091 123
581b9e29
CH
124#define MAX_BLOCKSIZE 4096
125
19cb3738
FB
/* Per-BlockDriverState private data of the POSIX raw file / host device
 * drivers (stored in bs->opaque). */
typedef struct BDRVRawState {
    int fd;             /* descriptor of the opened image, -1 when closed */
    int type;           /* one of the FTYPE_* constants */
    int open_flags;     /* open(2) flags the descriptor was opened with */
#if defined(__linux__)
    /* linux floppy specific */
    int64_t fd_open_time;
    int64_t fd_error_time;
    int fd_got_error;
    int fd_media_changed;
#endif
#ifdef CONFIG_LINUX_AIO
    int use_aio;        /* non-zero: submit aligned requests via laio_submit() */
    void *aio_ctx;      /* context from laio_init(); NULL until first needed */
#endif
#ifdef CONFIG_XFS
    bool is_xfs : 1;    /* set when platform_test_xfs_fd() accepted the fd */
#endif
} BDRVRawState;
145
eeb6b45d
JC
/* Scratch state built by raw_reopen_prepare() and consumed by
 * raw_reopen_commit() / raw_reopen_abort(). */
typedef struct BDRVRawReopenState {
    int fd;             /* descriptor prepared for the new flags, -1 if none */
    int open_flags;     /* open(2) flags computed for the reopened image */
#ifdef CONFIG_LINUX_AIO
    int use_aio;        /* use_aio value to install on commit */
#endif
} BDRVRawReopenState;
153
19cb3738 154static int fd_open(BlockDriverState *bs);
22afa7b5 155static int64_t raw_getlength(BlockDriverState *bs);
83f64091 156
de81a169
PB
/* One request handed to the thread pool; allocated in paio_submit() and
 * released by aio_worker(). */
typedef struct RawPosixAIOData {
    BlockDriverState *bs;       /* device the request belongs to */
    int aio_fildes;             /* descriptor to operate on */
    union {
        struct iovec *aio_iov;  /* scatter/gather list for read/write */
        void *aio_ioctl_buf;    /* argument buffer for QEMU_AIO_IOCTL */
    };
    int aio_niov;               /* number of entries in aio_iov */
    size_t aio_nbytes;          /* total transfer size in bytes */
#define aio_ioctl_cmd aio_nbytes /* for QEMU_AIO_IOCTL */
    off_t aio_offset;           /* byte offset within the file */
    int aio_type;               /* QEMU_AIO_* request type plus flag bits */
} RawPosixAIOData;
170
a167ba50 171#if defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
f3a5d3f8 172static int cdrom_reopen(BlockDriverState *bs);
9f23011a
BS
173#endif
174
1de1ae0a
CE
175#if defined(__NetBSD__)
/*
 * If *filename names a block device, redirect it to the corresponding raw
 * (character) device by inserting an 'r' in front of the last path
 * component.
 *
 * On success returns 0; *filename is either left untouched (not a block
 * device) or points to a static buffer holding the rewritten path.
 * On failure returns a negative errno value and leaves *filename untouched.
 */
static int raw_normalize_devicepath(const char **filename)
{
    static char namebuf[PATH_MAX];
    const char *dp, *fname;
    struct stat sb;
    int len;

    fname = *filename;
    dp = strrchr(fname, '/');
    if (lstat(fname, &sb) < 0) {
        /* Save errno first: strerror()/fprintf() are allowed to change it. */
        int saved_errno = errno;

        fprintf(stderr, "%s: stat failed: %s\n",
                fname, strerror(saved_errno));
        return -saved_errno;
    }

    if (!S_ISBLK(sb.st_mode)) {
        return 0;
    }

    if (dp == NULL) {
        len = snprintf(namebuf, PATH_MAX, "r%s", fname);
    } else {
        len = snprintf(namebuf, PATH_MAX, "%.*s/r%s",
                       (int)(dp - fname), fname, dp + 1);
    }
    /* A truncated name would silently refer to a different device. */
    if (len < 0 || len >= PATH_MAX) {
        return -ENAMETOOLONG;
    }
    fprintf(stderr, "%s is a block device", fname);
    *filename = namebuf;
    fprintf(stderr, ", using %s\n", *filename);

    return 0;
}
206#else
/* No path rewriting is needed on this platform: always succeeds and leaves
 * *filename untouched. */
static int raw_normalize_devicepath(const char **filename)
{
    (void)filename;

    return 0;
}
211#endif
212
6a8dc042
JC
213static void raw_parse_flags(int bdrv_flags, int *open_flags)
214{
215 assert(open_flags != NULL);
216
217 *open_flags |= O_BINARY;
218 *open_flags &= ~O_ACCMODE;
219 if (bdrv_flags & BDRV_O_RDWR) {
220 *open_flags |= O_RDWR;
221 } else {
222 *open_flags |= O_RDONLY;
223 }
224
225 /* Use O_DSYNC for write-through caching, no flags for write-back caching,
226 * and O_DIRECT for no caching. */
227 if ((bdrv_flags & BDRV_O_NOCACHE)) {
228 *open_flags |= O_DIRECT;
229 }
6a8dc042
JC
230}
231
fc32a72d
JC
232#ifdef CONFIG_LINUX_AIO
233static int raw_set_aio(void **aio_ctx, int *use_aio, int bdrv_flags)
234{
235 int ret = -1;
236 assert(aio_ctx != NULL);
237 assert(use_aio != NULL);
238 /*
239 * Currently Linux do AIO only for files opened with O_DIRECT
240 * specified so check NOCACHE flag too
241 */
242 if ((bdrv_flags & (BDRV_O_NOCACHE|BDRV_O_NATIVE_AIO)) ==
243 (BDRV_O_NOCACHE|BDRV_O_NATIVE_AIO)) {
244
245 /* if non-NULL, laio_init() has already been run */
246 if (*aio_ctx == NULL) {
247 *aio_ctx = laio_init();
248 if (!*aio_ctx) {
249 goto error;
250 }
251 }
252 *use_aio = 1;
253 } else {
254 *use_aio = 0;
255 }
256
257 ret = 0;
258
259error:
260 return ret;
261}
262#endif
263
90babde0 264static int raw_open_common(BlockDriverState *bs, const char *filename,
19a3da7f 265 int bdrv_flags, int open_flags)
83f64091
FB
266{
267 BDRVRawState *s = bs->opaque;
0e1d8f4c 268 int fd, ret;
83f64091 269
1de1ae0a
CE
270 ret = raw_normalize_devicepath(&filename);
271 if (ret != 0) {
272 return ret;
273 }
274
6a8dc042
JC
275 s->open_flags = open_flags;
276 raw_parse_flags(bdrv_flags, &s->open_flags);
83f64091 277
90babde0 278 s->fd = -1;
40ff6d7e 279 fd = qemu_open(filename, s->open_flags, 0644);
19cb3738
FB
280 if (fd < 0) {
281 ret = -errno;
282 if (ret == -EROFS)
283 ret = -EACCES;
284 return ret;
285 }
83f64091 286 s->fd = fd;
9ef91a67 287
5c6c3a6c 288#ifdef CONFIG_LINUX_AIO
fc32a72d 289 if (raw_set_aio(&s->aio_ctx, &s->use_aio, bdrv_flags)) {
47e6b251
PB
290 qemu_close(fd);
291 return -errno;
9ef91a67 292 }
fc32a72d 293#endif
9ef91a67 294
dce512de
CH
295#ifdef CONFIG_XFS
296 if (platform_test_xfs_fd(s->fd)) {
297 s->is_xfs = 1;
298 }
299#endif
300
83f64091
FB
301 return 0;
302}
303
90babde0
CH
304static int raw_open(BlockDriverState *bs, const char *filename, int flags)
305{
306 BDRVRawState *s = bs->opaque;
307
308 s->type = FTYPE_FILE;
9a2d77ad 309 return raw_open_common(bs, filename, flags, 0);
90babde0
CH
310}
311
eeb6b45d
JC
312static int raw_reopen_prepare(BDRVReopenState *state,
313 BlockReopenQueue *queue, Error **errp)
314{
315 BDRVRawState *s;
316 BDRVRawReopenState *raw_s;
317 int ret = 0;
318
319 assert(state != NULL);
320 assert(state->bs != NULL);
321
322 s = state->bs->opaque;
323
324 state->opaque = g_malloc0(sizeof(BDRVRawReopenState));
325 raw_s = state->opaque;
326
327#ifdef CONFIG_LINUX_AIO
328 raw_s->use_aio = s->use_aio;
329
330 /* we can use s->aio_ctx instead of a copy, because the use_aio flag is
331 * valid in the 'false' condition even if aio_ctx is set, and raw_set_aio()
332 * won't override aio_ctx if aio_ctx is non-NULL */
333 if (raw_set_aio(&s->aio_ctx, &raw_s->use_aio, state->flags)) {
334 return -1;
335 }
336#endif
337
1bc6b705
JC
338 if (s->type == FTYPE_FD || s->type == FTYPE_CD) {
339 raw_s->open_flags |= O_NONBLOCK;
340 }
341
eeb6b45d
JC
342 raw_parse_flags(state->flags, &raw_s->open_flags);
343
344 raw_s->fd = -1;
345
346 int fcntl_flags = O_APPEND | O_ASYNC | O_NONBLOCK;
347#ifdef O_NOATIME
348 fcntl_flags |= O_NOATIME;
349#endif
350
351 if ((raw_s->open_flags & ~fcntl_flags) == (s->open_flags & ~fcntl_flags)) {
352 /* dup the original fd */
353 /* TODO: use qemu fcntl wrapper */
354#ifdef F_DUPFD_CLOEXEC
355 raw_s->fd = fcntl(s->fd, F_DUPFD_CLOEXEC, 0);
356#else
357 raw_s->fd = dup(s->fd);
358 if (raw_s->fd != -1) {
359 qemu_set_cloexec(raw_s->fd);
360 }
361#endif
362 if (raw_s->fd >= 0) {
363 ret = fcntl_setfl(raw_s->fd, raw_s->open_flags);
364 if (ret) {
365 qemu_close(raw_s->fd);
366 raw_s->fd = -1;
367 }
368 }
369 }
370
371 /* If we cannot use fcntl, or fcntl failed, fall back to qemu_open() */
372 if (raw_s->fd == -1) {
373 assert(!(raw_s->open_flags & O_CREAT));
374 raw_s->fd = qemu_open(state->bs->filename, raw_s->open_flags);
375 if (raw_s->fd == -1) {
376 ret = -1;
377 }
378 }
379 return ret;
380}
381
382
383static void raw_reopen_commit(BDRVReopenState *state)
384{
385 BDRVRawReopenState *raw_s = state->opaque;
386 BDRVRawState *s = state->bs->opaque;
387
388 s->open_flags = raw_s->open_flags;
389
390 qemu_close(s->fd);
391 s->fd = raw_s->fd;
392#ifdef CONFIG_LINUX_AIO
393 s->use_aio = raw_s->use_aio;
394#endif
395
396 g_free(state->opaque);
397 state->opaque = NULL;
398}
399
400
401static void raw_reopen_abort(BDRVReopenState *state)
402{
403 BDRVRawReopenState *raw_s = state->opaque;
404
405 /* nothing to do if NULL, we didn't get far enough */
406 if (raw_s == NULL) {
407 return;
408 }
409
410 if (raw_s->fd >= 0) {
411 qemu_close(raw_s->fd);
412 raw_s->fd = -1;
413 }
414 g_free(state->opaque);
415 state->opaque = NULL;
416}
417
418
83f64091
FB
419/* XXX: use host sector size if necessary with:
420#ifdef DIOCGSECTORSIZE
421 {
422 unsigned int sectorsize = 512;
423 if (!ioctl(fd, DIOCGSECTORSIZE, &sectorsize) &&
424 sectorsize > bufsize)
425 bufsize = sectorsize;
426 }
427#endif
428#ifdef CONFIG_COCOA
2ee9fb48 429 uint32_t blockSize = 512;
83f64091
FB
430 if ( !ioctl( fd, DKIOCGETBLOCKSIZE, &blockSize ) && blockSize > bufsize) {
431 bufsize = blockSize;
432 }
433#endif
434*/
435
de81a169
PB
436static ssize_t handle_aiocb_ioctl(RawPosixAIOData *aiocb)
437{
438 int ret;
439
440 ret = ioctl(aiocb->aio_fildes, aiocb->aio_ioctl_cmd, aiocb->aio_ioctl_buf);
441 if (ret == -1) {
442 return -errno;
443 }
444
b608c8dc 445 return 0;
de81a169
PB
446}
447
448static ssize_t handle_aiocb_flush(RawPosixAIOData *aiocb)
449{
450 int ret;
451
452 ret = qemu_fdatasync(aiocb->aio_fildes);
453 if (ret == -1) {
454 return -errno;
455 }
456 return 0;
457}
458
/*
 * Vectored positional I/O wrappers.
 *
 * With CONFIG_PREADV they forward to preadv()/pwritev(); without it they
 * are stubs returning -ENOSYS and preadv_present starts out false, so
 * handle_aiocb_rw() does not take the vectored path.
 */
#ifdef CONFIG_PREADV

/* Cleared by handle_aiocb_rw() once the vectored path did not complete. */
static bool preadv_present = true;

static ssize_t
qemu_preadv(int fd, const struct iovec *iov, int nr_iov, off_t offset)
{
    return preadv(fd, iov, nr_iov, offset);
}

static ssize_t
qemu_pwritev(int fd, const struct iovec *iov, int nr_iov, off_t offset)
{
    return pwritev(fd, iov, nr_iov, offset);
}

#else

static bool preadv_present = false;

/* Stub: vectored reads are not available in this build. */
static ssize_t
qemu_preadv(int fd, const struct iovec *iov, int nr_iov, off_t offset)
{
    return -ENOSYS;
}

/* Stub: vectored writes are not available in this build. */
static ssize_t
qemu_pwritev(int fd, const struct iovec *iov, int nr_iov, off_t offset)
{
    return -ENOSYS;
}

#endif
492
493static ssize_t handle_aiocb_rw_vector(RawPosixAIOData *aiocb)
494{
495 ssize_t len;
496
497 do {
498 if (aiocb->aio_type & QEMU_AIO_WRITE)
499 len = qemu_pwritev(aiocb->aio_fildes,
500 aiocb->aio_iov,
501 aiocb->aio_niov,
502 aiocb->aio_offset);
503 else
504 len = qemu_preadv(aiocb->aio_fildes,
505 aiocb->aio_iov,
506 aiocb->aio_niov,
507 aiocb->aio_offset);
508 } while (len == -1 && errno == EINTR);
509
510 if (len == -1) {
511 return -errno;
512 }
513 return len;
514}
515
516/*
517 * Read/writes the data to/from a given linear buffer.
518 *
519 * Returns the number of bytes handles or -errno in case of an error. Short
520 * reads are only returned if the end of the file is reached.
521 */
522static ssize_t handle_aiocb_rw_linear(RawPosixAIOData *aiocb, char *buf)
523{
524 ssize_t offset = 0;
525 ssize_t len;
526
527 while (offset < aiocb->aio_nbytes) {
528 if (aiocb->aio_type & QEMU_AIO_WRITE) {
529 len = pwrite(aiocb->aio_fildes,
530 (const char *)buf + offset,
531 aiocb->aio_nbytes - offset,
532 aiocb->aio_offset + offset);
533 } else {
534 len = pread(aiocb->aio_fildes,
535 buf + offset,
536 aiocb->aio_nbytes - offset,
537 aiocb->aio_offset + offset);
538 }
539 if (len == -1 && errno == EINTR) {
540 continue;
541 } else if (len == -1) {
542 offset = -errno;
543 break;
544 } else if (len == 0) {
545 break;
546 }
547 offset += len;
548 }
549
550 return offset;
551}
552
553static ssize_t handle_aiocb_rw(RawPosixAIOData *aiocb)
554{
555 ssize_t nbytes;
556 char *buf;
557
558 if (!(aiocb->aio_type & QEMU_AIO_MISALIGNED)) {
559 /*
560 * If there is just a single buffer, and it is properly aligned
561 * we can just use plain pread/pwrite without any problems.
562 */
563 if (aiocb->aio_niov == 1) {
564 return handle_aiocb_rw_linear(aiocb, aiocb->aio_iov->iov_base);
565 }
566 /*
567 * We have more than one iovec, and all are properly aligned.
568 *
569 * Try preadv/pwritev first and fall back to linearizing the
570 * buffer if it's not supported.
571 */
572 if (preadv_present) {
573 nbytes = handle_aiocb_rw_vector(aiocb);
574 if (nbytes == aiocb->aio_nbytes ||
575 (nbytes < 0 && nbytes != -ENOSYS)) {
576 return nbytes;
577 }
578 preadv_present = false;
579 }
580
581 /*
582 * XXX(hch): short read/write. no easy way to handle the reminder
583 * using these interfaces. For now retry using plain
584 * pread/pwrite?
585 */
586 }
587
588 /*
589 * Ok, we have to do it the hard way, copy all segments into
590 * a single aligned buffer.
591 */
592 buf = qemu_blockalign(aiocb->bs, aiocb->aio_nbytes);
593 if (aiocb->aio_type & QEMU_AIO_WRITE) {
594 char *p = buf;
595 int i;
596
597 for (i = 0; i < aiocb->aio_niov; ++i) {
598 memcpy(p, aiocb->aio_iov[i].iov_base, aiocb->aio_iov[i].iov_len);
599 p += aiocb->aio_iov[i].iov_len;
600 }
601 }
602
603 nbytes = handle_aiocb_rw_linear(aiocb, buf);
604 if (!(aiocb->aio_type & QEMU_AIO_WRITE)) {
605 char *p = buf;
606 size_t count = aiocb->aio_nbytes, copy;
607 int i;
608
609 for (i = 0; i < aiocb->aio_niov && count; ++i) {
610 copy = count;
611 if (copy > aiocb->aio_iov[i].iov_len) {
612 copy = aiocb->aio_iov[i].iov_len;
613 }
614 memcpy(aiocb->aio_iov[i].iov_base, p, copy);
615 p += copy;
616 count -= copy;
617 }
618 }
619 qemu_vfree(buf);
620
621 return nbytes;
622}
623
624static int aio_worker(void *arg)
625{
626 RawPosixAIOData *aiocb = arg;
627 ssize_t ret = 0;
628
629 switch (aiocb->aio_type & QEMU_AIO_TYPE_MASK) {
630 case QEMU_AIO_READ:
631 ret = handle_aiocb_rw(aiocb);
632 if (ret >= 0 && ret < aiocb->aio_nbytes && aiocb->bs->growable) {
633 iov_memset(aiocb->aio_iov, aiocb->aio_niov, ret,
634 0, aiocb->aio_nbytes - ret);
635
636 ret = aiocb->aio_nbytes;
637 }
638 if (ret == aiocb->aio_nbytes) {
639 ret = 0;
640 } else if (ret >= 0 && ret < aiocb->aio_nbytes) {
641 ret = -EINVAL;
642 }
643 break;
644 case QEMU_AIO_WRITE:
645 ret = handle_aiocb_rw(aiocb);
646 if (ret == aiocb->aio_nbytes) {
647 ret = 0;
648 } else if (ret >= 0 && ret < aiocb->aio_nbytes) {
649 ret = -EINVAL;
650 }
651 break;
652 case QEMU_AIO_FLUSH:
653 ret = handle_aiocb_flush(aiocb);
654 break;
655 case QEMU_AIO_IOCTL:
656 ret = handle_aiocb_ioctl(aiocb);
657 break;
658 default:
659 fprintf(stderr, "invalid aio request (0x%x)\n", aiocb->aio_type);
660 ret = -EINVAL;
661 break;
662 }
663
664 g_slice_free(RawPosixAIOData, aiocb);
665 return ret;
666}
667
668static BlockDriverAIOCB *paio_submit(BlockDriverState *bs, int fd,
669 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
670 BlockDriverCompletionFunc *cb, void *opaque, int type)
671{
672 RawPosixAIOData *acb = g_slice_new(RawPosixAIOData);
673
674 acb->bs = bs;
675 acb->aio_type = type;
676 acb->aio_fildes = fd;
677
678 if (qiov) {
679 acb->aio_iov = qiov->iov;
680 acb->aio_niov = qiov->niov;
681 }
682 acb->aio_nbytes = nb_sectors * 512;
683 acb->aio_offset = sector_num * 512;
684
685 trace_paio_submit(acb, opaque, sector_num, nb_sectors, type);
686 return thread_pool_submit_aio(aio_worker, acb, cb, opaque);
687}
688
9ef91a67
CH
689static BlockDriverAIOCB *raw_aio_submit(BlockDriverState *bs,
690 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
691 BlockDriverCompletionFunc *cb, void *opaque, int type)
83f64091 692{
ce1a14dc 693 BDRVRawState *s = bs->opaque;
ce1a14dc 694
19cb3738
FB
695 if (fd_open(bs) < 0)
696 return NULL;
697
f141eafe
AL
698 /*
699 * If O_DIRECT is used the buffer needs to be aligned on a sector
c1ee7d56 700 * boundary. Check if this is the case or tell the low-level
9ef91a67 701 * driver that it needs to copy the buffer.
f141eafe 702 */
9acc5a06 703 if ((bs->open_flags & BDRV_O_NOCACHE)) {
c53b1c51 704 if (!bdrv_qiov_is_aligned(bs, qiov)) {
5c6c3a6c 705 type |= QEMU_AIO_MISALIGNED;
e44bd6fc 706#ifdef CONFIG_LINUX_AIO
5c6c3a6c
CH
707 } else if (s->use_aio) {
708 return laio_submit(bs, s->aio_ctx, s->fd, sector_num, qiov,
e44bd6fc
SW
709 nb_sectors, cb, opaque, type);
710#endif
5c6c3a6c 711 }
9ef91a67 712 }
f141eafe 713
1e5b9d2f 714 return paio_submit(bs, s->fd, sector_num, qiov, nb_sectors,
9ef91a67 715 cb, opaque, type);
83f64091
FB
716}
717
f141eafe
AL
718static BlockDriverAIOCB *raw_aio_readv(BlockDriverState *bs,
719 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
ce1a14dc 720 BlockDriverCompletionFunc *cb, void *opaque)
83f64091 721{
9ef91a67
CH
722 return raw_aio_submit(bs, sector_num, qiov, nb_sectors,
723 cb, opaque, QEMU_AIO_READ);
83f64091
FB
724}
725
f141eafe
AL
726static BlockDriverAIOCB *raw_aio_writev(BlockDriverState *bs,
727 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
ce1a14dc 728 BlockDriverCompletionFunc *cb, void *opaque)
83f64091 729{
9ef91a67
CH
730 return raw_aio_submit(bs, sector_num, qiov, nb_sectors,
731 cb, opaque, QEMU_AIO_WRITE);
83f64091 732}
53538725 733
b2e12bc6
CH
734static BlockDriverAIOCB *raw_aio_flush(BlockDriverState *bs,
735 BlockDriverCompletionFunc *cb, void *opaque)
736{
737 BDRVRawState *s = bs->opaque;
738
739 if (fd_open(bs) < 0)
740 return NULL;
741
1e5b9d2f 742 return paio_submit(bs, s->fd, 0, NULL, 0, cb, opaque, QEMU_AIO_FLUSH);
b2e12bc6
CH
743}
744
83f64091
FB
745static void raw_close(BlockDriverState *bs)
746{
747 BDRVRawState *s = bs->opaque;
19cb3738 748 if (s->fd >= 0) {
2e1e79da 749 qemu_close(s->fd);
19cb3738
FB
750 s->fd = -1;
751 }
83f64091
FB
752}
753
754static int raw_truncate(BlockDriverState *bs, int64_t offset)
755{
756 BDRVRawState *s = bs->opaque;
55b949c8
CH
757 struct stat st;
758
759 if (fstat(s->fd, &st)) {
83f64091 760 return -errno;
55b949c8
CH
761 }
762
763 if (S_ISREG(st.st_mode)) {
764 if (ftruncate(s->fd, offset) < 0) {
765 return -errno;
766 }
767 } else if (S_ISCHR(st.st_mode) || S_ISBLK(st.st_mode)) {
768 if (offset > raw_getlength(bs)) {
769 return -EINVAL;
770 }
771 } else {
772 return -ENOTSUP;
773 }
774
83f64091
FB
775 return 0;
776}
777
128ab2ff
BS
778#ifdef __OpenBSD__
779static int64_t raw_getlength(BlockDriverState *bs)
780{
781 BDRVRawState *s = bs->opaque;
782 int fd = s->fd;
783 struct stat st;
784
785 if (fstat(fd, &st))
786 return -1;
787 if (S_ISCHR(st.st_mode) || S_ISBLK(st.st_mode)) {
788 struct disklabel dl;
789
790 if (ioctl(fd, DIOCGDINFO, &dl))
791 return -1;
792 return (uint64_t)dl.d_secsize *
793 dl.d_partitions[DISKPART(st.st_rdev)].p_size;
794 } else
795 return st.st_size;
796}
d1f6fd8d
CE
797#elif defined(__NetBSD__)
798static int64_t raw_getlength(BlockDriverState *bs)
799{
800 BDRVRawState *s = bs->opaque;
801 int fd = s->fd;
802 struct stat st;
803
804 if (fstat(fd, &st))
805 return -1;
806 if (S_ISCHR(st.st_mode) || S_ISBLK(st.st_mode)) {
807 struct dkwedge_info dkw;
808
809 if (ioctl(fd, DIOCGWEDGEINFO, &dkw) != -1) {
810 return dkw.dkw_size * 512;
811 } else {
812 struct disklabel dl;
813
814 if (ioctl(fd, DIOCGDINFO, &dl))
815 return -1;
816 return (uint64_t)dl.d_secsize *
817 dl.d_partitions[DISKPART(st.st_rdev)].p_size;
818 }
819 } else
820 return st.st_size;
821}
50779cc2
CH
822#elif defined(__sun__)
823static int64_t raw_getlength(BlockDriverState *bs)
824{
825 BDRVRawState *s = bs->opaque;
826 struct dk_minfo minfo;
827 int ret;
828
829 ret = fd_open(bs);
830 if (ret < 0) {
831 return ret;
832 }
833
834 /*
835 * Use the DKIOCGMEDIAINFO ioctl to read the size.
836 */
837 ret = ioctl(s->fd, DKIOCGMEDIAINFO, &minfo);
838 if (ret != -1) {
839 return minfo.dki_lbsize * minfo.dki_capacity;
840 }
841
842 /*
843 * There are reports that lseek on some devices fails, but
844 * irc discussion said that contingency on contingency was overkill.
845 */
846 return lseek(s->fd, 0, SEEK_END);
847}
848#elif defined(CONFIG_BSD)
/*
 * Return the size of the image in bytes (generic BSD, including Darwin).
 *
 * Character devices are sized with DIOCGMEDIASIZE or DIOCGPART where those
 * ioctls exist, falling back to lseek(SEEK_END) (or LONG_LONG_MAX on
 * Darwin). Everything else is sized with lseek(SEEK_END).
 * Returns the error of fd_open() if the device cannot be opened.
 *
 * The preprocessor blocks below splice together one statement: the
 * "if (ioctl(...))" / "if (size == 0)" line guards the fallback assignment
 * that follows it.
 */
static int64_t raw_getlength(BlockDriverState *bs)
{
    BDRVRawState *s = bs->opaque;
    /* NOTE(review): fd is captured before fd_open()/cdrom_reopen(); if
     * either replaces s->fd this copy looks stale — confirm. */
    int fd = s->fd;
    int64_t size;
    struct stat sb;
#if defined (__FreeBSD__) || defined(__FreeBSD_kernel__)
    int reopened = 0;
#endif
    int ret;

    ret = fd_open(bs);
    if (ret < 0)
        return ret;

#if defined (__FreeBSD__) || defined(__FreeBSD_kernel__)
again:
#endif
    if (!fstat(fd, &sb) && (S_IFCHR & sb.st_mode)) {
#ifdef DIOCGMEDIASIZE
        /* A failing ioctl falls through to the assignment below. */
        if (ioctl(fd, DIOCGMEDIASIZE, (off_t *)&size))
#elif defined(DIOCGPART)
        {
            struct partinfo pi;
            if (ioctl(fd, DIOCGPART, &pi) == 0)
                size = pi.media_size;
            else
                size = 0;
        }
        if (size == 0)
#endif
#if defined(__APPLE__) && defined(__MACH__)
        size = LONG_LONG_MAX;
#else
        size = lseek(fd, 0LL, SEEK_END);
#endif
#if defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
        switch(s->type) {
        case FTYPE_CD:
            /* XXX FreeBSD acd returns UINT_MAX sectors for an empty drive */
            if (size == 2048LL * (unsigned)-1)
                size = 0;
            /* XXX no disc? maybe we need to reopen... */
            /* Retry at most once after reopening the CD-ROM device. */
            if (size <= 0 && !reopened && cdrom_reopen(bs) >= 0) {
                reopened = 1;
                goto again;
            }
        }
#endif
    } else {
        size = lseek(fd, 0, SEEK_END);
    }
    return size;
}
50779cc2
CH
903#else
904static int64_t raw_getlength(BlockDriverState *bs)
905{
906 BDRVRawState *s = bs->opaque;
907 int ret;
908
909 ret = fd_open(bs);
910 if (ret < 0) {
911 return ret;
912 }
913
914 return lseek(s->fd, 0, SEEK_END);
915}
128ab2ff 916#endif
83f64091 917
4a1d5e1f
FZ
918static int64_t raw_get_allocated_file_size(BlockDriverState *bs)
919{
920 struct stat st;
921 BDRVRawState *s = bs->opaque;
922
923 if (fstat(s->fd, &st) < 0) {
924 return -errno;
925 }
926 return (int64_t)st.st_blocks * 512;
927}
928
0e7e1989 929static int raw_create(const char *filename, QEMUOptionParameter *options)
83f64091
FB
930{
931 int fd;
1e37d059 932 int result = 0;
0e7e1989 933 int64_t total_size = 0;
83f64091 934
0e7e1989
KW
935 /* Read out options */
936 while (options && options->name) {
937 if (!strcmp(options->name, BLOCK_OPT_SIZE)) {
9040385d 938 total_size = options->value.n / BDRV_SECTOR_SIZE;
0e7e1989
KW
939 }
940 options++;
941 }
83f64091 942
6165f4d8
CB
943 fd = qemu_open(filename, O_WRONLY | O_CREAT | O_TRUNC | O_BINARY,
944 0644);
1e37d059
SW
945 if (fd < 0) {
946 result = -errno;
947 } else {
9040385d 948 if (ftruncate(fd, total_size * BDRV_SECTOR_SIZE) != 0) {
1e37d059
SW
949 result = -errno;
950 }
2e1e79da 951 if (qemu_close(fd) != 0) {
1e37d059
SW
952 result = -errno;
953 }
954 }
955 return result;
83f64091
FB
956}
957
5500316d
PB
958/*
959 * Returns true iff the specified sector is present in the disk image. Drivers
960 * not implementing the functionality are assumed to not support backing files,
961 * hence all their sectors are reported as allocated.
962 *
963 * If 'sector_num' is beyond the end of the disk image the return value is 0
964 * and 'pnum' is set to 0.
965 *
966 * 'pnum' is set to the number of sectors (including and immediately following
967 * the specified sector) that are known to be in the same
968 * allocated/unallocated state.
969 *
970 * 'nb_sectors' is the max value 'pnum' should be set to. If nb_sectors goes
971 * beyond the end of the disk image it will be clamped.
972 */
973static int coroutine_fn raw_co_is_allocated(BlockDriverState *bs,
974 int64_t sector_num,
975 int nb_sectors, int *pnum)
976{
5500316d
PB
977 off_t start, data, hole;
978 int ret;
979
980 ret = fd_open(bs);
981 if (ret < 0) {
982 return ret;
983 }
984
985 start = sector_num * BDRV_SECTOR_SIZE;
94282e71 986
5500316d 987#ifdef CONFIG_FIEMAP
94282e71
KW
988
989 BDRVRawState *s = bs->opaque;
5500316d
PB
990 struct {
991 struct fiemap fm;
992 struct fiemap_extent fe;
993 } f;
94282e71 994
5500316d
PB
995 f.fm.fm_start = start;
996 f.fm.fm_length = (int64_t)nb_sectors * BDRV_SECTOR_SIZE;
997 f.fm.fm_flags = 0;
998 f.fm.fm_extent_count = 1;
999 f.fm.fm_reserved = 0;
1000 if (ioctl(s->fd, FS_IOC_FIEMAP, &f) == -1) {
1001 /* Assume everything is allocated. */
1002 *pnum = nb_sectors;
1003 return 1;
1004 }
1005
1006 if (f.fm.fm_mapped_extents == 0) {
1007 /* No extents found, data is beyond f.fm.fm_start + f.fm.fm_length.
1008 * f.fm.fm_start + f.fm.fm_length must be clamped to the file size!
1009 */
1010 off_t length = lseek(s->fd, 0, SEEK_END);
1011 hole = f.fm.fm_start;
1012 data = MIN(f.fm.fm_start + f.fm.fm_length, length);
1013 } else {
1014 data = f.fe.fe_logical;
1015 hole = f.fe.fe_logical + f.fe.fe_length;
1016 }
94282e71 1017
5500316d 1018#elif defined SEEK_HOLE && defined SEEK_DATA
94282e71
KW
1019
1020 BDRVRawState *s = bs->opaque;
1021
5500316d
PB
1022 hole = lseek(s->fd, start, SEEK_HOLE);
1023 if (hole == -1) {
1024 /* -ENXIO indicates that sector_num was past the end of the file.
1025 * There is a virtual hole there. */
1026 assert(errno != -ENXIO);
1027
1028 /* Most likely EINVAL. Assume everything is allocated. */
1029 *pnum = nb_sectors;
1030 return 1;
1031 }
1032
1033 if (hole > start) {
1034 data = start;
1035 } else {
1036 /* On a hole. We need another syscall to find its end. */
1037 data = lseek(s->fd, start, SEEK_DATA);
1038 if (data == -1) {
1039 data = lseek(s->fd, 0, SEEK_END);
1040 }
1041 }
1042#else
1043 *pnum = nb_sectors;
1044 return 1;
1045#endif
1046
1047 if (data <= start) {
1048 /* On a data extent, compute sectors to the end of the extent. */
1049 *pnum = MIN(nb_sectors, (hole - start) / BDRV_SECTOR_SIZE);
1050 return 1;
1051 } else {
1052 /* On a hole, compute sectors to the beginning of the next extent. */
1053 *pnum = MIN(nb_sectors, (data - start) / BDRV_SECTOR_SIZE);
1054 return 0;
1055 }
1056}
1057
dce512de
CH
1058#ifdef CONFIG_XFS
1059static int xfs_discard(BDRVRawState *s, int64_t sector_num, int nb_sectors)
1060{
1061 struct xfs_flock64 fl;
1062
1063 memset(&fl, 0, sizeof(fl));
1064 fl.l_whence = SEEK_SET;
1065 fl.l_start = sector_num << 9;
1066 fl.l_len = (int64_t)nb_sectors << 9;
1067
1068 if (xfsctl(NULL, s->fd, XFS_IOC_UNRESVSP64, &fl) < 0) {
1069 DEBUG_BLOCK_PRINT("cannot punch hole (%s)\n", strerror(errno));
1070 return -errno;
1071 }
1072
1073 return 0;
1074}
1075#endif
1076
6db39ae2
PB
1077static coroutine_fn int raw_co_discard(BlockDriverState *bs,
1078 int64_t sector_num, int nb_sectors)
dce512de 1079{
3d4fa43e
KK
1080 int ret = -EOPNOTSUPP;
1081
1082#if defined(CONFIG_FALLOCATE_PUNCH_HOLE) || defined(CONFIG_XFS)
dce512de
CH
1083 BDRVRawState *s = bs->opaque;
1084
3d4fa43e 1085#ifdef CONFIG_XFS
dce512de
CH
1086 if (s->is_xfs) {
1087 return xfs_discard(s, sector_num, nb_sectors);
1088 }
1089#endif
1090
3d4fa43e
KK
1091#ifdef CONFIG_FALLOCATE_PUNCH_HOLE
1092 do {
1093 if (fallocate(s->fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
1094 sector_num << BDRV_SECTOR_BITS,
1095 (int64_t)nb_sectors << BDRV_SECTOR_BITS) == 0) {
1096 return 0;
1097 }
1098 } while (errno == EINTR);
1099
1100 ret = -errno;
1101#endif
1102#endif
1103
1104 if (ret == -EOPNOTSUPP) {
1105 return 0;
1106 }
1107 return ret;
dce512de 1108}
0e7e1989
KW
1109
/* Creation options understood by raw_create(); the list is terminated by
 * an entry with a NULL name. */
static QEMUOptionParameter raw_create_options[] = {
    {
        .name = BLOCK_OPT_SIZE,
        .type = OPT_SIZE,
        .help = "Virtual disk size"
    },
    { NULL }
};
1118
84a12e66
CH
/* Driver table for the "file" protocol: wires the raw_* functions defined
 * above into the block layer callbacks. */
static BlockDriver bdrv_file = {
    .format_name = "file",
    .protocol_name = "file",
    .instance_size = sizeof(BDRVRawState),
    .bdrv_probe = NULL, /* no probe for protocols */
    .bdrv_file_open = raw_open,
    .bdrv_reopen_prepare = raw_reopen_prepare,
    .bdrv_reopen_commit = raw_reopen_commit,
    .bdrv_reopen_abort = raw_reopen_abort,
    .bdrv_close = raw_close,
    .bdrv_create = raw_create,
    .bdrv_co_discard = raw_co_discard,
    .bdrv_co_is_allocated = raw_co_is_allocated,

    /* asynchronous I/O entry points */
    .bdrv_aio_readv = raw_aio_readv,
    .bdrv_aio_writev = raw_aio_writev,
    .bdrv_aio_flush = raw_aio_flush,

    /* size handling */
    .bdrv_truncate = raw_truncate,
    .bdrv_getlength = raw_getlength,
    .bdrv_get_allocated_file_size
                        = raw_get_allocated_file_size,

    .create_options = raw_create_options,
};
1144
19cb3738
FB
1145/***********************************************/
1146/* host device */
1147
83affaa6 1148#if defined(__APPLE__) && defined(__MACH__)
19cb3738
FB
1149static kern_return_t FindEjectableCDMedia( io_iterator_t *mediaIterator );
1150static kern_return_t GetBSDPath( io_iterator_t mediaIterator, char *bsdPath, CFIndex maxPathSize );
1151
1152kern_return_t FindEjectableCDMedia( io_iterator_t *mediaIterator )
1153{
5fafdf24 1154 kern_return_t kernResult;
19cb3738
FB
1155 mach_port_t masterPort;
1156 CFMutableDictionaryRef classesToMatch;
1157
1158 kernResult = IOMasterPort( MACH_PORT_NULL, &masterPort );
1159 if ( KERN_SUCCESS != kernResult ) {
1160 printf( "IOMasterPort returned %d\n", kernResult );
1161 }
3b46e624 1162
5fafdf24 1163 classesToMatch = IOServiceMatching( kIOCDMediaClass );
19cb3738
FB
1164 if ( classesToMatch == NULL ) {
1165 printf( "IOServiceMatching returned a NULL dictionary.\n" );
1166 } else {
1167 CFDictionarySetValue( classesToMatch, CFSTR( kIOMediaEjectableKey ), kCFBooleanTrue );
1168 }
1169 kernResult = IOServiceGetMatchingServices( masterPort, classesToMatch, mediaIterator );
1170 if ( KERN_SUCCESS != kernResult )
1171 {
1172 printf( "IOServiceGetMatchingServices returned %d\n", kernResult );
1173 }
3b46e624 1174
19cb3738
FB
1175 return kernResult;
1176}
1177
1178kern_return_t GetBSDPath( io_iterator_t mediaIterator, char *bsdPath, CFIndex maxPathSize )
1179{
1180 io_object_t nextMedia;
1181 kern_return_t kernResult = KERN_FAILURE;
1182 *bsdPath = '\0';
1183 nextMedia = IOIteratorNext( mediaIterator );
1184 if ( nextMedia )
1185 {
1186 CFTypeRef bsdPathAsCFString;
1187 bsdPathAsCFString = IORegistryEntryCreateCFProperty( nextMedia, CFSTR( kIOBSDNameKey ), kCFAllocatorDefault, 0 );
1188 if ( bsdPathAsCFString ) {
1189 size_t devPathLength;
1190 strcpy( bsdPath, _PATH_DEV );
1191 strcat( bsdPath, "r" );
1192 devPathLength = strlen( bsdPath );
1193 if ( CFStringGetCString( bsdPathAsCFString, bsdPath + devPathLength, maxPathSize - devPathLength, kCFStringEncodingASCII ) ) {
1194 kernResult = KERN_SUCCESS;
1195 }
1196 CFRelease( bsdPathAsCFString );
1197 }
1198 IOObjectRelease( nextMedia );
1199 }
3b46e624 1200
19cb3738
FB
1201 return kernResult;
1202}
1203
1204#endif
1205
508c7cb3
CH
/*
 * Score how well the host_device driver matches filename:
 * 50 for names starting with "/dev/cdrom", 100 for existing character or
 * block devices, 0 otherwise.
 */
static int hdev_probe_device(const char *filename)
{
    struct stat info;

    /* allow a dedicated CD-ROM driver to match with a higher priority */
    if (strstart(filename, "/dev/cdrom", NULL)) {
        return 50;
    }

    if (stat(filename, &info) < 0) {
        return 0;
    }

    return (S_ISCHR(info.st_mode) || S_ISBLK(info.st_mode)) ? 100 : 0;
}
1221
19cb3738
FB
1222static int hdev_open(BlockDriverState *bs, const char *filename, int flags)
1223{
1224 BDRVRawState *s = bs->opaque;
a76bab49 1225
83affaa6 1226#if defined(__APPLE__) && defined(__MACH__)
19cb3738
FB
1227 if (strstart(filename, "/dev/cdrom", NULL)) {
1228 kern_return_t kernResult;
1229 io_iterator_t mediaIterator;
1230 char bsdPath[ MAXPATHLEN ];
1231 int fd;
5fafdf24 1232
19cb3738
FB
1233 kernResult = FindEjectableCDMedia( &mediaIterator );
1234 kernResult = GetBSDPath( mediaIterator, bsdPath, sizeof( bsdPath ) );
3b46e624 1235
19cb3738
FB
1236 if ( bsdPath[ 0 ] != '\0' ) {
1237 strcat(bsdPath,"s0");
1238 /* some CDs don't have a partition 0 */
6165f4d8 1239 fd = qemu_open(bsdPath, O_RDONLY | O_BINARY | O_LARGEFILE);
19cb3738
FB
1240 if (fd < 0) {
1241 bsdPath[strlen(bsdPath)-1] = '1';
1242 } else {
2e1e79da 1243 qemu_close(fd);
19cb3738
FB
1244 }
1245 filename = bsdPath;
1246 }
3b46e624 1247
19cb3738
FB
1248 if ( mediaIterator )
1249 IOObjectRelease( mediaIterator );
1250 }
1251#endif
19cb3738
FB
1252
1253 s->type = FTYPE_FILE;
4dd75c70 1254#if defined(__linux__)
05acda4d
BK
1255 {
1256 char resolved_path[ MAXPATHLEN ], *temp;
1257
1258 temp = realpath(filename, resolved_path);
1259 if (temp && strstart(temp, "/dev/sg", NULL)) {
1260 bs->sg = 1;
1261 }
19cb3738
FB
1262 }
1263#endif
90babde0 1264
19a3da7f 1265 return raw_open_common(bs, filename, flags, 0);
19cb3738
FB
1266}
1267
03ff3ca3 1268#if defined(__linux__)
19cb3738
FB
1269/* Note: we do not have a reliable method to detect if the floppy is
1270 present. The current method is to try to open the floppy at every
1271 I/O and to keep it opened during a few hundreds of ms. */
1272static int fd_open(BlockDriverState *bs)
1273{
1274 BDRVRawState *s = bs->opaque;
1275 int last_media_present;
1276
1277 if (s->type != FTYPE_FD)
1278 return 0;
1279 last_media_present = (s->fd >= 0);
5fafdf24 1280 if (s->fd >= 0 &&
c57c846a 1281 (get_clock() - s->fd_open_time) >= FD_OPEN_TIMEOUT) {
2e1e79da 1282 qemu_close(s->fd);
19cb3738
FB
1283 s->fd = -1;
1284#ifdef DEBUG_FLOPPY
1285 printf("Floppy closed\n");
1286#endif
1287 }
1288 if (s->fd < 0) {
5fafdf24 1289 if (s->fd_got_error &&
c57c846a 1290 (get_clock() - s->fd_error_time) < FD_OPEN_TIMEOUT) {
19cb3738
FB
1291#ifdef DEBUG_FLOPPY
1292 printf("No floppy (open delayed)\n");
1293#endif
1294 return -EIO;
1295 }
6165f4d8 1296 s->fd = qemu_open(bs->filename, s->open_flags & ~O_NONBLOCK);
19cb3738 1297 if (s->fd < 0) {
c57c846a 1298 s->fd_error_time = get_clock();
19cb3738
FB
1299 s->fd_got_error = 1;
1300 if (last_media_present)
1301 s->fd_media_changed = 1;
1302#ifdef DEBUG_FLOPPY
1303 printf("No floppy\n");
1304#endif
1305 return -EIO;
1306 }
1307#ifdef DEBUG_FLOPPY
1308 printf("Floppy opened\n");
1309#endif
1310 }
1311 if (!last_media_present)
1312 s->fd_media_changed = 1;
c57c846a 1313 s->fd_open_time = get_clock();
19cb3738
FB
1314 s->fd_got_error = 0;
1315 return 0;
1316}
19cb3738 1317
63ec93db 1318static int hdev_ioctl(BlockDriverState *bs, unsigned long int req, void *buf)
985a03b0
TS
1319{
1320 BDRVRawState *s = bs->opaque;
1321
1322 return ioctl(s->fd, req, buf);
1323}
221f715d 1324
63ec93db 1325static BlockDriverAIOCB *hdev_aio_ioctl(BlockDriverState *bs,
221f715d
AL
1326 unsigned long int req, void *buf,
1327 BlockDriverCompletionFunc *cb, void *opaque)
1328{
f141eafe 1329 BDRVRawState *s = bs->opaque;
c208e8c2 1330 RawPosixAIOData *acb;
221f715d 1331
f141eafe
AL
1332 if (fd_open(bs) < 0)
1333 return NULL;
c208e8c2
PB
1334
1335 acb = g_slice_new(RawPosixAIOData);
1336 acb->bs = bs;
1337 acb->aio_type = QEMU_AIO_IOCTL;
1338 acb->aio_fildes = s->fd;
1339 acb->aio_offset = 0;
1340 acb->aio_ioctl_buf = buf;
1341 acb->aio_ioctl_cmd = req;
1342 return thread_pool_submit_aio(aio_worker, acb, cb, opaque);
221f715d
AL
1343}
1344
a167ba50 1345#elif defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
9f23011a
BS
1346static int fd_open(BlockDriverState *bs)
1347{
1348 BDRVRawState *s = bs->opaque;
1349
1350 /* this is just to ensure s->fd is sane (its called by io ops) */
1351 if (s->fd >= 0)
1352 return 0;
1353 return -EIO;
1354}
9f23011a 1355#else /* !linux && !FreeBSD */
19cb3738 1356
08af02e2
AL
1357static int fd_open(BlockDriverState *bs)
1358{
1359 return 0;
1360}
1361
221f715d 1362#endif /* !linux && !FreeBSD */
04eeb8b6 1363
0e7e1989 1364static int hdev_create(const char *filename, QEMUOptionParameter *options)
93c65b47
AL
1365{
1366 int fd;
1367 int ret = 0;
1368 struct stat stat_buf;
0e7e1989 1369 int64_t total_size = 0;
93c65b47 1370
0e7e1989
KW
1371 /* Read out options */
1372 while (options && options->name) {
1373 if (!strcmp(options->name, "size")) {
9040385d 1374 total_size = options->value.n / BDRV_SECTOR_SIZE;
0e7e1989
KW
1375 }
1376 options++;
1377 }
93c65b47 1378
6165f4d8 1379 fd = qemu_open(filename, O_WRONLY | O_BINARY);
93c65b47 1380 if (fd < 0)
57e69b7d 1381 return -errno;
93c65b47
AL
1382
1383 if (fstat(fd, &stat_buf) < 0)
57e69b7d 1384 ret = -errno;
4099df58 1385 else if (!S_ISBLK(stat_buf.st_mode) && !S_ISCHR(stat_buf.st_mode))
57e69b7d 1386 ret = -ENODEV;
9040385d 1387 else if (lseek(fd, 0, SEEK_END) < total_size * BDRV_SECTOR_SIZE)
93c65b47
AL
1388 ret = -ENOSPC;
1389
2e1e79da 1390 qemu_close(fd);
93c65b47
AL
1391 return ret;
1392}
1393
336c1c12
KW
1394static int hdev_has_zero_init(BlockDriverState *bs)
1395{
1396 return 0;
1397}
1398
5efa9d5a 1399static BlockDriver bdrv_host_device = {
0b4ce02e 1400 .format_name = "host_device",
84a12e66 1401 .protocol_name = "host_device",
0b4ce02e
KW
1402 .instance_size = sizeof(BDRVRawState),
1403 .bdrv_probe_device = hdev_probe_device,
66f82cee 1404 .bdrv_file_open = hdev_open,
0b4ce02e 1405 .bdrv_close = raw_close,
1bc6b705
JC
1406 .bdrv_reopen_prepare = raw_reopen_prepare,
1407 .bdrv_reopen_commit = raw_reopen_commit,
1408 .bdrv_reopen_abort = raw_reopen_abort,
93c65b47 1409 .bdrv_create = hdev_create,
0b4ce02e 1410 .create_options = raw_create_options,
336c1c12 1411 .bdrv_has_zero_init = hdev_has_zero_init,
3b46e624 1412
f141eafe
AL
1413 .bdrv_aio_readv = raw_aio_readv,
1414 .bdrv_aio_writev = raw_aio_writev,
b2e12bc6 1415 .bdrv_aio_flush = raw_aio_flush,
3c529d93 1416
55b949c8 1417 .bdrv_truncate = raw_truncate,
e60f469c 1418 .bdrv_getlength = raw_getlength,
4a1d5e1f
FZ
1419 .bdrv_get_allocated_file_size
1420 = raw_get_allocated_file_size,
19cb3738 1421
f3a5d3f8 1422 /* generic scsi device */
63ec93db
CH
1423#ifdef __linux__
1424 .bdrv_ioctl = hdev_ioctl,
63ec93db
CH
1425 .bdrv_aio_ioctl = hdev_aio_ioctl,
1426#endif
f3a5d3f8
CH
1427};
1428
1429#ifdef __linux__
1430static int floppy_open(BlockDriverState *bs, const char *filename, int flags)
1431{
1432 BDRVRawState *s = bs->opaque;
1433 int ret;
1434
f3a5d3f8 1435 s->type = FTYPE_FD;
f3a5d3f8 1436
19a3da7f
BS
1437 /* open will not fail even if no floppy is inserted, so add O_NONBLOCK */
1438 ret = raw_open_common(bs, filename, flags, O_NONBLOCK);
f3a5d3f8
CH
1439 if (ret)
1440 return ret;
1441
1442 /* close fd so that we can reopen it as needed */
2e1e79da 1443 qemu_close(s->fd);
f3a5d3f8
CH
1444 s->fd = -1;
1445 s->fd_media_changed = 1;
1446
1447 return 0;
1448}
1449
508c7cb3
CH
1450static int floppy_probe_device(const char *filename)
1451{
2ebf7c4b
CR
1452 int fd, ret;
1453 int prio = 0;
1454 struct floppy_struct fdparam;
343f8568 1455 struct stat st;
2ebf7c4b 1456
e1740828
CB
1457 if (strstart(filename, "/dev/fd", NULL) &&
1458 !strstart(filename, "/dev/fdset/", NULL)) {
2ebf7c4b 1459 prio = 50;
e1740828 1460 }
2ebf7c4b 1461
6165f4d8 1462 fd = qemu_open(filename, O_RDONLY | O_NONBLOCK);
2ebf7c4b
CR
1463 if (fd < 0) {
1464 goto out;
1465 }
343f8568
JS
1466 ret = fstat(fd, &st);
1467 if (ret == -1 || !S_ISBLK(st.st_mode)) {
1468 goto outc;
1469 }
2ebf7c4b
CR
1470
1471 /* Attempt to detect via a floppy specific ioctl */
1472 ret = ioctl(fd, FDGETPRM, &fdparam);
1473 if (ret >= 0)
1474 prio = 100;
1475
343f8568 1476outc:
2e1e79da 1477 qemu_close(fd);
2ebf7c4b
CR
1478out:
1479 return prio;
508c7cb3
CH
1480}
1481
1482
f3a5d3f8
CH
1483static int floppy_is_inserted(BlockDriverState *bs)
1484{
1485 return fd_open(bs) >= 0;
1486}
1487
1488static int floppy_media_changed(BlockDriverState *bs)
1489{
1490 BDRVRawState *s = bs->opaque;
1491 int ret;
1492
1493 /*
1494 * XXX: we do not have a true media changed indication.
1495 * It does not work if the floppy is changed without trying to read it.
1496 */
1497 fd_open(bs);
1498 ret = s->fd_media_changed;
1499 s->fd_media_changed = 0;
1500#ifdef DEBUG_FLOPPY
1501 printf("Floppy changed=%d\n", ret);
1502#endif
1503 return ret;
1504}
1505
f36f3949 1506static void floppy_eject(BlockDriverState *bs, bool eject_flag)
f3a5d3f8
CH
1507{
1508 BDRVRawState *s = bs->opaque;
1509 int fd;
1510
1511 if (s->fd >= 0) {
2e1e79da 1512 qemu_close(s->fd);
f3a5d3f8
CH
1513 s->fd = -1;
1514 }
6165f4d8 1515 fd = qemu_open(bs->filename, s->open_flags | O_NONBLOCK);
f3a5d3f8
CH
1516 if (fd >= 0) {
1517 if (ioctl(fd, FDEJECT, 0) < 0)
1518 perror("FDEJECT");
2e1e79da 1519 qemu_close(fd);
f3a5d3f8 1520 }
f3a5d3f8
CH
1521}
1522
1523static BlockDriver bdrv_host_floppy = {
1524 .format_name = "host_floppy",
84a12e66 1525 .protocol_name = "host_floppy",
f3a5d3f8 1526 .instance_size = sizeof(BDRVRawState),
508c7cb3 1527 .bdrv_probe_device = floppy_probe_device,
66f82cee 1528 .bdrv_file_open = floppy_open,
f3a5d3f8 1529 .bdrv_close = raw_close,
1bc6b705
JC
1530 .bdrv_reopen_prepare = raw_reopen_prepare,
1531 .bdrv_reopen_commit = raw_reopen_commit,
1532 .bdrv_reopen_abort = raw_reopen_abort,
f3a5d3f8 1533 .bdrv_create = hdev_create,
0b4ce02e 1534 .create_options = raw_create_options,
336c1c12 1535 .bdrv_has_zero_init = hdev_has_zero_init,
f3a5d3f8 1536
f3a5d3f8
CH
1537 .bdrv_aio_readv = raw_aio_readv,
1538 .bdrv_aio_writev = raw_aio_writev,
b2e12bc6 1539 .bdrv_aio_flush = raw_aio_flush,
f3a5d3f8 1540
55b949c8 1541 .bdrv_truncate = raw_truncate,
f3a5d3f8 1542 .bdrv_getlength = raw_getlength,
4a1d5e1f
FZ
1543 .bdrv_get_allocated_file_size
1544 = raw_get_allocated_file_size,
f3a5d3f8
CH
1545
1546 /* removable device support */
1547 .bdrv_is_inserted = floppy_is_inserted,
1548 .bdrv_media_changed = floppy_media_changed,
1549 .bdrv_eject = floppy_eject,
f3a5d3f8
CH
1550};
1551
1552static int cdrom_open(BlockDriverState *bs, const char *filename, int flags)
1553{
1554 BDRVRawState *s = bs->opaque;
1555
f3a5d3f8
CH
1556 s->type = FTYPE_CD;
1557
19a3da7f
BS
1558 /* open will not fail even if no CD is inserted, so add O_NONBLOCK */
1559 return raw_open_common(bs, filename, flags, O_NONBLOCK);
f3a5d3f8
CH
1560}
1561
508c7cb3
CH
1562static int cdrom_probe_device(const char *filename)
1563{
3baf720e
CR
1564 int fd, ret;
1565 int prio = 0;
343f8568 1566 struct stat st;
3baf720e 1567
6165f4d8 1568 fd = qemu_open(filename, O_RDONLY | O_NONBLOCK);
3baf720e
CR
1569 if (fd < 0) {
1570 goto out;
1571 }
343f8568
JS
1572 ret = fstat(fd, &st);
1573 if (ret == -1 || !S_ISBLK(st.st_mode)) {
1574 goto outc;
1575 }
3baf720e
CR
1576
1577 /* Attempt to detect via a CDROM specific ioctl */
1578 ret = ioctl(fd, CDROM_DRIVE_STATUS, CDSL_CURRENT);
1579 if (ret >= 0)
1580 prio = 100;
1581
343f8568 1582outc:
2e1e79da 1583 qemu_close(fd);
3baf720e
CR
1584out:
1585 return prio;
508c7cb3
CH
1586}
1587
f3a5d3f8
CH
1588static int cdrom_is_inserted(BlockDriverState *bs)
1589{
1590 BDRVRawState *s = bs->opaque;
1591 int ret;
1592
1593 ret = ioctl(s->fd, CDROM_DRIVE_STATUS, CDSL_CURRENT);
1594 if (ret == CDS_DISC_OK)
1595 return 1;
1596 return 0;
1597}
1598
f36f3949 1599static void cdrom_eject(BlockDriverState *bs, bool eject_flag)
f3a5d3f8
CH
1600{
1601 BDRVRawState *s = bs->opaque;
1602
1603 if (eject_flag) {
1604 if (ioctl(s->fd, CDROMEJECT, NULL) < 0)
1605 perror("CDROMEJECT");
1606 } else {
1607 if (ioctl(s->fd, CDROMCLOSETRAY, NULL) < 0)
1608 perror("CDROMEJECT");
1609 }
f3a5d3f8
CH
1610}
1611
025e849a 1612static void cdrom_lock_medium(BlockDriverState *bs, bool locked)
f3a5d3f8
CH
1613{
1614 BDRVRawState *s = bs->opaque;
1615
1616 if (ioctl(s->fd, CDROM_LOCKDOOR, locked) < 0) {
1617 /*
1618 * Note: an error can happen if the distribution automatically
1619 * mounts the CD-ROM
1620 */
1621 /* perror("CDROM_LOCKDOOR"); */
1622 }
f3a5d3f8
CH
1623}
1624
1625static BlockDriver bdrv_host_cdrom = {
1626 .format_name = "host_cdrom",
84a12e66 1627 .protocol_name = "host_cdrom",
f3a5d3f8 1628 .instance_size = sizeof(BDRVRawState),
508c7cb3 1629 .bdrv_probe_device = cdrom_probe_device,
66f82cee 1630 .bdrv_file_open = cdrom_open,
f3a5d3f8 1631 .bdrv_close = raw_close,
1bc6b705
JC
1632 .bdrv_reopen_prepare = raw_reopen_prepare,
1633 .bdrv_reopen_commit = raw_reopen_commit,
1634 .bdrv_reopen_abort = raw_reopen_abort,
f3a5d3f8 1635 .bdrv_create = hdev_create,
0b4ce02e 1636 .create_options = raw_create_options,
336c1c12 1637 .bdrv_has_zero_init = hdev_has_zero_init,
f3a5d3f8 1638
f3a5d3f8
CH
1639 .bdrv_aio_readv = raw_aio_readv,
1640 .bdrv_aio_writev = raw_aio_writev,
b2e12bc6 1641 .bdrv_aio_flush = raw_aio_flush,
f3a5d3f8 1642
55b949c8 1643 .bdrv_truncate = raw_truncate,
f3a5d3f8 1644 .bdrv_getlength = raw_getlength,
4a1d5e1f
FZ
1645 .bdrv_get_allocated_file_size
1646 = raw_get_allocated_file_size,
f3a5d3f8
CH
1647
1648 /* removable device support */
1649 .bdrv_is_inserted = cdrom_is_inserted,
1650 .bdrv_eject = cdrom_eject,
025e849a 1651 .bdrv_lock_medium = cdrom_lock_medium,
f3a5d3f8
CH
1652
1653 /* generic scsi device */
63ec93db 1654 .bdrv_ioctl = hdev_ioctl,
63ec93db 1655 .bdrv_aio_ioctl = hdev_aio_ioctl,
f3a5d3f8
CH
1656};
1657#endif /* __linux__ */
1658
a167ba50 1659#if defined (__FreeBSD__) || defined(__FreeBSD_kernel__)
f3a5d3f8
CH
1660static int cdrom_open(BlockDriverState *bs, const char *filename, int flags)
1661{
1662 BDRVRawState *s = bs->opaque;
1663 int ret;
1664
1665 s->type = FTYPE_CD;
1666
19a3da7f 1667 ret = raw_open_common(bs, filename, flags, 0);
f3a5d3f8
CH
1668 if (ret)
1669 return ret;
1670
9b2260cb 1671 /* make sure the door isn't locked at this time */
f3a5d3f8
CH
1672 ioctl(s->fd, CDIOCALLOW);
1673 return 0;
1674}
1675
508c7cb3
CH
/* 100 for names starting with "/dev/cd" or "/dev/acd", 0 otherwise. */
static int cdrom_probe_device(const char *filename)
{
    int is_cd = strstart(filename, "/dev/cd", NULL) ||
                strstart(filename, "/dev/acd", NULL);

    return is_cd ? 100 : 0;
}
1683
f3a5d3f8
CH
1684static int cdrom_reopen(BlockDriverState *bs)
1685{
1686 BDRVRawState *s = bs->opaque;
1687 int fd;
1688
1689 /*
1690 * Force reread of possibly changed/newly loaded disc,
1691 * FreeBSD seems to not notice sometimes...
1692 */
1693 if (s->fd >= 0)
2e1e79da 1694 qemu_close(s->fd);
6165f4d8 1695 fd = qemu_open(bs->filename, s->open_flags, 0644);
f3a5d3f8
CH
1696 if (fd < 0) {
1697 s->fd = -1;
1698 return -EIO;
1699 }
1700 s->fd = fd;
1701
9b2260cb 1702 /* make sure the door isn't locked at this time */
f3a5d3f8
CH
1703 ioctl(s->fd, CDIOCALLOW);
1704 return 0;
1705}
1706
1707static int cdrom_is_inserted(BlockDriverState *bs)
1708{
1709 return raw_getlength(bs) > 0;
1710}
1711
f36f3949 1712static void cdrom_eject(BlockDriverState *bs, bool eject_flag)
f3a5d3f8
CH
1713{
1714 BDRVRawState *s = bs->opaque;
1715
1716 if (s->fd < 0)
822e1cd1 1717 return;
f3a5d3f8
CH
1718
1719 (void) ioctl(s->fd, CDIOCALLOW);
1720
1721 if (eject_flag) {
1722 if (ioctl(s->fd, CDIOCEJECT) < 0)
1723 perror("CDIOCEJECT");
1724 } else {
1725 if (ioctl(s->fd, CDIOCCLOSE) < 0)
1726 perror("CDIOCCLOSE");
1727 }
1728
822e1cd1 1729 cdrom_reopen(bs);
f3a5d3f8
CH
1730}
1731
025e849a 1732static void cdrom_lock_medium(BlockDriverState *bs, bool locked)
f3a5d3f8
CH
1733{
1734 BDRVRawState *s = bs->opaque;
1735
1736 if (s->fd < 0)
7bf37fed 1737 return;
f3a5d3f8
CH
1738 if (ioctl(s->fd, (locked ? CDIOCPREVENT : CDIOCALLOW)) < 0) {
1739 /*
1740 * Note: an error can happen if the distribution automatically
1741 * mounts the CD-ROM
1742 */
1743 /* perror("CDROM_LOCKDOOR"); */
1744 }
f3a5d3f8
CH
1745}
1746
1747static BlockDriver bdrv_host_cdrom = {
1748 .format_name = "host_cdrom",
84a12e66 1749 .protocol_name = "host_cdrom",
f3a5d3f8 1750 .instance_size = sizeof(BDRVRawState),
508c7cb3 1751 .bdrv_probe_device = cdrom_probe_device,
66f82cee 1752 .bdrv_file_open = cdrom_open,
f3a5d3f8 1753 .bdrv_close = raw_close,
1bc6b705
JC
1754 .bdrv_reopen_prepare = raw_reopen_prepare,
1755 .bdrv_reopen_commit = raw_reopen_commit,
1756 .bdrv_reopen_abort = raw_reopen_abort,
f3a5d3f8 1757 .bdrv_create = hdev_create,
0b4ce02e 1758 .create_options = raw_create_options,
336c1c12 1759 .bdrv_has_zero_init = hdev_has_zero_init,
f3a5d3f8 1760
f3a5d3f8
CH
1761 .bdrv_aio_readv = raw_aio_readv,
1762 .bdrv_aio_writev = raw_aio_writev,
b2e12bc6 1763 .bdrv_aio_flush = raw_aio_flush,
f3a5d3f8 1764
55b949c8 1765 .bdrv_truncate = raw_truncate,
f3a5d3f8 1766 .bdrv_getlength = raw_getlength,
4a1d5e1f
FZ
1767 .bdrv_get_allocated_file_size
1768 = raw_get_allocated_file_size,
f3a5d3f8 1769
19cb3738 1770 /* removable device support */
f3a5d3f8
CH
1771 .bdrv_is_inserted = cdrom_is_inserted,
1772 .bdrv_eject = cdrom_eject,
025e849a 1773 .bdrv_lock_medium = cdrom_lock_medium,
19cb3738 1774};
f3a5d3f8 1775#endif /* __FreeBSD__ */
5efa9d5a 1776
4065742a
SH
1777#ifdef CONFIG_LINUX_AIO
1778/**
1779 * Return the file descriptor for Linux AIO
1780 *
1781 * This function is a layering violation and should be removed when it becomes
1782 * possible to call the block layer outside the global mutex. It allows the
1783 * caller to hijack the file descriptor so I/O can be performed outside the
1784 * block layer.
1785 */
1786int raw_get_aio_fd(BlockDriverState *bs)
1787{
1788 BDRVRawState *s;
1789
1790 if (!bs->drv) {
1791 return -ENOMEDIUM;
1792 }
1793
1794 if (bs->drv == bdrv_find_format("raw")) {
1795 bs = bs->file;
1796 }
1797
1798 /* raw-posix has several protocols so just check for raw_aio_readv */
1799 if (bs->drv->bdrv_aio_readv != raw_aio_readv) {
1800 return -ENOTSUP;
1801 }
1802
1803 s = bs->opaque;
1804 if (!s->use_aio) {
1805 return -ENOTSUP;
1806 }
1807 return s->fd;
1808}
1809#endif /* CONFIG_LINUX_AIO */
1810
84a12e66 1811static void bdrv_file_init(void)
5efa9d5a 1812{
508c7cb3
CH
1813 /*
1814 * Register all the drivers. Note that order is important, the driver
1815 * registered last will get probed first.
1816 */
84a12e66 1817 bdrv_register(&bdrv_file);
5efa9d5a 1818 bdrv_register(&bdrv_host_device);
f3a5d3f8
CH
1819#ifdef __linux__
1820 bdrv_register(&bdrv_host_floppy);
1821 bdrv_register(&bdrv_host_cdrom);
1822#endif
a167ba50 1823#if defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
f3a5d3f8
CH
1824 bdrv_register(&bdrv_host_cdrom);
1825#endif
5efa9d5a
AL
1826}
1827
84a12e66 1828block_init(bdrv_file_init);