]> git.proxmox.com Git - mirror_qemu.git/blame - block/raw-posix.c
raw-posix: implement write_zeroes with MAY_UNMAP for block devices
[mirror_qemu.git] / block / raw-posix.c
CommitLineData
83f64091 1/*
223d4670 2 * Block driver for RAW files (posix)
5fafdf24 3 *
83f64091 4 * Copyright (c) 2006 Fabrice Bellard
5fafdf24 5 *
83f64091
FB
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to deal
8 * in the Software without restriction, including without limitation the rights
9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 * copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22 * THE SOFTWARE.
23 */
faf07963 24#include "qemu-common.h"
1de7afc9
PB
25#include "qemu/timer.h"
26#include "qemu/log.h"
737e150e 27#include "block/block_int.h"
1de7afc9 28#include "qemu/module.h"
de81a169 29#include "trace.h"
737e150e 30#include "block/thread-pool.h"
1de7afc9 31#include "qemu/iov.h"
9f8540ec 32#include "raw-aio.h"
83f64091 33
83affaa6 34#if defined(__APPLE__) && (__MACH__)
83f64091
FB
35#include <paths.h>
36#include <sys/param.h>
37#include <IOKit/IOKitLib.h>
38#include <IOKit/IOBSD.h>
39#include <IOKit/storage/IOMediaBSDClient.h>
40#include <IOKit/storage/IOMedia.h>
41#include <IOKit/storage/IOCDMedia.h>
42//#include <IOKit/storage/IOCDTypes.h>
43#include <CoreFoundation/CoreFoundation.h>
44#endif
45
46#ifdef __sun__
2e9671da 47#define _POSIX_PTHREAD_SEMANTICS 1
83f64091
FB
48#include <sys/dkio.h>
49#endif
19cb3738 50#ifdef __linux__
343f8568
JS
51#include <sys/types.h>
52#include <sys/stat.h>
19cb3738 53#include <sys/ioctl.h>
05acda4d 54#include <sys/param.h>
19cb3738
FB
55#include <linux/cdrom.h>
56#include <linux/fd.h>
5500316d
PB
57#include <linux/fs.h>
58#endif
59#ifdef CONFIG_FIEMAP
60#include <linux/fiemap.h>
19cb3738 61#endif
3d4fa43e
KK
62#ifdef CONFIG_FALLOCATE_PUNCH_HOLE
63#include <linux/falloc.h>
64#endif
a167ba50 65#if defined (__FreeBSD__) || defined(__FreeBSD_kernel__)
1cb6c3fd 66#include <sys/disk.h>
9f23011a 67#include <sys/cdio.h>
1cb6c3fd 68#endif
83f64091 69
128ab2ff
BS
70#ifdef __OpenBSD__
71#include <sys/ioctl.h>
72#include <sys/disklabel.h>
73#include <sys/dkio.h>
74#endif
75
d1f6fd8d
CE
76#ifdef __NetBSD__
77#include <sys/ioctl.h>
78#include <sys/disklabel.h>
79#include <sys/dkio.h>
80#include <sys/disk.h>
81#endif
82
c5e97233
BS
83#ifdef __DragonFly__
84#include <sys/ioctl.h>
85#include <sys/diskslice.h>
86#endif
87
dce512de
CH
88#ifdef CONFIG_XFS
89#include <xfs/xfs.h>
90#endif
91
19cb3738 92//#define DEBUG_FLOPPY
83f64091 93
faf07963 94//#define DEBUG_BLOCK
03ff3ca3 95#if defined(DEBUG_BLOCK)
001faf32
BS
96#define DEBUG_BLOCK_PRINT(formatCstr, ...) do { if (qemu_log_enabled()) \
97 { qemu_log(formatCstr, ## __VA_ARGS__); qemu_log_flush(); } } while (0)
8c05dbf9 98#else
001faf32 99#define DEBUG_BLOCK_PRINT(formatCstr, ...)
8c05dbf9
TS
100#endif
101
f6465578
AL
102/* OS X does not have O_DSYNC */
103#ifndef O_DSYNC
1c27a8b3 104#ifdef O_SYNC
7ab064d2 105#define O_DSYNC O_SYNC
1c27a8b3
JA
106#elif defined(O_FSYNC)
107#define O_DSYNC O_FSYNC
108#endif
f6465578
AL
109#endif
110
9f7965c7
AL
111/* Approximate O_DIRECT with O_DSYNC if O_DIRECT isn't available */
112#ifndef O_DIRECT
113#define O_DIRECT O_DSYNC
114#endif
115
19cb3738
FB
116#define FTYPE_FILE 0
117#define FTYPE_CD 1
118#define FTYPE_FD 2
83f64091 119
c57c846a 120/* if the FD is not accessed during that time (in ns), we try to
19cb3738 121 reopen it to see if the disk has been changed */
c57c846a 122#define FD_OPEN_TIMEOUT (1000000000)
83f64091 123
581b9e29
CH
124#define MAX_BLOCKSIZE 4096
125
19cb3738
FB
/*
 * Per-BlockDriverState private data for the POSIX raw driver
 * (reached through bs->opaque).
 */
126typedef struct BDRVRawState {
/* Host file descriptor of the open image/device; -1 when not open. */
127 int fd;
/* One of the FTYPE_* constants (FTYPE_FILE, FTYPE_CD, FTYPE_FD). */
128 int type;
/* Host open(2) flags the descriptor was opened with. */
0e1d8f4c 129 int open_flags;
19cb3738
FB
130#if defined(__linux__)
131 /* linux floppy specific */
19cb3738
FB
132 int64_t fd_open_time;
133 int64_t fd_error_time;
134 int fd_got_error;
135 int fd_media_changed;
83f64091 136#endif
e44bd6fc 137#ifdef CONFIG_LINUX_AIO
/* Non-zero when requests should go through Linux native AIO. */
5c6c3a6c 138 int use_aio;
/* Context returned by laio_init(); NULL until first initialised. */
1e5b9d2f 139 void *aio_ctx;
e44bd6fc 140#endif
dce512de 141#ifdef CONFIG_XFS
/* Set when platform_test_xfs_fd() reports the fd lives on XFS. */
260a82e5 142 bool is_xfs:1;
dce512de 143#endif
260a82e5
PB
/* Cleared once a discard attempt reports that discard is unsupported. */
144 bool has_discard:1;
/* Set when discarded ranges are expected to read back as zeroes. */
145 bool discard_zeroes:1;
19cb3738
FB
146} BDRVRawState;
147
eeb6b45d
JC
/*
 * Staging area used while a reopen is in flight: raw_reopen_prepare() fills
 * it in, raw_reopen_commit() copies it into BDRVRawState, and
 * raw_reopen_abort() discards it.
 */
148typedef struct BDRVRawReopenState {
/* Newly opened/duplicated descriptor, or -1 if none was obtained. */
149 int fd;
/* Host open(2) flags computed for the new descriptor. */
150 int open_flags;
151#ifdef CONFIG_LINUX_AIO
/* Native-AIO setting that will apply after commit. */
152 int use_aio;
153#endif
154} BDRVRawReopenState;
155
19cb3738 156static int fd_open(BlockDriverState *bs);
22afa7b5 157static int64_t raw_getlength(BlockDriverState *bs);
83f64091 158
de81a169
PB
/*
 * Description of one request handed to aio_worker() through the thread pool.
 * Allocated with g_slice_new() by the submit helpers and freed by the worker.
 */
159typedef struct RawPosixAIOData {
160 BlockDriverState *bs;
/* Host file descriptor the request operates on. */
161 int aio_fildes;
/* Read/write requests use aio_iov; ioctl requests use aio_ioctl_buf. */
162 union {
163 struct iovec *aio_iov;
164 void *aio_ioctl_buf;
165 };
/* Number of entries in aio_iov. */
166 int aio_niov;
/* Request length in bytes (aliased as the ioctl command, see below). */
8238010b 167 uint64_t aio_nbytes;
de81a169
PB
168#define aio_ioctl_cmd aio_nbytes /* for QEMU_AIO_IOCTL */
/* Byte offset of the request within the file/device. */
169 off_t aio_offset;
/* QEMU_AIO_* request type, possibly OR-ed with modifier flags. */
170 int aio_type;
171} RawPosixAIOData;
172
a167ba50 173#if defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
f3a5d3f8 174static int cdrom_reopen(BlockDriverState *bs);
9f23011a
BS
175#endif
176
1de1ae0a
CE
#if defined(__NetBSD__)
/*
 * Rewrite a block-device path so that its last path component is prefixed
 * with 'r' (e.g. "/dev/sd0a" -> "/dev/rsd0a"); presumably this selects the
 * matching raw device node on NetBSD.  Paths that are not block devices are
 * left untouched.
 *
 * @filename: in/out pointer to the path.  When rewritten it points at a
 *            static buffer, so the result is overwritten by the next call
 *            and the function is not reentrant.
 *
 * Returns 0 on success or -errno if lstat() fails.
 */
static int raw_normalize_devicepath(const char **filename)
{
    static char namebuf[PATH_MAX];
    const char *dp, *fname;
    struct stat sb;

    fname = *filename;
    dp = strrchr(fname, '/');
    if (lstat(fname, &sb) < 0) {
        /* Save errno first: fprintf()/strerror() are allowed to change it. */
        int saved_errno = errno;

        fprintf(stderr, "%s: stat failed: %s\n",
                fname, strerror(saved_errno));
        return -saved_errno;
    }

    if (!S_ISBLK(sb.st_mode)) {
        return 0;
    }

    if (dp == NULL) {
        snprintf(namebuf, PATH_MAX, "r%s", fname);
    } else {
        snprintf(namebuf, PATH_MAX, "%.*s/r%s",
                 (int)(dp - fname), fname, dp + 1);
    }
    fprintf(stderr, "%s is a block device", fname);
    *filename = namebuf;
    fprintf(stderr, ", using %s\n", *filename);

    return 0;
}
#else
/*
 * No path normalisation is needed on this platform; *filename is left
 * unchanged and 0 is returned.
 */
static int raw_normalize_devicepath(const char **filename)
{
    return 0;
}
#endif
214
6a8dc042
JC
215static void raw_parse_flags(int bdrv_flags, int *open_flags)
216{
217 assert(open_flags != NULL);
218
219 *open_flags |= O_BINARY;
220 *open_flags &= ~O_ACCMODE;
221 if (bdrv_flags & BDRV_O_RDWR) {
222 *open_flags |= O_RDWR;
223 } else {
224 *open_flags |= O_RDONLY;
225 }
226
227 /* Use O_DSYNC for write-through caching, no flags for write-back caching,
228 * and O_DIRECT for no caching. */
229 if ((bdrv_flags & BDRV_O_NOCACHE)) {
230 *open_flags |= O_DIRECT;
231 }
6a8dc042
JC
232}
233
fc32a72d
JC
#ifdef CONFIG_LINUX_AIO
/*
 * Decide whether Linux native AIO is used for the given block-layer flags
 * and make sure an AIO context exists when it is.
 *
 * @aio_ctx:    in/out AIO context; created with laio_init() only if it is
 *              still NULL.
 * @use_aio:    out; set to 1 when native AIO is selected, 0 otherwise.
 *              Left untouched on failure.
 * @bdrv_flags: BDRV_O_* flags.
 *
 * Returns 0 on success, -1 if the AIO context could not be created.
 */
static int raw_set_aio(void **aio_ctx, int *use_aio, int bdrv_flags)
{
    /*
     * Linux currently does AIO only for files opened with O_DIRECT, so
     * both NOCACHE and NATIVE_AIO have to be requested.
     */
    const int required = BDRV_O_NOCACHE | BDRV_O_NATIVE_AIO;

    assert(aio_ctx != NULL);
    assert(use_aio != NULL);

    if ((bdrv_flags & required) != required) {
        *use_aio = 0;
        return 0;
    }

    /* A non-NULL context means laio_init() has already been run. */
    if (*aio_ctx == NULL) {
        *aio_ctx = laio_init();
        if (*aio_ctx == NULL) {
            return -1;
        }
    }

    *use_aio = 1;
    return 0;
}
#endif
265
c66a6157
KW
/*
 * Runtime options understood by raw_open_common(): a single string option,
 * "filename", naming the image to open.
 */
266static QemuOptsList raw_runtime_opts = {
267 .name = "raw",
268 .head = QTAILQ_HEAD_INITIALIZER(raw_runtime_opts.head),
269 .desc = {
270 {
271 .name = "filename",
272 .type = QEMU_OPT_STRING,
273 .help = "File name of the image",
274 },
275 { /* end of list */ }
276 },
277};
278
279static int raw_open_common(BlockDriverState *bs, QDict *options,
e428e439 280 int bdrv_flags, int open_flags, Error **errp)
83f64091
FB
281{
282 BDRVRawState *s = bs->opaque;
c66a6157
KW
283 QemuOpts *opts;
284 Error *local_err = NULL;
285 const char *filename;
0e1d8f4c 286 int fd, ret;
260a82e5 287 struct stat st;
83f64091 288
c66a6157
KW
289 opts = qemu_opts_create_nofail(&raw_runtime_opts);
290 qemu_opts_absorb_qdict(opts, options, &local_err);
291 if (error_is_set(&local_err)) {
e428e439 292 error_propagate(errp, local_err);
c66a6157
KW
293 ret = -EINVAL;
294 goto fail;
295 }
296
297 filename = qemu_opt_get(opts, "filename");
298
1de1ae0a
CE
299 ret = raw_normalize_devicepath(&filename);
300 if (ret != 0) {
e428e439 301 error_setg_errno(errp, -ret, "Could not normalize device path");
c66a6157 302 goto fail;
1de1ae0a
CE
303 }
304
6a8dc042
JC
305 s->open_flags = open_flags;
306 raw_parse_flags(bdrv_flags, &s->open_flags);
83f64091 307
90babde0 308 s->fd = -1;
40ff6d7e 309 fd = qemu_open(filename, s->open_flags, 0644);
19cb3738
FB
310 if (fd < 0) {
311 ret = -errno;
c66a6157 312 if (ret == -EROFS) {
19cb3738 313 ret = -EACCES;
c66a6157
KW
314 }
315 goto fail;
19cb3738 316 }
83f64091 317 s->fd = fd;
9ef91a67 318
5c6c3a6c 319#ifdef CONFIG_LINUX_AIO
fc32a72d 320 if (raw_set_aio(&s->aio_ctx, &s->use_aio, bdrv_flags)) {
47e6b251 321 qemu_close(fd);
c66a6157 322 ret = -errno;
e428e439 323 error_setg_errno(errp, -ret, "Could not set AIO state");
c66a6157 324 goto fail;
9ef91a67 325 }
fc32a72d 326#endif
9ef91a67 327
7ce21016 328 s->has_discard = true;
260a82e5
PB
329
330 if (fstat(s->fd, &st) < 0) {
331 error_setg_errno(errp, errno, "Could not stat file");
332 goto fail;
333 }
334 if (S_ISREG(st.st_mode)) {
335 s->discard_zeroes = true;
336 }
d0b4503e
PB
337 if (S_ISBLK(st.st_mode)) {
338#ifdef BLKDISCARDZEROES
339 unsigned int arg;
340 if (ioctl(s->fd, BLKDISCARDZEROES, &arg) == 0 && arg) {
341 s->discard_zeroes = true;
342 }
343#endif
344#ifdef __linux__
345 /* On Linux 3.10, BLKDISCARD leaves stale data in the page cache. Do
346 * not rely on the contents of discarded blocks unless using O_DIRECT.
347 */
348 if (!(bs->open_flags & BDRV_O_NOCACHE)) {
349 s->discard_zeroes = false;
350 }
351#endif
352 }
260a82e5 353
dce512de
CH
354#ifdef CONFIG_XFS
355 if (platform_test_xfs_fd(s->fd)) {
7ce21016 356 s->is_xfs = true;
dce512de
CH
357 }
358#endif
359
c66a6157
KW
360 ret = 0;
361fail:
362 qemu_opts_del(opts);
363 return ret;
83f64091
FB
364}
365
015a1036
HR
366static int raw_open(BlockDriverState *bs, QDict *options, int flags,
367 Error **errp)
90babde0
CH
368{
369 BDRVRawState *s = bs->opaque;
e428e439
HR
370 Error *local_err = NULL;
371 int ret;
90babde0
CH
372
373 s->type = FTYPE_FILE;
e428e439
HR
374 ret = raw_open_common(bs, options, flags, 0, &local_err);
375 if (error_is_set(&local_err)) {
376 error_propagate(errp, local_err);
377 }
378 return ret;
90babde0
CH
379}
380
eeb6b45d
JC
381static int raw_reopen_prepare(BDRVReopenState *state,
382 BlockReopenQueue *queue, Error **errp)
383{
384 BDRVRawState *s;
385 BDRVRawReopenState *raw_s;
386 int ret = 0;
387
388 assert(state != NULL);
389 assert(state->bs != NULL);
390
391 s = state->bs->opaque;
392
393 state->opaque = g_malloc0(sizeof(BDRVRawReopenState));
394 raw_s = state->opaque;
395
396#ifdef CONFIG_LINUX_AIO
397 raw_s->use_aio = s->use_aio;
398
399 /* we can use s->aio_ctx instead of a copy, because the use_aio flag is
400 * valid in the 'false' condition even if aio_ctx is set, and raw_set_aio()
401 * won't override aio_ctx if aio_ctx is non-NULL */
402 if (raw_set_aio(&s->aio_ctx, &raw_s->use_aio, state->flags)) {
e428e439 403 error_setg(errp, "Could not set AIO state");
eeb6b45d
JC
404 return -1;
405 }
406#endif
407
1bc6b705
JC
408 if (s->type == FTYPE_FD || s->type == FTYPE_CD) {
409 raw_s->open_flags |= O_NONBLOCK;
410 }
411
eeb6b45d
JC
412 raw_parse_flags(state->flags, &raw_s->open_flags);
413
414 raw_s->fd = -1;
415
fdf263f6 416 int fcntl_flags = O_APPEND | O_NONBLOCK;
eeb6b45d
JC
417#ifdef O_NOATIME
418 fcntl_flags |= O_NOATIME;
419#endif
420
fdf263f6
AF
421#ifdef O_ASYNC
422 /* Not all operating systems have O_ASYNC, and those that don't
423 * will not let us track the state into raw_s->open_flags (typically
424 * you achieve the same effect with an ioctl, for example I_SETSIG
425 * on Solaris). But we do not use O_ASYNC, so that's fine.
426 */
427 assert((s->open_flags & O_ASYNC) == 0);
428#endif
429
eeb6b45d
JC
430 if ((raw_s->open_flags & ~fcntl_flags) == (s->open_flags & ~fcntl_flags)) {
431 /* dup the original fd */
432 /* TODO: use qemu fcntl wrapper */
433#ifdef F_DUPFD_CLOEXEC
434 raw_s->fd = fcntl(s->fd, F_DUPFD_CLOEXEC, 0);
435#else
436 raw_s->fd = dup(s->fd);
437 if (raw_s->fd != -1) {
438 qemu_set_cloexec(raw_s->fd);
439 }
440#endif
441 if (raw_s->fd >= 0) {
442 ret = fcntl_setfl(raw_s->fd, raw_s->open_flags);
443 if (ret) {
444 qemu_close(raw_s->fd);
445 raw_s->fd = -1;
446 }
447 }
448 }
449
450 /* If we cannot use fcntl, or fcntl failed, fall back to qemu_open() */
451 if (raw_s->fd == -1) {
452 assert(!(raw_s->open_flags & O_CREAT));
453 raw_s->fd = qemu_open(state->bs->filename, raw_s->open_flags);
454 if (raw_s->fd == -1) {
e428e439 455 error_setg_errno(errp, errno, "Could not reopen file");
eeb6b45d
JC
456 ret = -1;
457 }
458 }
459 return ret;
460}
461
462
463static void raw_reopen_commit(BDRVReopenState *state)
464{
465 BDRVRawReopenState *raw_s = state->opaque;
466 BDRVRawState *s = state->bs->opaque;
467
468 s->open_flags = raw_s->open_flags;
469
470 qemu_close(s->fd);
471 s->fd = raw_s->fd;
472#ifdef CONFIG_LINUX_AIO
473 s->use_aio = raw_s->use_aio;
474#endif
475
476 g_free(state->opaque);
477 state->opaque = NULL;
478}
479
480
481static void raw_reopen_abort(BDRVReopenState *state)
482{
483 BDRVRawReopenState *raw_s = state->opaque;
484
485 /* nothing to do if NULL, we didn't get far enough */
486 if (raw_s == NULL) {
487 return;
488 }
489
490 if (raw_s->fd >= 0) {
491 qemu_close(raw_s->fd);
492 raw_s->fd = -1;
493 }
494 g_free(state->opaque);
495 state->opaque = NULL;
496}
497
498
83f64091
FB
499/* XXX: use host sector size if necessary with:
500#ifdef DIOCGSECTORSIZE
501 {
502 unsigned int sectorsize = 512;
503 if (!ioctl(fd, DIOCGSECTORSIZE, &sectorsize) &&
504 sectorsize > bufsize)
505 bufsize = sectorsize;
506 }
507#endif
508#ifdef CONFIG_COCOA
2ee9fb48 509 uint32_t blockSize = 512;
83f64091
FB
510 if ( !ioctl( fd, DKIOCGETBLOCKSIZE, &blockSize ) && blockSize > bufsize) {
511 bufsize = blockSize;
512 }
513#endif
514*/
515
de81a169
PB
516static ssize_t handle_aiocb_ioctl(RawPosixAIOData *aiocb)
517{
518 int ret;
519
520 ret = ioctl(aiocb->aio_fildes, aiocb->aio_ioctl_cmd, aiocb->aio_ioctl_buf);
521 if (ret == -1) {
522 return -errno;
523 }
524
b608c8dc 525 return 0;
de81a169
PB
526}
527
528static ssize_t handle_aiocb_flush(RawPosixAIOData *aiocb)
529{
530 int ret;
531
532 ret = qemu_fdatasync(aiocb->aio_fildes);
533 if (ret == -1) {
534 return -errno;
535 }
536 return 0;
537}
538
/*
 * Thin wrappers around preadv()/pwritev().  When the build has no
 * CONFIG_PREADV they return -ENOSYS directly (errno is not set), and
 * preadv_present starts out false so callers skip the vectored path.
 */
#ifdef CONFIG_PREADV
static bool preadv_present = true;
#else
static bool preadv_present = false;
#endif

static ssize_t
qemu_preadv(int fd, const struct iovec *iov, int nr_iov, off_t offset)
{
#ifdef CONFIG_PREADV
    return preadv(fd, iov, nr_iov, offset);
#else
    return -ENOSYS;
#endif
}

static ssize_t
qemu_pwritev(int fd, const struct iovec *iov, int nr_iov, off_t offset)
{
#ifdef CONFIG_PREADV
    return pwritev(fd, iov, nr_iov, offset);
#else
    return -ENOSYS;
#endif
}
572
573static ssize_t handle_aiocb_rw_vector(RawPosixAIOData *aiocb)
574{
575 ssize_t len;
576
577 do {
578 if (aiocb->aio_type & QEMU_AIO_WRITE)
579 len = qemu_pwritev(aiocb->aio_fildes,
580 aiocb->aio_iov,
581 aiocb->aio_niov,
582 aiocb->aio_offset);
583 else
584 len = qemu_preadv(aiocb->aio_fildes,
585 aiocb->aio_iov,
586 aiocb->aio_niov,
587 aiocb->aio_offset);
588 } while (len == -1 && errno == EINTR);
589
590 if (len == -1) {
591 return -errno;
592 }
593 return len;
594}
595
596/*
597 * Read/writes the data to/from a given linear buffer.
598 *
599 * Returns the number of bytes handles or -errno in case of an error. Short
600 * reads are only returned if the end of the file is reached.
601 */
602static ssize_t handle_aiocb_rw_linear(RawPosixAIOData *aiocb, char *buf)
603{
604 ssize_t offset = 0;
605 ssize_t len;
606
607 while (offset < aiocb->aio_nbytes) {
608 if (aiocb->aio_type & QEMU_AIO_WRITE) {
609 len = pwrite(aiocb->aio_fildes,
610 (const char *)buf + offset,
611 aiocb->aio_nbytes - offset,
612 aiocb->aio_offset + offset);
613 } else {
614 len = pread(aiocb->aio_fildes,
615 buf + offset,
616 aiocb->aio_nbytes - offset,
617 aiocb->aio_offset + offset);
618 }
619 if (len == -1 && errno == EINTR) {
620 continue;
621 } else if (len == -1) {
622 offset = -errno;
623 break;
624 } else if (len == 0) {
625 break;
626 }
627 offset += len;
628 }
629
630 return offset;
631}
632
633static ssize_t handle_aiocb_rw(RawPosixAIOData *aiocb)
634{
635 ssize_t nbytes;
636 char *buf;
637
638 if (!(aiocb->aio_type & QEMU_AIO_MISALIGNED)) {
639 /*
640 * If there is just a single buffer, and it is properly aligned
641 * we can just use plain pread/pwrite without any problems.
642 */
643 if (aiocb->aio_niov == 1) {
644 return handle_aiocb_rw_linear(aiocb, aiocb->aio_iov->iov_base);
645 }
646 /*
647 * We have more than one iovec, and all are properly aligned.
648 *
649 * Try preadv/pwritev first and fall back to linearizing the
650 * buffer if it's not supported.
651 */
652 if (preadv_present) {
653 nbytes = handle_aiocb_rw_vector(aiocb);
654 if (nbytes == aiocb->aio_nbytes ||
655 (nbytes < 0 && nbytes != -ENOSYS)) {
656 return nbytes;
657 }
658 preadv_present = false;
659 }
660
661 /*
662 * XXX(hch): short read/write. no easy way to handle the reminder
663 * using these interfaces. For now retry using plain
664 * pread/pwrite?
665 */
666 }
667
668 /*
669 * Ok, we have to do it the hard way, copy all segments into
670 * a single aligned buffer.
671 */
672 buf = qemu_blockalign(aiocb->bs, aiocb->aio_nbytes);
673 if (aiocb->aio_type & QEMU_AIO_WRITE) {
674 char *p = buf;
675 int i;
676
677 for (i = 0; i < aiocb->aio_niov; ++i) {
678 memcpy(p, aiocb->aio_iov[i].iov_base, aiocb->aio_iov[i].iov_len);
679 p += aiocb->aio_iov[i].iov_len;
680 }
681 }
682
683 nbytes = handle_aiocb_rw_linear(aiocb, buf);
684 if (!(aiocb->aio_type & QEMU_AIO_WRITE)) {
685 char *p = buf;
686 size_t count = aiocb->aio_nbytes, copy;
687 int i;
688
689 for (i = 0; i < aiocb->aio_niov && count; ++i) {
690 copy = count;
691 if (copy > aiocb->aio_iov[i].iov_len) {
692 copy = aiocb->aio_iov[i].iov_len;
693 }
694 memcpy(aiocb->aio_iov[i].iov_base, p, copy);
695 p += copy;
696 count -= copy;
697 }
698 }
699 qemu_vfree(buf);
700
701 return nbytes;
702}
703
8238010b
PB
#ifdef CONFIG_XFS
/*
 * Release the byte range [@offset, @offset + @bytes) of an XFS file using
 * the XFS_IOC_UNRESVSP64 control call.
 *
 * Returns 0 on success or -errno on failure.
 */
static int xfs_discard(BDRVRawState *s, int64_t offset, uint64_t bytes)
{
    struct xfs_flock64 fl;

    memset(&fl, 0, sizeof(fl));
    fl.l_whence = SEEK_SET;
    fl.l_start = offset;
    fl.l_len = bytes;

    if (xfsctl(NULL, s->fd, XFS_IOC_UNRESVSP64, &fl) < 0) {
        /* Capture errno before the debug print can overwrite it. */
        int saved_errno = errno;

        DEBUG_BLOCK_PRINT("cannot punch hole (%s)\n", strerror(saved_errno));
        return -saved_errno;
    }

    return 0;
}
#endif
722
723static ssize_t handle_aiocb_discard(RawPosixAIOData *aiocb)
724{
725 int ret = -EOPNOTSUPP;
726 BDRVRawState *s = aiocb->bs->opaque;
727
7ce21016
PB
728 if (!s->has_discard) {
729 return -ENOTSUP;
8238010b
PB
730 }
731
732 if (aiocb->aio_type & QEMU_AIO_BLKDEV) {
733#ifdef BLKDISCARD
734 do {
735 uint64_t range[2] = { aiocb->aio_offset, aiocb->aio_nbytes };
736 if (ioctl(aiocb->aio_fildes, BLKDISCARD, range) == 0) {
737 return 0;
738 }
739 } while (errno == EINTR);
740
741 ret = -errno;
742#endif
743 } else {
744#ifdef CONFIG_XFS
745 if (s->is_xfs) {
746 return xfs_discard(s, aiocb->aio_offset, aiocb->aio_nbytes);
747 }
748#endif
749
750#ifdef CONFIG_FALLOCATE_PUNCH_HOLE
751 do {
752 if (fallocate(s->fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
753 aiocb->aio_offset, aiocb->aio_nbytes) == 0) {
754 return 0;
755 }
756 } while (errno == EINTR);
757
758 ret = -errno;
759#endif
760 }
761
762 if (ret == -ENODEV || ret == -ENOSYS || ret == -EOPNOTSUPP ||
763 ret == -ENOTTY) {
7ce21016
PB
764 s->has_discard = false;
765 ret = -ENOTSUP;
8238010b
PB
766 }
767 return ret;
768}
769
de81a169
PB
770static int aio_worker(void *arg)
771{
772 RawPosixAIOData *aiocb = arg;
773 ssize_t ret = 0;
774
775 switch (aiocb->aio_type & QEMU_AIO_TYPE_MASK) {
776 case QEMU_AIO_READ:
777 ret = handle_aiocb_rw(aiocb);
778 if (ret >= 0 && ret < aiocb->aio_nbytes && aiocb->bs->growable) {
779 iov_memset(aiocb->aio_iov, aiocb->aio_niov, ret,
780 0, aiocb->aio_nbytes - ret);
781
782 ret = aiocb->aio_nbytes;
783 }
784 if (ret == aiocb->aio_nbytes) {
785 ret = 0;
786 } else if (ret >= 0 && ret < aiocb->aio_nbytes) {
787 ret = -EINVAL;
788 }
789 break;
790 case QEMU_AIO_WRITE:
791 ret = handle_aiocb_rw(aiocb);
792 if (ret == aiocb->aio_nbytes) {
793 ret = 0;
794 } else if (ret >= 0 && ret < aiocb->aio_nbytes) {
795 ret = -EINVAL;
796 }
797 break;
798 case QEMU_AIO_FLUSH:
799 ret = handle_aiocb_flush(aiocb);
800 break;
801 case QEMU_AIO_IOCTL:
802 ret = handle_aiocb_ioctl(aiocb);
803 break;
8238010b
PB
804 case QEMU_AIO_DISCARD:
805 ret = handle_aiocb_discard(aiocb);
806 break;
de81a169
PB
807 default:
808 fprintf(stderr, "invalid aio request (0x%x)\n", aiocb->aio_type);
809 ret = -EINVAL;
810 break;
811 }
812
813 g_slice_free(RawPosixAIOData, aiocb);
814 return ret;
815}
816
260a82e5
PB
817static int paio_submit_co(BlockDriverState *bs, int fd,
818 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
819 int type)
820{
821 RawPosixAIOData *acb = g_slice_new(RawPosixAIOData);
822 ThreadPool *pool;
823
824 acb->bs = bs;
825 acb->aio_type = type;
826 acb->aio_fildes = fd;
827
828 if (qiov) {
829 acb->aio_iov = qiov->iov;
830 acb->aio_niov = qiov->niov;
831 }
832 acb->aio_nbytes = nb_sectors * 512;
833 acb->aio_offset = sector_num * 512;
834
835 trace_paio_submit_co(sector_num, nb_sectors, type);
836 pool = aio_get_thread_pool(bdrv_get_aio_context(bs));
837 return thread_pool_submit_co(pool, aio_worker, acb);
838}
839
de81a169
PB
840static BlockDriverAIOCB *paio_submit(BlockDriverState *bs, int fd,
841 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
842 BlockDriverCompletionFunc *cb, void *opaque, int type)
843{
844 RawPosixAIOData *acb = g_slice_new(RawPosixAIOData);
c4d9d196 845 ThreadPool *pool;
de81a169
PB
846
847 acb->bs = bs;
848 acb->aio_type = type;
849 acb->aio_fildes = fd;
850
851 if (qiov) {
852 acb->aio_iov = qiov->iov;
853 acb->aio_niov = qiov->niov;
854 }
855 acb->aio_nbytes = nb_sectors * 512;
856 acb->aio_offset = sector_num * 512;
857
858 trace_paio_submit(acb, opaque, sector_num, nb_sectors, type);
c4d9d196
SH
859 pool = aio_get_thread_pool(bdrv_get_aio_context(bs));
860 return thread_pool_submit_aio(pool, aio_worker, acb, cb, opaque);
de81a169
PB
861}
862
9ef91a67
CH
863static BlockDriverAIOCB *raw_aio_submit(BlockDriverState *bs,
864 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
865 BlockDriverCompletionFunc *cb, void *opaque, int type)
83f64091 866{
ce1a14dc 867 BDRVRawState *s = bs->opaque;
ce1a14dc 868
19cb3738
FB
869 if (fd_open(bs) < 0)
870 return NULL;
871
f141eafe
AL
872 /*
873 * If O_DIRECT is used the buffer needs to be aligned on a sector
c1ee7d56 874 * boundary. Check if this is the case or tell the low-level
9ef91a67 875 * driver that it needs to copy the buffer.
f141eafe 876 */
9acc5a06 877 if ((bs->open_flags & BDRV_O_NOCACHE)) {
c53b1c51 878 if (!bdrv_qiov_is_aligned(bs, qiov)) {
5c6c3a6c 879 type |= QEMU_AIO_MISALIGNED;
e44bd6fc 880#ifdef CONFIG_LINUX_AIO
5c6c3a6c
CH
881 } else if (s->use_aio) {
882 return laio_submit(bs, s->aio_ctx, s->fd, sector_num, qiov,
e44bd6fc
SW
883 nb_sectors, cb, opaque, type);
884#endif
5c6c3a6c 885 }
9ef91a67 886 }
f141eafe 887
1e5b9d2f 888 return paio_submit(bs, s->fd, sector_num, qiov, nb_sectors,
9ef91a67 889 cb, opaque, type);
83f64091
FB
890}
891
f141eafe
AL
892static BlockDriverAIOCB *raw_aio_readv(BlockDriverState *bs,
893 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
ce1a14dc 894 BlockDriverCompletionFunc *cb, void *opaque)
83f64091 895{
9ef91a67
CH
896 return raw_aio_submit(bs, sector_num, qiov, nb_sectors,
897 cb, opaque, QEMU_AIO_READ);
83f64091
FB
898}
899
f141eafe
AL
900static BlockDriverAIOCB *raw_aio_writev(BlockDriverState *bs,
901 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
ce1a14dc 902 BlockDriverCompletionFunc *cb, void *opaque)
83f64091 903{
9ef91a67
CH
904 return raw_aio_submit(bs, sector_num, qiov, nb_sectors,
905 cb, opaque, QEMU_AIO_WRITE);
83f64091 906}
53538725 907
b2e12bc6
CH
908static BlockDriverAIOCB *raw_aio_flush(BlockDriverState *bs,
909 BlockDriverCompletionFunc *cb, void *opaque)
910{
911 BDRVRawState *s = bs->opaque;
912
913 if (fd_open(bs) < 0)
914 return NULL;
915
1e5b9d2f 916 return paio_submit(bs, s->fd, 0, NULL, 0, cb, opaque, QEMU_AIO_FLUSH);
b2e12bc6
CH
917}
918
83f64091
FB
919static void raw_close(BlockDriverState *bs)
920{
921 BDRVRawState *s = bs->opaque;
19cb3738 922 if (s->fd >= 0) {
2e1e79da 923 qemu_close(s->fd);
19cb3738
FB
924 s->fd = -1;
925 }
83f64091
FB
926}
927
928static int raw_truncate(BlockDriverState *bs, int64_t offset)
929{
930 BDRVRawState *s = bs->opaque;
55b949c8
CH
931 struct stat st;
932
933 if (fstat(s->fd, &st)) {
83f64091 934 return -errno;
55b949c8
CH
935 }
936
937 if (S_ISREG(st.st_mode)) {
938 if (ftruncate(s->fd, offset) < 0) {
939 return -errno;
940 }
941 } else if (S_ISCHR(st.st_mode) || S_ISBLK(st.st_mode)) {
942 if (offset > raw_getlength(bs)) {
943 return -EINVAL;
944 }
945 } else {
946 return -ENOTSUP;
947 }
948
83f64091
FB
949 return 0;
950}
951
128ab2ff
BS
952#ifdef __OpenBSD__
953static int64_t raw_getlength(BlockDriverState *bs)
954{
955 BDRVRawState *s = bs->opaque;
956 int fd = s->fd;
957 struct stat st;
958
959 if (fstat(fd, &st))
960 return -1;
961 if (S_ISCHR(st.st_mode) || S_ISBLK(st.st_mode)) {
962 struct disklabel dl;
963
964 if (ioctl(fd, DIOCGDINFO, &dl))
965 return -1;
966 return (uint64_t)dl.d_secsize *
967 dl.d_partitions[DISKPART(st.st_rdev)].p_size;
968 } else
969 return st.st_size;
970}
d1f6fd8d
CE
971#elif defined(__NetBSD__)
972static int64_t raw_getlength(BlockDriverState *bs)
973{
974 BDRVRawState *s = bs->opaque;
975 int fd = s->fd;
976 struct stat st;
977
978 if (fstat(fd, &st))
979 return -1;
980 if (S_ISCHR(st.st_mode) || S_ISBLK(st.st_mode)) {
981 struct dkwedge_info dkw;
982
983 if (ioctl(fd, DIOCGWEDGEINFO, &dkw) != -1) {
984 return dkw.dkw_size * 512;
985 } else {
986 struct disklabel dl;
987
988 if (ioctl(fd, DIOCGDINFO, &dl))
989 return -1;
990 return (uint64_t)dl.d_secsize *
991 dl.d_partitions[DISKPART(st.st_rdev)].p_size;
992 }
993 } else
994 return st.st_size;
995}
50779cc2
CH
996#elif defined(__sun__)
997static int64_t raw_getlength(BlockDriverState *bs)
998{
999 BDRVRawState *s = bs->opaque;
1000 struct dk_minfo minfo;
1001 int ret;
1002
1003 ret = fd_open(bs);
1004 if (ret < 0) {
1005 return ret;
1006 }
1007
1008 /*
1009 * Use the DKIOCGMEDIAINFO ioctl to read the size.
1010 */
1011 ret = ioctl(s->fd, DKIOCGMEDIAINFO, &minfo);
1012 if (ret != -1) {
1013 return minfo.dki_lbsize * minfo.dki_capacity;
1014 }
1015
1016 /*
1017 * There are reports that lseek on some devices fails, but
1018 * irc discussion said that contingency on contingency was overkill.
1019 */
1020 return lseek(s->fd, 0, SEEK_END);
1021}
1022#elif defined(CONFIG_BSD)
/*
 * Return the length in bytes of the image (generic BSD variant, also
 * compiled for Mac OS X).
 *
 * When fstat() succeeds and the S_IFCHR bit is set in st_mode, the size is
 * taken from DIOCGMEDIASIZE or DIOCGPART where the platform defines them,
 * with a fallback of LONG_LONG_MAX on Apple and lseek(SEEK_END) elsewhere.
 * Everything else is sized with lseek(SEEK_END).
 *
 * Returns the size, or the negative value from fd_open() if that fails.
 * The lseek() results are returned unchecked, so an lseek() failure
 * surfaces as -1.
 *
 * NOTE(review): the preprocessor branches below share a single dangling
 * "if", so the statement that follows them is its body; keep that in mind
 * before reordering anything here.
 * NOTE(review): fd is sampled from s->fd before fd_open() and before the
 * FreeBSD cdrom_reopen() retry; if either can replace s->fd, the local
 * copy would be stale - confirm against those functions.
 */
1023static int64_t raw_getlength(BlockDriverState *bs)
83f64091
FB
1024{
1025 BDRVRawState *s = bs->opaque;
1026 int fd = s->fd;
1027 int64_t size;
83f64091 1028 struct stat sb;
a167ba50 1029#if defined (__FreeBSD__) || defined(__FreeBSD_kernel__)
9f23011a 1030 int reopened = 0;
83f64091 1031#endif
19cb3738
FB
1032 int ret;
1033
1034 ret = fd_open(bs);
1035 if (ret < 0)
1036 return ret;
83f64091 1037
a167ba50 1038#if defined (__FreeBSD__) || defined(__FreeBSD_kernel__)
9f23011a
BS
1039again:
1040#endif
83f64091
FB
/* S_IFCHR is tested as a bit mask here, not with S_ISCHR(). */
1041 if (!fstat(fd, &sb) && (S_IFCHR & sb.st_mode)) {
1042#ifdef DIOCGMEDIASIZE
1043 if (ioctl(fd, DIOCGMEDIASIZE, (off_t *)&size))
c5e97233
BS
1044#elif defined(DIOCGPART)
1045 {
1046 struct partinfo pi;
1047 if (ioctl(fd, DIOCGPART, &pi) == 0)
1048 size = pi.media_size;
1049 else
1050 size = 0;
1051 }
1052 if (size == 0)
83f64091 1053#endif
83affaa6 1054#if defined(__APPLE__) && defined(__MACH__)
83f64091
FB
1055 size = LONG_LONG_MAX;
1056#else
1057 size = lseek(fd, 0LL, SEEK_END);
9f23011a 1058#endif
a167ba50 1059#if defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
9f23011a
BS
1060 switch(s->type) {
1061 case FTYPE_CD:
1062 /* XXX FreeBSD acd returns UINT_MAX sectors for an empty drive */
1063 if (size == 2048LL * (unsigned)-1)
1064 size = 0;
1065 /* XXX no disc? maybe we need to reopen... */
f3a5d3f8 1066 if (size <= 0 && !reopened && cdrom_reopen(bs) >= 0) {
9f23011a
BS
1067 reopened = 1;
1068 goto again;
1069 }
1070 }
83f64091 1071#endif
50779cc2 1072 } else {
83f64091
FB
1073 size = lseek(fd, 0, SEEK_END);
1074 }
83f64091
FB
1075 return size;
1076}
50779cc2
CH
1077#else
1078static int64_t raw_getlength(BlockDriverState *bs)
1079{
1080 BDRVRawState *s = bs->opaque;
1081 int ret;
1082
1083 ret = fd_open(bs);
1084 if (ret < 0) {
1085 return ret;
1086 }
1087
1088 return lseek(s->fd, 0, SEEK_END);
1089}
128ab2ff 1090#endif
83f64091 1091
4a1d5e1f
FZ
1092static int64_t raw_get_allocated_file_size(BlockDriverState *bs)
1093{
1094 struct stat st;
1095 BDRVRawState *s = bs->opaque;
1096
1097 if (fstat(s->fd, &st) < 0) {
1098 return -errno;
1099 }
1100 return (int64_t)st.st_blocks * 512;
1101}
1102
d5124c00
HR
1103static int raw_create(const char *filename, QEMUOptionParameter *options,
1104 Error **errp)
83f64091
FB
1105{
1106 int fd;
1e37d059 1107 int result = 0;
0e7e1989 1108 int64_t total_size = 0;
83f64091 1109
0e7e1989
KW
1110 /* Read out options */
1111 while (options && options->name) {
1112 if (!strcmp(options->name, BLOCK_OPT_SIZE)) {
9040385d 1113 total_size = options->value.n / BDRV_SECTOR_SIZE;
0e7e1989
KW
1114 }
1115 options++;
1116 }
83f64091 1117
6165f4d8
CB
1118 fd = qemu_open(filename, O_WRONLY | O_CREAT | O_TRUNC | O_BINARY,
1119 0644);
1e37d059
SW
1120 if (fd < 0) {
1121 result = -errno;
e428e439 1122 error_setg_errno(errp, -result, "Could not create file");
1e37d059 1123 } else {
9040385d 1124 if (ftruncate(fd, total_size * BDRV_SECTOR_SIZE) != 0) {
1e37d059 1125 result = -errno;
e428e439 1126 error_setg_errno(errp, -result, "Could not resize file");
1e37d059 1127 }
2e1e79da 1128 if (qemu_close(fd) != 0) {
1e37d059 1129 result = -errno;
e428e439 1130 error_setg_errno(errp, -result, "Could not close the new file");
1e37d059
SW
1131 }
1132 }
1133 return result;
83f64091
FB
1134}
1135
5500316d
PB
1136/*
1137 * Returns true iff the specified sector is present in the disk image. Drivers
1138 * not implementing the functionality are assumed to not support backing files,
1139 * hence all their sectors are reported as allocated.
1140 *
1141 * If 'sector_num' is beyond the end of the disk image the return value is 0
1142 * and 'pnum' is set to 0.
1143 *
1144 * 'pnum' is set to the number of sectors (including and immediately following
1145 * the specified sector) that are known to be in the same
1146 * allocated/unallocated state.
1147 *
1148 * 'nb_sectors' is the max value 'pnum' should be set to. If nb_sectors goes
1149 * beyond the end of the disk image it will be clamped.
1150 */
b6b8a333 1151static int64_t coroutine_fn raw_co_get_block_status(BlockDriverState *bs,
5500316d
PB
1152 int64_t sector_num,
1153 int nb_sectors, int *pnum)
1154{
5500316d 1155 off_t start, data, hole;
63390a8d 1156 int64_t ret;
5500316d
PB
1157
1158 ret = fd_open(bs);
1159 if (ret < 0) {
1160 return ret;
1161 }
1162
1163 start = sector_num * BDRV_SECTOR_SIZE;
63390a8d 1164 ret = BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID | start;
94282e71 1165
5500316d 1166#ifdef CONFIG_FIEMAP
94282e71
KW
1167
1168 BDRVRawState *s = bs->opaque;
5500316d
PB
1169 struct {
1170 struct fiemap fm;
1171 struct fiemap_extent fe;
1172 } f;
94282e71 1173
5500316d
PB
1174 f.fm.fm_start = start;
1175 f.fm.fm_length = (int64_t)nb_sectors * BDRV_SECTOR_SIZE;
1176 f.fm.fm_flags = 0;
1177 f.fm.fm_extent_count = 1;
1178 f.fm.fm_reserved = 0;
1179 if (ioctl(s->fd, FS_IOC_FIEMAP, &f) == -1) {
1180 /* Assume everything is allocated. */
1181 *pnum = nb_sectors;
63390a8d 1182 return ret;
5500316d
PB
1183 }
1184
1185 if (f.fm.fm_mapped_extents == 0) {
1186 /* No extents found, data is beyond f.fm.fm_start + f.fm.fm_length.
1187 * f.fm.fm_start + f.fm.fm_length must be clamped to the file size!
1188 */
1189 off_t length = lseek(s->fd, 0, SEEK_END);
1190 hole = f.fm.fm_start;
1191 data = MIN(f.fm.fm_start + f.fm.fm_length, length);
1192 } else {
1193 data = f.fe.fe_logical;
1194 hole = f.fe.fe_logical + f.fe.fe_length;
f5f7abcf
PB
1195 if (f.fe.fe_flags & FIEMAP_EXTENT_UNWRITTEN) {
1196 ret |= BDRV_BLOCK_ZERO;
1197 }
5500316d 1198 }
94282e71 1199
5500316d 1200#elif defined SEEK_HOLE && defined SEEK_DATA
94282e71
KW
1201
1202 BDRVRawState *s = bs->opaque;
1203
5500316d
PB
1204 hole = lseek(s->fd, start, SEEK_HOLE);
1205 if (hole == -1) {
1206 /* -ENXIO indicates that sector_num was past the end of the file.
1207 * There is a virtual hole there. */
1208 assert(errno != -ENXIO);
1209
1210 /* Most likely EINVAL. Assume everything is allocated. */
1211 *pnum = nb_sectors;
63390a8d 1212 return ret;
5500316d
PB
1213 }
1214
1215 if (hole > start) {
1216 data = start;
1217 } else {
1218 /* On a hole. We need another syscall to find its end. */
1219 data = lseek(s->fd, start, SEEK_DATA);
1220 if (data == -1) {
1221 data = lseek(s->fd, 0, SEEK_END);
1222 }
1223 }
1224#else
63390a8d
PB
1225 data = 0;
1226 hole = start + nb_sectors * BDRV_SECTOR_SIZE;
5500316d
PB
1227#endif
1228
1229 if (data <= start) {
1230 /* On a data extent, compute sectors to the end of the extent. */
1231 *pnum = MIN(nb_sectors, (hole - start) / BDRV_SECTOR_SIZE);
5500316d
PB
1232 } else {
1233 /* On a hole, compute sectors to the beginning of the next extent. */
1234 *pnum = MIN(nb_sectors, (data - start) / BDRV_SECTOR_SIZE);
63390a8d
PB
1235 ret &= ~BDRV_BLOCK_DATA;
1236 ret |= BDRV_BLOCK_ZERO;
5500316d 1237 }
63390a8d
PB
1238
1239 return ret;
5500316d
PB
1240}
1241
8238010b
PB
1242static coroutine_fn BlockDriverAIOCB *raw_aio_discard(BlockDriverState *bs,
1243 int64_t sector_num, int nb_sectors,
1244 BlockDriverCompletionFunc *cb, void *opaque)
dce512de 1245{
dce512de
CH
1246 BDRVRawState *s = bs->opaque;
1247
8238010b
PB
1248 return paio_submit(bs, s->fd, sector_num, NULL, nb_sectors,
1249 cb, opaque, QEMU_AIO_DISCARD);
dce512de 1250}
0e7e1989 1251
260a82e5
PB
1252static int coroutine_fn raw_co_write_zeroes(
1253 BlockDriverState *bs, int64_t sector_num,
1254 int nb_sectors, BdrvRequestFlags flags)
1255{
1256 BDRVRawState *s = bs->opaque;
1257
1258 if (!(flags & BDRV_REQ_MAY_UNMAP)) {
1259 return -ENOTSUP;
1260 }
1261 if (!s->discard_zeroes) {
1262 return -ENOTSUP;
1263 }
1264 return paio_submit_co(bs, s->fd, sector_num, NULL, nb_sectors,
1265 QEMU_AIO_DISCARD);
1266}
1267
1268static int raw_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
1269{
1270 BDRVRawState *s = bs->opaque;
1271
1272 bdi->unallocated_blocks_are_zero = s->discard_zeroes;
1273 bdi->can_write_zeroes_with_unmap = s->discard_zeroes;
1274 return 0;
1275}
1276
0e7e1989 1277static QEMUOptionParameter raw_create_options[] = {
db08adf5
KW
1278 {
1279 .name = BLOCK_OPT_SIZE,
1280 .type = OPT_SIZE,
1281 .help = "Virtual disk size"
1282 },
0e7e1989
KW
1283 { NULL }
1284};
1285
84a12e66
CH
1286static BlockDriver bdrv_file = {
1287 .format_name = "file",
1288 .protocol_name = "file",
856ae5c3 1289 .instance_size = sizeof(BDRVRawState),
030be321 1290 .bdrv_needs_filename = true,
856ae5c3 1291 .bdrv_probe = NULL, /* no probe for protocols */
66f82cee 1292 .bdrv_file_open = raw_open,
eeb6b45d
JC
1293 .bdrv_reopen_prepare = raw_reopen_prepare,
1294 .bdrv_reopen_commit = raw_reopen_commit,
1295 .bdrv_reopen_abort = raw_reopen_abort,
856ae5c3
BS
1296 .bdrv_close = raw_close,
1297 .bdrv_create = raw_create,
3ac21627 1298 .bdrv_has_zero_init = bdrv_has_zero_init_1,
b6b8a333 1299 .bdrv_co_get_block_status = raw_co_get_block_status,
260a82e5 1300 .bdrv_co_write_zeroes = raw_co_write_zeroes,
3b46e624 1301
f141eafe
AL
1302 .bdrv_aio_readv = raw_aio_readv,
1303 .bdrv_aio_writev = raw_aio_writev,
b2e12bc6 1304 .bdrv_aio_flush = raw_aio_flush,
8238010b 1305 .bdrv_aio_discard = raw_aio_discard,
3c529d93 1306
83f64091
FB
1307 .bdrv_truncate = raw_truncate,
1308 .bdrv_getlength = raw_getlength,
260a82e5 1309 .bdrv_get_info = raw_get_info,
4a1d5e1f
FZ
1310 .bdrv_get_allocated_file_size
1311 = raw_get_allocated_file_size,
0e7e1989
KW
1312
1313 .create_options = raw_create_options,
83f64091
FB
1314};
1315
19cb3738
FB
1316/***********************************************/
1317/* host device */
1318
83affaa6 1319#if defined(__APPLE__) && defined(__MACH__)
19cb3738
FB
1320static kern_return_t FindEjectableCDMedia( io_iterator_t *mediaIterator );
1321static kern_return_t GetBSDPath( io_iterator_t mediaIterator, char *bsdPath, CFIndex maxPathSize );
1322
1323kern_return_t FindEjectableCDMedia( io_iterator_t *mediaIterator )
1324{
5fafdf24 1325 kern_return_t kernResult;
19cb3738
FB
1326 mach_port_t masterPort;
1327 CFMutableDictionaryRef classesToMatch;
1328
1329 kernResult = IOMasterPort( MACH_PORT_NULL, &masterPort );
1330 if ( KERN_SUCCESS != kernResult ) {
1331 printf( "IOMasterPort returned %d\n", kernResult );
1332 }
3b46e624 1333
5fafdf24 1334 classesToMatch = IOServiceMatching( kIOCDMediaClass );
19cb3738
FB
1335 if ( classesToMatch == NULL ) {
1336 printf( "IOServiceMatching returned a NULL dictionary.\n" );
1337 } else {
1338 CFDictionarySetValue( classesToMatch, CFSTR( kIOMediaEjectableKey ), kCFBooleanTrue );
1339 }
1340 kernResult = IOServiceGetMatchingServices( masterPort, classesToMatch, mediaIterator );
1341 if ( KERN_SUCCESS != kernResult )
1342 {
1343 printf( "IOServiceGetMatchingServices returned %d\n", kernResult );
1344 }
3b46e624 1345
19cb3738
FB
1346 return kernResult;
1347}
1348
1349kern_return_t GetBSDPath( io_iterator_t mediaIterator, char *bsdPath, CFIndex maxPathSize )
1350{
1351 io_object_t nextMedia;
1352 kern_return_t kernResult = KERN_FAILURE;
1353 *bsdPath = '\0';
1354 nextMedia = IOIteratorNext( mediaIterator );
1355 if ( nextMedia )
1356 {
1357 CFTypeRef bsdPathAsCFString;
1358 bsdPathAsCFString = IORegistryEntryCreateCFProperty( nextMedia, CFSTR( kIOBSDNameKey ), kCFAllocatorDefault, 0 );
1359 if ( bsdPathAsCFString ) {
1360 size_t devPathLength;
1361 strcpy( bsdPath, _PATH_DEV );
1362 strcat( bsdPath, "r" );
1363 devPathLength = strlen( bsdPath );
1364 if ( CFStringGetCString( bsdPathAsCFString, bsdPath + devPathLength, maxPathSize - devPathLength, kCFStringEncodingASCII ) ) {
1365 kernResult = KERN_SUCCESS;
1366 }
1367 CFRelease( bsdPathAsCFString );
1368 }
1369 IOObjectRelease( nextMedia );
1370 }
3b46e624 1371
19cb3738
FB
1372 return kernResult;
1373}
1374
1375#endif
1376
508c7cb3
CH
/*
 * Probe score for the host_device driver: 50 for names starting with
 * "/dev/cdrom", 100 for any other character or block device, 0 otherwise.
 */
static int hdev_probe_device(const char *filename)
{
    struct stat info;

    /* allow a dedicated CD-ROM driver to match with a higher priority */
    if (strstart(filename, "/dev/cdrom", NULL)) {
        return 50;
    }

    if (stat(filename, &info) < 0) {
        return 0;
    }

    if (S_ISCHR(info.st_mode) || S_ISBLK(info.st_mode)) {
        return 100;
    }

    return 0;
}
1392
da888d37
SH
1393static int check_hdev_writable(BDRVRawState *s)
1394{
1395#if defined(BLKROGET)
1396 /* Linux block devices can be configured "read-only" using blockdev(8).
1397 * This is independent of device node permissions and therefore open(2)
1398 * with O_RDWR succeeds. Actual writes fail with EPERM.
1399 *
1400 * bdrv_open() is supposed to fail if the disk is read-only. Explicitly
1401 * check for read-only block devices so that Linux block devices behave
1402 * properly.
1403 */
1404 struct stat st;
1405 int readonly = 0;
1406
1407 if (fstat(s->fd, &st)) {
1408 return -errno;
1409 }
1410
1411 if (!S_ISBLK(st.st_mode)) {
1412 return 0;
1413 }
1414
1415 if (ioctl(s->fd, BLKROGET, &readonly) < 0) {
1416 return -errno;
1417 }
1418
1419 if (readonly) {
1420 return -EACCES;
1421 }
1422#endif /* defined(BLKROGET) */
1423 return 0;
1424}
1425
015a1036
HR
1426static int hdev_open(BlockDriverState *bs, QDict *options, int flags,
1427 Error **errp)
19cb3738
FB
1428{
1429 BDRVRawState *s = bs->opaque;
e428e439 1430 Error *local_err = NULL;
da888d37 1431 int ret;
c66a6157 1432 const char *filename = qdict_get_str(options, "filename");
a76bab49 1433
83affaa6 1434#if defined(__APPLE__) && defined(__MACH__)
19cb3738
FB
1435 if (strstart(filename, "/dev/cdrom", NULL)) {
1436 kern_return_t kernResult;
1437 io_iterator_t mediaIterator;
1438 char bsdPath[ MAXPATHLEN ];
1439 int fd;
5fafdf24 1440
19cb3738
FB
1441 kernResult = FindEjectableCDMedia( &mediaIterator );
1442 kernResult = GetBSDPath( mediaIterator, bsdPath, sizeof( bsdPath ) );
3b46e624 1443
19cb3738
FB
1444 if ( bsdPath[ 0 ] != '\0' ) {
1445 strcat(bsdPath,"s0");
1446 /* some CDs don't have a partition 0 */
6165f4d8 1447 fd = qemu_open(bsdPath, O_RDONLY | O_BINARY | O_LARGEFILE);
19cb3738
FB
1448 if (fd < 0) {
1449 bsdPath[strlen(bsdPath)-1] = '1';
1450 } else {
2e1e79da 1451 qemu_close(fd);
19cb3738
FB
1452 }
1453 filename = bsdPath;
a5c5ea3f 1454 qdict_put(options, "filename", qstring_from_str(filename));
19cb3738 1455 }
3b46e624 1456
19cb3738
FB
1457 if ( mediaIterator )
1458 IOObjectRelease( mediaIterator );
1459 }
1460#endif
19cb3738
FB
1461
1462 s->type = FTYPE_FILE;
4dd75c70 1463#if defined(__linux__)
05acda4d
BK
1464 {
1465 char resolved_path[ MAXPATHLEN ], *temp;
1466
1467 temp = realpath(filename, resolved_path);
1468 if (temp && strstart(temp, "/dev/sg", NULL)) {
1469 bs->sg = 1;
1470 }
19cb3738
FB
1471 }
1472#endif
90babde0 1473
e428e439 1474 ret = raw_open_common(bs, options, flags, 0, &local_err);
da888d37 1475 if (ret < 0) {
e428e439
HR
1476 if (error_is_set(&local_err)) {
1477 error_propagate(errp, local_err);
1478 }
da888d37
SH
1479 return ret;
1480 }
1481
1482 if (flags & BDRV_O_RDWR) {
1483 ret = check_hdev_writable(s);
1484 if (ret < 0) {
1485 raw_close(bs);
e428e439 1486 error_setg_errno(errp, -ret, "The device is not writable");
da888d37
SH
1487 return ret;
1488 }
1489 }
1490
1491 return ret;
19cb3738
FB
1492}
1493
03ff3ca3 1494#if defined(__linux__)
19cb3738
FB
1495/* Note: we do not have a reliable method to detect if the floppy is
1496 present. The current method is to try to open the floppy at every
1497 I/O and to keep it opened during a few hundreds of ms. */
1498static int fd_open(BlockDriverState *bs)
1499{
1500 BDRVRawState *s = bs->opaque;
1501 int last_media_present;
1502
1503 if (s->type != FTYPE_FD)
1504 return 0;
1505 last_media_present = (s->fd >= 0);
5fafdf24 1506 if (s->fd >= 0 &&
c57c846a 1507 (get_clock() - s->fd_open_time) >= FD_OPEN_TIMEOUT) {
2e1e79da 1508 qemu_close(s->fd);
19cb3738
FB
1509 s->fd = -1;
1510#ifdef DEBUG_FLOPPY
1511 printf("Floppy closed\n");
1512#endif
1513 }
1514 if (s->fd < 0) {
5fafdf24 1515 if (s->fd_got_error &&
c57c846a 1516 (get_clock() - s->fd_error_time) < FD_OPEN_TIMEOUT) {
19cb3738
FB
1517#ifdef DEBUG_FLOPPY
1518 printf("No floppy (open delayed)\n");
1519#endif
1520 return -EIO;
1521 }
6165f4d8 1522 s->fd = qemu_open(bs->filename, s->open_flags & ~O_NONBLOCK);
19cb3738 1523 if (s->fd < 0) {
c57c846a 1524 s->fd_error_time = get_clock();
19cb3738
FB
1525 s->fd_got_error = 1;
1526 if (last_media_present)
1527 s->fd_media_changed = 1;
1528#ifdef DEBUG_FLOPPY
1529 printf("No floppy\n");
1530#endif
1531 return -EIO;
1532 }
1533#ifdef DEBUG_FLOPPY
1534 printf("Floppy opened\n");
1535#endif
1536 }
1537 if (!last_media_present)
1538 s->fd_media_changed = 1;
c57c846a 1539 s->fd_open_time = get_clock();
19cb3738
FB
1540 s->fd_got_error = 0;
1541 return 0;
1542}
19cb3738 1543
63ec93db 1544static int hdev_ioctl(BlockDriverState *bs, unsigned long int req, void *buf)
985a03b0
TS
1545{
1546 BDRVRawState *s = bs->opaque;
1547
1548 return ioctl(s->fd, req, buf);
1549}
221f715d 1550
63ec93db 1551static BlockDriverAIOCB *hdev_aio_ioctl(BlockDriverState *bs,
221f715d
AL
1552 unsigned long int req, void *buf,
1553 BlockDriverCompletionFunc *cb, void *opaque)
1554{
f141eafe 1555 BDRVRawState *s = bs->opaque;
c208e8c2 1556 RawPosixAIOData *acb;
c4d9d196 1557 ThreadPool *pool;
221f715d 1558
f141eafe
AL
1559 if (fd_open(bs) < 0)
1560 return NULL;
c208e8c2
PB
1561
1562 acb = g_slice_new(RawPosixAIOData);
1563 acb->bs = bs;
1564 acb->aio_type = QEMU_AIO_IOCTL;
1565 acb->aio_fildes = s->fd;
1566 acb->aio_offset = 0;
1567 acb->aio_ioctl_buf = buf;
1568 acb->aio_ioctl_cmd = req;
c4d9d196
SH
1569 pool = aio_get_thread_pool(bdrv_get_aio_context(bs));
1570 return thread_pool_submit_aio(pool, aio_worker, acb, cb, opaque);
221f715d
AL
1571}
1572
a167ba50 1573#elif defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
9f23011a
BS
1574static int fd_open(BlockDriverState *bs)
1575{
1576 BDRVRawState *s = bs->opaque;
1577
1578 /* this is just to ensure s->fd is sane (its called by io ops) */
1579 if (s->fd >= 0)
1580 return 0;
1581 return -EIO;
1582}
9f23011a 1583#else /* !linux && !FreeBSD */
19cb3738 1584
08af02e2
AL
1585static int fd_open(BlockDriverState *bs)
1586{
1587 return 0;
1588}
1589
221f715d 1590#endif /* !linux && !FreeBSD */
04eeb8b6 1591
c36dd8a0
AF
1592static coroutine_fn BlockDriverAIOCB *hdev_aio_discard(BlockDriverState *bs,
1593 int64_t sector_num, int nb_sectors,
1594 BlockDriverCompletionFunc *cb, void *opaque)
1595{
1596 BDRVRawState *s = bs->opaque;
1597
1598 if (fd_open(bs) < 0) {
1599 return NULL;
1600 }
1601 return paio_submit(bs, s->fd, sector_num, NULL, nb_sectors,
1602 cb, opaque, QEMU_AIO_DISCARD|QEMU_AIO_BLKDEV);
1603}
1604
d0b4503e
PB
1605static coroutine_fn int hdev_co_write_zeroes(BlockDriverState *bs,
1606 int64_t sector_num, int nb_sectors, BdrvRequestFlags flags)
1607{
1608 BDRVRawState *s = bs->opaque;
1609 int rc;
1610
1611 rc = fd_open(bs);
1612 if (rc < 0) {
1613 return rc;
1614 }
1615 if (!(flags & BDRV_REQ_MAY_UNMAP)) {
1616 return -ENOTSUP;
1617 }
1618 if (!s->discard_zeroes) {
1619 return -ENOTSUP;
1620 }
1621 return paio_submit_co(bs, s->fd, sector_num, NULL, nb_sectors,
1622 QEMU_AIO_DISCARD|QEMU_AIO_BLKDEV);
1623}
1624
d5124c00
HR
1625static int hdev_create(const char *filename, QEMUOptionParameter *options,
1626 Error **errp)
93c65b47
AL
1627{
1628 int fd;
1629 int ret = 0;
1630 struct stat stat_buf;
0e7e1989 1631 int64_t total_size = 0;
93c65b47 1632
0e7e1989
KW
1633 /* Read out options */
1634 while (options && options->name) {
1635 if (!strcmp(options->name, "size")) {
9040385d 1636 total_size = options->value.n / BDRV_SECTOR_SIZE;
0e7e1989
KW
1637 }
1638 options++;
1639 }
93c65b47 1640
6165f4d8 1641 fd = qemu_open(filename, O_WRONLY | O_BINARY);
e428e439
HR
1642 if (fd < 0) {
1643 ret = -errno;
1644 error_setg_errno(errp, -ret, "Could not open device");
1645 return ret;
1646 }
93c65b47 1647
e428e439 1648 if (fstat(fd, &stat_buf) < 0) {
57e69b7d 1649 ret = -errno;
e428e439
HR
1650 error_setg_errno(errp, -ret, "Could not stat device");
1651 } else if (!S_ISBLK(stat_buf.st_mode) && !S_ISCHR(stat_buf.st_mode)) {
1652 error_setg(errp,
1653 "The given file is neither a block nor a character device");
57e69b7d 1654 ret = -ENODEV;
e428e439
HR
1655 } else if (lseek(fd, 0, SEEK_END) < total_size * BDRV_SECTOR_SIZE) {
1656 error_setg(errp, "Device is too small");
93c65b47 1657 ret = -ENOSPC;
e428e439 1658 }
93c65b47 1659
2e1e79da 1660 qemu_close(fd);
93c65b47
AL
1661 return ret;
1662}
1663
5efa9d5a 1664static BlockDriver bdrv_host_device = {
0b4ce02e 1665 .format_name = "host_device",
84a12e66 1666 .protocol_name = "host_device",
0b4ce02e 1667 .instance_size = sizeof(BDRVRawState),
030be321 1668 .bdrv_needs_filename = true,
0b4ce02e 1669 .bdrv_probe_device = hdev_probe_device,
66f82cee 1670 .bdrv_file_open = hdev_open,
0b4ce02e 1671 .bdrv_close = raw_close,
1bc6b705
JC
1672 .bdrv_reopen_prepare = raw_reopen_prepare,
1673 .bdrv_reopen_commit = raw_reopen_commit,
1674 .bdrv_reopen_abort = raw_reopen_abort,
93c65b47 1675 .bdrv_create = hdev_create,
0b4ce02e 1676 .create_options = raw_create_options,
d0b4503e 1677 .bdrv_co_write_zeroes = hdev_co_write_zeroes,
3b46e624 1678
f141eafe
AL
1679 .bdrv_aio_readv = raw_aio_readv,
1680 .bdrv_aio_writev = raw_aio_writev,
b2e12bc6 1681 .bdrv_aio_flush = raw_aio_flush,
8238010b 1682 .bdrv_aio_discard = hdev_aio_discard,
3c529d93 1683
55b949c8 1684 .bdrv_truncate = raw_truncate,
e60f469c 1685 .bdrv_getlength = raw_getlength,
260a82e5 1686 .bdrv_get_info = raw_get_info,
4a1d5e1f
FZ
1687 .bdrv_get_allocated_file_size
1688 = raw_get_allocated_file_size,
19cb3738 1689
f3a5d3f8 1690 /* generic scsi device */
63ec93db
CH
1691#ifdef __linux__
1692 .bdrv_ioctl = hdev_ioctl,
63ec93db
CH
1693 .bdrv_aio_ioctl = hdev_aio_ioctl,
1694#endif
f3a5d3f8
CH
1695};
1696
1697#ifdef __linux__
015a1036
HR
1698static int floppy_open(BlockDriverState *bs, QDict *options, int flags,
1699 Error **errp)
f3a5d3f8
CH
1700{
1701 BDRVRawState *s = bs->opaque;
e428e439 1702 Error *local_err = NULL;
f3a5d3f8
CH
1703 int ret;
1704
f3a5d3f8 1705 s->type = FTYPE_FD;
f3a5d3f8 1706
19a3da7f 1707 /* open will not fail even if no floppy is inserted, so add O_NONBLOCK */
e428e439
HR
1708 ret = raw_open_common(bs, options, flags, O_NONBLOCK, &local_err);
1709 if (ret) {
1710 if (error_is_set(&local_err)) {
1711 error_propagate(errp, local_err);
1712 }
f3a5d3f8 1713 return ret;
e428e439 1714 }
f3a5d3f8
CH
1715
1716 /* close fd so that we can reopen it as needed */
2e1e79da 1717 qemu_close(s->fd);
f3a5d3f8
CH
1718 s->fd = -1;
1719 s->fd_media_changed = 1;
1720
1721 return 0;
1722}
1723
508c7cb3
CH
1724static int floppy_probe_device(const char *filename)
1725{
2ebf7c4b
CR
1726 int fd, ret;
1727 int prio = 0;
1728 struct floppy_struct fdparam;
343f8568 1729 struct stat st;
2ebf7c4b 1730
e1740828
CB
1731 if (strstart(filename, "/dev/fd", NULL) &&
1732 !strstart(filename, "/dev/fdset/", NULL)) {
2ebf7c4b 1733 prio = 50;
e1740828 1734 }
2ebf7c4b 1735
6165f4d8 1736 fd = qemu_open(filename, O_RDONLY | O_NONBLOCK);
2ebf7c4b
CR
1737 if (fd < 0) {
1738 goto out;
1739 }
343f8568
JS
1740 ret = fstat(fd, &st);
1741 if (ret == -1 || !S_ISBLK(st.st_mode)) {
1742 goto outc;
1743 }
2ebf7c4b
CR
1744
1745 /* Attempt to detect via a floppy specific ioctl */
1746 ret = ioctl(fd, FDGETPRM, &fdparam);
1747 if (ret >= 0)
1748 prio = 100;
1749
343f8568 1750outc:
2e1e79da 1751 qemu_close(fd);
2ebf7c4b
CR
1752out:
1753 return prio;
508c7cb3
CH
1754}
1755
1756
f3a5d3f8
CH
1757static int floppy_is_inserted(BlockDriverState *bs)
1758{
1759 return fd_open(bs) >= 0;
1760}
1761
1762static int floppy_media_changed(BlockDriverState *bs)
1763{
1764 BDRVRawState *s = bs->opaque;
1765 int ret;
1766
1767 /*
1768 * XXX: we do not have a true media changed indication.
1769 * It does not work if the floppy is changed without trying to read it.
1770 */
1771 fd_open(bs);
1772 ret = s->fd_media_changed;
1773 s->fd_media_changed = 0;
1774#ifdef DEBUG_FLOPPY
1775 printf("Floppy changed=%d\n", ret);
1776#endif
1777 return ret;
1778}
1779
f36f3949 1780static void floppy_eject(BlockDriverState *bs, bool eject_flag)
f3a5d3f8
CH
1781{
1782 BDRVRawState *s = bs->opaque;
1783 int fd;
1784
1785 if (s->fd >= 0) {
2e1e79da 1786 qemu_close(s->fd);
f3a5d3f8
CH
1787 s->fd = -1;
1788 }
6165f4d8 1789 fd = qemu_open(bs->filename, s->open_flags | O_NONBLOCK);
f3a5d3f8
CH
1790 if (fd >= 0) {
1791 if (ioctl(fd, FDEJECT, 0) < 0)
1792 perror("FDEJECT");
2e1e79da 1793 qemu_close(fd);
f3a5d3f8 1794 }
f3a5d3f8
CH
1795}
1796
1797static BlockDriver bdrv_host_floppy = {
1798 .format_name = "host_floppy",
84a12e66 1799 .protocol_name = "host_floppy",
f3a5d3f8 1800 .instance_size = sizeof(BDRVRawState),
030be321 1801 .bdrv_needs_filename = true,
508c7cb3 1802 .bdrv_probe_device = floppy_probe_device,
66f82cee 1803 .bdrv_file_open = floppy_open,
f3a5d3f8 1804 .bdrv_close = raw_close,
1bc6b705
JC
1805 .bdrv_reopen_prepare = raw_reopen_prepare,
1806 .bdrv_reopen_commit = raw_reopen_commit,
1807 .bdrv_reopen_abort = raw_reopen_abort,
f3a5d3f8 1808 .bdrv_create = hdev_create,
0b4ce02e 1809 .create_options = raw_create_options,
f3a5d3f8 1810
f3a5d3f8
CH
1811 .bdrv_aio_readv = raw_aio_readv,
1812 .bdrv_aio_writev = raw_aio_writev,
b2e12bc6 1813 .bdrv_aio_flush = raw_aio_flush,
f3a5d3f8 1814
55b949c8 1815 .bdrv_truncate = raw_truncate,
b94a2610
KW
1816 .bdrv_getlength = raw_getlength,
1817 .has_variable_length = true,
4a1d5e1f
FZ
1818 .bdrv_get_allocated_file_size
1819 = raw_get_allocated_file_size,
f3a5d3f8
CH
1820
1821 /* removable device support */
1822 .bdrv_is_inserted = floppy_is_inserted,
1823 .bdrv_media_changed = floppy_media_changed,
1824 .bdrv_eject = floppy_eject,
f3a5d3f8
CH
1825};
1826
015a1036
HR
1827static int cdrom_open(BlockDriverState *bs, QDict *options, int flags,
1828 Error **errp)
f3a5d3f8
CH
1829{
1830 BDRVRawState *s = bs->opaque;
e428e439
HR
1831 Error *local_err = NULL;
1832 int ret;
f3a5d3f8 1833
f3a5d3f8
CH
1834 s->type = FTYPE_CD;
1835
19a3da7f 1836 /* open will not fail even if no CD is inserted, so add O_NONBLOCK */
e428e439
HR
1837 ret = raw_open_common(bs, options, flags, O_NONBLOCK, &local_err);
1838 if (error_is_set(&local_err)) {
1839 error_propagate(errp, local_err);
1840 }
1841 return ret;
f3a5d3f8
CH
1842}
1843
508c7cb3
CH
1844static int cdrom_probe_device(const char *filename)
1845{
3baf720e
CR
1846 int fd, ret;
1847 int prio = 0;
343f8568 1848 struct stat st;
3baf720e 1849
6165f4d8 1850 fd = qemu_open(filename, O_RDONLY | O_NONBLOCK);
3baf720e
CR
1851 if (fd < 0) {
1852 goto out;
1853 }
343f8568
JS
1854 ret = fstat(fd, &st);
1855 if (ret == -1 || !S_ISBLK(st.st_mode)) {
1856 goto outc;
1857 }
3baf720e
CR
1858
1859 /* Attempt to detect via a CDROM specific ioctl */
1860 ret = ioctl(fd, CDROM_DRIVE_STATUS, CDSL_CURRENT);
1861 if (ret >= 0)
1862 prio = 100;
1863
343f8568 1864outc:
2e1e79da 1865 qemu_close(fd);
3baf720e
CR
1866out:
1867 return prio;
508c7cb3
CH
1868}
1869
f3a5d3f8
CH
1870static int cdrom_is_inserted(BlockDriverState *bs)
1871{
1872 BDRVRawState *s = bs->opaque;
1873 int ret;
1874
1875 ret = ioctl(s->fd, CDROM_DRIVE_STATUS, CDSL_CURRENT);
1876 if (ret == CDS_DISC_OK)
1877 return 1;
1878 return 0;
1879}
1880
f36f3949 1881static void cdrom_eject(BlockDriverState *bs, bool eject_flag)
f3a5d3f8
CH
1882{
1883 BDRVRawState *s = bs->opaque;
1884
1885 if (eject_flag) {
1886 if (ioctl(s->fd, CDROMEJECT, NULL) < 0)
1887 perror("CDROMEJECT");
1888 } else {
1889 if (ioctl(s->fd, CDROMCLOSETRAY, NULL) < 0)
1890 perror("CDROMEJECT");
1891 }
f3a5d3f8
CH
1892}
1893
025e849a 1894static void cdrom_lock_medium(BlockDriverState *bs, bool locked)
f3a5d3f8
CH
1895{
1896 BDRVRawState *s = bs->opaque;
1897
1898 if (ioctl(s->fd, CDROM_LOCKDOOR, locked) < 0) {
1899 /*
1900 * Note: an error can happen if the distribution automatically
1901 * mounts the CD-ROM
1902 */
1903 /* perror("CDROM_LOCKDOOR"); */
1904 }
f3a5d3f8
CH
1905}
1906
1907static BlockDriver bdrv_host_cdrom = {
1908 .format_name = "host_cdrom",
84a12e66 1909 .protocol_name = "host_cdrom",
f3a5d3f8 1910 .instance_size = sizeof(BDRVRawState),
030be321 1911 .bdrv_needs_filename = true,
508c7cb3 1912 .bdrv_probe_device = cdrom_probe_device,
66f82cee 1913 .bdrv_file_open = cdrom_open,
f3a5d3f8 1914 .bdrv_close = raw_close,
1bc6b705
JC
1915 .bdrv_reopen_prepare = raw_reopen_prepare,
1916 .bdrv_reopen_commit = raw_reopen_commit,
1917 .bdrv_reopen_abort = raw_reopen_abort,
f3a5d3f8 1918 .bdrv_create = hdev_create,
0b4ce02e 1919 .create_options = raw_create_options,
f3a5d3f8 1920
f3a5d3f8
CH
1921 .bdrv_aio_readv = raw_aio_readv,
1922 .bdrv_aio_writev = raw_aio_writev,
b2e12bc6 1923 .bdrv_aio_flush = raw_aio_flush,
f3a5d3f8 1924
55b949c8 1925 .bdrv_truncate = raw_truncate,
b94a2610
KW
1926 .bdrv_getlength = raw_getlength,
1927 .has_variable_length = true,
4a1d5e1f
FZ
1928 .bdrv_get_allocated_file_size
1929 = raw_get_allocated_file_size,
f3a5d3f8
CH
1930
1931 /* removable device support */
1932 .bdrv_is_inserted = cdrom_is_inserted,
1933 .bdrv_eject = cdrom_eject,
025e849a 1934 .bdrv_lock_medium = cdrom_lock_medium,
f3a5d3f8
CH
1935
1936 /* generic scsi device */
63ec93db 1937 .bdrv_ioctl = hdev_ioctl,
63ec93db 1938 .bdrv_aio_ioctl = hdev_aio_ioctl,
f3a5d3f8
CH
1939};
1940#endif /* __linux__ */
1941
a167ba50 1942#if defined (__FreeBSD__) || defined(__FreeBSD_kernel__)
511018e4
AT
1943static int cdrom_open(BlockDriverState *bs, QDict *options, int flags,
1944 Error **errp)
f3a5d3f8
CH
1945{
1946 BDRVRawState *s = bs->opaque;
e428e439 1947 Error *local_err = NULL;
f3a5d3f8
CH
1948 int ret;
1949
1950 s->type = FTYPE_CD;
1951
e428e439
HR
1952 ret = raw_open_common(bs, options, flags, 0, &local_err);
1953 if (ret) {
1954 if (error_is_set(&local_err)) {
1955 error_propagate(errp, local_err);
1956 }
f3a5d3f8 1957 return ret;
e428e439 1958 }
f3a5d3f8 1959
9b2260cb 1960 /* make sure the door isn't locked at this time */
f3a5d3f8
CH
1961 ioctl(s->fd, CDIOCALLOW);
1962 return 0;
1963}
1964
508c7cb3
CH
/*
 * Probe score for the host_cdrom driver (FreeBSD): 100 for names starting
 * with "/dev/cd" or "/dev/acd", 0 otherwise.
 */
static int cdrom_probe_device(const char *filename)
{
    if (strstart(filename, "/dev/cd", NULL)) {
        return 100;
    }
    if (strstart(filename, "/dev/acd", NULL)) {
        return 100;
    }
    return 0;
}
1972
f3a5d3f8
CH
1973static int cdrom_reopen(BlockDriverState *bs)
1974{
1975 BDRVRawState *s = bs->opaque;
1976 int fd;
1977
1978 /*
1979 * Force reread of possibly changed/newly loaded disc,
1980 * FreeBSD seems to not notice sometimes...
1981 */
1982 if (s->fd >= 0)
2e1e79da 1983 qemu_close(s->fd);
6165f4d8 1984 fd = qemu_open(bs->filename, s->open_flags, 0644);
f3a5d3f8
CH
1985 if (fd < 0) {
1986 s->fd = -1;
1987 return -EIO;
1988 }
1989 s->fd = fd;
1990
9b2260cb 1991 /* make sure the door isn't locked at this time */
f3a5d3f8
CH
1992 ioctl(s->fd, CDIOCALLOW);
1993 return 0;
1994}
1995
1996static int cdrom_is_inserted(BlockDriverState *bs)
1997{
1998 return raw_getlength(bs) > 0;
1999}
2000
f36f3949 2001static void cdrom_eject(BlockDriverState *bs, bool eject_flag)
f3a5d3f8
CH
2002{
2003 BDRVRawState *s = bs->opaque;
2004
2005 if (s->fd < 0)
822e1cd1 2006 return;
f3a5d3f8
CH
2007
2008 (void) ioctl(s->fd, CDIOCALLOW);
2009
2010 if (eject_flag) {
2011 if (ioctl(s->fd, CDIOCEJECT) < 0)
2012 perror("CDIOCEJECT");
2013 } else {
2014 if (ioctl(s->fd, CDIOCCLOSE) < 0)
2015 perror("CDIOCCLOSE");
2016 }
2017
822e1cd1 2018 cdrom_reopen(bs);
f3a5d3f8
CH
2019}
2020
025e849a 2021static void cdrom_lock_medium(BlockDriverState *bs, bool locked)
f3a5d3f8
CH
2022{
2023 BDRVRawState *s = bs->opaque;
2024
2025 if (s->fd < 0)
7bf37fed 2026 return;
f3a5d3f8
CH
2027 if (ioctl(s->fd, (locked ? CDIOCPREVENT : CDIOCALLOW)) < 0) {
2028 /*
2029 * Note: an error can happen if the distribution automatically
2030 * mounts the CD-ROM
2031 */
2032 /* perror("CDROM_LOCKDOOR"); */
2033 }
f3a5d3f8
CH
2034}
2035
2036static BlockDriver bdrv_host_cdrom = {
2037 .format_name = "host_cdrom",
84a12e66 2038 .protocol_name = "host_cdrom",
f3a5d3f8 2039 .instance_size = sizeof(BDRVRawState),
030be321 2040 .bdrv_needs_filename = true,
508c7cb3 2041 .bdrv_probe_device = cdrom_probe_device,
66f82cee 2042 .bdrv_file_open = cdrom_open,
f3a5d3f8 2043 .bdrv_close = raw_close,
1bc6b705
JC
2044 .bdrv_reopen_prepare = raw_reopen_prepare,
2045 .bdrv_reopen_commit = raw_reopen_commit,
2046 .bdrv_reopen_abort = raw_reopen_abort,
f3a5d3f8 2047 .bdrv_create = hdev_create,
0b4ce02e 2048 .create_options = raw_create_options,
f3a5d3f8 2049
f3a5d3f8
CH
2050 .bdrv_aio_readv = raw_aio_readv,
2051 .bdrv_aio_writev = raw_aio_writev,
b2e12bc6 2052 .bdrv_aio_flush = raw_aio_flush,
f3a5d3f8 2053
55b949c8 2054 .bdrv_truncate = raw_truncate,
b94a2610
KW
2055 .bdrv_getlength = raw_getlength,
2056 .has_variable_length = true,
4a1d5e1f
FZ
2057 .bdrv_get_allocated_file_size
2058 = raw_get_allocated_file_size,
f3a5d3f8 2059
19cb3738 2060 /* removable device support */
f3a5d3f8
CH
2061 .bdrv_is_inserted = cdrom_is_inserted,
2062 .bdrv_eject = cdrom_eject,
025e849a 2063 .bdrv_lock_medium = cdrom_lock_medium,
19cb3738 2064};
f3a5d3f8 2065#endif /* __FreeBSD__ */
5efa9d5a 2066
4065742a
SH
2067#ifdef CONFIG_LINUX_AIO
2068/**
2069 * Return the file descriptor for Linux AIO
2070 *
2071 * This function is a layering violation and should be removed when it becomes
2072 * possible to call the block layer outside the global mutex. It allows the
2073 * caller to hijack the file descriptor so I/O can be performed outside the
2074 * block layer.
2075 */
2076int raw_get_aio_fd(BlockDriverState *bs)
2077{
2078 BDRVRawState *s;
2079
2080 if (!bs->drv) {
2081 return -ENOMEDIUM;
2082 }
2083
2084 if (bs->drv == bdrv_find_format("raw")) {
2085 bs = bs->file;
2086 }
2087
2088 /* raw-posix has several protocols so just check for raw_aio_readv */
2089 if (bs->drv->bdrv_aio_readv != raw_aio_readv) {
2090 return -ENOTSUP;
2091 }
2092
2093 s = bs->opaque;
2094 if (!s->use_aio) {
2095 return -ENOTSUP;
2096 }
2097 return s->fd;
2098}
2099#endif /* CONFIG_LINUX_AIO */
2100
84a12e66 2101static void bdrv_file_init(void)
5efa9d5a 2102{
508c7cb3
CH
2103 /*
2104 * Register all the drivers. Note that order is important, the driver
2105 * registered last will get probed first.
2106 */
84a12e66 2107 bdrv_register(&bdrv_file);
5efa9d5a 2108 bdrv_register(&bdrv_host_device);
f3a5d3f8
CH
2109#ifdef __linux__
2110 bdrv_register(&bdrv_host_floppy);
2111 bdrv_register(&bdrv_host_cdrom);
2112#endif
a167ba50 2113#if defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
f3a5d3f8
CH
2114 bdrv_register(&bdrv_host_cdrom);
2115#endif
5efa9d5a
AL
2116}
2117
84a12e66 2118block_init(bdrv_file_init);