]> git.proxmox.com Git - mirror_qemu.git/blame - block/raw-posix.c
block: Keep "filename" option after parsing
[mirror_qemu.git] / block / raw-posix.c
CommitLineData
83f64091 1/*
223d4670 2 * Block driver for RAW files (posix)
5fafdf24 3 *
83f64091 4 * Copyright (c) 2006 Fabrice Bellard
5fafdf24 5 *
83f64091
FB
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to deal
8 * in the Software without restriction, including without limitation the rights
9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 * copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22 * THE SOFTWARE.
23 */
faf07963 24#include "qemu-common.h"
1de7afc9
PB
25#include "qemu/timer.h"
26#include "qemu/log.h"
737e150e 27#include "block/block_int.h"
1de7afc9 28#include "qemu/module.h"
de81a169 29#include "trace.h"
737e150e 30#include "block/thread-pool.h"
1de7afc9 31#include "qemu/iov.h"
9f8540ec 32#include "raw-aio.h"
83f64091 33
83affaa6 34#if defined(__APPLE__) && (__MACH__)
83f64091
FB
35#include <paths.h>
36#include <sys/param.h>
37#include <IOKit/IOKitLib.h>
38#include <IOKit/IOBSD.h>
39#include <IOKit/storage/IOMediaBSDClient.h>
40#include <IOKit/storage/IOMedia.h>
41#include <IOKit/storage/IOCDMedia.h>
42//#include <IOKit/storage/IOCDTypes.h>
43#include <CoreFoundation/CoreFoundation.h>
44#endif
45
46#ifdef __sun__
2e9671da 47#define _POSIX_PTHREAD_SEMANTICS 1
83f64091
FB
48#include <sys/dkio.h>
49#endif
19cb3738 50#ifdef __linux__
343f8568
JS
51#include <sys/types.h>
52#include <sys/stat.h>
19cb3738 53#include <sys/ioctl.h>
05acda4d 54#include <sys/param.h>
19cb3738
FB
55#include <linux/cdrom.h>
56#include <linux/fd.h>
5500316d
PB
57#include <linux/fs.h>
58#endif
59#ifdef CONFIG_FIEMAP
60#include <linux/fiemap.h>
19cb3738 61#endif
3d4fa43e
KK
62#ifdef CONFIG_FALLOCATE_PUNCH_HOLE
63#include <linux/falloc.h>
64#endif
a167ba50 65#if defined (__FreeBSD__) || defined(__FreeBSD_kernel__)
1cb6c3fd 66#include <sys/disk.h>
9f23011a 67#include <sys/cdio.h>
1cb6c3fd 68#endif
83f64091 69
128ab2ff
BS
70#ifdef __OpenBSD__
71#include <sys/ioctl.h>
72#include <sys/disklabel.h>
73#include <sys/dkio.h>
74#endif
75
d1f6fd8d
CE
76#ifdef __NetBSD__
77#include <sys/ioctl.h>
78#include <sys/disklabel.h>
79#include <sys/dkio.h>
80#include <sys/disk.h>
81#endif
82
c5e97233
BS
83#ifdef __DragonFly__
84#include <sys/ioctl.h>
85#include <sys/diskslice.h>
86#endif
87
dce512de
CH
88#ifdef CONFIG_XFS
89#include <xfs/xfs.h>
90#endif
91
19cb3738 92//#define DEBUG_FLOPPY
83f64091 93
faf07963 94//#define DEBUG_BLOCK
03ff3ca3 95#if defined(DEBUG_BLOCK)
001faf32
BS
96#define DEBUG_BLOCK_PRINT(formatCstr, ...) do { if (qemu_log_enabled()) \
97 { qemu_log(formatCstr, ## __VA_ARGS__); qemu_log_flush(); } } while (0)
8c05dbf9 98#else
001faf32 99#define DEBUG_BLOCK_PRINT(formatCstr, ...)
8c05dbf9
TS
100#endif
101
f6465578
AL
102/* OS X does not have O_DSYNC */
103#ifndef O_DSYNC
1c27a8b3 104#ifdef O_SYNC
7ab064d2 105#define O_DSYNC O_SYNC
1c27a8b3
JA
106#elif defined(O_FSYNC)
107#define O_DSYNC O_FSYNC
108#endif
f6465578
AL
109#endif
110
9f7965c7
AL
111/* Approximate O_DIRECT with O_DSYNC if O_DIRECT isn't available */
112#ifndef O_DIRECT
113#define O_DIRECT O_DSYNC
114#endif
115
19cb3738
FB
116#define FTYPE_FILE 0
117#define FTYPE_CD 1
118#define FTYPE_FD 2
83f64091 119
c57c846a 120/* if the FD is not accessed during that time (in ns), we try to
19cb3738 121 reopen it to see if the disk has been changed */
c57c846a 122#define FD_OPEN_TIMEOUT (1000000000)
83f64091 123
581b9e29
CH
124#define MAX_BLOCKSIZE 4096
125
19cb3738
FB
/* Driver-private state attached to a BlockDriverState (bs->opaque). */
typedef struct BDRVRawState {
    int fd;             /* descriptor of the image; -1 once closed */
    int type;           /* one of the FTYPE_* constants */
    int open_flags;     /* O_* flags handed to qemu_open() */
    size_t buf_align;   /* buffer alignment set by raw_probe_alignment() */

#if defined(__linux__)
    /* linux floppy specific */
    int64_t fd_open_time;
    int64_t fd_error_time;
    int fd_got_error;
    int fd_media_changed;
#endif
#ifdef CONFIG_LINUX_AIO
    int use_aio;        /* non-zero: aligned requests go through laio_submit() */
    void *aio_ctx;      /* context obtained from laio_init() */
#endif
#ifdef CONFIG_XFS
    bool is_xfs:1;      /* platform_test_xfs_fd() succeeded on fd */
#endif
    bool has_discard:1;         /* cleared once discard proves unsupported */
    bool has_write_zeroes:1;    /* cleared once zero-write proves unsupported */
    bool discard_zeroes:1;      /* discarded ranges are known to read as zero */
} BDRVRawState;
150
eeb6b45d
JC
/*
 * Scratch state built by raw_reopen_prepare() and either adopted by
 * raw_reopen_commit() or discarded by raw_reopen_abort().
 */
typedef struct BDRVRawReopenState {
    int fd;             /* newly opened/duplicated descriptor, or -1 */
    int open_flags;     /* O_* flags computed for the reopened image */
#ifdef CONFIG_LINUX_AIO
    int use_aio;        /* AIO setting that will apply after the commit */
#endif
} BDRVRawReopenState;
158
19cb3738 159static int fd_open(BlockDriverState *bs);
22afa7b5 160static int64_t raw_getlength(BlockDriverState *bs);
83f64091 161
de81a169
PB
162typedef struct RawPosixAIOData {
163 BlockDriverState *bs;
164 int aio_fildes;
165 union {
166 struct iovec *aio_iov;
167 void *aio_ioctl_buf;
168 };
169 int aio_niov;
8238010b 170 uint64_t aio_nbytes;
de81a169
PB
171#define aio_ioctl_cmd aio_nbytes /* for QEMU_AIO_IOCTL */
172 off_t aio_offset;
173 int aio_type;
174} RawPosixAIOData;
175
a167ba50 176#if defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
f3a5d3f8 177static int cdrom_reopen(BlockDriverState *bs);
9f23011a
BS
178#endif
179
1de1ae0a
CE
180#if defined(__NetBSD__)
/*
 * NetBSD: if *filename names a block device, redirect it to the
 * corresponding raw device by prefixing the last path component with 'r'.
 *
 * On success returns 0; *filename is either left untouched (not a block
 * device) or pointed at a static buffer holding the rewritten path, so the
 * result is only valid until the next call.  On failure returns a negative
 * errno value.
 */
static int raw_normalize_devicepath(const char **filename)
{
    static char namebuf[PATH_MAX];
    const char *dp, *fname;
    struct stat sb;
    int len;

    fname = *filename;
    dp = strrchr(fname, '/');
    if (lstat(fname, &sb) < 0) {
        /* Save errno first: fprintf()/strerror() are allowed to change it. */
        int saved_errno = errno;

        fprintf(stderr, "%s: stat failed: %s\n",
            fname, strerror(saved_errno));
        return -saved_errno;
    }

    if (!S_ISBLK(sb.st_mode)) {
        return 0;
    }

    if (dp == NULL) {
        len = snprintf(namebuf, sizeof(namebuf), "r%s", fname);
    } else {
        len = snprintf(namebuf, sizeof(namebuf), "%.*s/r%s",
            (int)(dp - fname), fname, dp + 1);
    }
    /* Refuse to hand back a silently truncated (i.e. different) path. */
    if (len < 0 || (size_t)len >= sizeof(namebuf)) {
        return -ENAMETOOLONG;
    }
    fprintf(stderr, "%s is a block device", fname);
    *filename = namebuf;
    fprintf(stderr, ", using %s\n", *filename);

    return 0;
}
211#else
/* Non-NetBSD hosts: the path is used as given; always returns 0. */
static int raw_normalize_devicepath(const char **filename)
{
    (void)filename;

    return 0;
}
216#endif
217
c25f53b0
PB
218static void raw_probe_alignment(BlockDriverState *bs)
219{
220 BDRVRawState *s = bs->opaque;
221 char *buf;
222 unsigned int sector_size;
223
224 /* For /dev/sg devices the alignment is not really used.
225 With buffered I/O, we don't have any restrictions. */
226 if (bs->sg || !(s->open_flags & O_DIRECT)) {
227 bs->request_alignment = 1;
228 s->buf_align = 1;
229 return;
230 }
231
232 /* Try a few ioctls to get the right size */
233 bs->request_alignment = 0;
234 s->buf_align = 0;
235
236#ifdef BLKSSZGET
237 if (ioctl(s->fd, BLKSSZGET, &sector_size) >= 0) {
238 bs->request_alignment = sector_size;
239 }
240#endif
241#ifdef DKIOCGETBLOCKSIZE
242 if (ioctl(s->fd, DKIOCGETBLOCKSIZE, &sector_size) >= 0) {
243 bs->request_alignment = sector_size;
244 }
245#endif
246#ifdef DIOCGSECTORSIZE
247 if (ioctl(s->fd, DIOCGSECTORSIZE, &sector_size) >= 0) {
248 bs->request_alignment = sector_size;
249 }
250#endif
251#ifdef CONFIG_XFS
252 if (s->is_xfs) {
253 struct dioattr da;
254 if (xfsctl(NULL, s->fd, XFS_IOC_DIOINFO, &da) >= 0) {
255 bs->request_alignment = da.d_miniosz;
256 /* The kernel returns wrong information for d_mem */
257 /* s->buf_align = da.d_mem; */
258 }
259 }
260#endif
261
262 /* If we could not get the sizes so far, we can only guess them */
263 if (!s->buf_align) {
264 size_t align;
265 buf = qemu_memalign(MAX_BLOCKSIZE, 2 * MAX_BLOCKSIZE);
266 for (align = 512; align <= MAX_BLOCKSIZE; align <<= 1) {
267 if (pread(s->fd, buf + align, MAX_BLOCKSIZE, 0) >= 0) {
268 s->buf_align = align;
269 break;
270 }
271 }
272 qemu_vfree(buf);
273 }
274
275 if (!bs->request_alignment) {
276 size_t align;
277 buf = qemu_memalign(s->buf_align, MAX_BLOCKSIZE);
278 for (align = 512; align <= MAX_BLOCKSIZE; align <<= 1) {
279 if (pread(s->fd, buf, align, 0) >= 0) {
280 bs->request_alignment = align;
281 break;
282 }
283 }
284 qemu_vfree(buf);
285 }
286}
287
6a8dc042
JC
288static void raw_parse_flags(int bdrv_flags, int *open_flags)
289{
290 assert(open_flags != NULL);
291
292 *open_flags |= O_BINARY;
293 *open_flags &= ~O_ACCMODE;
294 if (bdrv_flags & BDRV_O_RDWR) {
295 *open_flags |= O_RDWR;
296 } else {
297 *open_flags |= O_RDONLY;
298 }
299
300 /* Use O_DSYNC for write-through caching, no flags for write-back caching,
301 * and O_DIRECT for no caching. */
302 if ((bdrv_flags & BDRV_O_NOCACHE)) {
303 *open_flags |= O_DIRECT;
304 }
6a8dc042
JC
305}
306
fc32a72d
JC
307#ifdef CONFIG_LINUX_AIO
308static int raw_set_aio(void **aio_ctx, int *use_aio, int bdrv_flags)
309{
310 int ret = -1;
311 assert(aio_ctx != NULL);
312 assert(use_aio != NULL);
313 /*
314 * Currently Linux do AIO only for files opened with O_DIRECT
315 * specified so check NOCACHE flag too
316 */
317 if ((bdrv_flags & (BDRV_O_NOCACHE|BDRV_O_NATIVE_AIO)) ==
318 (BDRV_O_NOCACHE|BDRV_O_NATIVE_AIO)) {
319
320 /* if non-NULL, laio_init() has already been run */
321 if (*aio_ctx == NULL) {
322 *aio_ctx = laio_init();
323 if (!*aio_ctx) {
324 goto error;
325 }
326 }
327 *use_aio = 1;
328 } else {
329 *use_aio = 0;
330 }
331
332 ret = 0;
333
334error:
335 return ret;
336}
337#endif
338
c66a6157
KW
339static QemuOptsList raw_runtime_opts = {
340 .name = "raw",
341 .head = QTAILQ_HEAD_INITIALIZER(raw_runtime_opts.head),
342 .desc = {
343 {
344 .name = "filename",
345 .type = QEMU_OPT_STRING,
346 .help = "File name of the image",
347 },
348 { /* end of list */ }
349 },
350};
351
352static int raw_open_common(BlockDriverState *bs, QDict *options,
e428e439 353 int bdrv_flags, int open_flags, Error **errp)
83f64091
FB
354{
355 BDRVRawState *s = bs->opaque;
c66a6157
KW
356 QemuOpts *opts;
357 Error *local_err = NULL;
358 const char *filename;
0e1d8f4c 359 int fd, ret;
260a82e5 360 struct stat st;
83f64091 361
87ea75d5 362 opts = qemu_opts_create(&raw_runtime_opts, NULL, 0, &error_abort);
c66a6157 363 qemu_opts_absorb_qdict(opts, options, &local_err);
84d18f06 364 if (local_err) {
e428e439 365 error_propagate(errp, local_err);
c66a6157
KW
366 ret = -EINVAL;
367 goto fail;
368 }
369
370 filename = qemu_opt_get(opts, "filename");
371
1de1ae0a
CE
372 ret = raw_normalize_devicepath(&filename);
373 if (ret != 0) {
e428e439 374 error_setg_errno(errp, -ret, "Could not normalize device path");
c66a6157 375 goto fail;
1de1ae0a
CE
376 }
377
6a8dc042
JC
378 s->open_flags = open_flags;
379 raw_parse_flags(bdrv_flags, &s->open_flags);
83f64091 380
90babde0 381 s->fd = -1;
40ff6d7e 382 fd = qemu_open(filename, s->open_flags, 0644);
19cb3738
FB
383 if (fd < 0) {
384 ret = -errno;
c66a6157 385 if (ret == -EROFS) {
19cb3738 386 ret = -EACCES;
c66a6157
KW
387 }
388 goto fail;
19cb3738 389 }
83f64091 390 s->fd = fd;
9ef91a67 391
5c6c3a6c 392#ifdef CONFIG_LINUX_AIO
fc32a72d 393 if (raw_set_aio(&s->aio_ctx, &s->use_aio, bdrv_flags)) {
47e6b251 394 qemu_close(fd);
c66a6157 395 ret = -errno;
e428e439 396 error_setg_errno(errp, -ret, "Could not set AIO state");
c66a6157 397 goto fail;
9ef91a67 398 }
fc32a72d 399#endif
9ef91a67 400
7ce21016 401 s->has_discard = true;
97a2ae34 402 s->has_write_zeroes = true;
260a82e5
PB
403
404 if (fstat(s->fd, &st) < 0) {
405 error_setg_errno(errp, errno, "Could not stat file");
406 goto fail;
407 }
408 if (S_ISREG(st.st_mode)) {
409 s->discard_zeroes = true;
410 }
d0b4503e
PB
411 if (S_ISBLK(st.st_mode)) {
412#ifdef BLKDISCARDZEROES
413 unsigned int arg;
414 if (ioctl(s->fd, BLKDISCARDZEROES, &arg) == 0 && arg) {
415 s->discard_zeroes = true;
416 }
417#endif
418#ifdef __linux__
419 /* On Linux 3.10, BLKDISCARD leaves stale data in the page cache. Do
420 * not rely on the contents of discarded blocks unless using O_DIRECT.
97a2ae34 421 * Same for BLKZEROOUT.
d0b4503e
PB
422 */
423 if (!(bs->open_flags & BDRV_O_NOCACHE)) {
424 s->discard_zeroes = false;
97a2ae34 425 s->has_write_zeroes = false;
d0b4503e
PB
426 }
427#endif
428 }
260a82e5 429
dce512de
CH
430#ifdef CONFIG_XFS
431 if (platform_test_xfs_fd(s->fd)) {
7ce21016 432 s->is_xfs = true;
dce512de
CH
433 }
434#endif
435
c66a6157
KW
436 ret = 0;
437fail:
438 qemu_opts_del(opts);
439 return ret;
83f64091
FB
440}
441
015a1036
HR
442static int raw_open(BlockDriverState *bs, QDict *options, int flags,
443 Error **errp)
90babde0
CH
444{
445 BDRVRawState *s = bs->opaque;
e428e439
HR
446 Error *local_err = NULL;
447 int ret;
90babde0
CH
448
449 s->type = FTYPE_FILE;
e428e439 450 ret = raw_open_common(bs, options, flags, 0, &local_err);
84d18f06 451 if (local_err) {
e428e439
HR
452 error_propagate(errp, local_err);
453 }
454 return ret;
90babde0
CH
455}
456
eeb6b45d
JC
457static int raw_reopen_prepare(BDRVReopenState *state,
458 BlockReopenQueue *queue, Error **errp)
459{
460 BDRVRawState *s;
461 BDRVRawReopenState *raw_s;
462 int ret = 0;
463
464 assert(state != NULL);
465 assert(state->bs != NULL);
466
467 s = state->bs->opaque;
468
469 state->opaque = g_malloc0(sizeof(BDRVRawReopenState));
470 raw_s = state->opaque;
471
472#ifdef CONFIG_LINUX_AIO
473 raw_s->use_aio = s->use_aio;
474
475 /* we can use s->aio_ctx instead of a copy, because the use_aio flag is
476 * valid in the 'false' condition even if aio_ctx is set, and raw_set_aio()
477 * won't override aio_ctx if aio_ctx is non-NULL */
478 if (raw_set_aio(&s->aio_ctx, &raw_s->use_aio, state->flags)) {
e428e439 479 error_setg(errp, "Could not set AIO state");
eeb6b45d
JC
480 return -1;
481 }
482#endif
483
1bc6b705
JC
484 if (s->type == FTYPE_FD || s->type == FTYPE_CD) {
485 raw_s->open_flags |= O_NONBLOCK;
486 }
487
eeb6b45d
JC
488 raw_parse_flags(state->flags, &raw_s->open_flags);
489
490 raw_s->fd = -1;
491
fdf263f6 492 int fcntl_flags = O_APPEND | O_NONBLOCK;
eeb6b45d
JC
493#ifdef O_NOATIME
494 fcntl_flags |= O_NOATIME;
495#endif
496
fdf263f6
AF
497#ifdef O_ASYNC
498 /* Not all operating systems have O_ASYNC, and those that don't
499 * will not let us track the state into raw_s->open_flags (typically
500 * you achieve the same effect with an ioctl, for example I_SETSIG
501 * on Solaris). But we do not use O_ASYNC, so that's fine.
502 */
503 assert((s->open_flags & O_ASYNC) == 0);
504#endif
505
eeb6b45d
JC
506 if ((raw_s->open_flags & ~fcntl_flags) == (s->open_flags & ~fcntl_flags)) {
507 /* dup the original fd */
508 /* TODO: use qemu fcntl wrapper */
509#ifdef F_DUPFD_CLOEXEC
510 raw_s->fd = fcntl(s->fd, F_DUPFD_CLOEXEC, 0);
511#else
512 raw_s->fd = dup(s->fd);
513 if (raw_s->fd != -1) {
514 qemu_set_cloexec(raw_s->fd);
515 }
516#endif
517 if (raw_s->fd >= 0) {
518 ret = fcntl_setfl(raw_s->fd, raw_s->open_flags);
519 if (ret) {
520 qemu_close(raw_s->fd);
521 raw_s->fd = -1;
522 }
523 }
524 }
525
526 /* If we cannot use fcntl, or fcntl failed, fall back to qemu_open() */
527 if (raw_s->fd == -1) {
528 assert(!(raw_s->open_flags & O_CREAT));
529 raw_s->fd = qemu_open(state->bs->filename, raw_s->open_flags);
530 if (raw_s->fd == -1) {
e428e439 531 error_setg_errno(errp, errno, "Could not reopen file");
eeb6b45d
JC
532 ret = -1;
533 }
534 }
535 return ret;
536}
537
eeb6b45d
JC
538static void raw_reopen_commit(BDRVReopenState *state)
539{
540 BDRVRawReopenState *raw_s = state->opaque;
541 BDRVRawState *s = state->bs->opaque;
542
543 s->open_flags = raw_s->open_flags;
544
545 qemu_close(s->fd);
546 s->fd = raw_s->fd;
547#ifdef CONFIG_LINUX_AIO
548 s->use_aio = raw_s->use_aio;
549#endif
550
551 g_free(state->opaque);
552 state->opaque = NULL;
553}
554
555
556static void raw_reopen_abort(BDRVReopenState *state)
557{
558 BDRVRawReopenState *raw_s = state->opaque;
559
560 /* nothing to do if NULL, we didn't get far enough */
561 if (raw_s == NULL) {
562 return;
563 }
564
565 if (raw_s->fd >= 0) {
566 qemu_close(raw_s->fd);
567 raw_s->fd = -1;
568 }
569 g_free(state->opaque);
570 state->opaque = NULL;
571}
572
c25f53b0
PB
573static int raw_refresh_limits(BlockDriverState *bs)
574{
575 BDRVRawState *s = bs->opaque;
eeb6b45d 576
c25f53b0
PB
577 raw_probe_alignment(bs);
578 bs->bl.opt_mem_alignment = s->buf_align;
579
580 return 0;
581}
83f64091 582
de81a169
PB
583static ssize_t handle_aiocb_ioctl(RawPosixAIOData *aiocb)
584{
585 int ret;
586
587 ret = ioctl(aiocb->aio_fildes, aiocb->aio_ioctl_cmd, aiocb->aio_ioctl_buf);
588 if (ret == -1) {
589 return -errno;
590 }
591
b608c8dc 592 return 0;
de81a169
PB
593}
594
595static ssize_t handle_aiocb_flush(RawPosixAIOData *aiocb)
596{
597 int ret;
598
599 ret = qemu_fdatasync(aiocb->aio_fildes);
600 if (ret == -1) {
601 return -errno;
602 }
603 return 0;
604}
605
/*
 * Thin wrappers around preadv()/pwritev().  When the build has no preadv
 * support they report -ENOSYS and preadv_present starts out false, so
 * handle_aiocb_rw() never takes the vectored path.
 */
#ifdef CONFIG_PREADV

static bool preadv_present = true;

static ssize_t
qemu_preadv(int fd, const struct iovec *iov, int nr_iov, off_t offset)
{
    ssize_t done = preadv(fd, iov, nr_iov, offset);

    return done;
}

static ssize_t
qemu_pwritev(int fd, const struct iovec *iov, int nr_iov, off_t offset)
{
    ssize_t done = pwritev(fd, iov, nr_iov, offset);

    return done;
}

#else /* !CONFIG_PREADV */

static bool preadv_present = false;

static ssize_t
qemu_preadv(int fd, const struct iovec *iov, int nr_iov, off_t offset)
{
    (void)fd;
    (void)iov;
    (void)nr_iov;
    (void)offset;

    return -ENOSYS;
}

static ssize_t
qemu_pwritev(int fd, const struct iovec *iov, int nr_iov, off_t offset)
{
    (void)fd;
    (void)iov;
    (void)nr_iov;
    (void)offset;

    return -ENOSYS;
}

#endif /* CONFIG_PREADV */
639
640static ssize_t handle_aiocb_rw_vector(RawPosixAIOData *aiocb)
641{
642 ssize_t len;
643
644 do {
645 if (aiocb->aio_type & QEMU_AIO_WRITE)
646 len = qemu_pwritev(aiocb->aio_fildes,
647 aiocb->aio_iov,
648 aiocb->aio_niov,
649 aiocb->aio_offset);
650 else
651 len = qemu_preadv(aiocb->aio_fildes,
652 aiocb->aio_iov,
653 aiocb->aio_niov,
654 aiocb->aio_offset);
655 } while (len == -1 && errno == EINTR);
656
657 if (len == -1) {
658 return -errno;
659 }
660 return len;
661}
662
663/*
664 * Read/writes the data to/from a given linear buffer.
665 *
666 * Returns the number of bytes handles or -errno in case of an error. Short
667 * reads are only returned if the end of the file is reached.
668 */
669static ssize_t handle_aiocb_rw_linear(RawPosixAIOData *aiocb, char *buf)
670{
671 ssize_t offset = 0;
672 ssize_t len;
673
674 while (offset < aiocb->aio_nbytes) {
675 if (aiocb->aio_type & QEMU_AIO_WRITE) {
676 len = pwrite(aiocb->aio_fildes,
677 (const char *)buf + offset,
678 aiocb->aio_nbytes - offset,
679 aiocb->aio_offset + offset);
680 } else {
681 len = pread(aiocb->aio_fildes,
682 buf + offset,
683 aiocb->aio_nbytes - offset,
684 aiocb->aio_offset + offset);
685 }
686 if (len == -1 && errno == EINTR) {
687 continue;
688 } else if (len == -1) {
689 offset = -errno;
690 break;
691 } else if (len == 0) {
692 break;
693 }
694 offset += len;
695 }
696
697 return offset;
698}
699
700static ssize_t handle_aiocb_rw(RawPosixAIOData *aiocb)
701{
702 ssize_t nbytes;
703 char *buf;
704
705 if (!(aiocb->aio_type & QEMU_AIO_MISALIGNED)) {
706 /*
707 * If there is just a single buffer, and it is properly aligned
708 * we can just use plain pread/pwrite without any problems.
709 */
710 if (aiocb->aio_niov == 1) {
711 return handle_aiocb_rw_linear(aiocb, aiocb->aio_iov->iov_base);
712 }
713 /*
714 * We have more than one iovec, and all are properly aligned.
715 *
716 * Try preadv/pwritev first and fall back to linearizing the
717 * buffer if it's not supported.
718 */
719 if (preadv_present) {
720 nbytes = handle_aiocb_rw_vector(aiocb);
721 if (nbytes == aiocb->aio_nbytes ||
722 (nbytes < 0 && nbytes != -ENOSYS)) {
723 return nbytes;
724 }
725 preadv_present = false;
726 }
727
728 /*
729 * XXX(hch): short read/write. no easy way to handle the reminder
730 * using these interfaces. For now retry using plain
731 * pread/pwrite?
732 */
733 }
734
735 /*
736 * Ok, we have to do it the hard way, copy all segments into
737 * a single aligned buffer.
738 */
739 buf = qemu_blockalign(aiocb->bs, aiocb->aio_nbytes);
740 if (aiocb->aio_type & QEMU_AIO_WRITE) {
741 char *p = buf;
742 int i;
743
744 for (i = 0; i < aiocb->aio_niov; ++i) {
745 memcpy(p, aiocb->aio_iov[i].iov_base, aiocb->aio_iov[i].iov_len);
746 p += aiocb->aio_iov[i].iov_len;
747 }
748 }
749
750 nbytes = handle_aiocb_rw_linear(aiocb, buf);
751 if (!(aiocb->aio_type & QEMU_AIO_WRITE)) {
752 char *p = buf;
753 size_t count = aiocb->aio_nbytes, copy;
754 int i;
755
756 for (i = 0; i < aiocb->aio_niov && count; ++i) {
757 copy = count;
758 if (copy > aiocb->aio_iov[i].iov_len) {
759 copy = aiocb->aio_iov[i].iov_len;
760 }
761 memcpy(aiocb->aio_iov[i].iov_base, p, copy);
762 p += copy;
763 count -= copy;
764 }
765 }
766 qemu_vfree(buf);
767
768 return nbytes;
769}
770
8238010b 771#ifdef CONFIG_XFS
97a2ae34
PB
772static int xfs_write_zeroes(BDRVRawState *s, int64_t offset, uint64_t bytes)
773{
774 struct xfs_flock64 fl;
775
776 memset(&fl, 0, sizeof(fl));
777 fl.l_whence = SEEK_SET;
778 fl.l_start = offset;
779 fl.l_len = bytes;
780
781 if (xfsctl(NULL, s->fd, XFS_IOC_ZERO_RANGE, &fl) < 0) {
782 DEBUG_BLOCK_PRINT("cannot write zero range (%s)\n", strerror(errno));
783 return -errno;
784 }
785
786 return 0;
787}
788
8238010b
PB
789static int xfs_discard(BDRVRawState *s, int64_t offset, uint64_t bytes)
790{
791 struct xfs_flock64 fl;
792
793 memset(&fl, 0, sizeof(fl));
794 fl.l_whence = SEEK_SET;
795 fl.l_start = offset;
796 fl.l_len = bytes;
797
798 if (xfsctl(NULL, s->fd, XFS_IOC_UNRESVSP64, &fl) < 0) {
799 DEBUG_BLOCK_PRINT("cannot punch hole (%s)\n", strerror(errno));
800 return -errno;
801 }
802
803 return 0;
804}
805#endif
806
97a2ae34
PB
807static ssize_t handle_aiocb_write_zeroes(RawPosixAIOData *aiocb)
808{
809 int ret = -EOPNOTSUPP;
810 BDRVRawState *s = aiocb->bs->opaque;
811
812 if (s->has_write_zeroes == 0) {
813 return -ENOTSUP;
814 }
815
816 if (aiocb->aio_type & QEMU_AIO_BLKDEV) {
817#ifdef BLKZEROOUT
818 do {
819 uint64_t range[2] = { aiocb->aio_offset, aiocb->aio_nbytes };
820 if (ioctl(aiocb->aio_fildes, BLKZEROOUT, range) == 0) {
821 return 0;
822 }
823 } while (errno == EINTR);
824
825 ret = -errno;
826#endif
827 } else {
828#ifdef CONFIG_XFS
829 if (s->is_xfs) {
830 return xfs_write_zeroes(s, aiocb->aio_offset, aiocb->aio_nbytes);
831 }
832#endif
833 }
834
835 if (ret == -ENODEV || ret == -ENOSYS || ret == -EOPNOTSUPP ||
836 ret == -ENOTTY) {
837 s->has_write_zeroes = false;
838 ret = -ENOTSUP;
839 }
840 return ret;
841}
842
8238010b
PB
843static ssize_t handle_aiocb_discard(RawPosixAIOData *aiocb)
844{
845 int ret = -EOPNOTSUPP;
846 BDRVRawState *s = aiocb->bs->opaque;
847
7ce21016
PB
848 if (!s->has_discard) {
849 return -ENOTSUP;
8238010b
PB
850 }
851
852 if (aiocb->aio_type & QEMU_AIO_BLKDEV) {
853#ifdef BLKDISCARD
854 do {
855 uint64_t range[2] = { aiocb->aio_offset, aiocb->aio_nbytes };
856 if (ioctl(aiocb->aio_fildes, BLKDISCARD, range) == 0) {
857 return 0;
858 }
859 } while (errno == EINTR);
860
861 ret = -errno;
862#endif
863 } else {
864#ifdef CONFIG_XFS
865 if (s->is_xfs) {
866 return xfs_discard(s, aiocb->aio_offset, aiocb->aio_nbytes);
867 }
868#endif
869
870#ifdef CONFIG_FALLOCATE_PUNCH_HOLE
871 do {
872 if (fallocate(s->fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
873 aiocb->aio_offset, aiocb->aio_nbytes) == 0) {
874 return 0;
875 }
876 } while (errno == EINTR);
877
878 ret = -errno;
879#endif
880 }
881
882 if (ret == -ENODEV || ret == -ENOSYS || ret == -EOPNOTSUPP ||
883 ret == -ENOTTY) {
7ce21016
PB
884 s->has_discard = false;
885 ret = -ENOTSUP;
8238010b
PB
886 }
887 return ret;
888}
889
de81a169
PB
890static int aio_worker(void *arg)
891{
892 RawPosixAIOData *aiocb = arg;
893 ssize_t ret = 0;
894
895 switch (aiocb->aio_type & QEMU_AIO_TYPE_MASK) {
896 case QEMU_AIO_READ:
897 ret = handle_aiocb_rw(aiocb);
898 if (ret >= 0 && ret < aiocb->aio_nbytes && aiocb->bs->growable) {
899 iov_memset(aiocb->aio_iov, aiocb->aio_niov, ret,
900 0, aiocb->aio_nbytes - ret);
901
902 ret = aiocb->aio_nbytes;
903 }
904 if (ret == aiocb->aio_nbytes) {
905 ret = 0;
906 } else if (ret >= 0 && ret < aiocb->aio_nbytes) {
907 ret = -EINVAL;
908 }
909 break;
910 case QEMU_AIO_WRITE:
911 ret = handle_aiocb_rw(aiocb);
912 if (ret == aiocb->aio_nbytes) {
913 ret = 0;
914 } else if (ret >= 0 && ret < aiocb->aio_nbytes) {
915 ret = -EINVAL;
916 }
917 break;
918 case QEMU_AIO_FLUSH:
919 ret = handle_aiocb_flush(aiocb);
920 break;
921 case QEMU_AIO_IOCTL:
922 ret = handle_aiocb_ioctl(aiocb);
923 break;
8238010b
PB
924 case QEMU_AIO_DISCARD:
925 ret = handle_aiocb_discard(aiocb);
926 break;
97a2ae34
PB
927 case QEMU_AIO_WRITE_ZEROES:
928 ret = handle_aiocb_write_zeroes(aiocb);
929 break;
de81a169
PB
930 default:
931 fprintf(stderr, "invalid aio request (0x%x)\n", aiocb->aio_type);
932 ret = -EINVAL;
933 break;
934 }
935
936 g_slice_free(RawPosixAIOData, aiocb);
937 return ret;
938}
939
260a82e5
PB
940static int paio_submit_co(BlockDriverState *bs, int fd,
941 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
942 int type)
943{
944 RawPosixAIOData *acb = g_slice_new(RawPosixAIOData);
945 ThreadPool *pool;
946
947 acb->bs = bs;
948 acb->aio_type = type;
949 acb->aio_fildes = fd;
950
951 if (qiov) {
952 acb->aio_iov = qiov->iov;
953 acb->aio_niov = qiov->niov;
954 }
955 acb->aio_nbytes = nb_sectors * 512;
956 acb->aio_offset = sector_num * 512;
957
958 trace_paio_submit_co(sector_num, nb_sectors, type);
959 pool = aio_get_thread_pool(bdrv_get_aio_context(bs));
960 return thread_pool_submit_co(pool, aio_worker, acb);
961}
962
de81a169
PB
963static BlockDriverAIOCB *paio_submit(BlockDriverState *bs, int fd,
964 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
965 BlockDriverCompletionFunc *cb, void *opaque, int type)
966{
967 RawPosixAIOData *acb = g_slice_new(RawPosixAIOData);
c4d9d196 968 ThreadPool *pool;
de81a169
PB
969
970 acb->bs = bs;
971 acb->aio_type = type;
972 acb->aio_fildes = fd;
973
974 if (qiov) {
975 acb->aio_iov = qiov->iov;
976 acb->aio_niov = qiov->niov;
977 }
978 acb->aio_nbytes = nb_sectors * 512;
979 acb->aio_offset = sector_num * 512;
980
981 trace_paio_submit(acb, opaque, sector_num, nb_sectors, type);
c4d9d196
SH
982 pool = aio_get_thread_pool(bdrv_get_aio_context(bs));
983 return thread_pool_submit_aio(pool, aio_worker, acb, cb, opaque);
de81a169
PB
984}
985
9ef91a67
CH
986static BlockDriverAIOCB *raw_aio_submit(BlockDriverState *bs,
987 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
988 BlockDriverCompletionFunc *cb, void *opaque, int type)
83f64091 989{
ce1a14dc 990 BDRVRawState *s = bs->opaque;
ce1a14dc 991
19cb3738
FB
992 if (fd_open(bs) < 0)
993 return NULL;
994
f141eafe
AL
995 /*
996 * If O_DIRECT is used the buffer needs to be aligned on a sector
c1ee7d56 997 * boundary. Check if this is the case or tell the low-level
9ef91a67 998 * driver that it needs to copy the buffer.
f141eafe 999 */
9acc5a06 1000 if ((bs->open_flags & BDRV_O_NOCACHE)) {
c53b1c51 1001 if (!bdrv_qiov_is_aligned(bs, qiov)) {
5c6c3a6c 1002 type |= QEMU_AIO_MISALIGNED;
e44bd6fc 1003#ifdef CONFIG_LINUX_AIO
5c6c3a6c
CH
1004 } else if (s->use_aio) {
1005 return laio_submit(bs, s->aio_ctx, s->fd, sector_num, qiov,
e44bd6fc
SW
1006 nb_sectors, cb, opaque, type);
1007#endif
5c6c3a6c 1008 }
9ef91a67 1009 }
f141eafe 1010
1e5b9d2f 1011 return paio_submit(bs, s->fd, sector_num, qiov, nb_sectors,
9ef91a67 1012 cb, opaque, type);
83f64091
FB
1013}
1014
f141eafe
AL
1015static BlockDriverAIOCB *raw_aio_readv(BlockDriverState *bs,
1016 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
ce1a14dc 1017 BlockDriverCompletionFunc *cb, void *opaque)
83f64091 1018{
9ef91a67
CH
1019 return raw_aio_submit(bs, sector_num, qiov, nb_sectors,
1020 cb, opaque, QEMU_AIO_READ);
83f64091
FB
1021}
1022
f141eafe
AL
1023static BlockDriverAIOCB *raw_aio_writev(BlockDriverState *bs,
1024 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
ce1a14dc 1025 BlockDriverCompletionFunc *cb, void *opaque)
83f64091 1026{
9ef91a67
CH
1027 return raw_aio_submit(bs, sector_num, qiov, nb_sectors,
1028 cb, opaque, QEMU_AIO_WRITE);
83f64091 1029}
53538725 1030
b2e12bc6
CH
1031static BlockDriverAIOCB *raw_aio_flush(BlockDriverState *bs,
1032 BlockDriverCompletionFunc *cb, void *opaque)
1033{
1034 BDRVRawState *s = bs->opaque;
1035
1036 if (fd_open(bs) < 0)
1037 return NULL;
1038
1e5b9d2f 1039 return paio_submit(bs, s->fd, 0, NULL, 0, cb, opaque, QEMU_AIO_FLUSH);
b2e12bc6
CH
1040}
1041
83f64091
FB
1042static void raw_close(BlockDriverState *bs)
1043{
1044 BDRVRawState *s = bs->opaque;
19cb3738 1045 if (s->fd >= 0) {
2e1e79da 1046 qemu_close(s->fd);
19cb3738
FB
1047 s->fd = -1;
1048 }
83f64091
FB
1049}
1050
1051static int raw_truncate(BlockDriverState *bs, int64_t offset)
1052{
1053 BDRVRawState *s = bs->opaque;
55b949c8
CH
1054 struct stat st;
1055
1056 if (fstat(s->fd, &st)) {
83f64091 1057 return -errno;
55b949c8
CH
1058 }
1059
1060 if (S_ISREG(st.st_mode)) {
1061 if (ftruncate(s->fd, offset) < 0) {
1062 return -errno;
1063 }
1064 } else if (S_ISCHR(st.st_mode) || S_ISBLK(st.st_mode)) {
1065 if (offset > raw_getlength(bs)) {
1066 return -EINVAL;
1067 }
1068 } else {
1069 return -ENOTSUP;
1070 }
1071
83f64091
FB
1072 return 0;
1073}
1074
128ab2ff
BS
1075#ifdef __OpenBSD__
1076static int64_t raw_getlength(BlockDriverState *bs)
1077{
1078 BDRVRawState *s = bs->opaque;
1079 int fd = s->fd;
1080 struct stat st;
1081
1082 if (fstat(fd, &st))
1083 return -1;
1084 if (S_ISCHR(st.st_mode) || S_ISBLK(st.st_mode)) {
1085 struct disklabel dl;
1086
1087 if (ioctl(fd, DIOCGDINFO, &dl))
1088 return -1;
1089 return (uint64_t)dl.d_secsize *
1090 dl.d_partitions[DISKPART(st.st_rdev)].p_size;
1091 } else
1092 return st.st_size;
1093}
d1f6fd8d
CE
1094#elif defined(__NetBSD__)
1095static int64_t raw_getlength(BlockDriverState *bs)
1096{
1097 BDRVRawState *s = bs->opaque;
1098 int fd = s->fd;
1099 struct stat st;
1100
1101 if (fstat(fd, &st))
1102 return -1;
1103 if (S_ISCHR(st.st_mode) || S_ISBLK(st.st_mode)) {
1104 struct dkwedge_info dkw;
1105
1106 if (ioctl(fd, DIOCGWEDGEINFO, &dkw) != -1) {
1107 return dkw.dkw_size * 512;
1108 } else {
1109 struct disklabel dl;
1110
1111 if (ioctl(fd, DIOCGDINFO, &dl))
1112 return -1;
1113 return (uint64_t)dl.d_secsize *
1114 dl.d_partitions[DISKPART(st.st_rdev)].p_size;
1115 }
1116 } else
1117 return st.st_size;
1118}
50779cc2
CH
1119#elif defined(__sun__)
1120static int64_t raw_getlength(BlockDriverState *bs)
1121{
1122 BDRVRawState *s = bs->opaque;
1123 struct dk_minfo minfo;
1124 int ret;
1125
1126 ret = fd_open(bs);
1127 if (ret < 0) {
1128 return ret;
1129 }
1130
1131 /*
1132 * Use the DKIOCGMEDIAINFO ioctl to read the size.
1133 */
1134 ret = ioctl(s->fd, DKIOCGMEDIAINFO, &minfo);
1135 if (ret != -1) {
1136 return minfo.dki_lbsize * minfo.dki_capacity;
1137 }
1138
1139 /*
1140 * There are reports that lseek on some devices fails, but
1141 * irc discussion said that contingency on contingency was overkill.
1142 */
1143 return lseek(s->fd, 0, SEEK_END);
1144}
1145#elif defined(CONFIG_BSD)
1146static int64_t raw_getlength(BlockDriverState *bs)
83f64091
FB
1147{
1148 BDRVRawState *s = bs->opaque;
1149 int fd = s->fd;
1150 int64_t size;
83f64091 1151 struct stat sb;
a167ba50 1152#if defined (__FreeBSD__) || defined(__FreeBSD_kernel__)
9f23011a 1153 int reopened = 0;
83f64091 1154#endif
19cb3738
FB
1155 int ret;
1156
1157 ret = fd_open(bs);
1158 if (ret < 0)
1159 return ret;
83f64091 1160
a167ba50 1161#if defined (__FreeBSD__) || defined(__FreeBSD_kernel__)
9f23011a
BS
1162again:
1163#endif
83f64091
FB
1164 if (!fstat(fd, &sb) && (S_IFCHR & sb.st_mode)) {
1165#ifdef DIOCGMEDIASIZE
1166 if (ioctl(fd, DIOCGMEDIASIZE, (off_t *)&size))
c5e97233
BS
1167#elif defined(DIOCGPART)
1168 {
1169 struct partinfo pi;
1170 if (ioctl(fd, DIOCGPART, &pi) == 0)
1171 size = pi.media_size;
1172 else
1173 size = 0;
1174 }
1175 if (size == 0)
83f64091 1176#endif
83affaa6 1177#if defined(__APPLE__) && defined(__MACH__)
83f64091
FB
1178 size = LONG_LONG_MAX;
1179#else
1180 size = lseek(fd, 0LL, SEEK_END);
9f23011a 1181#endif
a167ba50 1182#if defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
9f23011a
BS
1183 switch(s->type) {
1184 case FTYPE_CD:
1185 /* XXX FreeBSD acd returns UINT_MAX sectors for an empty drive */
1186 if (size == 2048LL * (unsigned)-1)
1187 size = 0;
1188 /* XXX no disc? maybe we need to reopen... */
f3a5d3f8 1189 if (size <= 0 && !reopened && cdrom_reopen(bs) >= 0) {
9f23011a
BS
1190 reopened = 1;
1191 goto again;
1192 }
1193 }
83f64091 1194#endif
50779cc2 1195 } else {
83f64091
FB
1196 size = lseek(fd, 0, SEEK_END);
1197 }
83f64091
FB
1198 return size;
1199}
50779cc2
CH
1200#else
1201static int64_t raw_getlength(BlockDriverState *bs)
1202{
1203 BDRVRawState *s = bs->opaque;
1204 int ret;
1205
1206 ret = fd_open(bs);
1207 if (ret < 0) {
1208 return ret;
1209 }
1210
1211 return lseek(s->fd, 0, SEEK_END);
1212}
128ab2ff 1213#endif
83f64091 1214
4a1d5e1f
FZ
1215static int64_t raw_get_allocated_file_size(BlockDriverState *bs)
1216{
1217 struct stat st;
1218 BDRVRawState *s = bs->opaque;
1219
1220 if (fstat(s->fd, &st) < 0) {
1221 return -errno;
1222 }
1223 return (int64_t)st.st_blocks * 512;
1224}
1225
d5124c00
HR
1226static int raw_create(const char *filename, QEMUOptionParameter *options,
1227 Error **errp)
83f64091
FB
1228{
1229 int fd;
1e37d059 1230 int result = 0;
0e7e1989 1231 int64_t total_size = 0;
83f64091 1232
0e7e1989
KW
1233 /* Read out options */
1234 while (options && options->name) {
1235 if (!strcmp(options->name, BLOCK_OPT_SIZE)) {
9040385d 1236 total_size = options->value.n / BDRV_SECTOR_SIZE;
0e7e1989
KW
1237 }
1238 options++;
1239 }
83f64091 1240
6165f4d8
CB
1241 fd = qemu_open(filename, O_WRONLY | O_CREAT | O_TRUNC | O_BINARY,
1242 0644);
1e37d059
SW
1243 if (fd < 0) {
1244 result = -errno;
e428e439 1245 error_setg_errno(errp, -result, "Could not create file");
1e37d059 1246 } else {
9040385d 1247 if (ftruncate(fd, total_size * BDRV_SECTOR_SIZE) != 0) {
1e37d059 1248 result = -errno;
e428e439 1249 error_setg_errno(errp, -result, "Could not resize file");
1e37d059 1250 }
2e1e79da 1251 if (qemu_close(fd) != 0) {
1e37d059 1252 result = -errno;
e428e439 1253 error_setg_errno(errp, -result, "Could not close the new file");
1e37d059
SW
1254 }
1255 }
1256 return result;
83f64091
FB
1257}
1258
5500316d
PB
1259/*
1260 * Returns true iff the specified sector is present in the disk image. Drivers
1261 * not implementing the functionality are assumed to not support backing files,
1262 * hence all their sectors are reported as allocated.
1263 *
1264 * If 'sector_num' is beyond the end of the disk image the return value is 0
1265 * and 'pnum' is set to 0.
1266 *
1267 * 'pnum' is set to the number of sectors (including and immediately following
1268 * the specified sector) that are known to be in the same
1269 * allocated/unallocated state.
1270 *
1271 * 'nb_sectors' is the max value 'pnum' should be set to. If nb_sectors goes
1272 * beyond the end of the disk image it will be clamped.
1273 */
b6b8a333 1274static int64_t coroutine_fn raw_co_get_block_status(BlockDriverState *bs,
5500316d
PB
1275 int64_t sector_num,
1276 int nb_sectors, int *pnum)
1277{
5500316d 1278 off_t start, data, hole;
63390a8d 1279 int64_t ret;
5500316d
PB
1280
1281 ret = fd_open(bs);
1282 if (ret < 0) {
1283 return ret;
1284 }
1285
1286 start = sector_num * BDRV_SECTOR_SIZE;
63390a8d 1287 ret = BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID | start;
94282e71 1288
5500316d 1289#ifdef CONFIG_FIEMAP
94282e71
KW
1290
1291 BDRVRawState *s = bs->opaque;
5500316d
PB
1292 struct {
1293 struct fiemap fm;
1294 struct fiemap_extent fe;
1295 } f;
94282e71 1296
5500316d
PB
1297 f.fm.fm_start = start;
1298 f.fm.fm_length = (int64_t)nb_sectors * BDRV_SECTOR_SIZE;
1299 f.fm.fm_flags = 0;
1300 f.fm.fm_extent_count = 1;
1301 f.fm.fm_reserved = 0;
1302 if (ioctl(s->fd, FS_IOC_FIEMAP, &f) == -1) {
1303 /* Assume everything is allocated. */
1304 *pnum = nb_sectors;
63390a8d 1305 return ret;
5500316d
PB
1306 }
1307
1308 if (f.fm.fm_mapped_extents == 0) {
1309 /* No extents found, data is beyond f.fm.fm_start + f.fm.fm_length.
1310 * f.fm.fm_start + f.fm.fm_length must be clamped to the file size!
1311 */
1312 off_t length = lseek(s->fd, 0, SEEK_END);
1313 hole = f.fm.fm_start;
1314 data = MIN(f.fm.fm_start + f.fm.fm_length, length);
1315 } else {
1316 data = f.fe.fe_logical;
1317 hole = f.fe.fe_logical + f.fe.fe_length;
f5f7abcf
PB
1318 if (f.fe.fe_flags & FIEMAP_EXTENT_UNWRITTEN) {
1319 ret |= BDRV_BLOCK_ZERO;
1320 }
5500316d 1321 }
94282e71 1322
5500316d 1323#elif defined SEEK_HOLE && defined SEEK_DATA
94282e71
KW
1324
1325 BDRVRawState *s = bs->opaque;
1326
5500316d
PB
1327 hole = lseek(s->fd, start, SEEK_HOLE);
1328 if (hole == -1) {
1329 /* -ENXIO indicates that sector_num was past the end of the file.
1330 * There is a virtual hole there. */
1331 assert(errno != -ENXIO);
1332
1333 /* Most likely EINVAL. Assume everything is allocated. */
1334 *pnum = nb_sectors;
63390a8d 1335 return ret;
5500316d
PB
1336 }
1337
1338 if (hole > start) {
1339 data = start;
1340 } else {
1341 /* On a hole. We need another syscall to find its end. */
1342 data = lseek(s->fd, start, SEEK_DATA);
1343 if (data == -1) {
1344 data = lseek(s->fd, 0, SEEK_END);
1345 }
1346 }
1347#else
63390a8d
PB
1348 data = 0;
1349 hole = start + nb_sectors * BDRV_SECTOR_SIZE;
5500316d
PB
1350#endif
1351
1352 if (data <= start) {
1353 /* On a data extent, compute sectors to the end of the extent. */
1354 *pnum = MIN(nb_sectors, (hole - start) / BDRV_SECTOR_SIZE);
5500316d
PB
1355 } else {
1356 /* On a hole, compute sectors to the beginning of the next extent. */
1357 *pnum = MIN(nb_sectors, (data - start) / BDRV_SECTOR_SIZE);
63390a8d
PB
1358 ret &= ~BDRV_BLOCK_DATA;
1359 ret |= BDRV_BLOCK_ZERO;
5500316d 1360 }
63390a8d
PB
1361
1362 return ret;
5500316d
PB
1363}
1364
8238010b
PB
1365static coroutine_fn BlockDriverAIOCB *raw_aio_discard(BlockDriverState *bs,
1366 int64_t sector_num, int nb_sectors,
1367 BlockDriverCompletionFunc *cb, void *opaque)
dce512de 1368{
dce512de
CH
1369 BDRVRawState *s = bs->opaque;
1370
8238010b
PB
1371 return paio_submit(bs, s->fd, sector_num, NULL, nb_sectors,
1372 cb, opaque, QEMU_AIO_DISCARD);
dce512de 1373}
0e7e1989 1374
260a82e5
PB
1375static int coroutine_fn raw_co_write_zeroes(
1376 BlockDriverState *bs, int64_t sector_num,
1377 int nb_sectors, BdrvRequestFlags flags)
1378{
1379 BDRVRawState *s = bs->opaque;
1380
1381 if (!(flags & BDRV_REQ_MAY_UNMAP)) {
97a2ae34
PB
1382 return paio_submit_co(bs, s->fd, sector_num, NULL, nb_sectors,
1383 QEMU_AIO_WRITE_ZEROES);
1384 } else if (s->discard_zeroes) {
1385 return paio_submit_co(bs, s->fd, sector_num, NULL, nb_sectors,
1386 QEMU_AIO_DISCARD);
260a82e5 1387 }
97a2ae34 1388 return -ENOTSUP;
260a82e5
PB
1389}
1390
1391static int raw_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
1392{
1393 BDRVRawState *s = bs->opaque;
1394
1395 bdi->unallocated_blocks_are_zero = s->discard_zeroes;
1396 bdi->can_write_zeroes_with_unmap = s->discard_zeroes;
1397 return 0;
1398}
1399
0e7e1989 1400static QEMUOptionParameter raw_create_options[] = {
db08adf5
KW
1401 {
1402 .name = BLOCK_OPT_SIZE,
1403 .type = OPT_SIZE,
1404 .help = "Virtual disk size"
1405 },
0e7e1989
KW
1406 { NULL }
1407};
1408
84a12e66
CH
1409static BlockDriver bdrv_file = {
1410 .format_name = "file",
1411 .protocol_name = "file",
856ae5c3 1412 .instance_size = sizeof(BDRVRawState),
030be321 1413 .bdrv_needs_filename = true,
856ae5c3 1414 .bdrv_probe = NULL, /* no probe for protocols */
66f82cee 1415 .bdrv_file_open = raw_open,
eeb6b45d
JC
1416 .bdrv_reopen_prepare = raw_reopen_prepare,
1417 .bdrv_reopen_commit = raw_reopen_commit,
1418 .bdrv_reopen_abort = raw_reopen_abort,
856ae5c3
BS
1419 .bdrv_close = raw_close,
1420 .bdrv_create = raw_create,
3ac21627 1421 .bdrv_has_zero_init = bdrv_has_zero_init_1,
b6b8a333 1422 .bdrv_co_get_block_status = raw_co_get_block_status,
260a82e5 1423 .bdrv_co_write_zeroes = raw_co_write_zeroes,
3b46e624 1424
f141eafe
AL
1425 .bdrv_aio_readv = raw_aio_readv,
1426 .bdrv_aio_writev = raw_aio_writev,
b2e12bc6 1427 .bdrv_aio_flush = raw_aio_flush,
8238010b 1428 .bdrv_aio_discard = raw_aio_discard,
c25f53b0 1429 .bdrv_refresh_limits = raw_refresh_limits,
3c529d93 1430
83f64091
FB
1431 .bdrv_truncate = raw_truncate,
1432 .bdrv_getlength = raw_getlength,
260a82e5 1433 .bdrv_get_info = raw_get_info,
4a1d5e1f
FZ
1434 .bdrv_get_allocated_file_size
1435 = raw_get_allocated_file_size,
0e7e1989
KW
1436
1437 .create_options = raw_create_options,
83f64091
FB
1438};
1439
19cb3738
FB
1440/***********************************************/
1441/* host device */
1442
83affaa6 1443#if defined(__APPLE__) && defined(__MACH__)
19cb3738
FB
1444static kern_return_t FindEjectableCDMedia( io_iterator_t *mediaIterator );
1445static kern_return_t GetBSDPath( io_iterator_t mediaIterator, char *bsdPath, CFIndex maxPathSize );
1446
1447kern_return_t FindEjectableCDMedia( io_iterator_t *mediaIterator )
1448{
5fafdf24 1449 kern_return_t kernResult;
19cb3738
FB
1450 mach_port_t masterPort;
1451 CFMutableDictionaryRef classesToMatch;
1452
1453 kernResult = IOMasterPort( MACH_PORT_NULL, &masterPort );
1454 if ( KERN_SUCCESS != kernResult ) {
1455 printf( "IOMasterPort returned %d\n", kernResult );
1456 }
3b46e624 1457
5fafdf24 1458 classesToMatch = IOServiceMatching( kIOCDMediaClass );
19cb3738
FB
1459 if ( classesToMatch == NULL ) {
1460 printf( "IOServiceMatching returned a NULL dictionary.\n" );
1461 } else {
1462 CFDictionarySetValue( classesToMatch, CFSTR( kIOMediaEjectableKey ), kCFBooleanTrue );
1463 }
1464 kernResult = IOServiceGetMatchingServices( masterPort, classesToMatch, mediaIterator );
1465 if ( KERN_SUCCESS != kernResult )
1466 {
1467 printf( "IOServiceGetMatchingServices returned %d\n", kernResult );
1468 }
3b46e624 1469
19cb3738
FB
1470 return kernResult;
1471}
1472
1473kern_return_t GetBSDPath( io_iterator_t mediaIterator, char *bsdPath, CFIndex maxPathSize )
1474{
1475 io_object_t nextMedia;
1476 kern_return_t kernResult = KERN_FAILURE;
1477 *bsdPath = '\0';
1478 nextMedia = IOIteratorNext( mediaIterator );
1479 if ( nextMedia )
1480 {
1481 CFTypeRef bsdPathAsCFString;
1482 bsdPathAsCFString = IORegistryEntryCreateCFProperty( nextMedia, CFSTR( kIOBSDNameKey ), kCFAllocatorDefault, 0 );
1483 if ( bsdPathAsCFString ) {
1484 size_t devPathLength;
1485 strcpy( bsdPath, _PATH_DEV );
1486 strcat( bsdPath, "r" );
1487 devPathLength = strlen( bsdPath );
1488 if ( CFStringGetCString( bsdPathAsCFString, bsdPath + devPathLength, maxPathSize - devPathLength, kCFStringEncodingASCII ) ) {
1489 kernResult = KERN_SUCCESS;
1490 }
1491 CFRelease( bsdPathAsCFString );
1492 }
1493 IOObjectRelease( nextMedia );
1494 }
3b46e624 1495
19cb3738
FB
1496 return kernResult;
1497}
1498
1499#endif
1500
508c7cb3
CH
1501static int hdev_probe_device(const char *filename)
1502{
1503 struct stat st;
1504
1505 /* allow a dedicated CD-ROM driver to match with a higher priority */
1506 if (strstart(filename, "/dev/cdrom", NULL))
1507 return 50;
1508
1509 if (stat(filename, &st) >= 0 &&
1510 (S_ISCHR(st.st_mode) || S_ISBLK(st.st_mode))) {
1511 return 100;
1512 }
1513
1514 return 0;
1515}
1516
da888d37
SH
1517static int check_hdev_writable(BDRVRawState *s)
1518{
1519#if defined(BLKROGET)
1520 /* Linux block devices can be configured "read-only" using blockdev(8).
1521 * This is independent of device node permissions and therefore open(2)
1522 * with O_RDWR succeeds. Actual writes fail with EPERM.
1523 *
1524 * bdrv_open() is supposed to fail if the disk is read-only. Explicitly
1525 * check for read-only block devices so that Linux block devices behave
1526 * properly.
1527 */
1528 struct stat st;
1529 int readonly = 0;
1530
1531 if (fstat(s->fd, &st)) {
1532 return -errno;
1533 }
1534
1535 if (!S_ISBLK(st.st_mode)) {
1536 return 0;
1537 }
1538
1539 if (ioctl(s->fd, BLKROGET, &readonly) < 0) {
1540 return -errno;
1541 }
1542
1543 if (readonly) {
1544 return -EACCES;
1545 }
1546#endif /* defined(BLKROGET) */
1547 return 0;
1548}
1549
015a1036
HR
1550static int hdev_open(BlockDriverState *bs, QDict *options, int flags,
1551 Error **errp)
19cb3738
FB
1552{
1553 BDRVRawState *s = bs->opaque;
e428e439 1554 Error *local_err = NULL;
da888d37 1555 int ret;
c66a6157 1556 const char *filename = qdict_get_str(options, "filename");
a76bab49 1557
83affaa6 1558#if defined(__APPLE__) && defined(__MACH__)
19cb3738
FB
1559 if (strstart(filename, "/dev/cdrom", NULL)) {
1560 kern_return_t kernResult;
1561 io_iterator_t mediaIterator;
1562 char bsdPath[ MAXPATHLEN ];
1563 int fd;
5fafdf24 1564
19cb3738
FB
1565 kernResult = FindEjectableCDMedia( &mediaIterator );
1566 kernResult = GetBSDPath( mediaIterator, bsdPath, sizeof( bsdPath ) );
3b46e624 1567
19cb3738
FB
1568 if ( bsdPath[ 0 ] != '\0' ) {
1569 strcat(bsdPath,"s0");
1570 /* some CDs don't have a partition 0 */
6165f4d8 1571 fd = qemu_open(bsdPath, O_RDONLY | O_BINARY | O_LARGEFILE);
19cb3738
FB
1572 if (fd < 0) {
1573 bsdPath[strlen(bsdPath)-1] = '1';
1574 } else {
2e1e79da 1575 qemu_close(fd);
19cb3738
FB
1576 }
1577 filename = bsdPath;
a5c5ea3f 1578 qdict_put(options, "filename", qstring_from_str(filename));
19cb3738 1579 }
3b46e624 1580
19cb3738
FB
1581 if ( mediaIterator )
1582 IOObjectRelease( mediaIterator );
1583 }
1584#endif
19cb3738
FB
1585
1586 s->type = FTYPE_FILE;
4dd75c70 1587#if defined(__linux__)
05acda4d
BK
1588 {
1589 char resolved_path[ MAXPATHLEN ], *temp;
1590
1591 temp = realpath(filename, resolved_path);
1592 if (temp && strstart(temp, "/dev/sg", NULL)) {
1593 bs->sg = 1;
1594 }
19cb3738
FB
1595 }
1596#endif
90babde0 1597
e428e439 1598 ret = raw_open_common(bs, options, flags, 0, &local_err);
da888d37 1599 if (ret < 0) {
84d18f06 1600 if (local_err) {
e428e439
HR
1601 error_propagate(errp, local_err);
1602 }
da888d37
SH
1603 return ret;
1604 }
1605
1606 if (flags & BDRV_O_RDWR) {
1607 ret = check_hdev_writable(s);
1608 if (ret < 0) {
1609 raw_close(bs);
e428e439 1610 error_setg_errno(errp, -ret, "The device is not writable");
da888d37
SH
1611 return ret;
1612 }
1613 }
1614
1615 return ret;
19cb3738
FB
1616}
1617
03ff3ca3 1618#if defined(__linux__)
19cb3738
FB
1619/* Note: we do not have a reliable method to detect if the floppy is
1620 present. The current method is to try to open the floppy at every
1621 I/O and to keep it opened during a few hundreds of ms. */
1622static int fd_open(BlockDriverState *bs)
1623{
1624 BDRVRawState *s = bs->opaque;
1625 int last_media_present;
1626
1627 if (s->type != FTYPE_FD)
1628 return 0;
1629 last_media_present = (s->fd >= 0);
5fafdf24 1630 if (s->fd >= 0 &&
c57c846a 1631 (get_clock() - s->fd_open_time) >= FD_OPEN_TIMEOUT) {
2e1e79da 1632 qemu_close(s->fd);
19cb3738
FB
1633 s->fd = -1;
1634#ifdef DEBUG_FLOPPY
1635 printf("Floppy closed\n");
1636#endif
1637 }
1638 if (s->fd < 0) {
5fafdf24 1639 if (s->fd_got_error &&
c57c846a 1640 (get_clock() - s->fd_error_time) < FD_OPEN_TIMEOUT) {
19cb3738
FB
1641#ifdef DEBUG_FLOPPY
1642 printf("No floppy (open delayed)\n");
1643#endif
1644 return -EIO;
1645 }
6165f4d8 1646 s->fd = qemu_open(bs->filename, s->open_flags & ~O_NONBLOCK);
19cb3738 1647 if (s->fd < 0) {
c57c846a 1648 s->fd_error_time = get_clock();
19cb3738
FB
1649 s->fd_got_error = 1;
1650 if (last_media_present)
1651 s->fd_media_changed = 1;
1652#ifdef DEBUG_FLOPPY
1653 printf("No floppy\n");
1654#endif
1655 return -EIO;
1656 }
1657#ifdef DEBUG_FLOPPY
1658 printf("Floppy opened\n");
1659#endif
1660 }
1661 if (!last_media_present)
1662 s->fd_media_changed = 1;
c57c846a 1663 s->fd_open_time = get_clock();
19cb3738
FB
1664 s->fd_got_error = 0;
1665 return 0;
1666}
19cb3738 1667
63ec93db 1668static int hdev_ioctl(BlockDriverState *bs, unsigned long int req, void *buf)
985a03b0
TS
1669{
1670 BDRVRawState *s = bs->opaque;
1671
1672 return ioctl(s->fd, req, buf);
1673}
221f715d 1674
63ec93db 1675static BlockDriverAIOCB *hdev_aio_ioctl(BlockDriverState *bs,
221f715d
AL
1676 unsigned long int req, void *buf,
1677 BlockDriverCompletionFunc *cb, void *opaque)
1678{
f141eafe 1679 BDRVRawState *s = bs->opaque;
c208e8c2 1680 RawPosixAIOData *acb;
c4d9d196 1681 ThreadPool *pool;
221f715d 1682
f141eafe
AL
1683 if (fd_open(bs) < 0)
1684 return NULL;
c208e8c2
PB
1685
1686 acb = g_slice_new(RawPosixAIOData);
1687 acb->bs = bs;
1688 acb->aio_type = QEMU_AIO_IOCTL;
1689 acb->aio_fildes = s->fd;
1690 acb->aio_offset = 0;
1691 acb->aio_ioctl_buf = buf;
1692 acb->aio_ioctl_cmd = req;
c4d9d196
SH
1693 pool = aio_get_thread_pool(bdrv_get_aio_context(bs));
1694 return thread_pool_submit_aio(pool, aio_worker, acb, cb, opaque);
221f715d
AL
1695}
1696
a167ba50 1697#elif defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
9f23011a
BS
1698static int fd_open(BlockDriverState *bs)
1699{
1700 BDRVRawState *s = bs->opaque;
1701
1702 /* this is just to ensure s->fd is sane (its called by io ops) */
1703 if (s->fd >= 0)
1704 return 0;
1705 return -EIO;
1706}
9f23011a 1707#else /* !linux && !FreeBSD */
19cb3738 1708
08af02e2
AL
1709static int fd_open(BlockDriverState *bs)
1710{
1711 return 0;
1712}
1713
221f715d 1714#endif /* !linux && !FreeBSD */
04eeb8b6 1715
c36dd8a0
AF
1716static coroutine_fn BlockDriverAIOCB *hdev_aio_discard(BlockDriverState *bs,
1717 int64_t sector_num, int nb_sectors,
1718 BlockDriverCompletionFunc *cb, void *opaque)
1719{
1720 BDRVRawState *s = bs->opaque;
1721
1722 if (fd_open(bs) < 0) {
1723 return NULL;
1724 }
1725 return paio_submit(bs, s->fd, sector_num, NULL, nb_sectors,
1726 cb, opaque, QEMU_AIO_DISCARD|QEMU_AIO_BLKDEV);
1727}
1728
d0b4503e
PB
1729static coroutine_fn int hdev_co_write_zeroes(BlockDriverState *bs,
1730 int64_t sector_num, int nb_sectors, BdrvRequestFlags flags)
1731{
1732 BDRVRawState *s = bs->opaque;
1733 int rc;
1734
1735 rc = fd_open(bs);
1736 if (rc < 0) {
1737 return rc;
1738 }
1739 if (!(flags & BDRV_REQ_MAY_UNMAP)) {
97a2ae34
PB
1740 return paio_submit_co(bs, s->fd, sector_num, NULL, nb_sectors,
1741 QEMU_AIO_WRITE_ZEROES|QEMU_AIO_BLKDEV);
1742 } else if (s->discard_zeroes) {
1743 return paio_submit_co(bs, s->fd, sector_num, NULL, nb_sectors,
1744 QEMU_AIO_DISCARD|QEMU_AIO_BLKDEV);
d0b4503e 1745 }
97a2ae34 1746 return -ENOTSUP;
d0b4503e
PB
1747}
1748
d5124c00
HR
1749static int hdev_create(const char *filename, QEMUOptionParameter *options,
1750 Error **errp)
93c65b47
AL
1751{
1752 int fd;
1753 int ret = 0;
1754 struct stat stat_buf;
0e7e1989 1755 int64_t total_size = 0;
93c65b47 1756
0e7e1989
KW
1757 /* Read out options */
1758 while (options && options->name) {
1759 if (!strcmp(options->name, "size")) {
9040385d 1760 total_size = options->value.n / BDRV_SECTOR_SIZE;
0e7e1989
KW
1761 }
1762 options++;
1763 }
93c65b47 1764
6165f4d8 1765 fd = qemu_open(filename, O_WRONLY | O_BINARY);
e428e439
HR
1766 if (fd < 0) {
1767 ret = -errno;
1768 error_setg_errno(errp, -ret, "Could not open device");
1769 return ret;
1770 }
93c65b47 1771
e428e439 1772 if (fstat(fd, &stat_buf) < 0) {
57e69b7d 1773 ret = -errno;
e428e439
HR
1774 error_setg_errno(errp, -ret, "Could not stat device");
1775 } else if (!S_ISBLK(stat_buf.st_mode) && !S_ISCHR(stat_buf.st_mode)) {
1776 error_setg(errp,
1777 "The given file is neither a block nor a character device");
57e69b7d 1778 ret = -ENODEV;
e428e439
HR
1779 } else if (lseek(fd, 0, SEEK_END) < total_size * BDRV_SECTOR_SIZE) {
1780 error_setg(errp, "Device is too small");
93c65b47 1781 ret = -ENOSPC;
e428e439 1782 }
93c65b47 1783
2e1e79da 1784 qemu_close(fd);
93c65b47
AL
1785 return ret;
1786}
1787
5efa9d5a 1788static BlockDriver bdrv_host_device = {
0b4ce02e 1789 .format_name = "host_device",
84a12e66 1790 .protocol_name = "host_device",
0b4ce02e 1791 .instance_size = sizeof(BDRVRawState),
030be321 1792 .bdrv_needs_filename = true,
0b4ce02e 1793 .bdrv_probe_device = hdev_probe_device,
66f82cee 1794 .bdrv_file_open = hdev_open,
0b4ce02e 1795 .bdrv_close = raw_close,
1bc6b705
JC
1796 .bdrv_reopen_prepare = raw_reopen_prepare,
1797 .bdrv_reopen_commit = raw_reopen_commit,
1798 .bdrv_reopen_abort = raw_reopen_abort,
93c65b47 1799 .bdrv_create = hdev_create,
0b4ce02e 1800 .create_options = raw_create_options,
d0b4503e 1801 .bdrv_co_write_zeroes = hdev_co_write_zeroes,
3b46e624 1802
f141eafe
AL
1803 .bdrv_aio_readv = raw_aio_readv,
1804 .bdrv_aio_writev = raw_aio_writev,
b2e12bc6 1805 .bdrv_aio_flush = raw_aio_flush,
8238010b 1806 .bdrv_aio_discard = hdev_aio_discard,
c25f53b0 1807 .bdrv_refresh_limits = raw_refresh_limits,
3c529d93 1808
55b949c8 1809 .bdrv_truncate = raw_truncate,
e60f469c 1810 .bdrv_getlength = raw_getlength,
260a82e5 1811 .bdrv_get_info = raw_get_info,
4a1d5e1f
FZ
1812 .bdrv_get_allocated_file_size
1813 = raw_get_allocated_file_size,
19cb3738 1814
f3a5d3f8 1815 /* generic scsi device */
63ec93db
CH
1816#ifdef __linux__
1817 .bdrv_ioctl = hdev_ioctl,
63ec93db
CH
1818 .bdrv_aio_ioctl = hdev_aio_ioctl,
1819#endif
f3a5d3f8
CH
1820};
1821
1822#ifdef __linux__
015a1036
HR
1823static int floppy_open(BlockDriverState *bs, QDict *options, int flags,
1824 Error **errp)
f3a5d3f8
CH
1825{
1826 BDRVRawState *s = bs->opaque;
e428e439 1827 Error *local_err = NULL;
f3a5d3f8
CH
1828 int ret;
1829
f3a5d3f8 1830 s->type = FTYPE_FD;
f3a5d3f8 1831
19a3da7f 1832 /* open will not fail even if no floppy is inserted, so add O_NONBLOCK */
e428e439
HR
1833 ret = raw_open_common(bs, options, flags, O_NONBLOCK, &local_err);
1834 if (ret) {
84d18f06 1835 if (local_err) {
e428e439
HR
1836 error_propagate(errp, local_err);
1837 }
f3a5d3f8 1838 return ret;
e428e439 1839 }
f3a5d3f8
CH
1840
1841 /* close fd so that we can reopen it as needed */
2e1e79da 1842 qemu_close(s->fd);
f3a5d3f8
CH
1843 s->fd = -1;
1844 s->fd_media_changed = 1;
1845
1846 return 0;
1847}
1848
508c7cb3
CH
1849static int floppy_probe_device(const char *filename)
1850{
2ebf7c4b
CR
1851 int fd, ret;
1852 int prio = 0;
1853 struct floppy_struct fdparam;
343f8568 1854 struct stat st;
2ebf7c4b 1855
e1740828
CB
1856 if (strstart(filename, "/dev/fd", NULL) &&
1857 !strstart(filename, "/dev/fdset/", NULL)) {
2ebf7c4b 1858 prio = 50;
e1740828 1859 }
2ebf7c4b 1860
6165f4d8 1861 fd = qemu_open(filename, O_RDONLY | O_NONBLOCK);
2ebf7c4b
CR
1862 if (fd < 0) {
1863 goto out;
1864 }
343f8568
JS
1865 ret = fstat(fd, &st);
1866 if (ret == -1 || !S_ISBLK(st.st_mode)) {
1867 goto outc;
1868 }
2ebf7c4b
CR
1869
1870 /* Attempt to detect via a floppy specific ioctl */
1871 ret = ioctl(fd, FDGETPRM, &fdparam);
1872 if (ret >= 0)
1873 prio = 100;
1874
343f8568 1875outc:
2e1e79da 1876 qemu_close(fd);
2ebf7c4b
CR
1877out:
1878 return prio;
508c7cb3
CH
1879}
1880
1881
f3a5d3f8
CH
1882static int floppy_is_inserted(BlockDriverState *bs)
1883{
1884 return fd_open(bs) >= 0;
1885}
1886
1887static int floppy_media_changed(BlockDriverState *bs)
1888{
1889 BDRVRawState *s = bs->opaque;
1890 int ret;
1891
1892 /*
1893 * XXX: we do not have a true media changed indication.
1894 * It does not work if the floppy is changed without trying to read it.
1895 */
1896 fd_open(bs);
1897 ret = s->fd_media_changed;
1898 s->fd_media_changed = 0;
1899#ifdef DEBUG_FLOPPY
1900 printf("Floppy changed=%d\n", ret);
1901#endif
1902 return ret;
1903}
1904
f36f3949 1905static void floppy_eject(BlockDriverState *bs, bool eject_flag)
f3a5d3f8
CH
1906{
1907 BDRVRawState *s = bs->opaque;
1908 int fd;
1909
1910 if (s->fd >= 0) {
2e1e79da 1911 qemu_close(s->fd);
f3a5d3f8
CH
1912 s->fd = -1;
1913 }
6165f4d8 1914 fd = qemu_open(bs->filename, s->open_flags | O_NONBLOCK);
f3a5d3f8
CH
1915 if (fd >= 0) {
1916 if (ioctl(fd, FDEJECT, 0) < 0)
1917 perror("FDEJECT");
2e1e79da 1918 qemu_close(fd);
f3a5d3f8 1919 }
f3a5d3f8
CH
1920}
1921
1922static BlockDriver bdrv_host_floppy = {
1923 .format_name = "host_floppy",
84a12e66 1924 .protocol_name = "host_floppy",
f3a5d3f8 1925 .instance_size = sizeof(BDRVRawState),
030be321 1926 .bdrv_needs_filename = true,
508c7cb3 1927 .bdrv_probe_device = floppy_probe_device,
66f82cee 1928 .bdrv_file_open = floppy_open,
f3a5d3f8 1929 .bdrv_close = raw_close,
1bc6b705
JC
1930 .bdrv_reopen_prepare = raw_reopen_prepare,
1931 .bdrv_reopen_commit = raw_reopen_commit,
1932 .bdrv_reopen_abort = raw_reopen_abort,
f3a5d3f8 1933 .bdrv_create = hdev_create,
0b4ce02e 1934 .create_options = raw_create_options,
f3a5d3f8 1935
f3a5d3f8
CH
1936 .bdrv_aio_readv = raw_aio_readv,
1937 .bdrv_aio_writev = raw_aio_writev,
b2e12bc6 1938 .bdrv_aio_flush = raw_aio_flush,
c25f53b0 1939 .bdrv_refresh_limits = raw_refresh_limits,
f3a5d3f8 1940
55b949c8 1941 .bdrv_truncate = raw_truncate,
b94a2610
KW
1942 .bdrv_getlength = raw_getlength,
1943 .has_variable_length = true,
4a1d5e1f
FZ
1944 .bdrv_get_allocated_file_size
1945 = raw_get_allocated_file_size,
f3a5d3f8
CH
1946
1947 /* removable device support */
1948 .bdrv_is_inserted = floppy_is_inserted,
1949 .bdrv_media_changed = floppy_media_changed,
1950 .bdrv_eject = floppy_eject,
f3a5d3f8
CH
1951};
1952
015a1036
HR
1953static int cdrom_open(BlockDriverState *bs, QDict *options, int flags,
1954 Error **errp)
f3a5d3f8
CH
1955{
1956 BDRVRawState *s = bs->opaque;
e428e439
HR
1957 Error *local_err = NULL;
1958 int ret;
f3a5d3f8 1959
f3a5d3f8
CH
1960 s->type = FTYPE_CD;
1961
19a3da7f 1962 /* open will not fail even if no CD is inserted, so add O_NONBLOCK */
e428e439 1963 ret = raw_open_common(bs, options, flags, O_NONBLOCK, &local_err);
84d18f06 1964 if (local_err) {
e428e439
HR
1965 error_propagate(errp, local_err);
1966 }
1967 return ret;
f3a5d3f8
CH
1968}
1969
508c7cb3
CH
1970static int cdrom_probe_device(const char *filename)
1971{
3baf720e
CR
1972 int fd, ret;
1973 int prio = 0;
343f8568 1974 struct stat st;
3baf720e 1975
6165f4d8 1976 fd = qemu_open(filename, O_RDONLY | O_NONBLOCK);
3baf720e
CR
1977 if (fd < 0) {
1978 goto out;
1979 }
343f8568
JS
1980 ret = fstat(fd, &st);
1981 if (ret == -1 || !S_ISBLK(st.st_mode)) {
1982 goto outc;
1983 }
3baf720e
CR
1984
1985 /* Attempt to detect via a CDROM specific ioctl */
1986 ret = ioctl(fd, CDROM_DRIVE_STATUS, CDSL_CURRENT);
1987 if (ret >= 0)
1988 prio = 100;
1989
343f8568 1990outc:
2e1e79da 1991 qemu_close(fd);
3baf720e
CR
1992out:
1993 return prio;
508c7cb3
CH
1994}
1995
f3a5d3f8
CH
1996static int cdrom_is_inserted(BlockDriverState *bs)
1997{
1998 BDRVRawState *s = bs->opaque;
1999 int ret;
2000
2001 ret = ioctl(s->fd, CDROM_DRIVE_STATUS, CDSL_CURRENT);
2002 if (ret == CDS_DISC_OK)
2003 return 1;
2004 return 0;
2005}
2006
f36f3949 2007static void cdrom_eject(BlockDriverState *bs, bool eject_flag)
f3a5d3f8
CH
2008{
2009 BDRVRawState *s = bs->opaque;
2010
2011 if (eject_flag) {
2012 if (ioctl(s->fd, CDROMEJECT, NULL) < 0)
2013 perror("CDROMEJECT");
2014 } else {
2015 if (ioctl(s->fd, CDROMCLOSETRAY, NULL) < 0)
2016 perror("CDROMEJECT");
2017 }
f3a5d3f8
CH
2018}
2019
025e849a 2020static void cdrom_lock_medium(BlockDriverState *bs, bool locked)
f3a5d3f8
CH
2021{
2022 BDRVRawState *s = bs->opaque;
2023
2024 if (ioctl(s->fd, CDROM_LOCKDOOR, locked) < 0) {
2025 /*
2026 * Note: an error can happen if the distribution automatically
2027 * mounts the CD-ROM
2028 */
2029 /* perror("CDROM_LOCKDOOR"); */
2030 }
f3a5d3f8
CH
2031}
2032
2033static BlockDriver bdrv_host_cdrom = {
2034 .format_name = "host_cdrom",
84a12e66 2035 .protocol_name = "host_cdrom",
f3a5d3f8 2036 .instance_size = sizeof(BDRVRawState),
030be321 2037 .bdrv_needs_filename = true,
508c7cb3 2038 .bdrv_probe_device = cdrom_probe_device,
66f82cee 2039 .bdrv_file_open = cdrom_open,
f3a5d3f8 2040 .bdrv_close = raw_close,
1bc6b705
JC
2041 .bdrv_reopen_prepare = raw_reopen_prepare,
2042 .bdrv_reopen_commit = raw_reopen_commit,
2043 .bdrv_reopen_abort = raw_reopen_abort,
f3a5d3f8 2044 .bdrv_create = hdev_create,
0b4ce02e 2045 .create_options = raw_create_options,
f3a5d3f8 2046
f3a5d3f8
CH
2047 .bdrv_aio_readv = raw_aio_readv,
2048 .bdrv_aio_writev = raw_aio_writev,
b2e12bc6 2049 .bdrv_aio_flush = raw_aio_flush,
c25f53b0 2050 .bdrv_refresh_limits = raw_refresh_limits,
f3a5d3f8 2051
55b949c8 2052 .bdrv_truncate = raw_truncate,
b94a2610
KW
2053 .bdrv_getlength = raw_getlength,
2054 .has_variable_length = true,
4a1d5e1f
FZ
2055 .bdrv_get_allocated_file_size
2056 = raw_get_allocated_file_size,
f3a5d3f8
CH
2057
2058 /* removable device support */
2059 .bdrv_is_inserted = cdrom_is_inserted,
2060 .bdrv_eject = cdrom_eject,
025e849a 2061 .bdrv_lock_medium = cdrom_lock_medium,
f3a5d3f8
CH
2062
2063 /* generic scsi device */
63ec93db 2064 .bdrv_ioctl = hdev_ioctl,
63ec93db 2065 .bdrv_aio_ioctl = hdev_aio_ioctl,
f3a5d3f8
CH
2066};
2067#endif /* __linux__ */
2068
a167ba50 2069#if defined (__FreeBSD__) || defined(__FreeBSD_kernel__)
511018e4
AT
2070static int cdrom_open(BlockDriverState *bs, QDict *options, int flags,
2071 Error **errp)
f3a5d3f8
CH
2072{
2073 BDRVRawState *s = bs->opaque;
e428e439 2074 Error *local_err = NULL;
f3a5d3f8
CH
2075 int ret;
2076
2077 s->type = FTYPE_CD;
2078
e428e439
HR
2079 ret = raw_open_common(bs, options, flags, 0, &local_err);
2080 if (ret) {
84d18f06 2081 if (local_err) {
e428e439
HR
2082 error_propagate(errp, local_err);
2083 }
f3a5d3f8 2084 return ret;
e428e439 2085 }
f3a5d3f8 2086
9b2260cb 2087 /* make sure the door isn't locked at this time */
f3a5d3f8
CH
2088 ioctl(s->fd, CDIOCALLOW);
2089 return 0;
2090}
2091
508c7cb3
CH
2092static int cdrom_probe_device(const char *filename)
2093{
2094 if (strstart(filename, "/dev/cd", NULL) ||
2095 strstart(filename, "/dev/acd", NULL))
2096 return 100;
2097 return 0;
2098}
2099
f3a5d3f8
CH
2100static int cdrom_reopen(BlockDriverState *bs)
2101{
2102 BDRVRawState *s = bs->opaque;
2103 int fd;
2104
2105 /*
2106 * Force reread of possibly changed/newly loaded disc,
2107 * FreeBSD seems to not notice sometimes...
2108 */
2109 if (s->fd >= 0)
2e1e79da 2110 qemu_close(s->fd);
6165f4d8 2111 fd = qemu_open(bs->filename, s->open_flags, 0644);
f3a5d3f8
CH
2112 if (fd < 0) {
2113 s->fd = -1;
2114 return -EIO;
2115 }
2116 s->fd = fd;
2117
9b2260cb 2118 /* make sure the door isn't locked at this time */
f3a5d3f8
CH
2119 ioctl(s->fd, CDIOCALLOW);
2120 return 0;
2121}
2122
2123static int cdrom_is_inserted(BlockDriverState *bs)
2124{
2125 return raw_getlength(bs) > 0;
2126}
2127
f36f3949 2128static void cdrom_eject(BlockDriverState *bs, bool eject_flag)
f3a5d3f8
CH
2129{
2130 BDRVRawState *s = bs->opaque;
2131
2132 if (s->fd < 0)
822e1cd1 2133 return;
f3a5d3f8
CH
2134
2135 (void) ioctl(s->fd, CDIOCALLOW);
2136
2137 if (eject_flag) {
2138 if (ioctl(s->fd, CDIOCEJECT) < 0)
2139 perror("CDIOCEJECT");
2140 } else {
2141 if (ioctl(s->fd, CDIOCCLOSE) < 0)
2142 perror("CDIOCCLOSE");
2143 }
2144
822e1cd1 2145 cdrom_reopen(bs);
f3a5d3f8
CH
2146}
2147
025e849a 2148static void cdrom_lock_medium(BlockDriverState *bs, bool locked)
f3a5d3f8
CH
2149{
2150 BDRVRawState *s = bs->opaque;
2151
2152 if (s->fd < 0)
7bf37fed 2153 return;
f3a5d3f8
CH
2154 if (ioctl(s->fd, (locked ? CDIOCPREVENT : CDIOCALLOW)) < 0) {
2155 /*
2156 * Note: an error can happen if the distribution automatically
2157 * mounts the CD-ROM
2158 */
2159 /* perror("CDROM_LOCKDOOR"); */
2160 }
f3a5d3f8
CH
2161}
2162
2163static BlockDriver bdrv_host_cdrom = {
2164 .format_name = "host_cdrom",
84a12e66 2165 .protocol_name = "host_cdrom",
f3a5d3f8 2166 .instance_size = sizeof(BDRVRawState),
030be321 2167 .bdrv_needs_filename = true,
508c7cb3 2168 .bdrv_probe_device = cdrom_probe_device,
66f82cee 2169 .bdrv_file_open = cdrom_open,
f3a5d3f8 2170 .bdrv_close = raw_close,
1bc6b705
JC
2171 .bdrv_reopen_prepare = raw_reopen_prepare,
2172 .bdrv_reopen_commit = raw_reopen_commit,
2173 .bdrv_reopen_abort = raw_reopen_abort,
f3a5d3f8 2174 .bdrv_create = hdev_create,
0b4ce02e 2175 .create_options = raw_create_options,
f3a5d3f8 2176
f3a5d3f8
CH
2177 .bdrv_aio_readv = raw_aio_readv,
2178 .bdrv_aio_writev = raw_aio_writev,
b2e12bc6 2179 .bdrv_aio_flush = raw_aio_flush,
c25f53b0 2180 .bdrv_refresh_limits = raw_refresh_limits,
f3a5d3f8 2181
55b949c8 2182 .bdrv_truncate = raw_truncate,
b94a2610
KW
2183 .bdrv_getlength = raw_getlength,
2184 .has_variable_length = true,
4a1d5e1f
FZ
2185 .bdrv_get_allocated_file_size
2186 = raw_get_allocated_file_size,
f3a5d3f8 2187
19cb3738 2188 /* removable device support */
f3a5d3f8
CH
2189 .bdrv_is_inserted = cdrom_is_inserted,
2190 .bdrv_eject = cdrom_eject,
025e849a 2191 .bdrv_lock_medium = cdrom_lock_medium,
19cb3738 2192};
f3a5d3f8 2193#endif /* __FreeBSD__ */
5efa9d5a 2194
4065742a
SH
2195#ifdef CONFIG_LINUX_AIO
2196/**
2197 * Return the file descriptor for Linux AIO
2198 *
2199 * This function is a layering violation and should be removed when it becomes
2200 * possible to call the block layer outside the global mutex. It allows the
2201 * caller to hijack the file descriptor so I/O can be performed outside the
2202 * block layer.
2203 */
2204int raw_get_aio_fd(BlockDriverState *bs)
2205{
2206 BDRVRawState *s;
2207
2208 if (!bs->drv) {
2209 return -ENOMEDIUM;
2210 }
2211
2212 if (bs->drv == bdrv_find_format("raw")) {
2213 bs = bs->file;
2214 }
2215
2216 /* raw-posix has several protocols so just check for raw_aio_readv */
2217 if (bs->drv->bdrv_aio_readv != raw_aio_readv) {
2218 return -ENOTSUP;
2219 }
2220
2221 s = bs->opaque;
2222 if (!s->use_aio) {
2223 return -ENOTSUP;
2224 }
2225 return s->fd;
2226}
2227#endif /* CONFIG_LINUX_AIO */
2228
84a12e66 2229static void bdrv_file_init(void)
5efa9d5a 2230{
508c7cb3
CH
2231 /*
2232 * Register all the drivers. Note that order is important, the driver
2233 * registered last will get probed first.
2234 */
84a12e66 2235 bdrv_register(&bdrv_file);
5efa9d5a 2236 bdrv_register(&bdrv_host_device);
f3a5d3f8
CH
2237#ifdef __linux__
2238 bdrv_register(&bdrv_host_floppy);
2239 bdrv_register(&bdrv_host_cdrom);
2240#endif
a167ba50 2241#if defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
f3a5d3f8
CH
2242 bdrv_register(&bdrv_host_cdrom);
2243#endif
5efa9d5a
AL
2244}
2245
84a12e66 2246block_init(bdrv_file_init);