]> git.proxmox.com Git - mirror_qemu.git/blame - block/raw-posix.c
raw-posix: fix bdrv_aio_ioctl
[mirror_qemu.git] / block / raw-posix.c
CommitLineData
83f64091 1/*
223d4670 2 * Block driver for RAW files (posix)
5fafdf24 3 *
83f64091 4 * Copyright (c) 2006 Fabrice Bellard
5fafdf24 5 *
83f64091
FB
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to deal
8 * in the Software without restriction, including without limitation the rights
9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 * copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22 * THE SOFTWARE.
23 */
faf07963 24#include "qemu-common.h"
1de7afc9
PB
25#include "qemu/timer.h"
26#include "qemu/log.h"
737e150e 27#include "block/block_int.h"
1de7afc9 28#include "qemu/module.h"
de81a169 29#include "trace.h"
737e150e 30#include "block/thread-pool.h"
1de7afc9 31#include "qemu/iov.h"
9f8540ec 32#include "raw-aio.h"
83f64091 33
83affaa6 34#if defined(__APPLE__) && (__MACH__)
83f64091
FB
35#include <paths.h>
36#include <sys/param.h>
37#include <IOKit/IOKitLib.h>
38#include <IOKit/IOBSD.h>
39#include <IOKit/storage/IOMediaBSDClient.h>
40#include <IOKit/storage/IOMedia.h>
41#include <IOKit/storage/IOCDMedia.h>
42//#include <IOKit/storage/IOCDTypes.h>
43#include <CoreFoundation/CoreFoundation.h>
44#endif
45
46#ifdef __sun__
2e9671da 47#define _POSIX_PTHREAD_SEMANTICS 1
83f64091
FB
48#include <sys/dkio.h>
49#endif
19cb3738 50#ifdef __linux__
343f8568
JS
51#include <sys/types.h>
52#include <sys/stat.h>
19cb3738 53#include <sys/ioctl.h>
05acda4d 54#include <sys/param.h>
19cb3738
FB
55#include <linux/cdrom.h>
56#include <linux/fd.h>
5500316d
PB
57#include <linux/fs.h>
58#endif
59#ifdef CONFIG_FIEMAP
60#include <linux/fiemap.h>
19cb3738 61#endif
a167ba50 62#if defined (__FreeBSD__) || defined(__FreeBSD_kernel__)
1cb6c3fd 63#include <sys/disk.h>
9f23011a 64#include <sys/cdio.h>
1cb6c3fd 65#endif
83f64091 66
128ab2ff
BS
67#ifdef __OpenBSD__
68#include <sys/ioctl.h>
69#include <sys/disklabel.h>
70#include <sys/dkio.h>
71#endif
72
d1f6fd8d
CE
73#ifdef __NetBSD__
74#include <sys/ioctl.h>
75#include <sys/disklabel.h>
76#include <sys/dkio.h>
77#include <sys/disk.h>
78#endif
79
c5e97233
BS
80#ifdef __DragonFly__
81#include <sys/ioctl.h>
82#include <sys/diskslice.h>
83#endif
84
dce512de
CH
85#ifdef CONFIG_XFS
86#include <xfs/xfs.h>
87#endif
88
19cb3738 89//#define DEBUG_FLOPPY
83f64091 90
faf07963 91//#define DEBUG_BLOCK
03ff3ca3 92#if defined(DEBUG_BLOCK)
001faf32
BS
93#define DEBUG_BLOCK_PRINT(formatCstr, ...) do { if (qemu_log_enabled()) \
94 { qemu_log(formatCstr, ## __VA_ARGS__); qemu_log_flush(); } } while (0)
8c05dbf9 95#else
001faf32 96#define DEBUG_BLOCK_PRINT(formatCstr, ...)
8c05dbf9
TS
97#endif
98
f6465578
AL
99/* OS X does not have O_DSYNC */
100#ifndef O_DSYNC
1c27a8b3 101#ifdef O_SYNC
7ab064d2 102#define O_DSYNC O_SYNC
1c27a8b3
JA
103#elif defined(O_FSYNC)
104#define O_DSYNC O_FSYNC
105#endif
f6465578
AL
106#endif
107
9f7965c7
AL
108/* Approximate O_DIRECT with O_DSYNC if O_DIRECT isn't available */
109#ifndef O_DIRECT
110#define O_DIRECT O_DSYNC
111#endif
112
19cb3738
FB
113#define FTYPE_FILE 0
114#define FTYPE_CD 1
115#define FTYPE_FD 2
83f64091 116
c57c846a 117/* if the FD is not accessed during that time (in ns), we try to
19cb3738 118 reopen it to see if the disk has been changed */
c57c846a 119#define FD_OPEN_TIMEOUT (1000000000)
83f64091 120
581b9e29
CH
121#define MAX_BLOCKSIZE 4096
122
19cb3738
FB
/* Per-image state of the POSIX raw driver; reached through bs->opaque. */
typedef struct BDRVRawState {
    int fd;             /* host file descriptor; -1 while nothing is open */
    int type;           /* one of the FTYPE_* constants */
    int open_flags;     /* flags that were passed to qemu_open() for fd */
#if defined(__linux__)
    /* linux floppy specific */
    int64_t fd_open_time;
    int64_t fd_error_time;
    int fd_got_error;
    int fd_media_changed;
#endif
#ifdef CONFIG_LINUX_AIO
    int use_aio;        /* non-zero: aligned requests go to laio_submit() */
    void *aio_ctx;      /* handle returned by laio_init(), NULL if unset */
#endif
#ifdef CONFIG_XFS
    bool is_xfs : 1;    /* set when platform_test_xfs_fd() accepts fd */
#endif
} BDRVRawState;
142
eeb6b45d
JC
/*
 * Scratch state built by raw_reopen_prepare() and either installed by
 * raw_reopen_commit() or discarded by raw_reopen_abort().
 */
typedef struct BDRVRawReopenState {
    int fd;             /* descriptor prepared for the new flags, or -1 */
    int open_flags;     /* open flags the new descriptor carries */
#ifdef CONFIG_LINUX_AIO
    int use_aio;        /* use_aio value to install on commit */
#endif
} BDRVRawReopenState;
150
19cb3738 151static int fd_open(BlockDriverState *bs);
22afa7b5 152static int64_t raw_getlength(BlockDriverState *bs);
83f64091 153
de81a169
PB
/* One request handed to the thread pool and executed by aio_worker(). */
typedef struct RawPosixAIOData {
    BlockDriverState *bs;       /* device the request belongs to */
    int aio_fildes;             /* descriptor the request operates on */
    union {
        struct iovec *aio_iov;  /* scatter/gather list for reads/writes */
        void *aio_ioctl_buf;    /* argument buffer for QEMU_AIO_IOCTL */
    };
    int aio_niov;               /* number of entries in aio_iov */
    size_t aio_nbytes;          /* request length in bytes */
#define aio_ioctl_cmd aio_nbytes /* for QEMU_AIO_IOCTL */
    off_t aio_offset;           /* byte offset of the request */
    int aio_type;               /* QEMU_AIO_* type, possibly with flag bits */
} RawPosixAIOData;
167
a167ba50 168#if defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
f3a5d3f8 169static int cdrom_reopen(BlockDriverState *bs);
9f23011a
BS
170#endif
171
1de1ae0a
CE
#if defined(__NetBSD__)
/*
 * Redirect a block device path to its raw (character) counterpart.
 *
 * If *filename names a block device, *filename is pointed at a static buffer
 * holding the same path with the last component prefixed by 'r'.  Any other
 * kind of file is left untouched.  Because the buffer is static, the result
 * is overwritten by the next call.
 *
 * Returns 0 on success or -errno if lstat() fails.
 */
static int raw_normalize_devicepath(const char **filename)
{
    static char namebuf[PATH_MAX];
    const char *dp, *fname;
    struct stat sb;

    fname = *filename;
    dp = strrchr(fname, '/');
    if (lstat(fname, &sb) < 0) {
        /* fprintf()/strerror() may modify errno; latch it before reporting. */
        int saved_errno = errno;

        fprintf(stderr, "%s: stat failed: %s\n",
                fname, strerror(saved_errno));
        return -saved_errno;
    }

    if (!S_ISBLK(sb.st_mode)) {
        return 0;
    }

    if (dp == NULL) {
        /* No directory part: just prefix the name itself. */
        snprintf(namebuf, PATH_MAX, "r%s", fname);
    } else {
        /* Keep the directory part and prefix only the last component. */
        snprintf(namebuf, PATH_MAX, "%.*s/r%s",
                 (int)(dp - fname), fname, dp + 1);
    }
    fprintf(stderr, "%s is a block device", fname);
    *filename = namebuf;
    fprintf(stderr, ", using %s\n", *filename);

    return 0;
}
#else
/* No path rewriting is needed on this host; *filename is left untouched. */
static int raw_normalize_devicepath(const char **filename)
{
    return 0;
}
#endif
209
6a8dc042
JC
210static void raw_parse_flags(int bdrv_flags, int *open_flags)
211{
212 assert(open_flags != NULL);
213
214 *open_flags |= O_BINARY;
215 *open_flags &= ~O_ACCMODE;
216 if (bdrv_flags & BDRV_O_RDWR) {
217 *open_flags |= O_RDWR;
218 } else {
219 *open_flags |= O_RDONLY;
220 }
221
222 /* Use O_DSYNC for write-through caching, no flags for write-back caching,
223 * and O_DIRECT for no caching. */
224 if ((bdrv_flags & BDRV_O_NOCACHE)) {
225 *open_flags |= O_DIRECT;
226 }
6a8dc042
JC
227}
228
fc32a72d
JC
229#ifdef CONFIG_LINUX_AIO
230static int raw_set_aio(void **aio_ctx, int *use_aio, int bdrv_flags)
231{
232 int ret = -1;
233 assert(aio_ctx != NULL);
234 assert(use_aio != NULL);
235 /*
236 * Currently Linux do AIO only for files opened with O_DIRECT
237 * specified so check NOCACHE flag too
238 */
239 if ((bdrv_flags & (BDRV_O_NOCACHE|BDRV_O_NATIVE_AIO)) ==
240 (BDRV_O_NOCACHE|BDRV_O_NATIVE_AIO)) {
241
242 /* if non-NULL, laio_init() has already been run */
243 if (*aio_ctx == NULL) {
244 *aio_ctx = laio_init();
245 if (!*aio_ctx) {
246 goto error;
247 }
248 }
249 *use_aio = 1;
250 } else {
251 *use_aio = 0;
252 }
253
254 ret = 0;
255
256error:
257 return ret;
258}
259#endif
260
90babde0 261static int raw_open_common(BlockDriverState *bs, const char *filename,
19a3da7f 262 int bdrv_flags, int open_flags)
83f64091
FB
263{
264 BDRVRawState *s = bs->opaque;
0e1d8f4c 265 int fd, ret;
83f64091 266
1de1ae0a
CE
267 ret = raw_normalize_devicepath(&filename);
268 if (ret != 0) {
269 return ret;
270 }
271
6a8dc042
JC
272 s->open_flags = open_flags;
273 raw_parse_flags(bdrv_flags, &s->open_flags);
83f64091 274
90babde0 275 s->fd = -1;
40ff6d7e 276 fd = qemu_open(filename, s->open_flags, 0644);
19cb3738
FB
277 if (fd < 0) {
278 ret = -errno;
279 if (ret == -EROFS)
280 ret = -EACCES;
281 return ret;
282 }
83f64091 283 s->fd = fd;
9ef91a67 284
5c6c3a6c 285#ifdef CONFIG_LINUX_AIO
fc32a72d 286 if (raw_set_aio(&s->aio_ctx, &s->use_aio, bdrv_flags)) {
47e6b251
PB
287 qemu_close(fd);
288 return -errno;
9ef91a67 289 }
fc32a72d 290#endif
9ef91a67 291
dce512de
CH
292#ifdef CONFIG_XFS
293 if (platform_test_xfs_fd(s->fd)) {
294 s->is_xfs = 1;
295 }
296#endif
297
83f64091
FB
298 return 0;
299}
300
90babde0
CH
301static int raw_open(BlockDriverState *bs, const char *filename, int flags)
302{
303 BDRVRawState *s = bs->opaque;
304
305 s->type = FTYPE_FILE;
9a2d77ad 306 return raw_open_common(bs, filename, flags, 0);
90babde0
CH
307}
308
eeb6b45d
JC
309static int raw_reopen_prepare(BDRVReopenState *state,
310 BlockReopenQueue *queue, Error **errp)
311{
312 BDRVRawState *s;
313 BDRVRawReopenState *raw_s;
314 int ret = 0;
315
316 assert(state != NULL);
317 assert(state->bs != NULL);
318
319 s = state->bs->opaque;
320
321 state->opaque = g_malloc0(sizeof(BDRVRawReopenState));
322 raw_s = state->opaque;
323
324#ifdef CONFIG_LINUX_AIO
325 raw_s->use_aio = s->use_aio;
326
327 /* we can use s->aio_ctx instead of a copy, because the use_aio flag is
328 * valid in the 'false' condition even if aio_ctx is set, and raw_set_aio()
329 * won't override aio_ctx if aio_ctx is non-NULL */
330 if (raw_set_aio(&s->aio_ctx, &raw_s->use_aio, state->flags)) {
331 return -1;
332 }
333#endif
334
1bc6b705
JC
335 if (s->type == FTYPE_FD || s->type == FTYPE_CD) {
336 raw_s->open_flags |= O_NONBLOCK;
337 }
338
eeb6b45d
JC
339 raw_parse_flags(state->flags, &raw_s->open_flags);
340
341 raw_s->fd = -1;
342
343 int fcntl_flags = O_APPEND | O_ASYNC | O_NONBLOCK;
344#ifdef O_NOATIME
345 fcntl_flags |= O_NOATIME;
346#endif
347
348 if ((raw_s->open_flags & ~fcntl_flags) == (s->open_flags & ~fcntl_flags)) {
349 /* dup the original fd */
350 /* TODO: use qemu fcntl wrapper */
351#ifdef F_DUPFD_CLOEXEC
352 raw_s->fd = fcntl(s->fd, F_DUPFD_CLOEXEC, 0);
353#else
354 raw_s->fd = dup(s->fd);
355 if (raw_s->fd != -1) {
356 qemu_set_cloexec(raw_s->fd);
357 }
358#endif
359 if (raw_s->fd >= 0) {
360 ret = fcntl_setfl(raw_s->fd, raw_s->open_flags);
361 if (ret) {
362 qemu_close(raw_s->fd);
363 raw_s->fd = -1;
364 }
365 }
366 }
367
368 /* If we cannot use fcntl, or fcntl failed, fall back to qemu_open() */
369 if (raw_s->fd == -1) {
370 assert(!(raw_s->open_flags & O_CREAT));
371 raw_s->fd = qemu_open(state->bs->filename, raw_s->open_flags);
372 if (raw_s->fd == -1) {
373 ret = -1;
374 }
375 }
376 return ret;
377}
378
379
380static void raw_reopen_commit(BDRVReopenState *state)
381{
382 BDRVRawReopenState *raw_s = state->opaque;
383 BDRVRawState *s = state->bs->opaque;
384
385 s->open_flags = raw_s->open_flags;
386
387 qemu_close(s->fd);
388 s->fd = raw_s->fd;
389#ifdef CONFIG_LINUX_AIO
390 s->use_aio = raw_s->use_aio;
391#endif
392
393 g_free(state->opaque);
394 state->opaque = NULL;
395}
396
397
398static void raw_reopen_abort(BDRVReopenState *state)
399{
400 BDRVRawReopenState *raw_s = state->opaque;
401
402 /* nothing to do if NULL, we didn't get far enough */
403 if (raw_s == NULL) {
404 return;
405 }
406
407 if (raw_s->fd >= 0) {
408 qemu_close(raw_s->fd);
409 raw_s->fd = -1;
410 }
411 g_free(state->opaque);
412 state->opaque = NULL;
413}
414
415
83f64091
FB
416/* XXX: use host sector size if necessary with:
417#ifdef DIOCGSECTORSIZE
418 {
419 unsigned int sectorsize = 512;
420 if (!ioctl(fd, DIOCGSECTORSIZE, &sectorsize) &&
421 sectorsize > bufsize)
422 bufsize = sectorsize;
423 }
424#endif
425#ifdef CONFIG_COCOA
2ee9fb48 426 uint32_t blockSize = 512;
83f64091
FB
427 if ( !ioctl( fd, DKIOCGETBLOCKSIZE, &blockSize ) && blockSize > bufsize) {
428 bufsize = blockSize;
429 }
430#endif
431*/
432
9ef91a67
CH
433/*
434 * Check if all memory in this vector is sector aligned.
435 */
581b9e29 436static int qiov_is_aligned(BlockDriverState *bs, QEMUIOVector *qiov)
a76bab49 437{
9ef91a67 438 int i;
83f64091 439
9ef91a67 440 for (i = 0; i < qiov->niov; i++) {
581b9e29 441 if ((uintptr_t) qiov->iov[i].iov_base % bs->buffer_alignment) {
9ef91a67 442 return 0;
c16b5a2c 443 }
c16b5a2c 444 }
c16b5a2c 445
9ef91a67 446 return 1;
c16b5a2c
CH
447}
448
de81a169
PB
449static ssize_t handle_aiocb_ioctl(RawPosixAIOData *aiocb)
450{
451 int ret;
452
453 ret = ioctl(aiocb->aio_fildes, aiocb->aio_ioctl_cmd, aiocb->aio_ioctl_buf);
454 if (ret == -1) {
455 return -errno;
456 }
457
b608c8dc 458 return 0;
de81a169
PB
459}
460
461static ssize_t handle_aiocb_flush(RawPosixAIOData *aiocb)
462{
463 int ret;
464
465 ret = qemu_fdatasync(aiocb->aio_fildes);
466 if (ret == -1) {
467 return -errno;
468 }
469 return 0;
470}
471
/*
 * Thin wrappers around preadv()/pwritev().
 *
 * When the build has CONFIG_PREADV the calls are forwarded unchanged;
 * otherwise both wrappers return -ENOSYS.  preadv_present starts out
 * reflecting which of the two variants was compiled in.
 */
#ifdef CONFIG_PREADV
static bool preadv_present = true;
#else
static bool preadv_present = false;
#endif

/* Vectored positional read; see the comment above for the fallback. */
static ssize_t
qemu_preadv(int fd, const struct iovec *iov, int nr_iov, off_t offset)
{
#ifdef CONFIG_PREADV
    return preadv(fd, iov, nr_iov, offset);
#else
    return -ENOSYS;
#endif
}

/* Vectored positional write; see the comment above for the fallback. */
static ssize_t
qemu_pwritev(int fd, const struct iovec *iov, int nr_iov, off_t offset)
{
#ifdef CONFIG_PREADV
    return pwritev(fd, iov, nr_iov, offset);
#else
    return -ENOSYS;
#endif
}
505
506static ssize_t handle_aiocb_rw_vector(RawPosixAIOData *aiocb)
507{
508 ssize_t len;
509
510 do {
511 if (aiocb->aio_type & QEMU_AIO_WRITE)
512 len = qemu_pwritev(aiocb->aio_fildes,
513 aiocb->aio_iov,
514 aiocb->aio_niov,
515 aiocb->aio_offset);
516 else
517 len = qemu_preadv(aiocb->aio_fildes,
518 aiocb->aio_iov,
519 aiocb->aio_niov,
520 aiocb->aio_offset);
521 } while (len == -1 && errno == EINTR);
522
523 if (len == -1) {
524 return -errno;
525 }
526 return len;
527}
528
529/*
530 * Read/writes the data to/from a given linear buffer.
531 *
532 * Returns the number of bytes handles or -errno in case of an error. Short
533 * reads are only returned if the end of the file is reached.
534 */
535static ssize_t handle_aiocb_rw_linear(RawPosixAIOData *aiocb, char *buf)
536{
537 ssize_t offset = 0;
538 ssize_t len;
539
540 while (offset < aiocb->aio_nbytes) {
541 if (aiocb->aio_type & QEMU_AIO_WRITE) {
542 len = pwrite(aiocb->aio_fildes,
543 (const char *)buf + offset,
544 aiocb->aio_nbytes - offset,
545 aiocb->aio_offset + offset);
546 } else {
547 len = pread(aiocb->aio_fildes,
548 buf + offset,
549 aiocb->aio_nbytes - offset,
550 aiocb->aio_offset + offset);
551 }
552 if (len == -1 && errno == EINTR) {
553 continue;
554 } else if (len == -1) {
555 offset = -errno;
556 break;
557 } else if (len == 0) {
558 break;
559 }
560 offset += len;
561 }
562
563 return offset;
564}
565
566static ssize_t handle_aiocb_rw(RawPosixAIOData *aiocb)
567{
568 ssize_t nbytes;
569 char *buf;
570
571 if (!(aiocb->aio_type & QEMU_AIO_MISALIGNED)) {
572 /*
573 * If there is just a single buffer, and it is properly aligned
574 * we can just use plain pread/pwrite without any problems.
575 */
576 if (aiocb->aio_niov == 1) {
577 return handle_aiocb_rw_linear(aiocb, aiocb->aio_iov->iov_base);
578 }
579 /*
580 * We have more than one iovec, and all are properly aligned.
581 *
582 * Try preadv/pwritev first and fall back to linearizing the
583 * buffer if it's not supported.
584 */
585 if (preadv_present) {
586 nbytes = handle_aiocb_rw_vector(aiocb);
587 if (nbytes == aiocb->aio_nbytes ||
588 (nbytes < 0 && nbytes != -ENOSYS)) {
589 return nbytes;
590 }
591 preadv_present = false;
592 }
593
594 /*
595 * XXX(hch): short read/write. no easy way to handle the reminder
596 * using these interfaces. For now retry using plain
597 * pread/pwrite?
598 */
599 }
600
601 /*
602 * Ok, we have to do it the hard way, copy all segments into
603 * a single aligned buffer.
604 */
605 buf = qemu_blockalign(aiocb->bs, aiocb->aio_nbytes);
606 if (aiocb->aio_type & QEMU_AIO_WRITE) {
607 char *p = buf;
608 int i;
609
610 for (i = 0; i < aiocb->aio_niov; ++i) {
611 memcpy(p, aiocb->aio_iov[i].iov_base, aiocb->aio_iov[i].iov_len);
612 p += aiocb->aio_iov[i].iov_len;
613 }
614 }
615
616 nbytes = handle_aiocb_rw_linear(aiocb, buf);
617 if (!(aiocb->aio_type & QEMU_AIO_WRITE)) {
618 char *p = buf;
619 size_t count = aiocb->aio_nbytes, copy;
620 int i;
621
622 for (i = 0; i < aiocb->aio_niov && count; ++i) {
623 copy = count;
624 if (copy > aiocb->aio_iov[i].iov_len) {
625 copy = aiocb->aio_iov[i].iov_len;
626 }
627 memcpy(aiocb->aio_iov[i].iov_base, p, copy);
628 p += copy;
629 count -= copy;
630 }
631 }
632 qemu_vfree(buf);
633
634 return nbytes;
635}
636
637static int aio_worker(void *arg)
638{
639 RawPosixAIOData *aiocb = arg;
640 ssize_t ret = 0;
641
642 switch (aiocb->aio_type & QEMU_AIO_TYPE_MASK) {
643 case QEMU_AIO_READ:
644 ret = handle_aiocb_rw(aiocb);
645 if (ret >= 0 && ret < aiocb->aio_nbytes && aiocb->bs->growable) {
646 iov_memset(aiocb->aio_iov, aiocb->aio_niov, ret,
647 0, aiocb->aio_nbytes - ret);
648
649 ret = aiocb->aio_nbytes;
650 }
651 if (ret == aiocb->aio_nbytes) {
652 ret = 0;
653 } else if (ret >= 0 && ret < aiocb->aio_nbytes) {
654 ret = -EINVAL;
655 }
656 break;
657 case QEMU_AIO_WRITE:
658 ret = handle_aiocb_rw(aiocb);
659 if (ret == aiocb->aio_nbytes) {
660 ret = 0;
661 } else if (ret >= 0 && ret < aiocb->aio_nbytes) {
662 ret = -EINVAL;
663 }
664 break;
665 case QEMU_AIO_FLUSH:
666 ret = handle_aiocb_flush(aiocb);
667 break;
668 case QEMU_AIO_IOCTL:
669 ret = handle_aiocb_ioctl(aiocb);
670 break;
671 default:
672 fprintf(stderr, "invalid aio request (0x%x)\n", aiocb->aio_type);
673 ret = -EINVAL;
674 break;
675 }
676
677 g_slice_free(RawPosixAIOData, aiocb);
678 return ret;
679}
680
681static BlockDriverAIOCB *paio_submit(BlockDriverState *bs, int fd,
682 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
683 BlockDriverCompletionFunc *cb, void *opaque, int type)
684{
685 RawPosixAIOData *acb = g_slice_new(RawPosixAIOData);
686
687 acb->bs = bs;
688 acb->aio_type = type;
689 acb->aio_fildes = fd;
690
691 if (qiov) {
692 acb->aio_iov = qiov->iov;
693 acb->aio_niov = qiov->niov;
694 }
695 acb->aio_nbytes = nb_sectors * 512;
696 acb->aio_offset = sector_num * 512;
697
698 trace_paio_submit(acb, opaque, sector_num, nb_sectors, type);
699 return thread_pool_submit_aio(aio_worker, acb, cb, opaque);
700}
701
9ef91a67
CH
702static BlockDriverAIOCB *raw_aio_submit(BlockDriverState *bs,
703 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
704 BlockDriverCompletionFunc *cb, void *opaque, int type)
83f64091 705{
ce1a14dc 706 BDRVRawState *s = bs->opaque;
ce1a14dc 707
19cb3738
FB
708 if (fd_open(bs) < 0)
709 return NULL;
710
f141eafe
AL
711 /*
712 * If O_DIRECT is used the buffer needs to be aligned on a sector
c1ee7d56 713 * boundary. Check if this is the case or tell the low-level
9ef91a67 714 * driver that it needs to copy the buffer.
f141eafe 715 */
9acc5a06 716 if ((bs->open_flags & BDRV_O_NOCACHE)) {
581b9e29 717 if (!qiov_is_aligned(bs, qiov)) {
5c6c3a6c 718 type |= QEMU_AIO_MISALIGNED;
e44bd6fc 719#ifdef CONFIG_LINUX_AIO
5c6c3a6c
CH
720 } else if (s->use_aio) {
721 return laio_submit(bs, s->aio_ctx, s->fd, sector_num, qiov,
e44bd6fc
SW
722 nb_sectors, cb, opaque, type);
723#endif
5c6c3a6c 724 }
9ef91a67 725 }
f141eafe 726
1e5b9d2f 727 return paio_submit(bs, s->fd, sector_num, qiov, nb_sectors,
9ef91a67 728 cb, opaque, type);
83f64091
FB
729}
730
f141eafe
AL
731static BlockDriverAIOCB *raw_aio_readv(BlockDriverState *bs,
732 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
ce1a14dc 733 BlockDriverCompletionFunc *cb, void *opaque)
83f64091 734{
9ef91a67
CH
735 return raw_aio_submit(bs, sector_num, qiov, nb_sectors,
736 cb, opaque, QEMU_AIO_READ);
83f64091
FB
737}
738
f141eafe
AL
739static BlockDriverAIOCB *raw_aio_writev(BlockDriverState *bs,
740 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
ce1a14dc 741 BlockDriverCompletionFunc *cb, void *opaque)
83f64091 742{
9ef91a67
CH
743 return raw_aio_submit(bs, sector_num, qiov, nb_sectors,
744 cb, opaque, QEMU_AIO_WRITE);
83f64091 745}
53538725 746
b2e12bc6
CH
747static BlockDriverAIOCB *raw_aio_flush(BlockDriverState *bs,
748 BlockDriverCompletionFunc *cb, void *opaque)
749{
750 BDRVRawState *s = bs->opaque;
751
752 if (fd_open(bs) < 0)
753 return NULL;
754
1e5b9d2f 755 return paio_submit(bs, s->fd, 0, NULL, 0, cb, opaque, QEMU_AIO_FLUSH);
b2e12bc6
CH
756}
757
83f64091
FB
758static void raw_close(BlockDriverState *bs)
759{
760 BDRVRawState *s = bs->opaque;
19cb3738 761 if (s->fd >= 0) {
2e1e79da 762 qemu_close(s->fd);
19cb3738
FB
763 s->fd = -1;
764 }
83f64091
FB
765}
766
767static int raw_truncate(BlockDriverState *bs, int64_t offset)
768{
769 BDRVRawState *s = bs->opaque;
55b949c8
CH
770 struct stat st;
771
772 if (fstat(s->fd, &st)) {
83f64091 773 return -errno;
55b949c8
CH
774 }
775
776 if (S_ISREG(st.st_mode)) {
777 if (ftruncate(s->fd, offset) < 0) {
778 return -errno;
779 }
780 } else if (S_ISCHR(st.st_mode) || S_ISBLK(st.st_mode)) {
781 if (offset > raw_getlength(bs)) {
782 return -EINVAL;
783 }
784 } else {
785 return -ENOTSUP;
786 }
787
83f64091
FB
788 return 0;
789}
790
128ab2ff
BS
791#ifdef __OpenBSD__
792static int64_t raw_getlength(BlockDriverState *bs)
793{
794 BDRVRawState *s = bs->opaque;
795 int fd = s->fd;
796 struct stat st;
797
798 if (fstat(fd, &st))
799 return -1;
800 if (S_ISCHR(st.st_mode) || S_ISBLK(st.st_mode)) {
801 struct disklabel dl;
802
803 if (ioctl(fd, DIOCGDINFO, &dl))
804 return -1;
805 return (uint64_t)dl.d_secsize *
806 dl.d_partitions[DISKPART(st.st_rdev)].p_size;
807 } else
808 return st.st_size;
809}
d1f6fd8d
CE
810#elif defined(__NetBSD__)
811static int64_t raw_getlength(BlockDriverState *bs)
812{
813 BDRVRawState *s = bs->opaque;
814 int fd = s->fd;
815 struct stat st;
816
817 if (fstat(fd, &st))
818 return -1;
819 if (S_ISCHR(st.st_mode) || S_ISBLK(st.st_mode)) {
820 struct dkwedge_info dkw;
821
822 if (ioctl(fd, DIOCGWEDGEINFO, &dkw) != -1) {
823 return dkw.dkw_size * 512;
824 } else {
825 struct disklabel dl;
826
827 if (ioctl(fd, DIOCGDINFO, &dl))
828 return -1;
829 return (uint64_t)dl.d_secsize *
830 dl.d_partitions[DISKPART(st.st_rdev)].p_size;
831 }
832 } else
833 return st.st_size;
834}
50779cc2
CH
835#elif defined(__sun__)
836static int64_t raw_getlength(BlockDriverState *bs)
837{
838 BDRVRawState *s = bs->opaque;
839 struct dk_minfo minfo;
840 int ret;
841
842 ret = fd_open(bs);
843 if (ret < 0) {
844 return ret;
845 }
846
847 /*
848 * Use the DKIOCGMEDIAINFO ioctl to read the size.
849 */
850 ret = ioctl(s->fd, DKIOCGMEDIAINFO, &minfo);
851 if (ret != -1) {
852 return minfo.dki_lbsize * minfo.dki_capacity;
853 }
854
855 /*
856 * There are reports that lseek on some devices fails, but
857 * irc discussion said that contingency on contingency was overkill.
858 */
859 return lseek(s->fd, 0, SEEK_END);
860}
861#elif defined(CONFIG_BSD)
/*
 * Generic BSD (including FreeBSD, DragonFly and Mac OS X): length of the
 * image in bytes.
 *
 * For devices the size is obtained from DIOCGMEDIASIZE or DIOCGPART where
 * available, falling back to lseek(SEEK_END) (LONG_LONG_MAX on Mac OS X).
 * On FreeBSD an empty or absent CD triggers one cdrom_reopen() and a retry.
 * Other files use lseek(SEEK_END).
 *
 * Returns the error from fd_open() if the descriptor cannot be opened.
 *
 * NOTE(review): 'fd' is sampled from s->fd before fd_open() and is not
 * refreshed after cdrom_reopen(); confirm whether those calls can replace
 * s->fd, in which case the retry would use a stale descriptor.
 */
static int64_t raw_getlength(BlockDriverState *bs)
{
    BDRVRawState *s = bs->opaque;
    int fd = s->fd;
    int64_t size;
    struct stat sb;
#if defined (__FreeBSD__) || defined(__FreeBSD_kernel__)
    int reopened = 0;   /* ensures the CD reopen below happens only once */
#endif
    int ret;

    ret = fd_open(bs);
    if (ret < 0)
        return ret;

#if defined (__FreeBSD__) || defined(__FreeBSD_kernel__)
again:
#endif
    if (!fstat(fd, &sb) && (S_IFCHR & sb.st_mode)) {
        /*
         * The conditionals below are dangling on purpose: the assignment
         * from lseek()/LONG_LONG_MAX further down is the statement they
         * guard, i.e. it only runs when the ioctl gave no usable size.
         */
#ifdef DIOCGMEDIASIZE
        if (ioctl(fd, DIOCGMEDIASIZE, (off_t *)&size))
#elif defined(DIOCGPART)
        {
                struct partinfo pi;
                if (ioctl(fd, DIOCGPART, &pi) == 0)
                        size = pi.media_size;
                else
                        size = 0;
        }
        if (size == 0)
#endif
#if defined(__APPLE__) && defined(__MACH__)
        size = LONG_LONG_MAX;
#else
        size = lseek(fd, 0LL, SEEK_END);
#endif
#if defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
        switch(s->type) {
        case FTYPE_CD:
            /* XXX FreeBSD acd returns UINT_MAX sectors for an empty drive */
            if (size == 2048LL * (unsigned)-1)
                size = 0;
            /* XXX no disc? maybe we need to reopen... */
            if (size <= 0 && !reopened && cdrom_reopen(bs) >= 0) {
                reopened = 1;
                goto again;
            }
        }
#endif
    } else {
        size = lseek(fd, 0, SEEK_END);
    }
    return size;
}
50779cc2
CH
916#else
917static int64_t raw_getlength(BlockDriverState *bs)
918{
919 BDRVRawState *s = bs->opaque;
920 int ret;
921
922 ret = fd_open(bs);
923 if (ret < 0) {
924 return ret;
925 }
926
927 return lseek(s->fd, 0, SEEK_END);
928}
128ab2ff 929#endif
83f64091 930
4a1d5e1f
FZ
931static int64_t raw_get_allocated_file_size(BlockDriverState *bs)
932{
933 struct stat st;
934 BDRVRawState *s = bs->opaque;
935
936 if (fstat(s->fd, &st) < 0) {
937 return -errno;
938 }
939 return (int64_t)st.st_blocks * 512;
940}
941
0e7e1989 942static int raw_create(const char *filename, QEMUOptionParameter *options)
83f64091
FB
943{
944 int fd;
1e37d059 945 int result = 0;
0e7e1989 946 int64_t total_size = 0;
83f64091 947
0e7e1989
KW
948 /* Read out options */
949 while (options && options->name) {
950 if (!strcmp(options->name, BLOCK_OPT_SIZE)) {
9040385d 951 total_size = options->value.n / BDRV_SECTOR_SIZE;
0e7e1989
KW
952 }
953 options++;
954 }
83f64091 955
6165f4d8
CB
956 fd = qemu_open(filename, O_WRONLY | O_CREAT | O_TRUNC | O_BINARY,
957 0644);
1e37d059
SW
958 if (fd < 0) {
959 result = -errno;
960 } else {
9040385d 961 if (ftruncate(fd, total_size * BDRV_SECTOR_SIZE) != 0) {
1e37d059
SW
962 result = -errno;
963 }
2e1e79da 964 if (qemu_close(fd) != 0) {
1e37d059
SW
965 result = -errno;
966 }
967 }
968 return result;
83f64091
FB
969}
970
5500316d
PB
971/*
972 * Returns true iff the specified sector is present in the disk image. Drivers
973 * not implementing the functionality are assumed to not support backing files,
974 * hence all their sectors are reported as allocated.
975 *
976 * If 'sector_num' is beyond the end of the disk image the return value is 0
977 * and 'pnum' is set to 0.
978 *
979 * 'pnum' is set to the number of sectors (including and immediately following
980 * the specified sector) that are known to be in the same
981 * allocated/unallocated state.
982 *
983 * 'nb_sectors' is the max value 'pnum' should be set to. If nb_sectors goes
984 * beyond the end of the disk image it will be clamped.
985 */
986static int coroutine_fn raw_co_is_allocated(BlockDriverState *bs,
987 int64_t sector_num,
988 int nb_sectors, int *pnum)
989{
5500316d
PB
990 off_t start, data, hole;
991 int ret;
992
993 ret = fd_open(bs);
994 if (ret < 0) {
995 return ret;
996 }
997
998 start = sector_num * BDRV_SECTOR_SIZE;
94282e71 999
5500316d 1000#ifdef CONFIG_FIEMAP
94282e71
KW
1001
1002 BDRVRawState *s = bs->opaque;
5500316d
PB
1003 struct {
1004 struct fiemap fm;
1005 struct fiemap_extent fe;
1006 } f;
94282e71 1007
5500316d
PB
1008 f.fm.fm_start = start;
1009 f.fm.fm_length = (int64_t)nb_sectors * BDRV_SECTOR_SIZE;
1010 f.fm.fm_flags = 0;
1011 f.fm.fm_extent_count = 1;
1012 f.fm.fm_reserved = 0;
1013 if (ioctl(s->fd, FS_IOC_FIEMAP, &f) == -1) {
1014 /* Assume everything is allocated. */
1015 *pnum = nb_sectors;
1016 return 1;
1017 }
1018
1019 if (f.fm.fm_mapped_extents == 0) {
1020 /* No extents found, data is beyond f.fm.fm_start + f.fm.fm_length.
1021 * f.fm.fm_start + f.fm.fm_length must be clamped to the file size!
1022 */
1023 off_t length = lseek(s->fd, 0, SEEK_END);
1024 hole = f.fm.fm_start;
1025 data = MIN(f.fm.fm_start + f.fm.fm_length, length);
1026 } else {
1027 data = f.fe.fe_logical;
1028 hole = f.fe.fe_logical + f.fe.fe_length;
1029 }
94282e71 1030
5500316d 1031#elif defined SEEK_HOLE && defined SEEK_DATA
94282e71
KW
1032
1033 BDRVRawState *s = bs->opaque;
1034
5500316d
PB
1035 hole = lseek(s->fd, start, SEEK_HOLE);
1036 if (hole == -1) {
1037 /* -ENXIO indicates that sector_num was past the end of the file.
1038 * There is a virtual hole there. */
1039 assert(errno != -ENXIO);
1040
1041 /* Most likely EINVAL. Assume everything is allocated. */
1042 *pnum = nb_sectors;
1043 return 1;
1044 }
1045
1046 if (hole > start) {
1047 data = start;
1048 } else {
1049 /* On a hole. We need another syscall to find its end. */
1050 data = lseek(s->fd, start, SEEK_DATA);
1051 if (data == -1) {
1052 data = lseek(s->fd, 0, SEEK_END);
1053 }
1054 }
1055#else
1056 *pnum = nb_sectors;
1057 return 1;
1058#endif
1059
1060 if (data <= start) {
1061 /* On a data extent, compute sectors to the end of the extent. */
1062 *pnum = MIN(nb_sectors, (hole - start) / BDRV_SECTOR_SIZE);
1063 return 1;
1064 } else {
1065 /* On a hole, compute sectors to the beginning of the next extent. */
1066 *pnum = MIN(nb_sectors, (data - start) / BDRV_SECTOR_SIZE);
1067 return 0;
1068 }
1069}
1070
dce512de
CH
1071#ifdef CONFIG_XFS
1072static int xfs_discard(BDRVRawState *s, int64_t sector_num, int nb_sectors)
1073{
1074 struct xfs_flock64 fl;
1075
1076 memset(&fl, 0, sizeof(fl));
1077 fl.l_whence = SEEK_SET;
1078 fl.l_start = sector_num << 9;
1079 fl.l_len = (int64_t)nb_sectors << 9;
1080
1081 if (xfsctl(NULL, s->fd, XFS_IOC_UNRESVSP64, &fl) < 0) {
1082 DEBUG_BLOCK_PRINT("cannot punch hole (%s)\n", strerror(errno));
1083 return -errno;
1084 }
1085
1086 return 0;
1087}
1088#endif
1089
6db39ae2
PB
1090static coroutine_fn int raw_co_discard(BlockDriverState *bs,
1091 int64_t sector_num, int nb_sectors)
dce512de
CH
1092{
1093#ifdef CONFIG_XFS
1094 BDRVRawState *s = bs->opaque;
1095
1096 if (s->is_xfs) {
1097 return xfs_discard(s, sector_num, nb_sectors);
1098 }
1099#endif
1100
1101 return 0;
1102}
0e7e1989
KW
1103
/*
 * Creation options understood by raw_create(); the list is terminated by
 * an entry whose name is NULL.
 */
static QEMUOptionParameter raw_create_options[] = {
    {
        .name = BLOCK_OPT_SIZE,
        .type = OPT_SIZE,
        .help = "Virtual disk size"
    },
    { NULL }
};
1112
84a12e66
CH
1113static BlockDriver bdrv_file = {
1114 .format_name = "file",
1115 .protocol_name = "file",
856ae5c3
BS
1116 .instance_size = sizeof(BDRVRawState),
1117 .bdrv_probe = NULL, /* no probe for protocols */
66f82cee 1118 .bdrv_file_open = raw_open,
eeb6b45d
JC
1119 .bdrv_reopen_prepare = raw_reopen_prepare,
1120 .bdrv_reopen_commit = raw_reopen_commit,
1121 .bdrv_reopen_abort = raw_reopen_abort,
856ae5c3
BS
1122 .bdrv_close = raw_close,
1123 .bdrv_create = raw_create,
6db39ae2 1124 .bdrv_co_discard = raw_co_discard,
5500316d 1125 .bdrv_co_is_allocated = raw_co_is_allocated,
3b46e624 1126
f141eafe
AL
1127 .bdrv_aio_readv = raw_aio_readv,
1128 .bdrv_aio_writev = raw_aio_writev,
b2e12bc6 1129 .bdrv_aio_flush = raw_aio_flush,
3c529d93 1130
83f64091
FB
1131 .bdrv_truncate = raw_truncate,
1132 .bdrv_getlength = raw_getlength,
4a1d5e1f
FZ
1133 .bdrv_get_allocated_file_size
1134 = raw_get_allocated_file_size,
0e7e1989
KW
1135
1136 .create_options = raw_create_options,
83f64091
FB
1137};
1138
19cb3738
FB
1139/***********************************************/
1140/* host device */
1141
83affaa6 1142#if defined(__APPLE__) && defined(__MACH__)
19cb3738
FB
1143static kern_return_t FindEjectableCDMedia( io_iterator_t *mediaIterator );
1144static kern_return_t GetBSDPath( io_iterator_t mediaIterator, char *bsdPath, CFIndex maxPathSize );
1145
1146kern_return_t FindEjectableCDMedia( io_iterator_t *mediaIterator )
1147{
5fafdf24 1148 kern_return_t kernResult;
19cb3738
FB
1149 mach_port_t masterPort;
1150 CFMutableDictionaryRef classesToMatch;
1151
1152 kernResult = IOMasterPort( MACH_PORT_NULL, &masterPort );
1153 if ( KERN_SUCCESS != kernResult ) {
1154 printf( "IOMasterPort returned %d\n", kernResult );
1155 }
3b46e624 1156
5fafdf24 1157 classesToMatch = IOServiceMatching( kIOCDMediaClass );
19cb3738
FB
1158 if ( classesToMatch == NULL ) {
1159 printf( "IOServiceMatching returned a NULL dictionary.\n" );
1160 } else {
1161 CFDictionarySetValue( classesToMatch, CFSTR( kIOMediaEjectableKey ), kCFBooleanTrue );
1162 }
1163 kernResult = IOServiceGetMatchingServices( masterPort, classesToMatch, mediaIterator );
1164 if ( KERN_SUCCESS != kernResult )
1165 {
1166 printf( "IOServiceGetMatchingServices returned %d\n", kernResult );
1167 }
3b46e624 1168
19cb3738
FB
1169 return kernResult;
1170}
1171
1172kern_return_t GetBSDPath( io_iterator_t mediaIterator, char *bsdPath, CFIndex maxPathSize )
1173{
1174 io_object_t nextMedia;
1175 kern_return_t kernResult = KERN_FAILURE;
1176 *bsdPath = '\0';
1177 nextMedia = IOIteratorNext( mediaIterator );
1178 if ( nextMedia )
1179 {
1180 CFTypeRef bsdPathAsCFString;
1181 bsdPathAsCFString = IORegistryEntryCreateCFProperty( nextMedia, CFSTR( kIOBSDNameKey ), kCFAllocatorDefault, 0 );
1182 if ( bsdPathAsCFString ) {
1183 size_t devPathLength;
1184 strcpy( bsdPath, _PATH_DEV );
1185 strcat( bsdPath, "r" );
1186 devPathLength = strlen( bsdPath );
1187 if ( CFStringGetCString( bsdPathAsCFString, bsdPath + devPathLength, maxPathSize - devPathLength, kCFStringEncodingASCII ) ) {
1188 kernResult = KERN_SUCCESS;
1189 }
1190 CFRelease( bsdPathAsCFString );
1191 }
1192 IOObjectRelease( nextMedia );
1193 }
3b46e624 1194
19cb3738
FB
1195 return kernResult;
1196}
1197
1198#endif
1199
508c7cb3
CH
1200static int hdev_probe_device(const char *filename)
1201{
1202 struct stat st;
1203
1204 /* allow a dedicated CD-ROM driver to match with a higher priority */
1205 if (strstart(filename, "/dev/cdrom", NULL))
1206 return 50;
1207
1208 if (stat(filename, &st) >= 0 &&
1209 (S_ISCHR(st.st_mode) || S_ISBLK(st.st_mode))) {
1210 return 100;
1211 }
1212
1213 return 0;
1214}
1215
19cb3738
FB
1216static int hdev_open(BlockDriverState *bs, const char *filename, int flags)
1217{
1218 BDRVRawState *s = bs->opaque;
a76bab49 1219
83affaa6 1220#if defined(__APPLE__) && defined(__MACH__)
19cb3738
FB
1221 if (strstart(filename, "/dev/cdrom", NULL)) {
1222 kern_return_t kernResult;
1223 io_iterator_t mediaIterator;
1224 char bsdPath[ MAXPATHLEN ];
1225 int fd;
5fafdf24 1226
19cb3738
FB
1227 kernResult = FindEjectableCDMedia( &mediaIterator );
1228 kernResult = GetBSDPath( mediaIterator, bsdPath, sizeof( bsdPath ) );
3b46e624 1229
19cb3738
FB
1230 if ( bsdPath[ 0 ] != '\0' ) {
1231 strcat(bsdPath,"s0");
1232 /* some CDs don't have a partition 0 */
6165f4d8 1233 fd = qemu_open(bsdPath, O_RDONLY | O_BINARY | O_LARGEFILE);
19cb3738
FB
1234 if (fd < 0) {
1235 bsdPath[strlen(bsdPath)-1] = '1';
1236 } else {
2e1e79da 1237 qemu_close(fd);
19cb3738
FB
1238 }
1239 filename = bsdPath;
1240 }
3b46e624 1241
19cb3738
FB
1242 if ( mediaIterator )
1243 IOObjectRelease( mediaIterator );
1244 }
1245#endif
19cb3738
FB
1246
1247 s->type = FTYPE_FILE;
4dd75c70 1248#if defined(__linux__)
05acda4d
BK
1249 {
1250 char resolved_path[ MAXPATHLEN ], *temp;
1251
1252 temp = realpath(filename, resolved_path);
1253 if (temp && strstart(temp, "/dev/sg", NULL)) {
1254 bs->sg = 1;
1255 }
19cb3738
FB
1256 }
1257#endif
90babde0 1258
19a3da7f 1259 return raw_open_common(bs, filename, flags, 0);
19cb3738
FB
1260}
1261
03ff3ca3 1262#if defined(__linux__)
19cb3738
FB
1263/* Note: we do not have a reliable method to detect if the floppy is
1264 present. The current method is to try to open the floppy at every
1265 I/O and to keep it opened during a few hundreds of ms. */
1266static int fd_open(BlockDriverState *bs)
1267{
1268 BDRVRawState *s = bs->opaque;
1269 int last_media_present;
1270
1271 if (s->type != FTYPE_FD)
1272 return 0;
1273 last_media_present = (s->fd >= 0);
5fafdf24 1274 if (s->fd >= 0 &&
c57c846a 1275 (get_clock() - s->fd_open_time) >= FD_OPEN_TIMEOUT) {
2e1e79da 1276 qemu_close(s->fd);
19cb3738
FB
1277 s->fd = -1;
1278#ifdef DEBUG_FLOPPY
1279 printf("Floppy closed\n");
1280#endif
1281 }
1282 if (s->fd < 0) {
5fafdf24 1283 if (s->fd_got_error &&
c57c846a 1284 (get_clock() - s->fd_error_time) < FD_OPEN_TIMEOUT) {
19cb3738
FB
1285#ifdef DEBUG_FLOPPY
1286 printf("No floppy (open delayed)\n");
1287#endif
1288 return -EIO;
1289 }
6165f4d8 1290 s->fd = qemu_open(bs->filename, s->open_flags & ~O_NONBLOCK);
19cb3738 1291 if (s->fd < 0) {
c57c846a 1292 s->fd_error_time = get_clock();
19cb3738
FB
1293 s->fd_got_error = 1;
1294 if (last_media_present)
1295 s->fd_media_changed = 1;
1296#ifdef DEBUG_FLOPPY
1297 printf("No floppy\n");
1298#endif
1299 return -EIO;
1300 }
1301#ifdef DEBUG_FLOPPY
1302 printf("Floppy opened\n");
1303#endif
1304 }
1305 if (!last_media_present)
1306 s->fd_media_changed = 1;
c57c846a 1307 s->fd_open_time = get_clock();
19cb3738
FB
1308 s->fd_got_error = 0;
1309 return 0;
1310}
19cb3738 1311
63ec93db 1312static int hdev_ioctl(BlockDriverState *bs, unsigned long int req, void *buf)
985a03b0
TS
1313{
1314 BDRVRawState *s = bs->opaque;
1315
1316 return ioctl(s->fd, req, buf);
1317}
221f715d 1318
63ec93db 1319static BlockDriverAIOCB *hdev_aio_ioctl(BlockDriverState *bs,
221f715d
AL
1320 unsigned long int req, void *buf,
1321 BlockDriverCompletionFunc *cb, void *opaque)
1322{
f141eafe 1323 BDRVRawState *s = bs->opaque;
c208e8c2 1324 RawPosixAIOData *acb;
221f715d 1325
f141eafe
AL
1326 if (fd_open(bs) < 0)
1327 return NULL;
c208e8c2
PB
1328
1329 acb = g_slice_new(RawPosixAIOData);
1330 acb->bs = bs;
1331 acb->aio_type = QEMU_AIO_IOCTL;
1332 acb->aio_fildes = s->fd;
1333 acb->aio_offset = 0;
1334 acb->aio_ioctl_buf = buf;
1335 acb->aio_ioctl_cmd = req;
1336 return thread_pool_submit_aio(aio_worker, acb, cb, opaque);
221f715d
AL
1337}
1338
a167ba50 1339#elif defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
9f23011a
BS
1340static int fd_open(BlockDriverState *bs)
1341{
1342 BDRVRawState *s = bs->opaque;
1343
1344 /* this is just to ensure s->fd is sane (its called by io ops) */
1345 if (s->fd >= 0)
1346 return 0;
1347 return -EIO;
1348}
9f23011a 1349#else /* !linux && !FreeBSD */
19cb3738 1350
08af02e2
AL
1351static int fd_open(BlockDriverState *bs)
1352{
1353 return 0;
1354}
1355
221f715d 1356#endif /* !linux && !FreeBSD */
04eeb8b6 1357
0e7e1989 1358static int hdev_create(const char *filename, QEMUOptionParameter *options)
93c65b47
AL
1359{
1360 int fd;
1361 int ret = 0;
1362 struct stat stat_buf;
0e7e1989 1363 int64_t total_size = 0;
93c65b47 1364
0e7e1989
KW
1365 /* Read out options */
1366 while (options && options->name) {
1367 if (!strcmp(options->name, "size")) {
9040385d 1368 total_size = options->value.n / BDRV_SECTOR_SIZE;
0e7e1989
KW
1369 }
1370 options++;
1371 }
93c65b47 1372
6165f4d8 1373 fd = qemu_open(filename, O_WRONLY | O_BINARY);
93c65b47 1374 if (fd < 0)
57e69b7d 1375 return -errno;
93c65b47
AL
1376
1377 if (fstat(fd, &stat_buf) < 0)
57e69b7d 1378 ret = -errno;
4099df58 1379 else if (!S_ISBLK(stat_buf.st_mode) && !S_ISCHR(stat_buf.st_mode))
57e69b7d 1380 ret = -ENODEV;
9040385d 1381 else if (lseek(fd, 0, SEEK_END) < total_size * BDRV_SECTOR_SIZE)
93c65b47
AL
1382 ret = -ENOSPC;
1383
2e1e79da 1384 qemu_close(fd);
93c65b47
AL
1385 return ret;
1386}
1387
336c1c12
KW
1388static int hdev_has_zero_init(BlockDriverState *bs)
1389{
1390 return 0;
1391}
1392
5efa9d5a 1393static BlockDriver bdrv_host_device = {
0b4ce02e 1394 .format_name = "host_device",
84a12e66 1395 .protocol_name = "host_device",
0b4ce02e
KW
1396 .instance_size = sizeof(BDRVRawState),
1397 .bdrv_probe_device = hdev_probe_device,
66f82cee 1398 .bdrv_file_open = hdev_open,
0b4ce02e 1399 .bdrv_close = raw_close,
1bc6b705
JC
1400 .bdrv_reopen_prepare = raw_reopen_prepare,
1401 .bdrv_reopen_commit = raw_reopen_commit,
1402 .bdrv_reopen_abort = raw_reopen_abort,
93c65b47 1403 .bdrv_create = hdev_create,
0b4ce02e 1404 .create_options = raw_create_options,
336c1c12 1405 .bdrv_has_zero_init = hdev_has_zero_init,
3b46e624 1406
f141eafe
AL
1407 .bdrv_aio_readv = raw_aio_readv,
1408 .bdrv_aio_writev = raw_aio_writev,
b2e12bc6 1409 .bdrv_aio_flush = raw_aio_flush,
3c529d93 1410
55b949c8 1411 .bdrv_truncate = raw_truncate,
e60f469c 1412 .bdrv_getlength = raw_getlength,
4a1d5e1f
FZ
1413 .bdrv_get_allocated_file_size
1414 = raw_get_allocated_file_size,
19cb3738 1415
f3a5d3f8 1416 /* generic scsi device */
63ec93db
CH
1417#ifdef __linux__
1418 .bdrv_ioctl = hdev_ioctl,
63ec93db
CH
1419 .bdrv_aio_ioctl = hdev_aio_ioctl,
1420#endif
f3a5d3f8
CH
1421};
1422
1423#ifdef __linux__
1424static int floppy_open(BlockDriverState *bs, const char *filename, int flags)
1425{
1426 BDRVRawState *s = bs->opaque;
1427 int ret;
1428
f3a5d3f8 1429 s->type = FTYPE_FD;
f3a5d3f8 1430
19a3da7f
BS
1431 /* open will not fail even if no floppy is inserted, so add O_NONBLOCK */
1432 ret = raw_open_common(bs, filename, flags, O_NONBLOCK);
f3a5d3f8
CH
1433 if (ret)
1434 return ret;
1435
1436 /* close fd so that we can reopen it as needed */
2e1e79da 1437 qemu_close(s->fd);
f3a5d3f8
CH
1438 s->fd = -1;
1439 s->fd_media_changed = 1;
1440
1441 return 0;
1442}
1443
508c7cb3
CH
1444static int floppy_probe_device(const char *filename)
1445{
2ebf7c4b
CR
1446 int fd, ret;
1447 int prio = 0;
1448 struct floppy_struct fdparam;
343f8568 1449 struct stat st;
2ebf7c4b 1450
e1740828
CB
1451 if (strstart(filename, "/dev/fd", NULL) &&
1452 !strstart(filename, "/dev/fdset/", NULL)) {
2ebf7c4b 1453 prio = 50;
e1740828 1454 }
2ebf7c4b 1455
6165f4d8 1456 fd = qemu_open(filename, O_RDONLY | O_NONBLOCK);
2ebf7c4b
CR
1457 if (fd < 0) {
1458 goto out;
1459 }
343f8568
JS
1460 ret = fstat(fd, &st);
1461 if (ret == -1 || !S_ISBLK(st.st_mode)) {
1462 goto outc;
1463 }
2ebf7c4b
CR
1464
1465 /* Attempt to detect via a floppy specific ioctl */
1466 ret = ioctl(fd, FDGETPRM, &fdparam);
1467 if (ret >= 0)
1468 prio = 100;
1469
343f8568 1470outc:
2e1e79da 1471 qemu_close(fd);
2ebf7c4b
CR
1472out:
1473 return prio;
508c7cb3
CH
1474}
1475
1476
f3a5d3f8
CH
1477static int floppy_is_inserted(BlockDriverState *bs)
1478{
1479 return fd_open(bs) >= 0;
1480}
1481
1482static int floppy_media_changed(BlockDriverState *bs)
1483{
1484 BDRVRawState *s = bs->opaque;
1485 int ret;
1486
1487 /*
1488 * XXX: we do not have a true media changed indication.
1489 * It does not work if the floppy is changed without trying to read it.
1490 */
1491 fd_open(bs);
1492 ret = s->fd_media_changed;
1493 s->fd_media_changed = 0;
1494#ifdef DEBUG_FLOPPY
1495 printf("Floppy changed=%d\n", ret);
1496#endif
1497 return ret;
1498}
1499
f36f3949 1500static void floppy_eject(BlockDriverState *bs, bool eject_flag)
f3a5d3f8
CH
1501{
1502 BDRVRawState *s = bs->opaque;
1503 int fd;
1504
1505 if (s->fd >= 0) {
2e1e79da 1506 qemu_close(s->fd);
f3a5d3f8
CH
1507 s->fd = -1;
1508 }
6165f4d8 1509 fd = qemu_open(bs->filename, s->open_flags | O_NONBLOCK);
f3a5d3f8
CH
1510 if (fd >= 0) {
1511 if (ioctl(fd, FDEJECT, 0) < 0)
1512 perror("FDEJECT");
2e1e79da 1513 qemu_close(fd);
f3a5d3f8 1514 }
f3a5d3f8
CH
1515}
1516
1517static BlockDriver bdrv_host_floppy = {
1518 .format_name = "host_floppy",
84a12e66 1519 .protocol_name = "host_floppy",
f3a5d3f8 1520 .instance_size = sizeof(BDRVRawState),
508c7cb3 1521 .bdrv_probe_device = floppy_probe_device,
66f82cee 1522 .bdrv_file_open = floppy_open,
f3a5d3f8 1523 .bdrv_close = raw_close,
1bc6b705
JC
1524 .bdrv_reopen_prepare = raw_reopen_prepare,
1525 .bdrv_reopen_commit = raw_reopen_commit,
1526 .bdrv_reopen_abort = raw_reopen_abort,
f3a5d3f8 1527 .bdrv_create = hdev_create,
0b4ce02e 1528 .create_options = raw_create_options,
336c1c12 1529 .bdrv_has_zero_init = hdev_has_zero_init,
f3a5d3f8 1530
f3a5d3f8
CH
1531 .bdrv_aio_readv = raw_aio_readv,
1532 .bdrv_aio_writev = raw_aio_writev,
b2e12bc6 1533 .bdrv_aio_flush = raw_aio_flush,
f3a5d3f8 1534
55b949c8 1535 .bdrv_truncate = raw_truncate,
f3a5d3f8 1536 .bdrv_getlength = raw_getlength,
4a1d5e1f
FZ
1537 .bdrv_get_allocated_file_size
1538 = raw_get_allocated_file_size,
f3a5d3f8
CH
1539
1540 /* removable device support */
1541 .bdrv_is_inserted = floppy_is_inserted,
1542 .bdrv_media_changed = floppy_media_changed,
1543 .bdrv_eject = floppy_eject,
f3a5d3f8
CH
1544};
1545
1546static int cdrom_open(BlockDriverState *bs, const char *filename, int flags)
1547{
1548 BDRVRawState *s = bs->opaque;
1549
f3a5d3f8
CH
1550 s->type = FTYPE_CD;
1551
19a3da7f
BS
1552 /* open will not fail even if no CD is inserted, so add O_NONBLOCK */
1553 return raw_open_common(bs, filename, flags, O_NONBLOCK);
f3a5d3f8
CH
1554}
1555
508c7cb3
CH
1556static int cdrom_probe_device(const char *filename)
1557{
3baf720e
CR
1558 int fd, ret;
1559 int prio = 0;
343f8568 1560 struct stat st;
3baf720e 1561
6165f4d8 1562 fd = qemu_open(filename, O_RDONLY | O_NONBLOCK);
3baf720e
CR
1563 if (fd < 0) {
1564 goto out;
1565 }
343f8568
JS
1566 ret = fstat(fd, &st);
1567 if (ret == -1 || !S_ISBLK(st.st_mode)) {
1568 goto outc;
1569 }
3baf720e
CR
1570
1571 /* Attempt to detect via a CDROM specific ioctl */
1572 ret = ioctl(fd, CDROM_DRIVE_STATUS, CDSL_CURRENT);
1573 if (ret >= 0)
1574 prio = 100;
1575
343f8568 1576outc:
2e1e79da 1577 qemu_close(fd);
3baf720e
CR
1578out:
1579 return prio;
508c7cb3
CH
1580}
1581
f3a5d3f8
CH
1582static int cdrom_is_inserted(BlockDriverState *bs)
1583{
1584 BDRVRawState *s = bs->opaque;
1585 int ret;
1586
1587 ret = ioctl(s->fd, CDROM_DRIVE_STATUS, CDSL_CURRENT);
1588 if (ret == CDS_DISC_OK)
1589 return 1;
1590 return 0;
1591}
1592
f36f3949 1593static void cdrom_eject(BlockDriverState *bs, bool eject_flag)
f3a5d3f8
CH
1594{
1595 BDRVRawState *s = bs->opaque;
1596
1597 if (eject_flag) {
1598 if (ioctl(s->fd, CDROMEJECT, NULL) < 0)
1599 perror("CDROMEJECT");
1600 } else {
1601 if (ioctl(s->fd, CDROMCLOSETRAY, NULL) < 0)
1602 perror("CDROMEJECT");
1603 }
f3a5d3f8
CH
1604}
1605
025e849a 1606static void cdrom_lock_medium(BlockDriverState *bs, bool locked)
f3a5d3f8
CH
1607{
1608 BDRVRawState *s = bs->opaque;
1609
1610 if (ioctl(s->fd, CDROM_LOCKDOOR, locked) < 0) {
1611 /*
1612 * Note: an error can happen if the distribution automatically
1613 * mounts the CD-ROM
1614 */
1615 /* perror("CDROM_LOCKDOOR"); */
1616 }
f3a5d3f8
CH
1617}
1618
1619static BlockDriver bdrv_host_cdrom = {
1620 .format_name = "host_cdrom",
84a12e66 1621 .protocol_name = "host_cdrom",
f3a5d3f8 1622 .instance_size = sizeof(BDRVRawState),
508c7cb3 1623 .bdrv_probe_device = cdrom_probe_device,
66f82cee 1624 .bdrv_file_open = cdrom_open,
f3a5d3f8 1625 .bdrv_close = raw_close,
1bc6b705
JC
1626 .bdrv_reopen_prepare = raw_reopen_prepare,
1627 .bdrv_reopen_commit = raw_reopen_commit,
1628 .bdrv_reopen_abort = raw_reopen_abort,
f3a5d3f8 1629 .bdrv_create = hdev_create,
0b4ce02e 1630 .create_options = raw_create_options,
336c1c12 1631 .bdrv_has_zero_init = hdev_has_zero_init,
f3a5d3f8 1632
f3a5d3f8
CH
1633 .bdrv_aio_readv = raw_aio_readv,
1634 .bdrv_aio_writev = raw_aio_writev,
b2e12bc6 1635 .bdrv_aio_flush = raw_aio_flush,
f3a5d3f8 1636
55b949c8 1637 .bdrv_truncate = raw_truncate,
f3a5d3f8 1638 .bdrv_getlength = raw_getlength,
4a1d5e1f
FZ
1639 .bdrv_get_allocated_file_size
1640 = raw_get_allocated_file_size,
f3a5d3f8
CH
1641
1642 /* removable device support */
1643 .bdrv_is_inserted = cdrom_is_inserted,
1644 .bdrv_eject = cdrom_eject,
025e849a 1645 .bdrv_lock_medium = cdrom_lock_medium,
f3a5d3f8
CH
1646
1647 /* generic scsi device */
63ec93db 1648 .bdrv_ioctl = hdev_ioctl,
63ec93db 1649 .bdrv_aio_ioctl = hdev_aio_ioctl,
f3a5d3f8
CH
1650};
1651#endif /* __linux__ */
1652
a167ba50 1653#if defined (__FreeBSD__) || defined(__FreeBSD_kernel__)
f3a5d3f8
CH
1654static int cdrom_open(BlockDriverState *bs, const char *filename, int flags)
1655{
1656 BDRVRawState *s = bs->opaque;
1657 int ret;
1658
1659 s->type = FTYPE_CD;
1660
19a3da7f 1661 ret = raw_open_common(bs, filename, flags, 0);
f3a5d3f8
CH
1662 if (ret)
1663 return ret;
1664
9b2260cb 1665 /* make sure the door isn't locked at this time */
f3a5d3f8
CH
1666 ioctl(s->fd, CDIOCALLOW);
1667 return 0;
1668}
1669
508c7cb3
CH
1670static int cdrom_probe_device(const char *filename)
1671{
1672 if (strstart(filename, "/dev/cd", NULL) ||
1673 strstart(filename, "/dev/acd", NULL))
1674 return 100;
1675 return 0;
1676}
1677
f3a5d3f8
CH
1678static int cdrom_reopen(BlockDriverState *bs)
1679{
1680 BDRVRawState *s = bs->opaque;
1681 int fd;
1682
1683 /*
1684 * Force reread of possibly changed/newly loaded disc,
1685 * FreeBSD seems to not notice sometimes...
1686 */
1687 if (s->fd >= 0)
2e1e79da 1688 qemu_close(s->fd);
6165f4d8 1689 fd = qemu_open(bs->filename, s->open_flags, 0644);
f3a5d3f8
CH
1690 if (fd < 0) {
1691 s->fd = -1;
1692 return -EIO;
1693 }
1694 s->fd = fd;
1695
9b2260cb 1696 /* make sure the door isn't locked at this time */
f3a5d3f8
CH
1697 ioctl(s->fd, CDIOCALLOW);
1698 return 0;
1699}
1700
1701static int cdrom_is_inserted(BlockDriverState *bs)
1702{
1703 return raw_getlength(bs) > 0;
1704}
1705
f36f3949 1706static void cdrom_eject(BlockDriverState *bs, bool eject_flag)
f3a5d3f8
CH
1707{
1708 BDRVRawState *s = bs->opaque;
1709
1710 if (s->fd < 0)
822e1cd1 1711 return;
f3a5d3f8
CH
1712
1713 (void) ioctl(s->fd, CDIOCALLOW);
1714
1715 if (eject_flag) {
1716 if (ioctl(s->fd, CDIOCEJECT) < 0)
1717 perror("CDIOCEJECT");
1718 } else {
1719 if (ioctl(s->fd, CDIOCCLOSE) < 0)
1720 perror("CDIOCCLOSE");
1721 }
1722
822e1cd1 1723 cdrom_reopen(bs);
f3a5d3f8
CH
1724}
1725
025e849a 1726static void cdrom_lock_medium(BlockDriverState *bs, bool locked)
f3a5d3f8
CH
1727{
1728 BDRVRawState *s = bs->opaque;
1729
1730 if (s->fd < 0)
7bf37fed 1731 return;
f3a5d3f8
CH
1732 if (ioctl(s->fd, (locked ? CDIOCPREVENT : CDIOCALLOW)) < 0) {
1733 /*
1734 * Note: an error can happen if the distribution automatically
1735 * mounts the CD-ROM
1736 */
1737 /* perror("CDROM_LOCKDOOR"); */
1738 }
f3a5d3f8
CH
1739}
1740
1741static BlockDriver bdrv_host_cdrom = {
1742 .format_name = "host_cdrom",
84a12e66 1743 .protocol_name = "host_cdrom",
f3a5d3f8 1744 .instance_size = sizeof(BDRVRawState),
508c7cb3 1745 .bdrv_probe_device = cdrom_probe_device,
66f82cee 1746 .bdrv_file_open = cdrom_open,
f3a5d3f8 1747 .bdrv_close = raw_close,
1bc6b705
JC
1748 .bdrv_reopen_prepare = raw_reopen_prepare,
1749 .bdrv_reopen_commit = raw_reopen_commit,
1750 .bdrv_reopen_abort = raw_reopen_abort,
f3a5d3f8 1751 .bdrv_create = hdev_create,
0b4ce02e 1752 .create_options = raw_create_options,
336c1c12 1753 .bdrv_has_zero_init = hdev_has_zero_init,
f3a5d3f8 1754
f3a5d3f8
CH
1755 .bdrv_aio_readv = raw_aio_readv,
1756 .bdrv_aio_writev = raw_aio_writev,
b2e12bc6 1757 .bdrv_aio_flush = raw_aio_flush,
f3a5d3f8 1758
55b949c8 1759 .bdrv_truncate = raw_truncate,
f3a5d3f8 1760 .bdrv_getlength = raw_getlength,
4a1d5e1f
FZ
1761 .bdrv_get_allocated_file_size
1762 = raw_get_allocated_file_size,
f3a5d3f8 1763
19cb3738 1764 /* removable device support */
f3a5d3f8
CH
1765 .bdrv_is_inserted = cdrom_is_inserted,
1766 .bdrv_eject = cdrom_eject,
025e849a 1767 .bdrv_lock_medium = cdrom_lock_medium,
19cb3738 1768};
f3a5d3f8 1769#endif /* __FreeBSD__ */
5efa9d5a 1770
4065742a
SH
1771#ifdef CONFIG_LINUX_AIO
1772/**
1773 * Return the file descriptor for Linux AIO
1774 *
1775 * This function is a layering violation and should be removed when it becomes
1776 * possible to call the block layer outside the global mutex. It allows the
1777 * caller to hijack the file descriptor so I/O can be performed outside the
1778 * block layer.
1779 */
1780int raw_get_aio_fd(BlockDriverState *bs)
1781{
1782 BDRVRawState *s;
1783
1784 if (!bs->drv) {
1785 return -ENOMEDIUM;
1786 }
1787
1788 if (bs->drv == bdrv_find_format("raw")) {
1789 bs = bs->file;
1790 }
1791
1792 /* raw-posix has several protocols so just check for raw_aio_readv */
1793 if (bs->drv->bdrv_aio_readv != raw_aio_readv) {
1794 return -ENOTSUP;
1795 }
1796
1797 s = bs->opaque;
1798 if (!s->use_aio) {
1799 return -ENOTSUP;
1800 }
1801 return s->fd;
1802}
1803#endif /* CONFIG_LINUX_AIO */
1804
84a12e66 1805static void bdrv_file_init(void)
5efa9d5a 1806{
508c7cb3
CH
1807 /*
1808 * Register all the drivers. Note that order is important, the driver
1809 * registered last will get probed first.
1810 */
84a12e66 1811 bdrv_register(&bdrv_file);
5efa9d5a 1812 bdrv_register(&bdrv_host_device);
f3a5d3f8
CH
1813#ifdef __linux__
1814 bdrv_register(&bdrv_host_floppy);
1815 bdrv_register(&bdrv_host_cdrom);
1816#endif
a167ba50 1817#if defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
f3a5d3f8
CH
1818 bdrv_register(&bdrv_host_cdrom);
1819#endif
5efa9d5a
AL
1820}
1821
84a12e66 1822block_init(bdrv_file_init);