]> git.proxmox.com Git - mirror_qemu.git/blame - block/raw-posix.c
nvme: Fix memleak in nvme_dma_read_prp
[mirror_qemu.git] / block / raw-posix.c
CommitLineData
83f64091 1/*
223d4670 2 * Block driver for RAW files (posix)
5fafdf24 3 *
83f64091 4 * Copyright (c) 2006 Fabrice Bellard
5fafdf24 5 *
83f64091
FB
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to deal
8 * in the Software without restriction, including without limitation the rights
9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 * copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22 * THE SOFTWARE.
23 */
faf07963 24#include "qemu-common.h"
d49b6836 25#include "qemu/error-report.h"
1de7afc9
PB
26#include "qemu/timer.h"
27#include "qemu/log.h"
737e150e 28#include "block/block_int.h"
1de7afc9 29#include "qemu/module.h"
de81a169 30#include "trace.h"
737e150e 31#include "block/thread-pool.h"
1de7afc9 32#include "qemu/iov.h"
9f8540ec 33#include "raw-aio.h"
06247428 34#include "qapi/util.h"
d49b6836 35#include "qapi/qmp/qstring.h"
83f64091 36
83affaa6 37#if defined(__APPLE__) && (__MACH__)
83f64091
FB
38#include <paths.h>
39#include <sys/param.h>
40#include <IOKit/IOKitLib.h>
41#include <IOKit/IOBSD.h>
42#include <IOKit/storage/IOMediaBSDClient.h>
43#include <IOKit/storage/IOMedia.h>
44#include <IOKit/storage/IOCDMedia.h>
45//#include <IOKit/storage/IOCDTypes.h>
46#include <CoreFoundation/CoreFoundation.h>
47#endif
48
49#ifdef __sun__
2e9671da 50#define _POSIX_PTHREAD_SEMANTICS 1
83f64091
FB
51#include <sys/dkio.h>
52#endif
19cb3738 53#ifdef __linux__
343f8568
JS
54#include <sys/types.h>
55#include <sys/stat.h>
19cb3738 56#include <sys/ioctl.h>
05acda4d 57#include <sys/param.h>
19cb3738
FB
58#include <linux/cdrom.h>
59#include <linux/fd.h>
5500316d 60#include <linux/fs.h>
1a9335e4
ET
61#include <linux/hdreg.h>
62#ifdef __s390__
63#include <asm/dasd.h>
64#endif
4ab15590
CL
65#ifndef FS_NOCOW_FL
66#define FS_NOCOW_FL 0x00800000 /* Do not cow file */
67#endif
5500316d 68#endif
b953f075 69#if defined(CONFIG_FALLOCATE_PUNCH_HOLE) || defined(CONFIG_FALLOCATE_ZERO_RANGE)
3d4fa43e
KK
70#include <linux/falloc.h>
71#endif
a167ba50 72#if defined (__FreeBSD__) || defined(__FreeBSD_kernel__)
1cb6c3fd 73#include <sys/disk.h>
9f23011a 74#include <sys/cdio.h>
1cb6c3fd 75#endif
83f64091 76
128ab2ff
BS
77#ifdef __OpenBSD__
78#include <sys/ioctl.h>
79#include <sys/disklabel.h>
80#include <sys/dkio.h>
81#endif
82
d1f6fd8d
CE
83#ifdef __NetBSD__
84#include <sys/ioctl.h>
85#include <sys/disklabel.h>
86#include <sys/dkio.h>
87#include <sys/disk.h>
88#endif
89
c5e97233
BS
90#ifdef __DragonFly__
91#include <sys/ioctl.h>
92#include <sys/diskslice.h>
93#endif
94
dce512de
CH
95#ifdef CONFIG_XFS
96#include <xfs/xfs.h>
97#endif
98
19cb3738 99//#define DEBUG_FLOPPY
83f64091 100
faf07963 101//#define DEBUG_BLOCK
03ff3ca3 102#if defined(DEBUG_BLOCK)
001faf32
BS
103#define DEBUG_BLOCK_PRINT(formatCstr, ...) do { if (qemu_log_enabled()) \
104 { qemu_log(formatCstr, ## __VA_ARGS__); qemu_log_flush(); } } while (0)
8c05dbf9 105#else
001faf32 106#define DEBUG_BLOCK_PRINT(formatCstr, ...)
8c05dbf9
TS
107#endif
108
f6465578
AL
109/* OS X does not have O_DSYNC */
110#ifndef O_DSYNC
1c27a8b3 111#ifdef O_SYNC
7ab064d2 112#define O_DSYNC O_SYNC
1c27a8b3
JA
113#elif defined(O_FSYNC)
114#define O_DSYNC O_FSYNC
115#endif
f6465578
AL
116#endif
117
9f7965c7
AL
118/* Approximate O_DIRECT with O_DSYNC if O_DIRECT isn't available */
119#ifndef O_DIRECT
120#define O_DIRECT O_DSYNC
121#endif
122
19cb3738
FB
123#define FTYPE_FILE 0
124#define FTYPE_CD 1
125#define FTYPE_FD 2
83f64091 126
c57c846a 127/* if the FD is not accessed during that time (in ns), we try to
19cb3738 128 reopen it to see if the disk has been changed */
c57c846a 129#define FD_OPEN_TIMEOUT (1000000000)
83f64091 130
581b9e29
CH
131#define MAX_BLOCKSIZE 4096
132
19cb3738
FB
/* Driver-private state stored in bs->opaque for the raw POSIX drivers. */
typedef struct BDRVRawState {
    int fd;             /* descriptor of the opened image (see raw_open_common) */
    int type;           /* one of FTYPE_FILE, FTYPE_CD, FTYPE_FD */
    int open_flags;     /* open(2) flags the descriptor was opened with */
    size_t buf_align;   /* memory buffer alignment found by raw_probe_alignment */

#if defined(__linux__)
    /* linux floppy specific */
    int64_t fd_open_time;
    int64_t fd_error_time;
    int fd_got_error;
    int fd_media_changed;
#endif
#ifdef CONFIG_LINUX_AIO
    int use_aio;        /* non-zero when requests go through Linux AIO */
    void *aio_ctx;      /* handle returned by laio_init() */
#endif
#ifdef CONFIG_XFS
    bool is_xfs:1;      /* set when platform_test_xfs_fd() accepts the fd */
#endif
    bool has_discard:1;         /* cleared once discard returns -ENOTSUP */
    bool has_write_zeroes:1;    /* cleared once zero-write returns -ENOTSUP */
    bool discard_zeroes:1;
    bool has_fallocate;         /* set for regular files in raw_open_common */
    bool needs_alignment;       /* requests must honour the probed alignment */
} BDRVRawState;
159
eeb6b45d
JC
/*
 * Temporary state built by raw_reopen_prepare() and consumed by
 * raw_reopen_commit() or discarded by raw_reopen_abort().
 */
typedef struct BDRVRawReopenState {
    int fd;             /* descriptor opened with the new flags, or -1 */
    int open_flags;     /* open(2) flags computed for the reopened image */
#ifdef CONFIG_LINUX_AIO
    int use_aio;        /* AIO setting to apply on commit */
#endif
} BDRVRawReopenState;
167
19cb3738 168static int fd_open(BlockDriverState *bs);
22afa7b5 169static int64_t raw_getlength(BlockDriverState *bs);
83f64091 170
de81a169
PB
/* Description of one request handed to the handle_aiocb_*() helpers. */
typedef struct RawPosixAIOData {
    BlockDriverState *bs;
    int aio_fildes;             /* descriptor the request operates on */
    union {
        struct iovec *aio_iov;  /* scatter/gather list for read/write */
        void *aio_ioctl_buf;    /* argument buffer for ioctl requests */
    };
    int aio_niov;               /* number of entries in aio_iov */
    uint64_t aio_nbytes;        /* request length in bytes */
#define aio_ioctl_cmd   aio_nbytes /* for QEMU_AIO_IOCTL */
    off_t aio_offset;           /* byte offset of the request */
    int aio_type;               /* QEMU_AIO_* type and flag bits */
} RawPosixAIOData;
184
a167ba50 185#if defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
f3a5d3f8 186static int cdrom_reopen(BlockDriverState *bs);
9f23011a
BS
187#endif
188
1de1ae0a
CE
#if defined(__NetBSD__)
/*
 * If *filename names a block device, replace it with the path whose last
 * component is prefixed by 'r' and report the substitution on stderr.
 * Other file types are left untouched.
 *
 * The rewritten path lives in a function-local static buffer, so the result
 * is overwritten by the next call and the function is not reentrant.
 *
 * Returns 0 on success or -errno if lstat() fails.
 */
static int raw_normalize_devicepath(const char **filename)
{
    static char namebuf[PATH_MAX];
    const char *dp, *fname;
    struct stat sb;

    fname = *filename;
    dp = strrchr(fname, '/');
    if (lstat(fname, &sb) < 0) {
        /* Capture errno before fprintf(), which is allowed to modify it. */
        int saved_errno = errno;

        fprintf(stderr, "%s: stat failed: %s\n",
            fname, strerror(saved_errno));
        return -saved_errno;
    }

    if (!S_ISBLK(sb.st_mode)) {
        return 0;
    }

    if (dp == NULL) {
        snprintf(namebuf, PATH_MAX, "r%s", fname);
    } else {
        snprintf(namebuf, PATH_MAX, "%.*s/r%s",
            (int)(dp - fname), fname, dp + 1);
    }
    fprintf(stderr, "%s is a block device", fname);
    *filename = namebuf;
    fprintf(stderr, ", using %s\n", *filename);

    return 0;
}
#else
/* No path rewriting is needed on this platform; always succeeds. */
static int raw_normalize_devicepath(const char **filename)
{
    return 0;
}
#endif
226
8a4ed0d1
ET
/*
 * Query the logical block size of @fd with whichever of the BLKSSZGET,
 * DKIOCGETBLOCKSIZE and DIOCGSECTORSIZE ioctls this platform defines.
 * Every available ioctl is tried; each success overwrites @sector_size_p.
 *
 * Returns 0 if at least one ioctl succeeded, otherwise -errno (-ENOTSUP
 * when no ioctl is available at all).
 */
static int probe_logical_blocksize(int fd, unsigned int *sector_size_p)
{
    unsigned int size;
    bool found = false;

    /* Reported when none of the ioctls below is compiled in */
    errno = ENOTSUP;

#ifdef BLKSSZGET
    if (ioctl(fd, BLKSSZGET, &size) >= 0) {
        found = true;
        *sector_size_p = size;
    }
#endif
#ifdef DKIOCGETBLOCKSIZE
    if (ioctl(fd, DKIOCGETBLOCKSIZE, &size) >= 0) {
        found = true;
        *sector_size_p = size;
    }
#endif
#ifdef DIOCGSECTORSIZE
    if (ioctl(fd, DIOCGSECTORSIZE, &size) >= 0) {
        found = true;
        *sector_size_p = size;
    }
#endif

    if (!found) {
        return -errno;
    }
    return 0;
}
259
1a9335e4
ET
/**
 * Query the physical block size of @fd via BLKPBSZGET.
 * Returns 0 and fills @blk_size on success, -errno if the ioctl fails,
 * or -ENOTSUP when BLKPBSZGET is not defined on this platform.
 */
static int probe_physical_blocksize(int fd, unsigned int *blk_size)
{
#ifdef BLKPBSZGET
    return ioctl(fd, BLKPBSZGET, blk_size) < 0 ? -errno : 0;
#else
    return -ENOTSUP;
#endif
}
276
22d182e8
SH
/*
 * Probe whether a read of @len bytes at offset 0 into @buf is accepted.
 *
 * Used to discover the O_DIRECT memory buffer and request alignment: the
 * read is attempted for real and only its acceptance matters.
 */
static bool raw_is_io_aligned(int fd, void *buf, size_t len)
{
    if (pread(fd, buf, len, 0) >= 0) {
        return true;
    }

#ifdef __linux__
    /* Linux signals a misaligned O_DIRECT read with EINVAL.  Any other
     * error (e.g. a real I/O error on a failing drive) says nothing about
     * alignment, so it is treated as "aligned".
     */
    return errno != EINVAL;
#else
    return false;
#endif
}
301
8a4ed0d1
ET
302static void raw_probe_alignment(BlockDriverState *bs, int fd, Error **errp)
303{
304 BDRVRawState *s = bs->opaque;
305 char *buf;
459b4e66 306 size_t max_align = MAX(MAX_BLOCKSIZE, getpagesize());
8a4ed0d1
ET
307
308 /* For /dev/sg devices the alignment is not really used.
309 With buffered I/O, we don't have any restrictions. */
310 if (bs->sg || !s->needs_alignment) {
311 bs->request_alignment = 1;
312 s->buf_align = 1;
313 return;
314 }
315
316 bs->request_alignment = 0;
317 s->buf_align = 0;
318 /* Let's try to use the logical blocksize for the alignment. */
319 if (probe_logical_blocksize(fd, &bs->request_alignment) < 0) {
320 bs->request_alignment = 0;
321 }
c25f53b0
PB
322#ifdef CONFIG_XFS
323 if (s->is_xfs) {
324 struct dioattr da;
df26a350 325 if (xfsctl(NULL, fd, XFS_IOC_DIOINFO, &da) >= 0) {
c25f53b0
PB
326 bs->request_alignment = da.d_miniosz;
327 /* The kernel returns wrong information for d_mem */
328 /* s->buf_align = da.d_mem; */
329 }
330 }
331#endif
332
333 /* If we could not get the sizes so far, we can only guess them */
334 if (!s->buf_align) {
335 size_t align;
459b4e66
DL
336 buf = qemu_memalign(max_align, 2 * max_align);
337 for (align = 512; align <= max_align; align <<= 1) {
338 if (raw_is_io_aligned(fd, buf + align, max_align)) {
c25f53b0
PB
339 s->buf_align = align;
340 break;
341 }
342 }
343 qemu_vfree(buf);
344 }
345
346 if (!bs->request_alignment) {
347 size_t align;
459b4e66
DL
348 buf = qemu_memalign(s->buf_align, max_align);
349 for (align = 512; align <= max_align; align <<= 1) {
22d182e8 350 if (raw_is_io_aligned(fd, buf, align)) {
c25f53b0
PB
351 bs->request_alignment = align;
352 break;
353 }
354 }
355 qemu_vfree(buf);
356 }
df26a350
KW
357
358 if (!s->buf_align || !bs->request_alignment) {
359 error_setg(errp, "Could not find working O_DIRECT alignment. "
360 "Try cache.direct=off.");
361 }
c25f53b0
PB
362}
363
6a8dc042
JC
364static void raw_parse_flags(int bdrv_flags, int *open_flags)
365{
366 assert(open_flags != NULL);
367
368 *open_flags |= O_BINARY;
369 *open_flags &= ~O_ACCMODE;
370 if (bdrv_flags & BDRV_O_RDWR) {
371 *open_flags |= O_RDWR;
372 } else {
373 *open_flags |= O_RDONLY;
374 }
375
376 /* Use O_DSYNC for write-through caching, no flags for write-back caching,
377 * and O_DIRECT for no caching. */
378 if ((bdrv_flags & BDRV_O_NOCACHE)) {
379 *open_flags |= O_DIRECT;
380 }
6a8dc042
JC
381}
382
c2f3426c
SH
383static void raw_detach_aio_context(BlockDriverState *bs)
384{
385#ifdef CONFIG_LINUX_AIO
386 BDRVRawState *s = bs->opaque;
387
388 if (s->use_aio) {
389 laio_detach_aio_context(s->aio_ctx, bdrv_get_aio_context(bs));
390 }
391#endif
392}
393
394static void raw_attach_aio_context(BlockDriverState *bs,
395 AioContext *new_context)
396{
397#ifdef CONFIG_LINUX_AIO
398 BDRVRawState *s = bs->opaque;
399
400 if (s->use_aio) {
401 laio_attach_aio_context(s->aio_ctx, new_context);
402 }
403#endif
404}
405
fc32a72d
JC
#ifdef CONFIG_LINUX_AIO
/*
 * Decide whether Linux AIO is used for the given @bdrv_flags.
 *
 * *use_aio is set to 1 only when both BDRV_O_NOCACHE and BDRV_O_NATIVE_AIO
 * are requested (Linux does AIO only for O_DIRECT files); otherwise to 0.
 * *aio_ctx is created on demand and reused if already non-NULL.
 *
 * Returns 0 on success, -1 if laio_init() fails (in which case *use_aio is
 * left unmodified).
 */
static int raw_set_aio(void **aio_ctx, int *use_aio, int bdrv_flags)
{
    const int native_direct = BDRV_O_NOCACHE | BDRV_O_NATIVE_AIO;

    assert(aio_ctx != NULL);
    assert(use_aio != NULL);

    if ((bdrv_flags & native_direct) != native_direct) {
        *use_aio = 0;
        return 0;
    }

    /* if non-NULL, laio_init() has already been run */
    if (*aio_ctx == NULL) {
        *aio_ctx = laio_init();
        if (*aio_ctx == NULL) {
            return -1;
        }
    }

    *use_aio = 1;
    return 0;
}
#endif
437
078896a9
HR
438static void raw_parse_filename(const char *filename, QDict *options,
439 Error **errp)
440{
441 /* The filename does not have to be prefixed by the protocol name, since
442 * "file" is the default protocol; therefore, the return value of this
443 * function call can be ignored. */
444 strstart(filename, "file:", &filename);
445
446 qdict_put_obj(options, "filename", QOBJECT(qstring_from_str(filename)));
447}
448
c66a6157
KW
/* Runtime options accepted by raw_open_common(); only "filename" exists. */
static QemuOptsList raw_runtime_opts = {
    .name = "raw",
    .head = QTAILQ_HEAD_INITIALIZER(raw_runtime_opts.head),
    .desc = {
        {
            .name = "filename",
            .type = QEMU_OPT_STRING,
            .help = "File name of the image",
        },
        { /* end of list */ }
    },
};
461
462static int raw_open_common(BlockDriverState *bs, QDict *options,
e428e439 463 int bdrv_flags, int open_flags, Error **errp)
83f64091
FB
464{
465 BDRVRawState *s = bs->opaque;
c66a6157
KW
466 QemuOpts *opts;
467 Error *local_err = NULL;
8bfea15d 468 const char *filename = NULL;
0e1d8f4c 469 int fd, ret;
260a82e5 470 struct stat st;
83f64091 471
87ea75d5 472 opts = qemu_opts_create(&raw_runtime_opts, NULL, 0, &error_abort);
c66a6157 473 qemu_opts_absorb_qdict(opts, options, &local_err);
84d18f06 474 if (local_err) {
e428e439 475 error_propagate(errp, local_err);
c66a6157
KW
476 ret = -EINVAL;
477 goto fail;
478 }
479
480 filename = qemu_opt_get(opts, "filename");
481
1de1ae0a
CE
482 ret = raw_normalize_devicepath(&filename);
483 if (ret != 0) {
e428e439 484 error_setg_errno(errp, -ret, "Could not normalize device path");
c66a6157 485 goto fail;
1de1ae0a
CE
486 }
487
6a8dc042
JC
488 s->open_flags = open_flags;
489 raw_parse_flags(bdrv_flags, &s->open_flags);
83f64091 490
90babde0 491 s->fd = -1;
40ff6d7e 492 fd = qemu_open(filename, s->open_flags, 0644);
19cb3738
FB
493 if (fd < 0) {
494 ret = -errno;
c66a6157 495 if (ret == -EROFS) {
19cb3738 496 ret = -EACCES;
c66a6157
KW
497 }
498 goto fail;
19cb3738 499 }
83f64091 500 s->fd = fd;
9ef91a67 501
5c6c3a6c 502#ifdef CONFIG_LINUX_AIO
fc32a72d 503 if (raw_set_aio(&s->aio_ctx, &s->use_aio, bdrv_flags)) {
47e6b251 504 qemu_close(fd);
c66a6157 505 ret = -errno;
e428e439 506 error_setg_errno(errp, -ret, "Could not set AIO state");
c66a6157 507 goto fail;
9ef91a67 508 }
96518254
KW
509 if (!s->use_aio && (bdrv_flags & BDRV_O_NATIVE_AIO)) {
510 error_printf("WARNING: aio=native was specified for '%s', but "
511 "it requires cache.direct=on, which was not "
512 "specified. Falling back to aio=threads.\n"
513 " This will become an error condition in "
514 "future QEMU versions.\n",
515 bs->filename);
516 }
fc32a72d 517#endif
9ef91a67 518
7ce21016 519 s->has_discard = true;
97a2ae34 520 s->has_write_zeroes = true;
3cad8307
RPM
521 if ((bs->open_flags & BDRV_O_NOCACHE) != 0) {
522 s->needs_alignment = true;
523 }
260a82e5
PB
524
525 if (fstat(s->fd, &st) < 0) {
01212d4e 526 ret = -errno;
260a82e5
PB
527 error_setg_errno(errp, errno, "Could not stat file");
528 goto fail;
529 }
530 if (S_ISREG(st.st_mode)) {
531 s->discard_zeroes = true;
d50d8222 532 s->has_fallocate = true;
260a82e5 533 }
d0b4503e
PB
534 if (S_ISBLK(st.st_mode)) {
535#ifdef BLKDISCARDZEROES
536 unsigned int arg;
537 if (ioctl(s->fd, BLKDISCARDZEROES, &arg) == 0 && arg) {
538 s->discard_zeroes = true;
539 }
540#endif
541#ifdef __linux__
542 /* On Linux 3.10, BLKDISCARD leaves stale data in the page cache. Do
543 * not rely on the contents of discarded blocks unless using O_DIRECT.
97a2ae34 544 * Same for BLKZEROOUT.
d0b4503e
PB
545 */
546 if (!(bs->open_flags & BDRV_O_NOCACHE)) {
547 s->discard_zeroes = false;
97a2ae34 548 s->has_write_zeroes = false;
d0b4503e
PB
549 }
550#endif
551 }
3cad8307
RPM
552#ifdef __FreeBSD__
553 if (S_ISCHR(st.st_mode)) {
554 /*
555 * The file is a char device (disk), which on FreeBSD isn't behind
556 * a pager, so force all requests to be aligned. This is needed
557 * so QEMU makes sure all IO operations on the device are aligned
558 * to sector size, or else FreeBSD will reject them with EINVAL.
559 */
560 s->needs_alignment = true;
561 }
562#endif
260a82e5 563
dce512de
CH
564#ifdef CONFIG_XFS
565 if (platform_test_xfs_fd(s->fd)) {
7ce21016 566 s->is_xfs = true;
dce512de
CH
567 }
568#endif
569
c2f3426c
SH
570 raw_attach_aio_context(bs, bdrv_get_aio_context(bs));
571
c66a6157
KW
572 ret = 0;
573fail:
8bfea15d
KW
574 if (filename && (bdrv_flags & BDRV_O_TEMPORARY)) {
575 unlink(filename);
576 }
c66a6157
KW
577 qemu_opts_del(opts);
578 return ret;
83f64091
FB
579}
580
015a1036
HR
581static int raw_open(BlockDriverState *bs, QDict *options, int flags,
582 Error **errp)
90babde0
CH
583{
584 BDRVRawState *s = bs->opaque;
e428e439
HR
585 Error *local_err = NULL;
586 int ret;
90babde0
CH
587
588 s->type = FTYPE_FILE;
e428e439 589 ret = raw_open_common(bs, options, flags, 0, &local_err);
84d18f06 590 if (local_err) {
e428e439
HR
591 error_propagate(errp, local_err);
592 }
593 return ret;
90babde0
CH
594}
595
eeb6b45d
JC
596static int raw_reopen_prepare(BDRVReopenState *state,
597 BlockReopenQueue *queue, Error **errp)
598{
599 BDRVRawState *s;
600 BDRVRawReopenState *raw_s;
601 int ret = 0;
df26a350 602 Error *local_err = NULL;
eeb6b45d
JC
603
604 assert(state != NULL);
605 assert(state->bs != NULL);
606
607 s = state->bs->opaque;
608
5839e53b 609 state->opaque = g_new0(BDRVRawReopenState, 1);
eeb6b45d
JC
610 raw_s = state->opaque;
611
612#ifdef CONFIG_LINUX_AIO
613 raw_s->use_aio = s->use_aio;
614
615 /* we can use s->aio_ctx instead of a copy, because the use_aio flag is
616 * valid in the 'false' condition even if aio_ctx is set, and raw_set_aio()
617 * won't override aio_ctx if aio_ctx is non-NULL */
618 if (raw_set_aio(&s->aio_ctx, &raw_s->use_aio, state->flags)) {
e428e439 619 error_setg(errp, "Could not set AIO state");
eeb6b45d
JC
620 return -1;
621 }
622#endif
623
1bc6b705
JC
624 if (s->type == FTYPE_FD || s->type == FTYPE_CD) {
625 raw_s->open_flags |= O_NONBLOCK;
626 }
627
eeb6b45d
JC
628 raw_parse_flags(state->flags, &raw_s->open_flags);
629
630 raw_s->fd = -1;
631
fdf263f6 632 int fcntl_flags = O_APPEND | O_NONBLOCK;
eeb6b45d
JC
633#ifdef O_NOATIME
634 fcntl_flags |= O_NOATIME;
635#endif
636
fdf263f6
AF
637#ifdef O_ASYNC
638 /* Not all operating systems have O_ASYNC, and those that don't
639 * will not let us track the state into raw_s->open_flags (typically
640 * you achieve the same effect with an ioctl, for example I_SETSIG
641 * on Solaris). But we do not use O_ASYNC, so that's fine.
642 */
643 assert((s->open_flags & O_ASYNC) == 0);
644#endif
645
eeb6b45d
JC
646 if ((raw_s->open_flags & ~fcntl_flags) == (s->open_flags & ~fcntl_flags)) {
647 /* dup the original fd */
648 /* TODO: use qemu fcntl wrapper */
649#ifdef F_DUPFD_CLOEXEC
650 raw_s->fd = fcntl(s->fd, F_DUPFD_CLOEXEC, 0);
651#else
652 raw_s->fd = dup(s->fd);
653 if (raw_s->fd != -1) {
654 qemu_set_cloexec(raw_s->fd);
655 }
656#endif
657 if (raw_s->fd >= 0) {
658 ret = fcntl_setfl(raw_s->fd, raw_s->open_flags);
659 if (ret) {
660 qemu_close(raw_s->fd);
661 raw_s->fd = -1;
662 }
663 }
664 }
665
666 /* If we cannot use fcntl, or fcntl failed, fall back to qemu_open() */
667 if (raw_s->fd == -1) {
668 assert(!(raw_s->open_flags & O_CREAT));
669 raw_s->fd = qemu_open(state->bs->filename, raw_s->open_flags);
670 if (raw_s->fd == -1) {
e428e439 671 error_setg_errno(errp, errno, "Could not reopen file");
eeb6b45d
JC
672 ret = -1;
673 }
674 }
df26a350
KW
675
676 /* Fail already reopen_prepare() if we can't get a working O_DIRECT
677 * alignment with the new fd. */
678 if (raw_s->fd != -1) {
679 raw_probe_alignment(state->bs, raw_s->fd, &local_err);
680 if (local_err) {
681 qemu_close(raw_s->fd);
682 raw_s->fd = -1;
683 error_propagate(errp, local_err);
684 ret = -EINVAL;
685 }
686 }
687
eeb6b45d
JC
688 return ret;
689}
690
eeb6b45d
JC
691static void raw_reopen_commit(BDRVReopenState *state)
692{
693 BDRVRawReopenState *raw_s = state->opaque;
694 BDRVRawState *s = state->bs->opaque;
695
696 s->open_flags = raw_s->open_flags;
697
698 qemu_close(s->fd);
699 s->fd = raw_s->fd;
700#ifdef CONFIG_LINUX_AIO
701 s->use_aio = raw_s->use_aio;
702#endif
703
704 g_free(state->opaque);
705 state->opaque = NULL;
706}
707
708
709static void raw_reopen_abort(BDRVReopenState *state)
710{
711 BDRVRawReopenState *raw_s = state->opaque;
712
713 /* nothing to do if NULL, we didn't get far enough */
714 if (raw_s == NULL) {
715 return;
716 }
717
718 if (raw_s->fd >= 0) {
719 qemu_close(raw_s->fd);
720 raw_s->fd = -1;
721 }
722 g_free(state->opaque);
723 state->opaque = NULL;
724}
725
3baca891 726static void raw_refresh_limits(BlockDriverState *bs, Error **errp)
c25f53b0
PB
727{
728 BDRVRawState *s = bs->opaque;
eeb6b45d 729
df26a350 730 raw_probe_alignment(bs, s->fd, errp);
4196d2f0 731 bs->bl.min_mem_alignment = s->buf_align;
459b4e66 732 bs->bl.opt_mem_alignment = MAX(s->buf_align, getpagesize());
c25f53b0 733}
83f64091 734
1a9335e4
ET
/*
 * Issue the BIODASDINFO2 ioctl on @fd and return its result.
 * Where that ioctl is not defined, -1 is returned unconditionally.
 */
static int check_for_dasd(int fd)
{
#ifdef BIODASDINFO2
    struct dasd_information2_t info = {0};
    int rc = ioctl(fd, BIODASDINFO2, &info);

    return rc;
#else
    return -1;
#endif
}
745
746/**
747 * Try to get @bs's logical and physical block size.
748 * On success, store them in @bsz and return zero.
749 * On failure, return negative errno.
750 */
751static int hdev_probe_blocksizes(BlockDriverState *bs, BlockSizes *bsz)
752{
753 BDRVRawState *s = bs->opaque;
754 int ret;
755
756 /* If DASD, get blocksizes */
757 if (check_for_dasd(s->fd) < 0) {
758 return -ENOTSUP;
759 }
760 ret = probe_logical_blocksize(s->fd, &bsz->log);
761 if (ret < 0) {
762 return ret;
763 }
764 return probe_physical_blocksize(s->fd, &bsz->phys);
765}
766
767/**
768 * Try to get @bs's geometry: cyls, heads, sectors.
769 * On success, store them in @geo and return 0.
770 * On failure return -errno.
771 * (Allows block driver to assign default geometry values that guest sees)
772 */
773#ifdef __linux__
774static int hdev_probe_geometry(BlockDriverState *bs, HDGeometry *geo)
775{
776 BDRVRawState *s = bs->opaque;
777 struct hd_geometry ioctl_geo = {0};
778 uint32_t blksize;
779
780 /* If DASD, get its geometry */
781 if (check_for_dasd(s->fd) < 0) {
782 return -ENOTSUP;
783 }
784 if (ioctl(s->fd, HDIO_GETGEO, &ioctl_geo) < 0) {
785 return -errno;
786 }
787 /* HDIO_GETGEO may return success even though geo contains zeros
788 (e.g. certain multipath setups) */
789 if (!ioctl_geo.heads || !ioctl_geo.sectors || !ioctl_geo.cylinders) {
790 return -ENOTSUP;
791 }
792 /* Do not return a geometry for partition */
793 if (ioctl_geo.start != 0) {
794 return -ENOTSUP;
795 }
796 geo->heads = ioctl_geo.heads;
797 geo->sectors = ioctl_geo.sectors;
798 if (!probe_physical_blocksize(s->fd, &blksize)) {
799 /* overwrite cyls: HDIO_GETGEO result is incorrect for big drives */
800 geo->cylinders = bdrv_nb_sectors(bs) / (blksize / BDRV_SECTOR_SIZE)
801 / (geo->heads * geo->sectors);
802 return 0;
803 }
804 geo->cylinders = ioctl_geo.cylinders;
805
806 return 0;
807}
808#else /* __linux__ */
809static int hdev_probe_geometry(BlockDriverState *bs, HDGeometry *geo)
810{
811 return -ENOTSUP;
812}
813#endif
814
de81a169
PB
815static ssize_t handle_aiocb_ioctl(RawPosixAIOData *aiocb)
816{
817 int ret;
818
819 ret = ioctl(aiocb->aio_fildes, aiocb->aio_ioctl_cmd, aiocb->aio_ioctl_buf);
820 if (ret == -1) {
821 return -errno;
822 }
823
b608c8dc 824 return 0;
de81a169
PB
825}
826
827static ssize_t handle_aiocb_flush(RawPosixAIOData *aiocb)
828{
829 int ret;
830
831 ret = qemu_fdatasync(aiocb->aio_fildes);
832 if (ret == -1) {
833 return -errno;
834 }
835 return 0;
836}
837
/*
 * Thin wrappers around preadv()/pwritev().  When CONFIG_PREADV is not set
 * they return -ENOSYS and preadv_present starts out false, so callers use
 * the linear fallback.
 */
#ifdef CONFIG_PREADV
static bool preadv_present = true;
#else
static bool preadv_present = false;
#endif

static ssize_t
qemu_preadv(int fd, const struct iovec *iov, int nr_iov, off_t offset)
{
#ifdef CONFIG_PREADV
    return preadv(fd, iov, nr_iov, offset);
#else
    return -ENOSYS;
#endif
}

static ssize_t
qemu_pwritev(int fd, const struct iovec *iov, int nr_iov, off_t offset)
{
#ifdef CONFIG_PREADV
    return pwritev(fd, iov, nr_iov, offset);
#else
    return -ENOSYS;
#endif
}
871
872static ssize_t handle_aiocb_rw_vector(RawPosixAIOData *aiocb)
873{
874 ssize_t len;
875
876 do {
877 if (aiocb->aio_type & QEMU_AIO_WRITE)
878 len = qemu_pwritev(aiocb->aio_fildes,
879 aiocb->aio_iov,
880 aiocb->aio_niov,
881 aiocb->aio_offset);
882 else
883 len = qemu_preadv(aiocb->aio_fildes,
884 aiocb->aio_iov,
885 aiocb->aio_niov,
886 aiocb->aio_offset);
887 } while (len == -1 && errno == EINTR);
888
889 if (len == -1) {
890 return -errno;
891 }
892 return len;
893}
894
895/*
896 * Read/writes the data to/from a given linear buffer.
897 *
898 * Returns the number of bytes handles or -errno in case of an error. Short
899 * reads are only returned if the end of the file is reached.
900 */
901static ssize_t handle_aiocb_rw_linear(RawPosixAIOData *aiocb, char *buf)
902{
903 ssize_t offset = 0;
904 ssize_t len;
905
906 while (offset < aiocb->aio_nbytes) {
907 if (aiocb->aio_type & QEMU_AIO_WRITE) {
908 len = pwrite(aiocb->aio_fildes,
909 (const char *)buf + offset,
910 aiocb->aio_nbytes - offset,
911 aiocb->aio_offset + offset);
912 } else {
913 len = pread(aiocb->aio_fildes,
914 buf + offset,
915 aiocb->aio_nbytes - offset,
916 aiocb->aio_offset + offset);
917 }
918 if (len == -1 && errno == EINTR) {
919 continue;
61ed73cf
SH
920 } else if (len == -1 && errno == EINVAL &&
921 (aiocb->bs->open_flags & BDRV_O_NOCACHE) &&
922 !(aiocb->aio_type & QEMU_AIO_WRITE) &&
923 offset > 0) {
924 /* O_DIRECT pread() may fail with EINVAL when offset is unaligned
925 * after a short read. Assume that O_DIRECT short reads only occur
926 * at EOF. Therefore this is a short read, not an I/O error.
927 */
928 break;
de81a169
PB
929 } else if (len == -1) {
930 offset = -errno;
931 break;
932 } else if (len == 0) {
933 break;
934 }
935 offset += len;
936 }
937
938 return offset;
939}
940
941static ssize_t handle_aiocb_rw(RawPosixAIOData *aiocb)
942{
943 ssize_t nbytes;
944 char *buf;
945
946 if (!(aiocb->aio_type & QEMU_AIO_MISALIGNED)) {
947 /*
948 * If there is just a single buffer, and it is properly aligned
949 * we can just use plain pread/pwrite without any problems.
950 */
951 if (aiocb->aio_niov == 1) {
952 return handle_aiocb_rw_linear(aiocb, aiocb->aio_iov->iov_base);
953 }
954 /*
955 * We have more than one iovec, and all are properly aligned.
956 *
957 * Try preadv/pwritev first and fall back to linearizing the
958 * buffer if it's not supported.
959 */
960 if (preadv_present) {
961 nbytes = handle_aiocb_rw_vector(aiocb);
962 if (nbytes == aiocb->aio_nbytes ||
963 (nbytes < 0 && nbytes != -ENOSYS)) {
964 return nbytes;
965 }
966 preadv_present = false;
967 }
968
969 /*
970 * XXX(hch): short read/write. no easy way to handle the reminder
971 * using these interfaces. For now retry using plain
972 * pread/pwrite?
973 */
974 }
975
976 /*
977 * Ok, we have to do it the hard way, copy all segments into
978 * a single aligned buffer.
979 */
50d4a858
KW
980 buf = qemu_try_blockalign(aiocb->bs, aiocb->aio_nbytes);
981 if (buf == NULL) {
982 return -ENOMEM;
983 }
984
de81a169
PB
985 if (aiocb->aio_type & QEMU_AIO_WRITE) {
986 char *p = buf;
987 int i;
988
989 for (i = 0; i < aiocb->aio_niov; ++i) {
990 memcpy(p, aiocb->aio_iov[i].iov_base, aiocb->aio_iov[i].iov_len);
991 p += aiocb->aio_iov[i].iov_len;
992 }
8eb029c2 993 assert(p - buf == aiocb->aio_nbytes);
de81a169
PB
994 }
995
996 nbytes = handle_aiocb_rw_linear(aiocb, buf);
997 if (!(aiocb->aio_type & QEMU_AIO_WRITE)) {
998 char *p = buf;
999 size_t count = aiocb->aio_nbytes, copy;
1000 int i;
1001
1002 for (i = 0; i < aiocb->aio_niov && count; ++i) {
1003 copy = count;
1004 if (copy > aiocb->aio_iov[i].iov_len) {
1005 copy = aiocb->aio_iov[i].iov_len;
1006 }
1007 memcpy(aiocb->aio_iov[i].iov_base, p, copy);
8eb029c2 1008 assert(count >= copy);
de81a169
PB
1009 p += copy;
1010 count -= copy;
1011 }
8eb029c2 1012 assert(count == 0);
de81a169
PB
1013 }
1014 qemu_vfree(buf);
1015
1016 return nbytes;
1017}
1018
#ifdef CONFIG_XFS
/*
 * Zero @bytes bytes starting at @offset using XFS_IOC_ZERO_RANGE.
 * Returns 0 on success or -errno.
 */
static int xfs_write_zeroes(BDRVRawState *s, int64_t offset, uint64_t bytes)
{
    struct xfs_flock64 range;
    int rc;

    memset(&range, 0, sizeof(range));
    range.l_whence = SEEK_SET;
    range.l_start = offset;
    range.l_len = bytes;

    rc = xfsctl(NULL, s->fd, XFS_IOC_ZERO_RANGE, &range);
    if (rc < 0) {
        DEBUG_BLOCK_PRINT("cannot write zero range (%s)\n", strerror(errno));
        return -errno;
    }

    return 0;
}

/*
 * Punch a hole of @bytes bytes starting at @offset using
 * XFS_IOC_UNRESVSP64.  Returns 0 on success or -errno.
 */
static int xfs_discard(BDRVRawState *s, int64_t offset, uint64_t bytes)
{
    struct xfs_flock64 range;
    int rc;

    memset(&range, 0, sizeof(range));
    range.l_whence = SEEK_SET;
    range.l_start = offset;
    range.l_len = bytes;

    rc = xfsctl(NULL, s->fd, XFS_IOC_UNRESVSP64, &range);
    if (rc < 0) {
        DEBUG_BLOCK_PRINT("cannot punch hole (%s)\n", strerror(errno));
        return -errno;
    }

    return 0;
}
#endif
1054
1486df0e
DL
/*
 * Map the error codes that mean "operation not available" (-ENODEV,
 * -ENOSYS, -EOPNOTSUPP, -ENOTTY) to -ENOTSUP; pass everything else through.
 */
static int translate_err(int err)
{
    static const int unsupported[] = {
        -ENODEV, -ENOSYS, -EOPNOTSUPP, -ENOTTY,
    };
    size_t i;

    for (i = 0; i < sizeof(unsupported) / sizeof(unsupported[0]); i++) {
        if (err == unsupported[i]) {
            return -ENOTSUP;
        }
    }
    return err;
}
1063
#ifdef CONFIG_FALLOCATE
/*
 * Call fallocate(), retrying while it is interrupted (EINTR).
 * Returns 0 on success or the failure's errno passed through
 * translate_err().
 */
static int do_fallocate(int fd, int mode, off_t offset, off_t len)
{
    while (fallocate(fd, mode, offset, len) != 0) {
        if (errno != EINTR) {
            return translate_err(-errno);
        }
    }
    return 0;
}
#endif
1075
37cc9f7f 1076static ssize_t handle_aiocb_write_zeroes_block(RawPosixAIOData *aiocb)
97a2ae34 1077{
37cc9f7f 1078 int ret = -ENOTSUP;
97a2ae34
PB
1079 BDRVRawState *s = aiocb->bs->opaque;
1080
37cc9f7f 1081 if (!s->has_write_zeroes) {
97a2ae34
PB
1082 return -ENOTSUP;
1083 }
1084
97a2ae34 1085#ifdef BLKZEROOUT
37cc9f7f
DL
1086 do {
1087 uint64_t range[2] = { aiocb->aio_offset, aiocb->aio_nbytes };
1088 if (ioctl(aiocb->aio_fildes, BLKZEROOUT, range) == 0) {
1089 return 0;
97a2ae34 1090 }
37cc9f7f
DL
1091 } while (errno == EINTR);
1092
1093 ret = translate_err(-errno);
97a2ae34 1094#endif
97a2ae34 1095
1486df0e 1096 if (ret == -ENOTSUP) {
97a2ae34 1097 s->has_write_zeroes = false;
97a2ae34
PB
1098 }
1099 return ret;
1100}
1101
37cc9f7f
DL
1102static ssize_t handle_aiocb_write_zeroes(RawPosixAIOData *aiocb)
1103{
a6dcf097 1104#if defined(CONFIG_FALLOCATE) || defined(CONFIG_XFS)
37cc9f7f 1105 BDRVRawState *s = aiocb->bs->opaque;
a6dcf097 1106#endif
37cc9f7f
DL
1107
1108 if (aiocb->aio_type & QEMU_AIO_BLKDEV) {
1109 return handle_aiocb_write_zeroes_block(aiocb);
1110 }
1111
1112#ifdef CONFIG_XFS
1113 if (s->is_xfs) {
1114 return xfs_write_zeroes(s, aiocb->aio_offset, aiocb->aio_nbytes);
1115 }
1116#endif
1117
b953f075
DL
1118#ifdef CONFIG_FALLOCATE_ZERO_RANGE
1119 if (s->has_write_zeroes) {
1120 int ret = do_fallocate(s->fd, FALLOC_FL_ZERO_RANGE,
1121 aiocb->aio_offset, aiocb->aio_nbytes);
1122 if (ret == 0 || ret != -ENOTSUP) {
1123 return ret;
1124 }
1125 s->has_write_zeroes = false;
1126 }
1127#endif
1128
1cdc3239
DL
1129#ifdef CONFIG_FALLOCATE_PUNCH_HOLE
1130 if (s->has_discard && s->has_fallocate) {
1131 int ret = do_fallocate(s->fd,
1132 FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
1133 aiocb->aio_offset, aiocb->aio_nbytes);
1134 if (ret == 0) {
1135 ret = do_fallocate(s->fd, 0, aiocb->aio_offset, aiocb->aio_nbytes);
1136 if (ret == 0 || ret != -ENOTSUP) {
1137 return ret;
1138 }
1139 s->has_fallocate = false;
1140 } else if (ret != -ENOTSUP) {
1141 return ret;
1142 } else {
1143 s->has_discard = false;
1144 }
1145 }
1146#endif
1147
d50d8222
DL
1148#ifdef CONFIG_FALLOCATE
1149 if (s->has_fallocate && aiocb->aio_offset >= bdrv_getlength(aiocb->bs)) {
1150 int ret = do_fallocate(s->fd, 0, aiocb->aio_offset, aiocb->aio_nbytes);
1151 if (ret == 0 || ret != -ENOTSUP) {
1152 return ret;
1153 }
1154 s->has_fallocate = false;
1155 }
1156#endif
1157
37cc9f7f
DL
1158 return -ENOTSUP;
1159}
1160
8238010b
PB
1161static ssize_t handle_aiocb_discard(RawPosixAIOData *aiocb)
1162{
1163 int ret = -EOPNOTSUPP;
1164 BDRVRawState *s = aiocb->bs->opaque;
1165
7ce21016
PB
1166 if (!s->has_discard) {
1167 return -ENOTSUP;
8238010b
PB
1168 }
1169
1170 if (aiocb->aio_type & QEMU_AIO_BLKDEV) {
1171#ifdef BLKDISCARD
1172 do {
1173 uint64_t range[2] = { aiocb->aio_offset, aiocb->aio_nbytes };
1174 if (ioctl(aiocb->aio_fildes, BLKDISCARD, range) == 0) {
1175 return 0;
1176 }
1177 } while (errno == EINTR);
1178
1179 ret = -errno;
1180#endif
1181 } else {
1182#ifdef CONFIG_XFS
1183 if (s->is_xfs) {
1184 return xfs_discard(s, aiocb->aio_offset, aiocb->aio_nbytes);
1185 }
1186#endif
1187
1188#ifdef CONFIG_FALLOCATE_PUNCH_HOLE
0b991712
DL
1189 ret = do_fallocate(s->fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
1190 aiocb->aio_offset, aiocb->aio_nbytes);
8238010b
PB
1191#endif
1192 }
1193
1486df0e
DL
1194 ret = translate_err(ret);
1195 if (ret == -ENOTSUP) {
7ce21016 1196 s->has_discard = false;
8238010b
PB
1197 }
1198 return ret;
1199}
1200
de81a169
PB
1201static int aio_worker(void *arg)
1202{
1203 RawPosixAIOData *aiocb = arg;
1204 ssize_t ret = 0;
1205
1206 switch (aiocb->aio_type & QEMU_AIO_TYPE_MASK) {
1207 case QEMU_AIO_READ:
1208 ret = handle_aiocb_rw(aiocb);
c0191e76 1209 if (ret >= 0 && ret < aiocb->aio_nbytes) {
de81a169
PB
1210 iov_memset(aiocb->aio_iov, aiocb->aio_niov, ret,
1211 0, aiocb->aio_nbytes - ret);
1212
1213 ret = aiocb->aio_nbytes;
1214 }
1215 if (ret == aiocb->aio_nbytes) {
1216 ret = 0;
1217 } else if (ret >= 0 && ret < aiocb->aio_nbytes) {
1218 ret = -EINVAL;
1219 }
1220 break;
1221 case QEMU_AIO_WRITE:
1222 ret = handle_aiocb_rw(aiocb);
1223 if (ret == aiocb->aio_nbytes) {
1224 ret = 0;
1225 } else if (ret >= 0 && ret < aiocb->aio_nbytes) {
1226 ret = -EINVAL;
1227 }
1228 break;
1229 case QEMU_AIO_FLUSH:
1230 ret = handle_aiocb_flush(aiocb);
1231 break;
1232 case QEMU_AIO_IOCTL:
1233 ret = handle_aiocb_ioctl(aiocb);
1234 break;
8238010b
PB
1235 case QEMU_AIO_DISCARD:
1236 ret = handle_aiocb_discard(aiocb);
1237 break;
97a2ae34
PB
1238 case QEMU_AIO_WRITE_ZEROES:
1239 ret = handle_aiocb_write_zeroes(aiocb);
1240 break;
de81a169
PB
1241 default:
1242 fprintf(stderr, "invalid aio request (0x%x)\n", aiocb->aio_type);
1243 ret = -EINVAL;
1244 break;
1245 }
1246
1247 g_slice_free(RawPosixAIOData, aiocb);
1248 return ret;
1249}
1250
260a82e5
PB
1251static int paio_submit_co(BlockDriverState *bs, int fd,
1252 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
1253 int type)
1254{
1255 RawPosixAIOData *acb = g_slice_new(RawPosixAIOData);
1256 ThreadPool *pool;
1257
1258 acb->bs = bs;
1259 acb->aio_type = type;
1260 acb->aio_fildes = fd;
1261
8eb029c2
KW
1262 acb->aio_nbytes = nb_sectors * BDRV_SECTOR_SIZE;
1263 acb->aio_offset = sector_num * BDRV_SECTOR_SIZE;
1264
260a82e5
PB
1265 if (qiov) {
1266 acb->aio_iov = qiov->iov;
1267 acb->aio_niov = qiov->niov;
8eb029c2 1268 assert(qiov->size == acb->aio_nbytes);
260a82e5 1269 }
260a82e5
PB
1270
1271 trace_paio_submit_co(sector_num, nb_sectors, type);
1272 pool = aio_get_thread_pool(bdrv_get_aio_context(bs));
1273 return thread_pool_submit_co(pool, aio_worker, acb);
1274}
1275
7c84b1b8 1276static BlockAIOCB *paio_submit(BlockDriverState *bs, int fd,
de81a169 1277 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
097310b5 1278 BlockCompletionFunc *cb, void *opaque, int type)
de81a169
PB
1279{
1280 RawPosixAIOData *acb = g_slice_new(RawPosixAIOData);
c4d9d196 1281 ThreadPool *pool;
de81a169
PB
1282
1283 acb->bs = bs;
1284 acb->aio_type = type;
1285 acb->aio_fildes = fd;
1286
8eb029c2
KW
1287 acb->aio_nbytes = nb_sectors * BDRV_SECTOR_SIZE;
1288 acb->aio_offset = sector_num * BDRV_SECTOR_SIZE;
1289
de81a169
PB
1290 if (qiov) {
1291 acb->aio_iov = qiov->iov;
1292 acb->aio_niov = qiov->niov;
8eb029c2 1293 assert(qiov->size == acb->aio_nbytes);
de81a169 1294 }
de81a169
PB
1295
1296 trace_paio_submit(acb, opaque, sector_num, nb_sectors, type);
c4d9d196
SH
1297 pool = aio_get_thread_pool(bdrv_get_aio_context(bs));
1298 return thread_pool_submit_aio(pool, aio_worker, acb, cb, opaque);
de81a169
PB
1299}
1300
7c84b1b8 1301static BlockAIOCB *raw_aio_submit(BlockDriverState *bs,
9ef91a67 1302 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
097310b5 1303 BlockCompletionFunc *cb, void *opaque, int type)
83f64091 1304{
ce1a14dc 1305 BDRVRawState *s = bs->opaque;
ce1a14dc 1306
19cb3738
FB
1307 if (fd_open(bs) < 0)
1308 return NULL;
1309
f141eafe 1310 /*
3cad8307
RPM
1311 * Check if the underlying device requires requests to be aligned,
1312 * and if the request we are trying to submit is aligned or not.
1313 * If this is the case tell the low-level driver that it needs
1314 * to copy the buffer.
f141eafe 1315 */
3cad8307 1316 if (s->needs_alignment) {
c53b1c51 1317 if (!bdrv_qiov_is_aligned(bs, qiov)) {
5c6c3a6c 1318 type |= QEMU_AIO_MISALIGNED;
e44bd6fc 1319#ifdef CONFIG_LINUX_AIO
5c6c3a6c
CH
1320 } else if (s->use_aio) {
1321 return laio_submit(bs, s->aio_ctx, s->fd, sector_num, qiov,
e44bd6fc
SW
1322 nb_sectors, cb, opaque, type);
1323#endif
5c6c3a6c 1324 }
9ef91a67 1325 }
f141eafe 1326
1e5b9d2f 1327 return paio_submit(bs, s->fd, sector_num, qiov, nb_sectors,
9ef91a67 1328 cb, opaque, type);
83f64091
FB
1329}
1330
1b3abdcc
ML
1331static void raw_aio_plug(BlockDriverState *bs)
1332{
1333#ifdef CONFIG_LINUX_AIO
1334 BDRVRawState *s = bs->opaque;
1335 if (s->use_aio) {
1336 laio_io_plug(bs, s->aio_ctx);
1337 }
1338#endif
1339}
1340
1341static void raw_aio_unplug(BlockDriverState *bs)
1342{
1343#ifdef CONFIG_LINUX_AIO
1344 BDRVRawState *s = bs->opaque;
1345 if (s->use_aio) {
1346 laio_io_unplug(bs, s->aio_ctx, true);
1347 }
1348#endif
1349}
1350
1351static void raw_aio_flush_io_queue(BlockDriverState *bs)
1352{
1353#ifdef CONFIG_LINUX_AIO
1354 BDRVRawState *s = bs->opaque;
1355 if (s->use_aio) {
1356 laio_io_unplug(bs, s->aio_ctx, false);
1357 }
1358#endif
1359}
1360
7c84b1b8 1361static BlockAIOCB *raw_aio_readv(BlockDriverState *bs,
f141eafe 1362 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
097310b5 1363 BlockCompletionFunc *cb, void *opaque)
83f64091 1364{
9ef91a67
CH
1365 return raw_aio_submit(bs, sector_num, qiov, nb_sectors,
1366 cb, opaque, QEMU_AIO_READ);
83f64091
FB
1367}
1368
7c84b1b8 1369static BlockAIOCB *raw_aio_writev(BlockDriverState *bs,
f141eafe 1370 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
097310b5 1371 BlockCompletionFunc *cb, void *opaque)
83f64091 1372{
9ef91a67
CH
1373 return raw_aio_submit(bs, sector_num, qiov, nb_sectors,
1374 cb, opaque, QEMU_AIO_WRITE);
83f64091 1375}
53538725 1376
7c84b1b8 1377static BlockAIOCB *raw_aio_flush(BlockDriverState *bs,
097310b5 1378 BlockCompletionFunc *cb, void *opaque)
b2e12bc6
CH
1379{
1380 BDRVRawState *s = bs->opaque;
1381
1382 if (fd_open(bs) < 0)
1383 return NULL;
1384
1e5b9d2f 1385 return paio_submit(bs, s->fd, 0, NULL, 0, cb, opaque, QEMU_AIO_FLUSH);
b2e12bc6
CH
1386}
1387
83f64091
FB
1388static void raw_close(BlockDriverState *bs)
1389{
1390 BDRVRawState *s = bs->opaque;
c2f3426c
SH
1391
1392 raw_detach_aio_context(bs);
1393
abd269b7
SH
1394#ifdef CONFIG_LINUX_AIO
1395 if (s->use_aio) {
1396 laio_cleanup(s->aio_ctx);
1397 }
1398#endif
19cb3738 1399 if (s->fd >= 0) {
2e1e79da 1400 qemu_close(s->fd);
19cb3738
FB
1401 s->fd = -1;
1402 }
83f64091
FB
1403}
1404
1405static int raw_truncate(BlockDriverState *bs, int64_t offset)
1406{
1407 BDRVRawState *s = bs->opaque;
55b949c8
CH
1408 struct stat st;
1409
1410 if (fstat(s->fd, &st)) {
83f64091 1411 return -errno;
55b949c8
CH
1412 }
1413
1414 if (S_ISREG(st.st_mode)) {
1415 if (ftruncate(s->fd, offset) < 0) {
1416 return -errno;
1417 }
1418 } else if (S_ISCHR(st.st_mode) || S_ISBLK(st.st_mode)) {
1419 if (offset > raw_getlength(bs)) {
1420 return -EINVAL;
1421 }
1422 } else {
1423 return -ENOTSUP;
1424 }
1425
83f64091
FB
1426 return 0;
1427}
1428
128ab2ff
BS
1429#ifdef __OpenBSD__
1430static int64_t raw_getlength(BlockDriverState *bs)
1431{
1432 BDRVRawState *s = bs->opaque;
1433 int fd = s->fd;
1434 struct stat st;
1435
1436 if (fstat(fd, &st))
aa729704 1437 return -errno;
128ab2ff
BS
1438 if (S_ISCHR(st.st_mode) || S_ISBLK(st.st_mode)) {
1439 struct disklabel dl;
1440
1441 if (ioctl(fd, DIOCGDINFO, &dl))
aa729704 1442 return -errno;
128ab2ff
BS
1443 return (uint64_t)dl.d_secsize *
1444 dl.d_partitions[DISKPART(st.st_rdev)].p_size;
1445 } else
1446 return st.st_size;
1447}
d1f6fd8d
CE
1448#elif defined(__NetBSD__)
1449static int64_t raw_getlength(BlockDriverState *bs)
1450{
1451 BDRVRawState *s = bs->opaque;
1452 int fd = s->fd;
1453 struct stat st;
1454
1455 if (fstat(fd, &st))
aa729704 1456 return -errno;
d1f6fd8d
CE
1457 if (S_ISCHR(st.st_mode) || S_ISBLK(st.st_mode)) {
1458 struct dkwedge_info dkw;
1459
1460 if (ioctl(fd, DIOCGWEDGEINFO, &dkw) != -1) {
1461 return dkw.dkw_size * 512;
1462 } else {
1463 struct disklabel dl;
1464
1465 if (ioctl(fd, DIOCGDINFO, &dl))
aa729704 1466 return -errno;
d1f6fd8d
CE
1467 return (uint64_t)dl.d_secsize *
1468 dl.d_partitions[DISKPART(st.st_rdev)].p_size;
1469 }
1470 } else
1471 return st.st_size;
1472}
50779cc2
CH
1473#elif defined(__sun__)
1474static int64_t raw_getlength(BlockDriverState *bs)
1475{
1476 BDRVRawState *s = bs->opaque;
1477 struct dk_minfo minfo;
1478 int ret;
aa729704 1479 int64_t size;
50779cc2
CH
1480
1481 ret = fd_open(bs);
1482 if (ret < 0) {
1483 return ret;
1484 }
1485
1486 /*
1487 * Use the DKIOCGMEDIAINFO ioctl to read the size.
1488 */
1489 ret = ioctl(s->fd, DKIOCGMEDIAINFO, &minfo);
1490 if (ret != -1) {
1491 return minfo.dki_lbsize * minfo.dki_capacity;
1492 }
1493
1494 /*
1495 * There are reports that lseek on some devices fails, but
1496 * irc discussion said that contingency on contingency was overkill.
1497 */
aa729704
MA
1498 size = lseek(s->fd, 0, SEEK_END);
1499 if (size < 0) {
1500 return -errno;
1501 }
1502 return size;
50779cc2
CH
1503}
1504#elif defined(CONFIG_BSD)
1505static int64_t raw_getlength(BlockDriverState *bs)
83f64091
FB
1506{
1507 BDRVRawState *s = bs->opaque;
1508 int fd = s->fd;
1509 int64_t size;
83f64091 1510 struct stat sb;
a167ba50 1511#if defined (__FreeBSD__) || defined(__FreeBSD_kernel__)
9f23011a 1512 int reopened = 0;
83f64091 1513#endif
19cb3738
FB
1514 int ret;
1515
1516 ret = fd_open(bs);
1517 if (ret < 0)
1518 return ret;
83f64091 1519
a167ba50 1520#if defined (__FreeBSD__) || defined(__FreeBSD_kernel__)
9f23011a
BS
1521again:
1522#endif
83f64091
FB
1523 if (!fstat(fd, &sb) && (S_IFCHR & sb.st_mode)) {
1524#ifdef DIOCGMEDIASIZE
1525 if (ioctl(fd, DIOCGMEDIASIZE, (off_t *)&size))
c5e97233
BS
1526#elif defined(DIOCGPART)
1527 {
1528 struct partinfo pi;
1529 if (ioctl(fd, DIOCGPART, &pi) == 0)
1530 size = pi.media_size;
1531 else
1532 size = 0;
1533 }
1534 if (size == 0)
83f64091 1535#endif
83affaa6 1536#if defined(__APPLE__) && defined(__MACH__)
728dacbd
JA
1537 {
1538 uint64_t sectors = 0;
1539 uint32_t sector_size = 0;
1540
1541 if (ioctl(fd, DKIOCGETBLOCKCOUNT, &sectors) == 0
1542 && ioctl(fd, DKIOCGETBLOCKSIZE, &sector_size) == 0) {
1543 size = sectors * sector_size;
1544 } else {
1545 size = lseek(fd, 0LL, SEEK_END);
1546 if (size < 0) {
1547 return -errno;
1548 }
1549 }
1550 }
83f64091
FB
1551#else
1552 size = lseek(fd, 0LL, SEEK_END);
aa729704
MA
1553 if (size < 0) {
1554 return -errno;
1555 }
9f23011a 1556#endif
a167ba50 1557#if defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
9f23011a
BS
1558 switch(s->type) {
1559 case FTYPE_CD:
1560 /* XXX FreeBSD acd returns UINT_MAX sectors for an empty drive */
1561 if (size == 2048LL * (unsigned)-1)
1562 size = 0;
1563 /* XXX no disc? maybe we need to reopen... */
f3a5d3f8 1564 if (size <= 0 && !reopened && cdrom_reopen(bs) >= 0) {
9f23011a
BS
1565 reopened = 1;
1566 goto again;
1567 }
1568 }
83f64091 1569#endif
50779cc2 1570 } else {
83f64091 1571 size = lseek(fd, 0, SEEK_END);
aa729704
MA
1572 if (size < 0) {
1573 return -errno;
1574 }
83f64091 1575 }
83f64091
FB
1576 return size;
1577}
50779cc2
CH
1578#else
1579static int64_t raw_getlength(BlockDriverState *bs)
1580{
1581 BDRVRawState *s = bs->opaque;
1582 int ret;
aa729704 1583 int64_t size;
50779cc2
CH
1584
1585 ret = fd_open(bs);
1586 if (ret < 0) {
1587 return ret;
1588 }
1589
aa729704
MA
1590 size = lseek(s->fd, 0, SEEK_END);
1591 if (size < 0) {
1592 return -errno;
1593 }
1594 return size;
50779cc2 1595}
128ab2ff 1596#endif
83f64091 1597
4a1d5e1f
FZ
1598static int64_t raw_get_allocated_file_size(BlockDriverState *bs)
1599{
1600 struct stat st;
1601 BDRVRawState *s = bs->opaque;
1602
1603 if (fstat(s->fd, &st) < 0) {
1604 return -errno;
1605 }
1606 return (int64_t)st.st_blocks * 512;
1607}
1608
6f482f74 1609static int raw_create(const char *filename, QemuOpts *opts, Error **errp)
83f64091
FB
1610{
1611 int fd;
1e37d059 1612 int result = 0;
0e7e1989 1613 int64_t total_size = 0;
4ab15590 1614 bool nocow = false;
06247428
HT
1615 PreallocMode prealloc;
1616 char *buf = NULL;
1617 Error *local_err = NULL;
83f64091 1618
464d9f64
HR
1619 strstart(filename, "file:", &filename);
1620
0e7e1989 1621 /* Read out options */
180e9526
HT
1622 total_size = ROUND_UP(qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0),
1623 BDRV_SECTOR_SIZE);
4ab15590 1624 nocow = qemu_opt_get_bool(opts, BLOCK_OPT_NOCOW, false);
06247428
HT
1625 buf = qemu_opt_get_del(opts, BLOCK_OPT_PREALLOC);
1626 prealloc = qapi_enum_parse(PreallocMode_lookup, buf,
1627 PREALLOC_MODE_MAX, PREALLOC_MODE_OFF,
1628 &local_err);
1629 g_free(buf);
1630 if (local_err) {
1631 error_propagate(errp, local_err);
1632 result = -EINVAL;
1633 goto out;
1634 }
83f64091 1635
6165f4d8
CB
1636 fd = qemu_open(filename, O_WRONLY | O_CREAT | O_TRUNC | O_BINARY,
1637 0644);
1e37d059
SW
1638 if (fd < 0) {
1639 result = -errno;
e428e439 1640 error_setg_errno(errp, -result, "Could not create file");
06247428
HT
1641 goto out;
1642 }
1643
1644 if (nocow) {
4ab15590 1645#ifdef __linux__
06247428
HT
1646 /* Set NOCOW flag to solve performance issue on fs like btrfs.
1647 * This is an optimisation. The FS_IOC_SETFLAGS ioctl return value
1648 * will be ignored since any failure of this operation should not
1649 * block the left work.
1650 */
1651 int attr;
1652 if (ioctl(fd, FS_IOC_GETFLAGS, &attr) == 0) {
1653 attr |= FS_NOCOW_FL;
1654 ioctl(fd, FS_IOC_SETFLAGS, &attr);
4ab15590 1655 }
06247428
HT
1656#endif
1657 }
1658
1659 if (ftruncate(fd, total_size) != 0) {
1660 result = -errno;
1661 error_setg_errno(errp, -result, "Could not resize file");
1662 goto out_close;
1663 }
4ab15590 1664
ed911435
KW
1665 switch (prealloc) {
1666#ifdef CONFIG_POSIX_FALLOCATE
1667 case PREALLOC_MODE_FALLOC:
06247428
HT
1668 /* posix_fallocate() doesn't set errno. */
1669 result = -posix_fallocate(fd, 0, total_size);
1670 if (result != 0) {
1671 error_setg_errno(errp, -result,
1672 "Could not preallocate data for the new file");
1e37d059 1673 }
ed911435
KW
1674 break;
1675#endif
1676 case PREALLOC_MODE_FULL:
1677 {
06247428 1678 int64_t num = 0, left = total_size;
ed911435 1679 buf = g_malloc0(65536);
06247428
HT
1680
1681 while (left > 0) {
1682 num = MIN(left, 65536);
1683 result = write(fd, buf, num);
1684 if (result < 0) {
1685 result = -errno;
1686 error_setg_errno(errp, -result,
1687 "Could not write to the new file");
1688 break;
1689 }
39411cf3 1690 left -= result;
1e37d059 1691 }
731de380 1692 if (result >= 0) {
098ffa66
HR
1693 result = fsync(fd);
1694 if (result < 0) {
1695 result = -errno;
1696 error_setg_errno(errp, -result,
1697 "Could not flush new file to disk");
1698 }
731de380 1699 }
06247428 1700 g_free(buf);
ed911435
KW
1701 break;
1702 }
1703 case PREALLOC_MODE_OFF:
1704 break;
1705 default:
06247428
HT
1706 result = -EINVAL;
1707 error_setg(errp, "Unsupported preallocation mode: %s",
1708 PreallocMode_lookup[prealloc]);
ed911435 1709 break;
1e37d059 1710 }
06247428
HT
1711
1712out_close:
1713 if (qemu_close(fd) != 0 && result == 0) {
1714 result = -errno;
1715 error_setg_errno(errp, -result, "Could not close the new file");
1716 }
1717out:
1e37d059 1718 return result;
83f64091
FB
1719}
1720
d1f06fe6
MA
1721/*
1722 * Find allocation range in @bs around offset @start.
1723 * May change underlying file descriptor's file offset.
1724 * If @start is not in a hole, store @start in @data, and the
1725 * beginning of the next hole in @hole, and return 0.
1726 * If @start is in a non-trailing hole, store @start in @hole and the
1727 * beginning of the next non-hole in @data, and return 0.
1728 * If @start is in a trailing hole or beyond EOF, return -ENXIO.
1729 * If we can't find out, return a negative errno other than -ENXIO.
1730 */
1731static int find_allocation(BlockDriverState *bs, off_t start,
1732 off_t *data, off_t *hole)
4f11aa8a
HR
1733{
1734#if defined SEEK_HOLE && defined SEEK_DATA
94282e71 1735 BDRVRawState *s = bs->opaque;
d1f06fe6 1736 off_t offs;
94282e71 1737
d1f06fe6
MA
1738 /*
1739 * SEEK_DATA cases:
1740 * D1. offs == start: start is in data
1741 * D2. offs > start: start is in a hole, next data at offs
1742 * D3. offs < 0, errno = ENXIO: either start is in a trailing hole
1743 * or start is beyond EOF
1744 * If the latter happens, the file has been truncated behind
1745 * our back since we opened it. All bets are off then.
1746 * Treating like a trailing hole is simplest.
1747 * D4. offs < 0, errno != ENXIO: we learned nothing
1748 */
1749 offs = lseek(s->fd, start, SEEK_DATA);
1750 if (offs < 0) {
1751 return -errno; /* D3 or D4 */
1752 }
1753 assert(offs >= start);
1754
1755 if (offs > start) {
1756 /* D2: in hole, next data at offs */
1757 *hole = start;
1758 *data = offs;
1759 return 0;
5500316d
PB
1760 }
1761
d1f06fe6
MA
1762 /* D1: in data, end not yet known */
1763
1764 /*
1765 * SEEK_HOLE cases:
1766 * H1. offs == start: start is in a hole
1767 * If this happens here, a hole has been dug behind our back
1768 * since the previous lseek().
1769 * H2. offs > start: either start is in data, next hole at offs,
1770 * or start is in trailing hole, EOF at offs
1771 * Linux treats trailing holes like any other hole: offs ==
1772 * start. Solaris seeks to EOF instead: offs > start (blech).
1773 * If that happens here, a hole has been dug behind our back
1774 * since the previous lseek().
1775 * H3. offs < 0, errno = ENXIO: start is beyond EOF
1776 * If this happens, the file has been truncated behind our
1777 * back since we opened it. Treat it like a trailing hole.
1778 * H4. offs < 0, errno != ENXIO: we learned nothing
1779 * Pretend we know nothing at all, i.e. "forget" about D1.
1780 */
1781 offs = lseek(s->fd, start, SEEK_HOLE);
1782 if (offs < 0) {
1783 return -errno; /* D1 and (H3 or H4) */
1784 }
1785 assert(offs >= start);
1786
1787 if (offs > start) {
1788 /*
1789 * D1 and H2: either in data, next hole at offs, or it was in
1790 * data but is now in a trailing hole. In the latter case,
1791 * all bets are off. Treating it as if it there was data all
1792 * the way to EOF is safe, so simply do that.
1793 */
4f11aa8a 1794 *data = start;
d1f06fe6
MA
1795 *hole = offs;
1796 return 0;
5500316d 1797 }
4f11aa8a 1798
d1f06fe6
MA
1799 /* D1 and H1 */
1800 return -EBUSY;
5500316d 1801#else
4f11aa8a 1802 return -ENOTSUP;
5500316d 1803#endif
4f11aa8a
HR
1804}
1805
1806/*
be2ebc6d 1807 * Returns the allocation status of the specified sectors.
4f11aa8a
HR
1808 *
1809 * If 'sector_num' is beyond the end of the disk image the return value is 0
1810 * and 'pnum' is set to 0.
1811 *
1812 * 'pnum' is set to the number of sectors (including and immediately following
1813 * the specified sector) that are known to be in the same
1814 * allocated/unallocated state.
1815 *
1816 * 'nb_sectors' is the max value 'pnum' should be set to. If nb_sectors goes
1817 * beyond the end of the disk image it will be clamped.
1818 */
1819static int64_t coroutine_fn raw_co_get_block_status(BlockDriverState *bs,
1820 int64_t sector_num,
1821 int nb_sectors, int *pnum)
1822{
1823 off_t start, data = 0, hole = 0;
e6d7ec32 1824 int64_t total_size;
d7f62751 1825 int ret;
4f11aa8a
HR
1826
1827 ret = fd_open(bs);
1828 if (ret < 0) {
1829 return ret;
1830 }
1831
1832 start = sector_num * BDRV_SECTOR_SIZE;
e6d7ec32
HR
1833 total_size = bdrv_getlength(bs);
1834 if (total_size < 0) {
1835 return total_size;
1836 } else if (start >= total_size) {
1837 *pnum = 0;
1838 return 0;
1839 } else if (start + nb_sectors * BDRV_SECTOR_SIZE > total_size) {
1840 nb_sectors = DIV_ROUND_UP(total_size - start, BDRV_SECTOR_SIZE);
1841 }
4f11aa8a 1842
d1f06fe6
MA
1843 ret = find_allocation(bs, start, &data, &hole);
1844 if (ret == -ENXIO) {
1845 /* Trailing hole */
1846 *pnum = nb_sectors;
1847 ret = BDRV_BLOCK_ZERO;
1848 } else if (ret < 0) {
1849 /* No info available, so pretend there are no holes */
1850 *pnum = nb_sectors;
1851 ret = BDRV_BLOCK_DATA;
1852 } else if (data == start) {
f4a769ab
KW
1853 /* On a data extent, compute sectors to the end of the extent,
1854 * possibly including a partial sector at EOF. */
1855 *pnum = MIN(nb_sectors, DIV_ROUND_UP(hole - start, BDRV_SECTOR_SIZE));
d1f06fe6 1856 ret = BDRV_BLOCK_DATA;
5500316d
PB
1857 } else {
1858 /* On a hole, compute sectors to the beginning of the next extent. */
d1f06fe6 1859 assert(hole == start);
5500316d 1860 *pnum = MIN(nb_sectors, (data - start) / BDRV_SECTOR_SIZE);
d1f06fe6 1861 ret = BDRV_BLOCK_ZERO;
5500316d 1862 }
d1f06fe6 1863 return ret | BDRV_BLOCK_OFFSET_VALID | start;
5500316d
PB
1864}
1865
7c84b1b8 1866static coroutine_fn BlockAIOCB *raw_aio_discard(BlockDriverState *bs,
8238010b 1867 int64_t sector_num, int nb_sectors,
097310b5 1868 BlockCompletionFunc *cb, void *opaque)
dce512de 1869{
dce512de
CH
1870 BDRVRawState *s = bs->opaque;
1871
8238010b
PB
1872 return paio_submit(bs, s->fd, sector_num, NULL, nb_sectors,
1873 cb, opaque, QEMU_AIO_DISCARD);
dce512de 1874}
0e7e1989 1875
260a82e5
PB
1876static int coroutine_fn raw_co_write_zeroes(
1877 BlockDriverState *bs, int64_t sector_num,
1878 int nb_sectors, BdrvRequestFlags flags)
1879{
1880 BDRVRawState *s = bs->opaque;
1881
1882 if (!(flags & BDRV_REQ_MAY_UNMAP)) {
97a2ae34
PB
1883 return paio_submit_co(bs, s->fd, sector_num, NULL, nb_sectors,
1884 QEMU_AIO_WRITE_ZEROES);
1885 } else if (s->discard_zeroes) {
1886 return paio_submit_co(bs, s->fd, sector_num, NULL, nb_sectors,
1887 QEMU_AIO_DISCARD);
260a82e5 1888 }
97a2ae34 1889 return -ENOTSUP;
260a82e5
PB
1890}
1891
1892static int raw_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
1893{
1894 BDRVRawState *s = bs->opaque;
1895
1896 bdi->unallocated_blocks_are_zero = s->discard_zeroes;
1897 bdi->can_write_zeroes_with_unmap = s->discard_zeroes;
1898 return 0;
1899}
1900
6f482f74
CL
1901static QemuOptsList raw_create_opts = {
1902 .name = "raw-create-opts",
1903 .head = QTAILQ_HEAD_INITIALIZER(raw_create_opts.head),
1904 .desc = {
1905 {
1906 .name = BLOCK_OPT_SIZE,
1907 .type = QEMU_OPT_SIZE,
1908 .help = "Virtual disk size"
1909 },
4ab15590
CL
1910 {
1911 .name = BLOCK_OPT_NOCOW,
1912 .type = QEMU_OPT_BOOL,
1913 .help = "Turn off copy-on-write (valid only on btrfs)"
1914 },
06247428
HT
1915 {
1916 .name = BLOCK_OPT_PREALLOC,
1917 .type = QEMU_OPT_STRING,
1918 .help = "Preallocation mode (allowed values: off, falloc, full)"
1919 },
6f482f74
CL
1920 { /* end of list */ }
1921 }
0e7e1989
KW
1922};
1923
5f535a94 1924BlockDriver bdrv_file = {
84a12e66
CH
1925 .format_name = "file",
1926 .protocol_name = "file",
856ae5c3 1927 .instance_size = sizeof(BDRVRawState),
030be321 1928 .bdrv_needs_filename = true,
856ae5c3 1929 .bdrv_probe = NULL, /* no probe for protocols */
078896a9 1930 .bdrv_parse_filename = raw_parse_filename,
66f82cee 1931 .bdrv_file_open = raw_open,
eeb6b45d
JC
1932 .bdrv_reopen_prepare = raw_reopen_prepare,
1933 .bdrv_reopen_commit = raw_reopen_commit,
1934 .bdrv_reopen_abort = raw_reopen_abort,
856ae5c3 1935 .bdrv_close = raw_close,
c282e1fd 1936 .bdrv_create = raw_create,
3ac21627 1937 .bdrv_has_zero_init = bdrv_has_zero_init_1,
b6b8a333 1938 .bdrv_co_get_block_status = raw_co_get_block_status,
260a82e5 1939 .bdrv_co_write_zeroes = raw_co_write_zeroes,
3b46e624 1940
f141eafe
AL
1941 .bdrv_aio_readv = raw_aio_readv,
1942 .bdrv_aio_writev = raw_aio_writev,
b2e12bc6 1943 .bdrv_aio_flush = raw_aio_flush,
8238010b 1944 .bdrv_aio_discard = raw_aio_discard,
c25f53b0 1945 .bdrv_refresh_limits = raw_refresh_limits,
1b3abdcc
ML
1946 .bdrv_io_plug = raw_aio_plug,
1947 .bdrv_io_unplug = raw_aio_unplug,
1948 .bdrv_flush_io_queue = raw_aio_flush_io_queue,
3c529d93 1949
83f64091
FB
1950 .bdrv_truncate = raw_truncate,
1951 .bdrv_getlength = raw_getlength,
260a82e5 1952 .bdrv_get_info = raw_get_info,
4a1d5e1f
FZ
1953 .bdrv_get_allocated_file_size
1954 = raw_get_allocated_file_size,
0e7e1989 1955
c2f3426c
SH
1956 .bdrv_detach_aio_context = raw_detach_aio_context,
1957 .bdrv_attach_aio_context = raw_attach_aio_context,
1958
6f482f74 1959 .create_opts = &raw_create_opts,
83f64091
FB
1960};
1961
19cb3738
FB
1962/***********************************************/
1963/* host device */
1964
83affaa6 1965#if defined(__APPLE__) && defined(__MACH__)
19cb3738
FB
1966static kern_return_t FindEjectableCDMedia( io_iterator_t *mediaIterator );
1967static kern_return_t GetBSDPath( io_iterator_t mediaIterator, char *bsdPath, CFIndex maxPathSize );
1968
1969kern_return_t FindEjectableCDMedia( io_iterator_t *mediaIterator )
1970{
5fafdf24 1971 kern_return_t kernResult;
19cb3738
FB
1972 mach_port_t masterPort;
1973 CFMutableDictionaryRef classesToMatch;
1974
1975 kernResult = IOMasterPort( MACH_PORT_NULL, &masterPort );
1976 if ( KERN_SUCCESS != kernResult ) {
1977 printf( "IOMasterPort returned %d\n", kernResult );
1978 }
3b46e624 1979
5fafdf24 1980 classesToMatch = IOServiceMatching( kIOCDMediaClass );
19cb3738
FB
1981 if ( classesToMatch == NULL ) {
1982 printf( "IOServiceMatching returned a NULL dictionary.\n" );
1983 } else {
1984 CFDictionarySetValue( classesToMatch, CFSTR( kIOMediaEjectableKey ), kCFBooleanTrue );
1985 }
1986 kernResult = IOServiceGetMatchingServices( masterPort, classesToMatch, mediaIterator );
1987 if ( KERN_SUCCESS != kernResult )
1988 {
1989 printf( "IOServiceGetMatchingServices returned %d\n", kernResult );
1990 }
3b46e624 1991
19cb3738
FB
1992 return kernResult;
1993}
1994
1995kern_return_t GetBSDPath( io_iterator_t mediaIterator, char *bsdPath, CFIndex maxPathSize )
1996{
1997 io_object_t nextMedia;
1998 kern_return_t kernResult = KERN_FAILURE;
1999 *bsdPath = '\0';
2000 nextMedia = IOIteratorNext( mediaIterator );
2001 if ( nextMedia )
2002 {
2003 CFTypeRef bsdPathAsCFString;
2004 bsdPathAsCFString = IORegistryEntryCreateCFProperty( nextMedia, CFSTR( kIOBSDNameKey ), kCFAllocatorDefault, 0 );
2005 if ( bsdPathAsCFString ) {
2006 size_t devPathLength;
2007 strcpy( bsdPath, _PATH_DEV );
2008 strcat( bsdPath, "r" );
2009 devPathLength = strlen( bsdPath );
2010 if ( CFStringGetCString( bsdPathAsCFString, bsdPath + devPathLength, maxPathSize - devPathLength, kCFStringEncodingASCII ) ) {
2011 kernResult = KERN_SUCCESS;
2012 }
2013 CFRelease( bsdPathAsCFString );
2014 }
2015 IOObjectRelease( nextMedia );
2016 }
3b46e624 2017
19cb3738
FB
2018 return kernResult;
2019}
2020
2021#endif
2022
508c7cb3
CH
/*
 * Probe score of the host_device driver for @filename: 50 for names
 * starting with "/dev/cdrom", 100 for any other existing character or
 * block device, 0 otherwise.
 */
static int hdev_probe_device(const char *filename)
{
    struct stat info;

    /* allow a dedicated CD-ROM driver to match with a higher priority */
    if (strstart(filename, "/dev/cdrom", NULL)) {
        return 50;
    }

    if (stat(filename, &info) < 0) {
        return 0;
    }
    return (S_ISCHR(info.st_mode) || S_ISBLK(info.st_mode)) ? 100 : 0;
}
2038
da888d37
SH
2039static int check_hdev_writable(BDRVRawState *s)
2040{
2041#if defined(BLKROGET)
2042 /* Linux block devices can be configured "read-only" using blockdev(8).
2043 * This is independent of device node permissions and therefore open(2)
2044 * with O_RDWR succeeds. Actual writes fail with EPERM.
2045 *
2046 * bdrv_open() is supposed to fail if the disk is read-only. Explicitly
2047 * check for read-only block devices so that Linux block devices behave
2048 * properly.
2049 */
2050 struct stat st;
2051 int readonly = 0;
2052
2053 if (fstat(s->fd, &st)) {
2054 return -errno;
2055 }
2056
2057 if (!S_ISBLK(st.st_mode)) {
2058 return 0;
2059 }
2060
2061 if (ioctl(s->fd, BLKROGET, &readonly) < 0) {
2062 return -errno;
2063 }
2064
2065 if (readonly) {
2066 return -EACCES;
2067 }
2068#endif /* defined(BLKROGET) */
2069 return 0;
2070}
2071
7af803d4
HR
2072static void hdev_parse_filename(const char *filename, QDict *options,
2073 Error **errp)
2074{
2075 /* The prefix is optional, just as for "file". */
2076 strstart(filename, "host_device:", &filename);
2077
2078 qdict_put_obj(options, "filename", QOBJECT(qstring_from_str(filename)));
2079}
2080
015a1036
HR
2081static int hdev_open(BlockDriverState *bs, QDict *options, int flags,
2082 Error **errp)
19cb3738
FB
2083{
2084 BDRVRawState *s = bs->opaque;
e428e439 2085 Error *local_err = NULL;
da888d37 2086 int ret;
c66a6157 2087 const char *filename = qdict_get_str(options, "filename");
a76bab49 2088
83affaa6 2089#if defined(__APPLE__) && defined(__MACH__)
19cb3738
FB
2090 if (strstart(filename, "/dev/cdrom", NULL)) {
2091 kern_return_t kernResult;
2092 io_iterator_t mediaIterator;
2093 char bsdPath[ MAXPATHLEN ];
2094 int fd;
5fafdf24 2095
19cb3738
FB
2096 kernResult = FindEjectableCDMedia( &mediaIterator );
2097 kernResult = GetBSDPath( mediaIterator, bsdPath, sizeof( bsdPath ) );
3b46e624 2098
19cb3738
FB
2099 if ( bsdPath[ 0 ] != '\0' ) {
2100 strcat(bsdPath,"s0");
2101 /* some CDs don't have a partition 0 */
6165f4d8 2102 fd = qemu_open(bsdPath, O_RDONLY | O_BINARY | O_LARGEFILE);
19cb3738
FB
2103 if (fd < 0) {
2104 bsdPath[strlen(bsdPath)-1] = '1';
2105 } else {
2e1e79da 2106 qemu_close(fd);
19cb3738
FB
2107 }
2108 filename = bsdPath;
a5c5ea3f 2109 qdict_put(options, "filename", qstring_from_str(filename));
19cb3738 2110 }
3b46e624 2111
19cb3738
FB
2112 if ( mediaIterator )
2113 IOObjectRelease( mediaIterator );
2114 }
2115#endif
19cb3738
FB
2116
2117 s->type = FTYPE_FILE;
4dd75c70 2118#if defined(__linux__)
05acda4d
BK
2119 {
2120 char resolved_path[ MAXPATHLEN ], *temp;
2121
2122 temp = realpath(filename, resolved_path);
2123 if (temp && strstart(temp, "/dev/sg", NULL)) {
2124 bs->sg = 1;
2125 }
19cb3738
FB
2126 }
2127#endif
90babde0 2128
e428e439 2129 ret = raw_open_common(bs, options, flags, 0, &local_err);
da888d37 2130 if (ret < 0) {
84d18f06 2131 if (local_err) {
e428e439
HR
2132 error_propagate(errp, local_err);
2133 }
da888d37
SH
2134 return ret;
2135 }
2136
2137 if (flags & BDRV_O_RDWR) {
2138 ret = check_hdev_writable(s);
2139 if (ret < 0) {
2140 raw_close(bs);
e428e439 2141 error_setg_errno(errp, -ret, "The device is not writable");
da888d37
SH
2142 return ret;
2143 }
2144 }
2145
2146 return ret;
19cb3738
FB
2147}
2148
03ff3ca3 2149#if defined(__linux__)
19cb3738
FB
2150/* Note: we do not have a reliable method to detect if the floppy is
2151 present. The current method is to try to open the floppy at every
2152 I/O and to keep it opened during a few hundreds of ms. */
2153static int fd_open(BlockDriverState *bs)
2154{
2155 BDRVRawState *s = bs->opaque;
2156 int last_media_present;
2157
2158 if (s->type != FTYPE_FD)
2159 return 0;
2160 last_media_present = (s->fd >= 0);
5fafdf24 2161 if (s->fd >= 0 &&
a56ebc6b 2162 (qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - s->fd_open_time) >= FD_OPEN_TIMEOUT) {
2e1e79da 2163 qemu_close(s->fd);
19cb3738
FB
2164 s->fd = -1;
2165#ifdef DEBUG_FLOPPY
2166 printf("Floppy closed\n");
2167#endif
2168 }
2169 if (s->fd < 0) {
5fafdf24 2170 if (s->fd_got_error &&
a56ebc6b 2171 (qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - s->fd_error_time) < FD_OPEN_TIMEOUT) {
19cb3738
FB
2172#ifdef DEBUG_FLOPPY
2173 printf("No floppy (open delayed)\n");
2174#endif
2175 return -EIO;
2176 }
6165f4d8 2177 s->fd = qemu_open(bs->filename, s->open_flags & ~O_NONBLOCK);
19cb3738 2178 if (s->fd < 0) {
a56ebc6b 2179 s->fd_error_time = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
19cb3738
FB
2180 s->fd_got_error = 1;
2181 if (last_media_present)
2182 s->fd_media_changed = 1;
2183#ifdef DEBUG_FLOPPY
2184 printf("No floppy\n");
2185#endif
2186 return -EIO;
2187 }
2188#ifdef DEBUG_FLOPPY
2189 printf("Floppy opened\n");
2190#endif
2191 }
2192 if (!last_media_present)
2193 s->fd_media_changed = 1;
a56ebc6b 2194 s->fd_open_time = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
19cb3738
FB
2195 s->fd_got_error = 0;
2196 return 0;
2197}
19cb3738 2198
63ec93db 2199static int hdev_ioctl(BlockDriverState *bs, unsigned long int req, void *buf)
985a03b0
TS
2200{
2201 BDRVRawState *s = bs->opaque;
2202
2203 return ioctl(s->fd, req, buf);
2204}
221f715d 2205
7c84b1b8 2206static BlockAIOCB *hdev_aio_ioctl(BlockDriverState *bs,
221f715d 2207 unsigned long int req, void *buf,
097310b5 2208 BlockCompletionFunc *cb, void *opaque)
221f715d 2209{
f141eafe 2210 BDRVRawState *s = bs->opaque;
c208e8c2 2211 RawPosixAIOData *acb;
c4d9d196 2212 ThreadPool *pool;
221f715d 2213
f141eafe
AL
2214 if (fd_open(bs) < 0)
2215 return NULL;
c208e8c2
PB
2216
2217 acb = g_slice_new(RawPosixAIOData);
2218 acb->bs = bs;
2219 acb->aio_type = QEMU_AIO_IOCTL;
2220 acb->aio_fildes = s->fd;
2221 acb->aio_offset = 0;
2222 acb->aio_ioctl_buf = buf;
2223 acb->aio_ioctl_cmd = req;
c4d9d196
SH
2224 pool = aio_get_thread_pool(bdrv_get_aio_context(bs));
2225 return thread_pool_submit_aio(pool, aio_worker, acb, cb, opaque);
221f715d
AL
2226}
2227
a167ba50 2228#elif defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
9f23011a
BS
2229static int fd_open(BlockDriverState *bs)
2230{
2231 BDRVRawState *s = bs->opaque;
2232
2233 /* this is just to ensure s->fd is sane (its called by io ops) */
2234 if (s->fd >= 0)
2235 return 0;
2236 return -EIO;
2237}
9f23011a 2238#else /* !linux && !FreeBSD */
19cb3738 2239
08af02e2
AL
2240static int fd_open(BlockDriverState *bs)
2241{
2242 return 0;
2243}
2244
221f715d 2245#endif /* !linux && !FreeBSD */
04eeb8b6 2246
7c84b1b8 2247static coroutine_fn BlockAIOCB *hdev_aio_discard(BlockDriverState *bs,
c36dd8a0 2248 int64_t sector_num, int nb_sectors,
097310b5 2249 BlockCompletionFunc *cb, void *opaque)
c36dd8a0
AF
2250{
2251 BDRVRawState *s = bs->opaque;
2252
2253 if (fd_open(bs) < 0) {
2254 return NULL;
2255 }
2256 return paio_submit(bs, s->fd, sector_num, NULL, nb_sectors,
2257 cb, opaque, QEMU_AIO_DISCARD|QEMU_AIO_BLKDEV);
2258}
2259
d0b4503e
PB
2260static coroutine_fn int hdev_co_write_zeroes(BlockDriverState *bs,
2261 int64_t sector_num, int nb_sectors, BdrvRequestFlags flags)
2262{
2263 BDRVRawState *s = bs->opaque;
2264 int rc;
2265
2266 rc = fd_open(bs);
2267 if (rc < 0) {
2268 return rc;
2269 }
2270 if (!(flags & BDRV_REQ_MAY_UNMAP)) {
97a2ae34
PB
2271 return paio_submit_co(bs, s->fd, sector_num, NULL, nb_sectors,
2272 QEMU_AIO_WRITE_ZEROES|QEMU_AIO_BLKDEV);
2273 } else if (s->discard_zeroes) {
2274 return paio_submit_co(bs, s->fd, sector_num, NULL, nb_sectors,
2275 QEMU_AIO_DISCARD|QEMU_AIO_BLKDEV);
d0b4503e 2276 }
97a2ae34 2277 return -ENOTSUP;
d0b4503e
PB
2278}
2279
6f482f74 2280static int hdev_create(const char *filename, QemuOpts *opts,
d5124c00 2281 Error **errp)
93c65b47
AL
2282{
2283 int fd;
2284 int ret = 0;
2285 struct stat stat_buf;
0e7e1989 2286 int64_t total_size = 0;
cc28c6aa
HR
2287 bool has_prefix;
2288
2289 /* This function is used by all three protocol block drivers and therefore
2290 * any of these three prefixes may be given.
2291 * The return value has to be stored somewhere, otherwise this is an error
2292 * due to -Werror=unused-value. */
2293 has_prefix =
2294 strstart(filename, "host_device:", &filename) ||
2295 strstart(filename, "host_cdrom:" , &filename) ||
2296 strstart(filename, "host_floppy:", &filename);
2297
2298 (void)has_prefix;
93c65b47 2299
0e7e1989 2300 /* Read out options */
180e9526
HT
2301 total_size = ROUND_UP(qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0),
2302 BDRV_SECTOR_SIZE);
93c65b47 2303
6165f4d8 2304 fd = qemu_open(filename, O_WRONLY | O_BINARY);
e428e439
HR
2305 if (fd < 0) {
2306 ret = -errno;
2307 error_setg_errno(errp, -ret, "Could not open device");
2308 return ret;
2309 }
93c65b47 2310
e428e439 2311 if (fstat(fd, &stat_buf) < 0) {
57e69b7d 2312 ret = -errno;
e428e439
HR
2313 error_setg_errno(errp, -ret, "Could not stat device");
2314 } else if (!S_ISBLK(stat_buf.st_mode) && !S_ISCHR(stat_buf.st_mode)) {
2315 error_setg(errp,
2316 "The given file is neither a block nor a character device");
57e69b7d 2317 ret = -ENODEV;
180e9526 2318 } else if (lseek(fd, 0, SEEK_END) < total_size) {
e428e439 2319 error_setg(errp, "Device is too small");
93c65b47 2320 ret = -ENOSPC;
e428e439 2321 }
93c65b47 2322
2e1e79da 2323 qemu_close(fd);
93c65b47
AL
2324 return ret;
2325}
2326
5efa9d5a 2327static BlockDriver bdrv_host_device = {
0b4ce02e 2328 .format_name = "host_device",
84a12e66 2329 .protocol_name = "host_device",
0b4ce02e 2330 .instance_size = sizeof(BDRVRawState),
030be321 2331 .bdrv_needs_filename = true,
0b4ce02e 2332 .bdrv_probe_device = hdev_probe_device,
7af803d4 2333 .bdrv_parse_filename = hdev_parse_filename,
66f82cee 2334 .bdrv_file_open = hdev_open,
0b4ce02e 2335 .bdrv_close = raw_close,
1bc6b705
JC
2336 .bdrv_reopen_prepare = raw_reopen_prepare,
2337 .bdrv_reopen_commit = raw_reopen_commit,
2338 .bdrv_reopen_abort = raw_reopen_abort,
c282e1fd 2339 .bdrv_create = hdev_create,
6f482f74 2340 .create_opts = &raw_create_opts,
d0b4503e 2341 .bdrv_co_write_zeroes = hdev_co_write_zeroes,
3b46e624 2342
f141eafe
AL
2343 .bdrv_aio_readv = raw_aio_readv,
2344 .bdrv_aio_writev = raw_aio_writev,
b2e12bc6 2345 .bdrv_aio_flush = raw_aio_flush,
8238010b 2346 .bdrv_aio_discard = hdev_aio_discard,
c25f53b0 2347 .bdrv_refresh_limits = raw_refresh_limits,
1b3abdcc
ML
2348 .bdrv_io_plug = raw_aio_plug,
2349 .bdrv_io_unplug = raw_aio_unplug,
2350 .bdrv_flush_io_queue = raw_aio_flush_io_queue,
3c529d93 2351
55b949c8 2352 .bdrv_truncate = raw_truncate,
e60f469c 2353 .bdrv_getlength = raw_getlength,
260a82e5 2354 .bdrv_get_info = raw_get_info,
4a1d5e1f
FZ
2355 .bdrv_get_allocated_file_size
2356 = raw_get_allocated_file_size,
1a9335e4
ET
2357 .bdrv_probe_blocksizes = hdev_probe_blocksizes,
2358 .bdrv_probe_geometry = hdev_probe_geometry,
19cb3738 2359
c2f3426c
SH
2360 .bdrv_detach_aio_context = raw_detach_aio_context,
2361 .bdrv_attach_aio_context = raw_attach_aio_context,
2362
f3a5d3f8 2363 /* generic scsi device */
63ec93db
CH
2364#ifdef __linux__
2365 .bdrv_ioctl = hdev_ioctl,
63ec93db
CH
2366 .bdrv_aio_ioctl = hdev_aio_ioctl,
2367#endif
f3a5d3f8
CH
2368};
2369
2370#ifdef __linux__
d3f49845
HR
2371static void floppy_parse_filename(const char *filename, QDict *options,
2372 Error **errp)
2373{
2374 /* The prefix is optional, just as for "file". */
2375 strstart(filename, "host_floppy:", &filename);
2376
2377 qdict_put_obj(options, "filename", QOBJECT(qstring_from_str(filename)));
2378}
2379
015a1036
HR
2380static int floppy_open(BlockDriverState *bs, QDict *options, int flags,
2381 Error **errp)
f3a5d3f8
CH
2382{
2383 BDRVRawState *s = bs->opaque;
e428e439 2384 Error *local_err = NULL;
f3a5d3f8
CH
2385 int ret;
2386
f3a5d3f8 2387 s->type = FTYPE_FD;
f3a5d3f8 2388
19a3da7f 2389 /* open will not fail even if no floppy is inserted, so add O_NONBLOCK */
e428e439
HR
2390 ret = raw_open_common(bs, options, flags, O_NONBLOCK, &local_err);
2391 if (ret) {
84d18f06 2392 if (local_err) {
e428e439
HR
2393 error_propagate(errp, local_err);
2394 }
f3a5d3f8 2395 return ret;
e428e439 2396 }
f3a5d3f8
CH
2397
2398 /* close fd so that we can reopen it as needed */
2e1e79da 2399 qemu_close(s->fd);
f3a5d3f8
CH
2400 s->fd = -1;
2401 s->fd_media_changed = 1;
2402
92a539d2
MA
2403 error_report("Host floppy pass-through is deprecated");
2404 error_printf("Support for it will be removed in a future release.\n");
f3a5d3f8
CH
2405 return 0;
2406}
2407
508c7cb3
CH
2408static int floppy_probe_device(const char *filename)
2409{
2ebf7c4b
CR
2410 int fd, ret;
2411 int prio = 0;
2412 struct floppy_struct fdparam;
343f8568 2413 struct stat st;
2ebf7c4b 2414
e1740828
CB
2415 if (strstart(filename, "/dev/fd", NULL) &&
2416 !strstart(filename, "/dev/fdset/", NULL)) {
2ebf7c4b 2417 prio = 50;
e1740828 2418 }
2ebf7c4b 2419
6165f4d8 2420 fd = qemu_open(filename, O_RDONLY | O_NONBLOCK);
2ebf7c4b
CR
2421 if (fd < 0) {
2422 goto out;
2423 }
343f8568
JS
2424 ret = fstat(fd, &st);
2425 if (ret == -1 || !S_ISBLK(st.st_mode)) {
2426 goto outc;
2427 }
2ebf7c4b
CR
2428
2429 /* Attempt to detect via a floppy specific ioctl */
2430 ret = ioctl(fd, FDGETPRM, &fdparam);
2431 if (ret >= 0)
2432 prio = 100;
2433
343f8568 2434outc:
2e1e79da 2435 qemu_close(fd);
2ebf7c4b
CR
2436out:
2437 return prio;
508c7cb3
CH
2438}
2439
2440
f3a5d3f8
CH
2441static int floppy_is_inserted(BlockDriverState *bs)
2442{
2443 return fd_open(bs) >= 0;
2444}
2445
2446static int floppy_media_changed(BlockDriverState *bs)
2447{
2448 BDRVRawState *s = bs->opaque;
2449 int ret;
2450
2451 /*
2452 * XXX: we do not have a true media changed indication.
2453 * It does not work if the floppy is changed without trying to read it.
2454 */
2455 fd_open(bs);
2456 ret = s->fd_media_changed;
2457 s->fd_media_changed = 0;
2458#ifdef DEBUG_FLOPPY
2459 printf("Floppy changed=%d\n", ret);
2460#endif
2461 return ret;
2462}
2463
f36f3949 2464static void floppy_eject(BlockDriverState *bs, bool eject_flag)
f3a5d3f8
CH
2465{
2466 BDRVRawState *s = bs->opaque;
2467 int fd;
2468
2469 if (s->fd >= 0) {
2e1e79da 2470 qemu_close(s->fd);
f3a5d3f8
CH
2471 s->fd = -1;
2472 }
6165f4d8 2473 fd = qemu_open(bs->filename, s->open_flags | O_NONBLOCK);
f3a5d3f8
CH
2474 if (fd >= 0) {
2475 if (ioctl(fd, FDEJECT, 0) < 0)
2476 perror("FDEJECT");
2e1e79da 2477 qemu_close(fd);
f3a5d3f8 2478 }
f3a5d3f8
CH
2479}
2480
2481static BlockDriver bdrv_host_floppy = {
2482 .format_name = "host_floppy",
84a12e66 2483 .protocol_name = "host_floppy",
f3a5d3f8 2484 .instance_size = sizeof(BDRVRawState),
030be321 2485 .bdrv_needs_filename = true,
508c7cb3 2486 .bdrv_probe_device = floppy_probe_device,
d3f49845 2487 .bdrv_parse_filename = floppy_parse_filename,
66f82cee 2488 .bdrv_file_open = floppy_open,
f3a5d3f8 2489 .bdrv_close = raw_close,
1bc6b705
JC
2490 .bdrv_reopen_prepare = raw_reopen_prepare,
2491 .bdrv_reopen_commit = raw_reopen_commit,
2492 .bdrv_reopen_abort = raw_reopen_abort,
c282e1fd 2493 .bdrv_create = hdev_create,
6f482f74 2494 .create_opts = &raw_create_opts,
f3a5d3f8 2495
f3a5d3f8
CH
2496 .bdrv_aio_readv = raw_aio_readv,
2497 .bdrv_aio_writev = raw_aio_writev,
b2e12bc6 2498 .bdrv_aio_flush = raw_aio_flush,
c25f53b0 2499 .bdrv_refresh_limits = raw_refresh_limits,
1b3abdcc
ML
2500 .bdrv_io_plug = raw_aio_plug,
2501 .bdrv_io_unplug = raw_aio_unplug,
2502 .bdrv_flush_io_queue = raw_aio_flush_io_queue,
f3a5d3f8 2503
55b949c8 2504 .bdrv_truncate = raw_truncate,
b94a2610
KW
2505 .bdrv_getlength = raw_getlength,
2506 .has_variable_length = true,
4a1d5e1f
FZ
2507 .bdrv_get_allocated_file_size
2508 = raw_get_allocated_file_size,
f3a5d3f8 2509
c2f3426c
SH
2510 .bdrv_detach_aio_context = raw_detach_aio_context,
2511 .bdrv_attach_aio_context = raw_attach_aio_context,
2512
f3a5d3f8
CH
2513 /* removable device support */
2514 .bdrv_is_inserted = floppy_is_inserted,
2515 .bdrv_media_changed = floppy_media_changed,
2516 .bdrv_eject = floppy_eject,
f3a5d3f8 2517};
18fa1c42 2518#endif
f3a5d3f8 2519
18fa1c42
HR
2520#if defined(__linux__) || defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
2521static void cdrom_parse_filename(const char *filename, QDict *options,
2522 Error **errp)
2523{
2524 /* The prefix is optional, just as for "file". */
2525 strstart(filename, "host_cdrom:", &filename);
2526
2527 qdict_put_obj(options, "filename", QOBJECT(qstring_from_str(filename)));
2528}
2529#endif
2530
2531#ifdef __linux__
015a1036
HR
2532static int cdrom_open(BlockDriverState *bs, QDict *options, int flags,
2533 Error **errp)
f3a5d3f8
CH
2534{
2535 BDRVRawState *s = bs->opaque;
e428e439
HR
2536 Error *local_err = NULL;
2537 int ret;
f3a5d3f8 2538
f3a5d3f8
CH
2539 s->type = FTYPE_CD;
2540
19a3da7f 2541 /* open will not fail even if no CD is inserted, so add O_NONBLOCK */
e428e439 2542 ret = raw_open_common(bs, options, flags, O_NONBLOCK, &local_err);
84d18f06 2543 if (local_err) {
e428e439
HR
2544 error_propagate(errp, local_err);
2545 }
2546 return ret;
f3a5d3f8
CH
2547}
2548
508c7cb3
CH
2549static int cdrom_probe_device(const char *filename)
2550{
3baf720e
CR
2551 int fd, ret;
2552 int prio = 0;
343f8568 2553 struct stat st;
3baf720e 2554
6165f4d8 2555 fd = qemu_open(filename, O_RDONLY | O_NONBLOCK);
3baf720e
CR
2556 if (fd < 0) {
2557 goto out;
2558 }
343f8568
JS
2559 ret = fstat(fd, &st);
2560 if (ret == -1 || !S_ISBLK(st.st_mode)) {
2561 goto outc;
2562 }
3baf720e
CR
2563
2564 /* Attempt to detect via a CDROM specific ioctl */
2565 ret = ioctl(fd, CDROM_DRIVE_STATUS, CDSL_CURRENT);
2566 if (ret >= 0)
2567 prio = 100;
2568
343f8568 2569outc:
2e1e79da 2570 qemu_close(fd);
3baf720e
CR
2571out:
2572 return prio;
508c7cb3
CH
2573}
2574
f3a5d3f8
CH
2575static int cdrom_is_inserted(BlockDriverState *bs)
2576{
2577 BDRVRawState *s = bs->opaque;
2578 int ret;
2579
2580 ret = ioctl(s->fd, CDROM_DRIVE_STATUS, CDSL_CURRENT);
2581 if (ret == CDS_DISC_OK)
2582 return 1;
2583 return 0;
2584}
2585
f36f3949 2586static void cdrom_eject(BlockDriverState *bs, bool eject_flag)
f3a5d3f8
CH
2587{
2588 BDRVRawState *s = bs->opaque;
2589
2590 if (eject_flag) {
2591 if (ioctl(s->fd, CDROMEJECT, NULL) < 0)
2592 perror("CDROMEJECT");
2593 } else {
2594 if (ioctl(s->fd, CDROMCLOSETRAY, NULL) < 0)
2595 perror("CDROMEJECT");
2596 }
f3a5d3f8
CH
2597}
2598
025e849a 2599static void cdrom_lock_medium(BlockDriverState *bs, bool locked)
f3a5d3f8
CH
2600{
2601 BDRVRawState *s = bs->opaque;
2602
2603 if (ioctl(s->fd, CDROM_LOCKDOOR, locked) < 0) {
2604 /*
2605 * Note: an error can happen if the distribution automatically
2606 * mounts the CD-ROM
2607 */
2608 /* perror("CDROM_LOCKDOOR"); */
2609 }
f3a5d3f8
CH
2610}
2611
2612static BlockDriver bdrv_host_cdrom = {
2613 .format_name = "host_cdrom",
84a12e66 2614 .protocol_name = "host_cdrom",
f3a5d3f8 2615 .instance_size = sizeof(BDRVRawState),
030be321 2616 .bdrv_needs_filename = true,
508c7cb3 2617 .bdrv_probe_device = cdrom_probe_device,
18fa1c42 2618 .bdrv_parse_filename = cdrom_parse_filename,
66f82cee 2619 .bdrv_file_open = cdrom_open,
f3a5d3f8 2620 .bdrv_close = raw_close,
1bc6b705
JC
2621 .bdrv_reopen_prepare = raw_reopen_prepare,
2622 .bdrv_reopen_commit = raw_reopen_commit,
2623 .bdrv_reopen_abort = raw_reopen_abort,
c282e1fd 2624 .bdrv_create = hdev_create,
6f482f74 2625 .create_opts = &raw_create_opts,
f3a5d3f8 2626
f3a5d3f8
CH
2627 .bdrv_aio_readv = raw_aio_readv,
2628 .bdrv_aio_writev = raw_aio_writev,
b2e12bc6 2629 .bdrv_aio_flush = raw_aio_flush,
c25f53b0 2630 .bdrv_refresh_limits = raw_refresh_limits,
1b3abdcc
ML
2631 .bdrv_io_plug = raw_aio_plug,
2632 .bdrv_io_unplug = raw_aio_unplug,
2633 .bdrv_flush_io_queue = raw_aio_flush_io_queue,
f3a5d3f8 2634
55b949c8 2635 .bdrv_truncate = raw_truncate,
b94a2610
KW
2636 .bdrv_getlength = raw_getlength,
2637 .has_variable_length = true,
4a1d5e1f
FZ
2638 .bdrv_get_allocated_file_size
2639 = raw_get_allocated_file_size,
f3a5d3f8 2640
c2f3426c
SH
2641 .bdrv_detach_aio_context = raw_detach_aio_context,
2642 .bdrv_attach_aio_context = raw_attach_aio_context,
2643
f3a5d3f8
CH
2644 /* removable device support */
2645 .bdrv_is_inserted = cdrom_is_inserted,
2646 .bdrv_eject = cdrom_eject,
025e849a 2647 .bdrv_lock_medium = cdrom_lock_medium,
f3a5d3f8
CH
2648
2649 /* generic scsi device */
63ec93db 2650 .bdrv_ioctl = hdev_ioctl,
63ec93db 2651 .bdrv_aio_ioctl = hdev_aio_ioctl,
f3a5d3f8
CH
2652};
2653#endif /* __linux__ */
2654
a167ba50 2655#if defined (__FreeBSD__) || defined(__FreeBSD_kernel__)
511018e4
AT
2656static int cdrom_open(BlockDriverState *bs, QDict *options, int flags,
2657 Error **errp)
f3a5d3f8
CH
2658{
2659 BDRVRawState *s = bs->opaque;
e428e439 2660 Error *local_err = NULL;
f3a5d3f8
CH
2661 int ret;
2662
2663 s->type = FTYPE_CD;
2664
e428e439
HR
2665 ret = raw_open_common(bs, options, flags, 0, &local_err);
2666 if (ret) {
84d18f06 2667 if (local_err) {
e428e439
HR
2668 error_propagate(errp, local_err);
2669 }
f3a5d3f8 2670 return ret;
e428e439 2671 }
f3a5d3f8 2672
9b2260cb 2673 /* make sure the door isn't locked at this time */
f3a5d3f8
CH
2674 ioctl(s->fd, CDIOCALLOW);
2675 return 0;
2676}
2677
508c7cb3
CH
/*
 * Score how likely @filename is a host CD-ROM drive (FreeBSD), judging by
 * the path alone: 100 when it starts with "/dev/cd" or "/dev/acd",
 * 0 otherwise.
 */
static int cdrom_probe_device(const char *filename)
{
    int name_matches = strstart(filename, "/dev/cd", NULL) ||
                       strstart(filename, "/dev/acd", NULL);

    return name_matches ? 100 : 0;
}
2685
f3a5d3f8
CH
2686static int cdrom_reopen(BlockDriverState *bs)
2687{
2688 BDRVRawState *s = bs->opaque;
2689 int fd;
2690
2691 /*
2692 * Force reread of possibly changed/newly loaded disc,
2693 * FreeBSD seems to not notice sometimes...
2694 */
2695 if (s->fd >= 0)
2e1e79da 2696 qemu_close(s->fd);
6165f4d8 2697 fd = qemu_open(bs->filename, s->open_flags, 0644);
f3a5d3f8
CH
2698 if (fd < 0) {
2699 s->fd = -1;
2700 return -EIO;
2701 }
2702 s->fd = fd;
2703
9b2260cb 2704 /* make sure the door isn't locked at this time */
f3a5d3f8
CH
2705 ioctl(s->fd, CDIOCALLOW);
2706 return 0;
2707}
2708
2709static int cdrom_is_inserted(BlockDriverState *bs)
2710{
2711 return raw_getlength(bs) > 0;
2712}
2713
f36f3949 2714static void cdrom_eject(BlockDriverState *bs, bool eject_flag)
f3a5d3f8
CH
2715{
2716 BDRVRawState *s = bs->opaque;
2717
2718 if (s->fd < 0)
822e1cd1 2719 return;
f3a5d3f8
CH
2720
2721 (void) ioctl(s->fd, CDIOCALLOW);
2722
2723 if (eject_flag) {
2724 if (ioctl(s->fd, CDIOCEJECT) < 0)
2725 perror("CDIOCEJECT");
2726 } else {
2727 if (ioctl(s->fd, CDIOCCLOSE) < 0)
2728 perror("CDIOCCLOSE");
2729 }
2730
822e1cd1 2731 cdrom_reopen(bs);
f3a5d3f8
CH
2732}
2733
025e849a 2734static void cdrom_lock_medium(BlockDriverState *bs, bool locked)
f3a5d3f8
CH
2735{
2736 BDRVRawState *s = bs->opaque;
2737
2738 if (s->fd < 0)
7bf37fed 2739 return;
f3a5d3f8
CH
2740 if (ioctl(s->fd, (locked ? CDIOCPREVENT : CDIOCALLOW)) < 0) {
2741 /*
2742 * Note: an error can happen if the distribution automatically
2743 * mounts the CD-ROM
2744 */
2745 /* perror("CDROM_LOCKDOOR"); */
2746 }
f3a5d3f8
CH
2747}
2748
2749static BlockDriver bdrv_host_cdrom = {
2750 .format_name = "host_cdrom",
84a12e66 2751 .protocol_name = "host_cdrom",
f3a5d3f8 2752 .instance_size = sizeof(BDRVRawState),
030be321 2753 .bdrv_needs_filename = true,
508c7cb3 2754 .bdrv_probe_device = cdrom_probe_device,
18fa1c42 2755 .bdrv_parse_filename = cdrom_parse_filename,
66f82cee 2756 .bdrv_file_open = cdrom_open,
f3a5d3f8 2757 .bdrv_close = raw_close,
1bc6b705
JC
2758 .bdrv_reopen_prepare = raw_reopen_prepare,
2759 .bdrv_reopen_commit = raw_reopen_commit,
2760 .bdrv_reopen_abort = raw_reopen_abort,
c282e1fd 2761 .bdrv_create = hdev_create,
6f482f74 2762 .create_opts = &raw_create_opts,
f3a5d3f8 2763
f3a5d3f8
CH
2764 .bdrv_aio_readv = raw_aio_readv,
2765 .bdrv_aio_writev = raw_aio_writev,
b2e12bc6 2766 .bdrv_aio_flush = raw_aio_flush,
c25f53b0 2767 .bdrv_refresh_limits = raw_refresh_limits,
1b3abdcc
ML
2768 .bdrv_io_plug = raw_aio_plug,
2769 .bdrv_io_unplug = raw_aio_unplug,
2770 .bdrv_flush_io_queue = raw_aio_flush_io_queue,
f3a5d3f8 2771
55b949c8 2772 .bdrv_truncate = raw_truncate,
b94a2610
KW
2773 .bdrv_getlength = raw_getlength,
2774 .has_variable_length = true,
4a1d5e1f
FZ
2775 .bdrv_get_allocated_file_size
2776 = raw_get_allocated_file_size,
f3a5d3f8 2777
c2f3426c
SH
2778 .bdrv_detach_aio_context = raw_detach_aio_context,
2779 .bdrv_attach_aio_context = raw_attach_aio_context,
2780
19cb3738 2781 /* removable device support */
f3a5d3f8
CH
2782 .bdrv_is_inserted = cdrom_is_inserted,
2783 .bdrv_eject = cdrom_eject,
025e849a 2784 .bdrv_lock_medium = cdrom_lock_medium,
19cb3738 2785};
f3a5d3f8 2786#endif /* __FreeBSD__ */
5efa9d5a 2787
84a12e66 2788static void bdrv_file_init(void)
5efa9d5a 2789{
508c7cb3
CH
2790 /*
2791 * Register all the drivers. Note that order is important, the driver
2792 * registered last will get probed first.
2793 */
84a12e66 2794 bdrv_register(&bdrv_file);
5efa9d5a 2795 bdrv_register(&bdrv_host_device);
f3a5d3f8
CH
2796#ifdef __linux__
2797 bdrv_register(&bdrv_host_floppy);
2798 bdrv_register(&bdrv_host_cdrom);
2799#endif
a167ba50 2800#if defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
f3a5d3f8
CH
2801 bdrv_register(&bdrv_host_cdrom);
2802#endif
5efa9d5a
AL
2803}
2804
84a12e66 2805block_init(bdrv_file_init);