]> git.proxmox.com Git - mirror_qemu.git/blame - block/raw-posix.c
block/raw-posix: call plain fallocate in handle_aiocb_write_zeroes
[mirror_qemu.git] / block / raw-posix.c
CommitLineData
83f64091 1/*
223d4670 2 * Block driver for RAW files (posix)
5fafdf24 3 *
83f64091 4 * Copyright (c) 2006 Fabrice Bellard
5fafdf24 5 *
83f64091
FB
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to deal
8 * in the Software without restriction, including without limitation the rights
9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 * copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22 * THE SOFTWARE.
23 */
faf07963 24#include "qemu-common.h"
1de7afc9
PB
25#include "qemu/timer.h"
26#include "qemu/log.h"
737e150e 27#include "block/block_int.h"
1de7afc9 28#include "qemu/module.h"
de81a169 29#include "trace.h"
737e150e 30#include "block/thread-pool.h"
1de7afc9 31#include "qemu/iov.h"
9f8540ec 32#include "raw-aio.h"
06247428 33#include "qapi/util.h"
83f64091 34
83affaa6 35#if defined(__APPLE__) && (__MACH__)
83f64091
FB
36#include <paths.h>
37#include <sys/param.h>
38#include <IOKit/IOKitLib.h>
39#include <IOKit/IOBSD.h>
40#include <IOKit/storage/IOMediaBSDClient.h>
41#include <IOKit/storage/IOMedia.h>
42#include <IOKit/storage/IOCDMedia.h>
43//#include <IOKit/storage/IOCDTypes.h>
44#include <CoreFoundation/CoreFoundation.h>
45#endif
46
47#ifdef __sun__
2e9671da 48#define _POSIX_PTHREAD_SEMANTICS 1
83f64091
FB
49#include <sys/dkio.h>
50#endif
19cb3738 51#ifdef __linux__
343f8568
JS
52#include <sys/types.h>
53#include <sys/stat.h>
19cb3738 54#include <sys/ioctl.h>
05acda4d 55#include <sys/param.h>
19cb3738
FB
56#include <linux/cdrom.h>
57#include <linux/fd.h>
5500316d 58#include <linux/fs.h>
4ab15590
CL
59#ifndef FS_NOCOW_FL
60#define FS_NOCOW_FL 0x00800000 /* Do not cow file */
61#endif
5500316d 62#endif
b953f075 63#if defined(CONFIG_FALLOCATE_PUNCH_HOLE) || defined(CONFIG_FALLOCATE_ZERO_RANGE)
3d4fa43e
KK
64#include <linux/falloc.h>
65#endif
a167ba50 66#if defined (__FreeBSD__) || defined(__FreeBSD_kernel__)
1cb6c3fd 67#include <sys/disk.h>
9f23011a 68#include <sys/cdio.h>
1cb6c3fd 69#endif
83f64091 70
128ab2ff
BS
71#ifdef __OpenBSD__
72#include <sys/ioctl.h>
73#include <sys/disklabel.h>
74#include <sys/dkio.h>
75#endif
76
d1f6fd8d
CE
77#ifdef __NetBSD__
78#include <sys/ioctl.h>
79#include <sys/disklabel.h>
80#include <sys/dkio.h>
81#include <sys/disk.h>
82#endif
83
c5e97233
BS
84#ifdef __DragonFly__
85#include <sys/ioctl.h>
86#include <sys/diskslice.h>
87#endif
88
dce512de
CH
89#ifdef CONFIG_XFS
90#include <xfs/xfs.h>
91#endif
92
19cb3738 93//#define DEBUG_FLOPPY
83f64091 94
faf07963 95//#define DEBUG_BLOCK
03ff3ca3 96#if defined(DEBUG_BLOCK)
001faf32
BS
97#define DEBUG_BLOCK_PRINT(formatCstr, ...) do { if (qemu_log_enabled()) \
98 { qemu_log(formatCstr, ## __VA_ARGS__); qemu_log_flush(); } } while (0)
8c05dbf9 99#else
001faf32 100#define DEBUG_BLOCK_PRINT(formatCstr, ...)
8c05dbf9
TS
101#endif
102
f6465578
AL
103/* OS X does not have O_DSYNC */
104#ifndef O_DSYNC
1c27a8b3 105#ifdef O_SYNC
7ab064d2 106#define O_DSYNC O_SYNC
1c27a8b3
JA
107#elif defined(O_FSYNC)
108#define O_DSYNC O_FSYNC
109#endif
f6465578
AL
110#endif
111
9f7965c7
AL
112/* Approximate O_DIRECT with O_DSYNC if O_DIRECT isn't available */
113#ifndef O_DIRECT
114#define O_DIRECT O_DSYNC
115#endif
116
19cb3738
FB
117#define FTYPE_FILE 0
118#define FTYPE_CD 1
119#define FTYPE_FD 2
83f64091 120
c57c846a 121/* if the FD is not accessed during that time (in ns), we try to
19cb3738 122 reopen it to see if the disk has been changed */
c57c846a 123#define FD_OPEN_TIMEOUT (1000000000)
83f64091 124
581b9e29
CH
125#define MAX_BLOCKSIZE 4096
126
/*
 * Per-BlockDriverState private data of the raw POSIX file driver.
 */
typedef struct BDRVRawState {
    int fd;                 /* open descriptor of the image, -1 if none */
    int type;               /* one of the FTYPE_* constants */
    int open_flags;         /* flags the descriptor was opened with */
    size_t buf_align;       /* memory alignment found by raw_probe_alignment() */

#if defined(__linux__)
    /* linux floppy specific */
    int64_t fd_open_time;
    int64_t fd_error_time;
    int fd_got_error;
    int fd_media_changed;
#endif
#ifdef CONFIG_LINUX_AIO
    int use_aio;            /* non-zero when requests go through Linux AIO */
    void *aio_ctx;          /* context returned by laio_init() */
#endif
#ifdef CONFIG_XFS
    bool is_xfs:1;          /* platform_test_xfs_fd() succeeded on fd */
#endif
    bool has_discard:1;     /* cleared once a discard reports -ENOTSUP */
    bool has_write_zeroes:1;/* cleared once zero-writing reports -ENOTSUP */
    bool discard_zeroes:1;  /* set in raw_open_common(); see there */
    bool has_fallocate;     /* cleared once plain fallocate reports -ENOTSUP */
    bool needs_alignment;   /* requests must honour buf_align/request_alignment */
} BDRVRawState;
153
/*
 * State staged by raw_reopen_prepare() and consumed by
 * raw_reopen_commit() / raw_reopen_abort().
 */
typedef struct BDRVRawReopenState {
    int fd;             /* descriptor for the reopened image, -1 if none */
    int open_flags;     /* open(2) flags computed for the new descriptor */
#ifdef CONFIG_LINUX_AIO
    int use_aio;        /* Linux AIO setting to apply on commit */
#endif
} BDRVRawReopenState;
161
19cb3738 162static int fd_open(BlockDriverState *bs);
22afa7b5 163static int64_t raw_getlength(BlockDriverState *bs);
83f64091 164
de81a169
PB
165typedef struct RawPosixAIOData {
166 BlockDriverState *bs;
167 int aio_fildes;
168 union {
169 struct iovec *aio_iov;
170 void *aio_ioctl_buf;
171 };
172 int aio_niov;
8238010b 173 uint64_t aio_nbytes;
de81a169
PB
174#define aio_ioctl_cmd aio_nbytes /* for QEMU_AIO_IOCTL */
175 off_t aio_offset;
176 int aio_type;
177} RawPosixAIOData;
178
a167ba50 179#if defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
f3a5d3f8 180static int cdrom_reopen(BlockDriverState *bs);
9f23011a
BS
181#endif
182
#if defined(__NetBSD__)
/*
 * On NetBSD, replace a block-device path by the matching raw ("r"-prefixed)
 * device path.
 *
 * @filename: in/out; on return it may point to a static buffer holding the
 *            rewritten path, so the result is only valid until the next call.
 *
 * Returns 0 on success (including "not a block device, nothing to do") or
 * -errno if the path cannot be examined.
 */
static int raw_normalize_devicepath(const char **filename)
{
    static char namebuf[PATH_MAX];
    const char *dp, *fname;
    struct stat sb;

    fname = *filename;
    dp = strrchr(fname, '/');
    if (lstat(fname, &sb) < 0) {
        /* fprintf()/strerror() may overwrite errno, so latch it first */
        int saved_errno = errno;

        fprintf(stderr, "%s: stat failed: %s\n",
                fname, strerror(saved_errno));
        return -saved_errno;
    }

    if (!S_ISBLK(sb.st_mode)) {
        return 0;
    }

    if (dp == NULL) {
        /* no directory component: just prepend 'r' */
        snprintf(namebuf, PATH_MAX, "r%s", fname);
    } else {
        /* keep the directory part, prepend 'r' to the last component */
        snprintf(namebuf, PATH_MAX, "%.*s/r%s",
                 (int)(dp - fname), fname, dp + 1);
    }
    fprintf(stderr, "%s is a block device", fname);
    *filename = namebuf;
    fprintf(stderr, ", using %s\n", *filename);

    return 0;
}
#else
/* Other hosts need no path rewriting; always succeeds. */
static int raw_normalize_devicepath(const char **filename)
{
    return 0;
}
#endif
220
df26a350 221static void raw_probe_alignment(BlockDriverState *bs, int fd, Error **errp)
c25f53b0
PB
222{
223 BDRVRawState *s = bs->opaque;
224 char *buf;
225 unsigned int sector_size;
226
227 /* For /dev/sg devices the alignment is not really used.
228 With buffered I/O, we don't have any restrictions. */
3cad8307 229 if (bs->sg || !s->needs_alignment) {
c25f53b0
PB
230 bs->request_alignment = 1;
231 s->buf_align = 1;
232 return;
233 }
234
235 /* Try a few ioctls to get the right size */
236 bs->request_alignment = 0;
237 s->buf_align = 0;
238
239#ifdef BLKSSZGET
df26a350 240 if (ioctl(fd, BLKSSZGET, &sector_size) >= 0) {
c25f53b0
PB
241 bs->request_alignment = sector_size;
242 }
243#endif
244#ifdef DKIOCGETBLOCKSIZE
df26a350 245 if (ioctl(fd, DKIOCGETBLOCKSIZE, &sector_size) >= 0) {
c25f53b0
PB
246 bs->request_alignment = sector_size;
247 }
248#endif
249#ifdef DIOCGSECTORSIZE
df26a350 250 if (ioctl(fd, DIOCGSECTORSIZE, &sector_size) >= 0) {
c25f53b0
PB
251 bs->request_alignment = sector_size;
252 }
253#endif
254#ifdef CONFIG_XFS
255 if (s->is_xfs) {
256 struct dioattr da;
df26a350 257 if (xfsctl(NULL, fd, XFS_IOC_DIOINFO, &da) >= 0) {
c25f53b0
PB
258 bs->request_alignment = da.d_miniosz;
259 /* The kernel returns wrong information for d_mem */
260 /* s->buf_align = da.d_mem; */
261 }
262 }
263#endif
264
265 /* If we could not get the sizes so far, we can only guess them */
266 if (!s->buf_align) {
267 size_t align;
268 buf = qemu_memalign(MAX_BLOCKSIZE, 2 * MAX_BLOCKSIZE);
269 for (align = 512; align <= MAX_BLOCKSIZE; align <<= 1) {
df26a350 270 if (pread(fd, buf + align, MAX_BLOCKSIZE, 0) >= 0) {
c25f53b0
PB
271 s->buf_align = align;
272 break;
273 }
274 }
275 qemu_vfree(buf);
276 }
277
278 if (!bs->request_alignment) {
279 size_t align;
280 buf = qemu_memalign(s->buf_align, MAX_BLOCKSIZE);
281 for (align = 512; align <= MAX_BLOCKSIZE; align <<= 1) {
df26a350 282 if (pread(fd, buf, align, 0) >= 0) {
c25f53b0
PB
283 bs->request_alignment = align;
284 break;
285 }
286 }
287 qemu_vfree(buf);
288 }
df26a350
KW
289
290 if (!s->buf_align || !bs->request_alignment) {
291 error_setg(errp, "Could not find working O_DIRECT alignment. "
292 "Try cache.direct=off.");
293 }
c25f53b0
PB
294}
295
6a8dc042
JC
296static void raw_parse_flags(int bdrv_flags, int *open_flags)
297{
298 assert(open_flags != NULL);
299
300 *open_flags |= O_BINARY;
301 *open_flags &= ~O_ACCMODE;
302 if (bdrv_flags & BDRV_O_RDWR) {
303 *open_flags |= O_RDWR;
304 } else {
305 *open_flags |= O_RDONLY;
306 }
307
308 /* Use O_DSYNC for write-through caching, no flags for write-back caching,
309 * and O_DIRECT for no caching. */
310 if ((bdrv_flags & BDRV_O_NOCACHE)) {
311 *open_flags |= O_DIRECT;
312 }
6a8dc042
JC
313}
314
c2f3426c
SH
315static void raw_detach_aio_context(BlockDriverState *bs)
316{
317#ifdef CONFIG_LINUX_AIO
318 BDRVRawState *s = bs->opaque;
319
320 if (s->use_aio) {
321 laio_detach_aio_context(s->aio_ctx, bdrv_get_aio_context(bs));
322 }
323#endif
324}
325
326static void raw_attach_aio_context(BlockDriverState *bs,
327 AioContext *new_context)
328{
329#ifdef CONFIG_LINUX_AIO
330 BDRVRawState *s = bs->opaque;
331
332 if (s->use_aio) {
333 laio_attach_aio_context(s->aio_ctx, new_context);
334 }
335#endif
336}
337
#ifdef CONFIG_LINUX_AIO
/*
 * Decide whether Linux AIO is used for the given @bdrv_flags.
 *
 * @aio_ctx: in/out; initialised with laio_init() on first use and left
 *           untouched if it is already non-NULL.
 * @use_aio: out; set to 1 when both BDRV_O_NOCACHE and BDRV_O_NATIVE_AIO
 *           are requested, 0 otherwise.
 *
 * Returns 0 on success, -1 if laio_init() failed (*use_aio is then left
 * unmodified).
 */
static int raw_set_aio(void **aio_ctx, int *use_aio, int bdrv_flags)
{
    const int wanted = BDRV_O_NOCACHE | BDRV_O_NATIVE_AIO;

    assert(aio_ctx != NULL);
    assert(use_aio != NULL);

    /*
     * Currently Linux does AIO only for files opened with O_DIRECT,
     * so the NOCACHE flag has to be set as well.
     */
    if ((bdrv_flags & wanted) != wanted) {
        *use_aio = 0;
        return 0;
    }

    /* if non-NULL, laio_init() has already been run */
    if (*aio_ctx == NULL) {
        *aio_ctx = laio_init();
        if (!*aio_ctx) {
            return -1;
        }
    }
    *use_aio = 1;

    return 0;
}
#endif
369
078896a9
HR
370static void raw_parse_filename(const char *filename, QDict *options,
371 Error **errp)
372{
373 /* The filename does not have to be prefixed by the protocol name, since
374 * "file" is the default protocol; therefore, the return value of this
375 * function call can be ignored. */
376 strstart(filename, "file:", &filename);
377
378 qdict_put_obj(options, "filename", QOBJECT(qstring_from_str(filename)));
379}
380
c66a6157
KW
381static QemuOptsList raw_runtime_opts = {
382 .name = "raw",
383 .head = QTAILQ_HEAD_INITIALIZER(raw_runtime_opts.head),
384 .desc = {
385 {
386 .name = "filename",
387 .type = QEMU_OPT_STRING,
388 .help = "File name of the image",
389 },
390 { /* end of list */ }
391 },
392};
393
394static int raw_open_common(BlockDriverState *bs, QDict *options,
e428e439 395 int bdrv_flags, int open_flags, Error **errp)
83f64091
FB
396{
397 BDRVRawState *s = bs->opaque;
c66a6157
KW
398 QemuOpts *opts;
399 Error *local_err = NULL;
8bfea15d 400 const char *filename = NULL;
0e1d8f4c 401 int fd, ret;
260a82e5 402 struct stat st;
83f64091 403
87ea75d5 404 opts = qemu_opts_create(&raw_runtime_opts, NULL, 0, &error_abort);
c66a6157 405 qemu_opts_absorb_qdict(opts, options, &local_err);
84d18f06 406 if (local_err) {
e428e439 407 error_propagate(errp, local_err);
c66a6157
KW
408 ret = -EINVAL;
409 goto fail;
410 }
411
412 filename = qemu_opt_get(opts, "filename");
413
1de1ae0a
CE
414 ret = raw_normalize_devicepath(&filename);
415 if (ret != 0) {
e428e439 416 error_setg_errno(errp, -ret, "Could not normalize device path");
c66a6157 417 goto fail;
1de1ae0a
CE
418 }
419
6a8dc042
JC
420 s->open_flags = open_flags;
421 raw_parse_flags(bdrv_flags, &s->open_flags);
83f64091 422
90babde0 423 s->fd = -1;
40ff6d7e 424 fd = qemu_open(filename, s->open_flags, 0644);
19cb3738
FB
425 if (fd < 0) {
426 ret = -errno;
c66a6157 427 if (ret == -EROFS) {
19cb3738 428 ret = -EACCES;
c66a6157
KW
429 }
430 goto fail;
19cb3738 431 }
83f64091 432 s->fd = fd;
9ef91a67 433
5c6c3a6c 434#ifdef CONFIG_LINUX_AIO
fc32a72d 435 if (raw_set_aio(&s->aio_ctx, &s->use_aio, bdrv_flags)) {
47e6b251 436 qemu_close(fd);
c66a6157 437 ret = -errno;
e428e439 438 error_setg_errno(errp, -ret, "Could not set AIO state");
c66a6157 439 goto fail;
9ef91a67 440 }
fc32a72d 441#endif
9ef91a67 442
7ce21016 443 s->has_discard = true;
97a2ae34 444 s->has_write_zeroes = true;
3cad8307
RPM
445 if ((bs->open_flags & BDRV_O_NOCACHE) != 0) {
446 s->needs_alignment = true;
447 }
260a82e5
PB
448
449 if (fstat(s->fd, &st) < 0) {
01212d4e 450 ret = -errno;
260a82e5
PB
451 error_setg_errno(errp, errno, "Could not stat file");
452 goto fail;
453 }
454 if (S_ISREG(st.st_mode)) {
455 s->discard_zeroes = true;
d50d8222 456 s->has_fallocate = true;
260a82e5 457 }
d0b4503e
PB
458 if (S_ISBLK(st.st_mode)) {
459#ifdef BLKDISCARDZEROES
460 unsigned int arg;
461 if (ioctl(s->fd, BLKDISCARDZEROES, &arg) == 0 && arg) {
462 s->discard_zeroes = true;
463 }
464#endif
465#ifdef __linux__
466 /* On Linux 3.10, BLKDISCARD leaves stale data in the page cache. Do
467 * not rely on the contents of discarded blocks unless using O_DIRECT.
97a2ae34 468 * Same for BLKZEROOUT.
d0b4503e
PB
469 */
470 if (!(bs->open_flags & BDRV_O_NOCACHE)) {
471 s->discard_zeroes = false;
97a2ae34 472 s->has_write_zeroes = false;
d0b4503e
PB
473 }
474#endif
475 }
3cad8307
RPM
476#ifdef __FreeBSD__
477 if (S_ISCHR(st.st_mode)) {
478 /*
479 * The file is a char device (disk), which on FreeBSD isn't behind
480 * a pager, so force all requests to be aligned. This is needed
481 * so QEMU makes sure all IO operations on the device are aligned
482 * to sector size, or else FreeBSD will reject them with EINVAL.
483 */
484 s->needs_alignment = true;
485 }
486#endif
260a82e5 487
dce512de
CH
488#ifdef CONFIG_XFS
489 if (platform_test_xfs_fd(s->fd)) {
7ce21016 490 s->is_xfs = true;
dce512de
CH
491 }
492#endif
493
c2f3426c
SH
494 raw_attach_aio_context(bs, bdrv_get_aio_context(bs));
495
c66a6157
KW
496 ret = 0;
497fail:
8bfea15d
KW
498 if (filename && (bdrv_flags & BDRV_O_TEMPORARY)) {
499 unlink(filename);
500 }
c66a6157
KW
501 qemu_opts_del(opts);
502 return ret;
83f64091
FB
503}
504
015a1036
HR
505static int raw_open(BlockDriverState *bs, QDict *options, int flags,
506 Error **errp)
90babde0
CH
507{
508 BDRVRawState *s = bs->opaque;
e428e439
HR
509 Error *local_err = NULL;
510 int ret;
90babde0
CH
511
512 s->type = FTYPE_FILE;
e428e439 513 ret = raw_open_common(bs, options, flags, 0, &local_err);
84d18f06 514 if (local_err) {
e428e439
HR
515 error_propagate(errp, local_err);
516 }
517 return ret;
90babde0
CH
518}
519
eeb6b45d
JC
520static int raw_reopen_prepare(BDRVReopenState *state,
521 BlockReopenQueue *queue, Error **errp)
522{
523 BDRVRawState *s;
524 BDRVRawReopenState *raw_s;
525 int ret = 0;
df26a350 526 Error *local_err = NULL;
eeb6b45d
JC
527
528 assert(state != NULL);
529 assert(state->bs != NULL);
530
531 s = state->bs->opaque;
532
5839e53b 533 state->opaque = g_new0(BDRVRawReopenState, 1);
eeb6b45d
JC
534 raw_s = state->opaque;
535
536#ifdef CONFIG_LINUX_AIO
537 raw_s->use_aio = s->use_aio;
538
539 /* we can use s->aio_ctx instead of a copy, because the use_aio flag is
540 * valid in the 'false' condition even if aio_ctx is set, and raw_set_aio()
541 * won't override aio_ctx if aio_ctx is non-NULL */
542 if (raw_set_aio(&s->aio_ctx, &raw_s->use_aio, state->flags)) {
e428e439 543 error_setg(errp, "Could not set AIO state");
eeb6b45d
JC
544 return -1;
545 }
546#endif
547
1bc6b705
JC
548 if (s->type == FTYPE_FD || s->type == FTYPE_CD) {
549 raw_s->open_flags |= O_NONBLOCK;
550 }
551
eeb6b45d
JC
552 raw_parse_flags(state->flags, &raw_s->open_flags);
553
554 raw_s->fd = -1;
555
fdf263f6 556 int fcntl_flags = O_APPEND | O_NONBLOCK;
eeb6b45d
JC
557#ifdef O_NOATIME
558 fcntl_flags |= O_NOATIME;
559#endif
560
fdf263f6
AF
561#ifdef O_ASYNC
562 /* Not all operating systems have O_ASYNC, and those that don't
563 * will not let us track the state into raw_s->open_flags (typically
564 * you achieve the same effect with an ioctl, for example I_SETSIG
565 * on Solaris). But we do not use O_ASYNC, so that's fine.
566 */
567 assert((s->open_flags & O_ASYNC) == 0);
568#endif
569
eeb6b45d
JC
570 if ((raw_s->open_flags & ~fcntl_flags) == (s->open_flags & ~fcntl_flags)) {
571 /* dup the original fd */
572 /* TODO: use qemu fcntl wrapper */
573#ifdef F_DUPFD_CLOEXEC
574 raw_s->fd = fcntl(s->fd, F_DUPFD_CLOEXEC, 0);
575#else
576 raw_s->fd = dup(s->fd);
577 if (raw_s->fd != -1) {
578 qemu_set_cloexec(raw_s->fd);
579 }
580#endif
581 if (raw_s->fd >= 0) {
582 ret = fcntl_setfl(raw_s->fd, raw_s->open_flags);
583 if (ret) {
584 qemu_close(raw_s->fd);
585 raw_s->fd = -1;
586 }
587 }
588 }
589
590 /* If we cannot use fcntl, or fcntl failed, fall back to qemu_open() */
591 if (raw_s->fd == -1) {
592 assert(!(raw_s->open_flags & O_CREAT));
593 raw_s->fd = qemu_open(state->bs->filename, raw_s->open_flags);
594 if (raw_s->fd == -1) {
e428e439 595 error_setg_errno(errp, errno, "Could not reopen file");
eeb6b45d
JC
596 ret = -1;
597 }
598 }
df26a350
KW
599
600 /* Fail already reopen_prepare() if we can't get a working O_DIRECT
601 * alignment with the new fd. */
602 if (raw_s->fd != -1) {
603 raw_probe_alignment(state->bs, raw_s->fd, &local_err);
604 if (local_err) {
605 qemu_close(raw_s->fd);
606 raw_s->fd = -1;
607 error_propagate(errp, local_err);
608 ret = -EINVAL;
609 }
610 }
611
eeb6b45d
JC
612 return ret;
613}
614
eeb6b45d
JC
615static void raw_reopen_commit(BDRVReopenState *state)
616{
617 BDRVRawReopenState *raw_s = state->opaque;
618 BDRVRawState *s = state->bs->opaque;
619
620 s->open_flags = raw_s->open_flags;
621
622 qemu_close(s->fd);
623 s->fd = raw_s->fd;
624#ifdef CONFIG_LINUX_AIO
625 s->use_aio = raw_s->use_aio;
626#endif
627
628 g_free(state->opaque);
629 state->opaque = NULL;
630}
631
632
633static void raw_reopen_abort(BDRVReopenState *state)
634{
635 BDRVRawReopenState *raw_s = state->opaque;
636
637 /* nothing to do if NULL, we didn't get far enough */
638 if (raw_s == NULL) {
639 return;
640 }
641
642 if (raw_s->fd >= 0) {
643 qemu_close(raw_s->fd);
644 raw_s->fd = -1;
645 }
646 g_free(state->opaque);
647 state->opaque = NULL;
648}
649
3baca891 650static void raw_refresh_limits(BlockDriverState *bs, Error **errp)
c25f53b0
PB
651{
652 BDRVRawState *s = bs->opaque;
eeb6b45d 653
df26a350 654 raw_probe_alignment(bs, s->fd, errp);
c25f53b0 655 bs->bl.opt_mem_alignment = s->buf_align;
c25f53b0 656}
83f64091 657
de81a169
PB
658static ssize_t handle_aiocb_ioctl(RawPosixAIOData *aiocb)
659{
660 int ret;
661
662 ret = ioctl(aiocb->aio_fildes, aiocb->aio_ioctl_cmd, aiocb->aio_ioctl_buf);
663 if (ret == -1) {
664 return -errno;
665 }
666
b608c8dc 667 return 0;
de81a169
PB
668}
669
670static ssize_t handle_aiocb_flush(RawPosixAIOData *aiocb)
671{
672 int ret;
673
674 ret = qemu_fdatasync(aiocb->aio_fildes);
675 if (ret == -1) {
676 return -errno;
677 }
678 return 0;
679}
680
/*
 * Vectored I/O wrappers.
 *
 * With CONFIG_PREADV they forward to preadv()/pwritev() and
 * preadv_present starts out true. Without it they return -ENOSYS and
 * preadv_present starts out false.
 */
#ifdef CONFIG_PREADV
static bool preadv_present = true;
#else
static bool preadv_present = false;
#endif

static ssize_t
qemu_preadv(int fd, const struct iovec *iov, int nr_iov, off_t offset)
{
#ifdef CONFIG_PREADV
    return preadv(fd, iov, nr_iov, offset);
#else
    return -ENOSYS;
#endif
}

static ssize_t
qemu_pwritev(int fd, const struct iovec *iov, int nr_iov, off_t offset)
{
#ifdef CONFIG_PREADV
    return pwritev(fd, iov, nr_iov, offset);
#else
    return -ENOSYS;
#endif
}
714
715static ssize_t handle_aiocb_rw_vector(RawPosixAIOData *aiocb)
716{
717 ssize_t len;
718
719 do {
720 if (aiocb->aio_type & QEMU_AIO_WRITE)
721 len = qemu_pwritev(aiocb->aio_fildes,
722 aiocb->aio_iov,
723 aiocb->aio_niov,
724 aiocb->aio_offset);
725 else
726 len = qemu_preadv(aiocb->aio_fildes,
727 aiocb->aio_iov,
728 aiocb->aio_niov,
729 aiocb->aio_offset);
730 } while (len == -1 && errno == EINTR);
731
732 if (len == -1) {
733 return -errno;
734 }
735 return len;
736}
737
738/*
739 * Read/writes the data to/from a given linear buffer.
740 *
741 * Returns the number of bytes handles or -errno in case of an error. Short
742 * reads are only returned if the end of the file is reached.
743 */
744static ssize_t handle_aiocb_rw_linear(RawPosixAIOData *aiocb, char *buf)
745{
746 ssize_t offset = 0;
747 ssize_t len;
748
749 while (offset < aiocb->aio_nbytes) {
750 if (aiocb->aio_type & QEMU_AIO_WRITE) {
751 len = pwrite(aiocb->aio_fildes,
752 (const char *)buf + offset,
753 aiocb->aio_nbytes - offset,
754 aiocb->aio_offset + offset);
755 } else {
756 len = pread(aiocb->aio_fildes,
757 buf + offset,
758 aiocb->aio_nbytes - offset,
759 aiocb->aio_offset + offset);
760 }
761 if (len == -1 && errno == EINTR) {
762 continue;
61ed73cf
SH
763 } else if (len == -1 && errno == EINVAL &&
764 (aiocb->bs->open_flags & BDRV_O_NOCACHE) &&
765 !(aiocb->aio_type & QEMU_AIO_WRITE) &&
766 offset > 0) {
767 /* O_DIRECT pread() may fail with EINVAL when offset is unaligned
768 * after a short read. Assume that O_DIRECT short reads only occur
769 * at EOF. Therefore this is a short read, not an I/O error.
770 */
771 break;
de81a169
PB
772 } else if (len == -1) {
773 offset = -errno;
774 break;
775 } else if (len == 0) {
776 break;
777 }
778 offset += len;
779 }
780
781 return offset;
782}
783
784static ssize_t handle_aiocb_rw(RawPosixAIOData *aiocb)
785{
786 ssize_t nbytes;
787 char *buf;
788
789 if (!(aiocb->aio_type & QEMU_AIO_MISALIGNED)) {
790 /*
791 * If there is just a single buffer, and it is properly aligned
792 * we can just use plain pread/pwrite without any problems.
793 */
794 if (aiocb->aio_niov == 1) {
795 return handle_aiocb_rw_linear(aiocb, aiocb->aio_iov->iov_base);
796 }
797 /*
798 * We have more than one iovec, and all are properly aligned.
799 *
800 * Try preadv/pwritev first and fall back to linearizing the
801 * buffer if it's not supported.
802 */
803 if (preadv_present) {
804 nbytes = handle_aiocb_rw_vector(aiocb);
805 if (nbytes == aiocb->aio_nbytes ||
806 (nbytes < 0 && nbytes != -ENOSYS)) {
807 return nbytes;
808 }
809 preadv_present = false;
810 }
811
812 /*
813 * XXX(hch): short read/write. no easy way to handle the reminder
814 * using these interfaces. For now retry using plain
815 * pread/pwrite?
816 */
817 }
818
819 /*
820 * Ok, we have to do it the hard way, copy all segments into
821 * a single aligned buffer.
822 */
50d4a858
KW
823 buf = qemu_try_blockalign(aiocb->bs, aiocb->aio_nbytes);
824 if (buf == NULL) {
825 return -ENOMEM;
826 }
827
de81a169
PB
828 if (aiocb->aio_type & QEMU_AIO_WRITE) {
829 char *p = buf;
830 int i;
831
832 for (i = 0; i < aiocb->aio_niov; ++i) {
833 memcpy(p, aiocb->aio_iov[i].iov_base, aiocb->aio_iov[i].iov_len);
834 p += aiocb->aio_iov[i].iov_len;
835 }
8eb029c2 836 assert(p - buf == aiocb->aio_nbytes);
de81a169
PB
837 }
838
839 nbytes = handle_aiocb_rw_linear(aiocb, buf);
840 if (!(aiocb->aio_type & QEMU_AIO_WRITE)) {
841 char *p = buf;
842 size_t count = aiocb->aio_nbytes, copy;
843 int i;
844
845 for (i = 0; i < aiocb->aio_niov && count; ++i) {
846 copy = count;
847 if (copy > aiocb->aio_iov[i].iov_len) {
848 copy = aiocb->aio_iov[i].iov_len;
849 }
850 memcpy(aiocb->aio_iov[i].iov_base, p, copy);
8eb029c2 851 assert(count >= copy);
de81a169
PB
852 p += copy;
853 count -= copy;
854 }
8eb029c2 855 assert(count == 0);
de81a169
PB
856 }
857 qemu_vfree(buf);
858
859 return nbytes;
860}
861
#ifdef CONFIG_XFS
/*
 * Zero @bytes bytes starting at @offset with XFS_IOC_ZERO_RANGE.
 * Returns 0 on success or -errno if xfsctl() failed.
 */
static int xfs_write_zeroes(BDRVRawState *s, int64_t offset, uint64_t bytes)
{
    struct xfs_flock64 fl;

    memset(&fl, 0, sizeof(fl));
    fl.l_whence = SEEK_SET;
    fl.l_start = offset;
    fl.l_len = bytes;

    if (xfsctl(NULL, s->fd, XFS_IOC_ZERO_RANGE, &fl) < 0) {
        /* the debug print may overwrite errno, so latch it first */
        int saved_errno = errno;

        DEBUG_BLOCK_PRINT("cannot write zero range (%s)\n",
                          strerror(saved_errno));
        return -saved_errno;
    }

    return 0;
}

/*
 * Release @bytes bytes starting at @offset with XFS_IOC_UNRESVSP64.
 * Returns 0 on success or -errno if xfsctl() failed.
 */
static int xfs_discard(BDRVRawState *s, int64_t offset, uint64_t bytes)
{
    struct xfs_flock64 fl;

    memset(&fl, 0, sizeof(fl));
    fl.l_whence = SEEK_SET;
    fl.l_start = offset;
    fl.l_len = bytes;

    if (xfsctl(NULL, s->fd, XFS_IOC_UNRESVSP64, &fl) < 0) {
        /* the debug print may overwrite errno, so latch it first */
        int saved_errno = errno;

        DEBUG_BLOCK_PRINT("cannot punch hole (%s)\n", strerror(saved_errno));
        return -saved_errno;
    }

    return 0;
}
#endif
897
/*
 * Collapse the errno values -ENODEV, -ENOSYS, -EOPNOTSUPP and -ENOTTY into
 * -ENOTSUP; every other value is returned unchanged.
 */
static int translate_err(int err)
{
    const bool unsupported = err == -ENODEV ||
                             err == -ENOSYS ||
                             err == -EOPNOTSUPP ||
                             err == -ENOTTY;

    return unsupported ? -ENOTSUP : err;
}
906
#ifdef CONFIG_FALLOCATE
/*
 * Call fallocate() with the given arguments, retrying while it is
 * interrupted (EINTR). Returns 0 on success, otherwise the negative errno
 * passed through translate_err().
 */
static int do_fallocate(int fd, int mode, off_t offset, off_t len)
{
    while (fallocate(fd, mode, offset, len) != 0) {
        if (errno != EINTR) {
            return translate_err(-errno);
        }
    }
    return 0;
}
#endif
918
37cc9f7f 919static ssize_t handle_aiocb_write_zeroes_block(RawPosixAIOData *aiocb)
97a2ae34 920{
37cc9f7f 921 int ret = -ENOTSUP;
97a2ae34
PB
922 BDRVRawState *s = aiocb->bs->opaque;
923
37cc9f7f 924 if (!s->has_write_zeroes) {
97a2ae34
PB
925 return -ENOTSUP;
926 }
927
97a2ae34 928#ifdef BLKZEROOUT
37cc9f7f
DL
929 do {
930 uint64_t range[2] = { aiocb->aio_offset, aiocb->aio_nbytes };
931 if (ioctl(aiocb->aio_fildes, BLKZEROOUT, range) == 0) {
932 return 0;
97a2ae34 933 }
37cc9f7f
DL
934 } while (errno == EINTR);
935
936 ret = translate_err(-errno);
97a2ae34 937#endif
97a2ae34 938
1486df0e 939 if (ret == -ENOTSUP) {
97a2ae34 940 s->has_write_zeroes = false;
97a2ae34
PB
941 }
942 return ret;
943}
944
37cc9f7f
DL
945static ssize_t handle_aiocb_write_zeroes(RawPosixAIOData *aiocb)
946{
947 BDRVRawState *s = aiocb->bs->opaque;
948
949 if (aiocb->aio_type & QEMU_AIO_BLKDEV) {
950 return handle_aiocb_write_zeroes_block(aiocb);
951 }
952
953#ifdef CONFIG_XFS
954 if (s->is_xfs) {
955 return xfs_write_zeroes(s, aiocb->aio_offset, aiocb->aio_nbytes);
956 }
957#endif
958
b953f075
DL
959#ifdef CONFIG_FALLOCATE_ZERO_RANGE
960 if (s->has_write_zeroes) {
961 int ret = do_fallocate(s->fd, FALLOC_FL_ZERO_RANGE,
962 aiocb->aio_offset, aiocb->aio_nbytes);
963 if (ret == 0 || ret != -ENOTSUP) {
964 return ret;
965 }
966 s->has_write_zeroes = false;
967 }
968#endif
969
d50d8222
DL
970#ifdef CONFIG_FALLOCATE
971 if (s->has_fallocate && aiocb->aio_offset >= bdrv_getlength(aiocb->bs)) {
972 int ret = do_fallocate(s->fd, 0, aiocb->aio_offset, aiocb->aio_nbytes);
973 if (ret == 0 || ret != -ENOTSUP) {
974 return ret;
975 }
976 s->has_fallocate = false;
977 }
978#endif
979
37cc9f7f
DL
980 return -ENOTSUP;
981}
982
8238010b
PB
983static ssize_t handle_aiocb_discard(RawPosixAIOData *aiocb)
984{
985 int ret = -EOPNOTSUPP;
986 BDRVRawState *s = aiocb->bs->opaque;
987
7ce21016
PB
988 if (!s->has_discard) {
989 return -ENOTSUP;
8238010b
PB
990 }
991
992 if (aiocb->aio_type & QEMU_AIO_BLKDEV) {
993#ifdef BLKDISCARD
994 do {
995 uint64_t range[2] = { aiocb->aio_offset, aiocb->aio_nbytes };
996 if (ioctl(aiocb->aio_fildes, BLKDISCARD, range) == 0) {
997 return 0;
998 }
999 } while (errno == EINTR);
1000
1001 ret = -errno;
1002#endif
1003 } else {
1004#ifdef CONFIG_XFS
1005 if (s->is_xfs) {
1006 return xfs_discard(s, aiocb->aio_offset, aiocb->aio_nbytes);
1007 }
1008#endif
1009
1010#ifdef CONFIG_FALLOCATE_PUNCH_HOLE
0b991712
DL
1011 ret = do_fallocate(s->fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
1012 aiocb->aio_offset, aiocb->aio_nbytes);
8238010b
PB
1013#endif
1014 }
1015
1486df0e
DL
1016 ret = translate_err(ret);
1017 if (ret == -ENOTSUP) {
7ce21016 1018 s->has_discard = false;
8238010b
PB
1019 }
1020 return ret;
1021}
1022
de81a169
PB
1023static int aio_worker(void *arg)
1024{
1025 RawPosixAIOData *aiocb = arg;
1026 ssize_t ret = 0;
1027
1028 switch (aiocb->aio_type & QEMU_AIO_TYPE_MASK) {
1029 case QEMU_AIO_READ:
1030 ret = handle_aiocb_rw(aiocb);
1031 if (ret >= 0 && ret < aiocb->aio_nbytes && aiocb->bs->growable) {
1032 iov_memset(aiocb->aio_iov, aiocb->aio_niov, ret,
1033 0, aiocb->aio_nbytes - ret);
1034
1035 ret = aiocb->aio_nbytes;
1036 }
1037 if (ret == aiocb->aio_nbytes) {
1038 ret = 0;
1039 } else if (ret >= 0 && ret < aiocb->aio_nbytes) {
1040 ret = -EINVAL;
1041 }
1042 break;
1043 case QEMU_AIO_WRITE:
1044 ret = handle_aiocb_rw(aiocb);
1045 if (ret == aiocb->aio_nbytes) {
1046 ret = 0;
1047 } else if (ret >= 0 && ret < aiocb->aio_nbytes) {
1048 ret = -EINVAL;
1049 }
1050 break;
1051 case QEMU_AIO_FLUSH:
1052 ret = handle_aiocb_flush(aiocb);
1053 break;
1054 case QEMU_AIO_IOCTL:
1055 ret = handle_aiocb_ioctl(aiocb);
1056 break;
8238010b
PB
1057 case QEMU_AIO_DISCARD:
1058 ret = handle_aiocb_discard(aiocb);
1059 break;
97a2ae34
PB
1060 case QEMU_AIO_WRITE_ZEROES:
1061 ret = handle_aiocb_write_zeroes(aiocb);
1062 break;
de81a169
PB
1063 default:
1064 fprintf(stderr, "invalid aio request (0x%x)\n", aiocb->aio_type);
1065 ret = -EINVAL;
1066 break;
1067 }
1068
1069 g_slice_free(RawPosixAIOData, aiocb);
1070 return ret;
1071}
1072
260a82e5
PB
1073static int paio_submit_co(BlockDriverState *bs, int fd,
1074 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
1075 int type)
1076{
1077 RawPosixAIOData *acb = g_slice_new(RawPosixAIOData);
1078 ThreadPool *pool;
1079
1080 acb->bs = bs;
1081 acb->aio_type = type;
1082 acb->aio_fildes = fd;
1083
8eb029c2
KW
1084 acb->aio_nbytes = nb_sectors * BDRV_SECTOR_SIZE;
1085 acb->aio_offset = sector_num * BDRV_SECTOR_SIZE;
1086
260a82e5
PB
1087 if (qiov) {
1088 acb->aio_iov = qiov->iov;
1089 acb->aio_niov = qiov->niov;
8eb029c2 1090 assert(qiov->size == acb->aio_nbytes);
260a82e5 1091 }
260a82e5
PB
1092
1093 trace_paio_submit_co(sector_num, nb_sectors, type);
1094 pool = aio_get_thread_pool(bdrv_get_aio_context(bs));
1095 return thread_pool_submit_co(pool, aio_worker, acb);
1096}
1097
7c84b1b8 1098static BlockAIOCB *paio_submit(BlockDriverState *bs, int fd,
de81a169 1099 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
097310b5 1100 BlockCompletionFunc *cb, void *opaque, int type)
de81a169
PB
1101{
1102 RawPosixAIOData *acb = g_slice_new(RawPosixAIOData);
c4d9d196 1103 ThreadPool *pool;
de81a169
PB
1104
1105 acb->bs = bs;
1106 acb->aio_type = type;
1107 acb->aio_fildes = fd;
1108
8eb029c2
KW
1109 acb->aio_nbytes = nb_sectors * BDRV_SECTOR_SIZE;
1110 acb->aio_offset = sector_num * BDRV_SECTOR_SIZE;
1111
de81a169
PB
1112 if (qiov) {
1113 acb->aio_iov = qiov->iov;
1114 acb->aio_niov = qiov->niov;
8eb029c2 1115 assert(qiov->size == acb->aio_nbytes);
de81a169 1116 }
de81a169
PB
1117
1118 trace_paio_submit(acb, opaque, sector_num, nb_sectors, type);
c4d9d196
SH
1119 pool = aio_get_thread_pool(bdrv_get_aio_context(bs));
1120 return thread_pool_submit_aio(pool, aio_worker, acb, cb, opaque);
de81a169
PB
1121}
1122
7c84b1b8 1123static BlockAIOCB *raw_aio_submit(BlockDriverState *bs,
9ef91a67 1124 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
097310b5 1125 BlockCompletionFunc *cb, void *opaque, int type)
83f64091 1126{
ce1a14dc 1127 BDRVRawState *s = bs->opaque;
ce1a14dc 1128
19cb3738
FB
1129 if (fd_open(bs) < 0)
1130 return NULL;
1131
f141eafe 1132 /*
3cad8307
RPM
1133 * Check if the underlying device requires requests to be aligned,
1134 * and if the request we are trying to submit is aligned or not.
1135 * If this is the case tell the low-level driver that it needs
1136 * to copy the buffer.
f141eafe 1137 */
3cad8307 1138 if (s->needs_alignment) {
c53b1c51 1139 if (!bdrv_qiov_is_aligned(bs, qiov)) {
5c6c3a6c 1140 type |= QEMU_AIO_MISALIGNED;
e44bd6fc 1141#ifdef CONFIG_LINUX_AIO
5c6c3a6c
CH
1142 } else if (s->use_aio) {
1143 return laio_submit(bs, s->aio_ctx, s->fd, sector_num, qiov,
e44bd6fc
SW
1144 nb_sectors, cb, opaque, type);
1145#endif
5c6c3a6c 1146 }
9ef91a67 1147 }
f141eafe 1148
1e5b9d2f 1149 return paio_submit(bs, s->fd, sector_num, qiov, nb_sectors,
9ef91a67 1150 cb, opaque, type);
83f64091
FB
1151}
1152
1b3abdcc
ML
1153static void raw_aio_plug(BlockDriverState *bs)
1154{
1155#ifdef CONFIG_LINUX_AIO
1156 BDRVRawState *s = bs->opaque;
1157 if (s->use_aio) {
1158 laio_io_plug(bs, s->aio_ctx);
1159 }
1160#endif
1161}
1162
1163static void raw_aio_unplug(BlockDriverState *bs)
1164{
1165#ifdef CONFIG_LINUX_AIO
1166 BDRVRawState *s = bs->opaque;
1167 if (s->use_aio) {
1168 laio_io_unplug(bs, s->aio_ctx, true);
1169 }
1170#endif
1171}
1172
1173static void raw_aio_flush_io_queue(BlockDriverState *bs)
1174{
1175#ifdef CONFIG_LINUX_AIO
1176 BDRVRawState *s = bs->opaque;
1177 if (s->use_aio) {
1178 laio_io_unplug(bs, s->aio_ctx, false);
1179 }
1180#endif
1181}
1182
7c84b1b8 1183static BlockAIOCB *raw_aio_readv(BlockDriverState *bs,
f141eafe 1184 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
097310b5 1185 BlockCompletionFunc *cb, void *opaque)
83f64091 1186{
9ef91a67
CH
1187 return raw_aio_submit(bs, sector_num, qiov, nb_sectors,
1188 cb, opaque, QEMU_AIO_READ);
83f64091
FB
1189}
1190
7c84b1b8 1191static BlockAIOCB *raw_aio_writev(BlockDriverState *bs,
f141eafe 1192 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
097310b5 1193 BlockCompletionFunc *cb, void *opaque)
83f64091 1194{
9ef91a67
CH
1195 return raw_aio_submit(bs, sector_num, qiov, nb_sectors,
1196 cb, opaque, QEMU_AIO_WRITE);
83f64091 1197}
53538725 1198
7c84b1b8 1199static BlockAIOCB *raw_aio_flush(BlockDriverState *bs,
097310b5 1200 BlockCompletionFunc *cb, void *opaque)
b2e12bc6
CH
1201{
1202 BDRVRawState *s = bs->opaque;
1203
1204 if (fd_open(bs) < 0)
1205 return NULL;
1206
1e5b9d2f 1207 return paio_submit(bs, s->fd, 0, NULL, 0, cb, opaque, QEMU_AIO_FLUSH);
b2e12bc6
CH
1208}
1209
83f64091
FB
1210static void raw_close(BlockDriverState *bs)
1211{
1212 BDRVRawState *s = bs->opaque;
c2f3426c
SH
1213
1214 raw_detach_aio_context(bs);
1215
abd269b7
SH
1216#ifdef CONFIG_LINUX_AIO
1217 if (s->use_aio) {
1218 laio_cleanup(s->aio_ctx);
1219 }
1220#endif
19cb3738 1221 if (s->fd >= 0) {
2e1e79da 1222 qemu_close(s->fd);
19cb3738
FB
1223 s->fd = -1;
1224 }
83f64091
FB
1225}
1226
1227static int raw_truncate(BlockDriverState *bs, int64_t offset)
1228{
1229 BDRVRawState *s = bs->opaque;
55b949c8
CH
1230 struct stat st;
1231
1232 if (fstat(s->fd, &st)) {
83f64091 1233 return -errno;
55b949c8
CH
1234 }
1235
1236 if (S_ISREG(st.st_mode)) {
1237 if (ftruncate(s->fd, offset) < 0) {
1238 return -errno;
1239 }
1240 } else if (S_ISCHR(st.st_mode) || S_ISBLK(st.st_mode)) {
1241 if (offset > raw_getlength(bs)) {
1242 return -EINVAL;
1243 }
1244 } else {
1245 return -ENOTSUP;
1246 }
1247
83f64091
FB
1248 return 0;
1249}
1250
128ab2ff
BS
1251#ifdef __OpenBSD__
1252static int64_t raw_getlength(BlockDriverState *bs)
1253{
1254 BDRVRawState *s = bs->opaque;
1255 int fd = s->fd;
1256 struct stat st;
1257
1258 if (fstat(fd, &st))
aa729704 1259 return -errno;
128ab2ff
BS
1260 if (S_ISCHR(st.st_mode) || S_ISBLK(st.st_mode)) {
1261 struct disklabel dl;
1262
1263 if (ioctl(fd, DIOCGDINFO, &dl))
aa729704 1264 return -errno;
128ab2ff
BS
1265 return (uint64_t)dl.d_secsize *
1266 dl.d_partitions[DISKPART(st.st_rdev)].p_size;
1267 } else
1268 return st.st_size;
1269}
d1f6fd8d
CE
1270#elif defined(__NetBSD__)
1271static int64_t raw_getlength(BlockDriverState *bs)
1272{
1273 BDRVRawState *s = bs->opaque;
1274 int fd = s->fd;
1275 struct stat st;
1276
1277 if (fstat(fd, &st))
aa729704 1278 return -errno;
d1f6fd8d
CE
1279 if (S_ISCHR(st.st_mode) || S_ISBLK(st.st_mode)) {
1280 struct dkwedge_info dkw;
1281
1282 if (ioctl(fd, DIOCGWEDGEINFO, &dkw) != -1) {
1283 return dkw.dkw_size * 512;
1284 } else {
1285 struct disklabel dl;
1286
1287 if (ioctl(fd, DIOCGDINFO, &dl))
aa729704 1288 return -errno;
d1f6fd8d
CE
1289 return (uint64_t)dl.d_secsize *
1290 dl.d_partitions[DISKPART(st.st_rdev)].p_size;
1291 }
1292 } else
1293 return st.st_size;
1294}
50779cc2
CH
1295#elif defined(__sun__)
1296static int64_t raw_getlength(BlockDriverState *bs)
1297{
1298 BDRVRawState *s = bs->opaque;
1299 struct dk_minfo minfo;
1300 int ret;
aa729704 1301 int64_t size;
50779cc2
CH
1302
1303 ret = fd_open(bs);
1304 if (ret < 0) {
1305 return ret;
1306 }
1307
1308 /*
1309 * Use the DKIOCGMEDIAINFO ioctl to read the size.
1310 */
1311 ret = ioctl(s->fd, DKIOCGMEDIAINFO, &minfo);
1312 if (ret != -1) {
1313 return minfo.dki_lbsize * minfo.dki_capacity;
1314 }
1315
1316 /*
1317 * There are reports that lseek on some devices fails, but
1318 * irc discussion said that contingency on contingency was overkill.
1319 */
aa729704
MA
1320 size = lseek(s->fd, 0, SEEK_END);
1321 if (size < 0) {
1322 return -errno;
1323 }
1324 return size;
50779cc2
CH
1325}
1326#elif defined(CONFIG_BSD)
/*
 * Generic BSD (including FreeBSD and Mac OS X): return the image length in
 * bytes, or a negative errno.
 *
 * When fstat() succeeds and the S_IFCHR bit is set, the size is obtained
 * from a media-size ioctl if one is available at compile time
 * (DIOCGMEDIASIZE, else DIOCGPART), with a fallback that is LLONG_MAX on
 * Mac OS X and lseek(SEEK_END) elsewhere.  Otherwise lseek(SEEK_END) is
 * used directly.
 *
 * NOTE(review): `fd` is copied from s->fd before fd_open() and is not
 * re-read after cdrom_reopen(); confirm whether cdrom_reopen() can replace
 * s->fd, in which case the retry would use a stale descriptor.
 * NOTE(review): `S_IFCHR & sb.st_mode` is a plain bit test rather than
 * S_ISCHR(); presumably meant to select character devices -- verify.
 */
static int64_t raw_getlength(BlockDriverState *bs)
{
    BDRVRawState *s = bs->opaque;
    int fd = s->fd;
    int64_t size;
    struct stat sb;
#if defined (__FreeBSD__) || defined(__FreeBSD_kernel__)
    int reopened = 0;   /* set once the CD-ROM has been reopened */
#endif
    int ret;

    ret = fd_open(bs);
    if (ret < 0)
        return ret;

#if defined (__FreeBSD__) || defined(__FreeBSD_kernel__)
again:
#endif
    if (!fstat(fd, &sb) && (S_IFCHR & sb.st_mode)) {
        /*
         * The `if` selected below deliberately has no body of its own:
         * its body is the `size = ...` fallback statement chosen by the
         * following #if, so the fallback only runs when the ioctl did not
         * produce a size.  With neither ioctl available the fallback runs
         * unconditionally.
         */
#ifdef DIOCGMEDIASIZE
        if (ioctl(fd, DIOCGMEDIASIZE, (off_t *)&size))
#elif defined(DIOCGPART)
        {
                struct partinfo pi;
                if (ioctl(fd, DIOCGPART, &pi) == 0)
                        size = pi.media_size;
                else
                        size = 0;
        }
        if (size == 0)
#endif
#if defined(__APPLE__) && defined(__MACH__)
        size = LLONG_MAX;
#else
        size = lseek(fd, 0LL, SEEK_END);
        /* Not part of the dangling `if`: checked on every path. */
        if (size < 0) {
            return -errno;
        }
#endif
#if defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
        switch(s->type) {
        case FTYPE_CD:
            /* XXX FreeBSD acd returns UINT_MAX sectors for an empty drive */
            if (size == 2048LL * (unsigned)-1)
                size = 0;
            /* XXX no disc? maybe we need to reopen... */
            /* Retry at most once, and only if the reopen succeeded. */
            if (size <= 0 && !reopened && cdrom_reopen(bs) >= 0) {
                reopened = 1;
                goto again;
            }
        }
#endif
    } else {
        /* fstat() failed or S_IFCHR is not set: use the seek position. */
        size = lseek(fd, 0, SEEK_END);
        if (size < 0) {
            return -errno;
        }
    }
    return size;
}
50779cc2
CH
1387#else
1388static int64_t raw_getlength(BlockDriverState *bs)
1389{
1390 BDRVRawState *s = bs->opaque;
1391 int ret;
aa729704 1392 int64_t size;
50779cc2
CH
1393
1394 ret = fd_open(bs);
1395 if (ret < 0) {
1396 return ret;
1397 }
1398
aa729704
MA
1399 size = lseek(s->fd, 0, SEEK_END);
1400 if (size < 0) {
1401 return -errno;
1402 }
1403 return size;
50779cc2 1404}
128ab2ff 1405#endif
83f64091 1406
4a1d5e1f
FZ
1407static int64_t raw_get_allocated_file_size(BlockDriverState *bs)
1408{
1409 struct stat st;
1410 BDRVRawState *s = bs->opaque;
1411
1412 if (fstat(s->fd, &st) < 0) {
1413 return -errno;
1414 }
1415 return (int64_t)st.st_blocks * 512;
1416}
1417
6f482f74 1418static int raw_create(const char *filename, QemuOpts *opts, Error **errp)
83f64091
FB
1419{
1420 int fd;
1e37d059 1421 int result = 0;
0e7e1989 1422 int64_t total_size = 0;
4ab15590 1423 bool nocow = false;
06247428
HT
1424 PreallocMode prealloc;
1425 char *buf = NULL;
1426 Error *local_err = NULL;
83f64091 1427
464d9f64
HR
1428 strstart(filename, "file:", &filename);
1429
0e7e1989 1430 /* Read out options */
180e9526
HT
1431 total_size = ROUND_UP(qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0),
1432 BDRV_SECTOR_SIZE);
4ab15590 1433 nocow = qemu_opt_get_bool(opts, BLOCK_OPT_NOCOW, false);
06247428
HT
1434 buf = qemu_opt_get_del(opts, BLOCK_OPT_PREALLOC);
1435 prealloc = qapi_enum_parse(PreallocMode_lookup, buf,
1436 PREALLOC_MODE_MAX, PREALLOC_MODE_OFF,
1437 &local_err);
1438 g_free(buf);
1439 if (local_err) {
1440 error_propagate(errp, local_err);
1441 result = -EINVAL;
1442 goto out;
1443 }
83f64091 1444
6165f4d8
CB
1445 fd = qemu_open(filename, O_WRONLY | O_CREAT | O_TRUNC | O_BINARY,
1446 0644);
1e37d059
SW
1447 if (fd < 0) {
1448 result = -errno;
e428e439 1449 error_setg_errno(errp, -result, "Could not create file");
06247428
HT
1450 goto out;
1451 }
1452
1453 if (nocow) {
4ab15590 1454#ifdef __linux__
06247428
HT
1455 /* Set NOCOW flag to solve performance issue on fs like btrfs.
1456 * This is an optimisation. The FS_IOC_SETFLAGS ioctl return value
1457 * will be ignored since any failure of this operation should not
1458 * block the left work.
1459 */
1460 int attr;
1461 if (ioctl(fd, FS_IOC_GETFLAGS, &attr) == 0) {
1462 attr |= FS_NOCOW_FL;
1463 ioctl(fd, FS_IOC_SETFLAGS, &attr);
4ab15590 1464 }
06247428
HT
1465#endif
1466 }
1467
1468 if (ftruncate(fd, total_size) != 0) {
1469 result = -errno;
1470 error_setg_errno(errp, -result, "Could not resize file");
1471 goto out_close;
1472 }
4ab15590 1473
ed911435
KW
1474 switch (prealloc) {
1475#ifdef CONFIG_POSIX_FALLOCATE
1476 case PREALLOC_MODE_FALLOC:
06247428
HT
1477 /* posix_fallocate() doesn't set errno. */
1478 result = -posix_fallocate(fd, 0, total_size);
1479 if (result != 0) {
1480 error_setg_errno(errp, -result,
1481 "Could not preallocate data for the new file");
1e37d059 1482 }
ed911435
KW
1483 break;
1484#endif
1485 case PREALLOC_MODE_FULL:
1486 {
06247428 1487 int64_t num = 0, left = total_size;
ed911435 1488 buf = g_malloc0(65536);
06247428
HT
1489
1490 while (left > 0) {
1491 num = MIN(left, 65536);
1492 result = write(fd, buf, num);
1493 if (result < 0) {
1494 result = -errno;
1495 error_setg_errno(errp, -result,
1496 "Could not write to the new file");
1497 break;
1498 }
39411cf3 1499 left -= result;
1e37d059 1500 }
731de380 1501 if (result >= 0) {
098ffa66
HR
1502 result = fsync(fd);
1503 if (result < 0) {
1504 result = -errno;
1505 error_setg_errno(errp, -result,
1506 "Could not flush new file to disk");
1507 }
731de380 1508 }
06247428 1509 g_free(buf);
ed911435
KW
1510 break;
1511 }
1512 case PREALLOC_MODE_OFF:
1513 break;
1514 default:
06247428
HT
1515 result = -EINVAL;
1516 error_setg(errp, "Unsupported preallocation mode: %s",
1517 PreallocMode_lookup[prealloc]);
ed911435 1518 break;
1e37d059 1519 }
06247428
HT
1520
1521out_close:
1522 if (qemu_close(fd) != 0 && result == 0) {
1523 result = -errno;
1524 error_setg_errno(errp, -result, "Could not close the new file");
1525 }
1526out:
1e37d059 1527 return result;
83f64091
FB
1528}
1529
d1f06fe6
MA
1530/*
1531 * Find allocation range in @bs around offset @start.
1532 * May change underlying file descriptor's file offset.
1533 * If @start is not in a hole, store @start in @data, and the
1534 * beginning of the next hole in @hole, and return 0.
1535 * If @start is in a non-trailing hole, store @start in @hole and the
1536 * beginning of the next non-hole in @data, and return 0.
1537 * If @start is in a trailing hole or beyond EOF, return -ENXIO.
1538 * If we can't find out, return a negative errno other than -ENXIO.
1539 */
1540static int find_allocation(BlockDriverState *bs, off_t start,
1541 off_t *data, off_t *hole)
4f11aa8a
HR
1542{
1543#if defined SEEK_HOLE && defined SEEK_DATA
94282e71 1544 BDRVRawState *s = bs->opaque;
d1f06fe6 1545 off_t offs;
94282e71 1546
d1f06fe6
MA
1547 /*
1548 * SEEK_DATA cases:
1549 * D1. offs == start: start is in data
1550 * D2. offs > start: start is in a hole, next data at offs
1551 * D3. offs < 0, errno = ENXIO: either start is in a trailing hole
1552 * or start is beyond EOF
1553 * If the latter happens, the file has been truncated behind
1554 * our back since we opened it. All bets are off then.
1555 * Treating like a trailing hole is simplest.
1556 * D4. offs < 0, errno != ENXIO: we learned nothing
1557 */
1558 offs = lseek(s->fd, start, SEEK_DATA);
1559 if (offs < 0) {
1560 return -errno; /* D3 or D4 */
1561 }
1562 assert(offs >= start);
1563
1564 if (offs > start) {
1565 /* D2: in hole, next data at offs */
1566 *hole = start;
1567 *data = offs;
1568 return 0;
5500316d
PB
1569 }
1570
d1f06fe6
MA
1571 /* D1: in data, end not yet known */
1572
1573 /*
1574 * SEEK_HOLE cases:
1575 * H1. offs == start: start is in a hole
1576 * If this happens here, a hole has been dug behind our back
1577 * since the previous lseek().
1578 * H2. offs > start: either start is in data, next hole at offs,
1579 * or start is in trailing hole, EOF at offs
1580 * Linux treats trailing holes like any other hole: offs ==
1581 * start. Solaris seeks to EOF instead: offs > start (blech).
1582 * If that happens here, a hole has been dug behind our back
1583 * since the previous lseek().
1584 * H3. offs < 0, errno = ENXIO: start is beyond EOF
1585 * If this happens, the file has been truncated behind our
1586 * back since we opened it. Treat it like a trailing hole.
1587 * H4. offs < 0, errno != ENXIO: we learned nothing
1588 * Pretend we know nothing at all, i.e. "forget" about D1.
1589 */
1590 offs = lseek(s->fd, start, SEEK_HOLE);
1591 if (offs < 0) {
1592 return -errno; /* D1 and (H3 or H4) */
1593 }
1594 assert(offs >= start);
1595
1596 if (offs > start) {
1597 /*
1598 * D1 and H2: either in data, next hole at offs, or it was in
1599 * data but is now in a trailing hole. In the latter case,
1600 * all bets are off. Treating it as if it there was data all
1601 * the way to EOF is safe, so simply do that.
1602 */
4f11aa8a 1603 *data = start;
d1f06fe6
MA
1604 *hole = offs;
1605 return 0;
5500316d 1606 }
4f11aa8a 1607
d1f06fe6
MA
1608 /* D1 and H1 */
1609 return -EBUSY;
5500316d 1610#else
4f11aa8a 1611 return -ENOTSUP;
5500316d 1612#endif
4f11aa8a
HR
1613}
1614
1615/*
be2ebc6d 1616 * Returns the allocation status of the specified sectors.
4f11aa8a
HR
1617 *
1618 * If 'sector_num' is beyond the end of the disk image the return value is 0
1619 * and 'pnum' is set to 0.
1620 *
1621 * 'pnum' is set to the number of sectors (including and immediately following
1622 * the specified sector) that are known to be in the same
1623 * allocated/unallocated state.
1624 *
1625 * 'nb_sectors' is the max value 'pnum' should be set to. If nb_sectors goes
1626 * beyond the end of the disk image it will be clamped.
1627 */
1628static int64_t coroutine_fn raw_co_get_block_status(BlockDriverState *bs,
1629 int64_t sector_num,
1630 int nb_sectors, int *pnum)
1631{
1632 off_t start, data = 0, hole = 0;
e6d7ec32 1633 int64_t total_size;
d7f62751 1634 int ret;
4f11aa8a
HR
1635
1636 ret = fd_open(bs);
1637 if (ret < 0) {
1638 return ret;
1639 }
1640
1641 start = sector_num * BDRV_SECTOR_SIZE;
e6d7ec32
HR
1642 total_size = bdrv_getlength(bs);
1643 if (total_size < 0) {
1644 return total_size;
1645 } else if (start >= total_size) {
1646 *pnum = 0;
1647 return 0;
1648 } else if (start + nb_sectors * BDRV_SECTOR_SIZE > total_size) {
1649 nb_sectors = DIV_ROUND_UP(total_size - start, BDRV_SECTOR_SIZE);
1650 }
4f11aa8a 1651
d1f06fe6
MA
1652 ret = find_allocation(bs, start, &data, &hole);
1653 if (ret == -ENXIO) {
1654 /* Trailing hole */
1655 *pnum = nb_sectors;
1656 ret = BDRV_BLOCK_ZERO;
1657 } else if (ret < 0) {
1658 /* No info available, so pretend there are no holes */
1659 *pnum = nb_sectors;
1660 ret = BDRV_BLOCK_DATA;
1661 } else if (data == start) {
5500316d
PB
1662 /* On a data extent, compute sectors to the end of the extent. */
1663 *pnum = MIN(nb_sectors, (hole - start) / BDRV_SECTOR_SIZE);
d1f06fe6 1664 ret = BDRV_BLOCK_DATA;
5500316d
PB
1665 } else {
1666 /* On a hole, compute sectors to the beginning of the next extent. */
d1f06fe6 1667 assert(hole == start);
5500316d 1668 *pnum = MIN(nb_sectors, (data - start) / BDRV_SECTOR_SIZE);
d1f06fe6 1669 ret = BDRV_BLOCK_ZERO;
5500316d 1670 }
d1f06fe6 1671 return ret | BDRV_BLOCK_OFFSET_VALID | start;
5500316d
PB
1672}
1673
7c84b1b8 1674static coroutine_fn BlockAIOCB *raw_aio_discard(BlockDriverState *bs,
8238010b 1675 int64_t sector_num, int nb_sectors,
097310b5 1676 BlockCompletionFunc *cb, void *opaque)
dce512de 1677{
dce512de
CH
1678 BDRVRawState *s = bs->opaque;
1679
8238010b
PB
1680 return paio_submit(bs, s->fd, sector_num, NULL, nb_sectors,
1681 cb, opaque, QEMU_AIO_DISCARD);
dce512de 1682}
0e7e1989 1683
260a82e5
PB
1684static int coroutine_fn raw_co_write_zeroes(
1685 BlockDriverState *bs, int64_t sector_num,
1686 int nb_sectors, BdrvRequestFlags flags)
1687{
1688 BDRVRawState *s = bs->opaque;
1689
1690 if (!(flags & BDRV_REQ_MAY_UNMAP)) {
97a2ae34
PB
1691 return paio_submit_co(bs, s->fd, sector_num, NULL, nb_sectors,
1692 QEMU_AIO_WRITE_ZEROES);
1693 } else if (s->discard_zeroes) {
1694 return paio_submit_co(bs, s->fd, sector_num, NULL, nb_sectors,
1695 QEMU_AIO_DISCARD);
260a82e5 1696 }
97a2ae34 1697 return -ENOTSUP;
260a82e5
PB
1698}
1699
1700static int raw_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
1701{
1702 BDRVRawState *s = bs->opaque;
1703
1704 bdi->unallocated_blocks_are_zero = s->discard_zeroes;
1705 bdi->can_write_zeroes_with_unmap = s->discard_zeroes;
1706 return 0;
1707}
1708
6f482f74
CL
1709static QemuOptsList raw_create_opts = {
1710 .name = "raw-create-opts",
1711 .head = QTAILQ_HEAD_INITIALIZER(raw_create_opts.head),
1712 .desc = {
1713 {
1714 .name = BLOCK_OPT_SIZE,
1715 .type = QEMU_OPT_SIZE,
1716 .help = "Virtual disk size"
1717 },
4ab15590
CL
1718 {
1719 .name = BLOCK_OPT_NOCOW,
1720 .type = QEMU_OPT_BOOL,
1721 .help = "Turn off copy-on-write (valid only on btrfs)"
1722 },
06247428
HT
1723 {
1724 .name = BLOCK_OPT_PREALLOC,
1725 .type = QEMU_OPT_STRING,
1726 .help = "Preallocation mode (allowed values: off, falloc, full)"
1727 },
6f482f74
CL
1728 { /* end of list */ }
1729 }
0e7e1989
KW
1730};
1731
5f535a94 1732BlockDriver bdrv_file = {
84a12e66
CH
1733 .format_name = "file",
1734 .protocol_name = "file",
856ae5c3 1735 .instance_size = sizeof(BDRVRawState),
030be321 1736 .bdrv_needs_filename = true,
856ae5c3 1737 .bdrv_probe = NULL, /* no probe for protocols */
078896a9 1738 .bdrv_parse_filename = raw_parse_filename,
66f82cee 1739 .bdrv_file_open = raw_open,
eeb6b45d
JC
1740 .bdrv_reopen_prepare = raw_reopen_prepare,
1741 .bdrv_reopen_commit = raw_reopen_commit,
1742 .bdrv_reopen_abort = raw_reopen_abort,
856ae5c3 1743 .bdrv_close = raw_close,
c282e1fd 1744 .bdrv_create = raw_create,
3ac21627 1745 .bdrv_has_zero_init = bdrv_has_zero_init_1,
b6b8a333 1746 .bdrv_co_get_block_status = raw_co_get_block_status,
260a82e5 1747 .bdrv_co_write_zeroes = raw_co_write_zeroes,
3b46e624 1748
f141eafe
AL
1749 .bdrv_aio_readv = raw_aio_readv,
1750 .bdrv_aio_writev = raw_aio_writev,
b2e12bc6 1751 .bdrv_aio_flush = raw_aio_flush,
8238010b 1752 .bdrv_aio_discard = raw_aio_discard,
c25f53b0 1753 .bdrv_refresh_limits = raw_refresh_limits,
1b3abdcc
ML
1754 .bdrv_io_plug = raw_aio_plug,
1755 .bdrv_io_unplug = raw_aio_unplug,
1756 .bdrv_flush_io_queue = raw_aio_flush_io_queue,
3c529d93 1757
83f64091
FB
1758 .bdrv_truncate = raw_truncate,
1759 .bdrv_getlength = raw_getlength,
260a82e5 1760 .bdrv_get_info = raw_get_info,
4a1d5e1f
FZ
1761 .bdrv_get_allocated_file_size
1762 = raw_get_allocated_file_size,
0e7e1989 1763
c2f3426c
SH
1764 .bdrv_detach_aio_context = raw_detach_aio_context,
1765 .bdrv_attach_aio_context = raw_attach_aio_context,
1766
6f482f74 1767 .create_opts = &raw_create_opts,
83f64091
FB
1768};
1769
19cb3738
FB
1770/***********************************************/
1771/* host device */
1772
83affaa6 1773#if defined(__APPLE__) && defined(__MACH__)
19cb3738
FB
1774static kern_return_t FindEjectableCDMedia( io_iterator_t *mediaIterator );
1775static kern_return_t GetBSDPath( io_iterator_t mediaIterator, char *bsdPath, CFIndex maxPathSize );
1776
1777kern_return_t FindEjectableCDMedia( io_iterator_t *mediaIterator )
1778{
5fafdf24 1779 kern_return_t kernResult;
19cb3738
FB
1780 mach_port_t masterPort;
1781 CFMutableDictionaryRef classesToMatch;
1782
1783 kernResult = IOMasterPort( MACH_PORT_NULL, &masterPort );
1784 if ( KERN_SUCCESS != kernResult ) {
1785 printf( "IOMasterPort returned %d\n", kernResult );
1786 }
3b46e624 1787
5fafdf24 1788 classesToMatch = IOServiceMatching( kIOCDMediaClass );
19cb3738
FB
1789 if ( classesToMatch == NULL ) {
1790 printf( "IOServiceMatching returned a NULL dictionary.\n" );
1791 } else {
1792 CFDictionarySetValue( classesToMatch, CFSTR( kIOMediaEjectableKey ), kCFBooleanTrue );
1793 }
1794 kernResult = IOServiceGetMatchingServices( masterPort, classesToMatch, mediaIterator );
1795 if ( KERN_SUCCESS != kernResult )
1796 {
1797 printf( "IOServiceGetMatchingServices returned %d\n", kernResult );
1798 }
3b46e624 1799
19cb3738
FB
1800 return kernResult;
1801}
1802
1803kern_return_t GetBSDPath( io_iterator_t mediaIterator, char *bsdPath, CFIndex maxPathSize )
1804{
1805 io_object_t nextMedia;
1806 kern_return_t kernResult = KERN_FAILURE;
1807 *bsdPath = '\0';
1808 nextMedia = IOIteratorNext( mediaIterator );
1809 if ( nextMedia )
1810 {
1811 CFTypeRef bsdPathAsCFString;
1812 bsdPathAsCFString = IORegistryEntryCreateCFProperty( nextMedia, CFSTR( kIOBSDNameKey ), kCFAllocatorDefault, 0 );
1813 if ( bsdPathAsCFString ) {
1814 size_t devPathLength;
1815 strcpy( bsdPath, _PATH_DEV );
1816 strcat( bsdPath, "r" );
1817 devPathLength = strlen( bsdPath );
1818 if ( CFStringGetCString( bsdPathAsCFString, bsdPath + devPathLength, maxPathSize - devPathLength, kCFStringEncodingASCII ) ) {
1819 kernResult = KERN_SUCCESS;
1820 }
1821 CFRelease( bsdPathAsCFString );
1822 }
1823 IOObjectRelease( nextMedia );
1824 }
3b46e624 1825
19cb3738
FB
1826 return kernResult;
1827}
1828
1829#endif
1830
508c7cb3
CH
/*
 * Score how well this driver matches @filename: 50 for names starting
 * with "/dev/cdrom", 100 for an existing character or block device, and
 * 0 otherwise.
 */
static int hdev_probe_device(const char *filename)
{
    struct stat info;

    /* allow a dedicated CD-ROM driver to match with a higher priority */
    if (strstart(filename, "/dev/cdrom", NULL)) {
        return 50;
    }

    if (stat(filename, &info) < 0) {
        return 0;
    }

    return (S_ISCHR(info.st_mode) || S_ISBLK(info.st_mode)) ? 100 : 0;
}
1846
da888d37
SH
1847static int check_hdev_writable(BDRVRawState *s)
1848{
1849#if defined(BLKROGET)
1850 /* Linux block devices can be configured "read-only" using blockdev(8).
1851 * This is independent of device node permissions and therefore open(2)
1852 * with O_RDWR succeeds. Actual writes fail with EPERM.
1853 *
1854 * bdrv_open() is supposed to fail if the disk is read-only. Explicitly
1855 * check for read-only block devices so that Linux block devices behave
1856 * properly.
1857 */
1858 struct stat st;
1859 int readonly = 0;
1860
1861 if (fstat(s->fd, &st)) {
1862 return -errno;
1863 }
1864
1865 if (!S_ISBLK(st.st_mode)) {
1866 return 0;
1867 }
1868
1869 if (ioctl(s->fd, BLKROGET, &readonly) < 0) {
1870 return -errno;
1871 }
1872
1873 if (readonly) {
1874 return -EACCES;
1875 }
1876#endif /* defined(BLKROGET) */
1877 return 0;
1878}
1879
7af803d4
HR
1880static void hdev_parse_filename(const char *filename, QDict *options,
1881 Error **errp)
1882{
1883 /* The prefix is optional, just as for "file". */
1884 strstart(filename, "host_device:", &filename);
1885
1886 qdict_put_obj(options, "filename", QOBJECT(qstring_from_str(filename)));
1887}
1888
015a1036
HR
1889static int hdev_open(BlockDriverState *bs, QDict *options, int flags,
1890 Error **errp)
19cb3738
FB
1891{
1892 BDRVRawState *s = bs->opaque;
e428e439 1893 Error *local_err = NULL;
da888d37 1894 int ret;
c66a6157 1895 const char *filename = qdict_get_str(options, "filename");
a76bab49 1896
83affaa6 1897#if defined(__APPLE__) && defined(__MACH__)
19cb3738
FB
1898 if (strstart(filename, "/dev/cdrom", NULL)) {
1899 kern_return_t kernResult;
1900 io_iterator_t mediaIterator;
1901 char bsdPath[ MAXPATHLEN ];
1902 int fd;
5fafdf24 1903
19cb3738
FB
1904 kernResult = FindEjectableCDMedia( &mediaIterator );
1905 kernResult = GetBSDPath( mediaIterator, bsdPath, sizeof( bsdPath ) );
3b46e624 1906
19cb3738
FB
1907 if ( bsdPath[ 0 ] != '\0' ) {
1908 strcat(bsdPath,"s0");
1909 /* some CDs don't have a partition 0 */
6165f4d8 1910 fd = qemu_open(bsdPath, O_RDONLY | O_BINARY | O_LARGEFILE);
19cb3738
FB
1911 if (fd < 0) {
1912 bsdPath[strlen(bsdPath)-1] = '1';
1913 } else {
2e1e79da 1914 qemu_close(fd);
19cb3738
FB
1915 }
1916 filename = bsdPath;
a5c5ea3f 1917 qdict_put(options, "filename", qstring_from_str(filename));
19cb3738 1918 }
3b46e624 1919
19cb3738
FB
1920 if ( mediaIterator )
1921 IOObjectRelease( mediaIterator );
1922 }
1923#endif
19cb3738
FB
1924
1925 s->type = FTYPE_FILE;
4dd75c70 1926#if defined(__linux__)
05acda4d
BK
1927 {
1928 char resolved_path[ MAXPATHLEN ], *temp;
1929
1930 temp = realpath(filename, resolved_path);
1931 if (temp && strstart(temp, "/dev/sg", NULL)) {
1932 bs->sg = 1;
1933 }
19cb3738
FB
1934 }
1935#endif
90babde0 1936
e428e439 1937 ret = raw_open_common(bs, options, flags, 0, &local_err);
da888d37 1938 if (ret < 0) {
84d18f06 1939 if (local_err) {
e428e439
HR
1940 error_propagate(errp, local_err);
1941 }
da888d37
SH
1942 return ret;
1943 }
1944
1945 if (flags & BDRV_O_RDWR) {
1946 ret = check_hdev_writable(s);
1947 if (ret < 0) {
1948 raw_close(bs);
e428e439 1949 error_setg_errno(errp, -ret, "The device is not writable");
da888d37
SH
1950 return ret;
1951 }
1952 }
1953
1954 return ret;
19cb3738
FB
1955}
1956
03ff3ca3 1957#if defined(__linux__)
19cb3738
FB
1958/* Note: we do not have a reliable method to detect if the floppy is
1959 present. The current method is to try to open the floppy at every
1960 I/O and to keep it opened during a few hundreds of ms. */
1961static int fd_open(BlockDriverState *bs)
1962{
1963 BDRVRawState *s = bs->opaque;
1964 int last_media_present;
1965
1966 if (s->type != FTYPE_FD)
1967 return 0;
1968 last_media_present = (s->fd >= 0);
5fafdf24 1969 if (s->fd >= 0 &&
a56ebc6b 1970 (qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - s->fd_open_time) >= FD_OPEN_TIMEOUT) {
2e1e79da 1971 qemu_close(s->fd);
19cb3738
FB
1972 s->fd = -1;
1973#ifdef DEBUG_FLOPPY
1974 printf("Floppy closed\n");
1975#endif
1976 }
1977 if (s->fd < 0) {
5fafdf24 1978 if (s->fd_got_error &&
a56ebc6b 1979 (qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - s->fd_error_time) < FD_OPEN_TIMEOUT) {
19cb3738
FB
1980#ifdef DEBUG_FLOPPY
1981 printf("No floppy (open delayed)\n");
1982#endif
1983 return -EIO;
1984 }
6165f4d8 1985 s->fd = qemu_open(bs->filename, s->open_flags & ~O_NONBLOCK);
19cb3738 1986 if (s->fd < 0) {
a56ebc6b 1987 s->fd_error_time = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
19cb3738
FB
1988 s->fd_got_error = 1;
1989 if (last_media_present)
1990 s->fd_media_changed = 1;
1991#ifdef DEBUG_FLOPPY
1992 printf("No floppy\n");
1993#endif
1994 return -EIO;
1995 }
1996#ifdef DEBUG_FLOPPY
1997 printf("Floppy opened\n");
1998#endif
1999 }
2000 if (!last_media_present)
2001 s->fd_media_changed = 1;
a56ebc6b 2002 s->fd_open_time = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
19cb3738
FB
2003 s->fd_got_error = 0;
2004 return 0;
2005}
19cb3738 2006
63ec93db 2007static int hdev_ioctl(BlockDriverState *bs, unsigned long int req, void *buf)
985a03b0
TS
2008{
2009 BDRVRawState *s = bs->opaque;
2010
2011 return ioctl(s->fd, req, buf);
2012}
221f715d 2013
7c84b1b8 2014static BlockAIOCB *hdev_aio_ioctl(BlockDriverState *bs,
221f715d 2015 unsigned long int req, void *buf,
097310b5 2016 BlockCompletionFunc *cb, void *opaque)
221f715d 2017{
f141eafe 2018 BDRVRawState *s = bs->opaque;
c208e8c2 2019 RawPosixAIOData *acb;
c4d9d196 2020 ThreadPool *pool;
221f715d 2021
f141eafe
AL
2022 if (fd_open(bs) < 0)
2023 return NULL;
c208e8c2
PB
2024
2025 acb = g_slice_new(RawPosixAIOData);
2026 acb->bs = bs;
2027 acb->aio_type = QEMU_AIO_IOCTL;
2028 acb->aio_fildes = s->fd;
2029 acb->aio_offset = 0;
2030 acb->aio_ioctl_buf = buf;
2031 acb->aio_ioctl_cmd = req;
c4d9d196
SH
2032 pool = aio_get_thread_pool(bdrv_get_aio_context(bs));
2033 return thread_pool_submit_aio(pool, aio_worker, acb, cb, opaque);
221f715d
AL
2034}
2035
a167ba50 2036#elif defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
9f23011a
BS
2037static int fd_open(BlockDriverState *bs)
2038{
2039 BDRVRawState *s = bs->opaque;
2040
2041 /* this is just to ensure s->fd is sane (its called by io ops) */
2042 if (s->fd >= 0)
2043 return 0;
2044 return -EIO;
2045}
9f23011a 2046#else /* !linux && !FreeBSD */
19cb3738 2047
08af02e2
AL
2048static int fd_open(BlockDriverState *bs)
2049{
2050 return 0;
2051}
2052
221f715d 2053#endif /* !linux && !FreeBSD */
04eeb8b6 2054
7c84b1b8 2055static coroutine_fn BlockAIOCB *hdev_aio_discard(BlockDriverState *bs,
c36dd8a0 2056 int64_t sector_num, int nb_sectors,
097310b5 2057 BlockCompletionFunc *cb, void *opaque)
c36dd8a0
AF
2058{
2059 BDRVRawState *s = bs->opaque;
2060
2061 if (fd_open(bs) < 0) {
2062 return NULL;
2063 }
2064 return paio_submit(bs, s->fd, sector_num, NULL, nb_sectors,
2065 cb, opaque, QEMU_AIO_DISCARD|QEMU_AIO_BLKDEV);
2066}
2067
d0b4503e
PB
2068static coroutine_fn int hdev_co_write_zeroes(BlockDriverState *bs,
2069 int64_t sector_num, int nb_sectors, BdrvRequestFlags flags)
2070{
2071 BDRVRawState *s = bs->opaque;
2072 int rc;
2073
2074 rc = fd_open(bs);
2075 if (rc < 0) {
2076 return rc;
2077 }
2078 if (!(flags & BDRV_REQ_MAY_UNMAP)) {
97a2ae34
PB
2079 return paio_submit_co(bs, s->fd, sector_num, NULL, nb_sectors,
2080 QEMU_AIO_WRITE_ZEROES|QEMU_AIO_BLKDEV);
2081 } else if (s->discard_zeroes) {
2082 return paio_submit_co(bs, s->fd, sector_num, NULL, nb_sectors,
2083 QEMU_AIO_DISCARD|QEMU_AIO_BLKDEV);
d0b4503e 2084 }
97a2ae34 2085 return -ENOTSUP;
d0b4503e
PB
2086}
2087
6f482f74 2088static int hdev_create(const char *filename, QemuOpts *opts,
d5124c00 2089 Error **errp)
93c65b47
AL
2090{
2091 int fd;
2092 int ret = 0;
2093 struct stat stat_buf;
0e7e1989 2094 int64_t total_size = 0;
cc28c6aa
HR
2095 bool has_prefix;
2096
2097 /* This function is used by all three protocol block drivers and therefore
2098 * any of these three prefixes may be given.
2099 * The return value has to be stored somewhere, otherwise this is an error
2100 * due to -Werror=unused-value. */
2101 has_prefix =
2102 strstart(filename, "host_device:", &filename) ||
2103 strstart(filename, "host_cdrom:" , &filename) ||
2104 strstart(filename, "host_floppy:", &filename);
2105
2106 (void)has_prefix;
93c65b47 2107
0e7e1989 2108 /* Read out options */
180e9526
HT
2109 total_size = ROUND_UP(qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0),
2110 BDRV_SECTOR_SIZE);
93c65b47 2111
6165f4d8 2112 fd = qemu_open(filename, O_WRONLY | O_BINARY);
e428e439
HR
2113 if (fd < 0) {
2114 ret = -errno;
2115 error_setg_errno(errp, -ret, "Could not open device");
2116 return ret;
2117 }
93c65b47 2118
e428e439 2119 if (fstat(fd, &stat_buf) < 0) {
57e69b7d 2120 ret = -errno;
e428e439
HR
2121 error_setg_errno(errp, -ret, "Could not stat device");
2122 } else if (!S_ISBLK(stat_buf.st_mode) && !S_ISCHR(stat_buf.st_mode)) {
2123 error_setg(errp,
2124 "The given file is neither a block nor a character device");
57e69b7d 2125 ret = -ENODEV;
180e9526 2126 } else if (lseek(fd, 0, SEEK_END) < total_size) {
e428e439 2127 error_setg(errp, "Device is too small");
93c65b47 2128 ret = -ENOSPC;
e428e439 2129 }
93c65b47 2130
2e1e79da 2131 qemu_close(fd);
93c65b47
AL
2132 return ret;
2133}
2134
5efa9d5a 2135static BlockDriver bdrv_host_device = {
0b4ce02e 2136 .format_name = "host_device",
84a12e66 2137 .protocol_name = "host_device",
0b4ce02e 2138 .instance_size = sizeof(BDRVRawState),
030be321 2139 .bdrv_needs_filename = true,
0b4ce02e 2140 .bdrv_probe_device = hdev_probe_device,
7af803d4 2141 .bdrv_parse_filename = hdev_parse_filename,
66f82cee 2142 .bdrv_file_open = hdev_open,
0b4ce02e 2143 .bdrv_close = raw_close,
1bc6b705
JC
2144 .bdrv_reopen_prepare = raw_reopen_prepare,
2145 .bdrv_reopen_commit = raw_reopen_commit,
2146 .bdrv_reopen_abort = raw_reopen_abort,
c282e1fd 2147 .bdrv_create = hdev_create,
6f482f74 2148 .create_opts = &raw_create_opts,
d0b4503e 2149 .bdrv_co_write_zeroes = hdev_co_write_zeroes,
3b46e624 2150
f141eafe
AL
2151 .bdrv_aio_readv = raw_aio_readv,
2152 .bdrv_aio_writev = raw_aio_writev,
b2e12bc6 2153 .bdrv_aio_flush = raw_aio_flush,
8238010b 2154 .bdrv_aio_discard = hdev_aio_discard,
c25f53b0 2155 .bdrv_refresh_limits = raw_refresh_limits,
1b3abdcc
ML
2156 .bdrv_io_plug = raw_aio_plug,
2157 .bdrv_io_unplug = raw_aio_unplug,
2158 .bdrv_flush_io_queue = raw_aio_flush_io_queue,
3c529d93 2159
55b949c8 2160 .bdrv_truncate = raw_truncate,
e60f469c 2161 .bdrv_getlength = raw_getlength,
260a82e5 2162 .bdrv_get_info = raw_get_info,
4a1d5e1f
FZ
2163 .bdrv_get_allocated_file_size
2164 = raw_get_allocated_file_size,
19cb3738 2165
c2f3426c
SH
2166 .bdrv_detach_aio_context = raw_detach_aio_context,
2167 .bdrv_attach_aio_context = raw_attach_aio_context,
2168
f3a5d3f8 2169 /* generic scsi device */
63ec93db
CH
2170#ifdef __linux__
2171 .bdrv_ioctl = hdev_ioctl,
63ec93db
CH
2172 .bdrv_aio_ioctl = hdev_aio_ioctl,
2173#endif
f3a5d3f8
CH
2174};
2175
2176#ifdef __linux__
d3f49845
HR
2177static void floppy_parse_filename(const char *filename, QDict *options,
2178 Error **errp)
2179{
2180 /* The prefix is optional, just as for "file". */
2181 strstart(filename, "host_floppy:", &filename);
2182
2183 qdict_put_obj(options, "filename", QOBJECT(qstring_from_str(filename)));
2184}
2185
015a1036
HR
2186static int floppy_open(BlockDriverState *bs, QDict *options, int flags,
2187 Error **errp)
f3a5d3f8
CH
2188{
2189 BDRVRawState *s = bs->opaque;
e428e439 2190 Error *local_err = NULL;
f3a5d3f8
CH
2191 int ret;
2192
f3a5d3f8 2193 s->type = FTYPE_FD;
f3a5d3f8 2194
19a3da7f 2195 /* open will not fail even if no floppy is inserted, so add O_NONBLOCK */
e428e439
HR
2196 ret = raw_open_common(bs, options, flags, O_NONBLOCK, &local_err);
2197 if (ret) {
84d18f06 2198 if (local_err) {
e428e439
HR
2199 error_propagate(errp, local_err);
2200 }
f3a5d3f8 2201 return ret;
e428e439 2202 }
f3a5d3f8
CH
2203
2204 /* close fd so that we can reopen it as needed */
2e1e79da 2205 qemu_close(s->fd);
f3a5d3f8
CH
2206 s->fd = -1;
2207 s->fd_media_changed = 1;
2208
2209 return 0;
2210}
2211
508c7cb3
CH
2212static int floppy_probe_device(const char *filename)
2213{
2ebf7c4b
CR
2214 int fd, ret;
2215 int prio = 0;
2216 struct floppy_struct fdparam;
343f8568 2217 struct stat st;
2ebf7c4b 2218
e1740828
CB
2219 if (strstart(filename, "/dev/fd", NULL) &&
2220 !strstart(filename, "/dev/fdset/", NULL)) {
2ebf7c4b 2221 prio = 50;
e1740828 2222 }
2ebf7c4b 2223
6165f4d8 2224 fd = qemu_open(filename, O_RDONLY | O_NONBLOCK);
2ebf7c4b
CR
2225 if (fd < 0) {
2226 goto out;
2227 }
343f8568
JS
2228 ret = fstat(fd, &st);
2229 if (ret == -1 || !S_ISBLK(st.st_mode)) {
2230 goto outc;
2231 }
2ebf7c4b
CR
2232
2233 /* Attempt to detect via a floppy specific ioctl */
2234 ret = ioctl(fd, FDGETPRM, &fdparam);
2235 if (ret >= 0)
2236 prio = 100;
2237
343f8568 2238outc:
2e1e79da 2239 qemu_close(fd);
2ebf7c4b
CR
2240out:
2241 return prio;
508c7cb3
CH
2242}
2243
2244
f3a5d3f8
CH
2245static int floppy_is_inserted(BlockDriverState *bs)
2246{
2247 return fd_open(bs) >= 0;
2248}
2249
2250static int floppy_media_changed(BlockDriverState *bs)
2251{
2252 BDRVRawState *s = bs->opaque;
2253 int ret;
2254
2255 /*
2256 * XXX: we do not have a true media changed indication.
2257 * It does not work if the floppy is changed without trying to read it.
2258 */
2259 fd_open(bs);
2260 ret = s->fd_media_changed;
2261 s->fd_media_changed = 0;
2262#ifdef DEBUG_FLOPPY
2263 printf("Floppy changed=%d\n", ret);
2264#endif
2265 return ret;
2266}
2267
f36f3949 2268static void floppy_eject(BlockDriverState *bs, bool eject_flag)
f3a5d3f8
CH
2269{
2270 BDRVRawState *s = bs->opaque;
2271 int fd;
2272
2273 if (s->fd >= 0) {
2e1e79da 2274 qemu_close(s->fd);
f3a5d3f8
CH
2275 s->fd = -1;
2276 }
6165f4d8 2277 fd = qemu_open(bs->filename, s->open_flags | O_NONBLOCK);
f3a5d3f8
CH
2278 if (fd >= 0) {
2279 if (ioctl(fd, FDEJECT, 0) < 0)
2280 perror("FDEJECT");
2e1e79da 2281 qemu_close(fd);
f3a5d3f8 2282 }
f3a5d3f8
CH
2283}
2284
2285static BlockDriver bdrv_host_floppy = {
2286 .format_name = "host_floppy",
84a12e66 2287 .protocol_name = "host_floppy",
f3a5d3f8 2288 .instance_size = sizeof(BDRVRawState),
030be321 2289 .bdrv_needs_filename = true,
508c7cb3 2290 .bdrv_probe_device = floppy_probe_device,
d3f49845 2291 .bdrv_parse_filename = floppy_parse_filename,
66f82cee 2292 .bdrv_file_open = floppy_open,
f3a5d3f8 2293 .bdrv_close = raw_close,
1bc6b705
JC
2294 .bdrv_reopen_prepare = raw_reopen_prepare,
2295 .bdrv_reopen_commit = raw_reopen_commit,
2296 .bdrv_reopen_abort = raw_reopen_abort,
c282e1fd 2297 .bdrv_create = hdev_create,
6f482f74 2298 .create_opts = &raw_create_opts,
f3a5d3f8 2299
f3a5d3f8
CH
2300 .bdrv_aio_readv = raw_aio_readv,
2301 .bdrv_aio_writev = raw_aio_writev,
b2e12bc6 2302 .bdrv_aio_flush = raw_aio_flush,
c25f53b0 2303 .bdrv_refresh_limits = raw_refresh_limits,
1b3abdcc
ML
2304 .bdrv_io_plug = raw_aio_plug,
2305 .bdrv_io_unplug = raw_aio_unplug,
2306 .bdrv_flush_io_queue = raw_aio_flush_io_queue,
f3a5d3f8 2307
55b949c8 2308 .bdrv_truncate = raw_truncate,
b94a2610
KW
2309 .bdrv_getlength = raw_getlength,
2310 .has_variable_length = true,
4a1d5e1f
FZ
2311 .bdrv_get_allocated_file_size
2312 = raw_get_allocated_file_size,
f3a5d3f8 2313
c2f3426c
SH
2314 .bdrv_detach_aio_context = raw_detach_aio_context,
2315 .bdrv_attach_aio_context = raw_attach_aio_context,
2316
f3a5d3f8
CH
2317 /* removable device support */
2318 .bdrv_is_inserted = floppy_is_inserted,
2319 .bdrv_media_changed = floppy_media_changed,
2320 .bdrv_eject = floppy_eject,
f3a5d3f8 2321};
18fa1c42 2322#endif
f3a5d3f8 2323
18fa1c42
HR
2324#if defined(__linux__) || defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
2325static void cdrom_parse_filename(const char *filename, QDict *options,
2326 Error **errp)
2327{
2328 /* The prefix is optional, just as for "file". */
2329 strstart(filename, "host_cdrom:", &filename);
2330
2331 qdict_put_obj(options, "filename", QOBJECT(qstring_from_str(filename)));
2332}
2333#endif
2334
2335#ifdef __linux__
015a1036
HR
2336static int cdrom_open(BlockDriverState *bs, QDict *options, int flags,
2337 Error **errp)
f3a5d3f8
CH
2338{
2339 BDRVRawState *s = bs->opaque;
e428e439
HR
2340 Error *local_err = NULL;
2341 int ret;
f3a5d3f8 2342
f3a5d3f8
CH
2343 s->type = FTYPE_CD;
2344
19a3da7f 2345 /* open will not fail even if no CD is inserted, so add O_NONBLOCK */
e428e439 2346 ret = raw_open_common(bs, options, flags, O_NONBLOCK, &local_err);
84d18f06 2347 if (local_err) {
e428e439
HR
2348 error_propagate(errp, local_err);
2349 }
2350 return ret;
f3a5d3f8
CH
2351}
2352
508c7cb3
CH
2353static int cdrom_probe_device(const char *filename)
2354{
3baf720e
CR
2355 int fd, ret;
2356 int prio = 0;
343f8568 2357 struct stat st;
3baf720e 2358
6165f4d8 2359 fd = qemu_open(filename, O_RDONLY | O_NONBLOCK);
3baf720e
CR
2360 if (fd < 0) {
2361 goto out;
2362 }
343f8568
JS
2363 ret = fstat(fd, &st);
2364 if (ret == -1 || !S_ISBLK(st.st_mode)) {
2365 goto outc;
2366 }
3baf720e
CR
2367
2368 /* Attempt to detect via a CDROM specific ioctl */
2369 ret = ioctl(fd, CDROM_DRIVE_STATUS, CDSL_CURRENT);
2370 if (ret >= 0)
2371 prio = 100;
2372
343f8568 2373outc:
2e1e79da 2374 qemu_close(fd);
3baf720e
CR
2375out:
2376 return prio;
508c7cb3
CH
2377}
2378
f3a5d3f8
CH
2379static int cdrom_is_inserted(BlockDriverState *bs)
2380{
2381 BDRVRawState *s = bs->opaque;
2382 int ret;
2383
2384 ret = ioctl(s->fd, CDROM_DRIVE_STATUS, CDSL_CURRENT);
2385 if (ret == CDS_DISC_OK)
2386 return 1;
2387 return 0;
2388}
2389
f36f3949 2390static void cdrom_eject(BlockDriverState *bs, bool eject_flag)
f3a5d3f8
CH
2391{
2392 BDRVRawState *s = bs->opaque;
2393
2394 if (eject_flag) {
2395 if (ioctl(s->fd, CDROMEJECT, NULL) < 0)
2396 perror("CDROMEJECT");
2397 } else {
2398 if (ioctl(s->fd, CDROMCLOSETRAY, NULL) < 0)
2399 perror("CDROMEJECT");
2400 }
f3a5d3f8
CH
2401}
2402
025e849a 2403static void cdrom_lock_medium(BlockDriverState *bs, bool locked)
f3a5d3f8
CH
2404{
2405 BDRVRawState *s = bs->opaque;
2406
2407 if (ioctl(s->fd, CDROM_LOCKDOOR, locked) < 0) {
2408 /*
2409 * Note: an error can happen if the distribution automatically
2410 * mounts the CD-ROM
2411 */
2412 /* perror("CDROM_LOCKDOOR"); */
2413 }
f3a5d3f8
CH
2414}
2415
2416static BlockDriver bdrv_host_cdrom = {
2417 .format_name = "host_cdrom",
84a12e66 2418 .protocol_name = "host_cdrom",
f3a5d3f8 2419 .instance_size = sizeof(BDRVRawState),
030be321 2420 .bdrv_needs_filename = true,
508c7cb3 2421 .bdrv_probe_device = cdrom_probe_device,
18fa1c42 2422 .bdrv_parse_filename = cdrom_parse_filename,
66f82cee 2423 .bdrv_file_open = cdrom_open,
f3a5d3f8 2424 .bdrv_close = raw_close,
1bc6b705
JC
2425 .bdrv_reopen_prepare = raw_reopen_prepare,
2426 .bdrv_reopen_commit = raw_reopen_commit,
2427 .bdrv_reopen_abort = raw_reopen_abort,
c282e1fd 2428 .bdrv_create = hdev_create,
6f482f74 2429 .create_opts = &raw_create_opts,
f3a5d3f8 2430
f3a5d3f8
CH
2431 .bdrv_aio_readv = raw_aio_readv,
2432 .bdrv_aio_writev = raw_aio_writev,
b2e12bc6 2433 .bdrv_aio_flush = raw_aio_flush,
c25f53b0 2434 .bdrv_refresh_limits = raw_refresh_limits,
1b3abdcc
ML
2435 .bdrv_io_plug = raw_aio_plug,
2436 .bdrv_io_unplug = raw_aio_unplug,
2437 .bdrv_flush_io_queue = raw_aio_flush_io_queue,
f3a5d3f8 2438
55b949c8 2439 .bdrv_truncate = raw_truncate,
b94a2610
KW
2440 .bdrv_getlength = raw_getlength,
2441 .has_variable_length = true,
4a1d5e1f
FZ
2442 .bdrv_get_allocated_file_size
2443 = raw_get_allocated_file_size,
f3a5d3f8 2444
c2f3426c
SH
2445 .bdrv_detach_aio_context = raw_detach_aio_context,
2446 .bdrv_attach_aio_context = raw_attach_aio_context,
2447
f3a5d3f8
CH
2448 /* removable device support */
2449 .bdrv_is_inserted = cdrom_is_inserted,
2450 .bdrv_eject = cdrom_eject,
025e849a 2451 .bdrv_lock_medium = cdrom_lock_medium,
f3a5d3f8
CH
2452
2453 /* generic scsi device */
63ec93db 2454 .bdrv_ioctl = hdev_ioctl,
63ec93db 2455 .bdrv_aio_ioctl = hdev_aio_ioctl,
f3a5d3f8
CH
2456};
2457#endif /* __linux__ */
2458
a167ba50 2459#if defined (__FreeBSD__) || defined(__FreeBSD_kernel__)
511018e4
AT
2460static int cdrom_open(BlockDriverState *bs, QDict *options, int flags,
2461 Error **errp)
f3a5d3f8
CH
2462{
2463 BDRVRawState *s = bs->opaque;
e428e439 2464 Error *local_err = NULL;
f3a5d3f8
CH
2465 int ret;
2466
2467 s->type = FTYPE_CD;
2468
e428e439
HR
2469 ret = raw_open_common(bs, options, flags, 0, &local_err);
2470 if (ret) {
84d18f06 2471 if (local_err) {
e428e439
HR
2472 error_propagate(errp, local_err);
2473 }
f3a5d3f8 2474 return ret;
e428e439 2475 }
f3a5d3f8 2476
9b2260cb 2477 /* make sure the door isn't locked at this time */
f3a5d3f8
CH
2478 ioctl(s->fd, CDIOCALLOW);
2479 return 0;
2480}
2481
508c7cb3
CH
/*
 * Score how likely @filename is to be a host CD-ROM device (FreeBSD),
 * based on the name only: 100 when it starts with "/dev/cd" or "/dev/acd",
 * 0 otherwise.
 */
static int cdrom_probe_device(const char *filename)
{
    if (!strstart(filename, "/dev/cd", NULL) &&
        !strstart(filename, "/dev/acd", NULL)) {
        return 0;
    }

    return 100;
}
2489
f3a5d3f8
CH
2490static int cdrom_reopen(BlockDriverState *bs)
2491{
2492 BDRVRawState *s = bs->opaque;
2493 int fd;
2494
2495 /*
2496 * Force reread of possibly changed/newly loaded disc,
2497 * FreeBSD seems to not notice sometimes...
2498 */
2499 if (s->fd >= 0)
2e1e79da 2500 qemu_close(s->fd);
6165f4d8 2501 fd = qemu_open(bs->filename, s->open_flags, 0644);
f3a5d3f8
CH
2502 if (fd < 0) {
2503 s->fd = -1;
2504 return -EIO;
2505 }
2506 s->fd = fd;
2507
9b2260cb 2508 /* make sure the door isn't locked at this time */
f3a5d3f8
CH
2509 ioctl(s->fd, CDIOCALLOW);
2510 return 0;
2511}
2512
2513static int cdrom_is_inserted(BlockDriverState *bs)
2514{
2515 return raw_getlength(bs) > 0;
2516}
2517
f36f3949 2518static void cdrom_eject(BlockDriverState *bs, bool eject_flag)
f3a5d3f8
CH
2519{
2520 BDRVRawState *s = bs->opaque;
2521
2522 if (s->fd < 0)
822e1cd1 2523 return;
f3a5d3f8
CH
2524
2525 (void) ioctl(s->fd, CDIOCALLOW);
2526
2527 if (eject_flag) {
2528 if (ioctl(s->fd, CDIOCEJECT) < 0)
2529 perror("CDIOCEJECT");
2530 } else {
2531 if (ioctl(s->fd, CDIOCCLOSE) < 0)
2532 perror("CDIOCCLOSE");
2533 }
2534
822e1cd1 2535 cdrom_reopen(bs);
f3a5d3f8
CH
2536}
2537
025e849a 2538static void cdrom_lock_medium(BlockDriverState *bs, bool locked)
f3a5d3f8
CH
2539{
2540 BDRVRawState *s = bs->opaque;
2541
2542 if (s->fd < 0)
7bf37fed 2543 return;
f3a5d3f8
CH
2544 if (ioctl(s->fd, (locked ? CDIOCPREVENT : CDIOCALLOW)) < 0) {
2545 /*
2546 * Note: an error can happen if the distribution automatically
2547 * mounts the CD-ROM
2548 */
2549 /* perror("CDROM_LOCKDOOR"); */
2550 }
f3a5d3f8
CH
2551}
2552
2553static BlockDriver bdrv_host_cdrom = {
2554 .format_name = "host_cdrom",
84a12e66 2555 .protocol_name = "host_cdrom",
f3a5d3f8 2556 .instance_size = sizeof(BDRVRawState),
030be321 2557 .bdrv_needs_filename = true,
508c7cb3 2558 .bdrv_probe_device = cdrom_probe_device,
18fa1c42 2559 .bdrv_parse_filename = cdrom_parse_filename,
66f82cee 2560 .bdrv_file_open = cdrom_open,
f3a5d3f8 2561 .bdrv_close = raw_close,
1bc6b705
JC
2562 .bdrv_reopen_prepare = raw_reopen_prepare,
2563 .bdrv_reopen_commit = raw_reopen_commit,
2564 .bdrv_reopen_abort = raw_reopen_abort,
c282e1fd 2565 .bdrv_create = hdev_create,
6f482f74 2566 .create_opts = &raw_create_opts,
f3a5d3f8 2567
f3a5d3f8
CH
2568 .bdrv_aio_readv = raw_aio_readv,
2569 .bdrv_aio_writev = raw_aio_writev,
b2e12bc6 2570 .bdrv_aio_flush = raw_aio_flush,
c25f53b0 2571 .bdrv_refresh_limits = raw_refresh_limits,
1b3abdcc
ML
2572 .bdrv_io_plug = raw_aio_plug,
2573 .bdrv_io_unplug = raw_aio_unplug,
2574 .bdrv_flush_io_queue = raw_aio_flush_io_queue,
f3a5d3f8 2575
55b949c8 2576 .bdrv_truncate = raw_truncate,
b94a2610
KW
2577 .bdrv_getlength = raw_getlength,
2578 .has_variable_length = true,
4a1d5e1f
FZ
2579 .bdrv_get_allocated_file_size
2580 = raw_get_allocated_file_size,
f3a5d3f8 2581
c2f3426c
SH
2582 .bdrv_detach_aio_context = raw_detach_aio_context,
2583 .bdrv_attach_aio_context = raw_attach_aio_context,
2584
19cb3738 2585 /* removable device support */
f3a5d3f8
CH
2586 .bdrv_is_inserted = cdrom_is_inserted,
2587 .bdrv_eject = cdrom_eject,
025e849a 2588 .bdrv_lock_medium = cdrom_lock_medium,
19cb3738 2589};
f3a5d3f8 2590#endif /* __FreeBSD__ */
5efa9d5a 2591
84a12e66 2592static void bdrv_file_init(void)
5efa9d5a 2593{
508c7cb3
CH
2594 /*
2595 * Register all the drivers. Note that order is important, the driver
2596 * registered last will get probed first.
2597 */
84a12e66 2598 bdrv_register(&bdrv_file);
5efa9d5a 2599 bdrv_register(&bdrv_host_device);
f3a5d3f8
CH
2600#ifdef __linux__
2601 bdrv_register(&bdrv_host_floppy);
2602 bdrv_register(&bdrv_host_cdrom);
2603#endif
a167ba50 2604#if defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
f3a5d3f8
CH
2605 bdrv_register(&bdrv_host_cdrom);
2606#endif
5efa9d5a
AL
2607}
2608
84a12e66 2609block_init(bdrv_file_init);