2 * QEMU Enhanced Disk Format
4 * Copyright IBM, Corp. 2010
7 * Stefan Hajnoczi <stefanha@linux.vnet.ibm.com>
8 * Anthony Liguori <aliguori@us.ibm.com>
10 * This work is licensed under the terms of the GNU LGPL, version 2 or later.
11 * See the COPYING.LIB file in the top-level directory.
17 static int bdrv_qed_probe(const uint8_t *buf
, int buf_size
,
20 const QEDHeader
*header
= (const QEDHeader
*)buf
;
22 if (buf_size
< sizeof(*header
)) {
25 if (le32_to_cpu(header
->magic
) != QED_MAGIC
) {
/**
 * Check whether an image format is raw
 *
 * @fmt:    Backing file format, may be NULL
 *
 * Returns true only when @fmt is non-NULL and compares equal to "raw".
 */
static bool qed_fmt_is_raw(const char *fmt)
{
    /* The NULL test comes first so strcmp() is never handed a NULL pointer */
    return fmt && strcmp(fmt, "raw") == 0;
}
41 static void qed_header_le_to_cpu(const QEDHeader
*le
, QEDHeader
*cpu
)
43 cpu
->magic
= le32_to_cpu(le
->magic
);
44 cpu
->cluster_size
= le32_to_cpu(le
->cluster_size
);
45 cpu
->table_size
= le32_to_cpu(le
->table_size
);
46 cpu
->header_size
= le32_to_cpu(le
->header_size
);
47 cpu
->features
= le64_to_cpu(le
->features
);
48 cpu
->compat_features
= le64_to_cpu(le
->compat_features
);
49 cpu
->autoclear_features
= le64_to_cpu(le
->autoclear_features
);
50 cpu
->l1_table_offset
= le64_to_cpu(le
->l1_table_offset
);
51 cpu
->image_size
= le64_to_cpu(le
->image_size
);
52 cpu
->backing_filename_offset
= le32_to_cpu(le
->backing_filename_offset
);
53 cpu
->backing_filename_size
= le32_to_cpu(le
->backing_filename_size
);
56 static void qed_header_cpu_to_le(const QEDHeader
*cpu
, QEDHeader
*le
)
58 le
->magic
= cpu_to_le32(cpu
->magic
);
59 le
->cluster_size
= cpu_to_le32(cpu
->cluster_size
);
60 le
->table_size
= cpu_to_le32(cpu
->table_size
);
61 le
->header_size
= cpu_to_le32(cpu
->header_size
);
62 le
->features
= cpu_to_le64(cpu
->features
);
63 le
->compat_features
= cpu_to_le64(cpu
->compat_features
);
64 le
->autoclear_features
= cpu_to_le64(cpu
->autoclear_features
);
65 le
->l1_table_offset
= cpu_to_le64(cpu
->l1_table_offset
);
66 le
->image_size
= cpu_to_le64(cpu
->image_size
);
67 le
->backing_filename_offset
= cpu_to_le32(cpu
->backing_filename_offset
);
68 le
->backing_filename_size
= cpu_to_le32(cpu
->backing_filename_size
);
71 static int qed_write_header_sync(BDRVQEDState
*s
)
76 qed_header_cpu_to_le(&s
->header
, &le
);
77 ret
= bdrv_pwrite(s
->bs
->file
, 0, &le
, sizeof(le
));
78 if (ret
!= sizeof(le
)) {
/**
 * Compute the largest image size addressable with the given geometry
 *
 * @cluster_size:   Cluster size in bytes
 * @table_size:     Table size in clusters
 *
 * A table holds (table_size * cluster_size) / sizeof(uint64_t) entries.  One
 * L2 table therefore maps table_entries * cluster_size bytes, and the L1
 * table references table_entries such L2 tables.
 *
 * Returns the product of those quantities in bytes.  If the result does not
 * fit in 64 bits, UINT64_MAX is returned instead of a wrapped-around value,
 * so that "image_size > qed_max_image_size(...)" comparisons stay meaningful.
 */
static uint64_t qed_max_image_size(uint32_t cluster_size, uint32_t table_size)
{
    uint64_t table_entries;
    uint64_t l2_size;

    /* Widen before multiplying: the product of two uint32_t may exceed 32 bits */
    table_entries = ((uint64_t)table_size * cluster_size) / sizeof(uint64_t);

    /* Bytes mapped by a single L2 table */
    if (cluster_size != 0 && table_entries > UINT64_MAX / cluster_size) {
        return UINT64_MAX; /* saturate instead of wrapping */
    }
    l2_size = table_entries * cluster_size;

    /* Bytes mapped by a full L1 table of L2 tables */
    if (table_entries != 0 && l2_size > UINT64_MAX / table_entries) {
        return UINT64_MAX; /* saturate instead of wrapping */
    }
    return l2_size * table_entries;
}
95 static bool qed_is_cluster_size_valid(uint32_t cluster_size
)
97 if (cluster_size
< QED_MIN_CLUSTER_SIZE
||
98 cluster_size
> QED_MAX_CLUSTER_SIZE
) {
101 if (cluster_size
& (cluster_size
- 1)) {
102 return false; /* not power of 2 */
107 static bool qed_is_table_size_valid(uint32_t table_size
)
109 if (table_size
< QED_MIN_TABLE_SIZE
||
110 table_size
> QED_MAX_TABLE_SIZE
) {
113 if (table_size
& (table_size
- 1)) {
114 return false; /* not power of 2 */
119 static bool qed_is_image_size_valid(uint64_t image_size
, uint32_t cluster_size
,
122 if (image_size
% BDRV_SECTOR_SIZE
!= 0) {
123 return false; /* not multiple of sector size */
125 if (image_size
> qed_max_image_size(cluster_size
, table_size
)) {
126 return false; /* image is too large */
132 * Read a string of known length from the image file
135 * @offset: File offset to start of string, in bytes
136 * @n: String length in bytes
137 * @buf: Destination buffer
138 * @buflen: Destination buffer length in bytes
139 * @ret: 0 on success, -errno on failure
141 * The string is NUL-terminated.
143 static int qed_read_string(BlockDriverState
*file
, uint64_t offset
, size_t n
,
144 char *buf
, size_t buflen
)
150 ret
= bdrv_pread(file
, offset
, buf
, n
);
158 static int bdrv_qed_open(BlockDriverState
*bs
, int flags
)
160 BDRVQEDState
*s
= bs
->opaque
;
167 ret
= bdrv_pread(bs
->file
, 0, &le_header
, sizeof(le_header
));
171 ret
= 0; /* ret should always be 0 or -errno */
172 qed_header_le_to_cpu(&le_header
, &s
->header
);
174 if (s
->header
.magic
!= QED_MAGIC
) {
177 if (s
->header
.features
& ~QED_FEATURE_MASK
) {
178 return -ENOTSUP
; /* image uses unsupported feature bits */
180 if (!qed_is_cluster_size_valid(s
->header
.cluster_size
)) {
184 /* Round down file size to the last cluster */
185 file_size
= bdrv_getlength(bs
->file
);
189 s
->file_size
= qed_start_of_cluster(s
, file_size
);
191 if (!qed_is_table_size_valid(s
->header
.table_size
)) {
194 if (!qed_is_image_size_valid(s
->header
.image_size
,
195 s
->header
.cluster_size
,
196 s
->header
.table_size
)) {
199 if (!qed_check_table_offset(s
, s
->header
.l1_table_offset
)) {
203 s
->table_nelems
= (s
->header
.cluster_size
* s
->header
.table_size
) /
205 s
->l2_shift
= ffs(s
->header
.cluster_size
) - 1;
206 s
->l2_mask
= s
->table_nelems
- 1;
207 s
->l1_shift
= s
->l2_shift
+ ffs(s
->table_nelems
) - 1;
209 if ((s
->header
.features
& QED_F_BACKING_FILE
)) {
210 if ((uint64_t)s
->header
.backing_filename_offset
+
211 s
->header
.backing_filename_size
>
212 s
->header
.cluster_size
* s
->header
.header_size
) {
216 ret
= qed_read_string(bs
->file
, s
->header
.backing_filename_offset
,
217 s
->header
.backing_filename_size
, bs
->backing_file
,
218 sizeof(bs
->backing_file
));
223 if (s
->header
.features
& QED_F_BACKING_FORMAT_NO_PROBE
) {
224 pstrcpy(bs
->backing_format
, sizeof(bs
->backing_format
), "raw");
228 /* Reset unknown autoclear feature bits. This is a backwards
229 * compatibility mechanism that allows images to be opened by older
230 * programs, which "knock out" unknown feature bits. When an image is
231 * opened by a newer program again it can detect that the autoclear
232 * feature is no longer valid.
234 if ((s
->header
.autoclear_features
& ~QED_AUTOCLEAR_FEATURE_MASK
) != 0 &&
235 !bdrv_is_read_only(bs
->file
)) {
236 s
->header
.autoclear_features
&= QED_AUTOCLEAR_FEATURE_MASK
;
238 ret
= qed_write_header_sync(s
);
243 /* From here on only known autoclear feature bits are valid */
244 bdrv_flush(bs
->file
);
250 static void bdrv_qed_close(BlockDriverState
*bs
)
254 static int bdrv_qed_flush(BlockDriverState
*bs
)
256 return bdrv_flush(bs
->file
);
259 static int qed_create(const char *filename
, uint32_t cluster_size
,
260 uint64_t image_size
, uint32_t table_size
,
261 const char *backing_file
, const char *backing_fmt
)
265 .cluster_size
= cluster_size
,
266 .table_size
= table_size
,
269 .compat_features
= 0,
270 .l1_table_offset
= cluster_size
,
271 .image_size
= image_size
,
274 uint8_t *l1_table
= NULL
;
275 size_t l1_size
= header
.cluster_size
* header
.table_size
;
277 BlockDriverState
*bs
= NULL
;
279 ret
= bdrv_create_file(filename
, NULL
);
284 ret
= bdrv_file_open(&bs
, filename
, BDRV_O_RDWR
| BDRV_O_CACHE_WB
);
290 header
.features
|= QED_F_BACKING_FILE
;
291 header
.backing_filename_offset
= sizeof(le_header
);
292 header
.backing_filename_size
= strlen(backing_file
);
294 if (qed_fmt_is_raw(backing_fmt
)) {
295 header
.features
|= QED_F_BACKING_FORMAT_NO_PROBE
;
299 qed_header_cpu_to_le(&header
, &le_header
);
300 ret
= bdrv_pwrite(bs
, 0, &le_header
, sizeof(le_header
));
304 ret
= bdrv_pwrite(bs
, sizeof(le_header
), backing_file
,
305 header
.backing_filename_size
);
310 l1_table
= qemu_mallocz(l1_size
);
311 ret
= bdrv_pwrite(bs
, header
.l1_table_offset
, l1_table
, l1_size
);
316 ret
= 0; /* success */
323 static int bdrv_qed_create(const char *filename
, QEMUOptionParameter
*options
)
325 uint64_t image_size
= 0;
326 uint32_t cluster_size
= QED_DEFAULT_CLUSTER_SIZE
;
327 uint32_t table_size
= QED_DEFAULT_TABLE_SIZE
;
328 const char *backing_file
= NULL
;
329 const char *backing_fmt
= NULL
;
331 while (options
&& options
->name
) {
332 if (!strcmp(options
->name
, BLOCK_OPT_SIZE
)) {
333 image_size
= options
->value
.n
;
334 } else if (!strcmp(options
->name
, BLOCK_OPT_BACKING_FILE
)) {
335 backing_file
= options
->value
.s
;
336 } else if (!strcmp(options
->name
, BLOCK_OPT_BACKING_FMT
)) {
337 backing_fmt
= options
->value
.s
;
338 } else if (!strcmp(options
->name
, BLOCK_OPT_CLUSTER_SIZE
)) {
339 if (options
->value
.n
) {
340 cluster_size
= options
->value
.n
;
342 } else if (!strcmp(options
->name
, BLOCK_OPT_TABLE_SIZE
)) {
343 if (options
->value
.n
) {
344 table_size
= options
->value
.n
;
350 if (!qed_is_cluster_size_valid(cluster_size
)) {
351 fprintf(stderr
, "QED cluster size must be within range [%u, %u] and power of 2\n",
352 QED_MIN_CLUSTER_SIZE
, QED_MAX_CLUSTER_SIZE
);
355 if (!qed_is_table_size_valid(table_size
)) {
356 fprintf(stderr
, "QED table size must be within range [%u, %u] and power of 2\n",
357 QED_MIN_TABLE_SIZE
, QED_MAX_TABLE_SIZE
);
360 if (!qed_is_image_size_valid(image_size
, cluster_size
, table_size
)) {
361 fprintf(stderr
, "QED image size must be a non-zero multiple of "
362 "cluster size and less than %" PRIu64
" bytes\n",
363 qed_max_image_size(cluster_size
, table_size
));
367 return qed_create(filename
, cluster_size
, image_size
, table_size
,
368 backing_file
, backing_fmt
);
371 static int bdrv_qed_is_allocated(BlockDriverState
*bs
, int64_t sector_num
,
372 int nb_sectors
, int *pnum
)
377 static int bdrv_qed_make_empty(BlockDriverState
*bs
)
382 static BlockDriverAIOCB
*bdrv_qed_aio_readv(BlockDriverState
*bs
,
384 QEMUIOVector
*qiov
, int nb_sectors
,
385 BlockDriverCompletionFunc
*cb
,
391 static BlockDriverAIOCB
*bdrv_qed_aio_writev(BlockDriverState
*bs
,
393 QEMUIOVector
*qiov
, int nb_sectors
,
394 BlockDriverCompletionFunc
*cb
,
400 static BlockDriverAIOCB
*bdrv_qed_aio_flush(BlockDriverState
*bs
,
401 BlockDriverCompletionFunc
*cb
,
404 return bdrv_aio_flush(bs
->file
, cb
, opaque
);
407 static int bdrv_qed_truncate(BlockDriverState
*bs
, int64_t offset
)
412 static int64_t bdrv_qed_getlength(BlockDriverState
*bs
)
414 BDRVQEDState
*s
= bs
->opaque
;
415 return s
->header
.image_size
;
418 static int bdrv_qed_get_info(BlockDriverState
*bs
, BlockDriverInfo
*bdi
)
420 BDRVQEDState
*s
= bs
->opaque
;
422 memset(bdi
, 0, sizeof(*bdi
));
423 bdi
->cluster_size
= s
->header
.cluster_size
;
427 static int bdrv_qed_change_backing_file(BlockDriverState
*bs
,
428 const char *backing_file
,
429 const char *backing_fmt
)
431 BDRVQEDState
*s
= bs
->opaque
;
432 QEDHeader new_header
, le_header
;
434 size_t buffer_len
, backing_file_len
;
437 /* Refuse to set backing filename if unknown compat feature bits are
438 * active. If the image uses an unknown compat feature then we may not
439 * know the layout of data following the header structure and cannot safely
442 if (backing_file
&& (s
->header
.compat_features
&
443 ~QED_COMPAT_FEATURE_MASK
)) {
447 memcpy(&new_header
, &s
->header
, sizeof(new_header
));
449 new_header
.features
&= ~(QED_F_BACKING_FILE
|
450 QED_F_BACKING_FORMAT_NO_PROBE
);
452 /* Adjust feature flags */
454 new_header
.features
|= QED_F_BACKING_FILE
;
456 if (qed_fmt_is_raw(backing_fmt
)) {
457 new_header
.features
|= QED_F_BACKING_FORMAT_NO_PROBE
;
461 /* Calculate new header size */
462 backing_file_len
= 0;
465 backing_file_len
= strlen(backing_file
);
468 buffer_len
= sizeof(new_header
);
469 new_header
.backing_filename_offset
= buffer_len
;
470 new_header
.backing_filename_size
= backing_file_len
;
471 buffer_len
+= backing_file_len
;
473 /* Make sure we can rewrite header without failing */
474 if (buffer_len
> new_header
.header_size
* new_header
.cluster_size
) {
478 /* Prepare new header */
479 buffer
= qemu_malloc(buffer_len
);
481 qed_header_cpu_to_le(&new_header
, &le_header
);
482 memcpy(buffer
, &le_header
, sizeof(le_header
));
483 buffer_len
= sizeof(le_header
);
485 memcpy(buffer
+ buffer_len
, backing_file
, backing_file_len
);
486 buffer_len
+= backing_file_len
;
488 /* Write new header */
489 ret
= bdrv_pwrite_sync(bs
->file
, 0, buffer
, buffer_len
);
492 memcpy(&s
->header
, &new_header
, sizeof(new_header
));
497 static int bdrv_qed_check(BlockDriverState
*bs
, BdrvCheckResult
*result
)
502 static QEMUOptionParameter qed_create_options
[] = {
504 .name
= BLOCK_OPT_SIZE
,
506 .help
= "Virtual disk size (in bytes)"
508 .name
= BLOCK_OPT_BACKING_FILE
,
510 .help
= "File name of a base image"
512 .name
= BLOCK_OPT_BACKING_FMT
,
514 .help
= "Image format of the base image"
516 .name
= BLOCK_OPT_CLUSTER_SIZE
,
518 .help
= "Cluster size (in bytes)"
520 .name
= BLOCK_OPT_TABLE_SIZE
,
522 .help
= "L1/L2 table size (in clusters)"
524 { /* end of list */ }
527 static BlockDriver bdrv_qed
= {
528 .format_name
= "qed",
529 .instance_size
= sizeof(BDRVQEDState
),
530 .create_options
= qed_create_options
,
532 .bdrv_probe
= bdrv_qed_probe
,
533 .bdrv_open
= bdrv_qed_open
,
534 .bdrv_close
= bdrv_qed_close
,
535 .bdrv_create
= bdrv_qed_create
,
536 .bdrv_flush
= bdrv_qed_flush
,
537 .bdrv_is_allocated
= bdrv_qed_is_allocated
,
538 .bdrv_make_empty
= bdrv_qed_make_empty
,
539 .bdrv_aio_readv
= bdrv_qed_aio_readv
,
540 .bdrv_aio_writev
= bdrv_qed_aio_writev
,
541 .bdrv_aio_flush
= bdrv_qed_aio_flush
,
542 .bdrv_truncate
= bdrv_qed_truncate
,
543 .bdrv_getlength
= bdrv_qed_getlength
,
544 .bdrv_get_info
= bdrv_qed_get_info
,
545 .bdrv_change_backing_file
= bdrv_qed_change_backing_file
,
546 .bdrv_check
= bdrv_qed_check
,
549 static void bdrv_qed_init(void)
551 bdrv_register(&bdrv_qed
);
554 block_init(bdrv_qed_init
);