From: Anthony Liguori Date: Sat, 9 May 2009 22:14:19 +0000 (-0500) Subject: Move block drivers into their own directory X-Git-Tag: v0.11.0-rc0~642 X-Git-Url: https://git.proxmox.com/?a=commitdiff_plain;h=019d6b8ff0d495ded6977f24a4e8fd1c7fec09e0;p=qemu.git Move block drivers into their own directory Signed-off-by: Anthony Liguori --- diff --git a/Makefile b/Makefile index 046c74369..22d200671 100644 --- a/Makefile +++ b/Makefile @@ -64,18 +64,18 @@ recurse-all: $(SUBDIR_RULES) # BLOCK_OBJS is code used by both qemu system emulation and qemu-img BLOCK_OBJS=cutils.o cache-utils.o qemu-malloc.o module.o -BLOCK_OBJS+=block-cow.o block-qcow.o aes.o block-vmdk.o block-cloop.o -BLOCK_OBJS+=block-dmg.o block-bochs.o block-vpc.o block-vvfat.o -BLOCK_OBJS+=block-qcow2.o block-parallels.o block-nbd.o +BLOCK_OBJS+=block/cow.o block/qcow.o aes.o block/vmdk.o block/cloop.o +BLOCK_OBJS+=block/dmg.o block/bochs.o block/vpc.o block/vvfat.o +BLOCK_OBJS+=block/qcow2.o block/parallels.o block/nbd.o BLOCK_OBJS+=nbd.o block.o aio.o ifdef CONFIG_WIN32 -BLOCK_OBJS += block-raw-win32.o +BLOCK_OBJS += block/raw-win32.o else ifdef CONFIG_AIO BLOCK_OBJS += posix-aio-compat.o endif -BLOCK_OBJS += block-raw-posix.o +BLOCK_OBJS += block/raw-posix.o endif ###################################################################### @@ -234,7 +234,7 @@ clean: # avoid old build problems by removing potentially incorrect old files rm -f config.mak config.h op-i386.h opc-i386.h gen-op-i386.h op-arm.h opc-arm.h gen-op-arm.h rm -f *.o *.d *.a $(TOOLS) TAGS cscope.* *.pod *~ */*~ - rm -f slirp/*.o slirp/*.d audio/*.o audio/*.d + rm -f slirp/*.o slirp/*.d audio/*.o audio/*.d block/*.o block/*.d $(MAKE) -C tests clean for d in $(TARGET_DIRS); do \ $(MAKE) -C $$d $@ || exit 1 ; \ @@ -408,4 +408,4 @@ tarbin: $(mandir)/man8/qemu-nbd.8 # Include automatically generated dependency files --include $(wildcard *.d audio/*.d slirp/*.d) +-include $(wildcard *.d audio/*.d slirp/*.d block/*.d) diff --git a/block-bochs.c b/block-bochs.c deleted file mode 100644 index bac81c42b..000000000 --- a/block-bochs.c +++ /dev/null @@ -1,259 +0,0 @@ -/* - * Block driver for the various disk image formats used by Bochs - * Currently only for "growing" type in read-only mode - * - * Copyright (c) 2005 Alex Beregszaszi - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. - */ -#include "qemu-common.h" -#include "block_int.h" -#include "module.h" - -/**************************************************************/ - -#define HEADER_MAGIC "Bochs Virtual HD Image" -#define HEADER_VERSION 0x00020000 -#define HEADER_V1 0x00010000 -#define HEADER_SIZE 512 - -#define REDOLOG_TYPE "Redolog" -#define GROWING_TYPE "Growing" - -// not allocated: 0xffffffff - -// always little-endian -struct bochs_header_v1 { - char magic[32]; // "Bochs Virtual HD Image" - char type[16]; // "Redolog" - char subtype[16]; // "Undoable" / "Volatile" / "Growing" - uint32_t version; - uint32_t header; // size of header - - union { - struct { - uint32_t catalog; // num of entries - uint32_t bitmap; // bitmap size - uint32_t extent; // extent size - uint64_t disk; // disk size - char padding[HEADER_SIZE - 64 - 8 - 20]; - } redolog; - char padding[HEADER_SIZE - 64 - 8]; - } extra; -}; - -// always little-endian -struct bochs_header { - char magic[32]; // "Bochs Virtual HD Image" - char type[16]; // "Redolog" - char subtype[16]; // "Undoable" / "Volatile" / "Growing" - uint32_t version; - uint32_t header; // size of header - - union { - struct { - uint32_t catalog; // num of entries - uint32_t bitmap; // bitmap size - uint32_t extent; // extent size - uint32_t reserved; // for ??? - uint64_t disk; // disk size - char padding[HEADER_SIZE - 64 - 8 - 24]; - } redolog; - char padding[HEADER_SIZE - 64 - 8]; - } extra; -}; - -typedef struct BDRVBochsState { - int fd; - - uint32_t *catalog_bitmap; - int catalog_size; - - int data_offset; - - int bitmap_blocks; - int extent_blocks; - int extent_size; -} BDRVBochsState; - -static int bochs_probe(const uint8_t *buf, int buf_size, const char *filename) -{ - const struct bochs_header *bochs = (const void *)buf; - - if (buf_size < HEADER_SIZE) - return 0; - - if (!strcmp(bochs->magic, HEADER_MAGIC) && - !strcmp(bochs->type, REDOLOG_TYPE) && - !strcmp(bochs->subtype, GROWING_TYPE) && - ((le32_to_cpu(bochs->version) == HEADER_VERSION) || - (le32_to_cpu(bochs->version) == HEADER_V1))) - return 100; - - return 0; -} - -static int bochs_open(BlockDriverState *bs, const char *filename, int flags) -{ - BDRVBochsState *s = bs->opaque; - int fd, i; - struct bochs_header bochs; - struct bochs_header_v1 header_v1; - - fd = open(filename, O_RDWR | O_BINARY); - if (fd < 0) { - fd = open(filename, O_RDONLY | O_BINARY); - if (fd < 0) - return -1; - } - - bs->read_only = 1; // no write support yet - - s->fd = fd; - - if (read(fd, &bochs, sizeof(bochs)) != sizeof(bochs)) { - goto fail; - } - - if (strcmp(bochs.magic, HEADER_MAGIC) || - strcmp(bochs.type, REDOLOG_TYPE) || - strcmp(bochs.subtype, GROWING_TYPE) || - ((le32_to_cpu(bochs.version) != HEADER_VERSION) && - (le32_to_cpu(bochs.version) != HEADER_V1))) { - goto fail; - } - - if (le32_to_cpu(bochs.version) == HEADER_V1) { - memcpy(&header_v1, &bochs, sizeof(bochs)); - bs->total_sectors = le64_to_cpu(header_v1.extra.redolog.disk) / 512; - } else { - bs->total_sectors = le64_to_cpu(bochs.extra.redolog.disk) / 512; - } - - lseek(s->fd, le32_to_cpu(bochs.header), SEEK_SET); - - s->catalog_size = le32_to_cpu(bochs.extra.redolog.catalog); - s->catalog_bitmap = qemu_malloc(s->catalog_size * 4); - if (read(s->fd, s->catalog_bitmap, s->catalog_size * 4) != - s->catalog_size * 4) - goto fail; - for (i = 0; i < s->catalog_size; i++) - le32_to_cpus(&s->catalog_bitmap[i]); - - s->data_offset = le32_to_cpu(bochs.header) + (s->catalog_size * 4); - - s->bitmap_blocks = 1 + (le32_to_cpu(bochs.extra.redolog.bitmap) - 1) / 512; - s->extent_blocks = 1 + (le32_to_cpu(bochs.extra.redolog.extent) - 1) / 512; - - s->extent_size = le32_to_cpu(bochs.extra.redolog.extent); - - return 0; - fail: - close(fd); - return -1; -} - -static inline int seek_to_sector(BlockDriverState *bs, int64_t sector_num) -{ - BDRVBochsState *s = bs->opaque; - int64_t offset = sector_num * 512; - int64_t extent_index, extent_offset, bitmap_offset, block_offset; - char bitmap_entry; - - // seek to sector - extent_index = offset / s->extent_size; - extent_offset = (offset % s->extent_size) / 512; - - if (s->catalog_bitmap[extent_index] == 0xffffffff) - { -// fprintf(stderr, "page not allocated [%x - %x:%x]\n", -// sector_num, extent_index, extent_offset); - return -1; // not allocated - } - - bitmap_offset = s->data_offset + (512 * s->catalog_bitmap[extent_index] * - (s->extent_blocks + s->bitmap_blocks)); - block_offset = bitmap_offset + (512 * (s->bitmap_blocks + extent_offset)); - -// fprintf(stderr, "sect: %x [ext i: %x o: %x] -> %x bitmap: %x block: %x\n", -// sector_num, extent_index, extent_offset, -// le32_to_cpu(s->catalog_bitmap[extent_index]), -// bitmap_offset, block_offset); - - // read in bitmap for current extent - lseek(s->fd, bitmap_offset + (extent_offset / 8), SEEK_SET); - - read(s->fd, &bitmap_entry, 1); - - if (!((bitmap_entry >> (extent_offset % 8)) & 1)) - { -// fprintf(stderr, "sector (%x) in bitmap not allocated\n", -// sector_num); - return -1; // not allocated - } - - lseek(s->fd, block_offset, SEEK_SET); - - return 0; -} - -static int bochs_read(BlockDriverState *bs, int64_t sector_num, - uint8_t *buf, int nb_sectors) -{ - BDRVBochsState *s = bs->opaque; - int ret; - - while (nb_sectors > 0) { - if (!seek_to_sector(bs, sector_num)) - { - ret = read(s->fd, buf, 512); - if (ret != 512) - return -1; - } - else - memset(buf, 0, 512); - nb_sectors--; - sector_num++; - buf += 512; - } - return 0; -} - -static void bochs_close(BlockDriverState *bs) -{ - BDRVBochsState *s = bs->opaque; - qemu_free(s->catalog_bitmap); - close(s->fd); -} - -static BlockDriver bdrv_bochs = { - .format_name = "bochs", - .instance_size = sizeof(BDRVBochsState), - .bdrv_probe = bochs_probe, - .bdrv_open = bochs_open, - .bdrv_read = bochs_read, - .bdrv_close = bochs_close, -}; - -static void bdrv_bochs_init(void) -{ - bdrv_register(&bdrv_bochs); -} - -block_init(bdrv_bochs_init); diff --git a/block-cloop.c b/block-cloop.c deleted file mode 100644 index 06c687e69..000000000 --- a/block-cloop.c +++ /dev/null @@ -1,171 +0,0 @@ -/* - * QEMU Block driver for CLOOP images - * - * Copyright (c) 2004 Johannes E. Schindelin - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. - */ -#include "qemu-common.h" -#include "block_int.h" -#include "module.h" -#include - -typedef struct BDRVCloopState { - int fd; - uint32_t block_size; - uint32_t n_blocks; - uint64_t* offsets; - uint32_t sectors_per_block; - uint32_t current_block; - uint8_t *compressed_block; - uint8_t *uncompressed_block; - z_stream zstream; -} BDRVCloopState; - -static int cloop_probe(const uint8_t *buf, int buf_size, const char *filename) -{ - const char* magic_version_2_0="#!/bin/sh\n" - "#V2.0 Format\n" - "modprobe cloop file=$0 && mount -r -t iso9660 /dev/cloop $1\n"; - int length=strlen(magic_version_2_0); - if(length>buf_size) - length=buf_size; - if(!memcmp(magic_version_2_0,buf,length)) - return 2; - return 0; -} - -static int cloop_open(BlockDriverState *bs, const char *filename, int flags) -{ - BDRVCloopState *s = bs->opaque; - uint32_t offsets_size,max_compressed_block_size=1,i; - - s->fd = open(filename, O_RDONLY | O_BINARY); - if (s->fd < 0) - return -errno; - bs->read_only = 1; - - /* read header */ - if(lseek(s->fd,128,SEEK_SET)<0) { -cloop_close: - close(s->fd); - return -1; - } - if(read(s->fd,&s->block_size,4)<4) - goto cloop_close; - s->block_size=be32_to_cpu(s->block_size); - if(read(s->fd,&s->n_blocks,4)<4) - goto cloop_close; - s->n_blocks=be32_to_cpu(s->n_blocks); - - /* read offsets */ - offsets_size=s->n_blocks*sizeof(uint64_t); - s->offsets=(uint64_t*)qemu_malloc(offsets_size); - if(read(s->fd,s->offsets,offsets_size)n_blocks;i++) { - s->offsets[i]=be64_to_cpu(s->offsets[i]); - if(i>0) { - uint32_t size=s->offsets[i]-s->offsets[i-1]; - if(size>max_compressed_block_size) - max_compressed_block_size=size; - } - } - - /* initialize zlib engine */ - s->compressed_block = qemu_malloc(max_compressed_block_size+1); - s->uncompressed_block = qemu_malloc(s->block_size); - if(inflateInit(&s->zstream) != Z_OK) - goto cloop_close; - s->current_block=s->n_blocks; - - s->sectors_per_block = s->block_size/512; - bs->total_sectors = s->n_blocks*s->sectors_per_block; - return 0; -} - -static inline int cloop_read_block(BDRVCloopState *s,int block_num) -{ - if(s->current_block != block_num) { - int ret; - uint32_t bytes = s->offsets[block_num+1]-s->offsets[block_num]; - - lseek(s->fd, s->offsets[block_num], SEEK_SET); - ret = read(s->fd, s->compressed_block, bytes); - if (ret != bytes) - return -1; - - s->zstream.next_in = s->compressed_block; - s->zstream.avail_in = bytes; - s->zstream.next_out = s->uncompressed_block; - s->zstream.avail_out = s->block_size; - ret = inflateReset(&s->zstream); - if(ret != Z_OK) - return -1; - ret = inflate(&s->zstream, Z_FINISH); - if(ret != Z_STREAM_END || s->zstream.total_out != s->block_size) - return -1; - - s->current_block = block_num; - } - return 0; -} - -static int cloop_read(BlockDriverState *bs, int64_t sector_num, - uint8_t *buf, int nb_sectors) -{ - BDRVCloopState *s = bs->opaque; - int i; - - for(i=0;isectors_per_block), - block_num=(sector_num+i)/s->sectors_per_block; - if(cloop_read_block(s, block_num) != 0) - return -1; - memcpy(buf+i*512,s->uncompressed_block+sector_offset_in_block*512,512); - } - return 0; -} - -static void cloop_close(BlockDriverState *bs) -{ - BDRVCloopState *s = bs->opaque; - close(s->fd); - if(s->n_blocks>0) - free(s->offsets); - free(s->compressed_block); - free(s->uncompressed_block); - inflateEnd(&s->zstream); -} - -static BlockDriver bdrv_cloop = { - .format_name = "cloop", - .instance_size = sizeof(BDRVCloopState), - .bdrv_probe = cloop_probe, - .bdrv_open = cloop_open, - .bdrv_read = cloop_read, - .bdrv_close = cloop_close, -}; - -static void bdrv_cloop_init(void) -{ - bdrv_register(&bdrv_cloop); -} - -block_init(bdrv_cloop_init); diff --git a/block-cow.c b/block-cow.c deleted file mode 100644 index 94b354938..000000000 --- a/block-cow.c +++ /dev/null @@ -1,275 +0,0 @@ -/* - * Block driver for the COW format - * - * Copyright (c) 2004 Fabrice Bellard - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. - */ -#ifndef _WIN32 -#include "qemu-common.h" -#include "block_int.h" -#include "module.h" -#include - -/**************************************************************/ -/* COW block driver using file system holes */ - -/* user mode linux compatible COW file */ -#define COW_MAGIC 0x4f4f4f4d /* MOOO */ -#define COW_VERSION 2 - -struct cow_header_v2 { - uint32_t magic; - uint32_t version; - char backing_file[1024]; - int32_t mtime; - uint64_t size; - uint32_t sectorsize; -}; - -typedef struct BDRVCowState { - int fd; - uint8_t *cow_bitmap; /* if non NULL, COW mappings are used first */ - uint8_t *cow_bitmap_addr; /* mmap address of cow_bitmap */ - int cow_bitmap_size; - int64_t cow_sectors_offset; -} BDRVCowState; - -static int cow_probe(const uint8_t *buf, int buf_size, const char *filename) -{ - const struct cow_header_v2 *cow_header = (const void *)buf; - - if (buf_size >= sizeof(struct cow_header_v2) && - be32_to_cpu(cow_header->magic) == COW_MAGIC && - be32_to_cpu(cow_header->version) == COW_VERSION) - return 100; - else - return 0; -} - -static int cow_open(BlockDriverState *bs, const char *filename, int flags) -{ - BDRVCowState *s = bs->opaque; - int fd; - struct cow_header_v2 cow_header; - int64_t size; - - fd = open(filename, O_RDWR | O_BINARY | O_LARGEFILE); - if (fd < 0) { - fd = open(filename, O_RDONLY | O_BINARY | O_LARGEFILE); - if (fd < 0) - return -1; - } - s->fd = fd; - /* see if it is a cow image */ - if (read(fd, &cow_header, sizeof(cow_header)) != sizeof(cow_header)) { - goto fail; - } - - if (be32_to_cpu(cow_header.magic) != COW_MAGIC || - be32_to_cpu(cow_header.version) != COW_VERSION) { - goto fail; - } - - /* cow image found */ - size = be64_to_cpu(cow_header.size); - bs->total_sectors = size / 512; - - pstrcpy(bs->backing_file, sizeof(bs->backing_file), - cow_header.backing_file); - - /* mmap the bitmap */ - s->cow_bitmap_size = ((bs->total_sectors + 7) >> 3) + sizeof(cow_header); - s->cow_bitmap_addr = (void *)mmap(get_mmap_addr(s->cow_bitmap_size), - s->cow_bitmap_size, - PROT_READ | PROT_WRITE, - MAP_SHARED, s->fd, 0); - if (s->cow_bitmap_addr == MAP_FAILED) - goto fail; - s->cow_bitmap = s->cow_bitmap_addr + sizeof(cow_header); - s->cow_sectors_offset = (s->cow_bitmap_size + 511) & ~511; - return 0; - fail: - close(fd); - return -1; -} - -static inline void cow_set_bit(uint8_t *bitmap, int64_t bitnum) -{ - bitmap[bitnum / 8] |= (1 << (bitnum%8)); -} - -static inline int is_bit_set(const uint8_t *bitmap, int64_t bitnum) -{ - return !!(bitmap[bitnum / 8] & (1 << (bitnum%8))); -} - - -/* Return true if first block has been changed (ie. current version is - * in COW file). Set the number of continuous blocks for which that - * is true. */ -static inline int is_changed(uint8_t *bitmap, - int64_t sector_num, int nb_sectors, - int *num_same) -{ - int changed; - - if (!bitmap || nb_sectors == 0) { - *num_same = nb_sectors; - return 0; - } - - changed = is_bit_set(bitmap, sector_num); - for (*num_same = 1; *num_same < nb_sectors; (*num_same)++) { - if (is_bit_set(bitmap, sector_num + *num_same) != changed) - break; - } - - return changed; -} - -static int cow_is_allocated(BlockDriverState *bs, int64_t sector_num, - int nb_sectors, int *pnum) -{ - BDRVCowState *s = bs->opaque; - return is_changed(s->cow_bitmap, sector_num, nb_sectors, pnum); -} - -static int cow_read(BlockDriverState *bs, int64_t sector_num, - uint8_t *buf, int nb_sectors) -{ - BDRVCowState *s = bs->opaque; - int ret, n; - - while (nb_sectors > 0) { - if (is_changed(s->cow_bitmap, sector_num, nb_sectors, &n)) { - lseek(s->fd, s->cow_sectors_offset + sector_num * 512, SEEK_SET); - ret = read(s->fd, buf, n * 512); - if (ret != n * 512) - return -1; - } else { - if (bs->backing_hd) { - /* read from the base image */ - ret = bdrv_read(bs->backing_hd, sector_num, buf, n); - if (ret < 0) - return -1; - } else { - memset(buf, 0, n * 512); - } - } - nb_sectors -= n; - sector_num += n; - buf += n * 512; - } - return 0; -} - -static int cow_write(BlockDriverState *bs, int64_t sector_num, - const uint8_t *buf, int nb_sectors) -{ - BDRVCowState *s = bs->opaque; - int ret, i; - - lseek(s->fd, s->cow_sectors_offset + sector_num * 512, SEEK_SET); - ret = write(s->fd, buf, nb_sectors * 512); - if (ret != nb_sectors * 512) - return -1; - for (i = 0; i < nb_sectors; i++) - cow_set_bit(s->cow_bitmap, sector_num + i); - return 0; -} - -static void cow_close(BlockDriverState *bs) -{ - BDRVCowState *s = bs->opaque; - munmap((void *)s->cow_bitmap_addr, s->cow_bitmap_size); - close(s->fd); -} - -static int cow_create(const char *filename, int64_t image_sectors, - const char *image_filename, int flags) -{ - int fd, cow_fd; - struct cow_header_v2 cow_header; - struct stat st; - - if (flags) - return -ENOTSUP; - - cow_fd = open(filename, O_WRONLY | O_CREAT | O_TRUNC | O_BINARY, - 0644); - if (cow_fd < 0) - return -1; - memset(&cow_header, 0, sizeof(cow_header)); - cow_header.magic = cpu_to_be32(COW_MAGIC); - cow_header.version = cpu_to_be32(COW_VERSION); - if (image_filename) { - /* Note: if no file, we put a dummy mtime */ - cow_header.mtime = cpu_to_be32(0); - - fd = open(image_filename, O_RDONLY | O_BINARY); - if (fd < 0) { - close(cow_fd); - goto mtime_fail; - } - if (fstat(fd, &st) != 0) { - close(fd); - goto mtime_fail; - } - close(fd); - cow_header.mtime = cpu_to_be32(st.st_mtime); - mtime_fail: - pstrcpy(cow_header.backing_file, sizeof(cow_header.backing_file), - image_filename); - } - cow_header.sectorsize = cpu_to_be32(512); - cow_header.size = cpu_to_be64(image_sectors * 512); - write(cow_fd, &cow_header, sizeof(cow_header)); - /* resize to include at least all the bitmap */ - ftruncate(cow_fd, sizeof(cow_header) + ((image_sectors + 7) >> 3)); - close(cow_fd); - return 0; -} - -static void cow_flush(BlockDriverState *bs) -{ - BDRVCowState *s = bs->opaque; - fsync(s->fd); -} - -static BlockDriver bdrv_cow = { - .format_name = "cow", - .instance_size = sizeof(BDRVCowState), - .bdrv_probe = cow_probe, - .bdrv_open = cow_open, - .bdrv_read = cow_read, - .bdrv_write = cow_write, - .bdrv_close = cow_close, - .bdrv_create = cow_create, - .bdrv_flush = cow_flush, - .bdrv_is_allocated = cow_is_allocated, -}; - -static void bdrv_cow_init(void) -{ - bdrv_register(&bdrv_cow); -} - -block_init(bdrv_cow_init); -#endif diff --git a/block-dmg.c b/block-dmg.c deleted file mode 100644 index 262560ffd..000000000 --- a/block-dmg.c +++ /dev/null @@ -1,301 +0,0 @@ -/* - * QEMU Block driver for DMG images - * - * Copyright (c) 2004 Johannes E. Schindelin - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. - */ -#include "qemu-common.h" -#include "block_int.h" -#include "bswap.h" -#include "module.h" -#include - -typedef struct BDRVDMGState { - int fd; - - /* each chunk contains a certain number of sectors, - * offsets[i] is the offset in the .dmg file, - * lengths[i] is the length of the compressed chunk, - * sectors[i] is the sector beginning at offsets[i], - * sectorcounts[i] is the number of sectors in that chunk, - * the sectors array is ordered - * 0<=i4 && !strcmp(filename+len-4,".dmg")) - return 2; - return 0; -} - -static off_t read_off(int fd) -{ - uint64_t buffer; - if(read(fd,&buffer,8)<8) - return 0; - return be64_to_cpu(buffer); -} - -static off_t read_uint32(int fd) -{ - uint32_t buffer; - if(read(fd,&buffer,4)<4) - return 0; - return be32_to_cpu(buffer); -} - -static int dmg_open(BlockDriverState *bs, const char *filename, int flags) -{ - BDRVDMGState *s = bs->opaque; - off_t info_begin,info_end,last_in_offset,last_out_offset; - uint32_t count; - uint32_t max_compressed_size=1,max_sectors_per_chunk=1,i; - - s->fd = open(filename, O_RDONLY | O_BINARY); - if (s->fd < 0) - return -errno; - bs->read_only = 1; - s->n_chunks = 0; - s->offsets = s->lengths = s->sectors = s->sectorcounts = NULL; - - /* read offset of info blocks */ - if(lseek(s->fd,-0x1d8,SEEK_END)<0) { -dmg_close: - close(s->fd); - /* open raw instead */ - bs->drv=bdrv_find_format("raw"); - return bs->drv->bdrv_open(bs, filename, flags); - } - info_begin=read_off(s->fd); - if(info_begin==0) - goto dmg_close; - if(lseek(s->fd,info_begin,SEEK_SET)<0) - goto dmg_close; - if(read_uint32(s->fd)!=0x100) - goto dmg_close; - if((count = read_uint32(s->fd))==0) - goto dmg_close; - info_end = info_begin+count; - if(lseek(s->fd,0xf8,SEEK_CUR)<0) - goto dmg_close; - - /* read offsets */ - last_in_offset = last_out_offset = 0; - while(lseek(s->fd,0,SEEK_CUR)fd); - if(count==0) - goto dmg_close; - type = read_uint32(s->fd); - if(type!=0x6d697368 || count<244) - lseek(s->fd,count-4,SEEK_CUR); - else { - int new_size, chunk_count; - if(lseek(s->fd,200,SEEK_CUR)<0) - goto dmg_close; - chunk_count = (count-204)/40; - new_size = sizeof(uint64_t) * (s->n_chunks + chunk_count); - s->types = qemu_realloc(s->types, new_size/2); - s->offsets = qemu_realloc(s->offsets, new_size); - s->lengths = qemu_realloc(s->lengths, new_size); - s->sectors = qemu_realloc(s->sectors, new_size); - s->sectorcounts = qemu_realloc(s->sectorcounts, new_size); - - for(i=s->n_chunks;in_chunks+chunk_count;i++) { - s->types[i] = read_uint32(s->fd); - if(s->types[i]!=0x80000005 && s->types[i]!=1 && s->types[i]!=2) { - if(s->types[i]==0xffffffff) { - last_in_offset = s->offsets[i-1]+s->lengths[i-1]; - last_out_offset = s->sectors[i-1]+s->sectorcounts[i-1]; - } - chunk_count--; - i--; - if(lseek(s->fd,36,SEEK_CUR)<0) - goto dmg_close; - continue; - } - read_uint32(s->fd); - s->sectors[i] = last_out_offset+read_off(s->fd); - s->sectorcounts[i] = read_off(s->fd); - s->offsets[i] = last_in_offset+read_off(s->fd); - s->lengths[i] = read_off(s->fd); - if(s->lengths[i]>max_compressed_size) - max_compressed_size = s->lengths[i]; - if(s->sectorcounts[i]>max_sectors_per_chunk) - max_sectors_per_chunk = s->sectorcounts[i]; - } - s->n_chunks+=chunk_count; - } - } - - /* initialize zlib engine */ - s->compressed_chunk = qemu_malloc(max_compressed_size+1); - s->uncompressed_chunk = qemu_malloc(512*max_sectors_per_chunk); - if(inflateInit(&s->zstream) != Z_OK) - goto dmg_close; - - s->current_chunk = s->n_chunks; - - return 0; -} - -static inline int is_sector_in_chunk(BDRVDMGState* s, - uint32_t chunk_num,int sector_num) -{ - if(chunk_num>=s->n_chunks || s->sectors[chunk_num]>sector_num || - s->sectors[chunk_num]+s->sectorcounts[chunk_num]<=sector_num) - return 0; - else - return -1; -} - -static inline uint32_t search_chunk(BDRVDMGState* s,int sector_num) -{ - /* binary search */ - uint32_t chunk1=0,chunk2=s->n_chunks,chunk3; - while(chunk1!=chunk2) { - chunk3 = (chunk1+chunk2)/2; - if(s->sectors[chunk3]>sector_num) - chunk2 = chunk3; - else if(s->sectors[chunk3]+s->sectorcounts[chunk3]>sector_num) - return chunk3; - else - chunk1 = chunk3; - } - return s->n_chunks; /* error */ -} - -static inline int dmg_read_chunk(BDRVDMGState *s,int sector_num) -{ - if(!is_sector_in_chunk(s,s->current_chunk,sector_num)) { - int ret; - uint32_t chunk = search_chunk(s,sector_num); - - if(chunk>=s->n_chunks) - return -1; - - s->current_chunk = s->n_chunks; - switch(s->types[chunk]) { - case 0x80000005: { /* zlib compressed */ - int i; - - ret = lseek(s->fd, s->offsets[chunk], SEEK_SET); - if(ret<0) - return -1; - - /* we need to buffer, because only the chunk as whole can be - * inflated. */ - i=0; - do { - ret = read(s->fd, s->compressed_chunk+i, s->lengths[chunk]-i); - if(ret<0 && errno==EINTR) - ret=0; - i+=ret; - } while(ret>=0 && ret+ilengths[chunk]); - - if (ret != s->lengths[chunk]) - return -1; - - s->zstream.next_in = s->compressed_chunk; - s->zstream.avail_in = s->lengths[chunk]; - s->zstream.next_out = s->uncompressed_chunk; - s->zstream.avail_out = 512*s->sectorcounts[chunk]; - ret = inflateReset(&s->zstream); - if(ret != Z_OK) - return -1; - ret = inflate(&s->zstream, Z_FINISH); - if(ret != Z_STREAM_END || s->zstream.total_out != 512*s->sectorcounts[chunk]) - return -1; - break; } - case 1: /* copy */ - ret = read(s->fd, s->uncompressed_chunk, s->lengths[chunk]); - if (ret != s->lengths[chunk]) - return -1; - break; - case 2: /* zero */ - memset(s->uncompressed_chunk, 0, 512*s->sectorcounts[chunk]); - break; - } - s->current_chunk = chunk; - } - return 0; -} - -static int dmg_read(BlockDriverState *bs, int64_t sector_num, - uint8_t *buf, int nb_sectors) -{ - BDRVDMGState *s = bs->opaque; - int i; - - for(i=0;isectors[s->current_chunk]; - memcpy(buf+i*512,s->uncompressed_chunk+sector_offset_in_chunk*512,512); - } - return 0; -} - -static void dmg_close(BlockDriverState *bs) -{ - BDRVDMGState *s = bs->opaque; - close(s->fd); - if(s->n_chunks>0) { - free(s->types); - free(s->offsets); - free(s->lengths); - free(s->sectors); - free(s->sectorcounts); - } - free(s->compressed_chunk); - free(s->uncompressed_chunk); - inflateEnd(&s->zstream); -} - -static BlockDriver bdrv_dmg = { - .format_name = "dmg", - .instance_size = sizeof(BDRVDMGState), - .bdrv_probe = dmg_probe, - .bdrv_open = dmg_open, - .bdrv_read = dmg_read, - .bdrv_close = dmg_close, -}; - -static void bdrv_dmg_init(void) -{ - bdrv_register(&bdrv_dmg); -} - -block_init(bdrv_dmg_init); diff --git a/block-nbd.c b/block-nbd.c deleted file mode 100644 index 47d477899..000000000 --- a/block-nbd.c +++ /dev/null @@ -1,196 +0,0 @@ -/* - * QEMU Block driver for NBD - * - * Copyright (C) 2008 Bull S.A.S. - * Author: Laurent Vivier - * - * Some parts: - * Copyright (C) 2007 Anthony Liguori - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. - */ - -#include "qemu-common.h" -#include "nbd.h" -#include "module.h" - -#include -#include - -typedef struct BDRVNBDState { - int sock; - off_t size; - size_t blocksize; -} BDRVNBDState; - -static int nbd_open(BlockDriverState *bs, const char* filename, int flags) -{ - BDRVNBDState *s = bs->opaque; - const char *host; - const char *unixpath; - int sock; - off_t size; - size_t blocksize; - int ret; - - if ((flags & BDRV_O_CREAT)) - return -EINVAL; - - if (!strstart(filename, "nbd:", &host)) - return -EINVAL; - - if (strstart(host, "unix:", &unixpath)) { - - if (unixpath[0] != '/') - return -EINVAL; - - sock = unix_socket_outgoing(unixpath); - - } else { - uint16_t port; - char *p, *r; - char hostname[128]; - - pstrcpy(hostname, 128, host); - - p = strchr(hostname, ':'); - if (p == NULL) - return -EINVAL; - - *p = '\0'; - p++; - - port = strtol(p, &r, 0); - if (r == p) - return -EINVAL; - sock = tcp_socket_outgoing(hostname, port); - } - - if (sock == -1) - return -errno; - - ret = nbd_receive_negotiate(sock, &size, &blocksize); - if (ret == -1) - return -errno; - - s->sock = sock; - s->size = size; - s->blocksize = blocksize; - - return 0; -} - -static int nbd_read(BlockDriverState *bs, int64_t sector_num, - uint8_t *buf, int nb_sectors) -{ - BDRVNBDState *s = bs->opaque; - struct nbd_request request; - struct nbd_reply reply; - - request.type = NBD_CMD_READ; - request.handle = (uint64_t)(intptr_t)bs; - request.from = sector_num * 512;; - request.len = nb_sectors * 512; - - if (nbd_send_request(s->sock, &request) == -1) - return -errno; - - if (nbd_receive_reply(s->sock, &reply) == -1) - return -errno; - - if (reply.error !=0) - return -reply.error; - - if (reply.handle != request.handle) - return -EIO; - - if (nbd_wr_sync(s->sock, buf, request.len, 1) != request.len) - return -EIO; - - return 0; -} - -static int nbd_write(BlockDriverState *bs, int64_t sector_num, - const uint8_t *buf, int nb_sectors) -{ - BDRVNBDState *s = bs->opaque; - struct nbd_request request; - struct nbd_reply reply; - - request.type = NBD_CMD_WRITE; - request.handle = (uint64_t)(intptr_t)bs; - request.from = sector_num * 512;; - request.len = nb_sectors * 512; - - if (nbd_send_request(s->sock, &request) == -1) - return -errno; - - if (nbd_wr_sync(s->sock, (uint8_t*)buf, request.len, 0) != request.len) - return -EIO; - - if (nbd_receive_reply(s->sock, &reply) == -1) - return -errno; - - if (reply.error !=0) - return -reply.error; - - if (reply.handle != request.handle) - return -EIO; - - return 0; -} - -static void nbd_close(BlockDriverState *bs) -{ - BDRVNBDState *s = bs->opaque; - struct nbd_request request; - - request.type = NBD_CMD_DISC; - request.handle = (uint64_t)(intptr_t)bs; - request.from = 0; - request.len = 0; - nbd_send_request(s->sock, &request); - - close(s->sock); -} - -static int64_t nbd_getlength(BlockDriverState *bs) -{ - BDRVNBDState *s = bs->opaque; - - return s->size; -} - -static BlockDriver bdrv_nbd = { - .format_name = "nbd", - .instance_size = sizeof(BDRVNBDState), - .bdrv_open = nbd_open, - .bdrv_read = nbd_read, - .bdrv_write = nbd_write, - .bdrv_close = nbd_close, - .bdrv_getlength = nbd_getlength, - .protocol_name = "nbd", -}; - -static void bdrv_nbd_init(void) -{ - bdrv_register(&bdrv_nbd); -} - -block_init(bdrv_nbd_init); diff --git a/block-parallels.c b/block-parallels.c deleted file mode 100644 index 0b64a5c62..000000000 --- a/block-parallels.c +++ /dev/null @@ -1,181 +0,0 @@ -/* - * Block driver for Parallels disk image format - * - * Copyright (c) 2007 Alex Beregszaszi - * - * This code is based on comparing different disk images created by Parallels. - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. - */ -#include "qemu-common.h" -#include "block_int.h" -#include "module.h" - -/**************************************************************/ - -#define HEADER_MAGIC "WithoutFreeSpace" -#define HEADER_VERSION 2 -#define HEADER_SIZE 64 - -// always little-endian -struct parallels_header { - char magic[16]; // "WithoutFreeSpace" - uint32_t version; - uint32_t heads; - uint32_t cylinders; - uint32_t tracks; - uint32_t catalog_entries; - uint32_t nb_sectors; - char padding[24]; -} __attribute__((packed)); - -typedef struct BDRVParallelsState { - int fd; - - uint32_t *catalog_bitmap; - int catalog_size; - - int tracks; -} BDRVParallelsState; - -static int parallels_probe(const uint8_t *buf, int buf_size, const char *filename) -{ - const struct parallels_header *ph = (const void *)buf; - - if (buf_size < HEADER_SIZE) - return 0; - - if (!memcmp(ph->magic, HEADER_MAGIC, 16) && - (le32_to_cpu(ph->version) == HEADER_VERSION)) - return 100; - - return 0; -} - -static int parallels_open(BlockDriverState *bs, const char *filename, int flags) -{ - BDRVParallelsState *s = bs->opaque; - int fd, i; - struct parallels_header ph; - - fd = open(filename, O_RDWR | O_BINARY | O_LARGEFILE); - if (fd < 0) { - fd = open(filename, O_RDONLY | O_BINARY | O_LARGEFILE); - if (fd < 0) - return -1; - } - - bs->read_only = 1; // no write support yet - - s->fd = fd; - - if (read(fd, &ph, sizeof(ph)) != sizeof(ph)) - goto fail; - - if (memcmp(ph.magic, HEADER_MAGIC, 16) || - (le32_to_cpu(ph.version) != HEADER_VERSION)) { - goto fail; - } - - bs->total_sectors = le32_to_cpu(ph.nb_sectors); - - if (lseek(s->fd, 64, SEEK_SET) != 64) - goto fail; - - s->tracks = le32_to_cpu(ph.tracks); - - s->catalog_size = le32_to_cpu(ph.catalog_entries); - s->catalog_bitmap = qemu_malloc(s->catalog_size * 4); - if (read(s->fd, s->catalog_bitmap, s->catalog_size * 4) != - s->catalog_size * 4) - goto fail; - for (i = 0; i < s->catalog_size; i++) - le32_to_cpus(&s->catalog_bitmap[i]); - - return 0; -fail: - if (s->catalog_bitmap) - qemu_free(s->catalog_bitmap); - close(fd); - return -1; -} - -static inline int seek_to_sector(BlockDriverState *bs, int64_t sector_num) -{ - BDRVParallelsState *s = bs->opaque; - uint32_t index, offset, position; - - index = sector_num / s->tracks; - offset = sector_num % s->tracks; - - // not allocated - if ((index > s->catalog_size) || (s->catalog_bitmap[index] == 0)) - return -1; - - position = (s->catalog_bitmap[index] + offset) * 512; - -// fprintf(stderr, "sector: %llx index=%x offset=%x pointer=%x position=%x\n", -// sector_num, index, offset, s->catalog_bitmap[index], position); - - if (lseek(s->fd, position, SEEK_SET) != position) - return -1; - - return 0; -} - -static int parallels_read(BlockDriverState *bs, int64_t sector_num, - uint8_t *buf, int nb_sectors) -{ - BDRVParallelsState *s = bs->opaque; - - while (nb_sectors > 0) { - if (!seek_to_sector(bs, sector_num)) { - if (read(s->fd, buf, 512) != 512) - return -1; - } else - memset(buf, 0, 512); - nb_sectors--; - sector_num++; - buf += 512; - } - return 0; -} - -static void parallels_close(BlockDriverState *bs) -{ - BDRVParallelsState *s = bs->opaque; - qemu_free(s->catalog_bitmap); - close(s->fd); -} - -static BlockDriver bdrv_parallels = { - .format_name = "parallels", - .instance_size = sizeof(BDRVParallelsState), - .bdrv_probe = parallels_probe, - .bdrv_open = parallels_open, - .bdrv_read = parallels_read, - .bdrv_close = parallels_close, -}; - -static void bdrv_parallels_init(void) -{ - bdrv_register(&bdrv_parallels); -} - -block_init(bdrv_parallels_init); diff --git a/block-qcow.c b/block-qcow.c deleted file mode 100644 index 1cf7c3be7..000000000 --- a/block-qcow.c +++ /dev/null @@ -1,945 +0,0 @@ -/* - * Block driver for the QCOW format - * - * Copyright (c) 2004-2006 Fabrice Bellard - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. - */ -#include "qemu-common.h" -#include "block_int.h" -#include "module.h" -#include -#include "aes.h" - -/**************************************************************/ -/* QEMU COW block driver with compression and encryption support */ - -#define QCOW_MAGIC (('Q' << 24) | ('F' << 16) | ('I' << 8) | 0xfb) -#define QCOW_VERSION 1 - -#define QCOW_CRYPT_NONE 0 -#define QCOW_CRYPT_AES 1 - -#define QCOW_OFLAG_COMPRESSED (1LL << 63) - -typedef struct QCowHeader { - uint32_t magic; - uint32_t version; - uint64_t backing_file_offset; - uint32_t backing_file_size; - uint32_t mtime; - uint64_t size; /* in bytes */ - uint8_t cluster_bits; - uint8_t l2_bits; - uint32_t crypt_method; - uint64_t l1_table_offset; -} QCowHeader; - -#define L2_CACHE_SIZE 16 - -typedef struct BDRVQcowState { - BlockDriverState *hd; - int cluster_bits; - int cluster_size; - int cluster_sectors; - int l2_bits; - int l2_size; - int l1_size; - uint64_t cluster_offset_mask; - uint64_t l1_table_offset; - uint64_t *l1_table; - uint64_t *l2_cache; - uint64_t l2_cache_offsets[L2_CACHE_SIZE]; - uint32_t l2_cache_counts[L2_CACHE_SIZE]; - uint8_t *cluster_cache; - uint8_t *cluster_data; - uint64_t cluster_cache_offset; - uint32_t crypt_method; /* current crypt method, 0 if no key yet */ - uint32_t crypt_method_header; - AES_KEY aes_encrypt_key; - AES_KEY aes_decrypt_key; -} BDRVQcowState; - -static int decompress_cluster(BDRVQcowState *s, uint64_t cluster_offset); - -static int qcow_probe(const uint8_t *buf, int buf_size, const char *filename) -{ - const QCowHeader *cow_header = (const void *)buf; - - if (buf_size >= sizeof(QCowHeader) && - be32_to_cpu(cow_header->magic) == QCOW_MAGIC && - be32_to_cpu(cow_header->version) == QCOW_VERSION) - return 100; - else - return 0; -} - -static int qcow_open(BlockDriverState *bs, const char *filename, int flags) -{ - BDRVQcowState *s = bs->opaque; - int len, i, shift, ret; - QCowHeader header; - - ret = bdrv_file_open(&s->hd, filename, flags); - if (ret < 0) - return ret; - if (bdrv_pread(s->hd, 0, &header, sizeof(header)) != sizeof(header)) - goto fail; - be32_to_cpus(&header.magic); - be32_to_cpus(&header.version); - be64_to_cpus(&header.backing_file_offset); - be32_to_cpus(&header.backing_file_size); - be32_to_cpus(&header.mtime); - be64_to_cpus(&header.size); - be32_to_cpus(&header.crypt_method); - be64_to_cpus(&header.l1_table_offset); - - if (header.magic != QCOW_MAGIC || header.version != QCOW_VERSION) - goto fail; - if (header.size <= 1 || header.cluster_bits < 9) - goto fail; - if (header.crypt_method > QCOW_CRYPT_AES) - goto fail; - s->crypt_method_header = header.crypt_method; - if (s->crypt_method_header) - bs->encrypted = 1; - s->cluster_bits = header.cluster_bits; - s->cluster_size = 1 << s->cluster_bits; - s->cluster_sectors = 1 << (s->cluster_bits - 9); - s->l2_bits = header.l2_bits; - s->l2_size = 1 << s->l2_bits; - bs->total_sectors = header.size / 512; - s->cluster_offset_mask = (1LL << (63 - s->cluster_bits)) - 1; - - /* read the level 1 table */ - shift = s->cluster_bits + s->l2_bits; - s->l1_size = (header.size + (1LL << shift) - 1) >> shift; - - s->l1_table_offset = header.l1_table_offset; - s->l1_table = qemu_malloc(s->l1_size * sizeof(uint64_t)); - if (!s->l1_table) - goto fail; - if (bdrv_pread(s->hd, s->l1_table_offset, s->l1_table, s->l1_size * sizeof(uint64_t)) != - s->l1_size * sizeof(uint64_t)) - goto fail; - for(i = 0;i < s->l1_size; i++) { - be64_to_cpus(&s->l1_table[i]); - } - /* alloc L2 cache */ - s->l2_cache = qemu_malloc(s->l2_size * L2_CACHE_SIZE * sizeof(uint64_t)); - if (!s->l2_cache) - goto fail; - s->cluster_cache = qemu_malloc(s->cluster_size); - if (!s->cluster_cache) - goto fail; - s->cluster_data = qemu_malloc(s->cluster_size); - if (!s->cluster_data) - goto fail; - s->cluster_cache_offset = -1; - - /* read the backing file name */ - if (header.backing_file_offset != 0) { - len = header.backing_file_size; - if (len > 1023) - len = 1023; - if (bdrv_pread(s->hd, header.backing_file_offset, bs->backing_file, len) != len) - goto fail; - bs->backing_file[len] = '\0'; - } - return 0; - - fail: - qemu_free(s->l1_table); - qemu_free(s->l2_cache); - qemu_free(s->cluster_cache); - qemu_free(s->cluster_data); - bdrv_delete(s->hd); - return -1; -} - -static int qcow_set_key(BlockDriverState *bs, const char *key) -{ - BDRVQcowState *s = bs->opaque; - uint8_t keybuf[16]; - int len, i; - - memset(keybuf, 0, 16); - len = strlen(key); - if (len > 16) - len = 16; - /* XXX: we could compress the chars to 7 bits to increase - entropy */ - for(i = 0;i < len;i++) { - keybuf[i] = key[i]; - } - s->crypt_method = s->crypt_method_header; - - if (AES_set_encrypt_key(keybuf, 128, &s->aes_encrypt_key) != 0) - return -1; - if (AES_set_decrypt_key(keybuf, 128, &s->aes_decrypt_key) != 0) - return -1; -#if 0 - /* test */ - { - uint8_t in[16]; - uint8_t out[16]; - uint8_t tmp[16]; - for(i=0;i<16;i++) - in[i] = i; - AES_encrypt(in, tmp, &s->aes_encrypt_key); - AES_decrypt(tmp, out, &s->aes_decrypt_key); - for(i = 0; i < 16; i++) - printf(" %02x", tmp[i]); - printf("\n"); - for(i = 0; i < 16; i++) - printf(" %02x", out[i]); - printf("\n"); - } -#endif - return 0; -} - -/* The crypt function is compatible with the linux cryptoloop - algorithm for < 4 GB images. NOTE: out_buf == in_buf is - supported */ -static void encrypt_sectors(BDRVQcowState *s, int64_t sector_num, - uint8_t *out_buf, const uint8_t *in_buf, - int nb_sectors, int enc, - const AES_KEY *key) -{ - union { - uint64_t ll[2]; - uint8_t b[16]; - } ivec; - int i; - - for(i = 0; i < nb_sectors; i++) { - ivec.ll[0] = cpu_to_le64(sector_num); - ivec.ll[1] = 0; - AES_cbc_encrypt(in_buf, out_buf, 512, key, - ivec.b, enc); - sector_num++; - in_buf += 512; - out_buf += 512; - } -} - -/* 'allocate' is: - * - * 0 to not allocate. - * - * 1 to allocate a normal cluster (for sector indexes 'n_start' to - * 'n_end') - * - * 2 to allocate a compressed cluster of size - * 'compressed_size'. 'compressed_size' must be > 0 and < - * cluster_size - * - * return 0 if not allocated. - */ -static uint64_t get_cluster_offset(BlockDriverState *bs, - uint64_t offset, int allocate, - int compressed_size, - int n_start, int n_end) -{ - BDRVQcowState *s = bs->opaque; - int min_index, i, j, l1_index, l2_index; - uint64_t l2_offset, *l2_table, cluster_offset, tmp; - uint32_t min_count; - int new_l2_table; - - l1_index = offset >> (s->l2_bits + s->cluster_bits); - l2_offset = s->l1_table[l1_index]; - new_l2_table = 0; - if (!l2_offset) { - if (!allocate) - return 0; - /* allocate a new l2 entry */ - l2_offset = bdrv_getlength(s->hd); - /* round to cluster size */ - l2_offset = (l2_offset + s->cluster_size - 1) & ~(s->cluster_size - 1); - /* update the L1 entry */ - s->l1_table[l1_index] = l2_offset; - tmp = cpu_to_be64(l2_offset); - if (bdrv_pwrite(s->hd, s->l1_table_offset + l1_index * sizeof(tmp), - &tmp, sizeof(tmp)) != sizeof(tmp)) - return 0; - new_l2_table = 1; - } - for(i = 0; i < L2_CACHE_SIZE; i++) { - if (l2_offset == s->l2_cache_offsets[i]) { - /* increment the hit count */ - if (++s->l2_cache_counts[i] == 0xffffffff) { - for(j = 0; j < L2_CACHE_SIZE; j++) { - s->l2_cache_counts[j] >>= 1; - } - } - l2_table = s->l2_cache + (i << s->l2_bits); - goto found; - } - } - /* not found: load a new entry in the least used one */ - min_index = 0; - min_count = 0xffffffff; - for(i = 0; i < L2_CACHE_SIZE; i++) { - if (s->l2_cache_counts[i] < min_count) { - min_count = s->l2_cache_counts[i]; - min_index = i; - } - } - l2_table = s->l2_cache + (min_index << s->l2_bits); - if (new_l2_table) { - memset(l2_table, 0, s->l2_size * sizeof(uint64_t)); - if (bdrv_pwrite(s->hd, l2_offset, l2_table, s->l2_size * sizeof(uint64_t)) != - s->l2_size * sizeof(uint64_t)) - return 0; - } else { - if (bdrv_pread(s->hd, l2_offset, l2_table, s->l2_size * sizeof(uint64_t)) != - s->l2_size * sizeof(uint64_t)) - return 0; - } - s->l2_cache_offsets[min_index] = l2_offset; - s->l2_cache_counts[min_index] = 1; - found: - l2_index = (offset >> s->cluster_bits) & (s->l2_size - 1); - cluster_offset = be64_to_cpu(l2_table[l2_index]); - if (!cluster_offset || - ((cluster_offset & QCOW_OFLAG_COMPRESSED) && allocate == 1)) { - if (!allocate) - return 0; - /* allocate a new cluster */ - if ((cluster_offset & QCOW_OFLAG_COMPRESSED) && - (n_end - n_start) < s->cluster_sectors) { - /* if the cluster is already compressed, we must - decompress it in the case it is not completely - overwritten */ - if (decompress_cluster(s, cluster_offset) < 0) - return 0; - cluster_offset = bdrv_getlength(s->hd); - cluster_offset = (cluster_offset + s->cluster_size - 1) & - ~(s->cluster_size - 1); - /* write the cluster content */ - if (bdrv_pwrite(s->hd, cluster_offset, s->cluster_cache, s->cluster_size) != - s->cluster_size) - return -1; - } else { - cluster_offset = bdrv_getlength(s->hd); - if (allocate == 1) { - /* round to cluster size */ - cluster_offset = (cluster_offset + s->cluster_size - 1) & - ~(s->cluster_size - 1); - bdrv_truncate(s->hd, cluster_offset + s->cluster_size); - /* if encrypted, we must initialize the cluster - content which won't be written */ - if (s->crypt_method && - (n_end - n_start) < s->cluster_sectors) { - uint64_t start_sect; - start_sect = (offset & ~(s->cluster_size - 1)) >> 9; - memset(s->cluster_data + 512, 0x00, 512); - for(i = 0; i < s->cluster_sectors; i++) { - if (i < n_start || i >= n_end) { - encrypt_sectors(s, start_sect + i, - s->cluster_data, - s->cluster_data + 512, 1, 1, - &s->aes_encrypt_key); - if (bdrv_pwrite(s->hd, cluster_offset + i * 512, - s->cluster_data, 512) != 512) - return -1; - } - } - } - } else if (allocate == 2) { - cluster_offset |= QCOW_OFLAG_COMPRESSED | - (uint64_t)compressed_size << (63 - s->cluster_bits); - } - } - /* update L2 table */ - tmp = cpu_to_be64(cluster_offset); - l2_table[l2_index] = tmp; - if (bdrv_pwrite(s->hd, - l2_offset + l2_index * sizeof(tmp), &tmp, sizeof(tmp)) != sizeof(tmp)) - return 0; - } - return cluster_offset; -} - -static int qcow_is_allocated(BlockDriverState *bs, int64_t sector_num, - int nb_sectors, int *pnum) -{ - BDRVQcowState *s = bs->opaque; - int index_in_cluster, n; - uint64_t cluster_offset; - - cluster_offset = get_cluster_offset(bs, sector_num << 9, 0, 0, 0, 0); - index_in_cluster = sector_num & (s->cluster_sectors - 1); - n = s->cluster_sectors - index_in_cluster; - if (n > nb_sectors) - n = nb_sectors; - *pnum = n; - return (cluster_offset != 0); -} - -static int decompress_buffer(uint8_t *out_buf, int out_buf_size, - const uint8_t *buf, int buf_size) -{ - z_stream strm1, *strm = &strm1; - int ret, out_len; - - memset(strm, 0, sizeof(*strm)); - - strm->next_in = (uint8_t *)buf; - strm->avail_in = buf_size; - strm->next_out = out_buf; - strm->avail_out = out_buf_size; - - ret = inflateInit2(strm, -12); - if (ret != Z_OK) - return -1; - ret = inflate(strm, Z_FINISH); - out_len = strm->next_out - out_buf; - if ((ret != Z_STREAM_END && ret != Z_BUF_ERROR) || - out_len != out_buf_size) { - inflateEnd(strm); - return -1; - } - inflateEnd(strm); - return 0; -} - -static int decompress_cluster(BDRVQcowState *s, uint64_t cluster_offset) -{ - int ret, csize; - uint64_t coffset; - - coffset = cluster_offset & s->cluster_offset_mask; - if (s->cluster_cache_offset != coffset) { - csize = cluster_offset >> (63 - s->cluster_bits); - csize &= (s->cluster_size - 1); - ret = bdrv_pread(s->hd, coffset, s->cluster_data, csize); - if (ret != csize) - return -1; - if (decompress_buffer(s->cluster_cache, s->cluster_size, - s->cluster_data, csize) < 0) { - return -1; - } - s->cluster_cache_offset = coffset; - } - return 0; -} - -#if 0 - -static int qcow_read(BlockDriverState *bs, int64_t sector_num, - uint8_t *buf, int nb_sectors) -{ - BDRVQcowState *s = bs->opaque; - int ret, index_in_cluster, n; - uint64_t cluster_offset; - - while (nb_sectors > 0) { - cluster_offset = get_cluster_offset(bs, sector_num << 9, 0, 0, 0, 0); - index_in_cluster = sector_num & (s->cluster_sectors - 1); - n = s->cluster_sectors - index_in_cluster; - if (n > nb_sectors) - n = nb_sectors; - if (!cluster_offset) { - if (bs->backing_hd) { - /* read from the base image */ - ret = bdrv_read(bs->backing_hd, sector_num, buf, n); - if (ret < 0) - return -1; - } else { - memset(buf, 0, 512 * n); - } - } else if (cluster_offset & QCOW_OFLAG_COMPRESSED) { - if (decompress_cluster(s, cluster_offset) < 0) - return -1; - memcpy(buf, s->cluster_cache + index_in_cluster * 512, 512 * n); - } else { - ret = bdrv_pread(s->hd, cluster_offset + index_in_cluster * 512, buf, n * 512); - if (ret != n * 512) - return -1; - if (s->crypt_method) { - encrypt_sectors(s, sector_num, buf, buf, n, 0, - &s->aes_decrypt_key); - } - } - nb_sectors -= n; - sector_num += n; - buf += n * 512; - } - return 0; -} -#endif - -static int qcow_write(BlockDriverState *bs, int64_t sector_num, - const uint8_t *buf, int nb_sectors) -{ - BDRVQcowState *s = bs->opaque; - int ret, index_in_cluster, n; - uint64_t cluster_offset; - - while (nb_sectors > 0) { - index_in_cluster = sector_num & (s->cluster_sectors - 1); - n = s->cluster_sectors - index_in_cluster; - if (n > nb_sectors) - n = nb_sectors; - cluster_offset = get_cluster_offset(bs, sector_num << 9, 1, 0, - index_in_cluster, - index_in_cluster + n); - if (!cluster_offset) - return -1; - if (s->crypt_method) { - encrypt_sectors(s, sector_num, s->cluster_data, buf, n, 1, - &s->aes_encrypt_key); - ret = bdrv_pwrite(s->hd, cluster_offset + index_in_cluster * 512, - s->cluster_data, n * 512); - } else { - ret = bdrv_pwrite(s->hd, cluster_offset + index_in_cluster * 512, buf, n * 512); - } - if (ret != n * 512) - return -1; - nb_sectors -= n; - sector_num += n; - buf += n * 512; - } - s->cluster_cache_offset = -1; /* disable compressed cache */ - return 0; -} - -typedef struct QCowAIOCB { - BlockDriverAIOCB common; - int64_t sector_num; - QEMUIOVector *qiov; - uint8_t *buf; - void *orig_buf; - int nb_sectors; - int n; - uint64_t cluster_offset; - uint8_t *cluster_data; - struct iovec hd_iov; - QEMUIOVector hd_qiov; - BlockDriverAIOCB *hd_aiocb; -} QCowAIOCB; - -static void qcow_aio_read_cb(void *opaque, int ret) -{ - QCowAIOCB *acb = opaque; - BlockDriverState *bs = acb->common.bs; - BDRVQcowState *s = bs->opaque; - int index_in_cluster; - - acb->hd_aiocb = NULL; - if (ret < 0) - goto done; - - redo: - /* post process the read buffer */ - if (!acb->cluster_offset) { - /* nothing to do */ - } else if (acb->cluster_offset & QCOW_OFLAG_COMPRESSED) { - /* nothing to do */ - } else { - if (s->crypt_method) { - encrypt_sectors(s, acb->sector_num, acb->buf, acb->buf, - acb->n, 0, - &s->aes_decrypt_key); - } - } - - acb->nb_sectors -= acb->n; - acb->sector_num += acb->n; - acb->buf += acb->n * 512; - - if (acb->nb_sectors == 0) { - /* request completed */ - ret = 0; - goto done; - } - - /* prepare next AIO request */ - acb->cluster_offset = get_cluster_offset(bs, acb->sector_num << 9, - 0, 0, 0, 0); - index_in_cluster = acb->sector_num & (s->cluster_sectors - 1); - acb->n = s->cluster_sectors - index_in_cluster; - if (acb->n > acb->nb_sectors) - acb->n = acb->nb_sectors; - - if (!acb->cluster_offset) { - if (bs->backing_hd) { - /* read from the base image */ - acb->hd_iov.iov_base = (void *)acb->buf; - acb->hd_iov.iov_len = acb->n * 512; - qemu_iovec_init_external(&acb->hd_qiov, &acb->hd_iov, 1); - acb->hd_aiocb = bdrv_aio_readv(bs->backing_hd, acb->sector_num, - &acb->hd_qiov, acb->n, qcow_aio_read_cb, acb); - if (acb->hd_aiocb == NULL) - goto done; - } else { - /* Note: in this case, no need to wait */ - memset(acb->buf, 0, 512 * acb->n); - goto redo; - } - } else if (acb->cluster_offset & QCOW_OFLAG_COMPRESSED) { - /* add AIO support for compressed blocks ? */ - if (decompress_cluster(s, acb->cluster_offset) < 0) - goto done; - memcpy(acb->buf, - s->cluster_cache + index_in_cluster * 512, 512 * acb->n); - goto redo; - } else { - if ((acb->cluster_offset & 511) != 0) { - ret = -EIO; - goto done; - } - acb->hd_iov.iov_base = (void *)acb->buf; - acb->hd_iov.iov_len = acb->n * 512; - qemu_iovec_init_external(&acb->hd_qiov, &acb->hd_iov, 1); - acb->hd_aiocb = bdrv_aio_readv(s->hd, - (acb->cluster_offset >> 9) + index_in_cluster, - &acb->hd_qiov, acb->n, qcow_aio_read_cb, acb); - if (acb->hd_aiocb == NULL) - goto done; - } - - return; - -done: - if (acb->qiov->niov > 1) { - qemu_iovec_from_buffer(acb->qiov, acb->orig_buf, acb->qiov->size); - qemu_vfree(acb->orig_buf); - } - acb->common.cb(acb->common.opaque, ret); - qemu_aio_release(acb); -} - -static BlockDriverAIOCB *qcow_aio_readv(BlockDriverState *bs, - int64_t sector_num, QEMUIOVector *qiov, int nb_sectors, - BlockDriverCompletionFunc *cb, void *opaque) -{ - QCowAIOCB *acb; - - acb = qemu_aio_get(bs, cb, opaque); - if (!acb) - return NULL; - acb->hd_aiocb = NULL; - acb->sector_num = sector_num; - acb->qiov = qiov; - if (qiov->niov > 1) - acb->buf = acb->orig_buf = qemu_blockalign(bs, qiov->size); - else - acb->buf = (uint8_t *)qiov->iov->iov_base; - acb->nb_sectors = nb_sectors; - acb->n = 0; - acb->cluster_offset = 0; - - qcow_aio_read_cb(acb, 0); - return &acb->common; -} - -static void qcow_aio_write_cb(void *opaque, int ret) -{ - QCowAIOCB *acb = opaque; - BlockDriverState *bs = acb->common.bs; - BDRVQcowState *s = bs->opaque; - int index_in_cluster; - uint64_t cluster_offset; - const uint8_t *src_buf; - - acb->hd_aiocb = NULL; - - if (ret < 0) - goto done; - - acb->nb_sectors -= acb->n; - acb->sector_num += acb->n; - acb->buf += acb->n * 512; - - if (acb->nb_sectors == 0) { - /* request completed */ - ret = 0; - goto done; - } - - index_in_cluster = acb->sector_num & (s->cluster_sectors - 1); - acb->n = s->cluster_sectors - index_in_cluster; - if (acb->n > acb->nb_sectors) - acb->n = acb->nb_sectors; - cluster_offset = get_cluster_offset(bs, acb->sector_num << 9, 1, 0, - index_in_cluster, - index_in_cluster + acb->n); - if (!cluster_offset || (cluster_offset & 511) != 0) { - ret = -EIO; - goto done; - } - if (s->crypt_method) { - if (!acb->cluster_data) { - acb->cluster_data = qemu_mallocz(s->cluster_size); - if (!acb->cluster_data) { - ret = -ENOMEM; - goto done; - } - } - encrypt_sectors(s, acb->sector_num, acb->cluster_data, acb->buf, - acb->n, 1, &s->aes_encrypt_key); - src_buf = acb->cluster_data; - } else { - src_buf = acb->buf; - } - - acb->hd_iov.iov_base = (void *)src_buf; - acb->hd_iov.iov_len = acb->n * 512; - qemu_iovec_init_external(&acb->hd_qiov, &acb->hd_iov, 1); - acb->hd_aiocb = bdrv_aio_writev(s->hd, - (cluster_offset >> 9) + index_in_cluster, - &acb->hd_qiov, acb->n, - qcow_aio_write_cb, acb); - if (acb->hd_aiocb == NULL) - goto done; - return; - -done: - if (acb->qiov->niov > 1) - qemu_vfree(acb->orig_buf); - acb->common.cb(acb->common.opaque, ret); - qemu_aio_release(acb); -} - -static BlockDriverAIOCB *qcow_aio_writev(BlockDriverState *bs, - int64_t sector_num, QEMUIOVector *qiov, int nb_sectors, - BlockDriverCompletionFunc *cb, void *opaque) -{ - BDRVQcowState *s = bs->opaque; - QCowAIOCB *acb; - - s->cluster_cache_offset = -1; /* disable compressed cache */ - - acb = qemu_aio_get(bs, cb, opaque); - if (!acb) - return NULL; - acb->hd_aiocb = NULL; - acb->sector_num = sector_num; - acb->qiov = qiov; - if (qiov->niov > 1) { - acb->buf = acb->orig_buf = qemu_blockalign(bs, qiov->size); - qemu_iovec_to_buffer(qiov, acb->buf); - } else { - acb->buf = (uint8_t *)qiov->iov->iov_base; - } - acb->nb_sectors = nb_sectors; - acb->n = 0; - - qcow_aio_write_cb(acb, 0); - return &acb->common; -} - -static void qcow_aio_cancel(BlockDriverAIOCB *blockacb) -{ - QCowAIOCB *acb = (QCowAIOCB *)blockacb; - if (acb->hd_aiocb) - bdrv_aio_cancel(acb->hd_aiocb); - qemu_aio_release(acb); -} - -static void qcow_close(BlockDriverState *bs) -{ - BDRVQcowState *s = bs->opaque; - qemu_free(s->l1_table); - qemu_free(s->l2_cache); - qemu_free(s->cluster_cache); - qemu_free(s->cluster_data); - bdrv_delete(s->hd); -} - -static int qcow_create(const char *filename, int64_t total_size, - const char *backing_file, int flags) -{ - int fd, header_size, backing_filename_len, l1_size, i, shift; - QCowHeader header; - uint64_t tmp; - - fd = open(filename, O_WRONLY | O_CREAT | O_TRUNC | O_BINARY, 0644); - if (fd < 0) - return -1; - memset(&header, 0, sizeof(header)); - header.magic = cpu_to_be32(QCOW_MAGIC); - header.version = cpu_to_be32(QCOW_VERSION); - header.size = cpu_to_be64(total_size * 512); - header_size = sizeof(header); - backing_filename_len = 0; - if (backing_file) { - if (strcmp(backing_file, "fat:")) { - header.backing_file_offset = cpu_to_be64(header_size); - backing_filename_len = strlen(backing_file); - header.backing_file_size = cpu_to_be32(backing_filename_len); - header_size += backing_filename_len; - } else { - /* special backing file for vvfat */ - backing_file = NULL; - } - header.cluster_bits = 9; /* 512 byte cluster to avoid copying - unmodifyed sectors */ - header.l2_bits = 12; /* 32 KB L2 tables */ - } else { - header.cluster_bits = 12; /* 4 KB clusters */ - header.l2_bits = 9; /* 4 KB L2 tables */ - } - header_size = (header_size + 7) & ~7; - shift = header.cluster_bits + header.l2_bits; - l1_size = ((total_size * 512) + (1LL << shift) - 1) >> shift; - - header.l1_table_offset = cpu_to_be64(header_size); - if (flags & BLOCK_FLAG_ENCRYPT) { - header.crypt_method = cpu_to_be32(QCOW_CRYPT_AES); - } else { - header.crypt_method = cpu_to_be32(QCOW_CRYPT_NONE); - } - - /* write all the data */ - write(fd, &header, sizeof(header)); - if (backing_file) { - write(fd, backing_file, backing_filename_len); - } - lseek(fd, header_size, SEEK_SET); - tmp = 0; - for(i = 0;i < l1_size; i++) { - write(fd, &tmp, sizeof(tmp)); - } - close(fd); - return 0; -} - -static int qcow_make_empty(BlockDriverState *bs) -{ - BDRVQcowState *s = bs->opaque; - uint32_t l1_length = s->l1_size * sizeof(uint64_t); - int ret; - - memset(s->l1_table, 0, l1_length); - if (bdrv_pwrite(s->hd, s->l1_table_offset, s->l1_table, l1_length) < 0) - return -1; - ret = bdrv_truncate(s->hd, s->l1_table_offset + l1_length); - if (ret < 0) - return ret; - - memset(s->l2_cache, 0, s->l2_size * L2_CACHE_SIZE * sizeof(uint64_t)); - memset(s->l2_cache_offsets, 0, L2_CACHE_SIZE * sizeof(uint64_t)); - memset(s->l2_cache_counts, 0, L2_CACHE_SIZE * sizeof(uint32_t)); - - return 0; -} - -/* XXX: put compressed sectors first, then all the cluster aligned - tables to avoid losing bytes in alignment */ -static int qcow_write_compressed(BlockDriverState *bs, int64_t sector_num, - const uint8_t *buf, int nb_sectors) -{ - BDRVQcowState *s = bs->opaque; - z_stream strm; - int ret, out_len; - uint8_t *out_buf; - uint64_t cluster_offset; - - if (nb_sectors != s->cluster_sectors) - return -EINVAL; - - out_buf = qemu_malloc(s->cluster_size + (s->cluster_size / 1000) + 128); - if (!out_buf) - return -1; - - /* best compression, small window, no zlib header */ - memset(&strm, 0, sizeof(strm)); - ret = deflateInit2(&strm, Z_DEFAULT_COMPRESSION, - Z_DEFLATED, -12, - 9, Z_DEFAULT_STRATEGY); - if (ret != 0) { - qemu_free(out_buf); - return -1; - } - - strm.avail_in = s->cluster_size; - strm.next_in = (uint8_t *)buf; - strm.avail_out = s->cluster_size; - strm.next_out = out_buf; - - ret = deflate(&strm, Z_FINISH); - if (ret != Z_STREAM_END && ret != Z_OK) { - qemu_free(out_buf); - deflateEnd(&strm); - return -1; - } - out_len = strm.next_out - out_buf; - - deflateEnd(&strm); - - if (ret != Z_STREAM_END || out_len >= s->cluster_size) { - /* could not compress: write normal cluster */ - qcow_write(bs, sector_num, buf, s->cluster_sectors); - } else { - cluster_offset = get_cluster_offset(bs, sector_num << 9, 2, - out_len, 0, 0); - cluster_offset &= s->cluster_offset_mask; - if (bdrv_pwrite(s->hd, cluster_offset, out_buf, out_len) != out_len) { - qemu_free(out_buf); - return -1; - } - } - - qemu_free(out_buf); - return 0; -} - -static void qcow_flush(BlockDriverState *bs) -{ - BDRVQcowState *s = bs->opaque; - bdrv_flush(s->hd); -} - -static int qcow_get_info(BlockDriverState *bs, BlockDriverInfo *bdi) -{ - BDRVQcowState *s = bs->opaque; - bdi->cluster_size = s->cluster_size; - return 0; -} - -static BlockDriver bdrv_qcow = { - .format_name = "qcow", - .instance_size = sizeof(BDRVQcowState), - .bdrv_probe = qcow_probe, - .bdrv_open = qcow_open, - .bdrv_close = qcow_close, - .bdrv_create = qcow_create, - .bdrv_flush = qcow_flush, - .bdrv_is_allocated = qcow_is_allocated, - .bdrv_set_key = qcow_set_key, - .bdrv_make_empty = qcow_make_empty, - .bdrv_aio_readv = qcow_aio_readv, - .bdrv_aio_writev = qcow_aio_writev, - .bdrv_aio_cancel = qcow_aio_cancel, - .aiocb_size = sizeof(QCowAIOCB), - .bdrv_write_compressed = qcow_write_compressed, - .bdrv_get_info = qcow_get_info, -}; - -static void bdrv_qcow_init(void) -{ - bdrv_register(&bdrv_qcow); -} - -block_init(bdrv_qcow_init); diff --git a/block-qcow2.c b/block-qcow2.c deleted file mode 100644 index a6de9b691..000000000 --- a/block-qcow2.c +++ /dev/null @@ -1,2931 +0,0 @@ -/* - * Block driver for the QCOW version 2 format - * - * Copyright (c) 2004-2006 Fabrice Bellard - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. - */ -#include "qemu-common.h" -#include "block_int.h" -#include "module.h" -#include -#include "aes.h" - -/* - Differences with QCOW: - - - Support for multiple incremental snapshots. - - Memory management by reference counts. - - Clusters which have a reference count of one have the bit - QCOW_OFLAG_COPIED to optimize write performance. - - Size of compressed clusters is stored in sectors to reduce bit usage - in the cluster offsets. - - Support for storing additional data (such as the VM state) in the - snapshots. - - If a backing store is used, the cluster size is not constrained - (could be backported to QCOW). - - L2 tables have always a size of one cluster. -*/ - -//#define DEBUG_ALLOC -//#define DEBUG_ALLOC2 -//#define DEBUG_EXT - -#define QCOW_MAGIC (('Q' << 24) | ('F' << 16) | ('I' << 8) | 0xfb) -#define QCOW_VERSION 2 - -#define QCOW_CRYPT_NONE 0 -#define QCOW_CRYPT_AES 1 - -#define QCOW_MAX_CRYPT_CLUSTERS 32 - -/* indicate that the refcount of the referenced cluster is exactly one. */ -#define QCOW_OFLAG_COPIED (1LL << 63) -/* indicate that the cluster is compressed (they never have the copied flag) */ -#define QCOW_OFLAG_COMPRESSED (1LL << 62) - -#define REFCOUNT_SHIFT 1 /* refcount size is 2 bytes */ - -typedef struct QCowHeader { - uint32_t magic; - uint32_t version; - uint64_t backing_file_offset; - uint32_t backing_file_size; - uint32_t cluster_bits; - uint64_t size; /* in bytes */ - uint32_t crypt_method; - uint32_t l1_size; /* XXX: save number of clusters instead ? */ - uint64_t l1_table_offset; - uint64_t refcount_table_offset; - uint32_t refcount_table_clusters; - uint32_t nb_snapshots; - uint64_t snapshots_offset; -} QCowHeader; - - -typedef struct { - uint32_t magic; - uint32_t len; -} QCowExtension; -#define QCOW_EXT_MAGIC_END 0 -#define QCOW_EXT_MAGIC_BACKING_FORMAT 0xE2792ACA - - -typedef struct __attribute__((packed)) QCowSnapshotHeader { - /* header is 8 byte aligned */ - uint64_t l1_table_offset; - - uint32_t l1_size; - uint16_t id_str_size; - uint16_t name_size; - - uint32_t date_sec; - uint32_t date_nsec; - - uint64_t vm_clock_nsec; - - uint32_t vm_state_size; - uint32_t extra_data_size; /* for extension */ - /* extra data follows */ - /* id_str follows */ - /* name follows */ -} QCowSnapshotHeader; - -#define L2_CACHE_SIZE 16 - -typedef struct QCowSnapshot { - uint64_t l1_table_offset; - uint32_t l1_size; - char *id_str; - char *name; - uint32_t vm_state_size; - uint32_t date_sec; - uint32_t date_nsec; - uint64_t vm_clock_nsec; -} QCowSnapshot; - -typedef struct BDRVQcowState { - BlockDriverState *hd; - int cluster_bits; - int cluster_size; - int cluster_sectors; - int l2_bits; - int l2_size; - int l1_size; - int l1_vm_state_index; - int csize_shift; - int csize_mask; - uint64_t cluster_offset_mask; - uint64_t l1_table_offset; - uint64_t *l1_table; - uint64_t *l2_cache; - uint64_t l2_cache_offsets[L2_CACHE_SIZE]; - uint32_t l2_cache_counts[L2_CACHE_SIZE]; - uint8_t *cluster_cache; - uint8_t *cluster_data; - uint64_t cluster_cache_offset; - - uint64_t *refcount_table; - uint64_t refcount_table_offset; - uint32_t refcount_table_size; - uint64_t refcount_block_cache_offset; - uint16_t *refcount_block_cache; - int64_t free_cluster_index; - int64_t free_byte_offset; - - uint32_t crypt_method; /* current crypt method, 0 if no key yet */ - uint32_t crypt_method_header; - AES_KEY aes_encrypt_key; - AES_KEY aes_decrypt_key; - uint64_t snapshots_offset; - int snapshots_size; - int nb_snapshots; - QCowSnapshot *snapshots; -} BDRVQcowState; - -static int decompress_cluster(BDRVQcowState *s, uint64_t cluster_offset); -static int qcow_read(BlockDriverState *bs, int64_t sector_num, - uint8_t *buf, int nb_sectors); -static int qcow_read_snapshots(BlockDriverState *bs); -static void qcow_free_snapshots(BlockDriverState *bs); -static int refcount_init(BlockDriverState *bs); -static void refcount_close(BlockDriverState *bs); -static int get_refcount(BlockDriverState *bs, int64_t cluster_index); -static int update_cluster_refcount(BlockDriverState *bs, - int64_t cluster_index, - int addend); -static void update_refcount(BlockDriverState *bs, - int64_t offset, int64_t length, - int addend); -static int64_t alloc_clusters(BlockDriverState *bs, int64_t size); -static int64_t alloc_bytes(BlockDriverState *bs, int size); -static void free_clusters(BlockDriverState *bs, - int64_t offset, int64_t size); -static int check_refcounts(BlockDriverState *bs); - -static int qcow_probe(const uint8_t *buf, int buf_size, const char *filename) -{ - const QCowHeader *cow_header = (const void *)buf; - - if (buf_size >= sizeof(QCowHeader) && - be32_to_cpu(cow_header->magic) == QCOW_MAGIC && - be32_to_cpu(cow_header->version) == QCOW_VERSION) - return 100; - else - return 0; -} - - -/* - * read qcow2 extension and fill bs - * start reading from start_offset - * finish reading upon magic of value 0 or when end_offset reached - * unknown magic is skipped (future extension this version knows nothing about) - * return 0 upon success, non-0 otherwise - */ -static int qcow_read_extensions(BlockDriverState *bs, uint64_t start_offset, - uint64_t end_offset) -{ - BDRVQcowState *s = bs->opaque; - QCowExtension ext; - uint64_t offset; - -#ifdef DEBUG_EXT - printf("qcow_read_extensions: start=%ld end=%ld\n", start_offset, end_offset); -#endif - offset = start_offset; - while (offset < end_offset) { - -#ifdef DEBUG_EXT - /* Sanity check */ - if (offset > s->cluster_size) - printf("qcow_handle_extension: suspicious offset %lu\n", offset); - - printf("attemting to read extended header in offset %lu\n", offset); -#endif - - if (bdrv_pread(s->hd, offset, &ext, sizeof(ext)) != sizeof(ext)) { - fprintf(stderr, "qcow_handle_extension: ERROR: pread fail from offset %llu\n", - (unsigned long long)offset); - return 1; - } - be32_to_cpus(&ext.magic); - be32_to_cpus(&ext.len); - offset += sizeof(ext); -#ifdef DEBUG_EXT - printf("ext.magic = 0x%x\n", ext.magic); -#endif - switch (ext.magic) { - case QCOW_EXT_MAGIC_END: - return 0; - - case QCOW_EXT_MAGIC_BACKING_FORMAT: - if (ext.len >= sizeof(bs->backing_format)) { - fprintf(stderr, "ERROR: ext_backing_format: len=%u too large" - " (>=%zu)\n", - ext.len, sizeof(bs->backing_format)); - return 2; - } - if (bdrv_pread(s->hd, offset , bs->backing_format, - ext.len) != ext.len) - return 3; - bs->backing_format[ext.len] = '\0'; -#ifdef DEBUG_EXT - printf("Qcow2: Got format extension %s\n", bs->backing_format); -#endif - offset += ((ext.len + 7) & ~7); - break; - - default: - /* unknown magic -- just skip it */ - offset += ((ext.len + 7) & ~7); - break; - } - } - - return 0; -} - - -static int qcow_open(BlockDriverState *bs, const char *filename, int flags) -{ - BDRVQcowState *s = bs->opaque; - int len, i, shift, ret; - QCowHeader header; - uint64_t ext_end; - - /* Performance is terrible right now with cache=writethrough due mainly - * to reference count updates. If the user does not explicitly specify - * a caching type, force to writeback caching. - */ - if ((flags & BDRV_O_CACHE_DEF)) { - flags |= BDRV_O_CACHE_WB; - flags &= ~BDRV_O_CACHE_DEF; - } - ret = bdrv_file_open(&s->hd, filename, flags); - if (ret < 0) - return ret; - if (bdrv_pread(s->hd, 0, &header, sizeof(header)) != sizeof(header)) - goto fail; - be32_to_cpus(&header.magic); - be32_to_cpus(&header.version); - be64_to_cpus(&header.backing_file_offset); - be32_to_cpus(&header.backing_file_size); - be64_to_cpus(&header.size); - be32_to_cpus(&header.cluster_bits); - be32_to_cpus(&header.crypt_method); - be64_to_cpus(&header.l1_table_offset); - be32_to_cpus(&header.l1_size); - be64_to_cpus(&header.refcount_table_offset); - be32_to_cpus(&header.refcount_table_clusters); - be64_to_cpus(&header.snapshots_offset); - be32_to_cpus(&header.nb_snapshots); - - if (header.magic != QCOW_MAGIC || header.version != QCOW_VERSION) - goto fail; - if (header.size <= 1 || - header.cluster_bits < 9 || - header.cluster_bits > 16) - goto fail; - if (header.crypt_method > QCOW_CRYPT_AES) - goto fail; - s->crypt_method_header = header.crypt_method; - if (s->crypt_method_header) - bs->encrypted = 1; - s->cluster_bits = header.cluster_bits; - s->cluster_size = 1 << s->cluster_bits; - s->cluster_sectors = 1 << (s->cluster_bits - 9); - s->l2_bits = s->cluster_bits - 3; /* L2 is always one cluster */ - s->l2_size = 1 << s->l2_bits; - bs->total_sectors = header.size / 512; - s->csize_shift = (62 - (s->cluster_bits - 8)); - s->csize_mask = (1 << (s->cluster_bits - 8)) - 1; - s->cluster_offset_mask = (1LL << s->csize_shift) - 1; - s->refcount_table_offset = header.refcount_table_offset; - s->refcount_table_size = - header.refcount_table_clusters << (s->cluster_bits - 3); - - s->snapshots_offset = header.snapshots_offset; - s->nb_snapshots = header.nb_snapshots; - - /* read the level 1 table */ - s->l1_size = header.l1_size; - shift = s->cluster_bits + s->l2_bits; - s->l1_vm_state_index = (header.size + (1LL << shift) - 1) >> shift; - /* the L1 table must contain at least enough entries to put - header.size bytes */ - if (s->l1_size < s->l1_vm_state_index) - goto fail; - s->l1_table_offset = header.l1_table_offset; - s->l1_table = qemu_malloc(s->l1_size * sizeof(uint64_t)); - if (bdrv_pread(s->hd, s->l1_table_offset, s->l1_table, s->l1_size * sizeof(uint64_t)) != - s->l1_size * sizeof(uint64_t)) - goto fail; - for(i = 0;i < s->l1_size; i++) { - be64_to_cpus(&s->l1_table[i]); - } - /* alloc L2 cache */ - s->l2_cache = qemu_malloc(s->l2_size * L2_CACHE_SIZE * sizeof(uint64_t)); - s->cluster_cache = qemu_malloc(s->cluster_size); - /* one more sector for decompressed data alignment */ - s->cluster_data = qemu_malloc(QCOW_MAX_CRYPT_CLUSTERS * s->cluster_size - + 512); - s->cluster_cache_offset = -1; - - if (refcount_init(bs) < 0) - goto fail; - - /* read qcow2 extensions */ - if (header.backing_file_offset) - ext_end = header.backing_file_offset; - else - ext_end = s->cluster_size; - if (qcow_read_extensions(bs, sizeof(header), ext_end)) - goto fail; - - /* read the backing file name */ - if (header.backing_file_offset != 0) { - len = header.backing_file_size; - if (len > 1023) - len = 1023; - if (bdrv_pread(s->hd, header.backing_file_offset, bs->backing_file, len) != len) - goto fail; - bs->backing_file[len] = '\0'; - } - if (qcow_read_snapshots(bs) < 0) - goto fail; - -#ifdef DEBUG_ALLOC - check_refcounts(bs); -#endif - return 0; - - fail: - qcow_free_snapshots(bs); - refcount_close(bs); - qemu_free(s->l1_table); - qemu_free(s->l2_cache); - qemu_free(s->cluster_cache); - qemu_free(s->cluster_data); - bdrv_delete(s->hd); - return -1; -} - -static int qcow_set_key(BlockDriverState *bs, const char *key) -{ - BDRVQcowState *s = bs->opaque; - uint8_t keybuf[16]; - int len, i; - - memset(keybuf, 0, 16); - len = strlen(key); - if (len > 16) - len = 16; - /* XXX: we could compress the chars to 7 bits to increase - entropy */ - for(i = 0;i < len;i++) { - keybuf[i] = key[i]; - } - s->crypt_method = s->crypt_method_header; - - if (AES_set_encrypt_key(keybuf, 128, &s->aes_encrypt_key) != 0) - return -1; - if (AES_set_decrypt_key(keybuf, 128, &s->aes_decrypt_key) != 0) - return -1; -#if 0 - /* test */ - { - uint8_t in[16]; - uint8_t out[16]; - uint8_t tmp[16]; - for(i=0;i<16;i++) - in[i] = i; - AES_encrypt(in, tmp, &s->aes_encrypt_key); - AES_decrypt(tmp, out, &s->aes_decrypt_key); - for(i = 0; i < 16; i++) - printf(" %02x", tmp[i]); - printf("\n"); - for(i = 0; i < 16; i++) - printf(" %02x", out[i]); - printf("\n"); - } -#endif - return 0; -} - -/* The crypt function is compatible with the linux cryptoloop - algorithm for < 4 GB images. NOTE: out_buf == in_buf is - supported */ -static void encrypt_sectors(BDRVQcowState *s, int64_t sector_num, - uint8_t *out_buf, const uint8_t *in_buf, - int nb_sectors, int enc, - const AES_KEY *key) -{ - union { - uint64_t ll[2]; - uint8_t b[16]; - } ivec; - int i; - - for(i = 0; i < nb_sectors; i++) { - ivec.ll[0] = cpu_to_le64(sector_num); - ivec.ll[1] = 0; - AES_cbc_encrypt(in_buf, out_buf, 512, key, - ivec.b, enc); - sector_num++; - in_buf += 512; - out_buf += 512; - } -} - -static int copy_sectors(BlockDriverState *bs, uint64_t start_sect, - uint64_t cluster_offset, int n_start, int n_end) -{ - BDRVQcowState *s = bs->opaque; - int n, ret; - - n = n_end - n_start; - if (n <= 0) - return 0; - ret = qcow_read(bs, start_sect + n_start, s->cluster_data, n); - if (ret < 0) - return ret; - if (s->crypt_method) { - encrypt_sectors(s, start_sect + n_start, - s->cluster_data, - s->cluster_data, n, 1, - &s->aes_encrypt_key); - } - ret = bdrv_write(s->hd, (cluster_offset >> 9) + n_start, - s->cluster_data, n); - if (ret < 0) - return ret; - return 0; -} - -static void l2_cache_reset(BlockDriverState *bs) -{ - BDRVQcowState *s = bs->opaque; - - memset(s->l2_cache, 0, s->l2_size * L2_CACHE_SIZE * sizeof(uint64_t)); - memset(s->l2_cache_offsets, 0, L2_CACHE_SIZE * sizeof(uint64_t)); - memset(s->l2_cache_counts, 0, L2_CACHE_SIZE * sizeof(uint32_t)); -} - -static inline int l2_cache_new_entry(BlockDriverState *bs) -{ - BDRVQcowState *s = bs->opaque; - uint32_t min_count; - int min_index, i; - - /* find a new entry in the least used one */ - min_index = 0; - min_count = 0xffffffff; - for(i = 0; i < L2_CACHE_SIZE; i++) { - if (s->l2_cache_counts[i] < min_count) { - min_count = s->l2_cache_counts[i]; - min_index = i; - } - } - return min_index; -} - -static int64_t align_offset(int64_t offset, int n) -{ - offset = (offset + n - 1) & ~(n - 1); - return offset; -} - -static int grow_l1_table(BlockDriverState *bs, int min_size) -{ - BDRVQcowState *s = bs->opaque; - int new_l1_size, new_l1_size2, ret, i; - uint64_t *new_l1_table; - uint64_t new_l1_table_offset; - uint8_t data[12]; - - new_l1_size = s->l1_size; - if (min_size <= new_l1_size) - return 0; - while (min_size > new_l1_size) { - new_l1_size = (new_l1_size * 3 + 1) / 2; - } -#ifdef DEBUG_ALLOC2 - printf("grow l1_table from %d to %d\n", s->l1_size, new_l1_size); -#endif - - new_l1_size2 = sizeof(uint64_t) * new_l1_size; - new_l1_table = qemu_mallocz(new_l1_size2); - memcpy(new_l1_table, s->l1_table, s->l1_size * sizeof(uint64_t)); - - /* write new table (align to cluster) */ - new_l1_table_offset = alloc_clusters(bs, new_l1_size2); - - for(i = 0; i < s->l1_size; i++) - new_l1_table[i] = cpu_to_be64(new_l1_table[i]); - ret = bdrv_pwrite(s->hd, new_l1_table_offset, new_l1_table, new_l1_size2); - if (ret != new_l1_size2) - goto fail; - for(i = 0; i < s->l1_size; i++) - new_l1_table[i] = be64_to_cpu(new_l1_table[i]); - - /* set new table */ - cpu_to_be32w((uint32_t*)data, new_l1_size); - cpu_to_be64w((uint64_t*)(data + 4), new_l1_table_offset); - if (bdrv_pwrite(s->hd, offsetof(QCowHeader, l1_size), data, - sizeof(data)) != sizeof(data)) - goto fail; - qemu_free(s->l1_table); - free_clusters(bs, s->l1_table_offset, s->l1_size * sizeof(uint64_t)); - s->l1_table_offset = new_l1_table_offset; - s->l1_table = new_l1_table; - s->l1_size = new_l1_size; - return 0; - fail: - qemu_free(s->l1_table); - return -EIO; -} - -/* - * seek_l2_table - * - * seek l2_offset in the l2_cache table - * if not found, return NULL, - * if found, - * increments the l2 cache hit count of the entry, - * if counter overflow, divide by two all counters - * return the pointer to the l2 cache entry - * - */ - -static uint64_t *seek_l2_table(BDRVQcowState *s, uint64_t l2_offset) -{ - int i, j; - - for(i = 0; i < L2_CACHE_SIZE; i++) { - if (l2_offset == s->l2_cache_offsets[i]) { - /* increment the hit count */ - if (++s->l2_cache_counts[i] == 0xffffffff) { - for(j = 0; j < L2_CACHE_SIZE; j++) { - s->l2_cache_counts[j] >>= 1; - } - } - return s->l2_cache + (i << s->l2_bits); - } - } - return NULL; -} - -/* - * l2_load - * - * Loads a L2 table into memory. If the table is in the cache, the cache - * is used; otherwise the L2 table is loaded from the image file. - * - * Returns a pointer to the L2 table on success, or NULL if the read from - * the image file failed. - */ - -static uint64_t *l2_load(BlockDriverState *bs, uint64_t l2_offset) -{ - BDRVQcowState *s = bs->opaque; - int min_index; - uint64_t *l2_table; - - /* seek if the table for the given offset is in the cache */ - - l2_table = seek_l2_table(s, l2_offset); - if (l2_table != NULL) - return l2_table; - - /* not found: load a new entry in the least used one */ - - min_index = l2_cache_new_entry(bs); - l2_table = s->l2_cache + (min_index << s->l2_bits); - if (bdrv_pread(s->hd, l2_offset, l2_table, s->l2_size * sizeof(uint64_t)) != - s->l2_size * sizeof(uint64_t)) - return NULL; - s->l2_cache_offsets[min_index] = l2_offset; - s->l2_cache_counts[min_index] = 1; - - return l2_table; -} - -/* - * l2_allocate - * - * Allocate a new l2 entry in the file. If l1_index points to an already - * used entry in the L2 table (i.e. we are doing a copy on write for the L2 - * table) copy the contents of the old L2 table into the newly allocated one. - * Otherwise the new table is initialized with zeros. - * - */ - -static uint64_t *l2_allocate(BlockDriverState *bs, int l1_index) -{ - BDRVQcowState *s = bs->opaque; - int min_index; - uint64_t old_l2_offset, tmp; - uint64_t *l2_table, l2_offset; - - old_l2_offset = s->l1_table[l1_index]; - - /* allocate a new l2 entry */ - - l2_offset = alloc_clusters(bs, s->l2_size * sizeof(uint64_t)); - - /* update the L1 entry */ - - s->l1_table[l1_index] = l2_offset | QCOW_OFLAG_COPIED; - - tmp = cpu_to_be64(l2_offset | QCOW_OFLAG_COPIED); - if (bdrv_pwrite(s->hd, s->l1_table_offset + l1_index * sizeof(tmp), - &tmp, sizeof(tmp)) != sizeof(tmp)) - return NULL; - - /* allocate a new entry in the l2 cache */ - - min_index = l2_cache_new_entry(bs); - l2_table = s->l2_cache + (min_index << s->l2_bits); - - if (old_l2_offset == 0) { - /* if there was no old l2 table, clear the new table */ - memset(l2_table, 0, s->l2_size * sizeof(uint64_t)); - } else { - /* if there was an old l2 table, read it from the disk */ - if (bdrv_pread(s->hd, old_l2_offset, - l2_table, s->l2_size * sizeof(uint64_t)) != - s->l2_size * sizeof(uint64_t)) - return NULL; - } - /* write the l2 table to the file */ - if (bdrv_pwrite(s->hd, l2_offset, - l2_table, s->l2_size * sizeof(uint64_t)) != - s->l2_size * sizeof(uint64_t)) - return NULL; - - /* update the l2 cache entry */ - - s->l2_cache_offsets[min_index] = l2_offset; - s->l2_cache_counts[min_index] = 1; - - return l2_table; -} - -static int size_to_clusters(BDRVQcowState *s, int64_t size) -{ - return (size + (s->cluster_size - 1)) >> s->cluster_bits; -} - -static int count_contiguous_clusters(uint64_t nb_clusters, int cluster_size, - uint64_t *l2_table, uint64_t start, uint64_t mask) -{ - int i; - uint64_t offset = be64_to_cpu(l2_table[0]) & ~mask; - - if (!offset) - return 0; - - for (i = start; i < start + nb_clusters; i++) - if (offset + i * cluster_size != (be64_to_cpu(l2_table[i]) & ~mask)) - break; - - return (i - start); -} - -static int count_contiguous_free_clusters(uint64_t nb_clusters, uint64_t *l2_table) -{ - int i = 0; - - while(nb_clusters-- && l2_table[i] == 0) - i++; - - return i; -} - -/* - * get_cluster_offset - * - * For a given offset of the disk image, return cluster offset in - * qcow2 file. - * - * on entry, *num is the number of contiguous clusters we'd like to - * access following offset. - * - * on exit, *num is the number of contiguous clusters we can read. - * - * Return 1, if the offset is found - * Return 0, otherwise. - * - */ - -static uint64_t get_cluster_offset(BlockDriverState *bs, - uint64_t offset, int *num) -{ - BDRVQcowState *s = bs->opaque; - int l1_index, l2_index; - uint64_t l2_offset, *l2_table, cluster_offset; - int l1_bits, c; - int index_in_cluster, nb_available, nb_needed, nb_clusters; - - index_in_cluster = (offset >> 9) & (s->cluster_sectors - 1); - nb_needed = *num + index_in_cluster; - - l1_bits = s->l2_bits + s->cluster_bits; - - /* compute how many bytes there are between the offset and - * the end of the l1 entry - */ - - nb_available = (1 << l1_bits) - (offset & ((1 << l1_bits) - 1)); - - /* compute the number of available sectors */ - - nb_available = (nb_available >> 9) + index_in_cluster; - - if (nb_needed > nb_available) { - nb_needed = nb_available; - } - - cluster_offset = 0; - - /* seek the the l2 offset in the l1 table */ - - l1_index = offset >> l1_bits; - if (l1_index >= s->l1_size) - goto out; - - l2_offset = s->l1_table[l1_index]; - - /* seek the l2 table of the given l2 offset */ - - if (!l2_offset) - goto out; - - /* load the l2 table in memory */ - - l2_offset &= ~QCOW_OFLAG_COPIED; - l2_table = l2_load(bs, l2_offset); - if (l2_table == NULL) - return 0; - - /* find the cluster offset for the given disk offset */ - - l2_index = (offset >> s->cluster_bits) & (s->l2_size - 1); - cluster_offset = be64_to_cpu(l2_table[l2_index]); - nb_clusters = size_to_clusters(s, nb_needed << 9); - - if (!cluster_offset) { - /* how many empty clusters ? */ - c = count_contiguous_free_clusters(nb_clusters, &l2_table[l2_index]); - } else { - /* how many allocated clusters ? */ - c = count_contiguous_clusters(nb_clusters, s->cluster_size, - &l2_table[l2_index], 0, QCOW_OFLAG_COPIED); - } - - nb_available = (c * s->cluster_sectors); -out: - if (nb_available > nb_needed) - nb_available = nb_needed; - - *num = nb_available - index_in_cluster; - - return cluster_offset & ~QCOW_OFLAG_COPIED; -} - -/* - * free_any_clusters - * - * free clusters according to its type: compressed or not - * - */ - -static void free_any_clusters(BlockDriverState *bs, - uint64_t cluster_offset, int nb_clusters) -{ - BDRVQcowState *s = bs->opaque; - - /* free the cluster */ - - if (cluster_offset & QCOW_OFLAG_COMPRESSED) { - int nb_csectors; - nb_csectors = ((cluster_offset >> s->csize_shift) & - s->csize_mask) + 1; - free_clusters(bs, (cluster_offset & s->cluster_offset_mask) & ~511, - nb_csectors * 512); - return; - } - - free_clusters(bs, cluster_offset, nb_clusters << s->cluster_bits); - - return; -} - -/* - * get_cluster_table - * - * for a given disk offset, load (and allocate if needed) - * the l2 table. - * - * the l2 table offset in the qcow2 file and the cluster index - * in the l2 table are given to the caller. - * - */ - -static int get_cluster_table(BlockDriverState *bs, uint64_t offset, - uint64_t **new_l2_table, - uint64_t *new_l2_offset, - int *new_l2_index) -{ - BDRVQcowState *s = bs->opaque; - int l1_index, l2_index, ret; - uint64_t l2_offset, *l2_table; - - /* seek the the l2 offset in the l1 table */ - - l1_index = offset >> (s->l2_bits + s->cluster_bits); - if (l1_index >= s->l1_size) { - ret = grow_l1_table(bs, l1_index + 1); - if (ret < 0) - return 0; - } - l2_offset = s->l1_table[l1_index]; - - /* seek the l2 table of the given l2 offset */ - - if (l2_offset & QCOW_OFLAG_COPIED) { - /* load the l2 table in memory */ - l2_offset &= ~QCOW_OFLAG_COPIED; - l2_table = l2_load(bs, l2_offset); - if (l2_table == NULL) - return 0; - } else { - if (l2_offset) - free_clusters(bs, l2_offset, s->l2_size * sizeof(uint64_t)); - l2_table = l2_allocate(bs, l1_index); - if (l2_table == NULL) - return 0; - l2_offset = s->l1_table[l1_index] & ~QCOW_OFLAG_COPIED; - } - - /* find the cluster offset for the given disk offset */ - - l2_index = (offset >> s->cluster_bits) & (s->l2_size - 1); - - *new_l2_table = l2_table; - *new_l2_offset = l2_offset; - *new_l2_index = l2_index; - - return 1; -} - -/* - * alloc_compressed_cluster_offset - * - * For a given offset of the disk image, return cluster offset in - * qcow2 file. - * - * If the offset is not found, allocate a new compressed cluster. - * - * Return the cluster offset if successful, - * Return 0, otherwise. - * - */ - -static uint64_t alloc_compressed_cluster_offset(BlockDriverState *bs, - uint64_t offset, - int compressed_size) -{ - BDRVQcowState *s = bs->opaque; - int l2_index, ret; - uint64_t l2_offset, *l2_table, cluster_offset; - int nb_csectors; - - ret = get_cluster_table(bs, offset, &l2_table, &l2_offset, &l2_index); - if (ret == 0) - return 0; - - cluster_offset = be64_to_cpu(l2_table[l2_index]); - if (cluster_offset & QCOW_OFLAG_COPIED) - return cluster_offset & ~QCOW_OFLAG_COPIED; - - if (cluster_offset) - free_any_clusters(bs, cluster_offset, 1); - - cluster_offset = alloc_bytes(bs, compressed_size); - nb_csectors = ((cluster_offset + compressed_size - 1) >> 9) - - (cluster_offset >> 9); - - cluster_offset |= QCOW_OFLAG_COMPRESSED | - ((uint64_t)nb_csectors << s->csize_shift); - - /* update L2 table */ - - /* compressed clusters never have the copied flag */ - - l2_table[l2_index] = cpu_to_be64(cluster_offset); - if (bdrv_pwrite(s->hd, - l2_offset + l2_index * sizeof(uint64_t), - l2_table + l2_index, - sizeof(uint64_t)) != sizeof(uint64_t)) - return 0; - - return cluster_offset; -} - -typedef struct QCowL2Meta -{ - uint64_t offset; - int n_start; - int nb_available; - int nb_clusters; -} QCowL2Meta; - -static int alloc_cluster_link_l2(BlockDriverState *bs, uint64_t cluster_offset, - QCowL2Meta *m) -{ - BDRVQcowState *s = bs->opaque; - int i, j = 0, l2_index, ret; - uint64_t *old_cluster, start_sect, l2_offset, *l2_table; - - if (m->nb_clusters == 0) - return 0; - - old_cluster = qemu_malloc(m->nb_clusters * sizeof(uint64_t)); - - /* copy content of unmodified sectors */ - start_sect = (m->offset & ~(s->cluster_size - 1)) >> 9; - if (m->n_start) { - ret = copy_sectors(bs, start_sect, cluster_offset, 0, m->n_start); - if (ret < 0) - goto err; - } - - if (m->nb_available & (s->cluster_sectors - 1)) { - uint64_t end = m->nb_available & ~(uint64_t)(s->cluster_sectors - 1); - ret = copy_sectors(bs, start_sect + end, cluster_offset + (end << 9), - m->nb_available - end, s->cluster_sectors); - if (ret < 0) - goto err; - } - - ret = -EIO; - /* update L2 table */ - if (!get_cluster_table(bs, m->offset, &l2_table, &l2_offset, &l2_index)) - goto err; - - for (i = 0; i < m->nb_clusters; i++) { - /* if two concurrent writes happen to the same unallocated cluster - * each write allocates separate cluster and writes data concurrently. - * The first one to complete updates l2 table with pointer to its - * cluster the second one has to do RMW (which is done above by - * copy_sectors()), update l2 table with its cluster pointer and free - * old cluster. This is what this loop does */ - if(l2_table[l2_index + i] != 0) - old_cluster[j++] = l2_table[l2_index + i]; - - l2_table[l2_index + i] = cpu_to_be64((cluster_offset + - (i << s->cluster_bits)) | QCOW_OFLAG_COPIED); - } - - if (bdrv_pwrite(s->hd, l2_offset + l2_index * sizeof(uint64_t), - l2_table + l2_index, m->nb_clusters * sizeof(uint64_t)) != - m->nb_clusters * sizeof(uint64_t)) - goto err; - - for (i = 0; i < j; i++) - free_any_clusters(bs, be64_to_cpu(old_cluster[i]) & ~QCOW_OFLAG_COPIED, - 1); - - ret = 0; -err: - qemu_free(old_cluster); - return ret; - } - -/* - * alloc_cluster_offset - * - * For a given offset of the disk image, return cluster offset in - * qcow2 file. - * - * If the offset is not found, allocate a new cluster. - * - * Return the cluster offset if successful, - * Return 0, otherwise. - * - */ - -static uint64_t alloc_cluster_offset(BlockDriverState *bs, - uint64_t offset, - int n_start, int n_end, - int *num, QCowL2Meta *m) -{ - BDRVQcowState *s = bs->opaque; - int l2_index, ret; - uint64_t l2_offset, *l2_table, cluster_offset; - int nb_clusters, i = 0; - - ret = get_cluster_table(bs, offset, &l2_table, &l2_offset, &l2_index); - if (ret == 0) - return 0; - - nb_clusters = size_to_clusters(s, n_end << 9); - - nb_clusters = MIN(nb_clusters, s->l2_size - l2_index); - - cluster_offset = be64_to_cpu(l2_table[l2_index]); - - /* We keep all QCOW_OFLAG_COPIED clusters */ - - if (cluster_offset & QCOW_OFLAG_COPIED) { - nb_clusters = count_contiguous_clusters(nb_clusters, s->cluster_size, - &l2_table[l2_index], 0, 0); - - cluster_offset &= ~QCOW_OFLAG_COPIED; - m->nb_clusters = 0; - - goto out; - } - - /* for the moment, multiple compressed clusters are not managed */ - - if (cluster_offset & QCOW_OFLAG_COMPRESSED) - nb_clusters = 1; - - /* how many available clusters ? */ - - while (i < nb_clusters) { - i += count_contiguous_clusters(nb_clusters - i, s->cluster_size, - &l2_table[l2_index], i, 0); - - if(be64_to_cpu(l2_table[l2_index + i])) - break; - - i += count_contiguous_free_clusters(nb_clusters - i, - &l2_table[l2_index + i]); - - cluster_offset = be64_to_cpu(l2_table[l2_index + i]); - - if ((cluster_offset & QCOW_OFLAG_COPIED) || - (cluster_offset & QCOW_OFLAG_COMPRESSED)) - break; - } - nb_clusters = i; - - /* allocate a new cluster */ - - cluster_offset = alloc_clusters(bs, nb_clusters * s->cluster_size); - - /* save info needed for meta data update */ - m->offset = offset; - m->n_start = n_start; - m->nb_clusters = nb_clusters; - -out: - m->nb_available = MIN(nb_clusters << (s->cluster_bits - 9), n_end); - - *num = m->nb_available - n_start; - - return cluster_offset; -} - -static int qcow_is_allocated(BlockDriverState *bs, int64_t sector_num, - int nb_sectors, int *pnum) -{ - uint64_t cluster_offset; - - *pnum = nb_sectors; - cluster_offset = get_cluster_offset(bs, sector_num << 9, pnum); - - return (cluster_offset != 0); -} - -static int decompress_buffer(uint8_t *out_buf, int out_buf_size, - const uint8_t *buf, int buf_size) -{ - z_stream strm1, *strm = &strm1; - int ret, out_len; - - memset(strm, 0, sizeof(*strm)); - - strm->next_in = (uint8_t *)buf; - strm->avail_in = buf_size; - strm->next_out = out_buf; - strm->avail_out = out_buf_size; - - ret = inflateInit2(strm, -12); - if (ret != Z_OK) - return -1; - ret = inflate(strm, Z_FINISH); - out_len = strm->next_out - out_buf; - if ((ret != Z_STREAM_END && ret != Z_BUF_ERROR) || - out_len != out_buf_size) { - inflateEnd(strm); - return -1; - } - inflateEnd(strm); - return 0; -} - -static int decompress_cluster(BDRVQcowState *s, uint64_t cluster_offset) -{ - int ret, csize, nb_csectors, sector_offset; - uint64_t coffset; - - coffset = cluster_offset & s->cluster_offset_mask; - if (s->cluster_cache_offset != coffset) { - nb_csectors = ((cluster_offset >> s->csize_shift) & s->csize_mask) + 1; - sector_offset = coffset & 511; - csize = nb_csectors * 512 - sector_offset; - ret = bdrv_read(s->hd, coffset >> 9, s->cluster_data, nb_csectors); - if (ret < 0) { - return -1; - } - if (decompress_buffer(s->cluster_cache, s->cluster_size, - s->cluster_data + sector_offset, csize) < 0) { - return -1; - } - s->cluster_cache_offset = coffset; - } - return 0; -} - -/* handle reading after the end of the backing file */ -static int backing_read1(BlockDriverState *bs, - int64_t sector_num, uint8_t *buf, int nb_sectors) -{ - int n1; - if ((sector_num + nb_sectors) <= bs->total_sectors) - return nb_sectors; - if (sector_num >= bs->total_sectors) - n1 = 0; - else - n1 = bs->total_sectors - sector_num; - memset(buf + n1 * 512, 0, 512 * (nb_sectors - n1)); - return n1; -} - -static int qcow_read(BlockDriverState *bs, int64_t sector_num, - uint8_t *buf, int nb_sectors) -{ - BDRVQcowState *s = bs->opaque; - int ret, index_in_cluster, n, n1; - uint64_t cluster_offset; - - while (nb_sectors > 0) { - n = nb_sectors; - cluster_offset = get_cluster_offset(bs, sector_num << 9, &n); - index_in_cluster = sector_num & (s->cluster_sectors - 1); - if (!cluster_offset) { - if (bs->backing_hd) { - /* read from the base image */ - n1 = backing_read1(bs->backing_hd, sector_num, buf, n); - if (n1 > 0) { - ret = bdrv_read(bs->backing_hd, sector_num, buf, n1); - if (ret < 0) - return -1; - } - } else { - memset(buf, 0, 512 * n); - } - } else if (cluster_offset & QCOW_OFLAG_COMPRESSED) { - if (decompress_cluster(s, cluster_offset) < 0) - return -1; - memcpy(buf, s->cluster_cache + index_in_cluster * 512, 512 * n); - } else { - ret = bdrv_pread(s->hd, cluster_offset + index_in_cluster * 512, buf, n * 512); - if (ret != n * 512) - return -1; - if (s->crypt_method) { - encrypt_sectors(s, sector_num, buf, buf, n, 0, - &s->aes_decrypt_key); - } - } - nb_sectors -= n; - sector_num += n; - buf += n * 512; - } - return 0; -} - -static int qcow_write(BlockDriverState *bs, int64_t sector_num, - const uint8_t *buf, int nb_sectors) -{ - BDRVQcowState *s = bs->opaque; - int ret, index_in_cluster, n; - uint64_t cluster_offset; - int n_end; - QCowL2Meta l2meta; - - while (nb_sectors > 0) { - index_in_cluster = sector_num & (s->cluster_sectors - 1); - n_end = index_in_cluster + nb_sectors; - if (s->crypt_method && - n_end > QCOW_MAX_CRYPT_CLUSTERS * s->cluster_sectors) - n_end = QCOW_MAX_CRYPT_CLUSTERS * s->cluster_sectors; - cluster_offset = alloc_cluster_offset(bs, sector_num << 9, - index_in_cluster, - n_end, &n, &l2meta); - if (!cluster_offset) - return -1; - if (s->crypt_method) { - encrypt_sectors(s, sector_num, s->cluster_data, buf, n, 1, - &s->aes_encrypt_key); - ret = bdrv_pwrite(s->hd, cluster_offset + index_in_cluster * 512, - s->cluster_data, n * 512); - } else { - ret = bdrv_pwrite(s->hd, cluster_offset + index_in_cluster * 512, buf, n * 512); - } - if (ret != n * 512 || alloc_cluster_link_l2(bs, cluster_offset, &l2meta) < 0) { - free_any_clusters(bs, cluster_offset, l2meta.nb_clusters); - return -1; - } - nb_sectors -= n; - sector_num += n; - buf += n * 512; - } - s->cluster_cache_offset = -1; /* disable compressed cache */ - return 0; -} - -typedef struct QCowAIOCB { - BlockDriverAIOCB common; - int64_t sector_num; - QEMUIOVector *qiov; - uint8_t *buf; - void *orig_buf; - int nb_sectors; - int n; - uint64_t cluster_offset; - uint8_t *cluster_data; - BlockDriverAIOCB *hd_aiocb; - struct iovec hd_iov; - QEMUIOVector hd_qiov; - QEMUBH *bh; - QCowL2Meta l2meta; -} QCowAIOCB; - -static void qcow_aio_read_cb(void *opaque, int ret); -static void qcow_aio_read_bh(void *opaque) -{ - QCowAIOCB *acb = opaque; - qemu_bh_delete(acb->bh); - acb->bh = NULL; - qcow_aio_read_cb(opaque, 0); -} - -static int qcow_schedule_bh(QEMUBHFunc *cb, QCowAIOCB *acb) -{ - if (acb->bh) - return -EIO; - - acb->bh = qemu_bh_new(cb, acb); - if (!acb->bh) - return -EIO; - - qemu_bh_schedule(acb->bh); - - return 0; -} - -static void qcow_aio_read_cb(void *opaque, int ret) -{ - QCowAIOCB *acb = opaque; - BlockDriverState *bs = acb->common.bs; - BDRVQcowState *s = bs->opaque; - int index_in_cluster, n1; - - acb->hd_aiocb = NULL; - if (ret < 0) - goto done; - - /* post process the read buffer */ - if (!acb->cluster_offset) { - /* nothing to do */ - } else if (acb->cluster_offset & QCOW_OFLAG_COMPRESSED) { - /* nothing to do */ - } else { - if (s->crypt_method) { - encrypt_sectors(s, acb->sector_num, acb->buf, acb->buf, - acb->n, 0, - &s->aes_decrypt_key); - } - } - - acb->nb_sectors -= acb->n; - acb->sector_num += acb->n; - acb->buf += acb->n * 512; - - if (acb->nb_sectors == 0) { - /* request completed */ - ret = 0; - goto done; - } - - /* prepare next AIO request */ - acb->n = acb->nb_sectors; - acb->cluster_offset = get_cluster_offset(bs, acb->sector_num << 9, &acb->n); - index_in_cluster = acb->sector_num & (s->cluster_sectors - 1); - - if (!acb->cluster_offset) { - if (bs->backing_hd) { - /* read from the base image */ - n1 = backing_read1(bs->backing_hd, acb->sector_num, - acb->buf, acb->n); - if (n1 > 0) { - acb->hd_iov.iov_base = (void *)acb->buf; - acb->hd_iov.iov_len = acb->n * 512; - qemu_iovec_init_external(&acb->hd_qiov, &acb->hd_iov, 1); - acb->hd_aiocb = bdrv_aio_readv(bs->backing_hd, acb->sector_num, - &acb->hd_qiov, acb->n, - qcow_aio_read_cb, acb); - if (acb->hd_aiocb == NULL) - goto done; - } else { - ret = qcow_schedule_bh(qcow_aio_read_bh, acb); - if (ret < 0) - goto done; - } - } else { - /* Note: in this case, no need to wait */ - memset(acb->buf, 0, 512 * acb->n); - ret = qcow_schedule_bh(qcow_aio_read_bh, acb); - if (ret < 0) - goto done; - } - } else if (acb->cluster_offset & QCOW_OFLAG_COMPRESSED) { - /* add AIO support for compressed blocks ? */ - if (decompress_cluster(s, acb->cluster_offset) < 0) - goto done; - memcpy(acb->buf, - s->cluster_cache + index_in_cluster * 512, 512 * acb->n); - ret = qcow_schedule_bh(qcow_aio_read_bh, acb); - if (ret < 0) - goto done; - } else { - if ((acb->cluster_offset & 511) != 0) { - ret = -EIO; - goto done; - } - - acb->hd_iov.iov_base = (void *)acb->buf; - acb->hd_iov.iov_len = acb->n * 512; - qemu_iovec_init_external(&acb->hd_qiov, &acb->hd_iov, 1); - acb->hd_aiocb = bdrv_aio_readv(s->hd, - (acb->cluster_offset >> 9) + index_in_cluster, - &acb->hd_qiov, acb->n, qcow_aio_read_cb, acb); - if (acb->hd_aiocb == NULL) - goto done; - } - - return; -done: - if (acb->qiov->niov > 1) { - qemu_iovec_from_buffer(acb->qiov, acb->orig_buf, acb->qiov->size); - qemu_vfree(acb->orig_buf); - } - acb->common.cb(acb->common.opaque, ret); - qemu_aio_release(acb); -} - -static QCowAIOCB *qcow_aio_setup(BlockDriverState *bs, - int64_t sector_num, QEMUIOVector *qiov, int nb_sectors, - BlockDriverCompletionFunc *cb, void *opaque, int is_write) -{ - QCowAIOCB *acb; - - acb = qemu_aio_get(bs, cb, opaque); - if (!acb) - return NULL; - acb->hd_aiocb = NULL; - acb->sector_num = sector_num; - acb->qiov = qiov; - if (qiov->niov > 1) { - acb->buf = acb->orig_buf = qemu_blockalign(bs, qiov->size); - if (is_write) - qemu_iovec_to_buffer(qiov, acb->buf); - } else { - acb->buf = (uint8_t *)qiov->iov->iov_base; - } - acb->nb_sectors = nb_sectors; - acb->n = 0; - acb->cluster_offset = 0; - acb->l2meta.nb_clusters = 0; - return acb; -} - -static BlockDriverAIOCB *qcow_aio_readv(BlockDriverState *bs, - int64_t sector_num, QEMUIOVector *qiov, int nb_sectors, - BlockDriverCompletionFunc *cb, void *opaque) -{ - QCowAIOCB *acb; - - acb = qcow_aio_setup(bs, sector_num, qiov, nb_sectors, cb, opaque, 0); - if (!acb) - return NULL; - - qcow_aio_read_cb(acb, 0); - return &acb->common; -} - -static void qcow_aio_write_cb(void *opaque, int ret) -{ - QCowAIOCB *acb = opaque; - BlockDriverState *bs = acb->common.bs; - BDRVQcowState *s = bs->opaque; - int index_in_cluster; - const uint8_t *src_buf; - int n_end; - - acb->hd_aiocb = NULL; - - if (ret < 0) - goto done; - - if (alloc_cluster_link_l2(bs, acb->cluster_offset, &acb->l2meta) < 0) { - free_any_clusters(bs, acb->cluster_offset, acb->l2meta.nb_clusters); - goto done; - } - - acb->nb_sectors -= acb->n; - acb->sector_num += acb->n; - acb->buf += acb->n * 512; - - if (acb->nb_sectors == 0) { - /* request completed */ - ret = 0; - goto done; - } - - index_in_cluster = acb->sector_num & (s->cluster_sectors - 1); - n_end = index_in_cluster + acb->nb_sectors; - if (s->crypt_method && - n_end > QCOW_MAX_CRYPT_CLUSTERS * s->cluster_sectors) - n_end = QCOW_MAX_CRYPT_CLUSTERS * s->cluster_sectors; - - acb->cluster_offset = alloc_cluster_offset(bs, acb->sector_num << 9, - index_in_cluster, - n_end, &acb->n, &acb->l2meta); - if (!acb->cluster_offset || (acb->cluster_offset & 511) != 0) { - ret = -EIO; - goto done; - } - if (s->crypt_method) { - if (!acb->cluster_data) { - acb->cluster_data = qemu_mallocz(QCOW_MAX_CRYPT_CLUSTERS * - s->cluster_size); - } - encrypt_sectors(s, acb->sector_num, acb->cluster_data, acb->buf, - acb->n, 1, &s->aes_encrypt_key); - src_buf = acb->cluster_data; - } else { - src_buf = acb->buf; - } - acb->hd_iov.iov_base = (void *)src_buf; - acb->hd_iov.iov_len = acb->n * 512; - qemu_iovec_init_external(&acb->hd_qiov, &acb->hd_iov, 1); - acb->hd_aiocb = bdrv_aio_writev(s->hd, - (acb->cluster_offset >> 9) + index_in_cluster, - &acb->hd_qiov, acb->n, - qcow_aio_write_cb, acb); - if (acb->hd_aiocb == NULL) - goto done; - - return; - -done: - if (acb->qiov->niov > 1) - qemu_vfree(acb->orig_buf); - acb->common.cb(acb->common.opaque, ret); - qemu_aio_release(acb); -} - -static BlockDriverAIOCB *qcow_aio_writev(BlockDriverState *bs, - int64_t sector_num, QEMUIOVector *qiov, int nb_sectors, - BlockDriverCompletionFunc *cb, void *opaque) -{ - BDRVQcowState *s = bs->opaque; - QCowAIOCB *acb; - - s->cluster_cache_offset = -1; /* disable compressed cache */ - - acb = qcow_aio_setup(bs, sector_num, qiov, nb_sectors, cb, opaque, 1); - if (!acb) - return NULL; - - qcow_aio_write_cb(acb, 0); - return &acb->common; -} - -static void qcow_aio_cancel(BlockDriverAIOCB *blockacb) -{ - QCowAIOCB *acb = (QCowAIOCB *)blockacb; - if (acb->hd_aiocb) - bdrv_aio_cancel(acb->hd_aiocb); - qemu_aio_release(acb); -} - -static void qcow_close(BlockDriverState *bs) -{ - BDRVQcowState *s = bs->opaque; - qemu_free(s->l1_table); - qemu_free(s->l2_cache); - qemu_free(s->cluster_cache); - qemu_free(s->cluster_data); - refcount_close(bs); - bdrv_delete(s->hd); -} - -/* XXX: use std qcow open function ? */ -typedef struct QCowCreateState { - int cluster_size; - int cluster_bits; - uint16_t *refcount_block; - uint64_t *refcount_table; - int64_t l1_table_offset; - int64_t refcount_table_offset; - int64_t refcount_block_offset; -} QCowCreateState; - -static void create_refcount_update(QCowCreateState *s, - int64_t offset, int64_t size) -{ - int refcount; - int64_t start, last, cluster_offset; - uint16_t *p; - - start = offset & ~(s->cluster_size - 1); - last = (offset + size - 1) & ~(s->cluster_size - 1); - for(cluster_offset = start; cluster_offset <= last; - cluster_offset += s->cluster_size) { - p = &s->refcount_block[cluster_offset >> s->cluster_bits]; - refcount = be16_to_cpu(*p); - refcount++; - *p = cpu_to_be16(refcount); - } -} - -static int qcow_create2(const char *filename, int64_t total_size, - const char *backing_file, const char *backing_format, - int flags) -{ - - int fd, header_size, backing_filename_len, l1_size, i, shift, l2_bits; - int ref_clusters, backing_format_len = 0; - QCowHeader header; - uint64_t tmp, offset; - QCowCreateState s1, *s = &s1; - QCowExtension ext_bf = {0, 0}; - - - memset(s, 0, sizeof(*s)); - - fd = open(filename, O_WRONLY | O_CREAT | O_TRUNC | O_BINARY, 0644); - if (fd < 0) - return -1; - memset(&header, 0, sizeof(header)); - header.magic = cpu_to_be32(QCOW_MAGIC); - header.version = cpu_to_be32(QCOW_VERSION); - header.size = cpu_to_be64(total_size * 512); - header_size = sizeof(header); - backing_filename_len = 0; - if (backing_file) { - if (backing_format) { - ext_bf.magic = QCOW_EXT_MAGIC_BACKING_FORMAT; - backing_format_len = strlen(backing_format); - ext_bf.len = (backing_format_len + 7) & ~7; - header_size += ((sizeof(ext_bf) + ext_bf.len + 7) & ~7); - } - header.backing_file_offset = cpu_to_be64(header_size); - backing_filename_len = strlen(backing_file); - header.backing_file_size = cpu_to_be32(backing_filename_len); - header_size += backing_filename_len; - } - s->cluster_bits = 12; /* 4 KB clusters */ - s->cluster_size = 1 << s->cluster_bits; - header.cluster_bits = cpu_to_be32(s->cluster_bits); - header_size = (header_size + 7) & ~7; - if (flags & BLOCK_FLAG_ENCRYPT) { - header.crypt_method = cpu_to_be32(QCOW_CRYPT_AES); - } else { - header.crypt_method = cpu_to_be32(QCOW_CRYPT_NONE); - } - l2_bits = s->cluster_bits - 3; - shift = s->cluster_bits + l2_bits; - l1_size = (((total_size * 512) + (1LL << shift) - 1) >> shift); - offset = align_offset(header_size, s->cluster_size); - s->l1_table_offset = offset; - header.l1_table_offset = cpu_to_be64(s->l1_table_offset); - header.l1_size = cpu_to_be32(l1_size); - offset += align_offset(l1_size * sizeof(uint64_t), s->cluster_size); - - s->refcount_table = qemu_mallocz(s->cluster_size); - - s->refcount_table_offset = offset; - header.refcount_table_offset = cpu_to_be64(offset); - header.refcount_table_clusters = cpu_to_be32(1); - offset += s->cluster_size; - s->refcount_block_offset = offset; - - /* count how many refcount blocks needed */ - tmp = offset >> s->cluster_bits; - ref_clusters = (tmp >> (s->cluster_bits - REFCOUNT_SHIFT)) + 1; - for (i=0; i < ref_clusters; i++) { - s->refcount_table[i] = cpu_to_be64(offset); - offset += s->cluster_size; - } - - s->refcount_block = qemu_mallocz(ref_clusters * s->cluster_size); - - /* update refcounts */ - create_refcount_update(s, 0, header_size); - create_refcount_update(s, s->l1_table_offset, l1_size * sizeof(uint64_t)); - create_refcount_update(s, s->refcount_table_offset, s->cluster_size); - create_refcount_update(s, s->refcount_block_offset, ref_clusters * s->cluster_size); - - /* write all the data */ - write(fd, &header, sizeof(header)); - if (backing_file) { - if (backing_format_len) { - char zero[16]; - int d = ext_bf.len - backing_format_len; - - memset(zero, 0, sizeof(zero)); - cpu_to_be32s(&ext_bf.magic); - cpu_to_be32s(&ext_bf.len); - write(fd, &ext_bf, sizeof(ext_bf)); - write(fd, backing_format, backing_format_len); - if (d>0) { - write(fd, zero, d); - } - } - write(fd, backing_file, backing_filename_len); - } - lseek(fd, s->l1_table_offset, SEEK_SET); - tmp = 0; - for(i = 0;i < l1_size; i++) { - write(fd, &tmp, sizeof(tmp)); - } - lseek(fd, s->refcount_table_offset, SEEK_SET); - write(fd, s->refcount_table, s->cluster_size); - - lseek(fd, s->refcount_block_offset, SEEK_SET); - write(fd, s->refcount_block, ref_clusters * s->cluster_size); - - qemu_free(s->refcount_table); - qemu_free(s->refcount_block); - close(fd); - return 0; -} - -static int qcow_create(const char *filename, int64_t total_size, - const char *backing_file, int flags) -{ - return qcow_create2(filename, total_size, backing_file, NULL, flags); -} - -static int qcow_make_empty(BlockDriverState *bs) -{ -#if 0 - /* XXX: not correct */ - BDRVQcowState *s = bs->opaque; - uint32_t l1_length = s->l1_size * sizeof(uint64_t); - int ret; - - memset(s->l1_table, 0, l1_length); - if (bdrv_pwrite(s->hd, s->l1_table_offset, s->l1_table, l1_length) < 0) - return -1; - ret = bdrv_truncate(s->hd, s->l1_table_offset + l1_length); - if (ret < 0) - return ret; - - l2_cache_reset(bs); -#endif - return 0; -} - -/* XXX: put compressed sectors first, then all the cluster aligned - tables to avoid losing bytes in alignment */ -static int qcow_write_compressed(BlockDriverState *bs, int64_t sector_num, - const uint8_t *buf, int nb_sectors) -{ - BDRVQcowState *s = bs->opaque; - z_stream strm; - int ret, out_len; - uint8_t *out_buf; - uint64_t cluster_offset; - - if (nb_sectors == 0) { - /* align end of file to a sector boundary to ease reading with - sector based I/Os */ - cluster_offset = bdrv_getlength(s->hd); - cluster_offset = (cluster_offset + 511) & ~511; - bdrv_truncate(s->hd, cluster_offset); - return 0; - } - - if (nb_sectors != s->cluster_sectors) - return -EINVAL; - - out_buf = qemu_malloc(s->cluster_size + (s->cluster_size / 1000) + 128); - - /* best compression, small window, no zlib header */ - memset(&strm, 0, sizeof(strm)); - ret = deflateInit2(&strm, Z_DEFAULT_COMPRESSION, - Z_DEFLATED, -12, - 9, Z_DEFAULT_STRATEGY); - if (ret != 0) { - qemu_free(out_buf); - return -1; - } - - strm.avail_in = s->cluster_size; - strm.next_in = (uint8_t *)buf; - strm.avail_out = s->cluster_size; - strm.next_out = out_buf; - - ret = deflate(&strm, Z_FINISH); - if (ret != Z_STREAM_END && ret != Z_OK) { - qemu_free(out_buf); - deflateEnd(&strm); - return -1; - } - out_len = strm.next_out - out_buf; - - deflateEnd(&strm); - - if (ret != Z_STREAM_END || out_len >= s->cluster_size) { - /* could not compress: write normal cluster */ - qcow_write(bs, sector_num, buf, s->cluster_sectors); - } else { - cluster_offset = alloc_compressed_cluster_offset(bs, sector_num << 9, - out_len); - if (!cluster_offset) - return -1; - cluster_offset &= s->cluster_offset_mask; - if (bdrv_pwrite(s->hd, cluster_offset, out_buf, out_len) != out_len) { - qemu_free(out_buf); - return -1; - } - } - - qemu_free(out_buf); - return 0; -} - -static void qcow_flush(BlockDriverState *bs) -{ - BDRVQcowState *s = bs->opaque; - bdrv_flush(s->hd); -} - -static int qcow_get_info(BlockDriverState *bs, BlockDriverInfo *bdi) -{ - BDRVQcowState *s = bs->opaque; - bdi->cluster_size = s->cluster_size; - bdi->vm_state_offset = (int64_t)s->l1_vm_state_index << - (s->cluster_bits + s->l2_bits); - return 0; -} - -/*********************************************************/ -/* snapshot support */ - -/* update the refcounts of snapshots and the copied flag */ -static int update_snapshot_refcount(BlockDriverState *bs, - int64_t l1_table_offset, - int l1_size, - int addend) -{ - BDRVQcowState *s = bs->opaque; - uint64_t *l1_table, *l2_table, l2_offset, offset, l1_size2, l1_allocated; - int64_t old_offset, old_l2_offset; - int l2_size, i, j, l1_modified, l2_modified, nb_csectors, refcount; - - l2_cache_reset(bs); - - l2_table = NULL; - l1_table = NULL; - l1_size2 = l1_size * sizeof(uint64_t); - l1_allocated = 0; - if (l1_table_offset != s->l1_table_offset) { - l1_table = qemu_malloc(l1_size2); - l1_allocated = 1; - if (bdrv_pread(s->hd, l1_table_offset, - l1_table, l1_size2) != l1_size2) - goto fail; - for(i = 0;i < l1_size; i++) - be64_to_cpus(&l1_table[i]); - } else { - assert(l1_size == s->l1_size); - l1_table = s->l1_table; - l1_allocated = 0; - } - - l2_size = s->l2_size * sizeof(uint64_t); - l2_table = qemu_malloc(l2_size); - l1_modified = 0; - for(i = 0; i < l1_size; i++) { - l2_offset = l1_table[i]; - if (l2_offset) { - old_l2_offset = l2_offset; - l2_offset &= ~QCOW_OFLAG_COPIED; - l2_modified = 0; - if (bdrv_pread(s->hd, l2_offset, l2_table, l2_size) != l2_size) - goto fail; - for(j = 0; j < s->l2_size; j++) { - offset = be64_to_cpu(l2_table[j]); - if (offset != 0) { - old_offset = offset; - offset &= ~QCOW_OFLAG_COPIED; - if (offset & QCOW_OFLAG_COMPRESSED) { - nb_csectors = ((offset >> s->csize_shift) & - s->csize_mask) + 1; - if (addend != 0) - update_refcount(bs, (offset & s->cluster_offset_mask) & ~511, - nb_csectors * 512, addend); - /* compressed clusters are never modified */ - refcount = 2; - } else { - if (addend != 0) { - refcount = update_cluster_refcount(bs, offset >> s->cluster_bits, addend); - } else { - refcount = get_refcount(bs, offset >> s->cluster_bits); - } - } - - if (refcount == 1) { - offset |= QCOW_OFLAG_COPIED; - } - if (offset != old_offset) { - l2_table[j] = cpu_to_be64(offset); - l2_modified = 1; - } - } - } - if (l2_modified) { - if (bdrv_pwrite(s->hd, - l2_offset, l2_table, l2_size) != l2_size) - goto fail; - } - - if (addend != 0) { - refcount = update_cluster_refcount(bs, l2_offset >> s->cluster_bits, addend); - } else { - refcount = get_refcount(bs, l2_offset >> s->cluster_bits); - } - if (refcount == 1) { - l2_offset |= QCOW_OFLAG_COPIED; - } - if (l2_offset != old_l2_offset) { - l1_table[i] = l2_offset; - l1_modified = 1; - } - } - } - if (l1_modified) { - for(i = 0; i < l1_size; i++) - cpu_to_be64s(&l1_table[i]); - if (bdrv_pwrite(s->hd, l1_table_offset, l1_table, - l1_size2) != l1_size2) - goto fail; - for(i = 0; i < l1_size; i++) - be64_to_cpus(&l1_table[i]); - } - if (l1_allocated) - qemu_free(l1_table); - qemu_free(l2_table); - return 0; - fail: - if (l1_allocated) - qemu_free(l1_table); - qemu_free(l2_table); - return -EIO; -} - -static void qcow_free_snapshots(BlockDriverState *bs) -{ - BDRVQcowState *s = bs->opaque; - int i; - - for(i = 0; i < s->nb_snapshots; i++) { - qemu_free(s->snapshots[i].name); - qemu_free(s->snapshots[i].id_str); - } - qemu_free(s->snapshots); - s->snapshots = NULL; - s->nb_snapshots = 0; -} - -static int qcow_read_snapshots(BlockDriverState *bs) -{ - BDRVQcowState *s = bs->opaque; - QCowSnapshotHeader h; - QCowSnapshot *sn; - int i, id_str_size, name_size; - int64_t offset; - uint32_t extra_data_size; - - if (!s->nb_snapshots) { - s->snapshots = NULL; - s->snapshots_size = 0; - return 0; - } - - offset = s->snapshots_offset; - s->snapshots = qemu_mallocz(s->nb_snapshots * sizeof(QCowSnapshot)); - for(i = 0; i < s->nb_snapshots; i++) { - offset = align_offset(offset, 8); - if (bdrv_pread(s->hd, offset, &h, sizeof(h)) != sizeof(h)) - goto fail; - offset += sizeof(h); - sn = s->snapshots + i; - sn->l1_table_offset = be64_to_cpu(h.l1_table_offset); - sn->l1_size = be32_to_cpu(h.l1_size); - sn->vm_state_size = be32_to_cpu(h.vm_state_size); - sn->date_sec = be32_to_cpu(h.date_sec); - sn->date_nsec = be32_to_cpu(h.date_nsec); - sn->vm_clock_nsec = be64_to_cpu(h.vm_clock_nsec); - extra_data_size = be32_to_cpu(h.extra_data_size); - - id_str_size = be16_to_cpu(h.id_str_size); - name_size = be16_to_cpu(h.name_size); - - offset += extra_data_size; - - sn->id_str = qemu_malloc(id_str_size + 1); - if (bdrv_pread(s->hd, offset, sn->id_str, id_str_size) != id_str_size) - goto fail; - offset += id_str_size; - sn->id_str[id_str_size] = '\0'; - - sn->name = qemu_malloc(name_size + 1); - if (bdrv_pread(s->hd, offset, sn->name, name_size) != name_size) - goto fail; - offset += name_size; - sn->name[name_size] = '\0'; - } - s->snapshots_size = offset - s->snapshots_offset; - return 0; - fail: - qcow_free_snapshots(bs); - return -1; -} - -/* add at the end of the file a new list of snapshots */ -static int qcow_write_snapshots(BlockDriverState *bs) -{ - BDRVQcowState *s = bs->opaque; - QCowSnapshot *sn; - QCowSnapshotHeader h; - int i, name_size, id_str_size, snapshots_size; - uint64_t data64; - uint32_t data32; - int64_t offset, snapshots_offset; - - /* compute the size of the snapshots */ - offset = 0; - for(i = 0; i < s->nb_snapshots; i++) { - sn = s->snapshots + i; - offset = align_offset(offset, 8); - offset += sizeof(h); - offset += strlen(sn->id_str); - offset += strlen(sn->name); - } - snapshots_size = offset; - - snapshots_offset = alloc_clusters(bs, snapshots_size); - offset = snapshots_offset; - - for(i = 0; i < s->nb_snapshots; i++) { - sn = s->snapshots + i; - memset(&h, 0, sizeof(h)); - h.l1_table_offset = cpu_to_be64(sn->l1_table_offset); - h.l1_size = cpu_to_be32(sn->l1_size); - h.vm_state_size = cpu_to_be32(sn->vm_state_size); - h.date_sec = cpu_to_be32(sn->date_sec); - h.date_nsec = cpu_to_be32(sn->date_nsec); - h.vm_clock_nsec = cpu_to_be64(sn->vm_clock_nsec); - - id_str_size = strlen(sn->id_str); - name_size = strlen(sn->name); - h.id_str_size = cpu_to_be16(id_str_size); - h.name_size = cpu_to_be16(name_size); - offset = align_offset(offset, 8); - if (bdrv_pwrite(s->hd, offset, &h, sizeof(h)) != sizeof(h)) - goto fail; - offset += sizeof(h); - if (bdrv_pwrite(s->hd, offset, sn->id_str, id_str_size) != id_str_size) - goto fail; - offset += id_str_size; - if (bdrv_pwrite(s->hd, offset, sn->name, name_size) != name_size) - goto fail; - offset += name_size; - } - - /* update the various header fields */ - data64 = cpu_to_be64(snapshots_offset); - if (bdrv_pwrite(s->hd, offsetof(QCowHeader, snapshots_offset), - &data64, sizeof(data64)) != sizeof(data64)) - goto fail; - data32 = cpu_to_be32(s->nb_snapshots); - if (bdrv_pwrite(s->hd, offsetof(QCowHeader, nb_snapshots), - &data32, sizeof(data32)) != sizeof(data32)) - goto fail; - - /* free the old snapshot table */ - free_clusters(bs, s->snapshots_offset, s->snapshots_size); - s->snapshots_offset = snapshots_offset; - s->snapshots_size = snapshots_size; - return 0; - fail: - return -1; -} - -static void find_new_snapshot_id(BlockDriverState *bs, - char *id_str, int id_str_size) -{ - BDRVQcowState *s = bs->opaque; - QCowSnapshot *sn; - int i, id, id_max = 0; - - for(i = 0; i < s->nb_snapshots; i++) { - sn = s->snapshots + i; - id = strtoul(sn->id_str, NULL, 10); - if (id > id_max) - id_max = id; - } - snprintf(id_str, id_str_size, "%d", id_max + 1); -} - -static int find_snapshot_by_id(BlockDriverState *bs, const char *id_str) -{ - BDRVQcowState *s = bs->opaque; - int i; - - for(i = 0; i < s->nb_snapshots; i++) { - if (!strcmp(s->snapshots[i].id_str, id_str)) - return i; - } - return -1; -} - -static int find_snapshot_by_id_or_name(BlockDriverState *bs, const char *name) -{ - BDRVQcowState *s = bs->opaque; - int i, ret; - - ret = find_snapshot_by_id(bs, name); - if (ret >= 0) - return ret; - for(i = 0; i < s->nb_snapshots; i++) { - if (!strcmp(s->snapshots[i].name, name)) - return i; - } - return -1; -} - -/* if no id is provided, a new one is constructed */ -static int qcow_snapshot_create(BlockDriverState *bs, - QEMUSnapshotInfo *sn_info) -{ - BDRVQcowState *s = bs->opaque; - QCowSnapshot *snapshots1, sn1, *sn = &sn1; - int i, ret; - uint64_t *l1_table = NULL; - - memset(sn, 0, sizeof(*sn)); - - if (sn_info->id_str[0] == '\0') { - /* compute a new id */ - find_new_snapshot_id(bs, sn_info->id_str, sizeof(sn_info->id_str)); - } - - /* check that the ID is unique */ - if (find_snapshot_by_id(bs, sn_info->id_str) >= 0) - return -ENOENT; - - sn->id_str = qemu_strdup(sn_info->id_str); - if (!sn->id_str) - goto fail; - sn->name = qemu_strdup(sn_info->name); - if (!sn->name) - goto fail; - sn->vm_state_size = sn_info->vm_state_size; - sn->date_sec = sn_info->date_sec; - sn->date_nsec = sn_info->date_nsec; - sn->vm_clock_nsec = sn_info->vm_clock_nsec; - - ret = update_snapshot_refcount(bs, s->l1_table_offset, s->l1_size, 1); - if (ret < 0) - goto fail; - - /* create the L1 table of the snapshot */ - sn->l1_table_offset = alloc_clusters(bs, s->l1_size * sizeof(uint64_t)); - sn->l1_size = s->l1_size; - - l1_table = qemu_malloc(s->l1_size * sizeof(uint64_t)); - for(i = 0; i < s->l1_size; i++) { - l1_table[i] = cpu_to_be64(s->l1_table[i]); - } - if (bdrv_pwrite(s->hd, sn->l1_table_offset, - l1_table, s->l1_size * sizeof(uint64_t)) != - (s->l1_size * sizeof(uint64_t))) - goto fail; - qemu_free(l1_table); - l1_table = NULL; - - snapshots1 = qemu_malloc((s->nb_snapshots + 1) * sizeof(QCowSnapshot)); - if (s->snapshots) { - memcpy(snapshots1, s->snapshots, s->nb_snapshots * sizeof(QCowSnapshot)); - qemu_free(s->snapshots); - } - s->snapshots = snapshots1; - s->snapshots[s->nb_snapshots++] = *sn; - - if (qcow_write_snapshots(bs) < 0) - goto fail; -#ifdef DEBUG_ALLOC - check_refcounts(bs); -#endif - return 0; - fail: - qemu_free(sn->name); - qemu_free(l1_table); - return -1; -} - -/* copy the snapshot 'snapshot_name' into the current disk image */ -static int qcow_snapshot_goto(BlockDriverState *bs, - const char *snapshot_id) -{ - BDRVQcowState *s = bs->opaque; - QCowSnapshot *sn; - int i, snapshot_index, l1_size2; - - snapshot_index = find_snapshot_by_id_or_name(bs, snapshot_id); - if (snapshot_index < 0) - return -ENOENT; - sn = &s->snapshots[snapshot_index]; - - if (update_snapshot_refcount(bs, s->l1_table_offset, s->l1_size, -1) < 0) - goto fail; - - if (grow_l1_table(bs, sn->l1_size) < 0) - goto fail; - - s->l1_size = sn->l1_size; - l1_size2 = s->l1_size * sizeof(uint64_t); - /* copy the snapshot l1 table to the current l1 table */ - if (bdrv_pread(s->hd, sn->l1_table_offset, - s->l1_table, l1_size2) != l1_size2) - goto fail; - if (bdrv_pwrite(s->hd, s->l1_table_offset, - s->l1_table, l1_size2) != l1_size2) - goto fail; - for(i = 0;i < s->l1_size; i++) { - be64_to_cpus(&s->l1_table[i]); - } - - if (update_snapshot_refcount(bs, s->l1_table_offset, s->l1_size, 1) < 0) - goto fail; - -#ifdef DEBUG_ALLOC - check_refcounts(bs); -#endif - return 0; - fail: - return -EIO; -} - -static int qcow_snapshot_delete(BlockDriverState *bs, const char *snapshot_id) -{ - BDRVQcowState *s = bs->opaque; - QCowSnapshot *sn; - int snapshot_index, ret; - - snapshot_index = find_snapshot_by_id_or_name(bs, snapshot_id); - if (snapshot_index < 0) - return -ENOENT; - sn = &s->snapshots[snapshot_index]; - - ret = update_snapshot_refcount(bs, sn->l1_table_offset, sn->l1_size, -1); - if (ret < 0) - return ret; - /* must update the copied flag on the current cluster offsets */ - ret = update_snapshot_refcount(bs, s->l1_table_offset, s->l1_size, 0); - if (ret < 0) - return ret; - free_clusters(bs, sn->l1_table_offset, sn->l1_size * sizeof(uint64_t)); - - qemu_free(sn->id_str); - qemu_free(sn->name); - memmove(sn, sn + 1, (s->nb_snapshots - snapshot_index - 1) * sizeof(*sn)); - s->nb_snapshots--; - ret = qcow_write_snapshots(bs); - if (ret < 0) { - /* XXX: restore snapshot if error ? */ - return ret; - } -#ifdef DEBUG_ALLOC - check_refcounts(bs); -#endif - return 0; -} - -static int qcow_snapshot_list(BlockDriverState *bs, - QEMUSnapshotInfo **psn_tab) -{ - BDRVQcowState *s = bs->opaque; - QEMUSnapshotInfo *sn_tab, *sn_info; - QCowSnapshot *sn; - int i; - - sn_tab = qemu_mallocz(s->nb_snapshots * sizeof(QEMUSnapshotInfo)); - for(i = 0; i < s->nb_snapshots; i++) { - sn_info = sn_tab + i; - sn = s->snapshots + i; - pstrcpy(sn_info->id_str, sizeof(sn_info->id_str), - sn->id_str); - pstrcpy(sn_info->name, sizeof(sn_info->name), - sn->name); - sn_info->vm_state_size = sn->vm_state_size; - sn_info->date_sec = sn->date_sec; - sn_info->date_nsec = sn->date_nsec; - sn_info->vm_clock_nsec = sn->vm_clock_nsec; - } - *psn_tab = sn_tab; - return s->nb_snapshots; -} - -/*********************************************************/ -/* refcount handling */ - -static int refcount_init(BlockDriverState *bs) -{ - BDRVQcowState *s = bs->opaque; - int ret, refcount_table_size2, i; - - s->refcount_block_cache = qemu_malloc(s->cluster_size); - refcount_table_size2 = s->refcount_table_size * sizeof(uint64_t); - s->refcount_table = qemu_malloc(refcount_table_size2); - if (s->refcount_table_size > 0) { - ret = bdrv_pread(s->hd, s->refcount_table_offset, - s->refcount_table, refcount_table_size2); - if (ret != refcount_table_size2) - goto fail; - for(i = 0; i < s->refcount_table_size; i++) - be64_to_cpus(&s->refcount_table[i]); - } - return 0; - fail: - return -ENOMEM; -} - -static void refcount_close(BlockDriverState *bs) -{ - BDRVQcowState *s = bs->opaque; - qemu_free(s->refcount_block_cache); - qemu_free(s->refcount_table); -} - - -static int load_refcount_block(BlockDriverState *bs, - int64_t refcount_block_offset) -{ - BDRVQcowState *s = bs->opaque; - int ret; - ret = bdrv_pread(s->hd, refcount_block_offset, s->refcount_block_cache, - s->cluster_size); - if (ret != s->cluster_size) - return -EIO; - s->refcount_block_cache_offset = refcount_block_offset; - return 0; -} - -static int get_refcount(BlockDriverState *bs, int64_t cluster_index) -{ - BDRVQcowState *s = bs->opaque; - int refcount_table_index, block_index; - int64_t refcount_block_offset; - - refcount_table_index = cluster_index >> (s->cluster_bits - REFCOUNT_SHIFT); - if (refcount_table_index >= s->refcount_table_size) - return 0; - refcount_block_offset = s->refcount_table[refcount_table_index]; - if (!refcount_block_offset) - return 0; - if (refcount_block_offset != s->refcount_block_cache_offset) { - /* better than nothing: return allocated if read error */ - if (load_refcount_block(bs, refcount_block_offset) < 0) - return 1; - } - block_index = cluster_index & - ((1 << (s->cluster_bits - REFCOUNT_SHIFT)) - 1); - return be16_to_cpu(s->refcount_block_cache[block_index]); -} - -/* return < 0 if error */ -static int64_t alloc_clusters_noref(BlockDriverState *bs, int64_t size) -{ - BDRVQcowState *s = bs->opaque; - int i, nb_clusters; - - nb_clusters = size_to_clusters(s, size); -retry: - for(i = 0; i < nb_clusters; i++) { - int64_t i = s->free_cluster_index++; - if (get_refcount(bs, i) != 0) - goto retry; - } -#ifdef DEBUG_ALLOC2 - printf("alloc_clusters: size=%lld -> %lld\n", - size, - (s->free_cluster_index - nb_clusters) << s->cluster_bits); -#endif - return (s->free_cluster_index - nb_clusters) << s->cluster_bits; -} - -static int64_t alloc_clusters(BlockDriverState *bs, int64_t size) -{ - int64_t offset; - - offset = alloc_clusters_noref(bs, size); - update_refcount(bs, offset, size, 1); - return offset; -} - -/* only used to allocate compressed sectors. We try to allocate - contiguous sectors. size must be <= cluster_size */ -static int64_t alloc_bytes(BlockDriverState *bs, int size) -{ - BDRVQcowState *s = bs->opaque; - int64_t offset, cluster_offset; - int free_in_cluster; - - assert(size > 0 && size <= s->cluster_size); - if (s->free_byte_offset == 0) { - s->free_byte_offset = alloc_clusters(bs, s->cluster_size); - } - redo: - free_in_cluster = s->cluster_size - - (s->free_byte_offset & (s->cluster_size - 1)); - if (size <= free_in_cluster) { - /* enough space in current cluster */ - offset = s->free_byte_offset; - s->free_byte_offset += size; - free_in_cluster -= size; - if (free_in_cluster == 0) - s->free_byte_offset = 0; - if ((offset & (s->cluster_size - 1)) != 0) - update_cluster_refcount(bs, offset >> s->cluster_bits, 1); - } else { - offset = alloc_clusters(bs, s->cluster_size); - cluster_offset = s->free_byte_offset & ~(s->cluster_size - 1); - if ((cluster_offset + s->cluster_size) == offset) { - /* we are lucky: contiguous data */ - offset = s->free_byte_offset; - update_cluster_refcount(bs, offset >> s->cluster_bits, 1); - s->free_byte_offset += size; - } else { - s->free_byte_offset = offset; - goto redo; - } - } - return offset; -} - -static void free_clusters(BlockDriverState *bs, - int64_t offset, int64_t size) -{ - update_refcount(bs, offset, size, -1); -} - -static int grow_refcount_table(BlockDriverState *bs, int min_size) -{ - BDRVQcowState *s = bs->opaque; - int new_table_size, new_table_size2, refcount_table_clusters, i, ret; - uint64_t *new_table; - int64_t table_offset; - uint8_t data[12]; - int old_table_size; - int64_t old_table_offset; - - if (min_size <= s->refcount_table_size) - return 0; - /* compute new table size */ - refcount_table_clusters = s->refcount_table_size >> (s->cluster_bits - 3); - for(;;) { - if (refcount_table_clusters == 0) { - refcount_table_clusters = 1; - } else { - refcount_table_clusters = (refcount_table_clusters * 3 + 1) / 2; - } - new_table_size = refcount_table_clusters << (s->cluster_bits - 3); - if (min_size <= new_table_size) - break; - } -#ifdef DEBUG_ALLOC2 - printf("grow_refcount_table from %d to %d\n", - s->refcount_table_size, - new_table_size); -#endif - new_table_size2 = new_table_size * sizeof(uint64_t); - new_table = qemu_mallocz(new_table_size2); - memcpy(new_table, s->refcount_table, - s->refcount_table_size * sizeof(uint64_t)); - for(i = 0; i < s->refcount_table_size; i++) - cpu_to_be64s(&new_table[i]); - /* Note: we cannot update the refcount now to avoid recursion */ - table_offset = alloc_clusters_noref(bs, new_table_size2); - ret = bdrv_pwrite(s->hd, table_offset, new_table, new_table_size2); - if (ret != new_table_size2) - goto fail; - for(i = 0; i < s->refcount_table_size; i++) - be64_to_cpus(&new_table[i]); - - cpu_to_be64w((uint64_t*)data, table_offset); - cpu_to_be32w((uint32_t*)(data + 8), refcount_table_clusters); - if (bdrv_pwrite(s->hd, offsetof(QCowHeader, refcount_table_offset), - data, sizeof(data)) != sizeof(data)) - goto fail; - qemu_free(s->refcount_table); - old_table_offset = s->refcount_table_offset; - old_table_size = s->refcount_table_size; - s->refcount_table = new_table; - s->refcount_table_size = new_table_size; - s->refcount_table_offset = table_offset; - - update_refcount(bs, table_offset, new_table_size2, 1); - free_clusters(bs, old_table_offset, old_table_size * sizeof(uint64_t)); - return 0; - fail: - free_clusters(bs, table_offset, new_table_size2); - qemu_free(new_table); - return -EIO; -} - -/* addend must be 1 or -1 */ -/* XXX: cache several refcount block clusters ? */ -static int update_cluster_refcount(BlockDriverState *bs, - int64_t cluster_index, - int addend) -{ - BDRVQcowState *s = bs->opaque; - int64_t offset, refcount_block_offset; - int ret, refcount_table_index, block_index, refcount; - uint64_t data64; - - refcount_table_index = cluster_index >> (s->cluster_bits - REFCOUNT_SHIFT); - if (refcount_table_index >= s->refcount_table_size) { - if (addend < 0) - return -EINVAL; - ret = grow_refcount_table(bs, refcount_table_index + 1); - if (ret < 0) - return ret; - } - refcount_block_offset = s->refcount_table[refcount_table_index]; - if (!refcount_block_offset) { - if (addend < 0) - return -EINVAL; - /* create a new refcount block */ - /* Note: we cannot update the refcount now to avoid recursion */ - offset = alloc_clusters_noref(bs, s->cluster_size); - memset(s->refcount_block_cache, 0, s->cluster_size); - ret = bdrv_pwrite(s->hd, offset, s->refcount_block_cache, s->cluster_size); - if (ret != s->cluster_size) - return -EINVAL; - s->refcount_table[refcount_table_index] = offset; - data64 = cpu_to_be64(offset); - ret = bdrv_pwrite(s->hd, s->refcount_table_offset + - refcount_table_index * sizeof(uint64_t), - &data64, sizeof(data64)); - if (ret != sizeof(data64)) - return -EINVAL; - - refcount_block_offset = offset; - s->refcount_block_cache_offset = offset; - update_refcount(bs, offset, s->cluster_size, 1); - } else { - if (refcount_block_offset != s->refcount_block_cache_offset) { - if (load_refcount_block(bs, refcount_block_offset) < 0) - return -EIO; - } - } - /* we can update the count and save it */ - block_index = cluster_index & - ((1 << (s->cluster_bits - REFCOUNT_SHIFT)) - 1); - refcount = be16_to_cpu(s->refcount_block_cache[block_index]); - refcount += addend; - if (refcount < 0 || refcount > 0xffff) - return -EINVAL; - if (refcount == 0 && cluster_index < s->free_cluster_index) { - s->free_cluster_index = cluster_index; - } - s->refcount_block_cache[block_index] = cpu_to_be16(refcount); - if (bdrv_pwrite(s->hd, - refcount_block_offset + (block_index << REFCOUNT_SHIFT), - &s->refcount_block_cache[block_index], 2) != 2) - return -EIO; - return refcount; -} - -static void update_refcount(BlockDriverState *bs, - int64_t offset, int64_t length, - int addend) -{ - BDRVQcowState *s = bs->opaque; - int64_t start, last, cluster_offset; - -#ifdef DEBUG_ALLOC2 - printf("update_refcount: offset=%lld size=%lld addend=%d\n", - offset, length, addend); -#endif - if (length <= 0) - return; - start = offset & ~(s->cluster_size - 1); - last = (offset + length - 1) & ~(s->cluster_size - 1); - for(cluster_offset = start; cluster_offset <= last; - cluster_offset += s->cluster_size) { - update_cluster_refcount(bs, cluster_offset >> s->cluster_bits, addend); - } -} - -/* - * Increases the refcount for a range of clusters in a given refcount table. - * This is used to construct a temporary refcount table out of L1 and L2 tables - * which can be compared the the refcount table saved in the image. - * - * Returns the number of errors in the image that were found - */ -static int inc_refcounts(BlockDriverState *bs, - uint16_t *refcount_table, - int refcount_table_size, - int64_t offset, int64_t size) -{ - BDRVQcowState *s = bs->opaque; - int64_t start, last, cluster_offset; - int k; - int errors = 0; - - if (size <= 0) - return 0; - - start = offset & ~(s->cluster_size - 1); - last = (offset + size - 1) & ~(s->cluster_size - 1); - for(cluster_offset = start; cluster_offset <= last; - cluster_offset += s->cluster_size) { - k = cluster_offset >> s->cluster_bits; - if (k < 0 || k >= refcount_table_size) { - fprintf(stderr, "ERROR: invalid cluster offset=0x%" PRIx64 "\n", - cluster_offset); - errors++; - } else { - if (++refcount_table[k] == 0) { - fprintf(stderr, "ERROR: overflow cluster offset=0x%" PRIx64 - "\n", cluster_offset); - errors++; - } - } - } - - return errors; -} - -/* - * Increases the refcount in the given refcount table for the all clusters - * referenced in the L2 table. While doing so, performs some checks on L2 - * entries. - * - * Returns the number of errors found by the checks or -errno if an internal - * error occurred. - */ -static int check_refcounts_l2(BlockDriverState *bs, - uint16_t *refcount_table, int refcount_table_size, int64_t l2_offset, - int check_copied) -{ - BDRVQcowState *s = bs->opaque; - uint64_t *l2_table, offset; - int i, l2_size, nb_csectors, refcount; - int errors = 0; - - /* Read L2 table from disk */ - l2_size = s->l2_size * sizeof(uint64_t); - l2_table = qemu_malloc(l2_size); - - if (bdrv_pread(s->hd, l2_offset, l2_table, l2_size) != l2_size) - goto fail; - - /* Do the actual checks */ - for(i = 0; i < s->l2_size; i++) { - offset = be64_to_cpu(l2_table[i]); - if (offset != 0) { - if (offset & QCOW_OFLAG_COMPRESSED) { - /* Compressed clusters don't have QCOW_OFLAG_COPIED */ - if (offset & QCOW_OFLAG_COPIED) { - fprintf(stderr, "ERROR: cluster %" PRId64 ": " - "copied flag must never be set for compressed " - "clusters\n", offset >> s->cluster_bits); - offset &= ~QCOW_OFLAG_COPIED; - errors++; - } - - /* Mark cluster as used */ - nb_csectors = ((offset >> s->csize_shift) & - s->csize_mask) + 1; - offset &= s->cluster_offset_mask; - errors += inc_refcounts(bs, refcount_table, - refcount_table_size, - offset & ~511, nb_csectors * 512); - } else { - /* QCOW_OFLAG_COPIED must be set iff refcount == 1 */ - if (check_copied) { - uint64_t entry = offset; - offset &= ~QCOW_OFLAG_COPIED; - refcount = get_refcount(bs, offset >> s->cluster_bits); - if ((refcount == 1) != ((entry & QCOW_OFLAG_COPIED) != 0)) { - fprintf(stderr, "ERROR OFLAG_COPIED: offset=%" - PRIx64 " refcount=%d\n", entry, refcount); - errors++; - } - } - - /* Mark cluster as used */ - offset &= ~QCOW_OFLAG_COPIED; - errors += inc_refcounts(bs, refcount_table, - refcount_table_size, - offset, s->cluster_size); - - /* Correct offsets are cluster aligned */ - if (offset & (s->cluster_size - 1)) { - fprintf(stderr, "ERROR offset=%" PRIx64 ": Cluster is not " - "properly aligned; L2 entry corrupted.\n", offset); - errors++; - } - } - } - } - - qemu_free(l2_table); - return errors; - -fail: - fprintf(stderr, "ERROR: I/O error in check_refcounts_l1\n"); - qemu_free(l2_table); - return -EIO; -} - -/* - * Increases the refcount for the L1 table, its L2 tables and all referenced - * clusters in the given refcount table. While doing so, performs some checks - * on L1 and L2 entries. - * - * Returns the number of errors found by the checks or -errno if an internal - * error occurred. - */ -static int check_refcounts_l1(BlockDriverState *bs, - uint16_t *refcount_table, - int refcount_table_size, - int64_t l1_table_offset, int l1_size, - int check_copied) -{ - BDRVQcowState *s = bs->opaque; - uint64_t *l1_table, l2_offset, l1_size2; - int i, refcount, ret; - int errors = 0; - - l1_size2 = l1_size * sizeof(uint64_t); - - /* Mark L1 table as used */ - errors += inc_refcounts(bs, refcount_table, refcount_table_size, - l1_table_offset, l1_size2); - - /* Read L1 table entries from disk */ - l1_table = qemu_malloc(l1_size2); - if (bdrv_pread(s->hd, l1_table_offset, - l1_table, l1_size2) != l1_size2) - goto fail; - for(i = 0;i < l1_size; i++) - be64_to_cpus(&l1_table[i]); - - /* Do the actual checks */ - for(i = 0; i < l1_size; i++) { - l2_offset = l1_table[i]; - if (l2_offset) { - /* QCOW_OFLAG_COPIED must be set iff refcount == 1 */ - if (check_copied) { - refcount = get_refcount(bs, (l2_offset & ~QCOW_OFLAG_COPIED) - >> s->cluster_bits); - if ((refcount == 1) != ((l2_offset & QCOW_OFLAG_COPIED) != 0)) { - fprintf(stderr, "ERROR OFLAG_COPIED: l2_offset=%" PRIx64 - " refcount=%d\n", l2_offset, refcount); - errors++; - } - } - - /* Mark L2 table as used */ - l2_offset &= ~QCOW_OFLAG_COPIED; - errors += inc_refcounts(bs, refcount_table, - refcount_table_size, - l2_offset, - s->cluster_size); - - /* L2 tables are cluster aligned */ - if (l2_offset & (s->cluster_size - 1)) { - fprintf(stderr, "ERROR l2_offset=%" PRIx64 ": Table is not " - "cluster aligned; L1 entry corrupted\n", l2_offset); - errors++; - } - - /* Process and check L2 entries */ - ret = check_refcounts_l2(bs, refcount_table, refcount_table_size, - l2_offset, check_copied); - if (ret < 0) { - goto fail; - } - errors += ret; - } - } - qemu_free(l1_table); - return errors; - -fail: - fprintf(stderr, "ERROR: I/O error in check_refcounts_l1\n"); - qemu_free(l1_table); - return -EIO; -} - -/* - * Checks an image for refcount consistency. - * - * Returns 0 if no errors are found, the number of errors in case the image is - * detected as corrupted, and -errno when an internal error occured. - */ -static int check_refcounts(BlockDriverState *bs) -{ - BDRVQcowState *s = bs->opaque; - int64_t size; - int nb_clusters, refcount1, refcount2, i; - QCowSnapshot *sn; - uint16_t *refcount_table; - int ret, errors = 0; - - size = bdrv_getlength(s->hd); - nb_clusters = size_to_clusters(s, size); - refcount_table = qemu_mallocz(nb_clusters * sizeof(uint16_t)); - - /* header */ - errors += inc_refcounts(bs, refcount_table, nb_clusters, - 0, s->cluster_size); - - /* current L1 table */ - ret = check_refcounts_l1(bs, refcount_table, nb_clusters, - s->l1_table_offset, s->l1_size, 1); - if (ret < 0) { - return ret; - } - errors += ret; - - /* snapshots */ - for(i = 0; i < s->nb_snapshots; i++) { - sn = s->snapshots + i; - check_refcounts_l1(bs, refcount_table, nb_clusters, - sn->l1_table_offset, sn->l1_size, 0); - } - errors += inc_refcounts(bs, refcount_table, nb_clusters, - s->snapshots_offset, s->snapshots_size); - - /* refcount data */ - errors += inc_refcounts(bs, refcount_table, nb_clusters, - s->refcount_table_offset, - s->refcount_table_size * sizeof(uint64_t)); - for(i = 0; i < s->refcount_table_size; i++) { - int64_t offset; - offset = s->refcount_table[i]; - if (offset != 0) { - errors += inc_refcounts(bs, refcount_table, nb_clusters, - offset, s->cluster_size); - } - } - - /* compare ref counts */ - for(i = 0; i < nb_clusters; i++) { - refcount1 = get_refcount(bs, i); - refcount2 = refcount_table[i]; - if (refcount1 != refcount2) { - fprintf(stderr, "ERROR cluster %d refcount=%d reference=%d\n", - i, refcount1, refcount2); - errors++; - } - } - - qemu_free(refcount_table); - - return errors; -} - -static int qcow_check(BlockDriverState *bs) -{ - return check_refcounts(bs); -} - -#if 0 -static void dump_refcounts(BlockDriverState *bs) -{ - BDRVQcowState *s = bs->opaque; - int64_t nb_clusters, k, k1, size; - int refcount; - - size = bdrv_getlength(s->hd); - nb_clusters = size_to_clusters(s, size); - for(k = 0; k < nb_clusters;) { - k1 = k; - refcount = get_refcount(bs, k); - k++; - while (k < nb_clusters && get_refcount(bs, k) == refcount) - k++; - printf("%lld: refcount=%d nb=%lld\n", k, refcount, k - k1); - } -} -#endif - -static int qcow_put_buffer(BlockDriverState *bs, const uint8_t *buf, - int64_t pos, int size) -{ - int growable = bs->growable; - - bs->growable = 1; - bdrv_pwrite(bs, pos, buf, size); - bs->growable = growable; - - return size; -} - -static int qcow_get_buffer(BlockDriverState *bs, uint8_t *buf, - int64_t pos, int size) -{ - int growable = bs->growable; - int ret; - - bs->growable = 1; - ret = bdrv_pread(bs, pos, buf, size); - bs->growable = growable; - - return ret; -} - -static BlockDriver bdrv_qcow2 = { - .format_name = "qcow2", - .instance_size = sizeof(BDRVQcowState), - .bdrv_probe = qcow_probe, - .bdrv_open = qcow_open, - .bdrv_close = qcow_close, - .bdrv_create = qcow_create, - .bdrv_flush = qcow_flush, - .bdrv_is_allocated = qcow_is_allocated, - .bdrv_set_key = qcow_set_key, - .bdrv_make_empty = qcow_make_empty, - - .bdrv_aio_readv = qcow_aio_readv, - .bdrv_aio_writev = qcow_aio_writev, - .bdrv_aio_cancel = qcow_aio_cancel, - .aiocb_size = sizeof(QCowAIOCB), - .bdrv_write_compressed = qcow_write_compressed, - - .bdrv_snapshot_create = qcow_snapshot_create, - .bdrv_snapshot_goto = qcow_snapshot_goto, - .bdrv_snapshot_delete = qcow_snapshot_delete, - .bdrv_snapshot_list = qcow_snapshot_list, - .bdrv_get_info = qcow_get_info, - - .bdrv_put_buffer = qcow_put_buffer, - .bdrv_get_buffer = qcow_get_buffer, - - .bdrv_create2 = qcow_create2, - .bdrv_check = qcow_check, -}; - -static void bdrv_qcow2_init(void) -{ - bdrv_register(&bdrv_qcow2); -} - -block_init(bdrv_qcow2_init); diff --git a/block-raw-posix.c b/block-raw-posix.c deleted file mode 100644 index f3a9476b4..000000000 --- a/block-raw-posix.c +++ /dev/null @@ -1,1438 +0,0 @@ -/* - * Block driver for RAW files (posix) - * - * Copyright (c) 2006 Fabrice Bellard - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. - */ -#include "qemu-common.h" -#include "qemu-timer.h" -#include "qemu-char.h" -#include "block_int.h" -#include "module.h" -#ifdef CONFIG_AIO -#include "posix-aio-compat.h" -#endif - -#ifdef CONFIG_COCOA -#include -#include -#include -#include -#include -#include -#include -//#include -#include -#endif - -#ifdef __sun__ -#define _POSIX_PTHREAD_SEMANTICS 1 -#include -#include -#endif -#ifdef __linux__ -#include -#include -#include -#endif -#ifdef __FreeBSD__ -#include -#include -#include -#endif - -#ifdef __OpenBSD__ -#include -#include -#include -#endif - -#ifdef __DragonFly__ -#include -#include -#endif - -//#define DEBUG_FLOPPY - -//#define DEBUG_BLOCK -#if defined(DEBUG_BLOCK) -#define DEBUG_BLOCK_PRINT(formatCstr, ...) do { if (qemu_log_enabled()) \ - { qemu_log(formatCstr, ## __VA_ARGS__); qemu_log_flush(); } } while (0) -#else -#define DEBUG_BLOCK_PRINT(formatCstr, ...) -#endif - -/* OS X does not have O_DSYNC */ -#ifndef O_DSYNC -#define O_DSYNC O_SYNC -#endif - -/* Approximate O_DIRECT with O_DSYNC if O_DIRECT isn't available */ -#ifndef O_DIRECT -#define O_DIRECT O_DSYNC -#endif - -#define FTYPE_FILE 0 -#define FTYPE_CD 1 -#define FTYPE_FD 2 - -#define ALIGNED_BUFFER_SIZE (32 * 512) - -/* if the FD is not accessed during that time (in ms), we try to - reopen it to see if the disk has been changed */ -#define FD_OPEN_TIMEOUT 1000 - -typedef struct BDRVRawState { - int fd; - int type; - unsigned int lseek_err_cnt; -#if defined(__linux__) - /* linux floppy specific */ - int fd_open_flags; - int64_t fd_open_time; - int64_t fd_error_time; - int fd_got_error; - int fd_media_changed; -#endif -#if defined(__FreeBSD__) - int cd_open_flags; -#endif - uint8_t* aligned_buf; -} BDRVRawState; - -static int posix_aio_init(void); - -static int fd_open(BlockDriverState *bs); - -#if defined(__FreeBSD__) -static int cd_open(BlockDriverState *bs); -#endif - -static int raw_is_inserted(BlockDriverState *bs); - -static int raw_open(BlockDriverState *bs, const char *filename, int flags) -{ - BDRVRawState *s = bs->opaque; - int fd, open_flags, ret; - - posix_aio_init(); - - s->lseek_err_cnt = 0; - - open_flags = O_BINARY; - if ((flags & BDRV_O_ACCESS) == O_RDWR) { - open_flags |= O_RDWR; - } else { - open_flags |= O_RDONLY; - bs->read_only = 1; - } - if (flags & BDRV_O_CREAT) - open_flags |= O_CREAT | O_TRUNC; - - /* Use O_DSYNC for write-through caching, no flags for write-back caching, - * and O_DIRECT for no caching. */ - if ((flags & BDRV_O_NOCACHE)) - open_flags |= O_DIRECT; - else if (!(flags & BDRV_O_CACHE_WB)) - open_flags |= O_DSYNC; - - s->type = FTYPE_FILE; - - fd = open(filename, open_flags, 0644); - if (fd < 0) { - ret = -errno; - if (ret == -EROFS) - ret = -EACCES; - return ret; - } - s->fd = fd; - s->aligned_buf = NULL; - if ((flags & BDRV_O_NOCACHE)) { - s->aligned_buf = qemu_blockalign(bs, ALIGNED_BUFFER_SIZE); - if (s->aligned_buf == NULL) { - ret = -errno; - close(fd); - return ret; - } - } - return 0; -} - -/* XXX: use host sector size if necessary with: -#ifdef DIOCGSECTORSIZE - { - unsigned int sectorsize = 512; - if (!ioctl(fd, DIOCGSECTORSIZE, §orsize) && - sectorsize > bufsize) - bufsize = sectorsize; - } -#endif -#ifdef CONFIG_COCOA - u_int32_t blockSize = 512; - if ( !ioctl( fd, DKIOCGETBLOCKSIZE, &blockSize ) && blockSize > bufsize) { - bufsize = blockSize; - } -#endif -*/ - -/* - * offset and count are in bytes, but must be multiples of 512 for files - * opened with O_DIRECT. buf must be aligned to 512 bytes then. - * - * This function may be called without alignment if the caller ensures - * that O_DIRECT is not in effect. - */ -static int raw_pread_aligned(BlockDriverState *bs, int64_t offset, - uint8_t *buf, int count) -{ - BDRVRawState *s = bs->opaque; - int ret; - - ret = fd_open(bs); - if (ret < 0) - return ret; - - if (offset >= 0 && lseek(s->fd, offset, SEEK_SET) == (off_t)-1) { - ++(s->lseek_err_cnt); - if(s->lseek_err_cnt <= 10) { - DEBUG_BLOCK_PRINT("raw_pread(%d:%s, %" PRId64 ", %p, %d) [%" PRId64 - "] lseek failed : %d = %s\n", - s->fd, bs->filename, offset, buf, count, - bs->total_sectors, errno, strerror(errno)); - } - return -1; - } - s->lseek_err_cnt=0; - - ret = read(s->fd, buf, count); - if (ret == count) - goto label__raw_read__success; - - DEBUG_BLOCK_PRINT("raw_pread(%d:%s, %" PRId64 ", %p, %d) [%" PRId64 - "] read failed %d : %d = %s\n", - s->fd, bs->filename, offset, buf, count, - bs->total_sectors, ret, errno, strerror(errno)); - - /* Try harder for CDrom. */ - if (bs->type == BDRV_TYPE_CDROM) { - lseek(s->fd, offset, SEEK_SET); - ret = read(s->fd, buf, count); - if (ret == count) - goto label__raw_read__success; - lseek(s->fd, offset, SEEK_SET); - ret = read(s->fd, buf, count); - if (ret == count) - goto label__raw_read__success; - - DEBUG_BLOCK_PRINT("raw_pread(%d:%s, %" PRId64 ", %p, %d) [%" PRId64 - "] retry read failed %d : %d = %s\n", - s->fd, bs->filename, offset, buf, count, - bs->total_sectors, ret, errno, strerror(errno)); - } - -label__raw_read__success: - - return ret; -} - -/* - * offset and count are in bytes, but must be multiples of 512 for files - * opened with O_DIRECT. buf must be aligned to 512 bytes then. - * - * This function may be called without alignment if the caller ensures - * that O_DIRECT is not in effect. - */ -static int raw_pwrite_aligned(BlockDriverState *bs, int64_t offset, - const uint8_t *buf, int count) -{ - BDRVRawState *s = bs->opaque; - int ret; - - ret = fd_open(bs); - if (ret < 0) - return -errno; - - if (offset >= 0 && lseek(s->fd, offset, SEEK_SET) == (off_t)-1) { - ++(s->lseek_err_cnt); - if(s->lseek_err_cnt) { - DEBUG_BLOCK_PRINT("raw_pwrite(%d:%s, %" PRId64 ", %p, %d) [%" - PRId64 "] lseek failed : %d = %s\n", - s->fd, bs->filename, offset, buf, count, - bs->total_sectors, errno, strerror(errno)); - } - return -EIO; - } - s->lseek_err_cnt = 0; - - ret = write(s->fd, buf, count); - if (ret == count) - goto label__raw_write__success; - - DEBUG_BLOCK_PRINT("raw_pwrite(%d:%s, %" PRId64 ", %p, %d) [%" PRId64 - "] write failed %d : %d = %s\n", - s->fd, bs->filename, offset, buf, count, - bs->total_sectors, ret, errno, strerror(errno)); - -label__raw_write__success: - - return (ret < 0) ? -errno : ret; -} - - -/* - * offset and count are in bytes and possibly not aligned. For files opened - * with O_DIRECT, necessary alignments are ensured before calling - * raw_pread_aligned to do the actual read. - */ -static int raw_pread(BlockDriverState *bs, int64_t offset, - uint8_t *buf, int count) -{ - BDRVRawState *s = bs->opaque; - int size, ret, shift, sum; - - sum = 0; - - if (s->aligned_buf != NULL) { - - if (offset & 0x1ff) { - /* align offset on a 512 bytes boundary */ - - shift = offset & 0x1ff; - size = (shift + count + 0x1ff) & ~0x1ff; - if (size > ALIGNED_BUFFER_SIZE) - size = ALIGNED_BUFFER_SIZE; - ret = raw_pread_aligned(bs, offset - shift, s->aligned_buf, size); - if (ret < 0) - return ret; - - size = 512 - shift; - if (size > count) - size = count; - memcpy(buf, s->aligned_buf + shift, size); - - buf += size; - offset += size; - count -= size; - sum += size; - - if (count == 0) - return sum; - } - if (count & 0x1ff || (uintptr_t) buf & 0x1ff) { - - /* read on aligned buffer */ - - while (count) { - - size = (count + 0x1ff) & ~0x1ff; - if (size > ALIGNED_BUFFER_SIZE) - size = ALIGNED_BUFFER_SIZE; - - ret = raw_pread_aligned(bs, offset, s->aligned_buf, size); - if (ret < 0) - return ret; - - size = ret; - if (size > count) - size = count; - - memcpy(buf, s->aligned_buf, size); - - buf += size; - offset += size; - count -= size; - sum += size; - } - - return sum; - } - } - - return raw_pread_aligned(bs, offset, buf, count) + sum; -} - -static int raw_read(BlockDriverState *bs, int64_t sector_num, - uint8_t *buf, int nb_sectors) -{ - int ret; - - ret = raw_pread(bs, sector_num * 512, buf, nb_sectors * 512); - if (ret == (nb_sectors * 512)) - ret = 0; - return ret; -} - -/* - * offset and count are in bytes and possibly not aligned. For files opened - * with O_DIRECT, necessary alignments are ensured before calling - * raw_pwrite_aligned to do the actual write. - */ -static int raw_pwrite(BlockDriverState *bs, int64_t offset, - const uint8_t *buf, int count) -{ - BDRVRawState *s = bs->opaque; - int size, ret, shift, sum; - - sum = 0; - - if (s->aligned_buf != NULL) { - - if (offset & 0x1ff) { - /* align offset on a 512 bytes boundary */ - shift = offset & 0x1ff; - ret = raw_pread_aligned(bs, offset - shift, s->aligned_buf, 512); - if (ret < 0) - return ret; - - size = 512 - shift; - if (size > count) - size = count; - memcpy(s->aligned_buf + shift, buf, size); - - ret = raw_pwrite_aligned(bs, offset - shift, s->aligned_buf, 512); - if (ret < 0) - return ret; - - buf += size; - offset += size; - count -= size; - sum += size; - - if (count == 0) - return sum; - } - if (count & 0x1ff || (uintptr_t) buf & 0x1ff) { - - while ((size = (count & ~0x1ff)) != 0) { - - if (size > ALIGNED_BUFFER_SIZE) - size = ALIGNED_BUFFER_SIZE; - - memcpy(s->aligned_buf, buf, size); - - ret = raw_pwrite_aligned(bs, offset, s->aligned_buf, size); - if (ret < 0) - return ret; - - buf += ret; - offset += ret; - count -= ret; - sum += ret; - } - /* here, count < 512 because (count & ~0x1ff) == 0 */ - if (count) { - ret = raw_pread_aligned(bs, offset, s->aligned_buf, 512); - if (ret < 0) - return ret; - memcpy(s->aligned_buf, buf, count); - - ret = raw_pwrite_aligned(bs, offset, s->aligned_buf, 512); - if (ret < 0) - return ret; - if (count < ret) - ret = count; - - sum += ret; - } - return sum; - } - } - return raw_pwrite_aligned(bs, offset, buf, count) + sum; -} - -static int raw_write(BlockDriverState *bs, int64_t sector_num, - const uint8_t *buf, int nb_sectors) -{ - int ret; - ret = raw_pwrite(bs, sector_num * 512, buf, nb_sectors * 512); - if (ret == (nb_sectors * 512)) - ret = 0; - return ret; -} - -#ifdef CONFIG_AIO -/***********************************************************/ -/* Unix AIO using POSIX AIO */ - -typedef struct RawAIOCB { - BlockDriverAIOCB common; - struct qemu_paiocb aiocb; - struct RawAIOCB *next; - int ret; -} RawAIOCB; - -typedef struct PosixAioState -{ - int rfd, wfd; - RawAIOCB *first_aio; -} PosixAioState; - -static void posix_aio_read(void *opaque) -{ - PosixAioState *s = opaque; - RawAIOCB *acb, **pacb; - int ret; - ssize_t len; - - /* read all bytes from signal pipe */ - for (;;) { - char bytes[16]; - - len = read(s->rfd, bytes, sizeof(bytes)); - if (len == -1 && errno == EINTR) - continue; /* try again */ - if (len == sizeof(bytes)) - continue; /* more to read */ - break; - } - - for(;;) { - pacb = &s->first_aio; - for(;;) { - acb = *pacb; - if (!acb) - goto the_end; - ret = qemu_paio_error(&acb->aiocb); - if (ret == ECANCELED) { - /* remove the request */ - *pacb = acb->next; - qemu_aio_release(acb); - } else if (ret != EINPROGRESS) { - /* end of aio */ - if (ret == 0) { - ret = qemu_paio_return(&acb->aiocb); - if (ret == acb->aiocb.aio_nbytes) - ret = 0; - else - ret = -EINVAL; - } else { - ret = -ret; - } - /* remove the request */ - *pacb = acb->next; - /* call the callback */ - acb->common.cb(acb->common.opaque, ret); - qemu_aio_release(acb); - break; - } else { - pacb = &acb->next; - } - } - } - the_end: ; -} - -static int posix_aio_flush(void *opaque) -{ - PosixAioState *s = opaque; - return !!s->first_aio; -} - -static PosixAioState *posix_aio_state; - -static void aio_signal_handler(int signum) -{ - if (posix_aio_state) { - char byte = 0; - - write(posix_aio_state->wfd, &byte, sizeof(byte)); - } - - qemu_service_io(); -} - -static int posix_aio_init(void) -{ - struct sigaction act; - PosixAioState *s; - int fds[2]; - struct qemu_paioinit ai; - - if (posix_aio_state) - return 0; - - s = qemu_malloc(sizeof(PosixAioState)); - - sigfillset(&act.sa_mask); - act.sa_flags = 0; /* do not restart syscalls to interrupt select() */ - act.sa_handler = aio_signal_handler; - sigaction(SIGUSR2, &act, NULL); - - s->first_aio = NULL; - if (pipe(fds) == -1) { - fprintf(stderr, "failed to create pipe\n"); - return -errno; - } - - s->rfd = fds[0]; - s->wfd = fds[1]; - - fcntl(s->rfd, F_SETFL, O_NONBLOCK); - fcntl(s->wfd, F_SETFL, O_NONBLOCK); - - qemu_aio_set_fd_handler(s->rfd, posix_aio_read, NULL, posix_aio_flush, s); - - memset(&ai, 0, sizeof(ai)); - ai.aio_threads = 64; - ai.aio_num = 64; - qemu_paio_init(&ai); - - posix_aio_state = s; - - return 0; -} - -static RawAIOCB *raw_aio_setup(BlockDriverState *bs, int64_t sector_num, - QEMUIOVector *qiov, int nb_sectors, - BlockDriverCompletionFunc *cb, void *opaque) -{ - BDRVRawState *s = bs->opaque; - RawAIOCB *acb; - - if (fd_open(bs) < 0) - return NULL; - - acb = qemu_aio_get(bs, cb, opaque); - if (!acb) - return NULL; - acb->aiocb.aio_fildes = s->fd; - acb->aiocb.ev_signo = SIGUSR2; - acb->aiocb.aio_iov = qiov->iov; - acb->aiocb.aio_niov = qiov->niov; - acb->aiocb.aio_nbytes = nb_sectors * 512; - acb->aiocb.aio_offset = sector_num * 512; - acb->aiocb.aio_flags = 0; - - /* - * If O_DIRECT is used the buffer needs to be aligned on a sector - * boundary. Tell the low level code to ensure that in case it's - * not done yet. - */ - if (s->aligned_buf) - acb->aiocb.aio_flags |= QEMU_AIO_SECTOR_ALIGNED; - - acb->next = posix_aio_state->first_aio; - posix_aio_state->first_aio = acb; - return acb; -} - -static void raw_aio_remove(RawAIOCB *acb) -{ - RawAIOCB **pacb; - - /* remove the callback from the queue */ - pacb = &posix_aio_state->first_aio; - for(;;) { - if (*pacb == NULL) { - fprintf(stderr, "raw_aio_remove: aio request not found!\n"); - break; - } else if (*pacb == acb) { - *pacb = acb->next; - qemu_aio_release(acb); - break; - } - pacb = &(*pacb)->next; - } -} - -static BlockDriverAIOCB *raw_aio_readv(BlockDriverState *bs, - int64_t sector_num, QEMUIOVector *qiov, int nb_sectors, - BlockDriverCompletionFunc *cb, void *opaque) -{ - RawAIOCB *acb; - - acb = raw_aio_setup(bs, sector_num, qiov, nb_sectors, cb, opaque); - if (!acb) - return NULL; - if (qemu_paio_read(&acb->aiocb) < 0) { - raw_aio_remove(acb); - return NULL; - } - return &acb->common; -} - -static BlockDriverAIOCB *raw_aio_writev(BlockDriverState *bs, - int64_t sector_num, QEMUIOVector *qiov, int nb_sectors, - BlockDriverCompletionFunc *cb, void *opaque) -{ - RawAIOCB *acb; - - acb = raw_aio_setup(bs, sector_num, qiov, nb_sectors, cb, opaque); - if (!acb) - return NULL; - if (qemu_paio_write(&acb->aiocb) < 0) { - raw_aio_remove(acb); - return NULL; - } - return &acb->common; -} - -static void raw_aio_cancel(BlockDriverAIOCB *blockacb) -{ - int ret; - RawAIOCB *acb = (RawAIOCB *)blockacb; - - ret = qemu_paio_cancel(acb->aiocb.aio_fildes, &acb->aiocb); - if (ret == QEMU_PAIO_NOTCANCELED) { - /* fail safe: if the aio could not be canceled, we wait for - it */ - while (qemu_paio_error(&acb->aiocb) == EINPROGRESS); - } - - raw_aio_remove(acb); -} -#else /* CONFIG_AIO */ -static int posix_aio_init(void) -{ - return 0; -} -#endif /* CONFIG_AIO */ - - -static void raw_close(BlockDriverState *bs) -{ - BDRVRawState *s = bs->opaque; - if (s->fd >= 0) { - close(s->fd); - s->fd = -1; - if (s->aligned_buf != NULL) - qemu_free(s->aligned_buf); - } -} - -static int raw_truncate(BlockDriverState *bs, int64_t offset) -{ - BDRVRawState *s = bs->opaque; - if (s->type != FTYPE_FILE) - return -ENOTSUP; - if (ftruncate(s->fd, offset) < 0) - return -errno; - return 0; -} - -#ifdef __OpenBSD__ -static int64_t raw_getlength(BlockDriverState *bs) -{ - BDRVRawState *s = bs->opaque; - int fd = s->fd; - struct stat st; - - if (fstat(fd, &st)) - return -1; - if (S_ISCHR(st.st_mode) || S_ISBLK(st.st_mode)) { - struct disklabel dl; - - if (ioctl(fd, DIOCGDINFO, &dl)) - return -1; - return (uint64_t)dl.d_secsize * - dl.d_partitions[DISKPART(st.st_rdev)].p_size; - } else - return st.st_size; -} -#else /* !__OpenBSD__ */ -static int64_t raw_getlength(BlockDriverState *bs) -{ - BDRVRawState *s = bs->opaque; - int fd = s->fd; - int64_t size; -#ifdef HOST_BSD - struct stat sb; -#ifdef __FreeBSD__ - int reopened = 0; -#endif -#endif -#ifdef __sun__ - struct dk_minfo minfo; - int rv; -#endif - int ret; - - ret = fd_open(bs); - if (ret < 0) - return ret; - -#ifdef HOST_BSD -#ifdef __FreeBSD__ -again: -#endif - if (!fstat(fd, &sb) && (S_IFCHR & sb.st_mode)) { -#ifdef DIOCGMEDIASIZE - if (ioctl(fd, DIOCGMEDIASIZE, (off_t *)&size)) -#elif defined(DIOCGPART) - { - struct partinfo pi; - if (ioctl(fd, DIOCGPART, &pi) == 0) - size = pi.media_size; - else - size = 0; - } - if (size == 0) -#endif -#ifdef CONFIG_COCOA - size = LONG_LONG_MAX; -#else - size = lseek(fd, 0LL, SEEK_END); -#endif -#ifdef __FreeBSD__ - switch(s->type) { - case FTYPE_CD: - /* XXX FreeBSD acd returns UINT_MAX sectors for an empty drive */ - if (size == 2048LL * (unsigned)-1) - size = 0; - /* XXX no disc? maybe we need to reopen... */ - if (size <= 0 && !reopened && cd_open(bs) >= 0) { - reopened = 1; - goto again; - } - } -#endif - } else -#endif -#ifdef __sun__ - /* - * use the DKIOCGMEDIAINFO ioctl to read the size. - */ - rv = ioctl ( fd, DKIOCGMEDIAINFO, &minfo ); - if ( rv != -1 ) { - size = minfo.dki_lbsize * minfo.dki_capacity; - } else /* there are reports that lseek on some devices - fails, but irc discussion said that contingency - on contingency was overkill */ -#endif - { - size = lseek(fd, 0, SEEK_END); - } - return size; -} -#endif - -static int raw_create(const char *filename, int64_t total_size, - const char *backing_file, int flags) -{ - int fd; - - if (flags || backing_file) - return -ENOTSUP; - - fd = open(filename, O_WRONLY | O_CREAT | O_TRUNC | O_BINARY, - 0644); - if (fd < 0) - return -EIO; - ftruncate(fd, total_size * 512); - close(fd); - return 0; -} - -static void raw_flush(BlockDriverState *bs) -{ - BDRVRawState *s = bs->opaque; - fsync(s->fd); -} - -static BlockDriver bdrv_raw = { - .format_name = "raw", - .instance_size = sizeof(BDRVRawState), - .bdrv_probe = NULL, /* no probe for protocols */ - .bdrv_open = raw_open, - .bdrv_read = raw_read, - .bdrv_write = raw_write, - .bdrv_close = raw_close, - .bdrv_create = raw_create, - .bdrv_flush = raw_flush, - -#ifdef CONFIG_AIO - .bdrv_aio_readv = raw_aio_readv, - .bdrv_aio_writev = raw_aio_writev, - .bdrv_aio_cancel = raw_aio_cancel, - .aiocb_size = sizeof(RawAIOCB), -#endif - - .bdrv_truncate = raw_truncate, - .bdrv_getlength = raw_getlength, -}; - -/***********************************************/ -/* host device */ - -#ifdef CONFIG_COCOA -static kern_return_t FindEjectableCDMedia( io_iterator_t *mediaIterator ); -static kern_return_t GetBSDPath( io_iterator_t mediaIterator, char *bsdPath, CFIndex maxPathSize ); - -kern_return_t FindEjectableCDMedia( io_iterator_t *mediaIterator ) -{ - kern_return_t kernResult; - mach_port_t masterPort; - CFMutableDictionaryRef classesToMatch; - - kernResult = IOMasterPort( MACH_PORT_NULL, &masterPort ); - if ( KERN_SUCCESS != kernResult ) { - printf( "IOMasterPort returned %d\n", kernResult ); - } - - classesToMatch = IOServiceMatching( kIOCDMediaClass ); - if ( classesToMatch == NULL ) { - printf( "IOServiceMatching returned a NULL dictionary.\n" ); - } else { - CFDictionarySetValue( classesToMatch, CFSTR( kIOMediaEjectableKey ), kCFBooleanTrue ); - } - kernResult = IOServiceGetMatchingServices( masterPort, classesToMatch, mediaIterator ); - if ( KERN_SUCCESS != kernResult ) - { - printf( "IOServiceGetMatchingServices returned %d\n", kernResult ); - } - - return kernResult; -} - -kern_return_t GetBSDPath( io_iterator_t mediaIterator, char *bsdPath, CFIndex maxPathSize ) -{ - io_object_t nextMedia; - kern_return_t kernResult = KERN_FAILURE; - *bsdPath = '\0'; - nextMedia = IOIteratorNext( mediaIterator ); - if ( nextMedia ) - { - CFTypeRef bsdPathAsCFString; - bsdPathAsCFString = IORegistryEntryCreateCFProperty( nextMedia, CFSTR( kIOBSDNameKey ), kCFAllocatorDefault, 0 ); - if ( bsdPathAsCFString ) { - size_t devPathLength; - strcpy( bsdPath, _PATH_DEV ); - strcat( bsdPath, "r" ); - devPathLength = strlen( bsdPath ); - if ( CFStringGetCString( bsdPathAsCFString, bsdPath + devPathLength, maxPathSize - devPathLength, kCFStringEncodingASCII ) ) { - kernResult = KERN_SUCCESS; - } - CFRelease( bsdPathAsCFString ); - } - IOObjectRelease( nextMedia ); - } - - return kernResult; -} - -#endif - -static int hdev_open(BlockDriverState *bs, const char *filename, int flags) -{ - BDRVRawState *s = bs->opaque; - int fd, open_flags, ret; - - posix_aio_init(); - -#ifdef CONFIG_COCOA - if (strstart(filename, "/dev/cdrom", NULL)) { - kern_return_t kernResult; - io_iterator_t mediaIterator; - char bsdPath[ MAXPATHLEN ]; - int fd; - - kernResult = FindEjectableCDMedia( &mediaIterator ); - kernResult = GetBSDPath( mediaIterator, bsdPath, sizeof( bsdPath ) ); - - if ( bsdPath[ 0 ] != '\0' ) { - strcat(bsdPath,"s0"); - /* some CDs don't have a partition 0 */ - fd = open(bsdPath, O_RDONLY | O_BINARY | O_LARGEFILE); - if (fd < 0) { - bsdPath[strlen(bsdPath)-1] = '1'; - } else { - close(fd); - } - filename = bsdPath; - } - - if ( mediaIterator ) - IOObjectRelease( mediaIterator ); - } -#endif - open_flags = O_BINARY; - if ((flags & BDRV_O_ACCESS) == O_RDWR) { - open_flags |= O_RDWR; - } else { - open_flags |= O_RDONLY; - bs->read_only = 1; - } - /* Use O_DSYNC for write-through caching, no flags for write-back caching, - * and O_DIRECT for no caching. */ - if ((flags & BDRV_O_NOCACHE)) - open_flags |= O_DIRECT; - else if (!(flags & BDRV_O_CACHE_WB)) - open_flags |= O_DSYNC; - - s->type = FTYPE_FILE; -#if defined(__linux__) - if (strstart(filename, "/dev/cd", NULL)) { - /* open will not fail even if no CD is inserted */ - open_flags |= O_NONBLOCK; - s->type = FTYPE_CD; - } else if (strstart(filename, "/dev/fd", NULL)) { - s->type = FTYPE_FD; - s->fd_open_flags = open_flags; - /* open will not fail even if no floppy is inserted */ - open_flags |= O_NONBLOCK; -#ifdef CONFIG_AIO - } else if (strstart(filename, "/dev/sg", NULL)) { - bs->sg = 1; -#endif - } -#endif -#if defined(__FreeBSD__) - if (strstart(filename, "/dev/cd", NULL) || - strstart(filename, "/dev/acd", NULL)) { - s->type = FTYPE_CD; - s->cd_open_flags = open_flags; - } -#endif - s->fd = -1; - fd = open(filename, open_flags, 0644); - if (fd < 0) { - ret = -errno; - if (ret == -EROFS) - ret = -EACCES; - return ret; - } - s->fd = fd; -#if defined(__FreeBSD__) - /* make sure the door isnt locked at this time */ - if (s->type == FTYPE_CD) - ioctl (s->fd, CDIOCALLOW); -#endif -#if defined(__linux__) - /* close fd so that we can reopen it as needed */ - if (s->type == FTYPE_FD) { - close(s->fd); - s->fd = -1; - s->fd_media_changed = 1; - } -#endif - return 0; -} - -#if defined(__linux__) -/* Note: we do not have a reliable method to detect if the floppy is - present. The current method is to try to open the floppy at every - I/O and to keep it opened during a few hundreds of ms. */ -static int fd_open(BlockDriverState *bs) -{ - BDRVRawState *s = bs->opaque; - int last_media_present; - - if (s->type != FTYPE_FD) - return 0; - last_media_present = (s->fd >= 0); - if (s->fd >= 0 && - (qemu_get_clock(rt_clock) - s->fd_open_time) >= FD_OPEN_TIMEOUT) { - close(s->fd); - s->fd = -1; -#ifdef DEBUG_FLOPPY - printf("Floppy closed\n"); -#endif - } - if (s->fd < 0) { - if (s->fd_got_error && - (qemu_get_clock(rt_clock) - s->fd_error_time) < FD_OPEN_TIMEOUT) { -#ifdef DEBUG_FLOPPY - printf("No floppy (open delayed)\n"); -#endif - return -EIO; - } - s->fd = open(bs->filename, s->fd_open_flags); - if (s->fd < 0) { - s->fd_error_time = qemu_get_clock(rt_clock); - s->fd_got_error = 1; - if (last_media_present) - s->fd_media_changed = 1; -#ifdef DEBUG_FLOPPY - printf("No floppy\n"); -#endif - return -EIO; - } -#ifdef DEBUG_FLOPPY - printf("Floppy opened\n"); -#endif - } - if (!last_media_present) - s->fd_media_changed = 1; - s->fd_open_time = qemu_get_clock(rt_clock); - s->fd_got_error = 0; - return 0; -} - -static int raw_is_inserted(BlockDriverState *bs) -{ - BDRVRawState *s = bs->opaque; - int ret; - - switch(s->type) { - case FTYPE_CD: - ret = ioctl(s->fd, CDROM_DRIVE_STATUS, CDSL_CURRENT); - if (ret == CDS_DISC_OK) - return 1; - else - return 0; - break; - case FTYPE_FD: - ret = fd_open(bs); - return (ret >= 0); - default: - return 1; - } -} - -/* currently only used by fdc.c, but a CD version would be good too */ -static int raw_media_changed(BlockDriverState *bs) -{ - BDRVRawState *s = bs->opaque; - - switch(s->type) { - case FTYPE_FD: - { - int ret; - /* XXX: we do not have a true media changed indication. It - does not work if the floppy is changed without trying - to read it */ - fd_open(bs); - ret = s->fd_media_changed; - s->fd_media_changed = 0; -#ifdef DEBUG_FLOPPY - printf("Floppy changed=%d\n", ret); -#endif - return ret; - } - default: - return -ENOTSUP; - } -} - -static int raw_eject(BlockDriverState *bs, int eject_flag) -{ - BDRVRawState *s = bs->opaque; - - switch(s->type) { - case FTYPE_CD: - if (eject_flag) { - if (ioctl (s->fd, CDROMEJECT, NULL) < 0) - perror("CDROMEJECT"); - } else { - if (ioctl (s->fd, CDROMCLOSETRAY, NULL) < 0) - perror("CDROMEJECT"); - } - break; - case FTYPE_FD: - { - int fd; - if (s->fd >= 0) { - close(s->fd); - s->fd = -1; - } - fd = open(bs->filename, s->fd_open_flags | O_NONBLOCK); - if (fd >= 0) { - if (ioctl(fd, FDEJECT, 0) < 0) - perror("FDEJECT"); - close(fd); - } - } - break; - default: - return -ENOTSUP; - } - return 0; -} - -static int raw_set_locked(BlockDriverState *bs, int locked) -{ - BDRVRawState *s = bs->opaque; - - switch(s->type) { - case FTYPE_CD: - if (ioctl (s->fd, CDROM_LOCKDOOR, locked) < 0) { - /* Note: an error can happen if the distribution automatically - mounts the CD-ROM */ - // perror("CDROM_LOCKDOOR"); - } - break; - default: - return -ENOTSUP; - } - return 0; -} - -static int raw_ioctl(BlockDriverState *bs, unsigned long int req, void *buf) -{ - BDRVRawState *s = bs->opaque; - - return ioctl(s->fd, req, buf); -} - -#ifdef CONFIG_AIO -static BlockDriverAIOCB *raw_aio_ioctl(BlockDriverState *bs, - unsigned long int req, void *buf, - BlockDriverCompletionFunc *cb, void *opaque) -{ - BDRVRawState *s = bs->opaque; - RawAIOCB *acb; - - if (fd_open(bs) < 0) - return NULL; - - acb = qemu_aio_get(bs, cb, opaque); - if (!acb) - return NULL; - acb->aiocb.aio_fildes = s->fd; - acb->aiocb.ev_signo = SIGUSR2; - acb->aiocb.aio_offset = 0; - acb->aiocb.aio_flags = 0; - - acb->next = posix_aio_state->first_aio; - posix_aio_state->first_aio = acb; - - acb->aiocb.aio_ioctl_buf = buf; - acb->aiocb.aio_ioctl_cmd = req; - if (qemu_paio_ioctl(&acb->aiocb) < 0) { - raw_aio_remove(acb); - return NULL; - } - - return &acb->common; -} -#endif - -#elif defined(__FreeBSD__) - -static int fd_open(BlockDriverState *bs) -{ - BDRVRawState *s = bs->opaque; - - /* this is just to ensure s->fd is sane (its called by io ops) */ - if (s->fd >= 0) - return 0; - return -EIO; -} - -static int cd_open(BlockDriverState *bs) -{ -#if defined(__FreeBSD__) - BDRVRawState *s = bs->opaque; - int fd; - - switch(s->type) { - case FTYPE_CD: - /* XXX force reread of possibly changed/newly loaded disc, - * FreeBSD seems to not notice sometimes... */ - if (s->fd >= 0) - close (s->fd); - fd = open(bs->filename, s->cd_open_flags, 0644); - if (fd < 0) { - s->fd = -1; - return -EIO; - } - s->fd = fd; - /* make sure the door isnt locked at this time */ - ioctl (s->fd, CDIOCALLOW); - } -#endif - return 0; -} - -static int raw_is_inserted(BlockDriverState *bs) -{ - BDRVRawState *s = bs->opaque; - - switch(s->type) { - case FTYPE_CD: - return (raw_getlength(bs) > 0); - case FTYPE_FD: - /* XXX handle this */ - /* FALLTHRU */ - default: - return 1; - } -} - -static int raw_media_changed(BlockDriverState *bs) -{ - return -ENOTSUP; -} - -static int raw_eject(BlockDriverState *bs, int eject_flag) -{ - BDRVRawState *s = bs->opaque; - - switch(s->type) { - case FTYPE_CD: - if (s->fd < 0) - return -ENOTSUP; - (void) ioctl (s->fd, CDIOCALLOW); - if (eject_flag) { - if (ioctl (s->fd, CDIOCEJECT) < 0) - perror("CDIOCEJECT"); - } else { - if (ioctl (s->fd, CDIOCCLOSE) < 0) - perror("CDIOCCLOSE"); - } - if (cd_open(bs) < 0) - return -ENOTSUP; - break; - case FTYPE_FD: - /* XXX handle this */ - /* FALLTHRU */ - default: - return -ENOTSUP; - } - return 0; -} - -static int raw_set_locked(BlockDriverState *bs, int locked) -{ - BDRVRawState *s = bs->opaque; - - switch(s->type) { - case FTYPE_CD: - if (s->fd < 0) - return -ENOTSUP; - if (ioctl (s->fd, (locked ? CDIOCPREVENT : CDIOCALLOW)) < 0) { - /* Note: an error can happen if the distribution automatically - mounts the CD-ROM */ - // perror("CDROM_LOCKDOOR"); - } - break; - default: - return -ENOTSUP; - } - return 0; -} - -static int raw_ioctl(BlockDriverState *bs, unsigned long int req, void *buf) -{ - return -ENOTSUP; -} -#else /* !linux && !FreeBSD */ - -static int fd_open(BlockDriverState *bs) -{ - return 0; -} - -static int raw_is_inserted(BlockDriverState *bs) -{ - return 1; -} - -static int raw_media_changed(BlockDriverState *bs) -{ - return -ENOTSUP; -} - -static int raw_eject(BlockDriverState *bs, int eject_flag) -{ - return -ENOTSUP; -} - -static int raw_set_locked(BlockDriverState *bs, int locked) -{ - return -ENOTSUP; -} - -static int raw_ioctl(BlockDriverState *bs, unsigned long int req, void *buf) -{ - return -ENOTSUP; -} - -static BlockDriverAIOCB *raw_aio_ioctl(BlockDriverState *bs, - unsigned long int req, void *buf, - BlockDriverCompletionFunc *cb, void *opaque) -{ - return NULL; -} -#endif /* !linux && !FreeBSD */ - -#if defined(__linux__) || defined(__FreeBSD__) -static int hdev_create(const char *filename, int64_t total_size, - const char *backing_file, int flags) -{ - int fd; - int ret = 0; - struct stat stat_buf; - - if (flags || backing_file) - return -ENOTSUP; - - fd = open(filename, O_WRONLY | O_BINARY); - if (fd < 0) - return -EIO; - - if (fstat(fd, &stat_buf) < 0) - ret = -EIO; - else if (!S_ISBLK(stat_buf.st_mode)) - ret = -EIO; - else if (lseek(fd, 0, SEEK_END) < total_size * 512) - ret = -ENOSPC; - - close(fd); - return ret; -} - -#else /* !(linux || freebsd) */ - -static int hdev_create(const char *filename, int64_t total_size, - const char *backing_file, int flags) -{ - return -ENOTSUP; -} -#endif - -static BlockDriver bdrv_host_device = { - .format_name = "host_device", - .instance_size = sizeof(BDRVRawState), - .bdrv_open = hdev_open, - .bdrv_close = raw_close, - .bdrv_create = hdev_create, - .bdrv_flush = raw_flush, - -#ifdef CONFIG_AIO - .bdrv_aio_readv = raw_aio_readv, - .bdrv_aio_writev = raw_aio_writev, - .bdrv_aio_cancel = raw_aio_cancel, - .aiocb_size = sizeof(RawAIOCB), -#endif - - .bdrv_read = raw_read, - .bdrv_write = raw_write, - .bdrv_getlength = raw_getlength, - - /* removable device support */ - .bdrv_is_inserted = raw_is_inserted, - .bdrv_media_changed = raw_media_changed, - .bdrv_eject = raw_eject, - .bdrv_set_locked = raw_set_locked, - /* generic scsi device */ - .bdrv_ioctl = raw_ioctl, -#ifdef CONFIG_AIO - .bdrv_aio_ioctl = raw_aio_ioctl, -#endif -}; - -static void bdrv_raw_init(void) -{ - bdrv_register(&bdrv_raw); - bdrv_register(&bdrv_host_device); -} - -block_init(bdrv_raw_init); diff --git a/block-raw-win32.c b/block-raw-win32.c deleted file mode 100644 index 15f3ec484..000000000 --- a/block-raw-win32.c +++ /dev/null @@ -1,393 +0,0 @@ -/* - * Block driver for RAW files (win32) - * - * Copyright (c) 2006 Fabrice Bellard - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. - */ -#include "qemu-common.h" -#include "qemu-timer.h" -#include "block_int.h" -#include "module.h" -#include -#include - -#define FTYPE_FILE 0 -#define FTYPE_CD 1 -#define FTYPE_HARDDISK 2 - -typedef struct BDRVRawState { - HANDLE hfile; - int type; - char drive_path[16]; /* format: "d:\" */ -} BDRVRawState; - -int qemu_ftruncate64(int fd, int64_t length) -{ - LARGE_INTEGER li; - LONG high; - HANDLE h; - BOOL res; - - if ((GetVersion() & 0x80000000UL) && (length >> 32) != 0) - return -1; - - h = (HANDLE)_get_osfhandle(fd); - - /* get current position, ftruncate do not change position */ - li.HighPart = 0; - li.LowPart = SetFilePointer (h, 0, &li.HighPart, FILE_CURRENT); - if (li.LowPart == 0xffffffffUL && GetLastError() != NO_ERROR) - return -1; - - high = length >> 32; - if (!SetFilePointer(h, (DWORD) length, &high, FILE_BEGIN)) - return -1; - res = SetEndOfFile(h); - - /* back to old position */ - SetFilePointer(h, li.LowPart, &li.HighPart, FILE_BEGIN); - return res ? 0 : -1; -} - -static int set_sparse(int fd) -{ - DWORD returned; - return (int) DeviceIoControl((HANDLE)_get_osfhandle(fd), FSCTL_SET_SPARSE, - NULL, 0, NULL, 0, &returned, NULL); -} - -static int raw_open(BlockDriverState *bs, const char *filename, int flags) -{ - BDRVRawState *s = bs->opaque; - int access_flags, create_flags; - DWORD overlapped; - - s->type = FTYPE_FILE; - - if ((flags & BDRV_O_ACCESS) == O_RDWR) { - access_flags = GENERIC_READ | GENERIC_WRITE; - } else { - access_flags = GENERIC_READ; - } - if (flags & BDRV_O_CREAT) { - create_flags = CREATE_ALWAYS; - } else { - create_flags = OPEN_EXISTING; - } - overlapped = FILE_ATTRIBUTE_NORMAL; - if ((flags & BDRV_O_NOCACHE)) - overlapped |= FILE_FLAG_NO_BUFFERING | FILE_FLAG_WRITE_THROUGH; - else if (!(flags & BDRV_O_CACHE_WB)) - overlapped |= FILE_FLAG_WRITE_THROUGH; - s->hfile = CreateFile(filename, access_flags, - FILE_SHARE_READ, NULL, - create_flags, overlapped, NULL); - if (s->hfile == INVALID_HANDLE_VALUE) { - int err = GetLastError(); - - if (err == ERROR_ACCESS_DENIED) - return -EACCES; - return -1; - } - return 0; -} - -static int raw_read(BlockDriverState *bs, int64_t sector_num, - uint8_t *buf, int nb_sectors) -{ - BDRVRawState *s = bs->opaque; - OVERLAPPED ov; - DWORD ret_count; - int ret; - int64_t offset = sector_num * 512; - int count = nb_sectors * 512; - - memset(&ov, 0, sizeof(ov)); - ov.Offset = offset; - ov.OffsetHigh = offset >> 32; - ret = ReadFile(s->hfile, buf, count, &ret_count, &ov); - if (!ret) - return ret_count; - if (ret_count == count) - ret_count = 0; - return ret_count; -} - -static int raw_write(BlockDriverState *bs, int64_t sector_num, - const uint8_t *buf, int nb_sectors) -{ - BDRVRawState *s = bs->opaque; - OVERLAPPED ov; - DWORD ret_count; - int ret; - int64_t offset = sector_num * 512; - int count = nb_sectors * 512; - - memset(&ov, 0, sizeof(ov)); - ov.Offset = offset; - ov.OffsetHigh = offset >> 32; - ret = WriteFile(s->hfile, buf, count, &ret_count, &ov); - if (!ret) - return ret_count; - if (ret_count == count) - ret_count = 0; - return ret_count; -} - -static void raw_flush(BlockDriverState *bs) -{ - BDRVRawState *s = bs->opaque; - FlushFileBuffers(s->hfile); -} - -static void raw_close(BlockDriverState *bs) -{ - BDRVRawState *s = bs->opaque; - CloseHandle(s->hfile); -} - -static int raw_truncate(BlockDriverState *bs, int64_t offset) -{ - BDRVRawState *s = bs->opaque; - LONG low, high; - - low = offset; - high = offset >> 32; - if (!SetFilePointer(s->hfile, low, &high, FILE_BEGIN)) - return -EIO; - if (!SetEndOfFile(s->hfile)) - return -EIO; - return 0; -} - -static int64_t raw_getlength(BlockDriverState *bs) -{ - BDRVRawState *s = bs->opaque; - LARGE_INTEGER l; - ULARGE_INTEGER available, total, total_free; - DISK_GEOMETRY_EX dg; - DWORD count; - BOOL status; - - switch(s->type) { - case FTYPE_FILE: - l.LowPart = GetFileSize(s->hfile, (PDWORD)&l.HighPart); - if (l.LowPart == 0xffffffffUL && GetLastError() != NO_ERROR) - return -EIO; - break; - case FTYPE_CD: - if (!GetDiskFreeSpaceEx(s->drive_path, &available, &total, &total_free)) - return -EIO; - l.QuadPart = total.QuadPart; - break; - case FTYPE_HARDDISK: - status = DeviceIoControl(s->hfile, IOCTL_DISK_GET_DRIVE_GEOMETRY_EX, - NULL, 0, &dg, sizeof(dg), &count, NULL); - if (status != 0) { - l = dg.DiskSize; - } - break; - default: - return -EIO; - } - return l.QuadPart; -} - -static int raw_create(const char *filename, int64_t total_size, - const char *backing_file, int flags) -{ - int fd; - - if (flags || backing_file) - return -ENOTSUP; - - fd = open(filename, O_WRONLY | O_CREAT | O_TRUNC | O_BINARY, - 0644); - if (fd < 0) - return -EIO; - set_sparse(fd); - ftruncate(fd, total_size * 512); - close(fd); - return 0; -} - -static BlockDriver bdrv_raw = { - .format_name = "raw", - .instance_size = sizeof(BDRVRawState), - .bdrv_open = raw_open, - .bdrv_close = raw_close, - .bdrv_create = raw_create, - .bdrv_flush = raw_flush, - .bdrv_read = raw_read, - .bdrv_write = raw_write, - .bdrv_truncate = raw_truncate, - .bdrv_getlength = raw_getlength, -}; - -/***********************************************/ -/* host device */ - -static int find_cdrom(char *cdrom_name, int cdrom_name_size) -{ - char drives[256], *pdrv = drives; - UINT type; - - memset(drives, 0, sizeof(drives)); - GetLogicalDriveStrings(sizeof(drives), drives); - while(pdrv[0] != '\0') { - type = GetDriveType(pdrv); - switch(type) { - case DRIVE_CDROM: - snprintf(cdrom_name, cdrom_name_size, "\\\\.\\%c:", pdrv[0]); - return 0; - break; - } - pdrv += lstrlen(pdrv) + 1; - } - return -1; -} - -static int find_device_type(BlockDriverState *bs, const char *filename) -{ - BDRVRawState *s = bs->opaque; - UINT type; - const char *p; - - if (strstart(filename, "\\\\.\\", &p) || - strstart(filename, "//./", &p)) { - if (stristart(p, "PhysicalDrive", NULL)) - return FTYPE_HARDDISK; - snprintf(s->drive_path, sizeof(s->drive_path), "%c:\\", p[0]); - type = GetDriveType(s->drive_path); - switch (type) { - case DRIVE_REMOVABLE: - case DRIVE_FIXED: - return FTYPE_HARDDISK; - case DRIVE_CDROM: - return FTYPE_CD; - default: - return FTYPE_FILE; - } - } else { - return FTYPE_FILE; - } -} - -static int hdev_open(BlockDriverState *bs, const char *filename, int flags) -{ - BDRVRawState *s = bs->opaque; - int access_flags, create_flags; - DWORD overlapped; - char device_name[64]; - - if (strstart(filename, "/dev/cdrom", NULL)) { - if (find_cdrom(device_name, sizeof(device_name)) < 0) - return -ENOENT; - filename = device_name; - } else { - /* transform drive letters into device name */ - if (((filename[0] >= 'a' && filename[0] <= 'z') || - (filename[0] >= 'A' && filename[0] <= 'Z')) && - filename[1] == ':' && filename[2] == '\0') { - snprintf(device_name, sizeof(device_name), "\\\\.\\%c:", filename[0]); - filename = device_name; - } - } - s->type = find_device_type(bs, filename); - - if ((flags & BDRV_O_ACCESS) == O_RDWR) { - access_flags = GENERIC_READ | GENERIC_WRITE; - } else { - access_flags = GENERIC_READ; - } - create_flags = OPEN_EXISTING; - - overlapped = FILE_ATTRIBUTE_NORMAL; - if ((flags & BDRV_O_NOCACHE)) - overlapped |= FILE_FLAG_NO_BUFFERING | FILE_FLAG_WRITE_THROUGH; - else if (!(flags & BDRV_O_CACHE_WB)) - overlapped |= FILE_FLAG_WRITE_THROUGH; - s->hfile = CreateFile(filename, access_flags, - FILE_SHARE_READ, NULL, - create_flags, overlapped, NULL); - if (s->hfile == INVALID_HANDLE_VALUE) { - int err = GetLastError(); - - if (err == ERROR_ACCESS_DENIED) - return -EACCES; - return -1; - } - return 0; -} - -#if 0 -/***********************************************/ -/* removable device additional commands */ - -static int raw_is_inserted(BlockDriverState *bs) -{ - return 1; -} - -static int raw_media_changed(BlockDriverState *bs) -{ - return -ENOTSUP; -} - -static int raw_eject(BlockDriverState *bs, int eject_flag) -{ - DWORD ret_count; - - if (s->type == FTYPE_FILE) - return -ENOTSUP; - if (eject_flag) { - DeviceIoControl(s->hfile, IOCTL_STORAGE_EJECT_MEDIA, - NULL, 0, NULL, 0, &lpBytesReturned, NULL); - } else { - DeviceIoControl(s->hfile, IOCTL_STORAGE_LOAD_MEDIA, - NULL, 0, NULL, 0, &lpBytesReturned, NULL); - } -} - -static int raw_set_locked(BlockDriverState *bs, int locked) -{ - return -ENOTSUP; -} -#endif - -static BlockDriver bdrv_host_device = { - .format_name = "host_device", - .instance_size = sizeof(BDRVRawState), - .bdrv_open = hdev_open, - .bdrv_close = raw_close, - .bdrv_flush = raw_flush, - - .bdrv_read = raw_read, - .bdrv_write = raw_write, - .bdrv_getlength = raw_getlength, -}; - -static void bdrv_raw_init(void) -{ - bdrv_register(&bdrv_raw); - bdrv_register(&bdrv_host_device); -} - -block_init(bdrv_raw_init); diff --git a/block-vmdk.c b/block-vmdk.c deleted file mode 100644 index 13866e9b0..000000000 --- a/block-vmdk.c +++ /dev/null @@ -1,833 +0,0 @@ -/* - * Block driver for the VMDK format - * - * Copyright (c) 2004 Fabrice Bellard - * Copyright (c) 2005 Filip Navara - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. - */ - -#include "qemu-common.h" -#include "block_int.h" -#include "module.h" - -#define VMDK3_MAGIC (('C' << 24) | ('O' << 16) | ('W' << 8) | 'D') -#define VMDK4_MAGIC (('K' << 24) | ('D' << 16) | ('M' << 8) | 'V') - -typedef struct { - uint32_t version; - uint32_t flags; - uint32_t disk_sectors; - uint32_t granularity; - uint32_t l1dir_offset; - uint32_t l1dir_size; - uint32_t file_sectors; - uint32_t cylinders; - uint32_t heads; - uint32_t sectors_per_track; -} VMDK3Header; - -typedef struct { - uint32_t version; - uint32_t flags; - int64_t capacity; - int64_t granularity; - int64_t desc_offset; - int64_t desc_size; - int32_t num_gtes_per_gte; - int64_t rgd_offset; - int64_t gd_offset; - int64_t grain_offset; - char filler[1]; - char check_bytes[4]; -} __attribute__((packed)) VMDK4Header; - -#define L2_CACHE_SIZE 16 - -typedef struct BDRVVmdkState { - BlockDriverState *hd; - int64_t l1_table_offset; - int64_t l1_backup_table_offset; - uint32_t *l1_table; - uint32_t *l1_backup_table; - unsigned int l1_size; - uint32_t l1_entry_sectors; - - unsigned int l2_size; - uint32_t *l2_cache; - uint32_t l2_cache_offsets[L2_CACHE_SIZE]; - uint32_t l2_cache_counts[L2_CACHE_SIZE]; - - unsigned int cluster_sectors; - uint32_t parent_cid; - int is_parent; -} BDRVVmdkState; - -typedef struct VmdkMetaData { - uint32_t offset; - unsigned int l1_index; - unsigned int l2_index; - unsigned int l2_offset; - int valid; -} VmdkMetaData; - -typedef struct ActiveBDRVState{ - BlockDriverState *hd; // active image handler - uint64_t cluster_offset; // current write offset -}ActiveBDRVState; - -static ActiveBDRVState activeBDRV; - - -static int vmdk_probe(const uint8_t *buf, int buf_size, const char *filename) -{ - uint32_t magic; - - if (buf_size < 4) - return 0; - magic = be32_to_cpu(*(uint32_t *)buf); - if (magic == VMDK3_MAGIC || - magic == VMDK4_MAGIC) - return 100; - else - return 0; -} - -#define CHECK_CID 1 - -#define SECTOR_SIZE 512 -#define DESC_SIZE 20*SECTOR_SIZE // 20 sectors of 512 bytes each -#define HEADER_SIZE 512 // first sector of 512 bytes - -static uint32_t vmdk_read_cid(BlockDriverState *bs, int parent) -{ - BDRVVmdkState *s = bs->opaque; - char desc[DESC_SIZE]; - uint32_t cid; - const char *p_name, *cid_str; - size_t cid_str_size; - - /* the descriptor offset = 0x200 */ - if (bdrv_pread(s->hd, 0x200, desc, DESC_SIZE) != DESC_SIZE) - return 0; - - if (parent) { - cid_str = "parentCID"; - cid_str_size = sizeof("parentCID"); - } else { - cid_str = "CID"; - cid_str_size = sizeof("CID"); - } - - if ((p_name = strstr(desc,cid_str)) != NULL) { - p_name += cid_str_size; - sscanf(p_name,"%x",&cid); - } - - return cid; -} - -static int vmdk_write_cid(BlockDriverState *bs, uint32_t cid) -{ - BDRVVmdkState *s = bs->opaque; - char desc[DESC_SIZE], tmp_desc[DESC_SIZE]; - char *p_name, *tmp_str; - - /* the descriptor offset = 0x200 */ - if (bdrv_pread(s->hd, 0x200, desc, DESC_SIZE) != DESC_SIZE) - return -1; - - tmp_str = strstr(desc,"parentCID"); - pstrcpy(tmp_desc, sizeof(tmp_desc), tmp_str); - if ((p_name = strstr(desc,"CID")) != NULL) { - p_name += sizeof("CID"); - snprintf(p_name, sizeof(desc) - (p_name - desc), "%x\n", cid); - pstrcat(desc, sizeof(desc), tmp_desc); - } - - if (bdrv_pwrite(s->hd, 0x200, desc, DESC_SIZE) != DESC_SIZE) - return -1; - return 0; -} - -static int vmdk_is_cid_valid(BlockDriverState *bs) -{ -#ifdef CHECK_CID - BDRVVmdkState *s = bs->opaque; - BlockDriverState *p_bs = s->hd->backing_hd; - uint32_t cur_pcid; - - if (p_bs) { - cur_pcid = vmdk_read_cid(p_bs,0); - if (s->parent_cid != cur_pcid) - // CID not valid - return 0; - } -#endif - // CID valid - return 1; -} - -static int vmdk_snapshot_create(const char *filename, const char *backing_file) -{ - int snp_fd, p_fd; - uint32_t p_cid; - char *p_name, *gd_buf, *rgd_buf; - const char *real_filename, *temp_str; - VMDK4Header header; - uint32_t gde_entries, gd_size; - int64_t gd_offset, rgd_offset, capacity, gt_size; - char p_desc[DESC_SIZE], s_desc[DESC_SIZE], hdr[HEADER_SIZE]; - static const char desc_template[] = - "# Disk DescriptorFile\n" - "version=1\n" - "CID=%x\n" - "parentCID=%x\n" - "createType=\"monolithicSparse\"\n" - "parentFileNameHint=\"%s\"\n" - "\n" - "# Extent description\n" - "RW %u SPARSE \"%s\"\n" - "\n" - "# The Disk Data Base \n" - "#DDB\n" - "\n"; - - snp_fd = open(filename, O_RDWR | O_CREAT | O_TRUNC | O_BINARY | O_LARGEFILE, 0644); - if (snp_fd < 0) - return -1; - p_fd = open(backing_file, O_RDONLY | O_BINARY | O_LARGEFILE); - if (p_fd < 0) { - close(snp_fd); - return -1; - } - - /* read the header */ - if (lseek(p_fd, 0x0, SEEK_SET) == -1) - goto fail; - if (read(p_fd, hdr, HEADER_SIZE) != HEADER_SIZE) - goto fail; - - /* write the header */ - if (lseek(snp_fd, 0x0, SEEK_SET) == -1) - goto fail; - if (write(snp_fd, hdr, HEADER_SIZE) == -1) - goto fail; - - memset(&header, 0, sizeof(header)); - memcpy(&header,&hdr[4], sizeof(header)); // skip the VMDK4_MAGIC - - ftruncate(snp_fd, header.grain_offset << 9); - /* the descriptor offset = 0x200 */ - if (lseek(p_fd, 0x200, SEEK_SET) == -1) - goto fail; - if (read(p_fd, p_desc, DESC_SIZE) != DESC_SIZE) - goto fail; - - if ((p_name = strstr(p_desc,"CID")) != NULL) { - p_name += sizeof("CID"); - sscanf(p_name,"%x",&p_cid); - } - - real_filename = filename; - if ((temp_str = strrchr(real_filename, '\\')) != NULL) - real_filename = temp_str + 1; - if ((temp_str = strrchr(real_filename, '/')) != NULL) - real_filename = temp_str + 1; - if ((temp_str = strrchr(real_filename, ':')) != NULL) - real_filename = temp_str + 1; - - snprintf(s_desc, sizeof(s_desc), desc_template, p_cid, p_cid, backing_file, - (uint32_t)header.capacity, real_filename); - - /* write the descriptor */ - if (lseek(snp_fd, 0x200, SEEK_SET) == -1) - goto fail; - if (write(snp_fd, s_desc, strlen(s_desc)) == -1) - goto fail; - - gd_offset = header.gd_offset * SECTOR_SIZE; // offset of GD table - rgd_offset = header.rgd_offset * SECTOR_SIZE; // offset of RGD table - capacity = header.capacity * SECTOR_SIZE; // Extent size - /* - * Each GDE span 32M disk, means: - * 512 GTE per GT, each GTE points to grain - */ - gt_size = (int64_t)header.num_gtes_per_gte * header.granularity * SECTOR_SIZE; - if (!gt_size) - goto fail; - gde_entries = (uint32_t)(capacity / gt_size); // number of gde/rgde - gd_size = gde_entries * sizeof(uint32_t); - - /* write RGD */ - rgd_buf = qemu_malloc(gd_size); - if (lseek(p_fd, rgd_offset, SEEK_SET) == -1) - goto fail_rgd; - if (read(p_fd, rgd_buf, gd_size) != gd_size) - goto fail_rgd; - if (lseek(snp_fd, rgd_offset, SEEK_SET) == -1) - goto fail_rgd; - if (write(snp_fd, rgd_buf, gd_size) == -1) - goto fail_rgd; - qemu_free(rgd_buf); - - /* write GD */ - gd_buf = qemu_malloc(gd_size); - if (lseek(p_fd, gd_offset, SEEK_SET) == -1) - goto fail_gd; - if (read(p_fd, gd_buf, gd_size) != gd_size) - goto fail_gd; - if (lseek(snp_fd, gd_offset, SEEK_SET) == -1) - goto fail_gd; - if (write(snp_fd, gd_buf, gd_size) == -1) - goto fail_gd; - qemu_free(gd_buf); - - close(p_fd); - close(snp_fd); - return 0; - - fail_gd: - qemu_free(gd_buf); - fail_rgd: - qemu_free(rgd_buf); - fail: - close(p_fd); - close(snp_fd); - return -1; -} - -static void vmdk_parent_close(BlockDriverState *bs) -{ - if (bs->backing_hd) - bdrv_close(bs->backing_hd); -} - -static int parent_open = 0; -static int vmdk_parent_open(BlockDriverState *bs, const char * filename) -{ - BDRVVmdkState *s = bs->opaque; - char *p_name; - char desc[DESC_SIZE]; - char parent_img_name[1024]; - - /* the descriptor offset = 0x200 */ - if (bdrv_pread(s->hd, 0x200, desc, DESC_SIZE) != DESC_SIZE) - return -1; - - if ((p_name = strstr(desc,"parentFileNameHint")) != NULL) { - char *end_name; - struct stat file_buf; - - p_name += sizeof("parentFileNameHint") + 1; - if ((end_name = strchr(p_name,'\"')) == NULL) - return -1; - if ((end_name - p_name) > sizeof (s->hd->backing_file) - 1) - return -1; - - pstrcpy(s->hd->backing_file, end_name - p_name + 1, p_name); - if (stat(s->hd->backing_file, &file_buf) != 0) { - path_combine(parent_img_name, sizeof(parent_img_name), - filename, s->hd->backing_file); - } else { - pstrcpy(parent_img_name, sizeof(parent_img_name), - s->hd->backing_file); - } - - s->hd->backing_hd = bdrv_new(""); - if (!s->hd->backing_hd) { - failure: - bdrv_close(s->hd); - return -1; - } - parent_open = 1; - if (bdrv_open(s->hd->backing_hd, parent_img_name, BDRV_O_RDONLY) < 0) - goto failure; - parent_open = 0; - } - - return 0; -} - -static int vmdk_open(BlockDriverState *bs, const char *filename, int flags) -{ - BDRVVmdkState *s = bs->opaque; - uint32_t magic; - int l1_size, i, ret; - - if (parent_open) - // Parent must be opened as RO. - flags = BDRV_O_RDONLY; - - ret = bdrv_file_open(&s->hd, filename, flags); - if (ret < 0) - return ret; - if (bdrv_pread(s->hd, 0, &magic, sizeof(magic)) != sizeof(magic)) - goto fail; - - magic = be32_to_cpu(magic); - if (magic == VMDK3_MAGIC) { - VMDK3Header header; - - if (bdrv_pread(s->hd, sizeof(magic), &header, sizeof(header)) != sizeof(header)) - goto fail; - s->cluster_sectors = le32_to_cpu(header.granularity); - s->l2_size = 1 << 9; - s->l1_size = 1 << 6; - bs->total_sectors = le32_to_cpu(header.disk_sectors); - s->l1_table_offset = le32_to_cpu(header.l1dir_offset) << 9; - s->l1_backup_table_offset = 0; - s->l1_entry_sectors = s->l2_size * s->cluster_sectors; - } else if (magic == VMDK4_MAGIC) { - VMDK4Header header; - - if (bdrv_pread(s->hd, sizeof(magic), &header, sizeof(header)) != sizeof(header)) - goto fail; - bs->total_sectors = le64_to_cpu(header.capacity); - s->cluster_sectors = le64_to_cpu(header.granularity); - s->l2_size = le32_to_cpu(header.num_gtes_per_gte); - s->l1_entry_sectors = s->l2_size * s->cluster_sectors; - if (s->l1_entry_sectors <= 0) - goto fail; - s->l1_size = (bs->total_sectors + s->l1_entry_sectors - 1) - / s->l1_entry_sectors; - s->l1_table_offset = le64_to_cpu(header.rgd_offset) << 9; - s->l1_backup_table_offset = le64_to_cpu(header.gd_offset) << 9; - - if (parent_open) - s->is_parent = 1; - else - s->is_parent = 0; - - // try to open parent images, if exist - if (vmdk_parent_open(bs, filename) != 0) - goto fail; - // write the CID once after the image creation - s->parent_cid = vmdk_read_cid(bs,1); - } else { - goto fail; - } - - /* read the L1 table */ - l1_size = s->l1_size * sizeof(uint32_t); - s->l1_table = qemu_malloc(l1_size); - if (bdrv_pread(s->hd, s->l1_table_offset, s->l1_table, l1_size) != l1_size) - goto fail; - for(i = 0; i < s->l1_size; i++) { - le32_to_cpus(&s->l1_table[i]); - } - - if (s->l1_backup_table_offset) { - s->l1_backup_table = qemu_malloc(l1_size); - if (bdrv_pread(s->hd, s->l1_backup_table_offset, s->l1_backup_table, l1_size) != l1_size) - goto fail; - for(i = 0; i < s->l1_size; i++) { - le32_to_cpus(&s->l1_backup_table[i]); - } - } - - s->l2_cache = qemu_malloc(s->l2_size * L2_CACHE_SIZE * sizeof(uint32_t)); - return 0; - fail: - qemu_free(s->l1_backup_table); - qemu_free(s->l1_table); - qemu_free(s->l2_cache); - bdrv_delete(s->hd); - return -1; -} - -static uint64_t get_cluster_offset(BlockDriverState *bs, VmdkMetaData *m_data, - uint64_t offset, int allocate); - -static int get_whole_cluster(BlockDriverState *bs, uint64_t cluster_offset, - uint64_t offset, int allocate) -{ - uint64_t parent_cluster_offset; - BDRVVmdkState *s = bs->opaque; - uint8_t whole_grain[s->cluster_sectors*512]; // 128 sectors * 512 bytes each = grain size 64KB - - // we will be here if it's first write on non-exist grain(cluster). - // try to read from parent image, if exist - if (s->hd->backing_hd) { - BDRVVmdkState *ps = s->hd->backing_hd->opaque; - - if (!vmdk_is_cid_valid(bs)) - return -1; - - parent_cluster_offset = get_cluster_offset(s->hd->backing_hd, NULL, offset, allocate); - - if (parent_cluster_offset) { - BDRVVmdkState *act_s = activeBDRV.hd->opaque; - - if (bdrv_pread(ps->hd, parent_cluster_offset, whole_grain, ps->cluster_sectors*512) != ps->cluster_sectors*512) - return -1; - - //Write grain only into the active image - if (bdrv_pwrite(act_s->hd, activeBDRV.cluster_offset << 9, whole_grain, sizeof(whole_grain)) != sizeof(whole_grain)) - return -1; - } - } - return 0; -} - -static int vmdk_L2update(BlockDriverState *bs, VmdkMetaData *m_data) -{ - BDRVVmdkState *s = bs->opaque; - - /* update L2 table */ - if (bdrv_pwrite(s->hd, ((int64_t)m_data->l2_offset * 512) + (m_data->l2_index * sizeof(m_data->offset)), - &(m_data->offset), sizeof(m_data->offset)) != sizeof(m_data->offset)) - return -1; - /* update backup L2 table */ - if (s->l1_backup_table_offset != 0) { - m_data->l2_offset = s->l1_backup_table[m_data->l1_index]; - if (bdrv_pwrite(s->hd, ((int64_t)m_data->l2_offset * 512) + (m_data->l2_index * sizeof(m_data->offset)), - &(m_data->offset), sizeof(m_data->offset)) != sizeof(m_data->offset)) - return -1; - } - - return 0; -} - -static uint64_t get_cluster_offset(BlockDriverState *bs, VmdkMetaData *m_data, - uint64_t offset, int allocate) -{ - BDRVVmdkState *s = bs->opaque; - unsigned int l1_index, l2_offset, l2_index; - int min_index, i, j; - uint32_t min_count, *l2_table, tmp = 0; - uint64_t cluster_offset; - - if (m_data) - m_data->valid = 0; - - l1_index = (offset >> 9) / s->l1_entry_sectors; - if (l1_index >= s->l1_size) - return 0; - l2_offset = s->l1_table[l1_index]; - if (!l2_offset) - return 0; - for(i = 0; i < L2_CACHE_SIZE; i++) { - if (l2_offset == s->l2_cache_offsets[i]) { - /* increment the hit count */ - if (++s->l2_cache_counts[i] == 0xffffffff) { - for(j = 0; j < L2_CACHE_SIZE; j++) { - s->l2_cache_counts[j] >>= 1; - } - } - l2_table = s->l2_cache + (i * s->l2_size); - goto found; - } - } - /* not found: load a new entry in the least used one */ - min_index = 0; - min_count = 0xffffffff; - for(i = 0; i < L2_CACHE_SIZE; i++) { - if (s->l2_cache_counts[i] < min_count) { - min_count = s->l2_cache_counts[i]; - min_index = i; - } - } - l2_table = s->l2_cache + (min_index * s->l2_size); - if (bdrv_pread(s->hd, (int64_t)l2_offset * 512, l2_table, s->l2_size * sizeof(uint32_t)) != - s->l2_size * sizeof(uint32_t)) - return 0; - - s->l2_cache_offsets[min_index] = l2_offset; - s->l2_cache_counts[min_index] = 1; - found: - l2_index = ((offset >> 9) / s->cluster_sectors) % s->l2_size; - cluster_offset = le32_to_cpu(l2_table[l2_index]); - - if (!cluster_offset) { - if (!allocate) - return 0; - // Avoid the L2 tables update for the images that have snapshots. - if (!s->is_parent) { - cluster_offset = bdrv_getlength(s->hd); - bdrv_truncate(s->hd, cluster_offset + (s->cluster_sectors << 9)); - - cluster_offset >>= 9; - tmp = cpu_to_le32(cluster_offset); - l2_table[l2_index] = tmp; - // Save the active image state - activeBDRV.cluster_offset = cluster_offset; - activeBDRV.hd = bs; - } - /* First of all we write grain itself, to avoid race condition - * that may to corrupt the image. - * This problem may occur because of insufficient space on host disk - * or inappropriate VM shutdown. - */ - if (get_whole_cluster(bs, cluster_offset, offset, allocate) == -1) - return 0; - - if (m_data) { - m_data->offset = tmp; - m_data->l1_index = l1_index; - m_data->l2_index = l2_index; - m_data->l2_offset = l2_offset; - m_data->valid = 1; - } - } - cluster_offset <<= 9; - return cluster_offset; -} - -static int vmdk_is_allocated(BlockDriverState *bs, int64_t sector_num, - int nb_sectors, int *pnum) -{ - BDRVVmdkState *s = bs->opaque; - int index_in_cluster, n; - uint64_t cluster_offset; - - cluster_offset = get_cluster_offset(bs, NULL, sector_num << 9, 0); - index_in_cluster = sector_num % s->cluster_sectors; - n = s->cluster_sectors - index_in_cluster; - if (n > nb_sectors) - n = nb_sectors; - *pnum = n; - return (cluster_offset != 0); -} - -static int vmdk_read(BlockDriverState *bs, int64_t sector_num, - uint8_t *buf, int nb_sectors) -{ - BDRVVmdkState *s = bs->opaque; - int index_in_cluster, n, ret; - uint64_t cluster_offset; - - while (nb_sectors > 0) { - cluster_offset = get_cluster_offset(bs, NULL, sector_num << 9, 0); - index_in_cluster = sector_num % s->cluster_sectors; - n = s->cluster_sectors - index_in_cluster; - if (n > nb_sectors) - n = nb_sectors; - if (!cluster_offset) { - // try to read from parent image, if exist - if (s->hd->backing_hd) { - if (!vmdk_is_cid_valid(bs)) - return -1; - ret = bdrv_read(s->hd->backing_hd, sector_num, buf, n); - if (ret < 0) - return -1; - } else { - memset(buf, 0, 512 * n); - } - } else { - if(bdrv_pread(s->hd, cluster_offset + index_in_cluster * 512, buf, n * 512) != n * 512) - return -1; - } - nb_sectors -= n; - sector_num += n; - buf += n * 512; - } - return 0; -} - -static int vmdk_write(BlockDriverState *bs, int64_t sector_num, - const uint8_t *buf, int nb_sectors) -{ - BDRVVmdkState *s = bs->opaque; - VmdkMetaData m_data; - int index_in_cluster, n; - uint64_t cluster_offset; - static int cid_update = 0; - - if (sector_num > bs->total_sectors) { - fprintf(stderr, - "(VMDK) Wrong offset: sector_num=0x%" PRIx64 - " total_sectors=0x%" PRIx64 "\n", - sector_num, bs->total_sectors); - return -1; - } - - while (nb_sectors > 0) { - index_in_cluster = sector_num & (s->cluster_sectors - 1); - n = s->cluster_sectors - index_in_cluster; - if (n > nb_sectors) - n = nb_sectors; - cluster_offset = get_cluster_offset(bs, &m_data, sector_num << 9, 1); - if (!cluster_offset) - return -1; - - if (bdrv_pwrite(s->hd, cluster_offset + index_in_cluster * 512, buf, n * 512) != n * 512) - return -1; - if (m_data.valid) { - /* update L2 tables */ - if (vmdk_L2update(bs, &m_data) == -1) - return -1; - } - nb_sectors -= n; - sector_num += n; - buf += n * 512; - - // update CID on the first write every time the virtual disk is opened - if (!cid_update) { - vmdk_write_cid(bs, time(NULL)); - cid_update++; - } - } - return 0; -} - -static int vmdk_create(const char *filename, int64_t total_size, - const char *backing_file, int flags) -{ - int fd, i; - VMDK4Header header; - uint32_t tmp, magic, grains, gd_size, gt_size, gt_count; - static const char desc_template[] = - "# Disk DescriptorFile\n" - "version=1\n" - "CID=%x\n" - "parentCID=ffffffff\n" - "createType=\"monolithicSparse\"\n" - "\n" - "# Extent description\n" - "RW %" PRId64 " SPARSE \"%s\"\n" - "\n" - "# The Disk Data Base \n" - "#DDB\n" - "\n" - "ddb.virtualHWVersion = \"%d\"\n" - "ddb.geometry.cylinders = \"%" PRId64 "\"\n" - "ddb.geometry.heads = \"16\"\n" - "ddb.geometry.sectors = \"63\"\n" - "ddb.adapterType = \"ide\"\n"; - char desc[1024]; - const char *real_filename, *temp_str; - - /* XXX: add support for backing file */ - if (backing_file) { - return vmdk_snapshot_create(filename, backing_file); - } - - fd = open(filename, O_WRONLY | O_CREAT | O_TRUNC | O_BINARY | O_LARGEFILE, - 0644); - if (fd < 0) - return -1; - magic = cpu_to_be32(VMDK4_MAGIC); - memset(&header, 0, sizeof(header)); - header.version = cpu_to_le32(1); - header.flags = cpu_to_le32(3); /* ?? */ - header.capacity = cpu_to_le64(total_size); - header.granularity = cpu_to_le64(128); - header.num_gtes_per_gte = cpu_to_le32(512); - - grains = (total_size + header.granularity - 1) / header.granularity; - gt_size = ((header.num_gtes_per_gte * sizeof(uint32_t)) + 511) >> 9; - gt_count = (grains + header.num_gtes_per_gte - 1) / header.num_gtes_per_gte; - gd_size = (gt_count * sizeof(uint32_t) + 511) >> 9; - - header.desc_offset = 1; - header.desc_size = 20; - header.rgd_offset = header.desc_offset + header.desc_size; - header.gd_offset = header.rgd_offset + gd_size + (gt_size * gt_count); - header.grain_offset = - ((header.gd_offset + gd_size + (gt_size * gt_count) + - header.granularity - 1) / header.granularity) * - header.granularity; - - header.desc_offset = cpu_to_le64(header.desc_offset); - header.desc_size = cpu_to_le64(header.desc_size); - header.rgd_offset = cpu_to_le64(header.rgd_offset); - header.gd_offset = cpu_to_le64(header.gd_offset); - header.grain_offset = cpu_to_le64(header.grain_offset); - - header.check_bytes[0] = 0xa; - header.check_bytes[1] = 0x20; - header.check_bytes[2] = 0xd; - header.check_bytes[3] = 0xa; - - /* write all the data */ - write(fd, &magic, sizeof(magic)); - write(fd, &header, sizeof(header)); - - ftruncate(fd, header.grain_offset << 9); - - /* write grain directory */ - lseek(fd, le64_to_cpu(header.rgd_offset) << 9, SEEK_SET); - for (i = 0, tmp = header.rgd_offset + gd_size; - i < gt_count; i++, tmp += gt_size) - write(fd, &tmp, sizeof(tmp)); - - /* write backup grain directory */ - lseek(fd, le64_to_cpu(header.gd_offset) << 9, SEEK_SET); - for (i = 0, tmp = header.gd_offset + gd_size; - i < gt_count; i++, tmp += gt_size) - write(fd, &tmp, sizeof(tmp)); - - /* compose the descriptor */ - real_filename = filename; - if ((temp_str = strrchr(real_filename, '\\')) != NULL) - real_filename = temp_str + 1; - if ((temp_str = strrchr(real_filename, '/')) != NULL) - real_filename = temp_str + 1; - if ((temp_str = strrchr(real_filename, ':')) != NULL) - real_filename = temp_str + 1; - snprintf(desc, sizeof(desc), desc_template, (unsigned int)time(NULL), - total_size, real_filename, - (flags & BLOCK_FLAG_COMPAT6 ? 6 : 4), - total_size / (int64_t)(63 * 16)); - - /* write the descriptor */ - lseek(fd, le64_to_cpu(header.desc_offset) << 9, SEEK_SET); - write(fd, desc, strlen(desc)); - - close(fd); - return 0; -} - -static void vmdk_close(BlockDriverState *bs) -{ - BDRVVmdkState *s = bs->opaque; - - qemu_free(s->l1_table); - qemu_free(s->l2_cache); - // try to close parent image, if exist - vmdk_parent_close(s->hd); - bdrv_delete(s->hd); -} - -static void vmdk_flush(BlockDriverState *bs) -{ - BDRVVmdkState *s = bs->opaque; - bdrv_flush(s->hd); -} - -static BlockDriver bdrv_vmdk = { - .format_name = "vmdk", - .instance_size = sizeof(BDRVVmdkState), - .bdrv_probe = vmdk_probe, - .bdrv_open = vmdk_open, - .bdrv_read = vmdk_read, - .bdrv_write = vmdk_write, - .bdrv_close = vmdk_close, - .bdrv_create = vmdk_create, - .bdrv_flush = vmdk_flush, - .bdrv_is_allocated = vmdk_is_allocated, -}; - -static void bdrv_vmdk_init(void) -{ - bdrv_register(&bdrv_vmdk); -} - -block_init(bdrv_vmdk_init); diff --git a/block-vpc.c b/block-vpc.c deleted file mode 100644 index 211ae5c72..000000000 --- a/block-vpc.c +++ /dev/null @@ -1,606 +0,0 @@ -/* - * Block driver for Conectix/Microsoft Virtual PC images - * - * Copyright (c) 2005 Alex Beregszaszi - * Copyright (c) 2009 Kevin Wolf - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. - */ -#include "qemu-common.h" -#include "block_int.h" -#include "module.h" - -/**************************************************************/ - -#define HEADER_SIZE 512 - -//#define CACHE - -enum vhd_type { - VHD_FIXED = 2, - VHD_DYNAMIC = 3, - VHD_DIFFERENCING = 4, -}; - -// Seconds since Jan 1, 2000 0:00:00 (UTC) -#define VHD_TIMESTAMP_BASE 946684800 - -// always big-endian -struct vhd_footer { - char creator[8]; // "conectix" - uint32_t features; - uint32_t version; - - // Offset of next header structure, 0xFFFFFFFF if none - uint64_t data_offset; - - // Seconds since Jan 1, 2000 0:00:00 (UTC) - uint32_t timestamp; - - char creator_app[4]; // "vpc " - uint16_t major; - uint16_t minor; - char creator_os[4]; // "Wi2k" - - uint64_t orig_size; - uint64_t size; - - uint16_t cyls; - uint8_t heads; - uint8_t secs_per_cyl; - - uint32_t type; - - // Checksum of the Hard Disk Footer ("one's complement of the sum of all - // the bytes in the footer without the checksum field") - uint32_t checksum; - - // UUID used to identify a parent hard disk (backing file) - uint8_t uuid[16]; - - uint8_t in_saved_state; -}; - -struct vhd_dyndisk_header { - char magic[8]; // "cxsparse" - - // Offset of next header structure, 0xFFFFFFFF if none - uint64_t data_offset; - - // Offset of the Block Allocation Table (BAT) - uint64_t table_offset; - - uint32_t version; - uint32_t max_table_entries; // 32bit/entry - - // 2 MB by default, must be a power of two - uint32_t block_size; - - uint32_t checksum; - uint8_t parent_uuid[16]; - uint32_t parent_timestamp; - uint32_t reserved; - - // Backing file name (in UTF-16) - uint8_t parent_name[512]; - - struct { - uint32_t platform; - uint32_t data_space; - uint32_t data_length; - uint32_t reserved; - uint64_t data_offset; - } parent_locator[8]; -}; - -typedef struct BDRVVPCState { - BlockDriverState *hd; - - uint8_t footer_buf[HEADER_SIZE]; - uint64_t free_data_block_offset; - int max_table_entries; - uint32_t *pagetable; - uint64_t bat_offset; - uint64_t last_bitmap_offset; - - uint32_t block_size; - uint32_t bitmap_size; - -#ifdef CACHE - uint8_t *pageentry_u8; - uint32_t *pageentry_u32; - uint16_t *pageentry_u16; - - uint64_t last_bitmap; -#endif -} BDRVVPCState; - -static uint32_t vpc_checksum(uint8_t* buf, size_t size) -{ - uint32_t res = 0; - int i; - - for (i = 0; i < size; i++) - res += buf[i]; - - return ~res; -} - - -static int vpc_probe(const uint8_t *buf, int buf_size, const char *filename) -{ - if (buf_size >= 8 && !strncmp((char *)buf, "conectix", 8)) - return 100; - return 0; -} - -static int vpc_open(BlockDriverState *bs, const char *filename, int flags) -{ - BDRVVPCState *s = bs->opaque; - int ret, i; - struct vhd_footer* footer; - struct vhd_dyndisk_header* dyndisk_header; - uint8_t buf[HEADER_SIZE]; - uint32_t checksum; - - ret = bdrv_file_open(&s->hd, filename, flags); - if (ret < 0) - return ret; - - if (bdrv_pread(s->hd, 0, s->footer_buf, HEADER_SIZE) != HEADER_SIZE) - goto fail; - - footer = (struct vhd_footer*) s->footer_buf; - if (strncmp(footer->creator, "conectix", 8)) - goto fail; - - checksum = be32_to_cpu(footer->checksum); - footer->checksum = 0; - if (vpc_checksum(s->footer_buf, HEADER_SIZE) != checksum) - fprintf(stderr, "block-vpc: The header checksum of '%s' is " - "incorrect.\n", filename); - - // The visible size of a image in Virtual PC depends on the geometry - // rather than on the size stored in the footer (the size in the footer - // is too large usually) - bs->total_sectors = (int64_t) - be16_to_cpu(footer->cyls) * footer->heads * footer->secs_per_cyl; - - if (bdrv_pread(s->hd, be64_to_cpu(footer->data_offset), buf, HEADER_SIZE) - != HEADER_SIZE) - goto fail; - - dyndisk_header = (struct vhd_dyndisk_header*) buf; - - if (strncmp(dyndisk_header->magic, "cxsparse", 8)) - goto fail; - - - s->block_size = be32_to_cpu(dyndisk_header->block_size); - s->bitmap_size = ((s->block_size / (8 * 512)) + 511) & ~511; - - s->max_table_entries = be32_to_cpu(dyndisk_header->max_table_entries); - s->pagetable = qemu_malloc(s->max_table_entries * 4); - - s->bat_offset = be64_to_cpu(dyndisk_header->table_offset); - if (bdrv_pread(s->hd, s->bat_offset, s->pagetable, - s->max_table_entries * 4) != s->max_table_entries * 4) - goto fail; - - s->free_data_block_offset = - (s->bat_offset + (s->max_table_entries * 4) + 511) & ~511; - - for (i = 0; i < s->max_table_entries; i++) { - be32_to_cpus(&s->pagetable[i]); - if (s->pagetable[i] != 0xFFFFFFFF) { - int64_t next = (512 * (int64_t) s->pagetable[i]) + - s->bitmap_size + s->block_size; - - if (next> s->free_data_block_offset) - s->free_data_block_offset = next; - } - } - - s->last_bitmap_offset = (int64_t) -1; - -#ifdef CACHE - s->pageentry_u8 = qemu_malloc(512); - s->pageentry_u32 = s->pageentry_u8; - s->pageentry_u16 = s->pageentry_u8; - s->last_pagetable = -1; -#endif - - return 0; - fail: - bdrv_delete(s->hd); - return -1; -} - -/* - * Returns the absolute byte offset of the given sector in the image file. - * If the sector is not allocated, -1 is returned instead. - * - * The parameter write must be 1 if the offset will be used for a write - * operation (the block bitmaps is updated then), 0 otherwise. - */ -static inline int64_t get_sector_offset(BlockDriverState *bs, - int64_t sector_num, int write) -{ - BDRVVPCState *s = bs->opaque; - uint64_t offset = sector_num * 512; - uint64_t bitmap_offset, block_offset; - uint32_t pagetable_index, pageentry_index; - - pagetable_index = offset / s->block_size; - pageentry_index = (offset % s->block_size) / 512; - - if (pagetable_index >= s->max_table_entries || s->pagetable[pagetable_index] == 0xffffffff) - return -1; // not allocated - - bitmap_offset = 512 * (uint64_t) s->pagetable[pagetable_index]; - block_offset = bitmap_offset + s->bitmap_size + (512 * pageentry_index); - - // We must ensure that we don't write to any sectors which are marked as - // unused in the bitmap. We get away with setting all bits in the block - // bitmap each time we write to a new block. This might cause Virtual PC to - // miss sparse read optimization, but it's not a problem in terms of - // correctness. - if (write && (s->last_bitmap_offset != bitmap_offset)) { - uint8_t bitmap[s->bitmap_size]; - - s->last_bitmap_offset = bitmap_offset; - memset(bitmap, 0xff, s->bitmap_size); - bdrv_pwrite(s->hd, bitmap_offset, bitmap, s->bitmap_size); - } - -// printf("sector: %" PRIx64 ", index: %x, offset: %x, bioff: %" PRIx64 ", bloff: %" PRIx64 "\n", -// sector_num, pagetable_index, pageentry_index, -// bitmap_offset, block_offset); - -// disabled by reason -#if 0 -#ifdef CACHE - if (bitmap_offset != s->last_bitmap) - { - lseek(s->fd, bitmap_offset, SEEK_SET); - - s->last_bitmap = bitmap_offset; - - // Scary! Bitmap is stored as big endian 32bit entries, - // while we used to look it up byte by byte - read(s->fd, s->pageentry_u8, 512); - for (i = 0; i < 128; i++) - be32_to_cpus(&s->pageentry_u32[i]); - } - - if ((s->pageentry_u8[pageentry_index / 8] >> (pageentry_index % 8)) & 1) - return -1; -#else - lseek(s->fd, bitmap_offset + (pageentry_index / 8), SEEK_SET); - - read(s->fd, &bitmap_entry, 1); - - if ((bitmap_entry >> (pageentry_index % 8)) & 1) - return -1; // not allocated -#endif -#endif - - return block_offset; -} - -/* - * Writes the footer to the end of the image file. This is needed when the - * file grows as it overwrites the old footer - * - * Returns 0 on success and < 0 on error - */ -static int rewrite_footer(BlockDriverState* bs) -{ - int ret; - BDRVVPCState *s = bs->opaque; - int64_t offset = s->free_data_block_offset; - - ret = bdrv_pwrite(s->hd, offset, s->footer_buf, HEADER_SIZE); - if (ret < 0) - return ret; - - return 0; -} - -/* - * Allocates a new block. This involves writing a new footer and updating - * the Block Allocation Table to use the space at the old end of the image - * file (overwriting the old footer) - * - * Returns the sectors' offset in the image file on success and < 0 on error - */ -static int64_t alloc_block(BlockDriverState* bs, int64_t sector_num) -{ - BDRVVPCState *s = bs->opaque; - int64_t bat_offset; - uint32_t index, bat_value; - int ret; - uint8_t bitmap[s->bitmap_size]; - - // Check if sector_num is valid - if ((sector_num < 0) || (sector_num > bs->total_sectors)) - return -1; - - // Write entry into in-memory BAT - index = (sector_num * 512) / s->block_size; - if (s->pagetable[index] != 0xFFFFFFFF) - return -1; - - s->pagetable[index] = s->free_data_block_offset / 512; - - // Initialize the block's bitmap - memset(bitmap, 0xff, s->bitmap_size); - bdrv_pwrite(s->hd, s->free_data_block_offset, bitmap, s->bitmap_size); - - // Write new footer (the old one will be overwritten) - s->free_data_block_offset += s->block_size + s->bitmap_size; - ret = rewrite_footer(bs); - if (ret < 0) - goto fail; - - // Write BAT entry to disk - bat_offset = s->bat_offset + (4 * index); - bat_value = be32_to_cpu(s->pagetable[index]); - ret = bdrv_pwrite(s->hd, bat_offset, &bat_value, 4); - if (ret < 0) - goto fail; - - return get_sector_offset(bs, sector_num, 0); - -fail: - s->free_data_block_offset -= (s->block_size + s->bitmap_size); - return -1; -} - -static int vpc_read(BlockDriverState *bs, int64_t sector_num, - uint8_t *buf, int nb_sectors) -{ - BDRVVPCState *s = bs->opaque; - int ret; - int64_t offset; - - while (nb_sectors > 0) { - offset = get_sector_offset(bs, sector_num, 0); - - if (offset == -1) { - memset(buf, 0, 512); - } else { - ret = bdrv_pread(s->hd, offset, buf, 512); - if (ret != 512) - return -1; - } - - nb_sectors--; - sector_num++; - buf += 512; - } - return 0; -} - -static int vpc_write(BlockDriverState *bs, int64_t sector_num, - const uint8_t *buf, int nb_sectors) -{ - BDRVVPCState *s = bs->opaque; - int64_t offset; - int ret; - - while (nb_sectors > 0) { - offset = get_sector_offset(bs, sector_num, 1); - - if (offset == -1) { - offset = alloc_block(bs, sector_num); - if (offset < 0) - return -1; - } - - ret = bdrv_pwrite(s->hd, offset, buf, 512); - if (ret != 512) - return -1; - - nb_sectors--; - sector_num++; - buf += 512; - } - - return 0; -} - - -/* - * Calculates the number of cylinders, heads and sectors per cylinder - * based on a given number of sectors. This is the algorithm described - * in the VHD specification. - * - * Note that the geometry doesn't always exactly match total_sectors but - * may round it down. - * - * Returns 0 on success, -EFBIG if the size is larger than 127 GB - */ -static int calculate_geometry(int64_t total_sectors, uint16_t* cyls, - uint8_t* heads, uint8_t* secs_per_cyl) -{ - uint32_t cyls_times_heads; - - if (total_sectors > 65535 * 16 * 255) - return -EFBIG; - - if (total_sectors > 65535 * 16 * 63) { - *secs_per_cyl = 255; - *heads = 16; - cyls_times_heads = total_sectors / *secs_per_cyl; - } else { - *secs_per_cyl = 17; - cyls_times_heads = total_sectors / *secs_per_cyl; - *heads = (cyls_times_heads + 1023) / 1024; - - if (*heads < 4) - *heads = 4; - - if (cyls_times_heads >= (*heads * 1024) || *heads > 16) { - *secs_per_cyl = 31; - *heads = 16; - cyls_times_heads = total_sectors / *secs_per_cyl; - } - - if (cyls_times_heads >= (*heads * 1024)) { - *secs_per_cyl = 63; - *heads = 16; - cyls_times_heads = total_sectors / *secs_per_cyl; - } - } - - // Note: Rounding up deviates from the Virtual PC behaviour - // However, we need this to avoid truncating images in qemu-img convert - *cyls = (cyls_times_heads + *heads - 1) / *heads; - - return 0; -} - -static int vpc_create(const char *filename, int64_t total_sectors, - const char *backing_file, int flags) -{ - uint8_t buf[1024]; - struct vhd_footer* footer = (struct vhd_footer*) buf; - struct vhd_dyndisk_header* dyndisk_header = - (struct vhd_dyndisk_header*) buf; - int fd, i; - uint16_t cyls; - uint8_t heads; - uint8_t secs_per_cyl; - size_t block_size, num_bat_entries; - - if (backing_file != NULL) - return -ENOTSUP; - - fd = open(filename, O_WRONLY | O_CREAT | O_TRUNC | O_BINARY, 0644); - if (fd < 0) - return -EIO; - - // Calculate matching total_size and geometry - if (calculate_geometry(total_sectors, &cyls, &heads, &secs_per_cyl)) - return -EFBIG; - total_sectors = (int64_t) cyls * heads * secs_per_cyl; - - // Prepare the Hard Disk Footer - memset(buf, 0, 1024); - - strncpy(footer->creator, "conectix", 8); - // TODO Check if "qemu" creator_app is ok for VPC - strncpy(footer->creator_app, "qemu", 4); - strncpy(footer->creator_os, "Wi2k", 4); - - footer->features = be32_to_cpu(0x02); - footer->version = be32_to_cpu(0x00010000); - footer->data_offset = be64_to_cpu(HEADER_SIZE); - footer->timestamp = be32_to_cpu(time(NULL) - VHD_TIMESTAMP_BASE); - - // Version of Virtual PC 2007 - footer->major = be16_to_cpu(0x0005); - footer->minor =be16_to_cpu(0x0003); - - footer->orig_size = be64_to_cpu(total_sectors * 512); - footer->size = be64_to_cpu(total_sectors * 512); - - footer->cyls = be16_to_cpu(cyls); - footer->heads = heads; - footer->secs_per_cyl = secs_per_cyl; - - footer->type = be32_to_cpu(VHD_DYNAMIC); - - // TODO uuid is missing - - footer->checksum = be32_to_cpu(vpc_checksum(buf, HEADER_SIZE)); - - // Write the footer (twice: at the beginning and at the end) - block_size = 0x200000; - num_bat_entries = (total_sectors + block_size / 512) / (block_size / 512); - - if (write(fd, buf, HEADER_SIZE) != HEADER_SIZE) - return -EIO; - - if (lseek(fd, 1536 + ((num_bat_entries * 4 + 511) & ~511), SEEK_SET) < 0) - return -EIO; - if (write(fd, buf, HEADER_SIZE) != HEADER_SIZE) - return -EIO; - - // Write the initial BAT - if (lseek(fd, 3 * 512, SEEK_SET) < 0) - return -EIO; - - memset(buf, 0xFF, 512); - for (i = 0; i < (num_bat_entries * 4 + 511) / 512; i++) - if (write(fd, buf, 512) != 512) - return -EIO; - - - // Prepare the Dynamic Disk Header - memset(buf, 0, 1024); - - strncpy(dyndisk_header->magic, "cxsparse", 8); - - dyndisk_header->data_offset = be64_to_cpu(0xFFFFFFFF); - dyndisk_header->table_offset = be64_to_cpu(3 * 512); - dyndisk_header->version = be32_to_cpu(0x00010000); - dyndisk_header->block_size = be32_to_cpu(block_size); - dyndisk_header->max_table_entries = be32_to_cpu(num_bat_entries); - - dyndisk_header->checksum = be32_to_cpu(vpc_checksum(buf, 1024)); - - // Write the header - if (lseek(fd, 512, SEEK_SET) < 0) - return -EIO; - if (write(fd, buf, 1024) != 1024) - return -EIO; - - close(fd); - return 0; -} - -static void vpc_close(BlockDriverState *bs) -{ - BDRVVPCState *s = bs->opaque; - qemu_free(s->pagetable); -#ifdef CACHE - qemu_free(s->pageentry_u8); -#endif - bdrv_delete(s->hd); -} - -static BlockDriver bdrv_vpc = { - .format_name = "vpc", - .instance_size = sizeof(BDRVVPCState), - .bdrv_probe = vpc_probe, - .bdrv_open = vpc_open, - .bdrv_read = vpc_read, - .bdrv_write = vpc_write, - .bdrv_close = vpc_close, - .bdrv_create = vpc_create, -}; - -static void bdrv_vpc_init(void) -{ - bdrv_register(&bdrv_vpc); -} - -block_init(bdrv_vpc_init); diff --git a/block-vvfat.c b/block-vvfat.c deleted file mode 100644 index 2a8feb38d..000000000 --- a/block-vvfat.c +++ /dev/null @@ -1,2855 +0,0 @@ -/* vim:set shiftwidth=4 ts=8: */ -/* - * QEMU Block driver for virtual VFAT (shadows a local directory) - * - * Copyright (c) 2004,2005 Johannes E. Schindelin - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. - */ -#include -#include -#include "qemu-common.h" -#include "block_int.h" -#include "module.h" - -#ifndef S_IWGRP -#define S_IWGRP 0 -#endif -#ifndef S_IWOTH -#define S_IWOTH 0 -#endif - -/* TODO: add ":bootsector=blabla.img:" */ -/* LATER TODO: add automatic boot sector generation from - BOOTEASY.ASM and Ranish Partition Manager - Note that DOS assumes the system files to be the first files in the - file system (test if the boot sector still relies on that fact)! */ -/* MAYBE TODO: write block-visofs.c */ -/* TODO: call try_commit() only after a timeout */ - -/* #define DEBUG */ - -#ifdef DEBUG - -#define DLOG(a) a - -#undef stderr -#define stderr STDERR -FILE* stderr = NULL; - -static void checkpoint(void); - -#ifdef __MINGW32__ -void nonono(const char* file, int line, const char* msg) { - fprintf(stderr, "Nonono! %s:%d %s\n", file, line, msg); - exit(-5); -} -#undef assert -#define assert(a) do {if (!(a)) nonono(__FILE__, __LINE__, #a);}while(0) -#endif - -#else - -#define DLOG(a) - -#endif - -/* dynamic array functions */ -typedef struct array_t { - char* pointer; - unsigned int size,next,item_size; -} array_t; - -static inline void array_init(array_t* array,unsigned int item_size) -{ - array->pointer = NULL; - array->size=0; - array->next=0; - array->item_size=item_size; -} - -static inline void array_free(array_t* array) -{ - if(array->pointer) - free(array->pointer); - array->size=array->next=0; -} - -/* does not automatically grow */ -static inline void* array_get(array_t* array,unsigned int index) { - assert(index < array->next); - return array->pointer + index * array->item_size; -} - -static inline int array_ensure_allocated(array_t* array, int index) -{ - if((index + 1) * array->item_size > array->size) { - int new_size = (index + 32) * array->item_size; - array->pointer = qemu_realloc(array->pointer, new_size); - if (!array->pointer) - return -1; - array->size = new_size; - array->next = index + 1; - } - - return 0; -} - -static inline void* array_get_next(array_t* array) { - unsigned int next = array->next; - void* result; - - if (array_ensure_allocated(array, next) < 0) - return NULL; - - array->next = next + 1; - result = array_get(array, next); - - return result; -} - -static inline void* array_insert(array_t* array,unsigned int index,unsigned int count) { - if((array->next+count)*array->item_size>array->size) { - int increment=count*array->item_size; - array->pointer=qemu_realloc(array->pointer,array->size+increment); - if(!array->pointer) - return NULL; - array->size+=increment; - } - memmove(array->pointer+(index+count)*array->item_size, - array->pointer+index*array->item_size, - (array->next-index)*array->item_size); - array->next+=count; - return array->pointer+index*array->item_size; -} - -/* this performs a "roll", so that the element which was at index_from becomes - * index_to, but the order of all other elements is preserved. */ -static inline int array_roll(array_t* array,int index_to,int index_from,int count) -{ - char* buf; - char* from; - char* to; - int is; - - if(!array || - index_to<0 || index_to>=array->next || - index_from<0 || index_from>=array->next) - return -1; - - if(index_to==index_from) - return 0; - - is=array->item_size; - from=array->pointer+index_from*is; - to=array->pointer+index_to*is; - buf=qemu_malloc(is*count); - memcpy(buf,from,is*count); - - if(index_to=0); - assert(count > 0); - assert(index + count <= array->next); - if(array_roll(array,array->next-1,index,count)) - return -1; - array->next -= count; - return 0; -} - -static int array_remove(array_t* array,int index) -{ - return array_remove_slice(array, index, 1); -} - -/* return the index for a given member */ -static int array_index(array_t* array, void* pointer) -{ - size_t offset = (char*)pointer - array->pointer; - assert((offset % array->item_size) == 0); - assert(offset/array->item_size < array->next); - return offset/array->item_size; -} - -/* These structures are used to fake a disk and the VFAT filesystem. - * For this reason we need to use __attribute__((packed)). */ - -typedef struct bootsector_t { - uint8_t jump[3]; - uint8_t name[8]; - uint16_t sector_size; - uint8_t sectors_per_cluster; - uint16_t reserved_sectors; - uint8_t number_of_fats; - uint16_t root_entries; - uint16_t total_sectors16; - uint8_t media_type; - uint16_t sectors_per_fat; - uint16_t sectors_per_track; - uint16_t number_of_heads; - uint32_t hidden_sectors; - uint32_t total_sectors; - union { - struct { - uint8_t drive_number; - uint8_t current_head; - uint8_t signature; - uint32_t id; - uint8_t volume_label[11]; - } __attribute__((packed)) fat16; - struct { - uint32_t sectors_per_fat; - uint16_t flags; - uint8_t major,minor; - uint32_t first_cluster_of_root_directory; - uint16_t info_sector; - uint16_t backup_boot_sector; - uint16_t ignored; - } __attribute__((packed)) fat32; - } u; - uint8_t fat_type[8]; - uint8_t ignored[0x1c0]; - uint8_t magic[2]; -} __attribute__((packed)) bootsector_t; - -typedef struct { - uint8_t head; - uint8_t sector; - uint8_t cylinder; -} mbr_chs_t; - -typedef struct partition_t { - uint8_t attributes; /* 0x80 = bootable */ - mbr_chs_t start_CHS; - uint8_t fs_type; /* 0x1 = FAT12, 0x6 = FAT16, 0xe = FAT16_LBA, 0xb = FAT32, 0xc = FAT32_LBA */ - mbr_chs_t end_CHS; - uint32_t start_sector_long; - uint32_t length_sector_long; -} __attribute__((packed)) partition_t; - -typedef struct mbr_t { - uint8_t ignored[0x1b8]; - uint32_t nt_id; - uint8_t ignored2[2]; - partition_t partition[4]; - uint8_t magic[2]; -} __attribute__((packed)) mbr_t; - -typedef struct direntry_t { - uint8_t name[8]; - uint8_t extension[3]; - uint8_t attributes; - uint8_t reserved[2]; - uint16_t ctime; - uint16_t cdate; - uint16_t adate; - uint16_t begin_hi; - uint16_t mtime; - uint16_t mdate; - uint16_t begin; - uint32_t size; -} __attribute__((packed)) direntry_t; - -/* this structure are used to transparently access the files */ - -typedef struct mapping_t { - /* begin is the first cluster, end is the last+1 */ - uint32_t begin,end; - /* as s->directory is growable, no pointer may be used here */ - unsigned int dir_index; - /* the clusters of a file may be in any order; this points to the first */ - int first_mapping_index; - union { - /* offset is - * - the offset in the file (in clusters) for a file, or - * - the next cluster of the directory for a directory, and - * - the address of the buffer for a faked entry - */ - struct { - uint32_t offset; - } file; - struct { - int parent_mapping_index; - int first_dir_index; - } dir; - } info; - /* path contains the full path, i.e. it always starts with s->path */ - char* path; - - enum { MODE_UNDEFINED = 0, MODE_NORMAL = 1, MODE_MODIFIED = 2, - MODE_DIRECTORY = 4, MODE_FAKED = 8, - MODE_DELETED = 16, MODE_RENAMED = 32 } mode; - int read_only; -} mapping_t; - -#ifdef DEBUG -static void print_direntry(const struct direntry_t*); -static void print_mapping(const struct mapping_t* mapping); -#endif - -/* here begins the real VVFAT driver */ - -typedef struct BDRVVVFATState { - BlockDriverState* bs; /* pointer to parent */ - unsigned int first_sectors_number; /* 1 for a single partition, 0x40 for a disk with partition table */ - unsigned char first_sectors[0x40*0x200]; - - int fat_type; /* 16 or 32 */ - array_t fat,directory,mapping; - - unsigned int cluster_size; - unsigned int sectors_per_cluster; - unsigned int sectors_per_fat; - unsigned int sectors_of_root_directory; - uint32_t last_cluster_of_root_directory; - unsigned int faked_sectors; /* how many sectors are faked before file data */ - uint32_t sector_count; /* total number of sectors of the partition */ - uint32_t cluster_count; /* total number of clusters of this partition */ - uint32_t max_fat_value; - - int current_fd; - mapping_t* current_mapping; - unsigned char* cluster; /* points to current cluster */ - unsigned char* cluster_buffer; /* points to a buffer to hold temp data */ - unsigned int current_cluster; - - /* write support */ - BlockDriverState* write_target; - char* qcow_filename; - BlockDriverState* qcow; - void* fat2; - char* used_clusters; - array_t commits; - const char* path; - int downcase_short_names; -} BDRVVVFATState; - -/* take the sector position spos and convert it to Cylinder/Head/Sector position - * if the position is outside the specified geometry, fill maximum value for CHS - * and return 1 to signal overflow. - */ -static int sector2CHS(BlockDriverState* bs, mbr_chs_t * chs, int spos){ - int head,sector; - sector = spos % (bs->secs); spos/= bs->secs; - head = spos % (bs->heads); spos/= bs->heads; - if(spos >= bs->cyls){ - /* Overflow, - it happens if 32bit sector positions are used, while CHS is only 24bit. - Windows/Dos is said to take 1023/255/63 as nonrepresentable CHS */ - chs->head = 0xFF; - chs->sector = 0xFF; - chs->cylinder = 0xFF; - return 1; - } - chs->head = (uint8_t)head; - chs->sector = (uint8_t)( (sector+1) | ((spos>>8)<<6) ); - chs->cylinder = (uint8_t)spos; - return 0; -} - -static void init_mbr(BDRVVVFATState* s) -{ - /* TODO: if the files mbr.img and bootsect.img exist, use them */ - mbr_t* real_mbr=(mbr_t*)s->first_sectors; - partition_t* partition=&(real_mbr->partition[0]); - int lba; - - memset(s->first_sectors,0,512); - - /* Win NT Disk Signature */ - real_mbr->nt_id= cpu_to_le32(0xbe1afdfa); - - partition->attributes=0x80; /* bootable */ - - /* LBA is used when partition is outside the CHS geometry */ - lba = sector2CHS(s->bs, &partition->start_CHS, s->first_sectors_number-1); - lba|= sector2CHS(s->bs, &partition->end_CHS, s->sector_count); - - /*LBA partitions are identified only by start/length_sector_long not by CHS*/ - partition->start_sector_long =cpu_to_le32(s->first_sectors_number-1); - partition->length_sector_long=cpu_to_le32(s->sector_count - s->first_sectors_number+1); - - /* FAT12/FAT16/FAT32 */ - /* DOS uses different types when partition is LBA, - probably to prevent older versions from using CHS on them */ - partition->fs_type= s->fat_type==12 ? 0x1: - s->fat_type==16 ? (lba?0xe:0x06): - /*fat_tyoe==32*/ (lba?0xc:0x0b); - - real_mbr->magic[0]=0x55; real_mbr->magic[1]=0xaa; -} - -/* direntry functions */ - -/* dest is assumed to hold 258 bytes, and pads with 0xffff up to next multiple of 26 */ -static inline int short2long_name(char* dest,const char* src) -{ - int i; - int len; - for(i=0;i<129 && src[i];i++) { - dest[2*i]=src[i]; - dest[2*i+1]=0; - } - len=2*i; - dest[2*i]=dest[2*i+1]=0; - for(i=2*i+2;(i%26);i++) - dest[i]=0xff; - return len; -} - -static inline direntry_t* create_long_filename(BDRVVVFATState* s,const char* filename) -{ - char buffer[258]; - int length=short2long_name(buffer,filename), - number_of_entries=(length+25)/26,i; - direntry_t* entry; - - for(i=0;idirectory)); - entry->attributes=0xf; - entry->reserved[0]=0; - entry->begin=0; - entry->name[0]=(number_of_entries-i)|(i==0?0x40:0); - } - for(i=0;i<26*number_of_entries;i++) { - int offset=(i%26); - if(offset<10) offset=1+offset; - else if(offset<22) offset=14+offset-10; - else offset=28+offset-22; - entry=array_get(&(s->directory),s->directory.next-1-(i/26)); - entry->name[offset]=buffer[i]; - } - return array_get(&(s->directory),s->directory.next-number_of_entries); -} - -static char is_free(const direntry_t* direntry) -{ - return direntry->name[0]==0xe5 || direntry->name[0]==0x00; -} - -static char is_volume_label(const direntry_t* direntry) -{ - return direntry->attributes == 0x28; -} - -static char is_long_name(const direntry_t* direntry) -{ - return direntry->attributes == 0xf; -} - -static char is_short_name(const direntry_t* direntry) -{ - return !is_volume_label(direntry) && !is_long_name(direntry) - && !is_free(direntry); -} - -static char is_directory(const direntry_t* direntry) -{ - return direntry->attributes & 0x10 && direntry->name[0] != 0xe5; -} - -static inline char is_dot(const direntry_t* direntry) -{ - return is_short_name(direntry) && direntry->name[0] == '.'; -} - -static char is_file(const direntry_t* direntry) -{ - return is_short_name(direntry) && !is_directory(direntry); -} - -static inline uint32_t begin_of_direntry(const direntry_t* direntry) -{ - return le16_to_cpu(direntry->begin)|(le16_to_cpu(direntry->begin_hi)<<16); -} - -static inline uint32_t filesize_of_direntry(const direntry_t* direntry) -{ - return le32_to_cpu(direntry->size); -} - -static void set_begin_of_direntry(direntry_t* direntry, uint32_t begin) -{ - direntry->begin = cpu_to_le16(begin & 0xffff); - direntry->begin_hi = cpu_to_le16((begin >> 16) & 0xffff); -} - -/* fat functions */ - -static inline uint8_t fat_chksum(const direntry_t* entry) -{ - uint8_t chksum=0; - int i; - - for(i=0;i<11;i++) { - unsigned char c; - - c = (i <= 8) ? entry->name[i] : entry->extension[i-8]; - chksum=(((chksum&0xfe)>>1)|((chksum&0x01)?0x80:0)) + c; - } - - return chksum; -} - -/* if return_time==0, this returns the fat_date, else the fat_time */ -static uint16_t fat_datetime(time_t time,int return_time) { - struct tm* t; -#ifdef _WIN32 - t=localtime(&time); /* this is not thread safe */ -#else - struct tm t1; - t=&t1; - localtime_r(&time,t); -#endif - if(return_time) - return cpu_to_le16((t->tm_sec/2)|(t->tm_min<<5)|(t->tm_hour<<11)); - return cpu_to_le16((t->tm_mday)|((t->tm_mon+1)<<5)|((t->tm_year-80)<<9)); -} - -static inline void fat_set(BDRVVVFATState* s,unsigned int cluster,uint32_t value) -{ - if(s->fat_type==32) { - uint32_t* entry=array_get(&(s->fat),cluster); - *entry=cpu_to_le32(value); - } else if(s->fat_type==16) { - uint16_t* entry=array_get(&(s->fat),cluster); - *entry=cpu_to_le16(value&0xffff); - } else { - int offset = (cluster*3/2); - unsigned char* p = array_get(&(s->fat), offset); - switch (cluster&1) { - case 0: - p[0] = value&0xff; - p[1] = (p[1]&0xf0) | ((value>>8)&0xf); - break; - case 1: - p[0] = (p[0]&0xf) | ((value&0xf)<<4); - p[1] = (value>>4); - break; - } - } -} - -static inline uint32_t fat_get(BDRVVVFATState* s,unsigned int cluster) -{ - if(s->fat_type==32) { - uint32_t* entry=array_get(&(s->fat),cluster); - return le32_to_cpu(*entry); - } else if(s->fat_type==16) { - uint16_t* entry=array_get(&(s->fat),cluster); - return le16_to_cpu(*entry); - } else { - const uint8_t* x=(uint8_t*)(s->fat.pointer)+cluster*3/2; - return ((x[0]|(x[1]<<8))>>(cluster&1?4:0))&0x0fff; - } -} - -static inline int fat_eof(BDRVVVFATState* s,uint32_t fat_entry) -{ - if(fat_entry>s->max_fat_value-8) - return -1; - return 0; -} - -static inline void init_fat(BDRVVVFATState* s) -{ - if (s->fat_type == 12) { - array_init(&(s->fat),1); - array_ensure_allocated(&(s->fat), - s->sectors_per_fat * 0x200 * 3 / 2 - 1); - } else { - array_init(&(s->fat),(s->fat_type==32?4:2)); - array_ensure_allocated(&(s->fat), - s->sectors_per_fat * 0x200 / s->fat.item_size - 1); - } - memset(s->fat.pointer,0,s->fat.size); - - switch(s->fat_type) { - case 12: s->max_fat_value=0xfff; break; - case 16: s->max_fat_value=0xffff; break; - case 32: s->max_fat_value=0x0fffffff; break; - default: s->max_fat_value=0; /* error... */ - } - -} - -/* TODO: in create_short_filename, 0xe5->0x05 is not yet handled! */ -/* TODO: in parse_short_filename, 0x05->0xe5 is not yet handled! */ -static inline direntry_t* create_short_and_long_name(BDRVVVFATState* s, - unsigned int directory_start, const char* filename, int is_dot) -{ - int i,j,long_index=s->directory.next; - direntry_t* entry = NULL; - direntry_t* entry_long = NULL; - - if(is_dot) { - entry=array_get_next(&(s->directory)); - memset(entry->name,0x20,11); - memcpy(entry->name,filename,strlen(filename)); - return entry; - } - - entry_long=create_long_filename(s,filename); - - i = strlen(filename); - for(j = i - 1; j>0 && filename[j]!='.';j--); - if (j > 0) - i = (j > 8 ? 8 : j); - else if (i > 8) - i = 8; - - entry=array_get_next(&(s->directory)); - memset(entry->name,0x20,11); - memcpy(entry->name, filename, i); - - if(j > 0) - for (i = 0; i < 3 && filename[j+1+i]; i++) - entry->extension[i] = filename[j+1+i]; - - /* upcase & remove unwanted characters */ - for(i=10;i>=0;i--) { - if(i==10 || i==7) for(;i>0 && entry->name[i]==' ';i--); - if(entry->name[i]<=' ' || entry->name[i]>0x7f - || strchr(".*?<>|\":/\\[];,+='",entry->name[i])) - entry->name[i]='_'; - else if(entry->name[i]>='a' && entry->name[i]<='z') - entry->name[i]+='A'-'a'; - } - - /* mangle duplicates */ - while(1) { - direntry_t* entry1=array_get(&(s->directory),directory_start); - int j; - - for(;entry1name,entry->name,11)) - break; /* found dupe */ - if(entry1==entry) /* no dupe found */ - break; - - /* use all 8 characters of name */ - if(entry->name[7]==' ') { - int j; - for(j=6;j>0 && entry->name[j]==' ';j--) - entry->name[j]='~'; - } - - /* increment number */ - for(j=7;j>0 && entry->name[j]=='9';j--) - entry->name[j]='0'; - if(j>0) { - if(entry->name[j]<'0' || entry->name[j]>'9') - entry->name[j]='0'; - else - entry->name[j]++; - } - } - - /* calculate checksum; propagate to long name */ - if(entry_long) { - uint8_t chksum=fat_chksum(entry); - - /* calculate anew, because realloc could have taken place */ - entry_long=array_get(&(s->directory),long_index); - while(entry_longreserved[1]=chksum; - entry_long++; - } - } - - return entry; -} - -/* - * Read a directory. (the index of the corresponding mapping must be passed). - */ -static int read_directory(BDRVVVFATState* s, int mapping_index) -{ - mapping_t* mapping = array_get(&(s->mapping), mapping_index); - direntry_t* direntry; - const char* dirname = mapping->path; - int first_cluster = mapping->begin; - int parent_index = mapping->info.dir.parent_mapping_index; - mapping_t* parent_mapping = (mapping_t*) - (parent_index >= 0 ? array_get(&(s->mapping), parent_index) : NULL); - int first_cluster_of_parent = parent_mapping ? parent_mapping->begin : -1; - - DIR* dir=opendir(dirname); - struct dirent* entry; - int i; - - assert(mapping->mode & MODE_DIRECTORY); - - if(!dir) { - mapping->end = mapping->begin; - return -1; - } - - i = mapping->info.dir.first_dir_index = - first_cluster == 0 ? 0 : s->directory.next; - - /* actually read the directory, and allocate the mappings */ - while((entry=readdir(dir))) { - unsigned int length=strlen(dirname)+2+strlen(entry->d_name); - char* buffer; - direntry_t* direntry; - struct stat st; - int is_dot=!strcmp(entry->d_name,"."); - int is_dotdot=!strcmp(entry->d_name,".."); - - if(first_cluster == 0 && (is_dotdot || is_dot)) - continue; - - buffer=(char*)qemu_malloc(length); - snprintf(buffer,length,"%s/%s",dirname,entry->d_name); - - if(stat(buffer,&st)<0) { - free(buffer); - continue; - } - - /* create directory entry for this file */ - direntry=create_short_and_long_name(s, i, entry->d_name, - is_dot || is_dotdot); - direntry->attributes=(S_ISDIR(st.st_mode)?0x10:0x20); - direntry->reserved[0]=direntry->reserved[1]=0; - direntry->ctime=fat_datetime(st.st_ctime,1); - direntry->cdate=fat_datetime(st.st_ctime,0); - direntry->adate=fat_datetime(st.st_atime,0); - direntry->begin_hi=0; - direntry->mtime=fat_datetime(st.st_mtime,1); - direntry->mdate=fat_datetime(st.st_mtime,0); - if(is_dotdot) - set_begin_of_direntry(direntry, first_cluster_of_parent); - else if(is_dot) - set_begin_of_direntry(direntry, first_cluster); - else - direntry->begin=0; /* do that later */ - if (st.st_size > 0x7fffffff) { - fprintf(stderr, "File %s is larger than 2GB\n", buffer); - free(buffer); - return -2; - } - direntry->size=cpu_to_le32(S_ISDIR(st.st_mode)?0:st.st_size); - - /* create mapping for this file */ - if(!is_dot && !is_dotdot && (S_ISDIR(st.st_mode) || st.st_size)) { - s->current_mapping=(mapping_t*)array_get_next(&(s->mapping)); - s->current_mapping->begin=0; - s->current_mapping->end=st.st_size; - /* - * we get the direntry of the most recent direntry, which - * contains the short name and all the relevant information. - */ - s->current_mapping->dir_index=s->directory.next-1; - s->current_mapping->first_mapping_index = -1; - if (S_ISDIR(st.st_mode)) { - s->current_mapping->mode = MODE_DIRECTORY; - s->current_mapping->info.dir.parent_mapping_index = - mapping_index; - } else { - s->current_mapping->mode = MODE_UNDEFINED; - s->current_mapping->info.file.offset = 0; - } - s->current_mapping->path=buffer; - s->current_mapping->read_only = - (st.st_mode & (S_IWUSR | S_IWGRP | S_IWOTH)) == 0; - } - } - closedir(dir); - - /* fill with zeroes up to the end of the cluster */ - while(s->directory.next%(0x10*s->sectors_per_cluster)) { - direntry_t* direntry=array_get_next(&(s->directory)); - memset(direntry,0,sizeof(direntry_t)); - } - -/* TODO: if there are more entries, bootsector has to be adjusted! */ -#define ROOT_ENTRIES (0x02 * 0x10 * s->sectors_per_cluster) - if (mapping_index == 0 && s->directory.next < ROOT_ENTRIES) { - /* root directory */ - int cur = s->directory.next; - array_ensure_allocated(&(s->directory), ROOT_ENTRIES - 1); - memset(array_get(&(s->directory), cur), 0, - (ROOT_ENTRIES - cur) * sizeof(direntry_t)); - } - - /* reget the mapping, since s->mapping was possibly realloc()ed */ - mapping = (mapping_t*)array_get(&(s->mapping), mapping_index); - first_cluster += (s->directory.next - mapping->info.dir.first_dir_index) - * 0x20 / s->cluster_size; - mapping->end = first_cluster; - - direntry = (direntry_t*)array_get(&(s->directory), mapping->dir_index); - set_begin_of_direntry(direntry, mapping->begin); - - return 0; -} - -static inline uint32_t sector2cluster(BDRVVVFATState* s,off_t sector_num) -{ - return (sector_num-s->faked_sectors)/s->sectors_per_cluster; -} - -static inline off_t cluster2sector(BDRVVVFATState* s, uint32_t cluster_num) -{ - return s->faked_sectors + s->sectors_per_cluster * cluster_num; -} - -static inline uint32_t sector_offset_in_cluster(BDRVVVFATState* s,off_t sector_num) -{ - return (sector_num-s->first_sectors_number-2*s->sectors_per_fat)%s->sectors_per_cluster; -} - -#ifdef DBG -static direntry_t* get_direntry_for_mapping(BDRVVVFATState* s,mapping_t* mapping) -{ - if(mapping->mode==MODE_UNDEFINED) - return 0; - return (direntry_t*)(s->directory.pointer+sizeof(direntry_t)*mapping->dir_index); -} -#endif - -static int init_directories(BDRVVVFATState* s, - const char* dirname) -{ - bootsector_t* bootsector; - mapping_t* mapping; - unsigned int i; - unsigned int cluster; - - memset(&(s->first_sectors[0]),0,0x40*0x200); - - s->cluster_size=s->sectors_per_cluster*0x200; - s->cluster_buffer=qemu_malloc(s->cluster_size); - - /* - * The formula: sc = spf+1+spf*spc*(512*8/fat_type), - * where sc is sector_count, - * spf is sectors_per_fat, - * spc is sectors_per_clusters, and - * fat_type = 12, 16 or 32. - */ - i = 1+s->sectors_per_cluster*0x200*8/s->fat_type; - s->sectors_per_fat=(s->sector_count+i)/i; /* round up */ - - array_init(&(s->mapping),sizeof(mapping_t)); - array_init(&(s->directory),sizeof(direntry_t)); - - /* add volume label */ - { - direntry_t* entry=array_get_next(&(s->directory)); - entry->attributes=0x28; /* archive | volume label */ - snprintf((char*)entry->name,11,"QEMU VVFAT"); - } - - /* Now build FAT, and write back information into directory */ - init_fat(s); - - s->faked_sectors=s->first_sectors_number+s->sectors_per_fat*2; - s->cluster_count=sector2cluster(s, s->sector_count); - - mapping = array_get_next(&(s->mapping)); - mapping->begin = 0; - mapping->dir_index = 0; - mapping->info.dir.parent_mapping_index = -1; - mapping->first_mapping_index = -1; - mapping->path = strdup(dirname); - i = strlen(mapping->path); - if (i > 0 && mapping->path[i - 1] == '/') - mapping->path[i - 1] = '\0'; - mapping->mode = MODE_DIRECTORY; - mapping->read_only = 0; - s->path = mapping->path; - - for (i = 0, cluster = 0; i < s->mapping.next; i++) { - /* MS-DOS expects the FAT to be 0 for the root directory - * (except for the media byte). */ - /* LATER TODO: still true for FAT32? */ - int fix_fat = (i != 0); - mapping = array_get(&(s->mapping), i); - - if (mapping->mode & MODE_DIRECTORY) { - mapping->begin = cluster; - if(read_directory(s, i)) { - fprintf(stderr, "Could not read directory %s\n", - mapping->path); - return -1; - } - mapping = array_get(&(s->mapping), i); - } else { - assert(mapping->mode == MODE_UNDEFINED); - mapping->mode=MODE_NORMAL; - mapping->begin = cluster; - if (mapping->end > 0) { - direntry_t* direntry = array_get(&(s->directory), - mapping->dir_index); - - mapping->end = cluster + 1 + (mapping->end-1)/s->cluster_size; - set_begin_of_direntry(direntry, mapping->begin); - } else { - mapping->end = cluster + 1; - fix_fat = 0; - } - } - - assert(mapping->begin < mapping->end); - - /* next free cluster */ - cluster = mapping->end; - - if(cluster > s->cluster_count) { - fprintf(stderr,"Directory does not fit in FAT%d (capacity %s)\n", - s->fat_type, - s->fat_type == 12 ? s->sector_count == 2880 ? "1.44 MB" - : "2.88 MB" - : "504MB"); - return -EINVAL; - } - - /* fix fat for entry */ - if (fix_fat) { - int j; - for(j = mapping->begin; j < mapping->end - 1; j++) - fat_set(s, j, j+1); - fat_set(s, mapping->end - 1, s->max_fat_value); - } - } - - mapping = array_get(&(s->mapping), 0); - s->sectors_of_root_directory = mapping->end * s->sectors_per_cluster; - s->last_cluster_of_root_directory = mapping->end; - - /* the FAT signature */ - fat_set(s,0,s->max_fat_value); - fat_set(s,1,s->max_fat_value); - - s->current_mapping = NULL; - - bootsector=(bootsector_t*)(s->first_sectors+(s->first_sectors_number-1)*0x200); - bootsector->jump[0]=0xeb; - bootsector->jump[1]=0x3e; - bootsector->jump[2]=0x90; - memcpy(bootsector->name,"QEMU ",8); - bootsector->sector_size=cpu_to_le16(0x200); - bootsector->sectors_per_cluster=s->sectors_per_cluster; - bootsector->reserved_sectors=cpu_to_le16(1); - bootsector->number_of_fats=0x2; /* number of FATs */ - bootsector->root_entries=cpu_to_le16(s->sectors_of_root_directory*0x10); - bootsector->total_sectors16=s->sector_count>0xffff?0:cpu_to_le16(s->sector_count); - bootsector->media_type=(s->fat_type!=12?0xf8:s->sector_count==5760?0xf9:0xf8); /* media descriptor */ - s->fat.pointer[0] = bootsector->media_type; - bootsector->sectors_per_fat=cpu_to_le16(s->sectors_per_fat); - bootsector->sectors_per_track=cpu_to_le16(s->bs->secs); - bootsector->number_of_heads=cpu_to_le16(s->bs->heads); - bootsector->hidden_sectors=cpu_to_le32(s->first_sectors_number==1?0:0x3f); - bootsector->total_sectors=cpu_to_le32(s->sector_count>0xffff?s->sector_count:0); - - /* LATER TODO: if FAT32, this is wrong */ - bootsector->u.fat16.drive_number=s->fat_type==12?0:0x80; /* assume this is hda (TODO) */ - bootsector->u.fat16.current_head=0; - bootsector->u.fat16.signature=0x29; - bootsector->u.fat16.id=cpu_to_le32(0xfabe1afd); - - memcpy(bootsector->u.fat16.volume_label,"QEMU VVFAT ",11); - memcpy(bootsector->fat_type,(s->fat_type==12?"FAT12 ":s->fat_type==16?"FAT16 ":"FAT32 "),8); - bootsector->magic[0]=0x55; bootsector->magic[1]=0xaa; - - return 0; -} - -#ifdef DEBUG -static BDRVVVFATState *vvv = NULL; -#endif - -static int enable_write_target(BDRVVVFATState *s); -static int is_consistent(BDRVVVFATState *s); - -static int vvfat_open(BlockDriverState *bs, const char* dirname, int flags) -{ - BDRVVVFATState *s = bs->opaque; - int floppy = 0; - int i; - -#ifdef DEBUG - vvv = s; -#endif - -DLOG(if (stderr == NULL) { - stderr = fopen("vvfat.log", "a"); - setbuf(stderr, NULL); -}) - - s->bs = bs; - - s->fat_type=16; - /* LATER TODO: if FAT32, adjust */ - s->sectors_per_cluster=0x10; - /* 504MB disk*/ - bs->cyls=1024; bs->heads=16; bs->secs=63; - - s->current_cluster=0xffffffff; - - s->first_sectors_number=0x40; - /* read only is the default for safety */ - bs->read_only = 1; - s->qcow = s->write_target = NULL; - s->qcow_filename = NULL; - s->fat2 = NULL; - s->downcase_short_names = 1; - - if (!strstart(dirname, "fat:", NULL)) - return -1; - - if (strstr(dirname, ":floppy:")) { - floppy = 1; - s->fat_type = 12; - s->first_sectors_number = 1; - s->sectors_per_cluster=2; - bs->cyls = 80; bs->heads = 2; bs->secs = 36; - } - - s->sector_count=bs->cyls*bs->heads*bs->secs; - - if (strstr(dirname, ":32:")) { - fprintf(stderr, "Big fat greek warning: FAT32 has not been tested. You are welcome to do so!\n"); - s->fat_type = 32; - } else if (strstr(dirname, ":16:")) { - s->fat_type = 16; - } else if (strstr(dirname, ":12:")) { - s->fat_type = 12; - s->sector_count=2880; - } - - if (strstr(dirname, ":rw:")) { - if (enable_write_target(s)) - return -1; - bs->read_only = 0; - } - - i = strrchr(dirname, ':') - dirname; - assert(i >= 3); - if (dirname[i-2] == ':' && qemu_isalpha(dirname[i-1])) - /* workaround for DOS drive names */ - dirname += i-1; - else - dirname += i+1; - - bs->total_sectors=bs->cyls*bs->heads*bs->secs; - - if(init_directories(s, dirname)) - return -1; - - s->sector_count = s->faked_sectors + s->sectors_per_cluster*s->cluster_count; - - if(s->first_sectors_number==0x40) - init_mbr(s); - - /* for some reason or other, MS-DOS does not like to know about CHS... */ - if (floppy) - bs->heads = bs->cyls = bs->secs = 0; - - // assert(is_consistent(s)); - return 0; -} - -static inline void vvfat_close_current_file(BDRVVVFATState *s) -{ - if(s->current_mapping) { - s->current_mapping = NULL; - if (s->current_fd) { - close(s->current_fd); - s->current_fd = 0; - } - } - s->current_cluster = -1; -} - -/* mappings between index1 and index2-1 are supposed to be ordered - * return value is the index of the last mapping for which end>cluster_num - */ -static inline int find_mapping_for_cluster_aux(BDRVVVFATState* s,int cluster_num,int index1,int index2) -{ - int index3=index1+1; - while(1) { - mapping_t* mapping; - index3=(index1+index2)/2; - mapping=array_get(&(s->mapping),index3); - assert(mapping->begin < mapping->end); - if(mapping->begin>=cluster_num) { - assert(index2!=index3 || index2==0); - if(index2==index3) - return index1; - index2=index3; - } else { - if(index1==index3) - return mapping->end<=cluster_num ? index2 : index1; - index1=index3; - } - assert(index1<=index2); - DLOG(mapping=array_get(&(s->mapping),index1); - assert(mapping->begin<=cluster_num); - assert(index2 >= s->mapping.next || - ((mapping = array_get(&(s->mapping),index2)) && - mapping->end>cluster_num))); - } -} - -static inline mapping_t* find_mapping_for_cluster(BDRVVVFATState* s,int cluster_num) -{ - int index=find_mapping_for_cluster_aux(s,cluster_num,0,s->mapping.next); - mapping_t* mapping; - if(index>=s->mapping.next) - return NULL; - mapping=array_get(&(s->mapping),index); - if(mapping->begin>cluster_num) - return NULL; - assert(mapping->begin<=cluster_num && mapping->end>cluster_num); - return mapping; -} - -/* - * This function simply compares path == mapping->path. Since the mappings - * are sorted by cluster, this is expensive: O(n). - */ -static inline mapping_t* find_mapping_for_path(BDRVVVFATState* s, - const char* path) -{ - int i; - - for (i = 0; i < s->mapping.next; i++) { - mapping_t* mapping = array_get(&(s->mapping), i); - if (mapping->first_mapping_index < 0 && - !strcmp(path, mapping->path)) - return mapping; - } - - return NULL; -} - -static int open_file(BDRVVVFATState* s,mapping_t* mapping) -{ - if(!mapping) - return -1; - if(!s->current_mapping || - strcmp(s->current_mapping->path,mapping->path)) { - /* open file */ - int fd = open(mapping->path, O_RDONLY | O_BINARY | O_LARGEFILE); - if(fd<0) - return -1; - vvfat_close_current_file(s); - s->current_fd = fd; - s->current_mapping = mapping; - } - return 0; -} - -static inline int read_cluster(BDRVVVFATState *s,int cluster_num) -{ - if(s->current_cluster != cluster_num) { - int result=0; - off_t offset; - assert(!s->current_mapping || s->current_fd || (s->current_mapping->mode & MODE_DIRECTORY)); - if(!s->current_mapping - || s->current_mapping->begin>cluster_num - || s->current_mapping->end<=cluster_num) { - /* binary search of mappings for file */ - mapping_t* mapping=find_mapping_for_cluster(s,cluster_num); - - assert(!mapping || (cluster_num>=mapping->begin && cluster_numend)); - - if (mapping && mapping->mode & MODE_DIRECTORY) { - vvfat_close_current_file(s); - s->current_mapping = mapping; -read_cluster_directory: - offset = s->cluster_size*(cluster_num-s->current_mapping->begin); - s->cluster = (unsigned char*)s->directory.pointer+offset - + 0x20*s->current_mapping->info.dir.first_dir_index; - assert(((s->cluster-(unsigned char*)s->directory.pointer)%s->cluster_size)==0); - assert((char*)s->cluster+s->cluster_size <= s->directory.pointer+s->directory.next*s->directory.item_size); - s->current_cluster = cluster_num; - return 0; - } - - if(open_file(s,mapping)) - return -2; - } else if (s->current_mapping->mode & MODE_DIRECTORY) - goto read_cluster_directory; - - assert(s->current_fd); - - offset=s->cluster_size*(cluster_num-s->current_mapping->begin)+s->current_mapping->info.file.offset; - if(lseek(s->current_fd, offset, SEEK_SET)!=offset) - return -3; - s->cluster=s->cluster_buffer; - result=read(s->current_fd,s->cluster,s->cluster_size); - if(result<0) { - s->current_cluster = -1; - return -1; - } - s->current_cluster = cluster_num; - } - return 0; -} - -#ifdef DEBUG -static void hexdump(const void* address, uint32_t len) -{ - const unsigned char* p = address; - int i, j; - - for (i = 0; i < len; i += 16) { - for (j = 0; j < 16 && i + j < len; j++) - fprintf(stderr, "%02x ", p[i + j]); - for (; j < 16; j++) - fprintf(stderr, " "); - fprintf(stderr, " "); - for (j = 0; j < 16 && i + j < len; j++) - fprintf(stderr, "%c", (p[i + j] < ' ' || p[i + j] > 0x7f) ? '.' : p[i + j]); - fprintf(stderr, "\n"); - } -} - -static void print_direntry(const direntry_t* direntry) -{ - int j = 0; - char buffer[1024]; - - fprintf(stderr, "direntry 0x%x: ", (int)direntry); - if(!direntry) - return; - if(is_long_name(direntry)) { - unsigned char* c=(unsigned char*)direntry; - int i; - for(i=1;i<11 && c[i] && c[i]!=0xff;i+=2) -#define ADD_CHAR(c) {buffer[j] = (c); if (buffer[j] < ' ') buffer[j] = 0xb0; j++;} - ADD_CHAR(c[i]); - for(i=14;i<26 && c[i] && c[i]!=0xff;i+=2) - ADD_CHAR(c[i]); - for(i=28;i<32 && c[i] && c[i]!=0xff;i+=2) - ADD_CHAR(c[i]); - buffer[j] = 0; - fprintf(stderr, "%s\n", buffer); - } else { - int i; - for(i=0;i<11;i++) - ADD_CHAR(direntry->name[i]); - buffer[j] = 0; - fprintf(stderr,"%s attributes=0x%02x begin=%d size=%d\n", - buffer, - direntry->attributes, - begin_of_direntry(direntry),le32_to_cpu(direntry->size)); - } -} - -static void print_mapping(const mapping_t* mapping) -{ - fprintf(stderr, "mapping (0x%x): begin, end = %d, %d, dir_index = %d, first_mapping_index = %d, name = %s, mode = 0x%x, " , (int)mapping, mapping->begin, mapping->end, mapping->dir_index, mapping->first_mapping_index, mapping->path, mapping->mode); - if (mapping->mode & MODE_DIRECTORY) - fprintf(stderr, "parent_mapping_index = %d, first_dir_index = %d\n", mapping->info.dir.parent_mapping_index, mapping->info.dir.first_dir_index); - else - fprintf(stderr, "offset = %d\n", mapping->info.file.offset); -} -#endif - -static int vvfat_read(BlockDriverState *bs, int64_t sector_num, - uint8_t *buf, int nb_sectors) -{ - BDRVVVFATState *s = bs->opaque; - int i; - - for(i=0;i= s->sector_count) - return -1; - if (s->qcow) { - int n; - if (s->qcow->drv->bdrv_is_allocated(s->qcow, - sector_num, nb_sectors-i, &n)) { -DLOG(fprintf(stderr, "sectors %d+%d allocated\n", (int)sector_num, n)); - if (s->qcow->drv->bdrv_read(s->qcow, sector_num, buf+i*0x200, n)) - return -1; - i += n - 1; - sector_num += n - 1; - continue; - } -DLOG(fprintf(stderr, "sector %d not allocated\n", (int)sector_num)); - } - if(sector_numfaked_sectors) { - if(sector_numfirst_sectors_number) - memcpy(buf+i*0x200,&(s->first_sectors[sector_num*0x200]),0x200); - else if(sector_num-s->first_sectors_numbersectors_per_fat) - memcpy(buf+i*0x200,&(s->fat.pointer[(sector_num-s->first_sectors_number)*0x200]),0x200); - else if(sector_num-s->first_sectors_number-s->sectors_per_fatsectors_per_fat) - memcpy(buf+i*0x200,&(s->fat.pointer[(sector_num-s->first_sectors_number-s->sectors_per_fat)*0x200]),0x200); - } else { - uint32_t sector=sector_num-s->faked_sectors, - sector_offset_in_cluster=(sector%s->sectors_per_cluster), - cluster_num=sector/s->sectors_per_cluster; - if(read_cluster(s, cluster_num) != 0) { - /* LATER TODO: strict: return -1; */ - memset(buf+i*0x200,0,0x200); - continue; - } - memcpy(buf+i*0x200,s->cluster+sector_offset_in_cluster*0x200,0x200); - } - } - return 0; -} - -/* LATER TODO: statify all functions */ - -/* - * Idea of the write support (use snapshot): - * - * 1. check if all data is consistent, recording renames, modifications, - * new files and directories (in s->commits). - * - * 2. if the data is not consistent, stop committing - * - * 3. handle renames, and create new files and directories (do not yet - * write their contents) - * - * 4. walk the directories, fixing the mapping and direntries, and marking - * the handled mappings as not deleted - * - * 5. commit the contents of the files - * - * 6. handle deleted files and directories - * - */ - -typedef struct commit_t { - char* path; - union { - struct { uint32_t cluster; } rename; - struct { int dir_index; uint32_t modified_offset; } writeout; - struct { uint32_t first_cluster; } new_file; - struct { uint32_t cluster; } mkdir; - } param; - /* DELETEs and RMDIRs are handled differently: see handle_deletes() */ - enum { - ACTION_RENAME, ACTION_WRITEOUT, ACTION_NEW_FILE, ACTION_MKDIR - } action; -} commit_t; - -static void clear_commits(BDRVVVFATState* s) -{ - int i; -DLOG(fprintf(stderr, "clear_commits (%d commits)\n", s->commits.next)); - for (i = 0; i < s->commits.next; i++) { - commit_t* commit = array_get(&(s->commits), i); - assert(commit->path || commit->action == ACTION_WRITEOUT); - if (commit->action != ACTION_WRITEOUT) { - assert(commit->path); - free(commit->path); - } else - assert(commit->path == NULL); - } - s->commits.next = 0; -} - -static void schedule_rename(BDRVVVFATState* s, - uint32_t cluster, char* new_path) -{ - commit_t* commit = array_get_next(&(s->commits)); - commit->path = new_path; - commit->param.rename.cluster = cluster; - commit->action = ACTION_RENAME; -} - -static void schedule_writeout(BDRVVVFATState* s, - int dir_index, uint32_t modified_offset) -{ - commit_t* commit = array_get_next(&(s->commits)); - commit->path = NULL; - commit->param.writeout.dir_index = dir_index; - commit->param.writeout.modified_offset = modified_offset; - commit->action = ACTION_WRITEOUT; -} - -static void schedule_new_file(BDRVVVFATState* s, - char* path, uint32_t first_cluster) -{ - commit_t* commit = array_get_next(&(s->commits)); - commit->path = path; - commit->param.new_file.first_cluster = first_cluster; - commit->action = ACTION_NEW_FILE; -} - -static void schedule_mkdir(BDRVVVFATState* s, uint32_t cluster, char* path) -{ - commit_t* commit = array_get_next(&(s->commits)); - commit->path = path; - commit->param.mkdir.cluster = cluster; - commit->action = ACTION_MKDIR; -} - -typedef struct { - /* - * Since the sequence number is at most 0x3f, and the filename - * length is at most 13 times the sequence number, the maximal - * filename length is 0x3f * 13 bytes. - */ - unsigned char name[0x3f * 13 + 1]; - int checksum, len; - int sequence_number; -} long_file_name; - -static void lfn_init(long_file_name* lfn) -{ - lfn->sequence_number = lfn->len = 0; - lfn->checksum = 0x100; -} - -/* return 0 if parsed successfully, > 0 if no long name, < 0 if error */ -static int parse_long_name(long_file_name* lfn, - const direntry_t* direntry) -{ - int i, j, offset; - const unsigned char* pointer = (const unsigned char*)direntry; - - if (!is_long_name(direntry)) - return 1; - - if (pointer[0] & 0x40) { - lfn->sequence_number = pointer[0] & 0x3f; - lfn->checksum = pointer[13]; - lfn->name[0] = 0; - lfn->name[lfn->sequence_number * 13] = 0; - } else if ((pointer[0] & 0x3f) != --lfn->sequence_number) - return -1; - else if (pointer[13] != lfn->checksum) - return -2; - else if (pointer[12] || pointer[26] || pointer[27]) - return -3; - - offset = 13 * (lfn->sequence_number - 1); - for (i = 0, j = 1; i < 13; i++, j+=2) { - if (j == 11) - j = 14; - else if (j == 26) - j = 28; - - if (pointer[j+1] == 0) - lfn->name[offset + i] = pointer[j]; - else if (pointer[j+1] != 0xff || (pointer[0] & 0x40) == 0) - return -4; - else - lfn->name[offset + i] = 0; - } - - if (pointer[0] & 0x40) - lfn->len = offset + strlen((char*)lfn->name + offset); - - return 0; -} - -/* returns 0 if successful, >0 if no short_name, and <0 on error */ -static int parse_short_name(BDRVVVFATState* s, - long_file_name* lfn, direntry_t* direntry) -{ - int i, j; - - if (!is_short_name(direntry)) - return 1; - - for (j = 7; j >= 0 && direntry->name[j] == ' '; j--); - for (i = 0; i <= j; i++) { - if (direntry->name[i] <= ' ' || direntry->name[i] > 0x7f) - return -1; - else if (s->downcase_short_names) - lfn->name[i] = qemu_tolower(direntry->name[i]); - else - lfn->name[i] = direntry->name[i]; - } - - for (j = 2; j >= 0 && direntry->extension[j] == ' '; j--); - if (j >= 0) { - lfn->name[i++] = '.'; - lfn->name[i + j + 1] = '\0'; - for (;j >= 0; j--) { - if (direntry->extension[j] <= ' ' || direntry->extension[j] > 0x7f) - return -2; - else if (s->downcase_short_names) - lfn->name[i + j] = qemu_tolower(direntry->extension[j]); - else - lfn->name[i + j] = direntry->extension[j]; - } - } else - lfn->name[i + j + 1] = '\0'; - - lfn->len = strlen((char*)lfn->name); - - return 0; -} - -static inline uint32_t modified_fat_get(BDRVVVFATState* s, - unsigned int cluster) -{ - if (cluster < s->last_cluster_of_root_directory) { - if (cluster + 1 == s->last_cluster_of_root_directory) - return s->max_fat_value; - else - return cluster + 1; - } - - if (s->fat_type==32) { - uint32_t* entry=((uint32_t*)s->fat2)+cluster; - return le32_to_cpu(*entry); - } else if (s->fat_type==16) { - uint16_t* entry=((uint16_t*)s->fat2)+cluster; - return le16_to_cpu(*entry); - } else { - const uint8_t* x=s->fat2+cluster*3/2; - return ((x[0]|(x[1]<<8))>>(cluster&1?4:0))&0x0fff; - } -} - -static inline int cluster_was_modified(BDRVVVFATState* s, uint32_t cluster_num) -{ - int was_modified = 0; - int i, dummy; - - if (s->qcow == NULL) - return 0; - - for (i = 0; !was_modified && i < s->sectors_per_cluster; i++) - was_modified = s->qcow->drv->bdrv_is_allocated(s->qcow, - cluster2sector(s, cluster_num) + i, 1, &dummy); - - return was_modified; -} - -static const char* get_basename(const char* path) -{ - char* basename = strrchr(path, '/'); - if (basename == NULL) - return path; - else - return basename + 1; /* strip '/' */ -} - -/* - * The array s->used_clusters holds the states of the clusters. If it is - * part of a file, it has bit 2 set, in case of a directory, bit 1. If it - * was modified, bit 3 is set. - * If any cluster is allocated, but not part of a file or directory, this - * driver refuses to commit. - */ -typedef enum { - USED_DIRECTORY = 1, USED_FILE = 2, USED_ANY = 3, USED_ALLOCATED = 4 -} used_t; - -/* - * get_cluster_count_for_direntry() not only determines how many clusters - * are occupied by direntry, but also if it was renamed or modified. - * - * A file is thought to be renamed *only* if there already was a file with - * exactly the same first cluster, but a different name. - * - * Further, the files/directories handled by this function are - * assumed to be *not* deleted (and *only* those). - */ -static uint32_t get_cluster_count_for_direntry(BDRVVVFATState* s, - direntry_t* direntry, const char* path) -{ - /* - * This is a little bit tricky: - * IF the guest OS just inserts a cluster into the file chain, - * and leaves the rest alone, (i.e. the original file had clusters - * 15 -> 16, but now has 15 -> 32 -> 16), then the following happens: - * - * - do_commit will write the cluster into the file at the given - * offset, but - * - * - the cluster which is overwritten should be moved to a later - * position in the file. - * - * I am not aware that any OS does something as braindead, but this - * situation could happen anyway when not committing for a long time. - * Just to be sure that this does not bite us, detect it, and copy the - * contents of the clusters to-be-overwritten into the qcow. - */ - int copy_it = 0; - int was_modified = 0; - int32_t ret = 0; - - uint32_t cluster_num = begin_of_direntry(direntry); - uint32_t offset = 0; - int first_mapping_index = -1; - mapping_t* mapping = NULL; - const char* basename2 = NULL; - - vvfat_close_current_file(s); - - /* the root directory */ - if (cluster_num == 0) - return 0; - - /* write support */ - if (s->qcow) { - basename2 = get_basename(path); - - mapping = find_mapping_for_cluster(s, cluster_num); - - if (mapping) { - const char* basename; - - assert(mapping->mode & MODE_DELETED); - mapping->mode &= ~MODE_DELETED; - - basename = get_basename(mapping->path); - - assert(mapping->mode & MODE_NORMAL); - - /* rename */ - if (strcmp(basename, basename2)) - schedule_rename(s, cluster_num, strdup(path)); - } else if (is_file(direntry)) - /* new file */ - schedule_new_file(s, strdup(path), cluster_num); - else { - assert(0); - return 0; - } - } - - while(1) { - if (s->qcow) { - if (!copy_it && cluster_was_modified(s, cluster_num)) { - if (mapping == NULL || - mapping->begin > cluster_num || - mapping->end <= cluster_num) - mapping = find_mapping_for_cluster(s, cluster_num); - - - if (mapping && - (mapping->mode & MODE_DIRECTORY) == 0) { - - /* was modified in qcow */ - if (offset != mapping->info.file.offset + s->cluster_size - * (cluster_num - mapping->begin)) { - /* offset of this cluster in file chain has changed */ - assert(0); - copy_it = 1; - } else if (offset == 0) { - const char* basename = get_basename(mapping->path); - - if (strcmp(basename, basename2)) - copy_it = 1; - first_mapping_index = array_index(&(s->mapping), mapping); - } - - if (mapping->first_mapping_index != first_mapping_index - && mapping->info.file.offset > 0) { - assert(0); - copy_it = 1; - } - - /* need to write out? */ - if (!was_modified && is_file(direntry)) { - was_modified = 1; - schedule_writeout(s, mapping->dir_index, offset); - } - } - } - - if (copy_it) { - int i, dummy; - /* - * This is horribly inefficient, but that is okay, since - * it is rarely executed, if at all. - */ - int64_t offset = cluster2sector(s, cluster_num); - - vvfat_close_current_file(s); - for (i = 0; i < s->sectors_per_cluster; i++) - if (!s->qcow->drv->bdrv_is_allocated(s->qcow, - offset + i, 1, &dummy)) { - if (vvfat_read(s->bs, - offset, s->cluster_buffer, 1)) - return -1; - if (s->qcow->drv->bdrv_write(s->qcow, - offset, s->cluster_buffer, 1)) - return -2; - } - } - } - - ret++; - if (s->used_clusters[cluster_num] & USED_ANY) - return 0; - s->used_clusters[cluster_num] = USED_FILE; - - cluster_num = modified_fat_get(s, cluster_num); - - if (fat_eof(s, cluster_num)) - return ret; - else if (cluster_num < 2 || cluster_num > s->max_fat_value - 16) - return -1; - - offset += s->cluster_size; - } -} - -/* - * This function looks at the modified data (qcow). - * It returns 0 upon inconsistency or error, and the number of clusters - * used by the directory, its subdirectories and their files. - */ -static int check_directory_consistency(BDRVVVFATState *s, - int cluster_num, const char* path) -{ - int ret = 0; - unsigned char* cluster = qemu_malloc(s->cluster_size); - direntry_t* direntries = (direntry_t*)cluster; - mapping_t* mapping = find_mapping_for_cluster(s, cluster_num); - - long_file_name lfn; - int path_len = strlen(path); - char path2[PATH_MAX]; - - assert(path_len < PATH_MAX); /* len was tested before! */ - pstrcpy(path2, sizeof(path2), path); - path2[path_len] = '/'; - path2[path_len + 1] = '\0'; - - if (mapping) { - const char* basename = get_basename(mapping->path); - const char* basename2 = get_basename(path); - - assert(mapping->mode & MODE_DIRECTORY); - - assert(mapping->mode & MODE_DELETED); - mapping->mode &= ~MODE_DELETED; - - if (strcmp(basename, basename2)) - schedule_rename(s, cluster_num, strdup(path)); - } else - /* new directory */ - schedule_mkdir(s, cluster_num, strdup(path)); - - lfn_init(&lfn); - do { - int i; - int subret = 0; - - ret++; - - if (s->used_clusters[cluster_num] & USED_ANY) { - fprintf(stderr, "cluster %d used more than once\n", (int)cluster_num); - return 0; - } - s->used_clusters[cluster_num] = USED_DIRECTORY; - -DLOG(fprintf(stderr, "read cluster %d (sector %d)\n", (int)cluster_num, (int)cluster2sector(s, cluster_num))); - subret = vvfat_read(s->bs, cluster2sector(s, cluster_num), cluster, - s->sectors_per_cluster); - if (subret) { - fprintf(stderr, "Error fetching direntries\n"); - fail: - free(cluster); - return 0; - } - - for (i = 0; i < 0x10 * s->sectors_per_cluster; i++) { - int cluster_count = 0; - -DLOG(fprintf(stderr, "check direntry %d: \n", i); print_direntry(direntries + i)); - if (is_volume_label(direntries + i) || is_dot(direntries + i) || - is_free(direntries + i)) - continue; - - subret = parse_long_name(&lfn, direntries + i); - if (subret < 0) { - fprintf(stderr, "Error in long name\n"); - goto fail; - } - if (subret == 0 || is_free(direntries + i)) - continue; - - if (fat_chksum(direntries+i) != lfn.checksum) { - subret = parse_short_name(s, &lfn, direntries + i); - if (subret < 0) { - fprintf(stderr, "Error in short name (%d)\n", subret); - goto fail; - } - if (subret > 0 || !strcmp((char*)lfn.name, ".") - || !strcmp((char*)lfn.name, "..")) - continue; - } - lfn.checksum = 0x100; /* cannot use long name twice */ - - if (path_len + 1 + lfn.len >= PATH_MAX) { - fprintf(stderr, "Name too long: %s/%s\n", path, lfn.name); - goto fail; - } - pstrcpy(path2 + path_len + 1, sizeof(path2) - path_len - 1, - (char*)lfn.name); - - if (is_directory(direntries + i)) { - if (begin_of_direntry(direntries + i) == 0) { - DLOG(fprintf(stderr, "invalid begin for directory: %s\n", path2); print_direntry(direntries + i)); - goto fail; - } - cluster_count = check_directory_consistency(s, - begin_of_direntry(direntries + i), path2); - if (cluster_count == 0) { - DLOG(fprintf(stderr, "problem in directory %s:\n", path2); print_direntry(direntries + i)); - goto fail; - } - } else if (is_file(direntries + i)) { - /* check file size with FAT */ - cluster_count = get_cluster_count_for_direntry(s, direntries + i, path2); - if (cluster_count != - (le32_to_cpu(direntries[i].size) + s->cluster_size - - 1) / s->cluster_size) { - DLOG(fprintf(stderr, "Cluster count mismatch\n")); - goto fail; - } - } else - assert(0); /* cluster_count = 0; */ - - ret += cluster_count; - } - - cluster_num = modified_fat_get(s, cluster_num); - } while(!fat_eof(s, cluster_num)); - - free(cluster); - return ret; -} - -/* returns 1 on success */ -static int is_consistent(BDRVVVFATState* s) -{ - int i, check; - int used_clusters_count = 0; - -DLOG(checkpoint()); - /* - * - get modified FAT - * - compare the two FATs (TODO) - * - get buffer for marking used clusters - * - recurse direntries from root (using bs->bdrv_read to make - * sure to get the new data) - * - check that the FAT agrees with the size - * - count the number of clusters occupied by this directory and - * its files - * - check that the cumulative used cluster count agrees with the - * FAT - * - if all is fine, return number of used clusters - */ - if (s->fat2 == NULL) { - int size = 0x200 * s->sectors_per_fat; - s->fat2 = qemu_malloc(size); - memcpy(s->fat2, s->fat.pointer, size); - } - check = vvfat_read(s->bs, - s->first_sectors_number, s->fat2, s->sectors_per_fat); - if (check) { - fprintf(stderr, "Could not copy fat\n"); - return 0; - } - assert (s->used_clusters); - for (i = 0; i < sector2cluster(s, s->sector_count); i++) - s->used_clusters[i] &= ~USED_ANY; - - clear_commits(s); - - /* mark every mapped file/directory as deleted. - * (check_directory_consistency() will unmark those still present). */ - if (s->qcow) - for (i = 0; i < s->mapping.next; i++) { - mapping_t* mapping = array_get(&(s->mapping), i); - if (mapping->first_mapping_index < 0) - mapping->mode |= MODE_DELETED; - } - - used_clusters_count = check_directory_consistency(s, 0, s->path); - if (used_clusters_count <= 0) { - DLOG(fprintf(stderr, "problem in directory\n")); - return 0; - } - - check = s->last_cluster_of_root_directory; - for (i = check; i < sector2cluster(s, s->sector_count); i++) { - if (modified_fat_get(s, i)) { - if(!s->used_clusters[i]) { - DLOG(fprintf(stderr, "FAT was modified (%d), but cluster is not used?\n", i)); - return 0; - } - check++; - } - - if (s->used_clusters[i] == USED_ALLOCATED) { - /* allocated, but not used... */ - DLOG(fprintf(stderr, "unused, modified cluster: %d\n", i)); - return 0; - } - } - - if (check != used_clusters_count) - return 0; - - return used_clusters_count; -} - -static inline void adjust_mapping_indices(BDRVVVFATState* s, - int offset, int adjust) -{ - int i; - - for (i = 0; i < s->mapping.next; i++) { - mapping_t* mapping = array_get(&(s->mapping), i); - -#define ADJUST_MAPPING_INDEX(name) \ - if (mapping->name >= offset) \ - mapping->name += adjust - - ADJUST_MAPPING_INDEX(first_mapping_index); - if (mapping->mode & MODE_DIRECTORY) - ADJUST_MAPPING_INDEX(info.dir.parent_mapping_index); - } -} - -/* insert or update mapping */ -static mapping_t* insert_mapping(BDRVVVFATState* s, - uint32_t begin, uint32_t end) -{ - /* - * - find mapping where mapping->begin >= begin, - * - if mapping->begin > begin: insert - * - adjust all references to mappings! - * - else: adjust - * - replace name - */ - int index = find_mapping_for_cluster_aux(s, begin, 0, s->mapping.next); - mapping_t* mapping = NULL; - mapping_t* first_mapping = array_get(&(s->mapping), 0); - - if (index < s->mapping.next && (mapping = array_get(&(s->mapping), index)) - && mapping->begin < begin) { - mapping->end = begin; - index++; - mapping = array_get(&(s->mapping), index); - } - if (index >= s->mapping.next || mapping->begin > begin) { - mapping = array_insert(&(s->mapping), index, 1); - mapping->path = NULL; - adjust_mapping_indices(s, index, +1); - } - - mapping->begin = begin; - mapping->end = end; - -DLOG(mapping_t* next_mapping; -assert(index + 1 >= s->mapping.next || -((next_mapping = array_get(&(s->mapping), index + 1)) && - next_mapping->begin >= end))); - - if (s->current_mapping && first_mapping != (mapping_t*)s->mapping.pointer) - s->current_mapping = array_get(&(s->mapping), - s->current_mapping - first_mapping); - - return mapping; -} - -static int remove_mapping(BDRVVVFATState* s, int mapping_index) -{ - mapping_t* mapping = array_get(&(s->mapping), mapping_index); - mapping_t* first_mapping = array_get(&(s->mapping), 0); - - /* free mapping */ - if (mapping->first_mapping_index < 0) - free(mapping->path); - - /* remove from s->mapping */ - array_remove(&(s->mapping), mapping_index); - - /* adjust all references to mappings */ - adjust_mapping_indices(s, mapping_index, -1); - - if (s->current_mapping && first_mapping != (mapping_t*)s->mapping.pointer) - s->current_mapping = array_get(&(s->mapping), - s->current_mapping - first_mapping); - - return 0; -} - -static void adjust_dirindices(BDRVVVFATState* s, int offset, int adjust) -{ - int i; - for (i = 0; i < s->mapping.next; i++) { - mapping_t* mapping = array_get(&(s->mapping), i); - if (mapping->dir_index >= offset) - mapping->dir_index += adjust; - if ((mapping->mode & MODE_DIRECTORY) && - mapping->info.dir.first_dir_index >= offset) - mapping->info.dir.first_dir_index += adjust; - } -} - -static direntry_t* insert_direntries(BDRVVVFATState* s, - int dir_index, int count) -{ - /* - * make room in s->directory, - * adjust_dirindices - */ - direntry_t* result = array_insert(&(s->directory), dir_index, count); - if (result == NULL) - return NULL; - adjust_dirindices(s, dir_index, count); - return result; -} - -static int remove_direntries(BDRVVVFATState* s, int dir_index, int count) -{ - int ret = array_remove_slice(&(s->directory), dir_index, count); - if (ret) - return ret; - adjust_dirindices(s, dir_index, -count); - return 0; -} - -/* - * Adapt the mappings of the cluster chain starting at first cluster - * (i.e. if a file starts at first_cluster, the chain is followed according - * to the modified fat, and the corresponding entries in s->mapping are - * adjusted) - */ -static int commit_mappings(BDRVVVFATState* s, - uint32_t first_cluster, int dir_index) -{ - mapping_t* mapping = find_mapping_for_cluster(s, first_cluster); - direntry_t* direntry = array_get(&(s->directory), dir_index); - uint32_t cluster = first_cluster; - - vvfat_close_current_file(s); - - assert(mapping); - assert(mapping->begin == first_cluster); - mapping->first_mapping_index = -1; - mapping->dir_index = dir_index; - mapping->mode = (dir_index <= 0 || is_directory(direntry)) ? - MODE_DIRECTORY : MODE_NORMAL; - - while (!fat_eof(s, cluster)) { - uint32_t c, c1; - - for (c = cluster, c1 = modified_fat_get(s, c); c + 1 == c1; - c = c1, c1 = modified_fat_get(s, c1)); - - c++; - if (c > mapping->end) { - int index = array_index(&(s->mapping), mapping); - int i, max_i = s->mapping.next - index; - for (i = 1; i < max_i && mapping[i].begin < c; i++); - while (--i > 0) - remove_mapping(s, index + 1); - } - assert(mapping == array_get(&(s->mapping), s->mapping.next - 1) - || mapping[1].begin >= c); - mapping->end = c; - - if (!fat_eof(s, c1)) { - int i = find_mapping_for_cluster_aux(s, c1, 0, s->mapping.next); - mapping_t* next_mapping = i >= s->mapping.next ? NULL : - array_get(&(s->mapping), i); - - if (next_mapping == NULL || next_mapping->begin > c1) { - int i1 = array_index(&(s->mapping), mapping); - - next_mapping = insert_mapping(s, c1, c1+1); - - if (c1 < c) - i1++; - mapping = array_get(&(s->mapping), i1); - } - - next_mapping->dir_index = mapping->dir_index; - next_mapping->first_mapping_index = - mapping->first_mapping_index < 0 ? - array_index(&(s->mapping), mapping) : - mapping->first_mapping_index; - next_mapping->path = mapping->path; - next_mapping->mode = mapping->mode; - next_mapping->read_only = mapping->read_only; - if (mapping->mode & MODE_DIRECTORY) { - next_mapping->info.dir.parent_mapping_index = - mapping->info.dir.parent_mapping_index; - next_mapping->info.dir.first_dir_index = - mapping->info.dir.first_dir_index + - 0x10 * s->sectors_per_cluster * - (mapping->end - mapping->begin); - } else - next_mapping->info.file.offset = mapping->info.file.offset + - mapping->end - mapping->begin; - - mapping = next_mapping; - } - - cluster = c1; - } - - return 0; -} - -static int commit_direntries(BDRVVVFATState* s, - int dir_index, int parent_mapping_index) -{ - direntry_t* direntry = array_get(&(s->directory), dir_index); - uint32_t first_cluster = dir_index == 0 ? 0 : begin_of_direntry(direntry); - mapping_t* mapping = find_mapping_for_cluster(s, first_cluster); - - int factor = 0x10 * s->sectors_per_cluster; - int old_cluster_count, new_cluster_count; - int current_dir_index = mapping->info.dir.first_dir_index; - int first_dir_index = current_dir_index; - int ret, i; - uint32_t c; - -DLOG(fprintf(stderr, "commit_direntries for %s, parent_mapping_index %d\n", mapping->path, parent_mapping_index)); - - assert(direntry); - assert(mapping); - assert(mapping->begin == first_cluster); - assert(mapping->info.dir.first_dir_index < s->directory.next); - assert(mapping->mode & MODE_DIRECTORY); - assert(dir_index == 0 || is_directory(direntry)); - - mapping->info.dir.parent_mapping_index = parent_mapping_index; - - if (first_cluster == 0) { - old_cluster_count = new_cluster_count = - s->last_cluster_of_root_directory; - } else { - for (old_cluster_count = 0, c = first_cluster; !fat_eof(s, c); - c = fat_get(s, c)) - old_cluster_count++; - - for (new_cluster_count = 0, c = first_cluster; !fat_eof(s, c); - c = modified_fat_get(s, c)) - new_cluster_count++; - } - - if (new_cluster_count > old_cluster_count) { - if (insert_direntries(s, - current_dir_index + factor * old_cluster_count, - factor * (new_cluster_count - old_cluster_count)) == NULL) - return -1; - } else if (new_cluster_count < old_cluster_count) - remove_direntries(s, - current_dir_index + factor * new_cluster_count, - factor * (old_cluster_count - new_cluster_count)); - - for (c = first_cluster; !fat_eof(s, c); c = modified_fat_get(s, c)) { - void* direntry = array_get(&(s->directory), current_dir_index); - int ret = vvfat_read(s->bs, cluster2sector(s, c), direntry, - s->sectors_per_cluster); - if (ret) - return ret; - assert(!strncmp(s->directory.pointer, "QEMU", 4)); - current_dir_index += factor; - } - - ret = commit_mappings(s, first_cluster, dir_index); - if (ret) - return ret; - - /* recurse */ - for (i = 0; i < factor * new_cluster_count; i++) { - direntry = array_get(&(s->directory), first_dir_index + i); - if (is_directory(direntry) && !is_dot(direntry)) { - mapping = find_mapping_for_cluster(s, first_cluster); - assert(mapping->mode & MODE_DIRECTORY); - ret = commit_direntries(s, first_dir_index + i, - array_index(&(s->mapping), mapping)); - if (ret) - return ret; - } - } - - return 0; -} - -/* commit one file (adjust contents, adjust mapping), - return first_mapping_index */ -static int commit_one_file(BDRVVVFATState* s, - int dir_index, uint32_t offset) -{ - direntry_t* direntry = array_get(&(s->directory), dir_index); - uint32_t c = begin_of_direntry(direntry); - uint32_t first_cluster = c; - mapping_t* mapping = find_mapping_for_cluster(s, c); - uint32_t size = filesize_of_direntry(direntry); - char* cluster = qemu_malloc(s->cluster_size); - uint32_t i; - int fd = 0; - - assert(offset < size); - assert((offset % s->cluster_size) == 0); - - for (i = s->cluster_size; i < offset; i += s->cluster_size) - c = modified_fat_get(s, c); - - fd = open(mapping->path, O_RDWR | O_CREAT | O_BINARY, 0666); - if (fd < 0) { - fprintf(stderr, "Could not open %s... (%s, %d)\n", mapping->path, - strerror(errno), errno); - return fd; - } - if (offset > 0) - if (lseek(fd, offset, SEEK_SET) != offset) - return -3; - - while (offset < size) { - uint32_t c1; - int rest_size = (size - offset > s->cluster_size ? - s->cluster_size : size - offset); - int ret; - - c1 = modified_fat_get(s, c); - - assert((size - offset == 0 && fat_eof(s, c)) || - (size > offset && c >=2 && !fat_eof(s, c))); - - ret = vvfat_read(s->bs, cluster2sector(s, c), - (uint8_t*)cluster, (rest_size + 0x1ff) / 0x200); - - if (ret < 0) - return ret; - - if (write(fd, cluster, rest_size) < 0) - return -2; - - offset += rest_size; - c = c1; - } - - ftruncate(fd, size); - close(fd); - - return commit_mappings(s, first_cluster, dir_index); -} - -#ifdef DEBUG -/* test, if all mappings point to valid direntries */ -static void check1(BDRVVVFATState* s) -{ - int i; - for (i = 0; i < s->mapping.next; i++) { - mapping_t* mapping = array_get(&(s->mapping), i); - if (mapping->mode & MODE_DELETED) { - fprintf(stderr, "deleted\n"); - continue; - } - assert(mapping->dir_index >= 0); - assert(mapping->dir_index < s->directory.next); - direntry_t* direntry = array_get(&(s->directory), mapping->dir_index); - assert(mapping->begin == begin_of_direntry(direntry) || mapping->first_mapping_index >= 0); - if (mapping->mode & MODE_DIRECTORY) { - assert(mapping->info.dir.first_dir_index + 0x10 * s->sectors_per_cluster * (mapping->end - mapping->begin) <= s->directory.next); - assert((mapping->info.dir.first_dir_index % (0x10 * s->sectors_per_cluster)) == 0); - } - } -} - -/* test, if all direntries have mappings */ -static void check2(BDRVVVFATState* s) -{ - int i; - int first_mapping = -1; - - for (i = 0; i < s->directory.next; i++) { - direntry_t* direntry = array_get(&(s->directory), i); - - if (is_short_name(direntry) && begin_of_direntry(direntry)) { - mapping_t* mapping = find_mapping_for_cluster(s, begin_of_direntry(direntry)); - assert(mapping); - assert(mapping->dir_index == i || is_dot(direntry)); - assert(mapping->begin == begin_of_direntry(direntry) || is_dot(direntry)); - } - - if ((i % (0x10 * s->sectors_per_cluster)) == 0) { - /* cluster start */ - int j, count = 0; - - for (j = 0; j < s->mapping.next; j++) { - mapping_t* mapping = array_get(&(s->mapping), j); - if (mapping->mode & MODE_DELETED) - continue; - if (mapping->mode & MODE_DIRECTORY) { - if (mapping->info.dir.first_dir_index <= i && mapping->info.dir.first_dir_index + 0x10 * s->sectors_per_cluster > i) { - assert(++count == 1); - if (mapping->first_mapping_index == -1) - first_mapping = array_index(&(s->mapping), mapping); - else - assert(first_mapping == mapping->first_mapping_index); - if (mapping->info.dir.parent_mapping_index < 0) - assert(j == 0); - else { - mapping_t* parent = array_get(&(s->mapping), mapping->info.dir.parent_mapping_index); - assert(parent->mode & MODE_DIRECTORY); - assert(parent->info.dir.first_dir_index < mapping->info.dir.first_dir_index); - } - } - } - } - if (count == 0) - first_mapping = -1; - } - } -} -#endif - -static int handle_renames_and_mkdirs(BDRVVVFATState* s) -{ - int i; - -#ifdef DEBUG - fprintf(stderr, "handle_renames\n"); - for (i = 0; i < s->commits.next; i++) { - commit_t* commit = array_get(&(s->commits), i); - fprintf(stderr, "%d, %s (%d, %d)\n", i, commit->path ? commit->path : "(null)", commit->param.rename.cluster, commit->action); - } -#endif - - for (i = 0; i < s->commits.next;) { - commit_t* commit = array_get(&(s->commits), i); - if (commit->action == ACTION_RENAME) { - mapping_t* mapping = find_mapping_for_cluster(s, - commit->param.rename.cluster); - char* old_path = mapping->path; - - assert(commit->path); - mapping->path = commit->path; - if (rename(old_path, mapping->path)) - return -2; - - if (mapping->mode & MODE_DIRECTORY) { - int l1 = strlen(mapping->path); - int l2 = strlen(old_path); - int diff = l1 - l2; - direntry_t* direntry = array_get(&(s->directory), - mapping->info.dir.first_dir_index); - uint32_t c = mapping->begin; - int i = 0; - - /* recurse */ - while (!fat_eof(s, c)) { - do { - direntry_t* d = direntry + i; - - if (is_file(d) || (is_directory(d) && !is_dot(d))) { - mapping_t* m = find_mapping_for_cluster(s, - begin_of_direntry(d)); - int l = strlen(m->path); - char* new_path = qemu_malloc(l + diff + 1); - - assert(!strncmp(m->path, mapping->path, l2)); - - pstrcpy(new_path, l + diff + 1, mapping->path); - pstrcpy(new_path + l1, l + diff + 1 - l1, - m->path + l2); - - schedule_rename(s, m->begin, new_path); - } - i++; - } while((i % (0x10 * s->sectors_per_cluster)) != 0); - c = fat_get(s, c); - } - } - - free(old_path); - array_remove(&(s->commits), i); - continue; - } else if (commit->action == ACTION_MKDIR) { - mapping_t* mapping; - int j, parent_path_len; - -#ifdef __MINGW32__ - if (mkdir(commit->path)) - return -5; -#else - if (mkdir(commit->path, 0755)) - return -5; -#endif - - mapping = insert_mapping(s, commit->param.mkdir.cluster, - commit->param.mkdir.cluster + 1); - if (mapping == NULL) - return -6; - - mapping->mode = MODE_DIRECTORY; - mapping->read_only = 0; - mapping->path = commit->path; - j = s->directory.next; - assert(j); - insert_direntries(s, s->directory.next, - 0x10 * s->sectors_per_cluster); - mapping->info.dir.first_dir_index = j; - - parent_path_len = strlen(commit->path) - - strlen(get_basename(commit->path)) - 1; - for (j = 0; j < s->mapping.next; j++) { - mapping_t* m = array_get(&(s->mapping), j); - if (m->first_mapping_index < 0 && m != mapping && - !strncmp(m->path, mapping->path, parent_path_len) && - strlen(m->path) == parent_path_len) - break; - } - assert(j < s->mapping.next); - mapping->info.dir.parent_mapping_index = j; - - array_remove(&(s->commits), i); - continue; - } - - i++; - } - return 0; -} - -/* - * TODO: make sure that the short name is not matching *another* file - */ -static int handle_commits(BDRVVVFATState* s) -{ - int i, fail = 0; - - vvfat_close_current_file(s); - - for (i = 0; !fail && i < s->commits.next; i++) { - commit_t* commit = array_get(&(s->commits), i); - switch(commit->action) { - case ACTION_RENAME: case ACTION_MKDIR: - assert(0); - fail = -2; - break; - case ACTION_WRITEOUT: { - direntry_t* entry = array_get(&(s->directory), - commit->param.writeout.dir_index); - uint32_t begin = begin_of_direntry(entry); - mapping_t* mapping = find_mapping_for_cluster(s, begin); - - assert(mapping); - assert(mapping->begin == begin); - assert(commit->path == NULL); - - if (commit_one_file(s, commit->param.writeout.dir_index, - commit->param.writeout.modified_offset)) - fail = -3; - - break; - } - case ACTION_NEW_FILE: { - int begin = commit->param.new_file.first_cluster; - mapping_t* mapping = find_mapping_for_cluster(s, begin); - direntry_t* entry; - int i; - - /* find direntry */ - for (i = 0; i < s->directory.next; i++) { - entry = array_get(&(s->directory), i); - if (is_file(entry) && begin_of_direntry(entry) == begin) - break; - } - - if (i >= s->directory.next) { - fail = -6; - continue; - } - - /* make sure there exists an initial mapping */ - if (mapping && mapping->begin != begin) { - mapping->end = begin; - mapping = NULL; - } - if (mapping == NULL) { - mapping = insert_mapping(s, begin, begin+1); - } - /* most members will be fixed in commit_mappings() */ - assert(commit->path); - mapping->path = commit->path; - mapping->read_only = 0; - mapping->mode = MODE_NORMAL; - mapping->info.file.offset = 0; - - if (commit_one_file(s, i, 0)) - fail = -7; - - break; - } - default: - assert(0); - } - } - if (i > 0 && array_remove_slice(&(s->commits), 0, i)) - return -1; - return fail; -} - -static int handle_deletes(BDRVVVFATState* s) -{ - int i, deferred = 1, deleted = 1; - - /* delete files corresponding to mappings marked as deleted */ - /* handle DELETEs and unused mappings (modified_fat_get(s, mapping->begin) == 0) */ - while (deferred && deleted) { - deferred = 0; - deleted = 0; - - for (i = 1; i < s->mapping.next; i++) { - mapping_t* mapping = array_get(&(s->mapping), i); - if (mapping->mode & MODE_DELETED) { - direntry_t* entry = array_get(&(s->directory), - mapping->dir_index); - - if (is_free(entry)) { - /* remove file/directory */ - if (mapping->mode & MODE_DIRECTORY) { - int j, next_dir_index = s->directory.next, - first_dir_index = mapping->info.dir.first_dir_index; - - if (rmdir(mapping->path) < 0) { - if (errno == ENOTEMPTY) { - deferred++; - continue; - } else - return -5; - } - - for (j = 1; j < s->mapping.next; j++) { - mapping_t* m = array_get(&(s->mapping), j); - if (m->mode & MODE_DIRECTORY && - m->info.dir.first_dir_index > - first_dir_index && - m->info.dir.first_dir_index < - next_dir_index) - next_dir_index = - m->info.dir.first_dir_index; - } - remove_direntries(s, first_dir_index, - next_dir_index - first_dir_index); - - deleted++; - } - } else { - if (unlink(mapping->path)) - return -4; - deleted++; - } - DLOG(fprintf(stderr, "DELETE (%d)\n", i); print_mapping(mapping); print_direntry(entry)); - remove_mapping(s, i); - } - } - } - - return 0; -} - -/* - * synchronize mapping with new state: - * - * - copy FAT (with bdrv_read) - * - mark all filenames corresponding to mappings as deleted - * - recurse direntries from root (using bs->bdrv_read) - * - delete files corresponding to mappings marked as deleted - */ -static int do_commit(BDRVVVFATState* s) -{ - int ret = 0; - - /* the real meat are the commits. Nothing to do? Move along! */ - if (s->commits.next == 0) - return 0; - - vvfat_close_current_file(s); - - ret = handle_renames_and_mkdirs(s); - if (ret) { - fprintf(stderr, "Error handling renames (%d)\n", ret); - assert(0); - return ret; - } - - /* copy FAT (with bdrv_read) */ - memcpy(s->fat.pointer, s->fat2, 0x200 * s->sectors_per_fat); - - /* recurse direntries from root (using bs->bdrv_read) */ - ret = commit_direntries(s, 0, -1); - if (ret) { - fprintf(stderr, "Fatal: error while committing (%d)\n", ret); - assert(0); - return ret; - } - - ret = handle_commits(s); - if (ret) { - fprintf(stderr, "Error handling commits (%d)\n", ret); - assert(0); - return ret; - } - - ret = handle_deletes(s); - if (ret) { - fprintf(stderr, "Error deleting\n"); - assert(0); - return ret; - } - - s->qcow->drv->bdrv_make_empty(s->qcow); - - memset(s->used_clusters, 0, sector2cluster(s, s->sector_count)); - -DLOG(checkpoint()); - return 0; -} - -static int try_commit(BDRVVVFATState* s) -{ - vvfat_close_current_file(s); -DLOG(checkpoint()); - if(!is_consistent(s)) - return -1; - return do_commit(s); -} - -static int vvfat_write(BlockDriverState *bs, int64_t sector_num, - const uint8_t *buf, int nb_sectors) -{ - BDRVVVFATState *s = bs->opaque; - int i, ret; - -DLOG(checkpoint()); - - vvfat_close_current_file(s); - - /* - * Some sanity checks: - * - do not allow writing to the boot sector - * - do not allow to write non-ASCII filenames - */ - - if (sector_num < s->first_sectors_number) - return -1; - - for (i = sector2cluster(s, sector_num); - i <= sector2cluster(s, sector_num + nb_sectors - 1);) { - mapping_t* mapping = find_mapping_for_cluster(s, i); - if (mapping) { - if (mapping->read_only) { - fprintf(stderr, "Tried to write to write-protected file %s\n", - mapping->path); - return -1; - } - - if (mapping->mode & MODE_DIRECTORY) { - int begin = cluster2sector(s, i); - int end = begin + s->sectors_per_cluster, k; - int dir_index; - const direntry_t* direntries; - long_file_name lfn; - - lfn_init(&lfn); - - if (begin < sector_num) - begin = sector_num; - if (end > sector_num + nb_sectors) - end = sector_num + nb_sectors; - dir_index = mapping->dir_index + - 0x10 * (begin - mapping->begin * s->sectors_per_cluster); - direntries = (direntry_t*)(buf + 0x200 * (begin - sector_num)); - - for (k = 0; k < (end - begin) * 0x10; k++) { - /* do not allow non-ASCII filenames */ - if (parse_long_name(&lfn, direntries + k) < 0) { - fprintf(stderr, "Warning: non-ASCII filename\n"); - return -1; - } - /* no access to the direntry of a read-only file */ - else if (is_short_name(direntries+k) && - (direntries[k].attributes & 1)) { - if (memcmp(direntries + k, - array_get(&(s->directory), dir_index + k), - sizeof(direntry_t))) { - fprintf(stderr, "Warning: tried to write to write-protected file\n"); - return -1; - } - } - } - } - i = mapping->end; - } else - i++; - } - - /* - * Use qcow backend. Commit later. - */ -DLOG(fprintf(stderr, "Write to qcow backend: %d + %d\n", (int)sector_num, nb_sectors)); - ret = s->qcow->drv->bdrv_write(s->qcow, sector_num, buf, nb_sectors); - if (ret < 0) { - fprintf(stderr, "Error writing to qcow backend\n"); - return ret; - } - - for (i = sector2cluster(s, sector_num); - i <= sector2cluster(s, sector_num + nb_sectors - 1); i++) - if (i >= 0) - s->used_clusters[i] |= USED_ALLOCATED; - -DLOG(checkpoint()); - /* TODO: add timeout */ - try_commit(s); - -DLOG(checkpoint()); - return 0; -} - -static int vvfat_is_allocated(BlockDriverState *bs, - int64_t sector_num, int nb_sectors, int* n) -{ - BDRVVVFATState* s = bs->opaque; - *n = s->sector_count - sector_num; - if (*n > nb_sectors) - *n = nb_sectors; - else if (*n < 0) - return 0; - return 1; -} - -static int write_target_commit(BlockDriverState *bs, int64_t sector_num, - const uint8_t* buffer, int nb_sectors) { - BDRVVVFATState* s = bs->opaque; - return try_commit(s); -} - -static void write_target_close(BlockDriverState *bs) { - BDRVVVFATState* s = bs->opaque; - bdrv_delete(s->qcow); - free(s->qcow_filename); -} - -static BlockDriver vvfat_write_target = { - "vvfat_write_target", 0, NULL, NULL, NULL, - write_target_commit, - write_target_close, - NULL, NULL, NULL -}; - -static int enable_write_target(BDRVVVFATState *s) -{ - int size = sector2cluster(s, s->sector_count); - s->used_clusters = calloc(size, 1); - - array_init(&(s->commits), sizeof(commit_t)); - - s->qcow_filename = qemu_malloc(1024); - get_tmp_filename(s->qcow_filename, 1024); - if (bdrv_create(bdrv_find_format("qcow"), - s->qcow_filename, s->sector_count, "fat:", 0) < 0) - return -1; - s->qcow = bdrv_new(""); - if (s->qcow == NULL || bdrv_open(s->qcow, s->qcow_filename, 0) < 0) - return -1; - -#ifndef _WIN32 - unlink(s->qcow_filename); -#endif - - s->bs->backing_hd = calloc(sizeof(BlockDriverState), 1); - s->bs->backing_hd->drv = &vvfat_write_target; - s->bs->backing_hd->opaque = s; - - return 0; -} - -static void vvfat_close(BlockDriverState *bs) -{ - BDRVVVFATState *s = bs->opaque; - - vvfat_close_current_file(s); - array_free(&(s->fat)); - array_free(&(s->directory)); - array_free(&(s->mapping)); - if(s->cluster_buffer) - free(s->cluster_buffer); -} - -static BlockDriver bdrv_vvfat = { - .format_name = "vvfat", - .instance_size = sizeof(BDRVVVFATState), - .bdrv_open = vvfat_open, - .bdrv_read = vvfat_read, - .bdrv_write = vvfat_write, - .bdrv_close = vvfat_close, - .bdrv_is_allocated = vvfat_is_allocated, - .protocol_name = "fat", -}; - -static void bdrv_vvfat_init(void) -{ - bdrv_register(&bdrv_vvfat); -} - -block_init(bdrv_vvfat_init); - -#ifdef DEBUG -static void checkpoint(void) { - assert(((mapping_t*)array_get(&(vvv->mapping), 0))->end == 2); - check1(vvv); - check2(vvv); - assert(!vvv->current_mapping || vvv->current_fd || (vvv->current_mapping->mode & MODE_DIRECTORY)); -#if 0 - if (((direntry_t*)vvv->directory.pointer)[1].attributes != 0xf) - fprintf(stderr, "Nonono!\n"); - mapping_t* mapping; - direntry_t* direntry; - assert(vvv->mapping.size >= vvv->mapping.item_size * vvv->mapping.next); - assert(vvv->directory.size >= vvv->directory.item_size * vvv->directory.next); - if (vvv->mapping.next<47) - return; - assert((mapping = array_get(&(vvv->mapping), 47))); - assert(mapping->dir_index < vvv->directory.next); - direntry = array_get(&(vvv->directory), mapping->dir_index); - assert(!memcmp(direntry->name, "USB H ", 11) || direntry->name[0]==0); -#endif - return; - /* avoid compiler warnings: */ - hexdump(NULL, 100); - remove_mapping(vvv, NULL); - print_mapping(NULL); - print_direntry(NULL); -} -#endif diff --git a/block/bochs.c b/block/bochs.c new file mode 100644 index 000000000..bac81c42b --- /dev/null +++ b/block/bochs.c @@ -0,0 +1,259 @@ +/* + * Block driver for the various disk image formats used by Bochs + * Currently only for "growing" type in read-only mode + * + * Copyright (c) 2005 Alex Beregszaszi + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +#include "qemu-common.h" +#include "block_int.h" +#include "module.h" + +/**************************************************************/ + +#define HEADER_MAGIC "Bochs Virtual HD Image" +#define HEADER_VERSION 0x00020000 +#define HEADER_V1 0x00010000 +#define HEADER_SIZE 512 + +#define REDOLOG_TYPE "Redolog" +#define GROWING_TYPE "Growing" + +// not allocated: 0xffffffff + +// always little-endian +struct bochs_header_v1 { + char magic[32]; // "Bochs Virtual HD Image" + char type[16]; // "Redolog" + char subtype[16]; // "Undoable" / "Volatile" / "Growing" + uint32_t version; + uint32_t header; // size of header + + union { + struct { + uint32_t catalog; // num of entries + uint32_t bitmap; // bitmap size + uint32_t extent; // extent size + uint64_t disk; // disk size + char padding[HEADER_SIZE - 64 - 8 - 20]; + } redolog; + char padding[HEADER_SIZE - 64 - 8]; + } extra; +}; + +// always little-endian +struct bochs_header { + char magic[32]; // "Bochs Virtual HD Image" + char type[16]; // "Redolog" + char subtype[16]; // "Undoable" / "Volatile" / "Growing" + uint32_t version; + uint32_t header; // size of header + + union { + struct { + uint32_t catalog; // num of entries + uint32_t bitmap; // bitmap size + uint32_t extent; // extent size + uint32_t reserved; // for ??? + uint64_t disk; // disk size + char padding[HEADER_SIZE - 64 - 8 - 24]; + } redolog; + char padding[HEADER_SIZE - 64 - 8]; + } extra; +}; + +typedef struct BDRVBochsState { + int fd; + + uint32_t *catalog_bitmap; + int catalog_size; + + int data_offset; + + int bitmap_blocks; + int extent_blocks; + int extent_size; +} BDRVBochsState; + +static int bochs_probe(const uint8_t *buf, int buf_size, const char *filename) +{ + const struct bochs_header *bochs = (const void *)buf; + + if (buf_size < HEADER_SIZE) + return 0; + + if (!strcmp(bochs->magic, HEADER_MAGIC) && + !strcmp(bochs->type, REDOLOG_TYPE) && + !strcmp(bochs->subtype, GROWING_TYPE) && + ((le32_to_cpu(bochs->version) == HEADER_VERSION) || + (le32_to_cpu(bochs->version) == HEADER_V1))) + return 100; + + return 0; +} + +static int bochs_open(BlockDriverState *bs, const char *filename, int flags) +{ + BDRVBochsState *s = bs->opaque; + int fd, i; + struct bochs_header bochs; + struct bochs_header_v1 header_v1; + + fd = open(filename, O_RDWR | O_BINARY); + if (fd < 0) { + fd = open(filename, O_RDONLY | O_BINARY); + if (fd < 0) + return -1; + } + + bs->read_only = 1; // no write support yet + + s->fd = fd; + + if (read(fd, &bochs, sizeof(bochs)) != sizeof(bochs)) { + goto fail; + } + + if (strcmp(bochs.magic, HEADER_MAGIC) || + strcmp(bochs.type, REDOLOG_TYPE) || + strcmp(bochs.subtype, GROWING_TYPE) || + ((le32_to_cpu(bochs.version) != HEADER_VERSION) && + (le32_to_cpu(bochs.version) != HEADER_V1))) { + goto fail; + } + + if (le32_to_cpu(bochs.version) == HEADER_V1) { + memcpy(&header_v1, &bochs, sizeof(bochs)); + bs->total_sectors = le64_to_cpu(header_v1.extra.redolog.disk) / 512; + } else { + bs->total_sectors = le64_to_cpu(bochs.extra.redolog.disk) / 512; + } + + lseek(s->fd, le32_to_cpu(bochs.header), SEEK_SET); + + s->catalog_size = le32_to_cpu(bochs.extra.redolog.catalog); + s->catalog_bitmap = qemu_malloc(s->catalog_size * 4); + if (read(s->fd, s->catalog_bitmap, s->catalog_size * 4) != + s->catalog_size * 4) + goto fail; + for (i = 0; i < s->catalog_size; i++) + le32_to_cpus(&s->catalog_bitmap[i]); + + s->data_offset = le32_to_cpu(bochs.header) + (s->catalog_size * 4); + + s->bitmap_blocks = 1 + (le32_to_cpu(bochs.extra.redolog.bitmap) - 1) / 512; + s->extent_blocks = 1 + (le32_to_cpu(bochs.extra.redolog.extent) - 1) / 512; + + s->extent_size = le32_to_cpu(bochs.extra.redolog.extent); + + return 0; + fail: + close(fd); + return -1; +} + +static inline int seek_to_sector(BlockDriverState *bs, int64_t sector_num) +{ + BDRVBochsState *s = bs->opaque; + int64_t offset = sector_num * 512; + int64_t extent_index, extent_offset, bitmap_offset, block_offset; + char bitmap_entry; + + // seek to sector + extent_index = offset / s->extent_size; + extent_offset = (offset % s->extent_size) / 512; + + if (s->catalog_bitmap[extent_index] == 0xffffffff) + { +// fprintf(stderr, "page not allocated [%x - %x:%x]\n", +// sector_num, extent_index, extent_offset); + return -1; // not allocated + } + + bitmap_offset = s->data_offset + (512 * s->catalog_bitmap[extent_index] * + (s->extent_blocks + s->bitmap_blocks)); + block_offset = bitmap_offset + (512 * (s->bitmap_blocks + extent_offset)); + +// fprintf(stderr, "sect: %x [ext i: %x o: %x] -> %x bitmap: %x block: %x\n", +// sector_num, extent_index, extent_offset, +// le32_to_cpu(s->catalog_bitmap[extent_index]), +// bitmap_offset, block_offset); + + // read in bitmap for current extent + lseek(s->fd, bitmap_offset + (extent_offset / 8), SEEK_SET); + + read(s->fd, &bitmap_entry, 1); + + if (!((bitmap_entry >> (extent_offset % 8)) & 1)) + { +// fprintf(stderr, "sector (%x) in bitmap not allocated\n", +// sector_num); + return -1; // not allocated + } + + lseek(s->fd, block_offset, SEEK_SET); + + return 0; +} + +static int bochs_read(BlockDriverState *bs, int64_t sector_num, + uint8_t *buf, int nb_sectors) +{ + BDRVBochsState *s = bs->opaque; + int ret; + + while (nb_sectors > 0) { + if (!seek_to_sector(bs, sector_num)) + { + ret = read(s->fd, buf, 512); + if (ret != 512) + return -1; + } + else + memset(buf, 0, 512); + nb_sectors--; + sector_num++; + buf += 512; + } + return 0; +} + +static void bochs_close(BlockDriverState *bs) +{ + BDRVBochsState *s = bs->opaque; + qemu_free(s->catalog_bitmap); + close(s->fd); +} + +static BlockDriver bdrv_bochs = { + .format_name = "bochs", + .instance_size = sizeof(BDRVBochsState), + .bdrv_probe = bochs_probe, + .bdrv_open = bochs_open, + .bdrv_read = bochs_read, + .bdrv_close = bochs_close, +}; + +static void bdrv_bochs_init(void) +{ + bdrv_register(&bdrv_bochs); +} + +block_init(bdrv_bochs_init); diff --git a/block/cloop.c b/block/cloop.c new file mode 100644 index 000000000..06c687e69 --- /dev/null +++ b/block/cloop.c @@ -0,0 +1,171 @@ +/* + * QEMU Block driver for CLOOP images + * + * Copyright (c) 2004 Johannes E. Schindelin + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +#include "qemu-common.h" +#include "block_int.h" +#include "module.h" +#include + +typedef struct BDRVCloopState { + int fd; + uint32_t block_size; + uint32_t n_blocks; + uint64_t* offsets; + uint32_t sectors_per_block; + uint32_t current_block; + uint8_t *compressed_block; + uint8_t *uncompressed_block; + z_stream zstream; +} BDRVCloopState; + +static int cloop_probe(const uint8_t *buf, int buf_size, const char *filename) +{ + const char* magic_version_2_0="#!/bin/sh\n" + "#V2.0 Format\n" + "modprobe cloop file=$0 && mount -r -t iso9660 /dev/cloop $1\n"; + int length=strlen(magic_version_2_0); + if(length>buf_size) + length=buf_size; + if(!memcmp(magic_version_2_0,buf,length)) + return 2; + return 0; +} + +static int cloop_open(BlockDriverState *bs, const char *filename, int flags) +{ + BDRVCloopState *s = bs->opaque; + uint32_t offsets_size,max_compressed_block_size=1,i; + + s->fd = open(filename, O_RDONLY | O_BINARY); + if (s->fd < 0) + return -errno; + bs->read_only = 1; + + /* read header */ + if(lseek(s->fd,128,SEEK_SET)<0) { +cloop_close: + close(s->fd); + return -1; + } + if(read(s->fd,&s->block_size,4)<4) + goto cloop_close; + s->block_size=be32_to_cpu(s->block_size); + if(read(s->fd,&s->n_blocks,4)<4) + goto cloop_close; + s->n_blocks=be32_to_cpu(s->n_blocks); + + /* read offsets */ + offsets_size=s->n_blocks*sizeof(uint64_t); + s->offsets=(uint64_t*)qemu_malloc(offsets_size); + if(read(s->fd,s->offsets,offsets_size)n_blocks;i++) { + s->offsets[i]=be64_to_cpu(s->offsets[i]); + if(i>0) { + uint32_t size=s->offsets[i]-s->offsets[i-1]; + if(size>max_compressed_block_size) + max_compressed_block_size=size; + } + } + + /* initialize zlib engine */ + s->compressed_block = qemu_malloc(max_compressed_block_size+1); + s->uncompressed_block = qemu_malloc(s->block_size); + if(inflateInit(&s->zstream) != Z_OK) + goto cloop_close; + s->current_block=s->n_blocks; + + s->sectors_per_block = s->block_size/512; + bs->total_sectors = s->n_blocks*s->sectors_per_block; + return 0; +} + +static inline int cloop_read_block(BDRVCloopState *s,int block_num) +{ + if(s->current_block != block_num) { + int ret; + uint32_t bytes = s->offsets[block_num+1]-s->offsets[block_num]; + + lseek(s->fd, s->offsets[block_num], SEEK_SET); + ret = read(s->fd, s->compressed_block, bytes); + if (ret != bytes) + return -1; + + s->zstream.next_in = s->compressed_block; + s->zstream.avail_in = bytes; + s->zstream.next_out = s->uncompressed_block; + s->zstream.avail_out = s->block_size; + ret = inflateReset(&s->zstream); + if(ret != Z_OK) + return -1; + ret = inflate(&s->zstream, Z_FINISH); + if(ret != Z_STREAM_END || s->zstream.total_out != s->block_size) + return -1; + + s->current_block = block_num; + } + return 0; +} + +static int cloop_read(BlockDriverState *bs, int64_t sector_num, + uint8_t *buf, int nb_sectors) +{ + BDRVCloopState *s = bs->opaque; + int i; + + for(i=0;isectors_per_block), + block_num=(sector_num+i)/s->sectors_per_block; + if(cloop_read_block(s, block_num) != 0) + return -1; + memcpy(buf+i*512,s->uncompressed_block+sector_offset_in_block*512,512); + } + return 0; +} + +static void cloop_close(BlockDriverState *bs) +{ + BDRVCloopState *s = bs->opaque; + close(s->fd); + if(s->n_blocks>0) + free(s->offsets); + free(s->compressed_block); + free(s->uncompressed_block); + inflateEnd(&s->zstream); +} + +static BlockDriver bdrv_cloop = { + .format_name = "cloop", + .instance_size = sizeof(BDRVCloopState), + .bdrv_probe = cloop_probe, + .bdrv_open = cloop_open, + .bdrv_read = cloop_read, + .bdrv_close = cloop_close, +}; + +static void bdrv_cloop_init(void) +{ + bdrv_register(&bdrv_cloop); +} + +block_init(bdrv_cloop_init); diff --git a/block/cow.c b/block/cow.c new file mode 100644 index 000000000..94b354938 --- /dev/null +++ b/block/cow.c @@ -0,0 +1,275 @@ +/* + * Block driver for the COW format + * + * Copyright (c) 2004 Fabrice Bellard + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +#ifndef _WIN32 +#include "qemu-common.h" +#include "block_int.h" +#include "module.h" +#include + +/**************************************************************/ +/* COW block driver using file system holes */ + +/* user mode linux compatible COW file */ +#define COW_MAGIC 0x4f4f4f4d /* MOOO */ +#define COW_VERSION 2 + +struct cow_header_v2 { + uint32_t magic; + uint32_t version; + char backing_file[1024]; + int32_t mtime; + uint64_t size; + uint32_t sectorsize; +}; + +typedef struct BDRVCowState { + int fd; + uint8_t *cow_bitmap; /* if non NULL, COW mappings are used first */ + uint8_t *cow_bitmap_addr; /* mmap address of cow_bitmap */ + int cow_bitmap_size; + int64_t cow_sectors_offset; +} BDRVCowState; + +static int cow_probe(const uint8_t *buf, int buf_size, const char *filename) +{ + const struct cow_header_v2 *cow_header = (const void *)buf; + + if (buf_size >= sizeof(struct cow_header_v2) && + be32_to_cpu(cow_header->magic) == COW_MAGIC && + be32_to_cpu(cow_header->version) == COW_VERSION) + return 100; + else + return 0; +} + +static int cow_open(BlockDriverState *bs, const char *filename, int flags) +{ + BDRVCowState *s = bs->opaque; + int fd; + struct cow_header_v2 cow_header; + int64_t size; + + fd = open(filename, O_RDWR | O_BINARY | O_LARGEFILE); + if (fd < 0) { + fd = open(filename, O_RDONLY | O_BINARY | O_LARGEFILE); + if (fd < 0) + return -1; + } + s->fd = fd; + /* see if it is a cow image */ + if (read(fd, &cow_header, sizeof(cow_header)) != sizeof(cow_header)) { + goto fail; + } + + if (be32_to_cpu(cow_header.magic) != COW_MAGIC || + be32_to_cpu(cow_header.version) != COW_VERSION) { + goto fail; + } + + /* cow image found */ + size = be64_to_cpu(cow_header.size); + bs->total_sectors = size / 512; + + pstrcpy(bs->backing_file, sizeof(bs->backing_file), + cow_header.backing_file); + + /* mmap the bitmap */ + s->cow_bitmap_size = ((bs->total_sectors + 7) >> 3) + sizeof(cow_header); + s->cow_bitmap_addr = (void *)mmap(get_mmap_addr(s->cow_bitmap_size), + s->cow_bitmap_size, + PROT_READ | PROT_WRITE, + MAP_SHARED, s->fd, 0); + if (s->cow_bitmap_addr == MAP_FAILED) + goto fail; + s->cow_bitmap = s->cow_bitmap_addr + sizeof(cow_header); + s->cow_sectors_offset = (s->cow_bitmap_size + 511) & ~511; + return 0; + fail: + close(fd); + return -1; +} + +static inline void cow_set_bit(uint8_t *bitmap, int64_t bitnum) +{ + bitmap[bitnum / 8] |= (1 << (bitnum%8)); +} + +static inline int is_bit_set(const uint8_t *bitmap, int64_t bitnum) +{ + return !!(bitmap[bitnum / 8] & (1 << (bitnum%8))); +} + + +/* Return true if first block has been changed (ie. current version is + * in COW file). Set the number of continuous blocks for which that + * is true. */ +static inline int is_changed(uint8_t *bitmap, + int64_t sector_num, int nb_sectors, + int *num_same) +{ + int changed; + + if (!bitmap || nb_sectors == 0) { + *num_same = nb_sectors; + return 0; + } + + changed = is_bit_set(bitmap, sector_num); + for (*num_same = 1; *num_same < nb_sectors; (*num_same)++) { + if (is_bit_set(bitmap, sector_num + *num_same) != changed) + break; + } + + return changed; +} + +static int cow_is_allocated(BlockDriverState *bs, int64_t sector_num, + int nb_sectors, int *pnum) +{ + BDRVCowState *s = bs->opaque; + return is_changed(s->cow_bitmap, sector_num, nb_sectors, pnum); +} + +static int cow_read(BlockDriverState *bs, int64_t sector_num, + uint8_t *buf, int nb_sectors) +{ + BDRVCowState *s = bs->opaque; + int ret, n; + + while (nb_sectors > 0) { + if (is_changed(s->cow_bitmap, sector_num, nb_sectors, &n)) { + lseek(s->fd, s->cow_sectors_offset + sector_num * 512, SEEK_SET); + ret = read(s->fd, buf, n * 512); + if (ret != n * 512) + return -1; + } else { + if (bs->backing_hd) { + /* read from the base image */ + ret = bdrv_read(bs->backing_hd, sector_num, buf, n); + if (ret < 0) + return -1; + } else { + memset(buf, 0, n * 512); + } + } + nb_sectors -= n; + sector_num += n; + buf += n * 512; + } + return 0; +} + +static int cow_write(BlockDriverState *bs, int64_t sector_num, + const uint8_t *buf, int nb_sectors) +{ + BDRVCowState *s = bs->opaque; + int ret, i; + + lseek(s->fd, s->cow_sectors_offset + sector_num * 512, SEEK_SET); + ret = write(s->fd, buf, nb_sectors * 512); + if (ret != nb_sectors * 512) + return -1; + for (i = 0; i < nb_sectors; i++) + cow_set_bit(s->cow_bitmap, sector_num + i); + return 0; +} + +static void cow_close(BlockDriverState *bs) +{ + BDRVCowState *s = bs->opaque; + munmap((void *)s->cow_bitmap_addr, s->cow_bitmap_size); + close(s->fd); +} + +static int cow_create(const char *filename, int64_t image_sectors, + const char *image_filename, int flags) +{ + int fd, cow_fd; + struct cow_header_v2 cow_header; + struct stat st; + + if (flags) + return -ENOTSUP; + + cow_fd = open(filename, O_WRONLY | O_CREAT | O_TRUNC | O_BINARY, + 0644); + if (cow_fd < 0) + return -1; + memset(&cow_header, 0, sizeof(cow_header)); + cow_header.magic = cpu_to_be32(COW_MAGIC); + cow_header.version = cpu_to_be32(COW_VERSION); + if (image_filename) { + /* Note: if no file, we put a dummy mtime */ + cow_header.mtime = cpu_to_be32(0); + + fd = open(image_filename, O_RDONLY | O_BINARY); + if (fd < 0) { + close(cow_fd); + goto mtime_fail; + } + if (fstat(fd, &st) != 0) { + close(fd); + goto mtime_fail; + } + close(fd); + cow_header.mtime = cpu_to_be32(st.st_mtime); + mtime_fail: + pstrcpy(cow_header.backing_file, sizeof(cow_header.backing_file), + image_filename); + } + cow_header.sectorsize = cpu_to_be32(512); + cow_header.size = cpu_to_be64(image_sectors * 512); + write(cow_fd, &cow_header, sizeof(cow_header)); + /* resize to include at least all the bitmap */ + ftruncate(cow_fd, sizeof(cow_header) + ((image_sectors + 7) >> 3)); + close(cow_fd); + return 0; +} + +static void cow_flush(BlockDriverState *bs) +{ + BDRVCowState *s = bs->opaque; + fsync(s->fd); +} + +static BlockDriver bdrv_cow = { + .format_name = "cow", + .instance_size = sizeof(BDRVCowState), + .bdrv_probe = cow_probe, + .bdrv_open = cow_open, + .bdrv_read = cow_read, + .bdrv_write = cow_write, + .bdrv_close = cow_close, + .bdrv_create = cow_create, + .bdrv_flush = cow_flush, + .bdrv_is_allocated = cow_is_allocated, +}; + +static void bdrv_cow_init(void) +{ + bdrv_register(&bdrv_cow); +} + +block_init(bdrv_cow_init); +#endif diff --git a/block/dmg.c b/block/dmg.c new file mode 100644 index 000000000..262560ffd --- /dev/null +++ b/block/dmg.c @@ -0,0 +1,301 @@ +/* + * QEMU Block driver for DMG images + * + * Copyright (c) 2004 Johannes E. Schindelin + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +#include "qemu-common.h" +#include "block_int.h" +#include "bswap.h" +#include "module.h" +#include + +typedef struct BDRVDMGState { + int fd; + + /* each chunk contains a certain number of sectors, + * offsets[i] is the offset in the .dmg file, + * lengths[i] is the length of the compressed chunk, + * sectors[i] is the sector beginning at offsets[i], + * sectorcounts[i] is the number of sectors in that chunk, + * the sectors array is ordered + * 0<=i4 && !strcmp(filename+len-4,".dmg")) + return 2; + return 0; +} + +static off_t read_off(int fd) +{ + uint64_t buffer; + if(read(fd,&buffer,8)<8) + return 0; + return be64_to_cpu(buffer); +} + +static off_t read_uint32(int fd) +{ + uint32_t buffer; + if(read(fd,&buffer,4)<4) + return 0; + return be32_to_cpu(buffer); +} + +static int dmg_open(BlockDriverState *bs, const char *filename, int flags) +{ + BDRVDMGState *s = bs->opaque; + off_t info_begin,info_end,last_in_offset,last_out_offset; + uint32_t count; + uint32_t max_compressed_size=1,max_sectors_per_chunk=1,i; + + s->fd = open(filename, O_RDONLY | O_BINARY); + if (s->fd < 0) + return -errno; + bs->read_only = 1; + s->n_chunks = 0; + s->offsets = s->lengths = s->sectors = s->sectorcounts = NULL; + + /* read offset of info blocks */ + if(lseek(s->fd,-0x1d8,SEEK_END)<0) { +dmg_close: + close(s->fd); + /* open raw instead */ + bs->drv=bdrv_find_format("raw"); + return bs->drv->bdrv_open(bs, filename, flags); + } + info_begin=read_off(s->fd); + if(info_begin==0) + goto dmg_close; + if(lseek(s->fd,info_begin,SEEK_SET)<0) + goto dmg_close; + if(read_uint32(s->fd)!=0x100) + goto dmg_close; + if((count = read_uint32(s->fd))==0) + goto dmg_close; + info_end = info_begin+count; + if(lseek(s->fd,0xf8,SEEK_CUR)<0) + goto dmg_close; + + /* read offsets */ + last_in_offset = last_out_offset = 0; + while(lseek(s->fd,0,SEEK_CUR)fd); + if(count==0) + goto dmg_close; + type = read_uint32(s->fd); + if(type!=0x6d697368 || count<244) + lseek(s->fd,count-4,SEEK_CUR); + else { + int new_size, chunk_count; + if(lseek(s->fd,200,SEEK_CUR)<0) + goto dmg_close; + chunk_count = (count-204)/40; + new_size = sizeof(uint64_t) * (s->n_chunks + chunk_count); + s->types = qemu_realloc(s->types, new_size/2); + s->offsets = qemu_realloc(s->offsets, new_size); + s->lengths = qemu_realloc(s->lengths, new_size); + s->sectors = qemu_realloc(s->sectors, new_size); + s->sectorcounts = qemu_realloc(s->sectorcounts, new_size); + + for(i=s->n_chunks;in_chunks+chunk_count;i++) { + s->types[i] = read_uint32(s->fd); + if(s->types[i]!=0x80000005 && s->types[i]!=1 && s->types[i]!=2) { + if(s->types[i]==0xffffffff) { + last_in_offset = s->offsets[i-1]+s->lengths[i-1]; + last_out_offset = s->sectors[i-1]+s->sectorcounts[i-1]; + } + chunk_count--; + i--; + if(lseek(s->fd,36,SEEK_CUR)<0) + goto dmg_close; + continue; + } + read_uint32(s->fd); + s->sectors[i] = last_out_offset+read_off(s->fd); + s->sectorcounts[i] = read_off(s->fd); + s->offsets[i] = last_in_offset+read_off(s->fd); + s->lengths[i] = read_off(s->fd); + if(s->lengths[i]>max_compressed_size) + max_compressed_size = s->lengths[i]; + if(s->sectorcounts[i]>max_sectors_per_chunk) + max_sectors_per_chunk = s->sectorcounts[i]; + } + s->n_chunks+=chunk_count; + } + } + + /* initialize zlib engine */ + s->compressed_chunk = qemu_malloc(max_compressed_size+1); + s->uncompressed_chunk = qemu_malloc(512*max_sectors_per_chunk); + if(inflateInit(&s->zstream) != Z_OK) + goto dmg_close; + + s->current_chunk = s->n_chunks; + + return 0; +} + +static inline int is_sector_in_chunk(BDRVDMGState* s, + uint32_t chunk_num,int sector_num) +{ + if(chunk_num>=s->n_chunks || s->sectors[chunk_num]>sector_num || + s->sectors[chunk_num]+s->sectorcounts[chunk_num]<=sector_num) + return 0; + else + return -1; +} + +static inline uint32_t search_chunk(BDRVDMGState* s,int sector_num) +{ + /* binary search */ + uint32_t chunk1=0,chunk2=s->n_chunks,chunk3; + while(chunk1!=chunk2) { + chunk3 = (chunk1+chunk2)/2; + if(s->sectors[chunk3]>sector_num) + chunk2 = chunk3; + else if(s->sectors[chunk3]+s->sectorcounts[chunk3]>sector_num) + return chunk3; + else + chunk1 = chunk3; + } + return s->n_chunks; /* error */ +} + +static inline int dmg_read_chunk(BDRVDMGState *s,int sector_num) +{ + if(!is_sector_in_chunk(s,s->current_chunk,sector_num)) { + int ret; + uint32_t chunk = search_chunk(s,sector_num); + + if(chunk>=s->n_chunks) + return -1; + + s->current_chunk = s->n_chunks; + switch(s->types[chunk]) { + case 0x80000005: { /* zlib compressed */ + int i; + + ret = lseek(s->fd, s->offsets[chunk], SEEK_SET); + if(ret<0) + return -1; + + /* we need to buffer, because only the chunk as whole can be + * inflated. */ + i=0; + do { + ret = read(s->fd, s->compressed_chunk+i, s->lengths[chunk]-i); + if(ret<0 && errno==EINTR) + ret=0; + i+=ret; + } while(ret>=0 && ret+ilengths[chunk]); + + if (ret != s->lengths[chunk]) + return -1; + + s->zstream.next_in = s->compressed_chunk; + s->zstream.avail_in = s->lengths[chunk]; + s->zstream.next_out = s->uncompressed_chunk; + s->zstream.avail_out = 512*s->sectorcounts[chunk]; + ret = inflateReset(&s->zstream); + if(ret != Z_OK) + return -1; + ret = inflate(&s->zstream, Z_FINISH); + if(ret != Z_STREAM_END || s->zstream.total_out != 512*s->sectorcounts[chunk]) + return -1; + break; } + case 1: /* copy */ + ret = read(s->fd, s->uncompressed_chunk, s->lengths[chunk]); + if (ret != s->lengths[chunk]) + return -1; + break; + case 2: /* zero */ + memset(s->uncompressed_chunk, 0, 512*s->sectorcounts[chunk]); + break; + } + s->current_chunk = chunk; + } + return 0; +} + +static int dmg_read(BlockDriverState *bs, int64_t sector_num, + uint8_t *buf, int nb_sectors) +{ + BDRVDMGState *s = bs->opaque; + int i; + + for(i=0;isectors[s->current_chunk]; + memcpy(buf+i*512,s->uncompressed_chunk+sector_offset_in_chunk*512,512); + } + return 0; +} + +static void dmg_close(BlockDriverState *bs) +{ + BDRVDMGState *s = bs->opaque; + close(s->fd); + if(s->n_chunks>0) { + free(s->types); + free(s->offsets); + free(s->lengths); + free(s->sectors); + free(s->sectorcounts); + } + free(s->compressed_chunk); + free(s->uncompressed_chunk); + inflateEnd(&s->zstream); +} + +static BlockDriver bdrv_dmg = { + .format_name = "dmg", + .instance_size = sizeof(BDRVDMGState), + .bdrv_probe = dmg_probe, + .bdrv_open = dmg_open, + .bdrv_read = dmg_read, + .bdrv_close = dmg_close, +}; + +static void bdrv_dmg_init(void) +{ + bdrv_register(&bdrv_dmg); +} + +block_init(bdrv_dmg_init); diff --git a/block/nbd.c b/block/nbd.c new file mode 100644 index 000000000..47d477899 --- /dev/null +++ b/block/nbd.c @@ -0,0 +1,196 @@ +/* + * QEMU Block driver for NBD + * + * Copyright (C) 2008 Bull S.A.S. + * Author: Laurent Vivier + * + * Some parts: + * Copyright (C) 2007 Anthony Liguori + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#include "qemu-common.h" +#include "nbd.h" +#include "module.h" + +#include +#include + +typedef struct BDRVNBDState { + int sock; + off_t size; + size_t blocksize; +} BDRVNBDState; + +static int nbd_open(BlockDriverState *bs, const char* filename, int flags) +{ + BDRVNBDState *s = bs->opaque; + const char *host; + const char *unixpath; + int sock; + off_t size; + size_t blocksize; + int ret; + + if ((flags & BDRV_O_CREAT)) + return -EINVAL; + + if (!strstart(filename, "nbd:", &host)) + return -EINVAL; + + if (strstart(host, "unix:", &unixpath)) { + + if (unixpath[0] != '/') + return -EINVAL; + + sock = unix_socket_outgoing(unixpath); + + } else { + uint16_t port; + char *p, *r; + char hostname[128]; + + pstrcpy(hostname, 128, host); + + p = strchr(hostname, ':'); + if (p == NULL) + return -EINVAL; + + *p = '\0'; + p++; + + port = strtol(p, &r, 0); + if (r == p) + return -EINVAL; + sock = tcp_socket_outgoing(hostname, port); + } + + if (sock == -1) + return -errno; + + ret = nbd_receive_negotiate(sock, &size, &blocksize); + if (ret == -1) + return -errno; + + s->sock = sock; + s->size = size; + s->blocksize = blocksize; + + return 0; +} + +static int nbd_read(BlockDriverState *bs, int64_t sector_num, + uint8_t *buf, int nb_sectors) +{ + BDRVNBDState *s = bs->opaque; + struct nbd_request request; + struct nbd_reply reply; + + request.type = NBD_CMD_READ; + request.handle = (uint64_t)(intptr_t)bs; + request.from = sector_num * 512;; + request.len = nb_sectors * 512; + + if (nbd_send_request(s->sock, &request) == -1) + return -errno; + + if (nbd_receive_reply(s->sock, &reply) == -1) + return -errno; + + if (reply.error !=0) + return -reply.error; + + if (reply.handle != request.handle) + return -EIO; + + if (nbd_wr_sync(s->sock, buf, request.len, 1) != request.len) + return -EIO; + + return 0; +} + +static int nbd_write(BlockDriverState *bs, int64_t sector_num, + const uint8_t *buf, int nb_sectors) +{ + BDRVNBDState *s = bs->opaque; + struct nbd_request request; + struct nbd_reply reply; + + request.type = NBD_CMD_WRITE; + request.handle = (uint64_t)(intptr_t)bs; + request.from = sector_num * 512;; + request.len = nb_sectors * 512; + + if (nbd_send_request(s->sock, &request) == -1) + return -errno; + + if (nbd_wr_sync(s->sock, (uint8_t*)buf, request.len, 0) != request.len) + return -EIO; + + if (nbd_receive_reply(s->sock, &reply) == -1) + return -errno; + + if (reply.error !=0) + return -reply.error; + + if (reply.handle != request.handle) + return -EIO; + + return 0; +} + +static void nbd_close(BlockDriverState *bs) +{ + BDRVNBDState *s = bs->opaque; + struct nbd_request request; + + request.type = NBD_CMD_DISC; + request.handle = (uint64_t)(intptr_t)bs; + request.from = 0; + request.len = 0; + nbd_send_request(s->sock, &request); + + close(s->sock); +} + +static int64_t nbd_getlength(BlockDriverState *bs) +{ + BDRVNBDState *s = bs->opaque; + + return s->size; +} + +static BlockDriver bdrv_nbd = { + .format_name = "nbd", + .instance_size = sizeof(BDRVNBDState), + .bdrv_open = nbd_open, + .bdrv_read = nbd_read, + .bdrv_write = nbd_write, + .bdrv_close = nbd_close, + .bdrv_getlength = nbd_getlength, + .protocol_name = "nbd", +}; + +static void bdrv_nbd_init(void) +{ + bdrv_register(&bdrv_nbd); +} + +block_init(bdrv_nbd_init); diff --git a/block/parallels.c b/block/parallels.c new file mode 100644 index 000000000..0b64a5c62 --- /dev/null +++ b/block/parallels.c @@ -0,0 +1,181 @@ +/* + * Block driver for Parallels disk image format + * + * Copyright (c) 2007 Alex Beregszaszi + * + * This code is based on comparing different disk images created by Parallels. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +#include "qemu-common.h" +#include "block_int.h" +#include "module.h" + +/**************************************************************/ + +#define HEADER_MAGIC "WithoutFreeSpace" +#define HEADER_VERSION 2 +#define HEADER_SIZE 64 + +// always little-endian +struct parallels_header { + char magic[16]; // "WithoutFreeSpace" + uint32_t version; + uint32_t heads; + uint32_t cylinders; + uint32_t tracks; + uint32_t catalog_entries; + uint32_t nb_sectors; + char padding[24]; +} __attribute__((packed)); + +typedef struct BDRVParallelsState { + int fd; + + uint32_t *catalog_bitmap; + int catalog_size; + + int tracks; +} BDRVParallelsState; + +static int parallels_probe(const uint8_t *buf, int buf_size, const char *filename) +{ + const struct parallels_header *ph = (const void *)buf; + + if (buf_size < HEADER_SIZE) + return 0; + + if (!memcmp(ph->magic, HEADER_MAGIC, 16) && + (le32_to_cpu(ph->version) == HEADER_VERSION)) + return 100; + + return 0; +} + +static int parallels_open(BlockDriverState *bs, const char *filename, int flags) +{ + BDRVParallelsState *s = bs->opaque; + int fd, i; + struct parallels_header ph; + + fd = open(filename, O_RDWR | O_BINARY | O_LARGEFILE); + if (fd < 0) { + fd = open(filename, O_RDONLY | O_BINARY | O_LARGEFILE); + if (fd < 0) + return -1; + } + + bs->read_only = 1; // no write support yet + + s->fd = fd; + + if (read(fd, &ph, sizeof(ph)) != sizeof(ph)) + goto fail; + + if (memcmp(ph.magic, HEADER_MAGIC, 16) || + (le32_to_cpu(ph.version) != HEADER_VERSION)) { + goto fail; + } + + bs->total_sectors = le32_to_cpu(ph.nb_sectors); + + if (lseek(s->fd, 64, SEEK_SET) != 64) + goto fail; + + s->tracks = le32_to_cpu(ph.tracks); + + s->catalog_size = le32_to_cpu(ph.catalog_entries); + s->catalog_bitmap = qemu_malloc(s->catalog_size * 4); + if (read(s->fd, s->catalog_bitmap, s->catalog_size * 4) != + s->catalog_size * 4) + goto fail; + for (i = 0; i < s->catalog_size; i++) + le32_to_cpus(&s->catalog_bitmap[i]); + + return 0; +fail: + if (s->catalog_bitmap) + qemu_free(s->catalog_bitmap); + close(fd); + return -1; +} + +static inline int seek_to_sector(BlockDriverState *bs, int64_t sector_num) +{ + BDRVParallelsState *s = bs->opaque; + uint32_t index, offset, position; + + index = sector_num / s->tracks; + offset = sector_num % s->tracks; + + // not allocated + if ((index > s->catalog_size) || (s->catalog_bitmap[index] == 0)) + return -1; + + position = (s->catalog_bitmap[index] + offset) * 512; + +// fprintf(stderr, "sector: %llx index=%x offset=%x pointer=%x position=%x\n", +// sector_num, index, offset, s->catalog_bitmap[index], position); + + if (lseek(s->fd, position, SEEK_SET) != position) + return -1; + + return 0; +} + +static int parallels_read(BlockDriverState *bs, int64_t sector_num, + uint8_t *buf, int nb_sectors) +{ + BDRVParallelsState *s = bs->opaque; + + while (nb_sectors > 0) { + if (!seek_to_sector(bs, sector_num)) { + if (read(s->fd, buf, 512) != 512) + return -1; + } else + memset(buf, 0, 512); + nb_sectors--; + sector_num++; + buf += 512; + } + return 0; +} + +static void parallels_close(BlockDriverState *bs) +{ + BDRVParallelsState *s = bs->opaque; + qemu_free(s->catalog_bitmap); + close(s->fd); +} + +static BlockDriver bdrv_parallels = { + .format_name = "parallels", + .instance_size = sizeof(BDRVParallelsState), + .bdrv_probe = parallels_probe, + .bdrv_open = parallels_open, + .bdrv_read = parallels_read, + .bdrv_close = parallels_close, +}; + +static void bdrv_parallels_init(void) +{ + bdrv_register(&bdrv_parallels); +} + +block_init(bdrv_parallels_init); diff --git a/block/qcow.c b/block/qcow.c new file mode 100644 index 000000000..1cf7c3be7 --- /dev/null +++ b/block/qcow.c @@ -0,0 +1,945 @@ +/* + * Block driver for the QCOW format + * + * Copyright (c) 2004-2006 Fabrice Bellard + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +#include "qemu-common.h" +#include "block_int.h" +#include "module.h" +#include +#include "aes.h" + +/**************************************************************/ +/* QEMU COW block driver with compression and encryption support */ + +#define QCOW_MAGIC (('Q' << 24) | ('F' << 16) | ('I' << 8) | 0xfb) +#define QCOW_VERSION 1 + +#define QCOW_CRYPT_NONE 0 +#define QCOW_CRYPT_AES 1 + +#define QCOW_OFLAG_COMPRESSED (1LL << 63) + +typedef struct QCowHeader { + uint32_t magic; + uint32_t version; + uint64_t backing_file_offset; + uint32_t backing_file_size; + uint32_t mtime; + uint64_t size; /* in bytes */ + uint8_t cluster_bits; + uint8_t l2_bits; + uint32_t crypt_method; + uint64_t l1_table_offset; +} QCowHeader; + +#define L2_CACHE_SIZE 16 + +typedef struct BDRVQcowState { + BlockDriverState *hd; + int cluster_bits; + int cluster_size; + int cluster_sectors; + int l2_bits; + int l2_size; + int l1_size; + uint64_t cluster_offset_mask; + uint64_t l1_table_offset; + uint64_t *l1_table; + uint64_t *l2_cache; + uint64_t l2_cache_offsets[L2_CACHE_SIZE]; + uint32_t l2_cache_counts[L2_CACHE_SIZE]; + uint8_t *cluster_cache; + uint8_t *cluster_data; + uint64_t cluster_cache_offset; + uint32_t crypt_method; /* current crypt method, 0 if no key yet */ + uint32_t crypt_method_header; + AES_KEY aes_encrypt_key; + AES_KEY aes_decrypt_key; +} BDRVQcowState; + +static int decompress_cluster(BDRVQcowState *s, uint64_t cluster_offset); + +static int qcow_probe(const uint8_t *buf, int buf_size, const char *filename) +{ + const QCowHeader *cow_header = (const void *)buf; + + if (buf_size >= sizeof(QCowHeader) && + be32_to_cpu(cow_header->magic) == QCOW_MAGIC && + be32_to_cpu(cow_header->version) == QCOW_VERSION) + return 100; + else + return 0; +} + +static int qcow_open(BlockDriverState *bs, const char *filename, int flags) +{ + BDRVQcowState *s = bs->opaque; + int len, i, shift, ret; + QCowHeader header; + + ret = bdrv_file_open(&s->hd, filename, flags); + if (ret < 0) + return ret; + if (bdrv_pread(s->hd, 0, &header, sizeof(header)) != sizeof(header)) + goto fail; + be32_to_cpus(&header.magic); + be32_to_cpus(&header.version); + be64_to_cpus(&header.backing_file_offset); + be32_to_cpus(&header.backing_file_size); + be32_to_cpus(&header.mtime); + be64_to_cpus(&header.size); + be32_to_cpus(&header.crypt_method); + be64_to_cpus(&header.l1_table_offset); + + if (header.magic != QCOW_MAGIC || header.version != QCOW_VERSION) + goto fail; + if (header.size <= 1 || header.cluster_bits < 9) + goto fail; + if (header.crypt_method > QCOW_CRYPT_AES) + goto fail; + s->crypt_method_header = header.crypt_method; + if (s->crypt_method_header) + bs->encrypted = 1; + s->cluster_bits = header.cluster_bits; + s->cluster_size = 1 << s->cluster_bits; + s->cluster_sectors = 1 << (s->cluster_bits - 9); + s->l2_bits = header.l2_bits; + s->l2_size = 1 << s->l2_bits; + bs->total_sectors = header.size / 512; + s->cluster_offset_mask = (1LL << (63 - s->cluster_bits)) - 1; + + /* read the level 1 table */ + shift = s->cluster_bits + s->l2_bits; + s->l1_size = (header.size + (1LL << shift) - 1) >> shift; + + s->l1_table_offset = header.l1_table_offset; + s->l1_table = qemu_malloc(s->l1_size * sizeof(uint64_t)); + if (!s->l1_table) + goto fail; + if (bdrv_pread(s->hd, s->l1_table_offset, s->l1_table, s->l1_size * sizeof(uint64_t)) != + s->l1_size * sizeof(uint64_t)) + goto fail; + for(i = 0;i < s->l1_size; i++) { + be64_to_cpus(&s->l1_table[i]); + } + /* alloc L2 cache */ + s->l2_cache = qemu_malloc(s->l2_size * L2_CACHE_SIZE * sizeof(uint64_t)); + if (!s->l2_cache) + goto fail; + s->cluster_cache = qemu_malloc(s->cluster_size); + if (!s->cluster_cache) + goto fail; + s->cluster_data = qemu_malloc(s->cluster_size); + if (!s->cluster_data) + goto fail; + s->cluster_cache_offset = -1; + + /* read the backing file name */ + if (header.backing_file_offset != 0) { + len = header.backing_file_size; + if (len > 1023) + len = 1023; + if (bdrv_pread(s->hd, header.backing_file_offset, bs->backing_file, len) != len) + goto fail; + bs->backing_file[len] = '\0'; + } + return 0; + + fail: + qemu_free(s->l1_table); + qemu_free(s->l2_cache); + qemu_free(s->cluster_cache); + qemu_free(s->cluster_data); + bdrv_delete(s->hd); + return -1; +} + +static int qcow_set_key(BlockDriverState *bs, const char *key) +{ + BDRVQcowState *s = bs->opaque; + uint8_t keybuf[16]; + int len, i; + + memset(keybuf, 0, 16); + len = strlen(key); + if (len > 16) + len = 16; + /* XXX: we could compress the chars to 7 bits to increase + entropy */ + for(i = 0;i < len;i++) { + keybuf[i] = key[i]; + } + s->crypt_method = s->crypt_method_header; + + if (AES_set_encrypt_key(keybuf, 128, &s->aes_encrypt_key) != 0) + return -1; + if (AES_set_decrypt_key(keybuf, 128, &s->aes_decrypt_key) != 0) + return -1; +#if 0 + /* test */ + { + uint8_t in[16]; + uint8_t out[16]; + uint8_t tmp[16]; + for(i=0;i<16;i++) + in[i] = i; + AES_encrypt(in, tmp, &s->aes_encrypt_key); + AES_decrypt(tmp, out, &s->aes_decrypt_key); + for(i = 0; i < 16; i++) + printf(" %02x", tmp[i]); + printf("\n"); + for(i = 0; i < 16; i++) + printf(" %02x", out[i]); + printf("\n"); + } +#endif + return 0; +} + +/* The crypt function is compatible with the linux cryptoloop + algorithm for < 4 GB images. NOTE: out_buf == in_buf is + supported */ +static void encrypt_sectors(BDRVQcowState *s, int64_t sector_num, + uint8_t *out_buf, const uint8_t *in_buf, + int nb_sectors, int enc, + const AES_KEY *key) +{ + union { + uint64_t ll[2]; + uint8_t b[16]; + } ivec; + int i; + + for(i = 0; i < nb_sectors; i++) { + ivec.ll[0] = cpu_to_le64(sector_num); + ivec.ll[1] = 0; + AES_cbc_encrypt(in_buf, out_buf, 512, key, + ivec.b, enc); + sector_num++; + in_buf += 512; + out_buf += 512; + } +} + +/* 'allocate' is: + * + * 0 to not allocate. + * + * 1 to allocate a normal cluster (for sector indexes 'n_start' to + * 'n_end') + * + * 2 to allocate a compressed cluster of size + * 'compressed_size'. 'compressed_size' must be > 0 and < + * cluster_size + * + * return 0 if not allocated. + */ +static uint64_t get_cluster_offset(BlockDriverState *bs, + uint64_t offset, int allocate, + int compressed_size, + int n_start, int n_end) +{ + BDRVQcowState *s = bs->opaque; + int min_index, i, j, l1_index, l2_index; + uint64_t l2_offset, *l2_table, cluster_offset, tmp; + uint32_t min_count; + int new_l2_table; + + l1_index = offset >> (s->l2_bits + s->cluster_bits); + l2_offset = s->l1_table[l1_index]; + new_l2_table = 0; + if (!l2_offset) { + if (!allocate) + return 0; + /* allocate a new l2 entry */ + l2_offset = bdrv_getlength(s->hd); + /* round to cluster size */ + l2_offset = (l2_offset + s->cluster_size - 1) & ~(s->cluster_size - 1); + /* update the L1 entry */ + s->l1_table[l1_index] = l2_offset; + tmp = cpu_to_be64(l2_offset); + if (bdrv_pwrite(s->hd, s->l1_table_offset + l1_index * sizeof(tmp), + &tmp, sizeof(tmp)) != sizeof(tmp)) + return 0; + new_l2_table = 1; + } + for(i = 0; i < L2_CACHE_SIZE; i++) { + if (l2_offset == s->l2_cache_offsets[i]) { + /* increment the hit count */ + if (++s->l2_cache_counts[i] == 0xffffffff) { + for(j = 0; j < L2_CACHE_SIZE; j++) { + s->l2_cache_counts[j] >>= 1; + } + } + l2_table = s->l2_cache + (i << s->l2_bits); + goto found; + } + } + /* not found: load a new entry in the least used one */ + min_index = 0; + min_count = 0xffffffff; + for(i = 0; i < L2_CACHE_SIZE; i++) { + if (s->l2_cache_counts[i] < min_count) { + min_count = s->l2_cache_counts[i]; + min_index = i; + } + } + l2_table = s->l2_cache + (min_index << s->l2_bits); + if (new_l2_table) { + memset(l2_table, 0, s->l2_size * sizeof(uint64_t)); + if (bdrv_pwrite(s->hd, l2_offset, l2_table, s->l2_size * sizeof(uint64_t)) != + s->l2_size * sizeof(uint64_t)) + return 0; + } else { + if (bdrv_pread(s->hd, l2_offset, l2_table, s->l2_size * sizeof(uint64_t)) != + s->l2_size * sizeof(uint64_t)) + return 0; + } + s->l2_cache_offsets[min_index] = l2_offset; + s->l2_cache_counts[min_index] = 1; + found: + l2_index = (offset >> s->cluster_bits) & (s->l2_size - 1); + cluster_offset = be64_to_cpu(l2_table[l2_index]); + if (!cluster_offset || + ((cluster_offset & QCOW_OFLAG_COMPRESSED) && allocate == 1)) { + if (!allocate) + return 0; + /* allocate a new cluster */ + if ((cluster_offset & QCOW_OFLAG_COMPRESSED) && + (n_end - n_start) < s->cluster_sectors) { + /* if the cluster is already compressed, we must + decompress it in the case it is not completely + overwritten */ + if (decompress_cluster(s, cluster_offset) < 0) + return 0; + cluster_offset = bdrv_getlength(s->hd); + cluster_offset = (cluster_offset + s->cluster_size - 1) & + ~(s->cluster_size - 1); + /* write the cluster content */ + if (bdrv_pwrite(s->hd, cluster_offset, s->cluster_cache, s->cluster_size) != + s->cluster_size) + return -1; + } else { + cluster_offset = bdrv_getlength(s->hd); + if (allocate == 1) { + /* round to cluster size */ + cluster_offset = (cluster_offset + s->cluster_size - 1) & + ~(s->cluster_size - 1); + bdrv_truncate(s->hd, cluster_offset + s->cluster_size); + /* if encrypted, we must initialize the cluster + content which won't be written */ + if (s->crypt_method && + (n_end - n_start) < s->cluster_sectors) { + uint64_t start_sect; + start_sect = (offset & ~(s->cluster_size - 1)) >> 9; + memset(s->cluster_data + 512, 0x00, 512); + for(i = 0; i < s->cluster_sectors; i++) { + if (i < n_start || i >= n_end) { + encrypt_sectors(s, start_sect + i, + s->cluster_data, + s->cluster_data + 512, 1, 1, + &s->aes_encrypt_key); + if (bdrv_pwrite(s->hd, cluster_offset + i * 512, + s->cluster_data, 512) != 512) + return -1; + } + } + } + } else if (allocate == 2) { + cluster_offset |= QCOW_OFLAG_COMPRESSED | + (uint64_t)compressed_size << (63 - s->cluster_bits); + } + } + /* update L2 table */ + tmp = cpu_to_be64(cluster_offset); + l2_table[l2_index] = tmp; + if (bdrv_pwrite(s->hd, + l2_offset + l2_index * sizeof(tmp), &tmp, sizeof(tmp)) != sizeof(tmp)) + return 0; + } + return cluster_offset; +} + +static int qcow_is_allocated(BlockDriverState *bs, int64_t sector_num, + int nb_sectors, int *pnum) +{ + BDRVQcowState *s = bs->opaque; + int index_in_cluster, n; + uint64_t cluster_offset; + + cluster_offset = get_cluster_offset(bs, sector_num << 9, 0, 0, 0, 0); + index_in_cluster = sector_num & (s->cluster_sectors - 1); + n = s->cluster_sectors - index_in_cluster; + if (n > nb_sectors) + n = nb_sectors; + *pnum = n; + return (cluster_offset != 0); +} + +static int decompress_buffer(uint8_t *out_buf, int out_buf_size, + const uint8_t *buf, int buf_size) +{ + z_stream strm1, *strm = &strm1; + int ret, out_len; + + memset(strm, 0, sizeof(*strm)); + + strm->next_in = (uint8_t *)buf; + strm->avail_in = buf_size; + strm->next_out = out_buf; + strm->avail_out = out_buf_size; + + ret = inflateInit2(strm, -12); + if (ret != Z_OK) + return -1; + ret = inflate(strm, Z_FINISH); + out_len = strm->next_out - out_buf; + if ((ret != Z_STREAM_END && ret != Z_BUF_ERROR) || + out_len != out_buf_size) { + inflateEnd(strm); + return -1; + } + inflateEnd(strm); + return 0; +} + +static int decompress_cluster(BDRVQcowState *s, uint64_t cluster_offset) +{ + int ret, csize; + uint64_t coffset; + + coffset = cluster_offset & s->cluster_offset_mask; + if (s->cluster_cache_offset != coffset) { + csize = cluster_offset >> (63 - s->cluster_bits); + csize &= (s->cluster_size - 1); + ret = bdrv_pread(s->hd, coffset, s->cluster_data, csize); + if (ret != csize) + return -1; + if (decompress_buffer(s->cluster_cache, s->cluster_size, + s->cluster_data, csize) < 0) { + return -1; + } + s->cluster_cache_offset = coffset; + } + return 0; +} + +#if 0 + +static int qcow_read(BlockDriverState *bs, int64_t sector_num, + uint8_t *buf, int nb_sectors) +{ + BDRVQcowState *s = bs->opaque; + int ret, index_in_cluster, n; + uint64_t cluster_offset; + + while (nb_sectors > 0) { + cluster_offset = get_cluster_offset(bs, sector_num << 9, 0, 0, 0, 0); + index_in_cluster = sector_num & (s->cluster_sectors - 1); + n = s->cluster_sectors - index_in_cluster; + if (n > nb_sectors) + n = nb_sectors; + if (!cluster_offset) { + if (bs->backing_hd) { + /* read from the base image */ + ret = bdrv_read(bs->backing_hd, sector_num, buf, n); + if (ret < 0) + return -1; + } else { + memset(buf, 0, 512 * n); + } + } else if (cluster_offset & QCOW_OFLAG_COMPRESSED) { + if (decompress_cluster(s, cluster_offset) < 0) + return -1; + memcpy(buf, s->cluster_cache + index_in_cluster * 512, 512 * n); + } else { + ret = bdrv_pread(s->hd, cluster_offset + index_in_cluster * 512, buf, n * 512); + if (ret != n * 512) + return -1; + if (s->crypt_method) { + encrypt_sectors(s, sector_num, buf, buf, n, 0, + &s->aes_decrypt_key); + } + } + nb_sectors -= n; + sector_num += n; + buf += n * 512; + } + return 0; +} +#endif + +static int qcow_write(BlockDriverState *bs, int64_t sector_num, + const uint8_t *buf, int nb_sectors) +{ + BDRVQcowState *s = bs->opaque; + int ret, index_in_cluster, n; + uint64_t cluster_offset; + + while (nb_sectors > 0) { + index_in_cluster = sector_num & (s->cluster_sectors - 1); + n = s->cluster_sectors - index_in_cluster; + if (n > nb_sectors) + n = nb_sectors; + cluster_offset = get_cluster_offset(bs, sector_num << 9, 1, 0, + index_in_cluster, + index_in_cluster + n); + if (!cluster_offset) + return -1; + if (s->crypt_method) { + encrypt_sectors(s, sector_num, s->cluster_data, buf, n, 1, + &s->aes_encrypt_key); + ret = bdrv_pwrite(s->hd, cluster_offset + index_in_cluster * 512, + s->cluster_data, n * 512); + } else { + ret = bdrv_pwrite(s->hd, cluster_offset + index_in_cluster * 512, buf, n * 512); + } + if (ret != n * 512) + return -1; + nb_sectors -= n; + sector_num += n; + buf += n * 512; + } + s->cluster_cache_offset = -1; /* disable compressed cache */ + return 0; +} + +typedef struct QCowAIOCB { + BlockDriverAIOCB common; + int64_t sector_num; + QEMUIOVector *qiov; + uint8_t *buf; + void *orig_buf; + int nb_sectors; + int n; + uint64_t cluster_offset; + uint8_t *cluster_data; + struct iovec hd_iov; + QEMUIOVector hd_qiov; + BlockDriverAIOCB *hd_aiocb; +} QCowAIOCB; + +static void qcow_aio_read_cb(void *opaque, int ret) +{ + QCowAIOCB *acb = opaque; + BlockDriverState *bs = acb->common.bs; + BDRVQcowState *s = bs->opaque; + int index_in_cluster; + + acb->hd_aiocb = NULL; + if (ret < 0) + goto done; + + redo: + /* post process the read buffer */ + if (!acb->cluster_offset) { + /* nothing to do */ + } else if (acb->cluster_offset & QCOW_OFLAG_COMPRESSED) { + /* nothing to do */ + } else { + if (s->crypt_method) { + encrypt_sectors(s, acb->sector_num, acb->buf, acb->buf, + acb->n, 0, + &s->aes_decrypt_key); + } + } + + acb->nb_sectors -= acb->n; + acb->sector_num += acb->n; + acb->buf += acb->n * 512; + + if (acb->nb_sectors == 0) { + /* request completed */ + ret = 0; + goto done; + } + + /* prepare next AIO request */ + acb->cluster_offset = get_cluster_offset(bs, acb->sector_num << 9, + 0, 0, 0, 0); + index_in_cluster = acb->sector_num & (s->cluster_sectors - 1); + acb->n = s->cluster_sectors - index_in_cluster; + if (acb->n > acb->nb_sectors) + acb->n = acb->nb_sectors; + + if (!acb->cluster_offset) { + if (bs->backing_hd) { + /* read from the base image */ + acb->hd_iov.iov_base = (void *)acb->buf; + acb->hd_iov.iov_len = acb->n * 512; + qemu_iovec_init_external(&acb->hd_qiov, &acb->hd_iov, 1); + acb->hd_aiocb = bdrv_aio_readv(bs->backing_hd, acb->sector_num, + &acb->hd_qiov, acb->n, qcow_aio_read_cb, acb); + if (acb->hd_aiocb == NULL) + goto done; + } else { + /* Note: in this case, no need to wait */ + memset(acb->buf, 0, 512 * acb->n); + goto redo; + } + } else if (acb->cluster_offset & QCOW_OFLAG_COMPRESSED) { + /* add AIO support for compressed blocks ? */ + if (decompress_cluster(s, acb->cluster_offset) < 0) + goto done; + memcpy(acb->buf, + s->cluster_cache + index_in_cluster * 512, 512 * acb->n); + goto redo; + } else { + if ((acb->cluster_offset & 511) != 0) { + ret = -EIO; + goto done; + } + acb->hd_iov.iov_base = (void *)acb->buf; + acb->hd_iov.iov_len = acb->n * 512; + qemu_iovec_init_external(&acb->hd_qiov, &acb->hd_iov, 1); + acb->hd_aiocb = bdrv_aio_readv(s->hd, + (acb->cluster_offset >> 9) + index_in_cluster, + &acb->hd_qiov, acb->n, qcow_aio_read_cb, acb); + if (acb->hd_aiocb == NULL) + goto done; + } + + return; + +done: + if (acb->qiov->niov > 1) { + qemu_iovec_from_buffer(acb->qiov, acb->orig_buf, acb->qiov->size); + qemu_vfree(acb->orig_buf); + } + acb->common.cb(acb->common.opaque, ret); + qemu_aio_release(acb); +} + +static BlockDriverAIOCB *qcow_aio_readv(BlockDriverState *bs, + int64_t sector_num, QEMUIOVector *qiov, int nb_sectors, + BlockDriverCompletionFunc *cb, void *opaque) +{ + QCowAIOCB *acb; + + acb = qemu_aio_get(bs, cb, opaque); + if (!acb) + return NULL; + acb->hd_aiocb = NULL; + acb->sector_num = sector_num; + acb->qiov = qiov; + if (qiov->niov > 1) + acb->buf = acb->orig_buf = qemu_blockalign(bs, qiov->size); + else + acb->buf = (uint8_t *)qiov->iov->iov_base; + acb->nb_sectors = nb_sectors; + acb->n = 0; + acb->cluster_offset = 0; + + qcow_aio_read_cb(acb, 0); + return &acb->common; +} + +static void qcow_aio_write_cb(void *opaque, int ret) +{ + QCowAIOCB *acb = opaque; + BlockDriverState *bs = acb->common.bs; + BDRVQcowState *s = bs->opaque; + int index_in_cluster; + uint64_t cluster_offset; + const uint8_t *src_buf; + + acb->hd_aiocb = NULL; + + if (ret < 0) + goto done; + + acb->nb_sectors -= acb->n; + acb->sector_num += acb->n; + acb->buf += acb->n * 512; + + if (acb->nb_sectors == 0) { + /* request completed */ + ret = 0; + goto done; + } + + index_in_cluster = acb->sector_num & (s->cluster_sectors - 1); + acb->n = s->cluster_sectors - index_in_cluster; + if (acb->n > acb->nb_sectors) + acb->n = acb->nb_sectors; + cluster_offset = get_cluster_offset(bs, acb->sector_num << 9, 1, 0, + index_in_cluster, + index_in_cluster + acb->n); + if (!cluster_offset || (cluster_offset & 511) != 0) { + ret = -EIO; + goto done; + } + if (s->crypt_method) { + if (!acb->cluster_data) { + acb->cluster_data = qemu_mallocz(s->cluster_size); + if (!acb->cluster_data) { + ret = -ENOMEM; + goto done; + } + } + encrypt_sectors(s, acb->sector_num, acb->cluster_data, acb->buf, + acb->n, 1, &s->aes_encrypt_key); + src_buf = acb->cluster_data; + } else { + src_buf = acb->buf; + } + + acb->hd_iov.iov_base = (void *)src_buf; + acb->hd_iov.iov_len = acb->n * 512; + qemu_iovec_init_external(&acb->hd_qiov, &acb->hd_iov, 1); + acb->hd_aiocb = bdrv_aio_writev(s->hd, + (cluster_offset >> 9) + index_in_cluster, + &acb->hd_qiov, acb->n, + qcow_aio_write_cb, acb); + if (acb->hd_aiocb == NULL) + goto done; + return; + +done: + if (acb->qiov->niov > 1) + qemu_vfree(acb->orig_buf); + acb->common.cb(acb->common.opaque, ret); + qemu_aio_release(acb); +} + +static BlockDriverAIOCB *qcow_aio_writev(BlockDriverState *bs, + int64_t sector_num, QEMUIOVector *qiov, int nb_sectors, + BlockDriverCompletionFunc *cb, void *opaque) +{ + BDRVQcowState *s = bs->opaque; + QCowAIOCB *acb; + + s->cluster_cache_offset = -1; /* disable compressed cache */ + + acb = qemu_aio_get(bs, cb, opaque); + if (!acb) + return NULL; + acb->hd_aiocb = NULL; + acb->sector_num = sector_num; + acb->qiov = qiov; + if (qiov->niov > 1) { + acb->buf = acb->orig_buf = qemu_blockalign(bs, qiov->size); + qemu_iovec_to_buffer(qiov, acb->buf); + } else { + acb->buf = (uint8_t *)qiov->iov->iov_base; + } + acb->nb_sectors = nb_sectors; + acb->n = 0; + + qcow_aio_write_cb(acb, 0); + return &acb->common; +} + +static void qcow_aio_cancel(BlockDriverAIOCB *blockacb) +{ + QCowAIOCB *acb = (QCowAIOCB *)blockacb; + if (acb->hd_aiocb) + bdrv_aio_cancel(acb->hd_aiocb); + qemu_aio_release(acb); +} + +static void qcow_close(BlockDriverState *bs) +{ + BDRVQcowState *s = bs->opaque; + qemu_free(s->l1_table); + qemu_free(s->l2_cache); + qemu_free(s->cluster_cache); + qemu_free(s->cluster_data); + bdrv_delete(s->hd); +} + +static int qcow_create(const char *filename, int64_t total_size, + const char *backing_file, int flags) +{ + int fd, header_size, backing_filename_len, l1_size, i, shift; + QCowHeader header; + uint64_t tmp; + + fd = open(filename, O_WRONLY | O_CREAT | O_TRUNC | O_BINARY, 0644); + if (fd < 0) + return -1; + memset(&header, 0, sizeof(header)); + header.magic = cpu_to_be32(QCOW_MAGIC); + header.version = cpu_to_be32(QCOW_VERSION); + header.size = cpu_to_be64(total_size * 512); + header_size = sizeof(header); + backing_filename_len = 0; + if (backing_file) { + if (strcmp(backing_file, "fat:")) { + header.backing_file_offset = cpu_to_be64(header_size); + backing_filename_len = strlen(backing_file); + header.backing_file_size = cpu_to_be32(backing_filename_len); + header_size += backing_filename_len; + } else { + /* special backing file for vvfat */ + backing_file = NULL; + } + header.cluster_bits = 9; /* 512 byte cluster to avoid copying + unmodifyed sectors */ + header.l2_bits = 12; /* 32 KB L2 tables */ + } else { + header.cluster_bits = 12; /* 4 KB clusters */ + header.l2_bits = 9; /* 4 KB L2 tables */ + } + header_size = (header_size + 7) & ~7; + shift = header.cluster_bits + header.l2_bits; + l1_size = ((total_size * 512) + (1LL << shift) - 1) >> shift; + + header.l1_table_offset = cpu_to_be64(header_size); + if (flags & BLOCK_FLAG_ENCRYPT) { + header.crypt_method = cpu_to_be32(QCOW_CRYPT_AES); + } else { + header.crypt_method = cpu_to_be32(QCOW_CRYPT_NONE); + } + + /* write all the data */ + write(fd, &header, sizeof(header)); + if (backing_file) { + write(fd, backing_file, backing_filename_len); + } + lseek(fd, header_size, SEEK_SET); + tmp = 0; + for(i = 0;i < l1_size; i++) { + write(fd, &tmp, sizeof(tmp)); + } + close(fd); + return 0; +} + +static int qcow_make_empty(BlockDriverState *bs) +{ + BDRVQcowState *s = bs->opaque; + uint32_t l1_length = s->l1_size * sizeof(uint64_t); + int ret; + + memset(s->l1_table, 0, l1_length); + if (bdrv_pwrite(s->hd, s->l1_table_offset, s->l1_table, l1_length) < 0) + return -1; + ret = bdrv_truncate(s->hd, s->l1_table_offset + l1_length); + if (ret < 0) + return ret; + + memset(s->l2_cache, 0, s->l2_size * L2_CACHE_SIZE * sizeof(uint64_t)); + memset(s->l2_cache_offsets, 0, L2_CACHE_SIZE * sizeof(uint64_t)); + memset(s->l2_cache_counts, 0, L2_CACHE_SIZE * sizeof(uint32_t)); + + return 0; +} + +/* XXX: put compressed sectors first, then all the cluster aligned + tables to avoid losing bytes in alignment */ +static int qcow_write_compressed(BlockDriverState *bs, int64_t sector_num, + const uint8_t *buf, int nb_sectors) +{ + BDRVQcowState *s = bs->opaque; + z_stream strm; + int ret, out_len; + uint8_t *out_buf; + uint64_t cluster_offset; + + if (nb_sectors != s->cluster_sectors) + return -EINVAL; + + out_buf = qemu_malloc(s->cluster_size + (s->cluster_size / 1000) + 128); + if (!out_buf) + return -1; + + /* best compression, small window, no zlib header */ + memset(&strm, 0, sizeof(strm)); + ret = deflateInit2(&strm, Z_DEFAULT_COMPRESSION, + Z_DEFLATED, -12, + 9, Z_DEFAULT_STRATEGY); + if (ret != 0) { + qemu_free(out_buf); + return -1; + } + + strm.avail_in = s->cluster_size; + strm.next_in = (uint8_t *)buf; + strm.avail_out = s->cluster_size; + strm.next_out = out_buf; + + ret = deflate(&strm, Z_FINISH); + if (ret != Z_STREAM_END && ret != Z_OK) { + qemu_free(out_buf); + deflateEnd(&strm); + return -1; + } + out_len = strm.next_out - out_buf; + + deflateEnd(&strm); + + if (ret != Z_STREAM_END || out_len >= s->cluster_size) { + /* could not compress: write normal cluster */ + qcow_write(bs, sector_num, buf, s->cluster_sectors); + } else { + cluster_offset = get_cluster_offset(bs, sector_num << 9, 2, + out_len, 0, 0); + cluster_offset &= s->cluster_offset_mask; + if (bdrv_pwrite(s->hd, cluster_offset, out_buf, out_len) != out_len) { + qemu_free(out_buf); + return -1; + } + } + + qemu_free(out_buf); + return 0; +} + +static void qcow_flush(BlockDriverState *bs) +{ + BDRVQcowState *s = bs->opaque; + bdrv_flush(s->hd); +} + +static int qcow_get_info(BlockDriverState *bs, BlockDriverInfo *bdi) +{ + BDRVQcowState *s = bs->opaque; + bdi->cluster_size = s->cluster_size; + return 0; +} + +static BlockDriver bdrv_qcow = { + .format_name = "qcow", + .instance_size = sizeof(BDRVQcowState), + .bdrv_probe = qcow_probe, + .bdrv_open = qcow_open, + .bdrv_close = qcow_close, + .bdrv_create = qcow_create, + .bdrv_flush = qcow_flush, + .bdrv_is_allocated = qcow_is_allocated, + .bdrv_set_key = qcow_set_key, + .bdrv_make_empty = qcow_make_empty, + .bdrv_aio_readv = qcow_aio_readv, + .bdrv_aio_writev = qcow_aio_writev, + .bdrv_aio_cancel = qcow_aio_cancel, + .aiocb_size = sizeof(QCowAIOCB), + .bdrv_write_compressed = qcow_write_compressed, + .bdrv_get_info = qcow_get_info, +}; + +static void bdrv_qcow_init(void) +{ + bdrv_register(&bdrv_qcow); +} + +block_init(bdrv_qcow_init); diff --git a/block/qcow2.c b/block/qcow2.c new file mode 100644 index 000000000..a6de9b691 --- /dev/null +++ b/block/qcow2.c @@ -0,0 +1,2931 @@ +/* + * Block driver for the QCOW version 2 format + * + * Copyright (c) 2004-2006 Fabrice Bellard + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +#include "qemu-common.h" +#include "block_int.h" +#include "module.h" +#include +#include "aes.h" + +/* + Differences with QCOW: + + - Support for multiple incremental snapshots. + - Memory management by reference counts. + - Clusters which have a reference count of one have the bit + QCOW_OFLAG_COPIED to optimize write performance. + - Size of compressed clusters is stored in sectors to reduce bit usage + in the cluster offsets. + - Support for storing additional data (such as the VM state) in the + snapshots. + - If a backing store is used, the cluster size is not constrained + (could be backported to QCOW). + - L2 tables have always a size of one cluster. +*/ + +//#define DEBUG_ALLOC +//#define DEBUG_ALLOC2 +//#define DEBUG_EXT + +#define QCOW_MAGIC (('Q' << 24) | ('F' << 16) | ('I' << 8) | 0xfb) +#define QCOW_VERSION 2 + +#define QCOW_CRYPT_NONE 0 +#define QCOW_CRYPT_AES 1 + +#define QCOW_MAX_CRYPT_CLUSTERS 32 + +/* indicate that the refcount of the referenced cluster is exactly one. */ +#define QCOW_OFLAG_COPIED (1LL << 63) +/* indicate that the cluster is compressed (they never have the copied flag) */ +#define QCOW_OFLAG_COMPRESSED (1LL << 62) + +#define REFCOUNT_SHIFT 1 /* refcount size is 2 bytes */ + +typedef struct QCowHeader { + uint32_t magic; + uint32_t version; + uint64_t backing_file_offset; + uint32_t backing_file_size; + uint32_t cluster_bits; + uint64_t size; /* in bytes */ + uint32_t crypt_method; + uint32_t l1_size; /* XXX: save number of clusters instead ? */ + uint64_t l1_table_offset; + uint64_t refcount_table_offset; + uint32_t refcount_table_clusters; + uint32_t nb_snapshots; + uint64_t snapshots_offset; +} QCowHeader; + + +typedef struct { + uint32_t magic; + uint32_t len; +} QCowExtension; +#define QCOW_EXT_MAGIC_END 0 +#define QCOW_EXT_MAGIC_BACKING_FORMAT 0xE2792ACA + + +typedef struct __attribute__((packed)) QCowSnapshotHeader { + /* header is 8 byte aligned */ + uint64_t l1_table_offset; + + uint32_t l1_size; + uint16_t id_str_size; + uint16_t name_size; + + uint32_t date_sec; + uint32_t date_nsec; + + uint64_t vm_clock_nsec; + + uint32_t vm_state_size; + uint32_t extra_data_size; /* for extension */ + /* extra data follows */ + /* id_str follows */ + /* name follows */ +} QCowSnapshotHeader; + +#define L2_CACHE_SIZE 16 + +typedef struct QCowSnapshot { + uint64_t l1_table_offset; + uint32_t l1_size; + char *id_str; + char *name; + uint32_t vm_state_size; + uint32_t date_sec; + uint32_t date_nsec; + uint64_t vm_clock_nsec; +} QCowSnapshot; + +typedef struct BDRVQcowState { + BlockDriverState *hd; + int cluster_bits; + int cluster_size; + int cluster_sectors; + int l2_bits; + int l2_size; + int l1_size; + int l1_vm_state_index; + int csize_shift; + int csize_mask; + uint64_t cluster_offset_mask; + uint64_t l1_table_offset; + uint64_t *l1_table; + uint64_t *l2_cache; + uint64_t l2_cache_offsets[L2_CACHE_SIZE]; + uint32_t l2_cache_counts[L2_CACHE_SIZE]; + uint8_t *cluster_cache; + uint8_t *cluster_data; + uint64_t cluster_cache_offset; + + uint64_t *refcount_table; + uint64_t refcount_table_offset; + uint32_t refcount_table_size; + uint64_t refcount_block_cache_offset; + uint16_t *refcount_block_cache; + int64_t free_cluster_index; + int64_t free_byte_offset; + + uint32_t crypt_method; /* current crypt method, 0 if no key yet */ + uint32_t crypt_method_header; + AES_KEY aes_encrypt_key; + AES_KEY aes_decrypt_key; + uint64_t snapshots_offset; + int snapshots_size; + int nb_snapshots; + QCowSnapshot *snapshots; +} BDRVQcowState; + +static int decompress_cluster(BDRVQcowState *s, uint64_t cluster_offset); +static int qcow_read(BlockDriverState *bs, int64_t sector_num, + uint8_t *buf, int nb_sectors); +static int qcow_read_snapshots(BlockDriverState *bs); +static void qcow_free_snapshots(BlockDriverState *bs); +static int refcount_init(BlockDriverState *bs); +static void refcount_close(BlockDriverState *bs); +static int get_refcount(BlockDriverState *bs, int64_t cluster_index); +static int update_cluster_refcount(BlockDriverState *bs, + int64_t cluster_index, + int addend); +static void update_refcount(BlockDriverState *bs, + int64_t offset, int64_t length, + int addend); +static int64_t alloc_clusters(BlockDriverState *bs, int64_t size); +static int64_t alloc_bytes(BlockDriverState *bs, int size); +static void free_clusters(BlockDriverState *bs, + int64_t offset, int64_t size); +static int check_refcounts(BlockDriverState *bs); + +static int qcow_probe(const uint8_t *buf, int buf_size, const char *filename) +{ + const QCowHeader *cow_header = (const void *)buf; + + if (buf_size >= sizeof(QCowHeader) && + be32_to_cpu(cow_header->magic) == QCOW_MAGIC && + be32_to_cpu(cow_header->version) == QCOW_VERSION) + return 100; + else + return 0; +} + + +/* + * read qcow2 extension and fill bs + * start reading from start_offset + * finish reading upon magic of value 0 or when end_offset reached + * unknown magic is skipped (future extension this version knows nothing about) + * return 0 upon success, non-0 otherwise + */ +static int qcow_read_extensions(BlockDriverState *bs, uint64_t start_offset, + uint64_t end_offset) +{ + BDRVQcowState *s = bs->opaque; + QCowExtension ext; + uint64_t offset; + +#ifdef DEBUG_EXT + printf("qcow_read_extensions: start=%ld end=%ld\n", start_offset, end_offset); +#endif + offset = start_offset; + while (offset < end_offset) { + +#ifdef DEBUG_EXT + /* Sanity check */ + if (offset > s->cluster_size) + printf("qcow_handle_extension: suspicious offset %lu\n", offset); + + printf("attemting to read extended header in offset %lu\n", offset); +#endif + + if (bdrv_pread(s->hd, offset, &ext, sizeof(ext)) != sizeof(ext)) { + fprintf(stderr, "qcow_handle_extension: ERROR: pread fail from offset %llu\n", + (unsigned long long)offset); + return 1; + } + be32_to_cpus(&ext.magic); + be32_to_cpus(&ext.len); + offset += sizeof(ext); +#ifdef DEBUG_EXT + printf("ext.magic = 0x%x\n", ext.magic); +#endif + switch (ext.magic) { + case QCOW_EXT_MAGIC_END: + return 0; + + case QCOW_EXT_MAGIC_BACKING_FORMAT: + if (ext.len >= sizeof(bs->backing_format)) { + fprintf(stderr, "ERROR: ext_backing_format: len=%u too large" + " (>=%zu)\n", + ext.len, sizeof(bs->backing_format)); + return 2; + } + if (bdrv_pread(s->hd, offset , bs->backing_format, + ext.len) != ext.len) + return 3; + bs->backing_format[ext.len] = '\0'; +#ifdef DEBUG_EXT + printf("Qcow2: Got format extension %s\n", bs->backing_format); +#endif + offset += ((ext.len + 7) & ~7); + break; + + default: + /* unknown magic -- just skip it */ + offset += ((ext.len + 7) & ~7); + break; + } + } + + return 0; +} + + +static int qcow_open(BlockDriverState *bs, const char *filename, int flags) +{ + BDRVQcowState *s = bs->opaque; + int len, i, shift, ret; + QCowHeader header; + uint64_t ext_end; + + /* Performance is terrible right now with cache=writethrough due mainly + * to reference count updates. If the user does not explicitly specify + * a caching type, force to writeback caching. + */ + if ((flags & BDRV_O_CACHE_DEF)) { + flags |= BDRV_O_CACHE_WB; + flags &= ~BDRV_O_CACHE_DEF; + } + ret = bdrv_file_open(&s->hd, filename, flags); + if (ret < 0) + return ret; + if (bdrv_pread(s->hd, 0, &header, sizeof(header)) != sizeof(header)) + goto fail; + be32_to_cpus(&header.magic); + be32_to_cpus(&header.version); + be64_to_cpus(&header.backing_file_offset); + be32_to_cpus(&header.backing_file_size); + be64_to_cpus(&header.size); + be32_to_cpus(&header.cluster_bits); + be32_to_cpus(&header.crypt_method); + be64_to_cpus(&header.l1_table_offset); + be32_to_cpus(&header.l1_size); + be64_to_cpus(&header.refcount_table_offset); + be32_to_cpus(&header.refcount_table_clusters); + be64_to_cpus(&header.snapshots_offset); + be32_to_cpus(&header.nb_snapshots); + + if (header.magic != QCOW_MAGIC || header.version != QCOW_VERSION) + goto fail; + if (header.size <= 1 || + header.cluster_bits < 9 || + header.cluster_bits > 16) + goto fail; + if (header.crypt_method > QCOW_CRYPT_AES) + goto fail; + s->crypt_method_header = header.crypt_method; + if (s->crypt_method_header) + bs->encrypted = 1; + s->cluster_bits = header.cluster_bits; + s->cluster_size = 1 << s->cluster_bits; + s->cluster_sectors = 1 << (s->cluster_bits - 9); + s->l2_bits = s->cluster_bits - 3; /* L2 is always one cluster */ + s->l2_size = 1 << s->l2_bits; + bs->total_sectors = header.size / 512; + s->csize_shift = (62 - (s->cluster_bits - 8)); + s->csize_mask = (1 << (s->cluster_bits - 8)) - 1; + s->cluster_offset_mask = (1LL << s->csize_shift) - 1; + s->refcount_table_offset = header.refcount_table_offset; + s->refcount_table_size = + header.refcount_table_clusters << (s->cluster_bits - 3); + + s->snapshots_offset = header.snapshots_offset; + s->nb_snapshots = header.nb_snapshots; + + /* read the level 1 table */ + s->l1_size = header.l1_size; + shift = s->cluster_bits + s->l2_bits; + s->l1_vm_state_index = (header.size + (1LL << shift) - 1) >> shift; + /* the L1 table must contain at least enough entries to put + header.size bytes */ + if (s->l1_size < s->l1_vm_state_index) + goto fail; + s->l1_table_offset = header.l1_table_offset; + s->l1_table = qemu_malloc(s->l1_size * sizeof(uint64_t)); + if (bdrv_pread(s->hd, s->l1_table_offset, s->l1_table, s->l1_size * sizeof(uint64_t)) != + s->l1_size * sizeof(uint64_t)) + goto fail; + for(i = 0;i < s->l1_size; i++) { + be64_to_cpus(&s->l1_table[i]); + } + /* alloc L2 cache */ + s->l2_cache = qemu_malloc(s->l2_size * L2_CACHE_SIZE * sizeof(uint64_t)); + s->cluster_cache = qemu_malloc(s->cluster_size); + /* one more sector for decompressed data alignment */ + s->cluster_data = qemu_malloc(QCOW_MAX_CRYPT_CLUSTERS * s->cluster_size + + 512); + s->cluster_cache_offset = -1; + + if (refcount_init(bs) < 0) + goto fail; + + /* read qcow2 extensions */ + if (header.backing_file_offset) + ext_end = header.backing_file_offset; + else + ext_end = s->cluster_size; + if (qcow_read_extensions(bs, sizeof(header), ext_end)) + goto fail; + + /* read the backing file name */ + if (header.backing_file_offset != 0) { + len = header.backing_file_size; + if (len > 1023) + len = 1023; + if (bdrv_pread(s->hd, header.backing_file_offset, bs->backing_file, len) != len) + goto fail; + bs->backing_file[len] = '\0'; + } + if (qcow_read_snapshots(bs) < 0) + goto fail; + +#ifdef DEBUG_ALLOC + check_refcounts(bs); +#endif + return 0; + + fail: + qcow_free_snapshots(bs); + refcount_close(bs); + qemu_free(s->l1_table); + qemu_free(s->l2_cache); + qemu_free(s->cluster_cache); + qemu_free(s->cluster_data); + bdrv_delete(s->hd); + return -1; +} + +static int qcow_set_key(BlockDriverState *bs, const char *key) +{ + BDRVQcowState *s = bs->opaque; + uint8_t keybuf[16]; + int len, i; + + memset(keybuf, 0, 16); + len = strlen(key); + if (len > 16) + len = 16; + /* XXX: we could compress the chars to 7 bits to increase + entropy */ + for(i = 0;i < len;i++) { + keybuf[i] = key[i]; + } + s->crypt_method = s->crypt_method_header; + + if (AES_set_encrypt_key(keybuf, 128, &s->aes_encrypt_key) != 0) + return -1; + if (AES_set_decrypt_key(keybuf, 128, &s->aes_decrypt_key) != 0) + return -1; +#if 0 + /* test */ + { + uint8_t in[16]; + uint8_t out[16]; + uint8_t tmp[16]; + for(i=0;i<16;i++) + in[i] = i; + AES_encrypt(in, tmp, &s->aes_encrypt_key); + AES_decrypt(tmp, out, &s->aes_decrypt_key); + for(i = 0; i < 16; i++) + printf(" %02x", tmp[i]); + printf("\n"); + for(i = 0; i < 16; i++) + printf(" %02x", out[i]); + printf("\n"); + } +#endif + return 0; +} + +/* The crypt function is compatible with the linux cryptoloop + algorithm for < 4 GB images. NOTE: out_buf == in_buf is + supported */ +static void encrypt_sectors(BDRVQcowState *s, int64_t sector_num, + uint8_t *out_buf, const uint8_t *in_buf, + int nb_sectors, int enc, + const AES_KEY *key) +{ + union { + uint64_t ll[2]; + uint8_t b[16]; + } ivec; + int i; + + for(i = 0; i < nb_sectors; i++) { + ivec.ll[0] = cpu_to_le64(sector_num); + ivec.ll[1] = 0; + AES_cbc_encrypt(in_buf, out_buf, 512, key, + ivec.b, enc); + sector_num++; + in_buf += 512; + out_buf += 512; + } +} + +static int copy_sectors(BlockDriverState *bs, uint64_t start_sect, + uint64_t cluster_offset, int n_start, int n_end) +{ + BDRVQcowState *s = bs->opaque; + int n, ret; + + n = n_end - n_start; + if (n <= 0) + return 0; + ret = qcow_read(bs, start_sect + n_start, s->cluster_data, n); + if (ret < 0) + return ret; + if (s->crypt_method) { + encrypt_sectors(s, start_sect + n_start, + s->cluster_data, + s->cluster_data, n, 1, + &s->aes_encrypt_key); + } + ret = bdrv_write(s->hd, (cluster_offset >> 9) + n_start, + s->cluster_data, n); + if (ret < 0) + return ret; + return 0; +} + +static void l2_cache_reset(BlockDriverState *bs) +{ + BDRVQcowState *s = bs->opaque; + + memset(s->l2_cache, 0, s->l2_size * L2_CACHE_SIZE * sizeof(uint64_t)); + memset(s->l2_cache_offsets, 0, L2_CACHE_SIZE * sizeof(uint64_t)); + memset(s->l2_cache_counts, 0, L2_CACHE_SIZE * sizeof(uint32_t)); +} + +static inline int l2_cache_new_entry(BlockDriverState *bs) +{ + BDRVQcowState *s = bs->opaque; + uint32_t min_count; + int min_index, i; + + /* find a new entry in the least used one */ + min_index = 0; + min_count = 0xffffffff; + for(i = 0; i < L2_CACHE_SIZE; i++) { + if (s->l2_cache_counts[i] < min_count) { + min_count = s->l2_cache_counts[i]; + min_index = i; + } + } + return min_index; +} + +static int64_t align_offset(int64_t offset, int n) +{ + offset = (offset + n - 1) & ~(n - 1); + return offset; +} + +static int grow_l1_table(BlockDriverState *bs, int min_size) +{ + BDRVQcowState *s = bs->opaque; + int new_l1_size, new_l1_size2, ret, i; + uint64_t *new_l1_table; + uint64_t new_l1_table_offset; + uint8_t data[12]; + + new_l1_size = s->l1_size; + if (min_size <= new_l1_size) + return 0; + while (min_size > new_l1_size) { + new_l1_size = (new_l1_size * 3 + 1) / 2; + } +#ifdef DEBUG_ALLOC2 + printf("grow l1_table from %d to %d\n", s->l1_size, new_l1_size); +#endif + + new_l1_size2 = sizeof(uint64_t) * new_l1_size; + new_l1_table = qemu_mallocz(new_l1_size2); + memcpy(new_l1_table, s->l1_table, s->l1_size * sizeof(uint64_t)); + + /* write new table (align to cluster) */ + new_l1_table_offset = alloc_clusters(bs, new_l1_size2); + + for(i = 0; i < s->l1_size; i++) + new_l1_table[i] = cpu_to_be64(new_l1_table[i]); + ret = bdrv_pwrite(s->hd, new_l1_table_offset, new_l1_table, new_l1_size2); + if (ret != new_l1_size2) + goto fail; + for(i = 0; i < s->l1_size; i++) + new_l1_table[i] = be64_to_cpu(new_l1_table[i]); + + /* set new table */ + cpu_to_be32w((uint32_t*)data, new_l1_size); + cpu_to_be64w((uint64_t*)(data + 4), new_l1_table_offset); + if (bdrv_pwrite(s->hd, offsetof(QCowHeader, l1_size), data, + sizeof(data)) != sizeof(data)) + goto fail; + qemu_free(s->l1_table); + free_clusters(bs, s->l1_table_offset, s->l1_size * sizeof(uint64_t)); + s->l1_table_offset = new_l1_table_offset; + s->l1_table = new_l1_table; + s->l1_size = new_l1_size; + return 0; + fail: + qemu_free(s->l1_table); + return -EIO; +} + +/* + * seek_l2_table + * + * seek l2_offset in the l2_cache table + * if not found, return NULL, + * if found, + * increments the l2 cache hit count of the entry, + * if counter overflow, divide by two all counters + * return the pointer to the l2 cache entry + * + */ + +static uint64_t *seek_l2_table(BDRVQcowState *s, uint64_t l2_offset) +{ + int i, j; + + for(i = 0; i < L2_CACHE_SIZE; i++) { + if (l2_offset == s->l2_cache_offsets[i]) { + /* increment the hit count */ + if (++s->l2_cache_counts[i] == 0xffffffff) { + for(j = 0; j < L2_CACHE_SIZE; j++) { + s->l2_cache_counts[j] >>= 1; + } + } + return s->l2_cache + (i << s->l2_bits); + } + } + return NULL; +} + +/* + * l2_load + * + * Loads a L2 table into memory. If the table is in the cache, the cache + * is used; otherwise the L2 table is loaded from the image file. + * + * Returns a pointer to the L2 table on success, or NULL if the read from + * the image file failed. + */ + +static uint64_t *l2_load(BlockDriverState *bs, uint64_t l2_offset) +{ + BDRVQcowState *s = bs->opaque; + int min_index; + uint64_t *l2_table; + + /* seek if the table for the given offset is in the cache */ + + l2_table = seek_l2_table(s, l2_offset); + if (l2_table != NULL) + return l2_table; + + /* not found: load a new entry in the least used one */ + + min_index = l2_cache_new_entry(bs); + l2_table = s->l2_cache + (min_index << s->l2_bits); + if (bdrv_pread(s->hd, l2_offset, l2_table, s->l2_size * sizeof(uint64_t)) != + s->l2_size * sizeof(uint64_t)) + return NULL; + s->l2_cache_offsets[min_index] = l2_offset; + s->l2_cache_counts[min_index] = 1; + + return l2_table; +} + +/* + * l2_allocate + * + * Allocate a new l2 entry in the file. If l1_index points to an already + * used entry in the L2 table (i.e. we are doing a copy on write for the L2 + * table) copy the contents of the old L2 table into the newly allocated one. + * Otherwise the new table is initialized with zeros. + * + */ + +static uint64_t *l2_allocate(BlockDriverState *bs, int l1_index) +{ + BDRVQcowState *s = bs->opaque; + int min_index; + uint64_t old_l2_offset, tmp; + uint64_t *l2_table, l2_offset; + + old_l2_offset = s->l1_table[l1_index]; + + /* allocate a new l2 entry */ + + l2_offset = alloc_clusters(bs, s->l2_size * sizeof(uint64_t)); + + /* update the L1 entry */ + + s->l1_table[l1_index] = l2_offset | QCOW_OFLAG_COPIED; + + tmp = cpu_to_be64(l2_offset | QCOW_OFLAG_COPIED); + if (bdrv_pwrite(s->hd, s->l1_table_offset + l1_index * sizeof(tmp), + &tmp, sizeof(tmp)) != sizeof(tmp)) + return NULL; + + /* allocate a new entry in the l2 cache */ + + min_index = l2_cache_new_entry(bs); + l2_table = s->l2_cache + (min_index << s->l2_bits); + + if (old_l2_offset == 0) { + /* if there was no old l2 table, clear the new table */ + memset(l2_table, 0, s->l2_size * sizeof(uint64_t)); + } else { + /* if there was an old l2 table, read it from the disk */ + if (bdrv_pread(s->hd, old_l2_offset, + l2_table, s->l2_size * sizeof(uint64_t)) != + s->l2_size * sizeof(uint64_t)) + return NULL; + } + /* write the l2 table to the file */ + if (bdrv_pwrite(s->hd, l2_offset, + l2_table, s->l2_size * sizeof(uint64_t)) != + s->l2_size * sizeof(uint64_t)) + return NULL; + + /* update the l2 cache entry */ + + s->l2_cache_offsets[min_index] = l2_offset; + s->l2_cache_counts[min_index] = 1; + + return l2_table; +} + +static int size_to_clusters(BDRVQcowState *s, int64_t size) +{ + return (size + (s->cluster_size - 1)) >> s->cluster_bits; +} + +static int count_contiguous_clusters(uint64_t nb_clusters, int cluster_size, + uint64_t *l2_table, uint64_t start, uint64_t mask) +{ + int i; + uint64_t offset = be64_to_cpu(l2_table[0]) & ~mask; + + if (!offset) + return 0; + + for (i = start; i < start + nb_clusters; i++) + if (offset + i * cluster_size != (be64_to_cpu(l2_table[i]) & ~mask)) + break; + + return (i - start); +} + +static int count_contiguous_free_clusters(uint64_t nb_clusters, uint64_t *l2_table) +{ + int i = 0; + + while(nb_clusters-- && l2_table[i] == 0) + i++; + + return i; +} + +/* + * get_cluster_offset + * + * For a given offset of the disk image, return cluster offset in + * qcow2 file. + * + * on entry, *num is the number of contiguous clusters we'd like to + * access following offset. + * + * on exit, *num is the number of contiguous clusters we can read. + * + * Return 1, if the offset is found + * Return 0, otherwise. + * + */ + +static uint64_t get_cluster_offset(BlockDriverState *bs, + uint64_t offset, int *num) +{ + BDRVQcowState *s = bs->opaque; + int l1_index, l2_index; + uint64_t l2_offset, *l2_table, cluster_offset; + int l1_bits, c; + int index_in_cluster, nb_available, nb_needed, nb_clusters; + + index_in_cluster = (offset >> 9) & (s->cluster_sectors - 1); + nb_needed = *num + index_in_cluster; + + l1_bits = s->l2_bits + s->cluster_bits; + + /* compute how many bytes there are between the offset and + * the end of the l1 entry + */ + + nb_available = (1 << l1_bits) - (offset & ((1 << l1_bits) - 1)); + + /* compute the number of available sectors */ + + nb_available = (nb_available >> 9) + index_in_cluster; + + if (nb_needed > nb_available) { + nb_needed = nb_available; + } + + cluster_offset = 0; + + /* seek the the l2 offset in the l1 table */ + + l1_index = offset >> l1_bits; + if (l1_index >= s->l1_size) + goto out; + + l2_offset = s->l1_table[l1_index]; + + /* seek the l2 table of the given l2 offset */ + + if (!l2_offset) + goto out; + + /* load the l2 table in memory */ + + l2_offset &= ~QCOW_OFLAG_COPIED; + l2_table = l2_load(bs, l2_offset); + if (l2_table == NULL) + return 0; + + /* find the cluster offset for the given disk offset */ + + l2_index = (offset >> s->cluster_bits) & (s->l2_size - 1); + cluster_offset = be64_to_cpu(l2_table[l2_index]); + nb_clusters = size_to_clusters(s, nb_needed << 9); + + if (!cluster_offset) { + /* how many empty clusters ? */ + c = count_contiguous_free_clusters(nb_clusters, &l2_table[l2_index]); + } else { + /* how many allocated clusters ? */ + c = count_contiguous_clusters(nb_clusters, s->cluster_size, + &l2_table[l2_index], 0, QCOW_OFLAG_COPIED); + } + + nb_available = (c * s->cluster_sectors); +out: + if (nb_available > nb_needed) + nb_available = nb_needed; + + *num = nb_available - index_in_cluster; + + return cluster_offset & ~QCOW_OFLAG_COPIED; +} + +/* + * free_any_clusters + * + * free clusters according to its type: compressed or not + * + */ + +static void free_any_clusters(BlockDriverState *bs, + uint64_t cluster_offset, int nb_clusters) +{ + BDRVQcowState *s = bs->opaque; + + /* free the cluster */ + + if (cluster_offset & QCOW_OFLAG_COMPRESSED) { + int nb_csectors; + nb_csectors = ((cluster_offset >> s->csize_shift) & + s->csize_mask) + 1; + free_clusters(bs, (cluster_offset & s->cluster_offset_mask) & ~511, + nb_csectors * 512); + return; + } + + free_clusters(bs, cluster_offset, nb_clusters << s->cluster_bits); + + return; +} + +/* + * get_cluster_table + * + * for a given disk offset, load (and allocate if needed) + * the l2 table. + * + * the l2 table offset in the qcow2 file and the cluster index + * in the l2 table are given to the caller. + * + */ + +static int get_cluster_table(BlockDriverState *bs, uint64_t offset, + uint64_t **new_l2_table, + uint64_t *new_l2_offset, + int *new_l2_index) +{ + BDRVQcowState *s = bs->opaque; + int l1_index, l2_index, ret; + uint64_t l2_offset, *l2_table; + + /* seek the the l2 offset in the l1 table */ + + l1_index = offset >> (s->l2_bits + s->cluster_bits); + if (l1_index >= s->l1_size) { + ret = grow_l1_table(bs, l1_index + 1); + if (ret < 0) + return 0; + } + l2_offset = s->l1_table[l1_index]; + + /* seek the l2 table of the given l2 offset */ + + if (l2_offset & QCOW_OFLAG_COPIED) { + /* load the l2 table in memory */ + l2_offset &= ~QCOW_OFLAG_COPIED; + l2_table = l2_load(bs, l2_offset); + if (l2_table == NULL) + return 0; + } else { + if (l2_offset) + free_clusters(bs, l2_offset, s->l2_size * sizeof(uint64_t)); + l2_table = l2_allocate(bs, l1_index); + if (l2_table == NULL) + return 0; + l2_offset = s->l1_table[l1_index] & ~QCOW_OFLAG_COPIED; + } + + /* find the cluster offset for the given disk offset */ + + l2_index = (offset >> s->cluster_bits) & (s->l2_size - 1); + + *new_l2_table = l2_table; + *new_l2_offset = l2_offset; + *new_l2_index = l2_index; + + return 1; +} + +/* + * alloc_compressed_cluster_offset + * + * For a given offset of the disk image, return cluster offset in + * qcow2 file. + * + * If the offset is not found, allocate a new compressed cluster. + * + * Return the cluster offset if successful, + * Return 0, otherwise. + * + */ + +static uint64_t alloc_compressed_cluster_offset(BlockDriverState *bs, + uint64_t offset, + int compressed_size) +{ + BDRVQcowState *s = bs->opaque; + int l2_index, ret; + uint64_t l2_offset, *l2_table, cluster_offset; + int nb_csectors; + + ret = get_cluster_table(bs, offset, &l2_table, &l2_offset, &l2_index); + if (ret == 0) + return 0; + + cluster_offset = be64_to_cpu(l2_table[l2_index]); + if (cluster_offset & QCOW_OFLAG_COPIED) + return cluster_offset & ~QCOW_OFLAG_COPIED; + + if (cluster_offset) + free_any_clusters(bs, cluster_offset, 1); + + cluster_offset = alloc_bytes(bs, compressed_size); + nb_csectors = ((cluster_offset + compressed_size - 1) >> 9) - + (cluster_offset >> 9); + + cluster_offset |= QCOW_OFLAG_COMPRESSED | + ((uint64_t)nb_csectors << s->csize_shift); + + /* update L2 table */ + + /* compressed clusters never have the copied flag */ + + l2_table[l2_index] = cpu_to_be64(cluster_offset); + if (bdrv_pwrite(s->hd, + l2_offset + l2_index * sizeof(uint64_t), + l2_table + l2_index, + sizeof(uint64_t)) != sizeof(uint64_t)) + return 0; + + return cluster_offset; +} + +typedef struct QCowL2Meta +{ + uint64_t offset; + int n_start; + int nb_available; + int nb_clusters; +} QCowL2Meta; + +static int alloc_cluster_link_l2(BlockDriverState *bs, uint64_t cluster_offset, + QCowL2Meta *m) +{ + BDRVQcowState *s = bs->opaque; + int i, j = 0, l2_index, ret; + uint64_t *old_cluster, start_sect, l2_offset, *l2_table; + + if (m->nb_clusters == 0) + return 0; + + old_cluster = qemu_malloc(m->nb_clusters * sizeof(uint64_t)); + + /* copy content of unmodified sectors */ + start_sect = (m->offset & ~(s->cluster_size - 1)) >> 9; + if (m->n_start) { + ret = copy_sectors(bs, start_sect, cluster_offset, 0, m->n_start); + if (ret < 0) + goto err; + } + + if (m->nb_available & (s->cluster_sectors - 1)) { + uint64_t end = m->nb_available & ~(uint64_t)(s->cluster_sectors - 1); + ret = copy_sectors(bs, start_sect + end, cluster_offset + (end << 9), + m->nb_available - end, s->cluster_sectors); + if (ret < 0) + goto err; + } + + ret = -EIO; + /* update L2 table */ + if (!get_cluster_table(bs, m->offset, &l2_table, &l2_offset, &l2_index)) + goto err; + + for (i = 0; i < m->nb_clusters; i++) { + /* if two concurrent writes happen to the same unallocated cluster + * each write allocates separate cluster and writes data concurrently. + * The first one to complete updates l2 table with pointer to its + * cluster the second one has to do RMW (which is done above by + * copy_sectors()), update l2 table with its cluster pointer and free + * old cluster. This is what this loop does */ + if(l2_table[l2_index + i] != 0) + old_cluster[j++] = l2_table[l2_index + i]; + + l2_table[l2_index + i] = cpu_to_be64((cluster_offset + + (i << s->cluster_bits)) | QCOW_OFLAG_COPIED); + } + + if (bdrv_pwrite(s->hd, l2_offset + l2_index * sizeof(uint64_t), + l2_table + l2_index, m->nb_clusters * sizeof(uint64_t)) != + m->nb_clusters * sizeof(uint64_t)) + goto err; + + for (i = 0; i < j; i++) + free_any_clusters(bs, be64_to_cpu(old_cluster[i]) & ~QCOW_OFLAG_COPIED, + 1); + + ret = 0; +err: + qemu_free(old_cluster); + return ret; + } + +/* + * alloc_cluster_offset + * + * For a given offset of the disk image, return cluster offset in + * qcow2 file. + * + * If the offset is not found, allocate a new cluster. + * + * Return the cluster offset if successful, + * Return 0, otherwise. + * + */ + +static uint64_t alloc_cluster_offset(BlockDriverState *bs, + uint64_t offset, + int n_start, int n_end, + int *num, QCowL2Meta *m) +{ + BDRVQcowState *s = bs->opaque; + int l2_index, ret; + uint64_t l2_offset, *l2_table, cluster_offset; + int nb_clusters, i = 0; + + ret = get_cluster_table(bs, offset, &l2_table, &l2_offset, &l2_index); + if (ret == 0) + return 0; + + nb_clusters = size_to_clusters(s, n_end << 9); + + nb_clusters = MIN(nb_clusters, s->l2_size - l2_index); + + cluster_offset = be64_to_cpu(l2_table[l2_index]); + + /* We keep all QCOW_OFLAG_COPIED clusters */ + + if (cluster_offset & QCOW_OFLAG_COPIED) { + nb_clusters = count_contiguous_clusters(nb_clusters, s->cluster_size, + &l2_table[l2_index], 0, 0); + + cluster_offset &= ~QCOW_OFLAG_COPIED; + m->nb_clusters = 0; + + goto out; + } + + /* for the moment, multiple compressed clusters are not managed */ + + if (cluster_offset & QCOW_OFLAG_COMPRESSED) + nb_clusters = 1; + + /* how many available clusters ? */ + + while (i < nb_clusters) { + i += count_contiguous_clusters(nb_clusters - i, s->cluster_size, + &l2_table[l2_index], i, 0); + + if(be64_to_cpu(l2_table[l2_index + i])) + break; + + i += count_contiguous_free_clusters(nb_clusters - i, + &l2_table[l2_index + i]); + + cluster_offset = be64_to_cpu(l2_table[l2_index + i]); + + if ((cluster_offset & QCOW_OFLAG_COPIED) || + (cluster_offset & QCOW_OFLAG_COMPRESSED)) + break; + } + nb_clusters = i; + + /* allocate a new cluster */ + + cluster_offset = alloc_clusters(bs, nb_clusters * s->cluster_size); + + /* save info needed for meta data update */ + m->offset = offset; + m->n_start = n_start; + m->nb_clusters = nb_clusters; + +out: + m->nb_available = MIN(nb_clusters << (s->cluster_bits - 9), n_end); + + *num = m->nb_available - n_start; + + return cluster_offset; +} + +static int qcow_is_allocated(BlockDriverState *bs, int64_t sector_num, + int nb_sectors, int *pnum) +{ + uint64_t cluster_offset; + + *pnum = nb_sectors; + cluster_offset = get_cluster_offset(bs, sector_num << 9, pnum); + + return (cluster_offset != 0); +} + +static int decompress_buffer(uint8_t *out_buf, int out_buf_size, + const uint8_t *buf, int buf_size) +{ + z_stream strm1, *strm = &strm1; + int ret, out_len; + + memset(strm, 0, sizeof(*strm)); + + strm->next_in = (uint8_t *)buf; + strm->avail_in = buf_size; + strm->next_out = out_buf; + strm->avail_out = out_buf_size; + + ret = inflateInit2(strm, -12); + if (ret != Z_OK) + return -1; + ret = inflate(strm, Z_FINISH); + out_len = strm->next_out - out_buf; + if ((ret != Z_STREAM_END && ret != Z_BUF_ERROR) || + out_len != out_buf_size) { + inflateEnd(strm); + return -1; + } + inflateEnd(strm); + return 0; +} + +static int decompress_cluster(BDRVQcowState *s, uint64_t cluster_offset) +{ + int ret, csize, nb_csectors, sector_offset; + uint64_t coffset; + + coffset = cluster_offset & s->cluster_offset_mask; + if (s->cluster_cache_offset != coffset) { + nb_csectors = ((cluster_offset >> s->csize_shift) & s->csize_mask) + 1; + sector_offset = coffset & 511; + csize = nb_csectors * 512 - sector_offset; + ret = bdrv_read(s->hd, coffset >> 9, s->cluster_data, nb_csectors); + if (ret < 0) { + return -1; + } + if (decompress_buffer(s->cluster_cache, s->cluster_size, + s->cluster_data + sector_offset, csize) < 0) { + return -1; + } + s->cluster_cache_offset = coffset; + } + return 0; +} + +/* handle reading after the end of the backing file */ +static int backing_read1(BlockDriverState *bs, + int64_t sector_num, uint8_t *buf, int nb_sectors) +{ + int n1; + if ((sector_num + nb_sectors) <= bs->total_sectors) + return nb_sectors; + if (sector_num >= bs->total_sectors) + n1 = 0; + else + n1 = bs->total_sectors - sector_num; + memset(buf + n1 * 512, 0, 512 * (nb_sectors - n1)); + return n1; +} + +static int qcow_read(BlockDriverState *bs, int64_t sector_num, + uint8_t *buf, int nb_sectors) +{ + BDRVQcowState *s = bs->opaque; + int ret, index_in_cluster, n, n1; + uint64_t cluster_offset; + + while (nb_sectors > 0) { + n = nb_sectors; + cluster_offset = get_cluster_offset(bs, sector_num << 9, &n); + index_in_cluster = sector_num & (s->cluster_sectors - 1); + if (!cluster_offset) { + if (bs->backing_hd) { + /* read from the base image */ + n1 = backing_read1(bs->backing_hd, sector_num, buf, n); + if (n1 > 0) { + ret = bdrv_read(bs->backing_hd, sector_num, buf, n1); + if (ret < 0) + return -1; + } + } else { + memset(buf, 0, 512 * n); + } + } else if (cluster_offset & QCOW_OFLAG_COMPRESSED) { + if (decompress_cluster(s, cluster_offset) < 0) + return -1; + memcpy(buf, s->cluster_cache + index_in_cluster * 512, 512 * n); + } else { + ret = bdrv_pread(s->hd, cluster_offset + index_in_cluster * 512, buf, n * 512); + if (ret != n * 512) + return -1; + if (s->crypt_method) { + encrypt_sectors(s, sector_num, buf, buf, n, 0, + &s->aes_decrypt_key); + } + } + nb_sectors -= n; + sector_num += n; + buf += n * 512; + } + return 0; +} + +static int qcow_write(BlockDriverState *bs, int64_t sector_num, + const uint8_t *buf, int nb_sectors) +{ + BDRVQcowState *s = bs->opaque; + int ret, index_in_cluster, n; + uint64_t cluster_offset; + int n_end; + QCowL2Meta l2meta; + + while (nb_sectors > 0) { + index_in_cluster = sector_num & (s->cluster_sectors - 1); + n_end = index_in_cluster + nb_sectors; + if (s->crypt_method && + n_end > QCOW_MAX_CRYPT_CLUSTERS * s->cluster_sectors) + n_end = QCOW_MAX_CRYPT_CLUSTERS * s->cluster_sectors; + cluster_offset = alloc_cluster_offset(bs, sector_num << 9, + index_in_cluster, + n_end, &n, &l2meta); + if (!cluster_offset) + return -1; + if (s->crypt_method) { + encrypt_sectors(s, sector_num, s->cluster_data, buf, n, 1, + &s->aes_encrypt_key); + ret = bdrv_pwrite(s->hd, cluster_offset + index_in_cluster * 512, + s->cluster_data, n * 512); + } else { + ret = bdrv_pwrite(s->hd, cluster_offset + index_in_cluster * 512, buf, n * 512); + } + if (ret != n * 512 || alloc_cluster_link_l2(bs, cluster_offset, &l2meta) < 0) { + free_any_clusters(bs, cluster_offset, l2meta.nb_clusters); + return -1; + } + nb_sectors -= n; + sector_num += n; + buf += n * 512; + } + s->cluster_cache_offset = -1; /* disable compressed cache */ + return 0; +} + +typedef struct QCowAIOCB { + BlockDriverAIOCB common; + int64_t sector_num; + QEMUIOVector *qiov; + uint8_t *buf; + void *orig_buf; + int nb_sectors; + int n; + uint64_t cluster_offset; + uint8_t *cluster_data; + BlockDriverAIOCB *hd_aiocb; + struct iovec hd_iov; + QEMUIOVector hd_qiov; + QEMUBH *bh; + QCowL2Meta l2meta; +} QCowAIOCB; + +static void qcow_aio_read_cb(void *opaque, int ret); +static void qcow_aio_read_bh(void *opaque) +{ + QCowAIOCB *acb = opaque; + qemu_bh_delete(acb->bh); + acb->bh = NULL; + qcow_aio_read_cb(opaque, 0); +} + +static int qcow_schedule_bh(QEMUBHFunc *cb, QCowAIOCB *acb) +{ + if (acb->bh) + return -EIO; + + acb->bh = qemu_bh_new(cb, acb); + if (!acb->bh) + return -EIO; + + qemu_bh_schedule(acb->bh); + + return 0; +} + +static void qcow_aio_read_cb(void *opaque, int ret) +{ + QCowAIOCB *acb = opaque; + BlockDriverState *bs = acb->common.bs; + BDRVQcowState *s = bs->opaque; + int index_in_cluster, n1; + + acb->hd_aiocb = NULL; + if (ret < 0) + goto done; + + /* post process the read buffer */ + if (!acb->cluster_offset) { + /* nothing to do */ + } else if (acb->cluster_offset & QCOW_OFLAG_COMPRESSED) { + /* nothing to do */ + } else { + if (s->crypt_method) { + encrypt_sectors(s, acb->sector_num, acb->buf, acb->buf, + acb->n, 0, + &s->aes_decrypt_key); + } + } + + acb->nb_sectors -= acb->n; + acb->sector_num += acb->n; + acb->buf += acb->n * 512; + + if (acb->nb_sectors == 0) { + /* request completed */ + ret = 0; + goto done; + } + + /* prepare next AIO request */ + acb->n = acb->nb_sectors; + acb->cluster_offset = get_cluster_offset(bs, acb->sector_num << 9, &acb->n); + index_in_cluster = acb->sector_num & (s->cluster_sectors - 1); + + if (!acb->cluster_offset) { + if (bs->backing_hd) { + /* read from the base image */ + n1 = backing_read1(bs->backing_hd, acb->sector_num, + acb->buf, acb->n); + if (n1 > 0) { + acb->hd_iov.iov_base = (void *)acb->buf; + acb->hd_iov.iov_len = acb->n * 512; + qemu_iovec_init_external(&acb->hd_qiov, &acb->hd_iov, 1); + acb->hd_aiocb = bdrv_aio_readv(bs->backing_hd, acb->sector_num, + &acb->hd_qiov, acb->n, + qcow_aio_read_cb, acb); + if (acb->hd_aiocb == NULL) + goto done; + } else { + ret = qcow_schedule_bh(qcow_aio_read_bh, acb); + if (ret < 0) + goto done; + } + } else { + /* Note: in this case, no need to wait */ + memset(acb->buf, 0, 512 * acb->n); + ret = qcow_schedule_bh(qcow_aio_read_bh, acb); + if (ret < 0) + goto done; + } + } else if (acb->cluster_offset & QCOW_OFLAG_COMPRESSED) { + /* add AIO support for compressed blocks ? */ + if (decompress_cluster(s, acb->cluster_offset) < 0) + goto done; + memcpy(acb->buf, + s->cluster_cache + index_in_cluster * 512, 512 * acb->n); + ret = qcow_schedule_bh(qcow_aio_read_bh, acb); + if (ret < 0) + goto done; + } else { + if ((acb->cluster_offset & 511) != 0) { + ret = -EIO; + goto done; + } + + acb->hd_iov.iov_base = (void *)acb->buf; + acb->hd_iov.iov_len = acb->n * 512; + qemu_iovec_init_external(&acb->hd_qiov, &acb->hd_iov, 1); + acb->hd_aiocb = bdrv_aio_readv(s->hd, + (acb->cluster_offset >> 9) + index_in_cluster, + &acb->hd_qiov, acb->n, qcow_aio_read_cb, acb); + if (acb->hd_aiocb == NULL) + goto done; + } + + return; +done: + if (acb->qiov->niov > 1) { + qemu_iovec_from_buffer(acb->qiov, acb->orig_buf, acb->qiov->size); + qemu_vfree(acb->orig_buf); + } + acb->common.cb(acb->common.opaque, ret); + qemu_aio_release(acb); +} + +static QCowAIOCB *qcow_aio_setup(BlockDriverState *bs, + int64_t sector_num, QEMUIOVector *qiov, int nb_sectors, + BlockDriverCompletionFunc *cb, void *opaque, int is_write) +{ + QCowAIOCB *acb; + + acb = qemu_aio_get(bs, cb, opaque); + if (!acb) + return NULL; + acb->hd_aiocb = NULL; + acb->sector_num = sector_num; + acb->qiov = qiov; + if (qiov->niov > 1) { + acb->buf = acb->orig_buf = qemu_blockalign(bs, qiov->size); + if (is_write) + qemu_iovec_to_buffer(qiov, acb->buf); + } else { + acb->buf = (uint8_t *)qiov->iov->iov_base; + } + acb->nb_sectors = nb_sectors; + acb->n = 0; + acb->cluster_offset = 0; + acb->l2meta.nb_clusters = 0; + return acb; +} + +static BlockDriverAIOCB *qcow_aio_readv(BlockDriverState *bs, + int64_t sector_num, QEMUIOVector *qiov, int nb_sectors, + BlockDriverCompletionFunc *cb, void *opaque) +{ + QCowAIOCB *acb; + + acb = qcow_aio_setup(bs, sector_num, qiov, nb_sectors, cb, opaque, 0); + if (!acb) + return NULL; + + qcow_aio_read_cb(acb, 0); + return &acb->common; +} + +static void qcow_aio_write_cb(void *opaque, int ret) +{ + QCowAIOCB *acb = opaque; + BlockDriverState *bs = acb->common.bs; + BDRVQcowState *s = bs->opaque; + int index_in_cluster; + const uint8_t *src_buf; + int n_end; + + acb->hd_aiocb = NULL; + + if (ret < 0) + goto done; + + if (alloc_cluster_link_l2(bs, acb->cluster_offset, &acb->l2meta) < 0) { + free_any_clusters(bs, acb->cluster_offset, acb->l2meta.nb_clusters); + goto done; + } + + acb->nb_sectors -= acb->n; + acb->sector_num += acb->n; + acb->buf += acb->n * 512; + + if (acb->nb_sectors == 0) { + /* request completed */ + ret = 0; + goto done; + } + + index_in_cluster = acb->sector_num & (s->cluster_sectors - 1); + n_end = index_in_cluster + acb->nb_sectors; + if (s->crypt_method && + n_end > QCOW_MAX_CRYPT_CLUSTERS * s->cluster_sectors) + n_end = QCOW_MAX_CRYPT_CLUSTERS * s->cluster_sectors; + + acb->cluster_offset = alloc_cluster_offset(bs, acb->sector_num << 9, + index_in_cluster, + n_end, &acb->n, &acb->l2meta); + if (!acb->cluster_offset || (acb->cluster_offset & 511) != 0) { + ret = -EIO; + goto done; + } + if (s->crypt_method) { + if (!acb->cluster_data) { + acb->cluster_data = qemu_mallocz(QCOW_MAX_CRYPT_CLUSTERS * + s->cluster_size); + } + encrypt_sectors(s, acb->sector_num, acb->cluster_data, acb->buf, + acb->n, 1, &s->aes_encrypt_key); + src_buf = acb->cluster_data; + } else { + src_buf = acb->buf; + } + acb->hd_iov.iov_base = (void *)src_buf; + acb->hd_iov.iov_len = acb->n * 512; + qemu_iovec_init_external(&acb->hd_qiov, &acb->hd_iov, 1); + acb->hd_aiocb = bdrv_aio_writev(s->hd, + (acb->cluster_offset >> 9) + index_in_cluster, + &acb->hd_qiov, acb->n, + qcow_aio_write_cb, acb); + if (acb->hd_aiocb == NULL) + goto done; + + return; + +done: + if (acb->qiov->niov > 1) + qemu_vfree(acb->orig_buf); + acb->common.cb(acb->common.opaque, ret); + qemu_aio_release(acb); +} + +static BlockDriverAIOCB *qcow_aio_writev(BlockDriverState *bs, + int64_t sector_num, QEMUIOVector *qiov, int nb_sectors, + BlockDriverCompletionFunc *cb, void *opaque) +{ + BDRVQcowState *s = bs->opaque; + QCowAIOCB *acb; + + s->cluster_cache_offset = -1; /* disable compressed cache */ + + acb = qcow_aio_setup(bs, sector_num, qiov, nb_sectors, cb, opaque, 1); + if (!acb) + return NULL; + + qcow_aio_write_cb(acb, 0); + return &acb->common; +} + +static void qcow_aio_cancel(BlockDriverAIOCB *blockacb) +{ + QCowAIOCB *acb = (QCowAIOCB *)blockacb; + if (acb->hd_aiocb) + bdrv_aio_cancel(acb->hd_aiocb); + qemu_aio_release(acb); +} + +static void qcow_close(BlockDriverState *bs) +{ + BDRVQcowState *s = bs->opaque; + qemu_free(s->l1_table); + qemu_free(s->l2_cache); + qemu_free(s->cluster_cache); + qemu_free(s->cluster_data); + refcount_close(bs); + bdrv_delete(s->hd); +} + +/* XXX: use std qcow open function ? */ +typedef struct QCowCreateState { + int cluster_size; + int cluster_bits; + uint16_t *refcount_block; + uint64_t *refcount_table; + int64_t l1_table_offset; + int64_t refcount_table_offset; + int64_t refcount_block_offset; +} QCowCreateState; + +static void create_refcount_update(QCowCreateState *s, + int64_t offset, int64_t size) +{ + int refcount; + int64_t start, last, cluster_offset; + uint16_t *p; + + start = offset & ~(s->cluster_size - 1); + last = (offset + size - 1) & ~(s->cluster_size - 1); + for(cluster_offset = start; cluster_offset <= last; + cluster_offset += s->cluster_size) { + p = &s->refcount_block[cluster_offset >> s->cluster_bits]; + refcount = be16_to_cpu(*p); + refcount++; + *p = cpu_to_be16(refcount); + } +} + +static int qcow_create2(const char *filename, int64_t total_size, + const char *backing_file, const char *backing_format, + int flags) +{ + + int fd, header_size, backing_filename_len, l1_size, i, shift, l2_bits; + int ref_clusters, backing_format_len = 0; + QCowHeader header; + uint64_t tmp, offset; + QCowCreateState s1, *s = &s1; + QCowExtension ext_bf = {0, 0}; + + + memset(s, 0, sizeof(*s)); + + fd = open(filename, O_WRONLY | O_CREAT | O_TRUNC | O_BINARY, 0644); + if (fd < 0) + return -1; + memset(&header, 0, sizeof(header)); + header.magic = cpu_to_be32(QCOW_MAGIC); + header.version = cpu_to_be32(QCOW_VERSION); + header.size = cpu_to_be64(total_size * 512); + header_size = sizeof(header); + backing_filename_len = 0; + if (backing_file) { + if (backing_format) { + ext_bf.magic = QCOW_EXT_MAGIC_BACKING_FORMAT; + backing_format_len = strlen(backing_format); + ext_bf.len = (backing_format_len + 7) & ~7; + header_size += ((sizeof(ext_bf) + ext_bf.len + 7) & ~7); + } + header.backing_file_offset = cpu_to_be64(header_size); + backing_filename_len = strlen(backing_file); + header.backing_file_size = cpu_to_be32(backing_filename_len); + header_size += backing_filename_len; + } + s->cluster_bits = 12; /* 4 KB clusters */ + s->cluster_size = 1 << s->cluster_bits; + header.cluster_bits = cpu_to_be32(s->cluster_bits); + header_size = (header_size + 7) & ~7; + if (flags & BLOCK_FLAG_ENCRYPT) { + header.crypt_method = cpu_to_be32(QCOW_CRYPT_AES); + } else { + header.crypt_method = cpu_to_be32(QCOW_CRYPT_NONE); + } + l2_bits = s->cluster_bits - 3; + shift = s->cluster_bits + l2_bits; + l1_size = (((total_size * 512) + (1LL << shift) - 1) >> shift); + offset = align_offset(header_size, s->cluster_size); + s->l1_table_offset = offset; + header.l1_table_offset = cpu_to_be64(s->l1_table_offset); + header.l1_size = cpu_to_be32(l1_size); + offset += align_offset(l1_size * sizeof(uint64_t), s->cluster_size); + + s->refcount_table = qemu_mallocz(s->cluster_size); + + s->refcount_table_offset = offset; + header.refcount_table_offset = cpu_to_be64(offset); + header.refcount_table_clusters = cpu_to_be32(1); + offset += s->cluster_size; + s->refcount_block_offset = offset; + + /* count how many refcount blocks needed */ + tmp = offset >> s->cluster_bits; + ref_clusters = (tmp >> (s->cluster_bits - REFCOUNT_SHIFT)) + 1; + for (i=0; i < ref_clusters; i++) { + s->refcount_table[i] = cpu_to_be64(offset); + offset += s->cluster_size; + } + + s->refcount_block = qemu_mallocz(ref_clusters * s->cluster_size); + + /* update refcounts */ + create_refcount_update(s, 0, header_size); + create_refcount_update(s, s->l1_table_offset, l1_size * sizeof(uint64_t)); + create_refcount_update(s, s->refcount_table_offset, s->cluster_size); + create_refcount_update(s, s->refcount_block_offset, ref_clusters * s->cluster_size); + + /* write all the data */ + write(fd, &header, sizeof(header)); + if (backing_file) { + if (backing_format_len) { + char zero[16]; + int d = ext_bf.len - backing_format_len; + + memset(zero, 0, sizeof(zero)); + cpu_to_be32s(&ext_bf.magic); + cpu_to_be32s(&ext_bf.len); + write(fd, &ext_bf, sizeof(ext_bf)); + write(fd, backing_format, backing_format_len); + if (d>0) { + write(fd, zero, d); + } + } + write(fd, backing_file, backing_filename_len); + } + lseek(fd, s->l1_table_offset, SEEK_SET); + tmp = 0; + for(i = 0;i < l1_size; i++) { + write(fd, &tmp, sizeof(tmp)); + } + lseek(fd, s->refcount_table_offset, SEEK_SET); + write(fd, s->refcount_table, s->cluster_size); + + lseek(fd, s->refcount_block_offset, SEEK_SET); + write(fd, s->refcount_block, ref_clusters * s->cluster_size); + + qemu_free(s->refcount_table); + qemu_free(s->refcount_block); + close(fd); + return 0; +} + +static int qcow_create(const char *filename, int64_t total_size, + const char *backing_file, int flags) +{ + return qcow_create2(filename, total_size, backing_file, NULL, flags); +} + +static int qcow_make_empty(BlockDriverState *bs) +{ +#if 0 + /* XXX: not correct */ + BDRVQcowState *s = bs->opaque; + uint32_t l1_length = s->l1_size * sizeof(uint64_t); + int ret; + + memset(s->l1_table, 0, l1_length); + if (bdrv_pwrite(s->hd, s->l1_table_offset, s->l1_table, l1_length) < 0) + return -1; + ret = bdrv_truncate(s->hd, s->l1_table_offset + l1_length); + if (ret < 0) + return ret; + + l2_cache_reset(bs); +#endif + return 0; +} + +/* XXX: put compressed sectors first, then all the cluster aligned + tables to avoid losing bytes in alignment */ +static int qcow_write_compressed(BlockDriverState *bs, int64_t sector_num, + const uint8_t *buf, int nb_sectors) +{ + BDRVQcowState *s = bs->opaque; + z_stream strm; + int ret, out_len; + uint8_t *out_buf; + uint64_t cluster_offset; + + if (nb_sectors == 0) { + /* align end of file to a sector boundary to ease reading with + sector based I/Os */ + cluster_offset = bdrv_getlength(s->hd); + cluster_offset = (cluster_offset + 511) & ~511; + bdrv_truncate(s->hd, cluster_offset); + return 0; + } + + if (nb_sectors != s->cluster_sectors) + return -EINVAL; + + out_buf = qemu_malloc(s->cluster_size + (s->cluster_size / 1000) + 128); + + /* best compression, small window, no zlib header */ + memset(&strm, 0, sizeof(strm)); + ret = deflateInit2(&strm, Z_DEFAULT_COMPRESSION, + Z_DEFLATED, -12, + 9, Z_DEFAULT_STRATEGY); + if (ret != 0) { + qemu_free(out_buf); + return -1; + } + + strm.avail_in = s->cluster_size; + strm.next_in = (uint8_t *)buf; + strm.avail_out = s->cluster_size; + strm.next_out = out_buf; + + ret = deflate(&strm, Z_FINISH); + if (ret != Z_STREAM_END && ret != Z_OK) { + qemu_free(out_buf); + deflateEnd(&strm); + return -1; + } + out_len = strm.next_out - out_buf; + + deflateEnd(&strm); + + if (ret != Z_STREAM_END || out_len >= s->cluster_size) { + /* could not compress: write normal cluster */ + qcow_write(bs, sector_num, buf, s->cluster_sectors); + } else { + cluster_offset = alloc_compressed_cluster_offset(bs, sector_num << 9, + out_len); + if (!cluster_offset) + return -1; + cluster_offset &= s->cluster_offset_mask; + if (bdrv_pwrite(s->hd, cluster_offset, out_buf, out_len) != out_len) { + qemu_free(out_buf); + return -1; + } + } + + qemu_free(out_buf); + return 0; +} + +static void qcow_flush(BlockDriverState *bs) +{ + BDRVQcowState *s = bs->opaque; + bdrv_flush(s->hd); +} + +static int qcow_get_info(BlockDriverState *bs, BlockDriverInfo *bdi) +{ + BDRVQcowState *s = bs->opaque; + bdi->cluster_size = s->cluster_size; + bdi->vm_state_offset = (int64_t)s->l1_vm_state_index << + (s->cluster_bits + s->l2_bits); + return 0; +} + +/*********************************************************/ +/* snapshot support */ + +/* update the refcounts of snapshots and the copied flag */ +static int update_snapshot_refcount(BlockDriverState *bs, + int64_t l1_table_offset, + int l1_size, + int addend) +{ + BDRVQcowState *s = bs->opaque; + uint64_t *l1_table, *l2_table, l2_offset, offset, l1_size2, l1_allocated; + int64_t old_offset, old_l2_offset; + int l2_size, i, j, l1_modified, l2_modified, nb_csectors, refcount; + + l2_cache_reset(bs); + + l2_table = NULL; + l1_table = NULL; + l1_size2 = l1_size * sizeof(uint64_t); + l1_allocated = 0; + if (l1_table_offset != s->l1_table_offset) { + l1_table = qemu_malloc(l1_size2); + l1_allocated = 1; + if (bdrv_pread(s->hd, l1_table_offset, + l1_table, l1_size2) != l1_size2) + goto fail; + for(i = 0;i < l1_size; i++) + be64_to_cpus(&l1_table[i]); + } else { + assert(l1_size == s->l1_size); + l1_table = s->l1_table; + l1_allocated = 0; + } + + l2_size = s->l2_size * sizeof(uint64_t); + l2_table = qemu_malloc(l2_size); + l1_modified = 0; + for(i = 0; i < l1_size; i++) { + l2_offset = l1_table[i]; + if (l2_offset) { + old_l2_offset = l2_offset; + l2_offset &= ~QCOW_OFLAG_COPIED; + l2_modified = 0; + if (bdrv_pread(s->hd, l2_offset, l2_table, l2_size) != l2_size) + goto fail; + for(j = 0; j < s->l2_size; j++) { + offset = be64_to_cpu(l2_table[j]); + if (offset != 0) { + old_offset = offset; + offset &= ~QCOW_OFLAG_COPIED; + if (offset & QCOW_OFLAG_COMPRESSED) { + nb_csectors = ((offset >> s->csize_shift) & + s->csize_mask) + 1; + if (addend != 0) + update_refcount(bs, (offset & s->cluster_offset_mask) & ~511, + nb_csectors * 512, addend); + /* compressed clusters are never modified */ + refcount = 2; + } else { + if (addend != 0) { + refcount = update_cluster_refcount(bs, offset >> s->cluster_bits, addend); + } else { + refcount = get_refcount(bs, offset >> s->cluster_bits); + } + } + + if (refcount == 1) { + offset |= QCOW_OFLAG_COPIED; + } + if (offset != old_offset) { + l2_table[j] = cpu_to_be64(offset); + l2_modified = 1; + } + } + } + if (l2_modified) { + if (bdrv_pwrite(s->hd, + l2_offset, l2_table, l2_size) != l2_size) + goto fail; + } + + if (addend != 0) { + refcount = update_cluster_refcount(bs, l2_offset >> s->cluster_bits, addend); + } else { + refcount = get_refcount(bs, l2_offset >> s->cluster_bits); + } + if (refcount == 1) { + l2_offset |= QCOW_OFLAG_COPIED; + } + if (l2_offset != old_l2_offset) { + l1_table[i] = l2_offset; + l1_modified = 1; + } + } + } + if (l1_modified) { + for(i = 0; i < l1_size; i++) + cpu_to_be64s(&l1_table[i]); + if (bdrv_pwrite(s->hd, l1_table_offset, l1_table, + l1_size2) != l1_size2) + goto fail; + for(i = 0; i < l1_size; i++) + be64_to_cpus(&l1_table[i]); + } + if (l1_allocated) + qemu_free(l1_table); + qemu_free(l2_table); + return 0; + fail: + if (l1_allocated) + qemu_free(l1_table); + qemu_free(l2_table); + return -EIO; +} + +static void qcow_free_snapshots(BlockDriverState *bs) +{ + BDRVQcowState *s = bs->opaque; + int i; + + for(i = 0; i < s->nb_snapshots; i++) { + qemu_free(s->snapshots[i].name); + qemu_free(s->snapshots[i].id_str); + } + qemu_free(s->snapshots); + s->snapshots = NULL; + s->nb_snapshots = 0; +} + +static int qcow_read_snapshots(BlockDriverState *bs) +{ + BDRVQcowState *s = bs->opaque; + QCowSnapshotHeader h; + QCowSnapshot *sn; + int i, id_str_size, name_size; + int64_t offset; + uint32_t extra_data_size; + + if (!s->nb_snapshots) { + s->snapshots = NULL; + s->snapshots_size = 0; + return 0; + } + + offset = s->snapshots_offset; + s->snapshots = qemu_mallocz(s->nb_snapshots * sizeof(QCowSnapshot)); + for(i = 0; i < s->nb_snapshots; i++) { + offset = align_offset(offset, 8); + if (bdrv_pread(s->hd, offset, &h, sizeof(h)) != sizeof(h)) + goto fail; + offset += sizeof(h); + sn = s->snapshots + i; + sn->l1_table_offset = be64_to_cpu(h.l1_table_offset); + sn->l1_size = be32_to_cpu(h.l1_size); + sn->vm_state_size = be32_to_cpu(h.vm_state_size); + sn->date_sec = be32_to_cpu(h.date_sec); + sn->date_nsec = be32_to_cpu(h.date_nsec); + sn->vm_clock_nsec = be64_to_cpu(h.vm_clock_nsec); + extra_data_size = be32_to_cpu(h.extra_data_size); + + id_str_size = be16_to_cpu(h.id_str_size); + name_size = be16_to_cpu(h.name_size); + + offset += extra_data_size; + + sn->id_str = qemu_malloc(id_str_size + 1); + if (bdrv_pread(s->hd, offset, sn->id_str, id_str_size) != id_str_size) + goto fail; + offset += id_str_size; + sn->id_str[id_str_size] = '\0'; + + sn->name = qemu_malloc(name_size + 1); + if (bdrv_pread(s->hd, offset, sn->name, name_size) != name_size) + goto fail; + offset += name_size; + sn->name[name_size] = '\0'; + } + s->snapshots_size = offset - s->snapshots_offset; + return 0; + fail: + qcow_free_snapshots(bs); + return -1; +} + +/* add at the end of the file a new list of snapshots */ +static int qcow_write_snapshots(BlockDriverState *bs) +{ + BDRVQcowState *s = bs->opaque; + QCowSnapshot *sn; + QCowSnapshotHeader h; + int i, name_size, id_str_size, snapshots_size; + uint64_t data64; + uint32_t data32; + int64_t offset, snapshots_offset; + + /* compute the size of the snapshots */ + offset = 0; + for(i = 0; i < s->nb_snapshots; i++) { + sn = s->snapshots + i; + offset = align_offset(offset, 8); + offset += sizeof(h); + offset += strlen(sn->id_str); + offset += strlen(sn->name); + } + snapshots_size = offset; + + snapshots_offset = alloc_clusters(bs, snapshots_size); + offset = snapshots_offset; + + for(i = 0; i < s->nb_snapshots; i++) { + sn = s->snapshots + i; + memset(&h, 0, sizeof(h)); + h.l1_table_offset = cpu_to_be64(sn->l1_table_offset); + h.l1_size = cpu_to_be32(sn->l1_size); + h.vm_state_size = cpu_to_be32(sn->vm_state_size); + h.date_sec = cpu_to_be32(sn->date_sec); + h.date_nsec = cpu_to_be32(sn->date_nsec); + h.vm_clock_nsec = cpu_to_be64(sn->vm_clock_nsec); + + id_str_size = strlen(sn->id_str); + name_size = strlen(sn->name); + h.id_str_size = cpu_to_be16(id_str_size); + h.name_size = cpu_to_be16(name_size); + offset = align_offset(offset, 8); + if (bdrv_pwrite(s->hd, offset, &h, sizeof(h)) != sizeof(h)) + goto fail; + offset += sizeof(h); + if (bdrv_pwrite(s->hd, offset, sn->id_str, id_str_size) != id_str_size) + goto fail; + offset += id_str_size; + if (bdrv_pwrite(s->hd, offset, sn->name, name_size) != name_size) + goto fail; + offset += name_size; + } + + /* update the various header fields */ + data64 = cpu_to_be64(snapshots_offset); + if (bdrv_pwrite(s->hd, offsetof(QCowHeader, snapshots_offset), + &data64, sizeof(data64)) != sizeof(data64)) + goto fail; + data32 = cpu_to_be32(s->nb_snapshots); + if (bdrv_pwrite(s->hd, offsetof(QCowHeader, nb_snapshots), + &data32, sizeof(data32)) != sizeof(data32)) + goto fail; + + /* free the old snapshot table */ + free_clusters(bs, s->snapshots_offset, s->snapshots_size); + s->snapshots_offset = snapshots_offset; + s->snapshots_size = snapshots_size; + return 0; + fail: + return -1; +} + +static void find_new_snapshot_id(BlockDriverState *bs, + char *id_str, int id_str_size) +{ + BDRVQcowState *s = bs->opaque; + QCowSnapshot *sn; + int i, id, id_max = 0; + + for(i = 0; i < s->nb_snapshots; i++) { + sn = s->snapshots + i; + id = strtoul(sn->id_str, NULL, 10); + if (id > id_max) + id_max = id; + } + snprintf(id_str, id_str_size, "%d", id_max + 1); +} + +static int find_snapshot_by_id(BlockDriverState *bs, const char *id_str) +{ + BDRVQcowState *s = bs->opaque; + int i; + + for(i = 0; i < s->nb_snapshots; i++) { + if (!strcmp(s->snapshots[i].id_str, id_str)) + return i; + } + return -1; +} + +static int find_snapshot_by_id_or_name(BlockDriverState *bs, const char *name) +{ + BDRVQcowState *s = bs->opaque; + int i, ret; + + ret = find_snapshot_by_id(bs, name); + if (ret >= 0) + return ret; + for(i = 0; i < s->nb_snapshots; i++) { + if (!strcmp(s->snapshots[i].name, name)) + return i; + } + return -1; +} + +/* if no id is provided, a new one is constructed */ +static int qcow_snapshot_create(BlockDriverState *bs, + QEMUSnapshotInfo *sn_info) +{ + BDRVQcowState *s = bs->opaque; + QCowSnapshot *snapshots1, sn1, *sn = &sn1; + int i, ret; + uint64_t *l1_table = NULL; + + memset(sn, 0, sizeof(*sn)); + + if (sn_info->id_str[0] == '\0') { + /* compute a new id */ + find_new_snapshot_id(bs, sn_info->id_str, sizeof(sn_info->id_str)); + } + + /* check that the ID is unique */ + if (find_snapshot_by_id(bs, sn_info->id_str) >= 0) + return -ENOENT; + + sn->id_str = qemu_strdup(sn_info->id_str); + if (!sn->id_str) + goto fail; + sn->name = qemu_strdup(sn_info->name); + if (!sn->name) + goto fail; + sn->vm_state_size = sn_info->vm_state_size; + sn->date_sec = sn_info->date_sec; + sn->date_nsec = sn_info->date_nsec; + sn->vm_clock_nsec = sn_info->vm_clock_nsec; + + ret = update_snapshot_refcount(bs, s->l1_table_offset, s->l1_size, 1); + if (ret < 0) + goto fail; + + /* create the L1 table of the snapshot */ + sn->l1_table_offset = alloc_clusters(bs, s->l1_size * sizeof(uint64_t)); + sn->l1_size = s->l1_size; + + l1_table = qemu_malloc(s->l1_size * sizeof(uint64_t)); + for(i = 0; i < s->l1_size; i++) { + l1_table[i] = cpu_to_be64(s->l1_table[i]); + } + if (bdrv_pwrite(s->hd, sn->l1_table_offset, + l1_table, s->l1_size * sizeof(uint64_t)) != + (s->l1_size * sizeof(uint64_t))) + goto fail; + qemu_free(l1_table); + l1_table = NULL; + + snapshots1 = qemu_malloc((s->nb_snapshots + 1) * sizeof(QCowSnapshot)); + if (s->snapshots) { + memcpy(snapshots1, s->snapshots, s->nb_snapshots * sizeof(QCowSnapshot)); + qemu_free(s->snapshots); + } + s->snapshots = snapshots1; + s->snapshots[s->nb_snapshots++] = *sn; + + if (qcow_write_snapshots(bs) < 0) + goto fail; +#ifdef DEBUG_ALLOC + check_refcounts(bs); +#endif + return 0; + fail: + qemu_free(sn->name); + qemu_free(l1_table); + return -1; +} + +/* copy the snapshot 'snapshot_name' into the current disk image */ +static int qcow_snapshot_goto(BlockDriverState *bs, + const char *snapshot_id) +{ + BDRVQcowState *s = bs->opaque; + QCowSnapshot *sn; + int i, snapshot_index, l1_size2; + + snapshot_index = find_snapshot_by_id_or_name(bs, snapshot_id); + if (snapshot_index < 0) + return -ENOENT; + sn = &s->snapshots[snapshot_index]; + + if (update_snapshot_refcount(bs, s->l1_table_offset, s->l1_size, -1) < 0) + goto fail; + + if (grow_l1_table(bs, sn->l1_size) < 0) + goto fail; + + s->l1_size = sn->l1_size; + l1_size2 = s->l1_size * sizeof(uint64_t); + /* copy the snapshot l1 table to the current l1 table */ + if (bdrv_pread(s->hd, sn->l1_table_offset, + s->l1_table, l1_size2) != l1_size2) + goto fail; + if (bdrv_pwrite(s->hd, s->l1_table_offset, + s->l1_table, l1_size2) != l1_size2) + goto fail; + for(i = 0;i < s->l1_size; i++) { + be64_to_cpus(&s->l1_table[i]); + } + + if (update_snapshot_refcount(bs, s->l1_table_offset, s->l1_size, 1) < 0) + goto fail; + +#ifdef DEBUG_ALLOC + check_refcounts(bs); +#endif + return 0; + fail: + return -EIO; +} + +static int qcow_snapshot_delete(BlockDriverState *bs, const char *snapshot_id) +{ + BDRVQcowState *s = bs->opaque; + QCowSnapshot *sn; + int snapshot_index, ret; + + snapshot_index = find_snapshot_by_id_or_name(bs, snapshot_id); + if (snapshot_index < 0) + return -ENOENT; + sn = &s->snapshots[snapshot_index]; + + ret = update_snapshot_refcount(bs, sn->l1_table_offset, sn->l1_size, -1); + if (ret < 0) + return ret; + /* must update the copied flag on the current cluster offsets */ + ret = update_snapshot_refcount(bs, s->l1_table_offset, s->l1_size, 0); + if (ret < 0) + return ret; + free_clusters(bs, sn->l1_table_offset, sn->l1_size * sizeof(uint64_t)); + + qemu_free(sn->id_str); + qemu_free(sn->name); + memmove(sn, sn + 1, (s->nb_snapshots - snapshot_index - 1) * sizeof(*sn)); + s->nb_snapshots--; + ret = qcow_write_snapshots(bs); + if (ret < 0) { + /* XXX: restore snapshot if error ? */ + return ret; + } +#ifdef DEBUG_ALLOC + check_refcounts(bs); +#endif + return 0; +} + +static int qcow_snapshot_list(BlockDriverState *bs, + QEMUSnapshotInfo **psn_tab) +{ + BDRVQcowState *s = bs->opaque; + QEMUSnapshotInfo *sn_tab, *sn_info; + QCowSnapshot *sn; + int i; + + sn_tab = qemu_mallocz(s->nb_snapshots * sizeof(QEMUSnapshotInfo)); + for(i = 0; i < s->nb_snapshots; i++) { + sn_info = sn_tab + i; + sn = s->snapshots + i; + pstrcpy(sn_info->id_str, sizeof(sn_info->id_str), + sn->id_str); + pstrcpy(sn_info->name, sizeof(sn_info->name), + sn->name); + sn_info->vm_state_size = sn->vm_state_size; + sn_info->date_sec = sn->date_sec; + sn_info->date_nsec = sn->date_nsec; + sn_info->vm_clock_nsec = sn->vm_clock_nsec; + } + *psn_tab = sn_tab; + return s->nb_snapshots; +} + +/*********************************************************/ +/* refcount handling */ + +static int refcount_init(BlockDriverState *bs) +{ + BDRVQcowState *s = bs->opaque; + int ret, refcount_table_size2, i; + + s->refcount_block_cache = qemu_malloc(s->cluster_size); + refcount_table_size2 = s->refcount_table_size * sizeof(uint64_t); + s->refcount_table = qemu_malloc(refcount_table_size2); + if (s->refcount_table_size > 0) { + ret = bdrv_pread(s->hd, s->refcount_table_offset, + s->refcount_table, refcount_table_size2); + if (ret != refcount_table_size2) + goto fail; + for(i = 0; i < s->refcount_table_size; i++) + be64_to_cpus(&s->refcount_table[i]); + } + return 0; + fail: + return -ENOMEM; +} + +static void refcount_close(BlockDriverState *bs) +{ + BDRVQcowState *s = bs->opaque; + qemu_free(s->refcount_block_cache); + qemu_free(s->refcount_table); +} + + +static int load_refcount_block(BlockDriverState *bs, + int64_t refcount_block_offset) +{ + BDRVQcowState *s = bs->opaque; + int ret; + ret = bdrv_pread(s->hd, refcount_block_offset, s->refcount_block_cache, + s->cluster_size); + if (ret != s->cluster_size) + return -EIO; + s->refcount_block_cache_offset = refcount_block_offset; + return 0; +} + +static int get_refcount(BlockDriverState *bs, int64_t cluster_index) +{ + BDRVQcowState *s = bs->opaque; + int refcount_table_index, block_index; + int64_t refcount_block_offset; + + refcount_table_index = cluster_index >> (s->cluster_bits - REFCOUNT_SHIFT); + if (refcount_table_index >= s->refcount_table_size) + return 0; + refcount_block_offset = s->refcount_table[refcount_table_index]; + if (!refcount_block_offset) + return 0; + if (refcount_block_offset != s->refcount_block_cache_offset) { + /* better than nothing: return allocated if read error */ + if (load_refcount_block(bs, refcount_block_offset) < 0) + return 1; + } + block_index = cluster_index & + ((1 << (s->cluster_bits - REFCOUNT_SHIFT)) - 1); + return be16_to_cpu(s->refcount_block_cache[block_index]); +} + +/* return < 0 if error */ +static int64_t alloc_clusters_noref(BlockDriverState *bs, int64_t size) +{ + BDRVQcowState *s = bs->opaque; + int i, nb_clusters; + + nb_clusters = size_to_clusters(s, size); +retry: + for(i = 0; i < nb_clusters; i++) { + int64_t i = s->free_cluster_index++; + if (get_refcount(bs, i) != 0) + goto retry; + } +#ifdef DEBUG_ALLOC2 + printf("alloc_clusters: size=%lld -> %lld\n", + size, + (s->free_cluster_index - nb_clusters) << s->cluster_bits); +#endif + return (s->free_cluster_index - nb_clusters) << s->cluster_bits; +} + +static int64_t alloc_clusters(BlockDriverState *bs, int64_t size) +{ + int64_t offset; + + offset = alloc_clusters_noref(bs, size); + update_refcount(bs, offset, size, 1); + return offset; +} + +/* only used to allocate compressed sectors. We try to allocate + contiguous sectors. size must be <= cluster_size */ +static int64_t alloc_bytes(BlockDriverState *bs, int size) +{ + BDRVQcowState *s = bs->opaque; + int64_t offset, cluster_offset; + int free_in_cluster; + + assert(size > 0 && size <= s->cluster_size); + if (s->free_byte_offset == 0) { + s->free_byte_offset = alloc_clusters(bs, s->cluster_size); + } + redo: + free_in_cluster = s->cluster_size - + (s->free_byte_offset & (s->cluster_size - 1)); + if (size <= free_in_cluster) { + /* enough space in current cluster */ + offset = s->free_byte_offset; + s->free_byte_offset += size; + free_in_cluster -= size; + if (free_in_cluster == 0) + s->free_byte_offset = 0; + if ((offset & (s->cluster_size - 1)) != 0) + update_cluster_refcount(bs, offset >> s->cluster_bits, 1); + } else { + offset = alloc_clusters(bs, s->cluster_size); + cluster_offset = s->free_byte_offset & ~(s->cluster_size - 1); + if ((cluster_offset + s->cluster_size) == offset) { + /* we are lucky: contiguous data */ + offset = s->free_byte_offset; + update_cluster_refcount(bs, offset >> s->cluster_bits, 1); + s->free_byte_offset += size; + } else { + s->free_byte_offset = offset; + goto redo; + } + } + return offset; +} + +static void free_clusters(BlockDriverState *bs, + int64_t offset, int64_t size) +{ + update_refcount(bs, offset, size, -1); +} + +static int grow_refcount_table(BlockDriverState *bs, int min_size) +{ + BDRVQcowState *s = bs->opaque; + int new_table_size, new_table_size2, refcount_table_clusters, i, ret; + uint64_t *new_table; + int64_t table_offset; + uint8_t data[12]; + int old_table_size; + int64_t old_table_offset; + + if (min_size <= s->refcount_table_size) + return 0; + /* compute new table size */ + refcount_table_clusters = s->refcount_table_size >> (s->cluster_bits - 3); + for(;;) { + if (refcount_table_clusters == 0) { + refcount_table_clusters = 1; + } else { + refcount_table_clusters = (refcount_table_clusters * 3 + 1) / 2; + } + new_table_size = refcount_table_clusters << (s->cluster_bits - 3); + if (min_size <= new_table_size) + break; + } +#ifdef DEBUG_ALLOC2 + printf("grow_refcount_table from %d to %d\n", + s->refcount_table_size, + new_table_size); +#endif + new_table_size2 = new_table_size * sizeof(uint64_t); + new_table = qemu_mallocz(new_table_size2); + memcpy(new_table, s->refcount_table, + s->refcount_table_size * sizeof(uint64_t)); + for(i = 0; i < s->refcount_table_size; i++) + cpu_to_be64s(&new_table[i]); + /* Note: we cannot update the refcount now to avoid recursion */ + table_offset = alloc_clusters_noref(bs, new_table_size2); + ret = bdrv_pwrite(s->hd, table_offset, new_table, new_table_size2); + if (ret != new_table_size2) + goto fail; + for(i = 0; i < s->refcount_table_size; i++) + be64_to_cpus(&new_table[i]); + + cpu_to_be64w((uint64_t*)data, table_offset); + cpu_to_be32w((uint32_t*)(data + 8), refcount_table_clusters); + if (bdrv_pwrite(s->hd, offsetof(QCowHeader, refcount_table_offset), + data, sizeof(data)) != sizeof(data)) + goto fail; + qemu_free(s->refcount_table); + old_table_offset = s->refcount_table_offset; + old_table_size = s->refcount_table_size; + s->refcount_table = new_table; + s->refcount_table_size = new_table_size; + s->refcount_table_offset = table_offset; + + update_refcount(bs, table_offset, new_table_size2, 1); + free_clusters(bs, old_table_offset, old_table_size * sizeof(uint64_t)); + return 0; + fail: + free_clusters(bs, table_offset, new_table_size2); + qemu_free(new_table); + return -EIO; +} + +/* addend must be 1 or -1 */ +/* XXX: cache several refcount block clusters ? */ +static int update_cluster_refcount(BlockDriverState *bs, + int64_t cluster_index, + int addend) +{ + BDRVQcowState *s = bs->opaque; + int64_t offset, refcount_block_offset; + int ret, refcount_table_index, block_index, refcount; + uint64_t data64; + + refcount_table_index = cluster_index >> (s->cluster_bits - REFCOUNT_SHIFT); + if (refcount_table_index >= s->refcount_table_size) { + if (addend < 0) + return -EINVAL; + ret = grow_refcount_table(bs, refcount_table_index + 1); + if (ret < 0) + return ret; + } + refcount_block_offset = s->refcount_table[refcount_table_index]; + if (!refcount_block_offset) { + if (addend < 0) + return -EINVAL; + /* create a new refcount block */ + /* Note: we cannot update the refcount now to avoid recursion */ + offset = alloc_clusters_noref(bs, s->cluster_size); + memset(s->refcount_block_cache, 0, s->cluster_size); + ret = bdrv_pwrite(s->hd, offset, s->refcount_block_cache, s->cluster_size); + if (ret != s->cluster_size) + return -EINVAL; + s->refcount_table[refcount_table_index] = offset; + data64 = cpu_to_be64(offset); + ret = bdrv_pwrite(s->hd, s->refcount_table_offset + + refcount_table_index * sizeof(uint64_t), + &data64, sizeof(data64)); + if (ret != sizeof(data64)) + return -EINVAL; + + refcount_block_offset = offset; + s->refcount_block_cache_offset = offset; + update_refcount(bs, offset, s->cluster_size, 1); + } else { + if (refcount_block_offset != s->refcount_block_cache_offset) { + if (load_refcount_block(bs, refcount_block_offset) < 0) + return -EIO; + } + } + /* we can update the count and save it */ + block_index = cluster_index & + ((1 << (s->cluster_bits - REFCOUNT_SHIFT)) - 1); + refcount = be16_to_cpu(s->refcount_block_cache[block_index]); + refcount += addend; + if (refcount < 0 || refcount > 0xffff) + return -EINVAL; + if (refcount == 0 && cluster_index < s->free_cluster_index) { + s->free_cluster_index = cluster_index; + } + s->refcount_block_cache[block_index] = cpu_to_be16(refcount); + if (bdrv_pwrite(s->hd, + refcount_block_offset + (block_index << REFCOUNT_SHIFT), + &s->refcount_block_cache[block_index], 2) != 2) + return -EIO; + return refcount; +} + +static void update_refcount(BlockDriverState *bs, + int64_t offset, int64_t length, + int addend) +{ + BDRVQcowState *s = bs->opaque; + int64_t start, last, cluster_offset; + +#ifdef DEBUG_ALLOC2 + printf("update_refcount: offset=%lld size=%lld addend=%d\n", + offset, length, addend); +#endif + if (length <= 0) + return; + start = offset & ~(s->cluster_size - 1); + last = (offset + length - 1) & ~(s->cluster_size - 1); + for(cluster_offset = start; cluster_offset <= last; + cluster_offset += s->cluster_size) { + update_cluster_refcount(bs, cluster_offset >> s->cluster_bits, addend); + } +} + +/* + * Increases the refcount for a range of clusters in a given refcount table. + * This is used to construct a temporary refcount table out of L1 and L2 tables + * which can be compared the the refcount table saved in the image. + * + * Returns the number of errors in the image that were found + */ +static int inc_refcounts(BlockDriverState *bs, + uint16_t *refcount_table, + int refcount_table_size, + int64_t offset, int64_t size) +{ + BDRVQcowState *s = bs->opaque; + int64_t start, last, cluster_offset; + int k; + int errors = 0; + + if (size <= 0) + return 0; + + start = offset & ~(s->cluster_size - 1); + last = (offset + size - 1) & ~(s->cluster_size - 1); + for(cluster_offset = start; cluster_offset <= last; + cluster_offset += s->cluster_size) { + k = cluster_offset >> s->cluster_bits; + if (k < 0 || k >= refcount_table_size) { + fprintf(stderr, "ERROR: invalid cluster offset=0x%" PRIx64 "\n", + cluster_offset); + errors++; + } else { + if (++refcount_table[k] == 0) { + fprintf(stderr, "ERROR: overflow cluster offset=0x%" PRIx64 + "\n", cluster_offset); + errors++; + } + } + } + + return errors; +} + +/* + * Increases the refcount in the given refcount table for the all clusters + * referenced in the L2 table. While doing so, performs some checks on L2 + * entries. + * + * Returns the number of errors found by the checks or -errno if an internal + * error occurred. + */ +static int check_refcounts_l2(BlockDriverState *bs, + uint16_t *refcount_table, int refcount_table_size, int64_t l2_offset, + int check_copied) +{ + BDRVQcowState *s = bs->opaque; + uint64_t *l2_table, offset; + int i, l2_size, nb_csectors, refcount; + int errors = 0; + + /* Read L2 table from disk */ + l2_size = s->l2_size * sizeof(uint64_t); + l2_table = qemu_malloc(l2_size); + + if (bdrv_pread(s->hd, l2_offset, l2_table, l2_size) != l2_size) + goto fail; + + /* Do the actual checks */ + for(i = 0; i < s->l2_size; i++) { + offset = be64_to_cpu(l2_table[i]); + if (offset != 0) { + if (offset & QCOW_OFLAG_COMPRESSED) { + /* Compressed clusters don't have QCOW_OFLAG_COPIED */ + if (offset & QCOW_OFLAG_COPIED) { + fprintf(stderr, "ERROR: cluster %" PRId64 ": " + "copied flag must never be set for compressed " + "clusters\n", offset >> s->cluster_bits); + offset &= ~QCOW_OFLAG_COPIED; + errors++; + } + + /* Mark cluster as used */ + nb_csectors = ((offset >> s->csize_shift) & + s->csize_mask) + 1; + offset &= s->cluster_offset_mask; + errors += inc_refcounts(bs, refcount_table, + refcount_table_size, + offset & ~511, nb_csectors * 512); + } else { + /* QCOW_OFLAG_COPIED must be set iff refcount == 1 */ + if (check_copied) { + uint64_t entry = offset; + offset &= ~QCOW_OFLAG_COPIED; + refcount = get_refcount(bs, offset >> s->cluster_bits); + if ((refcount == 1) != ((entry & QCOW_OFLAG_COPIED) != 0)) { + fprintf(stderr, "ERROR OFLAG_COPIED: offset=%" + PRIx64 " refcount=%d\n", entry, refcount); + errors++; + } + } + + /* Mark cluster as used */ + offset &= ~QCOW_OFLAG_COPIED; + errors += inc_refcounts(bs, refcount_table, + refcount_table_size, + offset, s->cluster_size); + + /* Correct offsets are cluster aligned */ + if (offset & (s->cluster_size - 1)) { + fprintf(stderr, "ERROR offset=%" PRIx64 ": Cluster is not " + "properly aligned; L2 entry corrupted.\n", offset); + errors++; + } + } + } + } + + qemu_free(l2_table); + return errors; + +fail: + fprintf(stderr, "ERROR: I/O error in check_refcounts_l1\n"); + qemu_free(l2_table); + return -EIO; +} + +/* + * Increases the refcount for the L1 table, its L2 tables and all referenced + * clusters in the given refcount table. While doing so, performs some checks + * on L1 and L2 entries. + * + * Returns the number of errors found by the checks or -errno if an internal + * error occurred. + */ +static int check_refcounts_l1(BlockDriverState *bs, + uint16_t *refcount_table, + int refcount_table_size, + int64_t l1_table_offset, int l1_size, + int check_copied) +{ + BDRVQcowState *s = bs->opaque; + uint64_t *l1_table, l2_offset, l1_size2; + int i, refcount, ret; + int errors = 0; + + l1_size2 = l1_size * sizeof(uint64_t); + + /* Mark L1 table as used */ + errors += inc_refcounts(bs, refcount_table, refcount_table_size, + l1_table_offset, l1_size2); + + /* Read L1 table entries from disk */ + l1_table = qemu_malloc(l1_size2); + if (bdrv_pread(s->hd, l1_table_offset, + l1_table, l1_size2) != l1_size2) + goto fail; + for(i = 0;i < l1_size; i++) + be64_to_cpus(&l1_table[i]); + + /* Do the actual checks */ + for(i = 0; i < l1_size; i++) { + l2_offset = l1_table[i]; + if (l2_offset) { + /* QCOW_OFLAG_COPIED must be set iff refcount == 1 */ + if (check_copied) { + refcount = get_refcount(bs, (l2_offset & ~QCOW_OFLAG_COPIED) + >> s->cluster_bits); + if ((refcount == 1) != ((l2_offset & QCOW_OFLAG_COPIED) != 0)) { + fprintf(stderr, "ERROR OFLAG_COPIED: l2_offset=%" PRIx64 + " refcount=%d\n", l2_offset, refcount); + errors++; + } + } + + /* Mark L2 table as used */ + l2_offset &= ~QCOW_OFLAG_COPIED; + errors += inc_refcounts(bs, refcount_table, + refcount_table_size, + l2_offset, + s->cluster_size); + + /* L2 tables are cluster aligned */ + if (l2_offset & (s->cluster_size - 1)) { + fprintf(stderr, "ERROR l2_offset=%" PRIx64 ": Table is not " + "cluster aligned; L1 entry corrupted\n", l2_offset); + errors++; + } + + /* Process and check L2 entries */ + ret = check_refcounts_l2(bs, refcount_table, refcount_table_size, + l2_offset, check_copied); + if (ret < 0) { + goto fail; + } + errors += ret; + } + } + qemu_free(l1_table); + return errors; + +fail: + fprintf(stderr, "ERROR: I/O error in check_refcounts_l1\n"); + qemu_free(l1_table); + return -EIO; +} + +/* + * Checks an image for refcount consistency. + * + * Returns 0 if no errors are found, the number of errors in case the image is + * detected as corrupted, and -errno when an internal error occured. + */ +static int check_refcounts(BlockDriverState *bs) +{ + BDRVQcowState *s = bs->opaque; + int64_t size; + int nb_clusters, refcount1, refcount2, i; + QCowSnapshot *sn; + uint16_t *refcount_table; + int ret, errors = 0; + + size = bdrv_getlength(s->hd); + nb_clusters = size_to_clusters(s, size); + refcount_table = qemu_mallocz(nb_clusters * sizeof(uint16_t)); + + /* header */ + errors += inc_refcounts(bs, refcount_table, nb_clusters, + 0, s->cluster_size); + + /* current L1 table */ + ret = check_refcounts_l1(bs, refcount_table, nb_clusters, + s->l1_table_offset, s->l1_size, 1); + if (ret < 0) { + return ret; + } + errors += ret; + + /* snapshots */ + for(i = 0; i < s->nb_snapshots; i++) { + sn = s->snapshots + i; + check_refcounts_l1(bs, refcount_table, nb_clusters, + sn->l1_table_offset, sn->l1_size, 0); + } + errors += inc_refcounts(bs, refcount_table, nb_clusters, + s->snapshots_offset, s->snapshots_size); + + /* refcount data */ + errors += inc_refcounts(bs, refcount_table, nb_clusters, + s->refcount_table_offset, + s->refcount_table_size * sizeof(uint64_t)); + for(i = 0; i < s->refcount_table_size; i++) { + int64_t offset; + offset = s->refcount_table[i]; + if (offset != 0) { + errors += inc_refcounts(bs, refcount_table, nb_clusters, + offset, s->cluster_size); + } + } + + /* compare ref counts */ + for(i = 0; i < nb_clusters; i++) { + refcount1 = get_refcount(bs, i); + refcount2 = refcount_table[i]; + if (refcount1 != refcount2) { + fprintf(stderr, "ERROR cluster %d refcount=%d reference=%d\n", + i, refcount1, refcount2); + errors++; + } + } + + qemu_free(refcount_table); + + return errors; +} + +static int qcow_check(BlockDriverState *bs) +{ + return check_refcounts(bs); +} + +#if 0 +static void dump_refcounts(BlockDriverState *bs) +{ + BDRVQcowState *s = bs->opaque; + int64_t nb_clusters, k, k1, size; + int refcount; + + size = bdrv_getlength(s->hd); + nb_clusters = size_to_clusters(s, size); + for(k = 0; k < nb_clusters;) { + k1 = k; + refcount = get_refcount(bs, k); + k++; + while (k < nb_clusters && get_refcount(bs, k) == refcount) + k++; + printf("%lld: refcount=%d nb=%lld\n", k, refcount, k - k1); + } +} +#endif + +static int qcow_put_buffer(BlockDriverState *bs, const uint8_t *buf, + int64_t pos, int size) +{ + int growable = bs->growable; + + bs->growable = 1; + bdrv_pwrite(bs, pos, buf, size); + bs->growable = growable; + + return size; +} + +static int qcow_get_buffer(BlockDriverState *bs, uint8_t *buf, + int64_t pos, int size) +{ + int growable = bs->growable; + int ret; + + bs->growable = 1; + ret = bdrv_pread(bs, pos, buf, size); + bs->growable = growable; + + return ret; +} + +static BlockDriver bdrv_qcow2 = { + .format_name = "qcow2", + .instance_size = sizeof(BDRVQcowState), + .bdrv_probe = qcow_probe, + .bdrv_open = qcow_open, + .bdrv_close = qcow_close, + .bdrv_create = qcow_create, + .bdrv_flush = qcow_flush, + .bdrv_is_allocated = qcow_is_allocated, + .bdrv_set_key = qcow_set_key, + .bdrv_make_empty = qcow_make_empty, + + .bdrv_aio_readv = qcow_aio_readv, + .bdrv_aio_writev = qcow_aio_writev, + .bdrv_aio_cancel = qcow_aio_cancel, + .aiocb_size = sizeof(QCowAIOCB), + .bdrv_write_compressed = qcow_write_compressed, + + .bdrv_snapshot_create = qcow_snapshot_create, + .bdrv_snapshot_goto = qcow_snapshot_goto, + .bdrv_snapshot_delete = qcow_snapshot_delete, + .bdrv_snapshot_list = qcow_snapshot_list, + .bdrv_get_info = qcow_get_info, + + .bdrv_put_buffer = qcow_put_buffer, + .bdrv_get_buffer = qcow_get_buffer, + + .bdrv_create2 = qcow_create2, + .bdrv_check = qcow_check, +}; + +static void bdrv_qcow2_init(void) +{ + bdrv_register(&bdrv_qcow2); +} + +block_init(bdrv_qcow2_init); diff --git a/block/raw-posix.c b/block/raw-posix.c new file mode 100644 index 000000000..f3a9476b4 --- /dev/null +++ b/block/raw-posix.c @@ -0,0 +1,1438 @@ +/* + * Block driver for RAW files (posix) + * + * Copyright (c) 2006 Fabrice Bellard + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +#include "qemu-common.h" +#include "qemu-timer.h" +#include "qemu-char.h" +#include "block_int.h" +#include "module.h" +#ifdef CONFIG_AIO +#include "posix-aio-compat.h" +#endif + +#ifdef CONFIG_COCOA +#include +#include +#include +#include +#include +#include +#include +//#include +#include +#endif + +#ifdef __sun__ +#define _POSIX_PTHREAD_SEMANTICS 1 +#include +#include +#endif +#ifdef __linux__ +#include +#include +#include +#endif +#ifdef __FreeBSD__ +#include +#include +#include +#endif + +#ifdef __OpenBSD__ +#include +#include +#include +#endif + +#ifdef __DragonFly__ +#include +#include +#endif + +//#define DEBUG_FLOPPY + +//#define DEBUG_BLOCK +#if defined(DEBUG_BLOCK) +#define DEBUG_BLOCK_PRINT(formatCstr, ...) do { if (qemu_log_enabled()) \ + { qemu_log(formatCstr, ## __VA_ARGS__); qemu_log_flush(); } } while (0) +#else +#define DEBUG_BLOCK_PRINT(formatCstr, ...) +#endif + +/* OS X does not have O_DSYNC */ +#ifndef O_DSYNC +#define O_DSYNC O_SYNC +#endif + +/* Approximate O_DIRECT with O_DSYNC if O_DIRECT isn't available */ +#ifndef O_DIRECT +#define O_DIRECT O_DSYNC +#endif + +#define FTYPE_FILE 0 +#define FTYPE_CD 1 +#define FTYPE_FD 2 + +#define ALIGNED_BUFFER_SIZE (32 * 512) + +/* if the FD is not accessed during that time (in ms), we try to + reopen it to see if the disk has been changed */ +#define FD_OPEN_TIMEOUT 1000 + +typedef struct BDRVRawState { + int fd; + int type; + unsigned int lseek_err_cnt; +#if defined(__linux__) + /* linux floppy specific */ + int fd_open_flags; + int64_t fd_open_time; + int64_t fd_error_time; + int fd_got_error; + int fd_media_changed; +#endif +#if defined(__FreeBSD__) + int cd_open_flags; +#endif + uint8_t* aligned_buf; +} BDRVRawState; + +static int posix_aio_init(void); + +static int fd_open(BlockDriverState *bs); + +#if defined(__FreeBSD__) +static int cd_open(BlockDriverState *bs); +#endif + +static int raw_is_inserted(BlockDriverState *bs); + +static int raw_open(BlockDriverState *bs, const char *filename, int flags) +{ + BDRVRawState *s = bs->opaque; + int fd, open_flags, ret; + + posix_aio_init(); + + s->lseek_err_cnt = 0; + + open_flags = O_BINARY; + if ((flags & BDRV_O_ACCESS) == O_RDWR) { + open_flags |= O_RDWR; + } else { + open_flags |= O_RDONLY; + bs->read_only = 1; + } + if (flags & BDRV_O_CREAT) + open_flags |= O_CREAT | O_TRUNC; + + /* Use O_DSYNC for write-through caching, no flags for write-back caching, + * and O_DIRECT for no caching. */ + if ((flags & BDRV_O_NOCACHE)) + open_flags |= O_DIRECT; + else if (!(flags & BDRV_O_CACHE_WB)) + open_flags |= O_DSYNC; + + s->type = FTYPE_FILE; + + fd = open(filename, open_flags, 0644); + if (fd < 0) { + ret = -errno; + if (ret == -EROFS) + ret = -EACCES; + return ret; + } + s->fd = fd; + s->aligned_buf = NULL; + if ((flags & BDRV_O_NOCACHE)) { + s->aligned_buf = qemu_blockalign(bs, ALIGNED_BUFFER_SIZE); + if (s->aligned_buf == NULL) { + ret = -errno; + close(fd); + return ret; + } + } + return 0; +} + +/* XXX: use host sector size if necessary with: +#ifdef DIOCGSECTORSIZE + { + unsigned int sectorsize = 512; + if (!ioctl(fd, DIOCGSECTORSIZE, §orsize) && + sectorsize > bufsize) + bufsize = sectorsize; + } +#endif +#ifdef CONFIG_COCOA + u_int32_t blockSize = 512; + if ( !ioctl( fd, DKIOCGETBLOCKSIZE, &blockSize ) && blockSize > bufsize) { + bufsize = blockSize; + } +#endif +*/ + +/* + * offset and count are in bytes, but must be multiples of 512 for files + * opened with O_DIRECT. buf must be aligned to 512 bytes then. + * + * This function may be called without alignment if the caller ensures + * that O_DIRECT is not in effect. + */ +static int raw_pread_aligned(BlockDriverState *bs, int64_t offset, + uint8_t *buf, int count) +{ + BDRVRawState *s = bs->opaque; + int ret; + + ret = fd_open(bs); + if (ret < 0) + return ret; + + if (offset >= 0 && lseek(s->fd, offset, SEEK_SET) == (off_t)-1) { + ++(s->lseek_err_cnt); + if(s->lseek_err_cnt <= 10) { + DEBUG_BLOCK_PRINT("raw_pread(%d:%s, %" PRId64 ", %p, %d) [%" PRId64 + "] lseek failed : %d = %s\n", + s->fd, bs->filename, offset, buf, count, + bs->total_sectors, errno, strerror(errno)); + } + return -1; + } + s->lseek_err_cnt=0; + + ret = read(s->fd, buf, count); + if (ret == count) + goto label__raw_read__success; + + DEBUG_BLOCK_PRINT("raw_pread(%d:%s, %" PRId64 ", %p, %d) [%" PRId64 + "] read failed %d : %d = %s\n", + s->fd, bs->filename, offset, buf, count, + bs->total_sectors, ret, errno, strerror(errno)); + + /* Try harder for CDrom. */ + if (bs->type == BDRV_TYPE_CDROM) { + lseek(s->fd, offset, SEEK_SET); + ret = read(s->fd, buf, count); + if (ret == count) + goto label__raw_read__success; + lseek(s->fd, offset, SEEK_SET); + ret = read(s->fd, buf, count); + if (ret == count) + goto label__raw_read__success; + + DEBUG_BLOCK_PRINT("raw_pread(%d:%s, %" PRId64 ", %p, %d) [%" PRId64 + "] retry read failed %d : %d = %s\n", + s->fd, bs->filename, offset, buf, count, + bs->total_sectors, ret, errno, strerror(errno)); + } + +label__raw_read__success: + + return ret; +} + +/* + * offset and count are in bytes, but must be multiples of 512 for files + * opened with O_DIRECT. buf must be aligned to 512 bytes then. + * + * This function may be called without alignment if the caller ensures + * that O_DIRECT is not in effect. + */ +static int raw_pwrite_aligned(BlockDriverState *bs, int64_t offset, + const uint8_t *buf, int count) +{ + BDRVRawState *s = bs->opaque; + int ret; + + ret = fd_open(bs); + if (ret < 0) + return -errno; + + if (offset >= 0 && lseek(s->fd, offset, SEEK_SET) == (off_t)-1) { + ++(s->lseek_err_cnt); + if(s->lseek_err_cnt) { + DEBUG_BLOCK_PRINT("raw_pwrite(%d:%s, %" PRId64 ", %p, %d) [%" + PRId64 "] lseek failed : %d = %s\n", + s->fd, bs->filename, offset, buf, count, + bs->total_sectors, errno, strerror(errno)); + } + return -EIO; + } + s->lseek_err_cnt = 0; + + ret = write(s->fd, buf, count); + if (ret == count) + goto label__raw_write__success; + + DEBUG_BLOCK_PRINT("raw_pwrite(%d:%s, %" PRId64 ", %p, %d) [%" PRId64 + "] write failed %d : %d = %s\n", + s->fd, bs->filename, offset, buf, count, + bs->total_sectors, ret, errno, strerror(errno)); + +label__raw_write__success: + + return (ret < 0) ? -errno : ret; +} + + +/* + * offset and count are in bytes and possibly not aligned. For files opened + * with O_DIRECT, necessary alignments are ensured before calling + * raw_pread_aligned to do the actual read. + */ +static int raw_pread(BlockDriverState *bs, int64_t offset, + uint8_t *buf, int count) +{ + BDRVRawState *s = bs->opaque; + int size, ret, shift, sum; + + sum = 0; + + if (s->aligned_buf != NULL) { + + if (offset & 0x1ff) { + /* align offset on a 512 bytes boundary */ + + shift = offset & 0x1ff; + size = (shift + count + 0x1ff) & ~0x1ff; + if (size > ALIGNED_BUFFER_SIZE) + size = ALIGNED_BUFFER_SIZE; + ret = raw_pread_aligned(bs, offset - shift, s->aligned_buf, size); + if (ret < 0) + return ret; + + size = 512 - shift; + if (size > count) + size = count; + memcpy(buf, s->aligned_buf + shift, size); + + buf += size; + offset += size; + count -= size; + sum += size; + + if (count == 0) + return sum; + } + if (count & 0x1ff || (uintptr_t) buf & 0x1ff) { + + /* read on aligned buffer */ + + while (count) { + + size = (count + 0x1ff) & ~0x1ff; + if (size > ALIGNED_BUFFER_SIZE) + size = ALIGNED_BUFFER_SIZE; + + ret = raw_pread_aligned(bs, offset, s->aligned_buf, size); + if (ret < 0) + return ret; + + size = ret; + if (size > count) + size = count; + + memcpy(buf, s->aligned_buf, size); + + buf += size; + offset += size; + count -= size; + sum += size; + } + + return sum; + } + } + + return raw_pread_aligned(bs, offset, buf, count) + sum; +} + +static int raw_read(BlockDriverState *bs, int64_t sector_num, + uint8_t *buf, int nb_sectors) +{ + int ret; + + ret = raw_pread(bs, sector_num * 512, buf, nb_sectors * 512); + if (ret == (nb_sectors * 512)) + ret = 0; + return ret; +} + +/* + * offset and count are in bytes and possibly not aligned. For files opened + * with O_DIRECT, necessary alignments are ensured before calling + * raw_pwrite_aligned to do the actual write. + */ +static int raw_pwrite(BlockDriverState *bs, int64_t offset, + const uint8_t *buf, int count) +{ + BDRVRawState *s = bs->opaque; + int size, ret, shift, sum; + + sum = 0; + + if (s->aligned_buf != NULL) { + + if (offset & 0x1ff) { + /* align offset on a 512 bytes boundary */ + shift = offset & 0x1ff; + ret = raw_pread_aligned(bs, offset - shift, s->aligned_buf, 512); + if (ret < 0) + return ret; + + size = 512 - shift; + if (size > count) + size = count; + memcpy(s->aligned_buf + shift, buf, size); + + ret = raw_pwrite_aligned(bs, offset - shift, s->aligned_buf, 512); + if (ret < 0) + return ret; + + buf += size; + offset += size; + count -= size; + sum += size; + + if (count == 0) + return sum; + } + if (count & 0x1ff || (uintptr_t) buf & 0x1ff) { + + while ((size = (count & ~0x1ff)) != 0) { + + if (size > ALIGNED_BUFFER_SIZE) + size = ALIGNED_BUFFER_SIZE; + + memcpy(s->aligned_buf, buf, size); + + ret = raw_pwrite_aligned(bs, offset, s->aligned_buf, size); + if (ret < 0) + return ret; + + buf += ret; + offset += ret; + count -= ret; + sum += ret; + } + /* here, count < 512 because (count & ~0x1ff) == 0 */ + if (count) { + ret = raw_pread_aligned(bs, offset, s->aligned_buf, 512); + if (ret < 0) + return ret; + memcpy(s->aligned_buf, buf, count); + + ret = raw_pwrite_aligned(bs, offset, s->aligned_buf, 512); + if (ret < 0) + return ret; + if (count < ret) + ret = count; + + sum += ret; + } + return sum; + } + } + return raw_pwrite_aligned(bs, offset, buf, count) + sum; +} + +static int raw_write(BlockDriverState *bs, int64_t sector_num, + const uint8_t *buf, int nb_sectors) +{ + int ret; + ret = raw_pwrite(bs, sector_num * 512, buf, nb_sectors * 512); + if (ret == (nb_sectors * 512)) + ret = 0; + return ret; +} + +#ifdef CONFIG_AIO +/***********************************************************/ +/* Unix AIO using POSIX AIO */ + +typedef struct RawAIOCB { + BlockDriverAIOCB common; + struct qemu_paiocb aiocb; + struct RawAIOCB *next; + int ret; +} RawAIOCB; + +typedef struct PosixAioState +{ + int rfd, wfd; + RawAIOCB *first_aio; +} PosixAioState; + +static void posix_aio_read(void *opaque) +{ + PosixAioState *s = opaque; + RawAIOCB *acb, **pacb; + int ret; + ssize_t len; + + /* read all bytes from signal pipe */ + for (;;) { + char bytes[16]; + + len = read(s->rfd, bytes, sizeof(bytes)); + if (len == -1 && errno == EINTR) + continue; /* try again */ + if (len == sizeof(bytes)) + continue; /* more to read */ + break; + } + + for(;;) { + pacb = &s->first_aio; + for(;;) { + acb = *pacb; + if (!acb) + goto the_end; + ret = qemu_paio_error(&acb->aiocb); + if (ret == ECANCELED) { + /* remove the request */ + *pacb = acb->next; + qemu_aio_release(acb); + } else if (ret != EINPROGRESS) { + /* end of aio */ + if (ret == 0) { + ret = qemu_paio_return(&acb->aiocb); + if (ret == acb->aiocb.aio_nbytes) + ret = 0; + else + ret = -EINVAL; + } else { + ret = -ret; + } + /* remove the request */ + *pacb = acb->next; + /* call the callback */ + acb->common.cb(acb->common.opaque, ret); + qemu_aio_release(acb); + break; + } else { + pacb = &acb->next; + } + } + } + the_end: ; +} + +static int posix_aio_flush(void *opaque) +{ + PosixAioState *s = opaque; + return !!s->first_aio; +} + +static PosixAioState *posix_aio_state; + +static void aio_signal_handler(int signum) +{ + if (posix_aio_state) { + char byte = 0; + + write(posix_aio_state->wfd, &byte, sizeof(byte)); + } + + qemu_service_io(); +} + +static int posix_aio_init(void) +{ + struct sigaction act; + PosixAioState *s; + int fds[2]; + struct qemu_paioinit ai; + + if (posix_aio_state) + return 0; + + s = qemu_malloc(sizeof(PosixAioState)); + + sigfillset(&act.sa_mask); + act.sa_flags = 0; /* do not restart syscalls to interrupt select() */ + act.sa_handler = aio_signal_handler; + sigaction(SIGUSR2, &act, NULL); + + s->first_aio = NULL; + if (pipe(fds) == -1) { + fprintf(stderr, "failed to create pipe\n"); + return -errno; + } + + s->rfd = fds[0]; + s->wfd = fds[1]; + + fcntl(s->rfd, F_SETFL, O_NONBLOCK); + fcntl(s->wfd, F_SETFL, O_NONBLOCK); + + qemu_aio_set_fd_handler(s->rfd, posix_aio_read, NULL, posix_aio_flush, s); + + memset(&ai, 0, sizeof(ai)); + ai.aio_threads = 64; + ai.aio_num = 64; + qemu_paio_init(&ai); + + posix_aio_state = s; + + return 0; +} + +static RawAIOCB *raw_aio_setup(BlockDriverState *bs, int64_t sector_num, + QEMUIOVector *qiov, int nb_sectors, + BlockDriverCompletionFunc *cb, void *opaque) +{ + BDRVRawState *s = bs->opaque; + RawAIOCB *acb; + + if (fd_open(bs) < 0) + return NULL; + + acb = qemu_aio_get(bs, cb, opaque); + if (!acb) + return NULL; + acb->aiocb.aio_fildes = s->fd; + acb->aiocb.ev_signo = SIGUSR2; + acb->aiocb.aio_iov = qiov->iov; + acb->aiocb.aio_niov = qiov->niov; + acb->aiocb.aio_nbytes = nb_sectors * 512; + acb->aiocb.aio_offset = sector_num * 512; + acb->aiocb.aio_flags = 0; + + /* + * If O_DIRECT is used the buffer needs to be aligned on a sector + * boundary. Tell the low level code to ensure that in case it's + * not done yet. + */ + if (s->aligned_buf) + acb->aiocb.aio_flags |= QEMU_AIO_SECTOR_ALIGNED; + + acb->next = posix_aio_state->first_aio; + posix_aio_state->first_aio = acb; + return acb; +} + +static void raw_aio_remove(RawAIOCB *acb) +{ + RawAIOCB **pacb; + + /* remove the callback from the queue */ + pacb = &posix_aio_state->first_aio; + for(;;) { + if (*pacb == NULL) { + fprintf(stderr, "raw_aio_remove: aio request not found!\n"); + break; + } else if (*pacb == acb) { + *pacb = acb->next; + qemu_aio_release(acb); + break; + } + pacb = &(*pacb)->next; + } +} + +static BlockDriverAIOCB *raw_aio_readv(BlockDriverState *bs, + int64_t sector_num, QEMUIOVector *qiov, int nb_sectors, + BlockDriverCompletionFunc *cb, void *opaque) +{ + RawAIOCB *acb; + + acb = raw_aio_setup(bs, sector_num, qiov, nb_sectors, cb, opaque); + if (!acb) + return NULL; + if (qemu_paio_read(&acb->aiocb) < 0) { + raw_aio_remove(acb); + return NULL; + } + return &acb->common; +} + +static BlockDriverAIOCB *raw_aio_writev(BlockDriverState *bs, + int64_t sector_num, QEMUIOVector *qiov, int nb_sectors, + BlockDriverCompletionFunc *cb, void *opaque) +{ + RawAIOCB *acb; + + acb = raw_aio_setup(bs, sector_num, qiov, nb_sectors, cb, opaque); + if (!acb) + return NULL; + if (qemu_paio_write(&acb->aiocb) < 0) { + raw_aio_remove(acb); + return NULL; + } + return &acb->common; +} + +static void raw_aio_cancel(BlockDriverAIOCB *blockacb) +{ + int ret; + RawAIOCB *acb = (RawAIOCB *)blockacb; + + ret = qemu_paio_cancel(acb->aiocb.aio_fildes, &acb->aiocb); + if (ret == QEMU_PAIO_NOTCANCELED) { + /* fail safe: if the aio could not be canceled, we wait for + it */ + while (qemu_paio_error(&acb->aiocb) == EINPROGRESS); + } + + raw_aio_remove(acb); +} +#else /* CONFIG_AIO */ +static int posix_aio_init(void) +{ + return 0; +} +#endif /* CONFIG_AIO */ + + +static void raw_close(BlockDriverState *bs) +{ + BDRVRawState *s = bs->opaque; + if (s->fd >= 0) { + close(s->fd); + s->fd = -1; + if (s->aligned_buf != NULL) + qemu_free(s->aligned_buf); + } +} + +static int raw_truncate(BlockDriverState *bs, int64_t offset) +{ + BDRVRawState *s = bs->opaque; + if (s->type != FTYPE_FILE) + return -ENOTSUP; + if (ftruncate(s->fd, offset) < 0) + return -errno; + return 0; +} + +#ifdef __OpenBSD__ +static int64_t raw_getlength(BlockDriverState *bs) +{ + BDRVRawState *s = bs->opaque; + int fd = s->fd; + struct stat st; + + if (fstat(fd, &st)) + return -1; + if (S_ISCHR(st.st_mode) || S_ISBLK(st.st_mode)) { + struct disklabel dl; + + if (ioctl(fd, DIOCGDINFO, &dl)) + return -1; + return (uint64_t)dl.d_secsize * + dl.d_partitions[DISKPART(st.st_rdev)].p_size; + } else + return st.st_size; +} +#else /* !__OpenBSD__ */ +static int64_t raw_getlength(BlockDriverState *bs) +{ + BDRVRawState *s = bs->opaque; + int fd = s->fd; + int64_t size; +#ifdef HOST_BSD + struct stat sb; +#ifdef __FreeBSD__ + int reopened = 0; +#endif +#endif +#ifdef __sun__ + struct dk_minfo minfo; + int rv; +#endif + int ret; + + ret = fd_open(bs); + if (ret < 0) + return ret; + +#ifdef HOST_BSD +#ifdef __FreeBSD__ +again: +#endif + if (!fstat(fd, &sb) && (S_IFCHR & sb.st_mode)) { +#ifdef DIOCGMEDIASIZE + if (ioctl(fd, DIOCGMEDIASIZE, (off_t *)&size)) +#elif defined(DIOCGPART) + { + struct partinfo pi; + if (ioctl(fd, DIOCGPART, &pi) == 0) + size = pi.media_size; + else + size = 0; + } + if (size == 0) +#endif +#ifdef CONFIG_COCOA + size = LONG_LONG_MAX; +#else + size = lseek(fd, 0LL, SEEK_END); +#endif +#ifdef __FreeBSD__ + switch(s->type) { + case FTYPE_CD: + /* XXX FreeBSD acd returns UINT_MAX sectors for an empty drive */ + if (size == 2048LL * (unsigned)-1) + size = 0; + /* XXX no disc? maybe we need to reopen... */ + if (size <= 0 && !reopened && cd_open(bs) >= 0) { + reopened = 1; + goto again; + } + } +#endif + } else +#endif +#ifdef __sun__ + /* + * use the DKIOCGMEDIAINFO ioctl to read the size. + */ + rv = ioctl ( fd, DKIOCGMEDIAINFO, &minfo ); + if ( rv != -1 ) { + size = minfo.dki_lbsize * minfo.dki_capacity; + } else /* there are reports that lseek on some devices + fails, but irc discussion said that contingency + on contingency was overkill */ +#endif + { + size = lseek(fd, 0, SEEK_END); + } + return size; +} +#endif + +static int raw_create(const char *filename, int64_t total_size, + const char *backing_file, int flags) +{ + int fd; + + if (flags || backing_file) + return -ENOTSUP; + + fd = open(filename, O_WRONLY | O_CREAT | O_TRUNC | O_BINARY, + 0644); + if (fd < 0) + return -EIO; + ftruncate(fd, total_size * 512); + close(fd); + return 0; +} + +static void raw_flush(BlockDriverState *bs) +{ + BDRVRawState *s = bs->opaque; + fsync(s->fd); +} + +static BlockDriver bdrv_raw = { + .format_name = "raw", + .instance_size = sizeof(BDRVRawState), + .bdrv_probe = NULL, /* no probe for protocols */ + .bdrv_open = raw_open, + .bdrv_read = raw_read, + .bdrv_write = raw_write, + .bdrv_close = raw_close, + .bdrv_create = raw_create, + .bdrv_flush = raw_flush, + +#ifdef CONFIG_AIO + .bdrv_aio_readv = raw_aio_readv, + .bdrv_aio_writev = raw_aio_writev, + .bdrv_aio_cancel = raw_aio_cancel, + .aiocb_size = sizeof(RawAIOCB), +#endif + + .bdrv_truncate = raw_truncate, + .bdrv_getlength = raw_getlength, +}; + +/***********************************************/ +/* host device */ + +#ifdef CONFIG_COCOA +static kern_return_t FindEjectableCDMedia( io_iterator_t *mediaIterator ); +static kern_return_t GetBSDPath( io_iterator_t mediaIterator, char *bsdPath, CFIndex maxPathSize ); + +kern_return_t FindEjectableCDMedia( io_iterator_t *mediaIterator ) +{ + kern_return_t kernResult; + mach_port_t masterPort; + CFMutableDictionaryRef classesToMatch; + + kernResult = IOMasterPort( MACH_PORT_NULL, &masterPort ); + if ( KERN_SUCCESS != kernResult ) { + printf( "IOMasterPort returned %d\n", kernResult ); + } + + classesToMatch = IOServiceMatching( kIOCDMediaClass ); + if ( classesToMatch == NULL ) { + printf( "IOServiceMatching returned a NULL dictionary.\n" ); + } else { + CFDictionarySetValue( classesToMatch, CFSTR( kIOMediaEjectableKey ), kCFBooleanTrue ); + } + kernResult = IOServiceGetMatchingServices( masterPort, classesToMatch, mediaIterator ); + if ( KERN_SUCCESS != kernResult ) + { + printf( "IOServiceGetMatchingServices returned %d\n", kernResult ); + } + + return kernResult; +} + +kern_return_t GetBSDPath( io_iterator_t mediaIterator, char *bsdPath, CFIndex maxPathSize ) +{ + io_object_t nextMedia; + kern_return_t kernResult = KERN_FAILURE; + *bsdPath = '\0'; + nextMedia = IOIteratorNext( mediaIterator ); + if ( nextMedia ) + { + CFTypeRef bsdPathAsCFString; + bsdPathAsCFString = IORegistryEntryCreateCFProperty( nextMedia, CFSTR( kIOBSDNameKey ), kCFAllocatorDefault, 0 ); + if ( bsdPathAsCFString ) { + size_t devPathLength; + strcpy( bsdPath, _PATH_DEV ); + strcat( bsdPath, "r" ); + devPathLength = strlen( bsdPath ); + if ( CFStringGetCString( bsdPathAsCFString, bsdPath + devPathLength, maxPathSize - devPathLength, kCFStringEncodingASCII ) ) { + kernResult = KERN_SUCCESS; + } + CFRelease( bsdPathAsCFString ); + } + IOObjectRelease( nextMedia ); + } + + return kernResult; +} + +#endif + +static int hdev_open(BlockDriverState *bs, const char *filename, int flags) +{ + BDRVRawState *s = bs->opaque; + int fd, open_flags, ret; + + posix_aio_init(); + +#ifdef CONFIG_COCOA + if (strstart(filename, "/dev/cdrom", NULL)) { + kern_return_t kernResult; + io_iterator_t mediaIterator; + char bsdPath[ MAXPATHLEN ]; + int fd; + + kernResult = FindEjectableCDMedia( &mediaIterator ); + kernResult = GetBSDPath( mediaIterator, bsdPath, sizeof( bsdPath ) ); + + if ( bsdPath[ 0 ] != '\0' ) { + strcat(bsdPath,"s0"); + /* some CDs don't have a partition 0 */ + fd = open(bsdPath, O_RDONLY | O_BINARY | O_LARGEFILE); + if (fd < 0) { + bsdPath[strlen(bsdPath)-1] = '1'; + } else { + close(fd); + } + filename = bsdPath; + } + + if ( mediaIterator ) + IOObjectRelease( mediaIterator ); + } +#endif + open_flags = O_BINARY; + if ((flags & BDRV_O_ACCESS) == O_RDWR) { + open_flags |= O_RDWR; + } else { + open_flags |= O_RDONLY; + bs->read_only = 1; + } + /* Use O_DSYNC for write-through caching, no flags for write-back caching, + * and O_DIRECT for no caching. */ + if ((flags & BDRV_O_NOCACHE)) + open_flags |= O_DIRECT; + else if (!(flags & BDRV_O_CACHE_WB)) + open_flags |= O_DSYNC; + + s->type = FTYPE_FILE; +#if defined(__linux__) + if (strstart(filename, "/dev/cd", NULL)) { + /* open will not fail even if no CD is inserted */ + open_flags |= O_NONBLOCK; + s->type = FTYPE_CD; + } else if (strstart(filename, "/dev/fd", NULL)) { + s->type = FTYPE_FD; + s->fd_open_flags = open_flags; + /* open will not fail even if no floppy is inserted */ + open_flags |= O_NONBLOCK; +#ifdef CONFIG_AIO + } else if (strstart(filename, "/dev/sg", NULL)) { + bs->sg = 1; +#endif + } +#endif +#if defined(__FreeBSD__) + if (strstart(filename, "/dev/cd", NULL) || + strstart(filename, "/dev/acd", NULL)) { + s->type = FTYPE_CD; + s->cd_open_flags = open_flags; + } +#endif + s->fd = -1; + fd = open(filename, open_flags, 0644); + if (fd < 0) { + ret = -errno; + if (ret == -EROFS) + ret = -EACCES; + return ret; + } + s->fd = fd; +#if defined(__FreeBSD__) + /* make sure the door isnt locked at this time */ + if (s->type == FTYPE_CD) + ioctl (s->fd, CDIOCALLOW); +#endif +#if defined(__linux__) + /* close fd so that we can reopen it as needed */ + if (s->type == FTYPE_FD) { + close(s->fd); + s->fd = -1; + s->fd_media_changed = 1; + } +#endif + return 0; +} + +#if defined(__linux__) +/* Note: we do not have a reliable method to detect if the floppy is + present. The current method is to try to open the floppy at every + I/O and to keep it opened during a few hundreds of ms. */ +static int fd_open(BlockDriverState *bs) +{ + BDRVRawState *s = bs->opaque; + int last_media_present; + + if (s->type != FTYPE_FD) + return 0; + last_media_present = (s->fd >= 0); + if (s->fd >= 0 && + (qemu_get_clock(rt_clock) - s->fd_open_time) >= FD_OPEN_TIMEOUT) { + close(s->fd); + s->fd = -1; +#ifdef DEBUG_FLOPPY + printf("Floppy closed\n"); +#endif + } + if (s->fd < 0) { + if (s->fd_got_error && + (qemu_get_clock(rt_clock) - s->fd_error_time) < FD_OPEN_TIMEOUT) { +#ifdef DEBUG_FLOPPY + printf("No floppy (open delayed)\n"); +#endif + return -EIO; + } + s->fd = open(bs->filename, s->fd_open_flags); + if (s->fd < 0) { + s->fd_error_time = qemu_get_clock(rt_clock); + s->fd_got_error = 1; + if (last_media_present) + s->fd_media_changed = 1; +#ifdef DEBUG_FLOPPY + printf("No floppy\n"); +#endif + return -EIO; + } +#ifdef DEBUG_FLOPPY + printf("Floppy opened\n"); +#endif + } + if (!last_media_present) + s->fd_media_changed = 1; + s->fd_open_time = qemu_get_clock(rt_clock); + s->fd_got_error = 0; + return 0; +} + +static int raw_is_inserted(BlockDriverState *bs) +{ + BDRVRawState *s = bs->opaque; + int ret; + + switch(s->type) { + case FTYPE_CD: + ret = ioctl(s->fd, CDROM_DRIVE_STATUS, CDSL_CURRENT); + if (ret == CDS_DISC_OK) + return 1; + else + return 0; + break; + case FTYPE_FD: + ret = fd_open(bs); + return (ret >= 0); + default: + return 1; + } +} + +/* currently only used by fdc.c, but a CD version would be good too */ +static int raw_media_changed(BlockDriverState *bs) +{ + BDRVRawState *s = bs->opaque; + + switch(s->type) { + case FTYPE_FD: + { + int ret; + /* XXX: we do not have a true media changed indication. It + does not work if the floppy is changed without trying + to read it */ + fd_open(bs); + ret = s->fd_media_changed; + s->fd_media_changed = 0; +#ifdef DEBUG_FLOPPY + printf("Floppy changed=%d\n", ret); +#endif + return ret; + } + default: + return -ENOTSUP; + } +} + +static int raw_eject(BlockDriverState *bs, int eject_flag) +{ + BDRVRawState *s = bs->opaque; + + switch(s->type) { + case FTYPE_CD: + if (eject_flag) { + if (ioctl (s->fd, CDROMEJECT, NULL) < 0) + perror("CDROMEJECT"); + } else { + if (ioctl (s->fd, CDROMCLOSETRAY, NULL) < 0) + perror("CDROMEJECT"); + } + break; + case FTYPE_FD: + { + int fd; + if (s->fd >= 0) { + close(s->fd); + s->fd = -1; + } + fd = open(bs->filename, s->fd_open_flags | O_NONBLOCK); + if (fd >= 0) { + if (ioctl(fd, FDEJECT, 0) < 0) + perror("FDEJECT"); + close(fd); + } + } + break; + default: + return -ENOTSUP; + } + return 0; +} + +static int raw_set_locked(BlockDriverState *bs, int locked) +{ + BDRVRawState *s = bs->opaque; + + switch(s->type) { + case FTYPE_CD: + if (ioctl (s->fd, CDROM_LOCKDOOR, locked) < 0) { + /* Note: an error can happen if the distribution automatically + mounts the CD-ROM */ + // perror("CDROM_LOCKDOOR"); + } + break; + default: + return -ENOTSUP; + } + return 0; +} + +static int raw_ioctl(BlockDriverState *bs, unsigned long int req, void *buf) +{ + BDRVRawState *s = bs->opaque; + + return ioctl(s->fd, req, buf); +} + +#ifdef CONFIG_AIO +static BlockDriverAIOCB *raw_aio_ioctl(BlockDriverState *bs, + unsigned long int req, void *buf, + BlockDriverCompletionFunc *cb, void *opaque) +{ + BDRVRawState *s = bs->opaque; + RawAIOCB *acb; + + if (fd_open(bs) < 0) + return NULL; + + acb = qemu_aio_get(bs, cb, opaque); + if (!acb) + return NULL; + acb->aiocb.aio_fildes = s->fd; + acb->aiocb.ev_signo = SIGUSR2; + acb->aiocb.aio_offset = 0; + acb->aiocb.aio_flags = 0; + + acb->next = posix_aio_state->first_aio; + posix_aio_state->first_aio = acb; + + acb->aiocb.aio_ioctl_buf = buf; + acb->aiocb.aio_ioctl_cmd = req; + if (qemu_paio_ioctl(&acb->aiocb) < 0) { + raw_aio_remove(acb); + return NULL; + } + + return &acb->common; +} +#endif + +#elif defined(__FreeBSD__) + +static int fd_open(BlockDriverState *bs) +{ + BDRVRawState *s = bs->opaque; + + /* this is just to ensure s->fd is sane (its called by io ops) */ + if (s->fd >= 0) + return 0; + return -EIO; +} + +static int cd_open(BlockDriverState *bs) +{ +#if defined(__FreeBSD__) + BDRVRawState *s = bs->opaque; + int fd; + + switch(s->type) { + case FTYPE_CD: + /* XXX force reread of possibly changed/newly loaded disc, + * FreeBSD seems to not notice sometimes... */ + if (s->fd >= 0) + close (s->fd); + fd = open(bs->filename, s->cd_open_flags, 0644); + if (fd < 0) { + s->fd = -1; + return -EIO; + } + s->fd = fd; + /* make sure the door isnt locked at this time */ + ioctl (s->fd, CDIOCALLOW); + } +#endif + return 0; +} + +static int raw_is_inserted(BlockDriverState *bs) +{ + BDRVRawState *s = bs->opaque; + + switch(s->type) { + case FTYPE_CD: + return (raw_getlength(bs) > 0); + case FTYPE_FD: + /* XXX handle this */ + /* FALLTHRU */ + default: + return 1; + } +} + +static int raw_media_changed(BlockDriverState *bs) +{ + return -ENOTSUP; +} + +static int raw_eject(BlockDriverState *bs, int eject_flag) +{ + BDRVRawState *s = bs->opaque; + + switch(s->type) { + case FTYPE_CD: + if (s->fd < 0) + return -ENOTSUP; + (void) ioctl (s->fd, CDIOCALLOW); + if (eject_flag) { + if (ioctl (s->fd, CDIOCEJECT) < 0) + perror("CDIOCEJECT"); + } else { + if (ioctl (s->fd, CDIOCCLOSE) < 0) + perror("CDIOCCLOSE"); + } + if (cd_open(bs) < 0) + return -ENOTSUP; + break; + case FTYPE_FD: + /* XXX handle this */ + /* FALLTHRU */ + default: + return -ENOTSUP; + } + return 0; +} + +static int raw_set_locked(BlockDriverState *bs, int locked) +{ + BDRVRawState *s = bs->opaque; + + switch(s->type) { + case FTYPE_CD: + if (s->fd < 0) + return -ENOTSUP; + if (ioctl (s->fd, (locked ? CDIOCPREVENT : CDIOCALLOW)) < 0) { + /* Note: an error can happen if the distribution automatically + mounts the CD-ROM */ + // perror("CDROM_LOCKDOOR"); + } + break; + default: + return -ENOTSUP; + } + return 0; +} + +static int raw_ioctl(BlockDriverState *bs, unsigned long int req, void *buf) +{ + return -ENOTSUP; +} +#else /* !linux && !FreeBSD */ + +static int fd_open(BlockDriverState *bs) +{ + return 0; +} + +static int raw_is_inserted(BlockDriverState *bs) +{ + return 1; +} + +static int raw_media_changed(BlockDriverState *bs) +{ + return -ENOTSUP; +} + +static int raw_eject(BlockDriverState *bs, int eject_flag) +{ + return -ENOTSUP; +} + +static int raw_set_locked(BlockDriverState *bs, int locked) +{ + return -ENOTSUP; +} + +static int raw_ioctl(BlockDriverState *bs, unsigned long int req, void *buf) +{ + return -ENOTSUP; +} + +static BlockDriverAIOCB *raw_aio_ioctl(BlockDriverState *bs, + unsigned long int req, void *buf, + BlockDriverCompletionFunc *cb, void *opaque) +{ + return NULL; +} +#endif /* !linux && !FreeBSD */ + +#if defined(__linux__) || defined(__FreeBSD__) +static int hdev_create(const char *filename, int64_t total_size, + const char *backing_file, int flags) +{ + int fd; + int ret = 0; + struct stat stat_buf; + + if (flags || backing_file) + return -ENOTSUP; + + fd = open(filename, O_WRONLY | O_BINARY); + if (fd < 0) + return -EIO; + + if (fstat(fd, &stat_buf) < 0) + ret = -EIO; + else if (!S_ISBLK(stat_buf.st_mode)) + ret = -EIO; + else if (lseek(fd, 0, SEEK_END) < total_size * 512) + ret = -ENOSPC; + + close(fd); + return ret; +} + +#else /* !(linux || freebsd) */ + +static int hdev_create(const char *filename, int64_t total_size, + const char *backing_file, int flags) +{ + return -ENOTSUP; +} +#endif + +static BlockDriver bdrv_host_device = { + .format_name = "host_device", + .instance_size = sizeof(BDRVRawState), + .bdrv_open = hdev_open, + .bdrv_close = raw_close, + .bdrv_create = hdev_create, + .bdrv_flush = raw_flush, + +#ifdef CONFIG_AIO + .bdrv_aio_readv = raw_aio_readv, + .bdrv_aio_writev = raw_aio_writev, + .bdrv_aio_cancel = raw_aio_cancel, + .aiocb_size = sizeof(RawAIOCB), +#endif + + .bdrv_read = raw_read, + .bdrv_write = raw_write, + .bdrv_getlength = raw_getlength, + + /* removable device support */ + .bdrv_is_inserted = raw_is_inserted, + .bdrv_media_changed = raw_media_changed, + .bdrv_eject = raw_eject, + .bdrv_set_locked = raw_set_locked, + /* generic scsi device */ + .bdrv_ioctl = raw_ioctl, +#ifdef CONFIG_AIO + .bdrv_aio_ioctl = raw_aio_ioctl, +#endif +}; + +static void bdrv_raw_init(void) +{ + bdrv_register(&bdrv_raw); + bdrv_register(&bdrv_host_device); +} + +block_init(bdrv_raw_init); diff --git a/block/raw-win32.c b/block/raw-win32.c new file mode 100644 index 000000000..ab3abd636 --- /dev/null +++ b/block/raw-win32.c @@ -0,0 +1,394 @@ +/* + * Block driver for RAW files (win32) + * + * Copyright (c) 2006 Fabrice Bellard + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +#include "qemu-common.h" +#include "qemu-timer.h" +#include "block_int.h" +#include "module.h" +#include +#include + +#define FTYPE_FILE 0 +#define FTYPE_CD 1 +#define FTYPE_HARDDISK 2 + +typedef struct BDRVRawState { + HANDLE hfile; + int type; + char drive_path[16]; /* format: "d:\" */ +} BDRVRawState; + +int qemu_ftruncate64(int fd, int64_t length) +{ + LARGE_INTEGER li; + LONG high; + HANDLE h; + BOOL res; + + if ((GetVersion() & 0x80000000UL) && (length >> 32) != 0) + return -1; + + h = (HANDLE)_get_osfhandle(fd); + + /* get current position, ftruncate do not change position */ + li.HighPart = 0; + li.LowPart = SetFilePointer (h, 0, &li.HighPart, FILE_CURRENT); + if (li.LowPart == 0xffffffffUL && GetLastError() != NO_ERROR) + return -1; + + high = length >> 32; + if (!SetFilePointer(h, (DWORD) length, &high, FILE_BEGIN)) + return -1; + res = SetEndOfFile(h); + + /* back to old position */ + SetFilePointer(h, li.LowPart, &li.HighPart, FILE_BEGIN); + return res ? 0 : -1; +} + +static int set_sparse(int fd) +{ + DWORD returned; + return (int) DeviceIoControl((HANDLE)_get_osfhandle(fd), FSCTL_SET_SPARSE, + NULL, 0, NULL, 0, &returned, NULL); +} + +static int raw_open(BlockDriverState *bs, const char *filename, int flags) +{ + BDRVRawState *s = bs->opaque; + int access_flags, create_flags; + DWORD overlapped; + + s->type = FTYPE_FILE; + + if ((flags & BDRV_O_ACCESS) == O_RDWR) { + access_flags = GENERIC_READ | GENERIC_WRITE; + } else { + access_flags = GENERIC_READ; + } + if (flags & BDRV_O_CREAT) { + create_flags = CREATE_ALWAYS; + } else { + create_flags = OPEN_EXISTING; + } + overlapped = FILE_ATTRIBUTE_NORMAL; + if ((flags & BDRV_O_NOCACHE)) + overlapped |= FILE_FLAG_NO_BUFFERING | FILE_FLAG_WRITE_THROUGH; + else if (!(flags & BDRV_O_CACHE_WB)) + overlapped |= FILE_FLAG_WRITE_THROUGH; + s->hfile = CreateFile(filename, access_flags, + FILE_SHARE_READ, NULL, + create_flags, overlapped, NULL); + if (s->hfile == INVALID_HANDLE_VALUE) { + int err = GetLastError(); + + if (err == ERROR_ACCESS_DENIED) + return -EACCES; + return -1; + } + return 0; +} + +static int raw_read(BlockDriverState *bs, int64_t sector_num, + uint8_t *buf, int nb_sectors) +{ + BDRVRawState *s = bs->opaque; + OVERLAPPED ov; + DWORD ret_count; + int ret; + int64_t offset = sector_num * 512; + int count = nb_sectors * 512; + + memset(&ov, 0, sizeof(ov)); + ov.Offset = offset; + ov.OffsetHigh = offset >> 32; + ret = ReadFile(s->hfile, buf, count, &ret_count, &ov); + if (!ret) + return ret_count; + if (ret_count == count) + ret_count = 0; + return ret_count; +} + +static int raw_write(BlockDriverState *bs, int64_t sector_num, + const uint8_t *buf, int nb_sectors) +{ + BDRVRawState *s = bs->opaque; + OVERLAPPED ov; + DWORD ret_count; + int ret; + int64_t offset = sector_num * 512; + int count = nb_sectors * 512; + + memset(&ov, 0, sizeof(ov)); + ov.Offset = offset; + ov.OffsetHigh = offset >> 32; + ret = WriteFile(s->hfile, buf, count, &ret_count, &ov); + if (!ret) + return ret_count; + if (ret_count == count) + ret_count = 0; + return ret_count; +} + +static void raw_flush(BlockDriverState *bs) +{ + BDRVRawState *s = bs->opaque; + FlushFileBuffers(s->hfile); +} + +static void raw_close(BlockDriverState *bs) +{ + BDRVRawState *s = bs->opaque; + CloseHandle(s->hfile); +} + +static int raw_truncate(BlockDriverState *bs, int64_t offset) +{ + BDRVRawState *s = bs->opaque; + LONG low, high; + + low = offset; + high = offset >> 32; + if (!SetFilePointer(s->hfile, low, &high, FILE_BEGIN)) + return -EIO; + if (!SetEndOfFile(s->hfile)) + return -EIO; + return 0; +} + +static int64_t raw_getlength(BlockDriverState *bs) +{ + BDRVRawState *s = bs->opaque; + LARGE_INTEGER l; + ULARGE_INTEGER available, total, total_free; + DISK_GEOMETRY_EX dg; + DWORD count; + BOOL status; + + switch(s->type) { + case FTYPE_FILE: + l.LowPart = GetFileSize(s->hfile, (PDWORD)&l.HighPart); + if (l.LowPart == 0xffffffffUL && GetLastError() != NO_ERROR) + return -EIO; + break; + case FTYPE_CD: + if (!GetDiskFreeSpaceEx(s->drive_path, &available, &total, &total_free)) + return -EIO; + l.QuadPart = total.QuadPart; + break; + case FTYPE_HARDDISK: + status = DeviceIoControl(s->hfile, IOCTL_DISK_GET_DRIVE_GEOMETRY_EX, + NULL, 0, &dg, sizeof(dg), &count, NULL); + if (status != 0) { + l = dg.DiskSize; + } + break; + default: + return -EIO; + } + return l.QuadPart; +} + +static int raw_create(const char *filename, int64_t total_size, + const char *backing_file, int flags) +{ + int fd; + + if (flags || backing_file) + return -ENOTSUP; + + fd = open(filename, O_WRONLY | O_CREAT | O_TRUNC | O_BINARY, + 0644); + if (fd < 0) + return -EIO; + set_sparse(fd); + ftruncate(fd, total_size * 512); + close(fd); + return 0; +} + +static BlockDriver bdrv_raw = { + .format_name = "raw", + .instance_size = sizeof(BDRVRawState), + .bdrv_open = raw_open, + .bdrv_close = raw_close, + .bdrv_create = raw_create, + .bdrv_flush = raw_flush, + .bdrv_read = raw_read, + .bdrv_write = raw_write, + .bdrv_truncate = raw_truncate, + .bdrv_getlength = raw_getlength, +}; + +/***********************************************/ +/* host device */ + +static int find_cdrom(char *cdrom_name, int cdrom_name_size) +{ + char drives[256], *pdrv = drives; + UINT type; + + memset(drives, 0, sizeof(drives)); + GetLogicalDriveStrings(sizeof(drives), drives); + while(pdrv[0] != '\0') { + type = GetDriveType(pdrv); + switch(type) { + case DRIVE_CDROM: + snprintf(cdrom_name, cdrom_name_size, "\\\\.\\%c:", pdrv[0]); + return 0; + break; + } + pdrv += lstrlen(pdrv) + 1; + } + return -1; +} + +static int find_device_type(BlockDriverState *bs, const char *filename) +{ + BDRVRawState *s = bs->opaque; + UINT type; + const char *p; + + if (strstart(filename, "\\\\.\\", &p) || + strstart(filename, "//./", &p)) { + if (stristart(p, "PhysicalDrive", NULL)) + return FTYPE_HARDDISK; + snprintf(s->drive_path, sizeof(s->drive_path), "%c:\\", p[0]); + type = GetDriveType(s->drive_path); + switch (type) { + case DRIVE_REMOVABLE: + case DRIVE_FIXED: + return FTYPE_HARDDISK; + case DRIVE_CDROM: + return FTYPE_CD; + default: + return FTYPE_FILE; + } + } else { + return FTYPE_FILE; + } +} + +static int hdev_open(BlockDriverState *bs, const char *filename, int flags) +{ + BDRVRawState *s = bs->opaque; + int access_flags, create_flags; + DWORD overlapped; + char device_name[64]; + + if (strstart(filename, "/dev/cdrom", NULL)) { + if (find_cdrom(device_name, sizeof(device_name)) < 0) + return -ENOENT; + filename = device_name; + } else { + /* transform drive letters into device name */ + if (((filename[0] >= 'a' && filename[0] <= 'z') || + (filename[0] >= 'A' && filename[0] <= 'Z')) && + filename[1] == ':' && filename[2] == '\0') { + snprintf(device_name, sizeof(device_name), "\\\\.\\%c:", filename[0]); + filename = device_name; + } + } + s->type = find_device_type(bs, filename); + + if ((flags & BDRV_O_ACCESS) == O_RDWR) { + access_flags = GENERIC_READ | GENERIC_WRITE; + } else { + access_flags = GENERIC_READ; + } + create_flags = OPEN_EXISTING; + + overlapped = FILE_ATTRIBUTE_NORMAL; + if ((flags & BDRV_O_NOCACHE)) + overlapped |= FILE_FLAG_NO_BUFFERING | FILE_FLAG_WRITE_THROUGH; + else if (!(flags & BDRV_O_CACHE_WB)) + overlapped |= FILE_FLAG_WRITE_THROUGH; + s->hfile = CreateFile(filename, access_flags, + FILE_SHARE_READ, NULL, + create_flags, overlapped, NULL); + if (s->hfile == INVALID_HANDLE_VALUE) { + int err = GetLastError(); + + if (err == ERROR_ACCESS_DENIED) + return -EACCES; + return -1; + } + return 0; +} + +#if 0 +/***********************************************/ +/* removable device additional commands */ + +static int raw_is_inserted(BlockDriverState *bs) +{ + return 1; +} + +static int raw_media_changed(BlockDriverState *bs) +{ + return -ENOTSUP; +} + +static int raw_eject(BlockDriverState *bs, int eject_flag) +{ + DWORD ret_count; + + if (s->type == FTYPE_FILE) + return -ENOTSUP; + if (eject_flag) { + DeviceIoControl(s->hfile, IOCTL_STORAGE_EJECT_MEDIA, + NULL, 0, NULL, 0, &lpBytesReturned, NULL); + } else { + DeviceIoControl(s->hfile, IOCTL_STORAGE_LOAD_MEDIA, + NULL, 0, NULL, 0, &lpBytesReturned, NULL); + } +} + +static int raw_set_locked(BlockDriverState *bs, int locked) +{ + return -ENOTSUP; +} +#endif + +static BlockDriver bdrv_host_device = { + .format_name = "host_device", + .instance_size = sizeof(BDRVRawState), + .bdrv_open = hdev_open, + .bdrv_close = raw_close, + .bdrv_flush = raw_flush, + + .bdrv_read = raw_read, + .bdrv_write = raw_write, + .bdrv_getlength = raw_getlength, +}; + +static void bdrv_raw_init(void) +{ + bdrv_register(&bdrv_raw); + bdrv_register(&bdrv_host_device); + return 0; +} + +block_init(bdrv_raw_init); diff --git a/block/vmdk.c b/block/vmdk.c new file mode 100644 index 000000000..13866e9b0 --- /dev/null +++ b/block/vmdk.c @@ -0,0 +1,833 @@ +/* + * Block driver for the VMDK format + * + * Copyright (c) 2004 Fabrice Bellard + * Copyright (c) 2005 Filip Navara + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#include "qemu-common.h" +#include "block_int.h" +#include "module.h" + +#define VMDK3_MAGIC (('C' << 24) | ('O' << 16) | ('W' << 8) | 'D') +#define VMDK4_MAGIC (('K' << 24) | ('D' << 16) | ('M' << 8) | 'V') + +typedef struct { + uint32_t version; + uint32_t flags; + uint32_t disk_sectors; + uint32_t granularity; + uint32_t l1dir_offset; + uint32_t l1dir_size; + uint32_t file_sectors; + uint32_t cylinders; + uint32_t heads; + uint32_t sectors_per_track; +} VMDK3Header; + +typedef struct { + uint32_t version; + uint32_t flags; + int64_t capacity; + int64_t granularity; + int64_t desc_offset; + int64_t desc_size; + int32_t num_gtes_per_gte; + int64_t rgd_offset; + int64_t gd_offset; + int64_t grain_offset; + char filler[1]; + char check_bytes[4]; +} __attribute__((packed)) VMDK4Header; + +#define L2_CACHE_SIZE 16 + +typedef struct BDRVVmdkState { + BlockDriverState *hd; + int64_t l1_table_offset; + int64_t l1_backup_table_offset; + uint32_t *l1_table; + uint32_t *l1_backup_table; + unsigned int l1_size; + uint32_t l1_entry_sectors; + + unsigned int l2_size; + uint32_t *l2_cache; + uint32_t l2_cache_offsets[L2_CACHE_SIZE]; + uint32_t l2_cache_counts[L2_CACHE_SIZE]; + + unsigned int cluster_sectors; + uint32_t parent_cid; + int is_parent; +} BDRVVmdkState; + +typedef struct VmdkMetaData { + uint32_t offset; + unsigned int l1_index; + unsigned int l2_index; + unsigned int l2_offset; + int valid; +} VmdkMetaData; + +typedef struct ActiveBDRVState{ + BlockDriverState *hd; // active image handler + uint64_t cluster_offset; // current write offset +}ActiveBDRVState; + +static ActiveBDRVState activeBDRV; + + +static int vmdk_probe(const uint8_t *buf, int buf_size, const char *filename) +{ + uint32_t magic; + + if (buf_size < 4) + return 0; + magic = be32_to_cpu(*(uint32_t *)buf); + if (magic == VMDK3_MAGIC || + magic == VMDK4_MAGIC) + return 100; + else + return 0; +} + +#define CHECK_CID 1 + +#define SECTOR_SIZE 512 +#define DESC_SIZE 20*SECTOR_SIZE // 20 sectors of 512 bytes each +#define HEADER_SIZE 512 // first sector of 512 bytes + +static uint32_t vmdk_read_cid(BlockDriverState *bs, int parent) +{ + BDRVVmdkState *s = bs->opaque; + char desc[DESC_SIZE]; + uint32_t cid; + const char *p_name, *cid_str; + size_t cid_str_size; + + /* the descriptor offset = 0x200 */ + if (bdrv_pread(s->hd, 0x200, desc, DESC_SIZE) != DESC_SIZE) + return 0; + + if (parent) { + cid_str = "parentCID"; + cid_str_size = sizeof("parentCID"); + } else { + cid_str = "CID"; + cid_str_size = sizeof("CID"); + } + + if ((p_name = strstr(desc,cid_str)) != NULL) { + p_name += cid_str_size; + sscanf(p_name,"%x",&cid); + } + + return cid; +} + +static int vmdk_write_cid(BlockDriverState *bs, uint32_t cid) +{ + BDRVVmdkState *s = bs->opaque; + char desc[DESC_SIZE], tmp_desc[DESC_SIZE]; + char *p_name, *tmp_str; + + /* the descriptor offset = 0x200 */ + if (bdrv_pread(s->hd, 0x200, desc, DESC_SIZE) != DESC_SIZE) + return -1; + + tmp_str = strstr(desc,"parentCID"); + pstrcpy(tmp_desc, sizeof(tmp_desc), tmp_str); + if ((p_name = strstr(desc,"CID")) != NULL) { + p_name += sizeof("CID"); + snprintf(p_name, sizeof(desc) - (p_name - desc), "%x\n", cid); + pstrcat(desc, sizeof(desc), tmp_desc); + } + + if (bdrv_pwrite(s->hd, 0x200, desc, DESC_SIZE) != DESC_SIZE) + return -1; + return 0; +} + +static int vmdk_is_cid_valid(BlockDriverState *bs) +{ +#ifdef CHECK_CID + BDRVVmdkState *s = bs->opaque; + BlockDriverState *p_bs = s->hd->backing_hd; + uint32_t cur_pcid; + + if (p_bs) { + cur_pcid = vmdk_read_cid(p_bs,0); + if (s->parent_cid != cur_pcid) + // CID not valid + return 0; + } +#endif + // CID valid + return 1; +} + +static int vmdk_snapshot_create(const char *filename, const char *backing_file) +{ + int snp_fd, p_fd; + uint32_t p_cid; + char *p_name, *gd_buf, *rgd_buf; + const char *real_filename, *temp_str; + VMDK4Header header; + uint32_t gde_entries, gd_size; + int64_t gd_offset, rgd_offset, capacity, gt_size; + char p_desc[DESC_SIZE], s_desc[DESC_SIZE], hdr[HEADER_SIZE]; + static const char desc_template[] = + "# Disk DescriptorFile\n" + "version=1\n" + "CID=%x\n" + "parentCID=%x\n" + "createType=\"monolithicSparse\"\n" + "parentFileNameHint=\"%s\"\n" + "\n" + "# Extent description\n" + "RW %u SPARSE \"%s\"\n" + "\n" + "# The Disk Data Base \n" + "#DDB\n" + "\n"; + + snp_fd = open(filename, O_RDWR | O_CREAT | O_TRUNC | O_BINARY | O_LARGEFILE, 0644); + if (snp_fd < 0) + return -1; + p_fd = open(backing_file, O_RDONLY | O_BINARY | O_LARGEFILE); + if (p_fd < 0) { + close(snp_fd); + return -1; + } + + /* read the header */ + if (lseek(p_fd, 0x0, SEEK_SET) == -1) + goto fail; + if (read(p_fd, hdr, HEADER_SIZE) != HEADER_SIZE) + goto fail; + + /* write the header */ + if (lseek(snp_fd, 0x0, SEEK_SET) == -1) + goto fail; + if (write(snp_fd, hdr, HEADER_SIZE) == -1) + goto fail; + + memset(&header, 0, sizeof(header)); + memcpy(&header,&hdr[4], sizeof(header)); // skip the VMDK4_MAGIC + + ftruncate(snp_fd, header.grain_offset << 9); + /* the descriptor offset = 0x200 */ + if (lseek(p_fd, 0x200, SEEK_SET) == -1) + goto fail; + if (read(p_fd, p_desc, DESC_SIZE) != DESC_SIZE) + goto fail; + + if ((p_name = strstr(p_desc,"CID")) != NULL) { + p_name += sizeof("CID"); + sscanf(p_name,"%x",&p_cid); + } + + real_filename = filename; + if ((temp_str = strrchr(real_filename, '\\')) != NULL) + real_filename = temp_str + 1; + if ((temp_str = strrchr(real_filename, '/')) != NULL) + real_filename = temp_str + 1; + if ((temp_str = strrchr(real_filename, ':')) != NULL) + real_filename = temp_str + 1; + + snprintf(s_desc, sizeof(s_desc), desc_template, p_cid, p_cid, backing_file, + (uint32_t)header.capacity, real_filename); + + /* write the descriptor */ + if (lseek(snp_fd, 0x200, SEEK_SET) == -1) + goto fail; + if (write(snp_fd, s_desc, strlen(s_desc)) == -1) + goto fail; + + gd_offset = header.gd_offset * SECTOR_SIZE; // offset of GD table + rgd_offset = header.rgd_offset * SECTOR_SIZE; // offset of RGD table + capacity = header.capacity * SECTOR_SIZE; // Extent size + /* + * Each GDE span 32M disk, means: + * 512 GTE per GT, each GTE points to grain + */ + gt_size = (int64_t)header.num_gtes_per_gte * header.granularity * SECTOR_SIZE; + if (!gt_size) + goto fail; + gde_entries = (uint32_t)(capacity / gt_size); // number of gde/rgde + gd_size = gde_entries * sizeof(uint32_t); + + /* write RGD */ + rgd_buf = qemu_malloc(gd_size); + if (lseek(p_fd, rgd_offset, SEEK_SET) == -1) + goto fail_rgd; + if (read(p_fd, rgd_buf, gd_size) != gd_size) + goto fail_rgd; + if (lseek(snp_fd, rgd_offset, SEEK_SET) == -1) + goto fail_rgd; + if (write(snp_fd, rgd_buf, gd_size) == -1) + goto fail_rgd; + qemu_free(rgd_buf); + + /* write GD */ + gd_buf = qemu_malloc(gd_size); + if (lseek(p_fd, gd_offset, SEEK_SET) == -1) + goto fail_gd; + if (read(p_fd, gd_buf, gd_size) != gd_size) + goto fail_gd; + if (lseek(snp_fd, gd_offset, SEEK_SET) == -1) + goto fail_gd; + if (write(snp_fd, gd_buf, gd_size) == -1) + goto fail_gd; + qemu_free(gd_buf); + + close(p_fd); + close(snp_fd); + return 0; + + fail_gd: + qemu_free(gd_buf); + fail_rgd: + qemu_free(rgd_buf); + fail: + close(p_fd); + close(snp_fd); + return -1; +} + +static void vmdk_parent_close(BlockDriverState *bs) +{ + if (bs->backing_hd) + bdrv_close(bs->backing_hd); +} + +static int parent_open = 0; +static int vmdk_parent_open(BlockDriverState *bs, const char * filename) +{ + BDRVVmdkState *s = bs->opaque; + char *p_name; + char desc[DESC_SIZE]; + char parent_img_name[1024]; + + /* the descriptor offset = 0x200 */ + if (bdrv_pread(s->hd, 0x200, desc, DESC_SIZE) != DESC_SIZE) + return -1; + + if ((p_name = strstr(desc,"parentFileNameHint")) != NULL) { + char *end_name; + struct stat file_buf; + + p_name += sizeof("parentFileNameHint") + 1; + if ((end_name = strchr(p_name,'\"')) == NULL) + return -1; + if ((end_name - p_name) > sizeof (s->hd->backing_file) - 1) + return -1; + + pstrcpy(s->hd->backing_file, end_name - p_name + 1, p_name); + if (stat(s->hd->backing_file, &file_buf) != 0) { + path_combine(parent_img_name, sizeof(parent_img_name), + filename, s->hd->backing_file); + } else { + pstrcpy(parent_img_name, sizeof(parent_img_name), + s->hd->backing_file); + } + + s->hd->backing_hd = bdrv_new(""); + if (!s->hd->backing_hd) { + failure: + bdrv_close(s->hd); + return -1; + } + parent_open = 1; + if (bdrv_open(s->hd->backing_hd, parent_img_name, BDRV_O_RDONLY) < 0) + goto failure; + parent_open = 0; + } + + return 0; +} + +static int vmdk_open(BlockDriverState *bs, const char *filename, int flags) +{ + BDRVVmdkState *s = bs->opaque; + uint32_t magic; + int l1_size, i, ret; + + if (parent_open) + // Parent must be opened as RO. + flags = BDRV_O_RDONLY; + + ret = bdrv_file_open(&s->hd, filename, flags); + if (ret < 0) + return ret; + if (bdrv_pread(s->hd, 0, &magic, sizeof(magic)) != sizeof(magic)) + goto fail; + + magic = be32_to_cpu(magic); + if (magic == VMDK3_MAGIC) { + VMDK3Header header; + + if (bdrv_pread(s->hd, sizeof(magic), &header, sizeof(header)) != sizeof(header)) + goto fail; + s->cluster_sectors = le32_to_cpu(header.granularity); + s->l2_size = 1 << 9; + s->l1_size = 1 << 6; + bs->total_sectors = le32_to_cpu(header.disk_sectors); + s->l1_table_offset = le32_to_cpu(header.l1dir_offset) << 9; + s->l1_backup_table_offset = 0; + s->l1_entry_sectors = s->l2_size * s->cluster_sectors; + } else if (magic == VMDK4_MAGIC) { + VMDK4Header header; + + if (bdrv_pread(s->hd, sizeof(magic), &header, sizeof(header)) != sizeof(header)) + goto fail; + bs->total_sectors = le64_to_cpu(header.capacity); + s->cluster_sectors = le64_to_cpu(header.granularity); + s->l2_size = le32_to_cpu(header.num_gtes_per_gte); + s->l1_entry_sectors = s->l2_size * s->cluster_sectors; + if (s->l1_entry_sectors <= 0) + goto fail; + s->l1_size = (bs->total_sectors + s->l1_entry_sectors - 1) + / s->l1_entry_sectors; + s->l1_table_offset = le64_to_cpu(header.rgd_offset) << 9; + s->l1_backup_table_offset = le64_to_cpu(header.gd_offset) << 9; + + if (parent_open) + s->is_parent = 1; + else + s->is_parent = 0; + + // try to open parent images, if exist + if (vmdk_parent_open(bs, filename) != 0) + goto fail; + // write the CID once after the image creation + s->parent_cid = vmdk_read_cid(bs,1); + } else { + goto fail; + } + + /* read the L1 table */ + l1_size = s->l1_size * sizeof(uint32_t); + s->l1_table = qemu_malloc(l1_size); + if (bdrv_pread(s->hd, s->l1_table_offset, s->l1_table, l1_size) != l1_size) + goto fail; + for(i = 0; i < s->l1_size; i++) { + le32_to_cpus(&s->l1_table[i]); + } + + if (s->l1_backup_table_offset) { + s->l1_backup_table = qemu_malloc(l1_size); + if (bdrv_pread(s->hd, s->l1_backup_table_offset, s->l1_backup_table, l1_size) != l1_size) + goto fail; + for(i = 0; i < s->l1_size; i++) { + le32_to_cpus(&s->l1_backup_table[i]); + } + } + + s->l2_cache = qemu_malloc(s->l2_size * L2_CACHE_SIZE * sizeof(uint32_t)); + return 0; + fail: + qemu_free(s->l1_backup_table); + qemu_free(s->l1_table); + qemu_free(s->l2_cache); + bdrv_delete(s->hd); + return -1; +} + +static uint64_t get_cluster_offset(BlockDriverState *bs, VmdkMetaData *m_data, + uint64_t offset, int allocate); + +static int get_whole_cluster(BlockDriverState *bs, uint64_t cluster_offset, + uint64_t offset, int allocate) +{ + uint64_t parent_cluster_offset; + BDRVVmdkState *s = bs->opaque; + uint8_t whole_grain[s->cluster_sectors*512]; // 128 sectors * 512 bytes each = grain size 64KB + + // we will be here if it's first write on non-exist grain(cluster). + // try to read from parent image, if exist + if (s->hd->backing_hd) { + BDRVVmdkState *ps = s->hd->backing_hd->opaque; + + if (!vmdk_is_cid_valid(bs)) + return -1; + + parent_cluster_offset = get_cluster_offset(s->hd->backing_hd, NULL, offset, allocate); + + if (parent_cluster_offset) { + BDRVVmdkState *act_s = activeBDRV.hd->opaque; + + if (bdrv_pread(ps->hd, parent_cluster_offset, whole_grain, ps->cluster_sectors*512) != ps->cluster_sectors*512) + return -1; + + //Write grain only into the active image + if (bdrv_pwrite(act_s->hd, activeBDRV.cluster_offset << 9, whole_grain, sizeof(whole_grain)) != sizeof(whole_grain)) + return -1; + } + } + return 0; +} + +static int vmdk_L2update(BlockDriverState *bs, VmdkMetaData *m_data) +{ + BDRVVmdkState *s = bs->opaque; + + /* update L2 table */ + if (bdrv_pwrite(s->hd, ((int64_t)m_data->l2_offset * 512) + (m_data->l2_index * sizeof(m_data->offset)), + &(m_data->offset), sizeof(m_data->offset)) != sizeof(m_data->offset)) + return -1; + /* update backup L2 table */ + if (s->l1_backup_table_offset != 0) { + m_data->l2_offset = s->l1_backup_table[m_data->l1_index]; + if (bdrv_pwrite(s->hd, ((int64_t)m_data->l2_offset * 512) + (m_data->l2_index * sizeof(m_data->offset)), + &(m_data->offset), sizeof(m_data->offset)) != sizeof(m_data->offset)) + return -1; + } + + return 0; +} + +static uint64_t get_cluster_offset(BlockDriverState *bs, VmdkMetaData *m_data, + uint64_t offset, int allocate) +{ + BDRVVmdkState *s = bs->opaque; + unsigned int l1_index, l2_offset, l2_index; + int min_index, i, j; + uint32_t min_count, *l2_table, tmp = 0; + uint64_t cluster_offset; + + if (m_data) + m_data->valid = 0; + + l1_index = (offset >> 9) / s->l1_entry_sectors; + if (l1_index >= s->l1_size) + return 0; + l2_offset = s->l1_table[l1_index]; + if (!l2_offset) + return 0; + for(i = 0; i < L2_CACHE_SIZE; i++) { + if (l2_offset == s->l2_cache_offsets[i]) { + /* increment the hit count */ + if (++s->l2_cache_counts[i] == 0xffffffff) { + for(j = 0; j < L2_CACHE_SIZE; j++) { + s->l2_cache_counts[j] >>= 1; + } + } + l2_table = s->l2_cache + (i * s->l2_size); + goto found; + } + } + /* not found: load a new entry in the least used one */ + min_index = 0; + min_count = 0xffffffff; + for(i = 0; i < L2_CACHE_SIZE; i++) { + if (s->l2_cache_counts[i] < min_count) { + min_count = s->l2_cache_counts[i]; + min_index = i; + } + } + l2_table = s->l2_cache + (min_index * s->l2_size); + if (bdrv_pread(s->hd, (int64_t)l2_offset * 512, l2_table, s->l2_size * sizeof(uint32_t)) != + s->l2_size * sizeof(uint32_t)) + return 0; + + s->l2_cache_offsets[min_index] = l2_offset; + s->l2_cache_counts[min_index] = 1; + found: + l2_index = ((offset >> 9) / s->cluster_sectors) % s->l2_size; + cluster_offset = le32_to_cpu(l2_table[l2_index]); + + if (!cluster_offset) { + if (!allocate) + return 0; + // Avoid the L2 tables update for the images that have snapshots. + if (!s->is_parent) { + cluster_offset = bdrv_getlength(s->hd); + bdrv_truncate(s->hd, cluster_offset + (s->cluster_sectors << 9)); + + cluster_offset >>= 9; + tmp = cpu_to_le32(cluster_offset); + l2_table[l2_index] = tmp; + // Save the active image state + activeBDRV.cluster_offset = cluster_offset; + activeBDRV.hd = bs; + } + /* First of all we write grain itself, to avoid race condition + * that may to corrupt the image. + * This problem may occur because of insufficient space on host disk + * or inappropriate VM shutdown. + */ + if (get_whole_cluster(bs, cluster_offset, offset, allocate) == -1) + return 0; + + if (m_data) { + m_data->offset = tmp; + m_data->l1_index = l1_index; + m_data->l2_index = l2_index; + m_data->l2_offset = l2_offset; + m_data->valid = 1; + } + } + cluster_offset <<= 9; + return cluster_offset; +} + +static int vmdk_is_allocated(BlockDriverState *bs, int64_t sector_num, + int nb_sectors, int *pnum) +{ + BDRVVmdkState *s = bs->opaque; + int index_in_cluster, n; + uint64_t cluster_offset; + + cluster_offset = get_cluster_offset(bs, NULL, sector_num << 9, 0); + index_in_cluster = sector_num % s->cluster_sectors; + n = s->cluster_sectors - index_in_cluster; + if (n > nb_sectors) + n = nb_sectors; + *pnum = n; + return (cluster_offset != 0); +} + +static int vmdk_read(BlockDriverState *bs, int64_t sector_num, + uint8_t *buf, int nb_sectors) +{ + BDRVVmdkState *s = bs->opaque; + int index_in_cluster, n, ret; + uint64_t cluster_offset; + + while (nb_sectors > 0) { + cluster_offset = get_cluster_offset(bs, NULL, sector_num << 9, 0); + index_in_cluster = sector_num % s->cluster_sectors; + n = s->cluster_sectors - index_in_cluster; + if (n > nb_sectors) + n = nb_sectors; + if (!cluster_offset) { + // try to read from parent image, if exist + if (s->hd->backing_hd) { + if (!vmdk_is_cid_valid(bs)) + return -1; + ret = bdrv_read(s->hd->backing_hd, sector_num, buf, n); + if (ret < 0) + return -1; + } else { + memset(buf, 0, 512 * n); + } + } else { + if(bdrv_pread(s->hd, cluster_offset + index_in_cluster * 512, buf, n * 512) != n * 512) + return -1; + } + nb_sectors -= n; + sector_num += n; + buf += n * 512; + } + return 0; +} + +static int vmdk_write(BlockDriverState *bs, int64_t sector_num, + const uint8_t *buf, int nb_sectors) +{ + BDRVVmdkState *s = bs->opaque; + VmdkMetaData m_data; + int index_in_cluster, n; + uint64_t cluster_offset; + static int cid_update = 0; + + if (sector_num > bs->total_sectors) { + fprintf(stderr, + "(VMDK) Wrong offset: sector_num=0x%" PRIx64 + " total_sectors=0x%" PRIx64 "\n", + sector_num, bs->total_sectors); + return -1; + } + + while (nb_sectors > 0) { + index_in_cluster = sector_num & (s->cluster_sectors - 1); + n = s->cluster_sectors - index_in_cluster; + if (n > nb_sectors) + n = nb_sectors; + cluster_offset = get_cluster_offset(bs, &m_data, sector_num << 9, 1); + if (!cluster_offset) + return -1; + + if (bdrv_pwrite(s->hd, cluster_offset + index_in_cluster * 512, buf, n * 512) != n * 512) + return -1; + if (m_data.valid) { + /* update L2 tables */ + if (vmdk_L2update(bs, &m_data) == -1) + return -1; + } + nb_sectors -= n; + sector_num += n; + buf += n * 512; + + // update CID on the first write every time the virtual disk is opened + if (!cid_update) { + vmdk_write_cid(bs, time(NULL)); + cid_update++; + } + } + return 0; +} + +static int vmdk_create(const char *filename, int64_t total_size, + const char *backing_file, int flags) +{ + int fd, i; + VMDK4Header header; + uint32_t tmp, magic, grains, gd_size, gt_size, gt_count; + static const char desc_template[] = + "# Disk DescriptorFile\n" + "version=1\n" + "CID=%x\n" + "parentCID=ffffffff\n" + "createType=\"monolithicSparse\"\n" + "\n" + "# Extent description\n" + "RW %" PRId64 " SPARSE \"%s\"\n" + "\n" + "# The Disk Data Base \n" + "#DDB\n" + "\n" + "ddb.virtualHWVersion = \"%d\"\n" + "ddb.geometry.cylinders = \"%" PRId64 "\"\n" + "ddb.geometry.heads = \"16\"\n" + "ddb.geometry.sectors = \"63\"\n" + "ddb.adapterType = \"ide\"\n"; + char desc[1024]; + const char *real_filename, *temp_str; + + /* XXX: add support for backing file */ + if (backing_file) { + return vmdk_snapshot_create(filename, backing_file); + } + + fd = open(filename, O_WRONLY | O_CREAT | O_TRUNC | O_BINARY | O_LARGEFILE, + 0644); + if (fd < 0) + return -1; + magic = cpu_to_be32(VMDK4_MAGIC); + memset(&header, 0, sizeof(header)); + header.version = cpu_to_le32(1); + header.flags = cpu_to_le32(3); /* ?? */ + header.capacity = cpu_to_le64(total_size); + header.granularity = cpu_to_le64(128); + header.num_gtes_per_gte = cpu_to_le32(512); + + grains = (total_size + header.granularity - 1) / header.granularity; + gt_size = ((header.num_gtes_per_gte * sizeof(uint32_t)) + 511) >> 9; + gt_count = (grains + header.num_gtes_per_gte - 1) / header.num_gtes_per_gte; + gd_size = (gt_count * sizeof(uint32_t) + 511) >> 9; + + header.desc_offset = 1; + header.desc_size = 20; + header.rgd_offset = header.desc_offset + header.desc_size; + header.gd_offset = header.rgd_offset + gd_size + (gt_size * gt_count); + header.grain_offset = + ((header.gd_offset + gd_size + (gt_size * gt_count) + + header.granularity - 1) / header.granularity) * + header.granularity; + + header.desc_offset = cpu_to_le64(header.desc_offset); + header.desc_size = cpu_to_le64(header.desc_size); + header.rgd_offset = cpu_to_le64(header.rgd_offset); + header.gd_offset = cpu_to_le64(header.gd_offset); + header.grain_offset = cpu_to_le64(header.grain_offset); + + header.check_bytes[0] = 0xa; + header.check_bytes[1] = 0x20; + header.check_bytes[2] = 0xd; + header.check_bytes[3] = 0xa; + + /* write all the data */ + write(fd, &magic, sizeof(magic)); + write(fd, &header, sizeof(header)); + + ftruncate(fd, header.grain_offset << 9); + + /* write grain directory */ + lseek(fd, le64_to_cpu(header.rgd_offset) << 9, SEEK_SET); + for (i = 0, tmp = header.rgd_offset + gd_size; + i < gt_count; i++, tmp += gt_size) + write(fd, &tmp, sizeof(tmp)); + + /* write backup grain directory */ + lseek(fd, le64_to_cpu(header.gd_offset) << 9, SEEK_SET); + for (i = 0, tmp = header.gd_offset + gd_size; + i < gt_count; i++, tmp += gt_size) + write(fd, &tmp, sizeof(tmp)); + + /* compose the descriptor */ + real_filename = filename; + if ((temp_str = strrchr(real_filename, '\\')) != NULL) + real_filename = temp_str + 1; + if ((temp_str = strrchr(real_filename, '/')) != NULL) + real_filename = temp_str + 1; + if ((temp_str = strrchr(real_filename, ':')) != NULL) + real_filename = temp_str + 1; + snprintf(desc, sizeof(desc), desc_template, (unsigned int)time(NULL), + total_size, real_filename, + (flags & BLOCK_FLAG_COMPAT6 ? 6 : 4), + total_size / (int64_t)(63 * 16)); + + /* write the descriptor */ + lseek(fd, le64_to_cpu(header.desc_offset) << 9, SEEK_SET); + write(fd, desc, strlen(desc)); + + close(fd); + return 0; +} + +static void vmdk_close(BlockDriverState *bs) +{ + BDRVVmdkState *s = bs->opaque; + + qemu_free(s->l1_table); + qemu_free(s->l2_cache); + // try to close parent image, if exist + vmdk_parent_close(s->hd); + bdrv_delete(s->hd); +} + +static void vmdk_flush(BlockDriverState *bs) +{ + BDRVVmdkState *s = bs->opaque; + bdrv_flush(s->hd); +} + +static BlockDriver bdrv_vmdk = { + .format_name = "vmdk", + .instance_size = sizeof(BDRVVmdkState), + .bdrv_probe = vmdk_probe, + .bdrv_open = vmdk_open, + .bdrv_read = vmdk_read, + .bdrv_write = vmdk_write, + .bdrv_close = vmdk_close, + .bdrv_create = vmdk_create, + .bdrv_flush = vmdk_flush, + .bdrv_is_allocated = vmdk_is_allocated, +}; + +static void bdrv_vmdk_init(void) +{ + bdrv_register(&bdrv_vmdk); +} + +block_init(bdrv_vmdk_init); diff --git a/block/vpc.c b/block/vpc.c new file mode 100644 index 000000000..211ae5c72 --- /dev/null +++ b/block/vpc.c @@ -0,0 +1,606 @@ +/* + * Block driver for Conectix/Microsoft Virtual PC images + * + * Copyright (c) 2005 Alex Beregszaszi + * Copyright (c) 2009 Kevin Wolf + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +#include "qemu-common.h" +#include "block_int.h" +#include "module.h" + +/**************************************************************/ + +#define HEADER_SIZE 512 + +//#define CACHE + +enum vhd_type { + VHD_FIXED = 2, + VHD_DYNAMIC = 3, + VHD_DIFFERENCING = 4, +}; + +// Seconds since Jan 1, 2000 0:00:00 (UTC) +#define VHD_TIMESTAMP_BASE 946684800 + +// always big-endian +struct vhd_footer { + char creator[8]; // "conectix" + uint32_t features; + uint32_t version; + + // Offset of next header structure, 0xFFFFFFFF if none + uint64_t data_offset; + + // Seconds since Jan 1, 2000 0:00:00 (UTC) + uint32_t timestamp; + + char creator_app[4]; // "vpc " + uint16_t major; + uint16_t minor; + char creator_os[4]; // "Wi2k" + + uint64_t orig_size; + uint64_t size; + + uint16_t cyls; + uint8_t heads; + uint8_t secs_per_cyl; + + uint32_t type; + + // Checksum of the Hard Disk Footer ("one's complement of the sum of all + // the bytes in the footer without the checksum field") + uint32_t checksum; + + // UUID used to identify a parent hard disk (backing file) + uint8_t uuid[16]; + + uint8_t in_saved_state; +}; + +struct vhd_dyndisk_header { + char magic[8]; // "cxsparse" + + // Offset of next header structure, 0xFFFFFFFF if none + uint64_t data_offset; + + // Offset of the Block Allocation Table (BAT) + uint64_t table_offset; + + uint32_t version; + uint32_t max_table_entries; // 32bit/entry + + // 2 MB by default, must be a power of two + uint32_t block_size; + + uint32_t checksum; + uint8_t parent_uuid[16]; + uint32_t parent_timestamp; + uint32_t reserved; + + // Backing file name (in UTF-16) + uint8_t parent_name[512]; + + struct { + uint32_t platform; + uint32_t data_space; + uint32_t data_length; + uint32_t reserved; + uint64_t data_offset; + } parent_locator[8]; +}; + +typedef struct BDRVVPCState { + BlockDriverState *hd; + + uint8_t footer_buf[HEADER_SIZE]; + uint64_t free_data_block_offset; + int max_table_entries; + uint32_t *pagetable; + uint64_t bat_offset; + uint64_t last_bitmap_offset; + + uint32_t block_size; + uint32_t bitmap_size; + +#ifdef CACHE + uint8_t *pageentry_u8; + uint32_t *pageentry_u32; + uint16_t *pageentry_u16; + + uint64_t last_bitmap; +#endif +} BDRVVPCState; + +static uint32_t vpc_checksum(uint8_t* buf, size_t size) +{ + uint32_t res = 0; + int i; + + for (i = 0; i < size; i++) + res += buf[i]; + + return ~res; +} + + +static int vpc_probe(const uint8_t *buf, int buf_size, const char *filename) +{ + if (buf_size >= 8 && !strncmp((char *)buf, "conectix", 8)) + return 100; + return 0; +} + +static int vpc_open(BlockDriverState *bs, const char *filename, int flags) +{ + BDRVVPCState *s = bs->opaque; + int ret, i; + struct vhd_footer* footer; + struct vhd_dyndisk_header* dyndisk_header; + uint8_t buf[HEADER_SIZE]; + uint32_t checksum; + + ret = bdrv_file_open(&s->hd, filename, flags); + if (ret < 0) + return ret; + + if (bdrv_pread(s->hd, 0, s->footer_buf, HEADER_SIZE) != HEADER_SIZE) + goto fail; + + footer = (struct vhd_footer*) s->footer_buf; + if (strncmp(footer->creator, "conectix", 8)) + goto fail; + + checksum = be32_to_cpu(footer->checksum); + footer->checksum = 0; + if (vpc_checksum(s->footer_buf, HEADER_SIZE) != checksum) + fprintf(stderr, "block-vpc: The header checksum of '%s' is " + "incorrect.\n", filename); + + // The visible size of a image in Virtual PC depends on the geometry + // rather than on the size stored in the footer (the size in the footer + // is too large usually) + bs->total_sectors = (int64_t) + be16_to_cpu(footer->cyls) * footer->heads * footer->secs_per_cyl; + + if (bdrv_pread(s->hd, be64_to_cpu(footer->data_offset), buf, HEADER_SIZE) + != HEADER_SIZE) + goto fail; + + dyndisk_header = (struct vhd_dyndisk_header*) buf; + + if (strncmp(dyndisk_header->magic, "cxsparse", 8)) + goto fail; + + + s->block_size = be32_to_cpu(dyndisk_header->block_size); + s->bitmap_size = ((s->block_size / (8 * 512)) + 511) & ~511; + + s->max_table_entries = be32_to_cpu(dyndisk_header->max_table_entries); + s->pagetable = qemu_malloc(s->max_table_entries * 4); + + s->bat_offset = be64_to_cpu(dyndisk_header->table_offset); + if (bdrv_pread(s->hd, s->bat_offset, s->pagetable, + s->max_table_entries * 4) != s->max_table_entries * 4) + goto fail; + + s->free_data_block_offset = + (s->bat_offset + (s->max_table_entries * 4) + 511) & ~511; + + for (i = 0; i < s->max_table_entries; i++) { + be32_to_cpus(&s->pagetable[i]); + if (s->pagetable[i] != 0xFFFFFFFF) { + int64_t next = (512 * (int64_t) s->pagetable[i]) + + s->bitmap_size + s->block_size; + + if (next> s->free_data_block_offset) + s->free_data_block_offset = next; + } + } + + s->last_bitmap_offset = (int64_t) -1; + +#ifdef CACHE + s->pageentry_u8 = qemu_malloc(512); + s->pageentry_u32 = s->pageentry_u8; + s->pageentry_u16 = s->pageentry_u8; + s->last_pagetable = -1; +#endif + + return 0; + fail: + bdrv_delete(s->hd); + return -1; +} + +/* + * Returns the absolute byte offset of the given sector in the image file. + * If the sector is not allocated, -1 is returned instead. + * + * The parameter write must be 1 if the offset will be used for a write + * operation (the block bitmaps is updated then), 0 otherwise. + */ +static inline int64_t get_sector_offset(BlockDriverState *bs, + int64_t sector_num, int write) +{ + BDRVVPCState *s = bs->opaque; + uint64_t offset = sector_num * 512; + uint64_t bitmap_offset, block_offset; + uint32_t pagetable_index, pageentry_index; + + pagetable_index = offset / s->block_size; + pageentry_index = (offset % s->block_size) / 512; + + if (pagetable_index >= s->max_table_entries || s->pagetable[pagetable_index] == 0xffffffff) + return -1; // not allocated + + bitmap_offset = 512 * (uint64_t) s->pagetable[pagetable_index]; + block_offset = bitmap_offset + s->bitmap_size + (512 * pageentry_index); + + // We must ensure that we don't write to any sectors which are marked as + // unused in the bitmap. We get away with setting all bits in the block + // bitmap each time we write to a new block. This might cause Virtual PC to + // miss sparse read optimization, but it's not a problem in terms of + // correctness. + if (write && (s->last_bitmap_offset != bitmap_offset)) { + uint8_t bitmap[s->bitmap_size]; + + s->last_bitmap_offset = bitmap_offset; + memset(bitmap, 0xff, s->bitmap_size); + bdrv_pwrite(s->hd, bitmap_offset, bitmap, s->bitmap_size); + } + +// printf("sector: %" PRIx64 ", index: %x, offset: %x, bioff: %" PRIx64 ", bloff: %" PRIx64 "\n", +// sector_num, pagetable_index, pageentry_index, +// bitmap_offset, block_offset); + +// disabled by reason +#if 0 +#ifdef CACHE + if (bitmap_offset != s->last_bitmap) + { + lseek(s->fd, bitmap_offset, SEEK_SET); + + s->last_bitmap = bitmap_offset; + + // Scary! Bitmap is stored as big endian 32bit entries, + // while we used to look it up byte by byte + read(s->fd, s->pageentry_u8, 512); + for (i = 0; i < 128; i++) + be32_to_cpus(&s->pageentry_u32[i]); + } + + if ((s->pageentry_u8[pageentry_index / 8] >> (pageentry_index % 8)) & 1) + return -1; +#else + lseek(s->fd, bitmap_offset + (pageentry_index / 8), SEEK_SET); + + read(s->fd, &bitmap_entry, 1); + + if ((bitmap_entry >> (pageentry_index % 8)) & 1) + return -1; // not allocated +#endif +#endif + + return block_offset; +} + +/* + * Writes the footer to the end of the image file. This is needed when the + * file grows as it overwrites the old footer + * + * Returns 0 on success and < 0 on error + */ +static int rewrite_footer(BlockDriverState* bs) +{ + int ret; + BDRVVPCState *s = bs->opaque; + int64_t offset = s->free_data_block_offset; + + ret = bdrv_pwrite(s->hd, offset, s->footer_buf, HEADER_SIZE); + if (ret < 0) + return ret; + + return 0; +} + +/* + * Allocates a new block. This involves writing a new footer and updating + * the Block Allocation Table to use the space at the old end of the image + * file (overwriting the old footer) + * + * Returns the sectors' offset in the image file on success and < 0 on error + */ +static int64_t alloc_block(BlockDriverState* bs, int64_t sector_num) +{ + BDRVVPCState *s = bs->opaque; + int64_t bat_offset; + uint32_t index, bat_value; + int ret; + uint8_t bitmap[s->bitmap_size]; + + // Check if sector_num is valid + if ((sector_num < 0) || (sector_num > bs->total_sectors)) + return -1; + + // Write entry into in-memory BAT + index = (sector_num * 512) / s->block_size; + if (s->pagetable[index] != 0xFFFFFFFF) + return -1; + + s->pagetable[index] = s->free_data_block_offset / 512; + + // Initialize the block's bitmap + memset(bitmap, 0xff, s->bitmap_size); + bdrv_pwrite(s->hd, s->free_data_block_offset, bitmap, s->bitmap_size); + + // Write new footer (the old one will be overwritten) + s->free_data_block_offset += s->block_size + s->bitmap_size; + ret = rewrite_footer(bs); + if (ret < 0) + goto fail; + + // Write BAT entry to disk + bat_offset = s->bat_offset + (4 * index); + bat_value = be32_to_cpu(s->pagetable[index]); + ret = bdrv_pwrite(s->hd, bat_offset, &bat_value, 4); + if (ret < 0) + goto fail; + + return get_sector_offset(bs, sector_num, 0); + +fail: + s->free_data_block_offset -= (s->block_size + s->bitmap_size); + return -1; +} + +static int vpc_read(BlockDriverState *bs, int64_t sector_num, + uint8_t *buf, int nb_sectors) +{ + BDRVVPCState *s = bs->opaque; + int ret; + int64_t offset; + + while (nb_sectors > 0) { + offset = get_sector_offset(bs, sector_num, 0); + + if (offset == -1) { + memset(buf, 0, 512); + } else { + ret = bdrv_pread(s->hd, offset, buf, 512); + if (ret != 512) + return -1; + } + + nb_sectors--; + sector_num++; + buf += 512; + } + return 0; +} + +static int vpc_write(BlockDriverState *bs, int64_t sector_num, + const uint8_t *buf, int nb_sectors) +{ + BDRVVPCState *s = bs->opaque; + int64_t offset; + int ret; + + while (nb_sectors > 0) { + offset = get_sector_offset(bs, sector_num, 1); + + if (offset == -1) { + offset = alloc_block(bs, sector_num); + if (offset < 0) + return -1; + } + + ret = bdrv_pwrite(s->hd, offset, buf, 512); + if (ret != 512) + return -1; + + nb_sectors--; + sector_num++; + buf += 512; + } + + return 0; +} + + +/* + * Calculates the number of cylinders, heads and sectors per cylinder + * based on a given number of sectors. This is the algorithm described + * in the VHD specification. + * + * Note that the geometry doesn't always exactly match total_sectors but + * may round it down. + * + * Returns 0 on success, -EFBIG if the size is larger than 127 GB + */ +static int calculate_geometry(int64_t total_sectors, uint16_t* cyls, + uint8_t* heads, uint8_t* secs_per_cyl) +{ + uint32_t cyls_times_heads; + + if (total_sectors > 65535 * 16 * 255) + return -EFBIG; + + if (total_sectors > 65535 * 16 * 63) { + *secs_per_cyl = 255; + *heads = 16; + cyls_times_heads = total_sectors / *secs_per_cyl; + } else { + *secs_per_cyl = 17; + cyls_times_heads = total_sectors / *secs_per_cyl; + *heads = (cyls_times_heads + 1023) / 1024; + + if (*heads < 4) + *heads = 4; + + if (cyls_times_heads >= (*heads * 1024) || *heads > 16) { + *secs_per_cyl = 31; + *heads = 16; + cyls_times_heads = total_sectors / *secs_per_cyl; + } + + if (cyls_times_heads >= (*heads * 1024)) { + *secs_per_cyl = 63; + *heads = 16; + cyls_times_heads = total_sectors / *secs_per_cyl; + } + } + + // Note: Rounding up deviates from the Virtual PC behaviour + // However, we need this to avoid truncating images in qemu-img convert + *cyls = (cyls_times_heads + *heads - 1) / *heads; + + return 0; +} + +static int vpc_create(const char *filename, int64_t total_sectors, + const char *backing_file, int flags) +{ + uint8_t buf[1024]; + struct vhd_footer* footer = (struct vhd_footer*) buf; + struct vhd_dyndisk_header* dyndisk_header = + (struct vhd_dyndisk_header*) buf; + int fd, i; + uint16_t cyls; + uint8_t heads; + uint8_t secs_per_cyl; + size_t block_size, num_bat_entries; + + if (backing_file != NULL) + return -ENOTSUP; + + fd = open(filename, O_WRONLY | O_CREAT | O_TRUNC | O_BINARY, 0644); + if (fd < 0) + return -EIO; + + // Calculate matching total_size and geometry + if (calculate_geometry(total_sectors, &cyls, &heads, &secs_per_cyl)) + return -EFBIG; + total_sectors = (int64_t) cyls * heads * secs_per_cyl; + + // Prepare the Hard Disk Footer + memset(buf, 0, 1024); + + strncpy(footer->creator, "conectix", 8); + // TODO Check if "qemu" creator_app is ok for VPC + strncpy(footer->creator_app, "qemu", 4); + strncpy(footer->creator_os, "Wi2k", 4); + + footer->features = be32_to_cpu(0x02); + footer->version = be32_to_cpu(0x00010000); + footer->data_offset = be64_to_cpu(HEADER_SIZE); + footer->timestamp = be32_to_cpu(time(NULL) - VHD_TIMESTAMP_BASE); + + // Version of Virtual PC 2007 + footer->major = be16_to_cpu(0x0005); + footer->minor =be16_to_cpu(0x0003); + + footer->orig_size = be64_to_cpu(total_sectors * 512); + footer->size = be64_to_cpu(total_sectors * 512); + + footer->cyls = be16_to_cpu(cyls); + footer->heads = heads; + footer->secs_per_cyl = secs_per_cyl; + + footer->type = be32_to_cpu(VHD_DYNAMIC); + + // TODO uuid is missing + + footer->checksum = be32_to_cpu(vpc_checksum(buf, HEADER_SIZE)); + + // Write the footer (twice: at the beginning and at the end) + block_size = 0x200000; + num_bat_entries = (total_sectors + block_size / 512) / (block_size / 512); + + if (write(fd, buf, HEADER_SIZE) != HEADER_SIZE) + return -EIO; + + if (lseek(fd, 1536 + ((num_bat_entries * 4 + 511) & ~511), SEEK_SET) < 0) + return -EIO; + if (write(fd, buf, HEADER_SIZE) != HEADER_SIZE) + return -EIO; + + // Write the initial BAT + if (lseek(fd, 3 * 512, SEEK_SET) < 0) + return -EIO; + + memset(buf, 0xFF, 512); + for (i = 0; i < (num_bat_entries * 4 + 511) / 512; i++) + if (write(fd, buf, 512) != 512) + return -EIO; + + + // Prepare the Dynamic Disk Header + memset(buf, 0, 1024); + + strncpy(dyndisk_header->magic, "cxsparse", 8); + + dyndisk_header->data_offset = be64_to_cpu(0xFFFFFFFF); + dyndisk_header->table_offset = be64_to_cpu(3 * 512); + dyndisk_header->version = be32_to_cpu(0x00010000); + dyndisk_header->block_size = be32_to_cpu(block_size); + dyndisk_header->max_table_entries = be32_to_cpu(num_bat_entries); + + dyndisk_header->checksum = be32_to_cpu(vpc_checksum(buf, 1024)); + + // Write the header + if (lseek(fd, 512, SEEK_SET) < 0) + return -EIO; + if (write(fd, buf, 1024) != 1024) + return -EIO; + + close(fd); + return 0; +} + +static void vpc_close(BlockDriverState *bs) +{ + BDRVVPCState *s = bs->opaque; + qemu_free(s->pagetable); +#ifdef CACHE + qemu_free(s->pageentry_u8); +#endif + bdrv_delete(s->hd); +} + +static BlockDriver bdrv_vpc = { + .format_name = "vpc", + .instance_size = sizeof(BDRVVPCState), + .bdrv_probe = vpc_probe, + .bdrv_open = vpc_open, + .bdrv_read = vpc_read, + .bdrv_write = vpc_write, + .bdrv_close = vpc_close, + .bdrv_create = vpc_create, +}; + +static void bdrv_vpc_init(void) +{ + bdrv_register(&bdrv_vpc); +} + +block_init(bdrv_vpc_init); diff --git a/block/vvfat.c b/block/vvfat.c new file mode 100644 index 000000000..2a8feb38d --- /dev/null +++ b/block/vvfat.c @@ -0,0 +1,2855 @@ +/* vim:set shiftwidth=4 ts=8: */ +/* + * QEMU Block driver for virtual VFAT (shadows a local directory) + * + * Copyright (c) 2004,2005 Johannes E. Schindelin + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +#include +#include +#include "qemu-common.h" +#include "block_int.h" +#include "module.h" + +#ifndef S_IWGRP +#define S_IWGRP 0 +#endif +#ifndef S_IWOTH +#define S_IWOTH 0 +#endif + +/* TODO: add ":bootsector=blabla.img:" */ +/* LATER TODO: add automatic boot sector generation from + BOOTEASY.ASM and Ranish Partition Manager + Note that DOS assumes the system files to be the first files in the + file system (test if the boot sector still relies on that fact)! */ +/* MAYBE TODO: write block-visofs.c */ +/* TODO: call try_commit() only after a timeout */ + +/* #define DEBUG */ + +#ifdef DEBUG + +#define DLOG(a) a + +#undef stderr +#define stderr STDERR +FILE* stderr = NULL; + +static void checkpoint(void); + +#ifdef __MINGW32__ +void nonono(const char* file, int line, const char* msg) { + fprintf(stderr, "Nonono! %s:%d %s\n", file, line, msg); + exit(-5); +} +#undef assert +#define assert(a) do {if (!(a)) nonono(__FILE__, __LINE__, #a);}while(0) +#endif + +#else + +#define DLOG(a) + +#endif + +/* dynamic array functions */ +typedef struct array_t { + char* pointer; + unsigned int size,next,item_size; +} array_t; + +static inline void array_init(array_t* array,unsigned int item_size) +{ + array->pointer = NULL; + array->size=0; + array->next=0; + array->item_size=item_size; +} + +static inline void array_free(array_t* array) +{ + if(array->pointer) + free(array->pointer); + array->size=array->next=0; +} + +/* does not automatically grow */ +static inline void* array_get(array_t* array,unsigned int index) { + assert(index < array->next); + return array->pointer + index * array->item_size; +} + +static inline int array_ensure_allocated(array_t* array, int index) +{ + if((index + 1) * array->item_size > array->size) { + int new_size = (index + 32) * array->item_size; + array->pointer = qemu_realloc(array->pointer, new_size); + if (!array->pointer) + return -1; + array->size = new_size; + array->next = index + 1; + } + + return 0; +} + +static inline void* array_get_next(array_t* array) { + unsigned int next = array->next; + void* result; + + if (array_ensure_allocated(array, next) < 0) + return NULL; + + array->next = next + 1; + result = array_get(array, next); + + return result; +} + +static inline void* array_insert(array_t* array,unsigned int index,unsigned int count) { + if((array->next+count)*array->item_size>array->size) { + int increment=count*array->item_size; + array->pointer=qemu_realloc(array->pointer,array->size+increment); + if(!array->pointer) + return NULL; + array->size+=increment; + } + memmove(array->pointer+(index+count)*array->item_size, + array->pointer+index*array->item_size, + (array->next-index)*array->item_size); + array->next+=count; + return array->pointer+index*array->item_size; +} + +/* this performs a "roll", so that the element which was at index_from becomes + * index_to, but the order of all other elements is preserved. */ +static inline int array_roll(array_t* array,int index_to,int index_from,int count) +{ + char* buf; + char* from; + char* to; + int is; + + if(!array || + index_to<0 || index_to>=array->next || + index_from<0 || index_from>=array->next) + return -1; + + if(index_to==index_from) + return 0; + + is=array->item_size; + from=array->pointer+index_from*is; + to=array->pointer+index_to*is; + buf=qemu_malloc(is*count); + memcpy(buf,from,is*count); + + if(index_to=0); + assert(count > 0); + assert(index + count <= array->next); + if(array_roll(array,array->next-1,index,count)) + return -1; + array->next -= count; + return 0; +} + +static int array_remove(array_t* array,int index) +{ + return array_remove_slice(array, index, 1); +} + +/* return the index for a given member */ +static int array_index(array_t* array, void* pointer) +{ + size_t offset = (char*)pointer - array->pointer; + assert((offset % array->item_size) == 0); + assert(offset/array->item_size < array->next); + return offset/array->item_size; +} + +/* These structures are used to fake a disk and the VFAT filesystem. + * For this reason we need to use __attribute__((packed)). */ + +typedef struct bootsector_t { + uint8_t jump[3]; + uint8_t name[8]; + uint16_t sector_size; + uint8_t sectors_per_cluster; + uint16_t reserved_sectors; + uint8_t number_of_fats; + uint16_t root_entries; + uint16_t total_sectors16; + uint8_t media_type; + uint16_t sectors_per_fat; + uint16_t sectors_per_track; + uint16_t number_of_heads; + uint32_t hidden_sectors; + uint32_t total_sectors; + union { + struct { + uint8_t drive_number; + uint8_t current_head; + uint8_t signature; + uint32_t id; + uint8_t volume_label[11]; + } __attribute__((packed)) fat16; + struct { + uint32_t sectors_per_fat; + uint16_t flags; + uint8_t major,minor; + uint32_t first_cluster_of_root_directory; + uint16_t info_sector; + uint16_t backup_boot_sector; + uint16_t ignored; + } __attribute__((packed)) fat32; + } u; + uint8_t fat_type[8]; + uint8_t ignored[0x1c0]; + uint8_t magic[2]; +} __attribute__((packed)) bootsector_t; + +typedef struct { + uint8_t head; + uint8_t sector; + uint8_t cylinder; +} mbr_chs_t; + +typedef struct partition_t { + uint8_t attributes; /* 0x80 = bootable */ + mbr_chs_t start_CHS; + uint8_t fs_type; /* 0x1 = FAT12, 0x6 = FAT16, 0xe = FAT16_LBA, 0xb = FAT32, 0xc = FAT32_LBA */ + mbr_chs_t end_CHS; + uint32_t start_sector_long; + uint32_t length_sector_long; +} __attribute__((packed)) partition_t; + +typedef struct mbr_t { + uint8_t ignored[0x1b8]; + uint32_t nt_id; + uint8_t ignored2[2]; + partition_t partition[4]; + uint8_t magic[2]; +} __attribute__((packed)) mbr_t; + +typedef struct direntry_t { + uint8_t name[8]; + uint8_t extension[3]; + uint8_t attributes; + uint8_t reserved[2]; + uint16_t ctime; + uint16_t cdate; + uint16_t adate; + uint16_t begin_hi; + uint16_t mtime; + uint16_t mdate; + uint16_t begin; + uint32_t size; +} __attribute__((packed)) direntry_t; + +/* this structure are used to transparently access the files */ + +typedef struct mapping_t { + /* begin is the first cluster, end is the last+1 */ + uint32_t begin,end; + /* as s->directory is growable, no pointer may be used here */ + unsigned int dir_index; + /* the clusters of a file may be in any order; this points to the first */ + int first_mapping_index; + union { + /* offset is + * - the offset in the file (in clusters) for a file, or + * - the next cluster of the directory for a directory, and + * - the address of the buffer for a faked entry + */ + struct { + uint32_t offset; + } file; + struct { + int parent_mapping_index; + int first_dir_index; + } dir; + } info; + /* path contains the full path, i.e. it always starts with s->path */ + char* path; + + enum { MODE_UNDEFINED = 0, MODE_NORMAL = 1, MODE_MODIFIED = 2, + MODE_DIRECTORY = 4, MODE_FAKED = 8, + MODE_DELETED = 16, MODE_RENAMED = 32 } mode; + int read_only; +} mapping_t; + +#ifdef DEBUG +static void print_direntry(const struct direntry_t*); +static void print_mapping(const struct mapping_t* mapping); +#endif + +/* here begins the real VVFAT driver */ + +typedef struct BDRVVVFATState { + BlockDriverState* bs; /* pointer to parent */ + unsigned int first_sectors_number; /* 1 for a single partition, 0x40 for a disk with partition table */ + unsigned char first_sectors[0x40*0x200]; + + int fat_type; /* 16 or 32 */ + array_t fat,directory,mapping; + + unsigned int cluster_size; + unsigned int sectors_per_cluster; + unsigned int sectors_per_fat; + unsigned int sectors_of_root_directory; + uint32_t last_cluster_of_root_directory; + unsigned int faked_sectors; /* how many sectors are faked before file data */ + uint32_t sector_count; /* total number of sectors of the partition */ + uint32_t cluster_count; /* total number of clusters of this partition */ + uint32_t max_fat_value; + + int current_fd; + mapping_t* current_mapping; + unsigned char* cluster; /* points to current cluster */ + unsigned char* cluster_buffer; /* points to a buffer to hold temp data */ + unsigned int current_cluster; + + /* write support */ + BlockDriverState* write_target; + char* qcow_filename; + BlockDriverState* qcow; + void* fat2; + char* used_clusters; + array_t commits; + const char* path; + int downcase_short_names; +} BDRVVVFATState; + +/* take the sector position spos and convert it to Cylinder/Head/Sector position + * if the position is outside the specified geometry, fill maximum value for CHS + * and return 1 to signal overflow. + */ +static int sector2CHS(BlockDriverState* bs, mbr_chs_t * chs, int spos){ + int head,sector; + sector = spos % (bs->secs); spos/= bs->secs; + head = spos % (bs->heads); spos/= bs->heads; + if(spos >= bs->cyls){ + /* Overflow, + it happens if 32bit sector positions are used, while CHS is only 24bit. + Windows/Dos is said to take 1023/255/63 as nonrepresentable CHS */ + chs->head = 0xFF; + chs->sector = 0xFF; + chs->cylinder = 0xFF; + return 1; + } + chs->head = (uint8_t)head; + chs->sector = (uint8_t)( (sector+1) | ((spos>>8)<<6) ); + chs->cylinder = (uint8_t)spos; + return 0; +} + +static void init_mbr(BDRVVVFATState* s) +{ + /* TODO: if the files mbr.img and bootsect.img exist, use them */ + mbr_t* real_mbr=(mbr_t*)s->first_sectors; + partition_t* partition=&(real_mbr->partition[0]); + int lba; + + memset(s->first_sectors,0,512); + + /* Win NT Disk Signature */ + real_mbr->nt_id= cpu_to_le32(0xbe1afdfa); + + partition->attributes=0x80; /* bootable */ + + /* LBA is used when partition is outside the CHS geometry */ + lba = sector2CHS(s->bs, &partition->start_CHS, s->first_sectors_number-1); + lba|= sector2CHS(s->bs, &partition->end_CHS, s->sector_count); + + /*LBA partitions are identified only by start/length_sector_long not by CHS*/ + partition->start_sector_long =cpu_to_le32(s->first_sectors_number-1); + partition->length_sector_long=cpu_to_le32(s->sector_count - s->first_sectors_number+1); + + /* FAT12/FAT16/FAT32 */ + /* DOS uses different types when partition is LBA, + probably to prevent older versions from using CHS on them */ + partition->fs_type= s->fat_type==12 ? 0x1: + s->fat_type==16 ? (lba?0xe:0x06): + /*fat_tyoe==32*/ (lba?0xc:0x0b); + + real_mbr->magic[0]=0x55; real_mbr->magic[1]=0xaa; +} + +/* direntry functions */ + +/* dest is assumed to hold 258 bytes, and pads with 0xffff up to next multiple of 26 */ +static inline int short2long_name(char* dest,const char* src) +{ + int i; + int len; + for(i=0;i<129 && src[i];i++) { + dest[2*i]=src[i]; + dest[2*i+1]=0; + } + len=2*i; + dest[2*i]=dest[2*i+1]=0; + for(i=2*i+2;(i%26);i++) + dest[i]=0xff; + return len; +} + +static inline direntry_t* create_long_filename(BDRVVVFATState* s,const char* filename) +{ + char buffer[258]; + int length=short2long_name(buffer,filename), + number_of_entries=(length+25)/26,i; + direntry_t* entry; + + for(i=0;idirectory)); + entry->attributes=0xf; + entry->reserved[0]=0; + entry->begin=0; + entry->name[0]=(number_of_entries-i)|(i==0?0x40:0); + } + for(i=0;i<26*number_of_entries;i++) { + int offset=(i%26); + if(offset<10) offset=1+offset; + else if(offset<22) offset=14+offset-10; + else offset=28+offset-22; + entry=array_get(&(s->directory),s->directory.next-1-(i/26)); + entry->name[offset]=buffer[i]; + } + return array_get(&(s->directory),s->directory.next-number_of_entries); +} + +static char is_free(const direntry_t* direntry) +{ + return direntry->name[0]==0xe5 || direntry->name[0]==0x00; +} + +static char is_volume_label(const direntry_t* direntry) +{ + return direntry->attributes == 0x28; +} + +static char is_long_name(const direntry_t* direntry) +{ + return direntry->attributes == 0xf; +} + +static char is_short_name(const direntry_t* direntry) +{ + return !is_volume_label(direntry) && !is_long_name(direntry) + && !is_free(direntry); +} + +static char is_directory(const direntry_t* direntry) +{ + return direntry->attributes & 0x10 && direntry->name[0] != 0xe5; +} + +static inline char is_dot(const direntry_t* direntry) +{ + return is_short_name(direntry) && direntry->name[0] == '.'; +} + +static char is_file(const direntry_t* direntry) +{ + return is_short_name(direntry) && !is_directory(direntry); +} + +static inline uint32_t begin_of_direntry(const direntry_t* direntry) +{ + return le16_to_cpu(direntry->begin)|(le16_to_cpu(direntry->begin_hi)<<16); +} + +static inline uint32_t filesize_of_direntry(const direntry_t* direntry) +{ + return le32_to_cpu(direntry->size); +} + +static void set_begin_of_direntry(direntry_t* direntry, uint32_t begin) +{ + direntry->begin = cpu_to_le16(begin & 0xffff); + direntry->begin_hi = cpu_to_le16((begin >> 16) & 0xffff); +} + +/* fat functions */ + +static inline uint8_t fat_chksum(const direntry_t* entry) +{ + uint8_t chksum=0; + int i; + + for(i=0;i<11;i++) { + unsigned char c; + + c = (i <= 8) ? entry->name[i] : entry->extension[i-8]; + chksum=(((chksum&0xfe)>>1)|((chksum&0x01)?0x80:0)) + c; + } + + return chksum; +} + +/* if return_time==0, this returns the fat_date, else the fat_time */ +static uint16_t fat_datetime(time_t time,int return_time) { + struct tm* t; +#ifdef _WIN32 + t=localtime(&time); /* this is not thread safe */ +#else + struct tm t1; + t=&t1; + localtime_r(&time,t); +#endif + if(return_time) + return cpu_to_le16((t->tm_sec/2)|(t->tm_min<<5)|(t->tm_hour<<11)); + return cpu_to_le16((t->tm_mday)|((t->tm_mon+1)<<5)|((t->tm_year-80)<<9)); +} + +static inline void fat_set(BDRVVVFATState* s,unsigned int cluster,uint32_t value) +{ + if(s->fat_type==32) { + uint32_t* entry=array_get(&(s->fat),cluster); + *entry=cpu_to_le32(value); + } else if(s->fat_type==16) { + uint16_t* entry=array_get(&(s->fat),cluster); + *entry=cpu_to_le16(value&0xffff); + } else { + int offset = (cluster*3/2); + unsigned char* p = array_get(&(s->fat), offset); + switch (cluster&1) { + case 0: + p[0] = value&0xff; + p[1] = (p[1]&0xf0) | ((value>>8)&0xf); + break; + case 1: + p[0] = (p[0]&0xf) | ((value&0xf)<<4); + p[1] = (value>>4); + break; + } + } +} + +static inline uint32_t fat_get(BDRVVVFATState* s,unsigned int cluster) +{ + if(s->fat_type==32) { + uint32_t* entry=array_get(&(s->fat),cluster); + return le32_to_cpu(*entry); + } else if(s->fat_type==16) { + uint16_t* entry=array_get(&(s->fat),cluster); + return le16_to_cpu(*entry); + } else { + const uint8_t* x=(uint8_t*)(s->fat.pointer)+cluster*3/2; + return ((x[0]|(x[1]<<8))>>(cluster&1?4:0))&0x0fff; + } +} + +static inline int fat_eof(BDRVVVFATState* s,uint32_t fat_entry) +{ + if(fat_entry>s->max_fat_value-8) + return -1; + return 0; +} + +static inline void init_fat(BDRVVVFATState* s) +{ + if (s->fat_type == 12) { + array_init(&(s->fat),1); + array_ensure_allocated(&(s->fat), + s->sectors_per_fat * 0x200 * 3 / 2 - 1); + } else { + array_init(&(s->fat),(s->fat_type==32?4:2)); + array_ensure_allocated(&(s->fat), + s->sectors_per_fat * 0x200 / s->fat.item_size - 1); + } + memset(s->fat.pointer,0,s->fat.size); + + switch(s->fat_type) { + case 12: s->max_fat_value=0xfff; break; + case 16: s->max_fat_value=0xffff; break; + case 32: s->max_fat_value=0x0fffffff; break; + default: s->max_fat_value=0; /* error... */ + } + +} + +/* TODO: in create_short_filename, 0xe5->0x05 is not yet handled! */ +/* TODO: in parse_short_filename, 0x05->0xe5 is not yet handled! */ +static inline direntry_t* create_short_and_long_name(BDRVVVFATState* s, + unsigned int directory_start, const char* filename, int is_dot) +{ + int i,j,long_index=s->directory.next; + direntry_t* entry = NULL; + direntry_t* entry_long = NULL; + + if(is_dot) { + entry=array_get_next(&(s->directory)); + memset(entry->name,0x20,11); + memcpy(entry->name,filename,strlen(filename)); + return entry; + } + + entry_long=create_long_filename(s,filename); + + i = strlen(filename); + for(j = i - 1; j>0 && filename[j]!='.';j--); + if (j > 0) + i = (j > 8 ? 8 : j); + else if (i > 8) + i = 8; + + entry=array_get_next(&(s->directory)); + memset(entry->name,0x20,11); + memcpy(entry->name, filename, i); + + if(j > 0) + for (i = 0; i < 3 && filename[j+1+i]; i++) + entry->extension[i] = filename[j+1+i]; + + /* upcase & remove unwanted characters */ + for(i=10;i>=0;i--) { + if(i==10 || i==7) for(;i>0 && entry->name[i]==' ';i--); + if(entry->name[i]<=' ' || entry->name[i]>0x7f + || strchr(".*?<>|\":/\\[];,+='",entry->name[i])) + entry->name[i]='_'; + else if(entry->name[i]>='a' && entry->name[i]<='z') + entry->name[i]+='A'-'a'; + } + + /* mangle duplicates */ + while(1) { + direntry_t* entry1=array_get(&(s->directory),directory_start); + int j; + + for(;entry1name,entry->name,11)) + break; /* found dupe */ + if(entry1==entry) /* no dupe found */ + break; + + /* use all 8 characters of name */ + if(entry->name[7]==' ') { + int j; + for(j=6;j>0 && entry->name[j]==' ';j--) + entry->name[j]='~'; + } + + /* increment number */ + for(j=7;j>0 && entry->name[j]=='9';j--) + entry->name[j]='0'; + if(j>0) { + if(entry->name[j]<'0' || entry->name[j]>'9') + entry->name[j]='0'; + else + entry->name[j]++; + } + } + + /* calculate checksum; propagate to long name */ + if(entry_long) { + uint8_t chksum=fat_chksum(entry); + + /* calculate anew, because realloc could have taken place */ + entry_long=array_get(&(s->directory),long_index); + while(entry_longreserved[1]=chksum; + entry_long++; + } + } + + return entry; +} + +/* + * Read a directory. (the index of the corresponding mapping must be passed). + */ +static int read_directory(BDRVVVFATState* s, int mapping_index) +{ + mapping_t* mapping = array_get(&(s->mapping), mapping_index); + direntry_t* direntry; + const char* dirname = mapping->path; + int first_cluster = mapping->begin; + int parent_index = mapping->info.dir.parent_mapping_index; + mapping_t* parent_mapping = (mapping_t*) + (parent_index >= 0 ? array_get(&(s->mapping), parent_index) : NULL); + int first_cluster_of_parent = parent_mapping ? parent_mapping->begin : -1; + + DIR* dir=opendir(dirname); + struct dirent* entry; + int i; + + assert(mapping->mode & MODE_DIRECTORY); + + if(!dir) { + mapping->end = mapping->begin; + return -1; + } + + i = mapping->info.dir.first_dir_index = + first_cluster == 0 ? 0 : s->directory.next; + + /* actually read the directory, and allocate the mappings */ + while((entry=readdir(dir))) { + unsigned int length=strlen(dirname)+2+strlen(entry->d_name); + char* buffer; + direntry_t* direntry; + struct stat st; + int is_dot=!strcmp(entry->d_name,"."); + int is_dotdot=!strcmp(entry->d_name,".."); + + if(first_cluster == 0 && (is_dotdot || is_dot)) + continue; + + buffer=(char*)qemu_malloc(length); + snprintf(buffer,length,"%s/%s",dirname,entry->d_name); + + if(stat(buffer,&st)<0) { + free(buffer); + continue; + } + + /* create directory entry for this file */ + direntry=create_short_and_long_name(s, i, entry->d_name, + is_dot || is_dotdot); + direntry->attributes=(S_ISDIR(st.st_mode)?0x10:0x20); + direntry->reserved[0]=direntry->reserved[1]=0; + direntry->ctime=fat_datetime(st.st_ctime,1); + direntry->cdate=fat_datetime(st.st_ctime,0); + direntry->adate=fat_datetime(st.st_atime,0); + direntry->begin_hi=0; + direntry->mtime=fat_datetime(st.st_mtime,1); + direntry->mdate=fat_datetime(st.st_mtime,0); + if(is_dotdot) + set_begin_of_direntry(direntry, first_cluster_of_parent); + else if(is_dot) + set_begin_of_direntry(direntry, first_cluster); + else + direntry->begin=0; /* do that later */ + if (st.st_size > 0x7fffffff) { + fprintf(stderr, "File %s is larger than 2GB\n", buffer); + free(buffer); + return -2; + } + direntry->size=cpu_to_le32(S_ISDIR(st.st_mode)?0:st.st_size); + + /* create mapping for this file */ + if(!is_dot && !is_dotdot && (S_ISDIR(st.st_mode) || st.st_size)) { + s->current_mapping=(mapping_t*)array_get_next(&(s->mapping)); + s->current_mapping->begin=0; + s->current_mapping->end=st.st_size; + /* + * we get the direntry of the most recent direntry, which + * contains the short name and all the relevant information. + */ + s->current_mapping->dir_index=s->directory.next-1; + s->current_mapping->first_mapping_index = -1; + if (S_ISDIR(st.st_mode)) { + s->current_mapping->mode = MODE_DIRECTORY; + s->current_mapping->info.dir.parent_mapping_index = + mapping_index; + } else { + s->current_mapping->mode = MODE_UNDEFINED; + s->current_mapping->info.file.offset = 0; + } + s->current_mapping->path=buffer; + s->current_mapping->read_only = + (st.st_mode & (S_IWUSR | S_IWGRP | S_IWOTH)) == 0; + } + } + closedir(dir); + + /* fill with zeroes up to the end of the cluster */ + while(s->directory.next%(0x10*s->sectors_per_cluster)) { + direntry_t* direntry=array_get_next(&(s->directory)); + memset(direntry,0,sizeof(direntry_t)); + } + +/* TODO: if there are more entries, bootsector has to be adjusted! */ +#define ROOT_ENTRIES (0x02 * 0x10 * s->sectors_per_cluster) + if (mapping_index == 0 && s->directory.next < ROOT_ENTRIES) { + /* root directory */ + int cur = s->directory.next; + array_ensure_allocated(&(s->directory), ROOT_ENTRIES - 1); + memset(array_get(&(s->directory), cur), 0, + (ROOT_ENTRIES - cur) * sizeof(direntry_t)); + } + + /* reget the mapping, since s->mapping was possibly realloc()ed */ + mapping = (mapping_t*)array_get(&(s->mapping), mapping_index); + first_cluster += (s->directory.next - mapping->info.dir.first_dir_index) + * 0x20 / s->cluster_size; + mapping->end = first_cluster; + + direntry = (direntry_t*)array_get(&(s->directory), mapping->dir_index); + set_begin_of_direntry(direntry, mapping->begin); + + return 0; +} + +static inline uint32_t sector2cluster(BDRVVVFATState* s,off_t sector_num) +{ + return (sector_num-s->faked_sectors)/s->sectors_per_cluster; +} + +static inline off_t cluster2sector(BDRVVVFATState* s, uint32_t cluster_num) +{ + return s->faked_sectors + s->sectors_per_cluster * cluster_num; +} + +static inline uint32_t sector_offset_in_cluster(BDRVVVFATState* s,off_t sector_num) +{ + return (sector_num-s->first_sectors_number-2*s->sectors_per_fat)%s->sectors_per_cluster; +} + +#ifdef DBG +static direntry_t* get_direntry_for_mapping(BDRVVVFATState* s,mapping_t* mapping) +{ + if(mapping->mode==MODE_UNDEFINED) + return 0; + return (direntry_t*)(s->directory.pointer+sizeof(direntry_t)*mapping->dir_index); +} +#endif + +static int init_directories(BDRVVVFATState* s, + const char* dirname) +{ + bootsector_t* bootsector; + mapping_t* mapping; + unsigned int i; + unsigned int cluster; + + memset(&(s->first_sectors[0]),0,0x40*0x200); + + s->cluster_size=s->sectors_per_cluster*0x200; + s->cluster_buffer=qemu_malloc(s->cluster_size); + + /* + * The formula: sc = spf+1+spf*spc*(512*8/fat_type), + * where sc is sector_count, + * spf is sectors_per_fat, + * spc is sectors_per_clusters, and + * fat_type = 12, 16 or 32. + */ + i = 1+s->sectors_per_cluster*0x200*8/s->fat_type; + s->sectors_per_fat=(s->sector_count+i)/i; /* round up */ + + array_init(&(s->mapping),sizeof(mapping_t)); + array_init(&(s->directory),sizeof(direntry_t)); + + /* add volume label */ + { + direntry_t* entry=array_get_next(&(s->directory)); + entry->attributes=0x28; /* archive | volume label */ + snprintf((char*)entry->name,11,"QEMU VVFAT"); + } + + /* Now build FAT, and write back information into directory */ + init_fat(s); + + s->faked_sectors=s->first_sectors_number+s->sectors_per_fat*2; + s->cluster_count=sector2cluster(s, s->sector_count); + + mapping = array_get_next(&(s->mapping)); + mapping->begin = 0; + mapping->dir_index = 0; + mapping->info.dir.parent_mapping_index = -1; + mapping->first_mapping_index = -1; + mapping->path = strdup(dirname); + i = strlen(mapping->path); + if (i > 0 && mapping->path[i - 1] == '/') + mapping->path[i - 1] = '\0'; + mapping->mode = MODE_DIRECTORY; + mapping->read_only = 0; + s->path = mapping->path; + + for (i = 0, cluster = 0; i < s->mapping.next; i++) { + /* MS-DOS expects the FAT to be 0 for the root directory + * (except for the media byte). */ + /* LATER TODO: still true for FAT32? */ + int fix_fat = (i != 0); + mapping = array_get(&(s->mapping), i); + + if (mapping->mode & MODE_DIRECTORY) { + mapping->begin = cluster; + if(read_directory(s, i)) { + fprintf(stderr, "Could not read directory %s\n", + mapping->path); + return -1; + } + mapping = array_get(&(s->mapping), i); + } else { + assert(mapping->mode == MODE_UNDEFINED); + mapping->mode=MODE_NORMAL; + mapping->begin = cluster; + if (mapping->end > 0) { + direntry_t* direntry = array_get(&(s->directory), + mapping->dir_index); + + mapping->end = cluster + 1 + (mapping->end-1)/s->cluster_size; + set_begin_of_direntry(direntry, mapping->begin); + } else { + mapping->end = cluster + 1; + fix_fat = 0; + } + } + + assert(mapping->begin < mapping->end); + + /* next free cluster */ + cluster = mapping->end; + + if(cluster > s->cluster_count) { + fprintf(stderr,"Directory does not fit in FAT%d (capacity %s)\n", + s->fat_type, + s->fat_type == 12 ? s->sector_count == 2880 ? "1.44 MB" + : "2.88 MB" + : "504MB"); + return -EINVAL; + } + + /* fix fat for entry */ + if (fix_fat) { + int j; + for(j = mapping->begin; j < mapping->end - 1; j++) + fat_set(s, j, j+1); + fat_set(s, mapping->end - 1, s->max_fat_value); + } + } + + mapping = array_get(&(s->mapping), 0); + s->sectors_of_root_directory = mapping->end * s->sectors_per_cluster; + s->last_cluster_of_root_directory = mapping->end; + + /* the FAT signature */ + fat_set(s,0,s->max_fat_value); + fat_set(s,1,s->max_fat_value); + + s->current_mapping = NULL; + + bootsector=(bootsector_t*)(s->first_sectors+(s->first_sectors_number-1)*0x200); + bootsector->jump[0]=0xeb; + bootsector->jump[1]=0x3e; + bootsector->jump[2]=0x90; + memcpy(bootsector->name,"QEMU ",8); + bootsector->sector_size=cpu_to_le16(0x200); + bootsector->sectors_per_cluster=s->sectors_per_cluster; + bootsector->reserved_sectors=cpu_to_le16(1); + bootsector->number_of_fats=0x2; /* number of FATs */ + bootsector->root_entries=cpu_to_le16(s->sectors_of_root_directory*0x10); + bootsector->total_sectors16=s->sector_count>0xffff?0:cpu_to_le16(s->sector_count); + bootsector->media_type=(s->fat_type!=12?0xf8:s->sector_count==5760?0xf9:0xf8); /* media descriptor */ + s->fat.pointer[0] = bootsector->media_type; + bootsector->sectors_per_fat=cpu_to_le16(s->sectors_per_fat); + bootsector->sectors_per_track=cpu_to_le16(s->bs->secs); + bootsector->number_of_heads=cpu_to_le16(s->bs->heads); + bootsector->hidden_sectors=cpu_to_le32(s->first_sectors_number==1?0:0x3f); + bootsector->total_sectors=cpu_to_le32(s->sector_count>0xffff?s->sector_count:0); + + /* LATER TODO: if FAT32, this is wrong */ + bootsector->u.fat16.drive_number=s->fat_type==12?0:0x80; /* assume this is hda (TODO) */ + bootsector->u.fat16.current_head=0; + bootsector->u.fat16.signature=0x29; + bootsector->u.fat16.id=cpu_to_le32(0xfabe1afd); + + memcpy(bootsector->u.fat16.volume_label,"QEMU VVFAT ",11); + memcpy(bootsector->fat_type,(s->fat_type==12?"FAT12 ":s->fat_type==16?"FAT16 ":"FAT32 "),8); + bootsector->magic[0]=0x55; bootsector->magic[1]=0xaa; + + return 0; +} + +#ifdef DEBUG +static BDRVVVFATState *vvv = NULL; +#endif + +static int enable_write_target(BDRVVVFATState *s); +static int is_consistent(BDRVVVFATState *s); + +static int vvfat_open(BlockDriverState *bs, const char* dirname, int flags) +{ + BDRVVVFATState *s = bs->opaque; + int floppy = 0; + int i; + +#ifdef DEBUG + vvv = s; +#endif + +DLOG(if (stderr == NULL) { + stderr = fopen("vvfat.log", "a"); + setbuf(stderr, NULL); +}) + + s->bs = bs; + + s->fat_type=16; + /* LATER TODO: if FAT32, adjust */ + s->sectors_per_cluster=0x10; + /* 504MB disk*/ + bs->cyls=1024; bs->heads=16; bs->secs=63; + + s->current_cluster=0xffffffff; + + s->first_sectors_number=0x40; + /* read only is the default for safety */ + bs->read_only = 1; + s->qcow = s->write_target = NULL; + s->qcow_filename = NULL; + s->fat2 = NULL; + s->downcase_short_names = 1; + + if (!strstart(dirname, "fat:", NULL)) + return -1; + + if (strstr(dirname, ":floppy:")) { + floppy = 1; + s->fat_type = 12; + s->first_sectors_number = 1; + s->sectors_per_cluster=2; + bs->cyls = 80; bs->heads = 2; bs->secs = 36; + } + + s->sector_count=bs->cyls*bs->heads*bs->secs; + + if (strstr(dirname, ":32:")) { + fprintf(stderr, "Big fat greek warning: FAT32 has not been tested. You are welcome to do so!\n"); + s->fat_type = 32; + } else if (strstr(dirname, ":16:")) { + s->fat_type = 16; + } else if (strstr(dirname, ":12:")) { + s->fat_type = 12; + s->sector_count=2880; + } + + if (strstr(dirname, ":rw:")) { + if (enable_write_target(s)) + return -1; + bs->read_only = 0; + } + + i = strrchr(dirname, ':') - dirname; + assert(i >= 3); + if (dirname[i-2] == ':' && qemu_isalpha(dirname[i-1])) + /* workaround for DOS drive names */ + dirname += i-1; + else + dirname += i+1; + + bs->total_sectors=bs->cyls*bs->heads*bs->secs; + + if(init_directories(s, dirname)) + return -1; + + s->sector_count = s->faked_sectors + s->sectors_per_cluster*s->cluster_count; + + if(s->first_sectors_number==0x40) + init_mbr(s); + + /* for some reason or other, MS-DOS does not like to know about CHS... */ + if (floppy) + bs->heads = bs->cyls = bs->secs = 0; + + // assert(is_consistent(s)); + return 0; +} + +static inline void vvfat_close_current_file(BDRVVVFATState *s) +{ + if(s->current_mapping) { + s->current_mapping = NULL; + if (s->current_fd) { + close(s->current_fd); + s->current_fd = 0; + } + } + s->current_cluster = -1; +} + +/* mappings between index1 and index2-1 are supposed to be ordered + * return value is the index of the last mapping for which end>cluster_num + */ +static inline int find_mapping_for_cluster_aux(BDRVVVFATState* s,int cluster_num,int index1,int index2) +{ + int index3=index1+1; + while(1) { + mapping_t* mapping; + index3=(index1+index2)/2; + mapping=array_get(&(s->mapping),index3); + assert(mapping->begin < mapping->end); + if(mapping->begin>=cluster_num) { + assert(index2!=index3 || index2==0); + if(index2==index3) + return index1; + index2=index3; + } else { + if(index1==index3) + return mapping->end<=cluster_num ? index2 : index1; + index1=index3; + } + assert(index1<=index2); + DLOG(mapping=array_get(&(s->mapping),index1); + assert(mapping->begin<=cluster_num); + assert(index2 >= s->mapping.next || + ((mapping = array_get(&(s->mapping),index2)) && + mapping->end>cluster_num))); + } +} + +static inline mapping_t* find_mapping_for_cluster(BDRVVVFATState* s,int cluster_num) +{ + int index=find_mapping_for_cluster_aux(s,cluster_num,0,s->mapping.next); + mapping_t* mapping; + if(index>=s->mapping.next) + return NULL; + mapping=array_get(&(s->mapping),index); + if(mapping->begin>cluster_num) + return NULL; + assert(mapping->begin<=cluster_num && mapping->end>cluster_num); + return mapping; +} + +/* + * This function simply compares path == mapping->path. Since the mappings + * are sorted by cluster, this is expensive: O(n). + */ +static inline mapping_t* find_mapping_for_path(BDRVVVFATState* s, + const char* path) +{ + int i; + + for (i = 0; i < s->mapping.next; i++) { + mapping_t* mapping = array_get(&(s->mapping), i); + if (mapping->first_mapping_index < 0 && + !strcmp(path, mapping->path)) + return mapping; + } + + return NULL; +} + +static int open_file(BDRVVVFATState* s,mapping_t* mapping) +{ + if(!mapping) + return -1; + if(!s->current_mapping || + strcmp(s->current_mapping->path,mapping->path)) { + /* open file */ + int fd = open(mapping->path, O_RDONLY | O_BINARY | O_LARGEFILE); + if(fd<0) + return -1; + vvfat_close_current_file(s); + s->current_fd = fd; + s->current_mapping = mapping; + } + return 0; +} + +static inline int read_cluster(BDRVVVFATState *s,int cluster_num) +{ + if(s->current_cluster != cluster_num) { + int result=0; + off_t offset; + assert(!s->current_mapping || s->current_fd || (s->current_mapping->mode & MODE_DIRECTORY)); + if(!s->current_mapping + || s->current_mapping->begin>cluster_num + || s->current_mapping->end<=cluster_num) { + /* binary search of mappings for file */ + mapping_t* mapping=find_mapping_for_cluster(s,cluster_num); + + assert(!mapping || (cluster_num>=mapping->begin && cluster_numend)); + + if (mapping && mapping->mode & MODE_DIRECTORY) { + vvfat_close_current_file(s); + s->current_mapping = mapping; +read_cluster_directory: + offset = s->cluster_size*(cluster_num-s->current_mapping->begin); + s->cluster = (unsigned char*)s->directory.pointer+offset + + 0x20*s->current_mapping->info.dir.first_dir_index; + assert(((s->cluster-(unsigned char*)s->directory.pointer)%s->cluster_size)==0); + assert((char*)s->cluster+s->cluster_size <= s->directory.pointer+s->directory.next*s->directory.item_size); + s->current_cluster = cluster_num; + return 0; + } + + if(open_file(s,mapping)) + return -2; + } else if (s->current_mapping->mode & MODE_DIRECTORY) + goto read_cluster_directory; + + assert(s->current_fd); + + offset=s->cluster_size*(cluster_num-s->current_mapping->begin)+s->current_mapping->info.file.offset; + if(lseek(s->current_fd, offset, SEEK_SET)!=offset) + return -3; + s->cluster=s->cluster_buffer; + result=read(s->current_fd,s->cluster,s->cluster_size); + if(result<0) { + s->current_cluster = -1; + return -1; + } + s->current_cluster = cluster_num; + } + return 0; +} + +#ifdef DEBUG +static void hexdump(const void* address, uint32_t len) +{ + const unsigned char* p = address; + int i, j; + + for (i = 0; i < len; i += 16) { + for (j = 0; j < 16 && i + j < len; j++) + fprintf(stderr, "%02x ", p[i + j]); + for (; j < 16; j++) + fprintf(stderr, " "); + fprintf(stderr, " "); + for (j = 0; j < 16 && i + j < len; j++) + fprintf(stderr, "%c", (p[i + j] < ' ' || p[i + j] > 0x7f) ? '.' : p[i + j]); + fprintf(stderr, "\n"); + } +} + +static void print_direntry(const direntry_t* direntry) +{ + int j = 0; + char buffer[1024]; + + fprintf(stderr, "direntry 0x%x: ", (int)direntry); + if(!direntry) + return; + if(is_long_name(direntry)) { + unsigned char* c=(unsigned char*)direntry; + int i; + for(i=1;i<11 && c[i] && c[i]!=0xff;i+=2) +#define ADD_CHAR(c) {buffer[j] = (c); if (buffer[j] < ' ') buffer[j] = 0xb0; j++;} + ADD_CHAR(c[i]); + for(i=14;i<26 && c[i] && c[i]!=0xff;i+=2) + ADD_CHAR(c[i]); + for(i=28;i<32 && c[i] && c[i]!=0xff;i+=2) + ADD_CHAR(c[i]); + buffer[j] = 0; + fprintf(stderr, "%s\n", buffer); + } else { + int i; + for(i=0;i<11;i++) + ADD_CHAR(direntry->name[i]); + buffer[j] = 0; + fprintf(stderr,"%s attributes=0x%02x begin=%d size=%d\n", + buffer, + direntry->attributes, + begin_of_direntry(direntry),le32_to_cpu(direntry->size)); + } +} + +static void print_mapping(const mapping_t* mapping) +{ + fprintf(stderr, "mapping (0x%x): begin, end = %d, %d, dir_index = %d, first_mapping_index = %d, name = %s, mode = 0x%x, " , (int)mapping, mapping->begin, mapping->end, mapping->dir_index, mapping->first_mapping_index, mapping->path, mapping->mode); + if (mapping->mode & MODE_DIRECTORY) + fprintf(stderr, "parent_mapping_index = %d, first_dir_index = %d\n", mapping->info.dir.parent_mapping_index, mapping->info.dir.first_dir_index); + else + fprintf(stderr, "offset = %d\n", mapping->info.file.offset); +} +#endif + +static int vvfat_read(BlockDriverState *bs, int64_t sector_num, + uint8_t *buf, int nb_sectors) +{ + BDRVVVFATState *s = bs->opaque; + int i; + + for(i=0;i= s->sector_count) + return -1; + if (s->qcow) { + int n; + if (s->qcow->drv->bdrv_is_allocated(s->qcow, + sector_num, nb_sectors-i, &n)) { +DLOG(fprintf(stderr, "sectors %d+%d allocated\n", (int)sector_num, n)); + if (s->qcow->drv->bdrv_read(s->qcow, sector_num, buf+i*0x200, n)) + return -1; + i += n - 1; + sector_num += n - 1; + continue; + } +DLOG(fprintf(stderr, "sector %d not allocated\n", (int)sector_num)); + } + if(sector_numfaked_sectors) { + if(sector_numfirst_sectors_number) + memcpy(buf+i*0x200,&(s->first_sectors[sector_num*0x200]),0x200); + else if(sector_num-s->first_sectors_numbersectors_per_fat) + memcpy(buf+i*0x200,&(s->fat.pointer[(sector_num-s->first_sectors_number)*0x200]),0x200); + else if(sector_num-s->first_sectors_number-s->sectors_per_fatsectors_per_fat) + memcpy(buf+i*0x200,&(s->fat.pointer[(sector_num-s->first_sectors_number-s->sectors_per_fat)*0x200]),0x200); + } else { + uint32_t sector=sector_num-s->faked_sectors, + sector_offset_in_cluster=(sector%s->sectors_per_cluster), + cluster_num=sector/s->sectors_per_cluster; + if(read_cluster(s, cluster_num) != 0) { + /* LATER TODO: strict: return -1; */ + memset(buf+i*0x200,0,0x200); + continue; + } + memcpy(buf+i*0x200,s->cluster+sector_offset_in_cluster*0x200,0x200); + } + } + return 0; +} + +/* LATER TODO: statify all functions */ + +/* + * Idea of the write support (use snapshot): + * + * 1. check if all data is consistent, recording renames, modifications, + * new files and directories (in s->commits). + * + * 2. if the data is not consistent, stop committing + * + * 3. handle renames, and create new files and directories (do not yet + * write their contents) + * + * 4. walk the directories, fixing the mapping and direntries, and marking + * the handled mappings as not deleted + * + * 5. commit the contents of the files + * + * 6. handle deleted files and directories + * + */ + +typedef struct commit_t { + char* path; + union { + struct { uint32_t cluster; } rename; + struct { int dir_index; uint32_t modified_offset; } writeout; + struct { uint32_t first_cluster; } new_file; + struct { uint32_t cluster; } mkdir; + } param; + /* DELETEs and RMDIRs are handled differently: see handle_deletes() */ + enum { + ACTION_RENAME, ACTION_WRITEOUT, ACTION_NEW_FILE, ACTION_MKDIR + } action; +} commit_t; + +static void clear_commits(BDRVVVFATState* s) +{ + int i; +DLOG(fprintf(stderr, "clear_commits (%d commits)\n", s->commits.next)); + for (i = 0; i < s->commits.next; i++) { + commit_t* commit = array_get(&(s->commits), i); + assert(commit->path || commit->action == ACTION_WRITEOUT); + if (commit->action != ACTION_WRITEOUT) { + assert(commit->path); + free(commit->path); + } else + assert(commit->path == NULL); + } + s->commits.next = 0; +} + +static void schedule_rename(BDRVVVFATState* s, + uint32_t cluster, char* new_path) +{ + commit_t* commit = array_get_next(&(s->commits)); + commit->path = new_path; + commit->param.rename.cluster = cluster; + commit->action = ACTION_RENAME; +} + +static void schedule_writeout(BDRVVVFATState* s, + int dir_index, uint32_t modified_offset) +{ + commit_t* commit = array_get_next(&(s->commits)); + commit->path = NULL; + commit->param.writeout.dir_index = dir_index; + commit->param.writeout.modified_offset = modified_offset; + commit->action = ACTION_WRITEOUT; +} + +static void schedule_new_file(BDRVVVFATState* s, + char* path, uint32_t first_cluster) +{ + commit_t* commit = array_get_next(&(s->commits)); + commit->path = path; + commit->param.new_file.first_cluster = first_cluster; + commit->action = ACTION_NEW_FILE; +} + +static void schedule_mkdir(BDRVVVFATState* s, uint32_t cluster, char* path) +{ + commit_t* commit = array_get_next(&(s->commits)); + commit->path = path; + commit->param.mkdir.cluster = cluster; + commit->action = ACTION_MKDIR; +} + +typedef struct { + /* + * Since the sequence number is at most 0x3f, and the filename + * length is at most 13 times the sequence number, the maximal + * filename length is 0x3f * 13 bytes. + */ + unsigned char name[0x3f * 13 + 1]; + int checksum, len; + int sequence_number; +} long_file_name; + +static void lfn_init(long_file_name* lfn) +{ + lfn->sequence_number = lfn->len = 0; + lfn->checksum = 0x100; +} + +/* return 0 if parsed successfully, > 0 if no long name, < 0 if error */ +static int parse_long_name(long_file_name* lfn, + const direntry_t* direntry) +{ + int i, j, offset; + const unsigned char* pointer = (const unsigned char*)direntry; + + if (!is_long_name(direntry)) + return 1; + + if (pointer[0] & 0x40) { + lfn->sequence_number = pointer[0] & 0x3f; + lfn->checksum = pointer[13]; + lfn->name[0] = 0; + lfn->name[lfn->sequence_number * 13] = 0; + } else if ((pointer[0] & 0x3f) != --lfn->sequence_number) + return -1; + else if (pointer[13] != lfn->checksum) + return -2; + else if (pointer[12] || pointer[26] || pointer[27]) + return -3; + + offset = 13 * (lfn->sequence_number - 1); + for (i = 0, j = 1; i < 13; i++, j+=2) { + if (j == 11) + j = 14; + else if (j == 26) + j = 28; + + if (pointer[j+1] == 0) + lfn->name[offset + i] = pointer[j]; + else if (pointer[j+1] != 0xff || (pointer[0] & 0x40) == 0) + return -4; + else + lfn->name[offset + i] = 0; + } + + if (pointer[0] & 0x40) + lfn->len = offset + strlen((char*)lfn->name + offset); + + return 0; +} + +/* returns 0 if successful, >0 if no short_name, and <0 on error */ +static int parse_short_name(BDRVVVFATState* s, + long_file_name* lfn, direntry_t* direntry) +{ + int i, j; + + if (!is_short_name(direntry)) + return 1; + + for (j = 7; j >= 0 && direntry->name[j] == ' '; j--); + for (i = 0; i <= j; i++) { + if (direntry->name[i] <= ' ' || direntry->name[i] > 0x7f) + return -1; + else if (s->downcase_short_names) + lfn->name[i] = qemu_tolower(direntry->name[i]); + else + lfn->name[i] = direntry->name[i]; + } + + for (j = 2; j >= 0 && direntry->extension[j] == ' '; j--); + if (j >= 0) { + lfn->name[i++] = '.'; + lfn->name[i + j + 1] = '\0'; + for (;j >= 0; j--) { + if (direntry->extension[j] <= ' ' || direntry->extension[j] > 0x7f) + return -2; + else if (s->downcase_short_names) + lfn->name[i + j] = qemu_tolower(direntry->extension[j]); + else + lfn->name[i + j] = direntry->extension[j]; + } + } else + lfn->name[i + j + 1] = '\0'; + + lfn->len = strlen((char*)lfn->name); + + return 0; +} + +static inline uint32_t modified_fat_get(BDRVVVFATState* s, + unsigned int cluster) +{ + if (cluster < s->last_cluster_of_root_directory) { + if (cluster + 1 == s->last_cluster_of_root_directory) + return s->max_fat_value; + else + return cluster + 1; + } + + if (s->fat_type==32) { + uint32_t* entry=((uint32_t*)s->fat2)+cluster; + return le32_to_cpu(*entry); + } else if (s->fat_type==16) { + uint16_t* entry=((uint16_t*)s->fat2)+cluster; + return le16_to_cpu(*entry); + } else { + const uint8_t* x=s->fat2+cluster*3/2; + return ((x[0]|(x[1]<<8))>>(cluster&1?4:0))&0x0fff; + } +} + +static inline int cluster_was_modified(BDRVVVFATState* s, uint32_t cluster_num) +{ + int was_modified = 0; + int i, dummy; + + if (s->qcow == NULL) + return 0; + + for (i = 0; !was_modified && i < s->sectors_per_cluster; i++) + was_modified = s->qcow->drv->bdrv_is_allocated(s->qcow, + cluster2sector(s, cluster_num) + i, 1, &dummy); + + return was_modified; +} + +static const char* get_basename(const char* path) +{ + char* basename = strrchr(path, '/'); + if (basename == NULL) + return path; + else + return basename + 1; /* strip '/' */ +} + +/* + * The array s->used_clusters holds the states of the clusters. If it is + * part of a file, it has bit 2 set, in case of a directory, bit 1. If it + * was modified, bit 3 is set. + * If any cluster is allocated, but not part of a file or directory, this + * driver refuses to commit. + */ +typedef enum { + USED_DIRECTORY = 1, USED_FILE = 2, USED_ANY = 3, USED_ALLOCATED = 4 +} used_t; + +/* + * get_cluster_count_for_direntry() not only determines how many clusters + * are occupied by direntry, but also if it was renamed or modified. + * + * A file is thought to be renamed *only* if there already was a file with + * exactly the same first cluster, but a different name. + * + * Further, the files/directories handled by this function are + * assumed to be *not* deleted (and *only* those). + */ +static uint32_t get_cluster_count_for_direntry(BDRVVVFATState* s, + direntry_t* direntry, const char* path) +{ + /* + * This is a little bit tricky: + * IF the guest OS just inserts a cluster into the file chain, + * and leaves the rest alone, (i.e. the original file had clusters + * 15 -> 16, but now has 15 -> 32 -> 16), then the following happens: + * + * - do_commit will write the cluster into the file at the given + * offset, but + * + * - the cluster which is overwritten should be moved to a later + * position in the file. + * + * I am not aware that any OS does something as braindead, but this + * situation could happen anyway when not committing for a long time. + * Just to be sure that this does not bite us, detect it, and copy the + * contents of the clusters to-be-overwritten into the qcow. + */ + int copy_it = 0; + int was_modified = 0; + int32_t ret = 0; + + uint32_t cluster_num = begin_of_direntry(direntry); + uint32_t offset = 0; + int first_mapping_index = -1; + mapping_t* mapping = NULL; + const char* basename2 = NULL; + + vvfat_close_current_file(s); + + /* the root directory */ + if (cluster_num == 0) + return 0; + + /* write support */ + if (s->qcow) { + basename2 = get_basename(path); + + mapping = find_mapping_for_cluster(s, cluster_num); + + if (mapping) { + const char* basename; + + assert(mapping->mode & MODE_DELETED); + mapping->mode &= ~MODE_DELETED; + + basename = get_basename(mapping->path); + + assert(mapping->mode & MODE_NORMAL); + + /* rename */ + if (strcmp(basename, basename2)) + schedule_rename(s, cluster_num, strdup(path)); + } else if (is_file(direntry)) + /* new file */ + schedule_new_file(s, strdup(path), cluster_num); + else { + assert(0); + return 0; + } + } + + while(1) { + if (s->qcow) { + if (!copy_it && cluster_was_modified(s, cluster_num)) { + if (mapping == NULL || + mapping->begin > cluster_num || + mapping->end <= cluster_num) + mapping = find_mapping_for_cluster(s, cluster_num); + + + if (mapping && + (mapping->mode & MODE_DIRECTORY) == 0) { + + /* was modified in qcow */ + if (offset != mapping->info.file.offset + s->cluster_size + * (cluster_num - mapping->begin)) { + /* offset of this cluster in file chain has changed */ + assert(0); + copy_it = 1; + } else if (offset == 0) { + const char* basename = get_basename(mapping->path); + + if (strcmp(basename, basename2)) + copy_it = 1; + first_mapping_index = array_index(&(s->mapping), mapping); + } + + if (mapping->first_mapping_index != first_mapping_index + && mapping->info.file.offset > 0) { + assert(0); + copy_it = 1; + } + + /* need to write out? */ + if (!was_modified && is_file(direntry)) { + was_modified = 1; + schedule_writeout(s, mapping->dir_index, offset); + } + } + } + + if (copy_it) { + int i, dummy; + /* + * This is horribly inefficient, but that is okay, since + * it is rarely executed, if at all. + */ + int64_t offset = cluster2sector(s, cluster_num); + + vvfat_close_current_file(s); + for (i = 0; i < s->sectors_per_cluster; i++) + if (!s->qcow->drv->bdrv_is_allocated(s->qcow, + offset + i, 1, &dummy)) { + if (vvfat_read(s->bs, + offset, s->cluster_buffer, 1)) + return -1; + if (s->qcow->drv->bdrv_write(s->qcow, + offset, s->cluster_buffer, 1)) + return -2; + } + } + } + + ret++; + if (s->used_clusters[cluster_num] & USED_ANY) + return 0; + s->used_clusters[cluster_num] = USED_FILE; + + cluster_num = modified_fat_get(s, cluster_num); + + if (fat_eof(s, cluster_num)) + return ret; + else if (cluster_num < 2 || cluster_num > s->max_fat_value - 16) + return -1; + + offset += s->cluster_size; + } +} + +/* + * This function looks at the modified data (qcow). + * It returns 0 upon inconsistency or error, and the number of clusters + * used by the directory, its subdirectories and their files. + */ +static int check_directory_consistency(BDRVVVFATState *s, + int cluster_num, const char* path) +{ + int ret = 0; + unsigned char* cluster = qemu_malloc(s->cluster_size); + direntry_t* direntries = (direntry_t*)cluster; + mapping_t* mapping = find_mapping_for_cluster(s, cluster_num); + + long_file_name lfn; + int path_len = strlen(path); + char path2[PATH_MAX]; + + assert(path_len < PATH_MAX); /* len was tested before! */ + pstrcpy(path2, sizeof(path2), path); + path2[path_len] = '/'; + path2[path_len + 1] = '\0'; + + if (mapping) { + const char* basename = get_basename(mapping->path); + const char* basename2 = get_basename(path); + + assert(mapping->mode & MODE_DIRECTORY); + + assert(mapping->mode & MODE_DELETED); + mapping->mode &= ~MODE_DELETED; + + if (strcmp(basename, basename2)) + schedule_rename(s, cluster_num, strdup(path)); + } else + /* new directory */ + schedule_mkdir(s, cluster_num, strdup(path)); + + lfn_init(&lfn); + do { + int i; + int subret = 0; + + ret++; + + if (s->used_clusters[cluster_num] & USED_ANY) { + fprintf(stderr, "cluster %d used more than once\n", (int)cluster_num); + return 0; + } + s->used_clusters[cluster_num] = USED_DIRECTORY; + +DLOG(fprintf(stderr, "read cluster %d (sector %d)\n", (int)cluster_num, (int)cluster2sector(s, cluster_num))); + subret = vvfat_read(s->bs, cluster2sector(s, cluster_num), cluster, + s->sectors_per_cluster); + if (subret) { + fprintf(stderr, "Error fetching direntries\n"); + fail: + free(cluster); + return 0; + } + + for (i = 0; i < 0x10 * s->sectors_per_cluster; i++) { + int cluster_count = 0; + +DLOG(fprintf(stderr, "check direntry %d: \n", i); print_direntry(direntries + i)); + if (is_volume_label(direntries + i) || is_dot(direntries + i) || + is_free(direntries + i)) + continue; + + subret = parse_long_name(&lfn, direntries + i); + if (subret < 0) { + fprintf(stderr, "Error in long name\n"); + goto fail; + } + if (subret == 0 || is_free(direntries + i)) + continue; + + if (fat_chksum(direntries+i) != lfn.checksum) { + subret = parse_short_name(s, &lfn, direntries + i); + if (subret < 0) { + fprintf(stderr, "Error in short name (%d)\n", subret); + goto fail; + } + if (subret > 0 || !strcmp((char*)lfn.name, ".") + || !strcmp((char*)lfn.name, "..")) + continue; + } + lfn.checksum = 0x100; /* cannot use long name twice */ + + if (path_len + 1 + lfn.len >= PATH_MAX) { + fprintf(stderr, "Name too long: %s/%s\n", path, lfn.name); + goto fail; + } + pstrcpy(path2 + path_len + 1, sizeof(path2) - path_len - 1, + (char*)lfn.name); + + if (is_directory(direntries + i)) { + if (begin_of_direntry(direntries + i) == 0) { + DLOG(fprintf(stderr, "invalid begin for directory: %s\n", path2); print_direntry(direntries + i)); + goto fail; + } + cluster_count = check_directory_consistency(s, + begin_of_direntry(direntries + i), path2); + if (cluster_count == 0) { + DLOG(fprintf(stderr, "problem in directory %s:\n", path2); print_direntry(direntries + i)); + goto fail; + } + } else if (is_file(direntries + i)) { + /* check file size with FAT */ + cluster_count = get_cluster_count_for_direntry(s, direntries + i, path2); + if (cluster_count != + (le32_to_cpu(direntries[i].size) + s->cluster_size + - 1) / s->cluster_size) { + DLOG(fprintf(stderr, "Cluster count mismatch\n")); + goto fail; + } + } else + assert(0); /* cluster_count = 0; */ + + ret += cluster_count; + } + + cluster_num = modified_fat_get(s, cluster_num); + } while(!fat_eof(s, cluster_num)); + + free(cluster); + return ret; +} + +/* returns 1 on success */ +static int is_consistent(BDRVVVFATState* s) +{ + int i, check; + int used_clusters_count = 0; + +DLOG(checkpoint()); + /* + * - get modified FAT + * - compare the two FATs (TODO) + * - get buffer for marking used clusters + * - recurse direntries from root (using bs->bdrv_read to make + * sure to get the new data) + * - check that the FAT agrees with the size + * - count the number of clusters occupied by this directory and + * its files + * - check that the cumulative used cluster count agrees with the + * FAT + * - if all is fine, return number of used clusters + */ + if (s->fat2 == NULL) { + int size = 0x200 * s->sectors_per_fat; + s->fat2 = qemu_malloc(size); + memcpy(s->fat2, s->fat.pointer, size); + } + check = vvfat_read(s->bs, + s->first_sectors_number, s->fat2, s->sectors_per_fat); + if (check) { + fprintf(stderr, "Could not copy fat\n"); + return 0; + } + assert (s->used_clusters); + for (i = 0; i < sector2cluster(s, s->sector_count); i++) + s->used_clusters[i] &= ~USED_ANY; + + clear_commits(s); + + /* mark every mapped file/directory as deleted. + * (check_directory_consistency() will unmark those still present). */ + if (s->qcow) + for (i = 0; i < s->mapping.next; i++) { + mapping_t* mapping = array_get(&(s->mapping), i); + if (mapping->first_mapping_index < 0) + mapping->mode |= MODE_DELETED; + } + + used_clusters_count = check_directory_consistency(s, 0, s->path); + if (used_clusters_count <= 0) { + DLOG(fprintf(stderr, "problem in directory\n")); + return 0; + } + + check = s->last_cluster_of_root_directory; + for (i = check; i < sector2cluster(s, s->sector_count); i++) { + if (modified_fat_get(s, i)) { + if(!s->used_clusters[i]) { + DLOG(fprintf(stderr, "FAT was modified (%d), but cluster is not used?\n", i)); + return 0; + } + check++; + } + + if (s->used_clusters[i] == USED_ALLOCATED) { + /* allocated, but not used... */ + DLOG(fprintf(stderr, "unused, modified cluster: %d\n", i)); + return 0; + } + } + + if (check != used_clusters_count) + return 0; + + return used_clusters_count; +} + +static inline void adjust_mapping_indices(BDRVVVFATState* s, + int offset, int adjust) +{ + int i; + + for (i = 0; i < s->mapping.next; i++) { + mapping_t* mapping = array_get(&(s->mapping), i); + +#define ADJUST_MAPPING_INDEX(name) \ + if (mapping->name >= offset) \ + mapping->name += adjust + + ADJUST_MAPPING_INDEX(first_mapping_index); + if (mapping->mode & MODE_DIRECTORY) + ADJUST_MAPPING_INDEX(info.dir.parent_mapping_index); + } +} + +/* insert or update mapping */ +static mapping_t* insert_mapping(BDRVVVFATState* s, + uint32_t begin, uint32_t end) +{ + /* + * - find mapping where mapping->begin >= begin, + * - if mapping->begin > begin: insert + * - adjust all references to mappings! + * - else: adjust + * - replace name + */ + int index = find_mapping_for_cluster_aux(s, begin, 0, s->mapping.next); + mapping_t* mapping = NULL; + mapping_t* first_mapping = array_get(&(s->mapping), 0); + + if (index < s->mapping.next && (mapping = array_get(&(s->mapping), index)) + && mapping->begin < begin) { + mapping->end = begin; + index++; + mapping = array_get(&(s->mapping), index); + } + if (index >= s->mapping.next || mapping->begin > begin) { + mapping = array_insert(&(s->mapping), index, 1); + mapping->path = NULL; + adjust_mapping_indices(s, index, +1); + } + + mapping->begin = begin; + mapping->end = end; + +DLOG(mapping_t* next_mapping; +assert(index + 1 >= s->mapping.next || +((next_mapping = array_get(&(s->mapping), index + 1)) && + next_mapping->begin >= end))); + + if (s->current_mapping && first_mapping != (mapping_t*)s->mapping.pointer) + s->current_mapping = array_get(&(s->mapping), + s->current_mapping - first_mapping); + + return mapping; +} + +static int remove_mapping(BDRVVVFATState* s, int mapping_index) +{ + mapping_t* mapping = array_get(&(s->mapping), mapping_index); + mapping_t* first_mapping = array_get(&(s->mapping), 0); + + /* free mapping */ + if (mapping->first_mapping_index < 0) + free(mapping->path); + + /* remove from s->mapping */ + array_remove(&(s->mapping), mapping_index); + + /* adjust all references to mappings */ + adjust_mapping_indices(s, mapping_index, -1); + + if (s->current_mapping && first_mapping != (mapping_t*)s->mapping.pointer) + s->current_mapping = array_get(&(s->mapping), + s->current_mapping - first_mapping); + + return 0; +} + +static void adjust_dirindices(BDRVVVFATState* s, int offset, int adjust) +{ + int i; + for (i = 0; i < s->mapping.next; i++) { + mapping_t* mapping = array_get(&(s->mapping), i); + if (mapping->dir_index >= offset) + mapping->dir_index += adjust; + if ((mapping->mode & MODE_DIRECTORY) && + mapping->info.dir.first_dir_index >= offset) + mapping->info.dir.first_dir_index += adjust; + } +} + +static direntry_t* insert_direntries(BDRVVVFATState* s, + int dir_index, int count) +{ + /* + * make room in s->directory, + * adjust_dirindices + */ + direntry_t* result = array_insert(&(s->directory), dir_index, count); + if (result == NULL) + return NULL; + adjust_dirindices(s, dir_index, count); + return result; +} + +static int remove_direntries(BDRVVVFATState* s, int dir_index, int count) +{ + int ret = array_remove_slice(&(s->directory), dir_index, count); + if (ret) + return ret; + adjust_dirindices(s, dir_index, -count); + return 0; +} + +/* + * Adapt the mappings of the cluster chain starting at first cluster + * (i.e. if a file starts at first_cluster, the chain is followed according + * to the modified fat, and the corresponding entries in s->mapping are + * adjusted) + */ +static int commit_mappings(BDRVVVFATState* s, + uint32_t first_cluster, int dir_index) +{ + mapping_t* mapping = find_mapping_for_cluster(s, first_cluster); + direntry_t* direntry = array_get(&(s->directory), dir_index); + uint32_t cluster = first_cluster; + + vvfat_close_current_file(s); + + assert(mapping); + assert(mapping->begin == first_cluster); + mapping->first_mapping_index = -1; + mapping->dir_index = dir_index; + mapping->mode = (dir_index <= 0 || is_directory(direntry)) ? + MODE_DIRECTORY : MODE_NORMAL; + + while (!fat_eof(s, cluster)) { + uint32_t c, c1; + + for (c = cluster, c1 = modified_fat_get(s, c); c + 1 == c1; + c = c1, c1 = modified_fat_get(s, c1)); + + c++; + if (c > mapping->end) { + int index = array_index(&(s->mapping), mapping); + int i, max_i = s->mapping.next - index; + for (i = 1; i < max_i && mapping[i].begin < c; i++); + while (--i > 0) + remove_mapping(s, index + 1); + } + assert(mapping == array_get(&(s->mapping), s->mapping.next - 1) + || mapping[1].begin >= c); + mapping->end = c; + + if (!fat_eof(s, c1)) { + int i = find_mapping_for_cluster_aux(s, c1, 0, s->mapping.next); + mapping_t* next_mapping = i >= s->mapping.next ? NULL : + array_get(&(s->mapping), i); + + if (next_mapping == NULL || next_mapping->begin > c1) { + int i1 = array_index(&(s->mapping), mapping); + + next_mapping = insert_mapping(s, c1, c1+1); + + if (c1 < c) + i1++; + mapping = array_get(&(s->mapping), i1); + } + + next_mapping->dir_index = mapping->dir_index; + next_mapping->first_mapping_index = + mapping->first_mapping_index < 0 ? + array_index(&(s->mapping), mapping) : + mapping->first_mapping_index; + next_mapping->path = mapping->path; + next_mapping->mode = mapping->mode; + next_mapping->read_only = mapping->read_only; + if (mapping->mode & MODE_DIRECTORY) { + next_mapping->info.dir.parent_mapping_index = + mapping->info.dir.parent_mapping_index; + next_mapping->info.dir.first_dir_index = + mapping->info.dir.first_dir_index + + 0x10 * s->sectors_per_cluster * + (mapping->end - mapping->begin); + } else + next_mapping->info.file.offset = mapping->info.file.offset + + mapping->end - mapping->begin; + + mapping = next_mapping; + } + + cluster = c1; + } + + return 0; +} + +static int commit_direntries(BDRVVVFATState* s, + int dir_index, int parent_mapping_index) +{ + direntry_t* direntry = array_get(&(s->directory), dir_index); + uint32_t first_cluster = dir_index == 0 ? 0 : begin_of_direntry(direntry); + mapping_t* mapping = find_mapping_for_cluster(s, first_cluster); + + int factor = 0x10 * s->sectors_per_cluster; + int old_cluster_count, new_cluster_count; + int current_dir_index = mapping->info.dir.first_dir_index; + int first_dir_index = current_dir_index; + int ret, i; + uint32_t c; + +DLOG(fprintf(stderr, "commit_direntries for %s, parent_mapping_index %d\n", mapping->path, parent_mapping_index)); + + assert(direntry); + assert(mapping); + assert(mapping->begin == first_cluster); + assert(mapping->info.dir.first_dir_index < s->directory.next); + assert(mapping->mode & MODE_DIRECTORY); + assert(dir_index == 0 || is_directory(direntry)); + + mapping->info.dir.parent_mapping_index = parent_mapping_index; + + if (first_cluster == 0) { + old_cluster_count = new_cluster_count = + s->last_cluster_of_root_directory; + } else { + for (old_cluster_count = 0, c = first_cluster; !fat_eof(s, c); + c = fat_get(s, c)) + old_cluster_count++; + + for (new_cluster_count = 0, c = first_cluster; !fat_eof(s, c); + c = modified_fat_get(s, c)) + new_cluster_count++; + } + + if (new_cluster_count > old_cluster_count) { + if (insert_direntries(s, + current_dir_index + factor * old_cluster_count, + factor * (new_cluster_count - old_cluster_count)) == NULL) + return -1; + } else if (new_cluster_count < old_cluster_count) + remove_direntries(s, + current_dir_index + factor * new_cluster_count, + factor * (old_cluster_count - new_cluster_count)); + + for (c = first_cluster; !fat_eof(s, c); c = modified_fat_get(s, c)) { + void* direntry = array_get(&(s->directory), current_dir_index); + int ret = vvfat_read(s->bs, cluster2sector(s, c), direntry, + s->sectors_per_cluster); + if (ret) + return ret; + assert(!strncmp(s->directory.pointer, "QEMU", 4)); + current_dir_index += factor; + } + + ret = commit_mappings(s, first_cluster, dir_index); + if (ret) + return ret; + + /* recurse */ + for (i = 0; i < factor * new_cluster_count; i++) { + direntry = array_get(&(s->directory), first_dir_index + i); + if (is_directory(direntry) && !is_dot(direntry)) { + mapping = find_mapping_for_cluster(s, first_cluster); + assert(mapping->mode & MODE_DIRECTORY); + ret = commit_direntries(s, first_dir_index + i, + array_index(&(s->mapping), mapping)); + if (ret) + return ret; + } + } + + return 0; +} + +/* commit one file (adjust contents, adjust mapping), + return first_mapping_index */ +static int commit_one_file(BDRVVVFATState* s, + int dir_index, uint32_t offset) +{ + direntry_t* direntry = array_get(&(s->directory), dir_index); + uint32_t c = begin_of_direntry(direntry); + uint32_t first_cluster = c; + mapping_t* mapping = find_mapping_for_cluster(s, c); + uint32_t size = filesize_of_direntry(direntry); + char* cluster = qemu_malloc(s->cluster_size); + uint32_t i; + int fd = 0; + + assert(offset < size); + assert((offset % s->cluster_size) == 0); + + for (i = s->cluster_size; i < offset; i += s->cluster_size) + c = modified_fat_get(s, c); + + fd = open(mapping->path, O_RDWR | O_CREAT | O_BINARY, 0666); + if (fd < 0) { + fprintf(stderr, "Could not open %s... (%s, %d)\n", mapping->path, + strerror(errno), errno); + return fd; + } + if (offset > 0) + if (lseek(fd, offset, SEEK_SET) != offset) + return -3; + + while (offset < size) { + uint32_t c1; + int rest_size = (size - offset > s->cluster_size ? + s->cluster_size : size - offset); + int ret; + + c1 = modified_fat_get(s, c); + + assert((size - offset == 0 && fat_eof(s, c)) || + (size > offset && c >=2 && !fat_eof(s, c))); + + ret = vvfat_read(s->bs, cluster2sector(s, c), + (uint8_t*)cluster, (rest_size + 0x1ff) / 0x200); + + if (ret < 0) + return ret; + + if (write(fd, cluster, rest_size) < 0) + return -2; + + offset += rest_size; + c = c1; + } + + ftruncate(fd, size); + close(fd); + + return commit_mappings(s, first_cluster, dir_index); +} + +#ifdef DEBUG +/* test, if all mappings point to valid direntries */ +static void check1(BDRVVVFATState* s) +{ + int i; + for (i = 0; i < s->mapping.next; i++) { + mapping_t* mapping = array_get(&(s->mapping), i); + if (mapping->mode & MODE_DELETED) { + fprintf(stderr, "deleted\n"); + continue; + } + assert(mapping->dir_index >= 0); + assert(mapping->dir_index < s->directory.next); + direntry_t* direntry = array_get(&(s->directory), mapping->dir_index); + assert(mapping->begin == begin_of_direntry(direntry) || mapping->first_mapping_index >= 0); + if (mapping->mode & MODE_DIRECTORY) { + assert(mapping->info.dir.first_dir_index + 0x10 * s->sectors_per_cluster * (mapping->end - mapping->begin) <= s->directory.next); + assert((mapping->info.dir.first_dir_index % (0x10 * s->sectors_per_cluster)) == 0); + } + } +} + +/* test, if all direntries have mappings */ +static void check2(BDRVVVFATState* s) +{ + int i; + int first_mapping = -1; + + for (i = 0; i < s->directory.next; i++) { + direntry_t* direntry = array_get(&(s->directory), i); + + if (is_short_name(direntry) && begin_of_direntry(direntry)) { + mapping_t* mapping = find_mapping_for_cluster(s, begin_of_direntry(direntry)); + assert(mapping); + assert(mapping->dir_index == i || is_dot(direntry)); + assert(mapping->begin == begin_of_direntry(direntry) || is_dot(direntry)); + } + + if ((i % (0x10 * s->sectors_per_cluster)) == 0) { + /* cluster start */ + int j, count = 0; + + for (j = 0; j < s->mapping.next; j++) { + mapping_t* mapping = array_get(&(s->mapping), j); + if (mapping->mode & MODE_DELETED) + continue; + if (mapping->mode & MODE_DIRECTORY) { + if (mapping->info.dir.first_dir_index <= i && mapping->info.dir.first_dir_index + 0x10 * s->sectors_per_cluster > i) { + assert(++count == 1); + if (mapping->first_mapping_index == -1) + first_mapping = array_index(&(s->mapping), mapping); + else + assert(first_mapping == mapping->first_mapping_index); + if (mapping->info.dir.parent_mapping_index < 0) + assert(j == 0); + else { + mapping_t* parent = array_get(&(s->mapping), mapping->info.dir.parent_mapping_index); + assert(parent->mode & MODE_DIRECTORY); + assert(parent->info.dir.first_dir_index < mapping->info.dir.first_dir_index); + } + } + } + } + if (count == 0) + first_mapping = -1; + } + } +} +#endif + +static int handle_renames_and_mkdirs(BDRVVVFATState* s) +{ + int i; + +#ifdef DEBUG + fprintf(stderr, "handle_renames\n"); + for (i = 0; i < s->commits.next; i++) { + commit_t* commit = array_get(&(s->commits), i); + fprintf(stderr, "%d, %s (%d, %d)\n", i, commit->path ? commit->path : "(null)", commit->param.rename.cluster, commit->action); + } +#endif + + for (i = 0; i < s->commits.next;) { + commit_t* commit = array_get(&(s->commits), i); + if (commit->action == ACTION_RENAME) { + mapping_t* mapping = find_mapping_for_cluster(s, + commit->param.rename.cluster); + char* old_path = mapping->path; + + assert(commit->path); + mapping->path = commit->path; + if (rename(old_path, mapping->path)) + return -2; + + if (mapping->mode & MODE_DIRECTORY) { + int l1 = strlen(mapping->path); + int l2 = strlen(old_path); + int diff = l1 - l2; + direntry_t* direntry = array_get(&(s->directory), + mapping->info.dir.first_dir_index); + uint32_t c = mapping->begin; + int i = 0; + + /* recurse */ + while (!fat_eof(s, c)) { + do { + direntry_t* d = direntry + i; + + if (is_file(d) || (is_directory(d) && !is_dot(d))) { + mapping_t* m = find_mapping_for_cluster(s, + begin_of_direntry(d)); + int l = strlen(m->path); + char* new_path = qemu_malloc(l + diff + 1); + + assert(!strncmp(m->path, mapping->path, l2)); + + pstrcpy(new_path, l + diff + 1, mapping->path); + pstrcpy(new_path + l1, l + diff + 1 - l1, + m->path + l2); + + schedule_rename(s, m->begin, new_path); + } + i++; + } while((i % (0x10 * s->sectors_per_cluster)) != 0); + c = fat_get(s, c); + } + } + + free(old_path); + array_remove(&(s->commits), i); + continue; + } else if (commit->action == ACTION_MKDIR) { + mapping_t* mapping; + int j, parent_path_len; + +#ifdef __MINGW32__ + if (mkdir(commit->path)) + return -5; +#else + if (mkdir(commit->path, 0755)) + return -5; +#endif + + mapping = insert_mapping(s, commit->param.mkdir.cluster, + commit->param.mkdir.cluster + 1); + if (mapping == NULL) + return -6; + + mapping->mode = MODE_DIRECTORY; + mapping->read_only = 0; + mapping->path = commit->path; + j = s->directory.next; + assert(j); + insert_direntries(s, s->directory.next, + 0x10 * s->sectors_per_cluster); + mapping->info.dir.first_dir_index = j; + + parent_path_len = strlen(commit->path) + - strlen(get_basename(commit->path)) - 1; + for (j = 0; j < s->mapping.next; j++) { + mapping_t* m = array_get(&(s->mapping), j); + if (m->first_mapping_index < 0 && m != mapping && + !strncmp(m->path, mapping->path, parent_path_len) && + strlen(m->path) == parent_path_len) + break; + } + assert(j < s->mapping.next); + mapping->info.dir.parent_mapping_index = j; + + array_remove(&(s->commits), i); + continue; + } + + i++; + } + return 0; +} + +/* + * TODO: make sure that the short name is not matching *another* file + */ +static int handle_commits(BDRVVVFATState* s) +{ + int i, fail = 0; + + vvfat_close_current_file(s); + + for (i = 0; !fail && i < s->commits.next; i++) { + commit_t* commit = array_get(&(s->commits), i); + switch(commit->action) { + case ACTION_RENAME: case ACTION_MKDIR: + assert(0); + fail = -2; + break; + case ACTION_WRITEOUT: { + direntry_t* entry = array_get(&(s->directory), + commit->param.writeout.dir_index); + uint32_t begin = begin_of_direntry(entry); + mapping_t* mapping = find_mapping_for_cluster(s, begin); + + assert(mapping); + assert(mapping->begin == begin); + assert(commit->path == NULL); + + if (commit_one_file(s, commit->param.writeout.dir_index, + commit->param.writeout.modified_offset)) + fail = -3; + + break; + } + case ACTION_NEW_FILE: { + int begin = commit->param.new_file.first_cluster; + mapping_t* mapping = find_mapping_for_cluster(s, begin); + direntry_t* entry; + int i; + + /* find direntry */ + for (i = 0; i < s->directory.next; i++) { + entry = array_get(&(s->directory), i); + if (is_file(entry) && begin_of_direntry(entry) == begin) + break; + } + + if (i >= s->directory.next) { + fail = -6; + continue; + } + + /* make sure there exists an initial mapping */ + if (mapping && mapping->begin != begin) { + mapping->end = begin; + mapping = NULL; + } + if (mapping == NULL) { + mapping = insert_mapping(s, begin, begin+1); + } + /* most members will be fixed in commit_mappings() */ + assert(commit->path); + mapping->path = commit->path; + mapping->read_only = 0; + mapping->mode = MODE_NORMAL; + mapping->info.file.offset = 0; + + if (commit_one_file(s, i, 0)) + fail = -7; + + break; + } + default: + assert(0); + } + } + if (i > 0 && array_remove_slice(&(s->commits), 0, i)) + return -1; + return fail; +} + +static int handle_deletes(BDRVVVFATState* s) +{ + int i, deferred = 1, deleted = 1; + + /* delete files corresponding to mappings marked as deleted */ + /* handle DELETEs and unused mappings (modified_fat_get(s, mapping->begin) == 0) */ + while (deferred && deleted) { + deferred = 0; + deleted = 0; + + for (i = 1; i < s->mapping.next; i++) { + mapping_t* mapping = array_get(&(s->mapping), i); + if (mapping->mode & MODE_DELETED) { + direntry_t* entry = array_get(&(s->directory), + mapping->dir_index); + + if (is_free(entry)) { + /* remove file/directory */ + if (mapping->mode & MODE_DIRECTORY) { + int j, next_dir_index = s->directory.next, + first_dir_index = mapping->info.dir.first_dir_index; + + if (rmdir(mapping->path) < 0) { + if (errno == ENOTEMPTY) { + deferred++; + continue; + } else + return -5; + } + + for (j = 1; j < s->mapping.next; j++) { + mapping_t* m = array_get(&(s->mapping), j); + if (m->mode & MODE_DIRECTORY && + m->info.dir.first_dir_index > + first_dir_index && + m->info.dir.first_dir_index < + next_dir_index) + next_dir_index = + m->info.dir.first_dir_index; + } + remove_direntries(s, first_dir_index, + next_dir_index - first_dir_index); + + deleted++; + } + } else { + if (unlink(mapping->path)) + return -4; + deleted++; + } + DLOG(fprintf(stderr, "DELETE (%d)\n", i); print_mapping(mapping); print_direntry(entry)); + remove_mapping(s, i); + } + } + } + + return 0; +} + +/* + * synchronize mapping with new state: + * + * - copy FAT (with bdrv_read) + * - mark all filenames corresponding to mappings as deleted + * - recurse direntries from root (using bs->bdrv_read) + * - delete files corresponding to mappings marked as deleted + */ +static int do_commit(BDRVVVFATState* s) +{ + int ret = 0; + + /* the real meat are the commits. Nothing to do? Move along! */ + if (s->commits.next == 0) + return 0; + + vvfat_close_current_file(s); + + ret = handle_renames_and_mkdirs(s); + if (ret) { + fprintf(stderr, "Error handling renames (%d)\n", ret); + assert(0); + return ret; + } + + /* copy FAT (with bdrv_read) */ + memcpy(s->fat.pointer, s->fat2, 0x200 * s->sectors_per_fat); + + /* recurse direntries from root (using bs->bdrv_read) */ + ret = commit_direntries(s, 0, -1); + if (ret) { + fprintf(stderr, "Fatal: error while committing (%d)\n", ret); + assert(0); + return ret; + } + + ret = handle_commits(s); + if (ret) { + fprintf(stderr, "Error handling commits (%d)\n", ret); + assert(0); + return ret; + } + + ret = handle_deletes(s); + if (ret) { + fprintf(stderr, "Error deleting\n"); + assert(0); + return ret; + } + + s->qcow->drv->bdrv_make_empty(s->qcow); + + memset(s->used_clusters, 0, sector2cluster(s, s->sector_count)); + +DLOG(checkpoint()); + return 0; +} + +static int try_commit(BDRVVVFATState* s) +{ + vvfat_close_current_file(s); +DLOG(checkpoint()); + if(!is_consistent(s)) + return -1; + return do_commit(s); +} + +static int vvfat_write(BlockDriverState *bs, int64_t sector_num, + const uint8_t *buf, int nb_sectors) +{ + BDRVVVFATState *s = bs->opaque; + int i, ret; + +DLOG(checkpoint()); + + vvfat_close_current_file(s); + + /* + * Some sanity checks: + * - do not allow writing to the boot sector + * - do not allow to write non-ASCII filenames + */ + + if (sector_num < s->first_sectors_number) + return -1; + + for (i = sector2cluster(s, sector_num); + i <= sector2cluster(s, sector_num + nb_sectors - 1);) { + mapping_t* mapping = find_mapping_for_cluster(s, i); + if (mapping) { + if (mapping->read_only) { + fprintf(stderr, "Tried to write to write-protected file %s\n", + mapping->path); + return -1; + } + + if (mapping->mode & MODE_DIRECTORY) { + int begin = cluster2sector(s, i); + int end = begin + s->sectors_per_cluster, k; + int dir_index; + const direntry_t* direntries; + long_file_name lfn; + + lfn_init(&lfn); + + if (begin < sector_num) + begin = sector_num; + if (end > sector_num + nb_sectors) + end = sector_num + nb_sectors; + dir_index = mapping->dir_index + + 0x10 * (begin - mapping->begin * s->sectors_per_cluster); + direntries = (direntry_t*)(buf + 0x200 * (begin - sector_num)); + + for (k = 0; k < (end - begin) * 0x10; k++) { + /* do not allow non-ASCII filenames */ + if (parse_long_name(&lfn, direntries + k) < 0) { + fprintf(stderr, "Warning: non-ASCII filename\n"); + return -1; + } + /* no access to the direntry of a read-only file */ + else if (is_short_name(direntries+k) && + (direntries[k].attributes & 1)) { + if (memcmp(direntries + k, + array_get(&(s->directory), dir_index + k), + sizeof(direntry_t))) { + fprintf(stderr, "Warning: tried to write to write-protected file\n"); + return -1; + } + } + } + } + i = mapping->end; + } else + i++; + } + + /* + * Use qcow backend. Commit later. + */ +DLOG(fprintf(stderr, "Write to qcow backend: %d + %d\n", (int)sector_num, nb_sectors)); + ret = s->qcow->drv->bdrv_write(s->qcow, sector_num, buf, nb_sectors); + if (ret < 0) { + fprintf(stderr, "Error writing to qcow backend\n"); + return ret; + } + + for (i = sector2cluster(s, sector_num); + i <= sector2cluster(s, sector_num + nb_sectors - 1); i++) + if (i >= 0) + s->used_clusters[i] |= USED_ALLOCATED; + +DLOG(checkpoint()); + /* TODO: add timeout */ + try_commit(s); + +DLOG(checkpoint()); + return 0; +} + +static int vvfat_is_allocated(BlockDriverState *bs, + int64_t sector_num, int nb_sectors, int* n) +{ + BDRVVVFATState* s = bs->opaque; + *n = s->sector_count - sector_num; + if (*n > nb_sectors) + *n = nb_sectors; + else if (*n < 0) + return 0; + return 1; +} + +static int write_target_commit(BlockDriverState *bs, int64_t sector_num, + const uint8_t* buffer, int nb_sectors) { + BDRVVVFATState* s = bs->opaque; + return try_commit(s); +} + +static void write_target_close(BlockDriverState *bs) { + BDRVVVFATState* s = bs->opaque; + bdrv_delete(s->qcow); + free(s->qcow_filename); +} + +static BlockDriver vvfat_write_target = { + "vvfat_write_target", 0, NULL, NULL, NULL, + write_target_commit, + write_target_close, + NULL, NULL, NULL +}; + +static int enable_write_target(BDRVVVFATState *s) +{ + int size = sector2cluster(s, s->sector_count); + s->used_clusters = calloc(size, 1); + + array_init(&(s->commits), sizeof(commit_t)); + + s->qcow_filename = qemu_malloc(1024); + get_tmp_filename(s->qcow_filename, 1024); + if (bdrv_create(bdrv_find_format("qcow"), + s->qcow_filename, s->sector_count, "fat:", 0) < 0) + return -1; + s->qcow = bdrv_new(""); + if (s->qcow == NULL || bdrv_open(s->qcow, s->qcow_filename, 0) < 0) + return -1; + +#ifndef _WIN32 + unlink(s->qcow_filename); +#endif + + s->bs->backing_hd = calloc(sizeof(BlockDriverState), 1); + s->bs->backing_hd->drv = &vvfat_write_target; + s->bs->backing_hd->opaque = s; + + return 0; +} + +static void vvfat_close(BlockDriverState *bs) +{ + BDRVVVFATState *s = bs->opaque; + + vvfat_close_current_file(s); + array_free(&(s->fat)); + array_free(&(s->directory)); + array_free(&(s->mapping)); + if(s->cluster_buffer) + free(s->cluster_buffer); +} + +static BlockDriver bdrv_vvfat = { + .format_name = "vvfat", + .instance_size = sizeof(BDRVVVFATState), + .bdrv_open = vvfat_open, + .bdrv_read = vvfat_read, + .bdrv_write = vvfat_write, + .bdrv_close = vvfat_close, + .bdrv_is_allocated = vvfat_is_allocated, + .protocol_name = "fat", +}; + +static void bdrv_vvfat_init(void) +{ + bdrv_register(&bdrv_vvfat); +} + +block_init(bdrv_vvfat_init); + +#ifdef DEBUG +static void checkpoint(void) { + assert(((mapping_t*)array_get(&(vvv->mapping), 0))->end == 2); + check1(vvv); + check2(vvv); + assert(!vvv->current_mapping || vvv->current_fd || (vvv->current_mapping->mode & MODE_DIRECTORY)); +#if 0 + if (((direntry_t*)vvv->directory.pointer)[1].attributes != 0xf) + fprintf(stderr, "Nonono!\n"); + mapping_t* mapping; + direntry_t* direntry; + assert(vvv->mapping.size >= vvv->mapping.item_size * vvv->mapping.next); + assert(vvv->directory.size >= vvv->directory.item_size * vvv->directory.next); + if (vvv->mapping.next<47) + return; + assert((mapping = array_get(&(vvv->mapping), 47))); + assert(mapping->dir_index < vvv->directory.next); + direntry = array_get(&(vvv->directory), mapping->dir_index); + assert(!memcmp(direntry->name, "USB H ", 11) || direntry->name[0]==0); +#endif + return; + /* avoid compiler warnings: */ + hexdump(NULL, 100); + remove_mapping(vvv, NULL); + print_mapping(NULL); + print_direntry(NULL); +} +#endif diff --git a/configure b/configure index d3788fcc1..8fbe35b00 100755 --- a/configure +++ b/configure @@ -2029,7 +2029,7 @@ done # for target in $targets # build tree in object directory if source path is different from current one if test "$source_path_used" = "yes" ; then - DIRS="tests tests/cris slirp audio" + DIRS="tests tests/cris slirp audio block" FILES="Makefile tests/Makefile" FILES="$FILES tests/cris/Makefile tests/cris/.gdbinit" FILES="$FILES tests/test-mmap.c"