git.proxmox.com Git - qemu.git/commitdiff
Move block drivers into their own directory
author Anthony Liguori <aliguori@us.ibm.com>
Sat, 9 May 2009 22:14:19 +0000 (17:14 -0500)
committer Anthony Liguori <aliguori@us.ibm.com>
Thu, 14 May 2009 21:13:46 +0000 (16:13 -0500)
Signed-off-by: Anthony Liguori <aliguori@us.ibm.com>
28 files changed:
Makefile
block-bochs.c [deleted file]
block-cloop.c [deleted file]
block-cow.c [deleted file]
block-dmg.c [deleted file]
block-nbd.c [deleted file]
block-parallels.c [deleted file]
block-qcow.c [deleted file]
block-qcow2.c [deleted file]
block-raw-posix.c [deleted file]
block-raw-win32.c [deleted file]
block-vmdk.c [deleted file]
block-vpc.c [deleted file]
block-vvfat.c [deleted file]
block/bochs.c [new file with mode: 0644]
block/cloop.c [new file with mode: 0644]
block/cow.c [new file with mode: 0644]
block/dmg.c [new file with mode: 0644]
block/nbd.c [new file with mode: 0644]
block/parallels.c [new file with mode: 0644]
block/qcow.c [new file with mode: 0644]
block/qcow2.c [new file with mode: 0644]
block/raw-posix.c [new file with mode: 0644]
block/raw-win32.c [new file with mode: 0644]
block/vmdk.c [new file with mode: 0644]
block/vpc.c [new file with mode: 0644]
block/vvfat.c [new file with mode: 0644]
configure

diff --git a/Makefile b/Makefile
index 046c74369ae94b2de5d71a85ba0b30b2242a9bff..22d200671b04cfbb7e098c1374e78c6ebf708a43 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -64,18 +64,18 @@ recurse-all: $(SUBDIR_RULES)
 # BLOCK_OBJS is code used by both qemu system emulation and qemu-img
 
 BLOCK_OBJS=cutils.o cache-utils.o qemu-malloc.o module.o
-BLOCK_OBJS+=block-cow.o block-qcow.o aes.o block-vmdk.o block-cloop.o
-BLOCK_OBJS+=block-dmg.o block-bochs.o block-vpc.o block-vvfat.o
-BLOCK_OBJS+=block-qcow2.o block-parallels.o block-nbd.o
+BLOCK_OBJS+=block/cow.o block/qcow.o aes.o block/vmdk.o block/cloop.o
+BLOCK_OBJS+=block/dmg.o block/bochs.o block/vpc.o block/vvfat.o
+BLOCK_OBJS+=block/qcow2.o block/parallels.o block/nbd.o
 BLOCK_OBJS+=nbd.o block.o aio.o
 
 ifdef CONFIG_WIN32
-BLOCK_OBJS += block-raw-win32.o
+BLOCK_OBJS += block/raw-win32.o
 else
 ifdef CONFIG_AIO
 BLOCK_OBJS += posix-aio-compat.o
 endif
-BLOCK_OBJS += block-raw-posix.o
+BLOCK_OBJS += block/raw-posix.o
 endif
 
 ######################################################################
@@ -234,7 +234,7 @@ clean:
 # avoid old build problems by removing potentially incorrect old files
        rm -f config.mak config.h op-i386.h opc-i386.h gen-op-i386.h op-arm.h opc-arm.h gen-op-arm.h
        rm -f *.o *.d *.a $(TOOLS) TAGS cscope.* *.pod *~ */*~
-       rm -f slirp/*.o slirp/*.d audio/*.o audio/*.d
+       rm -f slirp/*.o slirp/*.d audio/*.o audio/*.d block/*.o block/*.d
        $(MAKE) -C tests clean
        for d in $(TARGET_DIRS); do \
        $(MAKE) -C $$d $@ || exit 1 ; \
@@ -408,4 +408,4 @@ tarbin:
        $(mandir)/man8/qemu-nbd.8
 
 # Include automatically generated dependency files
--include $(wildcard *.d audio/*.d slirp/*.d)
+-include $(wildcard *.d audio/*.d slirp/*.d block/*.d)
diff --git a/block-bochs.c b/block-bochs.c
deleted file mode 100644 (file)
index bac81c4..0000000
+++ /dev/null
@@ -1,259 +0,0 @@
-/*
- * Block driver for the various disk image formats used by Bochs
- * Currently only for "growing" type in read-only mode
- *
- * Copyright (c) 2005 Alex Beregszaszi
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
- * THE SOFTWARE.
- */
-#include "qemu-common.h"
-#include "block_int.h"
-#include "module.h"
-
-/**************************************************************/
-
-#define HEADER_MAGIC "Bochs Virtual HD Image"
-#define HEADER_VERSION 0x00020000
-#define HEADER_V1 0x00010000
-#define HEADER_SIZE 512
-
-#define REDOLOG_TYPE "Redolog"
-#define GROWING_TYPE "Growing"
-
-// not allocated: 0xffffffff
-
-// always little-endian
-struct bochs_header_v1 {
-    char magic[32]; // "Bochs Virtual HD Image"
-    char type[16]; // "Redolog"
-    char subtype[16]; // "Undoable" / "Volatile" / "Growing"
-    uint32_t version;
-    uint32_t header; // size of header
-
-    union {
-       struct {
-           uint32_t catalog; // num of entries
-           uint32_t bitmap; // bitmap size
-           uint32_t extent; // extent size
-           uint64_t disk; // disk size
-           char padding[HEADER_SIZE - 64 - 8 - 20];
-       } redolog;
-       char padding[HEADER_SIZE - 64 - 8];
-    } extra;
-};
-
-// always little-endian
-struct bochs_header {
-    char magic[32]; // "Bochs Virtual HD Image"
-    char type[16]; // "Redolog"
-    char subtype[16]; // "Undoable" / "Volatile" / "Growing"
-    uint32_t version;
-    uint32_t header; // size of header
-
-    union {
-       struct {
-           uint32_t catalog; // num of entries
-           uint32_t bitmap; // bitmap size
-           uint32_t extent; // extent size
-           uint32_t reserved; // for ???
-           uint64_t disk; // disk size
-           char padding[HEADER_SIZE - 64 - 8 - 24];
-       } redolog;
-       char padding[HEADER_SIZE - 64 - 8];
-    } extra;
-};
-
-typedef struct BDRVBochsState {
-    int fd;
-
-    uint32_t *catalog_bitmap;
-    int catalog_size;
-
-    int data_offset;
-
-    int bitmap_blocks;
-    int extent_blocks;
-    int extent_size;
-} BDRVBochsState;
-
-static int bochs_probe(const uint8_t *buf, int buf_size, const char *filename)
-{
-    const struct bochs_header *bochs = (const void *)buf;
-
-    if (buf_size < HEADER_SIZE)
-       return 0;
-
-    if (!strcmp(bochs->magic, HEADER_MAGIC) &&
-       !strcmp(bochs->type, REDOLOG_TYPE) &&
-       !strcmp(bochs->subtype, GROWING_TYPE) &&
-       ((le32_to_cpu(bochs->version) == HEADER_VERSION) ||
-       (le32_to_cpu(bochs->version) == HEADER_V1)))
-       return 100;
-
-    return 0;
-}
-
-static int bochs_open(BlockDriverState *bs, const char *filename, int flags)
-{
-    BDRVBochsState *s = bs->opaque;
-    int fd, i;
-    struct bochs_header bochs;
-    struct bochs_header_v1 header_v1;
-
-    fd = open(filename, O_RDWR | O_BINARY);
-    if (fd < 0) {
-        fd = open(filename, O_RDONLY | O_BINARY);
-        if (fd < 0)
-            return -1;
-    }
-
-    bs->read_only = 1; // no write support yet
-
-    s->fd = fd;
-
-    if (read(fd, &bochs, sizeof(bochs)) != sizeof(bochs)) {
-        goto fail;
-    }
-
-    if (strcmp(bochs.magic, HEADER_MAGIC) ||
-        strcmp(bochs.type, REDOLOG_TYPE) ||
-        strcmp(bochs.subtype, GROWING_TYPE) ||
-       ((le32_to_cpu(bochs.version) != HEADER_VERSION) &&
-       (le32_to_cpu(bochs.version) != HEADER_V1))) {
-        goto fail;
-    }
-
-    if (le32_to_cpu(bochs.version) == HEADER_V1) {
-      memcpy(&header_v1, &bochs, sizeof(bochs));
-      bs->total_sectors = le64_to_cpu(header_v1.extra.redolog.disk) / 512;
-    } else {
-      bs->total_sectors = le64_to_cpu(bochs.extra.redolog.disk) / 512;
-    }
-
-    lseek(s->fd, le32_to_cpu(bochs.header), SEEK_SET);
-
-    s->catalog_size = le32_to_cpu(bochs.extra.redolog.catalog);
-    s->catalog_bitmap = qemu_malloc(s->catalog_size * 4);
-    if (read(s->fd, s->catalog_bitmap, s->catalog_size * 4) !=
-       s->catalog_size * 4)
-       goto fail;
-    for (i = 0; i < s->catalog_size; i++)
-       le32_to_cpus(&s->catalog_bitmap[i]);
-
-    s->data_offset = le32_to_cpu(bochs.header) + (s->catalog_size * 4);
-
-    s->bitmap_blocks = 1 + (le32_to_cpu(bochs.extra.redolog.bitmap) - 1) / 512;
-    s->extent_blocks = 1 + (le32_to_cpu(bochs.extra.redolog.extent) - 1) / 512;
-
-    s->extent_size = le32_to_cpu(bochs.extra.redolog.extent);
-
-    return 0;
- fail:
-    close(fd);
-    return -1;
-}
-
-static inline int seek_to_sector(BlockDriverState *bs, int64_t sector_num)
-{
-    BDRVBochsState *s = bs->opaque;
-    int64_t offset = sector_num * 512;
-    int64_t extent_index, extent_offset, bitmap_offset, block_offset;
-    char bitmap_entry;
-
-    // seek to sector
-    extent_index = offset / s->extent_size;
-    extent_offset = (offset % s->extent_size) / 512;
-
-    if (s->catalog_bitmap[extent_index] == 0xffffffff)
-    {
-//     fprintf(stderr, "page not allocated [%x - %x:%x]\n",
-//         sector_num, extent_index, extent_offset);
-       return -1; // not allocated
-    }
-
-    bitmap_offset = s->data_offset + (512 * s->catalog_bitmap[extent_index] *
-       (s->extent_blocks + s->bitmap_blocks));
-    block_offset = bitmap_offset + (512 * (s->bitmap_blocks + extent_offset));
-
-//    fprintf(stderr, "sect: %x [ext i: %x o: %x] -> %x bitmap: %x block: %x\n",
-//     sector_num, extent_index, extent_offset,
-//     le32_to_cpu(s->catalog_bitmap[extent_index]),
-//     bitmap_offset, block_offset);
-
-    // read in bitmap for current extent
-    lseek(s->fd, bitmap_offset + (extent_offset / 8), SEEK_SET);
-
-    read(s->fd, &bitmap_entry, 1);
-
-    if (!((bitmap_entry >> (extent_offset % 8)) & 1))
-    {
-//     fprintf(stderr, "sector (%x) in bitmap not allocated\n",
-//         sector_num);
-       return -1; // not allocated
-    }
-
-    lseek(s->fd, block_offset, SEEK_SET);
-
-    return 0;
-}
-
-static int bochs_read(BlockDriverState *bs, int64_t sector_num,
-                    uint8_t *buf, int nb_sectors)
-{
-    BDRVBochsState *s = bs->opaque;
-    int ret;
-
-    while (nb_sectors > 0) {
-       if (!seek_to_sector(bs, sector_num))
-       {
-           ret = read(s->fd, buf, 512);
-           if (ret != 512)
-               return -1;
-       }
-       else
-            memset(buf, 0, 512);
-        nb_sectors--;
-        sector_num++;
-        buf += 512;
-    }
-    return 0;
-}
-
-static void bochs_close(BlockDriverState *bs)
-{
-    BDRVBochsState *s = bs->opaque;
-    qemu_free(s->catalog_bitmap);
-    close(s->fd);
-}
-
-static BlockDriver bdrv_bochs = {
-    .format_name       = "bochs",
-    .instance_size     = sizeof(BDRVBochsState),
-    .bdrv_probe                = bochs_probe,
-    .bdrv_open         = bochs_open,
-    .bdrv_read         = bochs_read,
-    .bdrv_close                = bochs_close,
-};
-
-static void bdrv_bochs_init(void)
-{
-    bdrv_register(&bdrv_bochs);
-}
-
-block_init(bdrv_bochs_init);
diff --git a/block-cloop.c b/block-cloop.c
deleted file mode 100644 (file)
index 06c687e..0000000
+++ /dev/null
@@ -1,171 +0,0 @@
-/*
- * QEMU Block driver for CLOOP images
- *
- * Copyright (c) 2004 Johannes E. Schindelin
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
- * THE SOFTWARE.
- */
-#include "qemu-common.h"
-#include "block_int.h"
-#include "module.h"
-#include <zlib.h>
-
-typedef struct BDRVCloopState {
-    int fd;
-    uint32_t block_size;
-    uint32_t n_blocks;
-    uint64_t* offsets;
-    uint32_t sectors_per_block;
-    uint32_t current_block;
-    uint8_t *compressed_block;
-    uint8_t *uncompressed_block;
-    z_stream zstream;
-} BDRVCloopState;
-
-static int cloop_probe(const uint8_t *buf, int buf_size, const char *filename)
-{
-    const char* magic_version_2_0="#!/bin/sh\n"
-       "#V2.0 Format\n"
-       "modprobe cloop file=$0 && mount -r -t iso9660 /dev/cloop $1\n";
-    int length=strlen(magic_version_2_0);
-    if(length>buf_size)
-       length=buf_size;
-    if(!memcmp(magic_version_2_0,buf,length))
-       return 2;
-    return 0;
-}
-
-static int cloop_open(BlockDriverState *bs, const char *filename, int flags)
-{
-    BDRVCloopState *s = bs->opaque;
-    uint32_t offsets_size,max_compressed_block_size=1,i;
-
-    s->fd = open(filename, O_RDONLY | O_BINARY);
-    if (s->fd < 0)
-        return -errno;
-    bs->read_only = 1;
-
-    /* read header */
-    if(lseek(s->fd,128,SEEK_SET)<0) {
-cloop_close:
-       close(s->fd);
-       return -1;
-    }
-    if(read(s->fd,&s->block_size,4)<4)
-       goto cloop_close;
-    s->block_size=be32_to_cpu(s->block_size);
-    if(read(s->fd,&s->n_blocks,4)<4)
-       goto cloop_close;
-    s->n_blocks=be32_to_cpu(s->n_blocks);
-
-    /* read offsets */
-    offsets_size=s->n_blocks*sizeof(uint64_t);
-    s->offsets=(uint64_t*)qemu_malloc(offsets_size);
-    if(read(s->fd,s->offsets,offsets_size)<offsets_size)
-       goto cloop_close;
-    for(i=0;i<s->n_blocks;i++) {
-       s->offsets[i]=be64_to_cpu(s->offsets[i]);
-       if(i>0) {
-           uint32_t size=s->offsets[i]-s->offsets[i-1];
-           if(size>max_compressed_block_size)
-               max_compressed_block_size=size;
-       }
-    }
-
-    /* initialize zlib engine */
-    s->compressed_block = qemu_malloc(max_compressed_block_size+1);
-    s->uncompressed_block = qemu_malloc(s->block_size);
-    if(inflateInit(&s->zstream) != Z_OK)
-       goto cloop_close;
-    s->current_block=s->n_blocks;
-
-    s->sectors_per_block = s->block_size/512;
-    bs->total_sectors = s->n_blocks*s->sectors_per_block;
-    return 0;
-}
-
-static inline int cloop_read_block(BDRVCloopState *s,int block_num)
-{
-    if(s->current_block != block_num) {
-       int ret;
-        uint32_t bytes = s->offsets[block_num+1]-s->offsets[block_num];
-
-       lseek(s->fd, s->offsets[block_num], SEEK_SET);
-        ret = read(s->fd, s->compressed_block, bytes);
-        if (ret != bytes)
-            return -1;
-
-       s->zstream.next_in = s->compressed_block;
-       s->zstream.avail_in = bytes;
-       s->zstream.next_out = s->uncompressed_block;
-       s->zstream.avail_out = s->block_size;
-       ret = inflateReset(&s->zstream);
-       if(ret != Z_OK)
-           return -1;
-       ret = inflate(&s->zstream, Z_FINISH);
-       if(ret != Z_STREAM_END || s->zstream.total_out != s->block_size)
-           return -1;
-
-       s->current_block = block_num;
-    }
-    return 0;
-}
-
-static int cloop_read(BlockDriverState *bs, int64_t sector_num,
-                    uint8_t *buf, int nb_sectors)
-{
-    BDRVCloopState *s = bs->opaque;
-    int i;
-
-    for(i=0;i<nb_sectors;i++) {
-       uint32_t sector_offset_in_block=((sector_num+i)%s->sectors_per_block),
-           block_num=(sector_num+i)/s->sectors_per_block;
-       if(cloop_read_block(s, block_num) != 0)
-           return -1;
-       memcpy(buf+i*512,s->uncompressed_block+sector_offset_in_block*512,512);
-    }
-    return 0;
-}
-
-static void cloop_close(BlockDriverState *bs)
-{
-    BDRVCloopState *s = bs->opaque;
-    close(s->fd);
-    if(s->n_blocks>0)
-       free(s->offsets);
-    free(s->compressed_block);
-    free(s->uncompressed_block);
-    inflateEnd(&s->zstream);
-}
-
-static BlockDriver bdrv_cloop = {
-    .format_name       = "cloop",
-    .instance_size     = sizeof(BDRVCloopState),
-    .bdrv_probe                = cloop_probe,
-    .bdrv_open         = cloop_open,
-    .bdrv_read         = cloop_read,
-    .bdrv_close                = cloop_close,
-};
-
-static void bdrv_cloop_init(void)
-{
-    bdrv_register(&bdrv_cloop);
-}
-
-block_init(bdrv_cloop_init);
diff --git a/block-cow.c b/block-cow.c
deleted file mode 100644 (file)
index 94b3549..0000000
+++ /dev/null
@@ -1,275 +0,0 @@
-/*
- * Block driver for the COW format
- *
- * Copyright (c) 2004 Fabrice Bellard
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
- * THE SOFTWARE.
- */
-#ifndef _WIN32
-#include "qemu-common.h"
-#include "block_int.h"
-#include "module.h"
-#include <sys/mman.h>
-
-/**************************************************************/
-/* COW block driver using file system holes */
-
-/* user mode linux compatible COW file */
-#define COW_MAGIC 0x4f4f4f4d  /* MOOO */
-#define COW_VERSION 2
-
-struct cow_header_v2 {
-    uint32_t magic;
-    uint32_t version;
-    char backing_file[1024];
-    int32_t mtime;
-    uint64_t size;
-    uint32_t sectorsize;
-};
-
-typedef struct BDRVCowState {
-    int fd;
-    uint8_t *cow_bitmap; /* if non NULL, COW mappings are used first */
-    uint8_t *cow_bitmap_addr; /* mmap address of cow_bitmap */
-    int cow_bitmap_size;
-    int64_t cow_sectors_offset;
-} BDRVCowState;
-
-static int cow_probe(const uint8_t *buf, int buf_size, const char *filename)
-{
-    const struct cow_header_v2 *cow_header = (const void *)buf;
-
-    if (buf_size >= sizeof(struct cow_header_v2) &&
-        be32_to_cpu(cow_header->magic) == COW_MAGIC &&
-        be32_to_cpu(cow_header->version) == COW_VERSION)
-        return 100;
-    else
-        return 0;
-}
-
-static int cow_open(BlockDriverState *bs, const char *filename, int flags)
-{
-    BDRVCowState *s = bs->opaque;
-    int fd;
-    struct cow_header_v2 cow_header;
-    int64_t size;
-
-    fd = open(filename, O_RDWR | O_BINARY | O_LARGEFILE);
-    if (fd < 0) {
-        fd = open(filename, O_RDONLY | O_BINARY | O_LARGEFILE);
-        if (fd < 0)
-            return -1;
-    }
-    s->fd = fd;
-    /* see if it is a cow image */
-    if (read(fd, &cow_header, sizeof(cow_header)) != sizeof(cow_header)) {
-        goto fail;
-    }
-
-    if (be32_to_cpu(cow_header.magic) != COW_MAGIC ||
-        be32_to_cpu(cow_header.version) != COW_VERSION) {
-        goto fail;
-    }
-
-    /* cow image found */
-    size = be64_to_cpu(cow_header.size);
-    bs->total_sectors = size / 512;
-
-    pstrcpy(bs->backing_file, sizeof(bs->backing_file),
-            cow_header.backing_file);
-
-    /* mmap the bitmap */
-    s->cow_bitmap_size = ((bs->total_sectors + 7) >> 3) + sizeof(cow_header);
-    s->cow_bitmap_addr = (void *)mmap(get_mmap_addr(s->cow_bitmap_size),
-                                      s->cow_bitmap_size,
-                                      PROT_READ | PROT_WRITE,
-                                      MAP_SHARED, s->fd, 0);
-    if (s->cow_bitmap_addr == MAP_FAILED)
-        goto fail;
-    s->cow_bitmap = s->cow_bitmap_addr + sizeof(cow_header);
-    s->cow_sectors_offset = (s->cow_bitmap_size + 511) & ~511;
-    return 0;
- fail:
-    close(fd);
-    return -1;
-}
-
-static inline void cow_set_bit(uint8_t *bitmap, int64_t bitnum)
-{
-    bitmap[bitnum / 8] |= (1 << (bitnum%8));
-}
-
-static inline int is_bit_set(const uint8_t *bitmap, int64_t bitnum)
-{
-    return !!(bitmap[bitnum / 8] & (1 << (bitnum%8)));
-}
-
-
-/* Return true if first block has been changed (ie. current version is
- * in COW file).  Set the number of continuous blocks for which that
- * is true. */
-static inline int is_changed(uint8_t *bitmap,
-                             int64_t sector_num, int nb_sectors,
-                             int *num_same)
-{
-    int changed;
-
-    if (!bitmap || nb_sectors == 0) {
-       *num_same = nb_sectors;
-       return 0;
-    }
-
-    changed = is_bit_set(bitmap, sector_num);
-    for (*num_same = 1; *num_same < nb_sectors; (*num_same)++) {
-       if (is_bit_set(bitmap, sector_num + *num_same) != changed)
-           break;
-    }
-
-    return changed;
-}
-
-static int cow_is_allocated(BlockDriverState *bs, int64_t sector_num,
-                            int nb_sectors, int *pnum)
-{
-    BDRVCowState *s = bs->opaque;
-    return is_changed(s->cow_bitmap, sector_num, nb_sectors, pnum);
-}
-
-static int cow_read(BlockDriverState *bs, int64_t sector_num,
-                    uint8_t *buf, int nb_sectors)
-{
-    BDRVCowState *s = bs->opaque;
-    int ret, n;
-
-    while (nb_sectors > 0) {
-        if (is_changed(s->cow_bitmap, sector_num, nb_sectors, &n)) {
-            lseek(s->fd, s->cow_sectors_offset + sector_num * 512, SEEK_SET);
-            ret = read(s->fd, buf, n * 512);
-            if (ret != n * 512)
-                return -1;
-        } else {
-            if (bs->backing_hd) {
-                /* read from the base image */
-                ret = bdrv_read(bs->backing_hd, sector_num, buf, n);
-                if (ret < 0)
-                    return -1;
-            } else {
-            memset(buf, 0, n * 512);
-        }
-        }
-        nb_sectors -= n;
-        sector_num += n;
-        buf += n * 512;
-    }
-    return 0;
-}
-
-static int cow_write(BlockDriverState *bs, int64_t sector_num,
-                     const uint8_t *buf, int nb_sectors)
-{
-    BDRVCowState *s = bs->opaque;
-    int ret, i;
-
-    lseek(s->fd, s->cow_sectors_offset + sector_num * 512, SEEK_SET);
-    ret = write(s->fd, buf, nb_sectors * 512);
-    if (ret != nb_sectors * 512)
-        return -1;
-    for (i = 0; i < nb_sectors; i++)
-        cow_set_bit(s->cow_bitmap, sector_num + i);
-    return 0;
-}
-
-static void cow_close(BlockDriverState *bs)
-{
-    BDRVCowState *s = bs->opaque;
-    munmap((void *)s->cow_bitmap_addr, s->cow_bitmap_size);
-    close(s->fd);
-}
-
-static int cow_create(const char *filename, int64_t image_sectors,
-                      const char *image_filename, int flags)
-{
-    int fd, cow_fd;
-    struct cow_header_v2 cow_header;
-    struct stat st;
-
-    if (flags)
-        return -ENOTSUP;
-
-    cow_fd = open(filename, O_WRONLY | O_CREAT | O_TRUNC | O_BINARY,
-              0644);
-    if (cow_fd < 0)
-        return -1;
-    memset(&cow_header, 0, sizeof(cow_header));
-    cow_header.magic = cpu_to_be32(COW_MAGIC);
-    cow_header.version = cpu_to_be32(COW_VERSION);
-    if (image_filename) {
-        /* Note: if no file, we put a dummy mtime */
-        cow_header.mtime = cpu_to_be32(0);
-
-        fd = open(image_filename, O_RDONLY | O_BINARY);
-        if (fd < 0) {
-            close(cow_fd);
-            goto mtime_fail;
-        }
-        if (fstat(fd, &st) != 0) {
-            close(fd);
-            goto mtime_fail;
-        }
-        close(fd);
-        cow_header.mtime = cpu_to_be32(st.st_mtime);
-    mtime_fail:
-        pstrcpy(cow_header.backing_file, sizeof(cow_header.backing_file),
-                image_filename);
-    }
-    cow_header.sectorsize = cpu_to_be32(512);
-    cow_header.size = cpu_to_be64(image_sectors * 512);
-    write(cow_fd, &cow_header, sizeof(cow_header));
-    /* resize to include at least all the bitmap */
-    ftruncate(cow_fd, sizeof(cow_header) + ((image_sectors + 7) >> 3));
-    close(cow_fd);
-    return 0;
-}
-
-static void cow_flush(BlockDriverState *bs)
-{
-    BDRVCowState *s = bs->opaque;
-    fsync(s->fd);
-}
-
-static BlockDriver bdrv_cow = {
-    .format_name       = "cow",
-    .instance_size     = sizeof(BDRVCowState),
-    .bdrv_probe                = cow_probe,
-    .bdrv_open         = cow_open,
-    .bdrv_read         = cow_read,
-    .bdrv_write                = cow_write,
-    .bdrv_close                = cow_close,
-    .bdrv_create       = cow_create,
-    .bdrv_flush                = cow_flush,
-    .bdrv_is_allocated = cow_is_allocated,
-};
-
-static void bdrv_cow_init(void)
-{
-    bdrv_register(&bdrv_cow);
-}
-
-block_init(bdrv_cow_init);
-#endif
diff --git a/block-dmg.c b/block-dmg.c
deleted file mode 100644 (file)
index 262560f..0000000
+++ /dev/null
@@ -1,301 +0,0 @@
-/*
- * QEMU Block driver for DMG images
- *
- * Copyright (c) 2004 Johannes E. Schindelin
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
- * THE SOFTWARE.
- */
-#include "qemu-common.h"
-#include "block_int.h"
-#include "bswap.h"
-#include "module.h"
-#include <zlib.h>
-
-typedef struct BDRVDMGState {
-    int fd;
-
-    /* each chunk contains a certain number of sectors,
-     * offsets[i] is the offset in the .dmg file,
-     * lengths[i] is the length of the compressed chunk,
-     * sectors[i] is the sector beginning at offsets[i],
-     * sectorcounts[i] is the number of sectors in that chunk,
-     * the sectors array is ordered
-     * 0<=i<n_chunks */
-
-    uint32_t n_chunks;
-    uint32_t* types;
-    uint64_t* offsets;
-    uint64_t* lengths;
-    uint64_t* sectors;
-    uint64_t* sectorcounts;
-    uint32_t current_chunk;
-    uint8_t *compressed_chunk;
-    uint8_t *uncompressed_chunk;
-    z_stream zstream;
-} BDRVDMGState;
-
-static int dmg_probe(const uint8_t *buf, int buf_size, const char *filename)
-{
-    int len=strlen(filename);
-    if(len>4 && !strcmp(filename+len-4,".dmg"))
-       return 2;
-    return 0;
-}
-
-static off_t read_off(int fd)
-{
-       uint64_t buffer;
-       if(read(fd,&buffer,8)<8)
-               return 0;
-       return be64_to_cpu(buffer);
-}
-
-static off_t read_uint32(int fd)
-{
-       uint32_t buffer;
-       if(read(fd,&buffer,4)<4)
-               return 0;
-       return be32_to_cpu(buffer);
-}
-
-static int dmg_open(BlockDriverState *bs, const char *filename, int flags)
-{
-    BDRVDMGState *s = bs->opaque;
-    off_t info_begin,info_end,last_in_offset,last_out_offset;
-    uint32_t count;
-    uint32_t max_compressed_size=1,max_sectors_per_chunk=1,i;
-
-    s->fd = open(filename, O_RDONLY | O_BINARY);
-    if (s->fd < 0)
-        return -errno;
-    bs->read_only = 1;
-    s->n_chunks = 0;
-    s->offsets = s->lengths = s->sectors = s->sectorcounts = NULL;
-
-    /* read offset of info blocks */
-    if(lseek(s->fd,-0x1d8,SEEK_END)<0) {
-dmg_close:
-       close(s->fd);
-       /* open raw instead */
-       bs->drv=bdrv_find_format("raw");
-       return bs->drv->bdrv_open(bs, filename, flags);
-    }
-    info_begin=read_off(s->fd);
-    if(info_begin==0)
-       goto dmg_close;
-    if(lseek(s->fd,info_begin,SEEK_SET)<0)
-       goto dmg_close;
-    if(read_uint32(s->fd)!=0x100)
-       goto dmg_close;
-    if((count = read_uint32(s->fd))==0)
-       goto dmg_close;
-    info_end = info_begin+count;
-    if(lseek(s->fd,0xf8,SEEK_CUR)<0)
-       goto dmg_close;
-
-    /* read offsets */
-    last_in_offset = last_out_offset = 0;
-    while(lseek(s->fd,0,SEEK_CUR)<info_end) {
-        uint32_t type;
-
-       count = read_uint32(s->fd);
-       if(count==0)
-           goto dmg_close;
-       type = read_uint32(s->fd);
-       if(type!=0x6d697368 || count<244)
-           lseek(s->fd,count-4,SEEK_CUR);
-       else {
-           int new_size, chunk_count;
-           if(lseek(s->fd,200,SEEK_CUR)<0)
-               goto dmg_close;
-           chunk_count = (count-204)/40;
-           new_size = sizeof(uint64_t) * (s->n_chunks + chunk_count);
-           s->types = qemu_realloc(s->types, new_size/2);
-           s->offsets = qemu_realloc(s->offsets, new_size);
-           s->lengths = qemu_realloc(s->lengths, new_size);
-           s->sectors = qemu_realloc(s->sectors, new_size);
-           s->sectorcounts = qemu_realloc(s->sectorcounts, new_size);
-
-           for(i=s->n_chunks;i<s->n_chunks+chunk_count;i++) {
-               s->types[i] = read_uint32(s->fd);
-               if(s->types[i]!=0x80000005 && s->types[i]!=1 && s->types[i]!=2) {
-                   if(s->types[i]==0xffffffff) {
-                       last_in_offset = s->offsets[i-1]+s->lengths[i-1];
-                       last_out_offset = s->sectors[i-1]+s->sectorcounts[i-1];
-                   }
-                   chunk_count--;
-                   i--;
-                   if(lseek(s->fd,36,SEEK_CUR)<0)
-                       goto dmg_close;
-                   continue;
-               }
-               read_uint32(s->fd);
-               s->sectors[i] = last_out_offset+read_off(s->fd);
-               s->sectorcounts[i] = read_off(s->fd);
-               s->offsets[i] = last_in_offset+read_off(s->fd);
-               s->lengths[i] = read_off(s->fd);
-               if(s->lengths[i]>max_compressed_size)
-                   max_compressed_size = s->lengths[i];
-               if(s->sectorcounts[i]>max_sectors_per_chunk)
-                   max_sectors_per_chunk = s->sectorcounts[i];
-           }
-           s->n_chunks+=chunk_count;
-       }
-    }
-
-    /* initialize zlib engine */
-    s->compressed_chunk = qemu_malloc(max_compressed_size+1);
-    s->uncompressed_chunk = qemu_malloc(512*max_sectors_per_chunk);
-    if(inflateInit(&s->zstream) != Z_OK)
-       goto dmg_close;
-
-    s->current_chunk = s->n_chunks;
-
-    return 0;
-}
-
-static inline int is_sector_in_chunk(BDRVDMGState* s,
-               uint32_t chunk_num,int sector_num)
-{
-    if(chunk_num>=s->n_chunks || s->sectors[chunk_num]>sector_num ||
-           s->sectors[chunk_num]+s->sectorcounts[chunk_num]<=sector_num)
-       return 0;
-    else
-       return -1;
-}
-
-static inline uint32_t search_chunk(BDRVDMGState* s,int sector_num)
-{
-    /* binary search */
-    uint32_t chunk1=0,chunk2=s->n_chunks,chunk3;
-    while(chunk1!=chunk2) {
-       chunk3 = (chunk1+chunk2)/2;
-       if(s->sectors[chunk3]>sector_num)
-           chunk2 = chunk3;
-       else if(s->sectors[chunk3]+s->sectorcounts[chunk3]>sector_num)
-           return chunk3;
-       else
-           chunk1 = chunk3;
-    }
-    return s->n_chunks; /* error */
-}
-
-static inline int dmg_read_chunk(BDRVDMGState *s,int sector_num)
-{
-    if(!is_sector_in_chunk(s,s->current_chunk,sector_num)) {
-       int ret;
-       uint32_t chunk = search_chunk(s,sector_num);
-
-       if(chunk>=s->n_chunks)
-           return -1;
-
-       s->current_chunk = s->n_chunks;
-       switch(s->types[chunk]) {
-       case 0x80000005: { /* zlib compressed */
-           int i;
-
-           ret = lseek(s->fd, s->offsets[chunk], SEEK_SET);
-           if(ret<0)
-               return -1;
-
-           /* we need to buffer, because only the chunk as whole can be
-            * inflated. */
-           i=0;
-           do {
-               ret = read(s->fd, s->compressed_chunk+i, s->lengths[chunk]-i);
-               if(ret<0 && errno==EINTR)
-                   ret=0;
-               i+=ret;
-           } while(ret>=0 && ret+i<s->lengths[chunk]);
-
-           if (ret != s->lengths[chunk])
-               return -1;
-
-           s->zstream.next_in = s->compressed_chunk;
-           s->zstream.avail_in = s->lengths[chunk];
-           s->zstream.next_out = s->uncompressed_chunk;
-           s->zstream.avail_out = 512*s->sectorcounts[chunk];
-           ret = inflateReset(&s->zstream);
-           if(ret != Z_OK)
-               return -1;
-           ret = inflate(&s->zstream, Z_FINISH);
-           if(ret != Z_STREAM_END || s->zstream.total_out != 512*s->sectorcounts[chunk])
-               return -1;
-           break; }
-       case 1: /* copy */
-           ret = read(s->fd, s->uncompressed_chunk, s->lengths[chunk]);
-           if (ret != s->lengths[chunk])
-               return -1;
-           break;
-       case 2: /* zero */
-           memset(s->uncompressed_chunk, 0, 512*s->sectorcounts[chunk]);
-           break;
-       }
-       s->current_chunk = chunk;
-    }
-    return 0;
-}
-
-static int dmg_read(BlockDriverState *bs, int64_t sector_num,
-                    uint8_t *buf, int nb_sectors)
-{
-    BDRVDMGState *s = bs->opaque;
-    int i;
-
-    for(i=0;i<nb_sectors;i++) {
-       uint32_t sector_offset_in_chunk;
-       if(dmg_read_chunk(s, sector_num+i) != 0)
-           return -1;
-       sector_offset_in_chunk = sector_num+i-s->sectors[s->current_chunk];
-       memcpy(buf+i*512,s->uncompressed_chunk+sector_offset_in_chunk*512,512);
-    }
-    return 0;
-}
-
-static void dmg_close(BlockDriverState *bs)
-{
-    BDRVDMGState *s = bs->opaque;
-    close(s->fd);
-    if(s->n_chunks>0) {
-       free(s->types);
-       free(s->offsets);
-       free(s->lengths);
-       free(s->sectors);
-       free(s->sectorcounts);
-    }
-    free(s->compressed_chunk);
-    free(s->uncompressed_chunk);
-    inflateEnd(&s->zstream);
-}
-
-static BlockDriver bdrv_dmg = {
-    .format_name       = "dmg",
-    .instance_size     = sizeof(BDRVDMGState),
-    .bdrv_probe                = dmg_probe,
-    .bdrv_open         = dmg_open,
-    .bdrv_read         = dmg_read,
-    .bdrv_close                = dmg_close,
-};
-
-static void bdrv_dmg_init(void)
-{
-    bdrv_register(&bdrv_dmg);
-}
-
-block_init(bdrv_dmg_init);
diff --git a/block-nbd.c b/block-nbd.c
deleted file mode 100644 (file)
index 47d4778..0000000
+++ /dev/null
@@ -1,196 +0,0 @@
-/*
- * QEMU Block driver for  NBD
- *
- * Copyright (C) 2008 Bull S.A.S.
- *     Author: Laurent Vivier <Laurent.Vivier@bull.net>
- *
- * Some parts:
- *    Copyright (C) 2007 Anthony Liguori <anthony@codemonkey.ws>
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
- * THE SOFTWARE.
- */
-
-#include "qemu-common.h"
-#include "nbd.h"
-#include "module.h"
-
-#include <sys/types.h>
-#include <unistd.h>
-
-typedef struct BDRVNBDState {
-    int sock;
-    off_t size;
-    size_t blocksize;
-} BDRVNBDState;
-
-static int nbd_open(BlockDriverState *bs, const char* filename, int flags)
-{
-    BDRVNBDState *s = bs->opaque;
-    const char *host;
-    const char *unixpath;
-    int sock;
-    off_t size;
-    size_t blocksize;
-    int ret;
-
-    if ((flags & BDRV_O_CREAT))
-        return -EINVAL;
-
-    if (!strstart(filename, "nbd:", &host))
-        return -EINVAL;
-
-    if (strstart(host, "unix:", &unixpath)) {
-
-        if (unixpath[0] != '/')
-            return -EINVAL;
-
-        sock = unix_socket_outgoing(unixpath);
-
-    } else {
-        uint16_t port;
-        char *p, *r;
-        char hostname[128];
-
-        pstrcpy(hostname, 128, host);
-
-        p = strchr(hostname, ':');
-        if (p == NULL)
-            return -EINVAL;
-
-        *p = '\0';
-        p++;
-
-        port = strtol(p, &r, 0);
-        if (r == p)
-            return -EINVAL;
-        sock = tcp_socket_outgoing(hostname, port);
-    }
-
-    if (sock == -1)
-        return -errno;
-
-    ret = nbd_receive_negotiate(sock, &size, &blocksize);
-    if (ret == -1)
-        return -errno;
-
-    s->sock = sock;
-    s->size = size;
-    s->blocksize = blocksize;
-
-    return 0;
-}
-
-static int nbd_read(BlockDriverState *bs, int64_t sector_num,
-                    uint8_t *buf, int nb_sectors)
-{
-    BDRVNBDState *s = bs->opaque;
-    struct nbd_request request;
-    struct nbd_reply reply;
-
-    request.type = NBD_CMD_READ;
-    request.handle = (uint64_t)(intptr_t)bs;
-    request.from = sector_num * 512;;
-    request.len = nb_sectors * 512;
-
-    if (nbd_send_request(s->sock, &request) == -1)
-        return -errno;
-
-    if (nbd_receive_reply(s->sock, &reply) == -1)
-        return -errno;
-
-    if (reply.error !=0)
-        return -reply.error;
-
-    if (reply.handle != request.handle)
-        return -EIO;
-
-    if (nbd_wr_sync(s->sock, buf, request.len, 1) != request.len)
-        return -EIO;
-
-    return 0;
-}
-
-static int nbd_write(BlockDriverState *bs, int64_t sector_num,
-                     const uint8_t *buf, int nb_sectors)
-{
-    BDRVNBDState *s = bs->opaque;
-    struct nbd_request request;
-    struct nbd_reply reply;
-
-    request.type = NBD_CMD_WRITE;
-    request.handle = (uint64_t)(intptr_t)bs;
-    request.from = sector_num * 512;;
-    request.len = nb_sectors * 512;
-
-    if (nbd_send_request(s->sock, &request) == -1)
-        return -errno;
-
-    if (nbd_wr_sync(s->sock, (uint8_t*)buf, request.len, 0) != request.len)
-        return -EIO;
-
-    if (nbd_receive_reply(s->sock, &reply) == -1)
-        return -errno;
-
-    if (reply.error !=0)
-        return -reply.error;
-
-    if (reply.handle != request.handle)
-        return -EIO;
-
-    return 0;
-}
-
-static void nbd_close(BlockDriverState *bs)
-{
-    BDRVNBDState *s = bs->opaque;
-    struct nbd_request request;
-
-    request.type = NBD_CMD_DISC;
-    request.handle = (uint64_t)(intptr_t)bs;
-    request.from = 0;
-    request.len = 0;
-    nbd_send_request(s->sock, &request);
-
-    close(s->sock);
-}
-
-static int64_t nbd_getlength(BlockDriverState *bs)
-{
-    BDRVNBDState *s = bs->opaque;
-
-    return s->size;
-}
-
-static BlockDriver bdrv_nbd = {
-    .format_name       = "nbd",
-    .instance_size     = sizeof(BDRVNBDState),
-    .bdrv_open         = nbd_open,
-    .bdrv_read         = nbd_read,
-    .bdrv_write                = nbd_write,
-    .bdrv_close                = nbd_close,
-    .bdrv_getlength    = nbd_getlength,
-    .protocol_name     = "nbd",
-};
-
-static void bdrv_nbd_init(void)
-{
-    bdrv_register(&bdrv_nbd);
-}
-
-block_init(bdrv_nbd_init);
diff --git a/block-parallels.c b/block-parallels.c
deleted file mode 100644 (file)
index 0b64a5c..0000000
+++ /dev/null
@@ -1,181 +0,0 @@
-/*
- * Block driver for Parallels disk image format
- *
- * Copyright (c) 2007 Alex Beregszaszi
- *
- * This code is based on comparing different disk images created by Parallels.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
- * THE SOFTWARE.
- */
-#include "qemu-common.h"
-#include "block_int.h"
-#include "module.h"
-
-/**************************************************************/
-
-#define HEADER_MAGIC "WithoutFreeSpace"
-#define HEADER_VERSION 2
-#define HEADER_SIZE 64
-
-// always little-endian
-struct parallels_header {
-    char magic[16]; // "WithoutFreeSpace"
-    uint32_t version;
-    uint32_t heads;
-    uint32_t cylinders;
-    uint32_t tracks;
-    uint32_t catalog_entries;
-    uint32_t nb_sectors;
-    char padding[24];
-} __attribute__((packed));
-
-typedef struct BDRVParallelsState {
-    int fd;
-
-    uint32_t *catalog_bitmap;
-    int catalog_size;
-
-    int tracks;
-} BDRVParallelsState;
-
-static int parallels_probe(const uint8_t *buf, int buf_size, const char *filename)
-{
-    const struct parallels_header *ph = (const void *)buf;
-
-    if (buf_size < HEADER_SIZE)
-       return 0;
-
-    if (!memcmp(ph->magic, HEADER_MAGIC, 16) &&
-       (le32_to_cpu(ph->version) == HEADER_VERSION))
-       return 100;
-
-    return 0;
-}
-
-static int parallels_open(BlockDriverState *bs, const char *filename, int flags)
-{
-    BDRVParallelsState *s = bs->opaque;
-    int fd, i;
-    struct parallels_header ph;
-
-    fd = open(filename, O_RDWR | O_BINARY | O_LARGEFILE);
-    if (fd < 0) {
-        fd = open(filename, O_RDONLY | O_BINARY | O_LARGEFILE);
-        if (fd < 0)
-            return -1;
-    }
-
-    bs->read_only = 1; // no write support yet
-
-    s->fd = fd;
-
-    if (read(fd, &ph, sizeof(ph)) != sizeof(ph))
-        goto fail;
-
-    if (memcmp(ph.magic, HEADER_MAGIC, 16) ||
-       (le32_to_cpu(ph.version) != HEADER_VERSION)) {
-        goto fail;
-    }
-
-    bs->total_sectors = le32_to_cpu(ph.nb_sectors);
-
-    if (lseek(s->fd, 64, SEEK_SET) != 64)
-       goto fail;
-
-    s->tracks = le32_to_cpu(ph.tracks);
-
-    s->catalog_size = le32_to_cpu(ph.catalog_entries);
-    s->catalog_bitmap = qemu_malloc(s->catalog_size * 4);
-    if (read(s->fd, s->catalog_bitmap, s->catalog_size * 4) !=
-       s->catalog_size * 4)
-       goto fail;
-    for (i = 0; i < s->catalog_size; i++)
-       le32_to_cpus(&s->catalog_bitmap[i]);
-
-    return 0;
-fail:
-    if (s->catalog_bitmap)
-       qemu_free(s->catalog_bitmap);
-    close(fd);
-    return -1;
-}
-
-static inline int seek_to_sector(BlockDriverState *bs, int64_t sector_num)
-{
-    BDRVParallelsState *s = bs->opaque;
-    uint32_t index, offset, position;
-
-    index = sector_num / s->tracks;
-    offset = sector_num % s->tracks;
-
-    // not allocated
-    if ((index > s->catalog_size) || (s->catalog_bitmap[index] == 0))
-       return -1;
-
-    position = (s->catalog_bitmap[index] + offset) * 512;
-
-//    fprintf(stderr, "sector: %llx index=%x offset=%x pointer=%x position=%x\n",
-//     sector_num, index, offset, s->catalog_bitmap[index], position);
-
-    if (lseek(s->fd, position, SEEK_SET) != position)
-       return -1;
-
-    return 0;
-}
-
-static int parallels_read(BlockDriverState *bs, int64_t sector_num,
-                    uint8_t *buf, int nb_sectors)
-{
-    BDRVParallelsState *s = bs->opaque;
-
-    while (nb_sectors > 0) {
-       if (!seek_to_sector(bs, sector_num)) {
-           if (read(s->fd, buf, 512) != 512)
-               return -1;
-       } else
-            memset(buf, 0, 512);
-        nb_sectors--;
-        sector_num++;
-        buf += 512;
-    }
-    return 0;
-}
-
-static void parallels_close(BlockDriverState *bs)
-{
-    BDRVParallelsState *s = bs->opaque;
-    qemu_free(s->catalog_bitmap);
-    close(s->fd);
-}
-
-static BlockDriver bdrv_parallels = {
-    .format_name       = "parallels",
-    .instance_size     = sizeof(BDRVParallelsState),
-    .bdrv_probe                = parallels_probe,
-    .bdrv_open         = parallels_open,
-    .bdrv_read         = parallels_read,
-    .bdrv_close                = parallels_close,
-};
-
-static void bdrv_parallels_init(void)
-{
-    bdrv_register(&bdrv_parallels);
-}
-
-block_init(bdrv_parallels_init);
diff --git a/block-qcow.c b/block-qcow.c
deleted file mode 100644 (file)
index 1cf7c3b..0000000
+++ /dev/null
@@ -1,945 +0,0 @@
-/*
- * Block driver for the QCOW format
- *
- * Copyright (c) 2004-2006 Fabrice Bellard
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
- * THE SOFTWARE.
- */
-#include "qemu-common.h"
-#include "block_int.h"
-#include "module.h"
-#include <zlib.h>
-#include "aes.h"
-
-/**************************************************************/
-/* QEMU COW block driver with compression and encryption support */
-
-#define QCOW_MAGIC (('Q' << 24) | ('F' << 16) | ('I' << 8) | 0xfb)
-#define QCOW_VERSION 1
-
-#define QCOW_CRYPT_NONE 0
-#define QCOW_CRYPT_AES  1
-
-#define QCOW_OFLAG_COMPRESSED (1LL << 63)
-
-typedef struct QCowHeader {
-    uint32_t magic;
-    uint32_t version;
-    uint64_t backing_file_offset;
-    uint32_t backing_file_size;
-    uint32_t mtime;
-    uint64_t size; /* in bytes */
-    uint8_t cluster_bits;
-    uint8_t l2_bits;
-    uint32_t crypt_method;
-    uint64_t l1_table_offset;
-} QCowHeader;
-
-#define L2_CACHE_SIZE 16
-
-typedef struct BDRVQcowState {
-    BlockDriverState *hd;
-    int cluster_bits;
-    int cluster_size;
-    int cluster_sectors;
-    int l2_bits;
-    int l2_size;
-    int l1_size;
-    uint64_t cluster_offset_mask;
-    uint64_t l1_table_offset;
-    uint64_t *l1_table;
-    uint64_t *l2_cache;
-    uint64_t l2_cache_offsets[L2_CACHE_SIZE];
-    uint32_t l2_cache_counts[L2_CACHE_SIZE];
-    uint8_t *cluster_cache;
-    uint8_t *cluster_data;
-    uint64_t cluster_cache_offset;
-    uint32_t crypt_method; /* current crypt method, 0 if no key yet */
-    uint32_t crypt_method_header;
-    AES_KEY aes_encrypt_key;
-    AES_KEY aes_decrypt_key;
-} BDRVQcowState;
-
-static int decompress_cluster(BDRVQcowState *s, uint64_t cluster_offset);
-
-static int qcow_probe(const uint8_t *buf, int buf_size, const char *filename)
-{
-    const QCowHeader *cow_header = (const void *)buf;
-
-    if (buf_size >= sizeof(QCowHeader) &&
-        be32_to_cpu(cow_header->magic) == QCOW_MAGIC &&
-        be32_to_cpu(cow_header->version) == QCOW_VERSION)
-        return 100;
-    else
-        return 0;
-}
-
-static int qcow_open(BlockDriverState *bs, const char *filename, int flags)
-{
-    BDRVQcowState *s = bs->opaque;
-    int len, i, shift, ret;
-    QCowHeader header;
-
-    ret = bdrv_file_open(&s->hd, filename, flags);
-    if (ret < 0)
-        return ret;
-    if (bdrv_pread(s->hd, 0, &header, sizeof(header)) != sizeof(header))
-        goto fail;
-    be32_to_cpus(&header.magic);
-    be32_to_cpus(&header.version);
-    be64_to_cpus(&header.backing_file_offset);
-    be32_to_cpus(&header.backing_file_size);
-    be32_to_cpus(&header.mtime);
-    be64_to_cpus(&header.size);
-    be32_to_cpus(&header.crypt_method);
-    be64_to_cpus(&header.l1_table_offset);
-
-    if (header.magic != QCOW_MAGIC || header.version != QCOW_VERSION)
-        goto fail;
-    if (header.size <= 1 || header.cluster_bits < 9)
-        goto fail;
-    if (header.crypt_method > QCOW_CRYPT_AES)
-        goto fail;
-    s->crypt_method_header = header.crypt_method;
-    if (s->crypt_method_header)
-        bs->encrypted = 1;
-    s->cluster_bits = header.cluster_bits;
-    s->cluster_size = 1 << s->cluster_bits;
-    s->cluster_sectors = 1 << (s->cluster_bits - 9);
-    s->l2_bits = header.l2_bits;
-    s->l2_size = 1 << s->l2_bits;
-    bs->total_sectors = header.size / 512;
-    s->cluster_offset_mask = (1LL << (63 - s->cluster_bits)) - 1;
-
-    /* read the level 1 table */
-    shift = s->cluster_bits + s->l2_bits;
-    s->l1_size = (header.size + (1LL << shift) - 1) >> shift;
-
-    s->l1_table_offset = header.l1_table_offset;
-    s->l1_table = qemu_malloc(s->l1_size * sizeof(uint64_t));
-    if (!s->l1_table)
-        goto fail;
-    if (bdrv_pread(s->hd, s->l1_table_offset, s->l1_table, s->l1_size * sizeof(uint64_t)) !=
-        s->l1_size * sizeof(uint64_t))
-        goto fail;
-    for(i = 0;i < s->l1_size; i++) {
-        be64_to_cpus(&s->l1_table[i]);
-    }
-    /* alloc L2 cache */
-    s->l2_cache = qemu_malloc(s->l2_size * L2_CACHE_SIZE * sizeof(uint64_t));
-    if (!s->l2_cache)
-        goto fail;
-    s->cluster_cache = qemu_malloc(s->cluster_size);
-    if (!s->cluster_cache)
-        goto fail;
-    s->cluster_data = qemu_malloc(s->cluster_size);
-    if (!s->cluster_data)
-        goto fail;
-    s->cluster_cache_offset = -1;
-
-    /* read the backing file name */
-    if (header.backing_file_offset != 0) {
-        len = header.backing_file_size;
-        if (len > 1023)
-            len = 1023;
-        if (bdrv_pread(s->hd, header.backing_file_offset, bs->backing_file, len) != len)
-            goto fail;
-        bs->backing_file[len] = '\0';
-    }
-    return 0;
-
- fail:
-    qemu_free(s->l1_table);
-    qemu_free(s->l2_cache);
-    qemu_free(s->cluster_cache);
-    qemu_free(s->cluster_data);
-    bdrv_delete(s->hd);
-    return -1;
-}
-
-static int qcow_set_key(BlockDriverState *bs, const char *key)
-{
-    BDRVQcowState *s = bs->opaque;
-    uint8_t keybuf[16];
-    int len, i;
-
-    memset(keybuf, 0, 16);
-    len = strlen(key);
-    if (len > 16)
-        len = 16;
-    /* XXX: we could compress the chars to 7 bits to increase
-       entropy */
-    for(i = 0;i < len;i++) {
-        keybuf[i] = key[i];
-    }
-    s->crypt_method = s->crypt_method_header;
-
-    if (AES_set_encrypt_key(keybuf, 128, &s->aes_encrypt_key) != 0)
-        return -1;
-    if (AES_set_decrypt_key(keybuf, 128, &s->aes_decrypt_key) != 0)
-        return -1;
-#if 0
-    /* test */
-    {
-        uint8_t in[16];
-        uint8_t out[16];
-        uint8_t tmp[16];
-        for(i=0;i<16;i++)
-            in[i] = i;
-        AES_encrypt(in, tmp, &s->aes_encrypt_key);
-        AES_decrypt(tmp, out, &s->aes_decrypt_key);
-        for(i = 0; i < 16; i++)
-            printf(" %02x", tmp[i]);
-        printf("\n");
-        for(i = 0; i < 16; i++)
-            printf(" %02x", out[i]);
-        printf("\n");
-    }
-#endif
-    return 0;
-}
-
-/* The crypt function is compatible with the linux cryptoloop
-   algorithm for < 4 GB images. NOTE: out_buf == in_buf is
-   supported */
-static void encrypt_sectors(BDRVQcowState *s, int64_t sector_num,
-                            uint8_t *out_buf, const uint8_t *in_buf,
-                            int nb_sectors, int enc,
-                            const AES_KEY *key)
-{
-    union {
-        uint64_t ll[2];
-        uint8_t b[16];
-    } ivec;
-    int i;
-
-    for(i = 0; i < nb_sectors; i++) {
-        ivec.ll[0] = cpu_to_le64(sector_num);
-        ivec.ll[1] = 0;
-        AES_cbc_encrypt(in_buf, out_buf, 512, key,
-                        ivec.b, enc);
-        sector_num++;
-        in_buf += 512;
-        out_buf += 512;
-    }
-}
-
-/* 'allocate' is:
- *
- * 0 to not allocate.
- *
- * 1 to allocate a normal cluster (for sector indexes 'n_start' to
- * 'n_end')
- *
- * 2 to allocate a compressed cluster of size
- * 'compressed_size'. 'compressed_size' must be > 0 and <
- * cluster_size
- *
- * return 0 if not allocated.
- */
-static uint64_t get_cluster_offset(BlockDriverState *bs,
-                                   uint64_t offset, int allocate,
-                                   int compressed_size,
-                                   int n_start, int n_end)
-{
-    BDRVQcowState *s = bs->opaque;
-    int min_index, i, j, l1_index, l2_index;
-    uint64_t l2_offset, *l2_table, cluster_offset, tmp;
-    uint32_t min_count;
-    int new_l2_table;
-
-    l1_index = offset >> (s->l2_bits + s->cluster_bits);
-    l2_offset = s->l1_table[l1_index];
-    new_l2_table = 0;
-    if (!l2_offset) {
-        if (!allocate)
-            return 0;
-        /* allocate a new l2 entry */
-        l2_offset = bdrv_getlength(s->hd);
-        /* round to cluster size */
-        l2_offset = (l2_offset + s->cluster_size - 1) & ~(s->cluster_size - 1);
-        /* update the L1 entry */
-        s->l1_table[l1_index] = l2_offset;
-        tmp = cpu_to_be64(l2_offset);
-        if (bdrv_pwrite(s->hd, s->l1_table_offset + l1_index * sizeof(tmp),
-                        &tmp, sizeof(tmp)) != sizeof(tmp))
-            return 0;
-        new_l2_table = 1;
-    }
-    for(i = 0; i < L2_CACHE_SIZE; i++) {
-        if (l2_offset == s->l2_cache_offsets[i]) {
-            /* increment the hit count */
-            if (++s->l2_cache_counts[i] == 0xffffffff) {
-                for(j = 0; j < L2_CACHE_SIZE; j++) {
-                    s->l2_cache_counts[j] >>= 1;
-                }
-            }
-            l2_table = s->l2_cache + (i << s->l2_bits);
-            goto found;
-        }
-    }
-    /* not found: load a new entry in the least used one */
-    min_index = 0;
-    min_count = 0xffffffff;
-    for(i = 0; i < L2_CACHE_SIZE; i++) {
-        if (s->l2_cache_counts[i] < min_count) {
-            min_count = s->l2_cache_counts[i];
-            min_index = i;
-        }
-    }
-    l2_table = s->l2_cache + (min_index << s->l2_bits);
-    if (new_l2_table) {
-        memset(l2_table, 0, s->l2_size * sizeof(uint64_t));
-        if (bdrv_pwrite(s->hd, l2_offset, l2_table, s->l2_size * sizeof(uint64_t)) !=
-            s->l2_size * sizeof(uint64_t))
-            return 0;
-    } else {
-        if (bdrv_pread(s->hd, l2_offset, l2_table, s->l2_size * sizeof(uint64_t)) !=
-            s->l2_size * sizeof(uint64_t))
-            return 0;
-    }
-    s->l2_cache_offsets[min_index] = l2_offset;
-    s->l2_cache_counts[min_index] = 1;
- found:
-    l2_index = (offset >> s->cluster_bits) & (s->l2_size - 1);
-    cluster_offset = be64_to_cpu(l2_table[l2_index]);
-    if (!cluster_offset ||
-        ((cluster_offset & QCOW_OFLAG_COMPRESSED) && allocate == 1)) {
-        if (!allocate)
-            return 0;
-        /* allocate a new cluster */
-        if ((cluster_offset & QCOW_OFLAG_COMPRESSED) &&
-            (n_end - n_start) < s->cluster_sectors) {
-            /* if the cluster is already compressed, we must
-               decompress it in the case it is not completely
-               overwritten */
-            if (decompress_cluster(s, cluster_offset) < 0)
-                return 0;
-            cluster_offset = bdrv_getlength(s->hd);
-            cluster_offset = (cluster_offset + s->cluster_size - 1) &
-                ~(s->cluster_size - 1);
-            /* write the cluster content */
-            if (bdrv_pwrite(s->hd, cluster_offset, s->cluster_cache, s->cluster_size) !=
-                s->cluster_size)
-                return -1;
-        } else {
-            cluster_offset = bdrv_getlength(s->hd);
-            if (allocate == 1) {
-                /* round to cluster size */
-                cluster_offset = (cluster_offset + s->cluster_size - 1) &
-                    ~(s->cluster_size - 1);
-                bdrv_truncate(s->hd, cluster_offset + s->cluster_size);
-                /* if encrypted, we must initialize the cluster
-                   content which won't be written */
-                if (s->crypt_method &&
-                    (n_end - n_start) < s->cluster_sectors) {
-                    uint64_t start_sect;
-                    start_sect = (offset & ~(s->cluster_size - 1)) >> 9;
-                    memset(s->cluster_data + 512, 0x00, 512);
-                    for(i = 0; i < s->cluster_sectors; i++) {
-                        if (i < n_start || i >= n_end) {
-                            encrypt_sectors(s, start_sect + i,
-                                            s->cluster_data,
-                                            s->cluster_data + 512, 1, 1,
-                                            &s->aes_encrypt_key);
-                            if (bdrv_pwrite(s->hd, cluster_offset + i * 512,
-                                            s->cluster_data, 512) != 512)
-                                return -1;
-                        }
-                    }
-                }
-            } else if (allocate == 2) {
-                cluster_offset |= QCOW_OFLAG_COMPRESSED |
-                    (uint64_t)compressed_size << (63 - s->cluster_bits);
-            }
-        }
-        /* update L2 table */
-        tmp = cpu_to_be64(cluster_offset);
-        l2_table[l2_index] = tmp;
-        if (bdrv_pwrite(s->hd,
-                        l2_offset + l2_index * sizeof(tmp), &tmp, sizeof(tmp)) != sizeof(tmp))
-            return 0;
-    }
-    return cluster_offset;
-}
-
-static int qcow_is_allocated(BlockDriverState *bs, int64_t sector_num,
-                             int nb_sectors, int *pnum)
-{
-    BDRVQcowState *s = bs->opaque;
-    int index_in_cluster, n;
-    uint64_t cluster_offset;
-
-    cluster_offset = get_cluster_offset(bs, sector_num << 9, 0, 0, 0, 0);
-    index_in_cluster = sector_num & (s->cluster_sectors - 1);
-    n = s->cluster_sectors - index_in_cluster;
-    if (n > nb_sectors)
-        n = nb_sectors;
-    *pnum = n;
-    return (cluster_offset != 0);
-}
-
-static int decompress_buffer(uint8_t *out_buf, int out_buf_size,
-                             const uint8_t *buf, int buf_size)
-{
-    z_stream strm1, *strm = &strm1;
-    int ret, out_len;
-
-    memset(strm, 0, sizeof(*strm));
-
-    strm->next_in = (uint8_t *)buf;
-    strm->avail_in = buf_size;
-    strm->next_out = out_buf;
-    strm->avail_out = out_buf_size;
-
-    ret = inflateInit2(strm, -12);
-    if (ret != Z_OK)
-        return -1;
-    ret = inflate(strm, Z_FINISH);
-    out_len = strm->next_out - out_buf;
-    if ((ret != Z_STREAM_END && ret != Z_BUF_ERROR) ||
-        out_len != out_buf_size) {
-        inflateEnd(strm);
-        return -1;
-    }
-    inflateEnd(strm);
-    return 0;
-}
-
-static int decompress_cluster(BDRVQcowState *s, uint64_t cluster_offset)
-{
-    int ret, csize;
-    uint64_t coffset;
-
-    coffset = cluster_offset & s->cluster_offset_mask;
-    if (s->cluster_cache_offset != coffset) {
-        csize = cluster_offset >> (63 - s->cluster_bits);
-        csize &= (s->cluster_size - 1);
-        ret = bdrv_pread(s->hd, coffset, s->cluster_data, csize);
-        if (ret != csize)
-            return -1;
-        if (decompress_buffer(s->cluster_cache, s->cluster_size,
-                              s->cluster_data, csize) < 0) {
-            return -1;
-        }
-        s->cluster_cache_offset = coffset;
-    }
-    return 0;
-}
-
-#if 0
-
-static int qcow_read(BlockDriverState *bs, int64_t sector_num,
-                     uint8_t *buf, int nb_sectors)
-{
-    BDRVQcowState *s = bs->opaque;
-    int ret, index_in_cluster, n;
-    uint64_t cluster_offset;
-
-    while (nb_sectors > 0) {
-        cluster_offset = get_cluster_offset(bs, sector_num << 9, 0, 0, 0, 0);
-        index_in_cluster = sector_num & (s->cluster_sectors - 1);
-        n = s->cluster_sectors - index_in_cluster;
-        if (n > nb_sectors)
-            n = nb_sectors;
-        if (!cluster_offset) {
-            if (bs->backing_hd) {
-                /* read from the base image */
-                ret = bdrv_read(bs->backing_hd, sector_num, buf, n);
-                if (ret < 0)
-                    return -1;
-            } else {
-                memset(buf, 0, 512 * n);
-            }
-        } else if (cluster_offset & QCOW_OFLAG_COMPRESSED) {
-            if (decompress_cluster(s, cluster_offset) < 0)
-                return -1;
-            memcpy(buf, s->cluster_cache + index_in_cluster * 512, 512 * n);
-        } else {
-            ret = bdrv_pread(s->hd, cluster_offset + index_in_cluster * 512, buf, n * 512);
-            if (ret != n * 512)
-                return -1;
-            if (s->crypt_method) {
-                encrypt_sectors(s, sector_num, buf, buf, n, 0,
-                                &s->aes_decrypt_key);
-            }
-        }
-        nb_sectors -= n;
-        sector_num += n;
-        buf += n * 512;
-    }
-    return 0;
-}
-#endif
-
-static int qcow_write(BlockDriverState *bs, int64_t sector_num,
-                     const uint8_t *buf, int nb_sectors)
-{
-    BDRVQcowState *s = bs->opaque;
-    int ret, index_in_cluster, n;
-    uint64_t cluster_offset;
-
-    while (nb_sectors > 0) {
-        index_in_cluster = sector_num & (s->cluster_sectors - 1);
-        n = s->cluster_sectors - index_in_cluster;
-        if (n > nb_sectors)
-            n = nb_sectors;
-        cluster_offset = get_cluster_offset(bs, sector_num << 9, 1, 0,
-                                            index_in_cluster,
-                                            index_in_cluster + n);
-        if (!cluster_offset)
-            return -1;
-        if (s->crypt_method) {
-            encrypt_sectors(s, sector_num, s->cluster_data, buf, n, 1,
-                            &s->aes_encrypt_key);
-            ret = bdrv_pwrite(s->hd, cluster_offset + index_in_cluster * 512,
-                              s->cluster_data, n * 512);
-        } else {
-            ret = bdrv_pwrite(s->hd, cluster_offset + index_in_cluster * 512, buf, n * 512);
-        }
-        if (ret != n * 512)
-            return -1;
-        nb_sectors -= n;
-        sector_num += n;
-        buf += n * 512;
-    }
-    s->cluster_cache_offset = -1; /* disable compressed cache */
-    return 0;
-}
-
-typedef struct QCowAIOCB {
-    BlockDriverAIOCB common;
-    int64_t sector_num;
-    QEMUIOVector *qiov;
-    uint8_t *buf;
-    void *orig_buf;
-    int nb_sectors;
-    int n;
-    uint64_t cluster_offset;
-    uint8_t *cluster_data;
-    struct iovec hd_iov;
-    QEMUIOVector hd_qiov;
-    BlockDriverAIOCB *hd_aiocb;
-} QCowAIOCB;
-
-static void qcow_aio_read_cb(void *opaque, int ret)
-{
-    QCowAIOCB *acb = opaque;
-    BlockDriverState *bs = acb->common.bs;
-    BDRVQcowState *s = bs->opaque;
-    int index_in_cluster;
-
-    acb->hd_aiocb = NULL;
-    if (ret < 0)
-        goto done;
-
- redo:
-    /* post process the read buffer */
-    if (!acb->cluster_offset) {
-        /* nothing to do */
-    } else if (acb->cluster_offset & QCOW_OFLAG_COMPRESSED) {
-        /* nothing to do */
-    } else {
-        if (s->crypt_method) {
-            encrypt_sectors(s, acb->sector_num, acb->buf, acb->buf,
-                            acb->n, 0,
-                            &s->aes_decrypt_key);
-        }
-    }
-
-    acb->nb_sectors -= acb->n;
-    acb->sector_num += acb->n;
-    acb->buf += acb->n * 512;
-
-    if (acb->nb_sectors == 0) {
-        /* request completed */
-        ret = 0;
-        goto done;
-    }
-
-    /* prepare next AIO request */
-    acb->cluster_offset = get_cluster_offset(bs, acb->sector_num << 9,
-                                             0, 0, 0, 0);
-    index_in_cluster = acb->sector_num & (s->cluster_sectors - 1);
-    acb->n = s->cluster_sectors - index_in_cluster;
-    if (acb->n > acb->nb_sectors)
-        acb->n = acb->nb_sectors;
-
-    if (!acb->cluster_offset) {
-        if (bs->backing_hd) {
-            /* read from the base image */
-            acb->hd_iov.iov_base = (void *)acb->buf;
-            acb->hd_iov.iov_len = acb->n * 512;
-            qemu_iovec_init_external(&acb->hd_qiov, &acb->hd_iov, 1);
-            acb->hd_aiocb = bdrv_aio_readv(bs->backing_hd, acb->sector_num,
-                &acb->hd_qiov, acb->n, qcow_aio_read_cb, acb);
-            if (acb->hd_aiocb == NULL)
-                goto done;
-        } else {
-            /* Note: in this case, no need to wait */
-            memset(acb->buf, 0, 512 * acb->n);
-            goto redo;
-        }
-    } else if (acb->cluster_offset & QCOW_OFLAG_COMPRESSED) {
-        /* add AIO support for compressed blocks ? */
-        if (decompress_cluster(s, acb->cluster_offset) < 0)
-            goto done;
-        memcpy(acb->buf,
-               s->cluster_cache + index_in_cluster * 512, 512 * acb->n);
-        goto redo;
-    } else {
-        if ((acb->cluster_offset & 511) != 0) {
-            ret = -EIO;
-            goto done;
-        }
-        acb->hd_iov.iov_base = (void *)acb->buf;
-        acb->hd_iov.iov_len = acb->n * 512;
-        qemu_iovec_init_external(&acb->hd_qiov, &acb->hd_iov, 1);
-        acb->hd_aiocb = bdrv_aio_readv(s->hd,
-                            (acb->cluster_offset >> 9) + index_in_cluster,
-                            &acb->hd_qiov, acb->n, qcow_aio_read_cb, acb);
-        if (acb->hd_aiocb == NULL)
-            goto done;
-    }
-
-    return;
-
-done:
-    if (acb->qiov->niov > 1) {
-        qemu_iovec_from_buffer(acb->qiov, acb->orig_buf, acb->qiov->size);
-        qemu_vfree(acb->orig_buf);
-    }
-    acb->common.cb(acb->common.opaque, ret);
-    qemu_aio_release(acb);
-}
-
-static BlockDriverAIOCB *qcow_aio_readv(BlockDriverState *bs,
-        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
-        BlockDriverCompletionFunc *cb, void *opaque)
-{
-    QCowAIOCB *acb;
-
-    acb = qemu_aio_get(bs, cb, opaque);
-    if (!acb)
-        return NULL;
-    acb->hd_aiocb = NULL;
-    acb->sector_num = sector_num;
-    acb->qiov = qiov;
-    if (qiov->niov > 1)
-        acb->buf = acb->orig_buf = qemu_blockalign(bs, qiov->size);
-    else
-        acb->buf = (uint8_t *)qiov->iov->iov_base;
-    acb->nb_sectors = nb_sectors;
-    acb->n = 0;
-    acb->cluster_offset = 0;
-
-    qcow_aio_read_cb(acb, 0);
-    return &acb->common;
-}
-
-static void qcow_aio_write_cb(void *opaque, int ret)
-{
-    QCowAIOCB *acb = opaque;
-    BlockDriverState *bs = acb->common.bs;
-    BDRVQcowState *s = bs->opaque;
-    int index_in_cluster;
-    uint64_t cluster_offset;
-    const uint8_t *src_buf;
-
-    acb->hd_aiocb = NULL;
-
-    if (ret < 0)
-        goto done;
-
-    acb->nb_sectors -= acb->n;
-    acb->sector_num += acb->n;
-    acb->buf += acb->n * 512;
-
-    if (acb->nb_sectors == 0) {
-        /* request completed */
-        ret = 0;
-        goto done;
-    }
-
-    index_in_cluster = acb->sector_num & (s->cluster_sectors - 1);
-    acb->n = s->cluster_sectors - index_in_cluster;
-    if (acb->n > acb->nb_sectors)
-        acb->n = acb->nb_sectors;
-    cluster_offset = get_cluster_offset(bs, acb->sector_num << 9, 1, 0,
-                                        index_in_cluster,
-                                        index_in_cluster + acb->n);
-    if (!cluster_offset || (cluster_offset & 511) != 0) {
-        ret = -EIO;
-        goto done;
-    }
-    if (s->crypt_method) {
-        if (!acb->cluster_data) {
-            acb->cluster_data = qemu_mallocz(s->cluster_size);
-            if (!acb->cluster_data) {
-                ret = -ENOMEM;
-                goto done;
-            }
-        }
-        encrypt_sectors(s, acb->sector_num, acb->cluster_data, acb->buf,
-                        acb->n, 1, &s->aes_encrypt_key);
-        src_buf = acb->cluster_data;
-    } else {
-        src_buf = acb->buf;
-    }
-
-    acb->hd_iov.iov_base = (void *)src_buf;
-    acb->hd_iov.iov_len = acb->n * 512;
-    qemu_iovec_init_external(&acb->hd_qiov, &acb->hd_iov, 1);
-    acb->hd_aiocb = bdrv_aio_writev(s->hd,
-                                    (cluster_offset >> 9) + index_in_cluster,
-                                    &acb->hd_qiov, acb->n,
-                                    qcow_aio_write_cb, acb);
-    if (acb->hd_aiocb == NULL)
-        goto done;
-    return;
-
-done:
-    if (acb->qiov->niov > 1)
-        qemu_vfree(acb->orig_buf);
-    acb->common.cb(acb->common.opaque, ret);
-    qemu_aio_release(acb);
-}
-
-static BlockDriverAIOCB *qcow_aio_writev(BlockDriverState *bs,
-        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
-        BlockDriverCompletionFunc *cb, void *opaque)
-{
-    BDRVQcowState *s = bs->opaque;
-    QCowAIOCB *acb;
-
-    s->cluster_cache_offset = -1; /* disable compressed cache */
-
-    acb = qemu_aio_get(bs, cb, opaque);
-    if (!acb)
-        return NULL;
-    acb->hd_aiocb = NULL;
-    acb->sector_num = sector_num;
-    acb->qiov = qiov;
-    if (qiov->niov > 1) {
-        acb->buf = acb->orig_buf = qemu_blockalign(bs, qiov->size);
-        qemu_iovec_to_buffer(qiov, acb->buf);
-    } else {
-        acb->buf = (uint8_t *)qiov->iov->iov_base;
-    }
-    acb->nb_sectors = nb_sectors;
-    acb->n = 0;
-
-    qcow_aio_write_cb(acb, 0);
-    return &acb->common;
-}
-
-static void qcow_aio_cancel(BlockDriverAIOCB *blockacb)
-{
-    QCowAIOCB *acb = (QCowAIOCB *)blockacb;
-    if (acb->hd_aiocb)
-        bdrv_aio_cancel(acb->hd_aiocb);
-    qemu_aio_release(acb);
-}
-
-static void qcow_close(BlockDriverState *bs)
-{
-    BDRVQcowState *s = bs->opaque;
-    qemu_free(s->l1_table);
-    qemu_free(s->l2_cache);
-    qemu_free(s->cluster_cache);
-    qemu_free(s->cluster_data);
-    bdrv_delete(s->hd);
-}
-
-static int qcow_create(const char *filename, int64_t total_size,
-                      const char *backing_file, int flags)
-{
-    int fd, header_size, backing_filename_len, l1_size, i, shift;
-    QCowHeader header;
-    uint64_t tmp;
-
-    fd = open(filename, O_WRONLY | O_CREAT | O_TRUNC | O_BINARY, 0644);
-    if (fd < 0)
-        return -1;
-    memset(&header, 0, sizeof(header));
-    header.magic = cpu_to_be32(QCOW_MAGIC);
-    header.version = cpu_to_be32(QCOW_VERSION);
-    header.size = cpu_to_be64(total_size * 512);
-    header_size = sizeof(header);
-    backing_filename_len = 0;
-    if (backing_file) {
-        if (strcmp(backing_file, "fat:")) {
-            header.backing_file_offset = cpu_to_be64(header_size);
-            backing_filename_len = strlen(backing_file);
-            header.backing_file_size = cpu_to_be32(backing_filename_len);
-            header_size += backing_filename_len;
-        } else {
-            /* special backing file for vvfat */
-            backing_file = NULL;
-        }
-        header.cluster_bits = 9; /* 512 byte cluster to avoid copying
-                                    unmodifyed sectors */
-        header.l2_bits = 12; /* 32 KB L2 tables */
-    } else {
-        header.cluster_bits = 12; /* 4 KB clusters */
-        header.l2_bits = 9; /* 4 KB L2 tables */
-    }
-    header_size = (header_size + 7) & ~7;
-    shift = header.cluster_bits + header.l2_bits;
-    l1_size = ((total_size * 512) + (1LL << shift) - 1) >> shift;
-
-    header.l1_table_offset = cpu_to_be64(header_size);
-    if (flags & BLOCK_FLAG_ENCRYPT) {
-        header.crypt_method = cpu_to_be32(QCOW_CRYPT_AES);
-    } else {
-        header.crypt_method = cpu_to_be32(QCOW_CRYPT_NONE);
-    }
-
-    /* write all the data */
-    write(fd, &header, sizeof(header));
-    if (backing_file) {
-        write(fd, backing_file, backing_filename_len);
-    }
-    lseek(fd, header_size, SEEK_SET);
-    tmp = 0;
-    for(i = 0;i < l1_size; i++) {
-        write(fd, &tmp, sizeof(tmp));
-    }
-    close(fd);
-    return 0;
-}
-
-static int qcow_make_empty(BlockDriverState *bs)
-{
-    BDRVQcowState *s = bs->opaque;
-    uint32_t l1_length = s->l1_size * sizeof(uint64_t);
-    int ret;
-
-    memset(s->l1_table, 0, l1_length);
-    if (bdrv_pwrite(s->hd, s->l1_table_offset, s->l1_table, l1_length) < 0)
-       return -1;
-    ret = bdrv_truncate(s->hd, s->l1_table_offset + l1_length);
-    if (ret < 0)
-        return ret;
-
-    memset(s->l2_cache, 0, s->l2_size * L2_CACHE_SIZE * sizeof(uint64_t));
-    memset(s->l2_cache_offsets, 0, L2_CACHE_SIZE * sizeof(uint64_t));
-    memset(s->l2_cache_counts, 0, L2_CACHE_SIZE * sizeof(uint32_t));
-
-    return 0;
-}
-
-/* XXX: put compressed sectors first, then all the cluster aligned
-   tables to avoid losing bytes in alignment */
-static int qcow_write_compressed(BlockDriverState *bs, int64_t sector_num,
-                                 const uint8_t *buf, int nb_sectors)
-{
-    BDRVQcowState *s = bs->opaque;
-    z_stream strm;
-    int ret, out_len;
-    uint8_t *out_buf;
-    uint64_t cluster_offset;
-
-    if (nb_sectors != s->cluster_sectors)
-        return -EINVAL;
-
-    out_buf = qemu_malloc(s->cluster_size + (s->cluster_size / 1000) + 128);
-    if (!out_buf)
-        return -1;
-
-    /* best compression, small window, no zlib header */
-    memset(&strm, 0, sizeof(strm));
-    ret = deflateInit2(&strm, Z_DEFAULT_COMPRESSION,
-                       Z_DEFLATED, -12,
-                       9, Z_DEFAULT_STRATEGY);
-    if (ret != 0) {
-        qemu_free(out_buf);
-        return -1;
-    }
-
-    strm.avail_in = s->cluster_size;
-    strm.next_in = (uint8_t *)buf;
-    strm.avail_out = s->cluster_size;
-    strm.next_out = out_buf;
-
-    ret = deflate(&strm, Z_FINISH);
-    if (ret != Z_STREAM_END && ret != Z_OK) {
-        qemu_free(out_buf);
-        deflateEnd(&strm);
-        return -1;
-    }
-    out_len = strm.next_out - out_buf;
-
-    deflateEnd(&strm);
-
-    if (ret != Z_STREAM_END || out_len >= s->cluster_size) {
-        /* could not compress: write normal cluster */
-        qcow_write(bs, sector_num, buf, s->cluster_sectors);
-    } else {
-        cluster_offset = get_cluster_offset(bs, sector_num << 9, 2,
-                                            out_len, 0, 0);
-        cluster_offset &= s->cluster_offset_mask;
-        if (bdrv_pwrite(s->hd, cluster_offset, out_buf, out_len) != out_len) {
-            qemu_free(out_buf);
-            return -1;
-        }
-    }
-
-    qemu_free(out_buf);
-    return 0;
-}
-
-static void qcow_flush(BlockDriverState *bs)
-{
-    BDRVQcowState *s = bs->opaque;
-    bdrv_flush(s->hd);
-}
-
-static int qcow_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
-{
-    BDRVQcowState *s = bs->opaque;
-    bdi->cluster_size = s->cluster_size;
-    return 0;
-}
-
-static BlockDriver bdrv_qcow = {
-    .format_name       = "qcow",
-    .instance_size     = sizeof(BDRVQcowState),
-    .bdrv_probe                = qcow_probe,
-    .bdrv_open         = qcow_open,
-    .bdrv_close                = qcow_close,
-    .bdrv_create       = qcow_create,
-    .bdrv_flush                = qcow_flush,
-    .bdrv_is_allocated = qcow_is_allocated,
-    .bdrv_set_key      = qcow_set_key,
-    .bdrv_make_empty   = qcow_make_empty,
-    .bdrv_aio_readv    = qcow_aio_readv,
-    .bdrv_aio_writev   = qcow_aio_writev,
-    .bdrv_aio_cancel   = qcow_aio_cancel,
-    .aiocb_size                = sizeof(QCowAIOCB),
-    .bdrv_write_compressed = qcow_write_compressed,
-    .bdrv_get_info     = qcow_get_info,
-};
-
-static void bdrv_qcow_init(void)
-{
-    bdrv_register(&bdrv_qcow);
-}
-
-block_init(bdrv_qcow_init);
diff --git a/block-qcow2.c b/block-qcow2.c
deleted file mode 100644 (file)
index a6de9b6..0000000
+++ /dev/null
@@ -1,2931 +0,0 @@
-/*
- * Block driver for the QCOW version 2 format
- *
- * Copyright (c) 2004-2006 Fabrice Bellard
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
- * THE SOFTWARE.
- */
-#include "qemu-common.h"
-#include "block_int.h"
-#include "module.h"
-#include <zlib.h>
-#include "aes.h"
-
-/*
-  Differences with QCOW:
-
-  - Support for multiple incremental snapshots.
-  - Memory management by reference counts.
-  - Clusters which have a reference count of one have the bit
-    QCOW_OFLAG_COPIED to optimize write performance.
-  - Size of compressed clusters is stored in sectors to reduce bit usage
-    in the cluster offsets.
-  - Support for storing additional data (such as the VM state) in the
-    snapshots.
-  - If a backing store is used, the cluster size is not constrained
-    (could be backported to QCOW).
-  - L2 tables have always a size of one cluster.
-*/
-
-//#define DEBUG_ALLOC
-//#define DEBUG_ALLOC2
-//#define DEBUG_EXT
-
-#define QCOW_MAGIC (('Q' << 24) | ('F' << 16) | ('I' << 8) | 0xfb)
-#define QCOW_VERSION 2
-
-#define QCOW_CRYPT_NONE 0
-#define QCOW_CRYPT_AES  1
-
-#define QCOW_MAX_CRYPT_CLUSTERS 32
-
-/* indicate that the refcount of the referenced cluster is exactly one. */
-#define QCOW_OFLAG_COPIED     (1LL << 63)
-/* indicate that the cluster is compressed (they never have the copied flag) */
-#define QCOW_OFLAG_COMPRESSED (1LL << 62)
-
-#define REFCOUNT_SHIFT 1 /* refcount size is 2 bytes */
-
-typedef struct QCowHeader {
-    uint32_t magic;
-    uint32_t version;
-    uint64_t backing_file_offset;
-    uint32_t backing_file_size;
-    uint32_t cluster_bits;
-    uint64_t size; /* in bytes */
-    uint32_t crypt_method;
-    uint32_t l1_size; /* XXX: save number of clusters instead ? */
-    uint64_t l1_table_offset;
-    uint64_t refcount_table_offset;
-    uint32_t refcount_table_clusters;
-    uint32_t nb_snapshots;
-    uint64_t snapshots_offset;
-} QCowHeader;
-
-
-typedef struct {
-    uint32_t magic;
-    uint32_t len;
-} QCowExtension;
-#define  QCOW_EXT_MAGIC_END 0
-#define  QCOW_EXT_MAGIC_BACKING_FORMAT 0xE2792ACA
-
-
-typedef struct __attribute__((packed)) QCowSnapshotHeader {
-    /* header is 8 byte aligned */
-    uint64_t l1_table_offset;
-
-    uint32_t l1_size;
-    uint16_t id_str_size;
-    uint16_t name_size;
-
-    uint32_t date_sec;
-    uint32_t date_nsec;
-
-    uint64_t vm_clock_nsec;
-
-    uint32_t vm_state_size;
-    uint32_t extra_data_size; /* for extension */
-    /* extra data follows */
-    /* id_str follows */
-    /* name follows  */
-} QCowSnapshotHeader;
-
-#define L2_CACHE_SIZE 16
-
-typedef struct QCowSnapshot {
-    uint64_t l1_table_offset;
-    uint32_t l1_size;
-    char *id_str;
-    char *name;
-    uint32_t vm_state_size;
-    uint32_t date_sec;
-    uint32_t date_nsec;
-    uint64_t vm_clock_nsec;
-} QCowSnapshot;
-
-typedef struct BDRVQcowState {
-    BlockDriverState *hd;
-    int cluster_bits;
-    int cluster_size;
-    int cluster_sectors;
-    int l2_bits;
-    int l2_size;
-    int l1_size;
-    int l1_vm_state_index;
-    int csize_shift;
-    int csize_mask;
-    uint64_t cluster_offset_mask;
-    uint64_t l1_table_offset;
-    uint64_t *l1_table;
-    uint64_t *l2_cache;
-    uint64_t l2_cache_offsets[L2_CACHE_SIZE];
-    uint32_t l2_cache_counts[L2_CACHE_SIZE];
-    uint8_t *cluster_cache;
-    uint8_t *cluster_data;
-    uint64_t cluster_cache_offset;
-
-    uint64_t *refcount_table;
-    uint64_t refcount_table_offset;
-    uint32_t refcount_table_size;
-    uint64_t refcount_block_cache_offset;
-    uint16_t *refcount_block_cache;
-    int64_t free_cluster_index;
-    int64_t free_byte_offset;
-
-    uint32_t crypt_method; /* current crypt method, 0 if no key yet */
-    uint32_t crypt_method_header;
-    AES_KEY aes_encrypt_key;
-    AES_KEY aes_decrypt_key;
-    uint64_t snapshots_offset;
-    int snapshots_size;
-    int nb_snapshots;
-    QCowSnapshot *snapshots;
-} BDRVQcowState;
-
-static int decompress_cluster(BDRVQcowState *s, uint64_t cluster_offset);
-static int qcow_read(BlockDriverState *bs, int64_t sector_num,
-                     uint8_t *buf, int nb_sectors);
-static int qcow_read_snapshots(BlockDriverState *bs);
-static void qcow_free_snapshots(BlockDriverState *bs);
-static int refcount_init(BlockDriverState *bs);
-static void refcount_close(BlockDriverState *bs);
-static int get_refcount(BlockDriverState *bs, int64_t cluster_index);
-static int update_cluster_refcount(BlockDriverState *bs,
-                                   int64_t cluster_index,
-                                   int addend);
-static void update_refcount(BlockDriverState *bs,
-                            int64_t offset, int64_t length,
-                            int addend);
-static int64_t alloc_clusters(BlockDriverState *bs, int64_t size);
-static int64_t alloc_bytes(BlockDriverState *bs, int size);
-static void free_clusters(BlockDriverState *bs,
-                          int64_t offset, int64_t size);
-static int check_refcounts(BlockDriverState *bs);
-
-static int qcow_probe(const uint8_t *buf, int buf_size, const char *filename)
-{
-    const QCowHeader *cow_header = (const void *)buf;
-
-    if (buf_size >= sizeof(QCowHeader) &&
-        be32_to_cpu(cow_header->magic) == QCOW_MAGIC &&
-        be32_to_cpu(cow_header->version) == QCOW_VERSION)
-        return 100;
-    else
-        return 0;
-}
-
-
-/* 
- * read qcow2 extension and fill bs
- * start reading from start_offset
- * finish reading upon magic of value 0 or when end_offset reached
- * unknown magic is skipped (future extension this version knows nothing about)
- * return 0 upon success, non-0 otherwise
- */
-static int qcow_read_extensions(BlockDriverState *bs, uint64_t start_offset,
-                                uint64_t end_offset)
-{
-    BDRVQcowState *s = bs->opaque;
-    QCowExtension ext;
-    uint64_t offset;
-
-#ifdef DEBUG_EXT
-    printf("qcow_read_extensions: start=%ld end=%ld\n", start_offset, end_offset);
-#endif
-    offset = start_offset;
-    while (offset < end_offset) {
-
-#ifdef DEBUG_EXT
-        /* Sanity check */
-        if (offset > s->cluster_size)
-            printf("qcow_handle_extension: suspicious offset %lu\n", offset);
-
-        printf("attemting to read extended header in offset %lu\n", offset);
-#endif
-
-        if (bdrv_pread(s->hd, offset, &ext, sizeof(ext)) != sizeof(ext)) {
-            fprintf(stderr, "qcow_handle_extension: ERROR: pread fail from offset %llu\n",
-                    (unsigned long long)offset);
-            return 1;
-        }
-        be32_to_cpus(&ext.magic);
-        be32_to_cpus(&ext.len);
-        offset += sizeof(ext);
-#ifdef DEBUG_EXT
-        printf("ext.magic = 0x%x\n", ext.magic);
-#endif
-        switch (ext.magic) {
-        case QCOW_EXT_MAGIC_END:
-            return 0;
-
-        case QCOW_EXT_MAGIC_BACKING_FORMAT:
-            if (ext.len >= sizeof(bs->backing_format)) {
-                fprintf(stderr, "ERROR: ext_backing_format: len=%u too large"
-                        " (>=%zu)\n",
-                        ext.len, sizeof(bs->backing_format));
-                return 2;
-            }
-            if (bdrv_pread(s->hd, offset , bs->backing_format,
-                           ext.len) != ext.len)
-                return 3;
-            bs->backing_format[ext.len] = '\0';
-#ifdef DEBUG_EXT
-            printf("Qcow2: Got format extension %s\n", bs->backing_format);
-#endif
-            offset += ((ext.len + 7) & ~7);
-            break;
-
-        default:
-            /* unknown magic -- just skip it */
-            offset += ((ext.len + 7) & ~7);
-            break;
-        }
-    }
-
-    return 0;
-}
-
-
-static int qcow_open(BlockDriverState *bs, const char *filename, int flags)
-{
-    BDRVQcowState *s = bs->opaque;
-    int len, i, shift, ret;
-    QCowHeader header;
-    uint64_t ext_end;
-
-    /* Performance is terrible right now with cache=writethrough due mainly
-     * to reference count updates.  If the user does not explicitly specify
-     * a caching type, force to writeback caching.
-     */
-    if ((flags & BDRV_O_CACHE_DEF)) {
-        flags |= BDRV_O_CACHE_WB;
-        flags &= ~BDRV_O_CACHE_DEF;
-    }
-    ret = bdrv_file_open(&s->hd, filename, flags);
-    if (ret < 0)
-        return ret;
-    if (bdrv_pread(s->hd, 0, &header, sizeof(header)) != sizeof(header))
-        goto fail;
-    be32_to_cpus(&header.magic);
-    be32_to_cpus(&header.version);
-    be64_to_cpus(&header.backing_file_offset);
-    be32_to_cpus(&header.backing_file_size);
-    be64_to_cpus(&header.size);
-    be32_to_cpus(&header.cluster_bits);
-    be32_to_cpus(&header.crypt_method);
-    be64_to_cpus(&header.l1_table_offset);
-    be32_to_cpus(&header.l1_size);
-    be64_to_cpus(&header.refcount_table_offset);
-    be32_to_cpus(&header.refcount_table_clusters);
-    be64_to_cpus(&header.snapshots_offset);
-    be32_to_cpus(&header.nb_snapshots);
-
-    if (header.magic != QCOW_MAGIC || header.version != QCOW_VERSION)
-        goto fail;
-    if (header.size <= 1 ||
-        header.cluster_bits < 9 ||
-        header.cluster_bits > 16)
-        goto fail;
-    if (header.crypt_method > QCOW_CRYPT_AES)
-        goto fail;
-    s->crypt_method_header = header.crypt_method;
-    if (s->crypt_method_header)
-        bs->encrypted = 1;
-    s->cluster_bits = header.cluster_bits;
-    s->cluster_size = 1 << s->cluster_bits;
-    s->cluster_sectors = 1 << (s->cluster_bits - 9);
-    s->l2_bits = s->cluster_bits - 3; /* L2 is always one cluster */
-    s->l2_size = 1 << s->l2_bits;
-    bs->total_sectors = header.size / 512;
-    s->csize_shift = (62 - (s->cluster_bits - 8));
-    s->csize_mask = (1 << (s->cluster_bits - 8)) - 1;
-    s->cluster_offset_mask = (1LL << s->csize_shift) - 1;
-    s->refcount_table_offset = header.refcount_table_offset;
-    s->refcount_table_size =
-        header.refcount_table_clusters << (s->cluster_bits - 3);
-
-    s->snapshots_offset = header.snapshots_offset;
-    s->nb_snapshots = header.nb_snapshots;
-
-    /* read the level 1 table */
-    s->l1_size = header.l1_size;
-    shift = s->cluster_bits + s->l2_bits;
-    s->l1_vm_state_index = (header.size + (1LL << shift) - 1) >> shift;
-    /* the L1 table must contain at least enough entries to put
-       header.size bytes */
-    if (s->l1_size < s->l1_vm_state_index)
-        goto fail;
-    s->l1_table_offset = header.l1_table_offset;
-    s->l1_table = qemu_malloc(s->l1_size * sizeof(uint64_t));
-    if (bdrv_pread(s->hd, s->l1_table_offset, s->l1_table, s->l1_size * sizeof(uint64_t)) !=
-        s->l1_size * sizeof(uint64_t))
-        goto fail;
-    for(i = 0;i < s->l1_size; i++) {
-        be64_to_cpus(&s->l1_table[i]);
-    }
-    /* alloc L2 cache */
-    s->l2_cache = qemu_malloc(s->l2_size * L2_CACHE_SIZE * sizeof(uint64_t));
-    s->cluster_cache = qemu_malloc(s->cluster_size);
-    /* one more sector for decompressed data alignment */
-    s->cluster_data = qemu_malloc(QCOW_MAX_CRYPT_CLUSTERS * s->cluster_size
-                                  + 512);
-    s->cluster_cache_offset = -1;
-
-    if (refcount_init(bs) < 0)
-        goto fail;
-
-    /* read qcow2 extensions */
-    if (header.backing_file_offset)
-        ext_end = header.backing_file_offset;
-    else
-        ext_end = s->cluster_size;
-    if (qcow_read_extensions(bs, sizeof(header), ext_end))
-        goto fail;
-
-    /* read the backing file name */
-    if (header.backing_file_offset != 0) {
-        len = header.backing_file_size;
-        if (len > 1023)
-            len = 1023;
-        if (bdrv_pread(s->hd, header.backing_file_offset, bs->backing_file, len) != len)
-            goto fail;
-        bs->backing_file[len] = '\0';
-    }
-    if (qcow_read_snapshots(bs) < 0)
-        goto fail;
-
-#ifdef DEBUG_ALLOC
-    check_refcounts(bs);
-#endif
-    return 0;
-
- fail:
-    qcow_free_snapshots(bs);
-    refcount_close(bs);
-    qemu_free(s->l1_table);
-    qemu_free(s->l2_cache);
-    qemu_free(s->cluster_cache);
-    qemu_free(s->cluster_data);
-    bdrv_delete(s->hd);
-    return -1;
-}
-
-static int qcow_set_key(BlockDriverState *bs, const char *key)
-{
-    BDRVQcowState *s = bs->opaque;
-    uint8_t keybuf[16];
-    int len, i;
-
-    memset(keybuf, 0, 16);
-    len = strlen(key);
-    if (len > 16)
-        len = 16;
-    /* XXX: we could compress the chars to 7 bits to increase
-       entropy */
-    for(i = 0;i < len;i++) {
-        keybuf[i] = key[i];
-    }
-    s->crypt_method = s->crypt_method_header;
-
-    if (AES_set_encrypt_key(keybuf, 128, &s->aes_encrypt_key) != 0)
-        return -1;
-    if (AES_set_decrypt_key(keybuf, 128, &s->aes_decrypt_key) != 0)
-        return -1;
-#if 0
-    /* test */
-    {
-        uint8_t in[16];
-        uint8_t out[16];
-        uint8_t tmp[16];
-        for(i=0;i<16;i++)
-            in[i] = i;
-        AES_encrypt(in, tmp, &s->aes_encrypt_key);
-        AES_decrypt(tmp, out, &s->aes_decrypt_key);
-        for(i = 0; i < 16; i++)
-            printf(" %02x", tmp[i]);
-        printf("\n");
-        for(i = 0; i < 16; i++)
-            printf(" %02x", out[i]);
-        printf("\n");
-    }
-#endif
-    return 0;
-}
-
-/* The crypt function is compatible with the linux cryptoloop
-   algorithm for < 4 GB images. NOTE: out_buf == in_buf is
-   supported */
-static void encrypt_sectors(BDRVQcowState *s, int64_t sector_num,
-                            uint8_t *out_buf, const uint8_t *in_buf,
-                            int nb_sectors, int enc,
-                            const AES_KEY *key)
-{
-    union {
-        uint64_t ll[2];
-        uint8_t b[16];
-    } ivec;
-    int i;
-
-    for(i = 0; i < nb_sectors; i++) {
-        ivec.ll[0] = cpu_to_le64(sector_num);
-        ivec.ll[1] = 0;
-        AES_cbc_encrypt(in_buf, out_buf, 512, key,
-                        ivec.b, enc);
-        sector_num++;
-        in_buf += 512;
-        out_buf += 512;
-    }
-}
-
-static int copy_sectors(BlockDriverState *bs, uint64_t start_sect,
-                        uint64_t cluster_offset, int n_start, int n_end)
-{
-    BDRVQcowState *s = bs->opaque;
-    int n, ret;
-
-    n = n_end - n_start;
-    if (n <= 0)
-        return 0;
-    ret = qcow_read(bs, start_sect + n_start, s->cluster_data, n);
-    if (ret < 0)
-        return ret;
-    if (s->crypt_method) {
-        encrypt_sectors(s, start_sect + n_start,
-                        s->cluster_data,
-                        s->cluster_data, n, 1,
-                        &s->aes_encrypt_key);
-    }
-    ret = bdrv_write(s->hd, (cluster_offset >> 9) + n_start,
-                     s->cluster_data, n);
-    if (ret < 0)
-        return ret;
-    return 0;
-}
-
-static void l2_cache_reset(BlockDriverState *bs)
-{
-    BDRVQcowState *s = bs->opaque;
-
-    memset(s->l2_cache, 0, s->l2_size * L2_CACHE_SIZE * sizeof(uint64_t));
-    memset(s->l2_cache_offsets, 0, L2_CACHE_SIZE * sizeof(uint64_t));
-    memset(s->l2_cache_counts, 0, L2_CACHE_SIZE * sizeof(uint32_t));
-}
-
-static inline int l2_cache_new_entry(BlockDriverState *bs)
-{
-    BDRVQcowState *s = bs->opaque;
-    uint32_t min_count;
-    int min_index, i;
-
-    /* find a new entry in the least used one */
-    min_index = 0;
-    min_count = 0xffffffff;
-    for(i = 0; i < L2_CACHE_SIZE; i++) {
-        if (s->l2_cache_counts[i] < min_count) {
-            min_count = s->l2_cache_counts[i];
-            min_index = i;
-        }
-    }
-    return min_index;
-}
-
-static int64_t align_offset(int64_t offset, int n)
-{
-    offset = (offset + n - 1) & ~(n - 1);
-    return offset;
-}
-
-static int grow_l1_table(BlockDriverState *bs, int min_size)
-{
-    BDRVQcowState *s = bs->opaque;
-    int new_l1_size, new_l1_size2, ret, i;
-    uint64_t *new_l1_table;
-    uint64_t new_l1_table_offset;
-    uint8_t data[12];
-
-    new_l1_size = s->l1_size;
-    if (min_size <= new_l1_size)
-        return 0;
-    while (min_size > new_l1_size) {
-        new_l1_size = (new_l1_size * 3 + 1) / 2;
-    }
-#ifdef DEBUG_ALLOC2
-    printf("grow l1_table from %d to %d\n", s->l1_size, new_l1_size);
-#endif
-
-    new_l1_size2 = sizeof(uint64_t) * new_l1_size;
-    new_l1_table = qemu_mallocz(new_l1_size2);
-    memcpy(new_l1_table, s->l1_table, s->l1_size * sizeof(uint64_t));
-
-    /* write new table (align to cluster) */
-    new_l1_table_offset = alloc_clusters(bs, new_l1_size2);
-
-    for(i = 0; i < s->l1_size; i++)
-        new_l1_table[i] = cpu_to_be64(new_l1_table[i]);
-    ret = bdrv_pwrite(s->hd, new_l1_table_offset, new_l1_table, new_l1_size2);
-    if (ret != new_l1_size2)
-        goto fail;
-    for(i = 0; i < s->l1_size; i++)
-        new_l1_table[i] = be64_to_cpu(new_l1_table[i]);
-
-    /* set new table */
-    cpu_to_be32w((uint32_t*)data, new_l1_size);
-    cpu_to_be64w((uint64_t*)(data + 4), new_l1_table_offset);
-    if (bdrv_pwrite(s->hd, offsetof(QCowHeader, l1_size), data,
-                sizeof(data)) != sizeof(data))
-        goto fail;
-    qemu_free(s->l1_table);
-    free_clusters(bs, s->l1_table_offset, s->l1_size * sizeof(uint64_t));
-    s->l1_table_offset = new_l1_table_offset;
-    s->l1_table = new_l1_table;
-    s->l1_size = new_l1_size;
-    return 0;
- fail:
-    qemu_free(s->l1_table);
-    return -EIO;
-}
-
-/*
- * seek_l2_table
- *
- * seek l2_offset in the l2_cache table
- * if not found, return NULL,
- * if found,
- *   increments the l2 cache hit count of the entry,
- *   if counter overflow, divide by two all counters
- *   return the pointer to the l2 cache entry
- *
- */
-
-static uint64_t *seek_l2_table(BDRVQcowState *s, uint64_t l2_offset)
-{
-    int i, j;
-
-    for(i = 0; i < L2_CACHE_SIZE; i++) {
-        if (l2_offset == s->l2_cache_offsets[i]) {
-            /* increment the hit count */
-            if (++s->l2_cache_counts[i] == 0xffffffff) {
-                for(j = 0; j < L2_CACHE_SIZE; j++) {
-                    s->l2_cache_counts[j] >>= 1;
-                }
-            }
-            return s->l2_cache + (i << s->l2_bits);
-        }
-    }
-    return NULL;
-}
-
-/*
- * l2_load
- *
- * Loads a L2 table into memory. If the table is in the cache, the cache
- * is used; otherwise the L2 table is loaded from the image file.
- *
- * Returns a pointer to the L2 table on success, or NULL if the read from
- * the image file failed.
- */
-
-static uint64_t *l2_load(BlockDriverState *bs, uint64_t l2_offset)
-{
-    BDRVQcowState *s = bs->opaque;
-    int min_index;
-    uint64_t *l2_table;
-
-    /* seek if the table for the given offset is in the cache */
-
-    l2_table = seek_l2_table(s, l2_offset);
-    if (l2_table != NULL)
-        return l2_table;
-
-    /* not found: load a new entry in the least used one */
-
-    min_index = l2_cache_new_entry(bs);
-    l2_table = s->l2_cache + (min_index << s->l2_bits);
-    if (bdrv_pread(s->hd, l2_offset, l2_table, s->l2_size * sizeof(uint64_t)) !=
-        s->l2_size * sizeof(uint64_t))
-        return NULL;
-    s->l2_cache_offsets[min_index] = l2_offset;
-    s->l2_cache_counts[min_index] = 1;
-
-    return l2_table;
-}
-
-/*
- * l2_allocate
- *
- * Allocate a new l2 entry in the file. If l1_index points to an already
- * used entry in the L2 table (i.e. we are doing a copy on write for the L2
- * table) copy the contents of the old L2 table into the newly allocated one.
- * Otherwise the new table is initialized with zeros.
- *
- */
-
-static uint64_t *l2_allocate(BlockDriverState *bs, int l1_index)
-{
-    BDRVQcowState *s = bs->opaque;
-    int min_index;
-    uint64_t old_l2_offset, tmp;
-    uint64_t *l2_table, l2_offset;
-
-    old_l2_offset = s->l1_table[l1_index];
-
-    /* allocate a new l2 entry */
-
-    l2_offset = alloc_clusters(bs, s->l2_size * sizeof(uint64_t));
-
-    /* update the L1 entry */
-
-    s->l1_table[l1_index] = l2_offset | QCOW_OFLAG_COPIED;
-
-    tmp = cpu_to_be64(l2_offset | QCOW_OFLAG_COPIED);
-    if (bdrv_pwrite(s->hd, s->l1_table_offset + l1_index * sizeof(tmp),
-                    &tmp, sizeof(tmp)) != sizeof(tmp))
-        return NULL;
-
-    /* allocate a new entry in the l2 cache */
-
-    min_index = l2_cache_new_entry(bs);
-    l2_table = s->l2_cache + (min_index << s->l2_bits);
-
-    if (old_l2_offset == 0) {
-        /* if there was no old l2 table, clear the new table */
-        memset(l2_table, 0, s->l2_size * sizeof(uint64_t));
-    } else {
-        /* if there was an old l2 table, read it from the disk */
-        if (bdrv_pread(s->hd, old_l2_offset,
-                       l2_table, s->l2_size * sizeof(uint64_t)) !=
-            s->l2_size * sizeof(uint64_t))
-            return NULL;
-    }
-    /* write the l2 table to the file */
-    if (bdrv_pwrite(s->hd, l2_offset,
-                    l2_table, s->l2_size * sizeof(uint64_t)) !=
-        s->l2_size * sizeof(uint64_t))
-        return NULL;
-
-    /* update the l2 cache entry */
-
-    s->l2_cache_offsets[min_index] = l2_offset;
-    s->l2_cache_counts[min_index] = 1;
-
-    return l2_table;
-}
-
-static int size_to_clusters(BDRVQcowState *s, int64_t size)
-{
-    return (size + (s->cluster_size - 1)) >> s->cluster_bits;
-}
-
-static int count_contiguous_clusters(uint64_t nb_clusters, int cluster_size,
-        uint64_t *l2_table, uint64_t start, uint64_t mask)
-{
-    int i;
-    uint64_t offset = be64_to_cpu(l2_table[0]) & ~mask;
-
-    if (!offset)
-        return 0;
-
-    for (i = start; i < start + nb_clusters; i++)
-        if (offset + i * cluster_size != (be64_to_cpu(l2_table[i]) & ~mask))
-            break;
-
-       return (i - start);
-}
-
-static int count_contiguous_free_clusters(uint64_t nb_clusters, uint64_t *l2_table)
-{
-    int i = 0;
-
-    while(nb_clusters-- && l2_table[i] == 0)
-        i++;
-
-    return i;
-}
-
-/*
- * get_cluster_offset
- *
- * For a given offset of the disk image, return cluster offset in
- * qcow2 file.
- *
- * on entry, *num is the number of contiguous clusters we'd like to
- * access following offset.
- *
- * on exit, *num is the number of contiguous clusters we can read.
- *
- * Return 1, if the offset is found
- * Return 0, otherwise.
- *
- */
-
-static uint64_t get_cluster_offset(BlockDriverState *bs,
-                                   uint64_t offset, int *num)
-{
-    BDRVQcowState *s = bs->opaque;
-    int l1_index, l2_index;
-    uint64_t l2_offset, *l2_table, cluster_offset;
-    int l1_bits, c;
-    int index_in_cluster, nb_available, nb_needed, nb_clusters;
-
-    index_in_cluster = (offset >> 9) & (s->cluster_sectors - 1);
-    nb_needed = *num + index_in_cluster;
-
-    l1_bits = s->l2_bits + s->cluster_bits;
-
-    /* compute how many bytes there are between the offset and
-     * the end of the l1 entry
-     */
-
-    nb_available = (1 << l1_bits) - (offset & ((1 << l1_bits) - 1));
-
-    /* compute the number of available sectors */
-
-    nb_available = (nb_available >> 9) + index_in_cluster;
-
-    if (nb_needed > nb_available) {
-        nb_needed = nb_available;
-    }
-
-    cluster_offset = 0;
-
-    /* seek the the l2 offset in the l1 table */
-
-    l1_index = offset >> l1_bits;
-    if (l1_index >= s->l1_size)
-        goto out;
-
-    l2_offset = s->l1_table[l1_index];
-
-    /* seek the l2 table of the given l2 offset */
-
-    if (!l2_offset)
-        goto out;
-
-    /* load the l2 table in memory */
-
-    l2_offset &= ~QCOW_OFLAG_COPIED;
-    l2_table = l2_load(bs, l2_offset);
-    if (l2_table == NULL)
-        return 0;
-
-    /* find the cluster offset for the given disk offset */
-
-    l2_index = (offset >> s->cluster_bits) & (s->l2_size - 1);
-    cluster_offset = be64_to_cpu(l2_table[l2_index]);
-    nb_clusters = size_to_clusters(s, nb_needed << 9);
-
-    if (!cluster_offset) {
-        /* how many empty clusters ? */
-        c = count_contiguous_free_clusters(nb_clusters, &l2_table[l2_index]);
-    } else {
-        /* how many allocated clusters ? */
-        c = count_contiguous_clusters(nb_clusters, s->cluster_size,
-                &l2_table[l2_index], 0, QCOW_OFLAG_COPIED);
-    }
-
-   nb_available = (c * s->cluster_sectors);
-out:
-    if (nb_available > nb_needed)
-        nb_available = nb_needed;
-
-    *num = nb_available - index_in_cluster;
-
-    return cluster_offset & ~QCOW_OFLAG_COPIED;
-}
-
-/*
- * free_any_clusters
- *
- * free clusters according to its type: compressed or not
- *
- */
-
-static void free_any_clusters(BlockDriverState *bs,
-                              uint64_t cluster_offset, int nb_clusters)
-{
-    BDRVQcowState *s = bs->opaque;
-
-    /* free the cluster */
-
-    if (cluster_offset & QCOW_OFLAG_COMPRESSED) {
-        int nb_csectors;
-        nb_csectors = ((cluster_offset >> s->csize_shift) &
-                       s->csize_mask) + 1;
-        free_clusters(bs, (cluster_offset & s->cluster_offset_mask) & ~511,
-                      nb_csectors * 512);
-        return;
-    }
-
-    free_clusters(bs, cluster_offset, nb_clusters << s->cluster_bits);
-
-    return;
-}
-
-/*
- * get_cluster_table
- *
- * for a given disk offset, load (and allocate if needed)
- * the l2 table.
- *
- * the l2 table offset in the qcow2 file and the cluster index
- * in the l2 table are given to the caller.
- *
- */
-
-static int get_cluster_table(BlockDriverState *bs, uint64_t offset,
-                             uint64_t **new_l2_table,
-                             uint64_t *new_l2_offset,
-                             int *new_l2_index)
-{
-    BDRVQcowState *s = bs->opaque;
-    int l1_index, l2_index, ret;
-    uint64_t l2_offset, *l2_table;
-
-    /* seek the the l2 offset in the l1 table */
-
-    l1_index = offset >> (s->l2_bits + s->cluster_bits);
-    if (l1_index >= s->l1_size) {
-        ret = grow_l1_table(bs, l1_index + 1);
-        if (ret < 0)
-            return 0;
-    }
-    l2_offset = s->l1_table[l1_index];
-
-    /* seek the l2 table of the given l2 offset */
-
-    if (l2_offset & QCOW_OFLAG_COPIED) {
-        /* load the l2 table in memory */
-        l2_offset &= ~QCOW_OFLAG_COPIED;
-        l2_table = l2_load(bs, l2_offset);
-        if (l2_table == NULL)
-            return 0;
-    } else {
-        if (l2_offset)
-            free_clusters(bs, l2_offset, s->l2_size * sizeof(uint64_t));
-        l2_table = l2_allocate(bs, l1_index);
-        if (l2_table == NULL)
-            return 0;
-        l2_offset = s->l1_table[l1_index] & ~QCOW_OFLAG_COPIED;
-    }
-
-    /* find the cluster offset for the given disk offset */
-
-    l2_index = (offset >> s->cluster_bits) & (s->l2_size - 1);
-
-    *new_l2_table = l2_table;
-    *new_l2_offset = l2_offset;
-    *new_l2_index = l2_index;
-
-    return 1;
-}
-
-/*
- * alloc_compressed_cluster_offset
- *
- * For a given offset of the disk image, return cluster offset in
- * qcow2 file.
- *
- * If the offset is not found, allocate a new compressed cluster.
- *
- * Return the cluster offset if successful,
- * Return 0, otherwise.
- *
- */
-
-static uint64_t alloc_compressed_cluster_offset(BlockDriverState *bs,
-                                                uint64_t offset,
-                                                int compressed_size)
-{
-    BDRVQcowState *s = bs->opaque;
-    int l2_index, ret;
-    uint64_t l2_offset, *l2_table, cluster_offset;
-    int nb_csectors;
-
-    ret = get_cluster_table(bs, offset, &l2_table, &l2_offset, &l2_index);
-    if (ret == 0)
-        return 0;
-
-    cluster_offset = be64_to_cpu(l2_table[l2_index]);
-    if (cluster_offset & QCOW_OFLAG_COPIED)
-        return cluster_offset & ~QCOW_OFLAG_COPIED;
-
-    if (cluster_offset)
-        free_any_clusters(bs, cluster_offset, 1);
-
-    cluster_offset = alloc_bytes(bs, compressed_size);
-    nb_csectors = ((cluster_offset + compressed_size - 1) >> 9) -
-                  (cluster_offset >> 9);
-
-    cluster_offset |= QCOW_OFLAG_COMPRESSED |
-                      ((uint64_t)nb_csectors << s->csize_shift);
-
-    /* update L2 table */
-
-    /* compressed clusters never have the copied flag */
-
-    l2_table[l2_index] = cpu_to_be64(cluster_offset);
-    if (bdrv_pwrite(s->hd,
-                    l2_offset + l2_index * sizeof(uint64_t),
-                    l2_table + l2_index,
-                    sizeof(uint64_t)) != sizeof(uint64_t))
-        return 0;
-
-    return cluster_offset;
-}
-
-typedef struct QCowL2Meta
-{
-    uint64_t offset;
-    int n_start;
-    int nb_available;
-    int nb_clusters;
-} QCowL2Meta;
-
-static int alloc_cluster_link_l2(BlockDriverState *bs, uint64_t cluster_offset,
-        QCowL2Meta *m)
-{
-    BDRVQcowState *s = bs->opaque;
-    int i, j = 0, l2_index, ret;
-    uint64_t *old_cluster, start_sect, l2_offset, *l2_table;
-
-    if (m->nb_clusters == 0)
-        return 0;
-
-    old_cluster = qemu_malloc(m->nb_clusters * sizeof(uint64_t));
-
-    /* copy content of unmodified sectors */
-    start_sect = (m->offset & ~(s->cluster_size - 1)) >> 9;
-    if (m->n_start) {
-        ret = copy_sectors(bs, start_sect, cluster_offset, 0, m->n_start);
-        if (ret < 0)
-            goto err;
-    }
-
-    if (m->nb_available & (s->cluster_sectors - 1)) {
-        uint64_t end = m->nb_available & ~(uint64_t)(s->cluster_sectors - 1);
-        ret = copy_sectors(bs, start_sect + end, cluster_offset + (end << 9),
-                m->nb_available - end, s->cluster_sectors);
-        if (ret < 0)
-            goto err;
-    }
-
-    ret = -EIO;
-    /* update L2 table */
-    if (!get_cluster_table(bs, m->offset, &l2_table, &l2_offset, &l2_index))
-        goto err;
-
-    for (i = 0; i < m->nb_clusters; i++) {
-        /* if two concurrent writes happen to the same unallocated cluster
-        * each write allocates separate cluster and writes data concurrently.
-        * The first one to complete updates l2 table with pointer to its
-        * cluster the second one has to do RMW (which is done above by
-        * copy_sectors()), update l2 table with its cluster pointer and free
-        * old cluster. This is what this loop does */
-        if(l2_table[l2_index + i] != 0)
-            old_cluster[j++] = l2_table[l2_index + i];
-
-        l2_table[l2_index + i] = cpu_to_be64((cluster_offset +
-                    (i << s->cluster_bits)) | QCOW_OFLAG_COPIED);
-     }
-
-    if (bdrv_pwrite(s->hd, l2_offset + l2_index * sizeof(uint64_t),
-                l2_table + l2_index, m->nb_clusters * sizeof(uint64_t)) !=
-            m->nb_clusters * sizeof(uint64_t))
-        goto err;
-
-    for (i = 0; i < j; i++)
-        free_any_clusters(bs, be64_to_cpu(old_cluster[i]) & ~QCOW_OFLAG_COPIED,
-                          1);
-
-    ret = 0;
-err:
-    qemu_free(old_cluster);
-    return ret;
- }
-
-/*
- * alloc_cluster_offset
- *
- * For a given offset of the disk image, return cluster offset in
- * qcow2 file.
- *
- * If the offset is not found, allocate a new cluster.
- *
- * Return the cluster offset if successful,
- * Return 0, otherwise.
- *
- */
-
-static uint64_t alloc_cluster_offset(BlockDriverState *bs,
-                                     uint64_t offset,
-                                     int n_start, int n_end,
-                                     int *num, QCowL2Meta *m)
-{
-    BDRVQcowState *s = bs->opaque;
-    int l2_index, ret;
-    uint64_t l2_offset, *l2_table, cluster_offset;
-    int nb_clusters, i = 0;
-
-    ret = get_cluster_table(bs, offset, &l2_table, &l2_offset, &l2_index);
-    if (ret == 0)
-        return 0;
-
-    nb_clusters = size_to_clusters(s, n_end << 9);
-
-    nb_clusters = MIN(nb_clusters, s->l2_size - l2_index);
-
-    cluster_offset = be64_to_cpu(l2_table[l2_index]);
-
-    /* We keep all QCOW_OFLAG_COPIED clusters */
-
-    if (cluster_offset & QCOW_OFLAG_COPIED) {
-        nb_clusters = count_contiguous_clusters(nb_clusters, s->cluster_size,
-                &l2_table[l2_index], 0, 0);
-
-        cluster_offset &= ~QCOW_OFLAG_COPIED;
-        m->nb_clusters = 0;
-
-        goto out;
-    }
-
-    /* for the moment, multiple compressed clusters are not managed */
-
-    if (cluster_offset & QCOW_OFLAG_COMPRESSED)
-        nb_clusters = 1;
-
-    /* how many available clusters ? */
-
-    while (i < nb_clusters) {
-        i += count_contiguous_clusters(nb_clusters - i, s->cluster_size,
-                &l2_table[l2_index], i, 0);
-
-        if(be64_to_cpu(l2_table[l2_index + i]))
-            break;
-
-        i += count_contiguous_free_clusters(nb_clusters - i,
-                &l2_table[l2_index + i]);
-
-        cluster_offset = be64_to_cpu(l2_table[l2_index + i]);
-
-        if ((cluster_offset & QCOW_OFLAG_COPIED) ||
-                (cluster_offset & QCOW_OFLAG_COMPRESSED))
-            break;
-    }
-    nb_clusters = i;
-
-    /* allocate a new cluster */
-
-    cluster_offset = alloc_clusters(bs, nb_clusters * s->cluster_size);
-
-    /* save info needed for meta data update */
-    m->offset = offset;
-    m->n_start = n_start;
-    m->nb_clusters = nb_clusters;
-
-out:
-    m->nb_available = MIN(nb_clusters << (s->cluster_bits - 9), n_end);
-
-    *num = m->nb_available - n_start;
-
-    return cluster_offset;
-}
-
-static int qcow_is_allocated(BlockDriverState *bs, int64_t sector_num,
-                             int nb_sectors, int *pnum)
-{
-    uint64_t cluster_offset;
-
-    *pnum = nb_sectors;
-    cluster_offset = get_cluster_offset(bs, sector_num << 9, pnum);
-
-    return (cluster_offset != 0);
-}
-
-static int decompress_buffer(uint8_t *out_buf, int out_buf_size,
-                             const uint8_t *buf, int buf_size)
-{
-    z_stream strm1, *strm = &strm1;
-    int ret, out_len;
-
-    memset(strm, 0, sizeof(*strm));
-
-    strm->next_in = (uint8_t *)buf;
-    strm->avail_in = buf_size;
-    strm->next_out = out_buf;
-    strm->avail_out = out_buf_size;
-
-    ret = inflateInit2(strm, -12);
-    if (ret != Z_OK)
-        return -1;
-    ret = inflate(strm, Z_FINISH);
-    out_len = strm->next_out - out_buf;
-    if ((ret != Z_STREAM_END && ret != Z_BUF_ERROR) ||
-        out_len != out_buf_size) {
-        inflateEnd(strm);
-        return -1;
-    }
-    inflateEnd(strm);
-    return 0;
-}
-
-static int decompress_cluster(BDRVQcowState *s, uint64_t cluster_offset)
-{
-    int ret, csize, nb_csectors, sector_offset;
-    uint64_t coffset;
-
-    coffset = cluster_offset & s->cluster_offset_mask;
-    if (s->cluster_cache_offset != coffset) {
-        nb_csectors = ((cluster_offset >> s->csize_shift) & s->csize_mask) + 1;
-        sector_offset = coffset & 511;
-        csize = nb_csectors * 512 - sector_offset;
-        ret = bdrv_read(s->hd, coffset >> 9, s->cluster_data, nb_csectors);
-        if (ret < 0) {
-            return -1;
-        }
-        if (decompress_buffer(s->cluster_cache, s->cluster_size,
-                              s->cluster_data + sector_offset, csize) < 0) {
-            return -1;
-        }
-        s->cluster_cache_offset = coffset;
-    }
-    return 0;
-}
-
-/* handle reading after the end of the backing file */
-static int backing_read1(BlockDriverState *bs,
-                         int64_t sector_num, uint8_t *buf, int nb_sectors)
-{
-    int n1;
-    if ((sector_num + nb_sectors) <= bs->total_sectors)
-        return nb_sectors;
-    if (sector_num >= bs->total_sectors)
-        n1 = 0;
-    else
-        n1 = bs->total_sectors - sector_num;
-    memset(buf + n1 * 512, 0, 512 * (nb_sectors - n1));
-    return n1;
-}
-
-static int qcow_read(BlockDriverState *bs, int64_t sector_num,
-                     uint8_t *buf, int nb_sectors)
-{
-    BDRVQcowState *s = bs->opaque;
-    int ret, index_in_cluster, n, n1;
-    uint64_t cluster_offset;
-
-    while (nb_sectors > 0) {
-        n = nb_sectors;
-        cluster_offset = get_cluster_offset(bs, sector_num << 9, &n);
-        index_in_cluster = sector_num & (s->cluster_sectors - 1);
-        if (!cluster_offset) {
-            if (bs->backing_hd) {
-                /* read from the base image */
-                n1 = backing_read1(bs->backing_hd, sector_num, buf, n);
-                if (n1 > 0) {
-                    ret = bdrv_read(bs->backing_hd, sector_num, buf, n1);
-                    if (ret < 0)
-                        return -1;
-                }
-            } else {
-                memset(buf, 0, 512 * n);
-            }
-        } else if (cluster_offset & QCOW_OFLAG_COMPRESSED) {
-            if (decompress_cluster(s, cluster_offset) < 0)
-                return -1;
-            memcpy(buf, s->cluster_cache + index_in_cluster * 512, 512 * n);
-        } else {
-            ret = bdrv_pread(s->hd, cluster_offset + index_in_cluster * 512, buf, n * 512);
-            if (ret != n * 512)
-                return -1;
-            if (s->crypt_method) {
-                encrypt_sectors(s, sector_num, buf, buf, n, 0,
-                                &s->aes_decrypt_key);
-            }
-        }
-        nb_sectors -= n;
-        sector_num += n;
-        buf += n * 512;
-    }
-    return 0;
-}
-
-static int qcow_write(BlockDriverState *bs, int64_t sector_num,
-                     const uint8_t *buf, int nb_sectors)
-{
-    BDRVQcowState *s = bs->opaque;
-    int ret, index_in_cluster, n;
-    uint64_t cluster_offset;
-    int n_end;
-    QCowL2Meta l2meta;
-
-    while (nb_sectors > 0) {
-        index_in_cluster = sector_num & (s->cluster_sectors - 1);
-        n_end = index_in_cluster + nb_sectors;
-        if (s->crypt_method &&
-            n_end > QCOW_MAX_CRYPT_CLUSTERS * s->cluster_sectors)
-            n_end = QCOW_MAX_CRYPT_CLUSTERS * s->cluster_sectors;
-        cluster_offset = alloc_cluster_offset(bs, sector_num << 9,
-                                              index_in_cluster,
-                                              n_end, &n, &l2meta);
-        if (!cluster_offset)
-            return -1;
-        if (s->crypt_method) {
-            encrypt_sectors(s, sector_num, s->cluster_data, buf, n, 1,
-                            &s->aes_encrypt_key);
-            ret = bdrv_pwrite(s->hd, cluster_offset + index_in_cluster * 512,
-                              s->cluster_data, n * 512);
-        } else {
-            ret = bdrv_pwrite(s->hd, cluster_offset + index_in_cluster * 512, buf, n * 512);
-        }
-        if (ret != n * 512 || alloc_cluster_link_l2(bs, cluster_offset, &l2meta) < 0) {
-            free_any_clusters(bs, cluster_offset, l2meta.nb_clusters);
-            return -1;
-        }
-        nb_sectors -= n;
-        sector_num += n;
-        buf += n * 512;
-    }
-    s->cluster_cache_offset = -1; /* disable compressed cache */
-    return 0;
-}
-
-typedef struct QCowAIOCB {
-    BlockDriverAIOCB common;
-    int64_t sector_num;
-    QEMUIOVector *qiov;
-    uint8_t *buf;
-    void *orig_buf;
-    int nb_sectors;
-    int n;
-    uint64_t cluster_offset;
-    uint8_t *cluster_data;
-    BlockDriverAIOCB *hd_aiocb;
-    struct iovec hd_iov;
-    QEMUIOVector hd_qiov;
-    QEMUBH *bh;
-    QCowL2Meta l2meta;
-} QCowAIOCB;
-
-static void qcow_aio_read_cb(void *opaque, int ret);
-static void qcow_aio_read_bh(void *opaque)
-{
-    QCowAIOCB *acb = opaque;
-    qemu_bh_delete(acb->bh);
-    acb->bh = NULL;
-    qcow_aio_read_cb(opaque, 0);
-}
-
-static int qcow_schedule_bh(QEMUBHFunc *cb, QCowAIOCB *acb)
-{
-    if (acb->bh)
-        return -EIO;
-
-    acb->bh = qemu_bh_new(cb, acb);
-    if (!acb->bh)
-        return -EIO;
-
-    qemu_bh_schedule(acb->bh);
-
-    return 0;
-}
-
-static void qcow_aio_read_cb(void *opaque, int ret)
-{
-    QCowAIOCB *acb = opaque;
-    BlockDriverState *bs = acb->common.bs;
-    BDRVQcowState *s = bs->opaque;
-    int index_in_cluster, n1;
-
-    acb->hd_aiocb = NULL;
-    if (ret < 0)
-        goto done;
-
-    /* post process the read buffer */
-    if (!acb->cluster_offset) {
-        /* nothing to do */
-    } else if (acb->cluster_offset & QCOW_OFLAG_COMPRESSED) {
-        /* nothing to do */
-    } else {
-        if (s->crypt_method) {
-            encrypt_sectors(s, acb->sector_num, acb->buf, acb->buf,
-                            acb->n, 0,
-                            &s->aes_decrypt_key);
-        }
-    }
-
-    acb->nb_sectors -= acb->n;
-    acb->sector_num += acb->n;
-    acb->buf += acb->n * 512;
-
-    if (acb->nb_sectors == 0) {
-        /* request completed */
-        ret = 0;
-        goto done;
-    }
-
-    /* prepare next AIO request */
-    acb->n = acb->nb_sectors;
-    acb->cluster_offset = get_cluster_offset(bs, acb->sector_num << 9, &acb->n);
-    index_in_cluster = acb->sector_num & (s->cluster_sectors - 1);
-
-    if (!acb->cluster_offset) {
-        if (bs->backing_hd) {
-            /* read from the base image */
-            n1 = backing_read1(bs->backing_hd, acb->sector_num,
-                               acb->buf, acb->n);
-            if (n1 > 0) {
-                acb->hd_iov.iov_base = (void *)acb->buf;
-                acb->hd_iov.iov_len = acb->n * 512;
-                qemu_iovec_init_external(&acb->hd_qiov, &acb->hd_iov, 1);
-                acb->hd_aiocb = bdrv_aio_readv(bs->backing_hd, acb->sector_num,
-                                    &acb->hd_qiov, acb->n,
-                                   qcow_aio_read_cb, acb);
-                if (acb->hd_aiocb == NULL)
-                    goto done;
-            } else {
-                ret = qcow_schedule_bh(qcow_aio_read_bh, acb);
-                if (ret < 0)
-                    goto done;
-            }
-        } else {
-            /* Note: in this case, no need to wait */
-            memset(acb->buf, 0, 512 * acb->n);
-            ret = qcow_schedule_bh(qcow_aio_read_bh, acb);
-            if (ret < 0)
-                goto done;
-        }
-    } else if (acb->cluster_offset & QCOW_OFLAG_COMPRESSED) {
-        /* add AIO support for compressed blocks ? */
-        if (decompress_cluster(s, acb->cluster_offset) < 0)
-            goto done;
-        memcpy(acb->buf,
-               s->cluster_cache + index_in_cluster * 512, 512 * acb->n);
-        ret = qcow_schedule_bh(qcow_aio_read_bh, acb);
-        if (ret < 0)
-            goto done;
-    } else {
-        if ((acb->cluster_offset & 511) != 0) {
-            ret = -EIO;
-            goto done;
-        }
-
-        acb->hd_iov.iov_base = (void *)acb->buf;
-        acb->hd_iov.iov_len = acb->n * 512;
-        qemu_iovec_init_external(&acb->hd_qiov, &acb->hd_iov, 1);
-        acb->hd_aiocb = bdrv_aio_readv(s->hd,
-                            (acb->cluster_offset >> 9) + index_in_cluster,
-                            &acb->hd_qiov, acb->n, qcow_aio_read_cb, acb);
-        if (acb->hd_aiocb == NULL)
-            goto done;
-    }
-
-    return;
-done:
-    if (acb->qiov->niov > 1) {
-        qemu_iovec_from_buffer(acb->qiov, acb->orig_buf, acb->qiov->size);
-        qemu_vfree(acb->orig_buf);
-    }
-    acb->common.cb(acb->common.opaque, ret);
-    qemu_aio_release(acb);
-}
-
-static QCowAIOCB *qcow_aio_setup(BlockDriverState *bs,
-        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
-        BlockDriverCompletionFunc *cb, void *opaque, int is_write)
-{
-    QCowAIOCB *acb;
-
-    acb = qemu_aio_get(bs, cb, opaque);
-    if (!acb)
-        return NULL;
-    acb->hd_aiocb = NULL;
-    acb->sector_num = sector_num;
-    acb->qiov = qiov;
-    if (qiov->niov > 1) {
-        acb->buf = acb->orig_buf = qemu_blockalign(bs, qiov->size);
-        if (is_write)
-            qemu_iovec_to_buffer(qiov, acb->buf);
-    } else {
-        acb->buf = (uint8_t *)qiov->iov->iov_base;
-    }
-    acb->nb_sectors = nb_sectors;
-    acb->n = 0;
-    acb->cluster_offset = 0;
-    acb->l2meta.nb_clusters = 0;
-    return acb;
-}
-
-static BlockDriverAIOCB *qcow_aio_readv(BlockDriverState *bs,
-        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
-        BlockDriverCompletionFunc *cb, void *opaque)
-{
-    QCowAIOCB *acb;
-
-    acb = qcow_aio_setup(bs, sector_num, qiov, nb_sectors, cb, opaque, 0);
-    if (!acb)
-        return NULL;
-
-    qcow_aio_read_cb(acb, 0);
-    return &acb->common;
-}
-
-static void qcow_aio_write_cb(void *opaque, int ret)
-{
-    QCowAIOCB *acb = opaque;
-    BlockDriverState *bs = acb->common.bs;
-    BDRVQcowState *s = bs->opaque;
-    int index_in_cluster;
-    const uint8_t *src_buf;
-    int n_end;
-
-    acb->hd_aiocb = NULL;
-
-    if (ret < 0)
-        goto done;
-
-    if (alloc_cluster_link_l2(bs, acb->cluster_offset, &acb->l2meta) < 0) {
-        free_any_clusters(bs, acb->cluster_offset, acb->l2meta.nb_clusters);
-        goto done;
-    }
-
-    acb->nb_sectors -= acb->n;
-    acb->sector_num += acb->n;
-    acb->buf += acb->n * 512;
-
-    if (acb->nb_sectors == 0) {
-        /* request completed */
-        ret = 0;
-        goto done;
-    }
-
-    index_in_cluster = acb->sector_num & (s->cluster_sectors - 1);
-    n_end = index_in_cluster + acb->nb_sectors;
-    if (s->crypt_method &&
-        n_end > QCOW_MAX_CRYPT_CLUSTERS * s->cluster_sectors)
-        n_end = QCOW_MAX_CRYPT_CLUSTERS * s->cluster_sectors;
-
-    acb->cluster_offset = alloc_cluster_offset(bs, acb->sector_num << 9,
-                                          index_in_cluster,
-                                          n_end, &acb->n, &acb->l2meta);
-    if (!acb->cluster_offset || (acb->cluster_offset & 511) != 0) {
-        ret = -EIO;
-        goto done;
-    }
-    if (s->crypt_method) {
-        if (!acb->cluster_data) {
-            acb->cluster_data = qemu_mallocz(QCOW_MAX_CRYPT_CLUSTERS *
-                                             s->cluster_size);
-        }
-        encrypt_sectors(s, acb->sector_num, acb->cluster_data, acb->buf,
-                        acb->n, 1, &s->aes_encrypt_key);
-        src_buf = acb->cluster_data;
-    } else {
-        src_buf = acb->buf;
-    }
-    acb->hd_iov.iov_base = (void *)src_buf;
-    acb->hd_iov.iov_len = acb->n * 512;
-    qemu_iovec_init_external(&acb->hd_qiov, &acb->hd_iov, 1);
-    acb->hd_aiocb = bdrv_aio_writev(s->hd,
-                                    (acb->cluster_offset >> 9) + index_in_cluster,
-                                    &acb->hd_qiov, acb->n,
-                                    qcow_aio_write_cb, acb);
-    if (acb->hd_aiocb == NULL)
-        goto done;
-
-    return;
-
-done:
-    if (acb->qiov->niov > 1)
-        qemu_vfree(acb->orig_buf);
-    acb->common.cb(acb->common.opaque, ret);
-    qemu_aio_release(acb);
-}
-
-static BlockDriverAIOCB *qcow_aio_writev(BlockDriverState *bs,
-        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
-        BlockDriverCompletionFunc *cb, void *opaque)
-{
-    BDRVQcowState *s = bs->opaque;
-    QCowAIOCB *acb;
-
-    s->cluster_cache_offset = -1; /* disable compressed cache */
-
-    acb = qcow_aio_setup(bs, sector_num, qiov, nb_sectors, cb, opaque, 1);
-    if (!acb)
-        return NULL;
-
-    qcow_aio_write_cb(acb, 0);
-    return &acb->common;
-}
-
-static void qcow_aio_cancel(BlockDriverAIOCB *blockacb)
-{
-    QCowAIOCB *acb = (QCowAIOCB *)blockacb;
-    if (acb->hd_aiocb)
-        bdrv_aio_cancel(acb->hd_aiocb);
-    qemu_aio_release(acb);
-}
-
-static void qcow_close(BlockDriverState *bs)
-{
-    BDRVQcowState *s = bs->opaque;
-    qemu_free(s->l1_table);
-    qemu_free(s->l2_cache);
-    qemu_free(s->cluster_cache);
-    qemu_free(s->cluster_data);
-    refcount_close(bs);
-    bdrv_delete(s->hd);
-}
-
-/* XXX: use std qcow open function ? */
-typedef struct QCowCreateState {
-    int cluster_size;
-    int cluster_bits;
-    uint16_t *refcount_block;
-    uint64_t *refcount_table;
-    int64_t l1_table_offset;
-    int64_t refcount_table_offset;
-    int64_t refcount_block_offset;
-} QCowCreateState;
-
-static void create_refcount_update(QCowCreateState *s,
-                                   int64_t offset, int64_t size)
-{
-    int refcount;
-    int64_t start, last, cluster_offset;
-    uint16_t *p;
-
-    start = offset & ~(s->cluster_size - 1);
-    last = (offset + size - 1)  & ~(s->cluster_size - 1);
-    for(cluster_offset = start; cluster_offset <= last;
-        cluster_offset += s->cluster_size) {
-        p = &s->refcount_block[cluster_offset >> s->cluster_bits];
-        refcount = be16_to_cpu(*p);
-        refcount++;
-        *p = cpu_to_be16(refcount);
-    }
-}
-
-static int qcow_create2(const char *filename, int64_t total_size,
-                        const char *backing_file, const char *backing_format,
-                        int flags)
-{
-
-    int fd, header_size, backing_filename_len, l1_size, i, shift, l2_bits;
-    int ref_clusters, backing_format_len = 0;
-    QCowHeader header;
-    uint64_t tmp, offset;
-    QCowCreateState s1, *s = &s1;
-    QCowExtension ext_bf = {0, 0};
-
-
-    memset(s, 0, sizeof(*s));
-
-    fd = open(filename, O_WRONLY | O_CREAT | O_TRUNC | O_BINARY, 0644);
-    if (fd < 0)
-        return -1;
-    memset(&header, 0, sizeof(header));
-    header.magic = cpu_to_be32(QCOW_MAGIC);
-    header.version = cpu_to_be32(QCOW_VERSION);
-    header.size = cpu_to_be64(total_size * 512);
-    header_size = sizeof(header);
-    backing_filename_len = 0;
-    if (backing_file) {
-        if (backing_format) {
-            ext_bf.magic = QCOW_EXT_MAGIC_BACKING_FORMAT;
-            backing_format_len = strlen(backing_format);
-            ext_bf.len = (backing_format_len + 7) & ~7;
-            header_size += ((sizeof(ext_bf) + ext_bf.len + 7) & ~7);
-        }
-        header.backing_file_offset = cpu_to_be64(header_size);
-        backing_filename_len = strlen(backing_file);
-        header.backing_file_size = cpu_to_be32(backing_filename_len);
-        header_size += backing_filename_len;
-    }
-    s->cluster_bits = 12;  /* 4 KB clusters */
-    s->cluster_size = 1 << s->cluster_bits;
-    header.cluster_bits = cpu_to_be32(s->cluster_bits);
-    header_size = (header_size + 7) & ~7;
-    if (flags & BLOCK_FLAG_ENCRYPT) {
-        header.crypt_method = cpu_to_be32(QCOW_CRYPT_AES);
-    } else {
-        header.crypt_method = cpu_to_be32(QCOW_CRYPT_NONE);
-    }
-    l2_bits = s->cluster_bits - 3;
-    shift = s->cluster_bits + l2_bits;
-    l1_size = (((total_size * 512) + (1LL << shift) - 1) >> shift);
-    offset = align_offset(header_size, s->cluster_size);
-    s->l1_table_offset = offset;
-    header.l1_table_offset = cpu_to_be64(s->l1_table_offset);
-    header.l1_size = cpu_to_be32(l1_size);
-    offset += align_offset(l1_size * sizeof(uint64_t), s->cluster_size);
-
-    s->refcount_table = qemu_mallocz(s->cluster_size);
-
-    s->refcount_table_offset = offset;
-    header.refcount_table_offset = cpu_to_be64(offset);
-    header.refcount_table_clusters = cpu_to_be32(1);
-    offset += s->cluster_size;
-    s->refcount_block_offset = offset;
-
-    /* count how many refcount blocks needed */
-    tmp = offset >> s->cluster_bits;
-    ref_clusters = (tmp >> (s->cluster_bits - REFCOUNT_SHIFT)) + 1;
-    for (i=0; i < ref_clusters; i++) {
-        s->refcount_table[i] = cpu_to_be64(offset);
-        offset += s->cluster_size;
-    }
-
-    s->refcount_block = qemu_mallocz(ref_clusters * s->cluster_size);
-
-    /* update refcounts */
-    create_refcount_update(s, 0, header_size);
-    create_refcount_update(s, s->l1_table_offset, l1_size * sizeof(uint64_t));
-    create_refcount_update(s, s->refcount_table_offset, s->cluster_size);
-    create_refcount_update(s, s->refcount_block_offset, ref_clusters * s->cluster_size);
-
-    /* write all the data */
-    write(fd, &header, sizeof(header));
-    if (backing_file) {
-        if (backing_format_len) {
-            char zero[16];
-            int d = ext_bf.len - backing_format_len;
-
-            memset(zero, 0, sizeof(zero));
-            cpu_to_be32s(&ext_bf.magic);
-            cpu_to_be32s(&ext_bf.len);
-            write(fd, &ext_bf, sizeof(ext_bf));
-            write(fd, backing_format, backing_format_len);
-            if (d>0) {
-                write(fd, zero, d);
-            }
-        }
-        write(fd, backing_file, backing_filename_len);
-    }
-    lseek(fd, s->l1_table_offset, SEEK_SET);
-    tmp = 0;
-    for(i = 0;i < l1_size; i++) {
-        write(fd, &tmp, sizeof(tmp));
-    }
-    lseek(fd, s->refcount_table_offset, SEEK_SET);
-    write(fd, s->refcount_table, s->cluster_size);
-
-    lseek(fd, s->refcount_block_offset, SEEK_SET);
-    write(fd, s->refcount_block, ref_clusters * s->cluster_size);
-
-    qemu_free(s->refcount_table);
-    qemu_free(s->refcount_block);
-    close(fd);
-    return 0;
-}
-
-static int qcow_create(const char *filename, int64_t total_size,
-                       const char *backing_file, int flags)
-{
-    return qcow_create2(filename, total_size, backing_file, NULL, flags);
-}
-/*
- * Stub for emptying the image. The candidate implementation below is
- * disabled with "#if 0" and flagged as not correct, so the compiled
- * function does nothing and always returns 0.
- */
-static int qcow_make_empty(BlockDriverState *bs)
-{
-#if 0
-    /* XXX: not correct */
-    BDRVQcowState *s = bs->opaque;
-    uint32_t l1_length = s->l1_size * sizeof(uint64_t);
-    int ret;
-
-    memset(s->l1_table, 0, l1_length);
-    if (bdrv_pwrite(s->hd, s->l1_table_offset, s->l1_table, l1_length) < 0)
-        return -1;
-    ret = bdrv_truncate(s->hd, s->l1_table_offset + l1_length);
-    if (ret < 0)
-        return ret;
-
-    l2_cache_reset(bs);
-#endif
-    return 0;
-}
-
-/* XXX: put compressed sectors first, then all the cluster aligned
-   tables to avoid losing bytes in alignment */
-/*
- * Write one cluster of guest data in compressed form.
- *
- * bs:         image being written
- * sector_num: first guest sector of the cluster
- * buf:        s->cluster_size bytes of uncompressed data
- * nb_sectors: must be s->cluster_sectors, or 0 as a request to pad the
- *             image file up to the next 512-byte boundary
- *
- * The data is deflated into a temporary buffer. If the result is not smaller
- * than a cluster (or deflate could not finish within one cluster of output),
- * the data is written uncompressed through qcow_write() instead.
- *
- * Returns 0 on success, -EINVAL if nb_sectors is neither 0 nor a whole
- * cluster, and -1 on compression, allocation or write failure. The temporary
- * buffer is released on every exit path.
- */
-static int qcow_write_compressed(BlockDriverState *bs, int64_t sector_num,
-                                 const uint8_t *buf, int nb_sectors)
-{
-    BDRVQcowState *s = bs->opaque;
-    z_stream strm;
-    int ret, out_len;
-    uint8_t *out_buf;
-    uint64_t cluster_offset;
-
-    if (nb_sectors == 0) {
-        /* align end of file to a sector boundary to ease reading with
-           sector based I/Os */
-        cluster_offset = bdrv_getlength(s->hd);
-        cluster_offset = (cluster_offset + 511) & ~511;
-        bdrv_truncate(s->hd, cluster_offset);
-        return 0;
-    }
-
-    if (nb_sectors != s->cluster_sectors)
-        return -EINVAL;
-
-    out_buf = qemu_malloc(s->cluster_size + (s->cluster_size / 1000) + 128);
-
-    /* default compression level, small window (windowBits = -12: negative
-       value selects raw deflate, i.e. no zlib header), memLevel 9 */
-    memset(&strm, 0, sizeof(strm));
-    ret = deflateInit2(&strm, Z_DEFAULT_COMPRESSION,
-                       Z_DEFLATED, -12,
-                       9, Z_DEFAULT_STRATEGY);
-    if (ret != 0)
-        goto fail;
-
-    strm.avail_in = s->cluster_size;
-    strm.next_in = (uint8_t *)buf;
-    /* output is capped at one cluster: anything larger is not worth storing
-       compressed */
-    strm.avail_out = s->cluster_size;
-    strm.next_out = out_buf;
-
-    ret = deflate(&strm, Z_FINISH);
-    if (ret != Z_STREAM_END && ret != Z_OK) {
-        deflateEnd(&strm);
-        goto fail;
-    }
-    out_len = strm.next_out - out_buf;
-
-    deflateEnd(&strm);
-
-    if (ret != Z_STREAM_END || out_len >= s->cluster_size) {
-        /* could not compress: write normal cluster */
-        /* NOTE(review): the result of qcow_write() is ignored here, as in
-           the original code - confirm whether it should be propagated */
-        qcow_write(bs, sector_num, buf, s->cluster_sectors);
-    } else {
-        cluster_offset = alloc_compressed_cluster_offset(bs, sector_num << 9,
-                                              out_len);
-        /* previously this path returned without releasing out_buf */
-        if (!cluster_offset)
-            goto fail;
-        cluster_offset &= s->cluster_offset_mask;
-        if (bdrv_pwrite(s->hd, cluster_offset, out_buf, out_len) != out_len)
-            goto fail;
-    }
-
-    qemu_free(out_buf);
-    return 0;
-
- fail:
-    qemu_free(out_buf);
-    return -1;
-}
-/* Flush the underlying image file (s->hd) by delegating to bdrv_flush(). */
-static void qcow_flush(BlockDriverState *bs)
-{
-    BDRVQcowState *s = bs->opaque;
-    bdrv_flush(s->hd);
-}
-
-/*
- * Fill in driver information for the image: the cluster size, and the byte
- * offset of the VM state area, computed as l1_vm_state_index shifted left by
- * (cluster_bits + l2_bits). Always returns 0.
- */
-static int qcow_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
-{
-    const BDRVQcowState *state = bs->opaque;
-
-    bdi->cluster_size = state->cluster_size;
-    bdi->vm_state_offset =
-        (int64_t)state->l1_vm_state_index << (state->cluster_bits + state->l2_bits);
-    return 0;
-}
-
-/*********************************************************/
-/* snapshot support */
-
-/*
- * update the refcounts of snapshots and the copied flag
- *
- * Walks the L1 table stored at l1_table_offset (l1_size entries) and every
- * L2 table it references. When addend is non-zero, the refcount of each
- * referenced data cluster and of each L2 table cluster is changed by addend;
- * when addend is 0 the refcounts are only read. QCOW_OFLAG_COPIED is then
- * set on each entry whose resulting refcount is exactly 1 and cleared
- * otherwise, and any modified L2 or L1 table is written back to the image.
- *
- * If l1_table_offset is the active L1 table, s->l1_table is used and updated
- * in place; otherwise a temporary copy is read from the image.
- *
- * Returns 0 on success and -EIO if reading or writing a table fails.
- * NOTE(review): negative (error) results from update_cluster_refcount() are
- * not checked here.
- */
-static int update_snapshot_refcount(BlockDriverState *bs,
-                                    int64_t l1_table_offset,
-                                    int l1_size,
-                                    int addend)
-{
-    BDRVQcowState *s = bs->opaque;
-    uint64_t *l1_table, *l2_table, l2_offset, offset, l1_size2, l1_allocated;
-    int64_t old_offset, old_l2_offset;
-    int l2_size, i, j, l1_modified, l2_modified, nb_csectors, refcount;
-
-    l2_cache_reset(bs);
-
-    l2_table = NULL;
-    l1_table = NULL;
-    l1_size2 = l1_size * sizeof(uint64_t);
-    l1_allocated = 0;
-    if (l1_table_offset != s->l1_table_offset) {
-        /* not the active table: work on a private, host-endian copy */
-        l1_table = qemu_malloc(l1_size2);
-        l1_allocated = 1;
-        if (bdrv_pread(s->hd, l1_table_offset,
-                       l1_table, l1_size2) != l1_size2)
-            goto fail;
-        for(i = 0;i < l1_size; i++)
-            be64_to_cpus(&l1_table[i]);
-    } else {
-        assert(l1_size == s->l1_size);
-        l1_table = s->l1_table;
-        l1_allocated = 0;
-    }
-
-    l2_size = s->l2_size * sizeof(uint64_t);
-    l2_table = qemu_malloc(l2_size);
-    l1_modified = 0;
-    for(i = 0; i < l1_size; i++) {
-        l2_offset = l1_table[i];
-        if (l2_offset) {
-            old_l2_offset = l2_offset;
-            l2_offset &= ~QCOW_OFLAG_COPIED;
-            l2_modified = 0;
-            if (bdrv_pread(s->hd, l2_offset, l2_table, l2_size) != l2_size)
-                goto fail;
-            for(j = 0; j < s->l2_size; j++) {
-                offset = be64_to_cpu(l2_table[j]);
-                if (offset != 0) {
-                    old_offset = offset;
-                    offset &= ~QCOW_OFLAG_COPIED;
-                    if (offset & QCOW_OFLAG_COMPRESSED) {
-                        nb_csectors = ((offset >> s->csize_shift) &
-                                       s->csize_mask) + 1;
-                        if (addend != 0)
-                            update_refcount(bs, (offset & s->cluster_offset_mask) & ~511,
-                                            nb_csectors * 512, addend);
-                        /* compressed clusters are never modified */
-                        refcount = 2; /* any value != 1 keeps COPIED clear */
-                    } else {
-                        if (addend != 0) {
-                            refcount = update_cluster_refcount(bs, offset >> s->cluster_bits, addend);
-                        } else {
-                            refcount = get_refcount(bs, offset >> s->cluster_bits);
-                        }
-                    }
-                    /* COPIED is set only for clusters with exactly one user */
-                    if (refcount == 1) {
-                        offset |= QCOW_OFLAG_COPIED;
-                    }
-                    if (offset != old_offset) {
-                        l2_table[j] = cpu_to_be64(offset);
-                        l2_modified = 1;
-                    }
-                }
-            }
-            if (l2_modified) {
-                if (bdrv_pwrite(s->hd,
-                                l2_offset, l2_table, l2_size) != l2_size)
-                    goto fail;
-            }
-            /* same treatment for the cluster holding the L2 table itself */
-            if (addend != 0) {
-                refcount = update_cluster_refcount(bs, l2_offset >> s->cluster_bits, addend);
-            } else {
-                refcount = get_refcount(bs, l2_offset >> s->cluster_bits);
-            }
-            if (refcount == 1) {
-                l2_offset |= QCOW_OFLAG_COPIED;
-            }
-            if (l2_offset != old_l2_offset) {
-                l1_table[i] = l2_offset;
-                l1_modified = 1;
-            }
-        }
-    }
-    if (l1_modified) {
-        /* convert to big-endian for the write, then back to host order */
-        for(i = 0; i < l1_size; i++)
-            cpu_to_be64s(&l1_table[i]);
-        if (bdrv_pwrite(s->hd, l1_table_offset, l1_table,
-                        l1_size2) != l1_size2)
-            goto fail;
-        for(i = 0; i < l1_size; i++)
-            be64_to_cpus(&l1_table[i]);
-    }
-    if (l1_allocated)
-        qemu_free(l1_table);
-    qemu_free(l2_table);
-    return 0;
- fail:
-    if (l1_allocated)
-        qemu_free(l1_table);
-    qemu_free(l2_table);
-    return -EIO;
-}
-
-/*
- * Drop the in-memory snapshot list: free the name and ID string of every
- * entry, free the array itself, and reset the pointer and the count.
- */
-static void qcow_free_snapshots(BlockDriverState *bs)
-{
-    BDRVQcowState *s = bs->opaque;
-    int idx = 0;
-
-    while (idx < s->nb_snapshots) {
-        QCowSnapshot *snap = &s->snapshots[idx];
-
-        qemu_free(snap->name);
-        qemu_free(snap->id_str);
-        idx++;
-    }
-
-    qemu_free(s->snapshots);
-    s->nb_snapshots = 0;
-    s->snapshots = NULL;
-}
-/*
- * Load the snapshot table from the image into s->snapshots.
- *
- * Starting at s->snapshots_offset, reads s->nb_snapshots records. Each
- * record is a QCowSnapshotHeader (big-endian fields) placed at an offset
- * aligned with align_offset(offset, 8), followed by extra_data_size bytes
- * that are skipped, then the ID string and the name. The strings carry no
- * terminator on disk; a NUL is appended here. s->snapshots_size is set to
- * the number of bytes consumed.
- *
- * Returns 0 on success (including when there are no snapshots) and -1 on a
- * read error, in which case everything loaded so far is freed again.
- */
-static int qcow_read_snapshots(BlockDriverState *bs)
-{
-    BDRVQcowState *s = bs->opaque;
-    QCowSnapshotHeader h;
-    QCowSnapshot *sn;
-    int i, id_str_size, name_size;
-    int64_t offset;
-    uint32_t extra_data_size;
-
-    if (!s->nb_snapshots) {
-        s->snapshots = NULL;
-        s->snapshots_size = 0;
-        return 0;
-    }
-
-    offset = s->snapshots_offset;
-    /* zero-filled so that the fail path can free partially loaded entries */
-    s->snapshots = qemu_mallocz(s->nb_snapshots * sizeof(QCowSnapshot));
-    for(i = 0; i < s->nb_snapshots; i++) {
-        offset = align_offset(offset, 8);
-        if (bdrv_pread(s->hd, offset, &h, sizeof(h)) != sizeof(h))
-            goto fail;
-        offset += sizeof(h);
-        sn = s->snapshots + i;
-        sn->l1_table_offset = be64_to_cpu(h.l1_table_offset);
-        sn->l1_size = be32_to_cpu(h.l1_size);
-        sn->vm_state_size = be32_to_cpu(h.vm_state_size);
-        sn->date_sec = be32_to_cpu(h.date_sec);
-        sn->date_nsec = be32_to_cpu(h.date_nsec);
-        sn->vm_clock_nsec = be64_to_cpu(h.vm_clock_nsec);
-        extra_data_size = be32_to_cpu(h.extra_data_size);
-
-        id_str_size = be16_to_cpu(h.id_str_size);
-        name_size = be16_to_cpu(h.name_size);
-        /* the extra data is not interpreted, only skipped */
-        offset += extra_data_size;
-
-        sn->id_str = qemu_malloc(id_str_size + 1);
-        if (bdrv_pread(s->hd, offset, sn->id_str, id_str_size) != id_str_size)
-            goto fail;
-        offset += id_str_size;
-        sn->id_str[id_str_size] = '\0';
-
-        sn->name = qemu_malloc(name_size + 1);
-        if (bdrv_pread(s->hd, offset, sn->name, name_size) != name_size)
-            goto fail;
-        offset += name_size;
-        sn->name[name_size] = '\0';
-    }
-    s->snapshots_size = offset - s->snapshots_offset;
-    return 0;
- fail:
-    qcow_free_snapshots(bs);
-    return -1;
-}
-
-/*
- * add at the end of the file a new list of snapshots
- *
- * Serialises s->snapshots into newly allocated clusters (header, ID string
- * and name per entry, each record started at an offset aligned with
- * align_offset(offset, 8)), then updates the snapshots_offset and
- * nb_snapshots fields of the image header and finally releases the clusters
- * of the previous table.
- *
- * Returns 0 on success and -1 if any write fails.
- * NOTE(review): on failure the newly allocated clusters are not released,
- * and the result of alloc_clusters() is not checked.
- */
-static int qcow_write_snapshots(BlockDriverState *bs)
-{
-    BDRVQcowState *s = bs->opaque;
-    QCowSnapshot *sn;
-    QCowSnapshotHeader h;
-    int i, name_size, id_str_size, snapshots_size;
-    uint64_t data64;
-    uint32_t data32;
-    int64_t offset, snapshots_offset;
-
-    /* compute the size of the snapshots */
-    offset = 0;
-    for(i = 0; i < s->nb_snapshots; i++) {
-        sn = s->snapshots + i;
-        offset = align_offset(offset, 8);
-        offset += sizeof(h);
-        offset += strlen(sn->id_str);
-        offset += strlen(sn->name);
-    }
-    snapshots_size = offset;
-
-    snapshots_offset = alloc_clusters(bs, snapshots_size);
-    offset = snapshots_offset;
-
-    for(i = 0; i < s->nb_snapshots; i++) {
-        sn = s->snapshots + i;
-        memset(&h, 0, sizeof(h)); /* fields not set below stay zero */
-        h.l1_table_offset = cpu_to_be64(sn->l1_table_offset);
-        h.l1_size = cpu_to_be32(sn->l1_size);
-        h.vm_state_size = cpu_to_be32(sn->vm_state_size);
-        h.date_sec = cpu_to_be32(sn->date_sec);
-        h.date_nsec = cpu_to_be32(sn->date_nsec);
-        h.vm_clock_nsec = cpu_to_be64(sn->vm_clock_nsec);
-
-        id_str_size = strlen(sn->id_str);
-        name_size = strlen(sn->name);
-        h.id_str_size = cpu_to_be16(id_str_size);
-        h.name_size = cpu_to_be16(name_size);
-        offset = align_offset(offset, 8);
-        if (bdrv_pwrite(s->hd, offset, &h, sizeof(h)) != sizeof(h))
-            goto fail;
-        offset += sizeof(h);
-        if (bdrv_pwrite(s->hd, offset, sn->id_str, id_str_size) != id_str_size)
-            goto fail;
-        offset += id_str_size;
-        if (bdrv_pwrite(s->hd, offset, sn->name, name_size) != name_size)
-            goto fail;
-        offset += name_size;
-    }
-
-    /* update the various header fields */
-    data64 = cpu_to_be64(snapshots_offset);
-    if (bdrv_pwrite(s->hd, offsetof(QCowHeader, snapshots_offset),
-                    &data64, sizeof(data64)) != sizeof(data64))
-        goto fail;
-    data32 = cpu_to_be32(s->nb_snapshots);
-    if (bdrv_pwrite(s->hd, offsetof(QCowHeader, nb_snapshots),
-                    &data32, sizeof(data32)) != sizeof(data32))
-        goto fail;
-
-    /* free the old snapshot table */
-    free_clusters(bs, s->snapshots_offset, s->snapshots_size);
-    s->snapshots_offset = snapshots_offset;
-    s->snapshots_size = snapshots_size;
-    return 0;
- fail:
-    return -1;
-}
-
-/*
- * Generate an unused snapshot ID: parse every existing ID string as a
- * decimal number, take the largest value found (at least 0) and format that
- * value plus one into id_str, which holds id_str_size bytes.
- */
-static void find_new_snapshot_id(BlockDriverState *bs,
-                                 char *id_str, int id_str_size)
-{
-    BDRVQcowState *s = bs->opaque;
-    int idx, candidate, highest = 0;
-
-    for (idx = 0; idx < s->nb_snapshots; idx++) {
-        candidate = strtoul(s->snapshots[idx].id_str, NULL, 10);
-        highest = (candidate > highest) ? candidate : highest;
-    }
-
-    snprintf(id_str, id_str_size, "%d", highest + 1);
-}
-
-/*
- * Look up a snapshot by exact ID string.
- * Returns its index in s->snapshots, or -1 if no snapshot has that ID.
- */
-static int find_snapshot_by_id(BlockDriverState *bs, const char *id_str)
-{
-    BDRVQcowState *s = bs->opaque;
-    int idx = 0;
-
-    while (idx < s->nb_snapshots) {
-        if (strcmp(s->snapshots[idx].id_str, id_str) == 0) {
-            return idx;
-        }
-        idx++;
-    }
-    return -1;
-}
-
-/*
- * Look up a snapshot whose ID equals 'name' or, failing that, whose name
- * equals 'name'. An ID match takes precedence over a name match.
- * Returns the index in s->snapshots, or -1 if nothing matches.
- */
-static int find_snapshot_by_id_or_name(BlockDriverState *bs, const char *name)
-{
-    BDRVQcowState *s = bs->opaque;
-    int idx;
-    int by_id = find_snapshot_by_id(bs, name);
-
-    if (by_id >= 0) {
-        return by_id;
-    }
-
-    for (idx = 0; idx < s->nb_snapshots; idx++) {
-        if (strcmp(s->snapshots[idx].name, name) == 0) {
-            return idx;
-        }
-    }
-    return -1;
-}
-
-/* if no id is provided, a new one is constructed */
-static int qcow_snapshot_create(BlockDriverState *bs,
-                                QEMUSnapshotInfo *sn_info)
-{
-    BDRVQcowState *s = bs->opaque;
-    QCowSnapshot *snapshots1, sn1, *sn = &sn1;
-    int i, ret;
-    uint64_t *l1_table = NULL;
-
-    memset(sn, 0, sizeof(*sn));
-
-    if (sn_info->id_str[0] == '\0') {
-        /* compute a new id */
-        find_new_snapshot_id(bs, sn_info->id_str, sizeof(sn_info->id_str));
-    }
-
-    /* check that the ID is unique */
-    if (find_snapshot_by_id(bs, sn_info->id_str) >= 0)
-        return -ENOENT;
-
-    sn->id_str = qemu_strdup(sn_info->id_str);
-    if (!sn->id_str)
-        goto fail;
-    sn->name = qemu_strdup(sn_info->name);
-    if (!sn->name)
-        goto fail;
-    sn->vm_state_size = sn_info->vm_state_size;
-    sn->date_sec = sn_info->date_sec;
-    sn->date_nsec = sn_info->date_nsec;
-    sn->vm_clock_nsec = sn_info->vm_clock_nsec;
-
-    ret = update_snapshot_refcount(bs, s->l1_table_offset, s->l1_size, 1);
-    if (ret < 0)
-        goto fail;
-
-    /* create the L1 table of the snapshot */
-    sn->l1_table_offset = alloc_clusters(bs, s->l1_size * sizeof(uint64_t));
-    sn->l1_size = s->l1_size;
-
-    l1_table = qemu_malloc(s->l1_size * sizeof(uint64_t));
-    for(i = 0; i < s->l1_size; i++) {
-        l1_table[i] = cpu_to_be64(s->l1_table[i]);
-    }
-    if (bdrv_pwrite(s->hd, sn->l1_table_offset,
-                    l1_table, s->l1_size * sizeof(uint64_t)) !=
-        (s->l1_size * sizeof(uint64_t)))
-        goto fail;
-    qemu_free(l1_table);
-    l1_table = NULL;
-
-    snapshots1 = qemu_malloc((s->nb_snapshots + 1) * sizeof(QCowSnapshot));
-    if (s->snapshots) {
-        memcpy(snapshots1, s->snapshots, s->nb_snapshots * sizeof(QCowSnapshot));
-        qemu_free(s->snapshots);
-    }
-    s->snapshots = snapshots1;
-    s->snapshots[s->nb_snapshots++] = *sn;
-
-    if (qcow_write_snapshots(bs) < 0)
-        goto fail;
-#ifdef DEBUG_ALLOC
-    check_refcounts(bs);
-#endif
-    return 0;
- fail:
-    qemu_free(sn->name);
-    qemu_free(l1_table);
-    return -1;
-}
-
-/*
- * copy the snapshot identified by 'snapshot_id' (an ID or a name) into the
- * current disk image
- *
- * Decrements the refcounts reachable from the active L1 table, grows the
- * active L1 table to the snapshot's size if needed, overwrites the active
- * L1 table (on disk and in memory) with the snapshot's L1 table, and then
- * increments the refcounts reachable from the new active table.
- *
- * Returns 0 on success, -ENOENT if no such snapshot exists, and -EIO if any
- * later step fails.
- */
-static int qcow_snapshot_goto(BlockDriverState *bs,
-                              const char *snapshot_id)
-{
-    BDRVQcowState *s = bs->opaque;
-    QCowSnapshot *sn;
-    int i, snapshot_index, l1_size2;
-
-    snapshot_index = find_snapshot_by_id_or_name(bs, snapshot_id);
-    if (snapshot_index < 0)
-        return -ENOENT;
-    sn = &s->snapshots[snapshot_index];
-
-    if (update_snapshot_refcount(bs, s->l1_table_offset, s->l1_size, -1) < 0)
-        goto fail;
-
-    if (grow_l1_table(bs, sn->l1_size) < 0)
-        goto fail;
-
-    s->l1_size = sn->l1_size;
-    l1_size2 = s->l1_size * sizeof(uint64_t);
-    /* copy the snapshot l1 table to the current l1 table */
-    if (bdrv_pread(s->hd, sn->l1_table_offset,
-                   s->l1_table, l1_size2) != l1_size2)
-        goto fail;
-    /* the table is still big-endian here, so it can be written as read */
-    if (bdrv_pwrite(s->hd, s->l1_table_offset,
-                    s->l1_table, l1_size2) != l1_size2)
-        goto fail;
-    for(i = 0;i < s->l1_size; i++) {
-        be64_to_cpus(&s->l1_table[i]);
-    }
-
-    if (update_snapshot_refcount(bs, s->l1_table_offset, s->l1_size, 1) < 0)
-        goto fail;
-
-#ifdef DEBUG_ALLOC
-    check_refcounts(bs);
-#endif
-    return 0;
- fail:
-    return -EIO;
-}
-/*
- * Delete the snapshot identified by 'snapshot_id' (an ID or a name).
- *
- * Decrements the refcounts reachable from the snapshot's L1 table, refreshes
- * the copied flags of the active L1/L2 tables, frees the clusters of the
- * snapshot's L1 table, removes the entry from the in-memory list and
- * rewrites the snapshot table in the image.
- *
- * Returns 0 on success, -ENOENT if no such snapshot exists, or the negative
- * value returned by the failing helper.
- */
-static int qcow_snapshot_delete(BlockDriverState *bs, const char *snapshot_id)
-{
-    BDRVQcowState *s = bs->opaque;
-    QCowSnapshot *sn;
-    int snapshot_index, ret;
-
-    snapshot_index = find_snapshot_by_id_or_name(bs, snapshot_id);
-    if (snapshot_index < 0)
-        return -ENOENT;
-    sn = &s->snapshots[snapshot_index];
-
-    ret = update_snapshot_refcount(bs, sn->l1_table_offset, sn->l1_size, -1);
-    if (ret < 0)
-        return ret;
-    /* must update the copied flag on the current cluster offsets */
-    ret = update_snapshot_refcount(bs, s->l1_table_offset, s->l1_size, 0);
-    if (ret < 0)
-        return ret;
-    free_clusters(bs, sn->l1_table_offset, sn->l1_size * sizeof(uint64_t));
-
-    qemu_free(sn->id_str);
-    qemu_free(sn->name);
-    /* close the gap left by the removed entry */
-    memmove(sn, sn + 1, (s->nb_snapshots - snapshot_index - 1) * sizeof(*sn));
-    s->nb_snapshots--;
-    ret = qcow_write_snapshots(bs);
-    if (ret < 0) {
-        /* XXX: restore snapshot if error ? */
-        return ret;
-    }
-#ifdef DEBUG_ALLOC
-    check_refcounts(bs);
-#endif
-    return 0;
-}
-
-/*
- * Build a newly allocated array describing every snapshot of the image.
- *
- * The array is stored in *psn_tab; ID and name are copied with pstrcpy()
- * into the fixed-size fields of each QEMUSnapshotInfo.
- * Returns the number of snapshots.
- */
-static int qcow_snapshot_list(BlockDriverState *bs,
-                              QEMUSnapshotInfo **psn_tab)
-{
-    BDRVQcowState *s = bs->opaque;
-    QEMUSnapshotInfo *table;
-    int idx;
-
-    table = qemu_mallocz(s->nb_snapshots * sizeof(QEMUSnapshotInfo));
-
-    for (idx = 0; idx < s->nb_snapshots; idx++) {
-        QCowSnapshot *src = &s->snapshots[idx];
-        QEMUSnapshotInfo *dst = &table[idx];
-
-        pstrcpy(dst->id_str, sizeof(dst->id_str), src->id_str);
-        pstrcpy(dst->name, sizeof(dst->name), src->name);
-        dst->vm_state_size = src->vm_state_size;
-        dst->date_sec = src->date_sec;
-        dst->date_nsec = src->date_nsec;
-        dst->vm_clock_nsec = src->vm_clock_nsec;
-    }
-
-    *psn_tab = table;
-    return s->nb_snapshots;
-}
-
-/*********************************************************/
-/* refcount handling */
-/*
- * Allocate the one-cluster refcount block cache and load the refcount table
- * from s->refcount_table_offset, converting its entries to host byte order.
- *
- * Returns 0 on success.
- * NOTE(review): a failed or short read of the table is reported as -ENOMEM,
- * and the buffers allocated here are not released on that path - presumably
- * the caller cleans up through refcount_close(); confirm.
- */
-static int refcount_init(BlockDriverState *bs)
-{
-    BDRVQcowState *s = bs->opaque;
-    int ret, refcount_table_size2, i;
-
-    s->refcount_block_cache = qemu_malloc(s->cluster_size);
-    refcount_table_size2 = s->refcount_table_size * sizeof(uint64_t);
-    s->refcount_table = qemu_malloc(refcount_table_size2);
-    if (s->refcount_table_size > 0) {
-        ret = bdrv_pread(s->hd, s->refcount_table_offset,
-                         s->refcount_table, refcount_table_size2);
-        if (ret != refcount_table_size2)
-            goto fail;
-        for(i = 0; i < s->refcount_table_size; i++)
-            be64_to_cpus(&s->refcount_table[i]);
-    }
-    return 0;
- fail:
-    return -ENOMEM;
-}
-/* Free the refcount block cache and the in-memory refcount table. */
-static void refcount_close(BlockDriverState *bs)
-{
-    BDRVQcowState *s = bs->opaque;
-    qemu_free(s->refcount_block_cache);
-    qemu_free(s->refcount_table);
-}
-
-
-/*
- * Read the refcount block located at refcount_block_offset into the
- * single-block cache and remember which block the cache now holds.
- * Returns 0 on success, -EIO if a full cluster could not be read.
- */
-static int load_refcount_block(BlockDriverState *bs,
-                               int64_t refcount_block_offset)
-{
-    BDRVQcowState *s = bs->opaque;
-    int nread;
-
-    nread = bdrv_pread(s->hd, refcount_block_offset, s->refcount_block_cache,
-                       s->cluster_size);
-    if (nread == s->cluster_size) {
-        s->refcount_block_cache_offset = refcount_block_offset;
-        return 0;
-    }
-    return -EIO;
-}
-
-/*
- * Return the refcount of the cluster with index cluster_index.
- *
- * Clusters beyond the refcount table, or whose refcount block is not
- * allocated, count as unreferenced (0). If the refcount block cannot be
- * read, 1 is returned so that the cluster is treated as allocated.
- */
-static int get_refcount(BlockDriverState *bs, int64_t cluster_index)
-{
-    BDRVQcowState *s = bs->opaque;
-    int table_idx, entry_idx;
-    int64_t block_offset;
-
-    table_idx = cluster_index >> (s->cluster_bits - REFCOUNT_SHIFT);
-    if (table_idx >= s->refcount_table_size) {
-        return 0;
-    }
-
-    block_offset = s->refcount_table[table_idx];
-    if (!block_offset) {
-        return 0;
-    }
-
-    /* better than nothing: return allocated if read error */
-    if (block_offset != s->refcount_block_cache_offset &&
-        load_refcount_block(bs, block_offset) < 0) {
-        return 1;
-    }
-
-    entry_idx = cluster_index &
-        ((1 << (s->cluster_bits - REFCOUNT_SHIFT)) - 1);
-    return be16_to_cpu(s->refcount_block_cache[entry_idx]);
-}
-
-/* return < 0 if error */
-static int64_t alloc_clusters_noref(BlockDriverState *bs, int64_t size)
-{
-    BDRVQcowState *s = bs->opaque;
-    int i, nb_clusters;
-
-    nb_clusters = size_to_clusters(s, size);
-retry:
-    for(i = 0; i < nb_clusters; i++) {
-        int64_t i = s->free_cluster_index++;
-        if (get_refcount(bs, i) != 0)
-            goto retry;
-    }
-#ifdef DEBUG_ALLOC2
-    printf("alloc_clusters: size=%lld -> %lld\n",
-            size,
-            (s->free_cluster_index - nb_clusters) << s->cluster_bits);
-#endif
-    return (s->free_cluster_index - nb_clusters) << s->cluster_bits;
-}
-/*
- * Allocate clusters for 'size' bytes with alloc_clusters_noref() and raise
- * the refcount of the covered range by one. Returns the byte offset of the
- * allocation.
- */
-static int64_t alloc_clusters(BlockDriverState *bs, int64_t size)
-{
-    int64_t offset;
-
-    offset = alloc_clusters_noref(bs, size);
-    update_refcount(bs, offset, size, 1);
-    return offset;
-}
-
-/* only used to allocate compressed sectors. We try to allocate
-   contiguous sectors. size must be <= cluster_size
-
-   s->free_byte_offset is the next unused byte of a partially filled
-   cluster; 0 means there is none and a fresh cluster is allocated first.
-   Allocations that do not start on a cluster boundary add one more
-   reference to the cluster they start in. Returns the byte offset of the
-   allocated area. */
-static int64_t alloc_bytes(BlockDriverState *bs, int size)
-{
-    BDRVQcowState *s = bs->opaque;
-    int64_t offset, cluster_offset;
-    int free_in_cluster;
-
-    assert(size > 0 && size <= s->cluster_size);
-    if (s->free_byte_offset == 0) {
-        s->free_byte_offset = alloc_clusters(bs, s->cluster_size);
-    }
- redo:
-    free_in_cluster = s->cluster_size -
-        (s->free_byte_offset & (s->cluster_size - 1));
-    if (size <= free_in_cluster) {
-        /* enough space in current cluster */
-        offset = s->free_byte_offset;
-        s->free_byte_offset += size;
-        free_in_cluster -= size;
-        if (free_in_cluster == 0)
-            s->free_byte_offset = 0; /* cluster is full: forget it */
-        if ((offset & (s->cluster_size - 1)) != 0)
-            update_cluster_refcount(bs, offset >> s->cluster_bits, 1);
-    } else {
-        offset = alloc_clusters(bs, s->cluster_size);
-        cluster_offset = s->free_byte_offset & ~(s->cluster_size - 1);
-        if ((cluster_offset + s->cluster_size) == offset) {
-            /* we are lucky: contiguous data */
-            offset = s->free_byte_offset;
-            update_cluster_refcount(bs, offset >> s->cluster_bits, 1);
-            s->free_byte_offset += size;
-        } else {
-            /* not adjacent: restart at the beginning of the new cluster */
-            s->free_byte_offset = offset;
-            goto redo;
-        }
-    }
-    return offset;
-}
-/* Drop one reference from every cluster touched by [offset, offset + size). */
-static void free_clusters(BlockDriverState *bs,
-                          int64_t offset, int64_t size)
-{
-    update_refcount(bs, offset, size, -1);
-}
-/*
- * Enlarge the refcount table so that it holds at least min_size entries.
- *
- * The table size in clusters is grown by roughly 1.5x per step until it is
- * large enough. The new table is written to newly found clusters, the image
- * header is pointed at it, and only then are the refcounts of the new table
- * raised and the clusters of the old table released.
- *
- * Returns 0 on success (or if the table is already large enough) and -EIO
- * if writing the table or the header fails.
- * NOTE(review): the fail path calls free_clusters() on clusters obtained
- * from alloc_clusters_noref(), whose refcounts were never raised - confirm
- * this is intended.
- */
-static int grow_refcount_table(BlockDriverState *bs, int min_size)
-{
-    BDRVQcowState *s = bs->opaque;
-    int new_table_size, new_table_size2, refcount_table_clusters, i, ret;
-    uint64_t *new_table;
-    int64_t table_offset;
-    uint8_t data[12];
-    int old_table_size;
-    int64_t old_table_offset;
-
-    if (min_size <= s->refcount_table_size)
-        return 0;
-    /* compute new table size */
-    refcount_table_clusters = s->refcount_table_size >> (s->cluster_bits - 3);
-    for(;;) {
-        if (refcount_table_clusters == 0) {
-            refcount_table_clusters = 1;
-        } else {
-            refcount_table_clusters = (refcount_table_clusters * 3 + 1) / 2;
-        }
-        new_table_size = refcount_table_clusters << (s->cluster_bits - 3);
-        if (min_size <= new_table_size)
-            break;
-    }
-#ifdef DEBUG_ALLOC2
-    printf("grow_refcount_table from %d to %d\n",
-           s->refcount_table_size,
-           new_table_size);
-#endif
-    new_table_size2 = new_table_size * sizeof(uint64_t);
-    new_table = qemu_mallocz(new_table_size2);
-    memcpy(new_table, s->refcount_table,
-           s->refcount_table_size * sizeof(uint64_t));
-    for(i = 0; i < s->refcount_table_size; i++)
-        cpu_to_be64s(&new_table[i]);
-    /* Note: we cannot update the refcount now to avoid recursion */
-    table_offset = alloc_clusters_noref(bs, new_table_size2);
-    ret = bdrv_pwrite(s->hd, table_offset, new_table, new_table_size2);
-    if (ret != new_table_size2)
-        goto fail;
-    for(i = 0; i < s->refcount_table_size; i++)
-        be64_to_cpus(&new_table[i]);
-    /* 8-byte table offset followed by the 4-byte cluster count, written in
-       one go starting at the header's refcount_table_offset field */
-    cpu_to_be64w((uint64_t*)data, table_offset);
-    cpu_to_be32w((uint32_t*)(data + 8), refcount_table_clusters);
-    if (bdrv_pwrite(s->hd, offsetof(QCowHeader, refcount_table_offset),
-                    data, sizeof(data)) != sizeof(data))
-        goto fail;
-    qemu_free(s->refcount_table);
-    old_table_offset = s->refcount_table_offset;
-    old_table_size = s->refcount_table_size;
-    s->refcount_table = new_table;
-    s->refcount_table_size = new_table_size;
-    s->refcount_table_offset = table_offset;
-    /* the new table is active now, so refcounts can be updated safely */
-    update_refcount(bs, table_offset, new_table_size2, 1);
-    free_clusters(bs, old_table_offset, old_table_size * sizeof(uint64_t));
-    return 0;
- fail:
-    free_clusters(bs, table_offset, new_table_size2);
-    qemu_free(new_table);
-    return -EIO;
-}
-
-/*
- * Change the refcount of the cluster with index cluster_index by addend and
- * write the new 16-bit value to the image.
- *
- * When incrementing, the refcount table is grown and a new refcount block is
- * created on demand. Returns the new refcount on success. Returns -EINVAL
- * if the count would leave the range 0..0xffff, if a decrement targets a
- * cluster without refcount block, or if creating a refcount block fails;
- * returns -EIO if a refcount block cannot be read or the entry cannot be
- * written; a failure of grow_refcount_table() is passed through.
- *
- * addend must be 1 or -1
- */
-/* XXX: cache several refcount block clusters ? */
-static int update_cluster_refcount(BlockDriverState *bs,
-                                   int64_t cluster_index,
-                                   int addend)
-{
-    BDRVQcowState *s = bs->opaque;
-    int64_t offset, refcount_block_offset;
-    int ret, refcount_table_index, block_index, refcount;
-    uint64_t data64;
-
-    refcount_table_index = cluster_index >> (s->cluster_bits - REFCOUNT_SHIFT);
-    if (refcount_table_index >= s->refcount_table_size) {
-        if (addend < 0)
-            return -EINVAL;
-        ret = grow_refcount_table(bs, refcount_table_index + 1);
-        if (ret < 0)
-            return ret;
-    }
-    refcount_block_offset = s->refcount_table[refcount_table_index];
-    if (!refcount_block_offset) {
-        if (addend < 0)
-            return -EINVAL;
-        /* create a new refcount block */
-        /* Note: we cannot update the refcount now to avoid recursion */
-        offset = alloc_clusters_noref(bs, s->cluster_size);
-        memset(s->refcount_block_cache, 0, s->cluster_size);
-        ret = bdrv_pwrite(s->hd, offset, s->refcount_block_cache, s->cluster_size);
-        if (ret != s->cluster_size)
-            return -EINVAL;
-        s->refcount_table[refcount_table_index] = offset;
-        data64 = cpu_to_be64(offset);
-        ret = bdrv_pwrite(s->hd, s->refcount_table_offset +
-                          refcount_table_index * sizeof(uint64_t),
-                          &data64, sizeof(data64));
-        if (ret != sizeof(data64))
-            return -EINVAL;
-        /* the new block is now registered and cached, so it can account
-           for its own cluster */
-        refcount_block_offset = offset;
-        s->refcount_block_cache_offset = offset;
-        update_refcount(bs, offset, s->cluster_size, 1);
-    } else {
-        if (refcount_block_offset != s->refcount_block_cache_offset) {
-            if (load_refcount_block(bs, refcount_block_offset) < 0)
-                return -EIO;
-        }
-    }
-    /* we can update the count and save it */
-    block_index = cluster_index &
-        ((1 << (s->cluster_bits - REFCOUNT_SHIFT)) - 1);
-    refcount = be16_to_cpu(s->refcount_block_cache[block_index]);
-    refcount += addend;
-    if (refcount < 0 || refcount > 0xffff)
-        return -EINVAL;
-    if (refcount == 0 && cluster_index < s->free_cluster_index) {
-        s->free_cluster_index = cluster_index; /* let the allocator reuse it */
-    }
-    s->refcount_block_cache[block_index] = cpu_to_be16(refcount);
-    if (bdrv_pwrite(s->hd,
-                    refcount_block_offset + (block_index << REFCOUNT_SHIFT),
-                    &s->refcount_block_cache[block_index], 2) != 2)
-        return -EIO;
-    return refcount;
-}
-
-static void update_refcount(BlockDriverState *bs,
-                            int64_t offset, int64_t length,
-                            int addend)
-{
-    BDRVQcowState *s = bs->opaque;
-    int64_t start, last, cluster_offset;
-
-#ifdef DEBUG_ALLOC2
-    printf("update_refcount: offset=%lld size=%lld addend=%d\n",
-           offset, length, addend);
-#endif
-    if (length <= 0)
-        return;
-    start = offset & ~(s->cluster_size - 1);
-    last = (offset + length - 1) & ~(s->cluster_size - 1);
-    for(cluster_offset = start; cluster_offset <= last;
-        cluster_offset += s->cluster_size) {
-        update_cluster_refcount(bs, cluster_offset >> s->cluster_bits, addend);
-    }
-}
-
-/*
- * Adds one reference to every cluster touched by the byte range
- * [offset, offset + size) in the caller-supplied refcount table.
- * This is used to construct a temporary refcount table out of L1 and L2
- * tables which can be compared to the refcount table saved in the image.
- *
- * Clusters that fall outside the table and counters that wrap around to 0
- * are reported on stderr and counted as errors.
- *
- * Returns the number of errors in the image that were found
- */
-static int inc_refcounts(BlockDriverState *bs,
-                          uint16_t *refcount_table,
-                          int refcount_table_size,
-                          int64_t offset, int64_t size)
-{
-    BDRVQcowState *s = bs->opaque;
-    int64_t first, final, pos;
-    int idx;
-    int errors = 0;
-
-    if (size <= 0) {
-        return 0;
-    }
-
-    first = offset & ~(s->cluster_size - 1);
-    final = (offset + size - 1) & ~(s->cluster_size - 1);
-
-    for (pos = first; pos <= final; pos += s->cluster_size) {
-        idx = pos >> s->cluster_bits;
-
-        if (idx < 0 || idx >= refcount_table_size) {
-            fprintf(stderr, "ERROR: invalid cluster offset=0x%" PRIx64 "\n",
-                pos);
-            errors++;
-            continue;
-        }
-
-        refcount_table[idx]++;
-        if (refcount_table[idx] == 0) {
-            fprintf(stderr, "ERROR: overflow cluster offset=0x%" PRIx64
-                "\n", pos);
-            errors++;
-        }
-    }
-
-    return errors;
-}
-
-/*
- * Increases the refcount in the given refcount table for all the clusters
- * referenced in the L2 table. While doing so, performs some checks on L2
- * entries:
- *  - compressed clusters must not carry QCOW_OFLAG_COPIED;
- *  - if check_copied is set, QCOW_OFLAG_COPIED must be set exactly when the
- *    stored refcount of the cluster is 1;
- *  - offsets of uncompressed clusters must be cluster aligned.
- *
- * bs:                  image being checked
- * refcount_table:      temporary table being built (one entry per cluster)
- * refcount_table_size: number of entries in refcount_table
- * l2_offset:           byte offset of the L2 table in the image
- * check_copied:        non-zero to verify the QCOW_OFLAG_COPIED flag
- *
- * Returns the number of errors found by the checks or -errno if an internal
- * error occurred.
- */
-static int check_refcounts_l2(BlockDriverState *bs,
-    uint16_t *refcount_table, int refcount_table_size, int64_t l2_offset,
-    int check_copied)
-{
-    BDRVQcowState *s = bs->opaque;
-    uint64_t *l2_table, offset;
-    int i, l2_size, nb_csectors, refcount;
-    int errors = 0;
-
-    /* Read L2 table from disk */
-    l2_size = s->l2_size * sizeof(uint64_t);
-    l2_table = qemu_malloc(l2_size);
-
-    if (bdrv_pread(s->hd, l2_offset, l2_table, l2_size) != l2_size)
-        goto fail;
-
-    /* Do the actual checks */
-    for(i = 0; i < s->l2_size; i++) {
-        offset = be64_to_cpu(l2_table[i]);
-        if (offset != 0) {
-            if (offset & QCOW_OFLAG_COMPRESSED) {
-                /* Compressed clusters don't have QCOW_OFLAG_COPIED */
-                if (offset & QCOW_OFLAG_COPIED) {
-                    fprintf(stderr, "ERROR: cluster %" PRId64 ": "
-                        "copied flag must never be set for compressed "
-                        "clusters\n", offset >> s->cluster_bits);
-                    offset &= ~QCOW_OFLAG_COPIED;
-                    errors++;
-                }
-
-                /* Mark cluster as used */
-                nb_csectors = ((offset >> s->csize_shift) &
-                               s->csize_mask) + 1;
-                offset &= s->cluster_offset_mask;
-                errors += inc_refcounts(bs, refcount_table,
-                              refcount_table_size,
-                              offset & ~511, nb_csectors * 512);
-            } else {
-                /* QCOW_OFLAG_COPIED must be set iff refcount == 1 */
-                if (check_copied) {
-                    uint64_t entry = offset;
-                    offset &= ~QCOW_OFLAG_COPIED;
-                    refcount = get_refcount(bs, offset >> s->cluster_bits);
-                    if ((refcount == 1) != ((entry & QCOW_OFLAG_COPIED) != 0)) {
-                        fprintf(stderr, "ERROR OFLAG_COPIED: offset=%"
-                            PRIx64 " refcount=%d\n", entry, refcount);
-                        errors++;
-                    }
-                }
-
-                /* Mark cluster as used */
-                offset &= ~QCOW_OFLAG_COPIED;
-                errors += inc_refcounts(bs, refcount_table,
-                              refcount_table_size,
-                              offset, s->cluster_size);
-
-                /* Correct offsets are cluster aligned */
-                if (offset & (s->cluster_size - 1)) {
-                    fprintf(stderr, "ERROR offset=%" PRIx64 ": Cluster is not "
-                        "properly aligned; L2 entry corrupted.\n", offset);
-                    errors++;
-                }
-            }
-        }
-    }
-
-    qemu_free(l2_table);
-    return errors;
-
-fail:
-    /* report the function that actually failed (the message used to name
-       check_refcounts_l1) */
-    fprintf(stderr, "ERROR: I/O error in check_refcounts_l2\n");
-    qemu_free(l2_table);
-    return -EIO;
-}
-
-/*
- * Increases the refcount for the L1 table, its L2 tables and all referenced
- * clusters in the given refcount table. While doing so, performs some checks
- * on L1 and L2 entries.
- *
- * Returns the number of errors found by the checks or -errno if an internal
- * error occurred.
- */
-static int check_refcounts_l1(BlockDriverState *bs,
-                              uint16_t *refcount_table,
-                              int refcount_table_size,
-                              int64_t l1_table_offset, int l1_size,
-                              int check_copied)
-{
-    BDRVQcowState *s = bs->opaque;
-    uint64_t *l1_table, l2_offset, l1_size2;
-    int i, refcount, ret;
-    int errors = 0;
-
-    l1_size2 = l1_size * sizeof(uint64_t);
-
-    /* Mark L1 table as used */
-    errors += inc_refcounts(bs, refcount_table, refcount_table_size,
-                  l1_table_offset, l1_size2);
-
-    /* Read L1 table entries from disk */
-    l1_table = qemu_malloc(l1_size2);
-    if (bdrv_pread(s->hd, l1_table_offset,
-                   l1_table, l1_size2) != l1_size2)
-        goto fail;
-    for(i = 0;i < l1_size; i++)
-        be64_to_cpus(&l1_table[i]);
-
-    /* Do the actual checks */
-    for(i = 0; i < l1_size; i++) {
-        l2_offset = l1_table[i];
-        if (l2_offset) {
-            /* QCOW_OFLAG_COPIED must be set iff refcount == 1 */
-            if (check_copied) {
-                refcount = get_refcount(bs, (l2_offset & ~QCOW_OFLAG_COPIED)
-                    >> s->cluster_bits);
-                if ((refcount == 1) != ((l2_offset & QCOW_OFLAG_COPIED) != 0)) {
-                    fprintf(stderr, "ERROR OFLAG_COPIED: l2_offset=%" PRIx64
-                        " refcount=%d\n", l2_offset, refcount);
-                    errors++;
-                }
-            }
-
-            /* Mark L2 table as used */
-            l2_offset &= ~QCOW_OFLAG_COPIED;
-            errors += inc_refcounts(bs, refcount_table,
-                          refcount_table_size,
-                          l2_offset,
-                          s->cluster_size);
-
-            /* L2 tables are cluster aligned */
-            if (l2_offset & (s->cluster_size - 1)) {
-                fprintf(stderr, "ERROR l2_offset=%" PRIx64 ": Table is not "
-                    "cluster aligned; L1 entry corrupted\n", l2_offset);
-                errors++;
-            }
-
-            /* Process and check L2 entries */
-            ret = check_refcounts_l2(bs, refcount_table, refcount_table_size,
-                l2_offset, check_copied);
-            if (ret < 0) {
-                goto fail;
-            }
-            errors += ret;
-        }
-    }
-    qemu_free(l1_table);
-    return errors;
-
-fail:
-    fprintf(stderr, "ERROR: I/O error in check_refcounts_l1\n");
-    qemu_free(l1_table);
-    return -EIO;
-}
-
-/*
- * Checks an image for refcount consistency.
- *
- * Returns 0 if no errors are found, the number of errors in case the image is
- * detected as corrupted, and -errno when an internal error occured.
- */
-static int check_refcounts(BlockDriverState *bs)
-{
-    BDRVQcowState *s = bs->opaque;
-    int64_t size;
-    int nb_clusters, refcount1, refcount2, i;
-    QCowSnapshot *sn;
-    uint16_t *refcount_table;
-    int ret, errors = 0;
-
-    size = bdrv_getlength(s->hd);
-    nb_clusters = size_to_clusters(s, size);
-    refcount_table = qemu_mallocz(nb_clusters * sizeof(uint16_t));
-
-    /* header */
-    errors += inc_refcounts(bs, refcount_table, nb_clusters,
-                  0, s->cluster_size);
-
-    /* current L1 table */
-    ret = check_refcounts_l1(bs, refcount_table, nb_clusters,
-                       s->l1_table_offset, s->l1_size, 1);
-    if (ret < 0) {
-        return ret;
-    }
-    errors += ret;
-
-    /* snapshots */
-    for(i = 0; i < s->nb_snapshots; i++) {
-        sn = s->snapshots + i;
-        check_refcounts_l1(bs, refcount_table, nb_clusters,
-                           sn->l1_table_offset, sn->l1_size, 0);
-    }
-    errors += inc_refcounts(bs, refcount_table, nb_clusters,
-                  s->snapshots_offset, s->snapshots_size);
-
-    /* refcount data */
-    errors += inc_refcounts(bs, refcount_table, nb_clusters,
-                  s->refcount_table_offset,
-                  s->refcount_table_size * sizeof(uint64_t));
-    for(i = 0; i < s->refcount_table_size; i++) {
-        int64_t offset;
-        offset = s->refcount_table[i];
-        if (offset != 0) {
-            errors += inc_refcounts(bs, refcount_table, nb_clusters,
-                          offset, s->cluster_size);
-        }
-    }
-
-    /* compare ref counts */
-    for(i = 0; i < nb_clusters; i++) {
-        refcount1 = get_refcount(bs, i);
-        refcount2 = refcount_table[i];
-        if (refcount1 != refcount2) {
-            fprintf(stderr, "ERROR cluster %d refcount=%d reference=%d\n",
-                   i, refcount1, refcount2);
-            errors++;
-        }
-    }
-
-    qemu_free(refcount_table);
-
-    return errors;
-}
-
-static int qcow_check(BlockDriverState *bs)
-{
-    return check_refcounts(bs);
-}
-
-#if 0
-static void dump_refcounts(BlockDriverState *bs)
-{
-    BDRVQcowState *s = bs->opaque;
-    int64_t nb_clusters, k, k1, size;
-    int refcount;
-
-    size = bdrv_getlength(s->hd);
-    nb_clusters = size_to_clusters(s, size);
-    for(k = 0; k < nb_clusters;) {
-        k1 = k;
-        refcount = get_refcount(bs, k);
-        k++;
-        while (k < nb_clusters && get_refcount(bs, k) == refcount)
-            k++;
-        printf("%lld: refcount=%d nb=%lld\n", k, refcount, k - k1);
-    }
-}
-#endif
-
-static int qcow_put_buffer(BlockDriverState *bs, const uint8_t *buf,
-                           int64_t pos, int size)
-{
-    int growable = bs->growable;
-
-    bs->growable = 1;
-    bdrv_pwrite(bs, pos, buf, size);
-    bs->growable = growable;
-
-    return size;
-}
-
-static int qcow_get_buffer(BlockDriverState *bs, uint8_t *buf,
-                           int64_t pos, int size)
-{
-    int growable = bs->growable;
-    int ret;
-
-    bs->growable = 1;
-    ret = bdrv_pread(bs, pos, buf, size);
-    bs->growable = growable;
-
-    return ret;
-}
-
-static BlockDriver bdrv_qcow2 = {
-    .format_name       = "qcow2",
-    .instance_size     = sizeof(BDRVQcowState),
-    .bdrv_probe                = qcow_probe,
-    .bdrv_open         = qcow_open,
-    .bdrv_close                = qcow_close,
-    .bdrv_create       = qcow_create,
-    .bdrv_flush                = qcow_flush,
-    .bdrv_is_allocated = qcow_is_allocated,
-    .bdrv_set_key      = qcow_set_key,
-    .bdrv_make_empty   = qcow_make_empty,
-
-    .bdrv_aio_readv    = qcow_aio_readv,
-    .bdrv_aio_writev   = qcow_aio_writev,
-    .bdrv_aio_cancel   = qcow_aio_cancel,
-    .aiocb_size                = sizeof(QCowAIOCB),
-    .bdrv_write_compressed = qcow_write_compressed,
-
-    .bdrv_snapshot_create = qcow_snapshot_create,
-    .bdrv_snapshot_goto        = qcow_snapshot_goto,
-    .bdrv_snapshot_delete = qcow_snapshot_delete,
-    .bdrv_snapshot_list        = qcow_snapshot_list,
-    .bdrv_get_info     = qcow_get_info,
-
-    .bdrv_put_buffer    = qcow_put_buffer,
-    .bdrv_get_buffer    = qcow_get_buffer,
-
-    .bdrv_create2 = qcow_create2,
-    .bdrv_check = qcow_check,
-};
-
-static void bdrv_qcow2_init(void)
-{
-    bdrv_register(&bdrv_qcow2);
-}
-
-block_init(bdrv_qcow2_init);
diff --git a/block-raw-posix.c b/block-raw-posix.c
deleted file mode 100644 (file)
index f3a9476..0000000
+++ /dev/null
@@ -1,1438 +0,0 @@
-/*
- * Block driver for RAW files (posix)
- *
- * Copyright (c) 2006 Fabrice Bellard
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
- * THE SOFTWARE.
- */
-#include "qemu-common.h"
-#include "qemu-timer.h"
-#include "qemu-char.h"
-#include "block_int.h"
-#include "module.h"
-#ifdef CONFIG_AIO
-#include "posix-aio-compat.h"
-#endif
-
-#ifdef CONFIG_COCOA
-#include <paths.h>
-#include <sys/param.h>
-#include <IOKit/IOKitLib.h>
-#include <IOKit/IOBSD.h>
-#include <IOKit/storage/IOMediaBSDClient.h>
-#include <IOKit/storage/IOMedia.h>
-#include <IOKit/storage/IOCDMedia.h>
-//#include <IOKit/storage/IOCDTypes.h>
-#include <CoreFoundation/CoreFoundation.h>
-#endif
-
-#ifdef __sun__
-#define _POSIX_PTHREAD_SEMANTICS 1
-#include <signal.h>
-#include <sys/dkio.h>
-#endif
-#ifdef __linux__
-#include <sys/ioctl.h>
-#include <linux/cdrom.h>
-#include <linux/fd.h>
-#endif
-#ifdef __FreeBSD__
-#include <signal.h>
-#include <sys/disk.h>
-#include <sys/cdio.h>
-#endif
-
-#ifdef __OpenBSD__
-#include <sys/ioctl.h>
-#include <sys/disklabel.h>
-#include <sys/dkio.h>
-#endif
-
-#ifdef __DragonFly__
-#include <sys/ioctl.h>
-#include <sys/diskslice.h>
-#endif
-
-//#define DEBUG_FLOPPY
-
-//#define DEBUG_BLOCK
-#if defined(DEBUG_BLOCK)
-#define DEBUG_BLOCK_PRINT(formatCstr, ...) do { if (qemu_log_enabled()) \
-    { qemu_log(formatCstr, ## __VA_ARGS__); qemu_log_flush(); } } while (0)
-#else
-#define DEBUG_BLOCK_PRINT(formatCstr, ...)
-#endif
-
-/* OS X does not have O_DSYNC */
-#ifndef O_DSYNC
-#define O_DSYNC O_SYNC
-#endif
-
-/* Approximate O_DIRECT with O_DSYNC if O_DIRECT isn't available */
-#ifndef O_DIRECT
-#define O_DIRECT O_DSYNC
-#endif
-
-#define FTYPE_FILE   0
-#define FTYPE_CD     1
-#define FTYPE_FD     2
-
-#define ALIGNED_BUFFER_SIZE (32 * 512)
-
-/* if the FD is not accessed during that time (in ms), we try to
-   reopen it to see if the disk has been changed */
-#define FD_OPEN_TIMEOUT 1000
-
-typedef struct BDRVRawState {
-    int fd;
-    int type;
-    unsigned int lseek_err_cnt;
-#if defined(__linux__)
-    /* linux floppy specific */
-    int fd_open_flags;
-    int64_t fd_open_time;
-    int64_t fd_error_time;
-    int fd_got_error;
-    int fd_media_changed;
-#endif
-#if defined(__FreeBSD__)
-    int cd_open_flags;
-#endif
-    uint8_t* aligned_buf;
-} BDRVRawState;
-
-static int posix_aio_init(void);
-
-static int fd_open(BlockDriverState *bs);
-
-#if defined(__FreeBSD__)
-static int cd_open(BlockDriverState *bs);
-#endif
-
-static int raw_is_inserted(BlockDriverState *bs);
-
-static int raw_open(BlockDriverState *bs, const char *filename, int flags)
-{
-    BDRVRawState *s = bs->opaque;
-    int fd, open_flags, ret;
-
-    posix_aio_init();
-
-    s->lseek_err_cnt = 0;
-
-    open_flags = O_BINARY;
-    if ((flags & BDRV_O_ACCESS) == O_RDWR) {
-        open_flags |= O_RDWR;
-    } else {
-        open_flags |= O_RDONLY;
-        bs->read_only = 1;
-    }
-    if (flags & BDRV_O_CREAT)
-        open_flags |= O_CREAT | O_TRUNC;
-
-    /* Use O_DSYNC for write-through caching, no flags for write-back caching,
-     * and O_DIRECT for no caching. */
-    if ((flags & BDRV_O_NOCACHE))
-        open_flags |= O_DIRECT;
-    else if (!(flags & BDRV_O_CACHE_WB))
-        open_flags |= O_DSYNC;
-
-    s->type = FTYPE_FILE;
-
-    fd = open(filename, open_flags, 0644);
-    if (fd < 0) {
-        ret = -errno;
-        if (ret == -EROFS)
-            ret = -EACCES;
-        return ret;
-    }
-    s->fd = fd;
-    s->aligned_buf = NULL;
-    if ((flags & BDRV_O_NOCACHE)) {
-        s->aligned_buf = qemu_blockalign(bs, ALIGNED_BUFFER_SIZE);
-        if (s->aligned_buf == NULL) {
-            ret = -errno;
-            close(fd);
-            return ret;
-        }
-    }
-    return 0;
-}
-
-/* XXX: use host sector size if necessary with:
-#ifdef DIOCGSECTORSIZE
-        {
-            unsigned int sectorsize = 512;
-            if (!ioctl(fd, DIOCGSECTORSIZE, &sectorsize) &&
-                sectorsize > bufsize)
-                bufsize = sectorsize;
-        }
-#endif
-#ifdef CONFIG_COCOA
-        u_int32_t   blockSize = 512;
-        if ( !ioctl( fd, DKIOCGETBLOCKSIZE, &blockSize ) && blockSize > bufsize) {
-            bufsize = blockSize;
-        }
-#endif
-*/
-
-/*
- * offset and count are in bytes, but must be multiples of 512 for files
- * opened with O_DIRECT. buf must be aligned to 512 bytes then.
- *
- * This function may be called without alignment if the caller ensures
- * that O_DIRECT is not in effect.
- */
-static int raw_pread_aligned(BlockDriverState *bs, int64_t offset,
-                     uint8_t *buf, int count)
-{
-    BDRVRawState *s = bs->opaque;
-    int ret;
-
-    ret = fd_open(bs);
-    if (ret < 0)
-        return ret;
-
-    if (offset >= 0 && lseek(s->fd, offset, SEEK_SET) == (off_t)-1) {
-        ++(s->lseek_err_cnt);
-        if(s->lseek_err_cnt <= 10) {
-            DEBUG_BLOCK_PRINT("raw_pread(%d:%s, %" PRId64 ", %p, %d) [%" PRId64
-                              "] lseek failed : %d = %s\n",
-                              s->fd, bs->filename, offset, buf, count,
-                              bs->total_sectors, errno, strerror(errno));
-        }
-        return -1;
-    }
-    s->lseek_err_cnt=0;
-
-    ret = read(s->fd, buf, count);
-    if (ret == count)
-        goto label__raw_read__success;
-
-    DEBUG_BLOCK_PRINT("raw_pread(%d:%s, %" PRId64 ", %p, %d) [%" PRId64
-                      "] read failed %d : %d = %s\n",
-                      s->fd, bs->filename, offset, buf, count,
-                      bs->total_sectors, ret, errno, strerror(errno));
-
-    /* Try harder for CDrom. */
-    if (bs->type == BDRV_TYPE_CDROM) {
-        lseek(s->fd, offset, SEEK_SET);
-        ret = read(s->fd, buf, count);
-        if (ret == count)
-            goto label__raw_read__success;
-        lseek(s->fd, offset, SEEK_SET);
-        ret = read(s->fd, buf, count);
-        if (ret == count)
-            goto label__raw_read__success;
-
-        DEBUG_BLOCK_PRINT("raw_pread(%d:%s, %" PRId64 ", %p, %d) [%" PRId64
-                          "] retry read failed %d : %d = %s\n",
-                          s->fd, bs->filename, offset, buf, count,
-                          bs->total_sectors, ret, errno, strerror(errno));
-    }
-
-label__raw_read__success:
-
-    return ret;
-}
-
-/*
- * offset and count are in bytes, but must be multiples of 512 for files
- * opened with O_DIRECT. buf must be aligned to 512 bytes then.
- *
- * This function may be called without alignment if the caller ensures
- * that O_DIRECT is not in effect.
- */
-static int raw_pwrite_aligned(BlockDriverState *bs, int64_t offset,
-                      const uint8_t *buf, int count)
-{
-    BDRVRawState *s = bs->opaque;
-    int ret;
-
-    ret = fd_open(bs);
-    if (ret < 0)
-        return -errno;
-
-    if (offset >= 0 && lseek(s->fd, offset, SEEK_SET) == (off_t)-1) {
-        ++(s->lseek_err_cnt);
-        if(s->lseek_err_cnt) {
-            DEBUG_BLOCK_PRINT("raw_pwrite(%d:%s, %" PRId64 ", %p, %d) [%"
-                              PRId64 "] lseek failed : %d = %s\n",
-                              s->fd, bs->filename, offset, buf, count,
-                              bs->total_sectors, errno, strerror(errno));
-        }
-        return -EIO;
-    }
-    s->lseek_err_cnt = 0;
-
-    ret = write(s->fd, buf, count);
-    if (ret == count)
-        goto label__raw_write__success;
-
-    DEBUG_BLOCK_PRINT("raw_pwrite(%d:%s, %" PRId64 ", %p, %d) [%" PRId64
-                      "] write failed %d : %d = %s\n",
-                      s->fd, bs->filename, offset, buf, count,
-                      bs->total_sectors, ret, errno, strerror(errno));
-
-label__raw_write__success:
-
-    return  (ret < 0) ? -errno : ret;
-}
-
-
-/*
- * offset and count are in bytes and possibly not aligned. For files opened
- * with O_DIRECT, necessary alignments are ensured before calling
- * raw_pread_aligned to do the actual read.
- */
-static int raw_pread(BlockDriverState *bs, int64_t offset,
-                     uint8_t *buf, int count)
-{
-    BDRVRawState *s = bs->opaque;
-    int size, ret, shift, sum;
-
-    sum = 0;
-
-    if (s->aligned_buf != NULL)  {
-
-        if (offset & 0x1ff) {
-            /* align offset on a 512 bytes boundary */
-
-            shift = offset & 0x1ff;
-            size = (shift + count + 0x1ff) & ~0x1ff;
-            if (size > ALIGNED_BUFFER_SIZE)
-                size = ALIGNED_BUFFER_SIZE;
-            ret = raw_pread_aligned(bs, offset - shift, s->aligned_buf, size);
-            if (ret < 0)
-                return ret;
-
-            size = 512 - shift;
-            if (size > count)
-                size = count;
-            memcpy(buf, s->aligned_buf + shift, size);
-
-            buf += size;
-            offset += size;
-            count -= size;
-            sum += size;
-
-            if (count == 0)
-                return sum;
-        }
-        if (count & 0x1ff || (uintptr_t) buf & 0x1ff) {
-
-            /* read on aligned buffer */
-
-            while (count) {
-
-                size = (count + 0x1ff) & ~0x1ff;
-                if (size > ALIGNED_BUFFER_SIZE)
-                    size = ALIGNED_BUFFER_SIZE;
-
-                ret = raw_pread_aligned(bs, offset, s->aligned_buf, size);
-                if (ret < 0)
-                    return ret;
-
-                size = ret;
-                if (size > count)
-                    size = count;
-
-                memcpy(buf, s->aligned_buf, size);
-
-                buf += size;
-                offset += size;
-                count -= size;
-                sum += size;
-            }
-
-            return sum;
-        }
-    }
-
-    return raw_pread_aligned(bs, offset, buf, count) + sum;
-}
-
-static int raw_read(BlockDriverState *bs, int64_t sector_num,
-                    uint8_t *buf, int nb_sectors)
-{
-    int ret;
-
-    ret = raw_pread(bs, sector_num * 512, buf, nb_sectors * 512);
-    if (ret == (nb_sectors * 512))
-        ret = 0;
-    return ret;
-}
-
-/*
- * offset and count are in bytes and possibly not aligned. For files opened
- * with O_DIRECT, necessary alignments are ensured before calling
- * raw_pwrite_aligned to do the actual write.
- */
-static int raw_pwrite(BlockDriverState *bs, int64_t offset,
-                      const uint8_t *buf, int count)
-{
-    BDRVRawState *s = bs->opaque;
-    int size, ret, shift, sum;
-
-    sum = 0;
-
-    if (s->aligned_buf != NULL) {
-
-        if (offset & 0x1ff) {
-            /* align offset on a 512 bytes boundary */
-            shift = offset & 0x1ff;
-            ret = raw_pread_aligned(bs, offset - shift, s->aligned_buf, 512);
-            if (ret < 0)
-                return ret;
-
-            size = 512 - shift;
-            if (size > count)
-                size = count;
-            memcpy(s->aligned_buf + shift, buf, size);
-
-            ret = raw_pwrite_aligned(bs, offset - shift, s->aligned_buf, 512);
-            if (ret < 0)
-                return ret;
-
-            buf += size;
-            offset += size;
-            count -= size;
-            sum += size;
-
-            if (count == 0)
-                return sum;
-        }
-        if (count & 0x1ff || (uintptr_t) buf & 0x1ff) {
-
-            while ((size = (count & ~0x1ff)) != 0) {
-
-                if (size > ALIGNED_BUFFER_SIZE)
-                    size = ALIGNED_BUFFER_SIZE;
-
-                memcpy(s->aligned_buf, buf, size);
-
-                ret = raw_pwrite_aligned(bs, offset, s->aligned_buf, size);
-                if (ret < 0)
-                    return ret;
-
-                buf += ret;
-                offset += ret;
-                count -= ret;
-                sum += ret;
-            }
-            /* here, count < 512 because (count & ~0x1ff) == 0 */
-            if (count) {
-                ret = raw_pread_aligned(bs, offset, s->aligned_buf, 512);
-                if (ret < 0)
-                    return ret;
-                 memcpy(s->aligned_buf, buf, count);
-
-                 ret = raw_pwrite_aligned(bs, offset, s->aligned_buf, 512);
-                 if (ret < 0)
-                     return ret;
-                 if (count < ret)
-                     ret = count;
-
-                 sum += ret;
-            }
-            return sum;
-        }
-    }
-    return raw_pwrite_aligned(bs, offset, buf, count) + sum;
-}
-
-static int raw_write(BlockDriverState *bs, int64_t sector_num,
-                     const uint8_t *buf, int nb_sectors)
-{
-    int ret;
-    ret = raw_pwrite(bs, sector_num * 512, buf, nb_sectors * 512);
-    if (ret == (nb_sectors * 512))
-        ret = 0;
-    return ret;
-}
-
-#ifdef CONFIG_AIO
-/***********************************************************/
-/* Unix AIO using POSIX AIO */
-
-typedef struct RawAIOCB {
-    BlockDriverAIOCB common;
-    struct qemu_paiocb aiocb;
-    struct RawAIOCB *next;
-    int ret;
-} RawAIOCB;
-
-typedef struct PosixAioState
-{
-    int rfd, wfd;
-    RawAIOCB *first_aio;
-} PosixAioState;
-
-static void posix_aio_read(void *opaque)
-{
-    PosixAioState *s = opaque;
-    RawAIOCB *acb, **pacb;
-    int ret;
-    ssize_t len;
-
-    /* read all bytes from signal pipe */
-    for (;;) {
-        char bytes[16];
-
-        len = read(s->rfd, bytes, sizeof(bytes));
-        if (len == -1 && errno == EINTR)
-            continue; /* try again */
-        if (len == sizeof(bytes))
-            continue; /* more to read */
-        break;
-    }
-
-    for(;;) {
-        pacb = &s->first_aio;
-        for(;;) {
-            acb = *pacb;
-            if (!acb)
-                goto the_end;
-            ret = qemu_paio_error(&acb->aiocb);
-            if (ret == ECANCELED) {
-                /* remove the request */
-                *pacb = acb->next;
-                qemu_aio_release(acb);
-            } else if (ret != EINPROGRESS) {
-                /* end of aio */
-                if (ret == 0) {
-                    ret = qemu_paio_return(&acb->aiocb);
-                    if (ret == acb->aiocb.aio_nbytes)
-                        ret = 0;
-                    else
-                        ret = -EINVAL;
-                } else {
-                    ret = -ret;
-                }
-                /* remove the request */
-                *pacb = acb->next;
-                /* call the callback */
-                acb->common.cb(acb->common.opaque, ret);
-                qemu_aio_release(acb);
-                break;
-            } else {
-                pacb = &acb->next;
-            }
-        }
-    }
- the_end: ;
-}
-
-static int posix_aio_flush(void *opaque)
-{
-    PosixAioState *s = opaque;
-    return !!s->first_aio;
-}
-
-static PosixAioState *posix_aio_state;
-
-static void aio_signal_handler(int signum)
-{
-    if (posix_aio_state) {
-        char byte = 0;
-
-        write(posix_aio_state->wfd, &byte, sizeof(byte));
-    }
-
-    qemu_service_io();
-}
-
-static int posix_aio_init(void)
-{
-    struct sigaction act;
-    PosixAioState *s;
-    int fds[2];
-    struct qemu_paioinit ai;
-  
-    if (posix_aio_state)
-        return 0;
-
-    s = qemu_malloc(sizeof(PosixAioState));
-
-    sigfillset(&act.sa_mask);
-    act.sa_flags = 0; /* do not restart syscalls to interrupt select() */
-    act.sa_handler = aio_signal_handler;
-    sigaction(SIGUSR2, &act, NULL);
-
-    s->first_aio = NULL;
-    if (pipe(fds) == -1) {
-        fprintf(stderr, "failed to create pipe\n");
-        return -errno;
-    }
-
-    s->rfd = fds[0];
-    s->wfd = fds[1];
-
-    fcntl(s->rfd, F_SETFL, O_NONBLOCK);
-    fcntl(s->wfd, F_SETFL, O_NONBLOCK);
-
-    qemu_aio_set_fd_handler(s->rfd, posix_aio_read, NULL, posix_aio_flush, s);
-
-    memset(&ai, 0, sizeof(ai));
-    ai.aio_threads = 64;
-    ai.aio_num = 64;
-    qemu_paio_init(&ai);
-
-    posix_aio_state = s;
-
-    return 0;
-}
-
-static RawAIOCB *raw_aio_setup(BlockDriverState *bs, int64_t sector_num,
-        QEMUIOVector *qiov, int nb_sectors,
-        BlockDriverCompletionFunc *cb, void *opaque)
-{
-    BDRVRawState *s = bs->opaque;
-    RawAIOCB *acb;
-
-    if (fd_open(bs) < 0)
-        return NULL;
-
-    acb = qemu_aio_get(bs, cb, opaque);
-    if (!acb)
-        return NULL;
-    acb->aiocb.aio_fildes = s->fd;
-    acb->aiocb.ev_signo = SIGUSR2;
-    acb->aiocb.aio_iov = qiov->iov;
-    acb->aiocb.aio_niov = qiov->niov;
-    acb->aiocb.aio_nbytes = nb_sectors * 512;
-    acb->aiocb.aio_offset = sector_num * 512;
-    acb->aiocb.aio_flags = 0;
-
-    /*
-     * If O_DIRECT is used the buffer needs to be aligned on a sector
-     * boundary. Tell the low level code to ensure that in case it's
-     * not done yet.
-     */
-    if (s->aligned_buf)
-        acb->aiocb.aio_flags |= QEMU_AIO_SECTOR_ALIGNED;
-
-    acb->next = posix_aio_state->first_aio;
-    posix_aio_state->first_aio = acb;
-    return acb;
-}
-
-static void raw_aio_remove(RawAIOCB *acb)
-{
-    RawAIOCB **pacb;
-
-    /* remove the callback from the queue */
-    pacb = &posix_aio_state->first_aio;
-    for(;;) {
-        if (*pacb == NULL) {
-            fprintf(stderr, "raw_aio_remove: aio request not found!\n");
-            break;
-        } else if (*pacb == acb) {
-            *pacb = acb->next;
-            qemu_aio_release(acb);
-            break;
-        }
-        pacb = &(*pacb)->next;
-    }
-}
-
-static BlockDriverAIOCB *raw_aio_readv(BlockDriverState *bs,
-        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
-        BlockDriverCompletionFunc *cb, void *opaque)
-{
-    RawAIOCB *acb;
-
-    acb = raw_aio_setup(bs, sector_num, qiov, nb_sectors, cb, opaque);
-    if (!acb)
-        return NULL;
-    if (qemu_paio_read(&acb->aiocb) < 0) {
-        raw_aio_remove(acb);
-        return NULL;
-    }
-    return &acb->common;
-}
-
-static BlockDriverAIOCB *raw_aio_writev(BlockDriverState *bs,
-        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
-        BlockDriverCompletionFunc *cb, void *opaque)
-{
-    RawAIOCB *acb;
-
-    acb = raw_aio_setup(bs, sector_num, qiov, nb_sectors, cb, opaque);
-    if (!acb)
-        return NULL;
-    if (qemu_paio_write(&acb->aiocb) < 0) {
-        raw_aio_remove(acb);
-        return NULL;
-    }
-    return &acb->common;
-}
-
-static void raw_aio_cancel(BlockDriverAIOCB *blockacb)
-{
-    int ret;
-    RawAIOCB *acb = (RawAIOCB *)blockacb;
-
-    ret = qemu_paio_cancel(acb->aiocb.aio_fildes, &acb->aiocb);
-    if (ret == QEMU_PAIO_NOTCANCELED) {
-        /* fail safe: if the aio could not be canceled, we wait for
-           it */
-        while (qemu_paio_error(&acb->aiocb) == EINPROGRESS);
-    }
-
-    raw_aio_remove(acb);
-}
-#else /* CONFIG_AIO */
-static int posix_aio_init(void)
-{
-    return 0;
-}
-#endif /* CONFIG_AIO */
-
-
-static void raw_close(BlockDriverState *bs)
-{
-    BDRVRawState *s = bs->opaque;
-    if (s->fd >= 0) {
-        close(s->fd);
-        s->fd = -1;
-        if (s->aligned_buf != NULL)
-            qemu_free(s->aligned_buf);
-    }
-}
-
-static int raw_truncate(BlockDriverState *bs, int64_t offset)
-{
-    BDRVRawState *s = bs->opaque;
-    if (s->type != FTYPE_FILE)
-        return -ENOTSUP;
-    if (ftruncate(s->fd, offset) < 0)
-        return -errno;
-    return 0;
-}
-
-#ifdef __OpenBSD__
-static int64_t raw_getlength(BlockDriverState *bs)
-{
-    BDRVRawState *s = bs->opaque;
-    int fd = s->fd;
-    struct stat st;
-
-    if (fstat(fd, &st))
-        return -1;
-    if (S_ISCHR(st.st_mode) || S_ISBLK(st.st_mode)) {
-        struct disklabel dl;
-
-        if (ioctl(fd, DIOCGDINFO, &dl))
-            return -1;
-        return (uint64_t)dl.d_secsize *
-            dl.d_partitions[DISKPART(st.st_rdev)].p_size;
-    } else
-        return st.st_size;
-}
-#else /* !__OpenBSD__ */
-static int64_t  raw_getlength(BlockDriverState *bs)
-{
-    BDRVRawState *s = bs->opaque;
-    int fd = s->fd;
-    int64_t size;
-#ifdef HOST_BSD
-    struct stat sb;
-#ifdef __FreeBSD__
-    int reopened = 0;
-#endif
-#endif
-#ifdef __sun__
-    struct dk_minfo minfo;
-    int rv;
-#endif
-    int ret;
-
-    ret = fd_open(bs);
-    if (ret < 0)
-        return ret;
-
-#ifdef HOST_BSD
-#ifdef __FreeBSD__
-again:
-#endif
-    if (!fstat(fd, &sb) && (S_IFCHR & sb.st_mode)) {
-#ifdef DIOCGMEDIASIZE
-       if (ioctl(fd, DIOCGMEDIASIZE, (off_t *)&size))
-#elif defined(DIOCGPART)
-        {
-                struct partinfo pi;
-                if (ioctl(fd, DIOCGPART, &pi) == 0)
-                        size = pi.media_size;
-                else
-                        size = 0;
-        }
-        if (size == 0)
-#endif
-#ifdef CONFIG_COCOA
-        size = LONG_LONG_MAX;
-#else
-        size = lseek(fd, 0LL, SEEK_END);
-#endif
-#ifdef __FreeBSD__
-        switch(s->type) {
-        case FTYPE_CD:
-            /* XXX FreeBSD acd returns UINT_MAX sectors for an empty drive */
-            if (size == 2048LL * (unsigned)-1)
-                size = 0;
-            /* XXX no disc?  maybe we need to reopen... */
-            if (size <= 0 && !reopened && cd_open(bs) >= 0) {
-                reopened = 1;
-                goto again;
-            }
-        }
-#endif
-    } else
-#endif
-#ifdef __sun__
-    /*
-     * use the DKIOCGMEDIAINFO ioctl to read the size.
-     */
-    rv = ioctl ( fd, DKIOCGMEDIAINFO, &minfo );
-    if ( rv != -1 ) {
-        size = minfo.dki_lbsize * minfo.dki_capacity;
-    } else /* there are reports that lseek on some devices
-              fails, but irc discussion said that contingency
-              on contingency was overkill */
-#endif
-    {
-        size = lseek(fd, 0, SEEK_END);
-    }
-    return size;
-}
-#endif
-
-static int raw_create(const char *filename, int64_t total_size,
-                      const char *backing_file, int flags)
-{
-    int fd;
-
-    if (flags || backing_file)
-        return -ENOTSUP;
-
-    fd = open(filename, O_WRONLY | O_CREAT | O_TRUNC | O_BINARY,
-              0644);
-    if (fd < 0)
-        return -EIO;
-    ftruncate(fd, total_size * 512);
-    close(fd);
-    return 0;
-}
-
-static void raw_flush(BlockDriverState *bs)
-{
-    BDRVRawState *s = bs->opaque;
-    fsync(s->fd);
-}
-
-static BlockDriver bdrv_raw = {
-    .format_name = "raw",
-    .instance_size = sizeof(BDRVRawState),
-    .bdrv_probe = NULL, /* no probe for protocols */
-    .bdrv_open = raw_open,
-    .bdrv_read = raw_read,
-    .bdrv_write = raw_write,
-    .bdrv_close = raw_close,
-    .bdrv_create = raw_create,
-    .bdrv_flush = raw_flush,
-
-#ifdef CONFIG_AIO
-    .bdrv_aio_readv = raw_aio_readv,
-    .bdrv_aio_writev = raw_aio_writev,
-    .bdrv_aio_cancel = raw_aio_cancel,
-    .aiocb_size = sizeof(RawAIOCB),
-#endif
-
-    .bdrv_truncate = raw_truncate,
-    .bdrv_getlength = raw_getlength,
-};
-
-/***********************************************/
-/* host device */
-
-#ifdef CONFIG_COCOA
-static kern_return_t FindEjectableCDMedia( io_iterator_t *mediaIterator );
-static kern_return_t GetBSDPath( io_iterator_t mediaIterator, char *bsdPath, CFIndex maxPathSize );
-
-kern_return_t FindEjectableCDMedia( io_iterator_t *mediaIterator )
-{
-    kern_return_t       kernResult;
-    mach_port_t     masterPort;
-    CFMutableDictionaryRef  classesToMatch;
-
-    kernResult = IOMasterPort( MACH_PORT_NULL, &masterPort );
-    if ( KERN_SUCCESS != kernResult ) {
-        printf( "IOMasterPort returned %d\n", kernResult );
-    }
-
-    classesToMatch = IOServiceMatching( kIOCDMediaClass );
-    if ( classesToMatch == NULL ) {
-        printf( "IOServiceMatching returned a NULL dictionary.\n" );
-    } else {
-    CFDictionarySetValue( classesToMatch, CFSTR( kIOMediaEjectableKey ), kCFBooleanTrue );
-    }
-    kernResult = IOServiceGetMatchingServices( masterPort, classesToMatch, mediaIterator );
-    if ( KERN_SUCCESS != kernResult )
-    {
-        printf( "IOServiceGetMatchingServices returned %d\n", kernResult );
-    }
-
-    return kernResult;
-}
-
-kern_return_t GetBSDPath( io_iterator_t mediaIterator, char *bsdPath, CFIndex maxPathSize )
-{
-    io_object_t     nextMedia;
-    kern_return_t   kernResult = KERN_FAILURE;
-    *bsdPath = '\0';
-    nextMedia = IOIteratorNext( mediaIterator );
-    if ( nextMedia )
-    {
-        CFTypeRef   bsdPathAsCFString;
-    bsdPathAsCFString = IORegistryEntryCreateCFProperty( nextMedia, CFSTR( kIOBSDNameKey ), kCFAllocatorDefault, 0 );
-        if ( bsdPathAsCFString ) {
-            size_t devPathLength;
-            strcpy( bsdPath, _PATH_DEV );
-            strcat( bsdPath, "r" );
-            devPathLength = strlen( bsdPath );
-            if ( CFStringGetCString( bsdPathAsCFString, bsdPath + devPathLength, maxPathSize - devPathLength, kCFStringEncodingASCII ) ) {
-                kernResult = KERN_SUCCESS;
-            }
-            CFRelease( bsdPathAsCFString );
-        }
-        IOObjectRelease( nextMedia );
-    }
-
-    return kernResult;
-}
-
-#endif
-
-static int hdev_open(BlockDriverState *bs, const char *filename, int flags)
-{
-    BDRVRawState *s = bs->opaque;
-    int fd, open_flags, ret;
-
-    posix_aio_init();
-
-#ifdef CONFIG_COCOA
-    if (strstart(filename, "/dev/cdrom", NULL)) {
-        kern_return_t kernResult;
-        io_iterator_t mediaIterator;
-        char bsdPath[ MAXPATHLEN ];
-        int fd;
-
-        kernResult = FindEjectableCDMedia( &mediaIterator );
-        kernResult = GetBSDPath( mediaIterator, bsdPath, sizeof( bsdPath ) );
-
-        if ( bsdPath[ 0 ] != '\0' ) {
-            strcat(bsdPath,"s0");
-            /* some CDs don't have a partition 0 */
-            fd = open(bsdPath, O_RDONLY | O_BINARY | O_LARGEFILE);
-            if (fd < 0) {
-                bsdPath[strlen(bsdPath)-1] = '1';
-            } else {
-                close(fd);
-            }
-            filename = bsdPath;
-        }
-
-        if ( mediaIterator )
-            IOObjectRelease( mediaIterator );
-    }
-#endif
-    open_flags = O_BINARY;
-    if ((flags & BDRV_O_ACCESS) == O_RDWR) {
-        open_flags |= O_RDWR;
-    } else {
-        open_flags |= O_RDONLY;
-        bs->read_only = 1;
-    }
-    /* Use O_DSYNC for write-through caching, no flags for write-back caching,
-     * and O_DIRECT for no caching. */
-    if ((flags & BDRV_O_NOCACHE))
-        open_flags |= O_DIRECT;
-    else if (!(flags & BDRV_O_CACHE_WB))
-        open_flags |= O_DSYNC;
-
-    s->type = FTYPE_FILE;
-#if defined(__linux__)
-    if (strstart(filename, "/dev/cd", NULL)) {
-        /* open will not fail even if no CD is inserted */
-        open_flags |= O_NONBLOCK;
-        s->type = FTYPE_CD;
-    } else if (strstart(filename, "/dev/fd", NULL)) {
-        s->type = FTYPE_FD;
-        s->fd_open_flags = open_flags;
-        /* open will not fail even if no floppy is inserted */
-        open_flags |= O_NONBLOCK;
-#ifdef CONFIG_AIO
-    } else if (strstart(filename, "/dev/sg", NULL)) {
-        bs->sg = 1;
-#endif
-    }
-#endif
-#if defined(__FreeBSD__)
-    if (strstart(filename, "/dev/cd", NULL) ||
-        strstart(filename, "/dev/acd", NULL)) {
-        s->type = FTYPE_CD;
-        s->cd_open_flags = open_flags;
-    }
-#endif
-    s->fd = -1;
-    fd = open(filename, open_flags, 0644);
-    if (fd < 0) {
-        ret = -errno;
-        if (ret == -EROFS)
-            ret = -EACCES;
-        return ret;
-    }
-    s->fd = fd;
-#if defined(__FreeBSD__)
-    /* make sure the door isnt locked at this time */
-    if (s->type == FTYPE_CD)
-        ioctl (s->fd, CDIOCALLOW);
-#endif
-#if defined(__linux__)
-    /* close fd so that we can reopen it as needed */
-    if (s->type == FTYPE_FD) {
-        close(s->fd);
-        s->fd = -1;
-        s->fd_media_changed = 1;
-    }
-#endif
-    return 0;
-}
-
-#if defined(__linux__)
-/* Note: we do not have a reliable method to detect if the floppy is
-   present. The current method is to try to open the floppy at every
-   I/O and to keep it opened during a few hundreds of ms. */
-static int fd_open(BlockDriverState *bs)
-{
-    BDRVRawState *s = bs->opaque;
-    int last_media_present;
-
-    if (s->type != FTYPE_FD)
-        return 0;
-    last_media_present = (s->fd >= 0);
-    if (s->fd >= 0 &&
-        (qemu_get_clock(rt_clock) - s->fd_open_time) >= FD_OPEN_TIMEOUT) {
-        close(s->fd);
-        s->fd = -1;
-#ifdef DEBUG_FLOPPY
-        printf("Floppy closed\n");
-#endif
-    }
-    if (s->fd < 0) {
-        if (s->fd_got_error &&
-            (qemu_get_clock(rt_clock) - s->fd_error_time) < FD_OPEN_TIMEOUT) {
-#ifdef DEBUG_FLOPPY
-            printf("No floppy (open delayed)\n");
-#endif
-            return -EIO;
-        }
-        s->fd = open(bs->filename, s->fd_open_flags);
-        if (s->fd < 0) {
-            s->fd_error_time = qemu_get_clock(rt_clock);
-            s->fd_got_error = 1;
-            if (last_media_present)
-                s->fd_media_changed = 1;
-#ifdef DEBUG_FLOPPY
-            printf("No floppy\n");
-#endif
-            return -EIO;
-        }
-#ifdef DEBUG_FLOPPY
-        printf("Floppy opened\n");
-#endif
-    }
-    if (!last_media_present)
-        s->fd_media_changed = 1;
-    s->fd_open_time = qemu_get_clock(rt_clock);
-    s->fd_got_error = 0;
-    return 0;
-}
-
-static int raw_is_inserted(BlockDriverState *bs)
-{
-    BDRVRawState *s = bs->opaque;
-    int ret;
-
-    switch(s->type) {
-    case FTYPE_CD:
-        ret = ioctl(s->fd, CDROM_DRIVE_STATUS, CDSL_CURRENT);
-        if (ret == CDS_DISC_OK)
-            return 1;
-        else
-            return 0;
-        break;
-    case FTYPE_FD:
-        ret = fd_open(bs);
-        return (ret >= 0);
-    default:
-        return 1;
-    }
-}
-
-/* currently only used by fdc.c, but a CD version would be good too */
-static int raw_media_changed(BlockDriverState *bs)
-{
-    BDRVRawState *s = bs->opaque;
-
-    switch(s->type) {
-    case FTYPE_FD:
-        {
-            int ret;
-            /* XXX: we do not have a true media changed indication. It
-               does not work if the floppy is changed without trying
-               to read it */
-            fd_open(bs);
-            ret = s->fd_media_changed;
-            s->fd_media_changed = 0;
-#ifdef DEBUG_FLOPPY
-            printf("Floppy changed=%d\n", ret);
-#endif
-            return ret;
-        }
-    default:
-        return -ENOTSUP;
-    }
-}
-
-static int raw_eject(BlockDriverState *bs, int eject_flag)
-{
-    BDRVRawState *s = bs->opaque;
-
-    switch(s->type) {
-    case FTYPE_CD:
-        if (eject_flag) {
-            if (ioctl (s->fd, CDROMEJECT, NULL) < 0)
-                perror("CDROMEJECT");
-        } else {
-            if (ioctl (s->fd, CDROMCLOSETRAY, NULL) < 0)
-                perror("CDROMEJECT");
-        }
-        break;
-    case FTYPE_FD:
-        {
-            int fd;
-            if (s->fd >= 0) {
-                close(s->fd);
-                s->fd = -1;
-            }
-            fd = open(bs->filename, s->fd_open_flags | O_NONBLOCK);
-            if (fd >= 0) {
-                if (ioctl(fd, FDEJECT, 0) < 0)
-                    perror("FDEJECT");
-                close(fd);
-            }
-        }
-        break;
-    default:
-        return -ENOTSUP;
-    }
-    return 0;
-}
-
-static int raw_set_locked(BlockDriverState *bs, int locked)
-{
-    BDRVRawState *s = bs->opaque;
-
-    switch(s->type) {
-    case FTYPE_CD:
-        if (ioctl (s->fd, CDROM_LOCKDOOR, locked) < 0) {
-            /* Note: an error can happen if the distribution automatically
-               mounts the CD-ROM */
-            //        perror("CDROM_LOCKDOOR");
-        }
-        break;
-    default:
-        return -ENOTSUP;
-    }
-    return 0;
-}
-
-static int raw_ioctl(BlockDriverState *bs, unsigned long int req, void *buf)
-{
-    BDRVRawState *s = bs->opaque;
-
-    return ioctl(s->fd, req, buf);
-}
-
-#ifdef CONFIG_AIO
-static BlockDriverAIOCB *raw_aio_ioctl(BlockDriverState *bs,
-        unsigned long int req, void *buf,
-        BlockDriverCompletionFunc *cb, void *opaque)
-{
-    BDRVRawState *s = bs->opaque;
-    RawAIOCB *acb;
-
-    if (fd_open(bs) < 0)
-        return NULL;
-
-    acb = qemu_aio_get(bs, cb, opaque);
-    if (!acb)
-        return NULL;
-    acb->aiocb.aio_fildes = s->fd;
-    acb->aiocb.ev_signo = SIGUSR2;
-    acb->aiocb.aio_offset = 0;
-    acb->aiocb.aio_flags = 0;
-
-    acb->next = posix_aio_state->first_aio;
-    posix_aio_state->first_aio = acb;
-
-    acb->aiocb.aio_ioctl_buf = buf;
-    acb->aiocb.aio_ioctl_cmd = req;
-    if (qemu_paio_ioctl(&acb->aiocb) < 0) {
-        raw_aio_remove(acb);
-        return NULL;
-    }
-
-    return &acb->common;
-}
-#endif
-
-#elif defined(__FreeBSD__)
-
-static int fd_open(BlockDriverState *bs)
-{
-    BDRVRawState *s = bs->opaque;
-
-    /* this is just to ensure s->fd is sane (its called by io ops) */
-    if (s->fd >= 0)
-        return 0;
-    return -EIO;
-}
-
-static int cd_open(BlockDriverState *bs)
-{
-#if defined(__FreeBSD__)
-    BDRVRawState *s = bs->opaque;
-    int fd;
-
-    switch(s->type) {
-    case FTYPE_CD:
-        /* XXX force reread of possibly changed/newly loaded disc,
-         * FreeBSD seems to not notice sometimes... */
-        if (s->fd >= 0)
-            close (s->fd);
-        fd = open(bs->filename, s->cd_open_flags, 0644);
-        if (fd < 0) {
-            s->fd = -1;
-            return -EIO;
-        }
-        s->fd = fd;
-        /* make sure the door isnt locked at this time */
-        ioctl (s->fd, CDIOCALLOW);
-    }
-#endif
-    return 0;
-}
-
-static int raw_is_inserted(BlockDriverState *bs)
-{
-    BDRVRawState *s = bs->opaque;
-
-    switch(s->type) {
-    case FTYPE_CD:
-        return (raw_getlength(bs) > 0);
-    case FTYPE_FD:
-        /* XXX handle this */
-        /* FALLTHRU */
-    default:
-        return 1;
-    }
-}
-
-static int raw_media_changed(BlockDriverState *bs)
-{
-    return -ENOTSUP;
-}
-
-static int raw_eject(BlockDriverState *bs, int eject_flag)
-{
-    BDRVRawState *s = bs->opaque;
-
-    switch(s->type) {
-    case FTYPE_CD:
-        if (s->fd < 0)
-            return -ENOTSUP;
-        (void) ioctl (s->fd, CDIOCALLOW);
-        if (eject_flag) {
-            if (ioctl (s->fd, CDIOCEJECT) < 0)
-                perror("CDIOCEJECT");
-        } else {
-            if (ioctl (s->fd, CDIOCCLOSE) < 0)
-                perror("CDIOCCLOSE");
-        }
-        if (cd_open(bs) < 0)
-            return -ENOTSUP;
-        break;
-    case FTYPE_FD:
-        /* XXX handle this */
-        /* FALLTHRU */
-    default:
-        return -ENOTSUP;
-    }
-    return 0;
-}
-
-static int raw_set_locked(BlockDriverState *bs, int locked)
-{
-    BDRVRawState *s = bs->opaque;
-
-    switch(s->type) {
-    case FTYPE_CD:
-        if (s->fd < 0)
-            return -ENOTSUP;
-        if (ioctl (s->fd, (locked ? CDIOCPREVENT : CDIOCALLOW)) < 0) {
-            /* Note: an error can happen if the distribution automatically
-               mounts the CD-ROM */
-            //        perror("CDROM_LOCKDOOR");
-        }
-        break;
-    default:
-        return -ENOTSUP;
-    }
-    return 0;
-}
-
-static int raw_ioctl(BlockDriverState *bs, unsigned long int req, void *buf)
-{
-    return -ENOTSUP;
-}
-#else /* !linux && !FreeBSD */
-
-static int fd_open(BlockDriverState *bs)
-{
-    return 0;
-}
-
-static int raw_is_inserted(BlockDriverState *bs)
-{
-    return 1;
-}
-
-static int raw_media_changed(BlockDriverState *bs)
-{
-    return -ENOTSUP;
-}
-
-static int raw_eject(BlockDriverState *bs, int eject_flag)
-{
-    return -ENOTSUP;
-}
-
-static int raw_set_locked(BlockDriverState *bs, int locked)
-{
-    return -ENOTSUP;
-}
-
-static int raw_ioctl(BlockDriverState *bs, unsigned long int req, void *buf)
-{
-    return -ENOTSUP;
-}
-
-static BlockDriverAIOCB *raw_aio_ioctl(BlockDriverState *bs,
-        unsigned long int req, void *buf,
-        BlockDriverCompletionFunc *cb, void *opaque)
-{
-    return NULL;
-}
-#endif /* !linux && !FreeBSD */
-
-#if defined(__linux__) || defined(__FreeBSD__)
-static int hdev_create(const char *filename, int64_t total_size,
-                       const char *backing_file, int flags)
-{
-    int fd;
-    int ret = 0;
-    struct stat stat_buf;
-
-    if (flags || backing_file)
-        return -ENOTSUP;
-
-    fd = open(filename, O_WRONLY | O_BINARY);
-    if (fd < 0)
-        return -EIO;
-
-    if (fstat(fd, &stat_buf) < 0)
-        ret = -EIO;
-    else if (!S_ISBLK(stat_buf.st_mode))
-        ret = -EIO;
-    else if (lseek(fd, 0, SEEK_END) < total_size * 512)
-        ret = -ENOSPC;
-
-    close(fd);
-    return ret;
-}
-
-#else  /* !(linux || freebsd) */
-
-static int hdev_create(const char *filename, int64_t total_size,
-                       const char *backing_file, int flags)
-{
-    return -ENOTSUP;
-}
-#endif
-
-static BlockDriver bdrv_host_device = {
-    .format_name       = "host_device",
-    .instance_size     = sizeof(BDRVRawState),
-    .bdrv_open         = hdev_open,
-    .bdrv_close                = raw_close,
-    .bdrv_create        = hdev_create,
-    .bdrv_flush                = raw_flush,
-
-#ifdef CONFIG_AIO
-    .bdrv_aio_readv    = raw_aio_readv,
-    .bdrv_aio_writev   = raw_aio_writev,
-    .bdrv_aio_cancel   = raw_aio_cancel,
-    .aiocb_size                = sizeof(RawAIOCB),
-#endif
-
-    .bdrv_read          = raw_read,
-    .bdrv_write         = raw_write,
-    .bdrv_getlength    = raw_getlength,
-
-    /* removable device support */
-    .bdrv_is_inserted  = raw_is_inserted,
-    .bdrv_media_changed        = raw_media_changed,
-    .bdrv_eject                = raw_eject,
-    .bdrv_set_locked   = raw_set_locked,
-    /* generic scsi device */
-    .bdrv_ioctl                = raw_ioctl,
-#ifdef CONFIG_AIO
-    .bdrv_aio_ioctl    = raw_aio_ioctl,
-#endif
-};
-
-static void bdrv_raw_init(void)
-{
-    bdrv_register(&bdrv_raw);
-    bdrv_register(&bdrv_host_device);
-}
-
-block_init(bdrv_raw_init);
diff --git a/block-raw-win32.c b/block-raw-win32.c
deleted file mode 100644 (file)
index 15f3ec4..0000000
+++ /dev/null
@@ -1,393 +0,0 @@
-/*
- * Block driver for RAW files (win32)
- *
- * Copyright (c) 2006 Fabrice Bellard
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
- * THE SOFTWARE.
- */
-#include "qemu-common.h"
-#include "qemu-timer.h"
-#include "block_int.h"
-#include "module.h"
-#include <windows.h>
-#include <winioctl.h>
-
-#define FTYPE_FILE 0
-#define FTYPE_CD     1
-#define FTYPE_HARDDISK 2
-
-typedef struct BDRVRawState {
-    HANDLE hfile;
-    int type;
-    char drive_path[16]; /* format: "d:\" */
-} BDRVRawState;
-
-int qemu_ftruncate64(int fd, int64_t length)
-{
-    LARGE_INTEGER li;
-    LONG high;
-    HANDLE h;
-    BOOL res;
-
-    if ((GetVersion() & 0x80000000UL) && (length >> 32) != 0)
-       return -1;
-
-    h = (HANDLE)_get_osfhandle(fd);
-
-    /* get current position, ftruncate do not change position */
-    li.HighPart = 0;
-    li.LowPart = SetFilePointer (h, 0, &li.HighPart, FILE_CURRENT);
-    if (li.LowPart == 0xffffffffUL && GetLastError() != NO_ERROR)
-       return -1;
-
-    high = length >> 32;
-    if (!SetFilePointer(h, (DWORD) length, &high, FILE_BEGIN))
-       return -1;
-    res = SetEndOfFile(h);
-
-    /* back to old position */
-    SetFilePointer(h, li.LowPart, &li.HighPart, FILE_BEGIN);
-    return res ? 0 : -1;
-}
-
-static int set_sparse(int fd)
-{
-    DWORD returned;
-    return (int) DeviceIoControl((HANDLE)_get_osfhandle(fd), FSCTL_SET_SPARSE,
-                                NULL, 0, NULL, 0, &returned, NULL);
-}
-
-static int raw_open(BlockDriverState *bs, const char *filename, int flags)
-{
-    BDRVRawState *s = bs->opaque;
-    int access_flags, create_flags;
-    DWORD overlapped;
-
-    s->type = FTYPE_FILE;
-
-    if ((flags & BDRV_O_ACCESS) == O_RDWR) {
-        access_flags = GENERIC_READ | GENERIC_WRITE;
-    } else {
-        access_flags = GENERIC_READ;
-    }
-    if (flags & BDRV_O_CREAT) {
-        create_flags = CREATE_ALWAYS;
-    } else {
-        create_flags = OPEN_EXISTING;
-    }
-    overlapped = FILE_ATTRIBUTE_NORMAL;
-    if ((flags & BDRV_O_NOCACHE))
-        overlapped |= FILE_FLAG_NO_BUFFERING | FILE_FLAG_WRITE_THROUGH;
-    else if (!(flags & BDRV_O_CACHE_WB))
-        overlapped |= FILE_FLAG_WRITE_THROUGH;
-    s->hfile = CreateFile(filename, access_flags,
-                          FILE_SHARE_READ, NULL,
-                          create_flags, overlapped, NULL);
-    if (s->hfile == INVALID_HANDLE_VALUE) {
-        int err = GetLastError();
-
-        if (err == ERROR_ACCESS_DENIED)
-            return -EACCES;
-        return -1;
-    }
-    return 0;
-}
-
-static int raw_read(BlockDriverState *bs, int64_t sector_num,
-                    uint8_t *buf, int nb_sectors)
-{
-    BDRVRawState *s = bs->opaque;
-    OVERLAPPED ov;
-    DWORD ret_count;
-    int ret;
-    int64_t offset = sector_num * 512;
-    int count = nb_sectors * 512;
-
-    memset(&ov, 0, sizeof(ov));
-    ov.Offset = offset;
-    ov.OffsetHigh = offset >> 32;
-    ret = ReadFile(s->hfile, buf, count, &ret_count, &ov);
-    if (!ret)
-        return ret_count;
-    if (ret_count == count)
-        ret_count = 0;
-    return ret_count;
-}
-
-static int raw_write(BlockDriverState *bs, int64_t sector_num,
-                     const uint8_t *buf, int nb_sectors)
-{
-    BDRVRawState *s = bs->opaque;
-    OVERLAPPED ov;
-    DWORD ret_count;
-    int ret;
-    int64_t offset = sector_num * 512;
-    int count = nb_sectors * 512;
-
-    memset(&ov, 0, sizeof(ov));
-    ov.Offset = offset;
-    ov.OffsetHigh = offset >> 32;
-    ret = WriteFile(s->hfile, buf, count, &ret_count, &ov);
-    if (!ret)
-        return ret_count;
-    if (ret_count == count)
-        ret_count = 0;
-    return ret_count;
-}
-
-static void raw_flush(BlockDriverState *bs)
-{
-    BDRVRawState *s = bs->opaque;
-    FlushFileBuffers(s->hfile);
-}
-
-static void raw_close(BlockDriverState *bs)
-{
-    BDRVRawState *s = bs->opaque;
-    CloseHandle(s->hfile);
-}
-
-static int raw_truncate(BlockDriverState *bs, int64_t offset)
-{
-    BDRVRawState *s = bs->opaque;
-    LONG low, high;
-
-    low = offset;
-    high = offset >> 32;
-    if (!SetFilePointer(s->hfile, low, &high, FILE_BEGIN))
-       return -EIO;
-    if (!SetEndOfFile(s->hfile))
-        return -EIO;
-    return 0;
-}
-
-static int64_t raw_getlength(BlockDriverState *bs)
-{
-    BDRVRawState *s = bs->opaque;
-    LARGE_INTEGER l;
-    ULARGE_INTEGER available, total, total_free;
-    DISK_GEOMETRY_EX dg;
-    DWORD count;
-    BOOL status;
-
-    switch(s->type) {
-    case FTYPE_FILE:
-        l.LowPart = GetFileSize(s->hfile, (PDWORD)&l.HighPart);
-        if (l.LowPart == 0xffffffffUL && GetLastError() != NO_ERROR)
-            return -EIO;
-        break;
-    case FTYPE_CD:
-        if (!GetDiskFreeSpaceEx(s->drive_path, &available, &total, &total_free))
-            return -EIO;
-        l.QuadPart = total.QuadPart;
-        break;
-    case FTYPE_HARDDISK:
-        status = DeviceIoControl(s->hfile, IOCTL_DISK_GET_DRIVE_GEOMETRY_EX,
-                                 NULL, 0, &dg, sizeof(dg), &count, NULL);
-        if (status != 0) {
-            l = dg.DiskSize;
-        }
-        break;
-    default:
-        return -EIO;
-    }
-    return l.QuadPart;
-}
-
-static int raw_create(const char *filename, int64_t total_size,
-                      const char *backing_file, int flags)
-{
-    int fd;
-
-    if (flags || backing_file)
-        return -ENOTSUP;
-
-    fd = open(filename, O_WRONLY | O_CREAT | O_TRUNC | O_BINARY,
-              0644);
-    if (fd < 0)
-        return -EIO;
-    set_sparse(fd);
-    ftruncate(fd, total_size * 512);
-    close(fd);
-    return 0;
-}
-
-static BlockDriver bdrv_raw = {
-    .format_name       = "raw",
-    .instance_size     = sizeof(BDRVRawState),
-    .bdrv_open         = raw_open,
-    .bdrv_close                = raw_close,
-    .bdrv_create       = raw_create,
-    .bdrv_flush                = raw_flush,
-    .bdrv_read         = raw_read,
-    .bdrv_write                = raw_write,
-    .bdrv_truncate     = raw_truncate,
-    .bdrv_getlength    = raw_getlength,
-};
-
-/***********************************************/
-/* host device */
-
-static int find_cdrom(char *cdrom_name, int cdrom_name_size)
-{
-    char drives[256], *pdrv = drives;
-    UINT type;
-
-    memset(drives, 0, sizeof(drives));
-    GetLogicalDriveStrings(sizeof(drives), drives);
-    while(pdrv[0] != '\0') {
-        type = GetDriveType(pdrv);
-        switch(type) {
-        case DRIVE_CDROM:
-            snprintf(cdrom_name, cdrom_name_size, "\\\\.\\%c:", pdrv[0]);
-            return 0;
-            break;
-        }
-        pdrv += lstrlen(pdrv) + 1;
-    }
-    return -1;
-}
-
-static int find_device_type(BlockDriverState *bs, const char *filename)
-{
-    BDRVRawState *s = bs->opaque;
-    UINT type;
-    const char *p;
-
-    if (strstart(filename, "\\\\.\\", &p) ||
-        strstart(filename, "//./", &p)) {
-        if (stristart(p, "PhysicalDrive", NULL))
-            return FTYPE_HARDDISK;
-        snprintf(s->drive_path, sizeof(s->drive_path), "%c:\\", p[0]);
-        type = GetDriveType(s->drive_path);
-        switch (type) {
-        case DRIVE_REMOVABLE:
-        case DRIVE_FIXED:
-            return FTYPE_HARDDISK;
-        case DRIVE_CDROM:
-            return FTYPE_CD;
-        default:
-            return FTYPE_FILE;
-        }
-    } else {
-        return FTYPE_FILE;
-    }
-}
-
-static int hdev_open(BlockDriverState *bs, const char *filename, int flags)
-{
-    BDRVRawState *s = bs->opaque;
-    int access_flags, create_flags;
-    DWORD overlapped;
-    char device_name[64];
-
-    if (strstart(filename, "/dev/cdrom", NULL)) {
-        if (find_cdrom(device_name, sizeof(device_name)) < 0)
-            return -ENOENT;
-        filename = device_name;
-    } else {
-        /* transform drive letters into device name */
-        if (((filename[0] >= 'a' && filename[0] <= 'z') ||
-             (filename[0] >= 'A' && filename[0] <= 'Z')) &&
-            filename[1] == ':' && filename[2] == '\0') {
-            snprintf(device_name, sizeof(device_name), "\\\\.\\%c:", filename[0]);
-            filename = device_name;
-        }
-    }
-    s->type = find_device_type(bs, filename);
-
-    if ((flags & BDRV_O_ACCESS) == O_RDWR) {
-        access_flags = GENERIC_READ | GENERIC_WRITE;
-    } else {
-        access_flags = GENERIC_READ;
-    }
-    create_flags = OPEN_EXISTING;
-
-    overlapped = FILE_ATTRIBUTE_NORMAL;
-    if ((flags & BDRV_O_NOCACHE))
-        overlapped |= FILE_FLAG_NO_BUFFERING | FILE_FLAG_WRITE_THROUGH;
-    else if (!(flags & BDRV_O_CACHE_WB))
-        overlapped |= FILE_FLAG_WRITE_THROUGH;
-    s->hfile = CreateFile(filename, access_flags,
-                          FILE_SHARE_READ, NULL,
-                          create_flags, overlapped, NULL);
-    if (s->hfile == INVALID_HANDLE_VALUE) {
-        int err = GetLastError();
-
-        if (err == ERROR_ACCESS_DENIED)
-            return -EACCES;
-        return -1;
-    }
-    return 0;
-}
-
-#if 0
-/***********************************************/
-/* removable device additional commands */
-
-static int raw_is_inserted(BlockDriverState *bs)
-{
-    return 1;
-}
-
-static int raw_media_changed(BlockDriverState *bs)
-{
-    return -ENOTSUP;
-}
-
-static int raw_eject(BlockDriverState *bs, int eject_flag)
-{
-    DWORD ret_count;
-
-    if (s->type == FTYPE_FILE)
-        return -ENOTSUP;
-    if (eject_flag) {
-        DeviceIoControl(s->hfile, IOCTL_STORAGE_EJECT_MEDIA,
-                        NULL, 0, NULL, 0, &lpBytesReturned, NULL);
-    } else {
-        DeviceIoControl(s->hfile, IOCTL_STORAGE_LOAD_MEDIA,
-                        NULL, 0, NULL, 0, &lpBytesReturned, NULL);
-    }
-}
-
-static int raw_set_locked(BlockDriverState *bs, int locked)
-{
-    return -ENOTSUP;
-}
-#endif
-
-static BlockDriver bdrv_host_device = {
-    .format_name       = "host_device",
-    .instance_size     = sizeof(BDRVRawState),
-    .bdrv_open         = hdev_open,
-    .bdrv_close                = raw_close,
-    .bdrv_flush                = raw_flush,
-
-    .bdrv_read         = raw_read,
-    .bdrv_write                = raw_write,
-    .bdrv_getlength    = raw_getlength,
-};
-
-static void bdrv_raw_init(void)
-{
-    bdrv_register(&bdrv_raw);
-    bdrv_register(&bdrv_host_device);
-}
-
-block_init(bdrv_raw_init);
diff --git a/block-vmdk.c b/block-vmdk.c
deleted file mode 100644 (file)
index 13866e9..0000000
+++ /dev/null
@@ -1,833 +0,0 @@
-/*
- * Block driver for the VMDK format
- *
- * Copyright (c) 2004 Fabrice Bellard
- * Copyright (c) 2005 Filip Navara
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
- * THE SOFTWARE.
- */
-
-#include "qemu-common.h"
-#include "block_int.h"
-#include "module.h"
-
-#define VMDK3_MAGIC (('C' << 24) | ('O' << 16) | ('W' << 8) | 'D')
-#define VMDK4_MAGIC (('K' << 24) | ('D' << 16) | ('M' << 8) | 'V')
-
-typedef struct {
-    uint32_t version;
-    uint32_t flags;
-    uint32_t disk_sectors;
-    uint32_t granularity;
-    uint32_t l1dir_offset;
-    uint32_t l1dir_size;
-    uint32_t file_sectors;
-    uint32_t cylinders;
-    uint32_t heads;
-    uint32_t sectors_per_track;
-} VMDK3Header;
-
-typedef struct {
-    uint32_t version;
-    uint32_t flags;
-    int64_t capacity;
-    int64_t granularity;
-    int64_t desc_offset;
-    int64_t desc_size;
-    int32_t num_gtes_per_gte;
-    int64_t rgd_offset;
-    int64_t gd_offset;
-    int64_t grain_offset;
-    char filler[1];
-    char check_bytes[4];
-} __attribute__((packed)) VMDK4Header;
-
-#define L2_CACHE_SIZE 16
-
-typedef struct BDRVVmdkState {
-    BlockDriverState *hd;
-    int64_t l1_table_offset;
-    int64_t l1_backup_table_offset;
-    uint32_t *l1_table;
-    uint32_t *l1_backup_table;
-    unsigned int l1_size;
-    uint32_t l1_entry_sectors;
-
-    unsigned int l2_size;
-    uint32_t *l2_cache;
-    uint32_t l2_cache_offsets[L2_CACHE_SIZE];
-    uint32_t l2_cache_counts[L2_CACHE_SIZE];
-
-    unsigned int cluster_sectors;
-    uint32_t parent_cid;
-    int is_parent;
-} BDRVVmdkState;
-
-typedef struct VmdkMetaData {
-    uint32_t offset;
-    unsigned int l1_index;
-    unsigned int l2_index;
-    unsigned int l2_offset;
-    int valid;
-} VmdkMetaData;
-
-typedef struct ActiveBDRVState{
-    BlockDriverState *hd;            // active image handler
-    uint64_t cluster_offset;         // current write offset
-}ActiveBDRVState;
-
-static ActiveBDRVState activeBDRV;
-
-
-static int vmdk_probe(const uint8_t *buf, int buf_size, const char *filename)
-{
-    uint32_t magic;
-
-    if (buf_size < 4)
-        return 0;
-    magic = be32_to_cpu(*(uint32_t *)buf);
-    if (magic == VMDK3_MAGIC ||
-        magic == VMDK4_MAGIC)
-        return 100;
-    else
-        return 0;
-}
-
-#define CHECK_CID 1
-
-#define SECTOR_SIZE 512
-#define DESC_SIZE 20*SECTOR_SIZE       // 20 sectors of 512 bytes each
-#define HEADER_SIZE 512                        // first sector of 512 bytes
-
-static uint32_t vmdk_read_cid(BlockDriverState *bs, int parent)
-{
-    BDRVVmdkState *s = bs->opaque;
-    char desc[DESC_SIZE];
-    uint32_t cid;
-    const char *p_name, *cid_str;
-    size_t cid_str_size;
-
-    /* the descriptor offset = 0x200 */
-    if (bdrv_pread(s->hd, 0x200, desc, DESC_SIZE) != DESC_SIZE)
-        return 0;
-
-    if (parent) {
-        cid_str = "parentCID";
-        cid_str_size = sizeof("parentCID");
-    } else {
-        cid_str = "CID";
-        cid_str_size = sizeof("CID");
-    }
-
-    if ((p_name = strstr(desc,cid_str)) != NULL) {
-        p_name += cid_str_size;
-        sscanf(p_name,"%x",&cid);
-    }
-
-    return cid;
-}
-
-static int vmdk_write_cid(BlockDriverState *bs, uint32_t cid)
-{
-    BDRVVmdkState *s = bs->opaque;
-    char desc[DESC_SIZE], tmp_desc[DESC_SIZE];
-    char *p_name, *tmp_str;
-
-    /* the descriptor offset = 0x200 */
-    if (bdrv_pread(s->hd, 0x200, desc, DESC_SIZE) != DESC_SIZE)
-        return -1;
-
-    tmp_str = strstr(desc,"parentCID");
-    pstrcpy(tmp_desc, sizeof(tmp_desc), tmp_str);
-    if ((p_name = strstr(desc,"CID")) != NULL) {
-        p_name += sizeof("CID");
-        snprintf(p_name, sizeof(desc) - (p_name - desc), "%x\n", cid);
-        pstrcat(desc, sizeof(desc), tmp_desc);
-    }
-
-    if (bdrv_pwrite(s->hd, 0x200, desc, DESC_SIZE) != DESC_SIZE)
-        return -1;
-    return 0;
-}
-
-static int vmdk_is_cid_valid(BlockDriverState *bs)
-{
-#ifdef CHECK_CID
-    BDRVVmdkState *s = bs->opaque;
-    BlockDriverState *p_bs = s->hd->backing_hd;
-    uint32_t cur_pcid;
-
-    if (p_bs) {
-        cur_pcid = vmdk_read_cid(p_bs,0);
-        if (s->parent_cid != cur_pcid)
-            // CID not valid
-            return 0;
-    }
-#endif
-    // CID valid
-    return 1;
-}
-
-static int vmdk_snapshot_create(const char *filename, const char *backing_file)
-{
-    int snp_fd, p_fd;
-    uint32_t p_cid;
-    char *p_name, *gd_buf, *rgd_buf;
-    const char *real_filename, *temp_str;
-    VMDK4Header header;
-    uint32_t gde_entries, gd_size;
-    int64_t gd_offset, rgd_offset, capacity, gt_size;
-    char p_desc[DESC_SIZE], s_desc[DESC_SIZE], hdr[HEADER_SIZE];
-    static const char desc_template[] =
-    "# Disk DescriptorFile\n"
-    "version=1\n"
-    "CID=%x\n"
-    "parentCID=%x\n"
-    "createType=\"monolithicSparse\"\n"
-    "parentFileNameHint=\"%s\"\n"
-    "\n"
-    "# Extent description\n"
-    "RW %u SPARSE \"%s\"\n"
-    "\n"
-    "# The Disk Data Base \n"
-    "#DDB\n"
-    "\n";
-
-    snp_fd = open(filename, O_RDWR | O_CREAT | O_TRUNC | O_BINARY | O_LARGEFILE, 0644);
-    if (snp_fd < 0)
-        return -1;
-    p_fd = open(backing_file, O_RDONLY | O_BINARY | O_LARGEFILE);
-    if (p_fd < 0) {
-        close(snp_fd);
-        return -1;
-    }
-
-    /* read the header */
-    if (lseek(p_fd, 0x0, SEEK_SET) == -1)
-        goto fail;
-    if (read(p_fd, hdr, HEADER_SIZE) != HEADER_SIZE)
-        goto fail;
-
-    /* write the header */
-    if (lseek(snp_fd, 0x0, SEEK_SET) == -1)
-        goto fail;
-    if (write(snp_fd, hdr, HEADER_SIZE) == -1)
-        goto fail;
-
-    memset(&header, 0, sizeof(header));
-    memcpy(&header,&hdr[4], sizeof(header)); // skip the VMDK4_MAGIC
-
-    ftruncate(snp_fd, header.grain_offset << 9);
-    /* the descriptor offset = 0x200 */
-    if (lseek(p_fd, 0x200, SEEK_SET) == -1)
-        goto fail;
-    if (read(p_fd, p_desc, DESC_SIZE) != DESC_SIZE)
-        goto fail;
-
-    if ((p_name = strstr(p_desc,"CID")) != NULL) {
-        p_name += sizeof("CID");
-        sscanf(p_name,"%x",&p_cid);
-    }
-
-    real_filename = filename;
-    if ((temp_str = strrchr(real_filename, '\\')) != NULL)
-        real_filename = temp_str + 1;
-    if ((temp_str = strrchr(real_filename, '/')) != NULL)
-        real_filename = temp_str + 1;
-    if ((temp_str = strrchr(real_filename, ':')) != NULL)
-        real_filename = temp_str + 1;
-
-    snprintf(s_desc, sizeof(s_desc), desc_template, p_cid, p_cid, backing_file,
-             (uint32_t)header.capacity, real_filename);
-
-    /* write the descriptor */
-    if (lseek(snp_fd, 0x200, SEEK_SET) == -1)
-        goto fail;
-    if (write(snp_fd, s_desc, strlen(s_desc)) == -1)
-        goto fail;
-
-    gd_offset = header.gd_offset * SECTOR_SIZE;     // offset of GD table
-    rgd_offset = header.rgd_offset * SECTOR_SIZE;   // offset of RGD table
-    capacity = header.capacity * SECTOR_SIZE;       // Extent size
-    /*
-     * Each GDE span 32M disk, means:
-     * 512 GTE per GT, each GTE points to grain
-     */
-    gt_size = (int64_t)header.num_gtes_per_gte * header.granularity * SECTOR_SIZE;
-    if (!gt_size)
-        goto fail;
-    gde_entries = (uint32_t)(capacity / gt_size);  // number of gde/rgde
-    gd_size = gde_entries * sizeof(uint32_t);
-
-    /* write RGD */
-    rgd_buf = qemu_malloc(gd_size);
-    if (lseek(p_fd, rgd_offset, SEEK_SET) == -1)
-        goto fail_rgd;
-    if (read(p_fd, rgd_buf, gd_size) != gd_size)
-        goto fail_rgd;
-    if (lseek(snp_fd, rgd_offset, SEEK_SET) == -1)
-        goto fail_rgd;
-    if (write(snp_fd, rgd_buf, gd_size) == -1)
-        goto fail_rgd;
-    qemu_free(rgd_buf);
-
-    /* write GD */
-    gd_buf = qemu_malloc(gd_size);
-    if (lseek(p_fd, gd_offset, SEEK_SET) == -1)
-        goto fail_gd;
-    if (read(p_fd, gd_buf, gd_size) != gd_size)
-        goto fail_gd;
-    if (lseek(snp_fd, gd_offset, SEEK_SET) == -1)
-        goto fail_gd;
-    if (write(snp_fd, gd_buf, gd_size) == -1)
-        goto fail_gd;
-    qemu_free(gd_buf);
-
-    close(p_fd);
-    close(snp_fd);
-    return 0;
-
-    fail_gd:
-    qemu_free(gd_buf);
-    fail_rgd:
-    qemu_free(rgd_buf);
-    fail:
-    close(p_fd);
-    close(snp_fd);
-    return -1;
-}
-
-static void vmdk_parent_close(BlockDriverState *bs)
-{
-    if (bs->backing_hd)
-        bdrv_close(bs->backing_hd);
-}
-
-static int parent_open = 0;
-static int vmdk_parent_open(BlockDriverState *bs, const char * filename)
-{
-    BDRVVmdkState *s = bs->opaque;
-    char *p_name;
-    char desc[DESC_SIZE];
-    char parent_img_name[1024];
-
-    /* the descriptor offset = 0x200 */
-    if (bdrv_pread(s->hd, 0x200, desc, DESC_SIZE) != DESC_SIZE)
-        return -1;
-
-    if ((p_name = strstr(desc,"parentFileNameHint")) != NULL) {
-        char *end_name;
-        struct stat file_buf;
-
-        p_name += sizeof("parentFileNameHint") + 1;
-        if ((end_name = strchr(p_name,'\"')) == NULL)
-            return -1;
-        if ((end_name - p_name) > sizeof (s->hd->backing_file) - 1)
-            return -1;
-
-        pstrcpy(s->hd->backing_file, end_name - p_name + 1, p_name);
-        if (stat(s->hd->backing_file, &file_buf) != 0) {
-            path_combine(parent_img_name, sizeof(parent_img_name),
-                         filename, s->hd->backing_file);
-        } else {
-            pstrcpy(parent_img_name, sizeof(parent_img_name),
-                    s->hd->backing_file);
-        }
-
-        s->hd->backing_hd = bdrv_new("");
-        if (!s->hd->backing_hd) {
-            failure:
-            bdrv_close(s->hd);
-            return -1;
-        }
-        parent_open = 1;
-        if (bdrv_open(s->hd->backing_hd, parent_img_name, BDRV_O_RDONLY) < 0)
-            goto failure;
-        parent_open = 0;
-    }
-
-    return 0;
-}
-
-static int vmdk_open(BlockDriverState *bs, const char *filename, int flags)
-{
-    BDRVVmdkState *s = bs->opaque;
-    uint32_t magic;
-    int l1_size, i, ret;
-
-    if (parent_open)
-        // Parent must be opened as RO.
-        flags = BDRV_O_RDONLY;
-
-    ret = bdrv_file_open(&s->hd, filename, flags);
-    if (ret < 0)
-        return ret;
-    if (bdrv_pread(s->hd, 0, &magic, sizeof(magic)) != sizeof(magic))
-        goto fail;
-
-    magic = be32_to_cpu(magic);
-    if (magic == VMDK3_MAGIC) {
-        VMDK3Header header;
-
-        if (bdrv_pread(s->hd, sizeof(magic), &header, sizeof(header)) != sizeof(header))
-            goto fail;
-        s->cluster_sectors = le32_to_cpu(header.granularity);
-        s->l2_size = 1 << 9;
-        s->l1_size = 1 << 6;
-        bs->total_sectors = le32_to_cpu(header.disk_sectors);
-        s->l1_table_offset = le32_to_cpu(header.l1dir_offset) << 9;
-        s->l1_backup_table_offset = 0;
-        s->l1_entry_sectors = s->l2_size * s->cluster_sectors;
-    } else if (magic == VMDK4_MAGIC) {
-        VMDK4Header header;
-
-        if (bdrv_pread(s->hd, sizeof(magic), &header, sizeof(header)) != sizeof(header))
-            goto fail;
-        bs->total_sectors = le64_to_cpu(header.capacity);
-        s->cluster_sectors = le64_to_cpu(header.granularity);
-        s->l2_size = le32_to_cpu(header.num_gtes_per_gte);
-        s->l1_entry_sectors = s->l2_size * s->cluster_sectors;
-        if (s->l1_entry_sectors <= 0)
-            goto fail;
-        s->l1_size = (bs->total_sectors + s->l1_entry_sectors - 1)
-            / s->l1_entry_sectors;
-        s->l1_table_offset = le64_to_cpu(header.rgd_offset) << 9;
-        s->l1_backup_table_offset = le64_to_cpu(header.gd_offset) << 9;
-
-        if (parent_open)
-            s->is_parent = 1;
-        else
-            s->is_parent = 0;
-
-        // try to open parent images, if exist
-        if (vmdk_parent_open(bs, filename) != 0)
-            goto fail;
-        // write the CID once after the image creation
-        s->parent_cid = vmdk_read_cid(bs,1);
-    } else {
-        goto fail;
-    }
-
-    /* read the L1 table */
-    l1_size = s->l1_size * sizeof(uint32_t);
-    s->l1_table = qemu_malloc(l1_size);
-    if (bdrv_pread(s->hd, s->l1_table_offset, s->l1_table, l1_size) != l1_size)
-        goto fail;
-    for(i = 0; i < s->l1_size; i++) {
-        le32_to_cpus(&s->l1_table[i]);
-    }
-
-    if (s->l1_backup_table_offset) {
-        s->l1_backup_table = qemu_malloc(l1_size);
-        if (bdrv_pread(s->hd, s->l1_backup_table_offset, s->l1_backup_table, l1_size) != l1_size)
-            goto fail;
-        for(i = 0; i < s->l1_size; i++) {
-            le32_to_cpus(&s->l1_backup_table[i]);
-        }
-    }
-
-    s->l2_cache = qemu_malloc(s->l2_size * L2_CACHE_SIZE * sizeof(uint32_t));
-    return 0;
- fail:
-    qemu_free(s->l1_backup_table);
-    qemu_free(s->l1_table);
-    qemu_free(s->l2_cache);
-    bdrv_delete(s->hd);
-    return -1;
-}
-
-static uint64_t get_cluster_offset(BlockDriverState *bs, VmdkMetaData *m_data,
-                                   uint64_t offset, int allocate);
-
-static int get_whole_cluster(BlockDriverState *bs, uint64_t cluster_offset,
-                             uint64_t offset, int allocate)
-{
-    uint64_t parent_cluster_offset;
-    BDRVVmdkState *s = bs->opaque;
-    uint8_t  whole_grain[s->cluster_sectors*512];        // 128 sectors * 512 bytes each = grain size 64KB
-
-    // we will be here if it's first write on non-exist grain(cluster).
-    // try to read from parent image, if exist
-    if (s->hd->backing_hd) {
-        BDRVVmdkState *ps = s->hd->backing_hd->opaque;
-
-        if (!vmdk_is_cid_valid(bs))
-            return -1;
-
-        parent_cluster_offset = get_cluster_offset(s->hd->backing_hd, NULL, offset, allocate);
-
-        if (parent_cluster_offset) {
-            BDRVVmdkState *act_s = activeBDRV.hd->opaque;
-
-            if (bdrv_pread(ps->hd, parent_cluster_offset, whole_grain, ps->cluster_sectors*512) != ps->cluster_sectors*512)
-                return -1;
-
-            //Write grain only into the active image
-            if (bdrv_pwrite(act_s->hd, activeBDRV.cluster_offset << 9, whole_grain, sizeof(whole_grain)) != sizeof(whole_grain))
-                return -1;
-        }
-    }
-    return 0;
-}
-
-static int vmdk_L2update(BlockDriverState *bs, VmdkMetaData *m_data)
-{
-    BDRVVmdkState *s = bs->opaque;
-
-    /* update L2 table */
-    if (bdrv_pwrite(s->hd, ((int64_t)m_data->l2_offset * 512) + (m_data->l2_index * sizeof(m_data->offset)),
-                    &(m_data->offset), sizeof(m_data->offset)) != sizeof(m_data->offset))
-        return -1;
-    /* update backup L2 table */
-    if (s->l1_backup_table_offset != 0) {
-        m_data->l2_offset = s->l1_backup_table[m_data->l1_index];
-        if (bdrv_pwrite(s->hd, ((int64_t)m_data->l2_offset * 512) + (m_data->l2_index * sizeof(m_data->offset)),
-                        &(m_data->offset), sizeof(m_data->offset)) != sizeof(m_data->offset))
-            return -1;
-    }
-
-    return 0;
-}
-
-static uint64_t get_cluster_offset(BlockDriverState *bs, VmdkMetaData *m_data,
-                                   uint64_t offset, int allocate)
-{
-    BDRVVmdkState *s = bs->opaque;
-    unsigned int l1_index, l2_offset, l2_index;
-    int min_index, i, j;
-    uint32_t min_count, *l2_table, tmp = 0;
-    uint64_t cluster_offset;
-
-    if (m_data)
-        m_data->valid = 0;
-
-    l1_index = (offset >> 9) / s->l1_entry_sectors;
-    if (l1_index >= s->l1_size)
-        return 0;
-    l2_offset = s->l1_table[l1_index];
-    if (!l2_offset)
-        return 0;
-    for(i = 0; i < L2_CACHE_SIZE; i++) {
-        if (l2_offset == s->l2_cache_offsets[i]) {
-            /* increment the hit count */
-            if (++s->l2_cache_counts[i] == 0xffffffff) {
-                for(j = 0; j < L2_CACHE_SIZE; j++) {
-                    s->l2_cache_counts[j] >>= 1;
-                }
-            }
-            l2_table = s->l2_cache + (i * s->l2_size);
-            goto found;
-        }
-    }
-    /* not found: load a new entry in the least used one */
-    min_index = 0;
-    min_count = 0xffffffff;
-    for(i = 0; i < L2_CACHE_SIZE; i++) {
-        if (s->l2_cache_counts[i] < min_count) {
-            min_count = s->l2_cache_counts[i];
-            min_index = i;
-        }
-    }
-    l2_table = s->l2_cache + (min_index * s->l2_size);
-    if (bdrv_pread(s->hd, (int64_t)l2_offset * 512, l2_table, s->l2_size * sizeof(uint32_t)) !=
-                                                                        s->l2_size * sizeof(uint32_t))
-        return 0;
-
-    s->l2_cache_offsets[min_index] = l2_offset;
-    s->l2_cache_counts[min_index] = 1;
- found:
-    l2_index = ((offset >> 9) / s->cluster_sectors) % s->l2_size;
-    cluster_offset = le32_to_cpu(l2_table[l2_index]);
-
-    if (!cluster_offset) {
-        if (!allocate)
-            return 0;
-        // Avoid the L2 tables update for the images that have snapshots.
-        if (!s->is_parent) {
-            cluster_offset = bdrv_getlength(s->hd);
-            bdrv_truncate(s->hd, cluster_offset + (s->cluster_sectors << 9));
-
-            cluster_offset >>= 9;
-            tmp = cpu_to_le32(cluster_offset);
-            l2_table[l2_index] = tmp;
-            // Save the active image state
-            activeBDRV.cluster_offset = cluster_offset;
-            activeBDRV.hd = bs;
-        }
-        /* First of all we write grain itself, to avoid race condition
-         * that may to corrupt the image.
-         * This problem may occur because of insufficient space on host disk
-         * or inappropriate VM shutdown.
-         */
-        if (get_whole_cluster(bs, cluster_offset, offset, allocate) == -1)
-            return 0;
-
-        if (m_data) {
-            m_data->offset = tmp;
-            m_data->l1_index = l1_index;
-            m_data->l2_index = l2_index;
-            m_data->l2_offset = l2_offset;
-            m_data->valid = 1;
-        }
-    }
-    cluster_offset <<= 9;
-    return cluster_offset;
-}
-
-static int vmdk_is_allocated(BlockDriverState *bs, int64_t sector_num,
-                             int nb_sectors, int *pnum)
-{
-    BDRVVmdkState *s = bs->opaque;
-    int index_in_cluster, n;
-    uint64_t cluster_offset;
-
-    cluster_offset = get_cluster_offset(bs, NULL, sector_num << 9, 0);
-    index_in_cluster = sector_num % s->cluster_sectors;
-    n = s->cluster_sectors - index_in_cluster;
-    if (n > nb_sectors)
-        n = nb_sectors;
-    *pnum = n;
-    return (cluster_offset != 0);
-}
-
-static int vmdk_read(BlockDriverState *bs, int64_t sector_num,
-                    uint8_t *buf, int nb_sectors)
-{
-    BDRVVmdkState *s = bs->opaque;
-    int index_in_cluster, n, ret;
-    uint64_t cluster_offset;
-
-    while (nb_sectors > 0) {
-        cluster_offset = get_cluster_offset(bs, NULL, sector_num << 9, 0);
-        index_in_cluster = sector_num % s->cluster_sectors;
-        n = s->cluster_sectors - index_in_cluster;
-        if (n > nb_sectors)
-            n = nb_sectors;
-        if (!cluster_offset) {
-            // try to read from parent image, if exist
-            if (s->hd->backing_hd) {
-                if (!vmdk_is_cid_valid(bs))
-                    return -1;
-                ret = bdrv_read(s->hd->backing_hd, sector_num, buf, n);
-                if (ret < 0)
-                    return -1;
-            } else {
-                memset(buf, 0, 512 * n);
-            }
-        } else {
-            if(bdrv_pread(s->hd, cluster_offset + index_in_cluster * 512, buf, n * 512) != n * 512)
-                return -1;
-        }
-        nb_sectors -= n;
-        sector_num += n;
-        buf += n * 512;
-    }
-    return 0;
-}
-
-static int vmdk_write(BlockDriverState *bs, int64_t sector_num,
-                     const uint8_t *buf, int nb_sectors)
-{
-    BDRVVmdkState *s = bs->opaque;
-    VmdkMetaData m_data;
-    int index_in_cluster, n;
-    uint64_t cluster_offset;
-    static int cid_update = 0;
-
-    if (sector_num > bs->total_sectors) {
-        fprintf(stderr,
-                "(VMDK) Wrong offset: sector_num=0x%" PRIx64
-                " total_sectors=0x%" PRIx64 "\n",
-                sector_num, bs->total_sectors);
-        return -1;
-    }
-
-    while (nb_sectors > 0) {
-        index_in_cluster = sector_num & (s->cluster_sectors - 1);
-        n = s->cluster_sectors - index_in_cluster;
-        if (n > nb_sectors)
-            n = nb_sectors;
-        cluster_offset = get_cluster_offset(bs, &m_data, sector_num << 9, 1);
-        if (!cluster_offset)
-            return -1;
-
-        if (bdrv_pwrite(s->hd, cluster_offset + index_in_cluster * 512, buf, n * 512) != n * 512)
-            return -1;
-        if (m_data.valid) {
-            /* update L2 tables */
-            if (vmdk_L2update(bs, &m_data) == -1)
-                return -1;
-        }
-        nb_sectors -= n;
-        sector_num += n;
-        buf += n * 512;
-
-        // update CID on the first write every time the virtual disk is opened
-        if (!cid_update) {
-            vmdk_write_cid(bs, time(NULL));
-            cid_update++;
-        }
-    }
-    return 0;
-}
-
-static int vmdk_create(const char *filename, int64_t total_size,
-                       const char *backing_file, int flags)
-{
-    int fd, i;
-    VMDK4Header header;
-    uint32_t tmp, magic, grains, gd_size, gt_size, gt_count;
-    static const char desc_template[] =
-        "# Disk DescriptorFile\n"
-        "version=1\n"
-        "CID=%x\n"
-        "parentCID=ffffffff\n"
-        "createType=\"monolithicSparse\"\n"
-        "\n"
-        "# Extent description\n"
-        "RW %" PRId64 " SPARSE \"%s\"\n"
-        "\n"
-        "# The Disk Data Base \n"
-        "#DDB\n"
-        "\n"
-        "ddb.virtualHWVersion = \"%d\"\n"
-        "ddb.geometry.cylinders = \"%" PRId64 "\"\n"
-        "ddb.geometry.heads = \"16\"\n"
-        "ddb.geometry.sectors = \"63\"\n"
-        "ddb.adapterType = \"ide\"\n";
-    char desc[1024];
-    const char *real_filename, *temp_str;
-
-    /* XXX: add support for backing file */
-    if (backing_file) {
-        return vmdk_snapshot_create(filename, backing_file);
-    }
-
-    fd = open(filename, O_WRONLY | O_CREAT | O_TRUNC | O_BINARY | O_LARGEFILE,
-              0644);
-    if (fd < 0)
-        return -1;
-    magic = cpu_to_be32(VMDK4_MAGIC);
-    memset(&header, 0, sizeof(header));
-    header.version = cpu_to_le32(1);
-    header.flags = cpu_to_le32(3); /* ?? */
-    header.capacity = cpu_to_le64(total_size);
-    header.granularity = cpu_to_le64(128);
-    header.num_gtes_per_gte = cpu_to_le32(512);
-
-    grains = (total_size + header.granularity - 1) / header.granularity;
-    gt_size = ((header.num_gtes_per_gte * sizeof(uint32_t)) + 511) >> 9;
-    gt_count = (grains + header.num_gtes_per_gte - 1) / header.num_gtes_per_gte;
-    gd_size = (gt_count * sizeof(uint32_t) + 511) >> 9;
-
-    header.desc_offset = 1;
-    header.desc_size = 20;
-    header.rgd_offset = header.desc_offset + header.desc_size;
-    header.gd_offset = header.rgd_offset + gd_size + (gt_size * gt_count);
-    header.grain_offset =
-       ((header.gd_offset + gd_size + (gt_size * gt_count) +
-         header.granularity - 1) / header.granularity) *
-        header.granularity;
-
-    header.desc_offset = cpu_to_le64(header.desc_offset);
-    header.desc_size = cpu_to_le64(header.desc_size);
-    header.rgd_offset = cpu_to_le64(header.rgd_offset);
-    header.gd_offset = cpu_to_le64(header.gd_offset);
-    header.grain_offset = cpu_to_le64(header.grain_offset);
-
-    header.check_bytes[0] = 0xa;
-    header.check_bytes[1] = 0x20;
-    header.check_bytes[2] = 0xd;
-    header.check_bytes[3] = 0xa;
-
-    /* write all the data */
-    write(fd, &magic, sizeof(magic));
-    write(fd, &header, sizeof(header));
-
-    ftruncate(fd, header.grain_offset << 9);
-
-    /* write grain directory */
-    lseek(fd, le64_to_cpu(header.rgd_offset) << 9, SEEK_SET);
-    for (i = 0, tmp = header.rgd_offset + gd_size;
-         i < gt_count; i++, tmp += gt_size)
-        write(fd, &tmp, sizeof(tmp));
-
-    /* write backup grain directory */
-    lseek(fd, le64_to_cpu(header.gd_offset) << 9, SEEK_SET);
-    for (i = 0, tmp = header.gd_offset + gd_size;
-         i < gt_count; i++, tmp += gt_size)
-        write(fd, &tmp, sizeof(tmp));
-
-    /* compose the descriptor */
-    real_filename = filename;
-    if ((temp_str = strrchr(real_filename, '\\')) != NULL)
-        real_filename = temp_str + 1;
-    if ((temp_str = strrchr(real_filename, '/')) != NULL)
-        real_filename = temp_str + 1;
-    if ((temp_str = strrchr(real_filename, ':')) != NULL)
-        real_filename = temp_str + 1;
-    snprintf(desc, sizeof(desc), desc_template, (unsigned int)time(NULL),
-             total_size, real_filename,
-             (flags & BLOCK_FLAG_COMPAT6 ? 6 : 4),
-             total_size / (int64_t)(63 * 16));
-
-    /* write the descriptor */
-    lseek(fd, le64_to_cpu(header.desc_offset) << 9, SEEK_SET);
-    write(fd, desc, strlen(desc));
-
-    close(fd);
-    return 0;
-}
-
-static void vmdk_close(BlockDriverState *bs)
-{
-    BDRVVmdkState *s = bs->opaque;
-
-    qemu_free(s->l1_table);
-    qemu_free(s->l2_cache);
-    // try to close parent image, if exist
-    vmdk_parent_close(s->hd);
-    bdrv_delete(s->hd);
-}
-
-static void vmdk_flush(BlockDriverState *bs)
-{
-    BDRVVmdkState *s = bs->opaque;
-    bdrv_flush(s->hd);
-}
-
-static BlockDriver bdrv_vmdk = {
-    .format_name       = "vmdk",
-    .instance_size     = sizeof(BDRVVmdkState),
-    .bdrv_probe                = vmdk_probe,
-    .bdrv_open         = vmdk_open,
-    .bdrv_read         = vmdk_read,
-    .bdrv_write                = vmdk_write,
-    .bdrv_close                = vmdk_close,
-    .bdrv_create       = vmdk_create,
-    .bdrv_flush                = vmdk_flush,
-    .bdrv_is_allocated = vmdk_is_allocated,
-};
-
-static void bdrv_vmdk_init(void)
-{
-    bdrv_register(&bdrv_vmdk);
-}
-
-block_init(bdrv_vmdk_init);
diff --git a/block-vpc.c b/block-vpc.c
deleted file mode 100644 (file)
index 211ae5c..0000000
+++ /dev/null
@@ -1,606 +0,0 @@
-/*
- * Block driver for Conectix/Microsoft Virtual PC images
- *
- * Copyright (c) 2005 Alex Beregszaszi
- * Copyright (c) 2009 Kevin Wolf <kwolf@suse.de>
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
- * THE SOFTWARE.
- */
-#include "qemu-common.h"
-#include "block_int.h"
-#include "module.h"
-
-/**************************************************************/
-
-#define HEADER_SIZE 512
-
-//#define CACHE
-
-// Values of the footer's "type" field (vpc_create stores VHD_DYNAMIC there)
-enum vhd_type {
-    VHD_FIXED           = 2,
-    VHD_DYNAMIC         = 3,
-    VHD_DIFFERENCING    = 4,
-};
-
-// Seconds since Jan 1, 2000 0:00:00 (UTC)
-#define VHD_TIMESTAMP_BASE 946684800
-
-// Hard Disk Footer. vpc_open reads it from offset 0 of the image and
-// vpc_create writes it both at offset 0 and after the BAT.
-// always big-endian
-struct vhd_footer {
-    char        creator[8]; // "conectix"
-    uint32_t    features;
-    uint32_t    version;
-
-    // Offset of next header structure, 0xFFFFFFFF if none
-    uint64_t    data_offset;
-
-    // Seconds since Jan 1, 2000 0:00:00 (UTC)
-    uint32_t    timestamp;
-
-    char        creator_app[4]; // "vpc "
-    uint16_t    major;
-    uint16_t    minor;
-    char        creator_os[4]; // "Wi2k"
-
-    // Image size in bytes (vpc_create stores total_sectors * 512 in both)
-    uint64_t    orig_size;
-    uint64_t    size;
-
-    // Disk geometry; vpc_open derives the visible sector count from
-    // cyls * heads * secs_per_cyl rather than from the size fields
-    uint16_t    cyls;
-    uint8_t     heads;
-    uint8_t     secs_per_cyl;
-
-    // One of enum vhd_type
-    uint32_t    type;
-
-    // Checksum of the Hard Disk Footer ("one's complement of the sum of all
-    // the bytes in the footer without the checksum field")
-    uint32_t    checksum;
-
-    // UUID used to identify a parent hard disk (backing file)
-    uint8_t     uuid[16];
-
-    uint8_t     in_saved_state;
-};
-
-// Dynamic Disk Header. vpc_open reads it from the offset given by the
-// footer's data_offset; its fields are converted with the be*_to_cpu helpers.
-struct vhd_dyndisk_header {
-    char        magic[8]; // "cxsparse"
-
-    // Offset of next header structure, 0xFFFFFFFF if none
-    uint64_t    data_offset;
-
-    // Offset of the Block Allocation Table (BAT)
-    uint64_t    table_offset;
-
-    uint32_t    version;
-    uint32_t    max_table_entries; // 32bit/entry
-
-    // 2 MB by default, must be a power of two
-    uint32_t    block_size;
-
-    uint32_t    checksum;
-    uint8_t     parent_uuid[16];
-    uint32_t    parent_timestamp;
-    uint32_t    reserved;
-
-    // Backing file name (in UTF-16)
-    uint8_t     parent_name[512];
-
-    // Not read or written by the code in this file
-    struct {
-        uint32_t    platform;
-        uint32_t    data_space;
-        uint32_t    data_length;
-        uint32_t    reserved;
-        uint64_t    data_offset;
-    } parent_locator[8];
-};
-
-// Per-image state of the VPC driver (stored in bs->opaque)
-typedef struct BDRVVPCState {
-    // Underlying image file, opened with bdrv_file_open
-    BlockDriverState *hd;
-
-    // Footer as read from offset 0; rewrite_footer writes it back out
-    uint8_t footer_buf[HEADER_SIZE];
-    // File offset where the next block is allocated; the footer is
-    // rewritten at this offset after each allocation
-    uint64_t free_data_block_offset;
-    // Number of 32-bit entries in pagetable
-    int max_table_entries;
-    // In-memory copy of the BAT, converted to host byte order by vpc_open
-    uint32_t *pagetable;
-    // File offset of the BAT
-    uint64_t bat_offset;
-    // Offset of the block bitmap most recently set to all-ones by
-    // get_sector_offset, (int64_t)-1 right after open
-    uint64_t last_bitmap_offset;
-
-    // Data bytes per block, taken from the dynamic disk header
-    uint32_t block_size;
-    // Size in bytes of the bitmap preceding each block's data (rounded up
-    // to a multiple of 512 in vpc_open)
-    uint32_t bitmap_size;
-
-#ifdef CACHE
-    uint8_t *pageentry_u8;
-    uint32_t *pageentry_u32;
-    uint16_t *pageentry_u16;
-
-    uint64_t last_bitmap;
-#endif
-} BDRVVPCState;
-
-/*
- * Computes the VHD checksum of a buffer: the one's complement of the sum of
- * the bytes buf[0] .. buf[size - 1]. The buffer is only read.
- *
- * For size == 0 the result is 0xFFFFFFFF.
- */
-static uint32_t vpc_checksum(const uint8_t* buf, size_t size)
-{
-    uint32_t res = 0;
-    size_t i;   // same type as size, so the comparison cannot go wrong
-
-    for (i = 0; i < size; i++)
-        res += buf[i];
-
-    return ~res;
-}
-
-
-/*
- * Format probe: returns 100 if the first 8 bytes of buf match the
- * "conectix" creator string, 0 otherwise. filename is not used.
- */
-static int vpc_probe(const uint8_t *buf, int buf_size, const char *filename)
-{
-    static const char signature[] = "conectix";
-
-    if (buf_size < 8)
-        return 0;
-
-    return strncmp((const char *)buf, signature, 8) == 0 ? 100 : 0;
-}
-
-/*
- * Opens a dynamic VHD image.
- *
- * Reads the footer copy at offset 0 into s->footer_buf, derives the visible
- * size from the footer geometry, then reads the dynamic disk header and the
- * Block Allocation Table (BAT) into s->pagetable.
- *
- * Returns 0 on success, the error of bdrv_file_open if the file cannot be
- * opened, and -1 on any other failure (the image file is closed again and
- * the BAT buffer is released).
- */
-static int vpc_open(BlockDriverState *bs, const char *filename, int flags)
-{
-    BDRVVPCState *s = bs->opaque;
-    int ret, i;
-    struct vhd_footer* footer;
-    struct vhd_dyndisk_header* dyndisk_header;
-    uint8_t buf[HEADER_SIZE];
-    uint32_t stored_checksum;
-
-    ret = bdrv_file_open(&s->hd, filename, flags);
-    if (ret < 0)
-        return ret;
-
-    if (bdrv_pread(s->hd, 0, s->footer_buf, HEADER_SIZE) != HEADER_SIZE)
-        goto fail;
-
-    footer = (struct vhd_footer*) s->footer_buf;
-    if (strncmp(footer->creator, "conectix", 8))
-        goto fail;
-
-    // The checksum is computed with the checksum field set to zero. The
-    // stored value must be put back afterwards: footer_buf is what
-    // rewrite_footer() writes into the image when it grows, and a zeroed
-    // field would leave the image with an invalid footer checksum.
-    stored_checksum = footer->checksum;
-    footer->checksum = 0;
-    if (vpc_checksum(s->footer_buf, HEADER_SIZE) !=
-            be32_to_cpu(stored_checksum))
-        fprintf(stderr, "block-vpc: The header checksum of '%s' is "
-            "incorrect.\n", filename);
-    footer->checksum = stored_checksum;
-
-    // The visible size of an image in Virtual PC depends on the geometry
-    // rather than on the size stored in the footer (the size in the footer
-    // is too large usually)
-    bs->total_sectors = (int64_t)
-        be16_to_cpu(footer->cyls) * footer->heads * footer->secs_per_cyl;
-
-    if (bdrv_pread(s->hd, be64_to_cpu(footer->data_offset), buf, HEADER_SIZE)
-            != HEADER_SIZE)
-        goto fail;
-
-    dyndisk_header = (struct vhd_dyndisk_header*) buf;
-
-    if (strncmp(dyndisk_header->magic, "cxsparse", 8))
-        goto fail;
-
-
-    s->block_size = be32_to_cpu(dyndisk_header->block_size);
-    // block_size is used as a divisor when sectors are mapped to blocks
-    if (s->block_size == 0)
-        goto fail;
-    s->bitmap_size = ((s->block_size / (8 * 512)) + 511) & ~511;
-
-    s->max_table_entries = be32_to_cpu(dyndisk_header->max_table_entries);
-    // The BAT size in bytes (entries * 4) is computed in int arithmetic
-    if (s->max_table_entries < 0 || s->max_table_entries > 0x7fffffff / 4)
-        goto fail;
-    s->pagetable = qemu_malloc(s->max_table_entries * 4);
-
-    s->bat_offset = be64_to_cpu(dyndisk_header->table_offset);
-    if (bdrv_pread(s->hd, s->bat_offset, s->pagetable,
-            s->max_table_entries * 4) != s->max_table_entries * 4)
-        goto fail_pagetable;
-
-    // New blocks are appended after the BAT or after the last block that is
-    // already allocated, whichever ends later
-    s->free_data_block_offset =
-        (s->bat_offset + (s->max_table_entries * 4) + 511) & ~511;
-
-    for (i = 0; i < s->max_table_entries; i++) {
-        be32_to_cpus(&s->pagetable[i]);
-        if (s->pagetable[i] != 0xFFFFFFFF) {
-            int64_t next = (512 * (int64_t) s->pagetable[i]) +
-                s->bitmap_size + s->block_size;
-
-            if (next> s->free_data_block_offset)
-                s->free_data_block_offset = next;
-        }
-    }
-
-    s->last_bitmap_offset = (int64_t) -1;
-
-#ifdef CACHE
-    s->pageentry_u8 = qemu_malloc(512);
-    s->pageentry_u32 = s->pageentry_u8;
-    s->pageentry_u16 = s->pageentry_u8;
-    s->last_pagetable = -1;
-#endif
-
-    return 0;
-
- fail_pagetable:
-    // the BAT buffer is only allocated once the headers have been accepted
-    qemu_free(s->pagetable);
-    s->pagetable = NULL;
- fail:
-    bdrv_delete(s->hd);
-    return -1;
-}
-
-/*
- * Returns the absolute byte offset of the given sector in the image file.
- * If the sector is not allocated, -1 is returned instead.
- *
- * The parameter write must be 1 if the offset will be used for a write
- * operation (the block bitmaps is updated then), 0 otherwise.
- *
- * A sector counts as unallocated when its BAT index is past
- * max_table_entries or its BAT entry is 0xffffffff.
- */
-static inline int64_t get_sector_offset(BlockDriverState *bs,
-    int64_t sector_num, int write)
-{
-    BDRVVPCState *s = bs->opaque;
-    uint64_t offset = sector_num * 512;
-    uint64_t bitmap_offset, block_offset;
-    uint32_t pagetable_index, pageentry_index;
-
-    // Split the byte offset into a BAT index and a sector index in the block
-    pagetable_index = offset / s->block_size;
-    pageentry_index = (offset % s->block_size) / 512;
-
-    if (pagetable_index >= s->max_table_entries || s->pagetable[pagetable_index] == 0xffffffff)
-        return -1; // not allocated
-
-    // BAT entries count 512-byte sectors; each block starts with its
-    // bitmap, followed by the data sectors
-    bitmap_offset = 512 * (uint64_t) s->pagetable[pagetable_index];
-    block_offset = bitmap_offset + s->bitmap_size + (512 * pageentry_index);
-
-    // We must ensure that we don't write to any sectors which are marked as
-    // unused in the bitmap. We get away with setting all bits in the block
-    // bitmap each time we write to a new block. This might cause Virtual PC to
-    // miss sparse read optimization, but it's not a problem in terms of
-    // correctness.
-    if (write && (s->last_bitmap_offset != bitmap_offset)) {
-        uint8_t bitmap[s->bitmap_size];
-
-        s->last_bitmap_offset = bitmap_offset;
-        memset(bitmap, 0xff, s->bitmap_size);
-        // NOTE(review): the result of this write is not checked
-        bdrv_pwrite(s->hd, bitmap_offset, bitmap, s->bitmap_size);
-    }
-
-//    printf("sector: %" PRIx64 ", index: %x, offset: %x, bioff: %" PRIx64 ", bloff: %" PRIx64 "\n",
-//     sector_num, pagetable_index, pageentry_index,
-//     bitmap_offset, block_offset);
-
-// disabled by reason
-// NOTE(review): the disabled code below uses s->fd, i and bitmap_entry,
-// none of which are declared in this function or in BDRVVPCState
-#if 0
-#ifdef CACHE
-    if (bitmap_offset != s->last_bitmap)
-    {
-       lseek(s->fd, bitmap_offset, SEEK_SET);
-
-       s->last_bitmap = bitmap_offset;
-
-       // Scary! Bitmap is stored as big endian 32bit entries,
-       // while we used to look it up byte by byte
-       read(s->fd, s->pageentry_u8, 512);
-       for (i = 0; i < 128; i++)
-           be32_to_cpus(&s->pageentry_u32[i]);
-    }
-
-    if ((s->pageentry_u8[pageentry_index / 8] >> (pageentry_index % 8)) & 1)
-       return -1;
-#else
-    lseek(s->fd, bitmap_offset + (pageentry_index / 8), SEEK_SET);
-
-    read(s->fd, &bitmap_entry, 1);
-
-    if ((bitmap_entry >> (pageentry_index % 8)) & 1)
-       return -1; // not allocated
-#endif
-#endif
-
-    return block_offset;
-}
-
-/*
- * Writes the in-memory footer copy (s->footer_buf) at
- * s->free_data_block_offset, i.e. at the end of the image file. This is
- * needed whenever the file grows, because the new data overwrites the old
- * footer.
- *
- * Returns 0 on success and the negative bdrv_pwrite result on error
- */
-static int rewrite_footer(BlockDriverState* bs)
-{
-    BDRVVPCState *state = bs->opaque;
-    int64_t footer_pos = state->free_data_block_offset;
-    int rc = bdrv_pwrite(state->hd, footer_pos, state->footer_buf,
-                         HEADER_SIZE);
-
-    return rc < 0 ? rc : 0;
-}
-
-/*
- * Allocates a new block. This involves writing a new footer and updating
- * the Block Allocation Table to use the space at the old end of the image
- * file (overwriting the old footer)
- *
- * On failure the in-memory BAT entry and free_data_block_offset are put
- * back to their previous values, so a later write can retry the allocation.
- *
- * Returns the sectors' offset in the image file on success and < 0 on error
- */
-static int64_t alloc_block(BlockDriverState* bs, int64_t sector_num)
-{
-    BDRVVPCState *s = bs->opaque;
-    int64_t bat_offset;
-    uint32_t index, bat_value;
-    int ret;
-    uint8_t bitmap[s->bitmap_size];
-
-    // Check if sector_num is valid
-    if ((sector_num < 0) || (sector_num > bs->total_sectors))
-        return -1;
-
-    // total_sectors comes from the footer geometry while the BAT size comes
-    // from the dynamic disk header, so the index must be checked separately
-    index = (sector_num * 512) / s->block_size;
-    if (index >= (uint32_t) s->max_table_entries)
-        return -1;
-
-    // Write entry into in-memory BAT
-    if (s->pagetable[index] != 0xFFFFFFFF)
-        return -1;
-
-    s->pagetable[index] = s->free_data_block_offset / 512;
-
-    // Initialize the block's bitmap
-    memset(bitmap, 0xff, s->bitmap_size);
-    ret = bdrv_pwrite(s->hd, s->free_data_block_offset, bitmap,
-                      s->bitmap_size);
-    if (ret < 0)
-        goto fail_entry;
-
-    // Write new footer (the old one will be overwritten)
-    s->free_data_block_offset += s->block_size + s->bitmap_size;
-    ret = rewrite_footer(bs);
-    if (ret < 0)
-        goto fail;
-
-    // Write BAT entry to disk (the on-disk BAT is big-endian; the byte swap
-    // is the same operation in both directions)
-    bat_offset = s->bat_offset + (4 * index);
-    bat_value = be32_to_cpu(s->pagetable[index]);
-    ret = bdrv_pwrite(s->hd, bat_offset, &bat_value, 4);
-    if (ret < 0)
-        goto fail;
-
-    return get_sector_offset(bs, sector_num, 0);
-
-fail:
-    s->free_data_block_offset -= (s->block_size + s->bitmap_size);
-fail_entry:
-    // the block was never recorded on disk, so forget it in memory as well
-    s->pagetable[index] = 0xFFFFFFFF;
-    return -1;
-}
-
-/*
- * Reads nb_sectors sectors starting at sector_num into buf, one 512-byte
- * sector at a time. Sectors that are not allocated read as zeros.
- *
- * Returns 0 on success, -1 if a read from the image file comes up short.
- */
-static int vpc_read(BlockDriverState *bs, int64_t sector_num,
-                    uint8_t *buf, int nb_sectors)
-{
-    BDRVVPCState *state = bs->opaque;
-    int remaining;
-
-    for (remaining = nb_sectors; remaining > 0; remaining--) {
-        int64_t pos = get_sector_offset(bs, sector_num, 0);
-
-        if (pos != -1) {
-            if (bdrv_pread(state->hd, pos, buf, 512) != 512)
-                return -1;
-        } else {
-            memset(buf, 0, 512);
-        }
-
-        sector_num++;
-        buf += 512;
-    }
-
-    return 0;
-}
-
-/*
- * Writes nb_sectors sectors from buf starting at sector_num, one 512-byte
- * sector at a time. A block is allocated on the first write into it.
- *
- * Returns 0 on success, -1 if the allocation or a write fails.
- */
-static int vpc_write(BlockDriverState *bs, int64_t sector_num,
-    const uint8_t *buf, int nb_sectors)
-{
-    BDRVVPCState *state = bs->opaque;
-    int remaining;
-
-    for (remaining = nb_sectors; remaining > 0; remaining--) {
-        int64_t pos = get_sector_offset(bs, sector_num, 1);
-
-        if (pos == -1) {
-            pos = alloc_block(bs, sector_num);
-            if (pos < 0)
-                return -1;
-        }
-
-        if (bdrv_pwrite(state->hd, pos, buf, 512) != 512)
-            return -1;
-
-        sector_num++;
-        buf += 512;
-    }
-
-    return 0;
-}
-
-
-/*
- * Derives a cylinders/heads/sectors-per-cylinder geometry from a number of
- * sectors, following the algorithm described in the VHD specification.
- *
- * The resulting geometry does not always match total_sectors exactly. The
- * cylinder count is rounded up (see the note at the end of the function).
- *
- * Returns 0 on success, -EFBIG if the size is larger than 127 GB. The
- * output parameters are only written on success.
- */
-static int calculate_geometry(int64_t total_sectors, uint16_t* cyls,
-    uint8_t* heads, uint8_t* secs_per_cyl)
-{
-    uint32_t cyl_heads;
-    uint8_t nheads;
-    uint8_t nsecs;
-
-    if (total_sectors > 65535 * 16 * 255)
-        return -EFBIG;
-
-    if (total_sectors > 65535 * 16 * 63) {
-        nsecs = 255;
-        nheads = 16;
-        cyl_heads = total_sectors / nsecs;
-    } else {
-        // Try 17, then 31, then 63 sectors until the cylinder count fits
-        nsecs = 17;
-        cyl_heads = total_sectors / nsecs;
-        nheads = (cyl_heads + 1023) / 1024;
-        if (nheads < 4)
-            nheads = 4;
-
-        if (cyl_heads >= (nheads * 1024) || nheads > 16) {
-            nsecs = 31;
-            nheads = 16;
-            cyl_heads = total_sectors / nsecs;
-        }
-
-        if (cyl_heads >= (nheads * 1024)) {
-            nsecs = 63;
-            nheads = 16;
-            cyl_heads = total_sectors / nsecs;
-        }
-    }
-
-    *secs_per_cyl = nsecs;
-    *heads = nheads;
-
-    // Note: Rounding up deviates from the Virtual PC behaviour
-    // However, we need this to avoid truncating images in qemu-img convert
-    *cyls = (cyl_heads + nheads - 1) / nheads;
-
-    return 0;
-}
-
-/*
- * Creates an empty dynamic VHD image of (roughly) total_sectors sectors.
- *
- * File layout written here: footer copy at offset 0, dynamic disk header at
- * offset 512, BAT (all entries 0xFFFFFFFF) at offset 3 * 512, footer after
- * the BAT.
- *
- * Returns 0 on success, -ENOTSUP if a backing file is requested, -EFBIG if
- * the size cannot be expressed as a geometry, -EIO on any I/O error. The
- * file descriptor is closed on every path.
- */
-static int vpc_create(const char *filename, int64_t total_sectors,
-    const char *backing_file, int flags)
-{
-    uint8_t buf[1024];
-    struct vhd_footer* footer = (struct vhd_footer*) buf;
-    struct vhd_dyndisk_header* dyndisk_header =
-        (struct vhd_dyndisk_header*) buf;
-    int fd;
-    int ret = -EIO;
-    size_t i;
-    uint16_t cyls;
-    uint8_t heads;
-    uint8_t secs_per_cyl;
-    size_t block_size, num_bat_entries;
-
-    if (backing_file != NULL)
-        return -ENOTSUP;
-
-    fd = open(filename, O_WRONLY | O_CREAT | O_TRUNC | O_BINARY, 0644);
-    if (fd < 0)
-        return -EIO;
-
-    // Calculate matching total_size and geometry
-    if (calculate_geometry(total_sectors, &cyls, &heads, &secs_per_cyl)) {
-        ret = -EFBIG;
-        goto out;
-    }
-    total_sectors = (int64_t) cyls * heads * secs_per_cyl;
-
-    // Prepare the Hard Disk Footer
-    memset(buf, 0, 1024);
-
-    // These fields are fixed-width and not NUL-terminated
-    memcpy(footer->creator, "conectix", 8);
-    // TODO Check if "qemu" creator_app is ok for VPC
-    memcpy(footer->creator_app, "qemu", 4);
-    memcpy(footer->creator_os, "Wi2k", 4);
-
-    // The be*_to_cpu helpers are used for host -> big-endian conversion
-    // here; the byte swap is the same operation in both directions
-    footer->features = be32_to_cpu(0x02);
-    footer->version = be32_to_cpu(0x00010000);
-    footer->data_offset = be64_to_cpu(HEADER_SIZE);
-    footer->timestamp = be32_to_cpu(time(NULL) - VHD_TIMESTAMP_BASE);
-
-    // Version of Virtual PC 2007
-    footer->major = be16_to_cpu(0x0005);
-    footer->minor =be16_to_cpu(0x0003);
-
-    footer->orig_size = be64_to_cpu(total_sectors * 512);
-    footer->size = be64_to_cpu(total_sectors * 512);
-
-    footer->cyls = be16_to_cpu(cyls);
-    footer->heads = heads;
-    footer->secs_per_cyl = secs_per_cyl;
-
-    footer->type = be32_to_cpu(VHD_DYNAMIC);
-
-    // TODO uuid is missing
-
-    footer->checksum = be32_to_cpu(vpc_checksum(buf, HEADER_SIZE));
-
-    // Write the footer (twice: at the beginning and at the end)
-    block_size = 0x200000;
-    num_bat_entries = (total_sectors + block_size / 512) / (block_size / 512);
-
-    if (write(fd, buf, HEADER_SIZE) != HEADER_SIZE)
-        goto out;
-
-    if (lseek(fd, 1536 + ((num_bat_entries * 4 + 511) & ~511), SEEK_SET) < 0)
-        goto out;
-    if (write(fd, buf, HEADER_SIZE) != HEADER_SIZE)
-        goto out;
-
-    // Write the initial BAT
-    if (lseek(fd, 3 * 512, SEEK_SET) < 0)
-        goto out;
-
-    memset(buf, 0xFF, 512);
-    for (i = 0; i < (num_bat_entries * 4 + 511) / 512; i++)
-        if (write(fd, buf, 512) != 512)
-            goto out;
-
-
-    // Prepare the Dynamic Disk Header
-    memset(buf, 0, 1024);
-
-    memcpy(dyndisk_header->magic, "cxsparse", 8);
-
-    dyndisk_header->data_offset = be64_to_cpu(0xFFFFFFFF);
-    dyndisk_header->table_offset = be64_to_cpu(3 * 512);
-    dyndisk_header->version = be32_to_cpu(0x00010000);
-    dyndisk_header->block_size = be32_to_cpu(block_size);
-    dyndisk_header->max_table_entries = be32_to_cpu(num_bat_entries);
-
-    dyndisk_header->checksum = be32_to_cpu(vpc_checksum(buf, 1024));
-
-    // Write the header
-    if (lseek(fd, 512, SEEK_SET) < 0)
-        goto out;
-    if (write(fd, buf, 1024) != 1024)
-        goto out;
-
-    ret = 0;
-
-out:
-    // single exit point so the descriptor is never leaked
-    close(fd);
-    return ret;
-}
-
-/*
- * Releases the in-memory BAT (and the bitmap cache when CACHE is defined)
- * and deletes the underlying image file object.
- */
-static void vpc_close(BlockDriverState *bs)
-{
-    BDRVVPCState *state = bs->opaque;
-
-#ifdef CACHE
-    qemu_free(state->pageentry_u8);
-#endif
-    qemu_free(state->pagetable);
-    bdrv_delete(state->hd);
-}
-
-// Driver description for the "vpc" format: per-image state size plus the
-// callbacks implemented in this file
-static BlockDriver bdrv_vpc = {
-    .format_name       = "vpc",
-    .instance_size     = sizeof(BDRVVPCState),
-    .bdrv_probe                = vpc_probe,
-    .bdrv_open         = vpc_open,
-    .bdrv_read         = vpc_read,
-    .bdrv_write                = vpc_write,
-    .bdrv_close                = vpc_close,
-    .bdrv_create       = vpc_create,
-};
-
-// Hands the VPC driver description to bdrv_register()
-static void bdrv_vpc_init(void)
-{
-    bdrv_register(&bdrv_vpc);
-}
-
-// block_init() comes from module.h; presumably it arranges for
-// bdrv_vpc_init to run during block-module initialization -- confirm
-block_init(bdrv_vpc_init);
diff --git a/block-vvfat.c b/block-vvfat.c
deleted file mode 100644 (file)
index 2a8feb3..0000000
+++ /dev/null
@@ -1,2855 +0,0 @@
-/* vim:set shiftwidth=4 ts=8: */
-/*
- * QEMU Block driver for virtual VFAT (shadows a local directory)
- *
- * Copyright (c) 2004,2005 Johannes E. Schindelin
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
- * THE SOFTWARE.
- */
-#include <sys/stat.h>
-#include <dirent.h>
-#include "qemu-common.h"
-#include "block_int.h"
-#include "module.h"
-
-#ifndef S_IWGRP
-#define S_IWGRP 0
-#endif
-#ifndef S_IWOTH
-#define S_IWOTH 0
-#endif
-
-/* TODO: add ":bootsector=blabla.img:" */
-/* LATER TODO: add automatic boot sector generation from
-    BOOTEASY.ASM and Ranish Partition Manager
-    Note that DOS assumes the system files to be the first files in the
-    file system (test if the boot sector still relies on that fact)! */
-/* MAYBE TODO: write block-visofs.c */
-/* TODO: call try_commit() only after a timeout */
-
-/* #define DEBUG */
-
-#ifdef DEBUG
-
-#define DLOG(a) a
-
-#undef stderr
-#define stderr STDERR
-FILE* stderr = NULL;
-
-static void checkpoint(void);
-
-#ifdef __MINGW32__
-void nonono(const char* file, int line, const char* msg) {
-    fprintf(stderr, "Nonono! %s:%d %s\n", file, line, msg);
-    exit(-5);
-}
-#undef assert
-#define assert(a) do {if (!(a)) nonono(__FILE__, __LINE__, #a);}while(0)
-#endif
-
-#else
-
-#define DLOG(a)
-
-#endif
-
-/* dynamic array functions */
-
-/* A growable array of fixed-size items:
- *   pointer   - item storage (NULL until the first allocation)
- *   size      - number of bytes allocated at pointer
- *   next      - number of items in use; valid indices are 0 .. next-1
- *   item_size - size of one item in bytes
- */
-typedef struct array_t {
-    char* pointer;
-    unsigned int size,next,item_size;
-} array_t;
-
-/* Set up an empty array of item_size-byte items; nothing is allocated yet. */
-static inline void array_init(array_t* array,unsigned int item_size)
-{
-    array->item_size = item_size;
-    array->pointer = NULL;
-    array->size = array->next = 0;
-}
-
-/*
- * Release the storage of an array and reset it to the empty state.
- *
- * The pointer is cleared as well: the array stays usable afterwards
- * (array_ensure_allocated passes pointer to qemu_realloc), and calling
- * array_free twice is harmless.
- */
-static inline void array_free(array_t* array)
-{
-    free(array->pointer); /* free(NULL) is a no-op */
-    array->pointer = NULL;
-    array->size=array->next=0;
-}
-
-/* Return the address of item number index, which must already be in use
- * (index < array->next is asserted). Does not automatically grow. */
-static inline void* array_get(array_t* array,unsigned int index) {
-    unsigned int byte_offset;
-
-    assert(index < array->next);
-    byte_offset = index * array->item_size;
-    return array->pointer + byte_offset;
-}
-
-/*
- * Make sure the storage is large enough to hold item number index.
- *
- * When the storage has to grow, room for 31 items beyond index is
- * allocated and array->next is set to index + 1; when it is already large
- * enough, the array is left untouched.
- *
- * Returns 0 on success, -1 if qemu_realloc returns NULL.
- */
-static inline int array_ensure_allocated(array_t* array, int index)
-{
-    int grown_size;
-
-    if ((index + 1) * array->item_size <= array->size)
-        return 0;
-
-    grown_size = (index + 32) * array->item_size;
-    array->pointer = qemu_realloc(array->pointer, grown_size);
-    if (array->pointer == NULL)
-        return -1;
-
-    array->size = grown_size;
-    array->next = index + 1;
-    return 0;
-}
-
-/* Append one item to the array (growing the storage if needed) and return
- * its address, or NULL if the storage could not be grown. */
-static inline void* array_get_next(array_t* array) {
-    unsigned int slot = array->next;
-
-    if (array_ensure_allocated(array, slot) < 0)
-        return NULL;
-
-    array->next = slot + 1;
-    return array_get(array, slot);
-}
-
-/*
- * Open a gap of count items at position index by moving the items from
- * index onwards up, growing the storage by exactly count items if needed.
- *
- * Returns the address of the first item of the gap (its contents are not
- * initialized), or NULL if qemu_realloc returns NULL.
- */
-static inline void* array_insert(array_t* array,unsigned int index,unsigned int count) {
-    char* gap;
-
-    if ((array->next + count) * array->item_size > array->size) {
-        int extra = count * array->item_size;
-
-        array->pointer = qemu_realloc(array->pointer, array->size + extra);
-        if (array->pointer == NULL)
-            return NULL;
-        array->size += extra;
-    }
-
-    gap = array->pointer + index * array->item_size;
-    memmove(gap + count * array->item_size, gap,
-            (array->next - index) * array->item_size);
-    array->next += count;
-    return gap;
-}
-
-/* this performs a "roll", so that the element which was at index_from becomes
- * index_to, but the order of all other elements is preserved.
- *
- * count items starting at index_from are moved as one group; the items in
- * between are shifted by count positions to make room.
- *
- * Returns 0 on success (also when index_to == index_from), -1 if array is
- * NULL or one of the two indices is outside 0 .. next-1.
- *
- * NOTE(review): only the two start indices are range-checked. Nothing
- * verifies that index_from + count or index_to + count stay within the
- * items in use, so callers have to guarantee that. */
-static inline int array_roll(array_t* array,int index_to,int index_from,int count)
-{
-    char* buf;
-    char* from;
-    char* to;
-    int is;
-
-    if(!array ||
-           index_to<0 || index_to>=array->next ||
-           index_from<0 || index_from>=array->next)
-       return -1;
-
-    if(index_to==index_from)
-       return 0;
-
-    is=array->item_size;
-    from=array->pointer+index_from*is;
-    to=array->pointer+index_to*is;
-    /* save the group being moved, it gets overwritten by the shift below */
-    buf=qemu_malloc(is*count);
-    memcpy(buf,from,is*count);
-
-    if(index_to<index_from)
-       /* moving towards the front: shift [to, from) up by count items */
-       memmove(to+is*count,to,from-to);
-    else
-       /* moving towards the back: shift the items after the group down */
-       memmove(from,from+is*count,to-from);
-
-    memcpy(to,buf,is*count);
-
-    free(buf);
-
-    return 0;
-}
-
-/*
- * Remove count items starting at index; the items behind them move down.
- *
- * The slice is rolled to the very end of the used items, i.e. to position
- * next - count, and then cut off. Rolling it to next - 1 instead is only
- * equivalent for count == 1: for larger counts array_roll would read and
- * write count - 1 items beyond the items in use.
- *
- * Returns 0 on success, -1 if array_roll rejects the indices.
- */
-static inline int array_remove_slice(array_t* array,int index, int count)
-{
-    assert(index >=0);
-    assert(count > 0);
-    assert(index + count <= array->next);
-    if(array_roll(array,array->next-count,index,count))
-       return -1;
-    array->next -= count;
-    return 0;
-}
-
-/* Remove the single item at index; returns what array_remove_slice returns */
-static int array_remove(array_t* array,int index)
-{
-    return array_remove_slice(array, index, 1);
-}
-
-/* Return the index of the item that pointer refers to. Asserts that pointer
- * lies on an item boundary and within the items in use. */
-static int array_index(array_t* array, void* pointer)
-{
-    size_t byte_offset = (char*)pointer - array->pointer;
-    size_t item = byte_offset / array->item_size;
-
-    assert((byte_offset % array->item_size) == 0);
-    assert(item < array->next);
-    return item;
-}
-
-/* These structures are used to fake a disk and the VFAT filesystem.
- * For this reason we need to use __attribute__((packed)). */
-
-/* Boot sector of the faked FAT volume. Packed; the fields add up to 512
- * bytes (36 bytes of common header, an 18-byte FAT16/FAT32 specific part,
- * fat_type, padding and the 2-byte magic). */
-typedef struct bootsector_t {
-    uint8_t jump[3];
-    uint8_t name[8];
-    uint16_t sector_size;
-    uint8_t sectors_per_cluster;
-    uint16_t reserved_sectors;
-    uint8_t number_of_fats;
-    uint16_t root_entries;
-    uint16_t total_sectors16;
-    uint8_t media_type;
-    uint16_t sectors_per_fat;
-    uint16_t sectors_per_track;
-    uint16_t number_of_heads;
-    uint32_t hidden_sectors;
-    uint32_t total_sectors;
-    /* both variants are 18 bytes long */
-    union {
-        struct {
-           uint8_t drive_number;
-           uint8_t current_head;
-           uint8_t signature;
-           uint32_t id;
-           uint8_t volume_label[11];
-       } __attribute__((packed)) fat16;
-       struct {
-           uint32_t sectors_per_fat;
-           uint16_t flags;
-           uint8_t major,minor;
-           uint32_t first_cluster_of_root_directory;
-           uint16_t info_sector;
-           uint16_t backup_boot_sector;
-           uint16_t ignored;
-       } __attribute__((packed)) fat32;
-    } u;
-    uint8_t fat_type[8];
-    uint8_t ignored[0x1c0];
-    uint8_t magic[2];
-} __attribute__((packed)) bootsector_t;
-
-/* Cylinder/head/sector address as stored in a partition table entry.
- * sector2CHS fills it: the low 6 bits of sector hold the 1-based sector
- * number, its top 2 bits hold bits 8-9 of the cylinder, and cylinder holds
- * the low 8 bits of the cylinder. */
-typedef struct {
-    uint8_t head;
-    uint8_t sector;
-    uint8_t cylinder;
-} mbr_chs_t;
-
-/* One partition table entry of the MBR (packed, 16 bytes). init_mbr stores
- * the two *_sector_long fields in little-endian byte order. */
-typedef struct partition_t {
-    uint8_t attributes; /* 0x80 = bootable */
-    mbr_chs_t start_CHS;
-    uint8_t   fs_type; /* 0x1 = FAT12, 0x6 = FAT16, 0xe = FAT16_LBA, 0xb = FAT32, 0xc = FAT32_LBA */
-    mbr_chs_t end_CHS;
-    uint32_t start_sector_long;
-    uint32_t length_sector_long;
-} __attribute__((packed)) partition_t;
-
-/* Master boot record (packed, 512 bytes): 0x1b8 unused bytes, the NT disk
- * signature, four partition entries and the 0x55 0xaa magic written by
- * init_mbr. */
-typedef struct mbr_t {
-    uint8_t ignored[0x1b8];
-    uint32_t nt_id;
-    uint8_t ignored2[2];
-    partition_t partition[4];
-    uint8_t magic[2];
-} __attribute__((packed)) mbr_t;
-
-/* One 32-byte FAT directory entry (packed). The multi-byte fields are kept
- * in little-endian byte order (see the le*_to_cpu / cpu_to_le* accessors).
- * Long-filename entries reuse the same 32 bytes: create_long_filename
- * addresses them as byte offsets from name[0]. */
-typedef struct direntry_t {
-    uint8_t name[8];
-    uint8_t extension[3];
-    uint8_t attributes;
-    uint8_t reserved[2];
-    uint16_t ctime;
-    uint16_t cdate;
-    uint16_t adate;
-    uint16_t begin_hi; /* upper 16 bits of the first cluster */
-    uint16_t mtime;
-    uint16_t mdate;
-    uint16_t begin; /* lower 16 bits of the first cluster */
-    uint32_t size;
-} __attribute__((packed)) direntry_t;
-
-/* this structure is used to transparently access the files */
-
-typedef struct mapping_t {
-    /* begin is the first cluster, end is the last+1 */
-    uint32_t begin,end;
-    /* as s->directory is growable, no pointer may be used here */
-    unsigned int dir_index;
-    /* the clusters of a file may be in any order; this points to the first */
-    int first_mapping_index;
-    union {
-       /* offset is
-        * - the offset in the file (in clusters) for a file, or
-        * - the next cluster of the directory for a directory, and
-        * - the address of the buffer for a faked entry
-        */
-       struct {
-           uint32_t offset;
-       } file;
-       struct {
-           int parent_mapping_index;
-           int first_dir_index;
-       } dir;
-    } info;
-    /* path contains the full path, i.e. it always starts with s->path */
-    char* path;
-
-    /* the values are distinct bits, so they can be combined */
-    enum { MODE_UNDEFINED = 0, MODE_NORMAL = 1, MODE_MODIFIED = 2,
-       MODE_DIRECTORY = 4, MODE_FAKED = 8,
-       MODE_DELETED = 16, MODE_RENAMED = 32 } mode;
-    int read_only;
-} mapping_t;
-
-#ifdef DEBUG
-static void print_direntry(const struct direntry_t*);
-static void print_mapping(const struct mapping_t* mapping);
-#endif
-
-/* here begins the real VVFAT driver */
-
-/* Per-image state of the virtual VFAT driver */
-typedef struct BDRVVVFATState {
-    BlockDriverState* bs; /* pointer to parent */
-    unsigned int first_sectors_number; /* 1 for a single partition, 0x40 for a disk with partition table */
-    unsigned char first_sectors[0x40*0x200]; /* init_mbr builds the MBR in the first 512 bytes */
-
-    int fat_type; /* 16 or 32 */
-    /* growable arrays, see array_t */
-    array_t fat,directory,mapping;
-
-    unsigned int cluster_size;
-    unsigned int sectors_per_cluster;
-    unsigned int sectors_per_fat;
-    unsigned int sectors_of_root_directory;
-    uint32_t last_cluster_of_root_directory;
-    unsigned int faked_sectors; /* how many sectors are faked before file data */
-    uint32_t sector_count; /* total number of sectors of the partition */
-    uint32_t cluster_count; /* total number of clusters of this partition */
-    uint32_t max_fat_value;
-
-    int current_fd;
-    mapping_t* current_mapping;
-    unsigned char* cluster; /* points to current cluster */
-    unsigned char* cluster_buffer; /* points to a buffer to hold temp data */
-    unsigned int current_cluster;
-
-    /* write support */
-    BlockDriverState* write_target;
-    char* qcow_filename;
-    BlockDriverState* qcow;
-    void* fat2;
-    char* used_clusters;
-    array_t commits;
-    const char* path;
-    int downcase_short_names;
-} BDRVVVFATState;
-
-/* Convert the linear sector position spos into a Cylinder/Head/Sector
- * address, using the geometry (cyls/heads/secs) of bs.
- * Returns 0 on success. If the position lies outside that geometry, all
- * three CHS bytes are set to 0xFF and 1 is returned to signal overflow.
- */
-static int sector2CHS(BlockDriverState* bs, mbr_chs_t * chs, int spos){
-    int sector = spos % bs->secs;
-    int head, cylinder;
-
-    spos /= bs->secs;
-    head = spos % bs->heads;
-    cylinder = spos / bs->heads;
-
-    if (cylinder >= bs->cyls) {
-        /* Overflow,
-        it happens if 32bit sector positions are used, while CHS is only 24bit.
-        Windows/Dos is said to take 1023/255/63 as nonrepresentable CHS */
-        chs->head = chs->sector = chs->cylinder = 0xFF;
-        return 1;
-    }
-
-    chs->head     = (uint8_t)head;
-    /* sectors are 1-based; cylinder bits 8-9 go into the top two bits */
-    chs->sector   = (uint8_t)((sector + 1) | ((cylinder >> 8) << 6));
-    chs->cylinder = (uint8_t)cylinder;
-    return 0;
-}
-
-/*
- * Build a master boot record in the first 512 bytes of s->first_sectors.
- * It describes a single bootable partition that starts at sector
- * first_sectors_number - 1.
- */
-static void init_mbr(BDRVVVFATState* s)
-{
-    /* TODO: if the files mbr.img and bootsect.img exist, use them */
-    mbr_t* mbr = (mbr_t*)s->first_sectors;
-    partition_t* part = &mbr->partition[0];
-    const uint32_t first_lba = s->first_sectors_number - 1;
-    int needs_lba;
-
-    memset(s->first_sectors, 0, 512);
-
-    /* Win NT Disk Signature */
-    mbr->nt_id = cpu_to_le32(0xbe1afdfa);
-
-    part->attributes = 0x80; /* bootable */
-
-    /* LBA is used when partition is outside the CHS geometry */
-    needs_lba = sector2CHS(s->bs, &part->start_CHS, first_lba);
-    needs_lba |= sector2CHS(s->bs, &part->end_CHS, s->sector_count);
-
-    /* LBA partitions are identified only by start/length_sector_long,
-       not by CHS */
-    part->start_sector_long = cpu_to_le32(first_lba);
-    part->length_sector_long = cpu_to_le32(s->sector_count - first_lba);
-
-    /* FAT12/FAT16/FAT32 */
-    /* DOS uses different types when partition is LBA,
-       probably to prevent older versions from using CHS on them */
-    switch (s->fat_type) {
-    case 12:
-        part->fs_type = 0x1;
-        break;
-    case 16:
-        part->fs_type = needs_lba ? 0xe : 0x06;
-        break;
-    default: /* fat_type == 32 */
-        part->fs_type = needs_lba ? 0xc : 0x0b;
-        break;
-    }
-
-    mbr->magic[0] = 0x55;
-    mbr->magic[1] = 0xaa;
-}
-
-/* direntry functions */
-
-/* Expand src into 16-bit characters (each byte followed by a zero byte),
- * append a 16-bit zero terminator and pad with 0xff bytes up to the next
- * multiple of 26. At most 129 characters of src are converted, so dest must
- * have room for up to 260 bytes (129 * 2 + 2).
- * Returns the length in bytes of the converted name, without terminator
- * and padding. */
-static inline int short2long_name(char* dest,const char* src)
-{
-    int nchars = 0;
-    int pos;
-    int len;
-
-    while (nchars < 129 && src[nchars]) {
-        dest[2 * nchars] = src[nchars];
-        dest[2 * nchars + 1] = 0;
-        nchars++;
-    }
-
-    len = 2 * nchars;
-    dest[len] = 0;
-    dest[len + 1] = 0;
-
-    for (pos = len + 2; pos % 26 != 0; pos++)
-        dest[pos] = 0xff;
-
-    return len;
-}
-
-/*
- * Append the long-filename directory entries for filename to s->directory
- * and return a pointer to the first of them.
- *
- * Each entry carries 26 bytes of the 16-bit name. They are spread over the
- * 32-byte entry at byte offsets 1-10, 14-25 and 28-31 (counted from
- * name[0]). The entries are stored in reverse order: the last chunk of the
- * name comes first and is flagged with 0x40 in its sequence number.
- */
-static inline direntry_t* create_long_filename(BDRVVVFATState* s,const char* filename)
-{
-    /* short2long_name converts up to 129 characters and adds a 16-bit
-     * terminator, i.e. it may write 129 * 2 + 2 = 260 bytes (10 * 26) */
-    char buffer[260];
-    int length=short2long_name(buffer,filename),
-        number_of_entries=(length+25)/26,i;
-    direntry_t* entry;
-
-    for(i=0;i<number_of_entries;i++) {
-       entry=array_get_next(&(s->directory));
-       entry->attributes=0xf;
-       entry->reserved[0]=0;
-       entry->begin=0;
-       entry->name[0]=(number_of_entries-i)|(i==0?0x40:0);
-    }
-    for(i=0;i<26*number_of_entries;i++) {
-       int offset=(i%26);
-       if(offset<10) offset=1+offset;
-       else if(offset<22) offset=14+offset-10;
-       else offset=28+offset-22;
-       /* chunk i/26 of the name goes into the (i/26)-th entry from the end */
-       entry=array_get(&(s->directory),s->directory.next-1-(i/26));
-       entry->name[offset]=buffer[i];
-    }
-    return array_get(&(s->directory),s->directory.next-number_of_entries);
-}
-
-/* Non-zero when the first name byte is 0xe5 or 0x00, i.e. the slot is unused. */
-static char is_free(const direntry_t* direntry)
-{
-    switch (direntry->name[0]) {
-    case 0xe5:
-    case 0x00:
-        return 1;
-    default:
-        return 0;
-    }
-}
-
-/* Non-zero when the attribute byte is exactly 0x28 (the value used for the volume label). */
-static char is_volume_label(const direntry_t* direntry)
-{
-    if (direntry->attributes == 0x28)
-        return 1;
-    return 0;
-}
-
-/* Non-zero when the attribute byte is exactly 0xf, the marker of a long-name entry. */
-static char is_long_name(const direntry_t* direntry)
-{
-    if (direntry->attributes == 0xf)
-        return 1;
-    return 0;
-}
-
-/*
- * Non-zero for an entry that is neither the volume label, nor a long-name
- * entry, nor a free slot.
- */
-static char is_short_name(const direntry_t* direntry)
-{
-    if (is_volume_label(direntry))
-        return 0;
-    if (is_long_name(direntry))
-        return 0;
-    return !is_free(direntry);
-}
-
-/* Non-zero when attribute bit 0x10 is set and the entry is not marked deleted (0xe5). */
-static char is_directory(const direntry_t* direntry)
-{
-    if (!(direntry->attributes & 0x10))
-        return 0;
-    return direntry->name[0] != 0xe5;
-}
-
-/* Non-zero for a short-name entry whose name starts with '.'. */
-static inline char is_dot(const direntry_t* direntry)
-{
-    if (!is_short_name(direntry))
-        return 0;
-    return direntry->name[0] == '.';
-}
-
-/* Non-zero for a short-name entry that is not a directory. */
-static char is_file(const direntry_t* direntry)
-{
-    if (!is_short_name(direntry))
-        return 0;
-    return !is_directory(direntry);
-}
-
-/*
- * Return the first cluster recorded in the entry, assembled from the
- * little-endian 16-bit fields begin (low half) and begin_hi (high half).
- */
-static inline uint32_t begin_of_direntry(const direntry_t* direntry)
-{
-    uint32_t low_half = le16_to_cpu(direntry->begin);
-    uint32_t high_half = le16_to_cpu(direntry->begin_hi);
-
-    return low_half | (high_half << 16);
-}
-
-/* Return the entry's size field converted from little-endian to host order. */
-static inline uint32_t filesize_of_direntry(const direntry_t* direntry)
-{
-    const uint32_t size_in_bytes = le32_to_cpu(direntry->size);
-
-    return size_in_bytes;
-}
-
-/*
- * Store the first cluster in the entry: the low 16 bits go to begin, the
- * next 16 bits to begin_hi, both in little-endian order.
- */
-static void set_begin_of_direntry(direntry_t* direntry, uint32_t begin)
-{
-    const uint32_t low_half = begin & 0xffff;
-    const uint32_t high_half = (begin >> 16) & 0xffff;
-
-    direntry->begin = cpu_to_le16(low_half);
-    direntry->begin_hi = cpu_to_le16(high_half);
-}
-
-/* fat functions */
-
-/*
- * Compute the one-byte checksum over the 11 short-name bytes of entry (the
- * name bytes followed by the extension bytes).  For every byte the running
- * sum is rotated right by one bit and the byte is added; the result is what
- * create_short_and_long_name() stores in the long-name entries.
- */
-static inline uint8_t fat_chksum(const direntry_t* entry)
-{
-    uint8_t chksum=0;
-    int i;
-
-    for(i=0;i<11;i++) {
-        unsigned char c;
-
-        /*
-         * extension[i-8] only lines up with byte i when name[] covers
-         * indices 0..7.  The former test "i <= 8" fetched byte 8 as name[8],
-         * one past that range, and never referenced extension[0].
-         */
-        c = (i < 8) ? entry->name[i] : entry->extension[i-8];
-        /* rotate right by one bit, then add the next byte */
-        chksum=(((chksum&0xfe)>>1)|((chksum&0x01)?0x80:0)) + c;
-    }
-
-    return chksum;
-}
-
-/*
- * Pack the local-time representation of time into a 16-bit little-endian
- * value.  With return_time != 0 the result holds seconds/2, minutes (<<5)
- * and hours (<<11); otherwise it holds day of month, month (<<5) and
- * tm_year-80 (<<9).
- */
-static uint16_t fat_datetime(time_t time,int return_time) {
-    int packed;
-    struct tm* fields;
-#ifdef _WIN32
-    fields=localtime(&time); /* this is not thread safe */
-#else
-    struct tm storage;
-    fields=&storage;
-    localtime_r(&time,fields);
-#endif
-    if (return_time) {
-        packed = (fields->tm_sec/2)|(fields->tm_min<<5)|(fields->tm_hour<<11);
-    } else {
-        packed = (fields->tm_mday)|((fields->tm_mon+1)<<5)|((fields->tm_year-80)<<9);
-    }
-    return cpu_to_le16(packed);
-}
-
-/*
- * Store value as the FAT entry for cluster.  FAT32 and FAT16 entries are
- * written as little-endian 32/16-bit items; any other fat_type is handled
- * as FAT12, where two 12-bit entries share three bytes.
- */
-static inline void fat_set(BDRVVVFATState* s,unsigned int cluster,uint32_t value)
-{
-    if (s->fat_type == 32) {
-        uint32_t* slot32 = array_get(&(s->fat), cluster);
-        *slot32 = cpu_to_le32(value);
-        return;
-    }
-    if (s->fat_type == 16) {
-        uint16_t* slot16 = array_get(&(s->fat), cluster);
-        *slot16 = cpu_to_le16(value & 0xffff);
-        return;
-    }
-
-    {
-        int byte_index = (cluster*3/2);
-        unsigned char* bytes = array_get(&(s->fat), byte_index);
-
-        if (cluster & 1) {
-            /* odd cluster: high nibble of byte 0, all of byte 1 */
-            bytes[0] = (bytes[0]&0xf) | ((value&0xf)<<4);
-            bytes[1] = (value>>4);
-        } else {
-            /* even cluster: all of byte 0, low nibble of byte 1 */
-            bytes[0] = value&0xff;
-            bytes[1] = (bytes[1]&0xf0) | ((value>>8)&0xf);
-        }
-    }
-}
-
-/*
- * Return the FAT entry for cluster, converted to host byte order.  Any
- * fat_type other than 32 or 16 is read as FAT12 (12 bits out of a byte pair).
- */
-static inline uint32_t fat_get(BDRVVVFATState* s,unsigned int cluster)
-{
-    if (s->fat_type == 32) {
-        uint32_t* slot32 = array_get(&(s->fat), cluster);
-        return le32_to_cpu(*slot32);
-    }
-    if (s->fat_type == 16) {
-        uint16_t* slot16 = array_get(&(s->fat), cluster);
-        return le16_to_cpu(*slot16);
-    }
-
-    {
-        const uint8_t* pair = (uint8_t*)(s->fat.pointer) + cluster*3/2;
-        int combined = pair[0] | (pair[1] << 8);
-
-        /* odd clusters occupy the upper 12 bits of the byte pair */
-        if (cluster & 1)
-            combined >>= 4;
-        return combined & 0x0fff;
-    }
-}
-
-/* Return -1 when fat_entry is greater than max_fat_value-8, otherwise 0. */
-static inline int fat_eof(BDRVVVFATState* s,uint32_t fat_entry)
-{
-    return (fat_entry > s->max_fat_value - 8) ? -1 : 0;
-}
-
-/*
- * Allocate and zero the in-memory FAT (s->fat) for s->sectors_per_fat
- * sectors and set s->max_fat_value according to s->fat_type.  FAT12 is kept
- * as a byte array; FAT16/FAT32 use 2- or 4-byte items.
- */
-static inline void init_fat(BDRVVVFATState* s)
-{
-    const int is_fat12 = (s->fat_type == 12);
-
-    if (is_fat12) {
-        array_init(&(s->fat),1);
-        array_ensure_allocated(&(s->fat),
-                s->sectors_per_fat * 0x200 * 3 / 2 - 1);
-    } else {
-        array_init(&(s->fat),(s->fat_type==32?4:2));
-        array_ensure_allocated(&(s->fat),
-                s->sectors_per_fat * 0x200 / s->fat.item_size - 1);
-    }
-    memset(s->fat.pointer,0,s->fat.size);
-
-    if (s->fat_type == 12)
-        s->max_fat_value=0xfff;
-    else if (s->fat_type == 16)
-        s->max_fat_value=0xffff;
-    else if (s->fat_type == 32)
-        s->max_fat_value=0x0fffffff;
-    else
-        s->max_fat_value=0; /* error... */
-}
-
-/* TODO: in create_short_filename, 0xe5->0x05 is not yet handled! */
-/* TODO: in parse_short_filename, 0x05->0xe5 is not yet handled!
- *
- * Append the directory entries for filename to s->directory and return the
- * short-name entry.  When is_dot is non-zero only a space-padded short
- * entry holding filename verbatim is created.  Otherwise the long-name
- * entries are appended first, a short name is derived from filename,
- * sanitised, made unique among the entries from index directory_start up to
- * the new entry, and its checksum is copied into the long-name entries.
- */
-static inline direntry_t* create_short_and_long_name(BDRVVVFATState* s,
-       unsigned int directory_start, const char* filename, int is_dot)
-{
-    int i,j,long_index=s->directory.next; /* long_index: where the long-name entries will start */
-    direntry_t* entry = NULL;
-    direntry_t* entry_long = NULL;
-
-    if(is_dot) {
-       entry=array_get_next(&(s->directory));
-       memset(entry->name,0x20,11); /* blank the 11 short-name bytes addressed through name[] */
-       memcpy(entry->name,filename,strlen(filename));
-       return entry;
-    }
-
-    entry_long=create_long_filename(s,filename);
-
-    i = strlen(filename);
-    for(j = i - 1; j>0  && filename[j]!='.';j--); /* j ends on the last '.', or at 0 when there is none after index 0 */
-    if (j > 0)
-       i = (j > 8 ? 8 : j); /* base name: at most 8 characters before that dot */
-    else if (i > 8)
-       i = 8;
-
-    entry=array_get_next(&(s->directory));
-    memset(entry->name,0x20,11);
-    memcpy(entry->name, filename, i);
-
-    if(j > 0)
-       for (i = 0; i < 3 && filename[j+1+i]; i++) /* up to 3 characters after the dot */
-           entry->extension[i] = filename[j+1+i];
-
-    /* upcase & remove unwanted characters */
-    for(i=10;i>=0;i--) {
-       if(i==10 || i==7) for(;i>0 && entry->name[i]==' ';i--); /* skip the padding spaces found at index 10 and at index 7 */
-       if(entry->name[i]<=' ' || entry->name[i]>0x7f
-               || strchr(".*?<>|\":/\\[];,+='",entry->name[i]))
-           entry->name[i]='_';
-        else if(entry->name[i]>='a' && entry->name[i]<='z')
-            entry->name[i]+='A'-'a';
-    }
-
-    /* mangle duplicates */
-    while(1) {
-       direntry_t* entry1=array_get(&(s->directory),directory_start);
-       int j; /* shadows the outer j, which is no longer needed */
-
-       for(;entry1<entry;entry1++)
-           if(!is_long_name(entry1) && !memcmp(entry1->name,entry->name,11))
-               break; /* found dupe */
-       if(entry1==entry) /* no dupe found */
-           break;
-
-       /* use all 8 characters of name */
-       if(entry->name[7]==' ') {
-           int j;
-           for(j=6;j>0 && entry->name[j]==' ';j--)
-               entry->name[j]='~';
-       }
-
-       /* increment number */
-       for(j=7;j>0 && entry->name[j]=='9';j--) /* trailing '9' digits roll over to '0' */
-           entry->name[j]='0';
-       if(j>0) {
-           if(entry->name[j]<'0' || entry->name[j]>'9')
-               entry->name[j]='0';
-           else
-               entry->name[j]++;
-       }
-    }
-
-    /* calculate checksum; propagate to long name */
-    if(entry_long) {
-        uint8_t chksum=fat_chksum(entry);
-
-       /* calculate anew, because realloc could have taken place */
-       entry_long=array_get(&(s->directory),long_index);
-       while(entry_long<entry && is_long_name(entry_long)) {
-           entry_long->reserved[1]=chksum;
-           entry_long++;
-       }
-    }
-
-    return entry;
-}
-
-/*
- * Read a directory. (the index of the corresponding mapping must be passed).
- */
-static int read_directory(BDRVVVFATState* s, int mapping_index)
-{
-    mapping_t* mapping = array_get(&(s->mapping), mapping_index);
-    direntry_t* direntry;
-    const char* dirname = mapping->path;
-    int first_cluster = mapping->begin;
-    int parent_index = mapping->info.dir.parent_mapping_index;
-    mapping_t* parent_mapping = (mapping_t*)
-        (parent_index >= 0 ? array_get(&(s->mapping), parent_index) : NULL);
-    int first_cluster_of_parent = parent_mapping ? parent_mapping->begin : -1;
-
-    DIR* dir=opendir(dirname);
-    struct dirent* entry;
-    int i;
-
-    assert(mapping->mode & MODE_DIRECTORY);
-
-    if(!dir) {
-       mapping->end = mapping->begin;
-       return -1;
-    }
-
-    i = mapping->info.dir.first_dir_index =
-           first_cluster == 0 ? 0 : s->directory.next;
-
-    /* actually read the directory, and allocate the mappings */
-    while((entry=readdir(dir))) {
-       unsigned int length=strlen(dirname)+2+strlen(entry->d_name);
-        char* buffer;
-       direntry_t* direntry;
-        struct stat st;
-       int is_dot=!strcmp(entry->d_name,".");
-       int is_dotdot=!strcmp(entry->d_name,"..");
-
-       if(first_cluster == 0 && (is_dotdot || is_dot))
-           continue;
-
-       buffer=(char*)qemu_malloc(length);
-       snprintf(buffer,length,"%s/%s",dirname,entry->d_name);
-
-       if(stat(buffer,&st)<0) {
-           free(buffer);
-            continue;
-       }
-
-       /* create directory entry for this file */
-       direntry=create_short_and_long_name(s, i, entry->d_name,
-               is_dot || is_dotdot);
-       direntry->attributes=(S_ISDIR(st.st_mode)?0x10:0x20);
-       direntry->reserved[0]=direntry->reserved[1]=0;
-       direntry->ctime=fat_datetime(st.st_ctime,1);
-       direntry->cdate=fat_datetime(st.st_ctime,0);
-       direntry->adate=fat_datetime(st.st_atime,0);
-       direntry->begin_hi=0;
-       direntry->mtime=fat_datetime(st.st_mtime,1);
-       direntry->mdate=fat_datetime(st.st_mtime,0);
-       if(is_dotdot)
-           set_begin_of_direntry(direntry, first_cluster_of_parent);
-       else if(is_dot)
-           set_begin_of_direntry(direntry, first_cluster);
-       else
-           direntry->begin=0; /* do that later */
-        if (st.st_size > 0x7fffffff) {
-           fprintf(stderr, "File %s is larger than 2GB\n", buffer);
-           free(buffer);
-           return -2;
-        }
-       direntry->size=cpu_to_le32(S_ISDIR(st.st_mode)?0:st.st_size);
-
-       /* create mapping for this file */
-       if(!is_dot && !is_dotdot && (S_ISDIR(st.st_mode) || st.st_size)) {
-           s->current_mapping=(mapping_t*)array_get_next(&(s->mapping));
-           s->current_mapping->begin=0;
-           s->current_mapping->end=st.st_size;
-           /*
-            * we get the direntry of the most recent direntry, which
-            * contains the short name and all the relevant information.
-            */
-           s->current_mapping->dir_index=s->directory.next-1;
-           s->current_mapping->first_mapping_index = -1;
-           if (S_ISDIR(st.st_mode)) {
-               s->current_mapping->mode = MODE_DIRECTORY;
-               s->current_mapping->info.dir.parent_mapping_index =
-                   mapping_index;
-           } else {
-               s->current_mapping->mode = MODE_UNDEFINED;
-               s->current_mapping->info.file.offset = 0;
-           }
-           s->current_mapping->path=buffer;
-           s->current_mapping->read_only =
-               (st.st_mode & (S_IWUSR | S_IWGRP | S_IWOTH)) == 0;
-       }
-    }
-    closedir(dir);
-
-    /* fill with zeroes up to the end of the cluster */
-    while(s->directory.next%(0x10*s->sectors_per_cluster)) {
-       direntry_t* direntry=array_get_next(&(s->directory));
-       memset(direntry,0,sizeof(direntry_t));
-    }
-
-/* TODO: if there are more entries, bootsector has to be adjusted! */
-#define ROOT_ENTRIES (0x02 * 0x10 * s->sectors_per_cluster)
-    if (mapping_index == 0 && s->directory.next < ROOT_ENTRIES) {
-       /* root directory */
-       int cur = s->directory.next;
-       array_ensure_allocated(&(s->directory), ROOT_ENTRIES - 1);
-       memset(array_get(&(s->directory), cur), 0,
-               (ROOT_ENTRIES - cur) * sizeof(direntry_t));
-    }
-
-     /* reget the mapping, since s->mapping was possibly realloc()ed */
-    mapping = (mapping_t*)array_get(&(s->mapping), mapping_index);
-    first_cluster += (s->directory.next - mapping->info.dir.first_dir_index)
-       * 0x20 / s->cluster_size;
-    mapping->end = first_cluster;
-
-    direntry = (direntry_t*)array_get(&(s->directory), mapping->dir_index);
-    set_begin_of_direntry(direntry, mapping->begin);
-
-    return 0;
-}
-
-/*
- * Translate an absolute sector number into a cluster index by subtracting
- * s->faked_sectors and dividing by s->sectors_per_cluster.
- */
-static inline uint32_t sector2cluster(BDRVVVFATState* s,off_t sector_num)
-{
-    const uint32_t cluster_index =
-        (sector_num-s->faked_sectors)/s->sectors_per_cluster;
-
-    return cluster_index;
-}
-
-/*
- * Translate a cluster index into the absolute number of its first sector:
- * s->faked_sectors plus cluster_num times s->sectors_per_cluster.
- */
-static inline off_t cluster2sector(BDRVVVFATState* s, uint32_t cluster_num)
-{
-    const off_t first_sector =
-        s->faked_sectors + s->sectors_per_cluster * cluster_num;
-
-    return first_sector;
-}
-
-/*
- * Return the position of sector_num inside its cluster, counted from the
- * sector that follows the first sectors and both FAT copies.
- */
-static inline uint32_t sector_offset_in_cluster(BDRVVVFATState* s,off_t sector_num)
-{
-    const uint32_t position =
-        (sector_num-s->first_sectors_number-2*s->sectors_per_fat)%s->sectors_per_cluster;
-
-    return position;
-}
-
-#ifdef DBG
-/*
- * Return the directory entry at mapping->dir_index inside s->directory, or
- * a null pointer when the mapping's mode is MODE_UNDEFINED.
- */
-static direntry_t* get_direntry_for_mapping(BDRVVVFATState* s,mapping_t* mapping)
-{
-    direntry_t* found = 0;
-
-    if (mapping->mode != MODE_UNDEFINED)
-        found = (direntry_t*)(s->directory.pointer+sizeof(direntry_t)*mapping->dir_index);
-    return found;
-}
-#endif
-
-static int init_directories(BDRVVVFATState* s,
-       const char* dirname)
-{ /*
-   * Build the in-memory image of the virtual volume rooted at dirname:
-   * size and initialise the FAT, create the volume-label entry, walk the
-   * mappings (reading every directory and assigning clusters to files
-   * and directories), write the FAT chains and finally fill in the boot
-   * sector.  Returns 0 on success, -1 when a directory cannot be read and
-   * -EINVAL when the contents need more clusters than are available.
-   */
-    bootsector_t* bootsector;
-    mapping_t* mapping;
-    unsigned int i;
-    unsigned int cluster;
-
-    memset(&(s->first_sectors[0]),0,0x40*0x200);
-
-    s->cluster_size=s->sectors_per_cluster*0x200;
-    s->cluster_buffer=qemu_malloc(s->cluster_size);
-
-    /*
-     * The formula: sc = spf+1+spf*spc*(512*8/fat_type),
-     * where sc is sector_count,
-     * spf is sectors_per_fat,
-     * spc is sectors_per_clusters, and
-     * fat_type = 12, 16 or 32.
-     */
-    i = 1+s->sectors_per_cluster*0x200*8/s->fat_type;
-    s->sectors_per_fat=(s->sector_count+i)/i; /* round up */
-
-    array_init(&(s->mapping),sizeof(mapping_t));
-    array_init(&(s->directory),sizeof(direntry_t));
-
-    /* add volume label */
-    {
-       direntry_t* entry=array_get_next(&(s->directory));
-       entry->attributes=0x28; /* archive | volume label */
-       snprintf((char*)entry->name,11,"QEMU VVFAT"); /* NOTE(review): 10 characters plus a NUL at index 10, so the last byte is 0 rather than a space - confirm intended */
-    }
-
-    /* Now build FAT, and write back information into directory */
-    init_fat(s);
-
-    s->faked_sectors=s->first_sectors_number+s->sectors_per_fat*2; /* first sectors plus two FAT copies */
-    s->cluster_count=sector2cluster(s, s->sector_count);
-
-    mapping = array_get_next(&(s->mapping)); /* mapping 0 is the root directory */
-    mapping->begin = 0;
-    mapping->dir_index = 0;
-    mapping->info.dir.parent_mapping_index = -1;
-    mapping->first_mapping_index = -1;
-    mapping->path = strdup(dirname);
-    i = strlen(mapping->path);
-    if (i > 0 && mapping->path[i - 1] == '/')
-       mapping->path[i - 1] = '\0'; /* strip one trailing slash */
-    mapping->mode = MODE_DIRECTORY;
-    mapping->read_only = 0;
-    s->path = mapping->path;
-
-    for (i = 0, cluster = 0; i < s->mapping.next; i++) { /* s->mapping grows while directories are read */
-       /* MS-DOS expects the FAT to be 0 for the root directory
-        * (except for the media byte). */
-       /* LATER TODO: still true for FAT32? */
-       int fix_fat = (i != 0);
-       mapping = array_get(&(s->mapping), i);
-
-        if (mapping->mode & MODE_DIRECTORY) {
-           mapping->begin = cluster;
-           if(read_directory(s, i)) {
-               fprintf(stderr, "Could not read directory %s\n",
-                       mapping->path);
-               return -1;
-           }
-           mapping = array_get(&(s->mapping), i); /* fetch again: read_directory() may have grown the array */
-       } else {
-           assert(mapping->mode == MODE_UNDEFINED);
-           mapping->mode=MODE_NORMAL;
-           mapping->begin = cluster;
-           if (mapping->end > 0) { /* end still holds the file size set by read_directory() */
-               direntry_t* direntry = array_get(&(s->directory),
-                       mapping->dir_index);
-
-               mapping->end = cluster + 1 + (mapping->end-1)/s->cluster_size;
-               set_begin_of_direntry(direntry, mapping->begin);
-           } else {
-               mapping->end = cluster + 1;
-               fix_fat = 0;
-           }
-       }
-
-       assert(mapping->begin < mapping->end);
-
-       /* next free cluster */
-       cluster = mapping->end;
-
-       if(cluster > s->cluster_count) {
-           fprintf(stderr,"Directory does not fit in FAT%d (capacity %s)\n",
-                   s->fat_type,
-                   s->fat_type == 12 ? s->sector_count == 2880 ? "1.44 MB"
-                                                               : "2.88 MB"
-                                     : "504MB");
-           return -EINVAL;
-       }
-
-       /* fix fat for entry */
-       if (fix_fat) {
-           int j;
-           for(j = mapping->begin; j < mapping->end - 1; j++)
-               fat_set(s, j, j+1); /* each cluster points at its successor */
-           fat_set(s, mapping->end - 1, s->max_fat_value); /* last cluster gets the maximum value */
-       }
-    }
-
-    mapping = array_get(&(s->mapping), 0);
-    s->sectors_of_root_directory = mapping->end * s->sectors_per_cluster;
-    s->last_cluster_of_root_directory = mapping->end;
-
-    /* the FAT signature */
-    fat_set(s,0,s->max_fat_value);
-    fat_set(s,1,s->max_fat_value);
-
-    s->current_mapping = NULL;
-
-    bootsector=(bootsector_t*)(s->first_sectors+(s->first_sectors_number-1)*0x200); /* the boot sector is the last of the first sectors */
-    bootsector->jump[0]=0xeb;
-    bootsector->jump[1]=0x3e;
-    bootsector->jump[2]=0x90;
-    memcpy(bootsector->name,"QEMU    ",8);
-    bootsector->sector_size=cpu_to_le16(0x200);
-    bootsector->sectors_per_cluster=s->sectors_per_cluster;
-    bootsector->reserved_sectors=cpu_to_le16(1);
-    bootsector->number_of_fats=0x2; /* number of FATs */
-    bootsector->root_entries=cpu_to_le16(s->sectors_of_root_directory*0x10);
-    bootsector->total_sectors16=s->sector_count>0xffff?0:cpu_to_le16(s->sector_count);
-    bootsector->media_type=(s->fat_type!=12?0xf8:s->sector_count==5760?0xf9:0xf8); /* media descriptor */
-    s->fat.pointer[0] = bootsector->media_type; /* first FAT byte repeats the media descriptor */
-    bootsector->sectors_per_fat=cpu_to_le16(s->sectors_per_fat);
-    bootsector->sectors_per_track=cpu_to_le16(s->bs->secs);
-    bootsector->number_of_heads=cpu_to_le16(s->bs->heads);
-    bootsector->hidden_sectors=cpu_to_le32(s->first_sectors_number==1?0:0x3f);
-    bootsector->total_sectors=cpu_to_le32(s->sector_count>0xffff?s->sector_count:0);
-
-    /* LATER TODO: if FAT32, this is wrong */
-    bootsector->u.fat16.drive_number=s->fat_type==12?0:0x80; /* assume this is hda (TODO) */
-    bootsector->u.fat16.current_head=0;
-    bootsector->u.fat16.signature=0x29;
-    bootsector->u.fat16.id=cpu_to_le32(0xfabe1afd);
-
-    memcpy(bootsector->u.fat16.volume_label,"QEMU VVFAT ",11);
-    memcpy(bootsector->fat_type,(s->fat_type==12?"FAT12   ":s->fat_type==16?"FAT16   ":"FAT32   "),8);
-    bootsector->magic[0]=0x55; bootsector->magic[1]=0xaa;
-
-    return 0;
-}
-
-#ifdef DEBUG
-static BDRVVVFATState *vvv = NULL; /* DEBUG builds only: vvfat_open() stores the state it is opening here */
-#endif
-
-static int enable_write_target(BDRVVVFATState *s); /* forward declaration; vvfat_open() calls it for the ":rw:" option */
-static int is_consistent(BDRVVVFATState *s); /* forward declaration; vvfat_open() only mentions it in a commented-out assert */
-
-static int vvfat_open(BlockDriverState *bs, const char* dirname, int flags)
-{ /*
-   * Open a virtual FAT volume.  dirname must start with "fat:"; the
-   * substrings ":floppy:", ":32:", ":16:", ":12:" and ":rw:" select
-   * options, and the text after the last ':' (or a DOS drive letter plus
-   * ':') is the host directory.  Defaults are FAT16 on a 1024/16/63
-   * geometry, read-only.  Returns 0 on success and -1 on any failure.
-   * The flags argument is not used in this function.
-   */
-    BDRVVVFATState *s = bs->opaque;
-    int floppy = 0;
-    int i;
-
-#ifdef DEBUG
-    vvv = s;
-#endif
-
-DLOG(if (stderr == NULL) {
-    stderr = fopen("vvfat.log", "a");
-    setbuf(stderr, NULL);
-})
-
-    s->bs = bs;
-
-    s->fat_type=16;
-    /* LATER TODO: if FAT32, adjust */
-    s->sectors_per_cluster=0x10;
-    /* 504MB disk*/
-    bs->cyls=1024; bs->heads=16; bs->secs=63;
-
-    s->current_cluster=0xffffffff;
-
-    s->first_sectors_number=0x40;
-    /* read only is the default for safety */
-    bs->read_only = 1;
-    s->qcow = s->write_target = NULL;
-    s->qcow_filename = NULL;
-    s->fat2 = NULL;
-    s->downcase_short_names = 1;
-
-    if (!strstart(dirname, "fat:", NULL))
-       return -1;
-
-    if (strstr(dirname, ":floppy:")) { /* options are matched anywhere in dirname */
-       floppy = 1;
-       s->fat_type = 12;
-       s->first_sectors_number = 1;
-       s->sectors_per_cluster=2;
-       bs->cyls = 80; bs->heads = 2; bs->secs = 36;
-    }
-
-    s->sector_count=bs->cyls*bs->heads*bs->secs;
-
-    if (strstr(dirname, ":32:")) {
-       fprintf(stderr, "Big fat greek warning: FAT32 has not been tested. You are welcome to do so!\n");
-       s->fat_type = 32;
-    } else if (strstr(dirname, ":16:")) {
-       s->fat_type = 16;
-    } else if (strstr(dirname, ":12:")) {
-       s->fat_type = 12;
-       s->sector_count=2880; /* overrides the count derived from the geometry; bs->cyls/heads/secs stay as set above */
-    }
-
-    if (strstr(dirname, ":rw:")) {
-       if (enable_write_target(s))
-           return -1;
-       bs->read_only = 0;
-    }
-
-    i = strrchr(dirname, ':') - dirname; /* a ':' always exists because of the "fat:" prefix check */
-    assert(i >= 3);
-    if (dirname[i-2] == ':' && qemu_isalpha(dirname[i-1]))
-       /* workaround for DOS drive names */
-       dirname += i-1;
-    else
-       dirname += i+1;
-
-    bs->total_sectors=bs->cyls*bs->heads*bs->secs;
-
-    if(init_directories(s, dirname))
-       return -1;
-
-    s->sector_count = s->faked_sectors + s->sectors_per_cluster*s->cluster_count; /* whole clusters only */
-
-    if(s->first_sectors_number==0x40)
-       init_mbr(s); /* skipped for ":floppy:", which sets first_sectors_number to 1 */
-
-    /* for some reason or other, MS-DOS does not like to know about CHS... */
-    if (floppy)
-       bs->heads = bs->cyls = bs->secs = 0;
-
-    //    assert(is_consistent(s));
-    return 0;
-}
-
-/*
- * Forget the currently cached mapping.  If a mapping is set and
- * s->current_fd is non-zero the descriptor is closed and reset to 0.  The
- * cached cluster number is invalidated in every case.
- */
-static inline void vvfat_close_current_file(BDRVVVFATState *s)
-{
-    if (s->current_mapping != NULL) {
-        if (s->current_fd != 0) {
-            close(s->current_fd);
-            s->current_fd = 0;
-        }
-        s->current_mapping = NULL;
-    }
-    s->current_cluster = -1;
-}
-
-/* mappings between index1 and index2-1 are supposed to be ordered
- * return value is the index of the last mapping for which end>cluster_num
- *
- * The search repeatedly halves the interval [index1, index2]: index2 moves
- * down to the midpoint when that mapping begins at or after cluster_num,
- * otherwise index1 moves up to it.  The loop only ends through one of the
- * two return statements.
- */
-static inline int find_mapping_for_cluster_aux(BDRVVVFATState* s,int cluster_num,int index1,int index2)
-{
-    int index3=index1+1; /* overwritten before first use */
-    while(1) {
-       mapping_t* mapping;
-       index3=(index1+index2)/2;
-       mapping=array_get(&(s->mapping),index3);
-       assert(mapping->begin < mapping->end);
-       if(mapping->begin>=cluster_num) {
-           assert(index2!=index3 || index2==0);
-           if(index2==index3)
-               return index1;
-           index2=index3;
-       } else {
-           if(index1==index3) /* interval cannot shrink further */
-               return mapping->end<=cluster_num ? index2 : index1;
-           index1=index3;
-       }
-       assert(index1<=index2);
-       DLOG(mapping=array_get(&(s->mapping),index1);
-       assert(mapping->begin<=cluster_num);
-       assert(index2 >= s->mapping.next ||
-               ((mapping = array_get(&(s->mapping),index2)) &&
-               mapping->end>cluster_num)));
-    }
-}
-
-/*
- * Return the mapping whose cluster range [begin, end) contains cluster_num,
- * or NULL when the search ends past the last mapping or on a mapping that
- * begins after cluster_num.
- */
-static inline mapping_t* find_mapping_for_cluster(BDRVVVFATState* s,int cluster_num)
-{
-    mapping_t* hit = NULL;
-    int position = find_mapping_for_cluster_aux(s,cluster_num,0,s->mapping.next);
-
-    if (position < s->mapping.next) {
-        mapping_t* candidate = array_get(&(s->mapping),position);
-
-        if (candidate->begin <= cluster_num) {
-            assert(candidate->begin<=cluster_num && candidate->end>cluster_num);
-            hit = candidate;
-        }
-    }
-    return hit;
-}
-
-/*
- * Linear search for the mapping whose path equals the given path.  The
- * mappings are ordered by cluster rather than by name, hence the O(n)
- * scan.  Only mappings with a negative first_mapping_index are compared.
- * Returns NULL when nothing matches.
- */
-static inline mapping_t* find_mapping_for_path(BDRVVVFATState* s,
-       const char* path)
-{
-    int position = 0;
-
-    while (position < s->mapping.next) {
-        mapping_t* candidate = array_get(&(s->mapping), position);
-
-        if (candidate->first_mapping_index < 0 &&
-                strcmp(path, candidate->path) == 0)
-            return candidate;
-        position++;
-    }
-
-    return NULL;
-}
-
-/*
- * Make mapping the currently open file.  Nothing is done when the current
- * mapping already has the same path.  Otherwise the host file is opened
- * read-only, the previous file is closed and the new descriptor and
- * mapping are recorded.  Returns 0 on success, -1 when mapping is NULL or
- * the file cannot be opened.
- */
-static int open_file(BDRVVVFATState* s,mapping_t* mapping)
-{
-    int fd;
-
-    if (mapping == NULL)
-        return -1;
-
-    /* same path as the file that is already open: keep it */
-    if (s->current_mapping != NULL &&
-            strcmp(s->current_mapping->path,mapping->path) == 0)
-        return 0;
-
-    fd = open(mapping->path, O_RDONLY | O_BINARY | O_LARGEFILE);
-    if (fd < 0)
-        return -1;
-
-    vvfat_close_current_file(s);
-    s->current_fd = fd;
-    s->current_mapping = mapping;
-    return 0;
-}
-
-static inline int read_cluster(BDRVVVFATState *s,int cluster_num)
-{ /*
-   * Make s->cluster point at the data of cluster_num and remember it in
-   * s->current_cluster.  Directory clusters are served straight out of
-   * s->directory; file clusters are read from the host file into
-   * s->cluster_buffer.  Returns 0 on success (or when the cluster is
-   * already current), -2 when no file could be opened for the cluster,
-   * -3 when seeking fails and -1 when read() fails.
-   */
-    if(s->current_cluster != cluster_num) {
-       int result=0;
-       off_t offset;
-       assert(!s->current_mapping || s->current_fd || (s->current_mapping->mode & MODE_DIRECTORY));
-       if(!s->current_mapping
-               || s->current_mapping->begin>cluster_num
-               || s->current_mapping->end<=cluster_num) { /* cluster lies outside the cached mapping */
-           /* binary search of mappings for file */
-           mapping_t* mapping=find_mapping_for_cluster(s,cluster_num);
-
-           assert(!mapping || (cluster_num>=mapping->begin && cluster_num<mapping->end));
-
-           if (mapping && mapping->mode & MODE_DIRECTORY) {
-               vvfat_close_current_file(s);
-               s->current_mapping = mapping;
-read_cluster_directory:
-               offset = s->cluster_size*(cluster_num-s->current_mapping->begin); /* this label is also the target of the goto further down */
-               s->cluster = (unsigned char*)s->directory.pointer+offset
-                       + 0x20*s->current_mapping->info.dir.first_dir_index;
-               assert(((s->cluster-(unsigned char*)s->directory.pointer)%s->cluster_size)==0);
-               assert((char*)s->cluster+s->cluster_size <= s->directory.pointer+s->directory.next*s->directory.item_size);
-               s->current_cluster = cluster_num;
-               return 0;
-           }
-
-           if(open_file(s,mapping)) /* also fails when no mapping was found */
-               return -2;
-       } else if (s->current_mapping->mode & MODE_DIRECTORY)
-           goto read_cluster_directory;
-
-       assert(s->current_fd);
-
-       offset=s->cluster_size*(cluster_num-s->current_mapping->begin)+s->current_mapping->info.file.offset;
-       if(lseek(s->current_fd, offset, SEEK_SET)!=offset)
-           return -3;
-       s->cluster=s->cluster_buffer;
-       result=read(s->current_fd,s->cluster,s->cluster_size);
-       if(result<0) { /* only a negative result is treated as an error; a short read is accepted */
-           s->current_cluster = -1;
-           return -1;
-       }
-       s->current_cluster = cluster_num;
-    }
-    return 0;
-}
-
-#ifdef DEBUG
-/*
- * Dump len bytes starting at address to stderr, 16 bytes per line: a hex
- * column (padded for a short last line) followed by the same bytes as
- * characters, with bytes below ' ' or above 0x7f shown as '.'.
- */
-static void hexdump(const void* address, uint32_t len)
-{
-    const unsigned char* bytes = address;
-    int row, col;
-
-    for (row = 0; row < len; row += 16) {
-        /* hex column; cells past the end of the data are blank */
-        for (col = 0; col < 16; col++) {
-            if (row + col < len)
-                fprintf(stderr, "%02x ", bytes[row + col]);
-            else
-                fprintf(stderr, "   ");
-        }
-        fprintf(stderr, " ");
-        /* character column */
-        for (col = 0; col < 16 && row + col < len; col++) {
-            const unsigned char ch = bytes[row + col];
-
-            if (ch < ' ' || ch > 0x7f)
-                fprintf(stderr, "%c", '.');
-            else
-                fprintf(stderr, "%c", ch);
-        }
-        fprintf(stderr, "\n");
-    }
-}
-
-/*
- * Debug helper: print one directory entry to stderr.  For a long-name entry
- * the low bytes of the characters stored at offsets 1..10, 14..25 and
- * 28..31 are printed; for any other entry the 11 short-name bytes plus
- * attributes, first cluster and size.  Characters below ' ' are shown as
- * 0xb0.  Only the address is printed when direntry is NULL.
- */
-static void print_direntry(const direntry_t* direntry)
-{
-    int j = 0;
-    char buffer[1024];
-
-    /*
-     * Print the address with %p: casting the pointer to int and using %x
-     * truncates it wherever pointers are wider than int.
-     */
-    fprintf(stderr, "direntry %p: ", (const void*)direntry);
-    if(!direntry)
-        return;
-#define ADD_CHAR(c) {buffer[j] = (c); if (buffer[j] < ' ') buffer[j] = 0xb0; j++;}
-    if(is_long_name(direntry)) {
-        unsigned char* c=(unsigned char*)direntry;
-        int i;
-        for(i=1;i<11 && c[i] && c[i]!=0xff;i+=2)
-            ADD_CHAR(c[i]);
-        for(i=14;i<26 && c[i] && c[i]!=0xff;i+=2)
-            ADD_CHAR(c[i]);
-        for(i=28;i<32 && c[i] && c[i]!=0xff;i+=2)
-            ADD_CHAR(c[i]);
-        buffer[j] = 0;
-        fprintf(stderr, "%s\n", buffer);
-    } else {
-        int i;
-        for(i=0;i<11;i++)
-            ADD_CHAR(direntry->name[i]);
-        buffer[j] = 0;
-        fprintf(stderr,"%s attributes=0x%02x begin=%d size=%d\n",
-                buffer,
-                direntry->attributes,
-                begin_of_direntry(direntry),le32_to_cpu(direntry->size));
-    }
-}
-
-/*
- * Debug helper: print the fields of one mapping to stderr, followed by the
- * directory-specific or the file-specific part depending on its mode.
- */
-static void print_mapping(const mapping_t* mapping)
-{
-    /*
-     * Print the address with %p: casting the pointer to int and using %x
-     * truncates it wherever pointers are wider than int.
-     */
-    fprintf(stderr, "mapping (%p): begin, end = %d, %d, dir_index = %d, first_mapping_index = %d, name = %s, mode = 0x%x, " , (const void*)mapping, mapping->begin, mapping->end, mapping->dir_index, mapping->first_mapping_index, mapping->path, mapping->mode);
-    if (mapping->mode & MODE_DIRECTORY)
-        fprintf(stderr, "parent_mapping_index = %d, first_dir_index = %d\n", mapping->info.dir.parent_mapping_index, mapping->info.dir.first_dir_index);
-    else
-        fprintf(stderr, "offset = %d\n", mapping->info.file.offset);
-}
-#endif
-
-static int vvfat_read(BlockDriverState *bs, int64_t sector_num,
-                    uint8_t *buf, int nb_sectors)
-{ /*
-   * Read nb_sectors 512-byte sectors starting at sector_num into buf.
-   * Sectors allocated in s->qcow (when set) are read from there; sectors
-   * below s->faked_sectors come from the first sectors or the in-memory
-   * FAT (served for both FAT copies); all others come from the cluster
-   * returned by read_cluster().  Returns 0 on success and -1 when a
-   * sector lies beyond s->sector_count or the qcow read fails.
-   */
-    BDRVVVFATState *s = bs->opaque;
-    int i;
-
-    for(i=0;i<nb_sectors;i++,sector_num++) {
-       if (sector_num >= s->sector_count)
-          return -1;
-       if (s->qcow) {
-           int n;
-           if (s->qcow->drv->bdrv_is_allocated(s->qcow,
-                       sector_num, nb_sectors-i, &n)) {
-DLOG(fprintf(stderr, "sectors %d+%d allocated\n", (int)sector_num, n));
-               if (s->qcow->drv->bdrv_read(s->qcow, sector_num, buf+i*0x200, n))
-                   return -1;
-               i += n - 1; /* the loop increment accounts for the remaining sector */
-               sector_num += n - 1;
-               continue;
-           }
-DLOG(fprintf(stderr, "sector %d not allocated\n", (int)sector_num));
-       }
-       if(sector_num<s->faked_sectors) {
-           if(sector_num<s->first_sectors_number)
-               memcpy(buf+i*0x200,&(s->first_sectors[sector_num*0x200]),0x200);
-           else if(sector_num-s->first_sectors_number<s->sectors_per_fat)
-               memcpy(buf+i*0x200,&(s->fat.pointer[(sector_num-s->first_sectors_number)*0x200]),0x200);
-           else if(sector_num-s->first_sectors_number-s->sectors_per_fat<s->sectors_per_fat)
-               memcpy(buf+i*0x200,&(s->fat.pointer[(sector_num-s->first_sectors_number-s->sectors_per_fat)*0x200]),0x200); /* second FAT copy is served from the same array */
-       } else {
-           uint32_t sector=sector_num-s->faked_sectors,
-           sector_offset_in_cluster=(sector%s->sectors_per_cluster),
-           cluster_num=sector/s->sectors_per_cluster;
-           if(read_cluster(s, cluster_num) != 0) {
-               /* LATER TODO: strict: return -1; */
-               memset(buf+i*0x200,0,0x200); /* unreadable clusters are returned as zeroes */
-               continue;
-           }
-           memcpy(buf+i*0x200,s->cluster+sector_offset_in_cluster*0x200,0x200);
-       }
-    }
-    return 0;
-}
-
-/* LATER TODO: statify all functions */
-
-/*
- * Idea of the write support (use snapshot):
- *
- * 1. check if all data is consistent, recording renames, modifications,
- *    new files and directories (in s->commits).
- *
- * 2. if the data is not consistent, stop committing
- *
- * 3. handle renames, and create new files and directories (do not yet
- *    write their contents)
- *
- * 4. walk the directories, fixing the mapping and direntries, and marking
- *    the handled mappings as not deleted
- *
- * 5. commit the contents of the files
- *
- * 6. handle deleted files and directories
- *
- */
-
-typedef struct commit_t {
-    char* path; /* freed by clear_commits() for every action except ACTION_WRITEOUT, where it must be NULL */
-    union {
-       struct { uint32_t cluster; } rename; /* filled in by schedule_rename() */
-       struct { int dir_index; uint32_t modified_offset; } writeout; /* filled in by schedule_writeout() */
-       struct { uint32_t first_cluster; } new_file; /* filled in by schedule_new_file() */
-       struct { uint32_t cluster; } mkdir; /* filled in by schedule_mkdir() */
-    } param;
-    /* DELETEs and RMDIRs are handled differently: see handle_deletes() */
-    enum {
-       ACTION_RENAME, ACTION_WRITEOUT, ACTION_NEW_FILE, ACTION_MKDIR
-    } action; /* selects which member of param is valid */
-} commit_t;
-
-/*
- * Drop all scheduled commits.  The path of every commit except
- * ACTION_WRITEOUT ones (which must not carry a path) is freed, then the
- * array is marked empty.
- */
-static void clear_commits(BDRVVVFATState* s)
-{
-    int position = 0;
-DLOG(fprintf(stderr, "clear_commits (%d commits)\n", s->commits.next));
-    while (position < s->commits.next) {
-        commit_t* pending = array_get(&(s->commits), position);
-
-        assert(pending->path || pending->action == ACTION_WRITEOUT);
-        if (pending->action == ACTION_WRITEOUT) {
-            assert(pending->path == NULL);
-        } else {
-            assert(pending->path);
-            free(pending->path);
-        }
-        position++;
-    }
-    s->commits.next = 0;
-}
-
-/*
- * Queue an ACTION_RENAME commit for cluster.  The commit stores the
- * new_path pointer itself; clear_commits() frees it later.
- */
-static void schedule_rename(BDRVVVFATState* s,
-       uint32_t cluster, char* new_path)
-{
-    commit_t* pending = array_get_next(&(s->commits));
-
-    pending->action = ACTION_RENAME;
-    pending->param.rename.cluster = cluster;
-    pending->path = new_path;
-}
-
-/*
- * Queue an ACTION_WRITEOUT commit for the directory entry at dir_index,
- * recording modified_offset.  Such commits carry no path.
- */
-static void schedule_writeout(BDRVVVFATState* s,
-       int dir_index, uint32_t modified_offset)
-{
-    commit_t* pending = array_get_next(&(s->commits));
-
-    pending->action = ACTION_WRITEOUT;
-    pending->param.writeout.dir_index = dir_index;
-    pending->param.writeout.modified_offset = modified_offset;
-    pending->path = NULL;
-}
-
-/*
- * Queue an ACTION_NEW_FILE commit for path starting at first_cluster.  The
- * commit stores the path pointer itself; clear_commits() frees it later.
- */
-static void schedule_new_file(BDRVVVFATState* s,
-       char* path, uint32_t first_cluster)
-{
-    commit_t* pending = array_get_next(&(s->commits));
-
-    pending->action = ACTION_NEW_FILE;
-    pending->param.new_file.first_cluster = first_cluster;
-    pending->path = path;
-}
-
-/*
- * Queue an ACTION_MKDIR commit for path at cluster.  The commit stores the
- * path pointer itself; clear_commits() frees it later.
- */
-static void schedule_mkdir(BDRVVVFATState* s, uint32_t cluster, char* path)
-{
-    commit_t* pending = array_get_next(&(s->commits));
-
-    pending->action = ACTION_MKDIR;
-    pending->param.mkdir.cluster = cluster;
-    pending->path = path;
-}
-
-typedef struct {
-    /*
-     * Since the sequence number is at most 0x3f, and the filename
-     * length is at most 13 times the sequence number, the maximal
-     * filename length is 0x3f * 13 bytes.
-     * One extra byte is reserved for the terminating zero that
-     * parse_long_name() stores at name[sequence_number * 13].
-     */
-    unsigned char name[0x3f * 13 + 1];
-    int checksum, len; /* checksum: byte 13 of the first entry parsed, or 0x100 after lfn_init(); len: set by parse_long_name() */
-    int sequence_number; /* sequence number of the entry parsed most recently */
-} long_file_name;
-
-static void lfn_init(long_file_name* lfn)
-{
-   lfn->sequence_number = lfn->len = 0;
-   lfn->checksum = 0x100;
-}
-
-/* return 0 if parsed successfully, > 0 if no long name, < 0 if error */
-static int parse_long_name(long_file_name* lfn,
-       const direntry_t* direntry)
-{
-    int i, j, offset;
-    const unsigned char* pointer = (const unsigned char*)direntry;
-
-    if (!is_long_name(direntry))
-       return 1;
-
-    if (pointer[0] & 0x40) {
-       lfn->sequence_number = pointer[0] & 0x3f;
-       lfn->checksum = pointer[13];
-       lfn->name[0] = 0;
-       lfn->name[lfn->sequence_number * 13] = 0;
-    } else if ((pointer[0] & 0x3f) != --lfn->sequence_number)
-       return -1;
-    else if (pointer[13] != lfn->checksum)
-       return -2;
-    else if (pointer[12] || pointer[26] || pointer[27])
-       return -3;
-
-    offset = 13 * (lfn->sequence_number - 1);
-    for (i = 0, j = 1; i < 13; i++, j+=2) {
-       if (j == 11)
-           j = 14;
-       else if (j == 26)
-           j = 28;
-
-       if (pointer[j+1] == 0)
-           lfn->name[offset + i] = pointer[j];
-       else if (pointer[j+1] != 0xff || (pointer[0] & 0x40) == 0)
-           return -4;
-       else
-           lfn->name[offset + i] = 0;
-    }
-
-    if (pointer[0] & 0x40)
-       lfn->len = offset + strlen((char*)lfn->name + offset);
-
-    return 0;
-}
-
-/* returns 0 if successful, >0 if no short_name, and <0 on error */
-static int parse_short_name(BDRVVVFATState* s,
-       long_file_name* lfn, direntry_t* direntry)
-{
-    int i, j;
-
-    if (!is_short_name(direntry))
-       return 1;
-
-    for (j = 7; j >= 0 && direntry->name[j] == ' '; j--);
-    for (i = 0; i <= j; i++) {
-       if (direntry->name[i] <= ' ' || direntry->name[i] > 0x7f)
-           return -1;
-       else if (s->downcase_short_names)
-           lfn->name[i] = qemu_tolower(direntry->name[i]);
-       else
-           lfn->name[i] = direntry->name[i];
-    }
-
-    for (j = 2; j >= 0 && direntry->extension[j] == ' '; j--);
-    if (j >= 0) {
-       lfn->name[i++] = '.';
-       lfn->name[i + j + 1] = '\0';
-       for (;j >= 0; j--) {
-           if (direntry->extension[j] <= ' ' || direntry->extension[j] > 0x7f)
-               return -2;
-           else if (s->downcase_short_names)
-               lfn->name[i + j] = qemu_tolower(direntry->extension[j]);
-           else
-               lfn->name[i + j] = direntry->extension[j];
-       }
-    } else
-       lfn->name[i + j + 1] = '\0';
-
-    lfn->len = strlen((char*)lfn->name);
-
-    return 0;
-}
-
-static inline uint32_t modified_fat_get(BDRVVVFATState* s,
-       unsigned int cluster)
-{
-    if (cluster < s->last_cluster_of_root_directory) {
-       if (cluster + 1 == s->last_cluster_of_root_directory)
-           return s->max_fat_value;
-       else
-           return cluster + 1;
-    }
-
-    if (s->fat_type==32) {
-        uint32_t* entry=((uint32_t*)s->fat2)+cluster;
-        return le32_to_cpu(*entry);
-    } else if (s->fat_type==16) {
-        uint16_t* entry=((uint16_t*)s->fat2)+cluster;
-        return le16_to_cpu(*entry);
-    } else {
-        const uint8_t* x=s->fat2+cluster*3/2;
-        return ((x[0]|(x[1]<<8))>>(cluster&1?4:0))&0x0fff;
-    }
-}
-
-static inline int cluster_was_modified(BDRVVVFATState* s, uint32_t cluster_num)
-{
-    int was_modified = 0;
-    int i, dummy;
-
-    if (s->qcow == NULL)
-       return 0;
-
-    for (i = 0; !was_modified && i < s->sectors_per_cluster; i++)
-       was_modified = s->qcow->drv->bdrv_is_allocated(s->qcow,
-               cluster2sector(s, cluster_num) + i, 1, &dummy);
-
-    return was_modified;
-}
-
-static const char* get_basename(const char* path)
-{
-    char* basename = strrchr(path, '/');
-    if (basename == NULL)
-       return path;
-    else
-       return basename + 1; /* strip '/' */
-}
-
-/*
- * The array s->used_clusters holds the states of the clusters. If it is
- * part of a file, it has bit 2 set, in case of a directory, bit 1. If it
- * was modified, bit 3 is set.
- * If any cluster is allocated, but not part of a file or directory, this
- * driver refuses to commit.
- */
-typedef enum {
-     USED_DIRECTORY = 1, USED_FILE = 2, USED_ANY = 3, USED_ALLOCATED = 4
-} used_t;
-
-/*
- * get_cluster_count_for_direntry() not only determines how many clusters
- * are occupied by direntry, but also if it was renamed or modified.
- *
- * A file is thought to be renamed *only* if there already was a file with
- * exactly the same first cluster, but a different name.
- *
- * Further, the files/directories handled by this function are
- * assumed to be *not* deleted (and *only* those).
- */
-static uint32_t get_cluster_count_for_direntry(BDRVVVFATState* s,
-       direntry_t* direntry, const char* path)
-{
-    /*
-     * This is a little bit tricky:
-     * IF the guest OS just inserts a cluster into the file chain,
-     * and leaves the rest alone, (i.e. the original file had clusters
-     * 15 -> 16, but now has 15 -> 32 -> 16), then the following happens:
-     *
-     * - do_commit will write the cluster into the file at the given
-     *   offset, but
-     *
-     * - the cluster which is overwritten should be moved to a later
-     *   position in the file.
-     *
-     * I am not aware that any OS does something as braindead, but this
-     * situation could happen anyway when not committing for a long time.
-     * Just to be sure that this does not bite us, detect it, and copy the
-     * contents of the clusters to-be-overwritten into the qcow.
-     */
-    int copy_it = 0;
-    int was_modified = 0;
-    int32_t ret = 0;
-
-    uint32_t cluster_num = begin_of_direntry(direntry);
-    uint32_t offset = 0;
-    int first_mapping_index = -1;
-    mapping_t* mapping = NULL;
-    const char* basename2 = NULL;
-
-    vvfat_close_current_file(s);
-
-    /* the root directory */
-    if (cluster_num == 0)
-       return 0;
-
-    /* write support */
-    if (s->qcow) {
-       basename2 = get_basename(path);
-
-       mapping = find_mapping_for_cluster(s, cluster_num);
-
-       if (mapping) {
-           const char* basename;
-
-           assert(mapping->mode & MODE_DELETED);
-           mapping->mode &= ~MODE_DELETED;
-
-           basename = get_basename(mapping->path);
-
-           assert(mapping->mode & MODE_NORMAL);
-
-           /* rename */
-           if (strcmp(basename, basename2))
-               schedule_rename(s, cluster_num, strdup(path));
-       } else if (is_file(direntry))
-           /* new file */
-           schedule_new_file(s, strdup(path), cluster_num);
-       else {
-           assert(0);
-           return 0;
-       }
-    }
-
-    while(1) {
-       if (s->qcow) {
-           if (!copy_it && cluster_was_modified(s, cluster_num)) {
-               if (mapping == NULL ||
-                       mapping->begin > cluster_num ||
-                       mapping->end <= cluster_num)
-               mapping = find_mapping_for_cluster(s, cluster_num);
-
-
-               if (mapping &&
-                       (mapping->mode & MODE_DIRECTORY) == 0) {
-
-                   /* was modified in qcow */
-                   if (offset != mapping->info.file.offset + s->cluster_size
-                           * (cluster_num - mapping->begin)) {
-                       /* offset of this cluster in file chain has changed */
-                       assert(0);
-                       copy_it = 1;
-                   } else if (offset == 0) {
-                       const char* basename = get_basename(mapping->path);
-
-                       if (strcmp(basename, basename2))
-                           copy_it = 1;
-                       first_mapping_index = array_index(&(s->mapping), mapping);
-                   }
-
-                   if (mapping->first_mapping_index != first_mapping_index
-                           && mapping->info.file.offset > 0) {
-                       assert(0);
-                       copy_it = 1;
-                   }
-
-                   /* need to write out? */
-                   if (!was_modified && is_file(direntry)) {
-                       was_modified = 1;
-                       schedule_writeout(s, mapping->dir_index, offset);
-                   }
-               }
-           }
-
-           if (copy_it) {
-               int i, dummy;
-               /*
-                * This is horribly inefficient, but that is okay, since
-                * it is rarely executed, if at all.
-                */
-               int64_t offset = cluster2sector(s, cluster_num);
-
-               vvfat_close_current_file(s);
-               for (i = 0; i < s->sectors_per_cluster; i++)
-                   if (!s->qcow->drv->bdrv_is_allocated(s->qcow,
-                               offset + i, 1, &dummy)) {
-                       if (vvfat_read(s->bs,
-                                   offset, s->cluster_buffer, 1))
-                           return -1;
-                       if (s->qcow->drv->bdrv_write(s->qcow,
-                                   offset, s->cluster_buffer, 1))
-                           return -2;
-                   }
-           }
-       }
-
-       ret++;
-       if (s->used_clusters[cluster_num] & USED_ANY)
-           return 0;
-       s->used_clusters[cluster_num] = USED_FILE;
-
-       cluster_num = modified_fat_get(s, cluster_num);
-
-       if (fat_eof(s, cluster_num))
-           return ret;
-       else if (cluster_num < 2 || cluster_num > s->max_fat_value - 16)
-           return -1;
-
-       offset += s->cluster_size;
-    }
-}
-
-/*
- * This function looks at the modified data (qcow).
- * It returns 0 upon inconsistency or error, and the number of clusters
- * used by the directory, its subdirectories and their files.
- */
-static int check_directory_consistency(BDRVVVFATState *s,
-       int cluster_num, const char* path)
-{
-    int ret = 0;
-    unsigned char* cluster = qemu_malloc(s->cluster_size);
-    direntry_t* direntries = (direntry_t*)cluster;
-    mapping_t* mapping = find_mapping_for_cluster(s, cluster_num);
-
-    long_file_name lfn;
-    int path_len = strlen(path);
-    char path2[PATH_MAX];
-
-    assert(path_len < PATH_MAX); /* len was tested before! */
-    pstrcpy(path2, sizeof(path2), path);
-    path2[path_len] = '/';
-    path2[path_len + 1] = '\0';
-
-    if (mapping) {
-       const char* basename = get_basename(mapping->path);
-       const char* basename2 = get_basename(path);
-
-       assert(mapping->mode & MODE_DIRECTORY);
-
-       assert(mapping->mode & MODE_DELETED);
-       mapping->mode &= ~MODE_DELETED;
-
-       if (strcmp(basename, basename2))
-           schedule_rename(s, cluster_num, strdup(path));
-    } else
-       /* new directory */
-       schedule_mkdir(s, cluster_num, strdup(path));
-
-    lfn_init(&lfn);
-    do {
-       int i;
-       int subret = 0;
-
-       ret++;
-
-       if (s->used_clusters[cluster_num] & USED_ANY) {
-           fprintf(stderr, "cluster %d used more than once\n", (int)cluster_num);
-           return 0;
-       }
-       s->used_clusters[cluster_num] = USED_DIRECTORY;
-
-DLOG(fprintf(stderr, "read cluster %d (sector %d)\n", (int)cluster_num, (int)cluster2sector(s, cluster_num)));
-       subret = vvfat_read(s->bs, cluster2sector(s, cluster_num), cluster,
-               s->sectors_per_cluster);
-       if (subret) {
-           fprintf(stderr, "Error fetching direntries\n");
-       fail:
-           free(cluster);
-           return 0;
-       }
-
-       for (i = 0; i < 0x10 * s->sectors_per_cluster; i++) {
-           int cluster_count = 0;
-
-DLOG(fprintf(stderr, "check direntry %d: \n", i); print_direntry(direntries + i));
-           if (is_volume_label(direntries + i) || is_dot(direntries + i) ||
-                   is_free(direntries + i))
-               continue;
-
-           subret = parse_long_name(&lfn, direntries + i);
-           if (subret < 0) {
-               fprintf(stderr, "Error in long name\n");
-               goto fail;
-           }
-           if (subret == 0 || is_free(direntries + i))
-               continue;
-
-           if (fat_chksum(direntries+i) != lfn.checksum) {
-               subret = parse_short_name(s, &lfn, direntries + i);
-               if (subret < 0) {
-                   fprintf(stderr, "Error in short name (%d)\n", subret);
-                   goto fail;
-               }
-               if (subret > 0 || !strcmp((char*)lfn.name, ".")
-                       || !strcmp((char*)lfn.name, ".."))
-                   continue;
-           }
-           lfn.checksum = 0x100; /* cannot use long name twice */
-
-           if (path_len + 1 + lfn.len >= PATH_MAX) {
-               fprintf(stderr, "Name too long: %s/%s\n", path, lfn.name);
-               goto fail;
-           }
-            pstrcpy(path2 + path_len + 1, sizeof(path2) - path_len - 1,
-                    (char*)lfn.name);
-
-           if (is_directory(direntries + i)) {
-               if (begin_of_direntry(direntries + i) == 0) {
-                   DLOG(fprintf(stderr, "invalid begin for directory: %s\n", path2); print_direntry(direntries + i));
-                   goto fail;
-               }
-               cluster_count = check_directory_consistency(s,
-                       begin_of_direntry(direntries + i), path2);
-               if (cluster_count == 0) {
-                   DLOG(fprintf(stderr, "problem in directory %s:\n", path2); print_direntry(direntries + i));
-                   goto fail;
-               }
-           } else if (is_file(direntries + i)) {
-               /* check file size with FAT */
-               cluster_count = get_cluster_count_for_direntry(s, direntries + i, path2);
-               if (cluster_count !=
-                       (le32_to_cpu(direntries[i].size) + s->cluster_size
-                        - 1) / s->cluster_size) {
-                   DLOG(fprintf(stderr, "Cluster count mismatch\n"));
-                   goto fail;
-               }
-           } else
-               assert(0); /* cluster_count = 0; */
-
-           ret += cluster_count;
-       }
-
-       cluster_num = modified_fat_get(s, cluster_num);
-    } while(!fat_eof(s, cluster_num));
-
-    free(cluster);
-    return ret;
-}
-
-/* returns 1 on success */
-static int is_consistent(BDRVVVFATState* s)
-{
-    int i, check;
-    int used_clusters_count = 0;
-
-DLOG(checkpoint());
-    /*
-     * - get modified FAT
-     * - compare the two FATs (TODO)
-     * - get buffer for marking used clusters
-     * - recurse direntries from root (using bs->bdrv_read to make
-     *    sure to get the new data)
-     *   - check that the FAT agrees with the size
-     *   - count the number of clusters occupied by this directory and
-     *     its files
-     * - check that the cumulative used cluster count agrees with the
-     *   FAT
-     * - if all is fine, return number of used clusters
-     */
-    if (s->fat2 == NULL) {
-       int size = 0x200 * s->sectors_per_fat;
-       s->fat2 = qemu_malloc(size);
-       memcpy(s->fat2, s->fat.pointer, size);
-    }
-    check = vvfat_read(s->bs,
-           s->first_sectors_number, s->fat2, s->sectors_per_fat);
-    if (check) {
-       fprintf(stderr, "Could not copy fat\n");
-       return 0;
-    }
-    assert (s->used_clusters);
-    for (i = 0; i < sector2cluster(s, s->sector_count); i++)
-       s->used_clusters[i] &= ~USED_ANY;
-
-    clear_commits(s);
-
-    /* mark every mapped file/directory as deleted.
-     * (check_directory_consistency() will unmark those still present). */
-    if (s->qcow)
-       for (i = 0; i < s->mapping.next; i++) {
-           mapping_t* mapping = array_get(&(s->mapping), i);
-           if (mapping->first_mapping_index < 0)
-               mapping->mode |= MODE_DELETED;
-       }
-
-    used_clusters_count = check_directory_consistency(s, 0, s->path);
-    if (used_clusters_count <= 0) {
-       DLOG(fprintf(stderr, "problem in directory\n"));
-       return 0;
-    }
-
-    check = s->last_cluster_of_root_directory;
-    for (i = check; i < sector2cluster(s, s->sector_count); i++) {
-       if (modified_fat_get(s, i)) {
-           if(!s->used_clusters[i]) {
-               DLOG(fprintf(stderr, "FAT was modified (%d), but cluster is not used?\n", i));
-               return 0;
-           }
-           check++;
-       }
-
-       if (s->used_clusters[i] == USED_ALLOCATED) {
-           /* allocated, but not used... */
-           DLOG(fprintf(stderr, "unused, modified cluster: %d\n", i));
-           return 0;
-       }
-    }
-
-    if (check != used_clusters_count)
-       return 0;
-
-    return used_clusters_count;
-}
-
-static inline void adjust_mapping_indices(BDRVVVFATState* s,
-       int offset, int adjust)
-{
-    int i;
-
-    for (i = 0; i < s->mapping.next; i++) {
-       mapping_t* mapping = array_get(&(s->mapping), i);
-
-#define ADJUST_MAPPING_INDEX(name) \
-       if (mapping->name >= offset) \
-           mapping->name += adjust
-
-       ADJUST_MAPPING_INDEX(first_mapping_index);
-       if (mapping->mode & MODE_DIRECTORY)
-           ADJUST_MAPPING_INDEX(info.dir.parent_mapping_index);
-    }
-}
-
-/* insert or update mapping */
-static mapping_t* insert_mapping(BDRVVVFATState* s,
-       uint32_t begin, uint32_t end)
-{
-    /*
-     * - find mapping where mapping->begin >= begin,
-     * - if mapping->begin > begin: insert
-     *   - adjust all references to mappings!
-     * - else: adjust
-     * - replace name
-     */
-    int index = find_mapping_for_cluster_aux(s, begin, 0, s->mapping.next);
-    mapping_t* mapping = NULL;
-    mapping_t* first_mapping = array_get(&(s->mapping), 0);
-
-    if (index < s->mapping.next && (mapping = array_get(&(s->mapping), index))
-           && mapping->begin < begin) {
-       mapping->end = begin;
-       index++;
-       mapping = array_get(&(s->mapping), index);
-    }
-    if (index >= s->mapping.next || mapping->begin > begin) {
-       mapping = array_insert(&(s->mapping), index, 1);
-       mapping->path = NULL;
-       adjust_mapping_indices(s, index, +1);
-    }
-
-    mapping->begin = begin;
-    mapping->end = end;
-
-DLOG(mapping_t* next_mapping;
-assert(index + 1 >= s->mapping.next ||
-((next_mapping = array_get(&(s->mapping), index + 1)) &&
- next_mapping->begin >= end)));
-
-    if (s->current_mapping && first_mapping != (mapping_t*)s->mapping.pointer)
-       s->current_mapping = array_get(&(s->mapping),
-               s->current_mapping - first_mapping);
-
-    return mapping;
-}
-
-static int remove_mapping(BDRVVVFATState* s, int mapping_index)
-{
-    mapping_t* mapping = array_get(&(s->mapping), mapping_index);
-    mapping_t* first_mapping = array_get(&(s->mapping), 0);
-
-    /* free mapping */
-    if (mapping->first_mapping_index < 0)
-       free(mapping->path);
-
-    /* remove from s->mapping */
-    array_remove(&(s->mapping), mapping_index);
-
-    /* adjust all references to mappings */
-    adjust_mapping_indices(s, mapping_index, -1);
-
-    if (s->current_mapping && first_mapping != (mapping_t*)s->mapping.pointer)
-       s->current_mapping = array_get(&(s->mapping),
-               s->current_mapping - first_mapping);
-
-    return 0;
-}
-
-static void adjust_dirindices(BDRVVVFATState* s, int offset, int adjust)
-{
-    int i;
-    for (i = 0; i < s->mapping.next; i++) {
-       mapping_t* mapping = array_get(&(s->mapping), i);
-       if (mapping->dir_index >= offset)
-           mapping->dir_index += adjust;
-       if ((mapping->mode & MODE_DIRECTORY) &&
-               mapping->info.dir.first_dir_index >= offset)
-           mapping->info.dir.first_dir_index += adjust;
-    }
-}
-
-static direntry_t* insert_direntries(BDRVVVFATState* s,
-       int dir_index, int count)
-{
-    /*
-     * make room in s->directory,
-     * adjust_dirindices
-     */
-    direntry_t* result = array_insert(&(s->directory), dir_index, count);
-    if (result == NULL)
-       return NULL;
-    adjust_dirindices(s, dir_index, count);
-    return result;
-}
-
-static int remove_direntries(BDRVVVFATState* s, int dir_index, int count)
-{
-    int ret = array_remove_slice(&(s->directory), dir_index, count);
-    if (ret)
-       return ret;
-    adjust_dirindices(s, dir_index, -count);
-    return 0;
-}
-
-/*
- * Adapt the mappings of the cluster chain starting at first cluster
- * (i.e. if a file starts at first_cluster, the chain is followed according
- * to the modified fat, and the corresponding entries in s->mapping are
- * adjusted)
- */
-static int commit_mappings(BDRVVVFATState* s,
-       uint32_t first_cluster, int dir_index)
-{
-    mapping_t* mapping = find_mapping_for_cluster(s, first_cluster);
-    direntry_t* direntry = array_get(&(s->directory), dir_index);
-    uint32_t cluster = first_cluster;
-
-    vvfat_close_current_file(s);
-
-    assert(mapping);
-    assert(mapping->begin == first_cluster);
-    mapping->first_mapping_index = -1;
-    mapping->dir_index = dir_index;
-    mapping->mode = (dir_index <= 0 || is_directory(direntry)) ?
-       MODE_DIRECTORY : MODE_NORMAL;
-
-    while (!fat_eof(s, cluster)) {
-       uint32_t c, c1;
-
-       for (c = cluster, c1 = modified_fat_get(s, c); c + 1 == c1;
-               c = c1, c1 = modified_fat_get(s, c1));
-
-       c++;
-       if (c > mapping->end) {
-           int index = array_index(&(s->mapping), mapping);
-           int i, max_i = s->mapping.next - index;
-           for (i = 1; i < max_i && mapping[i].begin < c; i++);
-           while (--i > 0)
-               remove_mapping(s, index + 1);
-       }
-       assert(mapping == array_get(&(s->mapping), s->mapping.next - 1)
-               || mapping[1].begin >= c);
-       mapping->end = c;
-
-       if (!fat_eof(s, c1)) {
-           int i = find_mapping_for_cluster_aux(s, c1, 0, s->mapping.next);
-           mapping_t* next_mapping = i >= s->mapping.next ? NULL :
-               array_get(&(s->mapping), i);
-
-           if (next_mapping == NULL || next_mapping->begin > c1) {
-               int i1 = array_index(&(s->mapping), mapping);
-
-               next_mapping = insert_mapping(s, c1, c1+1);
-
-               if (c1 < c)
-                   i1++;
-               mapping = array_get(&(s->mapping), i1);
-           }
-
-           next_mapping->dir_index = mapping->dir_index;
-           next_mapping->first_mapping_index =
-               mapping->first_mapping_index < 0 ?
-               array_index(&(s->mapping), mapping) :
-               mapping->first_mapping_index;
-           next_mapping->path = mapping->path;
-           next_mapping->mode = mapping->mode;
-           next_mapping->read_only = mapping->read_only;
-           if (mapping->mode & MODE_DIRECTORY) {
-               next_mapping->info.dir.parent_mapping_index =
-                       mapping->info.dir.parent_mapping_index;
-               next_mapping->info.dir.first_dir_index =
-                       mapping->info.dir.first_dir_index +
-                       0x10 * s->sectors_per_cluster *
-                       (mapping->end - mapping->begin);
-           } else
-               next_mapping->info.file.offset = mapping->info.file.offset +
-                       mapping->end - mapping->begin;
-
-           mapping = next_mapping;
-       }
-
-       cluster = c1;
-    }
-
-    return 0;
-}
-
-static int commit_direntries(BDRVVVFATState* s,
-       int dir_index, int parent_mapping_index)
-{
-    direntry_t* direntry = array_get(&(s->directory), dir_index);
-    uint32_t first_cluster = dir_index == 0 ? 0 : begin_of_direntry(direntry);
-    mapping_t* mapping = find_mapping_for_cluster(s, first_cluster);
-
-    int factor = 0x10 * s->sectors_per_cluster;
-    int old_cluster_count, new_cluster_count;
-    int current_dir_index = mapping->info.dir.first_dir_index;
-    int first_dir_index = current_dir_index;
-    int ret, i;
-    uint32_t c;
-
-DLOG(fprintf(stderr, "commit_direntries for %s, parent_mapping_index %d\n", mapping->path, parent_mapping_index));
-
-    assert(direntry);
-    assert(mapping);
-    assert(mapping->begin == first_cluster);
-    assert(mapping->info.dir.first_dir_index < s->directory.next);
-    assert(mapping->mode & MODE_DIRECTORY);
-    assert(dir_index == 0 || is_directory(direntry));
-
-    mapping->info.dir.parent_mapping_index = parent_mapping_index;
-
-    if (first_cluster == 0) {
-       old_cluster_count = new_cluster_count =
-           s->last_cluster_of_root_directory;
-    } else {
-       for (old_cluster_count = 0, c = first_cluster; !fat_eof(s, c);
-               c = fat_get(s, c))
-           old_cluster_count++;
-
-       for (new_cluster_count = 0, c = first_cluster; !fat_eof(s, c);
-               c = modified_fat_get(s, c))
-           new_cluster_count++;
-    }
-
-    if (new_cluster_count > old_cluster_count) {
-       if (insert_direntries(s,
-               current_dir_index + factor * old_cluster_count,
-               factor * (new_cluster_count - old_cluster_count)) == NULL)
-           return -1;
-    } else if (new_cluster_count < old_cluster_count)
-       remove_direntries(s,
-               current_dir_index + factor * new_cluster_count,
-               factor * (old_cluster_count - new_cluster_count));
-
-    for (c = first_cluster; !fat_eof(s, c); c = modified_fat_get(s, c)) {
-       void* direntry = array_get(&(s->directory), current_dir_index);
-       int ret = vvfat_read(s->bs, cluster2sector(s, c), direntry,
-               s->sectors_per_cluster);
-       if (ret)
-           return ret;
-       assert(!strncmp(s->directory.pointer, "QEMU", 4));
-       current_dir_index += factor;
-    }
-
-    ret = commit_mappings(s, first_cluster, dir_index);
-    if (ret)
-       return ret;
-
-    /* recurse */
-    for (i = 0; i < factor * new_cluster_count; i++) {
-       direntry = array_get(&(s->directory), first_dir_index + i);
-       if (is_directory(direntry) && !is_dot(direntry)) {
-           mapping = find_mapping_for_cluster(s, first_cluster);
-           assert(mapping->mode & MODE_DIRECTORY);
-           ret = commit_direntries(s, first_dir_index + i,
-               array_index(&(s->mapping), mapping));
-           if (ret)
-               return ret;
-       }
-    }
-
-    return 0;
-}
-
-/* commit one file (adjust contents, adjust mapping),
-   return first_mapping_index */
-static int commit_one_file(BDRVVVFATState* s,
-       int dir_index, uint32_t offset)
-{
-    direntry_t* direntry = array_get(&(s->directory), dir_index);
-    uint32_t c = begin_of_direntry(direntry);
-    uint32_t first_cluster = c;
-    mapping_t* mapping = find_mapping_for_cluster(s, c);
-    uint32_t size = filesize_of_direntry(direntry);
-    char* cluster = qemu_malloc(s->cluster_size);
-    uint32_t i;
-    int fd = 0;
-
-    assert(offset < size);
-    assert((offset % s->cluster_size) == 0);
-
-    for (i = s->cluster_size; i < offset; i += s->cluster_size)
-       c = modified_fat_get(s, c);
-
-    fd = open(mapping->path, O_RDWR | O_CREAT | O_BINARY, 0666);
-    if (fd < 0) {
-       fprintf(stderr, "Could not open %s... (%s, %d)\n", mapping->path,
-               strerror(errno), errno);
-       return fd;
-    }
-    if (offset > 0)
-       if (lseek(fd, offset, SEEK_SET) != offset)
-           return -3;
-
-    while (offset < size) {
-       uint32_t c1;
-       int rest_size = (size - offset > s->cluster_size ?
-               s->cluster_size : size - offset);
-       int ret;
-
-       c1 = modified_fat_get(s, c);
-
-       assert((size - offset == 0 && fat_eof(s, c)) ||
-               (size > offset && c >=2 && !fat_eof(s, c)));
-
-       ret = vvfat_read(s->bs, cluster2sector(s, c),
-           (uint8_t*)cluster, (rest_size + 0x1ff) / 0x200);
-
-       if (ret < 0)
-           return ret;
-
-       if (write(fd, cluster, rest_size) < 0)
-           return -2;
-
-       offset += rest_size;
-       c = c1;
-    }
-
-    ftruncate(fd, size);
-    close(fd);
-
-    return commit_mappings(s, first_cluster, dir_index);
-}
-
-#ifdef DEBUG
-/* test, if all mappings point to valid direntries */
-static void check1(BDRVVVFATState* s)
-{
-    int i;
-    for (i = 0; i < s->mapping.next; i++) {
-       mapping_t* mapping = array_get(&(s->mapping), i);
-       if (mapping->mode & MODE_DELETED) {
-           fprintf(stderr, "deleted\n");
-           continue;
-       }
-       assert(mapping->dir_index >= 0);
-       assert(mapping->dir_index < s->directory.next);
-       direntry_t* direntry = array_get(&(s->directory), mapping->dir_index);
-       assert(mapping->begin == begin_of_direntry(direntry) || mapping->first_mapping_index >= 0);
-       if (mapping->mode & MODE_DIRECTORY) {
-           assert(mapping->info.dir.first_dir_index + 0x10 * s->sectors_per_cluster * (mapping->end - mapping->begin) <= s->directory.next);
-           assert((mapping->info.dir.first_dir_index % (0x10 * s->sectors_per_cluster)) == 0);
-       }
-    }
-}
-
-/* test, if all direntries have mappings */
-static void check2(BDRVVVFATState* s)
-{
-    int i;
-    int first_mapping = -1;
-
-    for (i = 0; i < s->directory.next; i++) {
-       direntry_t* direntry = array_get(&(s->directory), i);
-
-       if (is_short_name(direntry) && begin_of_direntry(direntry)) {
-           mapping_t* mapping = find_mapping_for_cluster(s, begin_of_direntry(direntry));
-           assert(mapping);
-           assert(mapping->dir_index == i || is_dot(direntry));
-           assert(mapping->begin == begin_of_direntry(direntry) || is_dot(direntry));
-       }
-
-       if ((i % (0x10 * s->sectors_per_cluster)) == 0) {
-           /* cluster start */
-           int j, count = 0;
-
-           for (j = 0; j < s->mapping.next; j++) {
-               mapping_t* mapping = array_get(&(s->mapping), j);
-               if (mapping->mode & MODE_DELETED)
-                   continue;
-               if (mapping->mode & MODE_DIRECTORY) {
-                   if (mapping->info.dir.first_dir_index <= i && mapping->info.dir.first_dir_index + 0x10 * s->sectors_per_cluster > i) {
-                       assert(++count == 1);
-                       if (mapping->first_mapping_index == -1)
-                           first_mapping = array_index(&(s->mapping), mapping);
-                       else
-                           assert(first_mapping == mapping->first_mapping_index);
-                       if (mapping->info.dir.parent_mapping_index < 0)
-                           assert(j == 0);
-                       else {
-                           mapping_t* parent = array_get(&(s->mapping), mapping->info.dir.parent_mapping_index);
-                           assert(parent->mode & MODE_DIRECTORY);
-                           assert(parent->info.dir.first_dir_index < mapping->info.dir.first_dir_index);
-                       }
-                   }
-               }
-           }
-           if (count == 0)
-               first_mapping = -1;
-       }
-    }
-}
-#endif
-
-static int handle_renames_and_mkdirs(BDRVVVFATState* s)
-{
-    int i;
-
-#ifdef DEBUG
-    fprintf(stderr, "handle_renames\n");
-    for (i = 0; i < s->commits.next; i++) {
-       commit_t* commit = array_get(&(s->commits), i);
-       fprintf(stderr, "%d, %s (%d, %d)\n", i, commit->path ? commit->path : "(null)", commit->param.rename.cluster, commit->action);
-    }
-#endif
-
-    for (i = 0; i < s->commits.next;) {
-       commit_t* commit = array_get(&(s->commits), i);
-       if (commit->action == ACTION_RENAME) {
-           mapping_t* mapping = find_mapping_for_cluster(s,
-                   commit->param.rename.cluster);
-           char* old_path = mapping->path;
-
-           assert(commit->path);
-           mapping->path = commit->path;
-           if (rename(old_path, mapping->path))
-               return -2;
-
-           if (mapping->mode & MODE_DIRECTORY) {
-               int l1 = strlen(mapping->path);
-               int l2 = strlen(old_path);
-               int diff = l1 - l2;
-               direntry_t* direntry = array_get(&(s->directory),
-                       mapping->info.dir.first_dir_index);
-               uint32_t c = mapping->begin;
-               int i = 0;
-
-               /* recurse */
-               while (!fat_eof(s, c)) {
-                   do {
-                       direntry_t* d = direntry + i;
-
-                       if (is_file(d) || (is_directory(d) && !is_dot(d))) {
-                           mapping_t* m = find_mapping_for_cluster(s,
-                                   begin_of_direntry(d));
-                           int l = strlen(m->path);
-                           char* new_path = qemu_malloc(l + diff + 1);
-
-                           assert(!strncmp(m->path, mapping->path, l2));
-
-                            pstrcpy(new_path, l + diff + 1, mapping->path);
-                            pstrcpy(new_path + l1, l + diff + 1 - l1,
-                                    m->path + l2);
-
-                           schedule_rename(s, m->begin, new_path);
-                       }
-                       i++;
-                   } while((i % (0x10 * s->sectors_per_cluster)) != 0);
-                   c = fat_get(s, c);
-               }
-           }
-
-           free(old_path);
-           array_remove(&(s->commits), i);
-           continue;
-       } else if (commit->action == ACTION_MKDIR) {
-           mapping_t* mapping;
-           int j, parent_path_len;
-
-#ifdef __MINGW32__
-            if (mkdir(commit->path))
-                return -5;
-#else
-            if (mkdir(commit->path, 0755))
-                return -5;
-#endif
-
-           mapping = insert_mapping(s, commit->param.mkdir.cluster,
-                   commit->param.mkdir.cluster + 1);
-           if (mapping == NULL)
-               return -6;
-
-           mapping->mode = MODE_DIRECTORY;
-           mapping->read_only = 0;
-           mapping->path = commit->path;
-           j = s->directory.next;
-           assert(j);
-           insert_direntries(s, s->directory.next,
-                   0x10 * s->sectors_per_cluster);
-           mapping->info.dir.first_dir_index = j;
-
-           parent_path_len = strlen(commit->path)
-               - strlen(get_basename(commit->path)) - 1;
-           for (j = 0; j < s->mapping.next; j++) {
-               mapping_t* m = array_get(&(s->mapping), j);
-               if (m->first_mapping_index < 0 && m != mapping &&
-                       !strncmp(m->path, mapping->path, parent_path_len) &&
-                       strlen(m->path) == parent_path_len)
-                   break;
-           }
-           assert(j < s->mapping.next);
-           mapping->info.dir.parent_mapping_index = j;
-
-           array_remove(&(s->commits), i);
-           continue;
-       }
-
-       i++;
-    }
-    return 0;
-}
-
-/*
- * TODO: make sure that the short name is not matching *another* file
- */
-static int handle_commits(BDRVVVFATState* s)
-{
-    int i, fail = 0;
-
-    vvfat_close_current_file(s);
-
-    for (i = 0; !fail && i < s->commits.next; i++) {
-       commit_t* commit = array_get(&(s->commits), i);
-       switch(commit->action) {
-       case ACTION_RENAME: case ACTION_MKDIR:
-           assert(0);
-           fail = -2;
-           break;
-       case ACTION_WRITEOUT: {
-           direntry_t* entry = array_get(&(s->directory),
-                   commit->param.writeout.dir_index);
-           uint32_t begin = begin_of_direntry(entry);
-           mapping_t* mapping = find_mapping_for_cluster(s, begin);
-
-           assert(mapping);
-           assert(mapping->begin == begin);
-           assert(commit->path == NULL);
-
-           if (commit_one_file(s, commit->param.writeout.dir_index,
-                       commit->param.writeout.modified_offset))
-               fail = -3;
-
-           break;
-       }
-       case ACTION_NEW_FILE: {
-           int begin = commit->param.new_file.first_cluster;
-           mapping_t* mapping = find_mapping_for_cluster(s, begin);
-           direntry_t* entry;
-           int i;
-
-           /* find direntry */
-           for (i = 0; i < s->directory.next; i++) {
-               entry = array_get(&(s->directory), i);
-               if (is_file(entry) && begin_of_direntry(entry) == begin)
-                   break;
-           }
-
-           if (i >= s->directory.next) {
-               fail = -6;
-               continue;
-           }
-
-           /* make sure there exists an initial mapping */
-           if (mapping && mapping->begin != begin) {
-               mapping->end = begin;
-               mapping = NULL;
-           }
-           if (mapping == NULL) {
-               mapping = insert_mapping(s, begin, begin+1);
-           }
-           /* most members will be fixed in commit_mappings() */
-           assert(commit->path);
-           mapping->path = commit->path;
-           mapping->read_only = 0;
-           mapping->mode = MODE_NORMAL;
-           mapping->info.file.offset = 0;
-
-           if (commit_one_file(s, i, 0))
-               fail = -7;
-
-           break;
-       }
-       default:
-           assert(0);
-       }
-    }
-    if (i > 0 && array_remove_slice(&(s->commits), 0, i))
-       return -1;
-    return fail;
-}
-
-static int handle_deletes(BDRVVVFATState* s)
-{
-    int i, deferred = 1, deleted = 1;
-
-    /* delete files corresponding to mappings marked as deleted */
-    /* handle DELETEs and unused mappings (modified_fat_get(s, mapping->begin) == 0) */
-    while (deferred && deleted) {
-       deferred = 0;
-       deleted = 0;
-
-       for (i = 1; i < s->mapping.next; i++) {
-           mapping_t* mapping = array_get(&(s->mapping), i);
-           if (mapping->mode & MODE_DELETED) {
-               direntry_t* entry = array_get(&(s->directory),
-                       mapping->dir_index);
-
-               if (is_free(entry)) {
-                   /* remove file/directory */
-                   if (mapping->mode & MODE_DIRECTORY) {
-                       int j, next_dir_index = s->directory.next,
-                       first_dir_index = mapping->info.dir.first_dir_index;
-
-                       if (rmdir(mapping->path) < 0) {
-                           if (errno == ENOTEMPTY) {
-                               deferred++;
-                               continue;
-                           } else
-                               return -5;
-                       }
-
-                       for (j = 1; j < s->mapping.next; j++) {
-                           mapping_t* m = array_get(&(s->mapping), j);
-                           if (m->mode & MODE_DIRECTORY &&
-                                   m->info.dir.first_dir_index >
-                                   first_dir_index &&
-                                   m->info.dir.first_dir_index <
-                                   next_dir_index)
-                               next_dir_index =
-                                   m->info.dir.first_dir_index;
-                       }
-                       remove_direntries(s, first_dir_index,
-                               next_dir_index - first_dir_index);
-
-                       deleted++;
-                   }
-               } else {
-                   if (unlink(mapping->path))
-                       return -4;
-                   deleted++;
-               }
-               DLOG(fprintf(stderr, "DELETE (%d)\n", i); print_mapping(mapping); print_direntry(entry));
-               remove_mapping(s, i);
-           }
-       }
-    }
-
-    return 0;
-}
-
-/*
- * synchronize mapping with new state:
- *
- * - copy FAT (with bdrv_read)
- * - mark all filenames corresponding to mappings as deleted
- * - recurse direntries from root (using bs->bdrv_read)
- * - delete files corresponding to mappings marked as deleted
- */
-static int do_commit(BDRVVVFATState* s)
-{
-    int ret = 0;
-
-    /* the real meat are the commits. Nothing to do? Move along! */
-    if (s->commits.next == 0)
-       return 0;
-
-    vvfat_close_current_file(s);
-
-    ret = handle_renames_and_mkdirs(s);
-    if (ret) {
-       fprintf(stderr, "Error handling renames (%d)\n", ret);
-       assert(0);
-       return ret;
-    }
-
-    /* copy FAT (with bdrv_read) */
-    memcpy(s->fat.pointer, s->fat2, 0x200 * s->sectors_per_fat);
-
-    /* recurse direntries from root (using bs->bdrv_read) */
-    ret = commit_direntries(s, 0, -1);
-    if (ret) {
-       fprintf(stderr, "Fatal: error while committing (%d)\n", ret);
-       assert(0);
-       return ret;
-    }
-
-    ret = handle_commits(s);
-    if (ret) {
-       fprintf(stderr, "Error handling commits (%d)\n", ret);
-       assert(0);
-       return ret;
-    }
-
-    ret = handle_deletes(s);
-    if (ret) {
-       fprintf(stderr, "Error deleting\n");
-        assert(0);
-       return ret;
-    }
-
-    s->qcow->drv->bdrv_make_empty(s->qcow);
-
-    memset(s->used_clusters, 0, sector2cluster(s, s->sector_count));
-
-DLOG(checkpoint());
-    return 0;
-}
-
-static int try_commit(BDRVVVFATState* s)
-{
-    vvfat_close_current_file(s);
-DLOG(checkpoint());
-    if(!is_consistent(s))
-       return -1;
-    return do_commit(s);
-}
-
-static int vvfat_write(BlockDriverState *bs, int64_t sector_num,
-                    const uint8_t *buf, int nb_sectors)
-{
-    BDRVVVFATState *s = bs->opaque;
-    int i, ret;
-
-DLOG(checkpoint());
-
-    vvfat_close_current_file(s);
-
-    /*
-     * Some sanity checks:
-     * - do not allow writing to the boot sector
-     * - do not allow to write non-ASCII filenames
-     */
-
-    if (sector_num < s->first_sectors_number)
-       return -1;
-
-    for (i = sector2cluster(s, sector_num);
-           i <= sector2cluster(s, sector_num + nb_sectors - 1);) {
-       mapping_t* mapping = find_mapping_for_cluster(s, i);
-       if (mapping) {
-           if (mapping->read_only) {
-               fprintf(stderr, "Tried to write to write-protected file %s\n",
-                       mapping->path);
-               return -1;
-           }
-
-           if (mapping->mode & MODE_DIRECTORY) {
-               int begin = cluster2sector(s, i);
-               int end = begin + s->sectors_per_cluster, k;
-               int dir_index;
-               const direntry_t* direntries;
-               long_file_name lfn;
-
-               lfn_init(&lfn);
-
-               if (begin < sector_num)
-                   begin = sector_num;
-               if (end > sector_num + nb_sectors)
-                   end = sector_num + nb_sectors;
-               dir_index  = mapping->dir_index +
-                   0x10 * (begin - mapping->begin * s->sectors_per_cluster);
-               direntries = (direntry_t*)(buf + 0x200 * (begin - sector_num));
-
-               for (k = 0; k < (end - begin) * 0x10; k++) {
-                   /* do not allow non-ASCII filenames */
-                   if (parse_long_name(&lfn, direntries + k) < 0) {
-                       fprintf(stderr, "Warning: non-ASCII filename\n");
-                       return -1;
-                   }
-                   /* no access to the direntry of a read-only file */
-                   else if (is_short_name(direntries+k) &&
-                           (direntries[k].attributes & 1)) {
-                       if (memcmp(direntries + k,
-                                   array_get(&(s->directory), dir_index + k),
-                                   sizeof(direntry_t))) {
-                           fprintf(stderr, "Warning: tried to write to write-protected file\n");
-                           return -1;
-                       }
-                   }
-               }
-           }
-           i = mapping->end;
-       } else
-           i++;
-    }
-
-    /*
-     * Use qcow backend. Commit later.
-     */
-DLOG(fprintf(stderr, "Write to qcow backend: %d + %d\n", (int)sector_num, nb_sectors));
-    ret = s->qcow->drv->bdrv_write(s->qcow, sector_num, buf, nb_sectors);
-    if (ret < 0) {
-       fprintf(stderr, "Error writing to qcow backend\n");
-       return ret;
-    }
-
-    for (i = sector2cluster(s, sector_num);
-           i <= sector2cluster(s, sector_num + nb_sectors - 1); i++)
-       if (i >= 0)
-           s->used_clusters[i] |= USED_ALLOCATED;
-
-DLOG(checkpoint());
-    /* TODO: add timeout */
-    try_commit(s);
-
-DLOG(checkpoint());
-    return 0;
-}
-
-static int vvfat_is_allocated(BlockDriverState *bs,
-       int64_t sector_num, int nb_sectors, int* n)
-{
-    BDRVVVFATState* s = bs->opaque;
-    *n = s->sector_count - sector_num;
-    if (*n > nb_sectors)
-       *n = nb_sectors;
-    else if (*n < 0)
-       return 0;
-    return 1;
-}
-
-static int write_target_commit(BlockDriverState *bs, int64_t sector_num,
-       const uint8_t* buffer, int nb_sectors) {
-    BDRVVVFATState* s = bs->opaque;
-    return try_commit(s);
-}
-
-static void write_target_close(BlockDriverState *bs) {
-    BDRVVVFATState* s = bs->opaque;
-    bdrv_delete(s->qcow);
-    free(s->qcow_filename);
-}
-
-static BlockDriver vvfat_write_target = {
-    "vvfat_write_target", 0, NULL, NULL, NULL,
-    write_target_commit,
-    write_target_close,
-    NULL, NULL, NULL
-};
-
-static int enable_write_target(BDRVVVFATState *s)
-{
-    int size = sector2cluster(s, s->sector_count);
-    s->used_clusters = calloc(size, 1);
-
-    array_init(&(s->commits), sizeof(commit_t));
-
-    s->qcow_filename = qemu_malloc(1024);
-    get_tmp_filename(s->qcow_filename, 1024);
-    if (bdrv_create(bdrv_find_format("qcow"),
-               s->qcow_filename, s->sector_count, "fat:", 0) < 0)
-       return -1;
-    s->qcow = bdrv_new("");
-    if (s->qcow == NULL || bdrv_open(s->qcow, s->qcow_filename, 0) < 0)
-       return -1;
-
-#ifndef _WIN32
-    unlink(s->qcow_filename);
-#endif
-
-    s->bs->backing_hd = calloc(sizeof(BlockDriverState), 1);
-    s->bs->backing_hd->drv = &vvfat_write_target;
-    s->bs->backing_hd->opaque = s;
-
-    return 0;
-}
-
-static void vvfat_close(BlockDriverState *bs)
-{
-    BDRVVVFATState *s = bs->opaque;
-
-    vvfat_close_current_file(s);
-    array_free(&(s->fat));
-    array_free(&(s->directory));
-    array_free(&(s->mapping));
-    if(s->cluster_buffer)
-        free(s->cluster_buffer);
-}
-
-static BlockDriver bdrv_vvfat = {
-    .format_name       = "vvfat",
-    .instance_size     = sizeof(BDRVVVFATState),
-    .bdrv_open         = vvfat_open,
-    .bdrv_read         = vvfat_read,
-    .bdrv_write                = vvfat_write,
-    .bdrv_close                = vvfat_close,
-    .bdrv_is_allocated = vvfat_is_allocated,
-    .protocol_name     = "fat",
-};
-
-static void bdrv_vvfat_init(void)
-{
-    bdrv_register(&bdrv_vvfat);
-}
-
-block_init(bdrv_vvfat_init);
-
-#ifdef DEBUG
-static void checkpoint(void) {
-    assert(((mapping_t*)array_get(&(vvv->mapping), 0))->end == 2);
-    check1(vvv);
-    check2(vvv);
-    assert(!vvv->current_mapping || vvv->current_fd || (vvv->current_mapping->mode & MODE_DIRECTORY));
-#if 0
-    if (((direntry_t*)vvv->directory.pointer)[1].attributes != 0xf)
-       fprintf(stderr, "Nonono!\n");
-    mapping_t* mapping;
-    direntry_t* direntry;
-    assert(vvv->mapping.size >= vvv->mapping.item_size * vvv->mapping.next);
-    assert(vvv->directory.size >= vvv->directory.item_size * vvv->directory.next);
-    if (vvv->mapping.next<47)
-       return;
-    assert((mapping = array_get(&(vvv->mapping), 47)));
-    assert(mapping->dir_index < vvv->directory.next);
-    direntry = array_get(&(vvv->directory), mapping->dir_index);
-    assert(!memcmp(direntry->name, "USB     H  ", 11) || direntry->name[0]==0);
-#endif
-    return;
-    /* avoid compiler warnings: */
-    hexdump(NULL, 100);
-    remove_mapping(vvv, NULL);
-    print_mapping(NULL);
-    print_direntry(NULL);
-}
-#endif
diff --git a/block/bochs.c b/block/bochs.c
new file mode 100644 (file)
index 0000000..bac81c4
--- /dev/null
@@ -0,0 +1,300 @@
+/*
+ * Block driver for the various disk image formats used by Bochs
+ * Currently only for "growing" type in read-only mode
+ *
+ * Copyright (c) 2005 Alex Beregszaszi
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#include "qemu-common.h"
+#include "block_int.h"
+#include "module.h"
+#include <limits.h>
+
+/**************************************************************/
+
+#define HEADER_MAGIC "Bochs Virtual HD Image"
+#define HEADER_VERSION 0x00020000
+#define HEADER_V1 0x00010000
+#define HEADER_SIZE 512
+
+#define REDOLOG_TYPE "Redolog"
+#define GROWING_TYPE "Growing"
+
+/* catalog entry value marking an extent that has no data in the image */
+#define EXTENT_NOT_ALLOCATED 0xffffffff
+
+/*
+ * On-disk header used for HEADER_V1 images.  Always little-endian.
+ *
+ * NOTE(review): the struct is not packed, so an ABI that aligns uint64_t
+ * to 8 bytes pads before "disk" and it then sits at the same offset as in
+ * struct bochs_header -- confirm against the Bochs v1 on-disk layout.
+ */
+struct bochs_header_v1 {
+    char magic[32]; // "Bochs Virtual HD Image"
+    char type[16]; // "Redolog"
+    char subtype[16]; // "Undoable" / "Volatile" / "Growing"
+    uint32_t version;
+    uint32_t header; // size of header
+
+    union {
+        struct {
+            uint32_t catalog; // num of entries
+            uint32_t bitmap; // bitmap size
+            uint32_t extent; // extent size
+            uint64_t disk; // disk size
+            char padding[HEADER_SIZE - 64 - 8 - 20];
+        } redolog;
+        char padding[HEADER_SIZE - 64 - 8];
+    } extra;
+};
+
+/* On-disk header used for HEADER_VERSION images.  Always little-endian. */
+struct bochs_header {
+    char magic[32]; // "Bochs Virtual HD Image"
+    char type[16]; // "Redolog"
+    char subtype[16]; // "Undoable" / "Volatile" / "Growing"
+    uint32_t version;
+    uint32_t header; // size of header
+
+    union {
+        struct {
+            uint32_t catalog; // num of entries
+            uint32_t bitmap; // bitmap size
+            uint32_t extent; // extent size
+            uint32_t reserved; // for ???
+            uint64_t disk; // disk size
+            char padding[HEADER_SIZE - 64 - 8 - 24];
+        } redolog;
+        char padding[HEADER_SIZE - 64 - 8];
+    } extra;
+};
+
+/* Per-image driver state, filled in by bochs_open(). */
+typedef struct BDRVBochsState {
+    int fd;
+
+    uint32_t *catalog_bitmap; /* extent catalog, converted to host endianness */
+    int catalog_size;         /* number of entries in catalog_bitmap */
+
+    int data_offset;          /* file offset just past the catalog */
+
+    int bitmap_blocks;        /* bitmap size in 512-byte blocks, rounded up */
+    int extent_blocks;        /* extent size in 512-byte blocks, rounded up */
+    int extent_size;          /* extent size in bytes */
+} BDRVBochsState;
+
+/*
+ * Score how likely buf holds the header of a Bochs "growing" redolog image.
+ * Returns 100 on a match and 0 otherwise.
+ */
+static int bochs_probe(const uint8_t *buf, int buf_size, const char *filename)
+{
+    const struct bochs_header *bochs = (const void *)buf;
+
+    if (buf_size < HEADER_SIZE)
+        return 0;
+
+    if (!strcmp(bochs->magic, HEADER_MAGIC) &&
+        !strcmp(bochs->type, REDOLOG_TYPE) &&
+        !strcmp(bochs->subtype, GROWING_TYPE) &&
+        ((le32_to_cpu(bochs->version) == HEADER_VERSION) ||
+         (le32_to_cpu(bochs->version) == HEADER_V1)))
+        return 100;
+
+    return 0;
+}
+
+/*
+ * Open a Bochs growing image: validate the header, load the extent catalog
+ * and derive the layout fields of BDRVBochsState.
+ * Returns 0 on success and -1 on failure.
+ */
+static int bochs_open(BlockDriverState *bs, const char *filename, int flags)
+{
+    BDRVBochsState *s = bs->opaque;
+    int fd, i, catalog_bytes;
+    uint32_t catalog_entries, extent_size;
+    struct bochs_header bochs;
+    struct bochs_header_v1 header_v1;
+
+    fd = open(filename, O_RDWR | O_BINARY);
+    if (fd < 0) {
+        fd = open(filename, O_RDONLY | O_BINARY);
+        if (fd < 0)
+            return -1;
+    }
+
+    bs->read_only = 1; // no write support yet
+
+    s->fd = fd;
+    s->catalog_bitmap = NULL; /* lets the fail path free unconditionally */
+
+    if (read(fd, &bochs, sizeof(bochs)) != sizeof(bochs)) {
+        goto fail;
+    }
+
+    if (strcmp(bochs.magic, HEADER_MAGIC) ||
+        strcmp(bochs.type, REDOLOG_TYPE) ||
+        strcmp(bochs.subtype, GROWING_TYPE) ||
+        ((le32_to_cpu(bochs.version) != HEADER_VERSION) &&
+         (le32_to_cpu(bochs.version) != HEADER_V1))) {
+        goto fail;
+    }
+
+    if (le32_to_cpu(bochs.version) == HEADER_V1) {
+        memcpy(&header_v1, &bochs, sizeof(bochs));
+        bs->total_sectors = le64_to_cpu(header_v1.extra.redolog.disk) / 512;
+    } else {
+        bs->total_sectors = le64_to_cpu(bochs.extra.redolog.disk) / 512;
+    }
+
+    if (lseek(s->fd, le32_to_cpu(bochs.header), SEEK_SET) < 0)
+        goto fail;
+
+    /*
+     * The header comes from the image file: reject a catalog whose byte
+     * size does not fit an int, and an extent size that is zero (it is used
+     * as a divisor in seek_to_sector) or does not fit an int.
+     */
+    catalog_entries = le32_to_cpu(bochs.extra.redolog.catalog);
+    extent_size = le32_to_cpu(bochs.extra.redolog.extent);
+    if (catalog_entries > INT_MAX / 4 ||
+        extent_size == 0 || extent_size > INT_MAX)
+        goto fail;
+
+    s->catalog_size = catalog_entries;
+    catalog_bytes = s->catalog_size * 4;
+    s->catalog_bitmap = qemu_malloc(catalog_bytes);
+    if (read(s->fd, s->catalog_bitmap, catalog_bytes) != catalog_bytes)
+        goto fail;
+    for (i = 0; i < s->catalog_size; i++)
+        le32_to_cpus(&s->catalog_bitmap[i]);
+
+    s->data_offset = le32_to_cpu(bochs.header) + catalog_bytes;
+
+    s->bitmap_blocks = 1 + (le32_to_cpu(bochs.extra.redolog.bitmap) - 1) / 512;
+    s->extent_blocks = 1 + (extent_size - 1) / 512;
+
+    s->extent_size = extent_size;
+
+    return 0;
+ fail:
+    qemu_free(s->catalog_bitmap);
+    s->catalog_bitmap = NULL;
+    close(fd);
+    return -1;
+}
+
+/*
+ * Position s->fd on the data of guest sector sector_num.
+ *
+ * Returns 0 when the descriptor is positioned on the sector's data, 1 when
+ * the sector is not allocated in the image, and -1 on I/O error.
+ */
+static inline int seek_to_sector(BlockDriverState *bs, int64_t sector_num)
+{
+    BDRVBochsState *s = bs->opaque;
+    int64_t offset = sector_num * 512;
+    int64_t extent_index, extent_offset, bitmap_offset, block_offset;
+    char bitmap_entry;
+
+    extent_index = offset / s->extent_size;
+    extent_offset = (offset % s->extent_size) / 512;
+
+    /* sectors outside the catalog have no extent to look up */
+    if (extent_index < 0 || extent_index >= s->catalog_size)
+        return 1;
+    if (s->catalog_bitmap[extent_index] == EXTENT_NOT_ALLOCATED)
+        return 1;
+
+    /* widen before multiplying so offsets past 4 GiB do not wrap */
+    bitmap_offset = s->data_offset +
+        (512 * (int64_t)s->catalog_bitmap[extent_index] *
+         (s->extent_blocks + s->bitmap_blocks));
+    block_offset = bitmap_offset + (512 * (s->bitmap_blocks + extent_offset));
+
+    /* read the bitmap byte that covers this sector */
+    if (lseek(s->fd, bitmap_offset + (extent_offset / 8), SEEK_SET) < 0)
+        return -1;
+    if (read(s->fd, &bitmap_entry, 1) != 1)
+        return -1;
+
+    if (!((bitmap_entry >> (extent_offset % 8)) & 1))
+        return 1; /* sector not allocated within the extent */
+
+    if (lseek(s->fd, block_offset, SEEK_SET) < 0)
+        return -1;
+
+    return 0;
+}
+
+/*
+ * Read nb_sectors 512-byte sectors starting at sector_num into buf.
+ * Sectors that are not allocated in the image are returned as zeroes.
+ * Returns 0 on success and -1 on I/O error.
+ */
+static int bochs_read(BlockDriverState *bs, int64_t sector_num,
+                    uint8_t *buf, int nb_sectors)
+{
+    BDRVBochsState *s = bs->opaque;
+    int ret;
+
+    while (nb_sectors > 0) {
+        ret = seek_to_sector(bs, sector_num);
+        if (ret < 0)
+            return -1;
+        if (ret == 0) {
+            if (read(s->fd, buf, 512) != 512)
+                return -1;
+        } else {
+            memset(buf, 0, 512);
+        }
+        nb_sectors--;
+        sector_num++;
+        buf += 512;
+    }
+    return 0;
+}
+
+/* Release the catalog and the image file descriptor. */
+static void bochs_close(BlockDriverState *bs)
+{
+    BDRVBochsState *s = bs->opaque;
+    qemu_free(s->catalog_bitmap);
+    close(s->fd);
+}
+
+static BlockDriver bdrv_bochs = {
+    .format_name       = "bochs",
+    .instance_size     = sizeof(BDRVBochsState),
+    .bdrv_probe        = bochs_probe,
+    .bdrv_open         = bochs_open,
+    .bdrv_read         = bochs_read,
+    .bdrv_close        = bochs_close,
+};
+
+/* Register the Bochs driver with the block layer. */
+static void bdrv_bochs_init(void)
+{
+    bdrv_register(&bdrv_bochs);
+}
+
+block_init(bdrv_bochs_init);
diff --git a/block/cloop.c b/block/cloop.c
new file mode 100644 (file)
index 0000000..06c687e
--- /dev/null
@@ -0,0 +1,237 @@
+/*
+ * QEMU Block driver for CLOOP images
+ *
+ * Copyright (c) 2004 Johannes E. Schindelin
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#include "qemu-common.h"
+#include "block_int.h"
+#include "module.h"
+#include <zlib.h>
+
+/* Per-image driver state, filled in by cloop_open(). */
+typedef struct BDRVCloopState {
+    int fd;
+    uint32_t block_size;         /* uncompressed block size in bytes */
+    uint32_t n_blocks;           /* number of compressed blocks */
+    uint64_t *offsets;           /* n_blocks + 1 file offsets, host endian */
+    uint32_t sectors_per_block;  /* block_size / 512 */
+    uint32_t current_block;      /* cached block, n_blocks when none */
+    uint8_t *compressed_block;   /* holds one block as read from the file */
+    uint8_t *uncompressed_block; /* holds the inflated current_block */
+    z_stream zstream;
+} BDRVCloopState;
+
+/*
+ * Score how likely buf is the start of a CLOOP "V2.0 Format" image.
+ * Returns 2 when buf matches the (possibly truncated) magic, 0 otherwise.
+ */
+static int cloop_probe(const uint8_t *buf, int buf_size, const char *filename)
+{
+    const char *magic_version_2_0 = "#!/bin/sh\n"
+        "#V2.0 Format\n"
+        "modprobe cloop file=$0 && mount -r -t iso9660 /dev/cloop $1\n";
+    int length = strlen(magic_version_2_0);
+
+    /* an empty buffer would otherwise match trivially */
+    if (buf_size <= 0)
+        return 0;
+    if (length > buf_size)
+        length = buf_size;
+    if (!memcmp(magic_version_2_0, buf, length))
+        return 2;
+    return 0;
+}
+
+/*
+ * Open a CLOOP image: read the header and the block offset table, then set
+ * up the zlib stream and the block buffers.
+ * Returns 0 on success, -errno if the file cannot be opened and -1 on any
+ * other failure.
+ */
+static int cloop_open(BlockDriverState *bs, const char *filename, int flags)
+{
+    BDRVCloopState *s = bs->opaque;
+    uint32_t offsets_size, max_compressed_block_size = 1, i;
+    int64_t nread;
+
+    /* lets the fail path free unconditionally */
+    s->offsets = NULL;
+    s->compressed_block = NULL;
+    s->uncompressed_block = NULL;
+
+    s->fd = open(filename, O_RDONLY | O_BINARY);
+    if (s->fd < 0)
+        return -errno;
+    bs->read_only = 1;
+
+    /* read header */
+    if (lseek(s->fd, 128, SEEK_SET) < 0)
+        goto fail;
+    if (read(s->fd, &s->block_size, 4) != 4)
+        goto fail;
+    s->block_size = be32_to_cpu(s->block_size);
+    if (read(s->fd, &s->n_blocks, 4) != 4)
+        goto fail;
+    s->n_blocks = be32_to_cpu(s->n_blocks);
+
+    /* block_size becomes a divisor and must hold whole 512-byte sectors */
+    if (s->block_size == 0 || s->block_size % 512)
+        goto fail;
+    /* keep (n_blocks + 1) * 8 within the 32 bits of offsets_size */
+    if (s->n_blocks > UINT32_MAX / sizeof(uint64_t) - 1)
+        goto fail;
+
+    /* read offsets: cloop_read_block uses offsets[block + 1] for each block */
+    offsets_size = (s->n_blocks + 1) * sizeof(uint64_t);
+    s->offsets = qemu_malloc(offsets_size);
+    nread = read(s->fd, s->offsets, offsets_size);
+    if (nread < 0 || (uint64_t)nread != offsets_size)
+        goto fail;
+    for (i = 0; i <= s->n_blocks; i++) {
+        s->offsets[i] = be64_to_cpu(s->offsets[i]);
+        if (i > 0) {
+            uint64_t size;
+
+            /* a table that runs backwards would give a bogus block size */
+            if (s->offsets[i] < s->offsets[i - 1])
+                goto fail;
+            size = s->offsets[i] - s->offsets[i - 1];
+            /* max_compressed_block_size + 1 must still fit 32 bits */
+            if (size >= UINT32_MAX)
+                goto fail;
+            if (size > max_compressed_block_size)
+                max_compressed_block_size = size;
+        }
+    }
+
+    /* initialize zlib engine */
+    s->compressed_block = qemu_malloc(max_compressed_block_size + 1);
+    s->uncompressed_block = qemu_malloc(s->block_size);
+    if (inflateInit(&s->zstream) != Z_OK)
+        goto fail;
+    s->current_block = s->n_blocks;
+
+    s->sectors_per_block = s->block_size / 512;
+    bs->total_sectors = (int64_t)s->n_blocks * s->sectors_per_block;
+    return 0;
+
+fail:
+    qemu_free(s->uncompressed_block);
+    qemu_free(s->compressed_block);
+    qemu_free(s->offsets);
+    s->uncompressed_block = NULL;
+    s->compressed_block = NULL;
+    s->offsets = NULL;
+    close(s->fd);
+    return -1;
+}
+
+/*
+ * Make sure uncompressed_block holds block block_num, inflating it from the
+ * image file if it is not the cached one.
+ * Returns 0 on success and -1 on failure.
+ */
+static inline int cloop_read_block(BDRVCloopState *s, int block_num)
+{
+    int ret;
+    uint32_t bytes;
+
+    /* also keeps the "nothing cached" value n_blocks from matching below */
+    if (block_num < 0 || (uint32_t)block_num >= s->n_blocks)
+        return -1;
+    if (s->current_block == (uint32_t)block_num)
+        return 0;
+
+    /* the buffer is about to be overwritten: forget the cached block */
+    s->current_block = s->n_blocks;
+
+    bytes = s->offsets[block_num + 1] - s->offsets[block_num];
+    if (lseek(s->fd, s->offsets[block_num], SEEK_SET) < 0)
+        return -1;
+    ret = read(s->fd, s->compressed_block, bytes);
+    if (ret < 0 || (uint32_t)ret != bytes)
+        return -1;
+
+    s->zstream.next_in = s->compressed_block;
+    s->zstream.avail_in = bytes;
+    s->zstream.next_out = s->uncompressed_block;
+    s->zstream.avail_out = s->block_size;
+    ret = inflateReset(&s->zstream);
+    if (ret != Z_OK)
+        return -1;
+    ret = inflate(&s->zstream, Z_FINISH);
+    if (ret != Z_STREAM_END || s->zstream.total_out != s->block_size)
+        return -1;
+
+    s->current_block = block_num;
+    return 0;
+}
+
+/*
+ * Read nb_sectors 512-byte sectors starting at sector_num into buf.
+ * Returns 0 on success and -1 if a block cannot be read or inflated.
+ */
+static int cloop_read(BlockDriverState *bs, int64_t sector_num,
+                    uint8_t *buf, int nb_sectors)
+{
+    BDRVCloopState *s = bs->opaque;
+    int i;
+
+    for (i = 0; i < nb_sectors; i++) {
+        uint32_t sector_offset_in_block =
+            (sector_num + i) % s->sectors_per_block;
+        uint32_t block_num = (sector_num + i) / s->sectors_per_block;
+
+        if (cloop_read_block(s, block_num) != 0)
+            return -1;
+        memcpy(buf + i * 512,
+               s->uncompressed_block + sector_offset_in_block * 512, 512);
+    }
+    return 0;
+}
+
+/* Release the descriptor, buffers and zlib stream set up by cloop_open(). */
+static void cloop_close(BlockDriverState *bs)
+{
+    BDRVCloopState *s = bs->opaque;
+    close(s->fd);
+    qemu_free(s->offsets);
+    qemu_free(s->compressed_block);
+    qemu_free(s->uncompressed_block);
+    inflateEnd(&s->zstream);
+}
+
+static BlockDriver bdrv_cloop = {
+    .format_name       = "cloop",
+    .instance_size     = sizeof(BDRVCloopState),
+    .bdrv_probe        = cloop_probe,
+    .bdrv_open         = cloop_open,
+    .bdrv_read         = cloop_read,
+    .bdrv_close        = cloop_close,
+};
+
+/* Register the CLOOP driver with the block layer. */
+static void bdrv_cloop_init(void)
+{
+    bdrv_register(&bdrv_cloop);
+}
+
+block_init(bdrv_cloop_init);
diff --git a/block/cow.c b/block/cow.c
new file mode 100644 (file)
index 0000000..94b3549
--- /dev/null
@@ -0,0 +1,275 @@
+/*
+ * Block driver for the COW format
+ *
+ * Copyright (c) 2004 Fabrice Bellard
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#ifndef _WIN32
+#include "qemu-common.h"
+#include "block_int.h"
+#include "module.h"
+#include <sys/mman.h>
+
+/**************************************************************/
+/* COW block driver using file system holes */
+
+/* user mode linux compatible COW file */
+#define COW_MAGIC 0x4f4f4f4d  /* MOOO */
+#define COW_VERSION 2
+
+/*
+ * Header found at offset 0 of a COW image.  This file converts the
+ * multi-byte fields with the be32/be64 helpers whenever it reads or
+ * writes them, i.e. they are stored big-endian.
+ */
+struct cow_header_v2 {
+    uint32_t magic;             /* COW_MAGIC */
+    uint32_t version;           /* COW_VERSION */
+    char backing_file[1024];    /* name of the backing image, may be empty */
+    int32_t mtime;              /* st_mtime of the backing file, 0 if unknown */
+    uint64_t size;              /* image size in bytes */
+    uint32_t sectorsize;        /* cow_create() always stores 512 */
+};
+
+/* Per-image state of the COW driver (stored in bs->opaque). */
+typedef struct BDRVCowState {
+    int fd; /* descriptor of the COW image file */
+    uint8_t *cow_bitmap; /* if non NULL, COW mappings are used first */
+    uint8_t *cow_bitmap_addr; /* mmap address of cow_bitmap */
+    int cow_bitmap_size; /* length of the mapping: header plus bitmap bytes */
+    int64_t cow_sectors_offset; /* file offset of sector 0: cow_bitmap_size
+                                 * rounded up to a multiple of 512 */
+} BDRVCowState;
+
+/*
+ * Probe callback: returns 100 when buf is large enough to hold a COW
+ * header and carries the expected magic and version, 0 otherwise.
+ * filename is not examined.
+ */
+static int cow_probe(const uint8_t *buf, int buf_size, const char *filename)
+{
+    const struct cow_header_v2 *hdr = (const void *)buf;
+
+    if (buf_size < sizeof(struct cow_header_v2)) {
+        return 0;
+    }
+    if (be32_to_cpu(hdr->magic) != COW_MAGIC) {
+        return 0;
+    }
+    if (be32_to_cpu(hdr->version) != COW_VERSION) {
+        return 0;
+    }
+    return 100;
+}
+
+/*
+ * Open a COW image: validate the header, publish the image size and the
+ * backing file name in bs, and map header plus bitmap into memory.
+ * Returns 0 on success, -1 on any failure (the descriptor is closed).
+ */
+static int cow_open(BlockDriverState *bs, const char *filename, int flags)
+{
+    BDRVCowState *s = bs->opaque;
+    struct cow_header_v2 hdr;
+    int64_t image_bytes;
+    int fd;
+
+    /* prefer read-write access, fall back to read-only */
+    fd = open(filename, O_RDWR | O_BINARY | O_LARGEFILE);
+    if (fd < 0) {
+        fd = open(filename, O_RDONLY | O_BINARY | O_LARGEFILE);
+    }
+    if (fd < 0) {
+        return -1;
+    }
+    s->fd = fd;
+
+    /* see if it is a cow image */
+    if (read(fd, &hdr, sizeof(hdr)) != sizeof(hdr)) {
+        goto fail;
+    }
+    if (be32_to_cpu(hdr.magic) != COW_MAGIC) {
+        goto fail;
+    }
+    if (be32_to_cpu(hdr.version) != COW_VERSION) {
+        goto fail;
+    }
+
+    /* cow image found */
+    image_bytes = be64_to_cpu(hdr.size);
+    bs->total_sectors = image_bytes / 512;
+
+    pstrcpy(bs->backing_file, sizeof(bs->backing_file), hdr.backing_file);
+
+    /* mmap the header together with the bitmap (one bit per sector) */
+    s->cow_bitmap_size = ((bs->total_sectors + 7) >> 3) + sizeof(hdr);
+    s->cow_bitmap_addr = (void *)mmap(get_mmap_addr(s->cow_bitmap_size),
+                                      s->cow_bitmap_size,
+                                      PROT_READ | PROT_WRITE,
+                                      MAP_SHARED, s->fd, 0);
+    if (s->cow_bitmap_addr == MAP_FAILED) {
+        goto fail;
+    }
+    s->cow_bitmap = s->cow_bitmap_addr + sizeof(hdr);
+    /* sector data starts at the next 512-byte boundary after the bitmap */
+    s->cow_sectors_offset = (s->cow_bitmap_size + 511) & ~511;
+    return 0;
+
+ fail:
+    close(fd);
+    return -1;
+}
+
+/* Set bit bitnum of bitmap; bit 0 is the least significant bit of byte 0. */
+static inline void cow_set_bit(uint8_t *bitmap, int64_t bitnum)
+{
+    uint8_t *byte = &bitmap[bitnum / 8];
+
+    *byte |= (1 << (bitnum % 8));
+}
+
+/* Return 1 if bit bitnum of bitmap is set, 0 otherwise. */
+static inline int is_bit_set(const uint8_t *bitmap, int64_t bitnum)
+{
+    int mask = 1 << (bitnum % 8);
+
+    return (bitmap[bitnum / 8] & mask) != 0;
+}
+
+
+/*
+ * Report whether sector sector_num has its bitmap bit set, i.e. whether
+ * its current contents live in the COW file.  *num_same receives the
+ * length of the run of sectors, starting at sector_num and capped at
+ * nb_sectors, that share this state.  Without a bitmap, or for an empty
+ * request, the result is 0 and *num_same is set to nb_sectors.
+ */
+static inline int is_changed(uint8_t *bitmap,
+                             int64_t sector_num, int nb_sectors,
+                             int *num_same)
+{
+    int first_state;
+    int run;
+
+    if (!bitmap || nb_sectors == 0) {
+        *num_same = nb_sectors;
+        return 0;
+    }
+
+    first_state = is_bit_set(bitmap, sector_num);
+    run = 1;
+    while (run < nb_sectors &&
+           is_bit_set(bitmap, sector_num + run) == first_state) {
+        run++;
+    }
+    *num_same = run;
+
+    return first_state;
+}
+
+/*
+ * is_allocated callback: thin wrapper around is_changed() using the
+ * image's bitmap; *pnum receives the length of the uniform run.
+ */
+static int cow_is_allocated(BlockDriverState *bs, int64_t sector_num,
+                            int nb_sectors, int *pnum)
+{
+    BDRVCowState *state = bs->opaque;
+    uint8_t *bitmap = state->cow_bitmap;
+
+    return is_changed(bitmap, sector_num, nb_sectors, pnum);
+}
+
+/*
+ * Read nb_sectors sectors starting at sector_num into buf.  The request
+ * is processed in runs of sectors with identical bitmap state: changed
+ * runs come from the COW file, unchanged runs from the backing image or,
+ * without one, are zero-filled.  Returns 0 on success, -1 on error.
+ */
+static int cow_read(BlockDriverState *bs, int64_t sector_num,
+                    uint8_t *buf, int nb_sectors)
+{
+    BDRVCowState *s = bs->opaque;
+    int run;    /* sectors handled in the current step */
+    int ret;
+
+    for (; nb_sectors > 0;
+         nb_sectors -= run, sector_num += run, buf += run * 512) {
+        if (is_changed(s->cow_bitmap, sector_num, nb_sectors, &run)) {
+            lseek(s->fd, s->cow_sectors_offset + sector_num * 512, SEEK_SET);
+            ret = read(s->fd, buf, run * 512);
+            if (ret != run * 512) {
+                return -1;
+            }
+            continue;
+        }
+        if (!bs->backing_hd) {
+            memset(buf, 0, run * 512);
+            continue;
+        }
+        /* read from the base image */
+        ret = bdrv_read(bs->backing_hd, sector_num, buf, run);
+        if (ret < 0) {
+            return -1;
+        }
+    }
+    return 0;
+}
+
+/*
+ * Write nb_sectors sectors from buf at sector_num into the COW file and
+ * then mark them as changed in the bitmap.  Returns 0 on success, -1 if
+ * the data could not be written completely.
+ */
+static int cow_write(BlockDriverState *bs, int64_t sector_num,
+                     const uint8_t *buf, int nb_sectors)
+{
+    BDRVCowState *s = bs->opaque;
+    int64_t sector;
+    int ret;
+
+    lseek(s->fd, s->cow_sectors_offset + sector_num * 512, SEEK_SET);
+    ret = write(s->fd, buf, nb_sectors * 512);
+    if (ret != nb_sectors * 512) {
+        return -1;
+    }
+
+    /* bits are only set once the data has reached the file */
+    for (sector = sector_num; sector < sector_num + nb_sectors; sector++) {
+        cow_set_bit(s->cow_bitmap, sector);
+    }
+    return 0;
+}
+
+/* Unmap the header/bitmap mapping and close the image file. */
+static void cow_close(BlockDriverState *bs)
+{
+    BDRVCowState *state = bs->opaque;
+
+    munmap((void *)state->cow_bitmap_addr, state->cow_bitmap_size);
+    close(state->fd);
+}
+
+/*
+ * Create an empty COW image named filename for a disk of image_sectors
+ * 512-byte sectors.  When image_filename is non-NULL it is recorded as
+ * the backing file together with its modification time (0 when the file
+ * cannot be opened or stat'ed).  No creation flags are supported.
+ *
+ * Returns 0 on success, -ENOTSUP if flags is non-zero, -1 if the image
+ * file cannot be created and -EIO if the header cannot be written or the
+ * file cannot be extended to hold the bitmap.
+ */
+static int cow_create(const char *filename, int64_t image_sectors,
+                      const char *image_filename, int flags)
+{
+    int fd, cow_fd;
+    int ret = 0;
+    struct cow_header_v2 cow_header;
+    struct stat st;
+
+    if (flags)
+        return -ENOTSUP;
+
+    cow_fd = open(filename, O_WRONLY | O_CREAT | O_TRUNC | O_BINARY,
+              0644);
+    if (cow_fd < 0)
+        return -1;
+    memset(&cow_header, 0, sizeof(cow_header));
+    cow_header.magic = cpu_to_be32(COW_MAGIC);
+    cow_header.version = cpu_to_be32(COW_VERSION);
+    if (image_filename) {
+        /* Note: if the backing file cannot be examined, keep a dummy mtime */
+        cow_header.mtime = cpu_to_be32(0);
+
+        /*
+         * cow_fd must stay open here: it is still needed below to write
+         * the header, whether or not the backing file is accessible.
+         */
+        fd = open(image_filename, O_RDONLY | O_BINARY);
+        if (fd >= 0) {
+            if (fstat(fd, &st) == 0)
+                cow_header.mtime = cpu_to_be32(st.st_mtime);
+            close(fd);
+        }
+        pstrcpy(cow_header.backing_file, sizeof(cow_header.backing_file),
+                image_filename);
+    }
+    cow_header.sectorsize = cpu_to_be32(512);
+    cow_header.size = cpu_to_be64(image_sectors * 512);
+    if (write(cow_fd, &cow_header, sizeof(cow_header)) != sizeof(cow_header)) {
+        ret = -EIO;
+    } else if (ftruncate(cow_fd, sizeof(cow_header) +
+                         ((image_sectors + 7) >> 3)) != 0) {
+        /* the file must be large enough to include at least all the bitmap */
+        ret = -EIO;
+    }
+    close(cow_fd);
+    return ret;
+}
+
+/* Flush callback: fsync() the image file descriptor. */
+static void cow_flush(BlockDriverState *bs)
+{
+    BDRVCowState *state = bs->opaque;
+
+    fsync(state->fd);
+}
+
+/* Driver table for COW images: read/write access, creation, flush and
+ * allocation queries. */
+static BlockDriver bdrv_cow = {
+    .format_name       = "cow",
+    .instance_size     = sizeof(BDRVCowState),
+    .bdrv_probe                = cow_probe,
+    .bdrv_open         = cow_open,
+    .bdrv_read         = cow_read,
+    .bdrv_write                = cow_write,
+    .bdrv_close                = cow_close,
+    .bdrv_create       = cow_create,
+    .bdrv_flush                = cow_flush,
+    .bdrv_is_allocated = cow_is_allocated,
+};
+
+/* Register the COW driver table with the block layer. */
+static void bdrv_cow_init(void)
+{
+    bdrv_register(&bdrv_cow);
+}
+
+/* NOTE(review): block_init() presumably schedules the function to run at
+ * start-up -- confirm against module.h. */
+block_init(bdrv_cow_init);
+#endif
diff --git a/block/dmg.c b/block/dmg.c
new file mode 100644 (file)
index 0000000..262560f
--- /dev/null
@@ -0,0 +1,301 @@
+/*
+ * QEMU Block driver for DMG images
+ *
+ * Copyright (c) 2004 Johannes E. Schindelin
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#include "qemu-common.h"
+#include "block_int.h"
+#include "bswap.h"
+#include "module.h"
+#include <zlib.h>
+
+/* Per-image state of the DMG driver (stored in bs->opaque). */
+typedef struct BDRVDMGState {
+    int fd;
+
+    /* The image is described by n_chunks chunks; for 0 <= i < n_chunks:
+     * types[i] is the chunk type (0x80000005 zlib, 1 copy, 2 zero),
+     * offsets[i] is the offset of the chunk in the .dmg file,
+     * lengths[i] is the length of the stored (compressed) chunk,
+     * sectors[i] is the first sector covered by the chunk,
+     * sectorcounts[i] is the number of sectors in that chunk.
+     * search_chunk() relies on the sectors array being ordered. */
+
+    uint32_t n_chunks;
+    uint32_t* types;
+    uint64_t* offsets;
+    uint64_t* lengths;
+    uint64_t* sectors;
+    uint64_t* sectorcounts;
+    /* index of the chunk held in uncompressed_chunk; n_chunks means none */
+    uint32_t current_chunk;
+    /* scratch buffers sized in dmg_open() for the largest chunk */
+    uint8_t *compressed_chunk;
+    uint8_t *uncompressed_chunk;
+    z_stream zstream;
+} BDRVDMGState;
+
+/*
+ * Probe callback: the image contents are ignored, only the file name is
+ * looked at.  Returns 2 for names longer than four characters that end
+ * in ".dmg", 0 otherwise.
+ */
+static int dmg_probe(const uint8_t *buf, int buf_size, const char *filename)
+{
+    int len = strlen(filename);
+
+    if (len <= 4) {
+        return 0;
+    }
+    return strcmp(filename + len - 4, ".dmg") == 0 ? 2 : 0;
+}
+
+/*
+ * Read a big-endian 64-bit value at the current position of fd.
+ * Returns 0 when fewer than 8 bytes could be read.
+ */
+static off_t read_off(int fd)
+{
+    uint64_t raw;
+    ssize_t got = read(fd, &raw, sizeof(raw));
+
+    if (got >= 8) {
+        return be64_to_cpu(raw);
+    }
+    return 0;
+}
+
+/*
+ * Read a big-endian 32-bit value at the current position of fd.
+ * Returns 0 when fewer than 4 bytes could be read.
+ */
+static off_t read_uint32(int fd)
+{
+    uint32_t raw;
+    ssize_t got = read(fd, &raw, sizeof(raw));
+
+    if (got >= 4) {
+        return be32_to_cpu(raw);
+    }
+    return 0;
+}
+
+/*
+ * Open a DMG image read-only and build the chunk tables.
+ *
+ * The offset of the info blocks is read from 0x1d8 bytes before the end
+ * of the file.  Every resource of type 0x6d697368 (ASCII "mish") that is
+ * at least 244 bytes long contributes 40-byte chunk descriptors (type,
+ * 4 skipped bytes, first sector, sector count, file offset, length).
+ *
+ * Returns 0 on success and -errno if the file cannot be opened.  Any
+ * later parse failure jumps to the dmg_close label, which closes the
+ * descriptor and hands the image to the "raw" driver instead.
+ */
+static int dmg_open(BlockDriverState *bs, const char *filename, int flags)
+{
+    BDRVDMGState *s = bs->opaque;
+    off_t info_begin,info_end,last_in_offset,last_out_offset;
+    uint32_t count;
+    uint32_t max_compressed_size=1,max_sectors_per_chunk=1,i;
+
+    s->fd = open(filename, O_RDONLY | O_BINARY);
+    if (s->fd < 0)
+        return -errno;
+    bs->read_only = 1;
+    s->n_chunks = 0;
+    /* NOTE(review): s->types is not reset here although it is passed to
+     * qemu_realloc() below -- presumably bs->opaque is zero-filled by the
+     * caller; confirm. */
+    s->offsets = s->lengths = s->sectors = s->sectorcounts = NULL;
+
+    /* read offset of info blocks */
+    if(lseek(s->fd,-0x1d8,SEEK_END)<0) {
+        /* common failure path, also entered by goto from the code below */
+dmg_close:
+       close(s->fd);
+       /* NOTE(review): tables and buffers allocated before the failure
+        * are not released on this path. */
+       /* open raw instead */
+       bs->drv=bdrv_find_format("raw");
+       return bs->drv->bdrv_open(bs, filename, flags);
+    }
+    info_begin=read_off(s->fd);
+    if(info_begin==0)
+       goto dmg_close;
+    if(lseek(s->fd,info_begin,SEEK_SET)<0)
+       goto dmg_close;
+    if(read_uint32(s->fd)!=0x100)
+       goto dmg_close;
+    if((count = read_uint32(s->fd))==0)
+       goto dmg_close;
+    info_end = info_begin+count;
+    if(lseek(s->fd,0xf8,SEEK_CUR)<0)
+       goto dmg_close;
+
+    /* read offsets */
+    last_in_offset = last_out_offset = 0;
+    while(lseek(s->fd,0,SEEK_CUR)<info_end) {
+        uint32_t type;
+
+       count = read_uint32(s->fd);
+       if(count==0)
+           goto dmg_close;
+       type = read_uint32(s->fd);
+       /* skip resources that are not "mish" or are too short */
+       if(type!=0x6d697368 || count<244)
+           lseek(s->fd,count-4,SEEK_CUR);
+       else {
+           int new_size, chunk_count;
+           if(lseek(s->fd,200,SEEK_CUR)<0)
+               goto dmg_close;
+           /* the remaining bytes are 40-byte chunk descriptors */
+           chunk_count = (count-204)/40;
+           new_size = sizeof(uint64_t) * (s->n_chunks + chunk_count);
+           /* types holds 32-bit entries, hence half the byte size */
+           s->types = qemu_realloc(s->types, new_size/2);
+           s->offsets = qemu_realloc(s->offsets, new_size);
+           s->lengths = qemu_realloc(s->lengths, new_size);
+           s->sectors = qemu_realloc(s->sectors, new_size);
+           s->sectorcounts = qemu_realloc(s->sectorcounts, new_size);
+
+           for(i=s->n_chunks;i<s->n_chunks+chunk_count;i++) {
+               s->types[i] = read_uint32(s->fd);
+               /* only zlib (0x80000005), copy (1) and zero (2) chunks are
+                * stored; any other descriptor is dropped and its slot reused */
+               if(s->types[i]!=0x80000005 && s->types[i]!=1 && s->types[i]!=2) {
+                   if(s->types[i]==0xffffffff) {
+                       /* NOTE(review): reads entry i-1; with i == 0 the
+                        * unsigned index wraps around -- confirm this
+                        * cannot happen for valid images. */
+                       last_in_offset = s->offsets[i-1]+s->lengths[i-1];
+                       last_out_offset = s->sectors[i-1]+s->sectorcounts[i-1];
+                   }
+                   chunk_count--;
+                   i--;
+                   if(lseek(s->fd,36,SEEK_CUR)<0)
+                       goto dmg_close;
+                   continue;
+               }
+               read_uint32(s->fd);
+               s->sectors[i] = last_out_offset+read_off(s->fd);
+               s->sectorcounts[i] = read_off(s->fd);
+               s->offsets[i] = last_in_offset+read_off(s->fd);
+               s->lengths[i] = read_off(s->fd);
+               /* track the largest chunk to size the scratch buffers */
+               if(s->lengths[i]>max_compressed_size)
+                   max_compressed_size = s->lengths[i];
+               if(s->sectorcounts[i]>max_sectors_per_chunk)
+                   max_sectors_per_chunk = s->sectorcounts[i];
+           }
+           s->n_chunks+=chunk_count;
+       }
+    }
+
+    /* initialize zlib engine */
+    s->compressed_chunk = qemu_malloc(max_compressed_size+1);
+    s->uncompressed_chunk = qemu_malloc(512*max_sectors_per_chunk);
+    if(inflateInit(&s->zstream) != Z_OK)
+       goto dmg_close;
+
+    /* n_chunks is the "no chunk cached" marker */
+    s->current_chunk = s->n_chunks;
+
+    return 0;
+}
+
+/*
+ * Return non-zero (-1) when chunk_num is a valid chunk index and
+ * sector_num lies inside that chunk, 0 otherwise.
+ */
+static inline int is_sector_in_chunk(BDRVDMGState* s,
+               uint32_t chunk_num,int sector_num)
+{
+    if (chunk_num >= s->n_chunks) {
+        return 0;
+    }
+    if (s->sectors[chunk_num] > sector_num) {
+        return 0;
+    }
+    if (s->sectors[chunk_num] + s->sectorcounts[chunk_num] <= sector_num) {
+        return 0;
+    }
+    return -1;
+}
+
+/*
+ * Binary search for the chunk that contains sector_num, relying on the
+ * sectors array being ordered.  Returns the chunk index, or s->n_chunks
+ * when no chunk covers the sector.
+ */
+static inline uint32_t search_chunk(BDRVDMGState* s,int sector_num)
+{
+    /* candidates are the half-open index range [lo, hi) */
+    uint32_t lo = 0, hi = s->n_chunks;
+
+    while (lo < hi) {
+        uint32_t mid = lo + (hi - lo) / 2;
+
+        if (s->sectors[mid] > sector_num) {
+            hi = mid;
+        } else if (s->sectors[mid] + s->sectorcounts[mid] > sector_num) {
+            return mid;
+        } else {
+            /* mid itself is excluded so the range always shrinks; keeping
+             * it as the lower bound would loop forever once hi == lo + 1 */
+            lo = mid + 1;
+        }
+    }
+    return s->n_chunks; /* error */
+}
+
+/*
+ * Make sure the chunk containing sector_num is available, decoded, in
+ * s->uncompressed_chunk.  Nothing is done when that chunk is already the
+ * current one.  Returns 0 on success, -1 if no chunk covers the sector or
+ * the chunk cannot be read or inflated; on failure no chunk is current.
+ */
+static inline int dmg_read_chunk(BDRVDMGState *s,int sector_num)
+{
+    if(!is_sector_in_chunk(s,s->current_chunk,sector_num)) {
+        int ret;
+        uint32_t chunk = search_chunk(s,sector_num);
+
+        if (chunk >= s->n_chunks)
+            return -1;
+
+        /* the buffer is about to be overwritten: mark it as invalid */
+        s->current_chunk = s->n_chunks;
+        switch (s->types[chunk]) {
+        case 0x80000005: { /* zlib compressed */
+            uint64_t filled = 0;
+
+            /* compare the off_t result directly; storing it in an int
+             * would misreport offsets of 2 GiB and beyond */
+            if (lseek(s->fd, s->offsets[chunk], SEEK_SET) < 0)
+                return -1;
+
+            /* we need to buffer, because only the chunk as whole can be
+             * inflated; read() may return less than requested */
+            while (filled < s->lengths[chunk]) {
+                ssize_t got = read(s->fd, s->compressed_chunk + filled,
+                                   s->lengths[chunk] - filled);
+                if (got < 0) {
+                    if (errno == EINTR)
+                        continue;
+                    return -1;
+                }
+                if (got == 0)
+                    return -1; /* file ends before the chunk does */
+                filled += got;
+            }
+
+            s->zstream.next_in = s->compressed_chunk;
+            s->zstream.avail_in = s->lengths[chunk];
+            s->zstream.next_out = s->uncompressed_chunk;
+            s->zstream.avail_out = 512*s->sectorcounts[chunk];
+            ret = inflateReset(&s->zstream);
+            if (ret != Z_OK)
+                return -1;
+            ret = inflate(&s->zstream, Z_FINISH);
+            if (ret != Z_STREAM_END ||
+                s->zstream.total_out != 512*s->sectorcounts[chunk])
+                return -1;
+            break; }
+        case 1: /* copy */
+            /* position the file on the chunk before reading it */
+            if (lseek(s->fd, s->offsets[chunk], SEEK_SET) < 0)
+                return -1;
+            ret = read(s->fd, s->uncompressed_chunk, s->lengths[chunk]);
+            if (ret != s->lengths[chunk])
+                return -1;
+            break;
+        case 2: /* zero */
+            memset(s->uncompressed_chunk, 0, 512*s->sectorcounts[chunk]);
+            break;
+        }
+        s->current_chunk = chunk;
+    }
+    return 0;
+}
+
+/*
+ * Read nb_sectors 512-byte sectors starting at sector_num into buf, one
+ * sector at a time out of the decoded chunk buffer.  Returns 0 on
+ * success, -1 if a chunk cannot be loaded.
+ */
+static int dmg_read(BlockDriverState *bs, int64_t sector_num,
+                    uint8_t *buf, int nb_sectors)
+{
+    BDRVDMGState *s = bs->opaque;
+    int done;
+
+    for (done = 0; done < nb_sectors; done++, buf += 512) {
+        int64_t sector = sector_num + done;
+        uint32_t sector_offset_in_chunk;
+
+        if (dmg_read_chunk(s, sector) != 0) {
+            return -1;
+        }
+        sector_offset_in_chunk = sector - s->sectors[s->current_chunk];
+        memcpy(buf, s->uncompressed_chunk + sector_offset_in_chunk * 512, 512);
+    }
+    return 0;
+}
+
+/* Close the image file and release chunk tables, buffers and zlib state. */
+static void dmg_close(BlockDriverState *bs)
+{
+    BDRVDMGState *state = bs->opaque;
+
+    close(state->fd);
+    /* the chunk tables are only released when chunks were recorded */
+    if (state->n_chunks > 0) {
+        free(state->types);
+        free(state->offsets);
+        free(state->lengths);
+        free(state->sectors);
+        free(state->sectorcounts);
+    }
+    free(state->compressed_chunk);
+    free(state->uncompressed_chunk);
+    inflateEnd(&state->zstream);
+}
+
+/*
+ * Driver table for DMG images.  Only probe/open/read/close callbacks
+ * are set; no write callback is provided.
+ */
+static BlockDriver bdrv_dmg = {
+    .format_name       = "dmg",
+    .instance_size     = sizeof(BDRVDMGState),
+    .bdrv_probe                = dmg_probe,
+    .bdrv_open         = dmg_open,
+    .bdrv_read         = dmg_read,
+    .bdrv_close                = dmg_close,
+};
+
+/* Register the DMG driver table with the block layer. */
+static void bdrv_dmg_init(void)
+{
+    bdrv_register(&bdrv_dmg);
+}
+
+/* NOTE(review): block_init() presumably schedules the function to run at
+ * start-up -- confirm against module.h. */
+block_init(bdrv_dmg_init);
diff --git a/block/nbd.c b/block/nbd.c
new file mode 100644 (file)
index 0000000..47d4778
--- /dev/null
@@ -0,0 +1,196 @@
+/*
+ * QEMU Block driver for NBD
+ *
+ * Copyright (C) 2008 Bull S.A.S.
+ *     Author: Laurent Vivier <Laurent.Vivier@bull.net>
+ *
+ * Some parts:
+ *    Copyright (C) 2007 Anthony Liguori <anthony@codemonkey.ws>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include "qemu-common.h"
+#include "nbd.h"
+#include "module.h"
+
+#include <sys/types.h>
+#include <unistd.h>
+
+/* Per-connection state of the NBD driver (stored in bs->opaque). */
+typedef struct BDRVNBDState {
+    int sock;           /* socket connected to the NBD server */
+    off_t size;         /* size from the negotiation, returned by nbd_getlength() */
+    size_t blocksize;   /* block size reported by the negotiation */
+} BDRVNBDState;
+
+/*
+ * Connect to an NBD server.  filename has the form "nbd:unix:/path" for
+ * a UNIX socket (the path must be absolute) or "nbd:host:port" for TCP.
+ *
+ * Returns 0 on success, -EINVAL for BDRV_O_CREAT or a malformed name, and
+ * -errno when connecting or negotiating fails.  The socket is closed
+ * again when the negotiation fails.
+ */
+static int nbd_open(BlockDriverState *bs, const char* filename, int flags)
+{
+    BDRVNBDState *s = bs->opaque;
+    const char *host;
+    const char *unixpath;
+    int sock;
+    off_t size;
+    size_t blocksize;
+    int ret;
+
+    if ((flags & BDRV_O_CREAT))
+        return -EINVAL;
+
+    if (!strstart(filename, "nbd:", &host))
+        return -EINVAL;
+
+    if (strstart(host, "unix:", &unixpath)) {
+
+        if (unixpath[0] != '/')
+            return -EINVAL;
+
+        sock = unix_socket_outgoing(unixpath);
+
+    } else {
+        uint16_t port;
+        char *p, *r;
+        char hostname[128];
+
+        /* work on a copy so that the host name can be terminated in place */
+        pstrcpy(hostname, 128, host);
+
+        p = strchr(hostname, ':');
+        if (p == NULL)
+            return -EINVAL;
+
+        *p = '\0';
+        p++;
+
+        port = strtol(p, &r, 0);
+        if (r == p)
+            return -EINVAL;
+        sock = tcp_socket_outgoing(hostname, port);
+    }
+
+    if (sock == -1)
+        return -errno;
+
+    ret = nbd_receive_negotiate(sock, &size, &blocksize);
+    if (ret == -1) {
+        /* do not leak the connection; close() may overwrite errno */
+        int saved_errno = errno;
+
+        close(sock);
+        return -saved_errno;
+    }
+
+    s->sock = sock;
+    s->size = size;
+    s->blocksize = blocksize;
+
+    return 0;
+}
+
+/*
+ * Read nb_sectors sectors starting at sector_num from the server into
+ * buf: send the request, check the reply, then receive the payload.
+ * Returns 0 on success, -errno on transport errors, the negated server
+ * error code, or -EIO on a handle mismatch or short payload.
+ */
+static int nbd_read(BlockDriverState *bs, int64_t sector_num,
+                    uint8_t *buf, int nb_sectors)
+{
+    BDRVNBDState *s = bs->opaque;
+    struct nbd_request req;
+    struct nbd_reply rep;
+
+    req.len = nb_sectors * 512;
+    req.from = sector_num * 512;
+    req.handle = (uint64_t)(intptr_t)bs;
+    req.type = NBD_CMD_READ;
+
+    if (nbd_send_request(s->sock, &req) == -1 ||
+        nbd_receive_reply(s->sock, &rep) == -1) {
+        return -errno;
+    }
+    if (rep.error != 0) {
+        return -rep.error;
+    }
+    /* the reply must belong to the request just sent */
+    if (rep.handle != req.handle) {
+        return -EIO;
+    }
+    if (nbd_wr_sync(s->sock, buf, req.len, 1) != req.len) {
+        return -EIO;
+    }
+    return 0;
+}
+
+/*
+ * Write nb_sectors sectors from buf at sector_num to the server: send
+ * the request followed by the payload, then check the reply.
+ * Returns 0 on success, -errno on transport errors, the negated server
+ * error code, or -EIO on a short payload or handle mismatch.
+ */
+static int nbd_write(BlockDriverState *bs, int64_t sector_num,
+                     const uint8_t *buf, int nb_sectors)
+{
+    BDRVNBDState *s = bs->opaque;
+    struct nbd_request req;
+    struct nbd_reply rep;
+
+    req.len = nb_sectors * 512;
+    req.from = sector_num * 512;
+    req.handle = (uint64_t)(intptr_t)bs;
+    req.type = NBD_CMD_WRITE;
+
+    if (nbd_send_request(s->sock, &req) == -1) {
+        return -errno;
+    }
+    if (nbd_wr_sync(s->sock, (uint8_t*)buf, req.len, 0) != req.len) {
+        return -EIO;
+    }
+    if (nbd_receive_reply(s->sock, &rep) == -1) {
+        return -errno;
+    }
+    if (rep.error != 0) {
+        return -rep.error;
+    }
+    /* the reply must belong to the request just sent */
+    if (rep.handle != req.handle) {
+        return -EIO;
+    }
+    return 0;
+}
+
+/*
+ * Send a disconnect request to the server (its result is not checked)
+ * and close the socket.
+ */
+static void nbd_close(BlockDriverState *bs)
+{
+    BDRVNBDState *state = bs->opaque;
+    struct nbd_request req;
+
+    req.len = 0;
+    req.from = 0;
+    req.handle = (uint64_t)(intptr_t)bs;
+    req.type = NBD_CMD_DISC;
+    nbd_send_request(state->sock, &req);
+
+    close(state->sock);
+}
+
+/* Return the size that was stored by nbd_open() after the negotiation. */
+static int64_t nbd_getlength(BlockDriverState *bs)
+{
+    const BDRVNBDState *state = bs->opaque;
+
+    return state->size;
+}
+
+/*
+ * Driver table for NBD.  No probe callback is set; the table carries a
+ * protocol_name ("nbd") instead.
+ */
+static BlockDriver bdrv_nbd = {
+    .format_name       = "nbd",
+    .instance_size     = sizeof(BDRVNBDState),
+    .bdrv_open         = nbd_open,
+    .bdrv_read         = nbd_read,
+    .bdrv_write                = nbd_write,
+    .bdrv_close                = nbd_close,
+    .bdrv_getlength    = nbd_getlength,
+    .protocol_name     = "nbd",
+};
+
+/* Register the NBD driver table with the block layer. */
+static void bdrv_nbd_init(void)
+{
+    bdrv_register(&bdrv_nbd);
+}
+
+/* NOTE(review): block_init() presumably schedules the function to run at
+ * start-up -- confirm against module.h. */
+block_init(bdrv_nbd_init);
diff --git a/block/parallels.c b/block/parallels.c
new file mode 100644 (file)
index 0000000..0b64a5c
--- /dev/null
@@ -0,0 +1,181 @@
+/*
+ * Block driver for Parallels disk image format
+ *
+ * Copyright (c) 2007 Alex Beregszaszi
+ *
+ * This code is based on comparing different disk images created by Parallels.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#include "qemu-common.h"
+#include "block_int.h"
+#include "module.h"
+
+/**************************************************************/
+
+#define HEADER_MAGIC "WithoutFreeSpace"
+#define HEADER_VERSION 2
+#define HEADER_SIZE 64
+
+// On-disk image header, HEADER_SIZE (64) bytes: 16 + 6 * 4 + 24.
+// always little-endian
+struct parallels_header {
+    char magic[16]; // "WithoutFreeSpace"
+    uint32_t version; // HEADER_VERSION
+    uint32_t heads;
+    uint32_t cylinders;
+    uint32_t tracks; // used by seek_to_sector() as sectors per catalog entry
+    uint32_t catalog_entries; // number of 32-bit catalog entries after the header
+    uint32_t nb_sectors; // becomes bs->total_sectors
+    char padding[24];
+} __attribute__((packed));
+
+// Per-image state of the Parallels driver (stored in bs->opaque).
+typedef struct BDRVParallelsState {
+    int fd; // descriptor of the image file
+
+    // catalog read from file offset 64, converted to host byte order;
+    // an entry of 0 is treated as "not allocated" by seek_to_sector()
+    uint32_t *catalog_bitmap;
+    int catalog_size; // number of entries in catalog_bitmap
+
+    int tracks; // copy of the header's tracks field
+} BDRVParallelsState;
+
+/*
+ * Probe callback: returns 100 when buf holds at least HEADER_SIZE bytes
+ * and starts with the Parallels magic and version, 0 otherwise.
+ * filename is not examined.
+ */
+static int parallels_probe(const uint8_t *buf, int buf_size, const char *filename)
+{
+    const struct parallels_header *hdr = (const void *)buf;
+
+    if (buf_size < HEADER_SIZE) {
+        return 0;
+    }
+    if (memcmp(hdr->magic, HEADER_MAGIC, 16) != 0) {
+        return 0;
+    }
+    if (le32_to_cpu(hdr->version) != HEADER_VERSION) {
+        return 0;
+    }
+    return 100;
+}
+
+/*
+ * Open a Parallels image: validate the header, publish the sector count
+ * in bs and load the catalog (converted to host byte order).  The image
+ * is always marked read-only.  Returns 0 on success, -1 on failure.
+ */
+static int parallels_open(BlockDriverState *bs, const char *filename, int flags)
+{
+    BDRVParallelsState *s = bs->opaque;
+    struct parallels_header hdr;
+    int fd, entry;
+
+    /* prefer read-write access, fall back to read-only */
+    fd = open(filename, O_RDWR | O_BINARY | O_LARGEFILE);
+    if (fd < 0) {
+        fd = open(filename, O_RDONLY | O_BINARY | O_LARGEFILE);
+    }
+    if (fd < 0) {
+        return -1;
+    }
+
+    bs->read_only = 1; /* no write support yet */
+    s->fd = fd;
+
+    if (read(fd, &hdr, sizeof(hdr)) != sizeof(hdr)) {
+        goto fail;
+    }
+    if (memcmp(hdr.magic, HEADER_MAGIC, 16) != 0) {
+        goto fail;
+    }
+    if (le32_to_cpu(hdr.version) != HEADER_VERSION) {
+        goto fail;
+    }
+
+    bs->total_sectors = le32_to_cpu(hdr.nb_sectors);
+
+    /* the catalog follows the 64-byte header */
+    if (lseek(s->fd, 64, SEEK_SET) != 64) {
+        goto fail;
+    }
+
+    s->tracks = le32_to_cpu(hdr.tracks);
+
+    s->catalog_size = le32_to_cpu(hdr.catalog_entries);
+    s->catalog_bitmap = qemu_malloc(s->catalog_size * 4);
+    if (read(s->fd, s->catalog_bitmap, s->catalog_size * 4) !=
+        s->catalog_size * 4) {
+        goto fail;
+    }
+    for (entry = 0; entry < s->catalog_size; entry++) {
+        le32_to_cpus(&s->catalog_bitmap[entry]);
+    }
+
+    return 0;
+
+fail:
+    if (s->catalog_bitmap) {
+        qemu_free(s->catalog_bitmap);
+    }
+    close(fd);
+    return -1;
+}
+
+/*
+ * Position the image file on the data of sector sector_num.
+ *
+ * The catalog is indexed by sector_num / tracks; an entry holds the
+ * first file sector of that group and 0 means "not allocated".
+ * Returns 0 when the file is positioned, -1 when the sector is not
+ * allocated, lies beyond the catalog, or the seek fails.
+ */
+static inline int seek_to_sector(BlockDriverState *bs, int64_t sector_num)
+{
+    BDRVParallelsState *s = bs->opaque;
+    uint32_t index, offset;
+    off_t position;
+
+    /* a header with tracks == 0 would otherwise divide by zero */
+    if (s->tracks <= 0)
+        return -1;
+
+    index = sector_num / s->tracks;
+    offset = sector_num % s->tracks;
+
+    /* not allocated; valid catalog indexes are 0 .. catalog_size - 1 */
+    if ((index >= s->catalog_size) || (s->catalog_bitmap[index] == 0))
+        return -1;
+
+    /* widen before scaling so offsets of 4 GiB and beyond do not wrap */
+    position = ((uint64_t)s->catalog_bitmap[index] + offset) * 512;
+
+    if (lseek(s->fd, position, SEEK_SET) != position)
+        return -1;
+
+    return 0;
+}
+
+/*
+ * Read nb_sectors sectors starting at sector_num into buf, one sector at
+ * a time.  Sectors that seek_to_sector() cannot locate are returned as
+ * zeros.  Returns 0 on success, -1 when a located sector cannot be read.
+ */
+static int parallels_read(BlockDriverState *bs, int64_t sector_num,
+                    uint8_t *buf, int nb_sectors)
+{
+    BDRVParallelsState *s = bs->opaque;
+
+    for (; nb_sectors > 0; nb_sectors--, sector_num++, buf += 512) {
+        if (seek_to_sector(bs, sector_num) != 0) {
+            memset(buf, 0, 512);
+            continue;
+        }
+        if (read(s->fd, buf, 512) != 512) {
+            return -1;
+        }
+    }
+    return 0;
+}
+
+/* Release the catalog and close the image file. */
+static void parallels_close(BlockDriverState *bs)
+{
+    BDRVParallelsState *state = bs->opaque;
+
+    qemu_free(state->catalog_bitmap);
+    close(state->fd);
+}
+
+// Driver table for Parallels images.  Only probe/open/read/close
+// callbacks are set; no write callback is provided.
+static BlockDriver bdrv_parallels = {
+    .format_name       = "parallels",
+    .instance_size     = sizeof(BDRVParallelsState),
+    .bdrv_probe                = parallels_probe,
+    .bdrv_open         = parallels_open,
+    .bdrv_read         = parallels_read,
+    .bdrv_close                = parallels_close,
+};
+
+// Register the Parallels driver table with the block layer.
+static void bdrv_parallels_init(void)
+{
+    bdrv_register(&bdrv_parallels);
+}
+
+// NOTE(review): block_init() presumably schedules the function to run at
+// start-up -- confirm against module.h.
+block_init(bdrv_parallels_init);
diff --git a/block/qcow.c b/block/qcow.c
new file mode 100644 (file)
index 0000000..1cf7c3b
--- /dev/null
@@ -0,0 +1,945 @@
+/*
+ * Block driver for the QCOW format
+ *
+ * Copyright (c) 2004-2006 Fabrice Bellard
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#include "qemu-common.h"
+#include "block_int.h"
+#include "module.h"
+#include <zlib.h>
+#include "aes.h"
+
+/**************************************************************/
+/* QEMU COW block driver with compression and encryption support */
+
+#define QCOW_MAGIC (('Q' << 24) | ('F' << 16) | ('I' << 8) | 0xfb)
+#define QCOW_VERSION 1
+
+#define QCOW_CRYPT_NONE 0
+#define QCOW_CRYPT_AES  1
+
+#define QCOW_OFLAG_COMPRESSED (1LL << 63)
+
+typedef struct QCowHeader { /* header read from file offset 0; qcow_open() byte-swaps the multi-byte fields from big-endian */
+    uint32_t magic; /* must equal QCOW_MAGIC */
+    uint32_t version; /* must equal QCOW_VERSION */
+    uint64_t backing_file_offset; /* file offset of the backing file name, 0 if none */
+    uint32_t backing_file_size; /* length of the backing file name in bytes */
+    uint32_t mtime;
+    uint64_t size; /* in bytes */
+    uint8_t cluster_bits; /* log2 of the cluster size in bytes */
+    uint8_t l2_bits; /* log2 of the number of entries per L2 table */
+    uint32_t crypt_method; /* QCOW_CRYPT_NONE or QCOW_CRYPT_AES */
+    uint64_t l1_table_offset; /* file offset of the L1 table */
+} QCowHeader;
+
+#define L2_CACHE_SIZE 16
+
+typedef struct BDRVQcowState { /* per-image driver state, reached through bs->opaque */
+    BlockDriverState *hd; /* underlying image file, opened with bdrv_file_open() */
+    int cluster_bits;
+    int cluster_size; /* 1 << cluster_bits, in bytes */
+    int cluster_sectors; /* cluster size in 512-byte sectors */
+    int l2_bits;
+    int l2_size; /* 1 << l2_bits, entries per L2 table */
+    int l1_size; /* number of L1 entries */
+    uint64_t cluster_offset_mask; /* low (63 - cluster_bits) bits of a compressed cluster entry: its file offset */
+    uint64_t l1_table_offset;
+    uint64_t *l1_table; /* in-memory copy of the L1 table, host byte order */
+    uint64_t *l2_cache; /* L2_CACHE_SIZE cached L2 tables; entries stay big-endian */
+    uint64_t l2_cache_offsets[L2_CACHE_SIZE]; /* file offset of the table held in each cache slot */
+    uint32_t l2_cache_counts[L2_CACHE_SIZE]; /* hit counters used to choose the slot to replace */
+    uint8_t *cluster_cache; /* most recently decompressed cluster */
+    uint8_t *cluster_data; /* scratch buffer of cluster_size bytes */
+    uint64_t cluster_cache_offset; /* file offset of the cluster in cluster_cache, -1 if none */
+    uint32_t crypt_method; /* current crypt method, 0 if no key yet */
+    uint32_t crypt_method_header; /* crypt method recorded in the image header */
+    AES_KEY aes_encrypt_key;
+    AES_KEY aes_decrypt_key;
+} BDRVQcowState;
+
+static int decompress_cluster(BDRVQcowState *s, uint64_t cluster_offset);
+
+/*
+ * Score how likely 'buf' is the start of a QCOW version 1 image:
+ * 100 when the buffer is large enough and both magic and version match,
+ * 0 otherwise.  'filename' is not consulted.
+ */
+static int qcow_probe(const uint8_t *buf, int buf_size, const char *filename)
+{
+    const QCowHeader *hdr = (const void *)buf;
+
+    if (buf_size < sizeof(QCowHeader))
+        return 0;
+    if (be32_to_cpu(hdr->magic) != QCOW_MAGIC)
+        return 0;
+    if (be32_to_cpu(hdr->version) != QCOW_VERSION)
+        return 0;
+    return 100;
+}
+
+/*
+ * Open the QCOW (version 1) image 'filename' and fill in the driver state.
+ *
+ * Reads and byte-swaps the header, validates it, loads the L1 table,
+ * allocates the L2 cache and the two cluster-sized buffers, and copies the
+ * backing file name (truncated to 1023 bytes) into bs->backing_file.
+ *
+ * Returns 0 on success, the bdrv_file_open() error if the file cannot be
+ * opened, or -1 for any later failure (after releasing what was set up).
+ */
+static int qcow_open(BlockDriverState *bs, const char *filename, int flags)
+{
+    BDRVQcowState *s = bs->opaque;
+    int len, i, shift, ret;
+    QCowHeader header;
+
+    ret = bdrv_file_open(&s->hd, filename, flags);
+    if (ret < 0)
+        return ret;
+    if (bdrv_pread(s->hd, 0, &header, sizeof(header)) != sizeof(header))
+        goto fail;
+    be32_to_cpus(&header.magic);
+    be32_to_cpus(&header.version);
+    be64_to_cpus(&header.backing_file_offset);
+    be32_to_cpus(&header.backing_file_size);
+    be32_to_cpus(&header.mtime);
+    be64_to_cpus(&header.size);
+    be32_to_cpus(&header.crypt_method);
+    be64_to_cpus(&header.l1_table_offset);
+
+    if (header.magic != QCOW_MAGIC || header.version != QCOW_VERSION)
+        goto fail;
+    if (header.size <= 1 || header.cluster_bits < 9)
+        goto fail;
+    /* "1 << cluster_bits" below is computed as an int */
+    if (header.cluster_bits > 30)
+        goto fail;
+    /* "l2_size * L2_CACHE_SIZE" and "slot << l2_bits" are int arithmetic
+       in this file; larger values would overflow them */
+    if (header.l2_bits > 26)
+        goto fail;
+    if (header.crypt_method > QCOW_CRYPT_AES)
+        goto fail;
+    s->crypt_method_header = header.crypt_method;
+    if (s->crypt_method_header)
+        bs->encrypted = 1;
+    s->cluster_bits = header.cluster_bits;
+    s->cluster_size = 1 << s->cluster_bits;
+    s->cluster_sectors = 1 << (s->cluster_bits - 9);
+    s->l2_bits = header.l2_bits;
+    s->l2_size = 1 << s->l2_bits;
+    bs->total_sectors = header.size / 512;
+    s->cluster_offset_mask = (1LL << (63 - s->cluster_bits)) - 1;
+
+    /* read the level 1 table */
+    shift = s->cluster_bits + s->l2_bits;
+    s->l1_size = (header.size + (1LL << shift) - 1) >> shift;
+
+    s->l1_table_offset = header.l1_table_offset;
+    s->l1_table = qemu_malloc(s->l1_size * sizeof(uint64_t));
+    if (!s->l1_table)
+        goto fail;
+    if (bdrv_pread(s->hd, s->l1_table_offset, s->l1_table, s->l1_size * sizeof(uint64_t)) !=
+        s->l1_size * sizeof(uint64_t))
+        goto fail;
+    for(i = 0;i < s->l1_size; i++) {
+        be64_to_cpus(&s->l1_table[i]);
+    }
+    /* alloc L2 cache */
+    s->l2_cache = qemu_malloc(s->l2_size * L2_CACHE_SIZE * sizeof(uint64_t));
+    if (!s->l2_cache)
+        goto fail;
+    s->cluster_cache = qemu_malloc(s->cluster_size);
+    if (!s->cluster_cache)
+        goto fail;
+    s->cluster_data = qemu_malloc(s->cluster_size);
+    if (!s->cluster_data)
+        goto fail;
+    s->cluster_cache_offset = -1;
+
+    /* read the backing file name */
+    if (header.backing_file_offset != 0) {
+        /* clamp while the size is still unsigned: a value above INT_MAX
+           must not become a negative length and array index */
+        if (header.backing_file_size > 1023)
+            len = 1023;
+        else
+            len = header.backing_file_size;
+        if (bdrv_pread(s->hd, header.backing_file_offset, bs->backing_file, len) != len)
+            goto fail;
+        bs->backing_file[len] = '\0';
+    }
+    return 0;
+
+ fail:
+    qemu_free(s->l1_table);
+    qemu_free(s->l2_cache);
+    qemu_free(s->cluster_cache);
+    qemu_free(s->cluster_data);
+    bdrv_delete(s->hd);
+    return -1;
+}
+
+/*
+ * Derive the AES-128 encrypt and decrypt keys from the passphrase 'key'.
+ * The passphrase is truncated or zero-padded to 16 bytes.  The image's
+ * header crypt method becomes the current one before the keys are set up.
+ * Returns 0 on success, -1 if either AES key setup call fails.
+ */
+static int qcow_set_key(BlockDriverState *bs, const char *key)
+{
+    BDRVQcowState *s = bs->opaque;
+    uint8_t keybuf[16];
+    int n, len;
+
+    len = strlen(key);
+    if (len > 16)
+        len = 16;
+    /* XXX: we could compress the chars to 7 bits to increase
+       entropy */
+    memset(keybuf, 0, sizeof(keybuf));
+    for (n = 0; n < len; n++)
+        keybuf[n] = key[n];
+
+    s->crypt_method = s->crypt_method_header;
+
+    if (AES_set_encrypt_key(keybuf, 128, &s->aes_encrypt_key) != 0 ||
+        AES_set_decrypt_key(keybuf, 128, &s->aes_decrypt_key) != 0)
+        return -1;
+    return 0;
+}
+
+/* Encrypt (enc != 0) or decrypt (enc == 0) 'nb_sectors' 512-byte sectors
+   with AES-CBC, one sector at a time.  Each sector gets its own IV: the
+   little-endian sector number in the first 8 bytes, zero in the rest.
+   The crypt function is compatible with the linux cryptoloop
+   algorithm for < 4 GB images. NOTE: out_buf == in_buf is
+   supported */
+static void encrypt_sectors(BDRVQcowState *s, int64_t sector_num,
+                            uint8_t *out_buf, const uint8_t *in_buf,
+                            int nb_sectors, int enc,
+                            const AES_KEY *key)
+{
+    union {
+        uint64_t ll[2];
+        uint8_t b[16];
+    } iv;
+
+    while (nb_sectors > 0) {
+        iv.ll[0] = cpu_to_le64(sector_num);
+        iv.ll[1] = 0;
+        AES_cbc_encrypt(in_buf, out_buf, 512, key, iv.b, enc);
+        in_buf += 512;
+        out_buf += 512;
+        sector_num++;
+        nb_sectors--;
+    }
+}
+
+/* Look up, and optionally allocate, the cluster that holds guest byte
+ * offset 'offset'.
+ *
+ * 'allocate' is:
+ *
+ * 0 to not allocate.
+ *
+ * 1 to allocate a normal cluster (for sector indexes 'n_start' to
+ * 'n_end')
+ *
+ * 2 to allocate a compressed cluster of size
+ * 'compressed_size'. 'compressed_size' must be > 0 and <
+ * cluster_size
+ *
+ * return 0 if not allocated, and also 0 on every I/O or decompression
+ * error: callers test the result against 0, so no other failure value
+ * may be returned.  Otherwise return the L2 entry value (a file offset,
+ * with QCOW_OFLAG_COMPRESSED and the size bits set for compressed
+ * clusters).
+ */
+static uint64_t get_cluster_offset(BlockDriverState *bs,
+                                   uint64_t offset, int allocate,
+                                   int compressed_size,
+                                   int n_start, int n_end)
+{
+    BDRVQcowState *s = bs->opaque;
+    int min_index, i, j, l1_index, l2_index;
+    uint64_t l2_offset, *l2_table, cluster_offset, tmp;
+    uint32_t min_count;
+    int new_l2_table;
+
+    l1_index = offset >> (s->l2_bits + s->cluster_bits);
+    l2_offset = s->l1_table[l1_index];
+    new_l2_table = 0;
+    if (!l2_offset) {
+        if (!allocate)
+            return 0;
+        /* allocate a new l2 entry */
+        l2_offset = bdrv_getlength(s->hd);
+        /* round to cluster size */
+        l2_offset = (l2_offset + s->cluster_size - 1) & ~(s->cluster_size - 1);
+        /* update the L1 entry */
+        s->l1_table[l1_index] = l2_offset;
+        tmp = cpu_to_be64(l2_offset);
+        if (bdrv_pwrite(s->hd, s->l1_table_offset + l1_index * sizeof(tmp),
+                        &tmp, sizeof(tmp)) != sizeof(tmp))
+            return 0;
+        new_l2_table = 1;
+    }
+    /* look for the L2 table in the cache */
+    for(i = 0; i < L2_CACHE_SIZE; i++) {
+        if (l2_offset == s->l2_cache_offsets[i]) {
+            /* increment the hit count */
+            if (++s->l2_cache_counts[i] == 0xffffffff) {
+                /* halve all counters so this one cannot wrap */
+                for(j = 0; j < L2_CACHE_SIZE; j++) {
+                    s->l2_cache_counts[j] >>= 1;
+                }
+            }
+            l2_table = s->l2_cache + (i << s->l2_bits);
+            goto found;
+        }
+    }
+    /* not found: load a new entry in the least used one */
+    min_index = 0;
+    min_count = 0xffffffff;
+    for(i = 0; i < L2_CACHE_SIZE; i++) {
+        if (s->l2_cache_counts[i] < min_count) {
+            min_count = s->l2_cache_counts[i];
+            min_index = i;
+        }
+    }
+    l2_table = s->l2_cache + (min_index << s->l2_bits);
+    if (new_l2_table) {
+        memset(l2_table, 0, s->l2_size * sizeof(uint64_t));
+        if (bdrv_pwrite(s->hd, l2_offset, l2_table, s->l2_size * sizeof(uint64_t)) !=
+            s->l2_size * sizeof(uint64_t))
+            return 0;
+    } else {
+        if (bdrv_pread(s->hd, l2_offset, l2_table, s->l2_size * sizeof(uint64_t)) !=
+            s->l2_size * sizeof(uint64_t))
+            return 0;
+    }
+    s->l2_cache_offsets[min_index] = l2_offset;
+    s->l2_cache_counts[min_index] = 1;
+ found:
+    l2_index = (offset >> s->cluster_bits) & (s->l2_size - 1);
+    cluster_offset = be64_to_cpu(l2_table[l2_index]);
+    if (!cluster_offset ||
+        ((cluster_offset & QCOW_OFLAG_COMPRESSED) && allocate == 1)) {
+        if (!allocate)
+            return 0;
+        /* allocate a new cluster */
+        if ((cluster_offset & QCOW_OFLAG_COMPRESSED) &&
+            (n_end - n_start) < s->cluster_sectors) {
+            /* if the cluster is already compressed, we must
+               decompress it in the case it is not completely
+               overwritten */
+            if (decompress_cluster(s, cluster_offset) < 0)
+                return 0;
+            cluster_offset = bdrv_getlength(s->hd);
+            cluster_offset = (cluster_offset + s->cluster_size - 1) &
+                ~(s->cluster_size - 1);
+            /* write the cluster content */
+            if (bdrv_pwrite(s->hd, cluster_offset, s->cluster_cache, s->cluster_size) !=
+                s->cluster_size)
+                return 0;
+        } else {
+            cluster_offset = bdrv_getlength(s->hd);
+            if (allocate == 1) {
+                /* round to cluster size */
+                cluster_offset = (cluster_offset + s->cluster_size - 1) &
+                    ~(s->cluster_size - 1);
+                bdrv_truncate(s->hd, cluster_offset + s->cluster_size);
+                /* if encrypted, we must initialize the cluster
+                   content which won't be written */
+                if (s->crypt_method &&
+                    (n_end - n_start) < s->cluster_sectors) {
+                    uint64_t start_sect;
+                    start_sect = (offset & ~(s->cluster_size - 1)) >> 9;
+                    memset(s->cluster_data + 512, 0x00, 512);
+                    for(i = 0; i < s->cluster_sectors; i++) {
+                        if (i < n_start || i >= n_end) {
+                            encrypt_sectors(s, start_sect + i,
+                                            s->cluster_data,
+                                            s->cluster_data + 512, 1, 1,
+                                            &s->aes_encrypt_key);
+                            if (bdrv_pwrite(s->hd, cluster_offset + i * 512,
+                                            s->cluster_data, 512) != 512)
+                                return 0;
+                        }
+                    }
+                }
+            } else if (allocate == 2) {
+                cluster_offset |= QCOW_OFLAG_COMPRESSED |
+                    (uint64_t)compressed_size << (63 - s->cluster_bits);
+            }
+        }
+        /* update L2 table */
+        tmp = cpu_to_be64(cluster_offset);
+        l2_table[l2_index] = tmp;
+        if (bdrv_pwrite(s->hd,
+                        l2_offset + l2_index * sizeof(tmp), &tmp, sizeof(tmp)) != sizeof(tmp))
+            return 0;
+    }
+    return cluster_offset;
+}
+
+/*
+ * Report whether the cluster containing 'sector_num' has an L2 entry.
+ * '*pnum' receives the number of sectors, starting at 'sector_num' and
+ * capped at 'nb_sectors', that lie in the same cluster.
+ * Returns 1 if the cluster is allocated, 0 otherwise.
+ */
+static int qcow_is_allocated(BlockDriverState *bs, int64_t sector_num,
+                             int nb_sectors, int *pnum)
+{
+    BDRVQcowState *s = bs->opaque;
+    uint64_t entry;
+    int first, count;
+
+    entry = get_cluster_offset(bs, sector_num << 9, 0, 0, 0, 0);
+
+    first = sector_num & (s->cluster_sectors - 1);
+    count = s->cluster_sectors - first;
+    *pnum = (count > nb_sectors) ? nb_sectors : count;
+
+    return (entry != 0);
+}
+
+/*
+ * Inflate the raw deflate stream 'buf' (window bits -12, no zlib header)
+ * into 'out_buf'.  Returns 0 only if inflate() ends with Z_STREAM_END or
+ * Z_BUF_ERROR and exactly 'out_buf_size' bytes were produced; -1 otherwise.
+ */
+static int decompress_buffer(uint8_t *out_buf, int out_buf_size,
+                             const uint8_t *buf, int buf_size)
+{
+    z_stream strm;
+    int ret, produced;
+
+    memset(&strm, 0, sizeof(strm));
+    strm.next_in = (uint8_t *)buf;
+    strm.avail_in = buf_size;
+    strm.next_out = out_buf;
+    strm.avail_out = out_buf_size;
+
+    if (inflateInit2(&strm, -12) != Z_OK)
+        return -1;
+
+    ret = inflate(&strm, Z_FINISH);
+    produced = strm.next_out - out_buf;
+    inflateEnd(&strm);
+
+    if (ret != Z_STREAM_END && ret != Z_BUF_ERROR)
+        return -1;
+    if (produced != out_buf_size)
+        return -1;
+    return 0;
+}
+
+/*
+ * Make s->cluster_cache hold the decompressed content of the compressed
+ * cluster described by the L2 entry 'cluster_offset'.  Nothing is read if
+ * the cache already holds that cluster.  Returns 0 on success, -1 on a
+ * short read or a decompression failure.
+ */
+static int decompress_cluster(BDRVQcowState *s, uint64_t cluster_offset)
+{
+    uint64_t coffset = cluster_offset & s->cluster_offset_mask;
+    int csize;
+
+    if (s->cluster_cache_offset == coffset)
+        return 0;
+
+    /* the compressed size sits above the offset bits of the entry */
+    csize = cluster_offset >> (63 - s->cluster_bits);
+    csize &= (s->cluster_size - 1);
+
+    if (bdrv_pread(s->hd, coffset, s->cluster_data, csize) != csize)
+        return -1;
+    if (decompress_buffer(s->cluster_cache, s->cluster_size,
+                          s->cluster_data, csize) < 0)
+        return -1;
+
+    s->cluster_cache_offset = coffset;
+    return 0;
+}
+
+#if 0 /* compiled out: synchronous read path; the driver table in this file only sets bdrv_aio_readv */
+
+static int qcow_read(BlockDriverState *bs, int64_t sector_num,
+                     uint8_t *buf, int nb_sectors)
+{
+    BDRVQcowState *s = bs->opaque;
+    int ret, index_in_cluster, n;
+    uint64_t cluster_offset;
+
+    while (nb_sectors > 0) {
+        cluster_offset = get_cluster_offset(bs, sector_num << 9, 0, 0, 0, 0);
+        index_in_cluster = sector_num & (s->cluster_sectors - 1);
+        n = s->cluster_sectors - index_in_cluster;
+        if (n > nb_sectors)
+            n = nb_sectors;
+        if (!cluster_offset) {
+            if (bs->backing_hd) {
+                /* read from the base image */
+                ret = bdrv_read(bs->backing_hd, sector_num, buf, n);
+                if (ret < 0)
+                    return -1;
+            } else {
+                memset(buf, 0, 512 * n);
+            }
+        } else if (cluster_offset & QCOW_OFLAG_COMPRESSED) {
+            if (decompress_cluster(s, cluster_offset) < 0)
+                return -1;
+            memcpy(buf, s->cluster_cache + index_in_cluster * 512, 512 * n);
+        } else {
+            ret = bdrv_pread(s->hd, cluster_offset + index_in_cluster * 512, buf, n * 512);
+            if (ret != n * 512)
+                return -1;
+            if (s->crypt_method) {
+                encrypt_sectors(s, sector_num, buf, buf, n, 0,
+                                &s->aes_decrypt_key);
+            }
+        }
+        nb_sectors -= n;
+        sector_num += n;
+        buf += n * 512;
+    }
+    return 0;
+}
+#endif
+
+/*
+ * Synchronously write 'nb_sectors' sectors from 'buf' at 'sector_num',
+ * one cluster-bounded piece at a time.  Each piece gets a normal cluster
+ * allocated for it; when a key is set the data is encrypted into
+ * s->cluster_data first.  Returns 0 on success, -1 on allocation or
+ * write failure.
+ */
+static int qcow_write(BlockDriverState *bs, int64_t sector_num,
+                     const uint8_t *buf, int nb_sectors)
+{
+    BDRVQcowState *s = bs->opaque;
+    const uint8_t *src;
+    uint64_t cluster_offset;
+    int first, count, written;
+
+    while (nb_sectors > 0) {
+        first = sector_num & (s->cluster_sectors - 1);
+        count = s->cluster_sectors - first;
+        if (count > nb_sectors)
+            count = nb_sectors;
+
+        cluster_offset = get_cluster_offset(bs, sector_num << 9, 1, 0,
+                                            first, first + count);
+        if (!cluster_offset)
+            return -1;
+
+        src = buf;
+        if (s->crypt_method) {
+            encrypt_sectors(s, sector_num, s->cluster_data, buf, count, 1,
+                            &s->aes_encrypt_key);
+            src = s->cluster_data;
+        }
+        written = bdrv_pwrite(s->hd, cluster_offset + first * 512,
+                              src, count * 512);
+        if (written != count * 512)
+            return -1;
+
+        buf += count * 512;
+        sector_num += count;
+        nb_sectors -= count;
+    }
+    s->cluster_cache_offset = -1; /* disable compressed cache */
+    return 0;
+}
+
+typedef struct QCowAIOCB { /* state of one qcow AIO read or write request */
+    BlockDriverAIOCB common;
+    int64_t sector_num; /* next guest sector to process */
+    QEMUIOVector *qiov; /* caller's I/O vector */
+    uint8_t *buf; /* current position in the linear data buffer */
+    void *orig_buf; /* bounce buffer; only assigned when qiov has more than one element */
+    int nb_sectors; /* sectors still to process */
+    int n; /* sectors covered by the current step */
+    uint64_t cluster_offset;
+    uint8_t *cluster_data; /* encryption bounce buffer used by the write callback */
+    struct iovec hd_iov;
+    QEMUIOVector hd_qiov;
+    BlockDriverAIOCB *hd_aiocb; /* outstanding lower-layer request, NULL if none */
+} QCowAIOCB;
+
+/*
+ * Completion callback and state machine for an AIO read.
+ *
+ * Each invocation first finishes the step that just completed (decrypting
+ * a plain cluster when a key is set, then advancing by acb->n sectors) and
+ * then starts the next cluster-bounded step:
+ *   - unallocated cluster: read from the backing image, or zero-fill;
+ *   - compressed cluster: decompress synchronously and copy;
+ *   - plain cluster: submit a read to the image file.
+ * Steps that need no lower-layer I/O loop back through 'redo'.
+ *
+ * 'ret' is the result of the step that just completed.  The caller's
+ * callback receives 0 when all sectors are done, or a negative value on
+ * failure; a failed submit or a decompression error is reported as -EIO.
+ */
+static void qcow_aio_read_cb(void *opaque, int ret)
+{
+    QCowAIOCB *acb = opaque;
+    BlockDriverState *bs = acb->common.bs;
+    BDRVQcowState *s = bs->opaque;
+    int index_in_cluster;
+
+    acb->hd_aiocb = NULL;
+    if (ret < 0)
+        goto done;
+
+ redo:
+    /* post process the read buffer */
+    if (!acb->cluster_offset) {
+        /* nothing to do */
+    } else if (acb->cluster_offset & QCOW_OFLAG_COMPRESSED) {
+        /* nothing to do */
+    } else {
+        if (s->crypt_method) {
+            encrypt_sectors(s, acb->sector_num, acb->buf, acb->buf,
+                            acb->n, 0,
+                            &s->aes_decrypt_key);
+        }
+    }
+
+    acb->nb_sectors -= acb->n;
+    acb->sector_num += acb->n;
+    acb->buf += acb->n * 512;
+
+    if (acb->nb_sectors == 0) {
+        /* request completed */
+        ret = 0;
+        goto done;
+    }
+
+    /* prepare next AIO request */
+    acb->cluster_offset = get_cluster_offset(bs, acb->sector_num << 9,
+                                             0, 0, 0, 0);
+    index_in_cluster = acb->sector_num & (s->cluster_sectors - 1);
+    acb->n = s->cluster_sectors - index_in_cluster;
+    if (acb->n > acb->nb_sectors)
+        acb->n = acb->nb_sectors;
+
+    if (!acb->cluster_offset) {
+        if (bs->backing_hd) {
+            /* read from the base image */
+            acb->hd_iov.iov_base = (void *)acb->buf;
+            acb->hd_iov.iov_len = acb->n * 512;
+            qemu_iovec_init_external(&acb->hd_qiov, &acb->hd_iov, 1);
+            acb->hd_aiocb = bdrv_aio_readv(bs->backing_hd, acb->sector_num,
+                &acb->hd_qiov, acb->n, qcow_aio_read_cb, acb);
+            if (acb->hd_aiocb == NULL) {
+                /* nothing was submitted: do not report success */
+                ret = -EIO;
+                goto done;
+            }
+        } else {
+            /* Note: in this case, no need to wait */
+            memset(acb->buf, 0, 512 * acb->n);
+            goto redo;
+        }
+    } else if (acb->cluster_offset & QCOW_OFLAG_COMPRESSED) {
+        /* add AIO support for compressed blocks ? */
+        if (decompress_cluster(s, acb->cluster_offset) < 0) {
+            ret = -EIO;
+            goto done;
+        }
+        memcpy(acb->buf,
+               s->cluster_cache + index_in_cluster * 512, 512 * acb->n);
+        goto redo;
+    } else {
+        if ((acb->cluster_offset & 511) != 0) {
+            ret = -EIO;
+            goto done;
+        }
+        acb->hd_iov.iov_base = (void *)acb->buf;
+        acb->hd_iov.iov_len = acb->n * 512;
+        qemu_iovec_init_external(&acb->hd_qiov, &acb->hd_iov, 1);
+        acb->hd_aiocb = bdrv_aio_readv(s->hd,
+                            (acb->cluster_offset >> 9) + index_in_cluster,
+                            &acb->hd_qiov, acb->n, qcow_aio_read_cb, acb);
+        if (acb->hd_aiocb == NULL) {
+            /* nothing was submitted: do not report success */
+            ret = -EIO;
+            goto done;
+        }
+    }
+
+    return;
+
+done:
+    /* multi-element vectors were read through a bounce buffer */
+    if (acb->qiov->niov > 1) {
+        qemu_iovec_from_buffer(acb->qiov, acb->orig_buf, acb->qiov->size);
+        qemu_vfree(acb->orig_buf);
+    }
+    acb->common.cb(acb->common.opaque, ret);
+    qemu_aio_release(acb);
+}
+
+/*
+ * Start an asynchronous read of 'nb_sectors' sectors at 'sector_num' into
+ * 'qiov'.  A vector with more than one element is read through a linear
+ * bounce buffer.  The request is driven by qcow_aio_read_cb(), which is
+ * called once here to start the first step.  Returns NULL if no request
+ * block could be obtained.
+ */
+static BlockDriverAIOCB *qcow_aio_readv(BlockDriverState *bs,
+        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
+        BlockDriverCompletionFunc *cb, void *opaque)
+{
+    QCowAIOCB *acb = qemu_aio_get(bs, cb, opaque);
+
+    if (!acb)
+        return NULL;
+
+    acb->hd_aiocb = NULL;
+    acb->sector_num = sector_num;
+    acb->qiov = qiov;
+    if (qiov->niov > 1) {
+        acb->orig_buf = qemu_blockalign(bs, qiov->size);
+        acb->buf = acb->orig_buf;
+    } else {
+        acb->buf = (uint8_t *)qiov->iov->iov_base;
+    }
+    acb->nb_sectors = nb_sectors;
+    /* n == 0 and cluster_offset == 0 make the first callback a no-op step */
+    acb->n = 0;
+    acb->cluster_offset = 0;
+
+    qcow_aio_read_cb(acb, 0);
+    return &acb->common;
+}
+
+/*
+ * Completion callback and state machine for an AIO write.
+ *
+ * Each invocation advances past the step that just completed (acb->n
+ * sectors), then allocates the cluster for the next cluster-bounded piece,
+ * encrypts it into acb->cluster_data when a key is set, and submits the
+ * write to the image file.
+ *
+ * 'ret' is the result of the step that just completed.  The caller's
+ * callback receives 0 when all sectors are written, or a negative value
+ * on failure; a failed submit is reported as -EIO.
+ *
+ * NOTE(review): acb->cluster_data is neither initialised by
+ * qcow_aio_writev() nor freed here; this relies on how qemu_aio_get()
+ * hands out request blocks -- confirm.
+ */
+static void qcow_aio_write_cb(void *opaque, int ret)
+{
+    QCowAIOCB *acb = opaque;
+    BlockDriverState *bs = acb->common.bs;
+    BDRVQcowState *s = bs->opaque;
+    int index_in_cluster;
+    uint64_t cluster_offset;
+    const uint8_t *src_buf;
+
+    acb->hd_aiocb = NULL;
+
+    if (ret < 0)
+        goto done;
+
+    acb->nb_sectors -= acb->n;
+    acb->sector_num += acb->n;
+    acb->buf += acb->n * 512;
+
+    if (acb->nb_sectors == 0) {
+        /* request completed */
+        ret = 0;
+        goto done;
+    }
+
+    index_in_cluster = acb->sector_num & (s->cluster_sectors - 1);
+    acb->n = s->cluster_sectors - index_in_cluster;
+    if (acb->n > acb->nb_sectors)
+        acb->n = acb->nb_sectors;
+    cluster_offset = get_cluster_offset(bs, acb->sector_num << 9, 1, 0,
+                                        index_in_cluster,
+                                        index_in_cluster + acb->n);
+    if (!cluster_offset || (cluster_offset & 511) != 0) {
+        ret = -EIO;
+        goto done;
+    }
+    if (s->crypt_method) {
+        if (!acb->cluster_data) {
+            acb->cluster_data = qemu_mallocz(s->cluster_size);
+            if (!acb->cluster_data) {
+                ret = -ENOMEM;
+                goto done;
+            }
+        }
+        encrypt_sectors(s, acb->sector_num, acb->cluster_data, acb->buf,
+                        acb->n, 1, &s->aes_encrypt_key);
+        src_buf = acb->cluster_data;
+    } else {
+        src_buf = acb->buf;
+    }
+
+    acb->hd_iov.iov_base = (void *)src_buf;
+    acb->hd_iov.iov_len = acb->n * 512;
+    qemu_iovec_init_external(&acb->hd_qiov, &acb->hd_iov, 1);
+    acb->hd_aiocb = bdrv_aio_writev(s->hd,
+                                    (cluster_offset >> 9) + index_in_cluster,
+                                    &acb->hd_qiov, acb->n,
+                                    qcow_aio_write_cb, acb);
+    if (acb->hd_aiocb == NULL) {
+        /* nothing was submitted: do not report success */
+        ret = -EIO;
+        goto done;
+    }
+    return;
+
+done:
+    /* multi-element vectors were written from a bounce buffer */
+    if (acb->qiov->niov > 1)
+        qemu_vfree(acb->orig_buf);
+    acb->common.cb(acb->common.opaque, ret);
+    qemu_aio_release(acb);
+}
+
+/*
+ * Start an asynchronous write of 'nb_sectors' sectors at 'sector_num' from
+ * 'qiov'.  A vector with more than one element is first flattened into a
+ * linear bounce buffer.  The request is driven by qcow_aio_write_cb(),
+ * which is called once here to start the first step.  Returns NULL if no
+ * request block could be obtained.
+ */
+static BlockDriverAIOCB *qcow_aio_writev(BlockDriverState *bs,
+        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
+        BlockDriverCompletionFunc *cb, void *opaque)
+{
+    BDRVQcowState *s = bs->opaque;
+    QCowAIOCB *acb;
+
+    s->cluster_cache_offset = -1; /* disable compressed cache */
+
+    acb = qemu_aio_get(bs, cb, opaque);
+    if (!acb)
+        return NULL;
+
+    acb->hd_aiocb = NULL;
+    acb->sector_num = sector_num;
+    acb->qiov = qiov;
+    if (qiov->niov <= 1) {
+        acb->buf = (uint8_t *)qiov->iov->iov_base;
+    } else {
+        acb->orig_buf = qemu_blockalign(bs, qiov->size);
+        acb->buf = acb->orig_buf;
+        qemu_iovec_to_buffer(qiov, acb->buf);
+    }
+    acb->nb_sectors = nb_sectors;
+    /* n == 0 makes the first callback advance by nothing */
+    acb->n = 0;
+
+    qcow_aio_write_cb(acb, 0);
+    return &acb->common;
+}
+
+/* Cancel the outstanding lower-layer request, if any, and release the
+   request block. */
+static void qcow_aio_cancel(BlockDriverAIOCB *blockacb)
+{
+    QCowAIOCB *req = (QCowAIOCB *)blockacb;
+
+    if (req->hd_aiocb != NULL)
+        bdrv_aio_cancel(req->hd_aiocb);
+    qemu_aio_release(req);
+}
+
+/* Free the tables and buffers allocated by qcow_open(), then delete the
+   underlying image file object. */
+static void qcow_close(BlockDriverState *bs)
+{
+    BDRVQcowState *state = bs->opaque;
+
+    qemu_free(state->l1_table);
+    qemu_free(state->l2_cache);
+    qemu_free(state->cluster_cache);
+    qemu_free(state->cluster_data);
+
+    bdrv_delete(state->hd);
+}
+
+/*
+ * Create a new, empty QCOW (version 1) image.
+ *
+ * 'total_size' is the virtual disk size in 512-byte sectors.  The file
+ * gets a header, the optional backing file name right after it, and a
+ * zeroed L1 table at the next 8-byte boundary.  With a backing file the
+ * image uses 512-byte clusters, otherwise 4 KB clusters.  A backing file
+ * named exactly "fat:" only selects the small-cluster layout; its name is
+ * not stored.  BLOCK_FLAG_ENCRYPT in 'flags' marks the image as AES
+ * encrypted.
+ *
+ * Returns 0 on success, -1 if the file cannot be created or any write or
+ * seek fails or is short.
+ */
+static int qcow_create(const char *filename, int64_t total_size,
+                      const char *backing_file, int flags)
+{
+    int fd, header_size, backing_filename_len, l1_size, i, shift;
+    QCowHeader header;
+    uint64_t tmp;
+
+    fd = open(filename, O_WRONLY | O_CREAT | O_TRUNC | O_BINARY, 0644);
+    if (fd < 0)
+        return -1;
+    memset(&header, 0, sizeof(header));
+    header.magic = cpu_to_be32(QCOW_MAGIC);
+    header.version = cpu_to_be32(QCOW_VERSION);
+    header.size = cpu_to_be64(total_size * 512);
+    header_size = sizeof(header);
+    backing_filename_len = 0;
+    if (backing_file) {
+        if (strcmp(backing_file, "fat:")) {
+            header.backing_file_offset = cpu_to_be64(header_size);
+            backing_filename_len = strlen(backing_file);
+            header.backing_file_size = cpu_to_be32(backing_filename_len);
+            header_size += backing_filename_len;
+        } else {
+            /* special backing file for vvfat */
+            backing_file = NULL;
+        }
+        header.cluster_bits = 9; /* 512 byte cluster to avoid copying
+                                    unmodified sectors */
+        header.l2_bits = 12; /* 32 KB L2 tables */
+    } else {
+        header.cluster_bits = 12; /* 4 KB clusters */
+        header.l2_bits = 9; /* 4 KB L2 tables */
+    }
+    /* the L1 table starts at the next 8-byte boundary */
+    header_size = (header_size + 7) & ~7;
+    shift = header.cluster_bits + header.l2_bits;
+    l1_size = ((total_size * 512) + (1LL << shift) - 1) >> shift;
+
+    header.l1_table_offset = cpu_to_be64(header_size);
+    if (flags & BLOCK_FLAG_ENCRYPT) {
+        header.crypt_method = cpu_to_be32(QCOW_CRYPT_AES);
+    } else {
+        header.crypt_method = cpu_to_be32(QCOW_CRYPT_NONE);
+    }
+
+    /* write all the data; a short or failed write leaves an unusable image */
+    if (write(fd, &header, sizeof(header)) != sizeof(header))
+        goto fail;
+    if (backing_file) {
+        if (write(fd, backing_file, backing_filename_len) !=
+            backing_filename_len)
+            goto fail;
+    }
+    if (lseek(fd, header_size, SEEK_SET) != header_size)
+        goto fail;
+    tmp = 0;
+    for(i = 0;i < l1_size; i++) {
+        if (write(fd, &tmp, sizeof(tmp)) != sizeof(tmp))
+            goto fail;
+    }
+    close(fd);
+    return 0;
+
+ fail:
+    close(fd);
+    return -1;
+}
+
+/*
+ * Drop every allocated cluster: zero the L1 table in memory and on disk,
+ * truncate the file right after the L1 table, and reset the L2 cache.
+ * Returns 0 on success, -1 if the L1 write fails, or the bdrv_truncate()
+ * error.
+ */
+static int qcow_make_empty(BlockDriverState *bs)
+{
+    BDRVQcowState *s = bs->opaque;
+    uint32_t l1_bytes = s->l1_size * sizeof(uint64_t);
+    int rc;
+
+    memset(s->l1_table, 0, l1_bytes);
+    if (bdrv_pwrite(s->hd, s->l1_table_offset, s->l1_table, l1_bytes) < 0)
+        return -1;
+
+    rc = bdrv_truncate(s->hd, s->l1_table_offset + l1_bytes);
+    if (rc < 0)
+        return rc;
+
+    /* no cached L2 table is valid any more */
+    memset(s->l2_cache, 0, s->l2_size * L2_CACHE_SIZE * sizeof(uint64_t));
+    memset(s->l2_cache_offsets, 0, sizeof(s->l2_cache_offsets));
+    memset(s->l2_cache_counts, 0, sizeof(s->l2_cache_counts));
+
+    return 0;
+}
+
+/* Write exactly one cluster ('nb_sectors' must equal cluster_sectors) at
+   'sector_num' in compressed form.  If the data does not deflate to less
+   than a cluster it is written as a normal cluster through qcow_write().
+   Returns 0 on success, -EINVAL for a wrong 'nb_sectors', -1 on any
+   allocation, compression, cluster lookup or write failure.
+
+   XXX: put compressed sectors first, then all the cluster aligned
+   tables to avoid losing bytes in alignment */
+static int qcow_write_compressed(BlockDriverState *bs, int64_t sector_num,
+                                 const uint8_t *buf, int nb_sectors)
+{
+    BDRVQcowState *s = bs->opaque;
+    z_stream strm;
+    int ret, out_len;
+    uint8_t *out_buf;
+    uint64_t cluster_offset;
+
+    if (nb_sectors != s->cluster_sectors)
+        return -EINVAL;
+
+    out_buf = qemu_malloc(s->cluster_size + (s->cluster_size / 1000) + 128);
+    if (!out_buf)
+        return -1;
+
+    /* default compression level, small window, no zlib header */
+    memset(&strm, 0, sizeof(strm));
+    ret = deflateInit2(&strm, Z_DEFAULT_COMPRESSION,
+                       Z_DEFLATED, -12,
+                       9, Z_DEFAULT_STRATEGY);
+    if (ret != 0)
+        goto fail;
+
+    strm.avail_in = s->cluster_size;
+    strm.next_in = (uint8_t *)buf;
+    strm.avail_out = s->cluster_size;
+    strm.next_out = out_buf;
+
+    ret = deflate(&strm, Z_FINISH);
+    if (ret != Z_STREAM_END && ret != Z_OK) {
+        deflateEnd(&strm);
+        goto fail;
+    }
+    out_len = strm.next_out - out_buf;
+
+    deflateEnd(&strm);
+
+    if (ret != Z_STREAM_END || out_len >= s->cluster_size) {
+        /* could not compress: write normal cluster */
+        if (qcow_write(bs, sector_num, buf, s->cluster_sectors) < 0)
+            goto fail;
+    } else {
+        cluster_offset = get_cluster_offset(bs, sector_num << 9, 2,
+                                            out_len, 0, 0);
+        /* 0 means the lookup failed; writing there would overwrite the
+           image header */
+        if (!cluster_offset)
+            goto fail;
+        cluster_offset &= s->cluster_offset_mask;
+        if (bdrv_pwrite(s->hd, cluster_offset, out_buf, out_len) != out_len)
+            goto fail;
+    }
+
+    qemu_free(out_buf);
+    return 0;
+
+ fail:
+    qemu_free(out_buf);
+    return -1;
+}
+
+/* Forward the flush request to the underlying image file. */
+static void qcow_flush(BlockDriverState *bs)
+{
+    BDRVQcowState *state = bs->opaque;
+
+    bdrv_flush(state->hd);
+}
+
+/* Fill in 'bdi' with the image's cluster size.  Always returns 0. */
+static int qcow_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
+{
+    BDRVQcowState *state = bs->opaque;
+
+    bdi->cluster_size = state->cluster_size;
+    return 0;
+}
+
+static BlockDriver bdrv_qcow = { /* driver table for the "qcow" (version 1) format */
+    .format_name       = "qcow",
+    .instance_size     = sizeof(BDRVQcowState),
+    .bdrv_probe                = qcow_probe,
+    .bdrv_open         = qcow_open,
+    .bdrv_close                = qcow_close,
+    .bdrv_create       = qcow_create,
+    .bdrv_flush                = qcow_flush,
+    .bdrv_is_allocated = qcow_is_allocated,
+    .bdrv_set_key      = qcow_set_key,
+    .bdrv_make_empty   = qcow_make_empty,
+    .bdrv_aio_readv    = qcow_aio_readv, /* reads and writes go through the AIO callbacks only */
+    .bdrv_aio_writev   = qcow_aio_writev,
+    .bdrv_aio_cancel   = qcow_aio_cancel,
+    .aiocb_size                = sizeof(QCowAIOCB),
+    .bdrv_write_compressed = qcow_write_compressed,
+    .bdrv_get_info     = qcow_get_info,
+};
+
+static void bdrv_qcow_init(void) /* registers the driver table above */
+{
+    bdrv_register(&bdrv_qcow);
+}
+
+block_init(bdrv_qcow_init);
diff --git a/block/qcow2.c b/block/qcow2.c
new file mode 100644 (file)
index 0000000..a6de9b6
--- /dev/null
@@ -0,0 +1,2931 @@
+/*
+ * Block driver for the QCOW version 2 format
+ *
+ * Copyright (c) 2004-2006 Fabrice Bellard
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#include "qemu-common.h"
+#include "block_int.h"
+#include "module.h"
+#include <zlib.h>
+#include "aes.h"
+
+/*
+  Differences with QCOW:
+
+  - Support for multiple incremental snapshots.
+  - Memory management by reference counts.
+  - Clusters which have a reference count of one have the bit
+    QCOW_OFLAG_COPIED to optimize write performance.
+  - Size of compressed clusters is stored in sectors to reduce bit usage
+    in the cluster offsets.
+  - Support for storing additional data (such as the VM state) in the
+    snapshots.
+  - If a backing store is used, the cluster size is not constrained
+    (could be backported to QCOW).
+  - L2 tables always have a size of one cluster.
+*/
+
+//#define DEBUG_ALLOC
+//#define DEBUG_ALLOC2
+//#define DEBUG_EXT
+
+/* header magic: the bytes 'Q', 'F', 'I', 0xfb, stored big-endian on disk */
+#define QCOW_MAGIC (('Q' << 24) | ('F' << 16) | ('I' << 8) | 0xfb)
+#define QCOW_VERSION 2
+
+/* values of QCowHeader.crypt_method */
+#define QCOW_CRYPT_NONE 0
+#define QCOW_CRYPT_AES  1
+
+/* the cluster_data bounce buffer holds this many clusters (plus 512 bytes) */
+#define QCOW_MAX_CRYPT_CLUSTERS 32
+
+/*
+ * Flag bits stored in the top of L1/L2 entries.  They are unsigned 64 bit
+ * constants: shifting a signed 1 into bit 63 is undefined behaviour in C.
+ */
+/* indicate that the refcount of the referenced cluster is exactly one. */
+#define QCOW_OFLAG_COPIED     (1ULL << 63)
+/* indicate that the cluster is compressed (they never have the copied flag) */
+#define QCOW_OFLAG_COMPRESSED (1ULL << 62)
+
+#define REFCOUNT_SHIFT 1 /* refcount size is 2 bytes */
+
+/*
+ * Image header, read from offset 0 of the image file.  All fields are
+ * stored big-endian on disk; qcow_open() converts them to host order.
+ */
+typedef struct QCowHeader {
+    uint32_t magic;               /* must equal QCOW_MAGIC */
+    uint32_t version;             /* must equal QCOW_VERSION */
+    uint64_t backing_file_offset; /* file offset of the backing file name, 0 if none */
+    uint32_t backing_file_size;   /* length of the backing file name in bytes */
+    uint32_t cluster_bits;        /* log2 of the cluster size; qcow_open accepts 9..16 */
+    uint64_t size; /* in bytes */
+    uint32_t crypt_method;        /* QCOW_CRYPT_NONE or QCOW_CRYPT_AES */
+    uint32_t l1_size; /* XXX: save number of clusters instead ? */
+    uint64_t l1_table_offset;     /* file offset of the L1 table */
+    uint64_t refcount_table_offset;
+    uint32_t refcount_table_clusters; /* size of the refcount table, in clusters */
+    uint32_t nb_snapshots;
+    uint64_t snapshots_offset;
+} QCowHeader;
+
+
+/*
+ * Header of one header extension.  Extensions are read starting right after
+ * the QCowHeader; both fields are big-endian on disk.  The extension data
+ * (len bytes) follows and is padded to a multiple of 8 bytes.
+ */
+typedef struct {
+    uint32_t magic;   /* extension type, one of QCOW_EXT_MAGIC_* */
+    uint32_t len;     /* length of the extension data, without padding */
+} QCowExtension;
+/* terminates the list of extensions */
+#define  QCOW_EXT_MAGIC_END 0
+/* data is the backing file format name (not NUL terminated on disk) */
+#define  QCOW_EXT_MAGIC_BACKING_FORMAT 0xE2792ACA
+
+
+/*
+ * Fixed-size part of a snapshot record (packed, no padding between fields).
+ * NOTE(review): presumably the on-disk layout parsed by
+ * qcow_read_snapshots(), which is outside this chunk -- confirm there.
+ */
+typedef struct __attribute__((packed)) QCowSnapshotHeader {
+    /* header is 8 byte aligned */
+    uint64_t l1_table_offset;
+
+    uint32_t l1_size;
+    uint16_t id_str_size;
+    uint16_t name_size;
+
+    uint32_t date_sec;
+    uint32_t date_nsec;
+
+    uint64_t vm_clock_nsec;
+
+    uint32_t vm_state_size;
+    uint32_t extra_data_size; /* for extension */
+    /* extra data follows */
+    /* id_str follows */
+    /* name follows  */
+} QCowSnapshotHeader;
+
+/* number of L2 tables held in the in-memory L2 cache (BDRVQcowState.l2_cache) */
+#define L2_CACHE_SIZE 16
+
+/*
+ * In-memory description of one snapshot; BDRVQcowState.snapshots points to
+ * an array of these.  The fields mirror those of QCowSnapshotHeader, with
+ * the id and name held as separate strings.
+ */
+typedef struct QCowSnapshot {
+    uint64_t l1_table_offset;
+    uint32_t l1_size;
+    char *id_str;
+    char *name;
+    uint32_t vm_state_size;
+    uint32_t date_sec;
+    uint32_t date_nsec;
+    uint64_t vm_clock_nsec;
+} QCowSnapshot;
+
+/* Per-image driver state (bs->opaque), filled in by qcow_open(). */
+typedef struct BDRVQcowState {
+    BlockDriverState *hd;       /* underlying image file */
+    int cluster_bits;           /* log2 of the cluster size (from the header) */
+    int cluster_size;           /* 1 << cluster_bits, in bytes */
+    int cluster_sectors;        /* cluster size in 512 byte sectors */
+    int l2_bits;                /* cluster_bits - 3: an L2 table is one cluster */
+    int l2_size;                /* number of entries in an L2 table */
+    int l1_size;                /* number of entries in the L1 table */
+    int l1_vm_state_index;      /* number of L1 entries needed to map header.size bytes */
+    int csize_shift;            /* bit position of the sector count in a compressed entry */
+    int csize_mask;             /* mask of that sector count, after shifting */
+    uint64_t cluster_offset_mask; /* bits below csize_shift: offset of a compressed cluster */
+    uint64_t l1_table_offset;   /* file offset of the L1 table */
+    uint64_t *l1_table;         /* L1 table, converted to host byte order */
+    uint64_t *l2_cache;         /* L2_CACHE_SIZE tables of l2_size entries, big-endian */
+    uint64_t l2_cache_offsets[L2_CACHE_SIZE]; /* file offset of the table in each slot */
+    uint32_t l2_cache_counts[L2_CACHE_SIZE];  /* hit counter of each slot */
+    uint8_t *cluster_cache;     /* buffer of one cluster */
+    uint8_t *cluster_data;      /* QCOW_MAX_CRYPT_CLUSTERS clusters + 512 bytes */
+    uint64_t cluster_cache_offset; /* set to -1 by qcow_open */
+
+    uint64_t *refcount_table;
+    uint64_t refcount_table_offset;
+    uint32_t refcount_table_size; /* number of 8 byte entries in the refcount table */
+    uint64_t refcount_block_cache_offset;
+    uint16_t *refcount_block_cache;
+    int64_t free_cluster_index;
+    int64_t free_byte_offset;
+
+    uint32_t crypt_method; /* current crypt method, 0 if no key yet */
+    uint32_t crypt_method_header; /* crypt method announced by the image header */
+    AES_KEY aes_encrypt_key;
+    AES_KEY aes_decrypt_key;
+    uint64_t snapshots_offset;
+    int snapshots_size;
+    int nb_snapshots;
+    QCowSnapshot *snapshots;
+} BDRVQcowState;
+
+/* Prototypes of static helpers that are used before their definition. */
+static int decompress_cluster(BDRVQcowState *s, uint64_t cluster_offset);
+static int qcow_read(BlockDriverState *bs, int64_t sector_num,
+                     uint8_t *buf, int nb_sectors);
+static int qcow_read_snapshots(BlockDriverState *bs);
+static void qcow_free_snapshots(BlockDriverState *bs);
+static int refcount_init(BlockDriverState *bs);
+static void refcount_close(BlockDriverState *bs);
+static int get_refcount(BlockDriverState *bs, int64_t cluster_index);
+static int update_cluster_refcount(BlockDriverState *bs,
+                                   int64_t cluster_index,
+                                   int addend);
+static void update_refcount(BlockDriverState *bs,
+                            int64_t offset, int64_t length,
+                            int addend);
+static int64_t alloc_clusters(BlockDriverState *bs, int64_t size);
+static int64_t alloc_bytes(BlockDriverState *bs, int size);
+static void free_clusters(BlockDriverState *bs,
+                          int64_t offset, int64_t size);
+static int check_refcounts(BlockDriverState *bs);
+
+/*
+ * Check whether buf starts with a QCOW version 2 header.
+ *
+ * Returns 100 if the buffer is large enough to hold a QCowHeader and both
+ * the magic and the version match, 0 otherwise.  filename is not used.
+ */
+static int qcow_probe(const uint8_t *buf, int buf_size, const char *filename)
+{
+    const QCowHeader *hdr = (const void *)buf;
+
+    if (buf_size < sizeof(QCowHeader))
+        return 0;
+    if (be32_to_cpu(hdr->magic) != QCOW_MAGIC)
+        return 0;
+    if (be32_to_cpu(hdr->version) != QCOW_VERSION)
+        return 0;
+
+    return 100;
+}
+
+
+/*
+ * read qcow2 extension and fill bs
+ * start reading from start_offset
+ * finish reading upon magic of value 0 or when end_offset reached
+ * unknown magic is skipped (future extension this version knows nothing about)
+ *
+ * return 0 upon success, non-0 otherwise:
+ *   1 if an extension header could not be read,
+ *   2 if the backing format name does not fit in bs->backing_format,
+ *   3 if the backing format name could not be read.
+ */
+static int qcow_read_extensions(BlockDriverState *bs, uint64_t start_offset,
+                                uint64_t end_offset)
+{
+    BDRVQcowState *s = bs->opaque;
+    QCowExtension ext;
+    uint64_t offset;
+
+#ifdef DEBUG_EXT
+    printf("qcow_read_extensions: start=%ld end=%ld\n", start_offset, end_offset);
+#endif
+    offset = start_offset;
+    while (offset < end_offset) {
+
+#ifdef DEBUG_EXT
+        /* Sanity check */
+        if (offset > s->cluster_size)
+            printf("qcow_handle_extension: suspicious offset %lu\n", offset);
+
+        printf("attemting to read extended header in offset %lu\n", offset);
+#endif
+
+        if (bdrv_pread(s->hd, offset, &ext, sizeof(ext)) != sizeof(ext)) {
+            fprintf(stderr, "qcow_handle_extension: ERROR: pread fail from offset %llu\n",
+                    (unsigned long long)offset);
+            return 1;
+        }
+        /* the extension header is big-endian on disk */
+        be32_to_cpus(&ext.magic);
+        be32_to_cpus(&ext.len);
+        /* offset now points at the extension data */
+        offset += sizeof(ext);
+#ifdef DEBUG_EXT
+        printf("ext.magic = 0x%x\n", ext.magic);
+#endif
+        switch (ext.magic) {
+        case QCOW_EXT_MAGIC_END:
+            return 0;
+
+        case QCOW_EXT_MAGIC_BACKING_FORMAT:
+            /* ">=" leaves room for the terminating NUL added below */
+            if (ext.len >= sizeof(bs->backing_format)) {
+                fprintf(stderr, "ERROR: ext_backing_format: len=%u too large"
+                        " (>=%zu)\n",
+                        ext.len, sizeof(bs->backing_format));
+                return 2;
+            }
+            if (bdrv_pread(s->hd, offset , bs->backing_format,
+                           ext.len) != ext.len)
+                return 3;
+            bs->backing_format[ext.len] = '\0';
+#ifdef DEBUG_EXT
+            printf("Qcow2: Got format extension %s\n", bs->backing_format);
+#endif
+            /* skip the data, rounded up to a multiple of 8 bytes */
+            offset += ((ext.len + 7) & ~7);
+            break;
+
+        default:
+            /* unknown magic -- just skip it */
+            offset += ((ext.len + 7) & ~7);
+            break;
+        }
+    }
+
+    return 0;
+}
+
+
+/*
+ * Open a qcow2 image.
+ *
+ * Opens the underlying file, reads and validates the header, loads the L1
+ * table, allocates the L2 cache and the cluster buffers, initializes the
+ * refcount state, then reads the header extensions, the backing file name
+ * and the snapshot table.
+ *
+ * Returns 0 on success.  On failure returns the error of bdrv_file_open(),
+ * or -1 for any later error after releasing what was set up so far.
+ */
+static int qcow_open(BlockDriverState *bs, const char *filename, int flags)
+{
+    BDRVQcowState *s = bs->opaque;
+    int len, i, shift, ret;
+    QCowHeader header;
+    uint64_t ext_end;
+
+    /* Performance is terrible right now with cache=writethrough due mainly
+     * to reference count updates.  If the user does not explicitly specify
+     * a caching type, force to writeback caching.
+     */
+    if ((flags & BDRV_O_CACHE_DEF)) {
+        flags |= BDRV_O_CACHE_WB;
+        flags &= ~BDRV_O_CACHE_DEF;
+    }
+    ret = bdrv_file_open(&s->hd, filename, flags);
+    if (ret < 0)
+        return ret;
+    if (bdrv_pread(s->hd, 0, &header, sizeof(header)) != sizeof(header))
+        goto fail;
+    /* the header is big-endian on disk: convert every field to host order */
+    be32_to_cpus(&header.magic);
+    be32_to_cpus(&header.version);
+    be64_to_cpus(&header.backing_file_offset);
+    be32_to_cpus(&header.backing_file_size);
+    be64_to_cpus(&header.size);
+    be32_to_cpus(&header.cluster_bits);
+    be32_to_cpus(&header.crypt_method);
+    be64_to_cpus(&header.l1_table_offset);
+    be32_to_cpus(&header.l1_size);
+    be64_to_cpus(&header.refcount_table_offset);
+    be32_to_cpus(&header.refcount_table_clusters);
+    be64_to_cpus(&header.snapshots_offset);
+    be32_to_cpus(&header.nb_snapshots);
+
+    if (header.magic != QCOW_MAGIC || header.version != QCOW_VERSION)
+        goto fail;
+    /* only cluster sizes from 512 bytes (2^9) to 64 KB (2^16) are accepted */
+    if (header.size <= 1 ||
+        header.cluster_bits < 9 ||
+        header.cluster_bits > 16)
+        goto fail;
+    if (header.crypt_method > QCOW_CRYPT_AES)
+        goto fail;
+    s->crypt_method_header = header.crypt_method;
+    if (s->crypt_method_header)
+        bs->encrypted = 1;
+    s->cluster_bits = header.cluster_bits;
+    s->cluster_size = 1 << s->cluster_bits;
+    s->cluster_sectors = 1 << (s->cluster_bits - 9);
+    s->l2_bits = s->cluster_bits - 3; /* L2 is always one cluster */
+    s->l2_size = 1 << s->l2_bits;
+    bs->total_sectors = header.size / 512;
+    /* layout of the sector count / offset fields of a compressed L2 entry */
+    s->csize_shift = (62 - (s->cluster_bits - 8));
+    s->csize_mask = (1 << (s->cluster_bits - 8)) - 1;
+    s->cluster_offset_mask = (1LL << s->csize_shift) - 1;
+    s->refcount_table_offset = header.refcount_table_offset;
+    /* clusters -> number of 8 byte entries */
+    s->refcount_table_size =
+        header.refcount_table_clusters << (s->cluster_bits - 3);
+
+    s->snapshots_offset = header.snapshots_offset;
+    s->nb_snapshots = header.nb_snapshots;
+
+    /* read the level 1 table */
+    s->l1_size = header.l1_size;
+    /* one L1 entry maps 2^shift bytes of the virtual disk */
+    shift = s->cluster_bits + s->l2_bits;
+    s->l1_vm_state_index = (header.size + (1LL << shift) - 1) >> shift;
+    /* the L1 table must contain at least enough entries to put
+       header.size bytes */
+    if (s->l1_size < s->l1_vm_state_index)
+        goto fail;
+    s->l1_table_offset = header.l1_table_offset;
+    s->l1_table = qemu_malloc(s->l1_size * sizeof(uint64_t));
+    if (bdrv_pread(s->hd, s->l1_table_offset, s->l1_table, s->l1_size * sizeof(uint64_t)) !=
+        s->l1_size * sizeof(uint64_t))
+        goto fail;
+    /* the in-memory L1 table is kept in host byte order */
+    for(i = 0;i < s->l1_size; i++) {
+        be64_to_cpus(&s->l1_table[i]);
+    }
+    /* alloc L2 cache */
+    s->l2_cache = qemu_malloc(s->l2_size * L2_CACHE_SIZE * sizeof(uint64_t));
+    s->cluster_cache = qemu_malloc(s->cluster_size);
+    /* one more sector for decompressed data alignment */
+    s->cluster_data = qemu_malloc(QCOW_MAX_CRYPT_CLUSTERS * s->cluster_size
+                                  + 512);
+    s->cluster_cache_offset = -1;
+
+    if (refcount_init(bs) < 0)
+        goto fail;
+
+    /* read qcow2 extensions */
+    /* they sit between the header and the backing file name, or within the
+       first cluster when there is no backing file */
+    if (header.backing_file_offset)
+        ext_end = header.backing_file_offset;
+    else
+        ext_end = s->cluster_size;
+    if (qcow_read_extensions(bs, sizeof(header), ext_end))
+        goto fail;
+
+    /* read the backing file name */
+    if (header.backing_file_offset != 0) {
+        len = header.backing_file_size;
+        /* longer names are silently truncated to 1023 bytes */
+        if (len > 1023)
+            len = 1023;
+        if (bdrv_pread(s->hd, header.backing_file_offset, bs->backing_file, len) != len)
+            goto fail;
+        bs->backing_file[len] = '\0';
+    }
+    if (qcow_read_snapshots(bs) < 0)
+        goto fail;
+
+#ifdef DEBUG_ALLOC
+    check_refcounts(bs);
+#endif
+    return 0;
+
+ fail:
+    /* NOTE(review): this path is also reached before the buffers are
+     * allocated; it relies on *s starting out zeroed -- confirm the caller
+     * allocates bs->opaque that way. */
+    qcow_free_snapshots(bs);
+    refcount_close(bs);
+    qemu_free(s->l1_table);
+    qemu_free(s->l2_cache);
+    qemu_free(s->cluster_cache);
+    qemu_free(s->cluster_data);
+    bdrv_delete(s->hd);
+    return -1;
+}
+
+/*
+ * Set the encryption key of the image.
+ *
+ * The key string is used as a 128 bit AES key: it is truncated to 16 bytes
+ * or padded with zero bytes.  The crypt method announced by the header
+ * becomes the current one.
+ *
+ * Returns 0 on success, -1 if one of the AES key schedules cannot be set up.
+ */
+static int qcow_set_key(BlockDriverState *bs, const char *key)
+{
+    BDRVQcowState *s = bs->opaque;
+    uint8_t keybuf[16];
+    int key_len, pos;
+
+    memset(keybuf, 0, sizeof(keybuf));
+    key_len = strlen(key);
+    if (key_len > 16)
+        key_len = 16;
+    /* XXX: we could compress the chars to 7 bits to increase
+       entropy */
+    pos = 0;
+    while (pos < key_len) {
+        keybuf[pos] = key[pos];
+        pos++;
+    }
+    s->crypt_method = s->crypt_method_header;
+
+    if (AES_set_encrypt_key(keybuf, 128, &s->aes_encrypt_key) != 0)
+        return -1;
+    if (AES_set_decrypt_key(keybuf, 128, &s->aes_decrypt_key) != 0)
+        return -1;
+#if 0
+    /* test */
+    {
+        uint8_t in[16];
+        uint8_t out[16];
+        uint8_t tmp[16];
+        for(pos=0;pos<16;pos++)
+            in[pos] = pos;
+        AES_encrypt(in, tmp, &s->aes_encrypt_key);
+        AES_decrypt(tmp, out, &s->aes_decrypt_key);
+        for(pos = 0; pos < 16; pos++)
+            printf(" %02x", tmp[pos]);
+        printf("\n");
+        for(pos = 0; pos < 16; pos++)
+            printf(" %02x", out[pos]);
+        printf("\n");
+    }
+#endif
+    return 0;
+}
+
+/* Encrypt (enc != 0) or decrypt nb_sectors sectors of 512 bytes with AES in
+   CBC mode.  Every sector is processed on its own; its IV is the sector
+   number in little-endian followed by 64 zero bits.
+   The crypt function is compatible with the linux cryptoloop
+   algorithm for < 4 GB images. NOTE: out_buf == in_buf is
+   supported */
+static void encrypt_sectors(BDRVQcowState *s, int64_t sector_num,
+                            uint8_t *out_buf, const uint8_t *in_buf,
+                            int nb_sectors, int enc,
+                            const AES_KEY *key)
+{
+    union {
+        uint64_t ll[2];
+        uint8_t b[16];
+    } ivec;
+    int remaining = nb_sectors;
+
+    while (remaining > 0) {
+        /* fresh IV for each sector */
+        ivec.ll[0] = cpu_to_le64(sector_num);
+        ivec.ll[1] = 0;
+        AES_cbc_encrypt(in_buf, out_buf, 512, key,
+                        ivec.b, enc);
+        in_buf += 512;
+        out_buf += 512;
+        sector_num++;
+        remaining--;
+    }
+}
+
+/*
+ * Copy the sectors [n_start, n_end) of the guest cluster starting at sector
+ * start_sect into the cluster located at cluster_offset in the image file.
+ * The data is read with qcow_read() and encrypted first when a crypt method
+ * is active.
+ *
+ * Returns 0 on success (also when the range is empty), or the negative
+ * value returned by the failing read or write.
+ */
+static int copy_sectors(BlockDriverState *bs, uint64_t start_sect,
+                        uint64_t cluster_offset, int n_start, int n_end)
+{
+    BDRVQcowState *s = bs->opaque;
+    int count = n_end - n_start;
+    int rc;
+
+    if (count <= 0)
+        return 0;
+
+    rc = qcow_read(bs, start_sect + n_start, s->cluster_data, count);
+    if (rc < 0)
+        return rc;
+
+    if (s->crypt_method) {
+        /* encrypt in place */
+        encrypt_sectors(s, start_sect + n_start,
+                        s->cluster_data, s->cluster_data,
+                        count, 1, &s->aes_encrypt_key);
+    }
+
+    rc = bdrv_write(s->hd, (cluster_offset >> 9) + n_start,
+                    s->cluster_data, count);
+    return rc < 0 ? rc : 0;
+}
+
+/* Empty the L2 cache: clear the cached tables, their offsets and hit counts. */
+static void l2_cache_reset(BlockDriverState *bs)
+{
+    BDRVQcowState *s = bs->opaque;
+    size_t tables_bytes = s->l2_size * L2_CACHE_SIZE * sizeof(uint64_t);
+
+    memset(s->l2_cache, 0, tables_bytes);
+    memset(s->l2_cache_offsets, 0, sizeof(s->l2_cache_offsets));
+    memset(s->l2_cache_counts, 0, sizeof(s->l2_cache_counts));
+}
+
+/*
+ * Choose the L2 cache slot to reuse: the one with the lowest hit count.
+ * On a tie the lowest index wins.  Returns the slot index.
+ */
+static inline int l2_cache_new_entry(BlockDriverState *bs)
+{
+    BDRVQcowState *s = bs->opaque;
+    uint32_t lowest = 0xffffffff;
+    int victim = 0;
+    int slot;
+
+    for (slot = 0; slot < L2_CACHE_SIZE; slot++) {
+        uint32_t hits = s->l2_cache_counts[slot];
+
+        if (hits >= lowest)
+            continue;
+        lowest = hits;
+        victim = slot;
+    }
+    return victim;
+}
+
+/* Round offset up to the next multiple of n; n must be a power of two. */
+static int64_t align_offset(int64_t offset, int n)
+{
+    const int low_bits = n - 1;
+
+    return (offset + low_bits) & ~low_bits;
+}
+
+/*
+ * grow_l1_table
+ *
+ * Make sure the L1 table has at least min_size entries.  If it is too
+ * small, a larger table (grown by steps of about 1.5x) is allocated in the
+ * image file, the current entries are written to it and the header is
+ * updated to point to it; the clusters of the old table are then freed.
+ *
+ * Returns 0 on success (or if nothing had to be done), -EIO if a write
+ * failed.  On failure the current in-memory L1 table is left untouched.
+ */
+static int grow_l1_table(BlockDriverState *bs, int min_size)
+{
+    BDRVQcowState *s = bs->opaque;
+    int new_l1_size, new_l1_size2, ret, i;
+    uint64_t *new_l1_table;
+    uint64_t new_l1_table_offset;
+    uint8_t data[12];
+
+    new_l1_size = s->l1_size;
+    if (min_size <= new_l1_size)
+        return 0;
+    /* (0 * 3 + 1) / 2 is 0: an empty table would never grow in the loop */
+    if (new_l1_size == 0)
+        new_l1_size = 1;
+    while (min_size > new_l1_size) {
+        new_l1_size = (new_l1_size * 3 + 1) / 2;
+    }
+#ifdef DEBUG_ALLOC2
+    printf("grow l1_table from %d to %d\n", s->l1_size, new_l1_size);
+#endif
+
+    /* new_l1_size2 is the size of the new table in bytes */
+    new_l1_size2 = sizeof(uint64_t) * new_l1_size;
+    new_l1_table = qemu_mallocz(new_l1_size2);
+    memcpy(new_l1_table, s->l1_table, s->l1_size * sizeof(uint64_t));
+
+    /* write new table (align to cluster) */
+    new_l1_table_offset = alloc_clusters(bs, new_l1_size2);
+
+    /* the table is big-endian on disk: swap for the write, then swap back */
+    for(i = 0; i < s->l1_size; i++)
+        new_l1_table[i] = cpu_to_be64(new_l1_table[i]);
+    ret = bdrv_pwrite(s->hd, new_l1_table_offset, new_l1_table, new_l1_size2);
+    if (ret != new_l1_size2)
+        goto fail;
+    for(i = 0; i < s->l1_size; i++)
+        new_l1_table[i] = be64_to_cpu(new_l1_table[i]);
+
+    /* set new table */
+    /* l1_size (4 bytes) and l1_table_offset (8 bytes) are adjacent in the
+       header, so both are updated with a single write */
+    cpu_to_be32w((uint32_t*)data, new_l1_size);
+    cpu_to_be64w((uint64_t*)(data + 4), new_l1_table_offset);
+    if (bdrv_pwrite(s->hd, offsetof(QCowHeader, l1_size), data,
+                sizeof(data)) != sizeof(data))
+        goto fail;
+    qemu_free(s->l1_table);
+    free_clusters(bs, s->l1_table_offset, s->l1_size * sizeof(uint64_t));
+    s->l1_table_offset = new_l1_table_offset;
+    s->l1_table = new_l1_table;
+    s->l1_size = new_l1_size;
+    return 0;
+ fail:
+    /* s->l1_table is still the live table: release only the new copy */
+    qemu_free(new_l1_table);
+    return -EIO;
+}
+
+/*
+ * seek_l2_table
+ *
+ * Look up the L2 table located at l2_offset in the L2 cache.
+ *
+ * On a hit the hit counter of the slot is incremented; when it reaches
+ * 0xffffffff all counters are halved so that they keep their relative
+ * order.  Returns a pointer to the cached table, or NULL on a miss.
+ *
+ */
+
+static uint64_t *seek_l2_table(BDRVQcowState *s, uint64_t l2_offset)
+{
+    int slot, k;
+
+    for (slot = 0; slot < L2_CACHE_SIZE; slot++) {
+        if (s->l2_cache_offsets[slot] != l2_offset)
+            continue;
+
+        /* cache hit: account for it */
+        s->l2_cache_counts[slot]++;
+        if (s->l2_cache_counts[slot] == 0xffffffff) {
+            for (k = 0; k < L2_CACHE_SIZE; k++)
+                s->l2_cache_counts[k] >>= 1;
+        }
+        return &s->l2_cache[slot << s->l2_bits];
+    }
+
+    return NULL;
+}
+
+/*
+ * l2_load
+ *
+ * Return the L2 table located at l2_offset in the image file.  A cached
+ * copy is used when there is one; otherwise the table is read into the
+ * least used cache slot, whose hit count restarts at 1.
+ *
+ * Returns a pointer to the table inside the L2 cache, or NULL if the read
+ * from the image file failed.
+ */
+
+static uint64_t *l2_load(BlockDriverState *bs, uint64_t l2_offset)
+{
+    BDRVQcowState *s = bs->opaque;
+    const size_t table_bytes = s->l2_size * sizeof(uint64_t);
+    uint64_t *table;
+    int slot;
+
+    /* fast path: the table is already cached */
+    table = seek_l2_table(s, l2_offset);
+    if (table != NULL)
+        return table;
+
+    /* miss: evict the least used slot and fill it from the file */
+    slot = l2_cache_new_entry(bs);
+    table = s->l2_cache + (slot << s->l2_bits);
+    if (bdrv_pread(s->hd, l2_offset, table, table_bytes) != table_bytes)
+        return NULL;
+
+    s->l2_cache_offsets[slot] = l2_offset;
+    s->l2_cache_counts[slot] = 1;
+
+    return table;
+}
+
+/*
+ * l2_allocate
+ *
+ * Allocate a new l2 entry in the file. If l1_index points to an already
+ * used entry in the L2 table (i.e. we are doing a copy on write for the L2
+ * table) copy the contents of the old L2 table into the newly allocated one.
+ * Otherwise the new table is initialized with zeros.
+ *
+ * The L1 entry is updated, in memory and in the image file, to point to the
+ * new table with QCOW_OFLAG_COPIED set, and the table is placed in the L2
+ * cache.
+ *
+ * Returns a pointer to the table in the L2 cache, or NULL if a read or
+ * write failed.  Note that the L1 entry is updated before the table itself
+ * is written and is not reverted on failure.
+ *
+ */
+
+static uint64_t *l2_allocate(BlockDriverState *bs, int l1_index)
+{
+    BDRVQcowState *s = bs->opaque;
+    int min_index;
+    uint64_t old_l2_offset, tmp;
+    uint64_t *l2_table, l2_offset;
+
+    old_l2_offset = s->l1_table[l1_index];
+
+    /* allocate a new l2 entry */
+
+    l2_offset = alloc_clusters(bs, s->l2_size * sizeof(uint64_t));
+
+    /* update the L1 entry */
+
+    s->l1_table[l1_index] = l2_offset | QCOW_OFLAG_COPIED;
+
+    /* the on-disk L1 table is big-endian */
+    tmp = cpu_to_be64(l2_offset | QCOW_OFLAG_COPIED);
+    if (bdrv_pwrite(s->hd, s->l1_table_offset + l1_index * sizeof(tmp),
+                    &tmp, sizeof(tmp)) != sizeof(tmp))
+        return NULL;
+
+    /* allocate a new entry in the l2 cache */
+
+    min_index = l2_cache_new_entry(bs);
+    l2_table = s->l2_cache + (min_index << s->l2_bits);
+
+    if (old_l2_offset == 0) {
+        /* if there was no old l2 table, clear the new table */
+        memset(l2_table, 0, s->l2_size * sizeof(uint64_t));
+    } else {
+        /* if there was an old l2 table, read it from the disk */
+        if (bdrv_pread(s->hd, old_l2_offset,
+                       l2_table, s->l2_size * sizeof(uint64_t)) !=
+            s->l2_size * sizeof(uint64_t))
+            return NULL;
+    }
+    /* write the l2 table to the file */
+    if (bdrv_pwrite(s->hd, l2_offset,
+                    l2_table, s->l2_size * sizeof(uint64_t)) !=
+        s->l2_size * sizeof(uint64_t))
+        return NULL;
+
+    /* update the l2 cache entry */
+
+    s->l2_cache_offsets[min_index] = l2_offset;
+    s->l2_cache_counts[min_index] = 1;
+
+    return l2_table;
+}
+
+/* Number of clusters needed to hold size bytes (rounded up). */
+static int size_to_clusters(BDRVQcowState *s, int64_t size)
+{
+    const int64_t rounded = size + (s->cluster_size - 1);
+
+    return rounded >> s->cluster_bits;
+}
+
+/*
+ * Count how many entries of l2_table, beginning at index start, continue
+ * the run that begins at l2_table[0]: entry i is expected to hold the
+ * offset of entry 0 plus i * cluster_size.  At most nb_clusters entries are
+ * examined.  The bits set in mask are ignored in all comparisons.
+ *
+ * Returns 0 if entry 0 is empty once the mask bits are removed.
+ */
+static int count_contiguous_clusters(uint64_t nb_clusters, int cluster_size,
+        uint64_t *l2_table, uint64_t start, uint64_t mask)
+{
+    int idx;
+    uint64_t first = be64_to_cpu(l2_table[0]) & ~mask;
+
+    if (first == 0)
+        return 0;
+
+    idx = start;
+    while (idx < start + nb_clusters) {
+        uint64_t expected = first + idx * cluster_size;
+
+        if (expected != (be64_to_cpu(l2_table[idx]) & ~mask))
+            break;
+        idx++;
+    }
+
+    return (idx - start);
+}
+
+/*
+ * Count the leading entries of l2_table that are zero (unallocated),
+ * looking at no more than nb_clusters entries.
+ */
+static int count_contiguous_free_clusters(uint64_t nb_clusters, uint64_t *l2_table)
+{
+    int run = 0;
+
+    while (nb_clusters > 0 && l2_table[run] == 0) {
+        nb_clusters--;
+        run++;
+    }
+
+    return run;
+}
+
+/*
+ * get_cluster_offset
+ *
+ * For a given offset of the disk image, return cluster offset in
+ * qcow2 file.
+ *
+ * on entry, *num is the number of contiguous sectors we'd like to
+ * access following offset.
+ *
+ * on exit, *num is the number of contiguous sectors we can access in one
+ * go: they are either all unallocated or stored contiguously in the file,
+ * and they do not extend past the end of the current L2 table.
+ *
+ * Return the offset of the cluster in the qcow2 file, without the
+ * QCOW_OFLAG_COPIED flag (QCOW_OFLAG_COMPRESSED is kept),
+ * Return 0, if the cluster is not allocated. 0 is also returned, with
+ * *num left unchanged, if the L2 table cannot be loaded.
+ *
+ */
+
+static uint64_t get_cluster_offset(BlockDriverState *bs,
+                                   uint64_t offset, int *num)
+{
+    BDRVQcowState *s = bs->opaque;
+    int l1_index, l2_index;
+    uint64_t l2_offset, *l2_table, cluster_offset;
+    int l1_bits, c;
+    int index_in_cluster, nb_available, nb_needed, nb_clusters;
+
+    /* sector index of offset inside its cluster; the sector counts below
+     * are measured from the start of that cluster */
+    index_in_cluster = (offset >> 9) & (s->cluster_sectors - 1);
+    nb_needed = *num + index_in_cluster;
+
+    /* one L1 entry (i.e. one L2 table) maps 2^l1_bits bytes */
+    l1_bits = s->l2_bits + s->cluster_bits;
+
+    /* compute how many bytes there are between the offset and
+     * the end of the l1 entry
+     */
+
+    nb_available = (1 << l1_bits) - (offset & ((1 << l1_bits) - 1));
+
+    /* compute the number of available sectors */
+
+    nb_available = (nb_available >> 9) + index_in_cluster;
+
+    if (nb_needed > nb_available) {
+        nb_needed = nb_available;
+    }
+
+    cluster_offset = 0;
+
+    /* seek the l2 offset in the l1 table */
+
+    l1_index = offset >> l1_bits;
+    if (l1_index >= s->l1_size)
+        goto out;
+
+    l2_offset = s->l1_table[l1_index];
+
+    /* seek the l2 table of the given l2 offset */
+
+    if (!l2_offset)
+        goto out;
+
+    /* load the l2 table in memory */
+
+    l2_offset &= ~QCOW_OFLAG_COPIED;
+    l2_table = l2_load(bs, l2_offset);
+    if (l2_table == NULL)
+        return 0;
+
+    /* find the cluster offset for the given disk offset */
+
+    l2_index = (offset >> s->cluster_bits) & (s->l2_size - 1);
+    cluster_offset = be64_to_cpu(l2_table[l2_index]);
+    nb_clusters = size_to_clusters(s, nb_needed << 9);
+
+    if (!cluster_offset) {
+        /* how many empty clusters ? */
+        c = count_contiguous_free_clusters(nb_clusters, &l2_table[l2_index]);
+    } else {
+        /* how many allocated clusters ? */
+        c = count_contiguous_clusters(nb_clusters, s->cluster_size,
+                &l2_table[l2_index], 0, QCOW_OFLAG_COPIED);
+    }
+
+   nb_available = (c * s->cluster_sectors);
+out:
+    if (nb_available > nb_needed)
+        nb_available = nb_needed;
+
+    /* back to a count relative to offset */
+    *num = nb_available - index_in_cluster;
+
+    return cluster_offset & ~QCOW_OFLAG_COPIED;
+}
+
+/*
+ * free_any_clusters
+ *
+ * Free the clusters described by an L2 entry value.  For a compressed
+ * cluster the range to free is computed from the sector count and the
+ * offset stored in the entry, and nb_clusters is ignored; otherwise
+ * nb_clusters clusters starting at cluster_offset are freed.
+ *
+ */
+
+static void free_any_clusters(BlockDriverState *bs,
+                              uint64_t cluster_offset, int nb_clusters)
+{
+    BDRVQcowState *s = bs->opaque;
+    int nb_csectors;
+
+    if (!(cluster_offset & QCOW_OFLAG_COMPRESSED)) {
+        /* normal clusters */
+        free_clusters(bs, cluster_offset, nb_clusters << s->cluster_bits);
+        return;
+    }
+
+    /* compressed cluster: the entry holds the number of sectors minus one */
+    nb_csectors = ((cluster_offset >> s->csize_shift) &
+                   s->csize_mask) + 1;
+    free_clusters(bs, (cluster_offset & s->cluster_offset_mask) & ~511,
+                  nb_csectors * 512);
+}
+
+/*
+ * get_cluster_table
+ *
+ * for a given disk offset, load (and allocate if needed)
+ * the l2 table.
+ *
+ * the l2 table, its offset in the qcow2 file and the cluster index
+ * in the l2 table are given to the caller.
+ *
+ * The L1 table is grown if offset lies beyond it. An L2 table whose L1
+ * entry lacks QCOW_OFLAG_COPIED is replaced by a newly allocated table
+ * (see l2_allocate).
+ *
+ * Return 1 on success, 0 on failure (the output parameters are then left
+ * untouched).
+ *
+ */
+
+static int get_cluster_table(BlockDriverState *bs, uint64_t offset,
+                             uint64_t **new_l2_table,
+                             uint64_t *new_l2_offset,
+                             int *new_l2_index)
+{
+    BDRVQcowState *s = bs->opaque;
+    int l1_index, l2_index, ret;
+    uint64_t l2_offset, *l2_table;
+
+    /* seek the l2 offset in the l1 table */
+
+    l1_index = offset >> (s->l2_bits + s->cluster_bits);
+    if (l1_index >= s->l1_size) {
+        ret = grow_l1_table(bs, l1_index + 1);
+        if (ret < 0)
+            return 0;
+    }
+    l2_offset = s->l1_table[l1_index];
+
+    /* seek the l2 table of the given l2 offset */
+
+    if (l2_offset & QCOW_OFLAG_COPIED) {
+        /* load the l2 table in memory */
+        l2_offset &= ~QCOW_OFLAG_COPIED;
+        l2_table = l2_load(bs, l2_offset);
+        if (l2_table == NULL)
+            return 0;
+    } else {
+        /* no table yet, or one without the copied flag: drop our reference
+         * to the old table (if any) and switch to a new one; l2_allocate
+         * fills it from the old table read via the L1 entry */
+        if (l2_offset)
+            free_clusters(bs, l2_offset, s->l2_size * sizeof(uint64_t));
+        l2_table = l2_allocate(bs, l1_index);
+        if (l2_table == NULL)
+            return 0;
+        l2_offset = s->l1_table[l1_index] & ~QCOW_OFLAG_COPIED;
+    }
+
+    /* find the cluster offset for the given disk offset */
+
+    l2_index = (offset >> s->cluster_bits) & (s->l2_size - 1);
+
+    *new_l2_table = l2_table;
+    *new_l2_offset = l2_offset;
+    *new_l2_index = l2_index;
+
+    return 1;
+}
+
+/*
+ * alloc_compressed_cluster_offset
+ *
+ * For a given offset of the disk image, return cluster offset in
+ * qcow2 file.
+ *
+ * If the cluster already exists with QCOW_OFLAG_COPIED set, its offset is
+ * returned unchanged (without the flag) and nothing is allocated.
+ * Otherwise any previous cluster is freed, compressed_size bytes are
+ * allocated and the L2 entry is updated in memory and in the file.
+ *
+ * Return the cluster offset if successful (for a newly allocated cluster
+ * the value includes QCOW_OFLAG_COMPRESSED and the sector count),
+ * Return 0, otherwise.
+ *
+ */
+
+static uint64_t alloc_compressed_cluster_offset(BlockDriverState *bs,
+                                                uint64_t offset,
+                                                int compressed_size)
+{
+    BDRVQcowState *s = bs->opaque;
+    int l2_index, ret;
+    uint64_t l2_offset, *l2_table, cluster_offset;
+    int nb_csectors;
+
+    ret = get_cluster_table(bs, offset, &l2_table, &l2_offset, &l2_index);
+    if (ret == 0)
+        return 0;
+
+    cluster_offset = be64_to_cpu(l2_table[l2_index]);
+    if (cluster_offset & QCOW_OFLAG_COPIED)
+        return cluster_offset & ~QCOW_OFLAG_COPIED;
+
+    if (cluster_offset)
+        free_any_clusters(bs, cluster_offset, 1);
+
+    cluster_offset = alloc_bytes(bs, compressed_size);
+    /* index of the last sector touched by the data minus index of the
+     * first one, i.e. the number of sectors minus one */
+    nb_csectors = ((cluster_offset + compressed_size - 1) >> 9) -
+                  (cluster_offset >> 9);
+
+    cluster_offset |= QCOW_OFLAG_COMPRESSED |
+                      ((uint64_t)nb_csectors << s->csize_shift);
+
+    /* update L2 table */
+
+    /* compressed clusters never have the copied flag */
+
+    l2_table[l2_index] = cpu_to_be64(cluster_offset);
+    if (bdrv_pwrite(s->hd,
+                    l2_offset + l2_index * sizeof(uint64_t),
+                    l2_table + l2_index,
+                    sizeof(uint64_t)) != sizeof(uint64_t))
+        return 0;
+
+    return cluster_offset;
+}
+
+/*
+ * Describes a cluster allocation whose L2 entries still have to be written;
+ * consumed by alloc_cluster_link_l2().
+ */
+typedef struct QCowL2Meta
+{
+    uint64_t offset;    /* offset in the disk image of the write */
+    int n_start;        /* sectors at the start of the first cluster to copy over */
+    int nb_available;   /* NOTE(review): looks like the sector count, from the
+                           start of the first cluster, up to which new data
+                           is written -- confirm in alloc_cluster_offset */
+    int nb_clusters;    /* number of newly allocated clusters, 0 if none */
+} QCowL2Meta;
+
+/*
+ * Link the clusters allocated at cluster_offset into the L2 table.
+ *
+ * The sectors of the first and last cluster that are not covered by the
+ * write (as described by m) are copied into the new clusters first.  Then
+ * the m->nb_clusters L2 entries are pointed to the new clusters, with
+ * QCOW_OFLAG_COPIED set, in memory and in the file.  Clusters previously
+ * referenced by these entries are freed.
+ *
+ * Returns 0 on success or if m->nb_clusters is 0, the negative error of
+ * copy_sectors(), or -EIO if the L2 table cannot be loaded or written.
+ */
+static int alloc_cluster_link_l2(BlockDriverState *bs, uint64_t cluster_offset,
+        QCowL2Meta *m)
+{
+    BDRVQcowState *s = bs->opaque;
+    int i, j = 0, l2_index, ret;
+    uint64_t *old_cluster, start_sect, l2_offset, *l2_table;
+
+    if (m->nb_clusters == 0)
+        return 0;
+
+    /* collects the previous (non-zero) values of the L2 entries */
+    old_cluster = qemu_malloc(m->nb_clusters * sizeof(uint64_t));
+
+    /* copy content of unmodified sectors */
+    start_sect = (m->offset & ~(s->cluster_size - 1)) >> 9;
+    if (m->n_start) {
+        ret = copy_sectors(bs, start_sect, cluster_offset, 0, m->n_start);
+        if (ret < 0)
+            goto err;
+    }
+
+    /* the write ends in the middle of a cluster: copy the rest of it */
+    if (m->nb_available & (s->cluster_sectors - 1)) {
+        uint64_t end = m->nb_available & ~(uint64_t)(s->cluster_sectors - 1);
+        ret = copy_sectors(bs, start_sect + end, cluster_offset + (end << 9),
+                m->nb_available - end, s->cluster_sectors);
+        if (ret < 0)
+            goto err;
+    }
+
+    ret = -EIO;
+    /* update L2 table */
+    if (!get_cluster_table(bs, m->offset, &l2_table, &l2_offset, &l2_index))
+        goto err;
+
+    for (i = 0; i < m->nb_clusters; i++) {
+        /* if two concurrent writes happen to the same unallocated cluster
+        * each write allocates separate cluster and writes data concurrently.
+        * The first one to complete updates l2 table with pointer to its
+        * cluster the second one has to do RMW (which is done above by
+        * copy_sectors()), update l2 table with its cluster pointer and free
+        * old cluster. This is what this loop does */
+        if(l2_table[l2_index + i] != 0)
+            old_cluster[j++] = l2_table[l2_index + i];
+
+        l2_table[l2_index + i] = cpu_to_be64((cluster_offset +
+                    (i << s->cluster_bits)) | QCOW_OFLAG_COPIED);
+     }
+
+    if (bdrv_pwrite(s->hd, l2_offset + l2_index * sizeof(uint64_t),
+                l2_table + l2_index, m->nb_clusters * sizeof(uint64_t)) !=
+            m->nb_clusters * sizeof(uint64_t))
+        goto err;
+
+    /* the saved entries are still big-endian, as stored in the L2 table */
+    for (i = 0; i < j; i++)
+        free_any_clusters(bs, be64_to_cpu(old_cluster[i]) & ~QCOW_OFLAG_COPIED,
+                          1);
+
+    ret = 0;
+err:
+    qemu_free(old_cluster);
+    return ret;
+ }
+
+/*
+ * alloc_cluster_offset
+ *
+ * For a given byte offset of the disk image, return the cluster offset in
+ * the qcow2 file where the data must be written.
+ *
+ * If the first cluster already carries QCOW_OFLAG_COPIED it is reused as is
+ * (m->nb_clusters is set to 0); otherwise new clusters are allocated and the
+ * information needed to link them into the L2 table later is saved in *m.
+ *
+ * n_start and n_end are sector indexes counted from the start of the first
+ * cluster (callers pass index_in_cluster and index_in_cluster + nb_sectors).
+ * On return *num holds the number of sectors, starting at n_start, that the
+ * returned offset covers.
+ *
+ * Return the cluster offset if successful,
+ * Return 0, otherwise.
+ *
+ */
+
+static uint64_t alloc_cluster_offset(BlockDriverState *bs,
+                                     uint64_t offset,
+                                     int n_start, int n_end,
+                                     int *num, QCowL2Meta *m)
+{
+    BDRVQcowState *s = bs->opaque;
+    int l2_index, ret;
+    uint64_t l2_offset, *l2_table, cluster_offset;
+    int nb_clusters, i = 0;
+
+    ret = get_cluster_table(bs, offset, &l2_table, &l2_offset, &l2_index);
+    if (ret == 0)
+        return 0;
+
+    nb_clusters = size_to_clusters(s, n_end << 9);
+
+    /* never run past the end of the current L2 table */
+    nb_clusters = MIN(nb_clusters, s->l2_size - l2_index);
+
+    cluster_offset = be64_to_cpu(l2_table[l2_index]);
+
+    /* We keep all QCOW_OFLAG_COPIED clusters */
+
+    if (cluster_offset & QCOW_OFLAG_COPIED) {
+        nb_clusters = count_contiguous_clusters(nb_clusters, s->cluster_size,
+                &l2_table[l2_index], 0, 0);
+
+        cluster_offset &= ~QCOW_OFLAG_COPIED;
+        /* nothing was allocated, so there is no L2 entry to link later */
+        m->nb_clusters = 0;
+
+        goto out;
+    }
+
+    /* for the moment, multiple compressed clusters are not managed */
+
+    if (cluster_offset & QCOW_OFLAG_COMPRESSED)
+        nb_clusters = 1;
+
+    /* how many available clusters ? */
+
+    while (i < nb_clusters) {
+        i += count_contiguous_clusters(nb_clusters - i, s->cluster_size,
+                &l2_table[l2_index], i, 0);
+
+        /* stop at the first non-zero L2 entry after the contiguous run */
+        if(be64_to_cpu(l2_table[l2_index + i]))
+            break;
+
+        i += count_contiguous_free_clusters(nb_clusters - i,
+                &l2_table[l2_index + i]);
+
+        cluster_offset = be64_to_cpu(l2_table[l2_index + i]);
+
+        /* stop before an entry that is already COPIED or is compressed */
+        if ((cluster_offset & QCOW_OFLAG_COPIED) ||
+                (cluster_offset & QCOW_OFLAG_COMPRESSED))
+            break;
+    }
+    nb_clusters = i;
+
+    /* allocate a new cluster */
+
+    /* NOTE(review): the alloc_clusters() result is returned unchecked; callers
+       treat a 0 return as failure -- confirm how alloc_clusters() reports
+       errors */
+    cluster_offset = alloc_clusters(bs, nb_clusters * s->cluster_size);
+
+    /* save info needed for meta data update */
+    m->offset = offset;
+    m->n_start = n_start;
+    m->nb_clusters = nb_clusters;
+
+out:
+    /* sectors usable from the start of the first cluster, capped at n_end */
+    m->nb_available = MIN(nb_clusters << (s->cluster_bits - 9), n_end);
+
+    *num = m->nb_available - n_start;
+
+    return cluster_offset;
+}
+
+/*
+ * Tell whether the data at sector_num is present in this image.
+ *
+ * *pnum is primed with nb_sectors and handed to get_cluster_offset(), which
+ * receives the pointer and may adjust the count.  Returns 1 when a non-zero
+ * cluster offset was found, 0 otherwise.
+ */
+static int qcow_is_allocated(BlockDriverState *bs, int64_t sector_num,
+                             int nb_sectors, int *pnum)
+{
+    *pnum = nb_sectors;
+
+    return get_cluster_offset(bs, sector_num << 9, pnum) != 0;
+}
+
+/*
+ * Inflate a raw deflate stream (window bits -12, i.e. no zlib header) from
+ * buf into out_buf.
+ *
+ * Succeeds only when exactly out_buf_size bytes were produced and inflate()
+ * ended with Z_STREAM_END or Z_BUF_ERROR.  Returns 0 on success, -1 on error.
+ */
+static int decompress_buffer(uint8_t *out_buf, int out_buf_size,
+                             const uint8_t *buf, int buf_size)
+{
+    z_stream stream;
+    int status, produced;
+
+    memset(&stream, 0, sizeof(stream));
+    stream.next_in = (uint8_t *)buf;
+    stream.avail_in = buf_size;
+    stream.next_out = out_buf;
+    stream.avail_out = out_buf_size;
+
+    if (inflateInit2(&stream, -12) != Z_OK)
+        return -1;
+
+    status = inflate(&stream, Z_FINISH);
+    produced = stream.next_out - out_buf;
+    /* the stream is torn down on every path once it has been initialised */
+    inflateEnd(&stream);
+
+    if (status != Z_STREAM_END && status != Z_BUF_ERROR)
+        return -1;
+    if (produced != out_buf_size)
+        return -1;
+    return 0;
+}
+
+/*
+ * Make s->cluster_cache hold the decompressed contents of the compressed
+ * cluster described by cluster_offset.
+ *
+ * Nothing is read when the cache already holds that cluster
+ * (s->cluster_cache_offset matches).  Returns 0 on success, -1 on read or
+ * decompression failure.
+ */
+static int decompress_cluster(BDRVQcowState *s, uint64_t cluster_offset)
+{
+    int nb_csectors, sector_offset, csize;
+    uint64_t coffset = cluster_offset & s->cluster_offset_mask;
+
+    if (s->cluster_cache_offset == coffset)
+        return 0;               /* cache hit */
+
+    /* the compressed size in sectors is packed into the L2 entry */
+    nb_csectors = ((cluster_offset >> s->csize_shift) & s->csize_mask) + 1;
+    sector_offset = coffset & 511;
+    csize = nb_csectors * 512 - sector_offset;
+
+    if (bdrv_read(s->hd, coffset >> 9, s->cluster_data, nb_csectors) < 0)
+        return -1;
+
+    if (decompress_buffer(s->cluster_cache, s->cluster_size,
+                          s->cluster_data + sector_offset, csize) < 0)
+        return -1;
+
+    s->cluster_cache_offset = coffset;
+    return 0;
+}
+
+/*
+ * Handle reads that reach past the end of the backing file.
+ *
+ * Returns how many of the nb_sectors sectors starting at sector_num lie
+ * inside bs (bs->total_sectors).  When the request runs past the end, the
+ * part of buf that corresponds to the missing sectors is zero-filled.
+ */
+static int backing_read1(BlockDriverState *bs,
+                         int64_t sector_num, uint8_t *buf, int nb_sectors)
+{
+    int in_range;
+
+    if (sector_num + nb_sectors <= bs->total_sectors)
+        return nb_sectors;
+
+    in_range = (sector_num >= bs->total_sectors)
+               ? 0
+               : bs->total_sectors - sector_num;
+
+    memset(buf + in_range * 512, 0, 512 * (nb_sectors - in_range));
+    return in_range;
+}
+
+/*
+ * Synchronous read of nb_sectors sectors starting at sector_num into buf.
+ *
+ * The request is served piecewise; each piece comes from one of:
+ *  - the backing file, or zeroes, when no cluster is mapped,
+ *  - the decompressed cluster cache for compressed clusters,
+ *  - the image file itself (decrypted in place if encryption is enabled).
+ *
+ * Returns 0 on success, -1 on error.
+ */
+static int qcow_read(BlockDriverState *bs, int64_t sector_num,
+                     uint8_t *buf, int nb_sectors)
+{
+    BDRVQcowState *s = bs->opaque;
+
+    while (nb_sectors > 0) {
+        uint64_t cluster_offset;
+        int index_in_cluster;
+        int count = nb_sectors;
+
+        cluster_offset = get_cluster_offset(bs, sector_num << 9, &count);
+        index_in_cluster = sector_num & (s->cluster_sectors - 1);
+
+        if (cluster_offset == 0) {
+            if (!bs->backing_hd) {
+                memset(buf, 0, 512 * count);
+            } else {
+                /* read from the base image; sectors past its end are
+                   zero-filled by backing_read1() */
+                int present = backing_read1(bs->backing_hd, sector_num,
+                                            buf, count);
+                if (present > 0 &&
+                    bdrv_read(bs->backing_hd, sector_num, buf, present) < 0)
+                    return -1;
+            }
+        } else if (cluster_offset & QCOW_OFLAG_COMPRESSED) {
+            if (decompress_cluster(s, cluster_offset) < 0)
+                return -1;
+            memcpy(buf, s->cluster_cache + index_in_cluster * 512,
+                   512 * count);
+        } else {
+            int got = bdrv_pread(s->hd,
+                                 cluster_offset + index_in_cluster * 512,
+                                 buf, count * 512);
+            if (got != count * 512)
+                return -1;
+            if (s->crypt_method)
+                encrypt_sectors(s, sector_num, buf, buf, count, 0,
+                                &s->aes_decrypt_key);
+        }
+
+        buf += count * 512;
+        sector_num += count;
+        nb_sectors -= count;
+    }
+    return 0;
+}
+
+/*
+ * Synchronous write of nb_sectors sectors from buf starting at sector_num.
+ *
+ * For each piece: obtain (or allocate) the target clusters, write the data
+ * (encrypted through s->cluster_data when encryption is enabled), then link
+ * the clusters into the L2 table.  If the data write or the L2 update fails,
+ * the clusters are released again.
+ *
+ * Returns 0 on success, -1 on error.
+ */
+static int qcow_write(BlockDriverState *bs, int64_t sector_num,
+                     const uint8_t *buf, int nb_sectors)
+{
+    BDRVQcowState *s = bs->opaque;
+    QCowL2Meta l2meta;
+
+    while (nb_sectors > 0) {
+        uint64_t cluster_offset;
+        const uint8_t *src;
+        int index_in_cluster, n_end, n, ret;
+
+        index_in_cluster = sector_num & (s->cluster_sectors - 1);
+        n_end = index_in_cluster + nb_sectors;
+        /* encrypted writes are staged in s->cluster_data, so cap them */
+        if (s->crypt_method &&
+            n_end > QCOW_MAX_CRYPT_CLUSTERS * s->cluster_sectors)
+            n_end = QCOW_MAX_CRYPT_CLUSTERS * s->cluster_sectors;
+
+        cluster_offset = alloc_cluster_offset(bs, sector_num << 9,
+                                              index_in_cluster,
+                                              n_end, &n, &l2meta);
+        if (cluster_offset == 0)
+            return -1;
+
+        src = buf;
+        if (s->crypt_method) {
+            encrypt_sectors(s, sector_num, s->cluster_data, buf, n, 1,
+                            &s->aes_encrypt_key);
+            src = s->cluster_data;
+        }
+        ret = bdrv_pwrite(s->hd, cluster_offset + index_in_cluster * 512,
+                          src, n * 512);
+
+        if (ret != n * 512 ||
+            alloc_cluster_link_l2(bs, cluster_offset, &l2meta) < 0) {
+            free_any_clusters(bs, cluster_offset, l2meta.nb_clusters);
+            return -1;
+        }
+
+        buf += n * 512;
+        sector_num += n;
+        nb_sectors -= n;
+    }
+    s->cluster_cache_offset = -1; /* disable compressed cache */
+    return 0;
+}
+
+/* Per-request state of an asynchronous qcow2 read or write */
+typedef struct QCowAIOCB {
+    BlockDriverAIOCB common;    /* generic part: bs, completion cb, opaque */
+    int64_t sector_num;         /* next guest sector to process */
+    QEMUIOVector *qiov;         /* caller's I/O vector */
+    uint8_t *buf;               /* current position in the linear buffer */
+    void *orig_buf;             /* bounce buffer, set when qiov->niov > 1 */
+    int nb_sectors;             /* sectors still to process */
+    int n;                      /* sectors covered by the step in flight */
+    uint64_t cluster_offset;    /* cluster offset of the step in flight */
+    uint8_t *cluster_data;      /* staging buffer for encrypted writes */
+    BlockDriverAIOCB *hd_aiocb; /* request pending on the lower layer, or NULL */
+    struct iovec hd_iov;        /* single element backing hd_qiov */
+    QEMUIOVector hd_qiov;       /* vector passed to the lower layer */
+    QEMUBH *bh;                 /* bottom half used to continue a read step */
+    QCowL2Meta l2meta;          /* L2 update info for the write in flight */
+} QCowAIOCB;
+
+static void qcow_aio_read_cb(void *opaque, int ret);
+
+/*
+ * Bottom half that resumes the read state machine for steps that completed
+ * without any lower-layer I/O.  The bottom half is deleted before
+ * qcow_aio_read_cb() is re-entered with a success status.
+ */
+static void qcow_aio_read_bh(void *opaque)
+{
+    QCowAIOCB *req = opaque;
+
+    qemu_bh_delete(req->bh);
+    req->bh = NULL;
+
+    qcow_aio_read_cb(req, 0);
+}
+
+/*
+ * Create and schedule a bottom half running cb(acb).
+ *
+ * Returns 0 on success, -EIO if the request already has a bottom half or if
+ * a new one could not be created.
+ */
+static int qcow_schedule_bh(QEMUBHFunc *cb, QCowAIOCB *acb)
+{
+    QEMUBH *bh;
+
+    if (acb->bh != NULL)
+        return -EIO;
+
+    bh = qemu_bh_new(cb, acb);
+    acb->bh = bh;
+    if (bh == NULL)
+        return -EIO;
+
+    qemu_bh_schedule(bh);
+    return 0;
+}
+
+/*
+ * Completion callback and state machine of an asynchronous read.
+ *
+ * Each invocation finishes the previous step (decrypting the data if
+ * needed), advances the request by acb->n sectors and, unless the request is
+ * complete, starts the next step: a read from the backing file, a zero fill,
+ * a copy out of the decompressed cluster cache, or a read from the image
+ * file.  Steps that need no lower-layer I/O continue through a bottom half.
+ *
+ * opaque is the QCowAIOCB; ret is the status of the step that just finished
+ * (negative on error).  On completion or error the bounce buffer is copied
+ * back and freed if one was used, the caller's callback is invoked with ret,
+ * and the request is released.
+ */
+static void qcow_aio_read_cb(void *opaque, int ret)
+{
+    QCowAIOCB *acb = opaque;
+    BlockDriverState *bs = acb->common.bs;
+    BDRVQcowState *s = bs->opaque;
+    int index_in_cluster, n1;
+
+    acb->hd_aiocb = NULL;
+    if (ret < 0)
+        goto done;
+
+    /* post process the read buffer */
+    if (!acb->cluster_offset) {
+        /* nothing to do */
+    } else if (acb->cluster_offset & QCOW_OFLAG_COMPRESSED) {
+        /* nothing to do */
+    } else {
+        if (s->crypt_method) {
+            encrypt_sectors(s, acb->sector_num, acb->buf, acb->buf,
+                            acb->n, 0,
+                            &s->aes_decrypt_key);
+        }
+    }
+
+    acb->nb_sectors -= acb->n;
+    acb->sector_num += acb->n;
+    acb->buf += acb->n * 512;
+
+    if (acb->nb_sectors == 0) {
+        /* request completed */
+        ret = 0;
+        goto done;
+    }
+
+    /* prepare next AIO request */
+    acb->n = acb->nb_sectors;
+    acb->cluster_offset = get_cluster_offset(bs, acb->sector_num << 9, &acb->n);
+    index_in_cluster = acb->sector_num & (s->cluster_sectors - 1);
+
+    if (!acb->cluster_offset) {
+        if (bs->backing_hd) {
+            /* read from the base image */
+            n1 = backing_read1(bs->backing_hd, acb->sector_num,
+                               acb->buf, acb->n);
+            if (n1 > 0) {
+                /* only n1 sectors exist in the backing file; the tail of the
+                   buffer was already zeroed by backing_read1(), so request
+                   n1 sectors like the synchronous qcow_read() does */
+                acb->hd_iov.iov_base = (void *)acb->buf;
+                acb->hd_iov.iov_len = n1 * 512;
+                qemu_iovec_init_external(&acb->hd_qiov, &acb->hd_iov, 1);
+                acb->hd_aiocb = bdrv_aio_readv(bs->backing_hd, acb->sector_num,
+                                    &acb->hd_qiov, n1,
+                                   qcow_aio_read_cb, acb);
+                if (acb->hd_aiocb == NULL) {
+                    /* submission failed: do not report success */
+                    ret = -EIO;
+                    goto done;
+                }
+            } else {
+                ret = qcow_schedule_bh(qcow_aio_read_bh, acb);
+                if (ret < 0)
+                    goto done;
+            }
+        } else {
+            /* Note: in this case, no need to wait */
+            memset(acb->buf, 0, 512 * acb->n);
+            ret = qcow_schedule_bh(qcow_aio_read_bh, acb);
+            if (ret < 0)
+                goto done;
+        }
+    } else if (acb->cluster_offset & QCOW_OFLAG_COMPRESSED) {
+        /* add AIO support for compressed blocks ? */
+        if (decompress_cluster(s, acb->cluster_offset) < 0) {
+            /* ret is still 0 here; turn the failure into an error status */
+            ret = -EIO;
+            goto done;
+        }
+        memcpy(acb->buf,
+               s->cluster_cache + index_in_cluster * 512, 512 * acb->n);
+        ret = qcow_schedule_bh(qcow_aio_read_bh, acb);
+        if (ret < 0)
+            goto done;
+    } else {
+        /* plain clusters must be sector aligned in the image file */
+        if ((acb->cluster_offset & 511) != 0) {
+            ret = -EIO;
+            goto done;
+        }
+
+        acb->hd_iov.iov_base = (void *)acb->buf;
+        acb->hd_iov.iov_len = acb->n * 512;
+        qemu_iovec_init_external(&acb->hd_qiov, &acb->hd_iov, 1);
+        acb->hd_aiocb = bdrv_aio_readv(s->hd,
+                            (acb->cluster_offset >> 9) + index_in_cluster,
+                            &acb->hd_qiov, acb->n, qcow_aio_read_cb, acb);
+        if (acb->hd_aiocb == NULL) {
+            /* submission failed: do not report success */
+            ret = -EIO;
+            goto done;
+        }
+    }
+
+    return;
+done:
+    /* multi-element vectors were read through a bounce buffer */
+    if (acb->qiov->niov > 1) {
+        qemu_iovec_from_buffer(acb->qiov, acb->orig_buf, acb->qiov->size);
+        qemu_vfree(acb->orig_buf);
+    }
+    acb->common.cb(acb->common.opaque, ret);
+    qemu_aio_release(acb);
+}
+
+/*
+ * Allocate and initialise the request state shared by async reads and
+ * writes.
+ *
+ * A vector with more than one element is linearised through an aligned
+ * bounce buffer (filled from the vector when is_write is set); a single
+ * element vector is used in place.  Returns NULL if no request could be
+ * obtained.
+ *
+ * NOTE(review): acb->bh and acb->cluster_data are not initialised here;
+ * presumably qemu_aio_get() hands back zeroed or recycled state -- confirm.
+ */
+static QCowAIOCB *qcow_aio_setup(BlockDriverState *bs,
+        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
+        BlockDriverCompletionFunc *cb, void *opaque, int is_write)
+{
+    QCowAIOCB *req = qemu_aio_get(bs, cb, opaque);
+
+    if (req == NULL)
+        return NULL;
+
+    req->hd_aiocb = NULL;
+    req->sector_num = sector_num;
+    req->qiov = qiov;
+
+    if (qiov->niov <= 1) {
+        req->buf = (uint8_t *)qiov->iov->iov_base;
+    } else {
+        req->orig_buf = qemu_blockalign(bs, qiov->size);
+        req->buf = req->orig_buf;
+        if (is_write)
+            qemu_iovec_to_buffer(qiov, req->buf);
+    }
+
+    req->nb_sectors = nb_sectors;
+    req->n = 0;
+    req->cluster_offset = 0;
+    req->l2meta.nb_clusters = 0;
+    return req;
+}
+
+/*
+ * Start an asynchronous vectored read.  The request is set up and the read
+ * state machine is kicked off immediately with a success status.
+ * Returns the request handle, or NULL if setup failed.
+ */
+static BlockDriverAIOCB *qcow_aio_readv(BlockDriverState *bs,
+        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
+        BlockDriverCompletionFunc *cb, void *opaque)
+{
+    QCowAIOCB *req = qcow_aio_setup(bs, sector_num, qiov, nb_sectors,
+                                    cb, opaque, 0);
+
+    if (req == NULL)
+        return NULL;
+
+    qcow_aio_read_cb(req, 0);
+    return &req->common;
+}
+
+/*
+ * Completion callback and state machine of an asynchronous write.
+ *
+ * Each invocation links the clusters written by the previous step into the
+ * L2 table, advances the request by acb->n sectors and, unless the request
+ * is complete, allocates clusters for the next step and submits the data
+ * (encrypted into acb->cluster_data first when encryption is enabled).
+ *
+ * opaque is the QCowAIOCB; ret is the status of the step that just finished
+ * (negative on error).  On completion or error the bounce buffer is freed if
+ * one was used, the caller's callback is invoked with ret, and the request
+ * is released.
+ */
+static void qcow_aio_write_cb(void *opaque, int ret)
+{
+    QCowAIOCB *acb = opaque;
+    BlockDriverState *bs = acb->common.bs;
+    BDRVQcowState *s = bs->opaque;
+    int index_in_cluster;
+    const uint8_t *src_buf;
+    int n_end;
+
+    acb->hd_aiocb = NULL;
+
+    if (ret < 0)
+        goto done;
+
+    if (alloc_cluster_link_l2(bs, acb->cluster_offset, &acb->l2meta) < 0) {
+        free_any_clusters(bs, acb->cluster_offset, acb->l2meta.nb_clusters);
+        /* ret is still non-negative here; without this the caller would be
+           told the write succeeded although the L2 table was not updated */
+        ret = -EIO;
+        goto done;
+    }
+
+    acb->nb_sectors -= acb->n;
+    acb->sector_num += acb->n;
+    acb->buf += acb->n * 512;
+
+    if (acb->nb_sectors == 0) {
+        /* request completed */
+        ret = 0;
+        goto done;
+    }
+
+    index_in_cluster = acb->sector_num & (s->cluster_sectors - 1);
+    n_end = index_in_cluster + acb->nb_sectors;
+    /* encrypted data is staged in a buffer of QCOW_MAX_CRYPT_CLUSTERS
+       clusters, so cap the step accordingly */
+    if (s->crypt_method &&
+        n_end > QCOW_MAX_CRYPT_CLUSTERS * s->cluster_sectors)
+        n_end = QCOW_MAX_CRYPT_CLUSTERS * s->cluster_sectors;
+
+    acb->cluster_offset = alloc_cluster_offset(bs, acb->sector_num << 9,
+                                          index_in_cluster,
+                                          n_end, &acb->n, &acb->l2meta);
+    if (!acb->cluster_offset || (acb->cluster_offset & 511) != 0) {
+        ret = -EIO;
+        goto done;
+    }
+    if (s->crypt_method) {
+        /* NOTE(review): cluster_data is allocated lazily and never freed in
+           this function; presumably it lives as long as the recycled
+           request -- confirm against qemu_aio_get()/qemu_aio_release() */
+        if (!acb->cluster_data) {
+            acb->cluster_data = qemu_mallocz(QCOW_MAX_CRYPT_CLUSTERS *
+                                             s->cluster_size);
+        }
+        encrypt_sectors(s, acb->sector_num, acb->cluster_data, acb->buf,
+                        acb->n, 1, &s->aes_encrypt_key);
+        src_buf = acb->cluster_data;
+    } else {
+        src_buf = acb->buf;
+    }
+    acb->hd_iov.iov_base = (void *)src_buf;
+    acb->hd_iov.iov_len = acb->n * 512;
+    qemu_iovec_init_external(&acb->hd_qiov, &acb->hd_iov, 1);
+    acb->hd_aiocb = bdrv_aio_writev(s->hd,
+                                    (acb->cluster_offset >> 9) + index_in_cluster,
+                                    &acb->hd_qiov, acb->n,
+                                    qcow_aio_write_cb, acb);
+    if (acb->hd_aiocb == NULL) {
+        /* submission failed: do not report success */
+        ret = -EIO;
+        goto done;
+    }
+
+    return;
+
+done:
+    if (acb->qiov->niov > 1)
+        qemu_vfree(acb->orig_buf);
+    acb->common.cb(acb->common.opaque, ret);
+    qemu_aio_release(acb);
+}
+
+/*
+ * Start an asynchronous vectored write.  The compressed cluster cache is
+ * invalidated first, then the request is set up and the write state machine
+ * is kicked off with a success status.
+ * Returns the request handle, or NULL if setup failed.
+ */
+static BlockDriverAIOCB *qcow_aio_writev(BlockDriverState *bs,
+        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
+        BlockDriverCompletionFunc *cb, void *opaque)
+{
+    BDRVQcowState *state = bs->opaque;
+    QCowAIOCB *req;
+
+    state->cluster_cache_offset = -1; /* disable compressed cache */
+
+    req = qcow_aio_setup(bs, sector_num, qiov, nb_sectors, cb, opaque, 1);
+    if (req == NULL)
+        return NULL;
+
+    qcow_aio_write_cb(req, 0);
+    return &req->common;
+}
+
+/*
+ * Cancel an asynchronous request: cancel the lower-layer request if one is
+ * pending, then release the request state.
+ */
+static void qcow_aio_cancel(BlockDriverAIOCB *blockacb)
+{
+    QCowAIOCB *req = (QCowAIOCB *)blockacb;
+
+    if (req->hd_aiocb != NULL)
+        bdrv_aio_cancel(req->hd_aiocb);
+
+    qemu_aio_release(req);
+}
+
+/*
+ * Release everything owned by the driver state: the L1 table, the L2 cache,
+ * the cluster cache and staging buffer, the refcount state, and finally the
+ * underlying image file.
+ */
+static void qcow_close(BlockDriverState *bs)
+{
+    BDRVQcowState *state = bs->opaque;
+
+    /* in-memory metadata */
+    qemu_free(state->l1_table);
+    qemu_free(state->l2_cache);
+
+    /* data buffers */
+    qemu_free(state->cluster_cache);
+    qemu_free(state->cluster_data);
+
+    refcount_close(bs);
+    bdrv_delete(state->hd);
+}
+
+/* XXX: use std qcow open function ? */
+/* Scratch state used while laying out a new image in qcow_create2() */
+typedef struct QCowCreateState {
+    int cluster_size;              /* cluster size in bytes (1 << cluster_bits) */
+    int cluster_bits;              /* log2 of the cluster size */
+    uint16_t *refcount_block;      /* in-memory refcount blocks, big-endian entries */
+    uint64_t *refcount_table;      /* in-memory refcount table, big-endian offsets */
+    int64_t l1_table_offset;       /* file offset of the L1 table */
+    int64_t refcount_table_offset; /* file offset of the refcount table */
+    int64_t refcount_block_offset; /* file offset of the first refcount block */
+} QCowCreateState;
+
+/*
+ * Bump by one the in-memory refcount of every cluster touched by the byte
+ * range [offset, offset + size).  Refcount entries are kept big-endian.
+ */
+static void create_refcount_update(QCowCreateState *s,
+                                   int64_t offset, int64_t size)
+{
+    int64_t first, final, pos;
+
+    /* round both ends of the range down to a cluster boundary */
+    first = offset & ~(s->cluster_size - 1);
+    final = (offset + size - 1)  & ~(s->cluster_size - 1);
+
+    pos = first;
+    while (pos <= final) {
+        uint16_t *entry = &s->refcount_block[pos >> s->cluster_bits];
+        int count = be16_to_cpu(*entry);
+
+        *entry = cpu_to_be16(count + 1);
+        pos += s->cluster_size;
+    }
+}
+
+/*
+ * Write exactly len bytes from buf to fd, retrying after short writes.
+ * Returns 0 on success, -1 if write() fails or makes no progress.
+ */
+static int create_write_full(int fd, const void *buf, size_t len)
+{
+    const uint8_t *p = buf;
+
+    while (len > 0) {
+        ssize_t done = write(fd, p, len);
+        if (done <= 0)
+            return -1;
+        p += done;
+        len -= done;
+    }
+    return 0;
+}
+
+/*
+ * Create a new, empty qcow2 image.
+ *
+ * filename       path of the image to create (truncated if it exists)
+ * total_size     virtual disk size in 512-byte sectors
+ * backing_file   optional backing file name stored in the header, or NULL
+ * backing_format optional backing format name, stored as a header extension
+ *                (only used when backing_file is given), or NULL
+ * flags          BLOCK_FLAG_ENCRYPT selects AES encryption
+ *
+ * File layout: header (plus optional extension and backing file name), then
+ * the cluster-aligned L1 table, one refcount table cluster and the refcount
+ * blocks.  Returns 0 on success, -1 if the file cannot be opened or if any
+ * write or seek fails.
+ */
+static int qcow_create2(const char *filename, int64_t total_size,
+                        const char *backing_file, const char *backing_format,
+                        int flags)
+{
+
+    int fd, header_size, backing_filename_len, l1_size, i, shift, l2_bits;
+    int ref_clusters, backing_format_len = 0;
+    int ret = -1;
+    QCowHeader header;
+    uint64_t tmp, offset;
+    QCowCreateState s1, *s = &s1;
+    QCowExtension ext_bf = {0, 0};
+
+
+    memset(s, 0, sizeof(*s));
+
+    fd = open(filename, O_WRONLY | O_CREAT | O_TRUNC | O_BINARY, 0644);
+    if (fd < 0)
+        return -1;
+    memset(&header, 0, sizeof(header));
+    header.magic = cpu_to_be32(QCOW_MAGIC);
+    header.version = cpu_to_be32(QCOW_VERSION);
+    header.size = cpu_to_be64(total_size * 512);
+    header_size = sizeof(header);
+    backing_filename_len = 0;
+    if (backing_file) {
+        if (backing_format) {
+            /* the extension payload is padded to a multiple of 8 bytes */
+            ext_bf.magic = QCOW_EXT_MAGIC_BACKING_FORMAT;
+            backing_format_len = strlen(backing_format);
+            ext_bf.len = (backing_format_len + 7) & ~7;
+            header_size += ((sizeof(ext_bf) + ext_bf.len + 7) & ~7);
+        }
+        header.backing_file_offset = cpu_to_be64(header_size);
+        backing_filename_len = strlen(backing_file);
+        header.backing_file_size = cpu_to_be32(backing_filename_len);
+        header_size += backing_filename_len;
+    }
+    s->cluster_bits = 12;  /* 4 KB clusters */
+    s->cluster_size = 1 << s->cluster_bits;
+    header.cluster_bits = cpu_to_be32(s->cluster_bits);
+    header_size = (header_size + 7) & ~7;
+    if (flags & BLOCK_FLAG_ENCRYPT) {
+        header.crypt_method = cpu_to_be32(QCOW_CRYPT_AES);
+    } else {
+        header.crypt_method = cpu_to_be32(QCOW_CRYPT_NONE);
+    }
+    /* an L2 table fills one cluster of 8-byte entries */
+    l2_bits = s->cluster_bits - 3;
+    shift = s->cluster_bits + l2_bits;
+    l1_size = (((total_size * 512) + (1LL << shift) - 1) >> shift);
+    offset = align_offset(header_size, s->cluster_size);
+    s->l1_table_offset = offset;
+    header.l1_table_offset = cpu_to_be64(s->l1_table_offset);
+    header.l1_size = cpu_to_be32(l1_size);
+    offset += align_offset(l1_size * sizeof(uint64_t), s->cluster_size);
+
+    s->refcount_table = qemu_mallocz(s->cluster_size);
+
+    s->refcount_table_offset = offset;
+    header.refcount_table_offset = cpu_to_be64(offset);
+    header.refcount_table_clusters = cpu_to_be32(1);
+    offset += s->cluster_size;
+    s->refcount_block_offset = offset;
+
+    /* count how many refcount blocks needed */
+    tmp = offset >> s->cluster_bits;
+    ref_clusters = (tmp >> (s->cluster_bits - REFCOUNT_SHIFT)) + 1;
+    for (i=0; i < ref_clusters; i++) {
+        s->refcount_table[i] = cpu_to_be64(offset);
+        offset += s->cluster_size;
+    }
+
+    s->refcount_block = qemu_mallocz(ref_clusters * s->cluster_size);
+
+    /* update refcounts */
+    create_refcount_update(s, 0, header_size);
+    create_refcount_update(s, s->l1_table_offset, l1_size * sizeof(uint64_t));
+    create_refcount_update(s, s->refcount_table_offset, s->cluster_size);
+    create_refcount_update(s, s->refcount_block_offset, ref_clusters * s->cluster_size);
+
+    /* write all the data; any failure aborts with an error instead of
+       silently leaving a truncated image behind */
+    if (create_write_full(fd, &header, sizeof(header)) < 0)
+        goto out;
+    if (backing_file) {
+        if (backing_format_len) {
+            char zero[16];
+            /* padding needed after the format name (computed before the
+               length is byte-swapped below) */
+            int d = ext_bf.len - backing_format_len;
+
+            memset(zero, 0, sizeof(zero));
+            cpu_to_be32s(&ext_bf.magic);
+            cpu_to_be32s(&ext_bf.len);
+            if (create_write_full(fd, &ext_bf, sizeof(ext_bf)) < 0)
+                goto out;
+            if (create_write_full(fd, backing_format, backing_format_len) < 0)
+                goto out;
+            if (d>0) {
+                if (create_write_full(fd, zero, d) < 0)
+                    goto out;
+            }
+        }
+        if (create_write_full(fd, backing_file, backing_filename_len) < 0)
+            goto out;
+    }
+    if (lseek(fd, s->l1_table_offset, SEEK_SET) < 0)
+        goto out;
+    tmp = 0;
+    for(i = 0;i < l1_size; i++) {
+        if (create_write_full(fd, &tmp, sizeof(tmp)) < 0)
+            goto out;
+    }
+    if (lseek(fd, s->refcount_table_offset, SEEK_SET) < 0)
+        goto out;
+    if (create_write_full(fd, s->refcount_table, s->cluster_size) < 0)
+        goto out;
+
+    if (lseek(fd, s->refcount_block_offset, SEEK_SET) < 0)
+        goto out;
+    if (create_write_full(fd, s->refcount_block,
+                          ref_clusters * s->cluster_size) < 0)
+        goto out;
+
+    ret = 0;
+out:
+    /* both tables are always allocated by the time this label is reachable */
+    qemu_free(s->refcount_table);
+    qemu_free(s->refcount_block);
+    close(fd);
+    return ret;
+}
+
+/*
+ * Create an image without a backing format: thin wrapper around
+ * qcow_create2() passing NULL as backing_format.
+ */
+static int qcow_create(const char *filename, int64_t total_size,
+                       const char *backing_file, int flags)
+{
+    const char *no_backing_format = NULL;
+
+    return qcow_create2(filename, total_size, backing_file,
+                        no_backing_format, flags);
+}
+
+/*
+ * Make the image empty.  The implementation is compiled out (#if 0) because
+ * it is marked as not correct, so this currently does nothing and always
+ * returns 0.
+ */
+static int qcow_make_empty(BlockDriverState *bs)
+{
+#if 0
+    /* XXX: not correct */
+    BDRVQcowState *s = bs->opaque;
+    uint32_t l1_length = s->l1_size * sizeof(uint64_t);
+    int ret;
+
+    /* clear the L1 table in memory and on disk, then cut the file after it */
+    memset(s->l1_table, 0, l1_length);
+    if (bdrv_pwrite(s->hd, s->l1_table_offset, s->l1_table, l1_length) < 0)
+        return -1;
+    ret = bdrv_truncate(s->hd, s->l1_table_offset + l1_length);
+    if (ret < 0)
+        return ret;
+
+    l2_cache_reset(bs);
+#endif
+    return 0;
+}
+
+/* XXX: put compressed sectors first, then all the cluster aligned
+   tables to avoid losing bytes in alignment */
+/*
+ * Write one cluster of data in compressed form.
+ *
+ * nb_sectors == 0 is a special request that pads the image file to a sector
+ * boundary.  Otherwise nb_sectors must be exactly one cluster
+ * (s->cluster_sectors), else -EINVAL is returned.  If the data does not
+ * compress to less than a cluster it is written uncompressed through
+ * qcow_write().
+ *
+ * Returns 0 on success, -EINVAL for a bad size, -1 on any other error.
+ */
+static int qcow_write_compressed(BlockDriverState *bs, int64_t sector_num,
+                                 const uint8_t *buf, int nb_sectors)
+{
+    BDRVQcowState *s = bs->opaque;
+    z_stream strm;
+    int ret, out_len;
+    int result = -1;
+    uint8_t *out_buf;
+    uint64_t cluster_offset;
+
+    if (nb_sectors == 0) {
+        /* align end of file to a sector boundary to ease reading with
+           sector based I/Os */
+        cluster_offset = bdrv_getlength(s->hd);
+        cluster_offset = (cluster_offset + 511) & ~511;
+        bdrv_truncate(s->hd, cluster_offset);
+        return 0;
+    }
+
+    if (nb_sectors != s->cluster_sectors)
+        return -EINVAL;
+
+    out_buf = qemu_malloc(s->cluster_size + (s->cluster_size / 1000) + 128);
+
+    /* default compression level, small (4 KB) window, no zlib header */
+    memset(&strm, 0, sizeof(strm));
+    ret = deflateInit2(&strm, Z_DEFAULT_COMPRESSION,
+                       Z_DEFLATED, -12,
+                       9, Z_DEFAULT_STRATEGY);
+    if (ret != 0)
+        goto out;
+
+    strm.avail_in = s->cluster_size;
+    strm.next_in = (uint8_t *)buf;
+    strm.avail_out = s->cluster_size;
+    strm.next_out = out_buf;
+
+    ret = deflate(&strm, Z_FINISH);
+    if (ret != Z_STREAM_END && ret != Z_OK) {
+        deflateEnd(&strm);
+        goto out;
+    }
+    out_len = strm.next_out - out_buf;
+
+    deflateEnd(&strm);
+
+    if (ret != Z_STREAM_END || out_len >= s->cluster_size) {
+        /* could not compress: write normal cluster, and report a failure
+           of that write instead of claiming success */
+        if (qcow_write(bs, sector_num, buf, s->cluster_sectors) < 0)
+            goto out;
+    } else {
+        cluster_offset = alloc_compressed_cluster_offset(bs, sector_num << 9,
+                                              out_len);
+        /* every exit goes through 'out' so that out_buf is never leaked */
+        if (!cluster_offset)
+            goto out;
+        cluster_offset &= s->cluster_offset_mask;
+        if (bdrv_pwrite(s->hd, cluster_offset, out_buf, out_len) != out_len)
+            goto out;
+    }
+
+    result = 0;
+out:
+    qemu_free(out_buf);
+    return result;
+}
+
+/* Flush the image by flushing the underlying file (s->hd). */
+static void qcow_flush(BlockDriverState *bs)
+{
+    BDRVQcowState *state = bs->opaque;
+
+    bdrv_flush(state->hd);
+}
+
+/*
+ * Fill in driver information: the cluster size and the offset of the VM
+ * state area, which starts at L1 index s->l1_vm_state_index (each L1 entry
+ * spans 1 << (cluster_bits + l2_bits) bytes).  Always returns 0.
+ */
+static int qcow_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
+{
+    BDRVQcowState *state = bs->opaque;
+    int64_t vm_state_index = (int64_t)state->l1_vm_state_index;
+
+    bdi->cluster_size = state->cluster_size;
+    bdi->vm_state_offset = vm_state_index <<
+        (state->cluster_bits + state->l2_bits);
+
+    return 0;
+}
+
+/*********************************************************/
+/* snapshot support */
+
+/* update the refcounts of snapshots and the copied flag */
+/*
+ * Walk the L1 table at l1_table_offset (l1_size entries) and every L2 table
+ * it references.  For each data cluster and each L2 table the refcount is
+ * changed by addend (or only read when addend is 0), and QCOW_OFLAG_COPIED
+ * is set in the referencing entry exactly when the resulting refcount is 1.
+ * Modified L2 tables and the L1 table are written back to the image.
+ *
+ * When l1_table_offset is the active L1 table, s->l1_table is used (and
+ * updated) in place; otherwise the table is read into a temporary buffer.
+ *
+ * Returns 0 on success, -EIO on I/O failure.
+ */
+static int update_snapshot_refcount(BlockDriverState *bs,
+                                    int64_t l1_table_offset,
+                                    int l1_size,
+                                    int addend)
+{
+    BDRVQcowState *s = bs->opaque;
+    uint64_t *l1_table, *l2_table, l2_offset, offset, l1_size2, l1_allocated;
+    int64_t old_offset, old_l2_offset;
+    int l2_size, i, j, l1_modified, l2_modified, nb_csectors, refcount;
+
+    /* L2 tables are rewritten directly on disk below, so drop cached copies */
+    l2_cache_reset(bs);
+
+    l2_table = NULL;
+    l1_table = NULL;
+    l1_size2 = l1_size * sizeof(uint64_t);
+    l1_allocated = 0;
+    if (l1_table_offset != s->l1_table_offset) {
+        /* not the active table: load it and convert to host byte order */
+        l1_table = qemu_malloc(l1_size2);
+        l1_allocated = 1;
+        if (bdrv_pread(s->hd, l1_table_offset,
+                       l1_table, l1_size2) != l1_size2)
+            goto fail;
+        for(i = 0;i < l1_size; i++)
+            be64_to_cpus(&l1_table[i]);
+    } else {
+        assert(l1_size == s->l1_size);
+        l1_table = s->l1_table;
+        l1_allocated = 0;
+    }
+
+    l2_size = s->l2_size * sizeof(uint64_t);
+    l2_table = qemu_malloc(l2_size);
+    l1_modified = 0;
+    for(i = 0; i < l1_size; i++) {
+        l2_offset = l1_table[i];
+        if (l2_offset) {
+            old_l2_offset = l2_offset;
+            l2_offset &= ~QCOW_OFLAG_COPIED;
+            l2_modified = 0;
+            if (bdrv_pread(s->hd, l2_offset, l2_table, l2_size) != l2_size)
+                goto fail;
+            for(j = 0; j < s->l2_size; j++) {
+                /* L2 entries stay big-endian in the buffer */
+                offset = be64_to_cpu(l2_table[j]);
+                if (offset != 0) {
+                    old_offset = offset;
+                    offset &= ~QCOW_OFLAG_COPIED;
+                    if (offset & QCOW_OFLAG_COMPRESSED) {
+                        nb_csectors = ((offset >> s->csize_shift) &
+                                       s->csize_mask) + 1;
+                        if (addend != 0)
+                            update_refcount(bs, (offset & s->cluster_offset_mask) & ~511,
+                                            nb_csectors * 512, addend);
+                        /* compressed clusters are never modified */
+                        /* any value other than 1 keeps the COPIED flag clear */
+                        refcount = 2;
+                    } else {
+                        if (addend != 0) {
+                            refcount = update_cluster_refcount(bs, offset >> s->cluster_bits, addend);
+                        } else {
+                            refcount = get_refcount(bs, offset >> s->cluster_bits);
+                        }
+                    }
+
+                    if (refcount == 1) {
+                        offset |= QCOW_OFLAG_COPIED;
+                    }
+                    if (offset != old_offset) {
+                        l2_table[j] = cpu_to_be64(offset);
+                        l2_modified = 1;
+                    }
+                }
+            }
+            if (l2_modified) {
+                if (bdrv_pwrite(s->hd,
+                                l2_offset, l2_table, l2_size) != l2_size)
+                    goto fail;
+            }
+
+            /* same treatment for the cluster holding the L2 table itself */
+            if (addend != 0) {
+                refcount = update_cluster_refcount(bs, l2_offset >> s->cluster_bits, addend);
+            } else {
+                refcount = get_refcount(bs, l2_offset >> s->cluster_bits);
+            }
+            if (refcount == 1) {
+                l2_offset |= QCOW_OFLAG_COPIED;
+            }
+            if (l2_offset != old_l2_offset) {
+                l1_table[i] = l2_offset;
+                l1_modified = 1;
+            }
+        }
+    }
+    if (l1_modified) {
+        /* swap to big-endian for the write, then back to host order */
+        for(i = 0; i < l1_size; i++)
+            cpu_to_be64s(&l1_table[i]);
+        if (bdrv_pwrite(s->hd, l1_table_offset, l1_table,
+                        l1_size2) != l1_size2)
+            goto fail;
+        for(i = 0; i < l1_size; i++)
+            be64_to_cpus(&l1_table[i]);
+    }
+    if (l1_allocated)
+        qemu_free(l1_table);
+    qemu_free(l2_table);
+    return 0;
+ fail:
+    if (l1_allocated)
+        qemu_free(l1_table);
+    qemu_free(l2_table);
+    return -EIO;
+}
+
+/*
+ * Free the in-memory snapshot list: each entry's name and id string, then
+ * the array itself, and reset the list to empty.
+ */
+static void qcow_free_snapshots(BlockDriverState *bs)
+{
+    BDRVQcowState *state = bs->opaque;
+    int idx = 0;
+
+    while (idx < state->nb_snapshots) {
+        qemu_free(state->snapshots[idx].name);
+        qemu_free(state->snapshots[idx].id_str);
+        idx++;
+    }
+
+    qemu_free(state->snapshots);
+    state->snapshots = NULL;
+    state->nb_snapshots = 0;
+}
+
+/*
+ * Load the snapshot table from the image into s->snapshots.
+ *
+ * Each on-disk record starts at an 8-byte aligned offset and consists of a
+ * QCowSnapshotHeader, extra data (skipped), the id string and the name; the
+ * strings are stored without a terminator and are NUL-terminated here.
+ * s->snapshots_size is set to the number of bytes the table occupies.
+ *
+ * Returns 0 on success; on a read error the partially built list is freed
+ * and -1 is returned.
+ */
+static int qcow_read_snapshots(BlockDriverState *bs)
+{
+    BDRVQcowState *s = bs->opaque;
+    QCowSnapshotHeader h;
+    QCowSnapshot *sn;
+    int i, id_str_size, name_size;
+    int64_t offset;
+    uint32_t extra_data_size;
+
+    if (!s->nb_snapshots) {
+        s->snapshots = NULL;
+        s->snapshots_size = 0;
+        return 0;
+    }
+
+    offset = s->snapshots_offset;
+    /* zero-filled so that the failure path can free unset name/id_str */
+    s->snapshots = qemu_mallocz(s->nb_snapshots * sizeof(QCowSnapshot));
+    for(i = 0; i < s->nb_snapshots; i++) {
+        offset = align_offset(offset, 8);
+        if (bdrv_pread(s->hd, offset, &h, sizeof(h)) != sizeof(h))
+            goto fail;
+        offset += sizeof(h);
+        sn = s->snapshots + i;
+        sn->l1_table_offset = be64_to_cpu(h.l1_table_offset);
+        sn->l1_size = be32_to_cpu(h.l1_size);
+        sn->vm_state_size = be32_to_cpu(h.vm_state_size);
+        sn->date_sec = be32_to_cpu(h.date_sec);
+        sn->date_nsec = be32_to_cpu(h.date_nsec);
+        sn->vm_clock_nsec = be64_to_cpu(h.vm_clock_nsec);
+        extra_data_size = be32_to_cpu(h.extra_data_size);
+
+        id_str_size = be16_to_cpu(h.id_str_size);
+        name_size = be16_to_cpu(h.name_size);
+
+        /* extra data is not interpreted, only skipped */
+        offset += extra_data_size;
+
+        sn->id_str = qemu_malloc(id_str_size + 1);
+        if (bdrv_pread(s->hd, offset, sn->id_str, id_str_size) != id_str_size)
+            goto fail;
+        offset += id_str_size;
+        sn->id_str[id_str_size] = '\0';
+
+        sn->name = qemu_malloc(name_size + 1);
+        if (bdrv_pread(s->hd, offset, sn->name, name_size) != name_size)
+            goto fail;
+        offset += name_size;
+        sn->name[name_size] = '\0';
+    }
+    s->snapshots_size = offset - s->snapshots_offset;
+    return 0;
+ fail:
+    qcow_free_snapshots(bs);
+    return -1;
+}
+
+/* add at the end of the file a new list of snapshots */
+/*
+ * Serialise s->snapshots into freshly allocated clusters, point the image
+ * header at the new table (snapshots_offset and nb_snapshots fields), and
+ * then free the clusters of the old table.
+ *
+ * No extra data is written: the header is zeroed first, so extra_data_size
+ * stays 0.  Returns 0 on success, -1 on a write error.
+ *
+ * NOTE(review): on failure the newly allocated clusters are not released,
+ * and the alloc_clusters() result is not checked -- confirm whether that is
+ * intended.
+ */
+static int qcow_write_snapshots(BlockDriverState *bs)
+{
+    BDRVQcowState *s = bs->opaque;
+    QCowSnapshot *sn;
+    QCowSnapshotHeader h;
+    int i, name_size, id_str_size, snapshots_size;
+    uint64_t data64;
+    uint32_t data32;
+    int64_t offset, snapshots_offset;
+
+    /* compute the size of the snapshots */
+    offset = 0;
+    for(i = 0; i < s->nb_snapshots; i++) {
+        sn = s->snapshots + i;
+        offset = align_offset(offset, 8);
+        offset += sizeof(h);
+        offset += strlen(sn->id_str);
+        offset += strlen(sn->name);
+    }
+    snapshots_size = offset;
+
+    snapshots_offset = alloc_clusters(bs, snapshots_size);
+    offset = snapshots_offset;
+
+    for(i = 0; i < s->nb_snapshots; i++) {
+        sn = s->snapshots + i;
+        memset(&h, 0, sizeof(h));
+        h.l1_table_offset = cpu_to_be64(sn->l1_table_offset);
+        h.l1_size = cpu_to_be32(sn->l1_size);
+        h.vm_state_size = cpu_to_be32(sn->vm_state_size);
+        h.date_sec = cpu_to_be32(sn->date_sec);
+        h.date_nsec = cpu_to_be32(sn->date_nsec);
+        h.vm_clock_nsec = cpu_to_be64(sn->vm_clock_nsec);
+
+        id_str_size = strlen(sn->id_str);
+        name_size = strlen(sn->name);
+        h.id_str_size = cpu_to_be16(id_str_size);
+        h.name_size = cpu_to_be16(name_size);
+        /* each record starts on an 8-byte boundary */
+        offset = align_offset(offset, 8);
+        if (bdrv_pwrite(s->hd, offset, &h, sizeof(h)) != sizeof(h))
+            goto fail;
+        offset += sizeof(h);
+        if (bdrv_pwrite(s->hd, offset, sn->id_str, id_str_size) != id_str_size)
+            goto fail;
+        offset += id_str_size;
+        if (bdrv_pwrite(s->hd, offset, sn->name, name_size) != name_size)
+            goto fail;
+        offset += name_size;
+    }
+
+    /* update the various header fields */
+    data64 = cpu_to_be64(snapshots_offset);
+    if (bdrv_pwrite(s->hd, offsetof(QCowHeader, snapshots_offset),
+                    &data64, sizeof(data64)) != sizeof(data64))
+        goto fail;
+    data32 = cpu_to_be32(s->nb_snapshots);
+    if (bdrv_pwrite(s->hd, offsetof(QCowHeader, nb_snapshots),
+                    &data32, sizeof(data32)) != sizeof(data32))
+        goto fail;
+
+    /* free the old snapshot table */
+    free_clusters(bs, s->snapshots_offset, s->snapshots_size);
+    s->snapshots_offset = snapshots_offset;
+    s->snapshots_size = snapshots_size;
+    return 0;
+ fail:
+    return -1;
+}
+
+/* Store in id_str (capacity id_str_size) the decimal form of one more than
+   the largest numeric snapshot id currently present in the list. */
+static void find_new_snapshot_id(BlockDriverState *bs,
+                                 char *id_str, int id_str_size)
+{
+    BDRVQcowState *s = bs->opaque;
+    int idx, highest = 0;
+
+    for (idx = 0; idx < s->nb_snapshots; idx++) {
+        int current = strtoul(s->snapshots[idx].id_str, NULL, 10);
+
+        if (current > highest) {
+            highest = current;
+        }
+    }
+    snprintf(id_str, id_str_size, "%d", highest + 1);
+}
+
+/* Return the index of the snapshot whose id string equals id_str,
+   or -1 when no snapshot has that id. */
+static int find_snapshot_by_id(BlockDriverState *bs, const char *id_str)
+{
+    BDRVQcowState *s = bs->opaque;
+    int idx = 0;
+
+    while (idx < s->nb_snapshots) {
+        if (strcmp(s->snapshots[idx].id_str, id_str) == 0) {
+            return idx;
+        }
+        idx++;
+    }
+    return -1;
+}
+
+/* Look a snapshot up by id first and, if that fails, by name.
+   Returns its index, or -1 when neither lookup matches. */
+static int find_snapshot_by_id_or_name(BlockDriverState *bs, const char *name)
+{
+    BDRVQcowState *s = bs->opaque;
+    int by_id, idx;
+
+    by_id = find_snapshot_by_id(bs, name);
+    if (by_id >= 0) {
+        return by_id;
+    }
+
+    idx = 0;
+    while (idx < s->nb_snapshots) {
+        if (strcmp(s->snapshots[idx].name, name) == 0) {
+            return idx;
+        }
+        idx++;
+    }
+    return -1;
+}
+
+/*
+ * Create a new snapshot from the current state of the image.
+ *
+ * If sn_info->id_str is empty, a new id is constructed and written back
+ * into sn_info->id_str. The snapshot receives its own copy of the active
+ * L1 table and is appended to the in-memory list, which is then written
+ * to the image.
+ *
+ * Returns 0 on success, -ENOENT if the id is already in use, and -1 on any
+ * other failure.
+ */
+static int qcow_snapshot_create(BlockDriverState *bs,
+                                QEMUSnapshotInfo *sn_info)
+{
+    BDRVQcowState *s = bs->opaque;
+    QCowSnapshot *snapshots1, sn1, *sn = &sn1;
+    int i, ret;
+    uint64_t *l1_table = NULL;
+
+    /* zeroed so that the fail path can free the string pointers safely */
+    memset(sn, 0, sizeof(*sn));
+
+    if (sn_info->id_str[0] == '\0') {
+        /* compute a new id */
+        find_new_snapshot_id(bs, sn_info->id_str, sizeof(sn_info->id_str));
+    }
+
+    /* check that the ID is unique */
+    if (find_snapshot_by_id(bs, sn_info->id_str) >= 0)
+        return -ENOENT;
+
+    sn->id_str = qemu_strdup(sn_info->id_str);
+    if (!sn->id_str)
+        goto fail;
+    sn->name = qemu_strdup(sn_info->name);
+    if (!sn->name)
+        goto fail;
+    sn->vm_state_size = sn_info->vm_state_size;
+    sn->date_sec = sn_info->date_sec;
+    sn->date_nsec = sn_info->date_nsec;
+    sn->vm_clock_nsec = sn_info->vm_clock_nsec;
+
+    ret = update_snapshot_refcount(bs, s->l1_table_offset, s->l1_size, 1);
+    if (ret < 0)
+        goto fail;
+
+    /* create the L1 table of the snapshot */
+    sn->l1_table_offset = alloc_clusters(bs, s->l1_size * sizeof(uint64_t));
+    sn->l1_size = s->l1_size;
+
+    /* the on-disk copy is big endian, the in-memory table is host order */
+    l1_table = qemu_malloc(s->l1_size * sizeof(uint64_t));
+    for(i = 0; i < s->l1_size; i++) {
+        l1_table[i] = cpu_to_be64(s->l1_table[i]);
+    }
+    if (bdrv_pwrite(s->hd, sn->l1_table_offset,
+                    l1_table, s->l1_size * sizeof(uint64_t)) !=
+        (s->l1_size * sizeof(uint64_t)))
+        goto fail;
+    qemu_free(l1_table);
+    l1_table = NULL;
+
+    /* append the new entry to a one-larger copy of the snapshot array */
+    snapshots1 = qemu_malloc((s->nb_snapshots + 1) * sizeof(QCowSnapshot));
+    if (s->snapshots) {
+        memcpy(snapshots1, s->snapshots, s->nb_snapshots * sizeof(QCowSnapshot));
+        qemu_free(s->snapshots);
+    }
+    s->snapshots = snapshots1;
+    s->snapshots[s->nb_snapshots++] = *sn;
+
+    if (qcow_write_snapshots(bs) < 0) {
+        /* The appended entry shares its id_str/name pointers with *sn,
+           which the fail path frees. Drop it from the list again so that
+           no dangling pointers stay reachable through s->snapshots. */
+        s->nb_snapshots--;
+        goto fail;
+    }
+#ifdef DEBUG_ALLOC
+    check_refcounts(bs);
+#endif
+    return 0;
+ fail:
+    qemu_free(sn->id_str);
+    qemu_free(sn->name);
+    qemu_free(l1_table);
+    return -1;
+}
+
+/*
+ * Make the snapshot identified by snapshot_id (an id or a name) the current
+ * state of the disk image by copying its L1 table over the active one.
+ *
+ * Returns 0 on success, -ENOENT if no such snapshot exists and -EIO if any
+ * of the refcount updates, the L1 resize or the table I/O fails.
+ */
+static int qcow_snapshot_goto(BlockDriverState *bs,
+                              const char *snapshot_id)
+{
+    BDRVQcowState *s = bs->opaque;
+    QCowSnapshot *sn;
+    int i, snapshot_index, l1_size2;
+
+    snapshot_index = find_snapshot_by_id_or_name(bs, snapshot_id);
+    if (snapshot_index < 0)
+        return -ENOENT;
+    sn = &s->snapshots[snapshot_index];
+
+    /* the active L1 table is about to be replaced: apply a -1 refcount
+       update for what it currently references */
+    if (update_snapshot_refcount(bs, s->l1_table_offset, s->l1_size, -1) < 0)
+        goto fail;
+
+    /* make sure the active table can hold the snapshot's L1 entries */
+    if (grow_l1_table(bs, sn->l1_size) < 0)
+        goto fail;
+
+    s->l1_size = sn->l1_size;
+    l1_size2 = s->l1_size * sizeof(uint64_t);
+    /* copy the snapshot l1 table to the current l1 table */
+    if (bdrv_pread(s->hd, sn->l1_table_offset,
+                   s->l1_table, l1_size2) != l1_size2)
+        goto fail;
+    /* written back unchanged, i.e. still in on-disk (big endian) order */
+    if (bdrv_pwrite(s->hd, s->l1_table_offset,
+                    s->l1_table, l1_size2) != l1_size2)
+        goto fail;
+    /* only now convert the in-memory copy to host byte order */
+    for(i = 0;i < s->l1_size; i++) {
+        be64_to_cpus(&s->l1_table[i]);
+    }
+
+    /* apply a +1 refcount update for the newly installed table */
+    if (update_snapshot_refcount(bs, s->l1_table_offset, s->l1_size, 1) < 0)
+        goto fail;
+
+#ifdef DEBUG_ALLOC
+    check_refcounts(bs);
+#endif
+    return 0;
+ fail:
+    return -EIO;
+}
+
+/*
+ * Delete the snapshot identified by snapshot_id (an id or a name).
+ *
+ * Applies a -1 refcount update for the snapshot's L1 table, frees the
+ * clusters of that table, removes the entry from the in-memory list and
+ * rewrites the snapshot table.
+ *
+ * Returns 0 on success, -ENOENT if no such snapshot exists, or the negative
+ * value returned by the failing helper.
+ */
+static int qcow_snapshot_delete(BlockDriverState *bs, const char *snapshot_id)
+{
+    BDRVQcowState *s = bs->opaque;
+    QCowSnapshot *sn;
+    int snapshot_index, ret;
+
+    snapshot_index = find_snapshot_by_id_or_name(bs, snapshot_id);
+    if (snapshot_index < 0)
+        return -ENOENT;
+    sn = &s->snapshots[snapshot_index];
+
+    ret = update_snapshot_refcount(bs, sn->l1_table_offset, sn->l1_size, -1);
+    if (ret < 0)
+        return ret;
+    /* must update the copied flag on the current cluster offsets */
+    ret = update_snapshot_refcount(bs, s->l1_table_offset, s->l1_size, 0);
+    if (ret < 0)
+        return ret;
+    free_clusters(bs, sn->l1_table_offset, sn->l1_size * sizeof(uint64_t));
+
+    qemu_free(sn->id_str);
+    qemu_free(sn->name);
+    /* close the gap in the array by shifting the following entries down */
+    memmove(sn, sn + 1, (s->nb_snapshots - snapshot_index - 1) * sizeof(*sn));
+    s->nb_snapshots--;
+    ret = qcow_write_snapshots(bs);
+    if (ret < 0) {
+        /* XXX: restore snapshot if error ? */
+        return ret;
+    }
+#ifdef DEBUG_ALLOC
+    check_refcounts(bs);
+#endif
+    return 0;
+}
+
+/* Build a newly allocated, zero-initialized array with one QEMUSnapshotInfo
+   per snapshot, store it in *psn_tab and return the number of snapshots. */
+static int qcow_snapshot_list(BlockDriverState *bs,
+                              QEMUSnapshotInfo **psn_tab)
+{
+    BDRVQcowState *s = bs->opaque;
+    QEMUSnapshotInfo *table;
+    int idx;
+
+    table = qemu_mallocz(s->nb_snapshots * sizeof(QEMUSnapshotInfo));
+    for (idx = 0; idx < s->nb_snapshots; idx++) {
+        QCowSnapshot *src = &s->snapshots[idx];
+        QEMUSnapshotInfo *dst = &table[idx];
+
+        /* bounded copies into the fixed-size fields of the info record */
+        pstrcpy(dst->id_str, sizeof(dst->id_str), src->id_str);
+        pstrcpy(dst->name, sizeof(dst->name), src->name);
+        dst->vm_state_size = src->vm_state_size;
+        dst->date_sec = src->date_sec;
+        dst->date_nsec = src->date_nsec;
+        dst->vm_clock_nsec = src->vm_clock_nsec;
+    }
+    *psn_tab = table;
+    return s->nb_snapshots;
+}
+
+/*********************************************************/
+/* refcount handling */
+
+/* Allocate the one-cluster refcount block cache and the in-memory refcount
+   table; when the table is non-empty, read it from the image and convert
+   its entries to host byte order.
+   Returns 0 on success and -ENOMEM when the table cannot be read in full. */
+static int refcount_init(BlockDriverState *bs)
+{
+    BDRVQcowState *s = bs->opaque;
+    int table_bytes, nread, idx;
+
+    s->refcount_block_cache = qemu_malloc(s->cluster_size);
+    table_bytes = s->refcount_table_size * sizeof(uint64_t);
+    s->refcount_table = qemu_malloc(table_bytes);
+
+    if (!(s->refcount_table_size > 0)) {
+        return 0;
+    }
+
+    nread = bdrv_pread(s->hd, s->refcount_table_offset,
+                       s->refcount_table, table_bytes);
+    if (nread != table_bytes) {
+        return -ENOMEM;
+    }
+    for (idx = 0; idx < s->refcount_table_size; idx++) {
+        be64_to_cpus(&s->refcount_table[idx]);
+    }
+    return 0;
+}
+
+/* Release the refcount block cache and the in-memory refcount table. */
+static void refcount_close(BlockDriverState *bs)
+{
+    BDRVQcowState *s = bs->opaque;
+    qemu_free(s->refcount_block_cache);
+    qemu_free(s->refcount_table);
+}
+
+
+/* Read the cluster at refcount_block_offset into the single-entry refcount
+   block cache and remember which block it holds.
+   Returns 0 on success and -EIO on a short or failed read. */
+static int load_refcount_block(BlockDriverState *bs,
+                               int64_t refcount_block_offset)
+{
+    BDRVQcowState *s = bs->opaque;
+    int nread = bdrv_pread(s->hd, refcount_block_offset,
+                           s->refcount_block_cache, s->cluster_size);
+
+    if (nread == s->cluster_size) {
+        s->refcount_block_cache_offset = refcount_block_offset;
+        return 0;
+    }
+    return -EIO;
+}
+
+/* Return the reference count of cluster cluster_index.
+   Clusters that lie beyond the refcount table, or whose refcount block has
+   not been allocated, count as 0. If the refcount block cannot be read,
+   1 is returned so that the cluster is treated as allocated. */
+static int get_refcount(BlockDriverState *bs, int64_t cluster_index)
+{
+    BDRVQcowState *s = bs->opaque;
+    int table_idx, entry_idx;
+    int64_t block_offset;
+
+    table_idx = cluster_index >> (s->cluster_bits - REFCOUNT_SHIFT);
+    if (table_idx >= s->refcount_table_size) {
+        return 0;
+    }
+
+    block_offset = s->refcount_table[table_idx];
+    if (!block_offset) {
+        return 0;
+    }
+
+    /* better than nothing: return allocated if read error */
+    if (block_offset != s->refcount_block_cache_offset &&
+        load_refcount_block(bs, block_offset) < 0) {
+        return 1;
+    }
+
+    entry_idx = cluster_index &
+        ((1 << (s->cluster_bits - REFCOUNT_SHIFT)) - 1);
+    return be16_to_cpu(s->refcount_block_cache[entry_idx]);
+}
+
+/* Find a run of consecutive clusters with refcount 0 that is large enough
+   for size bytes, scanning forward from s->free_cluster_index, and return
+   the byte offset of its first cluster. The refcounts of the returned
+   clusters are not touched.
+   return < 0 if error */
+static int64_t alloc_clusters_noref(BlockDriverState *bs, int64_t size)
+{
+    BDRVQcowState *s = bs->opaque;
+    int run_length, nb_clusters;
+
+    nb_clusters = size_to_clusters(s, size);
+    run_length = 0;
+    while (run_length < nb_clusters) {
+        int64_t candidate = s->free_cluster_index++;
+
+        if (get_refcount(bs, candidate) != 0) {
+            /* cluster in use: the run has to start over after it */
+            run_length = 0;
+        } else {
+            run_length++;
+        }
+    }
+#ifdef DEBUG_ALLOC2
+    printf("alloc_clusters: size=%lld -> %lld\n",
+            size,
+            (s->free_cluster_index - nb_clusters) << s->cluster_bits);
+#endif
+    return (s->free_cluster_index - nb_clusters) << s->cluster_bits;
+}
+
+/* Allocate enough clusters for size bytes and apply a +1 refcount update to
+   them. Returns the byte offset of the allocation. */
+static int64_t alloc_clusters(BlockDriverState *bs, int64_t size)
+{
+    int64_t first = alloc_clusters_noref(bs, size);
+
+    update_refcount(bs, first, size, 1);
+    return first;
+}
+
+/* only used to allocate compressed sectors. We try to allocate
+   contiguous sectors. size must be <= cluster_size.
+   s->free_byte_offset is the next unused byte of a partially filled
+   cluster; 0 means there is no such cluster.
+   Returns the byte offset of the allocated area. */
+static int64_t alloc_bytes(BlockDriverState *bs, int size)
+{
+    BDRVQcowState *s = bs->opaque;
+    int64_t offset, cluster_offset;
+    int free_in_cluster;
+
+    assert(size > 0 && size <= s->cluster_size);
+    if (s->free_byte_offset == 0) {
+        /* no partially used cluster yet: start a fresh one */
+        s->free_byte_offset = alloc_clusters(bs, s->cluster_size);
+    }
+ redo:
+    /* bytes left between free_byte_offset and the end of its cluster */
+    free_in_cluster = s->cluster_size -
+        (s->free_byte_offset & (s->cluster_size - 1));
+    if (size <= free_in_cluster) {
+        /* enough space in current cluster */
+        offset = s->free_byte_offset;
+        s->free_byte_offset += size;
+        free_in_cluster -= size;
+        /* cluster used up completely: forget it */
+        if (free_in_cluster == 0)
+            s->free_byte_offset = 0;
+        /* allocation does not start at the cluster boundary: the cluster
+           gets one more reference */
+        if ((offset & (s->cluster_size - 1)) != 0)
+            update_cluster_refcount(bs, offset >> s->cluster_bits, 1);
+    } else {
+        offset = alloc_clusters(bs, s->cluster_size);
+        cluster_offset = s->free_byte_offset & ~(s->cluster_size - 1);
+        if ((cluster_offset + s->cluster_size) == offset) {
+            /* we are lucky: contiguous data */
+            /* the allocation starts in the old cluster and continues into
+               the new one, so the old cluster gets one more reference */
+            offset = s->free_byte_offset;
+            update_cluster_refcount(bs, offset >> s->cluster_bits, 1);
+            s->free_byte_offset += size;
+        } else {
+            /* not adjacent: restart the allocation in the new cluster */
+            s->free_byte_offset = offset;
+            goto redo;
+        }
+    }
+    return offset;
+}
+
+/* Apply a -1 refcount update to every cluster touched by the byte range
+   [offset, offset + size). */
+static void free_clusters(BlockDriverState *bs,
+                          int64_t offset, int64_t size)
+{
+    update_refcount(bs, offset, size, -1);
+}
+
+/*
+ * Enlarge the refcount table so that it holds at least min_size entries.
+ *
+ * A larger table is built in memory, written to newly allocated clusters,
+ * and the image header is updated to point at it; only then are the
+ * in-memory state switched over and the old table's clusters released.
+ *
+ * Returns 0 on success (or if the table is already large enough) and -EIO
+ * if one of the writes is short or fails.
+ */
+static int grow_refcount_table(BlockDriverState *bs, int min_size)
+{
+    BDRVQcowState *s = bs->opaque;
+    int new_table_size, new_table_size2, refcount_table_clusters, i, ret;
+    uint64_t *new_table;
+    int64_t table_offset;
+    uint8_t data[12];
+    int old_table_size;
+    int64_t old_table_offset;
+
+    if (min_size <= s->refcount_table_size)
+        return 0;
+    /* compute new table size */
+    /* one cluster holds 1 << (cluster_bits - 3) eight-byte entries */
+    refcount_table_clusters = s->refcount_table_size >> (s->cluster_bits - 3);
+    for(;;) {
+        if (refcount_table_clusters == 0) {
+            refcount_table_clusters = 1;
+        } else {
+            /* grow by a factor of about 1.5 per iteration */
+            refcount_table_clusters = (refcount_table_clusters * 3 + 1) / 2;
+        }
+        new_table_size = refcount_table_clusters << (s->cluster_bits - 3);
+        if (min_size <= new_table_size)
+            break;
+    }
+#ifdef DEBUG_ALLOC2
+    printf("grow_refcount_table from %d to %d\n",
+           s->refcount_table_size,
+           new_table_size);
+#endif
+    new_table_size2 = new_table_size * sizeof(uint64_t);
+    new_table = qemu_mallocz(new_table_size2);
+    memcpy(new_table, s->refcount_table,
+           s->refcount_table_size * sizeof(uint64_t));
+    /* convert the copied entries to big endian for the write below */
+    for(i = 0; i < s->refcount_table_size; i++)
+        cpu_to_be64s(&new_table[i]);
+    /* Note: we cannot update the refcount now to avoid recursion */
+    table_offset = alloc_clusters_noref(bs, new_table_size2);
+    ret = bdrv_pwrite(s->hd, table_offset, new_table, new_table_size2);
+    if (ret != new_table_size2)
+        goto fail;
+    /* back to host byte order for in-memory use */
+    for(i = 0; i < s->refcount_table_size; i++)
+        be64_to_cpus(&new_table[i]);
+
+    /* 8 bytes of table offset followed by 4 bytes of cluster count, written
+       in one go starting at the header's refcount_table_offset field */
+    cpu_to_be64w((uint64_t*)data, table_offset);
+    cpu_to_be32w((uint32_t*)(data + 8), refcount_table_clusters);
+    if (bdrv_pwrite(s->hd, offsetof(QCowHeader, refcount_table_offset),
+                    data, sizeof(data)) != sizeof(data))
+        goto fail;
+    qemu_free(s->refcount_table);
+    old_table_offset = s->refcount_table_offset;
+    old_table_size = s->refcount_table_size;
+    s->refcount_table = new_table;
+    s->refcount_table_size = new_table_size;
+    s->refcount_table_offset = table_offset;
+
+    /* the new table is in place, so its own clusters can be accounted now */
+    update_refcount(bs, table_offset, new_table_size2, 1);
+    free_clusters(bs, old_table_offset, old_table_size * sizeof(uint64_t));
+    return 0;
+ fail:
+    /* NOTE(review): the clusters at table_offset came from
+       alloc_clusters_noref() and have not received their +1 refcount update
+       at this point; confirm that the -1 update here is intended. */
+    free_clusters(bs, table_offset, new_table_size2);
+    qemu_free(new_table);
+    return -EIO;
+}
+
+/* Add addend to the refcount of cluster cluster_index and write the changed
+   entry back to the image, growing the refcount table or creating a new
+   refcount block first if needed (both only when addend is not negative).
+   Returns the new refcount on success and a negative errno value on
+   failure. */
+/* addend must be 1 or -1 */
+/* XXX: cache several refcount block clusters ? */
+static int update_cluster_refcount(BlockDriverState *bs,
+                                   int64_t cluster_index,
+                                   int addend)
+{
+    BDRVQcowState *s = bs->opaque;
+    int64_t offset, refcount_block_offset;
+    int ret, refcount_table_index, block_index, refcount;
+    uint64_t data64;
+
+    refcount_table_index = cluster_index >> (s->cluster_bits - REFCOUNT_SHIFT);
+    if (refcount_table_index >= s->refcount_table_size) {
+        /* a cluster outside the table cannot be released */
+        if (addend < 0)
+            return -EINVAL;
+        ret = grow_refcount_table(bs, refcount_table_index + 1);
+        if (ret < 0)
+            return ret;
+    }
+    refcount_block_offset = s->refcount_table[refcount_table_index];
+    if (!refcount_block_offset) {
+        /* no refcount block: nothing there that could be decremented */
+        if (addend < 0)
+            return -EINVAL;
+        /* create a new refcount block */
+        /* Note: we cannot update the refcount now to avoid recursion */
+        offset = alloc_clusters_noref(bs, s->cluster_size);
+        /* the cache buffer doubles as the zeroed image of the new block */
+        memset(s->refcount_block_cache, 0, s->cluster_size);
+        ret = bdrv_pwrite(s->hd, offset, s->refcount_block_cache, s->cluster_size);
+        if (ret != s->cluster_size)
+            return -EINVAL;
+        s->refcount_table[refcount_table_index] = offset;
+        data64 = cpu_to_be64(offset);
+        ret = bdrv_pwrite(s->hd, s->refcount_table_offset +
+                          refcount_table_index * sizeof(uint64_t),
+                          &data64, sizeof(data64));
+        if (ret != sizeof(data64))
+            return -EINVAL;
+
+        refcount_block_offset = offset;
+        s->refcount_block_cache_offset = offset;
+        /* the block is registered in the table now, so it can be counted */
+        update_refcount(bs, offset, s->cluster_size, 1);
+    } else {
+        if (refcount_block_offset != s->refcount_block_cache_offset) {
+            if (load_refcount_block(bs, refcount_block_offset) < 0)
+                return -EIO;
+        }
+    }
+    /* we can update the count and save it */
+    block_index = cluster_index &
+        ((1 << (s->cluster_bits - REFCOUNT_SHIFT)) - 1);
+    refcount = be16_to_cpu(s->refcount_block_cache[block_index]);
+    refcount += addend;
+    /* entries are 16 bit wide */
+    if (refcount < 0 || refcount > 0xffff)
+        return -EINVAL;
+    /* a cluster that became free lowers the allocation search start */
+    if (refcount == 0 && cluster_index < s->free_cluster_index) {
+        s->free_cluster_index = cluster_index;
+    }
+    s->refcount_block_cache[block_index] = cpu_to_be16(refcount);
+    /* write back only the two bytes of the changed entry */
+    if (bdrv_pwrite(s->hd,
+                    refcount_block_offset + (block_index << REFCOUNT_SHIFT),
+                    &s->refcount_block_cache[block_index], 2) != 2)
+        return -EIO;
+    return refcount;
+}
+
+/* Apply addend to the refcount of every cluster touched by the byte range
+   [offset, offset + length). Nothing happens when length is not positive.
+   The results of the per-cluster updates are not examined. */
+static void update_refcount(BlockDriverState *bs,
+                            int64_t offset, int64_t length,
+                            int addend)
+{
+    BDRVQcowState *s = bs->opaque;
+    int64_t first, final, pos;
+
+#ifdef DEBUG_ALLOC2
+    printf("update_refcount: offset=%lld size=%lld addend=%d\n",
+           offset, length, addend);
+#endif
+    if (length > 0) {
+        /* round both ends of the range down to their cluster start */
+        first = offset & ~(s->cluster_size - 1);
+        final = (offset + length - 1) & ~(s->cluster_size - 1);
+
+        pos = first;
+        while (pos <= final) {
+            update_cluster_refcount(bs, pos >> s->cluster_bits, addend);
+            pos += s->cluster_size;
+        }
+    }
+}
+
+/*
+ * Increments, in the caller-supplied refcount table, the count of every
+ * cluster touched by the byte range [offset, offset + size). This is used
+ * to build a temporary refcount table from the L1 and L2 tables, which can
+ * then be compared with the refcount table saved in the image.
+ *
+ * Returns the number of errors in the image that were found
+ */
+static int inc_refcounts(BlockDriverState *bs,
+                          uint16_t *refcount_table,
+                          int refcount_table_size,
+                          int64_t offset, int64_t size)
+{
+    BDRVQcowState *s = bs->opaque;
+    int64_t first, final, pos;
+    int idx;
+    int errors = 0;
+
+    if (size <= 0) {
+        return 0;
+    }
+
+    first = offset & ~(s->cluster_size - 1);
+    final = (offset + size - 1) & ~(s->cluster_size - 1);
+    for (pos = first; pos <= final; pos += s->cluster_size) {
+        idx = pos >> s->cluster_bits;
+        if (idx >= 0 && idx < refcount_table_size) {
+            /* a 16 bit counter wrapping around to 0 is an overflow */
+            if (++refcount_table[idx] == 0) {
+                fprintf(stderr, "ERROR: overflow cluster offset=0x%" PRIx64
+                    "\n", pos);
+                errors++;
+            }
+            continue;
+        }
+        fprintf(stderr, "ERROR: invalid cluster offset=0x%" PRIx64 "\n",
+            pos);
+        errors++;
+    }
+
+    return errors;
+}
+
+/*
+ * Increases the refcount in the given refcount table for all clusters
+ * referenced in the L2 table at l2_offset. While doing so, performs some
+ * checks on the L2 entries.
+ *
+ * refcount_table/refcount_table_size describe the temporary table that is
+ * being built. If check_copied is non-zero, QCOW_OFLAG_COPIED of each
+ * uncompressed entry is compared against the refcount stored in the image.
+ *
+ * Returns the number of errors found by the checks or -errno if an internal
+ * error occurred.
+ */
+static int check_refcounts_l2(BlockDriverState *bs,
+    uint16_t *refcount_table, int refcount_table_size, int64_t l2_offset,
+    int check_copied)
+{
+    BDRVQcowState *s = bs->opaque;
+    uint64_t *l2_table, offset;
+    int i, l2_size, nb_csectors, refcount;
+    int errors = 0;
+
+    /* Read L2 table from disk */
+    l2_size = s->l2_size * sizeof(uint64_t);
+    l2_table = qemu_malloc(l2_size);
+
+    if (bdrv_pread(s->hd, l2_offset, l2_table, l2_size) != l2_size)
+        goto fail;
+
+    /* Do the actual checks */
+    for(i = 0; i < s->l2_size; i++) {
+        offset = be64_to_cpu(l2_table[i]);
+        if (offset != 0) {
+            if (offset & QCOW_OFLAG_COMPRESSED) {
+                /* Compressed clusters don't have QCOW_OFLAG_COPIED */
+                if (offset & QCOW_OFLAG_COPIED) {
+                    fprintf(stderr, "ERROR: cluster %" PRId64 ": "
+                        "copied flag must never be set for compressed "
+                        "clusters\n", offset >> s->cluster_bits);
+                    offset &= ~QCOW_OFLAG_COPIED;
+                    errors++;
+                }
+
+                /* Mark cluster as used */
+                /* compressed data is accounted in units of 512 bytes */
+                nb_csectors = ((offset >> s->csize_shift) &
+                               s->csize_mask) + 1;
+                offset &= s->cluster_offset_mask;
+                errors += inc_refcounts(bs, refcount_table,
+                              refcount_table_size,
+                              offset & ~511, nb_csectors * 512);
+            } else {
+                /* QCOW_OFLAG_COPIED must be set iff refcount == 1 */
+                if (check_copied) {
+                    uint64_t entry = offset;
+                    offset &= ~QCOW_OFLAG_COPIED;
+                    refcount = get_refcount(bs, offset >> s->cluster_bits);
+                    if ((refcount == 1) != ((entry & QCOW_OFLAG_COPIED) != 0)) {
+                        fprintf(stderr, "ERROR OFLAG_COPIED: offset=%"
+                            PRIx64 " refcount=%d\n", entry, refcount);
+                        errors++;
+                    }
+                }
+
+                /* Mark cluster as used */
+                offset &= ~QCOW_OFLAG_COPIED;
+                errors += inc_refcounts(bs, refcount_table,
+                              refcount_table_size,
+                              offset, s->cluster_size);
+
+                /* Correct offsets are cluster aligned */
+                if (offset & (s->cluster_size - 1)) {
+                    fprintf(stderr, "ERROR offset=%" PRIx64 ": Cluster is not "
+                        "properly aligned; L2 entry corrupted.\n", offset);
+                    errors++;
+                }
+            }
+        }
+    }
+
+    qemu_free(l2_table);
+    return errors;
+
+fail:
+    /* name this function, not check_refcounts_l1, so the message points at
+       the place where the read actually failed */
+    fprintf(stderr, "ERROR: I/O error in check_refcounts_l2\n");
+    qemu_free(l2_table);
+    return -EIO;
+}
+
+/*
+ * Increases the refcount for the L1 table, its L2 tables and all referenced
+ * clusters in the given refcount table. While doing so, performs some checks
+ * on L1 and L2 entries.
+ *
+ * l1_table_offset/l1_size locate the L1 table in the image file (l1_size is
+ * a number of entries). If check_copied is non-zero, QCOW_OFLAG_COPIED of
+ * each entry is compared against the refcount stored in the image; the flag
+ * is passed on unchanged to check_refcounts_l2().
+ *
+ * Returns the number of errors found by the checks or -errno if an internal
+ * error occurred.
+ */
+static int check_refcounts_l1(BlockDriverState *bs,
+                              uint16_t *refcount_table,
+                              int refcount_table_size,
+                              int64_t l1_table_offset, int l1_size,
+                              int check_copied)
+{
+    BDRVQcowState *s = bs->opaque;
+    uint64_t *l1_table, l2_offset, l1_size2;
+    int i, refcount, ret;
+    int errors = 0;
+
+    /* size of the L1 table in bytes */
+    l1_size2 = l1_size * sizeof(uint64_t);
+
+    /* Mark L1 table as used */
+    errors += inc_refcounts(bs, refcount_table, refcount_table_size,
+                  l1_table_offset, l1_size2);
+
+    /* Read L1 table entries from disk */
+    l1_table = qemu_malloc(l1_size2);
+    if (bdrv_pread(s->hd, l1_table_offset,
+                   l1_table, l1_size2) != l1_size2)
+        goto fail;
+    for(i = 0;i < l1_size; i++)
+        be64_to_cpus(&l1_table[i]);
+
+    /* Do the actual checks */
+    for(i = 0; i < l1_size; i++) {
+        l2_offset = l1_table[i];
+        /* a zero entry means there is no L2 table to look at */
+        if (l2_offset) {
+            /* QCOW_OFLAG_COPIED must be set iff refcount == 1 */
+            if (check_copied) {
+                refcount = get_refcount(bs, (l2_offset & ~QCOW_OFLAG_COPIED)
+                    >> s->cluster_bits);
+                if ((refcount == 1) != ((l2_offset & QCOW_OFLAG_COPIED) != 0)) {
+                    fprintf(stderr, "ERROR OFLAG_COPIED: l2_offset=%" PRIx64
+                        " refcount=%d\n", l2_offset, refcount);
+                    errors++;
+                }
+            }
+
+            /* Mark L2 table as used */
+            l2_offset &= ~QCOW_OFLAG_COPIED;
+            errors += inc_refcounts(bs, refcount_table,
+                          refcount_table_size,
+                          l2_offset,
+                          s->cluster_size);
+
+            /* L2 tables are cluster aligned */
+            if (l2_offset & (s->cluster_size - 1)) {
+                fprintf(stderr, "ERROR l2_offset=%" PRIx64 ": Table is not "
+                    "cluster aligned; L1 entry corrupted\n", l2_offset);
+                errors++;
+            }
+
+            /* Process and check L2 entries */
+            ret = check_refcounts_l2(bs, refcount_table, refcount_table_size,
+                l2_offset, check_copied);
+            /* a negative value is an internal error, not an error count */
+            if (ret < 0) {
+                goto fail;
+            }
+            errors += ret;
+        }
+    }
+    qemu_free(l1_table);
+    return errors;
+
+fail:
+    fprintf(stderr, "ERROR: I/O error in check_refcounts_l1\n");
+    qemu_free(l1_table);
+    return -EIO;
+}
+
+/*
+ * Checks an image for refcount consistency.
+ *
+ * A temporary refcount table covering the whole image file is rebuilt from
+ * the header, the active L1 table, every snapshot's L1 table, the snapshot
+ * table and the refcount structures themselves, and is then compared
+ * cluster by cluster with the refcounts stored in the image.
+ *
+ * Returns 0 if no errors are found, the number of errors in case the image is
+ * detected as corrupted, and -errno when an internal error occurred.
+ */
+static int check_refcounts(BlockDriverState *bs)
+{
+    BDRVQcowState *s = bs->opaque;
+    int64_t size;
+    int nb_clusters, refcount1, refcount2, i;
+    QCowSnapshot *sn;
+    uint16_t *refcount_table;
+    int ret, errors = 0;
+
+    size = bdrv_getlength(s->hd);
+    nb_clusters = size_to_clusters(s, size);
+    refcount_table = qemu_mallocz(nb_clusters * sizeof(uint16_t));
+
+    /* header */
+    errors += inc_refcounts(bs, refcount_table, nb_clusters,
+                  0, s->cluster_size);
+
+    /* current L1 table */
+    ret = check_refcounts_l1(bs, refcount_table, nb_clusters,
+                       s->l1_table_offset, s->l1_size, 1);
+    if (ret < 0) {
+        goto out;
+    }
+    errors += ret;
+
+    /* snapshots */
+    for(i = 0; i < s->nb_snapshots; i++) {
+        sn = s->snapshots + i;
+        /* account for the errors found in snapshot tables as well and
+           propagate internal errors instead of ignoring them */
+        ret = check_refcounts_l1(bs, refcount_table, nb_clusters,
+                           sn->l1_table_offset, sn->l1_size, 0);
+        if (ret < 0) {
+            goto out;
+        }
+        errors += ret;
+    }
+    errors += inc_refcounts(bs, refcount_table, nb_clusters,
+                  s->snapshots_offset, s->snapshots_size);
+
+    /* refcount data */
+    errors += inc_refcounts(bs, refcount_table, nb_clusters,
+                  s->refcount_table_offset,
+                  s->refcount_table_size * sizeof(uint64_t));
+    for(i = 0; i < s->refcount_table_size; i++) {
+        int64_t offset;
+        offset = s->refcount_table[i];
+        if (offset != 0) {
+            errors += inc_refcounts(bs, refcount_table, nb_clusters,
+                          offset, s->cluster_size);
+        }
+    }
+
+    /* compare ref counts */
+    for(i = 0; i < nb_clusters; i++) {
+        refcount1 = get_refcount(bs, i);
+        refcount2 = refcount_table[i];
+        if (refcount1 != refcount2) {
+            fprintf(stderr, "ERROR cluster %d refcount=%d reference=%d\n",
+                   i, refcount1, refcount2);
+            errors++;
+        }
+    }
+
+    ret = errors;
+
+ out:
+    /* single exit so that the temporary table is released on every path */
+    qemu_free(refcount_table);
+
+    return ret;
+}
+
+/* bdrv_check entry of the qcow2 driver table: runs check_refcounts() and
+   returns its result unchanged. */
+static int qcow_check(BlockDriverState *bs)
+{
+    return check_refcounts(bs);
+}
+
+#if 0
+/* Debugging aid (compiled out): prints one line per run of consecutive
+   clusters that share the same refcount. */
+static void dump_refcounts(BlockDriverState *bs)
+{
+    BDRVQcowState *s = bs->opaque;
+    int64_t nb_clusters, k, k1, size;
+    int refcount;
+
+    size = bdrv_getlength(s->hd);
+    nb_clusters = size_to_clusters(s, size);
+    for(k = 0; k < nb_clusters;) {
+        /* k1 is the first cluster of the run, k ends up one past its end */
+        k1 = k;
+        refcount = get_refcount(bs, k);
+        k++;
+        while (k < nb_clusters && get_refcount(bs, k) == refcount)
+            k++;
+        /* NOTE(review): the first value printed is k (one past the run),
+           not k1 (its start) - confirm which one is intended. */
+        printf("%lld: refcount=%d nb=%lld\n", k, refcount, k - k1);
+    }
+}
+#endif
+
+/* Write size bytes from buf at position pos of bs, with bs->growable forced
+   to 1 for the duration of the write and restored afterwards.
+   The result of bdrv_pwrite() is not examined; size is always returned. */
+static int qcow_put_buffer(BlockDriverState *bs, const uint8_t *buf,
+                           int64_t pos, int size)
+{
+    const int saved_growable = bs->growable;
+
+    bs->growable = 1;
+    bdrv_pwrite(bs, pos, buf, size);
+    bs->growable = saved_growable;
+
+    return size;
+}
+
+/* Read size bytes into buf from position pos of bs, with bs->growable
+   forced to 1 for the duration of the read and restored afterwards.
+   Returns whatever bdrv_pread() returned. */
+static int qcow_get_buffer(BlockDriverState *bs, uint8_t *buf,
+                           int64_t pos, int size)
+{
+    const int saved_growable = bs->growable;
+    int nread;
+
+    bs->growable = 1;
+    nread = bdrv_pread(bs, pos, buf, size);
+    bs->growable = saved_growable;
+
+    return nread;
+}
+
+/* Driver description for the "qcow2" format: binds the qcow_* functions of
+   this file to the callbacks of the generic block layer. */
+static BlockDriver bdrv_qcow2 = {
+    .format_name       = "qcow2",
+    .instance_size     = sizeof(BDRVQcowState),
+    .bdrv_probe                = qcow_probe,
+    .bdrv_open         = qcow_open,
+    .bdrv_close                = qcow_close,
+    .bdrv_create       = qcow_create,
+    .bdrv_flush                = qcow_flush,
+    .bdrv_is_allocated = qcow_is_allocated,
+    .bdrv_set_key      = qcow_set_key,
+    .bdrv_make_empty   = qcow_make_empty,
+
+    /* asynchronous I/O */
+    .bdrv_aio_readv    = qcow_aio_readv,
+    .bdrv_aio_writev   = qcow_aio_writev,
+    .bdrv_aio_cancel   = qcow_aio_cancel,
+    .aiocb_size                = sizeof(QCowAIOCB),
+    .bdrv_write_compressed = qcow_write_compressed,
+
+    /* snapshot handling */
+    .bdrv_snapshot_create = qcow_snapshot_create,
+    .bdrv_snapshot_goto        = qcow_snapshot_goto,
+    .bdrv_snapshot_delete = qcow_snapshot_delete,
+    .bdrv_snapshot_list        = qcow_snapshot_list,
+    .bdrv_get_info     = qcow_get_info,
+
+    /* raw buffer access with bs->growable temporarily enabled */
+    .bdrv_put_buffer    = qcow_put_buffer,
+    .bdrv_get_buffer    = qcow_get_buffer,
+
+    .bdrv_create2 = qcow_create2,
+    .bdrv_check = qcow_check,
+};
+
+/* Register the qcow2 driver description with the block layer. */
+static void bdrv_qcow2_init(void)
+{
+    bdrv_register(&bdrv_qcow2);
+}
+
+/* presumably arranges for bdrv_qcow2_init() to run during block module
+   initialization - see the block_init definition (module.h) to confirm */
+block_init(bdrv_qcow2_init);
diff --git a/block/raw-posix.c b/block/raw-posix.c
new file mode 100644 (file)
index 0000000..f3a9476
--- /dev/null
@@ -0,0 +1,1438 @@
+/*
+ * Block driver for RAW files (posix)
+ *
+ * Copyright (c) 2006 Fabrice Bellard
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#include "qemu-common.h"
+#include "qemu-timer.h"
+#include "qemu-char.h"
+#include "block_int.h"
+#include "module.h"
+#ifdef CONFIG_AIO
+#include "posix-aio-compat.h"
+#endif
+
+#ifdef CONFIG_COCOA
+#include <paths.h>
+#include <sys/param.h>
+#include <IOKit/IOKitLib.h>
+#include <IOKit/IOBSD.h>
+#include <IOKit/storage/IOMediaBSDClient.h>
+#include <IOKit/storage/IOMedia.h>
+#include <IOKit/storage/IOCDMedia.h>
+//#include <IOKit/storage/IOCDTypes.h>
+#include <CoreFoundation/CoreFoundation.h>
+#endif
+
+#ifdef __sun__
+#define _POSIX_PTHREAD_SEMANTICS 1
+#include <signal.h>
+#include <sys/dkio.h>
+#endif
+#ifdef __linux__
+#include <sys/ioctl.h>
+#include <linux/cdrom.h>
+#include <linux/fd.h>
+#endif
+#ifdef __FreeBSD__
+#include <signal.h>
+#include <sys/disk.h>
+#include <sys/cdio.h>
+#endif
+
+#ifdef __OpenBSD__
+#include <sys/ioctl.h>
+#include <sys/disklabel.h>
+#include <sys/dkio.h>
+#endif
+
+#ifdef __DragonFly__
+#include <sys/ioctl.h>
+#include <sys/diskslice.h>
+#endif
+
+//#define DEBUG_FLOPPY
+
+//#define DEBUG_BLOCK
+#if defined(DEBUG_BLOCK)
+#define DEBUG_BLOCK_PRINT(formatCstr, ...) do { if (qemu_log_enabled()) \
+    { qemu_log(formatCstr, ## __VA_ARGS__); qemu_log_flush(); } } while (0)
+#else
+#define DEBUG_BLOCK_PRINT(formatCstr, ...)
+#endif
+
+/* OS X does not have O_DSYNC */
+#ifndef O_DSYNC
+#define O_DSYNC O_SYNC
+#endif
+
+/* Approximate O_DIRECT with O_DSYNC if O_DIRECT isn't available */
+#ifndef O_DIRECT
+#define O_DIRECT O_DSYNC
+#endif
+
+#define FTYPE_FILE   0
+#define FTYPE_CD     1
+#define FTYPE_FD     2
+
+#define ALIGNED_BUFFER_SIZE (32 * 512)
+
+/* if the FD is not accessed during that time (in ms), we try to
+   reopen it to see if the disk has been changed */
+#define FD_OPEN_TIMEOUT 1000
+
+/* Per-image state of the raw / host-device drivers (bs->opaque). */
+typedef struct BDRVRawState {
+    int fd;                     /* open descriptor, or -1 when closed */
+    int type;                   /* one of FTYPE_FILE / FTYPE_CD / FTYPE_FD */
+    /* incremented on each failed lseek(), reset to 0 on success; used to
+       throttle the debug message in raw_pread_aligned() */
+    unsigned int lseek_err_cnt;
+#if defined(__linux__)
+    /* linux floppy specific */
+    int fd_open_flags;          /* open(2) flags used when re-opening */
+    int64_t fd_open_time;       /* rt_clock value of last successful fd_open() */
+    int64_t fd_error_time;      /* rt_clock value of last failed open */
+    int fd_got_error;           /* non-zero if the last open attempt failed */
+    int fd_media_changed;       /* latched flag, cleared by raw_media_changed() */
+#endif
+#if defined(__FreeBSD__)
+    int cd_open_flags;          /* open(2) flags saved by hdev_open() for CDs */
+#endif
+    /* bounce buffer of ALIGNED_BUFFER_SIZE bytes allocated by raw_open() when
+       BDRV_O_NOCACHE is requested; NULL otherwise */
+    uint8_t* aligned_buf;
+} BDRVRawState;
+
+static int posix_aio_init(void);
+
+static int fd_open(BlockDriverState *bs);
+
+#if defined(__FreeBSD__)
+static int cd_open(BlockDriverState *bs);
+#endif
+
+static int raw_is_inserted(BlockDriverState *bs);
+
+/*
+ * Open `filename` as a raw image.
+ *
+ * `flags` are BDRV_O_* flags: the access mode selects O_RDWR or O_RDONLY
+ * (bs->read_only is set in the latter case), BDRV_O_CREAT adds
+ * O_CREAT | O_TRUNC, BDRV_O_NOCACHE selects O_DIRECT and allocates the
+ * bounce buffer, and the absence of BDRV_O_CACHE_WB selects O_DSYNC.
+ *
+ * Returns 0 on success or a negative errno value.  On failure s->fd is -1
+ * and s->aligned_buf is NULL, so no stale descriptor is left behind.
+ */
+static int raw_open(BlockDriverState *bs, const char *filename, int flags)
+{
+    BDRVRawState *s = bs->opaque;
+    int fd, open_flags, ret;
+
+    /* the AIO entry points dereference the state created here */
+    ret = posix_aio_init();
+    if (ret < 0)
+        return ret;
+
+    s->lseek_err_cnt = 0;
+    s->fd = -1;
+    s->aligned_buf = NULL;
+
+    open_flags = O_BINARY;
+    if ((flags & BDRV_O_ACCESS) == O_RDWR) {
+        open_flags |= O_RDWR;
+    } else {
+        open_flags |= O_RDONLY;
+        bs->read_only = 1;
+    }
+    if (flags & BDRV_O_CREAT)
+        open_flags |= O_CREAT | O_TRUNC;
+
+    /* Use O_DSYNC for write-through caching, no flags for write-back caching,
+     * and O_DIRECT for no caching. */
+    if ((flags & BDRV_O_NOCACHE))
+        open_flags |= O_DIRECT;
+    else if (!(flags & BDRV_O_CACHE_WB))
+        open_flags |= O_DSYNC;
+
+    s->type = FTYPE_FILE;
+
+    fd = open(filename, open_flags, 0644);
+    if (fd < 0) {
+        ret = -errno;
+        /* report a read-only filesystem as a permission problem */
+        if (ret == -EROFS)
+            ret = -EACCES;
+        return ret;
+    }
+
+    if ((flags & BDRV_O_NOCACHE)) {
+        s->aligned_buf = qemu_blockalign(bs, ALIGNED_BUFFER_SIZE);
+        if (s->aligned_buf == NULL) {
+            /* errno is not known to be set by the allocator, and -errno
+               could be 0 (success); report a definite error instead */
+            close(fd);
+            return -ENOMEM;
+        }
+    }
+    s->fd = fd;
+    return 0;
+}
+
+/* XXX: use host sector size if necessary with:
+#ifdef DIOCGSECTORSIZE
+        {
+            unsigned int sectorsize = 512;
+            if (!ioctl(fd, DIOCGSECTORSIZE, &sectorsize) &&
+                sectorsize > bufsize)
+                bufsize = sectorsize;
+        }
+#endif
+#ifdef CONFIG_COCOA
+        u_int32_t   blockSize = 512;
+        if ( !ioctl( fd, DKIOCGETBLOCKSIZE, &blockSize ) && blockSize > bufsize) {
+            bufsize = blockSize;
+        }
+#endif
+*/
+
+/*
+ * Read `count` bytes at byte `offset` into `buf` with a single
+ * lseek() + read() pair.
+ *
+ * offset and count are in bytes, but must be multiples of 512 for files
+ * opened with O_DIRECT. buf must be aligned to 512 bytes then.
+ *
+ * This function may be called without alignment if the caller ensures
+ * that O_DIRECT is not in effect.
+ *
+ * A negative `offset` skips the initial seek.  For CD-ROM devices a
+ * failed or short read is retried up to two more times.
+ *
+ * Returns the number of bytes read (possibly short), or a negative errno
+ * value: the result of fd_open() if that fails, -EIO if the seek fails
+ * (matching raw_pwrite_aligned()), or -errno if the read fails.
+ */
+static int raw_pread_aligned(BlockDriverState *bs, int64_t offset,
+                     uint8_t *buf, int count)
+{
+    BDRVRawState *s = bs->opaque;
+    int ret, retries, saved_errno;
+
+    ret = fd_open(bs);
+    if (ret < 0)
+        return ret;
+
+    if (offset >= 0 && lseek(s->fd, offset, SEEK_SET) == (off_t)-1) {
+        ++(s->lseek_err_cnt);
+        /* only log the first few consecutive failures */
+        if(s->lseek_err_cnt <= 10) {
+            DEBUG_BLOCK_PRINT("raw_pread(%d:%s, %" PRId64 ", %p, %d) [%" PRId64
+                              "] lseek failed : %d = %s\n",
+                              s->fd, bs->filename, offset, buf, count,
+                              bs->total_sectors, errno, strerror(errno));
+        }
+        return -EIO;
+    }
+    s->lseek_err_cnt=0;
+
+    ret = read(s->fd, buf, count);
+    /* capture errno before any logging can overwrite it */
+    saved_errno = errno;
+
+    if (ret != count) {
+        DEBUG_BLOCK_PRINT("raw_pread(%d:%s, %" PRId64 ", %p, %d) [%" PRId64
+                          "] read failed %d : %d = %s\n",
+                          s->fd, bs->filename, offset, buf, count,
+                          bs->total_sectors, ret, saved_errno,
+                          strerror(saved_errno));
+
+        /* Try harder for CDrom. */
+        if (bs->type == BDRV_TYPE_CDROM) {
+            for (retries = 0; retries < 2 && ret != count; retries++) {
+                lseek(s->fd, offset, SEEK_SET);
+                ret = read(s->fd, buf, count);
+                saved_errno = errno;
+            }
+
+            if (ret != count) {
+                DEBUG_BLOCK_PRINT("raw_pread(%d:%s, %" PRId64 ", %p, %d) [%"
+                                  PRId64 "] retry read failed %d : %d = %s\n",
+                                  s->fd, bs->filename, offset, buf, count,
+                                  bs->total_sectors, ret, saved_errno,
+                                  strerror(saved_errno));
+            }
+        }
+    }
+
+    return (ret < 0) ? -saved_errno : ret;
+}
+
+/*
+ * Write `count` bytes from `buf` at byte `offset` with a single
+ * lseek() + write() pair.
+ *
+ * offset and count are in bytes, but must be multiples of 512 for files
+ * opened with O_DIRECT. buf must be aligned to 512 bytes then.
+ *
+ * This function may be called without alignment if the caller ensures
+ * that O_DIRECT is not in effect.
+ *
+ * A negative `offset` skips the seek.
+ *
+ * Returns the number of bytes written (possibly short), or a negative
+ * errno value: the result of fd_open() if that fails, -EIO if the seek
+ * fails, or -errno if the write fails.
+ */
+static int raw_pwrite_aligned(BlockDriverState *bs, int64_t offset,
+                      const uint8_t *buf, int count)
+{
+    BDRVRawState *s = bs->opaque;
+    int ret, saved_errno;
+
+    /* fd_open() already returns a negative error code; errno may be stale */
+    ret = fd_open(bs);
+    if (ret < 0)
+        return ret;
+
+    if (offset >= 0 && lseek(s->fd, offset, SEEK_SET) == (off_t)-1) {
+        ++(s->lseek_err_cnt);
+        /* only log the first few consecutive failures, as the read path does */
+        if(s->lseek_err_cnt <= 10) {
+            DEBUG_BLOCK_PRINT("raw_pwrite(%d:%s, %" PRId64 ", %p, %d) [%"
+                              PRId64 "] lseek failed : %d = %s\n",
+                              s->fd, bs->filename, offset, buf, count,
+                              bs->total_sectors, errno, strerror(errno));
+        }
+        return -EIO;
+    }
+    s->lseek_err_cnt = 0;
+
+    ret = write(s->fd, buf, count);
+    /* capture errno before any logging can overwrite it */
+    saved_errno = errno;
+
+    if (ret != count) {
+        DEBUG_BLOCK_PRINT("raw_pwrite(%d:%s, %" PRId64 ", %p, %d) [%" PRId64
+                          "] write failed %d : %d = %s\n",
+                          s->fd, bs->filename, offset, buf, count,
+                          bs->total_sectors, ret, saved_errno,
+                          strerror(saved_errno));
+    }
+
+    return (ret < 0) ? -saved_errno : ret;
+}
+
+
+/*
+ * offset and count are in bytes and possibly not aligned. For files opened
+ * with O_DIRECT, necessary alignments are ensured before calling
+ * raw_pread_aligned to do the actual read.
+ *
+ * When s->aligned_buf is set, unaligned requests go through that bounce
+ * buffer: first the partial leading sector (if `offset` is not a multiple
+ * of 512), then, if `count` or `buf` is still unaligned, the remainder in
+ * chunks of at most ALIGNED_BUFFER_SIZE.  Fully aligned requests, and all
+ * requests when no bounce buffer exists, are passed straight through.
+ *
+ * Returns the number of bytes copied into `buf` (short at end of file) or
+ * a negative error from raw_pread_aligned().
+ */
+static int raw_pread(BlockDriverState *bs, int64_t offset,
+                     uint8_t *buf, int count)
+{
+    BDRVRawState *s = bs->opaque;
+    int size, ret, shift, sum, wanted;
+
+    sum = 0;
+
+    if (s->aligned_buf != NULL)  {
+
+        if (offset & 0x1ff) {
+            /* align offset on a 512 bytes boundary */
+
+            shift = offset & 0x1ff;
+            size = (shift + count + 0x1ff) & ~0x1ff;
+            if (size > ALIGNED_BUFFER_SIZE)
+                size = ALIGNED_BUFFER_SIZE;
+            ret = raw_pread_aligned(bs, offset - shift, s->aligned_buf, size);
+            if (ret < 0)
+                return ret;
+            /* the read ended before the requested offset: nothing to copy */
+            if (ret <= shift)
+                return 0;
+
+            wanted = 512 - shift;
+            if (wanted > count)
+                wanted = count;
+            /* never copy bytes the read did not actually deliver */
+            size = wanted;
+            if (size > ret - shift)
+                size = ret - shift;
+            memcpy(buf, s->aligned_buf + shift, size);
+
+            buf += size;
+            offset += size;
+            count -= size;
+            sum += size;
+
+            /* done, or the data ran out inside the first sector */
+            if (count == 0 || size < wanted)
+                return sum;
+        }
+        if (count & 0x1ff || (uintptr_t) buf & 0x1ff) {
+
+            /* read on aligned buffer */
+
+            while (count) {
+
+                size = (count + 0x1ff) & ~0x1ff;
+                if (size > ALIGNED_BUFFER_SIZE)
+                    size = ALIGNED_BUFFER_SIZE;
+
+                ret = raw_pread_aligned(bs, offset, s->aligned_buf, size);
+                if (ret < 0)
+                    return ret;
+                /* end of file: stop instead of looping forever */
+                if (ret == 0)
+                    break;
+
+                size = ret;
+                if (size > count)
+                    size = count;
+
+                memcpy(buf, s->aligned_buf, size);
+
+                buf += size;
+                offset += size;
+                count -= size;
+                sum += size;
+            }
+
+            return sum;
+        }
+    }
+
+    /* do not fold an error code into the running byte count */
+    ret = raw_pread_aligned(bs, offset, buf, count);
+    if (ret < 0)
+        return ret;
+    return ret + sum;
+}
+
+/*
+ * Sector-based read entry point: reads `nb_sectors` 512-byte sectors
+ * starting at `sector_num` through raw_pread().  Returns 0 when the full
+ * amount was transferred, otherwise raw_pread()'s result unchanged.
+ */
+static int raw_read(BlockDriverState *bs, int64_t sector_num,
+                    uint8_t *buf, int nb_sectors)
+{
+    const int nbytes = nb_sectors * 512;
+    int done = raw_pread(bs, sector_num * 512, buf, nbytes);
+
+    return (done == nbytes) ? 0 : done;
+}
+
+/*
+ * offset and count are in bytes and possibly not aligned. For files opened
+ * with O_DIRECT, necessary alignments are ensured before calling
+ * raw_pwrite_aligned to do the actual write.
+ *
+ * When s->aligned_buf is set, a partial leading sector and a partial
+ * trailing sector are handled by read-modify-write through the bounce
+ * buffer, and whole sectors in between are copied through it in chunks of
+ * at most ALIGNED_BUFFER_SIZE.  Fully aligned requests, and all requests
+ * when no bounce buffer exists, are passed straight through.
+ *
+ * Returns the number of bytes of `buf` written, or a negative error from
+ * raw_pread_aligned() / raw_pwrite_aligned().
+ */
+static int raw_pwrite(BlockDriverState *bs, int64_t offset,
+                      const uint8_t *buf, int count)
+{
+    BDRVRawState *s = bs->opaque;
+    int size, ret, shift, sum;
+
+    sum = 0;
+
+    if (s->aligned_buf != NULL) {
+
+        if (offset & 0x1ff) {
+            /* align offset on a 512 bytes boundary */
+            shift = offset & 0x1ff;
+            ret = raw_pread_aligned(bs, offset - shift, s->aligned_buf, 512);
+            if (ret < 0)
+                return ret;
+            /* a short read leaves old bounce-buffer contents behind; do not
+               write them out as if they were file data */
+            if (ret < 512)
+                memset(s->aligned_buf + ret, 0, 512 - ret);
+
+            size = 512 - shift;
+            if (size > count)
+                size = count;
+            memcpy(s->aligned_buf + shift, buf, size);
+
+            ret = raw_pwrite_aligned(bs, offset - shift, s->aligned_buf, 512);
+            if (ret < 0)
+                return ret;
+
+            buf += size;
+            offset += size;
+            count -= size;
+            sum += size;
+
+            if (count == 0)
+                return sum;
+        }
+        if (count & 0x1ff || (uintptr_t) buf & 0x1ff) {
+
+            while ((size = (count & ~0x1ff)) != 0) {
+
+                if (size > ALIGNED_BUFFER_SIZE)
+                    size = ALIGNED_BUFFER_SIZE;
+
+                memcpy(s->aligned_buf, buf, size);
+
+                ret = raw_pwrite_aligned(bs, offset, s->aligned_buf, size);
+                if (ret < 0)
+                    return ret;
+
+                buf += ret;
+                offset += ret;
+                count -= ret;
+                sum += ret;
+            }
+            /* here, count < 512 because (count & ~0x1ff) == 0 */
+            if (count) {
+                ret = raw_pread_aligned(bs, offset, s->aligned_buf, 512);
+                if (ret < 0)
+                    return ret;
+                /* same short-read precaution as for the leading sector */
+                if (ret < 512)
+                    memset(s->aligned_buf + ret, 0, 512 - ret);
+                memcpy(s->aligned_buf, buf, count);
+
+                ret = raw_pwrite_aligned(bs, offset, s->aligned_buf, 512);
+                if (ret < 0)
+                    return ret;
+                /* only `count` of the 512 bytes belonged to the caller */
+                if (count < ret)
+                    ret = count;
+
+                sum += ret;
+            }
+            return sum;
+        }
+    }
+
+    /* do not fold an error code into the running byte count */
+    ret = raw_pwrite_aligned(bs, offset, buf, count);
+    if (ret < 0)
+        return ret;
+    return ret + sum;
+}
+
+/*
+ * Sector-based write entry point: writes `nb_sectors` 512-byte sectors
+ * starting at `sector_num` through raw_pwrite().  Returns 0 when the full
+ * amount was transferred, otherwise raw_pwrite()'s result unchanged.
+ */
+static int raw_write(BlockDriverState *bs, int64_t sector_num,
+                     const uint8_t *buf, int nb_sectors)
+{
+    const int nbytes = nb_sectors * 512;
+    int done = raw_pwrite(bs, sector_num * 512, buf, nbytes);
+
+    return (done == nbytes) ? 0 : done;
+}
+
+#ifdef CONFIG_AIO
+/***********************************************************/
+/* Unix AIO using POSIX AIO */
+
+/* One in-flight asynchronous request issued by this driver. */
+typedef struct RawAIOCB {
+    BlockDriverAIOCB common;    /* generic part; holds cb and opaque */
+    struct qemu_paiocb aiocb;   /* request descriptor passed to qemu_paio_*() */
+    struct RawAIOCB *next;      /* link in PosixAioState.first_aio */
+    int ret;
+} RawAIOCB;
+
+/* Global AIO bookkeeping, created once by posix_aio_init(). */
+typedef struct PosixAioState
+{
+    /* pipe ends: aio_signal_handler() writes to wfd, posix_aio_read()
+       drains rfd */
+    int rfd, wfd;
+    RawAIOCB *first_aio;        /* singly linked list of pending requests */
+} PosixAioState;
+
+/*
+ * fd handler for the read end of the notification pipe (installed by
+ * posix_aio_init()).  `opaque` is the PosixAioState.
+ *
+ * Drains the pipe, then repeatedly scans the pending list from its head:
+ *  - a request reporting ECANCELED is unlinked and released without
+ *    calling its callback;
+ *  - a request that is no longer EINPROGRESS is unlinked, its callback is
+ *    called with 0 (full transfer), -EINVAL (short transfer) or the
+ *    negated error, and it is released;
+ *  - anything still in progress is skipped.
+ * The function returns once a scan reaches the end of the list.
+ */
+static void posix_aio_read(void *opaque)
+{
+    PosixAioState *s = opaque;
+    RawAIOCB *acb, **pacb;
+    int ret;
+    ssize_t len;
+
+    /* read all bytes from signal pipe */
+    for (;;) {
+        char bytes[16];
+
+        len = read(s->rfd, bytes, sizeof(bytes));
+        if (len == -1 && errno == EINTR)
+            continue; /* try again */
+        if (len == sizeof(bytes))
+            continue; /* more to read */
+        break;
+    }
+
+    for(;;) {
+        /* (re)start from the head of the list */
+        pacb = &s->first_aio;
+        for(;;) {
+            acb = *pacb;
+            if (!acb)
+                goto the_end;
+            ret = qemu_paio_error(&acb->aiocb);
+            if (ret == ECANCELED) {
+                /* remove the request */
+                *pacb = acb->next;
+                qemu_aio_release(acb);
+            } else if (ret != EINPROGRESS) {
+                /* end of aio */
+                if (ret == 0) {
+                    ret = qemu_paio_return(&acb->aiocb);
+                    if (ret == acb->aiocb.aio_nbytes)
+                        ret = 0;
+                    else
+                        ret = -EINVAL;
+                } else {
+                    ret = -ret;
+                }
+                /* remove the request */
+                *pacb = acb->next;
+                /* call the callback */
+                acb->common.cb(acb->common.opaque, ret);
+                qemu_aio_release(acb);
+                /* rescan from the head -- presumably because the callback
+                   may have changed the list; confirm */
+                break;
+            } else {
+                pacb = &acb->next;
+            }
+        }
+    }
+ the_end: ;
+}
+
+/*
+ * Flush callback registered with qemu_aio_set_fd_handler(): reports 1 while
+ * at least one request is still on the pending list, 0 otherwise.
+ */
+static int posix_aio_flush(void *opaque)
+{
+    const PosixAioState *state = opaque;
+
+    return (state->first_aio != NULL) ? 1 : 0;
+}
+
+static PosixAioState *posix_aio_state;
+
+/*
+ * SIGUSR2 handler (installed by posix_aio_init()): wakes up the main loop
+ * by writing one byte to the notification pipe, then calls
+ * qemu_service_io().
+ *
+ * errno is saved on entry and restored on exit so that the code the
+ * signal interrupted does not observe an errno changed by the handler.
+ */
+static void aio_signal_handler(int signum)
+{
+    int saved_errno = errno;
+
+    if (posix_aio_state) {
+        char byte = 0;
+        ssize_t len;
+
+        /* The pipe is non-blocking.  A failed write is deliberately not
+           treated as fatal: if the pipe is full, a wakeup is already
+           pending. */
+        len = write(posix_aio_state->wfd, &byte, sizeof(byte));
+        (void)len;
+    }
+
+    qemu_service_io();
+
+    errno = saved_errno;
+}
+
+/*
+ * One-time setup of the AIO machinery: notification pipe, SIGUSR2 handler,
+ * fd handler for the pipe's read end, and the qemu_paio worker pool.
+ * Subsequent calls are no-ops.
+ *
+ * Returns 0 on success or -errno if the pipe cannot be created.  The pipe
+ * is created before anything is allocated or installed, so a failure
+ * leaves no state behind.
+ */
+static int posix_aio_init(void)
+{
+    struct sigaction act;
+    PosixAioState *s;
+    int fds[2];
+    struct qemu_paioinit ai;
+
+    if (posix_aio_state)
+        return 0;
+
+    if (pipe(fds) == -1) {
+        /* fprintf() may change errno, so read it first */
+        int err = errno;
+
+        fprintf(stderr, "failed to create pipe\n");
+        return -err;
+    }
+
+    s = qemu_malloc(sizeof(PosixAioState));
+    s->first_aio = NULL;
+    s->rfd = fds[0];
+    s->wfd = fds[1];
+
+    fcntl(s->rfd, F_SETFL, O_NONBLOCK);
+    fcntl(s->wfd, F_SETFL, O_NONBLOCK);
+
+    memset(&act, 0, sizeof(act));
+    sigfillset(&act.sa_mask);
+    act.sa_flags = 0; /* do not restart syscalls to interrupt select() */
+    act.sa_handler = aio_signal_handler;
+    sigaction(SIGUSR2, &act, NULL);
+
+    qemu_aio_set_fd_handler(s->rfd, posix_aio_read, NULL, posix_aio_flush, s);
+
+    memset(&ai, 0, sizeof(ai));
+    ai.aio_threads = 64;
+    ai.aio_num = 64;
+    qemu_paio_init(&ai);
+
+    /* publish last: the signal handler only acts once this is non-NULL */
+    posix_aio_state = s;
+
+    return 0;
+}
+
+/*
+ * Allocate and fill in a request for an asynchronous vectored transfer of
+ * `nb_sectors` 512-byte sectors starting at `sector_num`, and push it onto
+ * the head of the pending list.  The request is not submitted here.
+ *
+ * Returns NULL if fd_open() fails or no request could be obtained.
+ */
+static RawAIOCB *raw_aio_setup(BlockDriverState *bs, int64_t sector_num,
+        QEMUIOVector *qiov, int nb_sectors,
+        BlockDriverCompletionFunc *cb, void *opaque)
+{
+    BDRVRawState *raw = bs->opaque;
+    RawAIOCB *req;
+
+    if (fd_open(bs) < 0)
+        return NULL;
+
+    req = qemu_aio_get(bs, cb, opaque);
+    if (req == NULL)
+        return NULL;
+
+    req->aiocb.aio_fildes = raw->fd;
+    req->aiocb.ev_signo = SIGUSR2;
+    req->aiocb.aio_iov = qiov->iov;
+    req->aiocb.aio_niov = qiov->niov;
+    req->aiocb.aio_nbytes = nb_sectors * 512;
+    req->aiocb.aio_offset = sector_num * 512;
+
+    /*
+     * With O_DIRECT (bounce buffer present) the data buffer must be sector
+     * aligned; ask the low level code to take care of that if needed.
+     */
+    req->aiocb.aio_flags = raw->aligned_buf ? QEMU_AIO_SECTOR_ALIGNED : 0;
+
+    /* link at the head of the pending list */
+    req->next = posix_aio_state->first_aio;
+    posix_aio_state->first_aio = req;
+
+    return req;
+}
+
+/*
+ * Unlink `acb` from the pending list and release it.  If it is not on the
+ * list a diagnostic is printed and nothing is released.
+ */
+static void raw_aio_remove(RawAIOCB *acb)
+{
+    RawAIOCB **link;
+
+    /* walk the list through the address of each `next` pointer */
+    for (link = &posix_aio_state->first_aio; *link != NULL;
+         link = &(*link)->next) {
+        if (*link == acb) {
+            *link = acb->next;
+            qemu_aio_release(acb);
+            return;
+        }
+    }
+
+    fprintf(stderr, "raw_aio_remove: aio request not found!\n");
+}
+
+/*
+ * Start an asynchronous vectored read.  Returns the generic AIOCB of the
+ * new request, or NULL if it could not be set up or submitted (in the
+ * latter case the request is removed from the pending list again).
+ */
+static BlockDriverAIOCB *raw_aio_readv(BlockDriverState *bs,
+        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
+        BlockDriverCompletionFunc *cb, void *opaque)
+{
+    RawAIOCB *req = raw_aio_setup(bs, sector_num, qiov, nb_sectors,
+                                  cb, opaque);
+
+    if (req == NULL)
+        return NULL;
+
+    if (qemu_paio_read(&req->aiocb) >= 0)
+        return &req->common;
+
+    raw_aio_remove(req);
+    return NULL;
+}
+
+/*
+ * Start an asynchronous vectored write.  Returns the generic AIOCB of the
+ * new request, or NULL if it could not be set up or submitted (in the
+ * latter case the request is removed from the pending list again).
+ */
+static BlockDriverAIOCB *raw_aio_writev(BlockDriverState *bs,
+        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
+        BlockDriverCompletionFunc *cb, void *opaque)
+{
+    RawAIOCB *req = raw_aio_setup(bs, sector_num, qiov, nb_sectors,
+                                  cb, opaque);
+
+    if (req == NULL)
+        return NULL;
+
+    if (qemu_paio_write(&req->aiocb) >= 0)
+        return &req->common;
+
+    raw_aio_remove(req);
+    return NULL;
+}
+
+/*
+ * Cancel an in-flight request.  If the lower layer reports that it could
+ * not be cancelled, busy-wait until it is no longer in progress.  Either
+ * way the request is then removed from the pending list and released.
+ */
+static void raw_aio_cancel(BlockDriverAIOCB *blockacb)
+{
+    RawAIOCB *req = (RawAIOCB *)blockacb;
+
+    if (qemu_paio_cancel(req->aiocb.aio_fildes, &req->aiocb)
+        == QEMU_PAIO_NOTCANCELED) {
+        /* fail safe: the request is still running, so wait for it */
+        while (qemu_paio_error(&req->aiocb) == EINPROGRESS) {
+            /* spin */
+        }
+    }
+
+    raw_aio_remove(req);
+}
+#else /* CONFIG_AIO */
+/* Built without CONFIG_AIO: nothing to set up, always succeeds. */
+static int posix_aio_init(void)
+{
+    return 0;
+}
+#endif /* CONFIG_AIO */
+
+
+/*
+ * Release the resources held by an open image: the file descriptor (if
+ * one is currently open) and the O_DIRECT bounce buffer (if allocated).
+ *
+ * The buffer is released independently of the descriptor state and the
+ * pointer is cleared, so the buffer is neither leaked when the descriptor
+ * is already closed nor freed twice on a repeated call.
+ */
+static void raw_close(BlockDriverState *bs)
+{
+    BDRVRawState *s = bs->opaque;
+
+    if (s->fd >= 0) {
+        close(s->fd);
+        s->fd = -1;
+    }
+    if (s->aligned_buf != NULL) {
+        qemu_free(s->aligned_buf);
+        s->aligned_buf = NULL;
+    }
+}
+
+/*
+ * Resize a regular-file image to `offset` bytes with ftruncate().
+ * Returns 0 on success, -ENOTSUP for anything that is not FTYPE_FILE, or
+ * -errno if ftruncate() fails.
+ */
+static int raw_truncate(BlockDriverState *bs, int64_t offset)
+{
+    BDRVRawState *raw = bs->opaque;
+
+    if (s_type_is_not_file_placeholder)
+        ;
+    return 0;
+}
+
+#ifdef __OpenBSD__
+/*
+ * OpenBSD: length of the image in bytes.  For character and block devices
+ * the size comes from the disklabel (sector size times the size of the
+ * partition selected by the device number); for everything else it is
+ * st_size.  Returns -1 if fstat() or the DIOCGDINFO ioctl fails.
+ */
+static int64_t raw_getlength(BlockDriverState *bs)
+{
+    BDRVRawState *raw = bs->opaque;
+    struct stat st;
+    struct disklabel dl;
+
+    if (fstat(raw->fd, &st))
+        return -1;
+
+    if (!(S_ISCHR(st.st_mode) || S_ISBLK(st.st_mode)))
+        return st.st_size;
+
+    if (ioctl(raw->fd, DIOCGDINFO, &dl))
+        return -1;
+
+    return (uint64_t)dl.d_secsize *
+        dl.d_partitions[DISKPART(st.st_rdev)].p_size;
+}
+#else /* !__OpenBSD__ */
+/*
+ * Length of the image in bytes (all hosts except OpenBSD).
+ *
+ * The descriptor is read from s->fd only after fd_open() has run, and
+ * again after a FreeBSD CD re-open, because those calls may replace it
+ * (the Linux floppy fd_open() closes and re-opens s->fd).
+ *
+ * Depending on the host, the size comes from a media-size ioctl
+ * (DIOCGMEDIASIZE / DIOCGPART on BSD character devices, DKIOCGMEDIAINFO on
+ * Solaris) with lseek(SEEK_END) as the fallback and the default.
+ *
+ * Returns the size, a negative error from fd_open(), or whatever lseek()
+ * returned.
+ */
+static int64_t  raw_getlength(BlockDriverState *bs)
+{
+    BDRVRawState *s = bs->opaque;
+    int fd;
+    int64_t size;
+#ifdef HOST_BSD
+    struct stat sb;
+#ifdef __FreeBSD__
+    int reopened = 0;
+#endif
+#endif
+#ifdef __sun__
+    struct dk_minfo minfo;
+    int rv;
+#endif
+    int ret;
+
+    ret = fd_open(bs);
+    if (ret < 0)
+        return ret;
+    /* fd_open() may have (re)opened the device */
+    fd = s->fd;
+
+#ifdef HOST_BSD
+#ifdef __FreeBSD__
+again:
+#endif
+    if (!fstat(fd, &sb) && (S_IFCHR & sb.st_mode)) {
+        /* with DIOCGMEDIASIZE the `if` below guards the fallback
+           assignment that follows the #endif */
+#ifdef DIOCGMEDIASIZE
+       if (ioctl(fd, DIOCGMEDIASIZE, (off_t *)&size))
+#elif defined(DIOCGPART)
+        {
+                struct partinfo pi;
+                if (ioctl(fd, DIOCGPART, &pi) == 0)
+                        size = pi.media_size;
+                else
+                        size = 0;
+        }
+        if (size == 0)
+#endif
+#ifdef CONFIG_COCOA
+        size = LONG_LONG_MAX;
+#else
+        size = lseek(fd, 0LL, SEEK_END);
+#endif
+#ifdef __FreeBSD__
+        switch(s->type) {
+        case FTYPE_CD:
+            /* XXX FreeBSD acd returns UINT_MAX sectors for an empty drive */
+            if (size == 2048LL * (unsigned)-1)
+                size = 0;
+            /* XXX no disc?  maybe we need to reopen... */
+            if (size <= 0 && !reopened && cd_open(bs) >= 0) {
+                reopened = 1;
+                /* pick up the descriptor cd_open() left in s->fd */
+                fd = s->fd;
+                goto again;
+            }
+        }
+#endif
+    } else
+#endif
+#ifdef __sun__
+    /*
+     * use the DKIOCGMEDIAINFO ioctl to read the size.
+     */
+    rv = ioctl ( fd, DKIOCGMEDIAINFO, &minfo );
+    if ( rv != -1 ) {
+        size = minfo.dki_lbsize * minfo.dki_capacity;
+    } else /* there are reports that lseek on some devices
+              fails, but irc discussion said that contingency
+              on contingency was overkill */
+#endif
+    {
+        size = lseek(fd, 0, SEEK_END);
+    }
+    return size;
+}
+#endif
+
+/*
+ * Create (or truncate) `filename` and size it to `total_size` 512-byte
+ * sectors.  Neither creation flags nor a backing file are supported.
+ *
+ * Returns 0 on success, -ENOTSUP for unsupported options, or -errno of
+ * the first of open(), ftruncate() or close() that failed.
+ */
+static int raw_create(const char *filename, int64_t total_size,
+                      const char *backing_file, int flags)
+{
+    int fd;
+    int ret = 0;
+
+    if (flags || backing_file)
+        return -ENOTSUP;
+
+    fd = open(filename, O_WRONLY | O_CREAT | O_TRUNC | O_BINARY,
+              0644);
+    if (fd < 0)
+        return -errno;
+
+    /* a file of the wrong size is a failed create, not a success */
+    if (ftruncate(fd, total_size * 512) != 0)
+        ret = -errno;
+    if (close(fd) != 0 && ret == 0)
+        ret = -errno;
+
+    return ret;
+}
+
+/* Flush the image's descriptor with fsync(); the result is not reported. */
+static void raw_flush(BlockDriverState *bs)
+{
+    const BDRVRawState *raw = bs->opaque;
+    const int fd = raw->fd;
+
+    fsync(fd);
+}
+
+/* Driver table for plain files ("raw" format). */
+static BlockDriver bdrv_raw = {
+    .format_name = "raw",
+    .instance_size = sizeof(BDRVRawState),
+    .bdrv_probe = NULL, /* no probe for protocols */
+    .bdrv_open = raw_open,
+    .bdrv_read = raw_read,
+    .bdrv_write = raw_write,
+    .bdrv_close = raw_close,
+    .bdrv_create = raw_create,
+    .bdrv_flush = raw_flush,
+
+    /* asynchronous entry points exist only when built with CONFIG_AIO */
+#ifdef CONFIG_AIO
+    .bdrv_aio_readv = raw_aio_readv,
+    .bdrv_aio_writev = raw_aio_writev,
+    .bdrv_aio_cancel = raw_aio_cancel,
+    .aiocb_size = sizeof(RawAIOCB),
+#endif
+
+    .bdrv_truncate = raw_truncate,
+    .bdrv_getlength = raw_getlength,
+};
+
+/***********************************************/
+/* host device */
+
+#ifdef CONFIG_COCOA
+static kern_return_t FindEjectableCDMedia( io_iterator_t *mediaIterator );
+static kern_return_t GetBSDPath( io_iterator_t mediaIterator, char *bsdPath, CFIndex maxPathSize );
+
+/*
+ * Look up ejectable CD media in the I/O Registry.
+ *
+ * On success *mediaIterator receives an iterator over the matching
+ * services.  On every failure path *mediaIterator is 0, so the caller can
+ * test it before releasing it, and a non-success code is returned.  The
+ * lookup is not attempted with an invalid master port or a NULL matching
+ * dictionary.
+ */
+kern_return_t FindEjectableCDMedia( io_iterator_t *mediaIterator )
+{
+    kern_return_t       kernResult;
+    mach_port_t     masterPort;
+    CFMutableDictionaryRef  classesToMatch;
+
+    /* give the caller a defined value whatever happens below */
+    *mediaIterator = 0;
+
+    kernResult = IOMasterPort( MACH_PORT_NULL, &masterPort );
+    if ( KERN_SUCCESS != kernResult ) {
+        printf( "IOMasterPort returned %d\n", kernResult );
+        return kernResult;
+    }
+
+    classesToMatch = IOServiceMatching( kIOCDMediaClass );
+    if ( classesToMatch == NULL ) {
+        printf( "IOServiceMatching returned a NULL dictionary.\n" );
+        return KERN_FAILURE;
+    }
+    CFDictionarySetValue( classesToMatch, CFSTR( kIOMediaEjectableKey ), kCFBooleanTrue );
+
+    kernResult = IOServiceGetMatchingServices( masterPort, classesToMatch, mediaIterator );
+    if ( KERN_SUCCESS != kernResult )
+    {
+        printf( "IOServiceGetMatchingServices returned %d\n", kernResult );
+    }
+
+    return kernResult;
+}
+
+/*
+ * Build the raw device path of the next medium in `mediaIterator` into
+ * `bsdPath` (capacity `maxPathSize`): _PATH_DEV, then "r", then the
+ * medium's BSD name.
+ *
+ * Returns KERN_SUCCESS only if the name could be appended; otherwise
+ * KERN_FAILURE.  `bsdPath` is set to the empty string first, so it stays
+ * empty when the iterator yields nothing or the name property is missing.
+ *
+ * NOTE(review): the strcpy/strcat of the prefix are not checked against
+ * maxPathSize; the only caller visible here passes a MAXPATHLEN buffer.
+ */
+kern_return_t GetBSDPath( io_iterator_t mediaIterator, char *bsdPath, CFIndex maxPathSize )
+{
+    io_object_t     nextMedia;
+    kern_return_t   kernResult = KERN_FAILURE;
+    *bsdPath = '\0';
+    nextMedia = IOIteratorNext( mediaIterator );
+    if ( nextMedia )
+    {
+        CFTypeRef   bsdPathAsCFString;
+    bsdPathAsCFString = IORegistryEntryCreateCFProperty( nextMedia, CFSTR( kIOBSDNameKey ), kCFAllocatorDefault, 0 );
+        if ( bsdPathAsCFString ) {
+            size_t devPathLength;
+            strcpy( bsdPath, _PATH_DEV );
+            strcat( bsdPath, "r" );
+            devPathLength = strlen( bsdPath );
+            /* append the BSD name after the prefix, within the remaining space */
+            if ( CFStringGetCString( bsdPathAsCFString, bsdPath + devPathLength, maxPathSize - devPathLength, kCFStringEncodingASCII ) ) {
+                kernResult = KERN_SUCCESS;
+            }
+            CFRelease( bsdPathAsCFString );
+        }
+        IOObjectRelease( nextMedia );
+    }
+
+    return kernResult;
+}
+
+#endif
+
+/*
+ * Open a host device (disk, CD-ROM, floppy, SCSI generic) as a raw image.
+ *
+ * `flags` are BDRV_O_* flags, translated to open(2) flags the same way as
+ * in raw_open() except that BDRV_O_CREAT is not honoured.  s->type is set
+ * from the file name prefix on Linux and FreeBSD.
+ *
+ * Returns 0 on success or a negative errno value (EROFS is reported as
+ * EACCES).  On Linux a floppy is closed again before returning; fd_open()
+ * re-opens it on demand.
+ */
+static int hdev_open(BlockDriverState *bs, const char *filename, int flags)
+{
+    BDRVRawState *s = bs->opaque;
+    int fd, open_flags, ret;
+
+    posix_aio_init();
+
+#ifdef CONFIG_COCOA
+    /* Mac OS X: map "/dev/cdrom" to the raw device of the first ejectable
+       CD medium found in the I/O Registry */
+    if (strstart(filename, "/dev/cdrom", NULL)) {
+        kern_return_t kernResult;
+        io_iterator_t mediaIterator;
+        char bsdPath[ MAXPATHLEN ];
+        int fd;
+
+        kernResult = FindEjectableCDMedia( &mediaIterator );
+        kernResult = GetBSDPath( mediaIterator, bsdPath, sizeof( bsdPath ) );
+
+        if ( bsdPath[ 0 ] != '\0' ) {
+            strcat(bsdPath,"s0");
+            /* some CDs don't have a partition 0 */
+            fd = open(bsdPath, O_RDONLY | O_BINARY | O_LARGEFILE);
+            if (fd < 0) {
+                /* fall back to slice 1: rewrite the trailing '0' */
+                bsdPath[strlen(bsdPath)-1] = '1';
+            } else {
+                close(fd);
+            }
+            filename = bsdPath;
+        }
+
+        if ( mediaIterator )
+            IOObjectRelease( mediaIterator );
+    }
+#endif
+    open_flags = O_BINARY;
+    if ((flags & BDRV_O_ACCESS) == O_RDWR) {
+        open_flags |= O_RDWR;
+    } else {
+        open_flags |= O_RDONLY;
+        bs->read_only = 1;
+    }
+    /* Use O_DSYNC for write-through caching, no flags for write-back caching,
+     * and O_DIRECT for no caching. */
+    if ((flags & BDRV_O_NOCACHE))
+        open_flags |= O_DIRECT;
+    else if (!(flags & BDRV_O_CACHE_WB))
+        open_flags |= O_DSYNC;
+
+    s->type = FTYPE_FILE;
+#if defined(__linux__)
+    if (strstart(filename, "/dev/cd", NULL)) {
+        /* open will not fail even if no CD is inserted */
+        open_flags |= O_NONBLOCK;
+        s->type = FTYPE_CD;
+    } else if (strstart(filename, "/dev/fd", NULL)) {
+        s->type = FTYPE_FD;
+        /* saved without O_NONBLOCK for later re-opens by fd_open() */
+        s->fd_open_flags = open_flags;
+        /* open will not fail even if no floppy is inserted */
+        open_flags |= O_NONBLOCK;
+#ifdef CONFIG_AIO
+    } else if (strstart(filename, "/dev/sg", NULL)) {
+        bs->sg = 1;
+#endif
+    }
+#endif
+#if defined(__FreeBSD__)
+    if (strstart(filename, "/dev/cd", NULL) ||
+        strstart(filename, "/dev/acd", NULL)) {
+        s->type = FTYPE_CD;
+        s->cd_open_flags = open_flags;
+    }
+#endif
+    s->fd = -1;
+    fd = open(filename, open_flags, 0644);
+    if (fd < 0) {
+        ret = -errno;
+        if (ret == -EROFS)
+            ret = -EACCES;
+        return ret;
+    }
+    s->fd = fd;
+#if defined(__FreeBSD__)
+    /* make sure the door isn't locked at this time */
+    if (s->type == FTYPE_CD)
+        ioctl (s->fd, CDIOCALLOW);
+#endif
+#if defined(__linux__)
+    /* close fd so that we can reopen it as needed */
+    if (s->type == FTYPE_FD) {
+        close(s->fd);
+        s->fd = -1;
+        s->fd_media_changed = 1;
+    }
+#endif
+    return 0;
+}
+
+#if defined(__linux__)
+/* Note: we do not have a reliable method to detect if the floppy is
+   present. The current method is to try to open the floppy at every
+   I/O and to keep it opened during a few hundreds of ms. */
+/*
+ * Make sure s->fd is usable before an I/O operation (Linux).
+ *
+ * For anything but a floppy (FTYPE_FD) this is a no-op returning 0.  For a
+ * floppy:
+ *  - a descriptor not refreshed for FD_OPEN_TIMEOUT ms is closed;
+ *  - if no descriptor is open, a re-open is attempted unless the previous
+ *    attempt failed less than FD_OPEN_TIMEOUT ms ago;
+ *  - s->fd_media_changed is latched whenever the descriptor goes from
+ *    open to unopenable, or from closed to open.
+ *
+ * Returns 0 on success, -EIO if the floppy could not be opened.
+ */
+static int fd_open(BlockDriverState *bs)
+{
+    BDRVRawState *s = bs->opaque;
+    int last_media_present;
+
+    if (s->type != FTYPE_FD)
+        return 0;
+    /* state on entry, before the timeout below may close the descriptor */
+    last_media_present = (s->fd >= 0);
+    if (s->fd >= 0 &&
+        (qemu_get_clock(rt_clock) - s->fd_open_time) >= FD_OPEN_TIMEOUT) {
+        close(s->fd);
+        s->fd = -1;
+#ifdef DEBUG_FLOPPY
+        printf("Floppy closed\n");
+#endif
+    }
+    if (s->fd < 0) {
+        /* back off after a recent failure instead of retrying every I/O */
+        if (s->fd_got_error &&
+            (qemu_get_clock(rt_clock) - s->fd_error_time) < FD_OPEN_TIMEOUT) {
+#ifdef DEBUG_FLOPPY
+            printf("No floppy (open delayed)\n");
+#endif
+            return -EIO;
+        }
+        s->fd = open(bs->filename, s->fd_open_flags);
+        if (s->fd < 0) {
+            s->fd_error_time = qemu_get_clock(rt_clock);
+            s->fd_got_error = 1;
+            if (last_media_present)
+                s->fd_media_changed = 1;
+#ifdef DEBUG_FLOPPY
+            printf("No floppy\n");
+#endif
+            return -EIO;
+        }
+#ifdef DEBUG_FLOPPY
+        printf("Floppy opened\n");
+#endif
+    }
+    if (!last_media_present)
+        s->fd_media_changed = 1;
+    /* restart the idle timer */
+    s->fd_open_time = qemu_get_clock(rt_clock);
+    s->fd_got_error = 0;
+    return 0;
+}
+
+/*
+ * Report whether a medium is present (Linux): for a CD the drive status
+ * must be CDS_DISC_OK, for a floppy fd_open() must succeed, and every
+ * other type is always considered inserted.  Returns 1 or 0.
+ */
+static int raw_is_inserted(BlockDriverState *bs)
+{
+    BDRVRawState *raw = bs->opaque;
+
+    if (raw->type == FTYPE_CD)
+        return ioctl(raw->fd, CDROM_DRIVE_STATUS, CDSL_CURRENT) == CDS_DISC_OK;
+
+    if (raw->type == FTYPE_FD)
+        return fd_open(bs) >= 0;
+
+    return 1;
+}
+
+/* currently only used by fdc.c, but a CD version would be good too */
+static int raw_media_changed(BlockDriverState *bs)
+{
+    BDRVRawState *s = bs->opaque;
+
+    switch(s->type) {
+    case FTYPE_FD:
+        {
+            int ret;
+            /* XXX: we do not have a true media changed indication. It
+               does not work if the floppy is changed without trying
+               to read it */
+            fd_open(bs);
+            ret = s->fd_media_changed;
+            s->fd_media_changed = 0;
+#ifdef DEBUG_FLOPPY
+            printf("Floppy changed=%d\n", ret);
+#endif
+            return ret;
+        }
+    default:
+        return -ENOTSUP;
+    }
+}
+
+/*
+ * Eject the medium or close the tray (Linux).
+ *
+ * CD: a non-zero `eject_flag` issues CDROMEJECT, zero issues
+ * CDROMCLOSETRAY.  Floppy: any open descriptor is closed, the device is
+ * re-opened non-blocking and FDEJECT is issued regardless of `eject_flag`.
+ * ioctl failures are reported with perror() only.
+ *
+ * Returns 0, or -ENOTSUP for other device types.
+ */
+static int raw_eject(BlockDriverState *bs, int eject_flag)
+{
+    BDRVRawState *s = bs->opaque;
+
+    switch(s->type) {
+    case FTYPE_CD:
+        if (eject_flag) {
+            if (ioctl (s->fd, CDROMEJECT, NULL) < 0)
+                perror("CDROMEJECT");
+        } else {
+            /* name the ioctl that actually failed */
+            if (ioctl (s->fd, CDROMCLOSETRAY, NULL) < 0)
+                perror("CDROMCLOSETRAY");
+        }
+        break;
+    case FTYPE_FD:
+        {
+            int fd;
+            if (s->fd >= 0) {
+                close(s->fd);
+                s->fd = -1;
+            }
+            /* non-blocking so the open works without a disk in the drive */
+            fd = open(bs->filename, s->fd_open_flags | O_NONBLOCK);
+            if (fd >= 0) {
+                if (ioctl(fd, FDEJECT, 0) < 0)
+                    perror("FDEJECT");
+                close(fd);
+            }
+        }
+        break;
+    default:
+        return -ENOTSUP;
+    }
+    return 0;
+}
+
+/*
+ * Lock or unlock the door of a host CD-ROM drive.
+ *
+ * Returns 0 for CD-ROM devices regardless of the ioctl result and
+ * -ENOTSUP for every other device type.
+ */
+static int raw_set_locked(BlockDriverState *bs, int locked)
+{
+    BDRVRawState *s = bs->opaque;
+
+    if (s->type != FTYPE_CD)
+        return -ENOTSUP;
+
+    /* Note: an error can happen if the distribution automatically
+       mounts the CD-ROM, so the result is deliberately ignored */
+    (void) ioctl (s->fd, CDROM_LOCKDOOR, locked);
+    return 0;
+}
+
+/*
+ * Pass an ioctl request straight through to the host file descriptor.
+ * Returns whatever ioctl() returns.
+ */
+static int raw_ioctl(BlockDriverState *bs, unsigned long int req, void *buf)
+{
+    BDRVRawState *s = bs->opaque;
+
+    return ioctl(s->fd, req, buf);
+}
+
+#ifdef CONFIG_AIO
+/*
+ * Submit an ioctl request on the host file descriptor asynchronously.
+ *
+ * Returns the common part of the newly queued control block, or NULL
+ * when fd_open() fails, no control block can be obtained, or the
+ * request cannot be submitted.
+ */
+static BlockDriverAIOCB *raw_aio_ioctl(BlockDriverState *bs,
+        unsigned long int req, void *buf,
+        BlockDriverCompletionFunc *cb, void *opaque)
+{
+    BDRVRawState *s = bs->opaque;
+    RawAIOCB *acb;
+
+    if (fd_open(bs) < 0)
+        return NULL;
+
+    acb = qemu_aio_get(bs, cb, opaque);
+    if (!acb)
+        return NULL;
+    acb->aiocb.aio_fildes = s->fd;
+    acb->aiocb.ev_signo = SIGUSR2;
+    acb->aiocb.aio_offset = 0;
+    acb->aiocb.aio_flags = 0;
+
+    /* link the request at the head of the pending list before submitting */
+    acb->next = posix_aio_state->first_aio;
+    posix_aio_state->first_aio = acb;
+
+    acb->aiocb.aio_ioctl_buf = buf;
+    acb->aiocb.aio_ioctl_cmd = req;
+    if (qemu_paio_ioctl(&acb->aiocb) < 0) {
+        /* submission failed: take the request back off the list */
+        raw_aio_remove(acb);
+        return NULL;
+    }
+
+    return &acb->common;
+}
+#endif
+
+#elif defined(__FreeBSD__)
+
+/*
+ * Sanity check used by the I/O operations: succeed only when the
+ * driver state holds a valid file descriptor.
+ * Returns 0 when s->fd is valid, -EIO otherwise.
+ */
+static int fd_open(BlockDriverState *bs)
+{
+    BDRVRawState *s = bs->opaque;
+
+    return (s->fd >= 0) ? 0 : -EIO;
+}
+
+/*
+ * Reopen the host CD-ROM device so that a changed or newly loaded disc
+ * is noticed.  Does nothing for device types other than FTYPE_CD.
+ * Returns 0 on success and -EIO when the device cannot be opened.
+ */
+static int cd_open(BlockDriverState *bs)
+{
+#if defined(__FreeBSD__)
+    BDRVRawState *s = bs->opaque;
+    int new_fd;
+
+    if (s->type == FTYPE_CD) {
+        /* XXX force reread of possibly changed/newly loaded disc,
+         * FreeBSD seems to not notice sometimes... */
+        if (s->fd >= 0)
+            close (s->fd);
+        new_fd = open(bs->filename, s->cd_open_flags, 0644);
+        if (new_fd >= 0) {
+            s->fd = new_fd;
+            /* make sure the door is not locked at this time */
+            ioctl (s->fd, CDIOCALLOW);
+        } else {
+            s->fd = -1;
+            return -EIO;
+        }
+    }
+#endif
+    return 0;
+}
+
+/*
+ * Report whether a medium is present.  A CD-ROM counts as inserted when
+ * raw_getlength() reports a positive length; every other device type
+ * (including floppy, which is not handled yet) is reported as present.
+ */
+static int raw_is_inserted(BlockDriverState *bs)
+{
+    BDRVRawState *s = bs->opaque;
+
+    if (s->type == FTYPE_CD)
+        return (raw_getlength(bs) > 0);
+
+    /* XXX handle FTYPE_FD */
+    return 1;
+}
+
+/* Media-change detection is not implemented in this branch: always -ENOTSUP. */
+static int raw_media_changed(BlockDriverState *bs)
+{
+    return -ENOTSUP;
+}
+
+/*
+ * Eject (eject_flag != 0) or close the tray (eject_flag == 0) of a host
+ * CD-ROM drive, then reopen the device through cd_open().
+ *
+ * Returns 0 on success.  Returns -ENOTSUP for non-CD devices, when no
+ * descriptor is open, or when the device cannot be reopened.  Failures
+ * of the eject/close ioctls themselves are only reported with perror().
+ */
+static int raw_eject(BlockDriverState *bs, int eject_flag)
+{
+    BDRVRawState *s = bs->opaque;
+
+    /* XXX floppies are not handled and fall into the unsupported case */
+    if (s->type != FTYPE_CD)
+        return -ENOTSUP;
+    if (s->fd < 0)
+        return -ENOTSUP;
+
+    (void) ioctl (s->fd, CDIOCALLOW);
+    if (eject_flag) {
+        if (ioctl (s->fd, CDIOCEJECT) < 0)
+            perror("CDIOCEJECT");
+    } else {
+        if (ioctl (s->fd, CDIOCCLOSE) < 0)
+            perror("CDIOCCLOSE");
+    }
+
+    if (cd_open(bs) < 0)
+        return -ENOTSUP;
+    return 0;
+}
+
+/*
+ * Prevent (locked != 0) or allow (locked == 0) medium removal on a host
+ * CD-ROM drive.
+ *
+ * Returns 0 for an open CD-ROM regardless of the ioctl result, and
+ * -ENOTSUP for other device types or when no descriptor is open.
+ */
+static int raw_set_locked(BlockDriverState *bs, int locked)
+{
+    BDRVRawState *s = bs->opaque;
+
+    if (s->type != FTYPE_CD)
+        return -ENOTSUP;
+    if (s->fd < 0)
+        return -ENOTSUP;
+
+    /* Note: an error can happen if the distribution automatically
+       mounts the CD-ROM, so the result is deliberately ignored */
+    if (locked)
+        (void) ioctl (s->fd, CDIOCPREVENT);
+    else
+        (void) ioctl (s->fd, CDIOCALLOW);
+    return 0;
+}
+
+/* Generic ioctl passthrough is not implemented in this branch: always -ENOTSUP. */
+static int raw_ioctl(BlockDriverState *bs, unsigned long int req, void *buf)
+{
+    return -ENOTSUP;
+}
+
+#ifdef CONFIG_AIO
+/*
+ * bdrv_host_device references raw_aio_ioctl whenever CONFIG_AIO is
+ * defined, so this branch needs a definition too.  Asynchronous ioctls
+ * are not supported here: always returns NULL.
+ */
+static BlockDriverAIOCB *raw_aio_ioctl(BlockDriverState *bs,
+        unsigned long int req, void *buf,
+        BlockDriverCompletionFunc *cb, void *opaque)
+{
+    return NULL;
+}
+#endif
+#else /* !linux && !FreeBSD */
+
+/* Fallback for hosts that are neither Linux nor FreeBSD: nothing to reopen. */
+static int fd_open(BlockDriverState *bs)
+{
+    return 0;
+}
+
+/* Fallback: the medium is always reported as present. */
+static int raw_is_inserted(BlockDriverState *bs)
+{
+    return 1;
+}
+
+/* Fallback: media-change detection is not supported. */
+static int raw_media_changed(BlockDriverState *bs)
+{
+    return -ENOTSUP;
+}
+
+/* Fallback: eject/load is not supported. */
+static int raw_eject(BlockDriverState *bs, int eject_flag)
+{
+    return -ENOTSUP;
+}
+
+/* Fallback: door locking is not supported. */
+static int raw_set_locked(BlockDriverState *bs, int locked)
+{
+    return -ENOTSUP;
+}
+
+/* Fallback: ioctl passthrough is not supported. */
+static int raw_ioctl(BlockDriverState *bs, unsigned long int req, void *buf)
+{
+    return -ENOTSUP;
+}
+
+/* Fallback: asynchronous ioctl is not supported; no request is created. */
+static BlockDriverAIOCB *raw_aio_ioctl(BlockDriverState *bs,
+        unsigned long int req, void *buf,
+        BlockDriverCompletionFunc *cb, void *opaque)
+{
+    return NULL;
+}
+#endif /* !linux && !FreeBSD */
+
+#if defined(__linux__) || defined(__FreeBSD__)
+/*
+ * "Create" an image on an existing host block device: nothing is
+ * written, the function only verifies that the target is a block
+ * device of at least total_size sectors (512 bytes each).
+ *
+ * Returns 0 on success, -ENOTSUP when flags or a backing file are
+ * requested, -EIO when the file cannot be opened/inspected or is not a
+ * block device, and -ENOSPC when the device is too small.
+ */
+static int hdev_create(const char *filename, int64_t total_size,
+                       const char *backing_file, int flags)
+{
+    struct stat info;
+    int dev_fd;
+    int status;
+
+    if (backing_file || flags)
+        return -ENOTSUP;
+
+    dev_fd = open(filename, O_WRONLY | O_BINARY);
+    if (dev_fd < 0)
+        return -EIO;
+
+    if (fstat(dev_fd, &info) < 0 || !S_ISBLK(info.st_mode)) {
+        status = -EIO;
+    } else if (lseek(dev_fd, 0, SEEK_END) < total_size * 512) {
+        /* seeking to the end yields the device size in bytes */
+        status = -ENOSPC;
+    } else {
+        status = 0;
+    }
+
+    close(dev_fd);
+    return status;
+}
+
+#else  /* !(linux || freebsd) */
+
+/* Host-device image creation is not supported on this host: always -ENOTSUP. */
+static int hdev_create(const char *filename, int64_t total_size,
+                       const char *backing_file, int flags)
+{
+    return -ENOTSUP;
+}
+#endif
+
+/*
+ * Driver table for the "host_device" format.  It shares the raw_*
+ * callbacks and adds hdev_open/hdev_create plus the removable-media and
+ * ioctl hooks defined above.
+ */
+static BlockDriver bdrv_host_device = {
+    .format_name       = "host_device",
+    .instance_size     = sizeof(BDRVRawState),
+    .bdrv_open         = hdev_open,
+    .bdrv_close                = raw_close,
+    .bdrv_create        = hdev_create,
+    .bdrv_flush                = raw_flush,
+
+    /* asynchronous I/O entry points, only when AIO support is configured */
+#ifdef CONFIG_AIO
+    .bdrv_aio_readv    = raw_aio_readv,
+    .bdrv_aio_writev   = raw_aio_writev,
+    .bdrv_aio_cancel   = raw_aio_cancel,
+    .aiocb_size                = sizeof(RawAIOCB),
+#endif
+
+    .bdrv_read          = raw_read,
+    .bdrv_write         = raw_write,
+    .bdrv_getlength    = raw_getlength,
+
+    /* removable device support */
+    .bdrv_is_inserted  = raw_is_inserted,
+    .bdrv_media_changed        = raw_media_changed,
+    .bdrv_eject                = raw_eject,
+    .bdrv_set_locked   = raw_set_locked,
+    /* generic scsi device */
+    .bdrv_ioctl                = raw_ioctl,
+#ifdef CONFIG_AIO
+    .bdrv_aio_ioctl    = raw_aio_ioctl,
+#endif
+};
+
+/* Register the "raw" and "host_device" drivers; hooked up via block_init(). */
+static void bdrv_raw_init(void)
+{
+    bdrv_register(&bdrv_raw);
+    bdrv_register(&bdrv_host_device);
+}
+
+block_init(bdrv_raw_init);
diff --git a/block/raw-win32.c b/block/raw-win32.c
new file mode 100644 (file)
index 0000000..ab3abd6
--- /dev/null
@@ -0,0 +1,394 @@
+/*
+ * Block driver for RAW files (win32)
+ *
+ * Copyright (c) 2006 Fabrice Bellard
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#include "qemu-common.h"
+#include "qemu-timer.h"
+#include "block_int.h"
+#include "module.h"
+#include <windows.h>
+#include <winioctl.h>
+
+#define FTYPE_FILE 0
+#define FTYPE_CD     1
+#define FTYPE_HARDDISK 2
+
+/* Per-image state of the win32 raw and host_device drivers. */
+typedef struct BDRVRawState {
+    HANDLE hfile;        /* handle returned by CreateFile() */
+    int type;            /* one of the FTYPE_* constants */
+    char drive_path[16]; /* format: "d:\" */
+} BDRVRawState;
+
+/*
+ * Win32 replacement for ftruncate() taking a 64-bit length.
+ *
+ * Sets the size of the file behind C runtime descriptor fd to length
+ * and restores the previous file position afterwards.
+ * Returns 0 on success and -1 on failure.
+ */
+int qemu_ftruncate64(int fd, int64_t length)
+{
+    LARGE_INTEGER li;
+    LONG high;
+    DWORD low;
+    HANDLE h;
+    BOOL res;
+
+    /* lengths needing more than 32 bits are refused when the top bit
+       of GetVersion() is set */
+    if ((GetVersion() & 0x80000000UL) && (length >> 32) != 0)
+        return -1;
+
+    h = (HANDLE)_get_osfhandle(fd);
+
+    /* get current position, ftruncate do not change position */
+    li.HighPart = 0;
+    li.LowPart = SetFilePointer (h, 0, &li.HighPart, FILE_CURRENT);
+    if (li.LowPart == 0xffffffffUL && GetLastError() != NO_ERROR)
+        return -1;
+
+    /*
+     * SetFilePointer() returns the low 32 bits of the new position, so 0
+     * is a legitimate result (e.g. when truncating to an empty file).
+     * Failure is signalled by 0xffffffff together with an error code,
+     * exactly as for the FILE_CURRENT query above.
+     */
+    high = length >> 32;
+    low = SetFilePointer(h, (DWORD) length, &high, FILE_BEGIN);
+    if (low == 0xffffffffUL && GetLastError() != NO_ERROR)
+        return -1;
+    res = SetEndOfFile(h);
+
+    /* back to old position */
+    SetFilePointer(h, li.LowPart, &li.HighPart, FILE_BEGIN);
+    return res ? 0 : -1;
+}
+
+/*
+ * Ask the file system to mark the file behind descriptor fd as sparse.
+ * Returns the DeviceIoControl() result converted to int (non-zero on
+ * success).
+ */
+static int set_sparse(int fd)
+{
+    DWORD returned;
+    HANDLE h = (HANDLE)_get_osfhandle(fd);
+    BOOL ok;
+
+    ok = DeviceIoControl(h, FSCTL_SET_SPARSE,
+                         NULL, 0, NULL, 0, &returned, NULL);
+    return (int) ok;
+}
+
+/*
+ * Open (or create, with BDRV_O_CREAT) a regular file as a raw image.
+ *
+ * Returns 0 on success, -EACCES when access is denied and -1 for any
+ * other CreateFile() failure.
+ */
+static int raw_open(BlockDriverState *bs, const char *filename, int flags)
+{
+    BDRVRawState *s = bs->opaque;
+    int access_flags, create_flags;
+    DWORD attrs = FILE_ATTRIBUTE_NORMAL;
+
+    s->type = FTYPE_FILE;
+
+    access_flags = ((flags & BDRV_O_ACCESS) == O_RDWR)
+        ? (GENERIC_READ | GENERIC_WRITE)
+        : GENERIC_READ;
+    create_flags = (flags & BDRV_O_CREAT) ? CREATE_ALWAYS : OPEN_EXISTING;
+
+    /* map the cache mode onto the Win32 buffering flags */
+    if (flags & BDRV_O_NOCACHE) {
+        attrs |= FILE_FLAG_NO_BUFFERING | FILE_FLAG_WRITE_THROUGH;
+    } else if (!(flags & BDRV_O_CACHE_WB)) {
+        attrs |= FILE_FLAG_WRITE_THROUGH;
+    }
+
+    s->hfile = CreateFile(filename, access_flags,
+                          FILE_SHARE_READ, NULL,
+                          create_flags, attrs, NULL);
+    if (s->hfile != INVALID_HANDLE_VALUE)
+        return 0;
+
+    if (GetLastError() == ERROR_ACCESS_DENIED)
+        return -EACCES;
+    return -1;
+}
+
+/*
+ * Read nb_sectors 512-byte sectors starting at sector_num into buf.
+ *
+ * Returns 0 when the full amount was read, -EIO when ReadFile() fails,
+ * and the number of bytes actually transferred on a short read.
+ */
+static int raw_read(BlockDriverState *bs, int64_t sector_num,
+                    uint8_t *buf, int nb_sectors)
+{
+    BDRVRawState *s = bs->opaque;
+    OVERLAPPED ov;
+    DWORD ret_count;
+    int ret;
+    int64_t offset = sector_num * 512;
+    int count = nb_sectors * 512;
+
+    /* the file offset is passed through the OVERLAPPED structure */
+    memset(&ov, 0, sizeof(ov));
+    ov.Offset = offset;
+    ov.OffsetHigh = offset >> 32;
+    ret = ReadFile(s->hfile, buf, count, &ret_count, &ov);
+    if (!ret)
+        /* a failed call must surface as an error, not as a byte count */
+        return -EIO;
+    if (ret_count == count)
+        ret_count = 0;
+    return ret_count;
+}
+
+/*
+ * Write nb_sectors 512-byte sectors from buf starting at sector_num.
+ *
+ * Returns 0 when the full amount was written, -EIO when WriteFile()
+ * fails, and the number of bytes actually transferred on a short write.
+ */
+static int raw_write(BlockDriverState *bs, int64_t sector_num,
+                     const uint8_t *buf, int nb_sectors)
+{
+    BDRVRawState *s = bs->opaque;
+    OVERLAPPED ov;
+    DWORD ret_count;
+    int ret;
+    int64_t offset = sector_num * 512;
+    int count = nb_sectors * 512;
+
+    /* the file offset is passed through the OVERLAPPED structure */
+    memset(&ov, 0, sizeof(ov));
+    ov.Offset = offset;
+    ov.OffsetHigh = offset >> 32;
+    ret = WriteFile(s->hfile, buf, count, &ret_count, &ov);
+    if (!ret)
+        /* a failed call must surface as an error, not as a byte count */
+        return -EIO;
+    if (ret_count == count)
+        ret_count = 0;
+    return ret_count;
+}
+
+/* Flush buffered data of the image handle; the result is not checked. */
+static void raw_flush(BlockDriverState *bs)
+{
+    BDRVRawState *s = bs->opaque;
+    FlushFileBuffers(s->hfile);
+}
+
+/* Close the image handle opened by raw_open()/hdev_open(). */
+static void raw_close(BlockDriverState *bs)
+{
+    BDRVRawState *s = bs->opaque;
+    CloseHandle(s->hfile);
+}
+
+/*
+ * Resize the image file to offset bytes.
+ * Returns 0 on success and -EIO when seeking or setting the end of
+ * file fails.
+ */
+static int raw_truncate(BlockDriverState *bs, int64_t offset)
+{
+    BDRVRawState *s = bs->opaque;
+    LONG low, high;
+    DWORD pos;
+
+    low = offset;
+    high = offset >> 32;
+    /*
+     * SetFilePointer() returns the low 32 bits of the new position, so 0
+     * is a valid result (truncating to an empty file).  Failure is
+     * signalled by 0xffffffff together with an error code.
+     */
+    pos = SetFilePointer(s->hfile, low, &high, FILE_BEGIN);
+    if (pos == 0xffffffffUL && GetLastError() != NO_ERROR)
+        return -EIO;
+    if (!SetEndOfFile(s->hfile))
+        return -EIO;
+    return 0;
+}
+
+/*
+ * Return the size of the image in bytes, or -EIO on failure.
+ *
+ * The method depends on the device type: file size for regular files,
+ * total volume size for CD-ROM drives and the drive geometry for hard
+ * disks.
+ */
+static int64_t raw_getlength(BlockDriverState *bs)
+{
+    BDRVRawState *s = bs->opaque;
+    LARGE_INTEGER l;
+    ULARGE_INTEGER available, total, total_free;
+    DISK_GEOMETRY_EX dg;
+    DWORD count;
+    BOOL status;
+
+    switch(s->type) {
+    case FTYPE_FILE:
+        l.LowPart = GetFileSize(s->hfile, (PDWORD)&l.HighPart);
+        /* 0xffffffff is a valid low part; only an error code means failure */
+        if (l.LowPart == 0xffffffffUL && GetLastError() != NO_ERROR)
+            return -EIO;
+        break;
+    case FTYPE_CD:
+        if (!GetDiskFreeSpaceEx(s->drive_path, &available, &total, &total_free))
+            return -EIO;
+        l.QuadPart = total.QuadPart;
+        break;
+    case FTYPE_HARDDISK:
+        status = DeviceIoControl(s->hfile, IOCTL_DISK_GET_DRIVE_GEOMETRY_EX,
+                                 NULL, 0, &dg, sizeof(dg), &count, NULL);
+        /* without this check an uninitialized length would be returned */
+        if (status == 0)
+            return -EIO;
+        l = dg.DiskSize;
+        break;
+    default:
+        return -EIO;
+    }
+    return l.QuadPart;
+}
+
+/*
+ * Create a raw image file of total_size sectors (512 bytes each).
+ *
+ * Returns 0 on success, -ENOTSUP when flags or a backing file are
+ * requested, and -EIO when the file cannot be created or resized.
+ */
+static int raw_create(const char *filename, int64_t total_size,
+                      const char *backing_file, int flags)
+{
+    int fd;
+    int ret = 0;
+
+    if (flags || backing_file)
+        return -ENOTSUP;
+
+    fd = open(filename, O_WRONLY | O_CREAT | O_TRUNC | O_BINARY,
+              0644);
+    if (fd < 0)
+        return -EIO;
+    /* best effort: the image is still usable if it cannot be made sparse */
+    set_sparse(fd);
+    /* a file of the wrong size must not be reported as a valid image */
+    if (ftruncate(fd, total_size * 512) != 0)
+        ret = -EIO;
+    close(fd);
+    return ret;
+}
+
+/* Driver table for the "raw" format backed by a regular win32 file. */
+static BlockDriver bdrv_raw = {
+    .format_name       = "raw",
+    .instance_size     = sizeof(BDRVRawState),
+    .bdrv_open         = raw_open,
+    .bdrv_close                = raw_close,
+    .bdrv_create       = raw_create,
+    .bdrv_flush                = raw_flush,
+    .bdrv_read         = raw_read,
+    .bdrv_write                = raw_write,
+    .bdrv_truncate     = raw_truncate,
+    .bdrv_getlength    = raw_getlength,
+};
+
+/***********************************************/
+/* host device */
+
+/*
+ * Find the first CD-ROM drive among the logical drives and store its
+ * device name ("\\.\X:") in cdrom_name.
+ * Returns 0 when a drive was found, -1 otherwise.
+ */
+static int find_cdrom(char *cdrom_name, int cdrom_name_size)
+{
+    char drives[256];
+    char *cur;
+
+    /* pre-zeroed so the walk below stops even if nothing was filled in */
+    memset(drives, 0, sizeof(drives));
+    GetLogicalDriveStrings(sizeof(drives), drives);
+
+    /* the buffer holds consecutive NUL-terminated drive strings */
+    for (cur = drives; cur[0] != '\0'; cur += lstrlen(cur) + 1) {
+        if (GetDriveType(cur) == DRIVE_CDROM) {
+            snprintf(cdrom_name, cdrom_name_size, "\\\\.\\%c:", cur[0]);
+            return 0;
+        }
+    }
+    return -1;
+}
+
+/*
+ * Classify filename as one of the FTYPE_* kinds.
+ *
+ * Only names starting with "\\.\" or "//./" are treated as devices;
+ * for drive-letter devices s->drive_path is filled in as a side effect.
+ */
+static int find_device_type(BlockDriverState *bs, const char *filename)
+{
+    BDRVRawState *s = bs->opaque;
+    UINT drive_type;
+    const char *p;
+
+    if (!strstart(filename, "\\\\.\\", &p) &&
+        !strstart(filename, "//./", &p))
+        return FTYPE_FILE;
+
+    if (stristart(p, "PhysicalDrive", NULL))
+        return FTYPE_HARDDISK;
+
+    /* build the "X:\" root path from the first character after the prefix */
+    snprintf(s->drive_path, sizeof(s->drive_path), "%c:\\", p[0]);
+    drive_type = GetDriveType(s->drive_path);
+    if (drive_type == DRIVE_REMOVABLE || drive_type == DRIVE_FIXED)
+        return FTYPE_HARDDISK;
+    if (drive_type == DRIVE_CDROM)
+        return FTYPE_CD;
+    return FTYPE_FILE;
+}
+
+/*
+ * Open a host device.
+ *
+ * "/dev/cdrom..." is mapped to the first CD-ROM drive and a bare drive
+ * letter ("d:") to its device name ("\\.\d:").
+ * Returns 0 on success, -ENOENT when no CD-ROM drive exists, -EACCES
+ * when access is denied and -1 for any other CreateFile() failure.
+ */
+static int hdev_open(BlockDriverState *bs, const char *filename, int flags)
+{
+    BDRVRawState *s = bs->opaque;
+    int access_flags;
+    DWORD attrs = FILE_ATTRIBUTE_NORMAL;
+    char device_name[64];
+
+    if (strstart(filename, "/dev/cdrom", NULL)) {
+        if (find_cdrom(device_name, sizeof(device_name)) < 0)
+            return -ENOENT;
+        filename = device_name;
+    } else if (((filename[0] >= 'a' && filename[0] <= 'z') ||
+                (filename[0] >= 'A' && filename[0] <= 'Z')) &&
+               filename[1] == ':' && filename[2] == '\0') {
+        /* transform drive letters into device name */
+        snprintf(device_name, sizeof(device_name), "\\\\.\\%c:", filename[0]);
+        filename = device_name;
+    }
+    s->type = find_device_type(bs, filename);
+
+    access_flags = ((flags & BDRV_O_ACCESS) == O_RDWR)
+        ? (GENERIC_READ | GENERIC_WRITE)
+        : GENERIC_READ;
+
+    /* map the cache mode onto the Win32 buffering flags */
+    if (flags & BDRV_O_NOCACHE) {
+        attrs |= FILE_FLAG_NO_BUFFERING | FILE_FLAG_WRITE_THROUGH;
+    } else if (!(flags & BDRV_O_CACHE_WB)) {
+        attrs |= FILE_FLAG_WRITE_THROUGH;
+    }
+
+    /* host devices are never created, only opened */
+    s->hfile = CreateFile(filename, access_flags,
+                          FILE_SHARE_READ, NULL,
+                          OPEN_EXISTING, attrs, NULL);
+    if (s->hfile != INVALID_HANDLE_VALUE)
+        return 0;
+
+    if (GetLastError() == ERROR_ACCESS_DENIED)
+        return -EACCES;
+    return -1;
+}
+
+#if 0
+/***********************************************/
+/* removable device additional commands */
+
+/* The medium is always reported as present. */
+static int raw_is_inserted(BlockDriverState *bs)
+{
+    return 1;
+}
+
+/* Media-change detection is not supported. */
+static int raw_media_changed(BlockDriverState *bs)
+{
+    return -ENOTSUP;
+}
+
+/*
+ * Eject (eject_flag != 0) or load (eject_flag == 0) the medium of a
+ * host device.  Returns -ENOTSUP for regular files and 0 otherwise; the
+ * DeviceIoControl() result is not checked.
+ */
+static int raw_eject(BlockDriverState *bs, int eject_flag)
+{
+    BDRVRawState *s = bs->opaque;
+    DWORD ret_count;
+
+    if (s->type == FTYPE_FILE)
+        return -ENOTSUP;
+    if (eject_flag) {
+        DeviceIoControl(s->hfile, IOCTL_STORAGE_EJECT_MEDIA,
+                        NULL, 0, NULL, 0, &ret_count, NULL);
+    } else {
+        DeviceIoControl(s->hfile, IOCTL_STORAGE_LOAD_MEDIA,
+                        NULL, 0, NULL, 0, &ret_count, NULL);
+    }
+    return 0;
+}
+
+/* Door locking is not supported. */
+static int raw_set_locked(BlockDriverState *bs, int locked)
+{
+    return -ENOTSUP;
+}
+#endif
+
+/*
+ * Driver table for the "host_device" format on win32.  It reuses the
+ * raw_* callbacks with hdev_open; no create, truncate or
+ * removable-media hooks are installed.
+ */
+static BlockDriver bdrv_host_device = {
+    .format_name       = "host_device",
+    .instance_size     = sizeof(BDRVRawState),
+    .bdrv_open         = hdev_open,
+    .bdrv_close                = raw_close,
+    .bdrv_flush                = raw_flush,
+
+    .bdrv_read         = raw_read,
+    .bdrv_write                = raw_write,
+    .bdrv_getlength    = raw_getlength,
+};
+
+/* Register the "raw" and "host_device" drivers; hooked up via block_init(). */
+static void bdrv_raw_init(void)
+{
+    bdrv_register(&bdrv_raw);
+    bdrv_register(&bdrv_host_device);
+}
+
+block_init(bdrv_raw_init);
diff --git a/block/vmdk.c b/block/vmdk.c
new file mode 100644 (file)
index 0000000..13866e9
--- /dev/null
@@ -0,0 +1,833 @@
+/*
+ * Block driver for the VMDK format
+ *
+ * Copyright (c) 2004 Fabrice Bellard
+ * Copyright (c) 2005 Filip Navara
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include "qemu-common.h"
+#include "block_int.h"
+#include "module.h"
+
+#define VMDK3_MAGIC (('C' << 24) | ('O' << 16) | ('W' << 8) | 'D')
+#define VMDK4_MAGIC (('K' << 24) | ('D' << 16) | ('M' << 8) | 'V')
+
+/*
+ * Header of a VMDK3 image as read by vmdk_open() right after the 4-byte
+ * magic.  Fields are converted with le32_to_cpu() before use.
+ */
+typedef struct {
+    uint32_t version;
+    uint32_t flags;
+    uint32_t disk_sectors;      /* becomes bs->total_sectors */
+    uint32_t granularity;       /* becomes the cluster size in sectors */
+    uint32_t l1dir_offset;      /* L1 table position, in sectors */
+    uint32_t l1dir_size;
+    uint32_t file_sectors;
+    uint32_t cylinders;
+    uint32_t heads;
+    uint32_t sectors_per_track;
+} VMDK3Header;
+
+/*
+ * Header of a VMDK4 image as read by vmdk_open() right after the 4-byte
+ * magic.  The structure is packed; fields are converted with
+ * le32_to_cpu()/le64_to_cpu() before use.
+ */
+typedef struct {
+    uint32_t version;
+    uint32_t flags;
+    int64_t capacity;           /* becomes bs->total_sectors */
+    int64_t granularity;        /* becomes the cluster size in sectors */
+    int64_t desc_offset;
+    int64_t desc_size;
+    int32_t num_gtes_per_gte;   /* becomes the L2 table size in entries */
+    int64_t rgd_offset;         /* L1 table position, in sectors */
+    int64_t gd_offset;          /* backup L1 table position, in sectors */
+    int64_t grain_offset;
+    char filler[1];
+    char check_bytes[4];
+} __attribute__((packed)) VMDK4Header;
+
+#define L2_CACHE_SIZE 16
+
+/* Per-image state of the VMDK driver, filled in by vmdk_open(). */
+typedef struct BDRVVmdkState {
+    BlockDriverState *hd;           /* underlying file opened with bdrv_file_open() */
+    int64_t l1_table_offset;        /* byte offset of the L1 table */
+    int64_t l1_backup_table_offset; /* byte offset of the backup L1 table, 0 if none */
+    uint32_t *l1_table;
+    uint32_t *l1_backup_table;
+    unsigned int l1_size;           /* number of L1 entries */
+    uint32_t l1_entry_sectors;      /* l2_size * cluster_sectors */
+
+    unsigned int l2_size;           /* number of entries per L2 table */
+    uint32_t *l2_cache;             /* room for L2_CACHE_SIZE tables of l2_size entries */
+    uint32_t l2_cache_offsets[L2_CACHE_SIZE];
+    uint32_t l2_cache_counts[L2_CACHE_SIZE];
+
+    unsigned int cluster_sectors;   /* cluster (grain) size in sectors */
+    uint32_t parent_cid;            /* "parentCID" value read from the descriptor */
+    int is_parent;                  /* set when the image was opened as a parent */
+} BDRVVmdkState;
+
+/*
+ * Bookkeeping for a pending L2 table update (consumed by vmdk_L2update()).
+ * NOTE(review): the meaning of l1_index and valid is not visible in this
+ * part of the file - confirm against get_cluster_offset().
+ */
+typedef struct VmdkMetaData {
+    uint32_t offset;        /* value written into the L2 entry */
+    unsigned int l1_index;
+    unsigned int l2_index;  /* index of the entry inside its L2 table */
+    unsigned int l2_offset; /* position of the L2 table, in 512-byte sectors */
+    int valid;
+} VmdkMetaData;
+
+/* Destination used by get_whole_cluster() when copying a grain from a parent. */
+typedef struct ActiveBDRVState{
+    BlockDriverState *hd;            // active image handler
+    uint64_t cluster_offset;         // current write offset
+}ActiveBDRVState;
+
+/* file-wide instance shared by all VMDK images */
+static ActiveBDRVState activeBDRV;
+
+
+/*
+ * Probe whether buf starts with a VMDK3 or VMDK4 magic number.
+ * Returns 100 on a match and 0 otherwise (including when fewer than
+ * 4 bytes are available).
+ */
+static int vmdk_probe(const uint8_t *buf, int buf_size, const char *filename)
+{
+    uint32_t magic;
+
+    if (buf_size < 4)
+        return 0;
+    /* copy instead of casting: buf is a byte buffer with no alignment
+       guarantee for a 32-bit load */
+    memcpy(&magic, buf, sizeof(magic));
+    magic = be32_to_cpu(magic);
+    if (magic == VMDK3_MAGIC ||
+        magic == VMDK4_MAGIC)
+        return 100;
+    else
+        return 0;
+}
+
+#define CHECK_CID 1
+
+#define SECTOR_SIZE 512
+#define DESC_SIZE (20*SECTOR_SIZE)     // 20 sectors of 512 bytes each
+#define HEADER_SIZE 512                        // first sector of 512 bytes
+
+/*
+ * Read a content ID from the text descriptor of the image.
+ *
+ * parent != 0 selects the "parentCID" entry, otherwise "CID".
+ * Returns the parsed value, or 0 when the descriptor cannot be read or
+ * the entry is missing.
+ */
+static uint32_t vmdk_read_cid(BlockDriverState *bs, int parent)
+{
+    BDRVVmdkState *s = bs->opaque;
+    char desc[DESC_SIZE + 1];
+    uint32_t cid = 0;           /* result when the entry is not found */
+    const char *p_name, *cid_str;
+    size_t cid_str_size;
+
+    /* the descriptor offset = 0x200 */
+    if (bdrv_pread(s->hd, 0x200, desc, DESC_SIZE) != DESC_SIZE)
+        return 0;
+    /* the on-disk data need not contain a NUL: terminate it for strstr() */
+    desc[DESC_SIZE] = '\0';
+
+    /* sizeof() counts the terminating NUL, which skips the '=' as well */
+    if (parent) {
+        cid_str = "parentCID";
+        cid_str_size = sizeof("parentCID");
+    } else {
+        cid_str = "CID";
+        cid_str_size = sizeof("CID");
+    }
+
+    if ((p_name = strstr(desc,cid_str)) != NULL) {
+        p_name += cid_str_size;
+        sscanf(p_name,"%x",&cid);
+    }
+
+    return cid;
+}
+
+/*
+ * Replace the "CID" value in the text descriptor of the image with cid.
+ *
+ * Returns 0 on success and -1 when the descriptor cannot be read or
+ * written, or when it has no "parentCID" entry.
+ */
+static int vmdk_write_cid(BlockDriverState *bs, uint32_t cid)
+{
+    BDRVVmdkState *s = bs->opaque;
+    char desc[DESC_SIZE + 1], tmp_desc[DESC_SIZE];
+    char *p_name, *tmp_str;
+
+    /* the descriptor offset = 0x200 */
+    if (bdrv_pread(s->hd, 0x200, desc, DESC_SIZE) != DESC_SIZE)
+        return -1;
+    /* the on-disk data need not contain a NUL: terminate it for strstr() */
+    desc[DESC_SIZE] = '\0';
+
+    /* save everything from "parentCID" onwards; it is re-appended below */
+    tmp_str = strstr(desc,"parentCID");
+    if (tmp_str == NULL)
+        return -1;
+    pstrcpy(tmp_desc, sizeof(tmp_desc), tmp_str);
+    if ((p_name = strstr(desc,"CID")) != NULL) {
+        p_name += sizeof("CID");
+        /* overwrite the value, then restore the saved tail after it */
+        snprintf(p_name, DESC_SIZE - (p_name - desc), "%x\n", cid);
+        pstrcat(desc, DESC_SIZE, tmp_desc);
+    }
+
+    if (bdrv_pwrite(s->hd, 0x200, desc, DESC_SIZE) != DESC_SIZE)
+        return -1;
+    return 0;
+}
+
+/*
+ * Check that the parent CID recorded for this image still matches the
+ * CID stored in its backing image.
+ *
+ * Returns 1 when they match, when there is no backing image, or when
+ * CHECK_CID is not defined; returns 0 on a mismatch.
+ */
+static int vmdk_is_cid_valid(BlockDriverState *bs)
+{
+#ifdef CHECK_CID
+    BDRVVmdkState *s = bs->opaque;
+    BlockDriverState *parent = s->hd->backing_hd;
+
+    /* the parent's own CID is only read when a parent exists */
+    if (parent && s->parent_cid != vmdk_read_cid(parent, 0))
+        return 0;
+#endif
+    return 1;
+}
+
+/*
+ * Create the snapshot image filename on top of backing_file.
+ *
+ * The parent's header and both grain directories (RGD and GD) are
+ * copied into the new file and a fresh text descriptor naming the
+ * parent is written at offset 0x200.
+ * Returns 0 on success and -1 on any failure.
+ */
+static int vmdk_snapshot_create(const char *filename, const char *backing_file)
+{
+    int snp_fd, p_fd;
+    int ret = -1;
+    uint32_t p_cid = 0;         /* used when the parent has no CID entry */
+    char *p_name;
+    /* NULL-initialized so the single cleanup path can free unconditionally */
+    char *gd_buf = NULL, *rgd_buf = NULL;
+    const char *real_filename, *temp_str;
+    VMDK4Header header;
+    uint32_t gde_entries, gd_size;
+    int64_t gd_offset, rgd_offset, capacity, gt_size;
+    char p_desc[DESC_SIZE + 1], s_desc[DESC_SIZE], hdr[HEADER_SIZE];
+    static const char desc_template[] =
+    "# Disk DescriptorFile\n"
+    "version=1\n"
+    "CID=%x\n"
+    "parentCID=%x\n"
+    "createType=\"monolithicSparse\"\n"
+    "parentFileNameHint=\"%s\"\n"
+    "\n"
+    "# Extent description\n"
+    "RW %u SPARSE \"%s\"\n"
+    "\n"
+    "# The Disk Data Base \n"
+    "#DDB\n"
+    "\n";
+
+    snp_fd = open(filename, O_RDWR | O_CREAT | O_TRUNC | O_BINARY | O_LARGEFILE, 0644);
+    if (snp_fd < 0)
+        return -1;
+    p_fd = open(backing_file, O_RDONLY | O_BINARY | O_LARGEFILE);
+    if (p_fd < 0) {
+        close(snp_fd);
+        return -1;
+    }
+
+    /* read the header */
+    if (lseek(p_fd, 0x0, SEEK_SET) == -1)
+        goto fail;
+    if (read(p_fd, hdr, HEADER_SIZE) != HEADER_SIZE)
+        goto fail;
+
+    /* write the header */
+    if (lseek(snp_fd, 0x0, SEEK_SET) == -1)
+        goto fail;
+    if (write(snp_fd, hdr, HEADER_SIZE) == -1)
+        goto fail;
+
+    memset(&header, 0, sizeof(header));
+    memcpy(&header,&hdr[4], sizeof(header)); // skip the VMDK4_MAGIC
+
+    /* reserve the metadata area in front of the first grain */
+    if (ftruncate(snp_fd, header.grain_offset << 9) != 0)
+        goto fail;
+    /* the descriptor offset = 0x200 */
+    if (lseek(p_fd, 0x200, SEEK_SET) == -1)
+        goto fail;
+    if (read(p_fd, p_desc, DESC_SIZE) != DESC_SIZE)
+        goto fail;
+    /* the on-disk data need not contain a NUL: terminate it for strstr() */
+    p_desc[DESC_SIZE] = '\0';
+
+    if ((p_name = strstr(p_desc,"CID")) != NULL) {
+        p_name += sizeof("CID");
+        sscanf(p_name,"%x",&p_cid);
+    }
+
+    /* keep only the last path component, whatever the separator */
+    real_filename = filename;
+    if ((temp_str = strrchr(real_filename, '\\')) != NULL)
+        real_filename = temp_str + 1;
+    if ((temp_str = strrchr(real_filename, '/')) != NULL)
+        real_filename = temp_str + 1;
+    if ((temp_str = strrchr(real_filename, ':')) != NULL)
+        real_filename = temp_str + 1;
+
+    snprintf(s_desc, sizeof(s_desc), desc_template, p_cid, p_cid, backing_file,
+             (uint32_t)header.capacity, real_filename);
+
+    /* write the descriptor */
+    if (lseek(snp_fd, 0x200, SEEK_SET) == -1)
+        goto fail;
+    if (write(snp_fd, s_desc, strlen(s_desc)) == -1)
+        goto fail;
+
+    gd_offset = header.gd_offset * SECTOR_SIZE;     // offset of GD table
+    rgd_offset = header.rgd_offset * SECTOR_SIZE;   // offset of RGD table
+    capacity = header.capacity * SECTOR_SIZE;       // Extent size
+    /*
+     * Each GDE span 32M disk, means:
+     * 512 GTE per GT, each GTE points to grain
+     */
+    gt_size = (int64_t)header.num_gtes_per_gte * header.granularity * SECTOR_SIZE;
+    if (!gt_size)
+        goto fail;
+    gde_entries = (uint32_t)(capacity / gt_size);  // number of gde/rgde
+    gd_size = gde_entries * sizeof(uint32_t);
+
+    /* write RGD */
+    rgd_buf = qemu_malloc(gd_size);
+    if (lseek(p_fd, rgd_offset, SEEK_SET) == -1)
+        goto fail;
+    if (read(p_fd, rgd_buf, gd_size) != gd_size)
+        goto fail;
+    if (lseek(snp_fd, rgd_offset, SEEK_SET) == -1)
+        goto fail;
+    if (write(snp_fd, rgd_buf, gd_size) == -1)
+        goto fail;
+
+    /* write GD */
+    gd_buf = qemu_malloc(gd_size);
+    if (lseek(p_fd, gd_offset, SEEK_SET) == -1)
+        goto fail;
+    if (read(p_fd, gd_buf, gd_size) != gd_size)
+        goto fail;
+    if (lseek(snp_fd, gd_offset, SEEK_SET) == -1)
+        goto fail;
+    if (write(snp_fd, gd_buf, gd_size) == -1)
+        goto fail;
+
+    ret = 0;
+
+    /* single exit: each buffer is released exactly once on every path */
+    fail:
+    qemu_free(gd_buf);
+    qemu_free(rgd_buf);
+    close(p_fd);
+    close(snp_fd);
+    return ret;
+}
+
+/* Close the backing image of bs, if it has one. */
+static void vmdk_parent_close(BlockDriverState *bs)
+{
+    if (bs->backing_hd)
+        bdrv_close(bs->backing_hd);
+}
+
+/* non-zero while a parent image is being opened; read by vmdk_open() */
+static int parent_open = 0;
+
+/*
+ * Open the parent image named by the "parentFileNameHint" entry of the
+ * descriptor, if there is one, and attach it as s->hd->backing_hd.
+ *
+ * Returns 0 on success or when the image has no parent, -1 on failure.
+ */
+static int vmdk_parent_open(BlockDriverState *bs, const char * filename)
+{
+    BDRVVmdkState *s = bs->opaque;
+    char *p_name;
+    char desc[DESC_SIZE + 1];
+    char parent_img_name[1024];
+
+    /* the descriptor offset = 0x200 */
+    if (bdrv_pread(s->hd, 0x200, desc, DESC_SIZE) != DESC_SIZE)
+        return -1;
+    /* the on-disk data need not contain a NUL: terminate it for strstr() */
+    desc[DESC_SIZE] = '\0';
+
+    if ((p_name = strstr(desc,"parentFileNameHint")) != NULL) {
+        char *end_name;
+        struct stat file_buf;
+        int ret;
+
+        /* sizeof() counts the NUL, so this skips the key, '=' and '"' */
+        p_name += sizeof("parentFileNameHint") + 1;
+        if ((end_name = strchr(p_name,'\"')) == NULL)
+            return -1;
+        if ((end_name - p_name) > sizeof (s->hd->backing_file) - 1)
+            return -1;
+
+        pstrcpy(s->hd->backing_file, end_name - p_name + 1, p_name);
+        /* fall back to a path relative to this image when the name as
+           given does not exist */
+        if (stat(s->hd->backing_file, &file_buf) != 0) {
+            path_combine(parent_img_name, sizeof(parent_img_name),
+                         filename, s->hd->backing_file);
+        } else {
+            pstrcpy(parent_img_name, sizeof(parent_img_name),
+                    s->hd->backing_file);
+        }
+
+        s->hd->backing_hd = bdrv_new("");
+        if (!s->hd->backing_hd) {
+            bdrv_close(s->hd);
+            return -1;
+        }
+        /* the flag must be cleared on failure too, otherwise every later
+           vmdk_open() would be treated as a parent open */
+        parent_open = 1;
+        ret = bdrv_open(s->hd->backing_hd, parent_img_name, BDRV_O_RDONLY);
+        parent_open = 0;
+        if (ret < 0) {
+            bdrv_close(s->hd);
+            return -1;
+        }
+    }
+
+    return 0;
+}
+
+/*
+ * Open a VMDK3 or VMDK4 image: read the header, derive the table
+ * geometry, open the parent image (VMDK4 only) and load the L1 table
+ * and, when present, its backup copy.
+ *
+ * Returns 0 on success, the bdrv_file_open() error when the file cannot
+ * be opened, and -1 on any later failure.
+ */
+static int vmdk_open(BlockDriverState *bs, const char *filename, int flags)
+{
+    BDRVVmdkState *s = bs->opaque;
+    uint32_t magic;
+    int l1_size, i, ret;
+
+    if (parent_open)
+        // Parent must be opened as RO.
+        flags = BDRV_O_RDONLY;
+
+    ret = bdrv_file_open(&s->hd, filename, flags);
+    if (ret < 0)
+        return ret;
+    if (bdrv_pread(s->hd, 0, &magic, sizeof(magic)) != sizeof(magic))
+        goto fail;
+
+    magic = be32_to_cpu(magic);
+    if (magic == VMDK3_MAGIC) {
+        VMDK3Header header;
+
+        /* the header follows the magic number */
+        if (bdrv_pread(s->hd, sizeof(magic), &header, sizeof(header)) != sizeof(header))
+            goto fail;
+        s->cluster_sectors = le32_to_cpu(header.granularity);
+        /* VMDK3 uses fixed table sizes */
+        s->l2_size = 1 << 9;
+        s->l1_size = 1 << 6;
+        bs->total_sectors = le32_to_cpu(header.disk_sectors);
+        /* sector offset converted to bytes */
+        s->l1_table_offset = le32_to_cpu(header.l1dir_offset) << 9;
+        s->l1_backup_table_offset = 0;
+        s->l1_entry_sectors = s->l2_size * s->cluster_sectors;
+    } else if (magic == VMDK4_MAGIC) {
+        VMDK4Header header;
+
+        /* the header follows the magic number */
+        if (bdrv_pread(s->hd, sizeof(magic), &header, sizeof(header)) != sizeof(header))
+            goto fail;
+        bs->total_sectors = le64_to_cpu(header.capacity);
+        s->cluster_sectors = le64_to_cpu(header.granularity);
+        s->l2_size = le32_to_cpu(header.num_gtes_per_gte);
+        s->l1_entry_sectors = s->l2_size * s->cluster_sectors;
+        /* guards the division below against a zero divisor */
+        if (s->l1_entry_sectors <= 0)
+            goto fail;
+        /* round up so a partial last entry is still covered */
+        s->l1_size = (bs->total_sectors + s->l1_entry_sectors - 1)
+            / s->l1_entry_sectors;
+        /* sector offsets converted to bytes */
+        s->l1_table_offset = le64_to_cpu(header.rgd_offset) << 9;
+        s->l1_backup_table_offset = le64_to_cpu(header.gd_offset) << 9;
+
+        if (parent_open)
+            s->is_parent = 1;
+        else
+            s->is_parent = 0;
+
+        // try to open parent images, if exist
+        if (vmdk_parent_open(bs, filename) != 0)
+            goto fail;
+        // write the CID once after the image creation
+        s->parent_cid = vmdk_read_cid(bs,1);
+    } else {
+        goto fail;
+    }
+
+    /* read the L1 table */
+    l1_size = s->l1_size * sizeof(uint32_t);
+    s->l1_table = qemu_malloc(l1_size);
+    if (bdrv_pread(s->hd, s->l1_table_offset, s->l1_table, l1_size) != l1_size)
+        goto fail;
+    /* convert the entries to host byte order in place */
+    for(i = 0; i < s->l1_size; i++) {
+        le32_to_cpus(&s->l1_table[i]);
+    }
+
+    /* read the backup L1 table when the image has one */
+    if (s->l1_backup_table_offset) {
+        s->l1_backup_table = qemu_malloc(l1_size);
+        if (bdrv_pread(s->hd, s->l1_backup_table_offset, s->l1_backup_table, l1_size) != l1_size)
+            goto fail;
+        for(i = 0; i < s->l1_size; i++) {
+            le32_to_cpus(&s->l1_backup_table[i]);
+        }
+    }
+
+    s->l2_cache = qemu_malloc(s->l2_size * L2_CACHE_SIZE * sizeof(uint32_t));
+    return 0;
+ fail:
+    /* NOTE(review): relies on the never-allocated pointers being NULL,
+       i.e. on bs->opaque starting out zeroed - confirm */
+    qemu_free(s->l1_backup_table);
+    qemu_free(s->l1_table);
+    qemu_free(s->l2_cache);
+    bdrv_delete(s->hd);
+    return -1;
+}
+
+static uint64_t get_cluster_offset(BlockDriverState *bs, VmdkMetaData *m_data,
+                                   uint64_t offset, int allocate);
+
+/*
+ * Fill a freshly allocated grain of the active image with the matching
+ * grain of the parent (backing) image, if there is a parent and it maps
+ * that grain.
+ *
+ * This is reached on the first write to a grain that does not exist yet.
+ * The data is written to the image recorded in activeBDRV, at
+ * activeBDRV.cluster_offset; the cluster_offset argument is not used here.
+ *
+ * Returns 0 on success or when there is nothing to copy, -1 on error
+ * (invalid CID, parent grain larger than ours, short read or write).
+ */
+static int get_whole_cluster(BlockDriverState *bs, uint64_t cluster_offset,
+                             uint64_t offset, int allocate)
+{
+    uint64_t parent_cluster_offset;
+    BDRVVmdkState *s = bs->opaque;
+    uint8_t  whole_grain[s->cluster_sectors*512];        // one grain; 64KB for 128 sectors of 512 bytes
+
+    // we will be here if it's first write on non-exist grain(cluster).
+    // try to read from parent image, if exist
+    if (s->hd->backing_hd) {
+        BDRVVmdkState *ps = s->hd->backing_hd->opaque;
+
+        if (!vmdk_is_cid_valid(bs))
+            return -1;
+
+        parent_cluster_offset = get_cluster_offset(s->hd->backing_hd, NULL, offset, allocate);
+
+        if (parent_cluster_offset) {
+            BDRVVmdkState *act_s = activeBDRV.hd->opaque;
+
+            // whole_grain is sized by our own granularity; a larger parent
+            // grain would overflow it
+            if (ps->cluster_sectors > s->cluster_sectors)
+                return -1;
+
+            // a smaller parent grain leaves the tail untouched; make sure
+            // we never write uninitialised stack contents to the image
+            memset(whole_grain, 0, sizeof(whole_grain));
+
+            if (bdrv_pread(ps->hd, parent_cluster_offset, whole_grain, ps->cluster_sectors*512) != ps->cluster_sectors*512)
+                return -1;
+
+            //Write grain only into the active image
+            if (bdrv_pwrite(act_s->hd, activeBDRV.cluster_offset << 9, whole_grain, sizeof(whole_grain)) != sizeof(whole_grain))
+                return -1;
+        }
+    }
+    return 0;
+}
+
+/*
+ * Store the grain table entry described by m_data in the image file.
+ *
+ * The entry is written into the L2 table located at m_data->l2_offset and,
+ * when the image has a backup L1 table, also into the L2 table found
+ * through s->l1_backup_table. In the latter case m_data->l2_offset is
+ * overwritten with the backup table's sector number.
+ *
+ * Returns 0 on success, -1 if either write does not complete.
+ */
+static int vmdk_L2update(BlockDriverState *bs, VmdkMetaData *m_data)
+{
+    BDRVVmdkState *state = bs->opaque;
+    const size_t entry_len = sizeof(m_data->offset);
+
+    /* primary L2 table */
+    if (bdrv_pwrite(state->hd,
+                    ((int64_t)m_data->l2_offset * 512) + (m_data->l2_index * entry_len),
+                    &(m_data->offset), entry_len) != entry_len)
+        return -1;
+
+    /* nothing more to do for images without a backup L1 table */
+    if (state->l1_backup_table_offset == 0)
+        return 0;
+
+    /* backup L2 table */
+    m_data->l2_offset = state->l1_backup_table[m_data->l1_index];
+    if (bdrv_pwrite(state->hd,
+                    ((int64_t)m_data->l2_offset * 512) + (m_data->l2_index * entry_len),
+                    &(m_data->offset), entry_len) != entry_len)
+        return -1;
+
+    return 0;
+}
+
+/*
+ * Translate a guest byte offset into the byte offset of its cluster (grain)
+ * in the image file.
+ *
+ * bs        image to look the offset up in
+ * m_data    optional; valid is cleared on entry and set to 1, together with
+ *           the L1/L2 position and the new entry, when a cluster has been
+ *           allocated by this call
+ * offset    guest offset in bytes
+ * allocate  non-zero to allocate a cluster when none is mapped
+ *
+ * Returns the cluster's byte offset, or 0 if the offset is not mapped
+ * (and allocate is 0) or an error occurred.
+ *
+ * On allocation only the cached L2 table is updated here; the caller writes
+ * the entry to the file (see vmdk_L2update()).
+ */
+static uint64_t get_cluster_offset(BlockDriverState *bs, VmdkMetaData *m_data,
+                                   uint64_t offset, int allocate)
+{
+    BDRVVmdkState *s = bs->opaque;
+    unsigned int l1_index, l2_offset, l2_index;
+    int min_index, i, j;
+    uint32_t min_count, *l2_table, tmp = 0;
+    uint64_t cluster_offset;
+
+    if (m_data)
+        m_data->valid = 0;
+
+    /* locate the L2 table covering this offset */
+    l1_index = (offset >> 9) / s->l1_entry_sectors;
+    if (l1_index >= s->l1_size)
+        return 0;
+    l2_offset = s->l1_table[l1_index];
+    if (!l2_offset)
+        return 0;
+    /* look for the L2 table in the cache */
+    for(i = 0; i < L2_CACHE_SIZE; i++) {
+        if (l2_offset == s->l2_cache_offsets[i]) {
+            /* increment the hit count */
+            if (++s->l2_cache_counts[i] == 0xffffffff) {
+                /* halve all counters so that they do not wrap around */
+                for(j = 0; j < L2_CACHE_SIZE; j++) {
+                    s->l2_cache_counts[j] >>= 1;
+                }
+            }
+            l2_table = s->l2_cache + (i * s->l2_size);
+            goto found;
+        }
+    }
+    /* not found: load a new entry in the least used one */
+    min_index = 0;
+    min_count = 0xffffffff;
+    for(i = 0; i < L2_CACHE_SIZE; i++) {
+        if (s->l2_cache_counts[i] < min_count) {
+            min_count = s->l2_cache_counts[i];
+            min_index = i;
+        }
+    }
+    l2_table = s->l2_cache + (min_index * s->l2_size);
+    if (bdrv_pread(s->hd, (int64_t)l2_offset * 512, l2_table, s->l2_size * sizeof(uint32_t)) !=
+                                                                        s->l2_size * sizeof(uint32_t))
+        return 0;
+
+    s->l2_cache_offsets[min_index] = l2_offset;
+    s->l2_cache_counts[min_index] = 1;
+ found:
+    /* the cached table holds little-endian entries, in units of sectors */
+    l2_index = ((offset >> 9) / s->cluster_sectors) % s->l2_size;
+    cluster_offset = le32_to_cpu(l2_table[l2_index]);
+
+    if (!cluster_offset) {
+        if (!allocate)
+            return 0;
+        // Avoid the L2 tables update for the images that have snapshots.
+        if (!s->is_parent) {
+            /* append one cluster at the current end of the image file */
+            /* NOTE(review): the results of bdrv_getlength() and
+             * bdrv_truncate() are not checked here */
+            cluster_offset = bdrv_getlength(s->hd);
+            bdrv_truncate(s->hd, cluster_offset + (s->cluster_sectors << 9));
+
+            cluster_offset >>= 9;
+            tmp = cpu_to_le32(cluster_offset);
+            l2_table[l2_index] = tmp;
+            // Save the active image state
+            activeBDRV.cluster_offset = cluster_offset;
+            activeBDRV.hd = bs;
+        }
+        /* First of all we write the grain itself, to avoid a race condition
+         * that may corrupt the image.
+         * This problem may occur because of insufficient space on host disk
+         * or inappropriate VM shutdown.
+         */
+        if (get_whole_cluster(bs, cluster_offset, offset, allocate) == -1)
+            return 0;
+
+        /* hand the new entry to the caller for the on-disk L2 update */
+        if (m_data) {
+            m_data->offset = tmp;
+            m_data->l1_index = l1_index;
+            m_data->l2_index = l2_index;
+            m_data->l2_offset = l2_offset;
+            m_data->valid = 1;
+        }
+    }
+    /* convert from sectors to bytes */
+    cluster_offset <<= 9;
+    return cluster_offset;
+}
+
+/*
+ * Report whether the cluster containing sector_num is mapped in this image.
+ *
+ * *pnum receives the number of sectors, starting at sector_num and limited
+ * to nb_sectors, that lie in the same cluster. Returns non-zero if the
+ * cluster is mapped, 0 otherwise.
+ */
+static int vmdk_is_allocated(BlockDriverState *bs, int64_t sector_num,
+                             int nb_sectors, int *pnum)
+{
+    BDRVVmdkState *s = bs->opaque;
+    uint64_t mapped;
+    int first, remaining;
+
+    mapped = get_cluster_offset(bs, NULL, sector_num << 9, 0);
+
+    /* sectors left in this cluster, clamped to the request */
+    first = sector_num % s->cluster_sectors;
+    remaining = s->cluster_sectors - first;
+    *pnum = (remaining > nb_sectors) ? nb_sectors : remaining;
+
+    return (mapped != 0);
+}
+
+/*
+ * Read nb_sectors sectors starting at sector_num into buf.
+ *
+ * The request is processed one cluster at a time: mapped clusters are read
+ * from the image file, unmapped ones from the backing image if there is
+ * one (after validating the CID), and are zero-filled otherwise.
+ *
+ * Returns 0 on success, -1 on error.
+ */
+static int vmdk_read(BlockDriverState *bs, int64_t sector_num,
+                    uint8_t *buf, int nb_sectors)
+{
+    BDRVVmdkState *s = bs->opaque;
+
+    while (nb_sectors > 0) {
+        uint64_t data_offset = get_cluster_offset(bs, NULL, sector_num << 9, 0);
+        int first = sector_num % s->cluster_sectors;
+        int count = s->cluster_sectors - first;
+
+        /* never cross a cluster boundary, never exceed the request */
+        if (count > nb_sectors)
+            count = nb_sectors;
+
+        if (data_offset) {
+            if(bdrv_pread(s->hd, data_offset + first * 512, buf, count * 512) != count * 512)
+                return -1;
+        } else if (s->hd->backing_hd) {
+            // try to read from parent image, if exist
+            if (!vmdk_is_cid_valid(bs))
+                return -1;
+            if (bdrv_read(s->hd->backing_hd, sector_num, buf, count) < 0)
+                return -1;
+        } else {
+            memset(buf, 0, 512 * count);
+        }
+
+        buf += count * 512;
+        sector_num += count;
+        nb_sectors -= count;
+    }
+    return 0;
+}
+
+/*
+ * Write nb_sectors sectors from buf starting at sector_num.
+ *
+ * The request is processed one cluster at a time. Clusters that are not
+ * mapped yet are allocated by get_cluster_offset(); the data is written
+ * first and the on-disk L2 tables are updated afterwards.
+ *
+ * Returns 0 on success, -1 on error.
+ */
+static int vmdk_write(BlockDriverState *bs, int64_t sector_num,
+                     const uint8_t *buf, int nb_sectors)
+{
+    BDRVVmdkState *s = bs->opaque;
+    VmdkMetaData m_data;
+    int index_in_cluster, n;
+    uint64_t cluster_offset;
+    // NOTE(review): this flag is shared by every VMDK image of the process,
+    // so only the very first write overall updates a CID - confirm intent
+    static int cid_update = 0;
+
+    if (sector_num > bs->total_sectors) {
+        fprintf(stderr,
+                "(VMDK) Wrong offset: sector_num=0x%" PRIx64
+                " total_sectors=0x%" PRIx64 "\n",
+                sector_num, bs->total_sectors);
+        return -1;
+    }
+
+    while (nb_sectors > 0) {
+        // use the same arithmetic as vmdk_read()/vmdk_is_allocated(); a
+        // bit mask is only correct for power-of-two granularities
+        index_in_cluster = sector_num % s->cluster_sectors;
+        n = s->cluster_sectors - index_in_cluster;
+        if (n > nb_sectors)
+            n = nb_sectors;
+        cluster_offset = get_cluster_offset(bs, &m_data, sector_num << 9, 1);
+        if (!cluster_offset)
+            return -1;
+
+        if (bdrv_pwrite(s->hd, cluster_offset + index_in_cluster * 512, buf, n * 512) != n * 512)
+            return -1;
+        if (m_data.valid) {
+            /* update L2 tables */
+            if (vmdk_L2update(bs, &m_data) == -1)
+                return -1;
+        }
+        nb_sectors -= n;
+        sector_num += n;
+        buf += n * 512;
+
+        // update CID on the first write every time the virtual disk is opened
+        if (!cid_update) {
+            vmdk_write_cid(bs, time(NULL));
+            cid_update++;
+        }
+    }
+    return 0;
+}
+
+/*
+ * Create a new monolithic sparse VMDK4 image.
+ *
+ * filename      path of the image to create
+ * total_size    disk size; it is stored as the header capacity and in the
+ *               descriptor's extent line, i.e. it is treated as a sector
+ *               count
+ * backing_file  if not NULL, the image is created as a snapshot of it
+ * flags         BLOCK_FLAG_COMPAT6 selects virtualHWVersion 6 instead of 4
+ *
+ * All layout arithmetic is done in host byte order; values are converted to
+ * little endian only when they are stored in the header or written out.
+ *
+ * Returns 0 on success, -1 if the file cannot be created or written.
+ */
+static int vmdk_create(const char *filename, int64_t total_size,
+                       const char *backing_file, int flags)
+{
+    int fd, ret = -1;
+    uint32_t i;
+    VMDK4Header header;
+    uint32_t tmp, entry, magic, grains, gd_size, gt_size, gt_count;
+    /* host-endian layout values, all in units of sectors */
+    const int64_t granularity = 128;
+    const uint32_t num_gtes_per_gte = 512;
+    const int64_t desc_offset = 1;
+    const int64_t desc_size = 20;
+    int64_t rgd_offset, gd_offset, grain_offset;
+    size_t desc_len;
+    static const char desc_template[] =
+        "# Disk DescriptorFile\n"
+        "version=1\n"
+        "CID=%x\n"
+        "parentCID=ffffffff\n"
+        "createType=\"monolithicSparse\"\n"
+        "\n"
+        "# Extent description\n"
+        "RW %" PRId64 " SPARSE \"%s\"\n"
+        "\n"
+        "# The Disk Data Base \n"
+        "#DDB\n"
+        "\n"
+        "ddb.virtualHWVersion = \"%d\"\n"
+        "ddb.geometry.cylinders = \"%" PRId64 "\"\n"
+        "ddb.geometry.heads = \"16\"\n"
+        "ddb.geometry.sectors = \"63\"\n"
+        "ddb.adapterType = \"ide\"\n";
+    char desc[1024];
+    const char *real_filename, *temp_str;
+
+    /* images with a backing file are created as snapshots */
+    if (backing_file) {
+        return vmdk_snapshot_create(filename, backing_file);
+    }
+
+    fd = open(filename, O_WRONLY | O_CREAT | O_TRUNC | O_BINARY | O_LARGEFILE,
+              0644);
+    if (fd < 0)
+        return -1;
+
+    /* compute the layout: descriptor, redundant GD + GTs, GD + GTs, grains */
+    grains = (total_size + granularity - 1) / granularity;
+    gt_size = ((num_gtes_per_gte * sizeof(uint32_t)) + 511) >> 9;
+    gt_count = (grains + num_gtes_per_gte - 1) / num_gtes_per_gte;
+    gd_size = (gt_count * sizeof(uint32_t) + 511) >> 9;
+
+    rgd_offset = desc_offset + desc_size;
+    gd_offset = rgd_offset + gd_size + (gt_size * gt_count);
+    grain_offset =
+       ((gd_offset + gd_size + (gt_size * gt_count) +
+         granularity - 1) / granularity) *
+        granularity;
+
+    /* fill in the on-disk header */
+    magic = cpu_to_be32(VMDK4_MAGIC);
+    memset(&header, 0, sizeof(header));
+    header.version = cpu_to_le32(1);
+    header.flags = cpu_to_le32(3); /* ?? */
+    header.capacity = cpu_to_le64(total_size);
+    header.granularity = cpu_to_le64(granularity);
+    header.num_gtes_per_gte = cpu_to_le32(num_gtes_per_gte);
+    header.desc_offset = cpu_to_le64(desc_offset);
+    header.desc_size = cpu_to_le64(desc_size);
+    header.rgd_offset = cpu_to_le64(rgd_offset);
+    header.gd_offset = cpu_to_le64(gd_offset);
+    header.grain_offset = cpu_to_le64(grain_offset);
+
+    header.check_bytes[0] = 0xa;
+    header.check_bytes[1] = 0x20;
+    header.check_bytes[2] = 0xd;
+    header.check_bytes[3] = 0xa;
+
+    /* write all the data */
+    if (write(fd, &magic, sizeof(magic)) != sizeof(magic))
+        goto fail;
+    if (write(fd, &header, sizeof(header)) != sizeof(header))
+        goto fail;
+
+    if (ftruncate(fd, grain_offset << 9) != 0)
+        goto fail;
+
+    /* write grain directory */
+    if (lseek(fd, rgd_offset << 9, SEEK_SET) < 0)
+        goto fail;
+    for (i = 0, tmp = rgd_offset + gd_size;
+         i < gt_count; i++, tmp += gt_size) {
+        entry = cpu_to_le32(tmp);
+        if (write(fd, &entry, sizeof(entry)) != sizeof(entry))
+            goto fail;
+    }
+
+    /* write backup grain directory */
+    if (lseek(fd, gd_offset << 9, SEEK_SET) < 0)
+        goto fail;
+    for (i = 0, tmp = gd_offset + gd_size;
+         i < gt_count; i++, tmp += gt_size) {
+        entry = cpu_to_le32(tmp);
+        if (write(fd, &entry, sizeof(entry)) != sizeof(entry))
+            goto fail;
+    }
+
+    /* compose the descriptor; only the last path component is recorded */
+    real_filename = filename;
+    if ((temp_str = strrchr(real_filename, '\\')) != NULL)
+        real_filename = temp_str + 1;
+    if ((temp_str = strrchr(real_filename, '/')) != NULL)
+        real_filename = temp_str + 1;
+    if ((temp_str = strrchr(real_filename, ':')) != NULL)
+        real_filename = temp_str + 1;
+    snprintf(desc, sizeof(desc), desc_template, (unsigned int)time(NULL),
+             total_size, real_filename,
+             (flags & BLOCK_FLAG_COMPAT6 ? 6 : 4),
+             total_size / (int64_t)(63 * 16));
+
+    /* write the descriptor */
+    if (lseek(fd, desc_offset << 9, SEEK_SET) < 0)
+        goto fail;
+    desc_len = strlen(desc);
+    if (write(fd, desc, desc_len) != desc_len)
+        goto fail;
+
+    ret = 0;
+ fail:
+    close(fd);
+    return ret;
+}
+
+/*
+ * Release everything vmdk_open() set up: the L1 table, its backup copy,
+ * the L2 cache, the parent image (if any) and the underlying file.
+ */
+static void vmdk_close(BlockDriverState *bs)
+{
+    BDRVVmdkState *s = bs->opaque;
+
+    qemu_free(s->l1_table);
+    // allocated by vmdk_open() when the image has a backup L1 table
+    qemu_free(s->l1_backup_table);
+    qemu_free(s->l2_cache);
+    // try to close parent image, if exist
+    vmdk_parent_close(s->hd);
+    bdrv_delete(s->hd);
+}
+
+/* Flush the underlying image file. */
+static void vmdk_flush(BlockDriverState *bs)
+{
+    bdrv_flush(((BDRVVmdkState *)bs->opaque)->hd);
+}
+
+/* Callback table of the "vmdk" format; registered by bdrv_vmdk_init(). */
+static BlockDriver bdrv_vmdk = {
+    .format_name       = "vmdk",
+    .instance_size     = sizeof(BDRVVmdkState),
+    .bdrv_probe                = vmdk_probe,
+    .bdrv_open         = vmdk_open,
+    .bdrv_read         = vmdk_read,
+    .bdrv_write                = vmdk_write,
+    .bdrv_close                = vmdk_close,
+    .bdrv_create       = vmdk_create,
+    .bdrv_flush                = vmdk_flush,
+    .bdrv_is_allocated = vmdk_is_allocated,
+};
+
+/* Register the VMDK driver with the block layer. */
+static void bdrv_vmdk_init(void)
+{
+    bdrv_register(&bdrv_vmdk);
+}
+
+/* Hook bdrv_vmdk_init() into the block module initialisation. */
+block_init(bdrv_vmdk_init);
diff --git a/block/vpc.c b/block/vpc.c
new file mode 100644 (file)
index 0000000..211ae5c
--- /dev/null
@@ -0,0 +1,606 @@
+/*
+ * Block driver for Conectix/Microsoft Virtual PC images
+ *
+ * Copyright (c) 2005 Alex Beregszaszi
+ * Copyright (c) 2009 Kevin Wolf <kwolf@suse.de>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#include "qemu-common.h"
+#include "block_int.h"
+#include "module.h"
+
+/**************************************************************/
+
+#define HEADER_SIZE 512
+
+//#define CACHE
+
+// Disk type values as stored in the footer's type field; vpc_create()
+// writes VHD_DYNAMIC.
+enum vhd_type {
+    VHD_FIXED           = 2,
+    VHD_DYNAMIC         = 3,
+    VHD_DIFFERENCING    = 4,
+};
+
+// Seconds since Jan 1, 2000 0:00:00 (UTC)
+#define VHD_TIMESTAMP_BASE 946684800
+
+// always big-endian
+// Hard Disk Footer. The field order mirrors the on-disk layout; multi-byte
+// fields are byte-swapped with the be*_to_cpu helpers when accessed.
+struct vhd_footer {
+    char        creator[8]; // "conectix"
+    uint32_t    features;
+    uint32_t    version;
+
+    // Offset of next header structure, 0xFFFFFFFF if none
+    uint64_t    data_offset;
+
+    // Seconds since Jan 1, 2000 0:00:00 (UTC)
+    uint32_t    timestamp;
+
+    char        creator_app[4]; // "vpc "
+    uint16_t    major;
+    uint16_t    minor;
+    char        creator_os[4]; // "Wi2k"
+
+    uint64_t    orig_size;
+    uint64_t    size;
+
+    // Disk geometry; vpc_open() derives the visible disk size from it
+    uint16_t    cyls;
+    uint8_t     heads;
+    uint8_t     secs_per_cyl;
+
+    // One of enum vhd_type
+    uint32_t    type;
+
+    // Checksum of the Hard Disk Footer ("one's complement of the sum of all
+    // the bytes in the footer without the checksum field")
+    uint32_t    checksum;
+
+    // UUID used to identify a parent hard disk (backing file)
+    uint8_t     uuid[16];
+
+    uint8_t     in_saved_state;
+};
+
+// Dynamic Disk Header, located at the footer's data_offset. The field
+// order mirrors the on-disk layout; multi-byte fields are byte-swapped with
+// the be*_to_cpu helpers when accessed.
+struct vhd_dyndisk_header {
+    char        magic[8]; // "cxsparse"
+
+    // Offset of next header structure, 0xFFFFFFFF if none
+    uint64_t    data_offset;
+
+    // Offset of the Block Allocation Table (BAT)
+    uint64_t    table_offset;
+
+    uint32_t    version;
+    uint32_t    max_table_entries; // 32bit/entry
+
+    // 2 MB by default, must be a power of two
+    uint32_t    block_size;
+
+    uint32_t    checksum;
+    uint8_t     parent_uuid[16];
+    uint32_t    parent_timestamp;
+    uint32_t    reserved;
+
+    // Backing file name (in UTF-16)
+    uint8_t     parent_name[512];
+
+    // NOTE(review): not accessed by the code visible in this file section
+    struct {
+        uint32_t    platform;
+        uint32_t    data_space;
+        uint32_t    data_length;
+        uint32_t    reserved;
+        uint64_t    data_offset;
+    } parent_locator[8];
+};
+
+// Per-image state of the VPC driver
+typedef struct BDRVVPCState {
+    // Underlying image file
+    BlockDriverState *hd;
+
+    // Copy of the footer read at open time; written back by rewrite_footer()
+    uint8_t footer_buf[HEADER_SIZE];
+    // Byte offset at which the next data block is allocated; the footer is
+    // rewritten at this offset after an allocation
+    uint64_t free_data_block_offset;
+    // Number of entries in pagetable
+    int max_table_entries;
+    // In-memory BAT in host byte order: sector number of each block's
+    // bitmap, 0xFFFFFFFF for unallocated blocks
+    uint32_t *pagetable;
+    // Byte offset of the BAT in the image file
+    uint64_t bat_offset;
+    // Bitmap offset of the block whose bitmap get_sector_offset() filled
+    // last, so that consecutive writes to one block do not rewrite it
+    uint64_t last_bitmap_offset;
+
+    // Size of a data block in bytes
+    uint32_t block_size;
+    // Size of a block's sector bitmap in bytes, rounded up to 512
+    uint32_t bitmap_size;
+
+#ifdef CACHE
+    uint8_t *pageentry_u8;
+    uint32_t *pageentry_u32;
+    uint16_t *pageentry_u16;
+
+    uint64_t last_bitmap;
+#endif
+} BDRVVPCState;
+
+/*
+ * Computes the one's complement of the sum of the first size bytes of buf.
+ *
+ * For an empty buffer (size 0) the result is 0xFFFFFFFF.
+ */
+static uint32_t vpc_checksum(const uint8_t *buf, size_t size)
+{
+    uint32_t res = 0;
+    size_t i;
+
+    // size_t index: no signed/unsigned comparison against size
+    for (i = 0; i < size; i++)
+        res += buf[i];
+
+    return ~res;
+}
+
+
+/*
+ * Probes for a VPC image: returns a score of 100 if the buffer starts with
+ * the "conectix" signature, 0 otherwise. filename is not used.
+ */
+static int vpc_probe(const uint8_t *buf, int buf_size, const char *filename)
+{
+    static const char signature[] = "conectix";
+
+    if (buf_size < 8)
+        return 0;
+
+    return strncmp((char *)buf, signature, 8) == 0 ? 100 : 0;
+}
+
+/*
+ * Opens a VPC image: reads the footer copy at the start of the file and the
+ * dynamic disk header it points to, then loads the Block Allocation Table.
+ *
+ * A wrong footer checksum is only reported on stderr. The disk size is
+ * derived from the geometry stored in the footer.
+ *
+ * Returns 0 on success, the error of bdrv_file_open() if the file cannot
+ * be opened, and -1 if the image cannot be read or is not recognized.
+ */
+static int vpc_open(BlockDriverState *bs, const char *filename, int flags)
+{
+    BDRVVPCState *s = bs->opaque;
+    int ret, i;
+    struct vhd_footer* footer;
+    struct vhd_dyndisk_header* dyndisk_header;
+    uint8_t buf[HEADER_SIZE];
+    uint32_t checksum, stored_checksum;
+
+    ret = bdrv_file_open(&s->hd, filename, flags);
+    if (ret < 0)
+        return ret;
+
+    if (bdrv_pread(s->hd, 0, s->footer_buf, HEADER_SIZE) != HEADER_SIZE)
+        goto fail;
+
+    footer = (struct vhd_footer*) s->footer_buf;
+    if (strncmp(footer->creator, "conectix", 8))
+        goto fail;
+
+    // The checksum is calculated with the checksum field set to zero
+    stored_checksum = footer->checksum;
+    checksum = be32_to_cpu(stored_checksum);
+    footer->checksum = 0;
+    if (vpc_checksum(s->footer_buf, HEADER_SIZE) != checksum)
+        fprintf(stderr, "block-vpc: The header checksum of '%s' is "
+            "incorrect.\n", filename);
+
+    // Put the checksum back: footer_buf is written to the image again by
+    // rewrite_footer() and must not carry a zeroed checksum
+    footer->checksum = stored_checksum;
+
+    // The visible size of a image in Virtual PC depends on the geometry
+    // rather than on the size stored in the footer (the size in the footer
+    // is too large usually)
+    bs->total_sectors = (int64_t)
+        be16_to_cpu(footer->cyls) * footer->heads * footer->secs_per_cyl;
+
+    if (bdrv_pread(s->hd, be64_to_cpu(footer->data_offset), buf, HEADER_SIZE)
+            != HEADER_SIZE)
+        goto fail;
+
+    dyndisk_header = (struct vhd_dyndisk_header*) buf;
+
+    if (strncmp(dyndisk_header->magic, "cxsparse", 8))
+        goto fail;
+
+
+    s->block_size = be32_to_cpu(dyndisk_header->block_size);
+    // One bit per sector of the block, rounded up to whole sectors
+    s->bitmap_size = ((s->block_size / (8 * 512)) + 511) & ~511;
+
+    s->max_table_entries = be32_to_cpu(dyndisk_header->max_table_entries);
+    s->pagetable = qemu_malloc(s->max_table_entries * 4);
+
+    s->bat_offset = be64_to_cpu(dyndisk_header->table_offset);
+    if (bdrv_pread(s->hd, s->bat_offset, s->pagetable,
+            s->max_table_entries * 4) != s->max_table_entries * 4) {
+        // Don't leak the table when the BAT cannot be read
+        qemu_free(s->pagetable);
+        s->pagetable = NULL;
+        goto fail;
+    }
+
+    // New blocks are allocated behind the BAT and behind every block that
+    // is already in use
+    s->free_data_block_offset =
+        (s->bat_offset + (s->max_table_entries * 4) + 511) & ~511;
+
+    for (i = 0; i < s->max_table_entries; i++) {
+        be32_to_cpus(&s->pagetable[i]);
+        if (s->pagetable[i] != 0xFFFFFFFF) {
+            int64_t next = (512 * (int64_t) s->pagetable[i]) +
+                s->bitmap_size + s->block_size;
+
+            if (next> s->free_data_block_offset)
+                s->free_data_block_offset = next;
+        }
+    }
+
+    s->last_bitmap_offset = (int64_t) -1;
+
+#ifdef CACHE
+    s->pageentry_u8 = qemu_malloc(512);
+    s->pageentry_u32 = s->pageentry_u8;
+    s->pageentry_u16 = s->pageentry_u8;
+    s->last_pagetable = -1;
+#endif
+
+    return 0;
+ fail:
+    bdrv_delete(s->hd);
+    return -1;
+}
+
+/*
+ * Returns the absolute byte offset of the given sector in the image file.
+ * If the sector is not allocated, -1 is returned instead.
+ *
+ * The parameter write must be 1 if the offset will be used for a write
+ * operation (the block's bitmap is updated in that case), 0 otherwise.
+ */
+static inline int64_t get_sector_offset(BlockDriverState *bs,
+    int64_t sector_num, int write)
+{
+    BDRVVPCState *s = bs->opaque;
+    uint64_t offset = sector_num * 512;
+    uint64_t bitmap_offset, block_offset;
+    uint32_t pagetable_index, pageentry_index;
+
+    // Index of the block in the BAT and of the sector within that block
+    pagetable_index = offset / s->block_size;
+    pageentry_index = (offset % s->block_size) / 512;
+
+    if (pagetable_index >= s->max_table_entries || s->pagetable[pagetable_index] == 0xffffffff)
+        return -1; // not allocated
+
+    // A block starts with its sector bitmap, the data follows it
+    bitmap_offset = 512 * (uint64_t) s->pagetable[pagetable_index];
+    block_offset = bitmap_offset + s->bitmap_size + (512 * pageentry_index);
+
+    // We must ensure that we don't write to any sectors which are marked as
+    // unused in the bitmap. We get away with setting all bits in the block
+    // bitmap each time we write to a new block. This might cause Virtual PC to
+    // miss sparse read optimization, but it's not a problem in terms of
+    // correctness.
+    if (write && (s->last_bitmap_offset != bitmap_offset)) {
+        uint8_t bitmap[s->bitmap_size];
+
+        // Remember the block so that consecutive writes to it don't
+        // rewrite the bitmap
+        s->last_bitmap_offset = bitmap_offset;
+        memset(bitmap, 0xff, s->bitmap_size);
+        // NOTE(review): the result of this write is not checked
+        bdrv_pwrite(s->hd, bitmap_offset, bitmap, s->bitmap_size);
+    }
+
+//    printf("sector: %" PRIx64 ", index: %x, offset: %x, bioff: %" PRIx64 ", bloff: %" PRIx64 "\n",
+//     sector_num, pagetable_index, pageentry_index,
+//     bitmap_offset, block_offset);
+
+// disabled by reason
+// (everything up to the matching #endif is compiled out)
+#if 0
+#ifdef CACHE
+    if (bitmap_offset != s->last_bitmap)
+    {
+       lseek(s->fd, bitmap_offset, SEEK_SET);
+
+       s->last_bitmap = bitmap_offset;
+
+       // Scary! Bitmap is stored as big endian 32bit entries,
+       // while we used to look it up byte by byte
+       read(s->fd, s->pageentry_u8, 512);
+       for (i = 0; i < 128; i++)
+           be32_to_cpus(&s->pageentry_u32[i]);
+    }
+
+    if ((s->pageentry_u8[pageentry_index / 8] >> (pageentry_index % 8)) & 1)
+       return -1;
+#else
+    lseek(s->fd, bitmap_offset + (pageentry_index / 8), SEEK_SET);
+
+    read(s->fd, &bitmap_entry, 1);
+
+    if ((bitmap_entry >> (pageentry_index % 8)) & 1)
+       return -1; // not allocated
+#endif
+#endif
+
+    return block_offset;
+}
+
+/*
+ * Writes the footer to the end of the image file. This is needed when the
+ * file grows as it overwrites the old footer
+ *
+ * Returns 0 on success and < 0 on error
+ */
+static int rewrite_footer(BlockDriverState* bs)
+{
+    BDRVVPCState *s = bs->opaque;
+    int ret;
+
+    // The footer goes where the next data block would be allocated
+    ret = bdrv_pwrite(s->hd, (int64_t) s->free_data_block_offset,
+        s->footer_buf, HEADER_SIZE);
+
+    return (ret < 0) ? ret : 0;
+}
+
+/*
+ * Allocates a new block. This involves writing a new footer and updating
+ * the Block Allocation Table to use the space at the old end of the image
+ * file (overwriting the old footer)
+ *
+ * Returns the sectors' offset in the image file on success and < 0 on error
+ */
+static int64_t alloc_block(BlockDriverState* bs, int64_t sector_num)
+{
+    BDRVVPCState *s = bs->opaque;
+    int64_t bat_offset;
+    uint64_t old_free_offset = s->free_data_block_offset;
+    uint32_t index, bat_value;
+    int ret;
+    uint8_t bitmap[s->bitmap_size];
+
+    // Check if sector_num is valid
+    if ((sector_num < 0) || (sector_num > bs->total_sectors))
+        return -1;
+
+    // Never index beyond the end of the in-memory BAT
+    index = (sector_num * 512) / s->block_size;
+    if (index >= s->max_table_entries)
+        return -1;
+
+    // The block must not be allocated yet
+    if (s->pagetable[index] != 0xFFFFFFFF)
+        return -1;
+
+    // Write entry into in-memory BAT
+    s->pagetable[index] = s->free_data_block_offset / 512;
+
+    // Initialize the block's bitmap
+    memset(bitmap, 0xff, s->bitmap_size);
+    ret = bdrv_pwrite(s->hd, s->free_data_block_offset, bitmap, s->bitmap_size);
+    if (ret < 0)
+        goto fail;
+
+    // Write new footer (the old one will be overwritten)
+    s->free_data_block_offset += s->block_size + s->bitmap_size;
+    ret = rewrite_footer(bs);
+    if (ret < 0)
+        goto fail;
+
+    // Write BAT entry to disk (big endian)
+    bat_offset = s->bat_offset + (4 * index);
+    bat_value = be32_to_cpu(s->pagetable[index]);
+    ret = bdrv_pwrite(s->hd, bat_offset, &bat_value, 4);
+    if (ret < 0)
+        goto fail;
+
+    return get_sector_offset(bs, sector_num, 0);
+
+fail:
+    // Undo the in-memory changes so that the block still counts as
+    // unallocated and a later write can try again
+    s->free_data_block_offset = old_free_offset;
+    s->pagetable[index] = 0xFFFFFFFF;
+    return -1;
+}
+
+/*
+ * Reads nb_sectors sectors starting at sector_num into buf, one sector at
+ * a time. Sectors of unallocated blocks read as zeros.
+ *
+ * Returns 0 on success and -1 on error
+ */
+static int vpc_read(BlockDriverState *bs, int64_t sector_num,
+                    uint8_t *buf, int nb_sectors)
+{
+    BDRVVPCState *s = bs->opaque;
+
+    for (; nb_sectors > 0; nb_sectors--, sector_num++, buf += 512) {
+        int64_t pos = get_sector_offset(bs, sector_num, 0);
+
+        if (pos == -1) {
+            memset(buf, 0, 512);
+            continue;
+        }
+
+        if (bdrv_pread(s->hd, pos, buf, 512) != 512)
+            return -1;
+    }
+    return 0;
+}
+
+/*
+ * Writes nb_sectors sectors from buf starting at sector_num, one sector at
+ * a time. Blocks that are not allocated yet are allocated on demand.
+ *
+ * Returns 0 on success and -1 on error
+ */
+static int vpc_write(BlockDriverState *bs, int64_t sector_num,
+    const uint8_t *buf, int nb_sectors)
+{
+    BDRVVPCState *s = bs->opaque;
+
+    for (; nb_sectors > 0; nb_sectors--, sector_num++, buf += 512) {
+        int64_t pos = get_sector_offset(bs, sector_num, 1);
+
+        if (pos == -1) {
+            pos = alloc_block(bs, sector_num);
+            if (pos < 0)
+                return -1;
+        }
+
+        if (bdrv_pwrite(s->hd, pos, buf, 512) != 512)
+            return -1;
+    }
+
+    return 0;
+}
+
+
+/*
+ * Calculates the number of cylinders, heads and sectors per cylinder
+ * based on a given number of sectors. This is the algorithm described
+ * in the VHD specification.
+ *
+ * Note that the geometry doesn't always exactly match total_sectors but
+ * may round it down.
+ *
+ * Returns 0 on success, -EFBIG if the size is larger than 127 GB
+ */
+static int calculate_geometry(int64_t total_sectors, uint16_t* cyls,
+    uint8_t* heads, uint8_t* secs_per_cyl)
+{
+    uint32_t cyl_heads;
+    uint8_t nheads, spt;
+
+    if (total_sectors > 65535 * 16 * 255)
+        return -EFBIG;
+
+    if (total_sectors > 65535 * 16 * 63) {
+        // Too large for 63 sectors per track
+        spt = 255;
+        nheads = 16;
+        cyl_heads = total_sectors / spt;
+    } else {
+        // Start with 17 sectors per track and grow to 31 and 63 as needed
+        spt = 17;
+        cyl_heads = total_sectors / spt;
+        nheads = (cyl_heads + 1023) / 1024;
+
+        if (nheads < 4)
+            nheads = 4;
+
+        if (cyl_heads >= (nheads * 1024) || nheads > 16) {
+            spt = 31;
+            nheads = 16;
+            cyl_heads = total_sectors / spt;
+        }
+
+        if (cyl_heads >= (nheads * 1024)) {
+            spt = 63;
+            nheads = 16;
+            cyl_heads = total_sectors / spt;
+        }
+    }
+
+    *secs_per_cyl = spt;
+    *heads = nheads;
+
+    // Note: Rounding up deviates from the Virtual PC behaviour
+    // However, we need this to avoid truncating images in qemu-img convert
+    *cyls = (cyl_heads + nheads - 1) / nheads;
+
+    return 0;
+}
+
+/*
+ * Creates a new, empty dynamic VHD image.
+ *
+ * The file consists of a footer copy, the dynamic disk header, a BAT with
+ * all entries unallocated and the footer. total_sectors is adjusted to the
+ * geometry calculated for it.
+ *
+ * Returns 0 on success, -ENOTSUP if a backing file is requested, -EFBIG if
+ * the size is too large and -EIO if the file cannot be created or written.
+ */
+static int vpc_create(const char *filename, int64_t total_sectors,
+    const char *backing_file, int flags)
+{
+    uint8_t buf[1024];
+    struct vhd_footer* footer = (struct vhd_footer*) buf;
+    struct vhd_dyndisk_header* dyndisk_header =
+        (struct vhd_dyndisk_header*) buf;
+    int fd, i;
+    int ret = -EIO;
+    uint16_t cyls;
+    uint8_t heads;
+    uint8_t secs_per_cyl;
+    size_t block_size, num_bat_entries;
+
+    if (backing_file != NULL)
+        return -ENOTSUP;
+
+    fd = open(filename, O_WRONLY | O_CREAT | O_TRUNC | O_BINARY, 0644);
+    if (fd < 0)
+        return -EIO;
+
+    // From here on every error path must close fd
+
+    // Calculate matching total_size and geometry
+    if (calculate_geometry(total_sectors, &cyls, &heads, &secs_per_cyl)) {
+        ret = -EFBIG;
+        goto fail;
+    }
+    total_sectors = (int64_t) cyls * heads * secs_per_cyl;
+
+    // Prepare the Hard Disk Footer
+    memset(buf, 0, 1024);
+
+    // These fields are fixed-size and not NUL-terminated
+    strncpy(footer->creator, "conectix", 8);
+    // TODO Check if "qemu" creator_app is ok for VPC
+    strncpy(footer->creator_app, "qemu", 4);
+    strncpy(footer->creator_os, "Wi2k", 4);
+
+    footer->features = be32_to_cpu(0x02);
+    footer->version = be32_to_cpu(0x00010000);
+    footer->data_offset = be64_to_cpu(HEADER_SIZE);
+    footer->timestamp = be32_to_cpu(time(NULL) - VHD_TIMESTAMP_BASE);
+
+    // Version of Virtual PC 2007
+    footer->major = be16_to_cpu(0x0005);
+    footer->minor =be16_to_cpu(0x0003);
+
+    footer->orig_size = be64_to_cpu(total_sectors * 512);
+    footer->size = be64_to_cpu(total_sectors * 512);
+
+    footer->cyls = be16_to_cpu(cyls);
+    footer->heads = heads;
+    footer->secs_per_cyl = secs_per_cyl;
+
+    footer->type = be32_to_cpu(VHD_DYNAMIC);
+
+    // TODO uuid is missing
+
+    footer->checksum = be32_to_cpu(vpc_checksum(buf, HEADER_SIZE));
+
+    // Write the footer (twice: at the beginning and at the end)
+    block_size = 0x200000;
+    num_bat_entries = (total_sectors + block_size / 512) / (block_size / 512);
+
+    if (write(fd, buf, HEADER_SIZE) != HEADER_SIZE)
+        goto fail;
+
+    if (lseek(fd, 1536 + ((num_bat_entries * 4 + 511) & ~511), SEEK_SET) < 0)
+        goto fail;
+    if (write(fd, buf, HEADER_SIZE) != HEADER_SIZE)
+        goto fail;
+
+    // Write the initial BAT
+    if (lseek(fd, 3 * 512, SEEK_SET) < 0)
+        goto fail;
+
+    memset(buf, 0xFF, 512);
+    for (i = 0; i < (num_bat_entries * 4 + 511) / 512; i++)
+        if (write(fd, buf, 512) != 512)
+            goto fail;
+
+
+    // Prepare the Dynamic Disk Header
+    memset(buf, 0, 1024);
+
+    strncpy(dyndisk_header->magic, "cxsparse", 8);
+
+    dyndisk_header->data_offset = be64_to_cpu(0xFFFFFFFF);
+    dyndisk_header->table_offset = be64_to_cpu(3 * 512);
+    dyndisk_header->version = be32_to_cpu(0x00010000);
+    dyndisk_header->block_size = be32_to_cpu(block_size);
+    dyndisk_header->max_table_entries = be32_to_cpu(num_bat_entries);
+
+    dyndisk_header->checksum = be32_to_cpu(vpc_checksum(buf, 1024));
+
+    // Write the header
+    if (lseek(fd, 512, SEEK_SET) < 0)
+        goto fail;
+    if (write(fd, buf, 1024) != 1024)
+        goto fail;
+
+    ret = 0;
+ fail:
+    close(fd);
+    return ret;
+}
+
+// Frees the in-memory BAT (and the cache, if compiled in) and closes the
+// underlying image file
+static void vpc_close(BlockDriverState *bs)
+{
+    BDRVVPCState *state = bs->opaque;
+
+    qemu_free(state->pagetable);
+#ifdef CACHE
+    qemu_free(state->pageentry_u8);
+#endif
+
+    bdrv_delete(state->hd);
+}
+
+// Callback table of the "vpc" format; registered by bdrv_vpc_init()
+static BlockDriver bdrv_vpc = {
+    .format_name       = "vpc",
+    .instance_size     = sizeof(BDRVVPCState),
+    .bdrv_probe                = vpc_probe,
+    .bdrv_open         = vpc_open,
+    .bdrv_read         = vpc_read,
+    .bdrv_write                = vpc_write,
+    .bdrv_close                = vpc_close,
+    .bdrv_create       = vpc_create,
+};
+
+// Registers the VPC driver with the block layer
+static void bdrv_vpc_init(void)
+{
+    bdrv_register(&bdrv_vpc);
+}
+
+// Hooks bdrv_vpc_init() into the block module initialisation
+block_init(bdrv_vpc_init);
diff --git a/block/vvfat.c b/block/vvfat.c
new file mode 100644 (file)
index 0000000..2a8feb3
--- /dev/null
@@ -0,0 +1,2855 @@
+/* vim:set shiftwidth=4 ts=8: */
+/*
+ * QEMU Block driver for virtual VFAT (shadows a local directory)
+ *
+ * Copyright (c) 2004,2005 Johannes E. Schindelin
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#include <sys/stat.h>
+#include <dirent.h>
+#include "qemu-common.h"
+#include "block_int.h"
+#include "module.h"
+
+#ifndef S_IWGRP
+#define S_IWGRP 0
+#endif
+#ifndef S_IWOTH
+#define S_IWOTH 0
+#endif
+
+/* TODO: add ":bootsector=blabla.img:" */
+/* LATER TODO: add automatic boot sector generation from
+    BOOTEASY.ASM and Ranish Partition Manager
+    Note that DOS assumes the system files to be the first files in the
+    file system (test if the boot sector still relies on that fact)! */
+/* MAYBE TODO: write block-visofs.c */
+/* TODO: call try_commit() only after a timeout */
+
+/* Debug switch: DEBUG is left undefined here, so DLOG() expands to nothing. */
+/* #define DEBUG */
+
+#ifdef DEBUG
+
+/* With DEBUG, DLOG(a) expands to its argument, i.e. the code is compiled in. */
+#define DLOG(a) a
+
+/* Replace stderr by a file-local FILE* (named STDERR after preprocessing).
+ * It starts out NULL; vvfat_open() opens "vvfat.log" into it on first use. */
+#undef stderr
+#define stderr STDERR
+FILE* stderr = NULL;
+
+static void checkpoint(void);
+
+#ifdef __MINGW32__
+/* assert() replacement for MinGW debug builds: print the failed expression
+ * with its location and exit with status -5. */
+void nonono(const char* file, int line, const char* msg) {
+    fprintf(stderr, "Nonono! %s:%d %s\n", file, line, msg);
+    exit(-5);
+}
+#undef assert
+#define assert(a) do {if (!(a)) nonono(__FILE__, __LINE__, #a);}while(0)
+#endif
+
+#else
+
+/* Without DEBUG the argument of DLOG() is discarded entirely. */
+#define DLOG(a)
+
+#endif
+
+/* dynamic array functions */
+/* Growable array of fixed-size items stored back to back in one buffer. */
+typedef struct array_t {
+    char* pointer; /* start of the item storage (NULL while empty) */
+    /* size: allocated bytes; next: number of items in use (index of the next
+     * free slot); item_size: size of one item in bytes */
+    unsigned int size,next,item_size;
+} array_t;
+
+/* Put array into the empty state (no storage, no items) for items of
+ * item_size bytes each. */
+static inline void array_init(array_t* array,unsigned int item_size)
+{
+    array->item_size = item_size;
+    array->size = array->next = 0;
+    array->pointer = NULL;
+}
+
+/* Release the storage of array and return it to the empty state.
+ * item_size is kept, so the array can be filled again afterwards. */
+static inline void array_free(array_t* array)
+{
+    /* the storage comes from qemu_realloc(); freeing NULL is a no-op */
+    qemu_free(array->pointer);
+    /* do not keep a dangling pointer: a later array_ensure_allocated() or
+     * array_insert() would otherwise hand freed memory to qemu_realloc() */
+    array->pointer = NULL;
+    array->size=array->next=0;
+}
+
+/* Return the address of item index. The array is never grown here: index
+ * has to be below array->next, which is enforced with assert(). */
+static inline void* array_get(array_t* array,unsigned int index) {
+    unsigned int byte_offset;
+
+    assert(index < array->next);
+    byte_offset = index * array->item_size;
+    return array->pointer + byte_offset;
+}
+
+/* Make sure the storage is large enough to hold item index.
+ * When the buffer has to grow it is reallocated with room for index + 32
+ * items and array->next is set to index + 1; when it is already large enough
+ * nothing (including array->next) is changed.
+ * Returns 0 on success, -1 if the reallocation yields NULL. */
+static inline int array_ensure_allocated(array_t* array, int index)
+{
+    int new_size;
+
+    if ((index + 1) * array->item_size <= array->size)
+        return 0;
+
+    new_size = (index + 32) * array->item_size;
+    array->pointer = qemu_realloc(array->pointer, new_size);
+    if (!array->pointer)
+        return -1;
+
+    array->size = new_size;
+    array->next = index + 1;
+    return 0;
+}
+
+/* Append one item: grow the storage if needed, bump array->next and return
+ * the address of the new (uninitialized) last item, or NULL if growing
+ * failed. */
+static inline void* array_get_next(array_t* array) {
+    const unsigned int slot = array->next;
+
+    if (array_ensure_allocated(array, slot) < 0)
+        return NULL;
+
+    array->next = slot + 1;
+    return array_get(array, slot);
+}
+
+/* Open a gap of count items in front of item index: the storage is enlarged
+ * by count items when necessary, items index..next-1 are shifted up and
+ * array->next grows by count.
+ * Returns the address of the first gap item (contents are not initialized),
+ * or NULL if the reallocation yields NULL. */
+static inline void* array_insert(array_t* array,unsigned int index,unsigned int count) {
+    const unsigned int item = array->item_size;
+    char* gap;
+
+    if ((array->next + count) * item > array->size) {
+        int extra = count * item;
+
+        array->pointer = qemu_realloc(array->pointer, array->size + extra);
+        if (!array->pointer)
+            return NULL;
+        array->size += extra;
+    }
+
+    gap = array->pointer + index * item;
+    memmove(gap + count * item, gap, (array->next - index) * item);
+    array->next += count;
+    return gap;
+}
+
+/* This performs a "roll": the count items starting at index_from are moved
+ * so that they start at index_to, and the items in between are shifted by
+ * count positions, keeping their relative order.
+ *
+ * Returns 0 on success (also when nothing has to move) and -1 if array is
+ * NULL or one of the two start indices lies outside [0, array->next).
+ *
+ * NOTE(review): only the start indices are range-checked; the caller has to
+ * guarantee that index_from + count and index_to + count stay inside the
+ * array as well. */
+static inline int array_roll(array_t* array,int index_to,int index_from,int count)
+{
+    char* buf;
+    char* from;
+    char* to;
+    int is;
+
+    if(!array ||
+            index_to<0 || index_to>=array->next ||
+            index_from<0 || index_from>=array->next)
+        return -1;
+
+    if(index_to==index_from)
+        return 0;
+
+    is=array->item_size;
+    from=array->pointer+index_from*is;
+    to=array->pointer+index_to*is;
+
+    /* park the moved items in a temporary buffer while the rest shifts */
+    buf=qemu_malloc(is*count);
+    memcpy(buf,from,is*count);
+
+    if(index_to<index_from)
+        memmove(to+is*count,to,from-to);
+    else
+        memmove(from,from+is*count,to-from);
+
+    memcpy(to,buf,is*count);
+
+    /* buf was obtained from qemu_malloc(), so release it with qemu_free() */
+    qemu_free(buf);
+
+    return 0;
+}
+
+/* Remove count items starting at index. The items behind the removed range
+ * move down to close the gap and array->next shrinks by count.
+ * Always returns 0; the int result is kept for existing callers. */
+static inline int array_remove_slice(array_t* array,int index, int count)
+{
+    assert(index >=0);
+    assert(count > 0);
+    assert(index + count <= array->next);
+
+    /* Shift the tail down directly. Rolling the slice to the last slot with
+     * array_roll(array, array->next-1, index, count) copies count items to
+     * position next-1 and therefore writes past the end of the array as soon
+     * as count > 1. */
+    memmove(array->pointer + index * array->item_size,
+            array->pointer + (index + count) * array->item_size,
+            (array->next - index - count) * array->item_size);
+
+    array->next -= count;
+    return 0;
+}
+
+/* Remove the single item at index; shorthand for array_remove_slice() with
+ * a count of 1, whose result is passed through. */
+static int array_remove(array_t* array,int index)
+{
+    return array_remove_slice(array, index, 1);
+}
+
+/* Translate a pointer to an item of array back into its index. The pointer
+ * must sit on an item boundary and inside the used part of the array (both
+ * checked with assert()). */
+static int array_index(array_t* array, void* pointer)
+{
+    size_t byte_offset = (char*)pointer - array->pointer;
+    size_t item = byte_offset / array->item_size;
+
+    assert((byte_offset % array->item_size) == 0);
+    assert(item < array->next);
+    return item;
+}
+
+/* These structures are used to fake a disk and the VFAT filesystem.
+ * For this reason we need to use __attribute__((packed)). */
+
+/* Boot sector of the faked FAT partition as filled in by
+ * init_directories(). The packed fields add up to 512 bytes (one sector);
+ * multi-byte fields are stored little-endian. */
+typedef struct bootsector_t {
+    uint8_t jump[3];
+    uint8_t name[8];
+    uint16_t sector_size;
+    uint8_t sectors_per_cluster;
+    uint16_t reserved_sectors;
+    uint8_t number_of_fats;
+    uint16_t root_entries;
+    uint16_t total_sectors16; /* sector count if it fits in 16 bits, else 0 */
+    uint8_t media_type; /* media descriptor, also copied into the first FAT byte */
+    uint16_t sectors_per_fat;
+    uint16_t sectors_per_track;
+    uint16_t number_of_heads;
+    uint32_t hidden_sectors;
+    uint32_t total_sectors; /* sector count if it exceeds 16 bits, else 0 */
+    /* both variants below occupy 18 bytes */
+    union {
+        struct {
+           uint8_t drive_number;
+           uint8_t current_head;
+           uint8_t signature;
+           uint32_t id;
+           uint8_t volume_label[11];
+       } __attribute__((packed)) fat16;
+       struct {
+           uint32_t sectors_per_fat;
+           uint16_t flags;
+           uint8_t major,minor;
+           uint32_t first_cluster_of_root_directory;
+           uint16_t info_sector;
+           uint16_t backup_boot_sector;
+           uint16_t ignored;
+       } __attribute__((packed)) fat32;
+    } u;
+    uint8_t fat_type[8]; /* 8-character text such as "FAT16   " */
+    uint8_t ignored[0x1c0];
+    uint8_t magic[2]; /* 0x55, 0xaa */
+} __attribute__((packed)) bootsector_t;
+
+/* Three-byte CHS address as stored in a partition table entry. sector2CHS()
+ * puts the sector number (1-based) in the low 6 bits of sector and bits 8-9
+ * of the cylinder in its top two bits. */
+typedef struct {
+    uint8_t head;
+    uint8_t sector;
+    uint8_t cylinder;
+} mbr_chs_t;
+
+/* One 16-byte partition table entry of the MBR (see init_mbr()). */
+typedef struct partition_t {
+    uint8_t attributes; /* 0x80 = bootable */
+    mbr_chs_t start_CHS;
+    uint8_t   fs_type; /* 0x1 = FAT12, 0x6 = FAT16, 0xe = FAT16_LBA, 0xb = FAT32, 0xc = FAT32_LBA */
+    mbr_chs_t end_CHS;
+    uint32_t start_sector_long; /* first sector as LBA, little-endian */
+    uint32_t length_sector_long; /* length in sectors, little-endian */
+} __attribute__((packed)) partition_t;
+
+/* Master boot record overlaid on the first faked sector; the packed fields
+ * add up to 512 bytes. */
+typedef struct mbr_t {
+    uint8_t ignored[0x1b8];
+    uint32_t nt_id; /* "Win NT Disk Signature" written by init_mbr() */
+    uint8_t ignored2[2];
+    partition_t partition[4];
+    uint8_t magic[2]; /* 0x55, 0xaa */
+} __attribute__((packed)) mbr_t;
+
+/* One 32-byte FAT directory entry. Long-name entries (attributes == 0xf)
+ * reuse the same 32 bytes with a different layout; create_long_filename()
+ * addresses them through name[] with offsets up to 31. */
+typedef struct direntry_t {
+    uint8_t name[8];
+    uint8_t extension[3];
+    uint8_t attributes;
+    uint8_t reserved[2]; /* reserved[1] carries the short-name checksum in long-name entries */
+    uint16_t ctime;
+    uint16_t cdate;
+    uint16_t adate;
+    uint16_t begin_hi; /* upper 16 bits of the first cluster */
+    uint16_t mtime;
+    uint16_t mdate;
+    uint16_t begin; /* lower 16 bits of the first cluster */
+    uint32_t size;
+} __attribute__((packed)) direntry_t;
+
+/* This structure is used to transparently access the files: each mapping
+ * ties a range of clusters to a path on the host. */
+
+typedef struct mapping_t {
+    /* begin is the first cluster, end is the last+1 */
+    uint32_t begin,end;
+    /* as s->directory is growable, no pointer may be used here */
+    unsigned int dir_index;
+    /* the clusters of a file may be in any order; this points to the first */
+    int first_mapping_index;
+    union {
+       /* offset is
+        * - the offset in the file (in clusters) for a file, or
+        * - the next cluster of the directory for a directory, and
+        * - the address of the buffer for a faked entry
+        */
+       struct {
+           uint32_t offset;
+       } file;
+       /* for directories: index of the parent's mapping (-1 for the root)
+        * and index of the directory's first entry in s->directory */
+       struct {
+           int parent_mapping_index;
+           int first_dir_index;
+       } dir;
+    } info;
+    /* path contains the full path, i.e. it always starts with s->path */
+    char* path;
+
+    /* the values are distinct bits; code tests them with '&' */
+    enum { MODE_UNDEFINED = 0, MODE_NORMAL = 1, MODE_MODIFIED = 2,
+       MODE_DIRECTORY = 4, MODE_FAKED = 8,
+       MODE_DELETED = 16, MODE_RENAMED = 32 } mode;
+    /* set when the host file has none of the write permission bits */
+    int read_only;
+} mapping_t;
+
+#ifdef DEBUG
+static void print_direntry(const struct direntry_t*);
+static void print_mapping(const struct mapping_t* mapping);
+#endif
+
+/* here begins the real VVFAT driver */
+
+/* Per-image state of the virtual VFAT driver. */
+typedef struct BDRVVVFATState {
+    BlockDriverState* bs; /* pointer to parent */
+    unsigned int first_sectors_number; /* 1 for a single partition, 0x40 for a disk with partition table */
+    unsigned char first_sectors[0x40*0x200]; /* faked MBR / boot sector area */
+
+    int fat_type; /* 16 or 32 */ /* NOTE(review): 12 is also used (":floppy:" / ":12:") */
+    array_t fat,directory,mapping; /* FAT entries, direntry_t items, mapping_t items */
+
+    unsigned int cluster_size; /* bytes per cluster (sectors_per_cluster * 0x200) */
+    unsigned int sectors_per_cluster;
+    unsigned int sectors_per_fat;
+    unsigned int sectors_of_root_directory;
+    uint32_t last_cluster_of_root_directory;
+    unsigned int faked_sectors; /* how many sectors are faked before file data */
+    uint32_t sector_count; /* total number of sectors of the partition */
+    uint32_t cluster_count; /* total number of clusters of this partition */
+    uint32_t max_fat_value; /* largest FAT entry value for fat_type, see init_fat() */
+
+    int current_fd;
+    mapping_t* current_mapping;
+    unsigned char* cluster; /* points to current cluster */
+    unsigned char* cluster_buffer; /* points to a buffer to hold temp data */
+    unsigned int current_cluster;
+
+    /* write support */
+    BlockDriverState* write_target;
+    char* qcow_filename;
+    BlockDriverState* qcow;
+    void* fat2;
+    char* used_clusters;
+    array_t commits;
+    const char* path; /* path of the root mapping (set in init_directories()) */
+    int downcase_short_names;
+} BDRVVVFATState;
+
+/* Convert the linear sector position spos into a Cylinder/Head/Sector triple
+ * using the geometry stored in bs (cyls/heads/secs).
+ * Returns 0 when the position is representable. If the cylinder would be
+ * outside the geometry, all three CHS bytes are set to 0xFF and 1 is
+ * returned to signal the overflow.
+ */
+static int sector2CHS(BlockDriverState* bs, mbr_chs_t * chs, int spos){
+    int sector, head, cylinder;
+
+    sector   = spos % (bs->secs);
+    cylinder = spos / bs->secs;
+    head     = cylinder % (bs->heads);
+    cylinder = cylinder / bs->heads;
+
+    if (cylinder >= bs->cyls) {
+        /* Overflow,
+        it happens if 32bit sector positions are used, while CHS is only 24bit.
+        Windows/Dos is said to take 1023/255/63 as nonrepresentable CHS */
+        chs->head     = 0xFF;
+        chs->sector   = 0xFF;
+        chs->cylinder = 0xFF;
+        return 1;
+    }
+
+    /* sectors are 1-based; cylinder bits 8-9 go into the top of the sector byte */
+    chs->head     = (uint8_t)head;
+    chs->sector   = (uint8_t)( (sector+1) | ((cylinder>>8)<<6) );
+    chs->cylinder = (uint8_t)cylinder;
+    return 0;
+}
+
+/* Fill the first faked sector with a master boot record that contains one
+ * bootable partition describing the FAT volume. */
+static void init_mbr(BDRVVVFATState* s)
+{
+    /* TODO: if the files mbr.img and bootsect.img exist, use them */
+    mbr_t* real_mbr=(mbr_t*)s->first_sectors;
+    partition_t* partition=&(real_mbr->partition[0]);
+    int lba;
+
+    memset(s->first_sectors,0,512);
+
+    /* Win NT Disk Signature */
+    real_mbr->nt_id= cpu_to_le32(0xbe1afdfa);
+
+    partition->attributes=0x80; /* bootable */
+
+    /* LBA is used when partition is outside the CHS geometry */
+    lba = sector2CHS(s->bs, &partition->start_CHS, s->first_sectors_number-1);
+    lba|= sector2CHS(s->bs, &partition->end_CHS,   s->sector_count);
+
+    /*LBA partitions are identified only by start/length_sector_long not by CHS*/
+    partition->start_sector_long =cpu_to_le32(s->first_sectors_number-1);
+    partition->length_sector_long=cpu_to_le32(s->sector_count - s->first_sectors_number+1);
+
+    /* FAT12/FAT16/FAT32 */
+    /* DOS uses different types when partition is LBA,
+       probably to prevent older versions from using CHS on them */
+    if (s->fat_type==12)
+        partition->fs_type=0x1;
+    else if (s->fat_type==16)
+        partition->fs_type=(lba?0xe:0x06);
+    else /* fat_type==32 */
+        partition->fs_type=(lba?0xc:0x0b);
+
+    real_mbr->magic[0]=0x55;
+    real_mbr->magic[1]=0xaa;
+}
+
+/* direntry functions */
+
+/* Expand src into 16-bit characters (low byte = character, high byte = 0),
+ * at most 129 of them, append a 16-bit zero terminator and pad with 0xff
+ * bytes up to the next multiple of 26 bytes.
+ * Returns the length of the expanded name in bytes, without terminator.
+ * For names of 128 or more characters the last byte written is dest[259],
+ * so dest has to provide 260 bytes. */
+static inline int short2long_name(char* dest,const char* src)
+{
+    int chars = 0;
+    int pos;
+    int len;
+
+    while (chars < 129 && src[chars]) {
+        dest[2*chars] = src[chars];
+        dest[2*chars+1] = 0;
+        chars++;
+    }
+    len = 2*chars;
+
+    dest[len] = 0;
+    dest[len+1] = 0;
+
+    for (pos = len+2; (pos%26) != 0; pos++)
+        dest[pos] = 0xff;
+
+    return len;
+}
+
+/* Append the long-name directory entries for filename to s->directory and
+ * return a pointer to the first of them. Each entry stores 26 bytes (13
+ * 16-bit characters) of the name; the entries are laid out in reverse
+ * order, i.e. the first one appended holds the end of the name and carries
+ * the 0x40 "last entry" flag in its sequence byte. */
+static inline direntry_t* create_long_filename(BDRVVVFATState* s,const char* filename)
+{
+    /* short2long_name() emits up to 129 characters (258 bytes) followed by
+     * a terminator or padding that reaches index 259, and the copy loop
+     * below reads 26 bytes per entry, i.e. up to 260 bytes. A 258-byte
+     * buffer is overrun for names of 128 or more characters. */
+    char buffer[260];
+    int length=short2long_name(buffer,filename),
+        number_of_entries=(length+25)/26,i;
+    direntry_t* entry;
+
+    for(i=0;i<number_of_entries;i++) {
+        entry=array_get_next(&(s->directory));
+        entry->attributes=0xf;
+        entry->reserved[0]=0;
+        entry->begin=0;
+        /* sequence number counts down; the first entry gets the 0x40 flag */
+        entry->name[0]=(number_of_entries-i)|(i==0?0x40:0);
+    }
+    for(i=0;i<26*number_of_entries;i++) {
+        /* a long-name entry keeps its 26 name bytes in three pieces, at
+         * byte offsets 1..10, 14..25 and 28..31 of the 32-byte entry */
+        int offset=(i%26);
+        if(offset<10) offset=1+offset;
+        else if(offset<22) offset=14+offset-10;
+        else offset=28+offset-22;
+        entry=array_get(&(s->directory),s->directory.next-1-(i/26));
+        /* deliberately indexes past name[8] into the rest of the entry */
+        entry->name[offset]=buffer[i];
+    }
+    return array_get(&(s->directory),s->directory.next-number_of_entries);
+}
+
+/* A directory slot is free if its first name byte is 0xe5 or 0x00. */
+static char is_free(const direntry_t* direntry)
+{
+    const uint8_t first = direntry->name[0];
+
+    return first==0x00 || first==0xe5;
+}
+
+/* Volume label entries are recognized by the exact attribute byte 0x28. */
+static char is_volume_label(const direntry_t* direntry)
+{
+    if (direntry->attributes != 0x28)
+        return 0;
+    return 1;
+}
+
+/* Long-name entries are recognized by the exact attribute byte 0xf. */
+static char is_long_name(const direntry_t* direntry)
+{
+    if (direntry->attributes != 0xf)
+        return 0;
+    return 1;
+}
+
+/* A short-name entry is any entry that is neither a volume label, nor a
+ * long-name entry, nor a free slot. */
+static char is_short_name(const direntry_t* direntry)
+{
+    return !(is_volume_label(direntry) || is_long_name(direntry)
+            || is_free(direntry));
+}
+
+/* True if the directory attribute bit (0x10) is set and the entry is not
+ * marked with 0xe5 in its first name byte. */
+static char is_directory(const direntry_t* direntry)
+{
+    if (!(direntry->attributes & 0x10))
+        return 0;
+    return direntry->name[0] != 0xe5;
+}
+
+/* True for short-name entries whose name starts with '.' (this covers both
+ * "." and ".."). */
+static inline char is_dot(const direntry_t* direntry)
+{
+    if (!is_short_name(direntry))
+        return 0;
+    return direntry->name[0] == '.';
+}
+
+/* True for short-name entries that are not directories. */
+static char is_file(const direntry_t* direntry)
+{
+    if (!is_short_name(direntry))
+        return 0;
+    return !is_directory(direntry);
+}
+
+/* Return the first cluster of direntry, assembled from the little-endian
+ * 16-bit halves begin (low) and begin_hi (high). */
+static inline uint32_t begin_of_direntry(const direntry_t* direntry)
+{
+    uint32_t low = le16_to_cpu(direntry->begin);
+    /* widen before shifting: a 16-bit value is promoted to int, and shifting
+     * 0x8000 or more left by 16 would overflow a signed 32-bit int */
+    uint32_t high = le16_to_cpu(direntry->begin_hi);
+
+    return low | (high << 16);
+}
+
+/* Return the size field of direntry converted from little-endian to host
+ * byte order. */
+static inline uint32_t filesize_of_direntry(const direntry_t* direntry)
+{
+    return le32_to_cpu(direntry->size);
+}
+
+/* Store the first cluster number begin in direntry, split into the
+ * little-endian 16-bit halves begin (low) and begin_hi (high). */
+static void set_begin_of_direntry(direntry_t* direntry, uint32_t begin)
+{
+    const uint16_t low = begin & 0xffff;
+    const uint16_t high = (begin >> 16) & 0xffff;
+
+    direntry->begin_hi = cpu_to_le16(high);
+    direntry->begin = cpu_to_le16(low);
+}
+
+/* fat functions */
+
+/* Compute the checksum over the 11 bytes of the short name (8 name bytes
+ * followed by 3 extension bytes) of entry. The result is what
+ * create_short_and_long_name() stores in the long-name entries. */
+static inline uint8_t fat_chksum(const direntry_t* entry)
+{
+    uint8_t chksum=0;
+    int i;
+
+    for(i=0;i<11;i++) {
+        unsigned char c;
+
+        /* bytes 0..7 come from name[], bytes 8..10 from extension[];
+         * never index name[8], which is one past the end of the array */
+        c = (i < 8) ? entry->name[i] : entry->extension[i-8];
+        /* rotate the sum right by one bit, then add the next byte */
+        chksum=(((chksum&0xfe)>>1)|((chksum&0x01)?0x80:0)) + c;
+    }
+
+    return chksum;
+}
+
+/* Convert time (broken down as local time) into a packed 16-bit FAT value,
+ * already converted to little-endian:
+ *  - return_time != 0: the time, seconds/2 | minutes<<5 | hours<<11
+ *  - return_time == 0: the date, day | month<<5 | (years since 1980)<<9 */
+static uint16_t fat_datetime(time_t time,int return_time) {
+    struct tm* t;
+#ifdef _WIN32
+    t=localtime(&time); /* this is not thread safe */
+#else
+    struct tm t1;
+    t=&t1;
+    localtime_r(&time,t);
+#endif
+    if(!return_time)
+        return cpu_to_le16((t->tm_mday)|((t->tm_mon+1)<<5)|((t->tm_year-80)<<9));
+    return cpu_to_le16((t->tm_sec/2)|(t->tm_min<<5)|(t->tm_hour<<11));
+}
+
+/* Store value as the FAT entry of cluster. FAT32 and FAT16 entries are
+ * written as little-endian 32/16-bit items; any other fat_type is handled
+ * as FAT12, where two 12-bit entries share three bytes. */
+static inline void fat_set(BDRVVVFATState* s,unsigned int cluster,uint32_t value)
+{
+    if(s->fat_type==32) {
+        uint32_t* entry=array_get(&(s->fat),cluster);
+        *entry=cpu_to_le32(value);
+        return;
+    }
+
+    if(s->fat_type==16) {
+        uint16_t* entry=array_get(&(s->fat),cluster);
+        *entry=cpu_to_le16(value&0xffff);
+        return;
+    }
+
+    {
+        int offset = (cluster*3/2);
+        unsigned char* p = array_get(&(s->fat), offset);
+
+        if ((cluster&1) == 0) {
+            /* even cluster: all of p[0] and the low nibble of p[1] */
+            p[0] = value&0xff;
+            p[1] = (p[1]&0xf0) | ((value>>8)&0xf);
+        } else {
+            /* odd cluster: the high nibble of p[0] and all of p[1] */
+            p[0] = (p[0]&0xf) | ((value&0xf)<<4);
+            p[1] = (value>>4);
+        }
+    }
+}
+
+/* Read the FAT entry of cluster. FAT32 and FAT16 entries are little-endian
+ * 32/16-bit items; any other fat_type is handled as FAT12, where the 12-bit
+ * entry is extracted from the two bytes it shares with its neighbour. */
+static inline uint32_t fat_get(BDRVVVFATState* s,unsigned int cluster)
+{
+    if(s->fat_type==32) {
+        uint32_t* entry=array_get(&(s->fat),cluster);
+        return le32_to_cpu(*entry);
+    }
+
+    if(s->fat_type==16) {
+        uint16_t* entry=array_get(&(s->fat),cluster);
+        return le16_to_cpu(*entry);
+    }
+
+    {
+        const uint8_t* x=(uint8_t*)(s->fat.pointer)+cluster*3/2;
+        uint32_t pair = x[0]|(x[1]<<8);
+
+        /* odd clusters live in the upper 12 bits of the byte pair */
+        if (cluster&1)
+            pair >>= 4;
+        return pair & 0x0fff;
+    }
+}
+
+/* Return -1 if fat_entry is an end-of-chain marker, i.e. greater than
+ * max_fat_value - 8, and 0 otherwise. */
+static inline int fat_eof(BDRVVVFATState* s,uint32_t fat_entry)
+{
+    return (fat_entry>s->max_fat_value-8) ? -1 : 0;
+}
+
+/* Allocate and zero the in-memory FAT for s->sectors_per_fat sectors and
+ * set s->max_fat_value according to s->fat_type (0 for an unknown type).
+ * FAT12 is kept as a byte array, FAT16/FAT32 as arrays of 2/4-byte items. */
+static inline void init_fat(BDRVVVFATState* s)
+{
+    const unsigned int fat_bytes = s->sectors_per_fat * 0x200;
+
+    if (s->fat_type == 12) {
+        array_init(&(s->fat),1);
+        array_ensure_allocated(&(s->fat), fat_bytes * 3 / 2 - 1);
+    } else {
+        array_init(&(s->fat),(s->fat_type==32?4:2));
+        array_ensure_allocated(&(s->fat), fat_bytes / s->fat.item_size - 1);
+    }
+    memset(s->fat.pointer,0,s->fat.size);
+
+    if (s->fat_type == 12)
+        s->max_fat_value=0xfff;
+    else if (s->fat_type == 16)
+        s->max_fat_value=0xffff;
+    else if (s->fat_type == 32)
+        s->max_fat_value=0x0fffffff;
+    else
+        s->max_fat_value=0; /* error... */
+}
+
+/* TODO: in create_short_filename, 0xe5->0x05 is not yet handled! */
+/* TODO: in parse_short_filename, 0x05->0xe5 is not yet handled! */
+/*
+ * Append the directory entries for filename to s->directory and return the
+ * new short-name entry.
+ *
+ * With is_dot set (used for "." and ".."), only a space-padded short entry
+ * holding filename verbatim is created. Otherwise the long-name entries are
+ * created first, followed by a short entry with an 8.3 name derived from
+ * filename: upper-cased, unwanted characters replaced by '_', and mangled
+ * until it differs from every entry starting at index directory_start.
+ * Only the name (and, for long entries, the checksum) is filled in; the
+ * other fields of the short entry are left to the caller.
+ */
+static inline direntry_t* create_short_and_long_name(BDRVVVFATState* s,
+       unsigned int directory_start, const char* filename, int is_dot)
+{
+    /* long_index: index the first long-name entry is going to get */
+    int i,j,long_index=s->directory.next;
+    direntry_t* entry = NULL;
+    direntry_t* entry_long = NULL;
+
+    if(is_dot) {
+       entry=array_get_next(&(s->directory));
+       memset(entry->name,0x20,11);
+       memcpy(entry->name,filename,strlen(filename));
+       return entry;
+    }
+
+    entry_long=create_long_filename(s,filename);
+
+    /* j: position of the last '.', not counting one at index 0 (j == 0
+     * means "no extension"); i: number of base-name characters, at most 8 */
+    i = strlen(filename);
+    for(j = i - 1; j>0  && filename[j]!='.';j--);
+    if (j > 0)
+       i = (j > 8 ? 8 : j);
+    else if (i > 8)
+       i = 8;
+
+    /* name and extension are adjacent, so 11 bytes cover both of them */
+    entry=array_get_next(&(s->directory));
+    memset(entry->name,0x20,11);
+    memcpy(entry->name, filename, i);
+
+    /* copy up to three characters following the last '.' */
+    if(j > 0)
+       for (i = 0; i < 3 && filename[j+1+i]; i++)
+           entry->extension[i] = filename[j+1+i];
+
+    /* upcase & remove unwanted characters */
+    for(i=10;i>=0;i--) {
+       /* at the end of the extension (10) and of the name (7), skip the
+        * trailing space padding so that it is not replaced by '_' */
+       if(i==10 || i==7) for(;i>0 && entry->name[i]==' ';i--);
+       if(entry->name[i]<=' ' || entry->name[i]>0x7f
+               || strchr(".*?<>|\":/\\[];,+='",entry->name[i]))
+           entry->name[i]='_';
+        else if(entry->name[i]>='a' && entry->name[i]<='z')
+            entry->name[i]+='A'-'a';
+    }
+
+    /* mangle duplicates */
+    while(1) {
+       direntry_t* entry1=array_get(&(s->directory),directory_start);
+       int j;
+
+       /* look for an earlier non-long entry with the same 11 name bytes */
+       for(;entry1<entry;entry1++)
+           if(!is_long_name(entry1) && !memcmp(entry1->name,entry->name,11))
+               break; /* found dupe */
+       if(entry1==entry) /* no dupe found */
+           break;
+
+       /* use all 8 characters of name */
+       if(entry->name[7]==' ') {
+           int j;
+           for(j=6;j>0 && entry->name[j]==' ';j--)
+               entry->name[j]='~';
+       }
+
+       /* increment number */
+       /* decimal increment with carry, working from name[7] to the left */
+       for(j=7;j>0 && entry->name[j]=='9';j--)
+           entry->name[j]='0';
+       if(j>0) {
+           if(entry->name[j]<'0' || entry->name[j]>'9')
+               entry->name[j]='0';
+           else
+               entry->name[j]++;
+       }
+    }
+
+    /* calculate checksum; propagate to long name */
+    if(entry_long) {
+        uint8_t chksum=fat_chksum(entry);
+
+       /* calculate anew, because realloc could have taken place */
+       entry_long=array_get(&(s->directory),long_index);
+       while(entry_long<entry && is_long_name(entry_long)) {
+           entry_long->reserved[1]=chksum;
+           entry_long++;
+       }
+    }
+
+    return entry;
+}
+
+/*
+ * Read a directory. (the index of the corresponding mapping must be passed).
+ *
+ * For every entry of the host directory that can be stat()ed, the short and
+ * long directory entries are appended to s->directory; sub-directories and
+ * non-empty files additionally get a new mapping appended to s->mapping
+ * (their clusters are assigned later by the caller). The directory is then
+ * padded with zeroed entries up to a cluster boundary, the root directory
+ * (mapping 0) up to at least ROOT_ENTRIES entries.
+ *
+ * Returns 0 on success, -1 if the directory cannot be opened (mapping->end
+ * is then set to mapping->begin) and -2 if it contains a file larger than
+ * 2GB.
+ */
+static int read_directory(BDRVVVFATState* s, int mapping_index)
+{
+    mapping_t* mapping = array_get(&(s->mapping), mapping_index);
+    direntry_t* direntry;
+    const char* dirname = mapping->path;
+    int first_cluster = mapping->begin;
+    int parent_index = mapping->info.dir.parent_mapping_index;
+    mapping_t* parent_mapping = (mapping_t*)
+        (parent_index >= 0 ? array_get(&(s->mapping), parent_index) : NULL);
+    int first_cluster_of_parent = parent_mapping ? parent_mapping->begin : -1;
+
+    DIR* dir=opendir(dirname);
+    struct dirent* entry;
+    int i;
+
+    assert(mapping->mode & MODE_DIRECTORY);
+
+    if(!dir) {
+        mapping->end = mapping->begin;
+        return -1;
+    }
+
+    i = mapping->info.dir.first_dir_index =
+            first_cluster == 0 ? 0 : s->directory.next;
+
+    /* actually read the directory, and allocate the mappings */
+    while((entry=readdir(dir))) {
+        unsigned int length=strlen(dirname)+2+strlen(entry->d_name);
+        char* buffer;
+        struct stat st;
+        int is_dot=!strcmp(entry->d_name,".");
+        int is_dotdot=!strcmp(entry->d_name,"..");
+
+        /* the root directory (first cluster 0) gets no "." and ".." */
+        if(first_cluster == 0 && (is_dotdot || is_dot))
+            continue;
+
+        buffer=(char*)qemu_malloc(length);
+        snprintf(buffer,length,"%s/%s",dirname,entry->d_name);
+
+        if(stat(buffer,&st)<0) {
+            free(buffer);
+            continue;
+        }
+
+        /* create directory entry for this file */
+        direntry=create_short_and_long_name(s, i, entry->d_name,
+                is_dot || is_dotdot);
+        direntry->attributes=(S_ISDIR(st.st_mode)?0x10:0x20);
+        direntry->reserved[0]=direntry->reserved[1]=0;
+        direntry->ctime=fat_datetime(st.st_ctime,1);
+        direntry->cdate=fat_datetime(st.st_ctime,0);
+        direntry->adate=fat_datetime(st.st_atime,0);
+        direntry->begin_hi=0;
+        direntry->mtime=fat_datetime(st.st_mtime,1);
+        direntry->mdate=fat_datetime(st.st_mtime,0);
+        if(is_dotdot)
+            set_begin_of_direntry(direntry, first_cluster_of_parent);
+        else if(is_dot)
+            set_begin_of_direntry(direntry, first_cluster);
+        else
+            direntry->begin=0; /* do that later */
+        if (st.st_size > 0x7fffffff) {
+            fprintf(stderr, "File %s is larger than 2GB\n", buffer);
+            free(buffer);
+            /* do not leak the directory stream on this error path */
+            closedir(dir);
+            return -2;
+        }
+        direntry->size=cpu_to_le32(S_ISDIR(st.st_mode)?0:st.st_size);
+
+        /* create mapping for this file */
+        if(!is_dot && !is_dotdot && (S_ISDIR(st.st_mode) || st.st_size)) {
+            s->current_mapping=(mapping_t*)array_get_next(&(s->mapping));
+            s->current_mapping->begin=0;
+            s->current_mapping->end=st.st_size;
+            /*
+             * we get the direntry of the most recent direntry, which
+             * contains the short name and all the relevant information.
+             */
+            s->current_mapping->dir_index=s->directory.next-1;
+            s->current_mapping->first_mapping_index = -1;
+            if (S_ISDIR(st.st_mode)) {
+                s->current_mapping->mode = MODE_DIRECTORY;
+                s->current_mapping->info.dir.parent_mapping_index =
+                    mapping_index;
+            } else {
+                s->current_mapping->mode = MODE_UNDEFINED;
+                s->current_mapping->info.file.offset = 0;
+            }
+            /* the mapping takes over ownership of the path buffer */
+            s->current_mapping->path=buffer;
+            s->current_mapping->read_only =
+                (st.st_mode & (S_IWUSR | S_IWGRP | S_IWOTH)) == 0;
+        } else {
+            /* ".", ".." and empty files get no mapping, so nothing keeps
+             * the path: release it here instead of leaking it */
+            free(buffer);
+        }
+    }
+    closedir(dir);
+
+    /* fill with zeroes up to the end of the cluster */
+    while(s->directory.next%(0x10*s->sectors_per_cluster)) {
+        direntry=array_get_next(&(s->directory));
+        memset(direntry,0,sizeof(direntry_t));
+    }
+
+/* TODO: if there are more entries, bootsector has to be adjusted! */
+#define ROOT_ENTRIES (0x02 * 0x10 * s->sectors_per_cluster)
+    if (mapping_index == 0 && s->directory.next < ROOT_ENTRIES) {
+        /* root directory */
+        int cur = s->directory.next;
+        array_ensure_allocated(&(s->directory), ROOT_ENTRIES - 1);
+        memset(array_get(&(s->directory), cur), 0,
+                (ROOT_ENTRIES - cur) * sizeof(direntry_t));
+    }
+
+    /* reget the mapping, since s->mapping was possibly realloc()ed */
+    mapping = (mapping_t*)array_get(&(s->mapping), mapping_index);
+    /* 0x20 is the size of one directory entry in bytes */
+    first_cluster += (s->directory.next - mapping->info.dir.first_dir_index)
+        * 0x20 / s->cluster_size;
+    mapping->end = first_cluster;
+
+    direntry = (direntry_t*)array_get(&(s->directory), mapping->dir_index);
+    set_begin_of_direntry(direntry, mapping->begin);
+
+    return 0;
+}
+
+/* Convert an absolute sector number into a cluster number: subtract the
+ * faked sectors that precede the file data and divide by the cluster size
+ * in sectors. */
+static inline uint32_t sector2cluster(BDRVVVFATState* s,off_t sector_num)
+{
+    return (sector_num-s->faked_sectors)/s->sectors_per_cluster;
+}
+
+/* Inverse of sector2cluster(): absolute number of the first sector of
+ * cluster cluster_num. */
+static inline off_t cluster2sector(BDRVVVFATState* s, uint32_t cluster_num)
+{
+    return s->faked_sectors + s->sectors_per_cluster * cluster_num;
+}
+
+/* Position of sector_num inside its cluster, counted in sectors. The
+ * subtracted offset (first sectors plus two FATs) is the same value that
+ * init_directories() stores in s->faked_sectors. */
+static inline uint32_t sector_offset_in_cluster(BDRVVVFATState* s,off_t sector_num)
+{
+    return (sector_num-s->first_sectors_number-2*s->sectors_per_fat)%s->sectors_per_cluster;
+}
+
+#ifdef DBG
+/* Return the directory entry referenced by mapping->dir_index, or 0 for a
+ * mapping whose mode is still MODE_UNDEFINED. Only built with DBG. */
+static direntry_t* get_direntry_for_mapping(BDRVVVFATState* s,mapping_t* mapping)
+{
+    if(mapping->mode!=MODE_UNDEFINED)
+        return (direntry_t*)(s->directory.pointer+sizeof(direntry_t)*mapping->dir_index);
+    return 0;
+}
+#endif
+
+/*
+ * Build the in-memory image of the FAT volume that shadows dirname:
+ * the FAT, the directory entries and the mappings of all files and
+ * sub-directories (read breadth-first via read_directory()), and finally
+ * the boot sector inside s->first_sectors.
+ *
+ * s->sectors_per_cluster, s->fat_type, s->sector_count,
+ * s->first_sectors_number and the geometry in s->bs have to be set before.
+ *
+ * Returns 0 on success, -1 if a directory cannot be read and -EINVAL if
+ * the contents do not fit into the volume.
+ */
+static int init_directories(BDRVVVFATState* s,
+        const char* dirname)
+{
+    bootsector_t* bootsector;
+    mapping_t* mapping;
+    unsigned int i;
+    unsigned int cluster;
+
+    memset(&(s->first_sectors[0]),0,0x40*0x200);
+
+    s->cluster_size=s->sectors_per_cluster*0x200;
+    s->cluster_buffer=qemu_malloc(s->cluster_size);
+
+    /*
+     * The formula: sc = spf+1+spf*spc*(512*8/fat_type),
+     * where sc is sector_count,
+     * spf is sectors_per_fat,
+     * spc is sectors_per_clusters, and
+     * fat_type = 12, 16 or 32.
+     */
+    i = 1+s->sectors_per_cluster*0x200*8/s->fat_type;
+    s->sectors_per_fat=(s->sector_count+i)/i; /* round up */
+
+    array_init(&(s->mapping),sizeof(mapping_t));
+    array_init(&(s->directory),sizeof(direntry_t));
+
+    /* add volume label */
+    {
+        direntry_t* entry=array_get_next(&(s->directory));
+        /* the array storage is not cleared, so zero the whole entry
+         * instead of leaving its date/cluster/size fields undefined */
+        memset(entry,0,sizeof(direntry_t));
+        entry->attributes=0x28; /* archive | volume label */
+        /* 11 bytes spanning name and extension, space padded like the
+         * volume label in the boot sector (no terminating NUL) */
+        memcpy(entry->name,"QEMU VVFAT ",11);
+    }
+
+    /* Now build FAT, and write back information into directory */
+    init_fat(s);
+
+    s->faked_sectors=s->first_sectors_number+s->sectors_per_fat*2;
+    s->cluster_count=sector2cluster(s, s->sector_count);
+
+    /* mapping 0 is the root directory */
+    mapping = array_get_next(&(s->mapping));
+    mapping->begin = 0;
+    mapping->dir_index = 0;
+    mapping->info.dir.parent_mapping_index = -1;
+    mapping->first_mapping_index = -1;
+    mapping->path = strdup(dirname);
+    i = strlen(mapping->path);
+    if (i > 0 && mapping->path[i - 1] == '/')
+        mapping->path[i - 1] = '\0';
+    mapping->mode = MODE_DIRECTORY;
+    mapping->read_only = 0;
+    s->path = mapping->path;
+
+    /* read_directory() appends mappings while we iterate, so this loop
+     * walks the whole tree and hands out clusters in mapping order */
+    for (i = 0, cluster = 0; i < s->mapping.next; i++) {
+        /* MS-DOS expects the FAT to be 0 for the root directory
+         * (except for the media byte). */
+        /* LATER TODO: still true for FAT32? */
+        int fix_fat = (i != 0);
+        mapping = array_get(&(s->mapping), i);
+
+        if (mapping->mode & MODE_DIRECTORY) {
+            mapping->begin = cluster;
+            if(read_directory(s, i)) {
+                fprintf(stderr, "Could not read directory %s\n",
+                        mapping->path);
+                return -1;
+            }
+            /* s->mapping may have been reallocated */
+            mapping = array_get(&(s->mapping), i);
+        } else {
+            assert(mapping->mode == MODE_UNDEFINED);
+            mapping->mode=MODE_NORMAL;
+            mapping->begin = cluster;
+            if (mapping->end > 0) {
+                direntry_t* direntry = array_get(&(s->directory),
+                        mapping->dir_index);
+
+                /* end held the file size in bytes up to here; turn it
+                 * into the cluster following the file's last cluster */
+                mapping->end = cluster + 1 + (mapping->end-1)/s->cluster_size;
+                set_begin_of_direntry(direntry, mapping->begin);
+            } else {
+                mapping->end = cluster + 1;
+                fix_fat = 0;
+            }
+        }
+
+        assert(mapping->begin < mapping->end);
+
+        /* next free cluster */
+        cluster = mapping->end;
+
+        if(cluster > s->cluster_count) {
+            fprintf(stderr,"Directory does not fit in FAT%d (capacity %s)\n",
+                    s->fat_type,
+                    s->fat_type == 12 ? s->sector_count == 2880 ? "1.44 MB"
+                                                                : "2.88 MB"
+                                      : "504MB");
+            return -EINVAL;
+        }
+
+        /* fix fat for entry */
+        if (fix_fat) {
+            int j;
+            /* chain the clusters and terminate the chain */
+            for(j = mapping->begin; j < mapping->end - 1; j++)
+                fat_set(s, j, j+1);
+            fat_set(s, mapping->end - 1, s->max_fat_value);
+        }
+    }
+
+    mapping = array_get(&(s->mapping), 0);
+    s->sectors_of_root_directory = mapping->end * s->sectors_per_cluster;
+    s->last_cluster_of_root_directory = mapping->end;
+
+    /* the FAT signature */
+    fat_set(s,0,s->max_fat_value);
+    fat_set(s,1,s->max_fat_value);
+
+    s->current_mapping = NULL;
+
+    /* the boot sector is the last of the first_sectors_number sectors */
+    bootsector=(bootsector_t*)(s->first_sectors+(s->first_sectors_number-1)*0x200);
+    bootsector->jump[0]=0xeb;
+    bootsector->jump[1]=0x3e;
+    bootsector->jump[2]=0x90;
+    memcpy(bootsector->name,"QEMU    ",8);
+    bootsector->sector_size=cpu_to_le16(0x200);
+    bootsector->sectors_per_cluster=s->sectors_per_cluster;
+    bootsector->reserved_sectors=cpu_to_le16(1);
+    bootsector->number_of_fats=0x2; /* number of FATs */
+    bootsector->root_entries=cpu_to_le16(s->sectors_of_root_directory*0x10);
+    bootsector->total_sectors16=s->sector_count>0xffff?0:cpu_to_le16(s->sector_count);
+    bootsector->media_type=(s->fat_type!=12?0xf8:s->sector_count==5760?0xf9:0xf8); /* media descriptor */
+    s->fat.pointer[0] = bootsector->media_type;
+    bootsector->sectors_per_fat=cpu_to_le16(s->sectors_per_fat);
+    bootsector->sectors_per_track=cpu_to_le16(s->bs->secs);
+    bootsector->number_of_heads=cpu_to_le16(s->bs->heads);
+    bootsector->hidden_sectors=cpu_to_le32(s->first_sectors_number==1?0:0x3f);
+    bootsector->total_sectors=cpu_to_le32(s->sector_count>0xffff?s->sector_count:0);
+
+    /* LATER TODO: if FAT32, this is wrong */
+    bootsector->u.fat16.drive_number=s->fat_type==12?0:0x80; /* assume this is hda (TODO) */
+    bootsector->u.fat16.current_head=0;
+    bootsector->u.fat16.signature=0x29;
+    bootsector->u.fat16.id=cpu_to_le32(0xfabe1afd);
+
+    memcpy(bootsector->u.fat16.volume_label,"QEMU VVFAT ",11);
+    memcpy(bootsector->fat_type,(s->fat_type==12?"FAT12   ":s->fat_type==16?"FAT16   ":"FAT32   "),8);
+    bootsector->magic[0]=0x55; bootsector->magic[1]=0xaa;
+
+    return 0;
+}
+
+#ifdef DEBUG
+/* Debug-only pointer to the driver state; vvfat_open() stores its state here. */
+static BDRVVVFATState *vvv = NULL;
+#endif
+
+/* Forward declarations; vvfat_open() below calls enable_write_target(). */
+static int enable_write_target(BDRVVVFATState *s);
+static int is_consistent(BDRVVVFATState *s);
+
+/*
+ * Open a virtual FAT volume from a "fat:" specification string.
+ *
+ * dirname must start with "fat:". The substrings ":floppy:", ":32:", ":16:",
+ * ":12:" and ":rw:" are recognised as options. The text after the last ':'
+ * (or, for a DOS-style "X:" drive prefix, starting at the drive letter) is
+ * handed to init_directories() as the directory to export.
+ *
+ * The volume is read-only unless ":rw:" is present.
+ * Returns 0 on success and -1 if the prefix is missing, if
+ * enable_write_target() fails, or if init_directories() fails.
+ * The flags argument is not used in this function.
+ */
+static int vvfat_open(BlockDriverState *bs, const char* dirname, int flags)
+{
+    BDRVVVFATState *s = bs->opaque;
+    int floppy = 0;
+    int i;
+
+#ifdef DEBUG
+    vvv = s;
+#endif
+
+DLOG(if (stderr == NULL) {
+    stderr = fopen("vvfat.log", "a");
+    setbuf(stderr, NULL);
+})
+
+    s->bs = bs;
+
+    /* defaults: FAT16 hard disk layout */
+    s->fat_type=16;
+    /* LATER TODO: if FAT32, adjust */
+    s->sectors_per_cluster=0x10;
+    /* 504MB disk*/
+    bs->cyls=1024; bs->heads=16; bs->secs=63;
+
+    s->current_cluster=0xffffffff;
+
+    /* 0x40 leading sectors; init_mbr() is called below only for this value */
+    s->first_sectors_number=0x40;
+    /* read only is the default for safety */
+    bs->read_only = 1;
+    s->qcow = s->write_target = NULL;
+    s->qcow_filename = NULL;
+    s->fat2 = NULL;
+    s->downcase_short_names = 1;
+
+    if (!strstart(dirname, "fat:", NULL))
+       return -1;
+
+    /* floppy layout: FAT12, a single leading sector, 80/2/36 geometry */
+    if (strstr(dirname, ":floppy:")) {
+       floppy = 1;
+       s->fat_type = 12;
+       s->first_sectors_number = 1;
+       s->sectors_per_cluster=2;
+       bs->cyls = 80; bs->heads = 2; bs->secs = 36;
+    }
+
+    s->sector_count=bs->cyls*bs->heads*bs->secs;
+
+    /* an explicit FAT width overrides the default chosen above */
+    if (strstr(dirname, ":32:")) {
+       fprintf(stderr, "Big fat greek warning: FAT32 has not been tested. You are welcome to do so!\n");
+       s->fat_type = 32;
+    } else if (strstr(dirname, ":16:")) {
+       s->fat_type = 16;
+    } else if (strstr(dirname, ":12:")) {
+       s->fat_type = 12;
+       s->sector_count=2880;
+    }
+
+    if (strstr(dirname, ":rw:")) {
+       if (enable_write_target(s))
+           return -1;
+       bs->read_only = 0;
+    }
+
+    /* i = position of the last ':' (at least 3 because of the "fat:" prefix) */
+    i = strrchr(dirname, ':') - dirname;
+    assert(i >= 3);
+    if (dirname[i-2] == ':' && qemu_isalpha(dirname[i-1]))
+       /* workaround for DOS drive names */
+       dirname += i-1;
+    else
+       dirname += i+1;
+
+    bs->total_sectors=bs->cyls*bs->heads*bs->secs;
+
+    if(init_directories(s, dirname))
+       return -1;
+
+    /* recompute the size from what init_directories() produced */
+    s->sector_count = s->faked_sectors + s->sectors_per_cluster*s->cluster_count;
+
+    if(s->first_sectors_number==0x40)
+       init_mbr(s);
+
+    /* for some reason or other, MS-DOS does not like to know about CHS... */
+    if (floppy)
+       bs->heads = bs->cyls = bs->secs = 0;
+
+    //    assert(is_consistent(s));
+    return 0;
+}
+
+/*
+ * Forget the currently cached mapping and cluster. If a mapping is cached
+ * and a file descriptor is recorded for it (non-zero), that descriptor is
+ * closed and reset to 0.
+ */
+static inline void vvfat_close_current_file(BDRVVVFATState *s)
+{
+    /* the cached cluster is dropped unconditionally */
+    s->current_cluster = -1;
+
+    if (s->current_mapping == NULL)
+        return;
+
+    s->current_mapping = NULL;
+    if (s->current_fd != 0) {
+        close(s->current_fd);
+        s->current_fd = 0;
+    }
+}
+
+/* mappings between index1 and index2-1 are supposed to be ordered
+ * return value is the index of the last mapping for which end>cluster_num
+ */
+static inline int find_mapping_for_cluster_aux(BDRVVVFATState* s,int cluster_num,int index1,int index2)
+{
+    /* index3 is the probe position of the bisection over [index1, index2);
+     * this initial value is overwritten at the top of the loop */
+    int index3=index1+1;
+    while(1) {
+       mapping_t* mapping;
+       index3=(index1+index2)/2;
+       mapping=array_get(&(s->mapping),index3);
+       assert(mapping->begin < mapping->end);
+       if(mapping->begin>=cluster_num) {
+           /* probe starts at or after cluster_num: shrink the upper bound */
+           assert(index2!=index3 || index2==0);
+           if(index2==index3)
+               return index1;
+           index2=index3;
+       } else {
+           /* probe starts before cluster_num: raise the lower bound, or
+            * finish once the interval cannot shrink any further */
+           if(index1==index3)
+               return mapping->end<=cluster_num ? index2 : index1;
+           index1=index3;
+       }
+       assert(index1<=index2);
+       DLOG(mapping=array_get(&(s->mapping),index1);
+       assert(mapping->begin<=cluster_num);
+       assert(index2 >= s->mapping.next ||
+               ((mapping = array_get(&(s->mapping),index2)) &&
+               mapping->end>cluster_num)));
+    }
+}
+
+/*
+ * Return the mapping whose [begin, end) range contains cluster_num, or
+ * NULL if the search index is past the last mapping or the mapping found
+ * starts after cluster_num.
+ */
+static inline mapping_t* find_mapping_for_cluster(BDRVVVFATState* s,int cluster_num)
+{
+    mapping_t* candidate = NULL;
+    int pos = find_mapping_for_cluster_aux(s, cluster_num, 0, s->mapping.next);
+
+    if (pos < s->mapping.next) {
+        candidate = array_get(&(s->mapping), pos);
+        if (candidate->begin > cluster_num)
+            candidate = NULL;
+        else
+            assert(candidate->begin <= cluster_num && candidate->end > cluster_num);
+    }
+
+    return candidate;
+}
+
+/*
+ * This function simply compares path == mapping->path. Since the mappings
+ * are sorted by cluster, this is expensive: O(n).
+ */
+/*
+ * Linear scan for the mapping whose path equals the given one. Only
+ * mappings with a negative first_mapping_index are compared.
+ * Returns NULL when nothing matches.
+ */
+static inline mapping_t* find_mapping_for_path(BDRVVVFATState* s,
+       const char* path)
+{
+    int pos = 0;
+
+    while (pos < s->mapping.next) {
+        mapping_t* entry = array_get(&(s->mapping), pos);
+
+        if (entry->first_mapping_index < 0 && strcmp(path, entry->path) == 0)
+            return entry;
+        pos++;
+    }
+
+    return NULL;
+}
+
+/*
+ * Make the file behind mapping the currently open file.
+ * Nothing happens if the cached mapping already has the same path.
+ * Returns 0 on success, -1 if mapping is NULL or open() fails (in which
+ * case the previously cached file is left untouched).
+ */
+static int open_file(BDRVVVFATState* s,mapping_t* mapping)
+{
+    int fd;
+
+    if (mapping == NULL)
+        return -1;
+
+    /* same path as the cached mapping: keep using the open descriptor */
+    if (s->current_mapping != NULL &&
+            strcmp(s->current_mapping->path, mapping->path) == 0)
+        return 0;
+
+    /* open file */
+    fd = open(mapping->path, O_RDONLY | O_BINARY | O_LARGEFILE);
+    if (fd < 0)
+        return -1;
+
+    vvfat_close_current_file(s);
+    s->current_fd = fd;
+    s->current_mapping = mapping;
+    return 0;
+}
+
+/*
+ * Make cluster_num the cached cluster, with s->cluster pointing at its data.
+ *
+ * Clusters of a directory mapping are served straight out of s->directory;
+ * clusters of a file mapping are read from the backing file into
+ * s->cluster_buffer. Nothing is done if cluster_num is already cached.
+ *
+ * Returns 0 on success, -1 if read() fails, -2 if open_file() fails
+ * (including the case where no mapping covers the cluster) and -3 if
+ * lseek() fails.
+ */
+static inline int read_cluster(BDRVVVFATState *s,int cluster_num)
+{
+    if(s->current_cluster != cluster_num) {
+       int result=0;
+       off_t offset;
+       assert(!s->current_mapping || s->current_fd || (s->current_mapping->mode & MODE_DIRECTORY));
+       /* the cached mapping does not cover this cluster: look it up */
+       if(!s->current_mapping
+               || s->current_mapping->begin>cluster_num
+               || s->current_mapping->end<=cluster_num) {
+           /* binary search of mappings for file */
+           mapping_t* mapping=find_mapping_for_cluster(s,cluster_num);
+
+           assert(!mapping || (cluster_num>=mapping->begin && cluster_num<mapping->end));
+
+           if (mapping && mapping->mode & MODE_DIRECTORY) {
+               vvfat_close_current_file(s);
+               s->current_mapping = mapping;
+read_cluster_directory:
+               /* also reached by the goto below when the cached mapping is a
+                * directory; 0x20 is the per-entry stride used with
+                * first_dir_index */
+               offset = s->cluster_size*(cluster_num-s->current_mapping->begin);
+               s->cluster = (unsigned char*)s->directory.pointer+offset
+                       + 0x20*s->current_mapping->info.dir.first_dir_index;
+               assert(((s->cluster-(unsigned char*)s->directory.pointer)%s->cluster_size)==0);
+               assert((char*)s->cluster+s->cluster_size <= s->directory.pointer+s->directory.next*s->directory.item_size);
+               s->current_cluster = cluster_num;
+               return 0;
+           }
+
+           if(open_file(s,mapping))
+               return -2;
+       } else if (s->current_mapping->mode & MODE_DIRECTORY)
+           goto read_cluster_directory;
+
+       assert(s->current_fd);
+
+       offset=s->cluster_size*(cluster_num-s->current_mapping->begin)+s->current_mapping->info.file.offset;
+       if(lseek(s->current_fd, offset, SEEK_SET)!=offset)
+           return -3;
+       s->cluster=s->cluster_buffer;
+       /* NOTE(review): only result < 0 is checked; after a short read the
+        * rest of cluster_buffer keeps whatever it held before -- confirm
+        * this is acceptable for the last cluster of a file */
+       result=read(s->current_fd,s->cluster,s->cluster_size);
+       if(result<0) {
+           s->current_cluster = -1;
+           return -1;
+       }
+       s->current_cluster = cluster_num;
+    }
+    return 0;
+}
+
+#ifdef DEBUG
+/*
+ * Debug helper: dump len bytes starting at address to stderr, 16 bytes
+ * per line, as a hex column followed by a character column in which
+ * bytes below ' ' or above 0x7f are shown as '.'.
+ */
+static void hexdump(const void* address, uint32_t len)
+{
+    const unsigned char* bytes = address;
+    int row, col;
+
+    for (row = 0; row < len; row += 16) {
+        const unsigned char* line = bytes + row;
+
+        /* hex column, padded with blanks up to 16 entries */
+        for (col = 0; col < 16; col++) {
+            if (row + col < len)
+                fprintf(stderr, "%02x ", line[col]);
+            else
+                fprintf(stderr, "   ");
+        }
+        fprintf(stderr, " ");
+
+        /* character column */
+        for (col = 0; col < 16 && row + col < len; col++) {
+            unsigned char ch = line[col];
+
+            if (ch < ' ' || ch > 0x7f)
+                fprintf(stderr, "%c", '.');
+            else
+                fprintf(stderr, "%c", ch);
+        }
+        fprintf(stderr, "\n");
+    }
+}
+
+/*
+ * Debug helper: print one directory entry to stderr.
+ *
+ * Long-name entries are shown as the low byte of each stored character;
+ * other entries are shown as the 11 name bytes followed by attributes,
+ * first cluster and size. Bytes that compare below ' ' are replaced by
+ * 0xb0. A NULL entry prints only the "direntry ...: " prefix.
+ */
+static void print_direntry(const direntry_t* direntry)
+{
+    int j = 0;
+    char buffer[1024];
+
+    /* %p rather than a cast to int, which truncates 64-bit pointers */
+    fprintf(stderr, "direntry %p: ", (const void*)direntry);
+    if(!direntry)
+       return;
+    if(is_long_name(direntry)) {
+       unsigned char* c=(unsigned char*)direntry;
+       int i;
+       /* the three loops cover the three character runs of the entry */
+       for(i=1;i<11 && c[i] && c[i]!=0xff;i+=2)
+#define ADD_CHAR(c) {buffer[j] = (c); if (buffer[j] < ' ') buffer[j] = 0xb0; j++;}
+           ADD_CHAR(c[i]);
+       for(i=14;i<26 && c[i] && c[i]!=0xff;i+=2)
+           ADD_CHAR(c[i]);
+       for(i=28;i<32 && c[i] && c[i]!=0xff;i+=2)
+           ADD_CHAR(c[i]);
+       buffer[j] = 0;
+       fprintf(stderr, "%s\n", buffer);
+    } else {
+       int i;
+       for(i=0;i<11;i++)
+           ADD_CHAR(direntry->name[i]);
+       buffer[j] = 0;
+       fprintf(stderr,"%s attributes=0x%02x begin=%d size=%d\n",
+               buffer,
+               direntry->attributes,
+               begin_of_direntry(direntry),le32_to_cpu(direntry->size));
+    }
+}
+
+/*
+ * Debug helper: print the fields of one mapping to stderr, followed by
+ * the directory-specific or file-specific part of mapping->info depending
+ * on MODE_DIRECTORY.
+ */
+static void print_mapping(const mapping_t* mapping)
+{
+    /* %p rather than a cast to int, which truncates 64-bit pointers */
+    fprintf(stderr, "mapping (%p): begin, end = %d, %d, dir_index = %d, first_mapping_index = %d, name = %s, mode = 0x%x, " ,
+           (const void*)mapping, mapping->begin, mapping->end,
+           mapping->dir_index, mapping->first_mapping_index,
+           mapping->path, mapping->mode);
+    if (mapping->mode & MODE_DIRECTORY)
+       fprintf(stderr, "parent_mapping_index = %d, first_dir_index = %d\n", mapping->info.dir.parent_mapping_index, mapping->info.dir.first_dir_index);
+    else
+       fprintf(stderr, "offset = %d\n", mapping->info.file.offset);
+}
+#endif
+
+/*
+ * Read nb_sectors 512-byte sectors starting at sector_num into buf.
+ *
+ * For each sector, in order of precedence:
+ *  - if a qcow overlay exists and reports the sector as allocated, the
+ *    allocated run is read from the overlay;
+ *  - sectors below s->faked_sectors come from s->first_sectors or from
+ *    s->fat (the same FAT data serves both FAT copies);
+ *  - all other sectors are cluster data fetched through read_cluster().
+ *
+ * Returns 0 on success, -1 if a sector lies beyond s->sector_count or the
+ * overlay read fails. A failing read_cluster() yields a zero-filled sector
+ * rather than an error.
+ */
+static int vvfat_read(BlockDriverState *bs, int64_t sector_num,
+                    uint8_t *buf, int nb_sectors)
+{
+    BDRVVVFATState *s = bs->opaque;
+    int i;
+
+    for(i=0;i<nb_sectors;i++,sector_num++) {
+       if (sector_num >= s->sector_count)
+          return -1;
+       if (s->qcow) {
+           int n;
+           if (s->qcow->drv->bdrv_is_allocated(s->qcow,
+                       sector_num, nb_sectors-i, &n)) {
+DLOG(fprintf(stderr, "sectors %d+%d allocated\n", (int)sector_num, n));
+               if (s->qcow->drv->bdrv_read(s->qcow, sector_num, buf+i*0x200, n))
+                   return -1;
+               /* skip the n sectors just read; the loop header adds the
+                * remaining +1 */
+               i += n - 1;
+               sector_num += n - 1;
+               continue;
+           }
+DLOG(fprintf(stderr, "sector %d not allocated\n", (int)sector_num));
+       }
+       if(sector_num<s->faked_sectors) {
+           /* NOTE(review): a faked sector matching none of the three cases
+            * below leaves its part of buf unwritten -- confirm that
+            * faked_sectors never exceeds first sectors + 2 FATs */
+           if(sector_num<s->first_sectors_number)
+               memcpy(buf+i*0x200,&(s->first_sectors[sector_num*0x200]),0x200);
+           else if(sector_num-s->first_sectors_number<s->sectors_per_fat)
+               memcpy(buf+i*0x200,&(s->fat.pointer[(sector_num-s->first_sectors_number)*0x200]),0x200);
+           else if(sector_num-s->first_sectors_number-s->sectors_per_fat<s->sectors_per_fat)
+               memcpy(buf+i*0x200,&(s->fat.pointer[(sector_num-s->first_sectors_number-s->sectors_per_fat)*0x200]),0x200);
+       } else {
+           /* data area: translate the sector into cluster + offset */
+           uint32_t sector=sector_num-s->faked_sectors,
+           sector_offset_in_cluster=(sector%s->sectors_per_cluster),
+           cluster_num=sector/s->sectors_per_cluster;
+           if(read_cluster(s, cluster_num) != 0) {
+               /* LATER TODO: strict: return -1; */
+               memset(buf+i*0x200,0,0x200);
+               continue;
+           }
+           memcpy(buf+i*0x200,s->cluster+sector_offset_in_cluster*0x200,0x200);
+       }
+    }
+    return 0;
+}
+
+/* LATER TODO: statify all functions */
+
+/*
+ * Idea of the write support (use snapshot):
+ *
+ * 1. check if all data is consistent, recording renames, modifications,
+ *    new files and directories (in s->commits).
+ *
+ * 2. if the data is not consistent, stop committing
+ *
+ * 3. handle renames, and create new files and directories (do not yet
+ *    write their contents)
+ *
+ * 4. walk the directories, fixing the mapping and direntries, and marking
+ *    the handled mappings as not deleted
+ *
+ * 5. commit the contents of the files
+ *
+ * 6. handle deleted files and directories
+ *
+ */
+
+/*
+ * One pending action recorded in s->commits. Which member of param is
+ * valid is selected by action.
+ */
+typedef struct commit_t {
+    /* set by every schedule_*() helper except schedule_writeout(), which
+     * stores NULL; clear_commits() frees it for the non-WRITEOUT actions */
+    char* path;
+    union {
+       struct { uint32_t cluster; } rename;
+       struct { int dir_index; uint32_t modified_offset; } writeout;
+       struct { uint32_t first_cluster; } new_file;
+       struct { uint32_t cluster; } mkdir;
+    } param;
+    /* DELETEs and RMDIRs are handled differently: see handle_deletes() */
+    enum {
+       ACTION_RENAME, ACTION_WRITEOUT, ACTION_NEW_FILE, ACTION_MKDIR
+    } action;
+} commit_t;
+
+/*
+ * Drop all scheduled commits: free the path of every entry that is not a
+ * write-out (write-outs must have a NULL path) and reset the array length.
+ */
+static void clear_commits(BDRVVVFATState* s)
+{
+    int n;
+
+DLOG(fprintf(stderr, "clear_commits (%d commits)\n", s->commits.next));
+    for (n = 0; n < s->commits.next; n++) {
+        commit_t* entry = array_get(&(s->commits), n);
+
+        assert(entry->path || entry->action == ACTION_WRITEOUT);
+        if (entry->action == ACTION_WRITEOUT) {
+            /* write-outs never carry a path */
+            assert(entry->path == NULL);
+            continue;
+        }
+        assert(entry->path);
+        free(entry->path);
+    }
+    s->commits.next = 0;
+}
+
+/*
+ * Append an ACTION_RENAME commit for the entry starting at cluster.
+ * new_path is stored as given (not copied) and later freed by
+ * clear_commits().
+ */
+static void schedule_rename(BDRVVVFATState* s,
+       uint32_t cluster, char* new_path)
+{
+    commit_t* entry = array_get_next(&(s->commits));
+
+    entry->action = ACTION_RENAME;
+    entry->param.rename.cluster = cluster;
+    entry->path = new_path;
+}
+
+/*
+ * Append an ACTION_WRITEOUT commit for the directory entry dir_index,
+ * recording modified_offset. Write-outs carry no path.
+ */
+static void schedule_writeout(BDRVVVFATState* s,
+       int dir_index, uint32_t modified_offset)
+{
+    commit_t* entry = array_get_next(&(s->commits));
+
+    entry->action = ACTION_WRITEOUT;
+    entry->param.writeout.modified_offset = modified_offset;
+    entry->param.writeout.dir_index = dir_index;
+    entry->path = NULL;
+}
+
+/*
+ * Append an ACTION_NEW_FILE commit for a file starting at first_cluster.
+ * path is stored as given (not copied) and later freed by clear_commits().
+ */
+static void schedule_new_file(BDRVVVFATState* s,
+       char* path, uint32_t first_cluster)
+{
+    commit_t* entry = array_get_next(&(s->commits));
+
+    entry->action = ACTION_NEW_FILE;
+    entry->param.new_file.first_cluster = first_cluster;
+    entry->path = path;
+}
+
+/*
+ * Append an ACTION_MKDIR commit for a directory starting at cluster.
+ * path is stored as given (not copied) and later freed by clear_commits().
+ */
+static void schedule_mkdir(BDRVVVFATState* s, uint32_t cluster, char* path)
+{
+    commit_t* entry = array_get_next(&(s->commits));
+
+    entry->action = ACTION_MKDIR;
+    entry->param.mkdir.cluster = cluster;
+    entry->path = path;
+}
+
+/* Accumulator used while parsing the entries that make up one file name. */
+typedef struct {
+    /*
+     * Since the sequence number is at most 0x3f, and the filename
+     * length is at most 13 times the sequence number, the maximal
+     * filename length is 0x3f * 13 bytes.
+     */
+    unsigned char name[0x3f * 13 + 1];
+    /* checksum is reset to 0x100 by lfn_init(); len is the name length
+     * as computed by parse_long_name()/parse_short_name() */
+    int checksum, len;
+    /* sequence number of the long-name entry processed last */
+    int sequence_number;
+} long_file_name;
+
+/*
+ * Reset a long_file_name accumulator: no name collected yet and a checksum
+ * value (0x100) that lies outside the range of the one-byte checksum
+ * stored in a long-name entry.
+ */
+static void lfn_init(long_file_name* lfn)
+{
+    lfn->checksum = 0x100;
+    lfn->len = 0;
+    lfn->sequence_number = 0;
+}
+
+/*
+ * Feed one directory entry into the long-name accumulator lfn.
+ *
+ * Returns 0 if the entry was a long-name fragment and was stored,
+ * > 0 if the entry is not a long-name entry, and < 0 on error:
+ *   -1  unexpected sequence number (including a sequence number below 1),
+ *   -2  checksum differs from the one recorded for the first fragment,
+ *   -3  one of the bytes 12, 26, 27 of a follow-up fragment is non-zero,
+ *   -4  a character has a high byte that is neither 0 nor an allowed 0xff
+ *       padding byte.
+ */
+static int parse_long_name(long_file_name* lfn,
+       const direntry_t* direntry)
+{
+    int i, j, offset;
+    const unsigned char* pointer = (const unsigned char*)direntry;
+
+    if (!is_long_name(direntry))
+       return 1;
+
+    if (pointer[0] & 0x40) {
+       /* flag 0x40 marks the fragment that starts a new name */
+       lfn->sequence_number = pointer[0] & 0x3f;
+       lfn->checksum = pointer[13];
+       lfn->name[0] = 0;
+       lfn->name[lfn->sequence_number * 13] = 0;
+    } else if ((pointer[0] & 0x3f) != --lfn->sequence_number)
+       return -1;
+    else if (pointer[13] != lfn->checksum)
+       return -2;
+    else if (pointer[12] || pointer[26] || pointer[27])
+       return -3;
+
+    /*
+     * The entry comes from guest-written data. A sequence number of 0
+     * would make the offset below negative and the copy loop would write
+     * in front of lfn->name[], so reject it.
+     */
+    if (lfn->sequence_number < 1)
+       return -1;
+
+    offset = 13 * (lfn->sequence_number - 1);
+    /* 13 two-byte characters at byte offsets 1..10, 14..25 and 28..31 */
+    for (i = 0, j = 1; i < 13; i++, j+=2) {
+       if (j == 11)
+           j = 14;
+       else if (j == 26)
+           j = 28;
+
+       if (pointer[j+1] == 0)
+           lfn->name[offset + i] = pointer[j];
+       else if (pointer[j+1] != 0xff || (pointer[0] & 0x40) == 0)
+           return -4;
+       else
+           lfn->name[offset + i] = 0;
+    }
+
+    if (pointer[0] & 0x40)
+       lfn->len = offset + strlen((char*)lfn->name + offset);
+
+    return 0;
+}
+
+/* returns 0 if successful, >0 if no short_name, and <0 on error */
+/*
+ * Build "name.ext" in lfn->name from the 8+3 fields of direntry, lower-
+ * casing the characters when s->downcase_short_names is set.
+ * Error codes: -1 for an invalid character in the name part, -2 for an
+ * invalid character in the extension (invalid: <= ' ' or > 0x7f).
+ */
+static int parse_short_name(BDRVVVFATState* s,
+       long_file_name* lfn, direntry_t* direntry)
+{
+    int i, j;
+
+    if (!is_short_name(direntry))
+       return 1;
+
+    /* j = index of the last non-blank character of the name part */
+    for (j = 7; j >= 0 && direntry->name[j] == ' '; j--);
+    for (i = 0; i <= j; i++) {
+       if (direntry->name[i] <= ' ' || direntry->name[i] > 0x7f)
+           return -1;
+       else if (s->downcase_short_names)
+           lfn->name[i] = qemu_tolower(direntry->name[i]);
+       else
+           lfn->name[i] = direntry->name[i];
+    }
+
+    /* j = index of the last non-blank character of the extension */
+    for (j = 2; j >= 0 && direntry->extension[j] == ' '; j--);
+    if (j >= 0) {
+       lfn->name[i++] = '.';
+       /* terminate first, then fill the extension from its end backwards */
+       lfn->name[i + j + 1] = '\0';
+       for (;j >= 0; j--) {
+           if (direntry->extension[j] <= ' ' || direntry->extension[j] > 0x7f)
+               return -2;
+           else if (s->downcase_short_names)
+               lfn->name[i + j] = qemu_tolower(direntry->extension[j]);
+           else
+               lfn->name[i + j] = direntry->extension[j];
+       }
+    } else
+       /* no extension: j is -1 here, so this terminates right after the name */
+       lfn->name[i + j + 1] = '\0';
+
+    lfn->len = strlen((char*)lfn->name);
+
+    return 0;
+}
+
+/*
+ * Return the FAT entry for cluster as seen in the modified FAT (s->fat2).
+ * Clusters below s->last_cluster_of_root_directory are not looked up:
+ * they are reported as one consecutive chain ending in s->max_fat_value.
+ */
+static inline uint32_t modified_fat_get(BDRVVVFATState* s,
+       unsigned int cluster)
+{
+    if (cluster < s->last_cluster_of_root_directory) {
+        if (cluster + 1 != s->last_cluster_of_root_directory)
+            return cluster + 1;
+        return s->max_fat_value;
+    }
+
+    switch (s->fat_type) {
+    case 32: {
+        uint32_t* slot32 = ((uint32_t*)s->fat2) + cluster;
+        return le32_to_cpu(*slot32);
+    }
+    case 16: {
+        uint16_t* slot16 = ((uint16_t*)s->fat2) + cluster;
+        return le16_to_cpu(*slot16);
+    }
+    default: {
+        /* 12-bit entries: two entries share three bytes */
+        const uint8_t* pair = s->fat2+cluster*3/2;
+        int shift = (cluster & 1) ? 4 : 0;
+        return ((pair[0] | (pair[1] << 8)) >> shift) & 0x0fff;
+    }
+    }
+}
+
+/*
+ * Report whether any sector of cluster_num is allocated in the qcow
+ * overlay. Returns 0 when there is no overlay or no sector is allocated;
+ * otherwise the non-zero result of the first bdrv_is_allocated() hit.
+ */
+static inline int cluster_was_modified(BDRVVVFATState* s, uint32_t cluster_num)
+{
+    int sector, dummy;
+
+    if (s->qcow == NULL)
+        return 0;
+
+    for (sector = 0; sector < s->sectors_per_cluster; sector++) {
+        int allocated = s->qcow->drv->bdrv_is_allocated(s->qcow,
+                cluster2sector(s, cluster_num) + sector, 1, &dummy);
+
+        if (allocated)
+            return allocated;
+    }
+
+    return 0;
+}
+
+/*
+ * Return a pointer to the part of path after its last '/', or path itself
+ * if it contains no '/'. The result points into path; nothing is copied.
+ */
+static const char* get_basename(const char* path)
+{
+    const char* slash = strrchr(path, '/');
+
+    /* skip the '/' itself when there is one */
+    return slash ? slash + 1 : path;
+}
+
+/*
+ * The array s->used_clusters holds the state of each cluster as a
+ * combination of the used_t flags below: USED_DIRECTORY (1) if the
+ * cluster is part of a directory, USED_FILE (2) if it is part of a file,
+ * and USED_ALLOCATED (4) if it was modified (presumably: allocated in the
+ * overlay -- TODO confirm where this flag is set). USED_ANY (3) is the
+ * mask of the directory and file flags.
+ * If any cluster is allocated, but not part of a file or directory, this
+ * driver refuses to commit.
+ */
+typedef enum {
+     USED_DIRECTORY = 1, USED_FILE = 2, USED_ANY = 3, USED_ALLOCATED = 4
+} used_t;
+
+/*
+ * get_cluster_count_for_direntry() not only determines how many clusters
+ * are occupied by direntry, but also if it was renamed or modified.
+ *
+ * A file is thought to be renamed *only* if there already was a file with
+ * exactly the same first cluster, but a different name.
+ *
+ * Further, the files/directories handled by this function are
+ * assumed to be *not* deleted (and *only* those).
+ */
+/*
+ * Follow the cluster chain of direntry in the modified FAT, marking each
+ * cluster as USED_FILE, and schedule rename / new-file / write-out commits
+ * when an overlay is present.
+ *
+ * Returns the number of clusters in the chain. Returns 0 for an entry
+ * starting at cluster 0 and when a cluster of the chain is already marked
+ * as used. The error values -1 (bad chain or read failure) and -2 (overlay
+ * write failure) are returned through the unsigned return type.
+ */
+static uint32_t get_cluster_count_for_direntry(BDRVVVFATState* s,
+       direntry_t* direntry, const char* path)
+{
+    /*
+     * This is a little bit tricky:
+     * IF the guest OS just inserts a cluster into the file chain,
+     * and leaves the rest alone, (i.e. the original file had clusters
+     * 15 -> 16, but now has 15 -> 32 -> 16), then the following happens:
+     *
+     * - do_commit will write the cluster into the file at the given
+     *   offset, but
+     *
+     * - the cluster which is overwritten should be moved to a later
+     *   position in the file.
+     *
+     * I am not aware that any OS does something as braindead, but this
+     * situation could happen anyway when not committing for a long time.
+     * Just to be sure that this does not bite us, detect it, and copy the
+     * contents of the clusters to-be-overwritten into the qcow.
+     */
+    int copy_it = 0;
+    int was_modified = 0;
+    int32_t ret = 0;
+
+    uint32_t cluster_num = begin_of_direntry(direntry);
+    uint32_t offset = 0;
+    int first_mapping_index = -1;
+    mapping_t* mapping = NULL;
+    const char* basename2 = NULL;
+
+    vvfat_close_current_file(s);
+
+    /* the root directory */
+    if (cluster_num == 0)
+       return 0;
+
+    /* write support */
+    if (s->qcow) {
+       basename2 = get_basename(path);
+
+       mapping = find_mapping_for_cluster(s, cluster_num);
+
+       if (mapping) {
+           const char* basename;
+
+           assert(mapping->mode & MODE_DELETED);
+           mapping->mode &= ~MODE_DELETED;
+
+           basename = get_basename(mapping->path);
+
+           assert(mapping->mode & MODE_NORMAL);
+
+           /* rename */
+           if (strcmp(basename, basename2))
+               schedule_rename(s, cluster_num, strdup(path));
+       } else if (is_file(direntry))
+           /* new file */
+           schedule_new_file(s, strdup(path), cluster_num);
+       else {
+           assert(0);
+           return 0;
+       }
+    }
+
+    while(1) {
+       if (s->qcow) {
+           if (!copy_it && cluster_was_modified(s, cluster_num)) {
+               if (mapping == NULL ||
+                       mapping->begin > cluster_num ||
+                       mapping->end <= cluster_num)
+               mapping = find_mapping_for_cluster(s, cluster_num);
+
+
+               if (mapping &&
+                       (mapping->mode & MODE_DIRECTORY) == 0) {
+
+                   /* was modified in qcow */
+                   if (offset != mapping->info.file.offset + s->cluster_size
+                           * (cluster_num - mapping->begin)) {
+                       /* offset of this cluster in file chain has changed */
+                       assert(0);
+                       copy_it = 1;
+                   } else if (offset == 0) {
+                       const char* basename = get_basename(mapping->path);
+
+                       if (strcmp(basename, basename2))
+                           copy_it = 1;
+                       first_mapping_index = array_index(&(s->mapping), mapping);
+                   }
+
+                   if (mapping->first_mapping_index != first_mapping_index
+                           && mapping->info.file.offset > 0) {
+                       assert(0);
+                       copy_it = 1;
+                   }
+
+                   /* need to write out? */
+                   if (!was_modified && is_file(direntry)) {
+                       was_modified = 1;
+                       schedule_writeout(s, mapping->dir_index, offset);
+                   }
+               }
+           }
+
+           if (copy_it) {
+               int i, dummy;
+               /*
+                * This is horribly inefficient, but that is okay, since
+                * it is rarely executed, if at all.
+                */
+               int64_t first_sector = cluster2sector(s, cluster_num);
+
+               vvfat_close_current_file(s);
+               for (i = 0; i < s->sectors_per_cluster; i++)
+                   if (!s->qcow->drv->bdrv_is_allocated(s->qcow,
+                               first_sector + i, 1, &dummy)) {
+                       /* copy the very sector that was found unallocated,
+                        * not always the first sector of the cluster */
+                       if (vvfat_read(s->bs,
+                                   first_sector + i, s->cluster_buffer, 1))
+                           return -1;
+                       if (s->qcow->drv->bdrv_write(s->qcow,
+                                   first_sector + i, s->cluster_buffer, 1))
+                           return -2;
+                   }
+           }
+       }
+
+       ret++;
+       /* a cluster that is already claimed means a cross-linked chain */
+       if (s->used_clusters[cluster_num] & USED_ANY)
+           return 0;
+       s->used_clusters[cluster_num] = USED_FILE;
+
+       cluster_num = modified_fat_get(s, cluster_num);
+
+       if (fat_eof(s, cluster_num))
+           return ret;
+       else if (cluster_num < 2 || cluster_num > s->max_fat_value - 16)
+           return -1;
+
+       offset += s->cluster_size;
+    }
+}
+
+/*
+ * This function looks at the modified data (qcow).
+ *
+ * It walks the directory that starts at cluster_num, recursing into
+ * subdirectories and checking each file's cluster chain against its size.
+ * Renames and new directories are scheduled as commits along the way.
+ *
+ * It returns 0 upon inconsistency or error, and otherwise the number of
+ * clusters used by the directory, its subdirectories and their files.
+ * The temporary cluster buffer is released on every exit path.
+ */
+static int check_directory_consistency(BDRVVVFATState *s,
+       int cluster_num, const char* path)
+{
+    int ret = 0;
+    unsigned char* cluster = qemu_malloc(s->cluster_size);
+    direntry_t* direntries = (direntry_t*)cluster;
+    mapping_t* mapping = find_mapping_for_cluster(s, cluster_num);
+
+    long_file_name lfn;
+    int path_len = strlen(path);
+    char path2[PATH_MAX];
+
+    assert(path_len < PATH_MAX); /* len was tested before! */
+    pstrcpy(path2, sizeof(path2), path);
+    path2[path_len] = '/';
+    path2[path_len + 1] = '\0';
+
+    if (mapping) {
+       const char* basename = get_basename(mapping->path);
+       const char* basename2 = get_basename(path);
+
+       assert(mapping->mode & MODE_DIRECTORY);
+
+       /* still present: undo the "deleted" mark set by is_consistent() */
+       assert(mapping->mode & MODE_DELETED);
+       mapping->mode &= ~MODE_DELETED;
+
+       if (strcmp(basename, basename2))
+           schedule_rename(s, cluster_num, strdup(path));
+    } else
+       /* new directory */
+       schedule_mkdir(s, cluster_num, strdup(path));
+
+    lfn_init(&lfn);
+    do {
+       int i;
+       int subret = 0;
+
+       ret++;
+
+       if (s->used_clusters[cluster_num] & USED_ANY) {
+           fprintf(stderr, "cluster %d used more than once\n", (int)cluster_num);
+           /* go through fail so that the cluster buffer is freed */
+           goto fail;
+       }
+       s->used_clusters[cluster_num] = USED_DIRECTORY;
+
+DLOG(fprintf(stderr, "read cluster %d (sector %d)\n", (int)cluster_num, (int)cluster2sector(s, cluster_num)));
+       subret = vvfat_read(s->bs, cluster2sector(s, cluster_num), cluster,
+               s->sectors_per_cluster);
+       if (subret) {
+           fprintf(stderr, "Error fetching direntries\n");
+           goto fail;
+       }
+
+       /* 0x10 directory entries per sector */
+       for (i = 0; i < 0x10 * s->sectors_per_cluster; i++) {
+           int cluster_count = 0;
+
+DLOG(fprintf(stderr, "check direntry %d: \n", i); print_direntry(direntries + i));
+           if (is_volume_label(direntries + i) || is_dot(direntries + i) ||
+                   is_free(direntries + i))
+               continue;
+
+           subret = parse_long_name(&lfn, direntries + i);
+           if (subret < 0) {
+               fprintf(stderr, "Error in long name\n");
+               goto fail;
+           }
+           if (subret == 0 || is_free(direntries + i))
+               continue;
+
+           /* no matching long name was collected: use the short name */
+           if (fat_chksum(direntries+i) != lfn.checksum) {
+               subret = parse_short_name(s, &lfn, direntries + i);
+               if (subret < 0) {
+                   fprintf(stderr, "Error in short name (%d)\n", subret);
+                   goto fail;
+               }
+               if (subret > 0 || !strcmp((char*)lfn.name, ".")
+                       || !strcmp((char*)lfn.name, ".."))
+                   continue;
+           }
+           lfn.checksum = 0x100; /* cannot use long name twice */
+
+           if (path_len + 1 + lfn.len >= PATH_MAX) {
+               fprintf(stderr, "Name too long: %s/%s\n", path, lfn.name);
+               goto fail;
+           }
+            pstrcpy(path2 + path_len + 1, sizeof(path2) - path_len - 1,
+                    (char*)lfn.name);
+
+           if (is_directory(direntries + i)) {
+               if (begin_of_direntry(direntries + i) == 0) {
+                   DLOG(fprintf(stderr, "invalid begin for directory: %s\n", path2); print_direntry(direntries + i));
+                   goto fail;
+               }
+               cluster_count = check_directory_consistency(s,
+                       begin_of_direntry(direntries + i), path2);
+               if (cluster_count == 0) {
+                   DLOG(fprintf(stderr, "problem in directory %s:\n", path2); print_direntry(direntries + i));
+                   goto fail;
+               }
+           } else if (is_file(direntries + i)) {
+               /* check file size with FAT */
+               cluster_count = get_cluster_count_for_direntry(s, direntries + i, path2);
+               if (cluster_count !=
+                       (le32_to_cpu(direntries[i].size) + s->cluster_size
+                        - 1) / s->cluster_size) {
+                   DLOG(fprintf(stderr, "Cluster count mismatch\n"));
+                   goto fail;
+               }
+           } else
+               assert(0); /* cluster_count = 0; */
+
+           ret += cluster_count;
+       }
+
+       cluster_num = modified_fat_get(s, cluster_num);
+    } while(!fat_eof(s, cluster_num));
+
+    free(cluster);
+    return ret;
+
+fail:
+    free(cluster);
+    return 0;
+}
+
+/*
+ * Check the modified volume for consistency.
+ * Returns the number of used clusters (non-zero) on success and 0 if the
+ * FAT could not be read or an inconsistency was found.
+ */
+static int is_consistent(BDRVVVFATState* s)
+{
+    int i, check;
+    int used_clusters_count = 0;
+
+DLOG(checkpoint());
+    /*
+     * - get modified FAT
+     * - compare the two FATs (TODO)
+     * - get buffer for marking used clusters
+     * - recurse direntries from root (using bs->bdrv_read to make
+     *    sure to get the new data)
+     *   - check that the FAT agrees with the size
+     *   - count the number of clusters occupied by this directory and
+     *     its files
+     * - check that the cumulative used cluster count agrees with the
+     *   FAT
+     * - if all is fine, return number of used clusters
+     */
+    /* first call: allocate fat2, seeded with a copy of the current FAT */
+    if (s->fat2 == NULL) {
+       int size = 0x200 * s->sectors_per_fat;
+       s->fat2 = qemu_malloc(size);
+       memcpy(s->fat2, s->fat.pointer, size);
+    }
+    /* refresh fat2 through vvfat_read() so overlay contents are included */
+    check = vvfat_read(s->bs,
+           s->first_sectors_number, s->fat2, s->sectors_per_fat);
+    if (check) {
+       fprintf(stderr, "Could not copy fat\n");
+       return 0;
+    }
+    assert (s->used_clusters);
+    /* clear the file/directory flags, keeping any other flag bits */
+    for (i = 0; i < sector2cluster(s, s->sector_count); i++)
+       s->used_clusters[i] &= ~USED_ANY;
+
+    clear_commits(s);
+
+    /* mark every mapped file/directory as deleted.
+     * (check_directory_consistency() will unmark those still present). */
+    if (s->qcow)
+       for (i = 0; i < s->mapping.next; i++) {
+           mapping_t* mapping = array_get(&(s->mapping), i);
+           if (mapping->first_mapping_index < 0)
+               mapping->mode |= MODE_DELETED;
+       }
+
+    used_clusters_count = check_directory_consistency(s, 0, s->path);
+    if (used_clusters_count <= 0) {
+       DLOG(fprintf(stderr, "problem in directory\n"));
+       return 0;
+    }
+
+    /* count the clusters the FAT marks as in use, starting the count at
+     * the number of root directory clusters */
+    check = s->last_cluster_of_root_directory;
+    for (i = check; i < sector2cluster(s, s->sector_count); i++) {
+       if (modified_fat_get(s, i)) {
+           if(!s->used_clusters[i]) {
+               DLOG(fprintf(stderr, "FAT was modified (%d), but cluster is not used?\n", i));
+               return 0;
+           }
+           check++;
+       }
+
+       if (s->used_clusters[i] == USED_ALLOCATED) {
+           /* allocated, but not used... */
+           DLOG(fprintf(stderr, "unused, modified cluster: %d\n", i));
+           return 0;
+       }
+    }
+
+    /* the FAT and the directory walk must agree on the total */
+    if (check != used_clusters_count)
+       return 0;
+
+    return used_clusters_count;
+}
+
+/*
+ * Add adjust to every stored mapping index that is >= offset: the
+ * first_mapping_index of each mapping and, for directory mappings, the
+ * parent_mapping_index.
+ */
+static inline void adjust_mapping_indices(BDRVVVFATState* s,
+       int offset, int adjust)
+{
+    int pos;
+
+/* relies on the local names mapping, offset and adjust */
+#define ADJUST_MAPPING_INDEX(name) \
+       if (mapping->name >= offset) \
+           mapping->name += adjust
+
+    for (pos = 0; pos < s->mapping.next; pos++) {
+        mapping_t* mapping = array_get(&(s->mapping), pos);
+
+        ADJUST_MAPPING_INDEX(first_mapping_index);
+        if (mapping->mode & MODE_DIRECTORY) {
+            ADJUST_MAPPING_INDEX(info.dir.parent_mapping_index);
+        }
+    }
+}
+
+/*
+ * insert or update mapping
+ *
+ * Returns the mapping that now covers [begin, end). A mapping starting
+ * before begin that reaches into the range is cut off at begin. A newly
+ * inserted mapping has path set to NULL; the fields of a reused mapping
+ * other than begin/end are left as they were.
+ */
+static mapping_t* insert_mapping(BDRVVVFATState* s,
+       uint32_t begin, uint32_t end)
+{
+    /*
+     * - find mapping where mapping->begin >= begin,
+     * - if mapping->begin > begin: insert
+     *   - adjust all references to mappings!
+     * - else: adjust
+     * - replace name
+     */
+    int index = find_mapping_for_cluster_aux(s, begin, 0, s->mapping.next);
+    mapping_t* mapping = NULL;
+    /* remembered so that a moved array can be detected further down */
+    mapping_t* first_mapping = array_get(&(s->mapping), 0);
+
+    if (index < s->mapping.next && (mapping = array_get(&(s->mapping), index))
+           && mapping->begin < begin) {
+       /* truncate the predecessor and continue with the following slot */
+       mapping->end = begin;
+       index++;
+       mapping = array_get(&(s->mapping), index);
+    }
+    if (index >= s->mapping.next || mapping->begin > begin) {
+       mapping = array_insert(&(s->mapping), index, 1);
+       mapping->path = NULL;
+       adjust_mapping_indices(s, index, +1);
+    }
+
+    mapping->begin = begin;
+    mapping->end = end;
+
+DLOG(mapping_t* next_mapping;
+assert(index + 1 >= s->mapping.next ||
+((next_mapping = array_get(&(s->mapping), index + 1)) &&
+ next_mapping->begin >= end)));
+
+    /* if the array storage moved, re-derive current_mapping from its
+     * old position relative to the old first element */
+    if (s->current_mapping && first_mapping != (mapping_t*)s->mapping.pointer)
+       s->current_mapping = array_get(&(s->mapping),
+               s->current_mapping - first_mapping);
+
+    return mapping;
+}
+
+/*
+ * Drop the mapping at mapping_index from s->mapping and renumber the
+ * mapping indices stored in the remaining entries.  The path string is
+ * released only when the mapping has no first_mapping_index (< 0), i.e.
+ * when it is not a continuation sharing another mapping's path.
+ * Always returns 0.
+ */
+static int remove_mapping(BDRVVVFATState* s, int mapping_index)
+{
+    mapping_t* victim = array_get(&(s->mapping), mapping_index);
+    mapping_t* const base = array_get(&(s->mapping), 0);
+
+    if (victim->first_mapping_index < 0) {
+       free(victim->path);
+    }
+
+    array_remove(&(s->mapping), mapping_index);
+    adjust_mapping_indices(s, mapping_index, -1);
+
+    /* keep the cached pointer valid should the array storage have moved */
+    if (s->current_mapping && base != (mapping_t*)s->mapping.pointer) {
+       s->current_mapping = array_get(&(s->mapping),
+               s->current_mapping - base);
+    }
+
+    return 0;
+}
+
+/*
+ * Add adjust to every directory-entry index >= offset that the mappings
+ * hold: dir_index of each mapping, and first_dir_index of directory
+ * mappings.
+ */
+static void adjust_dirindices(BDRVVVFATState* s, int offset, int adjust)
+{
+    int n;
+
+    for (n = 0; n < s->mapping.next; n++) {
+       mapping_t* m = array_get(&(s->mapping), n);
+
+       if (m->dir_index >= offset) {
+           m->dir_index += adjust;
+       }
+
+       if (!(m->mode & MODE_DIRECTORY)) {
+           continue;
+       }
+       if (m->info.dir.first_dir_index >= offset) {
+           m->info.dir.first_dir_index += adjust;
+       }
+    }
+}
+
+/*
+ * Open a gap of count entries at dir_index in s->directory and shift the
+ * directory indices stored in the mappings accordingly.
+ * Returns a pointer to the first new entry, or NULL if array_insert()
+ * failed (in which case no index is touched).
+ */
+static direntry_t* insert_direntries(BDRVVVFATState* s,
+       int dir_index, int count)
+{
+    direntry_t* slot = array_insert(&(s->directory), dir_index, count);
+
+    if (slot != NULL) {
+       adjust_dirindices(s, dir_index, count);
+    }
+    return slot;
+}
+
+/*
+ * Delete count entries starting at dir_index from s->directory and shift
+ * the directory indices stored in the mappings accordingly.
+ * Returns 0 on success, otherwise the non-zero result of
+ * array_remove_slice() (no index is touched then).
+ */
+static int remove_direntries(BDRVVVFATState* s, int dir_index, int count)
+{
+    int rc = array_remove_slice(&(s->directory), dir_index, count);
+
+    if (rc == 0) {
+       adjust_dirindices(s, dir_index, -count);
+    }
+    return rc;
+}
+
+/*
+ * Adapt the mappings of the cluster chain starting at first_cluster
+ * (i.e. if a file starts at first_cluster, the chain is followed according
+ * to the modified fat, and the corresponding entries in s->mapping are
+ * adjusted).
+ *
+ * dir_index is the index in s->directory of the entry owning the chain; it
+ * is stored in every mapping of the chain, and a value <= 0 makes the chain
+ * a directory.  A mapping beginning exactly at first_cluster must already
+ * exist (asserted).  The first mapping gets first_mapping_index = -1, every
+ * further mapping of the chain refers back to it and shares its path.
+ *
+ * Always returns 0.
+ */
+static int commit_mappings(BDRVVVFATState* s,
+       uint32_t first_cluster, int dir_index)
+{
+    mapping_t* mapping = find_mapping_for_cluster(s, first_cluster);
+    direntry_t* direntry = array_get(&(s->directory), dir_index);
+    uint32_t cluster = first_cluster;
+
+    vvfat_close_current_file(s);
+
+    assert(mapping);
+    assert(mapping->begin == first_cluster);
+    mapping->first_mapping_index = -1;
+    mapping->dir_index = dir_index;
+    mapping->mode = (dir_index <= 0 || is_directory(direntry)) ?
+       MODE_DIRECTORY : MODE_NORMAL;
+
+    while (!fat_eof(s, cluster)) {
+       uint32_t c, c1;
+
+       /*
+        * Follow the chain while successive clusters are adjacent.  When the
+        * loop ends, c is the last cluster of the contiguous run and c1 is
+        * its successor in the modified FAT.
+        */
+       for (c = cluster, c1 = modified_fat_get(s, c); c + 1 == c1;
+               c = c1, c1 = modified_fat_get(s, c1));
+
+       /* c becomes the exclusive end of the run */
+       c++;
+       if (c > mapping->end) {
+           /* the run grew: drop following mappings that begin inside it */
+           int index = array_index(&(s->mapping), mapping);
+           int i, max_i = s->mapping.next - index;
+           for (i = 1; i < max_i && mapping[i].begin < c; i++);
+           while (--i > 0)
+               remove_mapping(s, index + 1);
+       }
+       assert(mapping == array_get(&(s->mapping), s->mapping.next - 1)
+               || mapping[1].begin >= c);
+       mapping->end = c;
+
+       if (!fat_eof(s, c1)) {
+           /* the chain continues at c1: make sure a mapping begins there */
+           int i = find_mapping_for_cluster_aux(s, c1, 0, s->mapping.next);
+           mapping_t* next_mapping = i >= s->mapping.next ? NULL :
+               array_get(&(s->mapping), i);
+
+           if (next_mapping == NULL || next_mapping->begin > c1) {
+               int i1 = array_index(&(s->mapping), mapping);
+
+               next_mapping = insert_mapping(s, c1, c1+1);
+
+               /*
+                * 'mapping' is re-fetched by index after the insertion; the
+                * index is bumped when c1 lies before the current run
+                * (presumably because the new entry then sits in front of
+                * 'mapping' in the array).
+                */
+               if (c1 < c)
+                   i1++;
+               mapping = array_get(&(s->mapping), i1);
+           }
+
+           /* the continuation inherits the properties of the current part */
+           next_mapping->dir_index = mapping->dir_index;
+           next_mapping->first_mapping_index =
+               mapping->first_mapping_index < 0 ?
+               array_index(&(s->mapping), mapping) :
+               mapping->first_mapping_index;
+           next_mapping->path = mapping->path;
+           next_mapping->mode = mapping->mode;
+           next_mapping->read_only = mapping->read_only;
+           if (mapping->mode & MODE_DIRECTORY) {
+               next_mapping->info.dir.parent_mapping_index =
+                       mapping->info.dir.parent_mapping_index;
+               /* skip the direntries covered by the current part */
+               next_mapping->info.dir.first_dir_index =
+                       mapping->info.dir.first_dir_index +
+                       0x10 * s->sectors_per_cluster *
+                       (mapping->end - mapping->begin);
+           } else
+               next_mapping->info.file.offset = mapping->info.file.offset +
+                       mapping->end - mapping->begin;
+
+           mapping = next_mapping;
+       }
+
+       cluster = c1;
+    }
+
+    return 0;
+}
+
+/*
+ * Bring the in-memory copy of one directory in line with the modified
+ * disk image, then recurse into its subdirectories.
+ *
+ * dir_index:            index in s->directory of the directory's own entry;
+ *                       0 selects the root directory (first cluster 0)
+ * parent_mapping_index: stored as the mapping's parent_mapping_index
+ *
+ * The directory's slice of s->directory is grown or shrunk to the cluster
+ * count of the modified FAT chain, its clusters are re-read with
+ * vvfat_read(), and commit_mappings() is run for the chain.
+ *
+ * Returns 0 on success, -1 if insert_direntries() fails, or the non-zero
+ * result of vvfat_read(), commit_mappings() or a recursive call.
+ */
+static int commit_direntries(BDRVVVFATState* s,
+       int dir_index, int parent_mapping_index)
+{
+    direntry_t* direntry = array_get(&(s->directory), dir_index);
+    uint32_t first_cluster = dir_index == 0 ? 0 : begin_of_direntry(direntry);
+    mapping_t* mapping = find_mapping_for_cluster(s, first_cluster);
+
+    /* number of directory entries per cluster (0x10 per sector) */
+    int factor = 0x10 * s->sectors_per_cluster;
+    int old_cluster_count, new_cluster_count;
+    /* NOTE(review): mapping is dereferenced here, before assert(mapping) */
+    int current_dir_index = mapping->info.dir.first_dir_index;
+    int first_dir_index = current_dir_index;
+    int ret, i;
+    uint32_t c;
+
+DLOG(fprintf(stderr, "commit_direntries for %s, parent_mapping_index %d\n", mapping->path, parent_mapping_index));
+
+    assert(direntry);
+    assert(mapping);
+    assert(mapping->begin == first_cluster);
+    assert(mapping->info.dir.first_dir_index < s->directory.next);
+    assert(mapping->mode & MODE_DIRECTORY);
+    assert(dir_index == 0 || is_directory(direntry));
+
+    mapping->info.dir.parent_mapping_index = parent_mapping_index;
+
+    if (first_cluster == 0) {
+       /* root directory: its size is taken as fixed */
+       old_cluster_count = new_cluster_count =
+           s->last_cluster_of_root_directory;
+    } else {
+       /* chain length according to the committed FAT ... */
+       for (old_cluster_count = 0, c = first_cluster; !fat_eof(s, c);
+               c = fat_get(s, c))
+           old_cluster_count++;
+
+       /* ... and according to the modified FAT */
+       for (new_cluster_count = 0, c = first_cluster; !fat_eof(s, c);
+               c = modified_fat_get(s, c))
+           new_cluster_count++;
+    }
+
+    /* resize this directory's slice of s->directory to the new length */
+    if (new_cluster_count > old_cluster_count) {
+       if (insert_direntries(s,
+               current_dir_index + factor * old_cluster_count,
+               factor * (new_cluster_count - old_cluster_count)) == NULL)
+           return -1;
+    } else if (new_cluster_count < old_cluster_count)
+       /* NOTE(review): the result of remove_direntries() is ignored */
+       remove_direntries(s,
+               current_dir_index + factor * new_cluster_count,
+               factor * (old_cluster_count - new_cluster_count));
+
+    /* re-read every cluster of the directory into its slice */
+    for (c = first_cluster; !fat_eof(s, c); c = modified_fat_get(s, c)) {
+       /* these locals shadow the outer 'direntry' and 'ret' */
+       void* direntry = array_get(&(s->directory), current_dir_index);
+       int ret = vvfat_read(s->bs, cluster2sector(s, c), direntry,
+               s->sectors_per_cluster);
+       if (ret)
+           return ret;
+       /* the very first directory entry must still start with "QEMU" */
+       assert(!strncmp(s->directory.pointer, "QEMU", 4));
+       current_dir_index += factor;
+    }
+
+    ret = commit_mappings(s, first_cluster, dir_index);
+    if (ret)
+       return ret;
+
+    /* recurse */
+    for (i = 0; i < factor * new_cluster_count; i++) {
+       direntry = array_get(&(s->directory), first_dir_index + i);
+       if (is_directory(direntry) && !is_dot(direntry)) {
+           /* look the mapping up again on every iteration before use */
+           mapping = find_mapping_for_cluster(s, first_cluster);
+           assert(mapping->mode & MODE_DIRECTORY);
+           ret = commit_direntries(s, first_dir_index + i,
+               array_index(&(s->mapping), mapping));
+           if (ret)
+               return ret;
+       }
+    }
+
+    return 0;
+}
+
+/*
+ * Commit one file: copy its contents, starting at byte 'offset', from the
+ * (modified) disk image into the host file named by its mapping, set the
+ * host file's length to the size recorded in the directory entry, then
+ * adjust the mappings of its cluster chain.
+ *
+ * dir_index: index of the file's entry in s->directory
+ * offset:    first byte to write out; must be cluster aligned and < size
+ *
+ * Returns the result of commit_mappings() on success, or a negative value:
+ * the open() result if the host file cannot be opened, -3 on seek failure,
+ * the vvfat_read() error, -2 on a failed or short write, -4 if the final
+ * ftruncate() fails.  The cluster buffer and the file descriptor are
+ * released on every path.
+ */
+static int commit_one_file(BDRVVVFATState* s,
+       int dir_index, uint32_t offset)
+{
+    direntry_t* direntry = array_get(&(s->directory), dir_index);
+    uint32_t c = begin_of_direntry(direntry);
+    uint32_t first_cluster = c;
+    mapping_t* mapping = find_mapping_for_cluster(s, c);
+    uint32_t size = filesize_of_direntry(direntry);
+    char* cluster;
+    uint32_t i;
+    int fd;
+    int ret;
+
+    assert(offset < size);
+    assert((offset % s->cluster_size) == 0);
+
+    /*
+     * One FAT hop per whole cluster preceding 'offset', so that c ends up
+     * on the cluster that holds byte 'offset' (starting the count at
+     * cluster_size would stop one cluster short).
+     */
+    for (i = 0; i < offset; i += s->cluster_size)
+       c = modified_fat_get(s, c);
+
+    fd = open(mapping->path, O_RDWR | O_CREAT | O_BINARY, 0666);
+    if (fd < 0) {
+       fprintf(stderr, "Could not open %s... (%s, %d)\n", mapping->path,
+               strerror(errno), errno);
+       return fd;
+    }
+
+    cluster = qemu_malloc(s->cluster_size);
+
+    if (offset > 0 && lseek(fd, offset, SEEK_SET) != offset) {
+       ret = -3;
+       goto out;
+    }
+
+    while (offset < size) {
+       uint32_t c1;
+       int rest_size = (size - offset > s->cluster_size ?
+               s->cluster_size : size - offset);
+
+       c1 = modified_fat_get(s, c);
+
+       assert((size - offset == 0 && fat_eof(s, c)) ||
+               (size > offset && c >=2 && !fat_eof(s, c)));
+
+       /* read whole sectors covering rest_size bytes */
+       ret = vvfat_read(s->bs, cluster2sector(s, c),
+           (uint8_t*)cluster, (rest_size + 0x1ff) / 0x200);
+       if (ret < 0)
+           goto out;
+
+       /* a short write would silently lose data: treat it as a failure */
+       if (write(fd, cluster, rest_size) != rest_size) {
+           ret = -2;
+           goto out;
+       }
+
+       offset += rest_size;
+       c = c1;
+    }
+
+    /* cut off stale data if the host file used to be longer */
+    if (ftruncate(fd, size) < 0) {
+       ret = -4;
+       goto out;
+    }
+
+    ret = 0;
+
+out:
+    qemu_free(cluster);
+    close(fd);
+    if (ret)
+       return ret;
+
+    return commit_mappings(s, first_cluster, dir_index);
+}
+
+#ifdef DEBUG
+/*
+ * Debug check: every mapping that is not marked deleted must point to a
+ * valid direntry, and directory mappings must cover a cluster-aligned
+ * range inside s->directory.  Deleted mappings are only reported.
+ */
+static void check1(BDRVVVFATState* s)
+{
+    int i;
+
+    for (i = 0; i < s->mapping.next; i++) {
+       mapping_t* mapping = array_get(&(s->mapping), i);
+       direntry_t* direntry;
+
+       if ((mapping->mode & MODE_DELETED) != 0) {
+           fprintf(stderr, "deleted\n");
+       } else {
+           assert(mapping->dir_index >= 0);
+           assert(mapping->dir_index < s->directory.next);
+           direntry = array_get(&(s->directory), mapping->dir_index);
+           assert(mapping->begin == begin_of_direntry(direntry) || mapping->first_mapping_index >= 0);
+           if ((mapping->mode & MODE_DIRECTORY) != 0) {
+               assert(mapping->info.dir.first_dir_index + 0x10 * s->sectors_per_cluster * (mapping->end - mapping->begin) <= s->directory.next);
+               assert((mapping->info.dir.first_dir_index % (0x10 * s->sectors_per_cluster)) == 0);
+           }
+       }
+    }
+}
+
+/*
+ * Debug check: test, if all direntries have mappings.
+ *
+ * For every short-name entry with a non-zero first cluster there must be a
+ * mapping for that cluster.  In addition, for every cluster-sized group of
+ * direntries at most one non-deleted directory mapping may start inside
+ * it, and the first/parent mapping indices of that mapping must be
+ * coherent.
+ */
+static void check2(BDRVVVFATState* s)
+{
+    int i;
+    int first_mapping = -1;
+
+    for (i = 0; i < s->directory.next; i++) {
+       direntry_t* direntry = array_get(&(s->directory), i);
+
+       if (is_short_name(direntry) && begin_of_direntry(direntry)) {
+           mapping_t* mapping = find_mapping_for_cluster(s, begin_of_direntry(direntry));
+           assert(mapping);
+           assert(mapping->dir_index == i || is_dot(direntry));
+           assert(mapping->begin == begin_of_direntry(direntry) || is_dot(direntry));
+       }
+
+       if ((i % (0x10 * s->sectors_per_cluster)) == 0) {
+           /* cluster start */
+           int j, count = 0;
+
+           for (j = 0; j < s->mapping.next; j++) {
+               mapping_t* mapping = array_get(&(s->mapping), j);
+               if (mapping->mode & MODE_DELETED)
+                   continue;
+               if (mapping->mode & MODE_DIRECTORY) {
+                   if (mapping->info.dir.first_dir_index <= i && mapping->info.dir.first_dir_index + 0x10 * s->sectors_per_cluster > i) {
+                       /*
+                        * Count outside of assert(): an expression with a
+                        * side effect vanishes when assertions are
+                        * compiled out.
+                        */
+                       count++;
+                       assert(count == 1);
+                       if (mapping->first_mapping_index == -1)
+                           first_mapping = array_index(&(s->mapping), mapping);
+                       else
+                           assert(first_mapping == mapping->first_mapping_index);
+                       if (mapping->info.dir.parent_mapping_index < 0)
+                           assert(j == 0);
+                       else {
+                           mapping_t* parent = array_get(&(s->mapping), mapping->info.dir.parent_mapping_index);
+                           assert(parent->mode & MODE_DIRECTORY);
+                           assert(parent->info.dir.first_dir_index < mapping->info.dir.first_dir_index);
+                       }
+                   }
+               }
+           }
+           if (count == 0)
+               first_mapping = -1;
+       }
+    }
+}
+#endif
+
+/*
+ * Execute all ACTION_RENAME and ACTION_MKDIR entries of s->commits and
+ * remove them from the list; other actions are left in place.
+ *
+ * Renaming a directory schedules a rename (schedule_rename()) for every
+ * file and subdirectory found in its direntries, so that their paths get
+ * the new prefix; those are picked up later in the same loop.
+ *
+ * Returns 0 on success, -2 if rename() fails, -5 if mkdir() fails, -6 if
+ * insert_mapping() returns NULL.
+ */
+static int handle_renames_and_mkdirs(BDRVVVFATState* s)
+{
+    int i;
+
+#ifdef DEBUG
+    fprintf(stderr, "handle_renames\n");
+    for (i = 0; i < s->commits.next; i++) {
+       commit_t* commit = array_get(&(s->commits), i);
+       fprintf(stderr, "%d, %s (%d, %d)\n", i, commit->path ? commit->path : "(null)", commit->param.rename.cluster, commit->action);
+    }
+#endif
+
+    /* i only advances when the entry at i is kept */
+    for (i = 0; i < s->commits.next;) {
+       commit_t* commit = array_get(&(s->commits), i);
+       if (commit->action == ACTION_RENAME) {
+           /* NOTE(review): the lookup result is not checked for NULL */
+           mapping_t* mapping = find_mapping_for_cluster(s,
+                   commit->param.rename.cluster);
+           char* old_path = mapping->path;
+
+           assert(commit->path);
+           /*
+            * NOTE(review): the mapping takes the new path before rename()
+            * is known to have succeeded; on failure old_path is not freed
+            * and the mapping keeps the new name.
+            */
+           mapping->path = commit->path;
+           if (rename(old_path, mapping->path))
+               return -2;
+
+           if (mapping->mode & MODE_DIRECTORY) {
+               int l1 = strlen(mapping->path);
+               int l2 = strlen(old_path);
+               int diff = l1 - l2;
+               direntry_t* direntry = array_get(&(s->directory),
+                       mapping->info.dir.first_dir_index);
+               uint32_t c = mapping->begin;
+               /* shadows the outer loop index */
+               int i = 0;
+
+               /* recurse */
+               while (!fat_eof(s, c)) {
+                   /* one cluster worth of direntries per outer iteration */
+                   do {
+                       direntry_t* d = direntry + i;
+
+                       if (is_file(d) || (is_directory(d) && !is_dot(d))) {
+                           mapping_t* m = find_mapping_for_cluster(s,
+                                   begin_of_direntry(d));
+                           int l = strlen(m->path);
+                           char* new_path = qemu_malloc(l + diff + 1);
+
+                           assert(!strncmp(m->path, mapping->path, l2));
+
+                           /* new prefix followed by the old tail */
+                            pstrcpy(new_path, l + diff + 1, mapping->path);
+                            pstrcpy(new_path + l1, l + diff + 1 - l1,
+                                    m->path + l2);
+
+                           schedule_rename(s, m->begin, new_path);
+                       }
+                       i++;
+                   } while((i % (0x10 * s->sectors_per_cluster)) != 0);
+                   c = fat_get(s, c);
+               }
+           }
+
+           free(old_path);
+           array_remove(&(s->commits), i);
+           continue;
+       } else if (commit->action == ACTION_MKDIR) {
+           mapping_t* mapping;
+           int j, parent_path_len;
+
+#ifdef __MINGW32__
+            if (mkdir(commit->path))
+                return -5;
+#else
+            if (mkdir(commit->path, 0755))
+                return -5;
+#endif
+
+           mapping = insert_mapping(s, commit->param.mkdir.cluster,
+                   commit->param.mkdir.cluster + 1);
+           if (mapping == NULL)
+               return -6;
+
+           mapping->mode = MODE_DIRECTORY;
+           mapping->read_only = 0;
+           mapping->path = commit->path;
+           /* the new directory's entries are appended to s->directory */
+           j = s->directory.next;
+           assert(j);
+           /* NOTE(review): the result of insert_direntries() is ignored */
+           insert_direntries(s, s->directory.next,
+                   0x10 * s->sectors_per_cluster);
+           mapping->info.dir.first_dir_index = j;
+
+           /* length of the path without the "/basename" suffix */
+           parent_path_len = strlen(commit->path)
+               - strlen(get_basename(commit->path)) - 1;
+           /* find the mapping whose path equals the parent directory */
+           for (j = 0; j < s->mapping.next; j++) {
+               mapping_t* m = array_get(&(s->mapping), j);
+               if (m->first_mapping_index < 0 && m != mapping &&
+                       !strncmp(m->path, mapping->path, parent_path_len) &&
+                       strlen(m->path) == parent_path_len)
+                   break;
+           }
+           assert(j < s->mapping.next);
+           mapping->info.dir.parent_mapping_index = j;
+
+           array_remove(&(s->commits), i);
+           continue;
+       }
+
+       i++;
+    }
+    return 0;
+}
+
+/*
+ * Process the commits left over after handle_renames_and_mkdirs():
+ * ACTION_WRITEOUT and ACTION_NEW_FILE.  Processing stops at the first
+ * failure; all entries visited so far (including the failing one) are
+ * then removed from s->commits.
+ *
+ * Returns 0 on success, -1 if array_remove_slice() fails, otherwise the
+ * failure code: -2 for an unexpected rename/mkdir entry, -3 if writing out
+ * a file fails, -6 if no direntry is found for a new file, -7 if writing
+ * the new file fails.
+ *
+ * TODO: make sure that the short name is not matching *another* file
+ */
+static int handle_commits(BDRVVVFATState* s)
+{
+    int i, fail = 0;
+
+    vvfat_close_current_file(s);
+
+    for (i = 0; !fail && i < s->commits.next; i++) {
+       commit_t* commit = array_get(&(s->commits), i);
+       switch(commit->action) {
+       case ACTION_RENAME: case ACTION_MKDIR:
+           /* these must have been consumed before this function runs */
+           assert(0);
+           fail = -2;
+           break;
+       case ACTION_WRITEOUT: {
+           direntry_t* entry = array_get(&(s->directory),
+                   commit->param.writeout.dir_index);
+           uint32_t begin = begin_of_direntry(entry);
+           mapping_t* mapping = find_mapping_for_cluster(s, begin);
+
+           assert(mapping);
+           assert(mapping->begin == begin);
+           assert(commit->path == NULL);
+
+           if (commit_one_file(s, commit->param.writeout.dir_index,
+                       commit->param.writeout.modified_offset))
+               fail = -3;
+
+           break;
+       }
+       case ACTION_NEW_FILE: {
+           int begin = commit->param.new_file.first_cluster;
+           mapping_t* mapping = find_mapping_for_cluster(s, begin);
+           direntry_t* entry;
+           /* shadows the loop index of the enclosing for */
+           int i;
+
+           /* find direntry */
+           for (i = 0; i < s->directory.next; i++) {
+               entry = array_get(&(s->directory), i);
+               if (is_file(entry) && begin_of_direntry(entry) == begin)
+                   break;
+           }
+
+           if (i >= s->directory.next) {
+               fail = -6;
+               /* continues the enclosing for loop, which then stops */
+               continue;
+           }
+
+           /* make sure there exists an initial mapping */
+           if (mapping && mapping->begin != begin) {
+               mapping->end = begin;
+               mapping = NULL;
+           }
+           if (mapping == NULL) {
+               /* NOTE(review): the result is not checked for NULL */
+               mapping = insert_mapping(s, begin, begin+1);
+           }
+           /* most members will be fixed in commit_mappings() */
+           assert(commit->path);
+           mapping->path = commit->path;
+           mapping->read_only = 0;
+           mapping->mode = MODE_NORMAL;
+           mapping->info.file.offset = 0;
+
+           if (commit_one_file(s, i, 0))
+               fail = -7;
+
+           break;
+       }
+       default:
+           assert(0);
+       }
+    }
+    /* drop everything that was visited */
+    if (i > 0 && array_remove_slice(&(s->commits), 0, i))
+       return -1;
+    return fail;
+}
+
+/*
+ * Remove the host files/directories of all mappings marked MODE_DELETED
+ * and drop those mappings.
+ *
+ * A directory that rmdir() reports as not empty is deferred; the scan is
+ * repeated as long as the previous pass both deferred and deleted
+ * something.  Mapping 0 is never examined.
+ *
+ * Returns 0 on success, -5 if rmdir() fails for another reason than
+ * ENOTEMPTY, -4 if unlink() fails.
+ */
+static int handle_deletes(BDRVVVFATState* s)
+{
+    int i, deferred = 1, deleted = 1;
+
+    /* delete files corresponding to mappings marked as deleted */
+    /* handle DELETEs and unused mappings (modified_fat_get(s, mapping->begin) == 0) */
+    while (deferred && deleted) {
+       deferred = 0;
+       deleted = 0;
+
+       for (i = 1; i < s->mapping.next; i++) {
+           mapping_t* mapping = array_get(&(s->mapping), i);
+           if (mapping->mode & MODE_DELETED) {
+               direntry_t* entry = array_get(&(s->directory),
+                       mapping->dir_index);
+
+               if (is_free(entry)) {
+                   /* remove file/directory */
+                   if (mapping->mode & MODE_DIRECTORY) {
+                       int j, next_dir_index = s->directory.next,
+                       first_dir_index = mapping->info.dir.first_dir_index;
+
+                       if (rmdir(mapping->path) < 0) {
+                           if (errno == ENOTEMPTY) {
+                               /* retry in a later pass; mapping is kept */
+                               deferred++;
+                               continue;
+                           } else
+                               return -5;
+                       }
+
+                       /*
+                        * The directory's entries end where the next
+                        * directory's entries begin (or at the end of
+                        * s->directory).
+                        */
+                       for (j = 1; j < s->mapping.next; j++) {
+                           mapping_t* m = array_get(&(s->mapping), j);
+                           if (m->mode & MODE_DIRECTORY &&
+                                   m->info.dir.first_dir_index >
+                                   first_dir_index &&
+                                   m->info.dir.first_dir_index <
+                                   next_dir_index)
+                               next_dir_index =
+                                   m->info.dir.first_dir_index;
+                       }
+                       remove_direntries(s, first_dir_index,
+                               next_dir_index - first_dir_index);
+
+                       deleted++;
+                   }
+               } else {
+                   /*
+                    * NOTE(review): this else belongs to is_free(entry), so
+                    * unlink() runs when the direntry is NOT free, and a
+                    * non-directory mapping with a free direntry is removed
+                    * without touching the host file -- confirm intent.
+                    */
+                   if (unlink(mapping->path))
+                       return -4;
+                   deleted++;
+               }
+               DLOG(fprintf(stderr, "DELETE (%d)\n", i); print_mapping(mapping); print_direntry(entry));
+               /*
+                * NOTE(review): after the removal the following mapping
+                * moves into slot i, which the loop's i++ then skips.
+                */
+               remove_mapping(s, i);
+           }
+       }
+    }
+
+    return 0;
+}
+
+/*
+ * Synchronize the mappings and the host directory with the new state of
+ * the disk image:
+ *
+ * - carry out the scheduled renames and mkdirs
+ * - copy the FAT (from s->fat2)
+ * - recurse direntries from root
+ * - write out the remaining commits
+ * - delete files corresponding to mappings marked as deleted
+ * - empty the qcow overlay and clear the used-clusters table
+ *
+ * Returns 0 when done or when no commit is pending, otherwise the error
+ * code of the step that failed.
+ */
+static int do_commit(BDRVVVFATState* s)
+{
+    int rc;
+
+    /* the real meat are the commits. Nothing to do? Move along! */
+    if (s->commits.next == 0) {
+       return 0;
+    }
+
+    vvfat_close_current_file(s);
+
+    rc = handle_renames_and_mkdirs(s);
+    if (rc != 0) {
+       fprintf(stderr, "Error handling renames (%d)\n", rc);
+       assert(0);
+       return rc;
+    }
+
+    /* take over the FAT copy */
+    memcpy(s->fat.pointer, s->fat2, 0x200 * s->sectors_per_fat);
+
+    /* walk the directory tree, starting at the root (no parent mapping) */
+    rc = commit_direntries(s, 0, -1);
+    if (rc != 0) {
+       fprintf(stderr, "Fatal: error while committing (%d)\n", rc);
+       assert(0);
+       return rc;
+    }
+
+    rc = handle_commits(s);
+    if (rc != 0) {
+       fprintf(stderr, "Error handling commits (%d)\n", rc);
+       assert(0);
+       return rc;
+    }
+
+    rc = handle_deletes(s);
+    if (rc != 0) {
+       fprintf(stderr, "Error deleting\n");
+       assert(0);
+       return rc;
+    }
+
+    /* everything is on the host now: start over with an empty overlay */
+    s->qcow->drv->bdrv_make_empty(s->qcow);
+    memset(s->used_clusters, 0, sector2cluster(s, s->sector_count));
+
+DLOG(checkpoint());
+    return 0;
+}
+
+/*
+ * Commit the pending changes, provided the modified image passes
+ * is_consistent().  Returns -1 if it does not, else do_commit()'s result.
+ */
+static int try_commit(BDRVVVFATState* s)
+{
+    vvfat_close_current_file(s);
+DLOG(checkpoint());
+    return is_consistent(s) ? do_commit(s) : -1;
+}
+
+/*
+ * bdrv_write implementation: validate the request, store the data in the
+ * qcow overlay, mark the touched clusters as allocated and try to commit.
+ *
+ * Returns 0 on success (even if the commit attempt fails), -1 if the
+ * request is rejected by the sanity checks, or the negative result of the
+ * qcow backend's bdrv_write.
+ */
+static int vvfat_write(BlockDriverState *bs, int64_t sector_num,
+                    const uint8_t *buf, int nb_sectors)
+{
+    BDRVVVFATState *s = bs->opaque;
+    int i, ret;
+
+DLOG(checkpoint());
+
+    vvfat_close_current_file(s);
+
+    /*
+     * Some sanity checks:
+     * - do not allow writing to the boot sector
+     * - do not allow to write non-ASCII filenames
+     */
+
+    if (sector_num < s->first_sectors_number)
+       return -1;
+
+    /* walk the clusters touched by the request, one mapping at a time */
+    for (i = sector2cluster(s, sector_num);
+           i <= sector2cluster(s, sector_num + nb_sectors - 1);) {
+       mapping_t* mapping = find_mapping_for_cluster(s, i);
+       if (mapping) {
+           if (mapping->read_only) {
+               fprintf(stderr, "Tried to write to write-protected file %s\n",
+                       mapping->path);
+               return -1;
+           }
+
+           if (mapping->mode & MODE_DIRECTORY) {
+               int begin = cluster2sector(s, i);
+               int end = begin + s->sectors_per_cluster, k;
+               int dir_index;
+               const direntry_t* direntries;
+               long_file_name lfn;
+
+               lfn_init(&lfn);
+
+               /* clip [begin, end) to the sectors actually written */
+               if (begin < sector_num)
+                   begin = sector_num;
+               if (end > sector_num + nb_sectors)
+                   end = sector_num + nb_sectors;
+               /*
+                * NOTE(review): 'begin' is a sector number obtained from
+                * cluster2sector() while mapping->begin is a cluster number
+                * scaled by sectors_per_cluster, and the base is
+                * mapping->dir_index rather than info.dir.first_dir_index --
+                * confirm that this yields the intended index.
+                */
+               dir_index  = mapping->dir_index +
+                   0x10 * (begin - mapping->begin * s->sectors_per_cluster);
+               direntries = (direntry_t*)(buf + 0x200 * (begin - sector_num));
+
+               /* 0x10 direntries per sector */
+               for (k = 0; k < (end - begin) * 0x10; k++) {
+                   /* do not allow non-ASCII filenames */
+                   if (parse_long_name(&lfn, direntries + k) < 0) {
+                       fprintf(stderr, "Warning: non-ASCII filename\n");
+                       return -1;
+                   }
+                   /* no access to the direntry of a read-only file */
+                   else if (is_short_name(direntries+k) &&
+                           (direntries[k].attributes & 1)) {
+                       if (memcmp(direntries + k,
+                                   array_get(&(s->directory), dir_index + k),
+                                   sizeof(direntry_t))) {
+                           fprintf(stderr, "Warning: tried to write to write-protected file\n");
+                           return -1;
+                       }
+                   }
+               }
+           }
+           /* continue behind this mapping */
+           i = mapping->end;
+       } else
+           i++;
+    }
+
+    /*
+     * Use qcow backend. Commit later.
+     */
+DLOG(fprintf(stderr, "Write to qcow backend: %d + %d\n", (int)sector_num, nb_sectors));
+    ret = s->qcow->drv->bdrv_write(s->qcow, sector_num, buf, nb_sectors);
+    if (ret < 0) {
+       fprintf(stderr, "Error writing to qcow backend\n");
+       return ret;
+    }
+
+    /* remember which clusters now live in the overlay */
+    for (i = sector2cluster(s, sector_num);
+           i <= sector2cluster(s, sector_num + nb_sectors - 1); i++)
+       if (i >= 0)
+           s->used_clusters[i] |= USED_ALLOCATED;
+
+DLOG(checkpoint());
+    /* TODO: add timeout */
+    /* the result is ignored: a failed commit does not fail the write */
+    try_commit(s);
+
+DLOG(checkpoint());
+    return 0;
+}
+
+/*
+ * bdrv_is_allocated implementation.  Stores in *n the number of sectors
+ * from sector_num up to the end of the image, capped at nb_sectors.
+ * Returns 0 if sector_num lies beyond the end of the image (*n is then
+ * negative), 1 otherwise.
+ */
+static int vvfat_is_allocated(BlockDriverState *bs,
+       int64_t sector_num, int nb_sectors, int* n)
+{
+    BDRVVVFATState* state = bs->opaque;
+    int remaining = state->sector_count - sector_num;
+
+    if (remaining > nb_sectors) {
+       *n = nb_sectors;
+       return 1;
+    }
+
+    *n = remaining;
+    return remaining < 0 ? 0 : 1;
+}
+
+/*
+ * Write callback of the vvfat_write_target driver: the data itself is
+ * ignored, the call only triggers a commit attempt on the vvfat state
+ * stored in bs->opaque.  Returns try_commit()'s result.
+ */
+static int write_target_commit(BlockDriverState *bs, int64_t sector_num,
+       const uint8_t* buffer, int nb_sectors) {
+    BDRVVVFATState* state = bs->opaque;
+
+    return try_commit(state);
+}
+
+/*
+ * Close callback of the vvfat_write_target driver: deletes the qcow
+ * overlay and releases its file name.
+ */
+static void write_target_close(BlockDriverState *bs) {
+    BDRVVVFATState* state = bs->opaque;
+
+    bdrv_delete(state->qcow);
+    free(state->qcow_filename);
+}
+
+/*
+ * Pseudo driver installed as the backing device by enable_write_target().
+ * The initializer is positional; write_target_commit and
+ * write_target_close presumably land in the bdrv_write and bdrv_close
+ * slots -- confirm against the BlockDriver declaration.
+ */
+static BlockDriver vvfat_write_target = {
+    "vvfat_write_target", 0, NULL, NULL, NULL,
+    write_target_commit,
+    write_target_close,
+    NULL, NULL, NULL
+};
+
+static int enable_write_target(BDRVVVFATState *s)
+{
+    int size = sector2cluster(s, s->sector_count);
+    s->used_clusters = calloc(size, 1);
+
+    array_init(&(s->commits), sizeof(commit_t));
+
+    s->qcow_filename = qemu_malloc(1024);
+    get_tmp_filename(s->qcow_filename, 1024);
+    if (bdrv_create(bdrv_find_format("qcow"),
+               s->qcow_filename, s->sector_count, "fat:", 0) < 0)
+       return -1;
+    s->qcow = bdrv_new("");
+    if (s->qcow == NULL || bdrv_open(s->qcow, s->qcow_filename, 0) < 0)
+       return -1;
+
+#ifndef _WIN32
+    unlink(s->qcow_filename);
+#endif
+
+    s->bs->backing_hd = calloc(sizeof(BlockDriverState), 1);
+    s->bs->backing_hd->drv = &vvfat_write_target;
+    s->bs->backing_hd->opaque = s;
+
+    return 0;
+}
+
+/*
+ * bdrv_close implementation: closes the currently open host file and
+ * releases the FAT, directory and mapping arrays and the cluster buffer.
+ */
+static void vvfat_close(BlockDriverState *bs)
+{
+    BDRVVVFATState *state = bs->opaque;
+
+    vvfat_close_current_file(state);
+
+    array_free(&(state->fat));
+    array_free(&(state->directory));
+    array_free(&(state->mapping));
+
+    /* free(NULL) is a no-op, so no guard is needed */
+    free(state->cluster_buffer);
+}
+
+/* Driver description of the "vvfat" format, selected by the "fat" protocol */
+static BlockDriver bdrv_vvfat = {
+    .format_name       = "vvfat",
+    .instance_size     = sizeof(BDRVVVFATState),
+    .bdrv_open         = vvfat_open,
+    .bdrv_read         = vvfat_read,
+    .bdrv_write                = vvfat_write,
+    .bdrv_close                = vvfat_close,
+    .bdrv_is_allocated = vvfat_is_allocated,
+    .protocol_name     = "fat",
+};
+
+/* Register the vvfat driver with the block layer. */
+static void bdrv_vvfat_init(void)
+{
+    bdrv_register(&bdrv_vvfat);
+}
+
+/* hook the registration function into the block module initialisation */
+block_init(bdrv_vvfat_init);
+
+#ifdef DEBUG
+/*
+ * Debug helper: run the consistency checks check1() and check2() on the
+ * file-scope vvfat state 'vvv' and assert a few basic invariants.
+ */
+static void checkpoint(void) {
+    /* the first mapping must end at cluster 2 */
+    assert(((mapping_t*)array_get(&(vvv->mapping), 0))->end == 2);
+    check1(vvv);
+    check2(vvv);
+    /* a current mapping needs an open file unless it is a directory */
+    assert(!vvv->current_mapping || vvv->current_fd || (vvv->current_mapping->mode & MODE_DIRECTORY));
+#if 0
+    if (((direntry_t*)vvv->directory.pointer)[1].attributes != 0xf)
+       fprintf(stderr, "Nonono!\n");
+    mapping_t* mapping;
+    direntry_t* direntry;
+    assert(vvv->mapping.size >= vvv->mapping.item_size * vvv->mapping.next);
+    assert(vvv->directory.size >= vvv->directory.item_size * vvv->directory.next);
+    if (vvv->mapping.next<47)
+       return;
+    assert((mapping = array_get(&(vvv->mapping), 47)));
+    assert(mapping->dir_index < vvv->directory.next);
+    direntry = array_get(&(vvv->directory), mapping->dir_index);
+    assert(!memcmp(direntry->name, "USB     H  ", 11) || direntry->name[0]==0);
+#endif
+    return;
+    /*
+     * Never reached: the calls below only reference functions that would
+     * otherwise be unused.
+     */
+    /* avoid compiler warnings: */
+    hexdump(NULL, 100);
+    remove_mapping(vvv, NULL);
+    print_mapping(NULL);
+    print_direntry(NULL);
+}
+#endif
index d3788fcc15b630c05b627a8452b2046d9db261a2..8fbe35b00ec55a005e2966c133efd1651d282236 100755 (executable)
--- a/configure
+++ b/configure
@@ -2029,7 +2029,7 @@ done # for target in $targets
 
 # build tree in object directory if source path is different from current one
 if test "$source_path_used" = "yes" ; then
-    DIRS="tests tests/cris slirp audio"
+    DIRS="tests tests/cris slirp audio block"
     FILES="Makefile tests/Makefile"
     FILES="$FILES tests/cris/Makefile tests/cris/.gdbinit"
     FILES="$FILES tests/test-mmap.c"