]> git.proxmox.com Git - mirror_qemu.git/commitdiff
async file I/O API
author bellard <bellard@c046a42c-6fe2-441c-8c8c-71466251a162>
Tue, 1 Aug 2006 16:21:11 +0000 (16:21 +0000)
committer bellard <bellard@c046a42c-6fe2-441c-8c8c-71466251a162>
Tue, 1 Aug 2006 16:21:11 +0000 (16:21 +0000)
git-svn-id: svn://svn.savannah.nongnu.org/qemu/trunk@2075 c046a42c-6fe2-441c-8c8c-71466251a162

16 files changed:
Changelog
Makefile
Makefile.target
block-bochs.c
block-cloop.c
block-cow.c
block-dmg.c
block-qcow.c
block-raw.c [new file with mode: 0644]
block-vmdk.c
block-vpc.c
block-vvfat.c
block.c
block_int.h
vl.c
vl.h

index 58f3e5e743d65dff17509dba648b6e0259d8a9a3..693ce4ed3504d261e5ef916a4430fb90b62cfaae 100644 (file)
--- a/Changelog
+++ b/Changelog
@@ -1,3 +1,8 @@
+version 0.8.3:
+
+  - Support for relative paths in backing files for disk images
+  - Async file I/O API
+
 version 0.8.2:
 
   - ACPI support
index d6e6f61eb7af5d8164b63742e4e15be9268201f8..28a4f861ba6b445176332b7addcc0f47e51d6ef4 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -25,14 +25,22 @@ else
 DOCS=
 endif
 
+ifndef CONFIG_DARWIN
+ifndef CONFIG_WIN32
+ifndef CONFIG_SOLARIS
+LIBS+=-lrt
+endif
+endif
+endif
+
 all: $(TOOLS) $(DOCS) recurse-all
 
 subdir-%: dyngen$(EXESUF)
        $(MAKE) -C $(subst subdir-,,$@) all
 
 recurse-all: $(patsubst %,subdir-%, $(TARGET_DIRS))
-        
-qemu-img$(EXESUF): qemu-img.c block.c block-cow.c block-qcow.c aes.c block-vmdk.c block-cloop.c block-dmg.c block-bochs.c block-vpc.c block-vvfat.c
+
+qemu-img$(EXESUF): qemu-img.c block.c block-raw.c block-cow.c block-qcow.c aes.c block-vmdk.c block-cloop.c block-dmg.c block-bochs.c block-vpc.c block-vvfat.c
        $(CC) -DQEMU_TOOL $(CFLAGS) $(LDFLAGS) $(DEFINES) -o $@ $^ -lz $(LIBS)
 
 dyngen$(EXESUF): dyngen.c
index 91516edf451d07e3103317eb41701484853d710c..04cdb21af3a839eb33fbecc108255ed7fbc5de06 100644 (file)
@@ -289,7 +289,8 @@ ifeq ($(ARCH),alpha)
 endif
 
 # must use static linking to avoid leaving stuff in virtual address space
-VL_OBJS=vl.o osdep.o block.o readline.o monitor.o pci.o console.o loader.o
+VL_OBJS=vl.o osdep.o readline.o monitor.o pci.o console.o loader.o
+VL_OBJS+=block.o block-raw.o
 VL_OBJS+=block-cow.o block-qcow.o aes.o block-vmdk.o block-cloop.o block-dmg.o block-bochs.o block-vpc.o block-vvfat.o
 ifdef CONFIG_WIN32
 VL_OBJS+=tap-win32.o
index 62317aff38a83ac6937de56baaa2155581151a09..febb4d3fd60a4439fa1e84d18c249bd549eeadf1 100644 (file)
@@ -85,15 +85,15 @@ static int bochs_probe(const uint8_t *buf, int buf_size, const char *filename)
     return 0;
 }
 
-static int bochs_open(BlockDriverState *bs, const char *filename)
+static int bochs_open(BlockDriverState *bs, const char *filename, int flags)
 {
     BDRVBochsState *s = bs->opaque;
     int fd, i;
     struct bochs_header bochs;
 
-    fd = open(filename, O_RDWR | O_BINARY | O_LARGEFILE);
+    fd = open(filename, O_RDWR | O_BINARY);
     if (fd < 0) {
-        fd = open(filename, O_RDONLY | O_BINARY | O_LARGEFILE);
+        fd = open(filename, O_RDONLY | O_BINARY);
         if (fd < 0)
             return -1;
     }
index c617e1b64bc3d2b391515b1413d0edba5f040b36..f51c32d1bddbbbadf6bb4ad8d31c680c64e41352 100644 (file)
@@ -50,14 +50,14 @@ static int cloop_probe(const uint8_t *buf, int buf_size, const char *filename)
     return 0;
 }
 
-static int cloop_open(BlockDriverState *bs, const char *filename)
+static int cloop_open(BlockDriverState *bs, const char *filename, int flags)
 {
     BDRVCloopState *s = bs->opaque;
     uint32_t offsets_size,max_compressed_block_size=1,i;
 
-    s->fd = open(filename, O_RDONLY | O_BINARY | O_LARGEFILE);
+    s->fd = open(filename, O_RDONLY | O_BINARY);
     if (s->fd < 0)
-        return -1;
+        return -errno;
     bs->read_only = 1;
 
     /* read header */
index 6af8b749759910eca057e6dd430ae01d8f099086..07c8a7bf167a704b835ad44ae2a82b37cc70da6d 100644 (file)
@@ -62,7 +62,7 @@ static int cow_probe(const uint8_t *buf, int buf_size, const char *filename)
         return 0;
 }
 
-static int cow_open(BlockDriverState *bs, const char *filename)
+static int cow_open(BlockDriverState *bs, const char *filename, int flags)
 {
     BDRVCowState *s = bs->opaque;
     int fd;
@@ -93,22 +93,6 @@ static int cow_open(BlockDriverState *bs, const char *filename)
     pstrcpy(bs->backing_file, sizeof(bs->backing_file), 
             cow_header.backing_file);
     
-#if 0
-    if (cow_header.backing_file[0] != '\0') {
-        if (stat(cow_header.backing_file, &st) != 0) {
-            fprintf(stderr, "%s: could not find original disk image '%s'\n", filename, cow_header.backing_file);
-            goto fail;
-        }
-        if (st.st_mtime != be32_to_cpu(cow_header.mtime)) {
-            fprintf(stderr, "%s: original raw disk image '%s' does not match saved timestamp\n", filename, cow_header.backing_file);
-            goto fail;
-            }
-        fd = open(cow_header.backing_file, O_RDONLY | O_LARGEFILE);
-        if (fd < 0)
-            goto fail;
-        bs->fd = fd;
-    }
-#endif
     /* mmap the bitmap */
     s->cow_bitmap_size = ((bs->total_sectors + 7) >> 3) + sizeof(cow_header);
     s->cow_bitmap_addr = mmap(get_mmap_addr(s->cow_bitmap_size), 
@@ -179,8 +163,15 @@ static int cow_read(BlockDriverState *bs, int64_t sector_num,
             if (ret != n * 512) 
                 return -1;
         } else {
+            if (bs->backing_hd) {
+                /* read from the base image */
+                ret = bdrv_read(bs->backing_hd, sector_num, buf, n);
+                if (ret < 0)
+                    return -1;
+            } else {
             memset(buf, 0, n * 512);
         }
+        }
         nb_sectors -= n;
         sector_num += n;
         buf += n * 512;
@@ -220,7 +211,7 @@ static int cow_create(const char *filename, int64_t image_sectors,
     if (flags)
         return -ENOTSUP;
 
-    cow_fd = open(filename, O_WRONLY | O_CREAT | O_TRUNC | O_BINARY | O_LARGEFILE
+    cow_fd = open(filename, O_WRONLY | O_CREAT | O_TRUNC | O_BINARY, 
               0644);
     if (cow_fd < 0)
         return -1;
@@ -228,18 +219,23 @@ static int cow_create(const char *filename, int64_t image_sectors,
     cow_header.magic = cpu_to_be32(COW_MAGIC);
     cow_header.version = cpu_to_be32(COW_VERSION);
     if (image_filename) {
+        /* Note: if no file, we put a dummy mtime */
+        cow_header.mtime = cpu_to_be32(0);
+
         fd = open(image_filename, O_RDONLY | O_BINARY);
         if (fd < 0) {
             close(cow_fd);
-            return -1;
+            goto mtime_fail;
         }
         if (fstat(fd, &st) != 0) {
             close(fd);
-            return -1;
+            goto mtime_fail;
         }
         close(fd);
         cow_header.mtime = cpu_to_be32(st.st_mtime);
-        realpath(image_filename, cow_header.backing_file);
+    mtime_fail:
+        pstrcpy(cow_header.backing_file, sizeof(cow_header.backing_file),
+                image_filename);
     }
     cow_header.sectorsize = cpu_to_be32(512);
     cow_header.size = cpu_to_be64(image_sectors * 512);
index a16ab926b59bd2c12aa492079d108befc5cbd680..a883a23f8e132155217ca6480ae8dea6707eeea1 100644 (file)
@@ -73,16 +73,16 @@ static off_t read_uint32(int fd)
        return be32_to_cpu(buffer);
 }
 
-static int dmg_open(BlockDriverState *bs, const char *filename)
+static int dmg_open(BlockDriverState *bs, const char *filename, int flags)
 {
     BDRVDMGState *s = bs->opaque;
     off_t info_begin,info_end,last_in_offset,last_out_offset;
     uint32_t count;
     uint32_t max_compressed_size=1,max_sectors_per_chunk=1,i;
 
-    s->fd = open(filename, O_RDONLY | O_BINARY | O_LARGEFILE);
+    s->fd = open(filename, O_RDONLY | O_BINARY);
     if (s->fd < 0)
-        return -1;
+        return -errno;
     bs->read_only = 1;
     s->n_chunks = 0;
     s->offsets = s->lengths = s->sectors = s->sectorcounts = 0;
@@ -93,7 +93,7 @@ dmg_close:
        close(s->fd);
        /* open raw instead */
        bs->drv=&bdrv_raw;
-       return bs->drv->bdrv_open(bs,filename);
+       return bs->drv->bdrv_open(bs, filename, flags);
     }
     info_begin=read_off(s->fd);
     if(info_begin==0)
index e5b52fb86108aab79620f5b541d781628f15af61..65e74e7781d1dd1c076d0b17aacb09d1896e8961 100644 (file)
@@ -1,7 +1,7 @@
 /*
  * Block driver for the QCOW format
  * 
- * Copyright (c) 2004 Fabrice Bellard
+ * Copyright (c) 2004-2006 Fabrice Bellard
  * 
  * Permission is hereby granted, free of charge, to any person obtaining a copy
  * of this software and associated documentation files (the "Software"), to deal
@@ -53,7 +53,7 @@ typedef struct QCowHeader {
 #define L2_CACHE_SIZE 16
 
 typedef struct BDRVQcowState {
-    int fd;
+    BlockDriverState *hd;
     int cluster_bits;
     int cluster_size;
     int cluster_sectors;
@@ -89,20 +89,16 @@ static int qcow_probe(const uint8_t *buf, int buf_size, const char *filename)
         return 0;
 }
 
-static int qcow_open(BlockDriverState *bs, const char *filename)
+static int qcow_open(BlockDriverState *bs, const char *filename, int flags)
 {
     BDRVQcowState *s = bs->opaque;
-    int fd, len, i, shift;
+    int len, i, shift, ret;
     QCowHeader header;
-    
-    fd = open(filename, O_RDWR | O_BINARY | O_LARGEFILE);
-    if (fd < 0) {
-        fd = open(filename, O_RDONLY | O_BINARY | O_LARGEFILE);
-        if (fd < 0)
-            return -1;
-    }
-    s->fd = fd;
-    if (read(fd, &header, sizeof(header)) != sizeof(header))
+
+    ret = bdrv_file_open(&s->hd, filename, flags);
+    if (ret < 0)
+        return ret;
+    if (bdrv_pread(s->hd, 0, &header, sizeof(header)) != sizeof(header))
         goto fail;
     be32_to_cpus(&header.magic);
     be32_to_cpus(&header.version);
@@ -138,8 +134,7 @@ static int qcow_open(BlockDriverState *bs, const char *filename)
     s->l1_table = qemu_malloc(s->l1_size * sizeof(uint64_t));
     if (!s->l1_table)
         goto fail;
-    lseek(fd, s->l1_table_offset, SEEK_SET);
-    if (read(fd, s->l1_table, s->l1_size * sizeof(uint64_t)) != 
+    if (bdrv_pread(s->hd, s->l1_table_offset, s->l1_table, s->l1_size * sizeof(uint64_t)) != 
         s->l1_size * sizeof(uint64_t))
         goto fail;
     for(i = 0;i < s->l1_size; i++) {
@@ -162,8 +157,7 @@ static int qcow_open(BlockDriverState *bs, const char *filename)
         len = header.backing_file_size;
         if (len > 1023)
             len = 1023;
-        lseek(fd, header.backing_file_offset, SEEK_SET);
-        if (read(fd, bs->backing_file, len) != len)
+        if (bdrv_pread(s->hd, header.backing_file_offset, bs->backing_file, len) != len)
             goto fail;
         bs->backing_file[len] = '\0';
     }
@@ -174,7 +168,7 @@ static int qcow_open(BlockDriverState *bs, const char *filename)
     qemu_free(s->l2_cache);
     qemu_free(s->cluster_cache);
     qemu_free(s->cluster_data);
-    close(fd);
+    bdrv_delete(s->hd);
     return -1;
 }
 
@@ -276,14 +270,14 @@ static uint64_t get_cluster_offset(BlockDriverState *bs,
         if (!allocate)
             return 0;
         /* allocate a new l2 entry */
-        l2_offset = lseek(s->fd, 0, SEEK_END);
+        l2_offset = bdrv_getlength(s->hd);
         /* round to cluster size */
         l2_offset = (l2_offset + s->cluster_size - 1) & ~(s->cluster_size - 1);
         /* update the L1 entry */
         s->l1_table[l1_index] = l2_offset;
         tmp = cpu_to_be64(l2_offset);
-        lseek(s->fd, s->l1_table_offset + l1_index * sizeof(tmp), SEEK_SET);
-        if (write(s->fd, &tmp, sizeof(tmp)) != sizeof(tmp))
+        if (bdrv_pwrite(s->hd, s->l1_table_offset + l1_index * sizeof(tmp), 
+                        &tmp, sizeof(tmp)) != sizeof(tmp))
             return 0;
         new_l2_table = 1;
     }
@@ -309,14 +303,13 @@ static uint64_t get_cluster_offset(BlockDriverState *bs,
         }
     }
     l2_table = s->l2_cache + (min_index << s->l2_bits);
-    lseek(s->fd, l2_offset, SEEK_SET);
     if (new_l2_table) {
         memset(l2_table, 0, s->l2_size * sizeof(uint64_t));
-        if (write(s->fd, l2_table, s->l2_size * sizeof(uint64_t)) !=
+        if (bdrv_pwrite(s->hd, l2_offset, l2_table, s->l2_size * sizeof(uint64_t)) !=
             s->l2_size * sizeof(uint64_t))
             return 0;
     } else {
-        if (read(s->fd, l2_table, s->l2_size * sizeof(uint64_t)) != 
+        if (bdrv_pread(s->hd, l2_offset, l2_table, s->l2_size * sizeof(uint64_t)) != 
             s->l2_size * sizeof(uint64_t))
             return 0;
     }
@@ -337,21 +330,20 @@ static uint64_t get_cluster_offset(BlockDriverState *bs,
                overwritten */
             if (decompress_cluster(s, cluster_offset) < 0)
                 return 0;
-            cluster_offset = lseek(s->fd, 0, SEEK_END);
+            cluster_offset = bdrv_getlength(s->hd);
             cluster_offset = (cluster_offset + s->cluster_size - 1) & 
                 ~(s->cluster_size - 1);
             /* write the cluster content */
-            lseek(s->fd, cluster_offset, SEEK_SET);
-            if (write(s->fd, s->cluster_cache, s->cluster_size) != 
+            if (bdrv_pwrite(s->hd, cluster_offset, s->cluster_cache, s->cluster_size) != 
                 s->cluster_size)
                 return -1;
         } else {
-            cluster_offset = lseek(s->fd, 0, SEEK_END);
+            cluster_offset = bdrv_getlength(s->hd);
             if (allocate == 1) {
                 /* round to cluster size */
                 cluster_offset = (cluster_offset + s->cluster_size - 1) & 
                     ~(s->cluster_size - 1);
-                ftruncate(s->fd, cluster_offset + s->cluster_size);
+                bdrv_truncate(s->hd, cluster_offset + s->cluster_size);
                 /* if encrypted, we must initialize the cluster
                    content which won't be written */
                 if (s->crypt_method && 
@@ -365,8 +357,8 @@ static uint64_t get_cluster_offset(BlockDriverState *bs,
                                             s->cluster_data, 
                                             s->cluster_data + 512, 1, 1,
                                             &s->aes_encrypt_key);
-                            lseek(s->fd, cluster_offset + i * 512, SEEK_SET);
-                            if (write(s->fd, s->cluster_data, 512) != 512)
+                            if (bdrv_pwrite(s->hd, cluster_offset + i * 512, 
+                                            s->cluster_data, 512) != 512)
                                 return -1;
                         }
                     }
@@ -379,8 +371,8 @@ static uint64_t get_cluster_offset(BlockDriverState *bs,
         /* update L2 table */
         tmp = cpu_to_be64(cluster_offset);
         l2_table[l2_index] = tmp;
-        lseek(s->fd, l2_offset + l2_index * sizeof(tmp), SEEK_SET);
-        if (write(s->fd, &tmp, sizeof(tmp)) != sizeof(tmp))
+        if (bdrv_pwrite(s->hd, 
+                        l2_offset + l2_index * sizeof(tmp), &tmp, sizeof(tmp)) != sizeof(tmp))
             return 0;
     }
     return cluster_offset;
@@ -438,8 +430,7 @@ static int decompress_cluster(BDRVQcowState *s, uint64_t cluster_offset)
     if (s->cluster_cache_offset != coffset) {
         csize = cluster_offset >> (63 - s->cluster_bits);
         csize &= (s->cluster_size - 1);
-        lseek(s->fd, coffset, SEEK_SET);
-        ret = read(s->fd, s->cluster_data, csize);
+        ret = bdrv_pread(s->hd, coffset, s->cluster_data, csize);
         if (ret != csize) 
             return -1;
         if (decompress_buffer(s->cluster_cache, s->cluster_size,
@@ -451,6 +442,8 @@ static int decompress_cluster(BDRVQcowState *s, uint64_t cluster_offset)
     return 0;
 }
 
+#if 0
+
 static int qcow_read(BlockDriverState *bs, int64_t sector_num, 
                      uint8_t *buf, int nb_sectors)
 {
@@ -465,14 +458,20 @@ static int qcow_read(BlockDriverState *bs, int64_t sector_num,
         if (n > nb_sectors)
             n = nb_sectors;
         if (!cluster_offset) {
-            memset(buf, 0, 512 * n);
+            if (bs->backing_hd) {
+                /* read from the base image */
+                ret = bdrv_read(bs->backing_hd, sector_num, buf, n);
+                if (ret < 0)
+                    return -1;
+            } else {
+                memset(buf, 0, 512 * n);
+            }
         } else if (cluster_offset & QCOW_OFLAG_COMPRESSED) {
             if (decompress_cluster(s, cluster_offset) < 0)
                 return -1;
             memcpy(buf, s->cluster_cache + index_in_cluster * 512, 512 * n);
         } else {
-            lseek(s->fd, cluster_offset + index_in_cluster * 512, SEEK_SET);
-            ret = read(s->fd, buf, n * 512);
+            ret = bdrv_pread(s->hd, cluster_offset + index_in_cluster * 512, buf, n * 512);
             if (ret != n * 512) 
                 return -1;
             if (s->crypt_method) {
@@ -486,6 +485,7 @@ static int qcow_read(BlockDriverState *bs, int64_t sector_num,
     }
     return 0;
 }
+#endif
 
 static int qcow_write(BlockDriverState *bs, int64_t sector_num, 
                      const uint8_t *buf, int nb_sectors)
@@ -504,13 +504,13 @@ static int qcow_write(BlockDriverState *bs, int64_t sector_num,
                                             index_in_cluster + n);
         if (!cluster_offset)
             return -1;
-        lseek(s->fd, cluster_offset + index_in_cluster * 512, SEEK_SET);
         if (s->crypt_method) {
             encrypt_sectors(s, sector_num, s->cluster_data, buf, n, 1,
                             &s->aes_encrypt_key);
-            ret = write(s->fd, s->cluster_data, n * 512);
+            ret = bdrv_pwrite(s->hd, cluster_offset + index_in_cluster * 512, 
+                              s->cluster_data, n * 512);
         } else {
-            ret = write(s->fd, buf, n * 512);
+            ret = bdrv_pwrite(s->hd, cluster_offset + index_in_cluster * 512, buf, n * 512);
         }
         if (ret != n * 512) 
             return -1;
@@ -522,6 +522,231 @@ static int qcow_write(BlockDriverState *bs, int64_t sector_num,
     return 0;
 }
 
+/* Per-request state of the QCOW asynchronous I/O path. qcow_aio_new()
+   allocates one and stores it in BlockDriverAIOCB.opaque; the read and
+   write callbacks advance it chunk by chunk. */
+typedef struct {
+    int64_t sector_num; /* first sector of the current chunk */
+    uint8_t *buf; /* position in the caller's buffer for the current chunk */
+    int nb_sectors; /* sectors of the request not yet completed */
+    int n; /* size in sectors of the current chunk (0 before the first) */
+    uint64_t cluster_offset; /* get_cluster_offset() result (read path) */
+    uint8_t *cluster_data; /* scratch buffer for encrypted writes */
+    BlockDriverAIOCB *hd_aiocb; /* request handle on s->hd */
+    BlockDriverAIOCB *backing_hd_aiocb; /* handle on bs->backing_hd, if any */
+} QCowAIOCB;
+
+static void qcow_aio_delete(BlockDriverAIOCB *acb);
+
+/*
+ * Allocate the QCOW-private part of an AIO control block.
+ *
+ * Creates a QCowAIOCB, attaches it to acb->opaque and creates one nested
+ * AIO control block on the image file (s->hd) plus, when the image has a
+ * backing file, one on bs->backing_hd.
+ *
+ * Returns 0 on success, -1 if any allocation fails.
+ */
+static int qcow_aio_new(BlockDriverAIOCB *acb)
+{
+    BlockDriverState *bs = acb->bs;
+    BDRVQcowState *s = bs->opaque;
+    QCowAIOCB *acb1;
+    acb1 = qemu_mallocz(sizeof(QCowAIOCB));
+    if (!acb1)
+        return -1;
+    /* attach first so that the fail path can release it via qcow_aio_delete() */
+    acb->opaque = acb1;
+    acb1->hd_aiocb = bdrv_aio_new(s->hd);
+    if (!acb1->hd_aiocb)
+        goto fail;
+    if (bs->backing_hd) {
+        acb1->backing_hd_aiocb = bdrv_aio_new(bs->backing_hd);
+        if (!acb1->backing_hd_aiocb)
+            goto fail;
+    }
+    return 0;
+ fail:
+    /* NOTE(review): qcow_aio_delete() frees acb1 but acb->opaque still
+       points at it afterwards; confirm the caller never touches it again */
+    qcow_aio_delete(acb);
+    return -1;
+}
+
+/*
+ * Completion callback and state machine for asynchronous QCOW reads.
+ *
+ * opaque is the BlockDriverAIOCB of the request and ret is the status of
+ * the chunk that just finished (negative on error). qcow_aio_read() also
+ * calls this function directly, with n == 0, to start the first chunk.
+ *
+ * Each pass post-processes the chunk that just completed (decrypting it
+ * when the image is encrypted), advances the request by n sectors and then
+ * either reports completion through acb->cb or starts the next chunk. A
+ * chunk never crosses a cluster boundary. Chunks that need no nested AIO
+ * (unallocated without backing file, or compressed) are filled in
+ * immediately and the loop continues at "redo".
+ *
+ * Every error is reported through acb->cb with a negative status.
+ */
+static void qcow_aio_read_cb(void *opaque, int ret)
+{
+    BlockDriverAIOCB *acb = opaque;
+    BlockDriverState *bs = acb->bs;
+    BDRVQcowState *s = bs->opaque;
+    QCowAIOCB *acb1 = acb->opaque;
+    int index_in_cluster;
+
+    if (ret < 0) {
+    fail:
+        acb->cb(acb->cb_opaque, ret);
+        return;
+    }
+
+ redo:
+    /* post process the read buffer */
+    if (!acb1->cluster_offset) {
+        /* nothing to do */
+    } else if (acb1->cluster_offset & QCOW_OFLAG_COMPRESSED) {
+        /* nothing to do */
+    } else {
+        if (s->crypt_method) {
+            /* data read from the image file is decrypted in place */
+            encrypt_sectors(s, acb1->sector_num, acb1->buf, acb1->buf,
+                            acb1->n, 0,
+                            &s->aes_decrypt_key);
+        }
+    }
+
+    /* account for the chunk that has just been completed */
+    acb1->nb_sectors -= acb1->n;
+    acb1->sector_num += acb1->n;
+    acb1->buf += acb1->n * 512;
+
+    if (acb1->nb_sectors == 0) {
+        /* request completed */
+        acb->cb(acb->cb_opaque, 0);
+        return;
+    }
+
+    /* prepare next AIO request */
+    acb1->cluster_offset = get_cluster_offset(bs,
+                                              acb1->sector_num << 9,
+                                              0, 0, 0, 0);
+    index_in_cluster = acb1->sector_num & (s->cluster_sectors - 1);
+    /* limit the chunk to the end of the current cluster */
+    acb1->n = s->cluster_sectors - index_in_cluster;
+    if (acb1->n > acb1->nb_sectors)
+        acb1->n = acb1->nb_sectors;
+
+    if (!acb1->cluster_offset) {
+        if (bs->backing_hd) {
+            /* read from the base image */
+            ret = bdrv_aio_read(acb1->backing_hd_aiocb, acb1->sector_num,
+                                acb1->buf, acb1->n, qcow_aio_read_cb, acb);
+            if (ret < 0)
+                goto fail;
+        } else {
+            /* Note: in this case, no need to wait */
+            memset(acb1->buf, 0, 512 * acb1->n);
+            goto redo;
+        }
+    } else if (acb1->cluster_offset & QCOW_OFLAG_COMPRESSED) {
+        /* add AIO support for compressed blocks ? */
+        if (decompress_cluster(s, acb1->cluster_offset) < 0) {
+            /* ret is still >= 0 at this point: without an explicit error
+               code the failure would be reported as a success */
+            ret = -EIO;
+            goto fail;
+        }
+        memcpy(acb1->buf,
+               s->cluster_cache + index_in_cluster * 512, 512 * acb1->n);
+        goto redo;
+    } else {
+        /* nested AIO works on whole sectors, so the cluster must be
+           sector aligned in the image file */
+        if ((acb1->cluster_offset & 511) != 0) {
+            ret = -EIO;
+            goto fail;
+        }
+        ret = bdrv_aio_read(acb1->hd_aiocb,
+                            (acb1->cluster_offset >> 9) + index_in_cluster,
+                            acb1->buf, acb1->n, qcow_aio_read_cb, acb);
+        if (ret < 0)
+            goto fail;
+    }
+}
+
+/*
+ * Start an asynchronous read of nb_sectors sectors at sector_num into buf.
+ *
+ * Resets the per-request state and runs qcow_aio_read_cb() once with
+ * n == 0 so that it issues the first chunk. Parts of the request that
+ * need no nested AIO are processed during this call, so acb->cb may be
+ * invoked before the function returns.
+ *
+ * Always returns 0: failures are reported through acb->cb. The function
+ * previously ended without a return statement, leaving the value seen by
+ * the caller indeterminate.
+ */
+static int qcow_aio_read(BlockDriverAIOCB *acb, int64_t sector_num,
+                         uint8_t *buf, int nb_sectors)
+{
+    QCowAIOCB *acb1 = acb->opaque;
+
+    acb1->sector_num = sector_num;
+    acb1->buf = buf;
+    acb1->nb_sectors = nb_sectors;
+    /* n == 0 and cluster_offset == 0 make the first callback pass a no-op
+       for the "previous chunk" bookkeeping */
+    acb1->n = 0;
+    acb1->cluster_offset = 0;
+
+    qcow_aio_read_cb(acb, 0);
+    return 0;
+}
+
+/*
+ * Completion callback and state machine for asynchronous QCOW writes.
+ *
+ * opaque is the BlockDriverAIOCB of the request and ret is the status of
+ * the chunk that just finished (negative on error). qcow_aio_write() also
+ * calls this function directly, with n == 0, to start the first chunk.
+ *
+ * Each pass advances the request by the n sectors just written and then
+ * either reports completion through acb->cb or allocates the target
+ * cluster and submits the next chunk, which never crosses a cluster
+ * boundary. Every error is reported through acb->cb with a negative status.
+ */
+static void qcow_aio_write_cb(void *opaque, int ret)
+{
+    BlockDriverAIOCB *acb = opaque;
+    BlockDriverState *bs = acb->bs;
+    BDRVQcowState *s = bs->opaque;
+    QCowAIOCB *acb1 = acb->opaque;
+    int index_in_cluster;
+    uint64_t cluster_offset;
+    const uint8_t *src_buf;
+    
+    if (ret < 0) {
+    fail:
+        acb->cb(acb->cb_opaque, ret);
+        return;
+    }
+
+    /* account for the chunk that has just been written */
+    acb1->nb_sectors -= acb1->n;
+    acb1->sector_num += acb1->n;
+    acb1->buf += acb1->n * 512;
+
+    if (acb1->nb_sectors == 0) {
+        /* request completed */
+        acb->cb(acb->cb_opaque, 0);
+        return;
+    }
+    
+    /* limit the chunk to the end of the current cluster */
+    index_in_cluster = acb1->sector_num & (s->cluster_sectors - 1);
+    acb1->n = s->cluster_sectors - index_in_cluster;
+    if (acb1->n > acb1->nb_sectors)
+        acb1->n = acb1->nb_sectors;
+    cluster_offset = get_cluster_offset(bs, acb1->sector_num << 9, 1, 0, 
+                                        index_in_cluster, 
+                                        index_in_cluster + acb1->n);
+    /* 0 means the lookup/allocation failed; the nested AIO works on whole
+       sectors, so an unaligned offset cannot be written either */
+    if (!cluster_offset || (cluster_offset & 511) != 0) {
+        ret = -EIO;
+        goto fail;
+    }
+    if (s->crypt_method) {
+        /* the scratch buffer is allocated on first use and released by
+           qcow_aio_delete() */
+        if (!acb1->cluster_data) {
+            acb1->cluster_data = qemu_mallocz(s->cluster_size);
+            if (!acb1->cluster_data) {
+                ret = -ENOMEM;
+                goto fail;
+            }
+        }
+        /* encrypt into the scratch buffer; the caller's data is not modified */
+        encrypt_sectors(s, acb1->sector_num, acb1->cluster_data, acb1->buf, 
+                        acb1->n, 1, &s->aes_encrypt_key);
+        src_buf = acb1->cluster_data;
+    } else {
+        src_buf = acb1->buf;
+    }
+    ret = bdrv_aio_write(acb1->hd_aiocb, 
+                         (cluster_offset >> 9) + index_in_cluster, 
+                         src_buf, acb1->n, 
+                         qcow_aio_write_cb, acb);
+    if (ret < 0)
+        goto fail;
+}
+
+/*
+ * Start an asynchronous write of nb_sectors sectors from buf at sector_num.
+ *
+ * Invalidates the compressed-cluster cache, resets the per-request state
+ * and runs qcow_aio_write_cb() once with n == 0 so that it submits the
+ * first chunk. acb->cb may be invoked before this function returns (for
+ * example for an empty request or an early failure).
+ *
+ * Always returns 0: failures are reported through acb->cb. The function
+ * previously ended without a return statement, leaving the value seen by
+ * the caller indeterminate.
+ */
+static int qcow_aio_write(BlockDriverAIOCB *acb, int64_t sector_num,
+                          const uint8_t *buf, int nb_sectors)
+{
+    QCowAIOCB *acb1 = acb->opaque;
+    BlockDriverState *bs = acb->bs;
+    BDRVQcowState *s = bs->opaque;
+
+    s->cluster_cache_offset = -1; /* disable compressed cache */
+
+    acb1->sector_num = sector_num;
+    /* QCowAIOCB shares one buf field between reads and writes; the write
+       path only reads through it */
+    acb1->buf = (uint8_t *)buf;
+    acb1->nb_sectors = nb_sectors;
+    acb1->n = 0;
+
+    qcow_aio_write_cb(acb, 0);
+    return 0;
+}
+
+/* Cancel a QCOW AIO request by forwarding the cancellation to the nested
+   control blocks on the image file and, when present, the backing file. */
+static void qcow_aio_cancel(BlockDriverAIOCB *acb)
+{
+    QCowAIOCB *acb1 = acb->opaque;
+    if (acb1->hd_aiocb)
+        bdrv_aio_cancel(acb1->hd_aiocb);
+    if (acb1->backing_hd_aiocb)
+        bdrv_aio_cancel(acb1->backing_hd_aiocb);
+}
+
+/* Release the QCOW-private part of an AIO control block: the nested
+   control blocks that exist, the encryption scratch buffer and the
+   QCowAIOCB itself. Also used by qcow_aio_new() to undo a partial setup,
+   hence the NULL checks on the nested control blocks. */
+static void qcow_aio_delete(BlockDriverAIOCB *acb)
+{
+    QCowAIOCB *acb1 = acb->opaque;
+    if (acb1->hd_aiocb)
+        bdrv_aio_delete(acb1->hd_aiocb);
+    if (acb1->backing_hd_aiocb)
+        bdrv_aio_delete(acb1->backing_hd_aiocb);
+    /* cluster_data is only allocated by encrypted writes and may be NULL */
+    qemu_free(acb1->cluster_data);
+    qemu_free(acb1);
+}
+
 static void qcow_close(BlockDriverState *bs)
 {
     BDRVQcowState *s = bs->opaque;
@@ -529,7 +754,7 @@ static void qcow_close(BlockDriverState *bs)
     qemu_free(s->l2_cache);
     qemu_free(s->cluster_cache);
     qemu_free(s->cluster_data);
-    close(s->fd);
+    bdrv_delete(s->hd);
 }
 
 static int qcow_create(const char *filename, int64_t total_size,
@@ -537,12 +762,9 @@ static int qcow_create(const char *filename, int64_t total_size,
 {
     int fd, header_size, backing_filename_len, l1_size, i, shift;
     QCowHeader header;
-    char backing_filename[1024];
     uint64_t tmp;
-    struct stat st;
 
-    fd = open(filename, O_WRONLY | O_CREAT | O_TRUNC | O_BINARY | O_LARGEFILE, 
-              0644);
+    fd = open(filename, O_WRONLY | O_CREAT | O_TRUNC | O_BINARY, 0644);
     if (fd < 0)
         return -1;
     memset(&header, 0, sizeof(header));
@@ -552,28 +774,11 @@ static int qcow_create(const char *filename, int64_t total_size,
     header_size = sizeof(header);
     backing_filename_len = 0;
     if (backing_file) {
-       if (strcmp(backing_file, "fat:")) {
-           const char *p;
-           /* XXX: this is a hack: we do not attempt to check for URL
-              like syntax */
-           p = strchr(backing_file, ':');
-           if (p && (p - backing_file) >= 2) {
-               /* URL like but exclude "c:" like filenames */
-               pstrcpy(backing_filename, sizeof(backing_filename),
-                       backing_file);
-           } else {
-               realpath(backing_file, backing_filename);
-               if (stat(backing_filename, &st) != 0) {
-                   return -1;
-               }
-           }
-           header.backing_file_offset = cpu_to_be64(header_size);
-           backing_filename_len = strlen(backing_filename);
-           header.backing_file_size = cpu_to_be32(backing_filename_len);
-           header_size += backing_filename_len;
-       } else
-           backing_file = NULL;
-        header.mtime = cpu_to_be32(st.st_mtime);
+        header.backing_file_offset = cpu_to_be64(header_size);
+        backing_filename_len = strlen(backing_file);
+        header.backing_file_size = cpu_to_be32(backing_filename_len);
+        header_size += backing_filename_len;
+        header.mtime = cpu_to_be32(0);
         header.cluster_bits = 9; /* 512 byte cluster to avoid copying
                                     unmodifyed sectors */
         header.l2_bits = 12; /* 32 KB L2 tables */
@@ -595,7 +800,7 @@ static int qcow_create(const char *filename, int64_t total_size,
     /* write all the data */
     write(fd, &header, sizeof(header));
     if (backing_file) {
-        write(fd, backing_filename, backing_filename_len);
+        write(fd, backing_file, backing_filename_len);
     }
     lseek(fd, header_size, SEEK_SET);
     tmp = 0;
@@ -610,12 +815,14 @@ int qcow_make_empty(BlockDriverState *bs)
 {
     BDRVQcowState *s = bs->opaque;
     uint32_t l1_length = s->l1_size * sizeof(uint64_t);
+    int ret;
 
     memset(s->l1_table, 0, l1_length);
-    lseek(s->fd, s->l1_table_offset, SEEK_SET);
-    if (write(s->fd, s->l1_table, l1_length) < 0)
+    if (bdrv_pwrite(s->hd, s->l1_table_offset, s->l1_table, l1_length) < 0)
        return -1;
-    ftruncate(s->fd, s->l1_table_offset + l1_length);
+    ret = bdrv_truncate(s->hd, s->l1_table_offset + l1_length);
+    if (ret < 0)
+        return ret;
 
     memset(s->l2_cache, 0, s->l2_size * L2_CACHE_SIZE * sizeof(uint64_t));
     memset(s->l2_cache_offsets, 0, L2_CACHE_SIZE * sizeof(uint64_t));
@@ -682,8 +889,7 @@ int qcow_compress_cluster(BlockDriverState *bs, int64_t sector_num,
         cluster_offset = get_cluster_offset(bs, sector_num << 9, 2, 
                                             out_len, 0, 0);
         cluster_offset &= s->cluster_offset_mask;
-        lseek(s->fd, cluster_offset, SEEK_SET);
-        if (write(s->fd, out_buf, out_len) != out_len) {
+        if (bdrv_pwrite(s->hd, cluster_offset, out_buf, out_len) != out_len) {
             qemu_free(out_buf);
             return -1;
         }
@@ -696,7 +902,7 @@ int qcow_compress_cluster(BlockDriverState *bs, int64_t sector_num,
 static void qcow_flush(BlockDriverState *bs)
 {
     BDRVQcowState *s = bs->opaque;
-    fsync(s->fd);
+    bdrv_flush(s->hd);
 }
 
 BlockDriver bdrv_qcow = {
@@ -704,14 +910,20 @@ BlockDriver bdrv_qcow = {
     sizeof(BDRVQcowState),
     qcow_probe,
     qcow_open,
-    qcow_read,
-    qcow_write,
+    NULL,
+    NULL,
     qcow_close,
     qcow_create,
     qcow_flush,
     qcow_is_allocated,
     qcow_set_key,
-    qcow_make_empty
+    qcow_make_empty,
+
+    .bdrv_aio_new = qcow_aio_new,
+    .bdrv_aio_read = qcow_aio_read,
+    .bdrv_aio_write = qcow_aio_write,
+    .bdrv_aio_cancel = qcow_aio_cancel,
+    .bdrv_aio_delete = qcow_aio_delete,
 };
 
 
diff --git a/block-raw.c b/block-raw.c
new file mode 100644 (file)
index 0000000..77502b0
--- /dev/null
@@ -0,0 +1,817 @@
+/*
+ * Block driver for RAW files
+ * 
+ * Copyright (c) 2006 Fabrice Bellard
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#include "vl.h"
+#include "block_int.h"
+#include <assert.h>
+#ifndef _WIN32
+#include <aio.h>
+
+#ifndef QEMU_TOOL
+#include "exec-all.h"
+#endif
+
+#ifdef CONFIG_COCOA
+#include <paths.h>
+#include <sys/param.h>
+#include <IOKit/IOKitLib.h>
+#include <IOKit/IOBSD.h>
+#include <IOKit/storage/IOMediaBSDClient.h>
+#include <IOKit/storage/IOMedia.h>
+#include <IOKit/storage/IOCDMedia.h>
+//#include <IOKit/storage/IOCDTypes.h>
+#include <CoreFoundation/CoreFoundation.h>
+#endif
+
+#ifdef __sun__
+#include <sys/dkio.h>
+#endif
+
+/* Per-image state of the raw file driver on Unix hosts. */
+typedef struct BDRVRawState {
+    int fd; /* host file descriptor, stored by raw_open() */
+} BDRVRawState;
+
+#ifdef CONFIG_COCOA
+static kern_return_t FindEjectableCDMedia( io_iterator_t *mediaIterator );
+static kern_return_t GetBSDPath( io_iterator_t mediaIterator, char *bsdPath, CFIndex maxPathSize );
+
+/* Fill *mediaIterator with the IOKit services of class kIOCDMediaClass
+   whose kIOMediaEjectableKey property is true. Failures of the
+   intermediate calls are only reported with printf(); the value
+   returned is the result of IOServiceGetMatchingServices(). */
+kern_return_t FindEjectableCDMedia( io_iterator_t *mediaIterator )
+{
+    kern_return_t       kernResult; 
+    mach_port_t     masterPort;
+    CFMutableDictionaryRef  classesToMatch;
+
+    kernResult = IOMasterPort( MACH_PORT_NULL, &masterPort );
+    if ( KERN_SUCCESS != kernResult ) {
+        /* reported only: execution continues with masterPort as is */
+        printf( "IOMasterPort returned %d\n", kernResult );
+    }
+    
+    classesToMatch = IOServiceMatching( kIOCDMediaClass ); 
+    if ( classesToMatch == NULL ) {
+        printf( "IOServiceMatching returned a NULL dictionary.\n" );
+    } else {
+    /* restrict the match to ejectable media */
+    CFDictionarySetValue( classesToMatch, CFSTR( kIOMediaEjectableKey ), kCFBooleanTrue );
+    }
+    kernResult = IOServiceGetMatchingServices( masterPort, classesToMatch, mediaIterator );
+    if ( KERN_SUCCESS != kernResult )
+    {
+        printf( "IOServiceGetMatchingServices returned %d\n", kernResult );
+    }
+    
+    return kernResult;
+}
+
+/* Take the next object from 'mediaIterator' and write into 'bsdPath'
+   (capacity 'maxPathSize') the string _PATH_DEV + "r" + the object's
+   kIOBSDNameKey property. 'bsdPath' is first set to the empty string.
+   Returns KERN_SUCCESS only if the name could be converted by
+   CFStringGetCString(), KERN_FAILURE otherwise. */
+kern_return_t GetBSDPath( io_iterator_t mediaIterator, char *bsdPath, CFIndex maxPathSize )
+{
+    io_object_t     nextMedia;
+    kern_return_t   kernResult = KERN_FAILURE;
+    *bsdPath = '\0';
+    nextMedia = IOIteratorNext( mediaIterator );
+    if ( nextMedia )
+    {
+        CFTypeRef   bsdPathAsCFString;
+    bsdPathAsCFString = IORegistryEntryCreateCFProperty( nextMedia, CFSTR( kIOBSDNameKey ), kCFAllocatorDefault, 0 );
+        if ( bsdPathAsCFString ) {
+            size_t devPathLength;
+            /* NOTE(review): the prefix is copied without looking at
+               maxPathSize; only the name appended after it is bounded */
+            strcpy( bsdPath, _PATH_DEV );
+            strcat( bsdPath, "r" );
+            devPathLength = strlen( bsdPath );
+            if ( CFStringGetCString( bsdPathAsCFString, bsdPath + devPathLength, maxPathSize - devPathLength, kCFStringEncodingASCII ) ) {
+                kernResult = KERN_SUCCESS;
+            }
+            CFRelease( bsdPathAsCFString );
+        }
+        IOObjectRelease( nextMedia );
+    }
+    
+    return kernResult;
+}
+
+#endif
+
+/* Open 'filename' with open() and store the descriptor in the driver
+   state. The access mode comes from (flags & BDRV_O_ACCESS);
+   BDRV_O_CREAT adds O_CREAT | O_TRUNC. Returns 0 on success or -errno
+   if open() fails. */
+static int raw_open(BlockDriverState *bs, const char *filename, int flags)
+{
+    BDRVRawState *s = bs->opaque;
+    int fd, open_flags;
+
+#ifdef CONFIG_COCOA
+    /* a filename matched by strstart() against "/dev/cdrom" is replaced
+       by the BSD path of the first ejectable CD medium, when one is
+       found */
+    if (strstart(filename, "/dev/cdrom", NULL)) {
+        kern_return_t kernResult;
+        io_iterator_t mediaIterator;
+        char bsdPath[ MAXPATHLEN ];
+        int fd; /* probe descriptor only; shadows the outer 'fd' */
+        /* NOTE(review): both results are stored but never tested; the
+           code relies on bsdPath being empty when nothing was found */
+        kernResult = FindEjectableCDMedia( &mediaIterator );
+        kernResult = GetBSDPath( mediaIterator, bsdPath, sizeof( bsdPath ) );
+    
+        if ( bsdPath[ 0 ] != '\0' ) {
+            strcat(bsdPath,"s0");
+            /* some CDs don't have a partition 0 */
+            fd = open(bsdPath, O_RDONLY | O_BINARY | O_LARGEFILE);
+            if (fd < 0) {
+                /* fall back to the "s1" suffix */
+                bsdPath[strlen(bsdPath)-1] = '1';
+            } else {
+                close(fd);
+            }
+            filename = bsdPath;
+        }
+        
+        if ( mediaIterator )
+            IOObjectRelease( mediaIterator );
+    }
+#endif
+    open_flags = O_BINARY;
+    if ((flags & BDRV_O_ACCESS) == O_RDWR) {
+        open_flags |= O_RDWR;
+    } else {
+        /* any other access mode is opened read-only and flagged so */
+        open_flags |= O_RDONLY;
+        bs->read_only = 1;
+    }
+    if (flags & BDRV_O_CREAT)
+        open_flags |= O_CREAT | O_TRUNC;
+
+    fd = open(filename, open_flags, 0644);
+    if (fd < 0)
+        return -errno;
+    s->fd = fd;
+    return 0;
+}
+
+/* XXX: use host sector size if necessary with:
+#ifdef DIOCGSECTORSIZE
+        {
+            unsigned int sectorsize = 512;
+            if (!ioctl(fd, DIOCGSECTORSIZE, &sectorsize) &&
+                sectorsize > bufsize)
+                bufsize = sectorsize;
+        }
+#endif
+#ifdef CONFIG_COCOA
+        u_int32_t   blockSize = 512;
+        if ( !ioctl( fd, DKIOCGETBLOCKSIZE, &blockSize ) && blockSize > bufsize) {
+            bufsize = blockSize;
+        }
+#endif
+*/
+
+/* Read up to 'count' bytes at byte position 'offset' of the file into
+   'buf'. Returns the value of read() (number of bytes read, possibly
+   less than 'count'), or -1 if the seek or the read failed. */
+static int raw_pread(BlockDriverState *bs, int64_t offset, 
+                     uint8_t *buf, int count)
+{
+    BDRVRawState *s = bs->opaque;
+    int ret;
+    
+    /* after a failed seek the file position is unchanged, so reading
+       anyway would silently return data from the wrong place */
+    if (lseek(s->fd, offset, SEEK_SET) == (off_t)-1)
+        return -1;
+    ret = read(s->fd, buf, count);
+    return ret;
+}
+
+/* Write 'count' bytes from 'buf' at byte position 'offset' of the file.
+   Returns the value of write() (number of bytes written, possibly less
+   than 'count'), or -1 if the seek or the write failed. */
+static int raw_pwrite(BlockDriverState *bs, int64_t offset, 
+                      const uint8_t *buf, int count)
+{
+    BDRVRawState *s = bs->opaque;
+    int ret;
+    
+    /* never write if the seek failed: the data would land at whatever
+       position the descriptor currently has */
+    if (lseek(s->fd, offset, SEEK_SET) == (off_t)-1)
+        return -1;
+    ret = write(s->fd, buf, count);
+    return ret;
+}
+
+/***********************************************************/
+/* Unix AIO using POSIX AIO */
+
+/* Driver private part of a BlockDriverAIOCB (reached through its
+   'opaque' field) for the POSIX AIO implementation. */
+typedef struct RawAIOCB {
+    struct aiocb aiocb; /* control block handed to aio_read()/aio_write() */
+    int busy; /* only used for debugging */
+    BlockDriverAIOCB *next; /* next request in the 'first_aio' list */
+} RawAIOCB;
+
+static int aio_sig_num = SIGUSR2;
+static BlockDriverAIOCB *first_aio; /* AIO issued */
+
+#ifndef QEMU_TOOL
+/* Handler installed by qemu_aio_init() for 'aio_sig_num', the signal
+   requested in the sigevent of every aiocb. It does no AIO processing
+   itself: it only asks the running CPU (if any) to leave its execution
+   loop. */
+static void aio_signal_handler(int signum)
+{
+    CPUState *env = cpu_single_env;
+    if (env) {
+        /* stop the currently executing cpu because an AIO request
+           signalled its completion */
+        cpu_interrupt(env, CPU_INTERRUPT_EXIT);
+#ifdef USE_KQEMU
+        if (env->kqemu_enabled) {
+            kqemu_cpu_interrupt(env);
+        }
+#endif
+    }
+}
+
+/* Install the handler for the AIO completion signal and, on glibc,
+   tune the AIO thread pool. */
+void qemu_aio_init(void)
+{
+    struct sigaction act;
+    
+    sigfillset(&act.sa_mask);
+    act.sa_flags = 0; /* do not restart syscalls to interrupt select() */
+    act.sa_handler = aio_signal_handler;
+    sigaction(aio_sig_num, &act, NULL);
+
+#if defined(__GLIBC__) && defined(__linux__)
+    /* struct aioinit and aio_init() are GNU extensions which do not
+       exist on the other hosts this file is built for (BSD, Darwin,
+       Solaris), so they are only used with glibc */
+    {
+        /* XXX: aio thread exit seems to hang on RH 9 */
+        struct aioinit ai;
+        memset(&ai, 0, sizeof(ai));
+        ai.aio_threads = 2;
+        ai.aio_num = 1;
+        ai.aio_idle_time = 365 * 100000;
+        aio_init(&ai);
+    }
+#endif
+}
+#endif /* !QEMU_TOOL */
+
+/* Scan the 'first_aio' list and retire every request that is no longer
+   in progress:
+   - a cancelled request (ECANCELED) is unlinked without calling its
+     callback;
+   - a finished request is unlinked and its callback is called with 0
+     if aio_return() equals the requested byte count, -1 on a short
+     transfer, or the negated aio_error() code on failure;
+   - a request still in progress is skipped.
+   The function returns once the end of the list is reached. */
+void qemu_aio_poll(void)
+{
+    BlockDriverAIOCB *acb, **pacb;
+    RawAIOCB *acb1;
+    int ret;
+
+    for(;;) {
+        /* (re)start from the head of the list */
+        pacb = &first_aio;
+        for(;;) {
+            acb = *pacb;
+            if (!acb)
+                goto the_end;
+            acb1 = acb->opaque;
+            ret = aio_error(&acb1->aiocb);
+            if (ret == ECANCELED) {
+                /* remove the request */
+                acb1->busy = 0;
+                *pacb = acb1->next;
+            } else if (ret != EINPROGRESS) {
+                /* end of aio */
+                if (ret == 0) {
+                    ret = aio_return(&acb1->aiocb);
+                    if (ret == acb1->aiocb.aio_nbytes)
+                        ret = 0;
+                    else
+                        ret = -1;
+                } else {
+                    ret = -ret;
+                }
+                /* remove the request */
+                acb1->busy = 0;
+                *pacb = acb1->next;
+                /* call the callback */
+                acb->cb(acb->cb_opaque, ret);
+                /* rescan from the head after each callback; presumably
+                   because the callback may itself change the list */
+                break;
+            } else {
+                pacb = &acb1->next;
+            }
+        }
+    }
+ the_end: ;
+}
+
+/* wait until at least one AIO was handled */
+static sigset_t wait_oset;
+
+/* Block the AIO completion signal and remember the previous signal
+   mask in 'wait_oset' so that qemu_aio_wait_end() can restore it. */
+void qemu_aio_wait_start(void)
+{
+    sigset_t aio_set;
+
+    sigemptyset(&aio_set);
+    sigaddset(&aio_set, aio_sig_num);
+    sigprocmask(SIG_BLOCK, &aio_set, &wait_oset);
+}
+
+/* Sleep in sigwait() until the AIO completion signal is received, then
+   process the finished requests with qemu_aio_poll(). */
+void qemu_aio_wait(void)
+{
+    sigset_t aio_set;
+    int received_sig;
+
+    sigemptyset(&aio_set);
+    sigaddset(&aio_set, aio_sig_num);
+
+    sigwait(&aio_set, &received_sig);
+
+    qemu_aio_poll();
+}
+
+/* Restore the signal mask saved in 'wait_oset' by
+   qemu_aio_wait_start(). */
+void qemu_aio_wait_end(void)
+{
+    sigprocmask(SIG_SETMASK, &wait_oset, NULL);
+}
+
+/* Allocate the POSIX AIO part of 'acb' and bind it to the image file
+   descriptor and to the AIO completion signal. Returns 0, or -1 if the
+   allocation failed. */
+static int raw_aio_new(BlockDriverAIOCB *acb)
+{
+    BDRVRawState *s = acb->bs->opaque;
+    RawAIOCB *req = qemu_mallocz(sizeof(RawAIOCB));
+
+    if (req == NULL)
+        return -1;
+
+    acb->opaque = req;
+    req->aiocb.aio_fildes = s->fd;
+    req->aiocb.aio_sigevent.sigev_notify = SIGEV_SIGNAL;
+    req->aiocb.aio_sigevent.sigev_signo = aio_sig_num;
+    return 0;
+}
+
+/* Submit an asynchronous read of 'nb_sectors' 512 byte sectors starting
+   at 'sector_num' into 'buf'. The request is linked at the head of
+   'first_aio' so that qemu_aio_poll() can find it. Returns 0 if the
+   request was queued, -errno if aio_read() refused it. */
+static int raw_aio_read(BlockDriverAIOCB *acb, int64_t sector_num, 
+                        uint8_t *buf, int nb_sectors)
+{
+    RawAIOCB *acb1 = acb->opaque;
+
+    assert(acb1->busy == 0);
+    acb1->busy = 1;
+    acb1->aiocb.aio_buf = buf;
+    acb1->aiocb.aio_nbytes = nb_sectors * 512;
+    acb1->aiocb.aio_offset = sector_num * 512;
+    acb1->next = first_aio;
+    first_aio = acb;
+    if (aio_read(&acb1->aiocb) < 0) {
+        /* the request was never queued: unlink it again (it is still
+           the list head), otherwise qemu_aio_poll() would later call
+           aio_error() on a control block that was not submitted */
+        first_aio = acb1->next;
+        acb1->next = NULL;
+        acb1->busy = 0;
+        return -errno;
+    } 
+    return 0;
+}
+
+/* Submit an asynchronous write of 'nb_sectors' 512 byte sectors
+   starting at 'sector_num' from 'buf'. The request is linked at the
+   head of 'first_aio' so that qemu_aio_poll() can find it. Returns 0 if
+   the request was queued, -errno if aio_write() refused it. */
+static int raw_aio_write(BlockDriverAIOCB *acb, int64_t sector_num, 
+                         const uint8_t *buf, int nb_sectors)
+{
+    RawAIOCB *acb1 = acb->opaque;
+
+    assert(acb1->busy == 0);
+    acb1->busy = 1;
+    /* aio_buf is not const qualified; the buffer is only read */
+    acb1->aiocb.aio_buf = (uint8_t *)buf;
+    acb1->aiocb.aio_nbytes = nb_sectors * 512;
+    acb1->aiocb.aio_offset = sector_num * 512;
+    acb1->next = first_aio;
+    first_aio = acb;
+    if (aio_write(&acb1->aiocb) < 0) {
+        /* the request was never queued: unlink it again (it is still
+           the list head), otherwise qemu_aio_poll() would later call
+           aio_error() on a control block that was not submitted */
+        first_aio = acb1->next;
+        acb1->next = NULL;
+        acb1->busy = 0;
+        return -errno;
+    } 
+    return 0;
+}
+
+/* Cancel the request 'acb' (waiting for it if it cannot be cancelled)
+   and unlink it from the 'first_aio' list. The completion callback is
+   not called. Does nothing more if 'acb' is not in the list. */
+static void raw_aio_cancel(BlockDriverAIOCB *acb)
+{
+    RawAIOCB *acb1 = acb->opaque;
+    RawAIOCB *cur;
+    int ret;
+    BlockDriverAIOCB **pacb;
+
+    ret = aio_cancel(acb1->aiocb.aio_fildes, &acb1->aiocb);
+    if (ret == AIO_NOTCANCELED) {
+        /* fail safe: if the aio could not be canceled, we wait for
+           it */
+        while (aio_error(&acb1->aiocb) == EINPROGRESS);
+    }
+
+    /* remove the callback from the queue. The list is walked with its
+       own cursor 'cur': 'acb1' must keep designating the request being
+       cancelled, otherwise a request which is not the list head would
+       never be unlinked and the 'busy' flag of its predecessor would
+       be cleared instead */
+    pacb = &first_aio;
+    for(;;) {
+        if (*pacb == NULL) {
+            break;
+        } else if (*pacb == acb) {
+            acb1->busy = 0;
+            *pacb = acb1->next;
+            break;
+        }
+        cur = (*pacb)->opaque;
+        pacb = &cur->next;
+    }
+}
+
+/* Cancel 'acb' if needed, then free its driver private part. */
+static void raw_aio_delete(BlockDriverAIOCB *acb)
+{
+    RawAIOCB *req;
+
+    req = acb->opaque;
+    raw_aio_cancel(acb);
+    qemu_free(req);
+}
+
+/* Close the host file descriptor of the image. */
+static void raw_close(BlockDriverState *bs)
+{
+    BDRVRawState *state;
+
+    state = bs->opaque;
+    close(state->fd);
+}
+
+/* Set the size of the image file to 'offset' bytes with ftruncate().
+   Returns 0 on success, -errno on failure. */
+static int raw_truncate(BlockDriverState *bs, int64_t offset)
+{
+    BDRVRawState *state = bs->opaque;
+    int ret = 0;
+
+    if (ftruncate(state->fd, offset) < 0)
+        ret = -errno;
+    return ret;
+}
+
+/* Return the size in bytes of the image file or device.
+   The generic method is lseek(fd, 0, SEEK_END), whose result is
+   returned unchanged. Host specific paths are tried first:
+   - _BSD: for descriptors whose st_mode has the S_IFCHR bit, the
+     DIOCGMEDIASIZE ioctl is used when it is defined, falling back to
+     LONG_LONG_MAX (CONFIG_COCOA) or to lseek();
+   - __sun__: the DKIOCGMEDIAINFO ioctl, falling back to lseek(). */
+static int64_t  raw_getlength(BlockDriverState *bs)
+{
+    BDRVRawState *s = bs->opaque;
+    int fd = s->fd;
+    int64_t size;
+#ifdef _BSD
+    struct stat sb;
+#endif
+#ifdef __sun__
+    struct dk_minfo minfo;
+    int rv;
+#endif
+
+#ifdef _BSD
+    if (!fstat(fd, &sb) && (S_IFCHR & sb.st_mode)) {
+#ifdef DIOCGMEDIASIZE
+       /* the assignment selected below is only executed when the ioctl
+          fails (non zero result) */
+       if (ioctl(fd, DIOCGMEDIASIZE, (off_t *)&size))
+#endif
+#ifdef CONFIG_COCOA
+        size = LONG_LONG_MAX;
+#else
+        size = lseek(fd, 0LL, SEEK_END);
+#endif
+    } else
+#endif
+#ifdef __sun__
+    /*
+     * use the DKIOCGMEDIAINFO ioctl to read the size.
+     */
+    rv = ioctl ( fd, DKIOCGMEDIAINFO, &minfo );
+    if ( rv != -1 ) {
+        size = minfo.dki_lbsize * minfo.dki_capacity;
+    } else /* there are reports that lseek on some devices
+              fails, but irc discussion said that contingency
+              on contingency was overkill */
+#endif
+    {
+        size = lseek(fd, 0, SEEK_END);
+    }
+    /* NOTE(review): this function is in the !_WIN32 half of the file,
+       so the block below looks unreachable here */
+#ifdef _WIN32
+    /* On Windows hosts it can happen that we're unable to get file size
+       for CD-ROM raw device (it's inherent limitation of the CDFS driver). */
+    if (size == -1)
+        size = LONG_LONG_MAX;
+#endif
+    return size;
+}
+
+/* Create (or truncate) 'filename' and give it a size of 'total_size'
+   sectors of 512 bytes. Neither creation flags nor a backing file are
+   supported (-ENOTSUP). Returns 0 on success, -EIO if the file cannot
+   be opened. The result of ftruncate() is not checked. */
+static int raw_create(const char *filename, int64_t total_size,
+                      const char *backing_file, int flags)
+{
+    const int open_flags =
+        O_WRONLY | O_CREAT | O_TRUNC | O_BINARY | O_LARGEFILE;
+    int fd;
+
+    if (backing_file || flags)
+        return -ENOTSUP;
+
+    fd = open(filename, open_flags, 0644);
+    if (fd < 0)
+        return -EIO;
+
+    ftruncate(fd, total_size * 512);
+    close(fd);
+    return 0;
+}
+
+/* Flush the image file to the host storage with fsync(). */
+static void raw_flush(BlockDriverState *bs)
+{
+    BDRVRawState *state;
+
+    state = bs->opaque;
+    fsync(state->fd);
+}
+
+/* Driver table of the raw file driver (Unix hosts). The leading
+   initializers are positional; the two NULL entries after raw_open
+   occupy the slots which held qcow_read/qcow_write in bdrv_qcow before
+   this change, i.e. the synchronous sector read/write functions. I/O
+   is done through the byte based pread/pwrite functions and through
+   POSIX AIO instead. */
+BlockDriver bdrv_raw = {
+    "raw",
+    sizeof(BDRVRawState),
+    NULL, /* no probe for protocols */
+    raw_open,
+    NULL,
+    NULL,
+    raw_close,
+    raw_create,
+    raw_flush,
+    
+    .bdrv_aio_new = raw_aio_new,
+    .bdrv_aio_read = raw_aio_read,
+    .bdrv_aio_write = raw_aio_write,
+    .bdrv_aio_cancel = raw_aio_cancel,
+    .bdrv_aio_delete = raw_aio_delete,
+    .protocol_name = "file", /* compared by find_protocol() with the text before ':' in a filename */
+    .bdrv_pread = raw_pread,
+    .bdrv_pwrite = raw_pwrite,
+    .bdrv_truncate = raw_truncate,
+    .bdrv_getlength = raw_getlength,
+};
+
+#else /* _WIN32 */
+
+/* XXX: use another file ? */
+#include <windows.h>
+#include <winioctl.h>
+
+/* Per-image state of the raw file driver on Win32 hosts. */
+typedef struct BDRVRawState {
+    HANDLE hfile; /* file handle returned by CreateFile() in raw_open() */
+} BDRVRawState;
+
+/* Driver private part of a BlockDriverAIOCB (reached through its
+   'opaque' field) for the Win32 overlapped I/O implementation. */
+typedef struct RawAIOCB {
+    HANDLE hEvent; /* event copied into ov.hEvent for each request */
+    OVERLAPPED ov; /* overlapped structure of the current request */
+    int count;     /* number of bytes requested, checked on completion */
+} RawAIOCB;
+
+/* 64 bit ftruncate() for Win32: set the size of the file open on the C
+   runtime descriptor 'fd' to 'length' bytes without changing the
+   current file position. Returns 0 on success, -1 on error. */
+int qemu_ftruncate64(int fd, int64_t length)
+{
+    LARGE_INTEGER li;
+    LONG high;
+    HANDLE h;
+    BOOL res;
+
+    /* lengths which do not fit in 32 bits are refused when the high
+       bit of GetVersion() is set */
+    if ((GetVersion() & 0x80000000UL) && (length >> 32) != 0)
+       return -1;
+
+    h = (HANDLE)_get_osfhandle(fd);
+
+    /* get current position, ftruncate do not change position */
+    li.HighPart = 0;
+    li.LowPart = SetFilePointer (h, 0, &li.HighPart, FILE_CURRENT);
+    if (li.LowPart == 0xffffffffUL && GetLastError() != NO_ERROR)
+       return -1;
+
+    /* SetFilePointer() returns the low 32 bits of the new position, so
+       0 is a valid result (length 0 or any multiple of 4 GB). Failure
+       is 0xffffffff together with an error code, as tested above */
+    high = length >> 32;
+    if (SetFilePointer(h, (DWORD) length, &high, FILE_BEGIN) == 0xffffffffUL &&
+        GetLastError() != NO_ERROR)
+       return -1;
+    res = SetEndOfFile(h);
+
+    /* back to old position */
+    SetFilePointer(h, li.LowPart, &li.HighPart, FILE_BEGIN);
+    return res ? 0 : -1;
+}
+
+/* Issue FSCTL_SET_SPARSE on the file open on the C runtime descriptor
+   'fd'. Returns the DeviceIoControl() result converted to int. */
+static int set_sparse(int fd)
+{
+    HANDLE h = (HANDLE)_get_osfhandle(fd);
+    DWORD returned;
+    BOOL ok;
+
+    ok = DeviceIoControl(h, FSCTL_SET_SPARSE,
+                         NULL, 0, NULL, 0, &returned, NULL);
+    return (int) ok;
+}
+
+/* Open 'filename' with CreateFile() for overlapped I/O and store the
+   handle in the driver state. The access mode comes from
+   (flags & BDRV_O_ACCESS); BDRV_O_CREAT creates or overwrites the
+   file, otherwise the file must exist. Returns 0 on success, -1 on
+   failure. */
+static int raw_open(BlockDriverState *bs, const char *filename, int flags)
+{
+    BDRVRawState *s = bs->opaque;
+    int access_flags, create_flags;
+
+    if ((flags & BDRV_O_ACCESS) == O_RDWR) {
+        access_flags = GENERIC_READ | GENERIC_WRITE;
+    } else {
+        access_flags = GENERIC_READ;
+    }
+    /* same flag name as in the Unix implementation of raw_open() */
+    if (flags & BDRV_O_CREAT) {
+        create_flags = CREATE_ALWAYS;
+    } else {
+        create_flags = OPEN_EXISTING;
+    }
+    s->hfile = CreateFile(filename, access_flags, 
+                          FILE_SHARE_READ, NULL,
+                          create_flags, FILE_FLAG_OVERLAPPED, 0);
+    if (s->hfile == INVALID_HANDLE_VALUE) 
+        return -1;
+    return 0;
+}
+
+/* Synchronously read 'count' bytes at byte position 'offset' into
+   'buf'. Returns the number of bytes read, or -EIO on error. */
+static int raw_pread(BlockDriverState *bs, int64_t offset, 
+                     uint8_t *buf, int count)
+{
+    BDRVRawState *s = bs->opaque;
+    OVERLAPPED ov;
+    DWORD ret_count;
+    
+    memset(&ov, 0, sizeof(ov));
+    ov.Offset = offset;
+    ov.OffsetHigh = offset >> 32;
+    /* the handle is opened with FILE_FLAG_OVERLAPPED: ReadFile() may
+       return FALSE with ERROR_IO_PENDING, which only means that the
+       transfer is still running and is not an error */
+    if (!ReadFile(s->hfile, buf, count, NULL, &ov) &&
+        GetLastError() != ERROR_IO_PENDING)
+        return -EIO;
+    /* wait for the completion and fetch the transferred byte count */
+    if (!GetOverlappedResult(s->hfile, &ov, &ret_count, TRUE))
+        return -EIO;
+    return ret_count;
+}
+
+/* Synchronously write 'count' bytes from 'buf' at byte position
+   'offset'. Returns the number of bytes written, or -EIO on error. */
+static int raw_pwrite(BlockDriverState *bs, int64_t offset, 
+                      const uint8_t *buf, int count)
+{
+    BDRVRawState *s = bs->opaque;
+    OVERLAPPED ov;
+    DWORD ret_count;
+    
+    memset(&ov, 0, sizeof(ov));
+    ov.Offset = offset;
+    ov.OffsetHigh = offset >> 32;
+    /* the handle is opened with FILE_FLAG_OVERLAPPED: WriteFile() may
+       return FALSE with ERROR_IO_PENDING, which only means that the
+       transfer is still running and is not an error */
+    if (!WriteFile(s->hfile, buf, count, NULL, &ov) &&
+        GetLastError() != ERROR_IO_PENDING)
+        return -EIO;
+    /* wait for the completion and fetch the transferred byte count */
+    if (!GetOverlappedResult(s->hfile, &ov, &ret_count, TRUE))
+        return -EIO;
+    return ret_count;
+}
+
+/* Allocate the Win32 part of 'acb' together with the manual reset
+   event used to signal the completion of its requests. Returns 0 on
+   success, -ENOMEM if the allocation or the event creation failed (in
+   which case nothing is left allocated). */
+static int raw_aio_new(BlockDriverAIOCB *acb)
+{
+    RawAIOCB *acb1;
+
+    acb1 = qemu_mallocz(sizeof(RawAIOCB));
+    if (!acb1)
+        return -ENOMEM;
+    /* the event belongs to the request (RawAIOCB.hEvent); it is copied
+       into ov.hEvent by raw_aio_read()/raw_aio_write() and closed by
+       raw_aio_delete() */
+    acb1->hEvent = CreateEvent(NULL, TRUE, FALSE, NULL);
+    if (!acb1->hEvent) {
+        qemu_free(acb1);
+        return -ENOMEM;
+    }
+    acb->opaque = acb1;
+    return 0;
+}
+
+/* Wait object callback registered by raw_aio_read()/raw_aio_write()
+   with the BlockDriverAIOCB as 'opaque'. Fetch the result of the
+   overlapped request and call the user callback with 0 if all the
+   requested bytes were transferred, -EIO otherwise. */
+static void raw_aio_cb(void *opaque)
+{
+    BlockDriverAIOCB *acb = opaque;
+    BlockDriverState *bs = acb->bs;
+    BDRVRawState *s = bs->opaque;
+    RawAIOCB *acb1 = acb->opaque;
+    DWORD ret_count;
+    int ret;
+
+    ret = GetOverlappedResult(s->hfile, &acb1->ov, &ret_count, TRUE);
+    if (!ret || ret_count != acb1->count) {
+        acb->cb(acb->cb_opaque, -EIO);
+    } else {
+        acb->cb(acb->cb_opaque, 0);
+    }
+}
+
+/* Start an overlapped read of 'nb_sectors' 512 byte sectors starting
+   at 'sector_num' into 'buf'. raw_aio_cb() is registered on the
+   request event to report the completion. Returns 0 if the request was
+   started, -EIO otherwise. */
+static int raw_aio_read(BlockDriverAIOCB *acb, int64_t sector_num, 
+                        uint8_t *buf, int nb_sectors)
+{
+    BlockDriverState *bs = acb->bs;
+    BDRVRawState *s = bs->opaque;
+    RawAIOCB *acb1 = acb->opaque;
+    int ret;
+    int64_t offset;
+
+    memset(&acb1->ov, 0, sizeof(acb1->ov));
+    offset = sector_num * 512;
+    acb1->ov.Offset = offset;
+    acb1->ov.OffsetHigh = offset >> 32;
+    acb1->ov.hEvent = acb1->hEvent;
+    acb1->count = nb_sectors * 512;
+    qemu_add_wait_object(acb1->ov.hEvent, raw_aio_cb, acb);
+    ret = ReadFile(s->hfile, buf, acb1->count, NULL, &acb1->ov);
+    /* FALSE with ERROR_IO_PENDING is the normal result for a request
+       which completes asynchronously */
+    if (!ret && GetLastError() != ERROR_IO_PENDING) {
+        /* nothing was started: do not leave the callback registered */
+        qemu_del_wait_object(acb1->ov.hEvent, raw_aio_cb, acb);
+        return -EIO;
+    }
+    return 0;
+}
+
+/* Start an overlapped write of 'nb_sectors' 512 byte sectors starting
+   at 'sector_num' from 'buf'. raw_aio_cb() is registered on the
+   request event to report the completion. Returns 0 if the request was
+   started, -EIO otherwise. */
+static int raw_aio_write(BlockDriverAIOCB *acb, int64_t sector_num, 
+                         const uint8_t *buf, int nb_sectors)
+{
+    BlockDriverState *bs = acb->bs;
+    BDRVRawState *s = bs->opaque;
+    RawAIOCB *acb1 = acb->opaque;
+    int ret;
+    int64_t offset;
+
+    memset(&acb1->ov, 0, sizeof(acb1->ov));
+    offset = sector_num * 512;
+    acb1->ov.Offset = offset;
+    acb1->ov.OffsetHigh = offset >> 32;
+    acb1->ov.hEvent = acb1->hEvent;
+    acb1->count = nb_sectors * 512;
+    qemu_add_wait_object(acb1->ov.hEvent, raw_aio_cb, acb);
+    /* this is the write path: the data must go to the file */
+    ret = WriteFile(s->hfile, buf, acb1->count, NULL, &acb1->ov);
+    /* FALSE with ERROR_IO_PENDING is the normal result for a request
+       which completes asynchronously */
+    if (!ret && GetLastError() != ERROR_IO_PENDING) {
+        /* nothing was started: do not leave the callback registered */
+        qemu_del_wait_object(acb1->ov.hEvent, raw_aio_cb, acb);
+        return -EIO;
+    }
+    return 0;
+}
+
+/* Unregister the completion callback of 'acb' and cancel the pending
+   I/O of the file handle with CancelIo(). */
+static void raw_aio_cancel(BlockDriverAIOCB *acb)
+{
+    BDRVRawState *s = acb->bs->opaque;
+    RawAIOCB *req = acb->opaque;
+
+    qemu_del_wait_object(req->ov.hEvent, raw_aio_cb, acb);
+
+    /* XXX: if more than one async I/O it is not correct */
+    CancelIo(s->hfile);
+}
+
+/* Cancel 'acb', then release its event and its driver private part. */
+static void raw_aio_delete(BlockDriverAIOCB *acb)
+{
+    RawAIOCB *req;
+
+    req = acb->opaque;
+    raw_aio_cancel(acb);
+    CloseHandle(req->hEvent);
+    qemu_free(req);
+}
+
+/* Flush the image file to the host storage (Win32 counterpart of the
+   fsync() done on Unix hosts). The result is ignored because the
+   function cannot report an error. */
+static void raw_flush(BlockDriverState *bs)
+{
+    BDRVRawState *s = bs->opaque;
+    FlushFileBuffers(s->hfile);
+}
+
+/* Close the file handle of the image. */
+static void raw_close(BlockDriverState *bs)
+{
+    BDRVRawState *state;
+
+    state = bs->opaque;
+    CloseHandle(state->hfile);
+}
+
+/* Set the size of the image file to 'offset' bytes. Returns 0 on
+   success, -EIO on failure. */
+static int raw_truncate(BlockDriverState *bs, int64_t offset)
+{
+    BDRVRawState *s = bs->opaque;
+    DWORD low;
+    LONG high; /* SetFilePointer() takes a pointer to LONG */
+
+    low = offset;
+    high = offset >> 32;
+    /* SetFilePointer() returns the low 32 bits of the new position, so
+       0 is a valid result; failure is 0xffffffff together with an
+       error code */
+    if (SetFilePointer(s->hfile, low, &high, FILE_BEGIN) == 0xffffffffUL &&
+        GetLastError() != NO_ERROR)
+       return -EIO;
+    if (!SetEndOfFile(s->hfile))
+        return -EIO;
+    return 0;
+}
+
+/* Return the size in bytes of the image file as reported by
+   GetFileSizeEx(), or -EIO if it cannot be obtained. */
+static int64_t  raw_getlength(BlockDriverState *bs)
+{
+    BDRVRawState *state = bs->opaque;
+    LARGE_INTEGER size;
+
+    if (GetFileSizeEx(state->hfile, &size))
+        return size.QuadPart;
+    return -EIO;
+}
+
+/* Create (or truncate) 'filename', request the sparse attribute for it
+   and give it a size of 'total_size' sectors of 512 bytes. Neither
+   creation flags nor a backing file are supported (-ENOTSUP). Returns 0
+   on success, -EIO if the file cannot be opened. The results of
+   set_sparse() and ftruncate() are not checked. */
+static int raw_create(const char *filename, int64_t total_size,
+                      const char *backing_file, int flags)
+{
+    const int open_flags = O_WRONLY | O_CREAT | O_TRUNC | O_BINARY;
+    int fd;
+
+    if (backing_file || flags)
+        return -ENOTSUP;
+
+    fd = open(filename, open_flags, 0644);
+    if (fd < 0)
+        return -EIO;
+
+    set_sparse(fd);
+    ftruncate(fd, total_size * 512);
+    close(fd);
+    return 0;
+}
+
+/* The five functions below are empty on Win32: the native AIO entry
+   points are disabled in the bdrv_raw table of this half of the file
+   (#if 0), so there is nothing to initialise, poll or wait for here. */
+void qemu_aio_init(void)
+{
+}
+
+void qemu_aio_poll(void)
+{
+}
+
+void qemu_aio_wait_start(void)
+{
+}
+
+void qemu_aio_wait(void)
+{
+}
+
+void qemu_aio_wait_end(void)
+{
+}
+
+/* Driver table of the raw file driver (Win32 hosts). The leading
+   initializers are positional; the two NULL entries after raw_open
+   occupy the slots which held qcow_read/qcow_write in bdrv_qcow before
+   this change, i.e. the synchronous sector read/write functions. I/O
+   is done through the byte based pread/pwrite functions. */
+BlockDriver bdrv_raw = {
+    "raw",
+    sizeof(BDRVRawState),
+    NULL, /* no probe for protocols */
+    raw_open,
+    NULL,
+    NULL,
+    raw_close,
+    raw_create,
+    raw_flush,
+    
+    /* native overlapped AIO is not enabled yet; with bdrv_aio_new left
+       NULL, bdrv_register() installs its AIO emulation functions */
+#if 0
+    .bdrv_aio_new = raw_aio_new,
+    .bdrv_aio_read = raw_aio_read,
+    .bdrv_aio_write = raw_aio_write,
+    .bdrv_aio_cancel = raw_aio_cancel,
+    .bdrv_aio_delete = raw_aio_delete,
+#endif
+    .protocol_name = "file", /* compared by find_protocol() with the text before ':' in a filename */
+    .bdrv_pread = raw_pread,
+    .bdrv_pwrite = raw_pwrite,
+    .bdrv_truncate = raw_truncate,
+    .bdrv_getlength = raw_getlength,
+};
+#endif /* _WIN32 */
index 4cc3db84a1c4c8b9756316197567f1e4d318dcaf..8279d6696dc3ce292b0058f1086cd1d9912c55a9 100644 (file)
@@ -89,7 +89,7 @@ static int vmdk_probe(const uint8_t *buf, int buf_size, const char *filename)
         return 0;
 }
 
-static int vmdk_open(BlockDriverState *bs, const char *filename)
+static int vmdk_open(BlockDriverState *bs, const char *filename, int flags)
 {
     BDRVVmdkState *s = bs->opaque;
     int fd, i;
index bdc3b8891c38c05442e7c6981cc74a68767a7a56..4d228c5b628385253123b40d5e9a21d52051792b 100644 (file)
@@ -86,19 +86,16 @@ static int vpc_probe(const uint8_t *buf, int buf_size, const char *filename)
     return 0;
 }
 
-static int vpc_open(BlockDriverState *bs, const char *filename)
+static int vpc_open(BlockDriverState *bs, const char *filename, int flags)
 {
     BDRVVPCState *s = bs->opaque;
     int fd, i;
     struct vpc_subheader header;
 
-    fd = open(filename, O_RDWR | O_BINARY | O_LARGEFILE);
-    if (fd < 0) {
-        fd = open(filename, O_RDONLY | O_BINARY | O_LARGEFILE);
-        if (fd < 0)
-            return -1;
-    }
-    
+    fd = open(filename, O_RDONLY | O_BINARY);
+    if (fd < 0)
+        return -1;
+
     bs->read_only = 1; // no write support yet
     
     s->fd = fd;
index 9dedf9115fbc321ddfa2e5e670195826f86d2b3d..ad6b7d29e894ae53c7b708268cea1061442beac1 100644 (file)
@@ -351,13 +351,6 @@ typedef struct BDRVVVFATState {
 } BDRVVVFATState;
 
 
-static int vvfat_probe(const uint8_t *buf, int buf_size, const char *filename)
-{
-    if (strstart(filename, "fat:", NULL))
-       return 100;
-    return 0;
-}
-
 static void init_mbr(BDRVVVFATState* s)
 {
     /* TODO: if the files mbr.img and bootsect.img exist, use them */
@@ -954,18 +947,22 @@ static int init_directories(BDRVVVFATState* s,
     return 0;
 }
 
+#ifdef DEBUG
 static BDRVVVFATState *vvv = NULL;
+#endif
 
 static int enable_write_target(BDRVVVFATState *s);
 static int is_consistent(BDRVVVFATState *s);
 
-static int vvfat_open(BlockDriverState *bs, const char* dirname)
+static int vvfat_open(BlockDriverState *bs, const char* dirname, int flags)
 {
     BDRVVVFATState *s = bs->opaque;
     int floppy = 0;
     int i;
 
+#ifdef DEBUG
     vvv = s;
+#endif
 
 DLOG(if (stderr == NULL) {
     stderr = fopen("vvfat.log", "a");
@@ -1040,7 +1037,6 @@ DLOG(if (stderr == NULL) {
        bs->heads = bs->cyls = bs->secs = 0;
 
     //    assert(is_consistent(s));
-
     return 0;
 }
 
@@ -2732,8 +2728,7 @@ static int enable_write_target(BDRVVVFATState *s)
     array_init(&(s->commits), sizeof(commit_t));
 
     s->qcow_filename = malloc(1024);
-    strcpy(s->qcow_filename, "/tmp/vl.XXXXXX");
-    get_tmp_filename(s->qcow_filename, strlen(s->qcow_filename) + 1);
+    get_tmp_filename(s->qcow_filename, 1024);
     if (bdrv_create(&bdrv_qcow,
                s->qcow_filename, s->sector_count, "fat:", 0) < 0)
        return -1;
@@ -2767,14 +2762,15 @@ static void vvfat_close(BlockDriverState *bs)
 BlockDriver bdrv_vvfat = {
     "vvfat",
     sizeof(BDRVVVFATState),
-    vvfat_probe,
+    NULL, /* no probe for protocols */
     vvfat_open,
     vvfat_read,
     vvfat_write,
     vvfat_close,
     NULL, /* ??? Not sure if we can do any meaningful flushing.  */
     NULL,
-    vvfat_is_allocated
+    vvfat_is_allocated,
+    .protocol_name = "fat",
 };
 
 #ifdef DEBUG
diff --git a/block.c b/block.c
index ceb0532bd0b1720c9dd29f36a031a0fce0f9b495..3cf8b7b123da1a938c27f849b26ded8eb2672920 100644 (file)
--- a/block.c
+++ b/block.c
 #include <sys/disk.h>
 #endif
 
-#ifdef CONFIG_COCOA
-#include <paths.h>
-#include <sys/param.h>
-#include <IOKit/IOKitLib.h>
-#include <IOKit/IOBSD.h>
-#include <IOKit/storage/IOMediaBSDClient.h>
-#include <IOKit/storage/IOMedia.h>
-#include <IOKit/storage/IOCDMedia.h>
-//#include <IOKit/storage/IOCDTypes.h>
-#include <CoreFoundation/CoreFoundation.h>
-#endif
-
-#ifdef __sun__
-#include <sys/dkio.h>
-#endif
+#define SECTOR_BITS 9
+#define SECTOR_SIZE (1 << SECTOR_BITS)
+
+static int bdrv_aio_new_em(BlockDriverAIOCB *acb);
+static int bdrv_aio_read_em(BlockDriverAIOCB *acb, int64_t sector_num,
+                              uint8_t *buf, int nb_sectors);
+static int bdrv_aio_write_em(BlockDriverAIOCB *acb, int64_t sector_num,
+                               const uint8_t *buf, int nb_sectors);
+static void bdrv_aio_cancel_em(BlockDriverAIOCB *acb);
+static void bdrv_aio_delete_em(BlockDriverAIOCB *acb);
+static int bdrv_read_em(BlockDriverState *bs, int64_t sector_num, 
+                        uint8_t *buf, int nb_sectors);
+static int bdrv_write_em(BlockDriverState *bs, int64_t sector_num,
+                         const uint8_t *buf, int nb_sectors);
 
 static BlockDriverState *bdrv_first;
 static BlockDriver *first_drv;
 
-#ifdef CONFIG_COCOA
-static kern_return_t FindEjectableCDMedia( io_iterator_t *mediaIterator );
-static kern_return_t GetBSDPath( io_iterator_t mediaIterator, char *bsdPath, CFIndex maxPathSize );
+#ifdef _WIN32
+#define PATH_SEP '\\'
+#else
+#define PATH_SEP '/'
+#endif
 
-kern_return_t FindEjectableCDMedia( io_iterator_t *mediaIterator )
+int path_is_absolute(const char *path)
 {
-    kern_return_t       kernResult; 
-    mach_port_t     masterPort;
-    CFMutableDictionaryRef  classesToMatch;
-
-    kernResult = IOMasterPort( MACH_PORT_NULL, &masterPort );
-    if ( KERN_SUCCESS != kernResult ) {
-        printf( "IOMasterPort returned %d\n", kernResult );
-    }
-    
-    classesToMatch = IOServiceMatching( kIOCDMediaClass ); 
-    if ( classesToMatch == NULL ) {
-        printf( "IOServiceMatching returned a NULL dictionary.\n" );
-    } else {
-    CFDictionarySetValue( classesToMatch, CFSTR( kIOMediaEjectableKey ), kCFBooleanTrue );
-    }
-    kernResult = IOServiceGetMatchingServices( masterPort, classesToMatch, mediaIterator );
-    if ( KERN_SUCCESS != kernResult )
-    {
-        printf( "IOServiceGetMatchingServices returned %d\n", kernResult );
-    }
-    
-    return kernResult;
+    const char *p;
+    p = strchr(path, ':');
+    if (p)
+        p++;
+    else
+        p = path;
+    return (*p == PATH_SEP);
 }
 
-kern_return_t GetBSDPath( io_iterator_t mediaIterator, char *bsdPath, CFIndex maxPathSize )
+/* if filename is absolute, just copy it to dest. Otherwise, build a
+   path to it by considering it is relative to base_path. URL are
+   supported. */
+void path_combine(char *dest, int dest_size,
+                  const char *base_path,
+                  const char *filename)
 {
-    io_object_t     nextMedia;
-    kern_return_t   kernResult = KERN_FAILURE;
-    *bsdPath = '\0';
-    nextMedia = IOIteratorNext( mediaIterator );
-    if ( nextMedia )
-    {
-        CFTypeRef   bsdPathAsCFString;
-    bsdPathAsCFString = IORegistryEntryCreateCFProperty( nextMedia, CFSTR( kIOBSDNameKey ), kCFAllocatorDefault, 0 );
-        if ( bsdPathAsCFString ) {
-            size_t devPathLength;
-            strcpy( bsdPath, _PATH_DEV );
-            strcat( bsdPath, "r" );
-            devPathLength = strlen( bsdPath );
-            if ( CFStringGetCString( bsdPathAsCFString, bsdPath + devPathLength, maxPathSize - devPathLength, kCFStringEncodingASCII ) ) {
-                kernResult = KERN_SUCCESS;
-            }
-            CFRelease( bsdPathAsCFString );
-        }
-        IOObjectRelease( nextMedia );
+    const char *p, *p1;
+    int len;
+
+    if (dest_size <= 0)
+        return;
+    if (path_is_absolute(filename)) {
+        pstrcpy(dest, dest_size, filename);
+    } else {
+        p = strchr(base_path, ':');
+        if (p)
+            p++;
+        else
+            p = base_path;
+        p1 = strrchr(base_path, PATH_SEP);
+        if (p1)
+            p1++;
+        else
+            p1 = base_path;
+        if (p1 > p)
+            p = p1;
+        len = p - base_path;
+        if (len > dest_size - 1)
+            len = dest_size - 1;
+        memcpy(dest, base_path, len);
+        dest[len] = '\0';
+        pstrcat(dest, dest_size, filename);
     }
-    
-    return kernResult;
 }
 
-#endif
 
 void bdrv_register(BlockDriver *bdrv)
 {
+    if (!bdrv->bdrv_aio_new) {
+        /* add AIO emulation layer */
+        bdrv->bdrv_aio_new = bdrv_aio_new_em;
+        bdrv->bdrv_aio_read = bdrv_aio_read_em;
+        bdrv->bdrv_aio_write = bdrv_aio_write_em;
+        bdrv->bdrv_aio_cancel = bdrv_aio_cancel_em;
+        bdrv->bdrv_aio_delete = bdrv_aio_delete_em;
+    } else if (!bdrv->bdrv_read && !bdrv->bdrv_pread) {
+        /* add synchronous IO emulation layer */
+        bdrv->bdrv_read = bdrv_read_em;
+        bdrv->bdrv_write = bdrv_write_em;
+    }
     bdrv->next = first_drv;
     first_drv = bdrv;
 }
@@ -156,14 +163,7 @@ int bdrv_create(BlockDriver *drv,
 #ifdef _WIN32
 void get_tmp_filename(char *filename, int size)
 {
-    char* p = strrchr(filename, '/');
-
-    if (p == NULL)
-       return;
-
-    /* XXX: find a better function */
-    tmpnam(p);
-    *p = '/';
+    tmpnam(filename);
 }
 #else
 void get_tmp_filename(char *filename, int size)
@@ -176,101 +176,107 @@ void get_tmp_filename(char *filename, int size)
 }
 #endif
 
+static BlockDriver *find_protocol(const char *filename)
+{
+    BlockDriver *drv1;
+    char protocol[128];
+    int len;
+    const char *p;
+    p = strchr(filename, ':');
+    if (!p)
+        return &bdrv_raw;
+    len = p - filename;
+    if (len > sizeof(protocol) - 1)
+        len = sizeof(protocol) - 1;
+#ifdef _WIN32
+    if (len == 1) {
+        /* specific win32 case for drive letters */
+        return &bdrv_raw;
+    }
+#endif   
+    memcpy(protocol, filename, len);
+    protocol[len] = '\0';
+    for(drv1 = first_drv; drv1 != NULL; drv1 = drv1->next) {
+        if (drv1->protocol_name && 
+            !strcmp(drv1->protocol_name, protocol))
+            return drv1;
+    }
+    return NULL;
+}
+
 /* XXX: force raw format if block or character device ? It would
    simplify the BSD case */
 static BlockDriver *find_image_format(const char *filename)
 {
-    int fd, ret, score, score_max;
+    int ret, score, score_max;
     BlockDriver *drv1, *drv;
-    uint8_t *buf;
-    size_t bufsize = 1024;
-
-    fd = open(filename, O_RDONLY | O_BINARY | O_LARGEFILE);
-    if (fd < 0) {
-        buf = NULL;
-        ret = 0;
-    } else {
-#ifdef DIOCGSECTORSIZE
-        {
-            unsigned int sectorsize = 512;
-            if (!ioctl(fd, DIOCGSECTORSIZE, &sectorsize) &&
-                sectorsize > bufsize)
-                bufsize = sectorsize;
-        }
-#endif
-#ifdef CONFIG_COCOA
-        u_int32_t   blockSize = 512;
-        if ( !ioctl( fd, DKIOCGETBLOCKSIZE, &blockSize ) && blockSize > bufsize) {
-            bufsize = blockSize;
-        }
-#endif
-        buf = qemu_malloc(bufsize);
-        if (!buf)
-            return NULL;
-        ret = read(fd, buf, bufsize);
-        if (ret < 0) {
-            close(fd);
-            qemu_free(buf);
-            return NULL;
-        }
-        close(fd);
-    }
+    uint8_t buf[2048];
+    BlockDriverState *bs;
     
-    drv = NULL;
+    drv = find_protocol(filename);
+    /* no need to test disk image formats for vvfat or host specific
+       devices */
+    if (drv == &bdrv_vvfat)
+        return drv;
+    if (strstart(filename, "/dev/", NULL))
+        return &bdrv_raw;
+    
+    ret = bdrv_file_open(&bs, filename, BDRV_O_RDONLY);
+    if (ret < 0)
+        return NULL;
+    ret = bdrv_pread(bs, 0, buf, sizeof(buf));
+    bdrv_delete(bs);
+    if (ret < 0) {
+        return NULL;
+    }
+
     score_max = 0;
     for(drv1 = first_drv; drv1 != NULL; drv1 = drv1->next) {
-        score = drv1->bdrv_probe(buf, ret, filename);
-        if (score > score_max) {
-            score_max = score;
-            drv = drv1;
+        if (drv1->bdrv_probe) {
+            score = drv1->bdrv_probe(buf, ret, filename);
+            if (score > score_max) {
+                score_max = score;
+                drv = drv1;
+            }
         }
     }
-    qemu_free(buf);
     return drv;
 }
 
-int bdrv_open(BlockDriverState *bs, const char *filename, int snapshot)
+int bdrv_file_open(BlockDriverState **pbs, const char *filename, int flags)
 {
-#ifdef CONFIG_COCOA
-    if ( strncmp( filename, "/dev/cdrom", 10 ) == 0 ) {
-        kern_return_t kernResult;
-        io_iterator_t mediaIterator;
-        char bsdPath[ MAXPATHLEN ];
-        int fd;
-        kernResult = FindEjectableCDMedia( &mediaIterator );
-        kernResult = GetBSDPath( mediaIterator, bsdPath, sizeof( bsdPath ) );
-    
-        if ( bsdPath[ 0 ] != '\0' ) {
-            strcat(bsdPath,"s0");
-            /* some CDs don't have a partition 0 */
-            fd = open(bsdPath, O_RDONLY | O_BINARY | O_LARGEFILE);
-            if (fd < 0) {
-                bsdPath[strlen(bsdPath)-1] = '1';
-            } else {
-                close(fd);
-            }
-            filename = bsdPath;
-        }
-        
-        if ( mediaIterator )
-            IOObjectRelease( mediaIterator );
+    BlockDriverState *bs;
+    int ret;
+
+    bs = bdrv_new("");
+    if (!bs)
+        return -ENOMEM;
+    ret = bdrv_open2(bs, filename, flags | BDRV_O_FILE, NULL);
+    if (ret < 0) {
+        bdrv_delete(bs);
+        return ret;
     }
-#endif
-    return bdrv_open2(bs, filename, snapshot, NULL);
+    *pbs = bs;
+    return 0;
+}
+
+int bdrv_open(BlockDriverState *bs, const char *filename, int flags)
+{
+    return bdrv_open2(bs, filename, flags, NULL);
 }
 
-int bdrv_open2(BlockDriverState *bs, const char *filename, int snapshot,
+int bdrv_open2(BlockDriverState *bs, const char *filename, int flags,
                BlockDriver *drv)
 {
-    int ret;
+    int ret, open_flags;
     char tmp_filename[1024];
+    char backing_filename[1024];
     
     bs->read_only = 0;
     bs->is_temporary = 0;
     bs->encrypted = 0;
 
-    if (snapshot) {
+    if (flags & BDRV_O_SNAPSHOT) {
         BlockDriverState *bs1;
         int64_t total_size;
         
@@ -280,17 +286,16 @@ int bdrv_open2(BlockDriverState *bs, const char *filename, int snapshot,
         /* if there is a backing file, use it */
         bs1 = bdrv_new("");
         if (!bs1) {
-            return -1;
+            return -ENOMEM;
         }
         if (bdrv_open(bs1, filename, 0) < 0) {
             bdrv_delete(bs1);
             return -1;
         }
-        total_size = bs1->total_sectors;
+        total_size = bdrv_getlength(bs1) >> SECTOR_BITS;
         bdrv_delete(bs1);
         
         get_tmp_filename(tmp_filename, sizeof(tmp_filename));
-        /* XXX: use cow for linux as it is more efficient ? */
         if (bdrv_create(&bdrv_qcow, tmp_filename, 
                         total_size, filename, 0) < 0) {
             return -1;
@@ -300,27 +305,43 @@ int bdrv_open2(BlockDriverState *bs, const char *filename, int snapshot,
     }
 
     pstrcpy(bs->filename, sizeof(bs->filename), filename);
-    if (!drv) {
-        drv = find_image_format(filename);
+    if (flags & BDRV_O_FILE) {
+        drv = find_protocol(filename);
         if (!drv)
-            return -1;
+            return -ENOENT;
+    } else {
+        if (!drv) {
+            drv = find_image_format(filename);
+            if (!drv)
+                return -1;
+        }
     }
     bs->drv = drv;
     bs->opaque = qemu_mallocz(drv->instance_size);
     if (bs->opaque == NULL && drv->instance_size > 0)
         return -1;
-    
-    ret = drv->bdrv_open(bs, filename);
+    /* Note: for compatibility, we open disk image files as RDWR, and
+       RDONLY as fallback */
+    if (!(flags & BDRV_O_FILE))
+        open_flags = BDRV_O_RDWR;
+    else
+        open_flags = flags & ~(BDRV_O_FILE | BDRV_O_SNAPSHOT);
+    ret = drv->bdrv_open(bs, filename, open_flags);
+    if (ret == -EACCES && !(flags & BDRV_O_FILE)) {
+        ret = drv->bdrv_open(bs, filename, BDRV_O_RDONLY);
+        bs->read_only = 1;
+    }
     if (ret < 0) {
         qemu_free(bs->opaque);
-        return -1;
+        return ret;
     }
+
 #ifndef _WIN32
     if (bs->is_temporary) {
         unlink(filename);
     }
 #endif
-    if (bs->backing_file[0] != '\0' && drv->bdrv_is_allocated) {
+    if (bs->backing_file[0] != '\0') {
         /* if there is a backing file, use it */
         bs->backing_hd = bdrv_new("");
         if (!bs->backing_hd) {
@@ -328,7 +349,9 @@ int bdrv_open2(BlockDriverState *bs, const char *filename, int snapshot,
             bdrv_close(bs);
             return -1;
         }
-        if (bdrv_open(bs->backing_hd, bs->backing_file, 0) < 0)
+        path_combine(backing_filename, sizeof(backing_filename),
+                     filename, bs->backing_file);
+        if (bdrv_open(bs->backing_hd, backing_filename, 0) < 0)
             goto fail;
     }
 
@@ -373,7 +396,7 @@ void bdrv_delete(BlockDriverState *bs)
 /* commit COW file into the raw image */
 int bdrv_commit(BlockDriverState *bs)
 {
-    int64_t i;
+    int64_t i, total_sectors;
     int n, j;
     unsigned char sector[512];
 
@@ -388,7 +411,8 @@ int bdrv_commit(BlockDriverState *bs)
        return -ENOTSUP;
     }
 
-    for (i = 0; i < bs->total_sectors;) {
+    total_sectors = bdrv_getlength(bs) >> SECTOR_BITS;
+    for (i = 0; i < total_sectors;) {
         if (bs->drv->bdrv_is_allocated(bs, i, 65536, &n)) {
             for(j = 0; j < n; j++) {
                 if (bdrv_read(bs, i, sector, 1) != 0) {
@@ -411,49 +435,43 @@ int bdrv_commit(BlockDriverState *bs)
     return 0;
 }
 
-/* return -1 if error */
+/* return < 0 if error */
 int bdrv_read(BlockDriverState *bs, int64_t sector_num, 
               uint8_t *buf, int nb_sectors)
 {
-    int ret, n;
     BlockDriver *drv = bs->drv;
 
     if (!bs->inserted)
         return -1;
 
-    while (nb_sectors > 0) {
-        if (sector_num == 0 && bs->boot_sector_enabled) {
+    if (sector_num == 0 && bs->boot_sector_enabled && nb_sectors > 0) {
             memcpy(buf, bs->boot_sector_data, 512);
-            n = 1;
-        } else if (bs->backing_hd) {
-            if (drv->bdrv_is_allocated(bs, sector_num, nb_sectors, &n)) {
-                ret = drv->bdrv_read(bs, sector_num, buf, n);
-                if (ret < 0)
-                    return -1;
-            } else {
-                /* read from the base image */
-                ret = bdrv_read(bs->backing_hd, sector_num, buf, n);
-                if (ret < 0)
-                    return -1;
-            }
-        } else {
-            ret = drv->bdrv_read(bs, sector_num, buf, nb_sectors);
-            if (ret < 0)
-                return -1;
-            /* no need to loop */
-            break;
-        }
-        nb_sectors -= n;
-        sector_num += n;
-        buf += n * 512;
+        sector_num++;
+        nb_sectors--;
+        buf += 512;
+        if (nb_sectors == 0)
+            return 0;
+    }
+    if (drv->bdrv_pread) {
+        int ret, len;
+        len = nb_sectors * 512;
+        ret = drv->bdrv_pread(bs, sector_num * 512, buf, len);
+        if (ret < 0)
+            return ret;
+        else if (ret != len)
+            return -EIO;
+        else
+            return 0;
+    } else {
+        return drv->bdrv_read(bs, sector_num, buf, nb_sectors);
     }
-    return 0;
 }
 
-/* return -1 if error */
+/* return < 0 if error */
 int bdrv_write(BlockDriverState *bs, int64_t sector_num, 
                const uint8_t *buf, int nb_sectors)
 {
+    BlockDriver *drv = bs->drv;
     if (!bs->inserted)
         return -1;
     if (bs->read_only)
@@ -461,12 +479,183 @@ int bdrv_write(BlockDriverState *bs, int64_t sector_num,
     if (sector_num == 0 && bs->boot_sector_enabled && nb_sectors > 0) {
         memcpy(bs->boot_sector_data, buf, 512);   
     }
-    return bs->drv->bdrv_write(bs, sector_num, buf, nb_sectors);
+    if (drv->bdrv_pwrite) {
+        int ret, len;
+        len = nb_sectors * 512;
+        ret = drv->bdrv_pwrite(bs, sector_num * 512, buf, len);
+        if (ret < 0)
+            return ret;
+        else if (ret != len)
+            return -EIO;
+        else
+            return 0;
+    } else {
+        return drv->bdrv_write(bs, sector_num, buf, nb_sectors);
+    }
+}
+
+#if 0
+/* not necessary now */
+static int bdrv_pread_em(BlockDriverState *bs, int64_t offset, 
+                         void *buf1, int count1)
+{
+    uint8_t *buf = buf1;
+    uint8_t tmp_buf[SECTOR_SIZE];
+    int len, nb_sectors, count;
+    int64_t sector_num;
+
+    count = count1;
+    /* first read to align to sector start */
+    len = (SECTOR_SIZE - offset) & (SECTOR_SIZE - 1);
+    if (len > count)
+        len = count;
+    sector_num = offset >> SECTOR_BITS;
+    if (len > 0) {
+        if (bdrv_read(bs, sector_num, tmp_buf, 1) < 0)
+            return -EIO;
+        memcpy(buf, tmp_buf + (offset & (SECTOR_SIZE - 1)), len);
+        count -= len;
+        if (count == 0)
+            return count1;
+        sector_num++;
+        buf += len;
+    }
+
+    /* read the sectors "in place" */
+    nb_sectors = count >> SECTOR_BITS;
+    if (nb_sectors > 0) {
+        if (bdrv_read(bs, sector_num, buf, nb_sectors) < 0)
+            return -EIO;
+        sector_num += nb_sectors;
+        len = nb_sectors << SECTOR_BITS;
+        buf += len;
+        count -= len;
+    }
+
+    /* add data from the last sector */
+    if (count > 0) {
+        if (bdrv_read(bs, sector_num, tmp_buf, 1) < 0)
+            return -EIO;
+        memcpy(buf, tmp_buf, count);
+    }
+    return count1;
+}
+
+static int bdrv_pwrite_em(BlockDriverState *bs, int64_t offset, 
+                          const void *buf1, int count1)
+{
+    const uint8_t *buf = buf1;
+    uint8_t tmp_buf[SECTOR_SIZE];
+    int len, nb_sectors, count;
+    int64_t sector_num;
+
+    count = count1;
+    /* first write to align to sector start */
+    len = (SECTOR_SIZE - offset) & (SECTOR_SIZE - 1);
+    if (len > count)
+        len = count;
+    sector_num = offset >> SECTOR_BITS;
+    if (len > 0) {
+        if (bdrv_read(bs, sector_num, tmp_buf, 1) < 0)
+            return -EIO;
+        memcpy(tmp_buf + (offset & (SECTOR_SIZE - 1)), buf, len);
+        if (bdrv_write(bs, sector_num, tmp_buf, 1) < 0)
+            return -EIO;
+        count -= len;
+        if (count == 0)
+            return count1;
+        sector_num++;
+        buf += len;
+    }
+
+    /* write the sectors "in place" */
+    nb_sectors = count >> SECTOR_BITS;
+    if (nb_sectors > 0) {
+        if (bdrv_write(bs, sector_num, buf, nb_sectors) < 0)
+            return -EIO;
+        sector_num += nb_sectors;
+        len = nb_sectors << SECTOR_BITS;
+        buf += len;
+        count -= len;
+    }
+
+    /* add data from the last sector */
+    if (count > 0) {
+        if (bdrv_read(bs, sector_num, tmp_buf, 1) < 0)
+            return -EIO;
+        memcpy(tmp_buf, buf, count);
+        if (bdrv_write(bs, sector_num, tmp_buf, 1) < 0)
+            return -EIO;
+    }
+    return count1;
+}
+#endif
+
+/**
+ * Read with byte offsets (needed only for file protocols) 
+ */
+int bdrv_pread(BlockDriverState *bs, int64_t offset, 
+               void *buf1, int count1)
+{
+    BlockDriver *drv = bs->drv;
+
+    if (!drv)
+        return -ENOENT;
+    if (!drv->bdrv_pread)
+        return -ENOTSUP;
+    return drv->bdrv_pread(bs, offset, buf1, count1);
+}
+
+/** 
+ * Write with byte offsets (needed only for file protocols) 
+ */
+int bdrv_pwrite(BlockDriverState *bs, int64_t offset, 
+                const void *buf1, int count1)
+{
+    BlockDriver *drv = bs->drv;
+
+    if (!drv)
+        return -ENOENT;
+    if (!drv->bdrv_pwrite)
+        return -ENOTSUP;
+    return drv->bdrv_pwrite(bs, offset, buf1, count1);
+}
+
+/**
+ * Truncate file to 'offset' bytes (needed only for file protocols)
+ */
+int bdrv_truncate(BlockDriverState *bs, int64_t offset)
+{
+    BlockDriver *drv = bs->drv;
+    if (!drv)
+        return -ENOENT;
+    if (!drv->bdrv_truncate)
+        return -ENOTSUP;
+    return drv->bdrv_truncate(bs, offset);
+}
+
+/**
+ * Length of a file in bytes. Return < 0 if error or unknown.
+ */
+int64_t bdrv_getlength(BlockDriverState *bs)
+{
+    BlockDriver *drv = bs->drv;
+    if (!drv)
+        return -ENOENT;
+    if (!drv->bdrv_getlength) {
+        /* legacy mode */
+        return bs->total_sectors * SECTOR_SIZE;
+    }
+    return drv->bdrv_getlength(bs);
 }
 
 void bdrv_get_geometry(BlockDriverState *bs, int64_t *nb_sectors_ptr)
 {
-    *nb_sectors_ptr = bs->total_sectors;
+    int64_t size;
+    size = bdrv_getlength(bs);
+    if (size < 0)
+        size = 0;
+    *nb_sectors_ptr = size >> SECTOR_BITS;
 }
 
 /* force a given boot sector. */
@@ -660,187 +849,251 @@ void bdrv_info(void)
     }
 }
 
+void bdrv_get_backing_filename(BlockDriverState *bs, 
+                               char *filename, int filename_size)
+{
+    if (!bs->backing_hd) {
+        pstrcpy(filename, filename_size, "");
+    } else {
+        pstrcpy(filename, filename_size, bs->backing_file);
+    }
+}
+
+
 /**************************************************************/
-/* RAW block driver */
+/* async I/Os */
 
-typedef struct BDRVRawState {
-    int fd;
-} BDRVRawState;
+BlockDriverAIOCB *bdrv_aio_new(BlockDriverState *bs)
+{
+    BlockDriver *drv = bs->drv;
+    BlockDriverAIOCB *acb;
+    acb = qemu_mallocz(sizeof(BlockDriverAIOCB));
+    if (!acb)
+        return NULL;
+    
+    acb->bs = bs;
+    if (drv->bdrv_aio_new(acb) < 0) {
+        qemu_free(acb);
+        return NULL;
+    }
+    return acb;
+}
 
-static int raw_probe(const uint8_t *buf, int buf_size, const char *filename)
+int bdrv_aio_read(BlockDriverAIOCB *acb, int64_t sector_num,
+                  uint8_t *buf, int nb_sectors,
+                  BlockDriverCompletionFunc *cb, void *opaque)
 {
-    return 1; /* maybe */
+    BlockDriverState *bs = acb->bs;
+    BlockDriver *drv = bs->drv;
+
+    if (!bs->inserted)
+        return -1;
+    
+    /* XXX: we assume that nb_sectors == 0 is supported by the async read */
+    if (sector_num == 0 && bs->boot_sector_enabled && nb_sectors > 0) {
+        memcpy(buf, bs->boot_sector_data, 512);
+        sector_num++;
+        nb_sectors--;
+        buf += 512;
+    }
+
+    acb->cb = cb;
+    acb->cb_opaque = opaque;
+    return drv->bdrv_aio_read(acb, sector_num, buf, nb_sectors);
 }
 
-static int raw_open(BlockDriverState *bs, const char *filename)
+int bdrv_aio_write(BlockDriverAIOCB *acb, int64_t sector_num,
+                   const uint8_t *buf, int nb_sectors,
+                   BlockDriverCompletionFunc *cb, void *opaque)
 {
-    BDRVRawState *s = bs->opaque;
-    int fd;
-    int64_t size;
-#ifdef _BSD
-    struct stat sb;
-#endif
-#ifdef __sun__
-    struct dk_minfo minfo;
-    int rv;
-#endif
+    BlockDriverState *bs = acb->bs;
+    BlockDriver *drv = bs->drv;
 
-    fd = open(filename, O_RDWR | O_BINARY | O_LARGEFILE);
-    if (fd < 0) {
-        fd = open(filename, O_RDONLY | O_BINARY | O_LARGEFILE);
-        if (fd < 0)
+    if (!bs->inserted)
             return -1;
-        bs->read_only = 1;
+    if (bs->read_only)
+        return -1;
+    if (sector_num == 0 && bs->boot_sector_enabled && nb_sectors > 0) {
+        memcpy(bs->boot_sector_data, buf, 512);   
     }
-#ifdef _BSD
-    if (!fstat(fd, &sb) && (S_IFCHR & sb.st_mode)) {
-#ifdef DIOCGMEDIASIZE
-       if (ioctl(fd, DIOCGMEDIASIZE, (off_t *)&size))
-#endif
-#ifdef CONFIG_COCOA
-        size = LONG_LONG_MAX;
-#else
-        size = lseek(fd, 0LL, SEEK_END);
-#endif
-    } else
-#endif
-#ifdef __sun__
-    /*
-     * use the DKIOCGMEDIAINFO ioctl to read the size.
-     */
-    rv = ioctl ( fd, DKIOCGMEDIAINFO, &minfo );
-    if ( rv != -1 ) {
-        size = minfo.dki_lbsize * minfo.dki_capacity;
-    } else /* there are reports that lseek on some devices
-              fails, but irc discussion said that contingency
-              on contingency was overkill */
-#endif
+
+    acb->cb = cb;
+    acb->cb_opaque = opaque;
+    return drv->bdrv_aio_write(acb, sector_num, buf, nb_sectors);
+}
+
+void bdrv_aio_cancel(BlockDriverAIOCB *acb)
     {
-        size = lseek(fd, 0, SEEK_END);
+    BlockDriverState *bs = acb->bs;
+    BlockDriver *drv = bs->drv;
+
+    drv->bdrv_aio_cancel(acb);
     }
-#ifdef _WIN32
-    /* On Windows hosts it can happen that we're unable to get file size
-       for CD-ROM raw device (it's inherent limitation of the CDFS driver). */
-    if (size == -1)
-        size = LONG_LONG_MAX;
-#endif
-    bs->total_sectors = size / 512;
-    s->fd = fd;
+
+void bdrv_aio_delete(BlockDriverAIOCB *acb)
+{
+    BlockDriverState *bs = acb->bs;
+    BlockDriver *drv = bs->drv;
+
+    drv->bdrv_aio_delete(acb);
+    qemu_free(acb);
+}
+
+/**************************************************************/
+/* async block device emulation */
+
+#ifdef QEMU_TOOL
+static int bdrv_aio_new_em(BlockDriverAIOCB *acb)
+{
     return 0;
 }
 
-static int raw_read(BlockDriverState *bs, int64_t sector_num, 
+static int bdrv_aio_read_em(BlockDriverAIOCB *acb, int64_t sector_num,
                     uint8_t *buf, int nb_sectors)
 {
-    BDRVRawState *s = bs->opaque;
     int ret;
-    
-    lseek(s->fd, sector_num * 512, SEEK_SET);
-    ret = read(s->fd, buf, nb_sectors * 512);
-    if (ret != nb_sectors * 512) 
-        return -1;
+    ret = bdrv_read(acb->bs, sector_num, buf, nb_sectors);
+    acb->cb(acb->cb_opaque, ret);
     return 0;
 }
 
-static int raw_write(BlockDriverState *bs, int64_t sector_num, 
+static int bdrv_aio_write_em(BlockDriverAIOCB *acb, int64_t sector_num,
                      const uint8_t *buf, int nb_sectors)
 {
-    BDRVRawState *s = bs->opaque;
     int ret;
-    
-    lseek(s->fd, sector_num * 512, SEEK_SET);
-    ret = write(s->fd, buf, nb_sectors * 512);
-    if (ret != nb_sectors * 512) 
-        return -1;
+    ret = bdrv_write(acb->bs, sector_num, buf, nb_sectors);
+    acb->cb(acb->cb_opaque, ret);
     return 0;
 }
 
-static void raw_close(BlockDriverState *bs)
+static void bdrv_aio_cancel_em(BlockDriverAIOCB *acb)
 {
-    BDRVRawState *s = bs->opaque;
-    close(s->fd);
 }
 
-#ifdef _WIN32
-#include <windows.h>
-#include <winioctl.h>
-
-int qemu_ftruncate64(int fd, int64_t length)
+static void bdrv_aio_delete_em(BlockDriverAIOCB *acb)
 {
-    LARGE_INTEGER li;
-    LONG high;
-    HANDLE h;
-    BOOL res;
-
-    if ((GetVersion() & 0x80000000UL) && (length >> 32) != 0)
-       return -1;
+}
+#else
+typedef struct BlockDriverAIOCBSync {
+    QEMUBH *bh;
+    int ret;
+} BlockDriverAIOCBSync;
 
-    h = (HANDLE)_get_osfhandle(fd);
+static void bdrv_aio_bh_cb(void *opaque)
+{
+    BlockDriverAIOCB *acb = opaque;
+    BlockDriverAIOCBSync *acb1 = acb->opaque;
+    acb->cb(acb->cb_opaque, acb1->ret);
+}
 
-    /* get current position, ftruncate do not change position */
-    li.HighPart = 0;
-    li.LowPart = SetFilePointer (h, 0, &li.HighPart, FILE_CURRENT);
-    if (li.LowPart == 0xffffffffUL && GetLastError() != NO_ERROR)
-       return -1;
+static int bdrv_aio_new_em(BlockDriverAIOCB *acb)
+{
+    BlockDriverAIOCBSync *acb1;
 
-    high = length >> 32;
-    if (!SetFilePointer(h, (DWORD) length, &high, FILE_BEGIN))
-       return -1;
-    res = SetEndOfFile(h);
+    acb1 = qemu_mallocz(sizeof(BlockDriverAIOCBSync));
+    if (!acb1)
+        return -1;
+    acb->opaque = acb1;
+    acb1->bh = qemu_bh_new(bdrv_aio_bh_cb, acb);
+    return 0;
+}
 
-    /* back to old position */
-    SetFilePointer(h, li.LowPart, &li.HighPart, FILE_BEGIN);
-    return res ? 0 : -1;
+static int bdrv_aio_read_em(BlockDriverAIOCB *acb, int64_t sector_num,
+                    uint8_t *buf, int nb_sectors)
+{
+    BlockDriverAIOCBSync *acb1 = acb->opaque;
+    int ret;
+    
+    ret = bdrv_read(acb->bs, sector_num, buf, nb_sectors);
+    acb1->ret = ret;
+    qemu_bh_schedule(acb1->bh);
+    return 0;
 }
 
-static int set_sparse(int fd)
+static int bdrv_aio_write_em(BlockDriverAIOCB *acb, int64_t sector_num,
+                     const uint8_t *buf, int nb_sectors)
 {
-    DWORD returned;
-    return (int) DeviceIoControl((HANDLE)_get_osfhandle(fd), FSCTL_SET_SPARSE,
-                                NULL, 0, NULL, 0, &returned, NULL);
+    BlockDriverAIOCBSync *acb1 = acb->opaque;
+    int ret;
+    
+    ret = bdrv_write(acb->bs, sector_num, buf, nb_sectors);
+    acb1->ret = ret;
+    qemu_bh_schedule(acb1->bh);
+    return 0;
 }
-#else
-static inline int set_sparse(int fd)
+
+static void bdrv_aio_cancel_em(BlockDriverAIOCB *acb)
 {
-    return 1;
+    BlockDriverAIOCBSync *acb1 = acb->opaque;
+    qemu_bh_cancel(acb1->bh);
 }
-#endif
 
-static int raw_create(const char *filename, int64_t total_size,
-                      const char *backing_file, int flags)
+static void bdrv_aio_delete_em(BlockDriverAIOCB *acb)
 {
-    int fd;
+    BlockDriverAIOCBSync *acb1 = acb->opaque;
+    qemu_bh_delete(acb1->bh);
+}
+#endif /* !QEMU_TOOL */
 
-    if (flags || backing_file)
-        return -ENOTSUP;
+/**************************************************************/
+/* sync block device emulation */
 
-    fd = open(filename, O_WRONLY | O_CREAT | O_TRUNC | O_BINARY | O_LARGEFILE, 
-              0644);
-    if (fd < 0)
-        return -EIO;
-    set_sparse(fd);
-    ftruncate(fd, total_size * 512);
-    close(fd);
-    return 0;
+static void bdrv_rw_em_cb(void *opaque, int ret)
+{
+    *(int *)opaque = ret;
 }
 
-static void raw_flush(BlockDriverState *bs)
+#define NOT_DONE 0x7fffffff
+
+static int bdrv_read_em(BlockDriverState *bs, int64_t sector_num, 
+                        uint8_t *buf, int nb_sectors)
 {
-    BDRVRawState *s = bs->opaque;
-    fsync(s->fd);
+    int async_ret, ret;
+
+    if (!bs->sync_aiocb) {
+        bs->sync_aiocb = bdrv_aio_new(bs);
+        if (!bs->sync_aiocb)
+            return -1;
+    }
+    async_ret = NOT_DONE;
+    qemu_aio_wait_start();
+    ret = bdrv_aio_read(bs->sync_aiocb, sector_num, buf, nb_sectors, 
+                        bdrv_rw_em_cb, &async_ret);
+    if (ret < 0) {
+        qemu_aio_wait_end();
+        return ret;
+    }
+    while (async_ret == NOT_DONE) {
+        qemu_aio_wait();
+    }
+    qemu_aio_wait_end();
+    return async_ret;
 }
 
-BlockDriver bdrv_raw = {
-    "raw",
-    sizeof(BDRVRawState),
-    raw_probe,
-    raw_open,
-    raw_read,
-    raw_write,
-    raw_close,
-    raw_create,
-    raw_flush,
-};
+static int bdrv_write_em(BlockDriverState *bs, int64_t sector_num,
+                         const uint8_t *buf, int nb_sectors)
+{
+    int async_ret, ret;
+
+    if (!bs->sync_aiocb) {
+        bs->sync_aiocb = bdrv_aio_new(bs);
+        if (!bs->sync_aiocb)
+            return -1;
+    }
+    async_ret = NOT_DONE;
+    qemu_aio_wait_start();
+    ret = bdrv_aio_write(bs->sync_aiocb, sector_num, buf, nb_sectors, 
+                         bdrv_rw_em_cb, &async_ret);
+    if (ret < 0) {
+        qemu_aio_wait_end();
+        return ret;
+    }
+    while (async_ret == NOT_DONE) {
+        qemu_aio_wait();
+    }
+    qemu_aio_wait_end();
+    return async_ret;
+}
 
 void bdrv_init(void)
 {
index c2a2e30a9f9cd30ec88afe1c43df4de4e35d416e..e40503ea38486f5de969c04fe2fb1284fd36e89c 100644 (file)
@@ -28,7 +28,7 @@ struct BlockDriver {
     const char *format_name;
     int instance_size;
     int (*bdrv_probe)(const uint8_t *buf, int buf_size, const char *filename);
-    int (*bdrv_open)(BlockDriverState *bs, const char *filename);
+    int (*bdrv_open)(BlockDriverState *bs, const char *filename, int flags);
     int (*bdrv_read)(BlockDriverState *bs, int64_t sector_num, 
                      uint8_t *buf, int nb_sectors);
     int (*bdrv_write)(BlockDriverState *bs, int64_t sector_num, 
@@ -41,11 +41,28 @@ struct BlockDriver {
                              int nb_sectors, int *pnum);
     int (*bdrv_set_key)(BlockDriverState *bs, const char *key);
     int (*bdrv_make_empty)(BlockDriverState *bs);
+    /* aio */
+    int (*bdrv_aio_new)(BlockDriverAIOCB *acb);
+    int (*bdrv_aio_read)(BlockDriverAIOCB *acb, int64_t sector_num,
+                              uint8_t *buf, int nb_sectors);
+    int (*bdrv_aio_write)(BlockDriverAIOCB *acb, int64_t sector_num,
+                          const uint8_t *buf, int nb_sectors);
+    void (*bdrv_aio_cancel)(BlockDriverAIOCB *acb);
+    void (*bdrv_aio_delete)(BlockDriverAIOCB *acb);
+
+    const char *protocol_name;
+    int (*bdrv_pread)(BlockDriverState *bs, int64_t offset, 
+                      uint8_t *buf, int count);
+    int (*bdrv_pwrite)(BlockDriverState *bs, int64_t offset, 
+                       const uint8_t *buf, int count);
+    int (*bdrv_truncate)(BlockDriverState *bs, int64_t offset);
+    int64_t (*bdrv_getlength)(BlockDriverState *bs);
+
     struct BlockDriver *next;
 };
 
 struct BlockDriverState {
-    int64_t total_sectors;
+    int64_t total_sectors; /* XXX: will be removed */
     int read_only; /* if true, the media is read only */
     int inserted; /* if true, the media is present */
     int removable; /* if true, the media can be removed */
@@ -67,6 +84,9 @@ struct BlockDriverState {
     int is_temporary;
     
     BlockDriverState *backing_hd;
+    /* sync read/write emulation */
+
+    BlockDriverAIOCB *sync_aiocb;
     
     /* NOTE: the following infos are only hints for real hardware
        drivers. They are not used by the block driver */
@@ -76,6 +96,14 @@ struct BlockDriverState {
     BlockDriverState *next;
 };
 
+struct BlockDriverAIOCB {
+    BlockDriverState *bs;
+    BlockDriverCompletionFunc *cb;
+    void *cb_opaque;
+    
+    void *opaque; /* driver opaque */
+};
+
 void get_tmp_filename(char *filename, int size);
 
 #endif /* BLOCK_INT_H */
diff --git a/vl.c b/vl.c
index 657116b00563c1c061c6253a9b46006cab17d182..b31b2398a9709f3db0a6f6e552ee1818e91826ed 100644 (file)
--- a/vl.c
+++ b/vl.c
@@ -4771,6 +4771,77 @@ static int ram_load(QEMUFile *f, void *opaque, int version_id)
     return 0;
 }
 
+/***********************************************************/
+/* bottom halves (can be seen as timers which expire ASAP) */
+
+struct QEMUBH {
+    QEMUBHFunc *cb;
+    void *opaque;
+    int scheduled;
+    QEMUBH *next;
+};
+
+static QEMUBH *first_bh = NULL;
+
+QEMUBH *qemu_bh_new(QEMUBHFunc *cb, void *opaque)
+{
+    QEMUBH *bh;
+    bh = qemu_mallocz(sizeof(QEMUBH));
+    if (!bh)
+        return NULL;
+    bh->cb = cb;
+    bh->opaque = opaque;
+    return bh;
+}
+
+void qemu_bh_poll(void)
+{
+    QEMUBH *bh, **pbh;
+
+    for(;;) {
+        pbh = &first_bh;
+        bh = *pbh;
+        if (!bh)
+            break;
+        *pbh = bh->next;
+        bh->scheduled = 0;
+        bh->cb(bh->opaque);
+    }
+}
+
+void qemu_bh_schedule(QEMUBH *bh)
+{
+    CPUState *env = cpu_single_env;
+    if (bh->scheduled)
+        return;
+    bh->scheduled = 1;
+    bh->next = first_bh;
+    first_bh = bh;
+
+    /* stop the currently executing CPU to execute the BH ASAP */
+    if (env) {
+        cpu_interrupt(env, CPU_INTERRUPT_EXIT);
+    }
+}
+
+void qemu_bh_cancel(QEMUBH *bh)
+{
+    QEMUBH **pbh;
+    if (bh->scheduled) {
+        pbh = &first_bh;
+        while (*pbh != bh)
+            pbh = &(*pbh)->next;
+        *pbh = bh->next;
+        bh->scheduled = 0;
+    }
+}
+
+void qemu_bh_delete(QEMUBH *bh)
+{
+    qemu_bh_cancel(bh);
+    qemu_free(bh);
+}
+
 /***********************************************************/
 /* machine registration */
 
@@ -5030,6 +5101,8 @@ void main_loop_wait(int timeout)
 #ifdef _WIN32
     tap_win32_poll();
 #endif
+    qemu_aio_poll();
+    qemu_bh_poll();
 
     if (vm_running) {
         qemu_run_timers(&active_timers[QEMU_TIMER_VIRTUAL], 
@@ -6049,6 +6122,7 @@ int main(int argc, char **argv)
     
     init_timers();
     init_timer_alarm();
+    qemu_aio_init();
 
 #ifdef _WIN32
     socket_init();
@@ -6093,7 +6167,7 @@ int main(int argc, char **argv)
                 snprintf(buf, sizeof(buf), "hd%c", i + 'a');
                 bs_table[i] = bdrv_new(buf);
             }
-            if (bdrv_open(bs_table[i], hd_filename[i], snapshot) < 0) {
+            if (bdrv_open(bs_table[i], hd_filename[i], snapshot ? BDRV_O_SNAPSHOT : 0) < 0) {
                 fprintf(stderr, "qemu: could not open hard disk image '%s'\n",
                         hd_filename[i]);
                 exit(1);
@@ -6118,7 +6192,8 @@ int main(int argc, char **argv)
                 bdrv_set_type_hint(fd_table[i], BDRV_TYPE_FLOPPY);
             }
             if (fd_filename[i] != '\0') {
-                if (bdrv_open(fd_table[i], fd_filename[i], snapshot) < 0) {
+                if (bdrv_open(fd_table[i], fd_filename[i],
+                              snapshot ? BDRV_O_SNAPSHOT : 0) < 0) {
                     fprintf(stderr, "qemu: could not open floppy disk image '%s'\n",
                             fd_filename[i]);
                     exit(1);
diff --git a/vl.h b/vl.h
index d97f485f3c133e2591489a864e74101497ef244b..d653f4683bbff19ca96ee620e626117a0e66e6c1 100644 (file)
--- a/vl.h
+++ b/vl.h
@@ -481,6 +481,16 @@ void qemu_put_timer(QEMUFile *f, QEMUTimer *ts);
 void cpu_save(QEMUFile *f, void *opaque);
 int cpu_load(QEMUFile *f, void *opaque, int version_id);
 
+/* bottom halves: callbacks tracked on a pending list and run via qemu_bh_poll() */
+typedef struct QEMUBH QEMUBH;            /* only forward-declared in this header */
+typedef void QEMUBHFunc(void *opaque);   /* callback signature; receives the caller's opaque pointer */
+
+QEMUBH *qemu_bh_new(QEMUBHFunc *cb, void *opaque); /* NOTE(review): presumably allocates a bottom half bound to cb/opaque -- confirm in vl.c */
+void qemu_bh_schedule(QEMUBH *bh);       /* NOTE(review): presumably marks bh pending so qemu_bh_poll() runs it -- confirm in vl.c */
+void qemu_bh_cancel(QEMUBH *bh);         /* unlinks bh from the pending list if it is scheduled; otherwise a no-op */
+void qemu_bh_delete(QEMUBH *bh);         /* calls qemu_bh_cancel(bh), then frees bh */
+void qemu_bh_poll(void);                 /* called from main_loop_wait(), right after qemu_aio_poll() */
+
 /* block.c */
 typedef struct BlockDriverState BlockDriverState;
 typedef struct BlockDriver BlockDriver;
@@ -495,6 +505,16 @@ extern BlockDriver bdrv_bochs;
 extern BlockDriver bdrv_vpc;
 extern BlockDriver bdrv_vvfat;
 
+#define BDRV_O_RDONLY      0x0000 /* BDRV_O_* values are passed as the 'flags' argument of bdrv_open() and friends */
+#define BDRV_O_RDWR        0x0002
+#define BDRV_O_ACCESS      0x0003 /* NOTE(review): looks like a mask covering the access-mode bits -- confirm in block.c */
+#define BDRV_O_CREAT       0x0004 /* create an empty file */
+#define BDRV_O_SNAPSHOT    0x0008 /* open the file read only and save writes in a snapshot */
+#define BDRV_O_FILE        0x0010 /* open as a raw file (do not try to
+                                     use a disk image format on top of
+                                     it); this is the default for
+                                     bdrv_file_open() */
+
 void bdrv_init(void);
 BlockDriver *bdrv_find_format(const char *format_name);
 int bdrv_create(BlockDriver *drv, 
@@ -502,17 +522,44 @@ int bdrv_create(BlockDriver *drv,
                 const char *backing_file, int flags);
 BlockDriverState *bdrv_new(const char *device_name);
 void bdrv_delete(BlockDriverState *bs);
-int bdrv_open(BlockDriverState *bs, const char *filename, int snapshot);
-int bdrv_open2(BlockDriverState *bs, const char *filename, int snapshot,
+int bdrv_file_open(BlockDriverState **pbs, const char *filename, int flags); /* NOTE(review): presumably returns the new state through *pbs -- confirm in block.c */
+int bdrv_open(BlockDriverState *bs, const char *filename, int flags); /* flags: BDRV_O_* bits (vl.c passes BDRV_O_SNAPSHOT or 0); a negative return means failure to vl.c */
+int bdrv_open2(BlockDriverState *bs, const char *filename, int flags, /* flags: BDRV_O_* bits, replacing the old 'snapshot' boolean */
                BlockDriver *drv);
 void bdrv_close(BlockDriverState *bs);
 int bdrv_read(BlockDriverState *bs, int64_t sector_num, 
               uint8_t *buf, int nb_sectors);
 int bdrv_write(BlockDriverState *bs, int64_t sector_num, 
                const uint8_t *buf, int nb_sectors);
+int bdrv_pread(BlockDriverState *bs, int64_t offset, 
+               void *buf, int count);
+int bdrv_pwrite(BlockDriverState *bs, int64_t offset, 
+                const void *buf, int count);
+int bdrv_truncate(BlockDriverState *bs, int64_t offset);
+int64_t bdrv_getlength(BlockDriverState *bs);
 void bdrv_get_geometry(BlockDriverState *bs, int64_t *nb_sectors_ptr);
 int bdrv_commit(BlockDriverState *bs);
 void bdrv_set_boot_sector(BlockDriverState *bs, const uint8_t *data, int size);
+/* async block I/O */
+typedef struct BlockDriverAIOCB BlockDriverAIOCB; /* only forward-declared in this header */
+typedef void BlockDriverCompletionFunc(void *opaque, int ret); /* NOTE(review): ret is presumably the request's result code -- confirm in block.c */
+
+BlockDriverAIOCB *bdrv_aio_new(BlockDriverState *bs); /* NOTE(review): presumably creates a request handle tied to bs -- confirm in block.c */
+int bdrv_aio_read(BlockDriverAIOCB *acb, int64_t sector_num,
+                  uint8_t *buf, int nb_sectors,
+                  BlockDriverCompletionFunc *cb, void *opaque); /* NOTE(review): cb(opaque, ret) presumably fires on completion -- confirm */
+int bdrv_aio_write(BlockDriverAIOCB *acb, int64_t sector_num,
+                   const uint8_t *buf, int nb_sectors,
+                   BlockDriverCompletionFunc *cb, void *opaque); /* same parameter layout as bdrv_aio_read(), with a const source buffer */
+void bdrv_aio_cancel(BlockDriverAIOCB *acb);
+void bdrv_aio_delete(BlockDriverAIOCB *acb);
+
+void qemu_aio_init(void); /* called from main() right after init_timer_alarm() */
+void qemu_aio_poll(void); /* called from main_loop_wait(), immediately before qemu_bh_poll() */
+void qemu_aio_wait_start(void);
+void qemu_aio_wait(void);
+void qemu_aio_wait_end(void);
+
 /* Ensure contents are flushed to disk.  */
 void bdrv_flush(BlockDriverState *bs);
 
@@ -551,6 +598,13 @@ const char *bdrv_get_device_name(BlockDriverState *bs);
 int qcow_get_cluster_size(BlockDriverState *bs);
 int qcow_compress_cluster(BlockDriverState *bs, int64_t sector_num,
                           const uint8_t *buf);
+void bdrv_get_backing_filename(BlockDriverState *bs, 
+                               char *filename, int filename_size);
+
+int path_is_absolute(const char *path);
+void path_combine(char *dest, int dest_size,
+                  const char *base_path,
+                  const char *filename);
 
 #ifndef QEMU_TOOL