X-Git-Url: https://git.proxmox.com/?a=blobdiff_plain;f=block-migration.c;h=daf9ec1eab77cfadc35cf70d8ea369934f1c0f29;hb=60aad298cb6de52f2716b2e82e1353ea9de95fd6;hp=b726c6c0025cbaa82e8ca676a5b2955d68d544d2;hpb=52e850dea988585c3d693fd9cd4a4c38968d89b8;p=qemu.git diff --git a/block-migration.c b/block-migration.c index b726c6c00..daf9ec1ea 100644 --- a/block-migration.c +++ b/block-migration.c @@ -29,6 +29,7 @@ #define BLK_MIG_FLAG_DEVICE_BLOCK 0x01 #define BLK_MIG_FLAG_EOS 0x02 #define BLK_MIG_FLAG_PROGRESS 0x04 +#define BLK_MIG_FLAG_ZERO_BLOCK 0x08 #define MAX_IS_ALLOCATED_SEARCH 65536 @@ -80,6 +81,7 @@ typedef struct BlkMigState { int shared_base; QSIMPLEQ_HEAD(bmds_list, BlkMigDevState) bmds_list; int64_t total_sector_sum; + bool zero_blocks; /* Protected by lock. */ QSIMPLEQ_HEAD(blk_list, BlkMigBlock) blk_list; @@ -107,19 +109,37 @@ static void blk_mig_unlock(void) qemu_mutex_unlock(&block_mig_state.lock); } +/* Must run outside of the iothread lock during the bulk phase, + * or the VM will stall. + */ + static void blk_send(QEMUFile *f, BlkMigBlock * blk) { int len; + uint64_t flags = BLK_MIG_FLAG_DEVICE_BLOCK; + + if (block_mig_state.zero_blocks && + buffer_is_zero(blk->buf, BLOCK_SIZE)) { + flags |= BLK_MIG_FLAG_ZERO_BLOCK; + } /* sector number and flags */ qemu_put_be64(f, (blk->sector << BDRV_SECTOR_BITS) - | BLK_MIG_FLAG_DEVICE_BLOCK); + | flags); /* device name */ len = strlen(blk->bmds->bs->device_name); qemu_put_byte(f, len); qemu_put_buffer(f, (uint8_t *)blk->bmds->bs->device_name, len); + /* if a block is zero we need to flush here since the network + * bandwidth is now a lot higher than the storage device bandwidth. + * thus if we queue zero blocks we slow down the migration */ + if (flags & BLK_MIG_FLAG_ZERO_BLOCK) { + qemu_fflush(f); + return; + } + qemu_put_buffer(f, blk->buf, BLOCK_SIZE); } @@ -226,6 +246,8 @@ static void blk_mig_read_cb(void *opaque, int ret) blk_mig_unlock(); } +/* Called with no lock taken. */ + static int mig_save_device_bulk(QEMUFile *f, BlkMigDevState *bmds) { int64_t total_sectors = bmds->total_sectors; @@ -235,11 +257,13 @@ static int mig_save_device_bulk(QEMUFile *f, BlkMigDevState *bmds) int nr_sectors; if (bmds->shared_base) { + qemu_mutex_lock_iothread(); while (cur_sector < total_sectors && !bdrv_is_allocated(bs, cur_sector, MAX_IS_ALLOCATED_SEARCH, &nr_sectors)) { cur_sector += nr_sectors; } + qemu_mutex_unlock_iothread(); } if (cur_sector >= total_sectors) { @@ -272,15 +296,19 @@ static int mig_save_device_bulk(QEMUFile *f, BlkMigDevState *bmds) block_mig_state.submitted++; blk_mig_unlock(); + qemu_mutex_lock_iothread(); blk->aiocb = bdrv_aio_readv(bs, cur_sector, &blk->qiov, nr_sectors, blk_mig_read_cb, blk); bdrv_reset_dirty(bs, cur_sector, nr_sectors); - bmds->cur_sector = cur_sector + nr_sectors; + qemu_mutex_unlock_iothread(); + bmds->cur_sector = cur_sector + nr_sectors; return (bmds->cur_sector >= total_sectors); } +/* Called with iothread lock taken. */ + static void set_dirty_tracking(int enable) { BlkMigDevState *bmds; @@ -308,8 +336,8 @@ static void init_blk_migration_it(void *opaque, BlockDriverState *bs) bmds->completed_sectors = 0; bmds->shared_base = block_mig_state.shared_base; alloc_aio_bitmap(bmds); - drive_get_ref(drive_get_by_blockdev(bs)); bdrv_set_in_use(bs, 1); + bdrv_ref(bs); block_mig_state.total_sector_sum += sectors; @@ -332,10 +360,13 @@ static void init_blk_migration(QEMUFile *f) block_mig_state.total_sector_sum = 0; block_mig_state.prev_progress = -1; block_mig_state.bulk_completed = 0; + block_mig_state.zero_blocks = migrate_zero_blocks(); bdrv_iterate(init_blk_migration_it, NULL); } +/* Called with no lock taken. */ + static int blk_mig_save_bulked_block(QEMUFile *f) { int64_t completed_sector_sum = 0; @@ -382,6 +413,8 @@ static void blk_mig_reset_dirty_cursor(void) } } +/* Called with iothread lock taken. */ + static int mig_save_device_dirty(QEMUFile *f, BlkMigDevState *bmds, int is_async) { @@ -451,7 +484,9 @@ error: return ret; } -/* return value: +/* Called with iothread lock taken. + * + * return value: * 0: too much data for max_downtime * 1: few enough data for max_downtime */ @@ -470,6 +505,8 @@ static int blk_mig_save_dirty_block(QEMUFile *f, int is_async) return ret; } +/* Called with no locks taken. */ + static int flush_blks(QEMUFile *f) { BlkMigBlock *blk; @@ -509,6 +546,8 @@ static int flush_blks(QEMUFile *f) return ret; } +/* Called with iothread lock taken. */ + static int64_t get_remaining_dirty(void) { BlkMigDevState *bmds; @@ -521,6 +560,8 @@ static int64_t get_remaining_dirty(void) return dirty << BDRV_SECTOR_BITS; } +/* Called with iothread lock taken. */ + static void blk_mig_cleanup(void) { BlkMigDevState *bmds; @@ -534,7 +575,7 @@ static void blk_mig_cleanup(void) while ((bmds = QSIMPLEQ_FIRST(&block_mig_state.bmds_list)) != NULL) { QSIMPLEQ_REMOVE_HEAD(&block_mig_state.bmds_list, entry); bdrv_set_in_use(bmds->bs, 0); - drive_put_ref(drive_get_by_blockdev(bmds->bs)); + bdrv_unref(bmds->bs); g_free(bmds->aio_bitmap); g_free(bmds); } @@ -559,10 +600,12 @@ static int block_save_setup(QEMUFile *f, void *opaque) DPRINTF("Enter save live setup submitted %d transferred %d\n", block_mig_state.submitted, block_mig_state.transferred); + qemu_mutex_lock_iothread(); init_blk_migration(f); /* start track dirty blocks */ set_dirty_tracking(1); + qemu_mutex_unlock_iothread(); ret = flush_blks(f); blk_mig_reset_dirty_cursor(); @@ -600,7 +643,12 @@ static int block_save_iterate(QEMUFile *f, void *opaque) } ret = 0; } else { + /* Always called with iothread lock taken for + * simplicity, block_save_complete also calls it. + */ + qemu_mutex_lock_iothread(); ret = blk_mig_save_dirty_block(f, 1); + qemu_mutex_unlock_iothread(); } if (ret < 0) { return ret; @@ -622,6 +670,8 @@ static int block_save_iterate(QEMUFile *f, void *opaque) return qemu_ftell(f) - last_ftell; } +/* Called with iothread lock taken. */ + static int block_save_complete(QEMUFile *f, void *opaque) { int ret; @@ -665,6 +715,7 @@ static uint64_t block_save_pending(QEMUFile *f, void *opaque, uint64_t max_size) /* Estimate pending number of bytes to send */ uint64_t pending; + qemu_mutex_lock_iothread(); blk_mig_lock(); pending = get_remaining_dirty() + block_mig_state.submitted * BLOCK_SIZE + @@ -675,6 +726,7 @@ static uint64_t block_save_pending(QEMUFile *f, void *opaque, uint64_t max_size) pending = BLOCK_SIZE; } blk_mig_unlock(); + qemu_mutex_unlock_iothread(); DPRINTF("Enter save live pending %" PRIu64 "\n", pending); return pending; @@ -727,12 +779,15 @@ static int block_load(QEMUFile *f, void *opaque, int version_id) nr_sectors = BDRV_SECTORS_PER_DIRTY_CHUNK; } - buf = g_malloc(BLOCK_SIZE); - - qemu_get_buffer(f, buf, BLOCK_SIZE); - ret = bdrv_write(bs, addr, buf, nr_sectors); + if (flags & BLK_MIG_FLAG_ZERO_BLOCK) { + ret = bdrv_write_zeroes(bs, addr, nr_sectors); + } else { + buf = g_malloc(BLOCK_SIZE); + qemu_get_buffer(f, buf, BLOCK_SIZE); + ret = bdrv_write(bs, addr, buf, nr_sectors); + g_free(buf); + } - g_free(buf); if (ret < 0) { return ret; }