X-Git-Url: https://git.proxmox.com/?a=blobdiff_plain;f=block-migration.c;h=6acf3e1682f401025906d1342a54ca2ee13086f2;hb=bf4229d3cb09be2efc0add569feba33834fc5d93;hp=483ca7bab9cb32047fb663084db82e995b52150c;hpb=8f794c557c4b51c7a957d47ef6a2230114bb9e79;p=mirror_qemu.git
diff --git a/block-migration.c b/block-migration.c
index 483ca7bab9..6acf3e1682 100644
--- a/block-migration.c
+++ b/block-migration.c
@@ -9,16 +9,18 @@
  * This work is licensed under the terms of the GNU GPL, version 2.  See
  * the COPYING file in the top-level directory.
  *
+ * Contributions after 2012-01-13 are licensed under the terms of the
+ * GNU GPL, version 2 or (at your option) any later version.
  */
 
 #include "qemu-common.h"
-#include "block_int.h"
+#include "block/block_int.h"
 #include "hw/hw.h"
-#include "qemu-queue.h"
-#include "qemu-timer.h"
-#include "monitor.h"
-#include "block-migration.h"
-#include "migration.h"
+#include "qemu/queue.h"
+#include "qemu/timer.h"
+#include "migration/block.h"
+#include "migration/migration.h"
+#include "sysemu/blockdev.h"
 #include <assert.h>
 
 #define BLOCK_SIZE (BDRV_SECTORS_PER_DIRTY_CHUNK << BDRV_SECTOR_BITS)
@@ -61,7 +63,6 @@ typedef struct BlkMigBlock {
     QEMUIOVector qiov;
     BlockDriverAIOCB *aiocb;
     int ret;
-    int64_t time;
     QSIMPLEQ_ENTRY(BlkMigBlock) entry;
 } BlkMigBlock;
 
@@ -76,8 +77,7 @@ typedef struct BlkMigState {
    int64_t total_sector_sum;
     int prev_progress;
     int bulk_completed;
-    long double total_time;
-    int reads;
+    long double prev_time_offset;
 } BlkMigState;
 
 static BlkMigState block_mig_state;
@@ -130,18 +130,6 @@ uint64_t blk_mig_bytes_total(void)
     return sum << BDRV_SECTOR_BITS;
 }
 
-static inline void add_avg_read_time(int64_t time)
-{
-    block_mig_state.reads++;
-    block_mig_state.total_time += time;
-}
-
-static inline long double compute_read_bwidth(void)
-{
-    assert(block_mig_state.total_time != 0);
-    return (block_mig_state.reads * BLOCK_SIZE)/ block_mig_state.total_time;
-}
-
 static int bmds_aio_inflight(BlkMigDevState *bmds, int64_t sector)
 {
     int64_t chunk = sector / (int64_t)BDRV_SECTORS_PER_DIRTY_CHUNK;
@@ -185,18 +173,17 @@ static void alloc_aio_bitmap(BlkMigDevState *bmds)
             BDRV_SECTORS_PER_DIRTY_CHUNK * 8 - 1;
     bitmap_size /= BDRV_SECTORS_PER_DIRTY_CHUNK * 8;
 
-    bmds->aio_bitmap = qemu_mallocz(bitmap_size);
+    bmds->aio_bitmap = g_malloc0(bitmap_size);
 }
 
 static void blk_mig_read_cb(void *opaque, int ret)
 {
+    long double curr_time = qemu_get_clock_ns(rt_clock);
     BlkMigBlock *blk = opaque;
 
     blk->ret = ret;
 
-    blk->time = qemu_get_clock_ns(rt_clock) - blk->time;
-
-    add_avg_read_time(blk->time);
+    block_mig_state.prev_time_offset = curr_time;
 
     QSIMPLEQ_INSERT_TAIL(&block_mig_state.blk_list, blk, entry);
     bmds_set_aio_inflight(blk->bmds, blk->sector, blk->nr_sectors, 0);
@@ -206,8 +193,7 @@ static void blk_mig_read_cb(void *opaque, int ret)
     assert(block_mig_state.submitted >= 0);
 }
 
-static int mig_save_device_bulk(Monitor *mon, QEMUFile *f,
-                                BlkMigDevState *bmds)
+static int mig_save_device_bulk(QEMUFile *f, BlkMigDevState *bmds)
 {
     int64_t total_sectors = bmds->total_sectors;
     int64_t cur_sector = bmds->cur_sector;
@@ -239,8 +225,8 @@ static int mig_save_device_bulk(Monitor *mon, QEMUFile *f,
         nr_sectors = total_sectors - cur_sector;
     }
 
-    blk = qemu_malloc(sizeof(BlkMigBlock));
-    blk->buf = qemu_malloc(BLOCK_SIZE);
+    blk = g_malloc(sizeof(BlkMigBlock));
+    blk->buf = g_malloc(BLOCK_SIZE);
     blk->bmds = bmds;
     blk->sector = cur_sector;
     blk->nr_sectors = nr_sectors;
@@ -249,26 +235,18 @@ static int mig_save_device_bulk(Monitor *mon, QEMUFile *f,
     blk->iov.iov_len = nr_sectors * BDRV_SECTOR_SIZE;
     qemu_iovec_init_external(&blk->qiov, &blk->iov, 1);
 
-    blk->time = qemu_get_clock_ns(rt_clock);
+    if (block_mig_state.submitted == 0) {
+        block_mig_state.prev_time_offset = qemu_get_clock_ns(rt_clock);
+    }
 
     blk->aiocb = bdrv_aio_readv(bs, cur_sector, &blk->qiov,
                                 nr_sectors, blk_mig_read_cb, blk);
-    if (!blk->aiocb) {
-        goto error;
-    }
     block_mig_state.submitted++;
 
     bdrv_reset_dirty(bs, cur_sector, nr_sectors);
     bmds->cur_sector = cur_sector + nr_sectors;
 
     return (bmds->cur_sector >= total_sectors);
-
-error:
-    monitor_printf(mon, "Error reading sector %" PRId64 "\n", cur_sector);
-    qemu_file_set_error(f);
-    qemu_free(blk->buf);
-    qemu_free(blk);
-    return 0;
 }
 
 static void set_dirty_tracking(int enable)
@@ -282,7 +260,6 @@ static void set_dirty_tracking(int enable)
 
 static void init_blk_migration_it(void *opaque, BlockDriverState *bs)
 {
-    Monitor *mon = opaque;
     BlkMigDevState *bmds;
     int64_t sectors;
 
@@ -292,30 +269,30 @@ static void init_blk_migration_it(void *opaque, BlockDriverState *bs)
             return;
         }
 
-        bmds = qemu_mallocz(sizeof(BlkMigDevState));
+        bmds = g_malloc0(sizeof(BlkMigDevState));
         bmds->bs = bs;
         bmds->bulk_completed = 0;
         bmds->total_sectors = sectors;
         bmds->completed_sectors = 0;
         bmds->shared_base = block_mig_state.shared_base;
         alloc_aio_bitmap(bmds);
+        drive_get_ref(drive_get_by_blockdev(bs));
+        bdrv_set_in_use(bs, 1);
 
         block_mig_state.total_sector_sum += sectors;
 
         if (bmds->shared_base) {
-            monitor_printf(mon, "Start migration for %s with shared base "
-                                "image\n",
-                           bs->device_name);
+            DPRINTF("Start migration for %s with shared base image\n",
+                    bs->device_name);
         } else {
-            monitor_printf(mon, "Start full migration for %s\n",
-                           bs->device_name);
+            DPRINTF("Start full migration for %s\n", bs->device_name);
        }
 
         QSIMPLEQ_INSERT_TAIL(&block_mig_state.bmds_list, bmds, entry);
     }
 }
 
-static void init_blk_migration(Monitor *mon, QEMUFile *f)
+static void init_blk_migration(QEMUFile *f)
 {
     block_mig_state.submitted = 0;
     block_mig_state.read_done = 0;
@@ -323,13 +300,11 @@ static void init_blk_migration(Monitor *mon, QEMUFile *f)
     block_mig_state.total_sector_sum = 0;
     block_mig_state.prev_progress = -1;
     block_mig_state.bulk_completed = 0;
-    block_mig_state.total_time = 0;
-    block_mig_state.reads = 0;
 
-    bdrv_iterate(init_blk_migration_it, mon);
+    bdrv_iterate(init_blk_migration_it, NULL);
 }
 
-static int blk_mig_save_bulked_block(Monitor *mon, QEMUFile *f)
+static int blk_mig_save_bulked_block(QEMUFile *f)
 {
     int64_t completed_sector_sum = 0;
     BlkMigDevState *bmds;
@@ -338,7 +313,7 @@ static int blk_mig_save_bulked_block(Monitor *mon, QEMUFile *f)
 
     QSIMPLEQ_FOREACH(bmds, &block_mig_state.bmds_list, entry) {
         if (bmds->bulk_completed == 0) {
-            if (mig_save_device_bulk(mon, f, bmds) == 1) {
+            if (mig_save_device_bulk(f, bmds) == 1) {
                 /* completed bulk section for this device */
                 bmds->bulk_completed = 1;
             }
@@ -360,8 +335,7 @@ static int blk_mig_save_bulked_block(Monitor *mon, QEMUFile *f)
         block_mig_state.prev_progress = progress;
         qemu_put_be64(f, (progress << BDRV_SECTOR_BITS)
                          | BLK_MIG_FLAG_PROGRESS);
-        monitor_printf(mon, "Completed %d %%\r", progress);
-        monitor_flush(mon);
+        DPRINTF("Completed %d %%\r", progress);
     }
 
     return ret;
@@ -376,17 +350,18 @@ static void blk_mig_reset_dirty_cursor(void)
     }
 }
 
-static int mig_save_device_dirty(Monitor *mon, QEMUFile *f,
-                                 BlkMigDevState *bmds, int is_async)
+static int mig_save_device_dirty(QEMUFile *f, BlkMigDevState *bmds,
+                                 int is_async)
 {
     BlkMigBlock *blk;
     int64_t total_sectors = bmds->total_sectors;
     int64_t sector;
     int nr_sectors;
+    int ret = -EIO;
 
     for (sector = bmds->cur_dirty; sector < bmds->total_sectors;) {
         if (bmds_aio_inflight(bmds, sector)) {
-            qemu_aio_flush();
+            bdrv_drain_all();
         }
         if (bdrv_get_dirty(bmds->bs, sector)) {
@@ -395,8 +370,8 @@ static int mig_save_device_dirty(Monitor *mon, QEMUFile *f,
             } else {
                 nr_sectors = BDRV_SECTORS_PER_DIRTY_CHUNK;
             }
-            blk = qemu_malloc(sizeof(BlkMigBlock));
-            blk->buf = qemu_malloc(BLOCK_SIZE);
+            blk = g_malloc(sizeof(BlkMigBlock));
+            blk->buf = g_malloc(BLOCK_SIZE);
             blk->bmds = bmds;
             blk->sector = sector;
             blk->nr_sectors = nr_sectors;
@@ -406,24 +381,23 @@ static int mig_save_device_dirty(Monitor *mon, QEMUFile *f,
                 blk->iov.iov_len = nr_sectors * BDRV_SECTOR_SIZE;
                 qemu_iovec_init_external(&blk->qiov, &blk->iov, 1);
 
-                blk->time = qemu_get_clock_ns(rt_clock);
+                if (block_mig_state.submitted == 0) {
+                    block_mig_state.prev_time_offset = qemu_get_clock_ns(rt_clock);
+                }
 
                 blk->aiocb = bdrv_aio_readv(bmds->bs, sector, &blk->qiov,
                                             nr_sectors, blk_mig_read_cb, blk);
-                if (!blk->aiocb) {
-                    goto error;
-                }
                 block_mig_state.submitted++;
                 bmds_set_aio_inflight(bmds, sector, nr_sectors, 1);
             } else {
-                if (bdrv_read(bmds->bs, sector, blk->buf,
-                              nr_sectors) < 0) {
+                ret = bdrv_read(bmds->bs, sector, blk->buf, nr_sectors);
+                if (ret < 0) {
                     goto error;
                 }
                 blk_send(f, blk);
 
-                qemu_free(blk->buf);
-                qemu_free(blk);
+                g_free(blk->buf);
+                g_free(blk);
             }
 
             bdrv_reset_dirty(bmds->bs, sector, nr_sectors);
@@ -436,21 +410,24 @@ static int mig_save_device_dirty(Monitor *mon, QEMUFile *f,
     return (bmds->cur_dirty >= bmds->total_sectors);
 
 error:
-    monitor_printf(mon, "Error reading sector %" PRId64 "\n", sector);
-    qemu_file_set_error(f);
-    qemu_free(blk->buf);
-    qemu_free(blk);
-    return 0;
+    DPRINTF("Error reading sector %" PRId64 "\n", sector);
+    g_free(blk->buf);
+    g_free(blk);
+    return ret;
 }
 
-static int blk_mig_save_dirty_block(Monitor *mon, QEMUFile *f, int is_async)
+/* return value:
+ * 0: too much data for max_downtime
+ * 1: few enough data for max_downtime
+*/
+static int blk_mig_save_dirty_block(QEMUFile *f, int is_async)
 {
     BlkMigDevState *bmds;
-    int ret = 0;
+    int ret = 1;
 
     QSIMPLEQ_FOREACH(bmds, &block_mig_state.bmds_list, entry) {
-        if (mig_save_device_dirty(mon, f, bmds, is_async) == 0) {
-            ret = 1;
+        ret = mig_save_device_dirty(f, bmds, is_async);
+        if (ret <= 0) {
             break;
         }
     }
@@ -458,9 +435,10 @@ static int blk_mig_save_dirty_block(Monitor *mon, QEMUFile *f, int is_async)
     return ret;
 }
 
-static void flush_blks(QEMUFile* f)
+static int flush_blks(QEMUFile *f)
 {
     BlkMigBlock *blk;
+    int ret = 0;
 
     DPRINTF("%s Enter submitted %d read_done %d transferred %d\n",
             __FUNCTION__, block_mig_state.submitted, block_mig_state.read_done,
@@ -471,14 +449,14 @@ static void flush_blks(QEMUFile* f)
             break;
         }
         if (blk->ret < 0) {
-            qemu_file_set_error(f);
+            ret = blk->ret;
             break;
         }
         blk_send(f, blk);
 
         QSIMPLEQ_REMOVE_HEAD(&block_mig_state.blk_list, entry);
-        qemu_free(blk->buf);
-        qemu_free(blk);
+        g_free(blk->buf);
+        g_free(blk);
 
         block_mig_state.read_done--;
         block_mig_state.transferred++;
@@ -488,6 +466,7 @@ static void flush_blks(QEMUFile* f)
     DPRINTF("%s Exit submitted %d read_done %d transferred %d\n",
             __FUNCTION__, block_mig_state.submitted, block_mig_state.read_done,
             block_mig_state.transferred);
+    return ret;
 }
 
 static int64_t get_remaining_dirty(void)
@@ -502,134 +481,152 @@ static int64_t get_remaining_dirty(void)
     return dirty * BLOCK_SIZE;
 }
 
-static int is_stage2_completed(void)
-{
-    int64_t remaining_dirty;
-    long double bwidth;
-
-    if (block_mig_state.bulk_completed == 1) {
-
-        remaining_dirty = get_remaining_dirty();
-        if (remaining_dirty == 0) {
-            return 1;
-        }
-
-        bwidth = compute_read_bwidth();
-
-        if ((remaining_dirty / bwidth) <=
-            migrate_max_downtime()) {
-            /* finish stage2 because we think that we can finish remaing work
-               below max_downtime */
-
-            return 1;
-        }
-    }
-
-    return 0;
-}
-
-static void blk_mig_cleanup(Monitor *mon)
+static void blk_mig_cleanup(void)
 {
     BlkMigDevState *bmds;
     BlkMigBlock *blk;
 
+    bdrv_drain_all();
+
     set_dirty_tracking(0);
 
     while ((bmds = QSIMPLEQ_FIRST(&block_mig_state.bmds_list)) != NULL) {
         QSIMPLEQ_REMOVE_HEAD(&block_mig_state.bmds_list, entry);
-        qemu_free(bmds->aio_bitmap);
-        qemu_free(bmds);
+        bdrv_set_in_use(bmds->bs, 0);
+        drive_put_ref(drive_get_by_blockdev(bmds->bs));
+        g_free(bmds->aio_bitmap);
+        g_free(bmds);
     }
 
     while ((blk = QSIMPLEQ_FIRST(&block_mig_state.blk_list)) != NULL) {
         QSIMPLEQ_REMOVE_HEAD(&block_mig_state.blk_list, entry);
-        qemu_free(blk->buf);
-        qemu_free(blk);
+        g_free(blk->buf);
+        g_free(blk);
     }
+}
 
-    monitor_printf(mon, "\n");
+static void block_migration_cancel(void *opaque)
+{
+    blk_mig_cleanup();
 }
 
-static int block_save_live(Monitor *mon, QEMUFile *f, int stage, void *opaque)
+static int block_save_setup(QEMUFile *f, void *opaque)
 {
-    DPRINTF("Enter save live stage %d submitted %d transferred %d\n",
-            stage, block_mig_state.submitted, block_mig_state.transferred);
+    int ret;
 
-    if (stage < 0) {
-        blk_mig_cleanup(mon);
-        return 0;
-    }
+    DPRINTF("Enter save live setup submitted %d transferred %d\n",
+            block_mig_state.submitted, block_mig_state.transferred);
 
-    if (block_mig_state.blk_enable != 1) {
-        /* no need to migrate storage */
-        qemu_put_be64(f, BLK_MIG_FLAG_EOS);
-        return 1;
-    }
+    init_blk_migration(f);
 
-    if (stage == 1) {
-        init_blk_migration(mon, f);
+    /* start track dirty blocks */
+    set_dirty_tracking(1);
 
-        /* start track dirty blocks */
-        set_dirty_tracking(1);
+    ret = flush_blks(f);
+    if (ret) {
+        blk_mig_cleanup();
+        return ret;
     }
 
-    flush_blks(f);
+    blk_mig_reset_dirty_cursor();
 
-    if (qemu_file_has_error(f)) {
-        blk_mig_cleanup(mon);
-        return 0;
+    qemu_put_be64(f, BLK_MIG_FLAG_EOS);
+
+    return 0;
+}
+
+static int block_save_iterate(QEMUFile *f, void *opaque)
+{
+    int ret;
+
+    DPRINTF("Enter save live iterate submitted %d transferred %d\n",
+            block_mig_state.submitted, block_mig_state.transferred);
+
+    ret = flush_blks(f);
+    if (ret) {
+        blk_mig_cleanup();
+        return ret;
     }
 
     blk_mig_reset_dirty_cursor();
 
-    if (stage == 2) {
-        /* control the rate of transfer */
-        while ((block_mig_state.submitted +
-                block_mig_state.read_done) * BLOCK_SIZE <
-               qemu_file_get_rate_limit(f)) {
-            if (block_mig_state.bulk_completed == 0) {
-                /* first finish the bulk phase */
-                if (blk_mig_save_bulked_block(mon, f) == 0) {
-                    /* finished saving bulk on all devices */
-                    block_mig_state.bulk_completed = 1;
-                }
-            } else {
-                if (blk_mig_save_dirty_block(mon, f, 1) == 0) {
-                    /* no more dirty blocks */
-                    break;
-                }
+    /* control the rate of transfer */
+    while ((block_mig_state.submitted +
+            block_mig_state.read_done) * BLOCK_SIZE <
+           qemu_file_get_rate_limit(f)) {
+        if (block_mig_state.bulk_completed == 0) {
+            /* first finish the bulk phase */
+            if (blk_mig_save_bulked_block(f) == 0) {
+                /* finished saving bulk on all devices */
+                block_mig_state.bulk_completed = 1;
+            }
+        } else {
+            ret = blk_mig_save_dirty_block(f, 1);
+            if (ret != 0) {
+                /* no more dirty blocks */
+                break;
             }
         }
+    }
+    if (ret) {
+        blk_mig_cleanup();
+        return ret;
+    }
 
-        flush_blks(f);
-
-        if (qemu_file_has_error(f)) {
-            blk_mig_cleanup(mon);
-            return 0;
-        }
+    ret = flush_blks(f);
+    if (ret) {
+        blk_mig_cleanup();
+        return ret;
     }
 
-    if (stage == 3) {
-        /* we know for sure that save bulk is completed and
-           all async read completed */
-        assert(block_mig_state.submitted == 0);
+    qemu_put_be64(f, BLK_MIG_FLAG_EOS);
 
-        while (blk_mig_save_dirty_block(mon, f, 0) != 0);
-        blk_mig_cleanup(mon);
+    return 0;
+}
 
-        /* report completion */
-        qemu_put_be64(f, (100 << BDRV_SECTOR_BITS) | BLK_MIG_FLAG_PROGRESS);
+static int block_save_complete(QEMUFile *f, void *opaque)
+{
+    int ret;
 
-        if (qemu_file_has_error(f)) {
-            return 0;
-        }
+    DPRINTF("Enter save live complete submitted %d transferred %d\n",
+            block_mig_state.submitted, block_mig_state.transferred);
 
-        monitor_printf(mon, "Block migration completed\n");
+    ret = flush_blks(f);
+    if (ret) {
+        blk_mig_cleanup();
+        return ret;
     }
 
+    blk_mig_reset_dirty_cursor();
+
+    /* we know for sure that save bulk is completed and
+       all async read completed */
+    assert(block_mig_state.submitted == 0);
+
+    do {
+        ret = blk_mig_save_dirty_block(f, 0);
+    } while (ret == 0);
+
+    blk_mig_cleanup();
+    if (ret) {
+        return ret;
+    }
+    /* report completion */
+    qemu_put_be64(f, (100 << BDRV_SECTOR_BITS) | BLK_MIG_FLAG_PROGRESS);
+
+    DPRINTF("Block migration completed\n");
+
     qemu_put_be64(f, BLK_MIG_FLAG_EOS);
 
-    return ((stage == 2) && is_stage2_completed());
+    return 0;
+}
+
+static uint64_t block_save_pending(QEMUFile *f, void *opaque, uint64_t max_size)
+{
+
+    DPRINTF("Enter save live pending %ld\n", get_remaining_dirty());
+
+    return get_remaining_dirty();
 }
 
 static int block_load(QEMUFile *f, void *opaque, int version_id)
@@ -642,6 +639,7 @@ static int block_load(QEMUFile *f, void *opaque, int version_id)
     uint8_t *buf;
     int64_t total_sectors = 0;
     int nr_sectors;
+    int ret;
 
     do {
         addr = qemu_get_be64(f);
@@ -650,7 +648,6 @@ static int block_load(QEMUFile *f, void *opaque, int version_id)
         addr >>= BDRV_SECTOR_BITS;
 
         if (flags & BLK_MIG_FLAG_DEVICE_BLOCK) {
-            int ret;
            /* get device name */
             len = qemu_get_byte(f);
             qemu_get_buffer(f, (uint8_t *)device_name, len);
@@ -667,7 +664,7 @@ static int block_load(QEMUFile *f, void *opaque, int version_id)
                 bs_prev = bs;
                 total_sectors = bdrv_getlength(bs) >> BDRV_SECTOR_BITS;
                 if (total_sectors <= 0) {
-                    error_report("Error getting length of block device %s\n",
+                    error_report("Error getting length of block device %s",
                                  device_name);
                     return -EINVAL;
                 }
@@ -679,12 +676,12 @@ static int block_load(QEMUFile *f, void *opaque, int version_id)
                 nr_sectors = BDRV_SECTORS_PER_DIRTY_CHUNK;
             }
 
-            buf = qemu_malloc(BLOCK_SIZE);
+            buf = g_malloc(BLOCK_SIZE);
             qemu_get_buffer(f, buf, BLOCK_SIZE);
             ret = bdrv_write(bs, addr, buf, nr_sectors);
 
-            qemu_free(buf);
+            g_free(buf);
             if (ret < 0) {
                 return ret;
             }
@@ -700,28 +697,45 @@ static int block_load(QEMUFile *f, void *opaque, int version_id)
             fprintf(stderr, "Unknown flags\n");
             return -EINVAL;
         }
-        if (qemu_file_has_error(f)) {
-            return -EIO;
+        ret = qemu_file_get_error(f);
+        if (ret != 0) {
+            return ret;
         }
     } while (!(flags & BLK_MIG_FLAG_EOS));
 
     return 0;
 }
 
-static void block_set_params(int blk_enable, int shared_base, void *opaque)
+static void block_set_params(const MigrationParams *params, void *opaque)
 {
-    block_mig_state.blk_enable = blk_enable;
-    block_mig_state.shared_base = shared_base;
+    block_mig_state.blk_enable = params->blk;
+    block_mig_state.shared_base = params->shared;
 
     /* shared base means that blk_enable = 1 */
-    block_mig_state.blk_enable |= shared_base;
+    block_mig_state.blk_enable |= params->shared;
 }
 
+static bool block_is_active(void *opaque)
+{
+    return block_mig_state.blk_enable == 1;
+}
+
+SaveVMHandlers savevm_block_handlers = {
+    .set_params = block_set_params,
+    .save_live_setup = block_save_setup,
+    .save_live_iterate = block_save_iterate,
+    .save_live_complete = block_save_complete,
+    .save_live_pending = block_save_pending,
+    .load_state = block_load,
+    .cancel = block_migration_cancel,
+    .is_active = block_is_active,
+};
+
 void blk_mig_init(void)
 {
     QSIMPLEQ_INIT(&block_mig_state.bmds_list);
     QSIMPLEQ_INIT(&block_mig_state.blk_list);
 
-    register_savevm_live(NULL, "block", 0, 1, block_set_params,
-                         block_save_live, NULL, block_load, &block_mig_state);
+    register_savevm_live(NULL, "block", 0, 1, &savevm_block_handlers,
+                         &block_mig_state);
 }
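
For reference, the net interface change in this patch is that block migration no longer implements the old stage-driven block_save_live(stage 1/2/3) callback; it now fills in a SaveVMHandlers table and registers it once via register_savevm_live(). The sketch below isolates that registration pattern. It is a minimal illustration only, not code from the patch: the SaveVMHandlers field names, the handler signatures and the register_savevm_live() call shape are taken from the diff above, while the "example" section name, the example_* handlers, EXAMPLE_EOS and their trivial bodies are invented for the sketch, and it is only expected to build inside a QEMU tree of this vintage.

/* Minimal sketch, not part of the patch: the SaveVMHandlers registration
 * pattern this diff converts block migration to.  Everything named example_*
 * and EXAMPLE_EOS is hypothetical; the interface shapes mirror the diff. */

/* Headers as in block-migration.c above; the exact subset needed may differ. */
#include "qemu-common.h"
#include "hw/hw.h"
#include "migration/migration.h"

#define EXAMPLE_EOS 0x01                /* hypothetical end-of-section mark */

static void example_set_params(const MigrationParams *params, void *opaque)
{
    /* record whether this section should take part in the migration */
}

static bool example_is_active(void *opaque)
{
    return true;                        /* always participate in this sketch */
}

static int example_save_setup(QEMUFile *f, void *opaque)
{
    /* one-time setup on the source side */
    qemu_put_be64(f, EXAMPLE_EOS);
    return 0;                           /* 0 on success, negative on error */
}

static int example_save_iterate(QEMUFile *f, void *opaque)
{
    /* called repeatedly while the migration rate limit allows */
    qemu_put_be64(f, EXAMPLE_EOS);
    return 0;
}

static int example_save_complete(QEMUFile *f, void *opaque)
{
    /* final pass with the guest stopped: flush whatever is left */
    qemu_put_be64(f, EXAMPLE_EOS);
    return 0;
}

static uint64_t example_save_pending(QEMUFile *f, void *opaque,
                                     uint64_t max_size)
{
    return 0;                           /* bytes still to send; 0 = converged */
}

static int example_load(QEMUFile *f, void *opaque, int version_id)
{
    qemu_get_be64(f);                   /* consume what the save_* side sent */
    return qemu_file_get_error(f);
}

static void example_cancel(void *opaque)
{
    /* migration cancelled: drop any per-migration state */
}

static SaveVMHandlers savevm_example_handlers = {
    .set_params         = example_set_params,
    .save_live_setup    = example_save_setup,
    .save_live_iterate  = example_save_iterate,
    .save_live_complete = example_save_complete,
    .save_live_pending  = example_save_pending,
    .load_state         = example_load,
    .cancel             = example_cancel,
    .is_active          = example_is_active,
};

void example_mig_init(void)
{
    /* same call shape as blk_mig_init() in the diff above */
    register_savevm_live(NULL, "example", 0, 1, &savevm_example_handlers,
                         NULL);
}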