#include "qemu/cutils.h"
#include "qemu/queue.h"
#include "block.h"
+#include "block/dirty-bitmap.h"
#include "migration/misc.h"
#include "migration.h"
+#include "migration-stats.h"
#include "migration/register.h"
#include "qemu-file.h"
#include "migration/vmstate.h"
#include "sysemu/block-backend.h"
+#include "trace.h"
+#include "options.h"
/*
 * Size in bytes of one migrated block (1 MiB) and the number of sectors
 * that one such block covers.  The block size is used for the per-block
 * I/O buffers and is the value passed to bdrv_create_dirty_bitmap().
 * The replacement definition uses a 1ULL literal, so expressions built
 * from it are evaluated as unsigned 64-bit values.
 */
-#define BLOCK_SIZE (1 << 20)
-#define BDRV_SECTORS_PER_DIRTY_CHUNK (BLOCK_SIZE >> BDRV_SECTOR_BITS)
+#define BLK_MIG_BLOCK_SIZE (1ULL << 20)
+#define BDRV_SECTORS_PER_DIRTY_CHUNK (BLK_MIG_BLOCK_SIZE >> BDRV_SECTOR_BITS)
/* Flag bits carried in the 64-bit words written with qemu_put_be64(). */
#define BLK_MIG_FLAG_DEVICE_BLOCK 0x01
#define BLK_MIG_FLAG_EOS 0x02
/*
 * Limits checked by the read loop in block_save_iterate():
 * submitted + read_done stays below MAX_IO_BUFFERS, and submitted alone
 * stays below MAX_PARALLEL_IO.
 */
#define MAX_IO_BUFFERS 512
#define MAX_PARALLEL_IO 16
-//#define DEBUG_BLK_MIGRATION
-
-#ifdef DEBUG_BLK_MIGRATION
-#define DPRINTF(fmt, ...) \
- do { printf("blk_migration: " fmt, ## __VA_ARGS__); } while (0)
-#else
-#define DPRINTF(fmt, ...) \
- do { } while (0)
-#endif
-
typedef struct BlkMigDevState {
/* Written during setup phase. Can be read without a lock. */
BlockBackend *blk;
uint64_t flags = BLK_MIG_FLAG_DEVICE_BLOCK;
if (block_mig_state.zero_blocks &&
- buffer_is_zero(blk->buf, BLOCK_SIZE)) {
+ buffer_is_zero(blk->buf, BLK_MIG_BLOCK_SIZE)) {
flags |= BLK_MIG_FLAG_ZERO_BLOCK;
}
return;
}
- qemu_put_buffer(f, blk->buf, BLOCK_SIZE);
+ qemu_put_buffer(f, blk->buf, BLK_MIG_BLOCK_SIZE);
}
int blk_mig_active(void)
{
int64_t chunk = sector / (int64_t)BDRV_SECTORS_PER_DIRTY_CHUNK;
- if (sector < blk_nb_sectors(bmds->blk)) {
+ if (sector < bmds->total_sectors) {
return !!(bmds->aio_bitmap[chunk / (sizeof(unsigned long) * 8)] &
(1UL << (chunk % (sizeof(unsigned long) * 8))));
} else {
/*
 * Allocate the zero-initialised in-flight AIO bitmap for @bmds and store it
 * in bmds->aio_bitmap.
 *
 * The size is computed in bytes: one bit per chunk of
 * BDRV_SECTORS_PER_DIRTY_CHUNK sectors, eight chunks per byte, with the
 * sector count rounded up so that a partial trailing byte is still covered.
 *
 * NOTE(review): the bitmap is indexed elsewhere in this file as an array of
 * unsigned long, while the size here is only rounded up to a whole byte --
 * confirm the allocation can never be shorter than the last word accessed.
 */
static void alloc_aio_bitmap(BlkMigDevState *bmds)
{
- BlockBackend *bb = bmds->blk;
int64_t bitmap_size;
/* Round the sector count up to a whole number of 8-chunk bytes. */
- bitmap_size = blk_nb_sectors(bb) + BDRV_SECTORS_PER_DIRTY_CHUNK * 8 - 1;
+ bitmap_size = bmds->total_sectors + BDRV_SECTORS_PER_DIRTY_CHUNK * 8 - 1;
bitmap_size /= BDRV_SECTORS_PER_DIRTY_CHUNK * 8;
bmds->aio_bitmap = g_malloc0(bitmap_size);
}
blk = g_new(BlkMigBlock, 1);
- blk->buf = g_malloc(BLOCK_SIZE);
+ blk->buf = g_malloc(BLK_MIG_BLOCK_SIZE);
blk->bmds = bmds;
blk->sector = cur_sector;
blk->nr_sectors = nr_sectors;
QSIMPLEQ_FOREACH(bmds, &block_mig_state.bmds_list, entry) {
bmds->dirty_bitmap = bdrv_create_dirty_bitmap(blk_bs(bmds->blk),
- BLOCK_SIZE, NULL, NULL);
+ BLK_MIG_BLOCK_SIZE,
+ NULL, NULL);
if (!bmds->dirty_bitmap) {
ret = -errno;
goto fail;
fail:
QSIMPLEQ_FOREACH(bmds, &block_mig_state.bmds_list, entry) {
if (bmds->dirty_bitmap) {
- bdrv_release_dirty_bitmap(blk_bs(bmds->blk), bmds->dirty_bitmap);
+ bdrv_release_dirty_bitmap(bmds->dirty_bitmap);
}
}
return ret;
BlkMigDevState *bmds;
QSIMPLEQ_FOREACH(bmds, &block_mig_state.bmds_list, entry) {
- bdrv_release_dirty_bitmap(blk_bs(bmds->blk), bmds->dirty_bitmap);
+ bdrv_release_dirty_bitmap(bmds->dirty_bitmap);
}
}
bmds->bulk_completed = 0;
bmds->total_sectors = sectors;
bmds->completed_sectors = 0;
- bmds->shared_base = migrate_use_block_incremental();
+ bmds->shared_base = migrate_block_incremental();
assert(i < num_bs);
bmds_bs[i].bmds = bmds;
block_mig_state.total_sector_sum += sectors;
if (bmds->shared_base) {
- DPRINTF("Start migration for %s with shared base image\n",
- bdrv_get_device_name(bs));
+ trace_migration_block_init_shared(bdrv_get_device_name(bs));
} else {
- DPRINTF("Start full migration for %s\n", bdrv_get_device_name(bs));
+ trace_migration_block_init_full(bdrv_get_device_name(bs));
}
QSIMPLEQ_INSERT_TAIL(&block_mig_state.bmds_list, bmds, entry);
block_mig_state.prev_progress = progress;
qemu_put_be64(f, (progress << BDRV_SECTOR_BITS)
| BLK_MIG_FLAG_PROGRESS);
- DPRINTF("Completed %d %%\r", progress);
+ trace_migration_block_progression(progress);
}
return ret;
bdrv_dirty_bitmap_unlock(bmds->dirty_bitmap);
blk = g_new(BlkMigBlock, 1);
- blk->buf = g_malloc(BLOCK_SIZE);
+ blk->buf = g_malloc(BLK_MIG_BLOCK_SIZE);
blk->bmds = bmds;
blk->sector = sector;
blk->nr_sectors = nr_sectors;
bmds_set_aio_inflight(bmds, sector, nr_sectors, 1);
blk_mig_unlock();
} else {
- ret = blk_pread(bmds->blk, sector * BDRV_SECTOR_SIZE, blk->buf,
- nr_sectors * BDRV_SECTOR_SIZE);
+ ret = blk_pread(bmds->blk, sector * BDRV_SECTOR_SIZE,
+ nr_sectors * BDRV_SECTOR_SIZE, blk->buf, 0);
if (ret < 0) {
goto error;
}
return (bmds->cur_dirty >= bmds->total_sectors);
error:
- DPRINTF("Error reading sector %" PRId64 "\n", sector);
+ trace_migration_block_save_device_dirty(sector);
g_free(blk->buf);
g_free(blk);
return ret;
BlkMigBlock *blk;
int ret = 0;
- DPRINTF("%s Enter submitted %d read_done %d transferred %d\n",
- __func__, block_mig_state.submitted, block_mig_state.read_done,
- block_mig_state.transferred);
+ trace_migration_block_flush_blks("Enter", block_mig_state.submitted,
+ block_mig_state.read_done,
+ block_mig_state.transferred);
blk_mig_lock();
while ((blk = QSIMPLEQ_FIRST(&block_mig_state.blk_list)) != NULL) {
- if (qemu_file_rate_limit(f)) {
+ if (migration_rate_exceeded(f)) {
break;
}
if (blk->ret < 0) {
}
blk_mig_unlock();
- DPRINTF("%s Exit submitted %d read_done %d transferred %d\n", __func__,
- block_mig_state.submitted, block_mig_state.read_done,
- block_mig_state.transferred);
+ trace_migration_block_flush_blks("Exit", block_mig_state.submitted,
+ block_mig_state.read_done,
+ block_mig_state.transferred);
return ret;
}
{
int ret;
- DPRINTF("Enter save live setup submitted %d transferred %d\n",
- block_mig_state.submitted, block_mig_state.transferred);
+ trace_migration_block_save("setup", block_mig_state.submitted,
+ block_mig_state.transferred);
qemu_mutex_lock_iothread();
ret = init_blk_migration(f);
static int block_save_iterate(QEMUFile *f, void *opaque)
{
int ret;
- int64_t last_ftell = qemu_ftell(f);
- int64_t delta_ftell;
+ uint64_t last_bytes = qemu_file_transferred(f);
- DPRINTF("Enter save live iterate submitted %d transferred %d\n",
- block_mig_state.submitted, block_mig_state.transferred);
+ trace_migration_block_save("iterate", block_mig_state.submitted,
+ block_mig_state.transferred);
ret = flush_blks(f);
if (ret) {
/* control the rate of transfer */
blk_mig_lock();
- while (block_mig_state.read_done * BLOCK_SIZE <
- qemu_file_get_rate_limit(f) &&
+ while (block_mig_state.read_done * BLK_MIG_BLOCK_SIZE <
+ migration_rate_get() &&
block_mig_state.submitted < MAX_PARALLEL_IO &&
(block_mig_state.submitted + block_mig_state.read_done) <
MAX_IO_BUFFERS) {
}
qemu_put_be64(f, BLK_MIG_FLAG_EOS);
- delta_ftell = qemu_ftell(f) - last_ftell;
- if (delta_ftell > 0) {
- return 1;
- } else if (delta_ftell < 0) {
- return -1;
- } else {
- return 0;
- }
+ uint64_t delta_bytes = qemu_file_transferred(f) - last_bytes;
+ return (delta_bytes > 0);
}
/* Called with iothread lock taken. */
{
int ret;
- DPRINTF("Enter save live complete submitted %d transferred %d\n",
- block_mig_state.submitted, block_mig_state.transferred);
+ trace_migration_block_save("complete", block_mig_state.submitted,
+ block_mig_state.transferred);
ret = flush_blks(f);
if (ret) {
/* report completion */
qemu_put_be64(f, (100 << BDRV_SECTOR_BITS) | BLK_MIG_FLAG_PROGRESS);
- DPRINTF("Block migration completed\n");
+ trace_migration_block_save_complete();
qemu_put_be64(f, BLK_MIG_FLAG_EOS);
return 0;
}
-static void block_save_pending(QEMUFile *f, void *opaque, uint64_t max_size,
- uint64_t *res_precopy_only,
- uint64_t *res_compatible,
- uint64_t *res_postcopy_only)
+static void block_state_pending(void *opaque, uint64_t *must_precopy,
+ uint64_t *can_postcopy)
{
/* Estimate pending number of bytes to send */
uint64_t pending;
qemu_mutex_unlock_iothread();
blk_mig_lock();
- pending += block_mig_state.submitted * BLOCK_SIZE +
- block_mig_state.read_done * BLOCK_SIZE;
+ pending += block_mig_state.submitted * BLK_MIG_BLOCK_SIZE +
+ block_mig_state.read_done * BLK_MIG_BLOCK_SIZE;
blk_mig_unlock();
/* Report at least one block pending during bulk phase */
- if (pending <= max_size && !block_mig_state.bulk_completed) {
- pending = max_size + BLOCK_SIZE;
+ if (!pending && !block_mig_state.bulk_completed) {
+ pending = BLK_MIG_BLOCK_SIZE;
}
- DPRINTF("Enter save live pending %" PRIu64 "\n", pending);
+ trace_migration_block_state_pending(pending);
/* We don't do postcopy */
- *res_precopy_only += pending;
+ *must_precopy += pending;
}
static int block_load(QEMUFile *f, void *opaque, int version_id)
int nr_sectors;
int ret;
BlockDriverInfo bdi;
- int cluster_size = BLOCK_SIZE;
+ int cluster_size = BLK_MIG_BLOCK_SIZE;
do {
addr = qemu_get_be64(f);
return -EINVAL;
}
- blk_invalidate_cache(blk, &local_err);
+ blk_activate(blk, &local_err);
if (local_err) {
error_report_err(local_err);
return -EINVAL;
ret = bdrv_get_info(blk_bs(blk), &bdi);
if (ret == 0 && bdi.cluster_size > 0 &&
- bdi.cluster_size <= BLOCK_SIZE &&
- BLOCK_SIZE % bdi.cluster_size == 0) {
+ bdi.cluster_size <= BLK_MIG_BLOCK_SIZE &&
+ BLK_MIG_BLOCK_SIZE % bdi.cluster_size == 0) {
cluster_size = bdi.cluster_size;
} else {
- cluster_size = BLOCK_SIZE;
+ cluster_size = BLK_MIG_BLOCK_SIZE;
}
}
int64_t cur_addr;
uint8_t *cur_buf;
- buf = g_malloc(BLOCK_SIZE);
- qemu_get_buffer(f, buf, BLOCK_SIZE);
- for (i = 0; i < BLOCK_SIZE / cluster_size; i++) {
+ buf = g_malloc(BLK_MIG_BLOCK_SIZE);
+ qemu_get_buffer(f, buf, BLK_MIG_BLOCK_SIZE);
+ for (i = 0; i < BLK_MIG_BLOCK_SIZE / cluster_size; i++) {
cur_addr = addr * BDRV_SECTOR_SIZE + i * cluster_size;
cur_buf = buf + i * cluster_size;
if ((!block_mig_state.zero_blocks ||
- cluster_size < BLOCK_SIZE) &&
+ cluster_size < BLK_MIG_BLOCK_SIZE) &&
buffer_is_zero(cur_buf, cluster_size)) {
ret = blk_pwrite_zeroes(blk, cur_addr,
cluster_size,
BDRV_REQ_MAY_UNMAP);
} else {
- ret = blk_pwrite(blk, cur_addr, cur_buf,
- cluster_size, 0);
+ ret = blk_pwrite(blk, cur_addr, cluster_size, cur_buf,
+ 0);
}
if (ret < 0) {
break;
(addr == 100) ? '\n' : '\r');
fflush(stdout);
} else if (!(flags & BLK_MIG_FLAG_EOS)) {
- fprintf(stderr, "Unknown block migration flags: %#x\n", flags);
+ fprintf(stderr, "Unknown block migration flags: 0x%x\n", flags);
return -EINVAL;
}
ret = qemu_file_get_error(f);
/*
 * .is_active callback installed in savevm_block_handlers.
 *
 * Returns the result of the block-migration option query; @opaque is not
 * used.
 */
static bool block_is_active(void *opaque)
{
- return migrate_use_block();
+ return migrate_block();
}
static SaveVMHandlers savevm_block_handlers = {
.save_setup = block_save_setup,
.save_live_iterate = block_save_iterate,
.save_live_complete_precopy = block_save_complete,
- .save_live_pending = block_save_pending,
+ .state_pending_exact = block_state_pending,
+ .state_pending_estimate = block_state_pending,
.load_state = block_load,
.save_cleanup = block_migration_cleanup,
.is_active = block_is_active,