X-Git-Url: https://git.proxmox.com/?a=blobdiff_plain;f=arch_init.c;h=9dacf5689bce20089c3e456b4fab3c38ef9db771;hb=1de7afc984b49af164e2619e6850b9732b173b34;hp=26f30ef987620b6ee4aaaa527f7e13907f88b504;hpb=5e3bc7144edd6e4fa2824944e5eb16c28197dd5a;p=qemu.git diff --git a/arch_init.c b/arch_init.c index 26f30ef98..9dacf5689 100644 --- a/arch_init.c +++ b/arch_init.c @@ -29,20 +29,25 @@ #include #endif #include "config.h" -#include "monitor.h" +#include "monitor/monitor.h" #include "sysemu.h" +#include "qemu/bitops.h" +#include "qemu/bitmap.h" #include "arch_init.h" #include "audio/audio.h" #include "hw/pc.h" -#include "hw/pci.h" +#include "hw/pci/pci.h" #include "hw/audiodev.h" #include "kvm.h" -#include "migration.h" -#include "net.h" -#include "gdbstub.h" +#include "migration/migration.h" +#include "exec/gdbstub.h" #include "hw/smbios.h" -#include "exec-memory.h" +#include "exec/address-spaces.h" #include "hw/pcspk.h" +#include "migration/page_cache.h" +#include "qemu/config-file.h" +#include "qmp-commands.h" +#include "trace.h" #ifdef DEBUG_ARCH_INIT #define DPRINTF(fmt, ...) \ @@ -91,6 +96,8 @@ int graphic_depth = 15; #define QEMU_ARCH QEMU_ARCH_SPARC #elif defined(TARGET_XTENSA) #define QEMU_ARCH QEMU_ARCH_XTENSA +#elif defined(TARGET_UNICORE32) +#define QEMU_ARCH QEMU_ARCH_UNICORE32 #endif const uint32_t arch_type = QEMU_ARCH; @@ -104,6 +111,7 @@ const uint32_t arch_type = QEMU_ARCH; #define RAM_SAVE_FLAG_PAGE 0x08 #define RAM_SAVE_FLAG_EOS 0x10 #define RAM_SAVE_FLAG_CONTINUE 0x20 +#define RAM_SAVE_FLAG_XBZRLE 0x40 #ifdef __ALTIVEC__ #include @@ -131,7 +139,6 @@ static struct defconfig_file { /* Indicates it is an user config file (disabled by -no-user-config) */ bool userconfig; } default_config_files[] = { - { CONFIG_QEMU_DATADIR "/cpus-" TARGET_ARCH ".conf", false }, { CONFIG_QEMU_CONFDIR "/qemu.conf", true }, { CONFIG_QEMU_CONFDIR "/target-" TARGET_ARCH ".conf", true }, { NULL }, /* end of list */ @@ -171,6 +178,92 @@ static int is_dup_page(uint8_t *page) return 1; } +/* struct contains XBZRLE cache and a static page + used by the compression */ +static struct { + /* buffer used for XBZRLE encoding */ + uint8_t *encoded_buf; + /* buffer for storing page content */ + uint8_t *current_buf; + /* buffer used for XBZRLE decoding */ + uint8_t *decoded_buf; + /* Cache for XBZRLE */ + PageCache *cache; +} XBZRLE = { + .encoded_buf = NULL, + .current_buf = NULL, + .decoded_buf = NULL, + .cache = NULL, +}; + + +int64_t xbzrle_cache_resize(int64_t new_size) +{ + if (XBZRLE.cache != NULL) { + return cache_resize(XBZRLE.cache, new_size / TARGET_PAGE_SIZE) * + TARGET_PAGE_SIZE; + } + return pow2floor(new_size); +} + +/* accounting for migration statistics */ +typedef struct AccountingInfo { + uint64_t dup_pages; + uint64_t norm_pages; + uint64_t iterations; + uint64_t xbzrle_bytes; + uint64_t xbzrle_pages; + uint64_t xbzrle_cache_miss; + uint64_t xbzrle_overflows; +} AccountingInfo; + +static AccountingInfo acct_info; + +static void acct_clear(void) +{ + memset(&acct_info, 0, sizeof(acct_info)); +} + +uint64_t dup_mig_bytes_transferred(void) +{ + return acct_info.dup_pages * TARGET_PAGE_SIZE; +} + +uint64_t dup_mig_pages_transferred(void) +{ + return acct_info.dup_pages; +} + +uint64_t norm_mig_bytes_transferred(void) +{ + return acct_info.norm_pages * TARGET_PAGE_SIZE; +} + +uint64_t norm_mig_pages_transferred(void) +{ + return acct_info.norm_pages; +} + +uint64_t xbzrle_mig_bytes_transferred(void) +{ + return acct_info.xbzrle_bytes; +} + +uint64_t xbzrle_mig_pages_transferred(void) +{ + return acct_info.xbzrle_pages; +} + +uint64_t xbzrle_mig_pages_cache_miss(void) +{ + return acct_info.xbzrle_cache_miss; +} + +uint64_t xbzrle_mig_pages_overflow(void) +{ + return acct_info.xbzrle_overflows; +} + static void save_block_hdr(QEMUFile *f, RAMBlock *block, ram_addr_t offset, int cont, int flag) { @@ -183,8 +276,135 @@ static void save_block_hdr(QEMUFile *f, RAMBlock *block, ram_addr_t offset, } +#define ENCODING_FLAG_XBZRLE 0x1 + +static int save_xbzrle_page(QEMUFile *f, uint8_t *current_data, + ram_addr_t current_addr, RAMBlock *block, + ram_addr_t offset, int cont, bool last_stage) +{ + int encoded_len = 0, bytes_sent = -1; + uint8_t *prev_cached_page; + + if (!cache_is_cached(XBZRLE.cache, current_addr)) { + if (!last_stage) { + cache_insert(XBZRLE.cache, current_addr, + g_memdup(current_data, TARGET_PAGE_SIZE)); + } + acct_info.xbzrle_cache_miss++; + return -1; + } + + prev_cached_page = get_cached_data(XBZRLE.cache, current_addr); + + /* save current buffer into memory */ + memcpy(XBZRLE.current_buf, current_data, TARGET_PAGE_SIZE); + + /* XBZRLE encoding (if there is no overflow) */ + encoded_len = xbzrle_encode_buffer(prev_cached_page, XBZRLE.current_buf, + TARGET_PAGE_SIZE, XBZRLE.encoded_buf, + TARGET_PAGE_SIZE); + if (encoded_len == 0) { + DPRINTF("Skipping unmodified page\n"); + return 0; + } else if (encoded_len == -1) { + DPRINTF("Overflow\n"); + acct_info.xbzrle_overflows++; + /* update data in the cache */ + memcpy(prev_cached_page, current_data, TARGET_PAGE_SIZE); + return -1; + } + + /* we need to update the data in the cache, in order to get the same data */ + if (!last_stage) { + memcpy(prev_cached_page, XBZRLE.current_buf, TARGET_PAGE_SIZE); + } + + /* Send XBZRLE based compressed page */ + save_block_hdr(f, block, offset, cont, RAM_SAVE_FLAG_XBZRLE); + qemu_put_byte(f, ENCODING_FLAG_XBZRLE); + qemu_put_be16(f, encoded_len); + qemu_put_buffer(f, XBZRLE.encoded_buf, encoded_len); + bytes_sent = encoded_len + 1 + 2; + acct_info.xbzrle_pages++; + acct_info.xbzrle_bytes += bytes_sent; + + return bytes_sent; +} + static RAMBlock *last_block; static ram_addr_t last_offset; +static unsigned long *migration_bitmap; +static uint64_t migration_dirty_pages; + +static inline bool migration_bitmap_test_and_reset_dirty(MemoryRegion *mr, + ram_addr_t offset) +{ + bool ret; + int nr = (mr->ram_addr + offset) >> TARGET_PAGE_BITS; + + ret = test_and_clear_bit(nr, migration_bitmap); + + if (ret) { + migration_dirty_pages--; + } + return ret; +} + +static inline bool migration_bitmap_set_dirty(MemoryRegion *mr, + ram_addr_t offset) +{ + bool ret; + int nr = (mr->ram_addr + offset) >> TARGET_PAGE_BITS; + + ret = test_and_set_bit(nr, migration_bitmap); + + if (!ret) { + migration_dirty_pages++; + } + return ret; +} + +static void migration_bitmap_sync(void) +{ + RAMBlock *block; + ram_addr_t addr; + uint64_t num_dirty_pages_init = migration_dirty_pages; + MigrationState *s = migrate_get_current(); + static int64_t start_time; + static int64_t num_dirty_pages_period; + int64_t end_time; + + if (!start_time) { + start_time = qemu_get_clock_ms(rt_clock); + } + + trace_migration_bitmap_sync_start(); + memory_global_sync_dirty_bitmap(get_system_memory()); + + QLIST_FOREACH(block, &ram_list.blocks, next) { + for (addr = 0; addr < block->length; addr += TARGET_PAGE_SIZE) { + if (memory_region_get_dirty(block->mr, addr, TARGET_PAGE_SIZE, + DIRTY_MEMORY_MIGRATION)) { + migration_bitmap_set_dirty(block->mr, addr); + } + } + memory_region_reset_dirty(block->mr, 0, block->length, + DIRTY_MEMORY_MIGRATION); + } + trace_migration_bitmap_sync_end(migration_dirty_pages + - num_dirty_pages_init); + num_dirty_pages_period += migration_dirty_pages - num_dirty_pages_init; + end_time = qemu_get_clock_ms(rt_clock); + + /* more than 1 second = 1000 millisecons */ + if (end_time > start_time + 1000) { + s->dirty_pages_rate = num_dirty_pages_period * 1000 + / (end_time - start_time); + start_time = end_time; + num_dirty_pages_period = 0; + } +} + /* * ram_save_block: Writes a page of memory to the stream f @@ -194,39 +414,51 @@ static ram_addr_t last_offset; * n: the amount of bytes written in other case */ -static int ram_save_block(QEMUFile *f) +static int ram_save_block(QEMUFile *f, bool last_stage) { RAMBlock *block = last_block; ram_addr_t offset = last_offset; int bytes_sent = -1; MemoryRegion *mr; + ram_addr_t current_addr; if (!block) block = QLIST_FIRST(&ram_list.blocks); do { mr = block->mr; - if (memory_region_get_dirty(mr, offset, TARGET_PAGE_SIZE, - DIRTY_MEMORY_MIGRATION)) { + if (migration_bitmap_test_and_reset_dirty(mr, offset)) { uint8_t *p; int cont = (block == last_block) ? RAM_SAVE_FLAG_CONTINUE : 0; - memory_region_reset_dirty(mr, offset, TARGET_PAGE_SIZE, - DIRTY_MEMORY_MIGRATION); - p = memory_region_get_ram_ptr(mr) + offset; if (is_dup_page(p)) { + acct_info.dup_pages++; save_block_hdr(f, block, offset, cont, RAM_SAVE_FLAG_COMPRESS); qemu_put_byte(f, *p); bytes_sent = 1; - } else { + } else if (migrate_use_xbzrle()) { + current_addr = block->offset + offset; + bytes_sent = save_xbzrle_page(f, p, current_addr, block, + offset, cont, last_stage); + if (!last_stage) { + p = get_cached_data(XBZRLE.cache, current_addr); + } + } + + /* either we didn't send yet (we may have had XBZRLE overflow) */ + if (bytes_sent == -1) { save_block_hdr(f, block, offset, cont, RAM_SAVE_FLAG_PAGE); qemu_put_buffer(f, p, TARGET_PAGE_SIZE); bytes_sent = TARGET_PAGE_SIZE; + acct_info.norm_pages++; } - break; + /* if page is unmodified, continue to the next */ + if (bytes_sent != 0) { + break; + } } offset += TARGET_PAGE_SIZE; @@ -248,7 +480,7 @@ static uint64_t bytes_transferred; static ram_addr_t ram_save_remaining(void) { - return ram_list.dirty_pages; + return migration_dirty_pages; } uint64_t ram_bytes_remaining(void) @@ -304,6 +536,15 @@ static void sort_ram_list(void) static void migration_end(void) { memory_global_dirty_log_stop(); + + if (migrate_use_xbzrle()) { + cache_fini(XBZRLE.cache); + g_free(XBZRLE.cache); + g_free(XBZRLE.encoded_buf); + g_free(XBZRLE.current_buf); + g_free(XBZRLE.decoded_buf); + XBZRLE.cache = NULL; + } } static void ram_migration_cancel(void *opaque) @@ -311,29 +552,43 @@ static void ram_migration_cancel(void *opaque) migration_end(); } + +static void reset_ram_globals(void) +{ + last_block = NULL; + last_offset = 0; + sort_ram_list(); +} + #define MAX_WAIT 50 /* ms, half buffered_file limit */ static int ram_save_setup(QEMUFile *f, void *opaque) { - ram_addr_t addr; RAMBlock *block; + int64_t ram_pages = last_ram_offset() >> TARGET_PAGE_BITS; - bytes_transferred = 0; - last_block = NULL; - last_offset = 0; - sort_ram_list(); + migration_bitmap = bitmap_new(ram_pages); + bitmap_set(migration_bitmap, 0, ram_pages); + migration_dirty_pages = ram_pages; - /* Make sure all dirty bits are set */ - QLIST_FOREACH(block, &ram_list.blocks, next) { - for (addr = 0; addr < block->length; addr += TARGET_PAGE_SIZE) { - if (!memory_region_get_dirty(block->mr, addr, TARGET_PAGE_SIZE, - DIRTY_MEMORY_MIGRATION)) { - memory_region_set_dirty(block->mr, addr, TARGET_PAGE_SIZE); - } + bytes_transferred = 0; + reset_ram_globals(); + + if (migrate_use_xbzrle()) { + XBZRLE.cache = cache_init(migrate_xbzrle_cache_size() / + TARGET_PAGE_SIZE, + TARGET_PAGE_SIZE); + if (!XBZRLE.cache) { + DPRINTF("Error creating cache\n"); + return -1; } + XBZRLE.encoded_buf = g_malloc0(TARGET_PAGE_SIZE); + XBZRLE.current_buf = g_malloc(TARGET_PAGE_SIZE); + acct_clear(); } memory_global_dirty_log_start(); + migration_bitmap_sync(); qemu_put_be64(f, ram_bytes_total() | RAM_SAVE_FLAG_MEM_SIZE); @@ -354,7 +609,8 @@ static int ram_save_iterate(QEMUFile *f, void *opaque) double bwidth = 0; int ret; int i; - uint64_t expected_time; + uint64_t expected_downtime; + MigrationState *s = migrate_get_current(); bytes_transferred_last = bytes_transferred; bwidth = qemu_get_clock_ns(rt_clock); @@ -363,12 +619,13 @@ static int ram_save_iterate(QEMUFile *f, void *opaque) while ((ret = qemu_file_rate_limit(f)) == 0) { int bytes_sent; - bytes_sent = ram_save_block(f); + bytes_sent = ram_save_block(f, false); /* no more blocks to sent */ if (bytes_sent < 0) { break; } bytes_transferred += bytes_sent; + acct_info.iterations++; /* we want to check in the 1st loop, just in case it was the 1st time and we had to sync the dirty bitmap. qemu_get_clock_ns() is a bit expensive, so we only check each some @@ -377,7 +634,7 @@ static int ram_save_iterate(QEMUFile *f, void *opaque) if ((i & 63) == 0) { uint64_t t1 = (qemu_get_clock_ns(rt_clock) - bwidth) / 1000000; if (t1 > MAX_WAIT) { - DPRINTF("big wait: " PRIu64 " milliseconds, %d iterations\n", + DPRINTF("big wait: %" PRIu64 " milliseconds, %d iterations\n", t1, i); break; } @@ -392,31 +649,32 @@ static int ram_save_iterate(QEMUFile *f, void *opaque) bwidth = qemu_get_clock_ns(rt_clock) - bwidth; bwidth = (bytes_transferred - bytes_transferred_last) / bwidth; - /* if we haven't transferred anything this round, force expected_time to a - * a very high value, but without crashing */ + /* if we haven't transferred anything this round, force + * expected_downtime to a very high value, but without + * crashing */ if (bwidth == 0) { bwidth = 0.000001; } qemu_put_be64(f, RAM_SAVE_FLAG_EOS); - expected_time = ram_save_remaining() * TARGET_PAGE_SIZE / bwidth; - - DPRINTF("ram_save_live: expected(" PRIu64 ") <= max(" PRIu64 ")?\n", - expected_time, migrate_max_downtime()); + expected_downtime = ram_save_remaining() * TARGET_PAGE_SIZE / bwidth; + DPRINTF("ram_save_live: expected(%" PRIu64 ") <= max(" PRIu64 ")?\n", + expected_downtime, migrate_max_downtime()); - if (expected_time <= migrate_max_downtime()) { - memory_global_sync_dirty_bitmap(get_system_memory()); - expected_time = ram_save_remaining() * TARGET_PAGE_SIZE / bwidth; + if (expected_downtime <= migrate_max_downtime()) { + migration_bitmap_sync(); + expected_downtime = ram_save_remaining() * TARGET_PAGE_SIZE / bwidth; + s->expected_downtime = expected_downtime / 1000000; /* ns -> ms */ - return expected_time <= migrate_max_downtime(); + return expected_downtime <= migrate_max_downtime(); } return 0; } static int ram_save_complete(QEMUFile *f, void *opaque) { - memory_global_sync_dirty_bitmap(get_system_memory()); + migration_bitmap_sync(); /* try transferring iterative blocks of memory */ @@ -424,7 +682,7 @@ static int ram_save_complete(QEMUFile *f, void *opaque) while (true) { int bytes_sent; - bytes_sent = ram_save_block(f); + bytes_sent = ram_save_block(f, true); /* no more blocks to sent */ if (bytes_sent < 0) { break; @@ -435,9 +693,53 @@ static int ram_save_complete(QEMUFile *f, void *opaque) qemu_put_be64(f, RAM_SAVE_FLAG_EOS); + g_free(migration_bitmap); + migration_bitmap = NULL; + return 0; } +static int load_xbzrle(QEMUFile *f, ram_addr_t addr, void *host) +{ + int ret, rc = 0; + unsigned int xh_len; + int xh_flags; + + if (!XBZRLE.decoded_buf) { + XBZRLE.decoded_buf = g_malloc(TARGET_PAGE_SIZE); + } + + /* extract RLE header */ + xh_flags = qemu_get_byte(f); + xh_len = qemu_get_be16(f); + + if (xh_flags != ENCODING_FLAG_XBZRLE) { + fprintf(stderr, "Failed to load XBZRLE page - wrong compression!\n"); + return -1; + } + + if (xh_len > TARGET_PAGE_SIZE) { + fprintf(stderr, "Failed to load XBZRLE page - len overflow!\n"); + return -1; + } + /* load data and decode */ + qemu_get_buffer(f, XBZRLE.decoded_buf, xh_len); + + /* decode RLE */ + ret = xbzrle_decode_buffer(XBZRLE.decoded_buf, xh_len, host, + TARGET_PAGE_SIZE); + if (ret == -1) { + fprintf(stderr, "Failed to load XBZRLE page - decode error!\n"); + rc = -1; + } else if (ret > TARGET_PAGE_SIZE) { + fprintf(stderr, "Failed to load XBZRLE page - size %d exceeds %d!\n", + ret, TARGET_PAGE_SIZE); + abort(); + } + + return rc; +} + static inline void *host_from_stream_offset(QEMUFile *f, ram_addr_t offset, int flags) @@ -538,7 +840,8 @@ static int ram_load(QEMUFile *f, void *opaque, int version_id) memset(host, ch, TARGET_PAGE_SIZE); #ifndef _WIN32 if (ch == 0 && - (!kvm_enabled() || kvm_has_sync_mmu())) { + (!kvm_enabled() || kvm_has_sync_mmu()) && + getpagesize() <= TARGET_PAGE_SIZE) { qemu_madvise(host, TARGET_PAGE_SIZE, QEMU_MADV_DONTNEED); } #endif @@ -551,6 +854,19 @@ static int ram_load(QEMUFile *f, void *opaque, int version_id) } qemu_get_buffer(f, host, TARGET_PAGE_SIZE); + } else if (flags & RAM_SAVE_FLAG_XBZRLE) { + if (!migrate_use_xbzrle()) { + return -EINVAL; + } + void *host = host_from_stream_offset(f, addr, flags); + if (!host) { + return -EINVAL; + } + + if (load_xbzrle(f, addr, host) < 0) { + ret = -EINVAL; + goto done; + } } error = qemu_file_get_error(f); if (error) { @@ -560,8 +876,8 @@ static int ram_load(QEMUFile *f, void *opaque, int version_id) } while (!(flags & RAM_SAVE_FLAG_EOS)); done: - DPRINTF("Completed load of VM with exit code %d seq iteration " PRIu64 "\n", - ret, seq_iter); + DPRINTF("Completed load of VM with exit code %d seq iteration " + "%" PRIu64 "\n", ret, seq_iter); return ret; } @@ -680,15 +996,20 @@ void select_soundhw(const char *optarg) { struct soundhw *c; - if (*optarg == '?') { + if (is_help_option(optarg)) { show_valid_cards: +#ifdef HAS_AUDIO_CHOICE printf("Valid sound card names (comma separated):\n"); for (c = soundhw; c->name; ++c) { printf ("%-11s %s\n", c->name, c->descr); } printf("\n-soundhw all will enable all of the above\n"); - exit(*optarg != '?'); +#else + printf("Machine has no user-selectable audio hardware " + "(it may or may not have always-present audio hardware).\n"); +#endif + exit(!is_help_option(optarg)); } else { size_t l; @@ -842,3 +1163,13 @@ int xen_available(void) return 0; #endif } + + +TargetInfo *qmp_query_target(Error **errp) +{ + TargetInfo *info = g_malloc0(sizeof(*info)); + + info->arch = TARGET_TYPE; + + return info; +}