]> git.proxmox.com Git - qemu.git/blobdiff - arch_init.c
Open 2.0 development tree
[qemu.git] / arch_init.c
index 9b46bfcaa53651539fa804dcf70f1f63a5530bd7..e0acbc56611413c11677c540af706ed7dca39879 100644 (file)
 #include <sys/mman.h>
 #endif
 #include "config.h"
-#include "monitor.h"
-#include "sysemu.h"
-#include "arch_init.h"
+#include "monitor/monitor.h"
+#include "sysemu/sysemu.h"
+#include "qemu/bitops.h"
+#include "qemu/bitmap.h"
+#include "sysemu/arch_init.h"
 #include "audio/audio.h"
-#include "hw/pc.h"
-#include "hw/pci.h"
-#include "hw/audiodev.h"
-#include "kvm.h"
-#include "migration.h"
-#include "net.h"
-#include "gdbstub.h"
-#include "hw/smbios.h"
-#include "exec-memory.h"
-#include "hw/pcspk.h"
-#include "qemu/page_cache.h"
+#include "hw/i386/pc.h"
+#include "hw/pci/pci.h"
+#include "hw/audio/audio.h"
+#include "sysemu/kvm.h"
+#include "migration/migration.h"
+#include "hw/i386/smbios.h"
+#include "exec/address-spaces.h"
+#include "hw/audio/pcspk.h"
+#include "migration/page_cache.h"
+#include "qemu/config-file.h"
+#include "qmp-commands.h"
+#include "trace.h"
+#include "exec/cpu-all.h"
+#include "hw/acpi/acpi.h"
 
 #ifdef DEBUG_ARCH_INIT
 #define DPRINTF(fmt, ...) \
@@ -60,7 +65,7 @@ int graphic_depth = 8;
 #else
 int graphic_width = 800;
 int graphic_height = 600;
-int graphic_depth = 15;
+int graphic_depth = 32;
 #endif
 
 
@@ -80,6 +85,8 @@ int graphic_depth = 15;
 #define QEMU_ARCH QEMU_ARCH_MICROBLAZE
 #elif defined(TARGET_MIPS)
 #define QEMU_ARCH QEMU_ARCH_MIPS
+#elif defined(TARGET_MOXIE)
+#define QEMU_ARCH QEMU_ARCH_MOXIE
 #elif defined(TARGET_OPENRISC)
 #define QEMU_ARCH QEMU_ARCH_OPENRISC
 #elif defined(TARGET_PPC)
@@ -97,6 +104,9 @@ int graphic_depth = 15;
 #endif
 
 const uint32_t arch_type = QEMU_ARCH;
+static bool mig_throttle_on;
+static int dirty_rate_high_cnt;
+static void check_guest_throttling(void);
 
 /***********************************************************/
 /* ram save/restore */
@@ -108,26 +118,7 @@ const uint32_t arch_type = QEMU_ARCH;
 #define RAM_SAVE_FLAG_EOS      0x10
 #define RAM_SAVE_FLAG_CONTINUE 0x20
 #define RAM_SAVE_FLAG_XBZRLE   0x40
-
-#ifdef __ALTIVEC__
-#include <altivec.h>
-#define VECTYPE        vector unsigned char
-#define SPLAT(p)       vec_splat(vec_ld(0, p), 0)
-#define ALL_EQ(v1, v2) vec_all_eq(v1, v2)
-/* altivec.h may redefine the bool macro as vector type.
- * Reset it to POSIX semantics. */
-#undef bool
-#define bool _Bool
-#elif defined __SSE2__
-#include <emmintrin.h>
-#define VECTYPE        __m128i
-#define SPLAT(p)       _mm_set1_epi8(*(p))
-#define ALL_EQ(v1, v2) (_mm_movemask_epi8(_mm_cmpeq_epi8(v1, v2)) == 0xFFFF)
-#else
-#define VECTYPE        unsigned long
-#define SPLAT(p)       (*(p) * (~0UL / 255))
-#define ALL_EQ(v1, v2) ((v1) == (v2))
-#endif
+/* 0x80 is reserved in migration.h start with 0x100 next */
 
 
 static struct defconfig_file {
@@ -135,9 +126,8 @@ static struct defconfig_file {
     /* Indicates it is an user config file (disabled by -no-user-config) */
     bool userconfig;
 } default_config_files[] = {
-    { CONFIG_QEMU_DATADIR "/cpus-" TARGET_ARCH ".conf",  false },
     { CONFIG_QEMU_CONFDIR "/qemu.conf",                   true },
-    { CONFIG_QEMU_CONFDIR "/target-" TARGET_ARCH ".conf", true },
+    { CONFIG_QEMU_CONFDIR "/target-" TARGET_NAME ".conf", true },
     { NULL }, /* end of list */
 };
 
@@ -156,23 +146,13 @@ int qemu_read_default_config_files(bool userconfig)
             return ret;
         }
     }
-    
+
     return 0;
 }
 
-static int is_dup_page(uint8_t *page)
+static inline bool is_zero_range(uint8_t *p, uint64_t size)
 {
-    VECTYPE *p = (VECTYPE *)page;
-    VECTYPE val = SPLAT(page);
-    int i;
-
-    for (i = 0; i < TARGET_PAGE_SIZE / sizeof(VECTYPE); i++) {
-        if (!ALL_EQ(val, p[i])) {
-            return 0;
-        }
-    }
-
-    return 1;
+    return buffer_find_nonzero_offset(p, size) == size;
 }
 
 /* struct contains XBZRLE cache and a static page
@@ -206,6 +186,7 @@ int64_t xbzrle_cache_resize(int64_t new_size)
 /* accounting for migration statistics */
 typedef struct AccountingInfo {
     uint64_t dup_pages;
+    uint64_t skipped_pages;
     uint64_t norm_pages;
     uint64_t iterations;
     uint64_t xbzrle_bytes;
@@ -231,6 +212,16 @@ uint64_t dup_mig_pages_transferred(void)
     return acct_info.dup_pages;
 }
 
+uint64_t skipped_mig_bytes_transferred(void)
+{
+    return acct_info.skipped_pages * TARGET_PAGE_SIZE;
+}
+
+uint64_t skipped_mig_pages_transferred(void)
+{
+    return acct_info.skipped_pages;
+}
+
 uint64_t norm_mig_bytes_transferred(void)
 {
     return acct_info.norm_pages * TARGET_PAGE_SIZE;
@@ -261,16 +252,21 @@ uint64_t xbzrle_mig_pages_overflow(void)
     return acct_info.xbzrle_overflows;
 }
 
-static void save_block_hdr(QEMUFile *f, RAMBlock *block, ram_addr_t offset,
-        int cont, int flag)
+static size_t save_block_hdr(QEMUFile *f, RAMBlock *block, ram_addr_t offset,
+                             int cont, int flag)
 {
-        qemu_put_be64(f, offset | cont | flag);
-        if (!cont) {
-                qemu_put_byte(f, strlen(block->idstr));
-                qemu_put_buffer(f, (uint8_t *)block->idstr,
-                                strlen(block->idstr));
-        }
+    size_t size;
+
+    qemu_put_be64(f, offset | cont | flag);
+    size = 8;
 
+    if (!cont) {
+        qemu_put_byte(f, strlen(block->idstr));
+        qemu_put_buffer(f, (uint8_t *)block->idstr,
+                        strlen(block->idstr));
+        size += 1 + strlen(block->idstr);
+    }
+    return size;
 }
 
 #define ENCODING_FLAG_XBZRLE 0x1
@@ -284,8 +280,7 @@ static int save_xbzrle_page(QEMUFile *f, uint8_t *current_data,
 
     if (!cache_is_cached(XBZRLE.cache, current_addr)) {
         if (!last_stage) {
-            cache_insert(XBZRLE.cache, current_addr,
-                         g_memdup(current_data, TARGET_PAGE_SIZE));
+            cache_insert(XBZRLE.cache, current_addr, current_data);
         }
         acct_info.xbzrle_cache_miss++;
         return -1;
@@ -317,57 +312,197 @@ static int save_xbzrle_page(QEMUFile *f, uint8_t *current_data,
     }
 
     /* Send XBZRLE based compressed page */
-    save_block_hdr(f, block, offset, cont, RAM_SAVE_FLAG_XBZRLE);
+    bytes_sent = save_block_hdr(f, block, offset, cont, RAM_SAVE_FLAG_XBZRLE);
     qemu_put_byte(f, ENCODING_FLAG_XBZRLE);
     qemu_put_be16(f, encoded_len);
     qemu_put_buffer(f, XBZRLE.encoded_buf, encoded_len);
-    bytes_sent = encoded_len + 1 + 2;
+    bytes_sent += encoded_len + 1 + 2;
     acct_info.xbzrle_pages++;
     acct_info.xbzrle_bytes += bytes_sent;
 
     return bytes_sent;
 }
 
-static RAMBlock *last_block;
+
+/* This is the last block that we have visited serching for dirty pages
+ */
+static RAMBlock *last_seen_block;
+/* This is the last block from where we have sent data */
+static RAMBlock *last_sent_block;
 static ram_addr_t last_offset;
+static unsigned long *migration_bitmap;
+static uint64_t migration_dirty_pages;
+static uint32_t last_version;
+static bool ram_bulk_stage;
+
+static inline
+ram_addr_t migration_bitmap_find_and_reset_dirty(MemoryRegion *mr,
+                                                 ram_addr_t start)
+{
+    unsigned long base = mr->ram_addr >> TARGET_PAGE_BITS;
+    unsigned long nr = base + (start >> TARGET_PAGE_BITS);
+    uint64_t mr_size = TARGET_PAGE_ALIGN(memory_region_size(mr));
+    unsigned long size = base + (mr_size >> TARGET_PAGE_BITS);
+
+    unsigned long next;
+
+    if (ram_bulk_stage && nr > base) {
+        next = nr + 1;
+    } else {
+        next = find_next_bit(migration_bitmap, size, nr);
+    }
+
+    if (next < size) {
+        clear_bit(next, migration_bitmap);
+        migration_dirty_pages--;
+    }
+    return (next - base) << TARGET_PAGE_BITS;
+}
+
+static inline bool migration_bitmap_set_dirty(MemoryRegion *mr,
+                                              ram_addr_t offset)
+{
+    bool ret;
+    int nr = (mr->ram_addr + offset) >> TARGET_PAGE_BITS;
+
+    ret = test_and_set_bit(nr, migration_bitmap);
+
+    if (!ret) {
+        migration_dirty_pages++;
+    }
+    return ret;
+}
+
+/* Needs iothread lock! */
+
+static void migration_bitmap_sync(void)
+{
+    RAMBlock *block;
+    ram_addr_t addr;
+    uint64_t num_dirty_pages_init = migration_dirty_pages;
+    MigrationState *s = migrate_get_current();
+    static int64_t start_time;
+    static int64_t bytes_xfer_prev;
+    static int64_t num_dirty_pages_period;
+    int64_t end_time;
+    int64_t bytes_xfer_now;
+
+    if (!bytes_xfer_prev) {
+        bytes_xfer_prev = ram_bytes_transferred();
+    }
+
+    if (!start_time) {
+        start_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
+    }
+
+    trace_migration_bitmap_sync_start();
+    address_space_sync_dirty_bitmap(&address_space_memory);
+
+    QTAILQ_FOREACH(block, &ram_list.blocks, next) {
+        for (addr = 0; addr < block->length; addr += TARGET_PAGE_SIZE) {
+            if (memory_region_test_and_clear_dirty(block->mr,
+                                                   addr, TARGET_PAGE_SIZE,
+                                                   DIRTY_MEMORY_MIGRATION)) {
+                migration_bitmap_set_dirty(block->mr, addr);
+            }
+        }
+    }
+    trace_migration_bitmap_sync_end(migration_dirty_pages
+                                    - num_dirty_pages_init);
+    num_dirty_pages_period += migration_dirty_pages - num_dirty_pages_init;
+    end_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
+
+    /* more than 1 second = 1000 millisecons */
+    if (end_time > start_time + 1000) {
+        if (migrate_auto_converge()) {
+            /* The following detection logic can be refined later. For now:
+               Check to see if the dirtied bytes is 50% more than the approx.
+               amount of bytes that just got transferred since the last time we
+               were in this routine. If that happens >N times (for now N==4)
+               we turn on the throttle down logic */
+            bytes_xfer_now = ram_bytes_transferred();
+            if (s->dirty_pages_rate &&
+               (num_dirty_pages_period * TARGET_PAGE_SIZE >
+                   (bytes_xfer_now - bytes_xfer_prev)/2) &&
+               (dirty_rate_high_cnt++ > 4)) {
+                    trace_migration_throttle();
+                    mig_throttle_on = true;
+                    dirty_rate_high_cnt = 0;
+             }
+             bytes_xfer_prev = bytes_xfer_now;
+        } else {
+             mig_throttle_on = false;
+        }
+        s->dirty_pages_rate = num_dirty_pages_period * 1000
+            / (end_time - start_time);
+        s->dirty_bytes_rate = s->dirty_pages_rate * TARGET_PAGE_SIZE;
+        start_time = end_time;
+        num_dirty_pages_period = 0;
+    }
+}
 
 /*
  * ram_save_block: Writes a page of memory to the stream f
  *
- * Returns:  0: if the page hasn't changed
- *          -1: if there are no more dirty pages
- *           n: the amount of bytes written in other case
+ * Returns:  The number of bytes written.
+ *           0 means no dirty pages
  */
 
 static int ram_save_block(QEMUFile *f, bool last_stage)
 {
-    RAMBlock *block = last_block;
+    RAMBlock *block = last_seen_block;
     ram_addr_t offset = last_offset;
-    int bytes_sent = -1;
+    bool complete_round = false;
+    int bytes_sent = 0;
     MemoryRegion *mr;
     ram_addr_t current_addr;
 
     if (!block)
-        block = QLIST_FIRST(&ram_list.blocks);
+        block = QTAILQ_FIRST(&ram_list.blocks);
 
-    do {
+    while (true) {
         mr = block->mr;
-        if (memory_region_get_dirty(mr, offset, TARGET_PAGE_SIZE,
-                                    DIRTY_MEMORY_MIGRATION)) {
+        offset = migration_bitmap_find_and_reset_dirty(mr, offset);
+        if (complete_round && block == last_seen_block &&
+            offset >= last_offset) {
+            break;
+        }
+        if (offset >= block->length) {
+            offset = 0;
+            block = QTAILQ_NEXT(block, next);
+            if (!block) {
+                block = QTAILQ_FIRST(&ram_list.blocks);
+                complete_round = true;
+                ram_bulk_stage = false;
+            }
+        } else {
+            int ret;
             uint8_t *p;
-            int cont = (block == last_block) ? RAM_SAVE_FLAG_CONTINUE : 0;
-
-            memory_region_reset_dirty(mr, offset, TARGET_PAGE_SIZE,
-                                      DIRTY_MEMORY_MIGRATION);
+            int cont = (block == last_sent_block) ?
+                RAM_SAVE_FLAG_CONTINUE : 0;
 
             p = memory_region_get_ram_ptr(mr) + offset;
 
-            if (is_dup_page(p)) {
+            /* In doubt sent page as normal */
+            bytes_sent = -1;
+            ret = ram_control_save_page(f, block->offset,
+                               offset, TARGET_PAGE_SIZE, &bytes_sent);
+
+            if (ret != RAM_SAVE_CONTROL_NOT_SUPP) {
+                if (ret != RAM_SAVE_CONTROL_DELAYED) {
+                    if (bytes_sent > 0) {
+                        acct_info.norm_pages++;
+                    } else if (bytes_sent == 0) {
+                        acct_info.dup_pages++;
+                    }
+                }
+            } else if (is_zero_range(p, TARGET_PAGE_SIZE)) {
                 acct_info.dup_pages++;
-                save_block_hdr(f, block, offset, cont, RAM_SAVE_FLAG_COMPRESS);
-                qemu_put_byte(f, *p);
-                bytes_sent = 1;
-            } else if (migrate_use_xbzrle()) {
+                bytes_sent = save_block_hdr(f, block, offset, cont,
+                                            RAM_SAVE_FLAG_COMPRESS);
+                qemu_put_byte(f, 0);
+                bytes_sent++;
+            } else if (!ram_bulk_stage && migrate_use_xbzrle()) {
                 current_addr = block->offset + offset;
                 bytes_sent = save_xbzrle_page(f, p, current_addr, block,
                                               offset, cont, last_stage);
@@ -376,30 +511,22 @@ static int ram_save_block(QEMUFile *f, bool last_stage)
                 }
             }
 
-            /* either we didn't send yet (we may have had XBZRLE overflow) */
+            /* XBZRLE overflow or normal page */
             if (bytes_sent == -1) {
-                save_block_hdr(f, block, offset, cont, RAM_SAVE_FLAG_PAGE);
-                qemu_put_buffer(f, p, TARGET_PAGE_SIZE);
-                bytes_sent = TARGET_PAGE_SIZE;
+                bytes_sent = save_block_hdr(f, block, offset, cont, RAM_SAVE_FLAG_PAGE);
+                qemu_put_buffer_async(f, p, TARGET_PAGE_SIZE);
+                bytes_sent += TARGET_PAGE_SIZE;
                 acct_info.norm_pages++;
             }
 
             /* if page is unmodified, continue to the next */
-            if (bytes_sent != 0) {
+            if (bytes_sent > 0) {
+                last_sent_block = block;
                 break;
             }
         }
-
-        offset += TARGET_PAGE_SIZE;
-        if (offset >= block->length) {
-            offset = 0;
-            block = QLIST_NEXT(block, next);
-            if (!block)
-                block = QLIST_FIRST(&ram_list.blocks);
-        }
-    } while (block != last_block || offset != last_offset);
-
-    last_block = block;
+    }
+    last_seen_block = block;
     last_offset = offset;
 
     return bytes_sent;
@@ -407,9 +534,21 @@ static int ram_save_block(QEMUFile *f, bool last_stage)
 
 static uint64_t bytes_transferred;
 
+void acct_update_position(QEMUFile *f, size_t size, bool zero)
+{
+    uint64_t pages = size / TARGET_PAGE_SIZE;
+    if (zero) {
+        acct_info.dup_pages += pages;
+    } else {
+        acct_info.norm_pages += pages;
+        bytes_transferred += size;
+        qemu_update_position(f, size);
+    }
+}
+
 static ram_addr_t ram_save_remaining(void)
 {
-    return ram_list.dirty_pages;
+    return migration_dirty_pages;
 }
 
 uint64_t ram_bytes_remaining(void)
@@ -427,46 +566,21 @@ uint64_t ram_bytes_total(void)
     RAMBlock *block;
     uint64_t total = 0;
 
-    QLIST_FOREACH(block, &ram_list.blocks, next)
+    QTAILQ_FOREACH(block, &ram_list.blocks, next)
         total += block->length;
 
     return total;
 }
 
-static int block_compar(const void *a, const void *b)
-{
-    RAMBlock * const *ablock = a;
-    RAMBlock * const *bblock = b;
-
-    return strcmp((*ablock)->idstr, (*bblock)->idstr);
-}
-
-static void sort_ram_list(void)
-{
-    RAMBlock *block, *nblock, **blocks;
-    int n;
-    n = 0;
-    QLIST_FOREACH(block, &ram_list.blocks, next) {
-        ++n;
-    }
-    blocks = g_malloc(n * sizeof *blocks);
-    n = 0;
-    QLIST_FOREACH_SAFE(block, &ram_list.blocks, next, nblock) {
-        blocks[n++] = block;
-        QLIST_REMOVE(block, next);
-    }
-    qsort(blocks, n, sizeof *blocks, block_compar);
-    while (--n >= 0) {
-        QLIST_INSERT_HEAD(&ram_list.blocks, blocks[n], next);
-    }
-    g_free(blocks);
-}
-
 static void migration_end(void)
 {
-    memory_global_dirty_log_stop();
+    if (migration_bitmap) {
+        memory_global_dirty_log_stop();
+        g_free(migration_bitmap);
+        migration_bitmap = NULL;
+    }
 
-    if (migrate_use_xbzrle()) {
+    if (XBZRLE.cache) {
         cache_fini(XBZRLE.cache);
         g_free(XBZRLE.cache);
         g_free(XBZRLE.encoded_buf);
@@ -481,17 +595,27 @@ static void ram_migration_cancel(void *opaque)
     migration_end();
 }
 
+static void reset_ram_globals(void)
+{
+    last_seen_block = NULL;
+    last_sent_block = NULL;
+    last_offset = 0;
+    last_version = ram_list.version;
+    ram_bulk_stage = true;
+}
+
 #define MAX_WAIT 50 /* ms, half buffered_file limit */
 
 static int ram_save_setup(QEMUFile *f, void *opaque)
 {
-    ram_addr_t addr;
     RAMBlock *block;
+    int64_t ram_pages = last_ram_offset() >> TARGET_PAGE_BITS;
 
-    bytes_transferred = 0;
-    last_block = NULL;
-    last_offset = 0;
-    sort_ram_list();
+    migration_bitmap = bitmap_new(ram_pages);
+    bitmap_set(migration_bitmap, 0, ram_pages);
+    migration_dirty_pages = ram_pages;
+    mig_throttle_on = false;
+    dirty_rate_high_cnt = 0;
 
     if (migrate_use_xbzrle()) {
         XBZRLE.cache = cache_init(migrate_xbzrle_cache_size() /
@@ -506,26 +630,28 @@ static int ram_save_setup(QEMUFile *f, void *opaque)
         acct_clear();
     }
 
-    /* Make sure all dirty bits are set */
-    QLIST_FOREACH(block, &ram_list.blocks, next) {
-        for (addr = 0; addr < block->length; addr += TARGET_PAGE_SIZE) {
-            if (!memory_region_get_dirty(block->mr, addr, TARGET_PAGE_SIZE,
-                                         DIRTY_MEMORY_MIGRATION)) {
-                memory_region_set_dirty(block->mr, addr, TARGET_PAGE_SIZE);
-            }
-        }
-    }
+    qemu_mutex_lock_iothread();
+    qemu_mutex_lock_ramlist();
+    bytes_transferred = 0;
+    reset_ram_globals();
 
     memory_global_dirty_log_start();
+    migration_bitmap_sync();
+    qemu_mutex_unlock_iothread();
 
     qemu_put_be64(f, ram_bytes_total() | RAM_SAVE_FLAG_MEM_SIZE);
 
-    QLIST_FOREACH(block, &ram_list.blocks, next) {
+    QTAILQ_FOREACH(block, &ram_list.blocks, next) {
         qemu_put_byte(f, strlen(block->idstr));
         qemu_put_buffer(f, (uint8_t *)block->idstr, strlen(block->idstr));
         qemu_put_be64(f, block->length);
     }
 
+    qemu_mutex_unlock_ramlist();
+
+    ram_control_before_iterate(f, RAM_CONTROL_SETUP);
+    ram_control_after_iterate(f, RAM_CONTROL_SETUP);
+
     qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
 
     return 0;
@@ -533,35 +659,41 @@ static int ram_save_setup(QEMUFile *f, void *opaque)
 
 static int ram_save_iterate(QEMUFile *f, void *opaque)
 {
-    uint64_t bytes_transferred_last;
-    double bwidth = 0;
     int ret;
     int i;
-    uint64_t expected_time;
+    int64_t t0;
+    int total_sent = 0;
 
-    bytes_transferred_last = bytes_transferred;
-    bwidth = qemu_get_clock_ns(rt_clock);
+    qemu_mutex_lock_ramlist();
 
+    if (ram_list.version != last_version) {
+        reset_ram_globals();
+    }
+
+    ram_control_before_iterate(f, RAM_CONTROL_ROUND);
+
+    t0 = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
     i = 0;
     while ((ret = qemu_file_rate_limit(f)) == 0) {
         int bytes_sent;
 
         bytes_sent = ram_save_block(f, false);
         /* no more blocks to sent */
-        if (bytes_sent < 0) {
+        if (bytes_sent == 0) {
             break;
         }
-        bytes_transferred += bytes_sent;
+        total_sent += bytes_sent;
         acct_info.iterations++;
+        check_guest_throttling();
         /* we want to check in the 1st loop, just in case it was the 1st time
            and we had to sync the dirty bitmap.
            qemu_get_clock_ns() is a bit expensive, so we only check each some
            iterations
         */
         if ((i & 63) == 0) {
-            uint64_t t1 = (qemu_get_clock_ns(rt_clock) - bwidth) / 1000000;
+            uint64_t t1 = (qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - t0) / 1000000;
             if (t1 > MAX_WAIT) {
-                DPRINTF("big wait: " PRIu64 " milliseconds, %d iterations\n",
+                DPRINTF("big wait: %" PRIu64 " milliseconds, %d iterations\n",
                         t1, i);
                 break;
             }
@@ -569,38 +701,37 @@ static int ram_save_iterate(QEMUFile *f, void *opaque)
         i++;
     }
 
-    if (ret < 0) {
-        return ret;
-    }
+    qemu_mutex_unlock_ramlist();
 
-    bwidth = qemu_get_clock_ns(rt_clock) - bwidth;
-    bwidth = (bytes_transferred - bytes_transferred_last) / bwidth;
+    /*
+     * Must occur before EOS (or any QEMUFile operation)
+     * because of RDMA protocol.
+     */
+    ram_control_after_iterate(f, RAM_CONTROL_ROUND);
 
-    /* if we haven't transferred anything this round, force expected_time to a
-     * a very high value, but without crashing */
-    if (bwidth == 0) {
-        bwidth = 0.000001;
-    }
+    bytes_transferred += total_sent;
 
+    /*
+     * Do not count these 8 bytes into total_sent, so that we can
+     * return 0 if no page had been dirtied.
+     */
     qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
+    bytes_transferred += 8;
 
-    expected_time = ram_save_remaining() * TARGET_PAGE_SIZE / bwidth;
-
-    DPRINTF("ram_save_live: expected(" PRIu64 ") <= max(" PRIu64 ")?\n",
-            expected_time, migrate_max_downtime());
-
-    if (expected_time <= migrate_max_downtime()) {
-        memory_global_sync_dirty_bitmap(get_system_memory());
-        expected_time = ram_save_remaining() * TARGET_PAGE_SIZE / bwidth;
-
-        return expected_time <= migrate_max_downtime();
+    ret = qemu_file_get_error(f);
+    if (ret < 0) {
+        return ret;
     }
-    return 0;
+
+    return total_sent;
 }
 
 static int ram_save_complete(QEMUFile *f, void *opaque)
 {
-    memory_global_sync_dirty_bitmap(get_system_memory());
+    qemu_mutex_lock_ramlist();
+    migration_bitmap_sync();
+
+    ram_control_before_iterate(f, RAM_CONTROL_FINISH);
 
     /* try transferring iterative blocks of memory */
 
@@ -610,18 +741,36 @@ static int ram_save_complete(QEMUFile *f, void *opaque)
 
         bytes_sent = ram_save_block(f, true);
         /* no more blocks to sent */
-        if (bytes_sent < 0) {
+        if (bytes_sent == 0) {
             break;
         }
         bytes_transferred += bytes_sent;
     }
-    memory_global_dirty_log_stop();
 
+    ram_control_after_iterate(f, RAM_CONTROL_FINISH);
+    migration_end();
+
+    qemu_mutex_unlock_ramlist();
     qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
 
     return 0;
 }
 
+static uint64_t ram_save_pending(QEMUFile *f, void *opaque, uint64_t max_size)
+{
+    uint64_t remaining_size;
+
+    remaining_size = ram_save_remaining() * TARGET_PAGE_SIZE;
+
+    if (remaining_size < max_size) {
+        qemu_mutex_lock_iothread();
+        migration_bitmap_sync();
+        qemu_mutex_unlock_iothread();
+        remaining_size = ram_save_remaining() * TARGET_PAGE_SIZE;
+    }
+    return remaining_size;
+}
+
 static int load_xbzrle(QEMUFile *f, ram_addr_t addr, void *host)
 {
     int ret, rc = 0;
@@ -684,7 +833,7 @@ static inline void *host_from_stream_offset(QEMUFile *f,
     qemu_get_buffer(f, (uint8_t *)id, len);
     id[len] = 0;
 
-    QLIST_FOREACH(block, &ram_list.blocks, next) {
+    QTAILQ_FOREACH(block, &ram_list.blocks, next) {
         if (!strncmp(id, block->idstr, sizeof(id)))
             return memory_region_get_ram_ptr(block->mr) + offset;
     }
@@ -693,6 +842,17 @@ static inline void *host_from_stream_offset(QEMUFile *f,
     return NULL;
 }
 
+/*
+ * If a page (or a whole RDMA chunk) has been
+ * determined to be zero, then zap it.
+ */
+void ram_handle_compressed(void *host, uint8_t ch, uint64_t size)
+{
+    if (ch != 0 || !is_zero_range(host, size)) {
+        memset(host, ch, size);
+    }
+}
+
 static int ram_load(QEMUFile *f, void *opaque, int version_id)
 {
     ram_addr_t addr;
@@ -728,9 +888,13 @@ static int ram_load(QEMUFile *f, void *opaque, int version_id)
                     id[len] = 0;
                     length = qemu_get_be64(f);
 
-                    QLIST_FOREACH(block, &ram_list.blocks, next) {
+                    QTAILQ_FOREACH(block, &ram_list.blocks, next) {
                         if (!strncmp(id, block->idstr, sizeof(id))) {
                             if (block->length != length) {
+                                fprintf(stderr,
+                                        "Length mismatch: %s: " RAM_ADDR_FMT
+                                        " in != " RAM_ADDR_FMT "\n", id, length,
+                                        block->length);
                                 ret =  -EINVAL;
                                 goto done;
                             }
@@ -760,13 +924,7 @@ static int ram_load(QEMUFile *f, void *opaque, int version_id)
             }
 
             ch = qemu_get_byte(f);
-            memset(host, ch, TARGET_PAGE_SIZE);
-#ifndef _WIN32
-            if (ch == 0 &&
-                (!kvm_enabled() || kvm_has_sync_mmu())) {
-                qemu_madvise(host, TARGET_PAGE_SIZE, QEMU_MADV_DONTNEED);
-            }
-#endif
+            ram_handle_compressed(host, ch, TARGET_PAGE_SIZE);
         } else if (flags & RAM_SAVE_FLAG_PAGE) {
             void *host;
 
@@ -777,9 +935,6 @@ static int ram_load(QEMUFile *f, void *opaque, int version_id)
 
             qemu_get_buffer(f, host, TARGET_PAGE_SIZE);
         } else if (flags & RAM_SAVE_FLAG_XBZRLE) {
-            if (!migrate_use_xbzrle()) {
-                return -EINVAL;
-            }
             void *host = host_from_stream_offset(f, addr, flags);
             if (!host) {
                 return -EINVAL;
@@ -789,6 +944,8 @@ static int ram_load(QEMUFile *f, void *opaque, int version_id)
                 ret = -EINVAL;
                 goto done;
             }
+        } else if (flags & RAM_SAVE_FLAG_HOOK) {
+            ram_control_load_hook(f, flags);
         }
         error = qemu_file_get_error(f);
         if (error) {
@@ -798,8 +955,8 @@ static int ram_load(QEMUFile *f, void *opaque, int version_id)
     } while (!(flags & RAM_SAVE_FLAG_EOS));
 
 done:
-    DPRINTF("Completed load of VM with exit code %d seq iteration " PRIu64 "\n",
-            ret, seq_iter);
+    DPRINTF("Completed load of VM with exit code %d seq iteration "
+            "%" PRIu64 "\n", ret, seq_iter);
     return ret;
 }
 
@@ -807,11 +964,11 @@ SaveVMHandlers savevm_ram_handlers = {
     .save_live_setup = ram_save_setup,
     .save_live_iterate = ram_save_iterate,
     .save_live_complete = ram_save_complete,
+    .save_live_pending = ram_save_pending,
     .load_state = ram_load,
     .cancel = ram_migration_cancel,
 };
 
-#ifdef HAS_AUDIO
 struct soundhw {
     const char *name;
     const char *descr;
@@ -823,96 +980,30 @@ struct soundhw {
     } init;
 };
 
-static struct soundhw soundhw[] = {
-#ifdef HAS_AUDIO_CHOICE
-#ifdef CONFIG_PCSPK
-    {
-        "pcspk",
-        "PC speaker",
-        0,
-        1,
-        { .init_isa = pcspk_audio_init }
-    },
-#endif
-
-#ifdef CONFIG_SB16
-    {
-        "sb16",
-        "Creative Sound Blaster 16",
-        0,
-        1,
-        { .init_isa = SB16_init }
-    },
-#endif
+static struct soundhw soundhw[9];
+static int soundhw_count;
 
-#ifdef CONFIG_CS4231A
-    {
-        "cs4231a",
-        "CS4231A",
-        0,
-        1,
-        { .init_isa = cs4231a_init }
-    },
-#endif
-
-#ifdef CONFIG_ADLIB
-    {
-        "adlib",
-#ifdef HAS_YMF262
-        "Yamaha YMF262 (OPL3)",
-#else
-        "Yamaha YM3812 (OPL2)",
-#endif
-        0,
-        1,
-        { .init_isa = Adlib_init }
-    },
-#endif
-
-#ifdef CONFIG_GUS
-    {
-        "gus",
-        "Gravis Ultrasound GF1",
-        0,
-        1,
-        { .init_isa = GUS_init }
-    },
-#endif
-
-#ifdef CONFIG_AC97
-    {
-        "ac97",
-        "Intel 82801AA AC97 Audio",
-        0,
-        0,
-        { .init_pci = ac97_init }
-    },
-#endif
-
-#ifdef CONFIG_ES1370
-    {
-        "es1370",
-        "ENSONIQ AudioPCI ES1370",
-        0,
-        0,
-        { .init_pci = es1370_init }
-    },
-#endif
-
-#ifdef CONFIG_HDA
-    {
-        "hda",
-        "Intel HD Audio",
-        0,
-        0,
-        { .init_pci = intel_hda_and_codec_init }
-    },
-#endif
-
-#endif /* HAS_AUDIO_CHOICE */
+void isa_register_soundhw(const char *name, const char *descr,
+                          int (*init_isa)(ISABus *bus))
+{
+    assert(soundhw_count < ARRAY_SIZE(soundhw) - 1);
+    soundhw[soundhw_count].name = name;
+    soundhw[soundhw_count].descr = descr;
+    soundhw[soundhw_count].isa = 1;
+    soundhw[soundhw_count].init.init_isa = init_isa;
+    soundhw_count++;
+}
 
-    { NULL, NULL, 0, 0, { NULL } }
-};
+void pci_register_soundhw(const char *name, const char *descr,
+                          int (*init_pci)(PCIBus *bus))
+{
+    assert(soundhw_count < ARRAY_SIZE(soundhw) - 1);
+    soundhw[soundhw_count].name = name;
+    soundhw[soundhw_count].descr = descr;
+    soundhw[soundhw_count].isa = 0;
+    soundhw[soundhw_count].init.init_pci = init_pci;
+    soundhw_count++;
+}
 
 void select_soundhw(const char *optarg)
 {
@@ -921,11 +1012,16 @@ void select_soundhw(const char *optarg)
     if (is_help_option(optarg)) {
     show_valid_cards:
 
-        printf("Valid sound card names (comma separated):\n");
-        for (c = soundhw; c->name; ++c) {
-            printf ("%-11s %s\n", c->name, c->descr);
+        if (soundhw_count) {
+             printf("Valid sound card names (comma separated):\n");
+             for (c = soundhw; c->name; ++c) {
+                 printf ("%-11s %s\n", c->name, c->descr);
+             }
+             printf("\n-soundhw all will enable all of the above\n");
+        } else {
+             printf("Machine has no user-selectable audio hardware "
+                    "(it may or may not have always-present audio hardware).\n");
         }
-        printf("\n-soundhw all will enable all of the above\n");
         exit(!is_help_option(optarg));
     }
     else {
@@ -973,32 +1069,30 @@ void select_soundhw(const char *optarg)
     }
 }
 
-void audio_init(ISABus *isa_bus, PCIBus *pci_bus)
+void audio_init(void)
 {
     struct soundhw *c;
+    ISABus *isa_bus = (ISABus *) object_resolve_path_type("", TYPE_ISA_BUS, NULL);
+    PCIBus *pci_bus = (PCIBus *) object_resolve_path_type("", TYPE_PCI_BUS, NULL);
 
     for (c = soundhw; c->name; ++c) {
         if (c->enabled) {
             if (c->isa) {
-                if (isa_bus) {
-                    c->init.init_isa(isa_bus);
+                if (!isa_bus) {
+                    fprintf(stderr, "ISA bus not available for %s\n", c->name);
+                    exit(1);
                 }
+                c->init.init_isa(isa_bus);
             } else {
-                if (pci_bus) {
-                    c->init.init_pci(pci_bus);
+                if (!pci_bus) {
+                    fprintf(stderr, "PCI bus not available for %s\n", c->name);
+                    exit(1);
                 }
+                c->init.init_pci(pci_bus);
             }
         }
     }
 }
-#else
-void select_soundhw(const char *optarg)
-{
-}
-void audio_init(ISABus *isa_bus, PCIBus *pci_bus)
-{
-}
-#endif
 
 int qemu_uuid_parse(const char *str, uint8_t *uuid)
 {
@@ -1016,29 +1110,28 @@ int qemu_uuid_parse(const char *str, uint8_t *uuid)
     if (ret != 16) {
         return -1;
     }
-#ifdef TARGET_I386
-    smbios_add_field(1, offsetof(struct smbios_type_1, uuid), 16, uuid);
-#endif
     return 0;
 }
 
-void do_acpitable_option(const char *optarg)
+void do_acpitable_option(const QemuOpts *opts)
 {
 #ifdef TARGET_I386
-    if (acpi_table_add(optarg) < 0) {
-        fprintf(stderr, "Wrong acpi table provided\n");
+    Error *err = NULL;
+
+    acpi_table_add(opts, &err);
+    if (err) {
+        error_report("Wrong acpi table provided: %s",
+                     error_get_pretty(err));
+        error_free(err);
         exit(1);
     }
 #endif
 }
 
-void do_smbios_option(const char *optarg)
+void do_smbios_option(QemuOpts *opts)
 {
 #ifdef TARGET_I386
-    if (smbios_entry_add(optarg) < 0) {
-        fprintf(stderr, "Wrong smbios provided\n");
-        exit(1);
-    }
+    smbios_entry_add(opts);
 #endif
 }
 
@@ -1049,15 +1142,6 @@ void cpudef_init(void)
 #endif
 }
 
-int audio_available(void)
-{
-#ifdef HAS_AUDIO
-    return 1;
-#else
-    return 0;
-#endif
-}
-
 int tcg_available(void)
 {
     return 1;
@@ -1080,3 +1164,62 @@ int xen_available(void)
     return 0;
 #endif
 }
+
+
+TargetInfo *qmp_query_target(Error **errp)
+{
+    TargetInfo *info = g_malloc0(sizeof(*info));
+
+    info->arch = g_strdup(TARGET_NAME);
+
+    return info;
+}
+
+/* Stub function that's gets run on the vcpu when its brought out of the
+   VM to run inside qemu via async_run_on_cpu()*/
+static void mig_sleep_cpu(void *opq)
+{
+    qemu_mutex_unlock_iothread();
+    g_usleep(30*1000);
+    qemu_mutex_lock_iothread();
+}
+
+/* To reduce the dirty rate explicitly disallow the VCPUs from spending
+   much time in the VM. The migration thread will try to catchup.
+   Workload will experience a performance drop.
+*/
+static void mig_throttle_guest_down(void)
+{
+    CPUState *cpu;
+
+    qemu_mutex_lock_iothread();
+    CPU_FOREACH(cpu) {
+        async_run_on_cpu(cpu, mig_sleep_cpu, NULL);
+    }
+    qemu_mutex_unlock_iothread();
+}
+
+static void check_guest_throttling(void)
+{
+    static int64_t t0;
+    int64_t        t1;
+
+    if (!mig_throttle_on) {
+        return;
+    }
+
+    if (!t0)  {
+        t0 = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
+        return;
+    }
+
+    t1 = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
+
+    /* If it has been more than 40 ms since the last time the guest
+     * was throttled then do it again.
+     */
+    if (40 < (t1-t0)/1000000) {
+        mig_throttle_guest_down();
+        t0 = t1;
+    }
+}