]> git.proxmox.com Git - mirror_qemu.git/blobdiff - migration/rdma.c
io: fix incoming client socket initialization
[mirror_qemu.git] / migration / rdma.c
index e6c3a67b54cfa252f9a072236345f0c652b937f0..674ccab12e12767fa57a48d1d44b2df56b4b3cb6 100644 (file)
@@ -2,29 +2,32 @@
  * RDMA protocol and interfaces
  *
  * Copyright IBM, Corp. 2010-2013
+ * Copyright Red Hat, Inc. 2015-2016
  *
  * Authors:
  *  Michael R. Hines <mrhines@us.ibm.com>
  *  Jiuxing Liu <jl@us.ibm.com>
+ *  Daniel P. Berrange <berrange@redhat.com>
  *
  * This work is licensed under the terms of the GNU GPL, version 2 or
  * later.  See the COPYING file in the top-level directory.
  *
  */
+#include "qemu/osdep.h"
+#include "qapi/error.h"
 #include "qemu-common.h"
+#include "qemu/cutils.h"
 #include "migration/migration.h"
 #include "migration/qemu-file.h"
 #include "exec/cpu-common.h"
+#include "qemu/error-report.h"
 #include "qemu/main-loop.h"
 #include "qemu/sockets.h"
 #include "qemu/bitmap.h"
-#include "block/coroutine.h"
-#include <stdio.h>
-#include <sys/types.h>
+#include "qemu/coroutine.h"
 #include <sys/socket.h>
 #include <netdb.h>
 #include <arpa/inet.h>
-#include <string.h>
 #include <rdma/rdma_cma.h>
 #include "trace.h"
 
@@ -214,17 +217,19 @@ static void network_to_caps(RDMACapabilities *cap)
  * the information. It's small anyway, so a list is overkill.
  */
 typedef struct RDMALocalBlock {
-    uint8_t  *local_host_addr; /* local virtual address */
-    uint64_t remote_host_addr; /* remote virtual address */
-    uint64_t offset;
-    uint64_t length;
-    struct   ibv_mr **pmr;     /* MRs for chunk-level registration */
-    struct   ibv_mr *mr;       /* MR for non-chunk-level registration */
-    uint32_t *remote_keys;     /* rkeys for chunk-level registration */
-    uint32_t remote_rkey;      /* rkeys for non-chunk-level registration */
-    int      index;            /* which block are we */
-    bool     is_ram_block;
-    int      nb_chunks;
+    char          *block_name;
+    uint8_t       *local_host_addr; /* local virtual address */
+    uint64_t       remote_host_addr; /* remote virtual address */
+    uint64_t       offset;
+    uint64_t       length;
+    struct         ibv_mr **pmr;    /* MRs for chunk-level registration */
+    struct         ibv_mr *mr;      /* MR for non-chunk-level registration */
+    uint32_t      *remote_keys;     /* rkeys for chunk-level registration */
+    uint32_t       remote_rkey;     /* rkeys for non-chunk-level registration */
+    int            index;           /* which block are we */
+    unsigned int   src_index;       /* (Only used on dest) */
+    bool           is_ram_block;
+    int            nb_chunks;
     unsigned long *transit_bitmap;
     unsigned long *unregister_bitmap;
 } RDMALocalBlock;
@@ -236,13 +241,13 @@ typedef struct RDMALocalBlock {
  * corresponding RDMALocalBlock with
  * the information needed to perform the actual RDMA.
  */
-typedef struct QEMU_PACKED RDMARemoteBlock {
+typedef struct QEMU_PACKED RDMADestBlock {
     uint64_t remote_host_addr;
     uint64_t offset;
     uint64_t length;
     uint32_t remote_rkey;
     uint32_t padding;
-} RDMARemoteBlock;
+} RDMADestBlock;
 
 static uint64_t htonll(uint64_t v)
 {
@@ -258,20 +263,20 @@ static uint64_t ntohll(uint64_t v) {
     return ((uint64_t)ntohl(u.lv[0]) << 32) | (uint64_t) ntohl(u.lv[1]);
 }
 
-static void remote_block_to_network(RDMARemoteBlock *rb)
+static void dest_block_to_network(RDMADestBlock *db)
 {
-    rb->remote_host_addr = htonll(rb->remote_host_addr);
-    rb->offset = htonll(rb->offset);
-    rb->length = htonll(rb->length);
-    rb->remote_rkey = htonl(rb->remote_rkey);
+    db->remote_host_addr = htonll(db->remote_host_addr);
+    db->offset = htonll(db->offset);
+    db->length = htonll(db->length);
+    db->remote_rkey = htonl(db->remote_rkey);
 }
 
-static void network_to_remote_block(RDMARemoteBlock *rb)
+static void network_to_dest_block(RDMADestBlock *db)
 {
-    rb->remote_host_addr = ntohll(rb->remote_host_addr);
-    rb->offset = ntohll(rb->offset);
-    rb->length = ntohll(rb->length);
-    rb->remote_rkey = ntohl(rb->remote_rkey);
+    db->remote_host_addr = ntohll(db->remote_host_addr);
+    db->offset = ntohll(db->offset);
+    db->length = ntohll(db->length);
+    db->remote_rkey = ntohl(db->remote_rkey);
 }
 
 /*
@@ -345,12 +350,16 @@ typedef struct RDMAContext {
      */
     int error_state;
     int error_reported;
+    int received_error;
 
     /*
      * Description of ram blocks used throughout the code.
      */
     RDMALocalBlocks local_ram_blocks;
-    RDMARemoteBlock *block;
+    RDMADestBlock  *dest_blocks;
+
+    /* Index of the next RAMBlock received during block registration */
+    unsigned int    next_src_index;
 
     /*
      * Migration on *destination* started.
@@ -368,14 +377,20 @@ typedef struct RDMAContext {
     GHashTable *blockmap;
 } RDMAContext;
 
-/*
- * Interface to the rest of the migration call stack.
- */
-typedef struct QEMUFileRDMA {
+#define TYPE_QIO_CHANNEL_RDMA "qio-channel-rdma"
+#define QIO_CHANNEL_RDMA(obj)                                     \
+    OBJECT_CHECK(QIOChannelRDMA, (obj), TYPE_QIO_CHANNEL_RDMA)
+
+typedef struct QIOChannelRDMA QIOChannelRDMA;
+
+
+struct QIOChannelRDMA {
+    QIOChannel parent;
     RDMAContext *rdma;
+    QEMUFile *file;
     size_t len;
-    void *file;
-} QEMUFileRDMA;
+    bool blocking; /* XXX we don't actually honour this yet */
+};
 
 /*
  * Main structure for IB Send/Recv control messages.
@@ -410,7 +425,7 @@ static void network_to_control(RDMAControlHeader *control)
  */
 typedef struct QEMU_PACKED {
     union QEMU_PACKED {
-        uint64_t current_addr;  /* offset into the ramblock of the chunk */
+        uint64_t current_addr;  /* offset into the ram_addr_t space */
         uint64_t chunk;         /* chunk to lookup if unregistering */
     } key;
     uint32_t current_index; /* which ramblock the chunk belongs to */
@@ -418,8 +433,19 @@ typedef struct QEMU_PACKED {
     uint64_t chunks;            /* how many sequential chunks to register */
 } RDMARegister;
 
-static void register_to_network(RDMARegister *reg)
+static void register_to_network(RDMAContext *rdma, RDMARegister *reg)
 {
+    RDMALocalBlock *local_block;
+    local_block  = &rdma->local_ram_blocks.block[reg->current_index];
+
+    if (local_block->is_ram_block) {
+        /*
+         * current_addr as passed in is an address in the local ram_addr_t
+         * space, we need to translate this for the destination
+         */
+        reg->key.current_addr -= local_block->offset;
+        reg->key.current_addr += rdma->dest_blocks[reg->current_index].offset;
+    }
     reg->key.current_addr = htonll(reg->key.current_addr);
     reg->current_index = htonl(reg->current_index);
     reg->chunks = htonll(reg->chunks);
@@ -435,13 +461,19 @@ static void network_to_register(RDMARegister *reg)
 typedef struct QEMU_PACKED {
     uint32_t value;     /* if zero, we will madvise() */
     uint32_t block_idx; /* which ram block index */
-    uint64_t offset;    /* where in the remote ramblock this chunk */
+    uint64_t offset;    /* Address in remote ram_addr_t space */
     uint64_t length;    /* length of the chunk */
 } RDMACompress;
 
-static void compress_to_network(RDMACompress *comp)
+static void compress_to_network(RDMAContext *rdma, RDMACompress *comp)
 {
     comp->value = htonl(comp->value);
+    /*
+     * comp->offset as passed in is an address in the local ram_addr_t
+     * space, we need to translate this for the destination
+     */
+    comp->offset -= rdma->local_ram_blocks.block[comp->block_idx].offset;
+    comp->offset += rdma->dest_blocks[comp->block_idx].offset;
     comp->block_idx = htonl(comp->block_idx);
     comp->offset = htonll(comp->offset);
     comp->length = htonll(comp->length);
@@ -510,27 +542,27 @@ static inline uint8_t *ram_chunk_end(const RDMALocalBlock *rdma_ram_block,
     return result;
 }
 
-static int rdma_add_block(RDMAContext *rdma, void *host_addr,
+static int rdma_add_block(RDMAContext *rdma, const char *block_name,
+                         void *host_addr,
                          ram_addr_t block_offset, uint64_t length)
 {
     RDMALocalBlocks *local = &rdma->local_ram_blocks;
-    RDMALocalBlock *block = g_hash_table_lookup(rdma->blockmap,
-        (void *)(uintptr_t)block_offset);
+    RDMALocalBlock *block;
     RDMALocalBlock *old = local->block;
 
-    assert(block == NULL);
-
-    local->block = g_malloc0(sizeof(RDMALocalBlock) * (local->nb_blocks + 1));
+    local->block = g_new0(RDMALocalBlock, local->nb_blocks + 1);
 
     if (local->nb_blocks) {
         int x;
 
-        for (x = 0; x < local->nb_blocks; x++) {
-            g_hash_table_remove(rdma->blockmap,
-                                (void *)(uintptr_t)old[x].offset);
-            g_hash_table_insert(rdma->blockmap,
-                                (void *)(uintptr_t)old[x].offset,
-                                &local->block[x]);
+        if (rdma->blockmap) {
+            for (x = 0; x < local->nb_blocks; x++) {
+                g_hash_table_remove(rdma->blockmap,
+                                    (void *)(uintptr_t)old[x].offset);
+                g_hash_table_insert(rdma->blockmap,
+                                    (void *)(uintptr_t)old[x].offset,
+                                    &local->block[x]);
+            }
         }
         memcpy(local->block, old, sizeof(RDMALocalBlock) * local->nb_blocks);
         g_free(old);
@@ -538,22 +570,27 @@ static int rdma_add_block(RDMAContext *rdma, void *host_addr,
 
     block = &local->block[local->nb_blocks];
 
+    block->block_name = g_strdup(block_name);
     block->local_host_addr = host_addr;
     block->offset = block_offset;
     block->length = length;
     block->index = local->nb_blocks;
+    block->src_index = ~0U; /* Filled in by the receipt of the block list */
     block->nb_chunks = ram_chunk_index(host_addr, host_addr + length) + 1UL;
     block->transit_bitmap = bitmap_new(block->nb_chunks);
     bitmap_clear(block->transit_bitmap, 0, block->nb_chunks);
     block->unregister_bitmap = bitmap_new(block->nb_chunks);
     bitmap_clear(block->unregister_bitmap, 0, block->nb_chunks);
-    block->remote_keys = g_malloc0(block->nb_chunks * sizeof(uint32_t));
+    block->remote_keys = g_new0(uint32_t, block->nb_chunks);
 
     block->is_ram_block = local->init ? false : true;
 
-    g_hash_table_insert(rdma->blockmap, (void *) block_offset, block);
+    if (rdma->blockmap) {
+        g_hash_table_insert(rdma->blockmap, (void *)(uintptr_t)block_offset, block);
+    }
 
-    trace_rdma_add_block(local->nb_blocks, (uintptr_t) block->local_host_addr,
+    trace_rdma_add_block(block_name, local->nb_blocks,
+                         (uintptr_t) block->local_host_addr,
                          block->offset, block->length,
                          (uintptr_t) (block->local_host_addr + block->length),
                          BITS_TO_LONGS(block->nb_chunks) *
@@ -570,10 +607,10 @@ static int rdma_add_block(RDMAContext *rdma, void *host_addr,
  * in advanced before the migration starts. This tells us where the RAM blocks
  * are so that we can register them individually.
  */
-static void qemu_rdma_init_one_block(void *host_addr,
+static int qemu_rdma_init_one_block(const char *block_name, void *host_addr,
     ram_addr_t block_offset, ram_addr_t length, void *opaque)
 {
-    rdma_add_block(opaque, host_addr, block_offset, length);
+    return rdma_add_block(opaque, block_name, host_addr, block_offset, length);
 }
 
 /*
@@ -586,26 +623,28 @@ static int qemu_rdma_init_ram_blocks(RDMAContext *rdma)
     RDMALocalBlocks *local = &rdma->local_ram_blocks;
 
     assert(rdma->blockmap == NULL);
-    rdma->blockmap = g_hash_table_new(g_direct_hash, g_direct_equal);
     memset(local, 0, sizeof *local);
     qemu_ram_foreach_block(qemu_rdma_init_one_block, rdma);
     trace_qemu_rdma_init_ram_blocks(local->nb_blocks);
-    rdma->block = (RDMARemoteBlock *) g_malloc0(sizeof(RDMARemoteBlock) *
-                        rdma->local_ram_blocks.nb_blocks);
+    rdma->dest_blocks = g_new0(RDMADestBlock,
+                               rdma->local_ram_blocks.nb_blocks);
     local->init = true;
     return 0;
 }
 
-static int rdma_delete_block(RDMAContext *rdma, ram_addr_t block_offset)
+/*
+ * Note: If used outside of cleanup, the caller must ensure that the destination
+ * block structures are also updated
+ */
+static int rdma_delete_block(RDMAContext *rdma, RDMALocalBlock *block)
 {
     RDMALocalBlocks *local = &rdma->local_ram_blocks;
-    RDMALocalBlock *block = g_hash_table_lookup(rdma->blockmap,
-        (void *) block_offset);
     RDMALocalBlock *old = local->block;
     int x;
 
-    assert(block);
-
+    if (rdma->blockmap) {
+        g_hash_table_remove(rdma->blockmap, (void *)(uintptr_t)block->offset);
+    }
     if (block->pmr) {
         int j;
 
@@ -635,14 +674,19 @@ static int rdma_delete_block(RDMAContext *rdma, ram_addr_t block_offset)
     g_free(block->remote_keys);
     block->remote_keys = NULL;
 
-    for (x = 0; x < local->nb_blocks; x++) {
-        g_hash_table_remove(rdma->blockmap, (void *)(uintptr_t)old[x].offset);
+    g_free(block->block_name);
+    block->block_name = NULL;
+
+    if (rdma->blockmap) {
+        for (x = 0; x < local->nb_blocks; x++) {
+            g_hash_table_remove(rdma->blockmap,
+                                (void *)(uintptr_t)old[x].offset);
+        }
     }
 
     if (local->nb_blocks > 1) {
 
-        local->block = g_malloc0(sizeof(RDMALocalBlock) *
-                                    (local->nb_blocks - 1));
+        local->block = g_new0(RDMALocalBlock, local->nb_blocks - 1);
 
         if (block->index) {
             memcpy(local->block, old, sizeof(RDMALocalBlock) * block->index);
@@ -658,8 +702,7 @@ static int rdma_delete_block(RDMAContext *rdma, ram_addr_t block_offset)
         local->block = NULL;
     }
 
-    trace_rdma_delete_block(local->nb_blocks,
-                           (uintptr_t)block->local_host_addr,
+    trace_rdma_delete_block(block, (uintptr_t)block->local_host_addr,
                            block->offset, block->length,
                             (uintptr_t)(block->local_host_addr + block->length),
                            BITS_TO_LONGS(block->nb_chunks) *
@@ -669,7 +712,7 @@ static int rdma_delete_block(RDMAContext *rdma, ram_addr_t block_offset)
 
     local->nb_blocks--;
 
-    if (local->nb_blocks) {
+    if (local->nb_blocks && rdma->blockmap) {
         for (x = 0; x < local->nb_blocks; x++) {
             g_hash_table_insert(rdma->blockmap,
                                 (void *)(uintptr_t)local->block[x].offset,
@@ -743,7 +786,7 @@ static void qemu_rdma_dump_gid(const char *who, struct rdma_cm_id *id)
  *
  *  If the source VM connects with an IPv4 address without knowing that the
  *  destination has bound to '[::]' the migration will unconditionally fail
- *  unless the management software is explicitly listening on the the IPv4
+ *  unless the management software is explicitly listening on the IPv4
  *  address while using a RoCE-based device.
  *
  *  If the source VM connects with an IPv6 address, then we're OK because we can
@@ -790,6 +833,13 @@ static int qemu_rdma_broken_ipv6_kernel(Error **errp, struct ibv_context *verbs)
 
         for (x = 0; x < num_devices; x++) {
             verbs = ibv_open_device(dev_list[x]);
+            if (!verbs) {
+                if (errno == EPERM) {
+                    continue;
+                } else {
+                    return -EINVAL;
+                }
+            }
 
             if (ibv_query_port(verbs, 1, &port_attr)) {
                 ibv_close_device(verbs);
@@ -1122,7 +1172,7 @@ static int qemu_rdma_register_and_get_keys(RDMAContext *rdma,
 
     /* allocate memory to store chunk MRs */
     if (!block->pmr) {
-        block->pmr = g_malloc0(block->nb_chunks * sizeof(struct ibv_mr *));
+        block->pmr = g_new0(struct ibv_mr *, block->nb_chunks);
     }
 
     /*
@@ -1215,7 +1265,7 @@ const char *print_wrid(int wrid)
 
 /*
  * Perform a non-optimized memory unregistration after every transfer
- * for demonsration purposes, only if pin-all is not requested.
+ * for demonstration purposes, only if pin-all is not requested.
  *
  * Potential optimizations:
  * 1. Start a new thread to run this function continuously
@@ -1281,7 +1331,7 @@ static int qemu_rdma_unregister_waiting(RDMAContext *rdma)
         rdma->total_registrations--;
 
         reg.key.chunk = chunk;
-        register_to_network(&reg);
+        register_to_network(rdma, &reg);
         ret = qemu_rdma_exchange_send(rdma, &head, (uint8_t *) &reg,
                                 &resp, NULL, NULL);
         if (ret < 0) {
@@ -1462,7 +1512,7 @@ static int qemu_rdma_block_for_wrid(RDMAContext *rdma, int wrid_requested,
 
     while (1) {
         /*
-         * Coroutine doesn't start until process_incoming_migration()
+         * Coroutine doesn't start until migration_fd_process_incoming()
          * so don't yield unless we know we're running inside of a coroutine.
          */
         if (rdma->migration_started_on_destination) {
@@ -1627,6 +1677,9 @@ static int qemu_rdma_exchange_get_response(RDMAContext *rdma,
                 ", but got: %s (%d), length: %d",
                 control_desc[expecting], expecting,
                 control_desc[head->type], head->type, head->len);
+        if (head->type == RDMA_CONTROL_ERROR) {
+            rdma->received_error = true;
+        }
         return -EIO;
     }
     if (head->len > RDMA_CONTROL_MAX_BUFFER - sizeof(*head)) {
@@ -1885,10 +1938,7 @@ retry:
              * memset() + madvise() the entire chunk without RDMA.
              */
 
-            if (can_use_buffer_find_nonzero_offset((void *)(uintptr_t)sge.addr,
-                                                   length)
-                   && buffer_find_nonzero_offset((void *)(uintptr_t)sge.addr,
-                                                    length) == length) {
+            if (buffer_is_zero((void *)(uintptr_t)sge.addr, length)) {
                 RDMACompress comp = {
                                         .offset = current_addr,
                                         .value = 0,
@@ -1902,7 +1952,7 @@ retry:
                 trace_qemu_rdma_write_one_zero(chunk, sge.length,
                                                current_index, current_addr);
 
-                compress_to_network(&comp);
+                compress_to_network(rdma, &comp);
                 ret = qemu_rdma_exchange_send(rdma, &head,
                                 (uint8_t *) &comp, NULL, NULL, NULL);
 
@@ -1929,7 +1979,7 @@ retry:
             trace_qemu_rdma_write_one_sendreg(chunk, sge.length, current_index,
                                               current_addr);
 
-            register_to_network(&reg);
+            register_to_network(rdma, &reg);
             ret = qemu_rdma_exchange_send(rdma, &head, (uint8_t *) &reg,
                                     &resp, &reg_result_idx, NULL);
             if (ret < 0) {
@@ -2156,7 +2206,7 @@ static void qemu_rdma_cleanup(RDMAContext *rdma)
     int ret, idx;
 
     if (rdma->cm_id && rdma->connected) {
-        if (rdma->error_state) {
+        if (rdma->error_state && !rdma->received_error) {
             RDMAControlHeader head = { .len = 0,
                                        .type = RDMA_CONTROL_ERROR,
                                        .repeat = 1,
@@ -2177,8 +2227,8 @@ static void qemu_rdma_cleanup(RDMAContext *rdma)
         rdma->connected = false;
     }
 
-    g_free(rdma->block);
-    rdma->block = NULL;
+    g_free(rdma->dest_blocks);
+    rdma->dest_blocks = NULL;
 
     for (idx = 0; idx < RDMA_WRID_MAX; idx++) {
         if (rdma->wr_data[idx].control_mr) {
@@ -2190,10 +2240,14 @@ static void qemu_rdma_cleanup(RDMAContext *rdma)
 
     if (rdma->local_ram_blocks.block) {
         while (rdma->local_ram_blocks.nb_blocks) {
-            rdma_delete_block(rdma, rdma->local_ram_blocks.block->offset);
+            rdma_delete_block(rdma, &rdma->local_ram_blocks.block[0]);
         }
     }
 
+    if (rdma->qp) {
+        rdma_destroy_qp(rdma->cm_id);
+        rdma->qp = NULL;
+    }
     if (rdma->cq) {
         ibv_destroy_cq(rdma->cq);
         rdma->cq = NULL;
@@ -2206,18 +2260,14 @@ static void qemu_rdma_cleanup(RDMAContext *rdma)
         ibv_dealloc_pd(rdma->pd);
         rdma->pd = NULL;
     }
-    if (rdma->listen_id) {
-        rdma_destroy_id(rdma->listen_id);
-        rdma->listen_id = NULL;
-    }
     if (rdma->cm_id) {
-        if (rdma->qp) {
-            rdma_destroy_qp(rdma->cm_id);
-            rdma->qp = NULL;
-        }
         rdma_destroy_id(rdma->cm_id);
         rdma->cm_id = NULL;
     }
+    if (rdma->listen_id) {
+        rdma_destroy_id(rdma->listen_id);
+        rdma->listen_id = NULL;
+    }
     if (rdma->channel) {
         rdma_destroy_event_channel(rdma->channel);
         rdma->channel = NULL;
@@ -2263,6 +2313,14 @@ static int qemu_rdma_source_init(RDMAContext *rdma, Error **errp, bool pin_all)
         goto err_rdma_source_init;
     }
 
+    /* Build the hash that maps from offset to RAMBlock */
+    rdma->blockmap = g_hash_table_new(g_direct_hash, g_direct_equal);
+    for (idx = 0; idx < rdma->local_ram_blocks.nb_blocks; idx++) {
+        g_hash_table_insert(rdma->blockmap,
+                (void *)(uintptr_t)rdma->local_ram_blocks.block[idx].offset,
+                &rdma->local_ram_blocks.block[idx]);
+    }
+
     for (idx = 0; idx < RDMA_WRID_MAX; idx++) {
         ret = qemu_rdma_reg_control(rdma, idx);
         if (ret) {
@@ -2309,8 +2367,6 @@ static int qemu_rdma_connect(RDMAContext *rdma, Error **errp)
     if (ret) {
         perror("rdma_connect");
         ERROR(errp, "connecting to destination!");
-        rdma_destroy_id(rdma->cm_id);
-        rdma->cm_id = NULL;
         goto err_rdma_source_connect;
     }
 
@@ -2319,8 +2375,6 @@ static int qemu_rdma_connect(RDMAContext *rdma, Error **errp)
         perror("rdma_get_cm_event after rdma_connect");
         ERROR(errp, "connecting to destination!");
         rdma_ack_cm_event(cm_event);
-        rdma_destroy_id(rdma->cm_id);
-        rdma->cm_id = NULL;
         goto err_rdma_source_connect;
     }
 
@@ -2328,8 +2382,6 @@ static int qemu_rdma_connect(RDMAContext *rdma, Error **errp)
         perror("rdma_get_cm_event != EVENT_ESTABLISHED after rdma_connect");
         ERROR(errp, "connecting to destination!");
         rdma_ack_cm_event(cm_event);
-        rdma_destroy_id(rdma->cm_id);
-        rdma->cm_id = NULL;
         goto err_rdma_source_connect;
     }
     rdma->connected = true;
@@ -2450,8 +2502,7 @@ static void *qemu_rdma_data_init(const char *host_port, Error **errp)
     InetSocketAddress *addr;
 
     if (host_port) {
-        rdma = g_malloc0(sizeof(RDMAContext));
-        memset(rdma, 0, sizeof(RDMAContext));
+        rdma = g_new0(RDMAContext, 1);
         rdma->current_index = -1;
         rdma->current_chunk = -1;
 
@@ -2476,15 +2527,19 @@ static void *qemu_rdma_data_init(const char *host_port, Error **errp)
  * SEND messages for control only.
  * VM's ram is handled with regular RDMA messages.
  */
-static int qemu_rdma_put_buffer(void *opaque, const uint8_t *buf,
-                                int64_t pos, int size)
-{
-    QEMUFileRDMA *r = opaque;
-    QEMUFile *f = r->file;
-    RDMAContext *rdma = r->rdma;
-    size_t remaining = size;
-    uint8_t * data = (void *) buf;
+static ssize_t qio_channel_rdma_writev(QIOChannel *ioc,
+                                       const struct iovec *iov,
+                                       size_t niov,
+                                       int *fds,
+                                       size_t nfds,
+                                       Error **errp)
+{
+    QIOChannelRDMA *rioc = QIO_CHANNEL_RDMA(ioc);
+    QEMUFile *f = rioc->file;
+    RDMAContext *rdma = rioc->rdma;
     int ret;
+    ssize_t done = 0;
+    size_t i;
 
     CHECK_ERROR_STATE();
 
@@ -2498,30 +2553,35 @@ static int qemu_rdma_put_buffer(void *opaque, const uint8_t *buf,
         return ret;
     }
 
-    while (remaining) {
-        RDMAControlHeader head;
+    for (i = 0; i < niov; i++) {
+        size_t remaining = iov[i].iov_len;
+        uint8_t * data = (void *)iov[i].iov_base;
+        while (remaining) {
+            RDMAControlHeader head;
 
-        r->len = MIN(remaining, RDMA_SEND_INCREMENT);
-        remaining -= r->len;
+            rioc->len = MIN(remaining, RDMA_SEND_INCREMENT);
+            remaining -= rioc->len;
 
-        head.len = r->len;
-        head.type = RDMA_CONTROL_QEMU_FILE;
+            head.len = rioc->len;
+            head.type = RDMA_CONTROL_QEMU_FILE;
 
-        ret = qemu_rdma_exchange_send(rdma, &head, data, NULL, NULL, NULL);
+            ret = qemu_rdma_exchange_send(rdma, &head, data, NULL, NULL, NULL);
 
-        if (ret < 0) {
-            rdma->error_state = ret;
-            return ret;
-        }
+            if (ret < 0) {
+                rdma->error_state = ret;
+                return ret;
+            }
 
-        data += r->len;
+            data += rioc->len;
+            done += rioc->len;
+        }
     }
 
-    return size;
+    return done;
 }
 
 static size_t qemu_rdma_fill(RDMAContext *rdma, uint8_t *buf,
-                             int size, int idx)
+                             size_t size, int idx)
 {
     size_t len = 0;
 
@@ -2542,41 +2602,74 @@ static size_t qemu_rdma_fill(RDMAContext *rdma, uint8_t *buf,
  * RDMA links don't use bytestreams, so we have to
  * return bytes to QEMUFile opportunistically.
  */
-static int qemu_rdma_get_buffer(void *opaque, uint8_t *buf,
-                                int64_t pos, int size)
-{
-    QEMUFileRDMA *r = opaque;
-    RDMAContext *rdma = r->rdma;
+static ssize_t qio_channel_rdma_readv(QIOChannel *ioc,
+                                      const struct iovec *iov,
+                                      size_t niov,
+                                      int **fds,
+                                      size_t *nfds,
+                                      Error **errp)
+{
+    QIOChannelRDMA *rioc = QIO_CHANNEL_RDMA(ioc);
+    RDMAContext *rdma = rioc->rdma;
     RDMAControlHeader head;
     int ret = 0;
+    ssize_t i;
+    size_t done = 0;
 
     CHECK_ERROR_STATE();
 
-    /*
-     * First, we hold on to the last SEND message we
-     * were given and dish out the bytes until we run
-     * out of bytes.
-     */
-    r->len = qemu_rdma_fill(r->rdma, buf, size, 0);
-    if (r->len) {
-        return r->len;
-    }
+    for (i = 0; i < niov; i++) {
+        size_t want = iov[i].iov_len;
+        uint8_t *data = (void *)iov[i].iov_base;
 
-    /*
-     * Once we run out, we block and wait for another
-     * SEND message to arrive.
-     */
-    ret = qemu_rdma_exchange_recv(rdma, &head, RDMA_CONTROL_QEMU_FILE);
+        /*
+         * First, we hold on to the last SEND message we
+         * were given and dish out the bytes until we run
+         * out of bytes.
+         */
+        ret = qemu_rdma_fill(rioc->rdma, data, want, 0);
+        done += ret;
+        want -= ret;
+        /* Got what we needed, so go to next iovec */
+        if (want == 0) {
+            continue;
+        }
 
-    if (ret < 0) {
-        rdma->error_state = ret;
-        return ret;
-    }
+        /* If we got any data so far, then don't wait
+         * for more, just return what we have */
+        if (done > 0) {
+            break;
+        }
 
-    /*
-     * SEND was received with new bytes, now try again.
-     */
-    return qemu_rdma_fill(r->rdma, buf, size, 0);
+
+        /* We've got nothing at all, so lets wait for
+         * more to arrive
+         */
+        ret = qemu_rdma_exchange_recv(rdma, &head, RDMA_CONTROL_QEMU_FILE);
+
+        if (ret < 0) {
+            rdma->error_state = ret;
+            return ret;
+        }
+
+        /*
+         * SEND was received with new bytes, now try again.
+         */
+        ret = qemu_rdma_fill(rioc->rdma, data, want, 0);
+        done += ret;
+        want -= ret;
+
+        /* Still didn't get enough, so lets just return */
+        if (want) {
+            if (done == 0) {
+                return QIO_CHANNEL_ERR_BLOCK;
+            } else {
+                break;
+            }
+        }
+    }
+    rioc->len = done;
+    return rioc->len;
 }
 
 /*
@@ -2603,15 +2696,125 @@ static int qemu_rdma_drain_cq(QEMUFile *f, RDMAContext *rdma)
     return 0;
 }
 
-static int qemu_rdma_close(void *opaque)
+
+static int qio_channel_rdma_set_blocking(QIOChannel *ioc,
+                                         bool blocking,
+                                         Error **errp)
+{
+    QIOChannelRDMA *rioc = QIO_CHANNEL_RDMA(ioc);
+    /* XXX we should make readv/writev actually honour this :-) */
+    rioc->blocking = blocking;
+    return 0;
+}
+
+
+typedef struct QIOChannelRDMASource QIOChannelRDMASource;
+struct QIOChannelRDMASource {
+    GSource parent;
+    QIOChannelRDMA *rioc;
+    GIOCondition condition;
+};
+
+static gboolean
+qio_channel_rdma_source_prepare(GSource *source,
+                                gint *timeout)
+{
+    QIOChannelRDMASource *rsource = (QIOChannelRDMASource *)source;
+    RDMAContext *rdma = rsource->rioc->rdma;
+    GIOCondition cond = 0;
+    *timeout = -1;
+
+    if (rdma->wr_data[0].control_len) {
+        cond |= G_IO_IN;
+    }
+    cond |= G_IO_OUT;
+
+    return cond & rsource->condition;
+}
+
+static gboolean
+qio_channel_rdma_source_check(GSource *source)
+{
+    QIOChannelRDMASource *rsource = (QIOChannelRDMASource *)source;
+    RDMAContext *rdma = rsource->rioc->rdma;
+    GIOCondition cond = 0;
+
+    if (rdma->wr_data[0].control_len) {
+        cond |= G_IO_IN;
+    }
+    cond |= G_IO_OUT;
+
+    return cond & rsource->condition;
+}
+
+static gboolean
+qio_channel_rdma_source_dispatch(GSource *source,
+                                 GSourceFunc callback,
+                                 gpointer user_data)
+{
+    QIOChannelFunc func = (QIOChannelFunc)callback;
+    QIOChannelRDMASource *rsource = (QIOChannelRDMASource *)source;
+    RDMAContext *rdma = rsource->rioc->rdma;
+    GIOCondition cond = 0;
+
+    if (rdma->wr_data[0].control_len) {
+        cond |= G_IO_IN;
+    }
+    cond |= G_IO_OUT;
+
+    return (*func)(QIO_CHANNEL(rsource->rioc),
+                   (cond & rsource->condition),
+                   user_data);
+}
+
+static void
+qio_channel_rdma_source_finalize(GSource *source)
 {
+    QIOChannelRDMASource *ssource = (QIOChannelRDMASource *)source;
+
+    object_unref(OBJECT(ssource->rioc));
+}
+
+GSourceFuncs qio_channel_rdma_source_funcs = {
+    qio_channel_rdma_source_prepare,
+    qio_channel_rdma_source_check,
+    qio_channel_rdma_source_dispatch,
+    qio_channel_rdma_source_finalize
+};
+
+static GSource *qio_channel_rdma_create_watch(QIOChannel *ioc,
+                                              GIOCondition condition)
+{
+    QIOChannelRDMA *rioc = QIO_CHANNEL_RDMA(ioc);
+    QIOChannelRDMASource *ssource;
+    GSource *source;
+
+    source = g_source_new(&qio_channel_rdma_source_funcs,
+                          sizeof(QIOChannelRDMASource));
+    ssource = (QIOChannelRDMASource *)source;
+
+    ssource->rioc = rioc;
+    object_ref(OBJECT(rioc));
+
+    ssource->condition = condition;
+
+    return source;
+}
+
+
+static int qio_channel_rdma_close(QIOChannel *ioc,
+                                  Error **errp)
+{
+    QIOChannelRDMA *rioc = QIO_CHANNEL_RDMA(ioc);
     trace_qemu_rdma_close();
-    QEMUFileRDMA *r = opaque;
-    if (r->rdma) {
-        qemu_rdma_cleanup(r->rdma);
-        g_free(r->rdma);
+    if (rioc->rdma) {
+        if (!rioc->rdma->error_state) {
+            rioc->rdma->error_state = qemu_file_get_error(rioc->file);
+        }
+        qemu_rdma_cleanup(rioc->rdma);
+        g_free(rioc->rdma);
+        rioc->rdma = NULL;
     }
-    g_free(r);
     return 0;
 }
 
@@ -2653,8 +2856,8 @@ static size_t qemu_rdma_save_page(QEMUFile *f, void *opaque,
                                   ram_addr_t block_offset, ram_addr_t offset,
                                   size_t size, uint64_t *bytes_sent)
 {
-    QEMUFileRDMA *rfile = opaque;
-    RDMAContext *rdma = rfile->rdma;
+    QIOChannelRDMA *rioc = QIO_CHANNEL_RDMA(opaque);
+    RDMAContext *rdma = rioc->rdma;
     int ret;
 
     CHECK_ERROR_STATE();
@@ -2840,7 +3043,7 @@ static int qemu_rdma_accept(RDMAContext *rdma)
         }
     }
 
-    qemu_set_fd_handler2(rdma->channel->fd, NULL, NULL, NULL, NULL);
+    qemu_set_fd_handler(rdma->channel->fd, NULL, NULL, NULL);
 
     ret = rdma_accept(rdma->cm_id, &conn_param);
     if (ret) {
@@ -2879,6 +3082,14 @@ err_rdma_dest_wait:
     return ret;
 }
 
+static int dest_ram_sort_func(const void *a, const void *b)
+{
+    unsigned int a_index = ((const RDMALocalBlock *)a)->src_index;
+    unsigned int b_index = ((const RDMALocalBlock *)b)->src_index;
+
+    return (a_index < b_index) ? -1 : (a_index != b_index);
+}
+
 /*
  * During each iteration of the migration, we listen for instructions
  * by the source VM to perform dynamic page registrations before they
@@ -2888,8 +3099,7 @@ err_rdma_dest_wait:
  *
  * Keep doing this until the source tells us to stop.
  */
-static int qemu_rdma_registration_handle(QEMUFile *f, void *opaque,
-                                         uint64_t flags)
+static int qemu_rdma_registration_handle(QEMUFile *f, void *opaque)
 {
     RDMAControlHeader reg_resp = { .len = sizeof(RDMARegisterResult),
                                .type = RDMA_CONTROL_REGISTER_RESULT,
@@ -2901,8 +3111,8 @@ static int qemu_rdma_registration_handle(QEMUFile *f, void *opaque,
                              };
     RDMAControlHeader blocks = { .type = RDMA_CONTROL_RAM_BLOCKS_RESULT,
                                  .repeat = 1 };
-    QEMUFileRDMA *rfile = opaque;
-    RDMAContext *rdma = rfile->rdma;
+    QIOChannelRDMA *rioc = QIO_CHANNEL_RDMA(opaque);
+    RDMAContext *rdma = rioc->rdma;
     RDMALocalBlocks *local = &rdma->local_ram_blocks;
     RDMAControlHeader head;
     RDMARegister *reg, *registers;
@@ -2919,7 +3129,7 @@ static int qemu_rdma_registration_handle(QEMUFile *f, void *opaque,
     CHECK_ERROR_STATE();
 
     do {
-        trace_qemu_rdma_registration_handle_wait(flags);
+        trace_qemu_rdma_registration_handle_wait();
 
         ret = qemu_rdma_exchange_recv(rdma, &head, RDMA_CONTROL_NONE);
 
@@ -2942,6 +3152,13 @@ static int qemu_rdma_registration_handle(QEMUFile *f, void *opaque,
             trace_qemu_rdma_registration_handle_compress(comp->length,
                                                          comp->block_idx,
                                                          comp->offset);
+            if (comp->block_idx >= rdma->local_ram_blocks.nb_blocks) {
+                error_report("rdma: 'compress' bad block index %u (vs %d)",
+                             (unsigned int)comp->block_idx,
+                             rdma->local_ram_blocks.nb_blocks);
+                ret = -EIO;
+                goto out;
+            }
             block = &(rdma->local_ram_blocks.block[comp->block_idx]);
 
             host_addr = block->local_host_addr +
@@ -2957,6 +3174,13 @@ static int qemu_rdma_registration_handle(QEMUFile *f, void *opaque,
         case RDMA_CONTROL_RAM_BLOCKS_REQUEST:
             trace_qemu_rdma_registration_handle_ram_blocks();
 
+            /* Sort our local RAM Block list so it's the same as the source,
+             * we can do this since we've filled in a src_index in the list
+             * as we received the RAMBlock list earlier.
+             */
+            qsort(rdma->local_ram_blocks.block,
+                  rdma->local_ram_blocks.nb_blocks,
+                  sizeof(RDMALocalBlock), dest_ram_sort_func);
             if (rdma->pin_all) {
                 ret = qemu_rdma_reg_whole_ram_blocks(rdma);
                 if (ret) {
@@ -2973,25 +3197,31 @@ static int qemu_rdma_registration_handle(QEMUFile *f, void *opaque,
              * their "local" descriptions with what was sent.
              */
             for (i = 0; i < local->nb_blocks; i++) {
-                rdma->block[i].remote_host_addr =
+                rdma->dest_blocks[i].remote_host_addr =
                     (uintptr_t)(local->block[i].local_host_addr);
 
                 if (rdma->pin_all) {
-                    rdma->block[i].remote_rkey = local->block[i].mr->rkey;
+                    rdma->dest_blocks[i].remote_rkey = local->block[i].mr->rkey;
                 }
 
-                rdma->block[i].offset = local->block[i].offset;
-                rdma->block[i].length = local->block[i].length;
+                rdma->dest_blocks[i].offset = local->block[i].offset;
+                rdma->dest_blocks[i].length = local->block[i].length;
 
-                remote_block_to_network(&rdma->block[i]);
+                dest_block_to_network(&rdma->dest_blocks[i]);
+                trace_qemu_rdma_registration_handle_ram_blocks_loop(
+                    local->block[i].block_name,
+                    local->block[i].offset,
+                    local->block[i].length,
+                    local->block[i].local_host_addr,
+                    local->block[i].src_index);
             }
 
             blocks.len = rdma->local_ram_blocks.nb_blocks
-                                                * sizeof(RDMARemoteBlock);
+                                                * sizeof(RDMADestBlock);
 
 
             ret = qemu_rdma_post_send_control(rdma,
-                                        (uint8_t *) rdma->block, &blocks);
+                                        (uint8_t *) rdma->dest_blocks, &blocks);
 
             if (ret < 0) {
                 error_report("rdma migration: error sending remote info");
@@ -3017,8 +3247,23 @@ static int qemu_rdma_registration_handle(QEMUFile *f, void *opaque,
                 trace_qemu_rdma_registration_handle_register_loop(count,
                          reg->current_index, reg->key.current_addr, reg->chunks);
 
+                if (reg->current_index >= rdma->local_ram_blocks.nb_blocks) {
+                    error_report("rdma: 'register' bad block index %u (vs %d)",
+                                 (unsigned int)reg->current_index,
+                                 rdma->local_ram_blocks.nb_blocks);
+                    ret = -ENOENT;
+                    goto out;
+                }
                 block = &(rdma->local_ram_blocks.block[reg->current_index]);
                 if (block->is_ram_block) {
+                    if (block->offset > reg->key.current_addr) {
+                        error_report("rdma: bad register address for block %s"
+                            " offset: %" PRIx64 " current_addr: %" PRIx64,
+                            block->block_name, block->offset,
+                            reg->key.current_addr);
+                        ret = -ERANGE;
+                        goto out;
+                    }
                     host_addr = (block->local_host_addr +
                                 (reg->key.current_addr - block->offset));
                     chunk = ram_chunk_index(block->local_host_addr,
@@ -3027,6 +3272,14 @@ static int qemu_rdma_registration_handle(QEMUFile *f, void *opaque,
                     chunk = reg->key.chunk;
                     host_addr = block->local_host_addr +
                         (reg->key.chunk * (1UL << RDMA_REG_CHUNK_SHIFT));
+                    /* Check for particularly bad chunk value */
+                    if (host_addr < (void *)block->local_host_addr) {
+                        error_report("rdma: bad chunk for block %s"
+                            " chunk: %" PRIx64,
+                            block->block_name, reg->key.chunk);
+                        ret = -ERANGE;
+                        goto out;
+                    }
                 }
                 chunk_start = ram_chunk_start(block, chunk);
                 chunk_end = ram_chunk_end(block, chunk + reg->chunks);
@@ -3107,11 +3360,60 @@ out:
     return ret;
 }
 
+/* Destination:
+ * Called via a ram_control_load_hook during the initial RAM load section which
+ * lists the RAMBlocks by name.  This lets us know the order of the RAMBlocks
+ * on the source.
+ * We've already built our local RAMBlock list, but not yet sent the list to
+ * the source.
+ */
+static int
+rdma_block_notification_handle(QIOChannelRDMA *rioc, const char *name)
+{
+    RDMAContext *rdma = rioc->rdma;
+    int curr;
+    int found = -1;
+
+    /* Find the matching RAMBlock in our local list */
+    for (curr = 0; curr < rdma->local_ram_blocks.nb_blocks; curr++) {
+        if (!strcmp(rdma->local_ram_blocks.block[curr].block_name, name)) {
+            found = curr;
+            break;
+        }
+    }
+
+    if (found == -1) {
+        error_report("RAMBlock '%s' not found on destination", name);
+        return -ENOENT;
+    }
+
+    rdma->local_ram_blocks.block[curr].src_index = rdma->next_src_index;
+    trace_rdma_block_notification_handle(name, rdma->next_src_index);
+    rdma->next_src_index++;
+
+    return 0;
+}
+
+static int rdma_load_hook(QEMUFile *f, void *opaque, uint64_t flags, void *data)
+{
+    switch (flags) {
+    case RAM_CONTROL_BLOCK_REG:
+        return rdma_block_notification_handle(opaque, data);
+
+    case RAM_CONTROL_HOOK:
+        return qemu_rdma_registration_handle(f, opaque);
+
+    default:
+        /* Shouldn't be called with any other values */
+        abort();
+    }
+}
+
 static int qemu_rdma_registration_start(QEMUFile *f, void *opaque,
-                                        uint64_t flags)
+                                        uint64_t flags, void *data)
 {
-    QEMUFileRDMA *rfile = opaque;
-    RDMAContext *rdma = rfile->rdma;
+    QIOChannelRDMA *rioc = QIO_CHANNEL_RDMA(opaque);
+    RDMAContext *rdma = rioc->rdma;
 
     CHECK_ERROR_STATE();
 
@@ -3127,11 +3429,11 @@ static int qemu_rdma_registration_start(QEMUFile *f, void *opaque,
  * First, flush writes, if any.
  */
 static int qemu_rdma_registration_stop(QEMUFile *f, void *opaque,
-                                       uint64_t flags)
+                                       uint64_t flags, void *data)
 {
     Error *local_err = NULL, **errp = &local_err;
-    QEMUFileRDMA *rfile = opaque;
-    RDMAContext *rdma = rfile->rdma;
+    QIOChannelRDMA *rioc = QIO_CHANNEL_RDMA(opaque);
+    RDMAContext *rdma = rioc->rdma;
     RDMAControlHeader head = { .len = 0, .repeat = 1 };
     int ret = 0;
 
@@ -3147,7 +3449,7 @@ static int qemu_rdma_registration_stop(QEMUFile *f, void *opaque,
     if (flags == RAM_CONTROL_SETUP) {
         RDMAControlHeader resp = {.type = RDMA_CONTROL_RAM_BLOCKS_RESULT };
         RDMALocalBlocks *local = &rdma->local_ram_blocks;
-        int reg_result_idx, i, j, nb_remote_blocks;
+        int reg_result_idx, i, nb_dest_blocks;
 
         head.type = RDMA_CONTROL_RAM_BLOCKS_REQUEST;
         trace_qemu_rdma_registration_stop_ram();
@@ -3168,7 +3470,7 @@ static int qemu_rdma_registration_stop(QEMUFile *f, void *opaque,
             return ret;
         }
 
-        nb_remote_blocks = resp.len / sizeof(RDMARemoteBlock);
+        nb_dest_blocks = resp.len / sizeof(RDMADestBlock);
 
         /*
          * The protocol uses two different sets of rkeys (mutually exclusive):
@@ -3182,43 +3484,33 @@ static int qemu_rdma_registration_stop(QEMUFile *f, void *opaque,
          * and then propagates the remote ram block descriptions to his local copy.
          */
 
-        if (local->nb_blocks != nb_remote_blocks) {
-            ERROR(errp, "ram blocks mismatch #1! "
+        if (local->nb_blocks != nb_dest_blocks) {
+            ERROR(errp, "ram blocks mismatch (Number of blocks %d vs %d) "
                         "Your QEMU command line parameters are probably "
-                        "not identical on both the source and destination.");
+                        "not identical on both the source and destination.",
+                        local->nb_blocks, nb_dest_blocks);
+            rdma->error_state = -EINVAL;
             return -EINVAL;
         }
 
         qemu_rdma_move_header(rdma, reg_result_idx, &resp);
-        memcpy(rdma->block,
+        memcpy(rdma->dest_blocks,
             rdma->wr_data[reg_result_idx].control_curr, resp.len);
-        for (i = 0; i < nb_remote_blocks; i++) {
-            network_to_remote_block(&rdma->block[i]);
-
-            /* search local ram blocks */
-            for (j = 0; j < local->nb_blocks; j++) {
-                if (rdma->block[i].offset != local->block[j].offset) {
-                    continue;
-                }
-
-                if (rdma->block[i].length != local->block[j].length) {
-                    ERROR(errp, "ram blocks mismatch #2! "
-                        "Your QEMU command line parameters are probably "
-                        "not identical on both the source and destination.");
-                    return -EINVAL;
-                }
-                local->block[j].remote_host_addr =
-                        rdma->block[i].remote_host_addr;
-                local->block[j].remote_rkey = rdma->block[i].remote_rkey;
-                break;
-            }
-
-            if (j >= local->nb_blocks) {
-                ERROR(errp, "ram blocks mismatch #3! "
-                        "Your QEMU command line parameters are probably "
-                        "not identical on both the source and destination.");
+        for (i = 0; i < nb_dest_blocks; i++) {
+            network_to_dest_block(&rdma->dest_blocks[i]);
+
+            /* We require that the blocks are in the same order */
+            if (rdma->dest_blocks[i].length != local->block[i].length) {
+                ERROR(errp, "Block %s/%d has a different length %" PRIu64
+                            "vs %" PRIu64, local->block[i].block_name, i,
+                            local->block[i].length,
+                            rdma->dest_blocks[i].length);
+                rdma->error_state = -EINVAL;
                 return -EINVAL;
             }
+            local->block[i].remote_host_addr =
+                    rdma->dest_blocks[i].remote_host_addr;
+            local->block[i].remote_rkey = rdma->dest_blocks[i].remote_rkey;
         }
     }
 
@@ -3237,46 +3529,74 @@ err:
     return ret;
 }
 
-static int qemu_rdma_get_fd(void *opaque)
-{
-    QEMUFileRDMA *rfile = opaque;
-    RDMAContext *rdma = rfile->rdma;
-
-    return rdma->comp_channel->fd;
-}
-
-static const QEMUFileOps rdma_read_ops = {
-    .get_buffer    = qemu_rdma_get_buffer,
-    .get_fd        = qemu_rdma_get_fd,
-    .close         = qemu_rdma_close,
-    .hook_ram_load = qemu_rdma_registration_handle,
+static const QEMUFileHooks rdma_read_hooks = {
+    .hook_ram_load = rdma_load_hook,
 };
 
-static const QEMUFileOps rdma_write_ops = {
-    .put_buffer         = qemu_rdma_put_buffer,
-    .close              = qemu_rdma_close,
+static const QEMUFileHooks rdma_write_hooks = {
     .before_ram_iterate = qemu_rdma_registration_start,
     .after_ram_iterate  = qemu_rdma_registration_stop,
     .save_page          = qemu_rdma_save_page,
 };
 
-static void *qemu_fopen_rdma(RDMAContext *rdma, const char *mode)
+
+static void qio_channel_rdma_finalize(Object *obj)
+{
+    QIOChannelRDMA *rioc = QIO_CHANNEL_RDMA(obj);
+    if (rioc->rdma) {
+        qemu_rdma_cleanup(rioc->rdma);
+        g_free(rioc->rdma);
+        rioc->rdma = NULL;
+    }
+}
+
+static void qio_channel_rdma_class_init(ObjectClass *klass,
+                                        void *class_data G_GNUC_UNUSED)
 {
-    QEMUFileRDMA *r = g_malloc0(sizeof(QEMUFileRDMA));
+    QIOChannelClass *ioc_klass = QIO_CHANNEL_CLASS(klass);
+
+    ioc_klass->io_writev = qio_channel_rdma_writev;
+    ioc_klass->io_readv = qio_channel_rdma_readv;
+    ioc_klass->io_set_blocking = qio_channel_rdma_set_blocking;
+    ioc_klass->io_close = qio_channel_rdma_close;
+    ioc_klass->io_create_watch = qio_channel_rdma_create_watch;
+}
+
+static const TypeInfo qio_channel_rdma_info = {
+    .parent = TYPE_QIO_CHANNEL,
+    .name = TYPE_QIO_CHANNEL_RDMA,
+    .instance_size = sizeof(QIOChannelRDMA),
+    .instance_finalize = qio_channel_rdma_finalize,
+    .class_init = qio_channel_rdma_class_init,
+};
+
+static void qio_channel_rdma_register_types(void)
+{
+    type_register_static(&qio_channel_rdma_info);
+}
+
+type_init(qio_channel_rdma_register_types);
+
+static QEMUFile *qemu_fopen_rdma(RDMAContext *rdma, const char *mode)
+{
+    QIOChannelRDMA *rioc;
 
     if (qemu_file_mode_is_not_valid(mode)) {
         return NULL;
     }
 
-    r->rdma = rdma;
+    rioc = QIO_CHANNEL_RDMA(object_new(TYPE_QIO_CHANNEL_RDMA));
+    rioc->rdma = rdma;
 
     if (mode[0] == 'w') {
-        r->file = qemu_fopen_ops(r, &rdma_write_ops);
+        rioc->file = qemu_fopen_channel_output(QIO_CHANNEL(rioc));
+        qemu_file_set_hooks(rioc->file, &rdma_write_hooks);
     } else {
-        r->file = qemu_fopen_ops(r, &rdma_read_ops);
+        rioc->file = qemu_fopen_channel_input(QIO_CHANNEL(rioc));
+        qemu_file_set_hooks(rioc->file, &rdma_read_hooks);
     }
 
-    return r->file;
+    return rioc->file;
 }
 
 static void rdma_accept_incoming_migration(void *opaque)
@@ -3286,7 +3606,7 @@ static void rdma_accept_incoming_migration(void *opaque)
     QEMUFile *f;
     Error *local_err = NULL, **errp = &local_err;
 
-    trace_qemu_dma_accept_incoming_migration();
+    trace_qemu_rdma_accept_incoming_migration();
     ret = qemu_rdma_accept(rdma);
 
     if (ret) {
@@ -3294,7 +3614,7 @@ static void rdma_accept_incoming_migration(void *opaque)
         return;
     }
 
-    trace_qemu_dma_accept_incoming_migration_accepted();
+    trace_qemu_rdma_accept_incoming_migration_accepted();
 
     f = qemu_fopen_rdma(rdma, "rb");
     if (f == NULL) {
@@ -3304,7 +3624,7 @@ static void rdma_accept_incoming_migration(void *opaque)
     }
 
     rdma->migration_started_on_destination = 1;
-    process_incoming_migration(f);
+    migration_fd_process_incoming(f);
 }
 
 void rdma_start_incoming_migration(const char *host_port, Error **errp)
@@ -3337,9 +3657,8 @@ void rdma_start_incoming_migration(const char *host_port, Error **errp)
 
     trace_rdma_start_incoming_migration_after_rdma_listen();
 
-    qemu_set_fd_handler2(rdma->channel->fd, NULL,
-                         rdma_accept_incoming_migration, NULL,
-                            (void *)(intptr_t) rdma);
+    qemu_set_fd_handler(rdma->channel->fd, rdma_accept_incoming_migration,
+                        NULL, (void *)(intptr_t)rdma);
     return;
 err:
     error_propagate(errp, local_err);
@@ -3350,16 +3669,14 @@ void rdma_start_outgoing_migration(void *opaque,
                             const char *host_port, Error **errp)
 {
     MigrationState *s = opaque;
-    Error *local_err = NULL, **temp = &local_err;
-    RDMAContext *rdma = qemu_rdma_data_init(host_port, &local_err);
+    RDMAContext *rdma = qemu_rdma_data_init(host_port, errp);
     int ret = 0;
 
     if (rdma == NULL) {
-        ERROR(temp, "Failed to initialize RDMA data structures! %d", ret);
         goto err;
     }
 
-    ret = qemu_rdma_source_init(rdma, &local_err,
+    ret = qemu_rdma_source_init(rdma, errp,
         s->enabled_capabilities[MIGRATION_CAPABILITY_RDMA_PIN_ALL]);
 
     if (ret) {
@@ -3367,7 +3684,7 @@ void rdma_start_outgoing_migration(void *opaque,
     }
 
     trace_rdma_start_outgoing_migration_after_rdma_source_init();
-    ret = qemu_rdma_connect(rdma, &local_err);
+    ret = qemu_rdma_connect(rdma, errp);
 
     if (ret) {
         goto err;
@@ -3375,11 +3692,9 @@ void rdma_start_outgoing_migration(void *opaque,
 
     trace_rdma_start_outgoing_migration_after_rdma_connect();
 
-    s->file = qemu_fopen_rdma(rdma, "wb");
+    s->to_dst_file = qemu_fopen_rdma(rdma, "wb");
     migrate_fd_connect(s);
     return;
 err:
-    error_propagate(errp, local_err);
     g_free(rdma);
-    migrate_fd_error(s);
 }