/*
 * Copyright (c) 2003-2008 Fabrice Bellard
 * Copyright (c) 2011-2015 Red Hat Inc
 *
 * Authors:
 *  Juan Quintela <quintela@redhat.com>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */
#include "qemu/osdep.h"
#include "qemu-common.h"
#include "qapi-event.h"
#include "qemu/cutils.h"
#include "qemu/bitops.h"
#include "qemu/bitmap.h"
#include "qemu/timer.h"
#include "qemu/main-loop.h"
#include "migration/migration.h"
#include "migration/postcopy-ram.h"
#include "exec/address-spaces.h"
#include "migration/page_cache.h"
#include "qemu/error-report.h"
#include "exec/ram_addr.h"
#include "qemu/rcu_queue.h"
#include "migration/colo.h"
/***********************************************************/
/* ram save/restore */

#define RAM_SAVE_FLAG_FULL     0x01 /* Obsolete, not used anymore */
#define RAM_SAVE_FLAG_COMPRESS 0x02
#define RAM_SAVE_FLAG_MEM_SIZE 0x04
#define RAM_SAVE_FLAG_PAGE     0x08
#define RAM_SAVE_FLAG_EOS      0x10
#define RAM_SAVE_FLAG_CONTINUE 0x20
#define RAM_SAVE_FLAG_XBZRLE   0x40
/* 0x80 is reserved in migration.h; start with 0x100 next */
#define RAM_SAVE_FLAG_COMPRESS_PAGE    0x100
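/*
 * Illustrative sketch (an assumption drawn from save_page_header() below,
 * not an addition to the format): page offsets on the wire are target-page
 * aligned, so the low bits of the 64-bit offset field carry the
 * RAM_SAVE_FLAG_* bits above.  A reader could split them apart roughly as:
 *
 *   uint64_t header = qemu_get_be64(f);            // offset | flags
 *   int flags       = header & ~TARGET_PAGE_MASK;  // flag bits
 *   ram_addr_t addr = header & TARGET_PAGE_MASK;   // page-aligned offset
 */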
static uint8_t *ZERO_TARGET_PAGE;

static inline bool is_zero_range(uint8_t *p, uint64_t size)
{
    return buffer_is_zero(p, size);
}
/* struct contains XBZRLE cache and a static page
   used by the compression */
static struct {
    /* buffer used for XBZRLE encoding */
    uint8_t *encoded_buf;
    /* buffer for storing page content */
    uint8_t *current_buf;
    /* Cache for XBZRLE, Protected by lock. */
    PageCache *cache;
    QemuMutex lock;
} XBZRLE;

/* buffer used for XBZRLE decoding */
static uint8_t *xbzrle_decoded_buf;
static void XBZRLE_cache_lock(void)
{
    if (migrate_use_xbzrle())
        qemu_mutex_lock(&XBZRLE.lock);
}

static void XBZRLE_cache_unlock(void)
{
    if (migrate_use_xbzrle())
        qemu_mutex_unlock(&XBZRLE.lock);
}
/**
 * xbzrle_cache_resize: resize the xbzrle cache
 *
 * This function is called from qmp_migrate_set_cache_size in main
 * thread, possibly while a migration is in progress. A running
 * migration may be using the cache and might finish during this call,
 * hence changes to the cache are protected by XBZRLE.lock().
 *
 * Returns the new_size or negative in case of error.
 *
 * @new_size: new cache size
 */
int64_t xbzrle_cache_resize(int64_t new_size)
{
    PageCache *new_cache;
    int64_t ret;

    if (new_size < TARGET_PAGE_SIZE) {
        return -1;
    }

    XBZRLE_cache_lock();

    if (XBZRLE.cache != NULL) {
        if (pow2floor(new_size) == migrate_xbzrle_cache_size()) {
            goto out_new_size;
        }
        new_cache = cache_init(new_size / TARGET_PAGE_SIZE,
                               TARGET_PAGE_SIZE);
        if (!new_cache) {
            error_report("Error creating cache");
            ret = -1;
            goto out;
        }

        cache_fini(XBZRLE.cache);
        XBZRLE.cache = new_cache;
    }

out_new_size:
    ret = pow2floor(new_size);
out:
    XBZRLE_cache_unlock();
    return ret;
}
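/*
 * Sizing note (illustrative): pow2floor() rounds the requested size down to
 * a power of two, so a request of e.g. 600 MiB ends up as a 512 MiB cache,
 * and that rounded-down value is what this function returns on success.
 */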
struct RAMBitmap {
    struct rcu_head rcu;
    /* Main migration bitmap */
    unsigned long *bmap;
    /* bitmap of pages that haven't been sent even once
     * only maintained and used in postcopy at the moment
     * where it's used to send the dirtymap at the start
     * of the postcopy phase
     */
    unsigned long *unsentmap;
};
typedef struct RAMBitmap RAMBitmap;
/*
 * An outstanding page request, on the source, having been received
 * and queued
 */
struct RAMSrcPageRequest {
    RAMBlock *rb;
    hwaddr    offset;
    hwaddr    len;

    QSIMPLEQ_ENTRY(RAMSrcPageRequest) next_req;
};
/* State of RAM for migration */
struct RAMState {
    /* QEMUFile used for this migration */
    QEMUFile *f;
    /* Last block that we have visited searching for dirty pages */
    RAMBlock *last_seen_block;
    /* Last block from where we have sent data */
    RAMBlock *last_sent_block;
    /* Last offset we have sent data from */
    ram_addr_t last_offset;
    /* last ram version we have seen */
    uint32_t last_version;
    /* We are in the first round */
    bool ram_bulk_stage;
    /* How many times we have dirty too many pages */
    int dirty_rate_high_cnt;
    /* How many times we have synchronized the bitmap */
    uint64_t bitmap_sync_count;
    /* these variables are used for bitmap sync */
    /* last time we did a full bitmap_sync */
    int64_t time_last_bitmap_sync;
    /* bytes transferred at start_time */
    uint64_t bytes_xfer_prev;
    /* number of dirty pages since start_time */
    uint64_t num_dirty_pages_period;
    /* xbzrle misses since the beginning of the period */
    uint64_t xbzrle_cache_miss_prev;
    /* number of iterations at the beginning of period */
    uint64_t iterations_prev;
    /* Accounting fields */
    /* number of zero pages.  It used to be pages filled by the same char. */
    uint64_t zero_pages;
    /* number of normal transferred pages */
    uint64_t norm_pages;
    /* Iterations since start */
    uint64_t iterations;
    /* xbzrle transmitted bytes.  Notice that this is with
     * compression, they can't be calculated from the pages */
    uint64_t xbzrle_bytes;
    /* xbzrle transmitted pages */
    uint64_t xbzrle_pages;
    /* xbzrle number of cache miss */
    uint64_t xbzrle_cache_miss;
    /* xbzrle miss rate */
    double xbzrle_cache_miss_rate;
    /* xbzrle number of overflows */
    uint64_t xbzrle_overflows;
    /* number of dirty bits in the bitmap */
    uint64_t migration_dirty_pages;
    /* total number of bytes transferred */
    uint64_t bytes_transferred;
    /* number of dirtied pages in the last second */
    uint64_t dirty_pages_rate;
    /* Count of requests incoming from destination */
    uint64_t postcopy_requests;
    /* protects modification of the bitmap */
    QemuMutex bitmap_mutex;
    /* Ram Bitmap protected by RCU */
    RAMBitmap *ram_bitmap;
    /* The RAMBlock used in the last src_page_requests */
    RAMBlock *last_req_rb;
    /* Queue of outstanding page requests from the destination */
    QemuMutex src_page_req_mutex;
    QSIMPLEQ_HEAD(src_page_requests, RAMSrcPageRequest) src_page_requests;
};
typedef struct RAMState RAMState;

static RAMState ram_state;
uint64_t dup_mig_pages_transferred(void)
{
    return ram_state.zero_pages;
}

uint64_t norm_mig_pages_transferred(void)
{
    return ram_state.norm_pages;
}

uint64_t xbzrle_mig_bytes_transferred(void)
{
    return ram_state.xbzrle_bytes;
}

uint64_t xbzrle_mig_pages_transferred(void)
{
    return ram_state.xbzrle_pages;
}

uint64_t xbzrle_mig_pages_cache_miss(void)
{
    return ram_state.xbzrle_cache_miss;
}

double xbzrle_mig_cache_miss_rate(void)
{
    return ram_state.xbzrle_cache_miss_rate;
}

uint64_t xbzrle_mig_pages_overflow(void)
{
    return ram_state.xbzrle_overflows;
}

uint64_t ram_bytes_transferred(void)
{
    return ram_state.bytes_transferred;
}

uint64_t ram_bytes_remaining(void)
{
    return ram_state.migration_dirty_pages * TARGET_PAGE_SIZE;
}

uint64_t ram_dirty_sync_count(void)
{
    return ram_state.bitmap_sync_count;
}

uint64_t ram_dirty_pages_rate(void)
{
    return ram_state.dirty_pages_rate;
}

uint64_t ram_postcopy_requests(void)
{
    return ram_state.postcopy_requests;
}
/* used by the search for pages to send */
struct PageSearchStatus {
    /* Current block being searched */
    RAMBlock    *block;
    /* Current offset to search from */
    ram_addr_t   offset;
    /* Set once we wrap around */
    bool         complete_round;
};
typedef struct PageSearchStatus PageSearchStatus;
struct CompressParam {
    bool done;
    bool quit;
    QEMUFile *file;
    QemuMutex mutex;
    QemuCond cond;
    RAMBlock *block;
    ram_addr_t offset;
};
typedef struct CompressParam CompressParam;

struct DecompressParam {
    bool done;
    bool quit;
    QemuMutex mutex;
    QemuCond cond;
    void *des;
    uint8_t *compbuf;
    int len;
};
typedef struct DecompressParam DecompressParam;
static CompressParam *comp_param;
static QemuThread *compress_threads;
/* comp_done_cond is used to wake up the migration thread when
 * one of the compression threads has finished the compression.
 * comp_done_lock is used to co-work with comp_done_cond.
 */
static QemuMutex comp_done_lock;
static QemuCond comp_done_cond;
/* The empty QEMUFileOps will be used by file in CompressParam */
static const QEMUFileOps empty_ops = { };

static DecompressParam *decomp_param;
static QemuThread *decompress_threads;
static QemuMutex decomp_done_lock;
static QemuCond decomp_done_cond;
344 static int do_compress_ram_page(QEMUFile
*f
, RAMBlock
*block
,
347 static void *do_data_compress(void *opaque
)
349 CompressParam
*param
= opaque
;
353 qemu_mutex_lock(¶m
->mutex
);
354 while (!param
->quit
) {
356 block
= param
->block
;
357 offset
= param
->offset
;
359 qemu_mutex_unlock(¶m
->mutex
);
361 do_compress_ram_page(param
->file
, block
, offset
);
363 qemu_mutex_lock(&comp_done_lock
);
365 qemu_cond_signal(&comp_done_cond
);
366 qemu_mutex_unlock(&comp_done_lock
);
368 qemu_mutex_lock(¶m
->mutex
);
370 qemu_cond_wait(¶m
->cond
, ¶m
->mutex
);
373 qemu_mutex_unlock(¶m
->mutex
);
378 static inline void terminate_compression_threads(void)
380 int idx
, thread_count
;
382 thread_count
= migrate_compress_threads();
384 for (idx
= 0; idx
< thread_count
; idx
++) {
385 qemu_mutex_lock(&comp_param
[idx
].mutex
);
386 comp_param
[idx
].quit
= true;
387 qemu_cond_signal(&comp_param
[idx
].cond
);
388 qemu_mutex_unlock(&comp_param
[idx
].mutex
);
392 void migrate_compress_threads_join(void)
396 if (!migrate_use_compression()) {
399 terminate_compression_threads();
400 thread_count
= migrate_compress_threads();
401 for (i
= 0; i
< thread_count
; i
++) {
402 qemu_thread_join(compress_threads
+ i
);
403 qemu_fclose(comp_param
[i
].file
);
404 qemu_mutex_destroy(&comp_param
[i
].mutex
);
405 qemu_cond_destroy(&comp_param
[i
].cond
);
407 qemu_mutex_destroy(&comp_done_lock
);
408 qemu_cond_destroy(&comp_done_cond
);
409 g_free(compress_threads
);
411 compress_threads
= NULL
;
415 void migrate_compress_threads_create(void)
419 if (!migrate_use_compression()) {
422 thread_count
= migrate_compress_threads();
423 compress_threads
= g_new0(QemuThread
, thread_count
);
424 comp_param
= g_new0(CompressParam
, thread_count
);
425 qemu_cond_init(&comp_done_cond
);
426 qemu_mutex_init(&comp_done_lock
);
427 for (i
= 0; i
< thread_count
; i
++) {
428 /* comp_param[i].file is just used as a dummy buffer to save data,
429 * set its ops to empty.
431 comp_param
[i
].file
= qemu_fopen_ops(NULL
, &empty_ops
);
432 comp_param
[i
].done
= true;
433 comp_param
[i
].quit
= false;
434 qemu_mutex_init(&comp_param
[i
].mutex
);
435 qemu_cond_init(&comp_param
[i
].cond
);
436 qemu_thread_create(compress_threads
+ i
, "compress",
437 do_data_compress
, comp_param
+ i
,
438 QEMU_THREAD_JOINABLE
);
443 * save_page_header: write page header to wire
445 * If this is the 1st block, it also writes the block identification
447 * Returns the number of bytes written
449 * @f: QEMUFile where to send the data
450 * @block: block that contains the page we want to send
451 * @offset: offset inside the block for the page
452 * in the lower bits, it contains flags
454 static size_t save_page_header(QEMUFile
*f
, RAMBlock
*block
, ram_addr_t offset
)
458 qemu_put_be64(f
, offset
);
461 if (!(offset
& RAM_SAVE_FLAG_CONTINUE
)) {
462 len
= strlen(block
->idstr
);
463 qemu_put_byte(f
, len
);
464 qemu_put_buffer(f
, (uint8_t *)block
->idstr
, len
);
/**
 * mig_throttle_guest_down: throttle down the guest
 *
 * Reduce amount of guest cpu execution to hopefully slow down memory
 * writes. If guest dirty memory rate is reduced below the rate at
 * which we can transfer pages to the destination then we should be
 * able to complete migration. Some workloads dirty memory way too
 * fast and will not effectively converge, even with auto-converge.
 */
static void mig_throttle_guest_down(void)
{
    MigrationState *s = migrate_get_current();
    uint64_t pct_initial = s->parameters.cpu_throttle_initial;
    uint64_t pct_increment = s->parameters.cpu_throttle_increment;

    /* We have not started throttling yet. Let's start it. */
    if (!cpu_throttle_active()) {
        cpu_throttle_set(pct_initial);
    } else {
        /* Throttling already on, just increase the rate */
        cpu_throttle_set(cpu_throttle_get_percentage() + pct_increment);
    }
}
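/*
 * Illustrative example: with cpu_throttle_initial=20 and
 * cpu_throttle_increment=10, repeated calls while migration fails to
 * converge throttle the guest at 20%, then 30%, 40%, ... of its CPU time.
 */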
495 * xbzrle_cache_zero_page: insert a zero page in the XBZRLE cache
497 * @rs: current RAM state
498 * @current_addr: address for the zero page
500 * Update the xbzrle cache to reflect a page that's been sent as all 0.
501 * The important thing is that a stale (not-yet-0'd) page be replaced
503 * As a bonus, if the page wasn't in the cache it gets added so that
504 * when a small write is made into the 0'd page it gets XBZRLE sent.
506 static void xbzrle_cache_zero_page(RAMState
*rs
, ram_addr_t current_addr
)
508 if (rs
->ram_bulk_stage
|| !migrate_use_xbzrle()) {
512 /* We don't care if this fails to allocate a new cache page
513 * as long as it updated an old one */
514 cache_insert(XBZRLE
.cache
, current_addr
, ZERO_TARGET_PAGE
,
515 rs
->bitmap_sync_count
);
#define ENCODING_FLAG_XBZRLE 0x1
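/*
 * Wire layout of an XBZRLE page as written by save_xbzrle_page() below
 * (a summary of that code, not an extension of the protocol):
 *
 *   save_page_header()       offset | RAM_SAVE_FLAG_XBZRLE
 *   1 byte                   ENCODING_FLAG_XBZRLE
 *   2 bytes, big endian      encoded length
 *   encoded_len bytes        xbzrle_encode_buffer() output
 *
 * which is why the accounting there adds "encoded_len + 1 + 2".
 */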
521 * save_xbzrle_page: compress and send current page
523 * Returns: 1 means that we wrote the page
524 * 0 means that page is identical to the one already sent
525 * -1 means that xbzrle would be longer than normal
527 * @rs: current RAM state
528 * @current_data: pointer to the address of the page contents
529 * @current_addr: addr of the page
530 * @block: block that contains the page we want to send
531 * @offset: offset inside the block for the page
532 * @last_stage: if we are at the completion stage
534 static int save_xbzrle_page(RAMState
*rs
, uint8_t **current_data
,
535 ram_addr_t current_addr
, RAMBlock
*block
,
536 ram_addr_t offset
, bool last_stage
)
538 int encoded_len
= 0, bytes_xbzrle
;
539 uint8_t *prev_cached_page
;
541 if (!cache_is_cached(XBZRLE
.cache
, current_addr
, rs
->bitmap_sync_count
)) {
542 rs
->xbzrle_cache_miss
++;
544 if (cache_insert(XBZRLE
.cache
, current_addr
, *current_data
,
545 rs
->bitmap_sync_count
) == -1) {
548 /* update *current_data when the page has been
549 inserted into cache */
550 *current_data
= get_cached_data(XBZRLE
.cache
, current_addr
);
556 prev_cached_page
= get_cached_data(XBZRLE
.cache
, current_addr
);
558 /* save current buffer into memory */
559 memcpy(XBZRLE
.current_buf
, *current_data
, TARGET_PAGE_SIZE
);
561 /* XBZRLE encoding (if there is no overflow) */
562 encoded_len
= xbzrle_encode_buffer(prev_cached_page
, XBZRLE
.current_buf
,
563 TARGET_PAGE_SIZE
, XBZRLE
.encoded_buf
,
565 if (encoded_len
== 0) {
566 trace_save_xbzrle_page_skipping();
568 } else if (encoded_len
== -1) {
569 trace_save_xbzrle_page_overflow();
570 rs
->xbzrle_overflows
++;
571 /* update data in the cache */
573 memcpy(prev_cached_page
, *current_data
, TARGET_PAGE_SIZE
);
574 *current_data
= prev_cached_page
;
579 /* we need to update the data in the cache, in order to get the same data */
581 memcpy(prev_cached_page
, XBZRLE
.current_buf
, TARGET_PAGE_SIZE
);
584 /* Send XBZRLE based compressed page */
585 bytes_xbzrle
= save_page_header(rs
->f
, block
,
586 offset
| RAM_SAVE_FLAG_XBZRLE
);
587 qemu_put_byte(rs
->f
, ENCODING_FLAG_XBZRLE
);
588 qemu_put_be16(rs
->f
, encoded_len
);
589 qemu_put_buffer(rs
->f
, XBZRLE
.encoded_buf
, encoded_len
);
590 bytes_xbzrle
+= encoded_len
+ 1 + 2;
592 rs
->xbzrle_bytes
+= bytes_xbzrle
;
593 rs
->bytes_transferred
+= bytes_xbzrle
;
599 * migration_bitmap_find_dirty: find the next dirty page from start
601 * Called with rcu_read_lock() to protect migration_bitmap
603 * Returns the byte offset within memory region of the start of a dirty page
605 * @rs: current RAM state
606 * @rb: RAMBlock where to search for dirty pages
607 * @start: starting address (typically so we can continue from previous page)
608 * @ram_addr_abs: pointer into which to store the address of the dirty page
609 * within the global ram_addr space
612 ram_addr_t
migration_bitmap_find_dirty(RAMState
*rs
, RAMBlock
*rb
,
614 ram_addr_t
*ram_addr_abs
)
616 unsigned long base
= rb
->offset
>> TARGET_PAGE_BITS
;
617 unsigned long nr
= base
+ (start
>> TARGET_PAGE_BITS
);
618 uint64_t rb_size
= rb
->used_length
;
619 unsigned long size
= base
+ (rb_size
>> TARGET_PAGE_BITS
);
620 unsigned long *bitmap
;
624 bitmap
= atomic_rcu_read(&rs
->ram_bitmap
)->bmap
;
625 if (rs
->ram_bulk_stage
&& nr
> base
) {
628 next
= find_next_bit(bitmap
, size
, nr
);
631 *ram_addr_abs
= next
<< TARGET_PAGE_BITS
;
632 return (next
- base
) << TARGET_PAGE_BITS
;
635 static inline bool migration_bitmap_clear_dirty(RAMState
*rs
, ram_addr_t addr
)
638 int nr
= addr
>> TARGET_PAGE_BITS
;
639 unsigned long *bitmap
= atomic_rcu_read(&rs
->ram_bitmap
)->bmap
;
641 ret
= test_and_clear_bit(nr
, bitmap
);
644 rs
->migration_dirty_pages
--;
649 static void migration_bitmap_sync_range(RAMState
*rs
, ram_addr_t start
,
652 unsigned long *bitmap
;
653 bitmap
= atomic_rcu_read(&rs
->ram_bitmap
)->bmap
;
654 rs
->migration_dirty_pages
+=
655 cpu_physical_memory_sync_dirty_bitmap(bitmap
, start
, length
,
656 &rs
->num_dirty_pages_period
);
660 * ram_pagesize_summary: calculate all the pagesizes of a VM
662 * Returns a summary bitmap of the page sizes of all RAMBlocks
664 * For VMs with just normal pages this is equivalent to the host page
665 * size. If it's got some huge pages then it's the OR of all the
666 * different page sizes.
668 uint64_t ram_pagesize_summary(void)
671 uint64_t summary
= 0;
673 QLIST_FOREACH_RCU(block
, &ram_list
.blocks
, next
) {
674 summary
|= block
->page_size
;
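/*
 * Illustrative example: on a host where most RAMBlocks use 4 KiB pages and
 * one block is backed by 2 MiB hugepages, the returned summary is
 * 0x1000 | 0x200000 == 0x201000.
 */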
680 static void migration_bitmap_sync(RAMState
*rs
)
684 uint64_t bytes_xfer_now
;
686 rs
->bitmap_sync_count
++;
688 if (!rs
->bytes_xfer_prev
) {
689 rs
->bytes_xfer_prev
= ram_bytes_transferred();
692 if (!rs
->time_last_bitmap_sync
) {
693 rs
->time_last_bitmap_sync
= qemu_clock_get_ms(QEMU_CLOCK_REALTIME
);
696 trace_migration_bitmap_sync_start();
697 memory_global_dirty_log_sync();
699 qemu_mutex_lock(&rs
->bitmap_mutex
);
701 QLIST_FOREACH_RCU(block
, &ram_list
.blocks
, next
) {
702 migration_bitmap_sync_range(rs
, block
->offset
, block
->used_length
);
705 qemu_mutex_unlock(&rs
->bitmap_mutex
);
707 trace_migration_bitmap_sync_end(rs
->num_dirty_pages_period
);
709 end_time
= qemu_clock_get_ms(QEMU_CLOCK_REALTIME
);
/* more than 1 second = 1000 milliseconds */
712 if (end_time
> rs
->time_last_bitmap_sync
+ 1000) {
713 if (migrate_auto_converge()) {
714 /* The following detection logic can be refined later. For now:
715 Check to see if the dirtied bytes is 50% more than the approx.
716 amount of bytes that just got transferred since the last time we
717 were in this routine. If that happens twice, start or increase
719 bytes_xfer_now
= ram_bytes_transferred();
721 if (rs
->dirty_pages_rate
&&
722 (rs
->num_dirty_pages_period
* TARGET_PAGE_SIZE
>
723 (bytes_xfer_now
- rs
->bytes_xfer_prev
) / 2) &&
724 (rs
->dirty_rate_high_cnt
++ >= 2)) {
725 trace_migration_throttle();
726 rs
->dirty_rate_high_cnt
= 0;
727 mig_throttle_guest_down();
729 rs
->bytes_xfer_prev
= bytes_xfer_now
;
732 if (migrate_use_xbzrle()) {
733 if (rs
->iterations_prev
!= rs
->iterations
) {
734 rs
->xbzrle_cache_miss_rate
=
735 (double)(rs
->xbzrle_cache_miss
-
736 rs
->xbzrle_cache_miss_prev
) /
737 (rs
->iterations
- rs
->iterations_prev
);
739 rs
->iterations_prev
= rs
->iterations
;
740 rs
->xbzrle_cache_miss_prev
= rs
->xbzrle_cache_miss
;
742 rs
->dirty_pages_rate
= rs
->num_dirty_pages_period
* 1000
743 / (end_time
- rs
->time_last_bitmap_sync
);
744 rs
->time_last_bitmap_sync
= end_time
;
745 rs
->num_dirty_pages_period
= 0;
747 if (migrate_use_events()) {
748 qapi_event_send_migration_pass(rs
->bitmap_sync_count
, NULL
);
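/*
 * Worked example of the rate computed above: if num_dirty_pages_period is
 * 25000 pages and the interval (end_time - time_last_bitmap_sync) is 500 ms,
 * dirty_pages_rate = 25000 * 1000 / 500 = 50000 pages per second.
 */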
753 * save_zero_page: send the zero page to the stream
755 * Returns the number of pages written.
757 * @rs: current RAM state
758 * @block: block that contains the page we want to send
759 * @offset: offset inside the block for the page
760 * @p: pointer to the page
762 static int save_zero_page(RAMState
*rs
, RAMBlock
*block
, ram_addr_t offset
,
767 if (is_zero_range(p
, TARGET_PAGE_SIZE
)) {
769 rs
->bytes_transferred
+=
770 save_page_header(rs
->f
, block
, offset
| RAM_SAVE_FLAG_COMPRESS
);
771 qemu_put_byte(rs
->f
, 0);
772 rs
->bytes_transferred
+= 1;
779 static void ram_release_pages(const char *rbname
, uint64_t offset
, int pages
)
781 if (!migrate_release_ram() || !migration_in_postcopy()) {
785 ram_discard_range(NULL
, rbname
, offset
, pages
<< TARGET_PAGE_BITS
);
789 * ram_save_page: send the given page to the stream
791 * Returns the number of pages written.
793 * >=0 - Number of pages written - this might legally be 0
794 * if xbzrle noticed the page was the same.
796 * @rs: current RAM state
797 * @block: block that contains the page we want to send
798 * @offset: offset inside the block for the page
799 * @last_stage: if we are at the completion stage
801 static int ram_save_page(RAMState
*rs
, PageSearchStatus
*pss
, bool last_stage
)
805 ram_addr_t current_addr
;
808 bool send_async
= true;
809 RAMBlock
*block
= pss
->block
;
810 ram_addr_t offset
= pss
->offset
;
812 p
= block
->host
+ offset
;
/* In doubt, send page as normal */
816 ret
= ram_control_save_page(rs
->f
, block
->offset
,
817 offset
, TARGET_PAGE_SIZE
, &bytes_xmit
);
819 rs
->bytes_transferred
+= bytes_xmit
;
825 current_addr
= block
->offset
+ offset
;
827 if (block
== rs
->last_sent_block
) {
828 offset
|= RAM_SAVE_FLAG_CONTINUE
;
830 if (ret
!= RAM_SAVE_CONTROL_NOT_SUPP
) {
831 if (ret
!= RAM_SAVE_CONTROL_DELAYED
) {
832 if (bytes_xmit
> 0) {
834 } else if (bytes_xmit
== 0) {
839 pages
= save_zero_page(rs
, block
, offset
, p
);
841 /* Must let xbzrle know, otherwise a previous (now 0'd) cached
842 * page would be stale
844 xbzrle_cache_zero_page(rs
, current_addr
);
845 ram_release_pages(block
->idstr
, pss
->offset
, pages
);
846 } else if (!rs
->ram_bulk_stage
&&
847 !migration_in_postcopy() && migrate_use_xbzrle()) {
848 pages
= save_xbzrle_page(rs
, &p
, current_addr
, block
,
851 /* Can't send this cached data async, since the cache page
852 * might get updated before it gets to the wire
859 /* XBZRLE overflow or normal page */
861 rs
->bytes_transferred
+= save_page_header(rs
->f
, block
,
862 offset
| RAM_SAVE_FLAG_PAGE
);
864 qemu_put_buffer_async(rs
->f
, p
, TARGET_PAGE_SIZE
,
865 migrate_release_ram() &
866 migration_in_postcopy());
868 qemu_put_buffer(rs
->f
, p
, TARGET_PAGE_SIZE
);
870 rs
->bytes_transferred
+= TARGET_PAGE_SIZE
;
875 XBZRLE_cache_unlock();
880 static int do_compress_ram_page(QEMUFile
*f
, RAMBlock
*block
,
883 int bytes_sent
, blen
;
884 uint8_t *p
= block
->host
+ (offset
& TARGET_PAGE_MASK
);
886 bytes_sent
= save_page_header(f
, block
, offset
|
887 RAM_SAVE_FLAG_COMPRESS_PAGE
);
888 blen
= qemu_put_compression_data(f
, p
, TARGET_PAGE_SIZE
,
889 migrate_compress_level());
892 qemu_file_set_error(migrate_get_current()->to_dst_file
, blen
);
893 error_report("compressed data failed!");
896 ram_release_pages(block
->idstr
, offset
& TARGET_PAGE_MASK
, 1);
902 static void flush_compressed_data(RAMState
*rs
)
904 int idx
, len
, thread_count
;
906 if (!migrate_use_compression()) {
909 thread_count
= migrate_compress_threads();
911 qemu_mutex_lock(&comp_done_lock
);
912 for (idx
= 0; idx
< thread_count
; idx
++) {
913 while (!comp_param
[idx
].done
) {
914 qemu_cond_wait(&comp_done_cond
, &comp_done_lock
);
917 qemu_mutex_unlock(&comp_done_lock
);
919 for (idx
= 0; idx
< thread_count
; idx
++) {
920 qemu_mutex_lock(&comp_param
[idx
].mutex
);
921 if (!comp_param
[idx
].quit
) {
922 len
= qemu_put_qemu_file(rs
->f
, comp_param
[idx
].file
);
923 rs
->bytes_transferred
+= len
;
925 qemu_mutex_unlock(&comp_param
[idx
].mutex
);
929 static inline void set_compress_params(CompressParam
*param
, RAMBlock
*block
,
932 param
->block
= block
;
933 param
->offset
= offset
;
936 static int compress_page_with_multi_thread(RAMState
*rs
, RAMBlock
*block
,
939 int idx
, thread_count
, bytes_xmit
= -1, pages
= -1;
941 thread_count
= migrate_compress_threads();
942 qemu_mutex_lock(&comp_done_lock
);
944 for (idx
= 0; idx
< thread_count
; idx
++) {
945 if (comp_param
[idx
].done
) {
946 comp_param
[idx
].done
= false;
947 bytes_xmit
= qemu_put_qemu_file(rs
->f
, comp_param
[idx
].file
);
948 qemu_mutex_lock(&comp_param
[idx
].mutex
);
949 set_compress_params(&comp_param
[idx
], block
, offset
);
950 qemu_cond_signal(&comp_param
[idx
].cond
);
951 qemu_mutex_unlock(&comp_param
[idx
].mutex
);
954 rs
->bytes_transferred
+= bytes_xmit
;
961 qemu_cond_wait(&comp_done_cond
, &comp_done_lock
);
964 qemu_mutex_unlock(&comp_done_lock
);
970 * ram_save_compressed_page: compress the given page and send it to the stream
972 * Returns the number of pages written.
974 * @rs: current RAM state
975 * @block: block that contains the page we want to send
976 * @offset: offset inside the block for the page
977 * @last_stage: if we are at the completion stage
979 static int ram_save_compressed_page(RAMState
*rs
, PageSearchStatus
*pss
,
983 uint64_t bytes_xmit
= 0;
986 RAMBlock
*block
= pss
->block
;
987 ram_addr_t offset
= pss
->offset
;
989 p
= block
->host
+ offset
;
991 ret
= ram_control_save_page(rs
->f
, block
->offset
,
992 offset
, TARGET_PAGE_SIZE
, &bytes_xmit
);
994 rs
->bytes_transferred
+= bytes_xmit
;
997 if (ret
!= RAM_SAVE_CONTROL_NOT_SUPP
) {
998 if (ret
!= RAM_SAVE_CONTROL_DELAYED
) {
999 if (bytes_xmit
> 0) {
1001 } else if (bytes_xmit
== 0) {
1006 /* When starting the process of a new block, the first page of
1007 * the block should be sent out before other pages in the same
1008 * block, and all the pages in last block should have been sent
 * out; keeping this order is important because the 'cont' flag
1010 * is used to avoid resending the block name.
1012 if (block
!= rs
->last_sent_block
) {
1013 flush_compressed_data(rs
);
1014 pages
= save_zero_page(rs
, block
, offset
, p
);
1016 /* Make sure the first page is sent out before other pages */
1017 bytes_xmit
= save_page_header(rs
->f
, block
, offset
|
1018 RAM_SAVE_FLAG_COMPRESS_PAGE
);
1019 blen
= qemu_put_compression_data(rs
->f
, p
, TARGET_PAGE_SIZE
,
1020 migrate_compress_level());
1022 rs
->bytes_transferred
+= bytes_xmit
+ blen
;
1026 qemu_file_set_error(rs
->f
, blen
);
1027 error_report("compressed data failed!");
1031 ram_release_pages(block
->idstr
, pss
->offset
, pages
);
1034 offset
|= RAM_SAVE_FLAG_CONTINUE
;
1035 pages
= save_zero_page(rs
, block
, offset
, p
);
1037 pages
= compress_page_with_multi_thread(rs
, block
, offset
);
1039 ram_release_pages(block
->idstr
, pss
->offset
, pages
);
1048 * find_dirty_block: find the next dirty page and update any state
1049 * associated with the search process.
1051 * Returns if a page is found
1053 * @rs: current RAM state
1054 * @pss: data about the state of the current dirty page scan
1055 * @again: set to false if the search has scanned the whole of RAM
1056 * @ram_addr_abs: pointer into which to store the address of the dirty page
1057 * within the global ram_addr space
1059 static bool find_dirty_block(RAMState
*rs
, PageSearchStatus
*pss
,
1060 bool *again
, ram_addr_t
*ram_addr_abs
)
1062 pss
->offset
= migration_bitmap_find_dirty(rs
, pss
->block
, pss
->offset
,
1064 if (pss
->complete_round
&& pss
->block
== rs
->last_seen_block
&&
1065 pss
->offset
>= rs
->last_offset
) {
1067 * We've been once around the RAM and haven't found anything.
1073 if (pss
->offset
>= pss
->block
->used_length
) {
1074 /* Didn't find anything in this RAM Block */
1076 pss
->block
= QLIST_NEXT_RCU(pss
->block
, next
);
1078 /* Hit the end of the list */
1079 pss
->block
= QLIST_FIRST_RCU(&ram_list
.blocks
);
1080 /* Flag that we've looped */
1081 pss
->complete_round
= true;
1082 rs
->ram_bulk_stage
= false;
1083 if (migrate_use_xbzrle()) {
1084 /* If xbzrle is on, stop using the data compression at this
1085 * point. In theory, xbzrle can do better than compression.
1087 flush_compressed_data(rs
);
1090 /* Didn't find anything this time, but try again on the new block */
1094 /* Can go around again, but... */
1096 /* We've found something so probably don't need to */
 * unqueue_page: gets a page off the queue
1104 * Helper for 'get_queued_page' - gets a page off the queue
1106 * Returns the block of the page (or NULL if none available)
1108 * @rs: current RAM state
1109 * @offset: used to return the offset within the RAMBlock
1110 * @ram_addr_abs: pointer into which to store the address of the dirty page
1111 * within the global ram_addr space
1113 static RAMBlock
*unqueue_page(RAMState
*rs
, ram_addr_t
*offset
,
1114 ram_addr_t
*ram_addr_abs
)
1116 RAMBlock
*block
= NULL
;
1118 qemu_mutex_lock(&rs
->src_page_req_mutex
);
1119 if (!QSIMPLEQ_EMPTY(&rs
->src_page_requests
)) {
1120 struct RAMSrcPageRequest
*entry
=
1121 QSIMPLEQ_FIRST(&rs
->src_page_requests
);
1123 *offset
= entry
->offset
;
1124 *ram_addr_abs
= (entry
->offset
+ entry
->rb
->offset
) &
1127 if (entry
->len
> TARGET_PAGE_SIZE
) {
1128 entry
->len
-= TARGET_PAGE_SIZE
;
1129 entry
->offset
+= TARGET_PAGE_SIZE
;
1131 memory_region_unref(block
->mr
);
1132 QSIMPLEQ_REMOVE_HEAD(&rs
->src_page_requests
, next_req
);
1136 qemu_mutex_unlock(&rs
->src_page_req_mutex
);
 * get_queued_page: unqueue a page from the postcopy requests
1144 * Skips pages that are already sent (!dirty)
1146 * Returns if a queued page is found
1148 * @rs: current RAM state
1149 * @pss: data about the state of the current dirty page scan
1150 * @ram_addr_abs: pointer into which to store the address of the dirty page
1151 * within the global ram_addr space
1153 static bool get_queued_page(RAMState
*rs
, PageSearchStatus
*pss
,
1154 ram_addr_t
*ram_addr_abs
)
1161 block
= unqueue_page(rs
, &offset
, ram_addr_abs
);
1163 * We're sending this page, and since it's postcopy nothing else
1164 * will dirty it, and we must make sure it doesn't get sent again
1165 * even if this queue request was received after the background
1166 * search already sent it.
1169 unsigned long *bitmap
;
1170 bitmap
= atomic_rcu_read(&rs
->ram_bitmap
)->bmap
;
1171 dirty
= test_bit(*ram_addr_abs
>> TARGET_PAGE_BITS
, bitmap
);
1173 trace_get_queued_page_not_dirty(
1174 block
->idstr
, (uint64_t)offset
,
1175 (uint64_t)*ram_addr_abs
,
1176 test_bit(*ram_addr_abs
>> TARGET_PAGE_BITS
,
1177 atomic_rcu_read(&rs
->ram_bitmap
)->unsentmap
));
1179 trace_get_queued_page(block
->idstr
,
1181 (uint64_t)*ram_addr_abs
);
1185 } while (block
&& !dirty
);
1189 * As soon as we start servicing pages out of order, then we have
1190 * to kill the bulk stage, since the bulk stage assumes
1191 * in (migration_bitmap_find_and_reset_dirty) that every page is
1192 * dirty, that's no longer true.
1194 rs
->ram_bulk_stage
= false;
1197 * We want the background search to continue from the queued page
1198 * since the guest is likely to want other pages near to the page
1199 * it just requested.
1202 pss
->offset
= offset
;
1209 * migration_page_queue_free: drop any remaining pages in the ram
1212 * It should be empty at the end anyway, but in error cases there may
 * be some left. In case any page is left, we drop it.
1216 void migration_page_queue_free(void)
1218 struct RAMSrcPageRequest
*mspr
, *next_mspr
;
1219 RAMState
*rs
= &ram_state
;
1220 /* This queue generally should be empty - but in the case of a failed
1221 * migration might have some droppings in.
1224 QSIMPLEQ_FOREACH_SAFE(mspr
, &rs
->src_page_requests
, next_req
, next_mspr
) {
1225 memory_region_unref(mspr
->rb
->mr
);
1226 QSIMPLEQ_REMOVE_HEAD(&rs
->src_page_requests
, next_req
);
1233 * ram_save_queue_pages: queue the page for transmission
1235 * A request from postcopy destination for example.
1237 * Returns zero on success or negative on error
1239 * @rbname: Name of the RAMBLock of the request. NULL means the
1240 * same that last one.
1241 * @start: starting address from the start of the RAMBlock
1242 * @len: length (in bytes) to send
1244 int ram_save_queue_pages(const char *rbname
, ram_addr_t start
, ram_addr_t len
)
1247 RAMState
*rs
= &ram_state
;
1249 rs
->postcopy_requests
++;
1252 /* Reuse last RAMBlock */
1253 ramblock
= rs
->last_req_rb
;
1257 * Shouldn't happen, we can't reuse the last RAMBlock if
1258 * it's the 1st request.
1260 error_report("ram_save_queue_pages no previous block");
1264 ramblock
= qemu_ram_block_by_name(rbname
);
1267 /* We shouldn't be asked for a non-existent RAMBlock */
1268 error_report("ram_save_queue_pages no block '%s'", rbname
);
1271 rs
->last_req_rb
= ramblock
;
1273 trace_ram_save_queue_pages(ramblock
->idstr
, start
, len
);
1274 if (start
+len
> ramblock
->used_length
) {
1275 error_report("%s request overrun start=" RAM_ADDR_FMT
" len="
1276 RAM_ADDR_FMT
" blocklen=" RAM_ADDR_FMT
,
1277 __func__
, start
, len
, ramblock
->used_length
);
1281 struct RAMSrcPageRequest
*new_entry
=
1282 g_malloc0(sizeof(struct RAMSrcPageRequest
));
1283 new_entry
->rb
= ramblock
;
1284 new_entry
->offset
= start
;
1285 new_entry
->len
= len
;
1287 memory_region_ref(ramblock
->mr
);
1288 qemu_mutex_lock(&rs
->src_page_req_mutex
);
1289 QSIMPLEQ_INSERT_TAIL(&rs
->src_page_requests
, new_entry
, next_req
);
1290 qemu_mutex_unlock(&rs
->src_page_req_mutex
);
1301 * ram_save_target_page: save one target page
1303 * Returns the number of pages written
1305 * @rs: current RAM state
1306 * @ms: current migration state
1307 * @pss: data about the page we want to send
1308 * @last_stage: if we are at the completion stage
1309 * @dirty_ram_abs: address of the start of the dirty page in ram_addr_t space
1311 static int ram_save_target_page(RAMState
*rs
, PageSearchStatus
*pss
,
1312 bool last_stage
, ram_addr_t dirty_ram_abs
)
/* Check if the page is dirty and, if so, send it */
1317 if (migration_bitmap_clear_dirty(rs
, dirty_ram_abs
)) {
1318 unsigned long *unsentmap
;
1320 * If xbzrle is on, stop using the data compression after first
1321 * round of migration even if compression is enabled. In theory,
1322 * xbzrle can do better than compression.
1325 if (migrate_use_compression()
1326 && (rs
->ram_bulk_stage
|| !migrate_use_xbzrle())) {
1327 res
= ram_save_compressed_page(rs
, pss
, last_stage
);
1329 res
= ram_save_page(rs
, pss
, last_stage
);
1335 unsentmap
= atomic_rcu_read(&rs
->ram_bitmap
)->unsentmap
;
1337 clear_bit(dirty_ram_abs
>> TARGET_PAGE_BITS
, unsentmap
);
1339 /* Only update last_sent_block if a block was actually sent; xbzrle
1340 * might have decided the page was identical so didn't bother writing
1344 rs
->last_sent_block
= pss
->block
;
1352 * ram_save_host_page: save a whole host page
1354 * Starting at *offset send pages up to the end of the current host
1355 * page. It's valid for the initial offset to point into the middle of
1356 * a host page in which case the remainder of the hostpage is sent.
1357 * Only dirty target pages are sent. Note that the host page size may
1358 * be a huge page for this block.
1360 * Returns the number of pages written or negative on error
1362 * @rs: current RAM state
1363 * @ms: current migration state
1364 * @pss: data about the page we want to send
1365 * @last_stage: if we are at the completion stage
1366 * @dirty_ram_abs: Address of the start of the dirty page in ram_addr_t space
1368 static int ram_save_host_page(RAMState
*rs
, PageSearchStatus
*pss
,
1370 ram_addr_t dirty_ram_abs
)
1372 int tmppages
, pages
= 0;
1373 size_t pagesize
= qemu_ram_pagesize(pss
->block
);
1376 tmppages
= ram_save_target_page(rs
, pss
, last_stage
, dirty_ram_abs
);
1382 pss
->offset
+= TARGET_PAGE_SIZE
;
1383 dirty_ram_abs
+= TARGET_PAGE_SIZE
;
1384 } while (pss
->offset
& (pagesize
- 1));
1386 /* The offset we leave with is the last one we looked at */
1387 pss
->offset
-= TARGET_PAGE_SIZE
;
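/*
 * Illustrative example: for a RAMBlock backed by 2 MiB hugepages with a
 * 4 KiB target page size, one call can send up to 512 target pages,
 * stopping when pss->offset reaches the next host-page boundary.
 */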
1392 * ram_find_and_save_block: finds a dirty page and sends it to f
1394 * Called within an RCU critical section.
1396 * Returns the number of pages written where zero means no dirty pages
1398 * @rs: current RAM state
1399 * @last_stage: if we are at the completion stage
1401 * On systems where host-page-size > target-page-size it will send all the
1402 * pages in a host page that are dirty.
1405 static int ram_find_and_save_block(RAMState
*rs
, bool last_stage
)
1407 PageSearchStatus pss
;
1410 ram_addr_t dirty_ram_abs
; /* Address of the start of the dirty page in
1413 /* No dirty page as there is zero RAM */
1414 if (!ram_bytes_total()) {
1418 pss
.block
= rs
->last_seen_block
;
1419 pss
.offset
= rs
->last_offset
;
1420 pss
.complete_round
= false;
1423 pss
.block
= QLIST_FIRST_RCU(&ram_list
.blocks
);
1428 found
= get_queued_page(rs
, &pss
, &dirty_ram_abs
);
1431 /* priority queue empty, so just search for something dirty */
1432 found
= find_dirty_block(rs
, &pss
, &again
, &dirty_ram_abs
);
1436 pages
= ram_save_host_page(rs
, &pss
, last_stage
, dirty_ram_abs
);
1438 } while (!pages
&& again
);
1440 rs
->last_seen_block
= pss
.block
;
1441 rs
->last_offset
= pss
.offset
;
1446 void acct_update_position(QEMUFile
*f
, size_t size
, bool zero
)
1448 uint64_t pages
= size
/ TARGET_PAGE_SIZE
;
1449 RAMState
*rs
= &ram_state
;
1452 rs
->zero_pages
+= pages
;
1454 rs
->norm_pages
+= pages
;
1455 rs
->bytes_transferred
+= size
;
1456 qemu_update_position(f
, size
);
1460 uint64_t ram_bytes_total(void)
1466 QLIST_FOREACH_RCU(block
, &ram_list
.blocks
, next
)
1467 total
+= block
->used_length
;
1472 void free_xbzrle_decoded_buf(void)
1474 g_free(xbzrle_decoded_buf
);
1475 xbzrle_decoded_buf
= NULL
;
1478 static void migration_bitmap_free(struct RAMBitmap
*bmap
)
1481 g_free(bmap
->unsentmap
);
1485 static void ram_migration_cleanup(void *opaque
)
1487 RAMState
*rs
= opaque
;
/* The caller must hold the iothread lock or be in a bh, so there is
1490 * no writing race against this migration_bitmap
1492 struct RAMBitmap
*bitmap
= rs
->ram_bitmap
;
1493 atomic_rcu_set(&rs
->ram_bitmap
, NULL
);
1495 memory_global_dirty_log_stop();
1496 call_rcu(bitmap
, migration_bitmap_free
, rcu
);
1499 XBZRLE_cache_lock();
1501 cache_fini(XBZRLE
.cache
);
1502 g_free(XBZRLE
.encoded_buf
);
1503 g_free(XBZRLE
.current_buf
);
1504 g_free(ZERO_TARGET_PAGE
);
1505 XBZRLE
.cache
= NULL
;
1506 XBZRLE
.encoded_buf
= NULL
;
1507 XBZRLE
.current_buf
= NULL
;
1509 XBZRLE_cache_unlock();
1512 static void ram_state_reset(RAMState
*rs
)
1514 rs
->last_seen_block
= NULL
;
1515 rs
->last_sent_block
= NULL
;
1516 rs
->last_offset
= 0;
1517 rs
->last_version
= ram_list
.version
;
1518 rs
->ram_bulk_stage
= true;
1521 #define MAX_WAIT 50 /* ms, half buffered_file limit */
1523 void migration_bitmap_extend(ram_addr_t old
, ram_addr_t
new)
1525 RAMState
*rs
= &ram_state
;
1527 /* called in qemu main thread, so there is
1528 * no writing race against this migration_bitmap
1530 if (rs
->ram_bitmap
) {
1531 struct RAMBitmap
*old_bitmap
= rs
->ram_bitmap
, *bitmap
;
1532 bitmap
= g_new(struct RAMBitmap
, 1);
1533 bitmap
->bmap
= bitmap_new(new);
1535 /* prevent migration_bitmap content from being set bit
1536 * by migration_bitmap_sync_range() at the same time.
 * It is safe for migration if the migration_bitmap bit is cleared
1540 qemu_mutex_lock(&rs
->bitmap_mutex
);
1541 bitmap_copy(bitmap
->bmap
, old_bitmap
->bmap
, old
);
1542 bitmap_set(bitmap
->bmap
, old
, new - old
);
1544 /* We don't have a way to safely extend the sentmap
1545 * with RCU; so mark it as missing, entry to postcopy
1548 bitmap
->unsentmap
= NULL
;
1550 atomic_rcu_set(&rs
->ram_bitmap
, bitmap
);
1551 qemu_mutex_unlock(&rs
->bitmap_mutex
);
1552 rs
->migration_dirty_pages
+= new - old
;
1553 call_rcu(old_bitmap
, migration_bitmap_free
, rcu
);
1558 * 'expected' is the value you expect the bitmap mostly to be full
1559 * of; it won't bother printing lines that are all this value.
1560 * If 'todump' is null the migration bitmap is dumped.
1562 void ram_debug_dump_bitmap(unsigned long *todump
, bool expected
)
1564 int64_t ram_pages
= last_ram_offset() >> TARGET_PAGE_BITS
;
1565 RAMState
*rs
= &ram_state
;
1567 int64_t linelen
= 128;
1571 todump
= atomic_rcu_read(&rs
->ram_bitmap
)->bmap
;
1574 for (cur
= 0; cur
< ram_pages
; cur
+= linelen
) {
1578 * Last line; catch the case where the line length
1579 * is longer than remaining ram
1581 if (cur
+ linelen
> ram_pages
) {
1582 linelen
= ram_pages
- cur
;
1584 for (curb
= 0; curb
< linelen
; curb
++) {
1585 bool thisbit
= test_bit(cur
+ curb
, todump
);
1586 linebuf
[curb
] = thisbit
? '1' : '.';
1587 found
= found
|| (thisbit
!= expected
);
1590 linebuf
[curb
] = '\0';
1591 fprintf(stderr
, "0x%08" PRIx64
" : %s\n", cur
, linebuf
);
1596 /* **** functions for postcopy ***** */
1598 void ram_postcopy_migrated_memory_release(MigrationState
*ms
)
1600 RAMState
*rs
= &ram_state
;
1601 struct RAMBlock
*block
;
1602 unsigned long *bitmap
= atomic_rcu_read(&rs
->ram_bitmap
)->bmap
;
1604 QLIST_FOREACH_RCU(block
, &ram_list
.blocks
, next
) {
1605 unsigned long first
= block
->offset
>> TARGET_PAGE_BITS
;
1606 unsigned long range
= first
+ (block
->used_length
>> TARGET_PAGE_BITS
);
1607 unsigned long run_start
= find_next_zero_bit(bitmap
, range
, first
);
1609 while (run_start
< range
) {
1610 unsigned long run_end
= find_next_bit(bitmap
, range
, run_start
+ 1);
1611 ram_discard_range(NULL
, block
->idstr
, run_start
<< TARGET_PAGE_BITS
,
1612 (run_end
- run_start
) << TARGET_PAGE_BITS
);
1613 run_start
= find_next_zero_bit(bitmap
, range
, run_end
+ 1);
1619 * postcopy_send_discard_bm_ram: discard a RAMBlock
1621 * Returns zero on success
1623 * Callback from postcopy_each_ram_send_discard for each RAMBlock
1624 * Note: At this point the 'unsentmap' is the processed bitmap combined
1625 * with the dirtymap; so a '1' means it's either dirty or unsent.
1627 * @ms: current migration state
1628 * @pds: state for postcopy
1629 * @start: RAMBlock starting page
1630 * @length: RAMBlock size
1632 static int postcopy_send_discard_bm_ram(MigrationState
*ms
,
1633 PostcopyDiscardState
*pds
,
1634 unsigned long start
,
1635 unsigned long length
)
1637 RAMState
*rs
= &ram_state
;
1638 unsigned long end
= start
+ length
; /* one after the end */
1639 unsigned long current
;
1640 unsigned long *unsentmap
;
1642 unsentmap
= atomic_rcu_read(&rs
->ram_bitmap
)->unsentmap
;
1643 for (current
= start
; current
< end
; ) {
1644 unsigned long one
= find_next_bit(unsentmap
, end
, current
);
1647 unsigned long zero
= find_next_zero_bit(unsentmap
, end
, one
+ 1);
1648 unsigned long discard_length
;
1651 discard_length
= end
- one
;
1653 discard_length
= zero
- one
;
1655 if (discard_length
) {
1656 postcopy_discard_send_range(ms
, pds
, one
, discard_length
);
1658 current
= one
+ discard_length
;
1668 * postcopy_each_ram_send_discard: discard all RAMBlocks
1670 * Returns 0 for success or negative for error
1672 * Utility for the outgoing postcopy code.
1673 * Calls postcopy_send_discard_bm_ram for each RAMBlock
1674 * passing it bitmap indexes and name.
1675 * (qemu_ram_foreach_block ends up passing unscaled lengths
1676 * which would mean postcopy code would have to deal with target page)
1678 * @ms: current migration state
1680 static int postcopy_each_ram_send_discard(MigrationState
*ms
)
1682 struct RAMBlock
*block
;
1685 QLIST_FOREACH_RCU(block
, &ram_list
.blocks
, next
) {
1686 unsigned long first
= block
->offset
>> TARGET_PAGE_BITS
;
1687 PostcopyDiscardState
*pds
= postcopy_discard_send_init(ms
,
1692 * Postcopy sends chunks of bitmap over the wire, but it
1693 * just needs indexes at this point, avoids it having
1694 * target page specific code.
1696 ret
= postcopy_send_discard_bm_ram(ms
, pds
, first
,
1697 block
->used_length
>> TARGET_PAGE_BITS
);
1698 postcopy_discard_send_finish(ms
, pds
);
 * postcopy_chunk_hostpages_pass: canonicalize bitmap in hostpages
1710 * Helper for postcopy_chunk_hostpages; it's called twice to
1711 * canonicalize the two bitmaps, that are similar, but one is
1714 * Postcopy requires that all target pages in a hostpage are dirty or
1715 * clean, not a mix. This function canonicalizes the bitmaps.
1717 * @ms: current migration state
1718 * @unsent_pass: if true we need to canonicalize partially unsent host pages
1719 * otherwise we need to canonicalize partially dirty host pages
1720 * @block: block that contains the page we want to canonicalize
1721 * @pds: state for postcopy
1723 static void postcopy_chunk_hostpages_pass(MigrationState
*ms
, bool unsent_pass
,
1725 PostcopyDiscardState
*pds
)
1727 RAMState
*rs
= &ram_state
;
1728 unsigned long *bitmap
;
1729 unsigned long *unsentmap
;
1730 unsigned int host_ratio
= block
->page_size
/ TARGET_PAGE_SIZE
;
1731 unsigned long first
= block
->offset
>> TARGET_PAGE_BITS
;
1732 unsigned long len
= block
->used_length
>> TARGET_PAGE_BITS
;
1733 unsigned long last
= first
+ (len
- 1);
1734 unsigned long run_start
;
1736 if (block
->page_size
== TARGET_PAGE_SIZE
) {
1737 /* Easy case - TPS==HPS for a non-huge page RAMBlock */
1741 bitmap
= atomic_rcu_read(&rs
->ram_bitmap
)->bmap
;
1742 unsentmap
= atomic_rcu_read(&rs
->ram_bitmap
)->unsentmap
;
1745 /* Find a sent page */
1746 run_start
= find_next_zero_bit(unsentmap
, last
+ 1, first
);
1748 /* Find a dirty page */
1749 run_start
= find_next_bit(bitmap
, last
+ 1, first
);
1752 while (run_start
<= last
) {
1753 bool do_fixup
= false;
1754 unsigned long fixup_start_addr
;
1755 unsigned long host_offset
;
1758 * If the start of this run of pages is in the middle of a host
1759 * page, then we need to fixup this host page.
1761 host_offset
= run_start
% host_ratio
;
1764 run_start
-= host_offset
;
1765 fixup_start_addr
= run_start
;
1766 /* For the next pass */
1767 run_start
= run_start
+ host_ratio
;
1769 /* Find the end of this run */
1770 unsigned long run_end
;
1772 run_end
= find_next_bit(unsentmap
, last
+ 1, run_start
+ 1);
1774 run_end
= find_next_zero_bit(bitmap
, last
+ 1, run_start
+ 1);
1777 * If the end isn't at the start of a host page, then the
1778 * run doesn't finish at the end of a host page
1779 * and we need to discard.
1781 host_offset
= run_end
% host_ratio
;
1784 fixup_start_addr
= run_end
- host_offset
;
1786 * This host page has gone, the next loop iteration starts
1787 * from after the fixup
1789 run_start
= fixup_start_addr
+ host_ratio
;
1792 * No discards on this iteration, next loop starts from
1793 * next sent/dirty page
1795 run_start
= run_end
+ 1;
1802 /* Tell the destination to discard this page */
1803 if (unsent_pass
|| !test_bit(fixup_start_addr
, unsentmap
)) {
1804 /* For the unsent_pass we:
1805 * discard partially sent pages
1806 * For the !unsent_pass (dirty) we:
1807 * discard partially dirty pages that were sent
1808 * (any partially sent pages were already discarded
1809 * by the previous unsent_pass)
1811 postcopy_discard_send_range(ms
, pds
, fixup_start_addr
,
1815 /* Clean up the bitmap */
1816 for (page
= fixup_start_addr
;
1817 page
< fixup_start_addr
+ host_ratio
; page
++) {
1818 /* All pages in this host page are now not sent */
1819 set_bit(page
, unsentmap
);
1822 * Remark them as dirty, updating the count for any pages
1823 * that weren't previously dirty.
1825 rs
->migration_dirty_pages
+= !test_and_set_bit(page
, bitmap
);
1830 /* Find the next sent page for the next iteration */
1831 run_start
= find_next_zero_bit(unsentmap
, last
+ 1,
1834 /* Find the next dirty page for the next iteration */
1835 run_start
= find_next_bit(bitmap
, last
+ 1, run_start
);
 * postcopy_chunk_hostpages: discard any partially sent host page
1843 * Utility for the outgoing postcopy code.
1845 * Discard any partially sent host-page size chunks, mark any partially
1846 * dirty host-page size chunks as all dirty. In this case the host-page
1847 * is the host-page for the particular RAMBlock, i.e. it might be a huge page
1849 * Returns zero on success
1851 * @ms: current migration state
1853 static int postcopy_chunk_hostpages(MigrationState
*ms
)
1855 RAMState
*rs
= &ram_state
;
1856 struct RAMBlock
*block
;
1858 /* Easiest way to make sure we don't resume in the middle of a host-page */
1859 rs
->last_seen_block
= NULL
;
1860 rs
->last_sent_block
= NULL
;
1861 rs
->last_offset
= 0;
1863 QLIST_FOREACH_RCU(block
, &ram_list
.blocks
, next
) {
1864 unsigned long first
= block
->offset
>> TARGET_PAGE_BITS
;
1866 PostcopyDiscardState
*pds
=
1867 postcopy_discard_send_init(ms
, first
, block
->idstr
);
1869 /* First pass: Discard all partially sent host pages */
1870 postcopy_chunk_hostpages_pass(ms
, true, block
, pds
);
1872 * Second pass: Ensure that all partially dirty host pages are made
1875 postcopy_chunk_hostpages_pass(ms
, false, block
, pds
);
1877 postcopy_discard_send_finish(ms
, pds
);
1878 } /* ram_list loop */
1884 * ram_postcopy_send_discard_bitmap: transmit the discard bitmap
1886 * Returns zero on success
1888 * Transmit the set of pages to be discarded after precopy to the target
1889 * these are pages that:
1890 * a) Have been previously transmitted but are now dirty again
1891 * b) Pages that have never been transmitted, this ensures that
1892 * any pages on the destination that have been mapped by background
1893 * tasks get discarded (transparent huge pages is the specific concern)
1894 * Hopefully this is pretty sparse
1896 * @ms: current migration state
1898 int ram_postcopy_send_discard_bitmap(MigrationState
*ms
)
1900 RAMState
*rs
= &ram_state
;
1902 unsigned long *bitmap
, *unsentmap
;
1906 /* This should be our last sync, the src is now paused */
1907 migration_bitmap_sync(rs
);
1909 unsentmap
= atomic_rcu_read(&rs
->ram_bitmap
)->unsentmap
;
1911 /* We don't have a safe way to resize the sentmap, so
1912 * if the bitmap was resized it will be NULL at this
1915 error_report("migration ram resized during precopy phase");
1920 /* Deal with TPS != HPS and huge pages */
1921 ret
= postcopy_chunk_hostpages(ms
);
1928 * Update the unsentmap to be unsentmap = unsentmap | dirty
1930 bitmap
= atomic_rcu_read(&rs
->ram_bitmap
)->bmap
;
1931 bitmap_or(unsentmap
, unsentmap
, bitmap
,
1932 last_ram_offset() >> TARGET_PAGE_BITS
);
1935 trace_ram_postcopy_send_discard_bitmap();
1936 #ifdef DEBUG_POSTCOPY
1937 ram_debug_dump_bitmap(unsentmap
, true);
1940 ret
= postcopy_each_ram_send_discard(ms
);
1947 * ram_discard_range: discard dirtied pages at the beginning of postcopy
1949 * Returns zero on success
1951 * @mis: current migration incoming state
1952 * @rbname: name of the RAMBlock of the request. NULL means the
1953 * same that last one.
1954 * @start: RAMBlock starting page
1955 * @length: RAMBlock size
1957 int ram_discard_range(MigrationIncomingState
*mis
,
1959 uint64_t start
, size_t length
)
1963 trace_ram_discard_range(rbname
, start
, length
);
1966 RAMBlock
*rb
= qemu_ram_block_by_name(rbname
);
1969 error_report("ram_discard_range: Failed to find block '%s'", rbname
);
1973 ret
= ram_block_discard_range(rb
, start
, length
);
1981 static int ram_state_init(RAMState
*rs
)
1983 int64_t ram_bitmap_pages
; /* Size of bitmap in pages, including gaps */
1985 memset(rs
, 0, sizeof(*rs
));
1986 qemu_mutex_init(&rs
->bitmap_mutex
);
1987 qemu_mutex_init(&rs
->src_page_req_mutex
);
1988 QSIMPLEQ_INIT(&rs
->src_page_requests
);
1990 if (migrate_use_xbzrle()) {
1991 XBZRLE_cache_lock();
1992 ZERO_TARGET_PAGE
= g_malloc0(TARGET_PAGE_SIZE
);
1993 XBZRLE
.cache
= cache_init(migrate_xbzrle_cache_size() /
1996 if (!XBZRLE
.cache
) {
1997 XBZRLE_cache_unlock();
1998 error_report("Error creating cache");
2001 XBZRLE_cache_unlock();
2003 /* We prefer not to abort if there is no memory */
2004 XBZRLE
.encoded_buf
= g_try_malloc0(TARGET_PAGE_SIZE
);
2005 if (!XBZRLE
.encoded_buf
) {
2006 error_report("Error allocating encoded_buf");
2010 XBZRLE
.current_buf
= g_try_malloc(TARGET_PAGE_SIZE
);
2011 if (!XBZRLE
.current_buf
) {
2012 error_report("Error allocating current_buf");
2013 g_free(XBZRLE
.encoded_buf
);
2014 XBZRLE
.encoded_buf
= NULL
;
2019 /* For memory_global_dirty_log_start below. */
2020 qemu_mutex_lock_iothread();
2022 qemu_mutex_lock_ramlist();
2024 ram_state_reset(rs
);
2026 rs
->ram_bitmap
= g_new0(struct RAMBitmap
, 1);
2027 /* Skip setting bitmap if there is no RAM */
2028 if (ram_bytes_total()) {
2029 ram_bitmap_pages
= last_ram_offset() >> TARGET_PAGE_BITS
;
2030 rs
->ram_bitmap
->bmap
= bitmap_new(ram_bitmap_pages
);
2031 bitmap_set(rs
->ram_bitmap
->bmap
, 0, ram_bitmap_pages
);
2033 if (migrate_postcopy_ram()) {
2034 rs
->ram_bitmap
->unsentmap
= bitmap_new(ram_bitmap_pages
);
2035 bitmap_set(rs
->ram_bitmap
->unsentmap
, 0, ram_bitmap_pages
);
2040 * Count the total number of pages used by ram blocks not including any
2041 * gaps due to alignment or unplugs.
2043 rs
->migration_dirty_pages
= ram_bytes_total() >> TARGET_PAGE_BITS
;
2045 memory_global_dirty_log_start();
2046 migration_bitmap_sync(rs
);
2047 qemu_mutex_unlock_ramlist();
2048 qemu_mutex_unlock_iothread();
2055 * Each of ram_save_setup, ram_save_iterate and ram_save_complete has
2056 * long-running RCU critical section. When rcu-reclaims in the code
2057 * start to become numerous it will be necessary to reduce the
2058 * granularity of these critical sections.
2062 * ram_save_setup: Setup RAM for migration
2064 * Returns zero to indicate success and negative for error
2066 * @f: QEMUFile where to send the data
2067 * @opaque: RAMState pointer
2069 static int ram_save_setup(QEMUFile
*f
, void *opaque
)
2071 RAMState
*rs
= opaque
;
2074 /* migration has already setup the bitmap, reuse it. */
2075 if (!migration_in_colo_state()) {
2076 if (ram_state_init(rs
) < 0) {
2084 qemu_put_be64(f
, ram_bytes_total() | RAM_SAVE_FLAG_MEM_SIZE
);
2086 QLIST_FOREACH_RCU(block
, &ram_list
.blocks
, next
) {
2087 qemu_put_byte(f
, strlen(block
->idstr
));
2088 qemu_put_buffer(f
, (uint8_t *)block
->idstr
, strlen(block
->idstr
));
2089 qemu_put_be64(f
, block
->used_length
);
2090 if (migrate_postcopy_ram() && block
->page_size
!= qemu_host_page_size
) {
2091 qemu_put_be64(f
, block
->page_size
);
2097 ram_control_before_iterate(f
, RAM_CONTROL_SETUP
);
2098 ram_control_after_iterate(f
, RAM_CONTROL_SETUP
);
2100 qemu_put_be64(f
, RAM_SAVE_FLAG_EOS
);
2106 * ram_save_iterate: iterative stage for migration
2108 * Returns zero to indicate success and negative for error
2110 * @f: QEMUFile where to send the data
2111 * @opaque: RAMState pointer
2113 static int ram_save_iterate(QEMUFile
*f
, void *opaque
)
2115 RAMState
*rs
= opaque
;
2122 if (ram_list
.version
!= rs
->last_version
) {
2123 ram_state_reset(rs
);
2126 /* Read version before ram_list.blocks */
2129 ram_control_before_iterate(f
, RAM_CONTROL_ROUND
);
2131 t0
= qemu_clock_get_ns(QEMU_CLOCK_REALTIME
);
2133 while ((ret
= qemu_file_rate_limit(f
)) == 0) {
2136 pages
= ram_find_and_save_block(rs
, false);
/* no more pages to send */
2144 /* we want to check in the 1st loop, just in case it was the 1st time
2145 and we had to sync the dirty bitmap.
2146 qemu_get_clock_ns() is a bit expensive, so we only check each some
2149 if ((i
& 63) == 0) {
2150 uint64_t t1
= (qemu_clock_get_ns(QEMU_CLOCK_REALTIME
) - t0
) / 1000000;
2151 if (t1
> MAX_WAIT
) {
2152 trace_ram_save_iterate_big_wait(t1
, i
);
2158 flush_compressed_data(rs
);
2162 * Must occur before EOS (or any QEMUFile operation)
2163 * because of RDMA protocol.
2165 ram_control_after_iterate(f
, RAM_CONTROL_ROUND
);
2167 qemu_put_be64(f
, RAM_SAVE_FLAG_EOS
);
2168 rs
->bytes_transferred
+= 8;
2170 ret
= qemu_file_get_error(f
);
2179 * ram_save_complete: function called to send the remaining amount of ram
2181 * Returns zero to indicate success
2183 * Called with iothread lock
2185 * @f: QEMUFile where to send the data
2186 * @opaque: RAMState pointer
2188 static int ram_save_complete(QEMUFile
*f
, void *opaque
)
2190 RAMState
*rs
= opaque
;
2194 if (!migration_in_postcopy()) {
2195 migration_bitmap_sync(rs
);
2198 ram_control_before_iterate(f
, RAM_CONTROL_FINISH
);
2200 /* try transferring iterative blocks of memory */
2202 /* flush all remaining blocks regardless of rate limiting */
2206 pages
= ram_find_and_save_block(rs
, !migration_in_colo_state());
/* no more blocks to send */
2213 flush_compressed_data(rs
);
2214 ram_control_after_iterate(f
, RAM_CONTROL_FINISH
);
2218 qemu_put_be64(f
, RAM_SAVE_FLAG_EOS
);
static void ram_save_pending(QEMUFile *f, void *opaque, uint64_t max_size,
                             uint64_t *non_postcopiable_pending,
                             uint64_t *postcopiable_pending)
{
    RAMState *rs = opaque;
    uint64_t remaining_size;

    remaining_size = rs->migration_dirty_pages * TARGET_PAGE_SIZE;

    if (!migration_in_postcopy() &&
        remaining_size < max_size) {
        qemu_mutex_lock_iothread();
        rcu_read_lock();
        migration_bitmap_sync(rs);
        rcu_read_unlock();
        qemu_mutex_unlock_iothread();
        remaining_size = rs->migration_dirty_pages * TARGET_PAGE_SIZE;
    }

    /* We can do postcopy, and all the data is postcopiable */
    *postcopiable_pending += remaining_size;
}
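/*
 * A worked example of the estimate above (illustrative numbers only): with
 * 4 KiB target pages and 10,000 dirty pages, remaining_size is
 * 10,000 * 4096 = 40,960,000 bytes (~39 MiB).  If that is already below
 * max_size (the amount the caller believes can be sent within the
 * configured downtime), the dirty bitmap is re-synced under the iothread
 * lock so the decision to enter the completion stage is made on a fresh
 * count rather than a stale one.
 */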
static int load_xbzrle(QEMUFile *f, ram_addr_t addr, void *host)
{
    unsigned int xh_len;
    int xh_flags;
    uint8_t *loaded_data;

    if (!xbzrle_decoded_buf) {
        xbzrle_decoded_buf = g_malloc(TARGET_PAGE_SIZE);
    }
    loaded_data = xbzrle_decoded_buf;

    /* extract RLE header */
    xh_flags = qemu_get_byte(f);
    xh_len = qemu_get_be16(f);

    if (xh_flags != ENCODING_FLAG_XBZRLE) {
        error_report("Failed to load XBZRLE page - wrong compression!");
        return -1;
    }

    if (xh_len > TARGET_PAGE_SIZE) {
        error_report("Failed to load XBZRLE page - len overflow!");
        return -1;
    }

    /* load data and decode */
    qemu_get_buffer_in_place(f, &loaded_data, xh_len);

    if (xbzrle_decode_buffer(loaded_data, xh_len, host,
                             TARGET_PAGE_SIZE) == -1) {
        error_report("Failed to load XBZRLE page - decode error!");
        return -1;
    }

    return 0;
}
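/*
 * For reference, an XBZRLE-compressed page arrives on the wire as
 * (a sketch matching the reads above):
 *
 *   byte   xh_flags  - must be ENCODING_FLAG_XBZRLE
 *   be16   xh_len    - length of the encoded data, at most TARGET_PAGE_SIZE
 *   bytes  xh_len bytes of XBZRLE-encoded delta, which is decoded against
 *          the current contents of the destination page at @host
 */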
/**
 * ram_block_from_stream: read a RAMBlock id from the migration stream
 *
 * Must be called from within an RCU critical section.
 *
 * Returns a pointer from within the RCU-protected ram_list.
 *
 * @f: QEMUFile where to read the data from
 * @flags: Page flags (mostly to see if it's a continuation of previous block)
 */
static inline RAMBlock *ram_block_from_stream(QEMUFile *f, int flags)
{
    static RAMBlock *block = NULL;
    char id[256];
    uint8_t len;

    if (flags & RAM_SAVE_FLAG_CONTINUE) {
        if (!block) {
            error_report("Ack, bad migration stream!");
            return NULL;
        }

        return block;
    }

    len = qemu_get_byte(f);
    qemu_get_buffer(f, (uint8_t *)id, len);
    id[len] = 0;

    block = qemu_ram_block_by_name(id);
    if (!block) {
        error_report("Can't find block %s", id);
        return NULL;
    }

    return block;
}
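/*
 * Note (descriptive, not new behaviour): the static 'block' pointer is what
 * makes RAM_SAVE_FLAG_CONTINUE work - when the source sends several pages
 * from the same RAMBlock in a row it transmits the id string only once, and
 * subsequent pages carry the CONTINUE flag so the receiver reuses the block
 * looked up on the previous call.
 */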
static inline void *host_from_ram_block_offset(RAMBlock *block,
                                               ram_addr_t offset)
{
    if (!offset_in_ramblock(block, offset)) {
        return NULL;
    }

    return block->host + offset;
}
/**
 * ram_handle_compressed: handle the zero page case
 *
 * If a page (or a whole RDMA chunk) has been
 * determined to be zero, then zap it.
 *
 * @host: host address for the zero page
 * @ch: what the page is filled from.  We only support zero
 * @size: size of the zero page
 */
void ram_handle_compressed(void *host, uint8_t ch, uint64_t size)
{
    if (ch != 0 || !is_zero_range(host, size)) {
        memset(host, ch, size);
    }
}
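/*
 * Descriptive note: the is_zero_range() test above means an incoming zero
 * page only triggers a memset() when the destination is not already zero,
 * so guest memory that was never touched is not written to and can stay
 * unpopulated on the destination host.
 */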
static void *do_data_decompress(void *opaque)
{
    DecompressParam *param = opaque;
    unsigned long pagesize;
    uint8_t *des;
    int len;

    qemu_mutex_lock(&param->mutex);
    while (!param->quit) {
        if (param->des) {
            des = param->des;
            len = param->len;
            param->des = 0;
            qemu_mutex_unlock(&param->mutex);

            pagesize = TARGET_PAGE_SIZE;
            /* uncompress() can fail in some cases, especially when the
             * page was dirtied while it was being compressed.  That is
             * not a problem because the dirty page will be retransferred
             * and uncompress() won't break the data in other pages.
             */
            uncompress((Bytef *)des, &pagesize,
                       (const Bytef *)param->compbuf, len);

            qemu_mutex_lock(&decomp_done_lock);
            param->done = true;
            qemu_cond_signal(&decomp_done_cond);
            qemu_mutex_unlock(&decomp_done_lock);

            qemu_mutex_lock(&param->mutex);
        } else {
            qemu_cond_wait(&param->cond, &param->mutex);
        }
    }
    qemu_mutex_unlock(&param->mutex);

    return NULL;
}
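/*
 * Descriptive sketch of the handshake between a decompress worker and the
 * dispatcher (decompress_data_with_multi_threads below):
 *
 *   dispatcher                              worker (do_data_decompress)
 *   ----------                              ---------------------------
 *   find idx with done == true
 *   done = false, fill compbuf/des/len
 *   signal param->cond            ---->     wake up, copy des/len,
 *                                           uncompress() into guest RAM
 *   wait on decomp_done_cond      <----     done = true,
 *                                           signal decomp_done_cond
 *
 * param->mutex protects the per-thread work description, while
 * decomp_done_lock/decomp_done_cond cover the shared 'done' flags.
 */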
static void wait_for_decompress_done(void)
{
    int idx, thread_count;

    if (!migrate_use_compression()) {
        return;
    }

    thread_count = migrate_decompress_threads();
    qemu_mutex_lock(&decomp_done_lock);
    for (idx = 0; idx < thread_count; idx++) {
        while (!decomp_param[idx].done) {
            qemu_cond_wait(&decomp_done_cond, &decomp_done_lock);
        }
    }
    qemu_mutex_unlock(&decomp_done_lock);
}
void migrate_decompress_threads_create(void)
{
    int i, thread_count;

    thread_count = migrate_decompress_threads();
    decompress_threads = g_new0(QemuThread, thread_count);
    decomp_param = g_new0(DecompressParam, thread_count);
    qemu_mutex_init(&decomp_done_lock);
    qemu_cond_init(&decomp_done_cond);
    for (i = 0; i < thread_count; i++) {
        qemu_mutex_init(&decomp_param[i].mutex);
        qemu_cond_init(&decomp_param[i].cond);
        decomp_param[i].compbuf = g_malloc0(compressBound(TARGET_PAGE_SIZE));
        decomp_param[i].done = true;
        decomp_param[i].quit = false;
        qemu_thread_create(decompress_threads + i, "decompress",
                           do_data_decompress, decomp_param + i,
                           QEMU_THREAD_JOINABLE);
    }
}
void migrate_decompress_threads_join(void)
{
    int i, thread_count;

    thread_count = migrate_decompress_threads();
    for (i = 0; i < thread_count; i++) {
        qemu_mutex_lock(&decomp_param[i].mutex);
        decomp_param[i].quit = true;
        qemu_cond_signal(&decomp_param[i].cond);
        qemu_mutex_unlock(&decomp_param[i].mutex);
    }
    for (i = 0; i < thread_count; i++) {
        qemu_thread_join(decompress_threads + i);
        qemu_mutex_destroy(&decomp_param[i].mutex);
        qemu_cond_destroy(&decomp_param[i].cond);
        g_free(decomp_param[i].compbuf);
    }
    g_free(decompress_threads);
    g_free(decomp_param);
    decompress_threads = NULL;
    decomp_param = NULL;
}
static void decompress_data_with_multi_threads(QEMUFile *f,
                                               void *host, int len)
{
    int idx, thread_count;

    thread_count = migrate_decompress_threads();
    qemu_mutex_lock(&decomp_done_lock);
    while (true) {
        for (idx = 0; idx < thread_count; idx++) {
            if (decomp_param[idx].done) {
                decomp_param[idx].done = false;
                qemu_mutex_lock(&decomp_param[idx].mutex);
                qemu_get_buffer(f, decomp_param[idx].compbuf, len);
                decomp_param[idx].des = host;
                decomp_param[idx].len = len;
                qemu_cond_signal(&decomp_param[idx].cond);
                qemu_mutex_unlock(&decomp_param[idx].mutex);
                break;
            }
        }
        if (idx < thread_count) {
            break;
        } else {
            qemu_cond_wait(&decomp_done_cond, &decomp_done_lock);
        }
    }
    qemu_mutex_unlock(&decomp_done_lock);
}
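/*
 * Descriptive note on the dispatch loop above: a compressed page is handed
 * to the first worker whose 'done' flag is set; if every worker is busy the
 * caller sleeps on decomp_done_cond until one of them signals completion,
 * so at most migrate_decompress_threads() pages are in flight at any time.
 */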
/**
 * ram_postcopy_incoming_init: allocate postcopy data structures
 *
 * Returns 0 for success and negative if there was an error
 *
 * @mis: current migration incoming state
 *
 * Allocate data structures etc needed by incoming migration with
 * postcopy-ram.  postcopy-ram's similarly named
 * postcopy_ram_incoming_init does the work.
 */
int ram_postcopy_incoming_init(MigrationIncomingState *mis)
{
    size_t ram_pages = last_ram_offset() >> TARGET_PAGE_BITS;

    return postcopy_ram_incoming_init(mis, ram_pages);
}
/**
 * ram_load_postcopy: load a page in postcopy case
 *
 * Returns 0 for success or -errno in case of error
 *
 * Called in postcopy mode by ram_load().
 * rcu_read_lock is taken prior to this being called.
 *
 * @f: QEMUFile to read the data from
 */
static int ram_load_postcopy(QEMUFile *f)
{
    int flags = 0, ret = 0;
    bool place_needed = false;
    bool matching_page_sizes = false;
    MigrationIncomingState *mis = migration_incoming_get_current();
    /* Temporary page that is later 'placed' */
    void *postcopy_host_page = postcopy_get_tmp_page(mis);
    void *last_host = NULL;
    bool all_zero = false;

    while (!ret && !(flags & RAM_SAVE_FLAG_EOS)) {
        ram_addr_t addr;
        void *host = NULL;
        void *page_buffer = NULL;
        void *place_source = NULL;
        RAMBlock *block = NULL;
        uint8_t ch;

        addr = qemu_get_be64(f);
        flags = addr & ~TARGET_PAGE_MASK;
        addr &= TARGET_PAGE_MASK;

        trace_ram_load_postcopy_loop((uint64_t)addr, flags);
        place_needed = false;
        if (flags & (RAM_SAVE_FLAG_COMPRESS | RAM_SAVE_FLAG_PAGE)) {
            block = ram_block_from_stream(f, flags);

            host = host_from_ram_block_offset(block, addr);
            if (!host) {
                error_report("Illegal RAM offset " RAM_ADDR_FMT, addr);
                ret = -EINVAL;
                break;
            }
            matching_page_sizes = block->page_size == TARGET_PAGE_SIZE;
            /*
             * Postcopy requires that we place whole host pages atomically;
             * these may be huge pages for RAMBlocks that are backed by
             * hugetlbfs.
             * To make it atomic, the data is read into a temporary page
             * that's moved into place later.
             * The migration protocol uses, possibly smaller, target-pages
             * however the source ensures it always sends all the components
             * of a host page in order.
             */
            page_buffer = postcopy_host_page +
                          ((uintptr_t)host & (block->page_size - 1));
            /* If all TP are zero then we can optimise the place */
            if (!((uintptr_t)host & (block->page_size - 1))) {
                all_zero = true;
            } else {
                /* not the 1st TP within the HP */
                if (host != (last_host + TARGET_PAGE_SIZE)) {
                    error_report("Non-sequential target page %p/%p",
                                 host, last_host);
                    ret = -EINVAL;
                    break;
                }
            }

            /*
             * If it's the last part of a host page then we place the host
             * page
             */
            place_needed = (((uintptr_t)host + TARGET_PAGE_SIZE) &
                            (block->page_size - 1)) == 0;
            place_source = postcopy_host_page;
        }
        last_host = host;

        switch (flags & ~RAM_SAVE_FLAG_CONTINUE) {
        case RAM_SAVE_FLAG_COMPRESS:
            ch = qemu_get_byte(f);
            memset(page_buffer, ch, TARGET_PAGE_SIZE);
            if (ch) {
                all_zero = false;
            }
            break;

        case RAM_SAVE_FLAG_PAGE:
            all_zero = false;
            if (!place_needed || !matching_page_sizes) {
                qemu_get_buffer(f, page_buffer, TARGET_PAGE_SIZE);
            } else {
                /* Avoids the qemu_file copy during postcopy, which is
                 * going to do a copy later; can only do it when we
                 * do this read in one go (matching page sizes)
                 */
                qemu_get_buffer_in_place(f, (uint8_t **)&place_source,
                                         TARGET_PAGE_SIZE);
            }
            break;

        case RAM_SAVE_FLAG_EOS:
            /* normal exit */
            break;

        default:
            error_report("Unknown combination of migration flags: %#x"
                         " (postcopy mode)", flags);
            ret = -EINVAL;
        }

        if (place_needed) {
            /* This gets called at the last target page in the host page */
            void *place_dest = host + TARGET_PAGE_SIZE - block->page_size;

            if (all_zero) {
                ret = postcopy_place_page_zero(mis, place_dest,
                                               block->page_size);
            } else {
                ret = postcopy_place_page(mis, place_dest,
                                          place_source, block->page_size);
            }
        }
        if (!ret) {
            ret = qemu_file_get_error(f);
        }
    }

    return ret;
}
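/*
 * Worked example of the host-page assembly above (illustrative sizes): with
 * a hugetlbfs-backed RAMBlock using 2 MiB host pages and 4 KiB target
 * pages, 512 consecutive target pages are accumulated in
 * postcopy_host_page; only when the 512th one arrives does place_needed
 * become true, and the whole 2 MiB page is handed to postcopy_place_page()
 * (or postcopy_place_page_zero() if every target page was zero) in one
 * atomic operation, so the guest never sees a partially filled huge page.
 */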
static int ram_load(QEMUFile *f, void *opaque, int version_id)
{
    int flags = 0, ret = 0;
    static uint64_t seq_iter;
    int len = 0;
    /*
     * If system is running in postcopy mode, page inserts to host memory
     * must be atomic
     */
    bool postcopy_running = postcopy_state_get() >= POSTCOPY_INCOMING_LISTENING;
    /* ADVISE is earlier, it shows the source has the postcopy capability on */
    bool postcopy_advised = postcopy_state_get() >= POSTCOPY_INCOMING_ADVISE;

    seq_iter++;

    if (version_id != 4) {
        ret = -EINVAL;
    }

    /* This RCU critical section can be very long running.
     * When RCU reclaims in the code start to become numerous,
     * it will be necessary to reduce the granularity of this
     * critical section.
     */
    rcu_read_lock();

    if (postcopy_running) {
        ret = ram_load_postcopy(f);
    }

    while (!postcopy_running && !ret && !(flags & RAM_SAVE_FLAG_EOS)) {
        ram_addr_t addr, total_ram_bytes;
        void *host = NULL;
        uint8_t ch;

        addr = qemu_get_be64(f);
        flags = addr & ~TARGET_PAGE_MASK;
        addr &= TARGET_PAGE_MASK;

        if (flags & (RAM_SAVE_FLAG_COMPRESS | RAM_SAVE_FLAG_PAGE |
                     RAM_SAVE_FLAG_COMPRESS_PAGE | RAM_SAVE_FLAG_XBZRLE)) {
            RAMBlock *block = ram_block_from_stream(f, flags);

            host = host_from_ram_block_offset(block, addr);
            if (!host) {
                error_report("Illegal RAM offset " RAM_ADDR_FMT, addr);
                ret = -EINVAL;
                break;
            }
        }

        switch (flags & ~RAM_SAVE_FLAG_CONTINUE) {
        case RAM_SAVE_FLAG_MEM_SIZE:
            /* Synchronize RAM block list */
            total_ram_bytes = addr;
            while (!ret && total_ram_bytes) {
                RAMBlock *block;
                char id[256];
                ram_addr_t length;

                len = qemu_get_byte(f);
                qemu_get_buffer(f, (uint8_t *)id, len);
                id[len] = 0;
                length = qemu_get_be64(f);

                block = qemu_ram_block_by_name(id);
                if (block) {
                    if (length != block->used_length) {
                        Error *local_err = NULL;

                        ret = qemu_ram_resize(block, length,
                                              &local_err);
                        if (local_err) {
                            error_report_err(local_err);
                        }
                    }
                    /* For postcopy we need to check hugepage sizes match */
                    if (postcopy_advised &&
                        block->page_size != qemu_host_page_size) {
                        uint64_t remote_page_size = qemu_get_be64(f);
                        if (remote_page_size != block->page_size) {
                            error_report("Mismatched RAM page size %s "
                                         "(local) %zd != %" PRId64,
                                         id, block->page_size,
                                         remote_page_size);
                            ret = -EINVAL;
                        }
                    }
                    ram_control_load_hook(f, RAM_CONTROL_BLOCK_REG,
                                          block->idstr);
                } else {
                    error_report("Unknown ramblock \"%s\", cannot "
                                 "accept migration", id);
                    ret = -EINVAL;
                }

                total_ram_bytes -= length;
            }
            break;

        case RAM_SAVE_FLAG_COMPRESS:
            ch = qemu_get_byte(f);
            ram_handle_compressed(host, ch, TARGET_PAGE_SIZE);
            break;

        case RAM_SAVE_FLAG_PAGE:
            qemu_get_buffer(f, host, TARGET_PAGE_SIZE);
            break;

        case RAM_SAVE_FLAG_COMPRESS_PAGE:
            len = qemu_get_be32(f);
            if (len < 0 || len > compressBound(TARGET_PAGE_SIZE)) {
                error_report("Invalid compressed data length: %d", len);
                ret = -EINVAL;
                break;
            }
            decompress_data_with_multi_threads(f, host, len);
            break;

        case RAM_SAVE_FLAG_XBZRLE:
            if (load_xbzrle(f, addr, host) < 0) {
                error_report("Failed to decompress XBZRLE page at "
                             RAM_ADDR_FMT, addr);
                ret = -EINVAL;
                break;
            }
            break;

        case RAM_SAVE_FLAG_EOS:
            /* normal exit */
            break;

        default:
            if (flags & RAM_SAVE_FLAG_HOOK) {
                ram_control_load_hook(f, RAM_CONTROL_HOOK, NULL);
            } else {
                error_report("Unknown combination of migration flags: %#x",
                             flags);
                ret = -EINVAL;
            }
        }
        if (!ret) {
            ret = qemu_file_get_error(f);
        }
    }

    wait_for_decompress_done();
    rcu_read_unlock();
    trace_ram_load_complete(ret, seq_iter);

    return ret;
}
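/*
 * For reference, each record consumed by the precopy loop above starts with
 * a single be64 whose bits below the target page size carry the
 * RAM_SAVE_FLAG_* values and whose page-aligned part is the offset within
 * the RAMBlock named by the stream (or reused via RAM_SAVE_FLAG_CONTINUE).
 * The payload that follows depends on the flag: a fill byte for COMPRESS
 * (zero pages), a raw TARGET_PAGE_SIZE buffer for PAGE, a be32 length plus
 * zlib data for COMPRESS_PAGE, and an XBZRLE header plus delta for XBZRLE.
 * For MEM_SIZE the "address" field is instead the total RAM byte count.
 */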
static SaveVMHandlers savevm_ram_handlers = {
    .save_live_setup = ram_save_setup,
    .save_live_iterate = ram_save_iterate,
    .save_live_complete_postcopy = ram_save_complete,
    .save_live_complete_precopy = ram_save_complete,
    .save_live_pending = ram_save_pending,
    .load_state = ram_load,
    .cleanup = ram_migration_cleanup,
};
void ram_mig_init(void)
{
    qemu_mutex_init(&XBZRLE.lock);
    register_savevm_live(NULL, "ram", 0, 4, &savevm_ram_handlers, &ram_state);
}
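/*
 * Descriptive note: the "4" passed to register_savevm_live() is the section
 * version number and has to stay in sync with the "version_id != 4" check
 * in ram_load() above; &ram_state is the opaque pointer that the save/load
 * handlers cast back to RAMState.
 */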