/*
 * QEMU System Emulator
 *
 * Copyright (c) 2003-2008 Fabrice Bellard
 * Copyright (c) 2011-2015 Red Hat Inc
 *
 * Authors:
 *  Juan Quintela <quintela@redhat.com>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */
#include "qemu/osdep.h"
#include "qemu-common.h"
#include "cpu.h"
#include <zlib.h>
#include "qapi-event.h"
#include "qemu/cutils.h"
#include "qemu/bitops.h"
#include "qemu/bitmap.h"
#include "qemu/timer.h"
#include "qemu/main-loop.h"
#include "migration/migration.h"
#include "migration/postcopy-ram.h"
#include "exec/address-spaces.h"
#include "migration/page_cache.h"
#include "qemu/error-report.h"
#include "trace.h"
#include "exec/ram_addr.h"
#include "qemu/rcu_queue.h"
#include "migration/colo.h"

static int dirty_rate_high_cnt;

static uint64_t bitmap_sync_count;

/***********************************************************/
/* ram save/restore */

#define RAM_SAVE_FLAG_FULL     0x01 /* Obsolete, not used anymore */
#define RAM_SAVE_FLAG_COMPRESS 0x02
#define RAM_SAVE_FLAG_MEM_SIZE 0x04
#define RAM_SAVE_FLAG_PAGE     0x08
#define RAM_SAVE_FLAG_EOS      0x10
#define RAM_SAVE_FLAG_CONTINUE 0x20
#define RAM_SAVE_FLAG_XBZRLE   0x40
/* 0x80 is reserved in migration.h start with 0x100 next */
#define RAM_SAVE_FLAG_COMPRESS_PAGE    0x100

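/*
 * A note on how these flags travel (a sketch read off save_page_header()
 * below, not a separate format definition): each page record starts with a
 * be64 word that is the page offset within its RAMBlock OR'd with one or
 * more RAM_SAVE_FLAG_* values.  Offsets are target-page aligned, so the low
 * bits are free for flags, e.g. (illustrative only):
 *
 *     qemu_put_be64(f, offset | RAM_SAVE_FLAG_PAGE);
 *
 * and the load side is expected to mask the flags back out to recover the
 * real offset.
 */
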
static uint8_t *ZERO_TARGET_PAGE;

static inline bool is_zero_range(uint8_t *p, uint64_t size)
{
    return buffer_is_zero(p, size);
}

/* struct contains XBZRLE cache and a static page
   used by the compression */
static struct {
    /* buffer used for XBZRLE encoding */
    uint8_t *encoded_buf;
    /* buffer for storing page content */
    uint8_t *current_buf;
    /* Cache for XBZRLE, Protected by lock. */
    PageCache *cache;
    QemuMutex lock;
} XBZRLE;

/* buffer used for XBZRLE decoding */
static uint8_t *xbzrle_decoded_buf;

static void XBZRLE_cache_lock(void)
{
    if (migrate_use_xbzrle())
        qemu_mutex_lock(&XBZRLE.lock);
}

static void XBZRLE_cache_unlock(void)
{
    if (migrate_use_xbzrle())
        qemu_mutex_unlock(&XBZRLE.lock);
}

/*
 * called from qmp_migrate_set_cache_size in main thread, possibly while
 * a migration is in progress.
 * A running migration may be using the cache and might finish during this
 * call, hence changes to the cache are protected by XBZRLE.lock.
 */
int64_t xbzrle_cache_resize(int64_t new_size)
{
    PageCache *new_cache;
    int64_t ret;

    if (new_size < TARGET_PAGE_SIZE) {
        return -1;
    }

    XBZRLE_cache_lock();

    if (XBZRLE.cache != NULL) {
        if (pow2floor(new_size) == migrate_xbzrle_cache_size()) {
            goto out_new_size;
        }
        new_cache = cache_init(new_size / TARGET_PAGE_SIZE,
                               TARGET_PAGE_SIZE);
        if (!new_cache) {
            error_report("Error creating cache");
            ret = -1;
            goto out;
        }

        cache_fini(XBZRLE.cache);
        XBZRLE.cache = new_cache;
    }

out_new_size:
    ret = pow2floor(new_size);
out:
    XBZRLE_cache_unlock();
    return ret;
}
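
/*
 * Sizing note (derived from the code above, not a new interface): the value
 * reported back to the caller is pow2floor(new_size), so e.g. a request for
 * 5 MiB reports 4 MiB, and anything smaller than one target page is rejected
 * with -1.
 */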

/* accounting for migration statistics */
typedef struct AccountingInfo {
    uint64_t dup_pages;
    uint64_t skipped_pages;
    uint64_t norm_pages;
    uint64_t iterations;
    uint64_t xbzrle_bytes;
    uint64_t xbzrle_pages;
    uint64_t xbzrle_cache_miss;
    double xbzrle_cache_miss_rate;
    uint64_t xbzrle_overflows;
} AccountingInfo;

static AccountingInfo acct_info;

static void acct_clear(void)
{
    memset(&acct_info, 0, sizeof(acct_info));
}

uint64_t dup_mig_bytes_transferred(void)
{
    return acct_info.dup_pages * TARGET_PAGE_SIZE;
}

uint64_t dup_mig_pages_transferred(void)
{
    return acct_info.dup_pages;
}

uint64_t skipped_mig_bytes_transferred(void)
{
    return acct_info.skipped_pages * TARGET_PAGE_SIZE;
}

uint64_t skipped_mig_pages_transferred(void)
{
    return acct_info.skipped_pages;
}

uint64_t norm_mig_bytes_transferred(void)
{
    return acct_info.norm_pages * TARGET_PAGE_SIZE;
}

uint64_t norm_mig_pages_transferred(void)
{
    return acct_info.norm_pages;
}

uint64_t xbzrle_mig_bytes_transferred(void)
{
    return acct_info.xbzrle_bytes;
}

uint64_t xbzrle_mig_pages_transferred(void)
{
    return acct_info.xbzrle_pages;
}

uint64_t xbzrle_mig_pages_cache_miss(void)
{
    return acct_info.xbzrle_cache_miss;
}

double xbzrle_mig_cache_miss_rate(void)
{
    return acct_info.xbzrle_cache_miss_rate;
}

uint64_t xbzrle_mig_pages_overflow(void)
{
    return acct_info.xbzrle_overflows;
}

/* This is the last block that we have visited searching for dirty pages
 */
static RAMBlock *last_seen_block;
/* This is the last block from where we have sent data */
static RAMBlock *last_sent_block;
static ram_addr_t last_offset;
static QemuMutex migration_bitmap_mutex;
static uint64_t migration_dirty_pages;
static uint32_t last_version;
static bool ram_bulk_stage;

/* used by the search for pages to send */
struct PageSearchStatus {
    /* Current block being searched */
    RAMBlock *block;
    /* Current offset to search from */
    ram_addr_t offset;
    /* Set once we wrap around */
    bool complete_round;
};
typedef struct PageSearchStatus PageSearchStatus;

static struct BitmapRcu {
    struct rcu_head rcu;
    /* Main migration bitmap */
    unsigned long *bmap;
    /* bitmap of pages that haven't been sent even once
     * only maintained and used in postcopy at the moment
     * where it's used to send the dirtymap at the start
     * of the postcopy phase
     */
    unsigned long *unsentmap;
} *migration_bitmap_rcu;

struct CompressParam {
    bool done;
    bool quit;
    QEMUFile *file;
    QemuMutex mutex;
    QemuCond cond;
    RAMBlock *block;
    ram_addr_t offset;
};
typedef struct CompressParam CompressParam;

struct DecompressParam {
    bool done;
    bool quit;
    QemuMutex mutex;
    QemuCond cond;
    void *des;
    uint8_t *compbuf;
    int len;
};
typedef struct DecompressParam DecompressParam;

static CompressParam *comp_param;
static QemuThread *compress_threads;
/* comp_done_cond is used to wake up the migration thread when
 * one of the compression threads has finished the compression.
 * comp_done_lock is used to co-work with comp_done_cond.
 */
static QemuMutex comp_done_lock;
static QemuCond comp_done_cond;
/* The empty QEMUFileOps will be used by file in CompressParam */
static const QEMUFileOps empty_ops = { };

static bool compression_switch;
static DecompressParam *decomp_param;
static QemuThread *decompress_threads;
static QemuMutex decomp_done_lock;
static QemuCond decomp_done_cond;

static int do_compress_ram_page(QEMUFile *f, RAMBlock *block,
                                ram_addr_t offset);

static void *do_data_compress(void *opaque)
{
    CompressParam *param = opaque;
    RAMBlock *block;
    ram_addr_t offset;

    qemu_mutex_lock(&param->mutex);
    while (!param->quit) {
        if (param->block) {
            block = param->block;
            offset = param->offset;
            param->block = NULL;
            qemu_mutex_unlock(&param->mutex);

            do_compress_ram_page(param->file, block, offset);

            qemu_mutex_lock(&comp_done_lock);
            param->done = true;
            qemu_cond_signal(&comp_done_cond);
            qemu_mutex_unlock(&comp_done_lock);

            qemu_mutex_lock(&param->mutex);
        } else {
            qemu_cond_wait(&param->cond, &param->mutex);
        }
    }
    qemu_mutex_unlock(&param->mutex);

    return NULL;
}
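
/*
 * A sketch of the handshake implemented by do_data_compress() above and
 * compress_page_with_multi_thread() further down (no new interfaces here):
 * the migration thread picks a worker whose 'done' flag is set, drains its
 * buffer QEMUFile, stores block/offset under param->mutex and signals
 * param->cond; the worker copies the request, clears param->block, runs
 * do_compress_ram_page() into its private QEMUFile, then sets 'done' under
 * comp_done_lock and signals comp_done_cond so the migration thread can
 * collect the result.
 */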

static inline void terminate_compression_threads(void)
{
    int idx, thread_count;

    thread_count = migrate_compress_threads();
    for (idx = 0; idx < thread_count; idx++) {
        qemu_mutex_lock(&comp_param[idx].mutex);
        comp_param[idx].quit = true;
        qemu_cond_signal(&comp_param[idx].cond);
        qemu_mutex_unlock(&comp_param[idx].mutex);
    }
}

void migrate_compress_threads_join(void)
{
    int i, thread_count;

    if (!migrate_use_compression()) {
        return;
    }
    terminate_compression_threads();
    thread_count = migrate_compress_threads();
    for (i = 0; i < thread_count; i++) {
        qemu_thread_join(compress_threads + i);
        qemu_fclose(comp_param[i].file);
        qemu_mutex_destroy(&comp_param[i].mutex);
        qemu_cond_destroy(&comp_param[i].cond);
    }
    qemu_mutex_destroy(&comp_done_lock);
    qemu_cond_destroy(&comp_done_cond);
    g_free(compress_threads);
    g_free(comp_param);
    compress_threads = NULL;
    comp_param = NULL;
}

void migrate_compress_threads_create(void)
{
    int i, thread_count;

    if (!migrate_use_compression()) {
        return;
    }
    compression_switch = true;
    thread_count = migrate_compress_threads();
    compress_threads = g_new0(QemuThread, thread_count);
    comp_param = g_new0(CompressParam, thread_count);
    qemu_cond_init(&comp_done_cond);
    qemu_mutex_init(&comp_done_lock);
    for (i = 0; i < thread_count; i++) {
        /* comp_param[i].file is just used as a dummy buffer to save data,
         * set its ops to empty.
         */
        comp_param[i].file = qemu_fopen_ops(NULL, &empty_ops);
        comp_param[i].done = true;
        comp_param[i].quit = false;
        qemu_mutex_init(&comp_param[i].mutex);
        qemu_cond_init(&comp_param[i].cond);
        qemu_thread_create(compress_threads + i, "compress",
                           do_data_compress, comp_param + i,
                           QEMU_THREAD_JOINABLE);
    }
}

/**
 * save_page_header: Write page header to wire
 *
 * If this is the 1st block, it also writes the block identification
 *
 * Returns: Number of bytes written
 *
 * @f: QEMUFile where to send the data
 * @block: block that contains the page we want to send
 * @offset: offset inside the block for the page
 *          in the lower bits, it contains flags
 */
static size_t save_page_header(QEMUFile *f, RAMBlock *block, ram_addr_t offset)
{
    size_t size, len;

    qemu_put_be64(f, offset);
    size = 8;

    if (!(offset & RAM_SAVE_FLAG_CONTINUE)) {
        len = strlen(block->idstr);
        qemu_put_byte(f, len);
        qemu_put_buffer(f, (uint8_t *)block->idstr, len);
        size += 1 + len;
    }
    return size;
}
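
/*
 * Resulting wire layout, read off the code above (not a separate format
 * definition):
 *
 *     8 bytes  be64: page offset within the RAMBlock, OR'd with flags
 *     1 byte   idstr length   - only when RAM_SAVE_FLAG_CONTINUE is clear,
 *     n bytes  RAMBlock idstr - i.e. when the block changes
 *
 * so a header costs 8 bytes for a page in the same block as the previous
 * one, and 9 + strlen(idstr) bytes otherwise.
 */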

/* Reduce amount of guest cpu execution to hopefully slow down memory writes.
 * If guest dirty memory rate is reduced below the rate at which we can
 * transfer pages to the destination then we should be able to complete
 * migration. Some workloads dirty memory way too fast and will not effectively
 * converge, even with auto-converge.
 */
static void mig_throttle_guest_down(void)
{
    MigrationState *s = migrate_get_current();
    uint64_t pct_initial = s->parameters.cpu_throttle_initial;
    uint64_t pct_icrement = s->parameters.cpu_throttle_increment;

    /* We have not started throttling yet. Let's start it. */
    if (!cpu_throttle_active()) {
        cpu_throttle_set(pct_initial);
    } else {
        /* Throttling already on, just increase the rate */
        cpu_throttle_set(cpu_throttle_get_percentage() + pct_icrement);
    }
}

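/*
 * Illustration only (the actual percentages come from the migration
 * parameters, not from this file): with cpu_throttle_initial=20 and
 * cpu_throttle_increment=10, successive calls throttle the vCPUs at
 * 20%, 30%, 40%, ... until the dirty rate drops below the transfer rate
 * or cpu_throttle_set() reaches whatever maximum it enforces.
 */
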
/* Update the xbzrle cache to reflect a page that's been sent as all 0.
 * The important thing is that a stale (not-yet-0'd) page be replaced
 * by the new data.
 * As a bonus, if the page wasn't in the cache it gets added so that
 * when a small write is made into the 0'd page it gets XBZRLE sent
 */
static void xbzrle_cache_zero_page(ram_addr_t current_addr)
{
    if (ram_bulk_stage || !migrate_use_xbzrle()) {
        return;
    }

    /* We don't care if this fails to allocate a new cache page
     * as long as it updated an old one */
    cache_insert(XBZRLE.cache, current_addr, ZERO_TARGET_PAGE,
                 bitmap_sync_count);
}

#define ENCODING_FLAG_XBZRLE 0x1

/**
 * save_xbzrle_page: compress and send current page
 *
 * Returns: 1 means that we wrote the page
 *          0 means that page is identical to the one already sent
 *          -1 means that xbzrle would be longer than normal
 *
 * @f: QEMUFile where to send the data
 * @current_data:
 * @current_addr:
 * @block: block that contains the page we want to send
 * @offset: offset inside the block for the page
 * @last_stage: if we are at the completion stage
 * @bytes_transferred: increase it with the number of transferred bytes
 */
static int save_xbzrle_page(QEMUFile *f, uint8_t **current_data,
                            ram_addr_t current_addr, RAMBlock *block,
                            ram_addr_t offset, bool last_stage,
                            uint64_t *bytes_transferred)
{
    int encoded_len = 0, bytes_xbzrle;
    uint8_t *prev_cached_page;

    if (!cache_is_cached(XBZRLE.cache, current_addr, bitmap_sync_count)) {
        acct_info.xbzrle_cache_miss++;
        if (!last_stage) {
            if (cache_insert(XBZRLE.cache, current_addr, *current_data,
                             bitmap_sync_count) == -1) {
                return -1;
            } else {
                /* update *current_data when the page has been
                   inserted into cache */
                *current_data = get_cached_data(XBZRLE.cache, current_addr);
            }
        }
        return -1;
    }

    prev_cached_page = get_cached_data(XBZRLE.cache, current_addr);

    /* save current buffer into memory */
    memcpy(XBZRLE.current_buf, *current_data, TARGET_PAGE_SIZE);

    /* XBZRLE encoding (if there is no overflow) */
    encoded_len = xbzrle_encode_buffer(prev_cached_page, XBZRLE.current_buf,
                                       TARGET_PAGE_SIZE, XBZRLE.encoded_buf,
                                       TARGET_PAGE_SIZE);
    if (encoded_len == 0) {
        trace_save_xbzrle_page_skipping();
        return 0;
    } else if (encoded_len == -1) {
        trace_save_xbzrle_page_overflow();
        acct_info.xbzrle_overflows++;
        /* update data in the cache */
        if (!last_stage) {
            memcpy(prev_cached_page, *current_data, TARGET_PAGE_SIZE);
            *current_data = prev_cached_page;
        }
        return -1;
    }

    /* we need to update the data in the cache, in order to get the same data */
    if (!last_stage) {
        memcpy(prev_cached_page, XBZRLE.current_buf, TARGET_PAGE_SIZE);
    }

    /* Send XBZRLE based compressed page */
    bytes_xbzrle = save_page_header(f, block, offset | RAM_SAVE_FLAG_XBZRLE);
    qemu_put_byte(f, ENCODING_FLAG_XBZRLE);
    qemu_put_be16(f, encoded_len);
    qemu_put_buffer(f, XBZRLE.encoded_buf, encoded_len);
    bytes_xbzrle += encoded_len + 1 + 2;
    acct_info.xbzrle_pages++;
    acct_info.xbzrle_bytes += bytes_xbzrle;
    *bytes_transferred += bytes_xbzrle;

    return 1;
}
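
/*
 * On the wire an XBZRLE page therefore looks like (again, just reading the
 * code above back):
 *
 *     save_page_header(..., offset | RAM_SAVE_FLAG_XBZRLE)
 *     1 byte   ENCODING_FLAG_XBZRLE
 *     2 bytes  be16 encoded_len
 *     encoded_len bytes of delta against the cached copy
 *
 * which is where the "encoded_len + 1 + 2" accounting comes from.
 */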

/* Called with rcu_read_lock() to protect migration_bitmap
 * rb: The RAMBlock to search for dirty pages in
 * start: Start address (typically so we can continue from previous page)
 * ram_addr_abs: Pointer into which to store the address of the dirty page
 *               within the global ram_addr space
 *
 * Returns: byte offset within memory region of the start of a dirty page
 */
static inline
ram_addr_t migration_bitmap_find_dirty(RAMBlock *rb,
                                       ram_addr_t start,
                                       ram_addr_t *ram_addr_abs)
{
    unsigned long base = rb->offset >> TARGET_PAGE_BITS;
    unsigned long nr = base + (start >> TARGET_PAGE_BITS);
    uint64_t rb_size = rb->used_length;
    unsigned long size = base + (rb_size >> TARGET_PAGE_BITS);
    unsigned long *bitmap;

    unsigned long next;

    bitmap = atomic_rcu_read(&migration_bitmap_rcu)->bmap;
    if (ram_bulk_stage && nr > base) {
        next = nr + 1;
    } else {
        next = find_next_bit(bitmap, size, nr);
    }

    *ram_addr_abs = next << TARGET_PAGE_BITS;
    return (next - base) << TARGET_PAGE_BITS;
}

static inline bool migration_bitmap_clear_dirty(ram_addr_t addr)
{
    bool ret;
    int nr = addr >> TARGET_PAGE_BITS;
    unsigned long *bitmap = atomic_rcu_read(&migration_bitmap_rcu)->bmap;

    ret = test_and_clear_bit(nr, bitmap);

    if (ret) {
        migration_dirty_pages--;
    }
    return ret;
}

static void migration_bitmap_sync_range(ram_addr_t start, ram_addr_t length)
{
    unsigned long *bitmap;
    bitmap = atomic_rcu_read(&migration_bitmap_rcu)->bmap;
    migration_dirty_pages +=
        cpu_physical_memory_sync_dirty_bitmap(bitmap, start, length);
}

/* Fix me: there are too many global variables used in migration process. */
static int64_t start_time;
static int64_t bytes_xfer_prev;
static int64_t num_dirty_pages_period;
static uint64_t xbzrle_cache_miss_prev;
static uint64_t iterations_prev;

static void migration_bitmap_sync_init(void)
{
    start_time = 0;
    bytes_xfer_prev = 0;
    num_dirty_pages_period = 0;
    xbzrle_cache_miss_prev = 0;
    iterations_prev = 0;
}

static void migration_bitmap_sync(void)
{
    RAMBlock *block;
    uint64_t num_dirty_pages_init = migration_dirty_pages;
    MigrationState *s = migrate_get_current();
    int64_t end_time;
    int64_t bytes_xfer_now;

    bitmap_sync_count++;

    if (!bytes_xfer_prev) {
        bytes_xfer_prev = ram_bytes_transferred();
    }

    if (!start_time) {
        start_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
    }

    trace_migration_bitmap_sync_start();
    memory_global_dirty_log_sync();

    qemu_mutex_lock(&migration_bitmap_mutex);
    rcu_read_lock();
    QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
        migration_bitmap_sync_range(block->offset, block->used_length);
    }
    rcu_read_unlock();
    qemu_mutex_unlock(&migration_bitmap_mutex);

    trace_migration_bitmap_sync_end(migration_dirty_pages
                                    - num_dirty_pages_init);
    num_dirty_pages_period += migration_dirty_pages - num_dirty_pages_init;
    end_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);

    /* more than 1 second = 1000 milliseconds */
    if (end_time > start_time + 1000) {
        if (migrate_auto_converge()) {
            /* The following detection logic can be refined later. For now:
               Check to see if the bytes dirtied in this period are more than
               half of the bytes transferred since the last time we were in
               this routine.  If that happens repeatedly, start or increase
               throttling. */
            bytes_xfer_now = ram_bytes_transferred();

            if (s->dirty_pages_rate &&
                (num_dirty_pages_period * TARGET_PAGE_SIZE >
                    (bytes_xfer_now - bytes_xfer_prev) / 2) &&
                (dirty_rate_high_cnt++ >= 2)) {
                trace_migration_throttle();
                dirty_rate_high_cnt = 0;
                mig_throttle_guest_down();
            }
            bytes_xfer_prev = bytes_xfer_now;
        }

        if (migrate_use_xbzrle()) {
            if (iterations_prev != acct_info.iterations) {
                acct_info.xbzrle_cache_miss_rate =
                   (double)(acct_info.xbzrle_cache_miss -
                            xbzrle_cache_miss_prev) /
                   (acct_info.iterations - iterations_prev);
            }
            iterations_prev = acct_info.iterations;
            xbzrle_cache_miss_prev = acct_info.xbzrle_cache_miss;
        }
        s->dirty_pages_rate = num_dirty_pages_period * 1000
            / (end_time - start_time);
        s->dirty_bytes_rate = s->dirty_pages_rate * TARGET_PAGE_SIZE;
        start_time = end_time;
        num_dirty_pages_period = 0;
    }
    s->dirty_sync_count = bitmap_sync_count;
    if (migrate_use_events()) {
        qapi_event_send_migration_pass(bitmap_sync_count, NULL);
    }
}

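/*
 * Worked example with made-up numbers: if roughly 300 MB were put on the
 * wire since the previous sync and more than 150 MB worth of pages were
 * re-dirtied in the same window, that sync counts as a high-dirty-rate
 * event; once the counter passes the threshold (dirty_rate_high_cnt++ >= 2)
 * mig_throttle_guest_down() is called and the counter is reset.
 */
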
/**
 * save_zero_page: Send the zero page to the stream
 *
 * Returns: Number of pages written.
 *
 * @f: QEMUFile where to send the data
 * @block: block that contains the page we want to send
 * @offset: offset inside the block for the page
 * @p: pointer to the page
 * @bytes_transferred: increase it with the number of transferred bytes
 */
static int save_zero_page(QEMUFile *f, RAMBlock *block, ram_addr_t offset,
                          uint8_t *p, uint64_t *bytes_transferred)
{
    int pages = -1;

    if (is_zero_range(p, TARGET_PAGE_SIZE)) {
        acct_info.dup_pages++;
        *bytes_transferred += save_page_header(f, block,
                                               offset | RAM_SAVE_FLAG_COMPRESS);
        qemu_put_byte(f, 0);
        *bytes_transferred += 1;
        pages = 1;
    }

    return pages;
}

static void ram_release_pages(MigrationState *ms, const char *block_name,
                              uint64_t offset, int pages)
{
    if (!migrate_release_ram() || !migration_in_postcopy(ms)) {
        return;
    }

    ram_discard_range(NULL, block_name, offset, pages << TARGET_PAGE_BITS);
}

/**
 * ram_save_page: Send the given page to the stream
 *
 * Returns: Number of pages written.
 *          < 0 - error
 *          >=0 - Number of pages written - this might legally be 0
 *                if xbzrle noticed the page was the same.
 *
 * @ms: The current migration state.
 * @f: QEMUFile where to send the data
 * @block: block that contains the page we want to send
 * @offset: offset inside the block for the page
 * @last_stage: if we are at the completion stage
 * @bytes_transferred: increase it with the number of transferred bytes
 */
static int ram_save_page(MigrationState *ms, QEMUFile *f, PageSearchStatus *pss,
                         bool last_stage, uint64_t *bytes_transferred)
{
    int pages = -1;
    uint64_t bytes_xmit;
    ram_addr_t current_addr;
    uint8_t *p;
    int ret;
    bool send_async = true;
    RAMBlock *block = pss->block;
    ram_addr_t offset = pss->offset;

    p = block->host + offset;

    /* When in doubt, send the page as a normal page */
    bytes_xmit = 0;
    ret = ram_control_save_page(f, block->offset,
                                offset, TARGET_PAGE_SIZE, &bytes_xmit);
    if (bytes_xmit) {
        *bytes_transferred += bytes_xmit;
        pages = 1;
    }

    XBZRLE_cache_lock();

    current_addr = block->offset + offset;

    if (block == last_sent_block) {
        offset |= RAM_SAVE_FLAG_CONTINUE;
    }
    if (ret != RAM_SAVE_CONTROL_NOT_SUPP) {
        if (ret != RAM_SAVE_CONTROL_DELAYED) {
            if (bytes_xmit > 0) {
                acct_info.norm_pages++;
            } else if (bytes_xmit == 0) {
                acct_info.dup_pages++;
            }
        }
    } else {
        pages = save_zero_page(f, block, offset, p, bytes_transferred);
        if (pages > 0) {
            /* Must let xbzrle know, otherwise a previous (now 0'd) cached
             * page would be stale
             */
            xbzrle_cache_zero_page(current_addr);
            ram_release_pages(ms, block->idstr, pss->offset, pages);
        } else if (!ram_bulk_stage &&
                   !migration_in_postcopy(ms) && migrate_use_xbzrle()) {
            pages = save_xbzrle_page(f, &p, current_addr, block,
                                     offset, last_stage, bytes_transferred);
            if (!last_stage) {
                /* Can't send this cached data async, since the cache page
                 * might get updated before it gets to the wire
                 */
                send_async = false;
            }
        }
    }

    /* XBZRLE overflow or normal page */
    if (pages == -1) {
        *bytes_transferred += save_page_header(f, block,
                                               offset | RAM_SAVE_FLAG_PAGE);
        if (send_async) {
            qemu_put_buffer_async(f, p, TARGET_PAGE_SIZE,
                                  migrate_release_ram() &
                                  migration_in_postcopy(ms));
        } else {
            qemu_put_buffer(f, p, TARGET_PAGE_SIZE);
        }
        *bytes_transferred += TARGET_PAGE_SIZE;
        pages = 1;
        acct_info.norm_pages++;
    }

    XBZRLE_cache_unlock();

    return pages;
}

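/*
 * Order of attempts in ram_save_page(), summarised from the code above: a
 * transport-specific hook (ram_control_save_page, e.g. RDMA) gets first
 * refusal; otherwise a zero page is sent as a single byte; otherwise,
 * outside the bulk stage and outside postcopy, XBZRLE may send a delta;
 * anything left over goes out as a full RAM_SAVE_FLAG_PAGE page.
 */
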
static int do_compress_ram_page(QEMUFile *f, RAMBlock *block,
                                ram_addr_t offset)
{
    int bytes_sent, blen;
    uint8_t *p = block->host + (offset & TARGET_PAGE_MASK);

    bytes_sent = save_page_header(f, block, offset |
                                  RAM_SAVE_FLAG_COMPRESS_PAGE);
    blen = qemu_put_compression_data(f, p, TARGET_PAGE_SIZE,
                                     migrate_compress_level());
    if (blen < 0) {
        bytes_sent = 0;
        qemu_file_set_error(migrate_get_current()->to_dst_file, blen);
        error_report("compressed data failed!");
    } else {
        bytes_sent += blen;
        ram_release_pages(migrate_get_current(), block->idstr,
                          offset & TARGET_PAGE_MASK, 1);
    }

    return bytes_sent;
}

static uint64_t bytes_transferred;

static void flush_compressed_data(QEMUFile *f)
{
    int idx, len, thread_count;

    if (!migrate_use_compression()) {
        return;
    }
    thread_count = migrate_compress_threads();

    qemu_mutex_lock(&comp_done_lock);
    for (idx = 0; idx < thread_count; idx++) {
        while (!comp_param[idx].done) {
            qemu_cond_wait(&comp_done_cond, &comp_done_lock);
        }
    }
    qemu_mutex_unlock(&comp_done_lock);

    for (idx = 0; idx < thread_count; idx++) {
        qemu_mutex_lock(&comp_param[idx].mutex);
        if (!comp_param[idx].quit) {
            len = qemu_put_qemu_file(f, comp_param[idx].file);
            bytes_transferred += len;
        }
        qemu_mutex_unlock(&comp_param[idx].mutex);
    }
}

static inline void set_compress_params(CompressParam *param, RAMBlock *block,
                                        ram_addr_t offset)
{
    param->block = block;
    param->offset = offset;
}

static int compress_page_with_multi_thread(QEMUFile *f, RAMBlock *block,
                                           ram_addr_t offset,
                                           uint64_t *bytes_transferred)
{
    int idx, thread_count, bytes_xmit = -1, pages = -1;

    thread_count = migrate_compress_threads();
    qemu_mutex_lock(&comp_done_lock);
    while (true) {
        for (idx = 0; idx < thread_count; idx++) {
            if (comp_param[idx].done) {
                comp_param[idx].done = false;
                bytes_xmit = qemu_put_qemu_file(f, comp_param[idx].file);
                qemu_mutex_lock(&comp_param[idx].mutex);
                set_compress_params(&comp_param[idx], block, offset);
                qemu_cond_signal(&comp_param[idx].cond);
                qemu_mutex_unlock(&comp_param[idx].mutex);
                pages = 1;
                acct_info.norm_pages++;
                *bytes_transferred += bytes_xmit;
                break;
            }
        }
        if (pages > 0) {
            break;
        } else {
            qemu_cond_wait(&comp_done_cond, &comp_done_lock);
        }
    }
    qemu_mutex_unlock(&comp_done_lock);

    return pages;
}

/**
 * ram_save_compressed_page: compress the given page and send it to the stream
 *
 * Returns: Number of pages written.
 *
 * @ms: The current migration state.
 * @f: QEMUFile where to send the data
 * @block: block that contains the page we want to send
 * @offset: offset inside the block for the page
 * @last_stage: if we are at the completion stage
 * @bytes_transferred: increase it with the number of transferred bytes
 */
static int ram_save_compressed_page(MigrationState *ms, QEMUFile *f,
                                    PageSearchStatus *pss, bool last_stage,
                                    uint64_t *bytes_transferred)
{
    int pages = -1;
    uint64_t bytes_xmit = 0;
    uint8_t *p;
    int ret, blen;
    RAMBlock *block = pss->block;
    ram_addr_t offset = pss->offset;

    p = block->host + offset;

    ret = ram_control_save_page(f, block->offset,
                                offset, TARGET_PAGE_SIZE, &bytes_xmit);
    if (bytes_xmit) {
        *bytes_transferred += bytes_xmit;
        pages = 1;
    }
    if (ret != RAM_SAVE_CONTROL_NOT_SUPP) {
        if (ret != RAM_SAVE_CONTROL_DELAYED) {
            if (bytes_xmit > 0) {
                acct_info.norm_pages++;
            } else if (bytes_xmit == 0) {
                acct_info.dup_pages++;
            }
        }
    } else {
        /* When starting the process of a new block, the first page of
         * the block should be sent out before other pages in the same
         * block, and all the pages in last block should have been sent
         * out, keeping this order is important, because the 'cont' flag
         * is used to avoid resending the block name.
         */
        if (block != last_sent_block) {
            flush_compressed_data(f);
            pages = save_zero_page(f, block, offset, p, bytes_transferred);
            if (pages == -1) {
                /* Make sure the first page is sent out before other pages */
                bytes_xmit = save_page_header(f, block, offset |
                                              RAM_SAVE_FLAG_COMPRESS_PAGE);
                blen = qemu_put_compression_data(f, p, TARGET_PAGE_SIZE,
                                                 migrate_compress_level());
                if (blen > 0) {
                    *bytes_transferred += bytes_xmit + blen;
                    acct_info.norm_pages++;
                    pages = 1;
                } else {
                    qemu_file_set_error(f, blen);
                    error_report("compressed data failed!");
                }
            }
            if (pages > 0) {
                ram_release_pages(ms, block->idstr, pss->offset, pages);
            }
        } else {
            offset |= RAM_SAVE_FLAG_CONTINUE;
            pages = save_zero_page(f, block, offset, p, bytes_transferred);
            if (pages == -1) {
                pages = compress_page_with_multi_thread(f, block, offset,
                                                        bytes_transferred);
            } else {
                ram_release_pages(ms, block->idstr, pss->offset, pages);
            }
        }
    }

    return pages;
}

/*
 * Find the next dirty page and update any state associated with
 * the search process.
 *
 * Returns: True if a page is found
 *
 * @f: Current migration stream.
 * @pss: Data about the state of the current dirty page scan.
 * @*again: Set to false if the search has scanned the whole of RAM
 * *ram_addr_abs: Pointer into which to store the address of the dirty page
 *                within the global ram_addr space
 */
static bool find_dirty_block(QEMUFile *f, PageSearchStatus *pss,
                             bool *again, ram_addr_t *ram_addr_abs)
{
    pss->offset = migration_bitmap_find_dirty(pss->block, pss->offset,
                                              ram_addr_abs);
    if (pss->complete_round && pss->block == last_seen_block &&
        pss->offset >= last_offset) {
        /*
         * We've been once around the RAM and haven't found anything.
         * Give up.
         */
        *again = false;
        return false;
    }
    if (pss->offset >= pss->block->used_length) {
        /* Didn't find anything in this RAM Block */
        pss->offset = 0;
        pss->block = QLIST_NEXT_RCU(pss->block, next);
        if (!pss->block) {
            /* Hit the end of the list */
            pss->block = QLIST_FIRST_RCU(&ram_list.blocks);
            /* Flag that we've looped */
            pss->complete_round = true;
            ram_bulk_stage = false;
            if (migrate_use_xbzrle()) {
                /* If xbzrle is on, stop using the data compression at this
                 * point. In theory, xbzrle can do better than compression.
                 */
                flush_compressed_data(f);
                compression_switch = false;
            }
        }
        /* Didn't find anything this time, but try again on the new block */
        *again = true;
        return false;
    } else {
        /* Can go around again, but... */
        *again = true;
        /* We've found something so probably don't need to */
        return true;
    }
}

/*
 * Helper for 'get_queued_page' - gets a page off the queue
 *      ms:      MigrationState in
 *      *offset: Used to return the offset within the RAMBlock
 *      ram_addr_abs: global offset in the dirty/sent bitmaps
 *
 * Returns:      block (or NULL if none available)
 */
static RAMBlock *unqueue_page(MigrationState *ms, ram_addr_t *offset,
                              ram_addr_t *ram_addr_abs)
{
    RAMBlock *block = NULL;

    qemu_mutex_lock(&ms->src_page_req_mutex);
    if (!QSIMPLEQ_EMPTY(&ms->src_page_requests)) {
        struct MigrationSrcPageRequest *entry =
                                QSIMPLEQ_FIRST(&ms->src_page_requests);
        block = entry->rb;
        *offset = entry->offset;
        *ram_addr_abs = (entry->offset + entry->rb->offset) &
                        TARGET_PAGE_MASK;

        if (entry->len > TARGET_PAGE_SIZE) {
            entry->len -= TARGET_PAGE_SIZE;
            entry->offset += TARGET_PAGE_SIZE;
        } else {
            memory_region_unref(block->mr);
            QSIMPLEQ_REMOVE_HEAD(&ms->src_page_requests, next_req);
            g_free(entry);
        }
    }
    qemu_mutex_unlock(&ms->src_page_req_mutex);

    return block;
}

/*
 * Unqueue a page from the queue fed by postcopy page requests; skips pages
 * that are already sent (!dirty)
 *
 *      ms:      MigrationState in
 *      pss:     PageSearchStatus structure updated with found block/offset
 *      ram_addr_abs: global offset in the dirty/sent bitmaps
 *
 * Returns:      true if a queued page is found
 */
static bool get_queued_page(MigrationState *ms, PageSearchStatus *pss,
                            ram_addr_t *ram_addr_abs)
{
    RAMBlock *block;
    ram_addr_t offset;
    bool dirty;

    do {
        block = unqueue_page(ms, &offset, ram_addr_abs);
        /*
         * We're sending this page, and since it's postcopy nothing else
         * will dirty it, and we must make sure it doesn't get sent again
         * even if this queue request was received after the background
         * search already sent it.
         */
        if (block) {
            unsigned long *bitmap;
            bitmap = atomic_rcu_read(&migration_bitmap_rcu)->bmap;
            dirty = test_bit(*ram_addr_abs >> TARGET_PAGE_BITS, bitmap);
            if (!dirty) {
                trace_get_queued_page_not_dirty(
                    block->idstr, (uint64_t)offset,
                    (uint64_t)*ram_addr_abs,
                    test_bit(*ram_addr_abs >> TARGET_PAGE_BITS,
                         atomic_rcu_read(&migration_bitmap_rcu)->unsentmap));
            } else {
                trace_get_queued_page(block->idstr,
                                      (uint64_t)offset,
                                      (uint64_t)*ram_addr_abs);
            }
        }

    } while (block && !dirty);

    if (block) {
        /*
         * As soon as we start servicing pages out of order, then we have
         * to kill the bulk stage, since the bulk stage assumes
         * in (migration_bitmap_find_and_reset_dirty) that every page is
         * dirty, that's no longer true.
         */
        ram_bulk_stage = false;

        /*
         * We want the background search to continue from the queued page
         * since the guest is likely to want other pages near to the page
         * it just requested.
         */
        pss->block = block;
        pss->offset = offset;
    }

    return !!block;
}

/**
 * flush_page_queue: Flush any remaining pages in the ram request queue
 *    it should be empty at the end anyway, but in error cases there may be
 *    some left.
 *
 * ms: MigrationState
 */
void flush_page_queue(MigrationState *ms)
{
    struct MigrationSrcPageRequest *mspr, *next_mspr;
    /* This queue generally should be empty - but in the case of a failed
     * migration might have some droppings in.
     */
    rcu_read_lock();
    QSIMPLEQ_FOREACH_SAFE(mspr, &ms->src_page_requests, next_req, next_mspr) {
        memory_region_unref(mspr->rb->mr);
        QSIMPLEQ_REMOVE_HEAD(&ms->src_page_requests, next_req);
        g_free(mspr);
    }
    rcu_read_unlock();
}

/**
 * Queue the pages for transmission, e.g. a request from postcopy destination
 *   ms: MigrationStatus in which the queue is held
 *   rbname: The RAMBlock the request is for - may be NULL (to mean reuse last)
 *   start: Offset from the start of the RAMBlock
 *   len: Length (in bytes) to send
 *   Return: 0 on success
 */
int ram_save_queue_pages(MigrationState *ms, const char *rbname,
                         ram_addr_t start, ram_addr_t len)
{
    RAMBlock *ramblock;

    ms->postcopy_requests++;
    rcu_read_lock();
    if (!rbname) {
        /* Reuse last RAMBlock */
        ramblock = ms->last_req_rb;

        if (!ramblock) {
            /*
             * Shouldn't happen, we can't reuse the last RAMBlock if
             * it's the 1st request.
             */
            error_report("ram_save_queue_pages no previous block");
            goto err;
        }
    } else {
        ramblock = qemu_ram_block_by_name(rbname);

        if (!ramblock) {
            /* We shouldn't be asked for a non-existent RAMBlock */
            error_report("ram_save_queue_pages no block '%s'", rbname);
            goto err;
        }
        ms->last_req_rb = ramblock;
    }
    trace_ram_save_queue_pages(ramblock->idstr, start, len);
    if (start + len > ramblock->used_length) {
        error_report("%s request overrun start=" RAM_ADDR_FMT " len="
                     RAM_ADDR_FMT " blocklen=" RAM_ADDR_FMT,
                     __func__, start, len, ramblock->used_length);
        goto err;
    }

    struct MigrationSrcPageRequest *new_entry =
        g_malloc0(sizeof(struct MigrationSrcPageRequest));
    new_entry->rb = ramblock;
    new_entry->offset = start;
    new_entry->len = len;

    memory_region_ref(ramblock->mr);
    qemu_mutex_lock(&ms->src_page_req_mutex);
    QSIMPLEQ_INSERT_TAIL(&ms->src_page_requests, new_entry, next_req);
    qemu_mutex_unlock(&ms->src_page_req_mutex);
    rcu_read_unlock();

    return 0;

err:
    rcu_read_unlock();
    return -1;
}

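/*
 * Informal summary of this file's own call graph for postcopy (not a spec):
 * when the destination needs a page it has not yet received, it sends a
 * request back; the source queues it with ram_save_queue_pages(), and the
 * main scan loop prefers entries from that queue
 * (get_queued_page()/unqueue_page()) over the sequential bitmap walk,
 * falling back to find_dirty_block() when the queue is empty.
 */
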
/**
 * ram_save_target_page: Save one target page
 *
 * @f: QEMUFile where to send the data
 * @block: pointer to block that contains the page we want to send
 * @offset: offset inside the block for the page;
 * @last_stage: if we are at the completion stage
 * @bytes_transferred: increase it with the number of transferred bytes
 * @dirty_ram_abs: Address of the start of the dirty page in ram_addr_t space
 *
 * Returns: Number of pages written.
 */
static int ram_save_target_page(MigrationState *ms, QEMUFile *f,
                                PageSearchStatus *pss,
                                bool last_stage,
                                uint64_t *bytes_transferred,
                                ram_addr_t dirty_ram_abs)
{
    int res = 0;

    /* Check if the page is dirty and if so, send it */
    if (migration_bitmap_clear_dirty(dirty_ram_abs)) {
        unsigned long *unsentmap;
        if (compression_switch && migrate_use_compression()) {
            res = ram_save_compressed_page(ms, f, pss,
                                           last_stage,
                                           bytes_transferred);
        } else {
            res = ram_save_page(ms, f, pss, last_stage,
                                bytes_transferred);
        }

        if (res < 0) {
            return res;
        }
        unsentmap = atomic_rcu_read(&migration_bitmap_rcu)->unsentmap;
        if (unsentmap) {
            clear_bit(dirty_ram_abs >> TARGET_PAGE_BITS, unsentmap);
        }
        /* Only update last_sent_block if a block was actually sent; xbzrle
         * might have decided the page was identical so didn't bother writing
         * to the stream.
         */
        if (res > 0) {
            last_sent_block = pss->block;
        }
    }

    return res;
}

/**
 * ram_save_host_page: Starting at *offset send pages up to the end
 *                     of the current host page.  It's valid for the initial
 *                     offset to point into the middle of a host page
 *                     in which case the remainder of the hostpage is sent.
 *                     Only dirty target pages are sent.
 *
 * Returns: Number of pages written.
 *
 * @f: QEMUFile where to send the data
 * @block: pointer to block that contains the page we want to send
 * @offset: offset inside the block for the page; updated to last target page
 *          sent
 * @last_stage: if we are at the completion stage
 * @bytes_transferred: increase it with the number of transferred bytes
 * @dirty_ram_abs: Address of the start of the dirty page in ram_addr_t space
 */
static int ram_save_host_page(MigrationState *ms, QEMUFile *f,
                              PageSearchStatus *pss,
                              bool last_stage,
                              uint64_t *bytes_transferred,
                              ram_addr_t dirty_ram_abs)
{
    int tmppages, pages = 0;
    do {
        tmppages = ram_save_target_page(ms, f, pss, last_stage,
                                        bytes_transferred, dirty_ram_abs);
        if (tmppages < 0) {
            return tmppages;
        }

        pages += tmppages;
        pss->offset += TARGET_PAGE_SIZE;
        dirty_ram_abs += TARGET_PAGE_SIZE;
    } while (pss->offset & (qemu_host_page_size - 1));

    /* The offset we leave with is the last one we looked at */
    pss->offset -= TARGET_PAGE_SIZE;
    return pages;
}

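/*
 * Example of the loop above (illustrative sizes): with 4 KiB target pages on
 * a host using 64 KiB pages, one call walks up to 16 target pages until
 * pss->offset reaches a 64 KiB boundary, then steps back one target page so
 * the caller records the last page actually examined.  When host and target
 * page sizes match, the loop body runs exactly once.
 */
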
/**
 * ram_find_and_save_block: Finds a dirty page and sends it to f
 *
 * Called within an RCU critical section.
 *
 * Returns:  The number of pages written
 *           0 means no dirty pages
 *
 * @f: QEMUFile where to send the data
 * @last_stage: if we are at the completion stage
 * @bytes_transferred: increase it with the number of transferred bytes
 *
 * On systems where host-page-size > target-page-size it will send all the
 * pages in a host page that are dirty.
 */

static int ram_find_and_save_block(QEMUFile *f, bool last_stage,
                                   uint64_t *bytes_transferred)
{
    PageSearchStatus pss;
    MigrationState *ms = migrate_get_current();
    int pages = 0;
    bool again, found;
    ram_addr_t dirty_ram_abs; /* Address of the start of the dirty page in
                                 ram_addr_t space */

    pss.block = last_seen_block;
    pss.offset = last_offset;
    pss.complete_round = false;

    if (!pss.block) {
        pss.block = QLIST_FIRST_RCU(&ram_list.blocks);
    }

    do {
        again = true;
        found = get_queued_page(ms, &pss, &dirty_ram_abs);

        if (!found) {
            /* priority queue empty, so just search for something dirty */
            found = find_dirty_block(f, &pss, &again, &dirty_ram_abs);
        }

        if (found) {
            pages = ram_save_host_page(ms, f, &pss,
                                       last_stage, bytes_transferred,
                                       dirty_ram_abs);
        }
    } while (!pages && again);

    last_seen_block = pss.block;
    last_offset = pss.offset;

    return pages;
}

void acct_update_position(QEMUFile *f, size_t size, bool zero)
{
    uint64_t pages = size / TARGET_PAGE_SIZE;
    if (zero) {
        acct_info.dup_pages += pages;
    } else {
        acct_info.norm_pages += pages;
        bytes_transferred += size;
        qemu_update_position(f, size);
    }
}

static ram_addr_t ram_save_remaining(void)
{
    return migration_dirty_pages;
}

uint64_t ram_bytes_remaining(void)
{
    return ram_save_remaining() * TARGET_PAGE_SIZE;
}

uint64_t ram_bytes_transferred(void)
{
    return bytes_transferred;
}

uint64_t ram_bytes_total(void)
{
    RAMBlock *block;
    uint64_t total = 0;

    rcu_read_lock();
    QLIST_FOREACH_RCU(block, &ram_list.blocks, next)
        total += block->used_length;
    rcu_read_unlock();
    return total;
}

void free_xbzrle_decoded_buf(void)
{
    g_free(xbzrle_decoded_buf);
    xbzrle_decoded_buf = NULL;
}

static void migration_bitmap_free(struct BitmapRcu *bmap)
{
    g_free(bmap->bmap);
    g_free(bmap->unsentmap);
    g_free(bmap);
}

static void ram_migration_cleanup(void *opaque)
{
    /* The caller holds the iothread lock or is in a bh, so there is
     * no writing race against this migration_bitmap
     */
    struct BitmapRcu *bitmap = migration_bitmap_rcu;
    atomic_rcu_set(&migration_bitmap_rcu, NULL);
    if (bitmap) {
        memory_global_dirty_log_stop();
        call_rcu(bitmap, migration_bitmap_free, rcu);
    }

    XBZRLE_cache_lock();
    if (XBZRLE.cache) {
        cache_fini(XBZRLE.cache);
        g_free(XBZRLE.encoded_buf);
        g_free(XBZRLE.current_buf);
        g_free(ZERO_TARGET_PAGE);
        XBZRLE.cache = NULL;
        XBZRLE.encoded_buf = NULL;
        XBZRLE.current_buf = NULL;
    }
    XBZRLE_cache_unlock();
}

static void reset_ram_globals(void)
{
    last_seen_block = NULL;
    last_sent_block = NULL;
    last_offset = 0;
    last_version = ram_list.version;
    ram_bulk_stage = true;
}

#define MAX_WAIT 50 /* ms, half buffered_file limit */

void migration_bitmap_extend(ram_addr_t old, ram_addr_t new)
{
    /* called in qemu main thread, so there is
     * no writing race against this migration_bitmap
     */
    if (migration_bitmap_rcu) {
        struct BitmapRcu *old_bitmap = migration_bitmap_rcu, *bitmap;
        bitmap = g_new(struct BitmapRcu, 1);
        bitmap->bmap = bitmap_new(new);

        /* Prevent bits in migration_bitmap from being set by
         * migration_bitmap_sync_range() at the same time.
         * It is safe for migration if a migration_bitmap bit is
         * cleared concurrently.
         */
        qemu_mutex_lock(&migration_bitmap_mutex);
        bitmap_copy(bitmap->bmap, old_bitmap->bmap, old);
        bitmap_set(bitmap->bmap, old, new - old);

        /* We don't have a way to safely extend the sentmap
         * with RCU; so mark it as missing, entry to postcopy
         * will fail.
         */
        bitmap->unsentmap = NULL;

        atomic_rcu_set(&migration_bitmap_rcu, bitmap);
        qemu_mutex_unlock(&migration_bitmap_mutex);
        migration_dirty_pages += new - old;
        call_rcu(old_bitmap, migration_bitmap_free, rcu);
    }
}

/*
 * 'expected' is the value you expect the bitmap mostly to be full
 * of; it won't bother printing lines that are all this value.
 * If 'todump' is null the migration bitmap is dumped.
 */
void ram_debug_dump_bitmap(unsigned long *todump, bool expected)
{
    int64_t ram_pages = last_ram_offset() >> TARGET_PAGE_BITS;

    int64_t cur;
    int64_t linelen = 128;
    char linebuf[129];

    if (!todump) {
        todump = atomic_rcu_read(&migration_bitmap_rcu)->bmap;
    }

    for (cur = 0; cur < ram_pages; cur += linelen) {
        int64_t curb;
        bool found = false;
        /*
         * Last line; catch the case where the line length
         * is longer than remaining ram
         */
        if (cur + linelen > ram_pages) {
            linelen = ram_pages - cur;
        }
        for (curb = 0; curb < linelen; curb++) {
            bool thisbit = test_bit(cur + curb, todump);
            linebuf[curb] = thisbit ? '1' : '.';
            found = found || (thisbit != expected);
        }
        if (found) {
            linebuf[curb] = '\0';
            fprintf(stderr, "0x%08" PRIx64 " : %s\n", cur, linebuf);
        }
    }
}

/* **** functions for postcopy ***** */

void ram_postcopy_migrated_memory_release(MigrationState *ms)
{
    struct RAMBlock *block;
    unsigned long *bitmap = atomic_rcu_read(&migration_bitmap_rcu)->bmap;

    QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
        unsigned long first = block->offset >> TARGET_PAGE_BITS;
        unsigned long range = first + (block->used_length >> TARGET_PAGE_BITS);
        unsigned long run_start = find_next_zero_bit(bitmap, range, first);

        while (run_start < range) {
            unsigned long run_end = find_next_bit(bitmap, range, run_start + 1);
            ram_discard_range(NULL, block->idstr, run_start << TARGET_PAGE_BITS,
                              (run_end - run_start) << TARGET_PAGE_BITS);
            run_start = find_next_zero_bit(bitmap, range, run_end + 1);
        }
    }
}

/*
 * Callback from postcopy_each_ram_send_discard for each RAMBlock
 * Note: At this point the 'unsentmap' is the processed bitmap combined
 *       with the dirtymap; so a '1' means it's either dirty or unsent.
 * start,length: Indexes into the bitmap for the first bit
 *               representing the named block and length in target-pages
 */
static int postcopy_send_discard_bm_ram(MigrationState *ms,
                                        PostcopyDiscardState *pds,
                                        unsigned long start,
                                        unsigned long length)
{
    unsigned long end = start + length; /* one after the end */
    unsigned long current;
    unsigned long *unsentmap;

    unsentmap = atomic_rcu_read(&migration_bitmap_rcu)->unsentmap;
    for (current = start; current < end; ) {
        unsigned long one = find_next_bit(unsentmap, end, current);

        if (one <= end) {
            unsigned long zero = find_next_zero_bit(unsentmap, end, one + 1);
            unsigned long discard_length;

            if (zero >= end) {
                discard_length = end - one;
            } else {
                discard_length = zero - one;
            }
            if (discard_length) {
                postcopy_discard_send_range(ms, pds, one, discard_length);
            }
            current = one + discard_length;
        } else {
            current = one;
        }
    }

    return 0;
}

/*
 * Utility for the outgoing postcopy code.
 *   Calls postcopy_send_discard_bm_ram for each RAMBlock
 *   passing it bitmap indexes and name.
 * Returns: 0 on success
 * (qemu_ram_foreach_block ends up passing unscaled lengths
 *  which would mean postcopy code would have to deal with target page)
 */
static int postcopy_each_ram_send_discard(MigrationState *ms)
{
    struct RAMBlock *block;
    int ret;

    QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
        unsigned long first = block->offset >> TARGET_PAGE_BITS;
        PostcopyDiscardState *pds = postcopy_discard_send_init(ms,
                                                               first,
                                                               block->idstr);

        /*
         * Postcopy sends chunks of bitmap over the wire, but it
         * just needs indexes at this point, avoids it having
         * target page specific code.
         */
        ret = postcopy_send_discard_bm_ram(ms, pds, first,
                                    block->used_length >> TARGET_PAGE_BITS);
        postcopy_discard_send_finish(ms, pds);
        if (ret) {
            return ret;
        }
    }

    return 0;
}

/*
 * Helper for postcopy_chunk_hostpages; it's called twice to cleanup
 * the two bitmaps, that are similar, but one is inverted.
 *
 * We search for runs of target-pages that don't start or end on a
 * host page boundary;
 *   unsent_pass=true: Cleans up partially unsent host pages by searching
 *                     the unsentmap
 *   unsent_pass=false: Cleans up partially dirty host pages by searching
 *                      the main migration bitmap
 *
 */
static void postcopy_chunk_hostpages_pass(MigrationState *ms, bool unsent_pass,
                                          RAMBlock *block,
                                          PostcopyDiscardState *pds)
{
    unsigned long *bitmap;
    unsigned long *unsentmap;
    unsigned int host_ratio = qemu_host_page_size / TARGET_PAGE_SIZE;
    unsigned long first = block->offset >> TARGET_PAGE_BITS;
    unsigned long len = block->used_length >> TARGET_PAGE_BITS;
    unsigned long last = first + (len - 1);
    unsigned long run_start;

    bitmap = atomic_rcu_read(&migration_bitmap_rcu)->bmap;
    unsentmap = atomic_rcu_read(&migration_bitmap_rcu)->unsentmap;

    if (unsent_pass) {
        /* Find a sent page */
        run_start = find_next_zero_bit(unsentmap, last + 1, first);
    } else {
        /* Find a dirty page */
        run_start = find_next_bit(bitmap, last + 1, first);
    }

    while (run_start <= last) {
        bool do_fixup = false;
        unsigned long fixup_start_addr;
        unsigned long host_offset;

        /*
         * If the start of this run of pages is in the middle of a host
         * page, then we need to fixup this host page.
         */
        host_offset = run_start % host_ratio;
        if (host_offset) {
            do_fixup = true;
            run_start -= host_offset;
            fixup_start_addr = run_start;
            /* For the next pass */
            run_start = run_start + host_ratio;
        } else {
            /* Find the end of this run */
            unsigned long run_end;
            if (unsent_pass) {
                run_end = find_next_bit(unsentmap, last + 1, run_start + 1);
            } else {
                run_end = find_next_zero_bit(bitmap, last + 1, run_start + 1);
            }
            /*
             * If the end isn't at the start of a host page, then the
             * run doesn't finish at the end of a host page
             * and we need to discard.
             */
            host_offset = run_end % host_ratio;
            if (host_offset) {
                do_fixup = true;
                fixup_start_addr = run_end - host_offset;
                /*
                 * This host page has gone, the next loop iteration starts
                 * from after the fixup
                 */
                run_start = fixup_start_addr + host_ratio;
            } else {
                /*
                 * No discards on this iteration, next loop starts from
                 * next sent/dirty page
                 */
                run_start = run_end + 1;
            }
        }

        if (do_fixup) {
            unsigned long page;

            /* Tell the destination to discard this page */
            if (unsent_pass || !test_bit(fixup_start_addr, unsentmap)) {
                /* For the unsent_pass we:
                 *     discard partially sent pages
                 * For the !unsent_pass (dirty) we:
                 *     discard partially dirty pages that were sent
                 *     (any partially sent pages were already discarded
                 *     by the previous unsent_pass)
                 */
                postcopy_discard_send_range(ms, pds, fixup_start_addr,
                                            host_ratio);
            }

            /* Clean up the bitmap */
            for (page = fixup_start_addr;
                 page < fixup_start_addr + host_ratio; page++) {
                /* All pages in this host page are now not sent */
                set_bit(page, unsentmap);

                /*
                 * Remark them as dirty, updating the count for any pages
                 * that weren't previously dirty.
                 */
                migration_dirty_pages += !test_and_set_bit(page, bitmap);
            }
        }

        if (unsent_pass) {
            /* Find the next sent page for the next iteration */
            run_start = find_next_zero_bit(unsentmap, last + 1,
                                           run_start);
        } else {
            /* Find the next dirty page for the next iteration */
            run_start = find_next_bit(bitmap, last + 1, run_start);
        }
    }
}

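/*
 * Worked example (illustrative, host_ratio = 4, i.e. four target pages per
 * host page): if a run starts at target page 6, page 6 sits in the middle of
 * host page [4..7], so the whole of [4..7] is sent as one discard, all four
 * bits are set in the unsentmap and re-marked dirty, and the scan resumes at
 * page 8.  Runs that already start and end on host-page boundaries are left
 * untouched.
 */
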
/*
 * Utility for the outgoing postcopy code.
 *
 * Discard any partially sent host-page size chunks, mark any partially
 * dirty host-page size chunks as all dirty.
 *
 * Returns: 0 on success
 */
static int postcopy_chunk_hostpages(MigrationState *ms)
{
    struct RAMBlock *block;

    if (qemu_host_page_size == TARGET_PAGE_SIZE) {
        /* Easy case - TPS==HPS - nothing to be done */
        return 0;
    }

    /* Easiest way to make sure we don't resume in the middle of a host-page */
    last_seen_block = NULL;
    last_sent_block = NULL;
    last_offset = 0;

    QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
        unsigned long first = block->offset >> TARGET_PAGE_BITS;

        PostcopyDiscardState *pds =
                         postcopy_discard_send_init(ms, first, block->idstr);

        /* First pass: Discard all partially sent host pages */
        postcopy_chunk_hostpages_pass(ms, true, block, pds);
        /*
         * Second pass: Ensure that all partially dirty host pages are made
         * fully dirty.
         */
        postcopy_chunk_hostpages_pass(ms, false, block, pds);

        postcopy_discard_send_finish(ms, pds);
    } /* ram_list loop */

    return 0;
}

e0b266f0
DDAG
1800/*
1801 * Transmit the set of pages to be discarded after precopy to the target;
1802 * these are pages that:
1803 * a) have been previously transmitted but are now dirty again
1804 * b) have never been transmitted; this ensures that any pages on the
1805 * destination that have been mapped by background tasks get discarded
1806 * (transparent huge pages are the specific concern)
1807 * Hopefully this set is pretty sparse.
1808 */
1809int ram_postcopy_send_discard_bitmap(MigrationState *ms)
1810{
1811 int ret;
1812 unsigned long *bitmap, *unsentmap;
1813
1814 rcu_read_lock();
1815
1816 /* This should be our last sync; the source is now paused */
1817 migration_bitmap_sync();
1818
1819 unsentmap = atomic_rcu_read(&migration_bitmap_rcu)->unsentmap;
1820 if (!unsentmap) {
1821 /* We don't have a safe way to resize the unsentmap, so
1822 * if the bitmap was resized it will be NULL at this
1823 * point.
1824 */
1825 error_report("migration ram resized during precopy phase");
1826 rcu_read_unlock();
1827 return -EINVAL;
1828 }
1829
99e314eb
DDAG
1830 /* Deal with TPS != HPS */
1831 ret = postcopy_chunk_hostpages(ms);
1832 if (ret) {
1833 rcu_read_unlock();
1834 return ret;
1835 }
1836
e0b266f0
DDAG
1837 /*
1838 * Update the unsentmap to be unsentmap = unsentmap | dirty
1839 */
1840 bitmap = atomic_rcu_read(&migration_bitmap_rcu)->bmap;
1841 bitmap_or(unsentmap, unsentmap, bitmap,
1842 last_ram_offset() >> TARGET_PAGE_BITS);
1843
1844
1845 trace_ram_postcopy_send_discard_bitmap();
1846#ifdef DEBUG_POSTCOPY
1847 ram_debug_dump_bitmap(unsentmap, true);
1848#endif
1849
1850 ret = postcopy_each_ram_send_discard(ms);
1851 rcu_read_unlock();
1852
1853 return ret;
1854}
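
/*
 * A sketch of the bitmap combination done above, assuming a tiny
 * 4-page RAMBlock: if unsentmap is 0b0011 (pages 0 and 1 were never
 * sent) and bmap is 0b0100 (page 2 was sent but has been dirtied
 * again), then after bitmap_or() unsentmap is 0b0111 and
 * postcopy_each_ram_send_discard() will tell the destination to
 * discard pages 0..2.
 */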
1855
1856/*
1857 * At the start of the postcopy phase of migration, any now-dirty
1858 * precopied pages are discarded.
1859 *
1860 * start, length describe a byte address range within the RAMBlock
1861 *
1862 * Returns 0 on success.
1863 */
1864int ram_discard_range(MigrationIncomingState *mis,
1865 const char *block_name,
1866 uint64_t start, size_t length)
1867{
1868 int ret = -1;
1869
1870 rcu_read_lock();
1871 RAMBlock *rb = qemu_ram_block_by_name(block_name);
1872
1873 if (!rb) {
1874 error_report("ram_discard_range: Failed to find block '%s'",
1875 block_name);
1876 goto err;
1877 }
1878
1879 uint8_t *host_startaddr = rb->host + start;
1880
1881 if ((uintptr_t)host_startaddr & (qemu_host_page_size - 1)) {
1882 error_report("ram_discard_range: Unaligned start address: %p",
1883 host_startaddr);
1884 goto err;
1885 }
1886
1887 if ((start + length) <= rb->used_length) {
1888 uint8_t *host_endaddr = host_startaddr + length;
1889 if ((uintptr_t)host_endaddr & (qemu_host_page_size - 1)) {
1890 error_report("ram_discard_range: Unaligned end address: %p",
1891 host_endaddr);
1892 goto err;
1893 }
1894 ret = postcopy_ram_discard_range(mis, host_startaddr, length);
1895 } else {
1896 error_report("ram_discard_range: Overrun block '%s' (%" PRIu64
9458ad6b 1897 "/%zx/" RAM_ADDR_FMT")",
e0b266f0
DDAG
1898 block_name, start, length, rb->used_length);
1899 }
1900
1901err:
1902 rcu_read_unlock();
1903
1904 return ret;
1905}
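
/*
 * Usage sketch with hypothetical values: discarding the first 2MiB of
 * a block named "pc.ram" on the incoming side would look like
 *
 *     ret = ram_discard_range(mis, "pc.ram", 0, 2 * 1024 * 1024);
 *
 * Both ends of the range must be aligned to qemu_host_page_size and
 * the range must fit inside the block's used_length; otherwise an
 * error is reported and -1 is returned.
 */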
1906
a91246c9 1907static int ram_save_init_globals(void)
56e93d26 1908{
56e93d26
JQ
1909 int64_t ram_bitmap_pages; /* Size of bitmap in pages, including gaps */
1910
56e93d26
JQ
1911 dirty_rate_high_cnt = 0;
1912 bitmap_sync_count = 0;
1913 migration_bitmap_sync_init();
dd631697 1914 qemu_mutex_init(&migration_bitmap_mutex);
56e93d26
JQ
1915
1916 if (migrate_use_xbzrle()) {
1917 XBZRLE_cache_lock();
adb65dec 1918 ZERO_TARGET_PAGE = g_malloc0(TARGET_PAGE_SIZE);
56e93d26
JQ
1919 XBZRLE.cache = cache_init(migrate_xbzrle_cache_size() /
1920 TARGET_PAGE_SIZE,
1921 TARGET_PAGE_SIZE);
1922 if (!XBZRLE.cache) {
1923 XBZRLE_cache_unlock();
1924 error_report("Error creating cache");
1925 return -1;
1926 }
1927 XBZRLE_cache_unlock();
1928
1929 /* We prefer not to abort if there is no memory */
1930 XBZRLE.encoded_buf = g_try_malloc0(TARGET_PAGE_SIZE);
1931 if (!XBZRLE.encoded_buf) {
1932 error_report("Error allocating encoded_buf");
1933 return -1;
1934 }
1935
1936 XBZRLE.current_buf = g_try_malloc(TARGET_PAGE_SIZE);
1937 if (!XBZRLE.current_buf) {
1938 error_report("Error allocating current_buf");
1939 g_free(XBZRLE.encoded_buf);
1940 XBZRLE.encoded_buf = NULL;
1941 return -1;
1942 }
1943
1944 acct_clear();
1945 }
1946
49877834
PB
1947 /* For memory_global_dirty_log_start below. */
1948 qemu_mutex_lock_iothread();
1949
56e93d26
JQ
1950 qemu_mutex_lock_ramlist();
1951 rcu_read_lock();
1952 bytes_transferred = 0;
1953 reset_ram_globals();
1954
1955 ram_bitmap_pages = last_ram_offset() >> TARGET_PAGE_BITS;
f3f491fc 1956 migration_bitmap_rcu = g_new0(struct BitmapRcu, 1);
60be6340
DL
1957 migration_bitmap_rcu->bmap = bitmap_new(ram_bitmap_pages);
1958 bitmap_set(migration_bitmap_rcu->bmap, 0, ram_bitmap_pages);
56e93d26 1959
f3f491fc
DDAG
1960 if (migrate_postcopy_ram()) {
1961 migration_bitmap_rcu->unsentmap = bitmap_new(ram_bitmap_pages);
1962 bitmap_set(migration_bitmap_rcu->unsentmap, 0, ram_bitmap_pages);
1963 }
1964
56e93d26
JQ
1965 /*
1966 * Count the total number of pages used by ram blocks not including any
1967 * gaps due to alignment or unplugs.
1968 */
1969 migration_dirty_pages = ram_bytes_total() >> TARGET_PAGE_BITS;
1970
1971 memory_global_dirty_log_start();
1972 migration_bitmap_sync();
1973 qemu_mutex_unlock_ramlist();
49877834 1974 qemu_mutex_unlock_iothread();
a91246c9
HZ
1975 rcu_read_unlock();
1976
1977 return 0;
1978}
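
/*
 * A rough sizing note for the structures set up above: with about 4GiB
 * of guest RAM and 4KiB target pages, ram_bitmap_pages is roughly
 * 1,048,576, so bmap occupies about 128KiB; when postcopy is enabled an
 * unsentmap of the same size is allocated alongside it.
 */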
1979
1980/* Each of ram_save_setup, ram_save_iterate and ram_save_complete has
1981 * a long-running RCU critical section.  When RCU reclaims in the code
1982 * start to become numerous, it will be necessary to reduce the
1983 * granularity of these critical sections.
1984 */
1985
1986static int ram_save_setup(QEMUFile *f, void *opaque)
1987{
1988 RAMBlock *block;
1989
1990 /* migration has already set up the bitmap; reuse it. */
1991 if (!migration_in_colo_state()) {
1992 if (ram_save_init_globals() < 0) {
1993 return -1;
1994 }
1995 }
1996
1997 rcu_read_lock();
56e93d26
JQ
1998
1999 qemu_put_be64(f, ram_bytes_total() | RAM_SAVE_FLAG_MEM_SIZE);
2000
2001 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
2002 qemu_put_byte(f, strlen(block->idstr));
2003 qemu_put_buffer(f, (uint8_t *)block->idstr, strlen(block->idstr));
2004 qemu_put_be64(f, block->used_length);
2005 }
2006
2007 rcu_read_unlock();
2008
2009 ram_control_before_iterate(f, RAM_CONTROL_SETUP);
2010 ram_control_after_iterate(f, RAM_CONTROL_SETUP);
2011
2012 qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
2013
2014 return 0;
2015}
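
/*
 * The setup section written above has the following layout (a sketch,
 * matching the qemu_put_* calls in ram_save_setup):
 *
 *   be64  ram_bytes_total() | RAM_SAVE_FLAG_MEM_SIZE
 *   per RAMBlock:
 *     byte   strlen(idstr)
 *     bytes  idstr (no terminating NUL)
 *     be64   used_length
 *   be64  RAM_SAVE_FLAG_EOS
 *
 * ram_load() consumes this in its RAM_SAVE_FLAG_MEM_SIZE case.
 */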
2016
2017static int ram_save_iterate(QEMUFile *f, void *opaque)
2018{
2019 int ret;
2020 int i;
2021 int64_t t0;
5c90308f 2022 int done = 0;
56e93d26
JQ
2023
2024 rcu_read_lock();
2025 if (ram_list.version != last_version) {
2026 reset_ram_globals();
2027 }
2028
2029 /* Read version before ram_list.blocks */
2030 smp_rmb();
2031
2032 ram_control_before_iterate(f, RAM_CONTROL_ROUND);
2033
2034 t0 = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
2035 i = 0;
2036 while ((ret = qemu_file_rate_limit(f)) == 0) {
2037 int pages;
2038
2039 pages = ram_find_and_save_block(f, false, &bytes_transferred);
2040 /* no more pages to send */
2041 if (pages == 0) {
5c90308f 2042 done = 1;
56e93d26
JQ
2043 break;
2044 }
56e93d26 2045 acct_info.iterations++;
070afca2 2046
56e93d26
JQ
2047 /* we want to check in the 1st loop, just in case it was the 1st time
2048 and we had to sync the dirty bitmap.
2049 qemu_clock_get_ns() is a bit expensive, so we only check once
2050 every few iterations
2051 */
2052 if ((i & 63) == 0) {
2053 uint64_t t1 = (qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - t0) / 1000000;
2054 if (t1 > MAX_WAIT) {
55c4446b 2055 trace_ram_save_iterate_big_wait(t1, i);
56e93d26
JQ
2056 break;
2057 }
2058 }
2059 i++;
2060 }
2061 flush_compressed_data(f);
2062 rcu_read_unlock();
2063
2064 /*
2065 * Must occur before EOS (or any QEMUFile operation)
2066 * because of RDMA protocol.
2067 */
2068 ram_control_after_iterate(f, RAM_CONTROL_ROUND);
2069
2070 qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
2071 bytes_transferred += 8;
2072
2073 ret = qemu_file_get_error(f);
2074 if (ret < 0) {
2075 return ret;
2076 }
2077
5c90308f 2078 return done;
56e93d26
JQ
2079}
2080
2081/* Called with iothread lock */
2082static int ram_save_complete(QEMUFile *f, void *opaque)
2083{
2084 rcu_read_lock();
2085
663e6c1d
DDAG
2086 if (!migration_in_postcopy(migrate_get_current())) {
2087 migration_bitmap_sync();
2088 }
56e93d26
JQ
2089
2090 ram_control_before_iterate(f, RAM_CONTROL_FINISH);
2091
2092 /* try transferring iterative blocks of memory */
2093
2094 /* flush all remaining blocks regardless of rate limiting */
2095 while (true) {
2096 int pages;
2097
a91246c9
HZ
2098 pages = ram_find_and_save_block(f, !migration_in_colo_state(),
2099 &bytes_transferred);
56e93d26
JQ
2100 /* no more blocks to send */
2101 if (pages == 0) {
2102 break;
2103 }
2104 }
2105
2106 flush_compressed_data(f);
2107 ram_control_after_iterate(f, RAM_CONTROL_FINISH);
56e93d26
JQ
2108
2109 rcu_read_unlock();
d09a6fde 2110
56e93d26
JQ
2111 qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
2112
2113 return 0;
2114}
2115
c31b098f
DDAG
2116static void ram_save_pending(QEMUFile *f, void *opaque, uint64_t max_size,
2117 uint64_t *non_postcopiable_pending,
2118 uint64_t *postcopiable_pending)
56e93d26
JQ
2119{
2120 uint64_t remaining_size;
2121
2122 remaining_size = ram_save_remaining() * TARGET_PAGE_SIZE;
2123
663e6c1d
DDAG
2124 if (!migration_in_postcopy(migrate_get_current()) &&
2125 remaining_size < max_size) {
56e93d26
JQ
2126 qemu_mutex_lock_iothread();
2127 rcu_read_lock();
2128 migration_bitmap_sync();
2129 rcu_read_unlock();
2130 qemu_mutex_unlock_iothread();
2131 remaining_size = ram_save_remaining() * TARGET_PAGE_SIZE;
2132 }
c31b098f
DDAG
2133
2134 /* We can do postcopy, and all the data is postcopiable */
2135 *postcopiable_pending += remaining_size;
56e93d26
JQ
2136}
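
/*
 * Rough arithmetic for the check above: with, say, 25,000 dirty target
 * pages of 4KiB each, remaining_size is about 100MiB.  Only when that
 * figure drops below max_size (the amount the caller believes it can
 * still transfer within the allowed downtime) is it worth taking the
 * iothread lock and re-syncing the dirty bitmap for an up-to-date
 * count.
 */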
2137
2138static int load_xbzrle(QEMUFile *f, ram_addr_t addr, void *host)
2139{
2140 unsigned int xh_len;
2141 int xh_flags;
063e760a 2142 uint8_t *loaded_data;
56e93d26
JQ
2143
2144 if (!xbzrle_decoded_buf) {
2145 xbzrle_decoded_buf = g_malloc(TARGET_PAGE_SIZE);
2146 }
063e760a 2147 loaded_data = xbzrle_decoded_buf;
56e93d26
JQ
2148
2149 /* extract RLE header */
2150 xh_flags = qemu_get_byte(f);
2151 xh_len = qemu_get_be16(f);
2152
2153 if (xh_flags != ENCODING_FLAG_XBZRLE) {
2154 error_report("Failed to load XBZRLE page - wrong compression!");
2155 return -1;
2156 }
2157
2158 if (xh_len > TARGET_PAGE_SIZE) {
2159 error_report("Failed to load XBZRLE page - len overflow!");
2160 return -1;
2161 }
2162 /* load data and decode */
063e760a 2163 qemu_get_buffer_in_place(f, &loaded_data, xh_len);
56e93d26
JQ
2164
2165 /* decode RLE */
063e760a 2166 if (xbzrle_decode_buffer(loaded_data, xh_len, host,
56e93d26
JQ
2167 TARGET_PAGE_SIZE) == -1) {
2168 error_report("Failed to load XBZRLE page - decode error!");
2169 return -1;
2170 }
2171
2172 return 0;
2173}
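
/*
 * Wire format consumed by load_xbzrle() above (a sketch):
 *
 *   byte   xh_flags  (must be ENCODING_FLAG_XBZRLE)
 *   be16   xh_len    (encoded length, at most TARGET_PAGE_SIZE)
 *   bytes  xh_len bytes of XBZRLE-encoded data
 *
 * The encoded data is a delta applied by xbzrle_decode_buffer() on top
 * of the page content already present at 'host'.
 */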
2174
2175/* Must be called from within an RCU critical section.
2176 * Returns a pointer from within the RCU-protected ram_list.
2177 */
a7180877 2178/*
4c4bad48 2179 * Read a RAMBlock ID from the stream f.
a7180877
DDAG
2180 *
2181 * f: Stream to read from
a7180877
DDAG
2182 * flags: Page flags (mostly to see if it's a continuation of previous block)
2183 */
4c4bad48
HZ
2184static inline RAMBlock *ram_block_from_stream(QEMUFile *f,
2185 int flags)
56e93d26
JQ
2186{
2187 static RAMBlock *block = NULL;
2188 char id[256];
2189 uint8_t len;
2190
2191 if (flags & RAM_SAVE_FLAG_CONTINUE) {
4c4bad48 2192 if (!block) {
56e93d26
JQ
2193 error_report("Ack, bad migration stream!");
2194 return NULL;
2195 }
4c4bad48 2196 return block;
56e93d26
JQ
2197 }
2198
2199 len = qemu_get_byte(f);
2200 qemu_get_buffer(f, (uint8_t *)id, len);
2201 id[len] = 0;
2202
e3dd7493 2203 block = qemu_ram_block_by_name(id);
4c4bad48
HZ
2204 if (!block) {
2205 error_report("Can't find block %s", id);
2206 return NULL;
56e93d26
JQ
2207 }
2208
4c4bad48
HZ
2209 return block;
2210}
2211
2212static inline void *host_from_ram_block_offset(RAMBlock *block,
2213 ram_addr_t offset)
2214{
2215 if (!offset_in_ramblock(block, offset)) {
2216 return NULL;
2217 }
2218
2219 return block->host + offset;
56e93d26
JQ
2220}
2221
2222/*
2223 * If a page (or a whole RDMA chunk) has been
2224 * determined to be zero, then zap it.
2225 */
2226void ram_handle_compressed(void *host, uint8_t ch, uint64_t size)
2227{
2228 if (ch != 0 || !is_zero_range(host, size)) {
2229 memset(host, ch, size);
2230 }
2231}
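
/*
 * Example: a target page consisting entirely of 0x00 bytes arrives as
 * a single RAM_SAVE_FLAG_COMPRESS byte with ch == 0; in that case the
 * memset() is skipped when the destination page already reads back as
 * zero, so pages that were never touched are not written to needlessly.
 */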
2232
2233static void *do_data_decompress(void *opaque)
2234{
2235 DecompressParam *param = opaque;
2236 unsigned long pagesize;
33d151f4
LL
2237 uint8_t *des;
2238 int len;
56e93d26 2239
33d151f4 2240 qemu_mutex_lock(&param->mutex);
90e56fb4 2241 while (!param->quit) {
33d151f4
LL
2242 if (param->des) {
2243 des = param->des;
2244 len = param->len;
2245 param->des = 0;
2246 qemu_mutex_unlock(&param->mutex);
2247
56e93d26 2248 pagesize = TARGET_PAGE_SIZE;
73a8912b
LL
2249 /* uncompress() can fail in some cases, especially
2250 * when the page is dirtied while it is being compressed; that's
2251 * not a problem because the dirty page will be retransmitted
2252 * and uncompress() won't corrupt the data in other pages.
2253 */
33d151f4
LL
2254 uncompress((Bytef *)des, &pagesize,
2255 (const Bytef *)param->compbuf, len);
73a8912b 2256
33d151f4
LL
2257 qemu_mutex_lock(&decomp_done_lock);
2258 param->done = true;
2259 qemu_cond_signal(&decomp_done_cond);
2260 qemu_mutex_unlock(&decomp_done_lock);
2261
2262 qemu_mutex_lock(&param->mutex);
2263 } else {
2264 qemu_cond_wait(&param->cond, &param->mutex);
2265 }
56e93d26 2266 }
33d151f4 2267 qemu_mutex_unlock(&param->mutex);
56e93d26
JQ
2268
2269 return NULL;
2270}
2271
5533b2e9
LL
2272static void wait_for_decompress_done(void)
2273{
2274 int idx, thread_count;
2275
2276 if (!migrate_use_compression()) {
2277 return;
2278 }
2279
2280 thread_count = migrate_decompress_threads();
2281 qemu_mutex_lock(&decomp_done_lock);
2282 for (idx = 0; idx < thread_count; idx++) {
2283 while (!decomp_param[idx].done) {
2284 qemu_cond_wait(&decomp_done_cond, &decomp_done_lock);
2285 }
2286 }
2287 qemu_mutex_unlock(&decomp_done_lock);
2288}
2289
56e93d26
JQ
2290void migrate_decompress_threads_create(void)
2291{
2292 int i, thread_count;
2293
2294 thread_count = migrate_decompress_threads();
2295 decompress_threads = g_new0(QemuThread, thread_count);
2296 decomp_param = g_new0(DecompressParam, thread_count);
73a8912b
LL
2297 qemu_mutex_init(&decomp_done_lock);
2298 qemu_cond_init(&decomp_done_cond);
56e93d26
JQ
2299 for (i = 0; i < thread_count; i++) {
2300 qemu_mutex_init(&decomp_param[i].mutex);
2301 qemu_cond_init(&decomp_param[i].cond);
2302 decomp_param[i].compbuf = g_malloc0(compressBound(TARGET_PAGE_SIZE));
73a8912b 2303 decomp_param[i].done = true;
90e56fb4 2304 decomp_param[i].quit = false;
56e93d26
JQ
2305 qemu_thread_create(decompress_threads + i, "decompress",
2306 do_data_decompress, decomp_param + i,
2307 QEMU_THREAD_JOINABLE);
2308 }
2309}
2310
2311void migrate_decompress_threads_join(void)
2312{
2313 int i, thread_count;
2314
56e93d26
JQ
2315 thread_count = migrate_decompress_threads();
2316 for (i = 0; i < thread_count; i++) {
2317 qemu_mutex_lock(&decomp_param[i].mutex);
90e56fb4 2318 decomp_param[i].quit = true;
56e93d26
JQ
2319 qemu_cond_signal(&decomp_param[i].cond);
2320 qemu_mutex_unlock(&decomp_param[i].mutex);
2321 }
2322 for (i = 0; i < thread_count; i++) {
2323 qemu_thread_join(decompress_threads + i);
2324 qemu_mutex_destroy(&decomp_param[i].mutex);
2325 qemu_cond_destroy(&decomp_param[i].cond);
2326 g_free(decomp_param[i].compbuf);
2327 }
2328 g_free(decompress_threads);
2329 g_free(decomp_param);
56e93d26
JQ
2330 decompress_threads = NULL;
2331 decomp_param = NULL;
56e93d26
JQ
2332}
2333
c1bc6626 2334static void decompress_data_with_multi_threads(QEMUFile *f,
56e93d26
JQ
2335 void *host, int len)
2336{
2337 int idx, thread_count;
2338
2339 thread_count = migrate_decompress_threads();
73a8912b 2340 qemu_mutex_lock(&decomp_done_lock);
56e93d26
JQ
2341 while (true) {
2342 for (idx = 0; idx < thread_count; idx++) {
73a8912b 2343 if (decomp_param[idx].done) {
33d151f4
LL
2344 decomp_param[idx].done = false;
2345 qemu_mutex_lock(&decomp_param[idx].mutex);
c1bc6626 2346 qemu_get_buffer(f, decomp_param[idx].compbuf, len);
56e93d26
JQ
2347 decomp_param[idx].des = host;
2348 decomp_param[idx].len = len;
33d151f4
LL
2349 qemu_cond_signal(&decomp_param[idx].cond);
2350 qemu_mutex_unlock(&decomp_param[idx].mutex);
56e93d26
JQ
2351 break;
2352 }
2353 }
2354 if (idx < thread_count) {
2355 break;
73a8912b
LL
2356 } else {
2357 qemu_cond_wait(&decomp_done_cond, &decomp_done_lock);
56e93d26
JQ
2358 }
2359 }
73a8912b 2360 qemu_mutex_unlock(&decomp_done_lock);
56e93d26
JQ
2361}
2362
1caddf8a
DDAG
2363/*
2364 * Allocate data structures etc. needed by incoming migration with postcopy-ram;
2365 * postcopy-ram's similarly named postcopy_ram_incoming_init does the work.
2366 */
2367int ram_postcopy_incoming_init(MigrationIncomingState *mis)
2368{
2369 size_t ram_pages = last_ram_offset() >> TARGET_PAGE_BITS;
2370
2371 return postcopy_ram_incoming_init(mis, ram_pages);
2372}
2373
a7180877
DDAG
2374/*
2375 * Called in postcopy mode by ram_load().
2376 * rcu_read_lock is taken prior to this being called.
2377 */
2378static int ram_load_postcopy(QEMUFile *f)
2379{
2380 int flags = 0, ret = 0;
2381 bool place_needed = false;
2382 bool matching_page_sizes = qemu_host_page_size == TARGET_PAGE_SIZE;
2383 MigrationIncomingState *mis = migration_incoming_get_current();
2384 /* Temporary page that is later 'placed' */
2385 void *postcopy_host_page = postcopy_get_tmp_page(mis);
c53b7ddc 2386 void *last_host = NULL;
a3b6ff6d 2387 bool all_zero = false;
a7180877
DDAG
2388
2389 while (!ret && !(flags & RAM_SAVE_FLAG_EOS)) {
2390 ram_addr_t addr;
2391 void *host = NULL;
2392 void *page_buffer = NULL;
2393 void *place_source = NULL;
2394 uint8_t ch;
a7180877
DDAG
2395
2396 addr = qemu_get_be64(f);
2397 flags = addr & ~TARGET_PAGE_MASK;
2398 addr &= TARGET_PAGE_MASK;
2399
2400 trace_ram_load_postcopy_loop((uint64_t)addr, flags);
2401 place_needed = false;
2402 if (flags & (RAM_SAVE_FLAG_COMPRESS | RAM_SAVE_FLAG_PAGE)) {
4c4bad48
HZ
2403 RAMBlock *block = ram_block_from_stream(f, flags);
2404
2405 host = host_from_ram_block_offset(block, addr);
a7180877
DDAG
2406 if (!host) {
2407 error_report("Illegal RAM offset " RAM_ADDR_FMT, addr);
2408 ret = -EINVAL;
2409 break;
2410 }
a7180877
DDAG
2411 /*
2412 * Postcopy requires that we place whole host pages atomically.
2413 * To make it atomic, the data is read into a temporary page
2414 * that's moved into place later.
2415 * The migration protocol uses, possibly smaller, target pages;
2416 * however, the source ensures it always sends all the components
2417 * of a host page in order.
2418 */
2419 page_buffer = postcopy_host_page +
2420 ((uintptr_t)host & ~qemu_host_page_mask);
2421 /* If all TP are zero then we can optimise the place */
2422 if (!((uintptr_t)host & ~qemu_host_page_mask)) {
2423 all_zero = true;
c53b7ddc
DDAG
2424 } else {
2425 /* not the 1st TP within the HP */
2426 if (host != (last_host + TARGET_PAGE_SIZE)) {
9af9e0fe 2427 error_report("Non-sequential target page %p/%p",
c53b7ddc
DDAG
2428 host, last_host);
2429 ret = -EINVAL;
2430 break;
2431 }
a7180877
DDAG
2432 }
2433
c53b7ddc 2434
a7180877
DDAG
2435 /*
2436 * If it's the last part of a host page then we place the host
2437 * page
2438 */
2439 place_needed = (((uintptr_t)host + TARGET_PAGE_SIZE) &
2440 ~qemu_host_page_mask) == 0;
2441 place_source = postcopy_host_page;
2442 }
c53b7ddc 2443 last_host = host;
a7180877
DDAG
2444
2445 switch (flags & ~RAM_SAVE_FLAG_CONTINUE) {
2446 case RAM_SAVE_FLAG_COMPRESS:
2447 ch = qemu_get_byte(f);
2448 memset(page_buffer, ch, TARGET_PAGE_SIZE);
2449 if (ch) {
2450 all_zero = false;
2451 }
2452 break;
2453
2454 case RAM_SAVE_FLAG_PAGE:
2455 all_zero = false;
2456 if (!place_needed || !matching_page_sizes) {
2457 qemu_get_buffer(f, page_buffer, TARGET_PAGE_SIZE);
2458 } else {
2459 /* Avoid the qemu_file copy here: postcopy is going
2460 * to copy the page into place later anyway; we can only do this
2461 * when the read is done in one go (matching page sizes)
2462 */
2463 qemu_get_buffer_in_place(f, (uint8_t **)&place_source,
2464 TARGET_PAGE_SIZE);
2465 }
2466 break;
2467 case RAM_SAVE_FLAG_EOS:
2468 /* normal exit */
2469 break;
2470 default:
2471 error_report("Unknown combination of migration flags: %#x"
2472 " (postcopy mode)", flags);
2473 ret = -EINVAL;
2474 }
2475
2476 if (place_needed) {
2477 /* This gets called at the last target page in the host page */
2478 if (all_zero) {
2479 ret = postcopy_place_page_zero(mis,
2480 host + TARGET_PAGE_SIZE -
2481 qemu_host_page_size);
2482 } else {
2483 ret = postcopy_place_page(mis, host + TARGET_PAGE_SIZE -
2484 qemu_host_page_size,
2485 place_source);
2486 }
2487 }
2488 if (!ret) {
2489 ret = qemu_file_get_error(f);
2490 }
2491 }
2492
2493 return ret;
2494}
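
/*
 * Host-page assembly in ram_load_postcopy(), sketched for 64KiB host
 * pages and 4KiB target pages: the 16 target pages of one host page
 * arrive in order and are copied into the temporary postcopy_host_page
 * at their offsets; only when the 16th one has been read does
 * place_needed become true, and the whole host page is then placed
 * atomically with postcopy_place_page() (or postcopy_place_page_zero()
 * if every target page seen was zero).
 */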
2495
56e93d26
JQ
2496static int ram_load(QEMUFile *f, void *opaque, int version_id)
2497{
2498 int flags = 0, ret = 0;
2499 static uint64_t seq_iter;
2500 int len = 0;
a7180877
DDAG
2501 /*
2502 * If the system is running in postcopy mode, page inserts to host memory must
2503 * be atomic
2504 */
2505 bool postcopy_running = postcopy_state_get() >= POSTCOPY_INCOMING_LISTENING;
56e93d26
JQ
2506
2507 seq_iter++;
2508
2509 if (version_id != 4) {
2510 ret = -EINVAL;
2511 }
2512
2513 /* This RCU critical section can be very long running.
2514 * When RCU reclaims in the code start to become numerous,
2515 * it will be necessary to reduce the granularity of this
2516 * critical section.
2517 */
2518 rcu_read_lock();
a7180877
DDAG
2519
2520 if (postcopy_running) {
2521 ret = ram_load_postcopy(f);
2522 }
2523
2524 while (!postcopy_running && !ret && !(flags & RAM_SAVE_FLAG_EOS)) {
56e93d26 2525 ram_addr_t addr, total_ram_bytes;
a776aa15 2526 void *host = NULL;
56e93d26
JQ
2527 uint8_t ch;
2528
2529 addr = qemu_get_be64(f);
2530 flags = addr & ~TARGET_PAGE_MASK;
2531 addr &= TARGET_PAGE_MASK;
2532
a776aa15
DDAG
2533 if (flags & (RAM_SAVE_FLAG_COMPRESS | RAM_SAVE_FLAG_PAGE |
2534 RAM_SAVE_FLAG_COMPRESS_PAGE | RAM_SAVE_FLAG_XBZRLE)) {
4c4bad48
HZ
2535 RAMBlock *block = ram_block_from_stream(f, flags);
2536
2537 host = host_from_ram_block_offset(block, addr);
a776aa15
DDAG
2538 if (!host) {
2539 error_report("Illegal RAM offset " RAM_ADDR_FMT, addr);
2540 ret = -EINVAL;
2541 break;
2542 }
2543 }
2544
56e93d26
JQ
2545 switch (flags & ~RAM_SAVE_FLAG_CONTINUE) {
2546 case RAM_SAVE_FLAG_MEM_SIZE:
2547 /* Synchronize RAM block list */
2548 total_ram_bytes = addr;
2549 while (!ret && total_ram_bytes) {
2550 RAMBlock *block;
56e93d26
JQ
2551 char id[256];
2552 ram_addr_t length;
2553
2554 len = qemu_get_byte(f);
2555 qemu_get_buffer(f, (uint8_t *)id, len);
2556 id[len] = 0;
2557 length = qemu_get_be64(f);
2558
e3dd7493
DDAG
2559 block = qemu_ram_block_by_name(id);
2560 if (block) {
2561 if (length != block->used_length) {
2562 Error *local_err = NULL;
56e93d26 2563
fa53a0e5 2564 ret = qemu_ram_resize(block, length,
e3dd7493
DDAG
2565 &local_err);
2566 if (local_err) {
2567 error_report_err(local_err);
56e93d26 2568 }
56e93d26 2569 }
e3dd7493
DDAG
2570 ram_control_load_hook(f, RAM_CONTROL_BLOCK_REG,
2571 block->idstr);
2572 } else {
56e93d26
JQ
2573 error_report("Unknown ramblock \"%s\", cannot "
2574 "accept migration", id);
2575 ret = -EINVAL;
2576 }
2577
2578 total_ram_bytes -= length;
2579 }
2580 break;
a776aa15 2581
56e93d26 2582 case RAM_SAVE_FLAG_COMPRESS:
56e93d26
JQ
2583 ch = qemu_get_byte(f);
2584 ram_handle_compressed(host, ch, TARGET_PAGE_SIZE);
2585 break;
a776aa15 2586
56e93d26 2587 case RAM_SAVE_FLAG_PAGE:
56e93d26
JQ
2588 qemu_get_buffer(f, host, TARGET_PAGE_SIZE);
2589 break;
56e93d26 2590
a776aa15 2591 case RAM_SAVE_FLAG_COMPRESS_PAGE:
56e93d26
JQ
2592 len = qemu_get_be32(f);
2593 if (len < 0 || len > compressBound(TARGET_PAGE_SIZE)) {
2594 error_report("Invalid compressed data length: %d", len);
2595 ret = -EINVAL;
2596 break;
2597 }
c1bc6626 2598 decompress_data_with_multi_threads(f, host, len);
56e93d26 2599 break;
a776aa15 2600
56e93d26 2601 case RAM_SAVE_FLAG_XBZRLE:
56e93d26
JQ
2602 if (load_xbzrle(f, addr, host) < 0) {
2603 error_report("Failed to decompress XBZRLE page at "
2604 RAM_ADDR_FMT, addr);
2605 ret = -EINVAL;
2606 break;
2607 }
2608 break;
2609 case RAM_SAVE_FLAG_EOS:
2610 /* normal exit */
2611 break;
2612 default:
2613 if (flags & RAM_SAVE_FLAG_HOOK) {
632e3a5c 2614 ram_control_load_hook(f, RAM_CONTROL_HOOK, NULL);
56e93d26
JQ
2615 } else {
2616 error_report("Unknown combination of migration flags: %#x",
2617 flags);
2618 ret = -EINVAL;
2619 }
2620 }
2621 if (!ret) {
2622 ret = qemu_file_get_error(f);
2623 }
2624 }
2625
5533b2e9 2626 wait_for_decompress_done();
56e93d26 2627 rcu_read_unlock();
55c4446b 2628 trace_ram_load_complete(ret, seq_iter);
56e93d26
JQ
2629 return ret;
2630}
2631
2632static SaveVMHandlers savevm_ram_handlers = {
2633 .save_live_setup = ram_save_setup,
2634 .save_live_iterate = ram_save_iterate,
763c906b 2635 .save_live_complete_postcopy = ram_save_complete,
a3e06c3d 2636 .save_live_complete_precopy = ram_save_complete,
56e93d26
JQ
2637 .save_live_pending = ram_save_pending,
2638 .load_state = ram_load,
6ad2a215 2639 .cleanup = ram_migration_cleanup,
56e93d26
JQ
2640};
2641
2642void ram_mig_init(void)
2643{
2644 qemu_mutex_init(&XBZRLE.lock);
2645 register_savevm_live(NULL, "ram", 0, 4, &savevm_ram_handlers, NULL);
2646}