/*
 * QEMU System Emulator
 *
 * Copyright (c) 2003-2008 Fabrice Bellard
 * Copyright (c) 2011-2015 Red Hat Inc
 *
 * Authors:
 *  Juan Quintela <quintela@redhat.com>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */
#include "qemu/osdep.h"
#include "qemu-common.h"
#include "cpu.h"
#include <zlib.h>
#include "qapi-event.h"
#include "qemu/cutils.h"
#include "qemu/bitops.h"
#include "qemu/bitmap.h"
#include "qemu/timer.h"
#include "qemu/main-loop.h"
#include "migration/migration.h"
#include "migration/postcopy-ram.h"
#include "exec/address-spaces.h"
#include "migration/page_cache.h"
#include "qemu/error-report.h"
#include "trace.h"
#include "exec/ram_addr.h"
#include "qemu/rcu_queue.h"
#include "migration/colo.h"

static int dirty_rate_high_cnt;

static uint64_t bitmap_sync_count;

/***********************************************************/
/* ram save/restore */

#define RAM_SAVE_FLAG_FULL     0x01 /* Obsolete, not used anymore */
#define RAM_SAVE_FLAG_COMPRESS 0x02
#define RAM_SAVE_FLAG_MEM_SIZE 0x04
#define RAM_SAVE_FLAG_PAGE     0x08
#define RAM_SAVE_FLAG_EOS      0x10
#define RAM_SAVE_FLAG_CONTINUE 0x20
#define RAM_SAVE_FLAG_XBZRLE   0x40
/* 0x80 is reserved in migration.h; start with 0x100 for the next flag */
#define RAM_SAVE_FLAG_COMPRESS_PAGE    0x100

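/* The flags above travel in the low bits of the page offset written by
 * save_page_header(): offsets are target-page aligned, so those bits are
 * otherwise zero.  Illustrative example (not an exhaustive format
 * description): a zero page at block offset 0x2000 goes on the wire as the
 * be64 value (0x2000 | RAM_SAVE_FLAG_COMPRESS), the block idstr if
 * RAM_SAVE_FLAG_CONTINUE is not set, and a single zero byte.
 */
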
static uint8_t *ZERO_TARGET_PAGE;

static inline bool is_zero_range(uint8_t *p, uint64_t size)
{
    return buffer_is_zero(p, size);
}

/* struct contains XBZRLE cache and a static page
   used by the compression */
static struct {
    /* buffer used for XBZRLE encoding */
    uint8_t *encoded_buf;
    /* buffer for storing page content */
    uint8_t *current_buf;
    /* Cache for XBZRLE, Protected by lock. */
    PageCache *cache;
    QemuMutex lock;
} XBZRLE;

/* buffer used for XBZRLE decoding */
static uint8_t *xbzrle_decoded_buf;

87static void XBZRLE_cache_lock(void)
88{
89 if (migrate_use_xbzrle())
90 qemu_mutex_lock(&XBZRLE.lock);
91}
92
93static void XBZRLE_cache_unlock(void)
94{
95 if (migrate_use_xbzrle())
96 qemu_mutex_unlock(&XBZRLE.lock);
97}
98
/*
 * called from qmp_migrate_set_cache_size in main thread, possibly while
 * a migration is in progress.
 * A running migration may be using the cache and might finish during this
 * call, hence changes to the cache are protected by XBZRLE.lock.
 */
105int64_t xbzrle_cache_resize(int64_t new_size)
106{
107 PageCache *new_cache;
108 int64_t ret;
109
110 if (new_size < TARGET_PAGE_SIZE) {
111 return -1;
112 }
113
114 XBZRLE_cache_lock();
115
116 if (XBZRLE.cache != NULL) {
117 if (pow2floor(new_size) == migrate_xbzrle_cache_size()) {
118 goto out_new_size;
119 }
120 new_cache = cache_init(new_size / TARGET_PAGE_SIZE,
121 TARGET_PAGE_SIZE);
122 if (!new_cache) {
123 error_report("Error creating cache");
124 ret = -1;
125 goto out;
126 }
127
128 cache_fini(XBZRLE.cache);
129 XBZRLE.cache = new_cache;
130 }
131
132out_new_size:
133 ret = pow2floor(new_size);
134out:
135 XBZRLE_cache_unlock();
136 return ret;
137}
138
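/* Note for callers: the effective size reported back by the function above
 * is pow2floor(new_size), so e.g. a request for 300 MiB reports 256 MiB,
 * and anything smaller than TARGET_PAGE_SIZE is rejected with -1.
 */
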
139/* accounting for migration statistics */
140typedef struct AccountingInfo {
141 uint64_t dup_pages;
142 uint64_t skipped_pages;
143 uint64_t norm_pages;
144 uint64_t iterations;
145 uint64_t xbzrle_bytes;
146 uint64_t xbzrle_pages;
147 uint64_t xbzrle_cache_miss;
148 double xbzrle_cache_miss_rate;
149 uint64_t xbzrle_overflows;
150} AccountingInfo;
151
152static AccountingInfo acct_info;
153
154static void acct_clear(void)
155{
156 memset(&acct_info, 0, sizeof(acct_info));
157}
158
159uint64_t dup_mig_bytes_transferred(void)
160{
161 return acct_info.dup_pages * TARGET_PAGE_SIZE;
162}
163
164uint64_t dup_mig_pages_transferred(void)
165{
166 return acct_info.dup_pages;
167}
168
169uint64_t skipped_mig_bytes_transferred(void)
170{
171 return acct_info.skipped_pages * TARGET_PAGE_SIZE;
172}
173
174uint64_t skipped_mig_pages_transferred(void)
175{
176 return acct_info.skipped_pages;
177}
178
179uint64_t norm_mig_bytes_transferred(void)
180{
181 return acct_info.norm_pages * TARGET_PAGE_SIZE;
182}
183
184uint64_t norm_mig_pages_transferred(void)
185{
186 return acct_info.norm_pages;
187}
188
189uint64_t xbzrle_mig_bytes_transferred(void)
190{
191 return acct_info.xbzrle_bytes;
192}
193
194uint64_t xbzrle_mig_pages_transferred(void)
195{
196 return acct_info.xbzrle_pages;
197}
198
199uint64_t xbzrle_mig_pages_cache_miss(void)
200{
201 return acct_info.xbzrle_cache_miss;
202}
203
204double xbzrle_mig_cache_miss_rate(void)
205{
206 return acct_info.xbzrle_cache_miss_rate;
207}
208
209uint64_t xbzrle_mig_pages_overflow(void)
210{
211 return acct_info.xbzrle_overflows;
212}
213
/* This is the last block that we have visited searching for dirty pages
 */
216static RAMBlock *last_seen_block;
217/* This is the last block from where we have sent data */
218static RAMBlock *last_sent_block;
219static ram_addr_t last_offset;
dd631697 220static QemuMutex migration_bitmap_mutex;
221static uint64_t migration_dirty_pages;
222static uint32_t last_version;
223static bool ram_bulk_stage;
224
225/* used by the search for pages to send */
226struct PageSearchStatus {
227 /* Current block being searched */
228 RAMBlock *block;
229 /* Current offset to search from */
230 ram_addr_t offset;
231 /* Set once we wrap around */
232 bool complete_round;
233};
234typedef struct PageSearchStatus PageSearchStatus;
235
236static struct BitmapRcu {
237 struct rcu_head rcu;
f3f491fc 238 /* Main migration bitmap */
60be6340 239 unsigned long *bmap;
240 /* bitmap of pages that haven't been sent even once
241 * only maintained and used in postcopy at the moment
242 * where it's used to send the dirtymap at the start
243 * of the postcopy phase
244 */
245 unsigned long *unsentmap;
246} *migration_bitmap_rcu;
247
56e93d26 248struct CompressParam {
56e93d26 249 bool done;
90e56fb4 250 bool quit;
251 QEMUFile *file;
252 QemuMutex mutex;
253 QemuCond cond;
254 RAMBlock *block;
255 ram_addr_t offset;
256};
257typedef struct CompressParam CompressParam;
258
259struct DecompressParam {
73a8912b 260 bool done;
90e56fb4 261 bool quit;
262 QemuMutex mutex;
263 QemuCond cond;
264 void *des;
d341d9f3 265 uint8_t *compbuf;
266 int len;
267};
268typedef struct DecompressParam DecompressParam;
269
270static CompressParam *comp_param;
271static QemuThread *compress_threads;
/* comp_done_cond is used to wake up the migration thread when
 * one of the compression threads has finished the compression.
 * comp_done_lock is used together with comp_done_cond.
 */
276static QemuMutex comp_done_lock;
277static QemuCond comp_done_cond;
278/* The empty QEMUFileOps will be used by file in CompressParam */
279static const QEMUFileOps empty_ops = { };
280
281static bool compression_switch;
282static DecompressParam *decomp_param;
283static QemuThread *decompress_threads;
284static QemuMutex decomp_done_lock;
285static QemuCond decomp_done_cond;
56e93d26 286
287static int do_compress_ram_page(QEMUFile *f, RAMBlock *block,
288 ram_addr_t offset);
289
290static void *do_data_compress(void *opaque)
291{
292 CompressParam *param = opaque;
293 RAMBlock *block;
294 ram_addr_t offset;
56e93d26 295
a7a9a88f 296 qemu_mutex_lock(&param->mutex);
90e56fb4 297 while (!param->quit) {
298 if (param->block) {
299 block = param->block;
300 offset = param->offset;
301 param->block = NULL;
302 qemu_mutex_unlock(&param->mutex);
303
304 do_compress_ram_page(param->file, block, offset);
305
0d9f9a5c 306 qemu_mutex_lock(&comp_done_lock);
a7a9a88f 307 param->done = true;
308 qemu_cond_signal(&comp_done_cond);
309 qemu_mutex_unlock(&comp_done_lock);
310
311 qemu_mutex_lock(&param->mutex);
312 } else {
313 qemu_cond_wait(&param->cond, &param->mutex);
314 }
56e93d26 315 }
a7a9a88f 316 qemu_mutex_unlock(&param->mutex);
317
318 return NULL;
319}
320
321static inline void terminate_compression_threads(void)
322{
323 int idx, thread_count;
324
325 thread_count = migrate_compress_threads();
326 for (idx = 0; idx < thread_count; idx++) {
327 qemu_mutex_lock(&comp_param[idx].mutex);
90e56fb4 328 comp_param[idx].quit = true;
329 qemu_cond_signal(&comp_param[idx].cond);
330 qemu_mutex_unlock(&comp_param[idx].mutex);
331 }
332}
333
334void migrate_compress_threads_join(void)
335{
336 int i, thread_count;
337
338 if (!migrate_use_compression()) {
339 return;
340 }
341 terminate_compression_threads();
342 thread_count = migrate_compress_threads();
343 for (i = 0; i < thread_count; i++) {
344 qemu_thread_join(compress_threads + i);
345 qemu_fclose(comp_param[i].file);
346 qemu_mutex_destroy(&comp_param[i].mutex);
347 qemu_cond_destroy(&comp_param[i].cond);
348 }
349 qemu_mutex_destroy(&comp_done_lock);
350 qemu_cond_destroy(&comp_done_cond);
351 g_free(compress_threads);
352 g_free(comp_param);
353 compress_threads = NULL;
354 comp_param = NULL;
355}
356
357void migrate_compress_threads_create(void)
358{
359 int i, thread_count;
360
361 if (!migrate_use_compression()) {
362 return;
363 }
364 compression_switch = true;
365 thread_count = migrate_compress_threads();
366 compress_threads = g_new0(QemuThread, thread_count);
367 comp_param = g_new0(CompressParam, thread_count);
368 qemu_cond_init(&comp_done_cond);
369 qemu_mutex_init(&comp_done_lock);
56e93d26 370 for (i = 0; i < thread_count; i++) {
        /* comp_param[i].file is just used as a dummy buffer to save data,
         * so set its ops to empty.
         */
374 comp_param[i].file = qemu_fopen_ops(NULL, &empty_ops);
375 comp_param[i].done = true;
90e56fb4 376 comp_param[i].quit = false;
377 qemu_mutex_init(&comp_param[i].mutex);
378 qemu_cond_init(&comp_param[i].cond);
379 qemu_thread_create(compress_threads + i, "compress",
380 do_data_compress, comp_param + i,
381 QEMU_THREAD_JOINABLE);
382 }
383}
384
385/**
386 * save_page_header: Write page header to wire
387 *
388 * If this is the 1st block, it also writes the block identification
389 *
390 * Returns: Number of bytes written
391 *
392 * @f: QEMUFile where to send the data
393 * @block: block that contains the page we want to send
394 * @offset: offset inside the block for the page
395 * in the lower bits, it contains flags
396 */
397static size_t save_page_header(QEMUFile *f, RAMBlock *block, ram_addr_t offset)
398{
9f5f380b 399 size_t size, len;
400
401 qemu_put_be64(f, offset);
402 size = 8;
403
404 if (!(offset & RAM_SAVE_FLAG_CONTINUE)) {
405 len = strlen(block->idstr);
406 qemu_put_byte(f, len);
407 qemu_put_buffer(f, (uint8_t *)block->idstr, len);
408 size += 1 + len;
409 }
410 return size;
411}
412
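/* Illustrative layout of a normal page on the wire, built from
 * save_page_header() above plus the payload written by its callers:
 *
 *   be64  offset | RAM_SAVE_FLAG_PAGE [| RAM_SAVE_FLAG_CONTINUE]
 *   u8    strlen(idstr), then idstr    (only when CONTINUE is not set)
 *   TARGET_PAGE_SIZE bytes of page data
 */
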
413/* Reduce amount of guest cpu execution to hopefully slow down memory writes.
414 * If guest dirty memory rate is reduced below the rate at which we can
415 * transfer pages to the destination then we should be able to complete
416 * migration. Some workloads dirty memory way too fast and will not effectively
417 * converge, even with auto-converge.
418 */
419static void mig_throttle_guest_down(void)
420{
421 MigrationState *s = migrate_get_current();
422 uint64_t pct_initial = s->parameters.cpu_throttle_initial;
    uint64_t pct_increment = s->parameters.cpu_throttle_increment;
424
425 /* We have not started throttling yet. Let's start it. */
426 if (!cpu_throttle_active()) {
427 cpu_throttle_set(pct_initial);
428 } else {
429 /* Throttling already on, just increase the rate */
        cpu_throttle_set(cpu_throttle_get_percentage() + pct_increment);
431 }
432}
433
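/* Example of the throttling progression above: with the migration
 * parameters cpu_throttle_initial=20 and cpu_throttle_increment=10,
 * successive calls throttle the guest at 20%, 30%, 40%, ... until the
 * dirty rate drops enough for migration to converge.
 */
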
434/* Update the xbzrle cache to reflect a page that's been sent as all 0.
435 * The important thing is that a stale (not-yet-0'd) page be replaced
436 * by the new data.
437 * As a bonus, if the page wasn't in the cache it gets added so that
438 * when a small write is made into the 0'd page it gets XBZRLE sent
439 */
440static void xbzrle_cache_zero_page(ram_addr_t current_addr)
441{
442 if (ram_bulk_stage || !migrate_use_xbzrle()) {
443 return;
444 }
445
446 /* We don't care if this fails to allocate a new cache page
447 * as long as it updated an old one */
448 cache_insert(XBZRLE.cache, current_addr, ZERO_TARGET_PAGE,
449 bitmap_sync_count);
450}
451
452#define ENCODING_FLAG_XBZRLE 0x1
453
454/**
455 * save_xbzrle_page: compress and send current page
456 *
457 * Returns: 1 means that we wrote the page
458 * 0 means that page is identical to the one already sent
459 * -1 means that xbzrle would be longer than normal
460 *
461 * @f: QEMUFile where to send the data
462 * @current_data:
463 * @current_addr:
464 * @block: block that contains the page we want to send
465 * @offset: offset inside the block for the page
466 * @last_stage: if we are at the completion stage
467 * @bytes_transferred: increase it with the number of transferred bytes
468 */
469static int save_xbzrle_page(QEMUFile *f, uint8_t **current_data,
470 ram_addr_t current_addr, RAMBlock *block,
471 ram_addr_t offset, bool last_stage,
472 uint64_t *bytes_transferred)
473{
474 int encoded_len = 0, bytes_xbzrle;
475 uint8_t *prev_cached_page;
476
477 if (!cache_is_cached(XBZRLE.cache, current_addr, bitmap_sync_count)) {
478 acct_info.xbzrle_cache_miss++;
479 if (!last_stage) {
480 if (cache_insert(XBZRLE.cache, current_addr, *current_data,
481 bitmap_sync_count) == -1) {
482 return -1;
483 } else {
484 /* update *current_data when the page has been
485 inserted into cache */
486 *current_data = get_cached_data(XBZRLE.cache, current_addr);
487 }
488 }
489 return -1;
490 }
491
492 prev_cached_page = get_cached_data(XBZRLE.cache, current_addr);
493
494 /* save current buffer into memory */
495 memcpy(XBZRLE.current_buf, *current_data, TARGET_PAGE_SIZE);
496
497 /* XBZRLE encoding (if there is no overflow) */
498 encoded_len = xbzrle_encode_buffer(prev_cached_page, XBZRLE.current_buf,
499 TARGET_PAGE_SIZE, XBZRLE.encoded_buf,
500 TARGET_PAGE_SIZE);
501 if (encoded_len == 0) {
55c4446b 502 trace_save_xbzrle_page_skipping();
503 return 0;
504 } else if (encoded_len == -1) {
55c4446b 505 trace_save_xbzrle_page_overflow();
506 acct_info.xbzrle_overflows++;
507 /* update data in the cache */
508 if (!last_stage) {
509 memcpy(prev_cached_page, *current_data, TARGET_PAGE_SIZE);
510 *current_data = prev_cached_page;
511 }
512 return -1;
513 }
514
    /* Update the cache so the next XBZRLE delta is computed against the
     * data that was actually sent */
516 if (!last_stage) {
517 memcpy(prev_cached_page, XBZRLE.current_buf, TARGET_PAGE_SIZE);
518 }
519
520 /* Send XBZRLE based compressed page */
521 bytes_xbzrle = save_page_header(f, block, offset | RAM_SAVE_FLAG_XBZRLE);
522 qemu_put_byte(f, ENCODING_FLAG_XBZRLE);
523 qemu_put_be16(f, encoded_len);
524 qemu_put_buffer(f, XBZRLE.encoded_buf, encoded_len);
525 bytes_xbzrle += encoded_len + 1 + 2;
526 acct_info.xbzrle_pages++;
527 acct_info.xbzrle_bytes += bytes_xbzrle;
528 *bytes_transferred += bytes_xbzrle;
529
530 return 1;
531}
532
533/* Called with rcu_read_lock() to protect migration_bitmap
534 * rb: The RAMBlock to search for dirty pages in
535 * start: Start address (typically so we can continue from previous page)
536 * ram_addr_abs: Pointer into which to store the address of the dirty page
537 * within the global ram_addr space
538 *
539 * Returns: byte offset within memory region of the start of a dirty page
540 */
56e93d26 541static inline
542ram_addr_t migration_bitmap_find_dirty(RAMBlock *rb,
543 ram_addr_t start,
544 ram_addr_t *ram_addr_abs)
56e93d26 545{
2f68e399 546 unsigned long base = rb->offset >> TARGET_PAGE_BITS;
56e93d26 547 unsigned long nr = base + (start >> TARGET_PAGE_BITS);
548 uint64_t rb_size = rb->used_length;
549 unsigned long size = base + (rb_size >> TARGET_PAGE_BITS);
2ff64038 550 unsigned long *bitmap;
551
552 unsigned long next;
553
60be6340 554 bitmap = atomic_rcu_read(&migration_bitmap_rcu)->bmap;
555 if (ram_bulk_stage && nr > base) {
556 next = nr + 1;
557 } else {
2ff64038 558 next = find_next_bit(bitmap, size, nr);
559 }
560
f3f491fc 561 *ram_addr_abs = next << TARGET_PAGE_BITS;
562 return (next - base) << TARGET_PAGE_BITS;
563}
564
565static inline bool migration_bitmap_clear_dirty(ram_addr_t addr)
566{
567 bool ret;
568 int nr = addr >> TARGET_PAGE_BITS;
569 unsigned long *bitmap = atomic_rcu_read(&migration_bitmap_rcu)->bmap;
570
571 ret = test_and_clear_bit(nr, bitmap);
572
573 if (ret) {
574 migration_dirty_pages--;
575 }
576 return ret;
577}
578
579static void migration_bitmap_sync_range(ram_addr_t start, ram_addr_t length)
580{
2ff64038 581 unsigned long *bitmap;
60be6340 582 bitmap = atomic_rcu_read(&migration_bitmap_rcu)->bmap;
56e93d26 583 migration_dirty_pages +=
2ff64038 584 cpu_physical_memory_sync_dirty_bitmap(bitmap, start, length);
585}
586
587/* Fix me: there are too many global variables used in migration process. */
588static int64_t start_time;
589static int64_t bytes_xfer_prev;
590static int64_t num_dirty_pages_period;
591static uint64_t xbzrle_cache_miss_prev;
592static uint64_t iterations_prev;
593
594static void migration_bitmap_sync_init(void)
595{
596 start_time = 0;
597 bytes_xfer_prev = 0;
598 num_dirty_pages_period = 0;
599 xbzrle_cache_miss_prev = 0;
600 iterations_prev = 0;
601}
602
603/* Returns a summary bitmap of the page sizes of all RAMBlocks;
604 * for VMs with just normal pages this is equivalent to the
605 * host page size. If it's got some huge pages then it's the OR
606 * of all the different page sizes.
607 */
608uint64_t ram_pagesize_summary(void)
609{
610 RAMBlock *block;
611 uint64_t summary = 0;
612
613 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
614 summary |= block->page_size;
615 }
616
617 return summary;
618}
619
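/* For example, a VM whose RAMBlocks are all backed by 4 KiB pages reports
 * 0x1000 here, while one that also has a 2 MiB hugetlbfs-backed block
 * reports 0x1000 | 0x200000 = 0x201000.
 */
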
620static void migration_bitmap_sync(void)
621{
622 RAMBlock *block;
623 uint64_t num_dirty_pages_init = migration_dirty_pages;
624 MigrationState *s = migrate_get_current();
625 int64_t end_time;
626 int64_t bytes_xfer_now;
627
628 bitmap_sync_count++;
629
630 if (!bytes_xfer_prev) {
631 bytes_xfer_prev = ram_bytes_transferred();
632 }
633
634 if (!start_time) {
635 start_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
636 }
637
638 trace_migration_bitmap_sync_start();
9c1f8f44 639 memory_global_dirty_log_sync();
56e93d26 640
dd631697 641 qemu_mutex_lock(&migration_bitmap_mutex);
642 rcu_read_lock();
643 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
2f68e399 644 migration_bitmap_sync_range(block->offset, block->used_length);
645 }
646 rcu_read_unlock();
dd631697 647 qemu_mutex_unlock(&migration_bitmap_mutex);
648
649 trace_migration_bitmap_sync_end(migration_dirty_pages
650 - num_dirty_pages_init);
651 num_dirty_pages_period += migration_dirty_pages - num_dirty_pages_init;
652 end_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
653
    /* more than 1 second = 1000 milliseconds */
655 if (end_time > start_time + 1000) {
656 if (migrate_auto_converge()) {
            /* The following detection logic can be refined later. For now:
               Check to see if the dirtied bytes are 50% more than the approx.
               amount of bytes that just got transferred since the last time
               we were in this routine. If that happens twice, start or
               increase throttling */
56e93d26 662 bytes_xfer_now = ram_bytes_transferred();
070afca2 663
664 if (s->dirty_pages_rate &&
665 (num_dirty_pages_period * TARGET_PAGE_SIZE >
666 (bytes_xfer_now - bytes_xfer_prev)/2) &&
070afca2 667 (dirty_rate_high_cnt++ >= 2)) {
56e93d26 668 trace_migration_throttle();
56e93d26 669 dirty_rate_high_cnt = 0;
070afca2 670 mig_throttle_guest_down();
671 }
672 bytes_xfer_prev = bytes_xfer_now;
56e93d26 673 }
070afca2 674
675 if (migrate_use_xbzrle()) {
676 if (iterations_prev != acct_info.iterations) {
677 acct_info.xbzrle_cache_miss_rate =
678 (double)(acct_info.xbzrle_cache_miss -
679 xbzrle_cache_miss_prev) /
680 (acct_info.iterations - iterations_prev);
681 }
682 iterations_prev = acct_info.iterations;
683 xbzrle_cache_miss_prev = acct_info.xbzrle_cache_miss;
684 }
685 s->dirty_pages_rate = num_dirty_pages_period * 1000
686 / (end_time - start_time);
687 s->dirty_bytes_rate = s->dirty_pages_rate * TARGET_PAGE_SIZE;
688 start_time = end_time;
689 num_dirty_pages_period = 0;
690 }
691 s->dirty_sync_count = bitmap_sync_count;
692 if (migrate_use_events()) {
693 qapi_event_send_migration_pass(bitmap_sync_count, NULL);
694 }
695}
696
697/**
698 * save_zero_page: Send the zero page to the stream
699 *
700 * Returns: Number of pages written.
701 *
702 * @f: QEMUFile where to send the data
703 * @block: block that contains the page we want to send
704 * @offset: offset inside the block for the page
705 * @p: pointer to the page
706 * @bytes_transferred: increase it with the number of transferred bytes
707 */
708static int save_zero_page(QEMUFile *f, RAMBlock *block, ram_addr_t offset,
709 uint8_t *p, uint64_t *bytes_transferred)
710{
711 int pages = -1;
712
713 if (is_zero_range(p, TARGET_PAGE_SIZE)) {
714 acct_info.dup_pages++;
715 *bytes_transferred += save_page_header(f, block,
716 offset | RAM_SAVE_FLAG_COMPRESS);
717 qemu_put_byte(f, 0);
718 *bytes_transferred += 1;
719 pages = 1;
720 }
721
722 return pages;
723}
724
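/* Note: a zero page found by save_zero_page() above costs only its
 * save_page_header() plus a single zero byte on the wire, regardless of
 * TARGET_PAGE_SIZE.
 */
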
725static void ram_release_pages(MigrationState *ms, const char *block_name,
726 uint64_t offset, int pages)
727{
728 if (!migrate_release_ram() || !migration_in_postcopy(ms)) {
729 return;
730 }
731
732 ram_discard_range(NULL, block_name, offset, pages << TARGET_PAGE_BITS);
733}
734
735/**
736 * ram_save_page: Send the given page to the stream
737 *
738 * Returns: Number of pages written.
739 * < 0 - error
740 * >=0 - Number of pages written - this might legally be 0
741 * if xbzrle noticed the page was the same.
56e93d26 742 *
9eb14766 743 * @ms: The current migration state.
744 * @f: QEMUFile where to send the data
745 * @block: block that contains the page we want to send
746 * @offset: offset inside the block for the page
747 * @last_stage: if we are at the completion stage
748 * @bytes_transferred: increase it with the number of transferred bytes
749 */
9eb14766 750static int ram_save_page(MigrationState *ms, QEMUFile *f, PageSearchStatus *pss,
751 bool last_stage, uint64_t *bytes_transferred)
752{
753 int pages = -1;
754 uint64_t bytes_xmit;
755 ram_addr_t current_addr;
756 uint8_t *p;
757 int ret;
758 bool send_async = true;
759 RAMBlock *block = pss->block;
760 ram_addr_t offset = pss->offset;
56e93d26 761
2f68e399 762 p = block->host + offset;
763
764 /* In doubt sent page as normal */
765 bytes_xmit = 0;
766 ret = ram_control_save_page(f, block->offset,
767 offset, TARGET_PAGE_SIZE, &bytes_xmit);
768 if (bytes_xmit) {
769 *bytes_transferred += bytes_xmit;
770 pages = 1;
771 }
772
773 XBZRLE_cache_lock();
774
775 current_addr = block->offset + offset;
776
777 if (block == last_sent_block) {
778 offset |= RAM_SAVE_FLAG_CONTINUE;
779 }
780 if (ret != RAM_SAVE_CONTROL_NOT_SUPP) {
781 if (ret != RAM_SAVE_CONTROL_DELAYED) {
782 if (bytes_xmit > 0) {
783 acct_info.norm_pages++;
784 } else if (bytes_xmit == 0) {
785 acct_info.dup_pages++;
786 }
787 }
788 } else {
789 pages = save_zero_page(f, block, offset, p, bytes_transferred);
790 if (pages > 0) {
791 /* Must let xbzrle know, otherwise a previous (now 0'd) cached
792 * page would be stale
793 */
794 xbzrle_cache_zero_page(current_addr);
53f09a10 795 ram_release_pages(ms, block->idstr, pss->offset, pages);
2ebeaec0 796 } else if (!ram_bulk_stage &&
9eb14766 797 !migration_in_postcopy(ms) && migrate_use_xbzrle()) {
798 pages = save_xbzrle_page(f, &p, current_addr, block,
799 offset, last_stage, bytes_transferred);
800 if (!last_stage) {
801 /* Can't send this cached data async, since the cache page
802 * might get updated before it gets to the wire
803 */
804 send_async = false;
805 }
806 }
807 }
808
809 /* XBZRLE overflow or normal page */
810 if (pages == -1) {
811 *bytes_transferred += save_page_header(f, block,
812 offset | RAM_SAVE_FLAG_PAGE);
813 if (send_async) {
814 qemu_put_buffer_async(f, p, TARGET_PAGE_SIZE,
815 migrate_release_ram() &
816 migration_in_postcopy(ms));
817 } else {
818 qemu_put_buffer(f, p, TARGET_PAGE_SIZE);
819 }
820 *bytes_transferred += TARGET_PAGE_SIZE;
821 pages = 1;
822 acct_info.norm_pages++;
823 }
824
825 XBZRLE_cache_unlock();
826
827 return pages;
828}
829
830static int do_compress_ram_page(QEMUFile *f, RAMBlock *block,
831 ram_addr_t offset)
832{
833 int bytes_sent, blen;
a7a9a88f 834 uint8_t *p = block->host + (offset & TARGET_PAGE_MASK);
56e93d26 835
a7a9a88f 836 bytes_sent = save_page_header(f, block, offset |
56e93d26 837 RAM_SAVE_FLAG_COMPRESS_PAGE);
a7a9a88f 838 blen = qemu_put_compression_data(f, p, TARGET_PAGE_SIZE,
56e93d26 839 migrate_compress_level());
840 if (blen < 0) {
841 bytes_sent = 0;
842 qemu_file_set_error(migrate_get_current()->to_dst_file, blen);
843 error_report("compressed data failed!");
844 } else {
845 bytes_sent += blen;
846 ram_release_pages(migrate_get_current(), block->idstr,
847 offset & TARGET_PAGE_MASK, 1);
b3be2896 848 }
849
850 return bytes_sent;
851}
852
853static uint64_t bytes_transferred;
854
855static void flush_compressed_data(QEMUFile *f)
856{
857 int idx, len, thread_count;
858
859 if (!migrate_use_compression()) {
860 return;
861 }
862 thread_count = migrate_compress_threads();
a7a9a88f 863
0d9f9a5c 864 qemu_mutex_lock(&comp_done_lock);
56e93d26 865 for (idx = 0; idx < thread_count; idx++) {
a7a9a88f 866 while (!comp_param[idx].done) {
0d9f9a5c 867 qemu_cond_wait(&comp_done_cond, &comp_done_lock);
56e93d26 868 }
a7a9a88f 869 }
0d9f9a5c 870 qemu_mutex_unlock(&comp_done_lock);
871
872 for (idx = 0; idx < thread_count; idx++) {
873 qemu_mutex_lock(&comp_param[idx].mutex);
90e56fb4 874 if (!comp_param[idx].quit) {
875 len = qemu_put_qemu_file(f, comp_param[idx].file);
876 bytes_transferred += len;
877 }
a7a9a88f 878 qemu_mutex_unlock(&comp_param[idx].mutex);
879 }
880}
881
882static inline void set_compress_params(CompressParam *param, RAMBlock *block,
883 ram_addr_t offset)
884{
885 param->block = block;
886 param->offset = offset;
887}
888
889static int compress_page_with_multi_thread(QEMUFile *f, RAMBlock *block,
890 ram_addr_t offset,
891 uint64_t *bytes_transferred)
892{
893 int idx, thread_count, bytes_xmit = -1, pages = -1;
894
895 thread_count = migrate_compress_threads();
0d9f9a5c 896 qemu_mutex_lock(&comp_done_lock);
897 while (true) {
898 for (idx = 0; idx < thread_count; idx++) {
899 if (comp_param[idx].done) {
a7a9a88f 900 comp_param[idx].done = false;
56e93d26 901 bytes_xmit = qemu_put_qemu_file(f, comp_param[idx].file);
a7a9a88f 902 qemu_mutex_lock(&comp_param[idx].mutex);
56e93d26 903 set_compress_params(&comp_param[idx], block, offset);
904 qemu_cond_signal(&comp_param[idx].cond);
905 qemu_mutex_unlock(&comp_param[idx].mutex);
906 pages = 1;
907 acct_info.norm_pages++;
908 *bytes_transferred += bytes_xmit;
909 break;
910 }
911 }
912 if (pages > 0) {
913 break;
914 } else {
0d9f9a5c 915 qemu_cond_wait(&comp_done_cond, &comp_done_lock);
916 }
917 }
0d9f9a5c 918 qemu_mutex_unlock(&comp_done_lock);
919
920 return pages;
921}
922
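/* Rough sketch of the handshake used above: the migration thread picks an
 * idle worker (done == true), clears 'done', drains the worker's private
 * QEMUFile into the migration stream, hands it a new (block, offset) under
 * the worker's mutex and signals its condvar.  The worker (do_data_compress)
 * compresses the page into its QEMUFile, sets 'done' again and signals
 * comp_done_cond, waking flush_compressed_data() or the next caller here.
 */
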
923/**
924 * ram_save_compressed_page: compress the given page and send it to the stream
925 *
926 * Returns: Number of pages written.
927 *
9eb14766 928 * @ms: The current migration state.
929 * @f: QEMUFile where to send the data
930 * @block: block that contains the page we want to send
931 * @offset: offset inside the block for the page
932 * @last_stage: if we are at the completion stage
933 * @bytes_transferred: increase it with the number of transferred bytes
934 */
935static int ram_save_compressed_page(MigrationState *ms, QEMUFile *f,
936 PageSearchStatus *pss, bool last_stage,
937 uint64_t *bytes_transferred)
938{
939 int pages = -1;
fc50438e 940 uint64_t bytes_xmit = 0;
56e93d26 941 uint8_t *p;
fc50438e 942 int ret, blen;
943 RAMBlock *block = pss->block;
944 ram_addr_t offset = pss->offset;
56e93d26 945
2f68e399 946 p = block->host + offset;
56e93d26 947
948 ret = ram_control_save_page(f, block->offset,
949 offset, TARGET_PAGE_SIZE, &bytes_xmit);
950 if (bytes_xmit) {
951 *bytes_transferred += bytes_xmit;
952 pages = 1;
953 }
954 if (ret != RAM_SAVE_CONTROL_NOT_SUPP) {
955 if (ret != RAM_SAVE_CONTROL_DELAYED) {
956 if (bytes_xmit > 0) {
957 acct_info.norm_pages++;
958 } else if (bytes_xmit == 0) {
959 acct_info.dup_pages++;
960 }
961 }
962 } else {
        /* When starting to process a new block, the first page of the
         * block should be sent out before the other pages in the same
         * block, and all the pages in the last block should have been
         * sent out already. Keeping this order is important, because
         * the 'cont' flag is used to avoid resending the block name.
         */
969 if (block != last_sent_block) {
970 flush_compressed_data(f);
971 pages = save_zero_page(f, block, offset, p, bytes_transferred);
972 if (pages == -1) {
973 /* Make sure the first page is sent out before other pages */
974 bytes_xmit = save_page_header(f, block, offset |
975 RAM_SAVE_FLAG_COMPRESS_PAGE);
976 blen = qemu_put_compression_data(f, p, TARGET_PAGE_SIZE,
977 migrate_compress_level());
978 if (blen > 0) {
979 *bytes_transferred += bytes_xmit + blen;
b3be2896 980 acct_info.norm_pages++;
b3be2896 981 pages = 1;
982 } else {
983 qemu_file_set_error(f, blen);
984 error_report("compressed data failed!");
b3be2896 985 }
56e93d26 986 }
987 if (pages > 0) {
988 ram_release_pages(ms, block->idstr, pss->offset, pages);
989 }
56e93d26 990 } else {
fc50438e 991 offset |= RAM_SAVE_FLAG_CONTINUE;
992 pages = save_zero_page(f, block, offset, p, bytes_transferred);
993 if (pages == -1) {
994 pages = compress_page_with_multi_thread(f, block, offset,
995 bytes_transferred);
996 } else {
997 ram_release_pages(ms, block->idstr, pss->offset, pages);
998 }
999 }
1000 }
1001
1002 return pages;
1003}
1004
1005/*
1006 * Find the next dirty page and update any state associated with
1007 * the search process.
1008 *
1009 * Returns: True if a page is found
1010 *
1011 * @f: Current migration stream.
1012 * @pss: Data about the state of the current dirty page scan.
1013 * @*again: Set to false if the search has scanned the whole of RAM
1014 * *ram_addr_abs: Pointer into which to store the address of the dirty page
1015 * within the global ram_addr space
1016 */
1017static bool find_dirty_block(QEMUFile *f, PageSearchStatus *pss,
f3f491fc 1018 bool *again, ram_addr_t *ram_addr_abs)
b9e60928 1019{
1020 pss->offset = migration_bitmap_find_dirty(pss->block, pss->offset,
1021 ram_addr_abs);
1022 if (pss->complete_round && pss->block == last_seen_block &&
1023 pss->offset >= last_offset) {
1024 /*
1025 * We've been once around the RAM and haven't found anything.
1026 * Give up.
1027 */
1028 *again = false;
1029 return false;
1030 }
1031 if (pss->offset >= pss->block->used_length) {
1032 /* Didn't find anything in this RAM Block */
1033 pss->offset = 0;
1034 pss->block = QLIST_NEXT_RCU(pss->block, next);
1035 if (!pss->block) {
1036 /* Hit the end of the list */
1037 pss->block = QLIST_FIRST_RCU(&ram_list.blocks);
1038 /* Flag that we've looped */
1039 pss->complete_round = true;
1040 ram_bulk_stage = false;
1041 if (migrate_use_xbzrle()) {
1042 /* If xbzrle is on, stop using the data compression at this
1043 * point. In theory, xbzrle can do better than compression.
1044 */
1045 flush_compressed_data(f);
1046 compression_switch = false;
1047 }
1048 }
1049 /* Didn't find anything this time, but try again on the new block */
1050 *again = true;
1051 return false;
1052 } else {
1053 /* Can go around again, but... */
1054 *again = true;
1055 /* We've found something so probably don't need to */
1056 return true;
1057 }
1058}
1059
1060/*
1061 * Helper for 'get_queued_page' - gets a page off the queue
1062 * ms: MigrationState in
1063 * *offset: Used to return the offset within the RAMBlock
1064 * ram_addr_abs: global offset in the dirty/sent bitmaps
1065 *
1066 * Returns: block (or NULL if none available)
1067 */
1068static RAMBlock *unqueue_page(MigrationState *ms, ram_addr_t *offset,
1069 ram_addr_t *ram_addr_abs)
1070{
1071 RAMBlock *block = NULL;
1072
1073 qemu_mutex_lock(&ms->src_page_req_mutex);
1074 if (!QSIMPLEQ_EMPTY(&ms->src_page_requests)) {
1075 struct MigrationSrcPageRequest *entry =
1076 QSIMPLEQ_FIRST(&ms->src_page_requests);
1077 block = entry->rb;
1078 *offset = entry->offset;
1079 *ram_addr_abs = (entry->offset + entry->rb->offset) &
1080 TARGET_PAGE_MASK;
1081
1082 if (entry->len > TARGET_PAGE_SIZE) {
1083 entry->len -= TARGET_PAGE_SIZE;
1084 entry->offset += TARGET_PAGE_SIZE;
1085 } else {
1086 memory_region_unref(block->mr);
1087 QSIMPLEQ_REMOVE_HEAD(&ms->src_page_requests, next_req);
1088 g_free(entry);
1089 }
1090 }
1091 qemu_mutex_unlock(&ms->src_page_req_mutex);
1092
1093 return block;
1094}
1095
1096/*
1097 * Unqueue a page from the queue fed by postcopy page requests; skips pages
1098 * that are already sent (!dirty)
1099 *
1100 * ms: MigrationState in
1101 * pss: PageSearchStatus structure updated with found block/offset
1102 * ram_addr_abs: global offset in the dirty/sent bitmaps
1103 *
1104 * Returns: true if a queued page is found
1105 */
1106static bool get_queued_page(MigrationState *ms, PageSearchStatus *pss,
1107 ram_addr_t *ram_addr_abs)
1108{
1109 RAMBlock *block;
1110 ram_addr_t offset;
1111 bool dirty;
1112
1113 do {
1114 block = unqueue_page(ms, &offset, ram_addr_abs);
1115 /*
1116 * We're sending this page, and since it's postcopy nothing else
1117 * will dirty it, and we must make sure it doesn't get sent again
1118 * even if this queue request was received after the background
1119 * search already sent it.
1120 */
1121 if (block) {
1122 unsigned long *bitmap;
1123 bitmap = atomic_rcu_read(&migration_bitmap_rcu)->bmap;
1124 dirty = test_bit(*ram_addr_abs >> TARGET_PAGE_BITS, bitmap);
1125 if (!dirty) {
1126 trace_get_queued_page_not_dirty(
1127 block->idstr, (uint64_t)offset,
1128 (uint64_t)*ram_addr_abs,
1129 test_bit(*ram_addr_abs >> TARGET_PAGE_BITS,
1130 atomic_rcu_read(&migration_bitmap_rcu)->unsentmap));
1131 } else {
1132 trace_get_queued_page(block->idstr,
1133 (uint64_t)offset,
1134 (uint64_t)*ram_addr_abs);
1135 }
1136 }
1137
1138 } while (block && !dirty);
1139
1140 if (block) {
1141 /*
1142 * As soon as we start servicing pages out of order, then we have
1143 * to kill the bulk stage, since the bulk stage assumes
1144 * in (migration_bitmap_find_and_reset_dirty) that every page is
1145 * dirty, that's no longer true.
1146 */
1147 ram_bulk_stage = false;
1148
1149 /*
1150 * We want the background search to continue from the queued page
1151 * since the guest is likely to want other pages near to the page
1152 * it just requested.
1153 */
1154 pss->block = block;
1155 pss->offset = offset;
1156 }
1157
1158 return !!block;
1159}
1160
1161/**
1162 * flush_page_queue: Flush any remaining pages in the ram request queue
1163 * it should be empty at the end anyway, but in error cases there may be
1164 * some left.
1165 *
1166 * ms: MigrationState
1167 */
1168void flush_page_queue(MigrationState *ms)
1169{
1170 struct MigrationSrcPageRequest *mspr, *next_mspr;
    /* This queue should generally be empty - but in the case of a failed
     * migration it might have some leftover entries.
     */
1174 rcu_read_lock();
1175 QSIMPLEQ_FOREACH_SAFE(mspr, &ms->src_page_requests, next_req, next_mspr) {
1176 memory_region_unref(mspr->rb->mr);
1177 QSIMPLEQ_REMOVE_HEAD(&ms->src_page_requests, next_req);
1178 g_free(mspr);
1179 }
1180 rcu_read_unlock();
1181}
1182
1183/**
1184 * Queue the pages for transmission, e.g. a request from postcopy destination
 * ms: MigrationState in which the queue is held
1186 * rbname: The RAMBlock the request is for - may be NULL (to mean reuse last)
1187 * start: Offset from the start of the RAMBlock
1188 * len: Length (in bytes) to send
1189 * Return: 0 on success
1190 */
1191int ram_save_queue_pages(MigrationState *ms, const char *rbname,
1192 ram_addr_t start, ram_addr_t len)
1193{
1194 RAMBlock *ramblock;
1195
d3bf5418 1196 ms->postcopy_requests++;
1197 rcu_read_lock();
1198 if (!rbname) {
1199 /* Reuse last RAMBlock */
1200 ramblock = ms->last_req_rb;
1201
1202 if (!ramblock) {
1203 /*
1204 * Shouldn't happen, we can't reuse the last RAMBlock if
1205 * it's the 1st request.
1206 */
1207 error_report("ram_save_queue_pages no previous block");
1208 goto err;
1209 }
1210 } else {
1211 ramblock = qemu_ram_block_by_name(rbname);
1212
1213 if (!ramblock) {
1214 /* We shouldn't be asked for a non-existent RAMBlock */
1215 error_report("ram_save_queue_pages no block '%s'", rbname);
1216 goto err;
1217 }
1218 ms->last_req_rb = ramblock;
1219 }
1220 trace_ram_save_queue_pages(ramblock->idstr, start, len);
1221 if (start+len > ramblock->used_length) {
1222 error_report("%s request overrun start=" RAM_ADDR_FMT " len="
1223 RAM_ADDR_FMT " blocklen=" RAM_ADDR_FMT,
1224 __func__, start, len, ramblock->used_length);
1225 goto err;
1226 }
1227
1228 struct MigrationSrcPageRequest *new_entry =
1229 g_malloc0(sizeof(struct MigrationSrcPageRequest));
1230 new_entry->rb = ramblock;
1231 new_entry->offset = start;
1232 new_entry->len = len;
1233
1234 memory_region_ref(ramblock->mr);
1235 qemu_mutex_lock(&ms->src_page_req_mutex);
1236 QSIMPLEQ_INSERT_TAIL(&ms->src_page_requests, new_entry, next_req);
1237 qemu_mutex_unlock(&ms->src_page_req_mutex);
1238 rcu_read_unlock();
1239
1240 return 0;
1241
1242err:
1243 rcu_read_unlock();
1244 return -1;
1245}
1246
1247/**
1248 * ram_save_target_page: Save one target page
1249 *
1250 *
1251 * @f: QEMUFile where to send the data
1252 * @block: pointer to block that contains the page we want to send
1253 * @offset: offset inside the block for the page;
1254 * @last_stage: if we are at the completion stage
1255 * @bytes_transferred: increase it with the number of transferred bytes
1256 * @dirty_ram_abs: Address of the start of the dirty page in ram_addr_t space
1257 *
1258 * Returns: Number of pages written.
1259 */
1260static int ram_save_target_page(MigrationState *ms, QEMUFile *f,
a08f6890 1261 PageSearchStatus *pss,
1262 bool last_stage,
1263 uint64_t *bytes_transferred,
1264 ram_addr_t dirty_ram_abs)
1265{
1266 int res = 0;
1267
    /* Check if the page is dirty and if so, send it */
1269 if (migration_bitmap_clear_dirty(dirty_ram_abs)) {
1270 unsigned long *unsentmap;
1271 if (compression_switch && migrate_use_compression()) {
9eb14766 1272 res = ram_save_compressed_page(ms, f, pss,
1273 last_stage,
1274 bytes_transferred);
1275 } else {
9eb14766 1276 res = ram_save_page(ms, f, pss, last_stage,
1277 bytes_transferred);
1278 }
1279
1280 if (res < 0) {
1281 return res;
1282 }
1283 unsentmap = atomic_rcu_read(&migration_bitmap_rcu)->unsentmap;
1284 if (unsentmap) {
1285 clear_bit(dirty_ram_abs >> TARGET_PAGE_BITS, unsentmap);
1286 }
1287 /* Only update last_sent_block if a block was actually sent; xbzrle
1288 * might have decided the page was identical so didn't bother writing
1289 * to the stream.
1290 */
1291 if (res > 0) {
a08f6890 1292 last_sent_block = pss->block;
3fd3c4b3 1293 }
1294 }
1295
1296 return res;
1297}
1298
1299/**
cb8d4c8f 1300 * ram_save_host_page: Starting at *offset send pages up to the end
1301 * of the current host page. It's valid for the initial
1302 * offset to point into the middle of a host page
1303 * in which case the remainder of the hostpage is sent.
1304 * Only dirty target pages are sent.
1305 *
1306 * Returns: Number of pages written.
1307 *
1308 * @f: QEMUFile where to send the data
1309 * @block: pointer to block that contains the page we want to send
1310 * @offset: offset inside the block for the page; updated to last target page
1311 * sent
1312 * @last_stage: if we are at the completion stage
1313 * @bytes_transferred: increase it with the number of transferred bytes
1314 * @dirty_ram_abs: Address of the start of the dirty page in ram_addr_t space
1315 */
1316static int ram_save_host_page(MigrationState *ms, QEMUFile *f,
1317 PageSearchStatus *pss,
1318 bool last_stage,
1319 uint64_t *bytes_transferred,
1320 ram_addr_t dirty_ram_abs)
1321{
1322 int tmppages, pages = 0;
1323 do {
a08f6890 1324 tmppages = ram_save_target_page(ms, f, pss, last_stage,
1325 bytes_transferred, dirty_ram_abs);
1326 if (tmppages < 0) {
1327 return tmppages;
1328 }
1329
1330 pages += tmppages;
a08f6890 1331 pss->offset += TARGET_PAGE_SIZE;
a82d593b 1332 dirty_ram_abs += TARGET_PAGE_SIZE;
a08f6890 1333 } while (pss->offset & (qemu_host_page_size - 1));
1334
1335 /* The offset we leave with is the last one we looked at */
a08f6890 1336 pss->offset -= TARGET_PAGE_SIZE;
1337 return pages;
1338}
6c595cde 1339
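/* For example, with 64 KiB host pages and 4 KiB target pages,
 * ram_save_host_page() above walks up to 16 consecutive target pages,
 * sending the dirty ones, before returning to the caller.
 */
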
1340/**
1341 * ram_find_and_save_block: Finds a dirty page and sends it to f
1342 *
1343 * Called within an RCU critical section.
1344 *
1345 * Returns: The number of pages written
1346 * 0 means no dirty pages
1347 *
1348 * @f: QEMUFile where to send the data
1349 * @last_stage: if we are at the completion stage
1350 * @bytes_transferred: increase it with the number of transferred bytes
1351 *
1352 * On systems where host-page-size > target-page-size it will send all the
1353 * pages in a host page that are dirty.
1354 */
1355
1356static int ram_find_and_save_block(QEMUFile *f, bool last_stage,
1357 uint64_t *bytes_transferred)
1358{
b8fb8cb7 1359 PageSearchStatus pss;
a82d593b 1360 MigrationState *ms = migrate_get_current();
56e93d26 1361 int pages = 0;
b9e60928 1362 bool again, found;
1363 ram_addr_t dirty_ram_abs; /* Address of the start of the dirty page in
1364 ram_addr_t space */
56e93d26 1365
1366 /* No dirty page as there is zero RAM */
1367 if (!ram_bytes_total()) {
1368 return pages;
1369 }
1370
b8fb8cb7
DDAG
1371 pss.block = last_seen_block;
1372 pss.offset = last_offset;
1373 pss.complete_round = false;
1374
1375 if (!pss.block) {
1376 pss.block = QLIST_FIRST_RCU(&ram_list.blocks);
1377 }
56e93d26 1378
b9e60928 1379 do {
1380 again = true;
1381 found = get_queued_page(ms, &pss, &dirty_ram_abs);
b9e60928 1382
1383 if (!found) {
1384 /* priority queue empty, so just search for something dirty */
1385 found = find_dirty_block(f, &pss, &again, &dirty_ram_abs);
1386 }
f3f491fc 1387
a82d593b 1388 if (found) {
a08f6890 1389 pages = ram_save_host_page(ms, f, &pss,
1390 last_stage, bytes_transferred,
1391 dirty_ram_abs);
56e93d26 1392 }
b9e60928 1393 } while (!pages && again);
56e93d26 1394
1395 last_seen_block = pss.block;
1396 last_offset = pss.offset;
1397
1398 return pages;
1399}
1400
1401void acct_update_position(QEMUFile *f, size_t size, bool zero)
1402{
1403 uint64_t pages = size / TARGET_PAGE_SIZE;
1404 if (zero) {
1405 acct_info.dup_pages += pages;
1406 } else {
1407 acct_info.norm_pages += pages;
1408 bytes_transferred += size;
1409 qemu_update_position(f, size);
1410 }
1411}
1412
1413static ram_addr_t ram_save_remaining(void)
1414{
1415 return migration_dirty_pages;
1416}
1417
1418uint64_t ram_bytes_remaining(void)
1419{
1420 return ram_save_remaining() * TARGET_PAGE_SIZE;
1421}
1422
1423uint64_t ram_bytes_transferred(void)
1424{
1425 return bytes_transferred;
1426}
1427
1428uint64_t ram_bytes_total(void)
1429{
1430 RAMBlock *block;
1431 uint64_t total = 0;
1432
1433 rcu_read_lock();
1434 QLIST_FOREACH_RCU(block, &ram_list.blocks, next)
1435 total += block->used_length;
1436 rcu_read_unlock();
1437 return total;
1438}
1439
1440void free_xbzrle_decoded_buf(void)
1441{
1442 g_free(xbzrle_decoded_buf);
1443 xbzrle_decoded_buf = NULL;
1444}
1445
1446static void migration_bitmap_free(struct BitmapRcu *bmap)
1447{
1448 g_free(bmap->bmap);
f3f491fc 1449 g_free(bmap->unsentmap);
1450 g_free(bmap);
1451}
1452
6ad2a215 1453static void ram_migration_cleanup(void *opaque)
56e93d26 1454{
    /* The caller must hold the iothread lock or be running in a bh, so there
     * is no writing race against this migration_bitmap.
     */
1458 struct BitmapRcu *bitmap = migration_bitmap_rcu;
1459 atomic_rcu_set(&migration_bitmap_rcu, NULL);
2ff64038 1460 if (bitmap) {
56e93d26 1461 memory_global_dirty_log_stop();
60be6340 1462 call_rcu(bitmap, migration_bitmap_free, rcu);
1463 }
1464
1465 XBZRLE_cache_lock();
1466 if (XBZRLE.cache) {
1467 cache_fini(XBZRLE.cache);
1468 g_free(XBZRLE.encoded_buf);
1469 g_free(XBZRLE.current_buf);
adb65dec 1470 g_free(ZERO_TARGET_PAGE);
1471 XBZRLE.cache = NULL;
1472 XBZRLE.encoded_buf = NULL;
1473 XBZRLE.current_buf = NULL;
1474 }
1475 XBZRLE_cache_unlock();
1476}
1477
1478static void reset_ram_globals(void)
1479{
1480 last_seen_block = NULL;
1481 last_sent_block = NULL;
1482 last_offset = 0;
1483 last_version = ram_list.version;
1484 ram_bulk_stage = true;
1485}
1486
1487#define MAX_WAIT 50 /* ms, half buffered_file limit */
1488
1489void migration_bitmap_extend(ram_addr_t old, ram_addr_t new)
1490{
    /* Called from the QEMU main thread, so there is
     * no writing race against this migration_bitmap.
     */
1494 if (migration_bitmap_rcu) {
1495 struct BitmapRcu *old_bitmap = migration_bitmap_rcu, *bitmap;
1496 bitmap = g_new(struct BitmapRcu, 1);
1497 bitmap->bmap = bitmap_new(new);
1498
        /* Prevent bits in migration_bitmap from being set concurrently
         * by migration_bitmap_sync_range(); migration is still safe if
         * bits are only cleared concurrently.
         */
1504 qemu_mutex_lock(&migration_bitmap_mutex);
1505 bitmap_copy(bitmap->bmap, old_bitmap->bmap, old);
1506 bitmap_set(bitmap->bmap, old, new - old);
1507
        /* We don't have a way to safely extend the sentmap
         * with RCU; so mark it as missing, and entry to postcopy
         * will then fail.
         */
1512 bitmap->unsentmap = NULL;
1513
60be6340 1514 atomic_rcu_set(&migration_bitmap_rcu, bitmap);
1515 qemu_mutex_unlock(&migration_bitmap_mutex);
1516 migration_dirty_pages += new - old;
60be6340 1517 call_rcu(old_bitmap, migration_bitmap_free, rcu);
1518 }
1519}
56e93d26 1520
1521/*
1522 * 'expected' is the value you expect the bitmap mostly to be full
1523 * of; it won't bother printing lines that are all this value.
1524 * If 'todump' is null the migration bitmap is dumped.
1525 */
1526void ram_debug_dump_bitmap(unsigned long *todump, bool expected)
1527{
1528 int64_t ram_pages = last_ram_offset() >> TARGET_PAGE_BITS;
1529
1530 int64_t cur;
1531 int64_t linelen = 128;
1532 char linebuf[129];
1533
1534 if (!todump) {
1535 todump = atomic_rcu_read(&migration_bitmap_rcu)->bmap;
1536 }
1537
1538 for (cur = 0; cur < ram_pages; cur += linelen) {
1539 int64_t curb;
1540 bool found = false;
1541 /*
1542 * Last line; catch the case where the line length
1543 * is longer than remaining ram
1544 */
1545 if (cur + linelen > ram_pages) {
1546 linelen = ram_pages - cur;
1547 }
1548 for (curb = 0; curb < linelen; curb++) {
1549 bool thisbit = test_bit(cur + curb, todump);
1550 linebuf[curb] = thisbit ? '1' : '.';
1551 found = found || (thisbit != expected);
1552 }
1553 if (found) {
1554 linebuf[curb] = '\0';
1555 fprintf(stderr, "0x%08" PRIx64 " : %s\n", cur, linebuf);
1556 }
1557 }
1558}
1559
1560/* **** functions for postcopy ***** */
1561
1562void ram_postcopy_migrated_memory_release(MigrationState *ms)
1563{
1564 struct RAMBlock *block;
1565 unsigned long *bitmap = atomic_rcu_read(&migration_bitmap_rcu)->bmap;
1566
1567 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1568 unsigned long first = block->offset >> TARGET_PAGE_BITS;
1569 unsigned long range = first + (block->used_length >> TARGET_PAGE_BITS);
1570 unsigned long run_start = find_next_zero_bit(bitmap, range, first);
1571
1572 while (run_start < range) {
1573 unsigned long run_end = find_next_bit(bitmap, range, run_start + 1);
1574 ram_discard_range(NULL, block->idstr, run_start << TARGET_PAGE_BITS,
1575 (run_end - run_start) << TARGET_PAGE_BITS);
1576 run_start = find_next_zero_bit(bitmap, range, run_end + 1);
1577 }
1578 }
1579}
1580
1581/*
1582 * Callback from postcopy_each_ram_send_discard for each RAMBlock
1583 * Note: At this point the 'unsentmap' is the processed bitmap combined
1584 * with the dirtymap; so a '1' means it's either dirty or unsent.
1585 * start,length: Indexes into the bitmap for the first bit
1586 * representing the named block and length in target-pages
1587 */
1588static int postcopy_send_discard_bm_ram(MigrationState *ms,
1589 PostcopyDiscardState *pds,
1590 unsigned long start,
1591 unsigned long length)
1592{
1593 unsigned long end = start + length; /* one after the end */
1594 unsigned long current;
1595 unsigned long *unsentmap;
1596
1597 unsentmap = atomic_rcu_read(&migration_bitmap_rcu)->unsentmap;
1598 for (current = start; current < end; ) {
1599 unsigned long one = find_next_bit(unsentmap, end, current);
1600
1601 if (one <= end) {
1602 unsigned long zero = find_next_zero_bit(unsentmap, end, one + 1);
1603 unsigned long discard_length;
1604
1605 if (zero >= end) {
1606 discard_length = end - one;
1607 } else {
1608 discard_length = zero - one;
1609 }
1610 if (discard_length) {
1611 postcopy_discard_send_range(ms, pds, one, discard_length);
1612 }
1613 current = one + discard_length;
1614 } else {
1615 current = one;
1616 }
1617 }
1618
1619 return 0;
1620}
1621
1622/*
1623 * Utility for the outgoing postcopy code.
1624 * Calls postcopy_send_discard_bm_ram for each RAMBlock
1625 * passing it bitmap indexes and name.
1626 * Returns: 0 on success
1627 * (qemu_ram_foreach_block ends up passing unscaled lengths
1628 * which would mean postcopy code would have to deal with target page)
1629 */
1630static int postcopy_each_ram_send_discard(MigrationState *ms)
1631{
1632 struct RAMBlock *block;
1633 int ret;
1634
1635 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1636 unsigned long first = block->offset >> TARGET_PAGE_BITS;
1637 PostcopyDiscardState *pds = postcopy_discard_send_init(ms,
1638 first,
1639 block->idstr);
1640
1641 /*
1642 * Postcopy sends chunks of bitmap over the wire, but it
1643 * just needs indexes at this point, avoids it having
1644 * target page specific code.
1645 */
1646 ret = postcopy_send_discard_bm_ram(ms, pds, first,
1647 block->used_length >> TARGET_PAGE_BITS);
1648 postcopy_discard_send_finish(ms, pds);
1649 if (ret) {
1650 return ret;
1651 }
1652 }
1653
1654 return 0;
1655}
1656
1657/*
 * Helper for postcopy_chunk_hostpages; it's called twice to clean up
1659 * the two bitmaps, that are similar, but one is inverted.
1660 *
1661 * We search for runs of target-pages that don't start or end on a
1662 * host page boundary;
1663 * unsent_pass=true: Cleans up partially unsent host pages by searching
1664 * the unsentmap
1665 * unsent_pass=false: Cleans up partially dirty host pages by searching
1666 * the main migration bitmap
1667 *
1668 */
1669static void postcopy_chunk_hostpages_pass(MigrationState *ms, bool unsent_pass,
1670 RAMBlock *block,
1671 PostcopyDiscardState *pds)
1672{
1673 unsigned long *bitmap;
1674 unsigned long *unsentmap;
29c59172 1675 unsigned int host_ratio = block->page_size / TARGET_PAGE_SIZE;
1676 unsigned long first = block->offset >> TARGET_PAGE_BITS;
1677 unsigned long len = block->used_length >> TARGET_PAGE_BITS;
1678 unsigned long last = first + (len - 1);
1679 unsigned long run_start;
1680
1681 if (block->page_size == TARGET_PAGE_SIZE) {
1682 /* Easy case - TPS==HPS for a non-huge page RAMBlock */
1683 return;
1684 }
1685
1686 bitmap = atomic_rcu_read(&migration_bitmap_rcu)->bmap;
1687 unsentmap = atomic_rcu_read(&migration_bitmap_rcu)->unsentmap;
1688
1689 if (unsent_pass) {
1690 /* Find a sent page */
1691 run_start = find_next_zero_bit(unsentmap, last + 1, first);
1692 } else {
1693 /* Find a dirty page */
1694 run_start = find_next_bit(bitmap, last + 1, first);
1695 }
1696
1697 while (run_start <= last) {
1698 bool do_fixup = false;
1699 unsigned long fixup_start_addr;
1700 unsigned long host_offset;
1701
1702 /*
1703 * If the start of this run of pages is in the middle of a host
1704 * page, then we need to fixup this host page.
1705 */
1706 host_offset = run_start % host_ratio;
1707 if (host_offset) {
1708 do_fixup = true;
1709 run_start -= host_offset;
1710 fixup_start_addr = run_start;
1711 /* For the next pass */
1712 run_start = run_start + host_ratio;
1713 } else {
1714 /* Find the end of this run */
1715 unsigned long run_end;
1716 if (unsent_pass) {
1717 run_end = find_next_bit(unsentmap, last + 1, run_start + 1);
1718 } else {
1719 run_end = find_next_zero_bit(bitmap, last + 1, run_start + 1);
1720 }
1721 /*
1722 * If the end isn't at the start of a host page, then the
1723 * run doesn't finish at the end of a host page
1724 * and we need to discard.
1725 */
1726 host_offset = run_end % host_ratio;
1727 if (host_offset) {
1728 do_fixup = true;
1729 fixup_start_addr = run_end - host_offset;
1730 /*
1731 * This host page has gone, the next loop iteration starts
1732 * from after the fixup
1733 */
1734 run_start = fixup_start_addr + host_ratio;
1735 } else {
1736 /*
1737 * No discards on this iteration, next loop starts from
1738 * next sent/dirty page
1739 */
1740 run_start = run_end + 1;
1741 }
1742 }
1743
1744 if (do_fixup) {
1745 unsigned long page;
1746
1747 /* Tell the destination to discard this page */
1748 if (unsent_pass || !test_bit(fixup_start_addr, unsentmap)) {
1749 /* For the unsent_pass we:
1750 * discard partially sent pages
1751 * For the !unsent_pass (dirty) we:
1752 * discard partially dirty pages that were sent
1753 * (any partially sent pages were already discarded
1754 * by the previous unsent_pass)
1755 */
1756 postcopy_discard_send_range(ms, pds, fixup_start_addr,
1757 host_ratio);
1758 }
1759
1760 /* Clean up the bitmap */
1761 for (page = fixup_start_addr;
1762 page < fixup_start_addr + host_ratio; page++) {
1763 /* All pages in this host page are now not sent */
1764 set_bit(page, unsentmap);
1765
1766 /*
1767 * Remark them as dirty, updating the count for any pages
1768 * that weren't previously dirty.
1769 */
1770 migration_dirty_pages += !test_and_set_bit(page, bitmap);
1771 }
1772 }
1773
1774 if (unsent_pass) {
1775 /* Find the next sent page for the next iteration */
1776 run_start = find_next_zero_bit(unsentmap, last + 1,
1777 run_start);
1778 } else {
1779 /* Find the next dirty page for the next iteration */
1780 run_start = find_next_bit(bitmap, last + 1, run_start);
1781 }
1782 }
1783}
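
/*
 * Illustrative sketch, not part of ram.c: the alignment arithmetic used by
 * the pass above, shown in isolation.  Given a run boundary in target-page
 * units and host_ratio = host page size / target page size, the whole host
 * page containing that boundary is selected for fixup (discard + re-dirty).
 * The helper name and the figures in main() are hypothetical.
 */
#include <stdio.h>

static unsigned long fixup_host_page_start(unsigned long run_boundary,
                                           unsigned int host_ratio)
{
    /* Round the target-page index down to the start of its host page */
    return run_boundary - (run_boundary % host_ratio);
}

int main(void)
{
    /* e.g. 2MB huge pages with 4KB target pages -> host_ratio = 512 */
    unsigned int host_ratio = 512;
    unsigned long run_start = 1000;    /* a run starting mid host page */

    printf("fixup covers target pages %lu..%lu\n",
           fixup_host_page_start(run_start, host_ratio),
           fixup_host_page_start(run_start, host_ratio) + host_ratio - 1);
    return 0;
}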
1784
1785/*
1786 * Utility for the outgoing postcopy code.
1787 *
1788 * Discard any partially sent host-page size chunks, mark any partially
29c59172
DDAG
1789 * dirty host-page size chunks as all dirty.  The host-page size used
1790 * here is that of the particular RAMBlock, i.e. it might be a huge page.
99e314eb
DDAG
1791 *
1792 * Returns: 0 on success
1793 */
1794static int postcopy_chunk_hostpages(MigrationState *ms)
1795{
1796 struct RAMBlock *block;
1797
99e314eb
DDAG
1798 /* Easiest way to make sure we don't resume in the middle of a host-page */
1799 last_seen_block = NULL;
1800 last_sent_block = NULL;
1801 last_offset = 0;
1802
1803 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1804 unsigned long first = block->offset >> TARGET_PAGE_BITS;
1805
1806 PostcopyDiscardState *pds =
1807 postcopy_discard_send_init(ms, first, block->idstr);
1808
1809 /* First pass: Discard all partially sent host pages */
1810 postcopy_chunk_hostpages_pass(ms, true, block, pds);
1811 /*
1812 * Second pass: Ensure that all partially dirty host pages are made
1813 * fully dirty.
1814 */
1815 postcopy_chunk_hostpages_pass(ms, false, block, pds);
1816
1817 postcopy_discard_send_finish(ms, pds);
1818 } /* ram_list loop */
1819
1820 return 0;
1821}
1822
e0b266f0
DDAG
1823/*
1824 * Transmit the set of pages to be discarded after precopy to the target;
1825 * these are pages that:
1826 * a) Have been previously transmitted but are now dirty again
1827 * b) Have never been transmitted; this ensures that
1828 * any pages on the destination that have been mapped by background
1829 * tasks get discarded (transparent huge pages are the specific concern)
1830 * Hopefully this is pretty sparse.
1831 */
1832int ram_postcopy_send_discard_bitmap(MigrationState *ms)
1833{
1834 int ret;
1835 unsigned long *bitmap, *unsentmap;
1836
1837 rcu_read_lock();
1838
1839 /* This should be our last sync, the src is now paused */
1840 migration_bitmap_sync();
1841
1842 unsentmap = atomic_rcu_read(&migration_bitmap_rcu)->unsentmap;
1843 if (!unsentmap) {
1844 /* We don't have a safe way to resize the sentmap, so
1845 * if the bitmap was resized it will be NULL at this
1846 * point.
1847 */
1848 error_report("migration ram resized during precopy phase");
1849 rcu_read_unlock();
1850 return -EINVAL;
1851 }
1852
29c59172 1853 /* Deal with TPS != HPS and huge pages */
99e314eb
DDAG
1854 ret = postcopy_chunk_hostpages(ms);
1855 if (ret) {
1856 rcu_read_unlock();
1857 return ret;
1858 }
1859
e0b266f0
DDAG
1860 /*
1861 * Update the unsentmap to be unsentmap = unsentmap | dirty
1862 */
1863 bitmap = atomic_rcu_read(&migration_bitmap_rcu)->bmap;
1864 bitmap_or(unsentmap, unsentmap, bitmap,
1865 last_ram_offset() >> TARGET_PAGE_BITS);
1866
1867
1868 trace_ram_postcopy_send_discard_bitmap();
1869#ifdef DEBUG_POSTCOPY
1870 ram_debug_dump_bitmap(unsentmap, true);
1871#endif
1872
1873 ret = postcopy_each_ram_send_discard(ms);
1874 rcu_read_unlock();
1875
1876 return ret;
1877}
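
/*
 * Illustrative sketch, not part of ram.c: the effect of the
 * "unsentmap = unsentmap | dirty" merge above, using plain C bit arrays
 * instead of QEMU's bitmap helpers.  After the merge, a set bit means the
 * destination must discard that page: either it was never sent, or it has
 * been dirtied again since it was sent.  The helper name is hypothetical.
 */
#include <limits.h>

#define BITS_PER_WORD (sizeof(unsigned long) * CHAR_BIT)

static void bitmap_or_sketch(unsigned long *dst, const unsigned long *src,
                             unsigned long nbits)
{
    unsigned long nwords = (nbits + BITS_PER_WORD - 1) / BITS_PER_WORD;

    for (unsigned long i = 0; i < nwords; i++) {
        dst[i] |= src[i];   /* dst = dst | src, one word at a time */
    }
}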
1878
1879/*
1880 * At the start of the postcopy phase of migration, any now-dirty
1881 * precopied pages are discarded.
1882 *
1883 * start, length describe a byte address range within the RAMBlock
1884 *
1885 * Returns 0 on success.
1886 */
1887int ram_discard_range(MigrationIncomingState *mis,
1888 const char *block_name,
1889 uint64_t start, size_t length)
1890{
1891 int ret = -1;
1892
d3a5038c
DDAG
1893 trace_ram_discard_range(block_name, start, length);
1894
e0b266f0
DDAG
1895 rcu_read_lock();
1896 RAMBlock *rb = qemu_ram_block_by_name(block_name);
1897
1898 if (!rb) {
1899 error_report("ram_discard_range: Failed to find block '%s'",
1900 block_name);
1901 goto err;
1902 }
1903
d3a5038c 1904 ret = ram_block_discard_range(rb, start, length);
e0b266f0
DDAG
1905
1906err:
1907 rcu_read_unlock();
1908
1909 return ret;
1910}
1911
a91246c9 1912static int ram_save_init_globals(void)
56e93d26 1913{
56e93d26
JQ
1914 int64_t ram_bitmap_pages; /* Size of bitmap in pages, including gaps */
1915
56e93d26
JQ
1916 dirty_rate_high_cnt = 0;
1917 bitmap_sync_count = 0;
1918 migration_bitmap_sync_init();
dd631697 1919 qemu_mutex_init(&migration_bitmap_mutex);
56e93d26
JQ
1920
1921 if (migrate_use_xbzrle()) {
1922 XBZRLE_cache_lock();
adb65dec 1923 ZERO_TARGET_PAGE = g_malloc0(TARGET_PAGE_SIZE);
56e93d26
JQ
1924 XBZRLE.cache = cache_init(migrate_xbzrle_cache_size() /
1925 TARGET_PAGE_SIZE,
1926 TARGET_PAGE_SIZE);
1927 if (!XBZRLE.cache) {
1928 XBZRLE_cache_unlock();
1929 error_report("Error creating cache");
1930 return -1;
1931 }
1932 XBZRLE_cache_unlock();
1933
1934 /* We prefer not to abort if there is no memory */
1935 XBZRLE.encoded_buf = g_try_malloc0(TARGET_PAGE_SIZE);
1936 if (!XBZRLE.encoded_buf) {
1937 error_report("Error allocating encoded_buf");
1938 return -1;
1939 }
1940
1941 XBZRLE.current_buf = g_try_malloc(TARGET_PAGE_SIZE);
1942 if (!XBZRLE.current_buf) {
1943 error_report("Error allocating current_buf");
1944 g_free(XBZRLE.encoded_buf);
1945 XBZRLE.encoded_buf = NULL;
1946 return -1;
1947 }
1948
1949 acct_clear();
1950 }
1951
49877834
PB
1952 /* For memory_global_dirty_log_start below. */
1953 qemu_mutex_lock_iothread();
1954
56e93d26
JQ
1955 qemu_mutex_lock_ramlist();
1956 rcu_read_lock();
1957 bytes_transferred = 0;
1958 reset_ram_globals();
1959
f3f491fc 1960 migration_bitmap_rcu = g_new0(struct BitmapRcu, 1);
0827b9e9
AA
1961 /* Skip setting bitmap if there is no RAM */
1962 if (ram_bytes_total()) {
1963 ram_bitmap_pages = last_ram_offset() >> TARGET_PAGE_BITS;
1964 migration_bitmap_rcu->bmap = bitmap_new(ram_bitmap_pages);
1965 bitmap_set(migration_bitmap_rcu->bmap, 0, ram_bitmap_pages);
1966
1967 if (migrate_postcopy_ram()) {
1968 migration_bitmap_rcu->unsentmap = bitmap_new(ram_bitmap_pages);
1969 bitmap_set(migration_bitmap_rcu->unsentmap, 0, ram_bitmap_pages);
1970 }
f3f491fc
DDAG
1971 }
1972
56e93d26
JQ
1973 /*
1974 * Count the total number of pages used by ram blocks not including any
1975 * gaps due to alignment or unplugs.
1976 */
1977 migration_dirty_pages = ram_bytes_total() >> TARGET_PAGE_BITS;
1978
1979 memory_global_dirty_log_start();
1980 migration_bitmap_sync();
1981 qemu_mutex_unlock_ramlist();
49877834 1982 qemu_mutex_unlock_iothread();
a91246c9
HZ
1983 rcu_read_unlock();
1984
1985 return 0;
1986}
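
/*
 * Illustrative sketch, not part of ram.c: sizing of the migration bitmap
 * allocated above - one bit per target page across the whole RAM address
 * range.  A 4KB target page and a 16GB guest are assumed purely as an
 * example.
 */
#include <stdio.h>
#include <stdint.h>

int main(void)
{
    uint64_t ram_bytes = 16ULL << 30;           /* hypothetical 16GB guest */
    unsigned int target_page_bits = 12;         /* 4KB target pages        */
    uint64_t pages = ram_bytes >> target_page_bits;
    uint64_t bitmap_bytes = (pages + 7) / 8;    /* one bit per page        */

    printf("%llu pages -> %llu bytes per bitmap\n",
           (unsigned long long)pages, (unsigned long long)bitmap_bytes);
    return 0;
}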
1987
1988/* Each of ram_save_setup, ram_save_iterate and ram_save_complete has
1989 * a long-running RCU critical section.  When RCU reclaims in the code
1990 * start to become numerous, it will be necessary to reduce the
1991 * granularity of these critical sections.
1992 */
1993
1994static int ram_save_setup(QEMUFile *f, void *opaque)
1995{
1996 RAMBlock *block;
1997
1998 /* migration has already setup the bitmap, reuse it. */
1999 if (!migration_in_colo_state()) {
2000 if (ram_save_init_globals() < 0) {
2001 return -1;
2002 }
2003 }
2004
2005 rcu_read_lock();
56e93d26
JQ
2006
2007 qemu_put_be64(f, ram_bytes_total() | RAM_SAVE_FLAG_MEM_SIZE);
2008
2009 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
2010 qemu_put_byte(f, strlen(block->idstr));
2011 qemu_put_buffer(f, (uint8_t *)block->idstr, strlen(block->idstr));
2012 qemu_put_be64(f, block->used_length);
ef08fb38
DDAG
2013 if (migrate_postcopy_ram() && block->page_size != qemu_host_page_size) {
2014 qemu_put_be64(f, block->page_size);
2015 }
56e93d26
JQ
2016 }
2017
2018 rcu_read_unlock();
2019
2020 ram_control_before_iterate(f, RAM_CONTROL_SETUP);
2021 ram_control_after_iterate(f, RAM_CONTROL_SETUP);
2022
2023 qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
2024
2025 return 0;
2026}
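
/*
 * Illustrative sketch, not part of ram.c: the per-RAMBlock record that the
 * setup stage above emits, serialised into a plain buffer instead of a
 * QEMUFile.  Field order and widths follow the qemu_put_* calls above; the
 * helper names are hypothetical and error handling is elided.
 */
#include <stdint.h>
#include <string.h>

static size_t put_be64_sketch(uint8_t *p, uint64_t v)
{
    for (int i = 0; i < 8; i++) {
        p[i] = (uint8_t)(v >> (56 - 8 * i));    /* big-endian byte order */
    }
    return 8;
}

static size_t encode_block_record(uint8_t *buf, const char *idstr,
                                  uint64_t used_length,
                                  uint64_t page_size, int send_page_size)
{
    size_t off = 0;
    uint8_t len = (uint8_t)strlen(idstr);

    buf[off++] = len;                           /* 1 byte: idstr length  */
    memcpy(buf + off, idstr, len);              /* idstr, no terminator  */
    off += len;
    off += put_be64_sketch(buf + off, used_length);
    if (send_page_size) {                       /* only for postcopy and */
        off += put_be64_sketch(buf + off, page_size); /* non-default HPS */
    }
    return off;
}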
2027
2028static int ram_save_iterate(QEMUFile *f, void *opaque)
2029{
2030 int ret;
2031 int i;
2032 int64_t t0;
5c90308f 2033 int done = 0;
56e93d26
JQ
2034
2035 rcu_read_lock();
2036 if (ram_list.version != last_version) {
2037 reset_ram_globals();
2038 }
2039
2040 /* Read version before ram_list.blocks */
2041 smp_rmb();
2042
2043 ram_control_before_iterate(f, RAM_CONTROL_ROUND);
2044
2045 t0 = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
2046 i = 0;
2047 while ((ret = qemu_file_rate_limit(f)) == 0) {
2048 int pages;
2049
2050 pages = ram_find_and_save_block(f, false, &bytes_transferred);
2051 /* no more pages to send */
2052 if (pages == 0) {
5c90308f 2053 done = 1;
56e93d26
JQ
2054 break;
2055 }
56e93d26 2056 acct_info.iterations++;
070afca2 2057
56e93d26
JQ
2058 /* we want to check in the 1st loop, just in case it was the 1st time
2059 and we had to sync the dirty bitmap.
2060 qemu_clock_get_ns() is a bit expensive, so we only check once
2061 every 64 iterations
2062 */
2063 if ((i & 63) == 0) {
2064 uint64_t t1 = (qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - t0) / 1000000;
2065 if (t1 > MAX_WAIT) {
55c4446b 2066 trace_ram_save_iterate_big_wait(t1, i);
56e93d26
JQ
2067 break;
2068 }
2069 }
2070 i++;
2071 }
2072 flush_compressed_data(f);
2073 rcu_read_unlock();
2074
2075 /*
2076 * Must occur before EOS (or any QEMUFile operation)
2077 * because of RDMA protocol.
2078 */
2079 ram_control_after_iterate(f, RAM_CONTROL_ROUND);
2080
2081 qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
2082 bytes_transferred += 8;
2083
2084 ret = qemu_file_get_error(f);
2085 if (ret < 0) {
2086 return ret;
2087 }
2088
5c90308f 2089 return done;
56e93d26
JQ
2090}
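
/*
 * Illustrative sketch, not part of ram.c: the amortised time check used in
 * the iteration loop above - the clock is only sampled once every 64
 * iterations because reading it is comparatively expensive.
 * clock_gettime() stands in for qemu_clock_get_ns(); MAX_WAIT_MS and
 * do_one_unit_of_work() are hypothetical.
 */
#include <stdint.h>
#include <time.h>

#define MAX_WAIT_MS 50

static uint64_t now_ms(void)
{
    struct timespec ts;

    clock_gettime(CLOCK_MONOTONIC, &ts);
    return (uint64_t)ts.tv_sec * 1000 + ts.tv_nsec / 1000000;
}

static void bounded_loop(void (*do_one_unit_of_work)(void))
{
    uint64_t t0 = now_ms();

    for (int i = 0; ; i++) {
        do_one_unit_of_work();
        if ((i & 63) == 0 && now_ms() - t0 > MAX_WAIT_MS) {
            break;      /* spent too long in one round, yield control */
        }
    }
}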
2091
2092/* Called with iothread lock */
2093static int ram_save_complete(QEMUFile *f, void *opaque)
2094{
2095 rcu_read_lock();
2096
663e6c1d
DDAG
2097 if (!migration_in_postcopy(migrate_get_current())) {
2098 migration_bitmap_sync();
2099 }
56e93d26
JQ
2100
2101 ram_control_before_iterate(f, RAM_CONTROL_FINISH);
2102
2103 /* try transferring iterative blocks of memory */
2104
2105 /* flush all remaining blocks regardless of rate limiting */
2106 while (true) {
2107 int pages;
2108
a91246c9
HZ
2109 pages = ram_find_and_save_block(f, !migration_in_colo_state(),
2110 &bytes_transferred);
56e93d26
JQ
2111 /* no more blocks to send */
2112 if (pages == 0) {
2113 break;
2114 }
2115 }
2116
2117 flush_compressed_data(f);
2118 ram_control_after_iterate(f, RAM_CONTROL_FINISH);
56e93d26
JQ
2119
2120 rcu_read_unlock();
d09a6fde 2121
56e93d26
JQ
2122 qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
2123
2124 return 0;
2125}
2126
c31b098f
DDAG
2127static void ram_save_pending(QEMUFile *f, void *opaque, uint64_t max_size,
2128 uint64_t *non_postcopiable_pending,
2129 uint64_t *postcopiable_pending)
56e93d26
JQ
2130{
2131 uint64_t remaining_size;
2132
2133 remaining_size = ram_save_remaining() * TARGET_PAGE_SIZE;
2134
663e6c1d
DDAG
2135 if (!migration_in_postcopy(migrate_get_current()) &&
2136 remaining_size < max_size) {
56e93d26
JQ
2137 qemu_mutex_lock_iothread();
2138 rcu_read_lock();
2139 migration_bitmap_sync();
2140 rcu_read_unlock();
2141 qemu_mutex_unlock_iothread();
2142 remaining_size = ram_save_remaining() * TARGET_PAGE_SIZE;
2143 }
c31b098f
DDAG
2144
2145 /* We can do postcopy, and all the data is postcopiable */
2146 *postcopiable_pending += remaining_size;
56e93d26
JQ
2147}
2148
2149static int load_xbzrle(QEMUFile *f, ram_addr_t addr, void *host)
2150{
2151 unsigned int xh_len;
2152 int xh_flags;
063e760a 2153 uint8_t *loaded_data;
56e93d26
JQ
2154
2155 if (!xbzrle_decoded_buf) {
2156 xbzrle_decoded_buf = g_malloc(TARGET_PAGE_SIZE);
2157 }
063e760a 2158 loaded_data = xbzrle_decoded_buf;
56e93d26
JQ
2159
2160 /* extract RLE header */
2161 xh_flags = qemu_get_byte(f);
2162 xh_len = qemu_get_be16(f);
2163
2164 if (xh_flags != ENCODING_FLAG_XBZRLE) {
2165 error_report("Failed to load XBZRLE page - wrong compression!");
2166 return -1;
2167 }
2168
2169 if (xh_len > TARGET_PAGE_SIZE) {
2170 error_report("Failed to load XBZRLE page - len overflow!");
2171 return -1;
2172 }
2173 /* load data and decode */
063e760a 2174 qemu_get_buffer_in_place(f, &loaded_data, xh_len);
56e93d26
JQ
2175
2176 /* decode RLE */
063e760a 2177 if (xbzrle_decode_buffer(loaded_data, xh_len, host,
56e93d26
JQ
2178 TARGET_PAGE_SIZE) == -1) {
2179 error_report("Failed to load XBZRLE page - decode error!");
2180 return -1;
2181 }
2182
2183 return 0;
2184}
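
/*
 * Illustrative sketch, not part of ram.c: the on-the-wire header that
 * load_xbzrle() above consumes - one flags byte followed by a big-endian
 * 16-bit length, then 'len' bytes of XBZRLE-encoded data.  The constants
 * are stand-ins for ENCODING_FLAG_XBZRLE and TARGET_PAGE_SIZE.
 */
#include <stdint.h>
#include <stddef.h>

#define ENC_FLAG_XBZRLE_SKETCH 0x1
#define PAGE_SIZE_SKETCH       4096

/* Returns the encoded length, or -1 if the header is invalid. */
static int parse_xbzrle_header(const uint8_t *buf, size_t buflen)
{
    if (buflen < 3) {
        return -1;
    }

    uint8_t flags = buf[0];
    int len = (buf[1] << 8) | buf[2];       /* big-endian 16-bit length */

    if (flags != ENC_FLAG_XBZRLE_SKETCH) {
        return -1;                          /* wrong compression        */
    }
    if (len > PAGE_SIZE_SKETCH) {
        return -1;                          /* length overflows a page  */
    }
    return len;
}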
2185
2186/* Must be called from within an RCU critical section.
2187 * Returns a pointer from within the RCU-protected ram_list.
2188 */
a7180877 2189/*
4c4bad48 2190 * Read a RAMBlock ID from the stream f.
a7180877
DDAG
2191 *
2192 * f: Stream to read from
a7180877
DDAG
2193 * flags: Page flags (mostly to see if it's a continuation of previous block)
2194 */
4c4bad48
HZ
2195static inline RAMBlock *ram_block_from_stream(QEMUFile *f,
2196 int flags)
56e93d26
JQ
2197{
2198 static RAMBlock *block = NULL;
2199 char id[256];
2200 uint8_t len;
2201
2202 if (flags & RAM_SAVE_FLAG_CONTINUE) {
4c4bad48 2203 if (!block) {
56e93d26
JQ
2204 error_report("Ack, bad migration stream!");
2205 return NULL;
2206 }
4c4bad48 2207 return block;
56e93d26
JQ
2208 }
2209
2210 len = qemu_get_byte(f);
2211 qemu_get_buffer(f, (uint8_t *)id, len);
2212 id[len] = 0;
2213
e3dd7493 2214 block = qemu_ram_block_by_name(id);
4c4bad48
HZ
2215 if (!block) {
2216 error_report("Can't find block %s", id);
2217 return NULL;
56e93d26
JQ
2218 }
2219
4c4bad48
HZ
2220 return block;
2221}
2222
2223static inline void *host_from_ram_block_offset(RAMBlock *block,
2224 ram_addr_t offset)
2225{
2226 if (!offset_in_ramblock(block, offset)) {
2227 return NULL;
2228 }
2229
2230 return block->host + offset;
56e93d26
JQ
2231}
2232
2233/*
2234 * If a page (or a whole RDMA chunk) has been
2235 * determined to be zero, then zap it.
2236 */
2237void ram_handle_compressed(void *host, uint8_t ch, uint64_t size)
2238{
2239 if (ch != 0 || !is_zero_range(host, size)) {
2240 memset(host, ch, size);
2241 }
2242}
2243
2244static void *do_data_decompress(void *opaque)
2245{
2246 DecompressParam *param = opaque;
2247 unsigned long pagesize;
33d151f4
LL
2248 uint8_t *des;
2249 int len;
56e93d26 2250
33d151f4 2251 qemu_mutex_lock(&param->mutex);
90e56fb4 2252 while (!param->quit) {
33d151f4
LL
2253 if (param->des) {
2254 des = param->des;
2255 len = param->len;
2256 param->des = 0;
2257 qemu_mutex_unlock(&param->mutex);
2258
56e93d26 2259 pagesize = TARGET_PAGE_SIZE;
73a8912b
LL
2260 /* uncompress() can fail in some cases, especially
2261 * when the page is dirtied while it is being compressed.  That is
2262 * not a problem because the dirty page will be retransferred
2263 * and uncompress() won't break the data in other pages.
2264 */
33d151f4
LL
2265 uncompress((Bytef *)des, &pagesize,
2266 (const Bytef *)param->compbuf, len);
73a8912b 2267
33d151f4
LL
2268 qemu_mutex_lock(&decomp_done_lock);
2269 param->done = true;
2270 qemu_cond_signal(&decomp_done_cond);
2271 qemu_mutex_unlock(&decomp_done_lock);
2272
2273 qemu_mutex_lock(&param->mutex);
2274 } else {
2275 qemu_cond_wait(&param->cond, &param->mutex);
2276 }
56e93d26 2277 }
33d151f4 2278 qemu_mutex_unlock(&param->mutex);
56e93d26
JQ
2279
2280 return NULL;
2281}
2282
5533b2e9
LL
2283static void wait_for_decompress_done(void)
2284{
2285 int idx, thread_count;
2286
2287 if (!migrate_use_compression()) {
2288 return;
2289 }
2290
2291 thread_count = migrate_decompress_threads();
2292 qemu_mutex_lock(&decomp_done_lock);
2293 for (idx = 0; idx < thread_count; idx++) {
2294 while (!decomp_param[idx].done) {
2295 qemu_cond_wait(&decomp_done_cond, &decomp_done_lock);
2296 }
2297 }
2298 qemu_mutex_unlock(&decomp_done_lock);
2299}
2300
56e93d26
JQ
2301void migrate_decompress_threads_create(void)
2302{
2303 int i, thread_count;
2304
2305 thread_count = migrate_decompress_threads();
2306 decompress_threads = g_new0(QemuThread, thread_count);
2307 decomp_param = g_new0(DecompressParam, thread_count);
73a8912b
LL
2308 qemu_mutex_init(&decomp_done_lock);
2309 qemu_cond_init(&decomp_done_cond);
56e93d26
JQ
2310 for (i = 0; i < thread_count; i++) {
2311 qemu_mutex_init(&decomp_param[i].mutex);
2312 qemu_cond_init(&decomp_param[i].cond);
2313 decomp_param[i].compbuf = g_malloc0(compressBound(TARGET_PAGE_SIZE));
73a8912b 2314 decomp_param[i].done = true;
90e56fb4 2315 decomp_param[i].quit = false;
56e93d26
JQ
2316 qemu_thread_create(decompress_threads + i, "decompress",
2317 do_data_decompress, decomp_param + i,
2318 QEMU_THREAD_JOINABLE);
2319 }
2320}
2321
2322void migrate_decompress_threads_join(void)
2323{
2324 int i, thread_count;
2325
56e93d26
JQ
2326 thread_count = migrate_decompress_threads();
2327 for (i = 0; i < thread_count; i++) {
2328 qemu_mutex_lock(&decomp_param[i].mutex);
90e56fb4 2329 decomp_param[i].quit = true;
56e93d26
JQ
2330 qemu_cond_signal(&decomp_param[i].cond);
2331 qemu_mutex_unlock(&decomp_param[i].mutex);
2332 }
2333 for (i = 0; i < thread_count; i++) {
2334 qemu_thread_join(decompress_threads + i);
2335 qemu_mutex_destroy(&decomp_param[i].mutex);
2336 qemu_cond_destroy(&decomp_param[i].cond);
2337 g_free(decomp_param[i].compbuf);
2338 }
2339 g_free(decompress_threads);
2340 g_free(decomp_param);
56e93d26
JQ
2341 decompress_threads = NULL;
2342 decomp_param = NULL;
56e93d26
JQ
2343}
2344
c1bc6626 2345static void decompress_data_with_multi_threads(QEMUFile *f,
56e93d26
JQ
2346 void *host, int len)
2347{
2348 int idx, thread_count;
2349
2350 thread_count = migrate_decompress_threads();
73a8912b 2351 qemu_mutex_lock(&decomp_done_lock);
56e93d26
JQ
2352 while (true) {
2353 for (idx = 0; idx < thread_count; idx++) {
73a8912b 2354 if (decomp_param[idx].done) {
33d151f4
LL
2355 decomp_param[idx].done = false;
2356 qemu_mutex_lock(&decomp_param[idx].mutex);
c1bc6626 2357 qemu_get_buffer(f, decomp_param[idx].compbuf, len);
56e93d26
JQ
2358 decomp_param[idx].des = host;
2359 decomp_param[idx].len = len;
33d151f4
LL
2360 qemu_cond_signal(&decomp_param[idx].cond);
2361 qemu_mutex_unlock(&decomp_param[idx].mutex);
56e93d26
JQ
2362 break;
2363 }
2364 }
2365 if (idx < thread_count) {
2366 break;
73a8912b
LL
2367 } else {
2368 qemu_cond_wait(&decomp_done_cond, &decomp_done_lock);
56e93d26
JQ
2369 }
2370 }
73a8912b 2371 qemu_mutex_unlock(&decomp_done_lock);
56e93d26
JQ
2372}
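
/*
 * Illustrative sketch, not part of ram.c: the dispatch pattern used above -
 * scan for an idle worker, hand it the job under that worker's lock, and if
 * every worker is busy, sleep on a shared "done" condition variable until
 * one signals completion.  Plain pthreads stand in for qemu_mutex/qemu_cond;
 * worker setup (mutex/cond init, thread creation) is assumed to have
 * happened elsewhere and is elided.
 */
#include <pthread.h>
#include <stdbool.h>

#define NWORKERS 4

struct worker {
    pthread_mutex_t lock;
    pthread_cond_t cond;
    bool done;              /* true while the worker is idle */
    const void *job;
};

static struct worker workers[NWORKERS];
static pthread_mutex_t done_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t done_cond = PTHREAD_COND_INITIALIZER;

static void dispatch(const void *job)
{
    pthread_mutex_lock(&done_lock);
    for (;;) {
        for (int i = 0; i < NWORKERS; i++) {
            if (workers[i].done) {
                workers[i].done = false;
                pthread_mutex_lock(&workers[i].lock);
                workers[i].job = job;                   /* hand over work */
                pthread_cond_signal(&workers[i].cond);
                pthread_mutex_unlock(&workers[i].lock);
                pthread_mutex_unlock(&done_lock);
                return;
            }
        }
        /* all workers busy: wait until one marks itself done again */
        pthread_cond_wait(&done_cond, &done_lock);
    }
}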
2373
1caddf8a
DDAG
2374/*
2375 * Allocate data structures etc needed by incoming migration with postcopy-ram.
2376 * postcopy-ram's similarly named postcopy_ram_incoming_init does the work.
2377 */
2378int ram_postcopy_incoming_init(MigrationIncomingState *mis)
2379{
2380 size_t ram_pages = last_ram_offset() >> TARGET_PAGE_BITS;
2381
2382 return postcopy_ram_incoming_init(mis, ram_pages);
2383}
2384
a7180877
DDAG
2385/*
2386 * Called in postcopy mode by ram_load().
2387 * rcu_read_lock is taken prior to this being called.
2388 */
2389static int ram_load_postcopy(QEMUFile *f)
2390{
2391 int flags = 0, ret = 0;
2392 bool place_needed = false;
28abd200 2393 bool matching_page_sizes = false;
a7180877
DDAG
2394 MigrationIncomingState *mis = migration_incoming_get_current();
2395 /* Temporary page that is later 'placed' */
2396 void *postcopy_host_page = postcopy_get_tmp_page(mis);
c53b7ddc 2397 void *last_host = NULL;
a3b6ff6d 2398 bool all_zero = false;
a7180877
DDAG
2399
2400 while (!ret && !(flags & RAM_SAVE_FLAG_EOS)) {
2401 ram_addr_t addr;
2402 void *host = NULL;
2403 void *page_buffer = NULL;
2404 void *place_source = NULL;
df9ff5e1 2405 RAMBlock *block = NULL;
a7180877 2406 uint8_t ch;
a7180877
DDAG
2407
2408 addr = qemu_get_be64(f);
2409 flags = addr & ~TARGET_PAGE_MASK;
2410 addr &= TARGET_PAGE_MASK;
2411
2412 trace_ram_load_postcopy_loop((uint64_t)addr, flags);
2413 place_needed = false;
2414 if (flags & (RAM_SAVE_FLAG_COMPRESS | RAM_SAVE_FLAG_PAGE)) {
df9ff5e1 2415 block = ram_block_from_stream(f, flags);
4c4bad48
HZ
2416
2417 host = host_from_ram_block_offset(block, addr);
a7180877
DDAG
2418 if (!host) {
2419 error_report("Illegal RAM offset " RAM_ADDR_FMT, addr);
2420 ret = -EINVAL;
2421 break;
2422 }
28abd200 2423 matching_page_sizes = block->page_size == TARGET_PAGE_SIZE;
a7180877 2424 /*
28abd200
DDAG
2425 * Postcopy requires that we place whole host pages atomically;
2426 * these may be huge pages for RAMBlocks that are backed by
2427 * hugetlbfs.
a7180877
DDAG
2428 * To make it atomic, the data is read into a temporary page
2429 * that's moved into place later.
2430 * The migration protocol uses (possibly smaller) target pages;
2431 * however, the source ensures it always sends all the components
2432 * of a host page in order.
2433 */
2434 page_buffer = postcopy_host_page +
28abd200 2435 ((uintptr_t)host & (block->page_size - 1));
a7180877 2436 /* If all TP are zero then we can optimise the place */
28abd200 2437 if (!((uintptr_t)host & (block->page_size - 1))) {
a7180877 2438 all_zero = true;
c53b7ddc
DDAG
2439 } else {
2440 /* not the 1st TP within the HP */
2441 if (host != (last_host + TARGET_PAGE_SIZE)) {
9af9e0fe 2442 error_report("Non-sequential target page %p/%p",
c53b7ddc
DDAG
2443 host, last_host);
2444 ret = -EINVAL;
2445 break;
2446 }
a7180877
DDAG
2447 }
2448
c53b7ddc 2449
a7180877
DDAG
2450 /*
2451 * If it's the last part of a host page then we place the host
2452 * page
2453 */
2454 place_needed = (((uintptr_t)host + TARGET_PAGE_SIZE) &
28abd200 2455 (block->page_size - 1)) == 0;
a7180877
DDAG
2456 place_source = postcopy_host_page;
2457 }
c53b7ddc 2458 last_host = host;
a7180877
DDAG
2459
2460 switch (flags & ~RAM_SAVE_FLAG_CONTINUE) {
2461 case RAM_SAVE_FLAG_COMPRESS:
2462 ch = qemu_get_byte(f);
2463 memset(page_buffer, ch, TARGET_PAGE_SIZE);
2464 if (ch) {
2465 all_zero = false;
2466 }
2467 break;
2468
2469 case RAM_SAVE_FLAG_PAGE:
2470 all_zero = false;
2471 if (!place_needed || !matching_page_sizes) {
2472 qemu_get_buffer(f, page_buffer, TARGET_PAGE_SIZE);
2473 } else {
2474 /* Avoids the extra qemu_file copy: postcopy is going to
2475 * copy the page into place later anyway; we can only do
2476 * this when the read is done in one go (matching page sizes)
2477 */
2478 qemu_get_buffer_in_place(f, (uint8_t **)&place_source,
2479 TARGET_PAGE_SIZE);
2480 }
2481 break;
2482 case RAM_SAVE_FLAG_EOS:
2483 /* normal exit */
2484 break;
2485 default:
2486 error_report("Unknown combination of migration flags: %#x"
2487 " (postcopy mode)", flags);
2488 ret = -EINVAL;
2489 }
2490
2491 if (place_needed) {
2492 /* This gets called at the last target page in the host page */
df9ff5e1
DDAG
2493 void *place_dest = host + TARGET_PAGE_SIZE - block->page_size;
2494
a7180877 2495 if (all_zero) {
df9ff5e1
DDAG
2496 ret = postcopy_place_page_zero(mis, place_dest,
2497 block->page_size);
a7180877 2498 } else {
df9ff5e1
DDAG
2499 ret = postcopy_place_page(mis, place_dest,
2500 place_source, block->page_size);
a7180877
DDAG
2501 }
2502 }
2503 if (!ret) {
2504 ret = qemu_file_get_error(f);
2505 }
2506 }
2507
2508 return ret;
2509}
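
/*
 * Illustrative sketch, not part of ram.c: the host-page assembly arithmetic
 * used above.  Each incoming target page is copied into a temporary buffer
 * at its offset within the (possibly huge) host page; only once the final
 * target page of that host page has arrived is the whole host page placed
 * atomically.  The helper names and parameters are hypothetical; both page
 * sizes are assumed to be powers of two.
 */
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>

static size_t offset_in_host_page(uintptr_t host_addr, size_t host_page_size)
{
    /* where inside the temporary host-page buffer this target page lands */
    return host_addr & (host_page_size - 1);
}

static bool is_last_target_page(uintptr_t host_addr, size_t target_page_size,
                                size_t host_page_size)
{
    /* true when this is the final target-page slot of its host page */
    return ((host_addr + target_page_size) & (host_page_size - 1)) == 0;
}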
2510
56e93d26
JQ
2511static int ram_load(QEMUFile *f, void *opaque, int version_id)
2512{
2513 int flags = 0, ret = 0;
2514 static uint64_t seq_iter;
2515 int len = 0;
a7180877
DDAG
2516 /*
2517 * If the system is running in postcopy mode, page inserts into host
2518 * memory must be atomic.
2519 */
2520 bool postcopy_running = postcopy_state_get() >= POSTCOPY_INCOMING_LISTENING;
ef08fb38
DDAG
2521 /* ADVISE comes earlier; it shows that the source has the postcopy capability enabled */
2522 bool postcopy_advised = postcopy_state_get() >= POSTCOPY_INCOMING_ADVISE;
56e93d26
JQ
2523
2524 seq_iter++;
2525
2526 if (version_id != 4) {
2527 ret = -EINVAL;
2528 }
2529
2530 /* This RCU critical section can be very long running.
2531 * When RCU reclaims in the code start to become numerous,
2532 * it will be necessary to reduce the granularity of this
2533 * critical section.
2534 */
2535 rcu_read_lock();
a7180877
DDAG
2536
2537 if (postcopy_running) {
2538 ret = ram_load_postcopy(f);
2539 }
2540
2541 while (!postcopy_running && !ret && !(flags & RAM_SAVE_FLAG_EOS)) {
56e93d26 2542 ram_addr_t addr, total_ram_bytes;
a776aa15 2543 void *host = NULL;
56e93d26
JQ
2544 uint8_t ch;
2545
2546 addr = qemu_get_be64(f);
2547 flags = addr & ~TARGET_PAGE_MASK;
2548 addr &= TARGET_PAGE_MASK;
2549
a776aa15
DDAG
2550 if (flags & (RAM_SAVE_FLAG_COMPRESS | RAM_SAVE_FLAG_PAGE |
2551 RAM_SAVE_FLAG_COMPRESS_PAGE | RAM_SAVE_FLAG_XBZRLE)) {
4c4bad48
HZ
2552 RAMBlock *block = ram_block_from_stream(f, flags);
2553
2554 host = host_from_ram_block_offset(block, addr);
a776aa15
DDAG
2555 if (!host) {
2556 error_report("Illegal RAM offset " RAM_ADDR_FMT, addr);
2557 ret = -EINVAL;
2558 break;
2559 }
2560 }
2561
56e93d26
JQ
2562 switch (flags & ~RAM_SAVE_FLAG_CONTINUE) {
2563 case RAM_SAVE_FLAG_MEM_SIZE:
2564 /* Synchronize RAM block list */
2565 total_ram_bytes = addr;
2566 while (!ret && total_ram_bytes) {
2567 RAMBlock *block;
56e93d26
JQ
2568 char id[256];
2569 ram_addr_t length;
2570
2571 len = qemu_get_byte(f);
2572 qemu_get_buffer(f, (uint8_t *)id, len);
2573 id[len] = 0;
2574 length = qemu_get_be64(f);
2575
e3dd7493
DDAG
2576 block = qemu_ram_block_by_name(id);
2577 if (block) {
2578 if (length != block->used_length) {
2579 Error *local_err = NULL;
56e93d26 2580
fa53a0e5 2581 ret = qemu_ram_resize(block, length,
e3dd7493
DDAG
2582 &local_err);
2583 if (local_err) {
2584 error_report_err(local_err);
56e93d26 2585 }
56e93d26 2586 }
ef08fb38
DDAG
2587 /* For postcopy we need to check hugepage sizes match */
2588 if (postcopy_advised &&
2589 block->page_size != qemu_host_page_size) {
2590 uint64_t remote_page_size = qemu_get_be64(f);
2591 if (remote_page_size != block->page_size) {
2592 error_report("Mismatched RAM page size %s "
2593 "(local) %zd != %" PRId64,
2594 id, block->page_size,
2595 remote_page_size);
2596 ret = -EINVAL;
2597 }
2598 }
e3dd7493
DDAG
2599 ram_control_load_hook(f, RAM_CONTROL_BLOCK_REG,
2600 block->idstr);
2601 } else {
56e93d26
JQ
2602 error_report("Unknown ramblock \"%s\", cannot "
2603 "accept migration", id);
2604 ret = -EINVAL;
2605 }
2606
2607 total_ram_bytes -= length;
2608 }
2609 break;
a776aa15 2610
56e93d26 2611 case RAM_SAVE_FLAG_COMPRESS:
56e93d26
JQ
2612 ch = qemu_get_byte(f);
2613 ram_handle_compressed(host, ch, TARGET_PAGE_SIZE);
2614 break;
a776aa15 2615
56e93d26 2616 case RAM_SAVE_FLAG_PAGE:
56e93d26
JQ
2617 qemu_get_buffer(f, host, TARGET_PAGE_SIZE);
2618 break;
56e93d26 2619
a776aa15 2620 case RAM_SAVE_FLAG_COMPRESS_PAGE:
56e93d26
JQ
2621 len = qemu_get_be32(f);
2622 if (len < 0 || len > compressBound(TARGET_PAGE_SIZE)) {
2623 error_report("Invalid compressed data length: %d", len);
2624 ret = -EINVAL;
2625 break;
2626 }
c1bc6626 2627 decompress_data_with_multi_threads(f, host, len);
56e93d26 2628 break;
a776aa15 2629
56e93d26 2630 case RAM_SAVE_FLAG_XBZRLE:
56e93d26
JQ
2631 if (load_xbzrle(f, addr, host) < 0) {
2632 error_report("Failed to decompress XBZRLE page at "
2633 RAM_ADDR_FMT, addr);
2634 ret = -EINVAL;
2635 break;
2636 }
2637 break;
2638 case RAM_SAVE_FLAG_EOS:
2639 /* normal exit */
2640 break;
2641 default:
2642 if (flags & RAM_SAVE_FLAG_HOOK) {
632e3a5c 2643 ram_control_load_hook(f, RAM_CONTROL_HOOK, NULL);
56e93d26
JQ
2644 } else {
2645 error_report("Unknown combination of migration flags: %#x",
2646 flags);
2647 ret = -EINVAL;
2648 }
2649 }
2650 if (!ret) {
2651 ret = qemu_file_get_error(f);
2652 }
2653 }
2654
5533b2e9 2655 wait_for_decompress_done();
56e93d26 2656 rcu_read_unlock();
55c4446b 2657 trace_ram_load_complete(ret, seq_iter);
56e93d26
JQ
2658 return ret;
2659}
2660
2661static SaveVMHandlers savevm_ram_handlers = {
2662 .save_live_setup = ram_save_setup,
2663 .save_live_iterate = ram_save_iterate,
763c906b 2664 .save_live_complete_postcopy = ram_save_complete,
a3e06c3d 2665 .save_live_complete_precopy = ram_save_complete,
56e93d26
JQ
2666 .save_live_pending = ram_save_pending,
2667 .load_state = ram_load,
6ad2a215 2668 .cleanup = ram_migration_cleanup,
56e93d26
JQ
2669};
2670
2671void ram_mig_init(void)
2672{
2673 qemu_mutex_init(&XBZRLE.lock);
2674 register_savevm_live(NULL, "ram", 0, 4, &savevm_ram_handlers, NULL);
2675}