[mirror_qemu.git] / migration / ram.c
1/*
2 * QEMU System Emulator
3 *
4 * Copyright (c) 2003-2008 Fabrice Bellard
5 * Copyright (c) 2011-2015 Red Hat Inc
6 *
7 * Authors:
8 * Juan Quintela <quintela@redhat.com>
9 *
10 * Permission is hereby granted, free of charge, to any person obtaining a copy
11 * of this software and associated documentation files (the "Software"), to deal
12 * in the Software without restriction, including without limitation the rights
13 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
14 * copies of the Software, and to permit persons to whom the Software is
15 * furnished to do so, subject to the following conditions:
16 *
17 * The above copyright notice and this permission notice shall be included in
18 * all copies or substantial portions of the Software.
19 *
20 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
21 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
23 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
24 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
25 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
26 * THE SOFTWARE.
27 */
1393a485 28#include "qemu/osdep.h"
29#include "qemu-common.h"
30#include "cpu.h"
56e93d26 31#include <zlib.h>
4addcd4f 32#include "qapi-event.h"
f348b6d1 33#include "qemu/cutils.h"
34#include "qemu/bitops.h"
35#include "qemu/bitmap.h"
36#include "qemu/timer.h"
37#include "qemu/main-loop.h"
56e93d26 38#include "migration/migration.h"
e0b266f0 39#include "migration/postcopy-ram.h"
40#include "exec/address-spaces.h"
41#include "migration/page_cache.h"
56e93d26 42#include "qemu/error-report.h"
56e93d26 43#include "trace.h"
56e93d26 44#include "exec/ram_addr.h"
56e93d26 45#include "qemu/rcu_queue.h"
a91246c9 46#include "migration/colo.h"
56e93d26 47
56e93d26 48static int dirty_rate_high_cnt;
49
50static uint64_t bitmap_sync_count;
51
52/***********************************************************/
53/* ram save/restore */
54
55#define RAM_SAVE_FLAG_FULL 0x01 /* Obsolete, not used anymore */
56#define RAM_SAVE_FLAG_COMPRESS 0x02
57#define RAM_SAVE_FLAG_MEM_SIZE 0x04
58#define RAM_SAVE_FLAG_PAGE 0x08
59#define RAM_SAVE_FLAG_EOS 0x10
60#define RAM_SAVE_FLAG_CONTINUE 0x20
61#define RAM_SAVE_FLAG_XBZRLE 0x40
62/* 0x80 is reserved in migration.h start with 0x100 next */
63#define RAM_SAVE_FLAG_COMPRESS_PAGE 0x100
64
adb65dec 65static uint8_t *ZERO_TARGET_PAGE;
66
67static inline bool is_zero_range(uint8_t *p, uint64_t size)
68{
a1febc49 69 return buffer_is_zero(p, size);
70}
71
72/* struct contains XBZRLE cache and a static page
73 used by the compression */
74static struct {
75 /* buffer used for XBZRLE encoding */
76 uint8_t *encoded_buf;
77 /* buffer for storing page content */
78 uint8_t *current_buf;
79 /* Cache for XBZRLE, Protected by lock. */
80 PageCache *cache;
81 QemuMutex lock;
82} XBZRLE;
83
84/* buffer used for XBZRLE decoding */
85static uint8_t *xbzrle_decoded_buf;
86
87static void XBZRLE_cache_lock(void)
88{
89 if (migrate_use_xbzrle())
90 qemu_mutex_lock(&XBZRLE.lock);
91}
92
93static void XBZRLE_cache_unlock(void)
94{
95 if (migrate_use_xbzrle())
96 qemu_mutex_unlock(&XBZRLE.lock);
97}
98
99/*
100 * called from qmp_migrate_set_cache_size in main thread, possibly while
101 * a migration is in progress.
102 * A running migration may be using the cache and might finish during this
103 * call, hence changes to the cache are protected by XBZRLE.lock().
104 */
105int64_t xbzrle_cache_resize(int64_t new_size)
106{
107 PageCache *new_cache;
108 int64_t ret;
109
110 if (new_size < TARGET_PAGE_SIZE) {
111 return -1;
112 }
113
114 XBZRLE_cache_lock();
115
116 if (XBZRLE.cache != NULL) {
117 if (pow2floor(new_size) == migrate_xbzrle_cache_size()) {
118 goto out_new_size;
119 }
120 new_cache = cache_init(new_size / TARGET_PAGE_SIZE,
121 TARGET_PAGE_SIZE);
122 if (!new_cache) {
123 error_report("Error creating cache");
124 ret = -1;
125 goto out;
126 }
127
128 cache_fini(XBZRLE.cache);
129 XBZRLE.cache = new_cache;
130 }
131
132out_new_size:
133 ret = pow2floor(new_size);
134out:
135 XBZRLE_cache_unlock();
136 return ret;
137}
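/*
 * Editor's example (illustration, not part of the original file): the
 * value reported back is pow2floor(new_size), so
 *
 *     xbzrle_cache_resize(5 * 1024 * 1024);
 *
 * requests a 5 MiB cache but reports an effective size of 4 MiB, the
 * largest power of two not exceeding the request.
 */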
138
139/* accounting for migration statistics */
140typedef struct AccountingInfo {
141 uint64_t dup_pages;
142 uint64_t skipped_pages;
143 uint64_t norm_pages;
144 uint64_t iterations;
145 uint64_t xbzrle_bytes;
146 uint64_t xbzrle_pages;
147 uint64_t xbzrle_cache_miss;
148 double xbzrle_cache_miss_rate;
149 uint64_t xbzrle_overflows;
150} AccountingInfo;
151
152static AccountingInfo acct_info;
153
154static void acct_clear(void)
155{
156 memset(&acct_info, 0, sizeof(acct_info));
157}
158
159uint64_t dup_mig_bytes_transferred(void)
160{
161 return acct_info.dup_pages * TARGET_PAGE_SIZE;
162}
163
164uint64_t dup_mig_pages_transferred(void)
165{
166 return acct_info.dup_pages;
167}
168
169uint64_t skipped_mig_bytes_transferred(void)
170{
171 return acct_info.skipped_pages * TARGET_PAGE_SIZE;
172}
173
174uint64_t skipped_mig_pages_transferred(void)
175{
176 return acct_info.skipped_pages;
177}
178
179uint64_t norm_mig_bytes_transferred(void)
180{
181 return acct_info.norm_pages * TARGET_PAGE_SIZE;
182}
183
184uint64_t norm_mig_pages_transferred(void)
185{
186 return acct_info.norm_pages;
187}
188
189uint64_t xbzrle_mig_bytes_transferred(void)
190{
191 return acct_info.xbzrle_bytes;
192}
193
194uint64_t xbzrle_mig_pages_transferred(void)
195{
196 return acct_info.xbzrle_pages;
197}
198
199uint64_t xbzrle_mig_pages_cache_miss(void)
200{
201 return acct_info.xbzrle_cache_miss;
202}
203
204double xbzrle_mig_cache_miss_rate(void)
205{
206 return acct_info.xbzrle_cache_miss_rate;
207}
208
209uint64_t xbzrle_mig_pages_overflow(void)
210{
211 return acct_info.xbzrle_overflows;
212}
213
214/* This is the last block that we have visited searching for dirty pages
215 */
216static RAMBlock *last_seen_block;
217/* This is the last block from where we have sent data */
218static RAMBlock *last_sent_block;
219static ram_addr_t last_offset;
dd631697 220static QemuMutex migration_bitmap_mutex;
221static uint64_t migration_dirty_pages;
222static uint32_t last_version;
223static bool ram_bulk_stage;
224
225/* used by the search for pages to send */
226struct PageSearchStatus {
227 /* Current block being searched */
228 RAMBlock *block;
229 /* Current offset to search from */
230 ram_addr_t offset;
231 /* Set once we wrap around */
232 bool complete_round;
233};
234typedef struct PageSearchStatus PageSearchStatus;
235
236static struct BitmapRcu {
237 struct rcu_head rcu;
f3f491fc 238 /* Main migration bitmap */
60be6340 239 unsigned long *bmap;
240 /* bitmap of pages that haven't been sent even once
241 * only maintained and used in postcopy at the moment
242 * where it's used to send the dirtymap at the start
243 * of the postcopy phase
244 */
245 unsigned long *unsentmap;
246} *migration_bitmap_rcu;
247
56e93d26 248struct CompressParam {
56e93d26 249 bool done;
90e56fb4 250 bool quit;
251 QEMUFile *file;
252 QemuMutex mutex;
253 QemuCond cond;
254 RAMBlock *block;
255 ram_addr_t offset;
256};
257typedef struct CompressParam CompressParam;
258
259struct DecompressParam {
73a8912b 260 bool done;
90e56fb4 261 bool quit;
262 QemuMutex mutex;
263 QemuCond cond;
264 void *des;
d341d9f3 265 uint8_t *compbuf;
266 int len;
267};
268typedef struct DecompressParam DecompressParam;
269
270static CompressParam *comp_param;
271static QemuThread *compress_threads;
272/* comp_done_cond is used to wake up the migration thread when
273 * one of the compression threads has finished the compression.
274 * comp_done_lock is used to co-work with comp_done_cond.
275 */
276static QemuMutex comp_done_lock;
277static QemuCond comp_done_cond;
278/* The empty QEMUFileOps will be used by file in CompressParam */
279static const QEMUFileOps empty_ops = { };
280
281static bool compression_switch;
282static DecompressParam *decomp_param;
283static QemuThread *decompress_threads;
284static QemuMutex decomp_done_lock;
285static QemuCond decomp_done_cond;
56e93d26 286
287static int do_compress_ram_page(QEMUFile *f, RAMBlock *block,
288 ram_addr_t offset);
289
290static void *do_data_compress(void *opaque)
291{
292 CompressParam *param = opaque;
293 RAMBlock *block;
294 ram_addr_t offset;
56e93d26 295
a7a9a88f 296 qemu_mutex_lock(&param->mutex);
90e56fb4 297 while (!param->quit) {
298 if (param->block) {
299 block = param->block;
300 offset = param->offset;
301 param->block = NULL;
302 qemu_mutex_unlock(&param->mutex);
303
304 do_compress_ram_page(param->file, block, offset);
305
0d9f9a5c 306 qemu_mutex_lock(&comp_done_lock);
a7a9a88f 307 param->done = true;
308 qemu_cond_signal(&comp_done_cond);
309 qemu_mutex_unlock(&comp_done_lock);
310
311 qemu_mutex_lock(&param->mutex);
312 } else {
313 qemu_cond_wait(&param->cond, &param->mutex);
314 }
56e93d26 315 }
a7a9a88f 316 qemu_mutex_unlock(&param->mutex);
317
318 return NULL;
319}
320
321static inline void terminate_compression_threads(void)
322{
323 int idx, thread_count;
324
325 thread_count = migrate_compress_threads();
326 for (idx = 0; idx < thread_count; idx++) {
327 qemu_mutex_lock(&comp_param[idx].mutex);
90e56fb4 328 comp_param[idx].quit = true;
329 qemu_cond_signal(&comp_param[idx].cond);
330 qemu_mutex_unlock(&comp_param[idx].mutex);
331 }
332}
333
334void migrate_compress_threads_join(void)
335{
336 int i, thread_count;
337
338 if (!migrate_use_compression()) {
339 return;
340 }
341 terminate_compression_threads();
342 thread_count = migrate_compress_threads();
343 for (i = 0; i < thread_count; i++) {
344 qemu_thread_join(compress_threads + i);
345 qemu_fclose(comp_param[i].file);
346 qemu_mutex_destroy(&comp_param[i].mutex);
347 qemu_cond_destroy(&comp_param[i].cond);
348 }
349 qemu_mutex_destroy(&comp_done_lock);
350 qemu_cond_destroy(&comp_done_cond);
351 g_free(compress_threads);
352 g_free(comp_param);
353 compress_threads = NULL;
354 comp_param = NULL;
355}
356
357void migrate_compress_threads_create(void)
358{
359 int i, thread_count;
360
361 if (!migrate_use_compression()) {
362 return;
363 }
364 compression_switch = true;
365 thread_count = migrate_compress_threads();
366 compress_threads = g_new0(QemuThread, thread_count);
367 comp_param = g_new0(CompressParam, thread_count);
368 qemu_cond_init(&comp_done_cond);
369 qemu_mutex_init(&comp_done_lock);
56e93d26 370 for (i = 0; i < thread_count; i++) {
371 /* comp_param[i].file is just used as a dummy buffer to save data,
372 * set its ops to empty.
373 */
374 comp_param[i].file = qemu_fopen_ops(NULL, &empty_ops);
375 comp_param[i].done = true;
90e56fb4 376 comp_param[i].quit = false;
377 qemu_mutex_init(&comp_param[i].mutex);
378 qemu_cond_init(&comp_param[i].cond);
379 qemu_thread_create(compress_threads + i, "compress",
380 do_data_compress, comp_param + i,
381 QEMU_THREAD_JOINABLE);
382 }
383}
384
385/**
386 * save_page_header: Write page header to wire
387 *
388 * If this is the 1st block, it also writes the block identification
389 *
390 * Returns: Number of bytes written
391 *
392 * @f: QEMUFile where to send the data
393 * @block: block that contains the page we want to send
394 * @offset: offset inside the block for the page
395 * in the lower bits, it contains flags
396 */
397static size_t save_page_header(QEMUFile *f, RAMBlock *block, ram_addr_t offset)
398{
9f5f380b 399 size_t size, len;
400
401 qemu_put_be64(f, offset);
402 size = 8;
403
404 if (!(offset & RAM_SAVE_FLAG_CONTINUE)) {
405 len = strlen(block->idstr);
406 qemu_put_byte(f, len);
407 qemu_put_buffer(f, (uint8_t *)block->idstr, len);
408 size += 1 + len;
409 }
410 return size;
411}
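/*
 * Editor's note (derived from the code above): the header layout on the
 * wire is
 *
 *     8 bytes  offset within the RAMBlock, ORed with RAM_SAVE_FLAG_* bits
 *     1 byte   length of the RAMBlock idstr  \ only present when
 *     n bytes  the idstr itself              / RAM_SAVE_FLAG_CONTINUE is clear
 *
 * so pages that continue the previously sent block cost 8 header bytes,
 * while the first page of a block also carries the block name.
 */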
412
413/* Reduce amount of guest cpu execution to hopefully slow down memory writes.
414 * If guest dirty memory rate is reduced below the rate at which we can
415 * transfer pages to the destination then we should be able to complete
416 * migration. Some workloads dirty memory way too fast and will not effectively
417 * converge, even with auto-converge.
418 */
419static void mig_throttle_guest_down(void)
420{
421 MigrationState *s = migrate_get_current();
422 uint64_t pct_initial = s->parameters.cpu_throttle_initial;
423 uint64_t pct_icrement = s->parameters.cpu_throttle_increment;
424
425 /* We have not started throttling yet. Let's start it. */
426 if (!cpu_throttle_active()) {
427 cpu_throttle_set(pct_initial);
428 } else {
429 /* Throttling already on, just increase the rate */
430 cpu_throttle_set(cpu_throttle_get_percentage() + pct_icrement);
431 }
432}
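/*
 * Editor's example (parameter values assumed, not taken from this file):
 * with cpu_throttle_initial=20 and cpu_throttle_increment=10, successive
 * calls throttle guest vCPUs to 20%, then 30%, 40%, ... until either the
 * dirty rate drops below the transfer rate or migration completes.
 */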
433
434/* Update the xbzrle cache to reflect a page that's been sent as all 0.
435 * The important thing is that a stale (not-yet-0'd) page be replaced
436 * by the new data.
437 * As a bonus, if the page wasn't in the cache it gets added so that
438 * when a small write is made into the 0'd page it gets XBZRLE sent
439 */
440static void xbzrle_cache_zero_page(ram_addr_t current_addr)
441{
442 if (ram_bulk_stage || !migrate_use_xbzrle()) {
443 return;
444 }
445
446 /* We don't care if this fails to allocate a new cache page
447 * as long as it updated an old one */
448 cache_insert(XBZRLE.cache, current_addr, ZERO_TARGET_PAGE,
449 bitmap_sync_count);
450}
451
452#define ENCODING_FLAG_XBZRLE 0x1
453
454/**
455 * save_xbzrle_page: compress and send current page
456 *
457 * Returns: 1 means that we wrote the page
458 * 0 means that page is identical to the one already sent
459 * -1 means that xbzrle would be longer than normal
460 *
461 * @f: QEMUFile where to send the data
462 * @current_data:
463 * @current_addr:
464 * @block: block that contains the page we want to send
465 * @offset: offset inside the block for the page
466 * @last_stage: if we are at the completion stage
467 * @bytes_transferred: increase it with the number of transferred bytes
468 */
469static int save_xbzrle_page(QEMUFile *f, uint8_t **current_data,
470 ram_addr_t current_addr, RAMBlock *block,
471 ram_addr_t offset, bool last_stage,
472 uint64_t *bytes_transferred)
473{
474 int encoded_len = 0, bytes_xbzrle;
475 uint8_t *prev_cached_page;
476
477 if (!cache_is_cached(XBZRLE.cache, current_addr, bitmap_sync_count)) {
478 acct_info.xbzrle_cache_miss++;
479 if (!last_stage) {
480 if (cache_insert(XBZRLE.cache, current_addr, *current_data,
481 bitmap_sync_count) == -1) {
482 return -1;
483 } else {
484 /* update *current_data when the page has been
485 inserted into cache */
486 *current_data = get_cached_data(XBZRLE.cache, current_addr);
487 }
488 }
489 return -1;
490 }
491
492 prev_cached_page = get_cached_data(XBZRLE.cache, current_addr);
493
494 /* save current buffer into memory */
495 memcpy(XBZRLE.current_buf, *current_data, TARGET_PAGE_SIZE);
496
497 /* XBZRLE encoding (if there is no overflow) */
498 encoded_len = xbzrle_encode_buffer(prev_cached_page, XBZRLE.current_buf,
499 TARGET_PAGE_SIZE, XBZRLE.encoded_buf,
500 TARGET_PAGE_SIZE);
501 if (encoded_len == 0) {
55c4446b 502 trace_save_xbzrle_page_skipping();
56e93d26
JQ
503 return 0;
504 } else if (encoded_len == -1) {
55c4446b 505 trace_save_xbzrle_page_overflow();
56e93d26
JQ
506 acct_info.xbzrle_overflows++;
507 /* update data in the cache */
508 if (!last_stage) {
509 memcpy(prev_cached_page, *current_data, TARGET_PAGE_SIZE);
510 *current_data = prev_cached_page;
511 }
512 return -1;
513 }
514
515 /* we need to update the data in the cache, in order to get the same data */
516 if (!last_stage) {
517 memcpy(prev_cached_page, XBZRLE.current_buf, TARGET_PAGE_SIZE);
518 }
519
520 /* Send XBZRLE based compressed page */
521 bytes_xbzrle = save_page_header(f, block, offset | RAM_SAVE_FLAG_XBZRLE);
522 qemu_put_byte(f, ENCODING_FLAG_XBZRLE);
523 qemu_put_be16(f, encoded_len);
524 qemu_put_buffer(f, XBZRLE.encoded_buf, encoded_len);
525 bytes_xbzrle += encoded_len + 1 + 2;
526 acct_info.xbzrle_pages++;
527 acct_info.xbzrle_bytes += bytes_xbzrle;
528 *bytes_transferred += bytes_xbzrle;
529
530 return 1;
531}
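/*
 * Editor's note: the bytes accounted for an XBZRLE page are
 *
 *     save_page_header() + 1 (ENCODING_FLAG_XBZRLE) + 2 (be16 length)
 *                        + encoded_len
 *
 * which only pays off while encoded_len stays well below TARGET_PAGE_SIZE;
 * the overflow path above falls back to sending a normal page.
 */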
532
533/* Called with rcu_read_lock() to protect migration_bitmap
534 * rb: The RAMBlock to search for dirty pages in
535 * start: Start address (typically so we can continue from previous page)
536 * ram_addr_abs: Pointer into which to store the address of the dirty page
537 * within the global ram_addr space
538 *
539 * Returns: byte offset within memory region of the start of a dirty page
540 */
56e93d26 541static inline
542ram_addr_t migration_bitmap_find_dirty(RAMBlock *rb,
543 ram_addr_t start,
544 ram_addr_t *ram_addr_abs)
56e93d26 545{
2f68e399 546 unsigned long base = rb->offset >> TARGET_PAGE_BITS;
56e93d26 547 unsigned long nr = base + (start >> TARGET_PAGE_BITS);
548 uint64_t rb_size = rb->used_length;
549 unsigned long size = base + (rb_size >> TARGET_PAGE_BITS);
2ff64038 550 unsigned long *bitmap;
551
552 unsigned long next;
553
60be6340 554 bitmap = atomic_rcu_read(&migration_bitmap_rcu)->bmap;
555 if (ram_bulk_stage && nr > base) {
556 next = nr + 1;
557 } else {
2ff64038 558 next = find_next_bit(bitmap, size, nr);
559 }
560
f3f491fc 561 *ram_addr_abs = next << TARGET_PAGE_BITS;
562 return (next - base) << TARGET_PAGE_BITS;
563}
564
565static inline bool migration_bitmap_clear_dirty(ram_addr_t addr)
566{
567 bool ret;
568 int nr = addr >> TARGET_PAGE_BITS;
569 unsigned long *bitmap = atomic_rcu_read(&migration_bitmap_rcu)->bmap;
570
571 ret = test_and_clear_bit(nr, bitmap);
572
573 if (ret) {
574 migration_dirty_pages--;
575 }
576 return ret;
577}
578
1ffb5dfd 579static int64_t num_dirty_pages_period;
580static void migration_bitmap_sync_range(ram_addr_t start, ram_addr_t length)
581{
2ff64038 582 unsigned long *bitmap;
60be6340 583 bitmap = atomic_rcu_read(&migration_bitmap_rcu)->bmap;
584 migration_dirty_pages += cpu_physical_memory_sync_dirty_bitmap(bitmap,
585 start, length, &num_dirty_pages_period);
586}
587
588/* Fix me: there are too many global variables used in migration process. */
589static int64_t start_time;
590static int64_t bytes_xfer_prev;
591static uint64_t xbzrle_cache_miss_prev;
592static uint64_t iterations_prev;
593
594static void migration_bitmap_sync_init(void)
595{
596 start_time = 0;
597 bytes_xfer_prev = 0;
598 num_dirty_pages_period = 0;
599 xbzrle_cache_miss_prev = 0;
600 iterations_prev = 0;
601}
602
603/* Returns a summary bitmap of the page sizes of all RAMBlocks;
604 * for VMs with just normal pages this is equivalent to the
605 * host page size. If it's got some huge pages then it's the OR
606 * of all the different page sizes.
607 */
608uint64_t ram_pagesize_summary(void)
609{
610 RAMBlock *block;
611 uint64_t summary = 0;
612
613 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
614 summary |= block->page_size;
615 }
616
617 return summary;
618}
619
620static void migration_bitmap_sync(void)
621{
622 RAMBlock *block;
623 MigrationState *s = migrate_get_current();
624 int64_t end_time;
625 int64_t bytes_xfer_now;
626
627 bitmap_sync_count++;
628
629 if (!bytes_xfer_prev) {
630 bytes_xfer_prev = ram_bytes_transferred();
631 }
632
633 if (!start_time) {
634 start_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
635 }
636
637 trace_migration_bitmap_sync_start();
9c1f8f44 638 memory_global_dirty_log_sync();
56e93d26 639
dd631697 640 qemu_mutex_lock(&migration_bitmap_mutex);
641 rcu_read_lock();
642 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
2f68e399 643 migration_bitmap_sync_range(block->offset, block->used_length);
644 }
645 rcu_read_unlock();
dd631697 646 qemu_mutex_unlock(&migration_bitmap_mutex);
56e93d26 647
648 trace_migration_bitmap_sync_end(num_dirty_pages_period);
649
650 end_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
651
652 /* more than 1 second = 1000 milliseconds */
653 if (end_time > start_time + 1000) {
654 if (migrate_auto_converge()) {
655 /* The following detection logic can be refined later. For now:
656 Check to see if the bytes dirtied in this period exceed 50% of the
657 approximate number of bytes transferred since the last time we
658 were in this routine. If that happens twice, start or increase
659 throttling */
56e93d26 660 bytes_xfer_now = ram_bytes_transferred();
070afca2 661
662 if (s->dirty_pages_rate &&
663 (num_dirty_pages_period * TARGET_PAGE_SIZE >
664 (bytes_xfer_now - bytes_xfer_prev)/2) &&
070afca2 665 (dirty_rate_high_cnt++ >= 2)) {
56e93d26 666 trace_migration_throttle();
56e93d26 667 dirty_rate_high_cnt = 0;
070afca2 668 mig_throttle_guest_down();
669 }
670 bytes_xfer_prev = bytes_xfer_now;
56e93d26 671 }
070afca2 672
673 if (migrate_use_xbzrle()) {
674 if (iterations_prev != acct_info.iterations) {
675 acct_info.xbzrle_cache_miss_rate =
676 (double)(acct_info.xbzrle_cache_miss -
677 xbzrle_cache_miss_prev) /
678 (acct_info.iterations - iterations_prev);
679 }
680 iterations_prev = acct_info.iterations;
681 xbzrle_cache_miss_prev = acct_info.xbzrle_cache_miss;
682 }
683 s->dirty_pages_rate = num_dirty_pages_period * 1000
684 / (end_time - start_time);
685 s->dirty_bytes_rate = s->dirty_pages_rate * TARGET_PAGE_SIZE;
686 start_time = end_time;
687 num_dirty_pages_period = 0;
688 }
689 s->dirty_sync_count = bitmap_sync_count;
690 if (migrate_use_events()) {
691 qapi_event_send_migration_pass(bitmap_sync_count, NULL);
692 }
693}
694
695/**
696 * save_zero_page: Send the zero page to the stream
697 *
698 * Returns: Number of pages written.
699 *
700 * @f: QEMUFile where to send the data
701 * @block: block that contains the page we want to send
702 * @offset: offset inside the block for the page
703 * @p: pointer to the page
704 * @bytes_transferred: increase it with the number of transferred bytes
705 */
706static int save_zero_page(QEMUFile *f, RAMBlock *block, ram_addr_t offset,
707 uint8_t *p, uint64_t *bytes_transferred)
708{
709 int pages = -1;
710
711 if (is_zero_range(p, TARGET_PAGE_SIZE)) {
712 acct_info.dup_pages++;
713 *bytes_transferred += save_page_header(f, block,
714 offset | RAM_SAVE_FLAG_COMPRESS);
715 qemu_put_byte(f, 0);
716 *bytes_transferred += 1;
717 pages = 1;
718 }
719
720 return pages;
721}
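/*
 * Editor's note: a zero page costs only the page header plus one 0x00
 * byte on the wire (9 bytes when the CONTINUE flag is set), rather than
 * TARGET_PAGE_SIZE bytes of page data.
 */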
722
723static void ram_release_pages(MigrationState *ms, const char *block_name,
724 uint64_t offset, int pages)
725{
726 if (!migrate_release_ram() || !migration_in_postcopy(ms)) {
727 return;
728 }
729
730 ram_discard_range(NULL, block_name, offset, pages << TARGET_PAGE_BITS);
731}
732
733/**
734 * ram_save_page: Send the given page to the stream
735 *
736 * Returns: Number of pages written.
737 * < 0 - error
738 * >=0 - Number of pages written - this might legally be 0
739 * if xbzrle noticed the page was the same.
56e93d26 740 *
9eb14766 741 * @ms: The current migration state.
742 * @f: QEMUFile where to send the data
743 * @block: block that contains the page we want to send
744 * @offset: offset inside the block for the page
745 * @last_stage: if we are at the completion stage
746 * @bytes_transferred: increase it with the number of transferred bytes
747 */
9eb14766 748static int ram_save_page(MigrationState *ms, QEMUFile *f, PageSearchStatus *pss,
749 bool last_stage, uint64_t *bytes_transferred)
750{
751 int pages = -1;
752 uint64_t bytes_xmit;
753 ram_addr_t current_addr;
754 uint8_t *p;
755 int ret;
756 bool send_async = true;
757 RAMBlock *block = pss->block;
758 ram_addr_t offset = pss->offset;
56e93d26 759
2f68e399 760 p = block->host + offset;
761
762 /* When in doubt, send the page as normal */
763 bytes_xmit = 0;
764 ret = ram_control_save_page(f, block->offset,
765 offset, TARGET_PAGE_SIZE, &bytes_xmit);
766 if (bytes_xmit) {
767 *bytes_transferred += bytes_xmit;
768 pages = 1;
769 }
770
771 XBZRLE_cache_lock();
772
773 current_addr = block->offset + offset;
774
775 if (block == last_sent_block) {
776 offset |= RAM_SAVE_FLAG_CONTINUE;
777 }
778 if (ret != RAM_SAVE_CONTROL_NOT_SUPP) {
779 if (ret != RAM_SAVE_CONTROL_DELAYED) {
780 if (bytes_xmit > 0) {
781 acct_info.norm_pages++;
782 } else if (bytes_xmit == 0) {
783 acct_info.dup_pages++;
784 }
785 }
786 } else {
787 pages = save_zero_page(f, block, offset, p, bytes_transferred);
788 if (pages > 0) {
789 /* Must let xbzrle know, otherwise a previous (now 0'd) cached
790 * page would be stale
791 */
792 xbzrle_cache_zero_page(current_addr);
53f09a10 793 ram_release_pages(ms, block->idstr, pss->offset, pages);
2ebeaec0 794 } else if (!ram_bulk_stage &&
9eb14766 795 !migration_in_postcopy(ms) && migrate_use_xbzrle()) {
796 pages = save_xbzrle_page(f, &p, current_addr, block,
797 offset, last_stage, bytes_transferred);
798 if (!last_stage) {
799 /* Can't send this cached data async, since the cache page
800 * might get updated before it gets to the wire
801 */
802 send_async = false;
803 }
804 }
805 }
806
807 /* XBZRLE overflow or normal page */
808 if (pages == -1) {
809 *bytes_transferred += save_page_header(f, block,
810 offset | RAM_SAVE_FLAG_PAGE);
811 if (send_async) {
812 qemu_put_buffer_async(f, p, TARGET_PAGE_SIZE,
813 migrate_release_ram() &
814 migration_in_postcopy(ms));
815 } else {
816 qemu_put_buffer(f, p, TARGET_PAGE_SIZE);
817 }
818 *bytes_transferred += TARGET_PAGE_SIZE;
819 pages = 1;
820 acct_info.norm_pages++;
821 }
822
823 XBZRLE_cache_unlock();
824
825 return pages;
826}
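/*
 * Editor's summary of the decision chain above (illustration only):
 *   1. ram_control_save_page() (e.g. an RDMA transport) gets first chance;
 *   2. otherwise try save_zero_page();
 *   3. otherwise, outside the bulk stage and outside postcopy, try XBZRLE;
 *   4. finally fall back to sending the full TARGET_PAGE_SIZE page.
 */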
827
828static int do_compress_ram_page(QEMUFile *f, RAMBlock *block,
829 ram_addr_t offset)
830{
831 int bytes_sent, blen;
a7a9a88f 832 uint8_t *p = block->host + (offset & TARGET_PAGE_MASK);
56e93d26 833
a7a9a88f 834 bytes_sent = save_page_header(f, block, offset |
56e93d26 835 RAM_SAVE_FLAG_COMPRESS_PAGE);
a7a9a88f 836 blen = qemu_put_compression_data(f, p, TARGET_PAGE_SIZE,
56e93d26 837 migrate_compress_level());
838 if (blen < 0) {
839 bytes_sent = 0;
840 qemu_file_set_error(migrate_get_current()->to_dst_file, blen);
841 error_report("compressed data failed!");
842 } else {
843 bytes_sent += blen;
844 ram_release_pages(migrate_get_current(), block->idstr,
845 offset & TARGET_PAGE_MASK, 1);
b3be2896 846 }
847
848 return bytes_sent;
849}
850
851static uint64_t bytes_transferred;
852
853static void flush_compressed_data(QEMUFile *f)
854{
855 int idx, len, thread_count;
856
857 if (!migrate_use_compression()) {
858 return;
859 }
860 thread_count = migrate_compress_threads();
a7a9a88f 861
0d9f9a5c 862 qemu_mutex_lock(&comp_done_lock);
56e93d26 863 for (idx = 0; idx < thread_count; idx++) {
a7a9a88f 864 while (!comp_param[idx].done) {
0d9f9a5c 865 qemu_cond_wait(&comp_done_cond, &comp_done_lock);
56e93d26 866 }
a7a9a88f 867 }
0d9f9a5c 868 qemu_mutex_unlock(&comp_done_lock);
869
870 for (idx = 0; idx < thread_count; idx++) {
871 qemu_mutex_lock(&comp_param[idx].mutex);
90e56fb4 872 if (!comp_param[idx].quit) {
873 len = qemu_put_qemu_file(f, comp_param[idx].file);
874 bytes_transferred += len;
875 }
a7a9a88f 876 qemu_mutex_unlock(&comp_param[idx].mutex);
877 }
878}
879
880static inline void set_compress_params(CompressParam *param, RAMBlock *block,
881 ram_addr_t offset)
882{
883 param->block = block;
884 param->offset = offset;
885}
886
887static int compress_page_with_multi_thread(QEMUFile *f, RAMBlock *block,
888 ram_addr_t offset,
889 uint64_t *bytes_transferred)
890{
891 int idx, thread_count, bytes_xmit = -1, pages = -1;
892
893 thread_count = migrate_compress_threads();
0d9f9a5c 894 qemu_mutex_lock(&comp_done_lock);
895 while (true) {
896 for (idx = 0; idx < thread_count; idx++) {
897 if (comp_param[idx].done) {
a7a9a88f 898 comp_param[idx].done = false;
56e93d26 899 bytes_xmit = qemu_put_qemu_file(f, comp_param[idx].file);
a7a9a88f 900 qemu_mutex_lock(&comp_param[idx].mutex);
56e93d26 901 set_compress_params(&comp_param[idx], block, offset);
902 qemu_cond_signal(&comp_param[idx].cond);
903 qemu_mutex_unlock(&comp_param[idx].mutex);
904 pages = 1;
905 acct_info.norm_pages++;
906 *bytes_transferred += bytes_xmit;
907 break;
908 }
909 }
910 if (pages > 0) {
911 break;
912 } else {
0d9f9a5c 913 qemu_cond_wait(&comp_done_cond, &comp_done_lock);
914 }
915 }
0d9f9a5c 916 qemu_mutex_unlock(&comp_done_lock);
917
918 return pages;
919}
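/*
 * Editor's note: the migration thread scans the worker slots; when one is
 * 'done' it drains that worker's buffered QEMUFile into the stream, hands
 * the worker the next (block, offset) pair and signals its condition
 * variable.  When no worker is idle it waits on comp_done_cond until
 * do_data_compress() signals completion.
 */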
920
921/**
922 * ram_save_compressed_page: compress the given page and send it to the stream
923 *
924 * Returns: Number of pages written.
925 *
9eb14766 926 * @ms: The current migration state.
927 * @f: QEMUFile where to send the data
928 * @block: block that contains the page we want to send
929 * @offset: offset inside the block for the page
930 * @last_stage: if we are at the completion stage
931 * @bytes_transferred: increase it with the number of transferred bytes
932 */
933static int ram_save_compressed_page(MigrationState *ms, QEMUFile *f,
934 PageSearchStatus *pss, bool last_stage,
935 uint64_t *bytes_transferred)
936{
937 int pages = -1;
fc50438e 938 uint64_t bytes_xmit = 0;
56e93d26 939 uint8_t *p;
fc50438e 940 int ret, blen;
941 RAMBlock *block = pss->block;
942 ram_addr_t offset = pss->offset;
56e93d26 943
2f68e399 944 p = block->host + offset;
56e93d26 945
946 ret = ram_control_save_page(f, block->offset,
947 offset, TARGET_PAGE_SIZE, &bytes_xmit);
948 if (bytes_xmit) {
949 *bytes_transferred += bytes_xmit;
950 pages = 1;
951 }
952 if (ret != RAM_SAVE_CONTROL_NOT_SUPP) {
953 if (ret != RAM_SAVE_CONTROL_DELAYED) {
954 if (bytes_xmit > 0) {
955 acct_info.norm_pages++;
956 } else if (bytes_xmit == 0) {
957 acct_info.dup_pages++;
958 }
959 }
960 } else {
961 /* When starting the process of a new block, the first page of
962 * the block should be sent out before other pages in the same
963 * block, and all the pages in last block should have been sent
964 * out, keeping this order is important, because the 'cont' flag
965 * is used to avoid resending the block name.
966 */
967 if (block != last_sent_block) {
968 flush_compressed_data(f);
969 pages = save_zero_page(f, block, offset, p, bytes_transferred);
970 if (pages == -1) {
971 /* Make sure the first page is sent out before other pages */
972 bytes_xmit = save_page_header(f, block, offset |
973 RAM_SAVE_FLAG_COMPRESS_PAGE);
974 blen = qemu_put_compression_data(f, p, TARGET_PAGE_SIZE,
975 migrate_compress_level());
976 if (blen > 0) {
977 *bytes_transferred += bytes_xmit + blen;
b3be2896 978 acct_info.norm_pages++;
b3be2896 979 pages = 1;
980 } else {
981 qemu_file_set_error(f, blen);
982 error_report("compressed data failed!");
b3be2896 983 }
56e93d26 984 }
985 if (pages > 0) {
986 ram_release_pages(ms, block->idstr, pss->offset, pages);
987 }
56e93d26 988 } else {
fc50438e 989 offset |= RAM_SAVE_FLAG_CONTINUE;
990 pages = save_zero_page(f, block, offset, p, bytes_transferred);
991 if (pages == -1) {
992 pages = compress_page_with_multi_thread(f, block, offset,
993 bytes_transferred);
994 } else {
995 ram_release_pages(ms, block->idstr, pss->offset, pages);
996 }
997 }
998 }
999
1000 return pages;
1001}
1002
1003/*
1004 * Find the next dirty page and update any state associated with
1005 * the search process.
1006 *
1007 * Returns: True if a page is found
1008 *
1009 * @f: Current migration stream.
1010 * @pss: Data about the state of the current dirty page scan.
1011 * @*again: Set to false if the search has scanned the whole of RAM
1012 * *ram_addr_abs: Pointer into which to store the address of the dirty page
1013 * within the global ram_addr space
1014 */
1015static bool find_dirty_block(QEMUFile *f, PageSearchStatus *pss,
f3f491fc 1016 bool *again, ram_addr_t *ram_addr_abs)
b9e60928 1017{
1018 pss->offset = migration_bitmap_find_dirty(pss->block, pss->offset,
1019 ram_addr_abs);
1020 if (pss->complete_round && pss->block == last_seen_block &&
1021 pss->offset >= last_offset) {
1022 /*
1023 * We've been once around the RAM and haven't found anything.
1024 * Give up.
1025 */
1026 *again = false;
1027 return false;
1028 }
1029 if (pss->offset >= pss->block->used_length) {
1030 /* Didn't find anything in this RAM Block */
1031 pss->offset = 0;
1032 pss->block = QLIST_NEXT_RCU(pss->block, next);
1033 if (!pss->block) {
1034 /* Hit the end of the list */
1035 pss->block = QLIST_FIRST_RCU(&ram_list.blocks);
1036 /* Flag that we've looped */
1037 pss->complete_round = true;
1038 ram_bulk_stage = false;
1039 if (migrate_use_xbzrle()) {
1040 /* If xbzrle is on, stop using the data compression at this
1041 * point. In theory, xbzrle can do better than compression.
1042 */
1043 flush_compressed_data(f);
1044 compression_switch = false;
1045 }
1046 }
1047 /* Didn't find anything this time, but try again on the new block */
1048 *again = true;
1049 return false;
1050 } else {
1051 /* Can go around again, but... */
1052 *again = true;
1053 /* We've found something so probably don't need to */
1054 return true;
1055 }
1056}
1057
1058/*
1059 * Helper for 'get_queued_page' - gets a page off the queue
1060 * ms: MigrationState in
1061 * *offset: Used to return the offset within the RAMBlock
1062 * ram_addr_abs: global offset in the dirty/sent bitmaps
1063 *
1064 * Returns: block (or NULL if none available)
1065 */
1066static RAMBlock *unqueue_page(MigrationState *ms, ram_addr_t *offset,
1067 ram_addr_t *ram_addr_abs)
1068{
1069 RAMBlock *block = NULL;
1070
1071 qemu_mutex_lock(&ms->src_page_req_mutex);
1072 if (!QSIMPLEQ_EMPTY(&ms->src_page_requests)) {
1073 struct MigrationSrcPageRequest *entry =
1074 QSIMPLEQ_FIRST(&ms->src_page_requests);
1075 block = entry->rb;
1076 *offset = entry->offset;
1077 *ram_addr_abs = (entry->offset + entry->rb->offset) &
1078 TARGET_PAGE_MASK;
1079
1080 if (entry->len > TARGET_PAGE_SIZE) {
1081 entry->len -= TARGET_PAGE_SIZE;
1082 entry->offset += TARGET_PAGE_SIZE;
1083 } else {
1084 memory_region_unref(block->mr);
1085 QSIMPLEQ_REMOVE_HEAD(&ms->src_page_requests, next_req);
1086 g_free(entry);
1087 }
1088 }
1089 qemu_mutex_unlock(&ms->src_page_req_mutex);
1090
1091 return block;
1092}
1093
1094/*
1095 * Unqueue a page from the queue fed by postcopy page requests; skips pages
1096 * that are already sent (!dirty)
1097 *
1098 * ms: MigrationState in
1099 * pss: PageSearchStatus structure updated with found block/offset
1100 * ram_addr_abs: global offset in the dirty/sent bitmaps
1101 *
1102 * Returns: true if a queued page is found
1103 */
1104static bool get_queued_page(MigrationState *ms, PageSearchStatus *pss,
1105 ram_addr_t *ram_addr_abs)
1106{
1107 RAMBlock *block;
1108 ram_addr_t offset;
1109 bool dirty;
1110
1111 do {
1112 block = unqueue_page(ms, &offset, ram_addr_abs);
1113 /*
1114 * We're sending this page, and since it's postcopy nothing else
1115 * will dirty it, and we must make sure it doesn't get sent again
1116 * even if this queue request was received after the background
1117 * search already sent it.
1118 */
1119 if (block) {
1120 unsigned long *bitmap;
1121 bitmap = atomic_rcu_read(&migration_bitmap_rcu)->bmap;
1122 dirty = test_bit(*ram_addr_abs >> TARGET_PAGE_BITS, bitmap);
1123 if (!dirty) {
1124 trace_get_queued_page_not_dirty(
1125 block->idstr, (uint64_t)offset,
1126 (uint64_t)*ram_addr_abs,
1127 test_bit(*ram_addr_abs >> TARGET_PAGE_BITS,
1128 atomic_rcu_read(&migration_bitmap_rcu)->unsentmap));
1129 } else {
1130 trace_get_queued_page(block->idstr,
1131 (uint64_t)offset,
1132 (uint64_t)*ram_addr_abs);
1133 }
1134 }
1135
1136 } while (block && !dirty);
1137
1138 if (block) {
1139 /*
1140 * As soon as we start servicing pages out of order, then we have
1141 * to kill the bulk stage, since the bulk stage assumes
1142 * in (migration_bitmap_find_and_reset_dirty) that every page is
1143 * dirty, that's no longer true.
1144 */
1145 ram_bulk_stage = false;
1146
1147 /*
1148 * We want the background search to continue from the queued page
1149 * since the guest is likely to want other pages near to the page
1150 * it just requested.
1151 */
1152 pss->block = block;
1153 pss->offset = offset;
1154 }
1155
1156 return !!block;
1157}
1158
1159/**
1160 * flush_page_queue: Flush any remaining pages in the ram request queue;
1161 * it should be empty at the end anyway, but in error cases there may be
1162 * some left.
1163 *
1164 * ms: MigrationState
1165 */
1166void flush_page_queue(MigrationState *ms)
1167{
1168 struct MigrationSrcPageRequest *mspr, *next_mspr;
1169 /* This queue generally should be empty - but in the case of a failed
1170 * migration might have some droppings in.
1171 */
1172 rcu_read_lock();
1173 QSIMPLEQ_FOREACH_SAFE(mspr, &ms->src_page_requests, next_req, next_mspr) {
1174 memory_region_unref(mspr->rb->mr);
1175 QSIMPLEQ_REMOVE_HEAD(&ms->src_page_requests, next_req);
1176 g_free(mspr);
1177 }
1178 rcu_read_unlock();
1179}
1180
1181/**
1182 * Queue the pages for transmission, e.g. a request from postcopy destination
1183 * ms: MigrationState in which the queue is held
1184 * rbname: The RAMBlock the request is for - may be NULL (to mean reuse last)
1185 * start: Offset from the start of the RAMBlock
1186 * len: Length (in bytes) to send
1187 * Return: 0 on success
1188 */
1189int ram_save_queue_pages(MigrationState *ms, const char *rbname,
1190 ram_addr_t start, ram_addr_t len)
1191{
1192 RAMBlock *ramblock;
1193
d3bf5418 1194 ms->postcopy_requests++;
1195 rcu_read_lock();
1196 if (!rbname) {
1197 /* Reuse last RAMBlock */
1198 ramblock = ms->last_req_rb;
1199
1200 if (!ramblock) {
1201 /*
1202 * Shouldn't happen, we can't reuse the last RAMBlock if
1203 * it's the 1st request.
1204 */
1205 error_report("ram_save_queue_pages no previous block");
1206 goto err;
1207 }
1208 } else {
1209 ramblock = qemu_ram_block_by_name(rbname);
1210
1211 if (!ramblock) {
1212 /* We shouldn't be asked for a non-existent RAMBlock */
1213 error_report("ram_save_queue_pages no block '%s'", rbname);
1214 goto err;
1215 }
1216 ms->last_req_rb = ramblock;
1217 }
1218 trace_ram_save_queue_pages(ramblock->idstr, start, len);
1219 if (start+len > ramblock->used_length) {
1220 error_report("%s request overrun start=" RAM_ADDR_FMT " len="
1221 RAM_ADDR_FMT " blocklen=" RAM_ADDR_FMT,
1222 __func__, start, len, ramblock->used_length);
1223 goto err;
1224 }
1225
1226 struct MigrationSrcPageRequest *new_entry =
1227 g_malloc0(sizeof(struct MigrationSrcPageRequest));
1228 new_entry->rb = ramblock;
1229 new_entry->offset = start;
1230 new_entry->len = len;
1231
1232 memory_region_ref(ramblock->mr);
1233 qemu_mutex_lock(&ms->src_page_req_mutex);
1234 QSIMPLEQ_INSERT_TAIL(&ms->src_page_requests, new_entry, next_req);
1235 qemu_mutex_unlock(&ms->src_page_req_mutex);
1236 rcu_read_unlock();
1237
1238 return 0;
1239
1240err:
1241 rcu_read_unlock();
1242 return -1;
1243}
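/*
 * Editor's example (values hypothetical): a postcopy destination that
 * faults on a page it has not yet received ends up here with something
 * like rbname="pc.ram", start=0x7ff4000, len=4096; the request is queued
 * on src_page_requests and served ahead of the background scan by
 * get_queued_page().
 */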
1244
1245/**
1246 * ram_save_target_page: Save one target page
1247 *
1248 *
1249 * @f: QEMUFile where to send the data
1250 * @block: pointer to block that contains the page we want to send
1251 * @offset: offset inside the block for the page;
1252 * @last_stage: if we are at the completion stage
1253 * @bytes_transferred: increase it with the number of transferred bytes
1254 * @dirty_ram_abs: Address of the start of the dirty page in ram_addr_t space
1255 *
1256 * Returns: Number of pages written.
1257 */
1258static int ram_save_target_page(MigrationState *ms, QEMUFile *f,
a08f6890 1259 PageSearchStatus *pss,
1260 bool last_stage,
1261 uint64_t *bytes_transferred,
1262 ram_addr_t dirty_ram_abs)
1263{
1264 int res = 0;
1265
1266 /* Check whether the page is dirty and, if it is, send it */
1267 if (migration_bitmap_clear_dirty(dirty_ram_abs)) {
1268 unsigned long *unsentmap;
1269 if (compression_switch && migrate_use_compression()) {
9eb14766 1270 res = ram_save_compressed_page(ms, f, pss,
1271 last_stage,
1272 bytes_transferred);
1273 } else {
9eb14766 1274 res = ram_save_page(ms, f, pss, last_stage,
1275 bytes_transferred);
1276 }
1277
1278 if (res < 0) {
1279 return res;
1280 }
1281 unsentmap = atomic_rcu_read(&migration_bitmap_rcu)->unsentmap;
1282 if (unsentmap) {
1283 clear_bit(dirty_ram_abs >> TARGET_PAGE_BITS, unsentmap);
1284 }
1285 /* Only update last_sent_block if a block was actually sent; xbzrle
1286 * might have decided the page was identical so didn't bother writing
1287 * to the stream.
1288 */
1289 if (res > 0) {
a08f6890 1290 last_sent_block = pss->block;
3fd3c4b3 1291 }
1292 }
1293
1294 return res;
1295}
1296
1297/**
cb8d4c8f 1298 * ram_save_host_page: Starting at *offset send pages up to the end
1299 * of the current host page. It's valid for the initial
1300 * offset to point into the middle of a host page
1301 * in which case the remainder of the hostpage is sent.
1302 * Only dirty target pages are sent.
1303 * Note that the host page size may be a huge page for this
1304 * block.
1305 *
1306 * Returns: Number of pages written.
1307 *
1308 * @f: QEMUFile where to send the data
1309 * @block: pointer to block that contains the page we want to send
1310 * @offset: offset inside the block for the page; updated to last target page
1311 * sent
1312 * @last_stage: if we are at the completion stage
1313 * @bytes_transferred: increase it with the number of transferred bytes
1314 * @dirty_ram_abs: Address of the start of the dirty page in ram_addr_t space
1315 */
1316static int ram_save_host_page(MigrationState *ms, QEMUFile *f,
1317 PageSearchStatus *pss,
1318 bool last_stage,
1319 uint64_t *bytes_transferred,
1320 ram_addr_t dirty_ram_abs)
1321{
1322 int tmppages, pages = 0;
1323 size_t pagesize = qemu_ram_pagesize(pss->block);
1324
a82d593b 1325 do {
a08f6890 1326 tmppages = ram_save_target_page(ms, f, pss, last_stage,
1327 bytes_transferred, dirty_ram_abs);
1328 if (tmppages < 0) {
1329 return tmppages;
1330 }
1331
1332 pages += tmppages;
a08f6890 1333 pss->offset += TARGET_PAGE_SIZE;
a82d593b 1334 dirty_ram_abs += TARGET_PAGE_SIZE;
4c011c37 1335 } while (pss->offset & (pagesize - 1));
1336
1337 /* The offset we leave with is the last one we looked at */
a08f6890 1338 pss->offset -= TARGET_PAGE_SIZE;
1339 return pages;
1340}
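/*
 * Editor's example: for a RAMBlock backed by 2 MiB huge pages with 4 KiB
 * target pages, one call can emit up to 512 target pages, so the
 * destination always receives whole host pages.
 */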
6c595cde 1341
1342/**
1343 * ram_find_and_save_block: Finds a dirty page and sends it to f
1344 *
1345 * Called within an RCU critical section.
1346 *
1347 * Returns: The number of pages written
1348 * 0 means no dirty pages
1349 *
1350 * @f: QEMUFile where to send the data
1351 * @last_stage: if we are at the completion stage
1352 * @bytes_transferred: increase it with the number of transferred bytes
1353 *
1354 * On systems where host-page-size > target-page-size it will send all the
1355 * pages in a host page that are dirty.
1356 */
1357
1358static int ram_find_and_save_block(QEMUFile *f, bool last_stage,
1359 uint64_t *bytes_transferred)
1360{
b8fb8cb7 1361 PageSearchStatus pss;
a82d593b 1362 MigrationState *ms = migrate_get_current();
56e93d26 1363 int pages = 0;
b9e60928 1364 bool again, found;
1365 ram_addr_t dirty_ram_abs; /* Address of the start of the dirty page in
1366 ram_addr_t space */
56e93d26 1367
1368 /* No dirty page as there is zero RAM */
1369 if (!ram_bytes_total()) {
1370 return pages;
1371 }
1372
1373 pss.block = last_seen_block;
1374 pss.offset = last_offset;
1375 pss.complete_round = false;
1376
1377 if (!pss.block) {
1378 pss.block = QLIST_FIRST_RCU(&ram_list.blocks);
1379 }
56e93d26 1380
b9e60928 1381 do {
1382 again = true;
1383 found = get_queued_page(ms, &pss, &dirty_ram_abs);
b9e60928 1384
1385 if (!found) {
1386 /* priority queue empty, so just search for something dirty */
1387 found = find_dirty_block(f, &pss, &again, &dirty_ram_abs);
1388 }
f3f491fc 1389
a82d593b 1390 if (found) {
a08f6890 1391 pages = ram_save_host_page(ms, f, &pss,
1392 last_stage, bytes_transferred,
1393 dirty_ram_abs);
56e93d26 1394 }
b9e60928 1395 } while (!pages && again);
56e93d26 1396
1397 last_seen_block = pss.block;
1398 last_offset = pss.offset;
1399
1400 return pages;
1401}
1402
1403void acct_update_position(QEMUFile *f, size_t size, bool zero)
1404{
1405 uint64_t pages = size / TARGET_PAGE_SIZE;
1406 if (zero) {
1407 acct_info.dup_pages += pages;
1408 } else {
1409 acct_info.norm_pages += pages;
1410 bytes_transferred += size;
1411 qemu_update_position(f, size);
1412 }
1413}
1414
1415static ram_addr_t ram_save_remaining(void)
1416{
1417 return migration_dirty_pages;
1418}
1419
1420uint64_t ram_bytes_remaining(void)
1421{
1422 return ram_save_remaining() * TARGET_PAGE_SIZE;
1423}
1424
1425uint64_t ram_bytes_transferred(void)
1426{
1427 return bytes_transferred;
1428}
1429
1430uint64_t ram_bytes_total(void)
1431{
1432 RAMBlock *block;
1433 uint64_t total = 0;
1434
1435 rcu_read_lock();
1436 QLIST_FOREACH_RCU(block, &ram_list.blocks, next)
1437 total += block->used_length;
1438 rcu_read_unlock();
1439 return total;
1440}
1441
1442void free_xbzrle_decoded_buf(void)
1443{
1444 g_free(xbzrle_decoded_buf);
1445 xbzrle_decoded_buf = NULL;
1446}
1447
1448static void migration_bitmap_free(struct BitmapRcu *bmap)
1449{
1450 g_free(bmap->bmap);
f3f491fc 1451 g_free(bmap->unsentmap);
60be6340
DL
1452 g_free(bmap);
1453}
1454
6ad2a215 1455static void ram_migration_cleanup(void *opaque)
56e93d26 1456{
1457 /* the caller holds the iothread lock or is in a bh, so there is
1458 * no writing race against this migration_bitmap
1459 */
1460 struct BitmapRcu *bitmap = migration_bitmap_rcu;
1461 atomic_rcu_set(&migration_bitmap_rcu, NULL);
2ff64038 1462 if (bitmap) {
56e93d26 1463 memory_global_dirty_log_stop();
60be6340 1464 call_rcu(bitmap, migration_bitmap_free, rcu);
1465 }
1466
1467 XBZRLE_cache_lock();
1468 if (XBZRLE.cache) {
1469 cache_fini(XBZRLE.cache);
1470 g_free(XBZRLE.encoded_buf);
1471 g_free(XBZRLE.current_buf);
adb65dec 1472 g_free(ZERO_TARGET_PAGE);
1473 XBZRLE.cache = NULL;
1474 XBZRLE.encoded_buf = NULL;
1475 XBZRLE.current_buf = NULL;
1476 }
1477 XBZRLE_cache_unlock();
1478}
1479
1480static void reset_ram_globals(void)
1481{
1482 last_seen_block = NULL;
1483 last_sent_block = NULL;
1484 last_offset = 0;
1485 last_version = ram_list.version;
1486 ram_bulk_stage = true;
1487}
1488
1489#define MAX_WAIT 50 /* ms, half buffered_file limit */
1490
1491void migration_bitmap_extend(ram_addr_t old, ram_addr_t new)
1492{
1493 /* called in qemu main thread, so there is
1494 * no writing race against this migration_bitmap
1495 */
1496 if (migration_bitmap_rcu) {
1497 struct BitmapRcu *old_bitmap = migration_bitmap_rcu, *bitmap;
1498 bitmap = g_new(struct BitmapRcu, 1);
1499 bitmap->bmap = bitmap_new(new);
1500
1501 /* prevent bits in migration_bitmap from being set
1502 * by migration_bitmap_sync_range() at the same time;
1503 * it is safe for migration if bits in migration_bitmap are
1504 * cleared at the same time.
1505 */
1506 qemu_mutex_lock(&migration_bitmap_mutex);
1507 bitmap_copy(bitmap->bmap, old_bitmap->bmap, old);
1508 bitmap_set(bitmap->bmap, old, new - old);
1509
1510 /* We don't have a way to safely extend the sentmap
1511 * with RCU; so mark it as missing, entry to postcopy
1512 * will fail.
1513 */
1514 bitmap->unsentmap = NULL;
1515
60be6340 1516 atomic_rcu_set(&migration_bitmap_rcu, bitmap);
1517 qemu_mutex_unlock(&migration_bitmap_mutex);
1518 migration_dirty_pages += new - old;
60be6340 1519 call_rcu(old_bitmap, migration_bitmap_free, rcu);
1520 }
1521}
56e93d26 1522
1523/*
1524 * 'expected' is the value you expect the bitmap mostly to be full
1525 * of; it won't bother printing lines that are all this value.
1526 * If 'todump' is null the migration bitmap is dumped.
1527 */
1528void ram_debug_dump_bitmap(unsigned long *todump, bool expected)
1529{
1530 int64_t ram_pages = last_ram_offset() >> TARGET_PAGE_BITS;
1531
1532 int64_t cur;
1533 int64_t linelen = 128;
1534 char linebuf[129];
1535
1536 if (!todump) {
1537 todump = atomic_rcu_read(&migration_bitmap_rcu)->bmap;
1538 }
1539
1540 for (cur = 0; cur < ram_pages; cur += linelen) {
1541 int64_t curb;
1542 bool found = false;
1543 /*
1544 * Last line; catch the case where the line length
1545 * is longer than remaining ram
1546 */
1547 if (cur + linelen > ram_pages) {
1548 linelen = ram_pages - cur;
1549 }
1550 for (curb = 0; curb < linelen; curb++) {
1551 bool thisbit = test_bit(cur + curb, todump);
1552 linebuf[curb] = thisbit ? '1' : '.';
1553 found = found || (thisbit != expected);
1554 }
1555 if (found) {
1556 linebuf[curb] = '\0';
1557 fprintf(stderr, "0x%08" PRIx64 " : %s\n", cur, linebuf);
1558 }
1559 }
1560}
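/*
 * Editor's note: each printed line looks like
 *
 *     0x00000a00 : ..111......1....
 *
 * i.e. the starting page index in hex followed by one character per
 * target page ('1' = bit set, '.' = bit clear); lines in which all 128
 * pages match 'expected' are suppressed.
 */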
1561
1562/* **** functions for postcopy ***** */
1563
1564void ram_postcopy_migrated_memory_release(MigrationState *ms)
1565{
1566 struct RAMBlock *block;
1567 unsigned long *bitmap = atomic_rcu_read(&migration_bitmap_rcu)->bmap;
1568
1569 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1570 unsigned long first = block->offset >> TARGET_PAGE_BITS;
1571 unsigned long range = first + (block->used_length >> TARGET_PAGE_BITS);
1572 unsigned long run_start = find_next_zero_bit(bitmap, range, first);
1573
1574 while (run_start < range) {
1575 unsigned long run_end = find_next_bit(bitmap, range, run_start + 1);
1576 ram_discard_range(NULL, block->idstr, run_start << TARGET_PAGE_BITS,
1577 (run_end - run_start) << TARGET_PAGE_BITS);
1578 run_start = find_next_zero_bit(bitmap, range, run_end + 1);
1579 }
1580 }
1581}
1582
1583/*
1584 * Callback from postcopy_each_ram_send_discard for each RAMBlock
1585 * Note: At this point the 'unsentmap' is the processed bitmap combined
1586 * with the dirtymap; so a '1' means it's either dirty or unsent.
1587 * start,length: Indexes into the bitmap for the first bit
1588 * representing the named block and length in target-pages
1589 */
1590static int postcopy_send_discard_bm_ram(MigrationState *ms,
1591 PostcopyDiscardState *pds,
1592 unsigned long start,
1593 unsigned long length)
1594{
1595 unsigned long end = start + length; /* one after the end */
1596 unsigned long current;
1597 unsigned long *unsentmap;
1598
1599 unsentmap = atomic_rcu_read(&migration_bitmap_rcu)->unsentmap;
1600 for (current = start; current < end; ) {
1601 unsigned long one = find_next_bit(unsentmap, end, current);
1602
1603 if (one <= end) {
1604 unsigned long zero = find_next_zero_bit(unsentmap, end, one + 1);
1605 unsigned long discard_length;
1606
1607 if (zero >= end) {
1608 discard_length = end - one;
1609 } else {
1610 discard_length = zero - one;
1611 }
1612 if (discard_length) {
1613 postcopy_discard_send_range(ms, pds, one, discard_length);
1614 }
1615 current = one + discard_length;
1616 } else {
1617 current = one;
1618 }
1619 }
1620
1621 return 0;
1622}
1623
1624/*
1625 * Utility for the outgoing postcopy code.
1626 * Calls postcopy_send_discard_bm_ram for each RAMBlock
1627 * passing it bitmap indexes and name.
1628 * Returns: 0 on success
1629 * (qemu_ram_foreach_block ends up passing unscaled lengths
1630 * which would mean postcopy code would have to deal with target page)
1631 */
1632static int postcopy_each_ram_send_discard(MigrationState *ms)
1633{
1634 struct RAMBlock *block;
1635 int ret;
1636
1637 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1638 unsigned long first = block->offset >> TARGET_PAGE_BITS;
1639 PostcopyDiscardState *pds = postcopy_discard_send_init(ms,
1640 first,
1641 block->idstr);
1642
1643 /*
1644 * Postcopy sends chunks of bitmap over the wire, but it
1645 * just needs indexes at this point, avoids it having
1646 * target page specific code.
1647 */
1648 ret = postcopy_send_discard_bm_ram(ms, pds, first,
1649 block->used_length >> TARGET_PAGE_BITS);
1650 postcopy_discard_send_finish(ms, pds);
1651 if (ret) {
1652 return ret;
1653 }
1654 }
1655
1656 return 0;
1657}
1658
1659/*
1660 * Helper for postcopy_chunk_hostpages; it's called twice to cleanup
1661 * the two bitmaps, that are similar, but one is inverted.
1662 *
1663 * We search for runs of target-pages that don't start or end on a
1664 * host page boundary;
1665 * unsent_pass=true: Cleans up partially unsent host pages by searching
1666 * the unsentmap
1667 * unsent_pass=false: Cleans up partially dirty host pages by searching
1668 * the main migration bitmap
1669 *
1670 */
1671static void postcopy_chunk_hostpages_pass(MigrationState *ms, bool unsent_pass,
1672 RAMBlock *block,
1673 PostcopyDiscardState *pds)
1674{
1675 unsigned long *bitmap;
1676 unsigned long *unsentmap;
29c59172 1677 unsigned int host_ratio = block->page_size / TARGET_PAGE_SIZE;
1678 unsigned long first = block->offset >> TARGET_PAGE_BITS;
1679 unsigned long len = block->used_length >> TARGET_PAGE_BITS;
1680 unsigned long last = first + (len - 1);
1681 unsigned long run_start;
1682
1683 if (block->page_size == TARGET_PAGE_SIZE) {
1684 /* Easy case - TPS==HPS for a non-huge page RAMBlock */
1685 return;
1686 }
1687
1688 bitmap = atomic_rcu_read(&migration_bitmap_rcu)->bmap;
1689 unsentmap = atomic_rcu_read(&migration_bitmap_rcu)->unsentmap;
1690
1691 if (unsent_pass) {
1692 /* Find a sent page */
1693 run_start = find_next_zero_bit(unsentmap, last + 1, first);
1694 } else {
1695 /* Find a dirty page */
1696 run_start = find_next_bit(bitmap, last + 1, first);
1697 }
1698
1699 while (run_start <= last) {
1700 bool do_fixup = false;
1701 unsigned long fixup_start_addr;
1702 unsigned long host_offset;
1703
1704 /*
1705 * If the start of this run of pages is in the middle of a host
1706 * page, then we need to fixup this host page.
1707 */
1708 host_offset = run_start % host_ratio;
1709 if (host_offset) {
1710 do_fixup = true;
1711 run_start -= host_offset;
1712 fixup_start_addr = run_start;
1713 /* For the next pass */
1714 run_start = run_start + host_ratio;
1715 } else {
1716 /* Find the end of this run */
1717 unsigned long run_end;
1718 if (unsent_pass) {
1719 run_end = find_next_bit(unsentmap, last + 1, run_start + 1);
1720 } else {
1721 run_end = find_next_zero_bit(bitmap, last + 1, run_start + 1);
1722 }
1723 /*
1724 * If the end isn't at the start of a host page, then the
1725 * run doesn't finish at the end of a host page
1726 * and we need to discard.
1727 */
1728 host_offset = run_end % host_ratio;
1729 if (host_offset) {
1730 do_fixup = true;
1731 fixup_start_addr = run_end - host_offset;
1732 /*
1733 * This host page has gone, the next loop iteration starts
1734 * from after the fixup
1735 */
1736 run_start = fixup_start_addr + host_ratio;
1737 } else {
1738 /*
1739 * No discards on this iteration, next loop starts from
1740 * next sent/dirty page
1741 */
1742 run_start = run_end + 1;
1743 }
1744 }
1745
1746 if (do_fixup) {
1747 unsigned long page;
1748
1749 /* Tell the destination to discard this page */
1750 if (unsent_pass || !test_bit(fixup_start_addr, unsentmap)) {
1751 /* For the unsent_pass we:
1752 * discard partially sent pages
1753 * For the !unsent_pass (dirty) we:
1754 * discard partially dirty pages that were sent
1755 * (any partially sent pages were already discarded
1756 * by the previous unsent_pass)
1757 */
1758 postcopy_discard_send_range(ms, pds, fixup_start_addr,
1759 host_ratio);
1760 }
1761
1762 /* Clean up the bitmap */
1763 for (page = fixup_start_addr;
1764 page < fixup_start_addr + host_ratio; page++) {
1765 /* All pages in this host page are now not sent */
1766 set_bit(page, unsentmap);
1767
1768 /*
1769 * Remark them as dirty, updating the count for any pages
1770 * that weren't previously dirty.
1771 */
1772 migration_dirty_pages += !test_and_set_bit(page, bitmap);
1773 }
1774 }
1775
1776 if (unsent_pass) {
1777 /* Find the next sent page for the next iteration */
1778 run_start = find_next_zero_bit(unsentmap, last + 1,
1779 run_start);
1780 } else {
1781 /* Find the next dirty page for the next iteration */
1782 run_start = find_next_bit(bitmap, last + 1, run_start);
1783 }
1784 }
1785}
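/*
 * Worked example (illustrative only; assumes 4KiB target pages and a
 * 2MiB hugetlbfs-backed RAMBlock, so host_ratio == 512):
 *
 * In the dirty pass (unsent_pass == false), a dirty run starting at
 * bitmap page 1000 gives host_offset = 1000 % 512 = 488, so the fixup
 * rewinds to page 512: all 512 target pages of that host page are
 * re-marked dirty and unsent, and a discard for the whole host page is
 * sent to the destination if its first target page had been sent.
 * A run that starts and ends on a multiple of 512 needs no fixup.
 */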
1786
1787/*
1788 * Utility for the outgoing postcopy code.
1789 *
1790 * Discard any partially sent host-page size chunks, mark any partially
29c59172
DDAG
 1791 * dirty host-page size chunks as all dirty. In this case the host-page
 1792 * size is that of the particular RAMBlock, i.e. it might be a huge page.
99e314eb
DDAG
1793 *
1794 * Returns: 0 on success
1795 */
1796static int postcopy_chunk_hostpages(MigrationState *ms)
1797{
1798 struct RAMBlock *block;
1799
99e314eb
DDAG
1800 /* Easiest way to make sure we don't resume in the middle of a host-page */
1801 last_seen_block = NULL;
1802 last_sent_block = NULL;
1803 last_offset = 0;
1804
1805 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1806 unsigned long first = block->offset >> TARGET_PAGE_BITS;
1807
1808 PostcopyDiscardState *pds =
1809 postcopy_discard_send_init(ms, first, block->idstr);
1810
1811 /* First pass: Discard all partially sent host pages */
1812 postcopy_chunk_hostpages_pass(ms, true, block, pds);
1813 /*
1814 * Second pass: Ensure that all partially dirty host pages are made
1815 * fully dirty.
1816 */
1817 postcopy_chunk_hostpages_pass(ms, false, block, pds);
1818
1819 postcopy_discard_send_finish(ms, pds);
1820 } /* ram_list loop */
1821
1822 return 0;
1823}
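/*
 * Note on ordering: the unsent pass runs first, so any host page it
 * fixes up is already fully marked unsent when the dirty pass runs;
 * the dirty pass therefore only sends discards for host pages whose
 * first target page had actually been transmitted (the test_bit()
 * check in postcopy_chunk_hostpages_pass()).
 */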
1824
e0b266f0
DDAG
1825/*
 1826 * Transmit the set of pages to be discarded after precopy to the target;
 1827 * these are pages that:
 1828 * a) have previously been transmitted but are now dirty again, or
 1829 * b) have never been transmitted; this ensures that any pages on the
 1830 * destination that have been mapped by background tasks get discarded
 1831 * (transparent huge pages are the specific concern).
 1832 * Hopefully this set is pretty sparse.
1833 */
1834int ram_postcopy_send_discard_bitmap(MigrationState *ms)
1835{
1836 int ret;
1837 unsigned long *bitmap, *unsentmap;
1838
1839 rcu_read_lock();
1840
1841 /* This should be our last sync, the src is now paused */
1842 migration_bitmap_sync();
1843
1844 unsentmap = atomic_rcu_read(&migration_bitmap_rcu)->unsentmap;
1845 if (!unsentmap) {
 1846 /* We don't have a safe way to resize the unsentmap, so
 1847 * if the bitmap was resized it will be NULL at this
 1848 * point.
 1849 */
1850 error_report("migration ram resized during precopy phase");
1851 rcu_read_unlock();
1852 return -EINVAL;
1853 }
1854
29c59172 1855 /* Deal with TPS != HPS and huge pages */
99e314eb
DDAG
1856 ret = postcopy_chunk_hostpages(ms);
1857 if (ret) {
1858 rcu_read_unlock();
1859 return ret;
1860 }
1861
e0b266f0
DDAG
1862 /*
1863 * Update the unsentmap to be unsentmap = unsentmap | dirty
1864 */
1865 bitmap = atomic_rcu_read(&migration_bitmap_rcu)->bmap;
1866 bitmap_or(unsentmap, unsentmap, bitmap,
1867 last_ram_offset() >> TARGET_PAGE_BITS);
1868
1869
1870 trace_ram_postcopy_send_discard_bitmap();
1871#ifdef DEBUG_POSTCOPY
1872 ram_debug_dump_bitmap(unsentmap, true);
1873#endif
1874
1875 ret = postcopy_each_ram_send_discard(ms);
1876 rcu_read_unlock();
1877
1878 return ret;
1879}
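/*
 * Minimal sketch of the bitmap_or() above (bit values made up):
 *
 *   unsentmap: 0 0 1 0   target pages never transmitted
 *   bmap:      1 0 0 0   target pages dirtied since being sent
 *   result:    1 0 1 0   target pages still to be sent in postcopy
 *
 * Every set bit in the resulting unsentmap is a page the destination
 * will be told to discard and that must be (re)sent during postcopy.
 */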
1880
1881/*
1882 * At the start of the postcopy phase of migration, any now-dirty
1883 * precopied pages are discarded.
1884 *
1885 * start, length describe a byte address range within the RAMBlock
1886 *
1887 * Returns 0 on success.
1888 */
1889int ram_discard_range(MigrationIncomingState *mis,
1890 const char *block_name,
1891 uint64_t start, size_t length)
1892{
1893 int ret = -1;
1894
d3a5038c
DDAG
1895 trace_ram_discard_range(block_name, start, length);
1896
e0b266f0
DDAG
1897 rcu_read_lock();
1898 RAMBlock *rb = qemu_ram_block_by_name(block_name);
1899
1900 if (!rb) {
1901 error_report("ram_discard_range: Failed to find block '%s'",
1902 block_name);
1903 goto err;
1904 }
1905
d3a5038c 1906 ret = ram_block_discard_range(rb, start, length);
e0b266f0
DDAG
1907
1908err:
1909 rcu_read_unlock();
1910
1911 return ret;
1912}
1913
a91246c9 1914static int ram_save_init_globals(void)
56e93d26 1915{
56e93d26
JQ
1916 int64_t ram_bitmap_pages; /* Size of bitmap in pages, including gaps */
1917
56e93d26
JQ
1918 dirty_rate_high_cnt = 0;
1919 bitmap_sync_count = 0;
1920 migration_bitmap_sync_init();
dd631697 1921 qemu_mutex_init(&migration_bitmap_mutex);
56e93d26
JQ
1922
1923 if (migrate_use_xbzrle()) {
1924 XBZRLE_cache_lock();
adb65dec 1925 ZERO_TARGET_PAGE = g_malloc0(TARGET_PAGE_SIZE);
56e93d26
JQ
1926 XBZRLE.cache = cache_init(migrate_xbzrle_cache_size() /
1927 TARGET_PAGE_SIZE,
1928 TARGET_PAGE_SIZE);
1929 if (!XBZRLE.cache) {
1930 XBZRLE_cache_unlock();
1931 error_report("Error creating cache");
1932 return -1;
1933 }
1934 XBZRLE_cache_unlock();
1935
1936 /* We prefer not to abort if there is no memory */
1937 XBZRLE.encoded_buf = g_try_malloc0(TARGET_PAGE_SIZE);
1938 if (!XBZRLE.encoded_buf) {
1939 error_report("Error allocating encoded_buf");
1940 return -1;
1941 }
1942
1943 XBZRLE.current_buf = g_try_malloc(TARGET_PAGE_SIZE);
1944 if (!XBZRLE.current_buf) {
1945 error_report("Error allocating current_buf");
1946 g_free(XBZRLE.encoded_buf);
1947 XBZRLE.encoded_buf = NULL;
1948 return -1;
1949 }
1950
1951 acct_clear();
1952 }
1953
49877834
PB
1954 /* For memory_global_dirty_log_start below. */
1955 qemu_mutex_lock_iothread();
1956
56e93d26
JQ
1957 qemu_mutex_lock_ramlist();
1958 rcu_read_lock();
1959 bytes_transferred = 0;
1960 reset_ram_globals();
1961
f3f491fc 1962 migration_bitmap_rcu = g_new0(struct BitmapRcu, 1);
0827b9e9
AA
1963 /* Skip setting bitmap if there is no RAM */
1964 if (ram_bytes_total()) {
1965 ram_bitmap_pages = last_ram_offset() >> TARGET_PAGE_BITS;
1966 migration_bitmap_rcu->bmap = bitmap_new(ram_bitmap_pages);
1967 bitmap_set(migration_bitmap_rcu->bmap, 0, ram_bitmap_pages);
1968
1969 if (migrate_postcopy_ram()) {
1970 migration_bitmap_rcu->unsentmap = bitmap_new(ram_bitmap_pages);
1971 bitmap_set(migration_bitmap_rcu->unsentmap, 0, ram_bitmap_pages);
1972 }
f3f491fc
DDAG
1973 }
1974
56e93d26
JQ
1975 /*
1976 * Count the total number of pages used by ram blocks not including any
1977 * gaps due to alignment or unplugs.
1978 */
1979 migration_dirty_pages = ram_bytes_total() >> TARGET_PAGE_BITS;
1980
1981 memory_global_dirty_log_start();
1982 migration_bitmap_sync();
1983 qemu_mutex_unlock_ramlist();
49877834 1984 qemu_mutex_unlock_iothread();
a91246c9
HZ
1985 rcu_read_unlock();
1986
1987 return 0;
1988}
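/*
 * Rough sizing example (not from the source; assumes 4KiB target pages
 * and no alignment gaps): a guest with 4GiB of RAM has
 * 4GiB / 4KiB = 1,048,576 target pages, so bmap (and unsentmap when
 * postcopy is enabled) each occupy about 1,048,576 bits = 128KiB.
 */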
1989
1990/* Each of ram_save_setup, ram_save_iterate and ram_save_complete has a
1991 * long-running RCU critical section. When RCU reclaims in the code
1992 * start to become numerous it will be necessary to reduce the
1993 * granularity of these critical sections.
1994 */
1995
1996static int ram_save_setup(QEMUFile *f, void *opaque)
1997{
1998 RAMBlock *block;
1999
 2000 /* In COLO state, migration has already set up the bitmap; reuse it. */
2001 if (!migration_in_colo_state()) {
2002 if (ram_save_init_globals() < 0) {
2003 return -1;
2004 }
2005 }
2006
2007 rcu_read_lock();
56e93d26
JQ
2008
2009 qemu_put_be64(f, ram_bytes_total() | RAM_SAVE_FLAG_MEM_SIZE);
2010
2011 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
2012 qemu_put_byte(f, strlen(block->idstr));
2013 qemu_put_buffer(f, (uint8_t *)block->idstr, strlen(block->idstr));
2014 qemu_put_be64(f, block->used_length);
ef08fb38
DDAG
2015 if (migrate_postcopy_ram() && block->page_size != qemu_host_page_size) {
2016 qemu_put_be64(f, block->page_size);
2017 }
56e93d26
JQ
2018 }
2019
2020 rcu_read_unlock();
2021
2022 ram_control_before_iterate(f, RAM_CONTROL_SETUP);
2023 ram_control_after_iterate(f, RAM_CONTROL_SETUP);
2024
2025 qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
2026
2027 return 0;
2028}
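/*
 * For reference, the header that ram_save_setup() writes looks roughly
 * like this on the wire (straight from the code above):
 *
 *   be64: ram_bytes_total() | RAM_SAVE_FLAG_MEM_SIZE
 *   for each RAMBlock:
 *       byte : strlen(idstr)
 *       bytes: idstr (not NUL-terminated)
 *       be64 : used_length
 *       be64 : page_size   (only when postcopy is enabled and the block's
 *                           page size differs from qemu_host_page_size)
 *   be64: RAM_SAVE_FLAG_EOS
 */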
2029
2030static int ram_save_iterate(QEMUFile *f, void *opaque)
2031{
2032 int ret;
2033 int i;
2034 int64_t t0;
5c90308f 2035 int done = 0;
56e93d26
JQ
2036
2037 rcu_read_lock();
2038 if (ram_list.version != last_version) {
2039 reset_ram_globals();
2040 }
2041
2042 /* Read version before ram_list.blocks */
2043 smp_rmb();
2044
2045 ram_control_before_iterate(f, RAM_CONTROL_ROUND);
2046
2047 t0 = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
2048 i = 0;
2049 while ((ret = qemu_file_rate_limit(f)) == 0) {
2050 int pages;
2051
2052 pages = ram_find_and_save_block(f, false, &bytes_transferred);
 2053 /* no more pages to send */
2054 if (pages == 0) {
5c90308f 2055 done = 1;
56e93d26
JQ
2056 break;
2057 }
56e93d26 2058 acct_info.iterations++;
070afca2 2059
56e93d26
JQ
 2060 /* we want to check in the 1st loop, just in case it was the 1st time
 2061 and we had to sync the dirty bitmap.
 2062 qemu_clock_get_ns() is a bit expensive, so we only check once
 2063 every few iterations
 2064 */
2065 if ((i & 63) == 0) {
2066 uint64_t t1 = (qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - t0) / 1000000;
2067 if (t1 > MAX_WAIT) {
55c4446b 2068 trace_ram_save_iterate_big_wait(t1, i);
56e93d26
JQ
2069 break;
2070 }
2071 }
2072 i++;
2073 }
2074 flush_compressed_data(f);
2075 rcu_read_unlock();
2076
2077 /*
2078 * Must occur before EOS (or any QEMUFile operation)
2079 * because of RDMA protocol.
2080 */
2081 ram_control_after_iterate(f, RAM_CONTROL_ROUND);
2082
2083 qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
2084 bytes_transferred += 8;
2085
2086 ret = qemu_file_get_error(f);
2087 if (ret < 0) {
2088 return ret;
2089 }
2090
5c90308f 2091 return done;
56e93d26
JQ
2092}
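/*
 * Return convention of ram_save_iterate(), as implemented above:
 * 1 means this round ran out of dirty pages to send, 0 means it stopped
 * early (rate limit or MAX_WAIT) with work still pending, and a negative
 * value is a QEMUFile error.
 */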
2093
2094/* Called with iothread lock */
2095static int ram_save_complete(QEMUFile *f, void *opaque)
2096{
2097 rcu_read_lock();
2098
663e6c1d
DDAG
2099 if (!migration_in_postcopy(migrate_get_current())) {
2100 migration_bitmap_sync();
2101 }
56e93d26
JQ
2102
2103 ram_control_before_iterate(f, RAM_CONTROL_FINISH);
2104
2105 /* try transferring iterative blocks of memory */
2106
2107 /* flush all remaining blocks regardless of rate limiting */
2108 while (true) {
2109 int pages;
2110
a91246c9
HZ
2111 pages = ram_find_and_save_block(f, !migration_in_colo_state(),
2112 &bytes_transferred);
56e93d26
JQ
 2113 /* no more blocks to send */
2114 if (pages == 0) {
2115 break;
2116 }
2117 }
2118
2119 flush_compressed_data(f);
2120 ram_control_after_iterate(f, RAM_CONTROL_FINISH);
56e93d26
JQ
2121
2122 rcu_read_unlock();
d09a6fde 2123
56e93d26
JQ
2124 qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
2125
2126 return 0;
2127}
2128
c31b098f
DDAG
2129static void ram_save_pending(QEMUFile *f, void *opaque, uint64_t max_size,
2130 uint64_t *non_postcopiable_pending,
2131 uint64_t *postcopiable_pending)
56e93d26
JQ
2132{
2133 uint64_t remaining_size;
2134
2135 remaining_size = ram_save_remaining() * TARGET_PAGE_SIZE;
2136
663e6c1d
DDAG
2137 if (!migration_in_postcopy(migrate_get_current()) &&
2138 remaining_size < max_size) {
56e93d26
JQ
2139 qemu_mutex_lock_iothread();
2140 rcu_read_lock();
2141 migration_bitmap_sync();
2142 rcu_read_unlock();
2143 qemu_mutex_unlock_iothread();
2144 remaining_size = ram_save_remaining() * TARGET_PAGE_SIZE;
2145 }
c31b098f
DDAG
2146
2147 /* We can do postcopy, and all the data is postcopiable */
2148 *postcopiable_pending += remaining_size;
56e93d26
JQ
2149}
2150
2151static int load_xbzrle(QEMUFile *f, ram_addr_t addr, void *host)
2152{
2153 unsigned int xh_len;
2154 int xh_flags;
063e760a 2155 uint8_t *loaded_data;
56e93d26
JQ
2156
2157 if (!xbzrle_decoded_buf) {
2158 xbzrle_decoded_buf = g_malloc(TARGET_PAGE_SIZE);
2159 }
063e760a 2160 loaded_data = xbzrle_decoded_buf;
56e93d26
JQ
2161
2162 /* extract RLE header */
2163 xh_flags = qemu_get_byte(f);
2164 xh_len = qemu_get_be16(f);
2165
2166 if (xh_flags != ENCODING_FLAG_XBZRLE) {
2167 error_report("Failed to load XBZRLE page - wrong compression!");
2168 return -1;
2169 }
2170
2171 if (xh_len > TARGET_PAGE_SIZE) {
2172 error_report("Failed to load XBZRLE page - len overflow!");
2173 return -1;
2174 }
2175 /* load data and decode */
063e760a 2176 qemu_get_buffer_in_place(f, &loaded_data, xh_len);
56e93d26
JQ
2177
2178 /* decode RLE */
063e760a 2179 if (xbzrle_decode_buffer(loaded_data, xh_len, host,
56e93d26
JQ
2180 TARGET_PAGE_SIZE) == -1) {
2181 error_report("Failed to load XBZRLE page - decode error!");
2182 return -1;
2183 }
2184
2185 return 0;
2186}
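/*
 * Wire format consumed by load_xbzrle() above, per XBZRLE page:
 *
 *   byte: xh_flags  -- must be ENCODING_FLAG_XBZRLE
 *   be16: xh_len    -- length of the encoded data, at most TARGET_PAGE_SIZE
 *   xh_len bytes of XBZRLE-encoded data, decoded against the current
 *   contents of the target page at 'host'.
 */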
2187
2188/* Must be called from within an RCU critical section.
2189 * Returns a pointer from within the RCU-protected ram_list.
2190 */
a7180877 2191/*
4c4bad48 2192 * Read a RAMBlock ID from the stream f.
a7180877
DDAG
2193 *
2194 * f: Stream to read from
a7180877
DDAG
2195 * flags: Page flags (mostly to see if it's a continuation of previous block)
2196 */
4c4bad48
HZ
2197static inline RAMBlock *ram_block_from_stream(QEMUFile *f,
2198 int flags)
56e93d26
JQ
2199{
2200 static RAMBlock *block = NULL;
2201 char id[256];
2202 uint8_t len;
2203
2204 if (flags & RAM_SAVE_FLAG_CONTINUE) {
4c4bad48 2205 if (!block) {
56e93d26
JQ
2206 error_report("Ack, bad migration stream!");
2207 return NULL;
2208 }
4c4bad48 2209 return block;
56e93d26
JQ
2210 }
2211
2212 len = qemu_get_byte(f);
2213 qemu_get_buffer(f, (uint8_t *)id, len);
2214 id[len] = 0;
2215
e3dd7493 2216 block = qemu_ram_block_by_name(id);
4c4bad48
HZ
2217 if (!block) {
2218 error_report("Can't find block %s", id);
2219 return NULL;
56e93d26
JQ
2220 }
2221
4c4bad48
HZ
2222 return block;
2223}
2224
2225static inline void *host_from_ram_block_offset(RAMBlock *block,
2226 ram_addr_t offset)
2227{
2228 if (!offset_in_ramblock(block, offset)) {
2229 return NULL;
2230 }
2231
2232 return block->host + offset;
56e93d26
JQ
2233}
2234
2235/*
2236 * If a page (or a whole RDMA chunk) has been
2237 * determined to be zero, then zap it.
2238 */
2239void ram_handle_compressed(void *host, uint8_t ch, uint64_t size)
2240{
2241 if (ch != 0 || !is_zero_range(host, size)) {
2242 memset(host, ch, size);
2243 }
2244}
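/*
 * Example: a zero page arrives as RAM_SAVE_FLAG_COMPRESS with ch == 0.
 * If the destination page already reads as all zeroes, the memset()
 * above is skipped, so the page is not written to (and not dirtied)
 * unnecessarily.
 */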
2245
2246static void *do_data_decompress(void *opaque)
2247{
2248 DecompressParam *param = opaque;
2249 unsigned long pagesize;
33d151f4
LL
2250 uint8_t *des;
2251 int len;
56e93d26 2252
33d151f4 2253 qemu_mutex_lock(&param->mutex);
90e56fb4 2254 while (!param->quit) {
33d151f4
LL
2255 if (param->des) {
2256 des = param->des;
2257 len = param->len;
2258 param->des = 0;
2259 qemu_mutex_unlock(&param->mutex);
2260
56e93d26 2261 pagesize = TARGET_PAGE_SIZE;
73a8912b
LL
 2262 /* uncompress() can fail in some cases, especially when the
 2263 * page was dirtied while it was being compressed. That's not
 2264 * a problem because the dirty page will be retransferred and
 2265 * uncompress() won't corrupt the data in other pages.
 2266 */
33d151f4
LL
2267 uncompress((Bytef *)des, &pagesize,
2268 (const Bytef *)param->compbuf, len);
73a8912b 2269
33d151f4
LL
2270 qemu_mutex_lock(&decomp_done_lock);
2271 param->done = true;
2272 qemu_cond_signal(&decomp_done_cond);
2273 qemu_mutex_unlock(&decomp_done_lock);
2274
2275 qemu_mutex_lock(&param->mutex);
2276 } else {
2277 qemu_cond_wait(&param->cond, &param->mutex);
2278 }
56e93d26 2279 }
33d151f4 2280 qemu_mutex_unlock(&param->mutex);
56e93d26
JQ
2281
2282 return NULL;
2283}
2284
5533b2e9
LL
2285static void wait_for_decompress_done(void)
2286{
2287 int idx, thread_count;
2288
2289 if (!migrate_use_compression()) {
2290 return;
2291 }
2292
2293 thread_count = migrate_decompress_threads();
2294 qemu_mutex_lock(&decomp_done_lock);
2295 for (idx = 0; idx < thread_count; idx++) {
2296 while (!decomp_param[idx].done) {
2297 qemu_cond_wait(&decomp_done_cond, &decomp_done_lock);
2298 }
2299 }
2300 qemu_mutex_unlock(&decomp_done_lock);
2301}
2302
56e93d26
JQ
2303void migrate_decompress_threads_create(void)
2304{
2305 int i, thread_count;
2306
2307 thread_count = migrate_decompress_threads();
2308 decompress_threads = g_new0(QemuThread, thread_count);
2309 decomp_param = g_new0(DecompressParam, thread_count);
73a8912b
LL
2310 qemu_mutex_init(&decomp_done_lock);
2311 qemu_cond_init(&decomp_done_cond);
56e93d26
JQ
2312 for (i = 0; i < thread_count; i++) {
2313 qemu_mutex_init(&decomp_param[i].mutex);
2314 qemu_cond_init(&decomp_param[i].cond);
2315 decomp_param[i].compbuf = g_malloc0(compressBound(TARGET_PAGE_SIZE));
73a8912b 2316 decomp_param[i].done = true;
90e56fb4 2317 decomp_param[i].quit = false;
56e93d26
JQ
2318 qemu_thread_create(decompress_threads + i, "decompress",
2319 do_data_decompress, decomp_param + i,
2320 QEMU_THREAD_JOINABLE);
2321 }
2322}
2323
2324void migrate_decompress_threads_join(void)
2325{
2326 int i, thread_count;
2327
56e93d26
JQ
2328 thread_count = migrate_decompress_threads();
2329 for (i = 0; i < thread_count; i++) {
2330 qemu_mutex_lock(&decomp_param[i].mutex);
90e56fb4 2331 decomp_param[i].quit = true;
56e93d26
JQ
2332 qemu_cond_signal(&decomp_param[i].cond);
2333 qemu_mutex_unlock(&decomp_param[i].mutex);
2334 }
2335 for (i = 0; i < thread_count; i++) {
2336 qemu_thread_join(decompress_threads + i);
2337 qemu_mutex_destroy(&decomp_param[i].mutex);
2338 qemu_cond_destroy(&decomp_param[i].cond);
2339 g_free(decomp_param[i].compbuf);
2340 }
2341 g_free(decompress_threads);
2342 g_free(decomp_param);
56e93d26
JQ
2343 decompress_threads = NULL;
2344 decomp_param = NULL;
56e93d26
JQ
2345}
2346
c1bc6626 2347static void decompress_data_with_multi_threads(QEMUFile *f,
56e93d26
JQ
2348 void *host, int len)
2349{
2350 int idx, thread_count;
2351
2352 thread_count = migrate_decompress_threads();
73a8912b 2353 qemu_mutex_lock(&decomp_done_lock);
56e93d26
JQ
2354 while (true) {
2355 for (idx = 0; idx < thread_count; idx++) {
73a8912b 2356 if (decomp_param[idx].done) {
33d151f4
LL
2357 decomp_param[idx].done = false;
2358 qemu_mutex_lock(&decomp_param[idx].mutex);
c1bc6626 2359 qemu_get_buffer(f, decomp_param[idx].compbuf, len);
56e93d26
JQ
2360 decomp_param[idx].des = host;
2361 decomp_param[idx].len = len;
33d151f4
LL
2362 qemu_cond_signal(&decomp_param[idx].cond);
2363 qemu_mutex_unlock(&decomp_param[idx].mutex);
56e93d26
JQ
2364 break;
2365 }
2366 }
2367 if (idx < thread_count) {
2368 break;
73a8912b
LL
2369 } else {
2370 qemu_cond_wait(&decomp_done_cond, &decomp_done_lock);
56e93d26
JQ
2371 }
2372 }
73a8912b 2373 qemu_mutex_unlock(&decomp_done_lock);
56e93d26
JQ
2374}
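/*
 * Hand-off between the load path and the decompress workers, as
 * implemented above: the caller waits on decomp_done_cond for a worker
 * whose 'done' flag is set, copies the compressed data into that
 * worker's compbuf, records the destination pointer and length, and
 * signals the worker's condition variable; the worker (see
 * do_data_decompress()) uncompresses into the guest page and sets
 * 'done' again under decomp_done_lock.
 */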
2375
1caddf8a
DDAG
2376/*
 2377 * Allocate data structures etc. needed by incoming migration with postcopy-ram.
 2378 * postcopy-ram's similarly named postcopy_ram_incoming_init does the work.
2379 */
2380int ram_postcopy_incoming_init(MigrationIncomingState *mis)
2381{
2382 size_t ram_pages = last_ram_offset() >> TARGET_PAGE_BITS;
2383
2384 return postcopy_ram_incoming_init(mis, ram_pages);
2385}
2386
a7180877
DDAG
2387/*
2388 * Called in postcopy mode by ram_load().
2389 * rcu_read_lock is taken prior to this being called.
2390 */
2391static int ram_load_postcopy(QEMUFile *f)
2392{
2393 int flags = 0, ret = 0;
2394 bool place_needed = false;
28abd200 2395 bool matching_page_sizes = false;
a7180877
DDAG
2396 MigrationIncomingState *mis = migration_incoming_get_current();
2397 /* Temporary page that is later 'placed' */
2398 void *postcopy_host_page = postcopy_get_tmp_page(mis);
c53b7ddc 2399 void *last_host = NULL;
a3b6ff6d 2400 bool all_zero = false;
a7180877
DDAG
2401
2402 while (!ret && !(flags & RAM_SAVE_FLAG_EOS)) {
2403 ram_addr_t addr;
2404 void *host = NULL;
2405 void *page_buffer = NULL;
2406 void *place_source = NULL;
df9ff5e1 2407 RAMBlock *block = NULL;
a7180877 2408 uint8_t ch;
a7180877
DDAG
2409
2410 addr = qemu_get_be64(f);
2411 flags = addr & ~TARGET_PAGE_MASK;
2412 addr &= TARGET_PAGE_MASK;
2413
2414 trace_ram_load_postcopy_loop((uint64_t)addr, flags);
2415 place_needed = false;
2416 if (flags & (RAM_SAVE_FLAG_COMPRESS | RAM_SAVE_FLAG_PAGE)) {
df9ff5e1 2417 block = ram_block_from_stream(f, flags);
4c4bad48
HZ
2418
2419 host = host_from_ram_block_offset(block, addr);
a7180877
DDAG
2420 if (!host) {
2421 error_report("Illegal RAM offset " RAM_ADDR_FMT, addr);
2422 ret = -EINVAL;
2423 break;
2424 }
28abd200 2425 matching_page_sizes = block->page_size == TARGET_PAGE_SIZE;
a7180877 2426 /*
28abd200
DDAG
2427 * Postcopy requires that we place whole host pages atomically;
2428 * these may be huge pages for RAMBlocks that are backed by
2429 * hugetlbfs.
a7180877
DDAG
2430 * To make it atomic, the data is read into a temporary page
2431 * that's moved into place later.
 2432 * The migration protocol uses (possibly smaller) target pages;
 2433 * however, the source ensures it always sends all the components
 2434 * of a host page in order.
2435 */
2436 page_buffer = postcopy_host_page +
28abd200 2437 ((uintptr_t)host & (block->page_size - 1));
a7180877 2438 /* If all TP are zero then we can optimise the place */
28abd200 2439 if (!((uintptr_t)host & (block->page_size - 1))) {
a7180877 2440 all_zero = true;
c53b7ddc
DDAG
2441 } else {
2442 /* not the 1st TP within the HP */
2443 if (host != (last_host + TARGET_PAGE_SIZE)) {
9af9e0fe 2444 error_report("Non-sequential target page %p/%p",
c53b7ddc
DDAG
2445 host, last_host);
2446 ret = -EINVAL;
2447 break;
2448 }
a7180877
DDAG
2449 }
2450
c53b7ddc 2451
a7180877
DDAG
2452 /*
2453 * If it's the last part of a host page then we place the host
2454 * page
2455 */
2456 place_needed = (((uintptr_t)host + TARGET_PAGE_SIZE) &
28abd200 2457 (block->page_size - 1)) == 0;
a7180877
DDAG
2458 place_source = postcopy_host_page;
2459 }
c53b7ddc 2460 last_host = host;
a7180877
DDAG
2461
2462 switch (flags & ~RAM_SAVE_FLAG_CONTINUE) {
2463 case RAM_SAVE_FLAG_COMPRESS:
2464 ch = qemu_get_byte(f);
2465 memset(page_buffer, ch, TARGET_PAGE_SIZE);
2466 if (ch) {
2467 all_zero = false;
2468 }
2469 break;
2470
2471 case RAM_SAVE_FLAG_PAGE:
2472 all_zero = false;
2473 if (!place_needed || !matching_page_sizes) {
2474 qemu_get_buffer(f, page_buffer, TARGET_PAGE_SIZE);
2475 } else {
 2476 /* Avoid the extra copy out of the qemu_file buffer; postcopy
 2477 * is going to copy the page into place later anyway. We can
 2478 * only do this when the read is done in one go (matching page sizes)
 2479 */
2480 qemu_get_buffer_in_place(f, (uint8_t **)&place_source,
2481 TARGET_PAGE_SIZE);
2482 }
2483 break;
2484 case RAM_SAVE_FLAG_EOS:
2485 /* normal exit */
2486 break;
2487 default:
2488 error_report("Unknown combination of migration flags: %#x"
2489 " (postcopy mode)", flags);
2490 ret = -EINVAL;
2491 }
2492
2493 if (place_needed) {
2494 /* This gets called at the last target page in the host page */
df9ff5e1
DDAG
2495 void *place_dest = host + TARGET_PAGE_SIZE - block->page_size;
2496
a7180877 2497 if (all_zero) {
df9ff5e1
DDAG
2498 ret = postcopy_place_page_zero(mis, place_dest,
2499 block->page_size);
a7180877 2500 } else {
df9ff5e1
DDAG
2501 ret = postcopy_place_page(mis, place_dest,
2502 place_source, block->page_size);
a7180877
DDAG
2503 }
2504 }
2505 if (!ret) {
2506 ret = qemu_file_get_error(f);
2507 }
2508 }
2509
2510 return ret;
2511}
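/*
 * In short: each incoming target page is staged in the temporary
 * postcopy_host_page (or, when host and target page sizes match, read
 * in place); once the last target page of a host page has arrived, the
 * whole host page is placed atomically with postcopy_place_page(), or
 * with postcopy_place_page_zero() when every target page in it was zero.
 */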
2512
56e93d26
JQ
2513static int ram_load(QEMUFile *f, void *opaque, int version_id)
2514{
2515 int flags = 0, ret = 0;
2516 static uint64_t seq_iter;
2517 int len = 0;
a7180877
DDAG
2518 /*
 2519 * If the system is running in postcopy mode, page inserts into host memory
 2520 * must be atomic.
2521 */
2522 bool postcopy_running = postcopy_state_get() >= POSTCOPY_INCOMING_LISTENING;
ef08fb38
DDAG
 2523 /* ADVISE comes earlier; it shows the source has the postcopy capability enabled */
2524 bool postcopy_advised = postcopy_state_get() >= POSTCOPY_INCOMING_ADVISE;
56e93d26
JQ
2525
2526 seq_iter++;
2527
2528 if (version_id != 4) {
2529 ret = -EINVAL;
2530 }
2531
2532 /* This RCU critical section can be very long running.
2533 * When RCU reclaims in the code start to become numerous,
2534 * it will be necessary to reduce the granularity of this
2535 * critical section.
2536 */
2537 rcu_read_lock();
a7180877
DDAG
2538
2539 if (postcopy_running) {
2540 ret = ram_load_postcopy(f);
2541 }
2542
2543 while (!postcopy_running && !ret && !(flags & RAM_SAVE_FLAG_EOS)) {
56e93d26 2544 ram_addr_t addr, total_ram_bytes;
a776aa15 2545 void *host = NULL;
56e93d26
JQ
2546 uint8_t ch;
2547
2548 addr = qemu_get_be64(f);
2549 flags = addr & ~TARGET_PAGE_MASK;
2550 addr &= TARGET_PAGE_MASK;
2551
a776aa15
DDAG
2552 if (flags & (RAM_SAVE_FLAG_COMPRESS | RAM_SAVE_FLAG_PAGE |
2553 RAM_SAVE_FLAG_COMPRESS_PAGE | RAM_SAVE_FLAG_XBZRLE)) {
4c4bad48
HZ
2554 RAMBlock *block = ram_block_from_stream(f, flags);
2555
2556 host = host_from_ram_block_offset(block, addr);
a776aa15
DDAG
2557 if (!host) {
2558 error_report("Illegal RAM offset " RAM_ADDR_FMT, addr);
2559 ret = -EINVAL;
2560 break;
2561 }
2562 }
2563
56e93d26
JQ
2564 switch (flags & ~RAM_SAVE_FLAG_CONTINUE) {
2565 case RAM_SAVE_FLAG_MEM_SIZE:
2566 /* Synchronize RAM block list */
2567 total_ram_bytes = addr;
2568 while (!ret && total_ram_bytes) {
2569 RAMBlock *block;
56e93d26
JQ
2570 char id[256];
2571 ram_addr_t length;
2572
2573 len = qemu_get_byte(f);
2574 qemu_get_buffer(f, (uint8_t *)id, len);
2575 id[len] = 0;
2576 length = qemu_get_be64(f);
2577
e3dd7493
DDAG
2578 block = qemu_ram_block_by_name(id);
2579 if (block) {
2580 if (length != block->used_length) {
2581 Error *local_err = NULL;
56e93d26 2582
fa53a0e5 2583 ret = qemu_ram_resize(block, length,
e3dd7493
DDAG
2584 &local_err);
2585 if (local_err) {
2586 error_report_err(local_err);
56e93d26 2587 }
56e93d26 2588 }
ef08fb38
DDAG
2589 /* For postcopy we need to check hugepage sizes match */
2590 if (postcopy_advised &&
2591 block->page_size != qemu_host_page_size) {
2592 uint64_t remote_page_size = qemu_get_be64(f);
2593 if (remote_page_size != block->page_size) {
2594 error_report("Mismatched RAM page size %s "
2595 "(local) %zd != %" PRId64,
2596 id, block->page_size,
2597 remote_page_size);
2598 ret = -EINVAL;
2599 }
2600 }
e3dd7493
DDAG
2601 ram_control_load_hook(f, RAM_CONTROL_BLOCK_REG,
2602 block->idstr);
2603 } else {
56e93d26
JQ
2604 error_report("Unknown ramblock \"%s\", cannot "
2605 "accept migration", id);
2606 ret = -EINVAL;
2607 }
2608
2609 total_ram_bytes -= length;
2610 }
2611 break;
a776aa15 2612
56e93d26 2613 case RAM_SAVE_FLAG_COMPRESS:
56e93d26
JQ
2614 ch = qemu_get_byte(f);
2615 ram_handle_compressed(host, ch, TARGET_PAGE_SIZE);
2616 break;
a776aa15 2617
56e93d26 2618 case RAM_SAVE_FLAG_PAGE:
56e93d26
JQ
2619 qemu_get_buffer(f, host, TARGET_PAGE_SIZE);
2620 break;
56e93d26 2621
a776aa15 2622 case RAM_SAVE_FLAG_COMPRESS_PAGE:
56e93d26
JQ
2623 len = qemu_get_be32(f);
2624 if (len < 0 || len > compressBound(TARGET_PAGE_SIZE)) {
2625 error_report("Invalid compressed data length: %d", len);
2626 ret = -EINVAL;
2627 break;
2628 }
c1bc6626 2629 decompress_data_with_multi_threads(f, host, len);
56e93d26 2630 break;
a776aa15 2631
56e93d26 2632 case RAM_SAVE_FLAG_XBZRLE:
56e93d26
JQ
2633 if (load_xbzrle(f, addr, host) < 0) {
2634 error_report("Failed to decompress XBZRLE page at "
2635 RAM_ADDR_FMT, addr);
2636 ret = -EINVAL;
2637 break;
2638 }
2639 break;
2640 case RAM_SAVE_FLAG_EOS:
2641 /* normal exit */
2642 break;
2643 default:
2644 if (flags & RAM_SAVE_FLAG_HOOK) {
632e3a5c 2645 ram_control_load_hook(f, RAM_CONTROL_HOOK, NULL);
56e93d26
JQ
2646 } else {
2647 error_report("Unknown combination of migration flags: %#x",
2648 flags);
2649 ret = -EINVAL;
2650 }
2651 }
2652 if (!ret) {
2653 ret = qemu_file_get_error(f);
2654 }
2655 }
2656
5533b2e9 2657 wait_for_decompress_done();
56e93d26 2658 rcu_read_unlock();
55c4446b 2659 trace_ram_load_complete(ret, seq_iter);
56e93d26
JQ
2660 return ret;
2661}
2662
2663static SaveVMHandlers savevm_ram_handlers = {
2664 .save_live_setup = ram_save_setup,
2665 .save_live_iterate = ram_save_iterate,
763c906b 2666 .save_live_complete_postcopy = ram_save_complete,
a3e06c3d 2667 .save_live_complete_precopy = ram_save_complete,
56e93d26
JQ
2668 .save_live_pending = ram_save_pending,
2669 .load_state = ram_load,
6ad2a215 2670 .cleanup = ram_migration_cleanup,
56e93d26
JQ
2671};
2672
2673void ram_mig_init(void)
2674{
2675 qemu_mutex_init(&XBZRLE.lock);
2676 register_savevm_live(NULL, "ram", 0, 4, &savevm_ram_handlers, NULL);
2677}