1/*
2 * QEMU System Emulator
3 *
4 * Copyright (c) 2003-2008 Fabrice Bellard
5 * Copyright (c) 2011-2015 Red Hat Inc
6 *
7 * Authors:
8 * Juan Quintela <quintela@redhat.com>
9 *
10 * Permission is hereby granted, free of charge, to any person obtaining a copy
11 * of this software and associated documentation files (the "Software"), to deal
12 * in the Software without restriction, including without limitation the rights
13 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
14 * copies of the Software, and to permit persons to whom the Software is
15 * furnished to do so, subject to the following conditions:
16 *
17 * The above copyright notice and this permission notice shall be included in
18 * all copies or substantial portions of the Software.
19 *
20 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
21 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
23 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
24 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
25 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
26 * THE SOFTWARE.
27 */
1393a485 28#include "qemu/osdep.h"
29#include "qemu-common.h"
30#include "cpu.h"
56e93d26 31#include <zlib.h>
4addcd4f 32#include "qapi-event.h"
f348b6d1 33#include "qemu/cutils.h"
34#include "qemu/bitops.h"
35#include "qemu/bitmap.h"
36#include "qemu/timer.h"
37#include "qemu/main-loop.h"
56e93d26 38#include "migration/migration.h"
e0b266f0 39#include "migration/postcopy-ram.h"
40#include "exec/address-spaces.h"
41#include "migration/page_cache.h"
56e93d26 42#include "qemu/error-report.h"
56e93d26 43#include "trace.h"
56e93d26 44#include "exec/ram_addr.h"
56e93d26 45#include "qemu/rcu_queue.h"
a91246c9 46#include "migration/colo.h"
47
48#ifdef DEBUG_MIGRATION_RAM
49#define DPRINTF(fmt, ...) \
50 do { fprintf(stdout, "migration_ram: " fmt, ## __VA_ARGS__); } while (0)
51#else
52#define DPRINTF(fmt, ...) \
53 do { } while (0)
54#endif
55
56e93d26 56static int dirty_rate_high_cnt;
57
58static uint64_t bitmap_sync_count;
59
60/***********************************************************/
61/* ram save/restore */
62
63#define RAM_SAVE_FLAG_FULL 0x01 /* Obsolete, not used anymore */
64#define RAM_SAVE_FLAG_COMPRESS 0x02
65#define RAM_SAVE_FLAG_MEM_SIZE 0x04
66#define RAM_SAVE_FLAG_PAGE 0x08
67#define RAM_SAVE_FLAG_EOS 0x10
68#define RAM_SAVE_FLAG_CONTINUE 0x20
69#define RAM_SAVE_FLAG_XBZRLE 0x40
/* 0x80 is reserved in migration.h; start with 0x100 for the next flag */
71#define RAM_SAVE_FLAG_COMPRESS_PAGE 0x100
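/* Illustrative note: these flags are OR'ed into the low bits of the page
 * offset written by save_page_header(), so a single be64 on the wire carries
 * both the page address and how the page data that follows is encoded
 * (zero page, full page, XBZRLE, compressed page).  For example, a zero page
 * at offset 0x2000 of the block already being sent goes out as
 * 0x2000 | RAM_SAVE_FLAG_COMPRESS | RAM_SAVE_FLAG_CONTINUE, followed by a
 * single 0 byte.
 */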
72
adb65dec 73static uint8_t *ZERO_TARGET_PAGE;
74
75static inline bool is_zero_range(uint8_t *p, uint64_t size)
76{
a1febc49 77 return buffer_is_zero(p, size);
78}
79
80/* struct contains XBZRLE cache and a static page
81 used by the compression */
82static struct {
83 /* buffer used for XBZRLE encoding */
84 uint8_t *encoded_buf;
85 /* buffer for storing page content */
86 uint8_t *current_buf;
87 /* Cache for XBZRLE, Protected by lock. */
88 PageCache *cache;
89 QemuMutex lock;
90} XBZRLE;
91
92/* buffer used for XBZRLE decoding */
93static uint8_t *xbzrle_decoded_buf;
94
95static void XBZRLE_cache_lock(void)
96{
97 if (migrate_use_xbzrle())
98 qemu_mutex_lock(&XBZRLE.lock);
99}
100
101static void XBZRLE_cache_unlock(void)
102{
103 if (migrate_use_xbzrle())
104 qemu_mutex_unlock(&XBZRLE.lock);
105}
106
/*
 * Called from qmp_migrate_set_cache_size in the main thread, possibly while
 * a migration is in progress.
 * A running migration may be using the cache and might finish during this
 * call, hence changes to the cache are protected by XBZRLE.lock.
 */
113int64_t xbzrle_cache_resize(int64_t new_size)
114{
115 PageCache *new_cache;
116 int64_t ret;
117
118 if (new_size < TARGET_PAGE_SIZE) {
119 return -1;
120 }
121
122 XBZRLE_cache_lock();
123
124 if (XBZRLE.cache != NULL) {
125 if (pow2floor(new_size) == migrate_xbzrle_cache_size()) {
126 goto out_new_size;
127 }
128 new_cache = cache_init(new_size / TARGET_PAGE_SIZE,
129 TARGET_PAGE_SIZE);
130 if (!new_cache) {
131 error_report("Error creating cache");
132 ret = -1;
133 goto out;
134 }
135
136 cache_fini(XBZRLE.cache);
137 XBZRLE.cache = new_cache;
138 }
139
140out_new_size:
141 ret = pow2floor(new_size);
142out:
143 XBZRLE_cache_unlock();
144 return ret;
145}
146
147/* accounting for migration statistics */
148typedef struct AccountingInfo {
149 uint64_t dup_pages;
150 uint64_t skipped_pages;
151 uint64_t norm_pages;
152 uint64_t iterations;
153 uint64_t xbzrle_bytes;
154 uint64_t xbzrle_pages;
155 uint64_t xbzrle_cache_miss;
156 double xbzrle_cache_miss_rate;
157 uint64_t xbzrle_overflows;
158} AccountingInfo;
159
160static AccountingInfo acct_info;
161
162static void acct_clear(void)
163{
164 memset(&acct_info, 0, sizeof(acct_info));
165}
166
167uint64_t dup_mig_bytes_transferred(void)
168{
169 return acct_info.dup_pages * TARGET_PAGE_SIZE;
170}
171
172uint64_t dup_mig_pages_transferred(void)
173{
174 return acct_info.dup_pages;
175}
176
177uint64_t skipped_mig_bytes_transferred(void)
178{
179 return acct_info.skipped_pages * TARGET_PAGE_SIZE;
180}
181
182uint64_t skipped_mig_pages_transferred(void)
183{
184 return acct_info.skipped_pages;
185}
186
187uint64_t norm_mig_bytes_transferred(void)
188{
189 return acct_info.norm_pages * TARGET_PAGE_SIZE;
190}
191
192uint64_t norm_mig_pages_transferred(void)
193{
194 return acct_info.norm_pages;
195}
196
197uint64_t xbzrle_mig_bytes_transferred(void)
198{
199 return acct_info.xbzrle_bytes;
200}
201
202uint64_t xbzrle_mig_pages_transferred(void)
203{
204 return acct_info.xbzrle_pages;
205}
206
207uint64_t xbzrle_mig_pages_cache_miss(void)
208{
209 return acct_info.xbzrle_cache_miss;
210}
211
212double xbzrle_mig_cache_miss_rate(void)
213{
214 return acct_info.xbzrle_cache_miss_rate;
215}
216
217uint64_t xbzrle_mig_pages_overflow(void)
218{
219 return acct_info.xbzrle_overflows;
220}
221
/* This is the last block that we have visited searching for dirty pages
 */
224static RAMBlock *last_seen_block;
225/* This is the last block from where we have sent data */
226static RAMBlock *last_sent_block;
227static ram_addr_t last_offset;
dd631697 228static QemuMutex migration_bitmap_mutex;
229static uint64_t migration_dirty_pages;
230static uint32_t last_version;
231static bool ram_bulk_stage;
232
233/* used by the search for pages to send */
234struct PageSearchStatus {
235 /* Current block being searched */
236 RAMBlock *block;
237 /* Current offset to search from */
238 ram_addr_t offset;
239 /* Set once we wrap around */
240 bool complete_round;
241};
242typedef struct PageSearchStatus PageSearchStatus;
243
244static struct BitmapRcu {
245 struct rcu_head rcu;
f3f491fc 246 /* Main migration bitmap */
60be6340 247 unsigned long *bmap;
248 /* bitmap of pages that haven't been sent even once
249 * only maintained and used in postcopy at the moment
250 * where it's used to send the dirtymap at the start
251 * of the postcopy phase
252 */
253 unsigned long *unsentmap;
254} *migration_bitmap_rcu;
255
56e93d26 256struct CompressParam {
56e93d26 257 bool done;
90e56fb4 258 bool quit;
259 QEMUFile *file;
260 QemuMutex mutex;
261 QemuCond cond;
262 RAMBlock *block;
263 ram_addr_t offset;
264};
265typedef struct CompressParam CompressParam;
266
267struct DecompressParam {
73a8912b 268 bool done;
90e56fb4 269 bool quit;
270 QemuMutex mutex;
271 QemuCond cond;
272 void *des;
d341d9f3 273 uint8_t *compbuf;
274 int len;
275};
276typedef struct DecompressParam DecompressParam;
277
278static CompressParam *comp_param;
279static QemuThread *compress_threads;
280/* comp_done_cond is used to wake up the migration thread when
281 * one of the compression threads has finished the compression.
282 * comp_done_lock is used to co-work with comp_done_cond.
283 */
284static QemuMutex comp_done_lock;
285static QemuCond comp_done_cond;
286/* The empty QEMUFileOps will be used by file in CompressParam */
287static const QEMUFileOps empty_ops = { };
288
289static bool compression_switch;
290static DecompressParam *decomp_param;
291static QemuThread *decompress_threads;
292static QemuMutex decomp_done_lock;
293static QemuCond decomp_done_cond;
56e93d26 294
295static int do_compress_ram_page(QEMUFile *f, RAMBlock *block,
296 ram_addr_t offset);
297
298static void *do_data_compress(void *opaque)
299{
300 CompressParam *param = opaque;
301 RAMBlock *block;
302 ram_addr_t offset;
56e93d26 303
a7a9a88f 304 qemu_mutex_lock(&param->mutex);
90e56fb4 305 while (!param->quit) {
306 if (param->block) {
307 block = param->block;
308 offset = param->offset;
309 param->block = NULL;
310 qemu_mutex_unlock(&param->mutex);
311
312 do_compress_ram_page(param->file, block, offset);
313
0d9f9a5c 314 qemu_mutex_lock(&comp_done_lock);
a7a9a88f 315 param->done = true;
316 qemu_cond_signal(&comp_done_cond);
317 qemu_mutex_unlock(&comp_done_lock);
318
319 qemu_mutex_lock(&param->mutex);
320 } else {
321 qemu_cond_wait(&param->cond, &param->mutex);
322 }
56e93d26 323 }
a7a9a88f 324 qemu_mutex_unlock(&param->mutex);
325
326 return NULL;
327}
328
329static inline void terminate_compression_threads(void)
330{
331 int idx, thread_count;
332
333 thread_count = migrate_compress_threads();
334 for (idx = 0; idx < thread_count; idx++) {
335 qemu_mutex_lock(&comp_param[idx].mutex);
90e56fb4 336 comp_param[idx].quit = true;
337 qemu_cond_signal(&comp_param[idx].cond);
338 qemu_mutex_unlock(&comp_param[idx].mutex);
339 }
340}
341
342void migrate_compress_threads_join(void)
343{
344 int i, thread_count;
345
346 if (!migrate_use_compression()) {
347 return;
348 }
349 terminate_compression_threads();
350 thread_count = migrate_compress_threads();
351 for (i = 0; i < thread_count; i++) {
352 qemu_thread_join(compress_threads + i);
353 qemu_fclose(comp_param[i].file);
354 qemu_mutex_destroy(&comp_param[i].mutex);
355 qemu_cond_destroy(&comp_param[i].cond);
356 }
357 qemu_mutex_destroy(&comp_done_lock);
358 qemu_cond_destroy(&comp_done_cond);
359 g_free(compress_threads);
360 g_free(comp_param);
361 compress_threads = NULL;
362 comp_param = NULL;
363}
364
365void migrate_compress_threads_create(void)
366{
367 int i, thread_count;
368
369 if (!migrate_use_compression()) {
370 return;
371 }
372 compression_switch = true;
373 thread_count = migrate_compress_threads();
374 compress_threads = g_new0(QemuThread, thread_count);
375 comp_param = g_new0(CompressParam, thread_count);
376 qemu_cond_init(&comp_done_cond);
377 qemu_mutex_init(&comp_done_lock);
56e93d26 378 for (i = 0; i < thread_count; i++) {
379 /* comp_param[i].file is just used as a dummy buffer to save data,
380 * set its ops to empty.
381 */
382 comp_param[i].file = qemu_fopen_ops(NULL, &empty_ops);
383 comp_param[i].done = true;
90e56fb4 384 comp_param[i].quit = false;
385 qemu_mutex_init(&comp_param[i].mutex);
386 qemu_cond_init(&comp_param[i].cond);
387 qemu_thread_create(compress_threads + i, "compress",
388 do_data_compress, comp_param + i,
389 QEMU_THREAD_JOINABLE);
390 }
391}
392
393/**
394 * save_page_header: Write page header to wire
395 *
396 * If this is the 1st block, it also writes the block identification
397 *
398 * Returns: Number of bytes written
399 *
400 * @f: QEMUFile where to send the data
401 * @block: block that contains the page we want to send
402 * @offset: offset inside the block for the page
403 * in the lower bits, it contains flags
404 */
405static size_t save_page_header(QEMUFile *f, RAMBlock *block, ram_addr_t offset)
406{
9f5f380b 407 size_t size, len;
408
409 qemu_put_be64(f, offset);
410 size = 8;
411
412 if (!(offset & RAM_SAVE_FLAG_CONTINUE)) {
413 len = strlen(block->idstr);
414 qemu_put_byte(f, len);
415 qemu_put_buffer(f, (uint8_t *)block->idstr, len);
416 size += 1 + len;
417 }
418 return size;
419}
420
421/* Reduce amount of guest cpu execution to hopefully slow down memory writes.
422 * If guest dirty memory rate is reduced below the rate at which we can
423 * transfer pages to the destination then we should be able to complete
424 * migration. Some workloads dirty memory way too fast and will not effectively
425 * converge, even with auto-converge.
426 */
427static void mig_throttle_guest_down(void)
428{
429 MigrationState *s = migrate_get_current();
430 uint64_t pct_initial = s->parameters.cpu_throttle_initial;
431 uint64_t pct_icrement = s->parameters.cpu_throttle_increment;
432
433 /* We have not started throttling yet. Let's start it. */
434 if (!cpu_throttle_active()) {
435 cpu_throttle_set(pct_initial);
436 } else {
437 /* Throttling already on, just increase the rate */
438 cpu_throttle_set(cpu_throttle_get_percentage() + pct_icrement);
439 }
440}
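/* Example (the actual values depend on the migration parameters): with
 * cpu_throttle_initial=20 and cpu_throttle_increment=10, successive calls
 * throttle the guest at 20%, 30%, 40%, ... of its CPU time until either the
 * dirty rate drops below the transfer rate or migration completes.
 */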
441
442/* Update the xbzrle cache to reflect a page that's been sent as all 0.
443 * The important thing is that a stale (not-yet-0'd) page be replaced
444 * by the new data.
445 * As a bonus, if the page wasn't in the cache it gets added so that
446 * when a small write is made into the 0'd page it gets XBZRLE sent
447 */
448static void xbzrle_cache_zero_page(ram_addr_t current_addr)
449{
450 if (ram_bulk_stage || !migrate_use_xbzrle()) {
451 return;
452 }
453
454 /* We don't care if this fails to allocate a new cache page
455 * as long as it updated an old one */
456 cache_insert(XBZRLE.cache, current_addr, ZERO_TARGET_PAGE,
457 bitmap_sync_count);
458}
459
460#define ENCODING_FLAG_XBZRLE 0x1
461
462/**
463 * save_xbzrle_page: compress and send current page
464 *
465 * Returns: 1 means that we wrote the page
466 * 0 means that page is identical to the one already sent
467 * -1 means that xbzrle would be longer than normal
468 *
469 * @f: QEMUFile where to send the data
470 * @current_data:
471 * @current_addr:
472 * @block: block that contains the page we want to send
473 * @offset: offset inside the block for the page
474 * @last_stage: if we are at the completion stage
475 * @bytes_transferred: increase it with the number of transferred bytes
476 */
477static int save_xbzrle_page(QEMUFile *f, uint8_t **current_data,
478 ram_addr_t current_addr, RAMBlock *block,
479 ram_addr_t offset, bool last_stage,
480 uint64_t *bytes_transferred)
481{
482 int encoded_len = 0, bytes_xbzrle;
483 uint8_t *prev_cached_page;
484
485 if (!cache_is_cached(XBZRLE.cache, current_addr, bitmap_sync_count)) {
486 acct_info.xbzrle_cache_miss++;
487 if (!last_stage) {
488 if (cache_insert(XBZRLE.cache, current_addr, *current_data,
489 bitmap_sync_count) == -1) {
490 return -1;
491 } else {
492 /* update *current_data when the page has been
493 inserted into cache */
494 *current_data = get_cached_data(XBZRLE.cache, current_addr);
495 }
496 }
497 return -1;
498 }
499
500 prev_cached_page = get_cached_data(XBZRLE.cache, current_addr);
501
502 /* save current buffer into memory */
503 memcpy(XBZRLE.current_buf, *current_data, TARGET_PAGE_SIZE);
504
505 /* XBZRLE encoding (if there is no overflow) */
506 encoded_len = xbzrle_encode_buffer(prev_cached_page, XBZRLE.current_buf,
507 TARGET_PAGE_SIZE, XBZRLE.encoded_buf,
508 TARGET_PAGE_SIZE);
509 if (encoded_len == 0) {
510 DPRINTF("Skipping unmodified page\n");
511 return 0;
512 } else if (encoded_len == -1) {
513 DPRINTF("Overflow\n");
514 acct_info.xbzrle_overflows++;
515 /* update data in the cache */
516 if (!last_stage) {
517 memcpy(prev_cached_page, *current_data, TARGET_PAGE_SIZE);
518 *current_data = prev_cached_page;
519 }
520 return -1;
521 }
522
523 /* we need to update the data in the cache, in order to get the same data */
524 if (!last_stage) {
525 memcpy(prev_cached_page, XBZRLE.current_buf, TARGET_PAGE_SIZE);
526 }
527
528 /* Send XBZRLE based compressed page */
529 bytes_xbzrle = save_page_header(f, block, offset | RAM_SAVE_FLAG_XBZRLE);
530 qemu_put_byte(f, ENCODING_FLAG_XBZRLE);
531 qemu_put_be16(f, encoded_len);
532 qemu_put_buffer(f, XBZRLE.encoded_buf, encoded_len);
533 bytes_xbzrle += encoded_len + 1 + 2;
534 acct_info.xbzrle_pages++;
535 acct_info.xbzrle_bytes += bytes_xbzrle;
536 *bytes_transferred += bytes_xbzrle;
537
538 return 1;
539}
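/* Sketch of the XBZRLE record that save_xbzrle_page() emits:
 *
 *   save_page_header()            (offset | RAM_SAVE_FLAG_XBZRLE, ...)
 *   1 byte   ENCODING_FLAG_XBZRLE
 *   2 bytes  be16: encoded_len
 *   encoded_len bytes of XBZRLE-encoded delta against the cached copy
 *
 * which is why bytes_xbzrle is incremented by encoded_len + 1 + 2 above.
 */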
540
541/* Called with rcu_read_lock() to protect migration_bitmap
542 * rb: The RAMBlock to search for dirty pages in
543 * start: Start address (typically so we can continue from previous page)
544 * ram_addr_abs: Pointer into which to store the address of the dirty page
545 * within the global ram_addr space
546 *
547 * Returns: byte offset within memory region of the start of a dirty page
548 */
56e93d26 549static inline
550ram_addr_t migration_bitmap_find_dirty(RAMBlock *rb,
551 ram_addr_t start,
552 ram_addr_t *ram_addr_abs)
56e93d26 553{
2f68e399 554 unsigned long base = rb->offset >> TARGET_PAGE_BITS;
56e93d26 555 unsigned long nr = base + (start >> TARGET_PAGE_BITS);
556 uint64_t rb_size = rb->used_length;
557 unsigned long size = base + (rb_size >> TARGET_PAGE_BITS);
2ff64038 558 unsigned long *bitmap;
559
560 unsigned long next;
561
60be6340 562 bitmap = atomic_rcu_read(&migration_bitmap_rcu)->bmap;
563 if (ram_bulk_stage && nr > base) {
564 next = nr + 1;
565 } else {
2ff64038 566 next = find_next_bit(bitmap, size, nr);
567 }
568
f3f491fc 569 *ram_addr_abs = next << TARGET_PAGE_BITS;
570 return (next - base) << TARGET_PAGE_BITS;
571}
572
573static inline bool migration_bitmap_clear_dirty(ram_addr_t addr)
574{
575 bool ret;
576 int nr = addr >> TARGET_PAGE_BITS;
577 unsigned long *bitmap = atomic_rcu_read(&migration_bitmap_rcu)->bmap;
578
579 ret = test_and_clear_bit(nr, bitmap);
580
581 if (ret) {
582 migration_dirty_pages--;
583 }
584 return ret;
585}
586
587static void migration_bitmap_sync_range(ram_addr_t start, ram_addr_t length)
588{
2ff64038 589 unsigned long *bitmap;
60be6340 590 bitmap = atomic_rcu_read(&migration_bitmap_rcu)->bmap;
56e93d26 591 migration_dirty_pages +=
2ff64038 592 cpu_physical_memory_sync_dirty_bitmap(bitmap, start, length);
593}
594
595/* Fix me: there are too many global variables used in migration process. */
596static int64_t start_time;
597static int64_t bytes_xfer_prev;
598static int64_t num_dirty_pages_period;
599static uint64_t xbzrle_cache_miss_prev;
600static uint64_t iterations_prev;
601
602static void migration_bitmap_sync_init(void)
603{
604 start_time = 0;
605 bytes_xfer_prev = 0;
606 num_dirty_pages_period = 0;
607 xbzrle_cache_miss_prev = 0;
608 iterations_prev = 0;
609}
610
611static void migration_bitmap_sync(void)
612{
613 RAMBlock *block;
614 uint64_t num_dirty_pages_init = migration_dirty_pages;
615 MigrationState *s = migrate_get_current();
616 int64_t end_time;
617 int64_t bytes_xfer_now;
618
619 bitmap_sync_count++;
620
621 if (!bytes_xfer_prev) {
622 bytes_xfer_prev = ram_bytes_transferred();
623 }
624
625 if (!start_time) {
626 start_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
627 }
628
629 trace_migration_bitmap_sync_start();
9c1f8f44 630 memory_global_dirty_log_sync();
56e93d26 631
dd631697 632 qemu_mutex_lock(&migration_bitmap_mutex);
633 rcu_read_lock();
634 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
2f68e399 635 migration_bitmap_sync_range(block->offset, block->used_length);
636 }
637 rcu_read_unlock();
dd631697 638 qemu_mutex_unlock(&migration_bitmap_mutex);
639
640 trace_migration_bitmap_sync_end(migration_dirty_pages
641 - num_dirty_pages_init);
642 num_dirty_pages_period += migration_dirty_pages - num_dirty_pages_init;
643 end_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
644
    /* more than 1 second = 1000 milliseconds */
646 if (end_time > start_time + 1000) {
647 if (migrate_auto_converge()) {
            /* The following detection logic can be refined later. For now:
               Check to see if the dirtied bytes are 50% more than the approx.
               amount of bytes that just got transferred since the last time
               we were in this routine. If that happens twice, start or
               increase throttling. */
56e93d26 653 bytes_xfer_now = ram_bytes_transferred();
070afca2 654
655 if (s->dirty_pages_rate &&
656 (num_dirty_pages_period * TARGET_PAGE_SIZE >
657 (bytes_xfer_now - bytes_xfer_prev)/2) &&
070afca2 658 (dirty_rate_high_cnt++ >= 2)) {
56e93d26 659 trace_migration_throttle();
56e93d26 660 dirty_rate_high_cnt = 0;
070afca2 661 mig_throttle_guest_down();
662 }
663 bytes_xfer_prev = bytes_xfer_now;
56e93d26 664 }
070afca2 665
666 if (migrate_use_xbzrle()) {
667 if (iterations_prev != acct_info.iterations) {
668 acct_info.xbzrle_cache_miss_rate =
669 (double)(acct_info.xbzrle_cache_miss -
670 xbzrle_cache_miss_prev) /
671 (acct_info.iterations - iterations_prev);
672 }
673 iterations_prev = acct_info.iterations;
674 xbzrle_cache_miss_prev = acct_info.xbzrle_cache_miss;
675 }
676 s->dirty_pages_rate = num_dirty_pages_period * 1000
677 / (end_time - start_time);
678 s->dirty_bytes_rate = s->dirty_pages_rate * TARGET_PAGE_SIZE;
679 start_time = end_time;
680 num_dirty_pages_period = 0;
681 }
682 s->dirty_sync_count = bitmap_sync_count;
683 if (migrate_use_events()) {
684 qapi_event_send_migration_pass(bitmap_sync_count, NULL);
685 }
686}
687
688/**
689 * save_zero_page: Send the zero page to the stream
690 *
691 * Returns: Number of pages written.
692 *
693 * @f: QEMUFile where to send the data
694 * @block: block that contains the page we want to send
695 * @offset: offset inside the block for the page
696 * @p: pointer to the page
697 * @bytes_transferred: increase it with the number of transferred bytes
698 */
699static int save_zero_page(QEMUFile *f, RAMBlock *block, ram_addr_t offset,
700 uint8_t *p, uint64_t *bytes_transferred)
701{
702 int pages = -1;
703
704 if (is_zero_range(p, TARGET_PAGE_SIZE)) {
705 acct_info.dup_pages++;
706 *bytes_transferred += save_page_header(f, block,
707 offset | RAM_SAVE_FLAG_COMPRESS);
708 qemu_put_byte(f, 0);
709 *bytes_transferred += 1;
710 pages = 1;
711 }
712
713 return pages;
714}
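/* On the wire a zero page is therefore just the page header with
 * RAM_SAVE_FLAG_COMPRESS set, followed by a single 0x00 byte, so a dup page
 * costs a handful of bytes instead of TARGET_PAGE_SIZE.
 */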
715
716/**
717 * ram_save_page: Send the given page to the stream
718 *
719 * Returns: Number of pages written.
720 * < 0 - error
721 * >=0 - Number of pages written - this might legally be 0
722 * if xbzrle noticed the page was the same.
723 *
724 * @f: QEMUFile where to send the data
725 * @block: block that contains the page we want to send
726 * @offset: offset inside the block for the page
727 * @last_stage: if we are at the completion stage
728 * @bytes_transferred: increase it with the number of transferred bytes
729 */
a08f6890 730static int ram_save_page(QEMUFile *f, PageSearchStatus *pss,
731 bool last_stage, uint64_t *bytes_transferred)
732{
733 int pages = -1;
734 uint64_t bytes_xmit;
735 ram_addr_t current_addr;
736 uint8_t *p;
737 int ret;
738 bool send_async = true;
739 RAMBlock *block = pss->block;
740 ram_addr_t offset = pss->offset;
56e93d26 741
2f68e399 742 p = block->host + offset;
743
    /* When in doubt, send the page as a normal page */
745 bytes_xmit = 0;
746 ret = ram_control_save_page(f, block->offset,
747 offset, TARGET_PAGE_SIZE, &bytes_xmit);
748 if (bytes_xmit) {
749 *bytes_transferred += bytes_xmit;
750 pages = 1;
751 }
752
753 XBZRLE_cache_lock();
754
755 current_addr = block->offset + offset;
756
757 if (block == last_sent_block) {
758 offset |= RAM_SAVE_FLAG_CONTINUE;
759 }
760 if (ret != RAM_SAVE_CONTROL_NOT_SUPP) {
761 if (ret != RAM_SAVE_CONTROL_DELAYED) {
762 if (bytes_xmit > 0) {
763 acct_info.norm_pages++;
764 } else if (bytes_xmit == 0) {
765 acct_info.dup_pages++;
766 }
767 }
768 } else {
769 pages = save_zero_page(f, block, offset, p, bytes_transferred);
770 if (pages > 0) {
771 /* Must let xbzrle know, otherwise a previous (now 0'd) cached
772 * page would be stale
773 */
774 xbzrle_cache_zero_page(current_addr);
775 } else if (!ram_bulk_stage &&
776 !migration_in_postcopy(migrate_get_current()) &&
777 migrate_use_xbzrle()) {
778 pages = save_xbzrle_page(f, &p, current_addr, block,
779 offset, last_stage, bytes_transferred);
780 if (!last_stage) {
781 /* Can't send this cached data async, since the cache page
782 * might get updated before it gets to the wire
783 */
784 send_async = false;
785 }
786 }
787 }
788
789 /* XBZRLE overflow or normal page */
790 if (pages == -1) {
791 *bytes_transferred += save_page_header(f, block,
792 offset | RAM_SAVE_FLAG_PAGE);
793 if (send_async) {
794 qemu_put_buffer_async(f, p, TARGET_PAGE_SIZE);
795 } else {
796 qemu_put_buffer(f, p, TARGET_PAGE_SIZE);
797 }
798 *bytes_transferred += TARGET_PAGE_SIZE;
799 pages = 1;
800 acct_info.norm_pages++;
801 }
802
803 XBZRLE_cache_unlock();
804
805 return pages;
806}
807
808static int do_compress_ram_page(QEMUFile *f, RAMBlock *block,
809 ram_addr_t offset)
810{
811 int bytes_sent, blen;
a7a9a88f 812 uint8_t *p = block->host + (offset & TARGET_PAGE_MASK);
56e93d26 813
a7a9a88f 814 bytes_sent = save_page_header(f, block, offset |
56e93d26 815 RAM_SAVE_FLAG_COMPRESS_PAGE);
a7a9a88f 816 blen = qemu_put_compression_data(f, p, TARGET_PAGE_SIZE,
56e93d26 817 migrate_compress_level());
818 if (blen < 0) {
819 bytes_sent = 0;
820 qemu_file_set_error(migrate_get_current()->to_dst_file, blen);
821 error_report("compressed data failed!");
822 } else {
823 bytes_sent += blen;
824 }
825
826 return bytes_sent;
827}
828
829static uint64_t bytes_transferred;
830
831static void flush_compressed_data(QEMUFile *f)
832{
833 int idx, len, thread_count;
834
835 if (!migrate_use_compression()) {
836 return;
837 }
838 thread_count = migrate_compress_threads();
a7a9a88f 839
0d9f9a5c 840 qemu_mutex_lock(&comp_done_lock);
56e93d26 841 for (idx = 0; idx < thread_count; idx++) {
a7a9a88f 842 while (!comp_param[idx].done) {
0d9f9a5c 843 qemu_cond_wait(&comp_done_cond, &comp_done_lock);
56e93d26 844 }
a7a9a88f 845 }
0d9f9a5c 846 qemu_mutex_unlock(&comp_done_lock);
847
848 for (idx = 0; idx < thread_count; idx++) {
849 qemu_mutex_lock(&comp_param[idx].mutex);
90e56fb4 850 if (!comp_param[idx].quit) {
851 len = qemu_put_qemu_file(f, comp_param[idx].file);
852 bytes_transferred += len;
853 }
a7a9a88f 854 qemu_mutex_unlock(&comp_param[idx].mutex);
855 }
856}
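/* Hand-off protocol with the compression threads, as used above and in
 * compress_page_with_multi_thread():
 *   - the migration thread waits on comp_done_cond until a worker sets
 *     param->done, then drains that worker's QEMUFile buffer into the stream;
 *   - new work is queued by setting param->block/offset under param->mutex
 *     and signalling param->cond;
 *   - param->quit asks the worker to leave its loop.
 */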
857
858static inline void set_compress_params(CompressParam *param, RAMBlock *block,
859 ram_addr_t offset)
860{
861 param->block = block;
862 param->offset = offset;
863}
864
865static int compress_page_with_multi_thread(QEMUFile *f, RAMBlock *block,
866 ram_addr_t offset,
867 uint64_t *bytes_transferred)
868{
869 int idx, thread_count, bytes_xmit = -1, pages = -1;
870
871 thread_count = migrate_compress_threads();
0d9f9a5c 872 qemu_mutex_lock(&comp_done_lock);
873 while (true) {
874 for (idx = 0; idx < thread_count; idx++) {
875 if (comp_param[idx].done) {
a7a9a88f 876 comp_param[idx].done = false;
56e93d26 877 bytes_xmit = qemu_put_qemu_file(f, comp_param[idx].file);
a7a9a88f 878 qemu_mutex_lock(&comp_param[idx].mutex);
56e93d26 879 set_compress_params(&comp_param[idx], block, offset);
880 qemu_cond_signal(&comp_param[idx].cond);
881 qemu_mutex_unlock(&comp_param[idx].mutex);
882 pages = 1;
883 acct_info.norm_pages++;
884 *bytes_transferred += bytes_xmit;
885 break;
886 }
887 }
888 if (pages > 0) {
889 break;
890 } else {
0d9f9a5c 891 qemu_cond_wait(&comp_done_cond, &comp_done_lock);
892 }
893 }
0d9f9a5c 894 qemu_mutex_unlock(&comp_done_lock);
895
896 return pages;
897}
898
899/**
900 * ram_save_compressed_page: compress the given page and send it to the stream
901 *
902 * Returns: Number of pages written.
903 *
904 * @f: QEMUFile where to send the data
905 * @block: block that contains the page we want to send
906 * @offset: offset inside the block for the page
907 * @last_stage: if we are at the completion stage
908 * @bytes_transferred: increase it with the number of transferred bytes
909 */
910static int ram_save_compressed_page(QEMUFile *f, PageSearchStatus *pss,
911 bool last_stage,
912 uint64_t *bytes_transferred)
913{
914 int pages = -1;
fc50438e 915 uint64_t bytes_xmit = 0;
56e93d26 916 uint8_t *p;
fc50438e 917 int ret, blen;
918 RAMBlock *block = pss->block;
919 ram_addr_t offset = pss->offset;
56e93d26 920
2f68e399 921 p = block->host + offset;
56e93d26 922
923 ret = ram_control_save_page(f, block->offset,
924 offset, TARGET_PAGE_SIZE, &bytes_xmit);
925 if (bytes_xmit) {
926 *bytes_transferred += bytes_xmit;
927 pages = 1;
928 }
929 if (ret != RAM_SAVE_CONTROL_NOT_SUPP) {
930 if (ret != RAM_SAVE_CONTROL_DELAYED) {
931 if (bytes_xmit > 0) {
932 acct_info.norm_pages++;
933 } else if (bytes_xmit == 0) {
934 acct_info.dup_pages++;
935 }
936 }
937 } else {
        /* When starting the process of a new block, the first page of
         * the block should be sent out before other pages in the same
         * block, and all the pages in the last block should have been
         * sent out. Keeping this order is important, because the 'cont'
         * flag is used to avoid resending the block name.
         */
944 if (block != last_sent_block) {
945 flush_compressed_data(f);
946 pages = save_zero_page(f, block, offset, p, bytes_transferred);
947 if (pages == -1) {
948 /* Make sure the first page is sent out before other pages */
949 bytes_xmit = save_page_header(f, block, offset |
950 RAM_SAVE_FLAG_COMPRESS_PAGE);
951 blen = qemu_put_compression_data(f, p, TARGET_PAGE_SIZE,
952 migrate_compress_level());
953 if (blen > 0) {
954 *bytes_transferred += bytes_xmit + blen;
b3be2896 955 acct_info.norm_pages++;
b3be2896 956 pages = 1;
957 } else {
958 qemu_file_set_error(f, blen);
959 error_report("compressed data failed!");
b3be2896 960 }
961 }
962 } else {
fc50438e 963 offset |= RAM_SAVE_FLAG_CONTINUE;
964 pages = save_zero_page(f, block, offset, p, bytes_transferred);
965 if (pages == -1) {
966 pages = compress_page_with_multi_thread(f, block, offset,
967 bytes_transferred);
968 }
969 }
970 }
971
972 return pages;
973}
974
975/*
976 * Find the next dirty page and update any state associated with
977 * the search process.
978 *
979 * Returns: True if a page is found
980 *
981 * @f: Current migration stream.
982 * @pss: Data about the state of the current dirty page scan.
983 * @*again: Set to false if the search has scanned the whole of RAM
984 * *ram_addr_abs: Pointer into which to store the address of the dirty page
985 * within the global ram_addr space
986 */
987static bool find_dirty_block(QEMUFile *f, PageSearchStatus *pss,
f3f491fc 988 bool *again, ram_addr_t *ram_addr_abs)
b9e60928 989{
990 pss->offset = migration_bitmap_find_dirty(pss->block, pss->offset,
991 ram_addr_abs);
992 if (pss->complete_round && pss->block == last_seen_block &&
993 pss->offset >= last_offset) {
994 /*
995 * We've been once around the RAM and haven't found anything.
996 * Give up.
997 */
998 *again = false;
999 return false;
1000 }
1001 if (pss->offset >= pss->block->used_length) {
1002 /* Didn't find anything in this RAM Block */
1003 pss->offset = 0;
1004 pss->block = QLIST_NEXT_RCU(pss->block, next);
1005 if (!pss->block) {
1006 /* Hit the end of the list */
1007 pss->block = QLIST_FIRST_RCU(&ram_list.blocks);
1008 /* Flag that we've looped */
1009 pss->complete_round = true;
1010 ram_bulk_stage = false;
1011 if (migrate_use_xbzrle()) {
1012 /* If xbzrle is on, stop using the data compression at this
1013 * point. In theory, xbzrle can do better than compression.
1014 */
1015 flush_compressed_data(f);
1016 compression_switch = false;
1017 }
1018 }
1019 /* Didn't find anything this time, but try again on the new block */
1020 *again = true;
1021 return false;
1022 } else {
1023 /* Can go around again, but... */
1024 *again = true;
1025 /* We've found something so probably don't need to */
1026 return true;
1027 }
1028}
1029
1030/*
1031 * Helper for 'get_queued_page' - gets a page off the queue
1032 * ms: MigrationState in
1033 * *offset: Used to return the offset within the RAMBlock
1034 * ram_addr_abs: global offset in the dirty/sent bitmaps
1035 *
1036 * Returns: block (or NULL if none available)
1037 */
1038static RAMBlock *unqueue_page(MigrationState *ms, ram_addr_t *offset,
1039 ram_addr_t *ram_addr_abs)
1040{
1041 RAMBlock *block = NULL;
1042
1043 qemu_mutex_lock(&ms->src_page_req_mutex);
1044 if (!QSIMPLEQ_EMPTY(&ms->src_page_requests)) {
1045 struct MigrationSrcPageRequest *entry =
1046 QSIMPLEQ_FIRST(&ms->src_page_requests);
1047 block = entry->rb;
1048 *offset = entry->offset;
1049 *ram_addr_abs = (entry->offset + entry->rb->offset) &
1050 TARGET_PAGE_MASK;
1051
1052 if (entry->len > TARGET_PAGE_SIZE) {
1053 entry->len -= TARGET_PAGE_SIZE;
1054 entry->offset += TARGET_PAGE_SIZE;
1055 } else {
1056 memory_region_unref(block->mr);
1057 QSIMPLEQ_REMOVE_HEAD(&ms->src_page_requests, next_req);
1058 g_free(entry);
1059 }
1060 }
1061 qemu_mutex_unlock(&ms->src_page_req_mutex);
1062
1063 return block;
1064}
1065
1066/*
1067 * Unqueue a page from the queue fed by postcopy page requests; skips pages
1068 * that are already sent (!dirty)
1069 *
1070 * ms: MigrationState in
1071 * pss: PageSearchStatus structure updated with found block/offset
1072 * ram_addr_abs: global offset in the dirty/sent bitmaps
1073 *
1074 * Returns: true if a queued page is found
1075 */
1076static bool get_queued_page(MigrationState *ms, PageSearchStatus *pss,
1077 ram_addr_t *ram_addr_abs)
1078{
1079 RAMBlock *block;
1080 ram_addr_t offset;
1081 bool dirty;
1082
1083 do {
1084 block = unqueue_page(ms, &offset, ram_addr_abs);
1085 /*
1086 * We're sending this page, and since it's postcopy nothing else
1087 * will dirty it, and we must make sure it doesn't get sent again
1088 * even if this queue request was received after the background
1089 * search already sent it.
1090 */
1091 if (block) {
1092 unsigned long *bitmap;
1093 bitmap = atomic_rcu_read(&migration_bitmap_rcu)->bmap;
1094 dirty = test_bit(*ram_addr_abs >> TARGET_PAGE_BITS, bitmap);
1095 if (!dirty) {
1096 trace_get_queued_page_not_dirty(
1097 block->idstr, (uint64_t)offset,
1098 (uint64_t)*ram_addr_abs,
1099 test_bit(*ram_addr_abs >> TARGET_PAGE_BITS,
1100 atomic_rcu_read(&migration_bitmap_rcu)->unsentmap));
1101 } else {
1102 trace_get_queued_page(block->idstr,
1103 (uint64_t)offset,
1104 (uint64_t)*ram_addr_abs);
1105 }
1106 }
1107
1108 } while (block && !dirty);
1109
1110 if (block) {
1111 /*
1112 * As soon as we start servicing pages out of order, then we have
1113 * to kill the bulk stage, since the bulk stage assumes
1114 * in (migration_bitmap_find_and_reset_dirty) that every page is
1115 * dirty, that's no longer true.
1116 */
1117 ram_bulk_stage = false;
1118
1119 /*
1120 * We want the background search to continue from the queued page
1121 * since the guest is likely to want other pages near to the page
1122 * it just requested.
1123 */
1124 pss->block = block;
1125 pss->offset = offset;
1126 }
1127
1128 return !!block;
1129}
1130
1131/**
1132 * flush_page_queue: Flush any remaining pages in the ram request queue
1133 * it should be empty at the end anyway, but in error cases there may be
1134 * some left.
1135 *
1136 * ms: MigrationState
1137 */
1138void flush_page_queue(MigrationState *ms)
1139{
1140 struct MigrationSrcPageRequest *mspr, *next_mspr;
1141 /* This queue generally should be empty - but in the case of a failed
1142 * migration might have some droppings in.
1143 */
1144 rcu_read_lock();
1145 QSIMPLEQ_FOREACH_SAFE(mspr, &ms->src_page_requests, next_req, next_mspr) {
1146 memory_region_unref(mspr->rb->mr);
1147 QSIMPLEQ_REMOVE_HEAD(&ms->src_page_requests, next_req);
1148 g_free(mspr);
1149 }
1150 rcu_read_unlock();
1151}
1152
1153/**
1154 * Queue the pages for transmission, e.g. a request from postcopy destination
1155 * ms: MigrationStatus in which the queue is held
1156 * rbname: The RAMBlock the request is for - may be NULL (to mean reuse last)
1157 * start: Offset from the start of the RAMBlock
1158 * len: Length (in bytes) to send
1159 * Return: 0 on success
1160 */
1161int ram_save_queue_pages(MigrationState *ms, const char *rbname,
1162 ram_addr_t start, ram_addr_t len)
1163{
1164 RAMBlock *ramblock;
1165
d3bf5418 1166 ms->postcopy_requests++;
1167 rcu_read_lock();
1168 if (!rbname) {
1169 /* Reuse last RAMBlock */
1170 ramblock = ms->last_req_rb;
1171
1172 if (!ramblock) {
1173 /*
1174 * Shouldn't happen, we can't reuse the last RAMBlock if
1175 * it's the 1st request.
1176 */
1177 error_report("ram_save_queue_pages no previous block");
1178 goto err;
1179 }
1180 } else {
1181 ramblock = qemu_ram_block_by_name(rbname);
1182
1183 if (!ramblock) {
1184 /* We shouldn't be asked for a non-existent RAMBlock */
1185 error_report("ram_save_queue_pages no block '%s'", rbname);
1186 goto err;
1187 }
1188 ms->last_req_rb = ramblock;
1189 }
1190 trace_ram_save_queue_pages(ramblock->idstr, start, len);
1191 if (start+len > ramblock->used_length) {
1192 error_report("%s request overrun start=" RAM_ADDR_FMT " len="
1193 RAM_ADDR_FMT " blocklen=" RAM_ADDR_FMT,
1194 __func__, start, len, ramblock->used_length);
1195 goto err;
1196 }
1197
1198 struct MigrationSrcPageRequest *new_entry =
1199 g_malloc0(sizeof(struct MigrationSrcPageRequest));
1200 new_entry->rb = ramblock;
1201 new_entry->offset = start;
1202 new_entry->len = len;
1203
1204 memory_region_ref(ramblock->mr);
1205 qemu_mutex_lock(&ms->src_page_req_mutex);
1206 QSIMPLEQ_INSERT_TAIL(&ms->src_page_requests, new_entry, next_req);
1207 qemu_mutex_unlock(&ms->src_page_req_mutex);
1208 rcu_read_unlock();
1209
1210 return 0;
1211
1212err:
1213 rcu_read_unlock();
1214 return -1;
1215}
1216
1217/**
1218 * ram_save_target_page: Save one target page
1219 *
1220 *
1221 * @f: QEMUFile where to send the data
1222 * @block: pointer to block that contains the page we want to send
1223 * @offset: offset inside the block for the page;
1224 * @last_stage: if we are at the completion stage
1225 * @bytes_transferred: increase it with the number of transferred bytes
1226 * @dirty_ram_abs: Address of the start of the dirty page in ram_addr_t space
1227 *
1228 * Returns: Number of pages written.
1229 */
1230static int ram_save_target_page(MigrationState *ms, QEMUFile *f,
a08f6890 1231 PageSearchStatus *pss,
1232 bool last_stage,
1233 uint64_t *bytes_transferred,
1234 ram_addr_t dirty_ram_abs)
1235{
1236 int res = 0;
1237
    /* Check if the page is dirty and, if it is, send it */
1239 if (migration_bitmap_clear_dirty(dirty_ram_abs)) {
1240 unsigned long *unsentmap;
1241 if (compression_switch && migrate_use_compression()) {
a08f6890 1242 res = ram_save_compressed_page(f, pss,
1243 last_stage,
1244 bytes_transferred);
1245 } else {
a08f6890 1246 res = ram_save_page(f, pss, last_stage,
1247 bytes_transferred);
1248 }
1249
1250 if (res < 0) {
1251 return res;
1252 }
1253 unsentmap = atomic_rcu_read(&migration_bitmap_rcu)->unsentmap;
1254 if (unsentmap) {
1255 clear_bit(dirty_ram_abs >> TARGET_PAGE_BITS, unsentmap);
1256 }
1257 /* Only update last_sent_block if a block was actually sent; xbzrle
1258 * might have decided the page was identical so didn't bother writing
1259 * to the stream.
1260 */
1261 if (res > 0) {
a08f6890 1262 last_sent_block = pss->block;
3fd3c4b3 1263 }
1264 }
1265
1266 return res;
1267}
1268
1269/**
cb8d4c8f 1270 * ram_save_host_page: Starting at *offset send pages up to the end
1271 * of the current host page. It's valid for the initial
1272 * offset to point into the middle of a host page
1273 * in which case the remainder of the hostpage is sent.
1274 * Only dirty target pages are sent.
1275 *
1276 * Returns: Number of pages written.
1277 *
1278 * @f: QEMUFile where to send the data
1279 * @block: pointer to block that contains the page we want to send
1280 * @offset: offset inside the block for the page; updated to last target page
1281 * sent
1282 * @last_stage: if we are at the completion stage
1283 * @bytes_transferred: increase it with the number of transferred bytes
1284 * @dirty_ram_abs: Address of the start of the dirty page in ram_addr_t space
1285 */
1286static int ram_save_host_page(MigrationState *ms, QEMUFile *f,
1287 PageSearchStatus *pss,
1288 bool last_stage,
1289 uint64_t *bytes_transferred,
1290 ram_addr_t dirty_ram_abs)
1291{
1292 int tmppages, pages = 0;
1293 do {
a08f6890 1294 tmppages = ram_save_target_page(ms, f, pss, last_stage,
1295 bytes_transferred, dirty_ram_abs);
1296 if (tmppages < 0) {
1297 return tmppages;
1298 }
1299
1300 pages += tmppages;
a08f6890 1301 pss->offset += TARGET_PAGE_SIZE;
a82d593b 1302 dirty_ram_abs += TARGET_PAGE_SIZE;
a08f6890 1303 } while (pss->offset & (qemu_host_page_size - 1));
1304
1305 /* The offset we leave with is the last one we looked at */
a08f6890 1306 pss->offset -= TARGET_PAGE_SIZE;
1307 return pages;
1308}
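/* For example, on a host with 64KiB pages and a 4KiB TARGET_PAGE_SIZE this
 * sends up to 16 target pages per call, so the destination receives whole
 * host pages, which matters for postcopy where pages are placed a host page
 * at a time.
 */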
6c595cde 1309
1310/**
1311 * ram_find_and_save_block: Finds a dirty page and sends it to f
1312 *
1313 * Called within an RCU critical section.
1314 *
1315 * Returns: The number of pages written
1316 * 0 means no dirty pages
1317 *
1318 * @f: QEMUFile where to send the data
1319 * @last_stage: if we are at the completion stage
1320 * @bytes_transferred: increase it with the number of transferred bytes
1321 *
1322 * On systems where host-page-size > target-page-size it will send all the
1323 * pages in a host page that are dirty.
1324 */
1325
1326static int ram_find_and_save_block(QEMUFile *f, bool last_stage,
1327 uint64_t *bytes_transferred)
1328{
b8fb8cb7 1329 PageSearchStatus pss;
a82d593b 1330 MigrationState *ms = migrate_get_current();
56e93d26 1331 int pages = 0;
b9e60928 1332 bool again, found;
1333 ram_addr_t dirty_ram_abs; /* Address of the start of the dirty page in
1334 ram_addr_t space */
56e93d26 1335
1336 pss.block = last_seen_block;
1337 pss.offset = last_offset;
1338 pss.complete_round = false;
1339
1340 if (!pss.block) {
1341 pss.block = QLIST_FIRST_RCU(&ram_list.blocks);
1342 }
56e93d26 1343
b9e60928 1344 do {
1345 again = true;
1346 found = get_queued_page(ms, &pss, &dirty_ram_abs);
b9e60928 1347
1348 if (!found) {
1349 /* priority queue empty, so just search for something dirty */
1350 found = find_dirty_block(f, &pss, &again, &dirty_ram_abs);
1351 }
f3f491fc 1352
a82d593b 1353 if (found) {
a08f6890 1354 pages = ram_save_host_page(ms, f, &pss,
1355 last_stage, bytes_transferred,
1356 dirty_ram_abs);
56e93d26 1357 }
b9e60928 1358 } while (!pages && again);
56e93d26 1359
1360 last_seen_block = pss.block;
1361 last_offset = pss.offset;
1362
1363 return pages;
1364}
1365
1366void acct_update_position(QEMUFile *f, size_t size, bool zero)
1367{
1368 uint64_t pages = size / TARGET_PAGE_SIZE;
1369 if (zero) {
1370 acct_info.dup_pages += pages;
1371 } else {
1372 acct_info.norm_pages += pages;
1373 bytes_transferred += size;
1374 qemu_update_position(f, size);
1375 }
1376}
1377
1378static ram_addr_t ram_save_remaining(void)
1379{
1380 return migration_dirty_pages;
1381}
1382
1383uint64_t ram_bytes_remaining(void)
1384{
1385 return ram_save_remaining() * TARGET_PAGE_SIZE;
1386}
1387
1388uint64_t ram_bytes_transferred(void)
1389{
1390 return bytes_transferred;
1391}
1392
1393uint64_t ram_bytes_total(void)
1394{
1395 RAMBlock *block;
1396 uint64_t total = 0;
1397
1398 rcu_read_lock();
1399 QLIST_FOREACH_RCU(block, &ram_list.blocks, next)
1400 total += block->used_length;
1401 rcu_read_unlock();
1402 return total;
1403}
1404
1405void free_xbzrle_decoded_buf(void)
1406{
1407 g_free(xbzrle_decoded_buf);
1408 xbzrle_decoded_buf = NULL;
1409}
1410
1411static void migration_bitmap_free(struct BitmapRcu *bmap)
1412{
1413 g_free(bmap->bmap);
f3f491fc 1414 g_free(bmap->unsentmap);
1415 g_free(bmap);
1416}
1417
6ad2a215 1418static void ram_migration_cleanup(void *opaque)
56e93d26 1419{
    /* The caller holds the iothread lock or is in a bottom half, so there
     * is no write race against this migration_bitmap.
     */
1423 struct BitmapRcu *bitmap = migration_bitmap_rcu;
1424 atomic_rcu_set(&migration_bitmap_rcu, NULL);
2ff64038 1425 if (bitmap) {
56e93d26 1426 memory_global_dirty_log_stop();
60be6340 1427 call_rcu(bitmap, migration_bitmap_free, rcu);
1428 }
1429
1430 XBZRLE_cache_lock();
1431 if (XBZRLE.cache) {
1432 cache_fini(XBZRLE.cache);
1433 g_free(XBZRLE.encoded_buf);
1434 g_free(XBZRLE.current_buf);
adb65dec 1435 g_free(ZERO_TARGET_PAGE);
1436 XBZRLE.cache = NULL;
1437 XBZRLE.encoded_buf = NULL;
1438 XBZRLE.current_buf = NULL;
1439 }
1440 XBZRLE_cache_unlock();
1441}
1442
1443static void reset_ram_globals(void)
1444{
1445 last_seen_block = NULL;
1446 last_sent_block = NULL;
1447 last_offset = 0;
1448 last_version = ram_list.version;
1449 ram_bulk_stage = true;
1450}
1451
1452#define MAX_WAIT 50 /* ms, half buffered_file limit */
1453
1454void migration_bitmap_extend(ram_addr_t old, ram_addr_t new)
1455{
1456 /* called in qemu main thread, so there is
1457 * no writing race against this migration_bitmap
1458 */
1459 if (migration_bitmap_rcu) {
1460 struct BitmapRcu *old_bitmap = migration_bitmap_rcu, *bitmap;
1461 bitmap = g_new(struct BitmapRcu, 1);
1462 bitmap->bmap = bitmap_new(new);
1463
        /* Prevent bits in migration_bitmap from being set
         * by migration_bitmap_sync_range() at the same time.
         * It is safe for migration if bits in migration_bitmap are cleared
         * at the same time.
         */
1469 qemu_mutex_lock(&migration_bitmap_mutex);
1470 bitmap_copy(bitmap->bmap, old_bitmap->bmap, old);
1471 bitmap_set(bitmap->bmap, old, new - old);
1472
1473 /* We don't have a way to safely extend the sentmap
1474 * with RCU; so mark it as missing, entry to postcopy
1475 * will fail.
1476 */
1477 bitmap->unsentmap = NULL;
1478
60be6340 1479 atomic_rcu_set(&migration_bitmap_rcu, bitmap);
1480 qemu_mutex_unlock(&migration_bitmap_mutex);
1481 migration_dirty_pages += new - old;
60be6340 1482 call_rcu(old_bitmap, migration_bitmap_free, rcu);
1483 }
1484}
56e93d26 1485
1486/*
1487 * 'expected' is the value you expect the bitmap mostly to be full
1488 * of; it won't bother printing lines that are all this value.
1489 * If 'todump' is null the migration bitmap is dumped.
1490 */
1491void ram_debug_dump_bitmap(unsigned long *todump, bool expected)
1492{
1493 int64_t ram_pages = last_ram_offset() >> TARGET_PAGE_BITS;
1494
1495 int64_t cur;
1496 int64_t linelen = 128;
1497 char linebuf[129];
1498
1499 if (!todump) {
1500 todump = atomic_rcu_read(&migration_bitmap_rcu)->bmap;
1501 }
1502
1503 for (cur = 0; cur < ram_pages; cur += linelen) {
1504 int64_t curb;
1505 bool found = false;
1506 /*
1507 * Last line; catch the case where the line length
1508 * is longer than remaining ram
1509 */
1510 if (cur + linelen > ram_pages) {
1511 linelen = ram_pages - cur;
1512 }
1513 for (curb = 0; curb < linelen; curb++) {
1514 bool thisbit = test_bit(cur + curb, todump);
1515 linebuf[curb] = thisbit ? '1' : '.';
1516 found = found || (thisbit != expected);
1517 }
1518 if (found) {
1519 linebuf[curb] = '\0';
1520 fprintf(stderr, "0x%08" PRIx64 " : %s\n", cur, linebuf);
1521 }
1522 }
1523}
1524
1525/* **** functions for postcopy ***** */
1526
1527/*
1528 * Callback from postcopy_each_ram_send_discard for each RAMBlock
1529 * Note: At this point the 'unsentmap' is the processed bitmap combined
1530 * with the dirtymap; so a '1' means it's either dirty or unsent.
1531 * start,length: Indexes into the bitmap for the first bit
1532 * representing the named block and length in target-pages
1533 */
1534static int postcopy_send_discard_bm_ram(MigrationState *ms,
1535 PostcopyDiscardState *pds,
1536 unsigned long start,
1537 unsigned long length)
1538{
1539 unsigned long end = start + length; /* one after the end */
1540 unsigned long current;
1541 unsigned long *unsentmap;
1542
1543 unsentmap = atomic_rcu_read(&migration_bitmap_rcu)->unsentmap;
1544 for (current = start; current < end; ) {
1545 unsigned long one = find_next_bit(unsentmap, end, current);
1546
1547 if (one <= end) {
1548 unsigned long zero = find_next_zero_bit(unsentmap, end, one + 1);
1549 unsigned long discard_length;
1550
1551 if (zero >= end) {
1552 discard_length = end - one;
1553 } else {
1554 discard_length = zero - one;
1555 }
1556 if (discard_length) {
1557 postcopy_discard_send_range(ms, pds, one, discard_length);
1558 }
1559 current = one + discard_length;
1560 } else {
1561 current = one;
1562 }
1563 }
1564
1565 return 0;
1566}
1567
1568/*
1569 * Utility for the outgoing postcopy code.
1570 * Calls postcopy_send_discard_bm_ram for each RAMBlock
1571 * passing it bitmap indexes and name.
1572 * Returns: 0 on success
1573 * (qemu_ram_foreach_block ends up passing unscaled lengths
1574 * which would mean postcopy code would have to deal with target page)
1575 */
1576static int postcopy_each_ram_send_discard(MigrationState *ms)
1577{
1578 struct RAMBlock *block;
1579 int ret;
1580
1581 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1582 unsigned long first = block->offset >> TARGET_PAGE_BITS;
1583 PostcopyDiscardState *pds = postcopy_discard_send_init(ms,
1584 first,
1585 block->idstr);
1586
1587 /*
1588 * Postcopy sends chunks of bitmap over the wire, but it
1589 * just needs indexes at this point, avoids it having
1590 * target page specific code.
1591 */
1592 ret = postcopy_send_discard_bm_ram(ms, pds, first,
1593 block->used_length >> TARGET_PAGE_BITS);
1594 postcopy_discard_send_finish(ms, pds);
1595 if (ret) {
1596 return ret;
1597 }
1598 }
1599
1600 return 0;
1601}
1602
1603/*
1604 * Helper for postcopy_chunk_hostpages; it's called twice to cleanup
1605 * the two bitmaps, that are similar, but one is inverted.
1606 *
1607 * We search for runs of target-pages that don't start or end on a
1608 * host page boundary;
1609 * unsent_pass=true: Cleans up partially unsent host pages by searching
1610 * the unsentmap
1611 * unsent_pass=false: Cleans up partially dirty host pages by searching
1612 * the main migration bitmap
1613 *
1614 */
1615static void postcopy_chunk_hostpages_pass(MigrationState *ms, bool unsent_pass,
1616 RAMBlock *block,
1617 PostcopyDiscardState *pds)
1618{
1619 unsigned long *bitmap;
1620 unsigned long *unsentmap;
1621 unsigned int host_ratio = qemu_host_page_size / TARGET_PAGE_SIZE;
1622 unsigned long first = block->offset >> TARGET_PAGE_BITS;
1623 unsigned long len = block->used_length >> TARGET_PAGE_BITS;
1624 unsigned long last = first + (len - 1);
1625 unsigned long run_start;
1626
1627 bitmap = atomic_rcu_read(&migration_bitmap_rcu)->bmap;
1628 unsentmap = atomic_rcu_read(&migration_bitmap_rcu)->unsentmap;
1629
1630 if (unsent_pass) {
1631 /* Find a sent page */
1632 run_start = find_next_zero_bit(unsentmap, last + 1, first);
1633 } else {
1634 /* Find a dirty page */
1635 run_start = find_next_bit(bitmap, last + 1, first);
1636 }
1637
1638 while (run_start <= last) {
1639 bool do_fixup = false;
1640 unsigned long fixup_start_addr;
1641 unsigned long host_offset;
1642
1643 /*
1644 * If the start of this run of pages is in the middle of a host
1645 * page, then we need to fixup this host page.
1646 */
1647 host_offset = run_start % host_ratio;
1648 if (host_offset) {
1649 do_fixup = true;
1650 run_start -= host_offset;
1651 fixup_start_addr = run_start;
1652 /* For the next pass */
1653 run_start = run_start + host_ratio;
1654 } else {
1655 /* Find the end of this run */
1656 unsigned long run_end;
1657 if (unsent_pass) {
1658 run_end = find_next_bit(unsentmap, last + 1, run_start + 1);
1659 } else {
1660 run_end = find_next_zero_bit(bitmap, last + 1, run_start + 1);
1661 }
1662 /*
1663 * If the end isn't at the start of a host page, then the
1664 * run doesn't finish at the end of a host page
1665 * and we need to discard.
1666 */
1667 host_offset = run_end % host_ratio;
1668 if (host_offset) {
1669 do_fixup = true;
1670 fixup_start_addr = run_end - host_offset;
1671 /*
1672 * This host page has gone, the next loop iteration starts
1673 * from after the fixup
1674 */
1675 run_start = fixup_start_addr + host_ratio;
1676 } else {
1677 /*
1678 * No discards on this iteration, next loop starts from
1679 * next sent/dirty page
1680 */
1681 run_start = run_end + 1;
1682 }
1683 }
1684
1685 if (do_fixup) {
1686 unsigned long page;
1687
1688 /* Tell the destination to discard this page */
1689 if (unsent_pass || !test_bit(fixup_start_addr, unsentmap)) {
1690 /* For the unsent_pass we:
1691 * discard partially sent pages
1692 * For the !unsent_pass (dirty) we:
1693 * discard partially dirty pages that were sent
1694 * (any partially sent pages were already discarded
1695 * by the previous unsent_pass)
1696 */
1697 postcopy_discard_send_range(ms, pds, fixup_start_addr,
1698 host_ratio);
1699 }
1700
1701 /* Clean up the bitmap */
1702 for (page = fixup_start_addr;
1703 page < fixup_start_addr + host_ratio; page++) {
1704 /* All pages in this host page are now not sent */
1705 set_bit(page, unsentmap);
1706
1707 /*
1708 * Remark them as dirty, updating the count for any pages
1709 * that weren't previously dirty.
1710 */
1711 migration_dirty_pages += !test_and_set_bit(page, bitmap);
1712 }
1713 }
1714
1715 if (unsent_pass) {
1716 /* Find the next sent page for the next iteration */
1717 run_start = find_next_zero_bit(unsentmap, last + 1,
1718 run_start);
1719 } else {
1720 /* Find the next dirty page for the next iteration */
1721 run_start = find_next_bit(bitmap, last + 1, run_start);
1722 }
1723 }
1724}
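/* Worked example, assuming a 4:1 host:target page ratio: if a run starts at
 * target page 6, run_start is pulled back to page 4 and the host page
 * covering target pages 4..7 is discarded on the destination (if it had been
 * partially sent) and re-marked dirty/unsent here, so it is later resent as
 * one complete host page.
 */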
1725
1726/*
1727 * Utility for the outgoing postcopy code.
1728 *
1729 * Discard any partially sent host-page size chunks, mark any partially
1730 * dirty host-page size chunks as all dirty.
1731 *
1732 * Returns: 0 on success
1733 */
1734static int postcopy_chunk_hostpages(MigrationState *ms)
1735{
1736 struct RAMBlock *block;
1737
1738 if (qemu_host_page_size == TARGET_PAGE_SIZE) {
1739 /* Easy case - TPS==HPS - nothing to be done */
1740 return 0;
1741 }
1742
1743 /* Easiest way to make sure we don't resume in the middle of a host-page */
1744 last_seen_block = NULL;
1745 last_sent_block = NULL;
1746 last_offset = 0;
1747
1748 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1749 unsigned long first = block->offset >> TARGET_PAGE_BITS;
1750
1751 PostcopyDiscardState *pds =
1752 postcopy_discard_send_init(ms, first, block->idstr);
1753
1754 /* First pass: Discard all partially sent host pages */
1755 postcopy_chunk_hostpages_pass(ms, true, block, pds);
1756 /*
1757 * Second pass: Ensure that all partially dirty host pages are made
1758 * fully dirty.
1759 */
1760 postcopy_chunk_hostpages_pass(ms, false, block, pds);
1761
1762 postcopy_discard_send_finish(ms, pds);
1763 } /* ram_list loop */
1764
1765 return 0;
1766}
1767
1768/*
 * Transmit the set of pages to be discarded after precopy to the target;
 * these are pages that:
1771 * a) Have been previously transmitted but are now dirty again
1772 * b) Pages that have never been transmitted, this ensures that
1773 * any pages on the destination that have been mapped by background
1774 * tasks get discarded (transparent huge pages is the specific concern)
1775 * Hopefully this is pretty sparse
1776 */
1777int ram_postcopy_send_discard_bitmap(MigrationState *ms)
1778{
1779 int ret;
1780 unsigned long *bitmap, *unsentmap;
1781
1782 rcu_read_lock();
1783
1784 /* This should be our last sync, the src is now paused */
1785 migration_bitmap_sync();
1786
1787 unsentmap = atomic_rcu_read(&migration_bitmap_rcu)->unsentmap;
1788 if (!unsentmap) {
1789 /* We don't have a safe way to resize the sentmap, so
1790 * if the bitmap was resized it will be NULL at this
1791 * point.
1792 */
1793 error_report("migration ram resized during precopy phase");
1794 rcu_read_unlock();
1795 return -EINVAL;
1796 }
1797
1798 /* Deal with target page size (TPS) != host page size (HPS) */
1799 ret = postcopy_chunk_hostpages(ms);
1800 if (ret) {
1801 rcu_read_unlock();
1802 return ret;
1803 }
1804
1805 /*
1806 * Update the unsentmap to be unsentmap = unsentmap | dirty
1807 */
1808 bitmap = atomic_rcu_read(&migration_bitmap_rcu)->bmap;
1809 bitmap_or(unsentmap, unsentmap, bitmap,
1810 last_ram_offset() >> TARGET_PAGE_BITS);
1811
1812
1813 trace_ram_postcopy_send_discard_bitmap();
1814#ifdef DEBUG_POSTCOPY
1815 ram_debug_dump_bitmap(unsentmap, true);
1816#endif
1817
1818 ret = postcopy_each_ram_send_discard(ms);
1819 rcu_read_unlock();
1820
1821 return ret;
1822}
1823
1824/*
1825 * At the start of the postcopy phase of migration, any now-dirty
1826 * precopied pages are discarded.
1827 *
1828 * start, length describe a byte address range within the RAMBlock
1829 *
1830 * Returns 0 on success.
1831 */
1832int ram_discard_range(MigrationIncomingState *mis,
1833 const char *block_name,
1834 uint64_t start, size_t length)
1835{
1836 int ret = -1;
1837
1838 rcu_read_lock();
1839 RAMBlock *rb = qemu_ram_block_by_name(block_name);
1840
1841 if (!rb) {
1842 error_report("ram_discard_range: Failed to find block '%s'",
1843 block_name);
1844 goto err;
1845 }
1846
1847 uint8_t *host_startaddr = rb->host + start;
1848
1849 if ((uintptr_t)host_startaddr & (qemu_host_page_size - 1)) {
1850 error_report("ram_discard_range: Unaligned start address: %p",
1851 host_startaddr);
1852 goto err;
1853 }
1854
1855 if ((start + length) <= rb->used_length) {
1856 uint8_t *host_endaddr = host_startaddr + length;
1857 if ((uintptr_t)host_endaddr & (qemu_host_page_size - 1)) {
1858 error_report("ram_discard_range: Unaligned end address: %p",
1859 host_endaddr);
1860 goto err;
1861 }
1862 ret = postcopy_ram_discard_range(mis, host_startaddr, length);
1863 } else {
1864 error_report("ram_discard_range: Overrun block '%s' (%" PRIu64
9458ad6b 1865 "/%zx/" RAM_ADDR_FMT")",
1866 block_name, start, length, rb->used_length);
1867 }
1868
1869err:
1870 rcu_read_unlock();
1871
1872 return ret;
1873}
1874
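/*
 * ram_save_init_globals: one-off initialisation before the first RAM-save
 * pass. Resets the accounting counters, sets up the XBZRLE cache and its
 * buffers when XBZRLE is enabled, allocates the migration dirty bitmap
 * (plus the unsentmap when postcopy is enabled) with every page initially
 * marked dirty, and starts dirty logging.
 */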
a91246c9 1875static int ram_save_init_globals(void)
56e93d26 1876{
1877 int64_t ram_bitmap_pages; /* Size of bitmap in pages, including gaps */
1878
1879 dirty_rate_high_cnt = 0;
1880 bitmap_sync_count = 0;
1881 migration_bitmap_sync_init();
dd631697 1882 qemu_mutex_init(&migration_bitmap_mutex);
1883
1884 if (migrate_use_xbzrle()) {
1885 XBZRLE_cache_lock();
adb65dec 1886 ZERO_TARGET_PAGE = g_malloc0(TARGET_PAGE_SIZE);
1887 XBZRLE.cache = cache_init(migrate_xbzrle_cache_size() /
1888 TARGET_PAGE_SIZE,
1889 TARGET_PAGE_SIZE);
1890 if (!XBZRLE.cache) {
1891 XBZRLE_cache_unlock();
1892 error_report("Error creating cache");
1893 return -1;
1894 }
1895 XBZRLE_cache_unlock();
1896
1897 /* We prefer not to abort if there is no memory */
1898 XBZRLE.encoded_buf = g_try_malloc0(TARGET_PAGE_SIZE);
1899 if (!XBZRLE.encoded_buf) {
1900 error_report("Error allocating encoded_buf");
1901 return -1;
1902 }
1903
1904 XBZRLE.current_buf = g_try_malloc(TARGET_PAGE_SIZE);
1905 if (!XBZRLE.current_buf) {
1906 error_report("Error allocating current_buf");
1907 g_free(XBZRLE.encoded_buf);
1908 XBZRLE.encoded_buf = NULL;
1909 return -1;
1910 }
1911
1912 acct_clear();
1913 }
1914
1915 /* For memory_global_dirty_log_start below. */
1916 qemu_mutex_lock_iothread();
1917
1918 qemu_mutex_lock_ramlist();
1919 rcu_read_lock();
1920 bytes_transferred = 0;
1921 reset_ram_globals();
1922
1923 ram_bitmap_pages = last_ram_offset() >> TARGET_PAGE_BITS;
f3f491fc 1924 migration_bitmap_rcu = g_new0(struct BitmapRcu, 1);
1925 migration_bitmap_rcu->bmap = bitmap_new(ram_bitmap_pages);
1926 bitmap_set(migration_bitmap_rcu->bmap, 0, ram_bitmap_pages);
56e93d26 1927
1928 if (migrate_postcopy_ram()) {
1929 migration_bitmap_rcu->unsentmap = bitmap_new(ram_bitmap_pages);
1930 bitmap_set(migration_bitmap_rcu->unsentmap, 0, ram_bitmap_pages);
1931 }
1932
1933 /*
1934 * Count the total number of pages used by ram blocks not including any
1935 * gaps due to alignment or unplugs.
1936 */
1937 migration_dirty_pages = ram_bytes_total() >> TARGET_PAGE_BITS;
1938
1939 memory_global_dirty_log_start();
1940 migration_bitmap_sync();
1941 qemu_mutex_unlock_ramlist();
49877834 1942 qemu_mutex_unlock_iothread();
1943 rcu_read_unlock();
1944
1945 return 0;
1946}
1947
1948/* Each of ram_save_setup, ram_save_iterate and ram_save_complete has
1949 * a long-running RCU critical section. When the RCU reclaims in the code
1950 * start to become numerous, it will be necessary to reduce the
1951 * granularity of these critical sections.
1952 */
1953
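/*
 * ram_save_setup: initialise the RAM-save state (skipped when already in
 * COLO state, where the existing bitmap is reused) and write the stream
 * header: the total RAM size tagged with RAM_SAVE_FLAG_MEM_SIZE, then each
 * block's idstr length, idstr and used_length, terminated by
 * RAM_SAVE_FLAG_EOS.
 */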
1954static int ram_save_setup(QEMUFile *f, void *opaque)
1955{
1956 RAMBlock *block;
1957
1958 /* migration has already set up the bitmap; reuse it. */
1959 if (!migration_in_colo_state()) {
1960 if (ram_save_init_globals() < 0) {
1961 return -1;
1962 }
1963 }
1964
1965 rcu_read_lock();
1966
1967 qemu_put_be64(f, ram_bytes_total() | RAM_SAVE_FLAG_MEM_SIZE);
1968
1969 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1970 qemu_put_byte(f, strlen(block->idstr));
1971 qemu_put_buffer(f, (uint8_t *)block->idstr, strlen(block->idstr));
1972 qemu_put_be64(f, block->used_length);
1973 }
1974
1975 rcu_read_unlock();
1976
1977 ram_control_before_iterate(f, RAM_CONTROL_SETUP);
1978 ram_control_after_iterate(f, RAM_CONTROL_SETUP);
1979
1980 qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
1981
1982 return 0;
1983}
1984
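/*
 * ram_save_iterate: send dirty pages until the bandwidth limit kicks in or
 * nothing is left to send. The elapsed time is checked every 64 iterations
 * so a single call cannot hold the RCU critical section for too long.
 * Returns 1 once all pages have been sent, 0 if more work remains, or a
 * negative error from the stream.
 */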
1985static int ram_save_iterate(QEMUFile *f, void *opaque)
1986{
1987 int ret;
1988 int i;
1989 int64_t t0;
5c90308f 1990 int done = 0;
1991
1992 rcu_read_lock();
1993 if (ram_list.version != last_version) {
1994 reset_ram_globals();
1995 }
1996
1997 /* Read version before ram_list.blocks */
1998 smp_rmb();
1999
2000 ram_control_before_iterate(f, RAM_CONTROL_ROUND);
2001
2002 t0 = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
2003 i = 0;
2004 while ((ret = qemu_file_rate_limit(f)) == 0) {
2005 int pages;
2006
2007 pages = ram_find_and_save_block(f, false, &bytes_transferred);
2008 /* no more pages to send */
2009 if (pages == 0) {
5c90308f 2010 done = 1;
2011 break;
2012 }
56e93d26 2013 acct_info.iterations++;
070afca2 2014
2015 /* we want to check in the 1st loop, just in case it was the 1st time
2016 and we had to sync the dirty bitmap.
2017 qemu_clock_get_ns() is a bit expensive, so we only check once
2018 every few iterations.
2019 */
2020 if ((i & 63) == 0) {
2021 uint64_t t1 = (qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - t0) / 1000000;
2022 if (t1 > MAX_WAIT) {
2023 DPRINTF("big wait: %" PRIu64 " milliseconds, %d iterations\n",
2024 t1, i);
2025 break;
2026 }
2027 }
2028 i++;
2029 }
2030 flush_compressed_data(f);
2031 rcu_read_unlock();
2032
2033 /*
2034 * Must occur before EOS (or any QEMUFile operation)
2035 * because of RDMA protocol.
2036 */
2037 ram_control_after_iterate(f, RAM_CONTROL_ROUND);
2038
2039 qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
2040 bytes_transferred += 8;
2041
2042 ret = qemu_file_get_error(f);
2043 if (ret < 0) {
2044 return ret;
2045 }
2046
5c90308f 2047 return done;
2048}
2049
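/*
 * ram_save_complete: flush every remaining dirty page, ignoring the
 * bandwidth limit. Outside postcopy the dirty bitmap is synced one final
 * time first; in postcopy that last sync has already been done when the
 * discard bitmap was sent.
 */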
2050/* Called with the iothread lock held */
2051static int ram_save_complete(QEMUFile *f, void *opaque)
2052{
2053 rcu_read_lock();
2054
2055 if (!migration_in_postcopy(migrate_get_current())) {
2056 migration_bitmap_sync();
2057 }
2058
2059 ram_control_before_iterate(f, RAM_CONTROL_FINISH);
2060
2061 /* try transferring iterative blocks of memory */
2062
2063 /* flush all remaining blocks regardless of rate limiting */
2064 while (true) {
2065 int pages;
2066
2067 pages = ram_find_and_save_block(f, !migration_in_colo_state(),
2068 &bytes_transferred);
2069 /* no more blocks to send */
2070 if (pages == 0) {
2071 break;
2072 }
2073 }
2074
2075 flush_compressed_data(f);
2076 ram_control_after_iterate(f, RAM_CONTROL_FINISH);
2077
2078 rcu_read_unlock();
d09a6fde 2079
2080 qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
2081
2082 return 0;
2083}
2084
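/*
 * ram_save_pending: report how much data still has to be sent. If the
 * remainder has dropped below max_size and we are not yet in postcopy,
 * the dirty bitmap is resynced (under the iothread lock) to get an
 * accurate figure. All remaining RAM is reported as postcopiable.
 */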
2085static void ram_save_pending(QEMUFile *f, void *opaque, uint64_t max_size,
2086 uint64_t *non_postcopiable_pending,
2087 uint64_t *postcopiable_pending)
2088{
2089 uint64_t remaining_size;
2090
2091 remaining_size = ram_save_remaining() * TARGET_PAGE_SIZE;
2092
2093 if (!migration_in_postcopy(migrate_get_current()) &&
2094 remaining_size < max_size) {
2095 qemu_mutex_lock_iothread();
2096 rcu_read_lock();
2097 migration_bitmap_sync();
2098 rcu_read_unlock();
2099 qemu_mutex_unlock_iothread();
2100 remaining_size = ram_save_remaining() * TARGET_PAGE_SIZE;
2101 }
2102
2103 /* We can do postcopy, and all the data is postcopiable */
2104 *postcopiable_pending += remaining_size;
2105}
2106
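/*
 * load_xbzrle: read one XBZRLE-encoded page from the stream: a flags byte
 * (which must be ENCODING_FLAG_XBZRLE), a big-endian 16-bit encoded
 * length, then the encoded data, which is decoded on top of the current
 * contents of the host page. Returns 0 on success, -1 on a malformed page.
 */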
2107static int load_xbzrle(QEMUFile *f, ram_addr_t addr, void *host)
2108{
2109 unsigned int xh_len;
2110 int xh_flags;
063e760a 2111 uint8_t *loaded_data;
2112
2113 if (!xbzrle_decoded_buf) {
2114 xbzrle_decoded_buf = g_malloc(TARGET_PAGE_SIZE);
2115 }
063e760a 2116 loaded_data = xbzrle_decoded_buf;
2117
2118 /* extract RLE header */
2119 xh_flags = qemu_get_byte(f);
2120 xh_len = qemu_get_be16(f);
2121
2122 if (xh_flags != ENCODING_FLAG_XBZRLE) {
2123 error_report("Failed to load XBZRLE page - wrong compression!");
2124 return -1;
2125 }
2126
2127 if (xh_len > TARGET_PAGE_SIZE) {
2128 error_report("Failed to load XBZRLE page - len overflow!");
2129 return -1;
2130 }
2131 /* load data and decode */
063e760a 2132 qemu_get_buffer_in_place(f, &loaded_data, xh_len);
2133
2134 /* decode RLE */
063e760a 2135 if (xbzrle_decode_buffer(loaded_data, xh_len, host,
2136 TARGET_PAGE_SIZE) == -1) {
2137 error_report("Failed to load XBZRLE page - decode error!");
2138 return -1;
2139 }
2140
2141 return 0;
2142}
2143
2144/* Must be called from within a rcu critical section.
2145 * Returns a pointer from within the RCU-protected ram_list.
2146 */
a7180877 2147/*
4c4bad48 2148 * Read a RAMBlock ID from the stream f.
2149 *
2150 * f: Stream to read from
2151 * flags: Page flags (mostly to see if it's a continuation of previous block)
2152 */
2153static inline RAMBlock *ram_block_from_stream(QEMUFile *f,
2154 int flags)
2155{
2156 static RAMBlock *block = NULL;
2157 char id[256];
2158 uint8_t len;
2159
2160 if (flags & RAM_SAVE_FLAG_CONTINUE) {
4c4bad48 2161 if (!block) {
2162 error_report("Ack, bad migration stream!");
2163 return NULL;
2164 }
4c4bad48 2165 return block;
2166 }
2167
2168 len = qemu_get_byte(f);
2169 qemu_get_buffer(f, (uint8_t *)id, len);
2170 id[len] = 0;
2171
e3dd7493 2172 block = qemu_ram_block_by_name(id);
2173 if (!block) {
2174 error_report("Can't find block %s", id);
2175 return NULL;
2176 }
2177
2178 return block;
2179}
2180
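/*
 * host_from_ram_block_offset: return the host address corresponding to an
 * offset within a RAMBlock, or NULL if the offset lies outside the block's
 * used length.
 */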
2181static inline void *host_from_ram_block_offset(RAMBlock *block,
2182 ram_addr_t offset)
2183{
2184 if (!offset_in_ramblock(block, offset)) {
2185 return NULL;
2186 }
2187
2188 return block->host + offset;
2189}
2190
2191/*
2192 * If a page (or a whole RDMA chunk) has been
2193 * determined to be zero, then zap it.
2194 */
2195void ram_handle_compressed(void *host, uint8_t ch, uint64_t size)
2196{
2197 if (ch != 0 || !is_zero_range(host, size)) {
2198 memset(host, ch, size);
2199 }
2200}
2201
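/*
 * do_data_decompress: body of a decompression worker thread. It sleeps on
 * its DecompressParam condition variable until a compressed buffer is
 * handed to it, inflates that buffer directly into guest memory, then
 * marks itself done and signals decomp_done_cond. The thread exits once
 * ->quit is set.
 */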
2202static void *do_data_decompress(void *opaque)
2203{
2204 DecompressParam *param = opaque;
2205 unsigned long pagesize;
2206 uint8_t *des;
2207 int len;
56e93d26 2208
33d151f4 2209 qemu_mutex_lock(&param->mutex);
90e56fb4 2210 while (!param->quit) {
2211 if (param->des) {
2212 des = param->des;
2213 len = param->len;
2214 param->des = 0;
2215 qemu_mutex_unlock(&param->mutex);
2216
56e93d26 2217 pagesize = TARGET_PAGE_SIZE;
2218 /* uncompress() can fail in some cases, especially
2219 * when the page was dirtied while it was being compressed; that is
2220 * not a problem, because the dirty page will be retransferred
2221 * and uncompress() won't corrupt the data in other pages.
2222 */
2223 uncompress((Bytef *)des, &pagesize,
2224 (const Bytef *)param->compbuf, len);
73a8912b 2225
2226 qemu_mutex_lock(&decomp_done_lock);
2227 param->done = true;
2228 qemu_cond_signal(&decomp_done_cond);
2229 qemu_mutex_unlock(&decomp_done_lock);
2230
2231 qemu_mutex_lock(&param->mutex);
2232 } else {
2233 qemu_cond_wait(&param->cond, &param->mutex);
2234 }
56e93d26 2235 }
33d151f4 2236 qemu_mutex_unlock(&param->mutex);
2237
2238 return NULL;
2239}
2240
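/*
 * wait_for_decompress_done: block until every decompression worker has
 * finished its outstanding page; a no-op when compression is not in use.
 */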
2241static void wait_for_decompress_done(void)
2242{
2243 int idx, thread_count;
2244
2245 if (!migrate_use_compression()) {
2246 return;
2247 }
2248
2249 thread_count = migrate_decompress_threads();
2250 qemu_mutex_lock(&decomp_done_lock);
2251 for (idx = 0; idx < thread_count; idx++) {
2252 while (!decomp_param[idx].done) {
2253 qemu_cond_wait(&decomp_done_cond, &decomp_done_lock);
2254 }
2255 }
2256 qemu_mutex_unlock(&decomp_done_lock);
2257}
2258
2259void migrate_decompress_threads_create(void)
2260{
2261 int i, thread_count;
2262
2263 thread_count = migrate_decompress_threads();
2264 decompress_threads = g_new0(QemuThread, thread_count);
2265 decomp_param = g_new0(DecompressParam, thread_count);
2266 qemu_mutex_init(&decomp_done_lock);
2267 qemu_cond_init(&decomp_done_cond);
2268 for (i = 0; i < thread_count; i++) {
2269 qemu_mutex_init(&decomp_param[i].mutex);
2270 qemu_cond_init(&decomp_param[i].cond);
2271 decomp_param[i].compbuf = g_malloc0(compressBound(TARGET_PAGE_SIZE));
73a8912b 2272 decomp_param[i].done = true;
90e56fb4 2273 decomp_param[i].quit = false;
2274 qemu_thread_create(decompress_threads + i, "decompress",
2275 do_data_decompress, decomp_param + i,
2276 QEMU_THREAD_JOINABLE);
2277 }
2278}
2279
2280void migrate_decompress_threads_join(void)
2281{
2282 int i, thread_count;
2283
2284 thread_count = migrate_decompress_threads();
2285 for (i = 0; i < thread_count; i++) {
2286 qemu_mutex_lock(&decomp_param[i].mutex);
90e56fb4 2287 decomp_param[i].quit = true;
2288 qemu_cond_signal(&decomp_param[i].cond);
2289 qemu_mutex_unlock(&decomp_param[i].mutex);
2290 }
2291 for (i = 0; i < thread_count; i++) {
2292 qemu_thread_join(decompress_threads + i);
2293 qemu_mutex_destroy(&decomp_param[i].mutex);
2294 qemu_cond_destroy(&decomp_param[i].cond);
2295 g_free(decomp_param[i].compbuf);
2296 }
2297 g_free(decompress_threads);
2298 g_free(decomp_param);
2299 decompress_threads = NULL;
2300 decomp_param = NULL;
2301}
2302
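/*
 * decompress_data_with_multi_threads: hand one compressed page to an idle
 * decompression worker. If every worker is busy, wait on decomp_done_cond
 * until one of them finishes.
 */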
c1bc6626 2303static void decompress_data_with_multi_threads(QEMUFile *f,
2304 void *host, int len)
2305{
2306 int idx, thread_count;
2307
2308 thread_count = migrate_decompress_threads();
73a8912b 2309 qemu_mutex_lock(&decomp_done_lock);
2310 while (true) {
2311 for (idx = 0; idx < thread_count; idx++) {
73a8912b 2312 if (decomp_param[idx].done) {
2313 decomp_param[idx].done = false;
2314 qemu_mutex_lock(&decomp_param[idx].mutex);
c1bc6626 2315 qemu_get_buffer(f, decomp_param[idx].compbuf, len);
2316 decomp_param[idx].des = host;
2317 decomp_param[idx].len = len;
2318 qemu_cond_signal(&decomp_param[idx].cond);
2319 qemu_mutex_unlock(&decomp_param[idx].mutex);
2320 break;
2321 }
2322 }
2323 if (idx < thread_count) {
2324 break;
2325 } else {
2326 qemu_cond_wait(&decomp_done_cond, &decomp_done_lock);
2327 }
2328 }
73a8912b 2329 qemu_mutex_unlock(&decomp_done_lock);
2330}
2331
2332/*
2333 * Allocate data structures etc. needed by incoming migration with postcopy-ram.
2334 * postcopy-ram's similarly named postcopy_ram_incoming_init does the work.
2335 */
2336int ram_postcopy_incoming_init(MigrationIncomingState *mis)
2337{
2338 size_t ram_pages = last_ram_offset() >> TARGET_PAGE_BITS;
2339
2340 return postcopy_ram_incoming_init(mis, ram_pages);
2341}
2342
2343/*
2344 * Called in postcopy mode by ram_load().
2345 * rcu_read_lock is taken prior to this being called.
2346 */
2347static int ram_load_postcopy(QEMUFile *f)
2348{
2349 int flags = 0, ret = 0;
2350 bool place_needed = false;
2351 bool matching_page_sizes = qemu_host_page_size == TARGET_PAGE_SIZE;
2352 MigrationIncomingState *mis = migration_incoming_get_current();
2353 /* Temporary page that is later 'placed' */
2354 void *postcopy_host_page = postcopy_get_tmp_page(mis);
c53b7ddc 2355 void *last_host = NULL;
a3b6ff6d 2356 bool all_zero = false;
2357
2358 while (!ret && !(flags & RAM_SAVE_FLAG_EOS)) {
2359 ram_addr_t addr;
2360 void *host = NULL;
2361 void *page_buffer = NULL;
2362 void *place_source = NULL;
2363 uint8_t ch;
2364
2365 addr = qemu_get_be64(f);
2366 flags = addr & ~TARGET_PAGE_MASK;
2367 addr &= TARGET_PAGE_MASK;
2368
2369 trace_ram_load_postcopy_loop((uint64_t)addr, flags);
2370 place_needed = false;
2371 if (flags & (RAM_SAVE_FLAG_COMPRESS | RAM_SAVE_FLAG_PAGE)) {
2372 RAMBlock *block = ram_block_from_stream(f, flags);
2373
2374 host = host_from_ram_block_offset(block, addr);
2375 if (!host) {
2376 error_report("Illegal RAM offset " RAM_ADDR_FMT, addr);
2377 ret = -EINVAL;
2378 break;
2379 }
2380 /*
2381 * Postcopy requires that we place whole host pages atomically.
2382 * To make it atomic, the data is read into a temporary page
2383 * that's moved into place later.
2384 * The migration protocol uses, possibly smaller, target-pages
2385 * however the source ensures it always sends all the components
2386 * of a host page in order.
2387 */
2388 page_buffer = postcopy_host_page +
2389 ((uintptr_t)host & ~qemu_host_page_mask);
2390 /* If all target pages are zero then we can optimise the placement */
2391 if (!((uintptr_t)host & ~qemu_host_page_mask)) {
2392 all_zero = true;
2393 } else {
2394 /* not the 1st target page within the host page */
2395 if (host != (last_host + TARGET_PAGE_SIZE)) {
9af9e0fe 2396 error_report("Non-sequential target page %p/%p",
2397 host, last_host);
2398 ret = -EINVAL;
2399 break;
2400 }
2401 }
2402
c53b7ddc 2403
2404 /*
2405 * If it's the last part of a host page then we place the host
2406 * page
2407 */
2408 place_needed = (((uintptr_t)host + TARGET_PAGE_SIZE) &
2409 ~qemu_host_page_mask) == 0;
2410 place_source = postcopy_host_page;
2411 }
c53b7ddc 2412 last_host = host;
2413
2414 switch (flags & ~RAM_SAVE_FLAG_CONTINUE) {
2415 case RAM_SAVE_FLAG_COMPRESS:
2416 ch = qemu_get_byte(f);
2417 memset(page_buffer, ch, TARGET_PAGE_SIZE);
2418 if (ch) {
2419 all_zero = false;
2420 }
2421 break;
2422
2423 case RAM_SAVE_FLAG_PAGE:
2424 all_zero = false;
2425 if (!place_needed || !matching_page_sizes) {
2426 qemu_get_buffer(f, page_buffer, TARGET_PAGE_SIZE);
2427 } else {
2428 /* Avoid the extra copy out of the qemu_file buffer; postcopy
2429 * is going to copy the page into place later anyway. This is only
2430 * possible when we read the whole page in one go (matching page sizes).
2431 */
2432 qemu_get_buffer_in_place(f, (uint8_t **)&place_source,
2433 TARGET_PAGE_SIZE);
2434 }
2435 break;
2436 case RAM_SAVE_FLAG_EOS:
2437 /* normal exit */
2438 break;
2439 default:
2440 error_report("Unknown combination of migration flags: %#x"
2441 " (postcopy mode)", flags);
2442 ret = -EINVAL;
2443 }
2444
2445 if (place_needed) {
2446 /* This gets called at the last target page in the host page */
2447 if (all_zero) {
2448 ret = postcopy_place_page_zero(mis,
2449 host + TARGET_PAGE_SIZE -
2450 qemu_host_page_size);
2451 } else {
2452 ret = postcopy_place_page(mis, host + TARGET_PAGE_SIZE -
2453 qemu_host_page_size,
2454 place_source);
2455 }
2456 }
2457 if (!ret) {
2458 ret = qemu_file_get_error(f);
2459 }
2460 }
2461
2462 return ret;
2463}
2464
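/*
 * ram_load: incoming side of RAM migration. Once the destination has
 * reached the postcopy listening state the stream is handed over to
 * ram_load_postcopy(); otherwise each chunk's flags select the handler:
 * RAM block list sync, fill/zero page, raw page, compressed page or
 * XBZRLE page, until RAM_SAVE_FLAG_EOS is seen.
 */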
2465static int ram_load(QEMUFile *f, void *opaque, int version_id)
2466{
2467 int flags = 0, ret = 0;
2468 static uint64_t seq_iter;
2469 int len = 0;
2470 /*
2471 * If the system is running in postcopy mode, page inserts into host memory
2472 * must be atomic.
2473 */
2474 bool postcopy_running = postcopy_state_get() >= POSTCOPY_INCOMING_LISTENING;
2475
2476 seq_iter++;
2477
2478 if (version_id != 4) {
2479 ret = -EINVAL;
2480 }
2481
2482 /* This RCU critical section can be very long running.
2483 * When RCU reclaims in the code start to become numerous,
2484 * it will be necessary to reduce the granularity of this
2485 * critical section.
2486 */
2487 rcu_read_lock();
2488
2489 if (postcopy_running) {
2490 ret = ram_load_postcopy(f);
2491 }
2492
2493 while (!postcopy_running && !ret && !(flags & RAM_SAVE_FLAG_EOS)) {
56e93d26 2494 ram_addr_t addr, total_ram_bytes;
a776aa15 2495 void *host = NULL;
2496 uint8_t ch;
2497
2498 addr = qemu_get_be64(f);
2499 flags = addr & ~TARGET_PAGE_MASK;
2500 addr &= TARGET_PAGE_MASK;
2501
2502 if (flags & (RAM_SAVE_FLAG_COMPRESS | RAM_SAVE_FLAG_PAGE |
2503 RAM_SAVE_FLAG_COMPRESS_PAGE | RAM_SAVE_FLAG_XBZRLE)) {
2504 RAMBlock *block = ram_block_from_stream(f, flags);
2505
2506 host = host_from_ram_block_offset(block, addr);
2507 if (!host) {
2508 error_report("Illegal RAM offset " RAM_ADDR_FMT, addr);
2509 ret = -EINVAL;
2510 break;
2511 }
2512 }
2513
2514 switch (flags & ~RAM_SAVE_FLAG_CONTINUE) {
2515 case RAM_SAVE_FLAG_MEM_SIZE:
2516 /* Synchronize RAM block list */
2517 total_ram_bytes = addr;
2518 while (!ret && total_ram_bytes) {
2519 RAMBlock *block;
2520 char id[256];
2521 ram_addr_t length;
2522
2523 len = qemu_get_byte(f);
2524 qemu_get_buffer(f, (uint8_t *)id, len);
2525 id[len] = 0;
2526 length = qemu_get_be64(f);
2527
2528 block = qemu_ram_block_by_name(id);
2529 if (block) {
2530 if (length != block->used_length) {
2531 Error *local_err = NULL;
56e93d26 2532
fa53a0e5 2533 ret = qemu_ram_resize(block, length,
2534 &local_err);
2535 if (local_err) {
2536 error_report_err(local_err);
56e93d26 2537 }
56e93d26 2538 }
2539 ram_control_load_hook(f, RAM_CONTROL_BLOCK_REG,
2540 block->idstr);
2541 } else {
2542 error_report("Unknown ramblock \"%s\", cannot "
2543 "accept migration", id);
2544 ret = -EINVAL;
2545 }
2546
2547 total_ram_bytes -= length;
2548 }
2549 break;
a776aa15 2550
56e93d26 2551 case RAM_SAVE_FLAG_COMPRESS:
2552 ch = qemu_get_byte(f);
2553 ram_handle_compressed(host, ch, TARGET_PAGE_SIZE);
2554 break;
a776aa15 2555
56e93d26 2556 case RAM_SAVE_FLAG_PAGE:
2557 qemu_get_buffer(f, host, TARGET_PAGE_SIZE);
2558 break;
56e93d26 2559
a776aa15 2560 case RAM_SAVE_FLAG_COMPRESS_PAGE:
2561 len = qemu_get_be32(f);
2562 if (len < 0 || len > compressBound(TARGET_PAGE_SIZE)) {
2563 error_report("Invalid compressed data length: %d", len);
2564 ret = -EINVAL;
2565 break;
2566 }
c1bc6626 2567 decompress_data_with_multi_threads(f, host, len);
56e93d26 2568 break;
a776aa15 2569
56e93d26 2570 case RAM_SAVE_FLAG_XBZRLE:
2571 if (load_xbzrle(f, addr, host) < 0) {
2572 error_report("Failed to decompress XBZRLE page at "
2573 RAM_ADDR_FMT, addr);
2574 ret = -EINVAL;
2575 break;
2576 }
2577 break;
2578 case RAM_SAVE_FLAG_EOS:
2579 /* normal exit */
2580 break;
2581 default:
2582 if (flags & RAM_SAVE_FLAG_HOOK) {
632e3a5c 2583 ram_control_load_hook(f, RAM_CONTROL_HOOK, NULL);
2584 } else {
2585 error_report("Unknown combination of migration flags: %#x",
2586 flags);
2587 ret = -EINVAL;
2588 }
2589 }
2590 if (!ret) {
2591 ret = qemu_file_get_error(f);
2592 }
2593 }
2594
5533b2e9 2595 wait_for_decompress_done();
2596 rcu_read_unlock();
2597 DPRINTF("Completed load of VM with exit code %d seq iteration "
2598 "%" PRIu64 "\n", ret, seq_iter);
2599 return ret;
2600}
2601
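/*
 * The handlers below are registered by ram_mig_init(); note that
 * ram_save_complete serves as both the precopy and the postcopy
 * completion hook.
 */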
2602static SaveVMHandlers savevm_ram_handlers = {
2603 .save_live_setup = ram_save_setup,
2604 .save_live_iterate = ram_save_iterate,
763c906b 2605 .save_live_complete_postcopy = ram_save_complete,
a3e06c3d 2606 .save_live_complete_precopy = ram_save_complete,
2607 .save_live_pending = ram_save_pending,
2608 .load_state = ram_load,
6ad2a215 2609 .cleanup = ram_migration_cleanup,
2610};
2611
2612void ram_mig_init(void)
2613{
2614 qemu_mutex_init(&XBZRLE.lock);
2615 register_savevm_live(NULL, "ram", 0, 4, &savevm_ram_handlers, NULL);
2616}