migration/ram.c
56e93d26
JQ
1/*
2 * QEMU System Emulator
3 *
4 * Copyright (c) 2003-2008 Fabrice Bellard
76cc7b58
JQ
5 * Copyright (c) 2011-2015 Red Hat Inc
6 *
7 * Authors:
8 * Juan Quintela <quintela@redhat.com>
56e93d26
JQ
9 *
10 * Permission is hereby granted, free of charge, to any person obtaining a copy
11 * of this software and associated documentation files (the "Software"), to deal
12 * in the Software without restriction, including without limitation the rights
13 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
14 * copies of the Software, and to permit persons to whom the Software is
15 * furnished to do so, subject to the following conditions:
16 *
17 * The above copyright notice and this permission notice shall be included in
18 * all copies or substantial portions of the Software.
19 *
20 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
21 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
23 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
24 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
25 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
26 * THE SOFTWARE.
27 */
e688df6b 28
1393a485 29#include "qemu/osdep.h"
33c11879 30#include "cpu.h"
56e93d26 31#include <zlib.h>
4addcd4f 32#include "qapi-event.h"
f348b6d1 33#include "qemu/cutils.h"
56e93d26
JQ
34#include "qemu/bitops.h"
35#include "qemu/bitmap.h"
7205c9ec 36#include "qemu/main-loop.h"
709e3fe8 37#include "xbzrle.h"
7b1e1a22 38#include "ram.h"
6666c96a 39#include "migration.h"
f2a8f0a6 40#include "migration/register.h"
7b1e1a22 41#include "migration/misc.h"
08a0aee1 42#include "qemu-file.h"
be07b0ac 43#include "postcopy-ram.h"
56e93d26 44#include "migration/page_cache.h"
56e93d26 45#include "qemu/error-report.h"
e688df6b 46#include "qapi/error.h"
8acabf69 47#include "qapi/qmp/qerror.h"
56e93d26 48#include "trace.h"
56e93d26 49#include "exec/ram_addr.h"
f9494614 50#include "exec/target_page.h"
56e93d26 51#include "qemu/rcu_queue.h"
a91246c9 52#include "migration/colo.h"
9ac78b61 53#include "migration/block.h"
56e93d26 54
56e93d26
JQ
55/***********************************************************/
56/* ram save/restore */
57
bb890ed5
JQ
58/* RAM_SAVE_FLAG_ZERO used to be named RAM_SAVE_FLAG_COMPRESS; it
59 * worked for pages that were filled with the same char. We switched
60 * it to only search for the zero value, and renamed it to avoid
61 * confusion with RAM_SAVE_FLAG_COMPRESS_PAGE.
62 */
63
56e93d26 64#define RAM_SAVE_FLAG_FULL 0x01 /* Obsolete, not used anymore */
bb890ed5 65#define RAM_SAVE_FLAG_ZERO 0x02
56e93d26
JQ
66#define RAM_SAVE_FLAG_MEM_SIZE 0x04
67#define RAM_SAVE_FLAG_PAGE 0x08
68#define RAM_SAVE_FLAG_EOS 0x10
69#define RAM_SAVE_FLAG_CONTINUE 0x20
70#define RAM_SAVE_FLAG_XBZRLE 0x40
71/* 0x80 is reserved in migration.h; start with 0x100 for the next flag */
72#define RAM_SAVE_FLAG_COMPRESS_PAGE 0x100
73
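/*
 * Illustrative sketch (not part of the original file): save_page_header()
 * below ORs these RAM_SAVE_FLAG_* bits into the low, always-zero bits of the
 * page-aligned offset it writes to the wire, so the load side can split the
 * two apart again roughly like this.  The helper name is hypothetical.
 */
static inline void example_split_wire_addr(uint64_t addr, ram_addr_t *offset,
                                           int *flags)
{
    *flags = addr & ~TARGET_PAGE_MASK;   /* RAM_SAVE_FLAG_* bits */
    *offset = addr & TARGET_PAGE_MASK;   /* page-aligned offset in the block */
}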
56e93d26
JQ
74static inline bool is_zero_range(uint8_t *p, uint64_t size)
75{
a1febc49 76 return buffer_is_zero(p, size);
56e93d26
JQ
77}
78
9360447d
JQ
79XBZRLECacheStats xbzrle_counters;
80
56e93d26
JQ
81/* This struct contains the XBZRLE cache and a static page
82 used by the compression */
83static struct {
84 /* buffer used for XBZRLE encoding */
85 uint8_t *encoded_buf;
86 /* buffer for storing page content */
87 uint8_t *current_buf;
88 /* Cache for XBZRLE, Protected by lock. */
89 PageCache *cache;
90 QemuMutex lock;
c00e0928
JQ
91 /* it will store a page full of zeros */
92 uint8_t *zero_target_page;
f265e0e4
JQ
93 /* buffer used for XBZRLE decoding */
94 uint8_t *decoded_buf;
56e93d26
JQ
95} XBZRLE;
96
56e93d26
JQ
97static void XBZRLE_cache_lock(void)
98{
99 if (migrate_use_xbzrle())
100 qemu_mutex_lock(&XBZRLE.lock);
101}
102
103static void XBZRLE_cache_unlock(void)
104{
105 if (migrate_use_xbzrle())
106 qemu_mutex_unlock(&XBZRLE.lock);
107}
108
3d0684b2
JQ
109/**
110 * xbzrle_cache_resize: resize the xbzrle cache
111 *
112 * This function is called from qmp_migrate_set_cache_size in the main
113 * thread, possibly while a migration is in progress. A running
114 * migration may be using the cache and might finish during this call,
115 * hence changes to the cache are protected by the XBZRLE.lock mutex.
116 *
c9dede2d 117 * Returns 0 for success or -1 for error
3d0684b2
JQ
118 *
119 * @new_size: new cache size
8acabf69 120 * @errp: set with the failure reason if the size check fails
56e93d26 121 */
c9dede2d 122int xbzrle_cache_resize(int64_t new_size, Error **errp)
56e93d26
JQ
123{
124 PageCache *new_cache;
c9dede2d 125 int64_t ret = 0;
56e93d26 126
8acabf69
JQ
127 /* Check for truncation */
128 if (new_size != (size_t)new_size) {
129 error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "cache size",
130 "exceeding address space");
131 return -1;
132 }
133
2a313e5c
JQ
134 if (new_size == migrate_xbzrle_cache_size()) {
135 /* nothing to do */
c9dede2d 136 return 0;
2a313e5c
JQ
137 }
138
56e93d26
JQ
139 XBZRLE_cache_lock();
140
141 if (XBZRLE.cache != NULL) {
80f8dfde 142 new_cache = cache_init(new_size, TARGET_PAGE_SIZE, errp);
56e93d26 143 if (!new_cache) {
56e93d26
JQ
144 ret = -1;
145 goto out;
146 }
147
148 cache_fini(XBZRLE.cache);
149 XBZRLE.cache = new_cache;
150 }
56e93d26
JQ
151out:
152 XBZRLE_cache_unlock();
153 return ret;
154}
155
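/*
 * Illustrative sketch (not part of the original file): how a caller such as
 * the QMP cache-size handler is expected to use xbzrle_cache_resize(),
 * reporting the Error that explains why a resize was rejected.  The wrapper
 * name is hypothetical.
 */
static void example_resize_xbzrle_cache(int64_t new_size)
{
    Error *local_err = NULL;

    if (xbzrle_cache_resize(new_size, &local_err) < 0) {
        error_report_err(local_err);   /* resize failed, the old cache is kept */
    }
}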
f9494614
AP
156static void ramblock_recv_map_init(void)
157{
158 RAMBlock *rb;
159
160 RAMBLOCK_FOREACH(rb) {
161 assert(!rb->receivedmap);
162 rb->receivedmap = bitmap_new(rb->max_length >> qemu_target_page_bits());
163 }
164}
165
166int ramblock_recv_bitmap_test(RAMBlock *rb, void *host_addr)
167{
168 return test_bit(ramblock_recv_bitmap_offset(host_addr, rb),
169 rb->receivedmap);
170}
171
172void ramblock_recv_bitmap_set(RAMBlock *rb, void *host_addr)
173{
174 set_bit_atomic(ramblock_recv_bitmap_offset(host_addr, rb), rb->receivedmap);
175}
176
177void ramblock_recv_bitmap_set_range(RAMBlock *rb, void *host_addr,
178 size_t nr)
179{
180 bitmap_set_atomic(rb->receivedmap,
181 ramblock_recv_bitmap_offset(host_addr, rb),
182 nr);
183}
184
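/*
 * Illustrative sketch (not part of the original file): on the destination,
 * the load path is expected to record every page it has written into guest
 * memory roughly like this, so that postcopy can later tell which pages have
 * already arrived.  The wrapper name is hypothetical.
 */
static void example_mark_host_page_received(RAMBlock *rb, void *host_addr)
{
    if (!ramblock_recv_bitmap_test(rb, host_addr)) {
        ramblock_recv_bitmap_set(rb, host_addr);
    }
}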
ec481c6c
JQ
185/*
186 * An outstanding page request, on the source, having been received
187 * and queued
188 */
189struct RAMSrcPageRequest {
190 RAMBlock *rb;
191 hwaddr offset;
192 hwaddr len;
193
194 QSIMPLEQ_ENTRY(RAMSrcPageRequest) next_req;
195};
196
6f37bb8b
JQ
197/* State of RAM for migration */
198struct RAMState {
204b88b8
JQ
199 /* QEMUFile used for this migration */
200 QEMUFile *f;
6f37bb8b
JQ
201 /* Last block that we have visited searching for dirty pages */
202 RAMBlock *last_seen_block;
203 /* Last block from where we have sent data */
204 RAMBlock *last_sent_block;
269ace29
JQ
205 /* Last dirty target page we have sent */
206 ram_addr_t last_page;
6f37bb8b
JQ
207 /* last ram version we have seen */
208 uint32_t last_version;
209 /* We are in the first round */
210 bool ram_bulk_stage;
8d820d6f
JQ
211 /* How many times we have dirty too many pages */
212 int dirty_rate_high_cnt;
f664da80
JQ
213 /* these variables are used for bitmap sync */
214 /* last time we did a full bitmap_sync */
215 int64_t time_last_bitmap_sync;
eac74159 216 /* bytes transferred at start_time */
c4bdf0cf 217 uint64_t bytes_xfer_prev;
a66cd90c 218 /* number of dirty pages since start_time */
68908ed6 219 uint64_t num_dirty_pages_period;
b5833fde
JQ
220 /* xbzrle misses since the beginning of the period */
221 uint64_t xbzrle_cache_miss_prev;
36040d9c
JQ
222 /* number of iterations at the beginning of period */
223 uint64_t iterations_prev;
23b28c3c
JQ
224 /* Iterations since start */
225 uint64_t iterations;
9360447d 226 /* number of dirty bits in the bitmap */
2dfaf12e
PX
227 uint64_t migration_dirty_pages;
228 /* protects modification of the bitmap */
108cfae0 229 QemuMutex bitmap_mutex;
68a098f3
JQ
230 /* The RAMBlock used in the last src_page_requests */
231 RAMBlock *last_req_rb;
ec481c6c
JQ
232 /* Queue of outstanding page requests from the destination */
233 QemuMutex src_page_req_mutex;
234 QSIMPLEQ_HEAD(src_page_requests, RAMSrcPageRequest) src_page_requests;
6f37bb8b
JQ
235};
236typedef struct RAMState RAMState;
237
53518d94 238static RAMState *ram_state;
6f37bb8b 239
9edabd4d 240uint64_t ram_bytes_remaining(void)
2f4fde93 241{
bae416e5
DDAG
242 return ram_state ? (ram_state->migration_dirty_pages * TARGET_PAGE_SIZE) :
243 0;
2f4fde93
JQ
244}
245
9360447d 246MigrationStats ram_counters;
96506894 247
b8fb8cb7
DDAG
248/* used by the search for pages to send */
249struct PageSearchStatus {
250 /* Current block being searched */
251 RAMBlock *block;
a935e30f
JQ
252 /* Current page to search from */
253 unsigned long page;
b8fb8cb7
DDAG
254 /* Set once we wrap around */
255 bool complete_round;
256};
257typedef struct PageSearchStatus PageSearchStatus;
258
56e93d26 259struct CompressParam {
56e93d26 260 bool done;
90e56fb4 261 bool quit;
56e93d26
JQ
262 QEMUFile *file;
263 QemuMutex mutex;
264 QemuCond cond;
265 RAMBlock *block;
266 ram_addr_t offset;
267};
268typedef struct CompressParam CompressParam;
269
270struct DecompressParam {
73a8912b 271 bool done;
90e56fb4 272 bool quit;
56e93d26
JQ
273 QemuMutex mutex;
274 QemuCond cond;
275 void *des;
d341d9f3 276 uint8_t *compbuf;
56e93d26
JQ
277 int len;
278};
279typedef struct DecompressParam DecompressParam;
280
281static CompressParam *comp_param;
282static QemuThread *compress_threads;
283/* comp_done_cond is used to wake up the migration thread when
284 * one of the compression threads has finished the compression.
285 * comp_done_lock is used to co-work with comp_done_cond.
286 */
0d9f9a5c
LL
287static QemuMutex comp_done_lock;
288static QemuCond comp_done_cond;
56e93d26
JQ
289/* The empty QEMUFileOps will be used by file in CompressParam */
290static const QEMUFileOps empty_ops = { };
291
56e93d26
JQ
292static DecompressParam *decomp_param;
293static QemuThread *decompress_threads;
73a8912b
LL
294static QemuMutex decomp_done_lock;
295static QemuCond decomp_done_cond;
56e93d26 296
a7a9a88f
LL
297static int do_compress_ram_page(QEMUFile *f, RAMBlock *block,
298 ram_addr_t offset);
56e93d26
JQ
299
300static void *do_data_compress(void *opaque)
301{
302 CompressParam *param = opaque;
a7a9a88f
LL
303 RAMBlock *block;
304 ram_addr_t offset;
56e93d26 305
a7a9a88f 306 qemu_mutex_lock(&param->mutex);
90e56fb4 307 while (!param->quit) {
a7a9a88f
LL
308 if (param->block) {
309 block = param->block;
310 offset = param->offset;
311 param->block = NULL;
312 qemu_mutex_unlock(&param->mutex);
313
314 do_compress_ram_page(param->file, block, offset);
315
0d9f9a5c 316 qemu_mutex_lock(&comp_done_lock);
a7a9a88f 317 param->done = true;
0d9f9a5c
LL
318 qemu_cond_signal(&comp_done_cond);
319 qemu_mutex_unlock(&comp_done_lock);
a7a9a88f
LL
320
321 qemu_mutex_lock(&param->mutex);
322 } else {
56e93d26
JQ
323 qemu_cond_wait(&param->cond, &param->mutex);
324 }
56e93d26 325 }
a7a9a88f 326 qemu_mutex_unlock(&param->mutex);
56e93d26
JQ
327
328 return NULL;
329}
330
331static inline void terminate_compression_threads(void)
332{
333 int idx, thread_count;
334
335 thread_count = migrate_compress_threads();
3d0684b2 336
56e93d26
JQ
337 for (idx = 0; idx < thread_count; idx++) {
338 qemu_mutex_lock(&comp_param[idx].mutex);
90e56fb4 339 comp_param[idx].quit = true;
56e93d26
JQ
340 qemu_cond_signal(&comp_param[idx].cond);
341 qemu_mutex_unlock(&comp_param[idx].mutex);
342 }
343}
344
f0afa331 345static void compress_threads_save_cleanup(void)
56e93d26
JQ
346{
347 int i, thread_count;
348
349 if (!migrate_use_compression()) {
350 return;
351 }
352 terminate_compression_threads();
353 thread_count = migrate_compress_threads();
354 for (i = 0; i < thread_count; i++) {
355 qemu_thread_join(compress_threads + i);
356 qemu_fclose(comp_param[i].file);
357 qemu_mutex_destroy(&comp_param[i].mutex);
358 qemu_cond_destroy(&comp_param[i].cond);
359 }
0d9f9a5c
LL
360 qemu_mutex_destroy(&comp_done_lock);
361 qemu_cond_destroy(&comp_done_cond);
56e93d26
JQ
362 g_free(compress_threads);
363 g_free(comp_param);
56e93d26
JQ
364 compress_threads = NULL;
365 comp_param = NULL;
56e93d26
JQ
366}
367
f0afa331 368static void compress_threads_save_setup(void)
56e93d26
JQ
369{
370 int i, thread_count;
371
372 if (!migrate_use_compression()) {
373 return;
374 }
56e93d26
JQ
375 thread_count = migrate_compress_threads();
376 compress_threads = g_new0(QemuThread, thread_count);
377 comp_param = g_new0(CompressParam, thread_count);
0d9f9a5c
LL
378 qemu_cond_init(&comp_done_cond);
379 qemu_mutex_init(&comp_done_lock);
56e93d26 380 for (i = 0; i < thread_count; i++) {
e110aa91
C
381 /* comp_param[i].file is just used as a dummy buffer to save data,
382 * set its ops to empty.
56e93d26
JQ
383 */
384 comp_param[i].file = qemu_fopen_ops(NULL, &empty_ops);
385 comp_param[i].done = true;
90e56fb4 386 comp_param[i].quit = false;
56e93d26
JQ
387 qemu_mutex_init(&comp_param[i].mutex);
388 qemu_cond_init(&comp_param[i].cond);
389 qemu_thread_create(compress_threads + i, "compress",
390 do_data_compress, comp_param + i,
391 QEMU_THREAD_JOINABLE);
392 }
393}
394
f986c3d2
JQ
395/* Multiple fd's */
396
397struct MultiFDSendParams {
398 uint8_t id;
399 char *name;
400 QemuThread thread;
401 QemuSemaphore sem;
402 QemuMutex mutex;
403 bool quit;
404};
405typedef struct MultiFDSendParams MultiFDSendParams;
406
407struct {
408 MultiFDSendParams *params;
409 /* number of created threads */
410 int count;
411} *multifd_send_state;
412
413static void terminate_multifd_send_threads(Error *errp)
414{
415 int i;
416
417 for (i = 0; i < multifd_send_state->count; i++) {
418 MultiFDSendParams *p = &multifd_send_state->params[i];
419
420 qemu_mutex_lock(&p->mutex);
421 p->quit = true;
422 qemu_sem_post(&p->sem);
423 qemu_mutex_unlock(&p->mutex);
424 }
425}
426
427int multifd_save_cleanup(Error **errp)
428{
429 int i;
430 int ret = 0;
431
432 if (!migrate_use_multifd()) {
433 return 0;
434 }
435 terminate_multifd_send_threads(NULL);
436 for (i = 0; i < multifd_send_state->count; i++) {
437 MultiFDSendParams *p = &multifd_send_state->params[i];
438
439 qemu_thread_join(&p->thread);
440 qemu_mutex_destroy(&p->mutex);
441 qemu_sem_destroy(&p->sem);
442 g_free(p->name);
443 p->name = NULL;
444 }
445 g_free(multifd_send_state->params);
446 multifd_send_state->params = NULL;
447 g_free(multifd_send_state);
448 multifd_send_state = NULL;
449 return ret;
450}
451
452static void *multifd_send_thread(void *opaque)
453{
454 MultiFDSendParams *p = opaque;
455
456 while (true) {
457 qemu_mutex_lock(&p->mutex);
458 if (p->quit) {
459 qemu_mutex_unlock(&p->mutex);
460 break;
461 }
462 qemu_mutex_unlock(&p->mutex);
463 qemu_sem_wait(&p->sem);
464 }
465
466 return NULL;
467}
468
469int multifd_save_setup(void)
470{
471 int thread_count;
472 uint8_t i;
473
474 if (!migrate_use_multifd()) {
475 return 0;
476 }
477 thread_count = migrate_multifd_channels();
478 multifd_send_state = g_malloc0(sizeof(*multifd_send_state));
479 multifd_send_state->params = g_new0(MultiFDSendParams, thread_count);
480 multifd_send_state->count = 0;
481 for (i = 0; i < thread_count; i++) {
482 MultiFDSendParams *p = &multifd_send_state->params[i];
483
484 qemu_mutex_init(&p->mutex);
485 qemu_sem_init(&p->sem, 0);
486 p->quit = false;
487 p->id = i;
488 p->name = g_strdup_printf("multifdsend_%d", i);
489 qemu_thread_create(&p->thread, p->name, multifd_send_thread, p,
490 QEMU_THREAD_JOINABLE);
491
492 multifd_send_state->count++;
493 }
494 return 0;
495}
496
497struct MultiFDRecvParams {
498 uint8_t id;
499 char *name;
500 QemuThread thread;
501 QemuSemaphore sem;
502 QemuMutex mutex;
503 bool quit;
504};
505typedef struct MultiFDRecvParams MultiFDRecvParams;
506
507struct {
508 MultiFDRecvParams *params;
509 /* number of created threads */
510 int count;
511} *multifd_recv_state;
512
513static void terminate_multifd_recv_threads(Error *errp)
514{
515 int i;
516
517 for (i = 0; i < multifd_recv_state->count; i++) {
518 MultiFDRecvParams *p = &multifd_recv_state->params[i];
519
520 qemu_mutex_lock(&p->mutex);
521 p->quit = true;
522 qemu_sem_post(&p->sem);
523 qemu_mutex_unlock(&p->mutex);
524 }
525}
526
527int multifd_load_cleanup(Error **errp)
528{
529 int i;
530 int ret = 0;
531
532 if (!migrate_use_multifd()) {
533 return 0;
534 }
535 terminate_multifd_recv_threads(NULL);
536 for (i = 0; i < multifd_recv_state->count; i++) {
537 MultiFDRecvParams *p = &multifd_recv_state->params[i];
538
539 qemu_thread_join(&p->thread);
540 qemu_mutex_destroy(&p->mutex);
541 qemu_sem_destroy(&p->sem);
542 g_free(p->name);
543 p->name = NULL;
544 }
545 g_free(multifd_recv_state->params);
546 multifd_recv_state->params = NULL;
547 g_free(multifd_recv_state);
548 multifd_recv_state = NULL;
549
550 return ret;
551}
552
553static void *multifd_recv_thread(void *opaque)
554{
555 MultiFDRecvParams *p = opaque;
556
557 while (true) {
558 qemu_mutex_lock(&p->mutex);
559 if (p->quit) {
560 qemu_mutex_unlock(&p->mutex);
561 break;
562 }
563 qemu_mutex_unlock(&p->mutex);
564 qemu_sem_wait(&p->sem);
565 }
566
567 return NULL;
568}
569
570int multifd_load_setup(void)
571{
572 int thread_count;
573 uint8_t i;
574
575 if (!migrate_use_multifd()) {
576 return 0;
577 }
578 thread_count = migrate_multifd_channels();
579 multifd_recv_state = g_malloc0(sizeof(*multifd_recv_state));
580 multifd_recv_state->params = g_new0(MultiFDRecvParams, thread_count);
581 multifd_recv_state->count = 0;
582 for (i = 0; i < thread_count; i++) {
583 MultiFDRecvParams *p = &multifd_recv_state->params[i];
584
585 qemu_mutex_init(&p->mutex);
586 qemu_sem_init(&p->sem, 0);
587 p->quit = false;
588 p->id = i;
589 p->name = g_strdup_printf("multifdrecv_%d", i);
590 qemu_thread_create(&p->thread, p->name, multifd_recv_thread, p,
591 QEMU_THREAD_JOINABLE);
592 multifd_recv_state->count++;
593 }
594 return 0;
595}
596
56e93d26 597/**
3d0684b2 598 * save_page_header: write page header to wire
56e93d26
JQ
599 *
600 * If this is a page from a new block, it also writes the block identification
601 *
3d0684b2 602 * Returns the number of bytes written
56e93d26
JQ
603 *
604 * @f: QEMUFile where to send the data
605 * @block: block that contains the page we want to send
606 * @offset: offset inside the block for the page;
607 * the lower bits also carry the RAM_SAVE_FLAG_* flags
608 */
2bf3aa85
JQ
609static size_t save_page_header(RAMState *rs, QEMUFile *f, RAMBlock *block,
610 ram_addr_t offset)
56e93d26 611{
9f5f380b 612 size_t size, len;
56e93d26 613
24795694
JQ
614 if (block == rs->last_sent_block) {
615 offset |= RAM_SAVE_FLAG_CONTINUE;
616 }
2bf3aa85 617 qemu_put_be64(f, offset);
56e93d26
JQ
618 size = 8;
619
620 if (!(offset & RAM_SAVE_FLAG_CONTINUE)) {
9f5f380b 621 len = strlen(block->idstr);
2bf3aa85
JQ
622 qemu_put_byte(f, len);
623 qemu_put_buffer(f, (uint8_t *)block->idstr, len);
9f5f380b 624 size += 1 + len;
24795694 625 rs->last_sent_block = block;
56e93d26
JQ
626 }
627 return size;
628}
629
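/*
 * Illustrative sketch (not part of the original file): the minimal sequence
 * ram_save_page() below uses for a plain, uncompressed page - a header
 * carrying RAM_SAVE_FLAG_PAGE followed by the raw page contents.  The helper
 * name is hypothetical.
 */
static size_t example_send_raw_page(RAMState *rs, RAMBlock *block,
                                    ram_addr_t offset, uint8_t *p)
{
    size_t bytes = save_page_header(rs, rs->f, block,
                                    offset | RAM_SAVE_FLAG_PAGE);

    qemu_put_buffer(rs->f, p, TARGET_PAGE_SIZE);
    return bytes + TARGET_PAGE_SIZE;
}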
3d0684b2
JQ
630/**
631 * mig_throttle_guest_down: throttle down the guest
632 *
633 * Reduce the amount of guest CPU execution to hopefully slow down memory
634 * writes. If the guest dirty memory rate is reduced below the rate at
635 * which we can transfer pages to the destination then we should be
636 * able to complete migration. Some workloads dirty memory way too
637 * fast and will not effectively converge, even with auto-converge.
070afca2
JH
638 */
639static void mig_throttle_guest_down(void)
640{
641 MigrationState *s = migrate_get_current();
2594f56d
DB
642 uint64_t pct_initial = s->parameters.cpu_throttle_initial;
643 uint64_t pct_increment = s->parameters.cpu_throttle_increment;
070afca2
JH
644
645 /* We have not started throttling yet. Let's start it. */
646 if (!cpu_throttle_active()) {
647 cpu_throttle_set(pct_initial);
648 } else {
649 /* Throttling already on, just increase the rate */
650 cpu_throttle_set(cpu_throttle_get_percentage() + pct_increment);
651 }
652}
653
3d0684b2
JQ
654/**
655 * xbzrle_cache_zero_page: insert a zero page in the XBZRLE cache
656 *
6f37bb8b 657 * @rs: current RAM state
3d0684b2
JQ
658 * @current_addr: address for the zero page
659 *
660 * Update the xbzrle cache to reflect a page that's been sent as all 0.
56e93d26
JQ
661 * The important thing is that a stale (not-yet-0'd) page be replaced
662 * by the new data.
663 * As a bonus, if the page wasn't in the cache it gets added so that
3d0684b2 664 * when a small write is made into the 0'd page it gets XBZRLE sent.
56e93d26 665 */
6f37bb8b 666static void xbzrle_cache_zero_page(RAMState *rs, ram_addr_t current_addr)
56e93d26 667{
6f37bb8b 668 if (rs->ram_bulk_stage || !migrate_use_xbzrle()) {
56e93d26
JQ
669 return;
670 }
671
672 /* We don't care if this fails to allocate a new cache page
673 * as long as it updates an old one */
c00e0928 674 cache_insert(XBZRLE.cache, current_addr, XBZRLE.zero_target_page,
9360447d 675 ram_counters.dirty_sync_count);
56e93d26
JQ
676}
677
678#define ENCODING_FLAG_XBZRLE 0x1
679
680/**
681 * save_xbzrle_page: compress and send current page
682 *
683 * Returns: 1 means that we wrote the page
684 * 0 means that page is identical to the one already sent
685 * -1 means that xbzrle would be longer than normal
686 *
5a987738 687 * @rs: current RAM state
3d0684b2
JQ
688 * @current_data: pointer to the address of the page contents
689 * @current_addr: addr of the page
56e93d26
JQ
690 * @block: block that contains the page we want to send
691 * @offset: offset inside the block for the page
692 * @last_stage: if we are at the completion stage
56e93d26 693 */
204b88b8 694static int save_xbzrle_page(RAMState *rs, uint8_t **current_data,
56e93d26 695 ram_addr_t current_addr, RAMBlock *block,
072c2511 696 ram_addr_t offset, bool last_stage)
56e93d26
JQ
697{
698 int encoded_len = 0, bytes_xbzrle;
699 uint8_t *prev_cached_page;
700
9360447d
JQ
701 if (!cache_is_cached(XBZRLE.cache, current_addr,
702 ram_counters.dirty_sync_count)) {
703 xbzrle_counters.cache_miss++;
56e93d26
JQ
704 if (!last_stage) {
705 if (cache_insert(XBZRLE.cache, current_addr, *current_data,
9360447d 706 ram_counters.dirty_sync_count) == -1) {
56e93d26
JQ
707 return -1;
708 } else {
709 /* update *current_data when the page has been
710 inserted into cache */
711 *current_data = get_cached_data(XBZRLE.cache, current_addr);
712 }
713 }
714 return -1;
715 }
716
717 prev_cached_page = get_cached_data(XBZRLE.cache, current_addr);
718
719 /* save current buffer into memory */
720 memcpy(XBZRLE.current_buf, *current_data, TARGET_PAGE_SIZE);
721
722 /* XBZRLE encoding (if there is no overflow) */
723 encoded_len = xbzrle_encode_buffer(prev_cached_page, XBZRLE.current_buf,
724 TARGET_PAGE_SIZE, XBZRLE.encoded_buf,
725 TARGET_PAGE_SIZE);
726 if (encoded_len == 0) {
55c4446b 727 trace_save_xbzrle_page_skipping();
56e93d26
JQ
728 return 0;
729 } else if (encoded_len == -1) {
55c4446b 730 trace_save_xbzrle_page_overflow();
9360447d 731 xbzrle_counters.overflow++;
56e93d26
JQ
732 /* update data in the cache */
733 if (!last_stage) {
734 memcpy(prev_cached_page, *current_data, TARGET_PAGE_SIZE);
735 *current_data = prev_cached_page;
736 }
737 return -1;
738 }
739
740 /* we need to update the data in the cache, in order to get the same data */
741 if (!last_stage) {
742 memcpy(prev_cached_page, XBZRLE.current_buf, TARGET_PAGE_SIZE);
743 }
744
745 /* Send XBZRLE based compressed page */
2bf3aa85 746 bytes_xbzrle = save_page_header(rs, rs->f, block,
204b88b8
JQ
747 offset | RAM_SAVE_FLAG_XBZRLE);
748 qemu_put_byte(rs->f, ENCODING_FLAG_XBZRLE);
749 qemu_put_be16(rs->f, encoded_len);
750 qemu_put_buffer(rs->f, XBZRLE.encoded_buf, encoded_len);
56e93d26 751 bytes_xbzrle += encoded_len + 1 + 2;
9360447d
JQ
752 xbzrle_counters.pages++;
753 xbzrle_counters.bytes += bytes_xbzrle;
754 ram_counters.transferred += bytes_xbzrle;
56e93d26
JQ
755
756 return 1;
757}
758
3d0684b2
JQ
759/**
760 * migration_bitmap_find_dirty: find the next dirty page from start
f3f491fc 761 *
3d0684b2
JQ
762 * Called with rcu_read_lock() to protect migration_bitmap
763 *
764 * Returns the page offset within the RAMBlock of the next dirty page
765 *
6f37bb8b 766 * @rs: current RAM state
3d0684b2 767 * @rb: RAMBlock where to search for dirty pages
a935e30f 768 * @start: page where we start the search
f3f491fc 769 */
56e93d26 770static inline
a935e30f 771unsigned long migration_bitmap_find_dirty(RAMState *rs, RAMBlock *rb,
f20e2865 772 unsigned long start)
56e93d26 773{
6b6712ef
JQ
774 unsigned long size = rb->used_length >> TARGET_PAGE_BITS;
775 unsigned long *bitmap = rb->bmap;
56e93d26
JQ
776 unsigned long next;
777
6b6712ef
JQ
778 if (rs->ram_bulk_stage && start > 0) {
779 next = start + 1;
56e93d26 780 } else {
6b6712ef 781 next = find_next_bit(bitmap, size, start);
56e93d26
JQ
782 }
783
6b6712ef 784 return next;
56e93d26
JQ
785}
786
06b10688 787static inline bool migration_bitmap_clear_dirty(RAMState *rs,
f20e2865
JQ
788 RAMBlock *rb,
789 unsigned long page)
a82d593b
DDAG
790{
791 bool ret;
a82d593b 792
6b6712ef 793 ret = test_and_clear_bit(page, rb->bmap);
a82d593b
DDAG
794
795 if (ret) {
0d8ec885 796 rs->migration_dirty_pages--;
a82d593b
DDAG
797 }
798 return ret;
799}
800
15440dd5
JQ
801static void migration_bitmap_sync_range(RAMState *rs, RAMBlock *rb,
802 ram_addr_t start, ram_addr_t length)
56e93d26 803{
0d8ec885 804 rs->migration_dirty_pages +=
6b6712ef 805 cpu_physical_memory_sync_dirty_bitmap(rb, start, length,
0d8ec885 806 &rs->num_dirty_pages_period);
56e93d26
JQ
807}
808
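/*
 * Illustrative sketch (not part of the original file): the basic per-page
 * scan step performed by the save path - locate the next dirty target page
 * in a block and claim it by clearing its dirty bit before sending.  The
 * helper name is hypothetical.
 */
static bool example_claim_next_dirty_page(RAMState *rs, RAMBlock *rb,
                                          unsigned long *page)
{
    unsigned long nr_pages = rb->used_length >> TARGET_PAGE_BITS;

    *page = migration_bitmap_find_dirty(rs, rb, *page);
    if (*page >= nr_pages) {
        return false;                 /* no dirty page left in this block */
    }
    return migration_bitmap_clear_dirty(rs, rb, *page);
}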
3d0684b2
JQ
809/**
810 * ram_pagesize_summary: calculate all the pagesizes of a VM
811 *
812 * Returns a summary bitmap of the page sizes of all RAMBlocks
813 *
814 * For VMs with just normal pages this is equivalent to the host page
815 * size. If it's got some huge pages then it's the OR of all the
816 * different page sizes.
e8ca1db2
DDAG
817 */
818uint64_t ram_pagesize_summary(void)
819{
820 RAMBlock *block;
821 uint64_t summary = 0;
822
99e15582 823 RAMBLOCK_FOREACH(block) {
e8ca1db2
DDAG
824 summary |= block->page_size;
825 }
826
827 return summary;
828}
829
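/*
 * Illustrative sketch (not part of the original file): with normal 4 KiB
 * pages plus one hypothetical 2 MiB hugetlbfs-backed block, the summary is
 * simply the OR of the distinct page sizes.
 */
static uint64_t example_pagesize_summary(void)
{
    uint64_t summary = 0;

    summary |= 4096;                /* 0x001000: normal pages */
    summary |= 2 * 1024 * 1024;     /* 0x200000: 2 MiB huge page */
    return summary;                 /* 0x201000 */
}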
8d820d6f 830static void migration_bitmap_sync(RAMState *rs)
56e93d26
JQ
831{
832 RAMBlock *block;
56e93d26 833 int64_t end_time;
c4bdf0cf 834 uint64_t bytes_xfer_now;
56e93d26 835
9360447d 836 ram_counters.dirty_sync_count++;
56e93d26 837
f664da80
JQ
838 if (!rs->time_last_bitmap_sync) {
839 rs->time_last_bitmap_sync = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
56e93d26
JQ
840 }
841
842 trace_migration_bitmap_sync_start();
9c1f8f44 843 memory_global_dirty_log_sync();
56e93d26 844
108cfae0 845 qemu_mutex_lock(&rs->bitmap_mutex);
56e93d26 846 rcu_read_lock();
99e15582 847 RAMBLOCK_FOREACH(block) {
15440dd5 848 migration_bitmap_sync_range(rs, block, 0, block->used_length);
56e93d26
JQ
849 }
850 rcu_read_unlock();
108cfae0 851 qemu_mutex_unlock(&rs->bitmap_mutex);
56e93d26 852
a66cd90c 853 trace_migration_bitmap_sync_end(rs->num_dirty_pages_period);
1ffb5dfd 854
56e93d26
JQ
855 end_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
856
857 /* more than 1 second = 1000 milliseconds */
f664da80 858 if (end_time > rs->time_last_bitmap_sync + 1000) {
d693c6f1 859 /* calculate period counters */
9360447d 860 ram_counters.dirty_pages_rate = rs->num_dirty_pages_period * 1000
d693c6f1 861 / (end_time - rs->time_last_bitmap_sync);
9360447d 862 bytes_xfer_now = ram_counters.transferred;
d693c6f1 863
9ac78b61
PL
864 /* During block migration the auto-converge logic incorrectly detects
865 * that ram migration makes no progress. Avoid this by disabling the
866 * throttling logic during the bulk phase of block migration. */
867 if (migrate_auto_converge() && !blk_mig_bulk_active()) {
56e93d26
JQ
868 /* The following detection logic can be refined later. For now:
869 Check to see if the bytes dirtied in this period exceed half of the
870 approx. amount of bytes that just got transferred since the last time we
070afca2
JH
871 were in this routine. If that happens twice, start or increase
872 throttling */
070afca2 873
d693c6f1 874 if ((rs->num_dirty_pages_period * TARGET_PAGE_SIZE >
eac74159 875 (bytes_xfer_now - rs->bytes_xfer_prev) / 2) &&
b4a3c64b 876 (++rs->dirty_rate_high_cnt >= 2)) {
56e93d26 877 trace_migration_throttle();
8d820d6f 878 rs->dirty_rate_high_cnt = 0;
070afca2 879 mig_throttle_guest_down();
d693c6f1 880 }
56e93d26 881 }
070afca2 882
56e93d26 883 if (migrate_use_xbzrle()) {
23b28c3c 884 if (rs->iterations_prev != rs->iterations) {
9360447d
JQ
885 xbzrle_counters.cache_miss_rate =
886 (double)(xbzrle_counters.cache_miss -
b5833fde 887 rs->xbzrle_cache_miss_prev) /
23b28c3c 888 (rs->iterations - rs->iterations_prev);
56e93d26 889 }
23b28c3c 890 rs->iterations_prev = rs->iterations;
9360447d 891 rs->xbzrle_cache_miss_prev = xbzrle_counters.cache_miss;
56e93d26 892 }
d693c6f1
FF
893
894 /* reset period counters */
f664da80 895 rs->time_last_bitmap_sync = end_time;
a66cd90c 896 rs->num_dirty_pages_period = 0;
d2a4d85a 897 rs->bytes_xfer_prev = bytes_xfer_now;
56e93d26 898 }
4addcd4f 899 if (migrate_use_events()) {
9360447d 900 qapi_event_send_migration_pass(ram_counters.dirty_sync_count, NULL);
4addcd4f 901 }
56e93d26
JQ
902}
903
904/**
3d0684b2 905 * save_zero_page: send the zero page to the stream
56e93d26 906 *
3d0684b2 907 * Returns the number of pages written.
56e93d26 908 *
f7ccd61b 909 * @rs: current RAM state
56e93d26
JQ
910 * @block: block that contains the page we want to send
911 * @offset: offset inside the block for the page
56e93d26 912 */
7faccdc3 913static int save_zero_page(RAMState *rs, RAMBlock *block, ram_addr_t offset)
56e93d26 914{
7faccdc3 915 uint8_t *p = block->host + offset;
56e93d26
JQ
916 int pages = -1;
917
918 if (is_zero_range(p, TARGET_PAGE_SIZE)) {
9360447d
JQ
919 ram_counters.duplicate++;
920 ram_counters.transferred +=
bb890ed5 921 save_page_header(rs, rs->f, block, offset | RAM_SAVE_FLAG_ZERO);
ce25d337 922 qemu_put_byte(rs->f, 0);
9360447d 923 ram_counters.transferred += 1;
56e93d26
JQ
924 pages = 1;
925 }
926
927 return pages;
928}
929
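/*
 * Illustrative sketch (not part of the original file): a zero page costs only
 * the page header plus a single filler byte on the wire, which is what the
 * two ram_counters.transferred updates in save_zero_page() above add up to.
 * The helper name is hypothetical.
 */
static size_t example_zero_page_wire_cost(RAMState *rs, RAMBlock *block)
{
    /* 8-byte offset+flags word, plus the block idstr only for a new block */
    size_t header = (block == rs->last_sent_block)
                    ? 8 : 8 + 1 + strlen(block->idstr);

    return header + 1;              /* plus the single zero byte */
}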
5727309d 930static void ram_release_pages(const char *rbname, uint64_t offset, int pages)
53f09a10 931{
5727309d 932 if (!migrate_release_ram() || !migration_in_postcopy()) {
53f09a10
PB
933 return;
934 }
935
aaa2064c 936 ram_discard_range(rbname, offset, pages << TARGET_PAGE_BITS);
53f09a10
PB
937}
938
56e93d26 939/**
3d0684b2 940 * ram_save_page: send the given page to the stream
56e93d26 941 *
3d0684b2 942 * Returns the number of pages written.
3fd3c4b3
DDAG
943 * < 0 - error
944 * >=0 - Number of pages written - this might legally be 0
945 * if xbzrle noticed the page was the same.
56e93d26 946 *
6f37bb8b 947 * @rs: current RAM state
56e93d26
JQ
948 * @block: block that contains the page we want to send
949 * @offset: offset inside the block for the page
950 * @last_stage: if we are at the completion stage
56e93d26 951 */
a0a8aa14 952static int ram_save_page(RAMState *rs, PageSearchStatus *pss, bool last_stage)
56e93d26
JQ
953{
954 int pages = -1;
955 uint64_t bytes_xmit;
956 ram_addr_t current_addr;
56e93d26
JQ
957 uint8_t *p;
958 int ret;
959 bool send_async = true;
a08f6890 960 RAMBlock *block = pss->block;
a935e30f 961 ram_addr_t offset = pss->page << TARGET_PAGE_BITS;
56e93d26 962
2f68e399 963 p = block->host + offset;
1db9d8e5 964 trace_ram_save_page(block->idstr, (uint64_t)offset, p);
56e93d26
JQ
965
966 /* When in doubt, send the page as normal */
967 bytes_xmit = 0;
ce25d337 968 ret = ram_control_save_page(rs->f, block->offset,
56e93d26
JQ
969 offset, TARGET_PAGE_SIZE, &bytes_xmit);
970 if (bytes_xmit) {
9360447d 971 ram_counters.transferred += bytes_xmit;
56e93d26
JQ
972 pages = 1;
973 }
974
975 XBZRLE_cache_lock();
976
977 current_addr = block->offset + offset;
978
56e93d26
JQ
979 if (ret != RAM_SAVE_CONTROL_NOT_SUPP) {
980 if (ret != RAM_SAVE_CONTROL_DELAYED) {
981 if (bytes_xmit > 0) {
9360447d 982 ram_counters.normal++;
56e93d26 983 } else if (bytes_xmit == 0) {
9360447d 984 ram_counters.duplicate++;
56e93d26
JQ
985 }
986 }
987 } else {
7faccdc3 988 pages = save_zero_page(rs, block, offset);
56e93d26
JQ
989 if (pages > 0) {
990 /* Must let xbzrle know, otherwise a previous (now 0'd) cached
991 * page would be stale
992 */
6f37bb8b 993 xbzrle_cache_zero_page(rs, current_addr);
a935e30f 994 ram_release_pages(block->idstr, offset, pages);
6f37bb8b 995 } else if (!rs->ram_bulk_stage &&
5727309d 996 !migration_in_postcopy() && migrate_use_xbzrle()) {
204b88b8 997 pages = save_xbzrle_page(rs, &p, current_addr, block,
072c2511 998 offset, last_stage);
56e93d26
JQ
999 if (!last_stage) {
1000 /* Can't send this cached data async, since the cache page
1001 * might get updated before it gets to the wire
1002 */
1003 send_async = false;
1004 }
1005 }
1006 }
1007
1008 /* XBZRLE overflow or normal page */
1009 if (pages == -1) {
9360447d
JQ
1010 ram_counters.transferred +=
1011 save_page_header(rs, rs->f, block, offset | RAM_SAVE_FLAG_PAGE);
56e93d26 1012 if (send_async) {
ce25d337 1013 qemu_put_buffer_async(rs->f, p, TARGET_PAGE_SIZE,
53f09a10 1014 migrate_release_ram() &
5727309d 1015 migration_in_postcopy());
56e93d26 1016 } else {
ce25d337 1017 qemu_put_buffer(rs->f, p, TARGET_PAGE_SIZE);
56e93d26 1018 }
9360447d 1019 ram_counters.transferred += TARGET_PAGE_SIZE;
56e93d26 1020 pages = 1;
9360447d 1021 ram_counters.normal++;
56e93d26
JQ
1022 }
1023
1024 XBZRLE_cache_unlock();
1025
1026 return pages;
1027}
1028
a7a9a88f
LL
1029static int do_compress_ram_page(QEMUFile *f, RAMBlock *block,
1030 ram_addr_t offset)
56e93d26 1031{
53518d94 1032 RAMState *rs = ram_state;
56e93d26 1033 int bytes_sent, blen;
a7a9a88f 1034 uint8_t *p = block->host + (offset & TARGET_PAGE_MASK);
56e93d26 1035
2bf3aa85 1036 bytes_sent = save_page_header(rs, f, block, offset |
56e93d26 1037 RAM_SAVE_FLAG_COMPRESS_PAGE);
a7a9a88f 1038 blen = qemu_put_compression_data(f, p, TARGET_PAGE_SIZE,
56e93d26 1039 migrate_compress_level());
b3be2896
LL
1040 if (blen < 0) {
1041 bytes_sent = 0;
1042 qemu_file_set_error(migrate_get_current()->to_dst_file, blen);
1043 error_report("compressed data failed!");
1044 } else {
1045 bytes_sent += blen;
5727309d 1046 ram_release_pages(block->idstr, offset & TARGET_PAGE_MASK, 1);
b3be2896 1047 }
56e93d26
JQ
1048
1049 return bytes_sent;
1050}
1051
ce25d337 1052static void flush_compressed_data(RAMState *rs)
56e93d26
JQ
1053{
1054 int idx, len, thread_count;
1055
1056 if (!migrate_use_compression()) {
1057 return;
1058 }
1059 thread_count = migrate_compress_threads();
a7a9a88f 1060
0d9f9a5c 1061 qemu_mutex_lock(&comp_done_lock);
56e93d26 1062 for (idx = 0; idx < thread_count; idx++) {
a7a9a88f 1063 while (!comp_param[idx].done) {
0d9f9a5c 1064 qemu_cond_wait(&comp_done_cond, &comp_done_lock);
56e93d26 1065 }
a7a9a88f 1066 }
0d9f9a5c 1067 qemu_mutex_unlock(&comp_done_lock);
a7a9a88f
LL
1068
1069 for (idx = 0; idx < thread_count; idx++) {
1070 qemu_mutex_lock(&comp_param[idx].mutex);
90e56fb4 1071 if (!comp_param[idx].quit) {
ce25d337 1072 len = qemu_put_qemu_file(rs->f, comp_param[idx].file);
9360447d 1073 ram_counters.transferred += len;
56e93d26 1074 }
a7a9a88f 1075 qemu_mutex_unlock(&comp_param[idx].mutex);
56e93d26
JQ
1076 }
1077}
1078
1079static inline void set_compress_params(CompressParam *param, RAMBlock *block,
1080 ram_addr_t offset)
1081{
1082 param->block = block;
1083 param->offset = offset;
1084}
1085
ce25d337
JQ
1086static int compress_page_with_multi_thread(RAMState *rs, RAMBlock *block,
1087 ram_addr_t offset)
56e93d26
JQ
1088{
1089 int idx, thread_count, bytes_xmit = -1, pages = -1;
1090
1091 thread_count = migrate_compress_threads();
0d9f9a5c 1092 qemu_mutex_lock(&comp_done_lock);
56e93d26
JQ
1093 while (true) {
1094 for (idx = 0; idx < thread_count; idx++) {
1095 if (comp_param[idx].done) {
a7a9a88f 1096 comp_param[idx].done = false;
ce25d337 1097 bytes_xmit = qemu_put_qemu_file(rs->f, comp_param[idx].file);
a7a9a88f 1098 qemu_mutex_lock(&comp_param[idx].mutex);
56e93d26 1099 set_compress_params(&comp_param[idx], block, offset);
a7a9a88f
LL
1100 qemu_cond_signal(&comp_param[idx].cond);
1101 qemu_mutex_unlock(&comp_param[idx].mutex);
56e93d26 1102 pages = 1;
9360447d
JQ
1103 ram_counters.normal++;
1104 ram_counters.transferred += bytes_xmit;
56e93d26
JQ
1105 break;
1106 }
1107 }
1108 if (pages > 0) {
1109 break;
1110 } else {
0d9f9a5c 1111 qemu_cond_wait(&comp_done_cond, &comp_done_lock);
56e93d26
JQ
1112 }
1113 }
0d9f9a5c 1114 qemu_mutex_unlock(&comp_done_lock);
56e93d26
JQ
1115
1116 return pages;
1117}
1118
1119/**
1120 * ram_save_compressed_page: compress the given page and send it to the stream
1121 *
3d0684b2 1122 * Returns the number of pages written.
56e93d26 1123 *
6f37bb8b 1124 * @rs: current RAM state
56e93d26
JQ
1125 * @block: block that contains the page we want to send
1126 * @offset: offset inside the block for the page
1127 * @last_stage: if we are at the completion stage
56e93d26 1128 */
a0a8aa14
JQ
1129static int ram_save_compressed_page(RAMState *rs, PageSearchStatus *pss,
1130 bool last_stage)
56e93d26
JQ
1131{
1132 int pages = -1;
fc50438e 1133 uint64_t bytes_xmit = 0;
56e93d26 1134 uint8_t *p;
fc50438e 1135 int ret, blen;
a08f6890 1136 RAMBlock *block = pss->block;
a935e30f 1137 ram_addr_t offset = pss->page << TARGET_PAGE_BITS;
56e93d26 1138
2f68e399 1139 p = block->host + offset;
56e93d26 1140
ce25d337 1141 ret = ram_control_save_page(rs->f, block->offset,
56e93d26
JQ
1142 offset, TARGET_PAGE_SIZE, &bytes_xmit);
1143 if (bytes_xmit) {
9360447d 1144 ram_counters.transferred += bytes_xmit;
56e93d26
JQ
1145 pages = 1;
1146 }
56e93d26
JQ
1147 if (ret != RAM_SAVE_CONTROL_NOT_SUPP) {
1148 if (ret != RAM_SAVE_CONTROL_DELAYED) {
1149 if (bytes_xmit > 0) {
9360447d 1150 ram_counters.normal++;
56e93d26 1151 } else if (bytes_xmit == 0) {
9360447d 1152 ram_counters.duplicate++;
56e93d26
JQ
1153 }
1154 }
1155 } else {
1156 /* When starting the process of a new block, the first page of
1157 * the block should be sent out before other pages in the same
1158 * block, and all the pages in the last block should have been sent
1159 * out. Keeping this order is important because the 'cont' flag
1160 * is used to avoid resending the block name.
1161 */
6f37bb8b 1162 if (block != rs->last_sent_block) {
ce25d337 1163 flush_compressed_data(rs);
7faccdc3 1164 pages = save_zero_page(rs, block, offset);
56e93d26 1165 if (pages == -1) {
fc50438e 1166 /* Make sure the first page is sent out before other pages */
2bf3aa85 1167 bytes_xmit = save_page_header(rs, rs->f, block, offset |
fc50438e 1168 RAM_SAVE_FLAG_COMPRESS_PAGE);
ce25d337 1169 blen = qemu_put_compression_data(rs->f, p, TARGET_PAGE_SIZE,
fc50438e
LL
1170 migrate_compress_level());
1171 if (blen > 0) {
9360447d
JQ
1172 ram_counters.transferred += bytes_xmit + blen;
1173 ram_counters.normal++;
b3be2896 1174 pages = 1;
fc50438e 1175 } else {
ce25d337 1176 qemu_file_set_error(rs->f, blen);
fc50438e 1177 error_report("compressed data failed!");
b3be2896 1178 }
56e93d26 1179 }
53f09a10 1180 if (pages > 0) {
a935e30f 1181 ram_release_pages(block->idstr, offset, pages);
53f09a10 1182 }
56e93d26 1183 } else {
7faccdc3 1184 pages = save_zero_page(rs, block, offset);
56e93d26 1185 if (pages == -1) {
ce25d337 1186 pages = compress_page_with_multi_thread(rs, block, offset);
53f09a10 1187 } else {
a935e30f 1188 ram_release_pages(block->idstr, offset, pages);
56e93d26
JQ
1189 }
1190 }
1191 }
1192
1193 return pages;
1194}
1195
3d0684b2
JQ
1196/**
1197 * find_dirty_block: find the next dirty page and update any state
1198 * associated with the search process.
b9e60928 1199 *
3d0684b2 1200 * Returns whether a dirty page was found
b9e60928 1201 *
6f37bb8b 1202 * @rs: current RAM state
3d0684b2
JQ
1203 * @pss: data about the state of the current dirty page scan
1204 * @again: set to false if the search has scanned the whole of RAM
b9e60928 1205 */
f20e2865 1206static bool find_dirty_block(RAMState *rs, PageSearchStatus *pss, bool *again)
b9e60928 1207{
f20e2865 1208 pss->page = migration_bitmap_find_dirty(rs, pss->block, pss->page);
6f37bb8b 1209 if (pss->complete_round && pss->block == rs->last_seen_block &&
a935e30f 1210 pss->page >= rs->last_page) {
b9e60928
DDAG
1211 /*
1212 * We've been once around the RAM and haven't found anything.
1213 * Give up.
1214 */
1215 *again = false;
1216 return false;
1217 }
a935e30f 1218 if ((pss->page << TARGET_PAGE_BITS) >= pss->block->used_length) {
b9e60928 1219 /* Didn't find anything in this RAM Block */
a935e30f 1220 pss->page = 0;
b9e60928
DDAG
1221 pss->block = QLIST_NEXT_RCU(pss->block, next);
1222 if (!pss->block) {
1223 /* Hit the end of the list */
1224 pss->block = QLIST_FIRST_RCU(&ram_list.blocks);
1225 /* Flag that we've looped */
1226 pss->complete_round = true;
6f37bb8b 1227 rs->ram_bulk_stage = false;
b9e60928
DDAG
1228 if (migrate_use_xbzrle()) {
1229 /* If xbzrle is on, stop using the data compression at this
1230 * point. In theory, xbzrle can do better than compression.
1231 */
ce25d337 1232 flush_compressed_data(rs);
b9e60928
DDAG
1233 }
1234 }
1235 /* Didn't find anything this time, but try again on the new block */
1236 *again = true;
1237 return false;
1238 } else {
1239 /* Can go around again, but... */
1240 *again = true;
1241 /* We've found something so probably don't need to */
1242 return true;
1243 }
1244}
1245
3d0684b2
JQ
1246/**
1247 * unqueue_page: gets a page off the queue
1248 *
a82d593b 1249 * Helper for 'get_queued_page' - gets a page off the queue
a82d593b 1250 *
3d0684b2
JQ
1251 * Returns the block of the page (or NULL if none available)
1252 *
ec481c6c 1253 * @rs: current RAM state
3d0684b2 1254 * @offset: used to return the offset within the RAMBlock
a82d593b 1255 */
f20e2865 1256static RAMBlock *unqueue_page(RAMState *rs, ram_addr_t *offset)
a82d593b
DDAG
1257{
1258 RAMBlock *block = NULL;
1259
ec481c6c
JQ
1260 qemu_mutex_lock(&rs->src_page_req_mutex);
1261 if (!QSIMPLEQ_EMPTY(&rs->src_page_requests)) {
1262 struct RAMSrcPageRequest *entry =
1263 QSIMPLEQ_FIRST(&rs->src_page_requests);
a82d593b
DDAG
1264 block = entry->rb;
1265 *offset = entry->offset;
a82d593b
DDAG
1266
1267 if (entry->len > TARGET_PAGE_SIZE) {
1268 entry->len -= TARGET_PAGE_SIZE;
1269 entry->offset += TARGET_PAGE_SIZE;
1270 } else {
1271 memory_region_unref(block->mr);
ec481c6c 1272 QSIMPLEQ_REMOVE_HEAD(&rs->src_page_requests, next_req);
a82d593b
DDAG
1273 g_free(entry);
1274 }
1275 }
ec481c6c 1276 qemu_mutex_unlock(&rs->src_page_req_mutex);
a82d593b
DDAG
1277
1278 return block;
1279}
1280
3d0684b2
JQ
1281/**
1282 * get_queued_page: unqueue a page from the postcopy requests
1283 *
1284 * Skips pages that are already sent (!dirty)
a82d593b 1285 *
3d0684b2 1286 * Returns whether a queued page was found
a82d593b 1287 *
6f37bb8b 1288 * @rs: current RAM state
3d0684b2 1289 * @pss: data about the state of the current dirty page scan
a82d593b 1290 */
f20e2865 1291static bool get_queued_page(RAMState *rs, PageSearchStatus *pss)
a82d593b
DDAG
1292{
1293 RAMBlock *block;
1294 ram_addr_t offset;
1295 bool dirty;
1296
1297 do {
f20e2865 1298 block = unqueue_page(rs, &offset);
a82d593b
DDAG
1299 /*
1300 * We're sending this page, and since it's postcopy nothing else
1301 * will dirty it, and we must make sure it doesn't get sent again
1302 * even if this queue request was received after the background
1303 * search already sent it.
1304 */
1305 if (block) {
f20e2865
JQ
1306 unsigned long page;
1307
6b6712ef
JQ
1308 page = offset >> TARGET_PAGE_BITS;
1309 dirty = test_bit(page, block->bmap);
a82d593b 1310 if (!dirty) {
06b10688 1311 trace_get_queued_page_not_dirty(block->idstr, (uint64_t)offset,
6b6712ef 1312 page, test_bit(page, block->unsentmap));
a82d593b 1313 } else {
f20e2865 1314 trace_get_queued_page(block->idstr, (uint64_t)offset, page);
a82d593b
DDAG
1315 }
1316 }
1317
1318 } while (block && !dirty);
1319
1320 if (block) {
1321 /*
1322 * As soon as we start servicing pages out of order, then we have
1323 * to kill the bulk stage, since the bulk stage assumes
1324 * in (migration_bitmap_find_dirty) that every page is
1325 * dirty, which is no longer true.
1326 */
6f37bb8b 1327 rs->ram_bulk_stage = false;
a82d593b
DDAG
1328
1329 /*
1330 * We want the background search to continue from the queued page
1331 * since the guest is likely to want other pages near to the page
1332 * it just requested.
1333 */
1334 pss->block = block;
a935e30f 1335 pss->page = offset >> TARGET_PAGE_BITS;
a82d593b
DDAG
1336 }
1337
1338 return !!block;
1339}
1340
6c595cde 1341/**
5e58f968
JQ
1342 * migration_page_queue_free: drop any remaining pages in the ram
1343 * request queue
6c595cde 1344 *
3d0684b2
JQ
1345 * It should be empty at the end anyway, but in error cases there may
1346 * be some left. If any pages are left, we drop them.
1347 *
6c595cde 1348 */
83c13382 1349static void migration_page_queue_free(RAMState *rs)
6c595cde 1350{
ec481c6c 1351 struct RAMSrcPageRequest *mspr, *next_mspr;
6c595cde
DDAG
1352 /* This queue generally should be empty - but in the case of a failed
1353 * migration it might have some leftover entries.
1354 */
1355 rcu_read_lock();
ec481c6c 1356 QSIMPLEQ_FOREACH_SAFE(mspr, &rs->src_page_requests, next_req, next_mspr) {
6c595cde 1357 memory_region_unref(mspr->rb->mr);
ec481c6c 1358 QSIMPLEQ_REMOVE_HEAD(&rs->src_page_requests, next_req);
6c595cde
DDAG
1359 g_free(mspr);
1360 }
1361 rcu_read_unlock();
1362}
1363
1364/**
3d0684b2
JQ
1365 * ram_save_queue_pages: queue the page for transmission
1366 *
1367 * A request from postcopy destination for example.
1368 *
1369 * Returns zero on success or negative on error
1370 *
3d0684b2
JQ
1371 * @rbname: Name of the RAMBlock of the request. NULL means the
1372 * same as the last one.
1373 * @start: starting address from the start of the RAMBlock
1374 * @len: length (in bytes) to send
6c595cde 1375 */
96506894 1376int ram_save_queue_pages(const char *rbname, ram_addr_t start, ram_addr_t len)
6c595cde
DDAG
1377{
1378 RAMBlock *ramblock;
53518d94 1379 RAMState *rs = ram_state;
6c595cde 1380
9360447d 1381 ram_counters.postcopy_requests++;
6c595cde
DDAG
1382 rcu_read_lock();
1383 if (!rbname) {
1384 /* Reuse last RAMBlock */
68a098f3 1385 ramblock = rs->last_req_rb;
6c595cde
DDAG
1386
1387 if (!ramblock) {
1388 /*
1389 * Shouldn't happen, we can't reuse the last RAMBlock if
1390 * it's the 1st request.
1391 */
1392 error_report("ram_save_queue_pages no previous block");
1393 goto err;
1394 }
1395 } else {
1396 ramblock = qemu_ram_block_by_name(rbname);
1397
1398 if (!ramblock) {
1399 /* We shouldn't be asked for a non-existent RAMBlock */
1400 error_report("ram_save_queue_pages no block '%s'", rbname);
1401 goto err;
1402 }
68a098f3 1403 rs->last_req_rb = ramblock;
6c595cde
DDAG
1404 }
1405 trace_ram_save_queue_pages(ramblock->idstr, start, len);
1406 if (start+len > ramblock->used_length) {
9458ad6b
JQ
1407 error_report("%s request overrun start=" RAM_ADDR_FMT " len="
1408 RAM_ADDR_FMT " blocklen=" RAM_ADDR_FMT,
6c595cde
DDAG
1409 __func__, start, len, ramblock->used_length);
1410 goto err;
1411 }
1412
ec481c6c
JQ
1413 struct RAMSrcPageRequest *new_entry =
1414 g_malloc0(sizeof(struct RAMSrcPageRequest));
6c595cde
DDAG
1415 new_entry->rb = ramblock;
1416 new_entry->offset = start;
1417 new_entry->len = len;
1418
1419 memory_region_ref(ramblock->mr);
ec481c6c
JQ
1420 qemu_mutex_lock(&rs->src_page_req_mutex);
1421 QSIMPLEQ_INSERT_TAIL(&rs->src_page_requests, new_entry, next_req);
1422 qemu_mutex_unlock(&rs->src_page_req_mutex);
6c595cde
DDAG
1423 rcu_read_unlock();
1424
1425 return 0;
1426
1427err:
1428 rcu_read_unlock();
1429 return -1;
1430}
1431
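/*
 * Illustrative sketch (not part of the original file): how the incoming
 * postcopy fault handler is expected to queue a single faulting page.  The
 * block name "pc.ram" and the offset are hypothetical example values.
 */
static int example_queue_postcopy_request(void)
{
    return ram_save_queue_pages("pc.ram", 0x2000, TARGET_PAGE_SIZE);
}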
a82d593b 1432/**
3d0684b2 1433 * ram_save_target_page: save one target page
a82d593b 1434 *
3d0684b2 1435 * Returns the number of pages written
a82d593b 1436 *
6f37bb8b 1437 * @rs: current RAM state
3d0684b2 1438 * @ms: current migration state
3d0684b2 1439 * @pss: data about the page we want to send
a82d593b 1440 * @last_stage: if we are at the completion stage
a82d593b 1441 */
a0a8aa14 1442static int ram_save_target_page(RAMState *rs, PageSearchStatus *pss,
f20e2865 1443 bool last_stage)
a82d593b
DDAG
1444{
1445 int res = 0;
1446
1447 /* Check if the page is dirty and, if so, send it */
f20e2865 1448 if (migration_bitmap_clear_dirty(rs, pss->block, pss->page)) {
6d358d94
JQ
1449 /*
1450 * If xbzrle is on, stop using the data compression after first
1451 * round of migration even if compression is enabled. In theory,
1452 * xbzrle can do better than compression.
1453 */
6b6712ef
JQ
1454 if (migrate_use_compression() &&
1455 (rs->ram_bulk_stage || !migrate_use_xbzrle())) {
a0a8aa14 1456 res = ram_save_compressed_page(rs, pss, last_stage);
a82d593b 1457 } else {
a0a8aa14 1458 res = ram_save_page(rs, pss, last_stage);
a82d593b
DDAG
1459 }
1460
1461 if (res < 0) {
1462 return res;
1463 }
6b6712ef
JQ
1464 if (pss->block->unsentmap) {
1465 clear_bit(pss->page, pss->block->unsentmap);
a82d593b
DDAG
1466 }
1467 }
1468
1469 return res;
1470}
1471
1472/**
3d0684b2 1473 * ram_save_host_page: save a whole host page
a82d593b 1474 *
3d0684b2
JQ
1475 * Starting at *offset send pages up to the end of the current host
1476 * page. It's valid for the initial offset to point into the middle of
1477 * a host page in which case the remainder of the hostpage is sent.
1478 * Only dirty target pages are sent. Note that the host page size may
1479 * be a huge page for this block.
1eb3fc0a
DDAG
1480 * The saving stops at the boundary of the used_length of the block
1481 * if the RAMBlock isn't a multiple of the host page size.
a82d593b 1482 *
3d0684b2
JQ
1483 * Returns the number of pages written or negative on error
1484 *
6f37bb8b 1485 * @rs: current RAM state
3d0684b2 1486 * @ms: current migration state
3d0684b2 1487 * @pss: data about the page we want to send
a82d593b 1488 * @last_stage: if we are at the completion stage
a82d593b 1489 */
a0a8aa14 1490static int ram_save_host_page(RAMState *rs, PageSearchStatus *pss,
f20e2865 1491 bool last_stage)
a82d593b
DDAG
1492{
1493 int tmppages, pages = 0;
a935e30f
JQ
1494 size_t pagesize_bits =
1495 qemu_ram_pagesize(pss->block) >> TARGET_PAGE_BITS;
4c011c37 1496
a82d593b 1497 do {
f20e2865 1498 tmppages = ram_save_target_page(rs, pss, last_stage);
a82d593b
DDAG
1499 if (tmppages < 0) {
1500 return tmppages;
1501 }
1502
1503 pages += tmppages;
a935e30f 1504 pss->page++;
1eb3fc0a
DDAG
1505 } while ((pss->page & (pagesize_bits - 1)) &&
1506 offset_in_ramblock(pss->block, pss->page << TARGET_PAGE_BITS));
a82d593b
DDAG
1507
1508 /* The offset we leave with is the last one we looked at */
a935e30f 1509 pss->page--;
a82d593b
DDAG
1510 return pages;
1511}
6c595cde 1512
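/*
 * Illustrative sketch (not part of the original file): the number of target
 * pages that ram_save_host_page() above walks per host page.  For a
 * hypothetical 2 MiB huge page with 4 KiB target pages this evaluates to 512.
 */
static unsigned long example_target_pages_per_host_page(RAMBlock *block)
{
    return qemu_ram_pagesize(block) >> TARGET_PAGE_BITS;
}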
56e93d26 1513/**
3d0684b2 1514 * ram_find_and_save_block: finds a dirty page and sends it to f
56e93d26
JQ
1515 *
1516 * Called within an RCU critical section.
1517 *
3d0684b2 1518 * Returns the number of pages written where zero means no dirty pages
56e93d26 1519 *
6f37bb8b 1520 * @rs: current RAM state
56e93d26 1521 * @last_stage: if we are at the completion stage
a82d593b
DDAG
1522 *
1523 * On systems where host-page-size > target-page-size it will send all the
1524 * pages in a host page that are dirty.
56e93d26
JQ
1525 */
1526
ce25d337 1527static int ram_find_and_save_block(RAMState *rs, bool last_stage)
56e93d26 1528{
b8fb8cb7 1529 PageSearchStatus pss;
56e93d26 1530 int pages = 0;
b9e60928 1531 bool again, found;
56e93d26 1532
0827b9e9
AA
1533 /* No dirty page as there is zero RAM */
1534 if (!ram_bytes_total()) {
1535 return pages;
1536 }
1537
6f37bb8b 1538 pss.block = rs->last_seen_block;
a935e30f 1539 pss.page = rs->last_page;
b8fb8cb7
DDAG
1540 pss.complete_round = false;
1541
1542 if (!pss.block) {
1543 pss.block = QLIST_FIRST_RCU(&ram_list.blocks);
1544 }
56e93d26 1545
b9e60928 1546 do {
a82d593b 1547 again = true;
f20e2865 1548 found = get_queued_page(rs, &pss);
b9e60928 1549
a82d593b
DDAG
1550 if (!found) {
1551 /* priority queue empty, so just search for something dirty */
f20e2865 1552 found = find_dirty_block(rs, &pss, &again);
a82d593b 1553 }
f3f491fc 1554
a82d593b 1555 if (found) {
f20e2865 1556 pages = ram_save_host_page(rs, &pss, last_stage);
56e93d26 1557 }
b9e60928 1558 } while (!pages && again);
56e93d26 1559
6f37bb8b 1560 rs->last_seen_block = pss.block;
a935e30f 1561 rs->last_page = pss.page;
56e93d26
JQ
1562
1563 return pages;
1564}
1565
1566void acct_update_position(QEMUFile *f, size_t size, bool zero)
1567{
1568 uint64_t pages = size / TARGET_PAGE_SIZE;
f7ccd61b 1569
56e93d26 1570 if (zero) {
9360447d 1571 ram_counters.duplicate += pages;
56e93d26 1572 } else {
9360447d
JQ
1573 ram_counters.normal += pages;
1574 ram_counters.transferred += size;
56e93d26
JQ
1575 qemu_update_position(f, size);
1576 }
1577}
1578
56e93d26
JQ
1579uint64_t ram_bytes_total(void)
1580{
1581 RAMBlock *block;
1582 uint64_t total = 0;
1583
1584 rcu_read_lock();
99e15582 1585 RAMBLOCK_FOREACH(block) {
56e93d26 1586 total += block->used_length;
99e15582 1587 }
56e93d26
JQ
1588 rcu_read_unlock();
1589 return total;
1590}
1591
f265e0e4 1592static void xbzrle_load_setup(void)
56e93d26 1593{
f265e0e4 1594 XBZRLE.decoded_buf = g_malloc(TARGET_PAGE_SIZE);
56e93d26
JQ
1595}
1596
f265e0e4
JQ
1597static void xbzrle_load_cleanup(void)
1598{
1599 g_free(XBZRLE.decoded_buf);
1600 XBZRLE.decoded_buf = NULL;
1601}
1602
7d7c96be
PX
1603static void ram_state_cleanup(RAMState **rsp)
1604{
1605 migration_page_queue_free(*rsp);
1606 qemu_mutex_destroy(&(*rsp)->bitmap_mutex);
1607 qemu_mutex_destroy(&(*rsp)->src_page_req_mutex);
1608 g_free(*rsp);
1609 *rsp = NULL;
1610}
1611
84593a08
PX
1612static void xbzrle_cleanup(void)
1613{
1614 XBZRLE_cache_lock();
1615 if (XBZRLE.cache) {
1616 cache_fini(XBZRLE.cache);
1617 g_free(XBZRLE.encoded_buf);
1618 g_free(XBZRLE.current_buf);
1619 g_free(XBZRLE.zero_target_page);
1620 XBZRLE.cache = NULL;
1621 XBZRLE.encoded_buf = NULL;
1622 XBZRLE.current_buf = NULL;
1623 XBZRLE.zero_target_page = NULL;
1624 }
1625 XBZRLE_cache_unlock();
1626}
1627
f265e0e4 1628static void ram_save_cleanup(void *opaque)
56e93d26 1629{
53518d94 1630 RAMState **rsp = opaque;
6b6712ef 1631 RAMBlock *block;
eb859c53 1632
2ff64038
LZ
1633 /* The caller holds the iothread lock or is in a bottom half, so there
1634 * is no write race against this migration bitmap.
1635 */
6b6712ef
JQ
1636 memory_global_dirty_log_stop();
1637
1638 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1639 g_free(block->bmap);
1640 block->bmap = NULL;
1641 g_free(block->unsentmap);
1642 block->unsentmap = NULL;
56e93d26
JQ
1643 }
1644
84593a08 1645 xbzrle_cleanup();
f0afa331 1646 compress_threads_save_cleanup();
7d7c96be 1647 ram_state_cleanup(rsp);
56e93d26
JQ
1648}
1649
6f37bb8b 1650static void ram_state_reset(RAMState *rs)
56e93d26 1651{
6f37bb8b
JQ
1652 rs->last_seen_block = NULL;
1653 rs->last_sent_block = NULL;
269ace29 1654 rs->last_page = 0;
6f37bb8b
JQ
1655 rs->last_version = ram_list.version;
1656 rs->ram_bulk_stage = true;
56e93d26
JQ
1657}
1658
1659#define MAX_WAIT 50 /* ms, half buffered_file limit */
1660
4f2e4252
DDAG
1661/*
1662 * 'expected' is the value you expect the bitmap mostly to be full
1663 * of; it won't bother printing lines that are all this value.
1664 * If 'todump' is null the migration bitmap is dumped.
1665 */
6b6712ef
JQ
1666void ram_debug_dump_bitmap(unsigned long *todump, bool expected,
1667 unsigned long pages)
4f2e4252 1668{
4f2e4252
DDAG
1669 int64_t cur;
1670 int64_t linelen = 128;
1671 char linebuf[129];
1672
6b6712ef 1673 for (cur = 0; cur < pages; cur += linelen) {
4f2e4252
DDAG
1674 int64_t curb;
1675 bool found = false;
1676 /*
1677 * Last line; catch the case where the line length
1678 * is longer than remaining ram
1679 */
6b6712ef
JQ
1680 if (cur + linelen > pages) {
1681 linelen = pages - cur;
4f2e4252
DDAG
1682 }
1683 for (curb = 0; curb < linelen; curb++) {
1684 bool thisbit = test_bit(cur + curb, todump);
1685 linebuf[curb] = thisbit ? '1' : '.';
1686 found = found || (thisbit != expected);
1687 }
1688 if (found) {
1689 linebuf[curb] = '\0';
1690 fprintf(stderr, "0x%08" PRIx64 " : %s\n", cur, linebuf);
1691 }
1692 }
1693}
1694
e0b266f0
DDAG
1695/* **** functions for postcopy ***** */
1696
ced1c616
PB
1697void ram_postcopy_migrated_memory_release(MigrationState *ms)
1698{
1699 struct RAMBlock *block;
ced1c616 1700
99e15582 1701 RAMBLOCK_FOREACH(block) {
6b6712ef
JQ
1702 unsigned long *bitmap = block->bmap;
1703 unsigned long range = block->used_length >> TARGET_PAGE_BITS;
1704 unsigned long run_start = find_next_zero_bit(bitmap, range, 0);
ced1c616
PB
1705
1706 while (run_start < range) {
1707 unsigned long run_end = find_next_bit(bitmap, range, run_start + 1);
aaa2064c 1708 ram_discard_range(block->idstr, run_start << TARGET_PAGE_BITS,
ced1c616
PB
1709 (run_end - run_start) << TARGET_PAGE_BITS);
1710 run_start = find_next_zero_bit(bitmap, range, run_end + 1);
1711 }
1712 }
1713}
1714
3d0684b2
JQ
1715/**
1716 * postcopy_send_discard_bm_ram: discard a RAMBlock
1717 *
1718 * Returns zero on success
1719 *
e0b266f0
DDAG
1720 * Callback from postcopy_each_ram_send_discard for each RAMBlock
1721 * Note: At this point the 'unsentmap' is the processed bitmap combined
1722 * with the dirtymap; so a '1' means it's either dirty or unsent.
3d0684b2
JQ
1723 *
1724 * @ms: current migration state
1725 * @pds: state for postcopy
1726 * @block: RAMBlock being processed; its unsentmap selects the
1727 *         pages to discard
e0b266f0
DDAG
1728 */
1729static int postcopy_send_discard_bm_ram(MigrationState *ms,
1730 PostcopyDiscardState *pds,
6b6712ef 1731 RAMBlock *block)
e0b266f0 1732{
6b6712ef 1733 unsigned long end = block->used_length >> TARGET_PAGE_BITS;
e0b266f0 1734 unsigned long current;
6b6712ef 1735 unsigned long *unsentmap = block->unsentmap;
e0b266f0 1736
6b6712ef 1737 for (current = 0; current < end; ) {
e0b266f0
DDAG
1738 unsigned long one = find_next_bit(unsentmap, end, current);
1739
1740 if (one <= end) {
1741 unsigned long zero = find_next_zero_bit(unsentmap, end, one + 1);
1742 unsigned long discard_length;
1743
1744 if (zero >= end) {
1745 discard_length = end - one;
1746 } else {
1747 discard_length = zero - one;
1748 }
d688c62d
DDAG
1749 if (discard_length) {
1750 postcopy_discard_send_range(ms, pds, one, discard_length);
1751 }
e0b266f0
DDAG
1752 current = one + discard_length;
1753 } else {
1754 current = one;
1755 }
1756 }
1757
1758 return 0;
1759}
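/*
 * Editor's sketch, not part of ram.c: the run-length walk used by
 * postcopy_send_discard_bm_ram() shown standalone.  'emit' is a
 * hypothetical callback standing in for postcopy_discard_send_range();
 * it receives a (start, length) pair for each run of set bits.
 */
static void example_foreach_set_run(const unsigned long *map,
                                    unsigned long nbits,
                                    void (*emit)(unsigned long start,
                                                 unsigned long len))
{
    unsigned long cur = 0;

    while (cur < nbits) {
        unsigned long one = find_next_bit(map, nbits, cur);
        unsigned long zero;

        if (one >= nbits) {
            break;                              /* no more set bits */
        }
        zero = find_next_zero_bit(map, nbits, one + 1);
        emit(one, zero - one);                  /* run of '1's: [one, zero) */
        cur = zero;
    }
}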
1760
3d0684b2
JQ
1761/**
1762 * postcopy_each_ram_send_discard: discard all RAMBlocks
1763 *
1764 * Returns 0 for success or negative for error
1765 *
e0b266f0
DDAG
1766 * Utility for the outgoing postcopy code.
1767 * Calls postcopy_send_discard_bm_ram for each RAMBlock
1768 * passing it bitmap indexes and name.
e0b266f0
DDAG
1769 * (qemu_ram_foreach_block ends up passing unscaled lengths
1770 * which would mean postcopy code would have to deal with target page)
3d0684b2
JQ
1771 *
1772 * @ms: current migration state
e0b266f0
DDAG
1773 */
1774static int postcopy_each_ram_send_discard(MigrationState *ms)
1775{
1776 struct RAMBlock *block;
1777 int ret;
1778
99e15582 1779 RAMBLOCK_FOREACH(block) {
6b6712ef
JQ
1780 PostcopyDiscardState *pds =
1781 postcopy_discard_send_init(ms, block->idstr);
e0b266f0
DDAG
1782
1783 /*
1784         * Postcopy sends chunks of bitmap over the wire, but it
1785         * just needs indexes at this point, which avoids it needing
1786         * target-page specific code.
1787 */
6b6712ef 1788 ret = postcopy_send_discard_bm_ram(ms, pds, block);
e0b266f0
DDAG
1789 postcopy_discard_send_finish(ms, pds);
1790 if (ret) {
1791 return ret;
1792 }
1793 }
1794
1795 return 0;
1796}
1797
3d0684b2
JQ
1798/**
1799 * postcopy_chunk_hostpages_pass: canonicalize bitmap in hostpages
1800 *
1801 * Helper for postcopy_chunk_hostpages; it's called twice to
1802 * canonicalize the two bitmaps, which are similar, but one is
1803 * inverted.
99e314eb 1804 *
3d0684b2
JQ
1805 * Postcopy requires that all target pages in a hostpage are dirty or
1806 * clean, not a mix. This function canonicalizes the bitmaps.
99e314eb 1807 *
3d0684b2
JQ
1808 * @ms: current migration state
1809 * @unsent_pass: if true we need to canonicalize partially unsent host pages
1810 * otherwise we need to canonicalize partially dirty host pages
1811 * @block: block that contains the page we want to canonicalize
1812 * @pds: state for postcopy
99e314eb
DDAG
1813 */
1814static void postcopy_chunk_hostpages_pass(MigrationState *ms, bool unsent_pass,
1815 RAMBlock *block,
1816 PostcopyDiscardState *pds)
1817{
53518d94 1818 RAMState *rs = ram_state;
6b6712ef
JQ
1819 unsigned long *bitmap = block->bmap;
1820 unsigned long *unsentmap = block->unsentmap;
29c59172 1821 unsigned int host_ratio = block->page_size / TARGET_PAGE_SIZE;
6b6712ef 1822 unsigned long pages = block->used_length >> TARGET_PAGE_BITS;
99e314eb
DDAG
1823 unsigned long run_start;
1824
29c59172
DDAG
1825 if (block->page_size == TARGET_PAGE_SIZE) {
1826 /* Easy case - TPS==HPS for a non-huge page RAMBlock */
1827 return;
1828 }
1829
99e314eb
DDAG
1830 if (unsent_pass) {
1831 /* Find a sent page */
6b6712ef 1832 run_start = find_next_zero_bit(unsentmap, pages, 0);
99e314eb
DDAG
1833 } else {
1834 /* Find a dirty page */
6b6712ef 1835 run_start = find_next_bit(bitmap, pages, 0);
99e314eb
DDAG
1836 }
1837
6b6712ef 1838 while (run_start < pages) {
99e314eb
DDAG
1839 bool do_fixup = false;
1840 unsigned long fixup_start_addr;
1841 unsigned long host_offset;
1842
1843 /*
1844 * If the start of this run of pages is in the middle of a host
1845 * page, then we need to fixup this host page.
1846 */
1847 host_offset = run_start % host_ratio;
1848 if (host_offset) {
1849 do_fixup = true;
1850 run_start -= host_offset;
1851 fixup_start_addr = run_start;
1852 /* For the next pass */
1853 run_start = run_start + host_ratio;
1854 } else {
1855 /* Find the end of this run */
1856 unsigned long run_end;
1857 if (unsent_pass) {
6b6712ef 1858 run_end = find_next_bit(unsentmap, pages, run_start + 1);
99e314eb 1859 } else {
6b6712ef 1860 run_end = find_next_zero_bit(bitmap, pages, run_start + 1);
99e314eb
DDAG
1861 }
1862 /*
1863 * If the end isn't at the start of a host page, then the
1864 * run doesn't finish at the end of a host page
1865 * and we need to discard.
1866 */
1867 host_offset = run_end % host_ratio;
1868 if (host_offset) {
1869 do_fixup = true;
1870 fixup_start_addr = run_end - host_offset;
1871 /*
1872 * This host page has gone, the next loop iteration starts
1873 * from after the fixup
1874 */
1875 run_start = fixup_start_addr + host_ratio;
1876 } else {
1877 /*
1878 * No discards on this iteration, next loop starts from
1879 * next sent/dirty page
1880 */
1881 run_start = run_end + 1;
1882 }
1883 }
1884
1885 if (do_fixup) {
1886 unsigned long page;
1887
1888 /* Tell the destination to discard this page */
1889 if (unsent_pass || !test_bit(fixup_start_addr, unsentmap)) {
1890 /* For the unsent_pass we:
1891 * discard partially sent pages
1892 * For the !unsent_pass (dirty) we:
1893 * discard partially dirty pages that were sent
1894 * (any partially sent pages were already discarded
1895 * by the previous unsent_pass)
1896 */
1897 postcopy_discard_send_range(ms, pds, fixup_start_addr,
1898 host_ratio);
1899 }
1900
1901 /* Clean up the bitmap */
1902 for (page = fixup_start_addr;
1903 page < fixup_start_addr + host_ratio; page++) {
1904 /* All pages in this host page are now not sent */
1905 set_bit(page, unsentmap);
1906
1907 /*
1908 * Remark them as dirty, updating the count for any pages
1909 * that weren't previously dirty.
1910 */
0d8ec885 1911 rs->migration_dirty_pages += !test_and_set_bit(page, bitmap);
99e314eb
DDAG
1912 }
1913 }
1914
1915 if (unsent_pass) {
1916 /* Find the next sent page for the next iteration */
6b6712ef 1917 run_start = find_next_zero_bit(unsentmap, pages, run_start);
99e314eb
DDAG
1918 } else {
1919 /* Find the next dirty page for the next iteration */
6b6712ef 1920 run_start = find_next_bit(bitmap, pages, run_start);
99e314eb
DDAG
1921 }
1922 }
1923}
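/*
 * Editor's sketch, not part of ram.c: the alignment arithmetic used by
 * postcopy_chunk_hostpages_pass() above.  For a run boundary 'page'
 * measured in target pages, with 'host_ratio' target pages per host page,
 * the fixup covers the whole containing host page.
 */
static inline unsigned long example_hostpage_start(unsigned long page,
                                                   unsigned int host_ratio)
{
    return page - (page % host_ratio);   /* round down to host-page start */
}

static inline unsigned long example_hostpage_end(unsigned long page,
                                                 unsigned int host_ratio)
{
    return example_hostpage_start(page, host_ratio) + host_ratio;
}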
1924
3d0684b2
JQ
1925/**
1926 * postcopy_chunk_hostpages: discard any partially sent host page
1927 *
99e314eb
DDAG
1928 * Utility for the outgoing postcopy code.
1929 *
1930 * Discard any partially sent host-page size chunks, mark any partially
29c59172
DDAG
1931 * dirty host-page size chunks as all dirty. In this case the host-page
1932 * is the host-page for the particular RAMBlock, i.e. it might be a huge page
99e314eb 1933 *
3d0684b2
JQ
1934 * Returns zero on success
1935 *
1936 * @ms: current migration state
6b6712ef 1937 * @block: block we want to work with
99e314eb 1938 */
6b6712ef 1939static int postcopy_chunk_hostpages(MigrationState *ms, RAMBlock *block)
99e314eb 1940{
6b6712ef
JQ
1941 PostcopyDiscardState *pds =
1942 postcopy_discard_send_init(ms, block->idstr);
99e314eb 1943
6b6712ef
JQ
1944 /* First pass: Discard all partially sent host pages */
1945 postcopy_chunk_hostpages_pass(ms, true, block, pds);
1946 /*
1947 * Second pass: Ensure that all partially dirty host pages are made
1948 * fully dirty.
1949 */
1950 postcopy_chunk_hostpages_pass(ms, false, block, pds);
99e314eb 1951
6b6712ef 1952 postcopy_discard_send_finish(ms, pds);
99e314eb
DDAG
1953 return 0;
1954}
1955
3d0684b2
JQ
1956/**
1957 * ram_postcopy_send_discard_bitmap: transmit the discard bitmap
1958 *
1959 * Returns zero on success
1960 *
e0b266f0
DDAG
1961 * Transmit the set of pages to be discarded after precopy to the target;
1962 * these are pages that:
1963 *     a) have been previously transmitted but are now dirty again
1964 *     b) have never been transmitted; this ensures that any pages on the
1965 *        destination that have been mapped by background tasks get
1966 *        discarded (transparent huge pages are the specific concern)
1967 * Hopefully this is pretty sparse.
3d0684b2
JQ
1968 *
1969 * @ms: current migration state
e0b266f0
DDAG
1970 */
1971int ram_postcopy_send_discard_bitmap(MigrationState *ms)
1972{
53518d94 1973 RAMState *rs = ram_state;
6b6712ef 1974 RAMBlock *block;
e0b266f0 1975 int ret;
e0b266f0
DDAG
1976
1977 rcu_read_lock();
1978
1979 /* This should be our last sync, the src is now paused */
eb859c53 1980 migration_bitmap_sync(rs);
e0b266f0 1981
6b6712ef
JQ
1982 /* Easiest way to make sure we don't resume in the middle of a host-page */
1983 rs->last_seen_block = NULL;
1984 rs->last_sent_block = NULL;
1985 rs->last_page = 0;
e0b266f0 1986
6b6712ef
JQ
1987 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1988 unsigned long pages = block->used_length >> TARGET_PAGE_BITS;
1989 unsigned long *bitmap = block->bmap;
1990 unsigned long *unsentmap = block->unsentmap;
1991
1992 if (!unsentmap) {
1993            /* We don't have a safe way to resize the unsentmap, so
1994 * if the bitmap was resized it will be NULL at this
1995 * point.
1996 */
1997 error_report("migration ram resized during precopy phase");
1998 rcu_read_unlock();
1999 return -EINVAL;
2000 }
2001 /* Deal with TPS != HPS and huge pages */
2002 ret = postcopy_chunk_hostpages(ms, block);
2003 if (ret) {
2004 rcu_read_unlock();
2005 return ret;
2006 }
e0b266f0 2007
6b6712ef
JQ
2008 /*
2009 * Update the unsentmap to be unsentmap = unsentmap | dirty
2010 */
2011 bitmap_or(unsentmap, unsentmap, bitmap, pages);
e0b266f0 2012#ifdef DEBUG_POSTCOPY
6b6712ef 2013 ram_debug_dump_bitmap(unsentmap, true, pages);
e0b266f0 2014#endif
6b6712ef
JQ
2015 }
2016 trace_ram_postcopy_send_discard_bitmap();
e0b266f0
DDAG
2017
2018 ret = postcopy_each_ram_send_discard(ms);
2019 rcu_read_unlock();
2020
2021 return ret;
2022}
2023
3d0684b2
JQ
2024/**
2025 * ram_discard_range: discard dirtied pages at the beginning of postcopy
e0b266f0 2026 *
3d0684b2 2027 * Returns zero on success
e0b266f0 2028 *
36449157
JQ
2029 * @rbname: name of the RAMBlock of the request. NULL means the
2030 * same as the last one.
3d0684b2
JQ
2031 * @start: RAMBlock starting page
2032 * @length: RAMBlock size
e0b266f0 2033 */
aaa2064c 2034int ram_discard_range(const char *rbname, uint64_t start, size_t length)
e0b266f0
DDAG
2035{
2036 int ret = -1;
2037
36449157 2038 trace_ram_discard_range(rbname, start, length);
d3a5038c 2039
e0b266f0 2040 rcu_read_lock();
36449157 2041 RAMBlock *rb = qemu_ram_block_by_name(rbname);
e0b266f0
DDAG
2042
2043 if (!rb) {
36449157 2044 error_report("ram_discard_range: Failed to find block '%s'", rbname);
e0b266f0
DDAG
2045 goto err;
2046 }
2047
f9494614
AP
2048 bitmap_clear(rb->receivedmap, start >> qemu_target_page_bits(),
2049 length >> qemu_target_page_bits());
d3a5038c 2050 ret = ram_block_discard_range(rb, start, length);
e0b266f0
DDAG
2051
2052err:
2053 rcu_read_unlock();
2054
2055 return ret;
2056}
2057
84593a08
PX
2058/*
2059 * For every allocation, we will try not to crash the VM if the
2060 * allocation fails.
2061 */
2062static int xbzrle_init(void)
2063{
2064 Error *local_err = NULL;
2065
2066 if (!migrate_use_xbzrle()) {
2067 return 0;
2068 }
2069
2070 XBZRLE_cache_lock();
2071
2072 XBZRLE.zero_target_page = g_try_malloc0(TARGET_PAGE_SIZE);
2073 if (!XBZRLE.zero_target_page) {
2074 error_report("%s: Error allocating zero page", __func__);
2075 goto err_out;
2076 }
2077
2078 XBZRLE.cache = cache_init(migrate_xbzrle_cache_size(),
2079 TARGET_PAGE_SIZE, &local_err);
2080 if (!XBZRLE.cache) {
2081 error_report_err(local_err);
2082 goto free_zero_page;
2083 }
2084
2085 XBZRLE.encoded_buf = g_try_malloc0(TARGET_PAGE_SIZE);
2086 if (!XBZRLE.encoded_buf) {
2087 error_report("%s: Error allocating encoded_buf", __func__);
2088 goto free_cache;
2089 }
2090
2091 XBZRLE.current_buf = g_try_malloc(TARGET_PAGE_SIZE);
2092 if (!XBZRLE.current_buf) {
2093 error_report("%s: Error allocating current_buf", __func__);
2094 goto free_encoded_buf;
2095 }
2096
2097 /* We are all good */
2098 XBZRLE_cache_unlock();
2099 return 0;
2100
2101free_encoded_buf:
2102 g_free(XBZRLE.encoded_buf);
2103 XBZRLE.encoded_buf = NULL;
2104free_cache:
2105 cache_fini(XBZRLE.cache);
2106 XBZRLE.cache = NULL;
2107free_zero_page:
2108 g_free(XBZRLE.zero_target_page);
2109 XBZRLE.zero_target_page = NULL;
2110err_out:
2111 XBZRLE_cache_unlock();
2112 return -ENOMEM;
2113}
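/*
 * Editor's sketch, not part of ram.c: the "try, report, unwind" allocation
 * pattern that xbzrle_init() follows.  example_try_alloc_page() is a
 * hypothetical helper; like the code above it uses g_try_malloc0() so an
 * allocation failure is reported instead of aborting the VM.
 */
static uint8_t *example_try_alloc_page(void)
{
    uint8_t *buf = g_try_malloc0(TARGET_PAGE_SIZE);

    if (!buf) {
        error_report("%s: Error allocating page buffer", __func__);
    }
    return buf;                          /* may be NULL; caller must unwind */
}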
2114
53518d94 2115static int ram_state_init(RAMState **rsp)
56e93d26 2116{
7d00ee6a
PX
2117 *rsp = g_try_new0(RAMState, 1);
2118
2119 if (!*rsp) {
2120 error_report("%s: Init ramstate fail", __func__);
2121 return -1;
2122 }
53518d94
JQ
2123
2124 qemu_mutex_init(&(*rsp)->bitmap_mutex);
2125 qemu_mutex_init(&(*rsp)->src_page_req_mutex);
2126 QSIMPLEQ_INIT(&(*rsp)->src_page_requests);
56e93d26 2127
7d00ee6a
PX
2128 /*
2129 * Count the total number of pages used by ram blocks not including any
2130 * gaps due to alignment or unplugs.
2131 */
2132 (*rsp)->migration_dirty_pages = ram_bytes_total() >> TARGET_PAGE_BITS;
2133
2134 ram_state_reset(*rsp);
2135
2136 return 0;
2137}
2138
d6eff5d7 2139static void ram_list_init_bitmaps(void)
7d00ee6a 2140{
d6eff5d7
PX
2141 RAMBlock *block;
2142 unsigned long pages;
56e93d26 2143
0827b9e9
AA
2144 /* Skip setting bitmap if there is no RAM */
2145 if (ram_bytes_total()) {
6b6712ef 2146 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
d6eff5d7 2147 pages = block->max_length >> TARGET_PAGE_BITS;
6b6712ef
JQ
2148 block->bmap = bitmap_new(pages);
2149 bitmap_set(block->bmap, 0, pages);
2150 if (migrate_postcopy_ram()) {
2151 block->unsentmap = bitmap_new(pages);
2152 bitmap_set(block->unsentmap, 0, pages);
2153 }
0827b9e9 2154 }
f3f491fc 2155 }
d6eff5d7
PX
2156}
2157
2158static void ram_init_bitmaps(RAMState *rs)
2159{
2160 /* For memory_global_dirty_log_start below. */
2161 qemu_mutex_lock_iothread();
2162 qemu_mutex_lock_ramlist();
2163 rcu_read_lock();
f3f491fc 2164
d6eff5d7 2165 ram_list_init_bitmaps();
56e93d26 2166 memory_global_dirty_log_start();
d6eff5d7
PX
2167 migration_bitmap_sync(rs);
2168
2169 rcu_read_unlock();
56e93d26 2170 qemu_mutex_unlock_ramlist();
49877834 2171 qemu_mutex_unlock_iothread();
d6eff5d7
PX
2172}
2173
2174static int ram_init_all(RAMState **rsp)
2175{
2176 if (ram_state_init(rsp)) {
2177 return -1;
2178 }
2179
2180 if (xbzrle_init()) {
2181 ram_state_cleanup(rsp);
2182 return -1;
2183 }
2184
2185 ram_init_bitmaps(*rsp);
a91246c9
HZ
2186
2187 return 0;
2188}
2189
3d0684b2
JQ
2190/*
2191 * Each of ram_save_setup, ram_save_iterate and ram_save_complete has
a91246c9
HZ
2192 * a long-running RCU critical section.  When RCU reclaims in the code
2193 * start to become numerous it will be necessary to reduce the
2194 * granularity of these critical sections.
2195 */
2196
3d0684b2
JQ
2197/**
2198 * ram_save_setup: Setup RAM for migration
2199 *
2200 * Returns zero to indicate success and negative for error
2201 *
2202 * @f: QEMUFile where to send the data
2203 * @opaque: RAMState pointer
2204 */
a91246c9
HZ
2205static int ram_save_setup(QEMUFile *f, void *opaque)
2206{
53518d94 2207 RAMState **rsp = opaque;
a91246c9
HZ
2208 RAMBlock *block;
2209
2210 /* migration has already setup the bitmap, reuse it. */
2211 if (!migration_in_colo_state()) {
7d00ee6a 2212 if (ram_init_all(rsp) != 0) {
a91246c9 2213 return -1;
53518d94 2214 }
a91246c9 2215 }
53518d94 2216 (*rsp)->f = f;
a91246c9
HZ
2217
2218 rcu_read_lock();
56e93d26
JQ
2219
2220 qemu_put_be64(f, ram_bytes_total() | RAM_SAVE_FLAG_MEM_SIZE);
2221
99e15582 2222 RAMBLOCK_FOREACH(block) {
56e93d26
JQ
2223 qemu_put_byte(f, strlen(block->idstr));
2224 qemu_put_buffer(f, (uint8_t *)block->idstr, strlen(block->idstr));
2225 qemu_put_be64(f, block->used_length);
ef08fb38
DDAG
2226 if (migrate_postcopy_ram() && block->page_size != qemu_host_page_size) {
2227 qemu_put_be64(f, block->page_size);
2228 }
56e93d26
JQ
2229 }
2230
2231 rcu_read_unlock();
f0afa331 2232 compress_threads_save_setup();
56e93d26
JQ
2233
2234 ram_control_before_iterate(f, RAM_CONTROL_SETUP);
2235 ram_control_after_iterate(f, RAM_CONTROL_SETUP);
2236
2237 qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
2238
2239 return 0;
2240}
2241
3d0684b2
JQ
2242/**
2243 * ram_save_iterate: iterative stage for migration
2244 *
2245 * Returns zero to indicate success and negative for error
2246 *
2247 * @f: QEMUFile where to send the data
2248 * @opaque: RAMState pointer
2249 */
56e93d26
JQ
2250static int ram_save_iterate(QEMUFile *f, void *opaque)
2251{
53518d94
JQ
2252 RAMState **temp = opaque;
2253 RAMState *rs = *temp;
56e93d26
JQ
2254 int ret;
2255 int i;
2256 int64_t t0;
5c90308f 2257 int done = 0;
56e93d26
JQ
2258
2259 rcu_read_lock();
6f37bb8b
JQ
2260 if (ram_list.version != rs->last_version) {
2261 ram_state_reset(rs);
56e93d26
JQ
2262 }
2263
2264 /* Read version before ram_list.blocks */
2265 smp_rmb();
2266
2267 ram_control_before_iterate(f, RAM_CONTROL_ROUND);
2268
2269 t0 = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
2270 i = 0;
2271 while ((ret = qemu_file_rate_limit(f)) == 0) {
2272 int pages;
2273
ce25d337 2274 pages = ram_find_and_save_block(rs, false);
56e93d26
JQ
2275        /* no more pages to send */
2276 if (pages == 0) {
5c90308f 2277 done = 1;
56e93d26
JQ
2278 break;
2279 }
23b28c3c 2280 rs->iterations++;
070afca2 2281
56e93d26
JQ
2282        /* we want to check in the first iterations, just in case it was the
2283           first time and we had to sync the dirty bitmap.
2284           qemu_clock_get_ns() is a bit expensive, so we only check it every
2285           64 iterations
2286        */
2287 if ((i & 63) == 0) {
2288 uint64_t t1 = (qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - t0) / 1000000;
2289 if (t1 > MAX_WAIT) {
55c4446b 2290 trace_ram_save_iterate_big_wait(t1, i);
56e93d26
JQ
2291 break;
2292 }
2293 }
2294 i++;
2295 }
ce25d337 2296 flush_compressed_data(rs);
56e93d26
JQ
2297 rcu_read_unlock();
2298
2299 /*
2300 * Must occur before EOS (or any QEMUFile operation)
2301 * because of RDMA protocol.
2302 */
2303 ram_control_after_iterate(f, RAM_CONTROL_ROUND);
2304
2305 qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
9360447d 2306 ram_counters.transferred += 8;
56e93d26
JQ
2307
2308 ret = qemu_file_get_error(f);
2309 if (ret < 0) {
2310 return ret;
2311 }
2312
5c90308f 2313 return done;
56e93d26
JQ
2314}
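/*
 * Editor's sketch, not part of ram.c: the throttled time check used in
 * ram_save_iterate() shown on its own.  The clock is only read every 64
 * iterations (i & 63) because qemu_clock_get_ns() is comparatively
 * expensive; 't0' is the start of the round in nanoseconds.
 */
static bool example_round_over_budget(int64_t t0, int i)
{
    uint64_t t1;

    if ((i & 63) != 0) {
        return false;                    /* skip the expensive clock read */
    }
    t1 = (qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - t0) / 1000000;
    return t1 > MAX_WAIT;                /* MAX_WAIT is in milliseconds */
}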
2315
3d0684b2
JQ
2316/**
2317 * ram_save_complete: function called to send the remaining amount of ram
2318 *
2319 * Returns zero to indicate success
2320 *
2321 * Called with iothread lock
2322 *
2323 * @f: QEMUFile where to send the data
2324 * @opaque: RAMState pointer
2325 */
56e93d26
JQ
2326static int ram_save_complete(QEMUFile *f, void *opaque)
2327{
53518d94
JQ
2328 RAMState **temp = opaque;
2329 RAMState *rs = *temp;
6f37bb8b 2330
56e93d26
JQ
2331 rcu_read_lock();
2332
5727309d 2333 if (!migration_in_postcopy()) {
8d820d6f 2334 migration_bitmap_sync(rs);
663e6c1d 2335 }
56e93d26
JQ
2336
2337 ram_control_before_iterate(f, RAM_CONTROL_FINISH);
2338
2339 /* try transferring iterative blocks of memory */
2340
2341 /* flush all remaining blocks regardless of rate limiting */
2342 while (true) {
2343 int pages;
2344
ce25d337 2345 pages = ram_find_and_save_block(rs, !migration_in_colo_state());
56e93d26
JQ
2346        /* no more blocks to send */
2347 if (pages == 0) {
2348 break;
2349 }
2350 }
2351
ce25d337 2352 flush_compressed_data(rs);
56e93d26 2353 ram_control_after_iterate(f, RAM_CONTROL_FINISH);
56e93d26
JQ
2354
2355 rcu_read_unlock();
d09a6fde 2356
56e93d26
JQ
2357 qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
2358
2359 return 0;
2360}
2361
c31b098f
DDAG
2362static void ram_save_pending(QEMUFile *f, void *opaque, uint64_t max_size,
2363 uint64_t *non_postcopiable_pending,
2364 uint64_t *postcopiable_pending)
56e93d26 2365{
53518d94
JQ
2366 RAMState **temp = opaque;
2367 RAMState *rs = *temp;
56e93d26
JQ
2368 uint64_t remaining_size;
2369
9edabd4d 2370 remaining_size = rs->migration_dirty_pages * TARGET_PAGE_SIZE;
56e93d26 2371
5727309d 2372 if (!migration_in_postcopy() &&
663e6c1d 2373 remaining_size < max_size) {
56e93d26
JQ
2374 qemu_mutex_lock_iothread();
2375 rcu_read_lock();
8d820d6f 2376 migration_bitmap_sync(rs);
56e93d26
JQ
2377 rcu_read_unlock();
2378 qemu_mutex_unlock_iothread();
9edabd4d 2379 remaining_size = rs->migration_dirty_pages * TARGET_PAGE_SIZE;
56e93d26 2380 }
c31b098f 2381
86e1167e
VSO
2382 if (migrate_postcopy_ram()) {
2383 /* We can do postcopy, and all the data is postcopiable */
2384 *postcopiable_pending += remaining_size;
2385 } else {
2386 *non_postcopiable_pending += remaining_size;
2387 }
56e93d26
JQ
2388}
2389
2390static int load_xbzrle(QEMUFile *f, ram_addr_t addr, void *host)
2391{
2392 unsigned int xh_len;
2393 int xh_flags;
063e760a 2394 uint8_t *loaded_data;
56e93d26 2395
56e93d26
JQ
2396 /* extract RLE header */
2397 xh_flags = qemu_get_byte(f);
2398 xh_len = qemu_get_be16(f);
2399
2400 if (xh_flags != ENCODING_FLAG_XBZRLE) {
2401 error_report("Failed to load XBZRLE page - wrong compression!");
2402 return -1;
2403 }
2404
2405 if (xh_len > TARGET_PAGE_SIZE) {
2406 error_report("Failed to load XBZRLE page - len overflow!");
2407 return -1;
2408 }
f265e0e4 2409 loaded_data = XBZRLE.decoded_buf;
56e93d26 2410 /* load data and decode */
f265e0e4 2411 /* it can change loaded_data to point to an internal buffer */
063e760a 2412 qemu_get_buffer_in_place(f, &loaded_data, xh_len);
56e93d26
JQ
2413
2414 /* decode RLE */
063e760a 2415 if (xbzrle_decode_buffer(loaded_data, xh_len, host,
56e93d26
JQ
2416 TARGET_PAGE_SIZE) == -1) {
2417 error_report("Failed to load XBZRLE page - decode error!");
2418 return -1;
2419 }
2420
2421 return 0;
2422}
2423
3d0684b2
JQ
2424/**
2425 * ram_block_from_stream: read a RAMBlock id from the migration stream
2426 *
2427 * Must be called from within a rcu critical section.
2428 *
56e93d26 2429 * Returns a pointer from within the RCU-protected ram_list.
a7180877 2430 *
3d0684b2
JQ
2431 * @f: QEMUFile where to read the data from
2432 * @flags: Page flags (mostly to see if it's a continuation of previous block)
a7180877 2433 */
3d0684b2 2434static inline RAMBlock *ram_block_from_stream(QEMUFile *f, int flags)
56e93d26
JQ
2435{
2436 static RAMBlock *block = NULL;
2437 char id[256];
2438 uint8_t len;
2439
2440 if (flags & RAM_SAVE_FLAG_CONTINUE) {
4c4bad48 2441 if (!block) {
56e93d26
JQ
2442 error_report("Ack, bad migration stream!");
2443 return NULL;
2444 }
4c4bad48 2445 return block;
56e93d26
JQ
2446 }
2447
2448 len = qemu_get_byte(f);
2449 qemu_get_buffer(f, (uint8_t *)id, len);
2450 id[len] = 0;
2451
e3dd7493 2452 block = qemu_ram_block_by_name(id);
4c4bad48
HZ
2453 if (!block) {
2454 error_report("Can't find block %s", id);
2455 return NULL;
56e93d26
JQ
2456 }
2457
4c4bad48
HZ
2458 return block;
2459}
2460
2461static inline void *host_from_ram_block_offset(RAMBlock *block,
2462 ram_addr_t offset)
2463{
2464 if (!offset_in_ramblock(block, offset)) {
2465 return NULL;
2466 }
2467
2468 return block->host + offset;
56e93d26
JQ
2469}
2470
3d0684b2
JQ
2471/**
2472 * ram_handle_compressed: handle the zero page case
2473 *
56e93d26
JQ
2474 * If a page (or a whole RDMA chunk) has been
2475 * determined to be zero, then zap it.
3d0684b2
JQ
2476 *
2477 * @host: host address for the zero page
2478 * @ch: what the page is filled from. We only support zero
2479 * @size: size of the zero page
56e93d26
JQ
2480 */
2481void ram_handle_compressed(void *host, uint8_t ch, uint64_t size)
2482{
2483 if (ch != 0 || !is_zero_range(host, size)) {
2484 memset(host, ch, size);
2485 }
2486}
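/*
 * Editor's sketch, not part of ram.c: why ram_handle_compressed() checks
 * is_zero_range() before writing.  A hypothetical variant without the
 * check would memset() pages that are already zero, needlessly dirtying
 * (and allocating) destination memory.
 */
static void example_fill_page(void *host, uint8_t ch, uint64_t size)
{
    if (ch == 0 && is_zero_range(host, size)) {
        return;                          /* already zero: leave it untouched */
    }
    memset(host, ch, size);
}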
2487
2488static void *do_data_decompress(void *opaque)
2489{
2490 DecompressParam *param = opaque;
2491 unsigned long pagesize;
33d151f4
LL
2492 uint8_t *des;
2493 int len;
56e93d26 2494
33d151f4 2495 qemu_mutex_lock(&param->mutex);
90e56fb4 2496 while (!param->quit) {
33d151f4
LL
2497 if (param->des) {
2498 des = param->des;
2499 len = param->len;
2500 param->des = 0;
2501 qemu_mutex_unlock(&param->mutex);
2502
56e93d26 2503 pagesize = TARGET_PAGE_SIZE;
73a8912b
LL
2504            /* uncompress() can fail in some cases, especially when the
2505             * page was dirtied while it was being compressed; that's not
2506             * a problem because the dirty page will be retransferred and
2507             * uncompress() won't break the data in other pages.
2508 */
33d151f4
LL
2509 uncompress((Bytef *)des, &pagesize,
2510 (const Bytef *)param->compbuf, len);
73a8912b 2511
33d151f4
LL
2512 qemu_mutex_lock(&decomp_done_lock);
2513 param->done = true;
2514 qemu_cond_signal(&decomp_done_cond);
2515 qemu_mutex_unlock(&decomp_done_lock);
2516
2517 qemu_mutex_lock(&param->mutex);
2518 } else {
2519 qemu_cond_wait(&param->cond, &param->mutex);
2520 }
56e93d26 2521 }
33d151f4 2522 qemu_mutex_unlock(&param->mutex);
56e93d26
JQ
2523
2524 return NULL;
2525}
2526
5533b2e9
LL
2527static void wait_for_decompress_done(void)
2528{
2529 int idx, thread_count;
2530
2531 if (!migrate_use_compression()) {
2532 return;
2533 }
2534
2535 thread_count = migrate_decompress_threads();
2536 qemu_mutex_lock(&decomp_done_lock);
2537 for (idx = 0; idx < thread_count; idx++) {
2538 while (!decomp_param[idx].done) {
2539 qemu_cond_wait(&decomp_done_cond, &decomp_done_lock);
2540 }
2541 }
2542 qemu_mutex_unlock(&decomp_done_lock);
2543}
2544
f0afa331 2545static void compress_threads_load_setup(void)
56e93d26
JQ
2546{
2547 int i, thread_count;
2548
3416ab5b
JQ
2549 if (!migrate_use_compression()) {
2550 return;
2551 }
56e93d26
JQ
2552 thread_count = migrate_decompress_threads();
2553 decompress_threads = g_new0(QemuThread, thread_count);
2554 decomp_param = g_new0(DecompressParam, thread_count);
73a8912b
LL
2555 qemu_mutex_init(&decomp_done_lock);
2556 qemu_cond_init(&decomp_done_cond);
56e93d26
JQ
2557 for (i = 0; i < thread_count; i++) {
2558 qemu_mutex_init(&decomp_param[i].mutex);
2559 qemu_cond_init(&decomp_param[i].cond);
2560 decomp_param[i].compbuf = g_malloc0(compressBound(TARGET_PAGE_SIZE));
73a8912b 2561 decomp_param[i].done = true;
90e56fb4 2562 decomp_param[i].quit = false;
56e93d26
JQ
2563 qemu_thread_create(decompress_threads + i, "decompress",
2564 do_data_decompress, decomp_param + i,
2565 QEMU_THREAD_JOINABLE);
2566 }
2567}
2568
f0afa331 2569static void compress_threads_load_cleanup(void)
56e93d26
JQ
2570{
2571 int i, thread_count;
2572
3416ab5b
JQ
2573 if (!migrate_use_compression()) {
2574 return;
2575 }
56e93d26
JQ
2576 thread_count = migrate_decompress_threads();
2577 for (i = 0; i < thread_count; i++) {
2578 qemu_mutex_lock(&decomp_param[i].mutex);
90e56fb4 2579 decomp_param[i].quit = true;
56e93d26
JQ
2580 qemu_cond_signal(&decomp_param[i].cond);
2581 qemu_mutex_unlock(&decomp_param[i].mutex);
2582 }
2583 for (i = 0; i < thread_count; i++) {
2584 qemu_thread_join(decompress_threads + i);
2585 qemu_mutex_destroy(&decomp_param[i].mutex);
2586 qemu_cond_destroy(&decomp_param[i].cond);
2587 g_free(decomp_param[i].compbuf);
2588 }
2589 g_free(decompress_threads);
2590 g_free(decomp_param);
56e93d26
JQ
2591 decompress_threads = NULL;
2592 decomp_param = NULL;
56e93d26
JQ
2593}
2594
c1bc6626 2595static void decompress_data_with_multi_threads(QEMUFile *f,
56e93d26
JQ
2596 void *host, int len)
2597{
2598 int idx, thread_count;
2599
2600 thread_count = migrate_decompress_threads();
73a8912b 2601 qemu_mutex_lock(&decomp_done_lock);
56e93d26
JQ
2602 while (true) {
2603 for (idx = 0; idx < thread_count; idx++) {
73a8912b 2604 if (decomp_param[idx].done) {
33d151f4
LL
2605 decomp_param[idx].done = false;
2606 qemu_mutex_lock(&decomp_param[idx].mutex);
c1bc6626 2607 qemu_get_buffer(f, decomp_param[idx].compbuf, len);
56e93d26
JQ
2608 decomp_param[idx].des = host;
2609 decomp_param[idx].len = len;
33d151f4
LL
2610 qemu_cond_signal(&decomp_param[idx].cond);
2611 qemu_mutex_unlock(&decomp_param[idx].mutex);
56e93d26
JQ
2612 break;
2613 }
2614 }
2615 if (idx < thread_count) {
2616 break;
73a8912b
LL
2617 } else {
2618 qemu_cond_wait(&decomp_done_cond, &decomp_done_lock);
56e93d26
JQ
2619 }
2620 }
73a8912b 2621 qemu_mutex_unlock(&decomp_done_lock);
56e93d26
JQ
2622}
2623
f265e0e4
JQ
2624/**
2625 * ram_load_setup: Setup RAM for migration incoming side
2626 *
2627 * Returns zero to indicate success and negative for error
2628 *
2629 * @f: QEMUFile where to receive the data
2630 * @opaque: RAMState pointer
2631 */
2632static int ram_load_setup(QEMUFile *f, void *opaque)
2633{
2634 xbzrle_load_setup();
f0afa331 2635 compress_threads_load_setup();
f9494614 2636 ramblock_recv_map_init();
f265e0e4
JQ
2637 return 0;
2638}
2639
2640static int ram_load_cleanup(void *opaque)
2641{
f9494614 2642 RAMBlock *rb;
f265e0e4 2643 xbzrle_load_cleanup();
f0afa331 2644 compress_threads_load_cleanup();
f9494614
AP
2645
2646 RAMBLOCK_FOREACH(rb) {
2647 g_free(rb->receivedmap);
2648 rb->receivedmap = NULL;
2649 }
f265e0e4
JQ
2650 return 0;
2651}
2652
3d0684b2
JQ
2653/**
2654 * ram_postcopy_incoming_init: allocate postcopy data structures
2655 *
2656 * Returns 0 for success and negative if there was one error
2657 *
2658 * @mis: current migration incoming state
2659 *
2660 * Allocate data structures etc needed by incoming migration with
2661 * postcopy-ram. postcopy-ram's similarly named
2662 * postcopy_ram_incoming_init does the work.
1caddf8a
DDAG
2663 */
2664int ram_postcopy_incoming_init(MigrationIncomingState *mis)
2665{
b8c48993 2666 unsigned long ram_pages = last_ram_page();
1caddf8a
DDAG
2667
2668 return postcopy_ram_incoming_init(mis, ram_pages);
2669}
2670
3d0684b2
JQ
2671/**
2672 * ram_load_postcopy: load a page in postcopy case
2673 *
2674 * Returns 0 for success or -errno in case of error
2675 *
a7180877
DDAG
2676 * Called in postcopy mode by ram_load().
2677 * rcu_read_lock is taken prior to this being called.
3d0684b2
JQ
2678 *
2679 * @f: QEMUFile where to send the data
a7180877
DDAG
2680 */
2681static int ram_load_postcopy(QEMUFile *f)
2682{
2683 int flags = 0, ret = 0;
2684 bool place_needed = false;
28abd200 2685 bool matching_page_sizes = false;
a7180877
DDAG
2686 MigrationIncomingState *mis = migration_incoming_get_current();
2687 /* Temporary page that is later 'placed' */
2688 void *postcopy_host_page = postcopy_get_tmp_page(mis);
c53b7ddc 2689 void *last_host = NULL;
a3b6ff6d 2690 bool all_zero = false;
a7180877
DDAG
2691
2692 while (!ret && !(flags & RAM_SAVE_FLAG_EOS)) {
2693 ram_addr_t addr;
2694 void *host = NULL;
2695 void *page_buffer = NULL;
2696 void *place_source = NULL;
df9ff5e1 2697 RAMBlock *block = NULL;
a7180877 2698 uint8_t ch;
a7180877
DDAG
2699
2700 addr = qemu_get_be64(f);
2701 flags = addr & ~TARGET_PAGE_MASK;
2702 addr &= TARGET_PAGE_MASK;
2703
2704 trace_ram_load_postcopy_loop((uint64_t)addr, flags);
2705 place_needed = false;
bb890ed5 2706 if (flags & (RAM_SAVE_FLAG_ZERO | RAM_SAVE_FLAG_PAGE)) {
df9ff5e1 2707 block = ram_block_from_stream(f, flags);
4c4bad48
HZ
2708
2709 host = host_from_ram_block_offset(block, addr);
a7180877
DDAG
2710 if (!host) {
2711 error_report("Illegal RAM offset " RAM_ADDR_FMT, addr);
2712 ret = -EINVAL;
2713 break;
2714 }
28abd200 2715 matching_page_sizes = block->page_size == TARGET_PAGE_SIZE;
a7180877 2716 /*
28abd200
DDAG
2717 * Postcopy requires that we place whole host pages atomically;
2718 * these may be huge pages for RAMBlocks that are backed by
2719 * hugetlbfs.
a7180877
DDAG
2720 * To make it atomic, the data is read into a temporary page
2721 * that's moved into place later.
2722 * The migration protocol uses, possibly smaller, target-pages
2723 * however the source ensures it always sends all the components
2724 * of a host page in order.
2725 */
2726 page_buffer = postcopy_host_page +
28abd200 2727 ((uintptr_t)host & (block->page_size - 1));
a7180877 2728 /* If all TP are zero then we can optimise the place */
28abd200 2729 if (!((uintptr_t)host & (block->page_size - 1))) {
a7180877 2730 all_zero = true;
c53b7ddc
DDAG
2731 } else {
2732 /* not the 1st TP within the HP */
2733 if (host != (last_host + TARGET_PAGE_SIZE)) {
9af9e0fe 2734 error_report("Non-sequential target page %p/%p",
c53b7ddc
DDAG
2735 host, last_host);
2736 ret = -EINVAL;
2737 break;
2738 }
a7180877
DDAG
2739 }
2740
c53b7ddc 2741
a7180877
DDAG
2742 /*
2743 * If it's the last part of a host page then we place the host
2744 * page
2745 */
2746 place_needed = (((uintptr_t)host + TARGET_PAGE_SIZE) &
28abd200 2747 (block->page_size - 1)) == 0;
a7180877
DDAG
2748 place_source = postcopy_host_page;
2749 }
c53b7ddc 2750 last_host = host;
a7180877
DDAG
2751
2752 switch (flags & ~RAM_SAVE_FLAG_CONTINUE) {
bb890ed5 2753 case RAM_SAVE_FLAG_ZERO:
a7180877
DDAG
2754 ch = qemu_get_byte(f);
2755 memset(page_buffer, ch, TARGET_PAGE_SIZE);
2756 if (ch) {
2757 all_zero = false;
2758 }
2759 break;
2760
2761 case RAM_SAVE_FLAG_PAGE:
2762 all_zero = false;
2763 if (!place_needed || !matching_page_sizes) {
2764 qemu_get_buffer(f, page_buffer, TARGET_PAGE_SIZE);
2765 } else {
2766 /* Avoids the qemu_file copy during postcopy, which is
2767 * going to do a copy later; can only do it when we
2768 * do this read in one go (matching page sizes)
2769 */
2770 qemu_get_buffer_in_place(f, (uint8_t **)&place_source,
2771 TARGET_PAGE_SIZE);
2772 }
2773 break;
2774 case RAM_SAVE_FLAG_EOS:
2775 /* normal exit */
2776 break;
2777 default:
2778 error_report("Unknown combination of migration flags: %#x"
2779 " (postcopy mode)", flags);
2780 ret = -EINVAL;
2781 }
2782
2783 if (place_needed) {
2784 /* This gets called at the last target page in the host page */
df9ff5e1
DDAG
2785 void *place_dest = host + TARGET_PAGE_SIZE - block->page_size;
2786
a7180877 2787 if (all_zero) {
df9ff5e1 2788 ret = postcopy_place_page_zero(mis, place_dest,
8be4620b 2789 block);
a7180877 2790 } else {
df9ff5e1 2791 ret = postcopy_place_page(mis, place_dest,
8be4620b 2792 place_source, block);
a7180877
DDAG
2793 }
2794 }
2795 if (!ret) {
2796 ret = qemu_file_get_error(f);
2797 }
2798 }
2799
2800 return ret;
2801}
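/*
 * Editor's sketch, not part of ram.c: the offset arithmetic ram_load_postcopy()
 * uses to stage each target page inside the temporary host page and to decide
 * when the full host page can be placed.  'page_size' is the RAMBlock's host
 * page size (block->page_size above).
 */
static inline void *example_stage_dest(void *tmp_host_page, void *host,
                                       size_t page_size)
{
    /* same target-page offset inside the temporary host page */
    return (uint8_t *)tmp_host_page + ((uintptr_t)host & (page_size - 1));
}

static inline bool example_is_last_target_page(void *host, size_t page_size)
{
    /* true when this target page ends on a host-page boundary */
    return (((uintptr_t)host + TARGET_PAGE_SIZE) & (page_size - 1)) == 0;
}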
2802
acab30b8
DHB
2803static bool postcopy_is_advised(void)
2804{
2805 PostcopyState ps = postcopy_state_get();
2806 return ps >= POSTCOPY_INCOMING_ADVISE && ps < POSTCOPY_INCOMING_END;
2807}
2808
2809static bool postcopy_is_running(void)
2810{
2811 PostcopyState ps = postcopy_state_get();
2812 return ps >= POSTCOPY_INCOMING_LISTENING && ps < POSTCOPY_INCOMING_END;
2813}
2814
56e93d26
JQ
2815static int ram_load(QEMUFile *f, void *opaque, int version_id)
2816{
edc60127 2817 int flags = 0, ret = 0, invalid_flags = 0;
56e93d26
JQ
2818 static uint64_t seq_iter;
2819 int len = 0;
a7180877
DDAG
2820 /*
2821 * If system is running in postcopy mode, page inserts to host memory must
2822 * be atomic
2823 */
acab30b8 2824 bool postcopy_running = postcopy_is_running();
ef08fb38 2825 /* ADVISE is earlier, it shows the source has the postcopy capability on */
acab30b8 2826 bool postcopy_advised = postcopy_is_advised();
56e93d26
JQ
2827
2828 seq_iter++;
2829
2830 if (version_id != 4) {
2831 ret = -EINVAL;
2832 }
2833
edc60127
JQ
2834 if (!migrate_use_compression()) {
2835 invalid_flags |= RAM_SAVE_FLAG_COMPRESS_PAGE;
2836 }
56e93d26
JQ
2837 /* This RCU critical section can be very long running.
2838 * When RCU reclaims in the code start to become numerous,
2839 * it will be necessary to reduce the granularity of this
2840 * critical section.
2841 */
2842 rcu_read_lock();
a7180877
DDAG
2843
2844 if (postcopy_running) {
2845 ret = ram_load_postcopy(f);
2846 }
2847
2848 while (!postcopy_running && !ret && !(flags & RAM_SAVE_FLAG_EOS)) {
56e93d26 2849 ram_addr_t addr, total_ram_bytes;
a776aa15 2850 void *host = NULL;
56e93d26
JQ
2851 uint8_t ch;
2852
2853 addr = qemu_get_be64(f);
2854 flags = addr & ~TARGET_PAGE_MASK;
2855 addr &= TARGET_PAGE_MASK;
2856
edc60127
JQ
2857 if (flags & invalid_flags) {
2858 if (flags & invalid_flags & RAM_SAVE_FLAG_COMPRESS_PAGE) {
2859 error_report("Received an unexpected compressed page");
2860 }
2861
2862 ret = -EINVAL;
2863 break;
2864 }
2865
bb890ed5 2866 if (flags & (RAM_SAVE_FLAG_ZERO | RAM_SAVE_FLAG_PAGE |
a776aa15 2867 RAM_SAVE_FLAG_COMPRESS_PAGE | RAM_SAVE_FLAG_XBZRLE)) {
4c4bad48
HZ
2868 RAMBlock *block = ram_block_from_stream(f, flags);
2869
2870 host = host_from_ram_block_offset(block, addr);
a776aa15
DDAG
2871 if (!host) {
2872 error_report("Illegal RAM offset " RAM_ADDR_FMT, addr);
2873 ret = -EINVAL;
2874 break;
2875 }
f9494614 2876 ramblock_recv_bitmap_set(block, host);
1db9d8e5 2877 trace_ram_load_loop(block->idstr, (uint64_t)addr, flags, host);
a776aa15
DDAG
2878 }
2879
56e93d26
JQ
2880 switch (flags & ~RAM_SAVE_FLAG_CONTINUE) {
2881 case RAM_SAVE_FLAG_MEM_SIZE:
2882 /* Synchronize RAM block list */
2883 total_ram_bytes = addr;
2884 while (!ret && total_ram_bytes) {
2885 RAMBlock *block;
56e93d26
JQ
2886 char id[256];
2887 ram_addr_t length;
2888
2889 len = qemu_get_byte(f);
2890 qemu_get_buffer(f, (uint8_t *)id, len);
2891 id[len] = 0;
2892 length = qemu_get_be64(f);
2893
e3dd7493
DDAG
2894 block = qemu_ram_block_by_name(id);
2895 if (block) {
2896 if (length != block->used_length) {
2897 Error *local_err = NULL;
56e93d26 2898
fa53a0e5 2899 ret = qemu_ram_resize(block, length,
e3dd7493
DDAG
2900 &local_err);
2901 if (local_err) {
2902 error_report_err(local_err);
56e93d26 2903 }
56e93d26 2904 }
ef08fb38
DDAG
2905 /* For postcopy we need to check hugepage sizes match */
2906 if (postcopy_advised &&
2907 block->page_size != qemu_host_page_size) {
2908 uint64_t remote_page_size = qemu_get_be64(f);
2909 if (remote_page_size != block->page_size) {
2910 error_report("Mismatched RAM page size %s "
2911 "(local) %zd != %" PRId64,
2912 id, block->page_size,
2913 remote_page_size);
2914 ret = -EINVAL;
2915 }
2916 }
e3dd7493
DDAG
2917 ram_control_load_hook(f, RAM_CONTROL_BLOCK_REG,
2918 block->idstr);
2919 } else {
56e93d26
JQ
2920 error_report("Unknown ramblock \"%s\", cannot "
2921 "accept migration", id);
2922 ret = -EINVAL;
2923 }
2924
2925 total_ram_bytes -= length;
2926 }
2927 break;
a776aa15 2928
bb890ed5 2929 case RAM_SAVE_FLAG_ZERO:
56e93d26
JQ
2930 ch = qemu_get_byte(f);
2931 ram_handle_compressed(host, ch, TARGET_PAGE_SIZE);
2932 break;
a776aa15 2933
56e93d26 2934 case RAM_SAVE_FLAG_PAGE:
56e93d26
JQ
2935 qemu_get_buffer(f, host, TARGET_PAGE_SIZE);
2936 break;
56e93d26 2937
a776aa15 2938 case RAM_SAVE_FLAG_COMPRESS_PAGE:
56e93d26
JQ
2939 len = qemu_get_be32(f);
2940 if (len < 0 || len > compressBound(TARGET_PAGE_SIZE)) {
2941 error_report("Invalid compressed data length: %d", len);
2942 ret = -EINVAL;
2943 break;
2944 }
c1bc6626 2945 decompress_data_with_multi_threads(f, host, len);
56e93d26 2946 break;
a776aa15 2947
56e93d26 2948 case RAM_SAVE_FLAG_XBZRLE:
56e93d26
JQ
2949 if (load_xbzrle(f, addr, host) < 0) {
2950 error_report("Failed to decompress XBZRLE page at "
2951 RAM_ADDR_FMT, addr);
2952 ret = -EINVAL;
2953 break;
2954 }
2955 break;
2956 case RAM_SAVE_FLAG_EOS:
2957 /* normal exit */
2958 break;
2959 default:
2960 if (flags & RAM_SAVE_FLAG_HOOK) {
632e3a5c 2961 ram_control_load_hook(f, RAM_CONTROL_HOOK, NULL);
56e93d26
JQ
2962 } else {
2963 error_report("Unknown combination of migration flags: %#x",
2964 flags);
2965 ret = -EINVAL;
2966 }
2967 }
2968 if (!ret) {
2969 ret = qemu_file_get_error(f);
2970 }
2971 }
2972
5533b2e9 2973 wait_for_decompress_done();
56e93d26 2974 rcu_read_unlock();
55c4446b 2975 trace_ram_load_complete(ret, seq_iter);
56e93d26
JQ
2976 return ret;
2977}
2978
c6467627
VSO
2979static bool ram_has_postcopy(void *opaque)
2980{
2981 return migrate_postcopy_ram();
2982}
2983
56e93d26 2984static SaveVMHandlers savevm_ram_handlers = {
9907e842 2985 .save_setup = ram_save_setup,
56e93d26 2986 .save_live_iterate = ram_save_iterate,
763c906b 2987 .save_live_complete_postcopy = ram_save_complete,
a3e06c3d 2988 .save_live_complete_precopy = ram_save_complete,
c6467627 2989 .has_postcopy = ram_has_postcopy,
56e93d26
JQ
2990 .save_live_pending = ram_save_pending,
2991 .load_state = ram_load,
f265e0e4
JQ
2992 .save_cleanup = ram_save_cleanup,
2993 .load_setup = ram_load_setup,
2994 .load_cleanup = ram_load_cleanup,
56e93d26
JQ
2995};
2996
2997void ram_mig_init(void)
2998{
2999 qemu_mutex_init(&XBZRLE.lock);
6f37bb8b 3000 register_savevm_live(NULL, "ram", 0, 4, &savevm_ram_handlers, &ram_state);
56e93d26 3001}