migration: move some code to ram_save_host_page
[mirror_qemu.git] / migration / ram.c
56e93d26
JQ
1/*
2 * QEMU System Emulator
3 *
4 * Copyright (c) 2003-2008 Fabrice Bellard
76cc7b58
JQ
5 * Copyright (c) 2011-2015 Red Hat Inc
6 *
7 * Authors:
8 * Juan Quintela <quintela@redhat.com>
56e93d26
JQ
9 *
10 * Permission is hereby granted, free of charge, to any person obtaining a copy
11 * of this software and associated documentation files (the "Software"), to deal
12 * in the Software without restriction, including without limitation the rights
13 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
14 * copies of the Software, and to permit persons to whom the Software is
15 * furnished to do so, subject to the following conditions:
16 *
17 * The above copyright notice and this permission notice shall be included in
18 * all copies or substantial portions of the Software.
19 *
20 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
21 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
23 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
24 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
25 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
26 * THE SOFTWARE.
27 */
e688df6b 28
1393a485 29#include "qemu/osdep.h"
33c11879 30#include "cpu.h"
56e93d26 31#include <zlib.h>
f348b6d1 32#include "qemu/cutils.h"
56e93d26
JQ
33#include "qemu/bitops.h"
34#include "qemu/bitmap.h"
7205c9ec 35#include "qemu/main-loop.h"
709e3fe8 36#include "xbzrle.h"
7b1e1a22 37#include "ram.h"
6666c96a 38#include "migration.h"
f2a8f0a6 39#include "migration/register.h"
7b1e1a22 40#include "migration/misc.h"
08a0aee1 41#include "qemu-file.h"
be07b0ac 42#include "postcopy-ram.h"
56e93d26 43#include "migration/page_cache.h"
56e93d26 44#include "qemu/error-report.h"
e688df6b 45#include "qapi/error.h"
9af23989 46#include "qapi/qapi-events-migration.h"
8acabf69 47#include "qapi/qmp/qerror.h"
56e93d26 48#include "trace.h"
56e93d26 49#include "exec/ram_addr.h"
f9494614 50#include "exec/target_page.h"
56e93d26 51#include "qemu/rcu_queue.h"
a91246c9 52#include "migration/colo.h"
9ac78b61 53#include "migration/block.h"
56e93d26 54
56e93d26
JQ
55/***********************************************************/
56/* ram save/restore */
57
bb890ed5
JQ
58/* RAM_SAVE_FLAG_ZERO used to be named RAM_SAVE_FLAG_COMPRESS; it
59 * worked for pages that were filled with the same char. We switched
60 * it to only search for the zero value, and renamed it to avoid
61 * confusion with RAM_SAVE_FLAG_COMPRESS_PAGE.
62 */
63
56e93d26 64#define RAM_SAVE_FLAG_FULL 0x01 /* Obsolete, not used anymore */
bb890ed5 65#define RAM_SAVE_FLAG_ZERO 0x02
56e93d26
JQ
66#define RAM_SAVE_FLAG_MEM_SIZE 0x04
67#define RAM_SAVE_FLAG_PAGE 0x08
68#define RAM_SAVE_FLAG_EOS 0x10
69#define RAM_SAVE_FLAG_CONTINUE 0x20
70#define RAM_SAVE_FLAG_XBZRLE 0x40
71/* 0x80 is reserved in migration.h; start at 0x100 for the next flag */
72#define RAM_SAVE_FLAG_COMPRESS_PAGE 0x100
73
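/*
 * Illustrative note: because RAM offsets are target-page aligned, the
 * low bits of the 64-bit offset word written by save_page_header() are
 * free to carry the flags above.  For example, a hypothetical header
 * word for a zero page at block offset 0x2000 that continues the
 * previous block could be built as:
 *
 *     ram_addr_t hdr = 0x2000 | RAM_SAVE_FLAG_ZERO | RAM_SAVE_FLAG_CONTINUE;
 */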
56e93d26
JQ
74static inline bool is_zero_range(uint8_t *p, uint64_t size)
75{
a1febc49 76 return buffer_is_zero(p, size);
56e93d26
JQ
77}
78
9360447d
JQ
79XBZRLECacheStats xbzrle_counters;
80
56e93d26
JQ
81/* struct containing the XBZRLE cache and a static page
82 used by the compression */
83static struct {
84 /* buffer used for XBZRLE encoding */
85 uint8_t *encoded_buf;
86 /* buffer for storing page content */
87 uint8_t *current_buf;
88 /* Cache for XBZRLE, Protected by lock. */
89 PageCache *cache;
90 QemuMutex lock;
c00e0928
JQ
91 /* buffer holding a page full of zeros */
92 uint8_t *zero_target_page;
f265e0e4
JQ
93 /* buffer used for XBZRLE decoding */
94 uint8_t *decoded_buf;
56e93d26
JQ
95} XBZRLE;
96
56e93d26
JQ
97static void XBZRLE_cache_lock(void)
98{
99 if (migrate_use_xbzrle())
100 qemu_mutex_lock(&XBZRLE.lock);
101}
102
103static void XBZRLE_cache_unlock(void)
104{
105 if (migrate_use_xbzrle())
106 qemu_mutex_unlock(&XBZRLE.lock);
107}
108
3d0684b2
JQ
109/**
110 * xbzrle_cache_resize: resize the xbzrle cache
111 *
112 * This function is called from qmp_migrate_set_cache_size in the main
113 * thread, possibly while a migration is in progress. A running
114 * migration may be using the cache and might finish during this call,
115 * hence changes to the cache are protected by the XBZRLE.lock mutex.
116 *
c9dede2d 117 * Returns 0 for success or -1 for error
3d0684b2
JQ
118 *
119 * @new_size: new cache size
8acabf69 120 * @errp: set to the failure reason if the check fails
56e93d26 121 */
c9dede2d 122int xbzrle_cache_resize(int64_t new_size, Error **errp)
56e93d26
JQ
123{
124 PageCache *new_cache;
c9dede2d 125 int64_t ret = 0;
56e93d26 126
8acabf69
JQ
127 /* Check for truncation */
128 if (new_size != (size_t)new_size) {
129 error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "cache size",
130 "exceeding address space");
131 return -1;
132 }
133
2a313e5c
JQ
134 if (new_size == migrate_xbzrle_cache_size()) {
135 /* nothing to do */
c9dede2d 136 return 0;
2a313e5c
JQ
137 }
138
56e93d26
JQ
139 XBZRLE_cache_lock();
140
141 if (XBZRLE.cache != NULL) {
80f8dfde 142 new_cache = cache_init(new_size, TARGET_PAGE_SIZE, errp);
56e93d26 143 if (!new_cache) {
56e93d26
JQ
144 ret = -1;
145 goto out;
146 }
147
148 cache_fini(XBZRLE.cache);
149 XBZRLE.cache = new_cache;
150 }
56e93d26
JQ
151out:
152 XBZRLE_cache_unlock();
153 return ret;
154}
155
f9494614
AP
156static void ramblock_recv_map_init(void)
157{
158 RAMBlock *rb;
159
160 RAMBLOCK_FOREACH(rb) {
161 assert(!rb->receivedmap);
162 rb->receivedmap = bitmap_new(rb->max_length >> qemu_target_page_bits());
163 }
164}
165
166int ramblock_recv_bitmap_test(RAMBlock *rb, void *host_addr)
167{
168 return test_bit(ramblock_recv_bitmap_offset(host_addr, rb),
169 rb->receivedmap);
170}
171
1cba9f6e
DDAG
172bool ramblock_recv_bitmap_test_byte_offset(RAMBlock *rb, uint64_t byte_offset)
173{
174 return test_bit(byte_offset >> TARGET_PAGE_BITS, rb->receivedmap);
175}
176
f9494614
AP
177void ramblock_recv_bitmap_set(RAMBlock *rb, void *host_addr)
178{
179 set_bit_atomic(ramblock_recv_bitmap_offset(host_addr, rb), rb->receivedmap);
180}
181
182void ramblock_recv_bitmap_set_range(RAMBlock *rb, void *host_addr,
183 size_t nr)
184{
185 bitmap_set_atomic(rb->receivedmap,
186 ramblock_recv_bitmap_offset(host_addr, rb),
187 nr);
188}
189
ec481c6c
JQ
190/*
191 * An outstanding page request, on the source, having been received
192 * and queued
193 */
194struct RAMSrcPageRequest {
195 RAMBlock *rb;
196 hwaddr offset;
197 hwaddr len;
198
199 QSIMPLEQ_ENTRY(RAMSrcPageRequest) next_req;
200};
201
6f37bb8b
JQ
202/* State of RAM for migration */
203struct RAMState {
204b88b8
JQ
204 /* QEMUFile used for this migration */
205 QEMUFile *f;
6f37bb8b
JQ
206 /* Last block that we have visited searching for dirty pages */
207 RAMBlock *last_seen_block;
208 /* Last block from where we have sent data */
209 RAMBlock *last_sent_block;
269ace29
JQ
210 /* Last dirty target page we have sent */
211 ram_addr_t last_page;
6f37bb8b
JQ
212 /* last ram version we have seen */
213 uint32_t last_version;
214 /* We are in the first round */
215 bool ram_bulk_stage;
8d820d6f
JQ
216 /* How many times we have dirty too many pages */
217 int dirty_rate_high_cnt;
f664da80
JQ
218 /* these variables are used for bitmap sync */
219 /* last time we did a full bitmap_sync */
220 int64_t time_last_bitmap_sync;
eac74159 221 /* bytes transferred at start_time */
c4bdf0cf 222 uint64_t bytes_xfer_prev;
a66cd90c 223 /* number of dirty pages since start_time */
68908ed6 224 uint64_t num_dirty_pages_period;
b5833fde
JQ
225 /* xbzrle misses since the beginning of the period */
226 uint64_t xbzrle_cache_miss_prev;
36040d9c
JQ
227 /* number of iterations at the beginning of period */
228 uint64_t iterations_prev;
23b28c3c
JQ
229 /* Iterations since start */
230 uint64_t iterations;
9360447d 231 /* number of dirty bits in the bitmap */
2dfaf12e
PX
232 uint64_t migration_dirty_pages;
233 /* protects modification of the bitmap */
108cfae0 234 QemuMutex bitmap_mutex;
68a098f3
JQ
235 /* The RAMBlock used in the last src_page_requests */
236 RAMBlock *last_req_rb;
ec481c6c
JQ
237 /* Queue of outstanding page requests from the destination */
238 QemuMutex src_page_req_mutex;
239 QSIMPLEQ_HEAD(src_page_requests, RAMSrcPageRequest) src_page_requests;
6f37bb8b
JQ
240};
241typedef struct RAMState RAMState;
242
53518d94 243static RAMState *ram_state;
6f37bb8b 244
9edabd4d 245uint64_t ram_bytes_remaining(void)
2f4fde93 246{
bae416e5
DDAG
247 return ram_state ? (ram_state->migration_dirty_pages * TARGET_PAGE_SIZE) :
248 0;
2f4fde93
JQ
249}
250
9360447d 251MigrationStats ram_counters;
96506894 252
b8fb8cb7
DDAG
253/* used by the search for pages to send */
254struct PageSearchStatus {
255 /* Current block being searched */
256 RAMBlock *block;
a935e30f
JQ
257 /* Current page to search from */
258 unsigned long page;
b8fb8cb7
DDAG
259 /* Set once we wrap around */
260 bool complete_round;
261};
262typedef struct PageSearchStatus PageSearchStatus;
263
56e93d26 264struct CompressParam {
56e93d26 265 bool done;
90e56fb4 266 bool quit;
56e93d26
JQ
267 QEMUFile *file;
268 QemuMutex mutex;
269 QemuCond cond;
270 RAMBlock *block;
271 ram_addr_t offset;
34ab9e97
XG
272
273 /* internally used fields */
dcaf446e 274 z_stream stream;
34ab9e97 275 uint8_t *originbuf;
56e93d26
JQ
276};
277typedef struct CompressParam CompressParam;
278
279struct DecompressParam {
73a8912b 280 bool done;
90e56fb4 281 bool quit;
56e93d26
JQ
282 QemuMutex mutex;
283 QemuCond cond;
284 void *des;
d341d9f3 285 uint8_t *compbuf;
56e93d26 286 int len;
797ca154 287 z_stream stream;
56e93d26
JQ
288};
289typedef struct DecompressParam DecompressParam;
290
291static CompressParam *comp_param;
292static QemuThread *compress_threads;
293/* comp_done_cond is used to wake up the migration thread when
294 * one of the compression threads has finished the compression.
295 * comp_done_lock is used to co-work with comp_done_cond.
296 */
0d9f9a5c
LL
297static QemuMutex comp_done_lock;
298static QemuCond comp_done_cond;
56e93d26
JQ
299/* The empty QEMUFileOps will be used by file in CompressParam */
300static const QEMUFileOps empty_ops = { };
301
34ab9e97 302static QEMUFile *decomp_file;
56e93d26
JQ
303static DecompressParam *decomp_param;
304static QemuThread *decompress_threads;
73a8912b
LL
305static QemuMutex decomp_done_lock;
306static QemuCond decomp_done_cond;
56e93d26 307
dcaf446e 308static int do_compress_ram_page(QEMUFile *f, z_stream *stream, RAMBlock *block,
34ab9e97 309 ram_addr_t offset, uint8_t *source_buf);
56e93d26
JQ
310
311static void *do_data_compress(void *opaque)
312{
313 CompressParam *param = opaque;
a7a9a88f
LL
314 RAMBlock *block;
315 ram_addr_t offset;
56e93d26 316
a7a9a88f 317 qemu_mutex_lock(&param->mutex);
90e56fb4 318 while (!param->quit) {
a7a9a88f
LL
319 if (param->block) {
320 block = param->block;
321 offset = param->offset;
322 param->block = NULL;
323 qemu_mutex_unlock(&param->mutex);
324
34ab9e97
XG
325 do_compress_ram_page(param->file, &param->stream, block, offset,
326 param->originbuf);
a7a9a88f 327
0d9f9a5c 328 qemu_mutex_lock(&comp_done_lock);
a7a9a88f 329 param->done = true;
0d9f9a5c
LL
330 qemu_cond_signal(&comp_done_cond);
331 qemu_mutex_unlock(&comp_done_lock);
a7a9a88f
LL
332
333 qemu_mutex_lock(&param->mutex);
334 } else {
56e93d26
JQ
335 qemu_cond_wait(&param->cond, &param->mutex);
336 }
56e93d26 337 }
a7a9a88f 338 qemu_mutex_unlock(&param->mutex);
56e93d26
JQ
339
340 return NULL;
341}
342
343static inline void terminate_compression_threads(void)
344{
345 int idx, thread_count;
346
347 thread_count = migrate_compress_threads();
3d0684b2 348
56e93d26
JQ
349 for (idx = 0; idx < thread_count; idx++) {
350 qemu_mutex_lock(&comp_param[idx].mutex);
90e56fb4 351 comp_param[idx].quit = true;
56e93d26
JQ
352 qemu_cond_signal(&comp_param[idx].cond);
353 qemu_mutex_unlock(&comp_param[idx].mutex);
354 }
355}
356
f0afa331 357static void compress_threads_save_cleanup(void)
56e93d26
JQ
358{
359 int i, thread_count;
360
361 if (!migrate_use_compression()) {
362 return;
363 }
364 terminate_compression_threads();
365 thread_count = migrate_compress_threads();
366 for (i = 0; i < thread_count; i++) {
dcaf446e
XG
367 /*
368 * we use it as an indicator of whether the thread has been
369 * properly initialized or not
370 */
371 if (!comp_param[i].file) {
372 break;
373 }
56e93d26 374 qemu_thread_join(compress_threads + i);
56e93d26
JQ
375 qemu_mutex_destroy(&comp_param[i].mutex);
376 qemu_cond_destroy(&comp_param[i].cond);
dcaf446e 377 deflateEnd(&comp_param[i].stream);
34ab9e97 378 g_free(comp_param[i].originbuf);
dcaf446e
XG
379 qemu_fclose(comp_param[i].file);
380 comp_param[i].file = NULL;
56e93d26 381 }
0d9f9a5c
LL
382 qemu_mutex_destroy(&comp_done_lock);
383 qemu_cond_destroy(&comp_done_cond);
56e93d26
JQ
384 g_free(compress_threads);
385 g_free(comp_param);
56e93d26
JQ
386 compress_threads = NULL;
387 comp_param = NULL;
56e93d26
JQ
388}
389
dcaf446e 390static int compress_threads_save_setup(void)
56e93d26
JQ
391{
392 int i, thread_count;
393
394 if (!migrate_use_compression()) {
dcaf446e 395 return 0;
56e93d26 396 }
56e93d26
JQ
397 thread_count = migrate_compress_threads();
398 compress_threads = g_new0(QemuThread, thread_count);
399 comp_param = g_new0(CompressParam, thread_count);
0d9f9a5c
LL
400 qemu_cond_init(&comp_done_cond);
401 qemu_mutex_init(&comp_done_lock);
56e93d26 402 for (i = 0; i < thread_count; i++) {
34ab9e97
XG
403 comp_param[i].originbuf = g_try_malloc(TARGET_PAGE_SIZE);
404 if (!comp_param[i].originbuf) {
405 goto exit;
406 }
407
dcaf446e
XG
408 if (deflateInit(&comp_param[i].stream,
409 migrate_compress_level()) != Z_OK) {
34ab9e97 410 g_free(comp_param[i].originbuf);
dcaf446e
XG
411 goto exit;
412 }
413
e110aa91
C
414 /* comp_param[i].file is just used as a dummy buffer to save data,
415 * set its ops to empty.
56e93d26
JQ
416 */
417 comp_param[i].file = qemu_fopen_ops(NULL, &empty_ops);
418 comp_param[i].done = true;
90e56fb4 419 comp_param[i].quit = false;
56e93d26
JQ
420 qemu_mutex_init(&comp_param[i].mutex);
421 qemu_cond_init(&comp_param[i].cond);
422 qemu_thread_create(compress_threads + i, "compress",
423 do_data_compress, comp_param + i,
424 QEMU_THREAD_JOINABLE);
425 }
dcaf446e
XG
426 return 0;
427
428exit:
429 compress_threads_save_cleanup();
430 return -1;
56e93d26
JQ
431}
432
f986c3d2
JQ
433/* Multiple fd's */
434
435struct MultiFDSendParams {
436 uint8_t id;
437 char *name;
438 QemuThread thread;
439 QemuSemaphore sem;
440 QemuMutex mutex;
441 bool quit;
442};
443typedef struct MultiFDSendParams MultiFDSendParams;
444
445struct {
446 MultiFDSendParams *params;
447 /* number of created threads */
448 int count;
449} *multifd_send_state;
450
451static void terminate_multifd_send_threads(Error *errp)
452{
453 int i;
454
455 for (i = 0; i < multifd_send_state->count; i++) {
456 MultiFDSendParams *p = &multifd_send_state->params[i];
457
458 qemu_mutex_lock(&p->mutex);
459 p->quit = true;
460 qemu_sem_post(&p->sem);
461 qemu_mutex_unlock(&p->mutex);
462 }
463}
464
465int multifd_save_cleanup(Error **errp)
466{
467 int i;
468 int ret = 0;
469
470 if (!migrate_use_multifd()) {
471 return 0;
472 }
473 terminate_multifd_send_threads(NULL);
474 for (i = 0; i < multifd_send_state->count; i++) {
475 MultiFDSendParams *p = &multifd_send_state->params[i];
476
477 qemu_thread_join(&p->thread);
478 qemu_mutex_destroy(&p->mutex);
479 qemu_sem_destroy(&p->sem);
480 g_free(p->name);
481 p->name = NULL;
482 }
483 g_free(multifd_send_state->params);
484 multifd_send_state->params = NULL;
485 g_free(multifd_send_state);
486 multifd_send_state = NULL;
487 return ret;
488}
489
490static void *multifd_send_thread(void *opaque)
491{
492 MultiFDSendParams *p = opaque;
493
494 while (true) {
495 qemu_mutex_lock(&p->mutex);
496 if (p->quit) {
497 qemu_mutex_unlock(&p->mutex);
498 break;
499 }
500 qemu_mutex_unlock(&p->mutex);
501 qemu_sem_wait(&p->sem);
502 }
503
504 return NULL;
505}
506
507int multifd_save_setup(void)
508{
509 int thread_count;
510 uint8_t i;
511
512 if (!migrate_use_multifd()) {
513 return 0;
514 }
515 thread_count = migrate_multifd_channels();
516 multifd_send_state = g_malloc0(sizeof(*multifd_send_state));
517 multifd_send_state->params = g_new0(MultiFDSendParams, thread_count);
518 multifd_send_state->count = 0;
519 for (i = 0; i < thread_count; i++) {
520 MultiFDSendParams *p = &multifd_send_state->params[i];
521
522 qemu_mutex_init(&p->mutex);
523 qemu_sem_init(&p->sem, 0);
524 p->quit = false;
525 p->id = i;
526 p->name = g_strdup_printf("multifdsend_%d", i);
527 qemu_thread_create(&p->thread, p->name, multifd_send_thread, p,
528 QEMU_THREAD_JOINABLE);
529
530 multifd_send_state->count++;
531 }
532 return 0;
533}
534
535struct MultiFDRecvParams {
536 uint8_t id;
537 char *name;
538 QemuThread thread;
539 QemuSemaphore sem;
540 QemuMutex mutex;
541 bool quit;
542};
543typedef struct MultiFDRecvParams MultiFDRecvParams;
544
545struct {
546 MultiFDRecvParams *params;
547 /* number of created threads */
548 int count;
549} *multifd_recv_state;
550
551static void terminate_multifd_recv_threads(Error *errp)
552{
553 int i;
554
555 for (i = 0; i < multifd_recv_state->count; i++) {
556 MultiFDRecvParams *p = &multifd_recv_state->params[i];
557
558 qemu_mutex_lock(&p->mutex);
559 p->quit = true;
560 qemu_sem_post(&p->sem);
561 qemu_mutex_unlock(&p->mutex);
562 }
563}
564
565int multifd_load_cleanup(Error **errp)
566{
567 int i;
568 int ret = 0;
569
570 if (!migrate_use_multifd()) {
571 return 0;
572 }
573 terminate_multifd_recv_threads(NULL);
574 for (i = 0; i < multifd_recv_state->count; i++) {
575 MultiFDRecvParams *p = &multifd_recv_state->params[i];
576
577 qemu_thread_join(&p->thread);
578 qemu_mutex_destroy(&p->mutex);
579 qemu_sem_destroy(&p->sem);
580 g_free(p->name);
581 p->name = NULL;
582 }
583 g_free(multifd_recv_state->params);
584 multifd_recv_state->params = NULL;
585 g_free(multifd_recv_state);
586 multifd_recv_state = NULL;
587
588 return ret;
589}
590
591static void *multifd_recv_thread(void *opaque)
592{
593 MultiFDRecvParams *p = opaque;
594
595 while (true) {
596 qemu_mutex_lock(&p->mutex);
597 if (p->quit) {
598 qemu_mutex_unlock(&p->mutex);
599 break;
600 }
601 qemu_mutex_unlock(&p->mutex);
602 qemu_sem_wait(&p->sem);
603 }
604
605 return NULL;
606}
607
608int multifd_load_setup(void)
609{
610 int thread_count;
611 uint8_t i;
612
613 if (!migrate_use_multifd()) {
614 return 0;
615 }
616 thread_count = migrate_multifd_channels();
617 multifd_recv_state = g_malloc0(sizeof(*multifd_recv_state));
618 multifd_recv_state->params = g_new0(MultiFDRecvParams, thread_count);
619 multifd_recv_state->count = 0;
620 for (i = 0; i < thread_count; i++) {
621 MultiFDRecvParams *p = &multifd_recv_state->params[i];
622
623 qemu_mutex_init(&p->mutex);
624 qemu_sem_init(&p->sem, 0);
625 p->quit = false;
626 p->id = i;
627 p->name = g_strdup_printf("multifdrecv_%d", i);
628 qemu_thread_create(&p->thread, p->name, multifd_recv_thread, p,
629 QEMU_THREAD_JOINABLE);
630 multifd_recv_state->count++;
631 }
632 return 0;
633}
634
56e93d26 635/**
3d0684b2 636 * save_page_header: write page header to wire
56e93d26
JQ
637 *
638 * If the block differs from the last block sent, it also writes the block identification
639 *
3d0684b2 640 * Returns the number of bytes written
56e93d26
JQ
641 *
642 * @f: QEMUFile where to send the data
643 * @block: block that contains the page we want to send
644 * @offset: offset inside the block for the page
645 * in the lower bits, it contains flags
646 */
2bf3aa85
JQ
647static size_t save_page_header(RAMState *rs, QEMUFile *f, RAMBlock *block,
648 ram_addr_t offset)
56e93d26 649{
9f5f380b 650 size_t size, len;
56e93d26 651
24795694
JQ
652 if (block == rs->last_sent_block) {
653 offset |= RAM_SAVE_FLAG_CONTINUE;
654 }
2bf3aa85 655 qemu_put_be64(f, offset);
56e93d26
JQ
656 size = 8;
657
658 if (!(offset & RAM_SAVE_FLAG_CONTINUE)) {
9f5f380b 659 len = strlen(block->idstr);
2bf3aa85
JQ
660 qemu_put_byte(f, len);
661 qemu_put_buffer(f, (uint8_t *)block->idstr, len);
9f5f380b 662 size += 1 + len;
24795694 663 rs->last_sent_block = block;
56e93d26
JQ
664 }
665 return size;
666}
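/*
 * Illustrative note on the wire format produced above: a header is the
 * 8-byte offset/flags word and, only when RAM_SAVE_FLAG_CONTINUE is not
 * set, a one-byte idstr length followed by the idstr bytes; that is the
 * 8 + 1 + len accounting returned by save_page_header().
 */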
667
3d0684b2
JQ
668/**
669 * mig_throttle_guest_down: throttle down the guest
670 *
671 * Reduce the amount of guest CPU execution to hopefully slow down memory
672 * writes. If guest dirty memory rate is reduced below the rate at
673 * which we can transfer pages to the destination then we should be
674 * able to complete migration. Some workloads dirty memory way too
675 * fast and will not effectively converge, even with auto-converge.
070afca2
JH
676 */
677static void mig_throttle_guest_down(void)
678{
679 MigrationState *s = migrate_get_current();
2594f56d
DB
680 uint64_t pct_initial = s->parameters.cpu_throttle_initial;
681 uint64_t pct_icrement = s->parameters.cpu_throttle_increment;
070afca2
JH
682
683 /* We have not started throttling yet. Let's start it. */
684 if (!cpu_throttle_active()) {
685 cpu_throttle_set(pct_initial);
686 } else {
687 /* Throttling already on, just increase the rate */
688 cpu_throttle_set(cpu_throttle_get_percentage() + pct_icrement);
689 }
690}
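/*
 * Illustrative example (hypothetical parameter values): with
 * cpu_throttle_initial = 20 and cpu_throttle_increment = 10, the first
 * call to mig_throttle_guest_down() makes the vCPUs sleep for 20% of
 * their time, and each further call raises that to 30%, 40%, and so on,
 * until the dirty rate falls below the transfer rate or migration
 * completes.
 */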
691
3d0684b2
JQ
692/**
693 * xbzrle_cache_zero_page: insert a zero page in the XBZRLE cache
694 *
6f37bb8b 695 * @rs: current RAM state
3d0684b2
JQ
696 * @current_addr: address for the zero page
697 *
698 * Update the xbzrle cache to reflect a page that's been sent as all 0.
56e93d26
JQ
699 * The important thing is that a stale (not-yet-0'd) page be replaced
700 * by the new data.
701 * As a bonus, if the page wasn't in the cache it gets added so that
3d0684b2 702 * when a small write is made into the 0'd page it gets XBZRLE sent.
56e93d26 703 */
6f37bb8b 704static void xbzrle_cache_zero_page(RAMState *rs, ram_addr_t current_addr)
56e93d26 705{
6f37bb8b 706 if (rs->ram_bulk_stage || !migrate_use_xbzrle()) {
56e93d26
JQ
707 return;
708 }
709
710 /* We don't care if this fails to allocate a new cache page
711 * as long as it updated an old one */
c00e0928 712 cache_insert(XBZRLE.cache, current_addr, XBZRLE.zero_target_page,
9360447d 713 ram_counters.dirty_sync_count);
56e93d26
JQ
714}
715
716#define ENCODING_FLAG_XBZRLE 0x1
717
718/**
719 * save_xbzrle_page: compress and send current page
720 *
721 * Returns: 1 means that we wrote the page
722 * 0 means that page is identical to the one already sent
723 * -1 means that xbzrle would be longer than normal
724 *
5a987738 725 * @rs: current RAM state
3d0684b2
JQ
726 * @current_data: pointer to the address of the page contents
727 * @current_addr: addr of the page
56e93d26
JQ
728 * @block: block that contains the page we want to send
729 * @offset: offset inside the block for the page
730 * @last_stage: if we are at the completion stage
56e93d26 731 */
204b88b8 732static int save_xbzrle_page(RAMState *rs, uint8_t **current_data,
56e93d26 733 ram_addr_t current_addr, RAMBlock *block,
072c2511 734 ram_addr_t offset, bool last_stage)
56e93d26
JQ
735{
736 int encoded_len = 0, bytes_xbzrle;
737 uint8_t *prev_cached_page;
738
9360447d
JQ
739 if (!cache_is_cached(XBZRLE.cache, current_addr,
740 ram_counters.dirty_sync_count)) {
741 xbzrle_counters.cache_miss++;
56e93d26
JQ
742 if (!last_stage) {
743 if (cache_insert(XBZRLE.cache, current_addr, *current_data,
9360447d 744 ram_counters.dirty_sync_count) == -1) {
56e93d26
JQ
745 return -1;
746 } else {
747 /* update *current_data when the page has been
748 inserted into cache */
749 *current_data = get_cached_data(XBZRLE.cache, current_addr);
750 }
751 }
752 return -1;
753 }
754
755 prev_cached_page = get_cached_data(XBZRLE.cache, current_addr);
756
757 /* save current buffer into memory */
758 memcpy(XBZRLE.current_buf, *current_data, TARGET_PAGE_SIZE);
759
760 /* XBZRLE encoding (if there is no overflow) */
761 encoded_len = xbzrle_encode_buffer(prev_cached_page, XBZRLE.current_buf,
762 TARGET_PAGE_SIZE, XBZRLE.encoded_buf,
763 TARGET_PAGE_SIZE);
764 if (encoded_len == 0) {
55c4446b 765 trace_save_xbzrle_page_skipping();
56e93d26
JQ
766 return 0;
767 } else if (encoded_len == -1) {
55c4446b 768 trace_save_xbzrle_page_overflow();
9360447d 769 xbzrle_counters.overflow++;
56e93d26
JQ
770 /* update data in the cache */
771 if (!last_stage) {
772 memcpy(prev_cached_page, *current_data, TARGET_PAGE_SIZE);
773 *current_data = prev_cached_page;
774 }
775 return -1;
776 }
777
778 /* we need to update the data in the cache, in order to get the same data */
779 if (!last_stage) {
780 memcpy(prev_cached_page, XBZRLE.current_buf, TARGET_PAGE_SIZE);
781 }
782
783 /* Send XBZRLE based compressed page */
2bf3aa85 784 bytes_xbzrle = save_page_header(rs, rs->f, block,
204b88b8
JQ
785 offset | RAM_SAVE_FLAG_XBZRLE);
786 qemu_put_byte(rs->f, ENCODING_FLAG_XBZRLE);
787 qemu_put_be16(rs->f, encoded_len);
788 qemu_put_buffer(rs->f, XBZRLE.encoded_buf, encoded_len);
56e93d26 789 bytes_xbzrle += encoded_len + 1 + 2;
9360447d
JQ
790 xbzrle_counters.pages++;
791 xbzrle_counters.bytes += bytes_xbzrle;
792 ram_counters.transferred += bytes_xbzrle;
56e93d26
JQ
793
794 return 1;
795}
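/*
 * Illustrative note: for a cache hit where only a few bytes changed,
 * what goes on the wire is the page header, the ENCODING_FLAG_XBZRLE
 * byte, a 2-byte encoded length and the XBZRLE delta itself, which is
 * why bytes_xbzrle above is accounted as encoded_len + 1 + 2 on top of
 * the header size.
 */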
796
3d0684b2
JQ
797/**
798 * migration_bitmap_find_dirty: find the next dirty page from start
f3f491fc 799 *
3d0684b2
JQ
800 * Called with rcu_read_lock() to protect migration_bitmap
801 *
802 * Returns the page index, within the RAMBlock, of the next dirty page
803 *
6f37bb8b 804 * @rs: current RAM state
3d0684b2 805 * @rb: RAMBlock where to search for dirty pages
a935e30f 806 * @start: page where we start the search
f3f491fc 807 */
56e93d26 808static inline
a935e30f 809unsigned long migration_bitmap_find_dirty(RAMState *rs, RAMBlock *rb,
f20e2865 810 unsigned long start)
56e93d26 811{
6b6712ef
JQ
812 unsigned long size = rb->used_length >> TARGET_PAGE_BITS;
813 unsigned long *bitmap = rb->bmap;
56e93d26
JQ
814 unsigned long next;
815
6b6712ef
JQ
816 if (rs->ram_bulk_stage && start > 0) {
817 next = start + 1;
56e93d26 818 } else {
6b6712ef 819 next = find_next_bit(bitmap, size, start);
56e93d26
JQ
820 }
821
6b6712ef 822 return next;
56e93d26
JQ
823}
824
06b10688 825static inline bool migration_bitmap_clear_dirty(RAMState *rs,
f20e2865
JQ
826 RAMBlock *rb,
827 unsigned long page)
a82d593b
DDAG
828{
829 bool ret;
a82d593b 830
6b6712ef 831 ret = test_and_clear_bit(page, rb->bmap);
a82d593b
DDAG
832
833 if (ret) {
0d8ec885 834 rs->migration_dirty_pages--;
a82d593b
DDAG
835 }
836 return ret;
837}
838
15440dd5
JQ
839static void migration_bitmap_sync_range(RAMState *rs, RAMBlock *rb,
840 ram_addr_t start, ram_addr_t length)
56e93d26 841{
0d8ec885 842 rs->migration_dirty_pages +=
6b6712ef 843 cpu_physical_memory_sync_dirty_bitmap(rb, start, length,
0d8ec885 844 &rs->num_dirty_pages_period);
56e93d26
JQ
845}
846
3d0684b2
JQ
847/**
848 * ram_pagesize_summary: calculate all the pagesizes of a VM
849 *
850 * Returns a summary bitmap of the page sizes of all RAMBlocks
851 *
852 * For VMs with just normal pages this is equivalent to the host page
853 * size. If it's got some huge pages then it's the OR of all the
854 * different page sizes.
e8ca1db2
DDAG
855 */
856uint64_t ram_pagesize_summary(void)
857{
858 RAMBlock *block;
859 uint64_t summary = 0;
860
99e15582 861 RAMBLOCK_FOREACH(block) {
e8ca1db2
DDAG
862 summary |= block->page_size;
863 }
864
865 return summary;
866}
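/*
 * Illustrative example (hypothetical sizes): a guest with one RAMBlock
 * backed by normal 4KiB pages and another backed by 2MiB huge pages
 * would get a summary of 0x1000 | 0x200000 == 0x201000, i.e. one bit
 * set per distinct page size in use.
 */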
867
8d820d6f 868static void migration_bitmap_sync(RAMState *rs)
56e93d26
JQ
869{
870 RAMBlock *block;
56e93d26 871 int64_t end_time;
c4bdf0cf 872 uint64_t bytes_xfer_now;
56e93d26 873
9360447d 874 ram_counters.dirty_sync_count++;
56e93d26 875
f664da80
JQ
876 if (!rs->time_last_bitmap_sync) {
877 rs->time_last_bitmap_sync = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
56e93d26
JQ
878 }
879
880 trace_migration_bitmap_sync_start();
9c1f8f44 881 memory_global_dirty_log_sync();
56e93d26 882
108cfae0 883 qemu_mutex_lock(&rs->bitmap_mutex);
56e93d26 884 rcu_read_lock();
99e15582 885 RAMBLOCK_FOREACH(block) {
15440dd5 886 migration_bitmap_sync_range(rs, block, 0, block->used_length);
56e93d26
JQ
887 }
888 rcu_read_unlock();
108cfae0 889 qemu_mutex_unlock(&rs->bitmap_mutex);
56e93d26 890
a66cd90c 891 trace_migration_bitmap_sync_end(rs->num_dirty_pages_period);
1ffb5dfd 892
56e93d26
JQ
893 end_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
894
895 /* more than 1 second = 1000 milliseconds */
f664da80 896 if (end_time > rs->time_last_bitmap_sync + 1000) {
d693c6f1 897 /* calculate period counters */
9360447d 898 ram_counters.dirty_pages_rate = rs->num_dirty_pages_period * 1000
d693c6f1 899 / (end_time - rs->time_last_bitmap_sync);
9360447d 900 bytes_xfer_now = ram_counters.transferred;
d693c6f1 901
9ac78b61
PL
902 /* During block migration the auto-converge logic incorrectly detects
903 * that ram migration makes no progress. Avoid this by disabling the
904 * throttling logic during the bulk phase of block migration. */
905 if (migrate_auto_converge() && !blk_mig_bulk_active()) {
56e93d26
JQ
906 /* The following detection logic can be refined later. For now:
907 Check to see if the bytes dirtied in this period exceed half of the
908 approx. amount of bytes that got transferred since the last time we
070afca2
JH
909 were in this routine. If that happens twice, start or increase
910 throttling */
070afca2 911
d693c6f1 912 if ((rs->num_dirty_pages_period * TARGET_PAGE_SIZE >
eac74159 913 (bytes_xfer_now - rs->bytes_xfer_prev) / 2) &&
b4a3c64b 914 (++rs->dirty_rate_high_cnt >= 2)) {
56e93d26 915 trace_migration_throttle();
8d820d6f 916 rs->dirty_rate_high_cnt = 0;
070afca2 917 mig_throttle_guest_down();
d693c6f1 918 }
56e93d26 919 }
070afca2 920
56e93d26 921 if (migrate_use_xbzrle()) {
23b28c3c 922 if (rs->iterations_prev != rs->iterations) {
9360447d
JQ
923 xbzrle_counters.cache_miss_rate =
924 (double)(xbzrle_counters.cache_miss -
b5833fde 925 rs->xbzrle_cache_miss_prev) /
23b28c3c 926 (rs->iterations - rs->iterations_prev);
56e93d26 927 }
23b28c3c 928 rs->iterations_prev = rs->iterations;
9360447d 929 rs->xbzrle_cache_miss_prev = xbzrle_counters.cache_miss;
56e93d26 930 }
d693c6f1
FF
931
932 /* reset period counters */
f664da80 933 rs->time_last_bitmap_sync = end_time;
a66cd90c 934 rs->num_dirty_pages_period = 0;
d2a4d85a 935 rs->bytes_xfer_prev = bytes_xfer_now;
56e93d26 936 }
4addcd4f 937 if (migrate_use_events()) {
9360447d 938 qapi_event_send_migration_pass(ram_counters.dirty_sync_count, NULL);
4addcd4f 939 }
56e93d26
JQ
940}
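/*
 * Illustrative example (hypothetical numbers): suppose a sync period of
 * just over one second in which roughly 400MB were transferred and
 * 250MB of guest memory were dirtied.  Since 250MB > 400MB / 2,
 * dirty_rate_high_cnt is bumped; once that has happened twice,
 * mig_throttle_guest_down() is invoked and the counter resets
 * (assuming auto-converge is enabled and block migration is not in its
 * bulk phase).
 */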
941
942/**
3d0684b2 943 * save_zero_page: send the zero page to the stream
56e93d26 944 *
3d0684b2 945 * Returns the number of pages written.
56e93d26 946 *
f7ccd61b 947 * @rs: current RAM state
56e93d26
JQ
948 * @block: block that contains the page we want to send
949 * @offset: offset inside the block for the page
56e93d26 950 */
7faccdc3 951static int save_zero_page(RAMState *rs, RAMBlock *block, ram_addr_t offset)
56e93d26 952{
7faccdc3 953 uint8_t *p = block->host + offset;
56e93d26
JQ
954 int pages = -1;
955
956 if (is_zero_range(p, TARGET_PAGE_SIZE)) {
9360447d
JQ
957 ram_counters.duplicate++;
958 ram_counters.transferred +=
bb890ed5 959 save_page_header(rs, rs->f, block, offset | RAM_SAVE_FLAG_ZERO);
ce25d337 960 qemu_put_byte(rs->f, 0);
9360447d 961 ram_counters.transferred += 1;
56e93d26
JQ
962 pages = 1;
963 }
964
965 return pages;
966}
967
5727309d 968static void ram_release_pages(const char *rbname, uint64_t offset, int pages)
53f09a10 969{
5727309d 970 if (!migrate_release_ram() || !migration_in_postcopy()) {
53f09a10
PB
971 return;
972 }
973
aaa2064c 974 ram_discard_range(rbname, offset, pages << TARGET_PAGE_BITS);
53f09a10
PB
975}
976
059ff0fb
XG
977/*
978 * @pages: the number of pages written by the control path,
979 * < 0 - error
980 * > 0 - number of pages written
981 *
982 * Return true if the page has been saved, otherwise false is returned.
983 */
984static bool control_save_page(RAMState *rs, RAMBlock *block, ram_addr_t offset,
985 int *pages)
986{
987 uint64_t bytes_xmit = 0;
988 int ret;
989
990 *pages = -1;
991 ret = ram_control_save_page(rs->f, block->offset, offset, TARGET_PAGE_SIZE,
992 &bytes_xmit);
993 if (ret == RAM_SAVE_CONTROL_NOT_SUPP) {
994 return false;
995 }
996
997 if (bytes_xmit) {
998 ram_counters.transferred += bytes_xmit;
999 *pages = 1;
1000 }
1001
1002 if (ret == RAM_SAVE_CONTROL_DELAYED) {
1003 return true;
1004 }
1005
1006 if (bytes_xmit > 0) {
1007 ram_counters.normal++;
1008 } else if (bytes_xmit == 0) {
1009 ram_counters.duplicate++;
1010 }
1011
1012 return true;
1013}
1014
56e93d26 1015/**
3d0684b2 1016 * ram_save_page: send the given page to the stream
56e93d26 1017 *
3d0684b2 1018 * Returns the number of pages written.
3fd3c4b3
DDAG
1019 * < 0 - error
1020 * >=0 - Number of pages written - this might legally be 0
1021 * if xbzrle noticed the page was the same.
56e93d26 1022 *
6f37bb8b 1023 * @rs: current RAM state
56e93d26
JQ
1024 * @block: block that contains the page we want to send
1025 * @offset: offset inside the block for the page
1026 * @last_stage: if we are at the completion stage
56e93d26 1027 */
a0a8aa14 1028static int ram_save_page(RAMState *rs, PageSearchStatus *pss, bool last_stage)
56e93d26
JQ
1029{
1030 int pages = -1;
56e93d26 1031 uint8_t *p;
56e93d26 1032 bool send_async = true;
a08f6890 1033 RAMBlock *block = pss->block;
a935e30f 1034 ram_addr_t offset = pss->page << TARGET_PAGE_BITS;
059ff0fb 1035 ram_addr_t current_addr = block->offset + offset;
56e93d26 1036
2f68e399 1037 p = block->host + offset;
1db9d8e5 1038 trace_ram_save_page(block->idstr, (uint64_t)offset, p);
56e93d26 1039
059ff0fb
XG
1040 if (control_save_page(rs, block, offset, &pages)) {
1041 return pages;
56e93d26
JQ
1042 }
1043
1044 XBZRLE_cache_lock();
059ff0fb
XG
1045 pages = save_zero_page(rs, block, offset);
1046 if (pages > 0) {
1047 /* Must let xbzrle know, otherwise a previous (now 0'd) cached
1048 * page would be stale
1049 */
1050 xbzrle_cache_zero_page(rs, current_addr);
1051 ram_release_pages(block->idstr, offset, pages);
1052 } else if (!rs->ram_bulk_stage &&
1053 !migration_in_postcopy() && migrate_use_xbzrle()) {
1054 pages = save_xbzrle_page(rs, &p, current_addr, block,
1055 offset, last_stage);
1056 if (!last_stage) {
1057 /* Can't send this cached data async, since the cache page
1058 * might get updated before it gets to the wire
56e93d26 1059 */
059ff0fb 1060 send_async = false;
56e93d26
JQ
1061 }
1062 }
1063
1064 /* XBZRLE overflow or normal page */
1065 if (pages == -1) {
9360447d
JQ
1066 ram_counters.transferred +=
1067 save_page_header(rs, rs->f, block, offset | RAM_SAVE_FLAG_PAGE);
56e93d26 1068 if (send_async) {
ce25d337 1069 qemu_put_buffer_async(rs->f, p, TARGET_PAGE_SIZE,
53f09a10 1070 migrate_release_ram() &
5727309d 1071 migration_in_postcopy());
56e93d26 1072 } else {
ce25d337 1073 qemu_put_buffer(rs->f, p, TARGET_PAGE_SIZE);
56e93d26 1074 }
9360447d 1075 ram_counters.transferred += TARGET_PAGE_SIZE;
56e93d26 1076 pages = 1;
9360447d 1077 ram_counters.normal++;
56e93d26
JQ
1078 }
1079
1080 XBZRLE_cache_unlock();
1081
1082 return pages;
1083}
1084
dcaf446e 1085static int do_compress_ram_page(QEMUFile *f, z_stream *stream, RAMBlock *block,
34ab9e97 1086 ram_addr_t offset, uint8_t *source_buf)
56e93d26 1087{
53518d94 1088 RAMState *rs = ram_state;
56e93d26 1089 int bytes_sent, blen;
a7a9a88f 1090 uint8_t *p = block->host + (offset & TARGET_PAGE_MASK);
56e93d26 1091
2bf3aa85 1092 bytes_sent = save_page_header(rs, f, block, offset |
56e93d26 1093 RAM_SAVE_FLAG_COMPRESS_PAGE);
34ab9e97
XG
1094
1095 /*
1096 * copy it to an internal buffer to avoid it being modified by the VM,
1097 * so that we can catch any error during compression and
1098 * decompression
1099 */
1100 memcpy(source_buf, p, TARGET_PAGE_SIZE);
1101 blen = qemu_put_compression_data(f, stream, source_buf, TARGET_PAGE_SIZE);
b3be2896
LL
1102 if (blen < 0) {
1103 bytes_sent = 0;
1104 qemu_file_set_error(migrate_get_current()->to_dst_file, blen);
1105 error_report("compressed data failed!");
1106 } else {
1107 bytes_sent += blen;
5727309d 1108 ram_release_pages(block->idstr, offset & TARGET_PAGE_MASK, 1);
b3be2896 1109 }
56e93d26
JQ
1110
1111 return bytes_sent;
1112}
1113
ce25d337 1114static void flush_compressed_data(RAMState *rs)
56e93d26
JQ
1115{
1116 int idx, len, thread_count;
1117
1118 if (!migrate_use_compression()) {
1119 return;
1120 }
1121 thread_count = migrate_compress_threads();
a7a9a88f 1122
0d9f9a5c 1123 qemu_mutex_lock(&comp_done_lock);
56e93d26 1124 for (idx = 0; idx < thread_count; idx++) {
a7a9a88f 1125 while (!comp_param[idx].done) {
0d9f9a5c 1126 qemu_cond_wait(&comp_done_cond, &comp_done_lock);
56e93d26 1127 }
a7a9a88f 1128 }
0d9f9a5c 1129 qemu_mutex_unlock(&comp_done_lock);
a7a9a88f
LL
1130
1131 for (idx = 0; idx < thread_count; idx++) {
1132 qemu_mutex_lock(&comp_param[idx].mutex);
90e56fb4 1133 if (!comp_param[idx].quit) {
ce25d337 1134 len = qemu_put_qemu_file(rs->f, comp_param[idx].file);
9360447d 1135 ram_counters.transferred += len;
56e93d26 1136 }
a7a9a88f 1137 qemu_mutex_unlock(&comp_param[idx].mutex);
56e93d26
JQ
1138 }
1139}
1140
1141static inline void set_compress_params(CompressParam *param, RAMBlock *block,
1142 ram_addr_t offset)
1143{
1144 param->block = block;
1145 param->offset = offset;
1146}
1147
ce25d337
JQ
1148static int compress_page_with_multi_thread(RAMState *rs, RAMBlock *block,
1149 ram_addr_t offset)
56e93d26
JQ
1150{
1151 int idx, thread_count, bytes_xmit = -1, pages = -1;
1152
1153 thread_count = migrate_compress_threads();
0d9f9a5c 1154 qemu_mutex_lock(&comp_done_lock);
56e93d26
JQ
1155 while (true) {
1156 for (idx = 0; idx < thread_count; idx++) {
1157 if (comp_param[idx].done) {
a7a9a88f 1158 comp_param[idx].done = false;
ce25d337 1159 bytes_xmit = qemu_put_qemu_file(rs->f, comp_param[idx].file);
a7a9a88f 1160 qemu_mutex_lock(&comp_param[idx].mutex);
56e93d26 1161 set_compress_params(&comp_param[idx], block, offset);
a7a9a88f
LL
1162 qemu_cond_signal(&comp_param[idx].cond);
1163 qemu_mutex_unlock(&comp_param[idx].mutex);
56e93d26 1164 pages = 1;
9360447d
JQ
1165 ram_counters.normal++;
1166 ram_counters.transferred += bytes_xmit;
56e93d26
JQ
1167 break;
1168 }
1169 }
1170 if (pages > 0) {
1171 break;
1172 } else {
0d9f9a5c 1173 qemu_cond_wait(&comp_done_cond, &comp_done_lock);
56e93d26
JQ
1174 }
1175 }
0d9f9a5c 1176 qemu_mutex_unlock(&comp_done_lock);
56e93d26
JQ
1177
1178 return pages;
1179}
1180
1181/**
1182 * ram_save_compressed_page: compress the given page and send it to the stream
1183 *
3d0684b2 1184 * Returns the number of pages written.
56e93d26 1185 *
6f37bb8b 1186 * @rs: current RAM state
56e93d26
JQ
1187 * @block: block that contains the page we want to send
1188 * @offset: offset inside the block for the page
1189 * @last_stage: if we are at the completion stage
56e93d26 1190 */
a0a8aa14
JQ
1191static int ram_save_compressed_page(RAMState *rs, PageSearchStatus *pss,
1192 bool last_stage)
56e93d26
JQ
1193{
1194 int pages = -1;
56e93d26 1195 uint8_t *p;
a08f6890 1196 RAMBlock *block = pss->block;
a935e30f 1197 ram_addr_t offset = pss->page << TARGET_PAGE_BITS;
56e93d26 1198
2f68e399 1199 p = block->host + offset;
56e93d26 1200
059ff0fb
XG
1201 if (control_save_page(rs, block, offset, &pages)) {
1202 return pages;
56e93d26 1203 }
059ff0fb
XG
1204
1205 /* When starting the process of a new block, the first page of
1206 * the block should be sent out before other pages in the same
1207 * block, and all the pages in the last block should have been sent
1208 * out. Keeping this order is important, because the 'cont' flag
1209 * is used to avoid resending the block name.
1210 */
1211 if (block != rs->last_sent_block) {
1212 flush_compressed_data(rs);
1213 pages = save_zero_page(rs, block, offset);
1214 if (pages > 0) {
1215 ram_release_pages(block->idstr, offset, pages);
1216 } else {
1217 /*
1218 * Make sure the first page is sent out before other pages.
1219 *
1220 * we post it as a normal page, as compression would take too
1221 * much CPU resource.
1222 */
1223 ram_counters.transferred += save_page_header(rs, rs->f, block,
1224 offset | RAM_SAVE_FLAG_PAGE);
1225 qemu_put_buffer_async(rs->f, p, TARGET_PAGE_SIZE,
1226 migrate_release_ram() &
1227 migration_in_postcopy());
1228 ram_counters.transferred += TARGET_PAGE_SIZE;
1229 ram_counters.normal++;
1230 pages = 1;
56e93d26
JQ
1231 }
1232 } else {
059ff0fb
XG
1233 pages = save_zero_page(rs, block, offset);
1234 if (pages == -1) {
1235 pages = compress_page_with_multi_thread(rs, block, offset);
56e93d26 1236 } else {
059ff0fb 1237 ram_release_pages(block->idstr, offset, pages);
56e93d26
JQ
1238 }
1239 }
1240
1241 return pages;
1242}
1243
3d0684b2
JQ
1244/**
1245 * find_dirty_block: find the next dirty page and update any state
1246 * associated with the search process.
b9e60928 1247 *
3d0684b2 1248 * Returns true if a page is found, false otherwise
b9e60928 1249 *
6f37bb8b 1250 * @rs: current RAM state
3d0684b2
JQ
1251 * @pss: data about the state of the current dirty page scan
1252 * @again: set to false if the search has scanned the whole of RAM
b9e60928 1253 */
f20e2865 1254static bool find_dirty_block(RAMState *rs, PageSearchStatus *pss, bool *again)
b9e60928 1255{
f20e2865 1256 pss->page = migration_bitmap_find_dirty(rs, pss->block, pss->page);
6f37bb8b 1257 if (pss->complete_round && pss->block == rs->last_seen_block &&
a935e30f 1258 pss->page >= rs->last_page) {
b9e60928
DDAG
1259 /*
1260 * We've been once around the RAM and haven't found anything.
1261 * Give up.
1262 */
1263 *again = false;
1264 return false;
1265 }
a935e30f 1266 if ((pss->page << TARGET_PAGE_BITS) >= pss->block->used_length) {
b9e60928 1267 /* Didn't find anything in this RAM Block */
a935e30f 1268 pss->page = 0;
b9e60928
DDAG
1269 pss->block = QLIST_NEXT_RCU(pss->block, next);
1270 if (!pss->block) {
1271 /* Hit the end of the list */
1272 pss->block = QLIST_FIRST_RCU(&ram_list.blocks);
1273 /* Flag that we've looped */
1274 pss->complete_round = true;
6f37bb8b 1275 rs->ram_bulk_stage = false;
b9e60928
DDAG
1276 if (migrate_use_xbzrle()) {
1277 /* If xbzrle is on, stop using the data compression at this
1278 * point. In theory, xbzrle can do better than compression.
1279 */
ce25d337 1280 flush_compressed_data(rs);
b9e60928
DDAG
1281 }
1282 }
1283 /* Didn't find anything this time, but try again on the new block */
1284 *again = true;
1285 return false;
1286 } else {
1287 /* Can go around again, but... */
1288 *again = true;
1289 /* We've found something so probably don't need to */
1290 return true;
1291 }
1292}
1293
3d0684b2
JQ
1294/**
1295 * unqueue_page: gets a page off the queue
1296 *
a82d593b 1297 * Helper for 'get_queued_page' - gets a page off the queue
a82d593b 1298 *
3d0684b2
JQ
1299 * Returns the block of the page (or NULL if none available)
1300 *
ec481c6c 1301 * @rs: current RAM state
3d0684b2 1302 * @offset: used to return the offset within the RAMBlock
a82d593b 1303 */
f20e2865 1304static RAMBlock *unqueue_page(RAMState *rs, ram_addr_t *offset)
a82d593b
DDAG
1305{
1306 RAMBlock *block = NULL;
1307
ec481c6c
JQ
1308 qemu_mutex_lock(&rs->src_page_req_mutex);
1309 if (!QSIMPLEQ_EMPTY(&rs->src_page_requests)) {
1310 struct RAMSrcPageRequest *entry =
1311 QSIMPLEQ_FIRST(&rs->src_page_requests);
a82d593b
DDAG
1312 block = entry->rb;
1313 *offset = entry->offset;
a82d593b
DDAG
1314
1315 if (entry->len > TARGET_PAGE_SIZE) {
1316 entry->len -= TARGET_PAGE_SIZE;
1317 entry->offset += TARGET_PAGE_SIZE;
1318 } else {
1319 memory_region_unref(block->mr);
ec481c6c 1320 QSIMPLEQ_REMOVE_HEAD(&rs->src_page_requests, next_req);
a82d593b
DDAG
1321 g_free(entry);
1322 }
1323 }
ec481c6c 1324 qemu_mutex_unlock(&rs->src_page_req_mutex);
a82d593b
DDAG
1325
1326 return block;
1327}
1328
3d0684b2
JQ
1329/**
1330 * get_queued_page: unqueue a page from the postcopy requests
1331 *
1332 * Skips pages that are already sent (!dirty)
a82d593b 1333 *
3d0684b2 1334 * Returns true if a queued page is found, false otherwise
a82d593b 1335 *
6f37bb8b 1336 * @rs: current RAM state
3d0684b2 1337 * @pss: data about the state of the current dirty page scan
a82d593b 1338 */
f20e2865 1339static bool get_queued_page(RAMState *rs, PageSearchStatus *pss)
a82d593b
DDAG
1340{
1341 RAMBlock *block;
1342 ram_addr_t offset;
1343 bool dirty;
1344
1345 do {
f20e2865 1346 block = unqueue_page(rs, &offset);
a82d593b
DDAG
1347 /*
1348 * We're sending this page, and since it's postcopy nothing else
1349 * will dirty it, and we must make sure it doesn't get sent again
1350 * even if this queue request was received after the background
1351 * search already sent it.
1352 */
1353 if (block) {
f20e2865
JQ
1354 unsigned long page;
1355
6b6712ef
JQ
1356 page = offset >> TARGET_PAGE_BITS;
1357 dirty = test_bit(page, block->bmap);
a82d593b 1358 if (!dirty) {
06b10688 1359 trace_get_queued_page_not_dirty(block->idstr, (uint64_t)offset,
6b6712ef 1360 page, test_bit(page, block->unsentmap));
a82d593b 1361 } else {
f20e2865 1362 trace_get_queued_page(block->idstr, (uint64_t)offset, page);
a82d593b
DDAG
1363 }
1364 }
1365
1366 } while (block && !dirty);
1367
1368 if (block) {
1369 /*
1370 * As soon as we start servicing pages out of order, we have
1371 * to kill the bulk stage, since the bulk stage assumes
1372 * (in migration_bitmap_find_dirty) that every page is
1373 * dirty, and that's no longer true.
1374 */
6f37bb8b 1375 rs->ram_bulk_stage = false;
a82d593b
DDAG
1376
1377 /*
1378 * We want the background search to continue from the queued page
1379 * since the guest is likely to want other pages near to the page
1380 * it just requested.
1381 */
1382 pss->block = block;
a935e30f 1383 pss->page = offset >> TARGET_PAGE_BITS;
a82d593b
DDAG
1384 }
1385
1386 return !!block;
1387}
1388
6c595cde 1389/**
5e58f968
JQ
1390 * migration_page_queue_free: drop any remaining pages in the ram
1391 * request queue
6c595cde 1392 *
3d0684b2
JQ
1393 * It should be empty at the end anyway, but in error cases there may
1394 * be some left. In case any pages are left, we drop them.
1395 *
6c595cde 1396 */
83c13382 1397static void migration_page_queue_free(RAMState *rs)
6c595cde 1398{
ec481c6c 1399 struct RAMSrcPageRequest *mspr, *next_mspr;
6c595cde
DDAG
1400 /* This queue generally should be empty - but in the case of a failed
1401 * migration it might have some droppings in.
1402 */
1403 rcu_read_lock();
ec481c6c 1404 QSIMPLEQ_FOREACH_SAFE(mspr, &rs->src_page_requests, next_req, next_mspr) {
6c595cde 1405 memory_region_unref(mspr->rb->mr);
ec481c6c 1406 QSIMPLEQ_REMOVE_HEAD(&rs->src_page_requests, next_req);
6c595cde
DDAG
1407 g_free(mspr);
1408 }
1409 rcu_read_unlock();
1410}
1411
1412/**
3d0684b2
JQ
1413 * ram_save_queue_pages: queue the page for transmission
1414 *
1415 * A request from postcopy destination for example.
1416 *
1417 * Returns zero on success or negative on error
1418 *
3d0684b2
JQ
1419 * @rbname: Name of the RAMBlock of the request. NULL means the
1420 * same as the last one.
1421 * @start: starting address from the start of the RAMBlock
1422 * @len: length (in bytes) to send
6c595cde 1423 */
96506894 1424int ram_save_queue_pages(const char *rbname, ram_addr_t start, ram_addr_t len)
6c595cde
DDAG
1425{
1426 RAMBlock *ramblock;
53518d94 1427 RAMState *rs = ram_state;
6c595cde 1428
9360447d 1429 ram_counters.postcopy_requests++;
6c595cde
DDAG
1430 rcu_read_lock();
1431 if (!rbname) {
1432 /* Reuse last RAMBlock */
68a098f3 1433 ramblock = rs->last_req_rb;
6c595cde
DDAG
1434
1435 if (!ramblock) {
1436 /*
1437 * Shouldn't happen, we can't reuse the last RAMBlock if
1438 * it's the 1st request.
1439 */
1440 error_report("ram_save_queue_pages no previous block");
1441 goto err;
1442 }
1443 } else {
1444 ramblock = qemu_ram_block_by_name(rbname);
1445
1446 if (!ramblock) {
1447 /* We shouldn't be asked for a non-existent RAMBlock */
1448 error_report("ram_save_queue_pages no block '%s'", rbname);
1449 goto err;
1450 }
68a098f3 1451 rs->last_req_rb = ramblock;
6c595cde
DDAG
1452 }
1453 trace_ram_save_queue_pages(ramblock->idstr, start, len);
1454 if (start+len > ramblock->used_length) {
9458ad6b
JQ
1455 error_report("%s request overrun start=" RAM_ADDR_FMT " len="
1456 RAM_ADDR_FMT " blocklen=" RAM_ADDR_FMT,
6c595cde
DDAG
1457 __func__, start, len, ramblock->used_length);
1458 goto err;
1459 }
1460
ec481c6c
JQ
1461 struct RAMSrcPageRequest *new_entry =
1462 g_malloc0(sizeof(struct RAMSrcPageRequest));
6c595cde
DDAG
1463 new_entry->rb = ramblock;
1464 new_entry->offset = start;
1465 new_entry->len = len;
1466
1467 memory_region_ref(ramblock->mr);
ec481c6c
JQ
1468 qemu_mutex_lock(&rs->src_page_req_mutex);
1469 QSIMPLEQ_INSERT_TAIL(&rs->src_page_requests, new_entry, next_req);
1470 qemu_mutex_unlock(&rs->src_page_req_mutex);
6c595cde
DDAG
1471 rcu_read_unlock();
1472
1473 return 0;
1474
1475err:
1476 rcu_read_unlock();
1477 return -1;
1478}
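/*
 * Illustrative sketch (hypothetical values): a postcopy destination
 * asking for 16KiB starting 2MiB into the block named "pc.ram" would
 * reach this function roughly as
 *
 *     ram_save_queue_pages("pc.ram", 0x200000, 0x4000);
 *
 * which queues one RAMSrcPageRequest covering four 4KiB target pages;
 * unqueue_page() later hands them back one target page at a time.
 */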
1479
a82d593b 1480/**
3d0684b2 1481 * ram_save_target_page: save one target page
a82d593b 1482 *
3d0684b2 1483 * Returns the number of pages written
a82d593b 1484 *
6f37bb8b 1485 * @rs: current RAM state
3d0684b2 1486 * @pss: data about the page we want to send
a82d593b 1487 * @last_stage: if we are at the completion stage
a82d593b 1488 */
a0a8aa14 1489static int ram_save_target_page(RAMState *rs, PageSearchStatus *pss,
f20e2865 1490 bool last_stage)
a82d593b 1491{
1faa5665
XG
1492 /*
1493 * If xbzrle is on, stop using the data compression after the first
1494 * round of migration even if compression is enabled. In theory,
1495 * xbzrle can do better than compression.
1496 */
1497 if (migrate_use_compression() &&
1498 (rs->ram_bulk_stage || !migrate_use_xbzrle())) {
1499 return ram_save_compressed_page(rs, pss, last_stage);
a82d593b
DDAG
1500 }
1501
1faa5665 1502 return ram_save_page(rs, pss, last_stage);
a82d593b
DDAG
1503}
1504
1505/**
3d0684b2 1506 * ram_save_host_page: save a whole host page
a82d593b 1507 *
3d0684b2
JQ
1508 * Starting at pss->page, send pages up to the end of the current host
1509 * page. It's valid for the initial page to point into the middle of
1510 * a host page, in which case the remainder of the host page is sent.
1511 * Only dirty target pages are sent. Note that the host page size may
1512 * be a huge page for this block.
1eb3fc0a
DDAG
1513 * The saving stops at the boundary of the used_length of the block
1514 * if the RAMBlock isn't a multiple of the host page size.
a82d593b 1515 *
3d0684b2
JQ
1516 * Returns the number of pages written or negative on error
1517 *
6f37bb8b 1518 * @rs: current RAM state
3d0684b2 1520 * @pss: data about the page we want to send
a82d593b 1521 * @last_stage: if we are at the completion stage
a82d593b 1522 */
a0a8aa14 1523static int ram_save_host_page(RAMState *rs, PageSearchStatus *pss,
f20e2865 1524 bool last_stage)
a82d593b
DDAG
1525{
1526 int tmppages, pages = 0;
a935e30f
JQ
1527 size_t pagesize_bits =
1528 qemu_ram_pagesize(pss->block) >> TARGET_PAGE_BITS;
4c011c37 1529
a82d593b 1530 do {
1faa5665
XG
1531 /* Check if the page is dirty and, if it is, send it */
1532 if (!migration_bitmap_clear_dirty(rs, pss->block, pss->page)) {
1533 pss->page++;
1534 continue;
1535 }
1536
f20e2865 1537 tmppages = ram_save_target_page(rs, pss, last_stage);
a82d593b
DDAG
1538 if (tmppages < 0) {
1539 return tmppages;
1540 }
1541
1542 pages += tmppages;
1faa5665
XG
1543 if (pss->block->unsentmap) {
1544 clear_bit(pss->page, pss->block->unsentmap);
1545 }
1546
a935e30f 1547 pss->page++;
1eb3fc0a
DDAG
1548 } while ((pss->page & (pagesize_bits - 1)) &&
1549 offset_in_ramblock(pss->block, pss->page << TARGET_PAGE_BITS));
a82d593b
DDAG
1550
1551 /* The offset we leave with is the last one we looked at */
a935e30f 1552 pss->page--;
a82d593b
DDAG
1553 return pages;
1554}
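/*
 * Illustrative example (assuming 2MiB huge pages and a 4KiB target page
 * size): pagesize_bits above is 512, so ram_save_host_page() keeps
 * walking consecutive target pages until (pss->page & 511) wraps to
 * zero, i.e. until the whole host page has been covered or the end of
 * the block's used_length is reached.
 */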
6c595cde 1555
56e93d26 1556/**
3d0684b2 1557 * ram_find_and_save_block: finds a dirty page and sends it to f
56e93d26
JQ
1558 *
1559 * Called within an RCU critical section.
1560 *
3d0684b2 1561 * Returns the number of pages written where zero means no dirty pages
56e93d26 1562 *
6f37bb8b 1563 * @rs: current RAM state
56e93d26 1564 * @last_stage: if we are at the completion stage
a82d593b
DDAG
1565 *
1566 * On systems where host-page-size > target-page-size it will send all the
1567 * pages in a host page that are dirty.
56e93d26
JQ
1568 */
1569
ce25d337 1570static int ram_find_and_save_block(RAMState *rs, bool last_stage)
56e93d26 1571{
b8fb8cb7 1572 PageSearchStatus pss;
56e93d26 1573 int pages = 0;
b9e60928 1574 bool again, found;
56e93d26 1575
0827b9e9
AA
1576 /* No dirty page as there is zero RAM */
1577 if (!ram_bytes_total()) {
1578 return pages;
1579 }
1580
6f37bb8b 1581 pss.block = rs->last_seen_block;
a935e30f 1582 pss.page = rs->last_page;
b8fb8cb7
DDAG
1583 pss.complete_round = false;
1584
1585 if (!pss.block) {
1586 pss.block = QLIST_FIRST_RCU(&ram_list.blocks);
1587 }
56e93d26 1588
b9e60928 1589 do {
a82d593b 1590 again = true;
f20e2865 1591 found = get_queued_page(rs, &pss);
b9e60928 1592
a82d593b
DDAG
1593 if (!found) {
1594 /* priority queue empty, so just search for something dirty */
f20e2865 1595 found = find_dirty_block(rs, &pss, &again);
a82d593b 1596 }
f3f491fc 1597
a82d593b 1598 if (found) {
f20e2865 1599 pages = ram_save_host_page(rs, &pss, last_stage);
56e93d26 1600 }
b9e60928 1601 } while (!pages && again);
56e93d26 1602
6f37bb8b 1603 rs->last_seen_block = pss.block;
a935e30f 1604 rs->last_page = pss.page;
56e93d26
JQ
1605
1606 return pages;
1607}
1608
1609void acct_update_position(QEMUFile *f, size_t size, bool zero)
1610{
1611 uint64_t pages = size / TARGET_PAGE_SIZE;
f7ccd61b 1612
56e93d26 1613 if (zero) {
9360447d 1614 ram_counters.duplicate += pages;
56e93d26 1615 } else {
9360447d
JQ
1616 ram_counters.normal += pages;
1617 ram_counters.transferred += size;
56e93d26
JQ
1618 qemu_update_position(f, size);
1619 }
1620}
1621
56e93d26
JQ
1622uint64_t ram_bytes_total(void)
1623{
1624 RAMBlock *block;
1625 uint64_t total = 0;
1626
1627 rcu_read_lock();
99e15582 1628 RAMBLOCK_FOREACH(block) {
56e93d26 1629 total += block->used_length;
99e15582 1630 }
56e93d26
JQ
1631 rcu_read_unlock();
1632 return total;
1633}
1634
f265e0e4 1635static void xbzrle_load_setup(void)
56e93d26 1636{
f265e0e4 1637 XBZRLE.decoded_buf = g_malloc(TARGET_PAGE_SIZE);
56e93d26
JQ
1638}
1639
f265e0e4
JQ
1640static void xbzrle_load_cleanup(void)
1641{
1642 g_free(XBZRLE.decoded_buf);
1643 XBZRLE.decoded_buf = NULL;
1644}
1645
7d7c96be
PX
1646static void ram_state_cleanup(RAMState **rsp)
1647{
b9ccaf6d
DDAG
1648 if (*rsp) {
1649 migration_page_queue_free(*rsp);
1650 qemu_mutex_destroy(&(*rsp)->bitmap_mutex);
1651 qemu_mutex_destroy(&(*rsp)->src_page_req_mutex);
1652 g_free(*rsp);
1653 *rsp = NULL;
1654 }
7d7c96be
PX
1655}
1656
84593a08
PX
1657static void xbzrle_cleanup(void)
1658{
1659 XBZRLE_cache_lock();
1660 if (XBZRLE.cache) {
1661 cache_fini(XBZRLE.cache);
1662 g_free(XBZRLE.encoded_buf);
1663 g_free(XBZRLE.current_buf);
1664 g_free(XBZRLE.zero_target_page);
1665 XBZRLE.cache = NULL;
1666 XBZRLE.encoded_buf = NULL;
1667 XBZRLE.current_buf = NULL;
1668 XBZRLE.zero_target_page = NULL;
1669 }
1670 XBZRLE_cache_unlock();
1671}
1672
f265e0e4 1673static void ram_save_cleanup(void *opaque)
56e93d26 1674{
53518d94 1675 RAMState **rsp = opaque;
6b6712ef 1676 RAMBlock *block;
eb859c53 1677
2ff64038
LZ
1678 /* The caller must hold the iothread lock or be in a bh, so there is
1679 * no write race against this migration_bitmap
1680 */
6b6712ef
JQ
1681 memory_global_dirty_log_stop();
1682
1683 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1684 g_free(block->bmap);
1685 block->bmap = NULL;
1686 g_free(block->unsentmap);
1687 block->unsentmap = NULL;
56e93d26
JQ
1688 }
1689
84593a08 1690 xbzrle_cleanup();
f0afa331 1691 compress_threads_save_cleanup();
7d7c96be 1692 ram_state_cleanup(rsp);
56e93d26
JQ
1693}
1694
6f37bb8b 1695static void ram_state_reset(RAMState *rs)
56e93d26 1696{
6f37bb8b
JQ
1697 rs->last_seen_block = NULL;
1698 rs->last_sent_block = NULL;
269ace29 1699 rs->last_page = 0;
6f37bb8b
JQ
1700 rs->last_version = ram_list.version;
1701 rs->ram_bulk_stage = true;
56e93d26
JQ
1702}
1703
1704#define MAX_WAIT 50 /* ms, half buffered_file limit */
1705
4f2e4252
DDAG
1706/*
1707 * 'expected' is the value you expect the bitmap mostly to be full
1708 * of; it won't bother printing lines that are all this value.
1709 * 'todump' is the bitmap to dump.
1710 */
6b6712ef
JQ
1711void ram_debug_dump_bitmap(unsigned long *todump, bool expected,
1712 unsigned long pages)
4f2e4252 1713{
4f2e4252
DDAG
1714 int64_t cur;
1715 int64_t linelen = 128;
1716 char linebuf[129];
1717
6b6712ef 1718 for (cur = 0; cur < pages; cur += linelen) {
4f2e4252
DDAG
1719 int64_t curb;
1720 bool found = false;
1721 /*
1722 * Last line; catch the case where the line length
1723 * is longer than remaining ram
1724 */
6b6712ef
JQ
1725 if (cur + linelen > pages) {
1726 linelen = pages - cur;
4f2e4252
DDAG
1727 }
1728 for (curb = 0; curb < linelen; curb++) {
1729 bool thisbit = test_bit(cur + curb, todump);
1730 linebuf[curb] = thisbit ? '1' : '.';
1731 found = found || (thisbit != expected);
1732 }
1733 if (found) {
1734 linebuf[curb] = '\0';
1735 fprintf(stderr, "0x%08" PRIx64 " : %s\n", cur, linebuf);
1736 }
1737 }
1738}
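/*
 * For example, with 'expected' == false a dump might look like the
 * (shortened) lines below: each printed line covers up to 128 pages,
 * '1' marks a set bit, '.' a clear one, and lines made up entirely of
 * the expected value are skipped:
 *
 *   0x00000080 : ....11111111111111............1.....
 *   0x00000300 : 1...............................111.
 */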
1739
e0b266f0
DDAG
1740/* **** functions for postcopy ***** */
1741
ced1c616
PB
1742void ram_postcopy_migrated_memory_release(MigrationState *ms)
1743{
1744 struct RAMBlock *block;
ced1c616 1745
99e15582 1746 RAMBLOCK_FOREACH(block) {
6b6712ef
JQ
1747 unsigned long *bitmap = block->bmap;
1748 unsigned long range = block->used_length >> TARGET_PAGE_BITS;
1749 unsigned long run_start = find_next_zero_bit(bitmap, range, 0);
ced1c616
PB
1750
1751 while (run_start < range) {
1752 unsigned long run_end = find_next_bit(bitmap, range, run_start + 1);
aaa2064c 1753 ram_discard_range(block->idstr, run_start << TARGET_PAGE_BITS,
ced1c616
PB
1754 (run_end - run_start) << TARGET_PAGE_BITS);
1755 run_start = find_next_zero_bit(bitmap, range, run_end + 1);
1756 }
1757 }
1758}
1759
3d0684b2
JQ
1760/**
1761 * postcopy_send_discard_bm_ram: discard a RAMBlock
1762 *
1763 * Returns zero on success
1764 *
e0b266f0
DDAG
1765 * Callback from postcopy_each_ram_send_discard for each RAMBlock
1766 * Note: At this point the 'unsentmap' is the processed bitmap combined
1767 * with the dirtymap; so a '1' means it's either dirty or unsent.
3d0684b2
JQ
1768 *
1769 * @ms: current migration state
1770 * @pds: state for postcopy
1771 * @block: RAMBlock to discard
e0b266f0
DDAG
1773 */
1774static int postcopy_send_discard_bm_ram(MigrationState *ms,
1775 PostcopyDiscardState *pds,
6b6712ef 1776 RAMBlock *block)
e0b266f0 1777{
6b6712ef 1778 unsigned long end = block->used_length >> TARGET_PAGE_BITS;
e0b266f0 1779 unsigned long current;
6b6712ef 1780 unsigned long *unsentmap = block->unsentmap;
e0b266f0 1781
6b6712ef 1782 for (current = 0; current < end; ) {
e0b266f0
DDAG
1783 unsigned long one = find_next_bit(unsentmap, end, current);
1784
1785 if (one <= end) {
1786 unsigned long zero = find_next_zero_bit(unsentmap, end, one + 1);
1787 unsigned long discard_length;
1788
1789 if (zero >= end) {
1790 discard_length = end - one;
1791 } else {
1792 discard_length = zero - one;
1793 }
d688c62d
DDAG
1794 if (discard_length) {
1795 postcopy_discard_send_range(ms, pds, one, discard_length);
1796 }
e0b266f0
DDAG
1797 current = one + discard_length;
1798 } else {
1799 current = one;
1800 }
1801 }
1802
1803 return 0;
1804}
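/*
 * A small worked example of the loop above: for an unsentmap whose first
 * ten bits are
 *   bit:   0 1 2 3 4 5 6 7 8 9
 *   value: 0 1 1 1 0 0 1 1 0 0
 * two discard ranges are sent, (start 1, length 3) and (start 6, length 2);
 * runs of zero bits (pages already sent and still clean) are skipped.
 */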
1805
3d0684b2
JQ
1806/**
1807 * postcopy_each_ram_send_discard: discard all RAMBlocks
1808 *
1809 * Returns 0 for success or negative for error
1810 *
e0b266f0
DDAG
1811 * Utility for the outgoing postcopy code.
1812 * Calls postcopy_send_discard_bm_ram for each RAMBlock
1813 * passing it bitmap indexes and name.
e0b266f0
DDAG
1814 * (qemu_ram_foreach_block ends up passing unscaled lengths
1815 * which would mean postcopy code would have to deal with target page)
3d0684b2
JQ
1816 *
1817 * @ms: current migration state
e0b266f0
DDAG
1818 */
1819static int postcopy_each_ram_send_discard(MigrationState *ms)
1820{
1821 struct RAMBlock *block;
1822 int ret;
1823
99e15582 1824 RAMBLOCK_FOREACH(block) {
6b6712ef
JQ
1825 PostcopyDiscardState *pds =
1826 postcopy_discard_send_init(ms, block->idstr);
e0b266f0
DDAG
1827
1828 /*
1829 * Postcopy sends chunks of bitmap over the wire, but it
1830 * just needs indexes at this point, which avoids it having
1831 * target page specific code.
1832 */
6b6712ef 1833 ret = postcopy_send_discard_bm_ram(ms, pds, block);
e0b266f0
DDAG
1834 postcopy_discard_send_finish(ms, pds);
1835 if (ret) {
1836 return ret;
1837 }
1838 }
1839
1840 return 0;
1841}
1842
3d0684b2
JQ
1843/**
1844 * postcopy_chunk_hostpages_pass: canonicalize bitmap in host pages
1845 *
1846 * Helper for postcopy_chunk_hostpages; it's called twice to
1847 * canonicalize the two bitmaps, which are similar but one is
1848 * inverted.
99e314eb 1849 *
3d0684b2
JQ
1850 * Postcopy requires that all target pages in a hostpage are dirty or
1851 * clean, not a mix. This function canonicalizes the bitmaps.
99e314eb 1852 *
3d0684b2
JQ
1853 * @ms: current migration state
1854 * @unsent_pass: if true we need to canonicalize partially unsent host pages
1855 * otherwise we need to canonicalize partially dirty host pages
1856 * @block: block that contains the page we want to canonicalize
1857 * @pds: state for postcopy
99e314eb
DDAG
1858 */
1859static void postcopy_chunk_hostpages_pass(MigrationState *ms, bool unsent_pass,
1860 RAMBlock *block,
1861 PostcopyDiscardState *pds)
1862{
53518d94 1863 RAMState *rs = ram_state;
6b6712ef
JQ
1864 unsigned long *bitmap = block->bmap;
1865 unsigned long *unsentmap = block->unsentmap;
29c59172 1866 unsigned int host_ratio = block->page_size / TARGET_PAGE_SIZE;
6b6712ef 1867 unsigned long pages = block->used_length >> TARGET_PAGE_BITS;
99e314eb
DDAG
1868 unsigned long run_start;
1869
29c59172
DDAG
1870 if (block->page_size == TARGET_PAGE_SIZE) {
1871 /* Easy case - TPS==HPS for a non-huge page RAMBlock */
1872 return;
1873 }
1874
99e314eb
DDAG
1875 if (unsent_pass) {
1876 /* Find a sent page */
6b6712ef 1877 run_start = find_next_zero_bit(unsentmap, pages, 0);
99e314eb
DDAG
1878 } else {
1879 /* Find a dirty page */
6b6712ef 1880 run_start = find_next_bit(bitmap, pages, 0);
99e314eb
DDAG
1881 }
1882
6b6712ef 1883 while (run_start < pages) {
99e314eb
DDAG
1884 bool do_fixup = false;
1885 unsigned long fixup_start_addr;
1886 unsigned long host_offset;
1887
1888 /*
1889 * If the start of this run of pages is in the middle of a host
1890 * page, then we need to fixup this host page.
1891 */
1892 host_offset = run_start % host_ratio;
1893 if (host_offset) {
1894 do_fixup = true;
1895 run_start -= host_offset;
1896 fixup_start_addr = run_start;
1897 /* For the next pass */
1898 run_start = run_start + host_ratio;
1899 } else {
1900 /* Find the end of this run */
1901 unsigned long run_end;
1902 if (unsent_pass) {
6b6712ef 1903 run_end = find_next_bit(unsentmap, pages, run_start + 1);
99e314eb 1904 } else {
6b6712ef 1905 run_end = find_next_zero_bit(bitmap, pages, run_start + 1);
99e314eb
DDAG
1906 }
1907 /*
1908 * If the end isn't at the start of a host page, then the
1909 * run doesn't finish at the end of a host page
1910 * and we need to discard.
1911 */
1912 host_offset = run_end % host_ratio;
1913 if (host_offset) {
1914 do_fixup = true;
1915 fixup_start_addr = run_end - host_offset;
1916 /*
1917 * This host page has gone, the next loop iteration starts
1918 * from after the fixup
1919 */
1920 run_start = fixup_start_addr + host_ratio;
1921 } else {
1922 /*
1923 * No discards on this iteration, next loop starts from
1924 * next sent/dirty page
1925 */
1926 run_start = run_end + 1;
1927 }
1928 }
1929
1930 if (do_fixup) {
1931 unsigned long page;
1932
1933 /* Tell the destination to discard this page */
1934 if (unsent_pass || !test_bit(fixup_start_addr, unsentmap)) {
1935 /* For the unsent_pass we:
1936 * discard partially sent pages
1937 * For the !unsent_pass (dirty) we:
1938 * discard partially dirty pages that were sent
1939 * (any partially sent pages were already discarded
1940 * by the previous unsent_pass)
1941 */
1942 postcopy_discard_send_range(ms, pds, fixup_start_addr,
1943 host_ratio);
1944 }
1945
1946 /* Clean up the bitmap */
1947 for (page = fixup_start_addr;
1948 page < fixup_start_addr + host_ratio; page++) {
1949 /* All pages in this host page are now not sent */
1950 set_bit(page, unsentmap);
1951
1952 /*
1953 * Remark them as dirty, updating the count for any pages
1954 * that weren't previously dirty.
1955 */
0d8ec885 1956 rs->migration_dirty_pages += !test_and_set_bit(page, bitmap);
99e314eb
DDAG
1957 }
1958 }
1959
1960 if (unsent_pass) {
1961 /* Find the next sent page for the next iteration */
6b6712ef 1962 run_start = find_next_zero_bit(unsentmap, pages, run_start);
99e314eb
DDAG
1963 } else {
1964 /* Find the next dirty page for the next iteration */
6b6712ef 1965 run_start = find_next_bit(bitmap, pages, run_start);
99e314eb
DDAG
1966 }
1967 }
1968}
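/*
 * A worked example, assuming a host page of 4 target pages (host_ratio == 4):
 * if the dirty bitmap for one host page reads 1 0 1 1, a run of dirty pages
 * ends in the middle of the host page, so the !unsent_pass pass discards the
 * whole host page on the destination (if any of it had been sent) and then
 * re-marks all four target pages as unsent and dirty, leaving 1 1 1 1.  The
 * host page is therefore resent, and later placed, as a single unit.
 */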
1969
3d0684b2
JQ
1970/**
1971 * postcopy_chunk_hostpages: discard any partially sent host page
1972 *
99e314eb
DDAG
1973 * Utility for the outgoing postcopy code.
1974 *
1975 * Discard any partially sent host-page size chunks, mark any partially
29c59172
DDAG
1976 * dirty host-page size chunks as all dirty. In this case the host-page
1977 * is the host-page for the particular RAMBlock, i.e. it might be a huge page
99e314eb 1978 *
3d0684b2
JQ
1979 * Returns zero on success
1980 *
1981 * @ms: current migration state
6b6712ef 1982 * @block: block we want to work with
99e314eb 1983 */
6b6712ef 1984static int postcopy_chunk_hostpages(MigrationState *ms, RAMBlock *block)
99e314eb 1985{
6b6712ef
JQ
1986 PostcopyDiscardState *pds =
1987 postcopy_discard_send_init(ms, block->idstr);
99e314eb 1988
6b6712ef
JQ
1989 /* First pass: Discard all partially sent host pages */
1990 postcopy_chunk_hostpages_pass(ms, true, block, pds);
1991 /*
1992 * Second pass: Ensure that all partially dirty host pages are made
1993 * fully dirty.
1994 */
1995 postcopy_chunk_hostpages_pass(ms, false, block, pds);
99e314eb 1996
6b6712ef 1997 postcopy_discard_send_finish(ms, pds);
99e314eb
DDAG
1998 return 0;
1999}
2000
3d0684b2
JQ
2001/**
2002 * ram_postcopy_send_discard_bitmap: transmit the discard bitmap
2003 *
2004 * Returns zero on success
2005 *
e0b266f0
DDAG
2006 * Transmit the set of pages to be discarded after precopy to the target;
2007 * these are pages that:
2008 * a) have been previously transmitted but are now dirty again
2009 * b) have never been transmitted; this ensures that any pages on the
2010 * destination that have been mapped by background tasks get
2011 * discarded (transparent huge pages are the specific concern)
2012 * Hopefully this set is pretty sparse
3d0684b2
JQ
2013 *
2014 * @ms: current migration state
e0b266f0
DDAG
2015 */
2016int ram_postcopy_send_discard_bitmap(MigrationState *ms)
2017{
53518d94 2018 RAMState *rs = ram_state;
6b6712ef 2019 RAMBlock *block;
e0b266f0 2020 int ret;
e0b266f0
DDAG
2021
2022 rcu_read_lock();
2023
2024 /* This should be our last sync, the src is now paused */
eb859c53 2025 migration_bitmap_sync(rs);
e0b266f0 2026
6b6712ef
JQ
2027 /* Easiest way to make sure we don't resume in the middle of a host-page */
2028 rs->last_seen_block = NULL;
2029 rs->last_sent_block = NULL;
2030 rs->last_page = 0;
e0b266f0 2031
6b6712ef
JQ
2032 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
2033 unsigned long pages = block->used_length >> TARGET_PAGE_BITS;
2034 unsigned long *bitmap = block->bmap;
2035 unsigned long *unsentmap = block->unsentmap;
2036
2037 if (!unsentmap) {
2038 /* We don't have a safe way to resize the sentmap, so
2039 * if the bitmap was resized it will be NULL at this
2040 * point.
2041 */
2042 error_report("migration ram resized during precopy phase");
2043 rcu_read_unlock();
2044 return -EINVAL;
2045 }
2046 /* Deal with TPS != HPS and huge pages */
2047 ret = postcopy_chunk_hostpages(ms, block);
2048 if (ret) {
2049 rcu_read_unlock();
2050 return ret;
2051 }
e0b266f0 2052
6b6712ef
JQ
2053 /*
2054 * Update the unsentmap to be unsentmap = unsentmap | dirty
2055 */
2056 bitmap_or(unsentmap, unsentmap, bitmap, pages);
e0b266f0 2057#ifdef DEBUG_POSTCOPY
6b6712ef 2058 ram_debug_dump_bitmap(unsentmap, true, pages);
e0b266f0 2059#endif
6b6712ef
JQ
2060 }
2061 trace_ram_postcopy_send_discard_bitmap();
e0b266f0
DDAG
2062
2063 ret = postcopy_each_ram_send_discard(ms);
2064 rcu_read_unlock();
2065
2066 return ret;
2067}
2068
3d0684b2
JQ
2069/**
2070 * ram_discard_range: discard dirtied pages at the beginning of postcopy
e0b266f0 2071 *
3d0684b2 2072 * Returns zero on success
e0b266f0 2073 *
36449157
JQ
2074 * @rbname: name of the RAMBlock of the request. NULL means the
2075 * same as the last one.
3d0684b2
JQ
2076 * @start: RAMBlock starting page
2077 * @length: RAMBlock size
e0b266f0 2078 */
aaa2064c 2079int ram_discard_range(const char *rbname, uint64_t start, size_t length)
e0b266f0
DDAG
2080{
2081 int ret = -1;
2082
36449157 2083 trace_ram_discard_range(rbname, start, length);
d3a5038c 2084
e0b266f0 2085 rcu_read_lock();
36449157 2086 RAMBlock *rb = qemu_ram_block_by_name(rbname);
e0b266f0
DDAG
2087
2088 if (!rb) {
36449157 2089 error_report("ram_discard_range: Failed to find block '%s'", rbname);
e0b266f0
DDAG
2090 goto err;
2091 }
2092
f9494614
AP
2093 bitmap_clear(rb->receivedmap, start >> qemu_target_page_bits(),
2094 length >> qemu_target_page_bits());
d3a5038c 2095 ret = ram_block_discard_range(rb, start, length);
e0b266f0
DDAG
2096
2097err:
2098 rcu_read_unlock();
2099
2100 return ret;
2101}
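/*
 * For instance, assuming 4 KiB target pages, a call such as
 *   ram_discard_range("pc.ram", 0x200000, 0x100000);
 * clears receivedmap bits 512..767 of the "pc.ram" block and then punches
 * the corresponding 1 MiB hole in the host mapping via
 * ram_block_discard_range().
 */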
2102
84593a08
PX
2103/*
2104 * For every allocation, we try not to crash the VM if the
2105 * allocation fails.
2106 */
2107static int xbzrle_init(void)
2108{
2109 Error *local_err = NULL;
2110
2111 if (!migrate_use_xbzrle()) {
2112 return 0;
2113 }
2114
2115 XBZRLE_cache_lock();
2116
2117 XBZRLE.zero_target_page = g_try_malloc0(TARGET_PAGE_SIZE);
2118 if (!XBZRLE.zero_target_page) {
2119 error_report("%s: Error allocating zero page", __func__);
2120 goto err_out;
2121 }
2122
2123 XBZRLE.cache = cache_init(migrate_xbzrle_cache_size(),
2124 TARGET_PAGE_SIZE, &local_err);
2125 if (!XBZRLE.cache) {
2126 error_report_err(local_err);
2127 goto free_zero_page;
2128 }
2129
2130 XBZRLE.encoded_buf = g_try_malloc0(TARGET_PAGE_SIZE);
2131 if (!XBZRLE.encoded_buf) {
2132 error_report("%s: Error allocating encoded_buf", __func__);
2133 goto free_cache;
2134 }
2135
2136 XBZRLE.current_buf = g_try_malloc(TARGET_PAGE_SIZE);
2137 if (!XBZRLE.current_buf) {
2138 error_report("%s: Error allocating current_buf", __func__);
2139 goto free_encoded_buf;
2140 }
2141
2142 /* We are all good */
2143 XBZRLE_cache_unlock();
2144 return 0;
2145
2146free_encoded_buf:
2147 g_free(XBZRLE.encoded_buf);
2148 XBZRLE.encoded_buf = NULL;
2149free_cache:
2150 cache_fini(XBZRLE.cache);
2151 XBZRLE.cache = NULL;
2152free_zero_page:
2153 g_free(XBZRLE.zero_target_page);
2154 XBZRLE.zero_target_page = NULL;
2155err_out:
2156 XBZRLE_cache_unlock();
2157 return -ENOMEM;
2158}
2159
53518d94 2160static int ram_state_init(RAMState **rsp)
56e93d26 2161{
7d00ee6a
PX
2162 *rsp = g_try_new0(RAMState, 1);
2163
2164 if (!*rsp) {
2165 error_report("%s: Init ramstate fail", __func__);
2166 return -1;
2167 }
53518d94
JQ
2168
2169 qemu_mutex_init(&(*rsp)->bitmap_mutex);
2170 qemu_mutex_init(&(*rsp)->src_page_req_mutex);
2171 QSIMPLEQ_INIT(&(*rsp)->src_page_requests);
56e93d26 2172
7d00ee6a
PX
2173 /*
2174 * Count the total number of pages used by ram blocks not including any
2175 * gaps due to alignment or unplugs.
2176 */
2177 (*rsp)->migration_dirty_pages = ram_bytes_total() >> TARGET_PAGE_BITS;
2178
2179 ram_state_reset(*rsp);
2180
2181 return 0;
2182}
2183
d6eff5d7 2184static void ram_list_init_bitmaps(void)
7d00ee6a 2185{
d6eff5d7
PX
2186 RAMBlock *block;
2187 unsigned long pages;
56e93d26 2188
0827b9e9
AA
2189 /* Skip setting bitmap if there is no RAM */
2190 if (ram_bytes_total()) {
6b6712ef 2191 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
d6eff5d7 2192 pages = block->max_length >> TARGET_PAGE_BITS;
6b6712ef
JQ
2193 block->bmap = bitmap_new(pages);
2194 bitmap_set(block->bmap, 0, pages);
2195 if (migrate_postcopy_ram()) {
2196 block->unsentmap = bitmap_new(pages);
2197 bitmap_set(block->unsentmap, 0, pages);
2198 }
0827b9e9 2199 }
f3f491fc 2200 }
d6eff5d7
PX
2201}
2202
2203static void ram_init_bitmaps(RAMState *rs)
2204{
2205 /* For memory_global_dirty_log_start below. */
2206 qemu_mutex_lock_iothread();
2207 qemu_mutex_lock_ramlist();
2208 rcu_read_lock();
f3f491fc 2209
d6eff5d7 2210 ram_list_init_bitmaps();
56e93d26 2211 memory_global_dirty_log_start();
d6eff5d7
PX
2212 migration_bitmap_sync(rs);
2213
2214 rcu_read_unlock();
56e93d26 2215 qemu_mutex_unlock_ramlist();
49877834 2216 qemu_mutex_unlock_iothread();
d6eff5d7
PX
2217}
2218
2219static int ram_init_all(RAMState **rsp)
2220{
2221 if (ram_state_init(rsp)) {
2222 return -1;
2223 }
2224
2225 if (xbzrle_init()) {
2226 ram_state_cleanup(rsp);
2227 return -1;
2228 }
2229
2230 ram_init_bitmaps(*rsp);
a91246c9
HZ
2231
2232 return 0;
2233}
2234
3d0684b2
JQ
2235/*
2236 * Each of ram_save_setup, ram_save_iterate and ram_save_complete has
a91246c9
HZ
2237 * a long-running RCU critical section. When RCU reclaims in the code
2238 * start to become numerous it will be necessary to reduce the
2239 * granularity of these critical sections.
2240 */
2241
3d0684b2
JQ
2242/**
2243 * ram_save_setup: Setup RAM for migration
2244 *
2245 * Returns zero to indicate success and negative for error
2246 *
2247 * @f: QEMUFile where to send the data
2248 * @opaque: RAMState pointer
2249 */
a91246c9
HZ
2250static int ram_save_setup(QEMUFile *f, void *opaque)
2251{
53518d94 2252 RAMState **rsp = opaque;
a91246c9
HZ
2253 RAMBlock *block;
2254
dcaf446e
XG
2255 if (compress_threads_save_setup()) {
2256 return -1;
2257 }
2258
a91246c9
HZ
2259 /* migration has already setup the bitmap, reuse it. */
2260 if (!migration_in_colo_state()) {
7d00ee6a 2261 if (ram_init_all(rsp) != 0) {
dcaf446e 2262 compress_threads_save_cleanup();
a91246c9 2263 return -1;
53518d94 2264 }
a91246c9 2265 }
53518d94 2266 (*rsp)->f = f;
a91246c9
HZ
2267
2268 rcu_read_lock();
56e93d26
JQ
2269
2270 qemu_put_be64(f, ram_bytes_total() | RAM_SAVE_FLAG_MEM_SIZE);
2271
99e15582 2272 RAMBLOCK_FOREACH(block) {
56e93d26
JQ
2273 qemu_put_byte(f, strlen(block->idstr));
2274 qemu_put_buffer(f, (uint8_t *)block->idstr, strlen(block->idstr));
2275 qemu_put_be64(f, block->used_length);
ef08fb38
DDAG
2276 if (migrate_postcopy_ram() && block->page_size != qemu_host_page_size) {
2277 qemu_put_be64(f, block->page_size);
2278 }
56e93d26
JQ
2279 }
2280
2281 rcu_read_unlock();
2282
2283 ram_control_before_iterate(f, RAM_CONTROL_SETUP);
2284 ram_control_after_iterate(f, RAM_CONTROL_SETUP);
2285
2286 qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
2287
2288 return 0;
2289}
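/*
 * The setup stage above therefore produces a stream of the form:
 *   be64: ram_bytes_total() | RAM_SAVE_FLAG_MEM_SIZE
 *   per RAMBlock: u8 idstr length, idstr bytes, be64 used_length,
 *                 be64 page_size (only for postcopy with non-host-sized pages)
 *   be64: RAM_SAVE_FLAG_EOS
 * which is what the RAM_SAVE_FLAG_MEM_SIZE branch of ram_load() parses back.
 */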
2290
3d0684b2
JQ
2291/**
2292 * ram_save_iterate: iterative stage for migration
2293 *
2294 * Returns zero to indicate success and negative for error
2295 *
2296 * @f: QEMUFile where to send the data
2297 * @opaque: RAMState pointer
2298 */
56e93d26
JQ
2299static int ram_save_iterate(QEMUFile *f, void *opaque)
2300{
53518d94
JQ
2301 RAMState **temp = opaque;
2302 RAMState *rs = *temp;
56e93d26
JQ
2303 int ret;
2304 int i;
2305 int64_t t0;
5c90308f 2306 int done = 0;
56e93d26 2307
b2557345
PL
2308 if (blk_mig_bulk_active()) {
2309 /* Avoid transferring ram during bulk phase of block migration as
2310 * the bulk phase will usually take a long time and transferring
2311 * ram updates during that time is pointless. */
2312 goto out;
2313 }
2314
56e93d26 2315 rcu_read_lock();
6f37bb8b
JQ
2316 if (ram_list.version != rs->last_version) {
2317 ram_state_reset(rs);
56e93d26
JQ
2318 }
2319
2320 /* Read version before ram_list.blocks */
2321 smp_rmb();
2322
2323 ram_control_before_iterate(f, RAM_CONTROL_ROUND);
2324
2325 t0 = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
2326 i = 0;
2327 while ((ret = qemu_file_rate_limit(f)) == 0) {
2328 int pages;
2329
ce25d337 2330 pages = ram_find_and_save_block(rs, false);
56e93d26
JQ
2331 /* no more pages to send */
2332 if (pages == 0) {
5c90308f 2333 done = 1;
56e93d26
JQ
2334 break;
2335 }
23b28c3c 2336 rs->iterations++;
070afca2 2337
56e93d26
JQ
2338 /* we want to check in the 1st loop, just in case it was the 1st time
2339 and we had to sync the dirty bitmap.
2340 qemu_clock_get_ns() is a bit expensive, so we only check every few
2341 iterations
2342 */
2343 if ((i & 63) == 0) {
2344 uint64_t t1 = (qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - t0) / 1000000;
2345 if (t1 > MAX_WAIT) {
55c4446b 2346 trace_ram_save_iterate_big_wait(t1, i);
56e93d26
JQ
2347 break;
2348 }
2349 }
2350 i++;
2351 }
ce25d337 2352 flush_compressed_data(rs);
56e93d26
JQ
2353 rcu_read_unlock();
2354
2355 /*
2356 * Must occur before EOS (or any QEMUFile operation)
2357 * because of RDMA protocol.
2358 */
2359 ram_control_after_iterate(f, RAM_CONTROL_ROUND);
2360
b2557345 2361out:
56e93d26 2362 qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
9360447d 2363 ram_counters.transferred += 8;
56e93d26
JQ
2364
2365 ret = qemu_file_get_error(f);
2366 if (ret < 0) {
2367 return ret;
2368 }
2369
5c90308f 2370 return done;
56e93d26
JQ
2371}
2372
3d0684b2
JQ
2373/**
2374 * ram_save_complete: function called to send the remaining amount of ram
2375 *
2376 * Returns zero to indicate success
2377 *
2378 * Called with iothread lock
2379 *
2380 * @f: QEMUFile where to send the data
2381 * @opaque: RAMState pointer
2382 */
56e93d26
JQ
2383static int ram_save_complete(QEMUFile *f, void *opaque)
2384{
53518d94
JQ
2385 RAMState **temp = opaque;
2386 RAMState *rs = *temp;
6f37bb8b 2387
56e93d26
JQ
2388 rcu_read_lock();
2389
5727309d 2390 if (!migration_in_postcopy()) {
8d820d6f 2391 migration_bitmap_sync(rs);
663e6c1d 2392 }
56e93d26
JQ
2393
2394 ram_control_before_iterate(f, RAM_CONTROL_FINISH);
2395
2396 /* try transferring iterative blocks of memory */
2397
2398 /* flush all remaining blocks regardless of rate limiting */
2399 while (true) {
2400 int pages;
2401
ce25d337 2402 pages = ram_find_and_save_block(rs, !migration_in_colo_state());
56e93d26
JQ
2403 /* no more blocks to send */
2404 if (pages == 0) {
2405 break;
2406 }
2407 }
2408
ce25d337 2409 flush_compressed_data(rs);
56e93d26 2410 ram_control_after_iterate(f, RAM_CONTROL_FINISH);
56e93d26
JQ
2411
2412 rcu_read_unlock();
d09a6fde 2413
56e93d26
JQ
2414 qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
2415
2416 return 0;
2417}
2418
c31b098f 2419static void ram_save_pending(QEMUFile *f, void *opaque, uint64_t max_size,
47995026
VSO
2420 uint64_t *res_precopy_only,
2421 uint64_t *res_compatible,
2422 uint64_t *res_postcopy_only)
56e93d26 2423{
53518d94
JQ
2424 RAMState **temp = opaque;
2425 RAMState *rs = *temp;
56e93d26
JQ
2426 uint64_t remaining_size;
2427
9edabd4d 2428 remaining_size = rs->migration_dirty_pages * TARGET_PAGE_SIZE;
56e93d26 2429
5727309d 2430 if (!migration_in_postcopy() &&
663e6c1d 2431 remaining_size < max_size) {
56e93d26
JQ
2432 qemu_mutex_lock_iothread();
2433 rcu_read_lock();
8d820d6f 2434 migration_bitmap_sync(rs);
56e93d26
JQ
2435 rcu_read_unlock();
2436 qemu_mutex_unlock_iothread();
9edabd4d 2437 remaining_size = rs->migration_dirty_pages * TARGET_PAGE_SIZE;
56e93d26 2438 }
c31b098f 2439
86e1167e
VSO
2440 if (migrate_postcopy_ram()) {
2441 /* We can do postcopy, and all the data is postcopiable */
47995026 2442 *res_compatible += remaining_size;
86e1167e 2443 } else {
47995026 2444 *res_precopy_only += remaining_size;
86e1167e 2445 }
56e93d26
JQ
2446}
2447
2448static int load_xbzrle(QEMUFile *f, ram_addr_t addr, void *host)
2449{
2450 unsigned int xh_len;
2451 int xh_flags;
063e760a 2452 uint8_t *loaded_data;
56e93d26 2453
56e93d26
JQ
2454 /* extract RLE header */
2455 xh_flags = qemu_get_byte(f);
2456 xh_len = qemu_get_be16(f);
2457
2458 if (xh_flags != ENCODING_FLAG_XBZRLE) {
2459 error_report("Failed to load XBZRLE page - wrong compression!");
2460 return -1;
2461 }
2462
2463 if (xh_len > TARGET_PAGE_SIZE) {
2464 error_report("Failed to load XBZRLE page - len overflow!");
2465 return -1;
2466 }
f265e0e4 2467 loaded_data = XBZRLE.decoded_buf;
56e93d26 2468 /* load data and decode */
f265e0e4 2469 /* it can change loaded_data to point to an internal buffer */
063e760a 2470 qemu_get_buffer_in_place(f, &loaded_data, xh_len);
56e93d26
JQ
2471
2472 /* decode RLE */
063e760a 2473 if (xbzrle_decode_buffer(loaded_data, xh_len, host,
56e93d26
JQ
2474 TARGET_PAGE_SIZE) == -1) {
2475 error_report("Failed to load XBZRLE page - decode error!");
2476 return -1;
2477 }
2478
2479 return 0;
2480}
2481
3d0684b2
JQ
2482/**
2483 * ram_block_from_stream: read a RAMBlock id from the migration stream
2484 *
2485 * Must be called from within a rcu critical section.
2486 *
56e93d26 2487 * Returns a pointer from within the RCU-protected ram_list.
a7180877 2488 *
3d0684b2
JQ
2489 * @f: QEMUFile where to read the data from
2490 * @flags: Page flags (mostly to see if it's a continuation of previous block)
a7180877 2491 */
3d0684b2 2492static inline RAMBlock *ram_block_from_stream(QEMUFile *f, int flags)
56e93d26
JQ
2493{
2494 static RAMBlock *block = NULL;
2495 char id[256];
2496 uint8_t len;
2497
2498 if (flags & RAM_SAVE_FLAG_CONTINUE) {
4c4bad48 2499 if (!block) {
56e93d26
JQ
2500 error_report("Ack, bad migration stream!");
2501 return NULL;
2502 }
4c4bad48 2503 return block;
56e93d26
JQ
2504 }
2505
2506 len = qemu_get_byte(f);
2507 qemu_get_buffer(f, (uint8_t *)id, len);
2508 id[len] = 0;
2509
e3dd7493 2510 block = qemu_ram_block_by_name(id);
4c4bad48
HZ
2511 if (!block) {
2512 error_report("Can't find block %s", id);
2513 return NULL;
56e93d26
JQ
2514 }
2515
4c4bad48
HZ
2516 return block;
2517}
2518
2519static inline void *host_from_ram_block_offset(RAMBlock *block,
2520 ram_addr_t offset)
2521{
2522 if (!offset_in_ramblock(block, offset)) {
2523 return NULL;
2524 }
2525
2526 return block->host + offset;
56e93d26
JQ
2527}
2528
3d0684b2
JQ
2529/**
2530 * ram_handle_compressed: handle the zero page case
2531 *
56e93d26
JQ
2532 * If a page (or a whole RDMA chunk) has been
2533 * determined to be zero, then zap it.
3d0684b2
JQ
2534 *
2535 * @host: host address for the zero page
2536 * @ch: the byte the page is filled with. We only support zero
2537 * @size: size of the zero page
56e93d26
JQ
2538 */
2539void ram_handle_compressed(void *host, uint8_t ch, uint64_t size)
2540{
2541 if (ch != 0 || !is_zero_range(host, size)) {
2542 memset(host, ch, size);
2543 }
2544}
2545
797ca154
XG
2546/* return the size after decompression, or negative value on error */
2547static int
2548qemu_uncompress_data(z_stream *stream, uint8_t *dest, size_t dest_len,
2549 const uint8_t *source, size_t source_len)
2550{
2551 int err;
2552
2553 err = inflateReset(stream);
2554 if (err != Z_OK) {
2555 return -1;
2556 }
2557
2558 stream->avail_in = source_len;
2559 stream->next_in = (uint8_t *)source;
2560 stream->avail_out = dest_len;
2561 stream->next_out = dest;
2562
2563 err = inflate(stream, Z_NO_FLUSH);
2564 if (err != Z_STREAM_END) {
2565 return -1;
2566 }
2567
2568 return stream->total_out;
2569}
2570
56e93d26
JQ
2571static void *do_data_decompress(void *opaque)
2572{
2573 DecompressParam *param = opaque;
2574 unsigned long pagesize;
33d151f4 2575 uint8_t *des;
34ab9e97 2576 int len, ret;
56e93d26 2577
33d151f4 2578 qemu_mutex_lock(&param->mutex);
90e56fb4 2579 while (!param->quit) {
33d151f4
LL
2580 if (param->des) {
2581 des = param->des;
2582 len = param->len;
2583 param->des = 0;
2584 qemu_mutex_unlock(&param->mutex);
2585
56e93d26 2586 pagesize = TARGET_PAGE_SIZE;
34ab9e97
XG
2587
2588 ret = qemu_uncompress_data(&param->stream, des, pagesize,
2589 param->compbuf, len);
2590 if (ret < 0) {
2591 error_report("decompress data failed");
2592 qemu_file_set_error(decomp_file, ret);
2593 }
73a8912b 2594
33d151f4
LL
2595 qemu_mutex_lock(&decomp_done_lock);
2596 param->done = true;
2597 qemu_cond_signal(&decomp_done_cond);
2598 qemu_mutex_unlock(&decomp_done_lock);
2599
2600 qemu_mutex_lock(&param->mutex);
2601 } else {
2602 qemu_cond_wait(&param->cond, &param->mutex);
2603 }
56e93d26 2604 }
33d151f4 2605 qemu_mutex_unlock(&param->mutex);
56e93d26
JQ
2606
2607 return NULL;
2608}
2609
34ab9e97 2610static int wait_for_decompress_done(void)
5533b2e9
LL
2611{
2612 int idx, thread_count;
2613
2614 if (!migrate_use_compression()) {
34ab9e97 2615 return 0;
5533b2e9
LL
2616 }
2617
2618 thread_count = migrate_decompress_threads();
2619 qemu_mutex_lock(&decomp_done_lock);
2620 for (idx = 0; idx < thread_count; idx++) {
2621 while (!decomp_param[idx].done) {
2622 qemu_cond_wait(&decomp_done_cond, &decomp_done_lock);
2623 }
2624 }
2625 qemu_mutex_unlock(&decomp_done_lock);
34ab9e97 2626 return qemu_file_get_error(decomp_file);
5533b2e9
LL
2627}
2628
f0afa331 2629static void compress_threads_load_cleanup(void)
56e93d26
JQ
2630{
2631 int i, thread_count;
2632
3416ab5b
JQ
2633 if (!migrate_use_compression()) {
2634 return;
2635 }
56e93d26
JQ
2636 thread_count = migrate_decompress_threads();
2637 for (i = 0; i < thread_count; i++) {
797ca154
XG
2638 /*
2639 * we use it as an indicator of whether the thread is
2640 * properly initialized or not
2641 */
2642 if (!decomp_param[i].compbuf) {
2643 break;
2644 }
2645
56e93d26 2646 qemu_mutex_lock(&decomp_param[i].mutex);
90e56fb4 2647 decomp_param[i].quit = true;
56e93d26
JQ
2648 qemu_cond_signal(&decomp_param[i].cond);
2649 qemu_mutex_unlock(&decomp_param[i].mutex);
2650 }
2651 for (i = 0; i < thread_count; i++) {
797ca154
XG
2652 if (!decomp_param[i].compbuf) {
2653 break;
2654 }
2655
56e93d26
JQ
2656 qemu_thread_join(decompress_threads + i);
2657 qemu_mutex_destroy(&decomp_param[i].mutex);
2658 qemu_cond_destroy(&decomp_param[i].cond);
797ca154 2659 inflateEnd(&decomp_param[i].stream);
56e93d26 2660 g_free(decomp_param[i].compbuf);
797ca154 2661 decomp_param[i].compbuf = NULL;
56e93d26
JQ
2662 }
2663 g_free(decompress_threads);
2664 g_free(decomp_param);
56e93d26
JQ
2665 decompress_threads = NULL;
2666 decomp_param = NULL;
34ab9e97 2667 decomp_file = NULL;
56e93d26
JQ
2668}
2669
34ab9e97 2670static int compress_threads_load_setup(QEMUFile *f)
797ca154
XG
2671{
2672 int i, thread_count;
2673
2674 if (!migrate_use_compression()) {
2675 return 0;
2676 }
2677
2678 thread_count = migrate_decompress_threads();
2679 decompress_threads = g_new0(QemuThread, thread_count);
2680 decomp_param = g_new0(DecompressParam, thread_count);
2681 qemu_mutex_init(&decomp_done_lock);
2682 qemu_cond_init(&decomp_done_cond);
34ab9e97 2683 decomp_file = f;
797ca154
XG
2684 for (i = 0; i < thread_count; i++) {
2685 if (inflateInit(&decomp_param[i].stream) != Z_OK) {
2686 goto exit;
2687 }
2688
2689 decomp_param[i].compbuf = g_malloc0(compressBound(TARGET_PAGE_SIZE));
2690 qemu_mutex_init(&decomp_param[i].mutex);
2691 qemu_cond_init(&decomp_param[i].cond);
2692 decomp_param[i].done = true;
2693 decomp_param[i].quit = false;
2694 qemu_thread_create(decompress_threads + i, "decompress",
2695 do_data_decompress, decomp_param + i,
2696 QEMU_THREAD_JOINABLE);
2697 }
2698 return 0;
2699exit:
2700 compress_threads_load_cleanup();
2701 return -1;
2702}
2703
c1bc6626 2704static void decompress_data_with_multi_threads(QEMUFile *f,
56e93d26
JQ
2705 void *host, int len)
2706{
2707 int idx, thread_count;
2708
2709 thread_count = migrate_decompress_threads();
73a8912b 2710 qemu_mutex_lock(&decomp_done_lock);
56e93d26
JQ
2711 while (true) {
2712 for (idx = 0; idx < thread_count; idx++) {
73a8912b 2713 if (decomp_param[idx].done) {
33d151f4
LL
2714 decomp_param[idx].done = false;
2715 qemu_mutex_lock(&decomp_param[idx].mutex);
c1bc6626 2716 qemu_get_buffer(f, decomp_param[idx].compbuf, len);
56e93d26
JQ
2717 decomp_param[idx].des = host;
2718 decomp_param[idx].len = len;
33d151f4
LL
2719 qemu_cond_signal(&decomp_param[idx].cond);
2720 qemu_mutex_unlock(&decomp_param[idx].mutex);
56e93d26
JQ
2721 break;
2722 }
2723 }
2724 if (idx < thread_count) {
2725 break;
73a8912b
LL
2726 } else {
2727 qemu_cond_wait(&decomp_done_cond, &decomp_done_lock);
56e93d26
JQ
2728 }
2729 }
73a8912b 2730 qemu_mutex_unlock(&decomp_done_lock);
56e93d26
JQ
2731}
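/*
 * Hand-off in the loop above, step by step: the load path looks for an idle
 * worker (done == true), copies the compressed bytes into that worker's
 * compbuf, records the destination host pointer and length, and signals its
 * condition variable; the worker inflates compbuf straight into guest RAM
 * and flips done back under decomp_done_lock.  If no worker is idle, the
 * caller waits on decomp_done_cond until one finishes.
 */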
2732
f265e0e4
JQ
2733/**
2734 * ram_load_setup: Setup RAM for migration incoming side
2735 *
2736 * Returns zero to indicate success and negative for error
2737 *
2738 * @f: QEMUFile where to receive the data
2739 * @opaque: RAMState pointer
2740 */
2741static int ram_load_setup(QEMUFile *f, void *opaque)
2742{
34ab9e97 2743 if (compress_threads_load_setup(f)) {
797ca154
XG
2744 return -1;
2745 }
2746
f265e0e4 2747 xbzrle_load_setup();
f9494614 2748 ramblock_recv_map_init();
f265e0e4
JQ
2749 return 0;
2750}
2751
2752static int ram_load_cleanup(void *opaque)
2753{
f9494614 2754 RAMBlock *rb;
f265e0e4 2755 xbzrle_load_cleanup();
f0afa331 2756 compress_threads_load_cleanup();
f9494614
AP
2757
2758 RAMBLOCK_FOREACH(rb) {
2759 g_free(rb->receivedmap);
2760 rb->receivedmap = NULL;
2761 }
f265e0e4
JQ
2762 return 0;
2763}
2764
3d0684b2
JQ
2765/**
2766 * ram_postcopy_incoming_init: allocate postcopy data structures
2767 *
2768 * Returns 0 for success and negative if there was one error
2769 *
2770 * @mis: current migration incoming state
2771 *
2772 * Allocate data structures etc needed by incoming migration with
2773 * postcopy-ram. postcopy-ram's similarly named
2774 * postcopy_ram_incoming_init does the work.
1caddf8a
DDAG
2775 */
2776int ram_postcopy_incoming_init(MigrationIncomingState *mis)
2777{
b8c48993 2778 unsigned long ram_pages = last_ram_page();
1caddf8a
DDAG
2779
2780 return postcopy_ram_incoming_init(mis, ram_pages);
2781}
2782
3d0684b2
JQ
2783/**
2784 * ram_load_postcopy: load a page in postcopy case
2785 *
2786 * Returns 0 for success or -errno in case of error
2787 *
a7180877
DDAG
2788 * Called in postcopy mode by ram_load().
2789 * rcu_read_lock is taken prior to this being called.
3d0684b2
JQ
2790 *
2791 * @f: QEMUFile where to send the data
a7180877
DDAG
2792 */
2793static int ram_load_postcopy(QEMUFile *f)
2794{
2795 int flags = 0, ret = 0;
2796 bool place_needed = false;
28abd200 2797 bool matching_page_sizes = false;
a7180877
DDAG
2798 MigrationIncomingState *mis = migration_incoming_get_current();
2799 /* Temporary page that is later 'placed' */
2800 void *postcopy_host_page = postcopy_get_tmp_page(mis);
c53b7ddc 2801 void *last_host = NULL;
a3b6ff6d 2802 bool all_zero = false;
a7180877
DDAG
2803
2804 while (!ret && !(flags & RAM_SAVE_FLAG_EOS)) {
2805 ram_addr_t addr;
2806 void *host = NULL;
2807 void *page_buffer = NULL;
2808 void *place_source = NULL;
df9ff5e1 2809 RAMBlock *block = NULL;
a7180877 2810 uint8_t ch;
a7180877
DDAG
2811
2812 addr = qemu_get_be64(f);
7a9ddfbf
PX
2813
2814 /*
2815 * If there was a QEMUFile error, we should stop here; "addr"
2816 * may be invalid
2817 */
2818 ret = qemu_file_get_error(f);
2819 if (ret) {
2820 break;
2821 }
2822
a7180877
DDAG
2823 flags = addr & ~TARGET_PAGE_MASK;
2824 addr &= TARGET_PAGE_MASK;
2825
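        /*
         * The header word carries the page address and the flags together;
         * for example 0x7f008 would mean the page at 0x7f000 with
         * RAM_SAVE_FLAG_PAGE (0x08) set, since the flags always fit in the
         * bits below TARGET_PAGE_SIZE.
         */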
2826 trace_ram_load_postcopy_loop((uint64_t)addr, flags);
2827 place_needed = false;
bb890ed5 2828 if (flags & (RAM_SAVE_FLAG_ZERO | RAM_SAVE_FLAG_PAGE)) {
df9ff5e1 2829 block = ram_block_from_stream(f, flags);
4c4bad48
HZ
2830
2831 host = host_from_ram_block_offset(block, addr);
a7180877
DDAG
2832 if (!host) {
2833 error_report("Illegal RAM offset " RAM_ADDR_FMT, addr);
2834 ret = -EINVAL;
2835 break;
2836 }
28abd200 2837 matching_page_sizes = block->page_size == TARGET_PAGE_SIZE;
a7180877 2838 /*
28abd200
DDAG
2839 * Postcopy requires that we place whole host pages atomically;
2840 * these may be huge pages for RAMBlocks that are backed by
2841 * hugetlbfs.
a7180877
DDAG
2842 * To make it atomic, the data is read into a temporary page
2843 * that's moved into place later.
2845 * The migration protocol uses (possibly smaller) target pages;
2846 * however, the source ensures it always sends all the components
2846 * of a host page in order.
2847 */
2848 page_buffer = postcopy_host_page +
28abd200 2849 ((uintptr_t)host & (block->page_size - 1));
a7180877 2850 /* If all TP are zero then we can optimise the place */
28abd200 2851 if (!((uintptr_t)host & (block->page_size - 1))) {
a7180877 2852 all_zero = true;
c53b7ddc
DDAG
2853 } else {
2854 /* not the 1st TP within the HP */
2855 if (host != (last_host + TARGET_PAGE_SIZE)) {
9af9e0fe 2856 error_report("Non-sequential target page %p/%p",
c53b7ddc
DDAG
2857 host, last_host);
2858 ret = -EINVAL;
2859 break;
2860 }
a7180877
DDAG
2861 }
2862
c53b7ddc 2863
a7180877
DDAG
2864 /*
2865 * If it's the last part of a host page then we place the host
2866 * page
2867 */
2868 place_needed = (((uintptr_t)host + TARGET_PAGE_SIZE) &
28abd200 2869 (block->page_size - 1)) == 0;
a7180877
DDAG
2870 place_source = postcopy_host_page;
2871 }
c53b7ddc 2872 last_host = host;
a7180877
DDAG
2873
2874 switch (flags & ~RAM_SAVE_FLAG_CONTINUE) {
bb890ed5 2875 case RAM_SAVE_FLAG_ZERO:
a7180877
DDAG
2876 ch = qemu_get_byte(f);
2877 memset(page_buffer, ch, TARGET_PAGE_SIZE);
2878 if (ch) {
2879 all_zero = false;
2880 }
2881 break;
2882
2883 case RAM_SAVE_FLAG_PAGE:
2884 all_zero = false;
2885 if (!place_needed || !matching_page_sizes) {
2886 qemu_get_buffer(f, page_buffer, TARGET_PAGE_SIZE);
2887 } else {
2888 /* Avoids the qemu_file copy during postcopy, which is
2889 * going to do a copy later; can only do it when we
2890 * do this read in one go (matching page sizes)
2891 */
2892 qemu_get_buffer_in_place(f, (uint8_t **)&place_source,
2893 TARGET_PAGE_SIZE);
2894 }
2895 break;
2896 case RAM_SAVE_FLAG_EOS:
2897 /* normal exit */
2898 break;
2899 default:
2900 error_report("Unknown combination of migration flags: %#x"
2901 " (postcopy mode)", flags);
2902 ret = -EINVAL;
7a9ddfbf
PX
2903 break;
2904 }
2905
2906 /* Detect for any possible file errors */
2907 if (!ret && qemu_file_get_error(f)) {
2908 ret = qemu_file_get_error(f);
a7180877
DDAG
2909 }
2910
7a9ddfbf 2911 if (!ret && place_needed) {
a7180877 2912 /* This gets called at the last target page in the host page */
df9ff5e1
DDAG
2913 void *place_dest = host + TARGET_PAGE_SIZE - block->page_size;
2914
a7180877 2915 if (all_zero) {
df9ff5e1 2916 ret = postcopy_place_page_zero(mis, place_dest,
8be4620b 2917 block);
a7180877 2918 } else {
df9ff5e1 2919 ret = postcopy_place_page(mis, place_dest,
8be4620b 2920 place_source, block);
a7180877
DDAG
2921 }
2922 }
a7180877
DDAG
2923 }
2924
2925 return ret;
2926}
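/*
 * A concrete example of the flow above, assuming 4 KiB target pages on a
 * RAMBlock backed by 2 MiB hugetlbfs pages: the source sends the 512 target
 * pages of one host page back to back, each is copied into
 * postcopy_host_page at its offset within the host page, and only when the
 * last of the 512 arrives does place_needed become true, at which point the
 * whole 2 MiB page is placed atomically with postcopy_place_page() (or the
 * zero-page variant if every byte was zero).
 */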
2927
acab30b8
DHB
2928static bool postcopy_is_advised(void)
2929{
2930 PostcopyState ps = postcopy_state_get();
2931 return ps >= POSTCOPY_INCOMING_ADVISE && ps < POSTCOPY_INCOMING_END;
2932}
2933
2934static bool postcopy_is_running(void)
2935{
2936 PostcopyState ps = postcopy_state_get();
2937 return ps >= POSTCOPY_INCOMING_LISTENING && ps < POSTCOPY_INCOMING_END;
2938}
2939
56e93d26
JQ
2940static int ram_load(QEMUFile *f, void *opaque, int version_id)
2941{
edc60127 2942 int flags = 0, ret = 0, invalid_flags = 0;
56e93d26
JQ
2943 static uint64_t seq_iter;
2944 int len = 0;
a7180877
DDAG
2945 /*
2946 * If the system is running in postcopy mode, page inserts into host memory
2947 * must be atomic
2948 */
acab30b8 2949 bool postcopy_running = postcopy_is_running();
ef08fb38 2950 /* ADVISE is earlier; it shows the source has the postcopy capability on */
acab30b8 2951 bool postcopy_advised = postcopy_is_advised();
56e93d26
JQ
2952
2953 seq_iter++;
2954
2955 if (version_id != 4) {
2956 ret = -EINVAL;
2957 }
2958
edc60127
JQ
2959 if (!migrate_use_compression()) {
2960 invalid_flags |= RAM_SAVE_FLAG_COMPRESS_PAGE;
2961 }
56e93d26
JQ
2962 /* This RCU critical section can be very long running.
2963 * When RCU reclaims in the code start to become numerous,
2964 * it will be necessary to reduce the granularity of this
2965 * critical section.
2966 */
2967 rcu_read_lock();
a7180877
DDAG
2968
2969 if (postcopy_running) {
2970 ret = ram_load_postcopy(f);
2971 }
2972
2973 while (!postcopy_running && !ret && !(flags & RAM_SAVE_FLAG_EOS)) {
56e93d26 2974 ram_addr_t addr, total_ram_bytes;
a776aa15 2975 void *host = NULL;
56e93d26
JQ
2976 uint8_t ch;
2977
2978 addr = qemu_get_be64(f);
2979 flags = addr & ~TARGET_PAGE_MASK;
2980 addr &= TARGET_PAGE_MASK;
2981
edc60127
JQ
2982 if (flags & invalid_flags) {
2983 if (flags & invalid_flags & RAM_SAVE_FLAG_COMPRESS_PAGE) {
2984 error_report("Received an unexpected compressed page");
2985 }
2986
2987 ret = -EINVAL;
2988 break;
2989 }
2990
bb890ed5 2991 if (flags & (RAM_SAVE_FLAG_ZERO | RAM_SAVE_FLAG_PAGE |
a776aa15 2992 RAM_SAVE_FLAG_COMPRESS_PAGE | RAM_SAVE_FLAG_XBZRLE)) {
4c4bad48
HZ
2993 RAMBlock *block = ram_block_from_stream(f, flags);
2994
2995 host = host_from_ram_block_offset(block, addr);
a776aa15
DDAG
2996 if (!host) {
2997 error_report("Illegal RAM offset " RAM_ADDR_FMT, addr);
2998 ret = -EINVAL;
2999 break;
3000 }
f9494614 3001 ramblock_recv_bitmap_set(block, host);
1db9d8e5 3002 trace_ram_load_loop(block->idstr, (uint64_t)addr, flags, host);
a776aa15
DDAG
3003 }
3004
56e93d26
JQ
3005 switch (flags & ~RAM_SAVE_FLAG_CONTINUE) {
3006 case RAM_SAVE_FLAG_MEM_SIZE:
3007 /* Synchronize RAM block list */
3008 total_ram_bytes = addr;
3009 while (!ret && total_ram_bytes) {
3010 RAMBlock *block;
56e93d26
JQ
3011 char id[256];
3012 ram_addr_t length;
3013
3014 len = qemu_get_byte(f);
3015 qemu_get_buffer(f, (uint8_t *)id, len);
3016 id[len] = 0;
3017 length = qemu_get_be64(f);
3018
e3dd7493
DDAG
3019 block = qemu_ram_block_by_name(id);
3020 if (block) {
3021 if (length != block->used_length) {
3022 Error *local_err = NULL;
56e93d26 3023
fa53a0e5 3024 ret = qemu_ram_resize(block, length,
e3dd7493
DDAG
3025 &local_err);
3026 if (local_err) {
3027 error_report_err(local_err);
56e93d26 3028 }
56e93d26 3029 }
ef08fb38
DDAG
3030 /* For postcopy we need to check hugepage sizes match */
3031 if (postcopy_advised &&
3032 block->page_size != qemu_host_page_size) {
3033 uint64_t remote_page_size = qemu_get_be64(f);
3034 if (remote_page_size != block->page_size) {
3035 error_report("Mismatched RAM page size %s "
3036 "(local) %zd != %" PRId64,
3037 id, block->page_size,
3038 remote_page_size);
3039 ret = -EINVAL;
3040 }
3041 }
e3dd7493
DDAG
3042 ram_control_load_hook(f, RAM_CONTROL_BLOCK_REG,
3043 block->idstr);
3044 } else {
56e93d26
JQ
3045 error_report("Unknown ramblock \"%s\", cannot "
3046 "accept migration", id);
3047 ret = -EINVAL;
3048 }
3049
3050 total_ram_bytes -= length;
3051 }
3052 break;
a776aa15 3053
bb890ed5 3054 case RAM_SAVE_FLAG_ZERO:
56e93d26
JQ
3055 ch = qemu_get_byte(f);
3056 ram_handle_compressed(host, ch, TARGET_PAGE_SIZE);
3057 break;
a776aa15 3058
56e93d26 3059 case RAM_SAVE_FLAG_PAGE:
56e93d26
JQ
3060 qemu_get_buffer(f, host, TARGET_PAGE_SIZE);
3061 break;
56e93d26 3062
a776aa15 3063 case RAM_SAVE_FLAG_COMPRESS_PAGE:
56e93d26
JQ
3064 len = qemu_get_be32(f);
3065 if (len < 0 || len > compressBound(TARGET_PAGE_SIZE)) {
3066 error_report("Invalid compressed data length: %d", len);
3067 ret = -EINVAL;
3068 break;
3069 }
c1bc6626 3070 decompress_data_with_multi_threads(f, host, len);
56e93d26 3071 break;
a776aa15 3072
56e93d26 3073 case RAM_SAVE_FLAG_XBZRLE:
56e93d26
JQ
3074 if (load_xbzrle(f, addr, host) < 0) {
3075 error_report("Failed to decompress XBZRLE page at "
3076 RAM_ADDR_FMT, addr);
3077 ret = -EINVAL;
3078 break;
3079 }
3080 break;
3081 case RAM_SAVE_FLAG_EOS:
3082 /* normal exit */
3083 break;
3084 default:
3085 if (flags & RAM_SAVE_FLAG_HOOK) {
632e3a5c 3086 ram_control_load_hook(f, RAM_CONTROL_HOOK, NULL);
56e93d26
JQ
3087 } else {
3088 error_report("Unknown combination of migration flags: %#x",
3089 flags);
3090 ret = -EINVAL;
3091 }
3092 }
3093 if (!ret) {
3094 ret = qemu_file_get_error(f);
3095 }
3096 }
3097
34ab9e97 3098 ret |= wait_for_decompress_done();
56e93d26 3099 rcu_read_unlock();
55c4446b 3100 trace_ram_load_complete(ret, seq_iter);
56e93d26
JQ
3101 return ret;
3102}
3103
c6467627
VSO
3104static bool ram_has_postcopy(void *opaque)
3105{
3106 return migrate_postcopy_ram();
3107}
3108
56e93d26 3109static SaveVMHandlers savevm_ram_handlers = {
9907e842 3110 .save_setup = ram_save_setup,
56e93d26 3111 .save_live_iterate = ram_save_iterate,
763c906b 3112 .save_live_complete_postcopy = ram_save_complete,
a3e06c3d 3113 .save_live_complete_precopy = ram_save_complete,
c6467627 3114 .has_postcopy = ram_has_postcopy,
56e93d26
JQ
3115 .save_live_pending = ram_save_pending,
3116 .load_state = ram_load,
f265e0e4
JQ
3117 .save_cleanup = ram_save_cleanup,
3118 .load_setup = ram_load_setup,
3119 .load_cleanup = ram_load_cleanup,
56e93d26
JQ
3120};
3121
3122void ram_mig_init(void)
3123{
3124 qemu_mutex_init(&XBZRLE.lock);
6f37bb8b 3125 register_savevm_live(NULL, "ram", 0, 4, &savevm_ram_handlers, &ram_state);
56e93d26 3126}