56e93d26
JQ
1/*
2 * QEMU System Emulator
3 *
4 * Copyright (c) 2003-2008 Fabrice Bellard
76cc7b58
JQ
5 * Copyright (c) 2011-2015 Red Hat Inc
6 *
7 * Authors:
8 * Juan Quintela <quintela@redhat.com>
56e93d26
JQ
9 *
10 * Permission is hereby granted, free of charge, to any person obtaining a copy
11 * of this software and associated documentation files (the "Software"), to deal
12 * in the Software without restriction, including without limitation the rights
13 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
14 * copies of the Software, and to permit persons to whom the Software is
15 * furnished to do so, subject to the following conditions:
16 *
17 * The above copyright notice and this permission notice shall be included in
18 * all copies or substantial portions of the Software.
19 *
20 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
21 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
23 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
24 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
25 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
26 * THE SOFTWARE.
27 */
e688df6b 28
1393a485 29#include "qemu/osdep.h"
33c11879 30#include "cpu.h"
56e93d26 31#include <zlib.h>
f348b6d1 32#include "qemu/cutils.h"
56e93d26
JQ
33#include "qemu/bitops.h"
34#include "qemu/bitmap.h"
7205c9ec 35#include "qemu/main-loop.h"
709e3fe8 36#include "xbzrle.h"
7b1e1a22 37#include "ram.h"
6666c96a 38#include "migration.h"
71bb07db 39#include "socket.h"
f2a8f0a6 40#include "migration/register.h"
7b1e1a22 41#include "migration/misc.h"
08a0aee1 42#include "qemu-file.h"
be07b0ac 43#include "postcopy-ram.h"
56e93d26 44#include "migration/page_cache.h"
56e93d26 45#include "qemu/error-report.h"
e688df6b 46#include "qapi/error.h"
9af23989 47#include "qapi/qapi-events-migration.h"
8acabf69 48#include "qapi/qmp/qerror.h"
56e93d26 49#include "trace.h"
56e93d26 50#include "exec/ram_addr.h"
f9494614 51#include "exec/target_page.h"
56e93d26 52#include "qemu/rcu_queue.h"
a91246c9 53#include "migration/colo.h"
9ac78b61 54#include "migration/block.h"
56e93d26 55
56e93d26
JQ
56/***********************************************************/
57/* ram save/restore */
58
bb890ed5
JQ
59/* RAM_SAVE_FLAG_ZERO used to be named RAM_SAVE_FLAG_COMPRESS; it
60 * worked for pages that were filled with the same char. We switched
61 * it to only search for the zero value, and renamed it to avoid
62 * confusion with RAM_SAVE_FLAG_COMPRESS_PAGE.
63 */
64
56e93d26 65#define RAM_SAVE_FLAG_FULL 0x01 /* Obsolete, not used anymore */
bb890ed5 66#define RAM_SAVE_FLAG_ZERO 0x02
56e93d26
JQ
67#define RAM_SAVE_FLAG_MEM_SIZE 0x04
68#define RAM_SAVE_FLAG_PAGE 0x08
69#define RAM_SAVE_FLAG_EOS 0x10
70#define RAM_SAVE_FLAG_CONTINUE 0x20
71#define RAM_SAVE_FLAG_XBZRLE 0x40
72/* 0x80 is reserved in migration.h; start with 0x100 for the next flag */
73#define RAM_SAVE_FLAG_COMPRESS_PAGE 0x100
74
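/*
 * Illustrative sketch (an addition, not part of the original file): these
 * flags travel in the low bits of the 64-bit page offset on the wire, so a
 * receiver is expected to split the value roughly like this (hypothetical
 * helper, for illustration only):
 */
#if 0
static void example_split_wire_offset(uint64_t wire_value)
{
    ram_addr_t addr = wire_value & TARGET_PAGE_MASK;   /* page address */
    uint64_t flags = wire_value & ~TARGET_PAGE_MASK;   /* RAM_SAVE_FLAG_* */

    if (flags & RAM_SAVE_FLAG_ZERO) {
        /* page body is implicit: a page filled with a single (zero) byte */
    } else if (flags & RAM_SAVE_FLAG_PAGE) {
        /* a full TARGET_PAGE_SIZE page body follows */
    }
}
#endif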
56e93d26
JQ
75static inline bool is_zero_range(uint8_t *p, uint64_t size)
76{
a1febc49 77 return buffer_is_zero(p, size);
56e93d26
JQ
78}
79
9360447d
JQ
80XBZRLECacheStats xbzrle_counters;
81
56e93d26
JQ
82/* struct contains XBZRLE cache and a static page
83 used by the compression */
84static struct {
85 /* buffer used for XBZRLE encoding */
86 uint8_t *encoded_buf;
87 /* buffer for storing page content */
88 uint8_t *current_buf;
89 /* Cache for XBZRLE, Protected by lock. */
90 PageCache *cache;
91 QemuMutex lock;
c00e0928
JQ
92 /* it will store a page full of zeros */
93 uint8_t *zero_target_page;
f265e0e4
JQ
94 /* buffer used for XBZRLE decoding */
95 uint8_t *decoded_buf;
56e93d26
JQ
96} XBZRLE;
97
56e93d26
JQ
98static void XBZRLE_cache_lock(void)
99{
100 if (migrate_use_xbzrle())
101 qemu_mutex_lock(&XBZRLE.lock);
102}
103
104static void XBZRLE_cache_unlock(void)
105{
106 if (migrate_use_xbzrle())
107 qemu_mutex_unlock(&XBZRLE.lock);
108}
109
3d0684b2
JQ
110/**
111 * xbzrle_cache_resize: resize the xbzrle cache
112 *
113 * This function is called from qmp_migrate_set_cache_size in the main
114 * thread, possibly while a migration is in progress. A running
115 * migration may be using the cache and might finish during this call,
116 * hence changes to the cache are protected by XBZRLE.lock.
117 *
c9dede2d 118 * Returns 0 for success or -1 for error
3d0684b2
JQ
119 *
120 * @new_size: new cache size
8acabf69 121 * @errp: set to the reason if the check fails
56e93d26 122 */
c9dede2d 123int xbzrle_cache_resize(int64_t new_size, Error **errp)
56e93d26
JQ
124{
125 PageCache *new_cache;
c9dede2d 126 int64_t ret = 0;
56e93d26 127
8acabf69
JQ
128 /* Check for truncation */
129 if (new_size != (size_t)new_size) {
130 error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "cache size",
131 "exceeding address space");
132 return -1;
133 }
134
2a313e5c
JQ
135 if (new_size == migrate_xbzrle_cache_size()) {
136 /* nothing to do */
c9dede2d 137 return 0;
2a313e5c
JQ
138 }
139
56e93d26
JQ
140 XBZRLE_cache_lock();
141
142 if (XBZRLE.cache != NULL) {
80f8dfde 143 new_cache = cache_init(new_size, TARGET_PAGE_SIZE, errp);
56e93d26 144 if (!new_cache) {
56e93d26
JQ
145 ret = -1;
146 goto out;
147 }
148
149 cache_fini(XBZRLE.cache);
150 XBZRLE.cache = new_cache;
151 }
56e93d26
JQ
152out:
153 XBZRLE_cache_unlock();
154 return ret;
155}
156
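/*
 * Illustrative usage sketch (an addition, not part of the original file):
 * a caller such as the QMP cache-size handler is expected to pass only the
 * requested size plus an Error pointer (hypothetical wrapper, for
 * illustration only):
 */
#if 0
static void example_set_cache_size(int64_t new_size, Error **errp)
{
    if (xbzrle_cache_resize(new_size, errp) < 0) {
        /* *errp already explains why, e.g. a size that would truncate
         * when converted to size_t */
        return;
    }
    /* on success the old cache has been freed and replaced */
}
#endif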
f9494614
AP
157static void ramblock_recv_map_init(void)
158{
159 RAMBlock *rb;
160
161 RAMBLOCK_FOREACH(rb) {
162 assert(!rb->receivedmap);
163 rb->receivedmap = bitmap_new(rb->max_length >> qemu_target_page_bits());
164 }
165}
166
167int ramblock_recv_bitmap_test(RAMBlock *rb, void *host_addr)
168{
169 return test_bit(ramblock_recv_bitmap_offset(host_addr, rb),
170 rb->receivedmap);
171}
172
1cba9f6e
DDAG
173bool ramblock_recv_bitmap_test_byte_offset(RAMBlock *rb, uint64_t byte_offset)
174{
175 return test_bit(byte_offset >> TARGET_PAGE_BITS, rb->receivedmap);
176}
177
f9494614
AP
178void ramblock_recv_bitmap_set(RAMBlock *rb, void *host_addr)
179{
180 set_bit_atomic(ramblock_recv_bitmap_offset(host_addr, rb), rb->receivedmap);
181}
182
183void ramblock_recv_bitmap_set_range(RAMBlock *rb, void *host_addr,
184 size_t nr)
185{
186 bitmap_set_atomic(rb->receivedmap,
187 ramblock_recv_bitmap_offset(host_addr, rb),
188 nr);
189}
190
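/*
 * Illustrative sketch (an addition, not part of the original file): on the
 * destination the expected pattern is to mark a page in receivedmap right
 * after its contents have been placed into guest memory, and to query the
 * bitmap later, e.g. from postcopy code (hypothetical helper):
 */
#if 0
static void example_mark_page_received(RAMBlock *rb, void *host_addr)
{
    /* ... page contents have just been placed at host_addr ... */
    ramblock_recv_bitmap_set(rb, host_addr);

    if (ramblock_recv_bitmap_test(rb, host_addr)) {
        /* page already arrived; no need to request it again */
    }
}
#endif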
ec481c6c
JQ
191/*
192 * An outstanding page request, on the source, having been received
193 * and queued
194 */
195struct RAMSrcPageRequest {
196 RAMBlock *rb;
197 hwaddr offset;
198 hwaddr len;
199
200 QSIMPLEQ_ENTRY(RAMSrcPageRequest) next_req;
201};
202
6f37bb8b
JQ
203/* State of RAM for migration */
204struct RAMState {
204b88b8
JQ
205 /* QEMUFile used for this migration */
206 QEMUFile *f;
6f37bb8b
JQ
207 /* Last block that we have visited searching for dirty pages */
208 RAMBlock *last_seen_block;
209 /* Last block from where we have sent data */
210 RAMBlock *last_sent_block;
269ace29
JQ
211 /* Last dirty target page we have sent */
212 ram_addr_t last_page;
6f37bb8b
JQ
213 /* last ram version we have seen */
214 uint32_t last_version;
215 /* We are in the first round */
216 bool ram_bulk_stage;
8d820d6f
JQ
217 /* How many times we have dirtied too many pages */
218 int dirty_rate_high_cnt;
f664da80
JQ
219 /* these variables are used for bitmap sync */
220 /* last time we did a full bitmap_sync */
221 int64_t time_last_bitmap_sync;
eac74159 222 /* bytes transferred at start_time */
c4bdf0cf 223 uint64_t bytes_xfer_prev;
a66cd90c 224 /* number of dirty pages since start_time */
68908ed6 225 uint64_t num_dirty_pages_period;
b5833fde
JQ
226 /* xbzrle misses since the beginning of the period */
227 uint64_t xbzrle_cache_miss_prev;
36040d9c
JQ
228 /* number of iterations at the beginning of period */
229 uint64_t iterations_prev;
23b28c3c
JQ
230 /* Iterations since start */
231 uint64_t iterations;
9360447d 232 /* number of dirty bits in the bitmap */
2dfaf12e
PX
233 uint64_t migration_dirty_pages;
234 /* protects modification of the bitmap */
108cfae0 235 QemuMutex bitmap_mutex;
68a098f3
JQ
236 /* The RAMBlock used in the last src_page_requests */
237 RAMBlock *last_req_rb;
ec481c6c
JQ
238 /* Queue of outstanding page requests from the destination */
239 QemuMutex src_page_req_mutex;
240 QSIMPLEQ_HEAD(src_page_requests, RAMSrcPageRequest) src_page_requests;
6f37bb8b
JQ
241};
242typedef struct RAMState RAMState;
243
53518d94 244static RAMState *ram_state;
6f37bb8b 245
9edabd4d 246uint64_t ram_bytes_remaining(void)
2f4fde93 247{
bae416e5
DDAG
248 return ram_state ? (ram_state->migration_dirty_pages * TARGET_PAGE_SIZE) :
249 0;
2f4fde93
JQ
250}
251
9360447d 252MigrationStats ram_counters;
96506894 253
b8fb8cb7
DDAG
254/* used by the search for pages to send */
255struct PageSearchStatus {
256 /* Current block being searched */
257 RAMBlock *block;
a935e30f
JQ
258 /* Current page to search from */
259 unsigned long page;
b8fb8cb7
DDAG
260 /* Set once we wrap around */
261 bool complete_round;
262};
263typedef struct PageSearchStatus PageSearchStatus;
264
56e93d26 265struct CompressParam {
56e93d26 266 bool done;
90e56fb4 267 bool quit;
56e93d26
JQ
268 QEMUFile *file;
269 QemuMutex mutex;
270 QemuCond cond;
271 RAMBlock *block;
272 ram_addr_t offset;
34ab9e97
XG
273
274 /* internally used fields */
dcaf446e 275 z_stream stream;
34ab9e97 276 uint8_t *originbuf;
56e93d26
JQ
277};
278typedef struct CompressParam CompressParam;
279
280struct DecompressParam {
73a8912b 281 bool done;
90e56fb4 282 bool quit;
56e93d26
JQ
283 QemuMutex mutex;
284 QemuCond cond;
285 void *des;
d341d9f3 286 uint8_t *compbuf;
56e93d26 287 int len;
797ca154 288 z_stream stream;
56e93d26
JQ
289};
290typedef struct DecompressParam DecompressParam;
291
292static CompressParam *comp_param;
293static QemuThread *compress_threads;
294/* comp_done_cond is used to wake up the migration thread when
295 * one of the compression threads has finished the compression.
296 * comp_done_lock is used together with comp_done_cond.
297 */
0d9f9a5c
LL
298static QemuMutex comp_done_lock;
299static QemuCond comp_done_cond;
56e93d26
JQ
300/* The empty QEMUFileOps will be used by file in CompressParam */
301static const QEMUFileOps empty_ops = { };
302
34ab9e97 303static QEMUFile *decomp_file;
56e93d26
JQ
304static DecompressParam *decomp_param;
305static QemuThread *decompress_threads;
73a8912b
LL
306static QemuMutex decomp_done_lock;
307static QemuCond decomp_done_cond;
56e93d26 308
dcaf446e 309static int do_compress_ram_page(QEMUFile *f, z_stream *stream, RAMBlock *block,
34ab9e97 310 ram_addr_t offset, uint8_t *source_buf);
56e93d26
JQ
311
312static void *do_data_compress(void *opaque)
313{
314 CompressParam *param = opaque;
a7a9a88f
LL
315 RAMBlock *block;
316 ram_addr_t offset;
56e93d26 317
a7a9a88f 318 qemu_mutex_lock(&param->mutex);
90e56fb4 319 while (!param->quit) {
a7a9a88f
LL
320 if (param->block) {
321 block = param->block;
322 offset = param->offset;
323 param->block = NULL;
324 qemu_mutex_unlock(&param->mutex);
325
34ab9e97
XG
326 do_compress_ram_page(param->file, &param->stream, block, offset,
327 param->originbuf);
a7a9a88f 328
0d9f9a5c 329 qemu_mutex_lock(&comp_done_lock);
a7a9a88f 330 param->done = true;
0d9f9a5c
LL
331 qemu_cond_signal(&comp_done_cond);
332 qemu_mutex_unlock(&comp_done_lock);
a7a9a88f
LL
333
334 qemu_mutex_lock(&param->mutex);
335 } else {
56e93d26
JQ
336 qemu_cond_wait(&param->cond, &param->mutex);
337 }
56e93d26 338 }
a7a9a88f 339 qemu_mutex_unlock(&param->mutex);
56e93d26
JQ
340
341 return NULL;
342}
343
344static inline void terminate_compression_threads(void)
345{
346 int idx, thread_count;
347
348 thread_count = migrate_compress_threads();
3d0684b2 349
56e93d26
JQ
350 for (idx = 0; idx < thread_count; idx++) {
351 qemu_mutex_lock(&comp_param[idx].mutex);
90e56fb4 352 comp_param[idx].quit = true;
56e93d26
JQ
353 qemu_cond_signal(&comp_param[idx].cond);
354 qemu_mutex_unlock(&comp_param[idx].mutex);
355 }
356}
357
f0afa331 358static void compress_threads_save_cleanup(void)
56e93d26
JQ
359{
360 int i, thread_count;
361
362 if (!migrate_use_compression()) {
363 return;
364 }
365 terminate_compression_threads();
366 thread_count = migrate_compress_threads();
367 for (i = 0; i < thread_count; i++) {
dcaf446e
XG
368 /*
369 * we use it as an indicator of whether the thread was
370 * properly initialized or not
371 */
372 if (!comp_param[i].file) {
373 break;
374 }
56e93d26 375 qemu_thread_join(compress_threads + i);
56e93d26
JQ
376 qemu_mutex_destroy(&comp_param[i].mutex);
377 qemu_cond_destroy(&comp_param[i].cond);
dcaf446e 378 deflateEnd(&comp_param[i].stream);
34ab9e97 379 g_free(comp_param[i].originbuf);
dcaf446e
XG
380 qemu_fclose(comp_param[i].file);
381 comp_param[i].file = NULL;
56e93d26 382 }
0d9f9a5c
LL
383 qemu_mutex_destroy(&comp_done_lock);
384 qemu_cond_destroy(&comp_done_cond);
56e93d26
JQ
385 g_free(compress_threads);
386 g_free(comp_param);
56e93d26
JQ
387 compress_threads = NULL;
388 comp_param = NULL;
56e93d26
JQ
389}
390
dcaf446e 391static int compress_threads_save_setup(void)
56e93d26
JQ
392{
393 int i, thread_count;
394
395 if (!migrate_use_compression()) {
dcaf446e 396 return 0;
56e93d26 397 }
56e93d26
JQ
398 thread_count = migrate_compress_threads();
399 compress_threads = g_new0(QemuThread, thread_count);
400 comp_param = g_new0(CompressParam, thread_count);
0d9f9a5c
LL
401 qemu_cond_init(&comp_done_cond);
402 qemu_mutex_init(&comp_done_lock);
56e93d26 403 for (i = 0; i < thread_count; i++) {
34ab9e97
XG
404 comp_param[i].originbuf = g_try_malloc(TARGET_PAGE_SIZE);
405 if (!comp_param[i].originbuf) {
406 goto exit;
407 }
408
dcaf446e
XG
409 if (deflateInit(&comp_param[i].stream,
410 migrate_compress_level()) != Z_OK) {
34ab9e97 411 g_free(comp_param[i].originbuf);
dcaf446e
XG
412 goto exit;
413 }
414
e110aa91
C
415 /* comp_param[i].file is just used as a dummy buffer to save data,
416 * set its ops to empty.
56e93d26
JQ
417 */
418 comp_param[i].file = qemu_fopen_ops(NULL, &empty_ops);
419 comp_param[i].done = true;
90e56fb4 420 comp_param[i].quit = false;
56e93d26
JQ
421 qemu_mutex_init(&comp_param[i].mutex);
422 qemu_cond_init(&comp_param[i].cond);
423 qemu_thread_create(compress_threads + i, "compress",
424 do_data_compress, comp_param + i,
425 QEMU_THREAD_JOINABLE);
426 }
dcaf446e
XG
427 return 0;
428
429exit:
430 compress_threads_save_cleanup();
431 return -1;
56e93d26
JQ
432}
433
f986c3d2
JQ
434/* Multiple fd's */
435
436struct MultiFDSendParams {
437 uint8_t id;
438 char *name;
439 QemuThread thread;
440 QemuSemaphore sem;
441 QemuMutex mutex;
66770707 442 bool running;
f986c3d2
JQ
443 bool quit;
444};
445typedef struct MultiFDSendParams MultiFDSendParams;
446
447struct {
448 MultiFDSendParams *params;
449 /* number of created threads */
450 int count;
451} *multifd_send_state;
452
66770707 453static void multifd_send_terminate_threads(Error *err)
f986c3d2
JQ
454{
455 int i;
456
7a169d74
JQ
457 if (err) {
458 MigrationState *s = migrate_get_current();
459 migrate_set_error(s, err);
460 if (s->state == MIGRATION_STATUS_SETUP ||
461 s->state == MIGRATION_STATUS_PRE_SWITCHOVER ||
462 s->state == MIGRATION_STATUS_DEVICE ||
463 s->state == MIGRATION_STATUS_ACTIVE) {
464 migrate_set_state(&s->state, s->state,
465 MIGRATION_STATUS_FAILED);
466 }
467 }
468
66770707 469 for (i = 0; i < migrate_multifd_channels(); i++) {
f986c3d2
JQ
470 MultiFDSendParams *p = &multifd_send_state->params[i];
471
472 qemu_mutex_lock(&p->mutex);
473 p->quit = true;
474 qemu_sem_post(&p->sem);
475 qemu_mutex_unlock(&p->mutex);
476 }
477}
478
479int multifd_save_cleanup(Error **errp)
480{
481 int i;
482 int ret = 0;
483
484 if (!migrate_use_multifd()) {
485 return 0;
486 }
66770707
JQ
487 multifd_send_terminate_threads(NULL);
488 for (i = 0; i < migrate_multifd_channels(); i++) {
f986c3d2
JQ
489 MultiFDSendParams *p = &multifd_send_state->params[i];
490
66770707
JQ
491 if (p->running) {
492 qemu_thread_join(&p->thread);
493 }
f986c3d2
JQ
494 qemu_mutex_destroy(&p->mutex);
495 qemu_sem_destroy(&p->sem);
496 g_free(p->name);
497 p->name = NULL;
498 }
499 g_free(multifd_send_state->params);
500 multifd_send_state->params = NULL;
501 g_free(multifd_send_state);
502 multifd_send_state = NULL;
503 return ret;
504}
505
506static void *multifd_send_thread(void *opaque)
507{
508 MultiFDSendParams *p = opaque;
509
510 while (true) {
511 qemu_mutex_lock(&p->mutex);
512 if (p->quit) {
513 qemu_mutex_unlock(&p->mutex);
514 break;
515 }
516 qemu_mutex_unlock(&p->mutex);
517 qemu_sem_wait(&p->sem);
518 }
519
66770707
JQ
520 qemu_mutex_lock(&p->mutex);
521 p->running = false;
522 qemu_mutex_unlock(&p->mutex);
523
f986c3d2
JQ
524 return NULL;
525}
526
527int multifd_save_setup(void)
528{
529 int thread_count;
530 uint8_t i;
531
532 if (!migrate_use_multifd()) {
533 return 0;
534 }
535 thread_count = migrate_multifd_channels();
536 multifd_send_state = g_malloc0(sizeof(*multifd_send_state));
537 multifd_send_state->params = g_new0(MultiFDSendParams, thread_count);
66770707 538 atomic_set(&multifd_send_state->count, 0);
f986c3d2
JQ
539 for (i = 0; i < thread_count; i++) {
540 MultiFDSendParams *p = &multifd_send_state->params[i];
541
542 qemu_mutex_init(&p->mutex);
543 qemu_sem_init(&p->sem, 0);
544 p->quit = false;
545 p->id = i;
546 p->name = g_strdup_printf("multifdsend_%d", i);
66770707 547 p->running = true;
f986c3d2
JQ
548 qemu_thread_create(&p->thread, p->name, multifd_send_thread, p,
549 QEMU_THREAD_JOINABLE);
550
66770707 551 atomic_inc(&multifd_send_state->count);
f986c3d2
JQ
552 }
553 return 0;
554}
555
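/*
 * Illustrative pairing sketch (an addition, not part of the original file):
 * the expectation is that migration setup calls multifd_save_setup() once
 * before any channel is used, and multifd_save_cleanup() on the
 * teardown/error path (hypothetical wrapper, for illustration only):
 */
#if 0
static int example_multifd_lifecycle(Error **errp)
{
    if (multifd_save_setup() != 0) {
        return -1;
    }
    /* ... the send threads now sit in multifd_send_thread() ... */
    return multifd_save_cleanup(errp);
}
#endif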
556struct MultiFDRecvParams {
557 uint8_t id;
558 char *name;
559 QemuThread thread;
560 QemuSemaphore sem;
561 QemuMutex mutex;
66770707 562 bool running;
f986c3d2
JQ
563 bool quit;
564};
565typedef struct MultiFDRecvParams MultiFDRecvParams;
566
567struct {
568 MultiFDRecvParams *params;
569 /* number of created threads */
570 int count;
571} *multifd_recv_state;
572
66770707 573static void multifd_recv_terminate_threads(Error *err)
f986c3d2
JQ
574{
575 int i;
576
7a169d74
JQ
577 if (err) {
578 MigrationState *s = migrate_get_current();
579 migrate_set_error(s, err);
580 if (s->state == MIGRATION_STATUS_SETUP ||
581 s->state == MIGRATION_STATUS_ACTIVE) {
582 migrate_set_state(&s->state, s->state,
583 MIGRATION_STATUS_FAILED);
584 }
585 }
586
66770707 587 for (i = 0; i < migrate_multifd_channels(); i++) {
f986c3d2
JQ
588 MultiFDRecvParams *p = &multifd_recv_state->params[i];
589
590 qemu_mutex_lock(&p->mutex);
591 p->quit = true;
592 qemu_sem_post(&p->sem);
593 qemu_mutex_unlock(&p->mutex);
594 }
595}
596
597int multifd_load_cleanup(Error **errp)
598{
599 int i;
600 int ret = 0;
601
602 if (!migrate_use_multifd()) {
603 return 0;
604 }
66770707
JQ
605 multifd_recv_terminate_threads(NULL);
606 for (i = 0; i < migrate_multifd_channels(); i++) {
f986c3d2
JQ
607 MultiFDRecvParams *p = &multifd_recv_state->params[i];
608
66770707
JQ
609 if (p->running) {
610 qemu_thread_join(&p->thread);
611 }
f986c3d2
JQ
612 qemu_mutex_destroy(&p->mutex);
613 qemu_sem_destroy(&p->sem);
614 g_free(p->name);
615 p->name = NULL;
616 }
617 g_free(multifd_recv_state->params);
618 multifd_recv_state->params = NULL;
619 g_free(multifd_recv_state);
620 multifd_recv_state = NULL;
621
622 return ret;
623}
624
625static void *multifd_recv_thread(void *opaque)
626{
627 MultiFDRecvParams *p = opaque;
628
629 while (true) {
630 qemu_mutex_lock(&p->mutex);
631 if (p->quit) {
632 qemu_mutex_unlock(&p->mutex);
633 break;
634 }
635 qemu_mutex_unlock(&p->mutex);
636 qemu_sem_wait(&p->sem);
637 }
638
66770707
JQ
639 qemu_mutex_lock(&p->mutex);
640 p->running = false;
641 qemu_mutex_unlock(&p->mutex);
642
f986c3d2
JQ
643 return NULL;
644}
645
646int multifd_load_setup(void)
647{
648 int thread_count;
649 uint8_t i;
650
651 if (!migrate_use_multifd()) {
652 return 0;
653 }
654 thread_count = migrate_multifd_channels();
655 multifd_recv_state = g_malloc0(sizeof(*multifd_recv_state));
656 multifd_recv_state->params = g_new0(MultiFDRecvParams, thread_count);
66770707 657 atomic_set(&multifd_recv_state->count, 0);
f986c3d2
JQ
658 for (i = 0; i < thread_count; i++) {
659 MultiFDRecvParams *p = &multifd_recv_state->params[i];
660
661 qemu_mutex_init(&p->mutex);
662 qemu_sem_init(&p->sem, 0);
663 p->quit = false;
664 p->id = i;
665 p->name = g_strdup_printf("multifdrecv_%d", i);
66770707 666 p->running = true;
f986c3d2
JQ
667 qemu_thread_create(&p->thread, p->name, multifd_recv_thread, p,
668 QEMU_THREAD_JOINABLE);
66770707 669 atomic_inc(&multifd_recv_state->count);
f986c3d2
JQ
670 }
671 return 0;
672}
673
71bb07db
JQ
674void multifd_recv_new_channel(QIOChannel *ioc)
675{
676 /* nothing to do yet */
677}
678
56e93d26 679/**
3d0684b2 680 * save_page_header: write the page header to the wire
56e93d26
JQ
681 *
682 * If this is the 1st block, it also writes the block identification
683 *
3d0684b2 684 * Returns the number of bytes written
56e93d26
JQ
685 *
686 * @f: QEMUFile where to send the data
687 * @block: block that contains the page we want to send
688 * @offset: offset inside the block for the page;
689 * the lower bits contain the RAM_SAVE_FLAG_* flags
690 */
2bf3aa85
JQ
691static size_t save_page_header(RAMState *rs, QEMUFile *f, RAMBlock *block,
692 ram_addr_t offset)
56e93d26 693{
9f5f380b 694 size_t size, len;
56e93d26 695
24795694
JQ
696 if (block == rs->last_sent_block) {
697 offset |= RAM_SAVE_FLAG_CONTINUE;
698 }
2bf3aa85 699 qemu_put_be64(f, offset);
56e93d26
JQ
700 size = 8;
701
702 if (!(offset & RAM_SAVE_FLAG_CONTINUE)) {
9f5f380b 703 len = strlen(block->idstr);
2bf3aa85
JQ
704 qemu_put_byte(f, len);
705 qemu_put_buffer(f, (uint8_t *)block->idstr, len);
9f5f380b 706 size += 1 + len;
24795694 707 rs->last_sent_block = block;
56e93d26
JQ
708 }
709 return size;
710}
711
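/*
 * Illustrative sketch (an addition, not part of the original file): the
 * matching load side is expected to undo this header, roughly as below
 * (hypothetical helper, error handling omitted; idstr is assumed to be at
 * least 256 bytes):
 */
#if 0
static uint64_t example_read_page_header(QEMUFile *f, char *idstr)
{
    uint64_t offset = qemu_get_be64(f);     /* page address + flags */

    if (!(offset & RAM_SAVE_FLAG_CONTINUE)) {
        /* a new block: its name follows as <len><bytes> */
        int len = qemu_get_byte(f);
        qemu_get_buffer(f, (uint8_t *)idstr, len);
        idstr[len] = '\0';
    }
    return offset;
}
#endif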
3d0684b2
JQ
712/**
713 * mig_throttle_guest_down: throttle down the guest
714 *
715 * Reduce the amount of guest CPU execution to hopefully slow down memory
716 * writes. If the guest dirty memory rate is reduced below the rate at
717 * which we can transfer pages to the destination then we should be
718 * able to complete migration. Some workloads dirty memory way too
719 * fast and will not effectively converge, even with auto-converge.
070afca2
JH
720 */
721static void mig_throttle_guest_down(void)
722{
723 MigrationState *s = migrate_get_current();
2594f56d
DB
724 uint64_t pct_initial = s->parameters.cpu_throttle_initial;
725 uint64_t pct_icrement = s->parameters.cpu_throttle_increment;
070afca2
JH
726
727 /* We have not started throttling yet. Let's start it. */
728 if (!cpu_throttle_active()) {
729 cpu_throttle_set(pct_initial);
730 } else {
731 /* Throttling already on, just increase the rate */
732 cpu_throttle_set(cpu_throttle_get_percentage() + pct_icrement);
733 }
734}
735
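/*
 * Worked example (an addition, not part of the original file), assuming the
 * usual defaults of cpu_throttle_initial=20 and cpu_throttle_increment=10:
 * successive calls force the vCPUs idle for 20%, then 30%, 40%, ... of
 * their time, until the dirty rate finally drops below the transfer rate.
 */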
3d0684b2
JQ
736/**
737 * xbzrle_cache_zero_page: insert a zero page in the XBZRLE cache
738 *
6f37bb8b 739 * @rs: current RAM state
3d0684b2
JQ
740 * @current_addr: address for the zero page
741 *
742 * Update the xbzrle cache to reflect a page that's been sent as all 0.
56e93d26
JQ
743 * The important thing is that a stale (not-yet-0'd) page be replaced
744 * by the new data.
745 * As a bonus, if the page wasn't in the cache it gets added so that
3d0684b2 746 * when a small write is made into the 0'd page it gets XBZRLE sent.
56e93d26 747 */
6f37bb8b 748static void xbzrle_cache_zero_page(RAMState *rs, ram_addr_t current_addr)
56e93d26 749{
6f37bb8b 750 if (rs->ram_bulk_stage || !migrate_use_xbzrle()) {
56e93d26
JQ
751 return;
752 }
753
754 /* We don't care if this fails to allocate a new cache page
755 * as long as it updated an old one */
c00e0928 756 cache_insert(XBZRLE.cache, current_addr, XBZRLE.zero_target_page,
9360447d 757 ram_counters.dirty_sync_count);
56e93d26
JQ
758}
759
760#define ENCODING_FLAG_XBZRLE 0x1
761
762/**
763 * save_xbzrle_page: compress and send current page
764 *
765 * Returns: 1 means that we wrote the page
766 * 0 means that page is identical to the one already sent
767 * -1 means that xbzrle would be longer than normal
768 *
5a987738 769 * @rs: current RAM state
3d0684b2
JQ
770 * @current_data: pointer to the address of the page contents
771 * @current_addr: addr of the page
56e93d26
JQ
772 * @block: block that contains the page we want to send
773 * @offset: offset inside the block for the page
774 * @last_stage: if we are at the completion stage
56e93d26 775 */
204b88b8 776static int save_xbzrle_page(RAMState *rs, uint8_t **current_data,
56e93d26 777 ram_addr_t current_addr, RAMBlock *block,
072c2511 778 ram_addr_t offset, bool last_stage)
56e93d26
JQ
779{
780 int encoded_len = 0, bytes_xbzrle;
781 uint8_t *prev_cached_page;
782
9360447d
JQ
783 if (!cache_is_cached(XBZRLE.cache, current_addr,
784 ram_counters.dirty_sync_count)) {
785 xbzrle_counters.cache_miss++;
56e93d26
JQ
786 if (!last_stage) {
787 if (cache_insert(XBZRLE.cache, current_addr, *current_data,
9360447d 788 ram_counters.dirty_sync_count) == -1) {
56e93d26
JQ
789 return -1;
790 } else {
791 /* update *current_data when the page has been
792 inserted into cache */
793 *current_data = get_cached_data(XBZRLE.cache, current_addr);
794 }
795 }
796 return -1;
797 }
798
799 prev_cached_page = get_cached_data(XBZRLE.cache, current_addr);
800
801 /* save current buffer into memory */
802 memcpy(XBZRLE.current_buf, *current_data, TARGET_PAGE_SIZE);
803
804 /* XBZRLE encoding (if there is no overflow) */
805 encoded_len = xbzrle_encode_buffer(prev_cached_page, XBZRLE.current_buf,
806 TARGET_PAGE_SIZE, XBZRLE.encoded_buf,
807 TARGET_PAGE_SIZE);
808 if (encoded_len == 0) {
55c4446b 809 trace_save_xbzrle_page_skipping();
56e93d26
JQ
810 return 0;
811 } else if (encoded_len == -1) {
55c4446b 812 trace_save_xbzrle_page_overflow();
9360447d 813 xbzrle_counters.overflow++;
56e93d26
JQ
814 /* update data in the cache */
815 if (!last_stage) {
816 memcpy(prev_cached_page, *current_data, TARGET_PAGE_SIZE);
817 *current_data = prev_cached_page;
818 }
819 return -1;
820 }
821
822 /* we need to update the data in the cache, in order to get the same data */
823 if (!last_stage) {
824 memcpy(prev_cached_page, XBZRLE.current_buf, TARGET_PAGE_SIZE);
825 }
826
827 /* Send XBZRLE based compressed page */
2bf3aa85 828 bytes_xbzrle = save_page_header(rs, rs->f, block,
204b88b8
JQ
829 offset | RAM_SAVE_FLAG_XBZRLE);
830 qemu_put_byte(rs->f, ENCODING_FLAG_XBZRLE);
831 qemu_put_be16(rs->f, encoded_len);
832 qemu_put_buffer(rs->f, XBZRLE.encoded_buf, encoded_len);
56e93d26 833 bytes_xbzrle += encoded_len + 1 + 2;
9360447d
JQ
834 xbzrle_counters.pages++;
835 xbzrle_counters.bytes += bytes_xbzrle;
836 ram_counters.transferred += bytes_xbzrle;
56e93d26
JQ
837
838 return 1;
839}
840
3d0684b2
JQ
841/**
842 * migration_bitmap_find_dirty: find the next dirty page from start
f3f491fc 843 *
3d0684b2
JQ
844 * Called with rcu_read_lock() to protect migration_bitmap
845 *
846 * Returns the byte offset within memory region of the start of a dirty page
847 *
6f37bb8b 848 * @rs: current RAM state
3d0684b2 849 * @rb: RAMBlock where to search for dirty pages
a935e30f 850 * @start: page where we start the search
f3f491fc 851 */
56e93d26 852static inline
a935e30f 853unsigned long migration_bitmap_find_dirty(RAMState *rs, RAMBlock *rb,
f20e2865 854 unsigned long start)
56e93d26 855{
6b6712ef
JQ
856 unsigned long size = rb->used_length >> TARGET_PAGE_BITS;
857 unsigned long *bitmap = rb->bmap;
56e93d26
JQ
858 unsigned long next;
859
6b6712ef
JQ
860 if (rs->ram_bulk_stage && start > 0) {
861 next = start + 1;
56e93d26 862 } else {
6b6712ef 863 next = find_next_bit(bitmap, size, start);
56e93d26
JQ
864 }
865
6b6712ef 866 return next;
56e93d26
JQ
867}
868
06b10688 869static inline bool migration_bitmap_clear_dirty(RAMState *rs,
f20e2865
JQ
870 RAMBlock *rb,
871 unsigned long page)
a82d593b
DDAG
872{
873 bool ret;
a82d593b 874
6b6712ef 875 ret = test_and_clear_bit(page, rb->bmap);
a82d593b
DDAG
876
877 if (ret) {
0d8ec885 878 rs->migration_dirty_pages--;
a82d593b
DDAG
879 }
880 return ret;
881}
882
15440dd5
JQ
883static void migration_bitmap_sync_range(RAMState *rs, RAMBlock *rb,
884 ram_addr_t start, ram_addr_t length)
56e93d26 885{
0d8ec885 886 rs->migration_dirty_pages +=
6b6712ef 887 cpu_physical_memory_sync_dirty_bitmap(rb, start, length,
0d8ec885 888 &rs->num_dirty_pages_period);
56e93d26
JQ
889}
890
3d0684b2
JQ
891/**
892 * ram_pagesize_summary: calculate all the pagesizes of a VM
893 *
894 * Returns a summary bitmap of the page sizes of all RAMBlocks
895 *
896 * For VMs with just normal pages this is equivalent to the host page
897 * size. If it's got some huge pages then it's the OR of all the
898 * different page sizes.
e8ca1db2
DDAG
899 */
900uint64_t ram_pagesize_summary(void)
901{
902 RAMBlock *block;
903 uint64_t summary = 0;
904
99e15582 905 RAMBLOCK_FOREACH(block) {
e8ca1db2
DDAG
906 summary |= block->page_size;
907 }
908
909 return summary;
910}
911
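/*
 * Worked example (an addition, not part of the original file): a guest with
 * ordinary 4KiB-backed RAM plus one 2MiB hugepage-backed RAMBlock yields
 * 0x1000 | 0x200000 = 0x201000, i.e. one bit set per distinct page size in
 * use.
 */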
8d820d6f 912static void migration_bitmap_sync(RAMState *rs)
56e93d26
JQ
913{
914 RAMBlock *block;
56e93d26 915 int64_t end_time;
c4bdf0cf 916 uint64_t bytes_xfer_now;
56e93d26 917
9360447d 918 ram_counters.dirty_sync_count++;
56e93d26 919
f664da80
JQ
920 if (!rs->time_last_bitmap_sync) {
921 rs->time_last_bitmap_sync = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
56e93d26
JQ
922 }
923
924 trace_migration_bitmap_sync_start();
9c1f8f44 925 memory_global_dirty_log_sync();
56e93d26 926
108cfae0 927 qemu_mutex_lock(&rs->bitmap_mutex);
56e93d26 928 rcu_read_lock();
99e15582 929 RAMBLOCK_FOREACH(block) {
15440dd5 930 migration_bitmap_sync_range(rs, block, 0, block->used_length);
56e93d26
JQ
931 }
932 rcu_read_unlock();
108cfae0 933 qemu_mutex_unlock(&rs->bitmap_mutex);
56e93d26 934
a66cd90c 935 trace_migration_bitmap_sync_end(rs->num_dirty_pages_period);
1ffb5dfd 936
56e93d26
JQ
937 end_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
938
939 /* more than 1 second = 1000 milliseconds */
f664da80 940 if (end_time > rs->time_last_bitmap_sync + 1000) {
d693c6f1 941 /* calculate period counters */
9360447d 942 ram_counters.dirty_pages_rate = rs->num_dirty_pages_period * 1000
d693c6f1 943 / (end_time - rs->time_last_bitmap_sync);
9360447d 944 bytes_xfer_now = ram_counters.transferred;
d693c6f1 945
9ac78b61
PL
946 /* During block migration the auto-converge logic incorrectly detects
947 * that ram migration makes no progress. Avoid this by disabling the
948 * throttling logic during the bulk phase of block migration. */
949 if (migrate_auto_converge() && !blk_mig_bulk_active()) {
56e93d26
JQ
950 /* The following detection logic can be refined later. For now:
951 Check to see if the dirtied bytes exceed 50% of the approx.
952 amount of bytes that just got transferred since the last time we
070afca2
JH
953 were in this routine. If that happens twice, start or increase
954 throttling */
070afca2 955
d693c6f1 956 if ((rs->num_dirty_pages_period * TARGET_PAGE_SIZE >
eac74159 957 (bytes_xfer_now - rs->bytes_xfer_prev) / 2) &&
b4a3c64b 958 (++rs->dirty_rate_high_cnt >= 2)) {
56e93d26 959 trace_migration_throttle();
8d820d6f 960 rs->dirty_rate_high_cnt = 0;
070afca2 961 mig_throttle_guest_down();
d693c6f1 962 }
56e93d26 963 }
070afca2 964
56e93d26 965 if (migrate_use_xbzrle()) {
23b28c3c 966 if (rs->iterations_prev != rs->iterations) {
9360447d
JQ
967 xbzrle_counters.cache_miss_rate =
968 (double)(xbzrle_counters.cache_miss -
b5833fde 969 rs->xbzrle_cache_miss_prev) /
23b28c3c 970 (rs->iterations - rs->iterations_prev);
56e93d26 971 }
23b28c3c 972 rs->iterations_prev = rs->iterations;
9360447d 973 rs->xbzrle_cache_miss_prev = xbzrle_counters.cache_miss;
56e93d26 974 }
d693c6f1
FF
975
976 /* reset period counters */
f664da80 977 rs->time_last_bitmap_sync = end_time;
a66cd90c 978 rs->num_dirty_pages_period = 0;
d2a4d85a 979 rs->bytes_xfer_prev = bytes_xfer_now;
56e93d26 980 }
4addcd4f 981 if (migrate_use_events()) {
9360447d 982 qapi_event_send_migration_pass(ram_counters.dirty_sync_count, NULL);
4addcd4f 983 }
56e93d26
JQ
984}
985
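/*
 * Worked example (an addition, not part of the original file): if the guest
 * dirtied ~160MB of pages during the last second but only 200MB were
 * transferred in that period, then 160MB > 200MB / 2, so
 * dirty_rate_high_cnt is bumped; a second such period in a row calls
 * mig_throttle_guest_down().
 */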
986/**
3d0684b2 987 * save_zero_page: send the zero page to the stream
56e93d26 988 *
3d0684b2 989 * Returns the number of pages written.
56e93d26 990 *
f7ccd61b 991 * @rs: current RAM state
56e93d26
JQ
992 * @block: block that contains the page we want to send
993 * @offset: offset inside the block for the page
56e93d26 994 */
7faccdc3 995static int save_zero_page(RAMState *rs, RAMBlock *block, ram_addr_t offset)
56e93d26 996{
7faccdc3 997 uint8_t *p = block->host + offset;
56e93d26
JQ
998 int pages = -1;
999
1000 if (is_zero_range(p, TARGET_PAGE_SIZE)) {
9360447d
JQ
1001 ram_counters.duplicate++;
1002 ram_counters.transferred +=
bb890ed5 1003 save_page_header(rs, rs->f, block, offset | RAM_SAVE_FLAG_ZERO);
ce25d337 1004 qemu_put_byte(rs->f, 0);
9360447d 1005 ram_counters.transferred += 1;
56e93d26
JQ
1006 pages = 1;
1007 }
1008
1009 return pages;
1010}
1011
5727309d 1012static void ram_release_pages(const char *rbname, uint64_t offset, int pages)
53f09a10 1013{
5727309d 1014 if (!migrate_release_ram() || !migration_in_postcopy()) {
53f09a10
PB
1015 return;
1016 }
1017
aaa2064c 1018 ram_discard_range(rbname, offset, pages << TARGET_PAGE_BITS);
53f09a10
PB
1019}
1020
059ff0fb
XG
1021/*
1022 * @pages: the number of pages written by the control path,
1023 * < 0 - error
1024 * > 0 - number of pages written
1025 *
1026 * Return true if the page has been saved, otherwise return false.
1027 */
1028static bool control_save_page(RAMState *rs, RAMBlock *block, ram_addr_t offset,
1029 int *pages)
1030{
1031 uint64_t bytes_xmit = 0;
1032 int ret;
1033
1034 *pages = -1;
1035 ret = ram_control_save_page(rs->f, block->offset, offset, TARGET_PAGE_SIZE,
1036 &bytes_xmit);
1037 if (ret == RAM_SAVE_CONTROL_NOT_SUPP) {
1038 return false;
1039 }
1040
1041 if (bytes_xmit) {
1042 ram_counters.transferred += bytes_xmit;
1043 *pages = 1;
1044 }
1045
1046 if (ret == RAM_SAVE_CONTROL_DELAYED) {
1047 return true;
1048 }
1049
1050 if (bytes_xmit > 0) {
1051 ram_counters.normal++;
1052 } else if (bytes_xmit == 0) {
1053 ram_counters.duplicate++;
1054 }
1055
1056 return true;
1057}
1058
65dacaa0
XG
1059/*
1060 * directly send the page to the stream
1061 *
1062 * Returns the number of pages written.
1063 *
1064 * @rs: current RAM state
1065 * @block: block that contains the page we want to send
1066 * @offset: offset inside the block for the page
1067 * @buf: the page to be sent
1068 * @async: send the page asynchronously
1069 */
1070static int save_normal_page(RAMState *rs, RAMBlock *block, ram_addr_t offset,
1071 uint8_t *buf, bool async)
1072{
1073 ram_counters.transferred += save_page_header(rs, rs->f, block,
1074 offset | RAM_SAVE_FLAG_PAGE);
1075 if (async) {
1076 qemu_put_buffer_async(rs->f, buf, TARGET_PAGE_SIZE,
1077 migrate_release_ram() &
1078 migration_in_postcopy());
1079 } else {
1080 qemu_put_buffer(rs->f, buf, TARGET_PAGE_SIZE);
1081 }
1082 ram_counters.transferred += TARGET_PAGE_SIZE;
1083 ram_counters.normal++;
1084 return 1;
1085}
1086
56e93d26 1087/**
3d0684b2 1088 * ram_save_page: send the given page to the stream
56e93d26 1089 *
3d0684b2 1090 * Returns the number of pages written.
3fd3c4b3
DDAG
1091 * < 0 - error
1092 * >=0 - Number of pages written - this might legally be 0
1093 * if xbzrle noticed the page was the same.
56e93d26 1094 *
6f37bb8b 1095 * @rs: current RAM state
56e93d26
JQ
1096 * @block: block that contains the page we want to send
1097 * @offset: offset inside the block for the page
1098 * @last_stage: if we are at the completion stage
56e93d26 1099 */
a0a8aa14 1100static int ram_save_page(RAMState *rs, PageSearchStatus *pss, bool last_stage)
56e93d26
JQ
1101{
1102 int pages = -1;
56e93d26 1103 uint8_t *p;
56e93d26 1104 bool send_async = true;
a08f6890 1105 RAMBlock *block = pss->block;
a935e30f 1106 ram_addr_t offset = pss->page << TARGET_PAGE_BITS;
059ff0fb 1107 ram_addr_t current_addr = block->offset + offset;
56e93d26 1108
2f68e399 1109 p = block->host + offset;
1db9d8e5 1110 trace_ram_save_page(block->idstr, (uint64_t)offset, p);
56e93d26 1111
56e93d26 1112 XBZRLE_cache_lock();
d7400a34
XG
1113 if (!rs->ram_bulk_stage && !migration_in_postcopy() &&
1114 migrate_use_xbzrle()) {
059ff0fb
XG
1115 pages = save_xbzrle_page(rs, &p, current_addr, block,
1116 offset, last_stage);
1117 if (!last_stage) {
1118 /* Can't send this cached data async, since the cache page
1119 * might get updated before it gets to the wire
56e93d26 1120 */
059ff0fb 1121 send_async = false;
56e93d26
JQ
1122 }
1123 }
1124
1125 /* XBZRLE overflow or normal page */
1126 if (pages == -1) {
65dacaa0 1127 pages = save_normal_page(rs, block, offset, p, send_async);
56e93d26
JQ
1128 }
1129
1130 XBZRLE_cache_unlock();
1131
1132 return pages;
1133}
1134
dcaf446e 1135static int do_compress_ram_page(QEMUFile *f, z_stream *stream, RAMBlock *block,
34ab9e97 1136 ram_addr_t offset, uint8_t *source_buf)
56e93d26 1137{
53518d94 1138 RAMState *rs = ram_state;
56e93d26 1139 int bytes_sent, blen;
a7a9a88f 1140 uint8_t *p = block->host + (offset & TARGET_PAGE_MASK);
56e93d26 1141
2bf3aa85 1142 bytes_sent = save_page_header(rs, f, block, offset |
56e93d26 1143 RAM_SAVE_FLAG_COMPRESS_PAGE);
34ab9e97
XG
1144
1145 /*
1146 * copy it to an internal buffer to avoid it being modified by the VM,
1147 * so that we can catch errors during compression and
1148 * decompression
1149 */
1150 memcpy(source_buf, p, TARGET_PAGE_SIZE);
1151 blen = qemu_put_compression_data(f, stream, source_buf, TARGET_PAGE_SIZE);
b3be2896
LL
1152 if (blen < 0) {
1153 bytes_sent = 0;
1154 qemu_file_set_error(migrate_get_current()->to_dst_file, blen);
1155 error_report("compressed data failed!");
1156 } else {
1157 bytes_sent += blen;
5727309d 1158 ram_release_pages(block->idstr, offset & TARGET_PAGE_MASK, 1);
b3be2896 1159 }
56e93d26
JQ
1160
1161 return bytes_sent;
1162}
1163
ce25d337 1164static void flush_compressed_data(RAMState *rs)
56e93d26
JQ
1165{
1166 int idx, len, thread_count;
1167
1168 if (!migrate_use_compression()) {
1169 return;
1170 }
1171 thread_count = migrate_compress_threads();
a7a9a88f 1172
0d9f9a5c 1173 qemu_mutex_lock(&comp_done_lock);
56e93d26 1174 for (idx = 0; idx < thread_count; idx++) {
a7a9a88f 1175 while (!comp_param[idx].done) {
0d9f9a5c 1176 qemu_cond_wait(&comp_done_cond, &comp_done_lock);
56e93d26 1177 }
a7a9a88f 1178 }
0d9f9a5c 1179 qemu_mutex_unlock(&comp_done_lock);
a7a9a88f
LL
1180
1181 for (idx = 0; idx < thread_count; idx++) {
1182 qemu_mutex_lock(&comp_param[idx].mutex);
90e56fb4 1183 if (!comp_param[idx].quit) {
ce25d337 1184 len = qemu_put_qemu_file(rs->f, comp_param[idx].file);
9360447d 1185 ram_counters.transferred += len;
56e93d26 1186 }
a7a9a88f 1187 qemu_mutex_unlock(&comp_param[idx].mutex);
56e93d26
JQ
1188 }
1189}
1190
1191static inline void set_compress_params(CompressParam *param, RAMBlock *block,
1192 ram_addr_t offset)
1193{
1194 param->block = block;
1195 param->offset = offset;
1196}
1197
ce25d337
JQ
1198static int compress_page_with_multi_thread(RAMState *rs, RAMBlock *block,
1199 ram_addr_t offset)
56e93d26
JQ
1200{
1201 int idx, thread_count, bytes_xmit = -1, pages = -1;
1202
1203 thread_count = migrate_compress_threads();
0d9f9a5c 1204 qemu_mutex_lock(&comp_done_lock);
56e93d26
JQ
1205 while (true) {
1206 for (idx = 0; idx < thread_count; idx++) {
1207 if (comp_param[idx].done) {
a7a9a88f 1208 comp_param[idx].done = false;
ce25d337 1209 bytes_xmit = qemu_put_qemu_file(rs->f, comp_param[idx].file);
a7a9a88f 1210 qemu_mutex_lock(&comp_param[idx].mutex);
56e93d26 1211 set_compress_params(&comp_param[idx], block, offset);
a7a9a88f
LL
1212 qemu_cond_signal(&comp_param[idx].cond);
1213 qemu_mutex_unlock(&comp_param[idx].mutex);
56e93d26 1214 pages = 1;
9360447d
JQ
1215 ram_counters.normal++;
1216 ram_counters.transferred += bytes_xmit;
56e93d26
JQ
1217 break;
1218 }
1219 }
1220 if (pages > 0) {
1221 break;
1222 } else {
0d9f9a5c 1223 qemu_cond_wait(&comp_done_cond, &comp_done_lock);
56e93d26
JQ
1224 }
1225 }
0d9f9a5c 1226 qemu_mutex_unlock(&comp_done_lock);
56e93d26
JQ
1227
1228 return pages;
1229}
1230
3d0684b2
JQ
1231/**
1232 * find_dirty_block: find the next dirty page and update any state
1233 * associated with the search process.
b9e60928 1234 *
3d0684b2 1235 * Returns true if a page is found
b9e60928 1236 *
6f37bb8b 1237 * @rs: current RAM state
3d0684b2
JQ
1238 * @pss: data about the state of the current dirty page scan
1239 * @again: set to false if the search has scanned the whole of RAM
b9e60928 1240 */
f20e2865 1241static bool find_dirty_block(RAMState *rs, PageSearchStatus *pss, bool *again)
b9e60928 1242{
f20e2865 1243 pss->page = migration_bitmap_find_dirty(rs, pss->block, pss->page);
6f37bb8b 1244 if (pss->complete_round && pss->block == rs->last_seen_block &&
a935e30f 1245 pss->page >= rs->last_page) {
b9e60928
DDAG
1246 /*
1247 * We've been once around the RAM and haven't found anything.
1248 * Give up.
1249 */
1250 *again = false;
1251 return false;
1252 }
a935e30f 1253 if ((pss->page << TARGET_PAGE_BITS) >= pss->block->used_length) {
b9e60928 1254 /* Didn't find anything in this RAM Block */
a935e30f 1255 pss->page = 0;
b9e60928
DDAG
1256 pss->block = QLIST_NEXT_RCU(pss->block, next);
1257 if (!pss->block) {
1258 /* Hit the end of the list */
1259 pss->block = QLIST_FIRST_RCU(&ram_list.blocks);
1260 /* Flag that we've looped */
1261 pss->complete_round = true;
6f37bb8b 1262 rs->ram_bulk_stage = false;
b9e60928
DDAG
1263 if (migrate_use_xbzrle()) {
1264 /* If xbzrle is on, stop using the data compression at this
1265 * point. In theory, xbzrle can do better than compression.
1266 */
ce25d337 1267 flush_compressed_data(rs);
b9e60928
DDAG
1268 }
1269 }
1270 /* Didn't find anything this time, but try again on the new block */
1271 *again = true;
1272 return false;
1273 } else {
1274 /* Can go around again, but... */
1275 *again = true;
1276 /* We've found something so probably don't need to */
1277 return true;
1278 }
1279}
1280
3d0684b2
JQ
1281/**
1282 * unqueue_page: gets a page off the queue
1283 *
a82d593b 1284 * Helper for 'get_queued_page' - gets a page off the queue
a82d593b 1285 *
3d0684b2
JQ
1286 * Returns the block of the page (or NULL if none available)
1287 *
ec481c6c 1288 * @rs: current RAM state
3d0684b2 1289 * @offset: used to return the offset within the RAMBlock
a82d593b 1290 */
f20e2865 1291static RAMBlock *unqueue_page(RAMState *rs, ram_addr_t *offset)
a82d593b
DDAG
1292{
1293 RAMBlock *block = NULL;
1294
ec481c6c
JQ
1295 qemu_mutex_lock(&rs->src_page_req_mutex);
1296 if (!QSIMPLEQ_EMPTY(&rs->src_page_requests)) {
1297 struct RAMSrcPageRequest *entry =
1298 QSIMPLEQ_FIRST(&rs->src_page_requests);
a82d593b
DDAG
1299 block = entry->rb;
1300 *offset = entry->offset;
a82d593b
DDAG
1301
1302 if (entry->len > TARGET_PAGE_SIZE) {
1303 entry->len -= TARGET_PAGE_SIZE;
1304 entry->offset += TARGET_PAGE_SIZE;
1305 } else {
1306 memory_region_unref(block->mr);
ec481c6c 1307 QSIMPLEQ_REMOVE_HEAD(&rs->src_page_requests, next_req);
a82d593b
DDAG
1308 g_free(entry);
1309 }
1310 }
ec481c6c 1311 qemu_mutex_unlock(&rs->src_page_req_mutex);
a82d593b
DDAG
1312
1313 return block;
1314}
1315
3d0684b2
JQ
1316/**
1317 * get_queued_page: unqueue a page from the postcopy requests
1318 *
1319 * Skips pages that are already sent (!dirty)
a82d593b 1320 *
3d0684b2 1321 * Returns true if a queued page is found
a82d593b 1322 *
6f37bb8b 1323 * @rs: current RAM state
3d0684b2 1324 * @pss: data about the state of the current dirty page scan
a82d593b 1325 */
f20e2865 1326static bool get_queued_page(RAMState *rs, PageSearchStatus *pss)
a82d593b
DDAG
1327{
1328 RAMBlock *block;
1329 ram_addr_t offset;
1330 bool dirty;
1331
1332 do {
f20e2865 1333 block = unqueue_page(rs, &offset);
a82d593b
DDAG
1334 /*
1335 * We're sending this page, and since it's postcopy nothing else
1336 * will dirty it, and we must make sure it doesn't get sent again
1337 * even if this queue request was received after the background
1338 * search already sent it.
1339 */
1340 if (block) {
f20e2865
JQ
1341 unsigned long page;
1342
6b6712ef
JQ
1343 page = offset >> TARGET_PAGE_BITS;
1344 dirty = test_bit(page, block->bmap);
a82d593b 1345 if (!dirty) {
06b10688 1346 trace_get_queued_page_not_dirty(block->idstr, (uint64_t)offset,
6b6712ef 1347 page, test_bit(page, block->unsentmap));
a82d593b 1348 } else {
f20e2865 1349 trace_get_queued_page(block->idstr, (uint64_t)offset, page);
a82d593b
DDAG
1350 }
1351 }
1352
1353 } while (block && !dirty);
1354
1355 if (block) {
1356 /*
1357 * As soon as we start servicing pages out of order, then we have
1358 * to kill the bulk stage, since the bulk stage assumes
1359 * in (migration_bitmap_find_and_reset_dirty) that every page is
1360 * dirty, that's no longer true.
1361 */
6f37bb8b 1362 rs->ram_bulk_stage = false;
a82d593b
DDAG
1363
1364 /*
1365 * We want the background search to continue from the queued page
1366 * since the guest is likely to want other pages near to the page
1367 * it just requested.
1368 */
1369 pss->block = block;
a935e30f 1370 pss->page = offset >> TARGET_PAGE_BITS;
a82d593b
DDAG
1371 }
1372
1373 return !!block;
1374}
1375
6c595cde 1376/**
5e58f968
JQ
1377 * migration_page_queue_free: drop any remaining pages in the ram
1378 * request queue
6c595cde 1379 *
3d0684b2
JQ
1380 * It should be empty at the end anyway, but in error cases there may
1381 * be some left; if any pages remain, we drop them.
1382 *
6c595cde 1383 */
83c13382 1384static void migration_page_queue_free(RAMState *rs)
6c595cde 1385{
ec481c6c 1386 struct RAMSrcPageRequest *mspr, *next_mspr;
6c595cde
DDAG
1387 /* This queue generally should be empty - but in the case of a failed
1388 * migration it might have some droppings in it.
1389 */
1390 rcu_read_lock();
ec481c6c 1391 QSIMPLEQ_FOREACH_SAFE(mspr, &rs->src_page_requests, next_req, next_mspr) {
6c595cde 1392 memory_region_unref(mspr->rb->mr);
ec481c6c 1393 QSIMPLEQ_REMOVE_HEAD(&rs->src_page_requests, next_req);
6c595cde
DDAG
1394 g_free(mspr);
1395 }
1396 rcu_read_unlock();
1397}
1398
1399/**
3d0684b2
JQ
1400 * ram_save_queue_pages: queue the page for transmission
1401 *
1402 * A request from postcopy destination for example.
1403 *
1404 * Returns zero on success or negative on error
1405 *
3d0684b2
JQ
1406 * @rbname: Name of the RAMBlock of the request. NULL means the
1407 * same as the last one.
1408 * @start: starting address from the start of the RAMBlock
1409 * @len: length (in bytes) to send
6c595cde 1410 */
96506894 1411int ram_save_queue_pages(const char *rbname, ram_addr_t start, ram_addr_t len)
6c595cde
DDAG
1412{
1413 RAMBlock *ramblock;
53518d94 1414 RAMState *rs = ram_state;
6c595cde 1415
9360447d 1416 ram_counters.postcopy_requests++;
6c595cde
DDAG
1417 rcu_read_lock();
1418 if (!rbname) {
1419 /* Reuse last RAMBlock */
68a098f3 1420 ramblock = rs->last_req_rb;
6c595cde
DDAG
1421
1422 if (!ramblock) {
1423 /*
1424 * Shouldn't happen, we can't reuse the last RAMBlock if
1425 * it's the 1st request.
1426 */
1427 error_report("ram_save_queue_pages no previous block");
1428 goto err;
1429 }
1430 } else {
1431 ramblock = qemu_ram_block_by_name(rbname);
1432
1433 if (!ramblock) {
1434 /* We shouldn't be asked for a non-existent RAMBlock */
1435 error_report("ram_save_queue_pages no block '%s'", rbname);
1436 goto err;
1437 }
68a098f3 1438 rs->last_req_rb = ramblock;
6c595cde
DDAG
1439 }
1440 trace_ram_save_queue_pages(ramblock->idstr, start, len);
1441 if (start+len > ramblock->used_length) {
9458ad6b
JQ
1442 error_report("%s request overrun start=" RAM_ADDR_FMT " len="
1443 RAM_ADDR_FMT " blocklen=" RAM_ADDR_FMT,
6c595cde
DDAG
1444 __func__, start, len, ramblock->used_length);
1445 goto err;
1446 }
1447
ec481c6c
JQ
1448 struct RAMSrcPageRequest *new_entry =
1449 g_malloc0(sizeof(struct RAMSrcPageRequest));
6c595cde
DDAG
1450 new_entry->rb = ramblock;
1451 new_entry->offset = start;
1452 new_entry->len = len;
1453
1454 memory_region_ref(ramblock->mr);
ec481c6c
JQ
1455 qemu_mutex_lock(&rs->src_page_req_mutex);
1456 QSIMPLEQ_INSERT_TAIL(&rs->src_page_requests, new_entry, next_req);
1457 qemu_mutex_unlock(&rs->src_page_req_mutex);
6c595cde
DDAG
1458 rcu_read_unlock();
1459
1460 return 0;
1461
1462err:
1463 rcu_read_unlock();
1464 return -1;
1465}
1466
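/*
 * Illustrative sketch (an addition, not part of the original file): a
 * postcopy page fault on the destination ends up here on the source,
 * conceptually as below (hypothetical helper; the real request arrives via
 * the return path):
 */
#if 0
static void example_request_faulting_page(RAMBlock *rb, ram_addr_t offset)
{
    /* ask the source to jump the queue for one target page */
    if (ram_save_queue_pages(rb->idstr, offset, TARGET_PAGE_SIZE)) {
        /* the request could not be queued; the page will still arrive
         * from the background scan, just later */
    }
}
#endif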
d7400a34
XG
1467static bool save_page_use_compression(RAMState *rs)
1468{
1469 if (!migrate_use_compression()) {
1470 return false;
1471 }
1472
1473 /*
1474 * If xbzrle is on, stop using the data compression after the first
1475 * round of migration even if compression is enabled. In theory,
1476 * xbzrle can do better than compression.
1477 */
1478 if (rs->ram_bulk_stage || !migrate_use_xbzrle()) {
1479 return true;
1480 }
1481
1482 return false;
1483}
1484
a82d593b 1485/**
3d0684b2 1486 * ram_save_target_page: save one target page
a82d593b 1487 *
3d0684b2 1488 * Returns the number of pages written
a82d593b 1489 *
6f37bb8b 1490 * @rs: current RAM state
3d0684b2 1491 * @pss: data about the page we want to send
a82d593b 1492 * @last_stage: if we are at the completion stage
a82d593b 1493 */
a0a8aa14 1494static int ram_save_target_page(RAMState *rs, PageSearchStatus *pss,
f20e2865 1495 bool last_stage)
a82d593b 1496{
a8ec91f9
XG
1497 RAMBlock *block = pss->block;
1498 ram_addr_t offset = pss->page << TARGET_PAGE_BITS;
1499 int res;
1500
1501 if (control_save_page(rs, block, offset, &res)) {
1502 return res;
1503 }
1504
1faa5665 1505 /*
d7400a34
XG
1506 * When starting the process of a new block, the first page of
1507 * the block should be sent out before other pages in the same
1508 * block, and all the pages in last block should have been sent
1509 * out, keeping this order is important, because the 'cont' flag
1510 * is used to avoid resending the block name.
1faa5665 1511 */
d7400a34
XG
1512 if (block != rs->last_sent_block && save_page_use_compression(rs)) {
1513 flush_compressed_data(rs);
1514 }
1515
1516 res = save_zero_page(rs, block, offset);
1517 if (res > 0) {
1518 /* Must let xbzrle know, otherwise a previous (now 0'd) cached
1519 * page would be stale
1520 */
1521 if (!save_page_use_compression(rs)) {
1522 XBZRLE_cache_lock();
1523 xbzrle_cache_zero_page(rs, block->offset + offset);
1524 XBZRLE_cache_unlock();
1525 }
1526 ram_release_pages(block->idstr, offset, res);
1527 return res;
1528 }
1529
da3f56cb
XG
1530 /*
1531 * Make sure the first page is sent out before other pages.
1532 *
1533 * we send it as a normal page, since compressing it would take
1534 * a lot of CPU.
1535 */
1536 if (block == rs->last_sent_block && save_page_use_compression(rs)) {
701b1876 1537 return compress_page_with_multi_thread(rs, block, offset);
a82d593b
DDAG
1538 }
1539
1faa5665 1540 return ram_save_page(rs, pss, last_stage);
a82d593b
DDAG
1541}
1542
1543/**
3d0684b2 1544 * ram_save_host_page: save a whole host page
a82d593b 1545 *
3d0684b2
JQ
1546 * Starting at *offset send pages up to the end of the current host
1547 * page. It's valid for the initial offset to point into the middle of
1548 * a host page in which case the remainder of the hostpage is sent.
1549 * Only dirty target pages are sent. Note that the host page size may
1550 * be a huge page for this block.
1eb3fc0a
DDAG
1551 * The saving stops at the boundary of the used_length of the block
1552 * if the RAMBlock isn't a multiple of the host page size.
a82d593b 1553 *
3d0684b2
JQ
1554 * Returns the number of pages written or negative on error
1555 *
6f37bb8b 1556 * @rs: current RAM state
3d0684b2 1558 * @pss: data about the page we want to send
a82d593b 1559 * @last_stage: if we are at the completion stage
a82d593b 1560 */
a0a8aa14 1561static int ram_save_host_page(RAMState *rs, PageSearchStatus *pss,
f20e2865 1562 bool last_stage)
a82d593b
DDAG
1563{
1564 int tmppages, pages = 0;
a935e30f
JQ
1565 size_t pagesize_bits =
1566 qemu_ram_pagesize(pss->block) >> TARGET_PAGE_BITS;
4c011c37 1567
a82d593b 1568 do {
1faa5665
XG
1569 /* Check if the page is dirty and, if so, send it */
1570 if (!migration_bitmap_clear_dirty(rs, pss->block, pss->page)) {
1571 pss->page++;
1572 continue;
1573 }
1574
f20e2865 1575 tmppages = ram_save_target_page(rs, pss, last_stage);
a82d593b
DDAG
1576 if (tmppages < 0) {
1577 return tmppages;
1578 }
1579
1580 pages += tmppages;
1faa5665
XG
1581 if (pss->block->unsentmap) {
1582 clear_bit(pss->page, pss->block->unsentmap);
1583 }
1584
a935e30f 1585 pss->page++;
1eb3fc0a
DDAG
1586 } while ((pss->page & (pagesize_bits - 1)) &&
1587 offset_in_ramblock(pss->block, pss->page << TARGET_PAGE_BITS));
a82d593b
DDAG
1588
1589 /* The offset we leave with is the last one we looked at */
a935e30f 1590 pss->page--;
a82d593b
DDAG
1591 return pages;
1592}
6c595cde 1593
56e93d26 1594/**
3d0684b2 1595 * ram_find_and_save_block: finds a dirty page and sends it to f
56e93d26
JQ
1596 *
1597 * Called within an RCU critical section.
1598 *
3d0684b2 1599 * Returns the number of pages written where zero means no dirty pages
56e93d26 1600 *
6f37bb8b 1601 * @rs: current RAM state
56e93d26 1602 * @last_stage: if we are at the completion stage
a82d593b
DDAG
1603 *
1604 * On systems where host-page-size > target-page-size it will send all the
1605 * pages in a host page that are dirty.
56e93d26
JQ
1606 */
1607
ce25d337 1608static int ram_find_and_save_block(RAMState *rs, bool last_stage)
56e93d26 1609{
b8fb8cb7 1610 PageSearchStatus pss;
56e93d26 1611 int pages = 0;
b9e60928 1612 bool again, found;
56e93d26 1613
0827b9e9
AA
1614 /* No dirty page as there is zero RAM */
1615 if (!ram_bytes_total()) {
1616 return pages;
1617 }
1618
6f37bb8b 1619 pss.block = rs->last_seen_block;
a935e30f 1620 pss.page = rs->last_page;
b8fb8cb7
DDAG
1621 pss.complete_round = false;
1622
1623 if (!pss.block) {
1624 pss.block = QLIST_FIRST_RCU(&ram_list.blocks);
1625 }
56e93d26 1626
b9e60928 1627 do {
a82d593b 1628 again = true;
f20e2865 1629 found = get_queued_page(rs, &pss);
b9e60928 1630
a82d593b
DDAG
1631 if (!found) {
1632 /* priority queue empty, so just search for something dirty */
f20e2865 1633 found = find_dirty_block(rs, &pss, &again);
a82d593b 1634 }
f3f491fc 1635
a82d593b 1636 if (found) {
f20e2865 1637 pages = ram_save_host_page(rs, &pss, last_stage);
56e93d26 1638 }
b9e60928 1639 } while (!pages && again);
56e93d26 1640
6f37bb8b 1641 rs->last_seen_block = pss.block;
a935e30f 1642 rs->last_page = pss.page;
56e93d26
JQ
1643
1644 return pages;
1645}
1646
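/*
 * Illustrative sketch (an addition, not part of the original file): the
 * iterative save path is expected to keep calling this inside an RCU
 * critical section until it returns 0 (nothing dirty found) or the
 * bandwidth budget for this iteration is used up (hypothetical loop):
 */
#if 0
static void example_save_iteration(RAMState *rs)
{
    rcu_read_lock();
    while (true) {
        int pages = ram_find_and_save_block(rs, false);
        if (pages <= 0) {
            break;              /* no dirty page found this pass */
        }
        /* ... check transferred bytes against the allowed budget ... */
    }
    rcu_read_unlock();
}
#endif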
1647void acct_update_position(QEMUFile *f, size_t size, bool zero)
1648{
1649 uint64_t pages = size / TARGET_PAGE_SIZE;
f7ccd61b 1650
56e93d26 1651 if (zero) {
9360447d 1652 ram_counters.duplicate += pages;
56e93d26 1653 } else {
9360447d
JQ
1654 ram_counters.normal += pages;
1655 ram_counters.transferred += size;
56e93d26
JQ
1656 qemu_update_position(f, size);
1657 }
1658}
1659
56e93d26
JQ
1660uint64_t ram_bytes_total(void)
1661{
1662 RAMBlock *block;
1663 uint64_t total = 0;
1664
1665 rcu_read_lock();
99e15582 1666 RAMBLOCK_FOREACH(block) {
56e93d26 1667 total += block->used_length;
99e15582 1668 }
56e93d26
JQ
1669 rcu_read_unlock();
1670 return total;
1671}
1672
f265e0e4 1673static void xbzrle_load_setup(void)
56e93d26 1674{
f265e0e4 1675 XBZRLE.decoded_buf = g_malloc(TARGET_PAGE_SIZE);
56e93d26
JQ
1676}
1677
f265e0e4
JQ
1678static void xbzrle_load_cleanup(void)
1679{
1680 g_free(XBZRLE.decoded_buf);
1681 XBZRLE.decoded_buf = NULL;
1682}
1683
7d7c96be
PX
1684static void ram_state_cleanup(RAMState **rsp)
1685{
b9ccaf6d
DDAG
1686 if (*rsp) {
1687 migration_page_queue_free(*rsp);
1688 qemu_mutex_destroy(&(*rsp)->bitmap_mutex);
1689 qemu_mutex_destroy(&(*rsp)->src_page_req_mutex);
1690 g_free(*rsp);
1691 *rsp = NULL;
1692 }
7d7c96be
PX
1693}
1694
84593a08
PX
1695static void xbzrle_cleanup(void)
1696{
1697 XBZRLE_cache_lock();
1698 if (XBZRLE.cache) {
1699 cache_fini(XBZRLE.cache);
1700 g_free(XBZRLE.encoded_buf);
1701 g_free(XBZRLE.current_buf);
1702 g_free(XBZRLE.zero_target_page);
1703 XBZRLE.cache = NULL;
1704 XBZRLE.encoded_buf = NULL;
1705 XBZRLE.current_buf = NULL;
1706 XBZRLE.zero_target_page = NULL;
1707 }
1708 XBZRLE_cache_unlock();
1709}
1710
f265e0e4 1711static void ram_save_cleanup(void *opaque)
56e93d26 1712{
53518d94 1713 RAMState **rsp = opaque;
6b6712ef 1714 RAMBlock *block;
eb859c53 1715
2ff64038
LZ
1716 /* the caller holds the iothread lock or is in a bh, so there is
1717 * no write race against this migration_bitmap
1718 */
6b6712ef
JQ
1719 memory_global_dirty_log_stop();
1720
1721 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1722 g_free(block->bmap);
1723 block->bmap = NULL;
1724 g_free(block->unsentmap);
1725 block->unsentmap = NULL;
56e93d26
JQ
1726 }
1727
84593a08 1728 xbzrle_cleanup();
f0afa331 1729 compress_threads_save_cleanup();
7d7c96be 1730 ram_state_cleanup(rsp);
56e93d26
JQ
1731}
1732
6f37bb8b 1733static void ram_state_reset(RAMState *rs)
56e93d26 1734{
6f37bb8b
JQ
1735 rs->last_seen_block = NULL;
1736 rs->last_sent_block = NULL;
269ace29 1737 rs->last_page = 0;
6f37bb8b
JQ
1738 rs->last_version = ram_list.version;
1739 rs->ram_bulk_stage = true;
56e93d26
JQ
1740}
1741
1742#define MAX_WAIT 50 /* ms, half buffered_file limit */
1743
4f2e4252
DDAG
1744/*
1745 * 'expected' is the value you expect the bitmap mostly to be full
1746 * of; it won't bother printing lines that are all this value.
1747 * 'todump' is the bitmap to dump and 'pages' is its length in bits.
1748 */
6b6712ef
JQ
1749void ram_debug_dump_bitmap(unsigned long *todump, bool expected,
1750 unsigned long pages)
4f2e4252 1751{
4f2e4252
DDAG
1752 int64_t cur;
1753 int64_t linelen = 128;
1754 char linebuf[129];
1755
6b6712ef 1756 for (cur = 0; cur < pages; cur += linelen) {
4f2e4252
DDAG
1757 int64_t curb;
1758 bool found = false;
1759 /*
1760 * Last line; catch the case where the line length
1761 * is longer than remaining ram
1762 */
6b6712ef
JQ
1763 if (cur + linelen > pages) {
1764 linelen = pages - cur;
4f2e4252
DDAG
1765 }
1766 for (curb = 0; curb < linelen; curb++) {
1767 bool thisbit = test_bit(cur + curb, todump);
1768 linebuf[curb] = thisbit ? '1' : '.';
1769 found = found || (thisbit != expected);
1770 }
1771 if (found) {
1772 linebuf[curb] = '\0';
1773 fprintf(stderr, "0x%08" PRIx64 " : %s\n", cur, linebuf);
1774 }
1775 }
1776}
1777
e0b266f0
DDAG
1778/* **** functions for postcopy ***** */
1779
ced1c616
PB
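/**
 * ram_postcopy_migrated_memory_release: release pages already migrated
 *
 * Walks each RAMBlock's dirty bitmap and discards, on the source, every
 * run of pages that is no longer dirty, i.e. that has already been sent.
 *
 * @ms: current migration state
 */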
1780void ram_postcopy_migrated_memory_release(MigrationState *ms)
1781{
1782 struct RAMBlock *block;
ced1c616 1783
99e15582 1784 RAMBLOCK_FOREACH(block) {
6b6712ef
JQ
1785 unsigned long *bitmap = block->bmap;
1786 unsigned long range = block->used_length >> TARGET_PAGE_BITS;
1787 unsigned long run_start = find_next_zero_bit(bitmap, range, 0);
ced1c616
PB
1788
1789 while (run_start < range) {
1790 unsigned long run_end = find_next_bit(bitmap, range, run_start + 1);
aaa2064c 1791 ram_discard_range(block->idstr, run_start << TARGET_PAGE_BITS,
ced1c616
PB
1792 (run_end - run_start) << TARGET_PAGE_BITS);
1793 run_start = find_next_zero_bit(bitmap, range, run_end + 1);
1794 }
1795 }
1796}
1797
3d0684b2
JQ
1798/**
1799 * postcopy_send_discard_bm_ram: discard a RAMBlock
1800 *
1801 * Returns zero on success
1802 *
e0b266f0
DDAG
1803 * Callback from postcopy_each_ram_send_discard for each RAMBlock
1804 * Note: At this point the 'unsentmap' is the processed bitmap combined
1805 * with the dirtymap; so a '1' means it's either dirty or unsent.
3d0684b2
JQ
1806 *
1807 * @ms: current migration state
1808 * @pds: state for postcopy
1809 * @block: RAMBlock whose unsent pages are sent as discard ranges
e0b266f0
DDAG
1811 */
1812static int postcopy_send_discard_bm_ram(MigrationState *ms,
1813 PostcopyDiscardState *pds,
6b6712ef 1814 RAMBlock *block)
e0b266f0 1815{
6b6712ef 1816 unsigned long end = block->used_length >> TARGET_PAGE_BITS;
e0b266f0 1817 unsigned long current;
6b6712ef 1818 unsigned long *unsentmap = block->unsentmap;
e0b266f0 1819
6b6712ef 1820 for (current = 0; current < end; ) {
e0b266f0
DDAG
1821 unsigned long one = find_next_bit(unsentmap, end, current);
1822
1823 if (one <= end) {
1824 unsigned long zero = find_next_zero_bit(unsentmap, end, one + 1);
1825 unsigned long discard_length;
1826
1827 if (zero >= end) {
1828 discard_length = end - one;
1829 } else {
1830 discard_length = zero - one;
1831 }
d688c62d
DDAG
1832 if (discard_length) {
1833 postcopy_discard_send_range(ms, pds, one, discard_length);
1834 }
e0b266f0
DDAG
1835 current = one + discard_length;
1836 } else {
1837 current = one;
1838 }
1839 }
1840
1841 return 0;
1842}
1843
3d0684b2
JQ
1844/**
1845 * postcopy_each_ram_send_discard: discard all RAMBlocks
1846 *
1847 * Returns 0 for success or negative for error
1848 *
e0b266f0
DDAG
1849 * Utility for the outgoing postcopy code.
1850 * Calls postcopy_send_discard_bm_ram for each RAMBlock
1851 * passing it bitmap indexes and name.
e0b266f0
DDAG
1852 * (qemu_ram_foreach_block ends up passing unscaled lengths
1853 * which would mean postcopy code would have to deal with target page)
3d0684b2
JQ
1854 *
1855 * @ms: current migration state
e0b266f0
DDAG
1856 */
1857static int postcopy_each_ram_send_discard(MigrationState *ms)
1858{
1859 struct RAMBlock *block;
1860 int ret;
1861
99e15582 1862 RAMBLOCK_FOREACH(block) {
6b6712ef
JQ
1863 PostcopyDiscardState *pds =
1864 postcopy_discard_send_init(ms, block->idstr);
e0b266f0
DDAG
1865
1866 /*
1867 * Postcopy sends chunks of bitmap over the wire, but it
1868 * just needs indexes at this point, which avoids it having
1869 * target page specific code.
1870 */
6b6712ef 1871 ret = postcopy_send_discard_bm_ram(ms, pds, block);
e0b266f0
DDAG
1872 postcopy_discard_send_finish(ms, pds);
1873 if (ret) {
1874 return ret;
1875 }
1876 }
1877
1878 return 0;
1879}
1880
3d0684b2
JQ
1881/**
1882 * postcopy_chunk_hostpages_pass: canonicalize bitmap in hostpages
1883 *
1884 * Helper for postcopy_chunk_hostpages; it's called twice to
1885 * canonicalize the two bitmaps, that are similar, but one is
1886 * inverted.
99e314eb 1887 *
3d0684b2
JQ
1888 * Postcopy requires that all target pages in a hostpage are dirty or
1889 * clean, not a mix. This function canonicalizes the bitmaps.
99e314eb 1890 *
3d0684b2
JQ
1891 * @ms: current migration state
1892 * @unsent_pass: if true we need to canonicalize partially unsent host pages
1893 * otherwise we need to canonicalize partially dirty host pages
1894 * @block: block that contains the page we want to canonicalize
1895 * @pds: state for postcopy
99e314eb
DDAG
1896 */
1897static void postcopy_chunk_hostpages_pass(MigrationState *ms, bool unsent_pass,
1898 RAMBlock *block,
1899 PostcopyDiscardState *pds)
1900{
53518d94 1901 RAMState *rs = ram_state;
6b6712ef
JQ
1902 unsigned long *bitmap = block->bmap;
1903 unsigned long *unsentmap = block->unsentmap;
29c59172 1904 unsigned int host_ratio = block->page_size / TARGET_PAGE_SIZE;
6b6712ef 1905 unsigned long pages = block->used_length >> TARGET_PAGE_BITS;
99e314eb
DDAG
1906 unsigned long run_start;
1907
29c59172
DDAG
1908 if (block->page_size == TARGET_PAGE_SIZE) {
1909 /* Easy case - TPS==HPS for a non-huge page RAMBlock */
1910 return;
1911 }
1912
99e314eb
DDAG
1913 if (unsent_pass) {
1914 /* Find a sent page */
6b6712ef 1915 run_start = find_next_zero_bit(unsentmap, pages, 0);
99e314eb
DDAG
1916 } else {
1917 /* Find a dirty page */
6b6712ef 1918 run_start = find_next_bit(bitmap, pages, 0);
99e314eb
DDAG
1919 }
1920
6b6712ef 1921 while (run_start < pages) {
99e314eb
DDAG
1922 bool do_fixup = false;
1923 unsigned long fixup_start_addr;
1924 unsigned long host_offset;
1925
1926 /*
1927 * If the start of this run of pages is in the middle of a host
1928 * page, then we need to fixup this host page.
1929 */
1930 host_offset = run_start % host_ratio;
1931 if (host_offset) {
1932 do_fixup = true;
1933 run_start -= host_offset;
1934 fixup_start_addr = run_start;
1935 /* For the next pass */
1936 run_start = run_start + host_ratio;
1937 } else {
1938 /* Find the end of this run */
1939 unsigned long run_end;
1940 if (unsent_pass) {
6b6712ef 1941 run_end = find_next_bit(unsentmap, pages, run_start + 1);
99e314eb 1942 } else {
6b6712ef 1943 run_end = find_next_zero_bit(bitmap, pages, run_start + 1);
99e314eb
DDAG
1944 }
1945 /*
1946 * If the end isn't at the start of a host page, then the
1947 * run doesn't finish at the end of a host page
1948 * and we need to discard.
1949 */
1950 host_offset = run_end % host_ratio;
1951 if (host_offset) {
1952 do_fixup = true;
1953 fixup_start_addr = run_end - host_offset;
1954 /*
1955 * This host page has gone, the next loop iteration starts
1956 * from after the fixup
1957 */
1958 run_start = fixup_start_addr + host_ratio;
1959 } else {
1960 /*
1961 * No discards on this iteration, next loop starts from
1962 * next sent/dirty page
1963 */
1964 run_start = run_end + 1;
1965 }
1966 }
1967
1968 if (do_fixup) {
1969 unsigned long page;
1970
1971 /* Tell the destination to discard this page */
1972 if (unsent_pass || !test_bit(fixup_start_addr, unsentmap)) {
1973 /* For the unsent_pass we:
1974 * discard partially sent pages
1975 * For the !unsent_pass (dirty) we:
1976 * discard partially dirty pages that were sent
1977 * (any partially sent pages were already discarded
1978 * by the previous unsent_pass)
1979 */
1980 postcopy_discard_send_range(ms, pds, fixup_start_addr,
1981 host_ratio);
1982 }
1983
1984 /* Clean up the bitmap */
1985 for (page = fixup_start_addr;
1986 page < fixup_start_addr + host_ratio; page++) {
1987 /* All pages in this host page are now not sent */
1988 set_bit(page, unsentmap);
1989
1990 /*
1991 * Remark them as dirty, updating the count for any pages
1992 * that weren't previously dirty.
1993 */
0d8ec885 1994 rs->migration_dirty_pages += !test_and_set_bit(page, bitmap);
99e314eb
DDAG
1995 }
1996 }
1997
1998 if (unsent_pass) {
1999 /* Find the next sent page for the next iteration */
6b6712ef 2000 run_start = find_next_zero_bit(unsentmap, pages, run_start);
99e314eb
DDAG
2001 } else {
2002 /* Find the next dirty page for the next iteration */
6b6712ef 2003 run_start = find_next_bit(bitmap, pages, run_start);
99e314eb
DDAG
2004 }
2005 }
2006}
2007
3d0684b2
JQ
2008/**
2009 * postcopy_chunk_hostpages: discard any partially sent host page
2010 *
99e314eb
DDAG
2011 * Utility for the outgoing postcopy code.
2012 *
2013 * Discard any partially sent host-page size chunks, mark any partially
29c59172
DDAG
2014 * dirty host-page size chunks as all dirty. In this case the host-page
2015 * is the host-page for the particular RAMBlock, i.e. it might be a huge page
99e314eb 2016 *
3d0684b2
JQ
2017 * Returns zero on success
2018 *
2019 * @ms: current migration state
6b6712ef 2020 * @block: block we want to work with
99e314eb 2021 */
6b6712ef 2022static int postcopy_chunk_hostpages(MigrationState *ms, RAMBlock *block)
99e314eb 2023{
6b6712ef
JQ
2024 PostcopyDiscardState *pds =
2025 postcopy_discard_send_init(ms, block->idstr);
99e314eb 2026
6b6712ef
JQ
2027 /* First pass: Discard all partially sent host pages */
2028 postcopy_chunk_hostpages_pass(ms, true, block, pds);
2029 /*
2030 * Second pass: Ensure that all partially dirty host pages are made
2031 * fully dirty.
2032 */
2033 postcopy_chunk_hostpages_pass(ms, false, block, pds);
99e314eb 2034
6b6712ef 2035 postcopy_discard_send_finish(ms, pds);
99e314eb
DDAG
2036 return 0;
2037}
2038
3d0684b2
JQ
2039/**
2040 * ram_postcopy_send_discard_bitmap: transmit the discard bitmap
2041 *
2042 * Returns zero on success
2043 *
e0b266f0
DDAG
2044 * Transmit the set of pages to be discarded after precopy to the target;
2045 * these are pages that:
2046 * a) Have been previously transmitted but are now dirty again
2047 * b) Have never been transmitted; this ensures that
2048 * any pages on the destination that have been mapped by background
2049 * tasks get discarded (transparent huge pages are the specific concern)
2050 * Hopefully this is pretty sparse
3d0684b2
JQ
2051 *
2052 * @ms: current migration state
e0b266f0
DDAG
2053 */
2054int ram_postcopy_send_discard_bitmap(MigrationState *ms)
2055{
53518d94 2056 RAMState *rs = ram_state;
6b6712ef 2057 RAMBlock *block;
e0b266f0 2058 int ret;
e0b266f0
DDAG
2059
2060 rcu_read_lock();
2061
2062 /* This should be our last sync, the src is now paused */
eb859c53 2063 migration_bitmap_sync(rs);
e0b266f0 2064
6b6712ef
JQ
2065 /* Easiest way to make sure we don't resume in the middle of a host-page */
2066 rs->last_seen_block = NULL;
2067 rs->last_sent_block = NULL;
2068 rs->last_page = 0;
e0b266f0 2069
6b6712ef
JQ
2070 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
2071 unsigned long pages = block->used_length >> TARGET_PAGE_BITS;
2072 unsigned long *bitmap = block->bmap;
2073 unsigned long *unsentmap = block->unsentmap;
2074
2075 if (!unsentmap) {
2076 /* We don't have a safe way to resize the unsentmap, so
2077 * if the bitmap was resized it will be NULL at this
2078 * point.
2079 */
2080 error_report("migration ram resized during precopy phase");
2081 rcu_read_unlock();
2082 return -EINVAL;
2083 }
2084 /* Deal with TPS != HPS and huge pages */
2085 ret = postcopy_chunk_hostpages(ms, block);
2086 if (ret) {
2087 rcu_read_unlock();
2088 return ret;
2089 }
e0b266f0 2090
6b6712ef
JQ
2091 /*
2092 * Update the unsentmap to be unsentmap = unsentmap | dirty
2093 */
2094 bitmap_or(unsentmap, unsentmap, bitmap, pages);
e0b266f0 2095#ifdef DEBUG_POSTCOPY
6b6712ef 2096 ram_debug_dump_bitmap(unsentmap, true, pages);
e0b266f0 2097#endif
6b6712ef
JQ
2098 }
2099 trace_ram_postcopy_send_discard_bitmap();
e0b266f0
DDAG
2100
2101 ret = postcopy_each_ram_send_discard(ms);
2102 rcu_read_unlock();
2103
2104 return ret;
2105}
2106
3d0684b2
JQ
2107/**
2108 * ram_discard_range: discard dirtied pages at the beginning of postcopy
e0b266f0 2109 *
3d0684b2 2110 * Returns zero on success
e0b266f0 2111 *
36449157
JQ
2112 * @rbname: name of the RAMBlock of the request. NULL means the
2113 * same as the last one.
3d0684b2
JQ
2114 * @start: byte offset of the range within the RAMBlock
2115 * @length: length in bytes of the range to discard
e0b266f0 2116 */
aaa2064c 2117int ram_discard_range(const char *rbname, uint64_t start, size_t length)
e0b266f0
DDAG
2118{
2119 int ret = -1;
2120
36449157 2121 trace_ram_discard_range(rbname, start, length);
d3a5038c 2122
e0b266f0 2123 rcu_read_lock();
36449157 2124 RAMBlock *rb = qemu_ram_block_by_name(rbname);
e0b266f0
DDAG
2125
2126 if (!rb) {
36449157 2127 error_report("ram_discard_range: Failed to find block '%s'", rbname);
e0b266f0
DDAG
2128 goto err;
2129 }
2130
f9494614
AP
2131 bitmap_clear(rb->receivedmap, start >> qemu_target_page_bits(),
2132 length >> qemu_target_page_bits());
d3a5038c 2133 ret = ram_block_discard_range(rb, start, length);
e0b266f0
DDAG
2134
2135err:
2136 rcu_read_unlock();
2137
2138 return ret;
2139}
2140
84593a08
PX
2141/*
2142 * For every allocation, we try not to crash the VM if the
2143 * allocation fails.
2144 */
2145static int xbzrle_init(void)
2146{
2147 Error *local_err = NULL;
2148
2149 if (!migrate_use_xbzrle()) {
2150 return 0;
2151 }
2152
2153 XBZRLE_cache_lock();
2154
2155 XBZRLE.zero_target_page = g_try_malloc0(TARGET_PAGE_SIZE);
2156 if (!XBZRLE.zero_target_page) {
2157 error_report("%s: Error allocating zero page", __func__);
2158 goto err_out;
2159 }
2160
2161 XBZRLE.cache = cache_init(migrate_xbzrle_cache_size(),
2162 TARGET_PAGE_SIZE, &local_err);
2163 if (!XBZRLE.cache) {
2164 error_report_err(local_err);
2165 goto free_zero_page;
2166 }
2167
2168 XBZRLE.encoded_buf = g_try_malloc0(TARGET_PAGE_SIZE);
2169 if (!XBZRLE.encoded_buf) {
2170 error_report("%s: Error allocating encoded_buf", __func__);
2171 goto free_cache;
2172 }
2173
2174 XBZRLE.current_buf = g_try_malloc(TARGET_PAGE_SIZE);
2175 if (!XBZRLE.current_buf) {
2176 error_report("%s: Error allocating current_buf", __func__);
2177 goto free_encoded_buf;
2178 }
2179
2180 /* We are all good */
2181 XBZRLE_cache_unlock();
2182 return 0;
2183
2184free_encoded_buf:
2185 g_free(XBZRLE.encoded_buf);
2186 XBZRLE.encoded_buf = NULL;
2187free_cache:
2188 cache_fini(XBZRLE.cache);
2189 XBZRLE.cache = NULL;
2190free_zero_page:
2191 g_free(XBZRLE.zero_target_page);
2192 XBZRLE.zero_target_page = NULL;
2193err_out:
2194 XBZRLE_cache_unlock();
2195 return -ENOMEM;
2196}
2197
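/* Allocate the RAMState and initialize its mutexes, the page request
 * queue and the dirty page count (everything is dirty to start with).
 */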
53518d94 2198static int ram_state_init(RAMState **rsp)
56e93d26 2199{
7d00ee6a
PX
2200 *rsp = g_try_new0(RAMState, 1);
2201
2202 if (!*rsp) {
2203 error_report("%s: Init ramstate fail", __func__);
2204 return -1;
2205 }
53518d94
JQ
2206
2207 qemu_mutex_init(&(*rsp)->bitmap_mutex);
2208 qemu_mutex_init(&(*rsp)->src_page_req_mutex);
2209 QSIMPLEQ_INIT(&(*rsp)->src_page_requests);
56e93d26 2210
7d00ee6a
PX
2211 /*
2212 * Count the total number of pages used by ram blocks not including any
2213 * gaps due to alignment or unplugs.
2214 */
2215 (*rsp)->migration_dirty_pages = ram_bytes_total() >> TARGET_PAGE_BITS;
2216
2217 ram_state_reset(*rsp);
2218
2219 return 0;
2220}
2221
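/* Allocate the per-RAMBlock dirty bitmap (and unsent map when postcopy
 * is enabled), with every page initially marked dirty/unsent.
 */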
d6eff5d7 2222static void ram_list_init_bitmaps(void)
7d00ee6a 2223{
d6eff5d7
PX
2224 RAMBlock *block;
2225 unsigned long pages;
56e93d26 2226
0827b9e9
AA
2227 /* Skip setting bitmap if there is no RAM */
2228 if (ram_bytes_total()) {
6b6712ef 2229 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
d6eff5d7 2230 pages = block->max_length >> TARGET_PAGE_BITS;
6b6712ef
JQ
2231 block->bmap = bitmap_new(pages);
2232 bitmap_set(block->bmap, 0, pages);
2233 if (migrate_postcopy_ram()) {
2234 block->unsentmap = bitmap_new(pages);
2235 bitmap_set(block->unsentmap, 0, pages);
2236 }
0827b9e9 2237 }
f3f491fc 2238 }
d6eff5d7
PX
2239}
2240
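/* Create the bitmaps, start dirty logging and do the initial bitmap
 * sync, holding the iothread and ramlist locks and the RCU read lock.
 */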
2241static void ram_init_bitmaps(RAMState *rs)
2242{
2243 /* For memory_global_dirty_log_start below. */
2244 qemu_mutex_lock_iothread();
2245 qemu_mutex_lock_ramlist();
2246 rcu_read_lock();
f3f491fc 2247
d6eff5d7 2248 ram_list_init_bitmaps();
56e93d26 2249 memory_global_dirty_log_start();
d6eff5d7
PX
2250 migration_bitmap_sync(rs);
2251
2252 rcu_read_unlock();
56e93d26 2253 qemu_mutex_unlock_ramlist();
49877834 2254 qemu_mutex_unlock_iothread();
d6eff5d7
PX
2255}
2256
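/* One-shot initialization of the outgoing side: RAMState, the XBZRLE
 * cache and the dirty bitmaps.  Returns 0 on success, -1 on failure.
 */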
2257static int ram_init_all(RAMState **rsp)
2258{
2259 if (ram_state_init(rsp)) {
2260 return -1;
2261 }
2262
2263 if (xbzrle_init()) {
2264 ram_state_cleanup(rsp);
2265 return -1;
2266 }
2267
2268 ram_init_bitmaps(*rsp);
a91246c9
HZ
2269
2270 return 0;
2271}
2272
3d0684b2
JQ
2273/*
2274 * Each of ram_save_setup, ram_save_iterate and ram_save_complete has
a91246c9
HZ
2275 * long-running RCU critical section. When rcu-reclaims in the code
2276 * start to become numerous it will be necessary to reduce the
2277 * granularity of these critical sections.
2278 */
2279
3d0684b2
JQ
2280/**
2281 * ram_save_setup: Setup RAM for migration
2282 *
2283 * Returns zero to indicate success and negative for error
2284 *
2285 * @f: QEMUFile where to send the data
2286 * @opaque: RAMState pointer
2287 */
a91246c9
HZ
2288static int ram_save_setup(QEMUFile *f, void *opaque)
2289{
53518d94 2290 RAMState **rsp = opaque;
a91246c9
HZ
2291 RAMBlock *block;
2292
dcaf446e
XG
2293 if (compress_threads_save_setup()) {
2294 return -1;
2295 }
2296
a91246c9
HZ
2297 /* migration has already setup the bitmap, reuse it. */
2298 if (!migration_in_colo_state()) {
7d00ee6a 2299 if (ram_init_all(rsp) != 0) {
dcaf446e 2300 compress_threads_save_cleanup();
a91246c9 2301 return -1;
53518d94 2302 }
a91246c9 2303 }
53518d94 2304 (*rsp)->f = f;
a91246c9
HZ
2305
2306 rcu_read_lock();
56e93d26
JQ
2307
2308 qemu_put_be64(f, ram_bytes_total() | RAM_SAVE_FLAG_MEM_SIZE);
2309
99e15582 2310 RAMBLOCK_FOREACH(block) {
56e93d26
JQ
2311 qemu_put_byte(f, strlen(block->idstr));
2312 qemu_put_buffer(f, (uint8_t *)block->idstr, strlen(block->idstr));
2313 qemu_put_be64(f, block->used_length);
ef08fb38
DDAG
2314 if (migrate_postcopy_ram() && block->page_size != qemu_host_page_size) {
2315 qemu_put_be64(f, block->page_size);
2316 }
56e93d26
JQ
2317 }
2318
2319 rcu_read_unlock();
2320
2321 ram_control_before_iterate(f, RAM_CONTROL_SETUP);
2322 ram_control_after_iterate(f, RAM_CONTROL_SETUP);
2323
2324 qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
2325
2326 return 0;
2327}
2328
3d0684b2
JQ
2329/**
2330 * ram_save_iterate: iterative stage for migration
2331 *
2332 * Returns zero to indicate success and negative for error
2333 *
2334 * @f: QEMUFile where to send the data
2335 * @opaque: RAMState pointer
2336 */
56e93d26
JQ
2337static int ram_save_iterate(QEMUFile *f, void *opaque)
2338{
53518d94
JQ
2339 RAMState **temp = opaque;
2340 RAMState *rs = *temp;
56e93d26
JQ
2341 int ret;
2342 int i;
2343 int64_t t0;
5c90308f 2344 int done = 0;
56e93d26 2345
b2557345
PL
2346 if (blk_mig_bulk_active()) {
2347 /* Avoid transferring ram during bulk phase of block migration as
2348 * the bulk phase will usually take a long time and transferring
2349 * ram updates during that time is pointless. */
2350 goto out;
2351 }
2352
56e93d26 2353 rcu_read_lock();
6f37bb8b
JQ
2354 if (ram_list.version != rs->last_version) {
2355 ram_state_reset(rs);
56e93d26
JQ
2356 }
2357
2358 /* Read version before ram_list.blocks */
2359 smp_rmb();
2360
2361 ram_control_before_iterate(f, RAM_CONTROL_ROUND);
2362
2363 t0 = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
2364 i = 0;
2365 while ((ret = qemu_file_rate_limit(f)) == 0) {
2366 int pages;
2367
ce25d337 2368 pages = ram_find_and_save_block(rs, false);
56e93d26
JQ
2369 /* no more pages to send */
2370 if (pages == 0) {
5c90308f 2371 done = 1;
56e93d26
JQ
2372 break;
2373 }
23b28c3c 2374 rs->iterations++;
070afca2 2375
56e93d26
JQ
2376 /* we want to check in the 1st loop, just in case it was the 1st time
2377 and we had to sync the dirty bitmap.
2378 qemu_clock_get_ns() is a bit expensive, so we only check every few
2379 iterations
2380 */
2381 if ((i & 63) == 0) {
2382 uint64_t t1 = (qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - t0) / 1000000;
2383 if (t1 > MAX_WAIT) {
55c4446b 2384 trace_ram_save_iterate_big_wait(t1, i);
56e93d26
JQ
2385 break;
2386 }
2387 }
2388 i++;
2389 }
ce25d337 2390 flush_compressed_data(rs);
56e93d26
JQ
2391 rcu_read_unlock();
2392
2393 /*
2394 * Must occur before EOS (or any QEMUFile operation)
2395 * because of RDMA protocol.
2396 */
2397 ram_control_after_iterate(f, RAM_CONTROL_ROUND);
2398
b2557345 2399out:
56e93d26 2400 qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
9360447d 2401 ram_counters.transferred += 8;
56e93d26
JQ
2402
2403 ret = qemu_file_get_error(f);
2404 if (ret < 0) {
2405 return ret;
2406 }
2407
5c90308f 2408 return done;
56e93d26
JQ
2409}
2410
3d0684b2
JQ
2411/**
2412 * ram_save_complete: function called to send the remaining amount of ram
2413 *
2414 * Returns zero to indicate success
2415 *
2416 * Called with iothread lock
2417 *
2418 * @f: QEMUFile where to send the data
2419 * @opaque: RAMState pointer
2420 */
56e93d26
JQ
2421static int ram_save_complete(QEMUFile *f, void *opaque)
2422{
53518d94
JQ
2423 RAMState **temp = opaque;
2424 RAMState *rs = *temp;
6f37bb8b 2425
56e93d26
JQ
2426 rcu_read_lock();
2427
5727309d 2428 if (!migration_in_postcopy()) {
8d820d6f 2429 migration_bitmap_sync(rs);
663e6c1d 2430 }
56e93d26
JQ
2431
2432 ram_control_before_iterate(f, RAM_CONTROL_FINISH);
2433
2434 /* try transferring iterative blocks of memory */
2435
2436 /* flush all remaining blocks regardless of rate limiting */
2437 while (true) {
2438 int pages;
2439
ce25d337 2440 pages = ram_find_and_save_block(rs, !migration_in_colo_state());
56e93d26
JQ
2441 /* no more blocks to send */
2442 if (pages == 0) {
2443 break;
2444 }
2445 }
2446
ce25d337 2447 flush_compressed_data(rs);
56e93d26 2448 ram_control_after_iterate(f, RAM_CONTROL_FINISH);
56e93d26
JQ
2449
2450 rcu_read_unlock();
d09a6fde 2451
56e93d26
JQ
2452 qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
2453
2454 return 0;
2455}
2456
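/**
 * ram_save_pending: how much data is left to send
 *
 * Re-syncs the dirty bitmap if the remainder has dropped below @max_size,
 * then reports the remaining dirty RAM as postcopiable when postcopy-ram
 * is enabled, otherwise as precopy-only.
 */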
c31b098f 2457static void ram_save_pending(QEMUFile *f, void *opaque, uint64_t max_size,
47995026
VSO
2458 uint64_t *res_precopy_only,
2459 uint64_t *res_compatible,
2460 uint64_t *res_postcopy_only)
56e93d26 2461{
53518d94
JQ
2462 RAMState **temp = opaque;
2463 RAMState *rs = *temp;
56e93d26
JQ
2464 uint64_t remaining_size;
2465
9edabd4d 2466 remaining_size = rs->migration_dirty_pages * TARGET_PAGE_SIZE;
56e93d26 2467
5727309d 2468 if (!migration_in_postcopy() &&
663e6c1d 2469 remaining_size < max_size) {
56e93d26
JQ
2470 qemu_mutex_lock_iothread();
2471 rcu_read_lock();
8d820d6f 2472 migration_bitmap_sync(rs);
56e93d26
JQ
2473 rcu_read_unlock();
2474 qemu_mutex_unlock_iothread();
9edabd4d 2475 remaining_size = rs->migration_dirty_pages * TARGET_PAGE_SIZE;
56e93d26 2476 }
c31b098f 2477
86e1167e
VSO
2478 if (migrate_postcopy_ram()) {
2479 /* We can do postcopy, and all the data is postcopiable */
47995026 2480 *res_compatible += remaining_size;
86e1167e 2481 } else {
47995026 2482 *res_precopy_only += remaining_size;
86e1167e 2483 }
56e93d26
JQ
2484}
2485
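/* Read one XBZRLE-encoded page from the stream and decode it on top of
 * the existing contents of @host.  Returns 0 on success, -1 on error.
 */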
2486static int load_xbzrle(QEMUFile *f, ram_addr_t addr, void *host)
2487{
2488 unsigned int xh_len;
2489 int xh_flags;
063e760a 2490 uint8_t *loaded_data;
56e93d26 2491
56e93d26
JQ
2492 /* extract RLE header */
2493 xh_flags = qemu_get_byte(f);
2494 xh_len = qemu_get_be16(f);
2495
2496 if (xh_flags != ENCODING_FLAG_XBZRLE) {
2497 error_report("Failed to load XBZRLE page - wrong compression!");
2498 return -1;
2499 }
2500
2501 if (xh_len > TARGET_PAGE_SIZE) {
2502 error_report("Failed to load XBZRLE page - len overflow!");
2503 return -1;
2504 }
f265e0e4 2505 loaded_data = XBZRLE.decoded_buf;
56e93d26 2506 /* load data and decode */
f265e0e4 2507 /* it can change loaded_data to point to an internal buffer */
063e760a 2508 qemu_get_buffer_in_place(f, &loaded_data, xh_len);
56e93d26
JQ
2509
2510 /* decode RLE */
063e760a 2511 if (xbzrle_decode_buffer(loaded_data, xh_len, host,
56e93d26
JQ
2512 TARGET_PAGE_SIZE) == -1) {
2513 error_report("Failed to load XBZRLE page - decode error!");
2514 return -1;
2515 }
2516
2517 return 0;
2518}
2519
3d0684b2
JQ
2520/**
2521 * ram_block_from_stream: read a RAMBlock id from the migration stream
2522 *
2523 * Must be called from within a rcu critical section.
2524 *
56e93d26 2525 * Returns a pointer from within the RCU-protected ram_list.
a7180877 2526 *
3d0684b2
JQ
2527 * @f: QEMUFile where to read the data from
2528 * @flags: Page flags (mostly to see if it's a continuation of previous block)
a7180877 2529 */
3d0684b2 2530static inline RAMBlock *ram_block_from_stream(QEMUFile *f, int flags)
56e93d26
JQ
2531{
2532 static RAMBlock *block = NULL;
2533 char id[256];
2534 uint8_t len;
2535
2536 if (flags & RAM_SAVE_FLAG_CONTINUE) {
4c4bad48 2537 if (!block) {
56e93d26
JQ
2538 error_report("Ack, bad migration stream!");
2539 return NULL;
2540 }
4c4bad48 2541 return block;
56e93d26
JQ
2542 }
2543
2544 len = qemu_get_byte(f);
2545 qemu_get_buffer(f, (uint8_t *)id, len);
2546 id[len] = 0;
2547
e3dd7493 2548 block = qemu_ram_block_by_name(id);
4c4bad48
HZ
2549 if (!block) {
2550 error_report("Can't find block %s", id);
2551 return NULL;
56e93d26
JQ
2552 }
2553
4c4bad48
HZ
2554 return block;
2555}
2556
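/* Translate an offset within a RAMBlock into a host pointer, or NULL if
 * the offset is outside the block.
 */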
2557static inline void *host_from_ram_block_offset(RAMBlock *block,
2558 ram_addr_t offset)
2559{
2560 if (!offset_in_ramblock(block, offset)) {
2561 return NULL;
2562 }
2563
2564 return block->host + offset;
56e93d26
JQ
2565}
2566
3d0684b2
JQ
2567/**
2568 * ram_handle_compressed: handle the zero page case
2569 *
56e93d26
JQ
2570 * If a page (or a whole RDMA chunk) has been
2571 * determined to be zero, then zap it.
3d0684b2
JQ
2572 *
2573 * @host: host address for the zero page
2574 * @ch: what the page is filled from. We only support zero
2575 * @size: size of the zero page
56e93d26
JQ
2576 */
2577void ram_handle_compressed(void *host, uint8_t ch, uint64_t size)
2578{
2579 if (ch != 0 || !is_zero_range(host, size)) {
2580 memset(host, ch, size);
2581 }
2582}
2583
797ca154
XG
2584/* return the size after decompression, or negative value on error */
2585static int
2586qemu_uncompress_data(z_stream *stream, uint8_t *dest, size_t dest_len,
2587 const uint8_t *source, size_t source_len)
2588{
2589 int err;
2590
2591 err = inflateReset(stream);
2592 if (err != Z_OK) {
2593 return -1;
2594 }
2595
2596 stream->avail_in = source_len;
2597 stream->next_in = (uint8_t *)source;
2598 stream->avail_out = dest_len;
2599 stream->next_out = dest;
2600
2601 err = inflate(stream, Z_NO_FLUSH);
2602 if (err != Z_STREAM_END) {
2603 return -1;
2604 }
2605
2606 return stream->total_out;
2607}
2608
56e93d26
JQ
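/* Decompression worker thread: waits for work handed over by
 * decompress_data_with_multi_threads(), inflates it into the guest page
 * and signals completion through decomp_done_cond.
 */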
2609static void *do_data_decompress(void *opaque)
2610{
2611 DecompressParam *param = opaque;
2612 unsigned long pagesize;
33d151f4 2613 uint8_t *des;
34ab9e97 2614 int len, ret;
56e93d26 2615
33d151f4 2616 qemu_mutex_lock(&param->mutex);
90e56fb4 2617 while (!param->quit) {
33d151f4
LL
2618 if (param->des) {
2619 des = param->des;
2620 len = param->len;
2621 param->des = 0;
2622 qemu_mutex_unlock(&param->mutex);
2623
56e93d26 2624 pagesize = TARGET_PAGE_SIZE;
34ab9e97
XG
2625
2626 ret = qemu_uncompress_data(&param->stream, des, pagesize,
2627 param->compbuf, len);
2628 if (ret < 0) {
2629 error_report("decompress data failed");
2630 qemu_file_set_error(decomp_file, ret);
2631 }
73a8912b 2632
33d151f4
LL
2633 qemu_mutex_lock(&decomp_done_lock);
2634 param->done = true;
2635 qemu_cond_signal(&decomp_done_cond);
2636 qemu_mutex_unlock(&decomp_done_lock);
2637
2638 qemu_mutex_lock(&param->mutex);
2639 } else {
2640 qemu_cond_wait(&param->cond, &param->mutex);
2641 }
56e93d26 2642 }
33d151f4 2643 qemu_mutex_unlock(&param->mutex);
56e93d26
JQ
2644
2645 return NULL;
2646}
2647
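/* Wait for every decompression thread to finish its current page and
 * return any error recorded against decomp_file.
 */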
34ab9e97 2648static int wait_for_decompress_done(void)
5533b2e9
LL
2649{
2650 int idx, thread_count;
2651
2652 if (!migrate_use_compression()) {
34ab9e97 2653 return 0;
5533b2e9
LL
2654 }
2655
2656 thread_count = migrate_decompress_threads();
2657 qemu_mutex_lock(&decomp_done_lock);
2658 for (idx = 0; idx < thread_count; idx++) {
2659 while (!decomp_param[idx].done) {
2660 qemu_cond_wait(&decomp_done_cond, &decomp_done_lock);
2661 }
2662 }
2663 qemu_mutex_unlock(&decomp_done_lock);
34ab9e97 2664 return qemu_file_get_error(decomp_file);
5533b2e9
LL
2665}
2666
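/* Stop and join the decompression threads and free their streams and
 * buffers; threads whose setup never completed (no compbuf) are skipped.
 */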
f0afa331 2667static void compress_threads_load_cleanup(void)
56e93d26
JQ
2668{
2669 int i, thread_count;
2670
3416ab5b
JQ
2671 if (!migrate_use_compression()) {
2672 return;
2673 }
56e93d26
JQ
2674 thread_count = migrate_decompress_threads();
2675 for (i = 0; i < thread_count; i++) {
797ca154
XG
2676 /*
2677 * we use it as an indicator of whether the thread has been
2678 * properly initialized or not
2679 */
2680 if (!decomp_param[i].compbuf) {
2681 break;
2682 }
2683
56e93d26 2684 qemu_mutex_lock(&decomp_param[i].mutex);
90e56fb4 2685 decomp_param[i].quit = true;
56e93d26
JQ
2686 qemu_cond_signal(&decomp_param[i].cond);
2687 qemu_mutex_unlock(&decomp_param[i].mutex);
2688 }
2689 for (i = 0; i < thread_count; i++) {
797ca154
XG
2690 if (!decomp_param[i].compbuf) {
2691 break;
2692 }
2693
56e93d26
JQ
2694 qemu_thread_join(decompress_threads + i);
2695 qemu_mutex_destroy(&decomp_param[i].mutex);
2696 qemu_cond_destroy(&decomp_param[i].cond);
797ca154 2697 inflateEnd(&decomp_param[i].stream);
56e93d26 2698 g_free(decomp_param[i].compbuf);
797ca154 2699 decomp_param[i].compbuf = NULL;
56e93d26
JQ
2700 }
2701 g_free(decompress_threads);
2702 g_free(decomp_param);
56e93d26
JQ
2703 decompress_threads = NULL;
2704 decomp_param = NULL;
34ab9e97 2705 decomp_file = NULL;
56e93d26
JQ
2706}
2707
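/* Create the decompression threads with their zlib streams and buffers.
 * Returns 0 on success and -1 on failure, after undoing partial setup.
 */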
34ab9e97 2708static int compress_threads_load_setup(QEMUFile *f)
797ca154
XG
2709{
2710 int i, thread_count;
2711
2712 if (!migrate_use_compression()) {
2713 return 0;
2714 }
2715
2716 thread_count = migrate_decompress_threads();
2717 decompress_threads = g_new0(QemuThread, thread_count);
2718 decomp_param = g_new0(DecompressParam, thread_count);
2719 qemu_mutex_init(&decomp_done_lock);
2720 qemu_cond_init(&decomp_done_cond);
34ab9e97 2721 decomp_file = f;
797ca154
XG
2722 for (i = 0; i < thread_count; i++) {
2723 if (inflateInit(&decomp_param[i].stream) != Z_OK) {
2724 goto exit;
2725 }
2726
2727 decomp_param[i].compbuf = g_malloc0(compressBound(TARGET_PAGE_SIZE));
2728 qemu_mutex_init(&decomp_param[i].mutex);
2729 qemu_cond_init(&decomp_param[i].cond);
2730 decomp_param[i].done = true;
2731 decomp_param[i].quit = false;
2732 qemu_thread_create(decompress_threads + i, "decompress",
2733 do_data_decompress, decomp_param + i,
2734 QEMU_THREAD_JOINABLE);
2735 }
2736 return 0;
2737exit:
2738 compress_threads_load_cleanup();
2739 return -1;
2740}
2741
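/* Hand one compressed page to an idle decompression thread, waiting on
 * decomp_done_cond until a thread becomes available.
 */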
c1bc6626 2742static void decompress_data_with_multi_threads(QEMUFile *f,
56e93d26
JQ
2743 void *host, int len)
2744{
2745 int idx, thread_count;
2746
2747 thread_count = migrate_decompress_threads();
73a8912b 2748 qemu_mutex_lock(&decomp_done_lock);
56e93d26
JQ
2749 while (true) {
2750 for (idx = 0; idx < thread_count; idx++) {
73a8912b 2751 if (decomp_param[idx].done) {
33d151f4
LL
2752 decomp_param[idx].done = false;
2753 qemu_mutex_lock(&decomp_param[idx].mutex);
c1bc6626 2754 qemu_get_buffer(f, decomp_param[idx].compbuf, len);
56e93d26
JQ
2755 decomp_param[idx].des = host;
2756 decomp_param[idx].len = len;
33d151f4
LL
2757 qemu_cond_signal(&decomp_param[idx].cond);
2758 qemu_mutex_unlock(&decomp_param[idx].mutex);
56e93d26
JQ
2759 break;
2760 }
2761 }
2762 if (idx < thread_count) {
2763 break;
73a8912b
LL
2764 } else {
2765 qemu_cond_wait(&decomp_done_cond, &decomp_done_lock);
56e93d26
JQ
2766 }
2767 }
73a8912b 2768 qemu_mutex_unlock(&decomp_done_lock);
56e93d26
JQ
2769}
2770
f265e0e4
JQ
2771/**
2772 * ram_load_setup: Setup RAM for migration incoming side
2773 *
2774 * Returns zero to indicate success and negative for error
2775 *
2776 * @f: QEMUFile where to receive the data
2777 * @opaque: RAMState pointer
2778 */
2779static int ram_load_setup(QEMUFile *f, void *opaque)
2780{
34ab9e97 2781 if (compress_threads_load_setup(f)) {
797ca154
XG
2782 return -1;
2783 }
2784
f265e0e4 2785 xbzrle_load_setup();
f9494614 2786 ramblock_recv_map_init();
f265e0e4
JQ
2787 return 0;
2788}
2789
2790static int ram_load_cleanup(void *opaque)
2791{
f9494614 2792 RAMBlock *rb;
f265e0e4 2793 xbzrle_load_cleanup();
f0afa331 2794 compress_threads_load_cleanup();
f9494614
AP
2795
2796 RAMBLOCK_FOREACH(rb) {
2797 g_free(rb->receivedmap);
2798 rb->receivedmap = NULL;
2799 }
f265e0e4
JQ
2800 return 0;
2801}
2802
3d0684b2
JQ
2803/**
2804 * ram_postcopy_incoming_init: allocate postcopy data structures
2805 *
2806 * Returns 0 for success and negative if there was one error
2807 *
2808 * @mis: current migration incoming state
2809 *
2810 * Allocate data structures etc needed by incoming migration with
2811 * postcopy-ram. postcopy-ram's similarly named
2812 * postcopy_ram_incoming_init does the work.
1caddf8a
DDAG
2813 */
2814int ram_postcopy_incoming_init(MigrationIncomingState *mis)
2815{
b8c48993 2816 unsigned long ram_pages = last_ram_page();
1caddf8a
DDAG
2817
2818 return postcopy_ram_incoming_init(mis, ram_pages);
2819}
2820
3d0684b2
JQ
2821/**
2822 * ram_load_postcopy: load a page in postcopy case
2823 *
2824 * Returns 0 for success or -errno in case of error
2825 *
a7180877
DDAG
2826 * Called in postcopy mode by ram_load().
2827 * rcu_read_lock is taken prior to this being called.
3d0684b2
JQ
2828 *
2829 * @f: QEMUFile where to send the data
a7180877
DDAG
2830 */
2831static int ram_load_postcopy(QEMUFile *f)
2832{
2833 int flags = 0, ret = 0;
2834 bool place_needed = false;
28abd200 2835 bool matching_page_sizes = false;
a7180877
DDAG
2836 MigrationIncomingState *mis = migration_incoming_get_current();
2837 /* Temporary page that is later 'placed' */
2838 void *postcopy_host_page = postcopy_get_tmp_page(mis);
c53b7ddc 2839 void *last_host = NULL;
a3b6ff6d 2840 bool all_zero = false;
a7180877
DDAG
2841
2842 while (!ret && !(flags & RAM_SAVE_FLAG_EOS)) {
2843 ram_addr_t addr;
2844 void *host = NULL;
2845 void *page_buffer = NULL;
2846 void *place_source = NULL;
df9ff5e1 2847 RAMBlock *block = NULL;
a7180877 2848 uint8_t ch;
a7180877
DDAG
2849
2850 addr = qemu_get_be64(f);
7a9ddfbf
PX
2851
2852 /*
2853 * If there is a qemu file error, we should stop here, as "addr"
2854 * may be invalid
2855 */
2856 ret = qemu_file_get_error(f);
2857 if (ret) {
2858 break;
2859 }
2860
a7180877
DDAG
2861 flags = addr & ~TARGET_PAGE_MASK;
2862 addr &= TARGET_PAGE_MASK;
2863
2864 trace_ram_load_postcopy_loop((uint64_t)addr, flags);
2865 place_needed = false;
bb890ed5 2866 if (flags & (RAM_SAVE_FLAG_ZERO | RAM_SAVE_FLAG_PAGE)) {
df9ff5e1 2867 block = ram_block_from_stream(f, flags);
4c4bad48
HZ
2868
2869 host = host_from_ram_block_offset(block, addr);
a7180877
DDAG
2870 if (!host) {
2871 error_report("Illegal RAM offset " RAM_ADDR_FMT, addr);
2872 ret = -EINVAL;
2873 break;
2874 }
28abd200 2875 matching_page_sizes = block->page_size == TARGET_PAGE_SIZE;
a7180877 2876 /*
28abd200
DDAG
2877 * Postcopy requires that we place whole host pages atomically;
2878 * these may be huge pages for RAMBlocks that are backed by
2879 * hugetlbfs.
a7180877
DDAG
2880 * To make it atomic, the data is read into a temporary page
2881 * that's moved into place later.
2882 * The migration protocol uses, possibly smaller, target pages;
2883 * however, the source ensures it always sends all the components
2884 * of a host page in order.
2885 */
2886 page_buffer = postcopy_host_page +
28abd200 2887 ((uintptr_t)host & (block->page_size - 1));
a7180877 2888 /* If all TP are zero then we can optimise the place */
28abd200 2889 if (!((uintptr_t)host & (block->page_size - 1))) {
a7180877 2890 all_zero = true;
c53b7ddc
DDAG
2891 } else {
2892 /* not the 1st TP within the HP */
2893 if (host != (last_host + TARGET_PAGE_SIZE)) {
9af9e0fe 2894 error_report("Non-sequential target page %p/%p",
c53b7ddc
DDAG
2895 host, last_host);
2896 ret = -EINVAL;
2897 break;
2898 }
a7180877
DDAG
2899 }
2900
c53b7ddc 2901
a7180877
DDAG
2902 /*
2903 * If it's the last part of a host page then we place the host
2904 * page
2905 */
2906 place_needed = (((uintptr_t)host + TARGET_PAGE_SIZE) &
28abd200 2907 (block->page_size - 1)) == 0;
a7180877
DDAG
2908 place_source = postcopy_host_page;
2909 }
c53b7ddc 2910 last_host = host;
a7180877
DDAG
2911
2912 switch (flags & ~RAM_SAVE_FLAG_CONTINUE) {
bb890ed5 2913 case RAM_SAVE_FLAG_ZERO:
a7180877
DDAG
2914 ch = qemu_get_byte(f);
2915 memset(page_buffer, ch, TARGET_PAGE_SIZE);
2916 if (ch) {
2917 all_zero = false;
2918 }
2919 break;
2920
2921 case RAM_SAVE_FLAG_PAGE:
2922 all_zero = false;
2923 if (!place_needed || !matching_page_sizes) {
2924 qemu_get_buffer(f, page_buffer, TARGET_PAGE_SIZE);
2925 } else {
2926 /* Avoids the qemu_file copy during postcopy, which is
2927 * going to do a copy later; can only do it when we
2928 * do this read in one go (matching page sizes)
2929 */
2930 qemu_get_buffer_in_place(f, (uint8_t **)&place_source,
2931 TARGET_PAGE_SIZE);
2932 }
2933 break;
2934 case RAM_SAVE_FLAG_EOS:
2935 /* normal exit */
2936 break;
2937 default:
2938 error_report("Unknown combination of migration flags: %#x"
2939 " (postcopy mode)", flags);
2940 ret = -EINVAL;
7a9ddfbf
PX
2941 break;
2942 }
2943
2944 /* Detect for any possible file errors */
2945 if (!ret && qemu_file_get_error(f)) {
2946 ret = qemu_file_get_error(f);
a7180877
DDAG
2947 }
2948
7a9ddfbf 2949 if (!ret && place_needed) {
a7180877 2950 /* This gets called at the last target page in the host page */
df9ff5e1
DDAG
2951 void *place_dest = host + TARGET_PAGE_SIZE - block->page_size;
2952
a7180877 2953 if (all_zero) {
df9ff5e1 2954 ret = postcopy_place_page_zero(mis, place_dest,
8be4620b 2955 block);
a7180877 2956 } else {
df9ff5e1 2957 ret = postcopy_place_page(mis, place_dest,
8be4620b 2958 place_source, block);
a7180877
DDAG
2959 }
2960 }
a7180877
DDAG
2961 }
2962
2963 return ret;
2964}
2965
acab30b8
DHB
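/* Helpers that query the incoming postcopy state machine */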
2966static bool postcopy_is_advised(void)
2967{
2968 PostcopyState ps = postcopy_state_get();
2969 return ps >= POSTCOPY_INCOMING_ADVISE && ps < POSTCOPY_INCOMING_END;
2970}
2971
2972static bool postcopy_is_running(void)
2973{
2974 PostcopyState ps = postcopy_state_get();
2975 return ps >= POSTCOPY_INCOMING_LISTENING && ps < POSTCOPY_INCOMING_END;
2976}
2977
56e93d26
JQ
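/**
 * ram_load: load a RAM section from the migration stream
 *
 * Hands control to ram_load_postcopy() once the destination is running
 * in postcopy mode; otherwise processes MEM_SIZE, ZERO, PAGE,
 * COMPRESS_PAGE, XBZRLE and EOS chunks until EOS or an error.
 *
 * @f: QEMUFile to read the data from
 * @opaque: RAMState pointer (not used here)
 * @version_id: stream version, only 4 is accepted
 */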
2978static int ram_load(QEMUFile *f, void *opaque, int version_id)
2979{
edc60127 2980 int flags = 0, ret = 0, invalid_flags = 0;
56e93d26
JQ
2981 static uint64_t seq_iter;
2982 int len = 0;
a7180877
DDAG
2983 /*
2984 * If the system is running in postcopy mode, page inserts to host memory must
2985 * be atomic
2986 */
acab30b8 2987 bool postcopy_running = postcopy_is_running();
ef08fb38 2988 /* ADVISE is earlier, it shows the source has the postcopy capability on */
acab30b8 2989 bool postcopy_advised = postcopy_is_advised();
56e93d26
JQ
2990
2991 seq_iter++;
2992
2993 if (version_id != 4) {
2994 ret = -EINVAL;
2995 }
2996
edc60127
JQ
2997 if (!migrate_use_compression()) {
2998 invalid_flags |= RAM_SAVE_FLAG_COMPRESS_PAGE;
2999 }
56e93d26
JQ
3000 /* This RCU critical section can be very long running.
3001 * When RCU reclaims in the code start to become numerous,
3002 * it will be necessary to reduce the granularity of this
3003 * critical section.
3004 */
3005 rcu_read_lock();
a7180877
DDAG
3006
3007 if (postcopy_running) {
3008 ret = ram_load_postcopy(f);
3009 }
3010
3011 while (!postcopy_running && !ret && !(flags & RAM_SAVE_FLAG_EOS)) {
56e93d26 3012 ram_addr_t addr, total_ram_bytes;
a776aa15 3013 void *host = NULL;
56e93d26
JQ
3014 uint8_t ch;
3015
3016 addr = qemu_get_be64(f);
3017 flags = addr & ~TARGET_PAGE_MASK;
3018 addr &= TARGET_PAGE_MASK;
3019
edc60127
JQ
3020 if (flags & invalid_flags) {
3021 if (flags & invalid_flags & RAM_SAVE_FLAG_COMPRESS_PAGE) {
3022 error_report("Received an unexpected compressed page");
3023 }
3024
3025 ret = -EINVAL;
3026 break;
3027 }
3028
bb890ed5 3029 if (flags & (RAM_SAVE_FLAG_ZERO | RAM_SAVE_FLAG_PAGE |
a776aa15 3030 RAM_SAVE_FLAG_COMPRESS_PAGE | RAM_SAVE_FLAG_XBZRLE)) {
4c4bad48
HZ
3031 RAMBlock *block = ram_block_from_stream(f, flags);
3032
3033 host = host_from_ram_block_offset(block, addr);
a776aa15
DDAG
3034 if (!host) {
3035 error_report("Illegal RAM offset " RAM_ADDR_FMT, addr);
3036 ret = -EINVAL;
3037 break;
3038 }
f9494614 3039 ramblock_recv_bitmap_set(block, host);
1db9d8e5 3040 trace_ram_load_loop(block->idstr, (uint64_t)addr, flags, host);
a776aa15
DDAG
3041 }
3042
56e93d26
JQ
3043 switch (flags & ~RAM_SAVE_FLAG_CONTINUE) {
3044 case RAM_SAVE_FLAG_MEM_SIZE:
3045 /* Synchronize RAM block list */
3046 total_ram_bytes = addr;
3047 while (!ret && total_ram_bytes) {
3048 RAMBlock *block;
56e93d26
JQ
3049 char id[256];
3050 ram_addr_t length;
3051
3052 len = qemu_get_byte(f);
3053 qemu_get_buffer(f, (uint8_t *)id, len);
3054 id[len] = 0;
3055 length = qemu_get_be64(f);
3056
e3dd7493
DDAG
3057 block = qemu_ram_block_by_name(id);
3058 if (block) {
3059 if (length != block->used_length) {
3060 Error *local_err = NULL;
56e93d26 3061
fa53a0e5 3062 ret = qemu_ram_resize(block, length,
e3dd7493
DDAG
3063 &local_err);
3064 if (local_err) {
3065 error_report_err(local_err);
56e93d26 3066 }
56e93d26 3067 }
ef08fb38
DDAG
3068 /* For postcopy we need to check hugepage sizes match */
3069 if (postcopy_advised &&
3070 block->page_size != qemu_host_page_size) {
3071 uint64_t remote_page_size = qemu_get_be64(f);
3072 if (remote_page_size != block->page_size) {
3073 error_report("Mismatched RAM page size %s "
3074 "(local) %zd != %" PRId64,
3075 id, block->page_size,
3076 remote_page_size);
3077 ret = -EINVAL;
3078 }
3079 }
e3dd7493
DDAG
3080 ram_control_load_hook(f, RAM_CONTROL_BLOCK_REG,
3081 block->idstr);
3082 } else {
56e93d26
JQ
3083 error_report("Unknown ramblock \"%s\", cannot "
3084 "accept migration", id);
3085 ret = -EINVAL;
3086 }
3087
3088 total_ram_bytes -= length;
3089 }
3090 break;
a776aa15 3091
bb890ed5 3092 case RAM_SAVE_FLAG_ZERO:
56e93d26
JQ
3093 ch = qemu_get_byte(f);
3094 ram_handle_compressed(host, ch, TARGET_PAGE_SIZE);
3095 break;
a776aa15 3096
56e93d26 3097 case RAM_SAVE_FLAG_PAGE:
56e93d26
JQ
3098 qemu_get_buffer(f, host, TARGET_PAGE_SIZE);
3099 break;
56e93d26 3100
a776aa15 3101 case RAM_SAVE_FLAG_COMPRESS_PAGE:
56e93d26
JQ
3102 len = qemu_get_be32(f);
3103 if (len < 0 || len > compressBound(TARGET_PAGE_SIZE)) {
3104 error_report("Invalid compressed data length: %d", len);
3105 ret = -EINVAL;
3106 break;
3107 }
c1bc6626 3108 decompress_data_with_multi_threads(f, host, len);
56e93d26 3109 break;
a776aa15 3110
56e93d26 3111 case RAM_SAVE_FLAG_XBZRLE:
56e93d26
JQ
3112 if (load_xbzrle(f, addr, host) < 0) {
3113 error_report("Failed to decompress XBZRLE page at "
3114 RAM_ADDR_FMT, addr);
3115 ret = -EINVAL;
3116 break;
3117 }
3118 break;
3119 case RAM_SAVE_FLAG_EOS:
3120 /* normal exit */
3121 break;
3122 default:
3123 if (flags & RAM_SAVE_FLAG_HOOK) {
632e3a5c 3124 ram_control_load_hook(f, RAM_CONTROL_HOOK, NULL);
56e93d26
JQ
3125 } else {
3126 error_report("Unknown combination of migration flags: %#x",
3127 flags);
3128 ret = -EINVAL;
3129 }
3130 }
3131 if (!ret) {
3132 ret = qemu_file_get_error(f);
3133 }
3134 }
3135
34ab9e97 3136 ret |= wait_for_decompress_done();
56e93d26 3137 rcu_read_unlock();
55c4446b 3138 trace_ram_load_complete(ret, seq_iter);
56e93d26
JQ
3139 return ret;
3140}
3141
c6467627
VSO
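/* The "ram" section can do postcopy whenever postcopy-ram is enabled */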
3142static bool ram_has_postcopy(void *opaque)
3143{
3144 return migrate_postcopy_ram();
3145}
3146
56e93d26 3147static SaveVMHandlers savevm_ram_handlers = {
9907e842 3148 .save_setup = ram_save_setup,
56e93d26 3149 .save_live_iterate = ram_save_iterate,
763c906b 3150 .save_live_complete_postcopy = ram_save_complete,
a3e06c3d 3151 .save_live_complete_precopy = ram_save_complete,
c6467627 3152 .has_postcopy = ram_has_postcopy,
56e93d26
JQ
3153 .save_live_pending = ram_save_pending,
3154 .load_state = ram_load,
f265e0e4
JQ
3155 .save_cleanup = ram_save_cleanup,
3156 .load_setup = ram_load_setup,
3157 .load_cleanup = ram_load_cleanup,
56e93d26
JQ
3158};
3159
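/* Register the "ram" savevm section (version 4) and set up the XBZRLE lock */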
3160void ram_mig_init(void)
3161{
3162 qemu_mutex_init(&XBZRLE.lock);
6f37bb8b 3163 register_savevm_live(NULL, "ram", 0, 4, &savevm_ram_handlers, &ram_state);
56e93d26 3164}