git.proxmox.com Git - mirror_qemu.git/blame - migration/ram.c
ram: Move migration_bitmap_mutex into RAMState
56e93d26
JQ
1/*
2 * QEMU System Emulator
3 *
4 * Copyright (c) 2003-2008 Fabrice Bellard
76cc7b58
JQ
5 * Copyright (c) 2011-2015 Red Hat Inc
6 *
7 * Authors:
8 * Juan Quintela <quintela@redhat.com>
56e93d26
JQ
9 *
10 * Permission is hereby granted, free of charge, to any person obtaining a copy
11 * of this software and associated documentation files (the "Software"), to deal
12 * in the Software without restriction, including without limitation the rights
13 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
14 * copies of the Software, and to permit persons to whom the Software is
15 * furnished to do so, subject to the following conditions:
16 *
17 * The above copyright notice and this permission notice shall be included in
18 * all copies or substantial portions of the Software.
19 *
20 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
21 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
23 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
24 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
25 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
26 * THE SOFTWARE.
27 */
1393a485 28#include "qemu/osdep.h"
33c11879
PB
29#include "qemu-common.h"
30#include "cpu.h"
56e93d26 31#include <zlib.h>
4addcd4f 32#include "qapi-event.h"
f348b6d1 33#include "qemu/cutils.h"
56e93d26
JQ
34#include "qemu/bitops.h"
35#include "qemu/bitmap.h"
7205c9ec
JQ
36#include "qemu/timer.h"
37#include "qemu/main-loop.h"
56e93d26 38#include "migration/migration.h"
e0b266f0 39#include "migration/postcopy-ram.h"
56e93d26
JQ
40#include "exec/address-spaces.h"
41#include "migration/page_cache.h"
56e93d26 42#include "qemu/error-report.h"
56e93d26 43#include "trace.h"
56e93d26 44#include "exec/ram_addr.h"
56e93d26 45#include "qemu/rcu_queue.h"
a91246c9 46#include "migration/colo.h"
56e93d26 47
56e93d26
JQ
48/***********************************************************/
49/* ram save/restore */
50
51#define RAM_SAVE_FLAG_FULL 0x01 /* Obsolete, not used anymore */
52#define RAM_SAVE_FLAG_COMPRESS 0x02
53#define RAM_SAVE_FLAG_MEM_SIZE 0x04
54#define RAM_SAVE_FLAG_PAGE 0x08
55#define RAM_SAVE_FLAG_EOS 0x10
56#define RAM_SAVE_FLAG_CONTINUE 0x20
57#define RAM_SAVE_FLAG_XBZRLE 0x40
58/* 0x80 is reserved in migration.h; start with 0x100 for the next flag */
59#define RAM_SAVE_FLAG_COMPRESS_PAGE 0x100
60
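/*
 * For illustration (assuming, as save_page_header() below relies on, that
 * all the flag bits fit under TARGET_PAGE_BITS): the flags travel in the
 * low bits of the 8-byte offset word, so a received header word can be
 * split as
 *
 *     ram_addr_t addr  = header & TARGET_PAGE_MASK;    (page address)
 *     uint64_t   flags = header & ~TARGET_PAGE_MASK;   (RAM_SAVE_FLAG_* bits)
 */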
adb65dec 61static uint8_t *ZERO_TARGET_PAGE;
56e93d26
JQ
62
63static inline bool is_zero_range(uint8_t *p, uint64_t size)
64{
a1febc49 65 return buffer_is_zero(p, size);
56e93d26
JQ
66}
67
68/* struct contains XBZRLE cache and a static page
69 used by the compression */
70static struct {
71 /* buffer used for XBZRLE encoding */
72 uint8_t *encoded_buf;
73 /* buffer for storing page content */
74 uint8_t *current_buf;
75 /* Cache for XBZRLE, Protected by lock. */
76 PageCache *cache;
77 QemuMutex lock;
78} XBZRLE;
79
80/* buffer used for XBZRLE decoding */
81static uint8_t *xbzrle_decoded_buf;
82
83static void XBZRLE_cache_lock(void)
84{
85 if (migrate_use_xbzrle())
86 qemu_mutex_lock(&XBZRLE.lock);
87}
88
89static void XBZRLE_cache_unlock(void)
90{
91 if (migrate_use_xbzrle())
92 qemu_mutex_unlock(&XBZRLE.lock);
93}
94
3d0684b2
JQ
95/**
96 * xbzrle_cache_resize: resize the xbzrle cache
97 *
98 * This function is called from qmp_migrate_set_cache_size in main
99 * thread, possibly while a migration is in progress. A running
100 * migration may be using the cache and might finish during this call,
101 * hence changes to the cache are protected by XBZRLE.lock().
102 *
103 * Returns the new_size or negative in case of error.
104 *
105 * @new_size: new cache size
56e93d26
JQ
106 */
107int64_t xbzrle_cache_resize(int64_t new_size)
108{
109 PageCache *new_cache;
110 int64_t ret;
111
112 if (new_size < TARGET_PAGE_SIZE) {
113 return -1;
114 }
115
116 XBZRLE_cache_lock();
117
118 if (XBZRLE.cache != NULL) {
119 if (pow2floor(new_size) == migrate_xbzrle_cache_size()) {
120 goto out_new_size;
121 }
122 new_cache = cache_init(new_size / TARGET_PAGE_SIZE,
123 TARGET_PAGE_SIZE);
124 if (!new_cache) {
125 error_report("Error creating cache");
126 ret = -1;
127 goto out;
128 }
129
130 cache_fini(XBZRLE.cache);
131 XBZRLE.cache = new_cache;
132 }
133
134out_new_size:
135 ret = pow2floor(new_size);
136out:
137 XBZRLE_cache_unlock();
138 return ret;
139}
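/*
 * Call-site sketch (hypothetical error handling): a QMP handler would
 * typically validate the request and then do
 *
 *     int64_t ret = xbzrle_cache_resize(new_size);
 *     if (ret < 0) {
 *         error_setg(errp, "cache resize failed");
 *     }
 *
 * noting that the size actually used is pow2floor(new_size).
 */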
140
6f37bb8b
JQ
141/* State of RAM for migration */
142struct RAMState {
143 /* Last block that we have visited searching for dirty pages */
144 RAMBlock *last_seen_block;
145 /* Last block from where we have sent data */
146 RAMBlock *last_sent_block;
147 /* Last offset we have sent data from */
148 ram_addr_t last_offset;
149 /* last ram version we have seen */
150 uint32_t last_version;
151 /* We are in the first round */
152 bool ram_bulk_stage;
8d820d6f
JQ
153 /* How many times we have dirty too many pages */
154 int dirty_rate_high_cnt;
5a987738
JQ
155 /* How many times we have synchronized the bitmap */
156 uint64_t bitmap_sync_count;
f664da80
JQ
157 /* these variables are used for bitmap sync */
158 /* last time we did a full bitmap_sync */
159 int64_t time_last_bitmap_sync;
eac74159 160 /* bytes transferred at start_time */
c4bdf0cf 161 uint64_t bytes_xfer_prev;
a66cd90c 162 /* number of dirty pages since start_time */
68908ed6 163 uint64_t num_dirty_pages_period;
b5833fde
JQ
164 /* xbzrle misses since the beginning of the period */
165 uint64_t xbzrle_cache_miss_prev;
36040d9c
JQ
166 /* number of iterations at the beginning of period */
167 uint64_t iterations_prev;
f7ccd61b
JQ
168 /* Accounting fields */
169 /* number of zero pages; these used to be pages filled with the same char */
170 uint64_t zero_pages;
b4d1c6e7
JQ
171 /* number of normal transferred pages */
172 uint64_t norm_pages;
23b28c3c
JQ
173 /* Iterations since start */
174 uint64_t iterations;
f36ada95
JQ
175 /* xbzrle transmitted bytes. Note that these are after
176 * compression, so they can't be calculated from the page count */
07ed50a2 177 uint64_t xbzrle_bytes;
f36ada95
JQ
178 /* xbzrle transmitted pages */
179 uint64_t xbzrle_pages;
544c36f1
JQ
180 /* xbzrle number of cache miss */
181 uint64_t xbzrle_cache_miss;
b07016b6
JQ
182 /* xbzrle miss rate */
183 double xbzrle_cache_miss_rate;
180f61f7
JQ
184 /* xbzrle number of overflows */
185 uint64_t xbzrle_overflows;
0d8ec885
JQ
186 /* number of dirty bits in the bitmap */
187 uint64_t migration_dirty_pages;
108cfae0
JQ
188 /* protects modification of the bitmap */
189 QemuMutex bitmap_mutex;
6f37bb8b
JQ
190};
191typedef struct RAMState RAMState;
192
193static RAMState ram_state;
194
56e93d26
JQ
195uint64_t dup_mig_pages_transferred(void)
196{
f7ccd61b 197 return ram_state.zero_pages;
56e93d26
JQ
198}
199
56e93d26
JQ
200uint64_t norm_mig_pages_transferred(void)
201{
b4d1c6e7 202 return ram_state.norm_pages;
56e93d26
JQ
203}
204
205uint64_t xbzrle_mig_bytes_transferred(void)
206{
07ed50a2 207 return ram_state.xbzrle_bytes;
56e93d26
JQ
208}
209
210uint64_t xbzrle_mig_pages_transferred(void)
211{
f36ada95 212 return ram_state.xbzrle_pages;
56e93d26
JQ
213}
214
215uint64_t xbzrle_mig_pages_cache_miss(void)
216{
544c36f1 217 return ram_state.xbzrle_cache_miss;
56e93d26
JQ
218}
219
220double xbzrle_mig_cache_miss_rate(void)
221{
b07016b6 222 return ram_state.xbzrle_cache_miss_rate;
56e93d26
JQ
223}
224
225uint64_t xbzrle_mig_pages_overflow(void)
226{
180f61f7 227 return ram_state.xbzrle_overflows;
56e93d26
JQ
228}
229
0d8ec885
JQ
230static ram_addr_t ram_save_remaining(void)
231{
232 return ram_state.migration_dirty_pages;
233}
234
b8fb8cb7
DDAG
235/* used by the search for pages to send */
236struct PageSearchStatus {
237 /* Current block being searched */
238 RAMBlock *block;
239 /* Current offset to search from */
240 ram_addr_t offset;
241 /* Set once we wrap around */
242 bool complete_round;
243};
244typedef struct PageSearchStatus PageSearchStatus;
245
60be6340
DL
246static struct BitmapRcu {
247 struct rcu_head rcu;
f3f491fc 248 /* Main migration bitmap */
60be6340 249 unsigned long *bmap;
f3f491fc
DDAG
250 /* bitmap of pages that haven't been sent even once
251 * only maintained and used in postcopy at the moment
252 * where it's used to send the dirtymap at the start
253 * of the postcopy phase
254 */
255 unsigned long *unsentmap;
60be6340
DL
256} *migration_bitmap_rcu;
257
56e93d26 258struct CompressParam {
56e93d26 259 bool done;
90e56fb4 260 bool quit;
56e93d26
JQ
261 QEMUFile *file;
262 QemuMutex mutex;
263 QemuCond cond;
264 RAMBlock *block;
265 ram_addr_t offset;
266};
267typedef struct CompressParam CompressParam;
268
269struct DecompressParam {
73a8912b 270 bool done;
90e56fb4 271 bool quit;
56e93d26
JQ
272 QemuMutex mutex;
273 QemuCond cond;
274 void *des;
d341d9f3 275 uint8_t *compbuf;
56e93d26
JQ
276 int len;
277};
278typedef struct DecompressParam DecompressParam;
279
280static CompressParam *comp_param;
281static QemuThread *compress_threads;
282/* comp_done_cond is used to wake up the migration thread when
283 * one of the compression threads has finished the compression.
284 * comp_done_lock is used together with comp_done_cond.
285 */
0d9f9a5c
LL
286static QemuMutex comp_done_lock;
287static QemuCond comp_done_cond;
56e93d26
JQ
288/* The empty QEMUFileOps will be used by file in CompressParam */
289static const QEMUFileOps empty_ops = { };
290
291static bool compression_switch;
56e93d26
JQ
292static DecompressParam *decomp_param;
293static QemuThread *decompress_threads;
73a8912b
LL
294static QemuMutex decomp_done_lock;
295static QemuCond decomp_done_cond;
56e93d26 296
a7a9a88f
LL
297static int do_compress_ram_page(QEMUFile *f, RAMBlock *block,
298 ram_addr_t offset);
56e93d26
JQ
299
300static void *do_data_compress(void *opaque)
301{
302 CompressParam *param = opaque;
a7a9a88f
LL
303 RAMBlock *block;
304 ram_addr_t offset;
56e93d26 305
a7a9a88f 306 qemu_mutex_lock(&param->mutex);
90e56fb4 307 while (!param->quit) {
a7a9a88f
LL
308 if (param->block) {
309 block = param->block;
310 offset = param->offset;
311 param->block = NULL;
312 qemu_mutex_unlock(&param->mutex);
313
314 do_compress_ram_page(param->file, block, offset);
315
0d9f9a5c 316 qemu_mutex_lock(&comp_done_lock);
a7a9a88f 317 param->done = true;
0d9f9a5c
LL
318 qemu_cond_signal(&comp_done_cond);
319 qemu_mutex_unlock(&comp_done_lock);
a7a9a88f
LL
320
321 qemu_mutex_lock(&param->mutex);
322 } else {
56e93d26
JQ
323 qemu_cond_wait(&param->cond, &param->mutex);
324 }
56e93d26 325 }
a7a9a88f 326 qemu_mutex_unlock(&param->mutex);
56e93d26
JQ
327
328 return NULL;
329}
330
331static inline void terminate_compression_threads(void)
332{
333 int idx, thread_count;
334
335 thread_count = migrate_compress_threads();
3d0684b2 336
56e93d26
JQ
337 for (idx = 0; idx < thread_count; idx++) {
338 qemu_mutex_lock(&comp_param[idx].mutex);
90e56fb4 339 comp_param[idx].quit = true;
56e93d26
JQ
340 qemu_cond_signal(&comp_param[idx].cond);
341 qemu_mutex_unlock(&comp_param[idx].mutex);
342 }
343}
344
345void migrate_compress_threads_join(void)
346{
347 int i, thread_count;
348
349 if (!migrate_use_compression()) {
350 return;
351 }
352 terminate_compression_threads();
353 thread_count = migrate_compress_threads();
354 for (i = 0; i < thread_count; i++) {
355 qemu_thread_join(compress_threads + i);
356 qemu_fclose(comp_param[i].file);
357 qemu_mutex_destroy(&comp_param[i].mutex);
358 qemu_cond_destroy(&comp_param[i].cond);
359 }
0d9f9a5c
LL
360 qemu_mutex_destroy(&comp_done_lock);
361 qemu_cond_destroy(&comp_done_cond);
56e93d26
JQ
362 g_free(compress_threads);
363 g_free(comp_param);
56e93d26
JQ
364 compress_threads = NULL;
365 comp_param = NULL;
56e93d26
JQ
366}
367
368void migrate_compress_threads_create(void)
369{
370 int i, thread_count;
371
372 if (!migrate_use_compression()) {
373 return;
374 }
56e93d26
JQ
375 compression_switch = true;
376 thread_count = migrate_compress_threads();
377 compress_threads = g_new0(QemuThread, thread_count);
378 comp_param = g_new0(CompressParam, thread_count);
0d9f9a5c
LL
379 qemu_cond_init(&comp_done_cond);
380 qemu_mutex_init(&comp_done_lock);
56e93d26 381 for (i = 0; i < thread_count; i++) {
e110aa91
C
382 /* comp_param[i].file is just used as a dummy buffer to save data,
383 * set its ops to empty.
56e93d26
JQ
384 */
385 comp_param[i].file = qemu_fopen_ops(NULL, &empty_ops);
386 comp_param[i].done = true;
90e56fb4 387 comp_param[i].quit = false;
56e93d26
JQ
388 qemu_mutex_init(&comp_param[i].mutex);
389 qemu_cond_init(&comp_param[i].cond);
390 qemu_thread_create(compress_threads + i, "compress",
391 do_data_compress, comp_param + i,
392 QEMU_THREAD_JOINABLE);
393 }
394}
395
396/**
3d0684b2 397 * save_page_header: write page header to wire
56e93d26
JQ
398 *
399 * If this is the 1st block, it also writes the block identification
400 *
3d0684b2 401 * Returns the number of bytes written
56e93d26
JQ
402 *
403 * @f: QEMUFile where to send the data
404 * @block: block that contains the page we want to send
405 * @offset: offset inside the block for the page
406 * in the lower bits, it contains flags
407 */
408static size_t save_page_header(QEMUFile *f, RAMBlock *block, ram_addr_t offset)
409{
9f5f380b 410 size_t size, len;
56e93d26
JQ
411
412 qemu_put_be64(f, offset);
413 size = 8;
414
415 if (!(offset & RAM_SAVE_FLAG_CONTINUE)) {
9f5f380b
LL
416 len = strlen(block->idstr);
417 qemu_put_byte(f, len);
418 qemu_put_buffer(f, (uint8_t *)block->idstr, len);
419 size += 1 + len;
56e93d26
JQ
420 }
421 return size;
422}
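/*
 * Resulting wire layout (sketch):
 *
 *   8 bytes  offset, with the RAM_SAVE_FLAG_* bits in the low bits
 *   1 byte   idstr length   \  only present when RAM_SAVE_FLAG_CONTINUE
 *   n bytes  block idstr    /  is clear, i.e. when a new block starts
 */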
423
3d0684b2
JQ
424/**
425 * mig_throttle_guest_down: throttle down the guest
426 *
427 * Reduce amount of guest cpu execution to hopefully slow down memory
428 * writes. If guest dirty memory rate is reduced below the rate at
429 * which we can transfer pages to the destination then we should be
430 * able to complete migration. Some workloads dirty memory way too
431 * fast and will not effectively converge, even with auto-converge.
070afca2
JH
432 */
433static void mig_throttle_guest_down(void)
434{
435 MigrationState *s = migrate_get_current();
2594f56d
DB
436 uint64_t pct_initial = s->parameters.cpu_throttle_initial;
437 uint64_t pct_icrement = s->parameters.cpu_throttle_increment;
070afca2
JH
438
439 /* We have not started throttling yet. Let's start it. */
440 if (!cpu_throttle_active()) {
441 cpu_throttle_set(pct_initial);
442 } else {
443 /* Throttling already on, just increase the rate */
444 cpu_throttle_set(cpu_throttle_get_percentage() + pct_icrement);
445 }
446}
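/*
 * Worked example (parameter values are only an illustration): with
 * cpu_throttle_initial = 20 and cpu_throttle_increment = 10, successive
 * calls throttle the guest at 20%, then 30%, 40%, ... until the dirty
 * rate drops enough for migration to converge.
 */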
447
3d0684b2
JQ
448/**
449 * xbzrle_cache_zero_page: insert a zero page in the XBZRLE cache
450 *
6f37bb8b 451 * @rs: current RAM state
3d0684b2
JQ
452 * @current_addr: address for the zero page
453 *
454 * Update the xbzrle cache to reflect a page that's been sent as all 0.
56e93d26
JQ
455 * The important thing is that a stale (not-yet-0'd) page be replaced
456 * by the new data.
457 * As a bonus, if the page wasn't in the cache it gets added so that
3d0684b2 458 * when a small write is made into the 0'd page it gets XBZRLE sent.
56e93d26 459 */
6f37bb8b 460static void xbzrle_cache_zero_page(RAMState *rs, ram_addr_t current_addr)
56e93d26 461{
6f37bb8b 462 if (rs->ram_bulk_stage || !migrate_use_xbzrle()) {
56e93d26
JQ
463 return;
464 }
465
466 /* We don't care if this fails to allocate a new cache page
467 * as long as it updated an old one */
468 cache_insert(XBZRLE.cache, current_addr, ZERO_TARGET_PAGE,
5a987738 469 rs->bitmap_sync_count);
56e93d26
JQ
470}
471
472#define ENCODING_FLAG_XBZRLE 0x1
473
474/**
475 * save_xbzrle_page: compress and send current page
476 *
477 * Returns: 1 means that we wrote the page
478 * 0 means that page is identical to the one already sent
479 * -1 means that xbzrle would be longer than normal
480 *
5a987738 481 * @rs: current RAM state
56e93d26 482 * @f: QEMUFile where to send the data
3d0684b2
JQ
483 * @current_data: pointer to the address of the page contents
484 * @current_addr: addr of the page
56e93d26
JQ
485 * @block: block that contains the page we want to send
486 * @offset: offset inside the block for the page
487 * @last_stage: if we are at the completion stage
488 * @bytes_transferred: increase it with the number of transferred bytes
489 */
5a987738 490static int save_xbzrle_page(RAMState *rs, QEMUFile *f, uint8_t **current_data,
56e93d26
JQ
491 ram_addr_t current_addr, RAMBlock *block,
492 ram_addr_t offset, bool last_stage,
493 uint64_t *bytes_transferred)
494{
495 int encoded_len = 0, bytes_xbzrle;
496 uint8_t *prev_cached_page;
497
5a987738 498 if (!cache_is_cached(XBZRLE.cache, current_addr, rs->bitmap_sync_count)) {
544c36f1 499 rs->xbzrle_cache_miss++;
56e93d26
JQ
500 if (!last_stage) {
501 if (cache_insert(XBZRLE.cache, current_addr, *current_data,
5a987738 502 rs->bitmap_sync_count) == -1) {
56e93d26
JQ
503 return -1;
504 } else {
505 /* update *current_data when the page has been
506 inserted into cache */
507 *current_data = get_cached_data(XBZRLE.cache, current_addr);
508 }
509 }
510 return -1;
511 }
512
513 prev_cached_page = get_cached_data(XBZRLE.cache, current_addr);
514
515 /* save current buffer into memory */
516 memcpy(XBZRLE.current_buf, *current_data, TARGET_PAGE_SIZE);
517
518 /* XBZRLE encoding (if there is no overflow) */
519 encoded_len = xbzrle_encode_buffer(prev_cached_page, XBZRLE.current_buf,
520 TARGET_PAGE_SIZE, XBZRLE.encoded_buf,
521 TARGET_PAGE_SIZE);
522 if (encoded_len == 0) {
55c4446b 523 trace_save_xbzrle_page_skipping();
56e93d26
JQ
524 return 0;
525 } else if (encoded_len == -1) {
55c4446b 526 trace_save_xbzrle_page_overflow();
180f61f7 527 rs->xbzrle_overflows++;
56e93d26
JQ
528 /* update data in the cache */
529 if (!last_stage) {
530 memcpy(prev_cached_page, *current_data, TARGET_PAGE_SIZE);
531 *current_data = prev_cached_page;
532 }
533 return -1;
534 }
535
536 /* we need to update the data in the cache, in order to get the same data */
537 if (!last_stage) {
538 memcpy(prev_cached_page, XBZRLE.current_buf, TARGET_PAGE_SIZE);
539 }
540
541 /* Send XBZRLE based compressed page */
542 bytes_xbzrle = save_page_header(f, block, offset | RAM_SAVE_FLAG_XBZRLE);
543 qemu_put_byte(f, ENCODING_FLAG_XBZRLE);
544 qemu_put_be16(f, encoded_len);
545 qemu_put_buffer(f, XBZRLE.encoded_buf, encoded_len);
546 bytes_xbzrle += encoded_len + 1 + 2;
f36ada95 547 rs->xbzrle_pages++;
07ed50a2 548 rs->xbzrle_bytes += bytes_xbzrle;
56e93d26
JQ
549 *bytes_transferred += bytes_xbzrle;
550
551 return 1;
552}
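/*
 * The XBZRLE record produced above is, on the wire (sketch):
 *
 *   page header | ENCODING_FLAG_XBZRLE (1 byte) | encoded_len (2 bytes,
 *   big endian) | encoded_len bytes of encoded data
 *
 * which is why bytes_xbzrle is incremented by encoded_len + 1 + 2.
 */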
553
3d0684b2
JQ
554/**
555 * migration_bitmap_find_dirty: find the next dirty page from start
f3f491fc 556 *
3d0684b2
JQ
557 * Called with rcu_read_lock() to protect migration_bitmap
558 *
559 * Returns the byte offset within memory region of the start of a dirty page
560 *
6f37bb8b 561 * @rs: current RAM state
3d0684b2
JQ
562 * @rb: RAMBlock where to search for dirty pages
563 * @start: starting address (typically so we can continue from previous page)
564 * @ram_addr_abs: pointer into which to store the address of the dirty page
565 * within the global ram_addr space
f3f491fc 566 */
56e93d26 567static inline
6f37bb8b 568ram_addr_t migration_bitmap_find_dirty(RAMState *rs, RAMBlock *rb,
a82d593b
DDAG
569 ram_addr_t start,
570 ram_addr_t *ram_addr_abs)
56e93d26 571{
2f68e399 572 unsigned long base = rb->offset >> TARGET_PAGE_BITS;
56e93d26 573 unsigned long nr = base + (start >> TARGET_PAGE_BITS);
2f68e399
DDAG
574 uint64_t rb_size = rb->used_length;
575 unsigned long size = base + (rb_size >> TARGET_PAGE_BITS);
2ff64038 576 unsigned long *bitmap;
56e93d26
JQ
577
578 unsigned long next;
579
60be6340 580 bitmap = atomic_rcu_read(&migration_bitmap_rcu)->bmap;
6f37bb8b 581 if (rs->ram_bulk_stage && nr > base) {
56e93d26
JQ
582 next = nr + 1;
583 } else {
2ff64038 584 next = find_next_bit(bitmap, size, nr);
56e93d26
JQ
585 }
586
f3f491fc 587 *ram_addr_abs = next << TARGET_PAGE_BITS;
56e93d26
JQ
588 return (next - base) << TARGET_PAGE_BITS;
589}
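/*
 * Example of the index arithmetic (assuming 4 KiB target pages): for a
 * RAMBlock at offset 0x40000000 and start == 0x2000, base is 0x40000 and
 * nr is 0x40002; the bit returned by find_next_bit() is converted back
 * into a byte offset within the block by (next - base) << TARGET_PAGE_BITS.
 */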
590
0d8ec885 591static inline bool migration_bitmap_clear_dirty(RAMState *rs, ram_addr_t addr)
a82d593b
DDAG
592{
593 bool ret;
594 int nr = addr >> TARGET_PAGE_BITS;
595 unsigned long *bitmap = atomic_rcu_read(&migration_bitmap_rcu)->bmap;
596
597 ret = test_and_clear_bit(nr, bitmap);
598
599 if (ret) {
0d8ec885 600 rs->migration_dirty_pages--;
a82d593b
DDAG
601 }
602 return ret;
603}
604
a66cd90c
JQ
605static void migration_bitmap_sync_range(RAMState *rs, ram_addr_t start,
606 ram_addr_t length)
56e93d26 607{
2ff64038 608 unsigned long *bitmap;
60be6340 609 bitmap = atomic_rcu_read(&migration_bitmap_rcu)->bmap;
0d8ec885
JQ
610 rs->migration_dirty_pages +=
611 cpu_physical_memory_sync_dirty_bitmap(bitmap, start, length,
612 &rs->num_dirty_pages_period);
56e93d26
JQ
613}
614
3d0684b2
JQ
615/**
616 * ram_pagesize_summary: calculate all the pagesizes of a VM
617 *
618 * Returns a summary bitmap of the page sizes of all RAMBlocks
619 *
620 * For VMs with just normal pages this is equivalent to the host page
621 * size. If it's got some huge pages then it's the OR of all the
622 * different page sizes.
e8ca1db2
DDAG
623 */
624uint64_t ram_pagesize_summary(void)
625{
626 RAMBlock *block;
627 uint64_t summary = 0;
628
629 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
630 summary |= block->page_size;
631 }
632
633 return summary;
634}
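/*
 * Example: a guest backed by normal 4 KiB pages plus 2 MiB hugepages
 * yields 0x1000 | 0x200000 == 0x201000, so the destination can check in
 * a single value which page sizes it must be able to handle.
 */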
635
8d820d6f 636static void migration_bitmap_sync(RAMState *rs)
56e93d26
JQ
637{
638 RAMBlock *block;
56e93d26
JQ
639 MigrationState *s = migrate_get_current();
640 int64_t end_time;
c4bdf0cf 641 uint64_t bytes_xfer_now;
56e93d26 642
5a987738 643 rs->bitmap_sync_count++;
56e93d26 644
eac74159
JQ
645 if (!rs->bytes_xfer_prev) {
646 rs->bytes_xfer_prev = ram_bytes_transferred();
56e93d26
JQ
647 }
648
f664da80
JQ
649 if (!rs->time_last_bitmap_sync) {
650 rs->time_last_bitmap_sync = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
56e93d26
JQ
651 }
652
653 trace_migration_bitmap_sync_start();
9c1f8f44 654 memory_global_dirty_log_sync();
56e93d26 655
108cfae0 656 qemu_mutex_lock(&rs->bitmap_mutex);
56e93d26
JQ
657 rcu_read_lock();
658 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
a66cd90c 659 migration_bitmap_sync_range(rs, block->offset, block->used_length);
56e93d26
JQ
660 }
661 rcu_read_unlock();
108cfae0 662 qemu_mutex_unlock(&rs->bitmap_mutex);
56e93d26 663
a66cd90c 664 trace_migration_bitmap_sync_end(rs->num_dirty_pages_period);
1ffb5dfd 665
56e93d26
JQ
666 end_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
667
668 /* more than 1 second = 1000 milliseconds */
f664da80 669 if (end_time > rs->time_last_bitmap_sync + 1000) {
56e93d26
JQ
670 if (migrate_auto_converge()) {
671 /* The following detection logic can be refined later. For now:
672 Check to see if the dirtied bytes are 50% more than the approx.
673 amount of bytes that just got transferred since the last time we
070afca2
JH
674 were in this routine. If that happens twice, start or increase
675 throttling */
56e93d26 676 bytes_xfer_now = ram_bytes_transferred();
070afca2 677
56e93d26 678 if (s->dirty_pages_rate &&
a66cd90c 679 (rs->num_dirty_pages_period * TARGET_PAGE_SIZE >
eac74159 680 (bytes_xfer_now - rs->bytes_xfer_prev) / 2) &&
8d820d6f 681 (rs->dirty_rate_high_cnt++ >= 2)) {
56e93d26 682 trace_migration_throttle();
8d820d6f 683 rs->dirty_rate_high_cnt = 0;
070afca2 684 mig_throttle_guest_down();
56e93d26 685 }
eac74159 686 rs->bytes_xfer_prev = bytes_xfer_now;
56e93d26 687 }
070afca2 688
56e93d26 689 if (migrate_use_xbzrle()) {
23b28c3c 690 if (rs->iterations_prev != rs->iterations) {
b07016b6 691 rs->xbzrle_cache_miss_rate =
544c36f1 692 (double)(rs->xbzrle_cache_miss -
b5833fde 693 rs->xbzrle_cache_miss_prev) /
23b28c3c 694 (rs->iterations - rs->iterations_prev);
56e93d26 695 }
23b28c3c 696 rs->iterations_prev = rs->iterations;
544c36f1 697 rs->xbzrle_cache_miss_prev = rs->xbzrle_cache_miss;
56e93d26 698 }
a66cd90c 699 s->dirty_pages_rate = rs->num_dirty_pages_period * 1000
f664da80 700 / (end_time - rs->time_last_bitmap_sync);
56e93d26 701 s->dirty_bytes_rate = s->dirty_pages_rate * TARGET_PAGE_SIZE;
f664da80 702 rs->time_last_bitmap_sync = end_time;
a66cd90c 703 rs->num_dirty_pages_period = 0;
56e93d26 704 }
5a987738 705 s->dirty_sync_count = rs->bitmap_sync_count;
4addcd4f 706 if (migrate_use_events()) {
5a987738 707 qapi_event_send_migration_pass(rs->bitmap_sync_count, NULL);
4addcd4f 708 }
56e93d26
JQ
709}
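/*
 * Worked example for the auto-converge check above: if 600 MB of guest
 * memory were dirtied during the last period while only 1000 MB were
 * transferred, then 600 MB > 1000 MB / 2 holds; if that keeps happening
 * over consecutive sync periods (tracked by dirty_rate_high_cnt), the
 * guest is throttled via mig_throttle_guest_down().
 */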
710
711/**
3d0684b2 712 * save_zero_page: send the zero page to the stream
56e93d26 713 *
3d0684b2 714 * Returns the number of pages written.
56e93d26 715 *
f7ccd61b 716 * @rs: current RAM state
56e93d26
JQ
717 * @f: QEMUFile where to send the data
718 * @block: block that contains the page we want to send
719 * @offset: offset inside the block for the page
720 * @p: pointer to the page
721 * @bytes_transferred: increase it with the number of transferred bytes
722 */
f7ccd61b
JQ
723static int save_zero_page(RAMState *rs, QEMUFile *f, RAMBlock *block,
724 ram_addr_t offset,
56e93d26
JQ
725 uint8_t *p, uint64_t *bytes_transferred)
726{
727 int pages = -1;
728
729 if (is_zero_range(p, TARGET_PAGE_SIZE)) {
f7ccd61b 730 rs->zero_pages++;
56e93d26
JQ
731 *bytes_transferred += save_page_header(f, block,
732 offset | RAM_SAVE_FLAG_COMPRESS);
733 qemu_put_byte(f, 0);
734 *bytes_transferred += 1;
735 pages = 1;
736 }
737
738 return pages;
739}
740
36449157 741static void ram_release_pages(MigrationState *ms, const char *rbname,
53f09a10
PB
742 uint64_t offset, int pages)
743{
744 if (!migrate_release_ram() || !migration_in_postcopy(ms)) {
745 return;
746 }
747
36449157 748 ram_discard_range(NULL, rbname, offset, pages << TARGET_PAGE_BITS);
53f09a10
PB
749}
750
56e93d26 751/**
3d0684b2 752 * ram_save_page: send the given page to the stream
56e93d26 753 *
3d0684b2 754 * Returns the number of pages written.
3fd3c4b3
DDAG
755 * < 0 - error
756 * >=0 - Number of pages written - this might legally be 0
757 * if xbzrle noticed the page was the same.
56e93d26 758 *
6f37bb8b 759 * @rs: current RAM state
3d0684b2 760 * @ms: current migration state
56e93d26
JQ
761 * @f: QEMUFile where to send the data
762 * @block: block that contains the page we want to send
763 * @offset: offset inside the block for the page
764 * @last_stage: if we are at the completion stage
765 * @bytes_transferred: increase it with the number of transferred bytes
766 */
6f37bb8b
JQ
767static int ram_save_page(RAMState *rs, MigrationState *ms, QEMUFile *f,
768 PageSearchStatus *pss, bool last_stage,
769 uint64_t *bytes_transferred)
56e93d26
JQ
770{
771 int pages = -1;
772 uint64_t bytes_xmit;
773 ram_addr_t current_addr;
56e93d26
JQ
774 uint8_t *p;
775 int ret;
776 bool send_async = true;
a08f6890
HZ
777 RAMBlock *block = pss->block;
778 ram_addr_t offset = pss->offset;
56e93d26 779
2f68e399 780 p = block->host + offset;
56e93d26
JQ
781
782 /* In doubt sent page as normal */
783 bytes_xmit = 0;
784 ret = ram_control_save_page(f, block->offset,
785 offset, TARGET_PAGE_SIZE, &bytes_xmit);
786 if (bytes_xmit) {
787 *bytes_transferred += bytes_xmit;
788 pages = 1;
789 }
790
791 XBZRLE_cache_lock();
792
793 current_addr = block->offset + offset;
794
6f37bb8b 795 if (block == rs->last_sent_block) {
56e93d26
JQ
796 offset |= RAM_SAVE_FLAG_CONTINUE;
797 }
798 if (ret != RAM_SAVE_CONTROL_NOT_SUPP) {
799 if (ret != RAM_SAVE_CONTROL_DELAYED) {
800 if (bytes_xmit > 0) {
b4d1c6e7 801 rs->norm_pages++;
56e93d26 802 } else if (bytes_xmit == 0) {
f7ccd61b 803 rs->zero_pages++;
56e93d26
JQ
804 }
805 }
806 } else {
f7ccd61b 807 pages = save_zero_page(rs, f, block, offset, p, bytes_transferred);
56e93d26
JQ
808 if (pages > 0) {
809 /* Must let xbzrle know, otherwise a previous (now 0'd) cached
810 * page would be stale
811 */
6f37bb8b 812 xbzrle_cache_zero_page(rs, current_addr);
53f09a10 813 ram_release_pages(ms, block->idstr, pss->offset, pages);
6f37bb8b 814 } else if (!rs->ram_bulk_stage &&
9eb14766 815 !migration_in_postcopy(ms) && migrate_use_xbzrle()) {
5a987738 816 pages = save_xbzrle_page(rs, f, &p, current_addr, block,
56e93d26
JQ
817 offset, last_stage, bytes_transferred);
818 if (!last_stage) {
819 /* Can't send this cached data async, since the cache page
820 * might get updated before it gets to the wire
821 */
822 send_async = false;
823 }
824 }
825 }
826
827 /* XBZRLE overflow or normal page */
828 if (pages == -1) {
829 *bytes_transferred += save_page_header(f, block,
830 offset | RAM_SAVE_FLAG_PAGE);
831 if (send_async) {
53f09a10
PB
832 qemu_put_buffer_async(f, p, TARGET_PAGE_SIZE,
833 migrate_release_ram() &
834 migration_in_postcopy(ms));
56e93d26
JQ
835 } else {
836 qemu_put_buffer(f, p, TARGET_PAGE_SIZE);
837 }
838 *bytes_transferred += TARGET_PAGE_SIZE;
839 pages = 1;
b4d1c6e7 840 rs->norm_pages++;
56e93d26
JQ
841 }
842
843 XBZRLE_cache_unlock();
844
845 return pages;
846}
847
a7a9a88f
LL
848static int do_compress_ram_page(QEMUFile *f, RAMBlock *block,
849 ram_addr_t offset)
56e93d26
JQ
850{
851 int bytes_sent, blen;
a7a9a88f 852 uint8_t *p = block->host + (offset & TARGET_PAGE_MASK);
56e93d26 853
a7a9a88f 854 bytes_sent = save_page_header(f, block, offset |
56e93d26 855 RAM_SAVE_FLAG_COMPRESS_PAGE);
a7a9a88f 856 blen = qemu_put_compression_data(f, p, TARGET_PAGE_SIZE,
56e93d26 857 migrate_compress_level());
b3be2896
LL
858 if (blen < 0) {
859 bytes_sent = 0;
860 qemu_file_set_error(migrate_get_current()->to_dst_file, blen);
861 error_report("compressed data failed!");
862 } else {
863 bytes_sent += blen;
53f09a10
PB
864 ram_release_pages(migrate_get_current(), block->idstr,
865 offset & TARGET_PAGE_MASK, 1);
b3be2896 866 }
56e93d26
JQ
867
868 return bytes_sent;
869}
870
56e93d26
JQ
871static uint64_t bytes_transferred;
872
873static void flush_compressed_data(QEMUFile *f)
874{
875 int idx, len, thread_count;
876
877 if (!migrate_use_compression()) {
878 return;
879 }
880 thread_count = migrate_compress_threads();
a7a9a88f 881
0d9f9a5c 882 qemu_mutex_lock(&comp_done_lock);
56e93d26 883 for (idx = 0; idx < thread_count; idx++) {
a7a9a88f 884 while (!comp_param[idx].done) {
0d9f9a5c 885 qemu_cond_wait(&comp_done_cond, &comp_done_lock);
56e93d26 886 }
a7a9a88f 887 }
0d9f9a5c 888 qemu_mutex_unlock(&comp_done_lock);
a7a9a88f
LL
889
890 for (idx = 0; idx < thread_count; idx++) {
891 qemu_mutex_lock(&comp_param[idx].mutex);
90e56fb4 892 if (!comp_param[idx].quit) {
56e93d26
JQ
893 len = qemu_put_qemu_file(f, comp_param[idx].file);
894 bytes_transferred += len;
895 }
a7a9a88f 896 qemu_mutex_unlock(&comp_param[idx].mutex);
56e93d26
JQ
897 }
898}
899
900static inline void set_compress_params(CompressParam *param, RAMBlock *block,
901 ram_addr_t offset)
902{
903 param->block = block;
904 param->offset = offset;
905}
906
b4d1c6e7
JQ
907static int compress_page_with_multi_thread(RAMState *rs, QEMUFile *f,
908 RAMBlock *block, ram_addr_t offset,
56e93d26
JQ
909 uint64_t *bytes_transferred)
910{
911 int idx, thread_count, bytes_xmit = -1, pages = -1;
912
913 thread_count = migrate_compress_threads();
0d9f9a5c 914 qemu_mutex_lock(&comp_done_lock);
56e93d26
JQ
915 while (true) {
916 for (idx = 0; idx < thread_count; idx++) {
917 if (comp_param[idx].done) {
a7a9a88f 918 comp_param[idx].done = false;
56e93d26 919 bytes_xmit = qemu_put_qemu_file(f, comp_param[idx].file);
a7a9a88f 920 qemu_mutex_lock(&comp_param[idx].mutex);
56e93d26 921 set_compress_params(&comp_param[idx], block, offset);
a7a9a88f
LL
922 qemu_cond_signal(&comp_param[idx].cond);
923 qemu_mutex_unlock(&comp_param[idx].mutex);
56e93d26 924 pages = 1;
b4d1c6e7 925 rs->norm_pages++;
56e93d26
JQ
926 *bytes_transferred += bytes_xmit;
927 break;
928 }
929 }
930 if (pages > 0) {
931 break;
932 } else {
0d9f9a5c 933 qemu_cond_wait(&comp_done_cond, &comp_done_lock);
56e93d26
JQ
934 }
935 }
0d9f9a5c 936 qemu_mutex_unlock(&comp_done_lock);
56e93d26
JQ
937
938 return pages;
939}
940
941/**
942 * ram_save_compressed_page: compress the given page and send it to the stream
943 *
3d0684b2 944 * Returns the number of pages written.
56e93d26 945 *
6f37bb8b 946 * @rs: current RAM state
3d0684b2 947 * @ms: current migration state
56e93d26
JQ
948 * @f: QEMUFile where to send the data
949 * @block: block that contains the page we want to send
950 * @offset: offset inside the block for the page
951 * @last_stage: if we are at the completion stage
952 * @bytes_transferred: increase it with the number of transferred bytes
953 */
6f37bb8b
JQ
954static int ram_save_compressed_page(RAMState *rs, MigrationState *ms,
955 QEMUFile *f,
9eb14766 956 PageSearchStatus *pss, bool last_stage,
56e93d26
JQ
957 uint64_t *bytes_transferred)
958{
959 int pages = -1;
fc50438e 960 uint64_t bytes_xmit = 0;
56e93d26 961 uint8_t *p;
fc50438e 962 int ret, blen;
a08f6890
HZ
963 RAMBlock *block = pss->block;
964 ram_addr_t offset = pss->offset;
56e93d26 965
2f68e399 966 p = block->host + offset;
56e93d26 967
56e93d26
JQ
968 ret = ram_control_save_page(f, block->offset,
969 offset, TARGET_PAGE_SIZE, &bytes_xmit);
970 if (bytes_xmit) {
971 *bytes_transferred += bytes_xmit;
972 pages = 1;
973 }
56e93d26
JQ
974 if (ret != RAM_SAVE_CONTROL_NOT_SUPP) {
975 if (ret != RAM_SAVE_CONTROL_DELAYED) {
976 if (bytes_xmit > 0) {
b4d1c6e7 977 rs->norm_pages++;
56e93d26 978 } else if (bytes_xmit == 0) {
f7ccd61b 979 rs->zero_pages++;
56e93d26
JQ
980 }
981 }
982 } else {
983 /* When starting the process of a new block, the first page of
984 * the block should be sent out before other pages in the same
985 * block, and all the pages in the last block should have been sent
986 * out. Keeping this order is important, because the 'cont' flag
987 * is used to avoid resending the block name.
988 */
6f37bb8b 989 if (block != rs->last_sent_block) {
56e93d26 990 flush_compressed_data(f);
f7ccd61b 991 pages = save_zero_page(rs, f, block, offset, p, bytes_transferred);
56e93d26 992 if (pages == -1) {
fc50438e
LL
993 /* Make sure the first page is sent out before other pages */
994 bytes_xmit = save_page_header(f, block, offset |
995 RAM_SAVE_FLAG_COMPRESS_PAGE);
996 blen = qemu_put_compression_data(f, p, TARGET_PAGE_SIZE,
997 migrate_compress_level());
998 if (blen > 0) {
999 *bytes_transferred += bytes_xmit + blen;
b4d1c6e7 1000 rs->norm_pages++;
b3be2896 1001 pages = 1;
fc50438e
LL
1002 } else {
1003 qemu_file_set_error(f, blen);
1004 error_report("compressed data failed!");
b3be2896 1005 }
56e93d26 1006 }
53f09a10
PB
1007 if (pages > 0) {
1008 ram_release_pages(ms, block->idstr, pss->offset, pages);
1009 }
56e93d26 1010 } else {
fc50438e 1011 offset |= RAM_SAVE_FLAG_CONTINUE;
f7ccd61b 1012 pages = save_zero_page(rs, f, block, offset, p, bytes_transferred);
56e93d26 1013 if (pages == -1) {
b4d1c6e7 1014 pages = compress_page_with_multi_thread(rs, f, block, offset,
56e93d26 1015 bytes_transferred);
53f09a10
PB
1016 } else {
1017 ram_release_pages(ms, block->idstr, pss->offset, pages);
56e93d26
JQ
1018 }
1019 }
1020 }
1021
1022 return pages;
1023}
1024
3d0684b2
JQ
1025/**
1026 * find_dirty_block: find the next dirty page and update any state
1027 * associated with the search process.
b9e60928 1028 *
3d0684b2 1029 * Returns whether a dirty page was found
b9e60928 1030 *
6f37bb8b 1031 * @rs: current RAM state
3d0684b2
JQ
1032 * @f: QEMUFile where to send the data
1033 * @pss: data about the state of the current dirty page scan
1034 * @again: set to false if the search has scanned the whole of RAM
1035 * @ram_addr_abs: pointer into which to store the address of the dirty page
1036 * within the global ram_addr space
b9e60928 1037 */
6f37bb8b 1038static bool find_dirty_block(RAMState *rs, QEMUFile *f, PageSearchStatus *pss,
f3f491fc 1039 bool *again, ram_addr_t *ram_addr_abs)
b9e60928 1040{
6f37bb8b 1041 pss->offset = migration_bitmap_find_dirty(rs, pss->block, pss->offset,
a82d593b 1042 ram_addr_abs);
6f37bb8b
JQ
1043 if (pss->complete_round && pss->block == rs->last_seen_block &&
1044 pss->offset >= rs->last_offset) {
b9e60928
DDAG
1045 /*
1046 * We've been once around the RAM and haven't found anything.
1047 * Give up.
1048 */
1049 *again = false;
1050 return false;
1051 }
1052 if (pss->offset >= pss->block->used_length) {
1053 /* Didn't find anything in this RAM Block */
1054 pss->offset = 0;
1055 pss->block = QLIST_NEXT_RCU(pss->block, next);
1056 if (!pss->block) {
1057 /* Hit the end of the list */
1058 pss->block = QLIST_FIRST_RCU(&ram_list.blocks);
1059 /* Flag that we've looped */
1060 pss->complete_round = true;
6f37bb8b 1061 rs->ram_bulk_stage = false;
b9e60928
DDAG
1062 if (migrate_use_xbzrle()) {
1063 /* If xbzrle is on, stop using the data compression at this
1064 * point. In theory, xbzrle can do better than compression.
1065 */
1066 flush_compressed_data(f);
1067 compression_switch = false;
1068 }
1069 }
1070 /* Didn't find anything this time, but try again on the new block */
1071 *again = true;
1072 return false;
1073 } else {
1074 /* Can go around again, but... */
1075 *again = true;
1076 /* We've found something so probably don't need to */
1077 return true;
1078 }
1079}
1080
3d0684b2
JQ
1081/**
1082 * unqueue_page: gets a page off the queue
1083 *
a82d593b 1084 * Helper for 'get_queued_page' - gets a page off the queue
a82d593b 1085 *
3d0684b2
JQ
1086 * Returns the block of the page (or NULL if none available)
1087 *
1088 * @ms: current migration state
1089 * @offset: used to return the offset within the RAMBlock
1090 * @ram_addr_abs: pointer into which to store the address of the dirty page
1091 * within the global ram_addr space
a82d593b
DDAG
1092 */
1093static RAMBlock *unqueue_page(MigrationState *ms, ram_addr_t *offset,
1094 ram_addr_t *ram_addr_abs)
1095{
1096 RAMBlock *block = NULL;
1097
1098 qemu_mutex_lock(&ms->src_page_req_mutex);
1099 if (!QSIMPLEQ_EMPTY(&ms->src_page_requests)) {
1100 struct MigrationSrcPageRequest *entry =
1101 QSIMPLEQ_FIRST(&ms->src_page_requests);
1102 block = entry->rb;
1103 *offset = entry->offset;
1104 *ram_addr_abs = (entry->offset + entry->rb->offset) &
1105 TARGET_PAGE_MASK;
1106
1107 if (entry->len > TARGET_PAGE_SIZE) {
1108 entry->len -= TARGET_PAGE_SIZE;
1109 entry->offset += TARGET_PAGE_SIZE;
1110 } else {
1111 memory_region_unref(block->mr);
1112 QSIMPLEQ_REMOVE_HEAD(&ms->src_page_requests, next_req);
1113 g_free(entry);
1114 }
1115 }
1116 qemu_mutex_unlock(&ms->src_page_req_mutex);
1117
1118 return block;
1119}
1120
3d0684b2
JQ
1121/**
1122 * get_queued_page: unqueue a page from the postcopy requests
1123 *
1124 * Skips pages that are already sent (!dirty)
a82d593b 1125 *
3d0684b2 1126 * Returns whether a queued page was found
a82d593b 1127 *
6f37bb8b 1128 * @rs: current RAM state
3d0684b2
JQ
1129 * @ms: current migration state
1130 * @pss: data about the state of the current dirty page scan
1131 * @ram_addr_abs: pointer into which to store the address of the dirty page
1132 * within the global ram_addr space
a82d593b 1133 */
6f37bb8b
JQ
1134static bool get_queued_page(RAMState *rs, MigrationState *ms,
1135 PageSearchStatus *pss,
a82d593b
DDAG
1136 ram_addr_t *ram_addr_abs)
1137{
1138 RAMBlock *block;
1139 ram_addr_t offset;
1140 bool dirty;
1141
1142 do {
1143 block = unqueue_page(ms, &offset, ram_addr_abs);
1144 /*
1145 * We're sending this page, and since it's postcopy nothing else
1146 * will dirty it, and we must make sure it doesn't get sent again
1147 * even if this queue request was received after the background
1148 * search already sent it.
1149 */
1150 if (block) {
1151 unsigned long *bitmap;
1152 bitmap = atomic_rcu_read(&migration_bitmap_rcu)->bmap;
1153 dirty = test_bit(*ram_addr_abs >> TARGET_PAGE_BITS, bitmap);
1154 if (!dirty) {
1155 trace_get_queued_page_not_dirty(
1156 block->idstr, (uint64_t)offset,
1157 (uint64_t)*ram_addr_abs,
1158 test_bit(*ram_addr_abs >> TARGET_PAGE_BITS,
1159 atomic_rcu_read(&migration_bitmap_rcu)->unsentmap));
1160 } else {
1161 trace_get_queued_page(block->idstr,
1162 (uint64_t)offset,
1163 (uint64_t)*ram_addr_abs);
1164 }
1165 }
1166
1167 } while (block && !dirty);
1168
1169 if (block) {
1170 /*
1171 * As soon as we start servicing pages out of order, then we have
1172 * to kill the bulk stage, since the bulk stage assumes
1173 * in (migration_bitmap_find_and_reset_dirty) that every page is
1174 * dirty, which is no longer true.
1175 */
6f37bb8b 1176 rs->ram_bulk_stage = false;
a82d593b
DDAG
1177
1178 /*
1179 * We want the background search to continue from the queued page
1180 * since the guest is likely to want other pages near to the page
1181 * it just requested.
1182 */
1183 pss->block = block;
1184 pss->offset = offset;
1185 }
1186
1187 return !!block;
1188}
1189
6c595cde 1190/**
5e58f968
JQ
1191 * migration_page_queue_free: drop any remaining pages in the ram
1192 * request queue
6c595cde 1193 *
3d0684b2
JQ
1194 * It should be empty at the end anyway, but in error cases there may
1195 * be some left. If any pages are left, we drop them.
1196 *
1197 * @ms: current migration state
6c595cde 1198 */
5e58f968 1199void migration_page_queue_free(MigrationState *ms)
6c595cde
DDAG
1200{
1201 struct MigrationSrcPageRequest *mspr, *next_mspr;
1202 /* This queue generally should be empty - but in the case of a failed
1203 * migration it might have some droppings in.
1204 */
1205 rcu_read_lock();
1206 QSIMPLEQ_FOREACH_SAFE(mspr, &ms->src_page_requests, next_req, next_mspr) {
1207 memory_region_unref(mspr->rb->mr);
1208 QSIMPLEQ_REMOVE_HEAD(&ms->src_page_requests, next_req);
1209 g_free(mspr);
1210 }
1211 rcu_read_unlock();
1212}
1213
1214/**
3d0684b2
JQ
1215 * ram_save_queue_pages: queue the page for transmission
1216 *
1217 * A request from postcopy destination for example.
1218 *
1219 * Returns zero on success or negative on error
1220 *
1221 * @ms: current migration state
1222 * @rbname: Name of the RAMBLock of the request. NULL means the
1223 * same that last one.
1224 * @start: starting address from the start of the RAMBlock
1225 * @len: length (in bytes) to send
6c595cde
DDAG
1226 */
1227int ram_save_queue_pages(MigrationState *ms, const char *rbname,
1228 ram_addr_t start, ram_addr_t len)
1229{
1230 RAMBlock *ramblock;
1231
d3bf5418 1232 ms->postcopy_requests++;
6c595cde
DDAG
1233 rcu_read_lock();
1234 if (!rbname) {
1235 /* Reuse last RAMBlock */
1236 ramblock = ms->last_req_rb;
1237
1238 if (!ramblock) {
1239 /*
1240 * Shouldn't happen, we can't reuse the last RAMBlock if
1241 * it's the 1st request.
1242 */
1243 error_report("ram_save_queue_pages no previous block");
1244 goto err;
1245 }
1246 } else {
1247 ramblock = qemu_ram_block_by_name(rbname);
1248
1249 if (!ramblock) {
1250 /* We shouldn't be asked for a non-existent RAMBlock */
1251 error_report("ram_save_queue_pages no block '%s'", rbname);
1252 goto err;
1253 }
1254 ms->last_req_rb = ramblock;
1255 }
1256 trace_ram_save_queue_pages(ramblock->idstr, start, len);
1257 if (start+len > ramblock->used_length) {
9458ad6b
JQ
1258 error_report("%s request overrun start=" RAM_ADDR_FMT " len="
1259 RAM_ADDR_FMT " blocklen=" RAM_ADDR_FMT,
6c595cde
DDAG
1260 __func__, start, len, ramblock->used_length);
1261 goto err;
1262 }
1263
1264 struct MigrationSrcPageRequest *new_entry =
1265 g_malloc0(sizeof(struct MigrationSrcPageRequest));
1266 new_entry->rb = ramblock;
1267 new_entry->offset = start;
1268 new_entry->len = len;
1269
1270 memory_region_ref(ramblock->mr);
1271 qemu_mutex_lock(&ms->src_page_req_mutex);
1272 QSIMPLEQ_INSERT_TAIL(&ms->src_page_requests, new_entry, next_req);
1273 qemu_mutex_unlock(&ms->src_page_req_mutex);
1274 rcu_read_unlock();
1275
1276 return 0;
1277
1278err:
1279 rcu_read_unlock();
1280 return -1;
1281}
1282
a82d593b 1283/**
3d0684b2 1284 * ram_save_target_page: save one target page
a82d593b 1285 *
3d0684b2 1286 * Returns the number of pages written
a82d593b 1287 *
6f37bb8b 1288 * @rs: current RAM state
3d0684b2 1289 * @ms: current migration state
a82d593b 1290 * @f: QEMUFile where to send the data
3d0684b2 1291 * @pss: data about the page we want to send
a82d593b
DDAG
1292 * @last_stage: if we are at the completion stage
1293 * @bytes_transferred: increase it with the number of transferred bytes
3d0684b2 1294 * @dirty_ram_abs: address of the start of the dirty page in ram_addr_t space
a82d593b 1295 */
6f37bb8b 1296static int ram_save_target_page(RAMState *rs, MigrationState *ms, QEMUFile *f,
a08f6890 1297 PageSearchStatus *pss,
a82d593b
DDAG
1298 bool last_stage,
1299 uint64_t *bytes_transferred,
1300 ram_addr_t dirty_ram_abs)
1301{
1302 int res = 0;
1303
1304 /* Check if the page is dirty and, if it is, send it */
0d8ec885 1305 if (migration_bitmap_clear_dirty(rs, dirty_ram_abs)) {
a82d593b
DDAG
1306 unsigned long *unsentmap;
1307 if (compression_switch && migrate_use_compression()) {
6f37bb8b 1308 res = ram_save_compressed_page(rs, ms, f, pss,
a82d593b
DDAG
1309 last_stage,
1310 bytes_transferred);
1311 } else {
6f37bb8b 1312 res = ram_save_page(rs, ms, f, pss, last_stage,
a82d593b
DDAG
1313 bytes_transferred);
1314 }
1315
1316 if (res < 0) {
1317 return res;
1318 }
1319 unsentmap = atomic_rcu_read(&migration_bitmap_rcu)->unsentmap;
1320 if (unsentmap) {
1321 clear_bit(dirty_ram_abs >> TARGET_PAGE_BITS, unsentmap);
1322 }
3fd3c4b3
DDAG
1323 /* Only update last_sent_block if a block was actually sent; xbzrle
1324 * might have decided the page was identical so didn't bother writing
1325 * to the stream.
1326 */
1327 if (res > 0) {
6f37bb8b 1328 rs->last_sent_block = pss->block;
3fd3c4b3 1329 }
a82d593b
DDAG
1330 }
1331
1332 return res;
1333}
1334
1335/**
3d0684b2 1336 * ram_save_host_page: save a whole host page
a82d593b 1337 *
3d0684b2
JQ
1338 * Starting at *offset send pages up to the end of the current host
1339 * page. It's valid for the initial offset to point into the middle of
1340 * a host page in which case the remainder of the hostpage is sent.
1341 * Only dirty target pages are sent. Note that the host page size may
1342 * be a huge page for this block.
a82d593b 1343 *
3d0684b2
JQ
1344 * Returns the number of pages written or negative on error
1345 *
6f37bb8b 1346 * @rs: current RAM state
3d0684b2 1347 * @ms: current migration state
a82d593b 1348 * @f: QEMUFile where to send the data
3d0684b2 1349 * @pss: data about the page we want to send
a82d593b
DDAG
1350 * @last_stage: if we are at the completion stage
1351 * @bytes_transferred: increase it with the number of transferred bytes
1352 * @dirty_ram_abs: Address of the start of the dirty page in ram_addr_t space
1353 */
6f37bb8b 1354static int ram_save_host_page(RAMState *rs, MigrationState *ms, QEMUFile *f,
a08f6890
HZ
1355 PageSearchStatus *pss,
1356 bool last_stage,
a82d593b
DDAG
1357 uint64_t *bytes_transferred,
1358 ram_addr_t dirty_ram_abs)
1359{
1360 int tmppages, pages = 0;
4c011c37
DDAG
1361 size_t pagesize = qemu_ram_pagesize(pss->block);
1362
a82d593b 1363 do {
6f37bb8b 1364 tmppages = ram_save_target_page(rs, ms, f, pss, last_stage,
a82d593b
DDAG
1365 bytes_transferred, dirty_ram_abs);
1366 if (tmppages < 0) {
1367 return tmppages;
1368 }
1369
1370 pages += tmppages;
a08f6890 1371 pss->offset += TARGET_PAGE_SIZE;
a82d593b 1372 dirty_ram_abs += TARGET_PAGE_SIZE;
4c011c37 1373 } while (pss->offset & (pagesize - 1));
a82d593b
DDAG
1374
1375 /* The offset we leave with is the last one we looked at */
a08f6890 1376 pss->offset -= TARGET_PAGE_SIZE;
a82d593b
DDAG
1377 return pages;
1378}
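/*
 * Example: on a RAMBlock backed by 2 MiB hugepages with 4 KiB target
 * pages, one call can send up to 512 target pages, so the destination
 * never ends up with a partially populated host page (which matters for
 * postcopy, where a whole host page is placed at once).
 */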
6c595cde 1379
56e93d26 1380/**
3d0684b2 1381 * ram_find_and_save_block: finds a dirty page and sends it to f
56e93d26
JQ
1382 *
1383 * Called within an RCU critical section.
1384 *
3d0684b2 1385 * Returns the number of pages written where zero means no dirty pages
56e93d26 1386 *
6f37bb8b 1387 * @rs: current RAM state
56e93d26
JQ
1388 * @f: QEMUFile where to send the data
1389 * @last_stage: if we are at the completion stage
1390 * @bytes_transferred: increase it with the number of transferred bytes
a82d593b
DDAG
1391 *
1392 * On systems where host-page-size > target-page-size it will send all the
1393 * pages in a host page that are dirty.
56e93d26
JQ
1394 */
1395
6f37bb8b 1396static int ram_find_and_save_block(RAMState *rs, QEMUFile *f, bool last_stage,
56e93d26
JQ
1397 uint64_t *bytes_transferred)
1398{
b8fb8cb7 1399 PageSearchStatus pss;
a82d593b 1400 MigrationState *ms = migrate_get_current();
56e93d26 1401 int pages = 0;
b9e60928 1402 bool again, found;
f3f491fc
DDAG
1403 ram_addr_t dirty_ram_abs; /* Address of the start of the dirty page in
1404 ram_addr_t space */
56e93d26 1405
0827b9e9
AA
1406 /* No dirty page as there is zero RAM */
1407 if (!ram_bytes_total()) {
1408 return pages;
1409 }
1410
6f37bb8b
JQ
1411 pss.block = rs->last_seen_block;
1412 pss.offset = rs->last_offset;
b8fb8cb7
DDAG
1413 pss.complete_round = false;
1414
1415 if (!pss.block) {
1416 pss.block = QLIST_FIRST_RCU(&ram_list.blocks);
1417 }
56e93d26 1418
b9e60928 1419 do {
a82d593b 1420 again = true;
6f37bb8b 1421 found = get_queued_page(rs, ms, &pss, &dirty_ram_abs);
b9e60928 1422
a82d593b
DDAG
1423 if (!found) {
1424 /* priority queue empty, so just search for something dirty */
6f37bb8b 1425 found = find_dirty_block(rs, f, &pss, &again, &dirty_ram_abs);
a82d593b 1426 }
f3f491fc 1427
a82d593b 1428 if (found) {
6f37bb8b 1429 pages = ram_save_host_page(rs, ms, f, &pss,
a82d593b
DDAG
1430 last_stage, bytes_transferred,
1431 dirty_ram_abs);
56e93d26 1432 }
b9e60928 1433 } while (!pages && again);
56e93d26 1434
6f37bb8b
JQ
1435 rs->last_seen_block = pss.block;
1436 rs->last_offset = pss.offset;
56e93d26
JQ
1437
1438 return pages;
1439}
1440
1441void acct_update_position(QEMUFile *f, size_t size, bool zero)
1442{
1443 uint64_t pages = size / TARGET_PAGE_SIZE;
f7ccd61b
JQ
1444 RAMState *rs = &ram_state;
1445
56e93d26 1446 if (zero) {
f7ccd61b 1447 rs->zero_pages += pages;
56e93d26 1448 } else {
b4d1c6e7 1449 rs->norm_pages += pages;
56e93d26
JQ
1450 bytes_transferred += size;
1451 qemu_update_position(f, size);
1452 }
1453}
1454
56e93d26
JQ
1455uint64_t ram_bytes_remaining(void)
1456{
1457 return ram_save_remaining() * TARGET_PAGE_SIZE;
1458}
1459
1460uint64_t ram_bytes_transferred(void)
1461{
1462 return bytes_transferred;
1463}
1464
1465uint64_t ram_bytes_total(void)
1466{
1467 RAMBlock *block;
1468 uint64_t total = 0;
1469
1470 rcu_read_lock();
1471 QLIST_FOREACH_RCU(block, &ram_list.blocks, next)
1472 total += block->used_length;
1473 rcu_read_unlock();
1474 return total;
1475}
1476
1477void free_xbzrle_decoded_buf(void)
1478{
1479 g_free(xbzrle_decoded_buf);
1480 xbzrle_decoded_buf = NULL;
1481}
1482
60be6340
DL
1483static void migration_bitmap_free(struct BitmapRcu *bmap)
1484{
1485 g_free(bmap->bmap);
f3f491fc 1486 g_free(bmap->unsentmap);
60be6340
DL
1487 g_free(bmap);
1488}
1489
6ad2a215 1490static void ram_migration_cleanup(void *opaque)
56e93d26 1491{
2ff64038
LZ
1492 /* the caller must hold the iothread lock or be in a bh, so there is
1493 * no write race against this migration_bitmap
1494 */
60be6340
DL
1495 struct BitmapRcu *bitmap = migration_bitmap_rcu;
1496 atomic_rcu_set(&migration_bitmap_rcu, NULL);
2ff64038 1497 if (bitmap) {
56e93d26 1498 memory_global_dirty_log_stop();
60be6340 1499 call_rcu(bitmap, migration_bitmap_free, rcu);
56e93d26
JQ
1500 }
1501
1502 XBZRLE_cache_lock();
1503 if (XBZRLE.cache) {
1504 cache_fini(XBZRLE.cache);
1505 g_free(XBZRLE.encoded_buf);
1506 g_free(XBZRLE.current_buf);
adb65dec 1507 g_free(ZERO_TARGET_PAGE);
56e93d26
JQ
1508 XBZRLE.cache = NULL;
1509 XBZRLE.encoded_buf = NULL;
1510 XBZRLE.current_buf = NULL;
1511 }
1512 XBZRLE_cache_unlock();
1513}
1514
6f37bb8b 1515static void ram_state_reset(RAMState *rs)
56e93d26 1516{
6f37bb8b
JQ
1517 rs->last_seen_block = NULL;
1518 rs->last_sent_block = NULL;
1519 rs->last_offset = 0;
1520 rs->last_version = ram_list.version;
1521 rs->ram_bulk_stage = true;
56e93d26
JQ
1522}
1523
1524#define MAX_WAIT 50 /* ms, half buffered_file limit */
1525
dd631697
LZ
1526void migration_bitmap_extend(ram_addr_t old, ram_addr_t new)
1527{
0d8ec885 1528 RAMState *rs = &ram_state;
108cfae0 1529
dd631697
LZ
1530 /* called from the qemu main thread, so there is
1531 * no write race against this migration_bitmap
1532 */
60be6340
DL
1533 if (migration_bitmap_rcu) {
1534 struct BitmapRcu *old_bitmap = migration_bitmap_rcu, *bitmap;
1535 bitmap = g_new(struct BitmapRcu, 1);
1536 bitmap->bmap = bitmap_new(new);
dd631697
LZ
1537
1538 /* prevent bits in migration_bitmap from being set concurrently
1539 * by migration_bitmap_sync_range().
1540 * it is safe for migration if migration_bitmap bits are cleared
1541 * at the same time.
1542 */
108cfae0 1543 qemu_mutex_lock(&rs->bitmap_mutex);
60be6340
DL
1544 bitmap_copy(bitmap->bmap, old_bitmap->bmap, old);
1545 bitmap_set(bitmap->bmap, old, new - old);
f3f491fc
DDAG
1546
1547 /* We don't have a way to safely extend the unsentmap
1548 * with RCU; so mark it as missing, and entry to postcopy
1549 * will fail.
1550 */
1551 bitmap->unsentmap = NULL;
1552
60be6340 1553 atomic_rcu_set(&migration_bitmap_rcu, bitmap);
108cfae0 1554 qemu_mutex_unlock(&rs->bitmap_mutex);
0d8ec885 1555 rs->migration_dirty_pages += new - old;
60be6340 1556 call_rcu(old_bitmap, migration_bitmap_free, rcu);
dd631697
LZ
1557 }
1558}
56e93d26 1559
4f2e4252
DDAG
1560/*
1561 * 'expected' is the value you expect the bitmap mostly to be full
1562 * of; it won't bother printing lines that are all this value.
1563 * If 'todump' is null the migration bitmap is dumped.
1564 */
1565void ram_debug_dump_bitmap(unsigned long *todump, bool expected)
1566{
1567 int64_t ram_pages = last_ram_offset() >> TARGET_PAGE_BITS;
1568
1569 int64_t cur;
1570 int64_t linelen = 128;
1571 char linebuf[129];
1572
1573 if (!todump) {
1574 todump = atomic_rcu_read(&migration_bitmap_rcu)->bmap;
1575 }
1576
1577 for (cur = 0; cur < ram_pages; cur += linelen) {
1578 int64_t curb;
1579 bool found = false;
1580 /*
1581 * Last line; catch the case where the line length
1582 * is longer than remaining ram
1583 */
1584 if (cur + linelen > ram_pages) {
1585 linelen = ram_pages - cur;
1586 }
1587 for (curb = 0; curb < linelen; curb++) {
1588 bool thisbit = test_bit(cur + curb, todump);
1589 linebuf[curb] = thisbit ? '1' : '.';
1590 found = found || (thisbit != expected);
1591 }
1592 if (found) {
1593 linebuf[curb] = '\0';
1594 fprintf(stderr, "0x%08" PRIx64 " : %s\n", cur, linebuf);
1595 }
1596 }
1597}
1598
e0b266f0
DDAG
1599/* **** functions for postcopy ***** */
1600
ced1c616
PB
1601void ram_postcopy_migrated_memory_release(MigrationState *ms)
1602{
1603 struct RAMBlock *block;
1604 unsigned long *bitmap = atomic_rcu_read(&migration_bitmap_rcu)->bmap;
1605
1606 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1607 unsigned long first = block->offset >> TARGET_PAGE_BITS;
1608 unsigned long range = first + (block->used_length >> TARGET_PAGE_BITS);
1609 unsigned long run_start = find_next_zero_bit(bitmap, range, first);
1610
1611 while (run_start < range) {
1612 unsigned long run_end = find_next_bit(bitmap, range, run_start + 1);
1613 ram_discard_range(NULL, block->idstr, run_start << TARGET_PAGE_BITS,
1614 (run_end - run_start) << TARGET_PAGE_BITS);
1615 run_start = find_next_zero_bit(bitmap, range, run_end + 1);
1616 }
1617 }
1618}
1619
3d0684b2
JQ
1620/**
1621 * postcopy_send_discard_bm_ram: discard a RAMBlock
1622 *
1623 * Returns zero on success
1624 *
e0b266f0
DDAG
1625 * Callback from postcopy_each_ram_send_discard for each RAMBlock
1626 * Note: At this point the 'unsentmap' is the processed bitmap combined
1627 * with the dirtymap; so a '1' means it's either dirty or unsent.
3d0684b2
JQ
1628 *
1629 * @ms: current migration state
1630 * @pds: state for postcopy
1631 * @start: RAMBlock starting page
1632 * @length: RAMBlock size
e0b266f0
DDAG
1633 */
1634static int postcopy_send_discard_bm_ram(MigrationState *ms,
1635 PostcopyDiscardState *pds,
1636 unsigned long start,
1637 unsigned long length)
1638{
1639 unsigned long end = start + length; /* one after the end */
1640 unsigned long current;
1641 unsigned long *unsentmap;
1642
1643 unsentmap = atomic_rcu_read(&migration_bitmap_rcu)->unsentmap;
1644 for (current = start; current < end; ) {
1645 unsigned long one = find_next_bit(unsentmap, end, current);
1646
1647 if (one <= end) {
1648 unsigned long zero = find_next_zero_bit(unsentmap, end, one + 1);
1649 unsigned long discard_length;
1650
1651 if (zero >= end) {
1652 discard_length = end - one;
1653 } else {
1654 discard_length = zero - one;
1655 }
d688c62d
DDAG
1656 if (discard_length) {
1657 postcopy_discard_send_range(ms, pds, one, discard_length);
1658 }
e0b266f0
DDAG
1659 current = one + discard_length;
1660 } else {
1661 current = one;
1662 }
1663 }
1664
1665 return 0;
1666}
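/*
 * Example: for a stretch of the unsentmap that reads 0 0 1 1 1 0 0 1 0 0
 * (lowest page first), the loop above emits two discard ranges: one
 * covering pages 2-4 and one covering page 7 of that stretch.
 */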
1667
3d0684b2
JQ
1668/**
1669 * postcopy_each_ram_send_discard: discard all RAMBlocks
1670 *
1671 * Returns 0 for success or negative for error
1672 *
e0b266f0
DDAG
1673 * Utility for the outgoing postcopy code.
1674 * Calls postcopy_send_discard_bm_ram for each RAMBlock
1675 * passing it bitmap indexes and name.
e0b266f0
DDAG
1676 * (qemu_ram_foreach_block ends up passing unscaled lengths
1677 * which would mean postcopy code would have to deal with target page)
3d0684b2
JQ
1678 *
1679 * @ms: current migration state
e0b266f0
DDAG
1680 */
1681static int postcopy_each_ram_send_discard(MigrationState *ms)
1682{
1683 struct RAMBlock *block;
1684 int ret;
1685
1686 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1687 unsigned long first = block->offset >> TARGET_PAGE_BITS;
1688 PostcopyDiscardState *pds = postcopy_discard_send_init(ms,
1689 first,
1690 block->idstr);
1691
1692 /*
1693 * Postcopy sends chunks of bitmap over the wire, but it
1694 * just needs indexes at this point, avoids it having
1695 * target page specific code.
1696 */
1697 ret = postcopy_send_discard_bm_ram(ms, pds, first,
1698 block->used_length >> TARGET_PAGE_BITS);
1699 postcopy_discard_send_finish(ms, pds);
1700 if (ret) {
1701 return ret;
1702 }
1703 }
1704
1705 return 0;
1706}
1707
3d0684b2
JQ
1708/**
1709 * postcopy_chunk_hostpages_pass: canonicalize bitmap in host pages
1710 *
1711 * Helper for postcopy_chunk_hostpages; it's called twice to
1712 * canonicalize the two bitmaps, that are similar, but one is
1713 * inverted.
99e314eb 1714 *
3d0684b2
JQ
1715 * Postcopy requires that all target pages in a hostpage are dirty or
1716 * clean, not a mix. This function canonicalizes the bitmaps.
99e314eb 1717 *
3d0684b2
JQ
1718 * @ms: current migration state
1719 * @unsent_pass: if true we need to canonicalize partially unsent host pages
1720 * otherwise we need to canonicalize partially dirty host pages
1721 * @block: block that contains the page we want to canonicalize
1722 * @pds: state for postcopy
99e314eb
DDAG
1723 */
1724static void postcopy_chunk_hostpages_pass(MigrationState *ms, bool unsent_pass,
1725 RAMBlock *block,
1726 PostcopyDiscardState *pds)
1727{
0d8ec885 1728 RAMState *rs = &ram_state;
99e314eb
DDAG
1729 unsigned long *bitmap;
1730 unsigned long *unsentmap;
29c59172 1731 unsigned int host_ratio = block->page_size / TARGET_PAGE_SIZE;
99e314eb
DDAG
1732 unsigned long first = block->offset >> TARGET_PAGE_BITS;
1733 unsigned long len = block->used_length >> TARGET_PAGE_BITS;
1734 unsigned long last = first + (len - 1);
1735 unsigned long run_start;
1736
29c59172
DDAG
1737 if (block->page_size == TARGET_PAGE_SIZE) {
1738 /* Easy case - TPS==HPS for a non-huge page RAMBlock */
1739 return;
1740 }
1741
99e314eb
DDAG
1742 bitmap = atomic_rcu_read(&migration_bitmap_rcu)->bmap;
1743 unsentmap = atomic_rcu_read(&migration_bitmap_rcu)->unsentmap;
1744
1745 if (unsent_pass) {
1746 /* Find a sent page */
1747 run_start = find_next_zero_bit(unsentmap, last + 1, first);
1748 } else {
1749 /* Find a dirty page */
1750 run_start = find_next_bit(bitmap, last + 1, first);
1751 }
1752
1753 while (run_start <= last) {
1754 bool do_fixup = false;
1755 unsigned long fixup_start_addr;
1756 unsigned long host_offset;
1757
1758 /*
1759 * If the start of this run of pages is in the middle of a host
1760 * page, then we need to fixup this host page.
1761 */
1762 host_offset = run_start % host_ratio;
1763 if (host_offset) {
1764 do_fixup = true;
1765 run_start -= host_offset;
1766 fixup_start_addr = run_start;
1767 /* For the next pass */
1768 run_start = run_start + host_ratio;
1769 } else {
1770 /* Find the end of this run */
1771 unsigned long run_end;
1772 if (unsent_pass) {
1773 run_end = find_next_bit(unsentmap, last + 1, run_start + 1);
1774 } else {
1775 run_end = find_next_zero_bit(bitmap, last + 1, run_start + 1);
1776 }
1777 /*
1778 * If the end isn't at the start of a host page, then the
1779 * run doesn't finish at the end of a host page
1780 * and we need to discard.
1781 */
1782 host_offset = run_end % host_ratio;
1783 if (host_offset) {
1784 do_fixup = true;
1785 fixup_start_addr = run_end - host_offset;
1786 /*
1787 * This host page has gone, the next loop iteration starts
1788 * from after the fixup
1789 */
1790 run_start = fixup_start_addr + host_ratio;
1791 } else {
1792 /*
1793 * No discards on this iteration, next loop starts from
1794 * next sent/dirty page
1795 */
1796 run_start = run_end + 1;
1797 }
1798 }
1799
1800 if (do_fixup) {
1801 unsigned long page;
1802
1803 /* Tell the destination to discard this page */
1804 if (unsent_pass || !test_bit(fixup_start_addr, unsentmap)) {
1805 /* For the unsent_pass we:
1806 * discard partially sent pages
1807 * For the !unsent_pass (dirty) we:
1808 * discard partially dirty pages that were sent
1809 * (any partially sent pages were already discarded
1810 * by the previous unsent_pass)
1811 */
1812 postcopy_discard_send_range(ms, pds, fixup_start_addr,
1813 host_ratio);
1814 }
1815
1816 /* Clean up the bitmap */
1817 for (page = fixup_start_addr;
1818 page < fixup_start_addr + host_ratio; page++) {
1819 /* All pages in this host page are now not sent */
1820 set_bit(page, unsentmap);
1821
1822 /*
1823 * Remark them as dirty, updating the count for any pages
1824 * that weren't previously dirty.
1825 */
0d8ec885 1826 rs->migration_dirty_pages += !test_and_set_bit(page, bitmap);
99e314eb
DDAG
1827 }
1828 }
1829
1830 if (unsent_pass) {
1831 /* Find the next sent page for the next iteration */
1832 run_start = find_next_zero_bit(unsentmap, last + 1,
1833 run_start);
1834 } else {
1835 /* Find the next dirty page for the next iteration */
1836 run_start = find_next_bit(bitmap, last + 1, run_start);
1837 }
1838 }
1839}
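
/*
 * Illustrative sketch only (not QEMU code): the host-page rounding arithmetic
 * used by postcopy_chunk_hostpages_pass() above.  Page indices are in target
 * pages and host_ratio is block->page_size / TARGET_PAGE_SIZE, as in the
 * function.  fix_whole_host_page() and canonicalize_run() are made-up names;
 * the real fix-up both sends a discard for the host page and re-dirties it.
 */
#include <stdio.h>

static void fix_whole_host_page(unsigned long first_tp, unsigned long host_ratio)
{
    printf("fix up target pages [%lu, %lu)\n", first_tp, first_tp + host_ratio);
}

/* If a run of dirty/unsent target pages starts or ends in the middle of a
 * host page, widen the fix-up to the whole host page, which is what the two
 * passes above do for the dirty bitmap and the unsentmap respectively. */
static void canonicalize_run(unsigned long run_start, unsigned long run_end,
                             unsigned long host_ratio)
{
    if (run_start % host_ratio) {
        /* Run begins mid host page: round down and fix that host page. */
        fix_whole_host_page(run_start - run_start % host_ratio, host_ratio);
    }
    if (run_end % host_ratio) {
        /* Run ends mid host page: fix the host page containing run_end. */
        fix_whole_host_page(run_end - run_end % host_ratio, host_ratio);
    }
}

int main(void)
{
    /* 2 MiB huge pages with 4 KiB target pages -> host_ratio == 512. */
    canonicalize_run(700, 1500, 512);   /* both ends fall mid host page */
    canonicalize_run(512, 1024, 512);   /* already aligned: nothing to do */
    return 0;
}
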
1840
3d0684b2
JQ
1841/**
1842 * postcopy_chunk_hostpages: discard any partially sent host page
1843 *
99e314eb
DDAG
1844 * Utility for the outgoing postcopy code.
1845 *
1846 * Discard any partially sent host-page size chunks, mark any partially
29c59172
DDAG
1847 * dirty host-page size chunks as all dirty. In this case the host-page
1848 * is the host-page for the particular RAMBlock, i.e. it might be a huge page
99e314eb 1849 *
3d0684b2
JQ
1850 * Returns zero on success
1851 *
1852 * @ms: current migration state
99e314eb
DDAG
1853 */
1854static int postcopy_chunk_hostpages(MigrationState *ms)
1855{
6f37bb8b 1856 RAMState *rs = &ram_state;
99e314eb
DDAG
1857 struct RAMBlock *block;
1858
99e314eb 1859 /* Easiest way to make sure we don't resume in the middle of a host-page */
6f37bb8b
JQ
1860 rs->last_seen_block = NULL;
1861 rs->last_sent_block = NULL;
1862 rs->last_offset = 0;
99e314eb
DDAG
1863
1864 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1865 unsigned long first = block->offset >> TARGET_PAGE_BITS;
1866
1867 PostcopyDiscardState *pds =
1868 postcopy_discard_send_init(ms, first, block->idstr);
1869
1870 /* First pass: Discard all partially sent host pages */
1871 postcopy_chunk_hostpages_pass(ms, true, block, pds);
1872 /*
1873 * Second pass: Ensure that all partially dirty host pages are made
1874 * fully dirty.
1875 */
1876 postcopy_chunk_hostpages_pass(ms, false, block, pds);
1877
1878 postcopy_discard_send_finish(ms, pds);
1879 } /* ram_list loop */
1880
1881 return 0;
1882}
1883
3d0684b2
JQ
1884/**
1885 * ram_postcopy_send_discard_bitmap: transmit the discard bitmap
1886 *
1887 * Returns zero on success
1888 *
e0b266f0
DDAG
1889 * Transmit the set of pages to be discarded after precopy to the target;
1890 * these are pages that:
1891 * a) have been previously transmitted but are now dirty again
1892 * b) have never been transmitted; this ensures that
1893 * any pages on the destination that have been mapped by background
1894 * tasks get discarded (transparent huge pages are the specific concern)
1895 * Hopefully this is pretty sparse.
3d0684b2
JQ
1896 *
1897 * @ms: current migration state
e0b266f0
DDAG
1898 */
1899int ram_postcopy_send_discard_bitmap(MigrationState *ms)
1900{
1901 int ret;
1902 unsigned long *bitmap, *unsentmap;
1903
1904 rcu_read_lock();
1905
1906 /* This should be our last sync, the src is now paused */
8d820d6f 1907 migration_bitmap_sync(&ram_state);
e0b266f0
DDAG
1908
1909 unsentmap = atomic_rcu_read(&migration_bitmap_rcu)->unsentmap;
1910 if (!unsentmap) {
1911 /* We don't have a safe way to resize the unsentmap, so
1912 * if the bitmap was resized it will be NULL at this
1913 * point.
1914 */
1915 error_report("migration ram resized during precopy phase");
1916 rcu_read_unlock();
1917 return -EINVAL;
1918 }
1919
29c59172 1920 /* Deal with TPS != HPS and huge pages */
99e314eb
DDAG
1921 ret = postcopy_chunk_hostpages(ms);
1922 if (ret) {
1923 rcu_read_unlock();
1924 return ret;
1925 }
1926
e0b266f0
DDAG
1927 /*
1928 * Update the unsentmap to be unsentmap = unsentmap | dirty
1929 */
1930 bitmap = atomic_rcu_read(&migration_bitmap_rcu)->bmap;
1931 bitmap_or(unsentmap, unsentmap, bitmap,
1932 last_ram_offset() >> TARGET_PAGE_BITS);
1933
1934
1935 trace_ram_postcopy_send_discard_bitmap();
1936#ifdef DEBUG_POSTCOPY
1937 ram_debug_dump_bitmap(unsentmap, true);
1938#endif
1939
1940 ret = postcopy_each_ram_send_discard(ms);
1941 rcu_read_unlock();
1942
1943 return ret;
1944}
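
/*
 * Illustrative sketch only (not QEMU code): the "unsentmap |= dirty" step that
 * ram_postcopy_send_discard_bitmap() above performs with bitmap_or().  A
 * bitmap here is just an array of unsigned long words, one bit per target
 * page; the TOY_* names are made up for the example.
 */
#include <limits.h>

#define TOY_BITS_PER_LONG (sizeof(unsigned long) * CHAR_BIT)
#define TOY_BITMAP_WORDS(nbits) \
    (((nbits) + TOY_BITS_PER_LONG - 1) / TOY_BITS_PER_LONG)

/* After this, a set bit means "must be discarded on the destination":
 * either never sent, or sent earlier but dirtied again since. */
static void toy_bitmap_or(unsigned long *dst, const unsigned long *a,
                          const unsigned long *b, unsigned long nbits)
{
    unsigned long i;

    for (i = 0; i < TOY_BITMAP_WORDS(nbits); i++) {
        dst[i] = a[i] | b[i];
    }
}
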
1945
3d0684b2
JQ
1946/**
1947 * ram_discard_range: discard dirtied pages at the beginning of postcopy
e0b266f0 1948 *
3d0684b2 1949 * Returns zero on success
e0b266f0 1950 *
3d0684b2 1951 * @mis: current migration incoming state
36449157
JQ
1952 * @rbname: name of the RAMBlock of the request. NULL means the
1953 * same as the last one.
3d0684b2
JQ
1954 * @start: start offset within the RAMBlock, in bytes
1955 * @length: number of bytes to discard
e0b266f0
DDAG
1956 */
1957int ram_discard_range(MigrationIncomingState *mis,
36449157 1958 const char *rbname,
e0b266f0
DDAG
1959 uint64_t start, size_t length)
1960{
1961 int ret = -1;
1962
36449157 1963 trace_ram_discard_range(rbname, start, length);
d3a5038c 1964
e0b266f0 1965 rcu_read_lock();
36449157 1966 RAMBlock *rb = qemu_ram_block_by_name(rbname);
e0b266f0
DDAG
1967
1968 if (!rb) {
36449157 1969 error_report("ram_discard_range: Failed to find block '%s'", rbname);
e0b266f0
DDAG
1970 goto err;
1971 }
1972
d3a5038c 1973 ret = ram_block_discard_range(rb, start, length);
e0b266f0
DDAG
1974
1975err:
1976 rcu_read_unlock();
1977
1978 return ret;
1979}
1980
ceb4d168 1981static int ram_state_init(RAMState *rs)
56e93d26 1982{
56e93d26
JQ
1983 int64_t ram_bitmap_pages; /* Size of bitmap in pages, including gaps */
1984
ceb4d168 1985 memset(rs, 0, sizeof(*rs));
108cfae0 1986 qemu_mutex_init(&rs->bitmap_mutex);
56e93d26
JQ
1987
1988 if (migrate_use_xbzrle()) {
1989 XBZRLE_cache_lock();
adb65dec 1990 ZERO_TARGET_PAGE = g_malloc0(TARGET_PAGE_SIZE);
56e93d26
JQ
1991 XBZRLE.cache = cache_init(migrate_xbzrle_cache_size() /
1992 TARGET_PAGE_SIZE,
1993 TARGET_PAGE_SIZE);
1994 if (!XBZRLE.cache) {
1995 XBZRLE_cache_unlock();
1996 error_report("Error creating cache");
1997 return -1;
1998 }
1999 XBZRLE_cache_unlock();
2000
2001 /* We prefer not to abort if there is no memory */
2002 XBZRLE.encoded_buf = g_try_malloc0(TARGET_PAGE_SIZE);
2003 if (!XBZRLE.encoded_buf) {
2004 error_report("Error allocating encoded_buf");
2005 return -1;
2006 }
2007
2008 XBZRLE.current_buf = g_try_malloc(TARGET_PAGE_SIZE);
2009 if (!XBZRLE.current_buf) {
2010 error_report("Error allocating current_buf");
2011 g_free(XBZRLE.encoded_buf);
2012 XBZRLE.encoded_buf = NULL;
2013 return -1;
2014 }
56e93d26
JQ
2015 }
2016
49877834
PB
2017 /* For memory_global_dirty_log_start below. */
2018 qemu_mutex_lock_iothread();
2019
56e93d26
JQ
2020 qemu_mutex_lock_ramlist();
2021 rcu_read_lock();
2022 bytes_transferred = 0;
6f37bb8b 2023 ram_state_reset(rs);
56e93d26 2024
f3f491fc 2025 migration_bitmap_rcu = g_new0(struct BitmapRcu, 1);
0827b9e9
AA
2026 /* Skip setting bitmap if there is no RAM */
2027 if (ram_bytes_total()) {
2028 ram_bitmap_pages = last_ram_offset() >> TARGET_PAGE_BITS;
2029 migration_bitmap_rcu->bmap = bitmap_new(ram_bitmap_pages);
2030 bitmap_set(migration_bitmap_rcu->bmap, 0, ram_bitmap_pages);
2031
2032 if (migrate_postcopy_ram()) {
2033 migration_bitmap_rcu->unsentmap = bitmap_new(ram_bitmap_pages);
2034 bitmap_set(migration_bitmap_rcu->unsentmap, 0, ram_bitmap_pages);
2035 }
f3f491fc
DDAG
2036 }
2037
56e93d26
JQ
2038 /*
2039 * Count the total number of pages used by ram blocks not including any
2040 * gaps due to alignment or unplugs.
2041 */
0d8ec885 2042 rs->migration_dirty_pages = ram_bytes_total() >> TARGET_PAGE_BITS;
56e93d26
JQ
2043
2044 memory_global_dirty_log_start();
8d820d6f 2045 migration_bitmap_sync(rs);
56e93d26 2046 qemu_mutex_unlock_ramlist();
49877834 2047 qemu_mutex_unlock_iothread();
a91246c9
HZ
2048 rcu_read_unlock();
2049
2050 return 0;
2051}
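
/*
 * Illustrative sketch only (not QEMU code): how the migration bitmap that
 * ram_state_init() above builds is sized and filled.  TOY_PAGE_BITS stands in
 * for TARGET_PAGE_BITS, and toy_bitmap_new()/toy_bitmap_fill() stand in for
 * bitmap_new()/bitmap_set(); all names are made up for the example.
 */
#include <limits.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>

#define TOY_PAGE_BITS 12                         /* 4 KiB target pages */
#define TOY_BITS_PER_LONG (sizeof(unsigned long) * CHAR_BIT)
#define TOY_WORDS(nbits) \
    (((nbits) + TOY_BITS_PER_LONG - 1) / TOY_BITS_PER_LONG)

static unsigned long *toy_bitmap_new(uint64_t nbits)
{
    return calloc(TOY_WORDS(nbits), sizeof(unsigned long));
}

static void toy_bitmap_fill(unsigned long *map, uint64_t nbits)
{
    /* Setting whole words is enough for this sketch; the real bitmap_set()
     * is careful about a trailing partial word. */
    memset(map, 0xff, TOY_WORDS(nbits) * sizeof(unsigned long));
}

/* Every page starts out dirty, so the first migration pass sends all RAM. */
static unsigned long *toy_init_dirty_bitmap(uint64_t ram_bytes)
{
    uint64_t ram_pages = ram_bytes >> TOY_PAGE_BITS;
    unsigned long *bmap = toy_bitmap_new(ram_pages);

    if (bmap) {
        toy_bitmap_fill(bmap, ram_pages);
    }
    return bmap;
}
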
2052
3d0684b2
JQ
2053/*
2054 * Each of ram_save_setup, ram_save_iterate and ram_save_complete has
a91246c9
HZ
2055 * a long-running RCU critical section. When RCU reclaims in the code
2056 * start to become numerous, it will be necessary to reduce the
2057 * granularity of these critical sections.
2058 */
2059
3d0684b2
JQ
2060/**
2061 * ram_save_setup: Setup RAM for migration
2062 *
2063 * Returns zero to indicate success and negative for error
2064 *
2065 * @f: QEMUFile where to send the data
2066 * @opaque: RAMState pointer
2067 */
a91246c9
HZ
2068static int ram_save_setup(QEMUFile *f, void *opaque)
2069{
6f37bb8b 2070 RAMState *rs = opaque;
a91246c9
HZ
2071 RAMBlock *block;
2072
2073 /* migration has already setup the bitmap, reuse it. */
2074 if (!migration_in_colo_state()) {
ceb4d168 2075 if (ram_state_init(rs) < 0) {
a91246c9
HZ
2076 return -1;
2077 }
2078 }
2079
2080 rcu_read_lock();
56e93d26
JQ
2081
2082 qemu_put_be64(f, ram_bytes_total() | RAM_SAVE_FLAG_MEM_SIZE);
2083
2084 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
2085 qemu_put_byte(f, strlen(block->idstr));
2086 qemu_put_buffer(f, (uint8_t *)block->idstr, strlen(block->idstr));
2087 qemu_put_be64(f, block->used_length);
ef08fb38
DDAG
2088 if (migrate_postcopy_ram() && block->page_size != qemu_host_page_size) {
2089 qemu_put_be64(f, block->page_size);
2090 }
56e93d26
JQ
2091 }
2092
2093 rcu_read_unlock();
2094
2095 ram_control_before_iterate(f, RAM_CONTROL_SETUP);
2096 ram_control_after_iterate(f, RAM_CONTROL_SETUP);
2097
2098 qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
2099
2100 return 0;
2101}
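
/*
 * Illustrative sketch only (not QEMU code): the byte layout that
 * ram_save_setup() above writes, reproduced with plain stdio instead of
 * QEMUFile.  The flag values follow the RAM_SAVE_FLAG_* definitions near the
 * top of this file; toy_put_be64(), toy_put_block() and the "pc.ram" block
 * name are made up for the example.
 */
#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define TOY_FLAG_MEM_SIZE 0x04
#define TOY_FLAG_EOS      0x10

static void toy_put_be64(FILE *f, uint64_t v)
{
    for (int shift = 56; shift >= 0; shift -= 8) {
        fputc((int)((v >> shift) & 0xff), f);
    }
}

/* One block record: <idstr len:1 byte><idstr><used_length:be64>.
 * ram_save_setup() appends an extra be64 page size here for huge-page
 * blocks when postcopy is enabled. */
static void toy_put_block(FILE *f, const char *idstr, uint64_t used_length)
{
    size_t len = strlen(idstr);

    fputc((int)len, f);
    fwrite(idstr, 1, len, f);
    toy_put_be64(f, used_length);
}

static void toy_save_setup(FILE *f, uint64_t total_ram_bytes)
{
    /* Total RAM size with the MEM_SIZE flag folded into the low bits. */
    toy_put_be64(f, total_ram_bytes | TOY_FLAG_MEM_SIZE);
    toy_put_block(f, "pc.ram", total_ram_bytes);
    /* End-of-section marker, the equivalent of RAM_SAVE_FLAG_EOS above. */
    toy_put_be64(f, TOY_FLAG_EOS);
}
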
2102
3d0684b2
JQ
2103/**
2104 * ram_save_iterate: iterative stage for migration
2105 *
2106 * Returns zero to indicate success and negative for error
2107 *
2108 * @f: QEMUFile where to send the data
2109 * @opaque: RAMState pointer
2110 */
56e93d26
JQ
2111static int ram_save_iterate(QEMUFile *f, void *opaque)
2112{
6f37bb8b 2113 RAMState *rs = opaque;
56e93d26
JQ
2114 int ret;
2115 int i;
2116 int64_t t0;
5c90308f 2117 int done = 0;
56e93d26
JQ
2118
2119 rcu_read_lock();
6f37bb8b
JQ
2120 if (ram_list.version != rs->last_version) {
2121 ram_state_reset(rs);
56e93d26
JQ
2122 }
2123
2124 /* Read version before ram_list.blocks */
2125 smp_rmb();
2126
2127 ram_control_before_iterate(f, RAM_CONTROL_ROUND);
2128
2129 t0 = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
2130 i = 0;
2131 while ((ret = qemu_file_rate_limit(f)) == 0) {
2132 int pages;
2133
6f37bb8b 2134 pages = ram_find_and_save_block(rs, f, false, &bytes_transferred);
56e93d26
JQ
2135 /* no more pages to send */
2136 if (pages == 0) {
5c90308f 2137 done = 1;
56e93d26
JQ
2138 break;
2139 }
23b28c3c 2140 rs->iterations++;
070afca2 2141
56e93d26
JQ
2142 /* we want to check in the 1st loop, just in case it was the 1st time
2143 and we had to sync the dirty bitmap.
2144 qemu_clock_get_ns() is a bit expensive, so we only check every few
2145 iterations
2146 */
2147 if ((i & 63) == 0) {
2148 uint64_t t1 = (qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - t0) / 1000000;
2149 if (t1 > MAX_WAIT) {
55c4446b 2150 trace_ram_save_iterate_big_wait(t1, i);
56e93d26
JQ
2151 break;
2152 }
2153 }
2154 i++;
2155 }
2156 flush_compressed_data(f);
2157 rcu_read_unlock();
2158
2159 /*
2160 * Must occur before EOS (or any QEMUFile operation)
2161 * because of RDMA protocol.
2162 */
2163 ram_control_after_iterate(f, RAM_CONTROL_ROUND);
2164
2165 qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
2166 bytes_transferred += 8;
2167
2168 ret = qemu_file_get_error(f);
2169 if (ret < 0) {
2170 return ret;
2171 }
2172
5c90308f 2173 return done;
56e93d26
JQ
2174}
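
/*
 * Illustrative sketch only (not QEMU code): the "sample the clock only every
 * 64 iterations" pattern from ram_save_iterate() above, written with POSIX
 * clock_gettime() instead of qemu_clock_get_ns().  TOY_MAX_WAIT_MS plays the
 * role of MAX_WAIT (value picked for the example) and send_one_chunk() is a
 * made-up callback returning false when there is nothing left to send.
 */
#include <stdbool.h>
#include <stdint.h>
#include <time.h>

#define TOY_MAX_WAIT_MS 50

static uint64_t toy_now_ms(void)
{
    struct timespec ts;

    clock_gettime(CLOCK_MONOTONIC, &ts);
    return (uint64_t)ts.tv_sec * 1000u + (uint64_t)ts.tv_nsec / 1000000u;
}

static void toy_iterate(bool (*send_one_chunk)(void))
{
    uint64_t t0 = toy_now_ms();
    int i = 0;

    while (send_one_chunk()) {
        /* Reading the clock is comparatively expensive, so only sample it
         * every 64 chunks, as the loop above does with (i & 63) == 0. */
        if ((i & 63) == 0 && toy_now_ms() - t0 > TOY_MAX_WAIT_MS) {
            break;   /* spent too long in this iteration; let the caller
                      * come back for another round */
        }
        i++;
    }
}
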
2175
3d0684b2
JQ
2176/**
2177 * ram_save_complete: function called to send the remaining amount of ram
2178 *
2179 * Returns zero to indicate success
2180 *
2181 * Called with iothread lock
2182 *
2183 * @f: QEMUFile where to send the data
2184 * @opaque: RAMState pointer
2185 */
56e93d26
JQ
2186static int ram_save_complete(QEMUFile *f, void *opaque)
2187{
6f37bb8b
JQ
2188 RAMState *rs = opaque;
2189
56e93d26
JQ
2190 rcu_read_lock();
2191
663e6c1d 2192 if (!migration_in_postcopy(migrate_get_current())) {
8d820d6f 2193 migration_bitmap_sync(rs);
663e6c1d 2194 }
56e93d26
JQ
2195
2196 ram_control_before_iterate(f, RAM_CONTROL_FINISH);
2197
2198 /* try transferring iterative blocks of memory */
2199
2200 /* flush all remaining blocks regardless of rate limiting */
2201 while (true) {
2202 int pages;
2203
6f37bb8b 2204 pages = ram_find_and_save_block(rs, f, !migration_in_colo_state(),
a91246c9 2205 &bytes_transferred);
56e93d26
JQ
2206 /* no more blocks to send */
2207 if (pages == 0) {
2208 break;
2209 }
2210 }
2211
2212 flush_compressed_data(f);
2213 ram_control_after_iterate(f, RAM_CONTROL_FINISH);
56e93d26
JQ
2214
2215 rcu_read_unlock();
d09a6fde 2216
56e93d26
JQ
2217 qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
2218
2219 return 0;
2220}
2221
c31b098f
DDAG
2222static void ram_save_pending(QEMUFile *f, void *opaque, uint64_t max_size,
2223 uint64_t *non_postcopiable_pending,
2224 uint64_t *postcopiable_pending)
56e93d26 2225{
8d820d6f 2226 RAMState *rs = opaque;
56e93d26
JQ
2227 uint64_t remaining_size;
2228
2229 remaining_size = ram_save_remaining() * TARGET_PAGE_SIZE;
2230
663e6c1d
DDAG
2231 if (!migration_in_postcopy(migrate_get_current()) &&
2232 remaining_size < max_size) {
56e93d26
JQ
2233 qemu_mutex_lock_iothread();
2234 rcu_read_lock();
8d820d6f 2235 migration_bitmap_sync(rs);
56e93d26
JQ
2236 rcu_read_unlock();
2237 qemu_mutex_unlock_iothread();
2238 remaining_size = ram_save_remaining() * TARGET_PAGE_SIZE;
2239 }
c31b098f
DDAG
2240
2241 /* We can do postcopy, and all the data is postcopiable */
2242 *postcopiable_pending += remaining_size;
56e93d26
JQ
2243}
2244
2245static int load_xbzrle(QEMUFile *f, ram_addr_t addr, void *host)
2246{
2247 unsigned int xh_len;
2248 int xh_flags;
063e760a 2249 uint8_t *loaded_data;
56e93d26
JQ
2250
2251 if (!xbzrle_decoded_buf) {
2252 xbzrle_decoded_buf = g_malloc(TARGET_PAGE_SIZE);
2253 }
063e760a 2254 loaded_data = xbzrle_decoded_buf;
56e93d26
JQ
2255
2256 /* extract RLE header */
2257 xh_flags = qemu_get_byte(f);
2258 xh_len = qemu_get_be16(f);
2259
2260 if (xh_flags != ENCODING_FLAG_XBZRLE) {
2261 error_report("Failed to load XBZRLE page - wrong compression!");
2262 return -1;
2263 }
2264
2265 if (xh_len > TARGET_PAGE_SIZE) {
2266 error_report("Failed to load XBZRLE page - len overflow!");
2267 return -1;
2268 }
2269 /* load data and decode */
063e760a 2270 qemu_get_buffer_in_place(f, &loaded_data, xh_len);
56e93d26
JQ
2271
2272 /* decode RLE */
063e760a 2273 if (xbzrle_decode_buffer(loaded_data, xh_len, host,
56e93d26
JQ
2274 TARGET_PAGE_SIZE) == -1) {
2275 error_report("Failed to load XBZRLE page - decode error!");
2276 return -1;
2277 }
2278
2279 return 0;
2280}
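
/*
 * Illustrative sketch only (not QEMU code): the header that load_xbzrle()
 * above expects in front of each XBZRLE page, parsed here from a plain byte
 * buffer.  TOY_ENCODING_FLAG_XBZRLE (0x1) and TOY_PAGE_SIZE (4096) are
 * assumed stand-ins for ENCODING_FLAG_XBZRLE and TARGET_PAGE_SIZE; the
 * function name is made up for the example.
 */
#include <stddef.h>
#include <stdint.h>

#define TOY_ENCODING_FLAG_XBZRLE 0x1
#define TOY_PAGE_SIZE 4096u

/* Header layout: <flags:1 byte><encoded length:be16>, followed by 'length'
 * bytes of encoded data.  Returns the encoded length, or -1 if the header
 * fails the same two sanity checks load_xbzrle() performs. */
static int toy_parse_xbzrle_header(const uint8_t *buf, size_t buflen)
{
    if (buflen < 3) {
        return -1;                       /* header truncated */
    }
    uint8_t flags = buf[0];
    unsigned int len = ((unsigned int)buf[1] << 8) | buf[2];   /* be16 */

    if (flags != TOY_ENCODING_FLAG_XBZRLE) {
        return -1;                       /* wrong compression */
    }
    if (len > TOY_PAGE_SIZE) {
        return -1;                       /* length overflow */
    }
    return (int)len;
}
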
2281
3d0684b2
JQ
2282/**
2283 * ram_block_from_stream: read a RAMBlock id from the migration stream
2284 *
2285 * Must be called from within a rcu critical section.
2286 *
56e93d26 2287 * Returns a pointer from within the RCU-protected ram_list.
a7180877 2288 *
3d0684b2
JQ
2289 * @f: QEMUFile where to read the data from
2290 * @flags: Page flags (mostly to see if it's a continuation of previous block)
a7180877 2291 */
3d0684b2 2292static inline RAMBlock *ram_block_from_stream(QEMUFile *f, int flags)
56e93d26
JQ
2293{
2294 static RAMBlock *block = NULL;
2295 char id[256];
2296 uint8_t len;
2297
2298 if (flags & RAM_SAVE_FLAG_CONTINUE) {
4c4bad48 2299 if (!block) {
56e93d26
JQ
2300 error_report("Ack, bad migration stream!");
2301 return NULL;
2302 }
4c4bad48 2303 return block;
56e93d26
JQ
2304 }
2305
2306 len = qemu_get_byte(f);
2307 qemu_get_buffer(f, (uint8_t *)id, len);
2308 id[len] = 0;
2309
e3dd7493 2310 block = qemu_ram_block_by_name(id);
4c4bad48
HZ
2311 if (!block) {
2312 error_report("Can't find block %s", id);
2313 return NULL;
56e93d26
JQ
2314 }
2315
4c4bad48
HZ
2316 return block;
2317}
2318
2319static inline void *host_from_ram_block_offset(RAMBlock *block,
2320 ram_addr_t offset)
2321{
2322 if (!offset_in_ramblock(block, offset)) {
2323 return NULL;
2324 }
2325
2326 return block->host + offset;
56e93d26
JQ
2327}
2328
3d0684b2
JQ
2329/**
2330 * ram_handle_compressed: handle the zero page case
2331 *
56e93d26
JQ
2332 * If a page (or a whole RDMA chunk) has been
2333 * determined to be zero, then zap it.
3d0684b2
JQ
2334 *
2335 * @host: host address for the zero page
2336 * @ch: what the page is filled from. We only support zero
2337 * @size: size of the zero page
56e93d26
JQ
2338 */
2339void ram_handle_compressed(void *host, uint8_t ch, uint64_t size)
2340{
2341 if (ch != 0 || !is_zero_range(host, size)) {
2342 memset(host, ch, size);
2343 }
2344}
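
/*
 * Illustrative sketch only (not QEMU code): the "only touch memory when it is
 * not already zero" idea behind ram_handle_compressed() above.  toy_is_zero()
 * is a plain byte loop standing in for buffer_is_zero(); skipping the memset
 * keeps an untouched destination page untouched (and unallocated if it was
 * never faulted in).
 */
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#include <string.h>

static bool toy_is_zero(const uint8_t *p, size_t size)
{
    for (size_t i = 0; i < size; i++) {
        if (p[i]) {
            return false;
        }
    }
    return true;
}

static void toy_handle_filled_page(uint8_t *host, uint8_t ch, size_t size)
{
    if (ch != 0 || !toy_is_zero(host, size)) {
        memset(host, ch, size);
    }
}
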
2345
2346static void *do_data_decompress(void *opaque)
2347{
2348 DecompressParam *param = opaque;
2349 unsigned long pagesize;
33d151f4
LL
2350 uint8_t *des;
2351 int len;
56e93d26 2352
33d151f4 2353 qemu_mutex_lock(&param->mutex);
90e56fb4 2354 while (!param->quit) {
33d151f4
LL
2355 if (param->des) {
2356 des = param->des;
2357 len = param->len;
2358 param->des = 0;
2359 qemu_mutex_unlock(&param->mutex);
2360
56e93d26 2361 pagesize = TARGET_PAGE_SIZE;
73a8912b
LL
2362 /* uncompress() can fail in some cases, especially
2363 * when the page is dirtied while being compressed; that is
2364 * not a problem because the dirty page will be retransmitted
2365 * and uncompress() won't corrupt the data in other pages.
2366 */
33d151f4
LL
2367 uncompress((Bytef *)des, &pagesize,
2368 (const Bytef *)param->compbuf, len);
73a8912b 2369
33d151f4
LL
2370 qemu_mutex_lock(&decomp_done_lock);
2371 param->done = true;
2372 qemu_cond_signal(&decomp_done_cond);
2373 qemu_mutex_unlock(&decomp_done_lock);
2374
2375 qemu_mutex_lock(&param->mutex);
2376 } else {
2377 qemu_cond_wait(&param->cond, &param->mutex);
2378 }
56e93d26 2379 }
33d151f4 2380 qemu_mutex_unlock(&param->mutex);
56e93d26
JQ
2381
2382 return NULL;
2383}
2384
5533b2e9
LL
2385static void wait_for_decompress_done(void)
2386{
2387 int idx, thread_count;
2388
2389 if (!migrate_use_compression()) {
2390 return;
2391 }
2392
2393 thread_count = migrate_decompress_threads();
2394 qemu_mutex_lock(&decomp_done_lock);
2395 for (idx = 0; idx < thread_count; idx++) {
2396 while (!decomp_param[idx].done) {
2397 qemu_cond_wait(&decomp_done_cond, &decomp_done_lock);
2398 }
2399 }
2400 qemu_mutex_unlock(&decomp_done_lock);
2401}
2402
56e93d26
JQ
2403void migrate_decompress_threads_create(void)
2404{
2405 int i, thread_count;
2406
2407 thread_count = migrate_decompress_threads();
2408 decompress_threads = g_new0(QemuThread, thread_count);
2409 decomp_param = g_new0(DecompressParam, thread_count);
73a8912b
LL
2410 qemu_mutex_init(&decomp_done_lock);
2411 qemu_cond_init(&decomp_done_cond);
56e93d26
JQ
2412 for (i = 0; i < thread_count; i++) {
2413 qemu_mutex_init(&decomp_param[i].mutex);
2414 qemu_cond_init(&decomp_param[i].cond);
2415 decomp_param[i].compbuf = g_malloc0(compressBound(TARGET_PAGE_SIZE));
73a8912b 2416 decomp_param[i].done = true;
90e56fb4 2417 decomp_param[i].quit = false;
56e93d26
JQ
2418 qemu_thread_create(decompress_threads + i, "decompress",
2419 do_data_decompress, decomp_param + i,
2420 QEMU_THREAD_JOINABLE);
2421 }
2422}
2423
2424void migrate_decompress_threads_join(void)
2425{
2426 int i, thread_count;
2427
56e93d26
JQ
2428 thread_count = migrate_decompress_threads();
2429 for (i = 0; i < thread_count; i++) {
2430 qemu_mutex_lock(&decomp_param[i].mutex);
90e56fb4 2431 decomp_param[i].quit = true;
56e93d26
JQ
2432 qemu_cond_signal(&decomp_param[i].cond);
2433 qemu_mutex_unlock(&decomp_param[i].mutex);
2434 }
2435 for (i = 0; i < thread_count; i++) {
2436 qemu_thread_join(decompress_threads + i);
2437 qemu_mutex_destroy(&decomp_param[i].mutex);
2438 qemu_cond_destroy(&decomp_param[i].cond);
2439 g_free(decomp_param[i].compbuf);
2440 }
2441 g_free(decompress_threads);
2442 g_free(decomp_param);
56e93d26
JQ
2443 decompress_threads = NULL;
2444 decomp_param = NULL;
56e93d26
JQ
2445}
2446
c1bc6626 2447static void decompress_data_with_multi_threads(QEMUFile *f,
56e93d26
JQ
2448 void *host, int len)
2449{
2450 int idx, thread_count;
2451
2452 thread_count = migrate_decompress_threads();
73a8912b 2453 qemu_mutex_lock(&decomp_done_lock);
56e93d26
JQ
2454 while (true) {
2455 for (idx = 0; idx < thread_count; idx++) {
73a8912b 2456 if (decomp_param[idx].done) {
33d151f4
LL
2457 decomp_param[idx].done = false;
2458 qemu_mutex_lock(&decomp_param[idx].mutex);
c1bc6626 2459 qemu_get_buffer(f, decomp_param[idx].compbuf, len);
56e93d26
JQ
2460 decomp_param[idx].des = host;
2461 decomp_param[idx].len = len;
33d151f4
LL
2462 qemu_cond_signal(&decomp_param[idx].cond);
2463 qemu_mutex_unlock(&decomp_param[idx].mutex);
56e93d26
JQ
2464 break;
2465 }
2466 }
2467 if (idx < thread_count) {
2468 break;
73a8912b
LL
2469 } else {
2470 qemu_cond_wait(&decomp_done_cond, &decomp_done_lock);
56e93d26
JQ
2471 }
2472 }
73a8912b 2473 qemu_mutex_unlock(&decomp_done_lock);
56e93d26
JQ
2474}
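
/*
 * Illustrative sketch only (not QEMU code): the hand-off protocol between
 * decompress_data_with_multi_threads() and do_data_decompress() above,
 * reduced to a single worker and a printf standing in for uncompress().
 * Everything here (ToyWorker, toy_worker, toy_dispatch) is made up for the
 * example; it is plain pthreads, so build with -pthread.  A ToyWorker must be
 * initialised with done = true and its mutex/cond set up before use.
 */
#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

typedef struct ToyWorker {
    pthread_mutex_t mutex;      /* protects 'job' and 'quit' */
    pthread_cond_t cond;        /* signalled when new work is queued */
    int job;                    /* 0 means "no work pending" */
    bool done;                  /* worker is idle and may accept work */
    bool quit;
} ToyWorker;

/* Shared "a worker became idle" notification, like decomp_done_lock/cond. */
static pthread_mutex_t done_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t done_cond = PTHREAD_COND_INITIALIZER;

static void *toy_worker(void *opaque)
{
    ToyWorker *w = opaque;

    pthread_mutex_lock(&w->mutex);
    while (!w->quit) {
        if (w->job) {
            int job = w->job;

            w->job = 0;
            pthread_mutex_unlock(&w->mutex);

            printf("processing job %d\n", job);   /* the real work */

            /* Tell the dispatcher this slot is free again. */
            pthread_mutex_lock(&done_lock);
            w->done = true;
            pthread_cond_signal(&done_cond);
            pthread_mutex_unlock(&done_lock);

            pthread_mutex_lock(&w->mutex);
        } else {
            pthread_cond_wait(&w->cond, &w->mutex);
        }
    }
    pthread_mutex_unlock(&w->mutex);
    return NULL;
}

/* Dispatcher side: wait until the worker is idle, claim it, then hand over
 * one job, mirroring the scan-and-dispatch loop above. */
static void toy_dispatch(ToyWorker *w, int job)
{
    pthread_mutex_lock(&done_lock);
    while (!w->done) {
        pthread_cond_wait(&done_cond, &done_lock);
    }
    w->done = false;
    pthread_mutex_unlock(&done_lock);

    pthread_mutex_lock(&w->mutex);
    w->job = job;
    pthread_cond_signal(&w->cond);
    pthread_mutex_unlock(&w->mutex);
}
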
2475
3d0684b2
JQ
2476/**
2477 * ram_postcopy_incoming_init: allocate postcopy data structures
2478 *
2479 * Returns 0 for success and negative if there was an error
2480 *
2481 * @mis: current migration incoming state
2482 *
2483 * Allocate data structures etc. needed by incoming migration with
2484 * postcopy-ram. postcopy-ram's similarly named
2485 * postcopy_ram_incoming_init does the work.
1caddf8a
DDAG
2486 */
2487int ram_postcopy_incoming_init(MigrationIncomingState *mis)
2488{
2489 size_t ram_pages = last_ram_offset() >> TARGET_PAGE_BITS;
2490
2491 return postcopy_ram_incoming_init(mis, ram_pages);
2492}
2493
3d0684b2
JQ
2494/**
2495 * ram_load_postcopy: load a page in postcopy case
2496 *
2497 * Returns 0 for success or -errno in case of error
2498 *
a7180877
DDAG
2499 * Called in postcopy mode by ram_load().
2500 * rcu_read_lock is taken prior to this being called.
3d0684b2
JQ
2501 *
2502 * @f: QEMUFile where to send the data
a7180877
DDAG
2503 */
2504static int ram_load_postcopy(QEMUFile *f)
2505{
2506 int flags = 0, ret = 0;
2507 bool place_needed = false;
28abd200 2508 bool matching_page_sizes = false;
a7180877
DDAG
2509 MigrationIncomingState *mis = migration_incoming_get_current();
2510 /* Temporary page that is later 'placed' */
2511 void *postcopy_host_page = postcopy_get_tmp_page(mis);
c53b7ddc 2512 void *last_host = NULL;
a3b6ff6d 2513 bool all_zero = false;
a7180877
DDAG
2514
2515 while (!ret && !(flags & RAM_SAVE_FLAG_EOS)) {
2516 ram_addr_t addr;
2517 void *host = NULL;
2518 void *page_buffer = NULL;
2519 void *place_source = NULL;
df9ff5e1 2520 RAMBlock *block = NULL;
a7180877 2521 uint8_t ch;
a7180877
DDAG
2522
2523 addr = qemu_get_be64(f);
2524 flags = addr & ~TARGET_PAGE_MASK;
2525 addr &= TARGET_PAGE_MASK;
2526
2527 trace_ram_load_postcopy_loop((uint64_t)addr, flags);
2528 place_needed = false;
2529 if (flags & (RAM_SAVE_FLAG_COMPRESS | RAM_SAVE_FLAG_PAGE)) {
df9ff5e1 2530 block = ram_block_from_stream(f, flags);
4c4bad48
HZ
2531
2532 host = host_from_ram_block_offset(block, addr);
a7180877
DDAG
2533 if (!host) {
2534 error_report("Illegal RAM offset " RAM_ADDR_FMT, addr);
2535 ret = -EINVAL;
2536 break;
2537 }
28abd200 2538 matching_page_sizes = block->page_size == TARGET_PAGE_SIZE;
a7180877 2539 /*
28abd200
DDAG
2540 * Postcopy requires that we place whole host pages atomically;
2541 * these may be huge pages for RAMBlocks that are backed by
2542 * hugetlbfs.
a7180877
DDAG
2543 * To make it atomic, the data is read into a temporary page
2544 * that's moved into place later.
2545 * The migration protocol uses, possibly smaller, target-pages
2546 * however the source ensures it always sends all the components
2547 * of a host page in order.
2548 */
2549 page_buffer = postcopy_host_page +
28abd200 2550 ((uintptr_t)host & (block->page_size - 1));
a7180877 2551 /* If all TP are zero then we can optimise the place */
28abd200 2552 if (!((uintptr_t)host & (block->page_size - 1))) {
a7180877 2553 all_zero = true;
c53b7ddc
DDAG
2554 } else {
2555 /* not the 1st TP within the HP */
2556 if (host != (last_host + TARGET_PAGE_SIZE)) {
9af9e0fe 2557 error_report("Non-sequential target page %p/%p",
c53b7ddc
DDAG
2558 host, last_host);
2559 ret = -EINVAL;
2560 break;
2561 }
a7180877
DDAG
2562 }
2563
c53b7ddc 2564
a7180877
DDAG
2565 /*
2566 * If it's the last part of a host page then we place the host
2567 * page
2568 */
2569 place_needed = (((uintptr_t)host + TARGET_PAGE_SIZE) &
28abd200 2570 (block->page_size - 1)) == 0;
a7180877
DDAG
2571 place_source = postcopy_host_page;
2572 }
c53b7ddc 2573 last_host = host;
a7180877
DDAG
2574
2575 switch (flags & ~RAM_SAVE_FLAG_CONTINUE) {
2576 case RAM_SAVE_FLAG_COMPRESS:
2577 ch = qemu_get_byte(f);
2578 memset(page_buffer, ch, TARGET_PAGE_SIZE);
2579 if (ch) {
2580 all_zero = false;
2581 }
2582 break;
2583
2584 case RAM_SAVE_FLAG_PAGE:
2585 all_zero = false;
2586 if (!place_needed || !matching_page_sizes) {
2587 qemu_get_buffer(f, page_buffer, TARGET_PAGE_SIZE);
2588 } else {
2589 /* Avoids the qemu_file copy during postcopy, which is
2590 * going to do a copy later; can only do it when we
2591 * do this read in one go (matching page sizes)
2592 */
2593 qemu_get_buffer_in_place(f, (uint8_t **)&place_source,
2594 TARGET_PAGE_SIZE);
2595 }
2596 break;
2597 case RAM_SAVE_FLAG_EOS:
2598 /* normal exit */
2599 break;
2600 default:
2601 error_report("Unknown combination of migration flags: %#x"
2602 " (postcopy mode)", flags);
2603 ret = -EINVAL;
2604 }
2605
2606 if (place_needed) {
2607 /* This gets called at the last target page in the host page */
df9ff5e1
DDAG
2608 void *place_dest = host + TARGET_PAGE_SIZE - block->page_size;
2609
a7180877 2610 if (all_zero) {
df9ff5e1
DDAG
2611 ret = postcopy_place_page_zero(mis, place_dest,
2612 block->page_size);
a7180877 2613 } else {
df9ff5e1
DDAG
2614 ret = postcopy_place_page(mis, place_dest,
2615 place_source, block->page_size);
a7180877
DDAG
2616 }
2617 }
2618 if (!ret) {
2619 ret = qemu_file_get_error(f);
2620 }
2621 }
2622
2623 return ret;
2624}
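
/*
 * Illustrative sketch only (not QEMU code): the pointer arithmetic that
 * ram_load_postcopy() above uses to assemble one host page from several
 * target pages in a temporary buffer before placing it atomically.
 * TOY_TARGET_PAGE_SIZE stands in for TARGET_PAGE_SIZE; host_page_size is the
 * RAMBlock's page size and is assumed to be a power of two, as in the real
 * code.  All function names are made up for the example.
 */
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>

#define TOY_TARGET_PAGE_SIZE 4096u

/* Offset of this target page inside its (possibly huge) host page. */
static size_t toy_offset_in_host_page(uintptr_t host, size_t host_page_size)
{
    return (size_t)(host & (host_page_size - 1));
}

/* Where to copy this target page inside the temporary host-page buffer,
 * mirroring "postcopy_host_page + (host & (block->page_size - 1))". */
static uint8_t *toy_page_buffer(uint8_t *tmp_host_page, uintptr_t host,
                                size_t host_page_size)
{
    return tmp_host_page + toy_offset_in_host_page(host, host_page_size);
}

/* True when 'host' is the last target page of its host page, i.e. the point
 * at which the fully assembled temporary page can be placed. */
static bool toy_is_last_target_page(uintptr_t host, size_t host_page_size)
{
    return ((host + TOY_TARGET_PAGE_SIZE) & (host_page_size - 1)) == 0;
}
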
2625
56e93d26
JQ
2626static int ram_load(QEMUFile *f, void *opaque, int version_id)
2627{
2628 int flags = 0, ret = 0;
2629 static uint64_t seq_iter;
2630 int len = 0;
a7180877
DDAG
2631 /*
2632 * If system is running in postcopy mode, page inserts to host memory must
2633 * be atomic
2634 */
2635 bool postcopy_running = postcopy_state_get() >= POSTCOPY_INCOMING_LISTENING;
ef08fb38
DDAG
2636 /* ADVISE is earlier, it shows the source has the postcopy capability on */
2637 bool postcopy_advised = postcopy_state_get() >= POSTCOPY_INCOMING_ADVISE;
56e93d26
JQ
2638
2639 seq_iter++;
2640
2641 if (version_id != 4) {
2642 ret = -EINVAL;
2643 }
2644
2645 /* This RCU critical section can be very long running.
2646 * When RCU reclaims in the code start to become numerous,
2647 * it will be necessary to reduce the granularity of this
2648 * critical section.
2649 */
2650 rcu_read_lock();
a7180877
DDAG
2651
2652 if (postcopy_running) {
2653 ret = ram_load_postcopy(f);
2654 }
2655
2656 while (!postcopy_running && !ret && !(flags & RAM_SAVE_FLAG_EOS)) {
56e93d26 2657 ram_addr_t addr, total_ram_bytes;
a776aa15 2658 void *host = NULL;
56e93d26
JQ
2659 uint8_t ch;
2660
2661 addr = qemu_get_be64(f);
2662 flags = addr & ~TARGET_PAGE_MASK;
2663 addr &= TARGET_PAGE_MASK;
2664
a776aa15
DDAG
2665 if (flags & (RAM_SAVE_FLAG_COMPRESS | RAM_SAVE_FLAG_PAGE |
2666 RAM_SAVE_FLAG_COMPRESS_PAGE | RAM_SAVE_FLAG_XBZRLE)) {
4c4bad48
HZ
2667 RAMBlock *block = ram_block_from_stream(f, flags);
2668
2669 host = host_from_ram_block_offset(block, addr);
a776aa15
DDAG
2670 if (!host) {
2671 error_report("Illegal RAM offset " RAM_ADDR_FMT, addr);
2672 ret = -EINVAL;
2673 break;
2674 }
2675 }
2676
56e93d26
JQ
2677 switch (flags & ~RAM_SAVE_FLAG_CONTINUE) {
2678 case RAM_SAVE_FLAG_MEM_SIZE:
2679 /* Synchronize RAM block list */
2680 total_ram_bytes = addr;
2681 while (!ret && total_ram_bytes) {
2682 RAMBlock *block;
56e93d26
JQ
2683 char id[256];
2684 ram_addr_t length;
2685
2686 len = qemu_get_byte(f);
2687 qemu_get_buffer(f, (uint8_t *)id, len);
2688 id[len] = 0;
2689 length = qemu_get_be64(f);
2690
e3dd7493
DDAG
2691 block = qemu_ram_block_by_name(id);
2692 if (block) {
2693 if (length != block->used_length) {
2694 Error *local_err = NULL;
56e93d26 2695
fa53a0e5 2696 ret = qemu_ram_resize(block, length,
e3dd7493
DDAG
2697 &local_err);
2698 if (local_err) {
2699 error_report_err(local_err);
56e93d26 2700 }
56e93d26 2701 }
ef08fb38
DDAG
2702 /* For postcopy we need to check hugepage sizes match */
2703 if (postcopy_advised &&
2704 block->page_size != qemu_host_page_size) {
2705 uint64_t remote_page_size = qemu_get_be64(f);
2706 if (remote_page_size != block->page_size) {
2707 error_report("Mismatched RAM page size %s "
2708 "(local) %zd != %" PRId64,
2709 id, block->page_size,
2710 remote_page_size);
2711 ret = -EINVAL;
2712 }
2713 }
e3dd7493
DDAG
2714 ram_control_load_hook(f, RAM_CONTROL_BLOCK_REG,
2715 block->idstr);
2716 } else {
56e93d26
JQ
2717 error_report("Unknown ramblock \"%s\", cannot "
2718 "accept migration", id);
2719 ret = -EINVAL;
2720 }
2721
2722 total_ram_bytes -= length;
2723 }
2724 break;
a776aa15 2725
56e93d26 2726 case RAM_SAVE_FLAG_COMPRESS:
56e93d26
JQ
2727 ch = qemu_get_byte(f);
2728 ram_handle_compressed(host, ch, TARGET_PAGE_SIZE);
2729 break;
a776aa15 2730
56e93d26 2731 case RAM_SAVE_FLAG_PAGE:
56e93d26
JQ
2732 qemu_get_buffer(f, host, TARGET_PAGE_SIZE);
2733 break;
56e93d26 2734
a776aa15 2735 case RAM_SAVE_FLAG_COMPRESS_PAGE:
56e93d26
JQ
2736 len = qemu_get_be32(f);
2737 if (len < 0 || len > compressBound(TARGET_PAGE_SIZE)) {
2738 error_report("Invalid compressed data length: %d", len);
2739 ret = -EINVAL;
2740 break;
2741 }
c1bc6626 2742 decompress_data_with_multi_threads(f, host, len);
56e93d26 2743 break;
a776aa15 2744
56e93d26 2745 case RAM_SAVE_FLAG_XBZRLE:
56e93d26
JQ
2746 if (load_xbzrle(f, addr, host) < 0) {
2747 error_report("Failed to decompress XBZRLE page at "
2748 RAM_ADDR_FMT, addr);
2749 ret = -EINVAL;
2750 break;
2751 }
2752 break;
2753 case RAM_SAVE_FLAG_EOS:
2754 /* normal exit */
2755 break;
2756 default:
2757 if (flags & RAM_SAVE_FLAG_HOOK) {
632e3a5c 2758 ram_control_load_hook(f, RAM_CONTROL_HOOK, NULL);
56e93d26
JQ
2759 } else {
2760 error_report("Unknown combination of migration flags: %#x",
2761 flags);
2762 ret = -EINVAL;
2763 }
2764 }
2765 if (!ret) {
2766 ret = qemu_file_get_error(f);
2767 }
2768 }
2769
5533b2e9 2770 wait_for_decompress_done();
56e93d26 2771 rcu_read_unlock();
55c4446b 2772 trace_ram_load_complete(ret, seq_iter);
56e93d26
JQ
2773 return ret;
2774}
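
/*
 * Illustrative sketch only (not QEMU code): how ram_load() above splits each
 * be64 it reads into a page-aligned address plus the flag bits packed into
 * the sub-page low bits.  TOY_PAGE_SIZE/TOY_PAGE_MASK stand in for the target
 * page size and TARGET_PAGE_MASK; the flag values follow the RAM_SAVE_FLAG_*
 * definitions near the top of this file.
 */
#include <stdint.h>

#define TOY_PAGE_SIZE 4096ull
#define TOY_PAGE_MASK (~(TOY_PAGE_SIZE - 1))

#define TOY_FLAG_COMPRESS 0x02   /* page filled with a single byte value */
#define TOY_FLAG_PAGE     0x08   /* raw page data follows */
#define TOY_FLAG_EOS      0x10   /* end of the RAM section */

static void toy_split_addr(uint64_t value, uint64_t *addr, unsigned *flags)
{
    *flags = (unsigned)(value & ~TOY_PAGE_MASK);   /* low bits carry flags */
    *addr = value & TOY_PAGE_MASK;                 /* high bits: page address */
}
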
2775
2776static SaveVMHandlers savevm_ram_handlers = {
2777 .save_live_setup = ram_save_setup,
2778 .save_live_iterate = ram_save_iterate,
763c906b 2779 .save_live_complete_postcopy = ram_save_complete,
a3e06c3d 2780 .save_live_complete_precopy = ram_save_complete,
56e93d26
JQ
2781 .save_live_pending = ram_save_pending,
2782 .load_state = ram_load,
6ad2a215 2783 .cleanup = ram_migration_cleanup,
56e93d26
JQ
2784};
2785
2786void ram_mig_init(void)
2787{
2788 qemu_mutex_init(&XBZRLE.lock);
6f37bb8b 2789 register_savevm_live(NULL, "ram", 0, 4, &savevm_ram_handlers, &ram_state);
56e93d26 2790}