migration/ram.c - git blame at commit "ram: Move last_req_rb to RAMState" (mirror_qemu.git)
56e93d26
JQ
1/*
2 * QEMU System Emulator
3 *
4 * Copyright (c) 2003-2008 Fabrice Bellard
76cc7b58
JQ
5 * Copyright (c) 2011-2015 Red Hat Inc
6 *
7 * Authors:
8 * Juan Quintela <quintela@redhat.com>
56e93d26
JQ
9 *
10 * Permission is hereby granted, free of charge, to any person obtaining a copy
11 * of this software and associated documentation files (the "Software"), to deal
12 * in the Software without restriction, including without limitation the rights
13 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
14 * copies of the Software, and to permit persons to whom the Software is
15 * furnished to do so, subject to the following conditions:
16 *
17 * The above copyright notice and this permission notice shall be included in
18 * all copies or substantial portions of the Software.
19 *
20 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
21 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
23 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
24 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
25 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
26 * THE SOFTWARE.
27 */
1393a485 28#include "qemu/osdep.h"
33c11879
PB
29#include "qemu-common.h"
30#include "cpu.h"
56e93d26 31#include <zlib.h>
4addcd4f 32#include "qapi-event.h"
f348b6d1 33#include "qemu/cutils.h"
56e93d26
JQ
34#include "qemu/bitops.h"
35#include "qemu/bitmap.h"
7205c9ec
JQ
36#include "qemu/timer.h"
37#include "qemu/main-loop.h"
56e93d26 38#include "migration/migration.h"
e0b266f0 39#include "migration/postcopy-ram.h"
56e93d26
JQ
40#include "exec/address-spaces.h"
41#include "migration/page_cache.h"
56e93d26 42#include "qemu/error-report.h"
56e93d26 43#include "trace.h"
56e93d26 44#include "exec/ram_addr.h"
56e93d26 45#include "qemu/rcu_queue.h"
a91246c9 46#include "migration/colo.h"
56e93d26 47
56e93d26
JQ
48/***********************************************************/
49/* ram save/restore */
50
51#define RAM_SAVE_FLAG_FULL 0x01 /* Obsolete, not used anymore */
52#define RAM_SAVE_FLAG_COMPRESS 0x02
53#define RAM_SAVE_FLAG_MEM_SIZE 0x04
54#define RAM_SAVE_FLAG_PAGE 0x08
55#define RAM_SAVE_FLAG_EOS 0x10
56#define RAM_SAVE_FLAG_CONTINUE 0x20
57#define RAM_SAVE_FLAG_XBZRLE 0x40
58/* 0x80 is reserved in migration.h start with 0x100 next */
59#define RAM_SAVE_FLAG_COMPRESS_PAGE 0x100
60
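/*
 * Worked example (assuming the usual 4 KiB target page, so page offsets are
 * multiples of 0x1000 and the low bits are free for flags): a normal page at
 * offset 0x2000 of a block that has already been announced is sent with the
 * 64-bit header value
 *
 *     0x2000 | RAM_SAVE_FLAG_PAGE | RAM_SAVE_FLAG_CONTINUE == 0x2028
 *
 * and the receiver masks the low bits off again to recover the offset.
 */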
adb65dec 61static uint8_t *ZERO_TARGET_PAGE;
56e93d26
JQ
62
63static inline bool is_zero_range(uint8_t *p, uint64_t size)
64{
a1febc49 65 return buffer_is_zero(p, size);
56e93d26
JQ
66}
67
68/* struct contains XBZRLE cache and a static page
69 used by the compression */
70static struct {
71 /* buffer used for XBZRLE encoding */
72 uint8_t *encoded_buf;
73 /* buffer for storing page content */
74 uint8_t *current_buf;
75 /* Cache for XBZRLE, Protected by lock. */
76 PageCache *cache;
77 QemuMutex lock;
78} XBZRLE;
79
80/* buffer used for XBZRLE decoding */
81static uint8_t *xbzrle_decoded_buf;
82
83static void XBZRLE_cache_lock(void)
84{
85 if (migrate_use_xbzrle())
86 qemu_mutex_lock(&XBZRLE.lock);
87}
88
89static void XBZRLE_cache_unlock(void)
90{
91 if (migrate_use_xbzrle())
92 qemu_mutex_unlock(&XBZRLE.lock);
93}
94
3d0684b2
JQ
95/**
96 * xbzrle_cache_resize: resize the xbzrle cache
97 *
98 * This function is called from qmp_migrate_set_cache_size in main
99 * thread, possibly while a migration is in progress. A running
100 * migration may be using the cache and might finish during this call,
101 * hence changes to the cache are protected by XBZRLE.lock().
102 *
103 * Returns the new_size or negative in case of error.
104 *
105 * @new_size: new cache size
56e93d26
JQ
106 */
107int64_t xbzrle_cache_resize(int64_t new_size)
108{
109 PageCache *new_cache;
110 int64_t ret;
111
112 if (new_size < TARGET_PAGE_SIZE) {
113 return -1;
114 }
115
116 XBZRLE_cache_lock();
117
118 if (XBZRLE.cache != NULL) {
119 if (pow2floor(new_size) == migrate_xbzrle_cache_size()) {
120 goto out_new_size;
121 }
122 new_cache = cache_init(new_size / TARGET_PAGE_SIZE,
123 TARGET_PAGE_SIZE);
124 if (!new_cache) {
125 error_report("Error creating cache");
126 ret = -1;
127 goto out;
128 }
129
130 cache_fini(XBZRLE.cache);
131 XBZRLE.cache = new_cache;
132 }
133
134out_new_size:
135 ret = pow2floor(new_size);
136out:
137 XBZRLE_cache_unlock();
138 return ret;
139}
140
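/*
 * A short usage sketch with hypothetical sizes: resizing to 5 MiB while the
 * configured cache size is already 4 MiB hits the pow2floor() early-out,
 * because
 *
 *     pow2floor(5 * 1024 * 1024) == 4 * 1024 * 1024
 *
 * so no new cache is allocated and the call returns 4 MiB. The effective
 * cache size (and the returned value) is always rounded down to a power of
 * two.
 */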
eb859c53
JQ
141struct RAMBitmap {
142 struct rcu_head rcu;
143 /* Main migration bitmap */
144 unsigned long *bmap;
145 /* bitmap of pages that haven't been sent even once
146 * only maintained and used in postcopy at the moment
147 * where it's used to send the dirtymap at the start
148 * of the postcopy phase
149 */
150 unsigned long *unsentmap;
151};
152typedef struct RAMBitmap RAMBitmap;
153
6f37bb8b
JQ
154/* State of RAM for migration */
155struct RAMState {
156 /* Last block that we have visited searching for dirty pages */
157 RAMBlock *last_seen_block;
158 /* Last block from where we have sent data */
159 RAMBlock *last_sent_block;
160 /* Last offset we have sent data from */
161 ram_addr_t last_offset;
162 /* last ram version we have seen */
163 uint32_t last_version;
164 /* We are in the first round */
165 bool ram_bulk_stage;
8d820d6f
JQ
166 /* How many times we have dirty too many pages */
167 int dirty_rate_high_cnt;
5a987738
JQ
168 /* How many times we have synchronized the bitmap */
169 uint64_t bitmap_sync_count;
f664da80
JQ
170 /* these variables are used for bitmap sync */
171 /* last time we did a full bitmap_sync */
172 int64_t time_last_bitmap_sync;
eac74159 173 /* bytes transferred at start_time */
c4bdf0cf 174 uint64_t bytes_xfer_prev;
a66cd90c 175 /* number of dirty pages since start_time */
68908ed6 176 uint64_t num_dirty_pages_period;
b5833fde
JQ
177 /* xbzrle misses since the beginning of the period */
178 uint64_t xbzrle_cache_miss_prev;
36040d9c
JQ
179 /* number of iterations at the beginning of period */
180 uint64_t iterations_prev;
f7ccd61b
JQ
181 /* Accounting fields */
182 /* number of zero pages; it used to count pages filled with the same byte */
183 uint64_t zero_pages;
b4d1c6e7
JQ
184 /* number of normal transferred pages */
185 uint64_t norm_pages;
23b28c3c
JQ
186 /* Iterations since start */
187 uint64_t iterations;
f36ada95
JQ
188 /* xbzrle transmitted bytes. Note that this is counted after
189 * compression, so it can't be derived from the page count */
07ed50a2 190 uint64_t xbzrle_bytes;
f36ada95
JQ
191 /* xbzrle transmitted pages */
192 uint64_t xbzrle_pages;
544c36f1
JQ
193 /* xbzrle number of cache miss */
194 uint64_t xbzrle_cache_miss;
b07016b6
JQ
195 /* xbzrle miss rate */
196 double xbzrle_cache_miss_rate;
180f61f7
JQ
197 /* xbzrle number of overflows */
198 uint64_t xbzrle_overflows;
0d8ec885
JQ
199 /* number of dirty bits in the bitmap */
200 uint64_t migration_dirty_pages;
2f4fde93
JQ
201 /* total number of bytes transferred */
202 uint64_t bytes_transferred;
108cfae0
JQ
203 /* protects modification of the bitmap */
204 QemuMutex bitmap_mutex;
eb859c53
JQ
205 /* Ram Bitmap protected by RCU */
206 RAMBitmap *ram_bitmap;
68a098f3
JQ
207 /* The RAMBlock used in the last src_page_requests */
208 RAMBlock *last_req_rb;
6f37bb8b
JQ
209};
210typedef struct RAMState RAMState;
211
212static RAMState ram_state;
213
56e93d26
JQ
214uint64_t dup_mig_pages_transferred(void)
215{
f7ccd61b 216 return ram_state.zero_pages;
56e93d26
JQ
217}
218
56e93d26
JQ
219uint64_t norm_mig_pages_transferred(void)
220{
b4d1c6e7 221 return ram_state.norm_pages;
56e93d26
JQ
222}
223
224uint64_t xbzrle_mig_bytes_transferred(void)
225{
07ed50a2 226 return ram_state.xbzrle_bytes;
56e93d26
JQ
227}
228
229uint64_t xbzrle_mig_pages_transferred(void)
230{
f36ada95 231 return ram_state.xbzrle_pages;
56e93d26
JQ
232}
233
234uint64_t xbzrle_mig_pages_cache_miss(void)
235{
544c36f1 236 return ram_state.xbzrle_cache_miss;
56e93d26
JQ
237}
238
239double xbzrle_mig_cache_miss_rate(void)
240{
b07016b6 241 return ram_state.xbzrle_cache_miss_rate;
56e93d26
JQ
242}
243
244uint64_t xbzrle_mig_pages_overflow(void)
245{
180f61f7 246 return ram_state.xbzrle_overflows;
56e93d26
JQ
247}
248
9edabd4d 249uint64_t ram_bytes_transferred(void)
0d8ec885 250{
9edabd4d 251 return ram_state.bytes_transferred;
0d8ec885
JQ
252}
253
9edabd4d 254uint64_t ram_bytes_remaining(void)
2f4fde93 255{
9edabd4d 256 return ram_state.migration_dirty_pages * TARGET_PAGE_SIZE;
2f4fde93
JQ
257}
258
b8fb8cb7
DDAG
259/* used by the search for pages to send */
260struct PageSearchStatus {
261 /* Current block being searched */
262 RAMBlock *block;
263 /* Current offset to search from */
264 ram_addr_t offset;
265 /* Set once we wrap around */
266 bool complete_round;
267};
268typedef struct PageSearchStatus PageSearchStatus;
269
56e93d26 270struct CompressParam {
56e93d26 271 bool done;
90e56fb4 272 bool quit;
56e93d26
JQ
273 QEMUFile *file;
274 QemuMutex mutex;
275 QemuCond cond;
276 RAMBlock *block;
277 ram_addr_t offset;
278};
279typedef struct CompressParam CompressParam;
280
281struct DecompressParam {
73a8912b 282 bool done;
90e56fb4 283 bool quit;
56e93d26
JQ
284 QemuMutex mutex;
285 QemuCond cond;
286 void *des;
d341d9f3 287 uint8_t *compbuf;
56e93d26
JQ
288 int len;
289};
290typedef struct DecompressParam DecompressParam;
291
292static CompressParam *comp_param;
293static QemuThread *compress_threads;
294/* comp_done_cond is used to wake up the migration thread when
295 * one of the compression threads has finished the compression.
296 * comp_done_lock is used together with comp_done_cond.
297 */
0d9f9a5c
LL
298static QemuMutex comp_done_lock;
299static QemuCond comp_done_cond;
56e93d26
JQ
300/* The empty QEMUFileOps will be used by file in CompressParam */
301static const QEMUFileOps empty_ops = { };
302
303static bool compression_switch;
56e93d26
JQ
304static DecompressParam *decomp_param;
305static QemuThread *decompress_threads;
73a8912b
LL
306static QemuMutex decomp_done_lock;
307static QemuCond decomp_done_cond;
56e93d26 308
a7a9a88f
LL
309static int do_compress_ram_page(QEMUFile *f, RAMBlock *block,
310 ram_addr_t offset);
56e93d26
JQ
311
312static void *do_data_compress(void *opaque)
313{
314 CompressParam *param = opaque;
a7a9a88f
LL
315 RAMBlock *block;
316 ram_addr_t offset;
56e93d26 317
a7a9a88f 318 qemu_mutex_lock(&param->mutex);
90e56fb4 319 while (!param->quit) {
a7a9a88f
LL
320 if (param->block) {
321 block = param->block;
322 offset = param->offset;
323 param->block = NULL;
324 qemu_mutex_unlock(&param->mutex);
325
326 do_compress_ram_page(param->file, block, offset);
327
0d9f9a5c 328 qemu_mutex_lock(&comp_done_lock);
a7a9a88f 329 param->done = true;
0d9f9a5c
LL
330 qemu_cond_signal(&comp_done_cond);
331 qemu_mutex_unlock(&comp_done_lock);
a7a9a88f
LL
332
333 qemu_mutex_lock(&param->mutex);
334 } else {
56e93d26
JQ
335 qemu_cond_wait(&param->cond, &param->mutex);
336 }
56e93d26 337 }
a7a9a88f 338 qemu_mutex_unlock(&param->mutex);
56e93d26
JQ
339
340 return NULL;
341}
342
343static inline void terminate_compression_threads(void)
344{
345 int idx, thread_count;
346
347 thread_count = migrate_compress_threads();
3d0684b2 348
56e93d26
JQ
349 for (idx = 0; idx < thread_count; idx++) {
350 qemu_mutex_lock(&comp_param[idx].mutex);
90e56fb4 351 comp_param[idx].quit = true;
56e93d26
JQ
352 qemu_cond_signal(&comp_param[idx].cond);
353 qemu_mutex_unlock(&comp_param[idx].mutex);
354 }
355}
356
357void migrate_compress_threads_join(void)
358{
359 int i, thread_count;
360
361 if (!migrate_use_compression()) {
362 return;
363 }
364 terminate_compression_threads();
365 thread_count = migrate_compress_threads();
366 for (i = 0; i < thread_count; i++) {
367 qemu_thread_join(compress_threads + i);
368 qemu_fclose(comp_param[i].file);
369 qemu_mutex_destroy(&comp_param[i].mutex);
370 qemu_cond_destroy(&comp_param[i].cond);
371 }
0d9f9a5c
LL
372 qemu_mutex_destroy(&comp_done_lock);
373 qemu_cond_destroy(&comp_done_cond);
56e93d26
JQ
374 g_free(compress_threads);
375 g_free(comp_param);
56e93d26
JQ
376 compress_threads = NULL;
377 comp_param = NULL;
56e93d26
JQ
378}
379
380void migrate_compress_threads_create(void)
381{
382 int i, thread_count;
383
384 if (!migrate_use_compression()) {
385 return;
386 }
56e93d26
JQ
387 compression_switch = true;
388 thread_count = migrate_compress_threads();
389 compress_threads = g_new0(QemuThread, thread_count);
390 comp_param = g_new0(CompressParam, thread_count);
0d9f9a5c
LL
391 qemu_cond_init(&comp_done_cond);
392 qemu_mutex_init(&comp_done_lock);
56e93d26 393 for (i = 0; i < thread_count; i++) {
e110aa91
C
394 /* comp_param[i].file is just used as a dummy buffer to save data,
395 * set its ops to empty.
56e93d26
JQ
396 */
397 comp_param[i].file = qemu_fopen_ops(NULL, &empty_ops);
398 comp_param[i].done = true;
90e56fb4 399 comp_param[i].quit = false;
56e93d26
JQ
400 qemu_mutex_init(&comp_param[i].mutex);
401 qemu_cond_init(&comp_param[i].cond);
402 qemu_thread_create(compress_threads + i, "compress",
403 do_data_compress, comp_param + i,
404 QEMU_THREAD_JOINABLE);
405 }
406}
407
408/**
3d0684b2 409 * save_page_header: write page header to wire
56e93d26
JQ
410 *
411 * If this is the 1st block, it also writes the block identification
412 *
3d0684b2 413 * Returns the number of bytes written
56e93d26
JQ
414 *
415 * @f: QEMUFile where to send the data
416 * @block: block that contains the page we want to send
417 * @offset: offset inside the block for the page
418 * in the lower bits, it contains flags
419 */
420static size_t save_page_header(QEMUFile *f, RAMBlock *block, ram_addr_t offset)
421{
9f5f380b 422 size_t size, len;
56e93d26
JQ
423
424 qemu_put_be64(f, offset);
425 size = 8;
426
427 if (!(offset & RAM_SAVE_FLAG_CONTINUE)) {
9f5f380b
LL
428 len = strlen(block->idstr);
429 qemu_put_byte(f, len);
430 qemu_put_buffer(f, (uint8_t *)block->idstr, len);
431 size += 1 + len;
56e93d26
JQ
432 }
433 return size;
434}
435
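/*
 * For illustration, the wire format written by save_page_header() is:
 *
 *     8 bytes   big-endian (offset | flags)
 *     1 byte    strlen(block->idstr)   -- only when RAM_SAVE_FLAG_CONTINUE
 *     N bytes   block->idstr           --   is clear in the offset word
 *
 * e.g. the first page of a block named "pc.ram" costs 8 + 1 + 6 = 15 header
 * bytes; later pages of the same block, sent with the CONTINUE flag, cost
 * only 8.
 */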
3d0684b2
JQ
436/**
437 * mig_throttle_guest_down: throttle down the guest
438 *
439 * Reduce amount of guest cpu execution to hopefully slow down memory
440 * writes. If guest dirty memory rate is reduced below the rate at
441 * which we can transfer pages to the destination then we should be
442 * able to complete migration. Some workloads dirty memory way too
443 * fast and will not effectively converge, even with auto-converge.
070afca2
JH
444 */
445static void mig_throttle_guest_down(void)
446{
447 MigrationState *s = migrate_get_current();
2594f56d
DB
448 uint64_t pct_initial = s->parameters.cpu_throttle_initial;
449 uint64_t pct_icrement = s->parameters.cpu_throttle_increment;
070afca2
JH
450
451 /* We have not started throttling yet. Let's start it. */
452 if (!cpu_throttle_active()) {
453 cpu_throttle_set(pct_initial);
454 } else {
455 /* Throttling already on, just increase the rate */
456 cpu_throttle_set(cpu_throttle_get_percentage() + pct_icrement);
457 }
458}
459
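/*
 * With the usual defaults (cpu_throttle_initial = 20 and
 * cpu_throttle_increment = 10; both are tunable migration parameters),
 * repeated calls throttle the guest at 20%, 30%, 40%, ... of its CPU time
 * until migration converges; cpu_throttle_set() caps the percentage
 * internally.
 */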
3d0684b2
JQ
460/**
461 * xbzrle_cache_zero_page: insert a zero page in the XBZRLE cache
462 *
6f37bb8b 463 * @rs: current RAM state
3d0684b2
JQ
464 * @current_addr: address for the zero page
465 *
466 * Update the xbzrle cache to reflect a page that's been sent as all 0.
56e93d26
JQ
467 * The important thing is that a stale (not-yet-0'd) page be replaced
468 * by the new data.
469 * As a bonus, if the page wasn't in the cache it gets added so that
3d0684b2 470 * when a small write is made into the 0'd page it gets XBZRLE sent.
56e93d26 471 */
6f37bb8b 472static void xbzrle_cache_zero_page(RAMState *rs, ram_addr_t current_addr)
56e93d26 473{
6f37bb8b 474 if (rs->ram_bulk_stage || !migrate_use_xbzrle()) {
56e93d26
JQ
475 return;
476 }
477
478 /* We don't care if this fails to allocate a new cache page
479 * as long as it updated an old one */
480 cache_insert(XBZRLE.cache, current_addr, ZERO_TARGET_PAGE,
5a987738 481 rs->bitmap_sync_count);
56e93d26
JQ
482}
483
484#define ENCODING_FLAG_XBZRLE 0x1
485
486/**
487 * save_xbzrle_page: compress and send current page
488 *
489 * Returns: 1 means that we wrote the page
490 * 0 means that page is identical to the one already sent
491 * -1 means that xbzrle would be longer than normal
492 *
5a987738 493 * @rs: current RAM state
56e93d26 494 * @f: QEMUFile where to send the data
3d0684b2
JQ
495 * @current_data: pointer to the address of the page contents
496 * @current_addr: addr of the page
56e93d26
JQ
497 * @block: block that contains the page we want to send
498 * @offset: offset inside the block for the page
499 * @last_stage: if we are at the completion stage
56e93d26 500 */
5a987738 501static int save_xbzrle_page(RAMState *rs, QEMUFile *f, uint8_t **current_data,
56e93d26 502 ram_addr_t current_addr, RAMBlock *block,
072c2511 503 ram_addr_t offset, bool last_stage)
56e93d26
JQ
504{
505 int encoded_len = 0, bytes_xbzrle;
506 uint8_t *prev_cached_page;
507
5a987738 508 if (!cache_is_cached(XBZRLE.cache, current_addr, rs->bitmap_sync_count)) {
544c36f1 509 rs->xbzrle_cache_miss++;
56e93d26
JQ
510 if (!last_stage) {
511 if (cache_insert(XBZRLE.cache, current_addr, *current_data,
5a987738 512 rs->bitmap_sync_count) == -1) {
56e93d26
JQ
513 return -1;
514 } else {
515 /* update *current_data when the page has been
516 inserted into cache */
517 *current_data = get_cached_data(XBZRLE.cache, current_addr);
518 }
519 }
520 return -1;
521 }
522
523 prev_cached_page = get_cached_data(XBZRLE.cache, current_addr);
524
525 /* save current buffer into memory */
526 memcpy(XBZRLE.current_buf, *current_data, TARGET_PAGE_SIZE);
527
528 /* XBZRLE encoding (if there is no overflow) */
529 encoded_len = xbzrle_encode_buffer(prev_cached_page, XBZRLE.current_buf,
530 TARGET_PAGE_SIZE, XBZRLE.encoded_buf,
531 TARGET_PAGE_SIZE);
532 if (encoded_len == 0) {
55c4446b 533 trace_save_xbzrle_page_skipping();
56e93d26
JQ
534 return 0;
535 } else if (encoded_len == -1) {
55c4446b 536 trace_save_xbzrle_page_overflow();
180f61f7 537 rs->xbzrle_overflows++;
56e93d26
JQ
538 /* update data in the cache */
539 if (!last_stage) {
540 memcpy(prev_cached_page, *current_data, TARGET_PAGE_SIZE);
541 *current_data = prev_cached_page;
542 }
543 return -1;
544 }
545
546 /* we need to update the data in the cache, in order to get the same data */
547 if (!last_stage) {
548 memcpy(prev_cached_page, XBZRLE.current_buf, TARGET_PAGE_SIZE);
549 }
550
551 /* Send XBZRLE based compressed page */
552 bytes_xbzrle = save_page_header(f, block, offset | RAM_SAVE_FLAG_XBZRLE);
553 qemu_put_byte(f, ENCODING_FLAG_XBZRLE);
554 qemu_put_be16(f, encoded_len);
555 qemu_put_buffer(f, XBZRLE.encoded_buf, encoded_len);
556 bytes_xbzrle += encoded_len + 1 + 2;
f36ada95 557 rs->xbzrle_pages++;
07ed50a2 558 rs->xbzrle_bytes += bytes_xbzrle;
072c2511 559 rs->bytes_transferred += bytes_xbzrle;
56e93d26
JQ
560
561 return 1;
562}
563
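/*
 * For a page that XBZRLE manages to delta-compress, the stream carries:
 *
 *     save_page_header(offset | RAM_SAVE_FLAG_XBZRLE)
 *     1 byte   ENCODING_FLAG_XBZRLE
 *     2 bytes  encoded_len (big-endian)
 *     encoded_len bytes of delta against the cached copy
 *
 * e.g. (hypothetical numbers) a 4 KiB page whose delta encodes to 300 bytes
 * costs 8 + 1 + 2 + 300 = 311 bytes instead of ~4104 for a raw
 * RAM_SAVE_FLAG_PAGE transfer.
 */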
3d0684b2
JQ
564/**
565 * migration_bitmap_find_dirty: find the next dirty page from start
f3f491fc 566 *
3d0684b2
JQ
567 * Called with rcu_read_lock() to protect migration_bitmap
568 *
569 * Returns the byte offset within memory region of the start of a dirty page
570 *
6f37bb8b 571 * @rs: current RAM state
3d0684b2
JQ
572 * @rb: RAMBlock where to search for dirty pages
573 * @start: starting address (typically so we can continue from previous page)
574 * @ram_addr_abs: pointer into which to store the address of the dirty page
575 * within the global ram_addr space
f3f491fc 576 */
56e93d26 577static inline
6f37bb8b 578ram_addr_t migration_bitmap_find_dirty(RAMState *rs, RAMBlock *rb,
a82d593b
DDAG
579 ram_addr_t start,
580 ram_addr_t *ram_addr_abs)
56e93d26 581{
2f68e399 582 unsigned long base = rb->offset >> TARGET_PAGE_BITS;
56e93d26 583 unsigned long nr = base + (start >> TARGET_PAGE_BITS);
2f68e399
DDAG
584 uint64_t rb_size = rb->used_length;
585 unsigned long size = base + (rb_size >> TARGET_PAGE_BITS);
2ff64038 586 unsigned long *bitmap;
56e93d26
JQ
587
588 unsigned long next;
589
eb859c53 590 bitmap = atomic_rcu_read(&rs->ram_bitmap)->bmap;
6f37bb8b 591 if (rs->ram_bulk_stage && nr > base) {
56e93d26
JQ
592 next = nr + 1;
593 } else {
2ff64038 594 next = find_next_bit(bitmap, size, nr);
56e93d26
JQ
595 }
596
f3f491fc 597 *ram_addr_abs = next << TARGET_PAGE_BITS;
56e93d26
JQ
598 return (next - base) << TARGET_PAGE_BITS;
599}
600
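/*
 * The index arithmetic above, with hypothetical numbers and a 4 KiB target
 * page (TARGET_PAGE_BITS == 12): for a block at ram_addr_t offset 0x40000000
 * and a search start of 0x3000 within the block,
 *
 *     base = 0x40000000 >> 12 = 0x40000   (first bit of this block)
 *     nr   = base + (0x3000 >> 12) = 0x40003
 *
 * and the returned value (next - base) << 12 is again an offset relative to
 * the block. During the bulk stage every page is assumed dirty, so (except
 * at the very start of a block) the bitmap is not scanned at all and next is
 * simply nr + 1.
 */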
0d8ec885 601static inline bool migration_bitmap_clear_dirty(RAMState *rs, ram_addr_t addr)
a82d593b
DDAG
602{
603 bool ret;
604 int nr = addr >> TARGET_PAGE_BITS;
eb859c53 605 unsigned long *bitmap = atomic_rcu_read(&rs->ram_bitmap)->bmap;
a82d593b
DDAG
606
607 ret = test_and_clear_bit(nr, bitmap);
608
609 if (ret) {
0d8ec885 610 rs->migration_dirty_pages--;
a82d593b
DDAG
611 }
612 return ret;
613}
614
a66cd90c
JQ
615static void migration_bitmap_sync_range(RAMState *rs, ram_addr_t start,
616 ram_addr_t length)
56e93d26 617{
2ff64038 618 unsigned long *bitmap;
eb859c53 619 bitmap = atomic_rcu_read(&rs->ram_bitmap)->bmap;
0d8ec885
JQ
620 rs->migration_dirty_pages +=
621 cpu_physical_memory_sync_dirty_bitmap(bitmap, start, length,
622 &rs->num_dirty_pages_period);
56e93d26
JQ
623}
624
3d0684b2
JQ
625/**
626 * ram_pagesize_summary: calculate all the pagesizes of a VM
627 *
628 * Returns a summary bitmap of the page sizes of all RAMBlocks
629 *
630 * For VMs with just normal pages this is equivalent to the host page
631 * size. If it's got some huge pages then it's the OR of all the
632 * different page sizes.
e8ca1db2
DDAG
633 */
634uint64_t ram_pagesize_summary(void)
635{
636 RAMBlock *block;
637 uint64_t summary = 0;
638
639 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
640 summary |= block->page_size;
641 }
642
643 return summary;
644}
645
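/*
 * Example: a guest with ordinary 4 KiB RAM plus a RAMBlock backed by 2 MiB
 * huge pages yields
 *
 *     summary = 0x1000 | 0x200000 = 0x201000
 *
 * which the postcopy code uses to check that source and destination agree
 * on page sizes.
 */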
8d820d6f 646static void migration_bitmap_sync(RAMState *rs)
56e93d26
JQ
647{
648 RAMBlock *block;
56e93d26
JQ
649 MigrationState *s = migrate_get_current();
650 int64_t end_time;
c4bdf0cf 651 uint64_t bytes_xfer_now;
56e93d26 652
5a987738 653 rs->bitmap_sync_count++;
56e93d26 654
eac74159
JQ
655 if (!rs->bytes_xfer_prev) {
656 rs->bytes_xfer_prev = ram_bytes_transferred();
56e93d26
JQ
657 }
658
f664da80
JQ
659 if (!rs->time_last_bitmap_sync) {
660 rs->time_last_bitmap_sync = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
56e93d26
JQ
661 }
662
663 trace_migration_bitmap_sync_start();
9c1f8f44 664 memory_global_dirty_log_sync();
56e93d26 665
108cfae0 666 qemu_mutex_lock(&rs->bitmap_mutex);
56e93d26
JQ
667 rcu_read_lock();
668 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
a66cd90c 669 migration_bitmap_sync_range(rs, block->offset, block->used_length);
56e93d26
JQ
670 }
671 rcu_read_unlock();
108cfae0 672 qemu_mutex_unlock(&rs->bitmap_mutex);
56e93d26 673
a66cd90c 674 trace_migration_bitmap_sync_end(rs->num_dirty_pages_period);
1ffb5dfd 675
56e93d26
JQ
676 end_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
677
678 /* more than 1 second = 1000 milliseconds */
f664da80 679 if (end_time > rs->time_last_bitmap_sync + 1000) {
56e93d26
JQ
680 if (migrate_auto_converge()) {
681 /* The following detection logic can be refined later. For now:
682 Check to see if the bytes dirtied in this period exceed 50% of the
683 approx. amount of bytes that just got transferred since the last time we
070afca2
JH
684 were in this routine. If that happens twice, start or increase
685 throttling */
56e93d26 686 bytes_xfer_now = ram_bytes_transferred();
070afca2 687
56e93d26 688 if (s->dirty_pages_rate &&
a66cd90c 689 (rs->num_dirty_pages_period * TARGET_PAGE_SIZE >
eac74159 690 (bytes_xfer_now - rs->bytes_xfer_prev) / 2) &&
8d820d6f 691 (rs->dirty_rate_high_cnt++ >= 2)) {
56e93d26 692 trace_migration_throttle();
8d820d6f 693 rs->dirty_rate_high_cnt = 0;
070afca2 694 mig_throttle_guest_down();
56e93d26 695 }
eac74159 696 rs->bytes_xfer_prev = bytes_xfer_now;
56e93d26 697 }
070afca2 698
56e93d26 699 if (migrate_use_xbzrle()) {
23b28c3c 700 if (rs->iterations_prev != rs->iterations) {
b07016b6 701 rs->xbzrle_cache_miss_rate =
544c36f1 702 (double)(rs->xbzrle_cache_miss -
b5833fde 703 rs->xbzrle_cache_miss_prev) /
23b28c3c 704 (rs->iterations - rs->iterations_prev);
56e93d26 705 }
23b28c3c 706 rs->iterations_prev = rs->iterations;
544c36f1 707 rs->xbzrle_cache_miss_prev = rs->xbzrle_cache_miss;
56e93d26 708 }
a66cd90c 709 s->dirty_pages_rate = rs->num_dirty_pages_period * 1000
f664da80 710 / (end_time - rs->time_last_bitmap_sync);
56e93d26 711 s->dirty_bytes_rate = s->dirty_pages_rate * TARGET_PAGE_SIZE;
f664da80 712 rs->time_last_bitmap_sync = end_time;
a66cd90c 713 rs->num_dirty_pages_period = 0;
56e93d26 714 }
5a987738 715 s->dirty_sync_count = rs->bitmap_sync_count;
4addcd4f 716 if (migrate_use_events()) {
5a987738 717 qapi_event_send_migration_pass(rs->bitmap_sync_count, NULL);
4addcd4f 718 }
56e93d26
JQ
719}
720
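/*
 * A worked example of the auto-converge check above (hypothetical numbers,
 * 4 KiB target pages): if a ~1 s period dirtied 100,000 pages, that is
 *
 *     100000 * 4096 = ~400 MB dirtied
 *
 * while, say, 500 MB were transferred in the same period. 400 MB is more
 * than 500 MB / 2, so the period counts as "dirtying too fast"; once that
 * has happened on enough consecutive syncs (tracked in dirty_rate_high_cnt),
 * the guest gets throttled via mig_throttle_guest_down(). The reported
 * dirty_pages_rate is simply num_dirty_pages_period * 1000 / elapsed_ms.
 */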
721/**
3d0684b2 722 * save_zero_page: send the zero page to the stream
56e93d26 723 *
3d0684b2 724 * Returns the number of pages written.
56e93d26 725 *
f7ccd61b 726 * @rs: current RAM state
56e93d26
JQ
727 * @f: QEMUFile where to send the data
728 * @block: block that contains the page we want to send
729 * @offset: offset inside the block for the page
730 * @p: pointer to the page
56e93d26 731 */
f7ccd61b 732static int save_zero_page(RAMState *rs, QEMUFile *f, RAMBlock *block,
072c2511 733 ram_addr_t offset, uint8_t *p)
56e93d26
JQ
734{
735 int pages = -1;
736
737 if (is_zero_range(p, TARGET_PAGE_SIZE)) {
f7ccd61b 738 rs->zero_pages++;
072c2511
JQ
739 rs->bytes_transferred +=
740 save_page_header(f, block, offset | RAM_SAVE_FLAG_COMPRESS);
56e93d26 741 qemu_put_byte(f, 0);
072c2511 742 rs->bytes_transferred += 1;
56e93d26
JQ
743 pages = 1;
744 }
745
746 return pages;
747}
748
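/*
 * A zero page is very cheap on the wire: save_page_header() with
 * RAM_SAVE_FLAG_COMPRESS set plus a single fill byte of 0. For a block that
 * is already being continued that is 8 + 1 = 9 bytes for a whole 4 KiB
 * target page.
 */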
36449157 749static void ram_release_pages(MigrationState *ms, const char *rbname,
53f09a10
PB
750 uint64_t offset, int pages)
751{
752 if (!migrate_release_ram() || !migration_in_postcopy(ms)) {
753 return;
754 }
755
36449157 756 ram_discard_range(NULL, rbname, offset, pages << TARGET_PAGE_BITS);
53f09a10
PB
757}
758
56e93d26 759/**
3d0684b2 760 * ram_save_page: send the given page to the stream
56e93d26 761 *
3d0684b2 762 * Returns the number of pages written.
3fd3c4b3
DDAG
763 * < 0 - error
764 * >=0 - Number of pages written - this might legally be 0
765 * if xbzrle noticed the page was the same.
56e93d26 766 *
6f37bb8b 767 * @rs: current RAM state
3d0684b2 768 * @ms: current migration state
56e93d26
JQ
769 * @f: QEMUFile where to send the data
770 * @block: block that contains the page we want to send
771 * @offset: offset inside the block for the page
772 * @last_stage: if we are at the completion stage
56e93d26 773 */
6f37bb8b 774static int ram_save_page(RAMState *rs, MigrationState *ms, QEMUFile *f,
072c2511 775 PageSearchStatus *pss, bool last_stage)
56e93d26
JQ
776{
777 int pages = -1;
778 uint64_t bytes_xmit;
779 ram_addr_t current_addr;
56e93d26
JQ
780 uint8_t *p;
781 int ret;
782 bool send_async = true;
a08f6890
HZ
783 RAMBlock *block = pss->block;
784 ram_addr_t offset = pss->offset;
56e93d26 785
2f68e399 786 p = block->host + offset;
56e93d26
JQ
787
788 /* In doubt sent page as normal */
789 bytes_xmit = 0;
790 ret = ram_control_save_page(f, block->offset,
791 offset, TARGET_PAGE_SIZE, &bytes_xmit);
792 if (bytes_xmit) {
072c2511 793 rs->bytes_transferred += bytes_xmit;
56e93d26
JQ
794 pages = 1;
795 }
796
797 XBZRLE_cache_lock();
798
799 current_addr = block->offset + offset;
800
6f37bb8b 801 if (block == rs->last_sent_block) {
56e93d26
JQ
802 offset |= RAM_SAVE_FLAG_CONTINUE;
803 }
804 if (ret != RAM_SAVE_CONTROL_NOT_SUPP) {
805 if (ret != RAM_SAVE_CONTROL_DELAYED) {
806 if (bytes_xmit > 0) {
b4d1c6e7 807 rs->norm_pages++;
56e93d26 808 } else if (bytes_xmit == 0) {
f7ccd61b 809 rs->zero_pages++;
56e93d26
JQ
810 }
811 }
812 } else {
072c2511 813 pages = save_zero_page(rs, f, block, offset, p);
56e93d26
JQ
814 if (pages > 0) {
815 /* Must let xbzrle know, otherwise a previous (now 0'd) cached
816 * page would be stale
817 */
6f37bb8b 818 xbzrle_cache_zero_page(rs, current_addr);
53f09a10 819 ram_release_pages(ms, block->idstr, pss->offset, pages);
6f37bb8b 820 } else if (!rs->ram_bulk_stage &&
9eb14766 821 !migration_in_postcopy(ms) && migrate_use_xbzrle()) {
5a987738 822 pages = save_xbzrle_page(rs, f, &p, current_addr, block,
072c2511 823 offset, last_stage);
56e93d26
JQ
824 if (!last_stage) {
825 /* Can't send this cached data async, since the cache page
826 * might get updated before it gets to the wire
827 */
828 send_async = false;
829 }
830 }
831 }
832
833 /* XBZRLE overflow or normal page */
834 if (pages == -1) {
072c2511 835 rs->bytes_transferred += save_page_header(f, block,
56e93d26
JQ
836 offset | RAM_SAVE_FLAG_PAGE);
837 if (send_async) {
53f09a10
PB
838 qemu_put_buffer_async(f, p, TARGET_PAGE_SIZE,
839 migrate_release_ram() &
840 migration_in_postcopy(ms));
56e93d26
JQ
841 } else {
842 qemu_put_buffer(f, p, TARGET_PAGE_SIZE);
843 }
072c2511 844 rs->bytes_transferred += TARGET_PAGE_SIZE;
56e93d26 845 pages = 1;
b4d1c6e7 846 rs->norm_pages++;
56e93d26
JQ
847 }
848
849 XBZRLE_cache_unlock();
850
851 return pages;
852}
853
a7a9a88f
LL
854static int do_compress_ram_page(QEMUFile *f, RAMBlock *block,
855 ram_addr_t offset)
56e93d26
JQ
856{
857 int bytes_sent, blen;
a7a9a88f 858 uint8_t *p = block->host + (offset & TARGET_PAGE_MASK);
56e93d26 859
a7a9a88f 860 bytes_sent = save_page_header(f, block, offset |
56e93d26 861 RAM_SAVE_FLAG_COMPRESS_PAGE);
a7a9a88f 862 blen = qemu_put_compression_data(f, p, TARGET_PAGE_SIZE,
56e93d26 863 migrate_compress_level());
b3be2896
LL
864 if (blen < 0) {
865 bytes_sent = 0;
866 qemu_file_set_error(migrate_get_current()->to_dst_file, blen);
867 error_report("compressed data failed!");
868 } else {
869 bytes_sent += blen;
53f09a10
PB
870 ram_release_pages(migrate_get_current(), block->idstr,
871 offset & TARGET_PAGE_MASK, 1);
b3be2896 872 }
56e93d26
JQ
873
874 return bytes_sent;
875}
876
2f4fde93 877static void flush_compressed_data(RAMState *rs, QEMUFile *f)
56e93d26
JQ
878{
879 int idx, len, thread_count;
880
881 if (!migrate_use_compression()) {
882 return;
883 }
884 thread_count = migrate_compress_threads();
a7a9a88f 885
0d9f9a5c 886 qemu_mutex_lock(&comp_done_lock);
56e93d26 887 for (idx = 0; idx < thread_count; idx++) {
a7a9a88f 888 while (!comp_param[idx].done) {
0d9f9a5c 889 qemu_cond_wait(&comp_done_cond, &comp_done_lock);
56e93d26 890 }
a7a9a88f 891 }
0d9f9a5c 892 qemu_mutex_unlock(&comp_done_lock);
a7a9a88f
LL
893
894 for (idx = 0; idx < thread_count; idx++) {
895 qemu_mutex_lock(&comp_param[idx].mutex);
90e56fb4 896 if (!comp_param[idx].quit) {
56e93d26 897 len = qemu_put_qemu_file(f, comp_param[idx].file);
2f4fde93 898 rs->bytes_transferred += len;
56e93d26 899 }
a7a9a88f 900 qemu_mutex_unlock(&comp_param[idx].mutex);
56e93d26
JQ
901 }
902}
903
904static inline void set_compress_params(CompressParam *param, RAMBlock *block,
905 ram_addr_t offset)
906{
907 param->block = block;
908 param->offset = offset;
909}
910
b4d1c6e7 911static int compress_page_with_multi_thread(RAMState *rs, QEMUFile *f,
072c2511 912 RAMBlock *block, ram_addr_t offset)
56e93d26
JQ
913{
914 int idx, thread_count, bytes_xmit = -1, pages = -1;
915
916 thread_count = migrate_compress_threads();
0d9f9a5c 917 qemu_mutex_lock(&comp_done_lock);
56e93d26
JQ
918 while (true) {
919 for (idx = 0; idx < thread_count; idx++) {
920 if (comp_param[idx].done) {
a7a9a88f 921 comp_param[idx].done = false;
56e93d26 922 bytes_xmit = qemu_put_qemu_file(f, comp_param[idx].file);
a7a9a88f 923 qemu_mutex_lock(&comp_param[idx].mutex);
56e93d26 924 set_compress_params(&comp_param[idx], block, offset);
a7a9a88f
LL
925 qemu_cond_signal(&comp_param[idx].cond);
926 qemu_mutex_unlock(&comp_param[idx].mutex);
56e93d26 927 pages = 1;
b4d1c6e7 928 rs->norm_pages++;
072c2511 929 rs->bytes_transferred += bytes_xmit;
56e93d26
JQ
930 break;
931 }
932 }
933 if (pages > 0) {
934 break;
935 } else {
0d9f9a5c 936 qemu_cond_wait(&comp_done_cond, &comp_done_lock);
56e93d26
JQ
937 }
938 }
0d9f9a5c 939 qemu_mutex_unlock(&comp_done_lock);
56e93d26
JQ
940
941 return pages;
942}
943
944/**
945 * ram_save_compressed_page: compress the given page and send it to the stream
946 *
3d0684b2 947 * Returns the number of pages written.
56e93d26 948 *
6f37bb8b 949 * @rs: current RAM state
3d0684b2 950 * @ms: current migration state
56e93d26
JQ
951 * @f: QEMUFile where to send the data
952 * @block: block that contains the page we want to send
953 * @offset: offset inside the block for the page
954 * @last_stage: if we are at the completion stage
56e93d26 955 */
6f37bb8b
JQ
956static int ram_save_compressed_page(RAMState *rs, MigrationState *ms,
957 QEMUFile *f,
072c2511 958 PageSearchStatus *pss, bool last_stage)
56e93d26
JQ
959{
960 int pages = -1;
fc50438e 961 uint64_t bytes_xmit = 0;
56e93d26 962 uint8_t *p;
fc50438e 963 int ret, blen;
a08f6890
HZ
964 RAMBlock *block = pss->block;
965 ram_addr_t offset = pss->offset;
56e93d26 966
2f68e399 967 p = block->host + offset;
56e93d26 968
56e93d26
JQ
969 ret = ram_control_save_page(f, block->offset,
970 offset, TARGET_PAGE_SIZE, &bytes_xmit);
971 if (bytes_xmit) {
072c2511 972 rs->bytes_transferred += bytes_xmit;
56e93d26
JQ
973 pages = 1;
974 }
56e93d26
JQ
975 if (ret != RAM_SAVE_CONTROL_NOT_SUPP) {
976 if (ret != RAM_SAVE_CONTROL_DELAYED) {
977 if (bytes_xmit > 0) {
b4d1c6e7 978 rs->norm_pages++;
56e93d26 979 } else if (bytes_xmit == 0) {
f7ccd61b 980 rs->zero_pages++;
56e93d26
JQ
981 }
982 }
983 } else {
984 /* When starting the process of a new block, the first page of
985 * the block should be sent out before other pages in the same
986 * block, and all the pages in the last block should have been sent
987 * out. Keeping this order is important, because the 'cont' flag
988 * is used to avoid resending the block name.
989 */
6f37bb8b 990 if (block != rs->last_sent_block) {
2f4fde93 991 flush_compressed_data(rs, f);
072c2511 992 pages = save_zero_page(rs, f, block, offset, p);
56e93d26 993 if (pages == -1) {
fc50438e
LL
994 /* Make sure the first page is sent out before other pages */
995 bytes_xmit = save_page_header(f, block, offset |
996 RAM_SAVE_FLAG_COMPRESS_PAGE);
997 blen = qemu_put_compression_data(f, p, TARGET_PAGE_SIZE,
998 migrate_compress_level());
999 if (blen > 0) {
072c2511 1000 rs->bytes_transferred += bytes_xmit + blen;
b4d1c6e7 1001 rs->norm_pages++;
b3be2896 1002 pages = 1;
fc50438e
LL
1003 } else {
1004 qemu_file_set_error(f, blen);
1005 error_report("compressed data failed!");
b3be2896 1006 }
56e93d26 1007 }
53f09a10
PB
1008 if (pages > 0) {
1009 ram_release_pages(ms, block->idstr, pss->offset, pages);
1010 }
56e93d26 1011 } else {
fc50438e 1012 offset |= RAM_SAVE_FLAG_CONTINUE;
072c2511 1013 pages = save_zero_page(rs, f, block, offset, p);
56e93d26 1014 if (pages == -1) {
072c2511 1015 pages = compress_page_with_multi_thread(rs, f, block, offset);
53f09a10
PB
1016 } else {
1017 ram_release_pages(ms, block->idstr, pss->offset, pages);
56e93d26
JQ
1018 }
1019 }
1020 }
1021
1022 return pages;
1023}
1024
3d0684b2
JQ
1025/**
1026 * find_dirty_block: find the next dirty page and update any state
1027 * associated with the search process.
b9e60928 1028 *
3d0684b2 1029 * Returns whether a page was found
b9e60928 1030 *
6f37bb8b 1031 * @rs: current RAM state
3d0684b2
JQ
1032 * @f: QEMUFile where to send the data
1033 * @pss: data about the state of the current dirty page scan
1034 * @again: set to false if the search has scanned the whole of RAM
1035 * @ram_addr_abs: pointer into which to store the address of the dirty page
1036 * within the global ram_addr space
b9e60928 1037 */
6f37bb8b 1038static bool find_dirty_block(RAMState *rs, QEMUFile *f, PageSearchStatus *pss,
f3f491fc 1039 bool *again, ram_addr_t *ram_addr_abs)
b9e60928 1040{
6f37bb8b 1041 pss->offset = migration_bitmap_find_dirty(rs, pss->block, pss->offset,
a82d593b 1042 ram_addr_abs);
6f37bb8b
JQ
1043 if (pss->complete_round && pss->block == rs->last_seen_block &&
1044 pss->offset >= rs->last_offset) {
b9e60928
DDAG
1045 /*
1046 * We've been once around the RAM and haven't found anything.
1047 * Give up.
1048 */
1049 *again = false;
1050 return false;
1051 }
1052 if (pss->offset >= pss->block->used_length) {
1053 /* Didn't find anything in this RAM Block */
1054 pss->offset = 0;
1055 pss->block = QLIST_NEXT_RCU(pss->block, next);
1056 if (!pss->block) {
1057 /* Hit the end of the list */
1058 pss->block = QLIST_FIRST_RCU(&ram_list.blocks);
1059 /* Flag that we've looped */
1060 pss->complete_round = true;
6f37bb8b 1061 rs->ram_bulk_stage = false;
b9e60928
DDAG
1062 if (migrate_use_xbzrle()) {
1063 /* If xbzrle is on, stop using the data compression at this
1064 * point. In theory, xbzrle can do better than compression.
1065 */
2f4fde93 1066 flush_compressed_data(rs, f);
b9e60928
DDAG
1067 compression_switch = false;
1068 }
1069 }
1070 /* Didn't find anything this time, but try again on the new block */
1071 *again = true;
1072 return false;
1073 } else {
1074 /* Can go around again, but... */
1075 *again = true;
1076 /* We've found something so probably don't need to */
1077 return true;
1078 }
1079}
1080
3d0684b2
JQ
1081/**
1082 * unqueue_page: gets a page of the queue
1083 *
a82d593b 1084 * Helper for 'get_queued_page' - gets a page off the queue
a82d593b 1085 *
3d0684b2
JQ
1086 * Returns the block of the page (or NULL if none available)
1087 *
1088 * @ms: current migration state
1089 * @offset: used to return the offset within the RAMBlock
1090 * @ram_addr_abs: pointer into which to store the address of the dirty page
1091 * within the global ram_addr space
a82d593b
DDAG
1092 */
1093static RAMBlock *unqueue_page(MigrationState *ms, ram_addr_t *offset,
1094 ram_addr_t *ram_addr_abs)
1095{
1096 RAMBlock *block = NULL;
1097
1098 qemu_mutex_lock(&ms->src_page_req_mutex);
1099 if (!QSIMPLEQ_EMPTY(&ms->src_page_requests)) {
1100 struct MigrationSrcPageRequest *entry =
1101 QSIMPLEQ_FIRST(&ms->src_page_requests);
1102 block = entry->rb;
1103 *offset = entry->offset;
1104 *ram_addr_abs = (entry->offset + entry->rb->offset) &
1105 TARGET_PAGE_MASK;
1106
1107 if (entry->len > TARGET_PAGE_SIZE) {
1108 entry->len -= TARGET_PAGE_SIZE;
1109 entry->offset += TARGET_PAGE_SIZE;
1110 } else {
1111 memory_region_unref(block->mr);
1112 QSIMPLEQ_REMOVE_HEAD(&ms->src_page_requests, next_req);
1113 g_free(entry);
1114 }
1115 }
1116 qemu_mutex_unlock(&ms->src_page_req_mutex);
1117
1118 return block;
1119}
1120
3d0684b2
JQ
1121/**
1122 * get_queued_page: unqueue a page from the postcopy requests
1123 *
1124 * Skips pages that are already sent (!dirty)
a82d593b 1125 *
3d0684b2 1126 * Returns whether a queued page was found
a82d593b 1127 *
6f37bb8b 1128 * @rs: current RAM state
3d0684b2
JQ
1129 * @ms: current migration state
1130 * @pss: data about the state of the current dirty page scan
1131 * @ram_addr_abs: pointer into which to store the address of the dirty page
1132 * within the global ram_addr space
a82d593b 1133 */
6f37bb8b
JQ
1134static bool get_queued_page(RAMState *rs, MigrationState *ms,
1135 PageSearchStatus *pss,
a82d593b
DDAG
1136 ram_addr_t *ram_addr_abs)
1137{
1138 RAMBlock *block;
1139 ram_addr_t offset;
1140 bool dirty;
1141
1142 do {
1143 block = unqueue_page(ms, &offset, ram_addr_abs);
1144 /*
1145 * We're sending this page, and since it's postcopy nothing else
1146 * will dirty it, and we must make sure it doesn't get sent again
1147 * even if this queue request was received after the background
1148 * search already sent it.
1149 */
1150 if (block) {
1151 unsigned long *bitmap;
eb859c53 1152 bitmap = atomic_rcu_read(&rs->ram_bitmap)->bmap;
a82d593b
DDAG
1153 dirty = test_bit(*ram_addr_abs >> TARGET_PAGE_BITS, bitmap);
1154 if (!dirty) {
1155 trace_get_queued_page_not_dirty(
1156 block->idstr, (uint64_t)offset,
1157 (uint64_t)*ram_addr_abs,
1158 test_bit(*ram_addr_abs >> TARGET_PAGE_BITS,
eb859c53 1159 atomic_rcu_read(&rs->ram_bitmap)->unsentmap));
a82d593b
DDAG
1160 } else {
1161 trace_get_queued_page(block->idstr,
1162 (uint64_t)offset,
1163 (uint64_t)*ram_addr_abs);
1164 }
1165 }
1166
1167 } while (block && !dirty);
1168
1169 if (block) {
1170 /*
1171 * As soon as we start servicing pages out of order, then we have
1172 * to kill the bulk stage, since the bulk stage assumes
1173 * in (migration_bitmap_find_and_reset_dirty) that every page is
1174 * dirty, that's no longer true.
1175 */
6f37bb8b 1176 rs->ram_bulk_stage = false;
a82d593b
DDAG
1177
1178 /*
1179 * We want the background search to continue from the queued page
1180 * since the guest is likely to want other pages near to the page
1181 * it just requested.
1182 */
1183 pss->block = block;
1184 pss->offset = offset;
1185 }
1186
1187 return !!block;
1188}
1189
6c595cde 1190/**
5e58f968
JQ
1191 * migration_page_queue_free: drop any remaining pages in the ram
1192 * request queue
6c595cde 1193 *
3d0684b2
JQ
1194 * It should be empty at the end anyway, but in error cases there may
1195 * be some left. If any pages are left, we drop them.
1196 *
1197 * @ms: current migration state
6c595cde 1198 */
5e58f968 1199void migration_page_queue_free(MigrationState *ms)
6c595cde
DDAG
1200{
1201 struct MigrationSrcPageRequest *mspr, *next_mspr;
1202 /* This queue generally should be empty - but in the case of a failed
1203 * migration might have some droppings in.
1204 */
1205 rcu_read_lock();
1206 QSIMPLEQ_FOREACH_SAFE(mspr, &ms->src_page_requests, next_req, next_mspr) {
1207 memory_region_unref(mspr->rb->mr);
1208 QSIMPLEQ_REMOVE_HEAD(&ms->src_page_requests, next_req);
1209 g_free(mspr);
1210 }
1211 rcu_read_unlock();
1212}
1213
1214/**
3d0684b2
JQ
1215 * ram_save_queue_pages: queue the page for transmission
1216 *
1217 * A request from postcopy destination for example.
1218 *
1219 * Returns zero on success or negative on error
1220 *
1221 * @ms: current migration state
1222 * @rbname: Name of the RAMBLock of the request. NULL means the
1223 * same that last one.
1224 * @start: starting address from the start of the RAMBlock
1225 * @len: length (in bytes) to send
6c595cde
DDAG
1226 */
1227int ram_save_queue_pages(MigrationState *ms, const char *rbname,
1228 ram_addr_t start, ram_addr_t len)
1229{
1230 RAMBlock *ramblock;
68a098f3 1231 RAMState *rs = &ram_state;
6c595cde 1232
d3bf5418 1233 ms->postcopy_requests++;
6c595cde
DDAG
1234 rcu_read_lock();
1235 if (!rbname) {
1236 /* Reuse last RAMBlock */
68a098f3 1237 ramblock = rs->last_req_rb;
6c595cde
DDAG
1238
1239 if (!ramblock) {
1240 /*
1241 * Shouldn't happen, we can't reuse the last RAMBlock if
1242 * it's the 1st request.
1243 */
1244 error_report("ram_save_queue_pages no previous block");
1245 goto err;
1246 }
1247 } else {
1248 ramblock = qemu_ram_block_by_name(rbname);
1249
1250 if (!ramblock) {
1251 /* We shouldn't be asked for a non-existent RAMBlock */
1252 error_report("ram_save_queue_pages no block '%s'", rbname);
1253 goto err;
1254 }
68a098f3 1255 rs->last_req_rb = ramblock;
6c595cde
DDAG
1256 }
1257 trace_ram_save_queue_pages(ramblock->idstr, start, len);
1258 if (start+len > ramblock->used_length) {
9458ad6b
JQ
1259 error_report("%s request overrun start=" RAM_ADDR_FMT " len="
1260 RAM_ADDR_FMT " blocklen=" RAM_ADDR_FMT,
6c595cde
DDAG
1261 __func__, start, len, ramblock->used_length);
1262 goto err;
1263 }
1264
1265 struct MigrationSrcPageRequest *new_entry =
1266 g_malloc0(sizeof(struct MigrationSrcPageRequest));
1267 new_entry->rb = ramblock;
1268 new_entry->offset = start;
1269 new_entry->len = len;
1270
1271 memory_region_ref(ramblock->mr);
1272 qemu_mutex_lock(&ms->src_page_req_mutex);
1273 QSIMPLEQ_INSERT_TAIL(&ms->src_page_requests, new_entry, next_req);
1274 qemu_mutex_unlock(&ms->src_page_req_mutex);
1275 rcu_read_unlock();
1276
1277 return 0;
1278
1279err:
1280 rcu_read_unlock();
1281 return -1;
1282}
1283
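/*
 * A typical caller is the source-side return path, servicing a page fault
 * reported by the postcopy destination. A request might look like this
 * (block name and offset are made-up values):
 *
 *     ram_save_queue_pages(ms, "pc.ram", 0x12340000, TARGET_PAGE_SIZE);
 *
 * Passing rbname == NULL reuses rs->last_req_rb, which saves resending the
 * block name for runs of requests against the same block.
 */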
a82d593b 1284/**
3d0684b2 1285 * ram_save_target_page: save one target page
a82d593b 1286 *
3d0684b2 1287 * Returns the number of pages written
a82d593b 1288 *
6f37bb8b 1289 * @rs: current RAM state
3d0684b2 1290 * @ms: current migration state
a82d593b 1291 * @f: QEMUFile where to send the data
3d0684b2 1292 * @pss: data about the page we want to send
a82d593b 1293 * @last_stage: if we are at the completion stage
3d0684b2 1294 * @dirty_ram_abs: address of the start of the dirty page in ram_addr_t space
a82d593b 1295 */
6f37bb8b 1296static int ram_save_target_page(RAMState *rs, MigrationState *ms, QEMUFile *f,
a08f6890 1297 PageSearchStatus *pss,
a82d593b 1298 bool last_stage,
a82d593b
DDAG
1299 ram_addr_t dirty_ram_abs)
1300{
1301 int res = 0;
1302
1303 /* Check the pages is dirty and if it is send it */
0d8ec885 1304 if (migration_bitmap_clear_dirty(rs, dirty_ram_abs)) {
a82d593b
DDAG
1305 unsigned long *unsentmap;
1306 if (compression_switch && migrate_use_compression()) {
072c2511 1307 res = ram_save_compressed_page(rs, ms, f, pss, last_stage);
a82d593b 1308 } else {
072c2511 1309 res = ram_save_page(rs, ms, f, pss, last_stage);
a82d593b
DDAG
1310 }
1311
1312 if (res < 0) {
1313 return res;
1314 }
eb859c53 1315 unsentmap = atomic_rcu_read(&rs->ram_bitmap)->unsentmap;
a82d593b
DDAG
1316 if (unsentmap) {
1317 clear_bit(dirty_ram_abs >> TARGET_PAGE_BITS, unsentmap);
1318 }
3fd3c4b3
DDAG
1319 /* Only update last_sent_block if a block was actually sent; xbzrle
1320 * might have decided the page was identical so didn't bother writing
1321 * to the stream.
1322 */
1323 if (res > 0) {
6f37bb8b 1324 rs->last_sent_block = pss->block;
3fd3c4b3 1325 }
a82d593b
DDAG
1326 }
1327
1328 return res;
1329}
1330
1331/**
3d0684b2 1332 * ram_save_host_page: save a whole host page
a82d593b 1333 *
3d0684b2
JQ
1334 * Starting at *offset send pages up to the end of the current host
1335 * page. It's valid for the initial offset to point into the middle of
1336 * a host page in which case the remainder of the hostpage is sent.
1337 * Only dirty target pages are sent. Note that the host page size may
1338 * be a huge page for this block.
a82d593b 1339 *
3d0684b2
JQ
1340 * Returns the number of pages written or negative on error
1341 *
6f37bb8b 1342 * @rs: current RAM state
3d0684b2 1343 * @ms: current migration state
a82d593b 1344 * @f: QEMUFile where to send the data
3d0684b2 1345 * @pss: data about the page we want to send
a82d593b 1346 * @last_stage: if we are at the completion stage
a82d593b
DDAG
1347 * @dirty_ram_abs: Address of the start of the dirty page in ram_addr_t space
1348 */
6f37bb8b 1349static int ram_save_host_page(RAMState *rs, MigrationState *ms, QEMUFile *f,
a08f6890
HZ
1350 PageSearchStatus *pss,
1351 bool last_stage,
a82d593b
DDAG
1352 ram_addr_t dirty_ram_abs)
1353{
1354 int tmppages, pages = 0;
4c011c37
DDAG
1355 size_t pagesize = qemu_ram_pagesize(pss->block);
1356
a82d593b 1357 do {
6f37bb8b 1358 tmppages = ram_save_target_page(rs, ms, f, pss, last_stage,
072c2511 1359 dirty_ram_abs);
a82d593b
DDAG
1360 if (tmppages < 0) {
1361 return tmppages;
1362 }
1363
1364 pages += tmppages;
a08f6890 1365 pss->offset += TARGET_PAGE_SIZE;
a82d593b 1366 dirty_ram_abs += TARGET_PAGE_SIZE;
4c011c37 1367 } while (pss->offset & (pagesize - 1));
a82d593b
DDAG
1368
1369 /* The offset we leave with is the last one we looked at */
a08f6890 1370 pss->offset -= TARGET_PAGE_SIZE;
a82d593b
DDAG
1371 return pages;
1372}
6c595cde 1373
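/*
 * Example of the host-page loop above: for a RAMBlock backed by 2 MiB huge
 * pages with 4 KiB target pages, qemu_ram_pagesize() returns 0x200000, so
 * the loop walks up to 512 consecutive target pages until pss->offset
 * crosses the huge-page boundary ((pss->offset & 0x1fffff) wraps to 0).
 * Only target pages whose dirty bit is still set are actually sent; clean
 * ones fall straight through ram_save_target_page().
 */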
56e93d26 1374/**
3d0684b2 1375 * ram_find_and_save_block: finds a dirty page and sends it to f
56e93d26
JQ
1376 *
1377 * Called within an RCU critical section.
1378 *
3d0684b2 1379 * Returns the number of pages written where zero means no dirty pages
56e93d26 1380 *
6f37bb8b 1381 * @rs: current RAM state
56e93d26
JQ
1382 * @f: QEMUFile where to send the data
1383 * @last_stage: if we are at the completion stage
a82d593b
DDAG
1384 *
1385 * On systems where host-page-size > target-page-size it will send all the
1386 * pages in a host page that are dirty.
56e93d26
JQ
1387 */
1388
072c2511 1389static int ram_find_and_save_block(RAMState *rs, QEMUFile *f, bool last_stage)
56e93d26 1390{
b8fb8cb7 1391 PageSearchStatus pss;
a82d593b 1392 MigrationState *ms = migrate_get_current();
56e93d26 1393 int pages = 0;
b9e60928 1394 bool again, found;
f3f491fc
DDAG
1395 ram_addr_t dirty_ram_abs; /* Address of the start of the dirty page in
1396 ram_addr_t space */
56e93d26 1397
0827b9e9
AA
1398 /* No dirty page as there is zero RAM */
1399 if (!ram_bytes_total()) {
1400 return pages;
1401 }
1402
6f37bb8b
JQ
1403 pss.block = rs->last_seen_block;
1404 pss.offset = rs->last_offset;
b8fb8cb7
DDAG
1405 pss.complete_round = false;
1406
1407 if (!pss.block) {
1408 pss.block = QLIST_FIRST_RCU(&ram_list.blocks);
1409 }
56e93d26 1410
b9e60928 1411 do {
a82d593b 1412 again = true;
6f37bb8b 1413 found = get_queued_page(rs, ms, &pss, &dirty_ram_abs);
b9e60928 1414
a82d593b
DDAG
1415 if (!found) {
1416 /* priority queue empty, so just search for something dirty */
6f37bb8b 1417 found = find_dirty_block(rs, f, &pss, &again, &dirty_ram_abs);
a82d593b 1418 }
f3f491fc 1419
a82d593b 1420 if (found) {
072c2511 1421 pages = ram_save_host_page(rs, ms, f, &pss, last_stage,
a82d593b 1422 dirty_ram_abs);
56e93d26 1423 }
b9e60928 1424 } while (!pages && again);
56e93d26 1425
6f37bb8b
JQ
1426 rs->last_seen_block = pss.block;
1427 rs->last_offset = pss.offset;
56e93d26
JQ
1428
1429 return pages;
1430}
1431
1432void acct_update_position(QEMUFile *f, size_t size, bool zero)
1433{
1434 uint64_t pages = size / TARGET_PAGE_SIZE;
f7ccd61b
JQ
1435 RAMState *rs = &ram_state;
1436
56e93d26 1437 if (zero) {
f7ccd61b 1438 rs->zero_pages += pages;
56e93d26 1439 } else {
b4d1c6e7 1440 rs->norm_pages += pages;
2f4fde93 1441 rs->bytes_transferred += size;
56e93d26
JQ
1442 qemu_update_position(f, size);
1443 }
1444}
1445
56e93d26
JQ
1446uint64_t ram_bytes_total(void)
1447{
1448 RAMBlock *block;
1449 uint64_t total = 0;
1450
1451 rcu_read_lock();
1452 QLIST_FOREACH_RCU(block, &ram_list.blocks, next)
1453 total += block->used_length;
1454 rcu_read_unlock();
1455 return total;
1456}
1457
1458void free_xbzrle_decoded_buf(void)
1459{
1460 g_free(xbzrle_decoded_buf);
1461 xbzrle_decoded_buf = NULL;
1462}
1463
eb859c53 1464static void migration_bitmap_free(struct RAMBitmap *bmap)
60be6340
DL
1465{
1466 g_free(bmap->bmap);
f3f491fc 1467 g_free(bmap->unsentmap);
60be6340
DL
1468 g_free(bmap);
1469}
1470
6ad2a215 1471static void ram_migration_cleanup(void *opaque)
56e93d26 1472{
eb859c53
JQ
1473 RAMState *rs = opaque;
1474
2ff64038
LZ
1475 /* caller have hold iothread lock or is in a bh, so there is
1476 * no writing race against this migration_bitmap
1477 */
eb859c53
JQ
1478 struct RAMBitmap *bitmap = rs->ram_bitmap;
1479 atomic_rcu_set(&rs->ram_bitmap, NULL);
2ff64038 1480 if (bitmap) {
56e93d26 1481 memory_global_dirty_log_stop();
60be6340 1482 call_rcu(bitmap, migration_bitmap_free, rcu);
56e93d26
JQ
1483 }
1484
1485 XBZRLE_cache_lock();
1486 if (XBZRLE.cache) {
1487 cache_fini(XBZRLE.cache);
1488 g_free(XBZRLE.encoded_buf);
1489 g_free(XBZRLE.current_buf);
adb65dec 1490 g_free(ZERO_TARGET_PAGE);
56e93d26
JQ
1491 XBZRLE.cache = NULL;
1492 XBZRLE.encoded_buf = NULL;
1493 XBZRLE.current_buf = NULL;
1494 }
1495 XBZRLE_cache_unlock();
1496}
1497
6f37bb8b 1498static void ram_state_reset(RAMState *rs)
56e93d26 1499{
6f37bb8b
JQ
1500 rs->last_seen_block = NULL;
1501 rs->last_sent_block = NULL;
1502 rs->last_offset = 0;
1503 rs->last_version = ram_list.version;
1504 rs->ram_bulk_stage = true;
56e93d26
JQ
1505}
1506
1507#define MAX_WAIT 50 /* ms, half buffered_file limit */
1508
dd631697
LZ
1509void migration_bitmap_extend(ram_addr_t old, ram_addr_t new)
1510{
0d8ec885 1511 RAMState *rs = &ram_state;
108cfae0 1512
dd631697
LZ
1513 /* called in qemu main thread, so there is
1514 * no writing race against this migration_bitmap
1515 */
eb859c53
JQ
1516 if (rs->ram_bitmap) {
1517 struct RAMBitmap *old_bitmap = rs->ram_bitmap, *bitmap;
1518 bitmap = g_new(struct RAMBitmap, 1);
60be6340 1519 bitmap->bmap = bitmap_new(new);
dd631697
LZ
1520
1521 /* prevent bits in the migration_bitmap from being set
1522 * by migration_bitmap_sync_range() at the same time.
1523 * it is safe for migration if migration_bitmap bits are cleared
1524 * at the same time.
1525 */
108cfae0 1526 qemu_mutex_lock(&rs->bitmap_mutex);
60be6340
DL
1527 bitmap_copy(bitmap->bmap, old_bitmap->bmap, old);
1528 bitmap_set(bitmap->bmap, old, new - old);
f3f491fc
DDAG
1529
1530 /* We don't have a way to safely extend the sentmap
1531 * with RCU; so mark it as missing, entry to postcopy
1532 * will fail.
1533 */
1534 bitmap->unsentmap = NULL;
1535
eb859c53 1536 atomic_rcu_set(&rs->ram_bitmap, bitmap);
108cfae0 1537 qemu_mutex_unlock(&rs->bitmap_mutex);
0d8ec885 1538 rs->migration_dirty_pages += new - old;
60be6340 1539 call_rcu(old_bitmap, migration_bitmap_free, rcu);
dd631697
LZ
1540 }
1541}
56e93d26 1542
4f2e4252
DDAG
1543/*
1544 * 'expected' is the value you expect the bitmap mostly to be full
1545 * of; it won't bother printing lines that are all this value.
1546 * If 'todump' is null the migration bitmap is dumped.
1547 */
1548void ram_debug_dump_bitmap(unsigned long *todump, bool expected)
1549{
1550 int64_t ram_pages = last_ram_offset() >> TARGET_PAGE_BITS;
eb859c53 1551 RAMState *rs = &ram_state;
4f2e4252
DDAG
1552 int64_t cur;
1553 int64_t linelen = 128;
1554 char linebuf[129];
1555
1556 if (!todump) {
eb859c53 1557 todump = atomic_rcu_read(&rs->ram_bitmap)->bmap;
4f2e4252
DDAG
1558 }
1559
1560 for (cur = 0; cur < ram_pages; cur += linelen) {
1561 int64_t curb;
1562 bool found = false;
1563 /*
1564 * Last line; catch the case where the line length
1565 * is longer than remaining ram
1566 */
1567 if (cur + linelen > ram_pages) {
1568 linelen = ram_pages - cur;
1569 }
1570 for (curb = 0; curb < linelen; curb++) {
1571 bool thisbit = test_bit(cur + curb, todump);
1572 linebuf[curb] = thisbit ? '1' : '.';
1573 found = found || (thisbit != expected);
1574 }
1575 if (found) {
1576 linebuf[curb] = '\0';
1577 fprintf(stderr, "0x%08" PRIx64 " : %s\n", cur, linebuf);
1578 }
1579 }
1580}
1581
e0b266f0
DDAG
1582/* **** functions for postcopy ***** */
1583
ced1c616
PB
1584void ram_postcopy_migrated_memory_release(MigrationState *ms)
1585{
eb859c53 1586 RAMState *rs = &ram_state;
ced1c616 1587 struct RAMBlock *block;
eb859c53 1588 unsigned long *bitmap = atomic_rcu_read(&rs->ram_bitmap)->bmap;
ced1c616
PB
1589
1590 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1591 unsigned long first = block->offset >> TARGET_PAGE_BITS;
1592 unsigned long range = first + (block->used_length >> TARGET_PAGE_BITS);
1593 unsigned long run_start = find_next_zero_bit(bitmap, range, first);
1594
1595 while (run_start < range) {
1596 unsigned long run_end = find_next_bit(bitmap, range, run_start + 1);
1597 ram_discard_range(NULL, block->idstr, run_start << TARGET_PAGE_BITS,
1598 (run_end - run_start) << TARGET_PAGE_BITS);
1599 run_start = find_next_zero_bit(bitmap, range, run_end + 1);
1600 }
1601 }
1602}
1603
3d0684b2
JQ
1604/**
1605 * postcopy_send_discard_bm_ram: discard a RAMBlock
1606 *
1607 * Returns zero on success
1608 *
e0b266f0
DDAG
1609 * Callback from postcopy_each_ram_send_discard for each RAMBlock
1610 * Note: At this point the 'unsentmap' is the processed bitmap combined
1611 * with the dirtymap; so a '1' means it's either dirty or unsent.
3d0684b2
JQ
1612 *
1613 * @ms: current migration state
1614 * @pds: state for postcopy
1615 * @start: RAMBlock starting page
1616 * @length: RAMBlock size
e0b266f0
DDAG
1617 */
1618static int postcopy_send_discard_bm_ram(MigrationState *ms,
1619 PostcopyDiscardState *pds,
1620 unsigned long start,
1621 unsigned long length)
1622{
eb859c53 1623 RAMState *rs = &ram_state;
e0b266f0
DDAG
1624 unsigned long end = start + length; /* one after the end */
1625 unsigned long current;
1626 unsigned long *unsentmap;
1627
eb859c53 1628 unsentmap = atomic_rcu_read(&rs->ram_bitmap)->unsentmap;
e0b266f0
DDAG
1629 for (current = start; current < end; ) {
1630 unsigned long one = find_next_bit(unsentmap, end, current);
1631
1632 if (one <= end) {
1633 unsigned long zero = find_next_zero_bit(unsentmap, end, one + 1);
1634 unsigned long discard_length;
1635
1636 if (zero >= end) {
1637 discard_length = end - one;
1638 } else {
1639 discard_length = zero - one;
1640 }
d688c62d
DDAG
1641 if (discard_length) {
1642 postcopy_discard_send_range(ms, pds, one, discard_length);
1643 }
e0b266f0
DDAG
1644 current = one + discard_length;
1645 } else {
1646 current = one;
1647 }
1648 }
1649
1650 return 0;
1651}
1652
3d0684b2
JQ
1653/**
1654 * postcopy_each_ram_send_discard: discard all RAMBlocks
1655 *
1656 * Returns 0 for success or negative for error
1657 *
e0b266f0
DDAG
1658 * Utility for the outgoing postcopy code.
1659 * Calls postcopy_send_discard_bm_ram for each RAMBlock
1660 * passing it bitmap indexes and name.
e0b266f0
DDAG
1661 * (qemu_ram_foreach_block ends up passing unscaled lengths
1662 * which would mean postcopy code would have to deal with target page)
3d0684b2
JQ
1663 *
1664 * @ms: current migration state
e0b266f0
DDAG
1665 */
1666static int postcopy_each_ram_send_discard(MigrationState *ms)
1667{
1668 struct RAMBlock *block;
1669 int ret;
1670
1671 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1672 unsigned long first = block->offset >> TARGET_PAGE_BITS;
1673 PostcopyDiscardState *pds = postcopy_discard_send_init(ms,
1674 first,
1675 block->idstr);
1676
1677 /*
1678 * Postcopy sends chunks of the bitmap over the wire, but it
1679 * just needs indexes at this point; this avoids it needing
1680 * target-page specific code.
1681 */
1682 ret = postcopy_send_discard_bm_ram(ms, pds, first,
1683 block->used_length >> TARGET_PAGE_BITS);
1684 postcopy_discard_send_finish(ms, pds);
1685 if (ret) {
1686 return ret;
1687 }
1688 }
1689
1690 return 0;
1691}
1692
3d0684b2
JQ
1693/**
1694 * postcopy_chunk_hostpages_pass: canonicalize bitmap in hostpages
1695 *
1696 * Helper for postcopy_chunk_hostpages; it's called twice to
1697 * canonicalize the two bitmaps, which are similar but one is
1698 * inverted.
99e314eb 1699 *
3d0684b2
JQ
1700 * Postcopy requires that all target pages in a hostpage are dirty or
1701 * clean, not a mix. This function canonicalizes the bitmaps.
99e314eb 1702 *
3d0684b2
JQ
1703 * @ms: current migration state
1704 * @unsent_pass: if true we need to canonicalize partially unsent host pages
1705 * otherwise we need to canonicalize partially dirty host pages
1706 * @block: block that contains the page we want to canonicalize
1707 * @pds: state for postcopy
99e314eb
DDAG
1708 */
1709static void postcopy_chunk_hostpages_pass(MigrationState *ms, bool unsent_pass,
1710 RAMBlock *block,
1711 PostcopyDiscardState *pds)
1712{
0d8ec885 1713 RAMState *rs = &ram_state;
99e314eb
DDAG
1714 unsigned long *bitmap;
1715 unsigned long *unsentmap;
29c59172 1716 unsigned int host_ratio = block->page_size / TARGET_PAGE_SIZE;
99e314eb
DDAG
1717 unsigned long first = block->offset >> TARGET_PAGE_BITS;
1718 unsigned long len = block->used_length >> TARGET_PAGE_BITS;
1719 unsigned long last = first + (len - 1);
1720 unsigned long run_start;
1721
29c59172
DDAG
1722 if (block->page_size == TARGET_PAGE_SIZE) {
1723 /* Easy case - TPS==HPS for a non-huge page RAMBlock */
1724 return;
1725 }
1726
eb859c53
JQ
1727 bitmap = atomic_rcu_read(&rs->ram_bitmap)->bmap;
1728 unsentmap = atomic_rcu_read(&rs->ram_bitmap)->unsentmap;
99e314eb
DDAG
1729
1730 if (unsent_pass) {
1731 /* Find a sent page */
1732 run_start = find_next_zero_bit(unsentmap, last + 1, first);
1733 } else {
1734 /* Find a dirty page */
1735 run_start = find_next_bit(bitmap, last + 1, first);
1736 }
1737
1738 while (run_start <= last) {
1739 bool do_fixup = false;
1740 unsigned long fixup_start_addr;
1741 unsigned long host_offset;
1742
1743 /*
1744 * If the start of this run of pages is in the middle of a host
1745 * page, then we need to fixup this host page.
1746 */
1747 host_offset = run_start % host_ratio;
1748 if (host_offset) {
1749 do_fixup = true;
1750 run_start -= host_offset;
1751 fixup_start_addr = run_start;
1752 /* For the next pass */
1753 run_start = run_start + host_ratio;
1754 } else {
1755 /* Find the end of this run */
1756 unsigned long run_end;
1757 if (unsent_pass) {
1758 run_end = find_next_bit(unsentmap, last + 1, run_start + 1);
1759 } else {
1760 run_end = find_next_zero_bit(bitmap, last + 1, run_start + 1);
1761 }
1762 /*
1763 * If the end isn't at the start of a host page, then the
1764 * run doesn't finish at the end of a host page
1765 * and we need to discard.
1766 */
1767 host_offset = run_end % host_ratio;
1768 if (host_offset) {
1769 do_fixup = true;
1770 fixup_start_addr = run_end - host_offset;
1771 /*
1772 * This host page has gone, the next loop iteration starts
1773 * from after the fixup
1774 */
1775 run_start = fixup_start_addr + host_ratio;
1776 } else {
1777 /*
1778 * No discards on this iteration, next loop starts from
1779 * next sent/dirty page
1780 */
1781 run_start = run_end + 1;
1782 }
1783 }
1784
1785 if (do_fixup) {
1786 unsigned long page;
1787
1788 /* Tell the destination to discard this page */
1789 if (unsent_pass || !test_bit(fixup_start_addr, unsentmap)) {
1790 /* For the unsent_pass we:
1791 * discard partially sent pages
1792 * For the !unsent_pass (dirty) we:
1793 * discard partially dirty pages that were sent
1794 * (any partially sent pages were already discarded
1795 * by the previous unsent_pass)
1796 */
1797 postcopy_discard_send_range(ms, pds, fixup_start_addr,
1798 host_ratio);
1799 }
1800
1801 /* Clean up the bitmap */
1802 for (page = fixup_start_addr;
1803 page < fixup_start_addr + host_ratio; page++) {
1804 /* All pages in this host page are now not sent */
1805 set_bit(page, unsentmap);
1806
1807 /*
1808 * Remark them as dirty, updating the count for any pages
1809 * that weren't previously dirty.
1810 */
0d8ec885 1811 rs->migration_dirty_pages += !test_and_set_bit(page, bitmap);
99e314eb
DDAG
1812 }
1813 }
1814
1815 if (unsent_pass) {
1816 /* Find the next sent page for the next iteration */
1817 run_start = find_next_zero_bit(unsentmap, last + 1,
1818 run_start);
1819 } else {
1820 /* Find the next dirty page for the next iteration */
1821 run_start = find_next_bit(bitmap, last + 1, run_start);
1822 }
1823 }
1824}
1825
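/*
 * Illustrative sketch (not QEMU code): the rounding arithmetic behind the
 * fixups in postcopy_chunk_hostpages_pass() above.  Assuming 2MiB host
 * pages and 4KiB target pages, host_ratio is 512 target pages per host
 * page; a run boundary that is not a multiple of host_ratio is rounded
 * down to the enclosing host-page start, and that whole host page is
 * discarded and re-marked dirty.  host_page_start() is hypothetical.
 */
#include <assert.h>

static unsigned long host_page_start(unsigned long tp_index,
                                     unsigned long host_ratio)
{
    return tp_index - (tp_index % host_ratio);    /* round down */
}

int main(void)
{
    unsigned long host_ratio = 512;               /* 2MiB / 4KiB */

    assert(host_page_start(1000, host_ratio) == 512);   /* run starts mid page */
    assert(host_page_start(1539, host_ratio) == 1536);  /* run ends mid page   */
    assert(host_page_start(1024, host_ratio) == 1024);  /* already aligned     */
    return 0;
}
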
3d0684b2
JQ
1826/**
1827 * postcopy_chunk_hostpages: discard any partially sent host page
1828 *
99e314eb
DDAG
1829 * Utility for the outgoing postcopy code.
1830 *
1831 * Discard any partially sent host-page size chunks, mark any partially
29c59172
DDAG
1832 * dirty host-page size chunks as all dirty. In this case the host-page
1833 * is the host-page for the particular RAMBlock, i.e. it might be a huge page
99e314eb 1834 *
3d0684b2
JQ
1835 * Returns zero on success
1836 *
1837 * @ms: current migration state
99e314eb
DDAG
1838 */
1839static int postcopy_chunk_hostpages(MigrationState *ms)
1840{
6f37bb8b 1841 RAMState *rs = &ram_state;
99e314eb
DDAG
1842 struct RAMBlock *block;
1843
99e314eb 1844 /* Easiest way to make sure we don't resume in the middle of a host-page */
6f37bb8b
JQ
1845 rs->last_seen_block = NULL;
1846 rs->last_sent_block = NULL;
1847 rs->last_offset = 0;
99e314eb
DDAG
1848
1849 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1850 unsigned long first = block->offset >> TARGET_PAGE_BITS;
1851
1852 PostcopyDiscardState *pds =
1853 postcopy_discard_send_init(ms, first, block->idstr);
1854
1855 /* First pass: Discard all partially sent host pages */
1856 postcopy_chunk_hostpages_pass(ms, true, block, pds);
1857 /*
1858 * Second pass: Ensure that all partially dirty host pages are made
1859 * fully dirty.
1860 */
1861 postcopy_chunk_hostpages_pass(ms, false, block, pds);
1862
1863 postcopy_discard_send_finish(ms, pds);
1864 } /* ram_list loop */
1865
1866 return 0;
1867}
1868
3d0684b2
JQ
1869/**
1870 * ram_postcopy_send_discard_bitmap: transmit the discard bitmap
1871 *
1872 * Returns zero on success
1873 *
e0b266f0
DDAG
1874 * Transmit the set of pages to be discarded after precopy to the target;
1875 * these are pages that:
1876 * a) Have been previously transmitted but are now dirty again
1877 * b) Have never been transmitted; this ensures that
1878 * any pages on the destination that have been mapped by background
1879 * tasks get discarded (transparent huge pages are the specific concern)
1880 * Hopefully this is pretty sparse
3d0684b2
JQ
1881 *
1882 * @ms: current migration state
e0b266f0
DDAG
1883 */
1884int ram_postcopy_send_discard_bitmap(MigrationState *ms)
1885{
eb859c53 1886 RAMState *rs = &ram_state;
e0b266f0
DDAG
1887 int ret;
1888 unsigned long *bitmap, *unsentmap;
1889
1890 rcu_read_lock();
1891
1892 /* This should be our last sync, the src is now paused */
eb859c53 1893 migration_bitmap_sync(rs);
e0b266f0 1894
eb859c53 1895 unsentmap = atomic_rcu_read(&rs->ram_bitmap)->unsentmap;
e0b266f0
DDAG
1896 if (!unsentmap) {
1897 /* We don't have a safe way to resize the unsentmap, so
1898 * if the bitmap was resized it will be NULL at this
1899 * point.
1900 */
1901 error_report("migration ram resized during precopy phase");
1902 rcu_read_unlock();
1903 return -EINVAL;
1904 }
1905
29c59172 1906 /* Deal with TPS != HPS and huge pages */
99e314eb
DDAG
1907 ret = postcopy_chunk_hostpages(ms);
1908 if (ret) {
1909 rcu_read_unlock();
1910 return ret;
1911 }
1912
e0b266f0
DDAG
1913 /*
1914 * Update the unsentmap to be unsentmap = unsentmap | dirty
1915 */
eb859c53 1916 bitmap = atomic_rcu_read(&rs->ram_bitmap)->bmap;
e0b266f0
DDAG
1917 bitmap_or(unsentmap, unsentmap, bitmap,
1918 last_ram_offset() >> TARGET_PAGE_BITS);
1919
1920
1921 trace_ram_postcopy_send_discard_bitmap();
1922#ifdef DEBUG_POSTCOPY
1923 ram_debug_dump_bitmap(unsentmap, true);
1924#endif
1925
1926 ret = postcopy_each_ram_send_discard(ms);
1927 rcu_read_unlock();
1928
1929 return ret;
1930}
1931
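/*
 * Illustrative sketch (not QEMU code): the effect of the bitmap_or() in
 * ram_postcopy_send_discard_bitmap() above.  After the OR, a set bit in
 * the unsentmap means "never sent OR dirtied again", which is exactly the
 * set of pages the destination has to discard.  Plain 64-bit words stand
 * in for the real bitmaps.
 */
#include <assert.h>
#include <stdint.h>

int main(void)
{
    uint64_t unsent = 0x00f0;   /* pages 4..7 were never sent              */
    uint64_t dirty  = 0x0103;   /* pages 0, 1 and 8 redirtied after send   */

    unsent |= dirty;            /* bitmap_or(unsentmap, unsentmap, bitmap) */
    assert(unsent == 0x01f3);   /* union: pages 0, 1, 4..7 and 8           */
    return 0;
}
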
3d0684b2
JQ
1932/**
1933 * ram_discard_range: discard dirtied pages at the beginning of postcopy
e0b266f0 1934 *
3d0684b2 1935 * Returns zero on success
e0b266f0 1936 *
3d0684b2 1937 * @mis: current migration incoming state
36449157
JQ
1938 * @rbname: name of the RAMBlock of the request. NULL means the
1939 * same as the last one.
3d0684b2
JQ
1940 * @start: RAMBlock starting page
1941 * @length: RAMBlock size
e0b266f0
DDAG
1942 */
1943int ram_discard_range(MigrationIncomingState *mis,
36449157 1944 const char *rbname,
e0b266f0
DDAG
1945 uint64_t start, size_t length)
1946{
1947 int ret = -1;
1948
36449157 1949 trace_ram_discard_range(rbname, start, length);
d3a5038c 1950
e0b266f0 1951 rcu_read_lock();
36449157 1952 RAMBlock *rb = qemu_ram_block_by_name(rbname);
e0b266f0
DDAG
1953
1954 if (!rb) {
36449157 1955 error_report("ram_discard_range: Failed to find block '%s'", rbname);
e0b266f0
DDAG
1956 goto err;
1957 }
1958
d3a5038c 1959 ret = ram_block_discard_range(rb, start, length);
e0b266f0
DDAG
1960
1961err:
1962 rcu_read_unlock();
1963
1964 return ret;
1965}
1966
ceb4d168 1967static int ram_state_init(RAMState *rs)
56e93d26 1968{
56e93d26
JQ
1969 int64_t ram_bitmap_pages; /* Size of bitmap in pages, including gaps */
1970
ceb4d168 1971 memset(rs, 0, sizeof(*rs));
108cfae0 1972 qemu_mutex_init(&rs->bitmap_mutex);
56e93d26
JQ
1973
1974 if (migrate_use_xbzrle()) {
1975 XBZRLE_cache_lock();
adb65dec 1976 ZERO_TARGET_PAGE = g_malloc0(TARGET_PAGE_SIZE);
56e93d26
JQ
1977 XBZRLE.cache = cache_init(migrate_xbzrle_cache_size() /
1978 TARGET_PAGE_SIZE,
1979 TARGET_PAGE_SIZE);
1980 if (!XBZRLE.cache) {
1981 XBZRLE_cache_unlock();
1982 error_report("Error creating cache");
1983 return -1;
1984 }
1985 XBZRLE_cache_unlock();
1986
1987 /* We prefer not to abort if there is no memory */
1988 XBZRLE.encoded_buf = g_try_malloc0(TARGET_PAGE_SIZE);
1989 if (!XBZRLE.encoded_buf) {
1990 error_report("Error allocating encoded_buf");
1991 return -1;
1992 }
1993
1994 XBZRLE.current_buf = g_try_malloc(TARGET_PAGE_SIZE);
1995 if (!XBZRLE.current_buf) {
1996 error_report("Error allocating current_buf");
1997 g_free(XBZRLE.encoded_buf);
1998 XBZRLE.encoded_buf = NULL;
1999 return -1;
2000 }
56e93d26
JQ
2001 }
2002
49877834
PB
2003 /* For memory_global_dirty_log_start below. */
2004 qemu_mutex_lock_iothread();
2005
56e93d26
JQ
2006 qemu_mutex_lock_ramlist();
2007 rcu_read_lock();
6f37bb8b 2008 ram_state_reset(rs);
56e93d26 2009
eb859c53 2010 rs->ram_bitmap = g_new0(struct RAMBitmap, 1);
0827b9e9
AA
2011 /* Skip setting bitmap if there is no RAM */
2012 if (ram_bytes_total()) {
2013 ram_bitmap_pages = last_ram_offset() >> TARGET_PAGE_BITS;
eb859c53
JQ
2014 rs->ram_bitmap->bmap = bitmap_new(ram_bitmap_pages);
2015 bitmap_set(rs->ram_bitmap->bmap, 0, ram_bitmap_pages);
0827b9e9
AA
2016
2017 if (migrate_postcopy_ram()) {
eb859c53
JQ
2018 rs->ram_bitmap->unsentmap = bitmap_new(ram_bitmap_pages);
2019 bitmap_set(rs->ram_bitmap->unsentmap, 0, ram_bitmap_pages);
0827b9e9 2020 }
f3f491fc
DDAG
2021 }
2022
56e93d26
JQ
2023 /*
2024 * Count the total number of pages used by ram blocks not including any
2025 * gaps due to alignment or unplugs.
2026 */
0d8ec885 2027 rs->migration_dirty_pages = ram_bytes_total() >> TARGET_PAGE_BITS;
56e93d26
JQ
2028
2029 memory_global_dirty_log_start();
8d820d6f 2030 migration_bitmap_sync(rs);
56e93d26 2031 qemu_mutex_unlock_ramlist();
49877834 2032 qemu_mutex_unlock_iothread();
a91246c9
HZ
2033 rcu_read_unlock();
2034
2035 return 0;
2036}
2037
3d0684b2
JQ
2038/*
2039 * Each of ram_save_setup, ram_save_iterate and ram_save_complete has
a91246c9
HZ
2040 * long-running RCU critical section. When rcu-reclaims in the code
2041 * start to become numerous it will be necessary to reduce the
2042 * granularity of these critical sections.
2043 */
2044
3d0684b2
JQ
2045/**
2046 * ram_save_setup: Setup RAM for migration
2047 *
2048 * Returns zero to indicate success and negative for error
2049 *
2050 * @f: QEMUFile where to send the data
2051 * @opaque: RAMState pointer
2052 */
a91246c9
HZ
2053static int ram_save_setup(QEMUFile *f, void *opaque)
2054{
6f37bb8b 2055 RAMState *rs = opaque;
a91246c9
HZ
2056 RAMBlock *block;
2057
2058 /* migration has already setup the bitmap, reuse it. */
2059 if (!migration_in_colo_state()) {
ceb4d168 2060 if (ram_state_init(rs) < 0) {
a91246c9
HZ
2061 return -1;
2062 }
2063 }
2064
2065 rcu_read_lock();
56e93d26
JQ
2066
2067 qemu_put_be64(f, ram_bytes_total() | RAM_SAVE_FLAG_MEM_SIZE);
2068
2069 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
2070 qemu_put_byte(f, strlen(block->idstr));
2071 qemu_put_buffer(f, (uint8_t *)block->idstr, strlen(block->idstr));
2072 qemu_put_be64(f, block->used_length);
ef08fb38
DDAG
2073 if (migrate_postcopy_ram() && block->page_size != qemu_host_page_size) {
2074 qemu_put_be64(f, block->page_size);
2075 }
56e93d26
JQ
2076 }
2077
2078 rcu_read_unlock();
2079
2080 ram_control_before_iterate(f, RAM_CONTROL_SETUP);
2081 ram_control_after_iterate(f, RAM_CONTROL_SETUP);
2082
2083 qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
2084
2085 return 0;
2086}
2087
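/*
 * Illustrative sketch (not QEMU code): the per-RAMBlock record that
 * ram_save_setup() above writes after the MEM_SIZE word -- a length byte,
 * the id string (no NUL) and the block's used_length as a big-endian
 * 64-bit value.  put_be64() and encode_block_record() are hypothetical
 * helpers writing into a plain byte buffer instead of a QEMUFile.
 */
#include <stddef.h>
#include <stdint.h>
#include <string.h>

static size_t put_be64(uint8_t *p, uint64_t v)
{
    for (int i = 0; i < 8; i++) {
        p[i] = (uint8_t)(v >> (56 - 8 * i));
    }
    return 8;
}

static size_t encode_block_record(uint8_t *p, const char *idstr,
                                  uint64_t used_length)
{
    size_t len = strlen(idstr);        /* sent as a single length byte */
    size_t off = 0;

    p[off++] = (uint8_t)len;
    memcpy(p + off, idstr, len);
    off += len;
    off += put_be64(p + off, used_length);
    return off;                        /* bytes used in the buffer */
}

int main(void)
{
    uint8_t buf[64];

    /* "pc.ram" (6 bytes) + 1 length byte + 8 bytes of used_length = 15 */
    return encode_block_record(buf, "pc.ram", 0x20000000) == 15 ? 0 : 1;
}
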
3d0684b2
JQ
2088/**
2089 * ram_save_iterate: iterative stage for migration
2090 *
2091 * Returns zero to indicate success and negative for error
2092 *
2093 * @f: QEMUFile where to send the data
2094 * @opaque: RAMState pointer
2095 */
56e93d26
JQ
2096static int ram_save_iterate(QEMUFile *f, void *opaque)
2097{
6f37bb8b 2098 RAMState *rs = opaque;
56e93d26
JQ
2099 int ret;
2100 int i;
2101 int64_t t0;
5c90308f 2102 int done = 0;
56e93d26
JQ
2103
2104 rcu_read_lock();
6f37bb8b
JQ
2105 if (ram_list.version != rs->last_version) {
2106 ram_state_reset(rs);
56e93d26
JQ
2107 }
2108
2109 /* Read version before ram_list.blocks */
2110 smp_rmb();
2111
2112 ram_control_before_iterate(f, RAM_CONTROL_ROUND);
2113
2114 t0 = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
2115 i = 0;
2116 while ((ret = qemu_file_rate_limit(f)) == 0) {
2117 int pages;
2118
072c2511 2119 pages = ram_find_and_save_block(rs, f, false);
56e93d26
JQ
2120 /* no more pages to send */
2121 if (pages == 0) {
5c90308f 2122 done = 1;
56e93d26
JQ
2123 break;
2124 }
23b28c3c 2125 rs->iterations++;
070afca2 2126
56e93d26
JQ
2127 /* we want to check in the 1st loop, just in case it was the 1st time
2128 and we had to sync the dirty bitmap.
2129 qemu_clock_get_ns() is a bit expensive, so we only check every few
2130 iterations
2131 */
2132 if ((i & 63) == 0) {
2133 uint64_t t1 = (qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - t0) / 1000000;
2134 if (t1 > MAX_WAIT) {
55c4446b 2135 trace_ram_save_iterate_big_wait(t1, i);
56e93d26
JQ
2136 break;
2137 }
2138 }
2139 i++;
2140 }
2f4fde93 2141 flush_compressed_data(rs, f);
56e93d26
JQ
2142 rcu_read_unlock();
2143
2144 /*
2145 * Must occur before EOS (or any QEMUFile operation)
2146 * because of RDMA protocol.
2147 */
2148 ram_control_after_iterate(f, RAM_CONTROL_ROUND);
2149
2150 qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
2f4fde93 2151 rs->bytes_transferred += 8;
56e93d26
JQ
2152
2153 ret = qemu_file_get_error(f);
2154 if (ret < 0) {
2155 return ret;
2156 }
2157
5c90308f 2158 return done;
56e93d26
JQ
2159}
2160
3d0684b2
JQ
2161/**
2162 * ram_save_complete: function called to send the remaining amount of ram
2163 *
2164 * Returns zero to indicate success
2165 *
2166 * Called with iothread lock
2167 *
2168 * @f: QEMUFile where to send the data
2169 * @opaque: RAMState pointer
2170 */
56e93d26
JQ
2171static int ram_save_complete(QEMUFile *f, void *opaque)
2172{
6f37bb8b
JQ
2173 RAMState *rs = opaque;
2174
56e93d26
JQ
2175 rcu_read_lock();
2176
663e6c1d 2177 if (!migration_in_postcopy(migrate_get_current())) {
8d820d6f 2178 migration_bitmap_sync(rs);
663e6c1d 2179 }
56e93d26
JQ
2180
2181 ram_control_before_iterate(f, RAM_CONTROL_FINISH);
2182
2183 /* try transferring iterative blocks of memory */
2184
2185 /* flush all remaining blocks regardless of rate limiting */
2186 while (true) {
2187 int pages;
2188
072c2511 2189 pages = ram_find_and_save_block(rs, f, !migration_in_colo_state());
56e93d26
JQ
2190 /* no more blocks to send */
2191 if (pages == 0) {
2192 break;
2193 }
2194 }
2195
2f4fde93 2196 flush_compressed_data(rs, f);
56e93d26 2197 ram_control_after_iterate(f, RAM_CONTROL_FINISH);
56e93d26
JQ
2198
2199 rcu_read_unlock();
d09a6fde 2200
56e93d26
JQ
2201 qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
2202
2203 return 0;
2204}
2205
c31b098f
DDAG
2206static void ram_save_pending(QEMUFile *f, void *opaque, uint64_t max_size,
2207 uint64_t *non_postcopiable_pending,
2208 uint64_t *postcopiable_pending)
56e93d26 2209{
8d820d6f 2210 RAMState *rs = opaque;
56e93d26
JQ
2211 uint64_t remaining_size;
2212
9edabd4d 2213 remaining_size = rs->migration_dirty_pages * TARGET_PAGE_SIZE;
56e93d26 2214
663e6c1d
DDAG
2215 if (!migration_in_postcopy(migrate_get_current()) &&
2216 remaining_size < max_size) {
56e93d26
JQ
2217 qemu_mutex_lock_iothread();
2218 rcu_read_lock();
8d820d6f 2219 migration_bitmap_sync(rs);
56e93d26
JQ
2220 rcu_read_unlock();
2221 qemu_mutex_unlock_iothread();
9edabd4d 2222 remaining_size = rs->migration_dirty_pages * TARGET_PAGE_SIZE;
56e93d26 2223 }
c31b098f
DDAG
2224
2225 /* We can do postcopy, and all the data is postcopiable */
2226 *postcopiable_pending += remaining_size;
56e93d26
JQ
2227}
2228
2229static int load_xbzrle(QEMUFile *f, ram_addr_t addr, void *host)
2230{
2231 unsigned int xh_len;
2232 int xh_flags;
063e760a 2233 uint8_t *loaded_data;
56e93d26
JQ
2234
2235 if (!xbzrle_decoded_buf) {
2236 xbzrle_decoded_buf = g_malloc(TARGET_PAGE_SIZE);
2237 }
063e760a 2238 loaded_data = xbzrle_decoded_buf;
56e93d26
JQ
2239
2240 /* extract RLE header */
2241 xh_flags = qemu_get_byte(f);
2242 xh_len = qemu_get_be16(f);
2243
2244 if (xh_flags != ENCODING_FLAG_XBZRLE) {
2245 error_report("Failed to load XBZRLE page - wrong compression!");
2246 return -1;
2247 }
2248
2249 if (xh_len > TARGET_PAGE_SIZE) {
2250 error_report("Failed to load XBZRLE page - len overflow!");
2251 return -1;
2252 }
2253 /* load data and decode */
063e760a 2254 qemu_get_buffer_in_place(f, &loaded_data, xh_len);
56e93d26
JQ
2255
2256 /* decode RLE */
063e760a 2257 if (xbzrle_decode_buffer(loaded_data, xh_len, host,
56e93d26
JQ
2258 TARGET_PAGE_SIZE) == -1) {
2259 error_report("Failed to load XBZRLE page - decode error!");
2260 return -1;
2261 }
2262
2263 return 0;
2264}
2265
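/*
 * Illustrative sketch (not QEMU code): the wire layout consumed by
 * load_xbzrle() above -- one flags byte, a big-endian 16-bit encoded
 * length, then that many bytes of XBZRLE data which patch the previous
 * copy of the page.  parse_xbzrle_header() is a hypothetical helper over
 * a memory buffer, and the flag value below is assumed for illustration.
 */
#include <stddef.h>
#include <stdint.h>

#define SKETCH_ENCODING_FLAG_XBZRLE 0x1    /* assumed value */

struct xbzrle_hdr {
    uint8_t flags;
    uint16_t len;
};

/* Returns the bytes consumed, or 0 if the header is malformed/too large. */
static size_t parse_xbzrle_header(const uint8_t *buf, size_t avail,
                                  size_t page_size, struct xbzrle_hdr *hdr)
{
    if (avail < 3) {
        return 0;
    }
    hdr->flags = buf[0];
    hdr->len = (uint16_t)((buf[1] << 8) | buf[2]);   /* big-endian 16 bits */
    if (hdr->flags != SKETCH_ENCODING_FLAG_XBZRLE || hdr->len > page_size) {
        return 0;
    }
    return 3;                          /* the encoded bytes follow */
}

int main(void)
{
    const uint8_t wire[] = { 0x1, 0x00, 0x10 };      /* flag, len = 16 */
    struct xbzrle_hdr hdr;

    return parse_xbzrle_header(wire, sizeof(wire), 4096, &hdr) == 3 ? 0 : 1;
}
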
3d0684b2
JQ
2266/**
2267 * ram_block_from_stream: read a RAMBlock id from the migration stream
2268 *
2269 * Must be called from within a rcu critical section.
2270 *
56e93d26 2271 * Returns a pointer from within the RCU-protected ram_list.
a7180877 2272 *
3d0684b2
JQ
2273 * @f: QEMUFile where to read the data from
2274 * @flags: Page flags (mostly to see if it's a continuation of previous block)
a7180877 2275 */
3d0684b2 2276static inline RAMBlock *ram_block_from_stream(QEMUFile *f, int flags)
56e93d26
JQ
2277{
2278 static RAMBlock *block = NULL;
2279 char id[256];
2280 uint8_t len;
2281
2282 if (flags & RAM_SAVE_FLAG_CONTINUE) {
4c4bad48 2283 if (!block) {
56e93d26
JQ
2284 error_report("Ack, bad migration stream!");
2285 return NULL;
2286 }
4c4bad48 2287 return block;
56e93d26
JQ
2288 }
2289
2290 len = qemu_get_byte(f);
2291 qemu_get_buffer(f, (uint8_t *)id, len);
2292 id[len] = 0;
2293
e3dd7493 2294 block = qemu_ram_block_by_name(id);
4c4bad48
HZ
2295 if (!block) {
2296 error_report("Can't find block %s", id);
2297 return NULL;
56e93d26
JQ
2298 }
2299
4c4bad48
HZ
2300 return block;
2301}
2302
2303static inline void *host_from_ram_block_offset(RAMBlock *block,
2304 ram_addr_t offset)
2305{
2306 if (!offset_in_ramblock(block, offset)) {
2307 return NULL;
2308 }
2309
2310 return block->host + offset;
56e93d26
JQ
2311}
2312
3d0684b2
JQ
2313/**
2314 * ram_handle_compressed: handle the zero page case
2315 *
56e93d26
JQ
2316 * If a page (or a whole RDMA chunk) has been
2317 * determined to be zero, then zap it.
3d0684b2
JQ
2318 *
2319 * @host: host address for the zero page
2320 * @ch: what the page is filled from. We only support zero
2321 * @size: size of the zero page
56e93d26
JQ
2322 */
2323void ram_handle_compressed(void *host, uint8_t ch, uint64_t size)
2324{
2325 if (ch != 0 || !is_zero_range(host, size)) {
2326 memset(host, ch, size);
2327 }
2328}
2329
2330static void *do_data_decompress(void *opaque)
2331{
2332 DecompressParam *param = opaque;
2333 unsigned long pagesize;
33d151f4
LL
2334 uint8_t *des;
2335 int len;
56e93d26 2336
33d151f4 2337 qemu_mutex_lock(&param->mutex);
90e56fb4 2338 while (!param->quit) {
33d151f4
LL
2339 if (param->des) {
2340 des = param->des;
2341 len = param->len;
2342 param->des = 0;
2343 qemu_mutex_unlock(&param->mutex);
2344
56e93d26 2345 pagesize = TARGET_PAGE_SIZE;
73a8912b
LL
2346 /* uncompress() can fail in some cases, especially when the
2347 * page is dirtied while it is being compressed.  That's not a
2348 * problem because the dirty page will be retransmitted and
2349 * uncompress() won't break the data in other pages.
2350 */
33d151f4
LL
2351 uncompress((Bytef *)des, &pagesize,
2352 (const Bytef *)param->compbuf, len);
73a8912b 2353
33d151f4
LL
2354 qemu_mutex_lock(&decomp_done_lock);
2355 param->done = true;
2356 qemu_cond_signal(&decomp_done_cond);
2357 qemu_mutex_unlock(&decomp_done_lock);
2358
2359 qemu_mutex_lock(&param->mutex);
2360 } else {
2361 qemu_cond_wait(&param->cond, &param->mutex);
2362 }
56e93d26 2363 }
33d151f4 2364 qemu_mutex_unlock(&param->mutex);
56e93d26
JQ
2365
2366 return NULL;
2367}
2368
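/*
 * Illustrative sketch (not QEMU code): the hand-off pattern used between
 * decompress_data_with_multi_threads() and do_data_decompress(), reduced
 * to one worker and plain pthreads.  The dispatcher publishes a job under
 * the per-worker lock and signals job_cond; the worker reports completion
 * under a shared lock and signals done_cond.  All names are hypothetical.
 */
#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

static pthread_mutex_t job_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t job_cond = PTHREAD_COND_INITIALIZER;
static pthread_mutex_t done_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t done_cond = PTHREAD_COND_INITIALIZER;
static int job;                        /* 0 means "no work queued"        */
static bool done = true;               /* worker idle, like param->done   */
static bool quit;                      /* like param->quit                */

static void *worker(void *opaque)
{
    (void)opaque;
    pthread_mutex_lock(&job_lock);
    while (!quit) {
        if (job) {
            int j = job;
            job = 0;
            pthread_mutex_unlock(&job_lock);

            printf("processed job %d\n", j);   /* stands in for uncompress() */

            pthread_mutex_lock(&done_lock);
            done = true;
            pthread_cond_signal(&done_cond);
            pthread_mutex_unlock(&done_lock);

            pthread_mutex_lock(&job_lock);
        } else {
            pthread_cond_wait(&job_cond, &job_lock);
        }
    }
    pthread_mutex_unlock(&job_lock);
    return NULL;
}

int main(void)
{
    pthread_t tid;

    pthread_create(&tid, NULL, worker, NULL);

    /* Dispatcher: wait for an idle worker, then hand over one job. */
    pthread_mutex_lock(&done_lock);
    while (!done) {
        pthread_cond_wait(&done_cond, &done_lock);
    }
    done = false;
    pthread_mutex_unlock(&done_lock);

    pthread_mutex_lock(&job_lock);
    job = 42;
    pthread_cond_signal(&job_cond);
    pthread_mutex_unlock(&job_lock);

    /* Drain, as wait_for_decompress_done() does, then shut down. */
    pthread_mutex_lock(&done_lock);
    while (!done) {
        pthread_cond_wait(&done_cond, &done_lock);
    }
    pthread_mutex_unlock(&done_lock);

    pthread_mutex_lock(&job_lock);
    quit = true;
    pthread_cond_signal(&job_cond);
    pthread_mutex_unlock(&job_lock);
    pthread_join(tid, NULL);
    return 0;
}
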
5533b2e9
LL
2369static void wait_for_decompress_done(void)
2370{
2371 int idx, thread_count;
2372
2373 if (!migrate_use_compression()) {
2374 return;
2375 }
2376
2377 thread_count = migrate_decompress_threads();
2378 qemu_mutex_lock(&decomp_done_lock);
2379 for (idx = 0; idx < thread_count; idx++) {
2380 while (!decomp_param[idx].done) {
2381 qemu_cond_wait(&decomp_done_cond, &decomp_done_lock);
2382 }
2383 }
2384 qemu_mutex_unlock(&decomp_done_lock);
2385}
2386
56e93d26
JQ
2387void migrate_decompress_threads_create(void)
2388{
2389 int i, thread_count;
2390
2391 thread_count = migrate_decompress_threads();
2392 decompress_threads = g_new0(QemuThread, thread_count);
2393 decomp_param = g_new0(DecompressParam, thread_count);
73a8912b
LL
2394 qemu_mutex_init(&decomp_done_lock);
2395 qemu_cond_init(&decomp_done_cond);
56e93d26
JQ
2396 for (i = 0; i < thread_count; i++) {
2397 qemu_mutex_init(&decomp_param[i].mutex);
2398 qemu_cond_init(&decomp_param[i].cond);
2399 decomp_param[i].compbuf = g_malloc0(compressBound(TARGET_PAGE_SIZE));
73a8912b 2400 decomp_param[i].done = true;
90e56fb4 2401 decomp_param[i].quit = false;
56e93d26
JQ
2402 qemu_thread_create(decompress_threads + i, "decompress",
2403 do_data_decompress, decomp_param + i,
2404 QEMU_THREAD_JOINABLE);
2405 }
2406}
2407
2408void migrate_decompress_threads_join(void)
2409{
2410 int i, thread_count;
2411
56e93d26
JQ
2412 thread_count = migrate_decompress_threads();
2413 for (i = 0; i < thread_count; i++) {
2414 qemu_mutex_lock(&decomp_param[i].mutex);
90e56fb4 2415 decomp_param[i].quit = true;
56e93d26
JQ
2416 qemu_cond_signal(&decomp_param[i].cond);
2417 qemu_mutex_unlock(&decomp_param[i].mutex);
2418 }
2419 for (i = 0; i < thread_count; i++) {
2420 qemu_thread_join(decompress_threads + i);
2421 qemu_mutex_destroy(&decomp_param[i].mutex);
2422 qemu_cond_destroy(&decomp_param[i].cond);
2423 g_free(decomp_param[i].compbuf);
2424 }
2425 g_free(decompress_threads);
2426 g_free(decomp_param);
56e93d26
JQ
2427 decompress_threads = NULL;
2428 decomp_param = NULL;
56e93d26
JQ
2429}
2430
c1bc6626 2431static void decompress_data_with_multi_threads(QEMUFile *f,
56e93d26
JQ
2432 void *host, int len)
2433{
2434 int idx, thread_count;
2435
2436 thread_count = migrate_decompress_threads();
73a8912b 2437 qemu_mutex_lock(&decomp_done_lock);
56e93d26
JQ
2438 while (true) {
2439 for (idx = 0; idx < thread_count; idx++) {
73a8912b 2440 if (decomp_param[idx].done) {
33d151f4
LL
2441 decomp_param[idx].done = false;
2442 qemu_mutex_lock(&decomp_param[idx].mutex);
c1bc6626 2443 qemu_get_buffer(f, decomp_param[idx].compbuf, len);
56e93d26
JQ
2444 decomp_param[idx].des = host;
2445 decomp_param[idx].len = len;
33d151f4
LL
2446 qemu_cond_signal(&decomp_param[idx].cond);
2447 qemu_mutex_unlock(&decomp_param[idx].mutex);
56e93d26
JQ
2448 break;
2449 }
2450 }
2451 if (idx < thread_count) {
2452 break;
73a8912b
LL
2453 } else {
2454 qemu_cond_wait(&decomp_done_cond, &decomp_done_lock);
56e93d26
JQ
2455 }
2456 }
73a8912b 2457 qemu_mutex_unlock(&decomp_done_lock);
56e93d26
JQ
2458}
2459
3d0684b2
JQ
2460/**
2461 * ram_postcopy_incoming_init: allocate postcopy data structures
2462 *
2463 * Returns 0 for success and negative if there was one error
2464 *
2465 * @mis: current migration incoming state
2466 *
2467 * Allocate data structures etc needed by incoming migration with
2468 * postcopy-ram. postcopy-ram's similarly named
2469 * postcopy_ram_incoming_init does the work.
1caddf8a
DDAG
2470 */
2471int ram_postcopy_incoming_init(MigrationIncomingState *mis)
2472{
2473 size_t ram_pages = last_ram_offset() >> TARGET_PAGE_BITS;
2474
2475 return postcopy_ram_incoming_init(mis, ram_pages);
2476}
2477
3d0684b2
JQ
2478/**
2479 * ram_load_postcopy: load a page in postcopy case
2480 *
2481 * Returns 0 for success or -errno in case of error
2482 *
a7180877
DDAG
2483 * Called in postcopy mode by ram_load().
2484 * rcu_read_lock is taken prior to this being called.
3d0684b2
JQ
2485 *
2486 * @f: QEMUFile where to send the data
a7180877
DDAG
2487 */
2488static int ram_load_postcopy(QEMUFile *f)
2489{
2490 int flags = 0, ret = 0;
2491 bool place_needed = false;
28abd200 2492 bool matching_page_sizes = false;
a7180877
DDAG
2493 MigrationIncomingState *mis = migration_incoming_get_current();
2494 /* Temporary page that is later 'placed' */
2495 void *postcopy_host_page = postcopy_get_tmp_page(mis);
c53b7ddc 2496 void *last_host = NULL;
a3b6ff6d 2497 bool all_zero = false;
a7180877
DDAG
2498
2499 while (!ret && !(flags & RAM_SAVE_FLAG_EOS)) {
2500 ram_addr_t addr;
2501 void *host = NULL;
2502 void *page_buffer = NULL;
2503 void *place_source = NULL;
df9ff5e1 2504 RAMBlock *block = NULL;
a7180877 2505 uint8_t ch;
a7180877
DDAG
2506
2507 addr = qemu_get_be64(f);
2508 flags = addr & ~TARGET_PAGE_MASK;
2509 addr &= TARGET_PAGE_MASK;
2510
2511 trace_ram_load_postcopy_loop((uint64_t)addr, flags);
2512 place_needed = false;
2513 if (flags & (RAM_SAVE_FLAG_COMPRESS | RAM_SAVE_FLAG_PAGE)) {
df9ff5e1 2514 block = ram_block_from_stream(f, flags);
4c4bad48
HZ
2515
2516 host = host_from_ram_block_offset(block, addr);
a7180877
DDAG
2517 if (!host) {
2518 error_report("Illegal RAM offset " RAM_ADDR_FMT, addr);
2519 ret = -EINVAL;
2520 break;
2521 }
28abd200 2522 matching_page_sizes = block->page_size == TARGET_PAGE_SIZE;
a7180877 2523 /*
28abd200
DDAG
2524 * Postcopy requires that we place whole host pages atomically;
2525 * these may be huge pages for RAMBlocks that are backed by
2526 * hugetlbfs.
a7180877
DDAG
2527 * To make it atomic, the data is read into a temporary page
2528 * that's moved into place later.
2529 * The migration protocol uses possibly smaller target pages;
2530 * however, the source ensures it always sends all the components
2531 * of a host page in order.
2532 */
2533 page_buffer = postcopy_host_page +
28abd200 2534 ((uintptr_t)host & (block->page_size - 1));
a7180877 2535 /* If all TP are zero then we can optimise the place */
28abd200 2536 if (!((uintptr_t)host & (block->page_size - 1))) {
a7180877 2537 all_zero = true;
c53b7ddc
DDAG
2538 } else {
2539 /* not the 1st TP within the HP */
2540 if (host != (last_host + TARGET_PAGE_SIZE)) {
9af9e0fe 2541 error_report("Non-sequential target page %p/%p",
c53b7ddc
DDAG
2542 host, last_host);
2543 ret = -EINVAL;
2544 break;
2545 }
a7180877
DDAG
2546 }
2547
c53b7ddc 2548
a7180877
DDAG
2549 /*
2550 * If it's the last part of a host page then we place the host
2551 * page
2552 */
2553 place_needed = (((uintptr_t)host + TARGET_PAGE_SIZE) &
28abd200 2554 (block->page_size - 1)) == 0;
a7180877
DDAG
2555 place_source = postcopy_host_page;
2556 }
c53b7ddc 2557 last_host = host;
a7180877
DDAG
2558
2559 switch (flags & ~RAM_SAVE_FLAG_CONTINUE) {
2560 case RAM_SAVE_FLAG_COMPRESS:
2561 ch = qemu_get_byte(f);
2562 memset(page_buffer, ch, TARGET_PAGE_SIZE);
2563 if (ch) {
2564 all_zero = false;
2565 }
2566 break;
2567
2568 case RAM_SAVE_FLAG_PAGE:
2569 all_zero = false;
2570 if (!place_needed || !matching_page_sizes) {
2571 qemu_get_buffer(f, page_buffer, TARGET_PAGE_SIZE);
2572 } else {
2573 /* Avoid the qemu_file copy; postcopy is going to copy
2574 * the page into place later anyway.  We can only do this
2575 * when the read is done in one go (matching page sizes).
2576 */
2577 qemu_get_buffer_in_place(f, (uint8_t **)&place_source,
2578 TARGET_PAGE_SIZE);
2579 }
2580 break;
2581 case RAM_SAVE_FLAG_EOS:
2582 /* normal exit */
2583 break;
2584 default:
2585 error_report("Unknown combination of migration flags: %#x"
2586 " (postcopy mode)", flags);
2587 ret = -EINVAL;
2588 }
2589
2590 if (place_needed) {
2591 /* This gets called at the last target page in the host page */
df9ff5e1
DDAG
2592 void *place_dest = host + TARGET_PAGE_SIZE - block->page_size;
2593
a7180877 2594 if (all_zero) {
df9ff5e1
DDAG
2595 ret = postcopy_place_page_zero(mis, place_dest,
2596 block->page_size);
a7180877 2597 } else {
df9ff5e1
DDAG
2598 ret = postcopy_place_page(mis, place_dest,
2599 place_source, block->page_size);
a7180877
DDAG
2600 }
2601 }
2602 if (!ret) {
2603 ret = qemu_file_get_error(f);
2604 }
2605 }
2606
2607 return ret;
2608}
2609
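/*
 * Illustrative sketch (not QEMU code): the host-page assembly arithmetic
 * in ram_load_postcopy() above, with assumed 2MiB huge pages and 4KiB
 * target pages.  A target page is the first of its host page when its
 * offset into the host page is zero, and it triggers a "place" when the
 * next target page would start a new host page.
 */
#include <assert.h>
#include <stdint.h>

#define TP_SIZE 0x1000u                /* assumed 4KiB target page */
#define HP_SIZE 0x200000u              /* assumed 2MiB host page   */

int main(void)
{
    uintptr_t host;

    host = 0x40000000;                                 /* 1st target page    */
    assert((host & (HP_SIZE - 1)) == 0);               /* starts a host page */
    assert(((host + TP_SIZE) & (HP_SIZE - 1)) != 0);   /* not placeable yet  */

    host = 0x40000000 + HP_SIZE - TP_SIZE;             /* 512th target page  */
    assert(((host + TP_SIZE) & (HP_SIZE - 1)) == 0);   /* place the host page */
    return 0;
}
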
56e93d26
JQ
2610static int ram_load(QEMUFile *f, void *opaque, int version_id)
2611{
2612 int flags = 0, ret = 0;
2613 static uint64_t seq_iter;
2614 int len = 0;
a7180877
DDAG
2615 /*
2616 * If the system is running in postcopy mode, page inserts to host memory must
2617 * be atomic
2618 */
2619 bool postcopy_running = postcopy_state_get() >= POSTCOPY_INCOMING_LISTENING;
ef08fb38
DDAG
2620 /* ADVISE is earlier, it shows the source has the postcopy capability on */
2621 bool postcopy_advised = postcopy_state_get() >= POSTCOPY_INCOMING_ADVISE;
56e93d26
JQ
2622
2623 seq_iter++;
2624
2625 if (version_id != 4) {
2626 ret = -EINVAL;
2627 }
2628
2629 /* This RCU critical section can be very long running.
2630 * When RCU reclaims in the code start to become numerous,
2631 * it will be necessary to reduce the granularity of this
2632 * critical section.
2633 */
2634 rcu_read_lock();
a7180877
DDAG
2635
2636 if (postcopy_running) {
2637 ret = ram_load_postcopy(f);
2638 }
2639
2640 while (!postcopy_running && !ret && !(flags & RAM_SAVE_FLAG_EOS)) {
56e93d26 2641 ram_addr_t addr, total_ram_bytes;
a776aa15 2642 void *host = NULL;
56e93d26
JQ
2643 uint8_t ch;
2644
2645 addr = qemu_get_be64(f);
2646 flags = addr & ~TARGET_PAGE_MASK;
2647 addr &= TARGET_PAGE_MASK;
2648
a776aa15
DDAG
2649 if (flags & (RAM_SAVE_FLAG_COMPRESS | RAM_SAVE_FLAG_PAGE |
2650 RAM_SAVE_FLAG_COMPRESS_PAGE | RAM_SAVE_FLAG_XBZRLE)) {
4c4bad48
HZ
2651 RAMBlock *block = ram_block_from_stream(f, flags);
2652
2653 host = host_from_ram_block_offset(block, addr);
a776aa15
DDAG
2654 if (!host) {
2655 error_report("Illegal RAM offset " RAM_ADDR_FMT, addr);
2656 ret = -EINVAL;
2657 break;
2658 }
2659 }
2660
56e93d26
JQ
2661 switch (flags & ~RAM_SAVE_FLAG_CONTINUE) {
2662 case RAM_SAVE_FLAG_MEM_SIZE:
2663 /* Synchronize RAM block list */
2664 total_ram_bytes = addr;
2665 while (!ret && total_ram_bytes) {
2666 RAMBlock *block;
56e93d26
JQ
2667 char id[256];
2668 ram_addr_t length;
2669
2670 len = qemu_get_byte(f);
2671 qemu_get_buffer(f, (uint8_t *)id, len);
2672 id[len] = 0;
2673 length = qemu_get_be64(f);
2674
e3dd7493
DDAG
2675 block = qemu_ram_block_by_name(id);
2676 if (block) {
2677 if (length != block->used_length) {
2678 Error *local_err = NULL;
56e93d26 2679
fa53a0e5 2680 ret = qemu_ram_resize(block, length,
e3dd7493
DDAG
2681 &local_err);
2682 if (local_err) {
2683 error_report_err(local_err);
56e93d26 2684 }
56e93d26 2685 }
ef08fb38
DDAG
2686 /* For postcopy we need to check hugepage sizes match */
2687 if (postcopy_advised &&
2688 block->page_size != qemu_host_page_size) {
2689 uint64_t remote_page_size = qemu_get_be64(f);
2690 if (remote_page_size != block->page_size) {
2691 error_report("Mismatched RAM page size %s "
2692 "(local) %zd != %" PRId64,
2693 id, block->page_size,
2694 remote_page_size);
2695 ret = -EINVAL;
2696 }
2697 }
e3dd7493
DDAG
2698 ram_control_load_hook(f, RAM_CONTROL_BLOCK_REG,
2699 block->idstr);
2700 } else {
56e93d26
JQ
2701 error_report("Unknown ramblock \"%s\", cannot "
2702 "accept migration", id);
2703 ret = -EINVAL;
2704 }
2705
2706 total_ram_bytes -= length;
2707 }
2708 break;
a776aa15 2709
56e93d26 2710 case RAM_SAVE_FLAG_COMPRESS:
56e93d26
JQ
2711 ch = qemu_get_byte(f);
2712 ram_handle_compressed(host, ch, TARGET_PAGE_SIZE);
2713 break;
a776aa15 2714
56e93d26 2715 case RAM_SAVE_FLAG_PAGE:
56e93d26
JQ
2716 qemu_get_buffer(f, host, TARGET_PAGE_SIZE);
2717 break;
56e93d26 2718
a776aa15 2719 case RAM_SAVE_FLAG_COMPRESS_PAGE:
56e93d26
JQ
2720 len = qemu_get_be32(f);
2721 if (len < 0 || len > compressBound(TARGET_PAGE_SIZE)) {
2722 error_report("Invalid compressed data length: %d", len);
2723 ret = -EINVAL;
2724 break;
2725 }
c1bc6626 2726 decompress_data_with_multi_threads(f, host, len);
56e93d26 2727 break;
a776aa15 2728
56e93d26 2729 case RAM_SAVE_FLAG_XBZRLE:
56e93d26
JQ
2730 if (load_xbzrle(f, addr, host) < 0) {
2731 error_report("Failed to decompress XBZRLE page at "
2732 RAM_ADDR_FMT, addr);
2733 ret = -EINVAL;
2734 break;
2735 }
2736 break;
2737 case RAM_SAVE_FLAG_EOS:
2738 /* normal exit */
2739 break;
2740 default:
2741 if (flags & RAM_SAVE_FLAG_HOOK) {
632e3a5c 2742 ram_control_load_hook(f, RAM_CONTROL_HOOK, NULL);
56e93d26
JQ
2743 } else {
2744 error_report("Unknown combination of migration flags: %#x",
2745 flags);
2746 ret = -EINVAL;
2747 }
2748 }
2749 if (!ret) {
2750 ret = qemu_file_get_error(f);
2751 }
2752 }
2753
5533b2e9 2754 wait_for_decompress_done();
56e93d26 2755 rcu_read_unlock();
55c4446b 2756 trace_ram_load_complete(ret, seq_iter);
56e93d26
JQ
2757 return ret;
2758}
2759
2760static SaveVMHandlers savevm_ram_handlers = {
2761 .save_live_setup = ram_save_setup,
2762 .save_live_iterate = ram_save_iterate,
763c906b 2763 .save_live_complete_postcopy = ram_save_complete,
a3e06c3d 2764 .save_live_complete_precopy = ram_save_complete,
56e93d26
JQ
2765 .save_live_pending = ram_save_pending,
2766 .load_state = ram_load,
6ad2a215 2767 .cleanup = ram_migration_cleanup,
56e93d26
JQ
2768};
2769
2770void ram_mig_init(void)
2771{
2772 qemu_mutex_init(&XBZRLE.lock);
6f37bb8b 2773 register_savevm_live(NULL, "ram", 0, 4, &savevm_ram_handlers, &ram_state);
56e93d26 2774}