git.proxmox.com Git - mirror_qemu.git/blob - migration/ram.c
migration: stop compressing page in migration thread
1 /*
2 * QEMU System Emulator
3 *
4 * Copyright (c) 2003-2008 Fabrice Bellard
5 * Copyright (c) 2011-2015 Red Hat Inc
6 *
7 * Authors:
8 * Juan Quintela <quintela@redhat.com>
9 *
10 * Permission is hereby granted, free of charge, to any person obtaining a copy
11 * of this software and associated documentation files (the "Software"), to deal
12 * in the Software without restriction, including without limitation the rights
13 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
14 * copies of the Software, and to permit persons to whom the Software is
15 * furnished to do so, subject to the following conditions:
16 *
17 * The above copyright notice and this permission notice shall be included in
18 * all copies or substantial portions of the Software.
19 *
20 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
21 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
23 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
24 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
25 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
26 * THE SOFTWARE.
27 */
28
29 #include "qemu/osdep.h"
30 #include "cpu.h"
31 #include <zlib.h>
32 #include "qemu/cutils.h"
33 #include "qemu/bitops.h"
34 #include "qemu/bitmap.h"
35 #include "qemu/main-loop.h"
36 #include "xbzrle.h"
37 #include "ram.h"
38 #include "migration.h"
39 #include "migration/register.h"
40 #include "migration/misc.h"
41 #include "qemu-file.h"
42 #include "postcopy-ram.h"
43 #include "migration/page_cache.h"
44 #include "qemu/error-report.h"
45 #include "qapi/error.h"
46 #include "qapi/qapi-events-migration.h"
47 #include "qapi/qmp/qerror.h"
48 #include "trace.h"
49 #include "exec/ram_addr.h"
50 #include "exec/target_page.h"
51 #include "qemu/rcu_queue.h"
52 #include "migration/colo.h"
53 #include "migration/block.h"
54
55 /***********************************************************/
56 /* ram save/restore */
57
58 /* RAM_SAVE_FLAG_ZERO used to be named RAM_SAVE_FLAG_COMPRESS; it
59 * worked for pages that were filled with the same char. We switched
60 * it to only search for the zero value, and renamed it to avoid
61 * confusion with RAM_SAVE_FLAG_COMPRESS_PAGE.
62 */
63
64 #define RAM_SAVE_FLAG_FULL 0x01 /* Obsolete, not used anymore */
65 #define RAM_SAVE_FLAG_ZERO 0x02
66 #define RAM_SAVE_FLAG_MEM_SIZE 0x04
67 #define RAM_SAVE_FLAG_PAGE 0x08
68 #define RAM_SAVE_FLAG_EOS 0x10
69 #define RAM_SAVE_FLAG_CONTINUE 0x20
70 #define RAM_SAVE_FLAG_XBZRLE 0x40
71 /* 0x80 is reserved in migration.h; start with 0x100 for the next flag */
72 #define RAM_SAVE_FLAG_COMPRESS_PAGE 0x100
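/*
 * These flag values are ORed into the page offset on the wire (see
 * save_page_header()).  Page offsets are always target-page aligned,
 * so the low bits of the offset are free to carry flags: with a 4 KiB
 * target page, for example, bits 0..11 are zero and all of the values
 * above (up to 0x100) fit below the alignment boundary.
 */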
73
74 static inline bool is_zero_range(uint8_t *p, uint64_t size)
75 {
76 return buffer_is_zero(p, size);
77 }
78
79 XBZRLECacheStats xbzrle_counters;
80
81 /* This struct contains the XBZRLE cache and a static page
82 used by the compression */
83 static struct {
84 /* buffer used for XBZRLE encoding */
85 uint8_t *encoded_buf;
86 /* buffer for storing page content */
87 uint8_t *current_buf;
88 /* Cache for XBZRLE, Protected by lock. */
89 PageCache *cache;
90 QemuMutex lock;
91 /* it will store a page full of zeros */
92 uint8_t *zero_target_page;
93 /* buffer used for XBZRLE decoding */
94 uint8_t *decoded_buf;
95 } XBZRLE;
96
97 static void XBZRLE_cache_lock(void)
98 {
99 if (migrate_use_xbzrle())
100 qemu_mutex_lock(&XBZRLE.lock);
101 }
102
103 static void XBZRLE_cache_unlock(void)
104 {
105 if (migrate_use_xbzrle())
106 qemu_mutex_unlock(&XBZRLE.lock);
107 }
108
109 /**
110 * xbzrle_cache_resize: resize the xbzrle cache
111 *
112 * This function is called from qmp_migrate_set_cache_size in the main
113 * thread, possibly while a migration is in progress. A running
114 * migration may be using the cache and might finish during this call,
115 * hence changes to the cache are protected by XBZRLE.lock.
116 *
117 * Returns 0 for success or -1 for error
118 *
119 * @new_size: new cache size
120 * @errp: set to the reason for failure if the check fails
121 */
122 int xbzrle_cache_resize(int64_t new_size, Error **errp)
123 {
124 PageCache *new_cache;
125 int64_t ret = 0;
126
127 /* Check for truncation */
128 if (new_size != (size_t)new_size) {
129 error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "cache size",
130 "exceeding address space");
131 return -1;
132 }
133
134 if (new_size == migrate_xbzrle_cache_size()) {
135 /* nothing to do */
136 return 0;
137 }
138
139 XBZRLE_cache_lock();
140
141 if (XBZRLE.cache != NULL) {
142 new_cache = cache_init(new_size, TARGET_PAGE_SIZE, errp);
143 if (!new_cache) {
144 ret = -1;
145 goto out;
146 }
147
148 cache_fini(XBZRLE.cache);
149 XBZRLE.cache = new_cache;
150 }
151 out:
152 XBZRLE_cache_unlock();
153 return ret;
154 }
155
156 static void ramblock_recv_map_init(void)
157 {
158 RAMBlock *rb;
159
160 RAMBLOCK_FOREACH(rb) {
161 assert(!rb->receivedmap);
162 rb->receivedmap = bitmap_new(rb->max_length >> qemu_target_page_bits());
163 }
164 }
165
166 int ramblock_recv_bitmap_test(RAMBlock *rb, void *host_addr)
167 {
168 return test_bit(ramblock_recv_bitmap_offset(host_addr, rb),
169 rb->receivedmap);
170 }
171
172 bool ramblock_recv_bitmap_test_byte_offset(RAMBlock *rb, uint64_t byte_offset)
173 {
174 return test_bit(byte_offset >> TARGET_PAGE_BITS, rb->receivedmap);
175 }
176
177 void ramblock_recv_bitmap_set(RAMBlock *rb, void *host_addr)
178 {
179 set_bit_atomic(ramblock_recv_bitmap_offset(host_addr, rb), rb->receivedmap);
180 }
181
182 void ramblock_recv_bitmap_set_range(RAMBlock *rb, void *host_addr,
183 size_t nr)
184 {
185 bitmap_set_atomic(rb->receivedmap,
186 ramblock_recv_bitmap_offset(host_addr, rb),
187 nr);
188 }
189
190 /*
191 * An outstanding page request, on the source, having been received
192 * and queued
193 */
194 struct RAMSrcPageRequest {
195 RAMBlock *rb;
196 hwaddr offset;
197 hwaddr len;
198
199 QSIMPLEQ_ENTRY(RAMSrcPageRequest) next_req;
200 };
201
202 /* State of RAM for migration */
203 struct RAMState {
204 /* QEMUFile used for this migration */
205 QEMUFile *f;
206 /* Last block that we have visited searching for dirty pages */
207 RAMBlock *last_seen_block;
208 /* Last block from where we have sent data */
209 RAMBlock *last_sent_block;
210 /* Last dirty target page we have sent */
211 ram_addr_t last_page;
212 /* last ram version we have seen */
213 uint32_t last_version;
214 /* We are in the first round */
215 bool ram_bulk_stage;
216 /* How many times we have dirty too many pages */
217 int dirty_rate_high_cnt;
218 /* these variables are used for bitmap sync */
219 /* last time we did a full bitmap_sync */
220 int64_t time_last_bitmap_sync;
221 /* bytes transferred at start_time */
222 uint64_t bytes_xfer_prev;
223 /* number of dirty pages since start_time */
224 uint64_t num_dirty_pages_period;
225 /* xbzrle misses since the beginning of the period */
226 uint64_t xbzrle_cache_miss_prev;
227 /* number of iterations at the beginning of period */
228 uint64_t iterations_prev;
229 /* Iterations since start */
230 uint64_t iterations;
231 /* number of dirty bits in the bitmap */
232 uint64_t migration_dirty_pages;
233 /* protects modification of the bitmap */
234 QemuMutex bitmap_mutex;
235 /* The RAMBlock used in the last src_page_requests */
236 RAMBlock *last_req_rb;
237 /* Queue of outstanding page requests from the destination */
238 QemuMutex src_page_req_mutex;
239 QSIMPLEQ_HEAD(src_page_requests, RAMSrcPageRequest) src_page_requests;
240 };
241 typedef struct RAMState RAMState;
242
243 static RAMState *ram_state;
244
245 uint64_t ram_bytes_remaining(void)
246 {
247 return ram_state ? (ram_state->migration_dirty_pages * TARGET_PAGE_SIZE) :
248 0;
249 }
250
251 MigrationStats ram_counters;
252
253 /* used by the search for pages to send */
254 struct PageSearchStatus {
255 /* Current block being searched */
256 RAMBlock *block;
257 /* Current page to search from */
258 unsigned long page;
259 /* Set once we wrap around */
260 bool complete_round;
261 };
262 typedef struct PageSearchStatus PageSearchStatus;
263
264 struct CompressParam {
265 bool done;
266 bool quit;
267 QEMUFile *file;
268 QemuMutex mutex;
269 QemuCond cond;
270 RAMBlock *block;
271 ram_addr_t offset;
272 };
273 typedef struct CompressParam CompressParam;
274
275 struct DecompressParam {
276 bool done;
277 bool quit;
278 QemuMutex mutex;
279 QemuCond cond;
280 void *des;
281 uint8_t *compbuf;
282 int len;
283 };
284 typedef struct DecompressParam DecompressParam;
285
286 static CompressParam *comp_param;
287 static QemuThread *compress_threads;
288 /* comp_done_cond is used to wake up the migration thread when
289 * one of the compression threads has finished the compression.
290 * comp_done_lock is used to co-work with comp_done_cond.
291 */
292 static QemuMutex comp_done_lock;
293 static QemuCond comp_done_cond;
294 /* The empty QEMUFileOps will be used by file in CompressParam */
295 static const QEMUFileOps empty_ops = { };
296
297 static DecompressParam *decomp_param;
298 static QemuThread *decompress_threads;
299 static QemuMutex decomp_done_lock;
300 static QemuCond decomp_done_cond;
301
302 static int do_compress_ram_page(QEMUFile *f, RAMBlock *block,
303 ram_addr_t offset);
304
305 static void *do_data_compress(void *opaque)
306 {
307 CompressParam *param = opaque;
308 RAMBlock *block;
309 ram_addr_t offset;
310
311 qemu_mutex_lock(&param->mutex);
312 while (!param->quit) {
313 if (param->block) {
314 block = param->block;
315 offset = param->offset;
316 param->block = NULL;
317 qemu_mutex_unlock(&param->mutex);
318
319 do_compress_ram_page(param->file, block, offset);
320
321 qemu_mutex_lock(&comp_done_lock);
322 param->done = true;
323 qemu_cond_signal(&comp_done_cond);
324 qemu_mutex_unlock(&comp_done_lock);
325
326 qemu_mutex_lock(&param->mutex);
327 } else {
328 qemu_cond_wait(&param->cond, &param->mutex);
329 }
330 }
331 qemu_mutex_unlock(&param->mutex);
332
333 return NULL;
334 }
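/*
 * Hand-off protocol between the migration thread and a compression
 * worker, as implemented above: the migration thread publishes work by
 * setting param->block/param->offset under param->mutex and signalling
 * param->cond; the worker clears param->block, compresses the page into
 * its private QEMUFile buffer, then marks param->done under
 * comp_done_lock and signals comp_done_cond so the migration thread can
 * collect the result (see flush_compressed_data() and
 * compress_page_with_multi_thread() below).
 */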
335
336 static inline void terminate_compression_threads(void)
337 {
338 int idx, thread_count;
339
340 thread_count = migrate_compress_threads();
341
342 for (idx = 0; idx < thread_count; idx++) {
343 qemu_mutex_lock(&comp_param[idx].mutex);
344 comp_param[idx].quit = true;
345 qemu_cond_signal(&comp_param[idx].cond);
346 qemu_mutex_unlock(&comp_param[idx].mutex);
347 }
348 }
349
350 static void compress_threads_save_cleanup(void)
351 {
352 int i, thread_count;
353
354 if (!migrate_use_compression()) {
355 return;
356 }
357 terminate_compression_threads();
358 thread_count = migrate_compress_threads();
359 for (i = 0; i < thread_count; i++) {
360 qemu_thread_join(compress_threads + i);
361 qemu_fclose(comp_param[i].file);
362 qemu_mutex_destroy(&comp_param[i].mutex);
363 qemu_cond_destroy(&comp_param[i].cond);
364 }
365 qemu_mutex_destroy(&comp_done_lock);
366 qemu_cond_destroy(&comp_done_cond);
367 g_free(compress_threads);
368 g_free(comp_param);
369 compress_threads = NULL;
370 comp_param = NULL;
371 }
372
373 static void compress_threads_save_setup(void)
374 {
375 int i, thread_count;
376
377 if (!migrate_use_compression()) {
378 return;
379 }
380 thread_count = migrate_compress_threads();
381 compress_threads = g_new0(QemuThread, thread_count);
382 comp_param = g_new0(CompressParam, thread_count);
383 qemu_cond_init(&comp_done_cond);
384 qemu_mutex_init(&comp_done_lock);
385 for (i = 0; i < thread_count; i++) {
386 /* comp_param[i].file is just used as a dummy buffer to save data,
387 * set its ops to empty.
388 */
389 comp_param[i].file = qemu_fopen_ops(NULL, &empty_ops);
390 comp_param[i].done = true;
391 comp_param[i].quit = false;
392 qemu_mutex_init(&comp_param[i].mutex);
393 qemu_cond_init(&comp_param[i].cond);
394 qemu_thread_create(compress_threads + i, "compress",
395 do_data_compress, comp_param + i,
396 QEMU_THREAD_JOINABLE);
397 }
398 }
399
400 /* Multiple fd's */
401
402 struct MultiFDSendParams {
403 uint8_t id;
404 char *name;
405 QemuThread thread;
406 QemuSemaphore sem;
407 QemuMutex mutex;
408 bool quit;
409 };
410 typedef struct MultiFDSendParams MultiFDSendParams;
411
412 struct {
413 MultiFDSendParams *params;
414 /* number of created threads */
415 int count;
416 } *multifd_send_state;
417
418 static void terminate_multifd_send_threads(Error *errp)
419 {
420 int i;
421
422 for (i = 0; i < multifd_send_state->count; i++) {
423 MultiFDSendParams *p = &multifd_send_state->params[i];
424
425 qemu_mutex_lock(&p->mutex);
426 p->quit = true;
427 qemu_sem_post(&p->sem);
428 qemu_mutex_unlock(&p->mutex);
429 }
430 }
431
432 int multifd_save_cleanup(Error **errp)
433 {
434 int i;
435 int ret = 0;
436
437 if (!migrate_use_multifd()) {
438 return 0;
439 }
440 terminate_multifd_send_threads(NULL);
441 for (i = 0; i < multifd_send_state->count; i++) {
442 MultiFDSendParams *p = &multifd_send_state->params[i];
443
444 qemu_thread_join(&p->thread);
445 qemu_mutex_destroy(&p->mutex);
446 qemu_sem_destroy(&p->sem);
447 g_free(p->name);
448 p->name = NULL;
449 }
450 g_free(multifd_send_state->params);
451 multifd_send_state->params = NULL;
452 g_free(multifd_send_state);
453 multifd_send_state = NULL;
454 return ret;
455 }
456
457 static void *multifd_send_thread(void *opaque)
458 {
459 MultiFDSendParams *p = opaque;
460
461 while (true) {
462 qemu_mutex_lock(&p->mutex);
463 if (p->quit) {
464 qemu_mutex_unlock(&p->mutex);
465 break;
466 }
467 qemu_mutex_unlock(&p->mutex);
468 qemu_sem_wait(&p->sem);
469 }
470
471 return NULL;
472 }
473
474 int multifd_save_setup(void)
475 {
476 int thread_count;
477 uint8_t i;
478
479 if (!migrate_use_multifd()) {
480 return 0;
481 }
482 thread_count = migrate_multifd_channels();
483 multifd_send_state = g_malloc0(sizeof(*multifd_send_state));
484 multifd_send_state->params = g_new0(MultiFDSendParams, thread_count);
485 multifd_send_state->count = 0;
486 for (i = 0; i < thread_count; i++) {
487 MultiFDSendParams *p = &multifd_send_state->params[i];
488
489 qemu_mutex_init(&p->mutex);
490 qemu_sem_init(&p->sem, 0);
491 p->quit = false;
492 p->id = i;
493 p->name = g_strdup_printf("multifdsend_%d", i);
494 qemu_thread_create(&p->thread, p->name, multifd_send_thread, p,
495 QEMU_THREAD_JOINABLE);
496
497 multifd_send_state->count++;
498 }
499 return 0;
500 }
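/*
 * At this point the multifd send threads are only skeletons: each one
 * waits on its semaphore and exits once p->quit is set.  No page data
 * is sent over the extra channels yet; setup and cleanup merely create
 * and join the threads and their bookkeeping.
 */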
501
502 struct MultiFDRecvParams {
503 uint8_t id;
504 char *name;
505 QemuThread thread;
506 QemuSemaphore sem;
507 QemuMutex mutex;
508 bool quit;
509 };
510 typedef struct MultiFDRecvParams MultiFDRecvParams;
511
512 struct {
513 MultiFDRecvParams *params;
514 /* number of created threads */
515 int count;
516 } *multifd_recv_state;
517
518 static void terminate_multifd_recv_threads(Error *errp)
519 {
520 int i;
521
522 for (i = 0; i < multifd_recv_state->count; i++) {
523 MultiFDRecvParams *p = &multifd_recv_state->params[i];
524
525 qemu_mutex_lock(&p->mutex);
526 p->quit = true;
527 qemu_sem_post(&p->sem);
528 qemu_mutex_unlock(&p->mutex);
529 }
530 }
531
532 int multifd_load_cleanup(Error **errp)
533 {
534 int i;
535 int ret = 0;
536
537 if (!migrate_use_multifd()) {
538 return 0;
539 }
540 terminate_multifd_recv_threads(NULL);
541 for (i = 0; i < multifd_recv_state->count; i++) {
542 MultiFDRecvParams *p = &multifd_recv_state->params[i];
543
544 qemu_thread_join(&p->thread);
545 qemu_mutex_destroy(&p->mutex);
546 qemu_sem_destroy(&p->sem);
547 g_free(p->name);
548 p->name = NULL;
549 }
550 g_free(multifd_recv_state->params);
551 multifd_recv_state->params = NULL;
552 g_free(multifd_recv_state);
553 multifd_recv_state = NULL;
554
555 return ret;
556 }
557
558 static void *multifd_recv_thread(void *opaque)
559 {
560 MultiFDRecvParams *p = opaque;
561
562 while (true) {
563 qemu_mutex_lock(&p->mutex);
564 if (p->quit) {
565 qemu_mutex_unlock(&p->mutex);
566 break;
567 }
568 qemu_mutex_unlock(&p->mutex);
569 qemu_sem_wait(&p->sem);
570 }
571
572 return NULL;
573 }
574
575 int multifd_load_setup(void)
576 {
577 int thread_count;
578 uint8_t i;
579
580 if (!migrate_use_multifd()) {
581 return 0;
582 }
583 thread_count = migrate_multifd_channels();
584 multifd_recv_state = g_malloc0(sizeof(*multifd_recv_state));
585 multifd_recv_state->params = g_new0(MultiFDRecvParams, thread_count);
586 multifd_recv_state->count = 0;
587 for (i = 0; i < thread_count; i++) {
588 MultiFDRecvParams *p = &multifd_recv_state->params[i];
589
590 qemu_mutex_init(&p->mutex);
591 qemu_sem_init(&p->sem, 0);
592 p->quit = false;
593 p->id = i;
594 p->name = g_strdup_printf("multifdrecv_%d", i);
595 qemu_thread_create(&p->thread, p->name, multifd_recv_thread, p,
596 QEMU_THREAD_JOINABLE);
597 multifd_recv_state->count++;
598 }
599 return 0;
600 }
601
602 /**
603 * save_page_header: write page header to wire
604 *
605 * If this block differs from the last one sent, it also writes the block identification
606 *
607 * Returns the number of bytes written
608 *
609 * @f: QEMUFile where to send the data
610 * @block: block that contains the page we want to send
611 * @offset: offset inside the block for the page;
612 * the lower bits contain flags
613 */
614 static size_t save_page_header(RAMState *rs, QEMUFile *f, RAMBlock *block,
615 ram_addr_t offset)
616 {
617 size_t size, len;
618
619 if (block == rs->last_sent_block) {
620 offset |= RAM_SAVE_FLAG_CONTINUE;
621 }
622 qemu_put_be64(f, offset);
623 size = 8;
624
625 if (!(offset & RAM_SAVE_FLAG_CONTINUE)) {
626 len = strlen(block->idstr);
627 qemu_put_byte(f, len);
628 qemu_put_buffer(f, (uint8_t *)block->idstr, len);
629 size += 1 + len;
630 rs->last_sent_block = block;
631 }
632 return size;
633 }
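/*
 * Rough sketch of the wire layout produced by save_page_header():
 *
 *   be64   offset | flags                always present
 *   u8     len of idstr                  only when RAM_SAVE_FLAG_CONTINUE
 *   bytes  idstr (len bytes, no NUL)     is *not* set
 *
 * so consecutive pages from the same RAMBlock cost 8 bytes of header,
 * while switching blocks additionally costs 1 + strlen(idstr) bytes.
 */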
634
635 /**
636 * mig_throttle_guest_down: throttle down the guest
637 *
638 * Reduce amount of guest cpu execution to hopefully slow down memory
639 * writes. If guest dirty memory rate is reduced below the rate at
640 * which we can transfer pages to the destination then we should be
641 * able to complete migration. Some workloads dirty memory way too
642 * fast and will not effectively converge, even with auto-converge.
643 */
644 static void mig_throttle_guest_down(void)
645 {
646 MigrationState *s = migrate_get_current();
647 uint64_t pct_initial = s->parameters.cpu_throttle_initial;
648 uint64_t pct_increment = s->parameters.cpu_throttle_increment;
649
650 /* We have not started throttling yet. Let's start it. */
651 if (!cpu_throttle_active()) {
652 cpu_throttle_set(pct_initial);
653 } else {
654 /* Throttling already on, just increase the rate */
655 cpu_throttle_set(cpu_throttle_get_percentage() + pct_increment);
656 }
657 }
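/*
 * Illustrative throttle ramp (20 and 10 are used here purely as example
 * parameter values): with cpu_throttle_initial=20 and
 * cpu_throttle_increment=10, successive calls set the throttle to 20%,
 * 30%, 40%, ..., one step each time migration_bitmap_sync() decides the
 * dirty rate is still too high.
 */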
658
659 /**
660 * xbzrle_cache_zero_page: insert a zero page in the XBZRLE cache
661 *
662 * @rs: current RAM state
663 * @current_addr: address for the zero page
664 *
665 * Update the xbzrle cache to reflect a page that's been sent as all 0.
666 * The important thing is that a stale (not-yet-0'd) page be replaced
667 * by the new data.
668 * As a bonus, if the page wasn't in the cache it gets added so that
669 * when a small write is made into the 0'd page it gets XBZRLE sent.
670 */
671 static void xbzrle_cache_zero_page(RAMState *rs, ram_addr_t current_addr)
672 {
673 if (rs->ram_bulk_stage || !migrate_use_xbzrle()) {
674 return;
675 }
676
677 /* We don't care if this fails to allocate a new cache page
678 * as long as it updated an old one */
679 cache_insert(XBZRLE.cache, current_addr, XBZRLE.zero_target_page,
680 ram_counters.dirty_sync_count);
681 }
682
683 #define ENCODING_FLAG_XBZRLE 0x1
684
685 /**
686 * save_xbzrle_page: compress and send current page
687 *
688 * Returns: 1 means that we wrote the page
689 * 0 means that page is identical to the one already sent
690 * -1 means that xbzrle would be longer than normal
691 *
692 * @rs: current RAM state
693 * @current_data: pointer to the address of the page contents
694 * @current_addr: addr of the page
695 * @block: block that contains the page we want to send
696 * @offset: offset inside the block for the page
697 * @last_stage: if we are at the completion stage
698 */
699 static int save_xbzrle_page(RAMState *rs, uint8_t **current_data,
700 ram_addr_t current_addr, RAMBlock *block,
701 ram_addr_t offset, bool last_stage)
702 {
703 int encoded_len = 0, bytes_xbzrle;
704 uint8_t *prev_cached_page;
705
706 if (!cache_is_cached(XBZRLE.cache, current_addr,
707 ram_counters.dirty_sync_count)) {
708 xbzrle_counters.cache_miss++;
709 if (!last_stage) {
710 if (cache_insert(XBZRLE.cache, current_addr, *current_data,
711 ram_counters.dirty_sync_count) == -1) {
712 return -1;
713 } else {
714 /* update *current_data when the page has been
715 inserted into cache */
716 *current_data = get_cached_data(XBZRLE.cache, current_addr);
717 }
718 }
719 return -1;
720 }
721
722 prev_cached_page = get_cached_data(XBZRLE.cache, current_addr);
723
724 /* save current buffer into memory */
725 memcpy(XBZRLE.current_buf, *current_data, TARGET_PAGE_SIZE);
726
727 /* XBZRLE encoding (if there is no overflow) */
728 encoded_len = xbzrle_encode_buffer(prev_cached_page, XBZRLE.current_buf,
729 TARGET_PAGE_SIZE, XBZRLE.encoded_buf,
730 TARGET_PAGE_SIZE);
731 if (encoded_len == 0) {
732 trace_save_xbzrle_page_skipping();
733 return 0;
734 } else if (encoded_len == -1) {
735 trace_save_xbzrle_page_overflow();
736 xbzrle_counters.overflow++;
737 /* update data in the cache */
738 if (!last_stage) {
739 memcpy(prev_cached_page, *current_data, TARGET_PAGE_SIZE);
740 *current_data = prev_cached_page;
741 }
742 return -1;
743 }
744
745 /* we need to update the data in the cache, in order to get the same data */
746 if (!last_stage) {
747 memcpy(prev_cached_page, XBZRLE.current_buf, TARGET_PAGE_SIZE);
748 }
749
750 /* Send XBZRLE based compressed page */
751 bytes_xbzrle = save_page_header(rs, rs->f, block,
752 offset | RAM_SAVE_FLAG_XBZRLE);
753 qemu_put_byte(rs->f, ENCODING_FLAG_XBZRLE);
754 qemu_put_be16(rs->f, encoded_len);
755 qemu_put_buffer(rs->f, XBZRLE.encoded_buf, encoded_len);
756 bytes_xbzrle += encoded_len + 1 + 2;
757 xbzrle_counters.pages++;
758 xbzrle_counters.bytes += bytes_xbzrle;
759 ram_counters.transferred += bytes_xbzrle;
760
761 return 1;
762 }
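/*
 * On the wire an XBZRLE page therefore looks like:
 *
 *   <page header with RAM_SAVE_FLAG_XBZRLE set>
 *   u8    ENCODING_FLAG_XBZRLE
 *   be16  encoded_len
 *   bytes encoded_buf[encoded_len]
 *
 * which is why the accounting above adds encoded_len + 1 + 2 on top of
 * the size returned by save_page_header().
 */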
763
764 /**
765 * migration_bitmap_find_dirty: find the next dirty page from start
766 *
767 * Called with rcu_read_lock() to protect migration_bitmap
768 *
769 * Returns the page offset within the memory region of the start of a dirty page
770 *
771 * @rs: current RAM state
772 * @rb: RAMBlock where to search for dirty pages
773 * @start: page where we start the search
774 */
775 static inline
776 unsigned long migration_bitmap_find_dirty(RAMState *rs, RAMBlock *rb,
777 unsigned long start)
778 {
779 unsigned long size = rb->used_length >> TARGET_PAGE_BITS;
780 unsigned long *bitmap = rb->bmap;
781 unsigned long next;
782
783 if (rs->ram_bulk_stage && start > 0) {
784 next = start + 1;
785 } else {
786 next = find_next_bit(bitmap, size, start);
787 }
788
789 return next;
790 }
791
792 static inline bool migration_bitmap_clear_dirty(RAMState *rs,
793 RAMBlock *rb,
794 unsigned long page)
795 {
796 bool ret;
797
798 ret = test_and_clear_bit(page, rb->bmap);
799
800 if (ret) {
801 rs->migration_dirty_pages--;
802 }
803 return ret;
804 }
805
806 static void migration_bitmap_sync_range(RAMState *rs, RAMBlock *rb,
807 ram_addr_t start, ram_addr_t length)
808 {
809 rs->migration_dirty_pages +=
810 cpu_physical_memory_sync_dirty_bitmap(rb, start, length,
811 &rs->num_dirty_pages_period);
812 }
813
814 /**
815 * ram_pagesize_summary: calculate all the pagesizes of a VM
816 *
817 * Returns a summary bitmap of the page sizes of all RAMBlocks
818 *
819 * For VMs with just normal pages this is equivalent to the host page
820 * size. If it's got some huge pages then it's the OR of all the
821 * different page sizes.
822 */
823 uint64_t ram_pagesize_summary(void)
824 {
825 RAMBlock *block;
826 uint64_t summary = 0;
827
828 RAMBLOCK_FOREACH(block) {
829 summary |= block->page_size;
830 }
831
832 return summary;
833 }
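/*
 * Example: a guest whose RAMBlocks use 4 KiB pages plus one block
 * backed by 2 MiB huge pages yields 0x1000 | 0x200000 = 0x201000, so a
 * single value is enough to tell whether any huge pages are in use and
 * which sizes they are.
 */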
834
835 static void migration_bitmap_sync(RAMState *rs)
836 {
837 RAMBlock *block;
838 int64_t end_time;
839 uint64_t bytes_xfer_now;
840
841 ram_counters.dirty_sync_count++;
842
843 if (!rs->time_last_bitmap_sync) {
844 rs->time_last_bitmap_sync = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
845 }
846
847 trace_migration_bitmap_sync_start();
848 memory_global_dirty_log_sync();
849
850 qemu_mutex_lock(&rs->bitmap_mutex);
851 rcu_read_lock();
852 RAMBLOCK_FOREACH(block) {
853 migration_bitmap_sync_range(rs, block, 0, block->used_length);
854 }
855 rcu_read_unlock();
856 qemu_mutex_unlock(&rs->bitmap_mutex);
857
858 trace_migration_bitmap_sync_end(rs->num_dirty_pages_period);
859
860 end_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
861
862 /* more than 1 second = 1000 milliseconds */
863 if (end_time > rs->time_last_bitmap_sync + 1000) {
864 /* calculate period counters */
865 ram_counters.dirty_pages_rate = rs->num_dirty_pages_period * 1000
866 / (end_time - rs->time_last_bitmap_sync);
867 bytes_xfer_now = ram_counters.transferred;
868
869 /* During block migration the auto-converge logic incorrectly detects
870 * that ram migration makes no progress. Avoid this by disabling the
871 * throttling logic during the bulk phase of block migration. */
872 if (migrate_auto_converge() && !blk_mig_bulk_active()) {
873 /* The following detection logic can be refined later. For now:
874 Check to see whether the bytes dirtied in this period exceed 50% of
875 the approximate amount of bytes that just got transferred since the
876 last time we were in this routine. If that happens twice, start or
877 increase throttling. */
878
879 if ((rs->num_dirty_pages_period * TARGET_PAGE_SIZE >
880 (bytes_xfer_now - rs->bytes_xfer_prev) / 2) &&
881 (++rs->dirty_rate_high_cnt >= 2)) {
882 trace_migration_throttle();
883 rs->dirty_rate_high_cnt = 0;
884 mig_throttle_guest_down();
885 }
886 }
887
888 if (migrate_use_xbzrle()) {
889 if (rs->iterations_prev != rs->iterations) {
890 xbzrle_counters.cache_miss_rate =
891 (double)(xbzrle_counters.cache_miss -
892 rs->xbzrle_cache_miss_prev) /
893 (rs->iterations - rs->iterations_prev);
894 }
895 rs->iterations_prev = rs->iterations;
896 rs->xbzrle_cache_miss_prev = xbzrle_counters.cache_miss;
897 }
898
899 /* reset period counters */
900 rs->time_last_bitmap_sync = end_time;
901 rs->num_dirty_pages_period = 0;
902 rs->bytes_xfer_prev = bytes_xfer_now;
903 }
904 if (migrate_use_events()) {
905 qapi_event_send_migration_pass(ram_counters.dirty_sync_count, NULL);
906 }
907 }
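/*
 * To summarise the auto-converge trigger above: once per (roughly)
 * one-second period we compare the bytes dirtied in the period against
 * the bytes actually transferred in the same period; if the dirtied
 * amount exceeds half of the transferred amount twice, the guest is
 * throttled down (or throttled further) via mig_throttle_guest_down().
 */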
908
909 /**
910 * save_zero_page: send the zero page to the stream
911 *
912 * Returns the number of pages written.
913 *
914 * @rs: current RAM state
915 * @block: block that contains the page we want to send
916 * @offset: offset inside the block for the page
917 */
918 static int save_zero_page(RAMState *rs, RAMBlock *block, ram_addr_t offset)
919 {
920 uint8_t *p = block->host + offset;
921 int pages = -1;
922
923 if (is_zero_range(p, TARGET_PAGE_SIZE)) {
924 ram_counters.duplicate++;
925 ram_counters.transferred +=
926 save_page_header(rs, rs->f, block, offset | RAM_SAVE_FLAG_ZERO);
927 qemu_put_byte(rs->f, 0);
928 ram_counters.transferred += 1;
929 pages = 1;
930 }
931
932 return pages;
933 }
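/*
 * A zero page therefore costs only its page header (with
 * RAM_SAVE_FLAG_ZERO set) plus a single zero byte on the wire, instead
 * of a full TARGET_PAGE_SIZE payload, and is accounted under
 * ram_counters.duplicate rather than ram_counters.normal.
 */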
934
935 static void ram_release_pages(const char *rbname, uint64_t offset, int pages)
936 {
937 if (!migrate_release_ram() || !migration_in_postcopy()) {
938 return;
939 }
940
941 ram_discard_range(rbname, offset, pages << TARGET_PAGE_BITS);
942 }
943
944 /**
945 * ram_save_page: send the given page to the stream
946 *
947 * Returns the number of pages written.
948 * < 0 - error
949 * >=0 - Number of pages written - this might legally be 0
950 * if xbzrle noticed the page was the same.
951 *
952 * @rs: current RAM state
953 * @block: block that contains the page we want to send
954 * @offset: offset inside the block for the page
955 * @last_stage: if we are at the completion stage
956 */
957 static int ram_save_page(RAMState *rs, PageSearchStatus *pss, bool last_stage)
958 {
959 int pages = -1;
960 uint64_t bytes_xmit;
961 ram_addr_t current_addr;
962 uint8_t *p;
963 int ret;
964 bool send_async = true;
965 RAMBlock *block = pss->block;
966 ram_addr_t offset = pss->page << TARGET_PAGE_BITS;
967
968 p = block->host + offset;
969 trace_ram_save_page(block->idstr, (uint64_t)offset, p);
970
971 /* When in doubt, send the page as normal */
972 bytes_xmit = 0;
973 ret = ram_control_save_page(rs->f, block->offset,
974 offset, TARGET_PAGE_SIZE, &bytes_xmit);
975 if (bytes_xmit) {
976 ram_counters.transferred += bytes_xmit;
977 pages = 1;
978 }
979
980 XBZRLE_cache_lock();
981
982 current_addr = block->offset + offset;
983
984 if (ret != RAM_SAVE_CONTROL_NOT_SUPP) {
985 if (ret != RAM_SAVE_CONTROL_DELAYED) {
986 if (bytes_xmit > 0) {
987 ram_counters.normal++;
988 } else if (bytes_xmit == 0) {
989 ram_counters.duplicate++;
990 }
991 }
992 } else {
993 pages = save_zero_page(rs, block, offset);
994 if (pages > 0) {
995 /* Must let xbzrle know, otherwise a previous (now 0'd) cached
996 * page would be stale
997 */
998 xbzrle_cache_zero_page(rs, current_addr);
999 ram_release_pages(block->idstr, offset, pages);
1000 } else if (!rs->ram_bulk_stage &&
1001 !migration_in_postcopy() && migrate_use_xbzrle()) {
1002 pages = save_xbzrle_page(rs, &p, current_addr, block,
1003 offset, last_stage);
1004 if (!last_stage) {
1005 /* Can't send this cached data async, since the cache page
1006 * might get updated before it gets to the wire
1007 */
1008 send_async = false;
1009 }
1010 }
1011 }
1012
1013 /* XBZRLE overflow or normal page */
1014 if (pages == -1) {
1015 ram_counters.transferred +=
1016 save_page_header(rs, rs->f, block, offset | RAM_SAVE_FLAG_PAGE);
1017 if (send_async) {
1018 qemu_put_buffer_async(rs->f, p, TARGET_PAGE_SIZE,
1019 migrate_release_ram() &
1020 migration_in_postcopy());
1021 } else {
1022 qemu_put_buffer(rs->f, p, TARGET_PAGE_SIZE);
1023 }
1024 ram_counters.transferred += TARGET_PAGE_SIZE;
1025 pages = 1;
1026 ram_counters.normal++;
1027 }
1028
1029 XBZRLE_cache_unlock();
1030
1031 return pages;
1032 }
1033
1034 static int do_compress_ram_page(QEMUFile *f, RAMBlock *block,
1035 ram_addr_t offset)
1036 {
1037 RAMState *rs = ram_state;
1038 int bytes_sent, blen;
1039 uint8_t *p = block->host + (offset & TARGET_PAGE_MASK);
1040
1041 bytes_sent = save_page_header(rs, f, block, offset |
1042 RAM_SAVE_FLAG_COMPRESS_PAGE);
1043 blen = qemu_put_compression_data(f, p, TARGET_PAGE_SIZE,
1044 migrate_compress_level());
1045 if (blen < 0) {
1046 bytes_sent = 0;
1047 qemu_file_set_error(migrate_get_current()->to_dst_file, blen);
1048 error_report("compressed data failed!");
1049 } else {
1050 bytes_sent += blen;
1051 ram_release_pages(block->idstr, offset & TARGET_PAGE_MASK, 1);
1052 }
1053
1054 return bytes_sent;
1055 }
1056
1057 static void flush_compressed_data(RAMState *rs)
1058 {
1059 int idx, len, thread_count;
1060
1061 if (!migrate_use_compression()) {
1062 return;
1063 }
1064 thread_count = migrate_compress_threads();
1065
1066 qemu_mutex_lock(&comp_done_lock);
1067 for (idx = 0; idx < thread_count; idx++) {
1068 while (!comp_param[idx].done) {
1069 qemu_cond_wait(&comp_done_cond, &comp_done_lock);
1070 }
1071 }
1072 qemu_mutex_unlock(&comp_done_lock);
1073
1074 for (idx = 0; idx < thread_count; idx++) {
1075 qemu_mutex_lock(&comp_param[idx].mutex);
1076 if (!comp_param[idx].quit) {
1077 len = qemu_put_qemu_file(rs->f, comp_param[idx].file);
1078 ram_counters.transferred += len;
1079 }
1080 qemu_mutex_unlock(&comp_param[idx].mutex);
1081 }
1082 }
1083
1084 static inline void set_compress_params(CompressParam *param, RAMBlock *block,
1085 ram_addr_t offset)
1086 {
1087 param->block = block;
1088 param->offset = offset;
1089 }
1090
1091 static int compress_page_with_multi_thread(RAMState *rs, RAMBlock *block,
1092 ram_addr_t offset)
1093 {
1094 int idx, thread_count, bytes_xmit = -1, pages = -1;
1095
1096 thread_count = migrate_compress_threads();
1097 qemu_mutex_lock(&comp_done_lock);
1098 while (true) {
1099 for (idx = 0; idx < thread_count; idx++) {
1100 if (comp_param[idx].done) {
1101 comp_param[idx].done = false;
1102 bytes_xmit = qemu_put_qemu_file(rs->f, comp_param[idx].file);
1103 qemu_mutex_lock(&comp_param[idx].mutex);
1104 set_compress_params(&comp_param[idx], block, offset);
1105 qemu_cond_signal(&comp_param[idx].cond);
1106 qemu_mutex_unlock(&comp_param[idx].mutex);
1107 pages = 1;
1108 ram_counters.normal++;
1109 ram_counters.transferred += bytes_xmit;
1110 break;
1111 }
1112 }
1113 if (pages > 0) {
1114 break;
1115 } else {
1116 qemu_cond_wait(&comp_done_cond, &comp_done_lock);
1117 }
1118 }
1119 qemu_mutex_unlock(&comp_done_lock);
1120
1121 return pages;
1122 }
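/*
 * Note the buffering model: each compression thread writes its output
 * into a private QEMUFile (comp_param[idx].file) backed by empty_ops,
 * and that buffer is only copied into the real migration stream (rs->f)
 * via qemu_put_qemu_file(), either here when an idle thread is picked
 * for new work or when flush_compressed_data() drains all threads.  The
 * migration thread therefore never compresses pages itself; it only
 * waits for an idle worker.
 */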
1123
1124 /**
1125 * ram_save_compressed_page: compress the given page and send it to the stream
1126 *
1127 * Returns the number of pages written.
1128 *
1129 * @rs: current RAM state
1130 * @block: block that contains the page we want to send
1131 * @offset: offset inside the block for the page
1132 * @last_stage: if we are at the completion stage
1133 */
1134 static int ram_save_compressed_page(RAMState *rs, PageSearchStatus *pss,
1135 bool last_stage)
1136 {
1137 int pages = -1;
1138 uint64_t bytes_xmit = 0;
1139 uint8_t *p;
1140 int ret;
1141 RAMBlock *block = pss->block;
1142 ram_addr_t offset = pss->page << TARGET_PAGE_BITS;
1143
1144 p = block->host + offset;
1145
1146 ret = ram_control_save_page(rs->f, block->offset,
1147 offset, TARGET_PAGE_SIZE, &bytes_xmit);
1148 if (bytes_xmit) {
1149 ram_counters.transferred += bytes_xmit;
1150 pages = 1;
1151 }
1152 if (ret != RAM_SAVE_CONTROL_NOT_SUPP) {
1153 if (ret != RAM_SAVE_CONTROL_DELAYED) {
1154 if (bytes_xmit > 0) {
1155 ram_counters.normal++;
1156 } else if (bytes_xmit == 0) {
1157 ram_counters.duplicate++;
1158 }
1159 }
1160 } else {
1161 /* When starting to process a new block, the first page of
1162 * the block should be sent out before the other pages in the same
1163 * block, and all the pages in the previous block should have been
1164 * sent out already. Keeping this order is important, because the
1165 * 'cont' flag is used to avoid resending the block name.
1166 */
1167 if (block != rs->last_sent_block) {
1168 flush_compressed_data(rs);
1169 pages = save_zero_page(rs, block, offset);
1170 if (pages > 0) {
1171 ram_release_pages(block->idstr, offset, pages);
1172 } else {
1173 /*
1174 * Make sure the first page is sent out before other pages.
1175 *
1176 * We post it as a normal page, as compression would take a lot
1177 * of CPU time and delay it.
1178 */
1179 ram_counters.transferred += save_page_header(rs, rs->f, block,
1180 offset | RAM_SAVE_FLAG_PAGE);
1181 qemu_put_buffer_async(rs->f, p, TARGET_PAGE_SIZE,
1182 migrate_release_ram() &
1183 migration_in_postcopy());
1184 ram_counters.transferred += TARGET_PAGE_SIZE;
1185 ram_counters.normal++;
1186 pages = 1;
1187 }
1188 } else {
1189 pages = save_zero_page(rs, block, offset);
1190 if (pages == -1) {
1191 pages = compress_page_with_multi_thread(rs, block, offset);
1192 } else {
1193 ram_release_pages(block->idstr, offset, pages);
1194 }
1195 }
1196 }
1197
1198 return pages;
1199 }
1200
1201 /**
1202 * find_dirty_block: find the next dirty page and update any state
1203 * associated with the search process.
1204 *
1205 * Returns true if a page is found, false otherwise
1206 *
1207 * @rs: current RAM state
1208 * @pss: data about the state of the current dirty page scan
1209 * @again: set to false if the search has scanned the whole of RAM
1210 */
1211 static bool find_dirty_block(RAMState *rs, PageSearchStatus *pss, bool *again)
1212 {
1213 pss->page = migration_bitmap_find_dirty(rs, pss->block, pss->page);
1214 if (pss->complete_round && pss->block == rs->last_seen_block &&
1215 pss->page >= rs->last_page) {
1216 /*
1217 * We've been once around the RAM and haven't found anything.
1218 * Give up.
1219 */
1220 *again = false;
1221 return false;
1222 }
1223 if ((pss->page << TARGET_PAGE_BITS) >= pss->block->used_length) {
1224 /* Didn't find anything in this RAM Block */
1225 pss->page = 0;
1226 pss->block = QLIST_NEXT_RCU(pss->block, next);
1227 if (!pss->block) {
1228 /* Hit the end of the list */
1229 pss->block = QLIST_FIRST_RCU(&ram_list.blocks);
1230 /* Flag that we've looped */
1231 pss->complete_round = true;
1232 rs->ram_bulk_stage = false;
1233 if (migrate_use_xbzrle()) {
1234 /* If xbzrle is on, stop using the data compression at this
1235 * point. In theory, xbzrle can do better than compression.
1236 */
1237 flush_compressed_data(rs);
1238 }
1239 }
1240 /* Didn't find anything this time, but try again on the new block */
1241 *again = true;
1242 return false;
1243 } else {
1244 /* Can go around again, but... */
1245 *again = true;
1246 /* We've found something so probably don't need to */
1247 return true;
1248 }
1249 }
1250
1251 /**
1252 * unqueue_page: gets a page off the queue
1253 *
1254 * Helper for 'get_queued_page' - gets a page off the queue
1255 *
1256 * Returns the block of the page (or NULL if none available)
1257 *
1258 * @rs: current RAM state
1259 * @offset: used to return the offset within the RAMBlock
1260 */
1261 static RAMBlock *unqueue_page(RAMState *rs, ram_addr_t *offset)
1262 {
1263 RAMBlock *block = NULL;
1264
1265 qemu_mutex_lock(&rs->src_page_req_mutex);
1266 if (!QSIMPLEQ_EMPTY(&rs->src_page_requests)) {
1267 struct RAMSrcPageRequest *entry =
1268 QSIMPLEQ_FIRST(&rs->src_page_requests);
1269 block = entry->rb;
1270 *offset = entry->offset;
1271
1272 if (entry->len > TARGET_PAGE_SIZE) {
1273 entry->len -= TARGET_PAGE_SIZE;
1274 entry->offset += TARGET_PAGE_SIZE;
1275 } else {
1276 memory_region_unref(block->mr);
1277 QSIMPLEQ_REMOVE_HEAD(&rs->src_page_requests, next_req);
1278 g_free(entry);
1279 }
1280 }
1281 qemu_mutex_unlock(&rs->src_page_req_mutex);
1282
1283 return block;
1284 }
1285
1286 /**
1287 * get_queued_page: unqueue a page from the postcopy requests
1288 *
1289 * Skips pages that are already sent (!dirty)
1290 *
1291 * Returns true if a queued page is found, false otherwise
1292 *
1293 * @rs: current RAM state
1294 * @pss: data about the state of the current dirty page scan
1295 */
1296 static bool get_queued_page(RAMState *rs, PageSearchStatus *pss)
1297 {
1298 RAMBlock *block;
1299 ram_addr_t offset;
1300 bool dirty;
1301
1302 do {
1303 block = unqueue_page(rs, &offset);
1304 /*
1305 * We're sending this page, and since it's postcopy nothing else
1306 * will dirty it, and we must make sure it doesn't get sent again
1307 * even if this queue request was received after the background
1308 * search already sent it.
1309 */
1310 if (block) {
1311 unsigned long page;
1312
1313 page = offset >> TARGET_PAGE_BITS;
1314 dirty = test_bit(page, block->bmap);
1315 if (!dirty) {
1316 trace_get_queued_page_not_dirty(block->idstr, (uint64_t)offset,
1317 page, test_bit(page, block->unsentmap));
1318 } else {
1319 trace_get_queued_page(block->idstr, (uint64_t)offset, page);
1320 }
1321 }
1322
1323 } while (block && !dirty);
1324
1325 if (block) {
1326 /*
1327 * As soon as we start servicing pages out of order, we have
1328 * to kill the bulk stage, since the bulk stage assumes
1329 * (in migration_bitmap_find_dirty) that every page is
1330 * dirty, which is no longer true.
1331 */
1332 rs->ram_bulk_stage = false;
1333
1334 /*
1335 * We want the background search to continue from the queued page
1336 * since the guest is likely to want other pages near to the page
1337 * it just requested.
1338 */
1339 pss->block = block;
1340 pss->page = offset >> TARGET_PAGE_BITS;
1341 }
1342
1343 return !!block;
1344 }
1345
1346 /**
1347 * migration_page_queue_free: drop any remaining pages in the ram
1348 * request queue
1349 *
1350 * It should be empty at the end anyway, but in error cases there may
1351 * be some pages left; in that case we drop them.
1352 *
1353 */
1354 static void migration_page_queue_free(RAMState *rs)
1355 {
1356 struct RAMSrcPageRequest *mspr, *next_mspr;
1357 /* This queue generally should be empty - but in the case of a failed
1358 * migration might have some droppings in.
1359 */
1360 rcu_read_lock();
1361 QSIMPLEQ_FOREACH_SAFE(mspr, &rs->src_page_requests, next_req, next_mspr) {
1362 memory_region_unref(mspr->rb->mr);
1363 QSIMPLEQ_REMOVE_HEAD(&rs->src_page_requests, next_req);
1364 g_free(mspr);
1365 }
1366 rcu_read_unlock();
1367 }
1368
1369 /**
1370 * ram_save_queue_pages: queue the page for transmission
1371 *
1372 * A request from postcopy destination for example.
1373 *
1374 * Returns zero on success or negative on error
1375 *
1376 * @rbname: Name of the RAMBlock of the request. NULL means the
1377 * same as the last one.
1378 * @start: starting address from the start of the RAMBlock
1379 * @len: length (in bytes) to send
1380 */
1381 int ram_save_queue_pages(const char *rbname, ram_addr_t start, ram_addr_t len)
1382 {
1383 RAMBlock *ramblock;
1384 RAMState *rs = ram_state;
1385
1386 ram_counters.postcopy_requests++;
1387 rcu_read_lock();
1388 if (!rbname) {
1389 /* Reuse last RAMBlock */
1390 ramblock = rs->last_req_rb;
1391
1392 if (!ramblock) {
1393 /*
1394 * Shouldn't happen, we can't reuse the last RAMBlock if
1395 * it's the 1st request.
1396 */
1397 error_report("ram_save_queue_pages no previous block");
1398 goto err;
1399 }
1400 } else {
1401 ramblock = qemu_ram_block_by_name(rbname);
1402
1403 if (!ramblock) {
1404 /* We shouldn't be asked for a non-existent RAMBlock */
1405 error_report("ram_save_queue_pages no block '%s'", rbname);
1406 goto err;
1407 }
1408 rs->last_req_rb = ramblock;
1409 }
1410 trace_ram_save_queue_pages(ramblock->idstr, start, len);
1411 if (start+len > ramblock->used_length) {
1412 error_report("%s request overrun start=" RAM_ADDR_FMT " len="
1413 RAM_ADDR_FMT " blocklen=" RAM_ADDR_FMT,
1414 __func__, start, len, ramblock->used_length);
1415 goto err;
1416 }
1417
1418 struct RAMSrcPageRequest *new_entry =
1419 g_malloc0(sizeof(struct RAMSrcPageRequest));
1420 new_entry->rb = ramblock;
1421 new_entry->offset = start;
1422 new_entry->len = len;
1423
1424 memory_region_ref(ramblock->mr);
1425 qemu_mutex_lock(&rs->src_page_req_mutex);
1426 QSIMPLEQ_INSERT_TAIL(&rs->src_page_requests, new_entry, next_req);
1427 qemu_mutex_unlock(&rs->src_page_req_mutex);
1428 rcu_read_unlock();
1429
1430 return 0;
1431
1432 err:
1433 rcu_read_unlock();
1434 return -1;
1435 }
1436
1437 /**
1438 * ram_save_target_page: save one target page
1439 *
1440 * Returns the number of pages written
1441 *
1442 * @rs: current RAM state
1444 * @pss: data about the page we want to send
1445 * @last_stage: if we are at the completion stage
1446 */
1447 static int ram_save_target_page(RAMState *rs, PageSearchStatus *pss,
1448 bool last_stage)
1449 {
1450 int res = 0;
1451
1452 /* Check if the page is dirty and, if it is, send it */
1453 if (migration_bitmap_clear_dirty(rs, pss->block, pss->page)) {
1454 /*
1455 * If xbzrle is on, stop using the data compression after first
1456 * round of migration even if compression is enabled. In theory,
1457 * xbzrle can do better than compression.
1458 */
1459 if (migrate_use_compression() &&
1460 (rs->ram_bulk_stage || !migrate_use_xbzrle())) {
1461 res = ram_save_compressed_page(rs, pss, last_stage);
1462 } else {
1463 res = ram_save_page(rs, pss, last_stage);
1464 }
1465
1466 if (res < 0) {
1467 return res;
1468 }
1469 if (pss->block->unsentmap) {
1470 clear_bit(pss->page, pss->block->unsentmap);
1471 }
1472 }
1473
1474 return res;
1475 }
1476
1477 /**
1478 * ram_save_host_page: save a whole host page
1479 *
1480 * Starting at *offset send pages up to the end of the current host
1481 * page. It's valid for the initial offset to point into the middle of
1482 * a host page in which case the remainder of the hostpage is sent.
1483 * Only dirty target pages are sent. Note that the host page size may
1484 * be a huge page for this block.
1485 * The saving stops at the boundary of the used_length of the block
1486 * if the RAMBlock isn't a multiple of the host page size.
1487 *
1488 * Returns the number of pages written or negative on error
1489 *
1490 * @rs: current RAM state
1492 * @pss: data about the page we want to send
1493 * @last_stage: if we are at the completion stage
1494 */
1495 static int ram_save_host_page(RAMState *rs, PageSearchStatus *pss,
1496 bool last_stage)
1497 {
1498 int tmppages, pages = 0;
1499 size_t pagesize_bits =
1500 qemu_ram_pagesize(pss->block) >> TARGET_PAGE_BITS;
1501
1502 do {
1503 tmppages = ram_save_target_page(rs, pss, last_stage);
1504 if (tmppages < 0) {
1505 return tmppages;
1506 }
1507
1508 pages += tmppages;
1509 pss->page++;
1510 } while ((pss->page & (pagesize_bits - 1)) &&
1511 offset_in_ramblock(pss->block, pss->page << TARGET_PAGE_BITS));
1512
1513 /* The offset we leave with is the last one we looked at */
1514 pss->page--;
1515 return pages;
1516 }
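/*
 * Example: with 4 KiB target pages on a RAMBlock backed by 2 MiB huge
 * pages, pagesize_bits is 512, so a single call can emit up to 512
 * target pages; when host and target page sizes match, pagesize_bits
 * is 1 and the loop body runs exactly once.
 */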
1517
1518 /**
1519 * ram_find_and_save_block: finds a dirty page and sends it to f
1520 *
1521 * Called within an RCU critical section.
1522 *
1523 * Returns the number of pages written where zero means no dirty pages
1524 *
1525 * @rs: current RAM state
1526 * @last_stage: if we are at the completion stage
1527 *
1528 * On systems where host-page-size > target-page-size it will send all the
1529 * pages in a host page that are dirty.
1530 */
1531
1532 static int ram_find_and_save_block(RAMState *rs, bool last_stage)
1533 {
1534 PageSearchStatus pss;
1535 int pages = 0;
1536 bool again, found;
1537
1538 /* No dirty page as there is zero RAM */
1539 if (!ram_bytes_total()) {
1540 return pages;
1541 }
1542
1543 pss.block = rs->last_seen_block;
1544 pss.page = rs->last_page;
1545 pss.complete_round = false;
1546
1547 if (!pss.block) {
1548 pss.block = QLIST_FIRST_RCU(&ram_list.blocks);
1549 }
1550
1551 do {
1552 again = true;
1553 found = get_queued_page(rs, &pss);
1554
1555 if (!found) {
1556 /* priority queue empty, so just search for something dirty */
1557 found = find_dirty_block(rs, &pss, &again);
1558 }
1559
1560 if (found) {
1561 pages = ram_save_host_page(rs, &pss, last_stage);
1562 }
1563 } while (!pages && again);
1564
1565 rs->last_seen_block = pss.block;
1566 rs->last_page = pss.page;
1567
1568 return pages;
1569 }
1570
1571 void acct_update_position(QEMUFile *f, size_t size, bool zero)
1572 {
1573 uint64_t pages = size / TARGET_PAGE_SIZE;
1574
1575 if (zero) {
1576 ram_counters.duplicate += pages;
1577 } else {
1578 ram_counters.normal += pages;
1579 ram_counters.transferred += size;
1580 qemu_update_position(f, size);
1581 }
1582 }
1583
1584 uint64_t ram_bytes_total(void)
1585 {
1586 RAMBlock *block;
1587 uint64_t total = 0;
1588
1589 rcu_read_lock();
1590 RAMBLOCK_FOREACH(block) {
1591 total += block->used_length;
1592 }
1593 rcu_read_unlock();
1594 return total;
1595 }
1596
1597 static void xbzrle_load_setup(void)
1598 {
1599 XBZRLE.decoded_buf = g_malloc(TARGET_PAGE_SIZE);
1600 }
1601
1602 static void xbzrle_load_cleanup(void)
1603 {
1604 g_free(XBZRLE.decoded_buf);
1605 XBZRLE.decoded_buf = NULL;
1606 }
1607
1608 static void ram_state_cleanup(RAMState **rsp)
1609 {
1610 if (*rsp) {
1611 migration_page_queue_free(*rsp);
1612 qemu_mutex_destroy(&(*rsp)->bitmap_mutex);
1613 qemu_mutex_destroy(&(*rsp)->src_page_req_mutex);
1614 g_free(*rsp);
1615 *rsp = NULL;
1616 }
1617 }
1618
1619 static void xbzrle_cleanup(void)
1620 {
1621 XBZRLE_cache_lock();
1622 if (XBZRLE.cache) {
1623 cache_fini(XBZRLE.cache);
1624 g_free(XBZRLE.encoded_buf);
1625 g_free(XBZRLE.current_buf);
1626 g_free(XBZRLE.zero_target_page);
1627 XBZRLE.cache = NULL;
1628 XBZRLE.encoded_buf = NULL;
1629 XBZRLE.current_buf = NULL;
1630 XBZRLE.zero_target_page = NULL;
1631 }
1632 XBZRLE_cache_unlock();
1633 }
1634
1635 static void ram_save_cleanup(void *opaque)
1636 {
1637 RAMState **rsp = opaque;
1638 RAMBlock *block;
1639
1640 /* The caller must hold the iothread lock or be in a bottom half, so
1641 * there is no write race against this migration_bitmap
1642 */
1643 memory_global_dirty_log_stop();
1644
1645 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1646 g_free(block->bmap);
1647 block->bmap = NULL;
1648 g_free(block->unsentmap);
1649 block->unsentmap = NULL;
1650 }
1651
1652 xbzrle_cleanup();
1653 compress_threads_save_cleanup();
1654 ram_state_cleanup(rsp);
1655 }
1656
1657 static void ram_state_reset(RAMState *rs)
1658 {
1659 rs->last_seen_block = NULL;
1660 rs->last_sent_block = NULL;
1661 rs->last_page = 0;
1662 rs->last_version = ram_list.version;
1663 rs->ram_bulk_stage = true;
1664 }
1665
1666 #define MAX_WAIT 50 /* ms, half buffered_file limit */
1667
1668 /*
1669 * 'expected' is the value you expect the bitmap mostly to be full
1670 * of; it won't bother printing lines that are all this value.
1671 * If 'todump' is null the migration bitmap is dumped.
1672 */
1673 void ram_debug_dump_bitmap(unsigned long *todump, bool expected,
1674 unsigned long pages)
1675 {
1676 int64_t cur;
1677 int64_t linelen = 128;
1678 char linebuf[129];
1679
1680 for (cur = 0; cur < pages; cur += linelen) {
1681 int64_t curb;
1682 bool found = false;
1683 /*
1684 * Last line; catch the case where the line length
1685 * is longer than remaining ram
1686 */
1687 if (cur + linelen > pages) {
1688 linelen = pages - cur;
1689 }
1690 for (curb = 0; curb < linelen; curb++) {
1691 bool thisbit = test_bit(cur + curb, todump);
1692 linebuf[curb] = thisbit ? '1' : '.';
1693 found = found || (thisbit != expected);
1694 }
1695 if (found) {
1696 linebuf[curb] = '\0';
1697 fprintf(stderr, "0x%08" PRIx64 " : %s\n", cur, linebuf);
1698 }
1699 }
1700 }
1701
1702 /* **** functions for postcopy ***** */
1703
1704 void ram_postcopy_migrated_memory_release(MigrationState *ms)
1705 {
1706 struct RAMBlock *block;
1707
1708 RAMBLOCK_FOREACH(block) {
1709 unsigned long *bitmap = block->bmap;
1710 unsigned long range = block->used_length >> TARGET_PAGE_BITS;
1711 unsigned long run_start = find_next_zero_bit(bitmap, range, 0);
1712
1713 while (run_start < range) {
1714 unsigned long run_end = find_next_bit(bitmap, range, run_start + 1);
1715 ram_discard_range(block->idstr, run_start << TARGET_PAGE_BITS,
1716 (run_end - run_start) << TARGET_PAGE_BITS);
1717 run_start = find_next_zero_bit(bitmap, range, run_end + 1);
1718 }
1719 }
1720 }
1721
1722 /**
1723 * postcopy_send_discard_bm_ram: discard a RAMBlock
1724 *
1725 * Returns zero on success
1726 *
1727 * Callback from postcopy_each_ram_send_discard for each RAMBlock
1728 * Note: At this point the 'unsentmap' is the processed bitmap combined
1729 * with the dirtymap; so a '1' means it's either dirty or unsent.
1730 *
1731 * @ms: current migration state
1732 * @pds: state for postcopy
1733 * @block: RAMBlock to send discards for
1735 */
1736 static int postcopy_send_discard_bm_ram(MigrationState *ms,
1737 PostcopyDiscardState *pds,
1738 RAMBlock *block)
1739 {
1740 unsigned long end = block->used_length >> TARGET_PAGE_BITS;
1741 unsigned long current;
1742 unsigned long *unsentmap = block->unsentmap;
1743
1744 for (current = 0; current < end; ) {
1745 unsigned long one = find_next_bit(unsentmap, end, current);
1746
1747 if (one <= end) {
1748 unsigned long zero = find_next_zero_bit(unsentmap, end, one + 1);
1749 unsigned long discard_length;
1750
1751 if (zero >= end) {
1752 discard_length = end - one;
1753 } else {
1754 discard_length = zero - one;
1755 }
1756 if (discard_length) {
1757 postcopy_discard_send_range(ms, pds, one, discard_length);
1758 }
1759 current = one + discard_length;
1760 } else {
1761 current = one;
1762 }
1763 }
1764
1765 return 0;
1766 }
1767
1768 /**
1769 * postcopy_each_ram_send_discard: discard all RAMBlocks
1770 *
1771 * Returns 0 for success or negative for error
1772 *
1773 * Utility for the outgoing postcopy code.
1774 * Calls postcopy_send_discard_bm_ram for each RAMBlock
1775 * passing it bitmap indexes and name.
1776 * (qemu_ram_foreach_block ends up passing unscaled lengths
1777 * which would mean postcopy code would have to deal with target page)
1778 *
1779 * @ms: current migration state
1780 */
1781 static int postcopy_each_ram_send_discard(MigrationState *ms)
1782 {
1783 struct RAMBlock *block;
1784 int ret;
1785
1786 RAMBLOCK_FOREACH(block) {
1787 PostcopyDiscardState *pds =
1788 postcopy_discard_send_init(ms, block->idstr);
1789
1790 /*
1791 * Postcopy sends chunks of bitmap over the wire, but it
1792 * just needs indexes at this point, avoids it having
1793 * target page specific code.
1794 */
1795 ret = postcopy_send_discard_bm_ram(ms, pds, block);
1796 postcopy_discard_send_finish(ms, pds);
1797 if (ret) {
1798 return ret;
1799 }
1800 }
1801
1802 return 0;
1803 }
1804
1805 /**
1806 * postcopy_chunk_hostpages_pass: canonicalize bitmap in hostpages
1807 *
1808 * Helper for postcopy_chunk_hostpages; it's called twice to
1809 * canonicalize the two bitmaps, which are similar but one is
1810 * inverted.
1811 *
1812 * Postcopy requires that all target pages in a hostpage are dirty or
1813 * clean, not a mix. This function canonicalizes the bitmaps.
1814 *
1815 * @ms: current migration state
1816 * @unsent_pass: if true we need to canonicalize partially unsent host pages
1817 * otherwise we need to canonicalize partially dirty host pages
1818 * @block: block that contains the page we want to canonicalize
1819 * @pds: state for postcopy
1820 */
1821 static void postcopy_chunk_hostpages_pass(MigrationState *ms, bool unsent_pass,
1822 RAMBlock *block,
1823 PostcopyDiscardState *pds)
1824 {
1825 RAMState *rs = ram_state;
1826 unsigned long *bitmap = block->bmap;
1827 unsigned long *unsentmap = block->unsentmap;
1828 unsigned int host_ratio = block->page_size / TARGET_PAGE_SIZE;
1829 unsigned long pages = block->used_length >> TARGET_PAGE_BITS;
1830 unsigned long run_start;
1831
1832 if (block->page_size == TARGET_PAGE_SIZE) {
1833 /* Easy case - TPS==HPS for a non-huge page RAMBlock */
1834 return;
1835 }
1836
1837 if (unsent_pass) {
1838 /* Find a sent page */
1839 run_start = find_next_zero_bit(unsentmap, pages, 0);
1840 } else {
1841 /* Find a dirty page */
1842 run_start = find_next_bit(bitmap, pages, 0);
1843 }
1844
1845 while (run_start < pages) {
1846 bool do_fixup = false;
1847 unsigned long fixup_start_addr;
1848 unsigned long host_offset;
1849
1850 /*
1851 * If the start of this run of pages is in the middle of a host
1852 * page, then we need to fixup this host page.
1853 */
1854 host_offset = run_start % host_ratio;
1855 if (host_offset) {
1856 do_fixup = true;
1857 run_start -= host_offset;
1858 fixup_start_addr = run_start;
1859 /* For the next pass */
1860 run_start = run_start + host_ratio;
1861 } else {
1862 /* Find the end of this run */
1863 unsigned long run_end;
1864 if (unsent_pass) {
1865 run_end = find_next_bit(unsentmap, pages, run_start + 1);
1866 } else {
1867 run_end = find_next_zero_bit(bitmap, pages, run_start + 1);
1868 }
1869 /*
1870 * If the end isn't at the start of a host page, then the
1871 * run doesn't finish at the end of a host page
1872 * and we need to discard.
1873 */
1874 host_offset = run_end % host_ratio;
1875 if (host_offset) {
1876 do_fixup = true;
1877 fixup_start_addr = run_end - host_offset;
1878 /*
1879 * This host page has gone, the next loop iteration starts
1880 * from after the fixup
1881 */
1882 run_start = fixup_start_addr + host_ratio;
1883 } else {
1884 /*
1885 * No discards on this iteration, next loop starts from
1886 * next sent/dirty page
1887 */
1888 run_start = run_end + 1;
1889 }
1890 }
1891
1892 if (do_fixup) {
1893 unsigned long page;
1894
1895 /* Tell the destination to discard this page */
1896 if (unsent_pass || !test_bit(fixup_start_addr, unsentmap)) {
1897 /* For the unsent_pass we:
1898 * discard partially sent pages
1899 * For the !unsent_pass (dirty) we:
1900 * discard partially dirty pages that were sent
1901 * (any partially sent pages were already discarded
1902 * by the previous unsent_pass)
1903 */
1904 postcopy_discard_send_range(ms, pds, fixup_start_addr,
1905 host_ratio);
1906 }
1907
1908 /* Clean up the bitmap */
1909 for (page = fixup_start_addr;
1910 page < fixup_start_addr + host_ratio; page++) {
1911 /* All pages in this host page are now not sent */
1912 set_bit(page, unsentmap);
1913
1914 /*
1915 * Remark them as dirty, updating the count for any pages
1916 * that weren't previously dirty.
1917 */
1918 rs->migration_dirty_pages += !test_and_set_bit(page, bitmap);
1919 }
1920 }
1921
1922 if (unsent_pass) {
1923 /* Find the next sent page for the next iteration */
1924 run_start = find_next_zero_bit(unsentmap, pages, run_start);
1925 } else {
1926 /* Find the next dirty page for the next iteration */
1927 run_start = find_next_bit(bitmap, pages, run_start);
1928 }
1929 }
1930 }
1931
1932 /**
1933 * postcopy_chunk_hostpages: discard any partially sent host page
1934 *
1935 * Utility for the outgoing postcopy code.
1936 *
1937 * Discard any partially sent host-page-sized chunks and mark any partially
1938 * dirty host-page-sized chunks as fully dirty. Here the host page is the
1939 * host page of the particular RAMBlock, i.e. it may be a huge page.
1940 *
1941 * Returns zero on success
1942 *
1943 * @ms: current migration state
1944 * @block: block we want to work with
1945 */
1946 static int postcopy_chunk_hostpages(MigrationState *ms, RAMBlock *block)
1947 {
1948 PostcopyDiscardState *pds =
1949 postcopy_discard_send_init(ms, block->idstr);
1950
1951 /* First pass: Discard all partially sent host pages */
1952 postcopy_chunk_hostpages_pass(ms, true, block, pds);
1953 /*
1954 * Second pass: Ensure that all partially dirty host pages are made
1955 * fully dirty.
1956 */
1957 postcopy_chunk_hostpages_pass(ms, false, block, pds);
1958
1959 postcopy_discard_send_finish(ms, pds);
1960 return 0;
1961 }
1962
1963 /**
1964 * ram_postcopy_send_discard_bitmap: transmit the discard bitmap
1965 *
1966 * Returns zero on success
1967 *
1968 * Transmit the set of pages to be discarded after precopy to the target;
1969 * these are pages that:
1970 * a) Have been previously transmitted but are now dirty again
1971 * b) Have never been transmitted; this ensures that any pages on the
1972 * destination that have been mapped by background tasks get discarded
1973 * (transparent huge pages are the specific concern)
1974 * Hopefully this is pretty sparse
1975 *
1976 * @ms: current migration state
1977 */
1978 int ram_postcopy_send_discard_bitmap(MigrationState *ms)
1979 {
1980 RAMState *rs = ram_state;
1981 RAMBlock *block;
1982 int ret;
1983
1984 rcu_read_lock();
1985
1986 /* This should be our last sync, the src is now paused */
1987 migration_bitmap_sync(rs);
1988
1989 /* Easiest way to make sure we don't resume in the middle of a host-page */
1990 rs->last_seen_block = NULL;
1991 rs->last_sent_block = NULL;
1992 rs->last_page = 0;
1993
1994 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1995 unsigned long pages = block->used_length >> TARGET_PAGE_BITS;
1996 unsigned long *bitmap = block->bmap;
1997 unsigned long *unsentmap = block->unsentmap;
1998
1999 if (!unsentmap) {
2000 /* We don't have a safe way to resize the unsentmap, so
2001 * if the bitmap was resized it will be NULL at this
2002 * point.
2003 */
2004 error_report("migration ram resized during precopy phase");
2005 rcu_read_unlock();
2006 return -EINVAL;
2007 }
2008 /* Deal with TPS != HPS and huge pages */
2009 ret = postcopy_chunk_hostpages(ms, block);
2010 if (ret) {
2011 rcu_read_unlock();
2012 return ret;
2013 }
2014
2015 /*
2016 * Update the unsentmap to be unsentmap = unsentmap | dirty
2017 */
2018 bitmap_or(unsentmap, unsentmap, bitmap, pages);
2019 #ifdef DEBUG_POSTCOPY
2020 ram_debug_dump_bitmap(unsentmap, true, pages);
2021 #endif
2022 }
2023 trace_ram_postcopy_send_discard_bitmap();
2024
2025 ret = postcopy_each_ram_send_discard(ms);
2026 rcu_read_unlock();
2027
2028 return ret;
2029 }
2030
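/*
 * Editor's note (illustrative, not in the original source): in
 * ram_postcopy_send_discard_bitmap() the unsentmap is OR-ed with the dirty
 * bitmap, so a target page ends up set in unsentmap when it was either
 * never sent during precopy or has been dirtied again since it was sent:
 *
 *     unsent  dirty  ->  unsentmap bit (page is included in the discards)
 *       0       0    ->       0
 *       0       1    ->       1
 *       1       0    ->       1
 *       1       1    ->       1
 */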
2031 /**
2032 * ram_discard_range: discard dirtied pages at the beginning of postcopy
2033 *
2034 * Returns zero on success
2035 *
2036 * @rbname: name of the RAMBlock of the request. NULL means the
2037 * same that last one.
2038 * @start: starting offset within the RAMBlock, in bytes
2039 * @length: length of the range to discard, in bytes
2040 */
2041 int ram_discard_range(const char *rbname, uint64_t start, size_t length)
2042 {
2043 int ret = -1;
2044
2045 trace_ram_discard_range(rbname, start, length);
2046
2047 rcu_read_lock();
2048 RAMBlock *rb = qemu_ram_block_by_name(rbname);
2049
2050 if (!rb) {
2051 error_report("ram_discard_range: Failed to find block '%s'", rbname);
2052 goto err;
2053 }
2054
2055 bitmap_clear(rb->receivedmap, start >> qemu_target_page_bits(),
2056 length >> qemu_target_page_bits());
2057 ret = ram_block_discard_range(rb, start, length);
2058
2059 err:
2060 rcu_read_unlock();
2061
2062 return ret;
2063 }
2064
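/*
 * Editor's sketch (hypothetical usage, not part of the original file):
 * discarding the first four target pages of a block named "pc.ram" --
 * the block name is only an example:
 *
 *     if (ram_discard_range("pc.ram", 0, 4 * TARGET_PAGE_SIZE) < 0) {
 *         error_report("postcopy discard failed");
 *     }
 */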
2065 /*
2066 * For every allocation, we will try not to crash the VM if the
2067 * allocation failed.
2068 */
2069 static int xbzrle_init(void)
2070 {
2071 Error *local_err = NULL;
2072
2073 if (!migrate_use_xbzrle()) {
2074 return 0;
2075 }
2076
2077 XBZRLE_cache_lock();
2078
2079 XBZRLE.zero_target_page = g_try_malloc0(TARGET_PAGE_SIZE);
2080 if (!XBZRLE.zero_target_page) {
2081 error_report("%s: Error allocating zero page", __func__);
2082 goto err_out;
2083 }
2084
2085 XBZRLE.cache = cache_init(migrate_xbzrle_cache_size(),
2086 TARGET_PAGE_SIZE, &local_err);
2087 if (!XBZRLE.cache) {
2088 error_report_err(local_err);
2089 goto free_zero_page;
2090 }
2091
2092 XBZRLE.encoded_buf = g_try_malloc0(TARGET_PAGE_SIZE);
2093 if (!XBZRLE.encoded_buf) {
2094 error_report("%s: Error allocating encoded_buf", __func__);
2095 goto free_cache;
2096 }
2097
2098 XBZRLE.current_buf = g_try_malloc(TARGET_PAGE_SIZE);
2099 if (!XBZRLE.current_buf) {
2100 error_report("%s: Error allocating current_buf", __func__);
2101 goto free_encoded_buf;
2102 }
2103
2104 /* We are all good */
2105 XBZRLE_cache_unlock();
2106 return 0;
2107
2108 free_encoded_buf:
2109 g_free(XBZRLE.encoded_buf);
2110 XBZRLE.encoded_buf = NULL;
2111 free_cache:
2112 cache_fini(XBZRLE.cache);
2113 XBZRLE.cache = NULL;
2114 free_zero_page:
2115 g_free(XBZRLE.zero_target_page);
2116 XBZRLE.zero_target_page = NULL;
2117 err_out:
2118 XBZRLE_cache_unlock();
2119 return -ENOMEM;
2120 }
2121
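/*
 * Editor's note (illustrative arithmetic, based on the assumption that
 * cache_init() sizes the cache as total-bytes / page-size entries): with a
 * 64MiB XBZRLE cache and 4KiB target pages the cache can hold roughly
 * 64MiB / 4KiB = 16384 previously sent pages to diff against.
 */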
2122 static int ram_state_init(RAMState **rsp)
2123 {
2124 *rsp = g_try_new0(RAMState, 1);
2125
2126 if (!*rsp) {
2127 error_report("%s: Init ramstate fail", __func__);
2128 return -1;
2129 }
2130
2131 qemu_mutex_init(&(*rsp)->bitmap_mutex);
2132 qemu_mutex_init(&(*rsp)->src_page_req_mutex);
2133 QSIMPLEQ_INIT(&(*rsp)->src_page_requests);
2134
2135 /*
2136 * Count the total number of pages used by ram blocks not including any
2137 * gaps due to alignment or unplugs.
2138 */
2139 (*rsp)->migration_dirty_pages = ram_bytes_total() >> TARGET_PAGE_BITS;
2140
2141 ram_state_reset(*rsp);
2142
2143 return 0;
2144 }
2145
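/*
 * Editor's example (hypothetical guest size): for a 4GiB guest with 4KiB
 * target pages, ram_state_init() starts migration_dirty_pages at
 * 4GiB >> 12 = 1048576, i.e. every page is initially considered dirty and
 * must be sent at least once.
 */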
2146 static void ram_list_init_bitmaps(void)
2147 {
2148 RAMBlock *block;
2149 unsigned long pages;
2150
2151 /* Skip setting bitmap if there is no RAM */
2152 if (ram_bytes_total()) {
2153 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
2154 pages = block->max_length >> TARGET_PAGE_BITS;
2155 block->bmap = bitmap_new(pages);
2156 bitmap_set(block->bmap, 0, pages);
2157 if (migrate_postcopy_ram()) {
2158 block->unsentmap = bitmap_new(pages);
2159 bitmap_set(block->unsentmap, 0, pages);
2160 }
2161 }
2162 }
2163 }
2164
2165 static void ram_init_bitmaps(RAMState *rs)
2166 {
2167 /* For memory_global_dirty_log_start below. */
2168 qemu_mutex_lock_iothread();
2169 qemu_mutex_lock_ramlist();
2170 rcu_read_lock();
2171
2172 ram_list_init_bitmaps();
2173 memory_global_dirty_log_start();
2174 migration_bitmap_sync(rs);
2175
2176 rcu_read_unlock();
2177 qemu_mutex_unlock_ramlist();
2178 qemu_mutex_unlock_iothread();
2179 }
2180
2181 static int ram_init_all(RAMState **rsp)
2182 {
2183 if (ram_state_init(rsp)) {
2184 return -1;
2185 }
2186
2187 if (xbzrle_init()) {
2188 ram_state_cleanup(rsp);
2189 return -1;
2190 }
2191
2192 ram_init_bitmaps(*rsp);
2193
2194 return 0;
2195 }
2196
2197 /*
2198 * Each of ram_save_setup, ram_save_iterate and ram_save_complete has a
2199 * long-running RCU critical section. When RCU reclaims in the code
2200 * start to become numerous, it will be necessary to reduce the
2201 * granularity of these critical sections.
2202 */
2203
2204 /**
2205 * ram_save_setup: Setup RAM for migration
2206 *
2207 * Returns zero to indicate success and negative for error
2208 *
2209 * @f: QEMUFile where to send the data
2210 * @opaque: RAMState pointer
2211 */
2212 static int ram_save_setup(QEMUFile *f, void *opaque)
2213 {
2214 RAMState **rsp = opaque;
2215 RAMBlock *block;
2216
2217 /* migration has already set up the bitmap, reuse it. */
2218 if (!migration_in_colo_state()) {
2219 if (ram_init_all(rsp) != 0) {
2220 return -1;
2221 }
2222 }
2223 (*rsp)->f = f;
2224
2225 rcu_read_lock();
2226
2227 qemu_put_be64(f, ram_bytes_total() | RAM_SAVE_FLAG_MEM_SIZE);
2228
2229 RAMBLOCK_FOREACH(block) {
2230 qemu_put_byte(f, strlen(block->idstr));
2231 qemu_put_buffer(f, (uint8_t *)block->idstr, strlen(block->idstr));
2232 qemu_put_be64(f, block->used_length);
2233 if (migrate_postcopy_ram() && block->page_size != qemu_host_page_size) {
2234 qemu_put_be64(f, block->page_size);
2235 }
2236 }
2237
2238 rcu_read_unlock();
2239 compress_threads_save_setup();
2240
2241 ram_control_before_iterate(f, RAM_CONTROL_SETUP);
2242 ram_control_after_iterate(f, RAM_CONTROL_SETUP);
2243
2244 qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
2245
2246 return 0;
2247 }
2248
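/*
 * Editor's sketch of the setup section that ram_save_setup() emits, as read
 * back by ram_load()'s RAM_SAVE_FLAG_MEM_SIZE handling (layout reconstructed
 * from the code above, shown only as a reading aid):
 *
 *     be64: total ram bytes | RAM_SAVE_FLAG_MEM_SIZE
 *     for each RAMBlock:
 *         byte:  strlen(idstr)
 *         bytes: idstr (not NUL terminated)
 *         be64:  used_length
 *         be64:  page_size  (only if postcopy-ram and page_size != host's)
 *     be64: RAM_SAVE_FLAG_EOS
 */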
2249 /**
2250 * ram_save_iterate: iterative stage for migration
2251 *
2252 * Returns zero to indicate success and negative for error
2253 *
2254 * @f: QEMUFile where to send the data
2255 * @opaque: RAMState pointer
2256 */
2257 static int ram_save_iterate(QEMUFile *f, void *opaque)
2258 {
2259 RAMState **temp = opaque;
2260 RAMState *rs = *temp;
2261 int ret;
2262 int i;
2263 int64_t t0;
2264 int done = 0;
2265
2266 if (blk_mig_bulk_active()) {
2267 /* Avoid transferring ram during bulk phase of block migration as
2268 * the bulk phase will usually take a long time and transferring
2269 * ram updates during that time is pointless. */
2270 goto out;
2271 }
2272
2273 rcu_read_lock();
2274 if (ram_list.version != rs->last_version) {
2275 ram_state_reset(rs);
2276 }
2277
2278 /* Read version before ram_list.blocks */
2279 smp_rmb();
2280
2281 ram_control_before_iterate(f, RAM_CONTROL_ROUND);
2282
2283 t0 = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
2284 i = 0;
2285 while ((ret = qemu_file_rate_limit(f)) == 0) {
2286 int pages;
2287
2288 pages = ram_find_and_save_block(rs, false);
2289 /* no more pages to send */
2290 if (pages == 0) {
2291 done = 1;
2292 break;
2293 }
2294 rs->iterations++;
2295
2296 /* We want to check in the 1st loop, just in case it was the 1st time
2297 and we had to sync the dirty bitmap.
2298 qemu_clock_get_ns() is a bit expensive, so we only check every few
2299 iterations.
2300 */
2301 if ((i & 63) == 0) {
2302 uint64_t t1 = (qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - t0) / 1000000;
2303 if (t1 > MAX_WAIT) {
2304 trace_ram_save_iterate_big_wait(t1, i);
2305 break;
2306 }
2307 }
2308 i++;
2309 }
2310 flush_compressed_data(rs);
2311 rcu_read_unlock();
2312
2313 /*
2314 * Must occur before EOS (or any QEMUFile operation)
2315 * because of RDMA protocol.
2316 */
2317 ram_control_after_iterate(f, RAM_CONTROL_ROUND);
2318
2319 out:
2320 qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
2321 ram_counters.transferred += 8;
2322
2323 ret = qemu_file_get_error(f);
2324 if (ret < 0) {
2325 return ret;
2326 }
2327
2328 return done;
2329 }
2330
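/*
 * Editor's note (behavioural summary of the loop above, no new logic):
 * ram_save_iterate() keeps sending pages until qemu_file_rate_limit() says
 * to stop or nothing is left; to bound latency it also reads the wall clock
 * once every 64 pages ((i & 63) == 0) and breaks out of the round once more
 * than MAX_WAIT milliseconds have elapsed.
 */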
2331 /**
2332 * ram_save_complete: function called to send the remaining amount of ram
2333 *
2334 * Returns zero to indicate success
2335 *
2336 * Called with iothread lock
2337 *
2338 * @f: QEMUFile where to send the data
2339 * @opaque: RAMState pointer
2340 */
2341 static int ram_save_complete(QEMUFile *f, void *opaque)
2342 {
2343 RAMState **temp = opaque;
2344 RAMState *rs = *temp;
2345
2346 rcu_read_lock();
2347
2348 if (!migration_in_postcopy()) {
2349 migration_bitmap_sync(rs);
2350 }
2351
2352 ram_control_before_iterate(f, RAM_CONTROL_FINISH);
2353
2354 /* try transferring iterative blocks of memory */
2355
2356 /* flush all remaining blocks regardless of rate limiting */
2357 while (true) {
2358 int pages;
2359
2360 pages = ram_find_and_save_block(rs, !migration_in_colo_state());
2361 /* no more blocks to send */
2362 if (pages == 0) {
2363 break;
2364 }
2365 }
2366
2367 flush_compressed_data(rs);
2368 ram_control_after_iterate(f, RAM_CONTROL_FINISH);
2369
2370 rcu_read_unlock();
2371
2372 qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
2373
2374 return 0;
2375 }
2376
2377 static void ram_save_pending(QEMUFile *f, void *opaque, uint64_t max_size,
2378 uint64_t *res_precopy_only,
2379 uint64_t *res_compatible,
2380 uint64_t *res_postcopy_only)
2381 {
2382 RAMState **temp = opaque;
2383 RAMState *rs = *temp;
2384 uint64_t remaining_size;
2385
2386 remaining_size = rs->migration_dirty_pages * TARGET_PAGE_SIZE;
2387
2388 if (!migration_in_postcopy() &&
2389 remaining_size < max_size) {
2390 qemu_mutex_lock_iothread();
2391 rcu_read_lock();
2392 migration_bitmap_sync(rs);
2393 rcu_read_unlock();
2394 qemu_mutex_unlock_iothread();
2395 remaining_size = rs->migration_dirty_pages * TARGET_PAGE_SIZE;
2396 }
2397
2398 if (migrate_postcopy_ram()) {
2399 /* We can do postcopy, and all the data is postcopiable */
2400 *res_compatible += remaining_size;
2401 } else {
2402 *res_precopy_only += remaining_size;
2403 }
2404 }
2405
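/*
 * Editor's note (illustrative arithmetic, hypothetical numbers): with 4KiB
 * target pages and 25000 pages still dirty, ram_save_pending() reports
 * about 100MB of remaining RAM; if postcopy-ram is enabled it is all
 * counted as res_compatible (sendable in either phase), otherwise as
 * res_precopy_only.
 */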
2406 static int load_xbzrle(QEMUFile *f, ram_addr_t addr, void *host)
2407 {
2408 unsigned int xh_len;
2409 int xh_flags;
2410 uint8_t *loaded_data;
2411
2412 /* extract RLE header */
2413 xh_flags = qemu_get_byte(f);
2414 xh_len = qemu_get_be16(f);
2415
2416 if (xh_flags != ENCODING_FLAG_XBZRLE) {
2417 error_report("Failed to load XBZRLE page - wrong compression!");
2418 return -1;
2419 }
2420
2421 if (xh_len > TARGET_PAGE_SIZE) {
2422 error_report("Failed to load XBZRLE page - len overflow!");
2423 return -1;
2424 }
2425 loaded_data = XBZRLE.decoded_buf;
2426 /* load data and decode */
2427 /* it can change loaded_data to point to an internal buffer */
2428 qemu_get_buffer_in_place(f, &loaded_data, xh_len);
2429
2430 /* decode RLE */
2431 if (xbzrle_decode_buffer(loaded_data, xh_len, host,
2432 TARGET_PAGE_SIZE) == -1) {
2433 error_report("Failed to load XBZRLE page - decode error!");
2434 return -1;
2435 }
2436
2437 return 0;
2438 }
2439
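/*
 * Editor's sketch of the XBZRLE page record that load_xbzrle() parses
 * (reconstructed from the reads above, shown as a reading aid only):
 *
 *     byte:  xh_flags   (must be ENCODING_FLAG_XBZRLE)
 *     be16:  xh_len     (encoded length, <= TARGET_PAGE_SIZE)
 *     bytes: xh_len bytes of XBZRLE-encoded delta, decoded against the
 *            current contents of the target page at @host
 */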
2440 /**
2441 * ram_block_from_stream: read a RAMBlock id from the migration stream
2442 *
2443 * Must be called from within an RCU critical section.
2444 *
2445 * Returns a pointer from within the RCU-protected ram_list.
2446 *
2447 * @f: QEMUFile where to read the data from
2448 * @flags: Page flags (mostly to see if it's a continuation of previous block)
2449 */
2450 static inline RAMBlock *ram_block_from_stream(QEMUFile *f, int flags)
2451 {
2452 static RAMBlock *block = NULL;
2453 char id[256];
2454 uint8_t len;
2455
2456 if (flags & RAM_SAVE_FLAG_CONTINUE) {
2457 if (!block) {
2458 error_report("Ack, bad migration stream!");
2459 return NULL;
2460 }
2461 return block;
2462 }
2463
2464 len = qemu_get_byte(f);
2465 qemu_get_buffer(f, (uint8_t *)id, len);
2466 id[len] = 0;
2467
2468 block = qemu_ram_block_by_name(id);
2469 if (!block) {
2470 error_report("Can't find block %s", id);
2471 return NULL;
2472 }
2473
2474 return block;
2475 }
2476
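/*
 * Editor's note (derived from the function above): the block id is only
 * transmitted when a page starts a new RAMBlock; for subsequent pages of
 * the same block the source sets RAM_SAVE_FLAG_CONTINUE in the page flags
 * and this function simply returns the cached "block" pointer without
 * reading an id from the stream.
 */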
2477 static inline void *host_from_ram_block_offset(RAMBlock *block,
2478 ram_addr_t offset)
2479 {
2480 if (!offset_in_ramblock(block, offset)) {
2481 return NULL;
2482 }
2483
2484 return block->host + offset;
2485 }
2486
2487 /**
2488 * ram_handle_compressed: handle the zero page case
2489 *
2490 * If a page (or a whole RDMA chunk) has been
2491 * determined to be zero, then zap it.
2492 *
2493 * @host: host address for the zero page
2494 * @ch: the byte the page is filled with; only zero is supported
2495 * @size: size of the zero page
2496 */
2497 void ram_handle_compressed(void *host, uint8_t ch, uint64_t size)
2498 {
2499 if (ch != 0 || !is_zero_range(host, size)) {
2500 memset(host, ch, size);
2501 }
2502 }
2503
2504 static void *do_data_decompress(void *opaque)
2505 {
2506 DecompressParam *param = opaque;
2507 unsigned long pagesize;
2508 uint8_t *des;
2509 int len;
2510
2511 qemu_mutex_lock(&param->mutex);
2512 while (!param->quit) {
2513 if (param->des) {
2514 des = param->des;
2515 len = param->len;
2516 param->des = 0;
2517 qemu_mutex_unlock(&param->mutex);
2518
2519 pagesize = TARGET_PAGE_SIZE;
2520 /* uncompress() may fail in some cases, especially when the
2521 * page is dirtied while it is being compressed. That's not a
2522 * problem because the dirty page will be retransferred and
2523 * uncompress() won't corrupt the data in other pages.
2524 */
2525 uncompress((Bytef *)des, &pagesize,
2526 (const Bytef *)param->compbuf, len);
2527
2528 qemu_mutex_lock(&decomp_done_lock);
2529 param->done = true;
2530 qemu_cond_signal(&decomp_done_cond);
2531 qemu_mutex_unlock(&decomp_done_lock);
2532
2533 qemu_mutex_lock(&param->mutex);
2534 } else {
2535 qemu_cond_wait(&param->cond, &param->mutex);
2536 }
2537 }
2538 qemu_mutex_unlock(&param->mutex);
2539
2540 return NULL;
2541 }
2542
2543 static void wait_for_decompress_done(void)
2544 {
2545 int idx, thread_count;
2546
2547 if (!migrate_use_compression()) {
2548 return;
2549 }
2550
2551 thread_count = migrate_decompress_threads();
2552 qemu_mutex_lock(&decomp_done_lock);
2553 for (idx = 0; idx < thread_count; idx++) {
2554 while (!decomp_param[idx].done) {
2555 qemu_cond_wait(&decomp_done_cond, &decomp_done_lock);
2556 }
2557 }
2558 qemu_mutex_unlock(&decomp_done_lock);
2559 }
2560
2561 static void compress_threads_load_setup(void)
2562 {
2563 int i, thread_count;
2564
2565 if (!migrate_use_compression()) {
2566 return;
2567 }
2568 thread_count = migrate_decompress_threads();
2569 decompress_threads = g_new0(QemuThread, thread_count);
2570 decomp_param = g_new0(DecompressParam, thread_count);
2571 qemu_mutex_init(&decomp_done_lock);
2572 qemu_cond_init(&decomp_done_cond);
2573 for (i = 0; i < thread_count; i++) {
2574 qemu_mutex_init(&decomp_param[i].mutex);
2575 qemu_cond_init(&decomp_param[i].cond);
2576 decomp_param[i].compbuf = g_malloc0(compressBound(TARGET_PAGE_SIZE));
2577 decomp_param[i].done = true;
2578 decomp_param[i].quit = false;
2579 qemu_thread_create(decompress_threads + i, "decompress",
2580 do_data_decompress, decomp_param + i,
2581 QEMU_THREAD_JOINABLE);
2582 }
2583 }
2584
2585 static void compress_threads_load_cleanup(void)
2586 {
2587 int i, thread_count;
2588
2589 if (!migrate_use_compression()) {
2590 return;
2591 }
2592 thread_count = migrate_decompress_threads();
2593 for (i = 0; i < thread_count; i++) {
2594 qemu_mutex_lock(&decomp_param[i].mutex);
2595 decomp_param[i].quit = true;
2596 qemu_cond_signal(&decomp_param[i].cond);
2597 qemu_mutex_unlock(&decomp_param[i].mutex);
2598 }
2599 for (i = 0; i < thread_count; i++) {
2600 qemu_thread_join(decompress_threads + i);
2601 qemu_mutex_destroy(&decomp_param[i].mutex);
2602 qemu_cond_destroy(&decomp_param[i].cond);
2603 g_free(decomp_param[i].compbuf);
2604 }
2605 g_free(decompress_threads);
2606 g_free(decomp_param);
2607 decompress_threads = NULL;
2608 decomp_param = NULL;
2609 }
2610
2611 static void decompress_data_with_multi_threads(QEMUFile *f,
2612 void *host, int len)
2613 {
2614 int idx, thread_count;
2615
2616 thread_count = migrate_decompress_threads();
2617 qemu_mutex_lock(&decomp_done_lock);
2618 while (true) {
2619 for (idx = 0; idx < thread_count; idx++) {
2620 if (decomp_param[idx].done) {
2621 decomp_param[idx].done = false;
2622 qemu_mutex_lock(&decomp_param[idx].mutex);
2623 qemu_get_buffer(f, decomp_param[idx].compbuf, len);
2624 decomp_param[idx].des = host;
2625 decomp_param[idx].len = len;
2626 qemu_cond_signal(&decomp_param[idx].cond);
2627 qemu_mutex_unlock(&decomp_param[idx].mutex);
2628 break;
2629 }
2630 }
2631 if (idx < thread_count) {
2632 break;
2633 } else {
2634 qemu_cond_wait(&decomp_done_cond, &decomp_done_lock);
2635 }
2636 }
2637 qemu_mutex_unlock(&decomp_done_lock);
2638 }
2639
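/*
 * Editor's note (behavioural summary, no new logic): each compressed page
 * is handed to the first idle decompression thread; if all
 * migrate_decompress_threads() workers are busy, the caller blocks on
 * decomp_done_cond until one of them signals completion and then retries
 * the scan.
 */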
2640 /**
2641 * ram_load_setup: Setup RAM for migration incoming side
2642 *
2643 * Returns zero to indicate success and negative for error
2644 *
2645 * @f: QEMUFile where to receive the data
2646 * @opaque: RAMState pointer
2647 */
2648 static int ram_load_setup(QEMUFile *f, void *opaque)
2649 {
2650 xbzrle_load_setup();
2651 compress_threads_load_setup();
2652 ramblock_recv_map_init();
2653 return 0;
2654 }
2655
2656 static int ram_load_cleanup(void *opaque)
2657 {
2658 RAMBlock *rb;
2659 xbzrle_load_cleanup();
2660 compress_threads_load_cleanup();
2661
2662 RAMBLOCK_FOREACH(rb) {
2663 g_free(rb->receivedmap);
2664 rb->receivedmap = NULL;
2665 }
2666 return 0;
2667 }
2668
2669 /**
2670 * ram_postcopy_incoming_init: allocate postcopy data structures
2671 *
2672 * Returns 0 for success and negative if there was an error
2673 *
2674 * @mis: current migration incoming state
2675 *
2676 * Allocate data structures etc needed by incoming migration with
2677 * postcopy-ram. postcopy-ram's similarly named
2678 * postcopy_ram_incoming_init does the work.
2679 */
2680 int ram_postcopy_incoming_init(MigrationIncomingState *mis)
2681 {
2682 unsigned long ram_pages = last_ram_page();
2683
2684 return postcopy_ram_incoming_init(mis, ram_pages);
2685 }
2686
2687 /**
2688 * ram_load_postcopy: load a page in postcopy case
2689 *
2690 * Returns 0 for success or -errno in case of error
2691 *
2692 * Called in postcopy mode by ram_load().
2693 * rcu_read_lock is taken prior to this being called.
2694 *
2695 * @f: QEMUFile to receive the data from
2696 */
2697 static int ram_load_postcopy(QEMUFile *f)
2698 {
2699 int flags = 0, ret = 0;
2700 bool place_needed = false;
2701 bool matching_page_sizes = false;
2702 MigrationIncomingState *mis = migration_incoming_get_current();
2703 /* Temporary page that is later 'placed' */
2704 void *postcopy_host_page = postcopy_get_tmp_page(mis);
2705 void *last_host = NULL;
2706 bool all_zero = false;
2707
2708 while (!ret && !(flags & RAM_SAVE_FLAG_EOS)) {
2709 ram_addr_t addr;
2710 void *host = NULL;
2711 void *page_buffer = NULL;
2712 void *place_source = NULL;
2713 RAMBlock *block = NULL;
2714 uint8_t ch;
2715
2716 addr = qemu_get_be64(f);
2717
2718 /*
2719 * If there was a QEMUFile error, stop here; "addr" may be
2720 * invalid.
2721 */
2722 ret = qemu_file_get_error(f);
2723 if (ret) {
2724 break;
2725 }
2726
2727 flags = addr & ~TARGET_PAGE_MASK;
2728 addr &= TARGET_PAGE_MASK;
2729
2730 trace_ram_load_postcopy_loop((uint64_t)addr, flags);
2731 place_needed = false;
2732 if (flags & (RAM_SAVE_FLAG_ZERO | RAM_SAVE_FLAG_PAGE)) {
2733 block = ram_block_from_stream(f, flags);
2734
2735 host = host_from_ram_block_offset(block, addr);
2736 if (!host) {
2737 error_report("Illegal RAM offset " RAM_ADDR_FMT, addr);
2738 ret = -EINVAL;
2739 break;
2740 }
2741 matching_page_sizes = block->page_size == TARGET_PAGE_SIZE;
2742 /*
2743 * Postcopy requires that we place whole host pages atomically;
2744 * these may be huge pages for RAMBlocks that are backed by
2745 * hugetlbfs.
2746 * To make it atomic, the data is read into a temporary page
2747 * that's moved into place later.
2748 * The migration protocol uses, possibly smaller, target pages;
2749 * however the source ensures it always sends all the components
2750 * of a host page in order.
2751 */
2752 page_buffer = postcopy_host_page +
2753 ((uintptr_t)host & (block->page_size - 1));
2754 /* 1st TP within the HP: if all TPs turn out to be zero we can optimise the place */
2755 if (!((uintptr_t)host & (block->page_size - 1))) {
2756 all_zero = true;
2757 } else {
2758 /* not the 1st TP within the HP */
2759 if (host != (last_host + TARGET_PAGE_SIZE)) {
2760 error_report("Non-sequential target page %p/%p",
2761 host, last_host);
2762 ret = -EINVAL;
2763 break;
2764 }
2765 }
2766
2767
2768 /*
2769 * If it's the last part of a host page then we place the host
2770 * page
2771 */
2772 place_needed = (((uintptr_t)host + TARGET_PAGE_SIZE) &
2773 (block->page_size - 1)) == 0;
2774 place_source = postcopy_host_page;
2775 }
2776 last_host = host;
2777
2778 switch (flags & ~RAM_SAVE_FLAG_CONTINUE) {
2779 case RAM_SAVE_FLAG_ZERO:
2780 ch = qemu_get_byte(f);
2781 memset(page_buffer, ch, TARGET_PAGE_SIZE);
2782 if (ch) {
2783 all_zero = false;
2784 }
2785 break;
2786
2787 case RAM_SAVE_FLAG_PAGE:
2788 all_zero = false;
2789 if (!place_needed || !matching_page_sizes) {
2790 qemu_get_buffer(f, page_buffer, TARGET_PAGE_SIZE);
2791 } else {
2792 /* Avoid the extra qemu_file copy, since postcopy is going
2793 * to copy the page into place later anyway; this only works
2794 * when we do the read in one go (matching page sizes)
2795 */
2796 qemu_get_buffer_in_place(f, (uint8_t **)&place_source,
2797 TARGET_PAGE_SIZE);
2798 }
2799 break;
2800 case RAM_SAVE_FLAG_EOS:
2801 /* normal exit */
2802 break;
2803 default:
2804 error_report("Unknown combination of migration flags: %#x"
2805 " (postcopy mode)", flags);
2806 ret = -EINVAL;
2807 break;
2808 }
2809
2810 /* Detect any possible file errors */
2811 if (!ret && qemu_file_get_error(f)) {
2812 ret = qemu_file_get_error(f);
2813 }
2814
2815 if (!ret && place_needed) {
2816 /* This gets called at the last target page in the host page */
2817 void *place_dest = host + TARGET_PAGE_SIZE - block->page_size;
2818
2819 if (all_zero) {
2820 ret = postcopy_place_page_zero(mis, place_dest,
2821 block);
2822 } else {
2823 ret = postcopy_place_page(mis, place_dest,
2824 place_source, block);
2825 }
2826 }
2827 }
2828
2829 return ret;
2830 }
2831
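/*
 * Editor's example (hypothetical numbers, not from the original source):
 * for a hugetlbfs-backed RAMBlock with 2MiB host pages and 4KiB target
 * pages, ram_load_postcopy() accumulates the 512 incoming target pages of
 * one host page in postcopy_host_page; only after the last of them
 * (place_needed == true) is the whole 2MiB page placed atomically with
 * postcopy_place_page(), or postcopy_place_page_zero() if every target
 * page was zero.
 */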
2832 static bool postcopy_is_advised(void)
2833 {
2834 PostcopyState ps = postcopy_state_get();
2835 return ps >= POSTCOPY_INCOMING_ADVISE && ps < POSTCOPY_INCOMING_END;
2836 }
2837
2838 static bool postcopy_is_running(void)
2839 {
2840 PostcopyState ps = postcopy_state_get();
2841 return ps >= POSTCOPY_INCOMING_LISTENING && ps < POSTCOPY_INCOMING_END;
2842 }
2843
2844 static int ram_load(QEMUFile *f, void *opaque, int version_id)
2845 {
2846 int flags = 0, ret = 0, invalid_flags = 0;
2847 static uint64_t seq_iter;
2848 int len = 0;
2849 /*
2850 * If the system is running in postcopy mode, page inserts to host memory must
2851 * be atomic
2852 */
2853 bool postcopy_running = postcopy_is_running();
2854 /* ADVISE is earlier; it shows the source has the postcopy capability on */
2855 bool postcopy_advised = postcopy_is_advised();
2856
2857 seq_iter++;
2858
2859 if (version_id != 4) {
2860 ret = -EINVAL;
2861 }
2862
2863 if (!migrate_use_compression()) {
2864 invalid_flags |= RAM_SAVE_FLAG_COMPRESS_PAGE;
2865 }
2866 /* This RCU critical section can be very long running.
2867 * When RCU reclaims in the code start to become numerous,
2868 * it will be necessary to reduce the granularity of this
2869 * critical section.
2870 */
2871 rcu_read_lock();
2872
2873 if (postcopy_running) {
2874 ret = ram_load_postcopy(f);
2875 }
2876
2877 while (!postcopy_running && !ret && !(flags & RAM_SAVE_FLAG_EOS)) {
2878 ram_addr_t addr, total_ram_bytes;
2879 void *host = NULL;
2880 uint8_t ch;
2881
2882 addr = qemu_get_be64(f);
2883 flags = addr & ~TARGET_PAGE_MASK;
2884 addr &= TARGET_PAGE_MASK;
2885
2886 if (flags & invalid_flags) {
2887 if (flags & invalid_flags & RAM_SAVE_FLAG_COMPRESS_PAGE) {
2888 error_report("Received an unexpected compressed page");
2889 }
2890
2891 ret = -EINVAL;
2892 break;
2893 }
2894
2895 if (flags & (RAM_SAVE_FLAG_ZERO | RAM_SAVE_FLAG_PAGE |
2896 RAM_SAVE_FLAG_COMPRESS_PAGE | RAM_SAVE_FLAG_XBZRLE)) {
2897 RAMBlock *block = ram_block_from_stream(f, flags);
2898
2899 host = host_from_ram_block_offset(block, addr);
2900 if (!host) {
2901 error_report("Illegal RAM offset " RAM_ADDR_FMT, addr);
2902 ret = -EINVAL;
2903 break;
2904 }
2905 ramblock_recv_bitmap_set(block, host);
2906 trace_ram_load_loop(block->idstr, (uint64_t)addr, flags, host);
2907 }
2908
2909 switch (flags & ~RAM_SAVE_FLAG_CONTINUE) {
2910 case RAM_SAVE_FLAG_MEM_SIZE:
2911 /* Synchronize RAM block list */
2912 total_ram_bytes = addr;
2913 while (!ret && total_ram_bytes) {
2914 RAMBlock *block;
2915 char id[256];
2916 ram_addr_t length;
2917
2918 len = qemu_get_byte(f);
2919 qemu_get_buffer(f, (uint8_t *)id, len);
2920 id[len] = 0;
2921 length = qemu_get_be64(f);
2922
2923 block = qemu_ram_block_by_name(id);
2924 if (block) {
2925 if (length != block->used_length) {
2926 Error *local_err = NULL;
2927
2928 ret = qemu_ram_resize(block, length,
2929 &local_err);
2930 if (local_err) {
2931 error_report_err(local_err);
2932 }
2933 }
2934 /* For postcopy we need to check hugepage sizes match */
2935 if (postcopy_advised &&
2936 block->page_size != qemu_host_page_size) {
2937 uint64_t remote_page_size = qemu_get_be64(f);
2938 if (remote_page_size != block->page_size) {
2939 error_report("Mismatched RAM page size %s "
2940 "(local) %zd != %" PRId64,
2941 id, block->page_size,
2942 remote_page_size);
2943 ret = -EINVAL;
2944 }
2945 }
2946 ram_control_load_hook(f, RAM_CONTROL_BLOCK_REG,
2947 block->idstr);
2948 } else {
2949 error_report("Unknown ramblock \"%s\", cannot "
2950 "accept migration", id);
2951 ret = -EINVAL;
2952 }
2953
2954 total_ram_bytes -= length;
2955 }
2956 break;
2957
2958 case RAM_SAVE_FLAG_ZERO:
2959 ch = qemu_get_byte(f);
2960 ram_handle_compressed(host, ch, TARGET_PAGE_SIZE);
2961 break;
2962
2963 case RAM_SAVE_FLAG_PAGE:
2964 qemu_get_buffer(f, host, TARGET_PAGE_SIZE);
2965 break;
2966
2967 case RAM_SAVE_FLAG_COMPRESS_PAGE:
2968 len = qemu_get_be32(f);
2969 if (len < 0 || len > compressBound(TARGET_PAGE_SIZE)) {
2970 error_report("Invalid compressed data length: %d", len);
2971 ret = -EINVAL;
2972 break;
2973 }
2974 decompress_data_with_multi_threads(f, host, len);
2975 break;
2976
2977 case RAM_SAVE_FLAG_XBZRLE:
2978 if (load_xbzrle(f, addr, host) < 0) {
2979 error_report("Failed to decompress XBZRLE page at "
2980 RAM_ADDR_FMT, addr);
2981 ret = -EINVAL;
2982 break;
2983 }
2984 break;
2985 case RAM_SAVE_FLAG_EOS:
2986 /* normal exit */
2987 break;
2988 default:
2989 if (flags & RAM_SAVE_FLAG_HOOK) {
2990 ram_control_load_hook(f, RAM_CONTROL_HOOK, NULL);
2991 } else {
2992 error_report("Unknown combination of migration flags: %#x",
2993 flags);
2994 ret = -EINVAL;
2995 }
2996 }
2997 if (!ret) {
2998 ret = qemu_file_get_error(f);
2999 }
3000 }
3001
3002 wait_for_decompress_done();
3003 rcu_read_unlock();
3004 trace_ram_load_complete(ret, seq_iter);
3005 return ret;
3006 }
3007
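/*
 * Editor's note on the page header decoding used above (illustrative
 * arithmetic assuming a 4KiB target page, i.e. TARGET_PAGE_MASK == ~0xfff):
 * each page record starts with a be64 whose low bits carry the
 * RAM_SAVE_FLAG_* bits and whose page-aligned part is the offset, e.g. a
 * value of 0x200008 decodes to addr = 0x200000 and
 * flags = RAM_SAVE_FLAG_PAGE (0x08).
 */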
3008 static bool ram_has_postcopy(void *opaque)
3009 {
3010 return migrate_postcopy_ram();
3011 }
3012
3013 static SaveVMHandlers savevm_ram_handlers = {
3014 .save_setup = ram_save_setup,
3015 .save_live_iterate = ram_save_iterate,
3016 .save_live_complete_postcopy = ram_save_complete,
3017 .save_live_complete_precopy = ram_save_complete,
3018 .has_postcopy = ram_has_postcopy,
3019 .save_live_pending = ram_save_pending,
3020 .load_state = ram_load,
3021 .save_cleanup = ram_save_cleanup,
3022 .load_setup = ram_load_setup,
3023 .load_cleanup = ram_load_cleanup,
3024 };
3025
3026 void ram_mig_init(void)
3027 {
3028 qemu_mutex_init(&XBZRLE.lock);
3029 register_savevm_live(NULL, "ram", 0, 4, &savevm_ram_handlers, &ram_state);
3030 }