1/*
2 * QEMU System Emulator
3 *
4 * Copyright (c) 2003-2008 Fabrice Bellard
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to deal
8 * in the Software without restriction, including without limitation the rights
9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 * copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22 * THE SOFTWARE.
23 */
24#include <stdint.h>
25#include <stdarg.h>
26#include <stdlib.h>
27#include <zlib.h>
28#ifndef _WIN32
29#include <sys/types.h>
30#include <sys/mman.h>
31#endif
32#include "config.h"
33#include "monitor/monitor.h"
34#include "sysemu/sysemu.h"
35#include "qemu/bitops.h"
36#include "qemu/bitmap.h"
37#include "hw/i386/pc.h"
38#include "hw/pci/pci.h"
39#include "hw/audio/audio.h"
40#include "migration/migration.h"
41#include "exec/address-spaces.h"
42#include "migration/page_cache.h"
43#include "qemu/config-file.h"
44#include "qemu/error-report.h"
45#include "qmp-commands.h"
46#include "trace.h"
47#include "exec/cpu-all.h"
48#include "exec/ram_addr.h"
49#include "qemu/host-utils.h"
50#include "qemu/rcu_queue.h"
51
52#ifdef DEBUG_MIGRATION_RAM
53#define DPRINTF(fmt, ...) \
54 do { fprintf(stdout, "migration_ram: " fmt, ## __VA_ARGS__); } while (0)
55#else
56#define DPRINTF(fmt, ...) \
57 do { } while (0)
58#endif
59
60static bool mig_throttle_on;
61static int dirty_rate_high_cnt;
62static void check_guest_throttling(void);
63
64static uint64_t bitmap_sync_count;
65
66/***********************************************************/
67/* ram save/restore */
68
69#define RAM_SAVE_FLAG_FULL 0x01 /* Obsolete, not used anymore */
70#define RAM_SAVE_FLAG_COMPRESS 0x02
71#define RAM_SAVE_FLAG_MEM_SIZE 0x04
72#define RAM_SAVE_FLAG_PAGE 0x08
73#define RAM_SAVE_FLAG_EOS 0x10
74#define RAM_SAVE_FLAG_CONTINUE 0x20
75#define RAM_SAVE_FLAG_XBZRLE 0x40
76/* 0x80 is reserved in migration.h; start with 0x100 next */
77#define RAM_SAVE_FLAG_COMPRESS_PAGE 0x100
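
/* Every page sent with one of the flags above is preceded by a be64 word
 * that carries the page offset in its upper bits and the flags OR'd into
 * the low bits (pages are TARGET_PAGE_SIZE aligned, so the low bits are
 * free).  The receive side splits the two in ram_load(), roughly:
 *
 *     addr  = qemu_get_be64(f);
 *     flags = addr & ~TARGET_PAGE_MASK;
 *     addr &= TARGET_PAGE_MASK;
 */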
78
79static const uint8_t ZERO_TARGET_PAGE[TARGET_PAGE_SIZE];
80
81static inline bool is_zero_range(uint8_t *p, uint64_t size)
82{
83 return buffer_find_nonzero_offset(p, size) == size;
84}
85
86/* This struct contains the XBZRLE cache and a static page
87 used by the compression */
88static struct {
89 /* buffer used for XBZRLE encoding */
90 uint8_t *encoded_buf;
91 /* buffer for storing page content */
92 uint8_t *current_buf;
93 /* Cache for XBZRLE, Protected by lock. */
94 PageCache *cache;
95 QemuMutex lock;
96} XBZRLE;
97
98/* buffer used for XBZRLE decoding */
99static uint8_t *xbzrle_decoded_buf;
100
101static void XBZRLE_cache_lock(void)
102{
103 if (migrate_use_xbzrle())
104 qemu_mutex_lock(&XBZRLE.lock);
105}
106
107static void XBZRLE_cache_unlock(void)
108{
109 if (migrate_use_xbzrle())
110 qemu_mutex_unlock(&XBZRLE.lock);
111}
112
113/*
114 * called from qmp_migrate_set_cache_size in main thread, possibly while
115 * a migration is in progress.
116 * A running migration may be using the cache and might finish during this
117 * call, hence changes to the cache are protected by XBZRLE.lock().
118 */
119int64_t xbzrle_cache_resize(int64_t new_size)
120{
121 PageCache *new_cache;
122 int64_t ret;
123
124 if (new_size < TARGET_PAGE_SIZE) {
125 return -1;
126 }
127
128 XBZRLE_cache_lock();
129
130 if (XBZRLE.cache != NULL) {
131 if (pow2floor(new_size) == migrate_xbzrle_cache_size()) {
132 goto out_new_size;
133 }
134 new_cache = cache_init(new_size / TARGET_PAGE_SIZE,
135 TARGET_PAGE_SIZE);
136 if (!new_cache) {
137 error_report("Error creating cache");
138 ret = -1;
139 goto out;
140 }
141
142 cache_fini(XBZRLE.cache);
143 XBZRLE.cache = new_cache;
144 }
145
146out_new_size:
147 ret = pow2floor(new_size);
148out:
149 XBZRLE_cache_unlock();
150 return ret;
151}
152
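
/* Note: the value reported back to the caller is pow2floor(new_size), i.e.
 * the requested size rounded down to a power of two (asking for 5 MiB, for
 * example, reports 4 MiB back), which is what cache_init() is expected to
 * use internally as well.
 */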
153/* accounting for migration statistics */
154typedef struct AccountingInfo {
155 uint64_t dup_pages;
156 uint64_t skipped_pages;
157 uint64_t norm_pages;
158 uint64_t iterations;
159 uint64_t xbzrle_bytes;
160 uint64_t xbzrle_pages;
161 uint64_t xbzrle_cache_miss;
162 double xbzrle_cache_miss_rate;
163 uint64_t xbzrle_overflows;
164} AccountingInfo;
165
166static AccountingInfo acct_info;
167
168static void acct_clear(void)
169{
170 memset(&acct_info, 0, sizeof(acct_info));
171}
172
173uint64_t dup_mig_bytes_transferred(void)
174{
175 return acct_info.dup_pages * TARGET_PAGE_SIZE;
176}
177
178uint64_t dup_mig_pages_transferred(void)
179{
180 return acct_info.dup_pages;
181}
182
183uint64_t skipped_mig_bytes_transferred(void)
184{
185 return acct_info.skipped_pages * TARGET_PAGE_SIZE;
186}
187
188uint64_t skipped_mig_pages_transferred(void)
189{
190 return acct_info.skipped_pages;
191}
192
193uint64_t norm_mig_bytes_transferred(void)
194{
195 return acct_info.norm_pages * TARGET_PAGE_SIZE;
196}
197
198uint64_t norm_mig_pages_transferred(void)
199{
200 return acct_info.norm_pages;
201}
202
203uint64_t xbzrle_mig_bytes_transferred(void)
204{
205 return acct_info.xbzrle_bytes;
206}
207
208uint64_t xbzrle_mig_pages_transferred(void)
209{
210 return acct_info.xbzrle_pages;
211}
212
213uint64_t xbzrle_mig_pages_cache_miss(void)
214{
215 return acct_info.xbzrle_cache_miss;
216}
217
218double xbzrle_mig_cache_miss_rate(void)
219{
220 return acct_info.xbzrle_cache_miss_rate;
221}
222
223uint64_t xbzrle_mig_pages_overflow(void)
224{
225 return acct_info.xbzrle_overflows;
226}
227
228/* This is the last block that we have visited searching for dirty pages
229 */
230static RAMBlock *last_seen_block;
231/* This is the last block from where we have sent data */
232static RAMBlock *last_sent_block;
233static ram_addr_t last_offset;
234static unsigned long *migration_bitmap;
235static uint64_t migration_dirty_pages;
236static uint32_t last_version;
237static bool ram_bulk_stage;
238
239struct CompressParam {
240 bool start;
241 bool done;
242 QEMUFile *file;
243 QemuMutex mutex;
244 QemuCond cond;
245 RAMBlock *block;
246 ram_addr_t offset;
247};
248typedef struct CompressParam CompressParam;
249
250struct DecompressParam {
251 bool start;
252 QemuMutex mutex;
253 QemuCond cond;
254 void *des;
255 uint8_t *compbuf;
256 int len;
257};
258typedef struct DecompressParam DecompressParam;
259
260static CompressParam *comp_param;
261static QemuThread *compress_threads;
262/* comp_done_cond is used to wake up the migration thread when
263 * one of the compression threads has finished the compression.
264 * comp_done_lock is used together with comp_done_cond.
265 */
266static QemuMutex *comp_done_lock;
267static QemuCond *comp_done_cond;
268/* The empty QEMUFileOps will be used by file in CompressParam */
269static const QEMUFileOps empty_ops = { };
270
271static bool compression_switch;
272static bool quit_comp_thread;
273static bool quit_decomp_thread;
274static DecompressParam *decomp_param;
275static QemuThread *decompress_threads;
276static uint8_t *compressed_data_buf;
277
278static int do_compress_ram_page(CompressParam *param);
279
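
/* Worker loop for one compression thread: wait on param->cond until
 * start_compression() sets param->start, compress the requested page into
 * param->file (a buffer-only QEMUFile), then mark the slot done and signal
 * comp_done_cond so the migration thread can drain the buffer.
 */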
280static void *do_data_compress(void *opaque)
281{
282 CompressParam *param = opaque;
283
284 while (!quit_comp_thread) {
285 qemu_mutex_lock(&param->mutex);
286 /* Re-check quit_comp_thread in case
287 * terminate_compression_threads() is called just before
288 * qemu_mutex_lock(&param->mutex) and after
289 * while (!quit_comp_thread); re-checking it here makes
290 * sure the compression thread terminates as expected.
291 */
292 while (!param->start && !quit_comp_thread) {
293 qemu_cond_wait(&param->cond, &param->mutex);
294 }
295 if (!quit_comp_thread) {
296 do_compress_ram_page(param);
297 }
298 param->start = false;
299 qemu_mutex_unlock(&param->mutex);
300
301 qemu_mutex_lock(comp_done_lock);
302 param->done = true;
303 qemu_cond_signal(comp_done_cond);
304 qemu_mutex_unlock(comp_done_lock);
305 }
306
307 return NULL;
308}
309
310static inline void terminate_compression_threads(void)
311{
312 int idx, thread_count;
313
314 thread_count = migrate_compress_threads();
315 quit_comp_thread = true;
316 for (idx = 0; idx < thread_count; idx++) {
317 qemu_mutex_lock(&comp_param[idx].mutex);
318 qemu_cond_signal(&comp_param[idx].cond);
319 qemu_mutex_unlock(&comp_param[idx].mutex);
320 }
321}
322
323void migrate_compress_threads_join(void)
324{
325 int i, thread_count;
326
327 if (!migrate_use_compression()) {
328 return;
329 }
330 terminate_compression_threads();
331 thread_count = migrate_compress_threads();
332 for (i = 0; i < thread_count; i++) {
333 qemu_thread_join(compress_threads + i);
334 qemu_fclose(comp_param[i].file);
335 qemu_mutex_destroy(&comp_param[i].mutex);
336 qemu_cond_destroy(&comp_param[i].cond);
337 }
338 qemu_mutex_destroy(comp_done_lock);
339 qemu_cond_destroy(comp_done_cond);
340 g_free(compress_threads);
341 g_free(comp_param);
342 g_free(comp_done_cond);
343 g_free(comp_done_lock);
344 compress_threads = NULL;
345 comp_param = NULL;
346 comp_done_cond = NULL;
347 comp_done_lock = NULL;
348}
349
350void migrate_compress_threads_create(void)
351{
352 int i, thread_count;
353
354 if (!migrate_use_compression()) {
355 return;
356 }
357 quit_comp_thread = false;
358 compression_switch = true;
359 thread_count = migrate_compress_threads();
360 compress_threads = g_new0(QemuThread, thread_count);
361 comp_param = g_new0(CompressParam, thread_count);
362 comp_done_cond = g_new0(QemuCond, 1);
363 comp_done_lock = g_new0(QemuMutex, 1);
364 qemu_cond_init(comp_done_cond);
365 qemu_mutex_init(comp_done_lock);
366 for (i = 0; i < thread_count; i++) {
367 /* comp_param[i].file is just used as a dummy buffer to save data; set
368 * its ops to empty.
369 */
370 comp_param[i].file = qemu_fopen_ops(NULL, &empty_ops);
371 comp_param[i].done = true;
372 qemu_mutex_init(&comp_param[i].mutex);
373 qemu_cond_init(&comp_param[i].cond);
374 qemu_thread_create(compress_threads + i, "compress",
375 do_data_compress, comp_param + i,
376 QEMU_THREAD_JOINABLE);
377 }
378}
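
/* Note that comp_param[i].file created above only buffers data in memory;
 * nothing reaches the wire until the migration thread copies that buffer
 * into the real stream with qemu_put_qemu_file() (see flush_compressed_data()
 * and compress_page_with_multi_thread() below).
 */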
379
380/**
381 * save_page_header: Write page header to wire
382 *
383 * If this is the 1st block, it also writes the block identification
384 *
385 * Returns: Number of bytes written
386 *
387 * @f: QEMUFile where to send the data
388 * @block: block that contains the page we want to send
389 * @offset: offset inside the block for the page
390 * in the lower bits, it contains flags
391 */
392static size_t save_page_header(QEMUFile *f, RAMBlock *block, ram_addr_t offset)
393{
394 size_t size;
395
396 qemu_put_be64(f, offset);
397 size = 8;
398
399 if (!(offset & RAM_SAVE_FLAG_CONTINUE)) {
400 qemu_put_byte(f, strlen(block->idstr));
401 qemu_put_buffer(f, (uint8_t *)block->idstr,
402 strlen(block->idstr));
403 size += 1 + strlen(block->idstr);
404 }
405 return size;
406}
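
/* For reference, the header produced above looks like this on the wire:
 *
 *     be64  offset | RAM_SAVE_FLAG_*        (always)
 *     u8    strlen(block->idstr)            (only without FLAG_CONTINUE)
 *     ...   block->idstr, not NUL terminated
 */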
407
408/* Update the xbzrle cache to reflect a page that's been sent as all 0.
409 * The important thing is that a stale (not-yet-0'd) page be replaced
410 * by the new data.
411 * As a bonus, if the page wasn't in the cache it gets added so that
412 * when a small write is made into the 0'd page it gets XBZRLE sent
413 */
414static void xbzrle_cache_zero_page(ram_addr_t current_addr)
415{
416 if (ram_bulk_stage || !migrate_use_xbzrle()) {
417 return;
418 }
419
420 /* We don't care if this fails to allocate a new cache page
421 * as long as it updates an old one */
422 cache_insert(XBZRLE.cache, current_addr, ZERO_TARGET_PAGE,
423 bitmap_sync_count);
424}
425
426#define ENCODING_FLAG_XBZRLE 0x1
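
/* An XBZRLE page on the wire is the usual page header (with
 * RAM_SAVE_FLAG_XBZRLE set) followed by a one byte ENCODING_FLAG_XBZRLE,
 * a be16 encoded length and then that many bytes of encoded data;
 * save_xbzrle_page() writes it and load_xbzrle() reads it back.
 */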
427
428/**
429 * save_xbzrle_page: compress and send current page
430 *
431 * Returns: 1 means that we wrote the page
432 * 0 means that page is identical to the one already sent
433 * -1 means that xbzrle would be longer than normal
434 *
435 * @f: QEMUFile where to send the data
436 * @current_data: pointer to the page contents; may be updated to point at the cached copy
437 * @current_addr: address of the page, used as the cache key
438 * @block: block that contains the page we want to send
439 * @offset: offset inside the block for the page
440 * @last_stage: if we are at the completion stage
441 * @bytes_transferred: increase it with the number of transferred bytes
442 */
443static int save_xbzrle_page(QEMUFile *f, uint8_t **current_data,
444 ram_addr_t current_addr, RAMBlock *block,
445 ram_addr_t offset, bool last_stage,
446 uint64_t *bytes_transferred)
447{
448 int encoded_len = 0, bytes_xbzrle;
449 uint8_t *prev_cached_page;
450
451 if (!cache_is_cached(XBZRLE.cache, current_addr, bitmap_sync_count)) {
452 acct_info.xbzrle_cache_miss++;
453 if (!last_stage) {
454 if (cache_insert(XBZRLE.cache, current_addr, *current_data,
455 bitmap_sync_count) == -1) {
456 return -1;
457 } else {
458 /* update *current_data when the page has been
459 inserted into cache */
460 *current_data = get_cached_data(XBZRLE.cache, current_addr);
461 }
462 }
463 return -1;
464 }
465
466 prev_cached_page = get_cached_data(XBZRLE.cache, current_addr);
467
468 /* save current buffer into memory */
469 memcpy(XBZRLE.current_buf, *current_data, TARGET_PAGE_SIZE);
470
471 /* XBZRLE encoding (if there is no overflow) */
472 encoded_len = xbzrle_encode_buffer(prev_cached_page, XBZRLE.current_buf,
473 TARGET_PAGE_SIZE, XBZRLE.encoded_buf,
474 TARGET_PAGE_SIZE);
475 if (encoded_len == 0) {
476 DPRINTF("Skipping unmodified page\n");
477 return 0;
478 } else if (encoded_len == -1) {
479 DPRINTF("Overflow\n");
480 acct_info.xbzrle_overflows++;
481 /* update data in the cache */
482 if (!last_stage) {
483 memcpy(prev_cached_page, *current_data, TARGET_PAGE_SIZE);
484 *current_data = prev_cached_page;
485 }
486 return -1;
487 }
488
489 /* update the cache so that the next XBZRLE delta is computed against the data we just sent */
490 if (!last_stage) {
491 memcpy(prev_cached_page, XBZRLE.current_buf, TARGET_PAGE_SIZE);
492 }
493
494 /* Send XBZRLE based compressed page */
495 bytes_xbzrle = save_page_header(f, block, offset | RAM_SAVE_FLAG_XBZRLE);
496 qemu_put_byte(f, ENCODING_FLAG_XBZRLE);
497 qemu_put_be16(f, encoded_len);
498 qemu_put_buffer(f, XBZRLE.encoded_buf, encoded_len);
499 bytes_xbzrle += encoded_len + 1 + 2;
500 acct_info.xbzrle_pages++;
501 acct_info.xbzrle_bytes += bytes_xbzrle;
502 *bytes_transferred += bytes_xbzrle;
503
504 return 1;
505}
506
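/* Returns the offset, within the block backing 'mr', of the next dirty page
 * at or after 'start', clearing its bit in migration_bitmap; if nothing is
 * dirty the returned offset is >= the block size and the caller moves on to
 * the next block.  During the bulk stage every page is assumed dirty, so the
 * bitmap search is skipped except for the first page of a block.
 */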
507static inline
508ram_addr_t migration_bitmap_find_and_reset_dirty(MemoryRegion *mr,
509 ram_addr_t start)
510{
511 unsigned long base = mr->ram_addr >> TARGET_PAGE_BITS;
512 unsigned long nr = base + (start >> TARGET_PAGE_BITS);
513 uint64_t mr_size = TARGET_PAGE_ALIGN(memory_region_size(mr));
514 unsigned long size = base + (mr_size >> TARGET_PAGE_BITS);
515
516 unsigned long next;
517
518 if (ram_bulk_stage && nr > base) {
519 next = nr + 1;
520 } else {
521 next = find_next_bit(migration_bitmap, size, nr);
522 }
523
524 if (next < size) {
525 clear_bit(next, migration_bitmap);
526 migration_dirty_pages--;
527 }
528 return (next - base) << TARGET_PAGE_BITS;
529}
530
531static void migration_bitmap_sync_range(ram_addr_t start, ram_addr_t length)
532{
533 migration_dirty_pages +=
534 cpu_physical_memory_sync_dirty_bitmap(migration_bitmap, start, length);
535}
536
537
538/* Fix me: there are too many global variables used in migration process. */
539static int64_t start_time;
540static int64_t bytes_xfer_prev;
541static int64_t num_dirty_pages_period;
542static uint64_t xbzrle_cache_miss_prev;
543static uint64_t iterations_prev;
544
545static void migration_bitmap_sync_init(void)
546{
547 start_time = 0;
548 bytes_xfer_prev = 0;
549 num_dirty_pages_period = 0;
550 xbzrle_cache_miss_prev = 0;
551 iterations_prev = 0;
552}
553
554/* Called with iothread lock held, to protect ram_list.dirty_memory[] */
555static void migration_bitmap_sync(void)
556{
557 RAMBlock *block;
558 uint64_t num_dirty_pages_init = migration_dirty_pages;
559 MigrationState *s = migrate_get_current();
560 int64_t end_time;
561 int64_t bytes_xfer_now;
562
563 bitmap_sync_count++;
564
565 if (!bytes_xfer_prev) {
566 bytes_xfer_prev = ram_bytes_transferred();
567 }
568
569 if (!start_time) {
570 start_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
571 }
572
573 trace_migration_bitmap_sync_start();
574 address_space_sync_dirty_bitmap(&address_space_memory);
575
576 rcu_read_lock();
577 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
578 migration_bitmap_sync_range(block->mr->ram_addr, block->used_length);
579 }
580 rcu_read_unlock();
581
582 trace_migration_bitmap_sync_end(migration_dirty_pages
583 - num_dirty_pages_init);
584 num_dirty_pages_period += migration_dirty_pages - num_dirty_pages_init;
585 end_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
586
587 /* more than 1 second = 1000 milliseconds */
588 if (end_time > start_time + 1000) {
589 if (migrate_auto_converge()) {
590 /* The following detection logic can be refined later. For now:
591 Check to see if the dirtied bytes exceed 50% of the approx.
592 amount of bytes that just got transferred since the last time we
593 were in this routine. If that happens >N times (for now N==4)
594 we turn on the throttle down logic */
595 bytes_xfer_now = ram_bytes_transferred();
596 if (s->dirty_pages_rate &&
597 (num_dirty_pages_period * TARGET_PAGE_SIZE >
598 (bytes_xfer_now - bytes_xfer_prev)/2) &&
599 (dirty_rate_high_cnt++ > 4)) {
600 trace_migration_throttle();
601 mig_throttle_on = true;
602 dirty_rate_high_cnt = 0;
603 }
604 bytes_xfer_prev = bytes_xfer_now;
605 } else {
606 mig_throttle_on = false;
607 }
608 if (migrate_use_xbzrle()) {
609 if (iterations_prev != acct_info.iterations) {
610 acct_info.xbzrle_cache_miss_rate =
611 (double)(acct_info.xbzrle_cache_miss -
612 xbzrle_cache_miss_prev) /
613 (acct_info.iterations - iterations_prev);
614 }
615 iterations_prev = acct_info.iterations;
616 xbzrle_cache_miss_prev = acct_info.xbzrle_cache_miss;
617 }
618 s->dirty_pages_rate = num_dirty_pages_period * 1000
619 / (end_time - start_time);
620 s->dirty_bytes_rate = s->dirty_pages_rate * TARGET_PAGE_SIZE;
621 start_time = end_time;
622 num_dirty_pages_period = 0;
623 }
624 s->dirty_sync_count = bitmap_sync_count;
625}
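
/* Spelling out the auto-converge heuristic above: a sync period (roughly one
 * second) counts as "bad" when
 *     num_dirty_pages_period * TARGET_PAGE_SIZE > (bytes_xfer_now - bytes_xfer_prev) / 2
 * i.e. the guest dirtied more than half as many bytes as were transferred in
 * the same interval.  Once that has happened on more than four periods,
 * mig_throttle_on is set and check_guest_throttling() starts stalling vcpus.
 */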
626
627/**
628 * save_zero_page: Send the zero page to the stream
629 *
630 * Returns: Number of pages written.
631 *
632 * @f: QEMUFile where to send the data
633 * @block: block that contains the page we want to send
634 * @offset: offset inside the block for the page
635 * @p: pointer to the page
636 * @bytes_transferred: increase it with the number of transferred bytes
637 */
638static int save_zero_page(QEMUFile *f, RAMBlock *block, ram_addr_t offset,
639 uint8_t *p, uint64_t *bytes_transferred)
640{
641 int pages = -1;
642
643 if (is_zero_range(p, TARGET_PAGE_SIZE)) {
644 acct_info.dup_pages++;
645 *bytes_transferred += save_page_header(f, block,
646 offset | RAM_SAVE_FLAG_COMPRESS);
647 qemu_put_byte(f, 0);
648 *bytes_transferred += 1;
649 pages = 1;
650 }
651
652 return pages;
653}
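
/* A zero page therefore costs only the page header plus the single fill
 * byte written above; the destination recreates the page from that byte in
 * ram_handle_compressed().
 */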
654
655/**
656 * ram_save_page: Send the given page to the stream
657 *
658 * Returns: Number of pages written.
659 *
660 * @f: QEMUFile where to send the data
661 * @block: block that contains the page we want to send
662 * @offset: offset inside the block for the page
663 * @last_stage: if we are at the completion stage
664 * @bytes_transferred: increase it with the number of transferred bytes
665 */
666static int ram_save_page(QEMUFile *f, RAMBlock* block, ram_addr_t offset,
667 bool last_stage, uint64_t *bytes_transferred)
668{
669 int pages = -1;
670 uint64_t bytes_xmit;
671 ram_addr_t current_addr;
672 MemoryRegion *mr = block->mr;
673 uint8_t *p;
674 int ret;
675 bool send_async = true;
676
677 p = memory_region_get_ram_ptr(mr) + offset;
678
679 /* When in doubt, send the page as a normal page */
680 bytes_xmit = 0;
681 ret = ram_control_save_page(f, block->offset,
682 offset, TARGET_PAGE_SIZE, &bytes_xmit);
683 if (bytes_xmit) {
684 *bytes_transferred += bytes_xmit;
685 pages = 1;
686 }
687
688 XBZRLE_cache_lock();
689
690 current_addr = block->offset + offset;
691
692 if (block == last_sent_block) {
693 offset |= RAM_SAVE_FLAG_CONTINUE;
694 }
695 if (ret != RAM_SAVE_CONTROL_NOT_SUPP) {
696 if (ret != RAM_SAVE_CONTROL_DELAYED) {
697 if (bytes_xmit > 0) {
698 acct_info.norm_pages++;
699 } else if (bytes_xmit == 0) {
700 acct_info.dup_pages++;
701 }
702 }
703 } else {
704 pages = save_zero_page(f, block, offset, p, bytes_transferred);
705 if (pages > 0) {
706 /* Must let xbzrle know, otherwise a previous (now 0'd) cached
707 * page would be stale
708 */
709 xbzrle_cache_zero_page(current_addr);
710 } else if (!ram_bulk_stage && migrate_use_xbzrle()) {
711 pages = save_xbzrle_page(f, &p, current_addr, block,
712 offset, last_stage, bytes_transferred);
713 if (!last_stage) {
714 /* Can't send this cached data async, since the cache page
715 * might get updated before it gets to the wire
716 */
717 send_async = false;
718 }
719 }
720 }
721
722 /* XBZRLE overflow or normal page */
723 if (pages == -1) {
724 *bytes_transferred += save_page_header(f, block,
725 offset | RAM_SAVE_FLAG_PAGE);
726 if (send_async) {
727 qemu_put_buffer_async(f, p, TARGET_PAGE_SIZE);
728 } else {
729 qemu_put_buffer(f, p, TARGET_PAGE_SIZE);
730 }
731 *bytes_transferred += TARGET_PAGE_SIZE;
732 pages = 1;
733 acct_info.norm_pages++;
734 }
735
736 XBZRLE_cache_unlock();
737
738 return pages;
739}
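
/* Rough order of preference used above: let ram_control_save_page() (e.g.
 * RDMA) handle the page if it can, otherwise try to send it as a zero page,
 * then as an XBZRLE delta (outside the bulk stage), and only fall back to
 * transmitting the full TARGET_PAGE_SIZE bytes when everything else declines.
 */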
740
741static int do_compress_ram_page(CompressParam *param)
742{
743 int bytes_sent, blen;
744 uint8_t *p;
745 RAMBlock *block = param->block;
746 ram_addr_t offset = param->offset;
747
748 p = memory_region_get_ram_ptr(block->mr) + (offset & TARGET_PAGE_MASK);
749
750 bytes_sent = save_page_header(param->file, block, offset |
751 RAM_SAVE_FLAG_COMPRESS_PAGE);
752 blen = qemu_put_compression_data(param->file, p, TARGET_PAGE_SIZE,
753 migrate_compress_level());
754 bytes_sent += blen;
755
756 return bytes_sent;
757}
758
759static inline void start_compression(CompressParam *param)
760{
761 param->done = false;
762 qemu_mutex_lock(&param->mutex);
763 param->start = true;
764 qemu_cond_signal(&param->cond);
765 qemu_mutex_unlock(&param->mutex);
766}
767
768static inline void start_decompression(DecompressParam *param)
769{
770 qemu_mutex_lock(&param->mutex);
771 param->start = true;
772 qemu_cond_signal(&param->cond);
773 qemu_mutex_unlock(&param->mutex);
774}
775
776static uint64_t bytes_transferred;
777
778static void flush_compressed_data(QEMUFile *f)
779{
780 int idx, len, thread_count;
781
782 if (!migrate_use_compression()) {
783 return;
784 }
785 thread_count = migrate_compress_threads();
786 for (idx = 0; idx < thread_count; idx++) {
787 if (!comp_param[idx].done) {
788 qemu_mutex_lock(comp_done_lock);
789 while (!comp_param[idx].done && !quit_comp_thread) {
790 qemu_cond_wait(comp_done_cond, comp_done_lock);
791 }
792 qemu_mutex_unlock(comp_done_lock);
793 }
794 if (!quit_comp_thread) {
795 len = qemu_put_qemu_file(f, comp_param[idx].file);
796 bytes_transferred += len;
797 }
798 }
799}
800
801static inline void set_compress_params(CompressParam *param, RAMBlock *block,
802 ram_addr_t offset)
803{
804 param->block = block;
805 param->offset = offset;
806}
807
808static int compress_page_with_multi_thread(QEMUFile *f, RAMBlock *block,
809 ram_addr_t offset,
810 uint64_t *bytes_transferred)
811{
812 int idx, thread_count, bytes_xmit = -1, pages = -1;
813
814 thread_count = migrate_compress_threads();
815 qemu_mutex_lock(comp_done_lock);
816 while (true) {
817 for (idx = 0; idx < thread_count; idx++) {
818 if (comp_param[idx].done) {
819 bytes_xmit = qemu_put_qemu_file(f, comp_param[idx].file);
820 set_compress_params(&comp_param[idx], block, offset);
821 start_compression(&comp_param[idx]);
822 pages = 1;
823 acct_info.norm_pages++;
824 *bytes_transferred += bytes_xmit;
825 break;
826 }
827 }
828 if (pages > 0) {
829 break;
830 } else {
831 qemu_cond_wait(comp_done_cond, comp_done_lock);
832 }
833 }
834 qemu_mutex_unlock(comp_done_lock);
835
836 return pages;
837}
838
839/**
840 * ram_save_compressed_page: compress the given page and send it to the stream
841 *
842 * Returns: Number of pages written.
843 *
844 * @f: QEMUFile where to send the data
845 * @block: block that contains the page we want to send
846 * @offset: offset inside the block for the page
847 * @last_stage: if we are at the completion stage
848 * @bytes_transferred: increase it with the number of transferred bytes
849 */
850static int ram_save_compressed_page(QEMUFile *f, RAMBlock *block,
851 ram_addr_t offset, bool last_stage,
852 uint64_t *bytes_transferred)
853{
854 int pages = -1;
855 uint64_t bytes_xmit;
856 MemoryRegion *mr = block->mr;
857 uint8_t *p;
858 int ret;
859
860 p = memory_region_get_ram_ptr(mr) + offset;
861
862 bytes_xmit = 0;
863 ret = ram_control_save_page(f, block->offset,
864 offset, TARGET_PAGE_SIZE, &bytes_xmit);
865 if (bytes_xmit) {
866 *bytes_transferred += bytes_xmit;
867 pages = 1;
868 }
869 if (block == last_sent_block) {
870 offset |= RAM_SAVE_FLAG_CONTINUE;
871 }
872 if (ret != RAM_SAVE_CONTROL_NOT_SUPP) {
873 if (ret != RAM_SAVE_CONTROL_DELAYED) {
874 if (bytes_xmit > 0) {
875 acct_info.norm_pages++;
876 } else if (bytes_xmit == 0) {
877 acct_info.dup_pages++;
878 }
879 }
880 } else {
881 /* When starting the process of a new block, the first page of
882 * the block should be sent out before other pages in the same
883 * block, and all the pages in the previous block should have been
884 * sent out. Keeping this order is important, because the 'cont'
885 * flag is used to avoid resending the block name.
886 */
887 if (block != last_sent_block) {
888 flush_compressed_data(f);
889 pages = save_zero_page(f, block, offset, p, bytes_transferred);
890 if (pages == -1) {
891 set_compress_params(&comp_param[0], block, offset);
892 /* Use the qemu thread to compress the data to make sure the
893 * first page is sent out before other pages
894 */
895 bytes_xmit = do_compress_ram_page(&comp_param[0]);
896 acct_info.norm_pages++;
897 qemu_put_qemu_file(f, comp_param[0].file);
898 *bytes_transferred += bytes_xmit;
899 pages = 1;
900 }
901 } else {
902 pages = save_zero_page(f, block, offset, p, bytes_transferred);
903 if (pages == -1) {
904 pages = compress_page_with_multi_thread(f, block, offset,
905 bytes_transferred);
906 }
907 }
908 }
909
910 return pages;
911}
912
913/**
914 * ram_find_and_save_block: Finds a dirty page and sends it to f
915 *
916 * Called within an RCU critical section.
917 *
918 * Returns: The number of pages written
919 * 0 means no dirty pages
920 *
921 * @f: QEMUFile where to send the data
922 * @last_stage: if we are at the completion stage
923 * @bytes_transferred: increase it with the number of transferred bytes
924 */
925
926static int ram_find_and_save_block(QEMUFile *f, bool last_stage,
927 uint64_t *bytes_transferred)
928{
929 RAMBlock *block = last_seen_block;
930 ram_addr_t offset = last_offset;
931 bool complete_round = false;
932 int pages = 0;
933 MemoryRegion *mr;
934
935 if (!block)
936 block = QLIST_FIRST_RCU(&ram_list.blocks);
937
938 while (true) {
939 mr = block->mr;
940 offset = migration_bitmap_find_and_reset_dirty(mr, offset);
941 if (complete_round && block == last_seen_block &&
942 offset >= last_offset) {
943 break;
944 }
945 if (offset >= block->used_length) {
946 offset = 0;
947 block = QLIST_NEXT_RCU(block, next);
948 if (!block) {
949 block = QLIST_FIRST_RCU(&ram_list.blocks);
950 complete_round = true;
951 ram_bulk_stage = false;
952 if (migrate_use_xbzrle()) {
953 /* If xbzrle is on, stop using the data compression at this
954 * point. In theory, xbzrle can do better than compression.
955 */
956 flush_compressed_data(f);
957 compression_switch = false;
958 }
959 }
960 } else {
961 if (compression_switch && migrate_use_compression()) {
962 pages = ram_save_compressed_page(f, block, offset, last_stage,
963 bytes_transferred);
964 } else {
965 pages = ram_save_page(f, block, offset, last_stage,
966 bytes_transferred);
967 }
968
969 /* if page is unmodified, continue to the next */
970 if (pages > 0) {
971 last_sent_block = block;
972 break;
973 }
974 }
975 }
976
977 last_seen_block = block;
978 last_offset = offset;
979
980 return pages;
981}
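
/* The scan above resumes from (last_seen_block, last_offset) on each call and
 * wraps around the block list at most once per call (complete_round), so a
 * single invocation sends at most one page and returns 0 only when a full
 * pass found nothing dirty.
 */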
982
983void acct_update_position(QEMUFile *f, size_t size, bool zero)
984{
985 uint64_t pages = size / TARGET_PAGE_SIZE;
986 if (zero) {
987 acct_info.dup_pages += pages;
988 } else {
989 acct_info.norm_pages += pages;
990 bytes_transferred += size;
991 qemu_update_position(f, size);
992 }
993}
994
995static ram_addr_t ram_save_remaining(void)
996{
997 return migration_dirty_pages;
998}
999
1000uint64_t ram_bytes_remaining(void)
1001{
1002 return ram_save_remaining() * TARGET_PAGE_SIZE;
1003}
1004
1005uint64_t ram_bytes_transferred(void)
1006{
1007 return bytes_transferred;
1008}
1009
1010uint64_t ram_bytes_total(void)
1011{
1012 RAMBlock *block;
1013 uint64_t total = 0;
1014
1015 rcu_read_lock();
1016 QLIST_FOREACH_RCU(block, &ram_list.blocks, next)
1017 total += block->used_length;
1018 rcu_read_unlock();
1019 return total;
1020}
1021
1022void free_xbzrle_decoded_buf(void)
1023{
1024 g_free(xbzrle_decoded_buf);
1025 xbzrle_decoded_buf = NULL;
1026}
1027
1028static void migration_end(void)
1029{
1030 if (migration_bitmap) {
1031 memory_global_dirty_log_stop();
1032 g_free(migration_bitmap);
1033 migration_bitmap = NULL;
1034 }
1035
1036 XBZRLE_cache_lock();
1037 if (XBZRLE.cache) {
1038 cache_fini(XBZRLE.cache);
1039 g_free(XBZRLE.encoded_buf);
1040 g_free(XBZRLE.current_buf);
1041 XBZRLE.cache = NULL;
1042 XBZRLE.encoded_buf = NULL;
1043 XBZRLE.current_buf = NULL;
1044 }
1045 XBZRLE_cache_unlock();
1046}
1047
1048static void ram_migration_cancel(void *opaque)
1049{
1050 migration_end();
1051}
1052
1053static void reset_ram_globals(void)
1054{
1055 last_seen_block = NULL;
1056 last_sent_block = NULL;
1057 last_offset = 0;
1058 last_version = ram_list.version;
1059 ram_bulk_stage = true;
1060}
1061
1062#define MAX_WAIT 50 /* ms, half buffered_file limit */
1063
1064
1065/* Each of ram_save_setup, ram_save_iterate and ram_save_complete has a
1066 * long-running RCU critical section. When RCU reclaims in the code
1067 * start to become numerous it will be necessary to reduce the
1068 * granularity of these critical sections.
1069 */
1070
1071static int ram_save_setup(QEMUFile *f, void *opaque)
1072{
1073 RAMBlock *block;
1074 int64_t ram_bitmap_pages; /* Size of bitmap in pages, including gaps */
1075
1076 mig_throttle_on = false;
1077 dirty_rate_high_cnt = 0;
1078 bitmap_sync_count = 0;
1079 migration_bitmap_sync_init();
1080
1081 if (migrate_use_xbzrle()) {
1082 XBZRLE_cache_lock();
1083 XBZRLE.cache = cache_init(migrate_xbzrle_cache_size() /
1084 TARGET_PAGE_SIZE,
1085 TARGET_PAGE_SIZE);
1086 if (!XBZRLE.cache) {
1087 XBZRLE_cache_unlock();
1088 error_report("Error creating cache");
1089 return -1;
1090 }
1091 XBZRLE_cache_unlock();
1092
1093 /* We prefer not to abort if there is no memory */
1094 XBZRLE.encoded_buf = g_try_malloc0(TARGET_PAGE_SIZE);
1095 if (!XBZRLE.encoded_buf) {
1096 error_report("Error allocating encoded_buf");
1097 return -1;
1098 }
1099
1100 XBZRLE.current_buf = g_try_malloc(TARGET_PAGE_SIZE);
1101 if (!XBZRLE.current_buf) {
1102 error_report("Error allocating current_buf");
1103 g_free(XBZRLE.encoded_buf);
1104 XBZRLE.encoded_buf = NULL;
1105 return -1;
1106 }
1107
1108 acct_clear();
1109 }
1110
1111 /* iothread lock needed for ram_list.dirty_memory[] */
1112 qemu_mutex_lock_iothread();
1113 qemu_mutex_lock_ramlist();
1114 rcu_read_lock();
1115 bytes_transferred = 0;
1116 reset_ram_globals();
1117
1118 ram_bitmap_pages = last_ram_offset() >> TARGET_PAGE_BITS;
1119 migration_bitmap = bitmap_new(ram_bitmap_pages);
1120 bitmap_set(migration_bitmap, 0, ram_bitmap_pages);
1121
1122 /*
1123 * Count the total number of pages used by ram blocks not including any
1124 * gaps due to alignment or unplugs.
1125 */
1126 migration_dirty_pages = ram_bytes_total() >> TARGET_PAGE_BITS;
1127
1128 memory_global_dirty_log_start();
1129 migration_bitmap_sync();
1130 qemu_mutex_unlock_ramlist();
1131 qemu_mutex_unlock_iothread();
1132
1133 qemu_put_be64(f, ram_bytes_total() | RAM_SAVE_FLAG_MEM_SIZE);
1134
1135 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1136 qemu_put_byte(f, strlen(block->idstr));
1137 qemu_put_buffer(f, (uint8_t *)block->idstr, strlen(block->idstr));
1138 qemu_put_be64(f, block->used_length);
1139 }
1140
1141 rcu_read_unlock();
1142
1143 ram_control_before_iterate(f, RAM_CONTROL_SETUP);
1144 ram_control_after_iterate(f, RAM_CONTROL_SETUP);
1145
1146 qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
1147
1148 return 0;
1149}
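
/* For reference, the setup stage above emits:
 *     be64  total ram size | RAM_SAVE_FLAG_MEM_SIZE
 *     for each RAMBlock: u8 idstr length, the idstr bytes, be64 used_length
 *     be64  RAM_SAVE_FLAG_EOS
 * which ram_load() consumes in its RAM_SAVE_FLAG_MEM_SIZE case.
 */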
1150
1151static int ram_save_iterate(QEMUFile *f, void *opaque)
1152{
1153 int ret;
1154 int i;
1155 int64_t t0;
1156 int pages_sent = 0;
1157
1158 rcu_read_lock();
1159 if (ram_list.version != last_version) {
1160 reset_ram_globals();
1161 }
1162
1163 /* Read version before ram_list.blocks */
1164 smp_rmb();
1165
1166 ram_control_before_iterate(f, RAM_CONTROL_ROUND);
1167
1168 t0 = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
1169 i = 0;
1170 while ((ret = qemu_file_rate_limit(f)) == 0) {
1171 int pages;
1172
1173 pages = ram_find_and_save_block(f, false, &bytes_transferred);
1174 /* no more pages to send */
1175 if (pages == 0) {
1176 break;
1177 }
1178 pages_sent += pages;
1179 acct_info.iterations++;
1180 check_guest_throttling();
1181 /* we want to check in the 1st loop, just in case it was the 1st time
1182 and we had to sync the dirty bitmap.
1183 qemu_clock_get_ns() is a bit expensive, so we only check every few
1184 iterations
1185 */
1186 if ((i & 63) == 0) {
1187 uint64_t t1 = (qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - t0) / 1000000;
1188 if (t1 > MAX_WAIT) {
1189 DPRINTF("big wait: %" PRIu64 " milliseconds, %d iterations\n",
1190 t1, i);
1191 break;
1192 }
1193 }
1194 i++;
1195 }
1196 flush_compressed_data(f);
1197 rcu_read_unlock();
1198
1199 /*
1200 * Must occur before EOS (or any QEMUFile operation)
1201 * because of RDMA protocol.
1202 */
1203 ram_control_after_iterate(f, RAM_CONTROL_ROUND);
1204
1205 qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
1206 bytes_transferred += 8;
1207
1208 ret = qemu_file_get_error(f);
1209 if (ret < 0) {
1210 return ret;
1211 }
1212
1213 return pages_sent;
1214}
1215
1216/* Called with iothread lock */
1217static int ram_save_complete(QEMUFile *f, void *opaque)
1218{
1219 rcu_read_lock();
1220
1221 migration_bitmap_sync();
1222
1223 ram_control_before_iterate(f, RAM_CONTROL_FINISH);
1224
1225 /* try transferring iterative blocks of memory */
1226
1227 /* flush all remaining blocks regardless of rate limiting */
1228 while (true) {
1229 int pages;
1230
1231 pages = ram_find_and_save_block(f, true, &bytes_transferred);
1232 /* no more blocks to send */
1233 if (pages == 0) {
1234 break;
1235 }
1236 }
1237
1238 flush_compressed_data(f);
1239 ram_control_after_iterate(f, RAM_CONTROL_FINISH);
1240 migration_end();
1241
1242 rcu_read_unlock();
1243 qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
1244
1245 return 0;
1246}
1247
1248static uint64_t ram_save_pending(QEMUFile *f, void *opaque, uint64_t max_size)
1249{
1250 uint64_t remaining_size;
1251
1252 remaining_size = ram_save_remaining() * TARGET_PAGE_SIZE;
1253
1254 if (remaining_size < max_size) {
1255 qemu_mutex_lock_iothread();
1256 rcu_read_lock();
1257 migration_bitmap_sync();
1258 rcu_read_unlock();
1259 qemu_mutex_unlock_iothread();
1260 remaining_size = ram_save_remaining() * TARGET_PAGE_SIZE;
1261 }
1262 return remaining_size;
1263}
1264
1265static int load_xbzrle(QEMUFile *f, ram_addr_t addr, void *host)
1266{
1267 unsigned int xh_len;
1268 int xh_flags;
1269
1270 if (!xbzrle_decoded_buf) {
1271 xbzrle_decoded_buf = g_malloc(TARGET_PAGE_SIZE);
1272 }
1273
1274 /* extract RLE header */
1275 xh_flags = qemu_get_byte(f);
1276 xh_len = qemu_get_be16(f);
1277
1278 if (xh_flags != ENCODING_FLAG_XBZRLE) {
1279 error_report("Failed to load XBZRLE page - wrong compression!");
1280 return -1;
1281 }
1282
1283 if (xh_len > TARGET_PAGE_SIZE) {
1284 error_report("Failed to load XBZRLE page - len overflow!");
1285 return -1;
1286 }
1287 /* load data and decode */
1288 qemu_get_buffer(f, xbzrle_decoded_buf, xh_len);
1289
1290 /* decode RLE */
1291 if (xbzrle_decode_buffer(xbzrle_decoded_buf, xh_len, host,
1292 TARGET_PAGE_SIZE) == -1) {
1293 error_report("Failed to load XBZRLE page - decode error!");
1294 return -1;
1295 }
1296
1297 return 0;
1298}
1299
1300/* Must be called from within an RCU critical section.
1301 * Returns a pointer from within the RCU-protected ram_list.
1302 */
1303static inline void *host_from_stream_offset(QEMUFile *f,
1304 ram_addr_t offset,
1305 int flags)
1306{
1307 static RAMBlock *block = NULL;
1308 char id[256];
1309 uint8_t len;
1310
1311 if (flags & RAM_SAVE_FLAG_CONTINUE) {
1312 if (!block || block->max_length <= offset) {
1313 error_report("Ack, bad migration stream!");
1314 return NULL;
1315 }
1316
1317 return memory_region_get_ram_ptr(block->mr) + offset;
1318 }
1319
1320 len = qemu_get_byte(f);
1321 qemu_get_buffer(f, (uint8_t *)id, len);
1322 id[len] = 0;
1323
1324 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1325 if (!strncmp(id, block->idstr, sizeof(id)) &&
1326 block->max_length > offset) {
1327 return memory_region_get_ram_ptr(block->mr) + offset;
1328 }
1329 }
1330
1331 error_report("Can't find block %s!", id);
1332 return NULL;
1333}
1334
1335/*
1336 * If a page (or a whole RDMA chunk) has been
1337 * determined to be zero, then zap it.
1338 */
1339void ram_handle_compressed(void *host, uint8_t ch, uint64_t size)
1340{
1341 if (ch != 0 || !is_zero_range(host, size)) {
1342 memset(host, ch, size);
1343 }
1344}
1345
1346static void *do_data_decompress(void *opaque)
1347{
1348 DecompressParam *param = opaque;
1349 unsigned long pagesize;
1350
1351 while (!quit_decomp_thread) {
1352 qemu_mutex_lock(&param->mutex);
1353 while (!param->start && !quit_decomp_thread) {
1354 qemu_cond_wait(&param->cond, &param->mutex);
1355 pagesize = TARGET_PAGE_SIZE;
1356 if (!quit_decomp_thread) {
1357 /* uncompress() can fail in some cases, especially
1358 * when the page was dirtied while being compressed; that is
1359 * not a problem because the dirty page will be retransferred
1360 * and uncompress() won't break the data in other pages.
1361 */
1362 uncompress((Bytef *)param->des, &pagesize,
1363 (const Bytef *)param->compbuf, param->len);
1364 }
1365 param->start = false;
1366 }
1367 qemu_mutex_unlock(&param->mutex);
1368 }
1369
1370 return NULL;
1371}
1372
1373void migrate_decompress_threads_create(void)
1374{
1375 int i, thread_count;
1376
1377 thread_count = migrate_decompress_threads();
1378 decompress_threads = g_new0(QemuThread, thread_count);
1379 decomp_param = g_new0(DecompressParam, thread_count);
1380 compressed_data_buf = g_malloc0(compressBound(TARGET_PAGE_SIZE));
1381 quit_decomp_thread = false;
1382 for (i = 0; i < thread_count; i++) {
1383 qemu_mutex_init(&decomp_param[i].mutex);
1384 qemu_cond_init(&decomp_param[i].cond);
1385 decomp_param[i].compbuf = g_malloc0(compressBound(TARGET_PAGE_SIZE));
1386 qemu_thread_create(decompress_threads + i, "decompress",
1387 do_data_decompress, decomp_param + i,
1388 QEMU_THREAD_JOINABLE);
1389 }
1390}
1391
1392void migrate_decompress_threads_join(void)
1393{
1394 int i, thread_count;
1395
1396 quit_decomp_thread = true;
1397 thread_count = migrate_decompress_threads();
1398 for (i = 0; i < thread_count; i++) {
1399 qemu_mutex_lock(&decomp_param[i].mutex);
1400 qemu_cond_signal(&decomp_param[i].cond);
1401 qemu_mutex_unlock(&decomp_param[i].mutex);
1402 }
1403 for (i = 0; i < thread_count; i++) {
1404 qemu_thread_join(decompress_threads + i);
1405 qemu_mutex_destroy(&decomp_param[i].mutex);
1406 qemu_cond_destroy(&decomp_param[i].cond);
1407 g_free(decomp_param[i].compbuf);
1408 }
1409 g_free(decompress_threads);
1410 g_free(decomp_param);
1411 g_free(compressed_data_buf);
1412 decompress_threads = NULL;
1413 decomp_param = NULL;
1414 compressed_data_buf = NULL;
1415}
1416
1417static void decompress_data_with_multi_threads(uint8_t *compbuf,
1418 void *host, int len)
1419{
1420 int idx, thread_count;
1421
1422 thread_count = migrate_decompress_threads();
1423 while (true) {
1424 for (idx = 0; idx < thread_count; idx++) {
1425 if (!decomp_param[idx].start) {
1426 memcpy(decomp_param[idx].compbuf, compbuf, len);
1427 decomp_param[idx].des = host;
1428 decomp_param[idx].len = len;
1429 start_decompression(&decomp_param[idx]);
1430 break;
1431 }
1432 }
1433 if (idx < thread_count) {
1434 break;
1435 }
1436 }
1437}
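
/* Note that if every decompression thread is busy the loop above simply spins
 * until one of them clears its 'start' flag; there is no condition variable
 * on the receive side in this version of the code.
 */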
1438
1439static int ram_load(QEMUFile *f, void *opaque, int version_id)
1440{
1441 int flags = 0, ret = 0;
1442 static uint64_t seq_iter;
1443 int len = 0;
1444
1445 seq_iter++;
1446
1447 if (version_id != 4) {
1448 ret = -EINVAL;
1449 }
1450
1451 /* This RCU critical section can be very long running.
1452 * When RCU reclaims in the code start to become numerous,
1453 * it will be necessary to reduce the granularity of this
1454 * critical section.
1455 */
1456 rcu_read_lock();
1457 while (!ret && !(flags & RAM_SAVE_FLAG_EOS)) {
1458 ram_addr_t addr, total_ram_bytes;
1459 void *host;
1460 uint8_t ch;
1461
1462 addr = qemu_get_be64(f);
1463 flags = addr & ~TARGET_PAGE_MASK;
1464 addr &= TARGET_PAGE_MASK;
1465
1466 switch (flags & ~RAM_SAVE_FLAG_CONTINUE) {
1467 case RAM_SAVE_FLAG_MEM_SIZE:
1468 /* Synchronize RAM block list */
1469 total_ram_bytes = addr;
1470 while (!ret && total_ram_bytes) {
1471 RAMBlock *block;
1472 uint8_t len;
1473 char id[256];
1474 ram_addr_t length;
1475
1476 len = qemu_get_byte(f);
1477 qemu_get_buffer(f, (uint8_t *)id, len);
1478 id[len] = 0;
1479 length = qemu_get_be64(f);
1480
1481 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1482 if (!strncmp(id, block->idstr, sizeof(id))) {
1483 if (length != block->used_length) {
1484 Error *local_err = NULL;
1485
1486 ret = qemu_ram_resize(block->offset, length, &local_err);
1487 if (local_err) {
1488 error_report_err(local_err);
1489 }
1490 }
1491 break;
1492 }
1493 }
1494
1495 if (!block) {
1496 error_report("Unknown ramblock \"%s\", cannot "
1497 "accept migration", id);
1498 ret = -EINVAL;
1499 }
1500
1501 total_ram_bytes -= length;
1502 }
1503 break;
1504 case RAM_SAVE_FLAG_COMPRESS:
1505 host = host_from_stream_offset(f, addr, flags);
1506 if (!host) {
1507 error_report("Illegal RAM offset " RAM_ADDR_FMT, addr);
1508 ret = -EINVAL;
1509 break;
1510 }
1511 ch = qemu_get_byte(f);
1512 ram_handle_compressed(host, ch, TARGET_PAGE_SIZE);
1513 break;
1514 case RAM_SAVE_FLAG_PAGE:
1515 host = host_from_stream_offset(f, addr, flags);
1516 if (!host) {
1517 error_report("Illegal RAM offset " RAM_ADDR_FMT, addr);
1518 ret = -EINVAL;
1519 break;
1520 }
1521 qemu_get_buffer(f, host, TARGET_PAGE_SIZE);
1522 break;
1523 case RAM_SAVE_FLAG_COMPRESS_PAGE:
1524 host = host_from_stream_offset(f, addr, flags);
1525 if (!host) {
1526 error_report("Invalid RAM offset " RAM_ADDR_FMT, addr);
1527 ret = -EINVAL;
1528 break;
1529 }
1530
1531 len = qemu_get_be32(f);
1532 if (len < 0 || len > compressBound(TARGET_PAGE_SIZE)) {
1533 error_report("Invalid compressed data length: %d", len);
1534 ret = -EINVAL;
1535 break;
1536 }
1537 qemu_get_buffer(f, compressed_data_buf, len);
1538 decompress_data_with_multi_threads(compressed_data_buf, host, len);
1539 break;
1540 case RAM_SAVE_FLAG_XBZRLE:
1541 host = host_from_stream_offset(f, addr, flags);
1542 if (!host) {
1543 error_report("Illegal RAM offset " RAM_ADDR_FMT, addr);
1544 ret = -EINVAL;
1545 break;
1546 }
1547 if (load_xbzrle(f, addr, host) < 0) {
1548 error_report("Failed to decompress XBZRLE page at "
1549 RAM_ADDR_FMT, addr);
1550 ret = -EINVAL;
1551 break;
1552 }
1553 break;
1554 case RAM_SAVE_FLAG_EOS:
1555 /* normal exit */
1556 break;
1557 default:
1558 if (flags & RAM_SAVE_FLAG_HOOK) {
1559 ram_control_load_hook(f, flags);
1560 } else {
1561 error_report("Unknown combination of migration flags: %#x",
1562 flags);
1563 ret = -EINVAL;
1564 }
1565 }
1566 if (!ret) {
1567 ret = qemu_file_get_error(f);
1568 }
1569 }
1570
1571 rcu_read_unlock();
1572 DPRINTF("Completed load of VM with exit code %d seq iteration "
1573 "%" PRIu64 "\n", ret, seq_iter);
1574 return ret;
1575}
1576
1577static SaveVMHandlers savevm_ram_handlers = {
1578 .save_live_setup = ram_save_setup,
1579 .save_live_iterate = ram_save_iterate,
1580 .save_live_complete = ram_save_complete,
1581 .save_live_pending = ram_save_pending,
1582 .load_state = ram_load,
1583 .cancel = ram_migration_cancel,
1584};
1585
1586void ram_mig_init(void)
1587{
1588 qemu_mutex_init(&XBZRLE.lock);
1589 register_savevm_live(NULL, "ram", 0, 4, &savevm_ram_handlers, NULL);
1590}
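
/* The section version registered above (4) has to match the version_id
 * accepted at the top of ram_load(); bumping one without the other breaks
 * migration compatibility.
 */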
1591/* Stub function that gets run on the vcpu when it's brought out of the
1592 VM to run inside qemu via async_run_on_cpu() */
1593
1594static void mig_sleep_cpu(void *opq)
1595{
1596 qemu_mutex_unlock_iothread();
1597 g_usleep(30*1000);
1598 qemu_mutex_lock_iothread();
1599}
1600
1601/* To reduce the dirty rate, explicitly disallow the VCPUs from spending
1602 much time in the VM. The migration thread will try to catch up.
1603 The workload will experience a performance drop.
1604*/
1605static void mig_throttle_guest_down(void)
1606{
1607 CPUState *cpu;
1608
1609 qemu_mutex_lock_iothread();
1610 CPU_FOREACH(cpu) {
1611 async_run_on_cpu(cpu, mig_sleep_cpu, NULL);
1612 }
1613 qemu_mutex_unlock_iothread();
1614}
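
/* Roughly: each throttle event above puts every vcpu to sleep for 30 ms with
 * the iothread lock dropped (mig_sleep_cpu), and check_guest_throttling()
 * below re-arms it at most once every 40 ms while mig_throttle_on is set, so
 * a throttled guest gets only a fraction of its normal CPU time.
 */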
1615
1616static void check_guest_throttling(void)
1617{
1618 static int64_t t0;
1619 int64_t t1;
1620
1621 if (!mig_throttle_on) {
1622 return;
1623 }
1624
1625 if (!t0) {
1626 t0 = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
1627 return;
1628 }
1629
1630 t1 = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
1631
1632 /* If it has been more than 40 ms since the last time the guest
1633 * was throttled then do it again.
1634 */
1635 if (40 < (t1-t0)/1000000) {
1636 mig_throttle_guest_down();
1637 t0 = t1;
1638 }
1639}