/*
 * Copyright (c) 2003-2008 Fabrice Bellard
 * Copyright (c) 2011-2015 Red Hat Inc
 *
 * Authors:
 *  Juan Quintela <quintela@redhat.com>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */
#include "qemu/osdep.h"
#include "qemu-common.h"
#include "qapi-event.h"
#include "qemu/cutils.h"
#include "qemu/bitops.h"
#include "qemu/bitmap.h"
#include "qemu/timer.h"
#include "qemu/main-loop.h"
#include "migration/migration.h"
#include "migration/postcopy-ram.h"
#include "exec/address-spaces.h"
#include "migration/page_cache.h"
#include "qemu/error-report.h"
#include "exec/ram_addr.h"
#include "qemu/rcu_queue.h"
#include "migration/colo.h"
/***********************************************************/
/* ram save/restore */

#define RAM_SAVE_FLAG_FULL     0x01 /* Obsolete, not used anymore */
#define RAM_SAVE_FLAG_COMPRESS 0x02
#define RAM_SAVE_FLAG_MEM_SIZE 0x04
#define RAM_SAVE_FLAG_PAGE     0x08
#define RAM_SAVE_FLAG_EOS      0x10
#define RAM_SAVE_FLAG_CONTINUE 0x20
#define RAM_SAVE_FLAG_XBZRLE   0x40
/* 0x80 is reserved in migration.h; start with 0x100 next */
#define RAM_SAVE_FLAG_COMPRESS_PAGE    0x100
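/*
 * Illustrative sketch (an assumption drawn from save_page_header() below,
 * not an addition to the format): page offsets on the wire are target-page
 * aligned, so the low bits of the 64-bit offset field carry the
 * RAM_SAVE_FLAG_* bits above.  A reader could split them apart roughly as:
 *
 *   uint64_t header = qemu_get_be64(f);            // offset | flags
 *   int flags       = header & ~TARGET_PAGE_MASK;  // flag bits
 *   ram_addr_t addr = header & TARGET_PAGE_MASK;   // page-aligned offset
 */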
static uint8_t *ZERO_TARGET_PAGE;

static inline bool is_zero_range(uint8_t *p, uint64_t size)
{
    return buffer_is_zero(p, size);
}
/* struct contains XBZRLE cache and a static page
   used by the compression */
static struct {
    /* buffer used for XBZRLE encoding */
    uint8_t *encoded_buf;
    /* buffer for storing page content */
    uint8_t *current_buf;
    /* Cache for XBZRLE, Protected by lock. */
    PageCache *cache;
    QemuMutex lock;
} XBZRLE;

/* buffer used for XBZRLE decoding */
static uint8_t *xbzrle_decoded_buf;
static void XBZRLE_cache_lock(void)
{
    if (migrate_use_xbzrle())
        qemu_mutex_lock(&XBZRLE.lock);
}

static void XBZRLE_cache_unlock(void)
{
    if (migrate_use_xbzrle())
        qemu_mutex_unlock(&XBZRLE.lock);
}
/**
 * xbzrle_cache_resize: resize the xbzrle cache
 *
 * This function is called from qmp_migrate_set_cache_size in main
 * thread, possibly while a migration is in progress. A running
 * migration may be using the cache and might finish during this call,
 * hence changes to the cache are protected by XBZRLE.lock().
 *
 * Returns the new_size or negative in case of error.
 *
 * @new_size: new cache size
 */
int64_t xbzrle_cache_resize(int64_t new_size)
{
    PageCache *new_cache;
    int64_t ret;

    if (new_size < TARGET_PAGE_SIZE) {
        return -1;
    }

    XBZRLE_cache_lock();

    if (XBZRLE.cache != NULL) {
        if (pow2floor(new_size) == migrate_xbzrle_cache_size()) {
            goto out_new_size;
        }
        new_cache = cache_init(new_size / TARGET_PAGE_SIZE,
                               TARGET_PAGE_SIZE);
        if (!new_cache) {
            error_report("Error creating cache");
            ret = -1;
            goto out;
        }

        cache_fini(XBZRLE.cache);
        XBZRLE.cache = new_cache;
    }

out_new_size:
    ret = pow2floor(new_size);
out:
    XBZRLE_cache_unlock();
    return ret;
}
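/*
 * Sizing note (illustrative): pow2floor() rounds the requested size down to
 * a power of two, so a request of e.g. 600 MiB ends up as a 512 MiB cache,
 * and that rounded-down value is what this function returns on success.
 */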
struct RAMBitmap {
    struct rcu_head rcu;
    /* Main migration bitmap */
    unsigned long *bmap;
    /* bitmap of pages that haven't been sent even once
     * only maintained and used in postcopy at the moment
     * where it's used to send the dirtymap at the start
     * of the postcopy phase
     */
    unsigned long *unsentmap;
};
typedef struct RAMBitmap RAMBitmap;
/*
 * An outstanding page request, on the source, having been received
 * and queued
 */
struct RAMSrcPageRequest {
    RAMBlock *rb;
    hwaddr    offset;
    hwaddr    len;

    QSIMPLEQ_ENTRY(RAMSrcPageRequest) next_req;
};
/* State of RAM for migration */
struct RAMState {
    /* QEMUFile used for this migration */
    QEMUFile *f;
    /* Last block that we have visited searching for dirty pages */
    RAMBlock *last_seen_block;
    /* Last block from where we have sent data */
    RAMBlock *last_sent_block;
    /* Last offset we have sent data from */
    ram_addr_t last_offset;
    /* last ram version we have seen */
    uint32_t last_version;
    /* We are in the first round */
    bool ram_bulk_stage;
    /* How many times we have dirty too many pages */
    int dirty_rate_high_cnt;
    /* How many times we have synchronized the bitmap */
    uint64_t bitmap_sync_count;
    /* these variables are used for bitmap sync */
    /* last time we did a full bitmap_sync */
    int64_t time_last_bitmap_sync;
    /* bytes transferred at start_time */
    uint64_t bytes_xfer_prev;
    /* number of dirty pages since start_time */
    uint64_t num_dirty_pages_period;
    /* xbzrle misses since the beginning of the period */
    uint64_t xbzrle_cache_miss_prev;
    /* number of iterations at the beginning of period */
    uint64_t iterations_prev;
    /* Accounting fields */
    /* number of zero pages.  It used to be pages filled by the same char. */
    uint64_t zero_pages;
    /* number of normal transferred pages */
    uint64_t norm_pages;
    /* Iterations since start */
    uint64_t iterations;
    /* xbzrle transmitted bytes.  Notice that this is with
     * compression, they can't be calculated from the pages */
    uint64_t xbzrle_bytes;
    /* xbzrle transmitted pages */
    uint64_t xbzrle_pages;
    /* xbzrle number of cache miss */
    uint64_t xbzrle_cache_miss;
    /* xbzrle miss rate */
    double xbzrle_cache_miss_rate;
    /* xbzrle number of overflows */
    uint64_t xbzrle_overflows;
    /* number of dirty bits in the bitmap */
    uint64_t migration_dirty_pages;
    /* total number of bytes transferred */
    uint64_t bytes_transferred;
    /* number of dirtied pages in the last second */
    uint64_t dirty_pages_rate;
    /* Count of requests incoming from destination */
    uint64_t postcopy_requests;
    /* protects modification of the bitmap */
    QemuMutex bitmap_mutex;
    /* Ram Bitmap protected by RCU */
    RAMBitmap *ram_bitmap;
    /* The RAMBlock used in the last src_page_requests */
    RAMBlock *last_req_rb;
    /* Queue of outstanding page requests from the destination */
    QemuMutex src_page_req_mutex;
    QSIMPLEQ_HEAD(src_page_requests, RAMSrcPageRequest) src_page_requests;
};
typedef struct RAMState RAMState;

static RAMState ram_state;
uint64_t dup_mig_pages_transferred(void)
{
    return ram_state.zero_pages;
}

uint64_t norm_mig_pages_transferred(void)
{
    return ram_state.norm_pages;
}

uint64_t xbzrle_mig_bytes_transferred(void)
{
    return ram_state.xbzrle_bytes;
}

uint64_t xbzrle_mig_pages_transferred(void)
{
    return ram_state.xbzrle_pages;
}

uint64_t xbzrle_mig_pages_cache_miss(void)
{
    return ram_state.xbzrle_cache_miss;
}

double xbzrle_mig_cache_miss_rate(void)
{
    return ram_state.xbzrle_cache_miss_rate;
}

uint64_t xbzrle_mig_pages_overflow(void)
{
    return ram_state.xbzrle_overflows;
}

uint64_t ram_bytes_transferred(void)
{
    return ram_state.bytes_transferred;
}

uint64_t ram_bytes_remaining(void)
{
    return ram_state.migration_dirty_pages * TARGET_PAGE_SIZE;
}

uint64_t ram_dirty_sync_count(void)
{
    return ram_state.bitmap_sync_count;
}

uint64_t ram_dirty_pages_rate(void)
{
    return ram_state.dirty_pages_rate;
}

uint64_t ram_postcopy_requests(void)
{
    return ram_state.postcopy_requests;
}
/* used by the search for pages to send */
struct PageSearchStatus {
    /* Current block being searched */
    RAMBlock    *block;
    /* Current offset to search from */
    ram_addr_t   offset;
    /* Set once we wrap around */
    bool         complete_round;
};
typedef struct PageSearchStatus PageSearchStatus;
struct CompressParam {
    bool done;
    bool quit;
    QEMUFile *file;
    QemuMutex mutex;
    QemuCond cond;
    RAMBlock *block;
    ram_addr_t offset;
};
typedef struct CompressParam CompressParam;

struct DecompressParam {
    bool done;
    bool quit;
    QemuMutex mutex;
    QemuCond cond;
    void *des;
    uint8_t *compbuf;
    int len;
};
typedef struct DecompressParam DecompressParam;
static CompressParam *comp_param;
static QemuThread *compress_threads;
/* comp_done_cond is used to wake up the migration thread when
 * one of the compression threads has finished the compression.
 * comp_done_lock is used to co-work with comp_done_cond.
 */
static QemuMutex comp_done_lock;
static QemuCond comp_done_cond;
/* The empty QEMUFileOps will be used by file in CompressParam */
static const QEMUFileOps empty_ops = { };

static DecompressParam *decomp_param;
static QemuThread *decompress_threads;
static QemuMutex decomp_done_lock;
static QemuCond decomp_done_cond;
344 static int do_compress_ram_page(QEMUFile
*f
, RAMBlock
*block
,
347 static void *do_data_compress(void *opaque
)
349 CompressParam
*param
= opaque
;
353 qemu_mutex_lock(¶m
->mutex
);
354 while (!param
->quit
) {
356 block
= param
->block
;
357 offset
= param
->offset
;
359 qemu_mutex_unlock(¶m
->mutex
);
361 do_compress_ram_page(param
->file
, block
, offset
);
363 qemu_mutex_lock(&comp_done_lock
);
365 qemu_cond_signal(&comp_done_cond
);
366 qemu_mutex_unlock(&comp_done_lock
);
368 qemu_mutex_lock(¶m
->mutex
);
370 qemu_cond_wait(¶m
->cond
, ¶m
->mutex
);
373 qemu_mutex_unlock(¶m
->mutex
);
378 static inline void terminate_compression_threads(void)
380 int idx
, thread_count
;
382 thread_count
= migrate_compress_threads();
384 for (idx
= 0; idx
< thread_count
; idx
++) {
385 qemu_mutex_lock(&comp_param
[idx
].mutex
);
386 comp_param
[idx
].quit
= true;
387 qemu_cond_signal(&comp_param
[idx
].cond
);
388 qemu_mutex_unlock(&comp_param
[idx
].mutex
);
392 void migrate_compress_threads_join(void)
396 if (!migrate_use_compression()) {
399 terminate_compression_threads();
400 thread_count
= migrate_compress_threads();
401 for (i
= 0; i
< thread_count
; i
++) {
402 qemu_thread_join(compress_threads
+ i
);
403 qemu_fclose(comp_param
[i
].file
);
404 qemu_mutex_destroy(&comp_param
[i
].mutex
);
405 qemu_cond_destroy(&comp_param
[i
].cond
);
407 qemu_mutex_destroy(&comp_done_lock
);
408 qemu_cond_destroy(&comp_done_cond
);
409 g_free(compress_threads
);
411 compress_threads
= NULL
;
415 void migrate_compress_threads_create(void)
419 if (!migrate_use_compression()) {
422 thread_count
= migrate_compress_threads();
423 compress_threads
= g_new0(QemuThread
, thread_count
);
424 comp_param
= g_new0(CompressParam
, thread_count
);
425 qemu_cond_init(&comp_done_cond
);
426 qemu_mutex_init(&comp_done_lock
);
427 for (i
= 0; i
< thread_count
; i
++) {
428 /* comp_param[i].file is just used as a dummy buffer to save data,
429 * set its ops to empty.
431 comp_param
[i
].file
= qemu_fopen_ops(NULL
, &empty_ops
);
432 comp_param
[i
].done
= true;
433 comp_param
[i
].quit
= false;
434 qemu_mutex_init(&comp_param
[i
].mutex
);
435 qemu_cond_init(&comp_param
[i
].cond
);
436 qemu_thread_create(compress_threads
+ i
, "compress",
437 do_data_compress
, comp_param
+ i
,
438 QEMU_THREAD_JOINABLE
);
443 * save_page_header: write page header to wire
445 * If this is the 1st block, it also writes the block identification
447 * Returns the number of bytes written
449 * @f: QEMUFile where to send the data
450 * @block: block that contains the page we want to send
451 * @offset: offset inside the block for the page
452 * in the lower bits, it contains flags
454 static size_t save_page_header(QEMUFile
*f
, RAMBlock
*block
, ram_addr_t offset
)
458 qemu_put_be64(f
, offset
);
461 if (!(offset
& RAM_SAVE_FLAG_CONTINUE
)) {
462 len
= strlen(block
->idstr
);
463 qemu_put_byte(f
, len
);
464 qemu_put_buffer(f
, (uint8_t *)block
->idstr
, len
);
/**
 * mig_throttle_guest_down: throttle down the guest
 *
 * Reduce amount of guest cpu execution to hopefully slow down memory
 * writes. If guest dirty memory rate is reduced below the rate at
 * which we can transfer pages to the destination then we should be
 * able to complete migration. Some workloads dirty memory way too
 * fast and will not effectively converge, even with auto-converge.
 */
static void mig_throttle_guest_down(void)
{
    MigrationState *s = migrate_get_current();
    uint64_t pct_initial = s->parameters.cpu_throttle_initial;
    uint64_t pct_increment = s->parameters.cpu_throttle_increment;

    /* We have not started throttling yet. Let's start it. */
    if (!cpu_throttle_active()) {
        cpu_throttle_set(pct_initial);
    } else {
        /* Throttling already on, just increase the rate */
        cpu_throttle_set(cpu_throttle_get_percentage() + pct_increment);
    }
}
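/*
 * Illustrative example: with cpu_throttle_initial=20 and
 * cpu_throttle_increment=10, repeated calls while migration fails to
 * converge throttle the guest at 20%, then 30%, 40%, ... of its CPU time.
 */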
495 * xbzrle_cache_zero_page: insert a zero page in the XBZRLE cache
497 * @rs: current RAM state
498 * @current_addr: address for the zero page
500 * Update the xbzrle cache to reflect a page that's been sent as all 0.
501 * The important thing is that a stale (not-yet-0'd) page be replaced
503 * As a bonus, if the page wasn't in the cache it gets added so that
504 * when a small write is made into the 0'd page it gets XBZRLE sent.
506 static void xbzrle_cache_zero_page(RAMState
*rs
, ram_addr_t current_addr
)
508 if (rs
->ram_bulk_stage
|| !migrate_use_xbzrle()) {
512 /* We don't care if this fails to allocate a new cache page
513 * as long as it updated an old one */
514 cache_insert(XBZRLE
.cache
, current_addr
, ZERO_TARGET_PAGE
,
515 rs
->bitmap_sync_count
);
#define ENCODING_FLAG_XBZRLE 0x1
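/*
 * Wire layout of an XBZRLE page as written by save_xbzrle_page() below
 * (a summary of that code, not an extension of the protocol):
 *
 *   save_page_header()       offset | RAM_SAVE_FLAG_XBZRLE
 *   1 byte                   ENCODING_FLAG_XBZRLE
 *   2 bytes, big endian      encoded length
 *   encoded_len bytes        xbzrle_encode_buffer() output
 *
 * which is why the accounting there adds "encoded_len + 1 + 2".
 */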
521 * save_xbzrle_page: compress and send current page
523 * Returns: 1 means that we wrote the page
524 * 0 means that page is identical to the one already sent
525 * -1 means that xbzrle would be longer than normal
527 * @rs: current RAM state
528 * @current_data: pointer to the address of the page contents
529 * @current_addr: addr of the page
530 * @block: block that contains the page we want to send
531 * @offset: offset inside the block for the page
532 * @last_stage: if we are at the completion stage
534 static int save_xbzrle_page(RAMState
*rs
, uint8_t **current_data
,
535 ram_addr_t current_addr
, RAMBlock
*block
,
536 ram_addr_t offset
, bool last_stage
)
538 int encoded_len
= 0, bytes_xbzrle
;
539 uint8_t *prev_cached_page
;
541 if (!cache_is_cached(XBZRLE
.cache
, current_addr
, rs
->bitmap_sync_count
)) {
542 rs
->xbzrle_cache_miss
++;
544 if (cache_insert(XBZRLE
.cache
, current_addr
, *current_data
,
545 rs
->bitmap_sync_count
) == -1) {
548 /* update *current_data when the page has been
549 inserted into cache */
550 *current_data
= get_cached_data(XBZRLE
.cache
, current_addr
);
556 prev_cached_page
= get_cached_data(XBZRLE
.cache
, current_addr
);
558 /* save current buffer into memory */
559 memcpy(XBZRLE
.current_buf
, *current_data
, TARGET_PAGE_SIZE
);
561 /* XBZRLE encoding (if there is no overflow) */
562 encoded_len
= xbzrle_encode_buffer(prev_cached_page
, XBZRLE
.current_buf
,
563 TARGET_PAGE_SIZE
, XBZRLE
.encoded_buf
,
565 if (encoded_len
== 0) {
566 trace_save_xbzrle_page_skipping();
568 } else if (encoded_len
== -1) {
569 trace_save_xbzrle_page_overflow();
570 rs
->xbzrle_overflows
++;
571 /* update data in the cache */
573 memcpy(prev_cached_page
, *current_data
, TARGET_PAGE_SIZE
);
574 *current_data
= prev_cached_page
;
579 /* we need to update the data in the cache, in order to get the same data */
581 memcpy(prev_cached_page
, XBZRLE
.current_buf
, TARGET_PAGE_SIZE
);
584 /* Send XBZRLE based compressed page */
585 bytes_xbzrle
= save_page_header(rs
->f
, block
,
586 offset
| RAM_SAVE_FLAG_XBZRLE
);
587 qemu_put_byte(rs
->f
, ENCODING_FLAG_XBZRLE
);
588 qemu_put_be16(rs
->f
, encoded_len
);
589 qemu_put_buffer(rs
->f
, XBZRLE
.encoded_buf
, encoded_len
);
590 bytes_xbzrle
+= encoded_len
+ 1 + 2;
592 rs
->xbzrle_bytes
+= bytes_xbzrle
;
593 rs
->bytes_transferred
+= bytes_xbzrle
;
599 * migration_bitmap_find_dirty: find the next dirty page from start
601 * Called with rcu_read_lock() to protect migration_bitmap
603 * Returns the byte offset within memory region of the start of a dirty page
605 * @rs: current RAM state
606 * @rb: RAMBlock where to search for dirty pages
607 * @start: starting address (typically so we can continue from previous page)
608 * @ram_addr_abs: pointer into which to store the address of the dirty page
609 * within the global ram_addr space
612 ram_addr_t
migration_bitmap_find_dirty(RAMState
*rs
, RAMBlock
*rb
,
614 ram_addr_t
*ram_addr_abs
)
616 unsigned long base
= rb
->offset
>> TARGET_PAGE_BITS
;
617 unsigned long nr
= base
+ (start
>> TARGET_PAGE_BITS
);
618 uint64_t rb_size
= rb
->used_length
;
619 unsigned long size
= base
+ (rb_size
>> TARGET_PAGE_BITS
);
620 unsigned long *bitmap
;
624 bitmap
= atomic_rcu_read(&rs
->ram_bitmap
)->bmap
;
625 if (rs
->ram_bulk_stage
&& nr
> base
) {
628 next
= find_next_bit(bitmap
, size
, nr
);
631 *ram_addr_abs
= next
<< TARGET_PAGE_BITS
;
632 return (next
- base
) << TARGET_PAGE_BITS
;
635 static inline bool migration_bitmap_clear_dirty(RAMState
*rs
, ram_addr_t addr
)
638 int nr
= addr
>> TARGET_PAGE_BITS
;
639 unsigned long *bitmap
= atomic_rcu_read(&rs
->ram_bitmap
)->bmap
;
641 ret
= test_and_clear_bit(nr
, bitmap
);
644 rs
->migration_dirty_pages
--;
649 static void migration_bitmap_sync_range(RAMState
*rs
, ram_addr_t start
,
652 unsigned long *bitmap
;
653 bitmap
= atomic_rcu_read(&rs
->ram_bitmap
)->bmap
;
654 rs
->migration_dirty_pages
+=
655 cpu_physical_memory_sync_dirty_bitmap(bitmap
, start
, length
,
656 &rs
->num_dirty_pages_period
);
660 * ram_pagesize_summary: calculate all the pagesizes of a VM
662 * Returns a summary bitmap of the page sizes of all RAMBlocks
664 * For VMs with just normal pages this is equivalent to the host page
665 * size. If it's got some huge pages then it's the OR of all the
666 * different page sizes.
668 uint64_t ram_pagesize_summary(void)
671 uint64_t summary
= 0;
673 QLIST_FOREACH_RCU(block
, &ram_list
.blocks
, next
) {
674 summary
|= block
->page_size
;
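/*
 * Illustrative example: on a host where most RAMBlocks use 4 KiB pages and
 * one block is backed by 2 MiB hugepages, the returned summary is
 * 0x1000 | 0x200000 == 0x201000.
 */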
680 static void migration_bitmap_sync(RAMState
*rs
)
684 uint64_t bytes_xfer_now
;
686 rs
->bitmap_sync_count
++;
688 if (!rs
->bytes_xfer_prev
) {
689 rs
->bytes_xfer_prev
= ram_bytes_transferred();
692 if (!rs
->time_last_bitmap_sync
) {
693 rs
->time_last_bitmap_sync
= qemu_clock_get_ms(QEMU_CLOCK_REALTIME
);
696 trace_migration_bitmap_sync_start();
697 memory_global_dirty_log_sync();
699 qemu_mutex_lock(&rs
->bitmap_mutex
);
701 QLIST_FOREACH_RCU(block
, &ram_list
.blocks
, next
) {
702 migration_bitmap_sync_range(rs
, block
->offset
, block
->used_length
);
705 qemu_mutex_unlock(&rs
->bitmap_mutex
);
707 trace_migration_bitmap_sync_end(rs
->num_dirty_pages_period
);
709 end_time
= qemu_clock_get_ms(QEMU_CLOCK_REALTIME
);
/* more than 1 second = 1000 milliseconds */
712 if (end_time
> rs
->time_last_bitmap_sync
+ 1000) {
713 if (migrate_auto_converge()) {
714 /* The following detection logic can be refined later. For now:
715 Check to see if the dirtied bytes is 50% more than the approx.
716 amount of bytes that just got transferred since the last time we
717 were in this routine. If that happens twice, start or increase
719 bytes_xfer_now
= ram_bytes_transferred();
721 if (rs
->dirty_pages_rate
&&
722 (rs
->num_dirty_pages_period
* TARGET_PAGE_SIZE
>
723 (bytes_xfer_now
- rs
->bytes_xfer_prev
) / 2) &&
724 (rs
->dirty_rate_high_cnt
++ >= 2)) {
725 trace_migration_throttle();
726 rs
->dirty_rate_high_cnt
= 0;
727 mig_throttle_guest_down();
729 rs
->bytes_xfer_prev
= bytes_xfer_now
;
732 if (migrate_use_xbzrle()) {
733 if (rs
->iterations_prev
!= rs
->iterations
) {
734 rs
->xbzrle_cache_miss_rate
=
735 (double)(rs
->xbzrle_cache_miss
-
736 rs
->xbzrle_cache_miss_prev
) /
737 (rs
->iterations
- rs
->iterations_prev
);
739 rs
->iterations_prev
= rs
->iterations
;
740 rs
->xbzrle_cache_miss_prev
= rs
->xbzrle_cache_miss
;
742 rs
->dirty_pages_rate
= rs
->num_dirty_pages_period
* 1000
743 / (end_time
- rs
->time_last_bitmap_sync
);
744 rs
->time_last_bitmap_sync
= end_time
;
745 rs
->num_dirty_pages_period
= 0;
747 if (migrate_use_events()) {
748 qapi_event_send_migration_pass(rs
->bitmap_sync_count
, NULL
);
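/*
 * Worked example of the rate computed above: if num_dirty_pages_period is
 * 25000 pages and the interval (end_time - time_last_bitmap_sync) is 500 ms,
 * dirty_pages_rate = 25000 * 1000 / 500 = 50000 pages per second.
 */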
753 * save_zero_page: send the zero page to the stream
755 * Returns the number of pages written.
757 * @rs: current RAM state
758 * @block: block that contains the page we want to send
759 * @offset: offset inside the block for the page
760 * @p: pointer to the page
762 static int save_zero_page(RAMState
*rs
, RAMBlock
*block
, ram_addr_t offset
,
767 if (is_zero_range(p
, TARGET_PAGE_SIZE
)) {
769 rs
->bytes_transferred
+=
770 save_page_header(rs
->f
, block
, offset
| RAM_SAVE_FLAG_COMPRESS
);
771 qemu_put_byte(rs
->f
, 0);
772 rs
->bytes_transferred
+= 1;
779 static void ram_release_pages(const char *rbname
, uint64_t offset
, int pages
)
781 if (!migrate_release_ram() || !migration_in_postcopy()) {
785 ram_discard_range(NULL
, rbname
, offset
, pages
<< TARGET_PAGE_BITS
);
789 * ram_save_page: send the given page to the stream
791 * Returns the number of pages written.
793 * >=0 - Number of pages written - this might legally be 0
794 * if xbzrle noticed the page was the same.
796 * @rs: current RAM state
797 * @block: block that contains the page we want to send
798 * @offset: offset inside the block for the page
799 * @last_stage: if we are at the completion stage
801 static int ram_save_page(RAMState
*rs
, PageSearchStatus
*pss
, bool last_stage
)
805 ram_addr_t current_addr
;
808 bool send_async
= true;
809 RAMBlock
*block
= pss
->block
;
810 ram_addr_t offset
= pss
->offset
;
812 p
= block
->host
+ offset
;
/* In doubt, send page as normal */
816 ret
= ram_control_save_page(rs
->f
, block
->offset
,
817 offset
, TARGET_PAGE_SIZE
, &bytes_xmit
);
819 rs
->bytes_transferred
+= bytes_xmit
;
825 current_addr
= block
->offset
+ offset
;
827 if (block
== rs
->last_sent_block
) {
828 offset
|= RAM_SAVE_FLAG_CONTINUE
;
830 if (ret
!= RAM_SAVE_CONTROL_NOT_SUPP
) {
831 if (ret
!= RAM_SAVE_CONTROL_DELAYED
) {
832 if (bytes_xmit
> 0) {
834 } else if (bytes_xmit
== 0) {
839 pages
= save_zero_page(rs
, block
, offset
, p
);
841 /* Must let xbzrle know, otherwise a previous (now 0'd) cached
842 * page would be stale
844 xbzrle_cache_zero_page(rs
, current_addr
);
845 ram_release_pages(block
->idstr
, pss
->offset
, pages
);
846 } else if (!rs
->ram_bulk_stage
&&
847 !migration_in_postcopy() && migrate_use_xbzrle()) {
848 pages
= save_xbzrle_page(rs
, &p
, current_addr
, block
,
851 /* Can't send this cached data async, since the cache page
852 * might get updated before it gets to the wire
859 /* XBZRLE overflow or normal page */
861 rs
->bytes_transferred
+= save_page_header(rs
->f
, block
,
862 offset
| RAM_SAVE_FLAG_PAGE
);
864 qemu_put_buffer_async(rs
->f
, p
, TARGET_PAGE_SIZE
,
865 migrate_release_ram() &
866 migration_in_postcopy());
868 qemu_put_buffer(rs
->f
, p
, TARGET_PAGE_SIZE
);
870 rs
->bytes_transferred
+= TARGET_PAGE_SIZE
;
875 XBZRLE_cache_unlock();
880 static int do_compress_ram_page(QEMUFile
*f
, RAMBlock
*block
,
883 int bytes_sent
, blen
;
884 uint8_t *p
= block
->host
+ (offset
& TARGET_PAGE_MASK
);
886 bytes_sent
= save_page_header(f
, block
, offset
|
887 RAM_SAVE_FLAG_COMPRESS_PAGE
);
888 blen
= qemu_put_compression_data(f
, p
, TARGET_PAGE_SIZE
,
889 migrate_compress_level());
892 qemu_file_set_error(migrate_get_current()->to_dst_file
, blen
);
893 error_report("compressed data failed!");
896 ram_release_pages(block
->idstr
, offset
& TARGET_PAGE_MASK
, 1);
902 static void flush_compressed_data(RAMState
*rs
)
904 int idx
, len
, thread_count
;
906 if (!migrate_use_compression()) {
909 thread_count
= migrate_compress_threads();
911 qemu_mutex_lock(&comp_done_lock
);
912 for (idx
= 0; idx
< thread_count
; idx
++) {
913 while (!comp_param
[idx
].done
) {
914 qemu_cond_wait(&comp_done_cond
, &comp_done_lock
);
917 qemu_mutex_unlock(&comp_done_lock
);
919 for (idx
= 0; idx
< thread_count
; idx
++) {
920 qemu_mutex_lock(&comp_param
[idx
].mutex
);
921 if (!comp_param
[idx
].quit
) {
922 len
= qemu_put_qemu_file(rs
->f
, comp_param
[idx
].file
);
923 rs
->bytes_transferred
+= len
;
925 qemu_mutex_unlock(&comp_param
[idx
].mutex
);
929 static inline void set_compress_params(CompressParam
*param
, RAMBlock
*block
,
932 param
->block
= block
;
933 param
->offset
= offset
;
936 static int compress_page_with_multi_thread(RAMState
*rs
, RAMBlock
*block
,
939 int idx
, thread_count
, bytes_xmit
= -1, pages
= -1;
941 thread_count
= migrate_compress_threads();
942 qemu_mutex_lock(&comp_done_lock
);
944 for (idx
= 0; idx
< thread_count
; idx
++) {
945 if (comp_param
[idx
].done
) {
946 comp_param
[idx
].done
= false;
947 bytes_xmit
= qemu_put_qemu_file(rs
->f
, comp_param
[idx
].file
);
948 qemu_mutex_lock(&comp_param
[idx
].mutex
);
949 set_compress_params(&comp_param
[idx
], block
, offset
);
950 qemu_cond_signal(&comp_param
[idx
].cond
);
951 qemu_mutex_unlock(&comp_param
[idx
].mutex
);
954 rs
->bytes_transferred
+= bytes_xmit
;
961 qemu_cond_wait(&comp_done_cond
, &comp_done_lock
);
964 qemu_mutex_unlock(&comp_done_lock
);
970 * ram_save_compressed_page: compress the given page and send it to the stream
972 * Returns the number of pages written.
974 * @rs: current RAM state
975 * @block: block that contains the page we want to send
976 * @offset: offset inside the block for the page
977 * @last_stage: if we are at the completion stage
979 static int ram_save_compressed_page(RAMState
*rs
, PageSearchStatus
*pss
,
983 uint64_t bytes_xmit
= 0;
986 RAMBlock
*block
= pss
->block
;
987 ram_addr_t offset
= pss
->offset
;
989 p
= block
->host
+ offset
;
991 ret
= ram_control_save_page(rs
->f
, block
->offset
,
992 offset
, TARGET_PAGE_SIZE
, &bytes_xmit
);
994 rs
->bytes_transferred
+= bytes_xmit
;
997 if (ret
!= RAM_SAVE_CONTROL_NOT_SUPP
) {
998 if (ret
!= RAM_SAVE_CONTROL_DELAYED
) {
999 if (bytes_xmit
> 0) {
1001 } else if (bytes_xmit
== 0) {
1006 /* When starting the process of a new block, the first page of
1007 * the block should be sent out before other pages in the same
1008 * block, and all the pages in last block should have been sent
 * out; keeping this order is important because the 'cont' flag
1010 * is used to avoid resending the block name.
1012 if (block
!= rs
->last_sent_block
) {
1013 flush_compressed_data(rs
);
1014 pages
= save_zero_page(rs
, block
, offset
, p
);
1016 /* Make sure the first page is sent out before other pages */
1017 bytes_xmit
= save_page_header(rs
->f
, block
, offset
|
1018 RAM_SAVE_FLAG_COMPRESS_PAGE
);
1019 blen
= qemu_put_compression_data(rs
->f
, p
, TARGET_PAGE_SIZE
,
1020 migrate_compress_level());
1022 rs
->bytes_transferred
+= bytes_xmit
+ blen
;
1026 qemu_file_set_error(rs
->f
, blen
);
1027 error_report("compressed data failed!");
1031 ram_release_pages(block
->idstr
, pss
->offset
, pages
);
1034 offset
|= RAM_SAVE_FLAG_CONTINUE
;
1035 pages
= save_zero_page(rs
, block
, offset
, p
);
1037 pages
= compress_page_with_multi_thread(rs
, block
, offset
);
1039 ram_release_pages(block
->idstr
, pss
->offset
, pages
);
1048 * find_dirty_block: find the next dirty page and update any state
1049 * associated with the search process.
1051 * Returns if a page is found
1053 * @rs: current RAM state
1054 * @pss: data about the state of the current dirty page scan
1055 * @again: set to false if the search has scanned the whole of RAM
1056 * @ram_addr_abs: pointer into which to store the address of the dirty page
1057 * within the global ram_addr space
1059 static bool find_dirty_block(RAMState
*rs
, PageSearchStatus
*pss
,
1060 bool *again
, ram_addr_t
*ram_addr_abs
)
1062 pss
->offset
= migration_bitmap_find_dirty(rs
, pss
->block
, pss
->offset
,
1064 if (pss
->complete_round
&& pss
->block
== rs
->last_seen_block
&&
1065 pss
->offset
>= rs
->last_offset
) {
1067 * We've been once around the RAM and haven't found anything.
1073 if (pss
->offset
>= pss
->block
->used_length
) {
1074 /* Didn't find anything in this RAM Block */
1076 pss
->block
= QLIST_NEXT_RCU(pss
->block
, next
);
1078 /* Hit the end of the list */
1079 pss
->block
= QLIST_FIRST_RCU(&ram_list
.blocks
);
1080 /* Flag that we've looped */
1081 pss
->complete_round
= true;
1082 rs
->ram_bulk_stage
= false;
1083 if (migrate_use_xbzrle()) {
1084 /* If xbzrle is on, stop using the data compression at this
1085 * point. In theory, xbzrle can do better than compression.
1087 flush_compressed_data(rs
);
1090 /* Didn't find anything this time, but try again on the new block */
1094 /* Can go around again, but... */
1096 /* We've found something so probably don't need to */
 * unqueue_page: gets a page off the queue
1104 * Helper for 'get_queued_page' - gets a page off the queue
1106 * Returns the block of the page (or NULL if none available)
1108 * @rs: current RAM state
1109 * @offset: used to return the offset within the RAMBlock
1110 * @ram_addr_abs: pointer into which to store the address of the dirty page
1111 * within the global ram_addr space
1113 static RAMBlock
*unqueue_page(RAMState
*rs
, ram_addr_t
*offset
,
1114 ram_addr_t
*ram_addr_abs
)
1116 RAMBlock
*block
= NULL
;
1118 qemu_mutex_lock(&rs
->src_page_req_mutex
);
1119 if (!QSIMPLEQ_EMPTY(&rs
->src_page_requests
)) {
1120 struct RAMSrcPageRequest
*entry
=
1121 QSIMPLEQ_FIRST(&rs
->src_page_requests
);
1123 *offset
= entry
->offset
;
1124 *ram_addr_abs
= (entry
->offset
+ entry
->rb
->offset
) &
1127 if (entry
->len
> TARGET_PAGE_SIZE
) {
1128 entry
->len
-= TARGET_PAGE_SIZE
;
1129 entry
->offset
+= TARGET_PAGE_SIZE
;
1131 memory_region_unref(block
->mr
);
1132 QSIMPLEQ_REMOVE_HEAD(&rs
->src_page_requests
, next_req
);
1136 qemu_mutex_unlock(&rs
->src_page_req_mutex
);
 * get_queued_page: unqueue a page from the postcopy requests
1144 * Skips pages that are already sent (!dirty)
1146 * Returns if a queued page is found
1148 * @rs: current RAM state
1149 * @pss: data about the state of the current dirty page scan
1150 * @ram_addr_abs: pointer into which to store the address of the dirty page
1151 * within the global ram_addr space
1153 static bool get_queued_page(RAMState
*rs
, PageSearchStatus
*pss
,
1154 ram_addr_t
*ram_addr_abs
)
1161 block
= unqueue_page(rs
, &offset
, ram_addr_abs
);
1163 * We're sending this page, and since it's postcopy nothing else
1164 * will dirty it, and we must make sure it doesn't get sent again
1165 * even if this queue request was received after the background
1166 * search already sent it.
1169 unsigned long *bitmap
;
1170 bitmap
= atomic_rcu_read(&rs
->ram_bitmap
)->bmap
;
1171 dirty
= test_bit(*ram_addr_abs
>> TARGET_PAGE_BITS
, bitmap
);
1173 trace_get_queued_page_not_dirty(
1174 block
->idstr
, (uint64_t)offset
,
1175 (uint64_t)*ram_addr_abs
,
1176 test_bit(*ram_addr_abs
>> TARGET_PAGE_BITS
,
1177 atomic_rcu_read(&rs
->ram_bitmap
)->unsentmap
));
1179 trace_get_queued_page(block
->idstr
,
1181 (uint64_t)*ram_addr_abs
);
1185 } while (block
&& !dirty
);
1189 * As soon as we start servicing pages out of order, then we have
1190 * to kill the bulk stage, since the bulk stage assumes
1191 * in (migration_bitmap_find_and_reset_dirty) that every page is
1192 * dirty, that's no longer true.
1194 rs
->ram_bulk_stage
= false;
1197 * We want the background search to continue from the queued page
1198 * since the guest is likely to want other pages near to the page
1199 * it just requested.
1202 pss
->offset
= offset
;
1209 * migration_page_queue_free: drop any remaining pages in the ram
1212 * It should be empty at the end anyway, but in error cases there may
 * be some left. In case any page is left, we drop it.
1216 void migration_page_queue_free(void)
1218 struct RAMSrcPageRequest
*mspr
, *next_mspr
;
1219 RAMState
*rs
= &ram_state
;
1220 /* This queue generally should be empty - but in the case of a failed
1221 * migration might have some droppings in.
1224 QSIMPLEQ_FOREACH_SAFE(mspr
, &rs
->src_page_requests
, next_req
, next_mspr
) {
1225 memory_region_unref(mspr
->rb
->mr
);
1226 QSIMPLEQ_REMOVE_HEAD(&rs
->src_page_requests
, next_req
);
1233 * ram_save_queue_pages: queue the page for transmission
1235 * A request from postcopy destination for example.
1237 * Returns zero on success or negative on error
1239 * @rbname: Name of the RAMBLock of the request. NULL means the
1240 * same that last one.
1241 * @start: starting address from the start of the RAMBlock
1242 * @len: length (in bytes) to send
1244 int ram_save_queue_pages(const char *rbname
, ram_addr_t start
, ram_addr_t len
)
1247 RAMState
*rs
= &ram_state
;
1249 rs
->postcopy_requests
++;
1252 /* Reuse last RAMBlock */
1253 ramblock
= rs
->last_req_rb
;
1257 * Shouldn't happen, we can't reuse the last RAMBlock if
1258 * it's the 1st request.
1260 error_report("ram_save_queue_pages no previous block");
1264 ramblock
= qemu_ram_block_by_name(rbname
);
1267 /* We shouldn't be asked for a non-existent RAMBlock */
1268 error_report("ram_save_queue_pages no block '%s'", rbname
);
1271 rs
->last_req_rb
= ramblock
;
1273 trace_ram_save_queue_pages(ramblock
->idstr
, start
, len
);
1274 if (start
+len
> ramblock
->used_length
) {
1275 error_report("%s request overrun start=" RAM_ADDR_FMT
" len="
1276 RAM_ADDR_FMT
" blocklen=" RAM_ADDR_FMT
,
1277 __func__
, start
, len
, ramblock
->used_length
);
1281 struct RAMSrcPageRequest
*new_entry
=
1282 g_malloc0(sizeof(struct RAMSrcPageRequest
));
1283 new_entry
->rb
= ramblock
;
1284 new_entry
->offset
= start
;
1285 new_entry
->len
= len
;
1287 memory_region_ref(ramblock
->mr
);
1288 qemu_mutex_lock(&rs
->src_page_req_mutex
);
1289 QSIMPLEQ_INSERT_TAIL(&rs
->src_page_requests
, new_entry
, next_req
);
1290 qemu_mutex_unlock(&rs
->src_page_req_mutex
);
1301 * ram_save_target_page: save one target page
1303 * Returns the number of pages written
1305 * @rs: current RAM state
1306 * @ms: current migration state
1307 * @pss: data about the page we want to send
1308 * @last_stage: if we are at the completion stage
1309 * @dirty_ram_abs: address of the start of the dirty page in ram_addr_t space
1311 static int ram_save_target_page(RAMState
*rs
, PageSearchStatus
*pss
,
1312 bool last_stage
, ram_addr_t dirty_ram_abs
)
/* Check if the page is dirty and, if so, send it */
1317 if (migration_bitmap_clear_dirty(rs
, dirty_ram_abs
)) {
1318 unsigned long *unsentmap
;
1320 * If xbzrle is on, stop using the data compression after first
1321 * round of migration even if compression is enabled. In theory,
1322 * xbzrle can do better than compression.
1325 if (migrate_use_compression()
1326 && (rs
->ram_bulk_stage
|| !migrate_use_xbzrle())) {
1327 res
= ram_save_compressed_page(rs
, pss
, last_stage
);
1329 res
= ram_save_page(rs
, pss
, last_stage
);
1335 unsentmap
= atomic_rcu_read(&rs
->ram_bitmap
)->unsentmap
;
1337 clear_bit(dirty_ram_abs
>> TARGET_PAGE_BITS
, unsentmap
);
1339 /* Only update last_sent_block if a block was actually sent; xbzrle
1340 * might have decided the page was identical so didn't bother writing
1344 rs
->last_sent_block
= pss
->block
;
1352 * ram_save_host_page: save a whole host page
1354 * Starting at *offset send pages up to the end of the current host
1355 * page. It's valid for the initial offset to point into the middle of
1356 * a host page in which case the remainder of the hostpage is sent.
1357 * Only dirty target pages are sent. Note that the host page size may
1358 * be a huge page for this block.
1360 * Returns the number of pages written or negative on error
1362 * @rs: current RAM state
1363 * @ms: current migration state
1364 * @pss: data about the page we want to send
1365 * @last_stage: if we are at the completion stage
1366 * @dirty_ram_abs: Address of the start of the dirty page in ram_addr_t space
1368 static int ram_save_host_page(RAMState
*rs
, PageSearchStatus
*pss
,
1370 ram_addr_t dirty_ram_abs
)
1372 int tmppages
, pages
= 0;
1373 size_t pagesize
= qemu_ram_pagesize(pss
->block
);
1376 tmppages
= ram_save_target_page(rs
, pss
, last_stage
, dirty_ram_abs
);
1382 pss
->offset
+= TARGET_PAGE_SIZE
;
1383 dirty_ram_abs
+= TARGET_PAGE_SIZE
;
1384 } while (pss
->offset
& (pagesize
- 1));
1386 /* The offset we leave with is the last one we looked at */
1387 pss
->offset
-= TARGET_PAGE_SIZE
;
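/*
 * Illustrative example: for a RAMBlock backed by 2 MiB hugepages with a
 * 4 KiB target page size, one call can send up to 512 target pages,
 * stopping when pss->offset reaches the next host-page boundary.
 */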
1392 * ram_find_and_save_block: finds a dirty page and sends it to f
1394 * Called within an RCU critical section.
1396 * Returns the number of pages written where zero means no dirty pages
1398 * @rs: current RAM state
1399 * @last_stage: if we are at the completion stage
1401 * On systems where host-page-size > target-page-size it will send all the
1402 * pages in a host page that are dirty.
1405 static int ram_find_and_save_block(RAMState
*rs
, bool last_stage
)
1407 PageSearchStatus pss
;
1410 ram_addr_t dirty_ram_abs
; /* Address of the start of the dirty page in
1413 /* No dirty page as there is zero RAM */
1414 if (!ram_bytes_total()) {
1418 pss
.block
= rs
->last_seen_block
;
1419 pss
.offset
= rs
->last_offset
;
1420 pss
.complete_round
= false;
1423 pss
.block
= QLIST_FIRST_RCU(&ram_list
.blocks
);
1428 found
= get_queued_page(rs
, &pss
, &dirty_ram_abs
);
1431 /* priority queue empty, so just search for something dirty */
1432 found
= find_dirty_block(rs
, &pss
, &again
, &dirty_ram_abs
);
1436 pages
= ram_save_host_page(rs
, &pss
, last_stage
, dirty_ram_abs
);
1438 } while (!pages
&& again
);
1440 rs
->last_seen_block
= pss
.block
;
1441 rs
->last_offset
= pss
.offset
;
1446 void acct_update_position(QEMUFile
*f
, size_t size
, bool zero
)
1448 uint64_t pages
= size
/ TARGET_PAGE_SIZE
;
1449 RAMState
*rs
= &ram_state
;
1452 rs
->zero_pages
+= pages
;
1454 rs
->norm_pages
+= pages
;
1455 rs
->bytes_transferred
+= size
;
1456 qemu_update_position(f
, size
);
1460 uint64_t ram_bytes_total(void)
1466 QLIST_FOREACH_RCU(block
, &ram_list
.blocks
, next
)
1467 total
+= block
->used_length
;
1472 void free_xbzrle_decoded_buf(void)
1474 g_free(xbzrle_decoded_buf
);
1475 xbzrle_decoded_buf
= NULL
;
1478 static void migration_bitmap_free(struct RAMBitmap
*bmap
)
1481 g_free(bmap
->unsentmap
);
1485 static void ram_migration_cleanup(void *opaque
)
1487 RAMState
*rs
= opaque
;
/* The caller must hold the iothread lock or be in a bh, so there is
1490 * no writing race against this migration_bitmap
1492 struct RAMBitmap
*bitmap
= rs
->ram_bitmap
;
1493 atomic_rcu_set(&rs
->ram_bitmap
, NULL
);
1495 memory_global_dirty_log_stop();
1496 call_rcu(bitmap
, migration_bitmap_free
, rcu
);
1499 XBZRLE_cache_lock();
1501 cache_fini(XBZRLE
.cache
);
1502 g_free(XBZRLE
.encoded_buf
);
1503 g_free(XBZRLE
.current_buf
);
1504 g_free(ZERO_TARGET_PAGE
);
1505 XBZRLE
.cache
= NULL
;
1506 XBZRLE
.encoded_buf
= NULL
;
1507 XBZRLE
.current_buf
= NULL
;
1509 XBZRLE_cache_unlock();
1512 static void ram_state_reset(RAMState
*rs
)
1514 rs
->last_seen_block
= NULL
;
1515 rs
->last_sent_block
= NULL
;
1516 rs
->last_offset
= 0;
1517 rs
->last_version
= ram_list
.version
;
1518 rs
->ram_bulk_stage
= true;
1521 #define MAX_WAIT 50 /* ms, half buffered_file limit */
1523 void migration_bitmap_extend(ram_addr_t old
, ram_addr_t
new)
1525 RAMState
*rs
= &ram_state
;
1527 /* called in qemu main thread, so there is
1528 * no writing race against this migration_bitmap
1530 if (rs
->ram_bitmap
) {
1531 struct RAMBitmap
*old_bitmap
= rs
->ram_bitmap
, *bitmap
;
1532 bitmap
= g_new(struct RAMBitmap
, 1);
1533 bitmap
->bmap
= bitmap_new(new);
1535 /* prevent migration_bitmap content from being set bit
1536 * by migration_bitmap_sync_range() at the same time.
 * It is safe for migration if the migration_bitmap bit is cleared
1540 qemu_mutex_lock(&rs
->bitmap_mutex
);
1541 bitmap_copy(bitmap
->bmap
, old_bitmap
->bmap
, old
);
1542 bitmap_set(bitmap
->bmap
, old
, new - old
);
1544 /* We don't have a way to safely extend the sentmap
1545 * with RCU; so mark it as missing, entry to postcopy
1548 bitmap
->unsentmap
= NULL
;
1550 atomic_rcu_set(&rs
->ram_bitmap
, bitmap
);
1551 qemu_mutex_unlock(&rs
->bitmap_mutex
);
1552 rs
->migration_dirty_pages
+= new - old
;
1553 call_rcu(old_bitmap
, migration_bitmap_free
, rcu
);
1558 * 'expected' is the value you expect the bitmap mostly to be full
1559 * of; it won't bother printing lines that are all this value.
1560 * If 'todump' is null the migration bitmap is dumped.
1562 void ram_debug_dump_bitmap(unsigned long *todump
, bool expected
)
1564 int64_t ram_pages
= last_ram_offset() >> TARGET_PAGE_BITS
;
1565 RAMState
*rs
= &ram_state
;
1567 int64_t linelen
= 128;
1571 todump
= atomic_rcu_read(&rs
->ram_bitmap
)->bmap
;
1574 for (cur
= 0; cur
< ram_pages
; cur
+= linelen
) {
1578 * Last line; catch the case where the line length
1579 * is longer than remaining ram
1581 if (cur
+ linelen
> ram_pages
) {
1582 linelen
= ram_pages
- cur
;
1584 for (curb
= 0; curb
< linelen
; curb
++) {
1585 bool thisbit
= test_bit(cur
+ curb
, todump
);
1586 linebuf
[curb
] = thisbit
? '1' : '.';
1587 found
= found
|| (thisbit
!= expected
);
1590 linebuf
[curb
] = '\0';
1591 fprintf(stderr
, "0x%08" PRIx64
" : %s\n", cur
, linebuf
);
1596 /* **** functions for postcopy ***** */
1598 void ram_postcopy_migrated_memory_release(MigrationState
*ms
)
1600 RAMState
*rs
= &ram_state
;
1601 struct RAMBlock
*block
;
1602 unsigned long *bitmap
= atomic_rcu_read(&rs
->ram_bitmap
)->bmap
;
1604 QLIST_FOREACH_RCU(block
, &ram_list
.blocks
, next
) {
1605 unsigned long first
= block
->offset
>> TARGET_PAGE_BITS
;
1606 unsigned long range
= first
+ (block
->used_length
>> TARGET_PAGE_BITS
);
1607 unsigned long run_start
= find_next_zero_bit(bitmap
, range
, first
);
1609 while (run_start
< range
) {
1610 unsigned long run_end
= find_next_bit(bitmap
, range
, run_start
+ 1);
1611 ram_discard_range(NULL
, block
->idstr
, run_start
<< TARGET_PAGE_BITS
,
1612 (run_end
- run_start
) << TARGET_PAGE_BITS
);
1613 run_start
= find_next_zero_bit(bitmap
, range
, run_end
+ 1);
1619 * postcopy_send_discard_bm_ram: discard a RAMBlock
1621 * Returns zero on success
1623 * Callback from postcopy_each_ram_send_discard for each RAMBlock
1624 * Note: At this point the 'unsentmap' is the processed bitmap combined
1625 * with the dirtymap; so a '1' means it's either dirty or unsent.
1627 * @ms: current migration state
1628 * @pds: state for postcopy
1629 * @start: RAMBlock starting page
1630 * @length: RAMBlock size
1632 static int postcopy_send_discard_bm_ram(MigrationState
*ms
,
1633 PostcopyDiscardState
*pds
,
1634 unsigned long start
,
1635 unsigned long length
)
1637 RAMState
*rs
= &ram_state
;
1638 unsigned long end
= start
+ length
; /* one after the end */
1639 unsigned long current
;
1640 unsigned long *unsentmap
;
1642 unsentmap
= atomic_rcu_read(&rs
->ram_bitmap
)->unsentmap
;
1643 for (current
= start
; current
< end
; ) {
1644 unsigned long one
= find_next_bit(unsentmap
, end
, current
);
1647 unsigned long zero
= find_next_zero_bit(unsentmap
, end
, one
+ 1);
1648 unsigned long discard_length
;
1651 discard_length
= end
- one
;
1653 discard_length
= zero
- one
;
1655 if (discard_length
) {
1656 postcopy_discard_send_range(ms
, pds
, one
, discard_length
);
1658 current
= one
+ discard_length
;
1668 * postcopy_each_ram_send_discard: discard all RAMBlocks
1670 * Returns 0 for success or negative for error
1672 * Utility for the outgoing postcopy code.
1673 * Calls postcopy_send_discard_bm_ram for each RAMBlock
1674 * passing it bitmap indexes and name.
1675 * (qemu_ram_foreach_block ends up passing unscaled lengths
1676 * which would mean postcopy code would have to deal with target page)
1678 * @ms: current migration state
1680 static int postcopy_each_ram_send_discard(MigrationState
*ms
)
1682 struct RAMBlock
*block
;
1685 QLIST_FOREACH_RCU(block
, &ram_list
.blocks
, next
) {
1686 unsigned long first
= block
->offset
>> TARGET_PAGE_BITS
;
1687 PostcopyDiscardState
*pds
= postcopy_discard_send_init(ms
,
1692 * Postcopy sends chunks of bitmap over the wire, but it
1693 * just needs indexes at this point, avoids it having
1694 * target page specific code.
1696 ret
= postcopy_send_discard_bm_ram(ms
, pds
, first
,
1697 block
->used_length
>> TARGET_PAGE_BITS
);
1698 postcopy_discard_send_finish(ms
, pds
);
 * postcopy_chunk_hostpages_pass: canonicalize bitmap in hostpages
1710 * Helper for postcopy_chunk_hostpages; it's called twice to
1711 * canonicalize the two bitmaps, that are similar, but one is
1714 * Postcopy requires that all target pages in a hostpage are dirty or
1715 * clean, not a mix. This function canonicalizes the bitmaps.
1717 * @ms: current migration state
1718 * @unsent_pass: if true we need to canonicalize partially unsent host pages
1719 * otherwise we need to canonicalize partially dirty host pages
1720 * @block: block that contains the page we want to canonicalize
1721 * @pds: state for postcopy
1723 static void postcopy_chunk_hostpages_pass(MigrationState
*ms
, bool unsent_pass
,
1725 PostcopyDiscardState
*pds
)
1727 RAMState
*rs
= &ram_state
;
1728 unsigned long *bitmap
;
1729 unsigned long *unsentmap
;
1730 unsigned int host_ratio
= block
->page_size
/ TARGET_PAGE_SIZE
;
1731 unsigned long first
= block
->offset
>> TARGET_PAGE_BITS
;
1732 unsigned long len
= block
->used_length
>> TARGET_PAGE_BITS
;
1733 unsigned long last
= first
+ (len
- 1);
1734 unsigned long run_start
;
1736 if (block
->page_size
== TARGET_PAGE_SIZE
) {
1737 /* Easy case - TPS==HPS for a non-huge page RAMBlock */
1741 bitmap
= atomic_rcu_read(&rs
->ram_bitmap
)->bmap
;
1742 unsentmap
= atomic_rcu_read(&rs
->ram_bitmap
)->unsentmap
;
1745 /* Find a sent page */
1746 run_start
= find_next_zero_bit(unsentmap
, last
+ 1, first
);
1748 /* Find a dirty page */
1749 run_start
= find_next_bit(bitmap
, last
+ 1, first
);
1752 while (run_start
<= last
) {
1753 bool do_fixup
= false;
1754 unsigned long fixup_start_addr
;
1755 unsigned long host_offset
;
1758 * If the start of this run of pages is in the middle of a host
1759 * page, then we need to fixup this host page.
1761 host_offset
= run_start
% host_ratio
;
1764 run_start
-= host_offset
;
1765 fixup_start_addr
= run_start
;
1766 /* For the next pass */
1767 run_start
= run_start
+ host_ratio
;
1769 /* Find the end of this run */
1770 unsigned long run_end
;
1772 run_end
= find_next_bit(unsentmap
, last
+ 1, run_start
+ 1);
1774 run_end
= find_next_zero_bit(bitmap
, last
+ 1, run_start
+ 1);
1777 * If the end isn't at the start of a host page, then the
1778 * run doesn't finish at the end of a host page
1779 * and we need to discard.
1781 host_offset
= run_end
% host_ratio
;
1784 fixup_start_addr
= run_end
- host_offset
;
1786 * This host page has gone, the next loop iteration starts
1787 * from after the fixup
1789 run_start
= fixup_start_addr
+ host_ratio
;
1792 * No discards on this iteration, next loop starts from
1793 * next sent/dirty page
1795 run_start
= run_end
+ 1;
1802 /* Tell the destination to discard this page */
1803 if (unsent_pass
|| !test_bit(fixup_start_addr
, unsentmap
)) {
1804 /* For the unsent_pass we:
1805 * discard partially sent pages
1806 * For the !unsent_pass (dirty) we:
1807 * discard partially dirty pages that were sent
1808 * (any partially sent pages were already discarded
1809 * by the previous unsent_pass)
1811 postcopy_discard_send_range(ms
, pds
, fixup_start_addr
,
1815 /* Clean up the bitmap */
1816 for (page
= fixup_start_addr
;
1817 page
< fixup_start_addr
+ host_ratio
; page
++) {
1818 /* All pages in this host page are now not sent */
1819 set_bit(page
, unsentmap
);
1822 * Remark them as dirty, updating the count for any pages
1823 * that weren't previously dirty.
1825 rs
->migration_dirty_pages
+= !test_and_set_bit(page
, bitmap
);
1830 /* Find the next sent page for the next iteration */
1831 run_start
= find_next_zero_bit(unsentmap
, last
+ 1,
1834 /* Find the next dirty page for the next iteration */
1835 run_start
= find_next_bit(bitmap
, last
+ 1, run_start
);
 * postcopy_chunk_hostpages: discard any partially sent host page
1843 * Utility for the outgoing postcopy code.
1845 * Discard any partially sent host-page size chunks, mark any partially
1846 * dirty host-page size chunks as all dirty. In this case the host-page
1847 * is the host-page for the particular RAMBlock, i.e. it might be a huge page
1849 * Returns zero on success
1851 * @ms: current migration state
1853 static int postcopy_chunk_hostpages(MigrationState
*ms
)
1855 RAMState
*rs
= &ram_state
;
1856 struct RAMBlock
*block
;
1858 /* Easiest way to make sure we don't resume in the middle of a host-page */
1859 rs
->last_seen_block
= NULL
;
1860 rs
->last_sent_block
= NULL
;
1861 rs
->last_offset
= 0;
1863 QLIST_FOREACH_RCU(block
, &ram_list
.blocks
, next
) {
1864 unsigned long first
= block
->offset
>> TARGET_PAGE_BITS
;
1866 PostcopyDiscardState
*pds
=
1867 postcopy_discard_send_init(ms
, first
, block
->idstr
);
1869 /* First pass: Discard all partially sent host pages */
1870 postcopy_chunk_hostpages_pass(ms
, true, block
, pds
);
1872 * Second pass: Ensure that all partially dirty host pages are made
1875 postcopy_chunk_hostpages_pass(ms
, false, block
, pds
);
1877 postcopy_discard_send_finish(ms
, pds
);
1878 } /* ram_list loop */
1884 * ram_postcopy_send_discard_bitmap: transmit the discard bitmap
1886 * Returns zero on success
1888 * Transmit the set of pages to be discarded after precopy to the target
1889 * these are pages that:
1890 * a) Have been previously transmitted but are now dirty again
1891 * b) Pages that have never been transmitted, this ensures that
1892 * any pages on the destination that have been mapped by background
1893 * tasks get discarded (transparent huge pages is the specific concern)
1894 * Hopefully this is pretty sparse
1896 * @ms: current migration state
1898 int ram_postcopy_send_discard_bitmap(MigrationState
*ms
)
1900 RAMState
*rs
= &ram_state
;
1902 unsigned long *bitmap
, *unsentmap
;
1906 /* This should be our last sync, the src is now paused */
1907 migration_bitmap_sync(rs
);
1909 unsentmap
= atomic_rcu_read(&rs
->ram_bitmap
)->unsentmap
;
1911 /* We don't have a safe way to resize the sentmap, so
1912 * if the bitmap was resized it will be NULL at this
1915 error_report("migration ram resized during precopy phase");
1920 /* Deal with TPS != HPS and huge pages */
1921 ret
= postcopy_chunk_hostpages(ms
);
1928 * Update the unsentmap to be unsentmap = unsentmap | dirty
1930 bitmap
= atomic_rcu_read(&rs
->ram_bitmap
)->bmap
;
1931 bitmap_or(unsentmap
, unsentmap
, bitmap
,
1932 last_ram_offset() >> TARGET_PAGE_BITS
);
1935 trace_ram_postcopy_send_discard_bitmap();
1936 #ifdef DEBUG_POSTCOPY
1937 ram_debug_dump_bitmap(unsentmap
, true);
1940 ret
= postcopy_each_ram_send_discard(ms
);
1947 * ram_discard_range: discard dirtied pages at the beginning of postcopy
1949 * Returns zero on success
1951 * @mis: current migration incoming state
1952 * @rbname: name of the RAMBlock of the request. NULL means the
1953 * same that last one.
1954 * @start: RAMBlock starting page
1955 * @length: RAMBlock size
1957 int ram_discard_range(MigrationIncomingState
*mis
,
1959 uint64_t start
, size_t length
)
1963 trace_ram_discard_range(rbname
, start
, length
);
1966 RAMBlock
*rb
= qemu_ram_block_by_name(rbname
);
1969 error_report("ram_discard_range: Failed to find block '%s'", rbname
);
1973 ret
= ram_block_discard_range(rb
, start
, length
);
1981 static int ram_state_init(RAMState
*rs
)
1983 int64_t ram_bitmap_pages
; /* Size of bitmap in pages, including gaps */
1985 memset(rs
, 0, sizeof(*rs
));
1986 qemu_mutex_init(&rs
->bitmap_mutex
);
1987 qemu_mutex_init(&rs
->src_page_req_mutex
);
1988 QSIMPLEQ_INIT(&rs
->src_page_requests
);
1990 if (migrate_use_xbzrle()) {
1991 XBZRLE_cache_lock();
1992 ZERO_TARGET_PAGE
= g_malloc0(TARGET_PAGE_SIZE
);
1993 XBZRLE
.cache
= cache_init(migrate_xbzrle_cache_size() /
1996 if (!XBZRLE
.cache
) {
1997 XBZRLE_cache_unlock();
1998 error_report("Error creating cache");
2001 XBZRLE_cache_unlock();
2003 /* We prefer not to abort if there is no memory */
2004 XBZRLE
.encoded_buf
= g_try_malloc0(TARGET_PAGE_SIZE
);
2005 if (!XBZRLE
.encoded_buf
) {
2006 error_report("Error allocating encoded_buf");
2010 XBZRLE
.current_buf
= g_try_malloc(TARGET_PAGE_SIZE
);
2011 if (!XBZRLE
.current_buf
) {
2012 error_report("Error allocating current_buf");
2013 g_free(XBZRLE
.encoded_buf
);
2014 XBZRLE
.encoded_buf
= NULL
;
2019 /* For memory_global_dirty_log_start below. */
2020 qemu_mutex_lock_iothread();
2022 qemu_mutex_lock_ramlist();
2024 ram_state_reset(rs
);
2026 rs
->ram_bitmap
= g_new0(struct RAMBitmap
, 1);
2027 /* Skip setting bitmap if there is no RAM */
2028 if (ram_bytes_total()) {
2029 ram_bitmap_pages
= last_ram_offset() >> TARGET_PAGE_BITS
;
2030 rs
->ram_bitmap
->bmap
= bitmap_new(ram_bitmap_pages
);
2031 bitmap_set(rs
->ram_bitmap
->bmap
, 0, ram_bitmap_pages
);
2033 if (migrate_postcopy_ram()) {
2034 rs
->ram_bitmap
->unsentmap
= bitmap_new(ram_bitmap_pages
);
2035 bitmap_set(rs
->ram_bitmap
->unsentmap
, 0, ram_bitmap_pages
);
2040 * Count the total number of pages used by ram blocks not including any
2041 * gaps due to alignment or unplugs.
2043 rs
->migration_dirty_pages
= ram_bytes_total() >> TARGET_PAGE_BITS
;
2045 memory_global_dirty_log_start();
2046 migration_bitmap_sync(rs
);
2047 qemu_mutex_unlock_ramlist();
2048 qemu_mutex_unlock_iothread();
2055 * Each of ram_save_setup, ram_save_iterate and ram_save_complete has
2056 * long-running RCU critical section. When rcu-reclaims in the code
2057 * start to become numerous it will be necessary to reduce the
2058 * granularity of these critical sections.
2062 * ram_save_setup: Setup RAM for migration
2064 * Returns zero to indicate success and negative for error
2066 * @f: QEMUFile where to send the data
2067 * @opaque: RAMState pointer
2069 static int ram_save_setup(QEMUFile
*f
, void *opaque
)
2071 RAMState
*rs
= opaque
;
2074 /* migration has already setup the bitmap, reuse it. */
2075 if (!migration_in_colo_state()) {
2076 if (ram_state_init(rs
) < 0) {
2084 qemu_put_be64(f
, ram_bytes_total() | RAM_SAVE_FLAG_MEM_SIZE
);
2086 QLIST_FOREACH_RCU(block
, &ram_list
.blocks
, next
) {
2087 qemu_put_byte(f
, strlen(block
->idstr
));
2088 qemu_put_buffer(f
, (uint8_t *)block
->idstr
, strlen(block
->idstr
));
2089 qemu_put_be64(f
, block
->used_length
);
2090 if (migrate_postcopy_ram() && block
->page_size
!= qemu_host_page_size
) {
2091 qemu_put_be64(f
, block
->page_size
);
2097 ram_control_before_iterate(f
, RAM_CONTROL_SETUP
);
2098 ram_control_after_iterate(f
, RAM_CONTROL_SETUP
);
2100 qemu_put_be64(f
, RAM_SAVE_FLAG_EOS
);
2106 * ram_save_iterate: iterative stage for migration
2108 * Returns zero to indicate success and negative for error
2110 * @f: QEMUFile where to send the data
2111 * @opaque: RAMState pointer
2113 static int ram_save_iterate(QEMUFile
*f
, void *opaque
)
2115 RAMState
*rs
= opaque
;
2122 if (ram_list
.version
!= rs
->last_version
) {
2123 ram_state_reset(rs
);
2126 /* Read version before ram_list.blocks */
2129 ram_control_before_iterate(f
, RAM_CONTROL_ROUND
);
2131 t0
= qemu_clock_get_ns(QEMU_CLOCK_REALTIME
);
2133 while ((ret
= qemu_file_rate_limit(f
)) == 0) {
2136 pages
= ram_find_and_save_block(rs
, false);
/* no more pages to send */
2144 /* we want to check in the 1st loop, just in case it was the 1st time
2145 and we had to sync the dirty bitmap.
2146 qemu_get_clock_ns() is a bit expensive, so we only check each some
2149 if ((i
& 63) == 0) {
2150 uint64_t t1
= (qemu_clock_get_ns(QEMU_CLOCK_REALTIME
) - t0
) / 1000000;
2151 if (t1
> MAX_WAIT
) {
2152 trace_ram_save_iterate_big_wait(t1
, i
);
2158 flush_compressed_data(rs
);
2162 * Must occur before EOS (or any QEMUFile operation)
2163 * because of RDMA protocol.
2165 ram_control_after_iterate(f
, RAM_CONTROL_ROUND
);
2167 qemu_put_be64(f
, RAM_SAVE_FLAG_EOS
);
2168 rs
->bytes_transferred
+= 8;
2170 ret
= qemu_file_get_error(f
);
2179 * ram_save_complete: function called to send the remaining amount of ram
2181 * Returns zero to indicate success
2183 * Called with iothread lock
2185 * @f: QEMUFile where to send the data
2186 * @opaque: RAMState pointer
2188 static int ram_save_complete(QEMUFile
*f
, void *opaque
)
2190 RAMState
*rs
= opaque
;
2194 if (!migration_in_postcopy()) {
2195 migration_bitmap_sync(rs
);
2198 ram_control_before_iterate(f
, RAM_CONTROL_FINISH
);
2200 /* try transferring iterative blocks of memory */
2202 /* flush all remaining blocks regardless of rate limiting */
2206 pages
= ram_find_and_save_block(rs
, !migration_in_colo_state());
/* no more blocks to send */
2213 flush_compressed_data(rs
);
2214 ram_control_after_iterate(f
, RAM_CONTROL_FINISH
);
2218 qemu_put_be64(f
, RAM_SAVE_FLAG_EOS
);
static void ram_save_pending(QEMUFile *f, void *opaque, uint64_t max_size,
                             uint64_t *non_postcopiable_pending,
                             uint64_t *postcopiable_pending)
{
    RAMState *rs = opaque;
    uint64_t remaining_size;

    remaining_size = rs->migration_dirty_pages * TARGET_PAGE_SIZE;

    if (!migration_in_postcopy() &&
        remaining_size < max_size) {
        qemu_mutex_lock_iothread();
        rcu_read_lock();
        migration_bitmap_sync(rs);
        rcu_read_unlock();
        qemu_mutex_unlock_iothread();
        remaining_size = rs->migration_dirty_pages * TARGET_PAGE_SIZE;
    }

    /* We can do postcopy, and all the data is postcopiable */
    *postcopiable_pending += remaining_size;
}
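/*
 * A worked example of the estimate above (illustrative numbers only): with
 * 4 KiB target pages and 10,000 dirty pages, remaining_size is
 * 10,000 * 4096 = 40,960,000 bytes (~39 MiB).  If that is already below
 * max_size (the amount the caller believes can be sent within the
 * configured downtime), the dirty bitmap is re-synced under the iothread
 * lock so the decision to enter the completion stage is made on a fresh
 * count rather than a stale one.
 */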
static int load_xbzrle(QEMUFile *f, ram_addr_t addr, void *host)
{
    unsigned int xh_len;
    int xh_flags;
    uint8_t *loaded_data;

    if (!xbzrle_decoded_buf) {
        xbzrle_decoded_buf = g_malloc(TARGET_PAGE_SIZE);
    }
    loaded_data = xbzrle_decoded_buf;

    /* extract RLE header */
    xh_flags = qemu_get_byte(f);
    xh_len = qemu_get_be16(f);

    if (xh_flags != ENCODING_FLAG_XBZRLE) {
        error_report("Failed to load XBZRLE page - wrong compression!");
        return -1;
    }

    if (xh_len > TARGET_PAGE_SIZE) {
        error_report("Failed to load XBZRLE page - len overflow!");
        return -1;
    }

    /* load data and decode */
    qemu_get_buffer_in_place(f, &loaded_data, xh_len);

    if (xbzrle_decode_buffer(loaded_data, xh_len, host,
                             TARGET_PAGE_SIZE) == -1) {
        error_report("Failed to load XBZRLE page - decode error!");
        return -1;
    }

    return 0;
}
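/*
 * For reference, an XBZRLE-compressed page arrives on the wire as
 * (a sketch matching the reads above):
 *
 *   byte   xh_flags  - must be ENCODING_FLAG_XBZRLE
 *   be16   xh_len    - length of the encoded data, at most TARGET_PAGE_SIZE
 *   bytes  xh_len bytes of XBZRLE-encoded delta, which is decoded against
 *          the current contents of the destination page at @host
 */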
/**
 * ram_block_from_stream: read a RAMBlock id from the migration stream
 *
 * Must be called from within an RCU critical section.
 *
 * Returns a pointer from within the RCU-protected ram_list.
 *
 * @f: QEMUFile where to read the data from
 * @flags: Page flags (mostly to see if it's a continuation of previous block)
 */
static inline RAMBlock *ram_block_from_stream(QEMUFile *f, int flags)
{
    static RAMBlock *block = NULL;
    char id[256];
    uint8_t len;

    if (flags & RAM_SAVE_FLAG_CONTINUE) {
        if (!block) {
            error_report("Ack, bad migration stream!");
            return NULL;
        }

        return block;
    }

    len = qemu_get_byte(f);
    qemu_get_buffer(f, (uint8_t *)id, len);
    id[len] = 0;

    block = qemu_ram_block_by_name(id);
    if (!block) {
        error_report("Can't find block %s", id);
        return NULL;
    }

    return block;
}
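/*
 * Note (descriptive, not new behaviour): the static 'block' pointer is what
 * makes RAM_SAVE_FLAG_CONTINUE work - when the source sends several pages
 * from the same RAMBlock in a row it transmits the id string only once, and
 * subsequent pages carry the CONTINUE flag so the receiver reuses the block
 * looked up on the previous call.
 */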
static inline void *host_from_ram_block_offset(RAMBlock *block,
                                               ram_addr_t offset)
{
    if (!offset_in_ramblock(block, offset)) {
        return NULL;
    }

    return block->host + offset;
}
/**
 * ram_handle_compressed: handle the zero page case
 *
 * If a page (or a whole RDMA chunk) has been
 * determined to be zero, then zap it.
 *
 * @host: host address for the zero page
 * @ch: what the page is filled from.  We only support zero
 * @size: size of the zero page
 */
void ram_handle_compressed(void *host, uint8_t ch, uint64_t size)
{
    if (ch != 0 || !is_zero_range(host, size)) {
        memset(host, ch, size);
    }
}
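/*
 * Descriptive note: the is_zero_range() test above means an incoming zero
 * page only triggers a memset() when the destination is not already zero,
 * so guest memory that was never touched is not written to and can stay
 * unpopulated on the destination host.
 */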
static void *do_data_decompress(void *opaque)
{
    DecompressParam *param = opaque;
    unsigned long pagesize;
    uint8_t *des;
    int len;

    qemu_mutex_lock(&param->mutex);
    while (!param->quit) {
        if (param->des) {
            des = param->des;
            len = param->len;
            param->des = 0;
            qemu_mutex_unlock(&param->mutex);

            pagesize = TARGET_PAGE_SIZE;
            /* uncompress() can fail in some cases, especially when the
             * page was dirtied while it was being compressed.  That is
             * not a problem because the dirty page will be retransferred
             * and uncompress() won't break the data in other pages.
             */
            uncompress((Bytef *)des, &pagesize,
                       (const Bytef *)param->compbuf, len);

            qemu_mutex_lock(&decomp_done_lock);
            param->done = true;
            qemu_cond_signal(&decomp_done_cond);
            qemu_mutex_unlock(&decomp_done_lock);

            qemu_mutex_lock(&param->mutex);
        } else {
            qemu_cond_wait(&param->cond, &param->mutex);
        }
    }
    qemu_mutex_unlock(&param->mutex);

    return NULL;
}
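/*
 * Descriptive sketch of the handshake between a decompress worker and the
 * dispatcher (decompress_data_with_multi_threads below):
 *
 *   dispatcher                              worker (do_data_decompress)
 *   ----------                              ---------------------------
 *   find idx with done == true
 *   done = false, fill compbuf/des/len
 *   signal param->cond            ---->     wake up, copy des/len,
 *                                           uncompress() into guest RAM
 *   wait on decomp_done_cond      <----     done = true,
 *                                           signal decomp_done_cond
 *
 * param->mutex protects the per-thread work description, while
 * decomp_done_lock/decomp_done_cond cover the shared 'done' flags.
 */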
static void wait_for_decompress_done(void)
{
    int idx, thread_count;

    if (!migrate_use_compression()) {
        return;
    }

    thread_count = migrate_decompress_threads();
    qemu_mutex_lock(&decomp_done_lock);
    for (idx = 0; idx < thread_count; idx++) {
        while (!decomp_param[idx].done) {
            qemu_cond_wait(&decomp_done_cond, &decomp_done_lock);
        }
    }
    qemu_mutex_unlock(&decomp_done_lock);
}
void migrate_decompress_threads_create(void)
{
    int i, thread_count;

    thread_count = migrate_decompress_threads();
    decompress_threads = g_new0(QemuThread, thread_count);
    decomp_param = g_new0(DecompressParam, thread_count);
    qemu_mutex_init(&decomp_done_lock);
    qemu_cond_init(&decomp_done_cond);
    for (i = 0; i < thread_count; i++) {
        qemu_mutex_init(&decomp_param[i].mutex);
        qemu_cond_init(&decomp_param[i].cond);
        decomp_param[i].compbuf = g_malloc0(compressBound(TARGET_PAGE_SIZE));
        decomp_param[i].done = true;
        decomp_param[i].quit = false;
        qemu_thread_create(decompress_threads + i, "decompress",
                           do_data_decompress, decomp_param + i,
                           QEMU_THREAD_JOINABLE);
    }
}
void migrate_decompress_threads_join(void)
{
    int i, thread_count;

    thread_count = migrate_decompress_threads();
    for (i = 0; i < thread_count; i++) {
        qemu_mutex_lock(&decomp_param[i].mutex);
        decomp_param[i].quit = true;
        qemu_cond_signal(&decomp_param[i].cond);
        qemu_mutex_unlock(&decomp_param[i].mutex);
    }
    for (i = 0; i < thread_count; i++) {
        qemu_thread_join(decompress_threads + i);
        qemu_mutex_destroy(&decomp_param[i].mutex);
        qemu_cond_destroy(&decomp_param[i].cond);
        g_free(decomp_param[i].compbuf);
    }
    g_free(decompress_threads);
    g_free(decomp_param);
    decompress_threads = NULL;
    decomp_param = NULL;
}
static void decompress_data_with_multi_threads(QEMUFile *f,
                                               void *host, int len)
{
    int idx, thread_count;

    thread_count = migrate_decompress_threads();
    qemu_mutex_lock(&decomp_done_lock);
    while (true) {
        for (idx = 0; idx < thread_count; idx++) {
            if (decomp_param[idx].done) {
                decomp_param[idx].done = false;
                qemu_mutex_lock(&decomp_param[idx].mutex);
                qemu_get_buffer(f, decomp_param[idx].compbuf, len);
                decomp_param[idx].des = host;
                decomp_param[idx].len = len;
                qemu_cond_signal(&decomp_param[idx].cond);
                qemu_mutex_unlock(&decomp_param[idx].mutex);
                break;
            }
        }
        if (idx < thread_count) {
            break;
        } else {
            qemu_cond_wait(&decomp_done_cond, &decomp_done_lock);
        }
    }
    qemu_mutex_unlock(&decomp_done_lock);
}
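/*
 * Descriptive note on the dispatch loop above: a compressed page is handed
 * to the first worker whose 'done' flag is set; if every worker is busy the
 * caller sleeps on decomp_done_cond until one of them signals completion,
 * so at most migrate_decompress_threads() pages are in flight at any time.
 */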
/**
 * ram_postcopy_incoming_init: allocate postcopy data structures
 *
 * Returns 0 for success and negative if there was an error
 *
 * @mis: current migration incoming state
 *
 * Allocate data structures etc needed by incoming migration with
 * postcopy-ram.  postcopy-ram's similarly named
 * postcopy_ram_incoming_init does the work.
 */
int ram_postcopy_incoming_init(MigrationIncomingState *mis)
{
    size_t ram_pages = last_ram_offset() >> TARGET_PAGE_BITS;

    return postcopy_ram_incoming_init(mis, ram_pages);
}
/**
 * ram_load_postcopy: load a page in postcopy case
 *
 * Returns 0 for success or -errno in case of error
 *
 * Called in postcopy mode by ram_load().
 * rcu_read_lock is taken prior to this being called.
 *
 * @f: QEMUFile to read the data from
 */
static int ram_load_postcopy(QEMUFile *f)
{
    int flags = 0, ret = 0;
    bool place_needed = false;
    bool matching_page_sizes = false;
    MigrationIncomingState *mis = migration_incoming_get_current();
    /* Temporary page that is later 'placed' */
    void *postcopy_host_page = postcopy_get_tmp_page(mis);
    void *last_host = NULL;
    bool all_zero = false;

    while (!ret && !(flags & RAM_SAVE_FLAG_EOS)) {
        ram_addr_t addr;
        void *host = NULL;
        void *page_buffer = NULL;
        void *place_source = NULL;
        RAMBlock *block = NULL;
        uint8_t ch;

        addr = qemu_get_be64(f);
        flags = addr & ~TARGET_PAGE_MASK;
        addr &= TARGET_PAGE_MASK;

        trace_ram_load_postcopy_loop((uint64_t)addr, flags);
        place_needed = false;
        if (flags & (RAM_SAVE_FLAG_COMPRESS | RAM_SAVE_FLAG_PAGE)) {
            block = ram_block_from_stream(f, flags);

            host = host_from_ram_block_offset(block, addr);
            if (!host) {
                error_report("Illegal RAM offset " RAM_ADDR_FMT, addr);
                ret = -EINVAL;
                break;
            }
            matching_page_sizes = block->page_size == TARGET_PAGE_SIZE;
            /*
             * Postcopy requires that we place whole host pages atomically;
             * these may be huge pages for RAMBlocks that are backed by
             * hugetlbfs.
             * To make it atomic, the data is read into a temporary page
             * that's moved into place later.
             * The migration protocol uses, possibly smaller, target-pages
             * however the source ensures it always sends all the components
             * of a host page in order.
             */
            page_buffer = postcopy_host_page +
                          ((uintptr_t)host & (block->page_size - 1));
            /* If all TP are zero then we can optimise the place */
            if (!((uintptr_t)host & (block->page_size - 1))) {
                all_zero = true;
            } else {
                /* not the 1st TP within the HP */
                if (host != (last_host + TARGET_PAGE_SIZE)) {
                    error_report("Non-sequential target page %p/%p",
                                 host, last_host);
                    ret = -EINVAL;
                    break;
                }
            }

            /*
             * If it's the last part of a host page then we place the host
             * page
             */
            place_needed = (((uintptr_t)host + TARGET_PAGE_SIZE) &
                            (block->page_size - 1)) == 0;
            place_source = postcopy_host_page;
        }
        last_host = host;

        switch (flags & ~RAM_SAVE_FLAG_CONTINUE) {
        case RAM_SAVE_FLAG_COMPRESS:
            ch = qemu_get_byte(f);
            memset(page_buffer, ch, TARGET_PAGE_SIZE);
            if (ch) {
                all_zero = false;
            }
            break;

        case RAM_SAVE_FLAG_PAGE:
            all_zero = false;
            if (!place_needed || !matching_page_sizes) {
                qemu_get_buffer(f, page_buffer, TARGET_PAGE_SIZE);
            } else {
                /* Avoids the qemu_file copy during postcopy, which is
                 * going to do a copy later; can only do it when we
                 * do this read in one go (matching page sizes)
                 */
                qemu_get_buffer_in_place(f, (uint8_t **)&place_source,
                                         TARGET_PAGE_SIZE);
            }
            break;

        case RAM_SAVE_FLAG_EOS:
            /* normal exit */
            break;

        default:
            error_report("Unknown combination of migration flags: %#x"
                         " (postcopy mode)", flags);
            ret = -EINVAL;
        }

        if (place_needed) {
            /* This gets called at the last target page in the host page */
            void *place_dest = host + TARGET_PAGE_SIZE - block->page_size;

            if (all_zero) {
                ret = postcopy_place_page_zero(mis, place_dest,
                                               block->page_size);
            } else {
                ret = postcopy_place_page(mis, place_dest,
                                          place_source, block->page_size);
            }
        }
        if (!ret) {
            ret = qemu_file_get_error(f);
        }
    }

    return ret;
}
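/*
 * Worked example of the host-page assembly above (illustrative sizes): with
 * a hugetlbfs-backed RAMBlock using 2 MiB host pages and 4 KiB target
 * pages, 512 consecutive target pages are accumulated in
 * postcopy_host_page; only when the 512th one arrives does place_needed
 * become true, and the whole 2 MiB page is handed to postcopy_place_page()
 * (or postcopy_place_page_zero() if every target page was zero) in one
 * atomic operation, so the guest never sees a partially filled huge page.
 */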
static int ram_load(QEMUFile *f, void *opaque, int version_id)
{
    int flags = 0, ret = 0;
    static uint64_t seq_iter;
    int len = 0;
    /*
     * If system is running in postcopy mode, page inserts to host memory
     * must be atomic
     */
    bool postcopy_running = postcopy_state_get() >= POSTCOPY_INCOMING_LISTENING;
    /* ADVISE is earlier, it shows the source has the postcopy capability on */
    bool postcopy_advised = postcopy_state_get() >= POSTCOPY_INCOMING_ADVISE;

    seq_iter++;

    if (version_id != 4) {
        ret = -EINVAL;
    }

    /* This RCU critical section can be very long running.
     * When RCU reclaims in the code start to become numerous,
     * it will be necessary to reduce the granularity of this
     * critical section.
     */
    rcu_read_lock();

    if (postcopy_running) {
        ret = ram_load_postcopy(f);
    }

    while (!postcopy_running && !ret && !(flags & RAM_SAVE_FLAG_EOS)) {
        ram_addr_t addr, total_ram_bytes;
        void *host = NULL;
        uint8_t ch;

        addr = qemu_get_be64(f);
        flags = addr & ~TARGET_PAGE_MASK;
        addr &= TARGET_PAGE_MASK;

        if (flags & (RAM_SAVE_FLAG_COMPRESS | RAM_SAVE_FLAG_PAGE |
                     RAM_SAVE_FLAG_COMPRESS_PAGE | RAM_SAVE_FLAG_XBZRLE)) {
            RAMBlock *block = ram_block_from_stream(f, flags);

            host = host_from_ram_block_offset(block, addr);
            if (!host) {
                error_report("Illegal RAM offset " RAM_ADDR_FMT, addr);
                ret = -EINVAL;
                break;
            }
        }

        switch (flags & ~RAM_SAVE_FLAG_CONTINUE) {
        case RAM_SAVE_FLAG_MEM_SIZE:
            /* Synchronize RAM block list */
            total_ram_bytes = addr;
            while (!ret && total_ram_bytes) {
                RAMBlock *block;
                char id[256];
                ram_addr_t length;

                len = qemu_get_byte(f);
                qemu_get_buffer(f, (uint8_t *)id, len);
                id[len] = 0;
                length = qemu_get_be64(f);

                block = qemu_ram_block_by_name(id);
                if (block) {
                    if (length != block->used_length) {
                        Error *local_err = NULL;

                        ret = qemu_ram_resize(block, length,
                                              &local_err);
                        if (local_err) {
                            error_report_err(local_err);
                        }
                    }
                    /* For postcopy we need to check hugepage sizes match */
                    if (postcopy_advised &&
                        block->page_size != qemu_host_page_size) {
                        uint64_t remote_page_size = qemu_get_be64(f);
                        if (remote_page_size != block->page_size) {
                            error_report("Mismatched RAM page size %s "
                                         "(local) %zd != %" PRId64,
                                         id, block->page_size,
                                         remote_page_size);
                            ret = -EINVAL;
                        }
                    }
                    ram_control_load_hook(f, RAM_CONTROL_BLOCK_REG,
                                          block->idstr);
                } else {
                    error_report("Unknown ramblock \"%s\", cannot "
                                 "accept migration", id);
                    ret = -EINVAL;
                }

                total_ram_bytes -= length;
            }
            break;

        case RAM_SAVE_FLAG_COMPRESS:
            ch = qemu_get_byte(f);
            ram_handle_compressed(host, ch, TARGET_PAGE_SIZE);
            break;

        case RAM_SAVE_FLAG_PAGE:
            qemu_get_buffer(f, host, TARGET_PAGE_SIZE);
            break;

        case RAM_SAVE_FLAG_COMPRESS_PAGE:
            len = qemu_get_be32(f);
            if (len < 0 || len > compressBound(TARGET_PAGE_SIZE)) {
                error_report("Invalid compressed data length: %d", len);
                ret = -EINVAL;
                break;
            }
            decompress_data_with_multi_threads(f, host, len);
            break;

        case RAM_SAVE_FLAG_XBZRLE:
            if (load_xbzrle(f, addr, host) < 0) {
                error_report("Failed to decompress XBZRLE page at "
                             RAM_ADDR_FMT, addr);
                ret = -EINVAL;
                break;
            }
            break;

        case RAM_SAVE_FLAG_EOS:
            /* normal exit */
            break;

        default:
            if (flags & RAM_SAVE_FLAG_HOOK) {
                ram_control_load_hook(f, RAM_CONTROL_HOOK, NULL);
            } else {
                error_report("Unknown combination of migration flags: %#x",
                             flags);
                ret = -EINVAL;
            }
        }
        if (!ret) {
            ret = qemu_file_get_error(f);
        }
    }

    wait_for_decompress_done();
    rcu_read_unlock();
    trace_ram_load_complete(ret, seq_iter);

    return ret;
}
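/*
 * For reference, each record consumed by the precopy loop above starts with
 * a single be64 whose bits below the target page size carry the
 * RAM_SAVE_FLAG_* values and whose page-aligned part is the offset within
 * the RAMBlock named by the stream (or reused via RAM_SAVE_FLAG_CONTINUE).
 * The payload that follows depends on the flag: a fill byte for COMPRESS
 * (zero pages), a raw TARGET_PAGE_SIZE buffer for PAGE, a be32 length plus
 * zlib data for COMPRESS_PAGE, and an XBZRLE header plus delta for XBZRLE.
 * For MEM_SIZE the "address" field is instead the total RAM byte count.
 */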
static SaveVMHandlers savevm_ram_handlers = {
    .save_live_setup = ram_save_setup,
    .save_live_iterate = ram_save_iterate,
    .save_live_complete_postcopy = ram_save_complete,
    .save_live_complete_precopy = ram_save_complete,
    .save_live_pending = ram_save_pending,
    .load_state = ram_load,
    .cleanup = ram_migration_cleanup,
};
void ram_mig_init(void)
{
    qemu_mutex_init(&XBZRLE.lock);
    register_savevm_live(NULL, "ram", 0, 4, &savevm_ram_handlers, &ram_state);
}
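/*
 * Descriptive note: the "4" passed to register_savevm_live() is the section
 * version number and has to stay in sync with the "version_id != 4" check
 * in ram_load() above; &ram_state is the opaque pointer that the save/load
 * handlers cast back to RAMState.
 */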