]> git.proxmox.com Git - mirror_qemu.git/blame - migration/dirtyrate.c
bsd-user: spelling fixes: necesary, agrument, undocummented
[mirror_qemu.git] / migration / dirtyrate.c
CommitLineData
4240dcee
CZ
1/*
2 * Dirtyrate implement code
3 *
4 * Copyright (c) 2020 HUAWEI TECHNOLOGIES CO.,LTD.
5 *
6 * Authors:
7 * Chuan Zheng <zhengchuan@huawei.com>
8 *
9 * This work is licensed under the terms of the GNU GPL, version 2 or later.
10 * See the COPYING file in the top-level directory.
11 */
12
13#include "qemu/osdep.h"
cc37d98b 14#include "qemu/error-report.h"
662770af 15#include <zlib.h>
1e05888a 16#include "hw/core/cpu.h"
4240dcee 17#include "qapi/error.h"
4240dcee 18#include "exec/ramblock.h"
beeda9b7 19#include "exec/target_page.h"
4240dcee 20#include "qemu/rcu_queue.h"
0e21bf24 21#include "qemu/main-loop.h"
4240dcee 22#include "qapi/qapi-commands-migration.h"
3ded54b1 23#include "ram.h"
3c0b5dff 24#include "trace.h"
4240dcee 25#include "dirtyrate.h"
a4a571d9
PX
26#include "monitor/hmp.h"
27#include "monitor/monitor.h"
28#include "qapi/qmp/qdict.h"
0e21bf24
HH
29#include "sysemu/kvm.h"
30#include "sysemu/runstate.h"
31#include "exec/memory.h"
00a3f9c6 32#include "qemu/xxhash.h"
0e21bf24 33
4998a37e
HH
/*
 * total_dirty_pages is protected by the BQL and is used to count
 * dirty pages during the period between two
 * memory_global_dirty_log_sync calls.
 */
uint64_t total_dirty_pages;
40
0e21bf24
HH
/* A pair of dirty-page counter readings taken around one measurement. */
typedef struct DirtyPageRecord {
    uint64_t start_pages; /* counter value stored when recording starts */
    uint64_t end_pages;   /* counter value stored when recording ends */
} DirtyPageRecord;
4240dcee 45
7df3aa30 46static int CalculatingState = DIRTY_RATE_STATUS_UNSTARTED;
c9a58d71 47static struct DirtyRateStat DirtyStat;
0e21bf24
HH
48static DirtyRateMeasureMode dirtyrate_mode =
49 DIRTY_RATE_MEASURE_MODE_PAGE_SAMPLING;
7df3aa30 50
8244166d 51static int64_t dirty_stat_wait(int64_t msec, int64_t initial_time)
eca58224
CZ
52{
53 int64_t current_time;
54
55 current_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
56 if ((current_time - initial_time) >= msec) {
57 msec = current_time - initial_time;
58 } else {
59 g_usleep((msec + initial_time - current_time) * 1000);
3eb82637
AG
60 /* g_usleep may overshoot */
61 msec = qemu_clock_get_ms(QEMU_CLOCK_REALTIME) - initial_time;
eca58224
CZ
62 }
63
64 return msec;
65}
66
8244166d
HH
67static inline void record_dirtypages(DirtyPageRecord *dirty_pages,
68 CPUState *cpu, bool start)
69{
70 if (start) {
71 dirty_pages[cpu->cpu_index].start_pages = cpu->dirty_pages;
72 } else {
73 dirty_pages[cpu->cpu_index].end_pages = cpu->dirty_pages;
74 }
75}
76
77static int64_t do_calculate_dirtyrate(DirtyPageRecord dirty_pages,
78 int64_t calc_time_ms)
79{
8244166d
HH
80 uint64_t increased_dirty_pages =
81 dirty_pages.end_pages - dirty_pages.start_pages;
82
3eb82637
AG
83 /*
84 * multiply by 1000ms/s _before_ converting down to megabytes
85 * to avoid losing precision
86 */
87 return qemu_target_pages_to_MiB(increased_dirty_pages * 1000) /
88 calc_time_ms;
8244166d
HH
89}
90
/*
 * Start (@start == true) or stop (@start == false) global dirty logging
 * for @flag while holding the iothread lock.
 */
void global_dirty_log_change(unsigned int flag, bool start)
{
    qemu_mutex_lock_iothread();

    if (!start) {
        memory_global_dirty_log_stop(flag);
    } else {
        memory_global_dirty_log_start(flag);
    }

    qemu_mutex_unlock_iothread();
}
101
/*
 * global_dirty_log_sync
 * With the iothread lock held:
 * 1. sync the dirty log (memory_global_dirty_log_sync)
 * 2. stop dirty tracking for @flag when @one_shot is set.
 */
static void global_dirty_log_sync(unsigned int flag, bool one_shot)
{
    qemu_mutex_lock_iothread();

    memory_global_dirty_log_sync(false);

    if (one_shot) {
        memory_global_dirty_log_stop(flag);
    }

    qemu_mutex_unlock_iothread();
}
116
117static DirtyPageRecord *vcpu_dirty_stat_alloc(VcpuStat *stat)
118{
119 CPUState *cpu;
8244166d
HH
120 int nvcpu = 0;
121
122 CPU_FOREACH(cpu) {
123 nvcpu++;
124 }
125
126 stat->nvcpu = nvcpu;
c5e8d518 127 stat->rates = g_new0(DirtyRateVcpu, nvcpu);
8244166d 128
66997c42 129 return g_new0(DirtyPageRecord, nvcpu);
8244166d
HH
130}
131
132static void vcpu_dirty_stat_collect(VcpuStat *stat,
133 DirtyPageRecord *records,
134 bool start)
135{
136 CPUState *cpu;
137
138 CPU_FOREACH(cpu) {
139 record_dirtypages(records, cpu, start);
140 }
141}
142
143int64_t vcpu_calculate_dirtyrate(int64_t calc_time_ms,
144 VcpuStat *stat,
145 unsigned int flag,
146 bool one_shot)
147{
148 DirtyPageRecord *records;
149 int64_t init_time_ms;
150 int64_t duration;
151 int64_t dirtyrate;
152 int i = 0;
153 unsigned int gen_id;
154
155retry:
156 init_time_ms = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
157
370ed600
JI
158 WITH_QEMU_LOCK_GUARD(&qemu_cpu_list_lock) {
159 gen_id = cpu_list_generation_id_get();
160 records = vcpu_dirty_stat_alloc(stat);
161 vcpu_dirty_stat_collect(stat, records, true);
162 }
8244166d
HH
163
164 duration = dirty_stat_wait(calc_time_ms, init_time_ms);
165
166 global_dirty_log_sync(flag, one_shot);
167
370ed600
JI
168 WITH_QEMU_LOCK_GUARD(&qemu_cpu_list_lock) {
169 if (gen_id != cpu_list_generation_id_get()) {
170 g_free(records);
171 g_free(stat->rates);
172 cpu_list_unlock();
173 goto retry;
174 }
175 vcpu_dirty_stat_collect(stat, records, false);
8244166d 176 }
8244166d
HH
177
178 for (i = 0; i < stat->nvcpu; i++) {
179 dirtyrate = do_calculate_dirtyrate(records[i], duration);
180
181 stat->rates[i].id = i;
182 stat->rates[i].dirty_rate = dirtyrate;
183
184 trace_dirtyrate_do_calculate_vcpu(i, dirtyrate);
185 }
186
187 g_free(records);
188
189 return duration;
190}
191
34a68001 192static bool is_calc_time_valid(int64_t msec)
eca58224 193{
34a68001 194 if ((msec < MIN_CALC_TIME_MS) || (msec > MAX_CALC_TIME_MS)) {
eca58224
CZ
195 return false;
196 }
197
198 return true;
199}
200
7afa08cd
HH
201static bool is_sample_pages_valid(int64_t pages)
202{
203 return pages >= MIN_SAMPLE_PAGE_COUNT &&
204 pages <= MAX_SAMPLE_PAGE_COUNT;
205}
206
7df3aa30
CZ
207static int dirtyrate_set_state(int *state, int old_state, int new_state)
208{
209 assert(new_state < DIRTY_RATE_STATUS__MAX);
3c0b5dff 210 trace_dirtyrate_set_state(DirtyRateStatus_str(new_state));
7df3aa30
CZ
211 if (qatomic_cmpxchg(state, old_state, new_state) == old_state) {
212 return 0;
213 } else {
214 return -1;
215 }
216}
217
34a68001
AG
218/* Decimal power of given time unit relative to one second */
219static int time_unit_to_power(TimeUnit time_unit)
220{
221 switch (time_unit) {
222 case TIME_UNIT_SECOND:
223 return 0;
224 case TIME_UNIT_MILLISECOND:
225 return -3;
226 default:
227 assert(false); /* unreachable */
228 return 0;
229 }
230}
231
232static int64_t convert_time_unit(int64_t value, TimeUnit unit_from,
233 TimeUnit unit_to)
234{
235 int power = time_unit_to_power(unit_from) -
236 time_unit_to_power(unit_to);
237 while (power < 0) {
238 value /= 10;
239 power += 1;
240 }
241 while (power > 0) {
242 value *= 10;
243 power -= 1;
244 }
245 return value;
246}
247
248
249static struct DirtyRateInfo *
250query_dirty_rate_info(TimeUnit calc_time_unit)
4c437254 251{
0e21bf24 252 int i;
4c437254 253 int64_t dirty_rate = DirtyStat.dirty_rate;
b21e2380 254 struct DirtyRateInfo *info = g_new0(DirtyRateInfo, 1);
0e21bf24 255 DirtyRateVcpuList *head = NULL, **tail = &head;
4c437254
CZ
256
257 info->status = CalculatingState;
258 info->start_time = DirtyStat.start_time;
34a68001
AG
259 info->calc_time = convert_time_unit(DirtyStat.calc_time_ms,
260 TIME_UNIT_MILLISECOND,
261 calc_time_unit);
262 info->calc_time_unit = calc_time_unit;
7afa08cd 263 info->sample_pages = DirtyStat.sample_pages;
0e21bf24
HH
264 info->mode = dirtyrate_mode;
265
266 if (qatomic_read(&CalculatingState) == DIRTY_RATE_STATUS_MEASURED) {
267 info->has_dirty_rate = true;
268 info->dirty_rate = dirty_rate;
269
270 if (dirtyrate_mode == DIRTY_RATE_MEASURE_MODE_DIRTY_RING) {
271 /*
272 * set sample_pages with 0 to indicate page sampling
273 * isn't enabled
274 **/
275 info->sample_pages = 0;
276 info->has_vcpu_dirty_rate = true;
277 for (i = 0; i < DirtyStat.dirty_ring.nvcpu; i++) {
b21e2380 278 DirtyRateVcpu *rate = g_new0(DirtyRateVcpu, 1);
0e21bf24
HH
279 rate->id = DirtyStat.dirty_ring.rates[i].id;
280 rate->dirty_rate = DirtyStat.dirty_ring.rates[i].dirty_rate;
281 QAPI_LIST_APPEND(tail, rate);
282 }
283 info->vcpu_dirty_rate = head;
284 }
826b8bc8
HH
285
286 if (dirtyrate_mode == DIRTY_RATE_MEASURE_MODE_DIRTY_BITMAP) {
287 info->sample_pages = 0;
288 }
0e21bf24 289 }
4c437254 290
3c0b5dff
CZ
291 trace_query_dirty_rate_info(DirtyRateStatus_str(CalculatingState));
292
4c437254
CZ
293 return info;
294}
295
320a6ccc 296static void init_dirtyrate_stat(struct DirtyRateConfig config)
c9a58d71 297{
c9a58d71 298 DirtyStat.dirty_rate = -1;
320a6ccc 299 DirtyStat.start_time = qemu_clock_get_ms(QEMU_CLOCK_HOST) / 1000;
34a68001 300 DirtyStat.calc_time_ms = config.calc_time_ms;
71864ead
HH
301 DirtyStat.sample_pages = config.sample_pages_per_gigabytes;
302
303 switch (config.mode) {
304 case DIRTY_RATE_MEASURE_MODE_PAGE_SAMPLING:
305 DirtyStat.page_sampling.total_dirty_samples = 0;
306 DirtyStat.page_sampling.total_sample_count = 0;
307 DirtyStat.page_sampling.total_block_mem_MB = 0;
308 break;
309 case DIRTY_RATE_MEASURE_MODE_DIRTY_RING:
310 DirtyStat.dirty_ring.nvcpu = -1;
311 DirtyStat.dirty_ring.rates = NULL;
312 break;
313 default:
314 break;
315 }
c9a58d71
CZ
316}
317
0e21bf24
HH
318static void cleanup_dirtyrate_stat(struct DirtyRateConfig config)
319{
320 /* last calc-dirty-rate qmp use dirty ring mode */
321 if (dirtyrate_mode == DIRTY_RATE_MEASURE_MODE_DIRTY_RING) {
322 free(DirtyStat.dirty_ring.rates);
323 DirtyStat.dirty_ring.rates = NULL;
324 }
325}
326
c9a58d71
CZ
327static void update_dirtyrate_stat(struct RamblockDirtyInfo *info)
328{
71864ead
HH
329 DirtyStat.page_sampling.total_dirty_samples += info->sample_dirty_count;
330 DirtyStat.page_sampling.total_sample_count += info->sample_pages_count;
c9a58d71 331 /* size of total pages in MB */
beeda9b7
JQ
332 DirtyStat.page_sampling.total_block_mem_MB +=
333 qemu_target_pages_to_MiB(info->ramblock_pages);
c9a58d71
CZ
334}
335
336static void update_dirtyrate(uint64_t msec)
337{
338 uint64_t dirtyrate;
71864ead
HH
339 uint64_t total_dirty_samples = DirtyStat.page_sampling.total_dirty_samples;
340 uint64_t total_sample_count = DirtyStat.page_sampling.total_sample_count;
341 uint64_t total_block_mem_MB = DirtyStat.page_sampling.total_block_mem_MB;
c9a58d71
CZ
342
343 dirtyrate = total_dirty_samples * total_block_mem_MB *
344 1000 / (total_sample_count * msec);
345
346 DirtyStat.dirty_rate = dirtyrate;
347}
7df3aa30 348
00a3f9c6
AG
349/*
350 * Compute hash of a single page of size TARGET_PAGE_SIZE.
351 */
352static uint32_t compute_page_hash(void *ptr)
353{
edd83a70 354 size_t page_size = qemu_target_page_size();
00a3f9c6
AG
355 uint32_t i;
356 uint64_t v1, v2, v3, v4;
357 uint64_t res;
358 const uint64_t *p = ptr;
359
360 v1 = QEMU_XXHASH_SEED + XXH_PRIME64_1 + XXH_PRIME64_2;
361 v2 = QEMU_XXHASH_SEED + XXH_PRIME64_2;
362 v3 = QEMU_XXHASH_SEED + 0;
363 v4 = QEMU_XXHASH_SEED - XXH_PRIME64_1;
edd83a70 364 for (i = 0; i < page_size / 8; i += 4) {
00a3f9c6
AG
365 v1 = XXH64_round(v1, p[i + 0]);
366 v2 = XXH64_round(v2, p[i + 1]);
367 v3 = XXH64_round(v3, p[i + 2]);
368 v4 = XXH64_round(v4, p[i + 3]);
369 }
370 res = XXH64_mergerounds(v1, v2, v3, v4);
edd83a70 371 res += page_size;
00a3f9c6
AG
372 res = XXH64_avalanche(res);
373 return (uint32_t)(res & UINT32_MAX);
374}
375
376
ba0e519f
CZ
377/*
378 * get hash result for the sampled memory with length of TARGET_PAGE_SIZE
379 * in ramblock, which starts from ramblock base address.
380 */
381static uint32_t get_ramblock_vfn_hash(struct RamblockDirtyInfo *info,
382 uint64_t vfn)
383{
00a3f9c6 384 uint32_t hash;
ba0e519f 385
edd83a70
JQ
386 hash = compute_page_hash(info->ramblock_addr +
387 vfn * qemu_target_page_size());
ba0e519f 388
00a3f9c6
AG
389 trace_get_ramblock_vfn_hash(info->idstr, vfn, hash);
390 return hash;
ba0e519f
CZ
391}
392
393static bool save_ramblock_hash(struct RamblockDirtyInfo *info)
394{
395 unsigned int sample_pages_count;
396 int i;
397 GRand *rand;
398
399 sample_pages_count = info->sample_pages_count;
400
401 /* ramblock size less than one page, return success to skip this ramblock */
402 if (unlikely(info->ramblock_pages == 0 || sample_pages_count == 0)) {
403 return true;
404 }
405
406 info->hash_result = g_try_malloc0_n(sample_pages_count,
407 sizeof(uint32_t));
408 if (!info->hash_result) {
409 return false;
410 }
411
412 info->sample_page_vfn = g_try_malloc0_n(sample_pages_count,
413 sizeof(uint64_t));
414 if (!info->sample_page_vfn) {
415 g_free(info->hash_result);
416 return false;
417 }
418
419 rand = g_rand_new();
420 for (i = 0; i < sample_pages_count; i++) {
421 info->sample_page_vfn[i] = g_rand_int_range(rand, 0,
422 info->ramblock_pages - 1);
423 info->hash_result[i] = get_ramblock_vfn_hash(info,
424 info->sample_page_vfn[i]);
425 }
426 g_rand_free(rand);
427
428 return true;
429}
430
431static void get_ramblock_dirty_info(RAMBlock *block,
432 struct RamblockDirtyInfo *info,
433 struct DirtyRateConfig *config)
434{
435 uint64_t sample_pages_per_gigabytes = config->sample_pages_per_gigabytes;
436
437 /* Right shift 30 bits to calc ramblock size in GB */
438 info->sample_pages_count = (qemu_ram_get_used_length(block) *
439 sample_pages_per_gigabytes) >> 30;
440 /* Right shift TARGET_PAGE_BITS to calc page count */
441 info->ramblock_pages = qemu_ram_get_used_length(block) >>
148b1ad8 442 qemu_target_page_bits();
ba0e519f
CZ
443 info->ramblock_addr = qemu_ram_get_host_addr(block);
444 strcpy(info->idstr, qemu_ram_get_idstr(block));
445}
446
cf0bbb49
CZ
447static void free_ramblock_dirty_info(struct RamblockDirtyInfo *infos, int count)
448{
449 int i;
450
451 if (!infos) {
452 return;
453 }
454
455 for (i = 0; i < count; i++) {
456 g_free(infos[i].sample_page_vfn);
457 g_free(infos[i].hash_result);
458 }
459 g_free(infos);
460}
461
f82583cd
CZ
462static bool skip_sample_ramblock(RAMBlock *block)
463{
464 /*
465 * Sample only blocks larger than MIN_RAMBLOCK_SIZE.
466 */
467 if (qemu_ram_get_used_length(block) < (MIN_RAMBLOCK_SIZE << 10)) {
3c0b5dff
CZ
468 trace_skip_sample_ramblock(block->idstr,
469 qemu_ram_get_used_length(block));
f82583cd
CZ
470 return true;
471 }
472
473 return false;
474}
475
ba0e519f
CZ
476static bool record_ramblock_hash_info(struct RamblockDirtyInfo **block_dinfo,
477 struct DirtyRateConfig config,
478 int *block_count)
479{
480 struct RamblockDirtyInfo *info = NULL;
481 struct RamblockDirtyInfo *dinfo = NULL;
482 RAMBlock *block = NULL;
483 int total_count = 0;
484 int index = 0;
485 bool ret = false;
486
487 RAMBLOCK_FOREACH_MIGRATABLE(block) {
f82583cd
CZ
488 if (skip_sample_ramblock(block)) {
489 continue;
490 }
ba0e519f
CZ
491 total_count++;
492 }
493
494 dinfo = g_try_malloc0_n(total_count, sizeof(struct RamblockDirtyInfo));
495 if (dinfo == NULL) {
496 goto out;
497 }
498
499 RAMBLOCK_FOREACH_MIGRATABLE(block) {
f82583cd
CZ
500 if (skip_sample_ramblock(block)) {
501 continue;
502 }
ba0e519f
CZ
503 if (index >= total_count) {
504 break;
505 }
506 info = &dinfo[index];
507 get_ramblock_dirty_info(block, info, &config);
508 if (!save_ramblock_hash(info)) {
509 goto out;
510 }
511 index++;
512 }
513 ret = true;
514
515out:
516 *block_count = index;
517 *block_dinfo = dinfo;
518 return ret;
519}
520
9c04387b
CZ
521static void calc_page_dirty_rate(struct RamblockDirtyInfo *info)
522{
00a3f9c6 523 uint32_t hash;
9c04387b
CZ
524 int i;
525
526 for (i = 0; i < info->sample_pages_count; i++) {
00a3f9c6
AG
527 hash = get_ramblock_vfn_hash(info, info->sample_page_vfn[i]);
528 if (hash != info->hash_result[i]) {
529 trace_calc_page_dirty_rate(info->idstr, hash, info->hash_result[i]);
9c04387b
CZ
530 info->sample_dirty_count++;
531 }
532 }
533}
534
535static struct RamblockDirtyInfo *
536find_block_matched(RAMBlock *block, int count,
537 struct RamblockDirtyInfo *infos)
538{
539 int i;
9c04387b
CZ
540
541 for (i = 0; i < count; i++) {
542 if (!strcmp(infos[i].idstr, qemu_ram_get_idstr(block))) {
543 break;
544 }
545 }
546
547 if (i == count) {
548 return NULL;
549 }
550
551 if (infos[i].ramblock_addr != qemu_ram_get_host_addr(block) ||
552 infos[i].ramblock_pages !=
148b1ad8 553 (qemu_ram_get_used_length(block) >> qemu_target_page_bits())) {
3c0b5dff 554 trace_find_page_matched(block->idstr);
9c04387b
CZ
555 return NULL;
556 }
557
66997c42 558 return &infos[i];
9c04387b
CZ
559}
560
561static bool compare_page_hash_info(struct RamblockDirtyInfo *info,
562 int block_count)
563{
564 struct RamblockDirtyInfo *block_dinfo = NULL;
565 RAMBlock *block = NULL;
566
567 RAMBLOCK_FOREACH_MIGRATABLE(block) {
f82583cd
CZ
568 if (skip_sample_ramblock(block)) {
569 continue;
570 }
9c04387b
CZ
571 block_dinfo = find_block_matched(block, block_count, info);
572 if (block_dinfo == NULL) {
573 continue;
574 }
575 calc_page_dirty_rate(block_dinfo);
576 update_dirtyrate_stat(block_dinfo);
577 }
578
71864ead 579 if (DirtyStat.page_sampling.total_sample_count == 0) {
9c04387b
CZ
580 return false;
581 }
582
583 return true;
584}
585
826b8bc8
HH
586static inline void record_dirtypages_bitmap(DirtyPageRecord *dirty_pages,
587 bool start)
588{
589 if (start) {
590 dirty_pages->start_pages = total_dirty_pages;
591 } else {
592 dirty_pages->end_pages = total_dirty_pages;
593 }
594}
595
826b8bc8
HH
596static inline void dirtyrate_manual_reset_protect(void)
597{
598 RAMBlock *block = NULL;
599
600 WITH_RCU_READ_LOCK_GUARD() {
601 RAMBLOCK_FOREACH_MIGRATABLE(block) {
602 memory_region_clear_dirty_bitmap(block->mr, 0,
603 block->used_length);
604 }
605 }
606}
607
608static void calculate_dirtyrate_dirty_bitmap(struct DirtyRateConfig config)
609{
826b8bc8
HH
610 int64_t start_time;
611 DirtyPageRecord dirty_pages;
612
613 qemu_mutex_lock_iothread();
614 memory_global_dirty_log_start(GLOBAL_DIRTY_DIRTY_RATE);
615
616 /*
617 * 1'round of log sync may return all 1 bits with
618 * KVM_DIRTY_LOG_INITIALLY_SET enable
619 * skip it unconditionally and start dirty tracking
620 * from 2'round of log sync
621 */
1e493be5 622 memory_global_dirty_log_sync(false);
826b8bc8
HH
623
624 /*
625 * reset page protect manually and unconditionally.
626 * this make sure kvm dirty log be cleared if
627 * KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE cap is enabled.
628 */
629 dirtyrate_manual_reset_protect();
630 qemu_mutex_unlock_iothread();
631
632 record_dirtypages_bitmap(&dirty_pages, true);
633
634 start_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
320a6ccc 635 DirtyStat.start_time = qemu_clock_get_ms(QEMU_CLOCK_HOST) / 1000;
826b8bc8 636
34a68001 637 DirtyStat.calc_time_ms = dirty_stat_wait(config.calc_time_ms, start_time);
826b8bc8
HH
638
639 /*
8244166d 640 * do two things.
826b8bc8
HH
641 * 1. fetch dirty bitmap from kvm
642 * 2. stop dirty tracking
643 */
8244166d 644 global_dirty_log_sync(GLOBAL_DIRTY_DIRTY_RATE, true);
826b8bc8
HH
645
646 record_dirtypages_bitmap(&dirty_pages, false);
647
34a68001
AG
648 DirtyStat.dirty_rate = do_calculate_dirtyrate(dirty_pages,
649 DirtyStat.calc_time_ms);
826b8bc8
HH
650}
651
0e21bf24
HH
652static void calculate_dirtyrate_dirty_ring(struct DirtyRateConfig config)
653{
0e21bf24
HH
654 uint64_t dirtyrate = 0;
655 uint64_t dirtyrate_sum = 0;
0e21bf24
HH
656 int i = 0;
657
8244166d
HH
658 /* start log sync */
659 global_dirty_log_change(GLOBAL_DIRTY_DIRTY_RATE, true);
0e21bf24 660
320a6ccc 661 DirtyStat.start_time = qemu_clock_get_ms(QEMU_CLOCK_HOST) / 1000;
0e21bf24 662
8244166d 663 /* calculate vcpu dirtyrate */
34a68001
AG
664 DirtyStat.calc_time_ms = vcpu_calculate_dirtyrate(config.calc_time_ms,
665 &DirtyStat.dirty_ring,
666 GLOBAL_DIRTY_DIRTY_RATE,
667 true);
0e21bf24 668
8244166d 669 /* calculate vm dirtyrate */
0e21bf24 670 for (i = 0; i < DirtyStat.dirty_ring.nvcpu; i++) {
8244166d 671 dirtyrate = DirtyStat.dirty_ring.rates[i].dirty_rate;
0e21bf24
HH
672 DirtyStat.dirty_ring.rates[i].dirty_rate = dirtyrate;
673 dirtyrate_sum += dirtyrate;
674 }
675
676 DirtyStat.dirty_rate = dirtyrate_sum;
0e21bf24
HH
677}
678
679static void calculate_dirtyrate_sample_vm(struct DirtyRateConfig config)
4240dcee 680{
cf0bbb49
CZ
681 struct RamblockDirtyInfo *block_dinfo = NULL;
682 int block_count = 0;
cf0bbb49
CZ
683 int64_t initial_time;
684
cf0bbb49
CZ
685 rcu_read_lock();
686 initial_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
320a6ccc 687 DirtyStat.start_time = qemu_clock_get_ms(QEMU_CLOCK_HOST) / 1000;
cf0bbb49
CZ
688 if (!record_ramblock_hash_info(&block_dinfo, config, &block_count)) {
689 goto out;
690 }
691 rcu_read_unlock();
692
34a68001
AG
693 DirtyStat.calc_time_ms = dirty_stat_wait(config.calc_time_ms,
694 initial_time);
cf0bbb49
CZ
695
696 rcu_read_lock();
697 if (!compare_page_hash_info(block_dinfo, block_count)) {
698 goto out;
699 }
700
34a68001 701 update_dirtyrate(DirtyStat.calc_time_ms);
cf0bbb49
CZ
702
703out:
704 rcu_read_unlock();
705 free_ramblock_dirty_info(block_dinfo, block_count);
4240dcee
CZ
706}
707
0e21bf24
HH
708static void calculate_dirtyrate(struct DirtyRateConfig config)
709{
826b8bc8
HH
710 if (config.mode == DIRTY_RATE_MEASURE_MODE_DIRTY_BITMAP) {
711 calculate_dirtyrate_dirty_bitmap(config);
712 } else if (config.mode == DIRTY_RATE_MEASURE_MODE_DIRTY_RING) {
0e21bf24
HH
713 calculate_dirtyrate_dirty_ring(config);
714 } else {
715 calculate_dirtyrate_sample_vm(config);
716 }
717
718 trace_dirtyrate_calculate(DirtyStat.dirty_rate);
719}
720
4240dcee
CZ
721void *get_dirtyrate_thread(void *arg)
722{
723 struct DirtyRateConfig config = *(struct DirtyRateConfig *)arg;
7df3aa30 724 int ret;
15eb2d64 725 rcu_register_thread();
7df3aa30
CZ
726
727 ret = dirtyrate_set_state(&CalculatingState, DIRTY_RATE_STATUS_UNSTARTED,
728 DIRTY_RATE_STATUS_MEASURING);
729 if (ret == -1) {
730 error_report("change dirtyrate state failed.");
731 return NULL;
732 }
4240dcee
CZ
733
734 calculate_dirtyrate(config);
735
7df3aa30
CZ
736 ret = dirtyrate_set_state(&CalculatingState, DIRTY_RATE_STATUS_MEASURING,
737 DIRTY_RATE_STATUS_MEASURED);
738 if (ret == -1) {
739 error_report("change dirtyrate state failed.");
740 }
15eb2d64
HH
741
742 rcu_unregister_thread();
4240dcee
CZ
743 return NULL;
744}
4c437254 745
0e21bf24 746void qmp_calc_dirty_rate(int64_t calc_time,
34a68001
AG
747 bool has_calc_time_unit,
748 TimeUnit calc_time_unit,
0e21bf24
HH
749 bool has_sample_pages,
750 int64_t sample_pages,
751 bool has_mode,
752 DirtyRateMeasureMode mode,
753 Error **errp)
4c437254
CZ
754{
755 static struct DirtyRateConfig config;
756 QemuThread thread;
757 int ret;
758
759 /*
760 * If the dirty rate is already being measured, don't attempt to start.
761 */
762 if (qatomic_read(&CalculatingState) == DIRTY_RATE_STATUS_MEASURING) {
763 error_setg(errp, "the dirty rate is already being measured.");
764 return;
765 }
766
34a68001
AG
767 int64_t calc_time_ms = convert_time_unit(
768 calc_time,
769 has_calc_time_unit ? calc_time_unit : TIME_UNIT_SECOND,
770 TIME_UNIT_MILLISECOND
771 );
772
773 if (!is_calc_time_valid(calc_time_ms)) {
774 error_setg(errp, "Calculation time is out of range [%dms, %dms].",
775 MIN_CALC_TIME_MS, MAX_CALC_TIME_MS);
4c437254
CZ
776 return;
777 }
778
0e21bf24
HH
779 if (!has_mode) {
780 mode = DIRTY_RATE_MEASURE_MODE_PAGE_SAMPLING;
781 }
782
bd9510d3
ZD
783 if (has_sample_pages && mode != DIRTY_RATE_MEASURE_MODE_PAGE_SAMPLING) {
784 error_setg(errp, "sample-pages is used only in page-sampling mode");
0e21bf24
HH
785 return;
786 }
787
7afa08cd
HH
788 if (has_sample_pages) {
789 if (!is_sample_pages_valid(sample_pages)) {
790 error_setg(errp, "sample-pages is out of range[%d, %d].",
791 MIN_SAMPLE_PAGE_COUNT,
792 MAX_SAMPLE_PAGE_COUNT);
793 return;
794 }
795 } else {
796 sample_pages = DIRTYRATE_DEFAULT_SAMPLE_PAGES;
797 }
798
0e21bf24
HH
799 /*
800 * dirty ring mode only works when kvm dirty ring is enabled.
826b8bc8 801 * on the contrary, dirty bitmap mode is not.
0e21bf24 802 */
826b8bc8
HH
803 if (((mode == DIRTY_RATE_MEASURE_MODE_DIRTY_RING) &&
804 !kvm_dirty_ring_enabled()) ||
805 ((mode == DIRTY_RATE_MEASURE_MODE_DIRTY_BITMAP) &&
806 kvm_dirty_ring_enabled())) {
807 error_setg(errp, "mode %s is not enabled, use other method instead.",
808 DirtyRateMeasureMode_str(mode));
809 return;
0e21bf24
HH
810 }
811
4c437254
CZ
812 /*
813 * Init calculation state as unstarted.
814 */
815 ret = dirtyrate_set_state(&CalculatingState, CalculatingState,
816 DIRTY_RATE_STATUS_UNSTARTED);
817 if (ret == -1) {
818 error_setg(errp, "init dirty rate calculation state failed.");
819 return;
820 }
821
34a68001 822 config.calc_time_ms = calc_time_ms;
7afa08cd 823 config.sample_pages_per_gigabytes = sample_pages;
0e21bf24
HH
824 config.mode = mode;
825
826 cleanup_dirtyrate_stat(config);
827
828 /*
829 * update dirty rate mode so that we can figure out what mode has
830 * been used in last calculation
831 **/
832 dirtyrate_mode = mode;
9865d0f6 833
320a6ccc 834 init_dirtyrate_stat(config);
9865d0f6 835
4c437254
CZ
836 qemu_thread_create(&thread, "get_dirtyrate", get_dirtyrate_thread,
837 (void *)&config, QEMU_THREAD_DETACHED);
838}
839
34a68001
AG
840
841struct DirtyRateInfo *qmp_query_dirty_rate(bool has_calc_time_unit,
842 TimeUnit calc_time_unit,
843 Error **errp)
4c437254 844{
34a68001
AG
845 return query_dirty_rate_info(
846 has_calc_time_unit ? calc_time_unit : TIME_UNIT_SECOND);
4c437254 847}
a4a571d9
PX
848
849void hmp_info_dirty_rate(Monitor *mon, const QDict *qdict)
850{
34a68001 851 DirtyRateInfo *info = query_dirty_rate_info(TIME_UNIT_SECOND);
a4a571d9
PX
852
853 monitor_printf(mon, "Status: %s\n",
854 DirtyRateStatus_str(info->status));
855 monitor_printf(mon, "Start Time: %"PRIi64" (ms)\n",
856 info->start_time);
bd9510d3
ZD
857 if (info->mode == DIRTY_RATE_MEASURE_MODE_PAGE_SAMPLING) {
858 monitor_printf(mon, "Sample Pages: %"PRIu64" (per GB)\n",
859 info->sample_pages);
860 }
a4a571d9
PX
861 monitor_printf(mon, "Period: %"PRIi64" (sec)\n",
862 info->calc_time);
0e21bf24
HH
863 monitor_printf(mon, "Mode: %s\n",
864 DirtyRateMeasureMode_str(info->mode));
a4a571d9
PX
865 monitor_printf(mon, "Dirty rate: ");
866 if (info->has_dirty_rate) {
867 monitor_printf(mon, "%"PRIi64" (MB/s)\n", info->dirty_rate);
0e21bf24
HH
868 if (info->has_vcpu_dirty_rate) {
869 DirtyRateVcpuList *rate, *head = info->vcpu_dirty_rate;
870 for (rate = head; rate != NULL; rate = rate->next) {
871 monitor_printf(mon, "vcpu[%"PRIi64"], Dirty rate: %"PRIi64
872 " (MB/s)\n", rate->value->id,
873 rate->value->dirty_rate);
874 }
875 }
a4a571d9
PX
876 } else {
877 monitor_printf(mon, "(not ready)\n");
878 }
0e21bf24
HH
879
880 qapi_free_DirtyRateVcpuList(info->vcpu_dirty_rate);
a4a571d9
PX
881 g_free(info);
882}
883
884void hmp_calc_dirty_rate(Monitor *mon, const QDict *qdict)
885{
886 int64_t sec = qdict_get_try_int(qdict, "second", 0);
887 int64_t sample_pages = qdict_get_try_int(qdict, "sample_pages_per_GB", -1);
888 bool has_sample_pages = (sample_pages != -1);
0e21bf24 889 bool dirty_ring = qdict_get_try_bool(qdict, "dirty_ring", false);
826b8bc8
HH
890 bool dirty_bitmap = qdict_get_try_bool(qdict, "dirty_bitmap", false);
891 DirtyRateMeasureMode mode = DIRTY_RATE_MEASURE_MODE_PAGE_SAMPLING;
a4a571d9
PX
892 Error *err = NULL;
893
894 if (!sec) {
895 monitor_printf(mon, "Incorrect period length specified!\n");
896 return;
897 }
898
826b8bc8
HH
899 if (dirty_ring && dirty_bitmap) {
900 monitor_printf(mon, "Either dirty ring or dirty bitmap "
901 "can be specified!\n");
902 return;
903 }
904
905 if (dirty_bitmap) {
906 mode = DIRTY_RATE_MEASURE_MODE_DIRTY_BITMAP;
907 } else if (dirty_ring) {
908 mode = DIRTY_RATE_MEASURE_MODE_DIRTY_RING;
909 }
910
34a68001
AG
911 qmp_calc_dirty_rate(sec, /* calc-time */
912 false, TIME_UNIT_SECOND, /* calc-time-unit */
913 has_sample_pages, sample_pages,
914 true, mode,
915 &err);
a4a571d9
PX
916 if (err) {
917 hmp_handle_error(mon, err);
918 return;
919 }
920
921 monitor_printf(mon, "Starting dirty rate measurement with period %"PRIi64
922 " seconds\n", sec);
923 monitor_printf(mon, "[Please use 'info dirty_rate' to check results]\n");
924}