]> git.proxmox.com Git - mirror_qemu.git/blame - softmmu/dirtylimit.c
migration: Introduce dirty-limit capability
[mirror_qemu.git] / softmmu / dirtylimit.c
CommitLineData
cc2b33ea
HH
1/*
2 * Dirty page rate limit implementation code
3 *
4 * Copyright (c) 2022 CHINA TELECOM CO.,LTD.
5 *
6 * Authors:
7 * Hyman Huang(黄勇) <huangy81@chinatelecom.cn>
8 *
9 * This work is licensed under the terms of the GNU GPL, version 2 or later.
10 * See the COPYING file in the top-level directory.
11 */
12
13#include "qemu/osdep.h"
cc2b33ea
HH
14#include "qemu/main-loop.h"
15#include "qapi/qapi-commands-migration.h"
f3b2e38c
HH
16#include "qapi/qmp/qdict.h"
17#include "qapi/error.h"
cc2b33ea
HH
18#include "sysemu/dirtyrate.h"
19#include "sysemu/dirtylimit.h"
f3b2e38c
HH
20#include "monitor/hmp.h"
21#include "monitor/monitor.h"
cc2b33ea 22#include "exec/memory.h"
30ee29fd 23#include "exec/target_page.h"
cc2b33ea 24#include "hw/boards.h"
baa60983
HH
25#include "sysemu/kvm.h"
26#include "trace.h"
dc623955
HH
27#include "migration/misc.h"
28#include "migration/migration.h"
29#include "migration/options.h"
baa60983
HH
30
/*
 * The throttle of a vcpu is left untouched once the gap between its
 * measured dirty page rate and its quota is no larger than
 * DIRTYLIMIT_TOLERANCE_RANGE.
 */
#define DIRTYLIMIT_TOLERANCE_RANGE  25  /* MB/s */
/*
 * When the gap between quota and measured dirty page rate, expressed
 * as a percentage of the larger of the two, exceeds
 * DIRTYLIMIT_LINEAR_ADJUSTMENT_PCT, the vcpu sleep time is adjusted
 * linearly (proportionally to the gap).
 * Otherwise it is increased or decreased by a fixed step.
 */
#define DIRTYLIMIT_LINEAR_ADJUSTMENT_PCT    50
/*
 * Upper bound for the share of vcpu sleep time within one cycle,
 * where a cycle is made of the dirty ring full time plus the
 * sleep time.
 */
#define DIRTYLIMIT_THROTTLE_PCT_MAX 99
cc2b33ea
HH
48
49struct {
50 VcpuStat stat;
51 bool running;
52 QemuThread thread;
53} *vcpu_dirty_rate_stat;
54
baa60983
HH
/* Dirty page rate limit settings of a single vcpu. */
typedef struct VcpuDirtyLimitState {
    /* Index of the vcpu this entry describes */
    int cpu_index;
    /* True while a dirty page rate limit is in force for this vcpu */
    bool enabled;
    /*
     * Dirty page rate quota in MB/s;
     * zero when the limit is not enabled.
     */
    uint64_t quota;
} VcpuDirtyLimitState;
64
65struct {
66 VcpuDirtyLimitState *states;
67 /* Max cpus number configured by user */
68 int max_cpus;
69 /* Number of vcpu under dirtylimit */
70 int limited_nvcpu;
71} *dirtylimit_state;
72
73/* protect dirtylimit_state */
74static QemuMutex dirtylimit_mutex;
75
76/* dirtylimit thread quit if dirtylimit_quit is true */
77static bool dirtylimit_quit;
78
cc2b33ea
HH
79static void vcpu_dirty_rate_stat_collect(void)
80{
dc623955 81 MigrationState *s = migrate_get_current();
cc2b33ea
HH
82 VcpuStat stat;
83 int i = 0;
dc623955
HH
84 int64_t period = DIRTYLIMIT_CALC_TIME_MS;
85
86 if (migrate_dirty_limit() &&
87 migration_is_active(s)) {
88 period = s->parameters.x_vcpu_dirty_limit_period;
89 }
cc2b33ea
HH
90
91 /* calculate vcpu dirtyrate */
dc623955
HH
92 vcpu_calculate_dirtyrate(period,
93 &stat,
94 GLOBAL_DIRTY_LIMIT,
95 false);
cc2b33ea
HH
96
97 for (i = 0; i < stat.nvcpu; i++) {
98 vcpu_dirty_rate_stat->stat.rates[i].id = i;
99 vcpu_dirty_rate_stat->stat.rates[i].dirty_rate =
100 stat.rates[i].dirty_rate;
101 }
102
103 free(stat.rates);
104}
105
106static void *vcpu_dirty_rate_stat_thread(void *opaque)
107{
108 rcu_register_thread();
109
110 /* start log sync */
111 global_dirty_log_change(GLOBAL_DIRTY_LIMIT, true);
112
113 while (qatomic_read(&vcpu_dirty_rate_stat->running)) {
114 vcpu_dirty_rate_stat_collect();
baa60983
HH
115 if (dirtylimit_in_service()) {
116 dirtylimit_process();
117 }
cc2b33ea
HH
118 }
119
120 /* stop log sync */
121 global_dirty_log_change(GLOBAL_DIRTY_LIMIT, false);
122
123 rcu_unregister_thread();
124 return NULL;
125}
126
127int64_t vcpu_dirty_rate_get(int cpu_index)
128{
129 DirtyRateVcpu *rates = vcpu_dirty_rate_stat->stat.rates;
130 return qatomic_read_i64(&rates[cpu_index].dirty_rate);
131}
132
133void vcpu_dirty_rate_stat_start(void)
134{
135 if (qatomic_read(&vcpu_dirty_rate_stat->running)) {
136 return;
137 }
138
139 qatomic_set(&vcpu_dirty_rate_stat->running, 1);
140 qemu_thread_create(&vcpu_dirty_rate_stat->thread,
141 "dirtyrate-stat",
142 vcpu_dirty_rate_stat_thread,
143 NULL,
144 QEMU_THREAD_JOINABLE);
145}
146
147void vcpu_dirty_rate_stat_stop(void)
148{
149 qatomic_set(&vcpu_dirty_rate_stat->running, 0);
baa60983 150 dirtylimit_state_unlock();
cc2b33ea
HH
151 qemu_mutex_unlock_iothread();
152 qemu_thread_join(&vcpu_dirty_rate_stat->thread);
153 qemu_mutex_lock_iothread();
baa60983 154 dirtylimit_state_lock();
cc2b33ea
HH
155}
156
157void vcpu_dirty_rate_stat_initialize(void)
158{
159 MachineState *ms = MACHINE(qdev_get_machine());
160 int max_cpus = ms->smp.max_cpus;
161
162 vcpu_dirty_rate_stat =
163 g_malloc0(sizeof(*vcpu_dirty_rate_stat));
164
165 vcpu_dirty_rate_stat->stat.nvcpu = max_cpus;
166 vcpu_dirty_rate_stat->stat.rates =
c5e8d518 167 g_new0(DirtyRateVcpu, max_cpus);
cc2b33ea
HH
168
169 vcpu_dirty_rate_stat->running = false;
170}
171
172void vcpu_dirty_rate_stat_finalize(void)
173{
174 free(vcpu_dirty_rate_stat->stat.rates);
175 vcpu_dirty_rate_stat->stat.rates = NULL;
176
177 free(vcpu_dirty_rate_stat);
178 vcpu_dirty_rate_stat = NULL;
179}
baa60983
HH
180
181void dirtylimit_state_lock(void)
182{
183 qemu_mutex_lock(&dirtylimit_mutex);
184}
185
186void dirtylimit_state_unlock(void)
187{
188 qemu_mutex_unlock(&dirtylimit_mutex);
189}
190
191static void
192__attribute__((__constructor__)) dirtylimit_mutex_init(void)
193{
194 qemu_mutex_init(&dirtylimit_mutex);
195}
196
197static inline VcpuDirtyLimitState *dirtylimit_vcpu_get_state(int cpu_index)
198{
199 return &dirtylimit_state->states[cpu_index];
200}
201
202void dirtylimit_state_initialize(void)
203{
204 MachineState *ms = MACHINE(qdev_get_machine());
205 int max_cpus = ms->smp.max_cpus;
206 int i;
207
208 dirtylimit_state = g_malloc0(sizeof(*dirtylimit_state));
209
210 dirtylimit_state->states =
c5e8d518 211 g_new0(VcpuDirtyLimitState, max_cpus);
baa60983
HH
212
213 for (i = 0; i < max_cpus; i++) {
214 dirtylimit_state->states[i].cpu_index = i;
215 }
216
217 dirtylimit_state->max_cpus = max_cpus;
218 trace_dirtylimit_state_initialize(max_cpus);
219}
220
221void dirtylimit_state_finalize(void)
222{
223 free(dirtylimit_state->states);
224 dirtylimit_state->states = NULL;
225
226 free(dirtylimit_state);
227 dirtylimit_state = NULL;
228
229 trace_dirtylimit_state_finalize();
230}
231
232bool dirtylimit_in_service(void)
233{
234 return !!dirtylimit_state;
235}
236
237bool dirtylimit_vcpu_index_valid(int cpu_index)
238{
239 MachineState *ms = MACHINE(qdev_get_machine());
240
241 return !(cpu_index < 0 ||
242 cpu_index >= ms->smp.max_cpus);
243}
244
/*
 * Estimate how long, in microseconds, it takes to fill the dirty ring.
 *
 * The estimate divides the dirty ring size (in MiB) by the highest
 * dirty page rate (MB/s) passed to this function so far, which is
 * remembered across calls. The first call must therefore pass a
 * non-zero @dirtyrate, otherwise the division is by zero.
 */
static uint64_t dirtylimit_dirty_ring_full_time(uint64_t dirtyrate)
{
    /* Highest rate seen over all calls */
    static uint64_t max_dirtyrate;
    uint64_t ring_size_mib =
        qemu_target_pages_to_MiB(kvm_dirty_ring_size());

    max_dirtyrate = (max_dirtyrate < dirtyrate) ? dirtyrate : max_dirtyrate;

    return ring_size_mib * 1000000 / max_dirtyrate;
}
258
259static inline bool dirtylimit_done(uint64_t quota,
260 uint64_t current)
261{
262 uint64_t min, max;
263
264 min = MIN(quota, current);
265 max = MAX(quota, current);
266
267 return ((max - min) <= DIRTYLIMIT_TOLERANCE_RANGE) ? true : false;
268}
269
270static inline bool
271dirtylimit_need_linear_adjustment(uint64_t quota,
272 uint64_t current)
273{
274 uint64_t min, max;
275
276 min = MIN(quota, current);
277 max = MAX(quota, current);
278
279 return ((max - min) * 100 / max) > DIRTYLIMIT_LINEAR_ADJUSTMENT_PCT;
280}
281
282static void dirtylimit_set_throttle(CPUState *cpu,
283 uint64_t quota,
284 uint64_t current)
285{
286 int64_t ring_full_time_us = 0;
287 uint64_t sleep_pct = 0;
288 uint64_t throttle_us = 0;
289
290 if (current == 0) {
291 cpu->throttle_us_per_full = 0;
292 return;
293 }
294
295 ring_full_time_us = dirtylimit_dirty_ring_full_time(current);
296
297 if (dirtylimit_need_linear_adjustment(quota, current)) {
298 if (quota < current) {
299 sleep_pct = (current - quota) * 100 / current;
300 throttle_us =
301 ring_full_time_us * sleep_pct / (double)(100 - sleep_pct);
302 cpu->throttle_us_per_full += throttle_us;
303 } else {
304 sleep_pct = (quota - current) * 100 / quota;
305 throttle_us =
306 ring_full_time_us * sleep_pct / (double)(100 - sleep_pct);
307 cpu->throttle_us_per_full -= throttle_us;
308 }
309
310 trace_dirtylimit_throttle_pct(cpu->cpu_index,
311 sleep_pct,
312 throttle_us);
313 } else {
314 if (quota < current) {
315 cpu->throttle_us_per_full += ring_full_time_us / 10;
316 } else {
317 cpu->throttle_us_per_full -= ring_full_time_us / 10;
318 }
319 }
320
321 /*
322 * TODO: in the big kvm_dirty_ring_size case (eg: 65536, or other scenario),
323 * current dirty page rate may never reach the quota, we should stop
324 * increasing sleep time?
325 */
326 cpu->throttle_us_per_full = MIN(cpu->throttle_us_per_full,
327 ring_full_time_us * DIRTYLIMIT_THROTTLE_PCT_MAX);
328
329 cpu->throttle_us_per_full = MAX(cpu->throttle_us_per_full, 0);
330}
331
332static void dirtylimit_adjust_throttle(CPUState *cpu)
333{
334 uint64_t quota = 0;
335 uint64_t current = 0;
336 int cpu_index = cpu->cpu_index;
337
338 quota = dirtylimit_vcpu_get_state(cpu_index)->quota;
339 current = vcpu_dirty_rate_get(cpu_index);
340
341 if (!dirtylimit_done(quota, current)) {
342 dirtylimit_set_throttle(cpu, quota, current);
343 }
344
345 return;
346}
347
348void dirtylimit_process(void)
349{
350 CPUState *cpu;
351
352 if (!qatomic_read(&dirtylimit_quit)) {
353 dirtylimit_state_lock();
354
355 if (!dirtylimit_in_service()) {
356 dirtylimit_state_unlock();
357 return;
358 }
359
360 CPU_FOREACH(cpu) {
361 if (!dirtylimit_vcpu_get_state(cpu->cpu_index)->enabled) {
362 continue;
363 }
364 dirtylimit_adjust_throttle(cpu);
365 }
366 dirtylimit_state_unlock();
367 }
368}
369
370void dirtylimit_change(bool start)
371{
372 if (start) {
373 qatomic_set(&dirtylimit_quit, 0);
374 } else {
375 qatomic_set(&dirtylimit_quit, 1);
376 }
377}
378
379void dirtylimit_set_vcpu(int cpu_index,
380 uint64_t quota,
381 bool enable)
382{
383 trace_dirtylimit_set_vcpu(cpu_index, quota);
384
385 if (enable) {
386 dirtylimit_state->states[cpu_index].quota = quota;
387 if (!dirtylimit_vcpu_get_state(cpu_index)->enabled) {
388 dirtylimit_state->limited_nvcpu++;
389 }
390 } else {
391 dirtylimit_state->states[cpu_index].quota = 0;
392 if (dirtylimit_state->states[cpu_index].enabled) {
393 dirtylimit_state->limited_nvcpu--;
394 }
395 }
396
397 dirtylimit_state->states[cpu_index].enabled = enable;
398}
399
400void dirtylimit_set_all(uint64_t quota,
401 bool enable)
402{
403 MachineState *ms = MACHINE(qdev_get_machine());
404 int max_cpus = ms->smp.max_cpus;
405 int i;
406
407 for (i = 0; i < max_cpus; i++) {
408 dirtylimit_set_vcpu(i, quota, enable);
409 }
410}
411
412void dirtylimit_vcpu_execute(CPUState *cpu)
413{
414 if (dirtylimit_in_service() &&
415 dirtylimit_vcpu_get_state(cpu->cpu_index)->enabled &&
416 cpu->throttle_us_per_full) {
417 trace_dirtylimit_vcpu_execute(cpu->cpu_index,
418 cpu->throttle_us_per_full);
419 usleep(cpu->throttle_us_per_full);
420 }
421}
f3b2e38c
HH
422
/*
 * Bring the dirty limit into service: allocate its state, allow
 * throttle processing, then allocate and start the dirty page rate
 * sampler.
 */
static void dirtylimit_init(void)
{
    /* Dirty limit state and processing switch */
    dirtylimit_state_initialize();
    dirtylimit_change(true);

    /* Dirty page rate sampling thread */
    vcpu_dirty_rate_stat_initialize();
    vcpu_dirty_rate_stat_start();
}
430
/*
 * Take the dirty limit out of service: stop and free the dirty page
 * rate sampler, disable throttle processing, then free the dirty
 * limit state.
 */
static void dirtylimit_cleanup(void)
{
    /* Dirty page rate sampling thread */
    vcpu_dirty_rate_stat_stop();
    vcpu_dirty_rate_stat_finalize();

    /* Processing switch and dirty limit state */
    dirtylimit_change(false);
    dirtylimit_state_finalize();
}
438
439void qmp_cancel_vcpu_dirty_limit(bool has_cpu_index,
440 int64_t cpu_index,
441 Error **errp)
442{
443 if (!kvm_enabled() || !kvm_dirty_ring_enabled()) {
444 return;
445 }
446
447 if (has_cpu_index && !dirtylimit_vcpu_index_valid(cpu_index)) {
448 error_setg(errp, "incorrect cpu index specified");
449 return;
450 }
451
452 if (!dirtylimit_in_service()) {
453 return;
454 }
455
456 dirtylimit_state_lock();
457
458 if (has_cpu_index) {
459 dirtylimit_set_vcpu(cpu_index, 0, false);
460 } else {
461 dirtylimit_set_all(0, false);
462 }
463
464 if (!dirtylimit_state->limited_nvcpu) {
465 dirtylimit_cleanup();
466 }
467
468 dirtylimit_state_unlock();
469}
470
471void hmp_cancel_vcpu_dirty_limit(Monitor *mon, const QDict *qdict)
472{
473 int64_t cpu_index = qdict_get_try_int(qdict, "cpu_index", -1);
474 Error *err = NULL;
475
476 qmp_cancel_vcpu_dirty_limit(!!(cpu_index != -1), cpu_index, &err);
477 if (err) {
478 hmp_handle_error(mon, err);
479 return;
480 }
481
482 monitor_printf(mon, "[Please use 'info vcpu_dirty_limit' to query "
483 "dirty limit for virtual CPU]\n");
484}
485
486void qmp_set_vcpu_dirty_limit(bool has_cpu_index,
487 int64_t cpu_index,
488 uint64_t dirty_rate,
489 Error **errp)
490{
491 if (!kvm_enabled() || !kvm_dirty_ring_enabled()) {
492 error_setg(errp, "dirty page limit feature requires KVM with"
493 " accelerator property 'dirty-ring-size' set'");
494 return;
495 }
496
497 if (has_cpu_index && !dirtylimit_vcpu_index_valid(cpu_index)) {
498 error_setg(errp, "incorrect cpu index specified");
499 return;
500 }
501
502 if (!dirty_rate) {
503 qmp_cancel_vcpu_dirty_limit(has_cpu_index, cpu_index, errp);
504 return;
505 }
506
507 dirtylimit_state_lock();
508
509 if (!dirtylimit_in_service()) {
510 dirtylimit_init();
511 }
512
513 if (has_cpu_index) {
514 dirtylimit_set_vcpu(cpu_index, dirty_rate, true);
515 } else {
516 dirtylimit_set_all(dirty_rate, true);
517 }
518
519 dirtylimit_state_unlock();
520}
521
522void hmp_set_vcpu_dirty_limit(Monitor *mon, const QDict *qdict)
523{
524 int64_t dirty_rate = qdict_get_int(qdict, "dirty_rate");
525 int64_t cpu_index = qdict_get_try_int(qdict, "cpu_index", -1);
526 Error *err = NULL;
527
140e5a76
HH
528 if (dirty_rate < 0) {
529 error_setg(&err, "invalid dirty page limit %" PRId64, dirty_rate);
530 goto out;
f3b2e38c
HH
531 }
532
140e5a76
HH
533 qmp_set_vcpu_dirty_limit(!!(cpu_index != -1), cpu_index, dirty_rate, &err);
534
535out:
536 hmp_handle_error(mon, err);
f3b2e38c
HH
537}
538
539static struct DirtyLimitInfo *dirtylimit_query_vcpu(int cpu_index)
540{
541 DirtyLimitInfo *info = NULL;
542
543 info = g_malloc0(sizeof(*info));
544 info->cpu_index = cpu_index;
545 info->limit_rate = dirtylimit_vcpu_get_state(cpu_index)->quota;
546 info->current_rate = vcpu_dirty_rate_get(cpu_index);
547
548 return info;
549}
550
551static struct DirtyLimitInfoList *dirtylimit_query_all(void)
552{
553 int i, index;
554 DirtyLimitInfo *info = NULL;
555 DirtyLimitInfoList *head = NULL, **tail = &head;
556
557 dirtylimit_state_lock();
558
559 if (!dirtylimit_in_service()) {
560 dirtylimit_state_unlock();
561 return NULL;
562 }
563
564 for (i = 0; i < dirtylimit_state->max_cpus; i++) {
565 index = dirtylimit_state->states[i].cpu_index;
566 if (dirtylimit_vcpu_get_state(index)->enabled) {
567 info = dirtylimit_query_vcpu(index);
568 QAPI_LIST_APPEND(tail, info);
569 }
570 }
571
572 dirtylimit_state_unlock();
573
574 return head;
575}
576
577struct DirtyLimitInfoList *qmp_query_vcpu_dirty_limit(Error **errp)
578{
579 if (!dirtylimit_in_service()) {
580 return NULL;
581 }
582
583 return dirtylimit_query_all();
584}
585
586void hmp_info_vcpu_dirty_limit(Monitor *mon, const QDict *qdict)
587{
588 DirtyLimitInfoList *limit, *head, *info = NULL;
589 Error *err = NULL;
590
591 if (!dirtylimit_in_service()) {
592 monitor_printf(mon, "Dirty page limit not enabled!\n");
593 return;
594 }
595
596 info = qmp_query_vcpu_dirty_limit(&err);
597 if (err) {
598 hmp_handle_error(mon, err);
599 return;
600 }
601
602 head = info;
603 for (limit = head; limit != NULL; limit = limit->next) {
604 monitor_printf(mon, "vcpu[%"PRIi64"], limit rate %"PRIi64 " (MB/s),"
605 " current rate %"PRIi64 " (MB/s)\n",
606 limit->value->cpu_index,
607 limit->value->limit_rate,
608 limit->value->current_rate);
609 }
610
611 g_free(info);
612}