/*
 * Dirty page rate limit implementation code
 *
 * Copyright (c) 2022 CHINA TELECOM CO.,LTD.
 *
 * Authors:
 *  Hyman Huang(黄勇) <huangy81@chinatelecom.cn>
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or later.
 * See the COPYING file in the top-level directory.
 */
13 #include "qemu/osdep.h"
14 #include "qapi/error.h"
15 #include "qemu/main-loop.h"
16 #include "qapi/qapi-commands-migration.h"
17 #include "sysemu/dirtyrate.h"
18 #include "sysemu/dirtylimit.h"
19 #include "exec/memory.h"
20 #include "hw/boards.h"
21 #include "sysemu/kvm.h"
/*
 * Dirtylimit stops working if the dirty page rate error
 * value is less than DIRTYLIMIT_TOLERANCE_RANGE.
 */
28 #define DIRTYLIMIT_TOLERANCE_RANGE 25 /* MB/s */
/*
 * Adjust the vcpu sleep time linearly when the dirty page
 * rate error percentage exceeds
 * DIRTYLIMIT_LINEAR_ADJUSTMENT_PCT.
 * Otherwise, apply a fixed plus-or-minus adjustment.
 */
35 #define DIRTYLIMIT_LINEAR_ADJUSTMENT_PCT 50
/*
 * Max vcpu sleep time percentage during a cycle
 * composed of dirty ring full and sleep time.
 */
40 #define DIRTYLIMIT_THROTTLE_PCT_MAX 99
46 } *vcpu_dirty_rate_stat
;
48 typedef struct VcpuDirtyLimitState
{
52 * Quota dirty page rate, unit is MB/s
53 * zero if not enabled.
56 } VcpuDirtyLimitState
;
59 VcpuDirtyLimitState
*states
;
60 /* Max cpus number configured by user */
62 /* Number of vcpu under dirtylimit */
66 /* protect dirtylimit_state */
67 static QemuMutex dirtylimit_mutex
;
69 /* dirtylimit thread quit if dirtylimit_quit is true */
70 static bool dirtylimit_quit
;
72 static void vcpu_dirty_rate_stat_collect(void)
77 /* calculate vcpu dirtyrate */
78 vcpu_calculate_dirtyrate(DIRTYLIMIT_CALC_TIME_MS
,
83 for (i
= 0; i
< stat
.nvcpu
; i
++) {
84 vcpu_dirty_rate_stat
->stat
.rates
[i
].id
= i
;
85 vcpu_dirty_rate_stat
->stat
.rates
[i
].dirty_rate
=
86 stat
.rates
[i
].dirty_rate
;
92 static void *vcpu_dirty_rate_stat_thread(void *opaque
)
94 rcu_register_thread();
97 global_dirty_log_change(GLOBAL_DIRTY_LIMIT
, true);
99 while (qatomic_read(&vcpu_dirty_rate_stat
->running
)) {
100 vcpu_dirty_rate_stat_collect();
101 if (dirtylimit_in_service()) {
102 dirtylimit_process();
107 global_dirty_log_change(GLOBAL_DIRTY_LIMIT
, false);
109 rcu_unregister_thread();
113 int64_t vcpu_dirty_rate_get(int cpu_index
)
115 DirtyRateVcpu
*rates
= vcpu_dirty_rate_stat
->stat
.rates
;
116 return qatomic_read_i64(&rates
[cpu_index
].dirty_rate
);
119 void vcpu_dirty_rate_stat_start(void)
121 if (qatomic_read(&vcpu_dirty_rate_stat
->running
)) {
125 qatomic_set(&vcpu_dirty_rate_stat
->running
, 1);
126 qemu_thread_create(&vcpu_dirty_rate_stat
->thread
,
128 vcpu_dirty_rate_stat_thread
,
130 QEMU_THREAD_JOINABLE
);
133 void vcpu_dirty_rate_stat_stop(void)
135 qatomic_set(&vcpu_dirty_rate_stat
->running
, 0);
136 dirtylimit_state_unlock();
137 qemu_mutex_unlock_iothread();
138 qemu_thread_join(&vcpu_dirty_rate_stat
->thread
);
139 qemu_mutex_lock_iothread();
140 dirtylimit_state_lock();
143 void vcpu_dirty_rate_stat_initialize(void)
145 MachineState
*ms
= MACHINE(qdev_get_machine());
146 int max_cpus
= ms
->smp
.max_cpus
;
148 vcpu_dirty_rate_stat
=
149 g_malloc0(sizeof(*vcpu_dirty_rate_stat
));
151 vcpu_dirty_rate_stat
->stat
.nvcpu
= max_cpus
;
152 vcpu_dirty_rate_stat
->stat
.rates
=
153 g_malloc0(sizeof(DirtyRateVcpu
) * max_cpus
);
155 vcpu_dirty_rate_stat
->running
= false;
158 void vcpu_dirty_rate_stat_finalize(void)
160 free(vcpu_dirty_rate_stat
->stat
.rates
);
161 vcpu_dirty_rate_stat
->stat
.rates
= NULL
;
163 free(vcpu_dirty_rate_stat
);
164 vcpu_dirty_rate_stat
= NULL
;
167 void dirtylimit_state_lock(void)
169 qemu_mutex_lock(&dirtylimit_mutex
);
172 void dirtylimit_state_unlock(void)
174 qemu_mutex_unlock(&dirtylimit_mutex
);
178 __attribute__((__constructor__
)) dirtylimit_mutex_init(void)
180 qemu_mutex_init(&dirtylimit_mutex
);
183 static inline VcpuDirtyLimitState
*dirtylimit_vcpu_get_state(int cpu_index
)
185 return &dirtylimit_state
->states
[cpu_index
];
188 void dirtylimit_state_initialize(void)
190 MachineState
*ms
= MACHINE(qdev_get_machine());
191 int max_cpus
= ms
->smp
.max_cpus
;
194 dirtylimit_state
= g_malloc0(sizeof(*dirtylimit_state
));
196 dirtylimit_state
->states
=
197 g_malloc0(sizeof(VcpuDirtyLimitState
) * max_cpus
);
199 for (i
= 0; i
< max_cpus
; i
++) {
200 dirtylimit_state
->states
[i
].cpu_index
= i
;
203 dirtylimit_state
->max_cpus
= max_cpus
;
204 trace_dirtylimit_state_initialize(max_cpus
);
207 void dirtylimit_state_finalize(void)
209 free(dirtylimit_state
->states
);
210 dirtylimit_state
->states
= NULL
;
212 free(dirtylimit_state
);
213 dirtylimit_state
= NULL
;
215 trace_dirtylimit_state_finalize();
218 bool dirtylimit_in_service(void)
220 return !!dirtylimit_state
;
223 bool dirtylimit_vcpu_index_valid(int cpu_index
)
225 MachineState
*ms
= MACHINE(qdev_get_machine());
227 return !(cpu_index
< 0 ||
228 cpu_index
>= ms
->smp
.max_cpus
);
231 static inline int64_t dirtylimit_dirty_ring_full_time(uint64_t dirtyrate
)
233 static uint64_t max_dirtyrate
;
234 uint32_t dirty_ring_size
= kvm_dirty_ring_size();
235 uint64_t dirty_ring_size_meory_MB
=
236 dirty_ring_size
* TARGET_PAGE_SIZE
>> 20;
238 if (max_dirtyrate
< dirtyrate
) {
239 max_dirtyrate
= dirtyrate
;
242 return dirty_ring_size_meory_MB
* 1000000 / max_dirtyrate
;
245 static inline bool dirtylimit_done(uint64_t quota
,
250 min
= MIN(quota
, current
);
251 max
= MAX(quota
, current
);
253 return ((max
- min
) <= DIRTYLIMIT_TOLERANCE_RANGE
) ? true : false;
257 dirtylimit_need_linear_adjustment(uint64_t quota
,
262 min
= MIN(quota
, current
);
263 max
= MAX(quota
, current
);
265 return ((max
- min
) * 100 / max
) > DIRTYLIMIT_LINEAR_ADJUSTMENT_PCT
;
268 static void dirtylimit_set_throttle(CPUState
*cpu
,
272 int64_t ring_full_time_us
= 0;
273 uint64_t sleep_pct
= 0;
274 uint64_t throttle_us
= 0;
277 cpu
->throttle_us_per_full
= 0;
281 ring_full_time_us
= dirtylimit_dirty_ring_full_time(current
);
283 if (dirtylimit_need_linear_adjustment(quota
, current
)) {
284 if (quota
< current
) {
285 sleep_pct
= (current
- quota
) * 100 / current
;
287 ring_full_time_us
* sleep_pct
/ (double)(100 - sleep_pct
);
288 cpu
->throttle_us_per_full
+= throttle_us
;
290 sleep_pct
= (quota
- current
) * 100 / quota
;
292 ring_full_time_us
* sleep_pct
/ (double)(100 - sleep_pct
);
293 cpu
->throttle_us_per_full
-= throttle_us
;
296 trace_dirtylimit_throttle_pct(cpu
->cpu_index
,
300 if (quota
< current
) {
301 cpu
->throttle_us_per_full
+= ring_full_time_us
/ 10;
303 cpu
->throttle_us_per_full
-= ring_full_time_us
/ 10;
308 * TODO: in the big kvm_dirty_ring_size case (eg: 65536, or other scenario),
309 * current dirty page rate may never reach the quota, we should stop
310 * increasing sleep time?
312 cpu
->throttle_us_per_full
= MIN(cpu
->throttle_us_per_full
,
313 ring_full_time_us
* DIRTYLIMIT_THROTTLE_PCT_MAX
);
315 cpu
->throttle_us_per_full
= MAX(cpu
->throttle_us_per_full
, 0);
318 static void dirtylimit_adjust_throttle(CPUState
*cpu
)
321 uint64_t current
= 0;
322 int cpu_index
= cpu
->cpu_index
;
324 quota
= dirtylimit_vcpu_get_state(cpu_index
)->quota
;
325 current
= vcpu_dirty_rate_get(cpu_index
);
327 if (!dirtylimit_done(quota
, current
)) {
328 dirtylimit_set_throttle(cpu
, quota
, current
);
334 void dirtylimit_process(void)
338 if (!qatomic_read(&dirtylimit_quit
)) {
339 dirtylimit_state_lock();
341 if (!dirtylimit_in_service()) {
342 dirtylimit_state_unlock();
347 if (!dirtylimit_vcpu_get_state(cpu
->cpu_index
)->enabled
) {
350 dirtylimit_adjust_throttle(cpu
);
352 dirtylimit_state_unlock();
356 void dirtylimit_change(bool start
)
359 qatomic_set(&dirtylimit_quit
, 0);
361 qatomic_set(&dirtylimit_quit
, 1);
365 void dirtylimit_set_vcpu(int cpu_index
,
369 trace_dirtylimit_set_vcpu(cpu_index
, quota
);
372 dirtylimit_state
->states
[cpu_index
].quota
= quota
;
373 if (!dirtylimit_vcpu_get_state(cpu_index
)->enabled
) {
374 dirtylimit_state
->limited_nvcpu
++;
377 dirtylimit_state
->states
[cpu_index
].quota
= 0;
378 if (dirtylimit_state
->states
[cpu_index
].enabled
) {
379 dirtylimit_state
->limited_nvcpu
--;
383 dirtylimit_state
->states
[cpu_index
].enabled
= enable
;
386 void dirtylimit_set_all(uint64_t quota
,
389 MachineState
*ms
= MACHINE(qdev_get_machine());
390 int max_cpus
= ms
->smp
.max_cpus
;
393 for (i
= 0; i
< max_cpus
; i
++) {
394 dirtylimit_set_vcpu(i
, quota
, enable
);
398 void dirtylimit_vcpu_execute(CPUState
*cpu
)
400 if (dirtylimit_in_service() &&
401 dirtylimit_vcpu_get_state(cpu
->cpu_index
)->enabled
&&
402 cpu
->throttle_us_per_full
) {
403 trace_dirtylimit_vcpu_execute(cpu
->cpu_index
,
404 cpu
->throttle_us_per_full
);
405 usleep(cpu
->throttle_us_per_full
);