]> git.proxmox.com Git - mirror_qemu.git/blob - softmmu/dirtylimit.c
e5a4f970bd719fe16187b457b9542c319fff77df
[mirror_qemu.git] / softmmu / dirtylimit.c
1 /*
2 * Dirty page rate limit implementation code
3 *
4 * Copyright (c) 2022 CHINA TELECOM CO.,LTD.
5 *
6 * Authors:
7 * Hyman Huang(黄勇) <huangy81@chinatelecom.cn>
8 *
9 * This work is licensed under the terms of the GNU GPL, version 2 or later.
10 * See the COPYING file in the top-level directory.
11 */
12
13 #include "qemu/osdep.h"
14 #include "qapi/error.h"
15 #include "qemu/main-loop.h"
16 #include "qapi/qapi-commands-migration.h"
17 #include "sysemu/dirtyrate.h"
18 #include "sysemu/dirtylimit.h"
19 #include "exec/memory.h"
20 #include "hw/boards.h"
21 #include "sysemu/kvm.h"
22 #include "trace.h"
23
/*
 * Dirtylimit stops adjusting the throttle once the difference between
 * the dirty page rate and the quota is no more than
 * DIRTYLIMIT_TOLERANCE_RANGE.
 */
28 #define DIRTYLIMIT_TOLERANCE_RANGE 25 /* MB/s */
/*
 * Increase or decrease the vCPU sleep time linearly if the dirty page
 * rate error, expressed as a percentage, exceeds
 * DIRTYLIMIT_LINEAR_ADJUSTMENT_PCT.
 * Otherwise, increase or decrease it by a fixed step.
 */
35 #define DIRTYLIMIT_LINEAR_ADJUSTMENT_PCT 50
/*
 * Maximum percentage of a cycle that a vCPU may spend sleeping, where a
 * cycle is composed of the dirty ring full time plus the sleep time.
 */
40 #define DIRTYLIMIT_THROTTLE_PCT_MAX 99
41
42 struct {
43 VcpuStat stat;
44 bool running;
45 QemuThread thread;
46 } *vcpu_dirty_rate_stat;
47
/* Dirty page rate limit settings of a single vCPU. */
typedef struct VcpuDirtyLimitState {
    /* Index of the vCPU this entry describes */
    int cpu_index;
    /* True while a dirty page rate limit is applied to this vCPU */
    bool enabled;
    /*
     * Dirty page rate quota in MB/s;
     * zero when the limit is not enabled.
     */
    uint64_t quota;
} VcpuDirtyLimitState;
57
58 struct {
59 VcpuDirtyLimitState *states;
60 /* Max cpus number configured by user */
61 int max_cpus;
62 /* Number of vcpu under dirtylimit */
63 int limited_nvcpu;
64 } *dirtylimit_state;
65
66 /* protect dirtylimit_state */
67 static QemuMutex dirtylimit_mutex;
68
/* dirtylimit_process() skips its work while dirtylimit_quit is true */
70 static bool dirtylimit_quit;
71
72 static void vcpu_dirty_rate_stat_collect(void)
73 {
74 VcpuStat stat;
75 int i = 0;
76
77 /* calculate vcpu dirtyrate */
78 vcpu_calculate_dirtyrate(DIRTYLIMIT_CALC_TIME_MS,
79 &stat,
80 GLOBAL_DIRTY_LIMIT,
81 false);
82
83 for (i = 0; i < stat.nvcpu; i++) {
84 vcpu_dirty_rate_stat->stat.rates[i].id = i;
85 vcpu_dirty_rate_stat->stat.rates[i].dirty_rate =
86 stat.rates[i].dirty_rate;
87 }
88
89 free(stat.rates);
90 }
91
92 static void *vcpu_dirty_rate_stat_thread(void *opaque)
93 {
94 rcu_register_thread();
95
96 /* start log sync */
97 global_dirty_log_change(GLOBAL_DIRTY_LIMIT, true);
98
99 while (qatomic_read(&vcpu_dirty_rate_stat->running)) {
100 vcpu_dirty_rate_stat_collect();
101 if (dirtylimit_in_service()) {
102 dirtylimit_process();
103 }
104 }
105
106 /* stop log sync */
107 global_dirty_log_change(GLOBAL_DIRTY_LIMIT, false);
108
109 rcu_unregister_thread();
110 return NULL;
111 }
112
113 int64_t vcpu_dirty_rate_get(int cpu_index)
114 {
115 DirtyRateVcpu *rates = vcpu_dirty_rate_stat->stat.rates;
116 return qatomic_read_i64(&rates[cpu_index].dirty_rate);
117 }
118
119 void vcpu_dirty_rate_stat_start(void)
120 {
121 if (qatomic_read(&vcpu_dirty_rate_stat->running)) {
122 return;
123 }
124
125 qatomic_set(&vcpu_dirty_rate_stat->running, 1);
126 qemu_thread_create(&vcpu_dirty_rate_stat->thread,
127 "dirtyrate-stat",
128 vcpu_dirty_rate_stat_thread,
129 NULL,
130 QEMU_THREAD_JOINABLE);
131 }
132
133 void vcpu_dirty_rate_stat_stop(void)
134 {
135 qatomic_set(&vcpu_dirty_rate_stat->running, 0);
136 dirtylimit_state_unlock();
137 qemu_mutex_unlock_iothread();
138 qemu_thread_join(&vcpu_dirty_rate_stat->thread);
139 qemu_mutex_lock_iothread();
140 dirtylimit_state_lock();
141 }
142
143 void vcpu_dirty_rate_stat_initialize(void)
144 {
145 MachineState *ms = MACHINE(qdev_get_machine());
146 int max_cpus = ms->smp.max_cpus;
147
148 vcpu_dirty_rate_stat =
149 g_malloc0(sizeof(*vcpu_dirty_rate_stat));
150
151 vcpu_dirty_rate_stat->stat.nvcpu = max_cpus;
152 vcpu_dirty_rate_stat->stat.rates =
153 g_malloc0(sizeof(DirtyRateVcpu) * max_cpus);
154
155 vcpu_dirty_rate_stat->running = false;
156 }
157
158 void vcpu_dirty_rate_stat_finalize(void)
159 {
160 free(vcpu_dirty_rate_stat->stat.rates);
161 vcpu_dirty_rate_stat->stat.rates = NULL;
162
163 free(vcpu_dirty_rate_stat);
164 vcpu_dirty_rate_stat = NULL;
165 }
166
167 void dirtylimit_state_lock(void)
168 {
169 qemu_mutex_lock(&dirtylimit_mutex);
170 }
171
172 void dirtylimit_state_unlock(void)
173 {
174 qemu_mutex_unlock(&dirtylimit_mutex);
175 }
176
177 static void
178 __attribute__((__constructor__)) dirtylimit_mutex_init(void)
179 {
180 qemu_mutex_init(&dirtylimit_mutex);
181 }
182
183 static inline VcpuDirtyLimitState *dirtylimit_vcpu_get_state(int cpu_index)
184 {
185 return &dirtylimit_state->states[cpu_index];
186 }
187
188 void dirtylimit_state_initialize(void)
189 {
190 MachineState *ms = MACHINE(qdev_get_machine());
191 int max_cpus = ms->smp.max_cpus;
192 int i;
193
194 dirtylimit_state = g_malloc0(sizeof(*dirtylimit_state));
195
196 dirtylimit_state->states =
197 g_malloc0(sizeof(VcpuDirtyLimitState) * max_cpus);
198
199 for (i = 0; i < max_cpus; i++) {
200 dirtylimit_state->states[i].cpu_index = i;
201 }
202
203 dirtylimit_state->max_cpus = max_cpus;
204 trace_dirtylimit_state_initialize(max_cpus);
205 }
206
207 void dirtylimit_state_finalize(void)
208 {
209 free(dirtylimit_state->states);
210 dirtylimit_state->states = NULL;
211
212 free(dirtylimit_state);
213 dirtylimit_state = NULL;
214
215 trace_dirtylimit_state_finalize();
216 }
217
218 bool dirtylimit_in_service(void)
219 {
220 return !!dirtylimit_state;
221 }
222
223 bool dirtylimit_vcpu_index_valid(int cpu_index)
224 {
225 MachineState *ms = MACHINE(qdev_get_machine());
226
227 return !(cpu_index < 0 ||
228 cpu_index >= ms->smp.max_cpus);
229 }
230
231 static inline int64_t dirtylimit_dirty_ring_full_time(uint64_t dirtyrate)
232 {
233 static uint64_t max_dirtyrate;
234 uint32_t dirty_ring_size = kvm_dirty_ring_size();
235 uint64_t dirty_ring_size_meory_MB =
236 dirty_ring_size * TARGET_PAGE_SIZE >> 20;
237
238 if (max_dirtyrate < dirtyrate) {
239 max_dirtyrate = dirtyrate;
240 }
241
242 return dirty_ring_size_meory_MB * 1000000 / max_dirtyrate;
243 }
244
245 static inline bool dirtylimit_done(uint64_t quota,
246 uint64_t current)
247 {
248 uint64_t min, max;
249
250 min = MIN(quota, current);
251 max = MAX(quota, current);
252
253 return ((max - min) <= DIRTYLIMIT_TOLERANCE_RANGE) ? true : false;
254 }
255
256 static inline bool
257 dirtylimit_need_linear_adjustment(uint64_t quota,
258 uint64_t current)
259 {
260 uint64_t min, max;
261
262 min = MIN(quota, current);
263 max = MAX(quota, current);
264
265 return ((max - min) * 100 / max) > DIRTYLIMIT_LINEAR_ADJUSTMENT_PCT;
266 }
267
268 static void dirtylimit_set_throttle(CPUState *cpu,
269 uint64_t quota,
270 uint64_t current)
271 {
272 int64_t ring_full_time_us = 0;
273 uint64_t sleep_pct = 0;
274 uint64_t throttle_us = 0;
275
276 if (current == 0) {
277 cpu->throttle_us_per_full = 0;
278 return;
279 }
280
281 ring_full_time_us = dirtylimit_dirty_ring_full_time(current);
282
283 if (dirtylimit_need_linear_adjustment(quota, current)) {
284 if (quota < current) {
285 sleep_pct = (current - quota) * 100 / current;
286 throttle_us =
287 ring_full_time_us * sleep_pct / (double)(100 - sleep_pct);
288 cpu->throttle_us_per_full += throttle_us;
289 } else {
290 sleep_pct = (quota - current) * 100 / quota;
291 throttle_us =
292 ring_full_time_us * sleep_pct / (double)(100 - sleep_pct);
293 cpu->throttle_us_per_full -= throttle_us;
294 }
295
296 trace_dirtylimit_throttle_pct(cpu->cpu_index,
297 sleep_pct,
298 throttle_us);
299 } else {
300 if (quota < current) {
301 cpu->throttle_us_per_full += ring_full_time_us / 10;
302 } else {
303 cpu->throttle_us_per_full -= ring_full_time_us / 10;
304 }
305 }
306
307 /*
308 * TODO: in the big kvm_dirty_ring_size case (eg: 65536, or other scenario),
309 * current dirty page rate may never reach the quota, we should stop
310 * increasing sleep time?
311 */
312 cpu->throttle_us_per_full = MIN(cpu->throttle_us_per_full,
313 ring_full_time_us * DIRTYLIMIT_THROTTLE_PCT_MAX);
314
315 cpu->throttle_us_per_full = MAX(cpu->throttle_us_per_full, 0);
316 }
317
318 static void dirtylimit_adjust_throttle(CPUState *cpu)
319 {
320 uint64_t quota = 0;
321 uint64_t current = 0;
322 int cpu_index = cpu->cpu_index;
323
324 quota = dirtylimit_vcpu_get_state(cpu_index)->quota;
325 current = vcpu_dirty_rate_get(cpu_index);
326
327 if (!dirtylimit_done(quota, current)) {
328 dirtylimit_set_throttle(cpu, quota, current);
329 }
330
331 return;
332 }
333
334 void dirtylimit_process(void)
335 {
336 CPUState *cpu;
337
338 if (!qatomic_read(&dirtylimit_quit)) {
339 dirtylimit_state_lock();
340
341 if (!dirtylimit_in_service()) {
342 dirtylimit_state_unlock();
343 return;
344 }
345
346 CPU_FOREACH(cpu) {
347 if (!dirtylimit_vcpu_get_state(cpu->cpu_index)->enabled) {
348 continue;
349 }
350 dirtylimit_adjust_throttle(cpu);
351 }
352 dirtylimit_state_unlock();
353 }
354 }
355
356 void dirtylimit_change(bool start)
357 {
358 if (start) {
359 qatomic_set(&dirtylimit_quit, 0);
360 } else {
361 qatomic_set(&dirtylimit_quit, 1);
362 }
363 }
364
365 void dirtylimit_set_vcpu(int cpu_index,
366 uint64_t quota,
367 bool enable)
368 {
369 trace_dirtylimit_set_vcpu(cpu_index, quota);
370
371 if (enable) {
372 dirtylimit_state->states[cpu_index].quota = quota;
373 if (!dirtylimit_vcpu_get_state(cpu_index)->enabled) {
374 dirtylimit_state->limited_nvcpu++;
375 }
376 } else {
377 dirtylimit_state->states[cpu_index].quota = 0;
378 if (dirtylimit_state->states[cpu_index].enabled) {
379 dirtylimit_state->limited_nvcpu--;
380 }
381 }
382
383 dirtylimit_state->states[cpu_index].enabled = enable;
384 }
385
386 void dirtylimit_set_all(uint64_t quota,
387 bool enable)
388 {
389 MachineState *ms = MACHINE(qdev_get_machine());
390 int max_cpus = ms->smp.max_cpus;
391 int i;
392
393 for (i = 0; i < max_cpus; i++) {
394 dirtylimit_set_vcpu(i, quota, enable);
395 }
396 }
397
398 void dirtylimit_vcpu_execute(CPUState *cpu)
399 {
400 if (dirtylimit_in_service() &&
401 dirtylimit_vcpu_get_state(cpu->cpu_index)->enabled &&
402 cpu->throttle_us_per_full) {
403 trace_dirtylimit_vcpu_execute(cpu->cpu_index,
404 cpu->throttle_us_per_full);
405 usleep(cpu->throttle_us_per_full);
406 }
407 }