]>
Commit | Line | Data |
---|---|---|
4240dcee CZ |
1 | /* |
2 | * Dirtyrate implement code | |
3 | * | |
4 | * Copyright (c) 2020 HUAWEI TECHNOLOGIES CO.,LTD. | |
5 | * | |
6 | * Authors: | |
7 | * Chuan Zheng <zhengchuan@huawei.com> | |
8 | * | |
9 | * This work is licensed under the terms of the GNU GPL, version 2 or later. | |
10 | * See the COPYING file in the top-level directory. | |
11 | */ | |
12 | ||
13 | #include "qemu/osdep.h" | |
662770af | 14 | #include <zlib.h> |
4240dcee CZ |
15 | #include "qapi/error.h" |
16 | #include "cpu.h" | |
4240dcee | 17 | #include "exec/ramblock.h" |
4240dcee CZ |
18 | #include "qemu/rcu_queue.h" |
19 | #include "qapi/qapi-commands-migration.h" | |
3ded54b1 | 20 | #include "ram.h" |
3c0b5dff | 21 | #include "trace.h" |
4240dcee | 22 | #include "dirtyrate.h" |
a4a571d9 PX |
23 | #include "monitor/hmp.h" |
24 | #include "monitor/monitor.h" | |
25 | #include "qapi/qmp/qdict.h" | |
4240dcee | 26 | |
7df3aa30 | 27 | static int CalculatingState = DIRTY_RATE_STATUS_UNSTARTED; |
c9a58d71 | 28 | static struct DirtyRateStat DirtyStat; |
7df3aa30 | 29 | |
eca58224 CZ |
30 | static int64_t set_sample_page_period(int64_t msec, int64_t initial_time) |
31 | { | |
32 | int64_t current_time; | |
33 | ||
34 | current_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME); | |
35 | if ((current_time - initial_time) >= msec) { | |
36 | msec = current_time - initial_time; | |
37 | } else { | |
38 | g_usleep((msec + initial_time - current_time) * 1000); | |
39 | } | |
40 | ||
41 | return msec; | |
42 | } | |
43 | ||
44 | static bool is_sample_period_valid(int64_t sec) | |
45 | { | |
46 | if (sec < MIN_FETCH_DIRTYRATE_TIME_SEC || | |
47 | sec > MAX_FETCH_DIRTYRATE_TIME_SEC) { | |
48 | return false; | |
49 | } | |
50 | ||
51 | return true; | |
52 | } | |
53 | ||
7afa08cd HH |
54 | static bool is_sample_pages_valid(int64_t pages) |
55 | { | |
56 | return pages >= MIN_SAMPLE_PAGE_COUNT && | |
57 | pages <= MAX_SAMPLE_PAGE_COUNT; | |
58 | } | |
59 | ||
7df3aa30 CZ |
60 | static int dirtyrate_set_state(int *state, int old_state, int new_state) |
61 | { | |
62 | assert(new_state < DIRTY_RATE_STATUS__MAX); | |
3c0b5dff | 63 | trace_dirtyrate_set_state(DirtyRateStatus_str(new_state)); |
7df3aa30 CZ |
64 | if (qatomic_cmpxchg(state, old_state, new_state) == old_state) { |
65 | return 0; | |
66 | } else { | |
67 | return -1; | |
68 | } | |
69 | } | |
70 | ||
4c437254 CZ |
71 | static struct DirtyRateInfo *query_dirty_rate_info(void) |
72 | { | |
73 | int64_t dirty_rate = DirtyStat.dirty_rate; | |
74 | struct DirtyRateInfo *info = g_malloc0(sizeof(DirtyRateInfo)); | |
75 | ||
76 | if (qatomic_read(&CalculatingState) == DIRTY_RATE_STATUS_MEASURED) { | |
b1a859cf | 77 | info->has_dirty_rate = true; |
4c437254 | 78 | info->dirty_rate = dirty_rate; |
4c437254 CZ |
79 | } |
80 | ||
81 | info->status = CalculatingState; | |
82 | info->start_time = DirtyStat.start_time; | |
83 | info->calc_time = DirtyStat.calc_time; | |
7afa08cd | 84 | info->sample_pages = DirtyStat.sample_pages; |
4c437254 | 85 | |
3c0b5dff CZ |
86 | trace_query_dirty_rate_info(DirtyRateStatus_str(CalculatingState)); |
87 | ||
4c437254 CZ |
88 | return info; |
89 | } | |
90 | ||
7afa08cd HH |
91 | static void init_dirtyrate_stat(int64_t start_time, int64_t calc_time, |
92 | uint64_t sample_pages) | |
c9a58d71 CZ |
93 | { |
94 | DirtyStat.total_dirty_samples = 0; | |
95 | DirtyStat.total_sample_count = 0; | |
96 | DirtyStat.total_block_mem_MB = 0; | |
97 | DirtyStat.dirty_rate = -1; | |
aa84b506 CZ |
98 | DirtyStat.start_time = start_time; |
99 | DirtyStat.calc_time = calc_time; | |
7afa08cd | 100 | DirtyStat.sample_pages = sample_pages; |
c9a58d71 CZ |
101 | } |
102 | ||
103 | static void update_dirtyrate_stat(struct RamblockDirtyInfo *info) | |
104 | { | |
105 | DirtyStat.total_dirty_samples += info->sample_dirty_count; | |
106 | DirtyStat.total_sample_count += info->sample_pages_count; | |
107 | /* size of total pages in MB */ | |
108 | DirtyStat.total_block_mem_MB += (info->ramblock_pages * | |
109 | TARGET_PAGE_SIZE) >> 20; | |
110 | } | |
111 | ||
112 | static void update_dirtyrate(uint64_t msec) | |
113 | { | |
114 | uint64_t dirtyrate; | |
115 | uint64_t total_dirty_samples = DirtyStat.total_dirty_samples; | |
116 | uint64_t total_sample_count = DirtyStat.total_sample_count; | |
117 | uint64_t total_block_mem_MB = DirtyStat.total_block_mem_MB; | |
118 | ||
119 | dirtyrate = total_dirty_samples * total_block_mem_MB * | |
120 | 1000 / (total_sample_count * msec); | |
121 | ||
122 | DirtyStat.dirty_rate = dirtyrate; | |
123 | } | |
7df3aa30 | 124 | |
ba0e519f CZ |
125 | /* |
126 | * get hash result for the sampled memory with length of TARGET_PAGE_SIZE | |
127 | * in ramblock, which starts from ramblock base address. | |
128 | */ | |
129 | static uint32_t get_ramblock_vfn_hash(struct RamblockDirtyInfo *info, | |
130 | uint64_t vfn) | |
131 | { | |
132 | uint32_t crc; | |
133 | ||
134 | crc = crc32(0, (info->ramblock_addr + | |
135 | vfn * TARGET_PAGE_SIZE), TARGET_PAGE_SIZE); | |
136 | ||
3c0b5dff | 137 | trace_get_ramblock_vfn_hash(info->idstr, vfn, crc); |
ba0e519f CZ |
138 | return crc; |
139 | } | |
140 | ||
141 | static bool save_ramblock_hash(struct RamblockDirtyInfo *info) | |
142 | { | |
143 | unsigned int sample_pages_count; | |
144 | int i; | |
145 | GRand *rand; | |
146 | ||
147 | sample_pages_count = info->sample_pages_count; | |
148 | ||
149 | /* ramblock size less than one page, return success to skip this ramblock */ | |
150 | if (unlikely(info->ramblock_pages == 0 || sample_pages_count == 0)) { | |
151 | return true; | |
152 | } | |
153 | ||
154 | info->hash_result = g_try_malloc0_n(sample_pages_count, | |
155 | sizeof(uint32_t)); | |
156 | if (!info->hash_result) { | |
157 | return false; | |
158 | } | |
159 | ||
160 | info->sample_page_vfn = g_try_malloc0_n(sample_pages_count, | |
161 | sizeof(uint64_t)); | |
162 | if (!info->sample_page_vfn) { | |
163 | g_free(info->hash_result); | |
164 | return false; | |
165 | } | |
166 | ||
167 | rand = g_rand_new(); | |
168 | for (i = 0; i < sample_pages_count; i++) { | |
169 | info->sample_page_vfn[i] = g_rand_int_range(rand, 0, | |
170 | info->ramblock_pages - 1); | |
171 | info->hash_result[i] = get_ramblock_vfn_hash(info, | |
172 | info->sample_page_vfn[i]); | |
173 | } | |
174 | g_rand_free(rand); | |
175 | ||
176 | return true; | |
177 | } | |
178 | ||
179 | static void get_ramblock_dirty_info(RAMBlock *block, | |
180 | struct RamblockDirtyInfo *info, | |
181 | struct DirtyRateConfig *config) | |
182 | { | |
183 | uint64_t sample_pages_per_gigabytes = config->sample_pages_per_gigabytes; | |
184 | ||
185 | /* Right shift 30 bits to calc ramblock size in GB */ | |
186 | info->sample_pages_count = (qemu_ram_get_used_length(block) * | |
187 | sample_pages_per_gigabytes) >> 30; | |
188 | /* Right shift TARGET_PAGE_BITS to calc page count */ | |
189 | info->ramblock_pages = qemu_ram_get_used_length(block) >> | |
190 | TARGET_PAGE_BITS; | |
191 | info->ramblock_addr = qemu_ram_get_host_addr(block); | |
192 | strcpy(info->idstr, qemu_ram_get_idstr(block)); | |
193 | } | |
194 | ||
cf0bbb49 CZ |
195 | static void free_ramblock_dirty_info(struct RamblockDirtyInfo *infos, int count) |
196 | { | |
197 | int i; | |
198 | ||
199 | if (!infos) { | |
200 | return; | |
201 | } | |
202 | ||
203 | for (i = 0; i < count; i++) { | |
204 | g_free(infos[i].sample_page_vfn); | |
205 | g_free(infos[i].hash_result); | |
206 | } | |
207 | g_free(infos); | |
208 | } | |
209 | ||
f82583cd CZ |
210 | static bool skip_sample_ramblock(RAMBlock *block) |
211 | { | |
212 | /* | |
213 | * Sample only blocks larger than MIN_RAMBLOCK_SIZE. | |
214 | */ | |
215 | if (qemu_ram_get_used_length(block) < (MIN_RAMBLOCK_SIZE << 10)) { | |
3c0b5dff CZ |
216 | trace_skip_sample_ramblock(block->idstr, |
217 | qemu_ram_get_used_length(block)); | |
f82583cd CZ |
218 | return true; |
219 | } | |
220 | ||
221 | return false; | |
222 | } | |
223 | ||
ba0e519f CZ |
224 | static bool record_ramblock_hash_info(struct RamblockDirtyInfo **block_dinfo, |
225 | struct DirtyRateConfig config, | |
226 | int *block_count) | |
227 | { | |
228 | struct RamblockDirtyInfo *info = NULL; | |
229 | struct RamblockDirtyInfo *dinfo = NULL; | |
230 | RAMBlock *block = NULL; | |
231 | int total_count = 0; | |
232 | int index = 0; | |
233 | bool ret = false; | |
234 | ||
235 | RAMBLOCK_FOREACH_MIGRATABLE(block) { | |
f82583cd CZ |
236 | if (skip_sample_ramblock(block)) { |
237 | continue; | |
238 | } | |
ba0e519f CZ |
239 | total_count++; |
240 | } | |
241 | ||
242 | dinfo = g_try_malloc0_n(total_count, sizeof(struct RamblockDirtyInfo)); | |
243 | if (dinfo == NULL) { | |
244 | goto out; | |
245 | } | |
246 | ||
247 | RAMBLOCK_FOREACH_MIGRATABLE(block) { | |
f82583cd CZ |
248 | if (skip_sample_ramblock(block)) { |
249 | continue; | |
250 | } | |
ba0e519f CZ |
251 | if (index >= total_count) { |
252 | break; | |
253 | } | |
254 | info = &dinfo[index]; | |
255 | get_ramblock_dirty_info(block, info, &config); | |
256 | if (!save_ramblock_hash(info)) { | |
257 | goto out; | |
258 | } | |
259 | index++; | |
260 | } | |
261 | ret = true; | |
262 | ||
263 | out: | |
264 | *block_count = index; | |
265 | *block_dinfo = dinfo; | |
266 | return ret; | |
267 | } | |
268 | ||
9c04387b CZ |
269 | static void calc_page_dirty_rate(struct RamblockDirtyInfo *info) |
270 | { | |
271 | uint32_t crc; | |
272 | int i; | |
273 | ||
274 | for (i = 0; i < info->sample_pages_count; i++) { | |
275 | crc = get_ramblock_vfn_hash(info, info->sample_page_vfn[i]); | |
276 | if (crc != info->hash_result[i]) { | |
3c0b5dff | 277 | trace_calc_page_dirty_rate(info->idstr, crc, info->hash_result[i]); |
9c04387b CZ |
278 | info->sample_dirty_count++; |
279 | } | |
280 | } | |
281 | } | |
282 | ||
283 | static struct RamblockDirtyInfo * | |
284 | find_block_matched(RAMBlock *block, int count, | |
285 | struct RamblockDirtyInfo *infos) | |
286 | { | |
287 | int i; | |
288 | struct RamblockDirtyInfo *matched; | |
289 | ||
290 | for (i = 0; i < count; i++) { | |
291 | if (!strcmp(infos[i].idstr, qemu_ram_get_idstr(block))) { | |
292 | break; | |
293 | } | |
294 | } | |
295 | ||
296 | if (i == count) { | |
297 | return NULL; | |
298 | } | |
299 | ||
300 | if (infos[i].ramblock_addr != qemu_ram_get_host_addr(block) || | |
301 | infos[i].ramblock_pages != | |
302 | (qemu_ram_get_used_length(block) >> TARGET_PAGE_BITS)) { | |
3c0b5dff | 303 | trace_find_page_matched(block->idstr); |
9c04387b CZ |
304 | return NULL; |
305 | } | |
306 | ||
307 | matched = &infos[i]; | |
308 | ||
309 | return matched; | |
310 | } | |
311 | ||
312 | static bool compare_page_hash_info(struct RamblockDirtyInfo *info, | |
313 | int block_count) | |
314 | { | |
315 | struct RamblockDirtyInfo *block_dinfo = NULL; | |
316 | RAMBlock *block = NULL; | |
317 | ||
318 | RAMBLOCK_FOREACH_MIGRATABLE(block) { | |
f82583cd CZ |
319 | if (skip_sample_ramblock(block)) { |
320 | continue; | |
321 | } | |
9c04387b CZ |
322 | block_dinfo = find_block_matched(block, block_count, info); |
323 | if (block_dinfo == NULL) { | |
324 | continue; | |
325 | } | |
326 | calc_page_dirty_rate(block_dinfo); | |
327 | update_dirtyrate_stat(block_dinfo); | |
328 | } | |
329 | ||
330 | if (DirtyStat.total_sample_count == 0) { | |
331 | return false; | |
332 | } | |
333 | ||
334 | return true; | |
335 | } | |
336 | ||
4240dcee CZ |
337 | static void calculate_dirtyrate(struct DirtyRateConfig config) |
338 | { | |
cf0bbb49 CZ |
339 | struct RamblockDirtyInfo *block_dinfo = NULL; |
340 | int block_count = 0; | |
341 | int64_t msec = 0; | |
342 | int64_t initial_time; | |
343 | ||
344 | rcu_register_thread(); | |
cf0bbb49 CZ |
345 | rcu_read_lock(); |
346 | initial_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME); | |
347 | if (!record_ramblock_hash_info(&block_dinfo, config, &block_count)) { | |
348 | goto out; | |
349 | } | |
350 | rcu_read_unlock(); | |
351 | ||
352 | msec = config.sample_period_seconds * 1000; | |
353 | msec = set_sample_page_period(msec, initial_time); | |
4c437254 CZ |
354 | DirtyStat.start_time = initial_time / 1000; |
355 | DirtyStat.calc_time = msec / 1000; | |
cf0bbb49 CZ |
356 | |
357 | rcu_read_lock(); | |
358 | if (!compare_page_hash_info(block_dinfo, block_count)) { | |
359 | goto out; | |
360 | } | |
361 | ||
362 | update_dirtyrate(msec); | |
363 | ||
364 | out: | |
365 | rcu_read_unlock(); | |
366 | free_ramblock_dirty_info(block_dinfo, block_count); | |
367 | rcu_unregister_thread(); | |
4240dcee CZ |
368 | } |
369 | ||
370 | void *get_dirtyrate_thread(void *arg) | |
371 | { | |
372 | struct DirtyRateConfig config = *(struct DirtyRateConfig *)arg; | |
7df3aa30 | 373 | int ret; |
aa84b506 CZ |
374 | int64_t start_time; |
375 | int64_t calc_time; | |
7afa08cd | 376 | uint64_t sample_pages; |
7df3aa30 CZ |
377 | |
378 | ret = dirtyrate_set_state(&CalculatingState, DIRTY_RATE_STATUS_UNSTARTED, | |
379 | DIRTY_RATE_STATUS_MEASURING); | |
380 | if (ret == -1) { | |
381 | error_report("change dirtyrate state failed."); | |
382 | return NULL; | |
383 | } | |
4240dcee | 384 | |
aa84b506 CZ |
385 | start_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME) / 1000; |
386 | calc_time = config.sample_period_seconds; | |
7afa08cd HH |
387 | sample_pages = config.sample_pages_per_gigabytes; |
388 | init_dirtyrate_stat(start_time, calc_time, sample_pages); | |
aa84b506 | 389 | |
4240dcee CZ |
390 | calculate_dirtyrate(config); |
391 | ||
7df3aa30 CZ |
392 | ret = dirtyrate_set_state(&CalculatingState, DIRTY_RATE_STATUS_MEASURING, |
393 | DIRTY_RATE_STATUS_MEASURED); | |
394 | if (ret == -1) { | |
395 | error_report("change dirtyrate state failed."); | |
396 | } | |
4240dcee CZ |
397 | return NULL; |
398 | } | |
4c437254 | 399 | |
7afa08cd HH |
400 | void qmp_calc_dirty_rate(int64_t calc_time, bool has_sample_pages, |
401 | int64_t sample_pages, Error **errp) | |
4c437254 CZ |
402 | { |
403 | static struct DirtyRateConfig config; | |
404 | QemuThread thread; | |
405 | int ret; | |
406 | ||
407 | /* | |
408 | * If the dirty rate is already being measured, don't attempt to start. | |
409 | */ | |
410 | if (qatomic_read(&CalculatingState) == DIRTY_RATE_STATUS_MEASURING) { | |
411 | error_setg(errp, "the dirty rate is already being measured."); | |
412 | return; | |
413 | } | |
414 | ||
415 | if (!is_sample_period_valid(calc_time)) { | |
416 | error_setg(errp, "calc-time is out of range[%d, %d].", | |
417 | MIN_FETCH_DIRTYRATE_TIME_SEC, | |
418 | MAX_FETCH_DIRTYRATE_TIME_SEC); | |
419 | return; | |
420 | } | |
421 | ||
7afa08cd HH |
422 | if (has_sample_pages) { |
423 | if (!is_sample_pages_valid(sample_pages)) { | |
424 | error_setg(errp, "sample-pages is out of range[%d, %d].", | |
425 | MIN_SAMPLE_PAGE_COUNT, | |
426 | MAX_SAMPLE_PAGE_COUNT); | |
427 | return; | |
428 | } | |
429 | } else { | |
430 | sample_pages = DIRTYRATE_DEFAULT_SAMPLE_PAGES; | |
431 | } | |
432 | ||
4c437254 CZ |
433 | /* |
434 | * Init calculation state as unstarted. | |
435 | */ | |
436 | ret = dirtyrate_set_state(&CalculatingState, CalculatingState, | |
437 | DIRTY_RATE_STATUS_UNSTARTED); | |
438 | if (ret == -1) { | |
439 | error_setg(errp, "init dirty rate calculation state failed."); | |
440 | return; | |
441 | } | |
442 | ||
443 | config.sample_period_seconds = calc_time; | |
7afa08cd | 444 | config.sample_pages_per_gigabytes = sample_pages; |
4c437254 CZ |
445 | qemu_thread_create(&thread, "get_dirtyrate", get_dirtyrate_thread, |
446 | (void *)&config, QEMU_THREAD_DETACHED); | |
447 | } | |
448 | ||
449 | struct DirtyRateInfo *qmp_query_dirty_rate(Error **errp) | |
450 | { | |
451 | return query_dirty_rate_info(); | |
452 | } | |
a4a571d9 PX |
453 | |
454 | void hmp_info_dirty_rate(Monitor *mon, const QDict *qdict) | |
455 | { | |
456 | DirtyRateInfo *info = query_dirty_rate_info(); | |
457 | ||
458 | monitor_printf(mon, "Status: %s\n", | |
459 | DirtyRateStatus_str(info->status)); | |
460 | monitor_printf(mon, "Start Time: %"PRIi64" (ms)\n", | |
461 | info->start_time); | |
462 | monitor_printf(mon, "Sample Pages: %"PRIu64" (per GB)\n", | |
463 | info->sample_pages); | |
464 | monitor_printf(mon, "Period: %"PRIi64" (sec)\n", | |
465 | info->calc_time); | |
466 | monitor_printf(mon, "Dirty rate: "); | |
467 | if (info->has_dirty_rate) { | |
468 | monitor_printf(mon, "%"PRIi64" (MB/s)\n", info->dirty_rate); | |
469 | } else { | |
470 | monitor_printf(mon, "(not ready)\n"); | |
471 | } | |
472 | g_free(info); | |
473 | } | |
474 | ||
475 | void hmp_calc_dirty_rate(Monitor *mon, const QDict *qdict) | |
476 | { | |
477 | int64_t sec = qdict_get_try_int(qdict, "second", 0); | |
478 | int64_t sample_pages = qdict_get_try_int(qdict, "sample_pages_per_GB", -1); | |
479 | bool has_sample_pages = (sample_pages != -1); | |
480 | Error *err = NULL; | |
481 | ||
482 | if (!sec) { | |
483 | monitor_printf(mon, "Incorrect period length specified!\n"); | |
484 | return; | |
485 | } | |
486 | ||
487 | qmp_calc_dirty_rate(sec, has_sample_pages, sample_pages, &err); | |
488 | if (err) { | |
489 | hmp_handle_error(mon, err); | |
490 | return; | |
491 | } | |
492 | ||
493 | monitor_printf(mon, "Starting dirty rate measurement with period %"PRIi64 | |
494 | " seconds\n", sec); | |
495 | monitor_printf(mon, "[Please use 'info dirty_rate' to check results]\n"); | |
496 | } |