* See the COPYING file in the top-level directory.
*/
-#include <zlib.h>
#include "qemu/osdep.h"
+#include <zlib.h>
#include "qapi/error.h"
#include "cpu.h"
-#include "qemu/config-file.h"
-#include "exec/memory.h"
#include "exec/ramblock.h"
-#include "exec/target_page.h"
+#include "exec/ram_addr.h"
#include "qemu/rcu_queue.h"
+#include "qemu/main-loop.h"
#include "qapi/qapi-commands-migration.h"
-#include "migration.h"
#include "ram.h"
#include "trace.h"
#include "dirtyrate.h"
+#include "monitor/hmp.h"
+#include "monitor/monitor.h"
+#include "qapi/qmp/qdict.h"
+#include "sysemu/kvm.h"
+#include "sysemu/runstate.h"
+#include "exec/memory.h"
+
+/*
+ * total_dirty_pages is procted by BQL and is used
+ * to stat dirty pages during the period of two
+ * memory_global_dirty_log_sync
+ */
+uint64_t total_dirty_pages;
+
+typedef struct DirtyPageRecord {
+ uint64_t start_pages;
+ uint64_t end_pages;
+} DirtyPageRecord;
static int CalculatingState = DIRTY_RATE_STATUS_UNSTARTED;
static struct DirtyRateStat DirtyStat;
+static DirtyRateMeasureMode dirtyrate_mode =
+ DIRTY_RATE_MEASURE_MODE_PAGE_SAMPLING;
-static int64_t set_sample_page_period(int64_t msec, int64_t initial_time)
+static int64_t dirty_stat_wait(int64_t msec, int64_t initial_time)
{
int64_t current_time;
return msec;
}
+static inline void record_dirtypages(DirtyPageRecord *dirty_pages,
+ CPUState *cpu, bool start)
+{
+ if (start) {
+ dirty_pages[cpu->cpu_index].start_pages = cpu->dirty_pages;
+ } else {
+ dirty_pages[cpu->cpu_index].end_pages = cpu->dirty_pages;
+ }
+}
+
+static int64_t do_calculate_dirtyrate(DirtyPageRecord dirty_pages,
+ int64_t calc_time_ms)
+{
+ uint64_t memory_size_MB;
+ uint64_t increased_dirty_pages =
+ dirty_pages.end_pages - dirty_pages.start_pages;
+
+ memory_size_MB = (increased_dirty_pages * TARGET_PAGE_SIZE) >> 20;
+
+ return memory_size_MB * 1000 / calc_time_ms;
+}
+
+void global_dirty_log_change(unsigned int flag, bool start)
+{
+ qemu_mutex_lock_iothread();
+ if (start) {
+ memory_global_dirty_log_start(flag);
+ } else {
+ memory_global_dirty_log_stop(flag);
+ }
+ qemu_mutex_unlock_iothread();
+}
+
+/*
+ * global_dirty_log_sync
+ * 1. sync dirty log from kvm
+ * 2. stop dirty tracking if needed.
+ */
+static void global_dirty_log_sync(unsigned int flag, bool one_shot)
+{
+ qemu_mutex_lock_iothread();
+ memory_global_dirty_log_sync();
+ if (one_shot) {
+ memory_global_dirty_log_stop(flag);
+ }
+ qemu_mutex_unlock_iothread();
+}
+
+static DirtyPageRecord *vcpu_dirty_stat_alloc(VcpuStat *stat)
+{
+ CPUState *cpu;
+ DirtyPageRecord *records;
+ int nvcpu = 0;
+
+ CPU_FOREACH(cpu) {
+ nvcpu++;
+ }
+
+ stat->nvcpu = nvcpu;
+ stat->rates = g_new0(DirtyRateVcpu, nvcpu);
+
+ records = g_new0(DirtyPageRecord, nvcpu);
+
+ return records;
+}
+
+static void vcpu_dirty_stat_collect(VcpuStat *stat,
+ DirtyPageRecord *records,
+ bool start)
+{
+ CPUState *cpu;
+
+ CPU_FOREACH(cpu) {
+ record_dirtypages(records, cpu, start);
+ }
+}
+
+int64_t vcpu_calculate_dirtyrate(int64_t calc_time_ms,
+ VcpuStat *stat,
+ unsigned int flag,
+ bool one_shot)
+{
+ DirtyPageRecord *records;
+ int64_t init_time_ms;
+ int64_t duration;
+ int64_t dirtyrate;
+ int i = 0;
+ unsigned int gen_id;
+
+retry:
+ init_time_ms = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
+
+ cpu_list_lock();
+ gen_id = cpu_list_generation_id_get();
+ records = vcpu_dirty_stat_alloc(stat);
+ vcpu_dirty_stat_collect(stat, records, true);
+ cpu_list_unlock();
+
+ duration = dirty_stat_wait(calc_time_ms, init_time_ms);
+
+ global_dirty_log_sync(flag, one_shot);
+
+ cpu_list_lock();
+ if (gen_id != cpu_list_generation_id_get()) {
+ g_free(records);
+ g_free(stat->rates);
+ cpu_list_unlock();
+ goto retry;
+ }
+ vcpu_dirty_stat_collect(stat, records, false);
+ cpu_list_unlock();
+
+ for (i = 0; i < stat->nvcpu; i++) {
+ dirtyrate = do_calculate_dirtyrate(records[i], duration);
+
+ stat->rates[i].id = i;
+ stat->rates[i].dirty_rate = dirtyrate;
+
+ trace_dirtyrate_do_calculate_vcpu(i, dirtyrate);
+ }
+
+ g_free(records);
+
+ return duration;
+}
+
static bool is_sample_period_valid(int64_t sec)
{
if (sec < MIN_FETCH_DIRTYRATE_TIME_SEC ||
return true;
}
+static bool is_sample_pages_valid(int64_t pages)
+{
+ return pages >= MIN_SAMPLE_PAGE_COUNT &&
+ pages <= MAX_SAMPLE_PAGE_COUNT;
+}
+
static int dirtyrate_set_state(int *state, int old_state, int new_state)
{
assert(new_state < DIRTY_RATE_STATUS__MAX);
static struct DirtyRateInfo *query_dirty_rate_info(void)
{
+ int i;
int64_t dirty_rate = DirtyStat.dirty_rate;
- struct DirtyRateInfo *info = g_malloc0(sizeof(DirtyRateInfo));
+ struct DirtyRateInfo *info = g_new0(DirtyRateInfo, 1);
+ DirtyRateVcpuList *head = NULL, **tail = &head;
+
+ info->status = CalculatingState;
+ info->start_time = DirtyStat.start_time;
+ info->calc_time = DirtyStat.calc_time;
+ info->sample_pages = DirtyStat.sample_pages;
+ info->mode = dirtyrate_mode;
if (qatomic_read(&CalculatingState) == DIRTY_RATE_STATUS_MEASURED) {
info->has_dirty_rate = true;
info->dirty_rate = dirty_rate;
- }
- info->status = CalculatingState;
- info->start_time = DirtyStat.start_time;
- info->calc_time = DirtyStat.calc_time;
+ if (dirtyrate_mode == DIRTY_RATE_MEASURE_MODE_DIRTY_RING) {
+ /*
+ * set sample_pages with 0 to indicate page sampling
+ * isn't enabled
+ **/
+ info->sample_pages = 0;
+ info->has_vcpu_dirty_rate = true;
+ for (i = 0; i < DirtyStat.dirty_ring.nvcpu; i++) {
+ DirtyRateVcpu *rate = g_new0(DirtyRateVcpu, 1);
+ rate->id = DirtyStat.dirty_ring.rates[i].id;
+ rate->dirty_rate = DirtyStat.dirty_ring.rates[i].dirty_rate;
+ QAPI_LIST_APPEND(tail, rate);
+ }
+ info->vcpu_dirty_rate = head;
+ }
+
+ if (dirtyrate_mode == DIRTY_RATE_MEASURE_MODE_DIRTY_BITMAP) {
+ info->sample_pages = 0;
+ }
+ }
trace_query_dirty_rate_info(DirtyRateStatus_str(CalculatingState));
return info;
}
-static void init_dirtyrate_stat(int64_t start_time, int64_t calc_time)
+static void init_dirtyrate_stat(int64_t start_time,
+ struct DirtyRateConfig config)
{
- DirtyStat.total_dirty_samples = 0;
- DirtyStat.total_sample_count = 0;
- DirtyStat.total_block_mem_MB = 0;
DirtyStat.dirty_rate = -1;
DirtyStat.start_time = start_time;
- DirtyStat.calc_time = calc_time;
+ DirtyStat.calc_time = config.sample_period_seconds;
+ DirtyStat.sample_pages = config.sample_pages_per_gigabytes;
+
+ switch (config.mode) {
+ case DIRTY_RATE_MEASURE_MODE_PAGE_SAMPLING:
+ DirtyStat.page_sampling.total_dirty_samples = 0;
+ DirtyStat.page_sampling.total_sample_count = 0;
+ DirtyStat.page_sampling.total_block_mem_MB = 0;
+ break;
+ case DIRTY_RATE_MEASURE_MODE_DIRTY_RING:
+ DirtyStat.dirty_ring.nvcpu = -1;
+ DirtyStat.dirty_ring.rates = NULL;
+ break;
+ default:
+ break;
+ }
+}
+
+static void cleanup_dirtyrate_stat(struct DirtyRateConfig config)
+{
+ /* last calc-dirty-rate qmp use dirty ring mode */
+ if (dirtyrate_mode == DIRTY_RATE_MEASURE_MODE_DIRTY_RING) {
+ free(DirtyStat.dirty_ring.rates);
+ DirtyStat.dirty_ring.rates = NULL;
+ }
}
static void update_dirtyrate_stat(struct RamblockDirtyInfo *info)
{
- DirtyStat.total_dirty_samples += info->sample_dirty_count;
- DirtyStat.total_sample_count += info->sample_pages_count;
+ DirtyStat.page_sampling.total_dirty_samples += info->sample_dirty_count;
+ DirtyStat.page_sampling.total_sample_count += info->sample_pages_count;
/* size of total pages in MB */
- DirtyStat.total_block_mem_MB += (info->ramblock_pages *
- TARGET_PAGE_SIZE) >> 20;
+ DirtyStat.page_sampling.total_block_mem_MB += (info->ramblock_pages *
+ TARGET_PAGE_SIZE) >> 20;
}
static void update_dirtyrate(uint64_t msec)
{
uint64_t dirtyrate;
- uint64_t total_dirty_samples = DirtyStat.total_dirty_samples;
- uint64_t total_sample_count = DirtyStat.total_sample_count;
- uint64_t total_block_mem_MB = DirtyStat.total_block_mem_MB;
+ uint64_t total_dirty_samples = DirtyStat.page_sampling.total_dirty_samples;
+ uint64_t total_sample_count = DirtyStat.page_sampling.total_sample_count;
+ uint64_t total_block_mem_MB = DirtyStat.page_sampling.total_block_mem_MB;
dirtyrate = total_dirty_samples * total_block_mem_MB *
1000 / (total_sample_count * msec);
update_dirtyrate_stat(block_dinfo);
}
- if (DirtyStat.total_sample_count == 0) {
+ if (DirtyStat.page_sampling.total_sample_count == 0) {
return false;
}
return true;
}
-static void calculate_dirtyrate(struct DirtyRateConfig config)
+static inline void record_dirtypages_bitmap(DirtyPageRecord *dirty_pages,
+ bool start)
+{
+ if (start) {
+ dirty_pages->start_pages = total_dirty_pages;
+ } else {
+ dirty_pages->end_pages = total_dirty_pages;
+ }
+}
+
+static inline void dirtyrate_manual_reset_protect(void)
+{
+ RAMBlock *block = NULL;
+
+ WITH_RCU_READ_LOCK_GUARD() {
+ RAMBLOCK_FOREACH_MIGRATABLE(block) {
+ memory_region_clear_dirty_bitmap(block->mr, 0,
+ block->used_length);
+ }
+ }
+}
+
+static void calculate_dirtyrate_dirty_bitmap(struct DirtyRateConfig config)
+{
+ int64_t msec = 0;
+ int64_t start_time;
+ DirtyPageRecord dirty_pages;
+
+ qemu_mutex_lock_iothread();
+ memory_global_dirty_log_start(GLOBAL_DIRTY_DIRTY_RATE);
+
+ /*
+ * 1'round of log sync may return all 1 bits with
+ * KVM_DIRTY_LOG_INITIALLY_SET enable
+ * skip it unconditionally and start dirty tracking
+ * from 2'round of log sync
+ */
+ memory_global_dirty_log_sync();
+
+ /*
+ * reset page protect manually and unconditionally.
+ * this make sure kvm dirty log be cleared if
+ * KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE cap is enabled.
+ */
+ dirtyrate_manual_reset_protect();
+ qemu_mutex_unlock_iothread();
+
+ record_dirtypages_bitmap(&dirty_pages, true);
+
+ start_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
+ DirtyStat.start_time = start_time / 1000;
+
+ msec = config.sample_period_seconds * 1000;
+ msec = dirty_stat_wait(msec, start_time);
+ DirtyStat.calc_time = msec / 1000;
+
+ /*
+ * do two things.
+ * 1. fetch dirty bitmap from kvm
+ * 2. stop dirty tracking
+ */
+ global_dirty_log_sync(GLOBAL_DIRTY_DIRTY_RATE, true);
+
+ record_dirtypages_bitmap(&dirty_pages, false);
+
+ DirtyStat.dirty_rate = do_calculate_dirtyrate(dirty_pages, msec);
+}
+
+static void calculate_dirtyrate_dirty_ring(struct DirtyRateConfig config)
+{
+ int64_t duration;
+ uint64_t dirtyrate = 0;
+ uint64_t dirtyrate_sum = 0;
+ int i = 0;
+
+ /* start log sync */
+ global_dirty_log_change(GLOBAL_DIRTY_DIRTY_RATE, true);
+
+ DirtyStat.start_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME) / 1000;
+
+ /* calculate vcpu dirtyrate */
+ duration = vcpu_calculate_dirtyrate(config.sample_period_seconds * 1000,
+ &DirtyStat.dirty_ring,
+ GLOBAL_DIRTY_DIRTY_RATE,
+ true);
+
+ DirtyStat.calc_time = duration / 1000;
+
+ /* calculate vm dirtyrate */
+ for (i = 0; i < DirtyStat.dirty_ring.nvcpu; i++) {
+ dirtyrate = DirtyStat.dirty_ring.rates[i].dirty_rate;
+ DirtyStat.dirty_ring.rates[i].dirty_rate = dirtyrate;
+ dirtyrate_sum += dirtyrate;
+ }
+
+ DirtyStat.dirty_rate = dirtyrate_sum;
+}
+
+static void calculate_dirtyrate_sample_vm(struct DirtyRateConfig config)
{
struct RamblockDirtyInfo *block_dinfo = NULL;
int block_count = 0;
int64_t msec = 0;
int64_t initial_time;
- rcu_register_thread();
rcu_read_lock();
initial_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
if (!record_ramblock_hash_info(&block_dinfo, config, &block_count)) {
rcu_read_unlock();
msec = config.sample_period_seconds * 1000;
- msec = set_sample_page_period(msec, initial_time);
+ msec = dirty_stat_wait(msec, initial_time);
DirtyStat.start_time = initial_time / 1000;
DirtyStat.calc_time = msec / 1000;
out:
rcu_read_unlock();
free_ramblock_dirty_info(block_dinfo, block_count);
- rcu_unregister_thread();
+}
+
+static void calculate_dirtyrate(struct DirtyRateConfig config)
+{
+ if (config.mode == DIRTY_RATE_MEASURE_MODE_DIRTY_BITMAP) {
+ calculate_dirtyrate_dirty_bitmap(config);
+ } else if (config.mode == DIRTY_RATE_MEASURE_MODE_DIRTY_RING) {
+ calculate_dirtyrate_dirty_ring(config);
+ } else {
+ calculate_dirtyrate_sample_vm(config);
+ }
+
+ trace_dirtyrate_calculate(DirtyStat.dirty_rate);
}
void *get_dirtyrate_thread(void *arg)
{
struct DirtyRateConfig config = *(struct DirtyRateConfig *)arg;
int ret;
- int64_t start_time;
- int64_t calc_time;
+ rcu_register_thread();
ret = dirtyrate_set_state(&CalculatingState, DIRTY_RATE_STATUS_UNSTARTED,
DIRTY_RATE_STATUS_MEASURING);
return NULL;
}
- start_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME) / 1000;
- calc_time = config.sample_period_seconds;
- init_dirtyrate_stat(start_time, calc_time);
-
calculate_dirtyrate(config);
ret = dirtyrate_set_state(&CalculatingState, DIRTY_RATE_STATUS_MEASURING,
if (ret == -1) {
error_report("change dirtyrate state failed.");
}
+
+ rcu_unregister_thread();
return NULL;
}
-void qmp_calc_dirty_rate(int64_t calc_time, Error **errp)
+void qmp_calc_dirty_rate(int64_t calc_time,
+ bool has_sample_pages,
+ int64_t sample_pages,
+ bool has_mode,
+ DirtyRateMeasureMode mode,
+ Error **errp)
{
static struct DirtyRateConfig config;
QemuThread thread;
int ret;
+ int64_t start_time;
/*
* If the dirty rate is already being measured, don't attempt to start.
return;
}
+ if (!has_mode) {
+ mode = DIRTY_RATE_MEASURE_MODE_PAGE_SAMPLING;
+ }
+
+ if (has_sample_pages && mode == DIRTY_RATE_MEASURE_MODE_DIRTY_RING) {
+ error_setg(errp, "either sample-pages or dirty-ring can be specified.");
+ return;
+ }
+
+ if (has_sample_pages) {
+ if (!is_sample_pages_valid(sample_pages)) {
+ error_setg(errp, "sample-pages is out of range[%d, %d].",
+ MIN_SAMPLE_PAGE_COUNT,
+ MAX_SAMPLE_PAGE_COUNT);
+ return;
+ }
+ } else {
+ sample_pages = DIRTYRATE_DEFAULT_SAMPLE_PAGES;
+ }
+
+ /*
+ * dirty ring mode only works when kvm dirty ring is enabled.
+ * on the contrary, dirty bitmap mode is not.
+ */
+ if (((mode == DIRTY_RATE_MEASURE_MODE_DIRTY_RING) &&
+ !kvm_dirty_ring_enabled()) ||
+ ((mode == DIRTY_RATE_MEASURE_MODE_DIRTY_BITMAP) &&
+ kvm_dirty_ring_enabled())) {
+ error_setg(errp, "mode %s is not enabled, use other method instead.",
+ DirtyRateMeasureMode_str(mode));
+ return;
+ }
+
/*
* Init calculation state as unstarted.
*/
}
config.sample_period_seconds = calc_time;
- config.sample_pages_per_gigabytes = DIRTYRATE_DEFAULT_SAMPLE_PAGES;
+ config.sample_pages_per_gigabytes = sample_pages;
+ config.mode = mode;
+
+ cleanup_dirtyrate_stat(config);
+
+ /*
+ * update dirty rate mode so that we can figure out what mode has
+ * been used in last calculation
+ **/
+ dirtyrate_mode = mode;
+
+ start_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME) / 1000;
+ init_dirtyrate_stat(start_time, config);
+
qemu_thread_create(&thread, "get_dirtyrate", get_dirtyrate_thread,
(void *)&config, QEMU_THREAD_DETACHED);
}
{
return query_dirty_rate_info();
}
+
+void hmp_info_dirty_rate(Monitor *mon, const QDict *qdict)
+{
+ DirtyRateInfo *info = query_dirty_rate_info();
+
+ monitor_printf(mon, "Status: %s\n",
+ DirtyRateStatus_str(info->status));
+ monitor_printf(mon, "Start Time: %"PRIi64" (ms)\n",
+ info->start_time);
+ monitor_printf(mon, "Sample Pages: %"PRIu64" (per GB)\n",
+ info->sample_pages);
+ monitor_printf(mon, "Period: %"PRIi64" (sec)\n",
+ info->calc_time);
+ monitor_printf(mon, "Mode: %s\n",
+ DirtyRateMeasureMode_str(info->mode));
+ monitor_printf(mon, "Dirty rate: ");
+ if (info->has_dirty_rate) {
+ monitor_printf(mon, "%"PRIi64" (MB/s)\n", info->dirty_rate);
+ if (info->has_vcpu_dirty_rate) {
+ DirtyRateVcpuList *rate, *head = info->vcpu_dirty_rate;
+ for (rate = head; rate != NULL; rate = rate->next) {
+ monitor_printf(mon, "vcpu[%"PRIi64"], Dirty rate: %"PRIi64
+ " (MB/s)\n", rate->value->id,
+ rate->value->dirty_rate);
+ }
+ }
+ } else {
+ monitor_printf(mon, "(not ready)\n");
+ }
+
+ qapi_free_DirtyRateVcpuList(info->vcpu_dirty_rate);
+ g_free(info);
+}
+
+void hmp_calc_dirty_rate(Monitor *mon, const QDict *qdict)
+{
+ int64_t sec = qdict_get_try_int(qdict, "second", 0);
+ int64_t sample_pages = qdict_get_try_int(qdict, "sample_pages_per_GB", -1);
+ bool has_sample_pages = (sample_pages != -1);
+ bool dirty_ring = qdict_get_try_bool(qdict, "dirty_ring", false);
+ bool dirty_bitmap = qdict_get_try_bool(qdict, "dirty_bitmap", false);
+ DirtyRateMeasureMode mode = DIRTY_RATE_MEASURE_MODE_PAGE_SAMPLING;
+ Error *err = NULL;
+
+ if (!sec) {
+ monitor_printf(mon, "Incorrect period length specified!\n");
+ return;
+ }
+
+ if (dirty_ring && dirty_bitmap) {
+ monitor_printf(mon, "Either dirty ring or dirty bitmap "
+ "can be specified!\n");
+ return;
+ }
+
+ if (dirty_bitmap) {
+ mode = DIRTY_RATE_MEASURE_MODE_DIRTY_BITMAP;
+ } else if (dirty_ring) {
+ mode = DIRTY_RATE_MEASURE_MODE_DIRTY_RING;
+ }
+
+ qmp_calc_dirty_rate(sec, has_sample_pages, sample_pages, true,
+ mode, &err);
+ if (err) {
+ hmp_handle_error(mon, err);
+ return;
+ }
+
+ monitor_printf(mon, "Starting dirty rate measurement with period %"PRIi64
+ " seconds\n", sec);
+ monitor_printf(mon, "[Please use 'info dirty_rate' to check results]\n");
+}