#include "qapi/error.h"
#include "cpu.h"
#include "exec/ramblock.h"
+#include "exec/ram_addr.h"
#include "qemu/rcu_queue.h"
+#include "qemu/main-loop.h"
#include "qapi/qapi-commands-migration.h"
#include "ram.h"
#include "trace.h"
#include "monitor/hmp.h"
#include "monitor/monitor.h"
#include "qapi/qmp/qdict.h"
+#include "sysemu/kvm.h"
+#include "sysemu/runstate.h"
+#include "exec/memory.h"
+
+/*
+ * total_dirty_pages is protected by the BQL and is used
+ * to count the pages dirtied during the period between two
+ * memory_global_dirty_log_sync calls
+ */
+uint64_t total_dirty_pages;
+
+/*
+ * Pair of dirty-page counter snapshots taken at the start and at the
+ * end of a measurement window; do_calculate_dirtyrate_vcpu() derives a
+ * rate from the difference between the two.
+ */
+typedef struct DirtyPageRecord {
+ uint64_t start_pages; /* counter value sampled when measuring starts */
+ uint64_t end_pages; /* counter value sampled when measuring stops */
+} DirtyPageRecord;
static int CalculatingState = DIRTY_RATE_STATUS_UNSTARTED;
static struct DirtyRateStat DirtyStat;
+static DirtyRateMeasureMode dirtyrate_mode =
+ DIRTY_RATE_MEASURE_MODE_PAGE_SAMPLING;
static int64_t set_sample_page_period(int64_t msec, int64_t initial_time)
{
static struct DirtyRateInfo *query_dirty_rate_info(void)
{
+ int i;
int64_t dirty_rate = DirtyStat.dirty_rate;
struct DirtyRateInfo *info = g_malloc0(sizeof(DirtyRateInfo));
-
- if (qatomic_read(&CalculatingState) == DIRTY_RATE_STATUS_MEASURED) {
- info->has_dirty_rate = true;
- info->dirty_rate = dirty_rate;
- }
+ DirtyRateVcpuList *head = NULL, **tail = &head;
info->status = CalculatingState;
info->start_time = DirtyStat.start_time;
info->calc_time = DirtyStat.calc_time;
info->sample_pages = DirtyStat.sample_pages;
+ info->mode = dirtyrate_mode;
+
+ if (qatomic_read(&CalculatingState) == DIRTY_RATE_STATUS_MEASURED) {
+ info->has_dirty_rate = true;
+ info->dirty_rate = dirty_rate;
+
+ if (dirtyrate_mode == DIRTY_RATE_MEASURE_MODE_DIRTY_RING) {
+ /*
+ * set sample_pages with 0 to indicate page sampling
+ * isn't enabled
+ **/
+ info->sample_pages = 0;
+ info->has_vcpu_dirty_rate = true;
+ for (i = 0; i < DirtyStat.dirty_ring.nvcpu; i++) {
+ DirtyRateVcpu *rate = g_malloc0(sizeof(DirtyRateVcpu));
+ rate->id = DirtyStat.dirty_ring.rates[i].id;
+ rate->dirty_rate = DirtyStat.dirty_ring.rates[i].dirty_rate;
+ QAPI_LIST_APPEND(tail, rate);
+ }
+ info->vcpu_dirty_rate = head;
+ }
+
+ if (dirtyrate_mode == DIRTY_RATE_MEASURE_MODE_DIRTY_BITMAP) {
+ info->sample_pages = 0;
+ }
+ }
trace_query_dirty_rate_info(DirtyRateStatus_str(CalculatingState));
}
}
+/*
+ * Release the per-vCPU rate array left over from the previous
+ * measurement.  Nothing is done unless the mode recorded in
+ * dirtyrate_mode is dirty-ring.  @config is not used.
+ */
+static void cleanup_dirtyrate_stat(struct DirtyRateConfig config)
+{
+    if (dirtyrate_mode != DIRTY_RATE_MEASURE_MODE_DIRTY_RING) {
+        return;
+    }
+
+    /* the previous calc-dirty-rate request ran in dirty-ring mode */
+    free(DirtyStat.dirty_ring.rates);
+    DirtyStat.dirty_ring.rates = NULL;
+}
+
static void update_dirtyrate_stat(struct RamblockDirtyInfo *info)
{
DirtyStat.page_sampling.total_dirty_samples += info->sample_dirty_count;
return true;
}
-static void calculate_dirtyrate(struct DirtyRateConfig config)
+/*
+ * Snapshot @cpu's dirty page counter into its slot of @dirty_pages.
+ * The array is indexed by cpu->cpu_index; the start field is written
+ * when @start is true, the end field otherwise.
+ */
+static inline void record_dirtypages(DirtyPageRecord *dirty_pages,
+                                     CPUState *cpu, bool start)
+{
+    DirtyPageRecord *record = &dirty_pages[cpu->cpu_index];
+
+    if (!start) {
+        record->end_pages = cpu->dirty_pages;
+        return;
+    }
+
+    record->start_pages = cpu->dirty_pages;
+}
+
+/*
+ * Start global dirty logging on behalf of dirty rate measurement
+ * (GLOBAL_DIRTY_DIRTY_RATE), holding the iothread lock around the call.
+ */
+static void dirtyrate_global_dirty_log_start(void)
+{
+ qemu_mutex_lock_iothread();
+ memory_global_dirty_log_start(GLOBAL_DIRTY_DIRTY_RATE);
+ qemu_mutex_unlock_iothread();
+}
+
+/*
+ * Run one more dirty log sync and then stop the global dirty logging
+ * that was started for GLOBAL_DIRTY_DIRTY_RATE, all under the iothread
+ * lock.
+ */
+static void dirtyrate_global_dirty_log_stop(void)
+{
+ qemu_mutex_lock_iothread();
+ /* sync before stopping; the caller relies on this order */
+ memory_global_dirty_log_sync();
+ memory_global_dirty_log_stop(GLOBAL_DIRTY_DIRTY_RATE);
+ qemu_mutex_unlock_iothread();
+}
+
+/*
+ * Convert one pair of dirty-page snapshots into a dirty rate in MB/s.
+ *
+ * The number of pages dirtied between the two snapshots is scaled by
+ * TARGET_PAGE_SIZE, converted to MB and divided by the measurement
+ * period DirtyStat.calc_time (in seconds).
+ *
+ * Returns 0 when that period is not positive, since no rate can be
+ * derived from it.
+ */
+static int64_t do_calculate_dirtyrate_vcpu(DirtyPageRecord dirty_pages)
+{
+    uint64_t memory_size_MB;
+    int64_t time_s = DirtyStat.calc_time;
+    uint64_t increased_dirty_pages =
+        dirty_pages.end_pages - dirty_pages.start_pages;
+
+    /*
+     * Guard the division: a zero period would be an integer division by
+     * zero, and a negative one would be converted to a huge unsigned
+     * divisor by the usual arithmetic conversions.
+     */
+    if (time_s <= 0) {
+        return 0;
+    }
+
+    memory_size_MB = (increased_dirty_pages * TARGET_PAGE_SIZE) >> 20;
+
+    return memory_size_MB / (uint64_t)time_s;
+}
+
+/*
+ * Snapshot the global total_dirty_pages counter into @dirty_pages:
+ * into the start field when @start is true, into the end field
+ * otherwise.
+ */
+static inline void record_dirtypages_bitmap(DirtyPageRecord *dirty_pages,
+                                            bool start)
+{
+    uint64_t *slot = start ? &dirty_pages->start_pages
+                           : &dirty_pages->end_pages;
+
+    *slot = total_dirty_pages;
+}
+
+/*
+ * Derive the dirty rate from the snapshot pair @dirty_pages and publish
+ * it as the overall result in DirtyStat.dirty_rate.
+ */
+static void do_calculate_dirtyrate_bitmap(DirtyPageRecord dirty_pages)
+{
+    int64_t rate = do_calculate_dirtyrate_vcpu(dirty_pages);
+
+    DirtyStat.dirty_rate = rate;
+}
+
+/*
+ * Clear the dirty bitmap of every migratable RAMBlock, from offset 0
+ * over the block's used_length.  The block list is walked inside an RCU
+ * read lock guard.
+ */
+static inline void dirtyrate_manual_reset_protect(void)
+{
+ RAMBlock *block = NULL;
+
+ WITH_RCU_READ_LOCK_GUARD() {
+ RAMBLOCK_FOREACH_MIGRATABLE(block) {
+ memory_region_clear_dirty_bitmap(block->mr, 0,
+ block->used_length);
+ }
+ }
+}
+
+/*
+ * Measure the dirty rate in dirty-bitmap mode.
+ *
+ * Dirty logging is started, total_dirty_pages is snapshotted, the
+ * sampling period from @config is handed to set_sample_page_period(),
+ * logging is stopped and total_dirty_pages is snapshotted again.  The
+ * difference between the two snapshots is turned into
+ * DirtyStat.dirty_rate; DirtyStat.start_time and DirtyStat.calc_time
+ * are filled in along the way.
+ */
+static void calculate_dirtyrate_dirty_bitmap(struct DirtyRateConfig config)
+{
+ int64_t msec = 0;
+ int64_t start_time;
+ DirtyPageRecord dirty_pages;
+
+ qemu_mutex_lock_iothread();
+ memory_global_dirty_log_start(GLOBAL_DIRTY_DIRTY_RATE);
+
+ /*
+ * The first round of log sync may return all bits set when
+ * KVM_DIRTY_LOG_INITIALLY_SET is enabled.
+ * Skip it unconditionally and start dirty tracking
+ * from the second round of log sync.
+ */
+ memory_global_dirty_log_sync();
+
+ /*
+ * Reset page protection manually and unconditionally.
+ * This makes sure the kvm dirty log is cleared if the
+ * KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE cap is enabled.
+ */
+ dirtyrate_manual_reset_protect();
+ qemu_mutex_unlock_iothread();
+
+ /* start-of-window snapshot of total_dirty_pages */
+ record_dirtypages_bitmap(&dirty_pages, true);
+
+ start_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
+ DirtyStat.start_time = start_time / 1000;
+
+ /*
+ * NOTE(review): set_sample_page_period() is not fully visible here;
+ * presumably it waits out the sampling period and returns the
+ * elapsed time in milliseconds -- confirm.
+ */
+ msec = config.sample_period_seconds * 1000;
+ msec = set_sample_page_period(msec, start_time);
+ DirtyStat.calc_time = msec / 1000;
+
+ /*
+ * dirtyrate_global_dirty_log_stop() does two things:
+ * 1. fetch the dirty bitmap from kvm
+ * 2. stop dirty tracking
+ */
+ dirtyrate_global_dirty_log_stop();
+
+ /* end-of-window snapshot of total_dirty_pages */
+ record_dirtypages_bitmap(&dirty_pages, false);
+
+ do_calculate_dirtyrate_bitmap(dirty_pages);
+}
+
+/*
+ * Measure the dirty rate in dirty-ring mode.
+ *
+ * Each vCPU's dirty page counter is snapshotted before and after the
+ * sampling period taken from @config.  The per-vCPU rates are stored in
+ * DirtyStat.dirty_ring.rates (DirtyStat.dirty_ring.nvcpu entries) and
+ * their sum becomes DirtyStat.dirty_rate.  DirtyStat.start_time and
+ * DirtyStat.calc_time are filled in along the way.
+ *
+ * The rates array stays allocated after return; it is released by
+ * cleanup_dirtyrate_stat().
+ */
+static void calculate_dirtyrate_dirty_ring(struct DirtyRateConfig config)
+{
+    CPUState *cpu;
+    int64_t msec = 0;
+    int64_t start_time;
+    uint64_t dirtyrate = 0;
+    uint64_t dirtyrate_sum = 0;
+    DirtyPageRecord *dirty_pages;
+    int nvcpu = 0;
+    int i = 0;
+
+    CPU_FOREACH(cpu) {
+        nvcpu++;
+    }
+
+    /*
+     * g_new0() aborts on allocation failure instead of returning NULL,
+     * checks the size multiplication for overflow and zero-fills the
+     * buffer, so a slot that is never written yields a rate of 0 rather
+     * than garbage.
+     *
+     * NOTE(review): record_dirtypages() indexes this array by
+     * cpu->cpu_index, which assumes the indexes are dense in
+     * [0, nvcpu) -- confirm for configurations with vCPU hot-unplug.
+     */
+    dirty_pages = g_new0(DirtyPageRecord, nvcpu);
+
+    /*
+     * The rates array is released with free() in
+     * cleanup_dirtyrate_stat(); GLib >= 2.46 documents g_malloc() and
+     * its wrappers as always using the system allocator, so the two
+     * are compatible.
+     */
+    DirtyStat.dirty_ring.nvcpu = nvcpu;
+    DirtyStat.dirty_ring.rates = g_new0(DirtyRateVcpu, nvcpu);
+
+    dirtyrate_global_dirty_log_start();
+
+    /* start-of-window snapshot of every vCPU's counter */
+    CPU_FOREACH(cpu) {
+        record_dirtypages(dirty_pages, cpu, true);
+    }
+
+    start_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
+    DirtyStat.start_time = start_time / 1000;
+
+    msec = config.sample_period_seconds * 1000;
+    msec = set_sample_page_period(msec, start_time);
+    DirtyStat.calc_time = msec / 1000;
+
+    dirtyrate_global_dirty_log_stop();
+
+    /* end-of-window snapshot of every vCPU's counter */
+    CPU_FOREACH(cpu) {
+        record_dirtypages(dirty_pages, cpu, false);
+    }
+
+    for (i = 0; i < DirtyStat.dirty_ring.nvcpu; i++) {
+        dirtyrate = do_calculate_dirtyrate_vcpu(dirty_pages[i]);
+        trace_dirtyrate_do_calculate_vcpu(i, dirtyrate);
+
+        DirtyStat.dirty_ring.rates[i].id = i;
+        DirtyStat.dirty_ring.rates[i].dirty_rate = dirtyrate;
+        dirtyrate_sum += dirtyrate;
+    }
+
+    DirtyStat.dirty_rate = dirtyrate_sum;
+    g_free(dirty_pages);
+}
+
+static void calculate_dirtyrate_sample_vm(struct DirtyRateConfig config)
{
struct RamblockDirtyInfo *block_dinfo = NULL;
int block_count = 0;
free_ramblock_dirty_info(block_dinfo, block_count);
}
+/*
+ * Dispatch the measurement to the implementation selected by
+ * config.mode -- dirty bitmap, dirty ring, or page sampling for any
+ * other value -- then trace the resulting DirtyStat.dirty_rate.
+ */
+static void calculate_dirtyrate(struct DirtyRateConfig config)
+{
+    switch (config.mode) {
+    case DIRTY_RATE_MEASURE_MODE_DIRTY_BITMAP:
+        calculate_dirtyrate_dirty_bitmap(config);
+        break;
+    case DIRTY_RATE_MEASURE_MODE_DIRTY_RING:
+        calculate_dirtyrate_dirty_ring(config);
+        break;
+    default:
+        calculate_dirtyrate_sample_vm(config);
+        break;
+    }
+
+    trace_dirtyrate_calculate(DirtyStat.dirty_rate);
+}
+
void *get_dirtyrate_thread(void *arg)
{
struct DirtyRateConfig config = *(struct DirtyRateConfig *)arg;
int ret;
- int64_t start_time;
rcu_register_thread();
ret = dirtyrate_set_state(&CalculatingState, DIRTY_RATE_STATUS_UNSTARTED,
return NULL;
}
- start_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME) / 1000;
- init_dirtyrate_stat(start_time, config);
-
calculate_dirtyrate(config);
ret = dirtyrate_set_state(&CalculatingState, DIRTY_RATE_STATUS_MEASURING,
return NULL;
}
-void qmp_calc_dirty_rate(int64_t calc_time, bool has_sample_pages,
- int64_t sample_pages, Error **errp)
+void qmp_calc_dirty_rate(int64_t calc_time,
+ bool has_sample_pages,
+ int64_t sample_pages,
+ bool has_mode,
+ DirtyRateMeasureMode mode,
+ Error **errp)
{
static struct DirtyRateConfig config;
QemuThread thread;
int ret;
+ int64_t start_time;
/*
* If the dirty rate is already being measured, don't attempt to start.
return;
}
+ if (!has_mode) {
+ mode = DIRTY_RATE_MEASURE_MODE_PAGE_SAMPLING;
+ }
+
+ if (has_sample_pages && mode == DIRTY_RATE_MEASURE_MODE_DIRTY_RING) {
+ error_setg(errp, "either sample-pages or dirty-ring can be specified.");
+ return;
+ }
+
if (has_sample_pages) {
if (!is_sample_pages_valid(sample_pages)) {
error_setg(errp, "sample-pages is out of range[%d, %d].",
sample_pages = DIRTYRATE_DEFAULT_SAMPLE_PAGES;
}
+ /*
+ * dirty ring mode only works when kvm dirty ring is enabled;
+ * conversely, dirty bitmap mode only works when it is disabled.
+ */
+ if (((mode == DIRTY_RATE_MEASURE_MODE_DIRTY_RING) &&
+ !kvm_dirty_ring_enabled()) ||
+ ((mode == DIRTY_RATE_MEASURE_MODE_DIRTY_BITMAP) &&
+ kvm_dirty_ring_enabled())) {
+ error_setg(errp, "mode %s is not enabled, use other method instead.",
+ DirtyRateMeasureMode_str(mode));
+ return;
+ }
+
/*
* Init calculation state as unstarted.
*/
config.sample_period_seconds = calc_time;
config.sample_pages_per_gigabytes = sample_pages;
- config.mode = DIRTY_RATE_MEASURE_MODE_PAGE_SAMPLING;
+ config.mode = mode;
+
+ cleanup_dirtyrate_stat(config);
+
+ /*
+ * update dirty rate mode so that we can figure out what mode has
+ * been used in last calculation
+ **/
+ dirtyrate_mode = mode;
+
+ start_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME) / 1000;
+ init_dirtyrate_stat(start_time, config);
+
qemu_thread_create(&thread, "get_dirtyrate", get_dirtyrate_thread,
(void *)&config, QEMU_THREAD_DETACHED);
}
info->sample_pages);
monitor_printf(mon, "Period: %"PRIi64" (sec)\n",
info->calc_time);
+ monitor_printf(mon, "Mode: %s\n",
+ DirtyRateMeasureMode_str(info->mode));
monitor_printf(mon, "Dirty rate: ");
if (info->has_dirty_rate) {
monitor_printf(mon, "%"PRIi64" (MB/s)\n", info->dirty_rate);
+ if (info->has_vcpu_dirty_rate) {
+ DirtyRateVcpuList *rate, *head = info->vcpu_dirty_rate;
+ for (rate = head; rate != NULL; rate = rate->next) {
+ monitor_printf(mon, "vcpu[%"PRIi64"], Dirty rate: %"PRIi64
+ " (MB/s)\n", rate->value->id,
+ rate->value->dirty_rate);
+ }
+ }
} else {
monitor_printf(mon, "(not ready)\n");
}
+
+ qapi_free_DirtyRateVcpuList(info->vcpu_dirty_rate);
g_free(info);
}
int64_t sec = qdict_get_try_int(qdict, "second", 0);
int64_t sample_pages = qdict_get_try_int(qdict, "sample_pages_per_GB", -1);
bool has_sample_pages = (sample_pages != -1);
+ bool dirty_ring = qdict_get_try_bool(qdict, "dirty_ring", false);
+ bool dirty_bitmap = qdict_get_try_bool(qdict, "dirty_bitmap", false);
+ DirtyRateMeasureMode mode = DIRTY_RATE_MEASURE_MODE_PAGE_SAMPLING;
Error *err = NULL;
if (!sec) {
return;
}
- qmp_calc_dirty_rate(sec, has_sample_pages, sample_pages, &err);
+ if (dirty_ring && dirty_bitmap) {
+ monitor_printf(mon, "Either dirty ring or dirty bitmap "
+ "can be specified!\n");
+ return;
+ }
+
+ if (dirty_bitmap) {
+ mode = DIRTY_RATE_MEASURE_MODE_DIRTY_BITMAP;
+ } else if (dirty_ring) {
+ mode = DIRTY_RATE_MEASURE_MODE_DIRTY_RING;
+ }
+
+ qmp_calc_dirty_rate(sec, has_sample_pages, sample_pages, true,
+ mode, &err);
if (err) {
hmp_handle_error(mon, err);
return;