X-Git-Url: https://git.proxmox.com/?p=qemu.git;a=blobdiff_plain;f=cpus.c;h=01d128d7aff71d98b6acf195a7ada6b69d2d5a35;hp=d74cc117b38b947961db8ea5611de09230621d54;hb=HEAD;hpb=9889e04ac193cad7fa0526573ce0cc752dcabb99 diff --git a/cpus.c b/cpus.c index d74cc117b..01d128d7a 100644 --- a/cpus.c +++ b/cpus.c @@ -37,6 +37,7 @@ #include "sysemu/qtest.h" #include "qemu/main-loop.h" #include "qemu/bitmap.h" +#include "qemu/seqlock.h" #ifndef _WIN32 #include "qemu/compatfd.h" @@ -86,7 +87,7 @@ static bool all_cpu_threads_idle(void) { CPUState *cpu; - for (cpu = first_cpu; cpu != NULL; cpu = cpu->next_cpu) { + CPU_FOREACH(cpu) { if (!cpu_thread_is_idle(cpu)) { return false; } @@ -97,21 +98,32 @@ static bool all_cpu_threads_idle(void) /***********************************************************/ /* guest cycle counter */ +/* Protected by TimersState seqlock */ + +/* Compensate for varying guest execution speed. */ +static int64_t qemu_icount_bias; +static int64_t vm_clock_warp_start; /* Conversion factor from emulated instructions to virtual clock ticks. */ static int icount_time_shift; /* Arbitrarily pick 1MIPS as the minimum allowable speed. */ #define MAX_ICOUNT_SHIFT 10 -/* Compensate for varying guest execution speed. */ -static int64_t qemu_icount_bias; + +/* Only written by TCG thread */ +static int64_t qemu_icount; + static QEMUTimer *icount_rt_timer; static QEMUTimer *icount_vm_timer; static QEMUTimer *icount_warp_timer; -static int64_t vm_clock_warp_start; -static int64_t qemu_icount; typedef struct TimersState { + /* Protected by BQL. */ int64_t cpu_ticks_prev; int64_t cpu_ticks_offset; + + /* cpu_clock_offset can be read out of BQL, so protect it with + * this lock. + */ + QemuSeqLock vm_clock_seqlock; int64_t cpu_clock_offset; int32_t cpu_ticks_enabled; int64_t dummy; @@ -120,7 +132,7 @@ typedef struct TimersState { static TimersState timers_state; /* Return the virtual CPU time, based on the instruction counter. */ -int64_t cpu_get_icount(void) +static int64_t cpu_get_icount_locked(void) { int64_t icount; CPUState *cpu = current_cpu; @@ -136,58 +148,100 @@ int64_t cpu_get_icount(void) return qemu_icount_bias + (icount << icount_time_shift); } +int64_t cpu_get_icount(void) +{ + int64_t icount; + unsigned start; + + do { + start = seqlock_read_begin(&timers_state.vm_clock_seqlock); + icount = cpu_get_icount_locked(); + } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start)); + + return icount; +} + /* return the host CPU cycle counter and handle stop/restart */ +/* Caller must hold the BQL */ int64_t cpu_get_ticks(void) { + int64_t ticks; + if (use_icount) { return cpu_get_icount(); } - if (!timers_state.cpu_ticks_enabled) { - return timers_state.cpu_ticks_offset; - } else { - int64_t ticks; - ticks = cpu_get_real_ticks(); - if (timers_state.cpu_ticks_prev > ticks) { - /* Note: non increasing ticks may happen if the host uses - software suspend */ - timers_state.cpu_ticks_offset += timers_state.cpu_ticks_prev - ticks; - } - timers_state.cpu_ticks_prev = ticks; - return ticks + timers_state.cpu_ticks_offset; + + ticks = timers_state.cpu_ticks_offset; + if (timers_state.cpu_ticks_enabled) { + ticks += cpu_get_real_ticks(); } + + if (timers_state.cpu_ticks_prev > ticks) { + /* Note: non increasing ticks may happen if the host uses + software suspend */ + timers_state.cpu_ticks_offset += timers_state.cpu_ticks_prev - ticks; + ticks = timers_state.cpu_ticks_prev; + } + + timers_state.cpu_ticks_prev = ticks; + return ticks; +} + +static int64_t cpu_get_clock_locked(void) +{ + int64_t ticks; + + ticks = timers_state.cpu_clock_offset; + if (timers_state.cpu_ticks_enabled) { + ticks += get_clock(); + } + + return ticks; } /* return the host CPU monotonic timer and handle stop/restart */ int64_t cpu_get_clock(void) { int64_t ti; - if (!timers_state.cpu_ticks_enabled) { - return timers_state.cpu_clock_offset; - } else { - ti = get_clock(); - return ti + timers_state.cpu_clock_offset; - } + unsigned start; + + do { + start = seqlock_read_begin(&timers_state.vm_clock_seqlock); + ti = cpu_get_clock_locked(); + } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start)); + + return ti; } -/* enable cpu_get_ticks() */ +/* enable cpu_get_ticks() + * Caller must hold BQL which server as mutex for vm_clock_seqlock. + */ void cpu_enable_ticks(void) { + /* Here, the really thing protected by seqlock is cpu_clock_offset. */ + seqlock_write_lock(&timers_state.vm_clock_seqlock); if (!timers_state.cpu_ticks_enabled) { timers_state.cpu_ticks_offset -= cpu_get_real_ticks(); timers_state.cpu_clock_offset -= get_clock(); timers_state.cpu_ticks_enabled = 1; } + seqlock_write_unlock(&timers_state.vm_clock_seqlock); } /* disable cpu_get_ticks() : the clock is stopped. You must not call - cpu_get_ticks() after that. */ + * cpu_get_ticks() after that. + * Caller must hold BQL which server as mutex for vm_clock_seqlock. + */ void cpu_disable_ticks(void) { + /* Here, the really thing protected by seqlock is cpu_clock_offset. */ + seqlock_write_lock(&timers_state.vm_clock_seqlock); if (timers_state.cpu_ticks_enabled) { - timers_state.cpu_ticks_offset = cpu_get_ticks(); - timers_state.cpu_clock_offset = cpu_get_clock(); + timers_state.cpu_ticks_offset += cpu_get_real_ticks(); + timers_state.cpu_clock_offset = cpu_get_clock_locked(); timers_state.cpu_ticks_enabled = 0; } + seqlock_write_unlock(&timers_state.vm_clock_seqlock); } /* Correlation between real and virtual time is always going to be @@ -201,13 +255,19 @@ static void icount_adjust(void) int64_t cur_time; int64_t cur_icount; int64_t delta; + + /* Protected by TimersState mutex. */ static int64_t last_delta; + /* If the VM is not running, then do nothing. */ if (!runstate_is_running()) { return; } - cur_time = cpu_get_clock(); - cur_icount = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL); + + seqlock_write_lock(&timers_state.vm_clock_seqlock); + cur_time = cpu_get_clock_locked(); + cur_icount = cpu_get_icount_locked(); + delta = cur_icount - cur_time; /* FIXME: This is a very crude algorithm, somewhat prone to oscillation. */ if (delta > 0 @@ -224,6 +284,7 @@ static void icount_adjust(void) } last_delta = delta; qemu_icount_bias = cur_icount - (qemu_icount << icount_time_shift); + seqlock_write_unlock(&timers_state.vm_clock_seqlock); } static void icount_adjust_rt(void *opaque) @@ -248,30 +309,37 @@ static int64_t qemu_icount_round(int64_t count) static void icount_warp_rt(void *opaque) { - if (vm_clock_warp_start == -1) { + /* The icount_warp_timer is rescheduled soon after vm_clock_warp_start + * changes from -1 to another value, so the race here is okay. + */ + if (atomic_read(&vm_clock_warp_start) == -1) { return; } + seqlock_write_lock(&timers_state.vm_clock_seqlock); if (runstate_is_running()) { int64_t clock = qemu_clock_get_ns(QEMU_CLOCK_REALTIME); - int64_t warp_delta = clock - vm_clock_warp_start; - if (use_icount == 1) { - qemu_icount_bias += warp_delta; - } else { + int64_t warp_delta; + + warp_delta = clock - vm_clock_warp_start; + if (use_icount == 2) { /* * In adaptive mode, do not let QEMU_CLOCK_VIRTUAL run too * far ahead of real time. */ - int64_t cur_time = cpu_get_clock(); - int64_t cur_icount = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL); + int64_t cur_time = cpu_get_clock_locked(); + int64_t cur_icount = cpu_get_icount_locked(); int64_t delta = cur_time - cur_icount; - qemu_icount_bias += MIN(warp_delta, delta); - } - if (qemu_clock_expired(QEMU_CLOCK_VIRTUAL)) { - qemu_clock_notify(QEMU_CLOCK_VIRTUAL); + warp_delta = MIN(warp_delta, delta); } + qemu_icount_bias += warp_delta; } vm_clock_warp_start = -1; + seqlock_write_unlock(&timers_state.vm_clock_seqlock); + + if (qemu_clock_expired(QEMU_CLOCK_VIRTUAL)) { + qemu_clock_notify(QEMU_CLOCK_VIRTUAL); + } } void qtest_clock_warp(int64_t dest) @@ -281,7 +349,10 @@ void qtest_clock_warp(int64_t dest) while (clock < dest) { int64_t deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL); int64_t warp = MIN(dest - clock, deadline); + seqlock_write_lock(&timers_state.vm_clock_seqlock); qemu_icount_bias += warp; + seqlock_write_unlock(&timers_state.vm_clock_seqlock); + qemu_clock_run_timers(QEMU_CLOCK_VIRTUAL); clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL); } @@ -290,6 +361,7 @@ void qtest_clock_warp(int64_t dest) void qemu_clock_warp(QEMUClockType type) { + int64_t clock; int64_t deadline; /* @@ -309,8 +381,8 @@ void qemu_clock_warp(QEMUClockType type) * the earliest QEMU_CLOCK_VIRTUAL timer. */ icount_warp_rt(NULL); - if (!all_cpu_threads_idle() || !qemu_clock_has_timers(QEMU_CLOCK_VIRTUAL)) { - timer_del(icount_warp_timer); + timer_del(icount_warp_timer); + if (!all_cpu_threads_idle()) { return; } @@ -319,17 +391,11 @@ void qemu_clock_warp(QEMUClockType type) return; } - vm_clock_warp_start = qemu_clock_get_ns(QEMU_CLOCK_REALTIME); /* We want to use the earliest deadline from ALL vm_clocks */ + clock = qemu_clock_get_ns(QEMU_CLOCK_REALTIME); deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL); - - /* Maintain prior (possibly buggy) behaviour where if no deadline - * was set (as there is no QEMU_CLOCK_VIRTUAL timer) or it is more than - * INT32_MAX nanoseconds ahead, we still use INT32_MAX - * nanoseconds. - */ - if ((deadline < 0) || (deadline > INT32_MAX)) { - deadline = INT32_MAX; + if (deadline < 0) { + return; } if (deadline > 0) { @@ -350,7 +416,12 @@ void qemu_clock_warp(QEMUClockType type) * you will not be sending network packets continuously instead of * every 100ms. */ - timer_mod(icount_warp_timer, vm_clock_warp_start + deadline); + seqlock_write_lock(&timers_state.vm_clock_seqlock); + if (vm_clock_warp_start == -1 || vm_clock_warp_start > clock) { + vm_clock_warp_start = clock; + } + seqlock_write_unlock(&timers_state.vm_clock_seqlock); + timer_mod_anticipate(icount_warp_timer, clock + deadline); } else if (deadline == 0) { qemu_clock_notify(QEMU_CLOCK_VIRTUAL); } @@ -371,6 +442,7 @@ static const VMStateDescription vmstate_timers = { void configure_icount(const char *option) { + seqlock_init(&timers_state.vm_clock_seqlock, NULL); vmstate_register(NULL, 0, &vmstate_timers, &timers_state); if (!option) { return; @@ -416,7 +488,7 @@ void hw_error(const char *fmt, ...) fprintf(stderr, "qemu: hardware error: "); vfprintf(stderr, fmt, ap); fprintf(stderr, "\n"); - for (cpu = first_cpu; cpu != NULL; cpu = cpu->next_cpu) { + CPU_FOREACH(cpu) { fprintf(stderr, "CPU #%d:\n", cpu->cpu_index); cpu_dump_state(cpu, stderr, fprintf, CPU_DUMP_FPU); } @@ -428,7 +500,7 @@ void cpu_synchronize_all_states(void) { CPUState *cpu; - for (cpu = first_cpu; cpu; cpu = cpu->next_cpu) { + CPU_FOREACH(cpu) { cpu_synchronize_state(cpu); } } @@ -437,7 +509,7 @@ void cpu_synchronize_all_post_reset(void) { CPUState *cpu; - for (cpu = first_cpu; cpu; cpu = cpu->next_cpu) { + CPU_FOREACH(cpu) { cpu_synchronize_post_reset(cpu); } } @@ -446,7 +518,7 @@ void cpu_synchronize_all_post_init(void) { CPUState *cpu; - for (cpu = first_cpu; cpu; cpu = cpu->next_cpu) { + CPU_FOREACH(cpu) { cpu_synchronize_post_init(cpu); } } @@ -760,7 +832,7 @@ static void qemu_tcg_wait_io_event(void) qemu_cond_wait(&qemu_io_proceeded_cond, &qemu_global_mutex); } - for (cpu = first_cpu; cpu != NULL; cpu = cpu->next_cpu) { + CPU_FOREACH(cpu) { qemu_wait_io_event_common(cpu); } } @@ -854,12 +926,6 @@ static void *qemu_dummy_cpu_thread_fn(void *arg) static void tcg_exec_all(void); -static void tcg_signal_cpu_creation(CPUState *cpu, void *data) -{ - cpu->thread_id = qemu_get_thread_id(); - cpu->created = true; -} - static void *qemu_tcg_cpu_thread_fn(void *arg) { CPUState *cpu = arg; @@ -868,15 +934,18 @@ static void *qemu_tcg_cpu_thread_fn(void *arg) qemu_thread_get_self(cpu->thread); qemu_mutex_lock(&qemu_global_mutex); - qemu_for_each_cpu(tcg_signal_cpu_creation, NULL); + CPU_FOREACH(cpu) { + cpu->thread_id = qemu_get_thread_id(); + cpu->created = true; + } qemu_cond_signal(&qemu_cpu_cond); /* wait for initial kick-off after machine start */ - while (first_cpu->stopped) { + while (QTAILQ_FIRST(&cpus)->stopped) { qemu_cond_wait(tcg_halt_cond, &qemu_global_mutex); /* process any pending work */ - for (cpu = first_cpu; cpu != NULL; cpu = cpu->next_cpu) { + CPU_FOREACH(cpu) { qemu_wait_io_event_common(cpu); } } @@ -991,13 +1060,12 @@ void qemu_mutex_unlock_iothread(void) static int all_vcpus_paused(void) { - CPUState *cpu = first_cpu; + CPUState *cpu; - while (cpu) { + CPU_FOREACH(cpu) { if (!cpu->stopped) { return 0; } - cpu = cpu->next_cpu; } return 1; @@ -1005,23 +1073,20 @@ static int all_vcpus_paused(void) void pause_all_vcpus(void) { - CPUState *cpu = first_cpu; + CPUState *cpu; qemu_clock_enable(QEMU_CLOCK_VIRTUAL, false); - while (cpu) { + CPU_FOREACH(cpu) { cpu->stop = true; qemu_cpu_kick(cpu); - cpu = cpu->next_cpu; } if (qemu_in_vcpu_thread()) { cpu_stop_current(); if (!kvm_enabled()) { - cpu = first_cpu; - while (cpu) { + CPU_FOREACH(cpu) { cpu->stop = false; cpu->stopped = true; - cpu = cpu->next_cpu; } return; } @@ -1029,10 +1094,8 @@ void pause_all_vcpus(void) while (!all_vcpus_paused()) { qemu_cond_wait(&qemu_pause_cond, &qemu_global_mutex); - cpu = first_cpu; - while (cpu) { + CPU_FOREACH(cpu) { qemu_cpu_kick(cpu); - cpu = cpu->next_cpu; } } } @@ -1046,12 +1109,11 @@ void cpu_resume(CPUState *cpu) void resume_all_vcpus(void) { - CPUState *cpu = first_cpu; + CPUState *cpu; qemu_clock_enable(QEMU_CLOCK_VIRTUAL, true); - while (cpu) { + CPU_FOREACH(cpu) { cpu_resume(cpu); - cpu = cpu->next_cpu; } } @@ -1215,7 +1277,7 @@ static void tcg_exec_all(void) if (next_cpu == NULL) { next_cpu = first_cpu; } - for (; next_cpu != NULL && !exit_request; next_cpu = next_cpu->next_cpu) { + for (; next_cpu != NULL && !exit_request; next_cpu = CPU_NEXT(next_cpu)) { CPUState *cpu = next_cpu; CPUArchState *env = cpu->env_ptr; @@ -1240,7 +1302,7 @@ void set_numa_modes(void) CPUState *cpu; int i; - for (cpu = first_cpu; cpu != NULL; cpu = cpu->next_cpu) { + CPU_FOREACH(cpu) { for (i = 0; i < nb_numa_nodes; i++) { if (test_bit(cpu->cpu_index, node_cpumask[i])) { cpu->numa_node = i; @@ -1262,7 +1324,7 @@ CpuInfoList *qmp_query_cpus(Error **errp) CpuInfoList *head = NULL, *cur_item = NULL; CPUState *cpu; - for (cpu = first_cpu; cpu != NULL; cpu = cpu->next_cpu) { + CPU_FOREACH(cpu) { CpuInfoList *info; #if defined(TARGET_I386) X86CPU *x86_cpu = X86_CPU(cpu); @@ -1343,7 +1405,10 @@ void qmp_memsave(int64_t addr, int64_t size, const char *filename, l = sizeof(buf); if (l > size) l = size; - cpu_memory_rw_debug(cpu, addr, buf, l, 0); + if (cpu_memory_rw_debug(cpu, addr, buf, l, 0) != 0) { + error_setg(errp, "Invalid addr 0x%016" PRIx64 "specified", addr); + goto exit; + } if (fwrite(buf, 1, l, f) != l) { error_set(errp, QERR_IO_ERROR); goto exit; @@ -1391,7 +1456,7 @@ void qmp_inject_nmi(Error **errp) #if defined(TARGET_I386) CPUState *cs; - for (cs = first_cpu; cs != NULL; cs = cs->next_cpu) { + CPU_FOREACH(cs) { X86CPU *cpu = X86_CPU(cs); CPUX86State *env = &cpu->env; @@ -1405,7 +1470,7 @@ void qmp_inject_nmi(Error **errp) CPUState *cs; S390CPU *cpu; - for (cs = first_cpu; cs != NULL; cs = cs->next_cpu) { + CPU_FOREACH(cs) { cpu = S390_CPU(cs); if (cpu->env.cpu_num == monitor_get_cpu_index()) { if (s390_cpu_restart(S390_CPU(cs)) == -1) {