[mirror_qemu.git] / accel / tcg / icount-common.c

/*
 * QEMU System Emulator
 *
 * Copyright (c) 2003-2008 Fabrice Bellard
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

#include "qemu/osdep.h"
#include "qemu/cutils.h"
#include "migration/vmstate.h"
#include "qapi/error.h"
#include "qemu/error-report.h"
#include "sysemu/cpus.h"
#include "sysemu/qtest.h"
#include "qemu/main-loop.h"
#include "qemu/option.h"
#include "qemu/seqlock.h"
#include "sysemu/replay.h"
#include "sysemu/runstate.h"
#include "hw/core/cpu.h"
#include "sysemu/cpu-timers.h"
#include "sysemu/cpu-throttle.h"
#include "sysemu/cpu-timers-internal.h"

/*
 * ICOUNT: Instruction Counter
 *
 * this module is split off from cpu-timers because the icount part
 * is TCG-specific, and does not need to be built for other accels.
 */
/*
 * Mirrors the "sleep" suboption parsed by icount_configure() (default on).
 * When false, icount_start_warp_timer() jumps the icount bias straight to
 * the next deadline instead of arming the warp timer.
 */
static bool icount_sleep = true;
/*
 * Upper bound accepted for timers_state.icount_time_shift.  One instruction
 * is accounted as (1 << shift) ns (see icount_to_ns()), so a shift of 10
 * is about 1MIPS, arbitrarily picked as the minimum allowable speed.
 */
#define MAX_ICOUNT_SHIFT 10

/*
 * Instruction counting mode:
 * 0 = Do not count executed instructions.
 * 1 = Fixed conversion of insn to ns via "shift" option
 * 2 = Runtime adaptive algorithm to compute shift
 */
int use_icount;

/* Switch use_icount to mode 1 (fixed insn-to-ns conversion via "shift"). */
static void icount_enable_precise(void)
{
    use_icount = 1;
}

/* Switch use_icount to mode 2 (shift is computed adaptively at runtime). */
static void icount_enable_adaptive(void)
{
    use_icount = 2;
}

/*
 * Number of instructions @cpu has executed out of its current budget:
 * the budget minus whatever is still pending in the decrementing
 * counters (icount_decr.u16.low plus icount_extra).
 */
static int64_t icount_get_executed(CPUState *cpu)
{
    int64_t pending = cpu->neg.icount_decr.u16.low + cpu->icount_extra;

    return cpu->icount_budget - pending;
}

/*
 * Fold the instructions @cpu has executed since the previous update into
 * the shared timers_state.qemu_icount, and shrink the CPU's budget by the
 * same amount so they are not counted twice.
 *
 * Performs no locking itself; icount_update() wraps it in the vm_clock
 * seqlock write section.
 */
static void icount_update_locked(CPUState *cpu)
{
    const int64_t ran = icount_get_executed(cpu);

    cpu->icount_budget -= ran;
    qatomic_set_i64(&timers_state.qemu_icount,
                    timers_state.qemu_icount + ran);
}

/*
 * Locking wrapper around icount_update_locked(): take the vm_clock
 * seqlock for writing, account @cpu's executed instructions in the
 * global shared timers_state.qemu_icount, then release the lock.
 *
 * This is done by the TCG vCPU thread so the main-loop can see time
 * has moved forward.
 */
void icount_update(CPUState *cpu)
{
    seqlock_write_lock(&timers_state.vm_clock_seqlock,
                       &timers_state.vm_clock_lock);
    icount_update_locked(cpu);
    seqlock_write_unlock(&timers_state.vm_clock_seqlock,
                         &timers_state.vm_clock_lock);
}

/*
 * Return the raw instruction count without taking the seqlock.
 *
 * When called from a vCPU that is currently running, the instructions it
 * has executed so far are accounted first.  Such a read is only legal
 * while the CPU may do I/O; otherwise the process reports an error and
 * exits.
 */
static int64_t icount_get_raw_locked(void)
{
    CPUState *self = current_cpu;
    bool on_running_vcpu = self && self->running;

    if (on_running_vcpu) {
        if (!self->neg.can_do_io) {
            error_report("Bad icount read");
            exit(1);
        }
        /* Bring qemu_icount up to date with what this vCPU has run. */
        icount_update_locked(self);
    }

    /* The read is protected by the seqlock, but needs atomic64 to avoid UB */
    return qatomic_read_i64(&timers_state.qemu_icount);
}

/*
 * Virtual time in ns derived from the instruction counter: the raw count
 * scaled by the current shift, plus timers_state.qemu_icount_bias.
 * Takes no lock itself; see icount_get() for the seqlock-protected variant.
 */
static int64_t icount_get_locked(void)
{
    int64_t insns = icount_get_raw_locked();
    int64_t bias = qatomic_read_i64(&timers_state.qemu_icount_bias);

    return bias + icount_to_ns(insns);
}

/*
 * Return the raw instruction count, retrying the read until it was not
 * interleaved with a writer of the vm_clock seqlock.
 */
int64_t icount_get_raw(void)
{
    int64_t snapshot;
    unsigned seq;

    for (;;) {
        seq = seqlock_read_begin(&timers_state.vm_clock_seqlock);
        snapshot = icount_get_raw_locked();
        if (!seqlock_read_retry(&timers_state.vm_clock_seqlock, seq)) {
            return snapshot;
        }
    }
}

/*
 * Return the virtual CPU time in ns, based on the instruction counter.
 * The value is re-read until the vm_clock seqlock reports a consistent
 * snapshot.
 */
int64_t icount_get(void)
{
    int64_t virt_ns;
    unsigned seq;

    for (;;) {
        seq = seqlock_read_begin(&timers_state.vm_clock_seqlock);
        virt_ns = icount_get_locked();
        if (!seqlock_read_retry(&timers_state.vm_clock_seqlock, seq)) {
            return virt_ns;
        }
    }
}

/*
 * Convert an instruction count to nanoseconds: each instruction is worth
 * (1 << icount_time_shift) ns.
 */
int64_t icount_to_ns(int64_t icount)
{
    const int shift = qatomic_read(&timers_state.icount_time_shift);

    return icount << shift;
}

/*
 * Real and virtual time only ever track each other approximately, so
 * drift smaller than this is ignored.  While the guest is idle the two
 * are brought back together in the IO wait loop.
 */
#define ICOUNT_WOBBLE (NANOSECONDS_PER_SECOND / 10)

/*
 * One feedback step of the adaptive algorithm: compare icount-derived
 * virtual time with the VIRTUAL_RT clock and nudge icount_time_shift by
 * one in whichever direction reduces the drift.  The bias is then
 * recomputed so that virtual time stays continuous across the shift
 * change.  Does nothing while the VM is stopped.
 */
static void icount_adjust(void)
{
    int64_t rt_now;
    int64_t virt_now;
    int64_t drift;

    if (!runstate_is_running()) {
        return;
    }

    seqlock_write_lock(&timers_state.vm_clock_seqlock,
                       &timers_state.vm_clock_lock);
    rt_now = REPLAY_CLOCK_LOCKED(REPLAY_CLOCK_VIRTUAL_RT,
                                 cpu_get_clock_locked());
    virt_now = icount_get_locked();
    drift = virt_now - rt_now;

    /* FIXME: This is a very crude algorithm, somewhat prone to oscillation.  */
    if (drift > 0) {
        /* The guest is getting too far ahead.  Slow time down.  */
        if (timers_state.last_delta + ICOUNT_WOBBLE < drift * 2
            && timers_state.icount_time_shift > 0) {
            qatomic_set(&timers_state.icount_time_shift,
                        timers_state.icount_time_shift - 1);
        }
    } else if (drift < 0) {
        /* The guest is getting too far behind.  Speed time up.  */
        if (timers_state.last_delta - ICOUNT_WOBBLE > drift * 2
            && timers_state.icount_time_shift < MAX_ICOUNT_SHIFT) {
            qatomic_set(&timers_state.icount_time_shift,
                        timers_state.icount_time_shift + 1);
        }
    }
    timers_state.last_delta = drift;

    /* Re-base the bias against the (possibly updated) shift. */
    qatomic_set_i64(&timers_state.qemu_icount_bias,
                    virt_now - (timers_state.qemu_icount
                                << timers_state.icount_time_shift));
    seqlock_write_unlock(&timers_state.vm_clock_seqlock,
                         &timers_state.vm_clock_lock);
}

/*
 * QEMU_CLOCK_VIRTUAL_RT timer callback: re-arm itself 1000 ms ahead,
 * then run one adjustment step.  @opaque is unused.
 */
static void icount_adjust_rt(void *opaque)
{
    int64_t next_ms = qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL_RT) + 1000;

    timer_mod(timers_state.icount_rt_timer, next_ms);
    icount_adjust();
}

/*
 * QEMU_CLOCK_VIRTUAL timer callback: re-arm itself a tenth of a second
 * of virtual time ahead, then run one adjustment step.  @opaque is unused.
 */
static void icount_adjust_vm(void *opaque)
{
    int64_t next_ns = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
                      NANOSECONDS_PER_SECOND / 10;

    timer_mod(timers_state.icount_vm_timer, next_ns);
    icount_adjust();
}

/*
 * Divide @count by (1 << icount_time_shift), rounding up.  This is the
 * inverse direction of icount_to_ns().
 */
int64_t icount_round(int64_t count)
{
    int shift = qatomic_read(&timers_state.icount_time_shift);
    int64_t unit = 1 << shift;

    return (count + unit - 1) >> shift;
}

/*
 * Account a pending clock warp: add the VIRTUAL_RT time that has elapsed
 * since vm_clock_warp_start to qemu_icount_bias, then mark the warp as
 * finished by resetting vm_clock_warp_start to -1.
 *
 * Returns early when no warp is pending (vm_clock_warp_start == -1).
 * The bias is only advanced while the VM is running.  Afterwards the
 * QEMU_CLOCK_VIRTUAL clock is notified if it has expired timers.
 */
static void icount_warp_rt(void)
{
    unsigned seq;
    int64_t warp_start;

    /*
     * The icount_warp_timer is rescheduled soon after vm_clock_warp_start
     * changes from -1 to another value, so the race here is okay.
     */
    do {
        seq = seqlock_read_begin(&timers_state.vm_clock_seqlock);
        warp_start = timers_state.vm_clock_warp_start;
    } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, seq));

    if (warp_start == -1) {
        return;
    }

    seqlock_write_lock(&timers_state.vm_clock_seqlock,
                       &timers_state.vm_clock_lock);
    if (runstate_is_running()) {
        int64_t clock = REPLAY_CLOCK_LOCKED(REPLAY_CLOCK_VIRTUAL_RT,
                                            cpu_get_clock_locked());
        int64_t warp_delta;

        /* Re-read the start under the write lock rather than using the
         * snapshot taken above. */
        warp_delta = clock - timers_state.vm_clock_warp_start;
        if (icount_enabled() == 2) {
            /*
             * In adaptive mode, do not let QEMU_CLOCK_VIRTUAL run too far
             * ahead of real time (it might already be ahead so careful not
             * to go backwards).
             */
            int64_t cur_icount = icount_get_locked();
            int64_t delta = clock - cur_icount;

            if (delta < 0) {
                delta = 0;
            }
            warp_delta = MIN(warp_delta, delta);
        }
        qatomic_set_i64(&timers_state.qemu_icount_bias,
                        timers_state.qemu_icount_bias + warp_delta);
    }
    /* The warp is consumed even if the VM was not running. */
    timers_state.vm_clock_warp_start = -1;
    seqlock_write_unlock(&timers_state.vm_clock_seqlock,
                       &timers_state.vm_clock_lock);

    if (qemu_clock_expired(QEMU_CLOCK_VIRTUAL)) {
        qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
    }
}

/*
 * Callback of timers_state.icount_warp_timer (created in
 * icount_configure()): account the pending warp.  @opaque is unused.
 */
static void icount_timer_cb(void *opaque)
{
    /*
     * No need for a checkpoint because the timer already synchronizes
     * with CHECKPOINT_CLOCK_VIRTUAL_RT.
     */
    icount_warp_rt();
}

/*
 * Arrange for QEMU_CLOCK_VIRTUAL to keep advancing towards its next timer
 * deadline while no instructions are being executed.  Requires icount to
 * be enabled (asserted).
 *
 * Nothing happens when the VM is stopped.  Outside replay "play" mode it
 * also does nothing when some vCPU thread is not idle or when qtest is
 * enabled.  In "play" mode it depends on the CHECKPOINT_CLOCK_WARP_START
 * checkpoint succeeding.
 *
 * With a positive deadline the behaviour depends on icount_sleep:
 *  - sleep disabled: the bias is advanced by the whole deadline at once
 *    and the clock is notified;
 *  - sleep enabled: the warp start time is recorded and icount_warp_timer
 *    is armed so icount_warp_rt() accounts the elapsed real time later.
 * A zero deadline just notifies the clock; a negative one returns.
 */
void icount_start_warp_timer(void)
{
    int64_t clock;
    int64_t deadline;

    assert(icount_enabled());

    /*
     * Nothing to do if the VM is stopped: QEMU_CLOCK_VIRTUAL timers
     * do not fire, so computing the deadline does not make sense.
     */
    if (!runstate_is_running()) {
        return;
    }

    if (replay_mode != REPLAY_MODE_PLAY) {
        if (!all_cpu_threads_idle()) {
            return;
        }

        if (qtest_enabled()) {
            /* When testing, qtest commands advance icount.  */
            return;
        }

        /* The checkpoint's result is not examined on this path. */
        replay_checkpoint(CHECKPOINT_CLOCK_WARP_START);
    } else {
        /* warp clock deterministically in record/replay mode */
        if (!replay_checkpoint(CHECKPOINT_CLOCK_WARP_START)) {
            /*
             * vCPU is sleeping and warp can't be started.
             * It is probably a race condition: notification sent
             * to vCPU was processed in advance and vCPU went to sleep.
             * Therefore we have to wake it up for doing something.
             */
            if (replay_has_event()) {
                qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
            }
            return;
        }
    }

    /* We want to use the earliest deadline from ALL vm_clocks */
    clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT);
    deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL,
                                          ~QEMU_TIMER_ATTR_EXTERNAL);
    if (deadline < 0) {
        /* Warn only once per process about the no-timer situation. */
        static bool notified;
        if (!icount_sleep && !notified) {
            warn_report("icount sleep disabled and no active timers");
            notified = true;
        }
        return;
    }

    if (deadline > 0) {
        /*
         * Ensure QEMU_CLOCK_VIRTUAL proceeds even when the virtual CPU goes to
         * sleep.  Otherwise, the CPU might be waiting for a future timer
         * interrupt to wake it up, but the interrupt never comes because
         * the vCPU isn't running any insns and thus doesn't advance the
         * QEMU_CLOCK_VIRTUAL.
         */
        if (!icount_sleep) {
            /*
             * We never let VCPUs sleep in no sleep icount mode.
             * If there is a pending QEMU_CLOCK_VIRTUAL timer we just advance
             * to the next QEMU_CLOCK_VIRTUAL event and notify it.
             * It is useful when we want a deterministic execution time,
             * isolated from host latencies.
             */
            seqlock_write_lock(&timers_state.vm_clock_seqlock,
                               &timers_state.vm_clock_lock);
            qatomic_set_i64(&timers_state.qemu_icount_bias,
                            timers_state.qemu_icount_bias + deadline);
            seqlock_write_unlock(&timers_state.vm_clock_seqlock,
                                 &timers_state.vm_clock_lock);
            qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
        } else {
            /*
             * We do stop VCPUs and only advance QEMU_CLOCK_VIRTUAL after some
             * "real" time, (related to the time left until the next event) has
             * passed. The QEMU_CLOCK_VIRTUAL_RT clock will do this.
             * This avoids that the warps are visible externally; for example,
             * you will not be sending network packets continuously instead of
             * every 100ms.
             */
            seqlock_write_lock(&timers_state.vm_clock_seqlock,
                               &timers_state.vm_clock_lock);
            /* Keep the earliest start time if a warp is already pending. */
            if (timers_state.vm_clock_warp_start == -1
                || timers_state.vm_clock_warp_start > clock) {
                timers_state.vm_clock_warp_start = clock;
            }
            seqlock_write_unlock(&timers_state.vm_clock_seqlock,
                                 &timers_state.vm_clock_lock);
            timer_mod_anticipate(timers_state.icount_warp_timer,
                                 clock + deadline);
        }
    } else if (deadline == 0) {
        /* A timer is already due: just wake up the clock's listeners. */
        qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
    }
}

/*
 * Cancel a pending warp timer and account the warp immediately.
 *
 * Does nothing when sleep is disabled or when the VM is stopped
 * (QEMU_CLOCK_VIRTUAL timers do not fire then, so there is no deadline
 * worth computing).  The warp is only accounted if the
 * CHECKPOINT_CLOCK_WARP_ACCOUNT replay checkpoint succeeds, which keeps
 * the warp deterministic in record/replay mode.
 */
void icount_account_warp_timer(void)
{
    if (!icount_sleep || !runstate_is_running()) {
        return;
    }

    replay_async_events();

    if (replay_checkpoint(CHECKPOINT_CLOCK_WARP_ACCOUNT)) {
        timer_del(timers_state.icount_warp_timer);
        icount_warp_rt();
    }
}

/*
 * Parse the icount suboptions in @opts and set up instruction counting.
 *
 * Recognised suboptions:
 *   shift  - a number in [0, MAX_ICOUNT_SHIFT] selects the fixed
 *            ("precise") mode; "auto" selects the adaptive mode.
 *            Without it, icount is left unconfigured.
 *   sleep  - boolean, defaults to true.
 *   align  - boolean, defaults to false; requires "shift".
 *
 * Rejected combinations (reported through @errp, nothing is configured):
 *   align without shift, align=on with sleep=off,
 *   shift=auto with align=on, shift=auto with sleep=off.
 *
 * On success the icount_sleep / icount_align_option globals, the time
 * shift, the icount mode, and the timers needed by the chosen mode are
 * set up.
 */
void icount_configure(QemuOpts *opts, Error **errp)
{
    const char *option = qemu_opt_get(opts, "shift");
    bool sleep = qemu_opt_get_bool(opts, "sleep", true);
    bool align = qemu_opt_get_bool(opts, "align", false);
    long time_shift = -1;

    if (!option) {
        /* No "shift": nothing to configure, but a lone "align" is an error. */
        if (qemu_opt_get(opts, "align") != NULL) {
            error_setg(errp, "Please specify shift option when using align");
        }
        return;
    }

    if (align && !sleep) {
        error_setg(errp, "align=on and sleep=off are incompatible");
        return;
    }

    if (strcmp(option, "auto") != 0) {
        if (qemu_strtol(option, NULL, 0, &time_shift) < 0
            || time_shift < 0 || time_shift > MAX_ICOUNT_SHIFT) {
            error_setg(errp, "icount: Invalid shift value");
            return;
        }
    } else if (align) {
        /*
         * Validate the values parsed from @opts.  The icount_align_option
         * and icount_sleep globals are only assigned further down, so
         * testing them here would check stale defaults.
         */
        error_setg(errp, "shift=auto and align=on are incompatible");
        return;
    } else if (!sleep) {
        error_setg(errp, "shift=auto and sleep=off are incompatible");
        return;
    }

    /* All checks passed: commit the parsed options to global state. */
    icount_sleep = sleep;
    if (icount_sleep) {
        timers_state.icount_warp_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL_RT,
                                         icount_timer_cb, NULL);
    }

    icount_align_option = align;

    if (time_shift >= 0) {
        /* Fixed shift given on the command line: precise mode. */
        timers_state.icount_time_shift = time_shift;
        icount_enable_precise();
        return;
    }

    icount_enable_adaptive();

    /*
     * 125MIPS seems a reasonable initial guess at the guest speed.
     * It will be corrected fairly quickly anyway.
     */
    timers_state.icount_time_shift = 3;

    /*
     * Have both realtime and virtual time triggers for speed adjustment.
     * The realtime trigger catches emulated time passing too slowly,
     * the virtual time trigger catches emulated time passing too fast.
     * Realtime triggers occur even when idle, so use them less frequently
     * than VM triggers.
     */
    timers_state.vm_clock_warp_start = -1;
    timers_state.icount_rt_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL_RT,
                                   icount_adjust_rt, NULL);
    timer_mod(timers_state.icount_rt_timer,
                   qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL_RT) + 1000);
    timers_state.icount_vm_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
                                        icount_adjust_vm, NULL);
    timer_mod(timers_state.icount_vm_timer,
                   qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
                   NANOSECONDS_PER_SECOND / 10);
}

/*
 * When icount is enabled and the caller runs on a vCPU thread
 * (current_cpu is set), kick that vCPU and notify QEMU_CLOCK_VIRTUAL.
 * Otherwise do nothing.
 */
void icount_notify_exit(void)
{
    if (!icount_enabled() || !current_cpu) {
        return;
    }

    qemu_cpu_kick(current_cpu);
    qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
}
Commit	Line	Data
740b1759 CF	1	/*
	2	* QEMU System Emulator
	3	*
	4	* Copyright (c) 2003-2008 Fabrice Bellard
	5	*
	6	* Permission is hereby granted, free of charge, to any person obtaining a copy
	7	* of this software and associated documentation files (the "Software"), to deal
	8	* in the Software without restriction, including without limitation the rights
	9	* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
	10	* copies of the Software, and to permit persons to whom the Software is
	11	* furnished to do so, subject to the following conditions:
	12	*
	13	* The above copyright notice and this permission notice shall be included in
	14	* all copies or substantial portions of the Software.
	15	*
	16	* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
	17	* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
	18	* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
	19	* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
	20	* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
	21	* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
	22	* THE SOFTWARE.
	23	*/
	24
	25	#include "qemu/osdep.h"
740b1759 CF	26	#include "qemu/cutils.h"
	27	#include "migration/vmstate.h"
	28	#include "qapi/error.h"
	29	#include "qemu/error-report.h"
740b1759 CF	30	#include "sysemu/cpus.h"
	31	#include "sysemu/qtest.h"
	32	#include "qemu/main-loop.h"
	33	#include "qemu/option.h"
	34	#include "qemu/seqlock.h"
	35	#include "sysemu/replay.h"
	36	#include "sysemu/runstate.h"
	37	#include "hw/core/cpu.h"
	38	#include "sysemu/cpu-timers.h"
	39	#include "sysemu/cpu-throttle.h"
8d7f2e76	40	#include "sysemu/cpu-timers-internal.h"
740b1759 CF	41
	42	/*
	43	* ICOUNT: Instruction Counter
	44	*
	45	* this module is split off from cpu-timers because the icount part
	46	* is TCG-specific, and does not need to be built for other accels.
	47	*/
	48	static bool icount_sleep = true;
	49	/* Arbitrarily pick 1MIPS as the minimum allowable speed. */
	50	#define MAX_ICOUNT_SHIFT 10
	51
	52	/*
	53	* 0 = Do not count executed instructions.
	54	* 1 = Fixed conversion of insn to ns via "shift" option
	55	* 2 = Runtime adaptive algorithm to compute shift
	56	*/
	57	int use_icount;
	58
	59	static void icount_enable_precise(void)
	60	{
	61	use_icount = 1;
	62	}
	63
	64	static void icount_enable_adaptive(void)
	65	{
	66	use_icount = 2;
	67	}
	68
	69	/*
	70	* The current number of executed instructions is based on what we
	71	* originally budgeted minus the current state of the decrementing
	72	* icount counters in extra/u16.low.
	73	*/
8191d368	74	static int64_t icount_get_executed(CPUState *cpu)
740b1759 CF	75	{
740b1759 CF	76	return (cpu->icount_budget -
a953b5fa	77	(cpu->neg.icount_decr.u16.low + cpu->icount_extra));
740b1759 CF	78	}
	79
	80	/*
	81	* Update the global shared timer_state.qemu_icount to take into
	82	* account executed instructions. This is done by the TCG vCPU
	83	* thread so the main-loop can see time has moved forward.
	84	*/
8191d368	85	static void icount_update_locked(CPUState *cpu)
740b1759	86	{
8191d368	87	int64_t executed = icount_get_executed(cpu);
740b1759 CF	88	cpu->icount_budget -= executed;
	89
	90	qatomic_set_i64(&timers_state.qemu_icount,
	91	timers_state.qemu_icount + executed);
	92	}
	93
	94	/*
	95	* Update the global shared timer_state.qemu_icount to take into
	96	* account executed instructions. This is done by the TCG vCPU
	97	* thread so the main-loop can see time has moved forward.
	98	*/
8191d368	99	void icount_update(CPUState *cpu)
740b1759 CF	100	{
	101	seqlock_write_lock(&timers_state.vm_clock_seqlock,
	102	&timers_state.vm_clock_lock);
8191d368	103	icount_update_locked(cpu);
740b1759 CF	104	seqlock_write_unlock(&timers_state.vm_clock_seqlock,
	105	&timers_state.vm_clock_lock);
	106	}
	107
8191d368	108	static int64_t icount_get_raw_locked(void)
740b1759 CF	109	{
	110	CPUState *cpu = current_cpu;
	111
	112	if (cpu && cpu->running) {
464dacf6	113	if (!cpu->neg.can_do_io) {
740b1759 CF	114	error_report("Bad icount read");
	115	exit(1);
	116	}
	117	/* Take into account what has run */
8191d368	118	icount_update_locked(cpu);
740b1759 CF	119	}
	120	/* The read is protected by the seqlock, but needs atomic64 to avoid UB */
	121	return qatomic_read_i64(&timers_state.qemu_icount);
	122	}
	123
8191d368	124	static int64_t icount_get_locked(void)
740b1759	125	{
8191d368	126	int64_t icount = icount_get_raw_locked();
740b1759	127	return qatomic_read_i64(&timers_state.qemu_icount_bias) +
8191d368	128	icount_to_ns(icount);
740b1759 CF	129	}
740b1759 CF	130
8191d368	131	int64_t icount_get_raw(void)
740b1759 CF	132	{
	133	int64_t icount;
	134	unsigned start;
	135
	136	do {
	137	start = seqlock_read_begin(&timers_state.vm_clock_seqlock);
8191d368	138	icount = icount_get_raw_locked();
740b1759 CF	139	} while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start));
	140
	141	return icount;
	142	}
	143
	144	/* Return the virtual CPU time, based on the instruction counter. */
8191d368	145	int64_t icount_get(void)
740b1759 CF	146	{
	147	int64_t icount;
	148	unsigned start;
	149
	150	do {
	151	start = seqlock_read_begin(&timers_state.vm_clock_seqlock);
8191d368	152	icount = icount_get_locked();
740b1759 CF	153	} while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start));
	154
	155	return icount;
	156	}
	157
8191d368	158	int64_t icount_to_ns(int64_t icount)
740b1759 CF	159	{
	160	return icount << qatomic_read(&timers_state.icount_time_shift);
	161	}
	162
	163	/*
	164	* Correlation between real and virtual time is always going to be
	165	* fairly approximate, so ignore small variation.
	166	* When the guest is idle real and virtual time will be aligned in
	167	* the IO wait loop.
	168	*/
	169	#define ICOUNT_WOBBLE (NANOSECONDS_PER_SECOND / 10)
	170
	171	static void icount_adjust(void)
	172	{
	173	int64_t cur_time;
	174	int64_t cur_icount;
	175	int64_t delta;
	176
740b1759 CF	177	/* If the VM is not running, then do nothing. */
	178	if (!runstate_is_running()) {
	179	return;
	180	}
	181
	182	seqlock_write_lock(&timers_state.vm_clock_seqlock,
	183	&timers_state.vm_clock_lock);
	184	cur_time = REPLAY_CLOCK_LOCKED(REPLAY_CLOCK_VIRTUAL_RT,
	185	cpu_get_clock_locked());
8191d368	186	cur_icount = icount_get_locked();
740b1759 CF	187
	188	delta = cur_icount - cur_time;
	189	/* FIXME: This is a very crude algorithm, somewhat prone to oscillation. */
	190	if (delta > 0
fe852ac2	191	&& timers_state.last_delta + ICOUNT_WOBBLE < delta * 2
740b1759 CF	192	&& timers_state.icount_time_shift > 0) {
	193	/* The guest is getting too far ahead. Slow time down. */
	194	qatomic_set(&timers_state.icount_time_shift,
	195	timers_state.icount_time_shift - 1);
	196	}
	197	if (delta < 0
fe852ac2	198	&& timers_state.last_delta - ICOUNT_WOBBLE > delta * 2
740b1759 CF	199	&& timers_state.icount_time_shift < MAX_ICOUNT_SHIFT) {
	200	/* The guest is getting too far behind. Speed time up. */
	201	qatomic_set(&timers_state.icount_time_shift,
	202	timers_state.icount_time_shift + 1);
	203	}
fe852ac2	204	timers_state.last_delta = delta;
740b1759 CF	205	qatomic_set_i64(&timers_state.qemu_icount_bias,
	206	cur_icount - (timers_state.qemu_icount
	207	<< timers_state.icount_time_shift));
	208	seqlock_write_unlock(&timers_state.vm_clock_seqlock,
	209	&timers_state.vm_clock_lock);
	210	}
	211
	212	static void icount_adjust_rt(void *opaque)
	213	{
	214	timer_mod(timers_state.icount_rt_timer,
	215	qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL_RT) + 1000);
	216	icount_adjust();
	217	}
	218
	219	static void icount_adjust_vm(void *opaque)
	220	{
	221	timer_mod(timers_state.icount_vm_timer,
	222	qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
	223	NANOSECONDS_PER_SECOND / 10);
	224	icount_adjust();
	225	}
	226
8191d368	227	int64_t icount_round(int64_t count)
740b1759 CF	228	{
	229	int shift = qatomic_read(&timers_state.icount_time_shift);
	230	return (count + (1 << shift) - 1) >> shift;
	231	}
	232
	233	static void icount_warp_rt(void)
	234	{
	235	unsigned seq;
	236	int64_t warp_start;
	237
	238	/*
	239	* The icount_warp_timer is rescheduled soon after vm_clock_warp_start
	240	* changes from -1 to another value, so the race here is okay.
	241	*/
	242	do {
	243	seq = seqlock_read_begin(&timers_state.vm_clock_seqlock);
	244	warp_start = timers_state.vm_clock_warp_start;
	245	} while (seqlock_read_retry(&timers_state.vm_clock_seqlock, seq));
	246
	247	if (warp_start == -1) {
	248	return;
	249	}
	250
	251	seqlock_write_lock(&timers_state.vm_clock_seqlock,
	252	&timers_state.vm_clock_lock);
	253	if (runstate_is_running()) {
	254	int64_t clock = REPLAY_CLOCK_LOCKED(REPLAY_CLOCK_VIRTUAL_RT,
	255	cpu_get_clock_locked());
	256	int64_t warp_delta;
	257
	258	warp_delta = clock - timers_state.vm_clock_warp_start;
	259	if (icount_enabled() == 2) {
	260	/*
67f85346 NP	261	* In adaptive mode, do not let QEMU_CLOCK_VIRTUAL run too far
	262	* ahead of real time (it might already be ahead so careful not
	263	* to go backwards).
740b1759	264	*/
8191d368	265	int64_t cur_icount = icount_get_locked();
740b1759	266	int64_t delta = clock - cur_icount;
67f85346 NP	267
	268	if (delta < 0) {
	269	delta = 0;
	270	}
740b1759 CF	271	warp_delta = MIN(warp_delta, delta);
	272	}
	273	qatomic_set_i64(&timers_state.qemu_icount_bias,
	274	timers_state.qemu_icount_bias + warp_delta);
	275	}
	276	timers_state.vm_clock_warp_start = -1;
	277	seqlock_write_unlock(&timers_state.vm_clock_seqlock,
	278	&timers_state.vm_clock_lock);
	279
	280	if (qemu_clock_expired(QEMU_CLOCK_VIRTUAL)) {
	281	qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
	282	}
	283	}
	284
	285	static void icount_timer_cb(void *opaque)
	286	{
	287	/*
	288	* No need for a checkpoint because the timer already synchronizes
	289	* with CHECKPOINT_CLOCK_VIRTUAL_RT.
	290	*/
	291	icount_warp_rt();
	292	}
	293
8191d368	294	void icount_start_warp_timer(void)
740b1759 CF	295	{
	296	int64_t clock;
	297	int64_t deadline;
	298
	299	assert(icount_enabled());
	300
	301	/*
	302	* Nothing to do if the VM is stopped: QEMU_CLOCK_VIRTUAL timers
	303	* do not fire, so computing the deadline does not make sense.
	304	*/
	305	if (!runstate_is_running()) {
	306	return;
	307	}
	308
	309	if (replay_mode != REPLAY_MODE_PLAY) {
	310	if (!all_cpu_threads_idle()) {
	311	return;
	312	}
	313
	314	if (qtest_enabled()) {
	315	/* When testing, qtest commands advance icount. */
	316	return;
	317	}
	318
	319	replay_checkpoint(CHECKPOINT_CLOCK_WARP_START);
	320	} else {
	321	/* warp clock deterministically in record/replay mode */
	322	if (!replay_checkpoint(CHECKPOINT_CLOCK_WARP_START)) {
	323	/*
	324	* vCPU is sleeping and warp can't be started.
	325	* It is probably a race condition: notification sent
	326	* to vCPU was processed in advance and vCPU went to sleep.
669dcb60	327	* Therefore we have to wake it up for doing something.
740b1759	328	*/
60618e2d	329	if (replay_has_event()) {
740b1759 CF	330	qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
	331	}
	332	return;
	333	}
	334	}
	335
	336	/* We want to use the earliest deadline from ALL vm_clocks */
	337	clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT);
	338	deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL,
	339	~QEMU_TIMER_ATTR_EXTERNAL);
	340	if (deadline < 0) {
	341	static bool notified;
	342	if (!icount_sleep && !notified) {
	343	warn_report("icount sleep disabled and no active timers");
	344	notified = true;
	345	}
	346	return;
	347	}
	348
	349	if (deadline > 0) {
	350	/*
	351	* Ensure QEMU_CLOCK_VIRTUAL proceeds even when the virtual CPU goes to
	352	* sleep. Otherwise, the CPU might be waiting for a future timer
	353	* interrupt to wake it up, but the interrupt never comes because
	354	* the vCPU isn't running any insns and thus doesn't advance the
	355	* QEMU_CLOCK_VIRTUAL.
	356	*/
	357	if (!icount_sleep) {
	358	/*
	359	* We never let VCPUs sleep in no sleep icount mode.
	360	* If there is a pending QEMU_CLOCK_VIRTUAL timer we just advance
	361	* to the next QEMU_CLOCK_VIRTUAL event and notify it.
	362	* It is useful when we want a deterministic execution time,
	363	* isolated from host latencies.
	364	*/
	365	seqlock_write_lock(&timers_state.vm_clock_seqlock,
	366	&timers_state.vm_clock_lock);
	367	qatomic_set_i64(&timers_state.qemu_icount_bias,
	368	timers_state.qemu_icount_bias + deadline);
	369	seqlock_write_unlock(&timers_state.vm_clock_seqlock,
	370	&timers_state.vm_clock_lock);
	371	qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
	372	} else {
	373	/*
	374	* We do stop VCPUs and only advance QEMU_CLOCK_VIRTUAL after some
	375	* "real" time, (related to the time left until the next event) has
	376	* passed. The QEMU_CLOCK_VIRTUAL_RT clock will do this.
	377	* This avoids that the warps are visible externally; for example,
	378	* you will not be sending network packets continuously instead of
	379	* every 100ms.
	380	*/
	381	seqlock_write_lock(&timers_state.vm_clock_seqlock,
	382	&timers_state.vm_clock_lock);
	383	if (timers_state.vm_clock_warp_start == -1
	384	\|\| timers_state.vm_clock_warp_start > clock) {
	385	timers_state.vm_clock_warp_start = clock;
	386	}
	387	seqlock_write_unlock(&timers_state.vm_clock_seqlock,
	388	&timers_state.vm_clock_lock);
	389	timer_mod_anticipate(timers_state.icount_warp_timer,
	390	clock + deadline);
	391	}
	392	} else if (deadline == 0) {
	393	qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
394	}
395	}
396
8191d368	397	void icount_account_warp_timer(void)
740b1759	398	{
45e077d7	399	if (!icount_sleep) {
740b1759 CF	400	return;
	401	}
	402
	403	/*
	404	* Nothing to do if the VM is stopped: QEMU_CLOCK_VIRTUAL timers
	405	* do not fire, so computing the deadline does not make sense.
	406	*/
	407	if (!runstate_is_running()) {
	408	return;
	409	}
	410
60618e2d PD	411	replay_async_events();
60618e2d PD	412
740b1759 CF	413	/* warp clock deterministically in record/replay mode */
	414	if (!replay_checkpoint(CHECKPOINT_CLOCK_WARP_ACCOUNT)) {
	415	return;
	416	}
	417
	418	timer_del(timers_state.icount_warp_timer);
	419	icount_warp_rt();
	420	}
	421
8191d368	422	void icount_configure(QemuOpts opts, Error *errp)
740b1759 CF	423	{
	424	const char *option = qemu_opt_get(opts, "shift");
	425	bool sleep = qemu_opt_get_bool(opts, "sleep", true);
	426	bool align = qemu_opt_get_bool(opts, "align", false);
	427	long time_shift = -1;
	428
	429	if (!option) {
	430	if (qemu_opt_get(opts, "align") != NULL) {
	431	error_setg(errp, "Please specify shift option when using align");
	432	}
	433	return;
	434	}
	435
	436	if (align && !sleep) {
	437	error_setg(errp, "align=on and sleep=off are incompatible");
	438	return;
	439	}
	440
	441	if (strcmp(option, "auto") != 0) {
	442	if (qemu_strtol(option, NULL, 0, &time_shift) < 0
	443	\|\| time_shift < 0 \|\| time_shift > MAX_ICOUNT_SHIFT) {
	444	error_setg(errp, "icount: Invalid shift value");
	445	return;
	446	}
	447	} else if (icount_align_option) {
	448	error_setg(errp, "shift=auto and align=on are incompatible");
	449	return;
	450	} else if (!icount_sleep) {
	451	error_setg(errp, "shift=auto and sleep=off are incompatible");
	452	return;
	453	}
	454
	455	icount_sleep = sleep;
	456	if (icount_sleep) {
	457	timers_state.icount_warp_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL_RT,
	458	icount_timer_cb, NULL);
	459	}
	460
	461	icount_align_option = align;
	462
	463	if (time_shift >= 0) {
	464	timers_state.icount_time_shift = time_shift;
	465	icount_enable_precise();
	466	return;
	467	}
	468
	469	icount_enable_adaptive();
	470
	471	/*
	472	* 125MIPS seems a reasonable initial guess at the guest speed.
	473	* It will be corrected fairly quickly anyway.
	474	*/
	475	timers_state.icount_time_shift = 3;
	476
	477	/*
	478	* Have both realtime and virtual time triggers for speed adjustment.
	479	* The realtime trigger catches emulated time passing too slowly,
	480	* the virtual time trigger catches emulated time passing too fast.
	481	* Realtime triggers occur even when idle, so use them less frequently
	482	* than VM triggers.
	483	*/
	484	timers_state.vm_clock_warp_start = -1;
	485	timers_state.icount_rt_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL_RT,
	486	icount_adjust_rt, NULL);
487	timer_mod(timers_state.icount_rt_timer,
488	qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL_RT) + 1000);
489	timers_state.icount_vm_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
490	icount_adjust_vm, NULL);
491	timer_mod(timers_state.icount_vm_timer,
492	qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
493	NANOSECONDS_PER_SECOND / 10);
494	}
75bbe5e5 PD	495
	496	void icount_notify_exit(void)
	497	{
	498	if (icount_enabled() && current_cpu) {
	499	qemu_cpu_kick(current_cpu);
	500	qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
	501	}
	502	}