/*
 * accel/tcg/icount-common.c — mirror of qemu.git (via git.proxmox.com).
 * Commit subject: "system/cpu-timers: Introduce ICountMode enumerator".
 */
1 /*
2 * QEMU System Emulator
3 *
4 * Copyright (c) 2003-2008 Fabrice Bellard
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to deal
8 * in the Software without restriction, including without limitation the rights
9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 * copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22 * THE SOFTWARE.
23 */
24
25 #include "qemu/osdep.h"
26 #include "qemu/cutils.h"
27 #include "migration/vmstate.h"
28 #include "qapi/error.h"
29 #include "qemu/error-report.h"
30 #include "sysemu/cpus.h"
31 #include "sysemu/qtest.h"
32 #include "qemu/main-loop.h"
33 #include "qemu/option.h"
34 #include "qemu/seqlock.h"
35 #include "sysemu/replay.h"
36 #include "sysemu/runstate.h"
37 #include "hw/core/cpu.h"
38 #include "sysemu/cpu-timers.h"
39 #include "sysemu/cpu-throttle.h"
40 #include "sysemu/cpu-timers-internal.h"
41
/*
 * ICOUNT: Instruction Counter
 *
 * this module is split off from cpu-timers because the icount part
 * is TCG-specific, and does not need to be built for other accels.
 */

/*
 * "sleep" sub-option of -icount.  When true (default), idle vCPUs sleep
 * and QEMU_CLOCK_VIRTUAL is warped forward afterwards via the warp timer;
 * when false, the clock bias is advanced immediately to the next deadline
 * (see icount_start_warp_timer()).
 */
static bool icount_sleep = true;
/* Arbitrarily pick 1MIPS as the minimum allowable speed. */
#define MAX_ICOUNT_SHIFT 10

/*
 * Global icount mode.  Starts disabled (do not count executed
 * instructions); switched to precise/adaptive by icount_configure().
 */
ICountMode use_icount = ICOUNT_DISABLED;
54
/* Select precise mode: insn count maps to ns by a fixed shift. */
static void icount_enable_precise(void)
{
    /* Fixed conversion of insn to ns via "shift" option */
    use_icount = ICOUNT_PRECISE;
}
60
/* Select adaptive mode: the shift is tuned at runtime (icount_adjust). */
static void icount_enable_adaptive(void)
{
    /* Runtime adaptive algorithm to compute shift */
    use_icount = ICOUNT_ADAPTATIVE;
}
66
67 /*
68 * The current number of executed instructions is based on what we
69 * originally budgeted minus the current state of the decrementing
70 * icount counters in extra/u16.low.
71 */
72 static int64_t icount_get_executed(CPUState *cpu)
73 {
74 return (cpu->icount_budget -
75 (cpu->neg.icount_decr.u16.low + cpu->icount_extra));
76 }
77
78 /*
79 * Update the global shared timer_state.qemu_icount to take into
80 * account executed instructions. This is done by the TCG vCPU
81 * thread so the main-loop can see time has moved forward.
82 */
83 static void icount_update_locked(CPUState *cpu)
84 {
85 int64_t executed = icount_get_executed(cpu);
86 cpu->icount_budget -= executed;
87
88 qatomic_set_i64(&timers_state.qemu_icount,
89 timers_state.qemu_icount + executed);
90 }
91
/*
 * Update the global shared timer_state.qemu_icount to take into
 * account executed instructions. This is done by the TCG vCPU
 * thread so the main-loop can see time has moved forward.
 */
void icount_update(CPUState *cpu)
{
    /* Take the seqlock write side so concurrent readers retry. */
    seqlock_write_lock(&timers_state.vm_clock_seqlock,
                       &timers_state.vm_clock_lock);
    icount_update_locked(cpu);
    seqlock_write_unlock(&timers_state.vm_clock_seqlock,
                         &timers_state.vm_clock_lock);
}
105
106 static int64_t icount_get_raw_locked(void)
107 {
108 CPUState *cpu = current_cpu;
109
110 if (cpu && cpu->running) {
111 if (!cpu->neg.can_do_io) {
112 error_report("Bad icount read");
113 exit(1);
114 }
115 /* Take into account what has run */
116 icount_update_locked(cpu);
117 }
118 /* The read is protected by the seqlock, but needs atomic64 to avoid UB */
119 return qatomic_read_i64(&timers_state.qemu_icount);
120 }
121
122 static int64_t icount_get_locked(void)
123 {
124 int64_t icount = icount_get_raw_locked();
125 return qatomic_read_i64(&timers_state.qemu_icount_bias) +
126 icount_to_ns(icount);
127 }
128
129 int64_t icount_get_raw(void)
130 {
131 int64_t icount;
132 unsigned start;
133
134 do {
135 start = seqlock_read_begin(&timers_state.vm_clock_seqlock);
136 icount = icount_get_raw_locked();
137 } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start));
138
139 return icount;
140 }
141
142 /* Return the virtual CPU time, based on the instruction counter. */
143 int64_t icount_get(void)
144 {
145 int64_t icount;
146 unsigned start;
147
148 do {
149 start = seqlock_read_begin(&timers_state.vm_clock_seqlock);
150 icount = icount_get_locked();
151 } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start));
152
153 return icount;
154 }
155
156 int64_t icount_to_ns(int64_t icount)
157 {
158 return icount << qatomic_read(&timers_state.icount_time_shift);
159 }
160
/*
 * Correlation between real and virtual time is always going to be
 * fairly approximate, so ignore small variation.
 * When the guest is idle real and virtual time will be aligned in
 * the IO wait loop.
 */
#define ICOUNT_WOBBLE (NANOSECONDS_PER_SECOND / 10)

/*
 * Adaptive-mode feedback step: compare virtual time (cur_icount) against
 * real time (cur_time) and bump icount_time_shift by one in the direction
 * that brings them closer, then recompute the bias so the virtual clock
 * stays continuous.  Invoked periodically by icount_adjust_rt/_vm below.
 */
static void icount_adjust(void)
{
    int64_t cur_time;
    int64_t cur_icount;
    int64_t delta;

    /* If the VM is not running, then do nothing. */
    if (!runstate_is_running()) {
        return;
    }

    seqlock_write_lock(&timers_state.vm_clock_seqlock,
                       &timers_state.vm_clock_lock);
    cur_time = REPLAY_CLOCK_LOCKED(REPLAY_CLOCK_VIRTUAL_RT,
                                   cpu_get_clock_locked());
    cur_icount = icount_get_locked();

    /* delta > 0: guest is ahead of real time; delta < 0: behind. */
    delta = cur_icount - cur_time;
    /* FIXME: This is a very crude algorithm, somewhat prone to oscillation. */
    if (delta > 0
        && timers_state.last_delta + ICOUNT_WOBBLE < delta * 2
        && timers_state.icount_time_shift > 0) {
        /* The guest is getting too far ahead. Slow time down. */
        qatomic_set(&timers_state.icount_time_shift,
                    timers_state.icount_time_shift - 1);
    }
    if (delta < 0
        && timers_state.last_delta - ICOUNT_WOBBLE > delta * 2
        && timers_state.icount_time_shift < MAX_ICOUNT_SHIFT) {
        /* The guest is getting too far behind. Speed time up. */
        qatomic_set(&timers_state.icount_time_shift,
                    timers_state.icount_time_shift + 1);
    }
    timers_state.last_delta = delta;
    /*
     * Re-derive the bias so that bias + (qemu_icount << shift) still
     * equals cur_icount with the (possibly changed) shift.
     */
    qatomic_set_i64(&timers_state.qemu_icount_bias,
                    cur_icount - (timers_state.qemu_icount
                                  << timers_state.icount_time_shift));
    seqlock_write_unlock(&timers_state.vm_clock_seqlock,
                         &timers_state.vm_clock_lock);
}
209
210 static void icount_adjust_rt(void *opaque)
211 {
212 timer_mod(timers_state.icount_rt_timer,
213 qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL_RT) + 1000);
214 icount_adjust();
215 }
216
217 static void icount_adjust_vm(void *opaque)
218 {
219 timer_mod(timers_state.icount_vm_timer,
220 qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
221 NANOSECONDS_PER_SECOND / 10);
222 icount_adjust();
223 }
224
225 int64_t icount_round(int64_t count)
226 {
227 int shift = qatomic_read(&timers_state.icount_time_shift);
228 return (count + (1 << shift) - 1) >> shift;
229 }
230
/*
 * Account a pending clock warp: add the real (VIRTUAL_RT) time that
 * elapsed since vm_clock_warp_start was recorded to qemu_icount_bias,
 * so QEMU_CLOCK_VIRTUAL catches up while the vCPUs were asleep.
 */
static void icount_warp_rt(void)
{
    unsigned seq;
    int64_t warp_start;

    /*
     * The icount_warp_timer is rescheduled soon after vm_clock_warp_start
     * changes from -1 to another value, so the race here is okay.
     */
    do {
        seq = seqlock_read_begin(&timers_state.vm_clock_seqlock);
        warp_start = timers_state.vm_clock_warp_start;
    } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, seq));

    if (warp_start == -1) {
        /* No warp in progress. */
        return;
    }

    seqlock_write_lock(&timers_state.vm_clock_seqlock,
                       &timers_state.vm_clock_lock);
    if (runstate_is_running()) {
        int64_t clock = REPLAY_CLOCK_LOCKED(REPLAY_CLOCK_VIRTUAL_RT,
                                            cpu_get_clock_locked());
        int64_t warp_delta;

        warp_delta = clock - timers_state.vm_clock_warp_start;
        if (icount_enabled() == ICOUNT_ADAPTATIVE) {
            /*
             * In adaptive mode, do not let QEMU_CLOCK_VIRTUAL run too far
             * ahead of real time (it might already be ahead so careful not
             * to go backwards).
             */
            int64_t cur_icount = icount_get_locked();
            int64_t delta = clock - cur_icount;

            if (delta < 0) {
                delta = 0;
            }
            warp_delta = MIN(warp_delta, delta);
        }
        qatomic_set_i64(&timers_state.qemu_icount_bias,
                        timers_state.qemu_icount_bias + warp_delta);
    }
    /* Warp consumed (or VM stopped): clear the start marker either way. */
    timers_state.vm_clock_warp_start = -1;
    seqlock_write_unlock(&timers_state.vm_clock_seqlock,
                         &timers_state.vm_clock_lock);

    if (qemu_clock_expired(QEMU_CLOCK_VIRTUAL)) {
        /* The warp may have made virtual timers due: let them fire. */
        qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
    }
}
282
/* Expiry callback of timers_state.icount_warp_timer (armed by
 * icount_start_warp_timer()): account the pending warp. */
static void icount_timer_cb(void *opaque)
{
    /*
     * No need for a checkpoint because the timer already synchronizes
     * with CHECKPOINT_CLOCK_VIRTUAL_RT.
     */
    icount_warp_rt();
}
291
/*
 * Begin a clock warp so QEMU_CLOCK_VIRTUAL keeps advancing while vCPUs
 * are idle.  Outside replay it only proceeds once all vCPU threads are
 * idle.  With sleep=off the bias jumps straight to the next deadline;
 * otherwise icount_warp_timer is armed and the warp is accounted later
 * by icount_warp_rt().
 */
void icount_start_warp_timer(void)
{
    int64_t clock;
    int64_t deadline;

    assert(icount_enabled());

    /*
     * Nothing to do if the VM is stopped: QEMU_CLOCK_VIRTUAL timers
     * do not fire, so computing the deadline does not make sense.
     */
    if (!runstate_is_running()) {
        return;
    }

    if (replay_mode != REPLAY_MODE_PLAY) {
        if (!all_cpu_threads_idle()) {
            return;
        }

        if (qtest_enabled()) {
            /* When testing, qtest commands advance icount. */
            return;
        }

        replay_checkpoint(CHECKPOINT_CLOCK_WARP_START);
    } else {
        /* warp clock deterministically in record/replay mode */
        if (!replay_checkpoint(CHECKPOINT_CLOCK_WARP_START)) {
            /*
             * vCPU is sleeping and warp can't be started.
             * It is probably a race condition: notification sent
             * to vCPU was processed in advance and vCPU went to sleep.
             * Therefore we have to wake it up for doing something.
             */
            if (replay_has_event()) {
                qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
            }
            return;
        }
    }

    /* We want to use the earliest deadline from ALL vm_clocks */
    clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT);
    deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL,
                                          ~QEMU_TIMER_ATTR_EXTERNAL);
    if (deadline < 0) {
        /* No pending non-external QEMU_CLOCK_VIRTUAL timer: warn once. */
        static bool notified;
        if (!icount_sleep && !notified) {
            warn_report("icount sleep disabled and no active timers");
            notified = true;
        }
        return;
    }

    if (deadline > 0) {
        /*
         * Ensure QEMU_CLOCK_VIRTUAL proceeds even when the virtual CPU goes to
         * sleep. Otherwise, the CPU might be waiting for a future timer
         * interrupt to wake it up, but the interrupt never comes because
         * the vCPU isn't running any insns and thus doesn't advance the
         * QEMU_CLOCK_VIRTUAL.
         */
        if (!icount_sleep) {
            /*
             * We never let VCPUs sleep in no sleep icount mode.
             * If there is a pending QEMU_CLOCK_VIRTUAL timer we just advance
             * to the next QEMU_CLOCK_VIRTUAL event and notify it.
             * It is useful when we want a deterministic execution time,
             * isolated from host latencies.
             */
            seqlock_write_lock(&timers_state.vm_clock_seqlock,
                               &timers_state.vm_clock_lock);
            qatomic_set_i64(&timers_state.qemu_icount_bias,
                            timers_state.qemu_icount_bias + deadline);
            seqlock_write_unlock(&timers_state.vm_clock_seqlock,
                                 &timers_state.vm_clock_lock);
            qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
        } else {
            /*
             * We do stop VCPUs and only advance QEMU_CLOCK_VIRTUAL after some
             * "real" time, (related to the time left until the next event) has
             * passed. The QEMU_CLOCK_VIRTUAL_RT clock will do this.
             * This avoids that the warps are visible externally; for example,
             * you will not be sending network packets continuously instead of
             * every 100ms.
             */
            seqlock_write_lock(&timers_state.vm_clock_seqlock,
                               &timers_state.vm_clock_lock);
            if (timers_state.vm_clock_warp_start == -1
                || timers_state.vm_clock_warp_start > clock) {
                /* Keep the earliest start so the full idle span is warped. */
                timers_state.vm_clock_warp_start = clock;
            }
            seqlock_write_unlock(&timers_state.vm_clock_seqlock,
                                 &timers_state.vm_clock_lock);
            timer_mod_anticipate(timers_state.icount_warp_timer,
                                 clock + deadline);
        }
    } else if (deadline == 0) {
        /* A timer is already due right now. */
        qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
    }
}
394
/*
 * Cancel the pending warp timer and fold the in-progress warp into the
 * clock bias (via icount_warp_rt()).  NOTE(review): presumably invoked
 * when a vCPU resumes execution — confirm against callers.
 */
void icount_account_warp_timer(void)
{
    if (!icount_sleep) {
        /* sleep=off never arms icount_warp_timer; nothing to account. */
        return;
    }

    /*
     * Nothing to do if the VM is stopped: QEMU_CLOCK_VIRTUAL timers
     * do not fire, so computing the deadline does not make sense.
     */
    if (!runstate_is_running()) {
        return;
    }

    replay_async_events();

    /* warp clock deterministically in record/replay mode */
    if (!replay_checkpoint(CHECKPOINT_CLOCK_WARP_ACCOUNT)) {
        return;
    }

    timer_del(timers_state.icount_warp_timer);
    icount_warp_rt();
}
419
/*
 * Parse the -icount option group ("shift", "sleep", "align") and enable
 * icount.  A numeric shift selects precise mode; shift=auto selects
 * adaptive mode with periodic speed adjustment.  Returns true on success,
 * false (with *errp set) on invalid or contradictory options.  Without a
 * "shift" option icount stays disabled (still returns true).
 */
bool icount_configure(QemuOpts *opts, Error **errp)
{
    const char *option = qemu_opt_get(opts, "shift");
    bool sleep = qemu_opt_get_bool(opts, "sleep", true);
    bool align = qemu_opt_get_bool(opts, "align", false);
    long time_shift = -1;

    if (!option) {
        /* "align" without "shift" is the only error in the disabled case. */
        if (qemu_opt_get(opts, "align") != NULL) {
            error_setg(errp, "Please specify shift option when using align");
            return false;
        }
        return true;
    }

    if (align && !sleep) {
        error_setg(errp, "align=on and sleep=off are incompatible");
        return false;
    }

    if (strcmp(option, "auto") != 0) {
        /* Explicit shift: must parse and lie in [0, MAX_ICOUNT_SHIFT]. */
        if (qemu_strtol(option, NULL, 0, &time_shift) < 0
            || time_shift < 0 || time_shift > MAX_ICOUNT_SHIFT) {
            error_setg(errp, "icount: Invalid shift value");
            return false;
        }
    } else if (icount_align_option) {
        error_setg(errp, "shift=auto and align=on are incompatible");
        return false;
    } else if (!icount_sleep) {
        error_setg(errp, "shift=auto and sleep=off are incompatible");
        return false;
    }

    icount_sleep = sleep;
    if (icount_sleep) {
        /* Warp timer runs on VIRTUAL_RT so it fires while vCPUs sleep. */
        timers_state.icount_warp_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL_RT,
                                                      icount_timer_cb, NULL);
    }

    icount_align_option = align;

    if (time_shift >= 0) {
        /* Fixed shift: precise mode; no adjustment timers needed. */
        timers_state.icount_time_shift = time_shift;
        icount_enable_precise();
        return true;
    }

    icount_enable_adaptive();

    /*
     * 125MIPS seems a reasonable initial guess at the guest speed.
     * It will be corrected fairly quickly anyway.
     */
    timers_state.icount_time_shift = 3;

    /*
     * Have both realtime and virtual time triggers for speed adjustment.
     * The realtime trigger catches emulated time passing too slowly,
     * the virtual time trigger catches emulated time passing too fast.
     * Realtime triggers occur even when idle, so use them less frequently
     * than VM triggers.
     */
    timers_state.vm_clock_warp_start = -1;
    timers_state.icount_rt_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL_RT,
                                                icount_adjust_rt, NULL);
    timer_mod(timers_state.icount_rt_timer,
              qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL_RT) + 1000);
    timers_state.icount_vm_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
                                                icount_adjust_vm, NULL);
    timer_mod(timers_state.icount_vm_timer,
              qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
              NANOSECONDS_PER_SECOND / 10);
    return true;
}
495
496 void icount_notify_exit(void)
497 {
498 if (icount_enabled() && current_cpu) {
499 qemu_cpu_kick(current_cpu);
500 qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
501 }
502 }