]>
Commit | Line | Data |
---|---|---|
740b1759 CF |
1 | /* |
2 | * QEMU System Emulator | |
3 | * | |
4 | * Copyright (c) 2003-2008 Fabrice Bellard | |
5 | * | |
6 | * Permission is hereby granted, free of charge, to any person obtaining a copy | |
7 | * of this software and associated documentation files (the "Software"), to deal | |
8 | * in the Software without restriction, including without limitation the rights | |
9 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | |
10 | * copies of the Software, and to permit persons to whom the Software is | |
11 | * furnished to do so, subject to the following conditions: | |
12 | * | |
13 | * The above copyright notice and this permission notice shall be included in | |
14 | * all copies or substantial portions of the Software. | |
15 | * | |
16 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |
17 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |
18 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL | |
19 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |
20 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |
21 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN | |
22 | * THE SOFTWARE. | |
23 | */ | |
24 | ||
25 | #include "qemu/osdep.h" | |
740b1759 CF |
26 | #include "qemu/cutils.h" |
27 | #include "migration/vmstate.h" | |
28 | #include "qapi/error.h" | |
29 | #include "qemu/error-report.h" | |
740b1759 CF |
30 | #include "sysemu/cpus.h" |
31 | #include "sysemu/qtest.h" | |
32 | #include "qemu/main-loop.h" | |
33 | #include "qemu/option.h" | |
34 | #include "qemu/seqlock.h" | |
35 | #include "sysemu/replay.h" | |
36 | #include "sysemu/runstate.h" | |
37 | #include "hw/core/cpu.h" | |
38 | #include "sysemu/cpu-timers.h" | |
39 | #include "sysemu/cpu-throttle.h" | |
8d7f2e76 | 40 | #include "sysemu/cpu-timers-internal.h" |
740b1759 CF |
41 | |
42 | /* | |
43 | * ICOUNT: Instruction Counter | |
44 | * | |
45 | * this module is split off from cpu-timers because the icount part | |
46 | * is TCG-specific, and does not need to be built for other accels. | |
47 | */ | |
/*
 * Largest value accepted for the icount time shift. With a shift of 10
 * one instruction accounts for 1024 ns, i.e. roughly 1 MIPS, which was
 * arbitrarily picked as the minimum allowable guest speed.
 */
#define MAX_ICOUNT_SHIFT 10

/* Mirrors the "sleep" option; updated by icount_configure(). */
static bool icount_sleep = true;

/*
 * Instruction counting mode:
 *   0 = Do not count executed instructions.
 *   1 = Fixed conversion of insn to ns via "shift" option
 *   2 = Runtime adaptive algorithm to compute shift
 */
int use_icount;
58 | ||
59 | static void icount_enable_precise(void) | |
60 | { | |
61 | use_icount = 1; | |
62 | } | |
63 | ||
64 | static void icount_enable_adaptive(void) | |
65 | { | |
66 | use_icount = 2; | |
67 | } | |
68 | ||
69 | /* | |
70 | * The current number of executed instructions is based on what we | |
71 | * originally budgeted minus the current state of the decrementing | |
72 | * icount counters in extra/u16.low. | |
73 | */ | |
8191d368 | 74 | static int64_t icount_get_executed(CPUState *cpu) |
740b1759 CF |
75 | { |
76 | return (cpu->icount_budget - | |
a953b5fa | 77 | (cpu->neg.icount_decr.u16.low + cpu->icount_extra)); |
740b1759 CF |
78 | } |
79 | ||
80 | /* | |
81 | * Update the global shared timer_state.qemu_icount to take into | |
82 | * account executed instructions. This is done by the TCG vCPU | |
83 | * thread so the main-loop can see time has moved forward. | |
84 | */ | |
8191d368 | 85 | static void icount_update_locked(CPUState *cpu) |
740b1759 | 86 | { |
8191d368 | 87 | int64_t executed = icount_get_executed(cpu); |
740b1759 CF |
88 | cpu->icount_budget -= executed; |
89 | ||
90 | qatomic_set_i64(&timers_state.qemu_icount, | |
91 | timers_state.qemu_icount + executed); | |
92 | } | |
93 | ||
94 | /* | |
95 | * Update the global shared timer_state.qemu_icount to take into | |
96 | * account executed instructions. This is done by the TCG vCPU | |
97 | * thread so the main-loop can see time has moved forward. | |
98 | */ | |
8191d368 | 99 | void icount_update(CPUState *cpu) |
740b1759 CF |
100 | { |
101 | seqlock_write_lock(&timers_state.vm_clock_seqlock, | |
102 | &timers_state.vm_clock_lock); | |
8191d368 | 103 | icount_update_locked(cpu); |
740b1759 CF |
104 | seqlock_write_unlock(&timers_state.vm_clock_seqlock, |
105 | &timers_state.vm_clock_lock); | |
106 | } | |
107 | ||
8191d368 | 108 | static int64_t icount_get_raw_locked(void) |
740b1759 CF |
109 | { |
110 | CPUState *cpu = current_cpu; | |
111 | ||
112 | if (cpu && cpu->running) { | |
464dacf6 | 113 | if (!cpu->neg.can_do_io) { |
740b1759 CF |
114 | error_report("Bad icount read"); |
115 | exit(1); | |
116 | } | |
117 | /* Take into account what has run */ | |
8191d368 | 118 | icount_update_locked(cpu); |
740b1759 CF |
119 | } |
120 | /* The read is protected by the seqlock, but needs atomic64 to avoid UB */ | |
121 | return qatomic_read_i64(&timers_state.qemu_icount); | |
122 | } | |
123 | ||
8191d368 | 124 | static int64_t icount_get_locked(void) |
740b1759 | 125 | { |
8191d368 | 126 | int64_t icount = icount_get_raw_locked(); |
740b1759 | 127 | return qatomic_read_i64(&timers_state.qemu_icount_bias) + |
8191d368 | 128 | icount_to_ns(icount); |
740b1759 CF |
129 | } |
130 | ||
8191d368 | 131 | int64_t icount_get_raw(void) |
740b1759 CF |
132 | { |
133 | int64_t icount; | |
134 | unsigned start; | |
135 | ||
136 | do { | |
137 | start = seqlock_read_begin(&timers_state.vm_clock_seqlock); | |
8191d368 | 138 | icount = icount_get_raw_locked(); |
740b1759 CF |
139 | } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start)); |
140 | ||
141 | return icount; | |
142 | } | |
143 | ||
144 | /* Return the virtual CPU time, based on the instruction counter. */ | |
8191d368 | 145 | int64_t icount_get(void) |
740b1759 CF |
146 | { |
147 | int64_t icount; | |
148 | unsigned start; | |
149 | ||
150 | do { | |
151 | start = seqlock_read_begin(&timers_state.vm_clock_seqlock); | |
8191d368 | 152 | icount = icount_get_locked(); |
740b1759 CF |
153 | } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start)); |
154 | ||
155 | return icount; | |
156 | } | |
157 | ||
8191d368 | 158 | int64_t icount_to_ns(int64_t icount) |
740b1759 CF |
159 | { |
160 | return icount << qatomic_read(&timers_state.icount_time_shift); | |
161 | } | |
162 | ||
163 | /* | |
164 | * Correlation between real and virtual time is always going to be | |
165 | * fairly approximate, so ignore small variation. | |
166 | * When the guest is idle real and virtual time will be aligned in | |
167 | * the IO wait loop. | |
168 | */ | |
169 | #define ICOUNT_WOBBLE (NANOSECONDS_PER_SECOND / 10) | |
170 | ||
171 | static void icount_adjust(void) | |
172 | { | |
173 | int64_t cur_time; | |
174 | int64_t cur_icount; | |
175 | int64_t delta; | |
176 | ||
740b1759 CF |
177 | /* If the VM is not running, then do nothing. */ |
178 | if (!runstate_is_running()) { | |
179 | return; | |
180 | } | |
181 | ||
182 | seqlock_write_lock(&timers_state.vm_clock_seqlock, | |
183 | &timers_state.vm_clock_lock); | |
184 | cur_time = REPLAY_CLOCK_LOCKED(REPLAY_CLOCK_VIRTUAL_RT, | |
185 | cpu_get_clock_locked()); | |
8191d368 | 186 | cur_icount = icount_get_locked(); |
740b1759 CF |
187 | |
188 | delta = cur_icount - cur_time; | |
189 | /* FIXME: This is a very crude algorithm, somewhat prone to oscillation. */ | |
190 | if (delta > 0 | |
fe852ac2 | 191 | && timers_state.last_delta + ICOUNT_WOBBLE < delta * 2 |
740b1759 CF |
192 | && timers_state.icount_time_shift > 0) { |
193 | /* The guest is getting too far ahead. Slow time down. */ | |
194 | qatomic_set(&timers_state.icount_time_shift, | |
195 | timers_state.icount_time_shift - 1); | |
196 | } | |
197 | if (delta < 0 | |
fe852ac2 | 198 | && timers_state.last_delta - ICOUNT_WOBBLE > delta * 2 |
740b1759 CF |
199 | && timers_state.icount_time_shift < MAX_ICOUNT_SHIFT) { |
200 | /* The guest is getting too far behind. Speed time up. */ | |
201 | qatomic_set(&timers_state.icount_time_shift, | |
202 | timers_state.icount_time_shift + 1); | |
203 | } | |
fe852ac2 | 204 | timers_state.last_delta = delta; |
740b1759 CF |
205 | qatomic_set_i64(&timers_state.qemu_icount_bias, |
206 | cur_icount - (timers_state.qemu_icount | |
207 | << timers_state.icount_time_shift)); | |
208 | seqlock_write_unlock(&timers_state.vm_clock_seqlock, | |
209 | &timers_state.vm_clock_lock); | |
210 | } | |
211 | ||
212 | static void icount_adjust_rt(void *opaque) | |
213 | { | |
214 | timer_mod(timers_state.icount_rt_timer, | |
215 | qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL_RT) + 1000); | |
216 | icount_adjust(); | |
217 | } | |
218 | ||
219 | static void icount_adjust_vm(void *opaque) | |
220 | { | |
221 | timer_mod(timers_state.icount_vm_timer, | |
222 | qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + | |
223 | NANOSECONDS_PER_SECOND / 10); | |
224 | icount_adjust(); | |
225 | } | |
226 | ||
8191d368 | 227 | int64_t icount_round(int64_t count) |
740b1759 CF |
228 | { |
229 | int shift = qatomic_read(&timers_state.icount_time_shift); | |
230 | return (count + (1 << shift) - 1) >> shift; | |
231 | } | |
232 | ||
233 | static void icount_warp_rt(void) | |
234 | { | |
235 | unsigned seq; | |
236 | int64_t warp_start; | |
237 | ||
238 | /* | |
239 | * The icount_warp_timer is rescheduled soon after vm_clock_warp_start | |
240 | * changes from -1 to another value, so the race here is okay. | |
241 | */ | |
242 | do { | |
243 | seq = seqlock_read_begin(&timers_state.vm_clock_seqlock); | |
244 | warp_start = timers_state.vm_clock_warp_start; | |
245 | } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, seq)); | |
246 | ||
247 | if (warp_start == -1) { | |
248 | return; | |
249 | } | |
250 | ||
251 | seqlock_write_lock(&timers_state.vm_clock_seqlock, | |
252 | &timers_state.vm_clock_lock); | |
253 | if (runstate_is_running()) { | |
254 | int64_t clock = REPLAY_CLOCK_LOCKED(REPLAY_CLOCK_VIRTUAL_RT, | |
255 | cpu_get_clock_locked()); | |
256 | int64_t warp_delta; | |
257 | ||
258 | warp_delta = clock - timers_state.vm_clock_warp_start; | |
259 | if (icount_enabled() == 2) { | |
260 | /* | |
67f85346 NP |
261 | * In adaptive mode, do not let QEMU_CLOCK_VIRTUAL run too far |
262 | * ahead of real time (it might already be ahead so careful not | |
263 | * to go backwards). | |
740b1759 | 264 | */ |
8191d368 | 265 | int64_t cur_icount = icount_get_locked(); |
740b1759 | 266 | int64_t delta = clock - cur_icount; |
67f85346 NP |
267 | |
268 | if (delta < 0) { | |
269 | delta = 0; | |
270 | } | |
740b1759 CF |
271 | warp_delta = MIN(warp_delta, delta); |
272 | } | |
273 | qatomic_set_i64(&timers_state.qemu_icount_bias, | |
274 | timers_state.qemu_icount_bias + warp_delta); | |
275 | } | |
276 | timers_state.vm_clock_warp_start = -1; | |
277 | seqlock_write_unlock(&timers_state.vm_clock_seqlock, | |
278 | &timers_state.vm_clock_lock); | |
279 | ||
280 | if (qemu_clock_expired(QEMU_CLOCK_VIRTUAL)) { | |
281 | qemu_clock_notify(QEMU_CLOCK_VIRTUAL); | |
282 | } | |
283 | } | |
284 | ||
/*
 * Callback of icount_warp_timer: account the pending warp.
 * @opaque is unused.
 */
static void icount_timer_cb(void *opaque)
{
    /*
     * No need for a checkpoint because the timer already synchronizes
     * with CHECKPOINT_CLOCK_VIRTUAL_RT.
     */
    icount_warp_rt();
}
293 | ||
8191d368 | 294 | void icount_start_warp_timer(void) |
740b1759 CF |
295 | { |
296 | int64_t clock; | |
297 | int64_t deadline; | |
298 | ||
299 | assert(icount_enabled()); | |
300 | ||
301 | /* | |
302 | * Nothing to do if the VM is stopped: QEMU_CLOCK_VIRTUAL timers | |
303 | * do not fire, so computing the deadline does not make sense. | |
304 | */ | |
305 | if (!runstate_is_running()) { | |
306 | return; | |
307 | } | |
308 | ||
309 | if (replay_mode != REPLAY_MODE_PLAY) { | |
310 | if (!all_cpu_threads_idle()) { | |
311 | return; | |
312 | } | |
313 | ||
314 | if (qtest_enabled()) { | |
315 | /* When testing, qtest commands advance icount. */ | |
316 | return; | |
317 | } | |
318 | ||
319 | replay_checkpoint(CHECKPOINT_CLOCK_WARP_START); | |
320 | } else { | |
321 | /* warp clock deterministically in record/replay mode */ | |
322 | if (!replay_checkpoint(CHECKPOINT_CLOCK_WARP_START)) { | |
323 | /* | |
324 | * vCPU is sleeping and warp can't be started. | |
325 | * It is probably a race condition: notification sent | |
326 | * to vCPU was processed in advance and vCPU went to sleep. | |
669dcb60 | 327 | * Therefore we have to wake it up for doing something. |
740b1759 | 328 | */ |
60618e2d | 329 | if (replay_has_event()) { |
740b1759 CF |
330 | qemu_clock_notify(QEMU_CLOCK_VIRTUAL); |
331 | } | |
332 | return; | |
333 | } | |
334 | } | |
335 | ||
336 | /* We want to use the earliest deadline from ALL vm_clocks */ | |
337 | clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT); | |
338 | deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL, | |
339 | ~QEMU_TIMER_ATTR_EXTERNAL); | |
340 | if (deadline < 0) { | |
341 | static bool notified; | |
342 | if (!icount_sleep && !notified) { | |
343 | warn_report("icount sleep disabled and no active timers"); | |
344 | notified = true; | |
345 | } | |
346 | return; | |
347 | } | |
348 | ||
349 | if (deadline > 0) { | |
350 | /* | |
351 | * Ensure QEMU_CLOCK_VIRTUAL proceeds even when the virtual CPU goes to | |
352 | * sleep. Otherwise, the CPU might be waiting for a future timer | |
353 | * interrupt to wake it up, but the interrupt never comes because | |
354 | * the vCPU isn't running any insns and thus doesn't advance the | |
355 | * QEMU_CLOCK_VIRTUAL. | |
356 | */ | |
357 | if (!icount_sleep) { | |
358 | /* | |
359 | * We never let VCPUs sleep in no sleep icount mode. | |
360 | * If there is a pending QEMU_CLOCK_VIRTUAL timer we just advance | |
361 | * to the next QEMU_CLOCK_VIRTUAL event and notify it. | |
362 | * It is useful when we want a deterministic execution time, | |
363 | * isolated from host latencies. | |
364 | */ | |
365 | seqlock_write_lock(&timers_state.vm_clock_seqlock, | |
366 | &timers_state.vm_clock_lock); | |
367 | qatomic_set_i64(&timers_state.qemu_icount_bias, | |
368 | timers_state.qemu_icount_bias + deadline); | |
369 | seqlock_write_unlock(&timers_state.vm_clock_seqlock, | |
370 | &timers_state.vm_clock_lock); | |
371 | qemu_clock_notify(QEMU_CLOCK_VIRTUAL); | |
372 | } else { | |
373 | /* | |
374 | * We do stop VCPUs and only advance QEMU_CLOCK_VIRTUAL after some | |
375 | * "real" time, (related to the time left until the next event) has | |
376 | * passed. The QEMU_CLOCK_VIRTUAL_RT clock will do this. | |
377 | * This avoids that the warps are visible externally; for example, | |
378 | * you will not be sending network packets continuously instead of | |
379 | * every 100ms. | |
380 | */ | |
381 | seqlock_write_lock(&timers_state.vm_clock_seqlock, | |
382 | &timers_state.vm_clock_lock); | |
383 | if (timers_state.vm_clock_warp_start == -1 | |
384 | || timers_state.vm_clock_warp_start > clock) { | |
385 | timers_state.vm_clock_warp_start = clock; | |
386 | } | |
387 | seqlock_write_unlock(&timers_state.vm_clock_seqlock, | |
388 | &timers_state.vm_clock_lock); | |
389 | timer_mod_anticipate(timers_state.icount_warp_timer, | |
390 | clock + deadline); | |
391 | } | |
392 | } else if (deadline == 0) { | |
393 | qemu_clock_notify(QEMU_CLOCK_VIRTUAL); | |
394 | } | |
395 | } | |
396 | ||
8191d368 | 397 | void icount_account_warp_timer(void) |
740b1759 | 398 | { |
45e077d7 | 399 | if (!icount_sleep) { |
740b1759 CF |
400 | return; |
401 | } | |
402 | ||
403 | /* | |
404 | * Nothing to do if the VM is stopped: QEMU_CLOCK_VIRTUAL timers | |
405 | * do not fire, so computing the deadline does not make sense. | |
406 | */ | |
407 | if (!runstate_is_running()) { | |
408 | return; | |
409 | } | |
410 | ||
60618e2d PD |
411 | replay_async_events(); |
412 | ||
740b1759 CF |
413 | /* warp clock deterministically in record/replay mode */ |
414 | if (!replay_checkpoint(CHECKPOINT_CLOCK_WARP_ACCOUNT)) { | |
415 | return; | |
416 | } | |
417 | ||
418 | timer_del(timers_state.icount_warp_timer); | |
419 | icount_warp_rt(); | |
420 | } | |
421 | ||
8191d368 | 422 | void icount_configure(QemuOpts *opts, Error **errp) |
740b1759 CF |
423 | { |
424 | const char *option = qemu_opt_get(opts, "shift"); | |
425 | bool sleep = qemu_opt_get_bool(opts, "sleep", true); | |
426 | bool align = qemu_opt_get_bool(opts, "align", false); | |
427 | long time_shift = -1; | |
428 | ||
429 | if (!option) { | |
430 | if (qemu_opt_get(opts, "align") != NULL) { | |
431 | error_setg(errp, "Please specify shift option when using align"); | |
432 | } | |
433 | return; | |
434 | } | |
435 | ||
436 | if (align && !sleep) { | |
437 | error_setg(errp, "align=on and sleep=off are incompatible"); | |
438 | return; | |
439 | } | |
440 | ||
441 | if (strcmp(option, "auto") != 0) { | |
442 | if (qemu_strtol(option, NULL, 0, &time_shift) < 0 | |
443 | || time_shift < 0 || time_shift > MAX_ICOUNT_SHIFT) { | |
444 | error_setg(errp, "icount: Invalid shift value"); | |
445 | return; | |
446 | } | |
447 | } else if (icount_align_option) { | |
448 | error_setg(errp, "shift=auto and align=on are incompatible"); | |
449 | return; | |
450 | } else if (!icount_sleep) { | |
451 | error_setg(errp, "shift=auto and sleep=off are incompatible"); | |
452 | return; | |
453 | } | |
454 | ||
455 | icount_sleep = sleep; | |
456 | if (icount_sleep) { | |
457 | timers_state.icount_warp_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL_RT, | |
458 | icount_timer_cb, NULL); | |
459 | } | |
460 | ||
461 | icount_align_option = align; | |
462 | ||
463 | if (time_shift >= 0) { | |
464 | timers_state.icount_time_shift = time_shift; | |
465 | icount_enable_precise(); | |
466 | return; | |
467 | } | |
468 | ||
469 | icount_enable_adaptive(); | |
470 | ||
471 | /* | |
472 | * 125MIPS seems a reasonable initial guess at the guest speed. | |
473 | * It will be corrected fairly quickly anyway. | |
474 | */ | |
475 | timers_state.icount_time_shift = 3; | |
476 | ||
477 | /* | |
478 | * Have both realtime and virtual time triggers for speed adjustment. | |
479 | * The realtime trigger catches emulated time passing too slowly, | |
480 | * the virtual time trigger catches emulated time passing too fast. | |
481 | * Realtime triggers occur even when idle, so use them less frequently | |
482 | * than VM triggers. | |
483 | */ | |
484 | timers_state.vm_clock_warp_start = -1; | |
485 | timers_state.icount_rt_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL_RT, | |
486 | icount_adjust_rt, NULL); | |
487 | timer_mod(timers_state.icount_rt_timer, | |
488 | qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL_RT) + 1000); | |
489 | timers_state.icount_vm_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, | |
490 | icount_adjust_vm, NULL); | |
491 | timer_mod(timers_state.icount_vm_timer, | |
492 | qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + | |
493 | NANOSECONDS_PER_SECOND / 10); | |
494 | } | |
75bbe5e5 PD |
495 | |
496 | void icount_notify_exit(void) | |
497 | { | |
498 | if (icount_enabled() && current_cpu) { | |
499 | qemu_cpu_kick(current_cpu); | |
500 | qemu_clock_notify(QEMU_CLOCK_VIRTUAL); | |
501 | } | |
502 | } |