]> git.proxmox.com Git - mirror_qemu.git/blame - cpus.c
cpu-exec: allow temporary disabling icount
[mirror_qemu.git] / cpus.c
CommitLineData
296af7c9
BS
1/*
2 * QEMU System Emulator
3 *
4 * Copyright (c) 2003-2008 Fabrice Bellard
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to deal
8 * in the Software without restriction, including without limitation the rights
9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 * copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22 * THE SOFTWARE.
23 */
24
25/* Needed early for CONFIG_BSD etc. */
26#include "config-host.h"
27
83c9089e 28#include "monitor/monitor.h"
a4e15de9 29#include "qapi/qmp/qerror.h"
d49b6836 30#include "qemu/error-report.h"
9c17d615 31#include "sysemu/sysemu.h"
022c62cb 32#include "exec/gdbstub.h"
9c17d615
PB
33#include "sysemu/dma.h"
34#include "sysemu/kvm.h"
de0b36b6 35#include "qmp-commands.h"
296af7c9 36
1de7afc9 37#include "qemu/thread.h"
9c17d615
PB
38#include "sysemu/cpus.h"
39#include "sysemu/qtest.h"
1de7afc9
PB
40#include "qemu/main-loop.h"
41#include "qemu/bitmap.h"
cb365646 42#include "qemu/seqlock.h"
a4e15de9 43#include "qapi-event.h"
9cb805fd 44#include "hw/nmi.h"
0ff0fc19
JK
45
46#ifndef _WIN32
1de7afc9 47#include "qemu/compatfd.h"
0ff0fc19 48#endif
296af7c9 49
6d9cb73c
JK
50#ifdef CONFIG_LINUX
51
52#include <sys/prctl.h>
53
c0532a76
MT
54#ifndef PR_MCE_KILL
55#define PR_MCE_KILL 33
56#endif
57
6d9cb73c
JK
58#ifndef PR_MCE_KILL_SET
59#define PR_MCE_KILL_SET 1
60#endif
61
62#ifndef PR_MCE_KILL_EARLY
63#define PR_MCE_KILL_EARLY 1
64#endif
65
66#endif /* CONFIG_LINUX */
67
182735ef 68static CPUState *next_cpu;
27498bef
ST
69int64_t max_delay;
70int64_t max_advance;
296af7c9 71
2adcc85d
JH
72/* vcpu throttling controls */
73static QEMUTimer *throttle_timer;
74static unsigned int throttle_percentage;
75
76#define CPU_THROTTLE_PCT_MIN 1
77#define CPU_THROTTLE_PCT_MAX 99
78#define CPU_THROTTLE_TIMESLICE_NS 10000000
79
321bc0b2
TC
80bool cpu_is_stopped(CPUState *cpu)
81{
82 return cpu->stopped || !runstate_is_running();
83}
84
a98ae1d8 85static bool cpu_thread_is_idle(CPUState *cpu)
ac873f1e 86{
c64ca814 87 if (cpu->stop || cpu->queued_work_first) {
ac873f1e
PM
88 return false;
89 }
321bc0b2 90 if (cpu_is_stopped(cpu)) {
ac873f1e
PM
91 return true;
92 }
8c2e1b00 93 if (!cpu->halted || cpu_has_work(cpu) ||
215e79c0 94 kvm_halt_in_kernel()) {
ac873f1e
PM
95 return false;
96 }
97 return true;
98}
99
100static bool all_cpu_threads_idle(void)
101{
182735ef 102 CPUState *cpu;
ac873f1e 103
bdc44640 104 CPU_FOREACH(cpu) {
182735ef 105 if (!cpu_thread_is_idle(cpu)) {
ac873f1e
PM
106 return false;
107 }
108 }
109 return true;
110}
111
946fb27c
PB
112/***********************************************************/
113/* guest cycle counter */
114
a3270e19
PB
115/* Protected by TimersState seqlock */
116
5045e9d9 117static bool icount_sleep = true;
71468395 118static int64_t vm_clock_warp_start = -1;
946fb27c
PB
119/* Conversion factor from emulated instructions to virtual clock ticks. */
120static int icount_time_shift;
121/* Arbitrarily pick 1MIPS as the minimum allowable speed. */
122#define MAX_ICOUNT_SHIFT 10
a3270e19 123
946fb27c
PB
124static QEMUTimer *icount_rt_timer;
125static QEMUTimer *icount_vm_timer;
126static QEMUTimer *icount_warp_timer;
946fb27c
PB
127
128typedef struct TimersState {
cb365646 129 /* Protected by BQL. */
946fb27c
PB
130 int64_t cpu_ticks_prev;
131 int64_t cpu_ticks_offset;
cb365646
LPF
132
133 /* cpu_clock_offset can be read out of BQL, so protect it with
134 * this lock.
135 */
136 QemuSeqLock vm_clock_seqlock;
946fb27c
PB
137 int64_t cpu_clock_offset;
138 int32_t cpu_ticks_enabled;
139 int64_t dummy;
c96778bb
FK
140
141 /* Compensate for varying guest execution speed. */
142 int64_t qemu_icount_bias;
143 /* Only written by TCG thread */
144 int64_t qemu_icount;
946fb27c
PB
145} TimersState;
146
d9cd4007 147static TimersState timers_state;
946fb27c 148
2a62914b 149int64_t cpu_get_icount_raw(void)
946fb27c
PB
150{
151 int64_t icount;
4917cf44 152 CPUState *cpu = current_cpu;
946fb27c 153
c96778bb 154 icount = timers_state.qemu_icount;
4917cf44 155 if (cpu) {
414b15c9 156 if (!cpu->can_do_io) {
2a62914b
PD
157 fprintf(stderr, "Bad icount read\n");
158 exit(1);
946fb27c 159 }
28ecfd7a 160 icount -= (cpu->icount_decr.u16.low + cpu->icount_extra);
946fb27c 161 }
2a62914b
PD
162 return icount;
163}
164
165/* Return the virtual CPU time, based on the instruction counter. */
166static int64_t cpu_get_icount_locked(void)
167{
168 int64_t icount = cpu_get_icount_raw();
3f031313 169 return timers_state.qemu_icount_bias + cpu_icount_to_ns(icount);
946fb27c
PB
170}
171
17a15f1b
PB
172int64_t cpu_get_icount(void)
173{
174 int64_t icount;
175 unsigned start;
176
177 do {
178 start = seqlock_read_begin(&timers_state.vm_clock_seqlock);
179 icount = cpu_get_icount_locked();
180 } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start));
181
182 return icount;
183}
184
3f031313
FK
185int64_t cpu_icount_to_ns(int64_t icount)
186{
187 return icount << icount_time_shift;
188}
189
946fb27c 190/* return the host CPU cycle counter and handle stop/restart */
cb365646 191/* Caller must hold the BQL */
946fb27c
PB
192int64_t cpu_get_ticks(void)
193{
5f3e3101
PB
194 int64_t ticks;
195
946fb27c
PB
196 if (use_icount) {
197 return cpu_get_icount();
198 }
5f3e3101
PB
199
200 ticks = timers_state.cpu_ticks_offset;
201 if (timers_state.cpu_ticks_enabled) {
4a7428c5 202 ticks += cpu_get_host_ticks();
5f3e3101
PB
203 }
204
205 if (timers_state.cpu_ticks_prev > ticks) {
206 /* Note: non increasing ticks may happen if the host uses
207 software suspend */
208 timers_state.cpu_ticks_offset += timers_state.cpu_ticks_prev - ticks;
209 ticks = timers_state.cpu_ticks_prev;
946fb27c 210 }
5f3e3101
PB
211
212 timers_state.cpu_ticks_prev = ticks;
213 return ticks;
946fb27c
PB
214}
215
cb365646 216static int64_t cpu_get_clock_locked(void)
946fb27c 217{
5f3e3101 218 int64_t ticks;
cb365646 219
5f3e3101
PB
220 ticks = timers_state.cpu_clock_offset;
221 if (timers_state.cpu_ticks_enabled) {
222 ticks += get_clock();
946fb27c 223 }
cb365646 224
5f3e3101 225 return ticks;
cb365646
LPF
226}
227
228/* return the host CPU monotonic timer and handle stop/restart */
229int64_t cpu_get_clock(void)
230{
231 int64_t ti;
232 unsigned start;
233
234 do {
235 start = seqlock_read_begin(&timers_state.vm_clock_seqlock);
236 ti = cpu_get_clock_locked();
237 } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start));
238
239 return ti;
946fb27c
PB
240}
241
cb365646
LPF
242/* enable cpu_get_ticks()
243 * Caller must hold BQL which server as mutex for vm_clock_seqlock.
244 */
946fb27c
PB
245void cpu_enable_ticks(void)
246{
cb365646
LPF
247 /* Here, the really thing protected by seqlock is cpu_clock_offset. */
248 seqlock_write_lock(&timers_state.vm_clock_seqlock);
946fb27c 249 if (!timers_state.cpu_ticks_enabled) {
4a7428c5 250 timers_state.cpu_ticks_offset -= cpu_get_host_ticks();
946fb27c
PB
251 timers_state.cpu_clock_offset -= get_clock();
252 timers_state.cpu_ticks_enabled = 1;
253 }
cb365646 254 seqlock_write_unlock(&timers_state.vm_clock_seqlock);
946fb27c
PB
255}
256
257/* disable cpu_get_ticks() : the clock is stopped. You must not call
cb365646
LPF
258 * cpu_get_ticks() after that.
259 * Caller must hold BQL which server as mutex for vm_clock_seqlock.
260 */
946fb27c
PB
261void cpu_disable_ticks(void)
262{
cb365646
LPF
263 /* Here, the really thing protected by seqlock is cpu_clock_offset. */
264 seqlock_write_lock(&timers_state.vm_clock_seqlock);
946fb27c 265 if (timers_state.cpu_ticks_enabled) {
4a7428c5 266 timers_state.cpu_ticks_offset += cpu_get_host_ticks();
cb365646 267 timers_state.cpu_clock_offset = cpu_get_clock_locked();
946fb27c
PB
268 timers_state.cpu_ticks_enabled = 0;
269 }
cb365646 270 seqlock_write_unlock(&timers_state.vm_clock_seqlock);
946fb27c
PB
271}
272
273/* Correlation between real and virtual time is always going to be
274 fairly approximate, so ignore small variation.
275 When the guest is idle real and virtual time will be aligned in
276 the IO wait loop. */
277#define ICOUNT_WOBBLE (get_ticks_per_sec() / 10)
278
279static void icount_adjust(void)
280{
281 int64_t cur_time;
282 int64_t cur_icount;
283 int64_t delta;
a3270e19
PB
284
285 /* Protected by TimersState mutex. */
946fb27c 286 static int64_t last_delta;
468cc7cf 287
946fb27c
PB
288 /* If the VM is not running, then do nothing. */
289 if (!runstate_is_running()) {
290 return;
291 }
468cc7cf 292
17a15f1b
PB
293 seqlock_write_lock(&timers_state.vm_clock_seqlock);
294 cur_time = cpu_get_clock_locked();
295 cur_icount = cpu_get_icount_locked();
468cc7cf 296
946fb27c
PB
297 delta = cur_icount - cur_time;
298 /* FIXME: This is a very crude algorithm, somewhat prone to oscillation. */
299 if (delta > 0
300 && last_delta + ICOUNT_WOBBLE < delta * 2
301 && icount_time_shift > 0) {
302 /* The guest is getting too far ahead. Slow time down. */
303 icount_time_shift--;
304 }
305 if (delta < 0
306 && last_delta - ICOUNT_WOBBLE > delta * 2
307 && icount_time_shift < MAX_ICOUNT_SHIFT) {
308 /* The guest is getting too far behind. Speed time up. */
309 icount_time_shift++;
310 }
311 last_delta = delta;
c96778bb
FK
312 timers_state.qemu_icount_bias = cur_icount
313 - (timers_state.qemu_icount << icount_time_shift);
17a15f1b 314 seqlock_write_unlock(&timers_state.vm_clock_seqlock);
946fb27c
PB
315}
316
317static void icount_adjust_rt(void *opaque)
318{
40daca54 319 timer_mod(icount_rt_timer,
1979b908 320 qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL_RT) + 1000);
946fb27c
PB
321 icount_adjust();
322}
323
324static void icount_adjust_vm(void *opaque)
325{
40daca54
AB
326 timer_mod(icount_vm_timer,
327 qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
328 get_ticks_per_sec() / 10);
946fb27c
PB
329 icount_adjust();
330}
331
332static int64_t qemu_icount_round(int64_t count)
333{
334 return (count + (1 << icount_time_shift) - 1) >> icount_time_shift;
335}
336
337static void icount_warp_rt(void *opaque)
338{
17a15f1b
PB
339 /* The icount_warp_timer is rescheduled soon after vm_clock_warp_start
340 * changes from -1 to another value, so the race here is okay.
341 */
342 if (atomic_read(&vm_clock_warp_start) == -1) {
946fb27c
PB
343 return;
344 }
345
17a15f1b 346 seqlock_write_lock(&timers_state.vm_clock_seqlock);
946fb27c 347 if (runstate_is_running()) {
bf2a7ddb 348 int64_t clock = cpu_get_clock_locked();
8ed961d9
PB
349 int64_t warp_delta;
350
351 warp_delta = clock - vm_clock_warp_start;
352 if (use_icount == 2) {
946fb27c 353 /*
40daca54 354 * In adaptive mode, do not let QEMU_CLOCK_VIRTUAL run too
946fb27c
PB
355 * far ahead of real time.
356 */
17a15f1b 357 int64_t cur_icount = cpu_get_icount_locked();
bf2a7ddb 358 int64_t delta = clock - cur_icount;
8ed961d9 359 warp_delta = MIN(warp_delta, delta);
946fb27c 360 }
c96778bb 361 timers_state.qemu_icount_bias += warp_delta;
946fb27c
PB
362 }
363 vm_clock_warp_start = -1;
17a15f1b 364 seqlock_write_unlock(&timers_state.vm_clock_seqlock);
8ed961d9
PB
365
366 if (qemu_clock_expired(QEMU_CLOCK_VIRTUAL)) {
367 qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
368 }
946fb27c
PB
369}
370
8156be56
PB
371void qtest_clock_warp(int64_t dest)
372{
40daca54 373 int64_t clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
efef88b3 374 AioContext *aio_context;
8156be56 375 assert(qtest_enabled());
efef88b3 376 aio_context = qemu_get_aio_context();
8156be56 377 while (clock < dest) {
40daca54 378 int64_t deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
c9299e2f 379 int64_t warp = qemu_soonest_timeout(dest - clock, deadline);
efef88b3 380
17a15f1b 381 seqlock_write_lock(&timers_state.vm_clock_seqlock);
c96778bb 382 timers_state.qemu_icount_bias += warp;
17a15f1b
PB
383 seqlock_write_unlock(&timers_state.vm_clock_seqlock);
384
40daca54 385 qemu_clock_run_timers(QEMU_CLOCK_VIRTUAL);
efef88b3 386 timerlist_run_timers(aio_context->tlg.tl[QEMU_CLOCK_VIRTUAL]);
40daca54 387 clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
8156be56 388 }
40daca54 389 qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
8156be56
PB
390}
391
40daca54 392void qemu_clock_warp(QEMUClockType type)
946fb27c 393{
ce78d18c 394 int64_t clock;
946fb27c
PB
395 int64_t deadline;
396
397 /*
398 * There are too many global variables to make the "warp" behavior
399 * applicable to other clocks. But a clock argument removes the
400 * need for if statements all over the place.
401 */
40daca54 402 if (type != QEMU_CLOCK_VIRTUAL || !use_icount) {
946fb27c
PB
403 return;
404 }
405
5045e9d9
VC
406 if (icount_sleep) {
407 /*
408 * If the CPUs have been sleeping, advance QEMU_CLOCK_VIRTUAL timer now.
409 * This ensures that the deadline for the timer is computed correctly
410 * below.
411 * This also makes sure that the insn counter is synchronized before
412 * the CPU starts running, in case the CPU is woken by an event other
413 * than the earliest QEMU_CLOCK_VIRTUAL timer.
414 */
415 icount_warp_rt(NULL);
416 timer_del(icount_warp_timer);
417 }
ce78d18c 418 if (!all_cpu_threads_idle()) {
946fb27c
PB
419 return;
420 }
421
8156be56
PB
422 if (qtest_enabled()) {
423 /* When testing, qtest commands advance icount. */
424 return;
425 }
426
ac70aafc 427 /* We want to use the earliest deadline from ALL vm_clocks */
bf2a7ddb 428 clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT);
40daca54 429 deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
ce78d18c 430 if (deadline < 0) {
d7a0f71d
VC
431 static bool notified;
432 if (!icount_sleep && !notified) {
433 error_report("WARNING: icount sleep disabled and no active timers");
434 notified = true;
435 }
ce78d18c 436 return;
ac70aafc
AB
437 }
438
946fb27c
PB
439 if (deadline > 0) {
440 /*
40daca54 441 * Ensure QEMU_CLOCK_VIRTUAL proceeds even when the virtual CPU goes to
946fb27c
PB
442 * sleep. Otherwise, the CPU might be waiting for a future timer
443 * interrupt to wake it up, but the interrupt never comes because
444 * the vCPU isn't running any insns and thus doesn't advance the
40daca54 445 * QEMU_CLOCK_VIRTUAL.
946fb27c 446 */
5045e9d9
VC
447 if (!icount_sleep) {
448 /*
449 * We never let VCPUs sleep in no sleep icount mode.
450 * If there is a pending QEMU_CLOCK_VIRTUAL timer we just advance
451 * to the next QEMU_CLOCK_VIRTUAL event and notify it.
452 * It is useful when we want a deterministic execution time,
453 * isolated from host latencies.
454 */
455 seqlock_write_lock(&timers_state.vm_clock_seqlock);
456 timers_state.qemu_icount_bias += deadline;
457 seqlock_write_unlock(&timers_state.vm_clock_seqlock);
458 qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
459 } else {
460 /*
461 * We do stop VCPUs and only advance QEMU_CLOCK_VIRTUAL after some
462 * "real" time, (related to the time left until the next event) has
463 * passed. The QEMU_CLOCK_VIRTUAL_RT clock will do this.
464 * This avoids that the warps are visible externally; for example,
465 * you will not be sending network packets continuously instead of
466 * every 100ms.
467 */
468 seqlock_write_lock(&timers_state.vm_clock_seqlock);
469 if (vm_clock_warp_start == -1 || vm_clock_warp_start > clock) {
470 vm_clock_warp_start = clock;
471 }
472 seqlock_write_unlock(&timers_state.vm_clock_seqlock);
473 timer_mod_anticipate(icount_warp_timer, clock + deadline);
ce78d18c 474 }
ac70aafc 475 } else if (deadline == 0) {
40daca54 476 qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
946fb27c
PB
477 }
478}
479
d09eae37
FK
480static bool icount_state_needed(void *opaque)
481{
482 return use_icount;
483}
484
485/*
486 * This is a subsection for icount migration.
487 */
488static const VMStateDescription icount_vmstate_timers = {
489 .name = "timer/icount",
490 .version_id = 1,
491 .minimum_version_id = 1,
5cd8cada 492 .needed = icount_state_needed,
d09eae37
FK
493 .fields = (VMStateField[]) {
494 VMSTATE_INT64(qemu_icount_bias, TimersState),
495 VMSTATE_INT64(qemu_icount, TimersState),
496 VMSTATE_END_OF_LIST()
497 }
498};
499
946fb27c
PB
500static const VMStateDescription vmstate_timers = {
501 .name = "timer",
502 .version_id = 2,
503 .minimum_version_id = 1,
35d08458 504 .fields = (VMStateField[]) {
946fb27c
PB
505 VMSTATE_INT64(cpu_ticks_offset, TimersState),
506 VMSTATE_INT64(dummy, TimersState),
507 VMSTATE_INT64_V(cpu_clock_offset, TimersState, 2),
508 VMSTATE_END_OF_LIST()
d09eae37 509 },
5cd8cada
JQ
510 .subsections = (const VMStateDescription*[]) {
511 &icount_vmstate_timers,
512 NULL
946fb27c
PB
513 }
514};
515
2adcc85d
JH
516static void cpu_throttle_thread(void *opaque)
517{
518 CPUState *cpu = opaque;
519 double pct;
520 double throttle_ratio;
521 long sleeptime_ns;
522
523 if (!cpu_throttle_get_percentage()) {
524 return;
525 }
526
527 pct = (double)cpu_throttle_get_percentage()/100;
528 throttle_ratio = pct / (1 - pct);
529 sleeptime_ns = (long)(throttle_ratio * CPU_THROTTLE_TIMESLICE_NS);
530
531 qemu_mutex_unlock_iothread();
532 atomic_set(&cpu->throttle_thread_scheduled, 0);
533 g_usleep(sleeptime_ns / 1000); /* Convert ns to us for usleep call */
534 qemu_mutex_lock_iothread();
535}
536
537static void cpu_throttle_timer_tick(void *opaque)
538{
539 CPUState *cpu;
540 double pct;
541
542 /* Stop the timer if needed */
543 if (!cpu_throttle_get_percentage()) {
544 return;
545 }
546 CPU_FOREACH(cpu) {
547 if (!atomic_xchg(&cpu->throttle_thread_scheduled, 1)) {
548 async_run_on_cpu(cpu, cpu_throttle_thread, cpu);
549 }
550 }
551
552 pct = (double)cpu_throttle_get_percentage()/100;
553 timer_mod(throttle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT) +
554 CPU_THROTTLE_TIMESLICE_NS / (1-pct));
555}
556
557void cpu_throttle_set(int new_throttle_pct)
558{
559 /* Ensure throttle percentage is within valid range */
560 new_throttle_pct = MIN(new_throttle_pct, CPU_THROTTLE_PCT_MAX);
561 new_throttle_pct = MAX(new_throttle_pct, CPU_THROTTLE_PCT_MIN);
562
563 atomic_set(&throttle_percentage, new_throttle_pct);
564
565 timer_mod(throttle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT) +
566 CPU_THROTTLE_TIMESLICE_NS);
567}
568
569void cpu_throttle_stop(void)
570{
571 atomic_set(&throttle_percentage, 0);
572}
573
/* Return true while the throttle percentage is non-zero. */
bool cpu_throttle_active(void)
{
    int pct = cpu_throttle_get_percentage();

    return pct != 0;
}
578
579int cpu_throttle_get_percentage(void)
580{
581 return atomic_read(&throttle_percentage);
582}
583
4603ea01
PD
584void cpu_ticks_init(void)
585{
586 seqlock_init(&timers_state.vm_clock_seqlock, NULL);
587 vmstate_register(NULL, 0, &vmstate_timers, &timers_state);
2adcc85d
JH
588 throttle_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL_RT,
589 cpu_throttle_timer_tick, NULL);
4603ea01
PD
590}
591
1ad9580b 592void configure_icount(QemuOpts *opts, Error **errp)
946fb27c 593{
1ad9580b 594 const char *option;
a8bfac37 595 char *rem_str = NULL;
1ad9580b 596
1ad9580b 597 option = qemu_opt_get(opts, "shift");
946fb27c 598 if (!option) {
a8bfac37
ST
599 if (qemu_opt_get(opts, "align") != NULL) {
600 error_setg(errp, "Please specify shift option when using align");
601 }
946fb27c
PB
602 return;
603 }
f1f4b57e
VC
604
605 icount_sleep = qemu_opt_get_bool(opts, "sleep", true);
5045e9d9
VC
606 if (icount_sleep) {
607 icount_warp_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL_RT,
608 icount_warp_rt, NULL);
609 }
f1f4b57e 610
a8bfac37 611 icount_align_option = qemu_opt_get_bool(opts, "align", false);
f1f4b57e
VC
612
613 if (icount_align_option && !icount_sleep) {
614 error_setg(errp, "align=on and sleep=no are incompatible");
615 }
946fb27c 616 if (strcmp(option, "auto") != 0) {
a8bfac37
ST
617 errno = 0;
618 icount_time_shift = strtol(option, &rem_str, 0);
619 if (errno != 0 || *rem_str != '\0' || !strlen(option)) {
620 error_setg(errp, "icount: Invalid shift value");
621 }
946fb27c
PB
622 use_icount = 1;
623 return;
a8bfac37
ST
624 } else if (icount_align_option) {
625 error_setg(errp, "shift=auto and align=on are incompatible");
f1f4b57e
VC
626 } else if (!icount_sleep) {
627 error_setg(errp, "shift=auto and sleep=no are incompatible");
946fb27c
PB
628 }
629
630 use_icount = 2;
631
632 /* 125MIPS seems a reasonable initial guess at the guest speed.
633 It will be corrected fairly quickly anyway. */
634 icount_time_shift = 3;
635
636 /* Have both realtime and virtual time triggers for speed adjustment.
637 The realtime trigger catches emulated time passing too slowly,
638 the virtual time trigger catches emulated time passing too fast.
639 Realtime triggers occur even when idle, so use them less frequently
640 than VM triggers. */
bf2a7ddb
PD
641 icount_rt_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL_RT,
642 icount_adjust_rt, NULL);
40daca54 643 timer_mod(icount_rt_timer,
bf2a7ddb 644 qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL_RT) + 1000);
40daca54
AB
645 icount_vm_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
646 icount_adjust_vm, NULL);
647 timer_mod(icount_vm_timer,
648 qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
649 get_ticks_per_sec() / 10);
946fb27c
PB
650}
651
296af7c9
BS
652/***********************************************************/
653void hw_error(const char *fmt, ...)
654{
655 va_list ap;
55e5c285 656 CPUState *cpu;
296af7c9
BS
657
658 va_start(ap, fmt);
659 fprintf(stderr, "qemu: hardware error: ");
660 vfprintf(stderr, fmt, ap);
661 fprintf(stderr, "\n");
bdc44640 662 CPU_FOREACH(cpu) {
55e5c285 663 fprintf(stderr, "CPU #%d:\n", cpu->cpu_index);
878096ee 664 cpu_dump_state(cpu, stderr, fprintf, CPU_DUMP_FPU);
296af7c9
BS
665 }
666 va_end(ap);
667 abort();
668}
669
670void cpu_synchronize_all_states(void)
671{
182735ef 672 CPUState *cpu;
296af7c9 673
bdc44640 674 CPU_FOREACH(cpu) {
182735ef 675 cpu_synchronize_state(cpu);
296af7c9
BS
676 }
677}
678
679void cpu_synchronize_all_post_reset(void)
680{
182735ef 681 CPUState *cpu;
296af7c9 682
bdc44640 683 CPU_FOREACH(cpu) {
182735ef 684 cpu_synchronize_post_reset(cpu);
296af7c9
BS
685 }
686}
687
688void cpu_synchronize_all_post_init(void)
689{
182735ef 690 CPUState *cpu;
296af7c9 691
bdc44640 692 CPU_FOREACH(cpu) {
182735ef 693 cpu_synchronize_post_init(cpu);
296af7c9
BS
694 }
695}
696
de9d61e8
MT
697void cpu_clean_all_dirty(void)
698{
699 CPUState *cpu;
700
701 CPU_FOREACH(cpu) {
702 cpu_clean_state(cpu);
703 }
704}
705
56983463 706static int do_vm_stop(RunState state)
296af7c9 707{
56983463
KW
708 int ret = 0;
709
1354869c 710 if (runstate_is_running()) {
296af7c9 711 cpu_disable_ticks();
296af7c9 712 pause_all_vcpus();
f5bbfba1 713 runstate_set(state);
1dfb4dd9 714 vm_state_notify(0, state);
a4e15de9 715 qapi_event_send_stop(&error_abort);
296af7c9 716 }
56983463 717
594a45ce
KW
718 bdrv_drain_all();
719 ret = bdrv_flush_all();
720
56983463 721 return ret;
296af7c9
BS
722}
723
a1fcaa73 724static bool cpu_can_run(CPUState *cpu)
296af7c9 725{
4fdeee7c 726 if (cpu->stop) {
a1fcaa73 727 return false;
0ab07c62 728 }
321bc0b2 729 if (cpu_is_stopped(cpu)) {
a1fcaa73 730 return false;
0ab07c62 731 }
a1fcaa73 732 return true;
296af7c9
BS
733}
734
91325046 735static void cpu_handle_guest_debug(CPUState *cpu)
83f338f7 736{
64f6b346 737 gdb_set_stop_cpu(cpu);
8cf71710 738 qemu_system_debug_request();
f324e766 739 cpu->stopped = true;
3c638d06
JK
740}
741
6d9cb73c
JK
742#ifdef CONFIG_LINUX
743static void sigbus_reraise(void)
744{
745 sigset_t set;
746 struct sigaction action;
747
748 memset(&action, 0, sizeof(action));
749 action.sa_handler = SIG_DFL;
750 if (!sigaction(SIGBUS, &action, NULL)) {
751 raise(SIGBUS);
752 sigemptyset(&set);
753 sigaddset(&set, SIGBUS);
754 sigprocmask(SIG_UNBLOCK, &set, NULL);
755 }
756 perror("Failed to re-raise SIGBUS!\n");
757 abort();
758}
759
760static void sigbus_handler(int n, struct qemu_signalfd_siginfo *siginfo,
761 void *ctx)
762{
763 if (kvm_on_sigbus(siginfo->ssi_code,
764 (void *)(intptr_t)siginfo->ssi_addr)) {
765 sigbus_reraise();
766 }
767}
768
769static void qemu_init_sigbus(void)
770{
771 struct sigaction action;
772
773 memset(&action, 0, sizeof(action));
774 action.sa_flags = SA_SIGINFO;
775 action.sa_sigaction = (void (*)(int, siginfo_t*, void*))sigbus_handler;
776 sigaction(SIGBUS, &action, NULL);
777
778 prctl(PR_MCE_KILL, PR_MCE_KILL_SET, PR_MCE_KILL_EARLY, 0, 0);
779}
780
290adf38 781static void qemu_kvm_eat_signals(CPUState *cpu)
1ab3c6c0
JK
782{
783 struct timespec ts = { 0, 0 };
784 siginfo_t siginfo;
785 sigset_t waitset;
786 sigset_t chkset;
787 int r;
788
789 sigemptyset(&waitset);
790 sigaddset(&waitset, SIG_IPI);
791 sigaddset(&waitset, SIGBUS);
792
793 do {
794 r = sigtimedwait(&waitset, &siginfo, &ts);
795 if (r == -1 && !(errno == EAGAIN || errno == EINTR)) {
796 perror("sigtimedwait");
797 exit(1);
798 }
799
800 switch (r) {
801 case SIGBUS:
290adf38 802 if (kvm_on_sigbus_vcpu(cpu, siginfo.si_code, siginfo.si_addr)) {
1ab3c6c0
JK
803 sigbus_reraise();
804 }
805 break;
806 default:
807 break;
808 }
809
810 r = sigpending(&chkset);
811 if (r == -1) {
812 perror("sigpending");
813 exit(1);
814 }
815 } while (sigismember(&chkset, SIG_IPI) || sigismember(&chkset, SIGBUS));
1ab3c6c0
JK
816}
817
6d9cb73c
JK
818#else /* !CONFIG_LINUX */
819
/* Stub for builds without CONFIG_LINUX: no SIGBUS setup is done. */
static void qemu_init_sigbus(void)
{
    /* nothing to do */
}
1ab3c6c0 823
290adf38 824static void qemu_kvm_eat_signals(CPUState *cpu)
1ab3c6c0
JK
825{
826}
6d9cb73c
JK
827#endif /* !CONFIG_LINUX */
828
296af7c9 829#ifndef _WIN32
55f8d6ac
JK
/* Do-nothing signal handler; installed for SIG_IPI by
 * qemu_kvm_init_cpu_signals(). */
static void dummy_signal(int sig)
{
    /* intentionally empty */
}
55f8d6ac 833
13618e05 834static void qemu_kvm_init_cpu_signals(CPUState *cpu)
714bd040
PB
835{
836 int r;
837 sigset_t set;
838 struct sigaction sigact;
839
840 memset(&sigact, 0, sizeof(sigact));
841 sigact.sa_handler = dummy_signal;
842 sigaction(SIG_IPI, &sigact, NULL);
843
714bd040
PB
844 pthread_sigmask(SIG_BLOCK, NULL, &set);
845 sigdelset(&set, SIG_IPI);
714bd040 846 sigdelset(&set, SIGBUS);
491d6e80 847 r = kvm_set_signal_mask(cpu, &set);
714bd040
PB
848 if (r) {
849 fprintf(stderr, "kvm_set_signal_mask: %s\n", strerror(-r));
850 exit(1);
851 }
852}
853
55f8d6ac 854#else /* _WIN32 */
13618e05 855static void qemu_kvm_init_cpu_signals(CPUState *cpu)
ff48eb5f 856{
714bd040
PB
857 abort();
858}
714bd040 859#endif /* _WIN32 */
ff48eb5f 860
b2532d88 861static QemuMutex qemu_global_mutex;
46daff13 862static QemuCond qemu_io_proceeded_cond;
6b49809c 863static unsigned iothread_requesting_mutex;
296af7c9
BS
864
865static QemuThread io_thread;
866
296af7c9
BS
867/* cpu creation */
868static QemuCond qemu_cpu_cond;
869/* system init */
296af7c9 870static QemuCond qemu_pause_cond;
e82bcec2 871static QemuCond qemu_work_cond;
296af7c9 872
d3b12f5d 873void qemu_init_cpu_loop(void)
296af7c9 874{
6d9cb73c 875 qemu_init_sigbus();
ed94592b 876 qemu_cond_init(&qemu_cpu_cond);
ed94592b
AL
877 qemu_cond_init(&qemu_pause_cond);
878 qemu_cond_init(&qemu_work_cond);
46daff13 879 qemu_cond_init(&qemu_io_proceeded_cond);
296af7c9 880 qemu_mutex_init(&qemu_global_mutex);
296af7c9 881
b7680cb6 882 qemu_thread_get_self(&io_thread);
296af7c9
BS
883}
884
f100f0b3 885void run_on_cpu(CPUState *cpu, void (*func)(void *data), void *data)
e82bcec2
MT
886{
887 struct qemu_work_item wi;
888
60e82579 889 if (qemu_cpu_is_self(cpu)) {
e82bcec2
MT
890 func(data);
891 return;
892 }
893
894 wi.func = func;
895 wi.data = data;
3c02270d 896 wi.free = false;
376692b9
PB
897
898 qemu_mutex_lock(&cpu->work_mutex);
c64ca814
AF
899 if (cpu->queued_work_first == NULL) {
900 cpu->queued_work_first = &wi;
0ab07c62 901 } else {
c64ca814 902 cpu->queued_work_last->next = &wi;
0ab07c62 903 }
c64ca814 904 cpu->queued_work_last = &wi;
e82bcec2
MT
905 wi.next = NULL;
906 wi.done = false;
376692b9 907 qemu_mutex_unlock(&cpu->work_mutex);
e82bcec2 908
c08d7424 909 qemu_cpu_kick(cpu);
376692b9 910 while (!atomic_mb_read(&wi.done)) {
4917cf44 911 CPUState *self_cpu = current_cpu;
e82bcec2
MT
912
913 qemu_cond_wait(&qemu_work_cond, &qemu_global_mutex);
4917cf44 914 current_cpu = self_cpu;
e82bcec2
MT
915 }
916}
917
3c02270d
CV
918void async_run_on_cpu(CPUState *cpu, void (*func)(void *data), void *data)
919{
920 struct qemu_work_item *wi;
921
922 if (qemu_cpu_is_self(cpu)) {
923 func(data);
924 return;
925 }
926
927 wi = g_malloc0(sizeof(struct qemu_work_item));
928 wi->func = func;
929 wi->data = data;
930 wi->free = true;
376692b9
PB
931
932 qemu_mutex_lock(&cpu->work_mutex);
3c02270d
CV
933 if (cpu->queued_work_first == NULL) {
934 cpu->queued_work_first = wi;
935 } else {
936 cpu->queued_work_last->next = wi;
937 }
938 cpu->queued_work_last = wi;
939 wi->next = NULL;
940 wi->done = false;
376692b9 941 qemu_mutex_unlock(&cpu->work_mutex);
3c02270d
CV
942
943 qemu_cpu_kick(cpu);
944}
945
6d45b109 946static void flush_queued_work(CPUState *cpu)
e82bcec2
MT
947{
948 struct qemu_work_item *wi;
949
c64ca814 950 if (cpu->queued_work_first == NULL) {
e82bcec2 951 return;
0ab07c62 952 }
e82bcec2 953
376692b9
PB
954 qemu_mutex_lock(&cpu->work_mutex);
955 while (cpu->queued_work_first != NULL) {
956 wi = cpu->queued_work_first;
c64ca814 957 cpu->queued_work_first = wi->next;
376692b9
PB
958 if (!cpu->queued_work_first) {
959 cpu->queued_work_last = NULL;
960 }
961 qemu_mutex_unlock(&cpu->work_mutex);
e82bcec2 962 wi->func(wi->data);
376692b9 963 qemu_mutex_lock(&cpu->work_mutex);
3c02270d
CV
964 if (wi->free) {
965 g_free(wi);
376692b9
PB
966 } else {
967 atomic_mb_set(&wi->done, true);
3c02270d 968 }
e82bcec2 969 }
376692b9 970 qemu_mutex_unlock(&cpu->work_mutex);
e82bcec2
MT
971 qemu_cond_broadcast(&qemu_work_cond);
972}
973
509a0d78 974static void qemu_wait_io_event_common(CPUState *cpu)
296af7c9 975{
4fdeee7c
AF
976 if (cpu->stop) {
977 cpu->stop = false;
f324e766 978 cpu->stopped = true;
296af7c9
BS
979 qemu_cond_signal(&qemu_pause_cond);
980 }
6d45b109 981 flush_queued_work(cpu);
216fc9a4 982 cpu->thread_kicked = false;
296af7c9
BS
983}
984
d5f8d613 985static void qemu_tcg_wait_io_event(CPUState *cpu)
296af7c9 986{
16400322 987 while (all_cpu_threads_idle()) {
ab33fcda
PB
988 /* Start accounting real time to the virtual clock if the CPUs
989 are idle. */
40daca54 990 qemu_clock_warp(QEMU_CLOCK_VIRTUAL);
d5f8d613 991 qemu_cond_wait(cpu->halt_cond, &qemu_global_mutex);
16400322 992 }
296af7c9 993
46daff13
PB
994 while (iothread_requesting_mutex) {
995 qemu_cond_wait(&qemu_io_proceeded_cond, &qemu_global_mutex);
996 }
6cabe1f3 997
bdc44640 998 CPU_FOREACH(cpu) {
182735ef 999 qemu_wait_io_event_common(cpu);
6cabe1f3 1000 }
296af7c9
BS
1001}
1002
fd529e8f 1003static void qemu_kvm_wait_io_event(CPUState *cpu)
296af7c9 1004{
a98ae1d8 1005 while (cpu_thread_is_idle(cpu)) {
f5c121b8 1006 qemu_cond_wait(cpu->halt_cond, &qemu_global_mutex);
16400322 1007 }
296af7c9 1008
290adf38 1009 qemu_kvm_eat_signals(cpu);
509a0d78 1010 qemu_wait_io_event_common(cpu);
296af7c9
BS
1011}
1012
7e97cd88 1013static void *qemu_kvm_cpu_thread_fn(void *arg)
296af7c9 1014{
48a106bd 1015 CPUState *cpu = arg;
84b4915d 1016 int r;
296af7c9 1017
ab28bd23
PB
1018 rcu_register_thread();
1019
2e7f7a3c 1020 qemu_mutex_lock_iothread();
814e612e 1021 qemu_thread_get_self(cpu->thread);
9f09e18a 1022 cpu->thread_id = qemu_get_thread_id();
626cf8f4 1023 cpu->can_do_io = 1;
4917cf44 1024 current_cpu = cpu;
296af7c9 1025
504134d2 1026 r = kvm_init_vcpu(cpu);
84b4915d
JK
1027 if (r < 0) {
1028 fprintf(stderr, "kvm_init_vcpu failed: %s\n", strerror(-r));
1029 exit(1);
1030 }
296af7c9 1031
13618e05 1032 qemu_kvm_init_cpu_signals(cpu);
296af7c9
BS
1033
1034 /* signal CPU creation */
61a46217 1035 cpu->created = true;
296af7c9
BS
1036 qemu_cond_signal(&qemu_cpu_cond);
1037
296af7c9 1038 while (1) {
a1fcaa73 1039 if (cpu_can_run(cpu)) {
1458c363 1040 r = kvm_cpu_exec(cpu);
83f338f7 1041 if (r == EXCP_DEBUG) {
91325046 1042 cpu_handle_guest_debug(cpu);
83f338f7 1043 }
0ab07c62 1044 }
fd529e8f 1045 qemu_kvm_wait_io_event(cpu);
296af7c9
BS
1046 }
1047
1048 return NULL;
1049}
1050
c7f0f3b1
AL
1051static void *qemu_dummy_cpu_thread_fn(void *arg)
1052{
1053#ifdef _WIN32
1054 fprintf(stderr, "qtest is not supported under Windows\n");
1055 exit(1);
1056#else
10a9021d 1057 CPUState *cpu = arg;
c7f0f3b1
AL
1058 sigset_t waitset;
1059 int r;
1060
ab28bd23
PB
1061 rcu_register_thread();
1062
c7f0f3b1 1063 qemu_mutex_lock_iothread();
814e612e 1064 qemu_thread_get_self(cpu->thread);
9f09e18a 1065 cpu->thread_id = qemu_get_thread_id();
626cf8f4 1066 cpu->can_do_io = 1;
c7f0f3b1
AL
1067
1068 sigemptyset(&waitset);
1069 sigaddset(&waitset, SIG_IPI);
1070
1071 /* signal CPU creation */
61a46217 1072 cpu->created = true;
c7f0f3b1
AL
1073 qemu_cond_signal(&qemu_cpu_cond);
1074
4917cf44 1075 current_cpu = cpu;
c7f0f3b1 1076 while (1) {
4917cf44 1077 current_cpu = NULL;
c7f0f3b1
AL
1078 qemu_mutex_unlock_iothread();
1079 do {
1080 int sig;
1081 r = sigwait(&waitset, &sig);
1082 } while (r == -1 && (errno == EAGAIN || errno == EINTR));
1083 if (r == -1) {
1084 perror("sigwait");
1085 exit(1);
1086 }
1087 qemu_mutex_lock_iothread();
4917cf44 1088 current_cpu = cpu;
509a0d78 1089 qemu_wait_io_event_common(cpu);
c7f0f3b1
AL
1090 }
1091
1092 return NULL;
1093#endif
1094}
1095
bdb7ca67
JK
1096static void tcg_exec_all(void);
1097
7e97cd88 1098static void *qemu_tcg_cpu_thread_fn(void *arg)
296af7c9 1099{
c3586ba7 1100 CPUState *cpu = arg;
296af7c9 1101
ab28bd23
PB
1102 rcu_register_thread();
1103
2e7f7a3c 1104 qemu_mutex_lock_iothread();
814e612e 1105 qemu_thread_get_self(cpu->thread);
296af7c9 1106
38fcbd3f
AF
1107 CPU_FOREACH(cpu) {
1108 cpu->thread_id = qemu_get_thread_id();
1109 cpu->created = true;
626cf8f4 1110 cpu->can_do_io = 1;
38fcbd3f 1111 }
296af7c9
BS
1112 qemu_cond_signal(&qemu_cpu_cond);
1113
fa7d1867 1114 /* wait for initial kick-off after machine start */
c28e399c 1115 while (first_cpu->stopped) {
d5f8d613 1116 qemu_cond_wait(first_cpu->halt_cond, &qemu_global_mutex);
8e564b4e
JK
1117
1118 /* process any pending work */
bdc44640 1119 CPU_FOREACH(cpu) {
182735ef 1120 qemu_wait_io_event_common(cpu);
8e564b4e 1121 }
0ab07c62 1122 }
296af7c9 1123
21618b3e 1124 /* process any pending work */
aed807c8 1125 atomic_mb_set(&exit_request, 1);
21618b3e 1126
296af7c9 1127 while (1) {
bdb7ca67 1128 tcg_exec_all();
ac70aafc
AB
1129
1130 if (use_icount) {
40daca54 1131 int64_t deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
ac70aafc
AB
1132
1133 if (deadline == 0) {
40daca54 1134 qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
ac70aafc 1135 }
3b2319a3 1136 }
d5f8d613 1137 qemu_tcg_wait_io_event(QTAILQ_FIRST(&cpus));
296af7c9
BS
1138 }
1139
1140 return NULL;
1141}
1142
2ff09a40 1143static void qemu_cpu_kick_thread(CPUState *cpu)
cc015e9a
PB
1144{
1145#ifndef _WIN32
1146 int err;
1147
e0c38211
PB
1148 if (cpu->thread_kicked) {
1149 return;
9102deda 1150 }
e0c38211 1151 cpu->thread_kicked = true;
814e612e 1152 err = pthread_kill(cpu->thread->thread, SIG_IPI);
cc015e9a
PB
1153 if (err) {
1154 fprintf(stderr, "qemu:%s: %s", __func__, strerror(err));
1155 exit(1);
1156 }
1157#else /* _WIN32 */
e0c38211
PB
1158 abort();
1159#endif
1160}
ed9164a3 1161
e0c38211
PB
1162static void qemu_cpu_kick_no_halt(void)
1163{
1164 CPUState *cpu;
1165 /* Ensure whatever caused the exit has reached the CPU threads before
1166 * writing exit_request.
1167 */
1168 atomic_mb_set(&exit_request, 1);
1169 cpu = atomic_mb_read(&tcg_current_cpu);
1170 if (cpu) {
1171 cpu_exit(cpu);
cc015e9a 1172 }
cc015e9a
PB
1173}
1174
c08d7424 1175void qemu_cpu_kick(CPUState *cpu)
296af7c9 1176{
f5c121b8 1177 qemu_cond_broadcast(cpu->halt_cond);
e0c38211
PB
1178 if (tcg_enabled()) {
1179 qemu_cpu_kick_no_halt();
1180 } else {
1181 qemu_cpu_kick_thread(cpu);
1182 }
296af7c9
BS
1183}
1184
46d62fac 1185void qemu_cpu_kick_self(void)
296af7c9 1186{
4917cf44 1187 assert(current_cpu);
9102deda 1188 qemu_cpu_kick_thread(current_cpu);
296af7c9
BS
1189}
1190
60e82579 1191bool qemu_cpu_is_self(CPUState *cpu)
296af7c9 1192{
814e612e 1193 return qemu_thread_is_self(cpu->thread);
296af7c9
BS
1194}
1195
79e2b9ae 1196bool qemu_in_vcpu_thread(void)
aa723c23 1197{
4917cf44 1198 return current_cpu && qemu_cpu_is_self(current_cpu);
aa723c23
JQ
1199}
1200
afbe7053
PB
/* Per-thread flag: set by qemu_mutex_lock_iothread(), cleared by
 * qemu_mutex_unlock_iothread(). */
static __thread bool iothread_locked = false;

/* Report the calling thread's iothread_locked flag. */
bool qemu_mutex_iothread_locked(void)
{
    bool held = iothread_locked;

    return held;
}
1207
296af7c9
BS
1208void qemu_mutex_lock_iothread(void)
1209{
21618b3e 1210 atomic_inc(&iothread_requesting_mutex);
2e7f7a3c
PB
1211 /* In the simple case there is no need to bump the VCPU thread out of
1212 * TCG code execution.
1213 */
1214 if (!tcg_enabled() || qemu_in_vcpu_thread() ||
46036b24 1215 !first_cpu || !first_cpu->created) {
296af7c9 1216 qemu_mutex_lock(&qemu_global_mutex);
21618b3e 1217 atomic_dec(&iothread_requesting_mutex);
1a28cac3 1218 } else {
1a28cac3 1219 if (qemu_mutex_trylock(&qemu_global_mutex)) {
e0c38211 1220 qemu_cpu_kick_no_halt();
1a28cac3
MT
1221 qemu_mutex_lock(&qemu_global_mutex);
1222 }
6b49809c 1223 atomic_dec(&iothread_requesting_mutex);
46daff13 1224 qemu_cond_broadcast(&qemu_io_proceeded_cond);
1a28cac3 1225 }
afbe7053 1226 iothread_locked = true;
296af7c9
BS
1227}
1228
1229void qemu_mutex_unlock_iothread(void)
1230{
afbe7053 1231 iothread_locked = false;
296af7c9
BS
1232 qemu_mutex_unlock(&qemu_global_mutex);
1233}
1234
1235static int all_vcpus_paused(void)
1236{
bdc44640 1237 CPUState *cpu;
296af7c9 1238
bdc44640 1239 CPU_FOREACH(cpu) {
182735ef 1240 if (!cpu->stopped) {
296af7c9 1241 return 0;
0ab07c62 1242 }
296af7c9
BS
1243 }
1244
1245 return 1;
1246}
1247
1248void pause_all_vcpus(void)
1249{
bdc44640 1250 CPUState *cpu;
296af7c9 1251
40daca54 1252 qemu_clock_enable(QEMU_CLOCK_VIRTUAL, false);
bdc44640 1253 CPU_FOREACH(cpu) {
182735ef
AF
1254 cpu->stop = true;
1255 qemu_cpu_kick(cpu);
296af7c9
BS
1256 }
1257
aa723c23 1258 if (qemu_in_vcpu_thread()) {
d798e974
JK
1259 cpu_stop_current();
1260 if (!kvm_enabled()) {
bdc44640 1261 CPU_FOREACH(cpu) {
182735ef
AF
1262 cpu->stop = false;
1263 cpu->stopped = true;
d798e974
JK
1264 }
1265 return;
1266 }
1267 }
1268
296af7c9 1269 while (!all_vcpus_paused()) {
be7d6c57 1270 qemu_cond_wait(&qemu_pause_cond, &qemu_global_mutex);
bdc44640 1271 CPU_FOREACH(cpu) {
182735ef 1272 qemu_cpu_kick(cpu);
296af7c9
BS
1273 }
1274 }
1275}
1276
2993683b
IM
1277void cpu_resume(CPUState *cpu)
1278{
1279 cpu->stop = false;
1280 cpu->stopped = false;
1281 qemu_cpu_kick(cpu);
1282}
1283
296af7c9
BS
1284void resume_all_vcpus(void)
1285{
bdc44640 1286 CPUState *cpu;
296af7c9 1287
40daca54 1288 qemu_clock_enable(QEMU_CLOCK_VIRTUAL, true);
bdc44640 1289 CPU_FOREACH(cpu) {
182735ef 1290 cpu_resume(cpu);
296af7c9
BS
1291 }
1292}
1293
4900116e
DDAG
1294/* For temporary buffers for forming a name */
1295#define VCPU_THREAD_NAME_SIZE 16
1296
e5ab30a2 1297static void qemu_tcg_init_vcpu(CPUState *cpu)
296af7c9 1298{
4900116e 1299 char thread_name[VCPU_THREAD_NAME_SIZE];
d5f8d613
FK
1300 static QemuCond *tcg_halt_cond;
1301 static QemuThread *tcg_cpu_thread;
4900116e 1302
09daed84
EI
1303 tcg_cpu_address_space_init(cpu, cpu->as);
1304
296af7c9
BS
1305 /* share a single thread for all cpus with TCG */
1306 if (!tcg_cpu_thread) {
814e612e 1307 cpu->thread = g_malloc0(sizeof(QemuThread));
f5c121b8
AF
1308 cpu->halt_cond = g_malloc0(sizeof(QemuCond));
1309 qemu_cond_init(cpu->halt_cond);
1310 tcg_halt_cond = cpu->halt_cond;
4900116e
DDAG
1311 snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/TCG",
1312 cpu->cpu_index);
1313 qemu_thread_create(cpu->thread, thread_name, qemu_tcg_cpu_thread_fn,
1314 cpu, QEMU_THREAD_JOINABLE);
1ecf47bf 1315#ifdef _WIN32
814e612e 1316 cpu->hThread = qemu_thread_get_handle(cpu->thread);
1ecf47bf 1317#endif
61a46217 1318 while (!cpu->created) {
18a85728 1319 qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
0ab07c62 1320 }
814e612e 1321 tcg_cpu_thread = cpu->thread;
296af7c9 1322 } else {
814e612e 1323 cpu->thread = tcg_cpu_thread;
f5c121b8 1324 cpu->halt_cond = tcg_halt_cond;
296af7c9
BS
1325 }
1326}
1327
48a106bd 1328static void qemu_kvm_start_vcpu(CPUState *cpu)
296af7c9 1329{
4900116e
DDAG
1330 char thread_name[VCPU_THREAD_NAME_SIZE];
1331
814e612e 1332 cpu->thread = g_malloc0(sizeof(QemuThread));
f5c121b8
AF
1333 cpu->halt_cond = g_malloc0(sizeof(QemuCond));
1334 qemu_cond_init(cpu->halt_cond);
4900116e
DDAG
1335 snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/KVM",
1336 cpu->cpu_index);
1337 qemu_thread_create(cpu->thread, thread_name, qemu_kvm_cpu_thread_fn,
1338 cpu, QEMU_THREAD_JOINABLE);
61a46217 1339 while (!cpu->created) {
18a85728 1340 qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
0ab07c62 1341 }
296af7c9
BS
1342}
1343
10a9021d 1344static void qemu_dummy_start_vcpu(CPUState *cpu)
c7f0f3b1 1345{
4900116e
DDAG
1346 char thread_name[VCPU_THREAD_NAME_SIZE];
1347
814e612e 1348 cpu->thread = g_malloc0(sizeof(QemuThread));
f5c121b8
AF
1349 cpu->halt_cond = g_malloc0(sizeof(QemuCond));
1350 qemu_cond_init(cpu->halt_cond);
4900116e
DDAG
1351 snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/DUMMY",
1352 cpu->cpu_index);
1353 qemu_thread_create(cpu->thread, thread_name, qemu_dummy_cpu_thread_fn, cpu,
c7f0f3b1 1354 QEMU_THREAD_JOINABLE);
61a46217 1355 while (!cpu->created) {
c7f0f3b1
AL
1356 qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
1357 }
1358}
1359
c643bed9 1360void qemu_init_vcpu(CPUState *cpu)
296af7c9 1361{
ce3960eb
AF
1362 cpu->nr_cores = smp_cores;
1363 cpu->nr_threads = smp_threads;
f324e766 1364 cpu->stopped = true;
0ab07c62 1365 if (kvm_enabled()) {
48a106bd 1366 qemu_kvm_start_vcpu(cpu);
c7f0f3b1 1367 } else if (tcg_enabled()) {
e5ab30a2 1368 qemu_tcg_init_vcpu(cpu);
c7f0f3b1 1369 } else {
10a9021d 1370 qemu_dummy_start_vcpu(cpu);
0ab07c62 1371 }
296af7c9
BS
1372}
1373
b4a3d965 1374void cpu_stop_current(void)
296af7c9 1375{
4917cf44
AF
1376 if (current_cpu) {
1377 current_cpu->stop = false;
1378 current_cpu->stopped = true;
1379 cpu_exit(current_cpu);
67bb172f 1380 qemu_cond_signal(&qemu_pause_cond);
b4a3d965 1381 }
296af7c9
BS
1382}
1383
56983463 1384int vm_stop(RunState state)
296af7c9 1385{
aa723c23 1386 if (qemu_in_vcpu_thread()) {
74892d24 1387 qemu_system_vmstop_request_prepare();
1dfb4dd9 1388 qemu_system_vmstop_request(state);
296af7c9
BS
1389 /*
1390 * FIXME: should not return to device code in case
1391 * vm_stop() has been requested.
1392 */
b4a3d965 1393 cpu_stop_current();
56983463 1394 return 0;
296af7c9 1395 }
56983463
KW
1396
1397 return do_vm_stop(state);
296af7c9
BS
1398}
1399
8a9236f1
LC
1400/* does a state transition even if the VM is already stopped,
1401 current state is forgotten forever */
56983463 1402int vm_stop_force_state(RunState state)
8a9236f1
LC
1403{
1404 if (runstate_is_running()) {
56983463 1405 return vm_stop(state);
8a9236f1
LC
1406 } else {
1407 runstate_set(state);
594a45ce
KW
1408 /* Make sure to return an error if the flush in a previous vm_stop()
1409 * failed. */
1410 return bdrv_flush_all();
8a9236f1
LC
1411 }
1412}
1413
3d57f789 1414static int tcg_cpu_exec(CPUState *cpu)
296af7c9
BS
1415{
1416 int ret;
1417#ifdef CONFIG_PROFILER
1418 int64_t ti;
1419#endif
1420
1421#ifdef CONFIG_PROFILER
1422 ti = profile_getclock();
1423#endif
1424 if (use_icount) {
1425 int64_t count;
ac70aafc 1426 int64_t deadline;
296af7c9 1427 int decr;
c96778bb
FK
1428 timers_state.qemu_icount -= (cpu->icount_decr.u16.low
1429 + cpu->icount_extra);
28ecfd7a 1430 cpu->icount_decr.u16.low = 0;
efee7340 1431 cpu->icount_extra = 0;
40daca54 1432 deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
ac70aafc
AB
1433
1434 /* Maintain prior (possibly buggy) behaviour where if no deadline
40daca54 1435 * was set (as there is no QEMU_CLOCK_VIRTUAL timer) or it is more than
ac70aafc
AB
1436 * INT32_MAX nanoseconds ahead, we still use INT32_MAX
1437 * nanoseconds.
1438 */
1439 if ((deadline < 0) || (deadline > INT32_MAX)) {
1440 deadline = INT32_MAX;
1441 }
1442
1443 count = qemu_icount_round(deadline);
c96778bb 1444 timers_state.qemu_icount += count;
296af7c9
BS
1445 decr = (count > 0xffff) ? 0xffff : count;
1446 count -= decr;
28ecfd7a 1447 cpu->icount_decr.u16.low = decr;
efee7340 1448 cpu->icount_extra = count;
296af7c9 1449 }
ea3e9847 1450 ret = cpu_exec(cpu);
296af7c9 1451#ifdef CONFIG_PROFILER
89d5cbdd 1452 tcg_time += profile_getclock() - ti;
296af7c9
BS
1453#endif
1454 if (use_icount) {
1455 /* Fold pending instructions back into the
1456 instruction counter, and clear the interrupt flag. */
c96778bb
FK
1457 timers_state.qemu_icount -= (cpu->icount_decr.u16.low
1458 + cpu->icount_extra);
28ecfd7a 1459 cpu->icount_decr.u32 = 0;
efee7340 1460 cpu->icount_extra = 0;
296af7c9
BS
1461 }
1462 return ret;
1463}
1464
bdb7ca67 1465static void tcg_exec_all(void)
296af7c9 1466{
9a36085b
JK
1467 int r;
1468
40daca54
AB
1469 /* Account partial waits to QEMU_CLOCK_VIRTUAL. */
1470 qemu_clock_warp(QEMU_CLOCK_VIRTUAL);
ab33fcda 1471
0ab07c62 1472 if (next_cpu == NULL) {
296af7c9 1473 next_cpu = first_cpu;
0ab07c62 1474 }
bdc44640 1475 for (; next_cpu != NULL && !exit_request; next_cpu = CPU_NEXT(next_cpu)) {
182735ef 1476 CPUState *cpu = next_cpu;
296af7c9 1477
40daca54 1478 qemu_clock_enable(QEMU_CLOCK_VIRTUAL,
ed2803da 1479 (cpu->singlestep_enabled & SSTEP_NOTIMER) == 0);
296af7c9 1480
a1fcaa73 1481 if (cpu_can_run(cpu)) {
3d57f789 1482 r = tcg_cpu_exec(cpu);
9a36085b 1483 if (r == EXCP_DEBUG) {
91325046 1484 cpu_handle_guest_debug(cpu);
3c638d06
JK
1485 break;
1486 }
f324e766 1487 } else if (cpu->stop || cpu->stopped) {
296af7c9
BS
1488 break;
1489 }
1490 }
aed807c8
PB
1491
1492 /* Pairs with smp_wmb in qemu_cpu_kick. */
1493 atomic_mb_set(&exit_request, 0);
296af7c9
BS
1494}
1495
9a78eead 1496void list_cpus(FILE *f, fprintf_function cpu_fprintf, const char *optarg)
262353cb
BS
1497{
1498 /* XXX: implement xxx_cpu_list for targets that still miss it */
e916cbf8
PM
1499#if defined(cpu_list)
1500 cpu_list(f, cpu_fprintf);
262353cb
BS
1501#endif
1502}
de0b36b6
LC
1503
1504CpuInfoList *qmp_query_cpus(Error **errp)
1505{
1506 CpuInfoList *head = NULL, *cur_item = NULL;
182735ef 1507 CPUState *cpu;
de0b36b6 1508
bdc44640 1509 CPU_FOREACH(cpu) {
de0b36b6 1510 CpuInfoList *info;
182735ef
AF
1511#if defined(TARGET_I386)
1512 X86CPU *x86_cpu = X86_CPU(cpu);
1513 CPUX86State *env = &x86_cpu->env;
1514#elif defined(TARGET_PPC)
1515 PowerPCCPU *ppc_cpu = POWERPC_CPU(cpu);
1516 CPUPPCState *env = &ppc_cpu->env;
1517#elif defined(TARGET_SPARC)
1518 SPARCCPU *sparc_cpu = SPARC_CPU(cpu);
1519 CPUSPARCState *env = &sparc_cpu->env;
1520#elif defined(TARGET_MIPS)
1521 MIPSCPU *mips_cpu = MIPS_CPU(cpu);
1522 CPUMIPSState *env = &mips_cpu->env;
48e06fe0
BK
1523#elif defined(TARGET_TRICORE)
1524 TriCoreCPU *tricore_cpu = TRICORE_CPU(cpu);
1525 CPUTriCoreState *env = &tricore_cpu->env;
182735ef 1526#endif
de0b36b6 1527
cb446eca 1528 cpu_synchronize_state(cpu);
de0b36b6
LC
1529
1530 info = g_malloc0(sizeof(*info));
1531 info->value = g_malloc0(sizeof(*info->value));
55e5c285 1532 info->value->CPU = cpu->cpu_index;
182735ef 1533 info->value->current = (cpu == first_cpu);
259186a7 1534 info->value->halted = cpu->halted;
58f88d4b 1535 info->value->qom_path = object_get_canonical_path(OBJECT(cpu));
9f09e18a 1536 info->value->thread_id = cpu->thread_id;
de0b36b6
LC
1537#if defined(TARGET_I386)
1538 info->value->has_pc = true;
1539 info->value->pc = env->eip + env->segs[R_CS].base;
1540#elif defined(TARGET_PPC)
1541 info->value->has_nip = true;
1542 info->value->nip = env->nip;
1543#elif defined(TARGET_SPARC)
1544 info->value->has_pc = true;
1545 info->value->pc = env->pc;
1546 info->value->has_npc = true;
1547 info->value->npc = env->npc;
1548#elif defined(TARGET_MIPS)
1549 info->value->has_PC = true;
1550 info->value->PC = env->active_tc.PC;
48e06fe0
BK
1551#elif defined(TARGET_TRICORE)
1552 info->value->has_PC = true;
1553 info->value->PC = env->PC;
de0b36b6
LC
1554#endif
1555
1556 /* XXX: waiting for the qapi to support GSList */
1557 if (!cur_item) {
1558 head = cur_item = info;
1559 } else {
1560 cur_item->next = info;
1561 cur_item = info;
1562 }
1563 }
1564
1565 return head;
1566}
0cfd6a9a
LC
1567
1568void qmp_memsave(int64_t addr, int64_t size, const char *filename,
1569 bool has_cpu, int64_t cpu_index, Error **errp)
1570{
1571 FILE *f;
1572 uint32_t l;
55e5c285 1573 CPUState *cpu;
0cfd6a9a 1574 uint8_t buf[1024];
0dc9daf0 1575 int64_t orig_addr = addr, orig_size = size;
0cfd6a9a
LC
1576
1577 if (!has_cpu) {
1578 cpu_index = 0;
1579 }
1580
151d1322
AF
1581 cpu = qemu_get_cpu(cpu_index);
1582 if (cpu == NULL) {
c6bd8c70
MA
1583 error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "cpu-index",
1584 "a CPU number");
0cfd6a9a
LC
1585 return;
1586 }
1587
1588 f = fopen(filename, "wb");
1589 if (!f) {
618da851 1590 error_setg_file_open(errp, errno, filename);
0cfd6a9a
LC
1591 return;
1592 }
1593
1594 while (size != 0) {
1595 l = sizeof(buf);
1596 if (l > size)
1597 l = size;
2f4d0f59 1598 if (cpu_memory_rw_debug(cpu, addr, buf, l, 0) != 0) {
0dc9daf0
BP
1599 error_setg(errp, "Invalid addr 0x%016" PRIx64 "/size %" PRId64
1600 " specified", orig_addr, orig_size);
2f4d0f59
AK
1601 goto exit;
1602 }
0cfd6a9a 1603 if (fwrite(buf, 1, l, f) != l) {
c6bd8c70 1604 error_setg(errp, QERR_IO_ERROR);
0cfd6a9a
LC
1605 goto exit;
1606 }
1607 addr += l;
1608 size -= l;
1609 }
1610
1611exit:
1612 fclose(f);
1613}
6d3962bf
LC
1614
1615void qmp_pmemsave(int64_t addr, int64_t size, const char *filename,
1616 Error **errp)
1617{
1618 FILE *f;
1619 uint32_t l;
1620 uint8_t buf[1024];
1621
1622 f = fopen(filename, "wb");
1623 if (!f) {
618da851 1624 error_setg_file_open(errp, errno, filename);
6d3962bf
LC
1625 return;
1626 }
1627
1628 while (size != 0) {
1629 l = sizeof(buf);
1630 if (l > size)
1631 l = size;
eb6282f2 1632 cpu_physical_memory_read(addr, buf, l);
6d3962bf 1633 if (fwrite(buf, 1, l, f) != l) {
c6bd8c70 1634 error_setg(errp, QERR_IO_ERROR);
6d3962bf
LC
1635 goto exit;
1636 }
1637 addr += l;
1638 size -= l;
1639 }
1640
1641exit:
1642 fclose(f);
1643}
ab49ab5c
LC
1644
1645void qmp_inject_nmi(Error **errp)
1646{
1647#if defined(TARGET_I386)
182735ef
AF
1648 CPUState *cs;
1649
bdc44640 1650 CPU_FOREACH(cs) {
182735ef 1651 X86CPU *cpu = X86_CPU(cs);
ab49ab5c 1652
02e51483 1653 if (!cpu->apic_state) {
182735ef 1654 cpu_interrupt(cs, CPU_INTERRUPT_NMI);
02c09195 1655 } else {
02e51483 1656 apic_deliver_nmi(cpu->apic_state);
02c09195 1657 }
ab49ab5c
LC
1658 }
1659#else
9cb805fd 1660 nmi_monitor_handle(monitor_get_cpu_index(), errp);
ab49ab5c
LC
1661#endif
1662}
27498bef
ST
1663
1664void dump_drift_info(FILE *f, fprintf_function cpu_fprintf)
1665{
1666 if (!use_icount) {
1667 return;
1668 }
1669
1670 cpu_fprintf(f, "Host - Guest clock %"PRIi64" ms\n",
1671 (cpu_get_clock() - cpu_get_icount())/SCALE_MS);
1672 if (icount_align_option) {
1673 cpu_fprintf(f, "Max guest delay %"PRIi64" ms\n", -max_delay/SCALE_MS);
1674 cpu_fprintf(f, "Max guest advance %"PRIi64" ms\n", max_advance/SCALE_MS);
1675 } else {
1676 cpu_fprintf(f, "Max guest delay NA\n");
1677 cpu_fprintf(f, "Max guest advance NA\n");
1678 }
1679}