]> git.proxmox.com Git - mirror_qemu.git/blame - cpus.c
tcg: synchronize exit_request and tcg_current_cpu accesses
[mirror_qemu.git] / cpus.c
CommitLineData
296af7c9
BS
1/*
2 * QEMU System Emulator
3 *
4 * Copyright (c) 2003-2008 Fabrice Bellard
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to deal
8 * in the Software without restriction, including without limitation the rights
9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 * copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22 * THE SOFTWARE.
23 */
24
25/* Needed early for CONFIG_BSD etc. */
26#include "config-host.h"
27
83c9089e 28#include "monitor/monitor.h"
a4e15de9 29#include "qapi/qmp/qerror.h"
d49b6836 30#include "qemu/error-report.h"
9c17d615 31#include "sysemu/sysemu.h"
022c62cb 32#include "exec/gdbstub.h"
9c17d615
PB
33#include "sysemu/dma.h"
34#include "sysemu/kvm.h"
de0b36b6 35#include "qmp-commands.h"
296af7c9 36
1de7afc9 37#include "qemu/thread.h"
9c17d615
PB
38#include "sysemu/cpus.h"
39#include "sysemu/qtest.h"
1de7afc9
PB
40#include "qemu/main-loop.h"
41#include "qemu/bitmap.h"
cb365646 42#include "qemu/seqlock.h"
a4e15de9 43#include "qapi-event.h"
9cb805fd 44#include "hw/nmi.h"
0ff0fc19
JK
45
46#ifndef _WIN32
1de7afc9 47#include "qemu/compatfd.h"
0ff0fc19 48#endif
296af7c9 49
6d9cb73c
JK
50#ifdef CONFIG_LINUX
51
52#include <sys/prctl.h>
53
c0532a76
MT
54#ifndef PR_MCE_KILL
55#define PR_MCE_KILL 33
56#endif
57
6d9cb73c
JK
58#ifndef PR_MCE_KILL_SET
59#define PR_MCE_KILL_SET 1
60#endif
61
62#ifndef PR_MCE_KILL_EARLY
63#define PR_MCE_KILL_EARLY 1
64#endif
65
66#endif /* CONFIG_LINUX */
67
182735ef 68static CPUState *next_cpu;
27498bef
ST
69int64_t max_delay;
70int64_t max_advance;
296af7c9 71
321bc0b2
TC
72bool cpu_is_stopped(CPUState *cpu)
73{
74 return cpu->stopped || !runstate_is_running();
75}
76
a98ae1d8 77static bool cpu_thread_is_idle(CPUState *cpu)
ac873f1e 78{
c64ca814 79 if (cpu->stop || cpu->queued_work_first) {
ac873f1e
PM
80 return false;
81 }
321bc0b2 82 if (cpu_is_stopped(cpu)) {
ac873f1e
PM
83 return true;
84 }
8c2e1b00 85 if (!cpu->halted || cpu_has_work(cpu) ||
215e79c0 86 kvm_halt_in_kernel()) {
ac873f1e
PM
87 return false;
88 }
89 return true;
90}
91
92static bool all_cpu_threads_idle(void)
93{
182735ef 94 CPUState *cpu;
ac873f1e 95
bdc44640 96 CPU_FOREACH(cpu) {
182735ef 97 if (!cpu_thread_is_idle(cpu)) {
ac873f1e
PM
98 return false;
99 }
100 }
101 return true;
102}
103
946fb27c
PB
104/***********************************************************/
105/* guest cycle counter */
106
a3270e19
PB
107/* Protected by TimersState seqlock */
108
5045e9d9 109static bool icount_sleep = true;
71468395 110static int64_t vm_clock_warp_start = -1;
946fb27c
PB
111/* Conversion factor from emulated instructions to virtual clock ticks. */
112static int icount_time_shift;
113/* Arbitrarily pick 1MIPS as the minimum allowable speed. */
114#define MAX_ICOUNT_SHIFT 10
a3270e19 115
946fb27c
PB
116static QEMUTimer *icount_rt_timer;
117static QEMUTimer *icount_vm_timer;
118static QEMUTimer *icount_warp_timer;
946fb27c
PB
119
120typedef struct TimersState {
cb365646 121 /* Protected by BQL. */
946fb27c
PB
122 int64_t cpu_ticks_prev;
123 int64_t cpu_ticks_offset;
cb365646
LPF
124
125 /* cpu_clock_offset can be read out of BQL, so protect it with
126 * this lock.
127 */
128 QemuSeqLock vm_clock_seqlock;
946fb27c
PB
129 int64_t cpu_clock_offset;
130 int32_t cpu_ticks_enabled;
131 int64_t dummy;
c96778bb
FK
132
133 /* Compensate for varying guest execution speed. */
134 int64_t qemu_icount_bias;
135 /* Only written by TCG thread */
136 int64_t qemu_icount;
946fb27c
PB
137} TimersState;
138
d9cd4007 139static TimersState timers_state;
946fb27c 140
2a62914b 141int64_t cpu_get_icount_raw(void)
946fb27c
PB
142{
143 int64_t icount;
4917cf44 144 CPUState *cpu = current_cpu;
946fb27c 145
c96778bb 146 icount = timers_state.qemu_icount;
4917cf44 147 if (cpu) {
414b15c9 148 if (!cpu->can_do_io) {
2a62914b
PD
149 fprintf(stderr, "Bad icount read\n");
150 exit(1);
946fb27c 151 }
28ecfd7a 152 icount -= (cpu->icount_decr.u16.low + cpu->icount_extra);
946fb27c 153 }
2a62914b
PD
154 return icount;
155}
156
157/* Return the virtual CPU time, based on the instruction counter. */
158static int64_t cpu_get_icount_locked(void)
159{
160 int64_t icount = cpu_get_icount_raw();
3f031313 161 return timers_state.qemu_icount_bias + cpu_icount_to_ns(icount);
946fb27c
PB
162}
163
17a15f1b
PB
164int64_t cpu_get_icount(void)
165{
166 int64_t icount;
167 unsigned start;
168
169 do {
170 start = seqlock_read_begin(&timers_state.vm_clock_seqlock);
171 icount = cpu_get_icount_locked();
172 } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start));
173
174 return icount;
175}
176
3f031313
FK
177int64_t cpu_icount_to_ns(int64_t icount)
178{
179 return icount << icount_time_shift;
180}
181
946fb27c 182/* return the host CPU cycle counter and handle stop/restart */
cb365646 183/* Caller must hold the BQL */
946fb27c
PB
184int64_t cpu_get_ticks(void)
185{
5f3e3101
PB
186 int64_t ticks;
187
946fb27c
PB
188 if (use_icount) {
189 return cpu_get_icount();
190 }
5f3e3101
PB
191
192 ticks = timers_state.cpu_ticks_offset;
193 if (timers_state.cpu_ticks_enabled) {
194 ticks += cpu_get_real_ticks();
195 }
196
197 if (timers_state.cpu_ticks_prev > ticks) {
198 /* Note: non increasing ticks may happen if the host uses
199 software suspend */
200 timers_state.cpu_ticks_offset += timers_state.cpu_ticks_prev - ticks;
201 ticks = timers_state.cpu_ticks_prev;
946fb27c 202 }
5f3e3101
PB
203
204 timers_state.cpu_ticks_prev = ticks;
205 return ticks;
946fb27c
PB
206}
207
cb365646 208static int64_t cpu_get_clock_locked(void)
946fb27c 209{
5f3e3101 210 int64_t ticks;
cb365646 211
5f3e3101
PB
212 ticks = timers_state.cpu_clock_offset;
213 if (timers_state.cpu_ticks_enabled) {
214 ticks += get_clock();
946fb27c 215 }
cb365646 216
5f3e3101 217 return ticks;
cb365646
LPF
218}
219
220/* return the host CPU monotonic timer and handle stop/restart */
221int64_t cpu_get_clock(void)
222{
223 int64_t ti;
224 unsigned start;
225
226 do {
227 start = seqlock_read_begin(&timers_state.vm_clock_seqlock);
228 ti = cpu_get_clock_locked();
229 } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start));
230
231 return ti;
946fb27c
PB
232}
233
cb365646
LPF
234/* enable cpu_get_ticks()
235 * Caller must hold BQL which server as mutex for vm_clock_seqlock.
236 */
946fb27c
PB
237void cpu_enable_ticks(void)
238{
cb365646
LPF
239 /* Here, the really thing protected by seqlock is cpu_clock_offset. */
240 seqlock_write_lock(&timers_state.vm_clock_seqlock);
946fb27c
PB
241 if (!timers_state.cpu_ticks_enabled) {
242 timers_state.cpu_ticks_offset -= cpu_get_real_ticks();
243 timers_state.cpu_clock_offset -= get_clock();
244 timers_state.cpu_ticks_enabled = 1;
245 }
cb365646 246 seqlock_write_unlock(&timers_state.vm_clock_seqlock);
946fb27c
PB
247}
248
249/* disable cpu_get_ticks() : the clock is stopped. You must not call
cb365646
LPF
250 * cpu_get_ticks() after that.
251 * Caller must hold BQL which server as mutex for vm_clock_seqlock.
252 */
946fb27c
PB
253void cpu_disable_ticks(void)
254{
cb365646
LPF
255 /* Here, the really thing protected by seqlock is cpu_clock_offset. */
256 seqlock_write_lock(&timers_state.vm_clock_seqlock);
946fb27c 257 if (timers_state.cpu_ticks_enabled) {
5f3e3101 258 timers_state.cpu_ticks_offset += cpu_get_real_ticks();
cb365646 259 timers_state.cpu_clock_offset = cpu_get_clock_locked();
946fb27c
PB
260 timers_state.cpu_ticks_enabled = 0;
261 }
cb365646 262 seqlock_write_unlock(&timers_state.vm_clock_seqlock);
946fb27c
PB
263}
264
265/* Correlation between real and virtual time is always going to be
266 fairly approximate, so ignore small variation.
267 When the guest is idle real and virtual time will be aligned in
268 the IO wait loop. */
269#define ICOUNT_WOBBLE (get_ticks_per_sec() / 10)
270
271static void icount_adjust(void)
272{
273 int64_t cur_time;
274 int64_t cur_icount;
275 int64_t delta;
a3270e19
PB
276
277 /* Protected by TimersState mutex. */
946fb27c 278 static int64_t last_delta;
468cc7cf 279
946fb27c
PB
280 /* If the VM is not running, then do nothing. */
281 if (!runstate_is_running()) {
282 return;
283 }
468cc7cf 284
17a15f1b
PB
285 seqlock_write_lock(&timers_state.vm_clock_seqlock);
286 cur_time = cpu_get_clock_locked();
287 cur_icount = cpu_get_icount_locked();
468cc7cf 288
946fb27c
PB
289 delta = cur_icount - cur_time;
290 /* FIXME: This is a very crude algorithm, somewhat prone to oscillation. */
291 if (delta > 0
292 && last_delta + ICOUNT_WOBBLE < delta * 2
293 && icount_time_shift > 0) {
294 /* The guest is getting too far ahead. Slow time down. */
295 icount_time_shift--;
296 }
297 if (delta < 0
298 && last_delta - ICOUNT_WOBBLE > delta * 2
299 && icount_time_shift < MAX_ICOUNT_SHIFT) {
300 /* The guest is getting too far behind. Speed time up. */
301 icount_time_shift++;
302 }
303 last_delta = delta;
c96778bb
FK
304 timers_state.qemu_icount_bias = cur_icount
305 - (timers_state.qemu_icount << icount_time_shift);
17a15f1b 306 seqlock_write_unlock(&timers_state.vm_clock_seqlock);
946fb27c
PB
307}
308
309static void icount_adjust_rt(void *opaque)
310{
40daca54 311 timer_mod(icount_rt_timer,
1979b908 312 qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL_RT) + 1000);
946fb27c
PB
313 icount_adjust();
314}
315
316static void icount_adjust_vm(void *opaque)
317{
40daca54
AB
318 timer_mod(icount_vm_timer,
319 qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
320 get_ticks_per_sec() / 10);
946fb27c
PB
321 icount_adjust();
322}
323
324static int64_t qemu_icount_round(int64_t count)
325{
326 return (count + (1 << icount_time_shift) - 1) >> icount_time_shift;
327}
328
329static void icount_warp_rt(void *opaque)
330{
17a15f1b
PB
331 /* The icount_warp_timer is rescheduled soon after vm_clock_warp_start
332 * changes from -1 to another value, so the race here is okay.
333 */
334 if (atomic_read(&vm_clock_warp_start) == -1) {
946fb27c
PB
335 return;
336 }
337
17a15f1b 338 seqlock_write_lock(&timers_state.vm_clock_seqlock);
946fb27c 339 if (runstate_is_running()) {
bf2a7ddb 340 int64_t clock = cpu_get_clock_locked();
8ed961d9
PB
341 int64_t warp_delta;
342
343 warp_delta = clock - vm_clock_warp_start;
344 if (use_icount == 2) {
946fb27c 345 /*
40daca54 346 * In adaptive mode, do not let QEMU_CLOCK_VIRTUAL run too
946fb27c
PB
347 * far ahead of real time.
348 */
17a15f1b 349 int64_t cur_icount = cpu_get_icount_locked();
bf2a7ddb 350 int64_t delta = clock - cur_icount;
8ed961d9 351 warp_delta = MIN(warp_delta, delta);
946fb27c 352 }
c96778bb 353 timers_state.qemu_icount_bias += warp_delta;
946fb27c
PB
354 }
355 vm_clock_warp_start = -1;
17a15f1b 356 seqlock_write_unlock(&timers_state.vm_clock_seqlock);
8ed961d9
PB
357
358 if (qemu_clock_expired(QEMU_CLOCK_VIRTUAL)) {
359 qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
360 }
946fb27c
PB
361}
362
8156be56
PB
363void qtest_clock_warp(int64_t dest)
364{
40daca54 365 int64_t clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
efef88b3 366 AioContext *aio_context;
8156be56 367 assert(qtest_enabled());
efef88b3 368 aio_context = qemu_get_aio_context();
8156be56 369 while (clock < dest) {
40daca54 370 int64_t deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
c9299e2f 371 int64_t warp = qemu_soonest_timeout(dest - clock, deadline);
efef88b3 372
17a15f1b 373 seqlock_write_lock(&timers_state.vm_clock_seqlock);
c96778bb 374 timers_state.qemu_icount_bias += warp;
17a15f1b
PB
375 seqlock_write_unlock(&timers_state.vm_clock_seqlock);
376
40daca54 377 qemu_clock_run_timers(QEMU_CLOCK_VIRTUAL);
efef88b3 378 timerlist_run_timers(aio_context->tlg.tl[QEMU_CLOCK_VIRTUAL]);
40daca54 379 clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
8156be56 380 }
40daca54 381 qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
8156be56
PB
382}
383
40daca54 384void qemu_clock_warp(QEMUClockType type)
946fb27c 385{
ce78d18c 386 int64_t clock;
946fb27c
PB
387 int64_t deadline;
388
389 /*
390 * There are too many global variables to make the "warp" behavior
391 * applicable to other clocks. But a clock argument removes the
392 * need for if statements all over the place.
393 */
40daca54 394 if (type != QEMU_CLOCK_VIRTUAL || !use_icount) {
946fb27c
PB
395 return;
396 }
397
5045e9d9
VC
398 if (icount_sleep) {
399 /*
400 * If the CPUs have been sleeping, advance QEMU_CLOCK_VIRTUAL timer now.
401 * This ensures that the deadline for the timer is computed correctly
402 * below.
403 * This also makes sure that the insn counter is synchronized before
404 * the CPU starts running, in case the CPU is woken by an event other
405 * than the earliest QEMU_CLOCK_VIRTUAL timer.
406 */
407 icount_warp_rt(NULL);
408 timer_del(icount_warp_timer);
409 }
ce78d18c 410 if (!all_cpu_threads_idle()) {
946fb27c
PB
411 return;
412 }
413
8156be56
PB
414 if (qtest_enabled()) {
415 /* When testing, qtest commands advance icount. */
416 return;
417 }
418
ac70aafc 419 /* We want to use the earliest deadline from ALL vm_clocks */
bf2a7ddb 420 clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT);
40daca54 421 deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
ce78d18c 422 if (deadline < 0) {
d7a0f71d
VC
423 static bool notified;
424 if (!icount_sleep && !notified) {
425 error_report("WARNING: icount sleep disabled and no active timers");
426 notified = true;
427 }
ce78d18c 428 return;
ac70aafc
AB
429 }
430
946fb27c
PB
431 if (deadline > 0) {
432 /*
40daca54 433 * Ensure QEMU_CLOCK_VIRTUAL proceeds even when the virtual CPU goes to
946fb27c
PB
434 * sleep. Otherwise, the CPU might be waiting for a future timer
435 * interrupt to wake it up, but the interrupt never comes because
436 * the vCPU isn't running any insns and thus doesn't advance the
40daca54 437 * QEMU_CLOCK_VIRTUAL.
946fb27c 438 */
5045e9d9
VC
439 if (!icount_sleep) {
440 /*
441 * We never let VCPUs sleep in no sleep icount mode.
442 * If there is a pending QEMU_CLOCK_VIRTUAL timer we just advance
443 * to the next QEMU_CLOCK_VIRTUAL event and notify it.
444 * It is useful when we want a deterministic execution time,
445 * isolated from host latencies.
446 */
447 seqlock_write_lock(&timers_state.vm_clock_seqlock);
448 timers_state.qemu_icount_bias += deadline;
449 seqlock_write_unlock(&timers_state.vm_clock_seqlock);
450 qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
451 } else {
452 /*
453 * We do stop VCPUs and only advance QEMU_CLOCK_VIRTUAL after some
454 * "real" time, (related to the time left until the next event) has
455 * passed. The QEMU_CLOCK_VIRTUAL_RT clock will do this.
456 * This avoids that the warps are visible externally; for example,
457 * you will not be sending network packets continuously instead of
458 * every 100ms.
459 */
460 seqlock_write_lock(&timers_state.vm_clock_seqlock);
461 if (vm_clock_warp_start == -1 || vm_clock_warp_start > clock) {
462 vm_clock_warp_start = clock;
463 }
464 seqlock_write_unlock(&timers_state.vm_clock_seqlock);
465 timer_mod_anticipate(icount_warp_timer, clock + deadline);
ce78d18c 466 }
ac70aafc 467 } else if (deadline == 0) {
40daca54 468 qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
946fb27c
PB
469 }
470}
471
d09eae37
FK
472static bool icount_state_needed(void *opaque)
473{
474 return use_icount;
475}
476
477/*
478 * This is a subsection for icount migration.
479 */
480static const VMStateDescription icount_vmstate_timers = {
481 .name = "timer/icount",
482 .version_id = 1,
483 .minimum_version_id = 1,
5cd8cada 484 .needed = icount_state_needed,
d09eae37
FK
485 .fields = (VMStateField[]) {
486 VMSTATE_INT64(qemu_icount_bias, TimersState),
487 VMSTATE_INT64(qemu_icount, TimersState),
488 VMSTATE_END_OF_LIST()
489 }
490};
491
946fb27c
PB
492static const VMStateDescription vmstate_timers = {
493 .name = "timer",
494 .version_id = 2,
495 .minimum_version_id = 1,
35d08458 496 .fields = (VMStateField[]) {
946fb27c
PB
497 VMSTATE_INT64(cpu_ticks_offset, TimersState),
498 VMSTATE_INT64(dummy, TimersState),
499 VMSTATE_INT64_V(cpu_clock_offset, TimersState, 2),
500 VMSTATE_END_OF_LIST()
d09eae37 501 },
5cd8cada
JQ
502 .subsections = (const VMStateDescription*[]) {
503 &icount_vmstate_timers,
504 NULL
946fb27c
PB
505 }
506};
507
4603ea01
PD
508void cpu_ticks_init(void)
509{
510 seqlock_init(&timers_state.vm_clock_seqlock, NULL);
511 vmstate_register(NULL, 0, &vmstate_timers, &timers_state);
512}
513
1ad9580b 514void configure_icount(QemuOpts *opts, Error **errp)
946fb27c 515{
1ad9580b 516 const char *option;
a8bfac37 517 char *rem_str = NULL;
1ad9580b 518
1ad9580b 519 option = qemu_opt_get(opts, "shift");
946fb27c 520 if (!option) {
a8bfac37
ST
521 if (qemu_opt_get(opts, "align") != NULL) {
522 error_setg(errp, "Please specify shift option when using align");
523 }
946fb27c
PB
524 return;
525 }
f1f4b57e
VC
526
527 icount_sleep = qemu_opt_get_bool(opts, "sleep", true);
5045e9d9
VC
528 if (icount_sleep) {
529 icount_warp_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL_RT,
530 icount_warp_rt, NULL);
531 }
f1f4b57e 532
a8bfac37 533 icount_align_option = qemu_opt_get_bool(opts, "align", false);
f1f4b57e
VC
534
535 if (icount_align_option && !icount_sleep) {
536 error_setg(errp, "align=on and sleep=no are incompatible");
537 }
946fb27c 538 if (strcmp(option, "auto") != 0) {
a8bfac37
ST
539 errno = 0;
540 icount_time_shift = strtol(option, &rem_str, 0);
541 if (errno != 0 || *rem_str != '\0' || !strlen(option)) {
542 error_setg(errp, "icount: Invalid shift value");
543 }
946fb27c
PB
544 use_icount = 1;
545 return;
a8bfac37
ST
546 } else if (icount_align_option) {
547 error_setg(errp, "shift=auto and align=on are incompatible");
f1f4b57e
VC
548 } else if (!icount_sleep) {
549 error_setg(errp, "shift=auto and sleep=no are incompatible");
946fb27c
PB
550 }
551
552 use_icount = 2;
553
554 /* 125MIPS seems a reasonable initial guess at the guest speed.
555 It will be corrected fairly quickly anyway. */
556 icount_time_shift = 3;
557
558 /* Have both realtime and virtual time triggers for speed adjustment.
559 The realtime trigger catches emulated time passing too slowly,
560 the virtual time trigger catches emulated time passing too fast.
561 Realtime triggers occur even when idle, so use them less frequently
562 than VM triggers. */
bf2a7ddb
PD
563 icount_rt_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL_RT,
564 icount_adjust_rt, NULL);
40daca54 565 timer_mod(icount_rt_timer,
bf2a7ddb 566 qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL_RT) + 1000);
40daca54
AB
567 icount_vm_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
568 icount_adjust_vm, NULL);
569 timer_mod(icount_vm_timer,
570 qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
571 get_ticks_per_sec() / 10);
946fb27c
PB
572}
573
296af7c9
BS
574/***********************************************************/
575void hw_error(const char *fmt, ...)
576{
577 va_list ap;
55e5c285 578 CPUState *cpu;
296af7c9
BS
579
580 va_start(ap, fmt);
581 fprintf(stderr, "qemu: hardware error: ");
582 vfprintf(stderr, fmt, ap);
583 fprintf(stderr, "\n");
bdc44640 584 CPU_FOREACH(cpu) {
55e5c285 585 fprintf(stderr, "CPU #%d:\n", cpu->cpu_index);
878096ee 586 cpu_dump_state(cpu, stderr, fprintf, CPU_DUMP_FPU);
296af7c9
BS
587 }
588 va_end(ap);
589 abort();
590}
591
592void cpu_synchronize_all_states(void)
593{
182735ef 594 CPUState *cpu;
296af7c9 595
bdc44640 596 CPU_FOREACH(cpu) {
182735ef 597 cpu_synchronize_state(cpu);
296af7c9
BS
598 }
599}
600
601void cpu_synchronize_all_post_reset(void)
602{
182735ef 603 CPUState *cpu;
296af7c9 604
bdc44640 605 CPU_FOREACH(cpu) {
182735ef 606 cpu_synchronize_post_reset(cpu);
296af7c9
BS
607 }
608}
609
610void cpu_synchronize_all_post_init(void)
611{
182735ef 612 CPUState *cpu;
296af7c9 613
bdc44640 614 CPU_FOREACH(cpu) {
182735ef 615 cpu_synchronize_post_init(cpu);
296af7c9
BS
616 }
617}
618
de9d61e8
MT
619void cpu_clean_all_dirty(void)
620{
621 CPUState *cpu;
622
623 CPU_FOREACH(cpu) {
624 cpu_clean_state(cpu);
625 }
626}
627
56983463 628static int do_vm_stop(RunState state)
296af7c9 629{
56983463
KW
630 int ret = 0;
631
1354869c 632 if (runstate_is_running()) {
296af7c9 633 cpu_disable_ticks();
296af7c9 634 pause_all_vcpus();
f5bbfba1 635 runstate_set(state);
1dfb4dd9 636 vm_state_notify(0, state);
a4e15de9 637 qapi_event_send_stop(&error_abort);
296af7c9 638 }
56983463 639
594a45ce
KW
640 bdrv_drain_all();
641 ret = bdrv_flush_all();
642
56983463 643 return ret;
296af7c9
BS
644}
645
a1fcaa73 646static bool cpu_can_run(CPUState *cpu)
296af7c9 647{
4fdeee7c 648 if (cpu->stop) {
a1fcaa73 649 return false;
0ab07c62 650 }
321bc0b2 651 if (cpu_is_stopped(cpu)) {
a1fcaa73 652 return false;
0ab07c62 653 }
a1fcaa73 654 return true;
296af7c9
BS
655}
656
91325046 657static void cpu_handle_guest_debug(CPUState *cpu)
83f338f7 658{
64f6b346 659 gdb_set_stop_cpu(cpu);
8cf71710 660 qemu_system_debug_request();
f324e766 661 cpu->stopped = true;
3c638d06
JK
662}
663
714bd040
PB
664static void cpu_signal(int sig)
665{
aed807c8
PB
666 CPUState *cpu;
667 /* Ensure whatever caused the exit has reached the CPU threads before
668 * writing exit_request.
669 */
670 atomic_mb_set(&exit_request, 1);
671 cpu = atomic_mb_read(&tcg_current_cpu);
9373e632
PB
672 if (cpu) {
673 cpu_exit(cpu);
714bd040 674 }
714bd040 675}
714bd040 676
6d9cb73c
JK
677#ifdef CONFIG_LINUX
678static void sigbus_reraise(void)
679{
680 sigset_t set;
681 struct sigaction action;
682
683 memset(&action, 0, sizeof(action));
684 action.sa_handler = SIG_DFL;
685 if (!sigaction(SIGBUS, &action, NULL)) {
686 raise(SIGBUS);
687 sigemptyset(&set);
688 sigaddset(&set, SIGBUS);
689 sigprocmask(SIG_UNBLOCK, &set, NULL);
690 }
691 perror("Failed to re-raise SIGBUS!\n");
692 abort();
693}
694
695static void sigbus_handler(int n, struct qemu_signalfd_siginfo *siginfo,
696 void *ctx)
697{
698 if (kvm_on_sigbus(siginfo->ssi_code,
699 (void *)(intptr_t)siginfo->ssi_addr)) {
700 sigbus_reraise();
701 }
702}
703
704static void qemu_init_sigbus(void)
705{
706 struct sigaction action;
707
708 memset(&action, 0, sizeof(action));
709 action.sa_flags = SA_SIGINFO;
710 action.sa_sigaction = (void (*)(int, siginfo_t*, void*))sigbus_handler;
711 sigaction(SIGBUS, &action, NULL);
712
713 prctl(PR_MCE_KILL, PR_MCE_KILL_SET, PR_MCE_KILL_EARLY, 0, 0);
714}
715
290adf38 716static void qemu_kvm_eat_signals(CPUState *cpu)
1ab3c6c0
JK
717{
718 struct timespec ts = { 0, 0 };
719 siginfo_t siginfo;
720 sigset_t waitset;
721 sigset_t chkset;
722 int r;
723
724 sigemptyset(&waitset);
725 sigaddset(&waitset, SIG_IPI);
726 sigaddset(&waitset, SIGBUS);
727
728 do {
729 r = sigtimedwait(&waitset, &siginfo, &ts);
730 if (r == -1 && !(errno == EAGAIN || errno == EINTR)) {
731 perror("sigtimedwait");
732 exit(1);
733 }
734
735 switch (r) {
736 case SIGBUS:
290adf38 737 if (kvm_on_sigbus_vcpu(cpu, siginfo.si_code, siginfo.si_addr)) {
1ab3c6c0
JK
738 sigbus_reraise();
739 }
740 break;
741 default:
742 break;
743 }
744
745 r = sigpending(&chkset);
746 if (r == -1) {
747 perror("sigpending");
748 exit(1);
749 }
750 } while (sigismember(&chkset, SIG_IPI) || sigismember(&chkset, SIGBUS));
1ab3c6c0
JK
751}
752
6d9cb73c
JK
753#else /* !CONFIG_LINUX */
754
/* Non-Linux build: there is no SIGBUS setup to perform. */
static void qemu_init_sigbus(void)
{
    /* intentionally empty */
}
1ab3c6c0 758
290adf38 759static void qemu_kvm_eat_signals(CPUState *cpu)
1ab3c6c0
JK
760{
761}
6d9cb73c
JK
762#endif /* !CONFIG_LINUX */
763
296af7c9 764#ifndef _WIN32
55f8d6ac
JK
/* No-op signal handler; installed for SIG_IPI by the KVM signal setup. */
static void dummy_signal(int sig)
{
    /* intentionally empty */
}
55f8d6ac 768
13618e05 769static void qemu_kvm_init_cpu_signals(CPUState *cpu)
714bd040
PB
770{
771 int r;
772 sigset_t set;
773 struct sigaction sigact;
774
775 memset(&sigact, 0, sizeof(sigact));
776 sigact.sa_handler = dummy_signal;
777 sigaction(SIG_IPI, &sigact, NULL);
778
714bd040
PB
779 pthread_sigmask(SIG_BLOCK, NULL, &set);
780 sigdelset(&set, SIG_IPI);
714bd040 781 sigdelset(&set, SIGBUS);
491d6e80 782 r = kvm_set_signal_mask(cpu, &set);
714bd040
PB
783 if (r) {
784 fprintf(stderr, "kvm_set_signal_mask: %s\n", strerror(-r));
785 exit(1);
786 }
787}
788
789static void qemu_tcg_init_cpu_signals(void)
790{
714bd040
PB
791 sigset_t set;
792 struct sigaction sigact;
793
794 memset(&sigact, 0, sizeof(sigact));
795 sigact.sa_handler = cpu_signal;
796 sigaction(SIG_IPI, &sigact, NULL);
797
798 sigemptyset(&set);
799 sigaddset(&set, SIG_IPI);
800 pthread_sigmask(SIG_UNBLOCK, &set, NULL);
714bd040
PB
801}
802
55f8d6ac 803#else /* _WIN32 */
13618e05 804static void qemu_kvm_init_cpu_signals(CPUState *cpu)
ff48eb5f 805{
714bd040
PB
806 abort();
807}
ff48eb5f 808
714bd040
PB
/* Win32 build: no signal setup is performed for the TCG thread. */
static void qemu_tcg_init_cpu_signals(void)
{
    /* intentionally empty */
}
714bd040 812#endif /* _WIN32 */
ff48eb5f 813
b2532d88 814static QemuMutex qemu_global_mutex;
46daff13 815static QemuCond qemu_io_proceeded_cond;
6b49809c 816static unsigned iothread_requesting_mutex;
296af7c9
BS
817
818static QemuThread io_thread;
819
820static QemuThread *tcg_cpu_thread;
821static QemuCond *tcg_halt_cond;
822
296af7c9
BS
823/* cpu creation */
824static QemuCond qemu_cpu_cond;
825/* system init */
296af7c9 826static QemuCond qemu_pause_cond;
e82bcec2 827static QemuCond qemu_work_cond;
296af7c9 828
d3b12f5d 829void qemu_init_cpu_loop(void)
296af7c9 830{
6d9cb73c 831 qemu_init_sigbus();
ed94592b 832 qemu_cond_init(&qemu_cpu_cond);
ed94592b
AL
833 qemu_cond_init(&qemu_pause_cond);
834 qemu_cond_init(&qemu_work_cond);
46daff13 835 qemu_cond_init(&qemu_io_proceeded_cond);
296af7c9 836 qemu_mutex_init(&qemu_global_mutex);
296af7c9 837
b7680cb6 838 qemu_thread_get_self(&io_thread);
296af7c9
BS
839}
840
f100f0b3 841void run_on_cpu(CPUState *cpu, void (*func)(void *data), void *data)
e82bcec2
MT
842{
843 struct qemu_work_item wi;
844
60e82579 845 if (qemu_cpu_is_self(cpu)) {
e82bcec2
MT
846 func(data);
847 return;
848 }
849
850 wi.func = func;
851 wi.data = data;
3c02270d 852 wi.free = false;
c64ca814
AF
853 if (cpu->queued_work_first == NULL) {
854 cpu->queued_work_first = &wi;
0ab07c62 855 } else {
c64ca814 856 cpu->queued_work_last->next = &wi;
0ab07c62 857 }
c64ca814 858 cpu->queued_work_last = &wi;
e82bcec2
MT
859 wi.next = NULL;
860 wi.done = false;
861
c08d7424 862 qemu_cpu_kick(cpu);
e82bcec2 863 while (!wi.done) {
4917cf44 864 CPUState *self_cpu = current_cpu;
e82bcec2
MT
865
866 qemu_cond_wait(&qemu_work_cond, &qemu_global_mutex);
4917cf44 867 current_cpu = self_cpu;
e82bcec2
MT
868 }
869}
870
3c02270d
CV
871void async_run_on_cpu(CPUState *cpu, void (*func)(void *data), void *data)
872{
873 struct qemu_work_item *wi;
874
875 if (qemu_cpu_is_self(cpu)) {
876 func(data);
877 return;
878 }
879
880 wi = g_malloc0(sizeof(struct qemu_work_item));
881 wi->func = func;
882 wi->data = data;
883 wi->free = true;
884 if (cpu->queued_work_first == NULL) {
885 cpu->queued_work_first = wi;
886 } else {
887 cpu->queued_work_last->next = wi;
888 }
889 cpu->queued_work_last = wi;
890 wi->next = NULL;
891 wi->done = false;
892
893 qemu_cpu_kick(cpu);
894}
895
6d45b109 896static void flush_queued_work(CPUState *cpu)
e82bcec2
MT
897{
898 struct qemu_work_item *wi;
899
c64ca814 900 if (cpu->queued_work_first == NULL) {
e82bcec2 901 return;
0ab07c62 902 }
e82bcec2 903
c64ca814
AF
904 while ((wi = cpu->queued_work_first)) {
905 cpu->queued_work_first = wi->next;
e82bcec2
MT
906 wi->func(wi->data);
907 wi->done = true;
3c02270d
CV
908 if (wi->free) {
909 g_free(wi);
910 }
e82bcec2 911 }
c64ca814 912 cpu->queued_work_last = NULL;
e82bcec2
MT
913 qemu_cond_broadcast(&qemu_work_cond);
914}
915
509a0d78 916static void qemu_wait_io_event_common(CPUState *cpu)
296af7c9 917{
4fdeee7c
AF
918 if (cpu->stop) {
919 cpu->stop = false;
f324e766 920 cpu->stopped = true;
296af7c9
BS
921 qemu_cond_signal(&qemu_pause_cond);
922 }
6d45b109 923 flush_queued_work(cpu);
216fc9a4 924 cpu->thread_kicked = false;
296af7c9
BS
925}
926
6cabe1f3 927static void qemu_tcg_wait_io_event(void)
296af7c9 928{
182735ef 929 CPUState *cpu;
6cabe1f3 930
16400322 931 while (all_cpu_threads_idle()) {
ab33fcda
PB
932 /* Start accounting real time to the virtual clock if the CPUs
933 are idle. */
40daca54 934 qemu_clock_warp(QEMU_CLOCK_VIRTUAL);
9705fbb5 935 qemu_cond_wait(tcg_halt_cond, &qemu_global_mutex);
16400322 936 }
296af7c9 937
46daff13
PB
938 while (iothread_requesting_mutex) {
939 qemu_cond_wait(&qemu_io_proceeded_cond, &qemu_global_mutex);
940 }
6cabe1f3 941
bdc44640 942 CPU_FOREACH(cpu) {
182735ef 943 qemu_wait_io_event_common(cpu);
6cabe1f3 944 }
296af7c9
BS
945}
946
fd529e8f 947static void qemu_kvm_wait_io_event(CPUState *cpu)
296af7c9 948{
a98ae1d8 949 while (cpu_thread_is_idle(cpu)) {
f5c121b8 950 qemu_cond_wait(cpu->halt_cond, &qemu_global_mutex);
16400322 951 }
296af7c9 952
290adf38 953 qemu_kvm_eat_signals(cpu);
509a0d78 954 qemu_wait_io_event_common(cpu);
296af7c9
BS
955}
956
7e97cd88 957static void *qemu_kvm_cpu_thread_fn(void *arg)
296af7c9 958{
48a106bd 959 CPUState *cpu = arg;
84b4915d 960 int r;
296af7c9 961
ab28bd23
PB
962 rcu_register_thread();
963
2e7f7a3c 964 qemu_mutex_lock_iothread();
814e612e 965 qemu_thread_get_self(cpu->thread);
9f09e18a 966 cpu->thread_id = qemu_get_thread_id();
626cf8f4 967 cpu->can_do_io = 1;
4917cf44 968 current_cpu = cpu;
296af7c9 969
504134d2 970 r = kvm_init_vcpu(cpu);
84b4915d
JK
971 if (r < 0) {
972 fprintf(stderr, "kvm_init_vcpu failed: %s\n", strerror(-r));
973 exit(1);
974 }
296af7c9 975
13618e05 976 qemu_kvm_init_cpu_signals(cpu);
296af7c9
BS
977
978 /* signal CPU creation */
61a46217 979 cpu->created = true;
296af7c9
BS
980 qemu_cond_signal(&qemu_cpu_cond);
981
296af7c9 982 while (1) {
a1fcaa73 983 if (cpu_can_run(cpu)) {
1458c363 984 r = kvm_cpu_exec(cpu);
83f338f7 985 if (r == EXCP_DEBUG) {
91325046 986 cpu_handle_guest_debug(cpu);
83f338f7 987 }
0ab07c62 988 }
fd529e8f 989 qemu_kvm_wait_io_event(cpu);
296af7c9
BS
990 }
991
992 return NULL;
993}
994
c7f0f3b1
AL
995static void *qemu_dummy_cpu_thread_fn(void *arg)
996{
997#ifdef _WIN32
998 fprintf(stderr, "qtest is not supported under Windows\n");
999 exit(1);
1000#else
10a9021d 1001 CPUState *cpu = arg;
c7f0f3b1
AL
1002 sigset_t waitset;
1003 int r;
1004
ab28bd23
PB
1005 rcu_register_thread();
1006
c7f0f3b1 1007 qemu_mutex_lock_iothread();
814e612e 1008 qemu_thread_get_self(cpu->thread);
9f09e18a 1009 cpu->thread_id = qemu_get_thread_id();
626cf8f4 1010 cpu->can_do_io = 1;
c7f0f3b1
AL
1011
1012 sigemptyset(&waitset);
1013 sigaddset(&waitset, SIG_IPI);
1014
1015 /* signal CPU creation */
61a46217 1016 cpu->created = true;
c7f0f3b1
AL
1017 qemu_cond_signal(&qemu_cpu_cond);
1018
4917cf44 1019 current_cpu = cpu;
c7f0f3b1 1020 while (1) {
4917cf44 1021 current_cpu = NULL;
c7f0f3b1
AL
1022 qemu_mutex_unlock_iothread();
1023 do {
1024 int sig;
1025 r = sigwait(&waitset, &sig);
1026 } while (r == -1 && (errno == EAGAIN || errno == EINTR));
1027 if (r == -1) {
1028 perror("sigwait");
1029 exit(1);
1030 }
1031 qemu_mutex_lock_iothread();
4917cf44 1032 current_cpu = cpu;
509a0d78 1033 qemu_wait_io_event_common(cpu);
c7f0f3b1
AL
1034 }
1035
1036 return NULL;
1037#endif
1038}
1039
bdb7ca67
JK
1040static void tcg_exec_all(void);
1041
7e97cd88 1042static void *qemu_tcg_cpu_thread_fn(void *arg)
296af7c9 1043{
c3586ba7 1044 CPUState *cpu = arg;
296af7c9 1045
ab28bd23
PB
1046 rcu_register_thread();
1047
2e7f7a3c 1048 qemu_mutex_lock_iothread();
55f8d6ac 1049 qemu_tcg_init_cpu_signals();
814e612e 1050 qemu_thread_get_self(cpu->thread);
296af7c9 1051
38fcbd3f
AF
1052 CPU_FOREACH(cpu) {
1053 cpu->thread_id = qemu_get_thread_id();
1054 cpu->created = true;
626cf8f4 1055 cpu->can_do_io = 1;
38fcbd3f 1056 }
296af7c9
BS
1057 qemu_cond_signal(&qemu_cpu_cond);
1058
fa7d1867 1059 /* wait for initial kick-off after machine start */
c28e399c 1060 while (first_cpu->stopped) {
fa7d1867 1061 qemu_cond_wait(tcg_halt_cond, &qemu_global_mutex);
8e564b4e
JK
1062
1063 /* process any pending work */
bdc44640 1064 CPU_FOREACH(cpu) {
182735ef 1065 qemu_wait_io_event_common(cpu);
8e564b4e 1066 }
0ab07c62 1067 }
296af7c9 1068
21618b3e 1069 /* process any pending work */
aed807c8 1070 atomic_mb_set(&exit_request, 1);
21618b3e 1071
296af7c9 1072 while (1) {
bdb7ca67 1073 tcg_exec_all();
ac70aafc
AB
1074
1075 if (use_icount) {
40daca54 1076 int64_t deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
ac70aafc
AB
1077
1078 if (deadline == 0) {
40daca54 1079 qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
ac70aafc 1080 }
3b2319a3 1081 }
6cabe1f3 1082 qemu_tcg_wait_io_event();
296af7c9
BS
1083 }
1084
1085 return NULL;
1086}
1087
2ff09a40 1088static void qemu_cpu_kick_thread(CPUState *cpu)
cc015e9a
PB
1089{
1090#ifndef _WIN32
1091 int err;
1092
814e612e 1093 err = pthread_kill(cpu->thread->thread, SIG_IPI);
cc015e9a
PB
1094 if (err) {
1095 fprintf(stderr, "qemu:%s: %s", __func__, strerror(err));
1096 exit(1);
1097 }
1098#else /* _WIN32 */
60e82579 1099 if (!qemu_cpu_is_self(cpu)) {
ed9164a3
OH
1100 CONTEXT tcgContext;
1101
1102 if (SuspendThread(cpu->hThread) == (DWORD)-1) {
7f1721df 1103 fprintf(stderr, "qemu:%s: GetLastError:%lu\n", __func__,
ed9164a3
OH
1104 GetLastError());
1105 exit(1);
1106 }
1107
1108 /* On multi-core systems, we are not sure that the thread is actually
1109 * suspended until we can get the context.
1110 */
1111 tcgContext.ContextFlags = CONTEXT_CONTROL;
1112 while (GetThreadContext(cpu->hThread, &tcgContext) != 0) {
1113 continue;
1114 }
1115
cc015e9a 1116 cpu_signal(0);
ed9164a3
OH
1117
1118 if (ResumeThread(cpu->hThread) == (DWORD)-1) {
7f1721df 1119 fprintf(stderr, "qemu:%s: GetLastError:%lu\n", __func__,
ed9164a3
OH
1120 GetLastError());
1121 exit(1);
1122 }
cc015e9a
PB
1123 }
1124#endif
1125}
1126
c08d7424 1127void qemu_cpu_kick(CPUState *cpu)
296af7c9 1128{
f5c121b8 1129 qemu_cond_broadcast(cpu->halt_cond);
216fc9a4 1130 if (!tcg_enabled() && !cpu->thread_kicked) {
2ff09a40 1131 qemu_cpu_kick_thread(cpu);
216fc9a4 1132 cpu->thread_kicked = true;
aa2c364b 1133 }
296af7c9
BS
1134}
1135
46d62fac 1136void qemu_cpu_kick_self(void)
296af7c9 1137{
b55c22c6 1138#ifndef _WIN32
4917cf44 1139 assert(current_cpu);
296af7c9 1140
4917cf44
AF
1141 if (!current_cpu->thread_kicked) {
1142 qemu_cpu_kick_thread(current_cpu);
1143 current_cpu->thread_kicked = true;
296af7c9 1144 }
b55c22c6
PB
1145#else
1146 abort();
1147#endif
296af7c9
BS
1148}
1149
60e82579 1150bool qemu_cpu_is_self(CPUState *cpu)
296af7c9 1151{
814e612e 1152 return qemu_thread_is_self(cpu->thread);
296af7c9
BS
1153}
1154
79e2b9ae 1155bool qemu_in_vcpu_thread(void)
aa723c23 1156{
4917cf44 1157 return current_cpu && qemu_cpu_is_self(current_cpu);
aa723c23
JQ
1158}
1159
afbe7053
PB
/*
 * Per-thread flag: set by qemu_mutex_lock_iothread() and cleared by
 * qemu_mutex_unlock_iothread().  Static storage, so it starts out false.
 */
static __thread bool iothread_locked;

/* Report whether the calling thread's iothread_locked flag is set. */
bool qemu_mutex_iothread_locked(void)
{
    bool held = iothread_locked;

    return held;
}
1166
296af7c9
BS
1167void qemu_mutex_lock_iothread(void)
1168{
21618b3e 1169 atomic_inc(&iothread_requesting_mutex);
2e7f7a3c
PB
1170 /* In the simple case there is no need to bump the VCPU thread out of
1171 * TCG code execution.
1172 */
1173 if (!tcg_enabled() || qemu_in_vcpu_thread() ||
46036b24 1174 !first_cpu || !first_cpu->created) {
296af7c9 1175 qemu_mutex_lock(&qemu_global_mutex);
21618b3e 1176 atomic_dec(&iothread_requesting_mutex);
1a28cac3 1177 } else {
1a28cac3 1178 if (qemu_mutex_trylock(&qemu_global_mutex)) {
182735ef 1179 qemu_cpu_kick_thread(first_cpu);
1a28cac3
MT
1180 qemu_mutex_lock(&qemu_global_mutex);
1181 }
6b49809c 1182 atomic_dec(&iothread_requesting_mutex);
46daff13 1183 qemu_cond_broadcast(&qemu_io_proceeded_cond);
1a28cac3 1184 }
afbe7053 1185 iothread_locked = true;
296af7c9
BS
1186}
1187
1188void qemu_mutex_unlock_iothread(void)
1189{
afbe7053 1190 iothread_locked = false;
296af7c9
BS
1191 qemu_mutex_unlock(&qemu_global_mutex);
1192}
1193
1194static int all_vcpus_paused(void)
1195{
bdc44640 1196 CPUState *cpu;
296af7c9 1197
bdc44640 1198 CPU_FOREACH(cpu) {
182735ef 1199 if (!cpu->stopped) {
296af7c9 1200 return 0;
0ab07c62 1201 }
296af7c9
BS
1202 }
1203
1204 return 1;
1205}
1206
1207void pause_all_vcpus(void)
1208{
bdc44640 1209 CPUState *cpu;
296af7c9 1210
40daca54 1211 qemu_clock_enable(QEMU_CLOCK_VIRTUAL, false);
bdc44640 1212 CPU_FOREACH(cpu) {
182735ef
AF
1213 cpu->stop = true;
1214 qemu_cpu_kick(cpu);
296af7c9
BS
1215 }
1216
aa723c23 1217 if (qemu_in_vcpu_thread()) {
d798e974
JK
1218 cpu_stop_current();
1219 if (!kvm_enabled()) {
bdc44640 1220 CPU_FOREACH(cpu) {
182735ef
AF
1221 cpu->stop = false;
1222 cpu->stopped = true;
d798e974
JK
1223 }
1224 return;
1225 }
1226 }
1227
296af7c9 1228 while (!all_vcpus_paused()) {
be7d6c57 1229 qemu_cond_wait(&qemu_pause_cond, &qemu_global_mutex);
bdc44640 1230 CPU_FOREACH(cpu) {
182735ef 1231 qemu_cpu_kick(cpu);
296af7c9
BS
1232 }
1233 }
1234}
1235
2993683b
IM
1236void cpu_resume(CPUState *cpu)
1237{
1238 cpu->stop = false;
1239 cpu->stopped = false;
1240 qemu_cpu_kick(cpu);
1241}
1242
296af7c9
BS
1243void resume_all_vcpus(void)
1244{
bdc44640 1245 CPUState *cpu;
296af7c9 1246
40daca54 1247 qemu_clock_enable(QEMU_CLOCK_VIRTUAL, true);
bdc44640 1248 CPU_FOREACH(cpu) {
182735ef 1249 cpu_resume(cpu);
296af7c9
BS
1250 }
1251}
1252
4900116e
DDAG
1253/* For temporary buffers for forming a name */
1254#define VCPU_THREAD_NAME_SIZE 16
1255
e5ab30a2 1256static void qemu_tcg_init_vcpu(CPUState *cpu)
296af7c9 1257{
4900116e
DDAG
1258 char thread_name[VCPU_THREAD_NAME_SIZE];
1259
09daed84
EI
1260 tcg_cpu_address_space_init(cpu, cpu->as);
1261
296af7c9
BS
1262 /* share a single thread for all cpus with TCG */
1263 if (!tcg_cpu_thread) {
814e612e 1264 cpu->thread = g_malloc0(sizeof(QemuThread));
f5c121b8
AF
1265 cpu->halt_cond = g_malloc0(sizeof(QemuCond));
1266 qemu_cond_init(cpu->halt_cond);
1267 tcg_halt_cond = cpu->halt_cond;
4900116e
DDAG
1268 snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/TCG",
1269 cpu->cpu_index);
1270 qemu_thread_create(cpu->thread, thread_name, qemu_tcg_cpu_thread_fn,
1271 cpu, QEMU_THREAD_JOINABLE);
1ecf47bf 1272#ifdef _WIN32
814e612e 1273 cpu->hThread = qemu_thread_get_handle(cpu->thread);
1ecf47bf 1274#endif
61a46217 1275 while (!cpu->created) {
18a85728 1276 qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
0ab07c62 1277 }
814e612e 1278 tcg_cpu_thread = cpu->thread;
296af7c9 1279 } else {
814e612e 1280 cpu->thread = tcg_cpu_thread;
f5c121b8 1281 cpu->halt_cond = tcg_halt_cond;
296af7c9
BS
1282 }
1283}
1284
48a106bd 1285static void qemu_kvm_start_vcpu(CPUState *cpu)
296af7c9 1286{
4900116e
DDAG
1287 char thread_name[VCPU_THREAD_NAME_SIZE];
1288
814e612e 1289 cpu->thread = g_malloc0(sizeof(QemuThread));
f5c121b8
AF
1290 cpu->halt_cond = g_malloc0(sizeof(QemuCond));
1291 qemu_cond_init(cpu->halt_cond);
4900116e
DDAG
1292 snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/KVM",
1293 cpu->cpu_index);
1294 qemu_thread_create(cpu->thread, thread_name, qemu_kvm_cpu_thread_fn,
1295 cpu, QEMU_THREAD_JOINABLE);
61a46217 1296 while (!cpu->created) {
18a85728 1297 qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
0ab07c62 1298 }
296af7c9
BS
1299}
1300
10a9021d 1301static void qemu_dummy_start_vcpu(CPUState *cpu)
c7f0f3b1 1302{
4900116e
DDAG
1303 char thread_name[VCPU_THREAD_NAME_SIZE];
1304
814e612e 1305 cpu->thread = g_malloc0(sizeof(QemuThread));
f5c121b8
AF
1306 cpu->halt_cond = g_malloc0(sizeof(QemuCond));
1307 qemu_cond_init(cpu->halt_cond);
4900116e
DDAG
1308 snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/DUMMY",
1309 cpu->cpu_index);
1310 qemu_thread_create(cpu->thread, thread_name, qemu_dummy_cpu_thread_fn, cpu,
c7f0f3b1 1311 QEMU_THREAD_JOINABLE);
61a46217 1312 while (!cpu->created) {
c7f0f3b1
AL
1313 qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
1314 }
1315}
1316
c643bed9 1317void qemu_init_vcpu(CPUState *cpu)
296af7c9 1318{
ce3960eb
AF
1319 cpu->nr_cores = smp_cores;
1320 cpu->nr_threads = smp_threads;
f324e766 1321 cpu->stopped = true;
0ab07c62 1322 if (kvm_enabled()) {
48a106bd 1323 qemu_kvm_start_vcpu(cpu);
c7f0f3b1 1324 } else if (tcg_enabled()) {
e5ab30a2 1325 qemu_tcg_init_vcpu(cpu);
c7f0f3b1 1326 } else {
10a9021d 1327 qemu_dummy_start_vcpu(cpu);
0ab07c62 1328 }
296af7c9
BS
1329}
1330
b4a3d965 1331void cpu_stop_current(void)
296af7c9 1332{
4917cf44
AF
1333 if (current_cpu) {
1334 current_cpu->stop = false;
1335 current_cpu->stopped = true;
1336 cpu_exit(current_cpu);
67bb172f 1337 qemu_cond_signal(&qemu_pause_cond);
b4a3d965 1338 }
296af7c9
BS
1339}
1340
56983463 1341int vm_stop(RunState state)
296af7c9 1342{
aa723c23 1343 if (qemu_in_vcpu_thread()) {
74892d24 1344 qemu_system_vmstop_request_prepare();
1dfb4dd9 1345 qemu_system_vmstop_request(state);
296af7c9
BS
1346 /*
1347 * FIXME: should not return to device code in case
1348 * vm_stop() has been requested.
1349 */
b4a3d965 1350 cpu_stop_current();
56983463 1351 return 0;
296af7c9 1352 }
56983463
KW
1353
1354 return do_vm_stop(state);
296af7c9
BS
1355}
1356
8a9236f1
LC
1357/* does a state transition even if the VM is already stopped,
1358 current state is forgotten forever */
56983463 1359int vm_stop_force_state(RunState state)
8a9236f1
LC
1360{
1361 if (runstate_is_running()) {
56983463 1362 return vm_stop(state);
8a9236f1
LC
1363 } else {
1364 runstate_set(state);
594a45ce
KW
1365 /* Make sure to return an error if the flush in a previous vm_stop()
1366 * failed. */
1367 return bdrv_flush_all();
8a9236f1
LC
1368 }
1369}
1370
3d57f789 1371static int tcg_cpu_exec(CPUState *cpu)
296af7c9
BS
1372{
1373 int ret;
1374#ifdef CONFIG_PROFILER
1375 int64_t ti;
1376#endif
1377
1378#ifdef CONFIG_PROFILER
1379 ti = profile_getclock();
1380#endif
1381 if (use_icount) {
1382 int64_t count;
ac70aafc 1383 int64_t deadline;
296af7c9 1384 int decr;
c96778bb
FK
1385 timers_state.qemu_icount -= (cpu->icount_decr.u16.low
1386 + cpu->icount_extra);
28ecfd7a 1387 cpu->icount_decr.u16.low = 0;
efee7340 1388 cpu->icount_extra = 0;
40daca54 1389 deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
ac70aafc
AB
1390
1391 /* Maintain prior (possibly buggy) behaviour where if no deadline
40daca54 1392 * was set (as there is no QEMU_CLOCK_VIRTUAL timer) or it is more than
ac70aafc
AB
1393 * INT32_MAX nanoseconds ahead, we still use INT32_MAX
1394 * nanoseconds.
1395 */
1396 if ((deadline < 0) || (deadline > INT32_MAX)) {
1397 deadline = INT32_MAX;
1398 }
1399
1400 count = qemu_icount_round(deadline);
c96778bb 1401 timers_state.qemu_icount += count;
296af7c9
BS
1402 decr = (count > 0xffff) ? 0xffff : count;
1403 count -= decr;
28ecfd7a 1404 cpu->icount_decr.u16.low = decr;
efee7340 1405 cpu->icount_extra = count;
296af7c9 1406 }
ea3e9847 1407 ret = cpu_exec(cpu);
296af7c9 1408#ifdef CONFIG_PROFILER
89d5cbdd 1409 tcg_time += profile_getclock() - ti;
296af7c9
BS
1410#endif
1411 if (use_icount) {
1412 /* Fold pending instructions back into the
1413 instruction counter, and clear the interrupt flag. */
c96778bb
FK
1414 timers_state.qemu_icount -= (cpu->icount_decr.u16.low
1415 + cpu->icount_extra);
28ecfd7a 1416 cpu->icount_decr.u32 = 0;
efee7340 1417 cpu->icount_extra = 0;
296af7c9
BS
1418 }
1419 return ret;
1420}
1421
bdb7ca67 1422static void tcg_exec_all(void)
296af7c9 1423{
9a36085b
JK
1424 int r;
1425
40daca54
AB
1426 /* Account partial waits to QEMU_CLOCK_VIRTUAL. */
1427 qemu_clock_warp(QEMU_CLOCK_VIRTUAL);
ab33fcda 1428
0ab07c62 1429 if (next_cpu == NULL) {
296af7c9 1430 next_cpu = first_cpu;
0ab07c62 1431 }
bdc44640 1432 for (; next_cpu != NULL && !exit_request; next_cpu = CPU_NEXT(next_cpu)) {
182735ef 1433 CPUState *cpu = next_cpu;
296af7c9 1434
40daca54 1435 qemu_clock_enable(QEMU_CLOCK_VIRTUAL,
ed2803da 1436 (cpu->singlestep_enabled & SSTEP_NOTIMER) == 0);
296af7c9 1437
a1fcaa73 1438 if (cpu_can_run(cpu)) {
3d57f789 1439 r = tcg_cpu_exec(cpu);
9a36085b 1440 if (r == EXCP_DEBUG) {
91325046 1441 cpu_handle_guest_debug(cpu);
3c638d06
JK
1442 break;
1443 }
f324e766 1444 } else if (cpu->stop || cpu->stopped) {
296af7c9
BS
1445 break;
1446 }
1447 }
aed807c8
PB
1448
1449 /* Pairs with smp_wmb in qemu_cpu_kick. */
1450 atomic_mb_set(&exit_request, 0);
296af7c9
BS
1451}
1452
9a78eead 1453void list_cpus(FILE *f, fprintf_function cpu_fprintf, const char *optarg)
262353cb
BS
1454{
1455 /* XXX: implement xxx_cpu_list for targets that still miss it */
e916cbf8
PM
1456#if defined(cpu_list)
1457 cpu_list(f, cpu_fprintf);
262353cb
BS
1458#endif
1459}
de0b36b6
LC
1460
1461CpuInfoList *qmp_query_cpus(Error **errp)
1462{
1463 CpuInfoList *head = NULL, *cur_item = NULL;
182735ef 1464 CPUState *cpu;
de0b36b6 1465
bdc44640 1466 CPU_FOREACH(cpu) {
de0b36b6 1467 CpuInfoList *info;
182735ef
AF
1468#if defined(TARGET_I386)
1469 X86CPU *x86_cpu = X86_CPU(cpu);
1470 CPUX86State *env = &x86_cpu->env;
1471#elif defined(TARGET_PPC)
1472 PowerPCCPU *ppc_cpu = POWERPC_CPU(cpu);
1473 CPUPPCState *env = &ppc_cpu->env;
1474#elif defined(TARGET_SPARC)
1475 SPARCCPU *sparc_cpu = SPARC_CPU(cpu);
1476 CPUSPARCState *env = &sparc_cpu->env;
1477#elif defined(TARGET_MIPS)
1478 MIPSCPU *mips_cpu = MIPS_CPU(cpu);
1479 CPUMIPSState *env = &mips_cpu->env;
48e06fe0
BK
1480#elif defined(TARGET_TRICORE)
1481 TriCoreCPU *tricore_cpu = TRICORE_CPU(cpu);
1482 CPUTriCoreState *env = &tricore_cpu->env;
182735ef 1483#endif
de0b36b6 1484
cb446eca 1485 cpu_synchronize_state(cpu);
de0b36b6
LC
1486
1487 info = g_malloc0(sizeof(*info));
1488 info->value = g_malloc0(sizeof(*info->value));
55e5c285 1489 info->value->CPU = cpu->cpu_index;
182735ef 1490 info->value->current = (cpu == first_cpu);
259186a7 1491 info->value->halted = cpu->halted;
58f88d4b 1492 info->value->qom_path = object_get_canonical_path(OBJECT(cpu));
9f09e18a 1493 info->value->thread_id = cpu->thread_id;
de0b36b6
LC
1494#if defined(TARGET_I386)
1495 info->value->has_pc = true;
1496 info->value->pc = env->eip + env->segs[R_CS].base;
1497#elif defined(TARGET_PPC)
1498 info->value->has_nip = true;
1499 info->value->nip = env->nip;
1500#elif defined(TARGET_SPARC)
1501 info->value->has_pc = true;
1502 info->value->pc = env->pc;
1503 info->value->has_npc = true;
1504 info->value->npc = env->npc;
1505#elif defined(TARGET_MIPS)
1506 info->value->has_PC = true;
1507 info->value->PC = env->active_tc.PC;
48e06fe0
BK
1508#elif defined(TARGET_TRICORE)
1509 info->value->has_PC = true;
1510 info->value->PC = env->PC;
de0b36b6
LC
1511#endif
1512
1513 /* XXX: waiting for the qapi to support GSList */
1514 if (!cur_item) {
1515 head = cur_item = info;
1516 } else {
1517 cur_item->next = info;
1518 cur_item = info;
1519 }
1520 }
1521
1522 return head;
1523}
0cfd6a9a
LC
1524
1525void qmp_memsave(int64_t addr, int64_t size, const char *filename,
1526 bool has_cpu, int64_t cpu_index, Error **errp)
1527{
1528 FILE *f;
1529 uint32_t l;
55e5c285 1530 CPUState *cpu;
0cfd6a9a 1531 uint8_t buf[1024];
0dc9daf0 1532 int64_t orig_addr = addr, orig_size = size;
0cfd6a9a
LC
1533
1534 if (!has_cpu) {
1535 cpu_index = 0;
1536 }
1537
151d1322
AF
1538 cpu = qemu_get_cpu(cpu_index);
1539 if (cpu == NULL) {
c6bd8c70
MA
1540 error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "cpu-index",
1541 "a CPU number");
0cfd6a9a
LC
1542 return;
1543 }
1544
1545 f = fopen(filename, "wb");
1546 if (!f) {
618da851 1547 error_setg_file_open(errp, errno, filename);
0cfd6a9a
LC
1548 return;
1549 }
1550
1551 while (size != 0) {
1552 l = sizeof(buf);
1553 if (l > size)
1554 l = size;
2f4d0f59 1555 if (cpu_memory_rw_debug(cpu, addr, buf, l, 0) != 0) {
0dc9daf0
BP
1556 error_setg(errp, "Invalid addr 0x%016" PRIx64 "/size %" PRId64
1557 " specified", orig_addr, orig_size);
2f4d0f59
AK
1558 goto exit;
1559 }
0cfd6a9a 1560 if (fwrite(buf, 1, l, f) != l) {
c6bd8c70 1561 error_setg(errp, QERR_IO_ERROR);
0cfd6a9a
LC
1562 goto exit;
1563 }
1564 addr += l;
1565 size -= l;
1566 }
1567
1568exit:
1569 fclose(f);
1570}
6d3962bf
LC
1571
1572void qmp_pmemsave(int64_t addr, int64_t size, const char *filename,
1573 Error **errp)
1574{
1575 FILE *f;
1576 uint32_t l;
1577 uint8_t buf[1024];
1578
1579 f = fopen(filename, "wb");
1580 if (!f) {
618da851 1581 error_setg_file_open(errp, errno, filename);
6d3962bf
LC
1582 return;
1583 }
1584
1585 while (size != 0) {
1586 l = sizeof(buf);
1587 if (l > size)
1588 l = size;
eb6282f2 1589 cpu_physical_memory_read(addr, buf, l);
6d3962bf 1590 if (fwrite(buf, 1, l, f) != l) {
c6bd8c70 1591 error_setg(errp, QERR_IO_ERROR);
6d3962bf
LC
1592 goto exit;
1593 }
1594 addr += l;
1595 size -= l;
1596 }
1597
1598exit:
1599 fclose(f);
1600}
ab49ab5c
LC
1601
1602void qmp_inject_nmi(Error **errp)
1603{
1604#if defined(TARGET_I386)
182735ef
AF
1605 CPUState *cs;
1606
bdc44640 1607 CPU_FOREACH(cs) {
182735ef 1608 X86CPU *cpu = X86_CPU(cs);
ab49ab5c 1609
02e51483 1610 if (!cpu->apic_state) {
182735ef 1611 cpu_interrupt(cs, CPU_INTERRUPT_NMI);
02c09195 1612 } else {
02e51483 1613 apic_deliver_nmi(cpu->apic_state);
02c09195 1614 }
ab49ab5c
LC
1615 }
1616#else
9cb805fd 1617 nmi_monitor_handle(monitor_get_cpu_index(), errp);
ab49ab5c
LC
1618#endif
1619}
27498bef
ST
1620
1621void dump_drift_info(FILE *f, fprintf_function cpu_fprintf)
1622{
1623 if (!use_icount) {
1624 return;
1625 }
1626
1627 cpu_fprintf(f, "Host - Guest clock %"PRIi64" ms\n",
1628 (cpu_get_clock() - cpu_get_icount())/SCALE_MS);
1629 if (icount_align_option) {
1630 cpu_fprintf(f, "Max guest delay %"PRIi64" ms\n", -max_delay/SCALE_MS);
1631 cpu_fprintf(f, "Max guest advance %"PRIi64" ms\n", max_advance/SCALE_MS);
1632 } else {
1633 cpu_fprintf(f, "Max guest delay NA\n");
1634 cpu_fprintf(f, "Max guest advance NA\n");
1635 }
1636}