]> git.proxmox.com Git - mirror_qemu.git/blame - cpus.c
nfs: Handle failure for potentially large allocations
[mirror_qemu.git] / cpus.c
CommitLineData
296af7c9
BS
1/*
2 * QEMU System Emulator
3 *
4 * Copyright (c) 2003-2008 Fabrice Bellard
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to deal
8 * in the Software without restriction, including without limitation the rights
9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 * copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22 * THE SOFTWARE.
23 */
24
25/* Needed early for CONFIG_BSD etc. */
26#include "config-host.h"
27
83c9089e 28#include "monitor/monitor.h"
a4e15de9 29#include "qapi/qmp/qerror.h"
9c17d615 30#include "sysemu/sysemu.h"
022c62cb 31#include "exec/gdbstub.h"
9c17d615
PB
32#include "sysemu/dma.h"
33#include "sysemu/kvm.h"
de0b36b6 34#include "qmp-commands.h"
296af7c9 35
1de7afc9 36#include "qemu/thread.h"
9c17d615
PB
37#include "sysemu/cpus.h"
38#include "sysemu/qtest.h"
1de7afc9
PB
39#include "qemu/main-loop.h"
40#include "qemu/bitmap.h"
cb365646 41#include "qemu/seqlock.h"
a4e15de9 42#include "qapi-event.h"
0ff0fc19
JK
43
44#ifndef _WIN32
1de7afc9 45#include "qemu/compatfd.h"
0ff0fc19 46#endif
296af7c9 47
6d9cb73c
JK
48#ifdef CONFIG_LINUX
49
50#include <sys/prctl.h>
51
c0532a76
MT
52#ifndef PR_MCE_KILL
53#define PR_MCE_KILL 33
54#endif
55
6d9cb73c
JK
56#ifndef PR_MCE_KILL_SET
57#define PR_MCE_KILL_SET 1
58#endif
59
60#ifndef PR_MCE_KILL_EARLY
61#define PR_MCE_KILL_EARLY 1
62#endif
63
64#endif /* CONFIG_LINUX */
65
182735ef 66static CPUState *next_cpu;
296af7c9 67
321bc0b2
TC
68bool cpu_is_stopped(CPUState *cpu)
69{
70 return cpu->stopped || !runstate_is_running();
71}
72
a98ae1d8 73static bool cpu_thread_is_idle(CPUState *cpu)
ac873f1e 74{
c64ca814 75 if (cpu->stop || cpu->queued_work_first) {
ac873f1e
PM
76 return false;
77 }
321bc0b2 78 if (cpu_is_stopped(cpu)) {
ac873f1e
PM
79 return true;
80 }
8c2e1b00 81 if (!cpu->halted || cpu_has_work(cpu) ||
215e79c0 82 kvm_halt_in_kernel()) {
ac873f1e
PM
83 return false;
84 }
85 return true;
86}
87
88static bool all_cpu_threads_idle(void)
89{
182735ef 90 CPUState *cpu;
ac873f1e 91
bdc44640 92 CPU_FOREACH(cpu) {
182735ef 93 if (!cpu_thread_is_idle(cpu)) {
ac873f1e
PM
94 return false;
95 }
96 }
97 return true;
98}
99
946fb27c
PB
100/***********************************************************/
101/* guest cycle counter */
102
a3270e19
PB
103/* Protected by TimersState seqlock */
104
105/* Compensate for varying guest execution speed. */
106static int64_t qemu_icount_bias;
107static int64_t vm_clock_warp_start;
946fb27c
PB
108/* Conversion factor from emulated instructions to virtual clock ticks. */
109static int icount_time_shift;
110/* Arbitrarily pick 1MIPS as the minimum allowable speed. */
111#define MAX_ICOUNT_SHIFT 10
a3270e19
PB
112
113/* Only written by TCG thread */
114static int64_t qemu_icount;
115
946fb27c
PB
116static QEMUTimer *icount_rt_timer;
117static QEMUTimer *icount_vm_timer;
118static QEMUTimer *icount_warp_timer;
946fb27c
PB
119
120typedef struct TimersState {
cb365646 121 /* Protected by BQL. */
946fb27c
PB
122 int64_t cpu_ticks_prev;
123 int64_t cpu_ticks_offset;
cb365646
LPF
124
125 /* cpu_clock_offset can be read out of BQL, so protect it with
126 * this lock.
127 */
128 QemuSeqLock vm_clock_seqlock;
946fb27c
PB
129 int64_t cpu_clock_offset;
130 int32_t cpu_ticks_enabled;
131 int64_t dummy;
132} TimersState;
133
d9cd4007 134static TimersState timers_state;
946fb27c
PB
135
136/* Return the virtual CPU time, based on the instruction counter. */
17a15f1b 137static int64_t cpu_get_icount_locked(void)
946fb27c
PB
138{
139 int64_t icount;
4917cf44 140 CPUState *cpu = current_cpu;
946fb27c
PB
141
142 icount = qemu_icount;
4917cf44 143 if (cpu) {
99df7dce 144 if (!cpu_can_do_io(cpu)) {
946fb27c
PB
145 fprintf(stderr, "Bad clock read\n");
146 }
28ecfd7a 147 icount -= (cpu->icount_decr.u16.low + cpu->icount_extra);
946fb27c
PB
148 }
149 return qemu_icount_bias + (icount << icount_time_shift);
150}
151
17a15f1b
PB
152int64_t cpu_get_icount(void)
153{
154 int64_t icount;
155 unsigned start;
156
157 do {
158 start = seqlock_read_begin(&timers_state.vm_clock_seqlock);
159 icount = cpu_get_icount_locked();
160 } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start));
161
162 return icount;
163}
164
946fb27c 165/* return the host CPU cycle counter and handle stop/restart */
cb365646 166/* Caller must hold the BQL */
946fb27c
PB
167int64_t cpu_get_ticks(void)
168{
5f3e3101
PB
169 int64_t ticks;
170
946fb27c
PB
171 if (use_icount) {
172 return cpu_get_icount();
173 }
5f3e3101
PB
174
175 ticks = timers_state.cpu_ticks_offset;
176 if (timers_state.cpu_ticks_enabled) {
177 ticks += cpu_get_real_ticks();
178 }
179
180 if (timers_state.cpu_ticks_prev > ticks) {
181 /* Note: non increasing ticks may happen if the host uses
182 software suspend */
183 timers_state.cpu_ticks_offset += timers_state.cpu_ticks_prev - ticks;
184 ticks = timers_state.cpu_ticks_prev;
946fb27c 185 }
5f3e3101
PB
186
187 timers_state.cpu_ticks_prev = ticks;
188 return ticks;
946fb27c
PB
189}
190
cb365646 191static int64_t cpu_get_clock_locked(void)
946fb27c 192{
5f3e3101 193 int64_t ticks;
cb365646 194
5f3e3101
PB
195 ticks = timers_state.cpu_clock_offset;
196 if (timers_state.cpu_ticks_enabled) {
197 ticks += get_clock();
946fb27c 198 }
cb365646 199
5f3e3101 200 return ticks;
cb365646
LPF
201}
202
203/* return the host CPU monotonic timer and handle stop/restart */
204int64_t cpu_get_clock(void)
205{
206 int64_t ti;
207 unsigned start;
208
209 do {
210 start = seqlock_read_begin(&timers_state.vm_clock_seqlock);
211 ti = cpu_get_clock_locked();
212 } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start));
213
214 return ti;
946fb27c
PB
215}
216
cb365646
LPF
217/* enable cpu_get_ticks()
218 * Caller must hold BQL which server as mutex for vm_clock_seqlock.
219 */
946fb27c
PB
220void cpu_enable_ticks(void)
221{
cb365646
LPF
222 /* Here, the really thing protected by seqlock is cpu_clock_offset. */
223 seqlock_write_lock(&timers_state.vm_clock_seqlock);
946fb27c
PB
224 if (!timers_state.cpu_ticks_enabled) {
225 timers_state.cpu_ticks_offset -= cpu_get_real_ticks();
226 timers_state.cpu_clock_offset -= get_clock();
227 timers_state.cpu_ticks_enabled = 1;
228 }
cb365646 229 seqlock_write_unlock(&timers_state.vm_clock_seqlock);
946fb27c
PB
230}
231
232/* disable cpu_get_ticks() : the clock is stopped. You must not call
cb365646
LPF
233 * cpu_get_ticks() after that.
234 * Caller must hold BQL which server as mutex for vm_clock_seqlock.
235 */
946fb27c
PB
236void cpu_disable_ticks(void)
237{
cb365646
LPF
238 /* Here, the really thing protected by seqlock is cpu_clock_offset. */
239 seqlock_write_lock(&timers_state.vm_clock_seqlock);
946fb27c 240 if (timers_state.cpu_ticks_enabled) {
5f3e3101 241 timers_state.cpu_ticks_offset += cpu_get_real_ticks();
cb365646 242 timers_state.cpu_clock_offset = cpu_get_clock_locked();
946fb27c
PB
243 timers_state.cpu_ticks_enabled = 0;
244 }
cb365646 245 seqlock_write_unlock(&timers_state.vm_clock_seqlock);
946fb27c
PB
246}
247
248/* Correlation between real and virtual time is always going to be
249 fairly approximate, so ignore small variation.
250 When the guest is idle real and virtual time will be aligned in
251 the IO wait loop. */
252#define ICOUNT_WOBBLE (get_ticks_per_sec() / 10)
253
254static void icount_adjust(void)
255{
256 int64_t cur_time;
257 int64_t cur_icount;
258 int64_t delta;
a3270e19
PB
259
260 /* Protected by TimersState mutex. */
946fb27c 261 static int64_t last_delta;
468cc7cf 262
946fb27c
PB
263 /* If the VM is not running, then do nothing. */
264 if (!runstate_is_running()) {
265 return;
266 }
468cc7cf 267
17a15f1b
PB
268 seqlock_write_lock(&timers_state.vm_clock_seqlock);
269 cur_time = cpu_get_clock_locked();
270 cur_icount = cpu_get_icount_locked();
468cc7cf 271
946fb27c
PB
272 delta = cur_icount - cur_time;
273 /* FIXME: This is a very crude algorithm, somewhat prone to oscillation. */
274 if (delta > 0
275 && last_delta + ICOUNT_WOBBLE < delta * 2
276 && icount_time_shift > 0) {
277 /* The guest is getting too far ahead. Slow time down. */
278 icount_time_shift--;
279 }
280 if (delta < 0
281 && last_delta - ICOUNT_WOBBLE > delta * 2
282 && icount_time_shift < MAX_ICOUNT_SHIFT) {
283 /* The guest is getting too far behind. Speed time up. */
284 icount_time_shift++;
285 }
286 last_delta = delta;
287 qemu_icount_bias = cur_icount - (qemu_icount << icount_time_shift);
17a15f1b 288 seqlock_write_unlock(&timers_state.vm_clock_seqlock);
946fb27c
PB
289}
290
291static void icount_adjust_rt(void *opaque)
292{
40daca54
AB
293 timer_mod(icount_rt_timer,
294 qemu_clock_get_ms(QEMU_CLOCK_REALTIME) + 1000);
946fb27c
PB
295 icount_adjust();
296}
297
298static void icount_adjust_vm(void *opaque)
299{
40daca54
AB
300 timer_mod(icount_vm_timer,
301 qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
302 get_ticks_per_sec() / 10);
946fb27c
PB
303 icount_adjust();
304}
305
306static int64_t qemu_icount_round(int64_t count)
307{
308 return (count + (1 << icount_time_shift) - 1) >> icount_time_shift;
309}
310
311static void icount_warp_rt(void *opaque)
312{
17a15f1b
PB
313 /* The icount_warp_timer is rescheduled soon after vm_clock_warp_start
314 * changes from -1 to another value, so the race here is okay.
315 */
316 if (atomic_read(&vm_clock_warp_start) == -1) {
946fb27c
PB
317 return;
318 }
319
17a15f1b 320 seqlock_write_lock(&timers_state.vm_clock_seqlock);
946fb27c 321 if (runstate_is_running()) {
40daca54 322 int64_t clock = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
8ed961d9
PB
323 int64_t warp_delta;
324
325 warp_delta = clock - vm_clock_warp_start;
326 if (use_icount == 2) {
946fb27c 327 /*
40daca54 328 * In adaptive mode, do not let QEMU_CLOCK_VIRTUAL run too
946fb27c
PB
329 * far ahead of real time.
330 */
17a15f1b
PB
331 int64_t cur_time = cpu_get_clock_locked();
332 int64_t cur_icount = cpu_get_icount_locked();
946fb27c 333 int64_t delta = cur_time - cur_icount;
8ed961d9 334 warp_delta = MIN(warp_delta, delta);
946fb27c 335 }
8ed961d9 336 qemu_icount_bias += warp_delta;
946fb27c
PB
337 }
338 vm_clock_warp_start = -1;
17a15f1b 339 seqlock_write_unlock(&timers_state.vm_clock_seqlock);
8ed961d9
PB
340
341 if (qemu_clock_expired(QEMU_CLOCK_VIRTUAL)) {
342 qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
343 }
946fb27c
PB
344}
345
8156be56
PB
346void qtest_clock_warp(int64_t dest)
347{
40daca54 348 int64_t clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
8156be56
PB
349 assert(qtest_enabled());
350 while (clock < dest) {
40daca54 351 int64_t deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
c9299e2f 352 int64_t warp = qemu_soonest_timeout(dest - clock, deadline);
17a15f1b 353 seqlock_write_lock(&timers_state.vm_clock_seqlock);
8156be56 354 qemu_icount_bias += warp;
17a15f1b
PB
355 seqlock_write_unlock(&timers_state.vm_clock_seqlock);
356
40daca54
AB
357 qemu_clock_run_timers(QEMU_CLOCK_VIRTUAL);
358 clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
8156be56 359 }
40daca54 360 qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
8156be56
PB
361}
362
40daca54 363void qemu_clock_warp(QEMUClockType type)
946fb27c 364{
ce78d18c 365 int64_t clock;
946fb27c
PB
366 int64_t deadline;
367
368 /*
369 * There are too many global variables to make the "warp" behavior
370 * applicable to other clocks. But a clock argument removes the
371 * need for if statements all over the place.
372 */
40daca54 373 if (type != QEMU_CLOCK_VIRTUAL || !use_icount) {
946fb27c
PB
374 return;
375 }
376
377 /*
40daca54
AB
378 * If the CPUs have been sleeping, advance QEMU_CLOCK_VIRTUAL timer now.
379 * This ensures that the deadline for the timer is computed correctly below.
946fb27c
PB
380 * This also makes sure that the insn counter is synchronized before the
381 * CPU starts running, in case the CPU is woken by an event other than
40daca54 382 * the earliest QEMU_CLOCK_VIRTUAL timer.
946fb27c
PB
383 */
384 icount_warp_rt(NULL);
ce78d18c
PB
385 timer_del(icount_warp_timer);
386 if (!all_cpu_threads_idle()) {
946fb27c
PB
387 return;
388 }
389
8156be56
PB
390 if (qtest_enabled()) {
391 /* When testing, qtest commands advance icount. */
392 return;
393 }
394
ac70aafc 395 /* We want to use the earliest deadline from ALL vm_clocks */
ce78d18c 396 clock = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
40daca54 397 deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
ce78d18c
PB
398 if (deadline < 0) {
399 return;
ac70aafc
AB
400 }
401
946fb27c
PB
402 if (deadline > 0) {
403 /*
40daca54 404 * Ensure QEMU_CLOCK_VIRTUAL proceeds even when the virtual CPU goes to
946fb27c
PB
405 * sleep. Otherwise, the CPU might be waiting for a future timer
406 * interrupt to wake it up, but the interrupt never comes because
407 * the vCPU isn't running any insns and thus doesn't advance the
40daca54 408 * QEMU_CLOCK_VIRTUAL.
946fb27c
PB
409 *
410 * An extreme solution for this problem would be to never let VCPUs
40daca54
AB
411 * sleep in icount mode if there is a pending QEMU_CLOCK_VIRTUAL
412 * timer; rather time could just advance to the next QEMU_CLOCK_VIRTUAL
413 * event. Instead, we do stop VCPUs and only advance QEMU_CLOCK_VIRTUAL
414 * after some e"real" time, (related to the time left until the next
415 * event) has passed. The QEMU_CLOCK_REALTIME timer will do this.
416 * This avoids that the warps are visible externally; for example,
417 * you will not be sending network packets continuously instead of
418 * every 100ms.
946fb27c 419 */
17a15f1b 420 seqlock_write_lock(&timers_state.vm_clock_seqlock);
ce78d18c
PB
421 if (vm_clock_warp_start == -1 || vm_clock_warp_start > clock) {
422 vm_clock_warp_start = clock;
423 }
17a15f1b 424 seqlock_write_unlock(&timers_state.vm_clock_seqlock);
ce78d18c 425 timer_mod_anticipate(icount_warp_timer, clock + deadline);
ac70aafc 426 } else if (deadline == 0) {
40daca54 427 qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
946fb27c
PB
428 }
429}
430
431static const VMStateDescription vmstate_timers = {
432 .name = "timer",
433 .version_id = 2,
434 .minimum_version_id = 1,
35d08458 435 .fields = (VMStateField[]) {
946fb27c
PB
436 VMSTATE_INT64(cpu_ticks_offset, TimersState),
437 VMSTATE_INT64(dummy, TimersState),
438 VMSTATE_INT64_V(cpu_clock_offset, TimersState, 2),
439 VMSTATE_END_OF_LIST()
440 }
441};
442
443void configure_icount(const char *option)
444{
cb365646 445 seqlock_init(&timers_state.vm_clock_seqlock, NULL);
946fb27c
PB
446 vmstate_register(NULL, 0, &vmstate_timers, &timers_state);
447 if (!option) {
448 return;
449 }
450
40daca54
AB
451 icount_warp_timer = timer_new_ns(QEMU_CLOCK_REALTIME,
452 icount_warp_rt, NULL);
946fb27c
PB
453 if (strcmp(option, "auto") != 0) {
454 icount_time_shift = strtol(option, NULL, 0);
455 use_icount = 1;
456 return;
457 }
458
459 use_icount = 2;
460
461 /* 125MIPS seems a reasonable initial guess at the guest speed.
462 It will be corrected fairly quickly anyway. */
463 icount_time_shift = 3;
464
465 /* Have both realtime and virtual time triggers for speed adjustment.
466 The realtime trigger catches emulated time passing too slowly,
467 the virtual time trigger catches emulated time passing too fast.
468 Realtime triggers occur even when idle, so use them less frequently
469 than VM triggers. */
40daca54
AB
470 icount_rt_timer = timer_new_ms(QEMU_CLOCK_REALTIME,
471 icount_adjust_rt, NULL);
472 timer_mod(icount_rt_timer,
473 qemu_clock_get_ms(QEMU_CLOCK_REALTIME) + 1000);
474 icount_vm_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
475 icount_adjust_vm, NULL);
476 timer_mod(icount_vm_timer,
477 qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
478 get_ticks_per_sec() / 10);
946fb27c
PB
479}
480
296af7c9
BS
481/***********************************************************/
482void hw_error(const char *fmt, ...)
483{
484 va_list ap;
55e5c285 485 CPUState *cpu;
296af7c9
BS
486
487 va_start(ap, fmt);
488 fprintf(stderr, "qemu: hardware error: ");
489 vfprintf(stderr, fmt, ap);
490 fprintf(stderr, "\n");
bdc44640 491 CPU_FOREACH(cpu) {
55e5c285 492 fprintf(stderr, "CPU #%d:\n", cpu->cpu_index);
878096ee 493 cpu_dump_state(cpu, stderr, fprintf, CPU_DUMP_FPU);
296af7c9
BS
494 }
495 va_end(ap);
496 abort();
497}
498
499void cpu_synchronize_all_states(void)
500{
182735ef 501 CPUState *cpu;
296af7c9 502
bdc44640 503 CPU_FOREACH(cpu) {
182735ef 504 cpu_synchronize_state(cpu);
296af7c9
BS
505 }
506}
507
508void cpu_synchronize_all_post_reset(void)
509{
182735ef 510 CPUState *cpu;
296af7c9 511
bdc44640 512 CPU_FOREACH(cpu) {
182735ef 513 cpu_synchronize_post_reset(cpu);
296af7c9
BS
514 }
515}
516
517void cpu_synchronize_all_post_init(void)
518{
182735ef 519 CPUState *cpu;
296af7c9 520
bdc44640 521 CPU_FOREACH(cpu) {
182735ef 522 cpu_synchronize_post_init(cpu);
296af7c9
BS
523 }
524}
525
56983463 526static int do_vm_stop(RunState state)
296af7c9 527{
56983463
KW
528 int ret = 0;
529
1354869c 530 if (runstate_is_running()) {
296af7c9 531 cpu_disable_ticks();
296af7c9 532 pause_all_vcpus();
f5bbfba1 533 runstate_set(state);
1dfb4dd9 534 vm_state_notify(0, state);
a4e15de9 535 qapi_event_send_stop(&error_abort);
296af7c9 536 }
56983463 537
594a45ce
KW
538 bdrv_drain_all();
539 ret = bdrv_flush_all();
540
56983463 541 return ret;
296af7c9
BS
542}
543
a1fcaa73 544static bool cpu_can_run(CPUState *cpu)
296af7c9 545{
4fdeee7c 546 if (cpu->stop) {
a1fcaa73 547 return false;
0ab07c62 548 }
321bc0b2 549 if (cpu_is_stopped(cpu)) {
a1fcaa73 550 return false;
0ab07c62 551 }
a1fcaa73 552 return true;
296af7c9
BS
553}
554
91325046 555static void cpu_handle_guest_debug(CPUState *cpu)
83f338f7 556{
64f6b346 557 gdb_set_stop_cpu(cpu);
8cf71710 558 qemu_system_debug_request();
f324e766 559 cpu->stopped = true;
3c638d06
JK
560}
561
714bd040
PB
562static void cpu_signal(int sig)
563{
4917cf44
AF
564 if (current_cpu) {
565 cpu_exit(current_cpu);
714bd040
PB
566 }
567 exit_request = 1;
568}
714bd040 569
6d9cb73c
JK
570#ifdef CONFIG_LINUX
571static void sigbus_reraise(void)
572{
573 sigset_t set;
574 struct sigaction action;
575
576 memset(&action, 0, sizeof(action));
577 action.sa_handler = SIG_DFL;
578 if (!sigaction(SIGBUS, &action, NULL)) {
579 raise(SIGBUS);
580 sigemptyset(&set);
581 sigaddset(&set, SIGBUS);
582 sigprocmask(SIG_UNBLOCK, &set, NULL);
583 }
584 perror("Failed to re-raise SIGBUS!\n");
585 abort();
586}
587
588static void sigbus_handler(int n, struct qemu_signalfd_siginfo *siginfo,
589 void *ctx)
590{
591 if (kvm_on_sigbus(siginfo->ssi_code,
592 (void *)(intptr_t)siginfo->ssi_addr)) {
593 sigbus_reraise();
594 }
595}
596
597static void qemu_init_sigbus(void)
598{
599 struct sigaction action;
600
601 memset(&action, 0, sizeof(action));
602 action.sa_flags = SA_SIGINFO;
603 action.sa_sigaction = (void (*)(int, siginfo_t*, void*))sigbus_handler;
604 sigaction(SIGBUS, &action, NULL);
605
606 prctl(PR_MCE_KILL, PR_MCE_KILL_SET, PR_MCE_KILL_EARLY, 0, 0);
607}
608
290adf38 609static void qemu_kvm_eat_signals(CPUState *cpu)
1ab3c6c0
JK
610{
611 struct timespec ts = { 0, 0 };
612 siginfo_t siginfo;
613 sigset_t waitset;
614 sigset_t chkset;
615 int r;
616
617 sigemptyset(&waitset);
618 sigaddset(&waitset, SIG_IPI);
619 sigaddset(&waitset, SIGBUS);
620
621 do {
622 r = sigtimedwait(&waitset, &siginfo, &ts);
623 if (r == -1 && !(errno == EAGAIN || errno == EINTR)) {
624 perror("sigtimedwait");
625 exit(1);
626 }
627
628 switch (r) {
629 case SIGBUS:
290adf38 630 if (kvm_on_sigbus_vcpu(cpu, siginfo.si_code, siginfo.si_addr)) {
1ab3c6c0
JK
631 sigbus_reraise();
632 }
633 break;
634 default:
635 break;
636 }
637
638 r = sigpending(&chkset);
639 if (r == -1) {
640 perror("sigpending");
641 exit(1);
642 }
643 } while (sigismember(&chkset, SIG_IPI) || sigismember(&chkset, SIGBUS));
1ab3c6c0
JK
644}
645
6d9cb73c
JK
646#else /* !CONFIG_LINUX */
647
/* Non-Linux build: there is no SIGBUS setup to perform. */
static void qemu_init_sigbus(void)
{
    /* intentionally empty */
}
1ab3c6c0 651
290adf38 652static void qemu_kvm_eat_signals(CPUState *cpu)
1ab3c6c0
JK
653{
654}
6d9cb73c
JK
655#endif /* !CONFIG_LINUX */
656
296af7c9 657#ifndef _WIN32
55f8d6ac
JK
/* Signal handler that deliberately does nothing; @sig is unused. */
static void dummy_signal(int sig)
{
    /* intentionally empty */
}
55f8d6ac 661
13618e05 662static void qemu_kvm_init_cpu_signals(CPUState *cpu)
714bd040
PB
663{
664 int r;
665 sigset_t set;
666 struct sigaction sigact;
667
668 memset(&sigact, 0, sizeof(sigact));
669 sigact.sa_handler = dummy_signal;
670 sigaction(SIG_IPI, &sigact, NULL);
671
714bd040
PB
672 pthread_sigmask(SIG_BLOCK, NULL, &set);
673 sigdelset(&set, SIG_IPI);
714bd040 674 sigdelset(&set, SIGBUS);
491d6e80 675 r = kvm_set_signal_mask(cpu, &set);
714bd040
PB
676 if (r) {
677 fprintf(stderr, "kvm_set_signal_mask: %s\n", strerror(-r));
678 exit(1);
679 }
680}
681
682static void qemu_tcg_init_cpu_signals(void)
683{
714bd040
PB
684 sigset_t set;
685 struct sigaction sigact;
686
687 memset(&sigact, 0, sizeof(sigact));
688 sigact.sa_handler = cpu_signal;
689 sigaction(SIG_IPI, &sigact, NULL);
690
691 sigemptyset(&set);
692 sigaddset(&set, SIG_IPI);
693 pthread_sigmask(SIG_UNBLOCK, &set, NULL);
714bd040
PB
694}
695
55f8d6ac 696#else /* _WIN32 */
13618e05 697static void qemu_kvm_init_cpu_signals(CPUState *cpu)
ff48eb5f 698{
714bd040
PB
699 abort();
700}
ff48eb5f 701
714bd040
PB
/* Win32 build: no signal setup is performed for the TCG thread. */
static void qemu_tcg_init_cpu_signals(void)
{
    /* intentionally empty */
}
714bd040 705#endif /* _WIN32 */
ff48eb5f 706
b2532d88 707static QemuMutex qemu_global_mutex;
46daff13
PB
708static QemuCond qemu_io_proceeded_cond;
709static bool iothread_requesting_mutex;
296af7c9
BS
710
711static QemuThread io_thread;
712
713static QemuThread *tcg_cpu_thread;
714static QemuCond *tcg_halt_cond;
715
296af7c9
BS
716/* cpu creation */
717static QemuCond qemu_cpu_cond;
718/* system init */
296af7c9 719static QemuCond qemu_pause_cond;
e82bcec2 720static QemuCond qemu_work_cond;
296af7c9 721
d3b12f5d 722void qemu_init_cpu_loop(void)
296af7c9 723{
6d9cb73c 724 qemu_init_sigbus();
ed94592b 725 qemu_cond_init(&qemu_cpu_cond);
ed94592b
AL
726 qemu_cond_init(&qemu_pause_cond);
727 qemu_cond_init(&qemu_work_cond);
46daff13 728 qemu_cond_init(&qemu_io_proceeded_cond);
296af7c9 729 qemu_mutex_init(&qemu_global_mutex);
296af7c9 730
b7680cb6 731 qemu_thread_get_self(&io_thread);
296af7c9
BS
732}
733
f100f0b3 734void run_on_cpu(CPUState *cpu, void (*func)(void *data), void *data)
e82bcec2
MT
735{
736 struct qemu_work_item wi;
737
60e82579 738 if (qemu_cpu_is_self(cpu)) {
e82bcec2
MT
739 func(data);
740 return;
741 }
742
743 wi.func = func;
744 wi.data = data;
3c02270d 745 wi.free = false;
c64ca814
AF
746 if (cpu->queued_work_first == NULL) {
747 cpu->queued_work_first = &wi;
0ab07c62 748 } else {
c64ca814 749 cpu->queued_work_last->next = &wi;
0ab07c62 750 }
c64ca814 751 cpu->queued_work_last = &wi;
e82bcec2
MT
752 wi.next = NULL;
753 wi.done = false;
754
c08d7424 755 qemu_cpu_kick(cpu);
e82bcec2 756 while (!wi.done) {
4917cf44 757 CPUState *self_cpu = current_cpu;
e82bcec2
MT
758
759 qemu_cond_wait(&qemu_work_cond, &qemu_global_mutex);
4917cf44 760 current_cpu = self_cpu;
e82bcec2
MT
761 }
762}
763
3c02270d
CV
764void async_run_on_cpu(CPUState *cpu, void (*func)(void *data), void *data)
765{
766 struct qemu_work_item *wi;
767
768 if (qemu_cpu_is_self(cpu)) {
769 func(data);
770 return;
771 }
772
773 wi = g_malloc0(sizeof(struct qemu_work_item));
774 wi->func = func;
775 wi->data = data;
776 wi->free = true;
777 if (cpu->queued_work_first == NULL) {
778 cpu->queued_work_first = wi;
779 } else {
780 cpu->queued_work_last->next = wi;
781 }
782 cpu->queued_work_last = wi;
783 wi->next = NULL;
784 wi->done = false;
785
786 qemu_cpu_kick(cpu);
787}
788
6d45b109 789static void flush_queued_work(CPUState *cpu)
e82bcec2
MT
790{
791 struct qemu_work_item *wi;
792
c64ca814 793 if (cpu->queued_work_first == NULL) {
e82bcec2 794 return;
0ab07c62 795 }
e82bcec2 796
c64ca814
AF
797 while ((wi = cpu->queued_work_first)) {
798 cpu->queued_work_first = wi->next;
e82bcec2
MT
799 wi->func(wi->data);
800 wi->done = true;
3c02270d
CV
801 if (wi->free) {
802 g_free(wi);
803 }
e82bcec2 804 }
c64ca814 805 cpu->queued_work_last = NULL;
e82bcec2
MT
806 qemu_cond_broadcast(&qemu_work_cond);
807}
808
509a0d78 809static void qemu_wait_io_event_common(CPUState *cpu)
296af7c9 810{
4fdeee7c
AF
811 if (cpu->stop) {
812 cpu->stop = false;
f324e766 813 cpu->stopped = true;
296af7c9
BS
814 qemu_cond_signal(&qemu_pause_cond);
815 }
6d45b109 816 flush_queued_work(cpu);
216fc9a4 817 cpu->thread_kicked = false;
296af7c9
BS
818}
819
6cabe1f3 820static void qemu_tcg_wait_io_event(void)
296af7c9 821{
182735ef 822 CPUState *cpu;
6cabe1f3 823
16400322 824 while (all_cpu_threads_idle()) {
ab33fcda
PB
825 /* Start accounting real time to the virtual clock if the CPUs
826 are idle. */
40daca54 827 qemu_clock_warp(QEMU_CLOCK_VIRTUAL);
9705fbb5 828 qemu_cond_wait(tcg_halt_cond, &qemu_global_mutex);
16400322 829 }
296af7c9 830
46daff13
PB
831 while (iothread_requesting_mutex) {
832 qemu_cond_wait(&qemu_io_proceeded_cond, &qemu_global_mutex);
833 }
6cabe1f3 834
bdc44640 835 CPU_FOREACH(cpu) {
182735ef 836 qemu_wait_io_event_common(cpu);
6cabe1f3 837 }
296af7c9
BS
838}
839
fd529e8f 840static void qemu_kvm_wait_io_event(CPUState *cpu)
296af7c9 841{
a98ae1d8 842 while (cpu_thread_is_idle(cpu)) {
f5c121b8 843 qemu_cond_wait(cpu->halt_cond, &qemu_global_mutex);
16400322 844 }
296af7c9 845
290adf38 846 qemu_kvm_eat_signals(cpu);
509a0d78 847 qemu_wait_io_event_common(cpu);
296af7c9
BS
848}
849
7e97cd88 850static void *qemu_kvm_cpu_thread_fn(void *arg)
296af7c9 851{
48a106bd 852 CPUState *cpu = arg;
84b4915d 853 int r;
296af7c9 854
6164e6d6 855 qemu_mutex_lock(&qemu_global_mutex);
814e612e 856 qemu_thread_get_self(cpu->thread);
9f09e18a 857 cpu->thread_id = qemu_get_thread_id();
4917cf44 858 current_cpu = cpu;
296af7c9 859
504134d2 860 r = kvm_init_vcpu(cpu);
84b4915d
JK
861 if (r < 0) {
862 fprintf(stderr, "kvm_init_vcpu failed: %s\n", strerror(-r));
863 exit(1);
864 }
296af7c9 865
13618e05 866 qemu_kvm_init_cpu_signals(cpu);
296af7c9
BS
867
868 /* signal CPU creation */
61a46217 869 cpu->created = true;
296af7c9
BS
870 qemu_cond_signal(&qemu_cpu_cond);
871
296af7c9 872 while (1) {
a1fcaa73 873 if (cpu_can_run(cpu)) {
1458c363 874 r = kvm_cpu_exec(cpu);
83f338f7 875 if (r == EXCP_DEBUG) {
91325046 876 cpu_handle_guest_debug(cpu);
83f338f7 877 }
0ab07c62 878 }
fd529e8f 879 qemu_kvm_wait_io_event(cpu);
296af7c9
BS
880 }
881
882 return NULL;
883}
884
c7f0f3b1
AL
885static void *qemu_dummy_cpu_thread_fn(void *arg)
886{
887#ifdef _WIN32
888 fprintf(stderr, "qtest is not supported under Windows\n");
889 exit(1);
890#else
10a9021d 891 CPUState *cpu = arg;
c7f0f3b1
AL
892 sigset_t waitset;
893 int r;
894
895 qemu_mutex_lock_iothread();
814e612e 896 qemu_thread_get_self(cpu->thread);
9f09e18a 897 cpu->thread_id = qemu_get_thread_id();
c7f0f3b1
AL
898
899 sigemptyset(&waitset);
900 sigaddset(&waitset, SIG_IPI);
901
902 /* signal CPU creation */
61a46217 903 cpu->created = true;
c7f0f3b1
AL
904 qemu_cond_signal(&qemu_cpu_cond);
905
4917cf44 906 current_cpu = cpu;
c7f0f3b1 907 while (1) {
4917cf44 908 current_cpu = NULL;
c7f0f3b1
AL
909 qemu_mutex_unlock_iothread();
910 do {
911 int sig;
912 r = sigwait(&waitset, &sig);
913 } while (r == -1 && (errno == EAGAIN || errno == EINTR));
914 if (r == -1) {
915 perror("sigwait");
916 exit(1);
917 }
918 qemu_mutex_lock_iothread();
4917cf44 919 current_cpu = cpu;
509a0d78 920 qemu_wait_io_event_common(cpu);
c7f0f3b1
AL
921 }
922
923 return NULL;
924#endif
925}
926
bdb7ca67
JK
927static void tcg_exec_all(void);
928
7e97cd88 929static void *qemu_tcg_cpu_thread_fn(void *arg)
296af7c9 930{
c3586ba7 931 CPUState *cpu = arg;
296af7c9 932
55f8d6ac 933 qemu_tcg_init_cpu_signals();
814e612e 934 qemu_thread_get_self(cpu->thread);
296af7c9 935
296af7c9 936 qemu_mutex_lock(&qemu_global_mutex);
38fcbd3f
AF
937 CPU_FOREACH(cpu) {
938 cpu->thread_id = qemu_get_thread_id();
939 cpu->created = true;
940 }
296af7c9
BS
941 qemu_cond_signal(&qemu_cpu_cond);
942
fa7d1867 943 /* wait for initial kick-off after machine start */
bdc44640 944 while (QTAILQ_FIRST(&cpus)->stopped) {
fa7d1867 945 qemu_cond_wait(tcg_halt_cond, &qemu_global_mutex);
8e564b4e
JK
946
947 /* process any pending work */
bdc44640 948 CPU_FOREACH(cpu) {
182735ef 949 qemu_wait_io_event_common(cpu);
8e564b4e 950 }
0ab07c62 951 }
296af7c9
BS
952
953 while (1) {
bdb7ca67 954 tcg_exec_all();
ac70aafc
AB
955
956 if (use_icount) {
40daca54 957 int64_t deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
ac70aafc
AB
958
959 if (deadline == 0) {
40daca54 960 qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
ac70aafc 961 }
3b2319a3 962 }
6cabe1f3 963 qemu_tcg_wait_io_event();
296af7c9
BS
964 }
965
966 return NULL;
967}
968
2ff09a40 969static void qemu_cpu_kick_thread(CPUState *cpu)
cc015e9a
PB
970{
971#ifndef _WIN32
972 int err;
973
814e612e 974 err = pthread_kill(cpu->thread->thread, SIG_IPI);
cc015e9a
PB
975 if (err) {
976 fprintf(stderr, "qemu:%s: %s", __func__, strerror(err));
977 exit(1);
978 }
979#else /* _WIN32 */
60e82579 980 if (!qemu_cpu_is_self(cpu)) {
ed9164a3
OH
981 CONTEXT tcgContext;
982
983 if (SuspendThread(cpu->hThread) == (DWORD)-1) {
7f1721df 984 fprintf(stderr, "qemu:%s: GetLastError:%lu\n", __func__,
ed9164a3
OH
985 GetLastError());
986 exit(1);
987 }
988
989 /* On multi-core systems, we are not sure that the thread is actually
990 * suspended until we can get the context.
991 */
992 tcgContext.ContextFlags = CONTEXT_CONTROL;
993 while (GetThreadContext(cpu->hThread, &tcgContext) != 0) {
994 continue;
995 }
996
cc015e9a 997 cpu_signal(0);
ed9164a3
OH
998
999 if (ResumeThread(cpu->hThread) == (DWORD)-1) {
7f1721df 1000 fprintf(stderr, "qemu:%s: GetLastError:%lu\n", __func__,
ed9164a3
OH
1001 GetLastError());
1002 exit(1);
1003 }
cc015e9a
PB
1004 }
1005#endif
1006}
1007
c08d7424 1008void qemu_cpu_kick(CPUState *cpu)
296af7c9 1009{
f5c121b8 1010 qemu_cond_broadcast(cpu->halt_cond);
216fc9a4 1011 if (!tcg_enabled() && !cpu->thread_kicked) {
2ff09a40 1012 qemu_cpu_kick_thread(cpu);
216fc9a4 1013 cpu->thread_kicked = true;
aa2c364b 1014 }
296af7c9
BS
1015}
1016
46d62fac 1017void qemu_cpu_kick_self(void)
296af7c9 1018{
b55c22c6 1019#ifndef _WIN32
4917cf44 1020 assert(current_cpu);
296af7c9 1021
4917cf44
AF
1022 if (!current_cpu->thread_kicked) {
1023 qemu_cpu_kick_thread(current_cpu);
1024 current_cpu->thread_kicked = true;
296af7c9 1025 }
b55c22c6
PB
1026#else
1027 abort();
1028#endif
296af7c9
BS
1029}
1030
60e82579 1031bool qemu_cpu_is_self(CPUState *cpu)
296af7c9 1032{
814e612e 1033 return qemu_thread_is_self(cpu->thread);
296af7c9
BS
1034}
1035
aa723c23
JQ
1036static bool qemu_in_vcpu_thread(void)
1037{
4917cf44 1038 return current_cpu && qemu_cpu_is_self(current_cpu);
aa723c23
JQ
1039}
1040
296af7c9
BS
1041void qemu_mutex_lock_iothread(void)
1042{
c7f0f3b1 1043 if (!tcg_enabled()) {
296af7c9 1044 qemu_mutex_lock(&qemu_global_mutex);
1a28cac3 1045 } else {
46daff13 1046 iothread_requesting_mutex = true;
1a28cac3 1047 if (qemu_mutex_trylock(&qemu_global_mutex)) {
182735ef 1048 qemu_cpu_kick_thread(first_cpu);
1a28cac3
MT
1049 qemu_mutex_lock(&qemu_global_mutex);
1050 }
46daff13
PB
1051 iothread_requesting_mutex = false;
1052 qemu_cond_broadcast(&qemu_io_proceeded_cond);
1a28cac3 1053 }
296af7c9
BS
1054}
1055
1056void qemu_mutex_unlock_iothread(void)
1057{
1058 qemu_mutex_unlock(&qemu_global_mutex);
1059}
1060
1061static int all_vcpus_paused(void)
1062{
bdc44640 1063 CPUState *cpu;
296af7c9 1064
bdc44640 1065 CPU_FOREACH(cpu) {
182735ef 1066 if (!cpu->stopped) {
296af7c9 1067 return 0;
0ab07c62 1068 }
296af7c9
BS
1069 }
1070
1071 return 1;
1072}
1073
1074void pause_all_vcpus(void)
1075{
bdc44640 1076 CPUState *cpu;
296af7c9 1077
40daca54 1078 qemu_clock_enable(QEMU_CLOCK_VIRTUAL, false);
bdc44640 1079 CPU_FOREACH(cpu) {
182735ef
AF
1080 cpu->stop = true;
1081 qemu_cpu_kick(cpu);
296af7c9
BS
1082 }
1083
aa723c23 1084 if (qemu_in_vcpu_thread()) {
d798e974
JK
1085 cpu_stop_current();
1086 if (!kvm_enabled()) {
bdc44640 1087 CPU_FOREACH(cpu) {
182735ef
AF
1088 cpu->stop = false;
1089 cpu->stopped = true;
d798e974
JK
1090 }
1091 return;
1092 }
1093 }
1094
296af7c9 1095 while (!all_vcpus_paused()) {
be7d6c57 1096 qemu_cond_wait(&qemu_pause_cond, &qemu_global_mutex);
bdc44640 1097 CPU_FOREACH(cpu) {
182735ef 1098 qemu_cpu_kick(cpu);
296af7c9
BS
1099 }
1100 }
1101}
1102
2993683b
IM
1103void cpu_resume(CPUState *cpu)
1104{
1105 cpu->stop = false;
1106 cpu->stopped = false;
1107 qemu_cpu_kick(cpu);
1108}
1109
296af7c9
BS
1110void resume_all_vcpus(void)
1111{
bdc44640 1112 CPUState *cpu;
296af7c9 1113
40daca54 1114 qemu_clock_enable(QEMU_CLOCK_VIRTUAL, true);
bdc44640 1115 CPU_FOREACH(cpu) {
182735ef 1116 cpu_resume(cpu);
296af7c9
BS
1117 }
1118}
1119
4900116e
DDAG
1120/* For temporary buffers for forming a name */
1121#define VCPU_THREAD_NAME_SIZE 16
1122
e5ab30a2 1123static void qemu_tcg_init_vcpu(CPUState *cpu)
296af7c9 1124{
4900116e
DDAG
1125 char thread_name[VCPU_THREAD_NAME_SIZE];
1126
09daed84
EI
1127 tcg_cpu_address_space_init(cpu, cpu->as);
1128
296af7c9
BS
1129 /* share a single thread for all cpus with TCG */
1130 if (!tcg_cpu_thread) {
814e612e 1131 cpu->thread = g_malloc0(sizeof(QemuThread));
f5c121b8
AF
1132 cpu->halt_cond = g_malloc0(sizeof(QemuCond));
1133 qemu_cond_init(cpu->halt_cond);
1134 tcg_halt_cond = cpu->halt_cond;
4900116e
DDAG
1135 snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/TCG",
1136 cpu->cpu_index);
1137 qemu_thread_create(cpu->thread, thread_name, qemu_tcg_cpu_thread_fn,
1138 cpu, QEMU_THREAD_JOINABLE);
1ecf47bf 1139#ifdef _WIN32
814e612e 1140 cpu->hThread = qemu_thread_get_handle(cpu->thread);
1ecf47bf 1141#endif
61a46217 1142 while (!cpu->created) {
18a85728 1143 qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
0ab07c62 1144 }
814e612e 1145 tcg_cpu_thread = cpu->thread;
296af7c9 1146 } else {
814e612e 1147 cpu->thread = tcg_cpu_thread;
f5c121b8 1148 cpu->halt_cond = tcg_halt_cond;
296af7c9
BS
1149 }
1150}
1151
48a106bd 1152static void qemu_kvm_start_vcpu(CPUState *cpu)
296af7c9 1153{
4900116e
DDAG
1154 char thread_name[VCPU_THREAD_NAME_SIZE];
1155
814e612e 1156 cpu->thread = g_malloc0(sizeof(QemuThread));
f5c121b8
AF
1157 cpu->halt_cond = g_malloc0(sizeof(QemuCond));
1158 qemu_cond_init(cpu->halt_cond);
4900116e
DDAG
1159 snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/KVM",
1160 cpu->cpu_index);
1161 qemu_thread_create(cpu->thread, thread_name, qemu_kvm_cpu_thread_fn,
1162 cpu, QEMU_THREAD_JOINABLE);
61a46217 1163 while (!cpu->created) {
18a85728 1164 qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
0ab07c62 1165 }
296af7c9
BS
1166}
1167
10a9021d 1168static void qemu_dummy_start_vcpu(CPUState *cpu)
c7f0f3b1 1169{
4900116e
DDAG
1170 char thread_name[VCPU_THREAD_NAME_SIZE];
1171
814e612e 1172 cpu->thread = g_malloc0(sizeof(QemuThread));
f5c121b8
AF
1173 cpu->halt_cond = g_malloc0(sizeof(QemuCond));
1174 qemu_cond_init(cpu->halt_cond);
4900116e
DDAG
1175 snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/DUMMY",
1176 cpu->cpu_index);
1177 qemu_thread_create(cpu->thread, thread_name, qemu_dummy_cpu_thread_fn, cpu,
c7f0f3b1 1178 QEMU_THREAD_JOINABLE);
61a46217 1179 while (!cpu->created) {
c7f0f3b1
AL
1180 qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
1181 }
1182}
1183
c643bed9 1184void qemu_init_vcpu(CPUState *cpu)
296af7c9 1185{
ce3960eb
AF
1186 cpu->nr_cores = smp_cores;
1187 cpu->nr_threads = smp_threads;
f324e766 1188 cpu->stopped = true;
0ab07c62 1189 if (kvm_enabled()) {
48a106bd 1190 qemu_kvm_start_vcpu(cpu);
c7f0f3b1 1191 } else if (tcg_enabled()) {
e5ab30a2 1192 qemu_tcg_init_vcpu(cpu);
c7f0f3b1 1193 } else {
10a9021d 1194 qemu_dummy_start_vcpu(cpu);
0ab07c62 1195 }
296af7c9
BS
1196}
1197
b4a3d965 1198void cpu_stop_current(void)
296af7c9 1199{
4917cf44
AF
1200 if (current_cpu) {
1201 current_cpu->stop = false;
1202 current_cpu->stopped = true;
1203 cpu_exit(current_cpu);
67bb172f 1204 qemu_cond_signal(&qemu_pause_cond);
b4a3d965 1205 }
296af7c9
BS
1206}
1207
56983463 1208int vm_stop(RunState state)
296af7c9 1209{
aa723c23 1210 if (qemu_in_vcpu_thread()) {
74892d24 1211 qemu_system_vmstop_request_prepare();
1dfb4dd9 1212 qemu_system_vmstop_request(state);
296af7c9
BS
1213 /*
1214 * FIXME: should not return to device code in case
1215 * vm_stop() has been requested.
1216 */
b4a3d965 1217 cpu_stop_current();
56983463 1218 return 0;
296af7c9 1219 }
56983463
KW
1220
1221 return do_vm_stop(state);
296af7c9
BS
1222}
1223
8a9236f1
LC
1224/* does a state transition even if the VM is already stopped,
1225 current state is forgotten forever */
56983463 1226int vm_stop_force_state(RunState state)
8a9236f1
LC
1227{
1228 if (runstate_is_running()) {
56983463 1229 return vm_stop(state);
8a9236f1
LC
1230 } else {
1231 runstate_set(state);
594a45ce
KW
1232 /* Make sure to return an error if the flush in a previous vm_stop()
1233 * failed. */
1234 return bdrv_flush_all();
8a9236f1
LC
1235 }
1236}
1237
9349b4f9 1238static int tcg_cpu_exec(CPUArchState *env)
296af7c9 1239{
efee7340 1240 CPUState *cpu = ENV_GET_CPU(env);
296af7c9
BS
1241 int ret;
1242#ifdef CONFIG_PROFILER
1243 int64_t ti;
1244#endif
1245
1246#ifdef CONFIG_PROFILER
1247 ti = profile_getclock();
1248#endif
1249 if (use_icount) {
1250 int64_t count;
ac70aafc 1251 int64_t deadline;
296af7c9 1252 int decr;
28ecfd7a
AF
1253 qemu_icount -= (cpu->icount_decr.u16.low + cpu->icount_extra);
1254 cpu->icount_decr.u16.low = 0;
efee7340 1255 cpu->icount_extra = 0;
40daca54 1256 deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
ac70aafc
AB
1257
1258 /* Maintain prior (possibly buggy) behaviour where if no deadline
40daca54 1259 * was set (as there is no QEMU_CLOCK_VIRTUAL timer) or it is more than
ac70aafc
AB
1260 * INT32_MAX nanoseconds ahead, we still use INT32_MAX
1261 * nanoseconds.
1262 */
1263 if ((deadline < 0) || (deadline > INT32_MAX)) {
1264 deadline = INT32_MAX;
1265 }
1266
1267 count = qemu_icount_round(deadline);
296af7c9
BS
1268 qemu_icount += count;
1269 decr = (count > 0xffff) ? 0xffff : count;
1270 count -= decr;
28ecfd7a 1271 cpu->icount_decr.u16.low = decr;
efee7340 1272 cpu->icount_extra = count;
296af7c9
BS
1273 }
1274 ret = cpu_exec(env);
1275#ifdef CONFIG_PROFILER
1276 qemu_time += profile_getclock() - ti;
1277#endif
1278 if (use_icount) {
1279 /* Fold pending instructions back into the
1280 instruction counter, and clear the interrupt flag. */
28ecfd7a
AF
1281 qemu_icount -= (cpu->icount_decr.u16.low + cpu->icount_extra);
1282 cpu->icount_decr.u32 = 0;
efee7340 1283 cpu->icount_extra = 0;
296af7c9
BS
1284 }
1285 return ret;
1286}
1287
bdb7ca67 1288static void tcg_exec_all(void)
296af7c9 1289{
9a36085b
JK
1290 int r;
1291
40daca54
AB
1292 /* Account partial waits to QEMU_CLOCK_VIRTUAL. */
1293 qemu_clock_warp(QEMU_CLOCK_VIRTUAL);
ab33fcda 1294
0ab07c62 1295 if (next_cpu == NULL) {
296af7c9 1296 next_cpu = first_cpu;
0ab07c62 1297 }
bdc44640 1298 for (; next_cpu != NULL && !exit_request; next_cpu = CPU_NEXT(next_cpu)) {
182735ef
AF
1299 CPUState *cpu = next_cpu;
1300 CPUArchState *env = cpu->env_ptr;
296af7c9 1301
40daca54 1302 qemu_clock_enable(QEMU_CLOCK_VIRTUAL,
ed2803da 1303 (cpu->singlestep_enabled & SSTEP_NOTIMER) == 0);
296af7c9 1304
a1fcaa73 1305 if (cpu_can_run(cpu)) {
bdb7ca67 1306 r = tcg_cpu_exec(env);
9a36085b 1307 if (r == EXCP_DEBUG) {
91325046 1308 cpu_handle_guest_debug(cpu);
3c638d06
JK
1309 break;
1310 }
f324e766 1311 } else if (cpu->stop || cpu->stopped) {
296af7c9
BS
1312 break;
1313 }
1314 }
c629a4bc 1315 exit_request = 0;
296af7c9
BS
1316}
1317
9a78eead 1318void list_cpus(FILE *f, fprintf_function cpu_fprintf, const char *optarg)
262353cb
BS
1319{
1320 /* XXX: implement xxx_cpu_list for targets that still miss it */
e916cbf8
PM
1321#if defined(cpu_list)
1322 cpu_list(f, cpu_fprintf);
262353cb
BS
1323#endif
1324}
de0b36b6
LC
1325
1326CpuInfoList *qmp_query_cpus(Error **errp)
1327{
1328 CpuInfoList *head = NULL, *cur_item = NULL;
182735ef 1329 CPUState *cpu;
de0b36b6 1330
bdc44640 1331 CPU_FOREACH(cpu) {
de0b36b6 1332 CpuInfoList *info;
182735ef
AF
1333#if defined(TARGET_I386)
1334 X86CPU *x86_cpu = X86_CPU(cpu);
1335 CPUX86State *env = &x86_cpu->env;
1336#elif defined(TARGET_PPC)
1337 PowerPCCPU *ppc_cpu = POWERPC_CPU(cpu);
1338 CPUPPCState *env = &ppc_cpu->env;
1339#elif defined(TARGET_SPARC)
1340 SPARCCPU *sparc_cpu = SPARC_CPU(cpu);
1341 CPUSPARCState *env = &sparc_cpu->env;
1342#elif defined(TARGET_MIPS)
1343 MIPSCPU *mips_cpu = MIPS_CPU(cpu);
1344 CPUMIPSState *env = &mips_cpu->env;
1345#endif
de0b36b6 1346
cb446eca 1347 cpu_synchronize_state(cpu);
de0b36b6
LC
1348
1349 info = g_malloc0(sizeof(*info));
1350 info->value = g_malloc0(sizeof(*info->value));
55e5c285 1351 info->value->CPU = cpu->cpu_index;
182735ef 1352 info->value->current = (cpu == first_cpu);
259186a7 1353 info->value->halted = cpu->halted;
9f09e18a 1354 info->value->thread_id = cpu->thread_id;
de0b36b6
LC
1355#if defined(TARGET_I386)
1356 info->value->has_pc = true;
1357 info->value->pc = env->eip + env->segs[R_CS].base;
1358#elif defined(TARGET_PPC)
1359 info->value->has_nip = true;
1360 info->value->nip = env->nip;
1361#elif defined(TARGET_SPARC)
1362 info->value->has_pc = true;
1363 info->value->pc = env->pc;
1364 info->value->has_npc = true;
1365 info->value->npc = env->npc;
1366#elif defined(TARGET_MIPS)
1367 info->value->has_PC = true;
1368 info->value->PC = env->active_tc.PC;
1369#endif
1370
1371 /* XXX: waiting for the qapi to support GSList */
1372 if (!cur_item) {
1373 head = cur_item = info;
1374 } else {
1375 cur_item->next = info;
1376 cur_item = info;
1377 }
1378 }
1379
1380 return head;
1381}
0cfd6a9a
LC
1382
1383void qmp_memsave(int64_t addr, int64_t size, const char *filename,
1384 bool has_cpu, int64_t cpu_index, Error **errp)
1385{
1386 FILE *f;
1387 uint32_t l;
55e5c285 1388 CPUState *cpu;
0cfd6a9a
LC
1389 uint8_t buf[1024];
1390
1391 if (!has_cpu) {
1392 cpu_index = 0;
1393 }
1394
151d1322
AF
1395 cpu = qemu_get_cpu(cpu_index);
1396 if (cpu == NULL) {
0cfd6a9a
LC
1397 error_set(errp, QERR_INVALID_PARAMETER_VALUE, "cpu-index",
1398 "a CPU number");
1399 return;
1400 }
1401
1402 f = fopen(filename, "wb");
1403 if (!f) {
618da851 1404 error_setg_file_open(errp, errno, filename);
0cfd6a9a
LC
1405 return;
1406 }
1407
1408 while (size != 0) {
1409 l = sizeof(buf);
1410 if (l > size)
1411 l = size;
2f4d0f59
AK
1412 if (cpu_memory_rw_debug(cpu, addr, buf, l, 0) != 0) {
1413 error_setg(errp, "Invalid addr 0x%016" PRIx64 "specified", addr);
1414 goto exit;
1415 }
0cfd6a9a
LC
1416 if (fwrite(buf, 1, l, f) != l) {
1417 error_set(errp, QERR_IO_ERROR);
1418 goto exit;
1419 }
1420 addr += l;
1421 size -= l;
1422 }
1423
1424exit:
1425 fclose(f);
1426}
6d3962bf
LC
1427
1428void qmp_pmemsave(int64_t addr, int64_t size, const char *filename,
1429 Error **errp)
1430{
1431 FILE *f;
1432 uint32_t l;
1433 uint8_t buf[1024];
1434
1435 f = fopen(filename, "wb");
1436 if (!f) {
618da851 1437 error_setg_file_open(errp, errno, filename);
6d3962bf
LC
1438 return;
1439 }
1440
1441 while (size != 0) {
1442 l = sizeof(buf);
1443 if (l > size)
1444 l = size;
eb6282f2 1445 cpu_physical_memory_read(addr, buf, l);
6d3962bf
LC
1446 if (fwrite(buf, 1, l, f) != l) {
1447 error_set(errp, QERR_IO_ERROR);
1448 goto exit;
1449 }
1450 addr += l;
1451 size -= l;
1452 }
1453
1454exit:
1455 fclose(f);
1456}
ab49ab5c
LC
1457
1458void qmp_inject_nmi(Error **errp)
1459{
1460#if defined(TARGET_I386)
182735ef
AF
1461 CPUState *cs;
1462
bdc44640 1463 CPU_FOREACH(cs) {
182735ef 1464 X86CPU *cpu = X86_CPU(cs);
ab49ab5c 1465
02e51483 1466 if (!cpu->apic_state) {
182735ef 1467 cpu_interrupt(cs, CPU_INTERRUPT_NMI);
02c09195 1468 } else {
02e51483 1469 apic_deliver_nmi(cpu->apic_state);
02c09195 1470 }
ab49ab5c 1471 }
7f7f9752
ED
1472#elif defined(TARGET_S390X)
1473 CPUState *cs;
1474 S390CPU *cpu;
1475
bdc44640 1476 CPU_FOREACH(cs) {
7f7f9752
ED
1477 cpu = S390_CPU(cs);
1478 if (cpu->env.cpu_num == monitor_get_cpu_index()) {
1479 if (s390_cpu_restart(S390_CPU(cs)) == -1) {
1480 error_set(errp, QERR_UNSUPPORTED);
1481 return;
1482 }
1483 break;
1484 }
1485 }
ab49ab5c
LC
1486#else
1487 error_set(errp, QERR_UNSUPPORTED);
1488#endif
1489}