]> git.proxmox.com Git - mirror_qemu.git/blame - cpus.c
Fix irq route entries exceeding KVM_MAX_IRQ_ROUTES
[mirror_qemu.git] / cpus.c
CommitLineData
296af7c9
BS
1/*
2 * QEMU System Emulator
3 *
4 * Copyright (c) 2003-2008 Fabrice Bellard
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to deal
8 * in the Software without restriction, including without limitation the rights
9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 * copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22 * THE SOFTWARE.
23 */
24
25/* Needed early for CONFIG_BSD etc. */
26#include "config-host.h"
27
83c9089e 28#include "monitor/monitor.h"
a4e15de9 29#include "qapi/qmp/qerror.h"
d49b6836 30#include "qemu/error-report.h"
9c17d615 31#include "sysemu/sysemu.h"
022c62cb 32#include "exec/gdbstub.h"
9c17d615
PB
33#include "sysemu/dma.h"
34#include "sysemu/kvm.h"
de0b36b6 35#include "qmp-commands.h"
296af7c9 36
1de7afc9 37#include "qemu/thread.h"
9c17d615
PB
38#include "sysemu/cpus.h"
39#include "sysemu/qtest.h"
1de7afc9
PB
40#include "qemu/main-loop.h"
41#include "qemu/bitmap.h"
cb365646 42#include "qemu/seqlock.h"
a4e15de9 43#include "qapi-event.h"
9cb805fd 44#include "hw/nmi.h"
0ff0fc19
JK
45
46#ifndef _WIN32
1de7afc9 47#include "qemu/compatfd.h"
0ff0fc19 48#endif
296af7c9 49
6d9cb73c
JK
50#ifdef CONFIG_LINUX
51
52#include <sys/prctl.h>
53
c0532a76
MT
54#ifndef PR_MCE_KILL
55#define PR_MCE_KILL 33
56#endif
57
6d9cb73c
JK
58#ifndef PR_MCE_KILL_SET
59#define PR_MCE_KILL_SET 1
60#endif
61
62#ifndef PR_MCE_KILL_EARLY
63#define PR_MCE_KILL_EARLY 1
64#endif
65
66#endif /* CONFIG_LINUX */
67
182735ef 68static CPUState *next_cpu;
27498bef
ST
69int64_t max_delay;
70int64_t max_advance;
296af7c9 71
321bc0b2
TC
72bool cpu_is_stopped(CPUState *cpu)
73{
74 return cpu->stopped || !runstate_is_running();
75}
76
a98ae1d8 77static bool cpu_thread_is_idle(CPUState *cpu)
ac873f1e 78{
c64ca814 79 if (cpu->stop || cpu->queued_work_first) {
ac873f1e
PM
80 return false;
81 }
321bc0b2 82 if (cpu_is_stopped(cpu)) {
ac873f1e
PM
83 return true;
84 }
8c2e1b00 85 if (!cpu->halted || cpu_has_work(cpu) ||
215e79c0 86 kvm_halt_in_kernel()) {
ac873f1e
PM
87 return false;
88 }
89 return true;
90}
91
92static bool all_cpu_threads_idle(void)
93{
182735ef 94 CPUState *cpu;
ac873f1e 95
bdc44640 96 CPU_FOREACH(cpu) {
182735ef 97 if (!cpu_thread_is_idle(cpu)) {
ac873f1e
PM
98 return false;
99 }
100 }
101 return true;
102}
103
946fb27c
PB
104/***********************************************************/
105/* guest cycle counter */
106
a3270e19
PB
107/* Protected by TimersState seqlock */
108
5045e9d9 109static bool icount_sleep = true;
71468395 110static int64_t vm_clock_warp_start = -1;
946fb27c
PB
111/* Conversion factor from emulated instructions to virtual clock ticks. */
112static int icount_time_shift;
113/* Arbitrarily pick 1MIPS as the minimum allowable speed. */
114#define MAX_ICOUNT_SHIFT 10
a3270e19 115
946fb27c
PB
116static QEMUTimer *icount_rt_timer;
117static QEMUTimer *icount_vm_timer;
118static QEMUTimer *icount_warp_timer;
946fb27c
PB
119
120typedef struct TimersState {
cb365646 121 /* Protected by BQL. */
946fb27c
PB
122 int64_t cpu_ticks_prev;
123 int64_t cpu_ticks_offset;
cb365646
LPF
124
125 /* cpu_clock_offset can be read out of BQL, so protect it with
126 * this lock.
127 */
128 QemuSeqLock vm_clock_seqlock;
946fb27c
PB
129 int64_t cpu_clock_offset;
130 int32_t cpu_ticks_enabled;
131 int64_t dummy;
c96778bb
FK
132
133 /* Compensate for varying guest execution speed. */
134 int64_t qemu_icount_bias;
135 /* Only written by TCG thread */
136 int64_t qemu_icount;
946fb27c
PB
137} TimersState;
138
d9cd4007 139static TimersState timers_state;
946fb27c 140
2a62914b 141int64_t cpu_get_icount_raw(void)
946fb27c
PB
142{
143 int64_t icount;
4917cf44 144 CPUState *cpu = current_cpu;
946fb27c 145
c96778bb 146 icount = timers_state.qemu_icount;
4917cf44 147 if (cpu) {
99df7dce 148 if (!cpu_can_do_io(cpu)) {
2a62914b
PD
149 fprintf(stderr, "Bad icount read\n");
150 exit(1);
946fb27c 151 }
28ecfd7a 152 icount -= (cpu->icount_decr.u16.low + cpu->icount_extra);
946fb27c 153 }
2a62914b
PD
154 return icount;
155}
156
157/* Return the virtual CPU time, based on the instruction counter. */
158static int64_t cpu_get_icount_locked(void)
159{
160 int64_t icount = cpu_get_icount_raw();
3f031313 161 return timers_state.qemu_icount_bias + cpu_icount_to_ns(icount);
946fb27c
PB
162}
163
17a15f1b
PB
164int64_t cpu_get_icount(void)
165{
166 int64_t icount;
167 unsigned start;
168
169 do {
170 start = seqlock_read_begin(&timers_state.vm_clock_seqlock);
171 icount = cpu_get_icount_locked();
172 } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start));
173
174 return icount;
175}
176
3f031313
FK
177int64_t cpu_icount_to_ns(int64_t icount)
178{
179 return icount << icount_time_shift;
180}
181
946fb27c 182/* return the host CPU cycle counter and handle stop/restart */
cb365646 183/* Caller must hold the BQL */
946fb27c
PB
184int64_t cpu_get_ticks(void)
185{
5f3e3101
PB
186 int64_t ticks;
187
946fb27c
PB
188 if (use_icount) {
189 return cpu_get_icount();
190 }
5f3e3101
PB
191
192 ticks = timers_state.cpu_ticks_offset;
193 if (timers_state.cpu_ticks_enabled) {
194 ticks += cpu_get_real_ticks();
195 }
196
197 if (timers_state.cpu_ticks_prev > ticks) {
198 /* Note: non increasing ticks may happen if the host uses
199 software suspend */
200 timers_state.cpu_ticks_offset += timers_state.cpu_ticks_prev - ticks;
201 ticks = timers_state.cpu_ticks_prev;
946fb27c 202 }
5f3e3101
PB
203
204 timers_state.cpu_ticks_prev = ticks;
205 return ticks;
946fb27c
PB
206}
207
cb365646 208static int64_t cpu_get_clock_locked(void)
946fb27c 209{
5f3e3101 210 int64_t ticks;
cb365646 211
5f3e3101
PB
212 ticks = timers_state.cpu_clock_offset;
213 if (timers_state.cpu_ticks_enabled) {
214 ticks += get_clock();
946fb27c 215 }
cb365646 216
5f3e3101 217 return ticks;
cb365646
LPF
218}
219
220/* return the host CPU monotonic timer and handle stop/restart */
221int64_t cpu_get_clock(void)
222{
223 int64_t ti;
224 unsigned start;
225
226 do {
227 start = seqlock_read_begin(&timers_state.vm_clock_seqlock);
228 ti = cpu_get_clock_locked();
229 } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start));
230
231 return ti;
946fb27c
PB
232}
233
cb365646
LPF
234/* enable cpu_get_ticks()
235 * Caller must hold BQL which server as mutex for vm_clock_seqlock.
236 */
946fb27c
PB
237void cpu_enable_ticks(void)
238{
cb365646
LPF
239 /* Here, the really thing protected by seqlock is cpu_clock_offset. */
240 seqlock_write_lock(&timers_state.vm_clock_seqlock);
946fb27c
PB
241 if (!timers_state.cpu_ticks_enabled) {
242 timers_state.cpu_ticks_offset -= cpu_get_real_ticks();
243 timers_state.cpu_clock_offset -= get_clock();
244 timers_state.cpu_ticks_enabled = 1;
245 }
cb365646 246 seqlock_write_unlock(&timers_state.vm_clock_seqlock);
946fb27c
PB
247}
248
249/* disable cpu_get_ticks() : the clock is stopped. You must not call
cb365646
LPF
250 * cpu_get_ticks() after that.
251 * Caller must hold BQL which server as mutex for vm_clock_seqlock.
252 */
946fb27c
PB
253void cpu_disable_ticks(void)
254{
cb365646
LPF
255 /* Here, the really thing protected by seqlock is cpu_clock_offset. */
256 seqlock_write_lock(&timers_state.vm_clock_seqlock);
946fb27c 257 if (timers_state.cpu_ticks_enabled) {
5f3e3101 258 timers_state.cpu_ticks_offset += cpu_get_real_ticks();
cb365646 259 timers_state.cpu_clock_offset = cpu_get_clock_locked();
946fb27c
PB
260 timers_state.cpu_ticks_enabled = 0;
261 }
cb365646 262 seqlock_write_unlock(&timers_state.vm_clock_seqlock);
946fb27c
PB
263}
264
265/* Correlation between real and virtual time is always going to be
266 fairly approximate, so ignore small variation.
267 When the guest is idle real and virtual time will be aligned in
268 the IO wait loop. */
269#define ICOUNT_WOBBLE (get_ticks_per_sec() / 10)
270
271static void icount_adjust(void)
272{
273 int64_t cur_time;
274 int64_t cur_icount;
275 int64_t delta;
a3270e19
PB
276
277 /* Protected by TimersState mutex. */
946fb27c 278 static int64_t last_delta;
468cc7cf 279
946fb27c
PB
280 /* If the VM is not running, then do nothing. */
281 if (!runstate_is_running()) {
282 return;
283 }
468cc7cf 284
17a15f1b
PB
285 seqlock_write_lock(&timers_state.vm_clock_seqlock);
286 cur_time = cpu_get_clock_locked();
287 cur_icount = cpu_get_icount_locked();
468cc7cf 288
946fb27c
PB
289 delta = cur_icount - cur_time;
290 /* FIXME: This is a very crude algorithm, somewhat prone to oscillation. */
291 if (delta > 0
292 && last_delta + ICOUNT_WOBBLE < delta * 2
293 && icount_time_shift > 0) {
294 /* The guest is getting too far ahead. Slow time down. */
295 icount_time_shift--;
296 }
297 if (delta < 0
298 && last_delta - ICOUNT_WOBBLE > delta * 2
299 && icount_time_shift < MAX_ICOUNT_SHIFT) {
300 /* The guest is getting too far behind. Speed time up. */
301 icount_time_shift++;
302 }
303 last_delta = delta;
c96778bb
FK
304 timers_state.qemu_icount_bias = cur_icount
305 - (timers_state.qemu_icount << icount_time_shift);
17a15f1b 306 seqlock_write_unlock(&timers_state.vm_clock_seqlock);
946fb27c
PB
307}
308
309static void icount_adjust_rt(void *opaque)
310{
40daca54 311 timer_mod(icount_rt_timer,
1979b908 312 qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL_RT) + 1000);
946fb27c
PB
313 icount_adjust();
314}
315
316static void icount_adjust_vm(void *opaque)
317{
40daca54
AB
318 timer_mod(icount_vm_timer,
319 qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
320 get_ticks_per_sec() / 10);
946fb27c
PB
321 icount_adjust();
322}
323
324static int64_t qemu_icount_round(int64_t count)
325{
326 return (count + (1 << icount_time_shift) - 1) >> icount_time_shift;
327}
328
329static void icount_warp_rt(void *opaque)
330{
17a15f1b
PB
331 /* The icount_warp_timer is rescheduled soon after vm_clock_warp_start
332 * changes from -1 to another value, so the race here is okay.
333 */
334 if (atomic_read(&vm_clock_warp_start) == -1) {
946fb27c
PB
335 return;
336 }
337
17a15f1b 338 seqlock_write_lock(&timers_state.vm_clock_seqlock);
946fb27c 339 if (runstate_is_running()) {
bf2a7ddb 340 int64_t clock = cpu_get_clock_locked();
8ed961d9
PB
341 int64_t warp_delta;
342
343 warp_delta = clock - vm_clock_warp_start;
344 if (use_icount == 2) {
946fb27c 345 /*
40daca54 346 * In adaptive mode, do not let QEMU_CLOCK_VIRTUAL run too
946fb27c
PB
347 * far ahead of real time.
348 */
17a15f1b 349 int64_t cur_icount = cpu_get_icount_locked();
bf2a7ddb 350 int64_t delta = clock - cur_icount;
8ed961d9 351 warp_delta = MIN(warp_delta, delta);
946fb27c 352 }
c96778bb 353 timers_state.qemu_icount_bias += warp_delta;
946fb27c
PB
354 }
355 vm_clock_warp_start = -1;
17a15f1b 356 seqlock_write_unlock(&timers_state.vm_clock_seqlock);
8ed961d9
PB
357
358 if (qemu_clock_expired(QEMU_CLOCK_VIRTUAL)) {
359 qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
360 }
946fb27c
PB
361}
362
8156be56
PB
363void qtest_clock_warp(int64_t dest)
364{
40daca54 365 int64_t clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
efef88b3 366 AioContext *aio_context;
8156be56 367 assert(qtest_enabled());
efef88b3 368 aio_context = qemu_get_aio_context();
8156be56 369 while (clock < dest) {
40daca54 370 int64_t deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
c9299e2f 371 int64_t warp = qemu_soonest_timeout(dest - clock, deadline);
efef88b3 372
17a15f1b 373 seqlock_write_lock(&timers_state.vm_clock_seqlock);
c96778bb 374 timers_state.qemu_icount_bias += warp;
17a15f1b
PB
375 seqlock_write_unlock(&timers_state.vm_clock_seqlock);
376
40daca54 377 qemu_clock_run_timers(QEMU_CLOCK_VIRTUAL);
efef88b3 378 timerlist_run_timers(aio_context->tlg.tl[QEMU_CLOCK_VIRTUAL]);
40daca54 379 clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
8156be56 380 }
40daca54 381 qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
8156be56
PB
382}
383
40daca54 384void qemu_clock_warp(QEMUClockType type)
946fb27c 385{
ce78d18c 386 int64_t clock;
946fb27c
PB
387 int64_t deadline;
388
389 /*
390 * There are too many global variables to make the "warp" behavior
391 * applicable to other clocks. But a clock argument removes the
392 * need for if statements all over the place.
393 */
40daca54 394 if (type != QEMU_CLOCK_VIRTUAL || !use_icount) {
946fb27c
PB
395 return;
396 }
397
5045e9d9
VC
398 if (icount_sleep) {
399 /*
400 * If the CPUs have been sleeping, advance QEMU_CLOCK_VIRTUAL timer now.
401 * This ensures that the deadline for the timer is computed correctly
402 * below.
403 * This also makes sure that the insn counter is synchronized before
404 * the CPU starts running, in case the CPU is woken by an event other
405 * than the earliest QEMU_CLOCK_VIRTUAL timer.
406 */
407 icount_warp_rt(NULL);
408 timer_del(icount_warp_timer);
409 }
ce78d18c 410 if (!all_cpu_threads_idle()) {
946fb27c
PB
411 return;
412 }
413
8156be56
PB
414 if (qtest_enabled()) {
415 /* When testing, qtest commands advance icount. */
416 return;
417 }
418
ac70aafc 419 /* We want to use the earliest deadline from ALL vm_clocks */
bf2a7ddb 420 clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT);
40daca54 421 deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
ce78d18c 422 if (deadline < 0) {
d7a0f71d
VC
423 static bool notified;
424 if (!icount_sleep && !notified) {
425 error_report("WARNING: icount sleep disabled and no active timers");
426 notified = true;
427 }
ce78d18c 428 return;
ac70aafc
AB
429 }
430
946fb27c
PB
431 if (deadline > 0) {
432 /*
40daca54 433 * Ensure QEMU_CLOCK_VIRTUAL proceeds even when the virtual CPU goes to
946fb27c
PB
434 * sleep. Otherwise, the CPU might be waiting for a future timer
435 * interrupt to wake it up, but the interrupt never comes because
436 * the vCPU isn't running any insns and thus doesn't advance the
40daca54 437 * QEMU_CLOCK_VIRTUAL.
946fb27c 438 */
5045e9d9
VC
439 if (!icount_sleep) {
440 /*
441 * We never let VCPUs sleep in no sleep icount mode.
442 * If there is a pending QEMU_CLOCK_VIRTUAL timer we just advance
443 * to the next QEMU_CLOCK_VIRTUAL event and notify it.
444 * It is useful when we want a deterministic execution time,
445 * isolated from host latencies.
446 */
447 seqlock_write_lock(&timers_state.vm_clock_seqlock);
448 timers_state.qemu_icount_bias += deadline;
449 seqlock_write_unlock(&timers_state.vm_clock_seqlock);
450 qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
451 } else {
452 /*
453 * We do stop VCPUs and only advance QEMU_CLOCK_VIRTUAL after some
454 * "real" time, (related to the time left until the next event) has
455 * passed. The QEMU_CLOCK_VIRTUAL_RT clock will do this.
456 * This avoids that the warps are visible externally; for example,
457 * you will not be sending network packets continuously instead of
458 * every 100ms.
459 */
460 seqlock_write_lock(&timers_state.vm_clock_seqlock);
461 if (vm_clock_warp_start == -1 || vm_clock_warp_start > clock) {
462 vm_clock_warp_start = clock;
463 }
464 seqlock_write_unlock(&timers_state.vm_clock_seqlock);
465 timer_mod_anticipate(icount_warp_timer, clock + deadline);
ce78d18c 466 }
ac70aafc 467 } else if (deadline == 0) {
40daca54 468 qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
946fb27c
PB
469 }
470}
471
d09eae37
FK
472static bool icount_state_needed(void *opaque)
473{
474 return use_icount;
475}
476
477/*
478 * This is a subsection for icount migration.
479 */
480static const VMStateDescription icount_vmstate_timers = {
481 .name = "timer/icount",
482 .version_id = 1,
483 .minimum_version_id = 1,
5cd8cada 484 .needed = icount_state_needed,
d09eae37
FK
485 .fields = (VMStateField[]) {
486 VMSTATE_INT64(qemu_icount_bias, TimersState),
487 VMSTATE_INT64(qemu_icount, TimersState),
488 VMSTATE_END_OF_LIST()
489 }
490};
491
946fb27c
PB
492static const VMStateDescription vmstate_timers = {
493 .name = "timer",
494 .version_id = 2,
495 .minimum_version_id = 1,
35d08458 496 .fields = (VMStateField[]) {
946fb27c
PB
497 VMSTATE_INT64(cpu_ticks_offset, TimersState),
498 VMSTATE_INT64(dummy, TimersState),
499 VMSTATE_INT64_V(cpu_clock_offset, TimersState, 2),
500 VMSTATE_END_OF_LIST()
d09eae37 501 },
5cd8cada
JQ
502 .subsections = (const VMStateDescription*[]) {
503 &icount_vmstate_timers,
504 NULL
946fb27c
PB
505 }
506};
507
4603ea01
PD
508void cpu_ticks_init(void)
509{
510 seqlock_init(&timers_state.vm_clock_seqlock, NULL);
511 vmstate_register(NULL, 0, &vmstate_timers, &timers_state);
512}
513
1ad9580b 514void configure_icount(QemuOpts *opts, Error **errp)
946fb27c 515{
1ad9580b 516 const char *option;
a8bfac37 517 char *rem_str = NULL;
1ad9580b 518
1ad9580b 519 option = qemu_opt_get(opts, "shift");
946fb27c 520 if (!option) {
a8bfac37
ST
521 if (qemu_opt_get(opts, "align") != NULL) {
522 error_setg(errp, "Please specify shift option when using align");
523 }
946fb27c
PB
524 return;
525 }
f1f4b57e
VC
526
527 icount_sleep = qemu_opt_get_bool(opts, "sleep", true);
5045e9d9
VC
528 if (icount_sleep) {
529 icount_warp_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL_RT,
530 icount_warp_rt, NULL);
531 }
f1f4b57e 532
a8bfac37 533 icount_align_option = qemu_opt_get_bool(opts, "align", false);
f1f4b57e
VC
534
535 if (icount_align_option && !icount_sleep) {
536 error_setg(errp, "align=on and sleep=no are incompatible");
537 }
946fb27c 538 if (strcmp(option, "auto") != 0) {
a8bfac37
ST
539 errno = 0;
540 icount_time_shift = strtol(option, &rem_str, 0);
541 if (errno != 0 || *rem_str != '\0' || !strlen(option)) {
542 error_setg(errp, "icount: Invalid shift value");
543 }
946fb27c
PB
544 use_icount = 1;
545 return;
a8bfac37
ST
546 } else if (icount_align_option) {
547 error_setg(errp, "shift=auto and align=on are incompatible");
f1f4b57e
VC
548 } else if (!icount_sleep) {
549 error_setg(errp, "shift=auto and sleep=no are incompatible");
946fb27c
PB
550 }
551
552 use_icount = 2;
553
554 /* 125MIPS seems a reasonable initial guess at the guest speed.
555 It will be corrected fairly quickly anyway. */
556 icount_time_shift = 3;
557
558 /* Have both realtime and virtual time triggers for speed adjustment.
559 The realtime trigger catches emulated time passing too slowly,
560 the virtual time trigger catches emulated time passing too fast.
561 Realtime triggers occur even when idle, so use them less frequently
562 than VM triggers. */
bf2a7ddb
PD
563 icount_rt_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL_RT,
564 icount_adjust_rt, NULL);
40daca54 565 timer_mod(icount_rt_timer,
bf2a7ddb 566 qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL_RT) + 1000);
40daca54
AB
567 icount_vm_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
568 icount_adjust_vm, NULL);
569 timer_mod(icount_vm_timer,
570 qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
571 get_ticks_per_sec() / 10);
946fb27c
PB
572}
573
296af7c9
BS
574/***********************************************************/
575void hw_error(const char *fmt, ...)
576{
577 va_list ap;
55e5c285 578 CPUState *cpu;
296af7c9
BS
579
580 va_start(ap, fmt);
581 fprintf(stderr, "qemu: hardware error: ");
582 vfprintf(stderr, fmt, ap);
583 fprintf(stderr, "\n");
bdc44640 584 CPU_FOREACH(cpu) {
55e5c285 585 fprintf(stderr, "CPU #%d:\n", cpu->cpu_index);
878096ee 586 cpu_dump_state(cpu, stderr, fprintf, CPU_DUMP_FPU);
296af7c9
BS
587 }
588 va_end(ap);
589 abort();
590}
591
592void cpu_synchronize_all_states(void)
593{
182735ef 594 CPUState *cpu;
296af7c9 595
bdc44640 596 CPU_FOREACH(cpu) {
182735ef 597 cpu_synchronize_state(cpu);
296af7c9
BS
598 }
599}
600
601void cpu_synchronize_all_post_reset(void)
602{
182735ef 603 CPUState *cpu;
296af7c9 604
bdc44640 605 CPU_FOREACH(cpu) {
182735ef 606 cpu_synchronize_post_reset(cpu);
296af7c9
BS
607 }
608}
609
610void cpu_synchronize_all_post_init(void)
611{
182735ef 612 CPUState *cpu;
296af7c9 613
bdc44640 614 CPU_FOREACH(cpu) {
182735ef 615 cpu_synchronize_post_init(cpu);
296af7c9
BS
616 }
617}
618
de9d61e8
MT
619void cpu_clean_all_dirty(void)
620{
621 CPUState *cpu;
622
623 CPU_FOREACH(cpu) {
624 cpu_clean_state(cpu);
625 }
626}
627
56983463 628static int do_vm_stop(RunState state)
296af7c9 629{
56983463
KW
630 int ret = 0;
631
1354869c 632 if (runstate_is_running()) {
296af7c9 633 cpu_disable_ticks();
296af7c9 634 pause_all_vcpus();
f5bbfba1 635 runstate_set(state);
1dfb4dd9 636 vm_state_notify(0, state);
a4e15de9 637 qapi_event_send_stop(&error_abort);
296af7c9 638 }
56983463 639
594a45ce
KW
640 bdrv_drain_all();
641 ret = bdrv_flush_all();
642
56983463 643 return ret;
296af7c9
BS
644}
645
a1fcaa73 646static bool cpu_can_run(CPUState *cpu)
296af7c9 647{
4fdeee7c 648 if (cpu->stop) {
a1fcaa73 649 return false;
0ab07c62 650 }
321bc0b2 651 if (cpu_is_stopped(cpu)) {
a1fcaa73 652 return false;
0ab07c62 653 }
a1fcaa73 654 return true;
296af7c9
BS
655}
656
91325046 657static void cpu_handle_guest_debug(CPUState *cpu)
83f338f7 658{
64f6b346 659 gdb_set_stop_cpu(cpu);
8cf71710 660 qemu_system_debug_request();
f324e766 661 cpu->stopped = true;
3c638d06
JK
662}
663
714bd040
PB
664static void cpu_signal(int sig)
665{
4917cf44
AF
666 if (current_cpu) {
667 cpu_exit(current_cpu);
714bd040
PB
668 }
669 exit_request = 1;
670}
714bd040 671
6d9cb73c
JK
672#ifdef CONFIG_LINUX
673static void sigbus_reraise(void)
674{
675 sigset_t set;
676 struct sigaction action;
677
678 memset(&action, 0, sizeof(action));
679 action.sa_handler = SIG_DFL;
680 if (!sigaction(SIGBUS, &action, NULL)) {
681 raise(SIGBUS);
682 sigemptyset(&set);
683 sigaddset(&set, SIGBUS);
684 sigprocmask(SIG_UNBLOCK, &set, NULL);
685 }
686 perror("Failed to re-raise SIGBUS!\n");
687 abort();
688}
689
690static void sigbus_handler(int n, struct qemu_signalfd_siginfo *siginfo,
691 void *ctx)
692{
693 if (kvm_on_sigbus(siginfo->ssi_code,
694 (void *)(intptr_t)siginfo->ssi_addr)) {
695 sigbus_reraise();
696 }
697}
698
699static void qemu_init_sigbus(void)
700{
701 struct sigaction action;
702
703 memset(&action, 0, sizeof(action));
704 action.sa_flags = SA_SIGINFO;
705 action.sa_sigaction = (void (*)(int, siginfo_t*, void*))sigbus_handler;
706 sigaction(SIGBUS, &action, NULL);
707
708 prctl(PR_MCE_KILL, PR_MCE_KILL_SET, PR_MCE_KILL_EARLY, 0, 0);
709}
710
290adf38 711static void qemu_kvm_eat_signals(CPUState *cpu)
1ab3c6c0
JK
712{
713 struct timespec ts = { 0, 0 };
714 siginfo_t siginfo;
715 sigset_t waitset;
716 sigset_t chkset;
717 int r;
718
719 sigemptyset(&waitset);
720 sigaddset(&waitset, SIG_IPI);
721 sigaddset(&waitset, SIGBUS);
722
723 do {
724 r = sigtimedwait(&waitset, &siginfo, &ts);
725 if (r == -1 && !(errno == EAGAIN || errno == EINTR)) {
726 perror("sigtimedwait");
727 exit(1);
728 }
729
730 switch (r) {
731 case SIGBUS:
290adf38 732 if (kvm_on_sigbus_vcpu(cpu, siginfo.si_code, siginfo.si_addr)) {
1ab3c6c0
JK
733 sigbus_reraise();
734 }
735 break;
736 default:
737 break;
738 }
739
740 r = sigpending(&chkset);
741 if (r == -1) {
742 perror("sigpending");
743 exit(1);
744 }
745 } while (sigismember(&chkset, SIG_IPI) || sigismember(&chkset, SIGBUS));
1ab3c6c0
JK
746}
747
6d9cb73c
JK
748#else /* !CONFIG_LINUX */
749
750static void qemu_init_sigbus(void)
751{
752}
1ab3c6c0 753
290adf38 754static void qemu_kvm_eat_signals(CPUState *cpu)
1ab3c6c0
JK
755{
756}
6d9cb73c
JK
757#endif /* !CONFIG_LINUX */
758
296af7c9 759#ifndef _WIN32
55f8d6ac
JK
/* No-op handler: SIG_IPI only needs to interrupt blocking syscalls. */
static void dummy_signal(int sig)
{
}
55f8d6ac 763
13618e05 764static void qemu_kvm_init_cpu_signals(CPUState *cpu)
714bd040
PB
765{
766 int r;
767 sigset_t set;
768 struct sigaction sigact;
769
770 memset(&sigact, 0, sizeof(sigact));
771 sigact.sa_handler = dummy_signal;
772 sigaction(SIG_IPI, &sigact, NULL);
773
714bd040
PB
774 pthread_sigmask(SIG_BLOCK, NULL, &set);
775 sigdelset(&set, SIG_IPI);
714bd040 776 sigdelset(&set, SIGBUS);
491d6e80 777 r = kvm_set_signal_mask(cpu, &set);
714bd040
PB
778 if (r) {
779 fprintf(stderr, "kvm_set_signal_mask: %s\n", strerror(-r));
780 exit(1);
781 }
782}
783
784static void qemu_tcg_init_cpu_signals(void)
785{
714bd040
PB
786 sigset_t set;
787 struct sigaction sigact;
788
789 memset(&sigact, 0, sizeof(sigact));
790 sigact.sa_handler = cpu_signal;
791 sigaction(SIG_IPI, &sigact, NULL);
792
793 sigemptyset(&set);
794 sigaddset(&set, SIG_IPI);
795 pthread_sigmask(SIG_UNBLOCK, &set, NULL);
714bd040
PB
796}
797
55f8d6ac 798#else /* _WIN32 */
13618e05 799static void qemu_kvm_init_cpu_signals(CPUState *cpu)
ff48eb5f 800{
714bd040
PB
801 abort();
802}
ff48eb5f 803
714bd040
PB
804static void qemu_tcg_init_cpu_signals(void)
805{
ff48eb5f 806}
714bd040 807#endif /* _WIN32 */
ff48eb5f 808
b2532d88 809static QemuMutex qemu_global_mutex;
46daff13 810static QemuCond qemu_io_proceeded_cond;
6b49809c 811static unsigned iothread_requesting_mutex;
296af7c9
BS
812
813static QemuThread io_thread;
814
815static QemuThread *tcg_cpu_thread;
816static QemuCond *tcg_halt_cond;
817
296af7c9
BS
818/* cpu creation */
819static QemuCond qemu_cpu_cond;
820/* system init */
296af7c9 821static QemuCond qemu_pause_cond;
e82bcec2 822static QemuCond qemu_work_cond;
296af7c9 823
d3b12f5d 824void qemu_init_cpu_loop(void)
296af7c9 825{
6d9cb73c 826 qemu_init_sigbus();
ed94592b 827 qemu_cond_init(&qemu_cpu_cond);
ed94592b
AL
828 qemu_cond_init(&qemu_pause_cond);
829 qemu_cond_init(&qemu_work_cond);
46daff13 830 qemu_cond_init(&qemu_io_proceeded_cond);
296af7c9 831 qemu_mutex_init(&qemu_global_mutex);
296af7c9 832
b7680cb6 833 qemu_thread_get_self(&io_thread);
296af7c9
BS
834}
835
f100f0b3 836void run_on_cpu(CPUState *cpu, void (*func)(void *data), void *data)
e82bcec2
MT
837{
838 struct qemu_work_item wi;
839
60e82579 840 if (qemu_cpu_is_self(cpu)) {
e82bcec2
MT
841 func(data);
842 return;
843 }
844
845 wi.func = func;
846 wi.data = data;
3c02270d 847 wi.free = false;
c64ca814
AF
848 if (cpu->queued_work_first == NULL) {
849 cpu->queued_work_first = &wi;
0ab07c62 850 } else {
c64ca814 851 cpu->queued_work_last->next = &wi;
0ab07c62 852 }
c64ca814 853 cpu->queued_work_last = &wi;
e82bcec2
MT
854 wi.next = NULL;
855 wi.done = false;
856
c08d7424 857 qemu_cpu_kick(cpu);
e82bcec2 858 while (!wi.done) {
4917cf44 859 CPUState *self_cpu = current_cpu;
e82bcec2
MT
860
861 qemu_cond_wait(&qemu_work_cond, &qemu_global_mutex);
4917cf44 862 current_cpu = self_cpu;
e82bcec2
MT
863 }
864}
865
3c02270d
CV
866void async_run_on_cpu(CPUState *cpu, void (*func)(void *data), void *data)
867{
868 struct qemu_work_item *wi;
869
870 if (qemu_cpu_is_self(cpu)) {
871 func(data);
872 return;
873 }
874
875 wi = g_malloc0(sizeof(struct qemu_work_item));
876 wi->func = func;
877 wi->data = data;
878 wi->free = true;
879 if (cpu->queued_work_first == NULL) {
880 cpu->queued_work_first = wi;
881 } else {
882 cpu->queued_work_last->next = wi;
883 }
884 cpu->queued_work_last = wi;
885 wi->next = NULL;
886 wi->done = false;
887
888 qemu_cpu_kick(cpu);
889}
890
6d45b109 891static void flush_queued_work(CPUState *cpu)
e82bcec2
MT
892{
893 struct qemu_work_item *wi;
894
c64ca814 895 if (cpu->queued_work_first == NULL) {
e82bcec2 896 return;
0ab07c62 897 }
e82bcec2 898
c64ca814
AF
899 while ((wi = cpu->queued_work_first)) {
900 cpu->queued_work_first = wi->next;
e82bcec2
MT
901 wi->func(wi->data);
902 wi->done = true;
3c02270d
CV
903 if (wi->free) {
904 g_free(wi);
905 }
e82bcec2 906 }
c64ca814 907 cpu->queued_work_last = NULL;
e82bcec2
MT
908 qemu_cond_broadcast(&qemu_work_cond);
909}
910
509a0d78 911static void qemu_wait_io_event_common(CPUState *cpu)
296af7c9 912{
4fdeee7c
AF
913 if (cpu->stop) {
914 cpu->stop = false;
f324e766 915 cpu->stopped = true;
296af7c9
BS
916 qemu_cond_signal(&qemu_pause_cond);
917 }
6d45b109 918 flush_queued_work(cpu);
216fc9a4 919 cpu->thread_kicked = false;
296af7c9
BS
920}
921
6cabe1f3 922static void qemu_tcg_wait_io_event(void)
296af7c9 923{
182735ef 924 CPUState *cpu;
6cabe1f3 925
16400322 926 while (all_cpu_threads_idle()) {
ab33fcda
PB
927 /* Start accounting real time to the virtual clock if the CPUs
928 are idle. */
40daca54 929 qemu_clock_warp(QEMU_CLOCK_VIRTUAL);
9705fbb5 930 qemu_cond_wait(tcg_halt_cond, &qemu_global_mutex);
16400322 931 }
296af7c9 932
46daff13
PB
933 while (iothread_requesting_mutex) {
934 qemu_cond_wait(&qemu_io_proceeded_cond, &qemu_global_mutex);
935 }
6cabe1f3 936
bdc44640 937 CPU_FOREACH(cpu) {
182735ef 938 qemu_wait_io_event_common(cpu);
6cabe1f3 939 }
296af7c9
BS
940}
941
fd529e8f 942static void qemu_kvm_wait_io_event(CPUState *cpu)
296af7c9 943{
a98ae1d8 944 while (cpu_thread_is_idle(cpu)) {
f5c121b8 945 qemu_cond_wait(cpu->halt_cond, &qemu_global_mutex);
16400322 946 }
296af7c9 947
290adf38 948 qemu_kvm_eat_signals(cpu);
509a0d78 949 qemu_wait_io_event_common(cpu);
296af7c9
BS
950}
951
7e97cd88 952static void *qemu_kvm_cpu_thread_fn(void *arg)
296af7c9 953{
48a106bd 954 CPUState *cpu = arg;
84b4915d 955 int r;
296af7c9 956
6164e6d6 957 qemu_mutex_lock(&qemu_global_mutex);
814e612e 958 qemu_thread_get_self(cpu->thread);
9f09e18a 959 cpu->thread_id = qemu_get_thread_id();
626cf8f4 960 cpu->can_do_io = 1;
4917cf44 961 current_cpu = cpu;
296af7c9 962
504134d2 963 r = kvm_init_vcpu(cpu);
84b4915d
JK
964 if (r < 0) {
965 fprintf(stderr, "kvm_init_vcpu failed: %s\n", strerror(-r));
966 exit(1);
967 }
296af7c9 968
13618e05 969 qemu_kvm_init_cpu_signals(cpu);
296af7c9
BS
970
971 /* signal CPU creation */
61a46217 972 cpu->created = true;
296af7c9
BS
973 qemu_cond_signal(&qemu_cpu_cond);
974
296af7c9 975 while (1) {
a1fcaa73 976 if (cpu_can_run(cpu)) {
1458c363 977 r = kvm_cpu_exec(cpu);
83f338f7 978 if (r == EXCP_DEBUG) {
91325046 979 cpu_handle_guest_debug(cpu);
83f338f7 980 }
0ab07c62 981 }
fd529e8f 982 qemu_kvm_wait_io_event(cpu);
296af7c9
BS
983 }
984
985 return NULL;
986}
987
c7f0f3b1
AL
988static void *qemu_dummy_cpu_thread_fn(void *arg)
989{
990#ifdef _WIN32
991 fprintf(stderr, "qtest is not supported under Windows\n");
992 exit(1);
993#else
10a9021d 994 CPUState *cpu = arg;
c7f0f3b1
AL
995 sigset_t waitset;
996 int r;
997
998 qemu_mutex_lock_iothread();
814e612e 999 qemu_thread_get_self(cpu->thread);
9f09e18a 1000 cpu->thread_id = qemu_get_thread_id();
626cf8f4 1001 cpu->can_do_io = 1;
c7f0f3b1
AL
1002
1003 sigemptyset(&waitset);
1004 sigaddset(&waitset, SIG_IPI);
1005
1006 /* signal CPU creation */
61a46217 1007 cpu->created = true;
c7f0f3b1
AL
1008 qemu_cond_signal(&qemu_cpu_cond);
1009
4917cf44 1010 current_cpu = cpu;
c7f0f3b1 1011 while (1) {
4917cf44 1012 current_cpu = NULL;
c7f0f3b1
AL
1013 qemu_mutex_unlock_iothread();
1014 do {
1015 int sig;
1016 r = sigwait(&waitset, &sig);
1017 } while (r == -1 && (errno == EAGAIN || errno == EINTR));
1018 if (r == -1) {
1019 perror("sigwait");
1020 exit(1);
1021 }
1022 qemu_mutex_lock_iothread();
4917cf44 1023 current_cpu = cpu;
509a0d78 1024 qemu_wait_io_event_common(cpu);
c7f0f3b1
AL
1025 }
1026
1027 return NULL;
1028#endif
1029}
1030
bdb7ca67
JK
1031static void tcg_exec_all(void);
1032
/* Thread body for TCG: a single thread executes *all* vCPUs round-robin.
 * @arg is the first CPUState created; the CPU_FOREACH below then publishes
 * thread id/created for every vCPU, since they all share this one thread.
 * Never returns.
 */
static void *qemu_tcg_cpu_thread_fn(void *arg)
{
    CPUState *cpu = arg;

    qemu_tcg_init_cpu_signals();
    qemu_thread_get_self(cpu->thread);

    qemu_mutex_lock(&qemu_global_mutex);
    /* All vCPUs are backed by this one thread; mark each as created. */
    CPU_FOREACH(cpu) {
        cpu->thread_id = qemu_get_thread_id();
        cpu->created = true;
        cpu->can_do_io = 1;
    }
    qemu_cond_signal(&qemu_cpu_cond);

    /* wait for initial kick-off after machine start */
    while (first_cpu->stopped) {
        qemu_cond_wait(tcg_halt_cond, &qemu_global_mutex);

        /* process any pending work */
        CPU_FOREACH(cpu) {
            qemu_wait_io_event_common(cpu);
        }
    }

    /* process any pending work */
    exit_request = 1;

    while (1) {
        /* Run each runnable vCPU for one slice (round-robin). */
        tcg_exec_all();

        if (use_icount) {
            int64_t deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);

            /* A zero deadline means a virtual timer is already due. */
            if (deadline == 0) {
                qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
            }
        }
        qemu_tcg_wait_io_event();
    }

    return NULL;
}
1076
/* Force @cpu's host thread out of guest execution / blocking waits.
 * POSIX: deliver SIG_IPI with pthread_kill().
 * Windows (TCG only): suspend the thread, poke the pending-signal state via
 * cpu_signal(), then resume it.
 */
static void qemu_cpu_kick_thread(CPUState *cpu)
{
#ifndef _WIN32
    int err;

    /* pthread_kill() returns an error number directly (not via errno). */
    err = pthread_kill(cpu->thread->thread, SIG_IPI);
    if (err) {
        fprintf(stderr, "qemu:%s: %s", __func__, strerror(err));
        exit(1);
    }
#else /* _WIN32 */
    if (!qemu_cpu_is_self(cpu)) {
        CONTEXT tcgContext;

        if (SuspendThread(cpu->hThread) == (DWORD)-1) {
            fprintf(stderr, "qemu:%s: GetLastError:%lu\n", __func__,
                    GetLastError());
            exit(1);
        }

        /* On multi-core systems, we are not sure that the thread is actually
         * suspended until we can get the context.
         */
        tcgContext.ContextFlags = CONTEXT_CONTROL;
        /* NOTE(review): this loops while GetThreadContext() *succeeds*
         * (MSDN: nonzero return == success), yet the comment above suggests
         * the intent was to retry until it succeeds.  Verify whether the
         * condition should be "== 0" — confirm against Win32 docs/testing
         * before changing shipped behaviour. */
        while (GetThreadContext(cpu->hThread, &tcgContext) != 0) {
            continue;
        }

        cpu_signal(0);

        if (ResumeThread(cpu->hThread) == (DWORD)-1) {
            fprintf(stderr, "qemu:%s: GetLastError:%lu\n", __func__,
                    GetLastError());
            exit(1);
        }
    }
#endif
}
1115
c08d7424 1116void qemu_cpu_kick(CPUState *cpu)
296af7c9 1117{
f5c121b8 1118 qemu_cond_broadcast(cpu->halt_cond);
216fc9a4 1119 if (!tcg_enabled() && !cpu->thread_kicked) {
2ff09a40 1120 qemu_cpu_kick_thread(cpu);
216fc9a4 1121 cpu->thread_kicked = true;
aa2c364b 1122 }
296af7c9
BS
1123}
1124
46d62fac 1125void qemu_cpu_kick_self(void)
296af7c9 1126{
b55c22c6 1127#ifndef _WIN32
4917cf44 1128 assert(current_cpu);
296af7c9 1129
4917cf44
AF
1130 if (!current_cpu->thread_kicked) {
1131 qemu_cpu_kick_thread(current_cpu);
1132 current_cpu->thread_kicked = true;
296af7c9 1133 }
b55c22c6
PB
1134#else
1135 abort();
1136#endif
296af7c9
BS
1137}
1138
60e82579 1139bool qemu_cpu_is_self(CPUState *cpu)
296af7c9 1140{
814e612e 1141 return qemu_thread_is_self(cpu->thread);
296af7c9
BS
1142}
1143
79e2b9ae 1144bool qemu_in_vcpu_thread(void)
aa723c23 1145{
4917cf44 1146 return current_cpu && qemu_cpu_is_self(current_cpu);
aa723c23
JQ
1147}
1148
/* Acquire the Big QEMU Lock (qemu_global_mutex) from an iothread.
 * iothread_requesting_mutex is raised for the duration of the attempt so the
 * TCG thread knows someone is waiting; if a quick trylock fails, the TCG
 * thread (first_cpu) is kicked out of guest code so it releases the lock.
 */
void qemu_mutex_lock_iothread(void)
{
    atomic_inc(&iothread_requesting_mutex);
    /* Fast path: no TCG contention possible (KVM/dummy, or vCPUs not yet
     * started) — just take the lock. */
    if (!tcg_enabled() || !first_cpu || !first_cpu->thread) {
        qemu_mutex_lock(&qemu_global_mutex);
        atomic_dec(&iothread_requesting_mutex);
    } else {
        if (qemu_mutex_trylock(&qemu_global_mutex)) {
            /* TCG holds the lock: kick it out of the guest, then block. */
            qemu_cpu_kick_thread(first_cpu);
            qemu_mutex_lock(&qemu_global_mutex);
        }
        atomic_dec(&iothread_requesting_mutex);
        /* Let the TCG thread resume guest execution. */
        qemu_cond_broadcast(&qemu_io_proceeded_cond);
    }
}
1164
/* Release the Big QEMU Lock taken by qemu_mutex_lock_iothread(). */
void qemu_mutex_unlock_iothread(void)
{
    qemu_mutex_unlock(&qemu_global_mutex);
}
1169
1170static int all_vcpus_paused(void)
1171{
bdc44640 1172 CPUState *cpu;
296af7c9 1173
bdc44640 1174 CPU_FOREACH(cpu) {
182735ef 1175 if (!cpu->stopped) {
296af7c9 1176 return 0;
0ab07c62 1177 }
296af7c9
BS
1178 }
1179
1180 return 1;
1181}
1182
/* Request every vCPU to stop and wait (on qemu_pause_cond) until all have
 * acknowledged.  Disables QEMU_CLOCK_VIRTUAL while paused.  When called from
 * a vCPU thread itself, the current vCPU is stopped directly; for TCG
 * (single shared thread) all vCPUs can then be marked stopped immediately
 * and no waiting is possible or needed.
 */
void pause_all_vcpus(void)
{
    CPUState *cpu;

    qemu_clock_enable(QEMU_CLOCK_VIRTUAL, false);
    CPU_FOREACH(cpu) {
        cpu->stop = true;
        qemu_cpu_kick(cpu);
    }

    if (qemu_in_vcpu_thread()) {
        cpu_stop_current();
        if (!kvm_enabled()) {
            /* TCG: the one vCPU thread is us, so no other vCPU can be
             * running — mark them all stopped and return. */
            CPU_FOREACH(cpu) {
                cpu->stop = false;
                cpu->stopped = true;
            }
            return;
        }
    }

    while (!all_vcpus_paused()) {
        qemu_cond_wait(&qemu_pause_cond, &qemu_global_mutex);
        /* Re-kick: a vCPU may have re-entered the guest before noticing. */
        CPU_FOREACH(cpu) {
            qemu_cpu_kick(cpu);
        }
    }
}
1211
/* Clear @cpu's pause state and wake its thread so it resumes execution. */
void cpu_resume(CPUState *cpu)
{
    cpu->stop = false;
    cpu->stopped = false;
    qemu_cpu_kick(cpu);
}
1218
296af7c9
BS
1219void resume_all_vcpus(void)
1220{
bdc44640 1221 CPUState *cpu;
296af7c9 1222
40daca54 1223 qemu_clock_enable(QEMU_CLOCK_VIRTUAL, true);
bdc44640 1224 CPU_FOREACH(cpu) {
182735ef 1225 cpu_resume(cpu);
296af7c9
BS
1226 }
1227}
1228
4900116e
DDAG
1229/* For temporary buffers for forming a name */
1230#define VCPU_THREAD_NAME_SIZE 16
1231
/* Attach @cpu to the (single, shared) TCG execution thread, creating that
 * thread on first use and blocking until it has signalled creation.
 * Subsequent vCPUs simply reuse the cached thread and halt condition.
 */
static void qemu_tcg_init_vcpu(CPUState *cpu)
{
    char thread_name[VCPU_THREAD_NAME_SIZE];

    tcg_cpu_address_space_init(cpu, cpu->as);

    /* share a single thread for all cpus with TCG */
    if (!tcg_cpu_thread) {
        cpu->thread = g_malloc0(sizeof(QemuThread));
        cpu->halt_cond = g_malloc0(sizeof(QemuCond));
        qemu_cond_init(cpu->halt_cond);
        tcg_halt_cond = cpu->halt_cond;
        snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/TCG",
                 cpu->cpu_index);
        qemu_thread_create(cpu->thread, thread_name, qemu_tcg_cpu_thread_fn,
                           cpu, QEMU_THREAD_JOINABLE);
#ifdef _WIN32
        /* Needed by qemu_cpu_kick_thread() to Suspend/ResumeThread. */
        cpu->hThread = qemu_thread_get_handle(cpu->thread);
#endif
        /* Wait for the thread's creation handshake (cpu->created). */
        while (!cpu->created) {
            qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
        }
        tcg_cpu_thread = cpu->thread;
    } else {
        cpu->thread = tcg_cpu_thread;
        cpu->halt_cond = tcg_halt_cond;
    }
}
1260
/* Create a dedicated host thread for a KVM vCPU and block until the thread
 * has completed its creation handshake (cpu->created set under the BQL).
 */
static void qemu_kvm_start_vcpu(CPUState *cpu)
{
    char thread_name[VCPU_THREAD_NAME_SIZE];

    cpu->thread = g_malloc0(sizeof(QemuThread));
    cpu->halt_cond = g_malloc0(sizeof(QemuCond));
    qemu_cond_init(cpu->halt_cond);
    snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/KVM",
             cpu->cpu_index);
    qemu_thread_create(cpu->thread, thread_name, qemu_kvm_cpu_thread_fn,
                       cpu, QEMU_THREAD_JOINABLE);
    /* Wait for the new thread to announce itself. */
    while (!cpu->created) {
        qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
    }
}
1276
/* Create the host thread for a dummy (qtest) vCPU and block until its
 * creation handshake completes.  Mirrors qemu_kvm_start_vcpu().
 */
static void qemu_dummy_start_vcpu(CPUState *cpu)
{
    char thread_name[VCPU_THREAD_NAME_SIZE];

    cpu->thread = g_malloc0(sizeof(QemuThread));
    cpu->halt_cond = g_malloc0(sizeof(QemuCond));
    qemu_cond_init(cpu->halt_cond);
    snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/DUMMY",
             cpu->cpu_index);
    qemu_thread_create(cpu->thread, thread_name, qemu_dummy_cpu_thread_fn, cpu,
                       QEMU_THREAD_JOINABLE);
    /* Wait for the new thread to announce itself. */
    while (!cpu->created) {
        qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
    }
}
1292
/* Finish vCPU initialization and start its execution thread, dispatching on
 * the configured accelerator (KVM, TCG, or the qtest dummy).  The vCPU
 * starts in the stopped state; resume_all_vcpus() kicks it off later.
 */
void qemu_init_vcpu(CPUState *cpu)
{
    cpu->nr_cores = smp_cores;
    cpu->nr_threads = smp_threads;
    cpu->stopped = true;
    if (kvm_enabled()) {
        qemu_kvm_start_vcpu(cpu);
    } else if (tcg_enabled()) {
        qemu_tcg_init_vcpu(cpu);
    } else {
        qemu_dummy_start_vcpu(cpu);
    }
}
1306
b4a3d965 1307void cpu_stop_current(void)
296af7c9 1308{
4917cf44
AF
1309 if (current_cpu) {
1310 current_cpu->stop = false;
1311 current_cpu->stopped = true;
1312 cpu_exit(current_cpu);
67bb172f 1313 qemu_cond_signal(&qemu_pause_cond);
b4a3d965 1314 }
296af7c9
BS
1315}
1316
56983463 1317int vm_stop(RunState state)
296af7c9 1318{
aa723c23 1319 if (qemu_in_vcpu_thread()) {
74892d24 1320 qemu_system_vmstop_request_prepare();
1dfb4dd9 1321 qemu_system_vmstop_request(state);
296af7c9
BS
1322 /*
1323 * FIXME: should not return to device code in case
1324 * vm_stop() has been requested.
1325 */
b4a3d965 1326 cpu_stop_current();
56983463 1327 return 0;
296af7c9 1328 }
56983463
KW
1329
1330 return do_vm_stop(state);
296af7c9
BS
1331}
1332
8a9236f1
LC
1333/* does a state transition even if the VM is already stopped,
1334 current state is forgotten forever */
56983463 1335int vm_stop_force_state(RunState state)
8a9236f1
LC
1336{
1337 if (runstate_is_running()) {
56983463 1338 return vm_stop(state);
8a9236f1
LC
1339 } else {
1340 runstate_set(state);
594a45ce
KW
1341 /* Make sure to return an error if the flush in a previous vm_stop()
1342 * failed. */
1343 return bdrv_flush_all();
8a9236f1
LC
1344 }
1345}
1346
/* Run one TCG execution slice for @env's vCPU and return cpu_exec()'s exit
 * code (e.g. EXCP_DEBUG).  When icount is enabled, the instruction budget
 * for the slice is derived from the next QEMU_CLOCK_VIRTUAL deadline before
 * the run, and any unexecuted instructions are folded back into the global
 * counter afterwards.
 */
static int tcg_cpu_exec(CPUArchState *env)
{
    CPUState *cpu = ENV_GET_CPU(env);
    int ret;
#ifdef CONFIG_PROFILER
    int64_t ti;
#endif

#ifdef CONFIG_PROFILER
    ti = profile_getclock();
#endif
    if (use_icount) {
        int64_t count;
        int64_t deadline;
        int decr;
        /* Remove any leftover budget from the previous slice so the global
         * icount reflects only instructions actually executed. */
        timers_state.qemu_icount -= (cpu->icount_decr.u16.low
                                    + cpu->icount_extra);
        cpu->icount_decr.u16.low = 0;
        cpu->icount_extra = 0;
        deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);

        /* Maintain prior (possibly buggy) behaviour where if no deadline
         * was set (as there is no QEMU_CLOCK_VIRTUAL timer) or it is more than
         * INT32_MAX nanoseconds ahead, we still use INT32_MAX
         * nanoseconds.
         */
        if ((deadline < 0) || (deadline > INT32_MAX)) {
            deadline = INT32_MAX;
        }

        count = qemu_icount_round(deadline);
        timers_state.qemu_icount += count;
        /* The low 16 bits hold the per-slice countdown; the remainder of
         * the budget is carried in icount_extra. */
        decr = (count > 0xffff) ? 0xffff : count;
        count -= decr;
        cpu->icount_decr.u16.low = decr;
        cpu->icount_extra = count;
    }
    ret = cpu_exec(env);
#ifdef CONFIG_PROFILER
    tcg_time += profile_getclock() - ti;
#endif
    if (use_icount) {
        /* Fold pending instructions back into the
           instruction counter, and clear the interrupt flag. */
        timers_state.qemu_icount -= (cpu->icount_decr.u16.low
                                    + cpu->icount_extra);
        cpu->icount_decr.u32 = 0;
        cpu->icount_extra = 0;
    }
    return ret;
}
1398
/* Round-robin scheduler for the single TCG thread: run each vCPU in turn,
 * starting from the file-scope cursor next_cpu, until the list is exhausted
 * or exit_request is raised.  The cursor persists across calls so scheduling
 * resumes where it left off.
 */
static void tcg_exec_all(void)
{
    int r;

    /* Account partial waits to QEMU_CLOCK_VIRTUAL.  */
    qemu_clock_warp(QEMU_CLOCK_VIRTUAL);

    if (next_cpu == NULL) {
        next_cpu = first_cpu;
    }
    for (; next_cpu != NULL && !exit_request; next_cpu = CPU_NEXT(next_cpu)) {
        CPUState *cpu = next_cpu;
        CPUArchState *env = cpu->env_ptr;

        /* Keep virtual time frozen while single-stepping without timers. */
        qemu_clock_enable(QEMU_CLOCK_VIRTUAL,
                          (cpu->singlestep_enabled & SSTEP_NOTIMER) == 0);

        if (cpu_can_run(cpu)) {
            r = tcg_cpu_exec(env);
            if (r == EXCP_DEBUG) {
                cpu_handle_guest_debug(cpu);
                break;
            }
        } else if (cpu->stop || cpu->stopped) {
            /* A pause was requested; stop scheduling this round. */
            break;
        }
    }
    exit_request = 0;
}
1428
/* Print the list of supported CPU models for this target (used by
 * "-cpu help").  Targets without a cpu_list implementation print nothing.
 */
void list_cpus(FILE *f, fprintf_function cpu_fprintf, const char *optarg)
{
    /* XXX: implement xxx_cpu_list for targets that still miss it */
#if defined(cpu_list)
    cpu_list(f, cpu_fprintf);
#endif
}
de0b36b6
LC
1436
/* QMP "query-cpus": build a CpuInfoList describing every vCPU (index,
 * halted state, QOM path, thread id, and a target-specific program counter).
 * Each vCPU's register state is synchronized from the accelerator first.
 * Caller owns the returned list (freed via the QAPI-generated destructor).
 */
CpuInfoList *qmp_query_cpus(Error **errp)
{
    CpuInfoList *head = NULL, *cur_item = NULL;
    CPUState *cpu;

    CPU_FOREACH(cpu) {
        CpuInfoList *info;
        /* Per-target env access is needed only for the PC-like fields. */
#if defined(TARGET_I386)
        X86CPU *x86_cpu = X86_CPU(cpu);
        CPUX86State *env = &x86_cpu->env;
#elif defined(TARGET_PPC)
        PowerPCCPU *ppc_cpu = POWERPC_CPU(cpu);
        CPUPPCState *env = &ppc_cpu->env;
#elif defined(TARGET_SPARC)
        SPARCCPU *sparc_cpu = SPARC_CPU(cpu);
        CPUSPARCState *env = &sparc_cpu->env;
#elif defined(TARGET_MIPS)
        MIPSCPU *mips_cpu = MIPS_CPU(cpu);
        CPUMIPSState *env = &mips_cpu->env;
#elif defined(TARGET_TRICORE)
        TriCoreCPU *tricore_cpu = TRICORE_CPU(cpu);
        CPUTriCoreState *env = &tricore_cpu->env;
#endif

        /* Pull current register state out of KVM/TCG before reading it. */
        cpu_synchronize_state(cpu);

        info = g_malloc0(sizeof(*info));
        info->value = g_malloc0(sizeof(*info->value));
        info->value->CPU = cpu->cpu_index;
        info->value->current = (cpu == first_cpu);
        info->value->halted = cpu->halted;
        info->value->qom_path = object_get_canonical_path(OBJECT(cpu));
        info->value->thread_id = cpu->thread_id;
#if defined(TARGET_I386)
        info->value->has_pc = true;
        info->value->pc = env->eip + env->segs[R_CS].base;
#elif defined(TARGET_PPC)
        info->value->has_nip = true;
        info->value->nip = env->nip;
#elif defined(TARGET_SPARC)
        info->value->has_pc = true;
        info->value->pc = env->pc;
        info->value->has_npc = true;
        info->value->npc = env->npc;
#elif defined(TARGET_MIPS)
        info->value->has_PC = true;
        info->value->PC = env->active_tc.PC;
#elif defined(TARGET_TRICORE)
        info->value->has_PC = true;
        info->value->PC = env->PC;
#endif

        /* XXX: waiting for the qapi to support GSList */
        if (!cur_item) {
            head = cur_item = info;
        } else {
            cur_item->next = info;
            cur_item = info;
        }
    }

    return head;
}
0cfd6a9a
LC
1500
1501void qmp_memsave(int64_t addr, int64_t size, const char *filename,
1502 bool has_cpu, int64_t cpu_index, Error **errp)
1503{
1504 FILE *f;
1505 uint32_t l;
55e5c285 1506 CPUState *cpu;
0cfd6a9a 1507 uint8_t buf[1024];
0dc9daf0 1508 int64_t orig_addr = addr, orig_size = size;
0cfd6a9a
LC
1509
1510 if (!has_cpu) {
1511 cpu_index = 0;
1512 }
1513
151d1322
AF
1514 cpu = qemu_get_cpu(cpu_index);
1515 if (cpu == NULL) {
c6bd8c70
MA
1516 error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "cpu-index",
1517 "a CPU number");
0cfd6a9a
LC
1518 return;
1519 }
1520
1521 f = fopen(filename, "wb");
1522 if (!f) {
618da851 1523 error_setg_file_open(errp, errno, filename);
0cfd6a9a
LC
1524 return;
1525 }
1526
1527 while (size != 0) {
1528 l = sizeof(buf);
1529 if (l > size)
1530 l = size;
2f4d0f59 1531 if (cpu_memory_rw_debug(cpu, addr, buf, l, 0) != 0) {
0dc9daf0
BP
1532 error_setg(errp, "Invalid addr 0x%016" PRIx64 "/size %" PRId64
1533 " specified", orig_addr, orig_size);
2f4d0f59
AK
1534 goto exit;
1535 }
0cfd6a9a 1536 if (fwrite(buf, 1, l, f) != l) {
c6bd8c70 1537 error_setg(errp, QERR_IO_ERROR);
0cfd6a9a
LC
1538 goto exit;
1539 }
1540 addr += l;
1541 size -= l;
1542 }
1543
1544exit:
1545 fclose(f);
1546}
6d3962bf
LC
1547
1548void qmp_pmemsave(int64_t addr, int64_t size, const char *filename,
1549 Error **errp)
1550{
1551 FILE *f;
1552 uint32_t l;
1553 uint8_t buf[1024];
1554
1555 f = fopen(filename, "wb");
1556 if (!f) {
618da851 1557 error_setg_file_open(errp, errno, filename);
6d3962bf
LC
1558 return;
1559 }
1560
1561 while (size != 0) {
1562 l = sizeof(buf);
1563 if (l > size)
1564 l = size;
eb6282f2 1565 cpu_physical_memory_read(addr, buf, l);
6d3962bf 1566 if (fwrite(buf, 1, l, f) != l) {
c6bd8c70 1567 error_setg(errp, QERR_IO_ERROR);
6d3962bf
LC
1568 goto exit;
1569 }
1570 addr += l;
1571 size -= l;
1572 }
1573
1574exit:
1575 fclose(f);
1576}
ab49ab5c
LC
1577
/* QMP "inject-nmi": deliver an NMI to the guest.
 * x86: raise an NMI on every vCPU, via the APIC when one is present.
 * Other targets: delegate to the generic NMI handler for the monitor's
 * current CPU; it sets @errp when the target has no NMI support.
 */
void qmp_inject_nmi(Error **errp)
{
#if defined(TARGET_I386)
    CPUState *cs;

    CPU_FOREACH(cs) {
        X86CPU *cpu = X86_CPU(cs);

        if (!cpu->apic_state) {
            /* No APIC modelled: inject directly into the core. */
            cpu_interrupt(cs, CPU_INTERRUPT_NMI);
        } else {
            apic_deliver_nmi(cpu->apic_state);
        }
    }
#else
    nmi_monitor_handle(monitor_get_cpu_index(), errp);
#endif
}
27498bef
ST
1596
1597void dump_drift_info(FILE *f, fprintf_function cpu_fprintf)
1598{
1599 if (!use_icount) {
1600 return;
1601 }
1602
1603 cpu_fprintf(f, "Host - Guest clock %"PRIi64" ms\n",
1604 (cpu_get_clock() - cpu_get_icount())/SCALE_MS);
1605 if (icount_align_option) {
1606 cpu_fprintf(f, "Max guest delay %"PRIi64" ms\n", -max_delay/SCALE_MS);
1607 cpu_fprintf(f, "Max guest advance %"PRIi64" ms\n", max_advance/SCALE_MS);
1608 } else {
1609 cpu_fprintf(f, "Max guest delay NA\n");
1610 cpu_fprintf(f, "Max guest advance NA\n");
1611 }
1612}