]> git.proxmox.com Git - mirror_qemu.git/blame - cpus.c
vl: use QLIST_FOREACH_SAFE to visit change state handlers
[mirror_qemu.git] / cpus.c
CommitLineData
296af7c9
BS
1/*
2 * QEMU System Emulator
3 *
4 * Copyright (c) 2003-2008 Fabrice Bellard
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to deal
8 * in the Software without restriction, including without limitation the rights
9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 * copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22 * THE SOFTWARE.
23 */
24
25/* Needed early for CONFIG_BSD etc. */
26#include "config-host.h"
27
83c9089e 28#include "monitor/monitor.h"
a4e15de9 29#include "qapi/qmp/qerror.h"
9c17d615 30#include "sysemu/sysemu.h"
022c62cb 31#include "exec/gdbstub.h"
9c17d615
PB
32#include "sysemu/dma.h"
33#include "sysemu/kvm.h"
de0b36b6 34#include "qmp-commands.h"
296af7c9 35
1de7afc9 36#include "qemu/thread.h"
9c17d615
PB
37#include "sysemu/cpus.h"
38#include "sysemu/qtest.h"
1de7afc9
PB
39#include "qemu/main-loop.h"
40#include "qemu/bitmap.h"
cb365646 41#include "qemu/seqlock.h"
a4e15de9 42#include "qapi-event.h"
9cb805fd 43#include "hw/nmi.h"
0ff0fc19
JK
44
45#ifndef _WIN32
1de7afc9 46#include "qemu/compatfd.h"
0ff0fc19 47#endif
296af7c9 48
6d9cb73c
JK
49#ifdef CONFIG_LINUX
50
51#include <sys/prctl.h>
52
c0532a76
MT
53#ifndef PR_MCE_KILL
54#define PR_MCE_KILL 33
55#endif
56
6d9cb73c
JK
57#ifndef PR_MCE_KILL_SET
58#define PR_MCE_KILL_SET 1
59#endif
60
61#ifndef PR_MCE_KILL_EARLY
62#define PR_MCE_KILL_EARLY 1
63#endif
64
65#endif /* CONFIG_LINUX */
66
182735ef 67static CPUState *next_cpu;
27498bef
ST
68int64_t max_delay;
69int64_t max_advance;
296af7c9 70
321bc0b2
TC
71bool cpu_is_stopped(CPUState *cpu)
72{
73 return cpu->stopped || !runstate_is_running();
74}
75
a98ae1d8 76static bool cpu_thread_is_idle(CPUState *cpu)
ac873f1e 77{
c64ca814 78 if (cpu->stop || cpu->queued_work_first) {
ac873f1e
PM
79 return false;
80 }
321bc0b2 81 if (cpu_is_stopped(cpu)) {
ac873f1e
PM
82 return true;
83 }
8c2e1b00 84 if (!cpu->halted || cpu_has_work(cpu) ||
215e79c0 85 kvm_halt_in_kernel()) {
ac873f1e
PM
86 return false;
87 }
88 return true;
89}
90
91static bool all_cpu_threads_idle(void)
92{
182735ef 93 CPUState *cpu;
ac873f1e 94
bdc44640 95 CPU_FOREACH(cpu) {
182735ef 96 if (!cpu_thread_is_idle(cpu)) {
ac873f1e
PM
97 return false;
98 }
99 }
100 return true;
101}
102
946fb27c
PB
103/***********************************************************/
104/* guest cycle counter */
105
a3270e19
PB
106/* Protected by TimersState seqlock */
107
71468395 108static int64_t vm_clock_warp_start = -1;
946fb27c
PB
109/* Conversion factor from emulated instructions to virtual clock ticks. */
110static int icount_time_shift;
111/* Arbitrarily pick 1MIPS as the minimum allowable speed. */
112#define MAX_ICOUNT_SHIFT 10
a3270e19 113
946fb27c
PB
114static QEMUTimer *icount_rt_timer;
115static QEMUTimer *icount_vm_timer;
116static QEMUTimer *icount_warp_timer;
946fb27c
PB
117
118typedef struct TimersState {
cb365646 119 /* Protected by BQL. */
946fb27c
PB
120 int64_t cpu_ticks_prev;
121 int64_t cpu_ticks_offset;
cb365646
LPF
122
123 /* cpu_clock_offset can be read out of BQL, so protect it with
124 * this lock.
125 */
126 QemuSeqLock vm_clock_seqlock;
946fb27c
PB
127 int64_t cpu_clock_offset;
128 int32_t cpu_ticks_enabled;
129 int64_t dummy;
c96778bb
FK
130
131 /* Compensate for varying guest execution speed. */
132 int64_t qemu_icount_bias;
133 /* Only written by TCG thread */
134 int64_t qemu_icount;
946fb27c
PB
135} TimersState;
136
d9cd4007 137static TimersState timers_state;
946fb27c
PB
138
139/* Return the virtual CPU time, based on the instruction counter. */
17a15f1b 140static int64_t cpu_get_icount_locked(void)
946fb27c
PB
141{
142 int64_t icount;
4917cf44 143 CPUState *cpu = current_cpu;
946fb27c 144
c96778bb 145 icount = timers_state.qemu_icount;
4917cf44 146 if (cpu) {
99df7dce 147 if (!cpu_can_do_io(cpu)) {
946fb27c
PB
148 fprintf(stderr, "Bad clock read\n");
149 }
28ecfd7a 150 icount -= (cpu->icount_decr.u16.low + cpu->icount_extra);
946fb27c 151 }
3f031313 152 return timers_state.qemu_icount_bias + cpu_icount_to_ns(icount);
946fb27c
PB
153}
154
17a15f1b
PB
155int64_t cpu_get_icount(void)
156{
157 int64_t icount;
158 unsigned start;
159
160 do {
161 start = seqlock_read_begin(&timers_state.vm_clock_seqlock);
162 icount = cpu_get_icount_locked();
163 } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start));
164
165 return icount;
166}
167
3f031313
FK
168int64_t cpu_icount_to_ns(int64_t icount)
169{
170 return icount << icount_time_shift;
171}
172
946fb27c 173/* return the host CPU cycle counter and handle stop/restart */
cb365646 174/* Caller must hold the BQL */
946fb27c
PB
175int64_t cpu_get_ticks(void)
176{
5f3e3101
PB
177 int64_t ticks;
178
946fb27c
PB
179 if (use_icount) {
180 return cpu_get_icount();
181 }
5f3e3101
PB
182
183 ticks = timers_state.cpu_ticks_offset;
184 if (timers_state.cpu_ticks_enabled) {
185 ticks += cpu_get_real_ticks();
186 }
187
188 if (timers_state.cpu_ticks_prev > ticks) {
189 /* Note: non increasing ticks may happen if the host uses
190 software suspend */
191 timers_state.cpu_ticks_offset += timers_state.cpu_ticks_prev - ticks;
192 ticks = timers_state.cpu_ticks_prev;
946fb27c 193 }
5f3e3101
PB
194
195 timers_state.cpu_ticks_prev = ticks;
196 return ticks;
946fb27c
PB
197}
198
cb365646 199static int64_t cpu_get_clock_locked(void)
946fb27c 200{
5f3e3101 201 int64_t ticks;
cb365646 202
5f3e3101
PB
203 ticks = timers_state.cpu_clock_offset;
204 if (timers_state.cpu_ticks_enabled) {
205 ticks += get_clock();
946fb27c 206 }
cb365646 207
5f3e3101 208 return ticks;
cb365646
LPF
209}
210
211/* return the host CPU monotonic timer and handle stop/restart */
212int64_t cpu_get_clock(void)
213{
214 int64_t ti;
215 unsigned start;
216
217 do {
218 start = seqlock_read_begin(&timers_state.vm_clock_seqlock);
219 ti = cpu_get_clock_locked();
220 } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start));
221
222 return ti;
946fb27c
PB
223}
224
c2aa5f81
ST
225/* return the offset between the host clock and virtual CPU clock */
226int64_t cpu_get_clock_offset(void)
227{
228 int64_t ti;
229 unsigned start;
230
231 do {
232 start = seqlock_read_begin(&timers_state.vm_clock_seqlock);
233 ti = timers_state.cpu_clock_offset;
234 if (!timers_state.cpu_ticks_enabled) {
235 ti -= get_clock();
236 }
237 } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start));
238
239 return -ti;
240}
241
cb365646
LPF
242/* enable cpu_get_ticks()
243 * Caller must hold BQL which server as mutex for vm_clock_seqlock.
244 */
946fb27c
PB
245void cpu_enable_ticks(void)
246{
cb365646
LPF
247 /* Here, the really thing protected by seqlock is cpu_clock_offset. */
248 seqlock_write_lock(&timers_state.vm_clock_seqlock);
946fb27c
PB
249 if (!timers_state.cpu_ticks_enabled) {
250 timers_state.cpu_ticks_offset -= cpu_get_real_ticks();
251 timers_state.cpu_clock_offset -= get_clock();
252 timers_state.cpu_ticks_enabled = 1;
253 }
cb365646 254 seqlock_write_unlock(&timers_state.vm_clock_seqlock);
946fb27c
PB
255}
256
257/* disable cpu_get_ticks() : the clock is stopped. You must not call
cb365646
LPF
258 * cpu_get_ticks() after that.
259 * Caller must hold BQL which server as mutex for vm_clock_seqlock.
260 */
946fb27c
PB
261void cpu_disable_ticks(void)
262{
cb365646
LPF
263 /* Here, the really thing protected by seqlock is cpu_clock_offset. */
264 seqlock_write_lock(&timers_state.vm_clock_seqlock);
946fb27c 265 if (timers_state.cpu_ticks_enabled) {
5f3e3101 266 timers_state.cpu_ticks_offset += cpu_get_real_ticks();
cb365646 267 timers_state.cpu_clock_offset = cpu_get_clock_locked();
946fb27c
PB
268 timers_state.cpu_ticks_enabled = 0;
269 }
cb365646 270 seqlock_write_unlock(&timers_state.vm_clock_seqlock);
946fb27c
PB
271}
272
273/* Correlation between real and virtual time is always going to be
274 fairly approximate, so ignore small variation.
275 When the guest is idle real and virtual time will be aligned in
276 the IO wait loop. */
277#define ICOUNT_WOBBLE (get_ticks_per_sec() / 10)
278
279static void icount_adjust(void)
280{
281 int64_t cur_time;
282 int64_t cur_icount;
283 int64_t delta;
a3270e19
PB
284
285 /* Protected by TimersState mutex. */
946fb27c 286 static int64_t last_delta;
468cc7cf 287
946fb27c
PB
288 /* If the VM is not running, then do nothing. */
289 if (!runstate_is_running()) {
290 return;
291 }
468cc7cf 292
17a15f1b
PB
293 seqlock_write_lock(&timers_state.vm_clock_seqlock);
294 cur_time = cpu_get_clock_locked();
295 cur_icount = cpu_get_icount_locked();
468cc7cf 296
946fb27c
PB
297 delta = cur_icount - cur_time;
298 /* FIXME: This is a very crude algorithm, somewhat prone to oscillation. */
299 if (delta > 0
300 && last_delta + ICOUNT_WOBBLE < delta * 2
301 && icount_time_shift > 0) {
302 /* The guest is getting too far ahead. Slow time down. */
303 icount_time_shift--;
304 }
305 if (delta < 0
306 && last_delta - ICOUNT_WOBBLE > delta * 2
307 && icount_time_shift < MAX_ICOUNT_SHIFT) {
308 /* The guest is getting too far behind. Speed time up. */
309 icount_time_shift++;
310 }
311 last_delta = delta;
c96778bb
FK
312 timers_state.qemu_icount_bias = cur_icount
313 - (timers_state.qemu_icount << icount_time_shift);
17a15f1b 314 seqlock_write_unlock(&timers_state.vm_clock_seqlock);
946fb27c
PB
315}
316
317static void icount_adjust_rt(void *opaque)
318{
40daca54
AB
319 timer_mod(icount_rt_timer,
320 qemu_clock_get_ms(QEMU_CLOCK_REALTIME) + 1000);
946fb27c
PB
321 icount_adjust();
322}
323
324static void icount_adjust_vm(void *opaque)
325{
40daca54
AB
326 timer_mod(icount_vm_timer,
327 qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
328 get_ticks_per_sec() / 10);
946fb27c
PB
329 icount_adjust();
330}
331
332static int64_t qemu_icount_round(int64_t count)
333{
334 return (count + (1 << icount_time_shift) - 1) >> icount_time_shift;
335}
336
337static void icount_warp_rt(void *opaque)
338{
17a15f1b
PB
339 /* The icount_warp_timer is rescheduled soon after vm_clock_warp_start
340 * changes from -1 to another value, so the race here is okay.
341 */
342 if (atomic_read(&vm_clock_warp_start) == -1) {
946fb27c
PB
343 return;
344 }
345
17a15f1b 346 seqlock_write_lock(&timers_state.vm_clock_seqlock);
946fb27c 347 if (runstate_is_running()) {
40daca54 348 int64_t clock = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
8ed961d9
PB
349 int64_t warp_delta;
350
351 warp_delta = clock - vm_clock_warp_start;
352 if (use_icount == 2) {
946fb27c 353 /*
40daca54 354 * In adaptive mode, do not let QEMU_CLOCK_VIRTUAL run too
946fb27c
PB
355 * far ahead of real time.
356 */
17a15f1b
PB
357 int64_t cur_time = cpu_get_clock_locked();
358 int64_t cur_icount = cpu_get_icount_locked();
946fb27c 359 int64_t delta = cur_time - cur_icount;
8ed961d9 360 warp_delta = MIN(warp_delta, delta);
946fb27c 361 }
c96778bb 362 timers_state.qemu_icount_bias += warp_delta;
946fb27c
PB
363 }
364 vm_clock_warp_start = -1;
17a15f1b 365 seqlock_write_unlock(&timers_state.vm_clock_seqlock);
8ed961d9
PB
366
367 if (qemu_clock_expired(QEMU_CLOCK_VIRTUAL)) {
368 qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
369 }
946fb27c
PB
370}
371
8156be56
PB
372void qtest_clock_warp(int64_t dest)
373{
40daca54 374 int64_t clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
8156be56
PB
375 assert(qtest_enabled());
376 while (clock < dest) {
40daca54 377 int64_t deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
c9299e2f 378 int64_t warp = qemu_soonest_timeout(dest - clock, deadline);
17a15f1b 379 seqlock_write_lock(&timers_state.vm_clock_seqlock);
c96778bb 380 timers_state.qemu_icount_bias += warp;
17a15f1b
PB
381 seqlock_write_unlock(&timers_state.vm_clock_seqlock);
382
40daca54
AB
383 qemu_clock_run_timers(QEMU_CLOCK_VIRTUAL);
384 clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
8156be56 385 }
40daca54 386 qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
8156be56
PB
387}
388
40daca54 389void qemu_clock_warp(QEMUClockType type)
946fb27c 390{
ce78d18c 391 int64_t clock;
946fb27c
PB
392 int64_t deadline;
393
394 /*
395 * There are too many global variables to make the "warp" behavior
396 * applicable to other clocks. But a clock argument removes the
397 * need for if statements all over the place.
398 */
40daca54 399 if (type != QEMU_CLOCK_VIRTUAL || !use_icount) {
946fb27c
PB
400 return;
401 }
402
403 /*
40daca54
AB
404 * If the CPUs have been sleeping, advance QEMU_CLOCK_VIRTUAL timer now.
405 * This ensures that the deadline for the timer is computed correctly below.
946fb27c
PB
406 * This also makes sure that the insn counter is synchronized before the
407 * CPU starts running, in case the CPU is woken by an event other than
40daca54 408 * the earliest QEMU_CLOCK_VIRTUAL timer.
946fb27c
PB
409 */
410 icount_warp_rt(NULL);
ce78d18c
PB
411 timer_del(icount_warp_timer);
412 if (!all_cpu_threads_idle()) {
946fb27c
PB
413 return;
414 }
415
8156be56
PB
416 if (qtest_enabled()) {
417 /* When testing, qtest commands advance icount. */
418 return;
419 }
420
ac70aafc 421 /* We want to use the earliest deadline from ALL vm_clocks */
ce78d18c 422 clock = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
40daca54 423 deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
ce78d18c
PB
424 if (deadline < 0) {
425 return;
ac70aafc
AB
426 }
427
946fb27c
PB
428 if (deadline > 0) {
429 /*
40daca54 430 * Ensure QEMU_CLOCK_VIRTUAL proceeds even when the virtual CPU goes to
946fb27c
PB
431 * sleep. Otherwise, the CPU might be waiting for a future timer
432 * interrupt to wake it up, but the interrupt never comes because
433 * the vCPU isn't running any insns and thus doesn't advance the
40daca54 434 * QEMU_CLOCK_VIRTUAL.
946fb27c
PB
435 *
436 * An extreme solution for this problem would be to never let VCPUs
40daca54
AB
437 * sleep in icount mode if there is a pending QEMU_CLOCK_VIRTUAL
438 * timer; rather time could just advance to the next QEMU_CLOCK_VIRTUAL
439 * event. Instead, we do stop VCPUs and only advance QEMU_CLOCK_VIRTUAL
440 * after some e"real" time, (related to the time left until the next
441 * event) has passed. The QEMU_CLOCK_REALTIME timer will do this.
442 * This avoids that the warps are visible externally; for example,
443 * you will not be sending network packets continuously instead of
444 * every 100ms.
946fb27c 445 */
17a15f1b 446 seqlock_write_lock(&timers_state.vm_clock_seqlock);
ce78d18c
PB
447 if (vm_clock_warp_start == -1 || vm_clock_warp_start > clock) {
448 vm_clock_warp_start = clock;
449 }
17a15f1b 450 seqlock_write_unlock(&timers_state.vm_clock_seqlock);
ce78d18c 451 timer_mod_anticipate(icount_warp_timer, clock + deadline);
ac70aafc 452 } else if (deadline == 0) {
40daca54 453 qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
946fb27c
PB
454 }
455}
456
d09eae37
FK
457static bool icount_state_needed(void *opaque)
458{
459 return use_icount;
460}
461
462/*
463 * This is a subsection for icount migration.
464 */
465static const VMStateDescription icount_vmstate_timers = {
466 .name = "timer/icount",
467 .version_id = 1,
468 .minimum_version_id = 1,
469 .fields = (VMStateField[]) {
470 VMSTATE_INT64(qemu_icount_bias, TimersState),
471 VMSTATE_INT64(qemu_icount, TimersState),
472 VMSTATE_END_OF_LIST()
473 }
474};
475
946fb27c
PB
476static const VMStateDescription vmstate_timers = {
477 .name = "timer",
478 .version_id = 2,
479 .minimum_version_id = 1,
35d08458 480 .fields = (VMStateField[]) {
946fb27c
PB
481 VMSTATE_INT64(cpu_ticks_offset, TimersState),
482 VMSTATE_INT64(dummy, TimersState),
483 VMSTATE_INT64_V(cpu_clock_offset, TimersState, 2),
484 VMSTATE_END_OF_LIST()
d09eae37
FK
485 },
486 .subsections = (VMStateSubsection[]) {
487 {
488 .vmsd = &icount_vmstate_timers,
489 .needed = icount_state_needed,
490 }, {
491 /* empty */
492 }
946fb27c
PB
493 }
494};
495
1ad9580b 496void configure_icount(QemuOpts *opts, Error **errp)
946fb27c 497{
1ad9580b 498 const char *option;
a8bfac37 499 char *rem_str = NULL;
1ad9580b 500
cb365646 501 seqlock_init(&timers_state.vm_clock_seqlock, NULL);
946fb27c 502 vmstate_register(NULL, 0, &vmstate_timers, &timers_state);
1ad9580b 503 option = qemu_opt_get(opts, "shift");
946fb27c 504 if (!option) {
a8bfac37
ST
505 if (qemu_opt_get(opts, "align") != NULL) {
506 error_setg(errp, "Please specify shift option when using align");
507 }
946fb27c
PB
508 return;
509 }
a8bfac37 510 icount_align_option = qemu_opt_get_bool(opts, "align", false);
40daca54
AB
511 icount_warp_timer = timer_new_ns(QEMU_CLOCK_REALTIME,
512 icount_warp_rt, NULL);
946fb27c 513 if (strcmp(option, "auto") != 0) {
a8bfac37
ST
514 errno = 0;
515 icount_time_shift = strtol(option, &rem_str, 0);
516 if (errno != 0 || *rem_str != '\0' || !strlen(option)) {
517 error_setg(errp, "icount: Invalid shift value");
518 }
946fb27c
PB
519 use_icount = 1;
520 return;
a8bfac37
ST
521 } else if (icount_align_option) {
522 error_setg(errp, "shift=auto and align=on are incompatible");
946fb27c
PB
523 }
524
525 use_icount = 2;
526
527 /* 125MIPS seems a reasonable initial guess at the guest speed.
528 It will be corrected fairly quickly anyway. */
529 icount_time_shift = 3;
530
531 /* Have both realtime and virtual time triggers for speed adjustment.
532 The realtime trigger catches emulated time passing too slowly,
533 the virtual time trigger catches emulated time passing too fast.
534 Realtime triggers occur even when idle, so use them less frequently
535 than VM triggers. */
40daca54
AB
536 icount_rt_timer = timer_new_ms(QEMU_CLOCK_REALTIME,
537 icount_adjust_rt, NULL);
538 timer_mod(icount_rt_timer,
539 qemu_clock_get_ms(QEMU_CLOCK_REALTIME) + 1000);
540 icount_vm_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
541 icount_adjust_vm, NULL);
542 timer_mod(icount_vm_timer,
543 qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
544 get_ticks_per_sec() / 10);
946fb27c
PB
545}
546
296af7c9
BS
547/***********************************************************/
548void hw_error(const char *fmt, ...)
549{
550 va_list ap;
55e5c285 551 CPUState *cpu;
296af7c9
BS
552
553 va_start(ap, fmt);
554 fprintf(stderr, "qemu: hardware error: ");
555 vfprintf(stderr, fmt, ap);
556 fprintf(stderr, "\n");
bdc44640 557 CPU_FOREACH(cpu) {
55e5c285 558 fprintf(stderr, "CPU #%d:\n", cpu->cpu_index);
878096ee 559 cpu_dump_state(cpu, stderr, fprintf, CPU_DUMP_FPU);
296af7c9
BS
560 }
561 va_end(ap);
562 abort();
563}
564
565void cpu_synchronize_all_states(void)
566{
182735ef 567 CPUState *cpu;
296af7c9 568
bdc44640 569 CPU_FOREACH(cpu) {
182735ef 570 cpu_synchronize_state(cpu);
296af7c9
BS
571 }
572}
573
574void cpu_synchronize_all_post_reset(void)
575{
182735ef 576 CPUState *cpu;
296af7c9 577
bdc44640 578 CPU_FOREACH(cpu) {
182735ef 579 cpu_synchronize_post_reset(cpu);
296af7c9
BS
580 }
581}
582
583void cpu_synchronize_all_post_init(void)
584{
182735ef 585 CPUState *cpu;
296af7c9 586
bdc44640 587 CPU_FOREACH(cpu) {
182735ef 588 cpu_synchronize_post_init(cpu);
296af7c9
BS
589 }
590}
591
56983463 592static int do_vm_stop(RunState state)
296af7c9 593{
56983463
KW
594 int ret = 0;
595
1354869c 596 if (runstate_is_running()) {
296af7c9 597 cpu_disable_ticks();
296af7c9 598 pause_all_vcpus();
f5bbfba1 599 runstate_set(state);
1dfb4dd9 600 vm_state_notify(0, state);
a4e15de9 601 qapi_event_send_stop(&error_abort);
296af7c9 602 }
56983463 603
594a45ce
KW
604 bdrv_drain_all();
605 ret = bdrv_flush_all();
606
56983463 607 return ret;
296af7c9
BS
608}
609
a1fcaa73 610static bool cpu_can_run(CPUState *cpu)
296af7c9 611{
4fdeee7c 612 if (cpu->stop) {
a1fcaa73 613 return false;
0ab07c62 614 }
321bc0b2 615 if (cpu_is_stopped(cpu)) {
a1fcaa73 616 return false;
0ab07c62 617 }
a1fcaa73 618 return true;
296af7c9
BS
619}
620
91325046 621static void cpu_handle_guest_debug(CPUState *cpu)
83f338f7 622{
64f6b346 623 gdb_set_stop_cpu(cpu);
8cf71710 624 qemu_system_debug_request();
f324e766 625 cpu->stopped = true;
3c638d06
JK
626}
627
714bd040
PB
628static void cpu_signal(int sig)
629{
4917cf44
AF
630 if (current_cpu) {
631 cpu_exit(current_cpu);
714bd040
PB
632 }
633 exit_request = 1;
634}
714bd040 635
6d9cb73c
JK
636#ifdef CONFIG_LINUX
637static void sigbus_reraise(void)
638{
639 sigset_t set;
640 struct sigaction action;
641
642 memset(&action, 0, sizeof(action));
643 action.sa_handler = SIG_DFL;
644 if (!sigaction(SIGBUS, &action, NULL)) {
645 raise(SIGBUS);
646 sigemptyset(&set);
647 sigaddset(&set, SIGBUS);
648 sigprocmask(SIG_UNBLOCK, &set, NULL);
649 }
650 perror("Failed to re-raise SIGBUS!\n");
651 abort();
652}
653
654static void sigbus_handler(int n, struct qemu_signalfd_siginfo *siginfo,
655 void *ctx)
656{
657 if (kvm_on_sigbus(siginfo->ssi_code,
658 (void *)(intptr_t)siginfo->ssi_addr)) {
659 sigbus_reraise();
660 }
661}
662
663static void qemu_init_sigbus(void)
664{
665 struct sigaction action;
666
667 memset(&action, 0, sizeof(action));
668 action.sa_flags = SA_SIGINFO;
669 action.sa_sigaction = (void (*)(int, siginfo_t*, void*))sigbus_handler;
670 sigaction(SIGBUS, &action, NULL);
671
672 prctl(PR_MCE_KILL, PR_MCE_KILL_SET, PR_MCE_KILL_EARLY, 0, 0);
673}
674
290adf38 675static void qemu_kvm_eat_signals(CPUState *cpu)
1ab3c6c0
JK
676{
677 struct timespec ts = { 0, 0 };
678 siginfo_t siginfo;
679 sigset_t waitset;
680 sigset_t chkset;
681 int r;
682
683 sigemptyset(&waitset);
684 sigaddset(&waitset, SIG_IPI);
685 sigaddset(&waitset, SIGBUS);
686
687 do {
688 r = sigtimedwait(&waitset, &siginfo, &ts);
689 if (r == -1 && !(errno == EAGAIN || errno == EINTR)) {
690 perror("sigtimedwait");
691 exit(1);
692 }
693
694 switch (r) {
695 case SIGBUS:
290adf38 696 if (kvm_on_sigbus_vcpu(cpu, siginfo.si_code, siginfo.si_addr)) {
1ab3c6c0
JK
697 sigbus_reraise();
698 }
699 break;
700 default:
701 break;
702 }
703
704 r = sigpending(&chkset);
705 if (r == -1) {
706 perror("sigpending");
707 exit(1);
708 }
709 } while (sigismember(&chkset, SIG_IPI) || sigismember(&chkset, SIGBUS));
1ab3c6c0
JK
710}
711
6d9cb73c
JK
712#else /* !CONFIG_LINUX */
713
714static void qemu_init_sigbus(void)
715{
716}
1ab3c6c0 717
290adf38 718static void qemu_kvm_eat_signals(CPUState *cpu)
1ab3c6c0
JK
719{
720}
6d9cb73c
JK
721#endif /* !CONFIG_LINUX */
722
296af7c9 723#ifndef _WIN32
55f8d6ac
JK
724static void dummy_signal(int sig)
725{
726}
55f8d6ac 727
13618e05 728static void qemu_kvm_init_cpu_signals(CPUState *cpu)
714bd040
PB
729{
730 int r;
731 sigset_t set;
732 struct sigaction sigact;
733
734 memset(&sigact, 0, sizeof(sigact));
735 sigact.sa_handler = dummy_signal;
736 sigaction(SIG_IPI, &sigact, NULL);
737
714bd040
PB
738 pthread_sigmask(SIG_BLOCK, NULL, &set);
739 sigdelset(&set, SIG_IPI);
714bd040 740 sigdelset(&set, SIGBUS);
491d6e80 741 r = kvm_set_signal_mask(cpu, &set);
714bd040
PB
742 if (r) {
743 fprintf(stderr, "kvm_set_signal_mask: %s\n", strerror(-r));
744 exit(1);
745 }
746}
747
748static void qemu_tcg_init_cpu_signals(void)
749{
714bd040
PB
750 sigset_t set;
751 struct sigaction sigact;
752
753 memset(&sigact, 0, sizeof(sigact));
754 sigact.sa_handler = cpu_signal;
755 sigaction(SIG_IPI, &sigact, NULL);
756
757 sigemptyset(&set);
758 sigaddset(&set, SIG_IPI);
759 pthread_sigmask(SIG_UNBLOCK, &set, NULL);
714bd040
PB
760}
761
55f8d6ac 762#else /* _WIN32 */
13618e05 763static void qemu_kvm_init_cpu_signals(CPUState *cpu)
ff48eb5f 764{
714bd040
PB
765 abort();
766}
ff48eb5f 767
714bd040
PB
768static void qemu_tcg_init_cpu_signals(void)
769{
ff48eb5f 770}
714bd040 771#endif /* _WIN32 */
ff48eb5f 772
b2532d88 773static QemuMutex qemu_global_mutex;
46daff13
PB
774static QemuCond qemu_io_proceeded_cond;
775static bool iothread_requesting_mutex;
296af7c9
BS
776
777static QemuThread io_thread;
778
779static QemuThread *tcg_cpu_thread;
780static QemuCond *tcg_halt_cond;
781
296af7c9
BS
782/* cpu creation */
783static QemuCond qemu_cpu_cond;
784/* system init */
296af7c9 785static QemuCond qemu_pause_cond;
e82bcec2 786static QemuCond qemu_work_cond;
296af7c9 787
d3b12f5d 788void qemu_init_cpu_loop(void)
296af7c9 789{
6d9cb73c 790 qemu_init_sigbus();
ed94592b 791 qemu_cond_init(&qemu_cpu_cond);
ed94592b
AL
792 qemu_cond_init(&qemu_pause_cond);
793 qemu_cond_init(&qemu_work_cond);
46daff13 794 qemu_cond_init(&qemu_io_proceeded_cond);
296af7c9 795 qemu_mutex_init(&qemu_global_mutex);
296af7c9 796
b7680cb6 797 qemu_thread_get_self(&io_thread);
296af7c9
BS
798}
799
f100f0b3 800void run_on_cpu(CPUState *cpu, void (*func)(void *data), void *data)
e82bcec2
MT
801{
802 struct qemu_work_item wi;
803
60e82579 804 if (qemu_cpu_is_self(cpu)) {
e82bcec2
MT
805 func(data);
806 return;
807 }
808
809 wi.func = func;
810 wi.data = data;
3c02270d 811 wi.free = false;
c64ca814
AF
812 if (cpu->queued_work_first == NULL) {
813 cpu->queued_work_first = &wi;
0ab07c62 814 } else {
c64ca814 815 cpu->queued_work_last->next = &wi;
0ab07c62 816 }
c64ca814 817 cpu->queued_work_last = &wi;
e82bcec2
MT
818 wi.next = NULL;
819 wi.done = false;
820
c08d7424 821 qemu_cpu_kick(cpu);
e82bcec2 822 while (!wi.done) {
4917cf44 823 CPUState *self_cpu = current_cpu;
e82bcec2
MT
824
825 qemu_cond_wait(&qemu_work_cond, &qemu_global_mutex);
4917cf44 826 current_cpu = self_cpu;
e82bcec2
MT
827 }
828}
829
3c02270d
CV
830void async_run_on_cpu(CPUState *cpu, void (*func)(void *data), void *data)
831{
832 struct qemu_work_item *wi;
833
834 if (qemu_cpu_is_self(cpu)) {
835 func(data);
836 return;
837 }
838
839 wi = g_malloc0(sizeof(struct qemu_work_item));
840 wi->func = func;
841 wi->data = data;
842 wi->free = true;
843 if (cpu->queued_work_first == NULL) {
844 cpu->queued_work_first = wi;
845 } else {
846 cpu->queued_work_last->next = wi;
847 }
848 cpu->queued_work_last = wi;
849 wi->next = NULL;
850 wi->done = false;
851
852 qemu_cpu_kick(cpu);
853}
854
6d45b109 855static void flush_queued_work(CPUState *cpu)
e82bcec2
MT
856{
857 struct qemu_work_item *wi;
858
c64ca814 859 if (cpu->queued_work_first == NULL) {
e82bcec2 860 return;
0ab07c62 861 }
e82bcec2 862
c64ca814
AF
863 while ((wi = cpu->queued_work_first)) {
864 cpu->queued_work_first = wi->next;
e82bcec2
MT
865 wi->func(wi->data);
866 wi->done = true;
3c02270d
CV
867 if (wi->free) {
868 g_free(wi);
869 }
e82bcec2 870 }
c64ca814 871 cpu->queued_work_last = NULL;
e82bcec2
MT
872 qemu_cond_broadcast(&qemu_work_cond);
873}
874
509a0d78 875static void qemu_wait_io_event_common(CPUState *cpu)
296af7c9 876{
4fdeee7c
AF
877 if (cpu->stop) {
878 cpu->stop = false;
f324e766 879 cpu->stopped = true;
296af7c9
BS
880 qemu_cond_signal(&qemu_pause_cond);
881 }
6d45b109 882 flush_queued_work(cpu);
216fc9a4 883 cpu->thread_kicked = false;
296af7c9
BS
884}
885
6cabe1f3 886static void qemu_tcg_wait_io_event(void)
296af7c9 887{
182735ef 888 CPUState *cpu;
6cabe1f3 889
16400322 890 while (all_cpu_threads_idle()) {
ab33fcda
PB
891 /* Start accounting real time to the virtual clock if the CPUs
892 are idle. */
40daca54 893 qemu_clock_warp(QEMU_CLOCK_VIRTUAL);
9705fbb5 894 qemu_cond_wait(tcg_halt_cond, &qemu_global_mutex);
16400322 895 }
296af7c9 896
46daff13
PB
897 while (iothread_requesting_mutex) {
898 qemu_cond_wait(&qemu_io_proceeded_cond, &qemu_global_mutex);
899 }
6cabe1f3 900
bdc44640 901 CPU_FOREACH(cpu) {
182735ef 902 qemu_wait_io_event_common(cpu);
6cabe1f3 903 }
296af7c9
BS
904}
905
fd529e8f 906static void qemu_kvm_wait_io_event(CPUState *cpu)
296af7c9 907{
a98ae1d8 908 while (cpu_thread_is_idle(cpu)) {
f5c121b8 909 qemu_cond_wait(cpu->halt_cond, &qemu_global_mutex);
16400322 910 }
296af7c9 911
290adf38 912 qemu_kvm_eat_signals(cpu);
509a0d78 913 qemu_wait_io_event_common(cpu);
296af7c9
BS
914}
915
7e97cd88 916static void *qemu_kvm_cpu_thread_fn(void *arg)
296af7c9 917{
48a106bd 918 CPUState *cpu = arg;
84b4915d 919 int r;
296af7c9 920
6164e6d6 921 qemu_mutex_lock(&qemu_global_mutex);
814e612e 922 qemu_thread_get_self(cpu->thread);
9f09e18a 923 cpu->thread_id = qemu_get_thread_id();
4917cf44 924 current_cpu = cpu;
296af7c9 925
504134d2 926 r = kvm_init_vcpu(cpu);
84b4915d
JK
927 if (r < 0) {
928 fprintf(stderr, "kvm_init_vcpu failed: %s\n", strerror(-r));
929 exit(1);
930 }
296af7c9 931
13618e05 932 qemu_kvm_init_cpu_signals(cpu);
296af7c9
BS
933
934 /* signal CPU creation */
61a46217 935 cpu->created = true;
296af7c9
BS
936 qemu_cond_signal(&qemu_cpu_cond);
937
296af7c9 938 while (1) {
a1fcaa73 939 if (cpu_can_run(cpu)) {
1458c363 940 r = kvm_cpu_exec(cpu);
83f338f7 941 if (r == EXCP_DEBUG) {
91325046 942 cpu_handle_guest_debug(cpu);
83f338f7 943 }
0ab07c62 944 }
fd529e8f 945 qemu_kvm_wait_io_event(cpu);
296af7c9
BS
946 }
947
948 return NULL;
949}
950
c7f0f3b1
AL
951static void *qemu_dummy_cpu_thread_fn(void *arg)
952{
953#ifdef _WIN32
954 fprintf(stderr, "qtest is not supported under Windows\n");
955 exit(1);
956#else
10a9021d 957 CPUState *cpu = arg;
c7f0f3b1
AL
958 sigset_t waitset;
959 int r;
960
961 qemu_mutex_lock_iothread();
814e612e 962 qemu_thread_get_self(cpu->thread);
9f09e18a 963 cpu->thread_id = qemu_get_thread_id();
c7f0f3b1
AL
964
965 sigemptyset(&waitset);
966 sigaddset(&waitset, SIG_IPI);
967
968 /* signal CPU creation */
61a46217 969 cpu->created = true;
c7f0f3b1
AL
970 qemu_cond_signal(&qemu_cpu_cond);
971
4917cf44 972 current_cpu = cpu;
c7f0f3b1 973 while (1) {
4917cf44 974 current_cpu = NULL;
c7f0f3b1
AL
975 qemu_mutex_unlock_iothread();
976 do {
977 int sig;
978 r = sigwait(&waitset, &sig);
979 } while (r == -1 && (errno == EAGAIN || errno == EINTR));
980 if (r == -1) {
981 perror("sigwait");
982 exit(1);
983 }
984 qemu_mutex_lock_iothread();
4917cf44 985 current_cpu = cpu;
509a0d78 986 qemu_wait_io_event_common(cpu);
c7f0f3b1
AL
987 }
988
989 return NULL;
990#endif
991}
992
bdb7ca67
JK
993static void tcg_exec_all(void);
994
7e97cd88 995static void *qemu_tcg_cpu_thread_fn(void *arg)
296af7c9 996{
c3586ba7 997 CPUState *cpu = arg;
296af7c9 998
55f8d6ac 999 qemu_tcg_init_cpu_signals();
814e612e 1000 qemu_thread_get_self(cpu->thread);
296af7c9 1001
296af7c9 1002 qemu_mutex_lock(&qemu_global_mutex);
38fcbd3f
AF
1003 CPU_FOREACH(cpu) {
1004 cpu->thread_id = qemu_get_thread_id();
1005 cpu->created = true;
1006 }
296af7c9
BS
1007 qemu_cond_signal(&qemu_cpu_cond);
1008
fa7d1867 1009 /* wait for initial kick-off after machine start */
bdc44640 1010 while (QTAILQ_FIRST(&cpus)->stopped) {
fa7d1867 1011 qemu_cond_wait(tcg_halt_cond, &qemu_global_mutex);
8e564b4e
JK
1012
1013 /* process any pending work */
bdc44640 1014 CPU_FOREACH(cpu) {
182735ef 1015 qemu_wait_io_event_common(cpu);
8e564b4e 1016 }
0ab07c62 1017 }
296af7c9
BS
1018
1019 while (1) {
bdb7ca67 1020 tcg_exec_all();
ac70aafc
AB
1021
1022 if (use_icount) {
40daca54 1023 int64_t deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
ac70aafc
AB
1024
1025 if (deadline == 0) {
40daca54 1026 qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
ac70aafc 1027 }
3b2319a3 1028 }
6cabe1f3 1029 qemu_tcg_wait_io_event();
296af7c9
BS
1030 }
1031
1032 return NULL;
1033}
1034
/*
 * Force @cpu's host thread out of guest execution.
 *
 * POSIX: deliver SIG_IPI with pthread_kill (the handler is installed
 * elsewhere — presumably it interrupts cpu_exec; confirm in the signal
 * setup code).  Windows has no per-thread signals, so the thread is
 * suspended, cpu_signal() is run on its behalf, and it is resumed.
 * Fatal errors in either path terminate QEMU via exit(1).
 */
static void qemu_cpu_kick_thread(CPUState *cpu)
{
#ifndef _WIN32
    int err;

    err = pthread_kill(cpu->thread->thread, SIG_IPI);
    if (err) {
        fprintf(stderr, "qemu:%s: %s", __func__, strerror(err));
        exit(1);
    }
#else /* _WIN32 */
    /* Kicking ourselves needs no suspend dance: we are already out of
     * guest code when this runs. */
    if (!qemu_cpu_is_self(cpu)) {
        CONTEXT tcgContext;

        if (SuspendThread(cpu->hThread) == (DWORD)-1) {
            fprintf(stderr, "qemu:%s: GetLastError:%lu\n", __func__,
                    GetLastError());
            exit(1);
        }

        /* On multi-core systems, we are not sure that the thread is actually
         * suspended until we can get the context.
         */
        /* NOTE(review): GetThreadContext() returns non-zero on SUCCESS,
         * so looping while it is != 0 looks inverted — verify against
         * the Win32 API docs and intended behavior. */
        tcgContext.ContextFlags = CONTEXT_CONTROL;
        while (GetThreadContext(cpu->hThread, &tcgContext) != 0) {
            continue;
        }

        cpu_signal(0);

        if (ResumeThread(cpu->hThread) == (DWORD)-1) {
            fprintf(stderr, "qemu:%s: GetLastError:%lu\n", __func__,
                    GetLastError());
            exit(1);
        }
    }
#endif
}
1073
c08d7424 1074void qemu_cpu_kick(CPUState *cpu)
296af7c9 1075{
f5c121b8 1076 qemu_cond_broadcast(cpu->halt_cond);
216fc9a4 1077 if (!tcg_enabled() && !cpu->thread_kicked) {
2ff09a40 1078 qemu_cpu_kick_thread(cpu);
216fc9a4 1079 cpu->thread_kicked = true;
aa2c364b 1080 }
296af7c9
BS
1081}
1082
/*
 * Kick the vCPU whose thread is the calling thread.
 *
 * Only valid on a vCPU thread (asserts current_cpu is set).  Not
 * implemented on Windows, where it aborts.
 */
void qemu_cpu_kick_self(void)
{
#ifndef _WIN32
    assert(current_cpu);

    /* Send at most one kick until the flag is rearmed elsewhere. */
    if (!current_cpu->thread_kicked) {
        qemu_cpu_kick_thread(current_cpu);
        current_cpu->thread_kicked = true;
    }
#else
    abort();
#endif
}
1096
/* Return true if the calling thread is the one that runs @cpu. */
bool qemu_cpu_is_self(CPUState *cpu)
{
    return qemu_thread_is_self(cpu->thread);
}
1101
/* True when called from a vCPU thread: current_cpu is assigned by the
 * vCPU thread functions and is expected to be NULL on other threads. */
static bool qemu_in_vcpu_thread(void)
{
    return current_cpu && qemu_cpu_is_self(current_cpu);
}
1106
/*
 * Acquire the global (big QEMU) lock from an I/O thread.
 *
 * Under TCG the lock can be held by the vCPU thread for long stretches
 * of guest execution, so a blocking lock could stall: instead announce
 * the request via iothread_requesting_mutex, try the lock, and if that
 * fails kick first_cpu out of the guest before blocking for real.
 * Once owned, clear the request flag and broadcast
 * qemu_io_proceeded_cond so waiting TCG code can continue.
 */
void qemu_mutex_lock_iothread(void)
{
    if (!tcg_enabled()) {
        qemu_mutex_lock(&qemu_global_mutex);
    } else {
        iothread_requesting_mutex = true;
        /* trylock returns non-zero on failure: contended, so kick. */
        if (qemu_mutex_trylock(&qemu_global_mutex)) {
            qemu_cpu_kick_thread(first_cpu);
            qemu_mutex_lock(&qemu_global_mutex);
        }
        iothread_requesting_mutex = false;
        qemu_cond_broadcast(&qemu_io_proceeded_cond);
    }
}
1121
/* Release the global (big QEMU) lock taken by qemu_mutex_lock_iothread(). */
void qemu_mutex_unlock_iothread(void)
{
    qemu_mutex_unlock(&qemu_global_mutex);
}
1126
1127static int all_vcpus_paused(void)
1128{
bdc44640 1129 CPUState *cpu;
296af7c9 1130
bdc44640 1131 CPU_FOREACH(cpu) {
182735ef 1132 if (!cpu->stopped) {
296af7c9 1133 return 0;
0ab07c62 1134 }
296af7c9
BS
1135 }
1136
1137 return 1;
1138}
1139
/*
 * Ask every vCPU to stop and wait until all have done so.
 *
 * Called with qemu_global_mutex held (the condvar wait below relies on
 * it).  When invoked from a vCPU thread we cannot wait for ourselves:
 * stop the current CPU, and when KVM is not in use mark every CPU
 * stopped on its behalf and return without waiting.
 */
void pause_all_vcpus(void)
{
    CPUState *cpu;

    /* Freeze guest virtual time while the CPUs are stopped. */
    qemu_clock_enable(QEMU_CLOCK_VIRTUAL, false);
    CPU_FOREACH(cpu) {
        cpu->stop = true;
        qemu_cpu_kick(cpu);
    }

    if (qemu_in_vcpu_thread()) {
        cpu_stop_current();
        if (!kvm_enabled()) {
            /* Without KVM the vCPUs cannot park themselves here; mark
             * them stopped directly and skip the wait loop. */
            CPU_FOREACH(cpu) {
                cpu->stop = false;
                cpu->stopped = true;
            }
            return;
        }
    }

    /* Re-kick on every wakeup in case a CPU missed the first kick. */
    while (!all_vcpus_paused()) {
        qemu_cond_wait(&qemu_pause_cond, &qemu_global_mutex);
        CPU_FOREACH(cpu) {
            qemu_cpu_kick(cpu);
        }
    }
}
1168
/* Clear @cpu's stop/stopped state and wake its thread. */
void cpu_resume(CPUState *cpu)
{
    cpu->stop = false;
    cpu->stopped = false;
    qemu_cpu_kick(cpu);
}
1175
296af7c9
BS
1176void resume_all_vcpus(void)
1177{
bdc44640 1178 CPUState *cpu;
296af7c9 1179
40daca54 1180 qemu_clock_enable(QEMU_CLOCK_VIRTUAL, true);
bdc44640 1181 CPU_FOREACH(cpu) {
182735ef 1182 cpu_resume(cpu);
296af7c9
BS
1183 }
1184}
1185
4900116e
DDAG
1186/* For temporary buffers for forming a name */
1187#define VCPU_THREAD_NAME_SIZE 16
1188
/*
 * Set up TCG execution for @cpu.
 *
 * All TCG vCPUs share one host thread: the first CPU to arrive
 * allocates the thread and the shared halt condition, spawns
 * qemu_tcg_cpu_thread_fn, and waits (under qemu_global_mutex) until
 * the thread reports creation.  Later CPUs simply reuse the cached
 * tcg_cpu_thread / tcg_halt_cond.
 */
static void qemu_tcg_init_vcpu(CPUState *cpu)
{
    char thread_name[VCPU_THREAD_NAME_SIZE];

    tcg_cpu_address_space_init(cpu, cpu->as);

    /* share a single thread for all cpus with TCG */
    if (!tcg_cpu_thread) {
        cpu->thread = g_malloc0(sizeof(QemuThread));
        cpu->halt_cond = g_malloc0(sizeof(QemuCond));
        qemu_cond_init(cpu->halt_cond);
        tcg_halt_cond = cpu->halt_cond;
        snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/TCG",
                 cpu->cpu_index);
        qemu_thread_create(cpu->thread, thread_name, qemu_tcg_cpu_thread_fn,
                           cpu, QEMU_THREAD_JOINABLE);
#ifdef _WIN32
        /* Native handle needed for the SuspendThread kick on Windows. */
        cpu->hThread = qemu_thread_get_handle(cpu->thread);
#endif
        /* The thread signals qemu_cpu_cond after setting cpu->created. */
        while (!cpu->created) {
            qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
        }
        tcg_cpu_thread = cpu->thread;
    } else {
        cpu->thread = tcg_cpu_thread;
        cpu->halt_cond = tcg_halt_cond;
    }
}
1217
48a106bd 1218static void qemu_kvm_start_vcpu(CPUState *cpu)
296af7c9 1219{
4900116e
DDAG
1220 char thread_name[VCPU_THREAD_NAME_SIZE];
1221
814e612e 1222 cpu->thread = g_malloc0(sizeof(QemuThread));
f5c121b8
AF
1223 cpu->halt_cond = g_malloc0(sizeof(QemuCond));
1224 qemu_cond_init(cpu->halt_cond);
4900116e
DDAG
1225 snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/KVM",
1226 cpu->cpu_index);
1227 qemu_thread_create(cpu->thread, thread_name, qemu_kvm_cpu_thread_fn,
1228 cpu, QEMU_THREAD_JOINABLE);
61a46217 1229 while (!cpu->created) {
18a85728 1230 qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
0ab07c62 1231 }
296af7c9
BS
1232}
1233
/*
 * Spawn the dummy vCPU thread for @cpu, used when neither KVM nor TCG
 * is enabled (e.g. the qtest accelerator).  Blocks until the thread
 * has set cpu->created and signalled qemu_cpu_cond.
 */
static void qemu_dummy_start_vcpu(CPUState *cpu)
{
    char thread_name[VCPU_THREAD_NAME_SIZE];

    cpu->thread = g_malloc0(sizeof(QemuThread));
    cpu->halt_cond = g_malloc0(sizeof(QemuCond));
    qemu_cond_init(cpu->halt_cond);
    snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/DUMMY",
             cpu->cpu_index);
    qemu_thread_create(cpu->thread, thread_name, qemu_dummy_cpu_thread_fn, cpu,
                       QEMU_THREAD_JOINABLE);
    while (!cpu->created) {
        qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
    }
}
1249
/*
 * Common vCPU bring-up: record SMP topology on the CPU, mark it
 * stopped until the machine is resumed, and start it under the active
 * accelerator (KVM, TCG, or the dummy backend).
 */
void qemu_init_vcpu(CPUState *cpu)
{
    cpu->nr_cores = smp_cores;
    cpu->nr_threads = smp_threads;
    /* Starts paused; resume_all_vcpus()/cpu_resume() lifts this. */
    cpu->stopped = true;
    if (kvm_enabled()) {
        qemu_kvm_start_vcpu(cpu);
    } else if (tcg_enabled()) {
        qemu_tcg_init_vcpu(cpu);
    } else {
        qemu_dummy_start_vcpu(cpu);
    }
}
1263
/*
 * Stop the vCPU running on the calling thread: mark it stopped, force
 * it out of the execution loop with cpu_exit(), and signal
 * qemu_pause_cond for waiters in pause_all_vcpus().  No-op when not on
 * a vCPU thread (current_cpu == NULL).
 */
void cpu_stop_current(void)
{
    if (current_cpu) {
        current_cpu->stop = false;
        current_cpu->stopped = true;
        cpu_exit(current_cpu);
        qemu_cond_signal(&qemu_pause_cond);
    }
}
1273
/*
 * Stop the VM and transition to run state @state.
 *
 * From a vCPU thread the stop cannot be completed synchronously: a
 * vmstop request is queued for the main loop, the current CPU is
 * stopped, and 0 is returned.  From other threads do_vm_stop() runs
 * directly and its result is returned.
 */
int vm_stop(RunState state)
{
    if (qemu_in_vcpu_thread()) {
        qemu_system_vmstop_request_prepare();
        qemu_system_vmstop_request(state);
        /*
         * FIXME: should not return to device code in case
         * vm_stop() has been requested.
         */
        cpu_stop_current();
        return 0;
    }

    return do_vm_stop(state);
}
1289
8a9236f1
LC
/* does a state transition even if the VM is already stopped,
   current state is forgotten forever */
/* Returns 0 on success; when already stopped, returns the result of
 * bdrv_flush_all() so an earlier failed flush is not lost. */
int vm_stop_force_state(RunState state)
{
    if (runstate_is_running()) {
        return vm_stop(state);
    } else {
        runstate_set(state);
        /* Make sure to return an error if the flush in a previous vm_stop()
         * failed. */
        return bdrv_flush_all();
    }
}
1303
/*
 * Run guest code for one CPU via cpu_exec(), with icount bookkeeping.
 *
 * With -icount the instruction budget for this slice is derived from
 * the next QEMU_CLOCK_VIRTUAL deadline: the low 16 bits go into
 * icount_decr.u16.low and the remainder into icount_extra.  On return
 * the unexecuted part of the budget is folded back into
 * timers_state.qemu_icount so the guest clock stays consistent.
 * Returns the cpu_exec() result code (e.g. EXCP_DEBUG).
 */
static int tcg_cpu_exec(CPUArchState *env)
{
    CPUState *cpu = ENV_GET_CPU(env);
    int ret;
#ifdef CONFIG_PROFILER
    int64_t ti;
#endif

#ifdef CONFIG_PROFILER
    ti = profile_getclock();
#endif
    if (use_icount) {
        int64_t count;
        int64_t deadline;
        int decr;
        /* Retire any stale per-CPU budget into the global count. */
        timers_state.qemu_icount -= (cpu->icount_decr.u16.low
                                    + cpu->icount_extra);
        cpu->icount_decr.u16.low = 0;
        cpu->icount_extra = 0;
        deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);

        /* Maintain prior (possibly buggy) behaviour where if no deadline
         * was set (as there is no QEMU_CLOCK_VIRTUAL timer) or it is more than
         * INT32_MAX nanoseconds ahead, we still use INT32_MAX
         * nanoseconds.
         */
        if ((deadline < 0) || (deadline > INT32_MAX)) {
            deadline = INT32_MAX;
        }

        count = qemu_icount_round(deadline);
        timers_state.qemu_icount += count;
        /* u16.low holds at most 0xffff instructions; the rest waits in
         * icount_extra. */
        decr = (count > 0xffff) ? 0xffff : count;
        count -= decr;
        cpu->icount_decr.u16.low = decr;
        cpu->icount_extra = count;
    }
    ret = cpu_exec(env);
#ifdef CONFIG_PROFILER
    qemu_time += profile_getclock() - ti;
#endif
    if (use_icount) {
        /* Fold pending instructions back into the
           instruction counter, and clear the interrupt flag. */
        timers_state.qemu_icount -= (cpu->icount_decr.u16.low
                                    + cpu->icount_extra);
        cpu->icount_decr.u32 = 0;
        cpu->icount_extra = 0;
    }
    return ret;
}
1355
/*
 * TCG round-robin scheduler: run each vCPU in turn starting from
 * next_cpu, until the list is exhausted, exit_request is raised, a
 * debug exception occurs, or a CPU has been asked to stop.  next_cpu
 * persists across calls so scheduling resumes where it left off.
 */
static void tcg_exec_all(void)
{
    int r;

    /* Account partial waits to QEMU_CLOCK_VIRTUAL.  */
    qemu_clock_warp(QEMU_CLOCK_VIRTUAL);

    if (next_cpu == NULL) {
        next_cpu = first_cpu;
    }
    for (; next_cpu != NULL && !exit_request; next_cpu = CPU_NEXT(next_cpu)) {
        CPUState *cpu = next_cpu;
        CPUArchState *env = cpu->env_ptr;

        /* Virtual-clock timers are disabled while single-stepping with
         * SSTEP_NOTIMER set. */
        qemu_clock_enable(QEMU_CLOCK_VIRTUAL,
                          (cpu->singlestep_enabled & SSTEP_NOTIMER) == 0);

        if (cpu_can_run(cpu)) {
            r = tcg_cpu_exec(env);
            if (r == EXCP_DEBUG) {
                cpu_handle_guest_debug(cpu);
                break;
            }
        } else if (cpu->stop || cpu->stopped) {
            break;
        }
    }
    exit_request = 0;
}
1385
/* Print the guest CPU models supported by this target to @f, when the
 * target defines a cpu_list() macro; @optarg is currently unused. */
void list_cpus(FILE *f, fprintf_function cpu_fprintf, const char *optarg)
{
    /* XXX: implement xxx_cpu_list for targets that still miss it */
#if defined(cpu_list)
    cpu_list(f, cpu_fprintf);
#endif
}
de0b36b6
LC
1393
1394CpuInfoList *qmp_query_cpus(Error **errp)
1395{
1396 CpuInfoList *head = NULL, *cur_item = NULL;
182735ef 1397 CPUState *cpu;
de0b36b6 1398
bdc44640 1399 CPU_FOREACH(cpu) {
de0b36b6 1400 CpuInfoList *info;
182735ef
AF
1401#if defined(TARGET_I386)
1402 X86CPU *x86_cpu = X86_CPU(cpu);
1403 CPUX86State *env = &x86_cpu->env;
1404#elif defined(TARGET_PPC)
1405 PowerPCCPU *ppc_cpu = POWERPC_CPU(cpu);
1406 CPUPPCState *env = &ppc_cpu->env;
1407#elif defined(TARGET_SPARC)
1408 SPARCCPU *sparc_cpu = SPARC_CPU(cpu);
1409 CPUSPARCState *env = &sparc_cpu->env;
1410#elif defined(TARGET_MIPS)
1411 MIPSCPU *mips_cpu = MIPS_CPU(cpu);
1412 CPUMIPSState *env = &mips_cpu->env;
48e06fe0
BK
1413#elif defined(TARGET_TRICORE)
1414 TriCoreCPU *tricore_cpu = TRICORE_CPU(cpu);
1415 CPUTriCoreState *env = &tricore_cpu->env;
182735ef 1416#endif
de0b36b6 1417
cb446eca 1418 cpu_synchronize_state(cpu);
de0b36b6
LC
1419
1420 info = g_malloc0(sizeof(*info));
1421 info->value = g_malloc0(sizeof(*info->value));
55e5c285 1422 info->value->CPU = cpu->cpu_index;
182735ef 1423 info->value->current = (cpu == first_cpu);
259186a7 1424 info->value->halted = cpu->halted;
9f09e18a 1425 info->value->thread_id = cpu->thread_id;
de0b36b6
LC
1426#if defined(TARGET_I386)
1427 info->value->has_pc = true;
1428 info->value->pc = env->eip + env->segs[R_CS].base;
1429#elif defined(TARGET_PPC)
1430 info->value->has_nip = true;
1431 info->value->nip = env->nip;
1432#elif defined(TARGET_SPARC)
1433 info->value->has_pc = true;
1434 info->value->pc = env->pc;
1435 info->value->has_npc = true;
1436 info->value->npc = env->npc;
1437#elif defined(TARGET_MIPS)
1438 info->value->has_PC = true;
1439 info->value->PC = env->active_tc.PC;
48e06fe0
BK
1440#elif defined(TARGET_TRICORE)
1441 info->value->has_PC = true;
1442 info->value->PC = env->PC;
de0b36b6
LC
1443#endif
1444
1445 /* XXX: waiting for the qapi to support GSList */
1446 if (!cur_item) {
1447 head = cur_item = info;
1448 } else {
1449 cur_item->next = info;
1450 cur_item = info;
1451 }
1452 }
1453
1454 return head;
1455}
0cfd6a9a
LC
1456
1457void qmp_memsave(int64_t addr, int64_t size, const char *filename,
1458 bool has_cpu, int64_t cpu_index, Error **errp)
1459{
1460 FILE *f;
1461 uint32_t l;
55e5c285 1462 CPUState *cpu;
0cfd6a9a
LC
1463 uint8_t buf[1024];
1464
1465 if (!has_cpu) {
1466 cpu_index = 0;
1467 }
1468
151d1322
AF
1469 cpu = qemu_get_cpu(cpu_index);
1470 if (cpu == NULL) {
0cfd6a9a
LC
1471 error_set(errp, QERR_INVALID_PARAMETER_VALUE, "cpu-index",
1472 "a CPU number");
1473 return;
1474 }
1475
1476 f = fopen(filename, "wb");
1477 if (!f) {
618da851 1478 error_setg_file_open(errp, errno, filename);
0cfd6a9a
LC
1479 return;
1480 }
1481
1482 while (size != 0) {
1483 l = sizeof(buf);
1484 if (l > size)
1485 l = size;
2f4d0f59
AK
1486 if (cpu_memory_rw_debug(cpu, addr, buf, l, 0) != 0) {
1487 error_setg(errp, "Invalid addr 0x%016" PRIx64 "specified", addr);
1488 goto exit;
1489 }
0cfd6a9a
LC
1490 if (fwrite(buf, 1, l, f) != l) {
1491 error_set(errp, QERR_IO_ERROR);
1492 goto exit;
1493 }
1494 addr += l;
1495 size -= l;
1496 }
1497
1498exit:
1499 fclose(f);
1500}
6d3962bf
LC
1501
1502void qmp_pmemsave(int64_t addr, int64_t size, const char *filename,
1503 Error **errp)
1504{
1505 FILE *f;
1506 uint32_t l;
1507 uint8_t buf[1024];
1508
1509 f = fopen(filename, "wb");
1510 if (!f) {
618da851 1511 error_setg_file_open(errp, errno, filename);
6d3962bf
LC
1512 return;
1513 }
1514
1515 while (size != 0) {
1516 l = sizeof(buf);
1517 if (l > size)
1518 l = size;
eb6282f2 1519 cpu_physical_memory_read(addr, buf, l);
6d3962bf
LC
1520 if (fwrite(buf, 1, l, f) != l) {
1521 error_set(errp, QERR_IO_ERROR);
1522 goto exit;
1523 }
1524 addr += l;
1525 size -= l;
1526 }
1527
1528exit:
1529 fclose(f);
1530}
ab49ab5c
LC
1531
/*
 * QMP inject-nmi: deliver an NMI to the guest.
 *
 * x86: delivered to every CPU — via the APIC when one is present,
 * otherwise as a raw CPU_INTERRUPT_NMI.  Other targets go through the
 * NMI handler registry using the monitor's current CPU index; targets
 * without a handler report an error via @errp.
 */
void qmp_inject_nmi(Error **errp)
{
#if defined(TARGET_I386)
    CPUState *cs;

    CPU_FOREACH(cs) {
        X86CPU *cpu = X86_CPU(cs);

        if (!cpu->apic_state) {
            cpu_interrupt(cs, CPU_INTERRUPT_NMI);
        } else {
            apic_deliver_nmi(cpu->apic_state);
        }
    }
#else
    nmi_monitor_handle(monitor_get_cpu_index(), errp);
#endif
}
27498bef
ST
1550
1551void dump_drift_info(FILE *f, fprintf_function cpu_fprintf)
1552{
1553 if (!use_icount) {
1554 return;
1555 }
1556
1557 cpu_fprintf(f, "Host - Guest clock %"PRIi64" ms\n",
1558 (cpu_get_clock() - cpu_get_icount())/SCALE_MS);
1559 if (icount_align_option) {
1560 cpu_fprintf(f, "Max guest delay %"PRIi64" ms\n", -max_delay/SCALE_MS);
1561 cpu_fprintf(f, "Max guest advance %"PRIi64" ms\n", max_advance/SCALE_MS);
1562 } else {
1563 cpu_fprintf(f, "Max guest delay NA\n");
1564 cpu_fprintf(f, "Max guest advance NA\n");
1565 }
1566}