]> git.proxmox.com Git - mirror_qemu.git/blame - cpus.c
hw/timer/imx_*: fix TIMER_MAX clash with system symbol
[mirror_qemu.git] / cpus.c
CommitLineData
296af7c9
BS
1/*
2 * QEMU System Emulator
3 *
4 * Copyright (c) 2003-2008 Fabrice Bellard
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to deal
8 * in the Software without restriction, including without limitation the rights
9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 * copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22 * THE SOFTWARE.
23 */
24
25/* Needed early for CONFIG_BSD etc. */
26#include "config-host.h"
27
83c9089e 28#include "monitor/monitor.h"
a4e15de9 29#include "qapi/qmp/qerror.h"
9c17d615 30#include "sysemu/sysemu.h"
022c62cb 31#include "exec/gdbstub.h"
9c17d615
PB
32#include "sysemu/dma.h"
33#include "sysemu/kvm.h"
de0b36b6 34#include "qmp-commands.h"
296af7c9 35
1de7afc9 36#include "qemu/thread.h"
9c17d615
PB
37#include "sysemu/cpus.h"
38#include "sysemu/qtest.h"
1de7afc9
PB
39#include "qemu/main-loop.h"
40#include "qemu/bitmap.h"
cb365646 41#include "qemu/seqlock.h"
a4e15de9 42#include "qapi-event.h"
0ff0fc19
JK
43
44#ifndef _WIN32
1de7afc9 45#include "qemu/compatfd.h"
0ff0fc19 46#endif
296af7c9 47
6d9cb73c
JK
48#ifdef CONFIG_LINUX
49
50#include <sys/prctl.h>
51
c0532a76
MT
52#ifndef PR_MCE_KILL
53#define PR_MCE_KILL 33
54#endif
55
6d9cb73c
JK
56#ifndef PR_MCE_KILL_SET
57#define PR_MCE_KILL_SET 1
58#endif
59
60#ifndef PR_MCE_KILL_EARLY
61#define PR_MCE_KILL_EARLY 1
62#endif
63
64#endif /* CONFIG_LINUX */
65
182735ef 66static CPUState *next_cpu;
27498bef
ST
67int64_t max_delay;
68int64_t max_advance;
296af7c9 69
321bc0b2
TC
70bool cpu_is_stopped(CPUState *cpu)
71{
72 return cpu->stopped || !runstate_is_running();
73}
74
a98ae1d8 75static bool cpu_thread_is_idle(CPUState *cpu)
ac873f1e 76{
c64ca814 77 if (cpu->stop || cpu->queued_work_first) {
ac873f1e
PM
78 return false;
79 }
321bc0b2 80 if (cpu_is_stopped(cpu)) {
ac873f1e
PM
81 return true;
82 }
8c2e1b00 83 if (!cpu->halted || cpu_has_work(cpu) ||
215e79c0 84 kvm_halt_in_kernel()) {
ac873f1e
PM
85 return false;
86 }
87 return true;
88}
89
90static bool all_cpu_threads_idle(void)
91{
182735ef 92 CPUState *cpu;
ac873f1e 93
bdc44640 94 CPU_FOREACH(cpu) {
182735ef 95 if (!cpu_thread_is_idle(cpu)) {
ac873f1e
PM
96 return false;
97 }
98 }
99 return true;
100}
101
946fb27c
PB
102/***********************************************************/
103/* guest cycle counter */
104
a3270e19
PB
105/* Protected by TimersState seqlock */
106
71468395 107static int64_t vm_clock_warp_start = -1;
946fb27c
PB
108/* Conversion factor from emulated instructions to virtual clock ticks. */
109static int icount_time_shift;
110/* Arbitrarily pick 1MIPS as the minimum allowable speed. */
111#define MAX_ICOUNT_SHIFT 10
a3270e19 112
946fb27c
PB
113static QEMUTimer *icount_rt_timer;
114static QEMUTimer *icount_vm_timer;
115static QEMUTimer *icount_warp_timer;
946fb27c
PB
116
/* Bookkeeping for the guest cycle counter and virtual clock.
 * Locking is split: the tick fields are BQL-protected, while
 * cpu_clock_offset may be read outside the BQL under vm_clock_seqlock. */
typedef struct TimersState {
    /* Protected by BQL. */
    int64_t cpu_ticks_prev;    /* last value returned by cpu_get_ticks() */
    int64_t cpu_ticks_offset;  /* offset accumulated across stop/restart */

    /* cpu_clock_offset can be read out of BQL, so protect it with
     * this lock.
     */
    QemuSeqLock vm_clock_seqlock;
    int64_t cpu_clock_offset;
    int32_t cpu_ticks_enabled;
    int64_t dummy;             /* kept only for migration-format compatibility */

    /* Compensate for varying guest execution speed. */
    int64_t qemu_icount_bias;
    /* Only written by TCG thread */
    int64_t qemu_icount;
} TimersState;

static TimersState timers_state;
946fb27c
PB
137
138/* Return the virtual CPU time, based on the instruction counter. */
17a15f1b 139static int64_t cpu_get_icount_locked(void)
946fb27c
PB
140{
141 int64_t icount;
4917cf44 142 CPUState *cpu = current_cpu;
946fb27c 143
c96778bb 144 icount = timers_state.qemu_icount;
4917cf44 145 if (cpu) {
99df7dce 146 if (!cpu_can_do_io(cpu)) {
946fb27c
PB
147 fprintf(stderr, "Bad clock read\n");
148 }
28ecfd7a 149 icount -= (cpu->icount_decr.u16.low + cpu->icount_extra);
946fb27c 150 }
3f031313 151 return timers_state.qemu_icount_bias + cpu_icount_to_ns(icount);
946fb27c
PB
152}
153
17a15f1b
PB
154int64_t cpu_get_icount(void)
155{
156 int64_t icount;
157 unsigned start;
158
159 do {
160 start = seqlock_read_begin(&timers_state.vm_clock_seqlock);
161 icount = cpu_get_icount_locked();
162 } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start));
163
164 return icount;
165}
166
3f031313
FK
167int64_t cpu_icount_to_ns(int64_t icount)
168{
169 return icount << icount_time_shift;
170}
171
946fb27c 172/* return the host CPU cycle counter and handle stop/restart */
cb365646 173/* Caller must hold the BQL */
946fb27c
PB
174int64_t cpu_get_ticks(void)
175{
5f3e3101
PB
176 int64_t ticks;
177
946fb27c
PB
178 if (use_icount) {
179 return cpu_get_icount();
180 }
5f3e3101
PB
181
182 ticks = timers_state.cpu_ticks_offset;
183 if (timers_state.cpu_ticks_enabled) {
184 ticks += cpu_get_real_ticks();
185 }
186
187 if (timers_state.cpu_ticks_prev > ticks) {
188 /* Note: non increasing ticks may happen if the host uses
189 software suspend */
190 timers_state.cpu_ticks_offset += timers_state.cpu_ticks_prev - ticks;
191 ticks = timers_state.cpu_ticks_prev;
946fb27c 192 }
5f3e3101
PB
193
194 timers_state.cpu_ticks_prev = ticks;
195 return ticks;
946fb27c
PB
196}
197
cb365646 198static int64_t cpu_get_clock_locked(void)
946fb27c 199{
5f3e3101 200 int64_t ticks;
cb365646 201
5f3e3101
PB
202 ticks = timers_state.cpu_clock_offset;
203 if (timers_state.cpu_ticks_enabled) {
204 ticks += get_clock();
946fb27c 205 }
cb365646 206
5f3e3101 207 return ticks;
cb365646
LPF
208}
209
210/* return the host CPU monotonic timer and handle stop/restart */
211int64_t cpu_get_clock(void)
212{
213 int64_t ti;
214 unsigned start;
215
216 do {
217 start = seqlock_read_begin(&timers_state.vm_clock_seqlock);
218 ti = cpu_get_clock_locked();
219 } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start));
220
221 return ti;
946fb27c
PB
222}
223
c2aa5f81
ST
224/* return the offset between the host clock and virtual CPU clock */
225int64_t cpu_get_clock_offset(void)
226{
227 int64_t ti;
228 unsigned start;
229
230 do {
231 start = seqlock_read_begin(&timers_state.vm_clock_seqlock);
232 ti = timers_state.cpu_clock_offset;
233 if (!timers_state.cpu_ticks_enabled) {
234 ti -= get_clock();
235 }
236 } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start));
237
238 return -ti;
239}
240
cb365646
LPF
241/* enable cpu_get_ticks()
242 * Caller must hold BQL which server as mutex for vm_clock_seqlock.
243 */
946fb27c
PB
244void cpu_enable_ticks(void)
245{
cb365646
LPF
246 /* Here, the really thing protected by seqlock is cpu_clock_offset. */
247 seqlock_write_lock(&timers_state.vm_clock_seqlock);
946fb27c
PB
248 if (!timers_state.cpu_ticks_enabled) {
249 timers_state.cpu_ticks_offset -= cpu_get_real_ticks();
250 timers_state.cpu_clock_offset -= get_clock();
251 timers_state.cpu_ticks_enabled = 1;
252 }
cb365646 253 seqlock_write_unlock(&timers_state.vm_clock_seqlock);
946fb27c
PB
254}
255
256/* disable cpu_get_ticks() : the clock is stopped. You must not call
cb365646
LPF
257 * cpu_get_ticks() after that.
258 * Caller must hold BQL which server as mutex for vm_clock_seqlock.
259 */
946fb27c
PB
260void cpu_disable_ticks(void)
261{
cb365646
LPF
262 /* Here, the really thing protected by seqlock is cpu_clock_offset. */
263 seqlock_write_lock(&timers_state.vm_clock_seqlock);
946fb27c 264 if (timers_state.cpu_ticks_enabled) {
5f3e3101 265 timers_state.cpu_ticks_offset += cpu_get_real_ticks();
cb365646 266 timers_state.cpu_clock_offset = cpu_get_clock_locked();
946fb27c
PB
267 timers_state.cpu_ticks_enabled = 0;
268 }
cb365646 269 seqlock_write_unlock(&timers_state.vm_clock_seqlock);
946fb27c
PB
270}
271
/* Correlation between real and virtual time is always going to be
   fairly approximate, so ignore small variation.
   When the guest is idle real and virtual time will be aligned in
   the IO wait loop. */
#define ICOUNT_WOBBLE (get_ticks_per_sec() / 10)

/* Recalibrate icount_time_shift so emulated time tracks real time:
 * compares the virtual clock against the instruction-derived clock and
 * nudges the shift by one in the appropriate direction, then rebases
 * qemu_icount_bias so the virtual clock does not jump. */
static void icount_adjust(void)
{
    int64_t cur_time;
    int64_t cur_icount;
    int64_t delta;

    /* Protected by TimersState mutex (the BQL). */
    static int64_t last_delta;

    /* If the VM is not running, then do nothing. */
    if (!runstate_is_running()) {
        return;
    }

    seqlock_write_lock(&timers_state.vm_clock_seqlock);
    cur_time = cpu_get_clock_locked();
    cur_icount = cpu_get_icount_locked();

    delta = cur_icount - cur_time;
    /* FIXME: This is a very crude algorithm, somewhat prone to oscillation. */
    if (delta > 0
        && last_delta + ICOUNT_WOBBLE < delta * 2
        && icount_time_shift > 0) {
        /* The guest is getting too far ahead.  Slow time down. */
        icount_time_shift--;
    }
    if (delta < 0
        && last_delta - ICOUNT_WOBBLE > delta * 2
        && icount_time_shift < MAX_ICOUNT_SHIFT) {
        /* The guest is getting too far behind.  Speed time up. */
        icount_time_shift++;
    }
    last_delta = delta;
    /* Rebase the bias so cpu_get_icount_locked() is continuous across
     * the shift change. */
    timers_state.qemu_icount_bias = cur_icount
                              - (timers_state.qemu_icount << icount_time_shift);
    seqlock_write_unlock(&timers_state.vm_clock_seqlock);
}
315
316static void icount_adjust_rt(void *opaque)
317{
40daca54
AB
318 timer_mod(icount_rt_timer,
319 qemu_clock_get_ms(QEMU_CLOCK_REALTIME) + 1000);
946fb27c
PB
320 icount_adjust();
321}
322
323static void icount_adjust_vm(void *opaque)
324{
40daca54
AB
325 timer_mod(icount_vm_timer,
326 qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
327 get_ticks_per_sec() / 10);
946fb27c
PB
328 icount_adjust();
329}
330
331static int64_t qemu_icount_round(int64_t count)
332{
333 return (count + (1 << icount_time_shift) - 1) >> icount_time_shift;
334}
335
/* Timer callback: fold the real time that elapsed since the warp began
 * into qemu_icount_bias, so the virtual clock "catches up" after the
 * CPUs were idle. */
static void icount_warp_rt(void *opaque)
{
    /* The icount_warp_timer is rescheduled soon after vm_clock_warp_start
     * changes from -1 to another value, so the race here is okay.
     */
    if (atomic_read(&vm_clock_warp_start) == -1) {
        return;
    }

    seqlock_write_lock(&timers_state.vm_clock_seqlock);
    if (runstate_is_running()) {
        int64_t clock = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
        int64_t warp_delta;

        warp_delta = clock - vm_clock_warp_start;
        if (use_icount == 2) {
            /*
             * In adaptive mode, do not let QEMU_CLOCK_VIRTUAL run too
             * far ahead of real time.
             */
            int64_t cur_time = cpu_get_clock_locked();
            int64_t cur_icount = cpu_get_icount_locked();
            int64_t delta = cur_time - cur_icount;
            warp_delta = MIN(warp_delta, delta);
        }
        timers_state.qemu_icount_bias += warp_delta;
    }
    vm_clock_warp_start = -1;
    seqlock_write_unlock(&timers_state.vm_clock_seqlock);

    /* Fire any virtual-clock timers the warp made due.  Done outside the
     * seqlock write section. */
    if (qemu_clock_expired(QEMU_CLOCK_VIRTUAL)) {
        qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
    }
}
370
8156be56
PB
/* Advance the virtual clock to @dest on behalf of a qtest command,
 * running every virtual-clock timer that becomes due along the way.
 * Only valid under qtest, where the clock is fully under test control. */
void qtest_clock_warp(int64_t dest)
{
    int64_t clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
    assert(qtest_enabled());
    while (clock < dest) {
        int64_t deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
        /* Step only as far as the next timer deadline (or dest). */
        int64_t warp = qemu_soonest_timeout(dest - clock, deadline);
        seqlock_write_lock(&timers_state.vm_clock_seqlock);
        timers_state.qemu_icount_bias += warp;
        seqlock_write_unlock(&timers_state.vm_clock_seqlock);

        qemu_clock_run_timers(QEMU_CLOCK_VIRTUAL);
        clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
    }
    qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
}
387
/* In icount mode, schedule a "warp" of QEMU_CLOCK_VIRTUAL: if all CPUs
 * are idle, arrange for the virtual clock to jump forward (after a bit
 * of real time) to the next timer deadline rather than stalling. */
void qemu_clock_warp(QEMUClockType type)
{
    int64_t clock;
    int64_t deadline;

    /*
     * There are too many global variables to make the "warp" behavior
     * applicable to other clocks.  But a clock argument removes the
     * need for if statements all over the place.
     */
    if (type != QEMU_CLOCK_VIRTUAL || !use_icount) {
        return;
    }

    /*
     * If the CPUs have been sleeping, advance QEMU_CLOCK_VIRTUAL timer now.
     * This ensures that the deadline for the timer is computed correctly below.
     * This also makes sure that the insn counter is synchronized before the
     * CPU starts running, in case the CPU is woken by an event other than
     * the earliest QEMU_CLOCK_VIRTUAL timer.
     */
    icount_warp_rt(NULL);
    timer_del(icount_warp_timer);
    if (!all_cpu_threads_idle()) {
        return;
    }

    if (qtest_enabled()) {
        /* When testing, qtest commands advance icount. */
        return;
    }

    /* We want to use the earliest deadline from ALL vm_clocks */
    clock = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
    deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
    if (deadline < 0) {
        /* No virtual-clock timer pending: nothing to warp towards. */
        return;
    }

    if (deadline > 0) {
        /*
         * Ensure QEMU_CLOCK_VIRTUAL proceeds even when the virtual CPU goes to
         * sleep.  Otherwise, the CPU might be waiting for a future timer
         * interrupt to wake it up, but the interrupt never comes because
         * the vCPU isn't running any insns and thus doesn't advance the
         * QEMU_CLOCK_VIRTUAL.
         *
         * An extreme solution for this problem would be to never let VCPUs
         * sleep in icount mode if there is a pending QEMU_CLOCK_VIRTUAL
         * timer; rather time could just advance to the next QEMU_CLOCK_VIRTUAL
         * event.  Instead, we do stop VCPUs and only advance QEMU_CLOCK_VIRTUAL
         * after some "real" time, (related to the time left until the next
         * event) has passed.  The QEMU_CLOCK_REALTIME timer will do this.
         * This avoids that the warps are visible externally; for example,
         * you will not be sending network packets continuously instead of
         * every 100ms.
         */
        seqlock_write_lock(&timers_state.vm_clock_seqlock);
        if (vm_clock_warp_start == -1 || vm_clock_warp_start > clock) {
            vm_clock_warp_start = clock;
        }
        seqlock_write_unlock(&timers_state.vm_clock_seqlock);
        timer_mod_anticipate(icount_warp_timer, clock + deadline);
    } else if (deadline == 0) {
        qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
    }
}
455
d09eae37
FK
456static bool icount_state_needed(void *opaque)
457{
458 return use_icount;
459}
460
/*
 * This is a subsection for icount migration.
 * Field order and names are migration ABI; do not reorder.
 */
static const VMStateDescription icount_vmstate_timers = {
    .name = "timer/icount",
    .version_id = 1,
    .minimum_version_id = 1,
    .fields = (VMStateField[]) {
        VMSTATE_INT64(qemu_icount_bias, TimersState),
        VMSTATE_INT64(qemu_icount, TimersState),
        VMSTATE_END_OF_LIST()
    }
};
474
946fb27c
PB
/* Migration description for TimersState.  The "dummy" field and the
 * version gating on cpu_clock_offset preserve the historical wire
 * format; the icount fields travel in an optional subsection. */
static const VMStateDescription vmstate_timers = {
    .name = "timer",
    .version_id = 2,
    .minimum_version_id = 1,
    .fields = (VMStateField[]) {
        VMSTATE_INT64(cpu_ticks_offset, TimersState),
        VMSTATE_INT64(dummy, TimersState),
        VMSTATE_INT64_V(cpu_clock_offset, TimersState, 2),
        VMSTATE_END_OF_LIST()
    },
    .subsections = (VMStateSubsection[]) {
        {
            .vmsd = &icount_vmstate_timers,
            .needed = icount_state_needed,
        }, {
            /* empty */
        }
    }
};
494
/* Parse -icount options and set up icount mode.
 * shift=<n> selects a fixed instructions-per-ns shift (use_icount == 1);
 * shift=auto enables adaptive mode (use_icount == 2) with periodic
 * recalibration timers.  Errors are reported through @errp; note that
 * after error_setg() the function deliberately continues on some paths,
 * matching the historical best-effort behavior. */
void configure_icount(QemuOpts *opts, Error **errp)
{
    const char *option;
    char *rem_str = NULL;

    seqlock_init(&timers_state.vm_clock_seqlock, NULL);
    vmstate_register(NULL, 0, &vmstate_timers, &timers_state);
    option = qemu_opt_get(opts, "shift");
    if (!option) {
        /* "align" without "shift" is meaningless: icount is off. */
        if (qemu_opt_get(opts, "align") != NULL) {
            error_setg(errp, "Please specify shift option when using align");
        }
        return;
    }
    icount_align_option = qemu_opt_get_bool(opts, "align", false);
    icount_warp_timer = timer_new_ns(QEMU_CLOCK_REALTIME,
                                     icount_warp_rt, NULL);
    if (strcmp(option, "auto") != 0) {
        errno = 0;
        icount_time_shift = strtol(option, &rem_str, 0);
        if (errno != 0 || *rem_str != '\0' || !strlen(option)) {
            error_setg(errp, "icount: Invalid shift value");
        }
        use_icount = 1;
        return;
    } else if (icount_align_option) {
        error_setg(errp, "shift=auto and align=on are incompatible");
    }

    use_icount = 2;

    /* 125MIPS seems a reasonable initial guess at the guest speed.
       It will be corrected fairly quickly anyway. */
    icount_time_shift = 3;

    /* Have both realtime and virtual time triggers for speed adjustment.
       The realtime trigger catches emulated time passing too slowly,
       the virtual time trigger catches emulated time passing too fast.
       Realtime triggers occur even when idle, so use them less frequently
       than VM triggers. */
    icount_rt_timer = timer_new_ms(QEMU_CLOCK_REALTIME,
                                   icount_adjust_rt, NULL);
    timer_mod(icount_rt_timer,
              qemu_clock_get_ms(QEMU_CLOCK_REALTIME) + 1000);
    icount_vm_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
                                   icount_adjust_vm, NULL);
    timer_mod(icount_vm_timer,
              qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
              get_ticks_per_sec() / 10);
}
545
296af7c9
BS
546/***********************************************************/
547void hw_error(const char *fmt, ...)
548{
549 va_list ap;
55e5c285 550 CPUState *cpu;
296af7c9
BS
551
552 va_start(ap, fmt);
553 fprintf(stderr, "qemu: hardware error: ");
554 vfprintf(stderr, fmt, ap);
555 fprintf(stderr, "\n");
bdc44640 556 CPU_FOREACH(cpu) {
55e5c285 557 fprintf(stderr, "CPU #%d:\n", cpu->cpu_index);
878096ee 558 cpu_dump_state(cpu, stderr, fprintf, CPU_DUMP_FPU);
296af7c9
BS
559 }
560 va_end(ap);
561 abort();
562}
563
564void cpu_synchronize_all_states(void)
565{
182735ef 566 CPUState *cpu;
296af7c9 567
bdc44640 568 CPU_FOREACH(cpu) {
182735ef 569 cpu_synchronize_state(cpu);
296af7c9
BS
570 }
571}
572
573void cpu_synchronize_all_post_reset(void)
574{
182735ef 575 CPUState *cpu;
296af7c9 576
bdc44640 577 CPU_FOREACH(cpu) {
182735ef 578 cpu_synchronize_post_reset(cpu);
296af7c9
BS
579 }
580}
581
582void cpu_synchronize_all_post_init(void)
583{
182735ef 584 CPUState *cpu;
296af7c9 585
bdc44640 586 CPU_FOREACH(cpu) {
182735ef 587 cpu_synchronize_post_init(cpu);
296af7c9
BS
588 }
589}
590
56983463 591static int do_vm_stop(RunState state)
296af7c9 592{
56983463
KW
593 int ret = 0;
594
1354869c 595 if (runstate_is_running()) {
296af7c9 596 cpu_disable_ticks();
296af7c9 597 pause_all_vcpus();
f5bbfba1 598 runstate_set(state);
1dfb4dd9 599 vm_state_notify(0, state);
a4e15de9 600 qapi_event_send_stop(&error_abort);
296af7c9 601 }
56983463 602
594a45ce
KW
603 bdrv_drain_all();
604 ret = bdrv_flush_all();
605
56983463 606 return ret;
296af7c9
BS
607}
608
a1fcaa73 609static bool cpu_can_run(CPUState *cpu)
296af7c9 610{
4fdeee7c 611 if (cpu->stop) {
a1fcaa73 612 return false;
0ab07c62 613 }
321bc0b2 614 if (cpu_is_stopped(cpu)) {
a1fcaa73 615 return false;
0ab07c62 616 }
a1fcaa73 617 return true;
296af7c9
BS
618}
619
91325046 620static void cpu_handle_guest_debug(CPUState *cpu)
83f338f7 621{
64f6b346 622 gdb_set_stop_cpu(cpu);
8cf71710 623 qemu_system_debug_request();
f324e766 624 cpu->stopped = true;
3c638d06
JK
625}
626
714bd040
PB
627static void cpu_signal(int sig)
628{
4917cf44
AF
629 if (current_cpu) {
630 cpu_exit(current_cpu);
714bd040
PB
631 }
632 exit_request = 1;
633}
714bd040 634
6d9cb73c
JK
635#ifdef CONFIG_LINUX
636static void sigbus_reraise(void)
637{
638 sigset_t set;
639 struct sigaction action;
640
641 memset(&action, 0, sizeof(action));
642 action.sa_handler = SIG_DFL;
643 if (!sigaction(SIGBUS, &action, NULL)) {
644 raise(SIGBUS);
645 sigemptyset(&set);
646 sigaddset(&set, SIGBUS);
647 sigprocmask(SIG_UNBLOCK, &set, NULL);
648 }
649 perror("Failed to re-raise SIGBUS!\n");
650 abort();
651}
652
653static void sigbus_handler(int n, struct qemu_signalfd_siginfo *siginfo,
654 void *ctx)
655{
656 if (kvm_on_sigbus(siginfo->ssi_code,
657 (void *)(intptr_t)siginfo->ssi_addr)) {
658 sigbus_reraise();
659 }
660}
661
662static void qemu_init_sigbus(void)
663{
664 struct sigaction action;
665
666 memset(&action, 0, sizeof(action));
667 action.sa_flags = SA_SIGINFO;
668 action.sa_sigaction = (void (*)(int, siginfo_t*, void*))sigbus_handler;
669 sigaction(SIGBUS, &action, NULL);
670
671 prctl(PR_MCE_KILL, PR_MCE_KILL_SET, PR_MCE_KILL_EARLY, 0, 0);
672}
673
/* Drain any pending SIG_IPI/SIGBUS for the KVM vCPU thread without
 * blocking (zero timeout).  SIGBUS is forwarded to KVM; the loop keeps
 * going until neither signal remains pending. */
static void qemu_kvm_eat_signals(CPUState *cpu)
{
    struct timespec ts = { 0, 0 };   /* non-blocking sigtimedwait */
    siginfo_t siginfo;
    sigset_t waitset;
    sigset_t chkset;
    int r;

    sigemptyset(&waitset);
    sigaddset(&waitset, SIG_IPI);
    sigaddset(&waitset, SIGBUS);

    do {
        r = sigtimedwait(&waitset, &siginfo, &ts);
        if (r == -1 && !(errno == EAGAIN || errno == EINTR)) {
            perror("sigtimedwait");
            exit(1);
        }

        switch (r) {
        case SIGBUS:
            /* Let KVM handle the machine-check; die if it cannot. */
            if (kvm_on_sigbus_vcpu(cpu, siginfo.si_code, siginfo.si_addr)) {
                sigbus_reraise();
            }
            break;
        default:
            /* SIG_IPI (just a wakeup) or timeout: nothing to do. */
            break;
        }

        r = sigpending(&chkset);
        if (r == -1) {
            perror("sigpending");
            exit(1);
        }
    } while (sigismember(&chkset, SIG_IPI) || sigismember(&chkset, SIGBUS));
}
710
6d9cb73c
JK
#else /* !CONFIG_LINUX */

/* No machine-check SIGBUS support outside Linux: nothing to set up. */
static void qemu_init_sigbus(void)
{
}

/* No pending SIGBUS to drain outside Linux. */
static void qemu_kvm_eat_signals(CPUState *cpu)
{
}
#endif /* !CONFIG_LINUX */
721
296af7c9 722#ifndef _WIN32
55f8d6ac
JK
/* No-op handler: installed for SIG_IPI so the signal interrupts the KVM
 * ioctl without any user-space side effects. */
static void dummy_signal(int sig)
{
}
55f8d6ac 726
/* Per-vCPU signal setup for KVM: install a dummy SIG_IPI handler, then
 * hand the kernel a signal mask (current mask minus SIG_IPI/SIGBUS) so
 * those signals are only delivered while inside KVM_RUN. */
static void qemu_kvm_init_cpu_signals(CPUState *cpu)
{
    int r;
    sigset_t set;
    struct sigaction sigact;

    memset(&sigact, 0, sizeof(sigact));
    sigact.sa_handler = dummy_signal;
    sigaction(SIG_IPI, &sigact, NULL);

    /* Start from this thread's current mask and open up SIG_IPI/SIGBUS. */
    pthread_sigmask(SIG_BLOCK, NULL, &set);
    sigdelset(&set, SIG_IPI);
    sigdelset(&set, SIGBUS);
    r = kvm_set_signal_mask(cpu, &set);
    if (r) {
        fprintf(stderr, "kvm_set_signal_mask: %s\n", strerror(-r));
        exit(1);
    }
}
746
747static void qemu_tcg_init_cpu_signals(void)
748{
714bd040
PB
749 sigset_t set;
750 struct sigaction sigact;
751
752 memset(&sigact, 0, sizeof(sigact));
753 sigact.sa_handler = cpu_signal;
754 sigaction(SIG_IPI, &sigact, NULL);
755
756 sigemptyset(&set);
757 sigaddset(&set, SIG_IPI);
758 pthread_sigmask(SIG_UNBLOCK, &set, NULL);
714bd040
PB
759}
760
#else /* _WIN32 */
/* KVM is unavailable on Windows; reaching this is a programming error. */
static void qemu_kvm_init_cpu_signals(CPUState *cpu)
{
    abort();
}

/* Windows kicks the TCG thread via Suspend/ResumeThread, not signals. */
static void qemu_tcg_init_cpu_signals(void)
{
}
#endif /* _WIN32 */
ff48eb5f 771
b2532d88 772static QemuMutex qemu_global_mutex;
46daff13
PB
773static QemuCond qemu_io_proceeded_cond;
774static bool iothread_requesting_mutex;
296af7c9
BS
775
776static QemuThread io_thread;
777
778static QemuThread *tcg_cpu_thread;
779static QemuCond *tcg_halt_cond;
780
296af7c9
BS
781/* cpu creation */
782static QemuCond qemu_cpu_cond;
783/* system init */
296af7c9 784static QemuCond qemu_pause_cond;
e82bcec2 785static QemuCond qemu_work_cond;
296af7c9 786
d3b12f5d 787void qemu_init_cpu_loop(void)
296af7c9 788{
6d9cb73c 789 qemu_init_sigbus();
ed94592b 790 qemu_cond_init(&qemu_cpu_cond);
ed94592b
AL
791 qemu_cond_init(&qemu_pause_cond);
792 qemu_cond_init(&qemu_work_cond);
46daff13 793 qemu_cond_init(&qemu_io_proceeded_cond);
296af7c9 794 qemu_mutex_init(&qemu_global_mutex);
296af7c9 795
b7680cb6 796 qemu_thread_get_self(&io_thread);
296af7c9
BS
797}
798
/* Run @func(@data) on @cpu's thread and wait for completion.  If called
 * from that thread, runs synchronously.  Otherwise a stack-allocated
 * work item is queued (safe because we block until wi.done) and the
 * caller sleeps on qemu_work_cond, dropping the BQL while waiting. */
void run_on_cpu(CPUState *cpu, void (*func)(void *data), void *data)
{
    struct qemu_work_item wi;

    if (qemu_cpu_is_self(cpu)) {
        func(data);
        return;
    }

    wi.func = func;
    wi.data = data;
    wi.free = false;    /* lives on our stack; must not be freed */
    if (cpu->queued_work_first == NULL) {
        cpu->queued_work_first = &wi;
    } else {
        cpu->queued_work_last->next = &wi;
    }
    cpu->queued_work_last = &wi;
    wi.next = NULL;
    wi.done = false;

    qemu_cpu_kick(cpu);
    while (!wi.done) {
        CPUState *self_cpu = current_cpu;

        /* qemu_cond_wait releases the BQL; restore current_cpu after
         * waking, since another thread may have changed it. */
        qemu_cond_wait(&qemu_work_cond, &qemu_global_mutex);
        current_cpu = self_cpu;
    }
}
828
3c02270d
CV
/* Queue @func(@data) to run on @cpu's thread without waiting.  If called
 * from that thread, runs synchronously.  The work item is heap-allocated
 * and marked free=true so flush_queued_work() releases it. */
void async_run_on_cpu(CPUState *cpu, void (*func)(void *data), void *data)
{
    struct qemu_work_item *wi;

    if (qemu_cpu_is_self(cpu)) {
        func(data);
        return;
    }

    wi = g_malloc0(sizeof(struct qemu_work_item));
    wi->func = func;
    wi->data = data;
    wi->free = true;    /* ownership passes to the consumer */
    if (cpu->queued_work_first == NULL) {
        cpu->queued_work_first = wi;
    } else {
        cpu->queued_work_last->next = wi;
    }
    cpu->queued_work_last = wi;
    wi->next = NULL;
    wi->done = false;

    qemu_cpu_kick(cpu);
}
853
/* Run and drain @cpu's queued work items, freeing heap-allocated ones
 * (async_run_on_cpu) and waking any run_on_cpu() waiters at the end. */
static void flush_queued_work(CPUState *cpu)
{
    struct qemu_work_item *wi;

    if (cpu->queued_work_first == NULL) {
        return;
    }

    while ((wi = cpu->queued_work_first)) {
        cpu->queued_work_first = wi->next;
        wi->func(wi->data);
        wi->done = true;
        if (wi->free) {
            g_free(wi);
        }
    }
    cpu->queued_work_last = NULL;
    /* Broadcast: several run_on_cpu() callers may be waiting. */
    qemu_cond_broadcast(&qemu_work_cond);
}
873
509a0d78 874static void qemu_wait_io_event_common(CPUState *cpu)
296af7c9 875{
4fdeee7c
AF
876 if (cpu->stop) {
877 cpu->stop = false;
f324e766 878 cpu->stopped = true;
296af7c9
BS
879 qemu_cond_signal(&qemu_pause_cond);
880 }
6d45b109 881 flush_queued_work(cpu);
216fc9a4 882 cpu->thread_kicked = false;
296af7c9
BS
883}
884
/* TCG thread idle path: sleep while every CPU is idle (warping the
 * virtual clock onto real time meanwhile), yield to an iothread that
 * wants the global mutex, then run per-CPU housekeeping. */
static void qemu_tcg_wait_io_event(void)
{
    CPUState *cpu;

    while (all_cpu_threads_idle()) {
        /* Start accounting real time to the virtual clock if the CPUs
           are idle. */
        qemu_clock_warp(QEMU_CLOCK_VIRTUAL);
        qemu_cond_wait(tcg_halt_cond, &qemu_global_mutex);
    }

    while (iothread_requesting_mutex) {
        qemu_cond_wait(&qemu_io_proceeded_cond, &qemu_global_mutex);
    }

    CPU_FOREACH(cpu) {
        qemu_wait_io_event_common(cpu);
    }
}
904
fd529e8f 905static void qemu_kvm_wait_io_event(CPUState *cpu)
296af7c9 906{
a98ae1d8 907 while (cpu_thread_is_idle(cpu)) {
f5c121b8 908 qemu_cond_wait(cpu->halt_cond, &qemu_global_mutex);
16400322 909 }
296af7c9 910
290adf38 911 qemu_kvm_eat_signals(cpu);
509a0d78 912 qemu_wait_io_event_common(cpu);
296af7c9
BS
913}
914
/* Thread body for a KVM vCPU: initialize the in-kernel vCPU and signal
 * handling, announce creation to the spawning thread, then loop running
 * the guest and sleeping while idle.  Runs with the BQL held except
 * inside kvm_cpu_exec()/condition waits. */
static void *qemu_kvm_cpu_thread_fn(void *arg)
{
    CPUState *cpu = arg;
    int r;

    qemu_mutex_lock(&qemu_global_mutex);
    qemu_thread_get_self(cpu->thread);
    cpu->thread_id = qemu_get_thread_id();
    current_cpu = cpu;

    r = kvm_init_vcpu(cpu);
    if (r < 0) {
        fprintf(stderr, "kvm_init_vcpu failed: %s\n", strerror(-r));
        exit(1);
    }

    qemu_kvm_init_cpu_signals(cpu);

    /* signal CPU creation */
    cpu->created = true;
    qemu_cond_signal(&qemu_cpu_cond);

    while (1) {
        if (cpu_can_run(cpu)) {
            r = kvm_cpu_exec(cpu);
            if (r == EXCP_DEBUG) {
                cpu_handle_guest_debug(cpu);
            }
        }
        qemu_kvm_wait_io_event(cpu);
    }

    return NULL;
}
949
c7f0f3b1
AL
/* Thread body for a qtest "dummy" vCPU: never executes guest code, just
 * waits for SIG_IPI kicks and services queued work.  Not supported on
 * Windows (no sigwait). */
static void *qemu_dummy_cpu_thread_fn(void *arg)
{
#ifdef _WIN32
    fprintf(stderr, "qtest is not supported under Windows\n");
    exit(1);
#else
    CPUState *cpu = arg;
    sigset_t waitset;
    int r;

    qemu_mutex_lock_iothread();
    qemu_thread_get_self(cpu->thread);
    cpu->thread_id = qemu_get_thread_id();

    sigemptyset(&waitset);
    sigaddset(&waitset, SIG_IPI);

    /* signal CPU creation */
    cpu->created = true;
    qemu_cond_signal(&qemu_cpu_cond);

    current_cpu = cpu;
    while (1) {
        /* Clear current_cpu while the iothread lock is dropped so other
         * threads do not mistake us for a running vCPU. */
        current_cpu = NULL;
        qemu_mutex_unlock_iothread();
        do {
            int sig;
            r = sigwait(&waitset, &sig);
        } while (r == -1 && (errno == EAGAIN || errno == EINTR));
        if (r == -1) {
            perror("sigwait");
            exit(1);
        }
        qemu_mutex_lock_iothread();
        current_cpu = cpu;
        qemu_wait_io_event_common(cpu);
    }

    return NULL;
#endif
}
991
bdb7ca67
JK
992static void tcg_exec_all(void);
993
/* Thread body for the single TCG thread that drives ALL emulated CPUs:
 * set up signals, publish thread ids for every CPU (note @cpu is reused
 * as the CPU_FOREACH iterator after the initial setup), wait for the
 * machine to start, then alternate tcg_exec_all() with idle waiting. */
static void *qemu_tcg_cpu_thread_fn(void *arg)
{
    CPUState *cpu = arg;

    qemu_tcg_init_cpu_signals();
    qemu_thread_get_self(cpu->thread);

    qemu_mutex_lock(&qemu_global_mutex);
    CPU_FOREACH(cpu) {
        cpu->thread_id = qemu_get_thread_id();
        cpu->created = true;
    }
    qemu_cond_signal(&qemu_cpu_cond);

    /* wait for initial kick-off after machine start */
    while (QTAILQ_FIRST(&cpus)->stopped) {
        qemu_cond_wait(tcg_halt_cond, &qemu_global_mutex);

        /* process any pending work */
        CPU_FOREACH(cpu) {
            qemu_wait_io_event_common(cpu);
        }
    }

    while (1) {
        tcg_exec_all();

        if (use_icount) {
            int64_t deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);

            /* A zero deadline means a virtual timer is already due. */
            if (deadline == 0) {
                qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
            }
        }
        qemu_tcg_wait_io_event();
    }

    return NULL;
}
1033
/* Interrupt @cpu's host thread.  POSIX: deliver SIG_IPI.  Windows:
 * suspend the thread, run cpu_signal() on its behalf, and resume it. */
static void qemu_cpu_kick_thread(CPUState *cpu)
{
#ifndef _WIN32
    int err;

    err = pthread_kill(cpu->thread->thread, SIG_IPI);
    if (err) {
        fprintf(stderr, "qemu:%s: %s", __func__, strerror(err));
        exit(1);
    }
#else /* _WIN32 */
    if (!qemu_cpu_is_self(cpu)) {
        CONTEXT tcgContext;

        if (SuspendThread(cpu->hThread) == (DWORD)-1) {
            fprintf(stderr, "qemu:%s: GetLastError:%lu\n", __func__,
                    GetLastError());
            exit(1);
        }

        /* On multi-core systems, we are not sure that the thread is actually
         * suspended until we can get the context.
         * NOTE(review): this loop retries while GetThreadContext SUCCEEDS
         * (non-zero return); that reads as inverted relative to the comment
         * above — confirm the intended condition against MSDN semantics.
         */
        tcgContext.ContextFlags = CONTEXT_CONTROL;
        while (GetThreadContext(cpu->hThread, &tcgContext) != 0) {
            continue;
        }

        cpu_signal(0);

        if (ResumeThread(cpu->hThread) == (DWORD)-1) {
            fprintf(stderr, "qemu:%s: GetLastError:%lu\n", __func__,
                    GetLastError());
            exit(1);
        }
    }
#endif
}
1072
c08d7424 1073void qemu_cpu_kick(CPUState *cpu)
296af7c9 1074{
f5c121b8 1075 qemu_cond_broadcast(cpu->halt_cond);
216fc9a4 1076 if (!tcg_enabled() && !cpu->thread_kicked) {
2ff09a40 1077 qemu_cpu_kick_thread(cpu);
216fc9a4 1078 cpu->thread_kicked = true;
aa2c364b 1079 }
296af7c9
BS
1080}
1081
46d62fac 1082void qemu_cpu_kick_self(void)
296af7c9 1083{
b55c22c6 1084#ifndef _WIN32
4917cf44 1085 assert(current_cpu);
296af7c9 1086
4917cf44
AF
1087 if (!current_cpu->thread_kicked) {
1088 qemu_cpu_kick_thread(current_cpu);
1089 current_cpu->thread_kicked = true;
296af7c9 1090 }
b55c22c6
PB
1091#else
1092 abort();
1093#endif
296af7c9
BS
1094}
1095
/* Return true if the calling thread is the host thread backing @cpu. */
bool qemu_cpu_is_self(CPUState *cpu)
{
    return qemu_thread_is_self(cpu->thread);
}
1100
/* Return true if the caller is running inside a vcpu thread, i.e.
 * current_cpu is set and belongs to this thread.
 */
static bool qemu_in_vcpu_thread(void)
{
    return current_cpu && qemu_cpu_is_self(current_cpu);
}
1105
/* Acquire the big QEMU lock from an iothread.
 *
 * Without TCG this is a plain mutex lock.  With TCG, the vcpu thread may
 * hold the lock for long stretches while emulating, so on contention we
 * raise iothread_requesting_mutex and kick the vcpu thread to make it
 * release the lock promptly; waiters on qemu_io_proceeded_cond are then
 * told the iothread got its turn.
 */
void qemu_mutex_lock_iothread(void)
{
    if (!tcg_enabled()) {
        qemu_mutex_lock(&qemu_global_mutex);
    } else {
        iothread_requesting_mutex = true;
        if (qemu_mutex_trylock(&qemu_global_mutex)) {
            /* Contended: force the TCG thread out of cpu_exec() so it
             * drops the lock, then block until we get it. */
            qemu_cpu_kick_thread(first_cpu);
            qemu_mutex_lock(&qemu_global_mutex);
        }
        iothread_requesting_mutex = false;
        qemu_cond_broadcast(&qemu_io_proceeded_cond);
    }
}
1120
/* Release the big QEMU lock acquired by qemu_mutex_lock_iothread(). */
void qemu_mutex_unlock_iothread(void)
{
    qemu_mutex_unlock(&qemu_global_mutex);
}
1125
1126static int all_vcpus_paused(void)
1127{
bdc44640 1128 CPUState *cpu;
296af7c9 1129
bdc44640 1130 CPU_FOREACH(cpu) {
182735ef 1131 if (!cpu->stopped) {
296af7c9 1132 return 0;
0ab07c62 1133 }
296af7c9
BS
1134 }
1135
1136 return 1;
1137}
1138
/* Request every vcpu to stop and wait until all of them have paused.
 * Called with the global mutex held.
 */
void pause_all_vcpus(void)
{
    CPUState *cpu;

    qemu_clock_enable(QEMU_CLOCK_VIRTUAL, false);
    CPU_FOREACH(cpu) {
        cpu->stop = true;
        qemu_cpu_kick(cpu);
    }

    /* When called from a vcpu thread we cannot wait for ourselves, so
     * stop the current cpu directly.  For non-KVM (single TCG thread)
     * that already covers everything: mark all cpus stopped and return.
     */
    if (qemu_in_vcpu_thread()) {
        cpu_stop_current();
        if (!kvm_enabled()) {
            CPU_FOREACH(cpu) {
                cpu->stop = false;
                cpu->stopped = true;
            }
            return;
        }
    }

    while (!all_vcpus_paused()) {
        qemu_cond_wait(&qemu_pause_cond, &qemu_global_mutex);
        /* Re-kick: a cpu may have re-entered its run loop since the
         * first kick above. */
        CPU_FOREACH(cpu) {
            qemu_cpu_kick(cpu);
        }
    }
}
1167
/* Clear the stop flags for @cpu and kick it back into its run loop. */
void cpu_resume(CPUState *cpu)
{
    cpu->stop = false;
    cpu->stopped = false;
    qemu_cpu_kick(cpu);
}
1174
/* Re-enable the virtual clock and resume every vcpu. */
void resume_all_vcpus(void)
{
    CPUState *cpu;

    qemu_clock_enable(QEMU_CLOCK_VIRTUAL, true);
    CPU_FOREACH(cpu) {
        cpu_resume(cpu);
    }
}
1184
4900116e
DDAG
1185/* For temporary buffers for forming a name */
1186#define VCPU_THREAD_NAME_SIZE 16
1187
/* Register @cpu for TCG execution.  All TCG cpus share one host thread:
 * the first call creates it (and waits for it to report the cpu as
 * created); later calls simply attach to the existing thread.
 */
static void qemu_tcg_init_vcpu(CPUState *cpu)
{
    char thread_name[VCPU_THREAD_NAME_SIZE];

    tcg_cpu_address_space_init(cpu, cpu->as);

    /* share a single thread for all cpus with TCG */
    if (!tcg_cpu_thread) {
        cpu->thread = g_malloc0(sizeof(QemuThread));
        cpu->halt_cond = g_malloc0(sizeof(QemuCond));
        qemu_cond_init(cpu->halt_cond);
        tcg_halt_cond = cpu->halt_cond;
        snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/TCG",
                 cpu->cpu_index);
        qemu_thread_create(cpu->thread, thread_name, qemu_tcg_cpu_thread_fn,
                           cpu, QEMU_THREAD_JOINABLE);
#ifdef _WIN32
        /* The Windows kick path (SuspendThread/ResumeThread) needs a
         * native handle. */
        cpu->hThread = qemu_thread_get_handle(cpu->thread);
#endif
        /* Handshake: qemu_tcg_cpu_thread_fn() sets cpu->created and
         * signals qemu_cpu_cond once the thread is up. */
        while (!cpu->created) {
            qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
        }
        tcg_cpu_thread = cpu->thread;
    } else {
        cpu->thread = tcg_cpu_thread;
        cpu->halt_cond = tcg_halt_cond;
    }
}
1216
/* Create the dedicated host thread that runs KVM vcpu @cpu and wait for
 * it to finish initialisation (cpu->created handshake via qemu_cpu_cond).
 */
static void qemu_kvm_start_vcpu(CPUState *cpu)
{
    char thread_name[VCPU_THREAD_NAME_SIZE];

    cpu->thread = g_malloc0(sizeof(QemuThread));
    cpu->halt_cond = g_malloc0(sizeof(QemuCond));
    qemu_cond_init(cpu->halt_cond);
    snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/KVM",
             cpu->cpu_index);
    qemu_thread_create(cpu->thread, thread_name, qemu_kvm_cpu_thread_fn,
                       cpu, QEMU_THREAD_JOINABLE);
    while (!cpu->created) {
        qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
    }
}
1232
/* Create the host thread for @cpu when no accelerator (neither KVM nor
 * TCG) is in use; waits for the cpu->created handshake like the others.
 */
static void qemu_dummy_start_vcpu(CPUState *cpu)
{
    char thread_name[VCPU_THREAD_NAME_SIZE];

    cpu->thread = g_malloc0(sizeof(QemuThread));
    cpu->halt_cond = g_malloc0(sizeof(QemuCond));
    qemu_cond_init(cpu->halt_cond);
    snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/DUMMY",
             cpu->cpu_index);
    qemu_thread_create(cpu->thread, thread_name, qemu_dummy_cpu_thread_fn, cpu,
                       QEMU_THREAD_JOINABLE);
    while (!cpu->created) {
        qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
    }
}
1248
c643bed9 1249void qemu_init_vcpu(CPUState *cpu)
296af7c9 1250{
ce3960eb
AF
1251 cpu->nr_cores = smp_cores;
1252 cpu->nr_threads = smp_threads;
f324e766 1253 cpu->stopped = true;
0ab07c62 1254 if (kvm_enabled()) {
48a106bd 1255 qemu_kvm_start_vcpu(cpu);
c7f0f3b1 1256 } else if (tcg_enabled()) {
e5ab30a2 1257 qemu_tcg_init_vcpu(cpu);
c7f0f3b1 1258 } else {
10a9021d 1259 qemu_dummy_start_vcpu(cpu);
0ab07c62 1260 }
296af7c9
BS
1261}
1262
b4a3d965 1263void cpu_stop_current(void)
296af7c9 1264{
4917cf44
AF
1265 if (current_cpu) {
1266 current_cpu->stop = false;
1267 current_cpu->stopped = true;
1268 cpu_exit(current_cpu);
67bb172f 1269 qemu_cond_signal(&qemu_pause_cond);
b4a3d965 1270 }
296af7c9
BS
1271}
1272
/* Stop the VM with run-state @state.
 *
 * From a vcpu thread the actual stop must happen in the main loop, so
 * only a stop request is queued and the current cpu halted; returns 0 in
 * that case.  Otherwise stops synchronously via do_vm_stop() and returns
 * its result.
 */
int vm_stop(RunState state)
{
    if (qemu_in_vcpu_thread()) {
        qemu_system_vmstop_request_prepare();
        qemu_system_vmstop_request(state);
        /*
         * FIXME: should not return to device code in case
         * vm_stop() has been requested.
         */
        cpu_stop_current();
        return 0;
    }

    return do_vm_stop(state);
}
1288
8a9236f1
LC
1289/* does a state transition even if the VM is already stopped,
1290 current state is forgotten forever */
56983463 1291int vm_stop_force_state(RunState state)
8a9236f1
LC
1292{
1293 if (runstate_is_running()) {
56983463 1294 return vm_stop(state);
8a9236f1
LC
1295 } else {
1296 runstate_set(state);
594a45ce
KW
1297 /* Make sure to return an error if the flush in a previous vm_stop()
1298 * failed. */
1299 return bdrv_flush_all();
8a9236f1
LC
1300 }
1301}
1302
/* Run TCG translation/execution for one cpu and return cpu_exec()'s
 * result (e.g. EXCP_DEBUG).
 *
 * With icount, an instruction budget is set up before execution from the
 * next QEMU_CLOCK_VIRTUAL deadline: the low 16 bits go into
 * icount_decr.u16.low (the counter the translated code decrements) and
 * the remainder into icount_extra.  Afterwards any unexecuted budget is
 * folded back into timers_state.qemu_icount.
 */
static int tcg_cpu_exec(CPUArchState *env)
{
    CPUState *cpu = ENV_GET_CPU(env);
    int ret;
#ifdef CONFIG_PROFILER
    int64_t ti;
#endif

#ifdef CONFIG_PROFILER
    ti = profile_getclock();
#endif
    if (use_icount) {
        int64_t count;
        int64_t deadline;
        int decr;
        /* Remove any leftover budget from the previous slice before
         * computing the new one. */
        timers_state.qemu_icount -= (cpu->icount_decr.u16.low
                                    + cpu->icount_extra);
        cpu->icount_decr.u16.low = 0;
        cpu->icount_extra = 0;
        deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);

        /* Maintain prior (possibly buggy) behaviour where if no deadline
         * was set (as there is no QEMU_CLOCK_VIRTUAL timer) or it is more than
         * INT32_MAX nanoseconds ahead, we still use INT32_MAX
         * nanoseconds.
         */
        if ((deadline < 0) || (deadline > INT32_MAX)) {
            deadline = INT32_MAX;
        }

        count = qemu_icount_round(deadline);
        timers_state.qemu_icount += count;
        /* Split the budget: at most 0xffff instructions in the fast
         * 16-bit decrement counter, the rest in icount_extra. */
        decr = (count > 0xffff) ? 0xffff : count;
        count -= decr;
        cpu->icount_decr.u16.low = decr;
        cpu->icount_extra = count;
    }
    ret = cpu_exec(env);
#ifdef CONFIG_PROFILER
    qemu_time += profile_getclock() - ti;
#endif
    if (use_icount) {
        /* Fold pending instructions back into the
           instruction counter, and clear the interrupt flag. */
        timers_state.qemu_icount -= (cpu->icount_decr.u16.low
                                    + cpu->icount_extra);
        /* Clearing u32 also clears the high half used as the
         * interrupt-request flag. */
        cpu->icount_decr.u32 = 0;
        cpu->icount_extra = 0;
    }
    return ret;
}
1354
/* Run each runnable cpu in turn, starting from where the previous call
 * left off (next_cpu persists across calls for round-robin fairness).
 * Stops early on a debug exception, a stop request, or exit_request.
 */
static void tcg_exec_all(void)
{
    int r;

    /* Account partial waits to QEMU_CLOCK_VIRTUAL.  */
    qemu_clock_warp(QEMU_CLOCK_VIRTUAL);

    if (next_cpu == NULL) {
        next_cpu = first_cpu;
    }
    for (; next_cpu != NULL && !exit_request; next_cpu = CPU_NEXT(next_cpu)) {
        CPUState *cpu = next_cpu;
        CPUArchState *env = cpu->env_ptr;

        /* Keep the virtual clock stopped while single-stepping with
         * SSTEP_NOTIMER set. */
        qemu_clock_enable(QEMU_CLOCK_VIRTUAL,
                          (cpu->singlestep_enabled & SSTEP_NOTIMER) == 0);

        if (cpu_can_run(cpu)) {
            r = tcg_cpu_exec(env);
            if (r == EXCP_DEBUG) {
                cpu_handle_guest_debug(cpu);
                break;
            }
        } else if (cpu->stop || cpu->stopped) {
            break;
        }
    }
    exit_request = 0;
}
1384
/* Print the list of CPU models supported by this target to @f, when the
 * target defines cpu_list; otherwise prints nothing.
 */
void list_cpus(FILE *f, fprintf_function cpu_fprintf, const char *optarg)
{
    /* XXX: implement xxx_cpu_list for targets that still miss it */
#if defined(cpu_list)
    cpu_list(f, cpu_fprintf);
#endif
}
/* QMP "query-cpus": build and return a list with one CpuInfo per vcpu.
 * The per-target program-counter fields (pc/nip/npc/PC) are filled in
 * under the matching TARGET_* #ifdef; other targets report none.
 */
CpuInfoList *qmp_query_cpus(Error **errp)
{
    CpuInfoList *head = NULL, *cur_item = NULL;
    CPUState *cpu;

    CPU_FOREACH(cpu) {
        CpuInfoList *info;
#if defined(TARGET_I386)
        X86CPU *x86_cpu = X86_CPU(cpu);
        CPUX86State *env = &x86_cpu->env;
#elif defined(TARGET_PPC)
        PowerPCCPU *ppc_cpu = POWERPC_CPU(cpu);
        CPUPPCState *env = &ppc_cpu->env;
#elif defined(TARGET_SPARC)
        SPARCCPU *sparc_cpu = SPARC_CPU(cpu);
        CPUSPARCState *env = &sparc_cpu->env;
#elif defined(TARGET_MIPS)
        MIPSCPU *mips_cpu = MIPS_CPU(cpu);
        CPUMIPSState *env = &mips_cpu->env;
#endif

        /* Make sure the register state read below is up to date. */
        cpu_synchronize_state(cpu);

        info = g_malloc0(sizeof(*info));
        info->value = g_malloc0(sizeof(*info->value));
        info->value->CPU = cpu->cpu_index;
        info->value->current = (cpu == first_cpu);
        info->value->halted = cpu->halted;
        info->value->thread_id = cpu->thread_id;
#if defined(TARGET_I386)
        info->value->has_pc = true;
        info->value->pc = env->eip + env->segs[R_CS].base;
#elif defined(TARGET_PPC)
        info->value->has_nip = true;
        info->value->nip = env->nip;
#elif defined(TARGET_SPARC)
        info->value->has_pc = true;
        info->value->pc = env->pc;
        info->value->has_npc = true;
        info->value->npc = env->npc;
#elif defined(TARGET_MIPS)
        info->value->has_PC = true;
        info->value->PC = env->active_tc.PC;
#endif

        /* XXX: waiting for the qapi to support GSList */
        if (!cur_item) {
            head = cur_item = info;
        } else {
            cur_item->next = info;
            cur_item = info;
        }
    }

    return head;
}
0cfd6a9a
LC
1449
1450void qmp_memsave(int64_t addr, int64_t size, const char *filename,
1451 bool has_cpu, int64_t cpu_index, Error **errp)
1452{
1453 FILE *f;
1454 uint32_t l;
55e5c285 1455 CPUState *cpu;
0cfd6a9a
LC
1456 uint8_t buf[1024];
1457
1458 if (!has_cpu) {
1459 cpu_index = 0;
1460 }
1461
151d1322
AF
1462 cpu = qemu_get_cpu(cpu_index);
1463 if (cpu == NULL) {
0cfd6a9a
LC
1464 error_set(errp, QERR_INVALID_PARAMETER_VALUE, "cpu-index",
1465 "a CPU number");
1466 return;
1467 }
1468
1469 f = fopen(filename, "wb");
1470 if (!f) {
618da851 1471 error_setg_file_open(errp, errno, filename);
0cfd6a9a
LC
1472 return;
1473 }
1474
1475 while (size != 0) {
1476 l = sizeof(buf);
1477 if (l > size)
1478 l = size;
2f4d0f59
AK
1479 if (cpu_memory_rw_debug(cpu, addr, buf, l, 0) != 0) {
1480 error_setg(errp, "Invalid addr 0x%016" PRIx64 "specified", addr);
1481 goto exit;
1482 }
0cfd6a9a
LC
1483 if (fwrite(buf, 1, l, f) != l) {
1484 error_set(errp, QERR_IO_ERROR);
1485 goto exit;
1486 }
1487 addr += l;
1488 size -= l;
1489 }
1490
1491exit:
1492 fclose(f);
1493}
6d3962bf
LC
1494
1495void qmp_pmemsave(int64_t addr, int64_t size, const char *filename,
1496 Error **errp)
1497{
1498 FILE *f;
1499 uint32_t l;
1500 uint8_t buf[1024];
1501
1502 f = fopen(filename, "wb");
1503 if (!f) {
618da851 1504 error_setg_file_open(errp, errno, filename);
6d3962bf
LC
1505 return;
1506 }
1507
1508 while (size != 0) {
1509 l = sizeof(buf);
1510 if (l > size)
1511 l = size;
eb6282f2 1512 cpu_physical_memory_read(addr, buf, l);
6d3962bf
LC
1513 if (fwrite(buf, 1, l, f) != l) {
1514 error_set(errp, QERR_IO_ERROR);
1515 goto exit;
1516 }
1517 addr += l;
1518 size -= l;
1519 }
1520
1521exit:
1522 fclose(f);
1523}
/* QMP "inject-nmi".
 * x86: deliver an NMI to every vcpu, via the APIC when one is present.
 * s390x: restart the cpu currently selected in the monitor.
 * All other targets: report QERR_UNSUPPORTED.
 */
void qmp_inject_nmi(Error **errp)
{
#if defined(TARGET_I386)
    CPUState *cs;

    CPU_FOREACH(cs) {
        X86CPU *cpu = X86_CPU(cs);

        if (!cpu->apic_state) {
            /* No APIC modelled: raise the NMI directly on the cpu. */
            cpu_interrupt(cs, CPU_INTERRUPT_NMI);
        } else {
            apic_deliver_nmi(cpu->apic_state);
        }
    }
#elif defined(TARGET_S390X)
    CPUState *cs;
    S390CPU *cpu;

    CPU_FOREACH(cs) {
        cpu = S390_CPU(cs);
        /* Only the cpu selected in the monitor is restarted. */
        if (cpu->env.cpu_num == monitor_get_cpu_index()) {
            if (s390_cpu_restart(S390_CPU(cs)) == -1) {
                error_set(errp, QERR_UNSUPPORTED);
                return;
            }
            break;
        }
    }
#else
    error_set(errp, QERR_UNSUPPORTED);
#endif
}
/* Print icount drift statistics (guest clock vs host clock) to @f.
 * No-op unless icount mode is in use; the delay/advance maxima are only
 * tracked when -icount align is enabled.
 */
void dump_drift_info(FILE *f, fprintf_function cpu_fprintf)
{
    if (!use_icount) {
        return;
    }

    cpu_fprintf(f, "Host - Guest clock %"PRIi64" ms\n",
                (cpu_get_clock() - cpu_get_icount())/SCALE_MS);
    if (icount_align_option) {
        cpu_fprintf(f, "Max guest delay %"PRIi64" ms\n", -max_delay/SCALE_MS);
        cpu_fprintf(f, "Max guest advance %"PRIi64" ms\n", max_advance/SCALE_MS);
    } else {
        /* Alignment tracking disabled: no delay/advance stats exist. */
        cpu_fprintf(f, "Max guest delay NA\n");
        cpu_fprintf(f, "Max guest advance NA\n");
    }
}