]> git.proxmox.com Git - mirror_qemu.git/blame - cpus.c
icount: implement a new icount_sleep mode toggleing real-time cpu sleep
[mirror_qemu.git] / cpus.c
CommitLineData
296af7c9
BS
1/*
2 * QEMU System Emulator
3 *
4 * Copyright (c) 2003-2008 Fabrice Bellard
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to deal
8 * in the Software without restriction, including without limitation the rights
9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 * copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22 * THE SOFTWARE.
23 */
24
25/* Needed early for CONFIG_BSD etc. */
26#include "config-host.h"
27
83c9089e 28#include "monitor/monitor.h"
a4e15de9 29#include "qapi/qmp/qerror.h"
9c17d615 30#include "sysemu/sysemu.h"
022c62cb 31#include "exec/gdbstub.h"
9c17d615
PB
32#include "sysemu/dma.h"
33#include "sysemu/kvm.h"
de0b36b6 34#include "qmp-commands.h"
296af7c9 35
1de7afc9 36#include "qemu/thread.h"
9c17d615
PB
37#include "sysemu/cpus.h"
38#include "sysemu/qtest.h"
1de7afc9
PB
39#include "qemu/main-loop.h"
40#include "qemu/bitmap.h"
cb365646 41#include "qemu/seqlock.h"
a4e15de9 42#include "qapi-event.h"
9cb805fd 43#include "hw/nmi.h"
0ff0fc19
JK
44
45#ifndef _WIN32
1de7afc9 46#include "qemu/compatfd.h"
0ff0fc19 47#endif
296af7c9 48
6d9cb73c
JK
49#ifdef CONFIG_LINUX
50
51#include <sys/prctl.h>
52
c0532a76
MT
53#ifndef PR_MCE_KILL
54#define PR_MCE_KILL 33
55#endif
56
6d9cb73c
JK
57#ifndef PR_MCE_KILL_SET
58#define PR_MCE_KILL_SET 1
59#endif
60
61#ifndef PR_MCE_KILL_EARLY
62#define PR_MCE_KILL_EARLY 1
63#endif
64
65#endif /* CONFIG_LINUX */
66
182735ef 67static CPUState *next_cpu;
27498bef
ST
68int64_t max_delay;
69int64_t max_advance;
296af7c9 70
321bc0b2
TC
71bool cpu_is_stopped(CPUState *cpu)
72{
73 return cpu->stopped || !runstate_is_running();
74}
75
a98ae1d8 76static bool cpu_thread_is_idle(CPUState *cpu)
ac873f1e 77{
c64ca814 78 if (cpu->stop || cpu->queued_work_first) {
ac873f1e
PM
79 return false;
80 }
321bc0b2 81 if (cpu_is_stopped(cpu)) {
ac873f1e
PM
82 return true;
83 }
8c2e1b00 84 if (!cpu->halted || cpu_has_work(cpu) ||
215e79c0 85 kvm_halt_in_kernel()) {
ac873f1e
PM
86 return false;
87 }
88 return true;
89}
90
91static bool all_cpu_threads_idle(void)
92{
182735ef 93 CPUState *cpu;
ac873f1e 94
bdc44640 95 CPU_FOREACH(cpu) {
182735ef 96 if (!cpu_thread_is_idle(cpu)) {
ac873f1e
PM
97 return false;
98 }
99 }
100 return true;
101}
102
946fb27c
PB
103/***********************************************************/
104/* guest cycle counter */
105
a3270e19
PB
106/* Protected by TimersState seqlock */
107
5045e9d9 108static bool icount_sleep = true;
71468395 109static int64_t vm_clock_warp_start = -1;
946fb27c
PB
110/* Conversion factor from emulated instructions to virtual clock ticks. */
111static int icount_time_shift;
112/* Arbitrarily pick 1MIPS as the minimum allowable speed. */
113#define MAX_ICOUNT_SHIFT 10
a3270e19 114
946fb27c
PB
115static QEMUTimer *icount_rt_timer;
116static QEMUTimer *icount_vm_timer;
117static QEMUTimer *icount_warp_timer;
946fb27c
PB
118
119typedef struct TimersState {
cb365646 120 /* Protected by BQL. */
946fb27c
PB
121 int64_t cpu_ticks_prev;
122 int64_t cpu_ticks_offset;
cb365646
LPF
123
124 /* cpu_clock_offset can be read out of BQL, so protect it with
125 * this lock.
126 */
127 QemuSeqLock vm_clock_seqlock;
946fb27c
PB
128 int64_t cpu_clock_offset;
129 int32_t cpu_ticks_enabled;
130 int64_t dummy;
c96778bb
FK
131
132 /* Compensate for varying guest execution speed. */
133 int64_t qemu_icount_bias;
134 /* Only written by TCG thread */
135 int64_t qemu_icount;
946fb27c
PB
136} TimersState;
137
d9cd4007 138static TimersState timers_state;
946fb27c 139
/* Return the raw instruction counter: the TCG-accounted total minus the
 * instructions already budgeted to the currently executing CPU but not
 * yet retired (icount_decr.u16.low + icount_extra).
 */
int64_t cpu_get_icount_raw(void)
{
    int64_t icount;
    CPUState *cpu = current_cpu;

    icount = timers_state.qemu_icount;
    if (cpu) {
        if (!cpu_can_do_io(cpu)) {
            /* Reading icount in the middle of a translation block would
             * yield an inconsistent value; treat it as fatal.
             */
            fprintf(stderr, "Bad icount read\n");
            exit(1);
        }
        icount -= (cpu->icount_decr.u16.low + cpu->icount_extra);
    }
    return icount;
}
155
/* Return the virtual CPU time, based on the instruction counter.
 * Caller must be inside the vm_clock seqlock (see cpu_get_icount()).
 */
static int64_t cpu_get_icount_locked(void)
{
    int64_t icount = cpu_get_icount_raw();
    return timers_state.qemu_icount_bias + cpu_icount_to_ns(icount);
}
162
17a15f1b
PB
/* Lock-free reader for the icount-based virtual clock: repeat the
 * seqlock read section until no writer raced with it.
 */
int64_t cpu_get_icount(void)
{
    int64_t icount;
    unsigned start;

    do {
        start = seqlock_read_begin(&timers_state.vm_clock_seqlock);
        icount = cpu_get_icount_locked();
    } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start));

    return icount;
}
175
3f031313
FK
176int64_t cpu_icount_to_ns(int64_t icount)
177{
178 return icount << icount_time_shift;
179}
180
/* return the host CPU cycle counter and handle stop/restart */
/* Caller must hold the BQL */
int64_t cpu_get_ticks(void)
{
    int64_t ticks;

    /* In icount mode the tick counter is derived from executed insns.  */
    if (use_icount) {
        return cpu_get_icount();
    }

    ticks = timers_state.cpu_ticks_offset;
    if (timers_state.cpu_ticks_enabled) {
        ticks += cpu_get_real_ticks();
    }

    if (timers_state.cpu_ticks_prev > ticks) {
        /* Note: non increasing ticks may happen if the host uses
           software suspend.  Fold the regression into the offset so the
           returned value never goes backwards.  */
        timers_state.cpu_ticks_offset += timers_state.cpu_ticks_prev - ticks;
        ticks = timers_state.cpu_ticks_prev;
    }

    /* Remember the last value returned to enforce monotonicity above.  */
    timers_state.cpu_ticks_prev = ticks;
    return ticks;
}
206
cb365646 207static int64_t cpu_get_clock_locked(void)
946fb27c 208{
5f3e3101 209 int64_t ticks;
cb365646 210
5f3e3101
PB
211 ticks = timers_state.cpu_clock_offset;
212 if (timers_state.cpu_ticks_enabled) {
213 ticks += get_clock();
946fb27c 214 }
cb365646 215
5f3e3101 216 return ticks;
cb365646
LPF
217}
218
/* return the host CPU monotonic timer and handle stop/restart */
int64_t cpu_get_clock(void)
{
    int64_t ti;
    unsigned start;

    /* Seqlock read loop: retry if a writer updated the offsets meanwhile. */
    do {
        start = seqlock_read_begin(&timers_state.vm_clock_seqlock);
        ti = cpu_get_clock_locked();
    } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start));

    return ti;
}
232
cb365646
LPF
/* enable cpu_get_ticks()
 * Caller must hold the BQL, which serves as the mutex for vm_clock_seqlock.
 */
void cpu_enable_ticks(void)
{
    /* Here, the real thing protected by the seqlock is cpu_clock_offset. */
    seqlock_write_lock(&timers_state.vm_clock_seqlock);
    if (!timers_state.cpu_ticks_enabled) {
        /* Rebase both offsets so the clocks resume where they stopped. */
        timers_state.cpu_ticks_offset -= cpu_get_real_ticks();
        timers_state.cpu_clock_offset -= get_clock();
        timers_state.cpu_ticks_enabled = 1;
    }
    seqlock_write_unlock(&timers_state.vm_clock_seqlock);
}
247
/* disable cpu_get_ticks() : the clock is stopped. You must not call
 * cpu_get_ticks() after that.
 * Caller must hold the BQL, which serves as the mutex for vm_clock_seqlock.
 */
void cpu_disable_ticks(void)
{
    /* Here, the real thing protected by the seqlock is cpu_clock_offset. */
    seqlock_write_lock(&timers_state.vm_clock_seqlock);
    if (timers_state.cpu_ticks_enabled) {
        /* Freeze both clocks at their current values. */
        timers_state.cpu_ticks_offset += cpu_get_real_ticks();
        timers_state.cpu_clock_offset = cpu_get_clock_locked();
        timers_state.cpu_ticks_enabled = 0;
    }
    seqlock_write_unlock(&timers_state.vm_clock_seqlock);
}
263
/* Correlation between real and virtual time is always going to be
   fairly approximate, so ignore small variation.
   When the guest is idle real and virtual time will be aligned in
   the IO wait loop. */
#define ICOUNT_WOBBLE (get_ticks_per_sec() / 10)

/* Periodic feedback loop: nudge icount_time_shift so icount-based
 * virtual time tracks real time (adaptive mode, use_icount == 2).
 */
static void icount_adjust(void)
{
    int64_t cur_time;
    int64_t cur_icount;
    int64_t delta;

    /* Protected by TimersState mutex.  */
    static int64_t last_delta;

    /* If the VM is not running, then do nothing.  */
    if (!runstate_is_running()) {
        return;
    }

    seqlock_write_lock(&timers_state.vm_clock_seqlock);
    cur_time = cpu_get_clock_locked();
    cur_icount = cpu_get_icount_locked();

    delta = cur_icount - cur_time;
    /* FIXME: This is a very crude algorithm, somewhat prone to oscillation. */
    if (delta > 0
        && last_delta + ICOUNT_WOBBLE < delta * 2
        && icount_time_shift > 0) {
        /* The guest is getting too far ahead.  Slow time down.  */
        icount_time_shift--;
    }
    if (delta < 0
        && last_delta - ICOUNT_WOBBLE > delta * 2
        && icount_time_shift < MAX_ICOUNT_SHIFT) {
        /* The guest is getting too far behind.  Speed time up.  */
        icount_time_shift++;
    }
    last_delta = delta;
    /* Recompute the bias so the adjusted shift does not change the
     * current value of the virtual clock.
     */
    timers_state.qemu_icount_bias = cur_icount
                              - (timers_state.qemu_icount << icount_time_shift);
    seqlock_write_unlock(&timers_state.vm_clock_seqlock);
}
307
308static void icount_adjust_rt(void *opaque)
309{
40daca54 310 timer_mod(icount_rt_timer,
1979b908 311 qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL_RT) + 1000);
946fb27c
PB
312 icount_adjust();
313}
314
315static void icount_adjust_vm(void *opaque)
316{
40daca54
AB
317 timer_mod(icount_vm_timer,
318 qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
319 get_ticks_per_sec() / 10);
946fb27c
PB
320 icount_adjust();
321}
322
/* Convert @count ns to instructions, rounding up to a whole
 * instruction (divide by 2^icount_time_shift towards +infinity).
 */
static int64_t qemu_icount_round(int64_t count)
{
    return (count + (1 << icount_time_shift) - 1) >> icount_time_shift;
}
327
/* Warp-timer callback (also called directly with NULL): fold the real
 * time elapsed since vm_clock_warp_start into the icount bias so
 * QEMU_CLOCK_VIRTUAL catches up after the CPUs were sleeping.
 */
static void icount_warp_rt(void *opaque)
{
    /* The icount_warp_timer is rescheduled soon after vm_clock_warp_start
     * changes from -1 to another value, so the race here is okay.
     */
    if (atomic_read(&vm_clock_warp_start) == -1) {
        return;
    }

    seqlock_write_lock(&timers_state.vm_clock_seqlock);
    if (runstate_is_running()) {
        int64_t clock = cpu_get_clock_locked();
        int64_t warp_delta;

        warp_delta = clock - vm_clock_warp_start;
        if (use_icount == 2) {
            /*
             * In adaptive mode, do not let QEMU_CLOCK_VIRTUAL run too
             * far ahead of real time.
             */
            int64_t cur_icount = cpu_get_icount_locked();
            int64_t delta = clock - cur_icount;
            warp_delta = MIN(warp_delta, delta);
        }
        timers_state.qemu_icount_bias += warp_delta;
    }
    /* Mark the warp as consumed whether or not we applied it. */
    vm_clock_warp_start = -1;
    seqlock_write_unlock(&timers_state.vm_clock_seqlock);

    /* Fire any QEMU_CLOCK_VIRTUAL timers the warp made due. */
    if (qemu_clock_expired(QEMU_CLOCK_VIRTUAL)) {
        qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
    }
}
361
8156be56
PB
/* qtest only: advance QEMU_CLOCK_VIRTUAL to @dest, running every
 * main-loop and AioContext timer that falls in between.
 */
void qtest_clock_warp(int64_t dest)
{
    int64_t clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
    AioContext *aio_context;
    assert(qtest_enabled());
    aio_context = qemu_get_aio_context();
    while (clock < dest) {
        int64_t deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
        /* Jump at most to the next pending deadline, never past @dest. */
        int64_t warp = qemu_soonest_timeout(dest - clock, deadline);

        seqlock_write_lock(&timers_state.vm_clock_seqlock);
        timers_state.qemu_icount_bias += warp;
        seqlock_write_unlock(&timers_state.vm_clock_seqlock);

        qemu_clock_run_timers(QEMU_CLOCK_VIRTUAL);
        timerlist_run_timers(aio_context->tlg.tl[QEMU_CLOCK_VIRTUAL]);
        clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
    }
    qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
}
382
/* In icount mode, skip QEMU_CLOCK_VIRTUAL forward (immediately, or via
 * the warp timer when icount_sleep is set) when all CPU threads are
 * idle, so pending timers still fire while no instructions execute.
 */
void qemu_clock_warp(QEMUClockType type)
{
    int64_t clock;
    int64_t deadline;

    /*
     * There are too many global variables to make the "warp" behavior
     * applicable to other clocks.  But a clock argument removes the
     * need for if statements all over the place.
     */
    if (type != QEMU_CLOCK_VIRTUAL || !use_icount) {
        return;
    }

    if (icount_sleep) {
        /*
         * If the CPUs have been sleeping, advance QEMU_CLOCK_VIRTUAL timer now.
         * This ensures that the deadline for the timer is computed correctly
         * below.
         * This also makes sure that the insn counter is synchronized before
         * the CPU starts running, in case the CPU is woken by an event other
         * than the earliest QEMU_CLOCK_VIRTUAL timer.
         */
        icount_warp_rt(NULL);
        timer_del(icount_warp_timer);
    }
    if (!all_cpu_threads_idle()) {
        return;
    }

    if (qtest_enabled()) {
        /* When testing, qtest commands advance icount.  */
        return;
    }

    /* We want to use the earliest deadline from ALL vm_clocks */
    clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT);
    deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
    if (deadline < 0) {
        /* No pending QEMU_CLOCK_VIRTUAL timer at all: nothing to warp to. */
        return;
    }

    if (deadline > 0) {
        /*
         * Ensure QEMU_CLOCK_VIRTUAL proceeds even when the virtual CPU goes to
         * sleep.  Otherwise, the CPU might be waiting for a future timer
         * interrupt to wake it up, but the interrupt never comes because
         * the vCPU isn't running any insns and thus doesn't advance the
         * QEMU_CLOCK_VIRTUAL.
         */
        if (!icount_sleep) {
            /*
             * We never let VCPUs sleep in no sleep icount mode.
             * If there is a pending QEMU_CLOCK_VIRTUAL timer we just advance
             * to the next QEMU_CLOCK_VIRTUAL event and notify it.
             * It is useful when we want a deterministic execution time,
             * isolated from host latencies.
             */
            seqlock_write_lock(&timers_state.vm_clock_seqlock);
            timers_state.qemu_icount_bias += deadline;
            seqlock_write_unlock(&timers_state.vm_clock_seqlock);
            qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
        } else {
            /*
             * We do stop VCPUs and only advance QEMU_CLOCK_VIRTUAL after some
             * "real" time, (related to the time left until the next event) has
             * passed.  The QEMU_CLOCK_VIRTUAL_RT clock will do this.
             * This avoids that the warps are visible externally; for example,
             * you will not be sending network packets continuously instead of
             * every 100ms.
             */
            seqlock_write_lock(&timers_state.vm_clock_seqlock);
            if (vm_clock_warp_start == -1 || vm_clock_warp_start > clock) {
                vm_clock_warp_start = clock;
            }
            seqlock_write_unlock(&timers_state.vm_clock_seqlock);
            timer_mod_anticipate(icount_warp_timer, clock + deadline);
        }
    } else if (deadline == 0) {
        /* A timer is already due: just wake the clock's waiters. */
        qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
    }
}
465
d09eae37
FK
/* Migration predicate: the icount subsection is sent only when icount
 * mode is active.
 */
static bool icount_state_needed(void *opaque)
{
    return use_icount;
}
470
/*
 * This is a subsection for icount migration.
 */
static const VMStateDescription icount_vmstate_timers = {
    .name = "timer/icount",
    .version_id = 1,
    .minimum_version_id = 1,
    .fields = (VMStateField[]) {
        /* Both counters live in TimersState; see icount_state_needed(). */
        VMSTATE_INT64(qemu_icount_bias, TimersState),
        VMSTATE_INT64(qemu_icount, TimersState),
        VMSTATE_END_OF_LIST()
    }
};
484
946fb27c
PB
/* Migration description for TimersState; the icount fields travel in an
 * optional subsection gated by icount_state_needed().
 */
static const VMStateDescription vmstate_timers = {
    .name = "timer",
    .version_id = 2,
    .minimum_version_id = 1,
    .fields = (VMStateField[]) {
        VMSTATE_INT64(cpu_ticks_offset, TimersState),
        /* Retained for stream compatibility; not used at runtime. */
        VMSTATE_INT64(dummy, TimersState),
        VMSTATE_INT64_V(cpu_clock_offset, TimersState, 2),
        VMSTATE_END_OF_LIST()
    },
    .subsections = (VMStateSubsection[]) {
        {
            .vmsd = &icount_vmstate_timers,
            .needed = icount_state_needed,
        }, {
            /* empty */
        }
    }
};
504
4603ea01
PD
/* One-time init: set up the vm_clock seqlock and register TimersState
 * for migration.
 */
void cpu_ticks_init(void)
{
    seqlock_init(&timers_state.vm_clock_seqlock, NULL);
    vmstate_register(NULL, 0, &vmstate_timers, &timers_state);
}
510
/* Parse the -icount options ("shift", "align") and enable icount mode:
 * use_icount becomes 1 for a fixed shift, 2 for auto-adjusting mode.
 * Errors are reported through @errp.
 */
void configure_icount(QemuOpts *opts, Error **errp)
{
    const char *option;
    char *rem_str = NULL;

    option = qemu_opt_get(opts, "shift");
    if (!option) {
        /* "align" is only meaningful together with "shift". */
        if (qemu_opt_get(opts, "align") != NULL) {
            error_setg(errp, "Please specify shift option when using align");
        }
        return;
    }
    /* The warp timer only exists when sleeping is allowed (icount_sleep). */
    if (icount_sleep) {
        icount_warp_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL_RT,
                                         icount_warp_rt, NULL);
    }
    icount_align_option = qemu_opt_get_bool(opts, "align", false);
    if (strcmp(option, "auto") != 0) {
        errno = 0;
        icount_time_shift = strtol(option, &rem_str, 0);
        if (errno != 0 || *rem_str != '\0' || !strlen(option)) {
            /* NOTE(review): execution continues after this error is set
             * and use_icount is still enabled — confirm callers abort on
             * a populated *errp. */
            error_setg(errp, "icount: Invalid shift value");
        }
        use_icount = 1;
        return;
    } else if (icount_align_option) {
        error_setg(errp, "shift=auto and align=on are incompatible");
    }

    use_icount = 2;

    /* 125MIPS seems a reasonable initial guess at the guest speed.
       It will be corrected fairly quickly anyway. */
    icount_time_shift = 3;

    /* Have both realtime and virtual time triggers for speed adjustment.
       The realtime trigger catches emulated time passing too slowly,
       the virtual time trigger catches emulated time passing too fast.
       Realtime triggers occur even when idle, so use them less frequently
       than VM triggers.  */
    icount_rt_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL_RT,
                                   icount_adjust_rt, NULL);
    timer_mod(icount_rt_timer,
              qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL_RT) + 1000);
    icount_vm_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
                                   icount_adjust_vm, NULL);
    timer_mod(icount_vm_timer,
              qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
              get_ticks_per_sec() / 10);
}
561
296af7c9
BS
/***********************************************************/
/* Report a fatal hardware-emulation error: print the formatted message
 * and the register state of every CPU, then abort.
 */
void hw_error(const char *fmt, ...)
{
    va_list ap;
    CPUState *cpu;

    va_start(ap, fmt);
    fprintf(stderr, "qemu: hardware error: ");
    vfprintf(stderr, fmt, ap);
    fprintf(stderr, "\n");
    CPU_FOREACH(cpu) {
        fprintf(stderr, "CPU #%d:\n", cpu->cpu_index);
        cpu_dump_state(cpu, stderr, fprintf, CPU_DUMP_FPU);
    }
    va_end(ap);
    abort();
}
579
580void cpu_synchronize_all_states(void)
581{
182735ef 582 CPUState *cpu;
296af7c9 583
bdc44640 584 CPU_FOREACH(cpu) {
182735ef 585 cpu_synchronize_state(cpu);
296af7c9
BS
586 }
587}
588
589void cpu_synchronize_all_post_reset(void)
590{
182735ef 591 CPUState *cpu;
296af7c9 592
bdc44640 593 CPU_FOREACH(cpu) {
182735ef 594 cpu_synchronize_post_reset(cpu);
296af7c9
BS
595 }
596}
597
598void cpu_synchronize_all_post_init(void)
599{
182735ef 600 CPUState *cpu;
296af7c9 601
bdc44640 602 CPU_FOREACH(cpu) {
182735ef 603 cpu_synchronize_post_init(cpu);
296af7c9
BS
604 }
605}
606
de9d61e8
MT
607void cpu_clean_all_dirty(void)
608{
609 CPUState *cpu;
610
611 CPU_FOREACH(cpu) {
612 cpu_clean_state(cpu);
613 }
614}
615
/* Stop the VM: freeze the tick counters, pause every vCPU, move to
 * @state and notify listeners; then drain and flush all block devices.
 * Returns the result of bdrv_flush_all().
 */
static int do_vm_stop(RunState state)
{
    int ret = 0;

    if (runstate_is_running()) {
        cpu_disable_ticks();
        pause_all_vcpus();
        runstate_set(state);
        vm_state_notify(0, state);
        qapi_event_send_stop(&error_abort);
    }

    /* Drain and flush even if we were already stopped. */
    bdrv_drain_all();
    ret = bdrv_flush_all();

    return ret;
}
633
a1fcaa73 634static bool cpu_can_run(CPUState *cpu)
296af7c9 635{
4fdeee7c 636 if (cpu->stop) {
a1fcaa73 637 return false;
0ab07c62 638 }
321bc0b2 639 if (cpu_is_stopped(cpu)) {
a1fcaa73 640 return false;
0ab07c62 641 }
a1fcaa73 642 return true;
296af7c9
BS
643}
644
/* Park @cpu after a debug exception: point the gdbstub at it, request
 * a system-wide debug stop, and mark the CPU stopped.
 */
static void cpu_handle_guest_debug(CPUState *cpu)
{
    gdb_set_stop_cpu(cpu);
    qemu_system_debug_request();
    cpu->stopped = true;
}
651
714bd040
PB
/* SIG_IPI handler for the TCG thread: kick the currently executing CPU
 * out of its translation loop and request an exit from the exec loop.
 * Runs in signal context, so only async-signal-safe work is done.
 */
static void cpu_signal(int sig)
{
    if (current_cpu) {
        cpu_exit(current_cpu);
    }
    exit_request = 1;
}
714bd040 659
6d9cb73c
JK
660#ifdef CONFIG_LINUX
/* Restore the default SIGBUS disposition and re-raise the signal so the
 * process dies with it; if control ever returns here, abort.
 */
static void sigbus_reraise(void)
{
    sigset_t set;
    struct sigaction action;

    memset(&action, 0, sizeof(action));
    action.sa_handler = SIG_DFL;
    if (!sigaction(SIGBUS, &action, NULL)) {
        raise(SIGBUS);
        /* Unblock SIGBUS so the raised signal is actually delivered. */
        sigemptyset(&set);
        sigaddset(&set, SIGBUS);
        sigprocmask(SIG_UNBLOCK, &set, NULL);
    }
    /* Reached only if re-raising failed to terminate the process. */
    perror("Failed to re-raise SIGBUS!\n");
    abort();
}
677
/* signalfd-style SIGBUS handler: offer the fault to KVM; anything KVM
 * cannot handle is re-raised fatally.
 */
static void sigbus_handler(int n, struct qemu_signalfd_siginfo *siginfo,
                           void *ctx)
{
    if (kvm_on_sigbus(siginfo->ssi_code,
                      (void *)(intptr_t)siginfo->ssi_addr)) {
        sigbus_reraise();
    }
}
686
/* Install the SIGBUS handler and request early machine-check delivery
 * for this process (PR_MCE_KILL_EARLY).
 */
static void qemu_init_sigbus(void)
{
    struct sigaction action;

    memset(&action, 0, sizeof(action));
    action.sa_flags = SA_SIGINFO;
    action.sa_sigaction = (void (*)(int, siginfo_t*, void*))sigbus_handler;
    sigaction(SIGBUS, &action, NULL);

    prctl(PR_MCE_KILL, PR_MCE_KILL_SET, PR_MCE_KILL_EARLY, 0, 0);
}
698
/* Drain any pending SIG_IPI/SIGBUS for the KVM vCPU thread without
 * blocking (zero timeout); SIGBUS events are forwarded to KVM, and the
 * loop repeats until neither signal remains pending.
 */
static void qemu_kvm_eat_signals(CPUState *cpu)
{
    struct timespec ts = { 0, 0 };
    siginfo_t siginfo;
    sigset_t waitset;
    sigset_t chkset;
    int r;

    sigemptyset(&waitset);
    sigaddset(&waitset, SIG_IPI);
    sigaddset(&waitset, SIGBUS);

    do {
        r = sigtimedwait(&waitset, &siginfo, &ts);
        if (r == -1 && !(errno == EAGAIN || errno == EINTR)) {
            perror("sigtimedwait");
            exit(1);
        }

        switch (r) {
        case SIGBUS:
            if (kvm_on_sigbus_vcpu(cpu, siginfo.si_code, siginfo.si_addr)) {
                sigbus_reraise();
            }
            break;
        default:
            /* SIG_IPI (or timeout): nothing to do beyond consuming it. */
            break;
        }

        r = sigpending(&chkset);
        if (r == -1) {
            perror("sigpending");
            exit(1);
        }
    } while (sigismember(&chkset, SIG_IPI) || sigismember(&chkset, SIGBUS));
}
735
6d9cb73c
JK
736#else /* !CONFIG_LINUX */
737
static void qemu_init_sigbus(void)
{
    /* SIGBUS/machine-check handling is Linux-only; nothing to set up. */
}

static void qemu_kvm_eat_signals(CPUState *cpu)
{
    /* No SIG_IPI/SIGBUS draining needed outside Linux. */
}
6d9cb73c
JK
745#endif /* !CONFIG_LINUX */
746
296af7c9 747#ifndef _WIN32
55f8d6ac
JK
/* Intentionally empty SIG_IPI handler, installed by
 * qemu_kvm_init_cpu_signals(); the signal is consumed via the KVM
 * signal mask rather than this handler.
 */
static void dummy_signal(int sig)
{
}
55f8d6ac 751
/* Per-vCPU signal setup for KVM: install a no-op SIG_IPI handler and
 * hand KVM a signal mask with SIG_IPI/SIGBUS removed, so they are only
 * deliverable while inside KVM_RUN.
 */
static void qemu_kvm_init_cpu_signals(CPUState *cpu)
{
    int r;
    sigset_t set;
    struct sigaction sigact;

    memset(&sigact, 0, sizeof(sigact));
    sigact.sa_handler = dummy_signal;
    sigaction(SIG_IPI, &sigact, NULL);

    /* Start from the thread's current mask, then open the two signals. */
    pthread_sigmask(SIG_BLOCK, NULL, &set);
    sigdelset(&set, SIG_IPI);
    sigdelset(&set, SIGBUS);
    r = kvm_set_signal_mask(cpu, &set);
    if (r) {
        fprintf(stderr, "kvm_set_signal_mask: %s\n", strerror(-r));
        exit(1);
    }
}
771
/* TCG thread signal setup: route SIG_IPI to cpu_signal() and make sure
 * it is unblocked for this thread.
 */
static void qemu_tcg_init_cpu_signals(void)
{
    sigset_t set;
    struct sigaction sigact;

    memset(&sigact, 0, sizeof(sigact));
    sigact.sa_handler = cpu_signal;
    sigaction(SIG_IPI, &sigact, NULL);

    sigemptyset(&set);
    sigaddset(&set, SIG_IPI);
    pthread_sigmask(SIG_UNBLOCK, &set, NULL);
}
785
55f8d6ac 786#else /* _WIN32 */
static void qemu_kvm_init_cpu_signals(CPUState *cpu)
{
    /* KVM is not available on Windows. */
    abort();
}

static void qemu_tcg_init_cpu_signals(void)
{
    /* On Windows the TCG thread is kicked via Suspend/ResumeThread
     * (see qemu_cpu_kick_thread), not POSIX signals.
     */
}
714bd040 795#endif /* _WIN32 */
ff48eb5f 796
b2532d88 797static QemuMutex qemu_global_mutex;
46daff13 798static QemuCond qemu_io_proceeded_cond;
6b49809c 799static unsigned iothread_requesting_mutex;
296af7c9
BS
800
801static QemuThread io_thread;
802
803static QemuThread *tcg_cpu_thread;
804static QemuCond *tcg_halt_cond;
805
296af7c9
BS
806/* cpu creation */
807static QemuCond qemu_cpu_cond;
808/* system init */
296af7c9 809static QemuCond qemu_pause_cond;
e82bcec2 810static QemuCond qemu_work_cond;
296af7c9 811
/* One-time initialization of the CPU-loop primitives (condition
 * variables, global mutex) and the SIGBUS machinery; records the
 * calling thread as the I/O thread.
 */
void qemu_init_cpu_loop(void)
{
    qemu_init_sigbus();
    qemu_cond_init(&qemu_cpu_cond);
    qemu_cond_init(&qemu_pause_cond);
    qemu_cond_init(&qemu_work_cond);
    qemu_cond_init(&qemu_io_proceeded_cond);
    qemu_mutex_init(&qemu_global_mutex);

    qemu_thread_get_self(&io_thread);
}
823
/* Run @func(@data) on @cpu's thread and wait for it to complete.
 * Runs inline when already on @cpu's thread.  The work item lives on
 * this stack; we block on qemu_work_cond until the target thread marks
 * it done (see flush_queued_work()).
 */
void run_on_cpu(CPUState *cpu, void (*func)(void *data), void *data)
{
    struct qemu_work_item wi;

    if (qemu_cpu_is_self(cpu)) {
        func(data);
        return;
    }

    wi.func = func;
    wi.data = data;
    wi.free = false;    /* stack-allocated: must not be g_free'd */
    if (cpu->queued_work_first == NULL) {
        cpu->queued_work_first = &wi;
    } else {
        cpu->queued_work_last->next = &wi;
    }
    cpu->queued_work_last = &wi;
    wi.next = NULL;
    wi.done = false;

    qemu_cpu_kick(cpu);
    while (!wi.done) {
        /* Preserve current_cpu across the condition wait. */
        CPUState *self_cpu = current_cpu;

        qemu_cond_wait(&qemu_work_cond, &qemu_global_mutex);
        current_cpu = self_cpu;
    }
}
853
3c02270d
CV
/* Queue @func(@data) to run on @cpu's thread without waiting.  The work
 * item is heap-allocated and freed by flush_queued_work() (free flag).
 */
void async_run_on_cpu(CPUState *cpu, void (*func)(void *data), void *data)
{
    struct qemu_work_item *wi;

    if (qemu_cpu_is_self(cpu)) {
        func(data);
        return;
    }

    wi = g_malloc0(sizeof(struct qemu_work_item));
    wi->func = func;
    wi->data = data;
    wi->free = true;    /* consumer releases it after running */
    if (cpu->queued_work_first == NULL) {
        cpu->queued_work_first = wi;
    } else {
        cpu->queued_work_last->next = wi;
    }
    cpu->queued_work_last = wi;
    wi->next = NULL;
    wi->done = false;

    qemu_cpu_kick(cpu);
}
878
/* Drain @cpu's queued work list, running each item on this thread;
 * free heap-allocated items and wake every run_on_cpu() waiter.
 */
static void flush_queued_work(CPUState *cpu)
{
    struct qemu_work_item *wi;

    if (cpu->queued_work_first == NULL) {
        return;
    }

    while ((wi = cpu->queued_work_first)) {
        cpu->queued_work_first = wi->next;
        wi->func(wi->data);
        wi->done = true;
        if (wi->free) {
            g_free(wi);
        }
    }
    cpu->queued_work_last = NULL;
    /* Broadcast: several run_on_cpu() callers may be waiting. */
    qemu_cond_broadcast(&qemu_work_cond);
}
898
/* Common post-wait bookkeeping for all vCPU loops: acknowledge a stop
 * request (signalling pause waiters), run queued work, and clear the
 * kick flag.
 */
static void qemu_wait_io_event_common(CPUState *cpu)
{
    if (cpu->stop) {
        cpu->stop = false;
        cpu->stopped = true;
        qemu_cond_signal(&qemu_pause_cond);
    }
    flush_queued_work(cpu);
    cpu->thread_kicked = false;
}
909
/* Idle-wait for the single TCG thread: sleep while all CPUs are idle
 * (warping the virtual clock meanwhile), yield to an I/O thread that
 * wants the mutex, then run per-CPU housekeeping.
 */
static void qemu_tcg_wait_io_event(void)
{
    CPUState *cpu;

    while (all_cpu_threads_idle()) {
        /* Start accounting real time to the virtual clock if the CPUs
           are idle.  */
        qemu_clock_warp(QEMU_CLOCK_VIRTUAL);
        qemu_cond_wait(tcg_halt_cond, &qemu_global_mutex);
    }

    /* Let the I/O thread take the BQL before we continue. */
    while (iothread_requesting_mutex) {
        qemu_cond_wait(&qemu_io_proceeded_cond, &qemu_global_mutex);
    }

    CPU_FOREACH(cpu) {
        qemu_wait_io_event_common(cpu);
    }
}
929
/* Idle-wait for a KVM vCPU thread: sleep on the CPU's halt condition
 * while idle, then drain pending signals and run common housekeeping.
 */
static void qemu_kvm_wait_io_event(CPUState *cpu)
{
    while (cpu_thread_is_idle(cpu)) {
        qemu_cond_wait(cpu->halt_cond, &qemu_global_mutex);
    }

    qemu_kvm_eat_signals(cpu);
    qemu_wait_io_event_common(cpu);
}
939
/* Thread function for one KVM vCPU: initialize the vCPU and its signal
 * handling, announce creation, then loop between kvm_cpu_exec() and
 * the idle wait forever.
 */
static void *qemu_kvm_cpu_thread_fn(void *arg)
{
    CPUState *cpu = arg;
    int r;

    qemu_mutex_lock(&qemu_global_mutex);
    qemu_thread_get_self(cpu->thread);
    cpu->thread_id = qemu_get_thread_id();
    cpu->can_do_io = 1;
    current_cpu = cpu;

    r = kvm_init_vcpu(cpu);
    if (r < 0) {
        fprintf(stderr, "kvm_init_vcpu failed: %s\n", strerror(-r));
        exit(1);
    }

    qemu_kvm_init_cpu_signals(cpu);

    /* signal CPU creation */
    cpu->created = true;
    qemu_cond_signal(&qemu_cpu_cond);

    while (1) {
        if (cpu_can_run(cpu)) {
            r = kvm_cpu_exec(cpu);
            if (r == EXCP_DEBUG) {
                cpu_handle_guest_debug(cpu);
            }
        }
        qemu_kvm_wait_io_event(cpu);
    }

    return NULL;
}
975
c7f0f3b1
AL
/* Thread function for the qtest "dummy" vCPU: it never executes guest
 * code, it just waits for SIG_IPI kicks (dropping the iothread lock
 * while blocked) and services queued work.  Not supported on Windows.
 */
static void *qemu_dummy_cpu_thread_fn(void *arg)
{
#ifdef _WIN32
    fprintf(stderr, "qtest is not supported under Windows\n");
    exit(1);
#else
    CPUState *cpu = arg;
    sigset_t waitset;
    int r;

    qemu_mutex_lock_iothread();
    qemu_thread_get_self(cpu->thread);
    cpu->thread_id = qemu_get_thread_id();
    cpu->can_do_io = 1;

    sigemptyset(&waitset);
    sigaddset(&waitset, SIG_IPI);

    /* signal CPU creation */
    cpu->created = true;
    qemu_cond_signal(&qemu_cpu_cond);

    current_cpu = cpu;
    while (1) {
        /* Clear current_cpu while we sleep without the lock. */
        current_cpu = NULL;
        qemu_mutex_unlock_iothread();
        do {
            int sig;
            r = sigwait(&waitset, &sig);
        } while (r == -1 && (errno == EAGAIN || errno == EINTR));
        if (r == -1) {
            perror("sigwait");
            exit(1);
        }
        qemu_mutex_lock_iothread();
        current_cpu = cpu;
        qemu_wait_io_event_common(cpu);
    }

    return NULL;
#endif
}
1018
bdb7ca67
JK
static void tcg_exec_all(void);

/* Thread function for the single TCG thread: all guest vCPUs are
 * multiplexed onto it.  After announcing creation of every CPU it
 * waits for the machine to start, then loops running tcg_exec_all()
 * and the TCG idle wait.
 */
static void *qemu_tcg_cpu_thread_fn(void *arg)
{
    CPUState *cpu = arg;

    qemu_tcg_init_cpu_signals();
    qemu_thread_get_self(cpu->thread);

    qemu_mutex_lock(&qemu_global_mutex);
    /* Note: the loop variable deliberately reuses 'cpu'; every CPU is
     * marked created by this one thread.
     */
    CPU_FOREACH(cpu) {
        cpu->thread_id = qemu_get_thread_id();
        cpu->created = true;
        cpu->can_do_io = 1;
    }
    qemu_cond_signal(&qemu_cpu_cond);

    /* wait for initial kick-off after machine start */
    while (first_cpu->stopped) {
        qemu_cond_wait(tcg_halt_cond, &qemu_global_mutex);

        /* process any pending work */
        CPU_FOREACH(cpu) {
            qemu_wait_io_event_common(cpu);
        }
    }

    /* process any pending work */
    exit_request = 1;

    while (1) {
        tcg_exec_all();

        if (use_icount) {
            int64_t deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);

            if (deadline == 0) {
                /* A virtual timer is already due: wake its waiters. */
                qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
            }
        }
        qemu_tcg_wait_io_event();
    }

    return NULL;
}
1064
2ff09a40 1065static void qemu_cpu_kick_thread(CPUState *cpu)
cc015e9a
PB
1066{
1067#ifndef _WIN32
1068 int err;
1069
814e612e 1070 err = pthread_kill(cpu->thread->thread, SIG_IPI);
cc015e9a
PB
1071 if (err) {
1072 fprintf(stderr, "qemu:%s: %s", __func__, strerror(err));
1073 exit(1);
1074 }
1075#else /* _WIN32 */
60e82579 1076 if (!qemu_cpu_is_self(cpu)) {
ed9164a3
OH
1077 CONTEXT tcgContext;
1078
1079 if (SuspendThread(cpu->hThread) == (DWORD)-1) {
7f1721df 1080 fprintf(stderr, "qemu:%s: GetLastError:%lu\n", __func__,
ed9164a3
OH
1081 GetLastError());
1082 exit(1);
1083 }
1084
1085 /* On multi-core systems, we are not sure that the thread is actually
1086 * suspended until we can get the context.
1087 */
1088 tcgContext.ContextFlags = CONTEXT_CONTROL;
1089 while (GetThreadContext(cpu->hThread, &tcgContext) != 0) {
1090 continue;
1091 }
1092
cc015e9a 1093 cpu_signal(0);
ed9164a3
OH
1094
1095 if (ResumeThread(cpu->hThread) == (DWORD)-1) {
7f1721df 1096 fprintf(stderr, "qemu:%s: GetLastError:%lu\n", __func__,
ed9164a3
OH
1097 GetLastError());
1098 exit(1);
1099 }
cc015e9a
PB
1100 }
1101#endif
1102}
1103
c08d7424 1104void qemu_cpu_kick(CPUState *cpu)
296af7c9 1105{
f5c121b8 1106 qemu_cond_broadcast(cpu->halt_cond);
216fc9a4 1107 if (!tcg_enabled() && !cpu->thread_kicked) {
2ff09a40 1108 qemu_cpu_kick_thread(cpu);
216fc9a4 1109 cpu->thread_kicked = true;
aa2c364b 1110 }
296af7c9
BS
1111}
1112
46d62fac 1113void qemu_cpu_kick_self(void)
296af7c9 1114{
b55c22c6 1115#ifndef _WIN32
4917cf44 1116 assert(current_cpu);
296af7c9 1117
4917cf44
AF
1118 if (!current_cpu->thread_kicked) {
1119 qemu_cpu_kick_thread(current_cpu);
1120 current_cpu->thread_kicked = true;
296af7c9 1121 }
b55c22c6
PB
1122#else
1123 abort();
1124#endif
296af7c9
BS
1125}
1126
60e82579 1127bool qemu_cpu_is_self(CPUState *cpu)
296af7c9 1128{
814e612e 1129 return qemu_thread_is_self(cpu->thread);
296af7c9
BS
1130}
1131
79e2b9ae 1132bool qemu_in_vcpu_thread(void)
aa723c23 1133{
4917cf44 1134 return current_cpu && qemu_cpu_is_self(current_cpu);
aa723c23
JQ
1135}
1136
/* Acquire the global iothread mutex ("big QEMU lock").
 *
 * With a running TCG thread the lock may be held for long stretches of
 * translated code, so: bump iothread_requesting_mutex first (presumably
 * polled by the TCG thread elsewhere in this file — the wait loop is not
 * visible here), try the lock, and if that fails kick the TCG thread so
 * it releases the mutex promptly before blocking on it for real.
 */
void qemu_mutex_lock_iothread(void)
{
    atomic_inc(&iothread_requesting_mutex);
    if (!tcg_enabled() || !first_cpu || !first_cpu->thread) {
        /* No TCG vCPU thread yet: plain blocking lock is fine. */
        qemu_mutex_lock(&qemu_global_mutex);
        atomic_dec(&iothread_requesting_mutex);
    } else {
        if (qemu_mutex_trylock(&qemu_global_mutex)) {
            /* Lock is held (likely by the TCG thread): force it out of
             * cpu_exec() so it drops the mutex, then wait for it. */
            qemu_cpu_kick_thread(first_cpu);
            qemu_mutex_lock(&qemu_global_mutex);
        }
        atomic_dec(&iothread_requesting_mutex);
        qemu_cond_broadcast(&qemu_io_proceeded_cond);
    }
}
1152
1153void qemu_mutex_unlock_iothread(void)
1154{
1155 qemu_mutex_unlock(&qemu_global_mutex);
1156}
1157
1158static int all_vcpus_paused(void)
1159{
bdc44640 1160 CPUState *cpu;
296af7c9 1161
bdc44640 1162 CPU_FOREACH(cpu) {
182735ef 1163 if (!cpu->stopped) {
296af7c9 1164 return 0;
0ab07c62 1165 }
296af7c9
BS
1166 }
1167
1168 return 1;
1169}
1170
/* Stop every vCPU and wait until all report stopped.
 *
 * Disables the virtual clock, flags each CPU with ->stop and kicks it.
 * When invoked from a vCPU thread itself, stops the current CPU directly;
 * for non-KVM (single TCG thread) the remaining CPUs can simply be marked
 * stopped because they all run on this very thread — no wait needed.
 */
void pause_all_vcpus(void)
{
    CPUState *cpu;

    qemu_clock_enable(QEMU_CLOCK_VIRTUAL, false);
    CPU_FOREACH(cpu) {
        cpu->stop = true;
        qemu_cpu_kick(cpu);
    }

    if (qemu_in_vcpu_thread()) {
        cpu_stop_current();
        if (!kvm_enabled()) {
            /* TCG: all CPUs share this thread, so they are trivially
             * paused once we stop executing them. */
            CPU_FOREACH(cpu) {
                cpu->stop = false;
                cpu->stopped = true;
            }
            return;
        }
    }

    /* Wait (re-kicking each round) until every vCPU has parked itself;
     * qemu_pause_cond is signalled by cpu_stop_current() and friends. */
    while (!all_vcpus_paused()) {
        qemu_cond_wait(&qemu_pause_cond, &qemu_global_mutex);
        CPU_FOREACH(cpu) {
            qemu_cpu_kick(cpu);
        }
    }
}
1199
2993683b
IM
1200void cpu_resume(CPUState *cpu)
1201{
1202 cpu->stop = false;
1203 cpu->stopped = false;
1204 qemu_cpu_kick(cpu);
1205}
1206
296af7c9
BS
1207void resume_all_vcpus(void)
1208{
bdc44640 1209 CPUState *cpu;
296af7c9 1210
40daca54 1211 qemu_clock_enable(QEMU_CLOCK_VIRTUAL, true);
bdc44640 1212 CPU_FOREACH(cpu) {
182735ef 1213 cpu_resume(cpu);
296af7c9
BS
1214 }
1215}
1216
4900116e
DDAG
1217/* For temporary buffers for forming a name */
1218#define VCPU_THREAD_NAME_SIZE 16
1219
e5ab30a2 1220static void qemu_tcg_init_vcpu(CPUState *cpu)
296af7c9 1221{
4900116e
DDAG
1222 char thread_name[VCPU_THREAD_NAME_SIZE];
1223
09daed84
EI
1224 tcg_cpu_address_space_init(cpu, cpu->as);
1225
296af7c9
BS
1226 /* share a single thread for all cpus with TCG */
1227 if (!tcg_cpu_thread) {
814e612e 1228 cpu->thread = g_malloc0(sizeof(QemuThread));
f5c121b8
AF
1229 cpu->halt_cond = g_malloc0(sizeof(QemuCond));
1230 qemu_cond_init(cpu->halt_cond);
1231 tcg_halt_cond = cpu->halt_cond;
4900116e
DDAG
1232 snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/TCG",
1233 cpu->cpu_index);
1234 qemu_thread_create(cpu->thread, thread_name, qemu_tcg_cpu_thread_fn,
1235 cpu, QEMU_THREAD_JOINABLE);
1ecf47bf 1236#ifdef _WIN32
814e612e 1237 cpu->hThread = qemu_thread_get_handle(cpu->thread);
1ecf47bf 1238#endif
61a46217 1239 while (!cpu->created) {
18a85728 1240 qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
0ab07c62 1241 }
814e612e 1242 tcg_cpu_thread = cpu->thread;
296af7c9 1243 } else {
814e612e 1244 cpu->thread = tcg_cpu_thread;
f5c121b8 1245 cpu->halt_cond = tcg_halt_cond;
296af7c9
BS
1246 }
1247}
1248
48a106bd 1249static void qemu_kvm_start_vcpu(CPUState *cpu)
296af7c9 1250{
4900116e
DDAG
1251 char thread_name[VCPU_THREAD_NAME_SIZE];
1252
814e612e 1253 cpu->thread = g_malloc0(sizeof(QemuThread));
f5c121b8
AF
1254 cpu->halt_cond = g_malloc0(sizeof(QemuCond));
1255 qemu_cond_init(cpu->halt_cond);
4900116e
DDAG
1256 snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/KVM",
1257 cpu->cpu_index);
1258 qemu_thread_create(cpu->thread, thread_name, qemu_kvm_cpu_thread_fn,
1259 cpu, QEMU_THREAD_JOINABLE);
61a46217 1260 while (!cpu->created) {
18a85728 1261 qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
0ab07c62 1262 }
296af7c9
BS
1263}
1264
10a9021d 1265static void qemu_dummy_start_vcpu(CPUState *cpu)
c7f0f3b1 1266{
4900116e
DDAG
1267 char thread_name[VCPU_THREAD_NAME_SIZE];
1268
814e612e 1269 cpu->thread = g_malloc0(sizeof(QemuThread));
f5c121b8
AF
1270 cpu->halt_cond = g_malloc0(sizeof(QemuCond));
1271 qemu_cond_init(cpu->halt_cond);
4900116e
DDAG
1272 snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/DUMMY",
1273 cpu->cpu_index);
1274 qemu_thread_create(cpu->thread, thread_name, qemu_dummy_cpu_thread_fn, cpu,
c7f0f3b1 1275 QEMU_THREAD_JOINABLE);
61a46217 1276 while (!cpu->created) {
c7f0f3b1
AL
1277 qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
1278 }
1279}
1280
c643bed9 1281void qemu_init_vcpu(CPUState *cpu)
296af7c9 1282{
ce3960eb
AF
1283 cpu->nr_cores = smp_cores;
1284 cpu->nr_threads = smp_threads;
f324e766 1285 cpu->stopped = true;
0ab07c62 1286 if (kvm_enabled()) {
48a106bd 1287 qemu_kvm_start_vcpu(cpu);
c7f0f3b1 1288 } else if (tcg_enabled()) {
e5ab30a2 1289 qemu_tcg_init_vcpu(cpu);
c7f0f3b1 1290 } else {
10a9021d 1291 qemu_dummy_start_vcpu(cpu);
0ab07c62 1292 }
296af7c9
BS
1293}
1294
b4a3d965 1295void cpu_stop_current(void)
296af7c9 1296{
4917cf44
AF
1297 if (current_cpu) {
1298 current_cpu->stop = false;
1299 current_cpu->stopped = true;
1300 cpu_exit(current_cpu);
67bb172f 1301 qemu_cond_signal(&qemu_pause_cond);
b4a3d965 1302 }
296af7c9
BS
1303}
1304
56983463 1305int vm_stop(RunState state)
296af7c9 1306{
aa723c23 1307 if (qemu_in_vcpu_thread()) {
74892d24 1308 qemu_system_vmstop_request_prepare();
1dfb4dd9 1309 qemu_system_vmstop_request(state);
296af7c9
BS
1310 /*
1311 * FIXME: should not return to device code in case
1312 * vm_stop() has been requested.
1313 */
b4a3d965 1314 cpu_stop_current();
56983463 1315 return 0;
296af7c9 1316 }
56983463
KW
1317
1318 return do_vm_stop(state);
296af7c9
BS
1319}
1320
8a9236f1
LC
1321/* does a state transition even if the VM is already stopped,
1322 current state is forgotten forever */
56983463 1323int vm_stop_force_state(RunState state)
8a9236f1
LC
1324{
1325 if (runstate_is_running()) {
56983463 1326 return vm_stop(state);
8a9236f1
LC
1327 } else {
1328 runstate_set(state);
594a45ce
KW
1329 /* Make sure to return an error if the flush in a previous vm_stop()
1330 * failed. */
1331 return bdrv_flush_all();
8a9236f1
LC
1332 }
1333}
1334
9349b4f9 1335static int tcg_cpu_exec(CPUArchState *env)
296af7c9 1336{
efee7340 1337 CPUState *cpu = ENV_GET_CPU(env);
296af7c9
BS
1338 int ret;
1339#ifdef CONFIG_PROFILER
1340 int64_t ti;
1341#endif
1342
1343#ifdef CONFIG_PROFILER
1344 ti = profile_getclock();
1345#endif
1346 if (use_icount) {
1347 int64_t count;
ac70aafc 1348 int64_t deadline;
296af7c9 1349 int decr;
c96778bb
FK
1350 timers_state.qemu_icount -= (cpu->icount_decr.u16.low
1351 + cpu->icount_extra);
28ecfd7a 1352 cpu->icount_decr.u16.low = 0;
efee7340 1353 cpu->icount_extra = 0;
40daca54 1354 deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
ac70aafc
AB
1355
1356 /* Maintain prior (possibly buggy) behaviour where if no deadline
40daca54 1357 * was set (as there is no QEMU_CLOCK_VIRTUAL timer) or it is more than
ac70aafc
AB
1358 * INT32_MAX nanoseconds ahead, we still use INT32_MAX
1359 * nanoseconds.
1360 */
1361 if ((deadline < 0) || (deadline > INT32_MAX)) {
1362 deadline = INT32_MAX;
1363 }
1364
1365 count = qemu_icount_round(deadline);
c96778bb 1366 timers_state.qemu_icount += count;
296af7c9
BS
1367 decr = (count > 0xffff) ? 0xffff : count;
1368 count -= decr;
28ecfd7a 1369 cpu->icount_decr.u16.low = decr;
efee7340 1370 cpu->icount_extra = count;
296af7c9
BS
1371 }
1372 ret = cpu_exec(env);
1373#ifdef CONFIG_PROFILER
89d5cbdd 1374 tcg_time += profile_getclock() - ti;
296af7c9
BS
1375#endif
1376 if (use_icount) {
1377 /* Fold pending instructions back into the
1378 instruction counter, and clear the interrupt flag. */
c96778bb
FK
1379 timers_state.qemu_icount -= (cpu->icount_decr.u16.low
1380 + cpu->icount_extra);
28ecfd7a 1381 cpu->icount_decr.u32 = 0;
efee7340 1382 cpu->icount_extra = 0;
296af7c9
BS
1383 }
1384 return ret;
1385}
1386
bdb7ca67 1387static void tcg_exec_all(void)
296af7c9 1388{
9a36085b
JK
1389 int r;
1390
40daca54
AB
1391 /* Account partial waits to QEMU_CLOCK_VIRTUAL. */
1392 qemu_clock_warp(QEMU_CLOCK_VIRTUAL);
ab33fcda 1393
0ab07c62 1394 if (next_cpu == NULL) {
296af7c9 1395 next_cpu = first_cpu;
0ab07c62 1396 }
bdc44640 1397 for (; next_cpu != NULL && !exit_request; next_cpu = CPU_NEXT(next_cpu)) {
182735ef
AF
1398 CPUState *cpu = next_cpu;
1399 CPUArchState *env = cpu->env_ptr;
296af7c9 1400
40daca54 1401 qemu_clock_enable(QEMU_CLOCK_VIRTUAL,
ed2803da 1402 (cpu->singlestep_enabled & SSTEP_NOTIMER) == 0);
296af7c9 1403
a1fcaa73 1404 if (cpu_can_run(cpu)) {
bdb7ca67 1405 r = tcg_cpu_exec(env);
9a36085b 1406 if (r == EXCP_DEBUG) {
91325046 1407 cpu_handle_guest_debug(cpu);
3c638d06
JK
1408 break;
1409 }
f324e766 1410 } else if (cpu->stop || cpu->stopped) {
296af7c9
BS
1411 break;
1412 }
1413 }
c629a4bc 1414 exit_request = 0;
296af7c9
BS
1415}
1416
9a78eead 1417void list_cpus(FILE *f, fprintf_function cpu_fprintf, const char *optarg)
262353cb
BS
1418{
1419 /* XXX: implement xxx_cpu_list for targets that still miss it */
e916cbf8
PM
1420#if defined(cpu_list)
1421 cpu_list(f, cpu_fprintf);
262353cb
BS
1422#endif
1423}
/* QMP 'query-cpus': build a CpuInfoList describing every CPU.
 *
 * Synchronizes each CPU's state from the accelerator first, then fills
 * in generic fields plus a target-specific program counter.  Caller owns
 * the returned list (qapi-allocated).
 */
CpuInfoList *qmp_query_cpus(Error **errp)
{
    CpuInfoList *head = NULL, *cur_item = NULL;
    CPUState *cpu;

    CPU_FOREACH(cpu) {
        CpuInfoList *info;
        /* Pick up the target-specific env to read the PC below. */
#if defined(TARGET_I386)
        X86CPU *x86_cpu = X86_CPU(cpu);
        CPUX86State *env = &x86_cpu->env;
#elif defined(TARGET_PPC)
        PowerPCCPU *ppc_cpu = POWERPC_CPU(cpu);
        CPUPPCState *env = &ppc_cpu->env;
#elif defined(TARGET_SPARC)
        SPARCCPU *sparc_cpu = SPARC_CPU(cpu);
        CPUSPARCState *env = &sparc_cpu->env;
#elif defined(TARGET_MIPS)
        MIPSCPU *mips_cpu = MIPS_CPU(cpu);
        CPUMIPSState *env = &mips_cpu->env;
#elif defined(TARGET_TRICORE)
        TriCoreCPU *tricore_cpu = TRICORE_CPU(cpu);
        CPUTriCoreState *env = &tricore_cpu->env;
#endif

        cpu_synchronize_state(cpu);

        info = g_malloc0(sizeof(*info));
        info->value = g_malloc0(sizeof(*info->value));
        info->value->CPU = cpu->cpu_index;
        info->value->current = (cpu == first_cpu);
        info->value->halted = cpu->halted;
        info->value->qom_path = object_get_canonical_path(OBJECT(cpu));
        info->value->thread_id = cpu->thread_id;
#if defined(TARGET_I386)
        info->value->has_pc = true;
        info->value->pc = env->eip + env->segs[R_CS].base;
#elif defined(TARGET_PPC)
        info->value->has_nip = true;
        info->value->nip = env->nip;
#elif defined(TARGET_SPARC)
        info->value->has_pc = true;
        info->value->pc = env->pc;
        info->value->has_npc = true;
        info->value->npc = env->npc;
#elif defined(TARGET_MIPS)
        info->value->has_PC = true;
        info->value->PC = env->active_tc.PC;
#elif defined(TARGET_TRICORE)
        info->value->has_PC = true;
        info->value->PC = env->PC;
#endif

        /* XXX: waiting for the qapi to support GSList */
        if (!cur_item) {
            head = cur_item = info;
        } else {
            cur_item->next = info;
            cur_item = info;
        }
    }

    return head;
}
0cfd6a9a
LC
1488
1489void qmp_memsave(int64_t addr, int64_t size, const char *filename,
1490 bool has_cpu, int64_t cpu_index, Error **errp)
1491{
1492 FILE *f;
1493 uint32_t l;
55e5c285 1494 CPUState *cpu;
0cfd6a9a 1495 uint8_t buf[1024];
0dc9daf0 1496 int64_t orig_addr = addr, orig_size = size;
0cfd6a9a
LC
1497
1498 if (!has_cpu) {
1499 cpu_index = 0;
1500 }
1501
151d1322
AF
1502 cpu = qemu_get_cpu(cpu_index);
1503 if (cpu == NULL) {
0cfd6a9a
LC
1504 error_set(errp, QERR_INVALID_PARAMETER_VALUE, "cpu-index",
1505 "a CPU number");
1506 return;
1507 }
1508
1509 f = fopen(filename, "wb");
1510 if (!f) {
618da851 1511 error_setg_file_open(errp, errno, filename);
0cfd6a9a
LC
1512 return;
1513 }
1514
1515 while (size != 0) {
1516 l = sizeof(buf);
1517 if (l > size)
1518 l = size;
2f4d0f59 1519 if (cpu_memory_rw_debug(cpu, addr, buf, l, 0) != 0) {
0dc9daf0
BP
1520 error_setg(errp, "Invalid addr 0x%016" PRIx64 "/size %" PRId64
1521 " specified", orig_addr, orig_size);
2f4d0f59
AK
1522 goto exit;
1523 }
0cfd6a9a
LC
1524 if (fwrite(buf, 1, l, f) != l) {
1525 error_set(errp, QERR_IO_ERROR);
1526 goto exit;
1527 }
1528 addr += l;
1529 size -= l;
1530 }
1531
1532exit:
1533 fclose(f);
1534}
6d3962bf
LC
1535
1536void qmp_pmemsave(int64_t addr, int64_t size, const char *filename,
1537 Error **errp)
1538{
1539 FILE *f;
1540 uint32_t l;
1541 uint8_t buf[1024];
1542
1543 f = fopen(filename, "wb");
1544 if (!f) {
618da851 1545 error_setg_file_open(errp, errno, filename);
6d3962bf
LC
1546 return;
1547 }
1548
1549 while (size != 0) {
1550 l = sizeof(buf);
1551 if (l > size)
1552 l = size;
eb6282f2 1553 cpu_physical_memory_read(addr, buf, l);
6d3962bf
LC
1554 if (fwrite(buf, 1, l, f) != l) {
1555 error_set(errp, QERR_IO_ERROR);
1556 goto exit;
1557 }
1558 addr += l;
1559 size -= l;
1560 }
1561
1562exit:
1563 fclose(f);
1564}
ab49ab5c
LC
1565
1566void qmp_inject_nmi(Error **errp)
1567{
1568#if defined(TARGET_I386)
182735ef
AF
1569 CPUState *cs;
1570
bdc44640 1571 CPU_FOREACH(cs) {
182735ef 1572 X86CPU *cpu = X86_CPU(cs);
ab49ab5c 1573
02e51483 1574 if (!cpu->apic_state) {
182735ef 1575 cpu_interrupt(cs, CPU_INTERRUPT_NMI);
02c09195 1576 } else {
02e51483 1577 apic_deliver_nmi(cpu->apic_state);
02c09195 1578 }
ab49ab5c
LC
1579 }
1580#else
9cb805fd 1581 nmi_monitor_handle(monitor_get_cpu_index(), errp);
ab49ab5c
LC
1582#endif
1583}
27498bef
ST
1584
1585void dump_drift_info(FILE *f, fprintf_function cpu_fprintf)
1586{
1587 if (!use_icount) {
1588 return;
1589 }
1590
1591 cpu_fprintf(f, "Host - Guest clock %"PRIi64" ms\n",
1592 (cpu_get_clock() - cpu_get_icount())/SCALE_MS);
1593 if (icount_align_option) {
1594 cpu_fprintf(f, "Max guest delay %"PRIi64" ms\n", -max_delay/SCALE_MS);
1595 cpu_fprintf(f, "Max guest advance %"PRIi64" ms\n", max_advance/SCALE_MS);
1596 } else {
1597 cpu_fprintf(f, "Max guest delay NA\n");
1598 cpu_fprintf(f, "Max guest advance NA\n");
1599 }
1600}