]> git.proxmox.com Git - mirror_qemu.git/blame - cpus.c
timer: introduce new QEMU_CLOCK_VIRTUAL_RT clock
[mirror_qemu.git] / cpus.c
CommitLineData
296af7c9
BS
1/*
2 * QEMU System Emulator
3 *
4 * Copyright (c) 2003-2008 Fabrice Bellard
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to deal
8 * in the Software without restriction, including without limitation the rights
9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 * copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22 * THE SOFTWARE.
23 */
24
25/* Needed early for CONFIG_BSD etc. */
26#include "config-host.h"
27
83c9089e 28#include "monitor/monitor.h"
a4e15de9 29#include "qapi/qmp/qerror.h"
9c17d615 30#include "sysemu/sysemu.h"
022c62cb 31#include "exec/gdbstub.h"
9c17d615
PB
32#include "sysemu/dma.h"
33#include "sysemu/kvm.h"
de0b36b6 34#include "qmp-commands.h"
296af7c9 35
1de7afc9 36#include "qemu/thread.h"
9c17d615
PB
37#include "sysemu/cpus.h"
38#include "sysemu/qtest.h"
1de7afc9
PB
39#include "qemu/main-loop.h"
40#include "qemu/bitmap.h"
cb365646 41#include "qemu/seqlock.h"
a4e15de9 42#include "qapi-event.h"
9cb805fd 43#include "hw/nmi.h"
0ff0fc19
JK
44
45#ifndef _WIN32
1de7afc9 46#include "qemu/compatfd.h"
0ff0fc19 47#endif
296af7c9 48
6d9cb73c
JK
49#ifdef CONFIG_LINUX
50
51#include <sys/prctl.h>
52
c0532a76
MT
53#ifndef PR_MCE_KILL
54#define PR_MCE_KILL 33
55#endif
56
6d9cb73c
JK
57#ifndef PR_MCE_KILL_SET
58#define PR_MCE_KILL_SET 1
59#endif
60
61#ifndef PR_MCE_KILL_EARLY
62#define PR_MCE_KILL_EARLY 1
63#endif
64
65#endif /* CONFIG_LINUX */
66
182735ef 67static CPUState *next_cpu;
27498bef
ST
68int64_t max_delay;
69int64_t max_advance;
296af7c9 70
321bc0b2
TC
71bool cpu_is_stopped(CPUState *cpu)
72{
73 return cpu->stopped || !runstate_is_running();
74}
75
a98ae1d8 76static bool cpu_thread_is_idle(CPUState *cpu)
ac873f1e 77{
c64ca814 78 if (cpu->stop || cpu->queued_work_first) {
ac873f1e
PM
79 return false;
80 }
321bc0b2 81 if (cpu_is_stopped(cpu)) {
ac873f1e
PM
82 return true;
83 }
8c2e1b00 84 if (!cpu->halted || cpu_has_work(cpu) ||
215e79c0 85 kvm_halt_in_kernel()) {
ac873f1e
PM
86 return false;
87 }
88 return true;
89}
90
91static bool all_cpu_threads_idle(void)
92{
182735ef 93 CPUState *cpu;
ac873f1e 94
bdc44640 95 CPU_FOREACH(cpu) {
182735ef 96 if (!cpu_thread_is_idle(cpu)) {
ac873f1e
PM
97 return false;
98 }
99 }
100 return true;
101}
102
946fb27c
PB
103/***********************************************************/
104/* guest cycle counter */
105
a3270e19
PB
106/* Protected by TimersState seqlock */
107
71468395 108static int64_t vm_clock_warp_start = -1;
946fb27c
PB
109/* Conversion factor from emulated instructions to virtual clock ticks. */
110static int icount_time_shift;
111/* Arbitrarily pick 1MIPS as the minimum allowable speed. */
112#define MAX_ICOUNT_SHIFT 10
a3270e19 113
946fb27c
PB
114static QEMUTimer *icount_rt_timer;
115static QEMUTimer *icount_vm_timer;
116static QEMUTimer *icount_warp_timer;
946fb27c
PB
117
118typedef struct TimersState {
cb365646 119 /* Protected by BQL. */
946fb27c
PB
120 int64_t cpu_ticks_prev;
121 int64_t cpu_ticks_offset;
cb365646
LPF
122
123 /* cpu_clock_offset can be read out of BQL, so protect it with
124 * this lock.
125 */
126 QemuSeqLock vm_clock_seqlock;
946fb27c
PB
127 int64_t cpu_clock_offset;
128 int32_t cpu_ticks_enabled;
129 int64_t dummy;
c96778bb
FK
130
131 /* Compensate for varying guest execution speed. */
132 int64_t qemu_icount_bias;
133 /* Only written by TCG thread */
134 int64_t qemu_icount;
946fb27c
PB
135} TimersState;
136
d9cd4007 137static TimersState timers_state;
946fb27c 138
2a62914b 139int64_t cpu_get_icount_raw(void)
946fb27c
PB
140{
141 int64_t icount;
4917cf44 142 CPUState *cpu = current_cpu;
946fb27c 143
c96778bb 144 icount = timers_state.qemu_icount;
4917cf44 145 if (cpu) {
99df7dce 146 if (!cpu_can_do_io(cpu)) {
2a62914b
PD
147 fprintf(stderr, "Bad icount read\n");
148 exit(1);
946fb27c 149 }
28ecfd7a 150 icount -= (cpu->icount_decr.u16.low + cpu->icount_extra);
946fb27c 151 }
2a62914b
PD
152 return icount;
153}
154
155/* Return the virtual CPU time, based on the instruction counter. */
156static int64_t cpu_get_icount_locked(void)
157{
158 int64_t icount = cpu_get_icount_raw();
3f031313 159 return timers_state.qemu_icount_bias + cpu_icount_to_ns(icount);
946fb27c
PB
160}
161
17a15f1b
PB
162int64_t cpu_get_icount(void)
163{
164 int64_t icount;
165 unsigned start;
166
167 do {
168 start = seqlock_read_begin(&timers_state.vm_clock_seqlock);
169 icount = cpu_get_icount_locked();
170 } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start));
171
172 return icount;
173}
174
3f031313
FK
175int64_t cpu_icount_to_ns(int64_t icount)
176{
177 return icount << icount_time_shift;
178}
179
946fb27c 180/* return the host CPU cycle counter and handle stop/restart */
cb365646 181/* Caller must hold the BQL */
946fb27c
PB
182int64_t cpu_get_ticks(void)
183{
5f3e3101
PB
184 int64_t ticks;
185
946fb27c
PB
186 if (use_icount) {
187 return cpu_get_icount();
188 }
5f3e3101
PB
189
190 ticks = timers_state.cpu_ticks_offset;
191 if (timers_state.cpu_ticks_enabled) {
192 ticks += cpu_get_real_ticks();
193 }
194
195 if (timers_state.cpu_ticks_prev > ticks) {
196 /* Note: non increasing ticks may happen if the host uses
197 software suspend */
198 timers_state.cpu_ticks_offset += timers_state.cpu_ticks_prev - ticks;
199 ticks = timers_state.cpu_ticks_prev;
946fb27c 200 }
5f3e3101
PB
201
202 timers_state.cpu_ticks_prev = ticks;
203 return ticks;
946fb27c
PB
204}
205
cb365646 206static int64_t cpu_get_clock_locked(void)
946fb27c 207{
5f3e3101 208 int64_t ticks;
cb365646 209
5f3e3101
PB
210 ticks = timers_state.cpu_clock_offset;
211 if (timers_state.cpu_ticks_enabled) {
212 ticks += get_clock();
946fb27c 213 }
cb365646 214
5f3e3101 215 return ticks;
cb365646
LPF
216}
217
218/* return the host CPU monotonic timer and handle stop/restart */
219int64_t cpu_get_clock(void)
220{
221 int64_t ti;
222 unsigned start;
223
224 do {
225 start = seqlock_read_begin(&timers_state.vm_clock_seqlock);
226 ti = cpu_get_clock_locked();
227 } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start));
228
229 return ti;
946fb27c
PB
230}
231
c2aa5f81
ST
232/* return the offset between the host clock and virtual CPU clock */
233int64_t cpu_get_clock_offset(void)
234{
235 int64_t ti;
236 unsigned start;
237
238 do {
239 start = seqlock_read_begin(&timers_state.vm_clock_seqlock);
240 ti = timers_state.cpu_clock_offset;
241 if (!timers_state.cpu_ticks_enabled) {
242 ti -= get_clock();
243 }
244 } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start));
245
246 return -ti;
247}
248
cb365646
LPF
249/* enable cpu_get_ticks()
250 * Caller must hold BQL which server as mutex for vm_clock_seqlock.
251 */
946fb27c
PB
252void cpu_enable_ticks(void)
253{
cb365646
LPF
254 /* Here, the really thing protected by seqlock is cpu_clock_offset. */
255 seqlock_write_lock(&timers_state.vm_clock_seqlock);
946fb27c
PB
256 if (!timers_state.cpu_ticks_enabled) {
257 timers_state.cpu_ticks_offset -= cpu_get_real_ticks();
258 timers_state.cpu_clock_offset -= get_clock();
259 timers_state.cpu_ticks_enabled = 1;
260 }
cb365646 261 seqlock_write_unlock(&timers_state.vm_clock_seqlock);
946fb27c
PB
262}
263
264/* disable cpu_get_ticks() : the clock is stopped. You must not call
cb365646
LPF
265 * cpu_get_ticks() after that.
266 * Caller must hold BQL which server as mutex for vm_clock_seqlock.
267 */
946fb27c
PB
268void cpu_disable_ticks(void)
269{
cb365646
LPF
270 /* Here, the really thing protected by seqlock is cpu_clock_offset. */
271 seqlock_write_lock(&timers_state.vm_clock_seqlock);
946fb27c 272 if (timers_state.cpu_ticks_enabled) {
5f3e3101 273 timers_state.cpu_ticks_offset += cpu_get_real_ticks();
cb365646 274 timers_state.cpu_clock_offset = cpu_get_clock_locked();
946fb27c
PB
275 timers_state.cpu_ticks_enabled = 0;
276 }
cb365646 277 seqlock_write_unlock(&timers_state.vm_clock_seqlock);
946fb27c
PB
278}
279
280/* Correlation between real and virtual time is always going to be
281 fairly approximate, so ignore small variation.
282 When the guest is idle real and virtual time will be aligned in
283 the IO wait loop. */
284#define ICOUNT_WOBBLE (get_ticks_per_sec() / 10)
285
286static void icount_adjust(void)
287{
288 int64_t cur_time;
289 int64_t cur_icount;
290 int64_t delta;
a3270e19
PB
291
292 /* Protected by TimersState mutex. */
946fb27c 293 static int64_t last_delta;
468cc7cf 294
946fb27c
PB
295 /* If the VM is not running, then do nothing. */
296 if (!runstate_is_running()) {
297 return;
298 }
468cc7cf 299
17a15f1b
PB
300 seqlock_write_lock(&timers_state.vm_clock_seqlock);
301 cur_time = cpu_get_clock_locked();
302 cur_icount = cpu_get_icount_locked();
468cc7cf 303
946fb27c
PB
304 delta = cur_icount - cur_time;
305 /* FIXME: This is a very crude algorithm, somewhat prone to oscillation. */
306 if (delta > 0
307 && last_delta + ICOUNT_WOBBLE < delta * 2
308 && icount_time_shift > 0) {
309 /* The guest is getting too far ahead. Slow time down. */
310 icount_time_shift--;
311 }
312 if (delta < 0
313 && last_delta - ICOUNT_WOBBLE > delta * 2
314 && icount_time_shift < MAX_ICOUNT_SHIFT) {
315 /* The guest is getting too far behind. Speed time up. */
316 icount_time_shift++;
317 }
318 last_delta = delta;
c96778bb
FK
319 timers_state.qemu_icount_bias = cur_icount
320 - (timers_state.qemu_icount << icount_time_shift);
17a15f1b 321 seqlock_write_unlock(&timers_state.vm_clock_seqlock);
946fb27c
PB
322}
323
324static void icount_adjust_rt(void *opaque)
325{
40daca54
AB
326 timer_mod(icount_rt_timer,
327 qemu_clock_get_ms(QEMU_CLOCK_REALTIME) + 1000);
946fb27c
PB
328 icount_adjust();
329}
330
331static void icount_adjust_vm(void *opaque)
332{
40daca54
AB
333 timer_mod(icount_vm_timer,
334 qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
335 get_ticks_per_sec() / 10);
946fb27c
PB
336 icount_adjust();
337}
338
339static int64_t qemu_icount_round(int64_t count)
340{
341 return (count + (1 << icount_time_shift) - 1) >> icount_time_shift;
342}
343
344static void icount_warp_rt(void *opaque)
345{
17a15f1b
PB
346 /* The icount_warp_timer is rescheduled soon after vm_clock_warp_start
347 * changes from -1 to another value, so the race here is okay.
348 */
349 if (atomic_read(&vm_clock_warp_start) == -1) {
946fb27c
PB
350 return;
351 }
352
17a15f1b 353 seqlock_write_lock(&timers_state.vm_clock_seqlock);
946fb27c 354 if (runstate_is_running()) {
40daca54 355 int64_t clock = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
8ed961d9
PB
356 int64_t warp_delta;
357
358 warp_delta = clock - vm_clock_warp_start;
359 if (use_icount == 2) {
946fb27c 360 /*
40daca54 361 * In adaptive mode, do not let QEMU_CLOCK_VIRTUAL run too
946fb27c
PB
362 * far ahead of real time.
363 */
17a15f1b
PB
364 int64_t cur_time = cpu_get_clock_locked();
365 int64_t cur_icount = cpu_get_icount_locked();
946fb27c 366 int64_t delta = cur_time - cur_icount;
8ed961d9 367 warp_delta = MIN(warp_delta, delta);
946fb27c 368 }
c96778bb 369 timers_state.qemu_icount_bias += warp_delta;
946fb27c
PB
370 }
371 vm_clock_warp_start = -1;
17a15f1b 372 seqlock_write_unlock(&timers_state.vm_clock_seqlock);
8ed961d9
PB
373
374 if (qemu_clock_expired(QEMU_CLOCK_VIRTUAL)) {
375 qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
376 }
946fb27c
PB
377}
378
8156be56
PB
379void qtest_clock_warp(int64_t dest)
380{
40daca54 381 int64_t clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
8156be56
PB
382 assert(qtest_enabled());
383 while (clock < dest) {
40daca54 384 int64_t deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
c9299e2f 385 int64_t warp = qemu_soonest_timeout(dest - clock, deadline);
17a15f1b 386 seqlock_write_lock(&timers_state.vm_clock_seqlock);
c96778bb 387 timers_state.qemu_icount_bias += warp;
17a15f1b
PB
388 seqlock_write_unlock(&timers_state.vm_clock_seqlock);
389
40daca54
AB
390 qemu_clock_run_timers(QEMU_CLOCK_VIRTUAL);
391 clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
8156be56 392 }
40daca54 393 qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
8156be56
PB
394}
395
40daca54 396void qemu_clock_warp(QEMUClockType type)
946fb27c 397{
ce78d18c 398 int64_t clock;
946fb27c
PB
399 int64_t deadline;
400
401 /*
402 * There are too many global variables to make the "warp" behavior
403 * applicable to other clocks. But a clock argument removes the
404 * need for if statements all over the place.
405 */
40daca54 406 if (type != QEMU_CLOCK_VIRTUAL || !use_icount) {
946fb27c
PB
407 return;
408 }
409
410 /*
40daca54
AB
411 * If the CPUs have been sleeping, advance QEMU_CLOCK_VIRTUAL timer now.
412 * This ensures that the deadline for the timer is computed correctly below.
946fb27c
PB
413 * This also makes sure that the insn counter is synchronized before the
414 * CPU starts running, in case the CPU is woken by an event other than
40daca54 415 * the earliest QEMU_CLOCK_VIRTUAL timer.
946fb27c
PB
416 */
417 icount_warp_rt(NULL);
ce78d18c
PB
418 timer_del(icount_warp_timer);
419 if (!all_cpu_threads_idle()) {
946fb27c
PB
420 return;
421 }
422
8156be56
PB
423 if (qtest_enabled()) {
424 /* When testing, qtest commands advance icount. */
425 return;
426 }
427
ac70aafc 428 /* We want to use the earliest deadline from ALL vm_clocks */
ce78d18c 429 clock = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
40daca54 430 deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
ce78d18c
PB
431 if (deadline < 0) {
432 return;
ac70aafc
AB
433 }
434
946fb27c
PB
435 if (deadline > 0) {
436 /*
40daca54 437 * Ensure QEMU_CLOCK_VIRTUAL proceeds even when the virtual CPU goes to
946fb27c
PB
438 * sleep. Otherwise, the CPU might be waiting for a future timer
439 * interrupt to wake it up, but the interrupt never comes because
440 * the vCPU isn't running any insns and thus doesn't advance the
40daca54 441 * QEMU_CLOCK_VIRTUAL.
946fb27c
PB
442 *
443 * An extreme solution for this problem would be to never let VCPUs
40daca54
AB
444 * sleep in icount mode if there is a pending QEMU_CLOCK_VIRTUAL
445 * timer; rather time could just advance to the next QEMU_CLOCK_VIRTUAL
446 * event. Instead, we do stop VCPUs and only advance QEMU_CLOCK_VIRTUAL
447 * after some e"real" time, (related to the time left until the next
448 * event) has passed. The QEMU_CLOCK_REALTIME timer will do this.
449 * This avoids that the warps are visible externally; for example,
450 * you will not be sending network packets continuously instead of
451 * every 100ms.
946fb27c 452 */
17a15f1b 453 seqlock_write_lock(&timers_state.vm_clock_seqlock);
ce78d18c
PB
454 if (vm_clock_warp_start == -1 || vm_clock_warp_start > clock) {
455 vm_clock_warp_start = clock;
456 }
17a15f1b 457 seqlock_write_unlock(&timers_state.vm_clock_seqlock);
ce78d18c 458 timer_mod_anticipate(icount_warp_timer, clock + deadline);
ac70aafc 459 } else if (deadline == 0) {
40daca54 460 qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
946fb27c
PB
461 }
462}
463
d09eae37
FK
464static bool icount_state_needed(void *opaque)
465{
466 return use_icount;
467}
468
469/*
470 * This is a subsection for icount migration.
471 */
472static const VMStateDescription icount_vmstate_timers = {
473 .name = "timer/icount",
474 .version_id = 1,
475 .minimum_version_id = 1,
476 .fields = (VMStateField[]) {
477 VMSTATE_INT64(qemu_icount_bias, TimersState),
478 VMSTATE_INT64(qemu_icount, TimersState),
479 VMSTATE_END_OF_LIST()
480 }
481};
482
946fb27c
PB
483static const VMStateDescription vmstate_timers = {
484 .name = "timer",
485 .version_id = 2,
486 .minimum_version_id = 1,
35d08458 487 .fields = (VMStateField[]) {
946fb27c
PB
488 VMSTATE_INT64(cpu_ticks_offset, TimersState),
489 VMSTATE_INT64(dummy, TimersState),
490 VMSTATE_INT64_V(cpu_clock_offset, TimersState, 2),
491 VMSTATE_END_OF_LIST()
d09eae37
FK
492 },
493 .subsections = (VMStateSubsection[]) {
494 {
495 .vmsd = &icount_vmstate_timers,
496 .needed = icount_state_needed,
497 }, {
498 /* empty */
499 }
946fb27c
PB
500 }
501};
502
4603ea01
PD
503void cpu_ticks_init(void)
504{
505 seqlock_init(&timers_state.vm_clock_seqlock, NULL);
506 vmstate_register(NULL, 0, &vmstate_timers, &timers_state);
507}
508
1ad9580b 509void configure_icount(QemuOpts *opts, Error **errp)
946fb27c 510{
1ad9580b 511 const char *option;
a8bfac37 512 char *rem_str = NULL;
1ad9580b 513
1ad9580b 514 option = qemu_opt_get(opts, "shift");
946fb27c 515 if (!option) {
a8bfac37
ST
516 if (qemu_opt_get(opts, "align") != NULL) {
517 error_setg(errp, "Please specify shift option when using align");
518 }
946fb27c
PB
519 return;
520 }
a8bfac37 521 icount_align_option = qemu_opt_get_bool(opts, "align", false);
40daca54
AB
522 icount_warp_timer = timer_new_ns(QEMU_CLOCK_REALTIME,
523 icount_warp_rt, NULL);
946fb27c 524 if (strcmp(option, "auto") != 0) {
a8bfac37
ST
525 errno = 0;
526 icount_time_shift = strtol(option, &rem_str, 0);
527 if (errno != 0 || *rem_str != '\0' || !strlen(option)) {
528 error_setg(errp, "icount: Invalid shift value");
529 }
946fb27c
PB
530 use_icount = 1;
531 return;
a8bfac37
ST
532 } else if (icount_align_option) {
533 error_setg(errp, "shift=auto and align=on are incompatible");
946fb27c
PB
534 }
535
536 use_icount = 2;
537
538 /* 125MIPS seems a reasonable initial guess at the guest speed.
539 It will be corrected fairly quickly anyway. */
540 icount_time_shift = 3;
541
542 /* Have both realtime and virtual time triggers for speed adjustment.
543 The realtime trigger catches emulated time passing too slowly,
544 the virtual time trigger catches emulated time passing too fast.
545 Realtime triggers occur even when idle, so use them less frequently
546 than VM triggers. */
40daca54
AB
547 icount_rt_timer = timer_new_ms(QEMU_CLOCK_REALTIME,
548 icount_adjust_rt, NULL);
549 timer_mod(icount_rt_timer,
550 qemu_clock_get_ms(QEMU_CLOCK_REALTIME) + 1000);
551 icount_vm_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
552 icount_adjust_vm, NULL);
553 timer_mod(icount_vm_timer,
554 qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
555 get_ticks_per_sec() / 10);
946fb27c
PB
556}
557
296af7c9
BS
558/***********************************************************/
559void hw_error(const char *fmt, ...)
560{
561 va_list ap;
55e5c285 562 CPUState *cpu;
296af7c9
BS
563
564 va_start(ap, fmt);
565 fprintf(stderr, "qemu: hardware error: ");
566 vfprintf(stderr, fmt, ap);
567 fprintf(stderr, "\n");
bdc44640 568 CPU_FOREACH(cpu) {
55e5c285 569 fprintf(stderr, "CPU #%d:\n", cpu->cpu_index);
878096ee 570 cpu_dump_state(cpu, stderr, fprintf, CPU_DUMP_FPU);
296af7c9
BS
571 }
572 va_end(ap);
573 abort();
574}
575
576void cpu_synchronize_all_states(void)
577{
182735ef 578 CPUState *cpu;
296af7c9 579
bdc44640 580 CPU_FOREACH(cpu) {
182735ef 581 cpu_synchronize_state(cpu);
296af7c9
BS
582 }
583}
584
585void cpu_synchronize_all_post_reset(void)
586{
182735ef 587 CPUState *cpu;
296af7c9 588
bdc44640 589 CPU_FOREACH(cpu) {
182735ef 590 cpu_synchronize_post_reset(cpu);
296af7c9
BS
591 }
592}
593
594void cpu_synchronize_all_post_init(void)
595{
182735ef 596 CPUState *cpu;
296af7c9 597
bdc44640 598 CPU_FOREACH(cpu) {
182735ef 599 cpu_synchronize_post_init(cpu);
296af7c9
BS
600 }
601}
602
de9d61e8
MT
603void cpu_clean_all_dirty(void)
604{
605 CPUState *cpu;
606
607 CPU_FOREACH(cpu) {
608 cpu_clean_state(cpu);
609 }
610}
611
56983463 612static int do_vm_stop(RunState state)
296af7c9 613{
56983463
KW
614 int ret = 0;
615
1354869c 616 if (runstate_is_running()) {
296af7c9 617 cpu_disable_ticks();
296af7c9 618 pause_all_vcpus();
f5bbfba1 619 runstate_set(state);
1dfb4dd9 620 vm_state_notify(0, state);
a4e15de9 621 qapi_event_send_stop(&error_abort);
296af7c9 622 }
56983463 623
594a45ce
KW
624 bdrv_drain_all();
625 ret = bdrv_flush_all();
626
56983463 627 return ret;
296af7c9
BS
628}
629
a1fcaa73 630static bool cpu_can_run(CPUState *cpu)
296af7c9 631{
4fdeee7c 632 if (cpu->stop) {
a1fcaa73 633 return false;
0ab07c62 634 }
321bc0b2 635 if (cpu_is_stopped(cpu)) {
a1fcaa73 636 return false;
0ab07c62 637 }
a1fcaa73 638 return true;
296af7c9
BS
639}
640
91325046 641static void cpu_handle_guest_debug(CPUState *cpu)
83f338f7 642{
64f6b346 643 gdb_set_stop_cpu(cpu);
8cf71710 644 qemu_system_debug_request();
f324e766 645 cpu->stopped = true;
3c638d06
JK
646}
647
714bd040
PB
648static void cpu_signal(int sig)
649{
4917cf44
AF
650 if (current_cpu) {
651 cpu_exit(current_cpu);
714bd040
PB
652 }
653 exit_request = 1;
654}
714bd040 655
6d9cb73c
JK
656#ifdef CONFIG_LINUX
657static void sigbus_reraise(void)
658{
659 sigset_t set;
660 struct sigaction action;
661
662 memset(&action, 0, sizeof(action));
663 action.sa_handler = SIG_DFL;
664 if (!sigaction(SIGBUS, &action, NULL)) {
665 raise(SIGBUS);
666 sigemptyset(&set);
667 sigaddset(&set, SIGBUS);
668 sigprocmask(SIG_UNBLOCK, &set, NULL);
669 }
670 perror("Failed to re-raise SIGBUS!\n");
671 abort();
672}
673
674static void sigbus_handler(int n, struct qemu_signalfd_siginfo *siginfo,
675 void *ctx)
676{
677 if (kvm_on_sigbus(siginfo->ssi_code,
678 (void *)(intptr_t)siginfo->ssi_addr)) {
679 sigbus_reraise();
680 }
681}
682
683static void qemu_init_sigbus(void)
684{
685 struct sigaction action;
686
687 memset(&action, 0, sizeof(action));
688 action.sa_flags = SA_SIGINFO;
689 action.sa_sigaction = (void (*)(int, siginfo_t*, void*))sigbus_handler;
690 sigaction(SIGBUS, &action, NULL);
691
692 prctl(PR_MCE_KILL, PR_MCE_KILL_SET, PR_MCE_KILL_EARLY, 0, 0);
693}
694
290adf38 695static void qemu_kvm_eat_signals(CPUState *cpu)
1ab3c6c0
JK
696{
697 struct timespec ts = { 0, 0 };
698 siginfo_t siginfo;
699 sigset_t waitset;
700 sigset_t chkset;
701 int r;
702
703 sigemptyset(&waitset);
704 sigaddset(&waitset, SIG_IPI);
705 sigaddset(&waitset, SIGBUS);
706
707 do {
708 r = sigtimedwait(&waitset, &siginfo, &ts);
709 if (r == -1 && !(errno == EAGAIN || errno == EINTR)) {
710 perror("sigtimedwait");
711 exit(1);
712 }
713
714 switch (r) {
715 case SIGBUS:
290adf38 716 if (kvm_on_sigbus_vcpu(cpu, siginfo.si_code, siginfo.si_addr)) {
1ab3c6c0
JK
717 sigbus_reraise();
718 }
719 break;
720 default:
721 break;
722 }
723
724 r = sigpending(&chkset);
725 if (r == -1) {
726 perror("sigpending");
727 exit(1);
728 }
729 } while (sigismember(&chkset, SIG_IPI) || sigismember(&chkset, SIGBUS));
1ab3c6c0
JK
730}
731
6d9cb73c
JK
732#else /* !CONFIG_LINUX */
733
734static void qemu_init_sigbus(void)
735{
736}
1ab3c6c0 737
290adf38 738static void qemu_kvm_eat_signals(CPUState *cpu)
1ab3c6c0
JK
739{
740}
6d9cb73c
JK
741#endif /* !CONFIG_LINUX */
742
296af7c9 743#ifndef _WIN32
55f8d6ac
JK
/* No-op handler: installed so SIG_IPI interrupts blocking syscalls
 * without terminating the process. */
static void dummy_signal(int sig)
{
}
55f8d6ac 747
13618e05 748static void qemu_kvm_init_cpu_signals(CPUState *cpu)
714bd040
PB
749{
750 int r;
751 sigset_t set;
752 struct sigaction sigact;
753
754 memset(&sigact, 0, sizeof(sigact));
755 sigact.sa_handler = dummy_signal;
756 sigaction(SIG_IPI, &sigact, NULL);
757
714bd040
PB
758 pthread_sigmask(SIG_BLOCK, NULL, &set);
759 sigdelset(&set, SIG_IPI);
714bd040 760 sigdelset(&set, SIGBUS);
491d6e80 761 r = kvm_set_signal_mask(cpu, &set);
714bd040
PB
762 if (r) {
763 fprintf(stderr, "kvm_set_signal_mask: %s\n", strerror(-r));
764 exit(1);
765 }
766}
767
768static void qemu_tcg_init_cpu_signals(void)
769{
714bd040
PB
770 sigset_t set;
771 struct sigaction sigact;
772
773 memset(&sigact, 0, sizeof(sigact));
774 sigact.sa_handler = cpu_signal;
775 sigaction(SIG_IPI, &sigact, NULL);
776
777 sigemptyset(&set);
778 sigaddset(&set, SIG_IPI);
779 pthread_sigmask(SIG_UNBLOCK, &set, NULL);
714bd040
PB
780}
781
55f8d6ac 782#else /* _WIN32 */
13618e05 783static void qemu_kvm_init_cpu_signals(CPUState *cpu)
ff48eb5f 784{
714bd040
PB
785 abort();
786}
ff48eb5f 787
714bd040
PB
788static void qemu_tcg_init_cpu_signals(void)
789{
ff48eb5f 790}
714bd040 791#endif /* _WIN32 */
ff48eb5f 792
b2532d88 793static QemuMutex qemu_global_mutex;
46daff13
PB
794static QemuCond qemu_io_proceeded_cond;
795static bool iothread_requesting_mutex;
296af7c9
BS
796
797static QemuThread io_thread;
798
799static QemuThread *tcg_cpu_thread;
800static QemuCond *tcg_halt_cond;
801
296af7c9
BS
802/* cpu creation */
803static QemuCond qemu_cpu_cond;
804/* system init */
296af7c9 805static QemuCond qemu_pause_cond;
e82bcec2 806static QemuCond qemu_work_cond;
296af7c9 807
d3b12f5d 808void qemu_init_cpu_loop(void)
296af7c9 809{
6d9cb73c 810 qemu_init_sigbus();
ed94592b 811 qemu_cond_init(&qemu_cpu_cond);
ed94592b
AL
812 qemu_cond_init(&qemu_pause_cond);
813 qemu_cond_init(&qemu_work_cond);
46daff13 814 qemu_cond_init(&qemu_io_proceeded_cond);
296af7c9 815 qemu_mutex_init(&qemu_global_mutex);
296af7c9 816
b7680cb6 817 qemu_thread_get_self(&io_thread);
296af7c9
BS
818}
819
f100f0b3 820void run_on_cpu(CPUState *cpu, void (*func)(void *data), void *data)
e82bcec2
MT
821{
822 struct qemu_work_item wi;
823
60e82579 824 if (qemu_cpu_is_self(cpu)) {
e82bcec2
MT
825 func(data);
826 return;
827 }
828
829 wi.func = func;
830 wi.data = data;
3c02270d 831 wi.free = false;
c64ca814
AF
832 if (cpu->queued_work_first == NULL) {
833 cpu->queued_work_first = &wi;
0ab07c62 834 } else {
c64ca814 835 cpu->queued_work_last->next = &wi;
0ab07c62 836 }
c64ca814 837 cpu->queued_work_last = &wi;
e82bcec2
MT
838 wi.next = NULL;
839 wi.done = false;
840
c08d7424 841 qemu_cpu_kick(cpu);
e82bcec2 842 while (!wi.done) {
4917cf44 843 CPUState *self_cpu = current_cpu;
e82bcec2
MT
844
845 qemu_cond_wait(&qemu_work_cond, &qemu_global_mutex);
4917cf44 846 current_cpu = self_cpu;
e82bcec2
MT
847 }
848}
849
3c02270d
CV
850void async_run_on_cpu(CPUState *cpu, void (*func)(void *data), void *data)
851{
852 struct qemu_work_item *wi;
853
854 if (qemu_cpu_is_self(cpu)) {
855 func(data);
856 return;
857 }
858
859 wi = g_malloc0(sizeof(struct qemu_work_item));
860 wi->func = func;
861 wi->data = data;
862 wi->free = true;
863 if (cpu->queued_work_first == NULL) {
864 cpu->queued_work_first = wi;
865 } else {
866 cpu->queued_work_last->next = wi;
867 }
868 cpu->queued_work_last = wi;
869 wi->next = NULL;
870 wi->done = false;
871
872 qemu_cpu_kick(cpu);
873}
874
6d45b109 875static void flush_queued_work(CPUState *cpu)
e82bcec2
MT
876{
877 struct qemu_work_item *wi;
878
c64ca814 879 if (cpu->queued_work_first == NULL) {
e82bcec2 880 return;
0ab07c62 881 }
e82bcec2 882
c64ca814
AF
883 while ((wi = cpu->queued_work_first)) {
884 cpu->queued_work_first = wi->next;
e82bcec2
MT
885 wi->func(wi->data);
886 wi->done = true;
3c02270d
CV
887 if (wi->free) {
888 g_free(wi);
889 }
e82bcec2 890 }
c64ca814 891 cpu->queued_work_last = NULL;
e82bcec2
MT
892 qemu_cond_broadcast(&qemu_work_cond);
893}
894
509a0d78 895static void qemu_wait_io_event_common(CPUState *cpu)
296af7c9 896{
4fdeee7c
AF
897 if (cpu->stop) {
898 cpu->stop = false;
f324e766 899 cpu->stopped = true;
296af7c9
BS
900 qemu_cond_signal(&qemu_pause_cond);
901 }
6d45b109 902 flush_queued_work(cpu);
216fc9a4 903 cpu->thread_kicked = false;
296af7c9
BS
904}
905
6cabe1f3 906static void qemu_tcg_wait_io_event(void)
296af7c9 907{
182735ef 908 CPUState *cpu;
6cabe1f3 909
16400322 910 while (all_cpu_threads_idle()) {
ab33fcda
PB
911 /* Start accounting real time to the virtual clock if the CPUs
912 are idle. */
40daca54 913 qemu_clock_warp(QEMU_CLOCK_VIRTUAL);
9705fbb5 914 qemu_cond_wait(tcg_halt_cond, &qemu_global_mutex);
16400322 915 }
296af7c9 916
46daff13
PB
917 while (iothread_requesting_mutex) {
918 qemu_cond_wait(&qemu_io_proceeded_cond, &qemu_global_mutex);
919 }
6cabe1f3 920
bdc44640 921 CPU_FOREACH(cpu) {
182735ef 922 qemu_wait_io_event_common(cpu);
6cabe1f3 923 }
296af7c9
BS
924}
925
fd529e8f 926static void qemu_kvm_wait_io_event(CPUState *cpu)
296af7c9 927{
a98ae1d8 928 while (cpu_thread_is_idle(cpu)) {
f5c121b8 929 qemu_cond_wait(cpu->halt_cond, &qemu_global_mutex);
16400322 930 }
296af7c9 931
290adf38 932 qemu_kvm_eat_signals(cpu);
509a0d78 933 qemu_wait_io_event_common(cpu);
296af7c9
BS
934}
935
7e97cd88 936static void *qemu_kvm_cpu_thread_fn(void *arg)
296af7c9 937{
48a106bd 938 CPUState *cpu = arg;
84b4915d 939 int r;
296af7c9 940
6164e6d6 941 qemu_mutex_lock(&qemu_global_mutex);
814e612e 942 qemu_thread_get_self(cpu->thread);
9f09e18a 943 cpu->thread_id = qemu_get_thread_id();
e511b4d7 944 cpu->exception_index = -1;
626cf8f4 945 cpu->can_do_io = 1;
4917cf44 946 current_cpu = cpu;
296af7c9 947
504134d2 948 r = kvm_init_vcpu(cpu);
84b4915d
JK
949 if (r < 0) {
950 fprintf(stderr, "kvm_init_vcpu failed: %s\n", strerror(-r));
951 exit(1);
952 }
296af7c9 953
13618e05 954 qemu_kvm_init_cpu_signals(cpu);
296af7c9
BS
955
956 /* signal CPU creation */
61a46217 957 cpu->created = true;
296af7c9
BS
958 qemu_cond_signal(&qemu_cpu_cond);
959
296af7c9 960 while (1) {
a1fcaa73 961 if (cpu_can_run(cpu)) {
1458c363 962 r = kvm_cpu_exec(cpu);
83f338f7 963 if (r == EXCP_DEBUG) {
91325046 964 cpu_handle_guest_debug(cpu);
83f338f7 965 }
0ab07c62 966 }
fd529e8f 967 qemu_kvm_wait_io_event(cpu);
296af7c9
BS
968 }
969
970 return NULL;
971}
972
c7f0f3b1
AL
973static void *qemu_dummy_cpu_thread_fn(void *arg)
974{
975#ifdef _WIN32
976 fprintf(stderr, "qtest is not supported under Windows\n");
977 exit(1);
978#else
10a9021d 979 CPUState *cpu = arg;
c7f0f3b1
AL
980 sigset_t waitset;
981 int r;
982
983 qemu_mutex_lock_iothread();
814e612e 984 qemu_thread_get_self(cpu->thread);
9f09e18a 985 cpu->thread_id = qemu_get_thread_id();
e511b4d7 986 cpu->exception_index = -1;
626cf8f4 987 cpu->can_do_io = 1;
c7f0f3b1
AL
988
989 sigemptyset(&waitset);
990 sigaddset(&waitset, SIG_IPI);
991
992 /* signal CPU creation */
61a46217 993 cpu->created = true;
c7f0f3b1
AL
994 qemu_cond_signal(&qemu_cpu_cond);
995
4917cf44 996 current_cpu = cpu;
c7f0f3b1 997 while (1) {
4917cf44 998 current_cpu = NULL;
c7f0f3b1
AL
999 qemu_mutex_unlock_iothread();
1000 do {
1001 int sig;
1002 r = sigwait(&waitset, &sig);
1003 } while (r == -1 && (errno == EAGAIN || errno == EINTR));
1004 if (r == -1) {
1005 perror("sigwait");
1006 exit(1);
1007 }
1008 qemu_mutex_lock_iothread();
4917cf44 1009 current_cpu = cpu;
509a0d78 1010 qemu_wait_io_event_common(cpu);
c7f0f3b1
AL
1011 }
1012
1013 return NULL;
1014#endif
1015}
1016
bdb7ca67
JK
1017static void tcg_exec_all(void);
1018
/*
 * Body of the single shared TCG vCPU thread.  One host thread drives all
 * guest CPUs (round-robin, see tcg_exec_all()): it marks every CPU as
 * created, signals qemu_cpu_cond, waits for the machine to be started,
 * then loops forever executing TCG and servicing I/O events.
 */
static void *qemu_tcg_cpu_thread_fn(void *arg)
{
    CPUState *cpu = arg;

    qemu_tcg_init_cpu_signals();
    qemu_thread_get_self(cpu->thread);

    qemu_mutex_lock(&qemu_global_mutex);
    /* All CPUs run on this one thread; note CPU_FOREACH reuses (and
     * clobbers) 'cpu', which is fine since the arg is no longer needed. */
    CPU_FOREACH(cpu) {
        cpu->thread_id = qemu_get_thread_id();
        cpu->created = true;
        cpu->exception_index = -1;
        cpu->can_do_io = 1;
    }
    qemu_cond_signal(&qemu_cpu_cond);

    /* wait for initial kick-off after machine start */
    while (QTAILQ_FIRST(&cpus)->stopped) {
        qemu_cond_wait(tcg_halt_cond, &qemu_global_mutex);

        /* process any pending work */
        CPU_FOREACH(cpu) {
            qemu_wait_io_event_common(cpu);
        }
    }

    while (1) {
        tcg_exec_all();

        if (use_icount) {
            int64_t deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);

            /* Deadline 0: a virtual-clock timer is already due; poke the
             * clock so its callbacks get a chance to run. */
            if (deadline == 0) {
                qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
            }
        }
        qemu_tcg_wait_io_event();
    }

    return NULL;
}
1060
2ff09a40 1061static void qemu_cpu_kick_thread(CPUState *cpu)
cc015e9a
PB
1062{
1063#ifndef _WIN32
1064 int err;
1065
814e612e 1066 err = pthread_kill(cpu->thread->thread, SIG_IPI);
cc015e9a
PB
1067 if (err) {
1068 fprintf(stderr, "qemu:%s: %s", __func__, strerror(err));
1069 exit(1);
1070 }
1071#else /* _WIN32 */
60e82579 1072 if (!qemu_cpu_is_self(cpu)) {
ed9164a3
OH
1073 CONTEXT tcgContext;
1074
1075 if (SuspendThread(cpu->hThread) == (DWORD)-1) {
7f1721df 1076 fprintf(stderr, "qemu:%s: GetLastError:%lu\n", __func__,
ed9164a3
OH
1077 GetLastError());
1078 exit(1);
1079 }
1080
1081 /* On multi-core systems, we are not sure that the thread is actually
1082 * suspended until we can get the context.
1083 */
1084 tcgContext.ContextFlags = CONTEXT_CONTROL;
1085 while (GetThreadContext(cpu->hThread, &tcgContext) != 0) {
1086 continue;
1087 }
1088
cc015e9a 1089 cpu_signal(0);
ed9164a3
OH
1090
1091 if (ResumeThread(cpu->hThread) == (DWORD)-1) {
7f1721df 1092 fprintf(stderr, "qemu:%s: GetLastError:%lu\n", __func__,
ed9164a3
OH
1093 GetLastError());
1094 exit(1);
1095 }
cc015e9a
PB
1096 }
1097#endif
1098}
1099
c08d7424 1100void qemu_cpu_kick(CPUState *cpu)
296af7c9 1101{
f5c121b8 1102 qemu_cond_broadcast(cpu->halt_cond);
216fc9a4 1103 if (!tcg_enabled() && !cpu->thread_kicked) {
2ff09a40 1104 qemu_cpu_kick_thread(cpu);
216fc9a4 1105 cpu->thread_kicked = true;
aa2c364b 1106 }
296af7c9
BS
1107}
1108
46d62fac 1109void qemu_cpu_kick_self(void)
296af7c9 1110{
b55c22c6 1111#ifndef _WIN32
4917cf44 1112 assert(current_cpu);
296af7c9 1113
4917cf44
AF
1114 if (!current_cpu->thread_kicked) {
1115 qemu_cpu_kick_thread(current_cpu);
1116 current_cpu->thread_kicked = true;
296af7c9 1117 }
b55c22c6
PB
1118#else
1119 abort();
1120#endif
296af7c9
BS
1121}
1122
60e82579 1123bool qemu_cpu_is_self(CPUState *cpu)
296af7c9 1124{
814e612e 1125 return qemu_thread_is_self(cpu->thread);
296af7c9
BS
1126}
1127
aa723c23
JQ
1128static bool qemu_in_vcpu_thread(void)
1129{
4917cf44 1130 return current_cpu && qemu_cpu_is_self(current_cpu);
aa723c23
JQ
1131}
1132
296af7c9
BS
/*
 * Acquire the big QEMU lock (qemu_global_mutex) from an iothread.
 *
 * Without TCG a plain lock suffices.  With TCG the single vCPU thread may
 * hold the lock for long stretches, so if trylock fails we kick first_cpu
 * to push the TCG thread out of guest execution — presumably it notices
 * iothread_requesting_mutex and yields the lock (see qemu_tcg_wait_io_event;
 * not visible here, confirm) — then block on the lock ourselves and finally
 * let the vCPU proceed via qemu_io_proceeded_cond.
 */
void qemu_mutex_lock_iothread(void)
{
    if (!tcg_enabled()) {
        qemu_mutex_lock(&qemu_global_mutex);
    } else {
        iothread_requesting_mutex = true;
        if (qemu_mutex_trylock(&qemu_global_mutex)) {
            /* trylock failed: the TCG thread holds the lock; kick it. */
            qemu_cpu_kick_thread(first_cpu);
            qemu_mutex_lock(&qemu_global_mutex);
        }
        iothread_requesting_mutex = false;
        qemu_cond_broadcast(&qemu_io_proceeded_cond);
    }
}
1147
/* Release the big QEMU lock taken by qemu_mutex_lock_iothread(). */
void qemu_mutex_unlock_iothread(void)
{
    qemu_mutex_unlock(&qemu_global_mutex);
}
1152
1153static int all_vcpus_paused(void)
1154{
bdc44640 1155 CPUState *cpu;
296af7c9 1156
bdc44640 1157 CPU_FOREACH(cpu) {
182735ef 1158 if (!cpu->stopped) {
296af7c9 1159 return 0;
0ab07c62 1160 }
296af7c9
BS
1161 }
1162
1163 return 1;
1164}
1165
/*
 * Stop every vCPU and do not return until all of them have parked.
 *
 * The qemu_cond_wait() on qemu_global_mutex below implies the caller
 * holds the big QEMU lock on entry.  When called from a vCPU thread,
 * only the current CPU can stop synchronously; in the non-KVM case
 * (TCG/dummy, one shared thread) the remaining CPUs are simply flagged
 * stopped, while for KVM we still wait for the other threads.
 */
void pause_all_vcpus(void)
{
    CPUState *cpu;

    /* Freeze the virtual clock while the machine is paused. */
    qemu_clock_enable(QEMU_CLOCK_VIRTUAL, false);
    CPU_FOREACH(cpu) {
        cpu->stop = true;
        qemu_cpu_kick(cpu);
    }

    if (qemu_in_vcpu_thread()) {
        cpu_stop_current();
        if (!kvm_enabled()) {
            /* Single vCPU thread: no other thread can race with us, so
             * mark everything stopped without waiting. */
            CPU_FOREACH(cpu) {
                cpu->stop = false;
                cpu->stopped = true;
            }
            return;
        }
    }

    /* Wait until every vCPU has acknowledged; keep kicking in case one
     * went back to sleep without noticing the stop request. */
    while (!all_vcpus_paused()) {
        qemu_cond_wait(&qemu_pause_cond, &qemu_global_mutex);
        CPU_FOREACH(cpu) {
            qemu_cpu_kick(cpu);
        }
    }
}
1194
2993683b
IM
1195void cpu_resume(CPUState *cpu)
1196{
1197 cpu->stop = false;
1198 cpu->stopped = false;
1199 qemu_cpu_kick(cpu);
1200}
1201
296af7c9
BS
1202void resume_all_vcpus(void)
1203{
bdc44640 1204 CPUState *cpu;
296af7c9 1205
40daca54 1206 qemu_clock_enable(QEMU_CLOCK_VIRTUAL, true);
bdc44640 1207 CPU_FOREACH(cpu) {
182735ef 1208 cpu_resume(cpu);
296af7c9
BS
1209 }
1210}
1211
4900116e
DDAG
/* Size of the stack buffer used to format a vCPU thread's name */
1213#define VCPU_THREAD_NAME_SIZE 16
1214
/*
 * Attach @cpu to the single shared TCG vCPU thread, creating that thread
 * (and the shared halt condition) the first time through.  Subsequent
 * CPUs reuse the thread and condition created for the first one.
 */
static void qemu_tcg_init_vcpu(CPUState *cpu)
{
    char thread_name[VCPU_THREAD_NAME_SIZE];

    tcg_cpu_address_space_init(cpu, cpu->as);

    /* share a single thread for all cpus with TCG */
    if (!tcg_cpu_thread) {
        cpu->thread = g_malloc0(sizeof(QemuThread));
        cpu->halt_cond = g_malloc0(sizeof(QemuCond));
        qemu_cond_init(cpu->halt_cond);
        tcg_halt_cond = cpu->halt_cond;
        snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/TCG",
                 cpu->cpu_index);
        qemu_thread_create(cpu->thread, thread_name, qemu_tcg_cpu_thread_fn,
                           cpu, QEMU_THREAD_JOINABLE);
#ifdef _WIN32
        /* Windows-side kicks need a native handle (see qemu_cpu_kick_thread). */
        cpu->hThread = qemu_thread_get_handle(cpu->thread);
#endif
        /* Wait (dropping qemu_global_mutex) until the thread reports in. */
        while (!cpu->created) {
            qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
        }
        tcg_cpu_thread = cpu->thread;
    } else {
        cpu->thread = tcg_cpu_thread;
        cpu->halt_cond = tcg_halt_cond;
    }
}
1243
48a106bd 1244static void qemu_kvm_start_vcpu(CPUState *cpu)
296af7c9 1245{
4900116e
DDAG
1246 char thread_name[VCPU_THREAD_NAME_SIZE];
1247
814e612e 1248 cpu->thread = g_malloc0(sizeof(QemuThread));
f5c121b8
AF
1249 cpu->halt_cond = g_malloc0(sizeof(QemuCond));
1250 qemu_cond_init(cpu->halt_cond);
4900116e
DDAG
1251 snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/KVM",
1252 cpu->cpu_index);
1253 qemu_thread_create(cpu->thread, thread_name, qemu_kvm_cpu_thread_fn,
1254 cpu, QEMU_THREAD_JOINABLE);
61a46217 1255 while (!cpu->created) {
18a85728 1256 qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
0ab07c62 1257 }
296af7c9
BS
1258}
1259
10a9021d 1260static void qemu_dummy_start_vcpu(CPUState *cpu)
c7f0f3b1 1261{
4900116e
DDAG
1262 char thread_name[VCPU_THREAD_NAME_SIZE];
1263
814e612e 1264 cpu->thread = g_malloc0(sizeof(QemuThread));
f5c121b8
AF
1265 cpu->halt_cond = g_malloc0(sizeof(QemuCond));
1266 qemu_cond_init(cpu->halt_cond);
4900116e
DDAG
1267 snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/DUMMY",
1268 cpu->cpu_index);
1269 qemu_thread_create(cpu->thread, thread_name, qemu_dummy_cpu_thread_fn, cpu,
c7f0f3b1 1270 QEMU_THREAD_JOINABLE);
61a46217 1271 while (!cpu->created) {
c7f0f3b1
AL
1272 qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
1273 }
1274}
1275
c643bed9 1276void qemu_init_vcpu(CPUState *cpu)
296af7c9 1277{
ce3960eb
AF
1278 cpu->nr_cores = smp_cores;
1279 cpu->nr_threads = smp_threads;
f324e766 1280 cpu->stopped = true;
0ab07c62 1281 if (kvm_enabled()) {
48a106bd 1282 qemu_kvm_start_vcpu(cpu);
c7f0f3b1 1283 } else if (tcg_enabled()) {
e5ab30a2 1284 qemu_tcg_init_vcpu(cpu);
c7f0f3b1 1285 } else {
10a9021d 1286 qemu_dummy_start_vcpu(cpu);
0ab07c62 1287 }
296af7c9
BS
1288}
1289
b4a3d965 1290void cpu_stop_current(void)
296af7c9 1291{
4917cf44
AF
1292 if (current_cpu) {
1293 current_cpu->stop = false;
1294 current_cpu->stopped = true;
1295 cpu_exit(current_cpu);
67bb172f 1296 qemu_cond_signal(&qemu_pause_cond);
b4a3d965 1297 }
296af7c9
BS
1298}
1299
56983463 1300int vm_stop(RunState state)
296af7c9 1301{
aa723c23 1302 if (qemu_in_vcpu_thread()) {
74892d24 1303 qemu_system_vmstop_request_prepare();
1dfb4dd9 1304 qemu_system_vmstop_request(state);
296af7c9
BS
1305 /*
1306 * FIXME: should not return to device code in case
1307 * vm_stop() has been requested.
1308 */
b4a3d965 1309 cpu_stop_current();
56983463 1310 return 0;
296af7c9 1311 }
56983463
KW
1312
1313 return do_vm_stop(state);
296af7c9
BS
1314}
1315
8a9236f1
LC
1316/* does a state transition even if the VM is already stopped,
1317 current state is forgotten forever */
56983463 1318int vm_stop_force_state(RunState state)
8a9236f1
LC
1319{
1320 if (runstate_is_running()) {
56983463 1321 return vm_stop(state);
8a9236f1
LC
1322 } else {
1323 runstate_set(state);
594a45ce
KW
1324 /* Make sure to return an error if the flush in a previous vm_stop()
1325 * failed. */
1326 return bdrv_flush_all();
8a9236f1
LC
1327 }
1328}
1329
/*
 * Run TCG for one CPU until it yields, handling icount budget accounting
 * around the call.  Returns the cpu_exec() exit reason (e.g. EXCP_DEBUG).
 *
 * With -icount, the instruction budget is kept in two pieces: a 16-bit
 * low counter (icount_decr.u16.low) decremented by translated code, and
 * an overflow part (icount_extra).  The global instruction count in
 * timers_state.qemu_icount is adjusted so it stays consistent whether or
 * not the budget was fully consumed.
 */
static int tcg_cpu_exec(CPUArchState *env)
{
    CPUState *cpu = ENV_GET_CPU(env);
    int ret;
#ifdef CONFIG_PROFILER
    int64_t ti;
#endif

#ifdef CONFIG_PROFILER
    ti = profile_getclock();
#endif
    if (use_icount) {
        int64_t count;
        int64_t deadline;
        int decr;
        /* Remove any leftover (unexecuted) budget from the global count
         * before computing a fresh one. */
        timers_state.qemu_icount -= (cpu->icount_decr.u16.low
                                    + cpu->icount_extra);
        cpu->icount_decr.u16.low = 0;
        cpu->icount_extra = 0;
        deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);

        /* Maintain prior (possibly buggy) behaviour where if no deadline
         * was set (as there is no QEMU_CLOCK_VIRTUAL timer) or it is more than
         * INT32_MAX nanoseconds ahead, we still use INT32_MAX
         * nanoseconds.
         */
        if ((deadline < 0) || (deadline > INT32_MAX)) {
            deadline = INT32_MAX;
        }

        count = qemu_icount_round(deadline);
        timers_state.qemu_icount += count;
        /* Split the budget: at most 0xffff in the fast low counter, the
         * remainder in icount_extra. */
        decr = (count > 0xffff) ? 0xffff : count;
        count -= decr;
        cpu->icount_decr.u16.low = decr;
        cpu->icount_extra = count;
    }
    ret = cpu_exec(env);
#ifdef CONFIG_PROFILER
    qemu_time += profile_getclock() - ti;
#endif
    if (use_icount) {
        /* Fold pending instructions back into the
           instruction counter, and clear the interrupt flag. */
        timers_state.qemu_icount -= (cpu->icount_decr.u16.low
                        + cpu->icount_extra);
        cpu->icount_decr.u32 = 0;
        cpu->icount_extra = 0;
    }
    return ret;
}
1381
/*
 * One round-robin pass over all CPUs from the single TCG thread.
 *
 * The global cursor next_cpu persists across calls so a pass interrupted
 * by exit_request (or by a debug/stop condition) resumes with the same
 * CPU next time, keeping scheduling fair.
 */
static void tcg_exec_all(void)
{
    int r;

    /* Account partial waits to QEMU_CLOCK_VIRTUAL.  */
    qemu_clock_warp(QEMU_CLOCK_VIRTUAL);

    if (next_cpu == NULL) {
        next_cpu = first_cpu;
    }
    for (; next_cpu != NULL && !exit_request; next_cpu = CPU_NEXT(next_cpu)) {
        CPUState *cpu = next_cpu;
        CPUArchState *env = cpu->env_ptr;

        /* Suspend the virtual clock while single-stepping with NOTIMER. */
        qemu_clock_enable(QEMU_CLOCK_VIRTUAL,
                          (cpu->singlestep_enabled & SSTEP_NOTIMER) == 0);

        if (cpu_can_run(cpu)) {
            r = tcg_cpu_exec(env);
            if (r == EXCP_DEBUG) {
                cpu_handle_guest_debug(cpu);
                break;
            }
        } else if (cpu->stop || cpu->stopped) {
            break;
        }
    }
    exit_request = 0;
}
1411
/* Print the guest CPU models supported by this target binary to @f.
 * Targets that do not define cpu_list print nothing. */
void list_cpus(FILE *f, fprintf_function cpu_fprintf, const char *optarg)
{
    /* XXX: implement xxx_cpu_list for targets that still miss it */
#if defined(cpu_list)
    cpu_list(f, cpu_fprintf);
#endif
}
de0b36b6
LC
1419
/*
 * QMP query-cpus: build a list with one CpuInfo per guest CPU (index,
 * current flag, halted state, host thread id, and a target-specific
 * program counter field).  Caller owns the returned list.
 */
CpuInfoList *qmp_query_cpus(Error **errp)
{
    CpuInfoList *head = NULL, *cur_item = NULL;
    CPUState *cpu;

    CPU_FOREACH(cpu) {
        CpuInfoList *info;
        /* Pick up the target-specific register file for the PC fields. */
#if defined(TARGET_I386)
        X86CPU *x86_cpu = X86_CPU(cpu);
        CPUX86State *env = &x86_cpu->env;
#elif defined(TARGET_PPC)
        PowerPCCPU *ppc_cpu = POWERPC_CPU(cpu);
        CPUPPCState *env = &ppc_cpu->env;
#elif defined(TARGET_SPARC)
        SPARCCPU *sparc_cpu = SPARC_CPU(cpu);
        CPUSPARCState *env = &sparc_cpu->env;
#elif defined(TARGET_MIPS)
        MIPSCPU *mips_cpu = MIPS_CPU(cpu);
        CPUMIPSState *env = &mips_cpu->env;
#elif defined(TARGET_TRICORE)
        TriCoreCPU *tricore_cpu = TRICORE_CPU(cpu);
        CPUTriCoreState *env = &tricore_cpu->env;
#endif

        /* Make sure the register state is up to date before reading it. */
        cpu_synchronize_state(cpu);

        info = g_malloc0(sizeof(*info));
        info->value = g_malloc0(sizeof(*info->value));
        info->value->CPU = cpu->cpu_index;
        info->value->current = (cpu == first_cpu);
        info->value->halted = cpu->halted;
        info->value->thread_id = cpu->thread_id;
#if defined(TARGET_I386)
        info->value->has_pc = true;
        info->value->pc = env->eip + env->segs[R_CS].base;
#elif defined(TARGET_PPC)
        info->value->has_nip = true;
        info->value->nip = env->nip;
#elif defined(TARGET_SPARC)
        info->value->has_pc = true;
        info->value->pc = env->pc;
        info->value->has_npc = true;
        info->value->npc = env->npc;
#elif defined(TARGET_MIPS)
        info->value->has_PC = true;
        info->value->PC = env->active_tc.PC;
#elif defined(TARGET_TRICORE)
        info->value->has_PC = true;
        info->value->PC = env->PC;
#endif

        /* XXX: waiting for the qapi to support GSList */
        if (!cur_item) {
            head = cur_item = info;
        } else {
            cur_item->next = info;
            cur_item = info;
        }
    }

    return head;
}
0cfd6a9a
LC
1482
1483void qmp_memsave(int64_t addr, int64_t size, const char *filename,
1484 bool has_cpu, int64_t cpu_index, Error **errp)
1485{
1486 FILE *f;
1487 uint32_t l;
55e5c285 1488 CPUState *cpu;
0cfd6a9a
LC
1489 uint8_t buf[1024];
1490
1491 if (!has_cpu) {
1492 cpu_index = 0;
1493 }
1494
151d1322
AF
1495 cpu = qemu_get_cpu(cpu_index);
1496 if (cpu == NULL) {
0cfd6a9a
LC
1497 error_set(errp, QERR_INVALID_PARAMETER_VALUE, "cpu-index",
1498 "a CPU number");
1499 return;
1500 }
1501
1502 f = fopen(filename, "wb");
1503 if (!f) {
618da851 1504 error_setg_file_open(errp, errno, filename);
0cfd6a9a
LC
1505 return;
1506 }
1507
1508 while (size != 0) {
1509 l = sizeof(buf);
1510 if (l > size)
1511 l = size;
2f4d0f59
AK
1512 if (cpu_memory_rw_debug(cpu, addr, buf, l, 0) != 0) {
1513 error_setg(errp, "Invalid addr 0x%016" PRIx64 "specified", addr);
1514 goto exit;
1515 }
0cfd6a9a
LC
1516 if (fwrite(buf, 1, l, f) != l) {
1517 error_set(errp, QERR_IO_ERROR);
1518 goto exit;
1519 }
1520 addr += l;
1521 size -= l;
1522 }
1523
1524exit:
1525 fclose(f);
1526}
6d3962bf
LC
1527
1528void qmp_pmemsave(int64_t addr, int64_t size, const char *filename,
1529 Error **errp)
1530{
1531 FILE *f;
1532 uint32_t l;
1533 uint8_t buf[1024];
1534
1535 f = fopen(filename, "wb");
1536 if (!f) {
618da851 1537 error_setg_file_open(errp, errno, filename);
6d3962bf
LC
1538 return;
1539 }
1540
1541 while (size != 0) {
1542 l = sizeof(buf);
1543 if (l > size)
1544 l = size;
eb6282f2 1545 cpu_physical_memory_read(addr, buf, l);
6d3962bf
LC
1546 if (fwrite(buf, 1, l, f) != l) {
1547 error_set(errp, QERR_IO_ERROR);
1548 goto exit;
1549 }
1550 addr += l;
1551 size -= l;
1552 }
1553
1554exit:
1555 fclose(f);
1556}
ab49ab5c
LC
1557
/*
 * QMP inject-nmi.  On x86, deliver an NMI to every CPU — through the
 * APIC when one is present, as a raw CPU_INTERRUPT_NMI otherwise.  On
 * all other targets, defer to the machine's NMI handler for the CPU
 * currently selected in the monitor.
 */
void qmp_inject_nmi(Error **errp)
{
#if defined(TARGET_I386)
    CPUState *cs;

    CPU_FOREACH(cs) {
        X86CPU *cpu = X86_CPU(cs);

        if (!cpu->apic_state) {
            cpu_interrupt(cs, CPU_INTERRUPT_NMI);
        } else {
            apic_deliver_nmi(cpu->apic_state);
        }
    }
#else
    nmi_monitor_handle(monitor_get_cpu_index(), errp);
#endif
}
27498bef
ST
1576
1577void dump_drift_info(FILE *f, fprintf_function cpu_fprintf)
1578{
1579 if (!use_icount) {
1580 return;
1581 }
1582
1583 cpu_fprintf(f, "Host - Guest clock %"PRIi64" ms\n",
1584 (cpu_get_clock() - cpu_get_icount())/SCALE_MS);
1585 if (icount_align_option) {
1586 cpu_fprintf(f, "Max guest delay %"PRIi64" ms\n", -max_delay/SCALE_MS);
1587 cpu_fprintf(f, "Max guest advance %"PRIi64" ms\n", max_advance/SCALE_MS);
1588 } else {
1589 cpu_fprintf(f, "Max guest delay NA\n");
1590 cpu_fprintf(f, "Max guest advance NA\n");
1591 }
1592}