]> git.proxmox.com Git - mirror_qemu.git/blame - cpus.c
xen-hvm: increase maxmem before calling xc_domain_populate_physmap
[mirror_qemu.git] / cpus.c
CommitLineData
296af7c9
BS
1/*
2 * QEMU System Emulator
3 *
4 * Copyright (c) 2003-2008 Fabrice Bellard
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to deal
8 * in the Software without restriction, including without limitation the rights
9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 * copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22 * THE SOFTWARE.
23 */
24
25/* Needed early for CONFIG_BSD etc. */
26#include "config-host.h"
27
83c9089e 28#include "monitor/monitor.h"
a4e15de9 29#include "qapi/qmp/qerror.h"
9c17d615 30#include "sysemu/sysemu.h"
022c62cb 31#include "exec/gdbstub.h"
9c17d615
PB
32#include "sysemu/dma.h"
33#include "sysemu/kvm.h"
de0b36b6 34#include "qmp-commands.h"
296af7c9 35
1de7afc9 36#include "qemu/thread.h"
9c17d615
PB
37#include "sysemu/cpus.h"
38#include "sysemu/qtest.h"
1de7afc9
PB
39#include "qemu/main-loop.h"
40#include "qemu/bitmap.h"
cb365646 41#include "qemu/seqlock.h"
a4e15de9 42#include "qapi-event.h"
9cb805fd 43#include "hw/nmi.h"
0ff0fc19
JK
44
45#ifndef _WIN32
1de7afc9 46#include "qemu/compatfd.h"
0ff0fc19 47#endif
296af7c9 48
6d9cb73c
JK
49#ifdef CONFIG_LINUX
50
51#include <sys/prctl.h>
52
c0532a76
MT
53#ifndef PR_MCE_KILL
54#define PR_MCE_KILL 33
55#endif
56
6d9cb73c
JK
57#ifndef PR_MCE_KILL_SET
58#define PR_MCE_KILL_SET 1
59#endif
60
61#ifndef PR_MCE_KILL_EARLY
62#define PR_MCE_KILL_EARLY 1
63#endif
64
65#endif /* CONFIG_LINUX */
66
182735ef 67static CPUState *next_cpu;
27498bef
ST
68int64_t max_delay;
69int64_t max_advance;
296af7c9 70
321bc0b2
TC
71bool cpu_is_stopped(CPUState *cpu)
72{
73 return cpu->stopped || !runstate_is_running();
74}
75
a98ae1d8 76static bool cpu_thread_is_idle(CPUState *cpu)
ac873f1e 77{
c64ca814 78 if (cpu->stop || cpu->queued_work_first) {
ac873f1e
PM
79 return false;
80 }
321bc0b2 81 if (cpu_is_stopped(cpu)) {
ac873f1e
PM
82 return true;
83 }
8c2e1b00 84 if (!cpu->halted || cpu_has_work(cpu) ||
215e79c0 85 kvm_halt_in_kernel()) {
ac873f1e
PM
86 return false;
87 }
88 return true;
89}
90
91static bool all_cpu_threads_idle(void)
92{
182735ef 93 CPUState *cpu;
ac873f1e 94
bdc44640 95 CPU_FOREACH(cpu) {
182735ef 96 if (!cpu_thread_is_idle(cpu)) {
ac873f1e
PM
97 return false;
98 }
99 }
100 return true;
101}
102
946fb27c
PB
103/***********************************************************/
104/* guest cycle counter */
105
a3270e19
PB
106/* Protected by TimersState seqlock */
107
71468395 108static int64_t vm_clock_warp_start = -1;
946fb27c
PB
109/* Conversion factor from emulated instructions to virtual clock ticks. */
110static int icount_time_shift;
111/* Arbitrarily pick 1MIPS as the minimum allowable speed. */
112#define MAX_ICOUNT_SHIFT 10
a3270e19 113
946fb27c
PB
114static QEMUTimer *icount_rt_timer;
115static QEMUTimer *icount_vm_timer;
116static QEMUTimer *icount_warp_timer;
946fb27c
PB
117
118typedef struct TimersState {
cb365646 119 /* Protected by BQL. */
946fb27c
PB
120 int64_t cpu_ticks_prev;
121 int64_t cpu_ticks_offset;
cb365646
LPF
122
123 /* cpu_clock_offset can be read out of BQL, so protect it with
124 * this lock.
125 */
126 QemuSeqLock vm_clock_seqlock;
946fb27c
PB
127 int64_t cpu_clock_offset;
128 int32_t cpu_ticks_enabled;
129 int64_t dummy;
c96778bb
FK
130
131 /* Compensate for varying guest execution speed. */
132 int64_t qemu_icount_bias;
133 /* Only written by TCG thread */
134 int64_t qemu_icount;
946fb27c
PB
135} TimersState;
136
d9cd4007 137static TimersState timers_state;
946fb27c 138
2a62914b 139int64_t cpu_get_icount_raw(void)
946fb27c
PB
140{
141 int64_t icount;
4917cf44 142 CPUState *cpu = current_cpu;
946fb27c 143
c96778bb 144 icount = timers_state.qemu_icount;
4917cf44 145 if (cpu) {
99df7dce 146 if (!cpu_can_do_io(cpu)) {
2a62914b
PD
147 fprintf(stderr, "Bad icount read\n");
148 exit(1);
946fb27c 149 }
28ecfd7a 150 icount -= (cpu->icount_decr.u16.low + cpu->icount_extra);
946fb27c 151 }
2a62914b
PD
152 return icount;
153}
154
155/* Return the virtual CPU time, based on the instruction counter. */
156static int64_t cpu_get_icount_locked(void)
157{
158 int64_t icount = cpu_get_icount_raw();
3f031313 159 return timers_state.qemu_icount_bias + cpu_icount_to_ns(icount);
946fb27c
PB
160}
161
17a15f1b
PB
162int64_t cpu_get_icount(void)
163{
164 int64_t icount;
165 unsigned start;
166
167 do {
168 start = seqlock_read_begin(&timers_state.vm_clock_seqlock);
169 icount = cpu_get_icount_locked();
170 } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start));
171
172 return icount;
173}
174
3f031313
FK
175int64_t cpu_icount_to_ns(int64_t icount)
176{
177 return icount << icount_time_shift;
178}
179
946fb27c 180/* return the host CPU cycle counter and handle stop/restart */
cb365646 181/* Caller must hold the BQL */
946fb27c
PB
182int64_t cpu_get_ticks(void)
183{
5f3e3101
PB
184 int64_t ticks;
185
946fb27c
PB
186 if (use_icount) {
187 return cpu_get_icount();
188 }
5f3e3101
PB
189
190 ticks = timers_state.cpu_ticks_offset;
191 if (timers_state.cpu_ticks_enabled) {
192 ticks += cpu_get_real_ticks();
193 }
194
195 if (timers_state.cpu_ticks_prev > ticks) {
196 /* Note: non increasing ticks may happen if the host uses
197 software suspend */
198 timers_state.cpu_ticks_offset += timers_state.cpu_ticks_prev - ticks;
199 ticks = timers_state.cpu_ticks_prev;
946fb27c 200 }
5f3e3101
PB
201
202 timers_state.cpu_ticks_prev = ticks;
203 return ticks;
946fb27c
PB
204}
205
cb365646 206static int64_t cpu_get_clock_locked(void)
946fb27c 207{
5f3e3101 208 int64_t ticks;
cb365646 209
5f3e3101
PB
210 ticks = timers_state.cpu_clock_offset;
211 if (timers_state.cpu_ticks_enabled) {
212 ticks += get_clock();
946fb27c 213 }
cb365646 214
5f3e3101 215 return ticks;
cb365646
LPF
216}
217
218/* return the host CPU monotonic timer and handle stop/restart */
219int64_t cpu_get_clock(void)
220{
221 int64_t ti;
222 unsigned start;
223
224 do {
225 start = seqlock_read_begin(&timers_state.vm_clock_seqlock);
226 ti = cpu_get_clock_locked();
227 } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start));
228
229 return ti;
946fb27c
PB
230}
231
c2aa5f81
ST
232/* return the offset between the host clock and virtual CPU clock */
233int64_t cpu_get_clock_offset(void)
234{
235 int64_t ti;
236 unsigned start;
237
238 do {
239 start = seqlock_read_begin(&timers_state.vm_clock_seqlock);
240 ti = timers_state.cpu_clock_offset;
241 if (!timers_state.cpu_ticks_enabled) {
242 ti -= get_clock();
243 }
244 } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start));
245
246 return -ti;
247}
248
cb365646
LPF
249/* enable cpu_get_ticks()
250 * Caller must hold BQL which server as mutex for vm_clock_seqlock.
251 */
946fb27c
PB
252void cpu_enable_ticks(void)
253{
cb365646
LPF
254 /* Here, the really thing protected by seqlock is cpu_clock_offset. */
255 seqlock_write_lock(&timers_state.vm_clock_seqlock);
946fb27c
PB
256 if (!timers_state.cpu_ticks_enabled) {
257 timers_state.cpu_ticks_offset -= cpu_get_real_ticks();
258 timers_state.cpu_clock_offset -= get_clock();
259 timers_state.cpu_ticks_enabled = 1;
260 }
cb365646 261 seqlock_write_unlock(&timers_state.vm_clock_seqlock);
946fb27c
PB
262}
263
264/* disable cpu_get_ticks() : the clock is stopped. You must not call
cb365646
LPF
265 * cpu_get_ticks() after that.
266 * Caller must hold BQL which server as mutex for vm_clock_seqlock.
267 */
946fb27c
PB
268void cpu_disable_ticks(void)
269{
cb365646
LPF
270 /* Here, the really thing protected by seqlock is cpu_clock_offset. */
271 seqlock_write_lock(&timers_state.vm_clock_seqlock);
946fb27c 272 if (timers_state.cpu_ticks_enabled) {
5f3e3101 273 timers_state.cpu_ticks_offset += cpu_get_real_ticks();
cb365646 274 timers_state.cpu_clock_offset = cpu_get_clock_locked();
946fb27c
PB
275 timers_state.cpu_ticks_enabled = 0;
276 }
cb365646 277 seqlock_write_unlock(&timers_state.vm_clock_seqlock);
946fb27c
PB
278}
279
280/* Correlation between real and virtual time is always going to be
281 fairly approximate, so ignore small variation.
282 When the guest is idle real and virtual time will be aligned in
283 the IO wait loop. */
284#define ICOUNT_WOBBLE (get_ticks_per_sec() / 10)
285
286static void icount_adjust(void)
287{
288 int64_t cur_time;
289 int64_t cur_icount;
290 int64_t delta;
a3270e19
PB
291
292 /* Protected by TimersState mutex. */
946fb27c 293 static int64_t last_delta;
468cc7cf 294
946fb27c
PB
295 /* If the VM is not running, then do nothing. */
296 if (!runstate_is_running()) {
297 return;
298 }
468cc7cf 299
17a15f1b
PB
300 seqlock_write_lock(&timers_state.vm_clock_seqlock);
301 cur_time = cpu_get_clock_locked();
302 cur_icount = cpu_get_icount_locked();
468cc7cf 303
946fb27c
PB
304 delta = cur_icount - cur_time;
305 /* FIXME: This is a very crude algorithm, somewhat prone to oscillation. */
306 if (delta > 0
307 && last_delta + ICOUNT_WOBBLE < delta * 2
308 && icount_time_shift > 0) {
309 /* The guest is getting too far ahead. Slow time down. */
310 icount_time_shift--;
311 }
312 if (delta < 0
313 && last_delta - ICOUNT_WOBBLE > delta * 2
314 && icount_time_shift < MAX_ICOUNT_SHIFT) {
315 /* The guest is getting too far behind. Speed time up. */
316 icount_time_shift++;
317 }
318 last_delta = delta;
c96778bb
FK
319 timers_state.qemu_icount_bias = cur_icount
320 - (timers_state.qemu_icount << icount_time_shift);
17a15f1b 321 seqlock_write_unlock(&timers_state.vm_clock_seqlock);
946fb27c
PB
322}
323
324static void icount_adjust_rt(void *opaque)
325{
40daca54
AB
326 timer_mod(icount_rt_timer,
327 qemu_clock_get_ms(QEMU_CLOCK_REALTIME) + 1000);
946fb27c
PB
328 icount_adjust();
329}
330
331static void icount_adjust_vm(void *opaque)
332{
40daca54
AB
333 timer_mod(icount_vm_timer,
334 qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
335 get_ticks_per_sec() / 10);
946fb27c
PB
336 icount_adjust();
337}
338
339static int64_t qemu_icount_round(int64_t count)
340{
341 return (count + (1 << icount_time_shift) - 1) >> icount_time_shift;
342}
343
344static void icount_warp_rt(void *opaque)
345{
17a15f1b
PB
346 /* The icount_warp_timer is rescheduled soon after vm_clock_warp_start
347 * changes from -1 to another value, so the race here is okay.
348 */
349 if (atomic_read(&vm_clock_warp_start) == -1) {
946fb27c
PB
350 return;
351 }
352
17a15f1b 353 seqlock_write_lock(&timers_state.vm_clock_seqlock);
946fb27c 354 if (runstate_is_running()) {
bf2a7ddb 355 int64_t clock = cpu_get_clock_locked();
8ed961d9
PB
356 int64_t warp_delta;
357
358 warp_delta = clock - vm_clock_warp_start;
359 if (use_icount == 2) {
946fb27c 360 /*
40daca54 361 * In adaptive mode, do not let QEMU_CLOCK_VIRTUAL run too
946fb27c
PB
362 * far ahead of real time.
363 */
17a15f1b 364 int64_t cur_icount = cpu_get_icount_locked();
bf2a7ddb 365 int64_t delta = clock - cur_icount;
8ed961d9 366 warp_delta = MIN(warp_delta, delta);
946fb27c 367 }
c96778bb 368 timers_state.qemu_icount_bias += warp_delta;
946fb27c
PB
369 }
370 vm_clock_warp_start = -1;
17a15f1b 371 seqlock_write_unlock(&timers_state.vm_clock_seqlock);
8ed961d9
PB
372
373 if (qemu_clock_expired(QEMU_CLOCK_VIRTUAL)) {
374 qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
375 }
946fb27c
PB
376}
377
8156be56
PB
378void qtest_clock_warp(int64_t dest)
379{
40daca54 380 int64_t clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
8156be56
PB
381 assert(qtest_enabled());
382 while (clock < dest) {
40daca54 383 int64_t deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
c9299e2f 384 int64_t warp = qemu_soonest_timeout(dest - clock, deadline);
17a15f1b 385 seqlock_write_lock(&timers_state.vm_clock_seqlock);
c96778bb 386 timers_state.qemu_icount_bias += warp;
17a15f1b
PB
387 seqlock_write_unlock(&timers_state.vm_clock_seqlock);
388
40daca54
AB
389 qemu_clock_run_timers(QEMU_CLOCK_VIRTUAL);
390 clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
8156be56 391 }
40daca54 392 qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
8156be56
PB
393}
394
40daca54 395void qemu_clock_warp(QEMUClockType type)
946fb27c 396{
ce78d18c 397 int64_t clock;
946fb27c
PB
398 int64_t deadline;
399
400 /*
401 * There are too many global variables to make the "warp" behavior
402 * applicable to other clocks. But a clock argument removes the
403 * need for if statements all over the place.
404 */
40daca54 405 if (type != QEMU_CLOCK_VIRTUAL || !use_icount) {
946fb27c
PB
406 return;
407 }
408
409 /*
40daca54
AB
410 * If the CPUs have been sleeping, advance QEMU_CLOCK_VIRTUAL timer now.
411 * This ensures that the deadline for the timer is computed correctly below.
946fb27c
PB
412 * This also makes sure that the insn counter is synchronized before the
413 * CPU starts running, in case the CPU is woken by an event other than
40daca54 414 * the earliest QEMU_CLOCK_VIRTUAL timer.
946fb27c
PB
415 */
416 icount_warp_rt(NULL);
ce78d18c
PB
417 timer_del(icount_warp_timer);
418 if (!all_cpu_threads_idle()) {
946fb27c
PB
419 return;
420 }
421
8156be56
PB
422 if (qtest_enabled()) {
423 /* When testing, qtest commands advance icount. */
424 return;
425 }
426
ac70aafc 427 /* We want to use the earliest deadline from ALL vm_clocks */
bf2a7ddb 428 clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT);
40daca54 429 deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
ce78d18c
PB
430 if (deadline < 0) {
431 return;
ac70aafc
AB
432 }
433
946fb27c
PB
434 if (deadline > 0) {
435 /*
40daca54 436 * Ensure QEMU_CLOCK_VIRTUAL proceeds even when the virtual CPU goes to
946fb27c
PB
437 * sleep. Otherwise, the CPU might be waiting for a future timer
438 * interrupt to wake it up, but the interrupt never comes because
439 * the vCPU isn't running any insns and thus doesn't advance the
40daca54 440 * QEMU_CLOCK_VIRTUAL.
946fb27c
PB
441 *
442 * An extreme solution for this problem would be to never let VCPUs
40daca54
AB
443 * sleep in icount mode if there is a pending QEMU_CLOCK_VIRTUAL
444 * timer; rather time could just advance to the next QEMU_CLOCK_VIRTUAL
445 * event. Instead, we do stop VCPUs and only advance QEMU_CLOCK_VIRTUAL
bf2a7ddb
PD
446 * after some "real" time, (related to the time left until the next
447 * event) has passed. The QEMU_CLOCK_VIRTUAL_RT clock will do this.
40daca54
AB
448 * This avoids that the warps are visible externally; for example,
449 * you will not be sending network packets continuously instead of
450 * every 100ms.
946fb27c 451 */
17a15f1b 452 seqlock_write_lock(&timers_state.vm_clock_seqlock);
ce78d18c
PB
453 if (vm_clock_warp_start == -1 || vm_clock_warp_start > clock) {
454 vm_clock_warp_start = clock;
455 }
17a15f1b 456 seqlock_write_unlock(&timers_state.vm_clock_seqlock);
ce78d18c 457 timer_mod_anticipate(icount_warp_timer, clock + deadline);
ac70aafc 458 } else if (deadline == 0) {
40daca54 459 qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
946fb27c
PB
460 }
461}
462
d09eae37
FK
463static bool icount_state_needed(void *opaque)
464{
465 return use_icount;
466}
467
468/*
469 * This is a subsection for icount migration.
470 */
471static const VMStateDescription icount_vmstate_timers = {
472 .name = "timer/icount",
473 .version_id = 1,
474 .minimum_version_id = 1,
475 .fields = (VMStateField[]) {
476 VMSTATE_INT64(qemu_icount_bias, TimersState),
477 VMSTATE_INT64(qemu_icount, TimersState),
478 VMSTATE_END_OF_LIST()
479 }
480};
481
946fb27c
PB
482static const VMStateDescription vmstate_timers = {
483 .name = "timer",
484 .version_id = 2,
485 .minimum_version_id = 1,
35d08458 486 .fields = (VMStateField[]) {
946fb27c
PB
487 VMSTATE_INT64(cpu_ticks_offset, TimersState),
488 VMSTATE_INT64(dummy, TimersState),
489 VMSTATE_INT64_V(cpu_clock_offset, TimersState, 2),
490 VMSTATE_END_OF_LIST()
d09eae37
FK
491 },
492 .subsections = (VMStateSubsection[]) {
493 {
494 .vmsd = &icount_vmstate_timers,
495 .needed = icount_state_needed,
496 }, {
497 /* empty */
498 }
946fb27c
PB
499 }
500};
501
4603ea01
PD
502void cpu_ticks_init(void)
503{
504 seqlock_init(&timers_state.vm_clock_seqlock, NULL);
505 vmstate_register(NULL, 0, &vmstate_timers, &timers_state);
506}
507
1ad9580b 508void configure_icount(QemuOpts *opts, Error **errp)
946fb27c 509{
1ad9580b 510 const char *option;
a8bfac37 511 char *rem_str = NULL;
1ad9580b 512
1ad9580b 513 option = qemu_opt_get(opts, "shift");
946fb27c 514 if (!option) {
a8bfac37
ST
515 if (qemu_opt_get(opts, "align") != NULL) {
516 error_setg(errp, "Please specify shift option when using align");
517 }
946fb27c
PB
518 return;
519 }
a8bfac37 520 icount_align_option = qemu_opt_get_bool(opts, "align", false);
bf2a7ddb
PD
521 icount_warp_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL_RT,
522 icount_warp_rt, NULL);
946fb27c 523 if (strcmp(option, "auto") != 0) {
a8bfac37
ST
524 errno = 0;
525 icount_time_shift = strtol(option, &rem_str, 0);
526 if (errno != 0 || *rem_str != '\0' || !strlen(option)) {
527 error_setg(errp, "icount: Invalid shift value");
528 }
946fb27c
PB
529 use_icount = 1;
530 return;
a8bfac37
ST
531 } else if (icount_align_option) {
532 error_setg(errp, "shift=auto and align=on are incompatible");
946fb27c
PB
533 }
534
535 use_icount = 2;
536
537 /* 125MIPS seems a reasonable initial guess at the guest speed.
538 It will be corrected fairly quickly anyway. */
539 icount_time_shift = 3;
540
541 /* Have both realtime and virtual time triggers for speed adjustment.
542 The realtime trigger catches emulated time passing too slowly,
543 the virtual time trigger catches emulated time passing too fast.
544 Realtime triggers occur even when idle, so use them less frequently
545 than VM triggers. */
bf2a7ddb
PD
546 icount_rt_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL_RT,
547 icount_adjust_rt, NULL);
40daca54 548 timer_mod(icount_rt_timer,
bf2a7ddb 549 qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL_RT) + 1000);
40daca54
AB
550 icount_vm_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
551 icount_adjust_vm, NULL);
552 timer_mod(icount_vm_timer,
553 qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
554 get_ticks_per_sec() / 10);
946fb27c
PB
555}
556
296af7c9
BS
557/***********************************************************/
558void hw_error(const char *fmt, ...)
559{
560 va_list ap;
55e5c285 561 CPUState *cpu;
296af7c9
BS
562
563 va_start(ap, fmt);
564 fprintf(stderr, "qemu: hardware error: ");
565 vfprintf(stderr, fmt, ap);
566 fprintf(stderr, "\n");
bdc44640 567 CPU_FOREACH(cpu) {
55e5c285 568 fprintf(stderr, "CPU #%d:\n", cpu->cpu_index);
878096ee 569 cpu_dump_state(cpu, stderr, fprintf, CPU_DUMP_FPU);
296af7c9
BS
570 }
571 va_end(ap);
572 abort();
573}
574
575void cpu_synchronize_all_states(void)
576{
182735ef 577 CPUState *cpu;
296af7c9 578
bdc44640 579 CPU_FOREACH(cpu) {
182735ef 580 cpu_synchronize_state(cpu);
296af7c9
BS
581 }
582}
583
584void cpu_synchronize_all_post_reset(void)
585{
182735ef 586 CPUState *cpu;
296af7c9 587
bdc44640 588 CPU_FOREACH(cpu) {
182735ef 589 cpu_synchronize_post_reset(cpu);
296af7c9
BS
590 }
591}
592
593void cpu_synchronize_all_post_init(void)
594{
182735ef 595 CPUState *cpu;
296af7c9 596
bdc44640 597 CPU_FOREACH(cpu) {
182735ef 598 cpu_synchronize_post_init(cpu);
296af7c9
BS
599 }
600}
601
de9d61e8
MT
602void cpu_clean_all_dirty(void)
603{
604 CPUState *cpu;
605
606 CPU_FOREACH(cpu) {
607 cpu_clean_state(cpu);
608 }
609}
610
56983463 611static int do_vm_stop(RunState state)
296af7c9 612{
56983463
KW
613 int ret = 0;
614
1354869c 615 if (runstate_is_running()) {
296af7c9 616 cpu_disable_ticks();
296af7c9 617 pause_all_vcpus();
f5bbfba1 618 runstate_set(state);
1dfb4dd9 619 vm_state_notify(0, state);
a4e15de9 620 qapi_event_send_stop(&error_abort);
296af7c9 621 }
56983463 622
594a45ce
KW
623 bdrv_drain_all();
624 ret = bdrv_flush_all();
625
56983463 626 return ret;
296af7c9
BS
627}
628
a1fcaa73 629static bool cpu_can_run(CPUState *cpu)
296af7c9 630{
4fdeee7c 631 if (cpu->stop) {
a1fcaa73 632 return false;
0ab07c62 633 }
321bc0b2 634 if (cpu_is_stopped(cpu)) {
a1fcaa73 635 return false;
0ab07c62 636 }
a1fcaa73 637 return true;
296af7c9
BS
638}
639
91325046 640static void cpu_handle_guest_debug(CPUState *cpu)
83f338f7 641{
64f6b346 642 gdb_set_stop_cpu(cpu);
8cf71710 643 qemu_system_debug_request();
f324e766 644 cpu->stopped = true;
3c638d06
JK
645}
646
714bd040
PB
647static void cpu_signal(int sig)
648{
4917cf44
AF
649 if (current_cpu) {
650 cpu_exit(current_cpu);
714bd040
PB
651 }
652 exit_request = 1;
653}
714bd040 654
6d9cb73c
JK
655#ifdef CONFIG_LINUX
656static void sigbus_reraise(void)
657{
658 sigset_t set;
659 struct sigaction action;
660
661 memset(&action, 0, sizeof(action));
662 action.sa_handler = SIG_DFL;
663 if (!sigaction(SIGBUS, &action, NULL)) {
664 raise(SIGBUS);
665 sigemptyset(&set);
666 sigaddset(&set, SIGBUS);
667 sigprocmask(SIG_UNBLOCK, &set, NULL);
668 }
669 perror("Failed to re-raise SIGBUS!\n");
670 abort();
671}
672
673static void sigbus_handler(int n, struct qemu_signalfd_siginfo *siginfo,
674 void *ctx)
675{
676 if (kvm_on_sigbus(siginfo->ssi_code,
677 (void *)(intptr_t)siginfo->ssi_addr)) {
678 sigbus_reraise();
679 }
680}
681
682static void qemu_init_sigbus(void)
683{
684 struct sigaction action;
685
686 memset(&action, 0, sizeof(action));
687 action.sa_flags = SA_SIGINFO;
688 action.sa_sigaction = (void (*)(int, siginfo_t*, void*))sigbus_handler;
689 sigaction(SIGBUS, &action, NULL);
690
691 prctl(PR_MCE_KILL, PR_MCE_KILL_SET, PR_MCE_KILL_EARLY, 0, 0);
692}
693
290adf38 694static void qemu_kvm_eat_signals(CPUState *cpu)
1ab3c6c0
JK
695{
696 struct timespec ts = { 0, 0 };
697 siginfo_t siginfo;
698 sigset_t waitset;
699 sigset_t chkset;
700 int r;
701
702 sigemptyset(&waitset);
703 sigaddset(&waitset, SIG_IPI);
704 sigaddset(&waitset, SIGBUS);
705
706 do {
707 r = sigtimedwait(&waitset, &siginfo, &ts);
708 if (r == -1 && !(errno == EAGAIN || errno == EINTR)) {
709 perror("sigtimedwait");
710 exit(1);
711 }
712
713 switch (r) {
714 case SIGBUS:
290adf38 715 if (kvm_on_sigbus_vcpu(cpu, siginfo.si_code, siginfo.si_addr)) {
1ab3c6c0
JK
716 sigbus_reraise();
717 }
718 break;
719 default:
720 break;
721 }
722
723 r = sigpending(&chkset);
724 if (r == -1) {
725 perror("sigpending");
726 exit(1);
727 }
728 } while (sigismember(&chkset, SIG_IPI) || sigismember(&chkset, SIGBUS));
1ab3c6c0
JK
729}
730
6d9cb73c
JK
731#else /* !CONFIG_LINUX */
732
/* Non-Linux build: no SIGBUS setup is performed. */
static void qemu_init_sigbus(void)
{
}
1ab3c6c0 736
290adf38 737static void qemu_kvm_eat_signals(CPUState *cpu)
1ab3c6c0
JK
738{
739}
6d9cb73c
JK
740#endif /* !CONFIG_LINUX */
741
296af7c9 742#ifndef _WIN32
55f8d6ac
JK
/* Signal handler that intentionally does nothing; installed for SIG_IPI
 * by qemu_kvm_init_cpu_signals(). */
static void dummy_signal(int sig)
{
}
55f8d6ac 746
13618e05 747static void qemu_kvm_init_cpu_signals(CPUState *cpu)
714bd040
PB
748{
749 int r;
750 sigset_t set;
751 struct sigaction sigact;
752
753 memset(&sigact, 0, sizeof(sigact));
754 sigact.sa_handler = dummy_signal;
755 sigaction(SIG_IPI, &sigact, NULL);
756
714bd040
PB
757 pthread_sigmask(SIG_BLOCK, NULL, &set);
758 sigdelset(&set, SIG_IPI);
714bd040 759 sigdelset(&set, SIGBUS);
491d6e80 760 r = kvm_set_signal_mask(cpu, &set);
714bd040
PB
761 if (r) {
762 fprintf(stderr, "kvm_set_signal_mask: %s\n", strerror(-r));
763 exit(1);
764 }
765}
766
767static void qemu_tcg_init_cpu_signals(void)
768{
714bd040
PB
769 sigset_t set;
770 struct sigaction sigact;
771
772 memset(&sigact, 0, sizeof(sigact));
773 sigact.sa_handler = cpu_signal;
774 sigaction(SIG_IPI, &sigact, NULL);
775
776 sigemptyset(&set);
777 sigaddset(&set, SIG_IPI);
778 pthread_sigmask(SIG_UNBLOCK, &set, NULL);
714bd040
PB
779}
780
55f8d6ac 781#else /* _WIN32 */
13618e05 782static void qemu_kvm_init_cpu_signals(CPUState *cpu)
ff48eb5f 783{
714bd040
PB
784 abort();
785}
ff48eb5f 786
714bd040
PB
/* Win32 build: no signal setup is done for the TCG thread. */
static void qemu_tcg_init_cpu_signals(void)
{
}
714bd040 790#endif /* _WIN32 */
ff48eb5f 791
b2532d88 792static QemuMutex qemu_global_mutex;
46daff13
PB
793static QemuCond qemu_io_proceeded_cond;
794static bool iothread_requesting_mutex;
296af7c9
BS
795
796static QemuThread io_thread;
797
798static QemuThread *tcg_cpu_thread;
799static QemuCond *tcg_halt_cond;
800
296af7c9
BS
801/* cpu creation */
802static QemuCond qemu_cpu_cond;
803/* system init */
296af7c9 804static QemuCond qemu_pause_cond;
e82bcec2 805static QemuCond qemu_work_cond;
296af7c9 806
d3b12f5d 807void qemu_init_cpu_loop(void)
296af7c9 808{
6d9cb73c 809 qemu_init_sigbus();
ed94592b 810 qemu_cond_init(&qemu_cpu_cond);
ed94592b
AL
811 qemu_cond_init(&qemu_pause_cond);
812 qemu_cond_init(&qemu_work_cond);
46daff13 813 qemu_cond_init(&qemu_io_proceeded_cond);
296af7c9 814 qemu_mutex_init(&qemu_global_mutex);
296af7c9 815
b7680cb6 816 qemu_thread_get_self(&io_thread);
296af7c9
BS
817}
818
f100f0b3 819void run_on_cpu(CPUState *cpu, void (*func)(void *data), void *data)
e82bcec2
MT
820{
821 struct qemu_work_item wi;
822
60e82579 823 if (qemu_cpu_is_self(cpu)) {
e82bcec2
MT
824 func(data);
825 return;
826 }
827
828 wi.func = func;
829 wi.data = data;
3c02270d 830 wi.free = false;
c64ca814
AF
831 if (cpu->queued_work_first == NULL) {
832 cpu->queued_work_first = &wi;
0ab07c62 833 } else {
c64ca814 834 cpu->queued_work_last->next = &wi;
0ab07c62 835 }
c64ca814 836 cpu->queued_work_last = &wi;
e82bcec2
MT
837 wi.next = NULL;
838 wi.done = false;
839
c08d7424 840 qemu_cpu_kick(cpu);
e82bcec2 841 while (!wi.done) {
4917cf44 842 CPUState *self_cpu = current_cpu;
e82bcec2
MT
843
844 qemu_cond_wait(&qemu_work_cond, &qemu_global_mutex);
4917cf44 845 current_cpu = self_cpu;
e82bcec2
MT
846 }
847}
848
3c02270d
CV
849void async_run_on_cpu(CPUState *cpu, void (*func)(void *data), void *data)
850{
851 struct qemu_work_item *wi;
852
853 if (qemu_cpu_is_self(cpu)) {
854 func(data);
855 return;
856 }
857
858 wi = g_malloc0(sizeof(struct qemu_work_item));
859 wi->func = func;
860 wi->data = data;
861 wi->free = true;
862 if (cpu->queued_work_first == NULL) {
863 cpu->queued_work_first = wi;
864 } else {
865 cpu->queued_work_last->next = wi;
866 }
867 cpu->queued_work_last = wi;
868 wi->next = NULL;
869 wi->done = false;
870
871 qemu_cpu_kick(cpu);
872}
873
6d45b109 874static void flush_queued_work(CPUState *cpu)
e82bcec2
MT
875{
876 struct qemu_work_item *wi;
877
c64ca814 878 if (cpu->queued_work_first == NULL) {
e82bcec2 879 return;
0ab07c62 880 }
e82bcec2 881
c64ca814
AF
882 while ((wi = cpu->queued_work_first)) {
883 cpu->queued_work_first = wi->next;
e82bcec2
MT
884 wi->func(wi->data);
885 wi->done = true;
3c02270d
CV
886 if (wi->free) {
887 g_free(wi);
888 }
e82bcec2 889 }
c64ca814 890 cpu->queued_work_last = NULL;
e82bcec2
MT
891 qemu_cond_broadcast(&qemu_work_cond);
892}
893
509a0d78 894static void qemu_wait_io_event_common(CPUState *cpu)
296af7c9 895{
4fdeee7c
AF
896 if (cpu->stop) {
897 cpu->stop = false;
f324e766 898 cpu->stopped = true;
296af7c9
BS
899 qemu_cond_signal(&qemu_pause_cond);
900 }
6d45b109 901 flush_queued_work(cpu);
216fc9a4 902 cpu->thread_kicked = false;
296af7c9
BS
903}
904
6cabe1f3 905static void qemu_tcg_wait_io_event(void)
296af7c9 906{
182735ef 907 CPUState *cpu;
6cabe1f3 908
16400322 909 while (all_cpu_threads_idle()) {
ab33fcda
PB
910 /* Start accounting real time to the virtual clock if the CPUs
911 are idle. */
40daca54 912 qemu_clock_warp(QEMU_CLOCK_VIRTUAL);
9705fbb5 913 qemu_cond_wait(tcg_halt_cond, &qemu_global_mutex);
16400322 914 }
296af7c9 915
46daff13
PB
916 while (iothread_requesting_mutex) {
917 qemu_cond_wait(&qemu_io_proceeded_cond, &qemu_global_mutex);
918 }
6cabe1f3 919
bdc44640 920 CPU_FOREACH(cpu) {
182735ef 921 qemu_wait_io_event_common(cpu);
6cabe1f3 922 }
296af7c9
BS
923}
924
fd529e8f 925static void qemu_kvm_wait_io_event(CPUState *cpu)
296af7c9 926{
a98ae1d8 927 while (cpu_thread_is_idle(cpu)) {
f5c121b8 928 qemu_cond_wait(cpu->halt_cond, &qemu_global_mutex);
16400322 929 }
296af7c9 930
290adf38 931 qemu_kvm_eat_signals(cpu);
509a0d78 932 qemu_wait_io_event_common(cpu);
296af7c9
BS
933}
934
7e97cd88 935static void *qemu_kvm_cpu_thread_fn(void *arg)
296af7c9 936{
48a106bd 937 CPUState *cpu = arg;
84b4915d 938 int r;
296af7c9 939
6164e6d6 940 qemu_mutex_lock(&qemu_global_mutex);
814e612e 941 qemu_thread_get_self(cpu->thread);
9f09e18a 942 cpu->thread_id = qemu_get_thread_id();
626cf8f4 943 cpu->can_do_io = 1;
4917cf44 944 current_cpu = cpu;
296af7c9 945
504134d2 946 r = kvm_init_vcpu(cpu);
84b4915d
JK
947 if (r < 0) {
948 fprintf(stderr, "kvm_init_vcpu failed: %s\n", strerror(-r));
949 exit(1);
950 }
296af7c9 951
13618e05 952 qemu_kvm_init_cpu_signals(cpu);
296af7c9
BS
953
954 /* signal CPU creation */
61a46217 955 cpu->created = true;
296af7c9
BS
956 qemu_cond_signal(&qemu_cpu_cond);
957
296af7c9 958 while (1) {
a1fcaa73 959 if (cpu_can_run(cpu)) {
1458c363 960 r = kvm_cpu_exec(cpu);
83f338f7 961 if (r == EXCP_DEBUG) {
91325046 962 cpu_handle_guest_debug(cpu);
83f338f7 963 }
0ab07c62 964 }
fd529e8f 965 qemu_kvm_wait_io_event(cpu);
296af7c9
BS
966 }
967
968 return NULL;
969}
970
c7f0f3b1
AL
971static void *qemu_dummy_cpu_thread_fn(void *arg)
972{
973#ifdef _WIN32
974 fprintf(stderr, "qtest is not supported under Windows\n");
975 exit(1);
976#else
10a9021d 977 CPUState *cpu = arg;
c7f0f3b1
AL
978 sigset_t waitset;
979 int r;
980
981 qemu_mutex_lock_iothread();
814e612e 982 qemu_thread_get_self(cpu->thread);
9f09e18a 983 cpu->thread_id = qemu_get_thread_id();
626cf8f4 984 cpu->can_do_io = 1;
c7f0f3b1
AL
985
986 sigemptyset(&waitset);
987 sigaddset(&waitset, SIG_IPI);
988
989 /* signal CPU creation */
61a46217 990 cpu->created = true;
c7f0f3b1
AL
991 qemu_cond_signal(&qemu_cpu_cond);
992
4917cf44 993 current_cpu = cpu;
c7f0f3b1 994 while (1) {
4917cf44 995 current_cpu = NULL;
c7f0f3b1
AL
996 qemu_mutex_unlock_iothread();
997 do {
998 int sig;
999 r = sigwait(&waitset, &sig);
1000 } while (r == -1 && (errno == EAGAIN || errno == EINTR));
1001 if (r == -1) {
1002 perror("sigwait");
1003 exit(1);
1004 }
1005 qemu_mutex_lock_iothread();
4917cf44 1006 current_cpu = cpu;
509a0d78 1007 qemu_wait_io_event_common(cpu);
c7f0f3b1
AL
1008 }
1009
1010 return NULL;
1011#endif
1012}
1013
bdb7ca67
JK
1014static void tcg_exec_all(void);
1015
7e97cd88 1016static void *qemu_tcg_cpu_thread_fn(void *arg)
296af7c9 1017{
c3586ba7 1018 CPUState *cpu = arg;
296af7c9 1019
55f8d6ac 1020 qemu_tcg_init_cpu_signals();
814e612e 1021 qemu_thread_get_self(cpu->thread);
296af7c9 1022
296af7c9 1023 qemu_mutex_lock(&qemu_global_mutex);
38fcbd3f
AF
1024 CPU_FOREACH(cpu) {
1025 cpu->thread_id = qemu_get_thread_id();
1026 cpu->created = true;
626cf8f4 1027 cpu->can_do_io = 1;
38fcbd3f 1028 }
296af7c9
BS
1029 qemu_cond_signal(&qemu_cpu_cond);
1030
fa7d1867 1031 /* wait for initial kick-off after machine start */
bdc44640 1032 while (QTAILQ_FIRST(&cpus)->stopped) {
fa7d1867 1033 qemu_cond_wait(tcg_halt_cond, &qemu_global_mutex);
8e564b4e
JK
1034
1035 /* process any pending work */
bdc44640 1036 CPU_FOREACH(cpu) {
182735ef 1037 qemu_wait_io_event_common(cpu);
8e564b4e 1038 }
0ab07c62 1039 }
296af7c9
BS
1040
1041 while (1) {
bdb7ca67 1042 tcg_exec_all();
ac70aafc
AB
1043
1044 if (use_icount) {
40daca54 1045 int64_t deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
ac70aafc
AB
1046
1047 if (deadline == 0) {
40daca54 1048 qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
ac70aafc 1049 }
3b2319a3 1050 }
6cabe1f3 1051 qemu_tcg_wait_io_event();
296af7c9
BS
1052 }
1053
1054 return NULL;
1055}
1056
2ff09a40 1057static void qemu_cpu_kick_thread(CPUState *cpu)
cc015e9a
PB
1058{
1059#ifndef _WIN32
1060 int err;
1061
814e612e 1062 err = pthread_kill(cpu->thread->thread, SIG_IPI);
cc015e9a
PB
1063 if (err) {
1064 fprintf(stderr, "qemu:%s: %s", __func__, strerror(err));
1065 exit(1);
1066 }
1067#else /* _WIN32 */
60e82579 1068 if (!qemu_cpu_is_self(cpu)) {
ed9164a3
OH
1069 CONTEXT tcgContext;
1070
1071 if (SuspendThread(cpu->hThread) == (DWORD)-1) {
7f1721df 1072 fprintf(stderr, "qemu:%s: GetLastError:%lu\n", __func__,
ed9164a3
OH
1073 GetLastError());
1074 exit(1);
1075 }
1076
1077 /* On multi-core systems, we are not sure that the thread is actually
1078 * suspended until we can get the context.
1079 */
1080 tcgContext.ContextFlags = CONTEXT_CONTROL;
1081 while (GetThreadContext(cpu->hThread, &tcgContext) != 0) {
1082 continue;
1083 }
1084
cc015e9a 1085 cpu_signal(0);
ed9164a3
OH
1086
1087 if (ResumeThread(cpu->hThread) == (DWORD)-1) {
7f1721df 1088 fprintf(stderr, "qemu:%s: GetLastError:%lu\n", __func__,
ed9164a3
OH
1089 GetLastError());
1090 exit(1);
1091 }
cc015e9a
PB
1092 }
1093#endif
1094}
1095
c08d7424 1096void qemu_cpu_kick(CPUState *cpu)
296af7c9 1097{
f5c121b8 1098 qemu_cond_broadcast(cpu->halt_cond);
216fc9a4 1099 if (!tcg_enabled() && !cpu->thread_kicked) {
2ff09a40 1100 qemu_cpu_kick_thread(cpu);
216fc9a4 1101 cpu->thread_kicked = true;
aa2c364b 1102 }
296af7c9
BS
1103}
1104
46d62fac 1105void qemu_cpu_kick_self(void)
296af7c9 1106{
b55c22c6 1107#ifndef _WIN32
4917cf44 1108 assert(current_cpu);
296af7c9 1109
4917cf44
AF
1110 if (!current_cpu->thread_kicked) {
1111 qemu_cpu_kick_thread(current_cpu);
1112 current_cpu->thread_kicked = true;
296af7c9 1113 }
b55c22c6
PB
1114#else
1115 abort();
1116#endif
296af7c9
BS
1117}
1118
60e82579 1119bool qemu_cpu_is_self(CPUState *cpu)
296af7c9 1120{
814e612e 1121 return qemu_thread_is_self(cpu->thread);
296af7c9
BS
1122}
1123
aa723c23
JQ
1124static bool qemu_in_vcpu_thread(void)
1125{
4917cf44 1126 return current_cpu && qemu_cpu_is_self(current_cpu);
aa723c23
JQ
1127}
1128
296af7c9
BS
1129void qemu_mutex_lock_iothread(void)
1130{
c7f0f3b1 1131 if (!tcg_enabled()) {
296af7c9 1132 qemu_mutex_lock(&qemu_global_mutex);
1a28cac3 1133 } else {
46daff13 1134 iothread_requesting_mutex = true;
1a28cac3 1135 if (qemu_mutex_trylock(&qemu_global_mutex)) {
182735ef 1136 qemu_cpu_kick_thread(first_cpu);
1a28cac3
MT
1137 qemu_mutex_lock(&qemu_global_mutex);
1138 }
46daff13
PB
1139 iothread_requesting_mutex = false;
1140 qemu_cond_broadcast(&qemu_io_proceeded_cond);
1a28cac3 1141 }
296af7c9
BS
1142}
1143
1144void qemu_mutex_unlock_iothread(void)
1145{
1146 qemu_mutex_unlock(&qemu_global_mutex);
1147}
1148
1149static int all_vcpus_paused(void)
1150{
bdc44640 1151 CPUState *cpu;
296af7c9 1152
bdc44640 1153 CPU_FOREACH(cpu) {
182735ef 1154 if (!cpu->stopped) {
296af7c9 1155 return 0;
0ab07c62 1156 }
296af7c9
BS
1157 }
1158
1159 return 1;
1160}
1161
1162void pause_all_vcpus(void)
1163{
bdc44640 1164 CPUState *cpu;
296af7c9 1165
40daca54 1166 qemu_clock_enable(QEMU_CLOCK_VIRTUAL, false);
bdc44640 1167 CPU_FOREACH(cpu) {
182735ef
AF
1168 cpu->stop = true;
1169 qemu_cpu_kick(cpu);
296af7c9
BS
1170 }
1171
aa723c23 1172 if (qemu_in_vcpu_thread()) {
d798e974
JK
1173 cpu_stop_current();
1174 if (!kvm_enabled()) {
bdc44640 1175 CPU_FOREACH(cpu) {
182735ef
AF
1176 cpu->stop = false;
1177 cpu->stopped = true;
d798e974
JK
1178 }
1179 return;
1180 }
1181 }
1182
296af7c9 1183 while (!all_vcpus_paused()) {
be7d6c57 1184 qemu_cond_wait(&qemu_pause_cond, &qemu_global_mutex);
bdc44640 1185 CPU_FOREACH(cpu) {
182735ef 1186 qemu_cpu_kick(cpu);
296af7c9
BS
1187 }
1188 }
1189}
1190
2993683b
IM
1191void cpu_resume(CPUState *cpu)
1192{
1193 cpu->stop = false;
1194 cpu->stopped = false;
1195 qemu_cpu_kick(cpu);
1196}
1197
296af7c9
BS
1198void resume_all_vcpus(void)
1199{
bdc44640 1200 CPUState *cpu;
296af7c9 1201
40daca54 1202 qemu_clock_enable(QEMU_CLOCK_VIRTUAL, true);
bdc44640 1203 CPU_FOREACH(cpu) {
182735ef 1204 cpu_resume(cpu);
296af7c9
BS
1205 }
1206}
1207
4900116e
DDAG
1208/* For temporary buffers for forming a name */
1209#define VCPU_THREAD_NAME_SIZE 16
1210
e5ab30a2 1211static void qemu_tcg_init_vcpu(CPUState *cpu)
296af7c9 1212{
4900116e
DDAG
1213 char thread_name[VCPU_THREAD_NAME_SIZE];
1214
09daed84
EI
1215 tcg_cpu_address_space_init(cpu, cpu->as);
1216
296af7c9
BS
1217 /* share a single thread for all cpus with TCG */
1218 if (!tcg_cpu_thread) {
814e612e 1219 cpu->thread = g_malloc0(sizeof(QemuThread));
f5c121b8
AF
1220 cpu->halt_cond = g_malloc0(sizeof(QemuCond));
1221 qemu_cond_init(cpu->halt_cond);
1222 tcg_halt_cond = cpu->halt_cond;
4900116e
DDAG
1223 snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/TCG",
1224 cpu->cpu_index);
1225 qemu_thread_create(cpu->thread, thread_name, qemu_tcg_cpu_thread_fn,
1226 cpu, QEMU_THREAD_JOINABLE);
1ecf47bf 1227#ifdef _WIN32
814e612e 1228 cpu->hThread = qemu_thread_get_handle(cpu->thread);
1ecf47bf 1229#endif
61a46217 1230 while (!cpu->created) {
18a85728 1231 qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
0ab07c62 1232 }
814e612e 1233 tcg_cpu_thread = cpu->thread;
296af7c9 1234 } else {
814e612e 1235 cpu->thread = tcg_cpu_thread;
f5c121b8 1236 cpu->halt_cond = tcg_halt_cond;
296af7c9
BS
1237 }
1238}
1239
48a106bd 1240static void qemu_kvm_start_vcpu(CPUState *cpu)
296af7c9 1241{
4900116e
DDAG
1242 char thread_name[VCPU_THREAD_NAME_SIZE];
1243
814e612e 1244 cpu->thread = g_malloc0(sizeof(QemuThread));
f5c121b8
AF
1245 cpu->halt_cond = g_malloc0(sizeof(QemuCond));
1246 qemu_cond_init(cpu->halt_cond);
4900116e
DDAG
1247 snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/KVM",
1248 cpu->cpu_index);
1249 qemu_thread_create(cpu->thread, thread_name, qemu_kvm_cpu_thread_fn,
1250 cpu, QEMU_THREAD_JOINABLE);
61a46217 1251 while (!cpu->created) {
18a85728 1252 qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
0ab07c62 1253 }
296af7c9
BS
1254}
1255
10a9021d 1256static void qemu_dummy_start_vcpu(CPUState *cpu)
c7f0f3b1 1257{
4900116e
DDAG
1258 char thread_name[VCPU_THREAD_NAME_SIZE];
1259
814e612e 1260 cpu->thread = g_malloc0(sizeof(QemuThread));
f5c121b8
AF
1261 cpu->halt_cond = g_malloc0(sizeof(QemuCond));
1262 qemu_cond_init(cpu->halt_cond);
4900116e
DDAG
1263 snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/DUMMY",
1264 cpu->cpu_index);
1265 qemu_thread_create(cpu->thread, thread_name, qemu_dummy_cpu_thread_fn, cpu,
c7f0f3b1 1266 QEMU_THREAD_JOINABLE);
61a46217 1267 while (!cpu->created) {
c7f0f3b1
AL
1268 qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
1269 }
1270}
1271
c643bed9 1272void qemu_init_vcpu(CPUState *cpu)
296af7c9 1273{
ce3960eb
AF
1274 cpu->nr_cores = smp_cores;
1275 cpu->nr_threads = smp_threads;
f324e766 1276 cpu->stopped = true;
0ab07c62 1277 if (kvm_enabled()) {
48a106bd 1278 qemu_kvm_start_vcpu(cpu);
c7f0f3b1 1279 } else if (tcg_enabled()) {
e5ab30a2 1280 qemu_tcg_init_vcpu(cpu);
c7f0f3b1 1281 } else {
10a9021d 1282 qemu_dummy_start_vcpu(cpu);
0ab07c62 1283 }
296af7c9
BS
1284}
1285
b4a3d965 1286void cpu_stop_current(void)
296af7c9 1287{
4917cf44
AF
1288 if (current_cpu) {
1289 current_cpu->stop = false;
1290 current_cpu->stopped = true;
1291 cpu_exit(current_cpu);
67bb172f 1292 qemu_cond_signal(&qemu_pause_cond);
b4a3d965 1293 }
296af7c9
BS
1294}
1295
56983463 1296int vm_stop(RunState state)
296af7c9 1297{
aa723c23 1298 if (qemu_in_vcpu_thread()) {
74892d24 1299 qemu_system_vmstop_request_prepare();
1dfb4dd9 1300 qemu_system_vmstop_request(state);
296af7c9
BS
1301 /*
1302 * FIXME: should not return to device code in case
1303 * vm_stop() has been requested.
1304 */
b4a3d965 1305 cpu_stop_current();
56983463 1306 return 0;
296af7c9 1307 }
56983463
KW
1308
1309 return do_vm_stop(state);
296af7c9
BS
1310}
1311
8a9236f1
LC
1312/* does a state transition even if the VM is already stopped,
1313 current state is forgotten forever */
56983463 1314int vm_stop_force_state(RunState state)
8a9236f1
LC
1315{
1316 if (runstate_is_running()) {
56983463 1317 return vm_stop(state);
8a9236f1
LC
1318 } else {
1319 runstate_set(state);
594a45ce
KW
1320 /* Make sure to return an error if the flush in a previous vm_stop()
1321 * failed. */
1322 return bdrv_flush_all();
8a9236f1
LC
1323 }
1324}
1325
9349b4f9 1326static int tcg_cpu_exec(CPUArchState *env)
296af7c9 1327{
efee7340 1328 CPUState *cpu = ENV_GET_CPU(env);
296af7c9
BS
1329 int ret;
1330#ifdef CONFIG_PROFILER
1331 int64_t ti;
1332#endif
1333
1334#ifdef CONFIG_PROFILER
1335 ti = profile_getclock();
1336#endif
1337 if (use_icount) {
1338 int64_t count;
ac70aafc 1339 int64_t deadline;
296af7c9 1340 int decr;
c96778bb
FK
1341 timers_state.qemu_icount -= (cpu->icount_decr.u16.low
1342 + cpu->icount_extra);
28ecfd7a 1343 cpu->icount_decr.u16.low = 0;
efee7340 1344 cpu->icount_extra = 0;
40daca54 1345 deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
ac70aafc
AB
1346
1347 /* Maintain prior (possibly buggy) behaviour where if no deadline
40daca54 1348 * was set (as there is no QEMU_CLOCK_VIRTUAL timer) or it is more than
ac70aafc
AB
1349 * INT32_MAX nanoseconds ahead, we still use INT32_MAX
1350 * nanoseconds.
1351 */
1352 if ((deadline < 0) || (deadline > INT32_MAX)) {
1353 deadline = INT32_MAX;
1354 }
1355
1356 count = qemu_icount_round(deadline);
c96778bb 1357 timers_state.qemu_icount += count;
296af7c9
BS
1358 decr = (count > 0xffff) ? 0xffff : count;
1359 count -= decr;
28ecfd7a 1360 cpu->icount_decr.u16.low = decr;
efee7340 1361 cpu->icount_extra = count;
296af7c9
BS
1362 }
1363 ret = cpu_exec(env);
1364#ifdef CONFIG_PROFILER
1365 qemu_time += profile_getclock() - ti;
1366#endif
1367 if (use_icount) {
1368 /* Fold pending instructions back into the
1369 instruction counter, and clear the interrupt flag. */
c96778bb
FK
1370 timers_state.qemu_icount -= (cpu->icount_decr.u16.low
1371 + cpu->icount_extra);
28ecfd7a 1372 cpu->icount_decr.u32 = 0;
efee7340 1373 cpu->icount_extra = 0;
296af7c9
BS
1374 }
1375 return ret;
1376}
1377
bdb7ca67 1378static void tcg_exec_all(void)
296af7c9 1379{
9a36085b
JK
1380 int r;
1381
40daca54
AB
1382 /* Account partial waits to QEMU_CLOCK_VIRTUAL. */
1383 qemu_clock_warp(QEMU_CLOCK_VIRTUAL);
ab33fcda 1384
0ab07c62 1385 if (next_cpu == NULL) {
296af7c9 1386 next_cpu = first_cpu;
0ab07c62 1387 }
bdc44640 1388 for (; next_cpu != NULL && !exit_request; next_cpu = CPU_NEXT(next_cpu)) {
182735ef
AF
1389 CPUState *cpu = next_cpu;
1390 CPUArchState *env = cpu->env_ptr;
296af7c9 1391
40daca54 1392 qemu_clock_enable(QEMU_CLOCK_VIRTUAL,
ed2803da 1393 (cpu->singlestep_enabled & SSTEP_NOTIMER) == 0);
296af7c9 1394
a1fcaa73 1395 if (cpu_can_run(cpu)) {
bdb7ca67 1396 r = tcg_cpu_exec(env);
9a36085b 1397 if (r == EXCP_DEBUG) {
91325046 1398 cpu_handle_guest_debug(cpu);
3c638d06
JK
1399 break;
1400 }
f324e766 1401 } else if (cpu->stop || cpu->stopped) {
296af7c9
BS
1402 break;
1403 }
1404 }
c629a4bc 1405 exit_request = 0;
296af7c9
BS
1406}
1407
9a78eead 1408void list_cpus(FILE *f, fprintf_function cpu_fprintf, const char *optarg)
262353cb
BS
1409{
1410 /* XXX: implement xxx_cpu_list for targets that still miss it */
e916cbf8
PM
1411#if defined(cpu_list)
1412 cpu_list(f, cpu_fprintf);
262353cb
BS
1413#endif
1414}
de0b36b6
LC
1415
1416CpuInfoList *qmp_query_cpus(Error **errp)
1417{
1418 CpuInfoList *head = NULL, *cur_item = NULL;
182735ef 1419 CPUState *cpu;
de0b36b6 1420
bdc44640 1421 CPU_FOREACH(cpu) {
de0b36b6 1422 CpuInfoList *info;
182735ef
AF
1423#if defined(TARGET_I386)
1424 X86CPU *x86_cpu = X86_CPU(cpu);
1425 CPUX86State *env = &x86_cpu->env;
1426#elif defined(TARGET_PPC)
1427 PowerPCCPU *ppc_cpu = POWERPC_CPU(cpu);
1428 CPUPPCState *env = &ppc_cpu->env;
1429#elif defined(TARGET_SPARC)
1430 SPARCCPU *sparc_cpu = SPARC_CPU(cpu);
1431 CPUSPARCState *env = &sparc_cpu->env;
1432#elif defined(TARGET_MIPS)
1433 MIPSCPU *mips_cpu = MIPS_CPU(cpu);
1434 CPUMIPSState *env = &mips_cpu->env;
48e06fe0
BK
1435#elif defined(TARGET_TRICORE)
1436 TriCoreCPU *tricore_cpu = TRICORE_CPU(cpu);
1437 CPUTriCoreState *env = &tricore_cpu->env;
182735ef 1438#endif
de0b36b6 1439
cb446eca 1440 cpu_synchronize_state(cpu);
de0b36b6
LC
1441
1442 info = g_malloc0(sizeof(*info));
1443 info->value = g_malloc0(sizeof(*info->value));
55e5c285 1444 info->value->CPU = cpu->cpu_index;
182735ef 1445 info->value->current = (cpu == first_cpu);
259186a7 1446 info->value->halted = cpu->halted;
9f09e18a 1447 info->value->thread_id = cpu->thread_id;
de0b36b6
LC
1448#if defined(TARGET_I386)
1449 info->value->has_pc = true;
1450 info->value->pc = env->eip + env->segs[R_CS].base;
1451#elif defined(TARGET_PPC)
1452 info->value->has_nip = true;
1453 info->value->nip = env->nip;
1454#elif defined(TARGET_SPARC)
1455 info->value->has_pc = true;
1456 info->value->pc = env->pc;
1457 info->value->has_npc = true;
1458 info->value->npc = env->npc;
1459#elif defined(TARGET_MIPS)
1460 info->value->has_PC = true;
1461 info->value->PC = env->active_tc.PC;
48e06fe0
BK
1462#elif defined(TARGET_TRICORE)
1463 info->value->has_PC = true;
1464 info->value->PC = env->PC;
de0b36b6
LC
1465#endif
1466
1467 /* XXX: waiting for the qapi to support GSList */
1468 if (!cur_item) {
1469 head = cur_item = info;
1470 } else {
1471 cur_item->next = info;
1472 cur_item = info;
1473 }
1474 }
1475
1476 return head;
1477}
0cfd6a9a
LC
1478
1479void qmp_memsave(int64_t addr, int64_t size, const char *filename,
1480 bool has_cpu, int64_t cpu_index, Error **errp)
1481{
1482 FILE *f;
1483 uint32_t l;
55e5c285 1484 CPUState *cpu;
0cfd6a9a
LC
1485 uint8_t buf[1024];
1486
1487 if (!has_cpu) {
1488 cpu_index = 0;
1489 }
1490
151d1322
AF
1491 cpu = qemu_get_cpu(cpu_index);
1492 if (cpu == NULL) {
0cfd6a9a
LC
1493 error_set(errp, QERR_INVALID_PARAMETER_VALUE, "cpu-index",
1494 "a CPU number");
1495 return;
1496 }
1497
1498 f = fopen(filename, "wb");
1499 if (!f) {
618da851 1500 error_setg_file_open(errp, errno, filename);
0cfd6a9a
LC
1501 return;
1502 }
1503
1504 while (size != 0) {
1505 l = sizeof(buf);
1506 if (l > size)
1507 l = size;
2f4d0f59
AK
1508 if (cpu_memory_rw_debug(cpu, addr, buf, l, 0) != 0) {
1509 error_setg(errp, "Invalid addr 0x%016" PRIx64 "specified", addr);
1510 goto exit;
1511 }
0cfd6a9a
LC
1512 if (fwrite(buf, 1, l, f) != l) {
1513 error_set(errp, QERR_IO_ERROR);
1514 goto exit;
1515 }
1516 addr += l;
1517 size -= l;
1518 }
1519
1520exit:
1521 fclose(f);
1522}
6d3962bf
LC
1523
1524void qmp_pmemsave(int64_t addr, int64_t size, const char *filename,
1525 Error **errp)
1526{
1527 FILE *f;
1528 uint32_t l;
1529 uint8_t buf[1024];
1530
1531 f = fopen(filename, "wb");
1532 if (!f) {
618da851 1533 error_setg_file_open(errp, errno, filename);
6d3962bf
LC
1534 return;
1535 }
1536
1537 while (size != 0) {
1538 l = sizeof(buf);
1539 if (l > size)
1540 l = size;
eb6282f2 1541 cpu_physical_memory_read(addr, buf, l);
6d3962bf
LC
1542 if (fwrite(buf, 1, l, f) != l) {
1543 error_set(errp, QERR_IO_ERROR);
1544 goto exit;
1545 }
1546 addr += l;
1547 size -= l;
1548 }
1549
1550exit:
1551 fclose(f);
1552}
ab49ab5c
LC
1553
1554void qmp_inject_nmi(Error **errp)
1555{
1556#if defined(TARGET_I386)
182735ef
AF
1557 CPUState *cs;
1558
bdc44640 1559 CPU_FOREACH(cs) {
182735ef 1560 X86CPU *cpu = X86_CPU(cs);
ab49ab5c 1561
02e51483 1562 if (!cpu->apic_state) {
182735ef 1563 cpu_interrupt(cs, CPU_INTERRUPT_NMI);
02c09195 1564 } else {
02e51483 1565 apic_deliver_nmi(cpu->apic_state);
02c09195 1566 }
ab49ab5c
LC
1567 }
1568#else
9cb805fd 1569 nmi_monitor_handle(monitor_get_cpu_index(), errp);
ab49ab5c
LC
1570#endif
1571}
27498bef
ST
1572
1573void dump_drift_info(FILE *f, fprintf_function cpu_fprintf)
1574{
1575 if (!use_icount) {
1576 return;
1577 }
1578
1579 cpu_fprintf(f, "Host - Guest clock %"PRIi64" ms\n",
1580 (cpu_get_clock() - cpu_get_icount())/SCALE_MS);
1581 if (icount_align_option) {
1582 cpu_fprintf(f, "Max guest delay %"PRIi64" ms\n", -max_delay/SCALE_MS);
1583 cpu_fprintf(f, "Max guest advance %"PRIi64" ms\n", max_advance/SCALE_MS);
1584 } else {
1585 cpu_fprintf(f, "Max guest delay NA\n");
1586 cpu_fprintf(f, "Max guest advance NA\n");
1587 }
1588}