]> git.proxmox.com Git - mirror_qemu.git/blame - cpus.c
PPC: e500: Support dynamically spawned sysbus devices
[mirror_qemu.git] / cpus.c
CommitLineData
296af7c9
BS
1/*
2 * QEMU System Emulator
3 *
4 * Copyright (c) 2003-2008 Fabrice Bellard
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to deal
8 * in the Software without restriction, including without limitation the rights
9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 * copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22 * THE SOFTWARE.
23 */
24
25/* Needed early for CONFIG_BSD etc. */
26#include "config-host.h"
27
83c9089e 28#include "monitor/monitor.h"
a4e15de9 29#include "qapi/qmp/qerror.h"
9c17d615 30#include "sysemu/sysemu.h"
022c62cb 31#include "exec/gdbstub.h"
9c17d615
PB
32#include "sysemu/dma.h"
33#include "sysemu/kvm.h"
de0b36b6 34#include "qmp-commands.h"
296af7c9 35
1de7afc9 36#include "qemu/thread.h"
9c17d615
PB
37#include "sysemu/cpus.h"
38#include "sysemu/qtest.h"
1de7afc9
PB
39#include "qemu/main-loop.h"
40#include "qemu/bitmap.h"
cb365646 41#include "qemu/seqlock.h"
a4e15de9 42#include "qapi-event.h"
9cb805fd 43#include "hw/nmi.h"
0ff0fc19
JK
44
45#ifndef _WIN32
1de7afc9 46#include "qemu/compatfd.h"
0ff0fc19 47#endif
296af7c9 48
6d9cb73c
JK
49#ifdef CONFIG_LINUX
50
51#include <sys/prctl.h>
52
c0532a76
MT
53#ifndef PR_MCE_KILL
54#define PR_MCE_KILL 33
55#endif
56
6d9cb73c
JK
57#ifndef PR_MCE_KILL_SET
58#define PR_MCE_KILL_SET 1
59#endif
60
61#ifndef PR_MCE_KILL_EARLY
62#define PR_MCE_KILL_EARLY 1
63#endif
64
65#endif /* CONFIG_LINUX */
66
182735ef 67static CPUState *next_cpu;
27498bef
ST
68int64_t max_delay;
69int64_t max_advance;
296af7c9 70
321bc0b2
TC
71bool cpu_is_stopped(CPUState *cpu)
72{
73 return cpu->stopped || !runstate_is_running();
74}
75
a98ae1d8 76static bool cpu_thread_is_idle(CPUState *cpu)
ac873f1e 77{
c64ca814 78 if (cpu->stop || cpu->queued_work_first) {
ac873f1e
PM
79 return false;
80 }
321bc0b2 81 if (cpu_is_stopped(cpu)) {
ac873f1e
PM
82 return true;
83 }
8c2e1b00 84 if (!cpu->halted || cpu_has_work(cpu) ||
215e79c0 85 kvm_halt_in_kernel()) {
ac873f1e
PM
86 return false;
87 }
88 return true;
89}
90
91static bool all_cpu_threads_idle(void)
92{
182735ef 93 CPUState *cpu;
ac873f1e 94
bdc44640 95 CPU_FOREACH(cpu) {
182735ef 96 if (!cpu_thread_is_idle(cpu)) {
ac873f1e
PM
97 return false;
98 }
99 }
100 return true;
101}
102
946fb27c
PB
103/***********************************************************/
104/* guest cycle counter */
105
a3270e19
PB
106/* Protected by TimersState seqlock */
107
71468395 108static int64_t vm_clock_warp_start = -1;
946fb27c
PB
109/* Conversion factor from emulated instructions to virtual clock ticks. */
110static int icount_time_shift;
111/* Arbitrarily pick 1MIPS as the minimum allowable speed. */
112#define MAX_ICOUNT_SHIFT 10
a3270e19 113
946fb27c
PB
114static QEMUTimer *icount_rt_timer;
115static QEMUTimer *icount_vm_timer;
116static QEMUTimer *icount_warp_timer;
946fb27c
PB
117
118typedef struct TimersState {
cb365646 119 /* Protected by BQL. */
946fb27c
PB
120 int64_t cpu_ticks_prev;
121 int64_t cpu_ticks_offset;
cb365646
LPF
122
123 /* cpu_clock_offset can be read out of BQL, so protect it with
124 * this lock.
125 */
126 QemuSeqLock vm_clock_seqlock;
946fb27c
PB
127 int64_t cpu_clock_offset;
128 int32_t cpu_ticks_enabled;
129 int64_t dummy;
c96778bb
FK
130
131 /* Compensate for varying guest execution speed. */
132 int64_t qemu_icount_bias;
133 /* Only written by TCG thread */
134 int64_t qemu_icount;
946fb27c
PB
135} TimersState;
136
d9cd4007 137static TimersState timers_state;
946fb27c
PB
138
139/* Return the virtual CPU time, based on the instruction counter. */
17a15f1b 140static int64_t cpu_get_icount_locked(void)
946fb27c
PB
141{
142 int64_t icount;
4917cf44 143 CPUState *cpu = current_cpu;
946fb27c 144
c96778bb 145 icount = timers_state.qemu_icount;
4917cf44 146 if (cpu) {
99df7dce 147 if (!cpu_can_do_io(cpu)) {
946fb27c
PB
148 fprintf(stderr, "Bad clock read\n");
149 }
28ecfd7a 150 icount -= (cpu->icount_decr.u16.low + cpu->icount_extra);
946fb27c 151 }
3f031313 152 return timers_state.qemu_icount_bias + cpu_icount_to_ns(icount);
946fb27c
PB
153}
154
17a15f1b
PB
155int64_t cpu_get_icount(void)
156{
157 int64_t icount;
158 unsigned start;
159
160 do {
161 start = seqlock_read_begin(&timers_state.vm_clock_seqlock);
162 icount = cpu_get_icount_locked();
163 } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start));
164
165 return icount;
166}
167
3f031313
FK
168int64_t cpu_icount_to_ns(int64_t icount)
169{
170 return icount << icount_time_shift;
171}
172
946fb27c 173/* return the host CPU cycle counter and handle stop/restart */
cb365646 174/* Caller must hold the BQL */
946fb27c
PB
175int64_t cpu_get_ticks(void)
176{
5f3e3101
PB
177 int64_t ticks;
178
946fb27c
PB
179 if (use_icount) {
180 return cpu_get_icount();
181 }
5f3e3101
PB
182
183 ticks = timers_state.cpu_ticks_offset;
184 if (timers_state.cpu_ticks_enabled) {
185 ticks += cpu_get_real_ticks();
186 }
187
188 if (timers_state.cpu_ticks_prev > ticks) {
189 /* Note: non increasing ticks may happen if the host uses
190 software suspend */
191 timers_state.cpu_ticks_offset += timers_state.cpu_ticks_prev - ticks;
192 ticks = timers_state.cpu_ticks_prev;
946fb27c 193 }
5f3e3101
PB
194
195 timers_state.cpu_ticks_prev = ticks;
196 return ticks;
946fb27c
PB
197}
198
cb365646 199static int64_t cpu_get_clock_locked(void)
946fb27c 200{
5f3e3101 201 int64_t ticks;
cb365646 202
5f3e3101
PB
203 ticks = timers_state.cpu_clock_offset;
204 if (timers_state.cpu_ticks_enabled) {
205 ticks += get_clock();
946fb27c 206 }
cb365646 207
5f3e3101 208 return ticks;
cb365646
LPF
209}
210
211/* return the host CPU monotonic timer and handle stop/restart */
212int64_t cpu_get_clock(void)
213{
214 int64_t ti;
215 unsigned start;
216
217 do {
218 start = seqlock_read_begin(&timers_state.vm_clock_seqlock);
219 ti = cpu_get_clock_locked();
220 } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start));
221
222 return ti;
946fb27c
PB
223}
224
c2aa5f81
ST
225/* return the offset between the host clock and virtual CPU clock */
226int64_t cpu_get_clock_offset(void)
227{
228 int64_t ti;
229 unsigned start;
230
231 do {
232 start = seqlock_read_begin(&timers_state.vm_clock_seqlock);
233 ti = timers_state.cpu_clock_offset;
234 if (!timers_state.cpu_ticks_enabled) {
235 ti -= get_clock();
236 }
237 } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start));
238
239 return -ti;
240}
241
cb365646
LPF
242/* enable cpu_get_ticks()
243 * Caller must hold BQL which server as mutex for vm_clock_seqlock.
244 */
946fb27c
PB
245void cpu_enable_ticks(void)
246{
cb365646
LPF
247 /* Here, the really thing protected by seqlock is cpu_clock_offset. */
248 seqlock_write_lock(&timers_state.vm_clock_seqlock);
946fb27c
PB
249 if (!timers_state.cpu_ticks_enabled) {
250 timers_state.cpu_ticks_offset -= cpu_get_real_ticks();
251 timers_state.cpu_clock_offset -= get_clock();
252 timers_state.cpu_ticks_enabled = 1;
253 }
cb365646 254 seqlock_write_unlock(&timers_state.vm_clock_seqlock);
946fb27c
PB
255}
256
257/* disable cpu_get_ticks() : the clock is stopped. You must not call
cb365646
LPF
258 * cpu_get_ticks() after that.
259 * Caller must hold BQL which server as mutex for vm_clock_seqlock.
260 */
946fb27c
PB
261void cpu_disable_ticks(void)
262{
cb365646
LPF
263 /* Here, the really thing protected by seqlock is cpu_clock_offset. */
264 seqlock_write_lock(&timers_state.vm_clock_seqlock);
946fb27c 265 if (timers_state.cpu_ticks_enabled) {
5f3e3101 266 timers_state.cpu_ticks_offset += cpu_get_real_ticks();
cb365646 267 timers_state.cpu_clock_offset = cpu_get_clock_locked();
946fb27c
PB
268 timers_state.cpu_ticks_enabled = 0;
269 }
cb365646 270 seqlock_write_unlock(&timers_state.vm_clock_seqlock);
946fb27c
PB
271}
272
273/* Correlation between real and virtual time is always going to be
274 fairly approximate, so ignore small variation.
275 When the guest is idle real and virtual time will be aligned in
276 the IO wait loop. */
277#define ICOUNT_WOBBLE (get_ticks_per_sec() / 10)
278
279static void icount_adjust(void)
280{
281 int64_t cur_time;
282 int64_t cur_icount;
283 int64_t delta;
a3270e19
PB
284
285 /* Protected by TimersState mutex. */
946fb27c 286 static int64_t last_delta;
468cc7cf 287
946fb27c
PB
288 /* If the VM is not running, then do nothing. */
289 if (!runstate_is_running()) {
290 return;
291 }
468cc7cf 292
17a15f1b
PB
293 seqlock_write_lock(&timers_state.vm_clock_seqlock);
294 cur_time = cpu_get_clock_locked();
295 cur_icount = cpu_get_icount_locked();
468cc7cf 296
946fb27c
PB
297 delta = cur_icount - cur_time;
298 /* FIXME: This is a very crude algorithm, somewhat prone to oscillation. */
299 if (delta > 0
300 && last_delta + ICOUNT_WOBBLE < delta * 2
301 && icount_time_shift > 0) {
302 /* The guest is getting too far ahead. Slow time down. */
303 icount_time_shift--;
304 }
305 if (delta < 0
306 && last_delta - ICOUNT_WOBBLE > delta * 2
307 && icount_time_shift < MAX_ICOUNT_SHIFT) {
308 /* The guest is getting too far behind. Speed time up. */
309 icount_time_shift++;
310 }
311 last_delta = delta;
c96778bb
FK
312 timers_state.qemu_icount_bias = cur_icount
313 - (timers_state.qemu_icount << icount_time_shift);
17a15f1b 314 seqlock_write_unlock(&timers_state.vm_clock_seqlock);
946fb27c
PB
315}
316
317static void icount_adjust_rt(void *opaque)
318{
40daca54
AB
319 timer_mod(icount_rt_timer,
320 qemu_clock_get_ms(QEMU_CLOCK_REALTIME) + 1000);
946fb27c
PB
321 icount_adjust();
322}
323
324static void icount_adjust_vm(void *opaque)
325{
40daca54
AB
326 timer_mod(icount_vm_timer,
327 qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
328 get_ticks_per_sec() / 10);
946fb27c
PB
329 icount_adjust();
330}
331
332static int64_t qemu_icount_round(int64_t count)
333{
334 return (count + (1 << icount_time_shift) - 1) >> icount_time_shift;
335}
336
337static void icount_warp_rt(void *opaque)
338{
17a15f1b
PB
339 /* The icount_warp_timer is rescheduled soon after vm_clock_warp_start
340 * changes from -1 to another value, so the race here is okay.
341 */
342 if (atomic_read(&vm_clock_warp_start) == -1) {
946fb27c
PB
343 return;
344 }
345
17a15f1b 346 seqlock_write_lock(&timers_state.vm_clock_seqlock);
946fb27c 347 if (runstate_is_running()) {
40daca54 348 int64_t clock = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
8ed961d9
PB
349 int64_t warp_delta;
350
351 warp_delta = clock - vm_clock_warp_start;
352 if (use_icount == 2) {
946fb27c 353 /*
40daca54 354 * In adaptive mode, do not let QEMU_CLOCK_VIRTUAL run too
946fb27c
PB
355 * far ahead of real time.
356 */
17a15f1b
PB
357 int64_t cur_time = cpu_get_clock_locked();
358 int64_t cur_icount = cpu_get_icount_locked();
946fb27c 359 int64_t delta = cur_time - cur_icount;
8ed961d9 360 warp_delta = MIN(warp_delta, delta);
946fb27c 361 }
c96778bb 362 timers_state.qemu_icount_bias += warp_delta;
946fb27c
PB
363 }
364 vm_clock_warp_start = -1;
17a15f1b 365 seqlock_write_unlock(&timers_state.vm_clock_seqlock);
8ed961d9
PB
366
367 if (qemu_clock_expired(QEMU_CLOCK_VIRTUAL)) {
368 qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
369 }
946fb27c
PB
370}
371
8156be56
PB
372void qtest_clock_warp(int64_t dest)
373{
40daca54 374 int64_t clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
8156be56
PB
375 assert(qtest_enabled());
376 while (clock < dest) {
40daca54 377 int64_t deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
c9299e2f 378 int64_t warp = qemu_soonest_timeout(dest - clock, deadline);
17a15f1b 379 seqlock_write_lock(&timers_state.vm_clock_seqlock);
c96778bb 380 timers_state.qemu_icount_bias += warp;
17a15f1b
PB
381 seqlock_write_unlock(&timers_state.vm_clock_seqlock);
382
40daca54
AB
383 qemu_clock_run_timers(QEMU_CLOCK_VIRTUAL);
384 clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
8156be56 385 }
40daca54 386 qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
8156be56
PB
387}
388
40daca54 389void qemu_clock_warp(QEMUClockType type)
946fb27c 390{
ce78d18c 391 int64_t clock;
946fb27c
PB
392 int64_t deadline;
393
394 /*
395 * There are too many global variables to make the "warp" behavior
396 * applicable to other clocks. But a clock argument removes the
397 * need for if statements all over the place.
398 */
40daca54 399 if (type != QEMU_CLOCK_VIRTUAL || !use_icount) {
946fb27c
PB
400 return;
401 }
402
403 /*
40daca54
AB
404 * If the CPUs have been sleeping, advance QEMU_CLOCK_VIRTUAL timer now.
405 * This ensures that the deadline for the timer is computed correctly below.
946fb27c
PB
406 * This also makes sure that the insn counter is synchronized before the
407 * CPU starts running, in case the CPU is woken by an event other than
40daca54 408 * the earliest QEMU_CLOCK_VIRTUAL timer.
946fb27c
PB
409 */
410 icount_warp_rt(NULL);
ce78d18c
PB
411 timer_del(icount_warp_timer);
412 if (!all_cpu_threads_idle()) {
946fb27c
PB
413 return;
414 }
415
8156be56
PB
416 if (qtest_enabled()) {
417 /* When testing, qtest commands advance icount. */
418 return;
419 }
420
ac70aafc 421 /* We want to use the earliest deadline from ALL vm_clocks */
ce78d18c 422 clock = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
40daca54 423 deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
ce78d18c
PB
424 if (deadline < 0) {
425 return;
ac70aafc
AB
426 }
427
946fb27c
PB
428 if (deadline > 0) {
429 /*
40daca54 430 * Ensure QEMU_CLOCK_VIRTUAL proceeds even when the virtual CPU goes to
946fb27c
PB
431 * sleep. Otherwise, the CPU might be waiting for a future timer
432 * interrupt to wake it up, but the interrupt never comes because
433 * the vCPU isn't running any insns and thus doesn't advance the
40daca54 434 * QEMU_CLOCK_VIRTUAL.
946fb27c
PB
435 *
436 * An extreme solution for this problem would be to never let VCPUs
40daca54
AB
437 * sleep in icount mode if there is a pending QEMU_CLOCK_VIRTUAL
438 * timer; rather time could just advance to the next QEMU_CLOCK_VIRTUAL
439 * event. Instead, we do stop VCPUs and only advance QEMU_CLOCK_VIRTUAL
440 * after some e"real" time, (related to the time left until the next
441 * event) has passed. The QEMU_CLOCK_REALTIME timer will do this.
442 * This avoids that the warps are visible externally; for example,
443 * you will not be sending network packets continuously instead of
444 * every 100ms.
946fb27c 445 */
17a15f1b 446 seqlock_write_lock(&timers_state.vm_clock_seqlock);
ce78d18c
PB
447 if (vm_clock_warp_start == -1 || vm_clock_warp_start > clock) {
448 vm_clock_warp_start = clock;
449 }
17a15f1b 450 seqlock_write_unlock(&timers_state.vm_clock_seqlock);
ce78d18c 451 timer_mod_anticipate(icount_warp_timer, clock + deadline);
ac70aafc 452 } else if (deadline == 0) {
40daca54 453 qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
946fb27c
PB
454 }
455}
456
d09eae37
FK
457static bool icount_state_needed(void *opaque)
458{
459 return use_icount;
460}
461
462/*
463 * This is a subsection for icount migration.
464 */
465static const VMStateDescription icount_vmstate_timers = {
466 .name = "timer/icount",
467 .version_id = 1,
468 .minimum_version_id = 1,
469 .fields = (VMStateField[]) {
470 VMSTATE_INT64(qemu_icount_bias, TimersState),
471 VMSTATE_INT64(qemu_icount, TimersState),
472 VMSTATE_END_OF_LIST()
473 }
474};
475
946fb27c
PB
476static const VMStateDescription vmstate_timers = {
477 .name = "timer",
478 .version_id = 2,
479 .minimum_version_id = 1,
35d08458 480 .fields = (VMStateField[]) {
946fb27c
PB
481 VMSTATE_INT64(cpu_ticks_offset, TimersState),
482 VMSTATE_INT64(dummy, TimersState),
483 VMSTATE_INT64_V(cpu_clock_offset, TimersState, 2),
484 VMSTATE_END_OF_LIST()
d09eae37
FK
485 },
486 .subsections = (VMStateSubsection[]) {
487 {
488 .vmsd = &icount_vmstate_timers,
489 .needed = icount_state_needed,
490 }, {
491 /* empty */
492 }
946fb27c
PB
493 }
494};
495
4603ea01
PD
496void cpu_ticks_init(void)
497{
498 seqlock_init(&timers_state.vm_clock_seqlock, NULL);
499 vmstate_register(NULL, 0, &vmstate_timers, &timers_state);
500}
501
1ad9580b 502void configure_icount(QemuOpts *opts, Error **errp)
946fb27c 503{
1ad9580b 504 const char *option;
a8bfac37 505 char *rem_str = NULL;
1ad9580b 506
1ad9580b 507 option = qemu_opt_get(opts, "shift");
946fb27c 508 if (!option) {
a8bfac37
ST
509 if (qemu_opt_get(opts, "align") != NULL) {
510 error_setg(errp, "Please specify shift option when using align");
511 }
946fb27c
PB
512 return;
513 }
a8bfac37 514 icount_align_option = qemu_opt_get_bool(opts, "align", false);
40daca54
AB
515 icount_warp_timer = timer_new_ns(QEMU_CLOCK_REALTIME,
516 icount_warp_rt, NULL);
946fb27c 517 if (strcmp(option, "auto") != 0) {
a8bfac37
ST
518 errno = 0;
519 icount_time_shift = strtol(option, &rem_str, 0);
520 if (errno != 0 || *rem_str != '\0' || !strlen(option)) {
521 error_setg(errp, "icount: Invalid shift value");
522 }
946fb27c
PB
523 use_icount = 1;
524 return;
a8bfac37
ST
525 } else if (icount_align_option) {
526 error_setg(errp, "shift=auto and align=on are incompatible");
946fb27c
PB
527 }
528
529 use_icount = 2;
530
531 /* 125MIPS seems a reasonable initial guess at the guest speed.
532 It will be corrected fairly quickly anyway. */
533 icount_time_shift = 3;
534
535 /* Have both realtime and virtual time triggers for speed adjustment.
536 The realtime trigger catches emulated time passing too slowly,
537 the virtual time trigger catches emulated time passing too fast.
538 Realtime triggers occur even when idle, so use them less frequently
539 than VM triggers. */
40daca54
AB
540 icount_rt_timer = timer_new_ms(QEMU_CLOCK_REALTIME,
541 icount_adjust_rt, NULL);
542 timer_mod(icount_rt_timer,
543 qemu_clock_get_ms(QEMU_CLOCK_REALTIME) + 1000);
544 icount_vm_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
545 icount_adjust_vm, NULL);
546 timer_mod(icount_vm_timer,
547 qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
548 get_ticks_per_sec() / 10);
946fb27c
PB
549}
550
296af7c9
BS
551/***********************************************************/
552void hw_error(const char *fmt, ...)
553{
554 va_list ap;
55e5c285 555 CPUState *cpu;
296af7c9
BS
556
557 va_start(ap, fmt);
558 fprintf(stderr, "qemu: hardware error: ");
559 vfprintf(stderr, fmt, ap);
560 fprintf(stderr, "\n");
bdc44640 561 CPU_FOREACH(cpu) {
55e5c285 562 fprintf(stderr, "CPU #%d:\n", cpu->cpu_index);
878096ee 563 cpu_dump_state(cpu, stderr, fprintf, CPU_DUMP_FPU);
296af7c9
BS
564 }
565 va_end(ap);
566 abort();
567}
568
569void cpu_synchronize_all_states(void)
570{
182735ef 571 CPUState *cpu;
296af7c9 572
bdc44640 573 CPU_FOREACH(cpu) {
182735ef 574 cpu_synchronize_state(cpu);
296af7c9
BS
575 }
576}
577
578void cpu_synchronize_all_post_reset(void)
579{
182735ef 580 CPUState *cpu;
296af7c9 581
bdc44640 582 CPU_FOREACH(cpu) {
182735ef 583 cpu_synchronize_post_reset(cpu);
296af7c9
BS
584 }
585}
586
587void cpu_synchronize_all_post_init(void)
588{
182735ef 589 CPUState *cpu;
296af7c9 590
bdc44640 591 CPU_FOREACH(cpu) {
182735ef 592 cpu_synchronize_post_init(cpu);
296af7c9
BS
593 }
594}
595
de9d61e8
MT
596void cpu_clean_all_dirty(void)
597{
598 CPUState *cpu;
599
600 CPU_FOREACH(cpu) {
601 cpu_clean_state(cpu);
602 }
603}
604
56983463 605static int do_vm_stop(RunState state)
296af7c9 606{
56983463
KW
607 int ret = 0;
608
1354869c 609 if (runstate_is_running()) {
296af7c9 610 cpu_disable_ticks();
296af7c9 611 pause_all_vcpus();
f5bbfba1 612 runstate_set(state);
1dfb4dd9 613 vm_state_notify(0, state);
a4e15de9 614 qapi_event_send_stop(&error_abort);
296af7c9 615 }
56983463 616
594a45ce
KW
617 bdrv_drain_all();
618 ret = bdrv_flush_all();
619
56983463 620 return ret;
296af7c9
BS
621}
622
a1fcaa73 623static bool cpu_can_run(CPUState *cpu)
296af7c9 624{
4fdeee7c 625 if (cpu->stop) {
a1fcaa73 626 return false;
0ab07c62 627 }
321bc0b2 628 if (cpu_is_stopped(cpu)) {
a1fcaa73 629 return false;
0ab07c62 630 }
a1fcaa73 631 return true;
296af7c9
BS
632}
633
91325046 634static void cpu_handle_guest_debug(CPUState *cpu)
83f338f7 635{
64f6b346 636 gdb_set_stop_cpu(cpu);
8cf71710 637 qemu_system_debug_request();
f324e766 638 cpu->stopped = true;
3c638d06
JK
639}
640
714bd040
PB
641static void cpu_signal(int sig)
642{
4917cf44
AF
643 if (current_cpu) {
644 cpu_exit(current_cpu);
714bd040
PB
645 }
646 exit_request = 1;
647}
714bd040 648
6d9cb73c
JK
649#ifdef CONFIG_LINUX
650static void sigbus_reraise(void)
651{
652 sigset_t set;
653 struct sigaction action;
654
655 memset(&action, 0, sizeof(action));
656 action.sa_handler = SIG_DFL;
657 if (!sigaction(SIGBUS, &action, NULL)) {
658 raise(SIGBUS);
659 sigemptyset(&set);
660 sigaddset(&set, SIGBUS);
661 sigprocmask(SIG_UNBLOCK, &set, NULL);
662 }
663 perror("Failed to re-raise SIGBUS!\n");
664 abort();
665}
666
667static void sigbus_handler(int n, struct qemu_signalfd_siginfo *siginfo,
668 void *ctx)
669{
670 if (kvm_on_sigbus(siginfo->ssi_code,
671 (void *)(intptr_t)siginfo->ssi_addr)) {
672 sigbus_reraise();
673 }
674}
675
676static void qemu_init_sigbus(void)
677{
678 struct sigaction action;
679
680 memset(&action, 0, sizeof(action));
681 action.sa_flags = SA_SIGINFO;
682 action.sa_sigaction = (void (*)(int, siginfo_t*, void*))sigbus_handler;
683 sigaction(SIGBUS, &action, NULL);
684
685 prctl(PR_MCE_KILL, PR_MCE_KILL_SET, PR_MCE_KILL_EARLY, 0, 0);
686}
687
290adf38 688static void qemu_kvm_eat_signals(CPUState *cpu)
1ab3c6c0
JK
689{
690 struct timespec ts = { 0, 0 };
691 siginfo_t siginfo;
692 sigset_t waitset;
693 sigset_t chkset;
694 int r;
695
696 sigemptyset(&waitset);
697 sigaddset(&waitset, SIG_IPI);
698 sigaddset(&waitset, SIGBUS);
699
700 do {
701 r = sigtimedwait(&waitset, &siginfo, &ts);
702 if (r == -1 && !(errno == EAGAIN || errno == EINTR)) {
703 perror("sigtimedwait");
704 exit(1);
705 }
706
707 switch (r) {
708 case SIGBUS:
290adf38 709 if (kvm_on_sigbus_vcpu(cpu, siginfo.si_code, siginfo.si_addr)) {
1ab3c6c0
JK
710 sigbus_reraise();
711 }
712 break;
713 default:
714 break;
715 }
716
717 r = sigpending(&chkset);
718 if (r == -1) {
719 perror("sigpending");
720 exit(1);
721 }
722 } while (sigismember(&chkset, SIG_IPI) || sigismember(&chkset, SIGBUS));
1ab3c6c0
JK
723}
724
6d9cb73c
JK
725#else /* !CONFIG_LINUX */
726
/* Without CONFIG_LINUX there is no SIGBUS setup to perform. */
static void qemu_init_sigbus(void)
{
    /* intentionally empty */
}
1ab3c6c0 730
290adf38 731static void qemu_kvm_eat_signals(CPUState *cpu)
1ab3c6c0
JK
732{
733}
6d9cb73c
JK
734#endif /* !CONFIG_LINUX */
735
296af7c9 736#ifndef _WIN32
55f8d6ac
JK
/* No-op signal handler; installed for SIG_IPI by the KVM signal setup. */
static void dummy_signal(int sig)
{
    /* intentionally empty */
}
55f8d6ac 740
13618e05 741static void qemu_kvm_init_cpu_signals(CPUState *cpu)
714bd040
PB
742{
743 int r;
744 sigset_t set;
745 struct sigaction sigact;
746
747 memset(&sigact, 0, sizeof(sigact));
748 sigact.sa_handler = dummy_signal;
749 sigaction(SIG_IPI, &sigact, NULL);
750
714bd040
PB
751 pthread_sigmask(SIG_BLOCK, NULL, &set);
752 sigdelset(&set, SIG_IPI);
714bd040 753 sigdelset(&set, SIGBUS);
491d6e80 754 r = kvm_set_signal_mask(cpu, &set);
714bd040
PB
755 if (r) {
756 fprintf(stderr, "kvm_set_signal_mask: %s\n", strerror(-r));
757 exit(1);
758 }
759}
760
761static void qemu_tcg_init_cpu_signals(void)
762{
714bd040
PB
763 sigset_t set;
764 struct sigaction sigact;
765
766 memset(&sigact, 0, sizeof(sigact));
767 sigact.sa_handler = cpu_signal;
768 sigaction(SIG_IPI, &sigact, NULL);
769
770 sigemptyset(&set);
771 sigaddset(&set, SIG_IPI);
772 pthread_sigmask(SIG_UNBLOCK, &set, NULL);
714bd040
PB
773}
774
55f8d6ac 775#else /* _WIN32 */
13618e05 776static void qemu_kvm_init_cpu_signals(CPUState *cpu)
ff48eb5f 777{
714bd040
PB
778 abort();
779}
ff48eb5f 780
714bd040
PB
/* Win32 variant: no signal setup is performed for TCG. */
static void qemu_tcg_init_cpu_signals(void)
{
    /* intentionally empty */
}
714bd040 784#endif /* _WIN32 */
ff48eb5f 785
b2532d88 786static QemuMutex qemu_global_mutex;
46daff13
PB
787static QemuCond qemu_io_proceeded_cond;
788static bool iothread_requesting_mutex;
296af7c9
BS
789
790static QemuThread io_thread;
791
792static QemuThread *tcg_cpu_thread;
793static QemuCond *tcg_halt_cond;
794
296af7c9
BS
795/* cpu creation */
796static QemuCond qemu_cpu_cond;
797/* system init */
296af7c9 798static QemuCond qemu_pause_cond;
e82bcec2 799static QemuCond qemu_work_cond;
296af7c9 800
d3b12f5d 801void qemu_init_cpu_loop(void)
296af7c9 802{
6d9cb73c 803 qemu_init_sigbus();
ed94592b 804 qemu_cond_init(&qemu_cpu_cond);
ed94592b
AL
805 qemu_cond_init(&qemu_pause_cond);
806 qemu_cond_init(&qemu_work_cond);
46daff13 807 qemu_cond_init(&qemu_io_proceeded_cond);
296af7c9 808 qemu_mutex_init(&qemu_global_mutex);
296af7c9 809
b7680cb6 810 qemu_thread_get_self(&io_thread);
296af7c9
BS
811}
812
f100f0b3 813void run_on_cpu(CPUState *cpu, void (*func)(void *data), void *data)
e82bcec2
MT
814{
815 struct qemu_work_item wi;
816
60e82579 817 if (qemu_cpu_is_self(cpu)) {
e82bcec2
MT
818 func(data);
819 return;
820 }
821
822 wi.func = func;
823 wi.data = data;
3c02270d 824 wi.free = false;
c64ca814
AF
825 if (cpu->queued_work_first == NULL) {
826 cpu->queued_work_first = &wi;
0ab07c62 827 } else {
c64ca814 828 cpu->queued_work_last->next = &wi;
0ab07c62 829 }
c64ca814 830 cpu->queued_work_last = &wi;
e82bcec2
MT
831 wi.next = NULL;
832 wi.done = false;
833
c08d7424 834 qemu_cpu_kick(cpu);
e82bcec2 835 while (!wi.done) {
4917cf44 836 CPUState *self_cpu = current_cpu;
e82bcec2
MT
837
838 qemu_cond_wait(&qemu_work_cond, &qemu_global_mutex);
4917cf44 839 current_cpu = self_cpu;
e82bcec2
MT
840 }
841}
842
3c02270d
CV
843void async_run_on_cpu(CPUState *cpu, void (*func)(void *data), void *data)
844{
845 struct qemu_work_item *wi;
846
847 if (qemu_cpu_is_self(cpu)) {
848 func(data);
849 return;
850 }
851
852 wi = g_malloc0(sizeof(struct qemu_work_item));
853 wi->func = func;
854 wi->data = data;
855 wi->free = true;
856 if (cpu->queued_work_first == NULL) {
857 cpu->queued_work_first = wi;
858 } else {
859 cpu->queued_work_last->next = wi;
860 }
861 cpu->queued_work_last = wi;
862 wi->next = NULL;
863 wi->done = false;
864
865 qemu_cpu_kick(cpu);
866}
867
6d45b109 868static void flush_queued_work(CPUState *cpu)
e82bcec2
MT
869{
870 struct qemu_work_item *wi;
871
c64ca814 872 if (cpu->queued_work_first == NULL) {
e82bcec2 873 return;
0ab07c62 874 }
e82bcec2 875
c64ca814
AF
876 while ((wi = cpu->queued_work_first)) {
877 cpu->queued_work_first = wi->next;
e82bcec2
MT
878 wi->func(wi->data);
879 wi->done = true;
3c02270d
CV
880 if (wi->free) {
881 g_free(wi);
882 }
e82bcec2 883 }
c64ca814 884 cpu->queued_work_last = NULL;
e82bcec2
MT
885 qemu_cond_broadcast(&qemu_work_cond);
886}
887
509a0d78 888static void qemu_wait_io_event_common(CPUState *cpu)
296af7c9 889{
4fdeee7c
AF
890 if (cpu->stop) {
891 cpu->stop = false;
f324e766 892 cpu->stopped = true;
296af7c9
BS
893 qemu_cond_signal(&qemu_pause_cond);
894 }
6d45b109 895 flush_queued_work(cpu);
216fc9a4 896 cpu->thread_kicked = false;
296af7c9
BS
897}
898
6cabe1f3 899static void qemu_tcg_wait_io_event(void)
296af7c9 900{
182735ef 901 CPUState *cpu;
6cabe1f3 902
16400322 903 while (all_cpu_threads_idle()) {
ab33fcda
PB
904 /* Start accounting real time to the virtual clock if the CPUs
905 are idle. */
40daca54 906 qemu_clock_warp(QEMU_CLOCK_VIRTUAL);
9705fbb5 907 qemu_cond_wait(tcg_halt_cond, &qemu_global_mutex);
16400322 908 }
296af7c9 909
46daff13
PB
910 while (iothread_requesting_mutex) {
911 qemu_cond_wait(&qemu_io_proceeded_cond, &qemu_global_mutex);
912 }
6cabe1f3 913
bdc44640 914 CPU_FOREACH(cpu) {
182735ef 915 qemu_wait_io_event_common(cpu);
6cabe1f3 916 }
296af7c9
BS
917}
918
fd529e8f 919static void qemu_kvm_wait_io_event(CPUState *cpu)
296af7c9 920{
a98ae1d8 921 while (cpu_thread_is_idle(cpu)) {
f5c121b8 922 qemu_cond_wait(cpu->halt_cond, &qemu_global_mutex);
16400322 923 }
296af7c9 924
290adf38 925 qemu_kvm_eat_signals(cpu);
509a0d78 926 qemu_wait_io_event_common(cpu);
296af7c9
BS
927}
928
7e97cd88 929static void *qemu_kvm_cpu_thread_fn(void *arg)
296af7c9 930{
48a106bd 931 CPUState *cpu = arg;
84b4915d 932 int r;
296af7c9 933
6164e6d6 934 qemu_mutex_lock(&qemu_global_mutex);
814e612e 935 qemu_thread_get_self(cpu->thread);
9f09e18a 936 cpu->thread_id = qemu_get_thread_id();
4917cf44 937 current_cpu = cpu;
296af7c9 938
504134d2 939 r = kvm_init_vcpu(cpu);
84b4915d
JK
940 if (r < 0) {
941 fprintf(stderr, "kvm_init_vcpu failed: %s\n", strerror(-r));
942 exit(1);
943 }
296af7c9 944
13618e05 945 qemu_kvm_init_cpu_signals(cpu);
296af7c9
BS
946
947 /* signal CPU creation */
61a46217 948 cpu->created = true;
296af7c9
BS
949 qemu_cond_signal(&qemu_cpu_cond);
950
296af7c9 951 while (1) {
a1fcaa73 952 if (cpu_can_run(cpu)) {
1458c363 953 r = kvm_cpu_exec(cpu);
83f338f7 954 if (r == EXCP_DEBUG) {
91325046 955 cpu_handle_guest_debug(cpu);
83f338f7 956 }
0ab07c62 957 }
fd529e8f 958 qemu_kvm_wait_io_event(cpu);
296af7c9
BS
959 }
960
961 return NULL;
962}
963
c7f0f3b1
AL
964static void *qemu_dummy_cpu_thread_fn(void *arg)
965{
966#ifdef _WIN32
967 fprintf(stderr, "qtest is not supported under Windows\n");
968 exit(1);
969#else
10a9021d 970 CPUState *cpu = arg;
c7f0f3b1
AL
971 sigset_t waitset;
972 int r;
973
974 qemu_mutex_lock_iothread();
814e612e 975 qemu_thread_get_self(cpu->thread);
9f09e18a 976 cpu->thread_id = qemu_get_thread_id();
c7f0f3b1
AL
977
978 sigemptyset(&waitset);
979 sigaddset(&waitset, SIG_IPI);
980
981 /* signal CPU creation */
61a46217 982 cpu->created = true;
c7f0f3b1
AL
983 qemu_cond_signal(&qemu_cpu_cond);
984
4917cf44 985 current_cpu = cpu;
c7f0f3b1 986 while (1) {
4917cf44 987 current_cpu = NULL;
c7f0f3b1
AL
988 qemu_mutex_unlock_iothread();
989 do {
990 int sig;
991 r = sigwait(&waitset, &sig);
992 } while (r == -1 && (errno == EAGAIN || errno == EINTR));
993 if (r == -1) {
994 perror("sigwait");
995 exit(1);
996 }
997 qemu_mutex_lock_iothread();
4917cf44 998 current_cpu = cpu;
509a0d78 999 qemu_wait_io_event_common(cpu);
c7f0f3b1
AL
1000 }
1001
1002 return NULL;
1003#endif
1004}
1005
bdb7ca67
JK
1006static void tcg_exec_all(void);
1007
7e97cd88 1008static void *qemu_tcg_cpu_thread_fn(void *arg)
296af7c9 1009{
c3586ba7 1010 CPUState *cpu = arg;
296af7c9 1011
55f8d6ac 1012 qemu_tcg_init_cpu_signals();
814e612e 1013 qemu_thread_get_self(cpu->thread);
296af7c9 1014
296af7c9 1015 qemu_mutex_lock(&qemu_global_mutex);
38fcbd3f
AF
1016 CPU_FOREACH(cpu) {
1017 cpu->thread_id = qemu_get_thread_id();
1018 cpu->created = true;
1019 }
296af7c9
BS
1020 qemu_cond_signal(&qemu_cpu_cond);
1021
fa7d1867 1022 /* wait for initial kick-off after machine start */
bdc44640 1023 while (QTAILQ_FIRST(&cpus)->stopped) {
fa7d1867 1024 qemu_cond_wait(tcg_halt_cond, &qemu_global_mutex);
8e564b4e
JK
1025
1026 /* process any pending work */
bdc44640 1027 CPU_FOREACH(cpu) {
182735ef 1028 qemu_wait_io_event_common(cpu);
8e564b4e 1029 }
0ab07c62 1030 }
296af7c9
BS
1031
1032 while (1) {
bdb7ca67 1033 tcg_exec_all();
ac70aafc
AB
1034
1035 if (use_icount) {
40daca54 1036 int64_t deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
ac70aafc
AB
1037
1038 if (deadline == 0) {
40daca54 1039 qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
ac70aafc 1040 }
3b2319a3 1041 }
6cabe1f3 1042 qemu_tcg_wait_io_event();
296af7c9
BS
1043 }
1044
1045 return NULL;
1046}
1047
2ff09a40 1048static void qemu_cpu_kick_thread(CPUState *cpu)
cc015e9a
PB
1049{
1050#ifndef _WIN32
1051 int err;
1052
814e612e 1053 err = pthread_kill(cpu->thread->thread, SIG_IPI);
cc015e9a
PB
1054 if (err) {
1055 fprintf(stderr, "qemu:%s: %s", __func__, strerror(err));
1056 exit(1);
1057 }
1058#else /* _WIN32 */
60e82579 1059 if (!qemu_cpu_is_self(cpu)) {
ed9164a3
OH
1060 CONTEXT tcgContext;
1061
1062 if (SuspendThread(cpu->hThread) == (DWORD)-1) {
7f1721df 1063 fprintf(stderr, "qemu:%s: GetLastError:%lu\n", __func__,
ed9164a3
OH
1064 GetLastError());
1065 exit(1);
1066 }
1067
1068 /* On multi-core systems, we are not sure that the thread is actually
1069 * suspended until we can get the context.
1070 */
1071 tcgContext.ContextFlags = CONTEXT_CONTROL;
1072 while (GetThreadContext(cpu->hThread, &tcgContext) != 0) {
1073 continue;
1074 }
1075
cc015e9a 1076 cpu_signal(0);
ed9164a3
OH
1077
1078 if (ResumeThread(cpu->hThread) == (DWORD)-1) {
7f1721df 1079 fprintf(stderr, "qemu:%s: GetLastError:%lu\n", __func__,
ed9164a3
OH
1080 GetLastError());
1081 exit(1);
1082 }
cc015e9a
PB
1083 }
1084#endif
1085}
1086
c08d7424 1087void qemu_cpu_kick(CPUState *cpu)
296af7c9 1088{
f5c121b8 1089 qemu_cond_broadcast(cpu->halt_cond);
216fc9a4 1090 if (!tcg_enabled() && !cpu->thread_kicked) {
2ff09a40 1091 qemu_cpu_kick_thread(cpu);
216fc9a4 1092 cpu->thread_kicked = true;
aa2c364b 1093 }
296af7c9
BS
1094}
1095
46d62fac 1096void qemu_cpu_kick_self(void)
296af7c9 1097{
b55c22c6 1098#ifndef _WIN32
4917cf44 1099 assert(current_cpu);
296af7c9 1100
4917cf44
AF
1101 if (!current_cpu->thread_kicked) {
1102 qemu_cpu_kick_thread(current_cpu);
1103 current_cpu->thread_kicked = true;
296af7c9 1104 }
b55c22c6
PB
1105#else
1106 abort();
1107#endif
296af7c9
BS
1108}
1109
60e82579 1110bool qemu_cpu_is_self(CPUState *cpu)
296af7c9 1111{
814e612e 1112 return qemu_thread_is_self(cpu->thread);
296af7c9
BS
1113}
1114
aa723c23
JQ
1115static bool qemu_in_vcpu_thread(void)
1116{
4917cf44 1117 return current_cpu && qemu_cpu_is_self(current_cpu);
aa723c23
JQ
1118}
1119
296af7c9
BS
1120void qemu_mutex_lock_iothread(void)
1121{
c7f0f3b1 1122 if (!tcg_enabled()) {
296af7c9 1123 qemu_mutex_lock(&qemu_global_mutex);
1a28cac3 1124 } else {
46daff13 1125 iothread_requesting_mutex = true;
1a28cac3 1126 if (qemu_mutex_trylock(&qemu_global_mutex)) {
182735ef 1127 qemu_cpu_kick_thread(first_cpu);
1a28cac3
MT
1128 qemu_mutex_lock(&qemu_global_mutex);
1129 }
46daff13
PB
1130 iothread_requesting_mutex = false;
1131 qemu_cond_broadcast(&qemu_io_proceeded_cond);
1a28cac3 1132 }
296af7c9
BS
1133}
1134
1135void qemu_mutex_unlock_iothread(void)
1136{
1137 qemu_mutex_unlock(&qemu_global_mutex);
1138}
1139
1140static int all_vcpus_paused(void)
1141{
bdc44640 1142 CPUState *cpu;
296af7c9 1143
bdc44640 1144 CPU_FOREACH(cpu) {
182735ef 1145 if (!cpu->stopped) {
296af7c9 1146 return 0;
0ab07c62 1147 }
296af7c9
BS
1148 }
1149
1150 return 1;
1151}
1152
1153void pause_all_vcpus(void)
1154{
bdc44640 1155 CPUState *cpu;
296af7c9 1156
40daca54 1157 qemu_clock_enable(QEMU_CLOCK_VIRTUAL, false);
bdc44640 1158 CPU_FOREACH(cpu) {
182735ef
AF
1159 cpu->stop = true;
1160 qemu_cpu_kick(cpu);
296af7c9
BS
1161 }
1162
aa723c23 1163 if (qemu_in_vcpu_thread()) {
d798e974
JK
1164 cpu_stop_current();
1165 if (!kvm_enabled()) {
bdc44640 1166 CPU_FOREACH(cpu) {
182735ef
AF
1167 cpu->stop = false;
1168 cpu->stopped = true;
d798e974
JK
1169 }
1170 return;
1171 }
1172 }
1173
296af7c9 1174 while (!all_vcpus_paused()) {
be7d6c57 1175 qemu_cond_wait(&qemu_pause_cond, &qemu_global_mutex);
bdc44640 1176 CPU_FOREACH(cpu) {
182735ef 1177 qemu_cpu_kick(cpu);
296af7c9
BS
1178 }
1179 }
1180}
1181
2993683b
IM
1182void cpu_resume(CPUState *cpu)
1183{
1184 cpu->stop = false;
1185 cpu->stopped = false;
1186 qemu_cpu_kick(cpu);
1187}
1188
296af7c9
BS
1189void resume_all_vcpus(void)
1190{
bdc44640 1191 CPUState *cpu;
296af7c9 1192
40daca54 1193 qemu_clock_enable(QEMU_CLOCK_VIRTUAL, true);
bdc44640 1194 CPU_FOREACH(cpu) {
182735ef 1195 cpu_resume(cpu);
296af7c9
BS
1196 }
1197}
1198
4900116e
DDAG
1199/* For temporary buffers for forming a name */
1200#define VCPU_THREAD_NAME_SIZE 16
1201
e5ab30a2 1202static void qemu_tcg_init_vcpu(CPUState *cpu)
296af7c9 1203{
4900116e
DDAG
1204 char thread_name[VCPU_THREAD_NAME_SIZE];
1205
09daed84
EI
1206 tcg_cpu_address_space_init(cpu, cpu->as);
1207
296af7c9
BS
1208 /* share a single thread for all cpus with TCG */
1209 if (!tcg_cpu_thread) {
814e612e 1210 cpu->thread = g_malloc0(sizeof(QemuThread));
f5c121b8
AF
1211 cpu->halt_cond = g_malloc0(sizeof(QemuCond));
1212 qemu_cond_init(cpu->halt_cond);
1213 tcg_halt_cond = cpu->halt_cond;
4900116e
DDAG
1214 snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/TCG",
1215 cpu->cpu_index);
1216 qemu_thread_create(cpu->thread, thread_name, qemu_tcg_cpu_thread_fn,
1217 cpu, QEMU_THREAD_JOINABLE);
1ecf47bf 1218#ifdef _WIN32
814e612e 1219 cpu->hThread = qemu_thread_get_handle(cpu->thread);
1ecf47bf 1220#endif
61a46217 1221 while (!cpu->created) {
18a85728 1222 qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
0ab07c62 1223 }
814e612e 1224 tcg_cpu_thread = cpu->thread;
296af7c9 1225 } else {
814e612e 1226 cpu->thread = tcg_cpu_thread;
f5c121b8 1227 cpu->halt_cond = tcg_halt_cond;
296af7c9
BS
1228 }
1229}
1230
48a106bd 1231static void qemu_kvm_start_vcpu(CPUState *cpu)
296af7c9 1232{
4900116e
DDAG
1233 char thread_name[VCPU_THREAD_NAME_SIZE];
1234
814e612e 1235 cpu->thread = g_malloc0(sizeof(QemuThread));
f5c121b8
AF
1236 cpu->halt_cond = g_malloc0(sizeof(QemuCond));
1237 qemu_cond_init(cpu->halt_cond);
4900116e
DDAG
1238 snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/KVM",
1239 cpu->cpu_index);
1240 qemu_thread_create(cpu->thread, thread_name, qemu_kvm_cpu_thread_fn,
1241 cpu, QEMU_THREAD_JOINABLE);
61a46217 1242 while (!cpu->created) {
18a85728 1243 qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
0ab07c62 1244 }
296af7c9
BS
1245}
1246
10a9021d 1247static void qemu_dummy_start_vcpu(CPUState *cpu)
c7f0f3b1 1248{
4900116e
DDAG
1249 char thread_name[VCPU_THREAD_NAME_SIZE];
1250
814e612e 1251 cpu->thread = g_malloc0(sizeof(QemuThread));
f5c121b8
AF
1252 cpu->halt_cond = g_malloc0(sizeof(QemuCond));
1253 qemu_cond_init(cpu->halt_cond);
4900116e
DDAG
1254 snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/DUMMY",
1255 cpu->cpu_index);
1256 qemu_thread_create(cpu->thread, thread_name, qemu_dummy_cpu_thread_fn, cpu,
c7f0f3b1 1257 QEMU_THREAD_JOINABLE);
61a46217 1258 while (!cpu->created) {
c7f0f3b1
AL
1259 qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
1260 }
1261}
1262
c643bed9 1263void qemu_init_vcpu(CPUState *cpu)
296af7c9 1264{
ce3960eb
AF
1265 cpu->nr_cores = smp_cores;
1266 cpu->nr_threads = smp_threads;
f324e766 1267 cpu->stopped = true;
0ab07c62 1268 if (kvm_enabled()) {
48a106bd 1269 qemu_kvm_start_vcpu(cpu);
c7f0f3b1 1270 } else if (tcg_enabled()) {
e5ab30a2 1271 qemu_tcg_init_vcpu(cpu);
c7f0f3b1 1272 } else {
10a9021d 1273 qemu_dummy_start_vcpu(cpu);
0ab07c62 1274 }
296af7c9
BS
1275}
1276
b4a3d965 1277void cpu_stop_current(void)
296af7c9 1278{
4917cf44
AF
1279 if (current_cpu) {
1280 current_cpu->stop = false;
1281 current_cpu->stopped = true;
1282 cpu_exit(current_cpu);
67bb172f 1283 qemu_cond_signal(&qemu_pause_cond);
b4a3d965 1284 }
296af7c9
BS
1285}
1286
56983463 1287int vm_stop(RunState state)
296af7c9 1288{
aa723c23 1289 if (qemu_in_vcpu_thread()) {
74892d24 1290 qemu_system_vmstop_request_prepare();
1dfb4dd9 1291 qemu_system_vmstop_request(state);
296af7c9
BS
1292 /*
1293 * FIXME: should not return to device code in case
1294 * vm_stop() has been requested.
1295 */
b4a3d965 1296 cpu_stop_current();
56983463 1297 return 0;
296af7c9 1298 }
56983463
KW
1299
1300 return do_vm_stop(state);
296af7c9
BS
1301}
1302
8a9236f1
LC
1303/* does a state transition even if the VM is already stopped,
1304 current state is forgotten forever */
56983463 1305int vm_stop_force_state(RunState state)
8a9236f1
LC
1306{
1307 if (runstate_is_running()) {
56983463 1308 return vm_stop(state);
8a9236f1
LC
1309 } else {
1310 runstate_set(state);
594a45ce
KW
1311 /* Make sure to return an error if the flush in a previous vm_stop()
1312 * failed. */
1313 return bdrv_flush_all();
8a9236f1
LC
1314 }
1315}
1316
9349b4f9 1317static int tcg_cpu_exec(CPUArchState *env)
296af7c9 1318{
efee7340 1319 CPUState *cpu = ENV_GET_CPU(env);
296af7c9
BS
1320 int ret;
1321#ifdef CONFIG_PROFILER
1322 int64_t ti;
1323#endif
1324
1325#ifdef CONFIG_PROFILER
1326 ti = profile_getclock();
1327#endif
1328 if (use_icount) {
1329 int64_t count;
ac70aafc 1330 int64_t deadline;
296af7c9 1331 int decr;
c96778bb
FK
1332 timers_state.qemu_icount -= (cpu->icount_decr.u16.low
1333 + cpu->icount_extra);
28ecfd7a 1334 cpu->icount_decr.u16.low = 0;
efee7340 1335 cpu->icount_extra = 0;
40daca54 1336 deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
ac70aafc
AB
1337
1338 /* Maintain prior (possibly buggy) behaviour where if no deadline
40daca54 1339 * was set (as there is no QEMU_CLOCK_VIRTUAL timer) or it is more than
ac70aafc
AB
1340 * INT32_MAX nanoseconds ahead, we still use INT32_MAX
1341 * nanoseconds.
1342 */
1343 if ((deadline < 0) || (deadline > INT32_MAX)) {
1344 deadline = INT32_MAX;
1345 }
1346
1347 count = qemu_icount_round(deadline);
c96778bb 1348 timers_state.qemu_icount += count;
296af7c9
BS
1349 decr = (count > 0xffff) ? 0xffff : count;
1350 count -= decr;
28ecfd7a 1351 cpu->icount_decr.u16.low = decr;
efee7340 1352 cpu->icount_extra = count;
296af7c9
BS
1353 }
1354 ret = cpu_exec(env);
1355#ifdef CONFIG_PROFILER
1356 qemu_time += profile_getclock() - ti;
1357#endif
1358 if (use_icount) {
1359 /* Fold pending instructions back into the
1360 instruction counter, and clear the interrupt flag. */
c96778bb
FK
1361 timers_state.qemu_icount -= (cpu->icount_decr.u16.low
1362 + cpu->icount_extra);
28ecfd7a 1363 cpu->icount_decr.u32 = 0;
efee7340 1364 cpu->icount_extra = 0;
296af7c9
BS
1365 }
1366 return ret;
1367}
1368
bdb7ca67 1369static void tcg_exec_all(void)
296af7c9 1370{
9a36085b
JK
1371 int r;
1372
40daca54
AB
1373 /* Account partial waits to QEMU_CLOCK_VIRTUAL. */
1374 qemu_clock_warp(QEMU_CLOCK_VIRTUAL);
ab33fcda 1375
0ab07c62 1376 if (next_cpu == NULL) {
296af7c9 1377 next_cpu = first_cpu;
0ab07c62 1378 }
bdc44640 1379 for (; next_cpu != NULL && !exit_request; next_cpu = CPU_NEXT(next_cpu)) {
182735ef
AF
1380 CPUState *cpu = next_cpu;
1381 CPUArchState *env = cpu->env_ptr;
296af7c9 1382
40daca54 1383 qemu_clock_enable(QEMU_CLOCK_VIRTUAL,
ed2803da 1384 (cpu->singlestep_enabled & SSTEP_NOTIMER) == 0);
296af7c9 1385
a1fcaa73 1386 if (cpu_can_run(cpu)) {
bdb7ca67 1387 r = tcg_cpu_exec(env);
9a36085b 1388 if (r == EXCP_DEBUG) {
91325046 1389 cpu_handle_guest_debug(cpu);
3c638d06
JK
1390 break;
1391 }
f324e766 1392 } else if (cpu->stop || cpu->stopped) {
296af7c9
BS
1393 break;
1394 }
1395 }
c629a4bc 1396 exit_request = 0;
296af7c9
BS
1397}
1398
9a78eead 1399void list_cpus(FILE *f, fprintf_function cpu_fprintf, const char *optarg)
262353cb
BS
1400{
1401 /* XXX: implement xxx_cpu_list for targets that still miss it */
e916cbf8
PM
1402#if defined(cpu_list)
1403 cpu_list(f, cpu_fprintf);
262353cb
BS
1404#endif
1405}
de0b36b6
LC
1406
1407CpuInfoList *qmp_query_cpus(Error **errp)
1408{
1409 CpuInfoList *head = NULL, *cur_item = NULL;
182735ef 1410 CPUState *cpu;
de0b36b6 1411
bdc44640 1412 CPU_FOREACH(cpu) {
de0b36b6 1413 CpuInfoList *info;
182735ef
AF
1414#if defined(TARGET_I386)
1415 X86CPU *x86_cpu = X86_CPU(cpu);
1416 CPUX86State *env = &x86_cpu->env;
1417#elif defined(TARGET_PPC)
1418 PowerPCCPU *ppc_cpu = POWERPC_CPU(cpu);
1419 CPUPPCState *env = &ppc_cpu->env;
1420#elif defined(TARGET_SPARC)
1421 SPARCCPU *sparc_cpu = SPARC_CPU(cpu);
1422 CPUSPARCState *env = &sparc_cpu->env;
1423#elif defined(TARGET_MIPS)
1424 MIPSCPU *mips_cpu = MIPS_CPU(cpu);
1425 CPUMIPSState *env = &mips_cpu->env;
48e06fe0
BK
1426#elif defined(TARGET_TRICORE)
1427 TriCoreCPU *tricore_cpu = TRICORE_CPU(cpu);
1428 CPUTriCoreState *env = &tricore_cpu->env;
182735ef 1429#endif
de0b36b6 1430
cb446eca 1431 cpu_synchronize_state(cpu);
de0b36b6
LC
1432
1433 info = g_malloc0(sizeof(*info));
1434 info->value = g_malloc0(sizeof(*info->value));
55e5c285 1435 info->value->CPU = cpu->cpu_index;
182735ef 1436 info->value->current = (cpu == first_cpu);
259186a7 1437 info->value->halted = cpu->halted;
9f09e18a 1438 info->value->thread_id = cpu->thread_id;
de0b36b6
LC
1439#if defined(TARGET_I386)
1440 info->value->has_pc = true;
1441 info->value->pc = env->eip + env->segs[R_CS].base;
1442#elif defined(TARGET_PPC)
1443 info->value->has_nip = true;
1444 info->value->nip = env->nip;
1445#elif defined(TARGET_SPARC)
1446 info->value->has_pc = true;
1447 info->value->pc = env->pc;
1448 info->value->has_npc = true;
1449 info->value->npc = env->npc;
1450#elif defined(TARGET_MIPS)
1451 info->value->has_PC = true;
1452 info->value->PC = env->active_tc.PC;
48e06fe0
BK
1453#elif defined(TARGET_TRICORE)
1454 info->value->has_PC = true;
1455 info->value->PC = env->PC;
de0b36b6
LC
1456#endif
1457
1458 /* XXX: waiting for the qapi to support GSList */
1459 if (!cur_item) {
1460 head = cur_item = info;
1461 } else {
1462 cur_item->next = info;
1463 cur_item = info;
1464 }
1465 }
1466
1467 return head;
1468}
0cfd6a9a
LC
1469
1470void qmp_memsave(int64_t addr, int64_t size, const char *filename,
1471 bool has_cpu, int64_t cpu_index, Error **errp)
1472{
1473 FILE *f;
1474 uint32_t l;
55e5c285 1475 CPUState *cpu;
0cfd6a9a
LC
1476 uint8_t buf[1024];
1477
1478 if (!has_cpu) {
1479 cpu_index = 0;
1480 }
1481
151d1322
AF
1482 cpu = qemu_get_cpu(cpu_index);
1483 if (cpu == NULL) {
0cfd6a9a
LC
1484 error_set(errp, QERR_INVALID_PARAMETER_VALUE, "cpu-index",
1485 "a CPU number");
1486 return;
1487 }
1488
1489 f = fopen(filename, "wb");
1490 if (!f) {
618da851 1491 error_setg_file_open(errp, errno, filename);
0cfd6a9a
LC
1492 return;
1493 }
1494
1495 while (size != 0) {
1496 l = sizeof(buf);
1497 if (l > size)
1498 l = size;
2f4d0f59
AK
1499 if (cpu_memory_rw_debug(cpu, addr, buf, l, 0) != 0) {
1500 error_setg(errp, "Invalid addr 0x%016" PRIx64 "specified", addr);
1501 goto exit;
1502 }
0cfd6a9a
LC
1503 if (fwrite(buf, 1, l, f) != l) {
1504 error_set(errp, QERR_IO_ERROR);
1505 goto exit;
1506 }
1507 addr += l;
1508 size -= l;
1509 }
1510
1511exit:
1512 fclose(f);
1513}
6d3962bf
LC
1514
1515void qmp_pmemsave(int64_t addr, int64_t size, const char *filename,
1516 Error **errp)
1517{
1518 FILE *f;
1519 uint32_t l;
1520 uint8_t buf[1024];
1521
1522 f = fopen(filename, "wb");
1523 if (!f) {
618da851 1524 error_setg_file_open(errp, errno, filename);
6d3962bf
LC
1525 return;
1526 }
1527
1528 while (size != 0) {
1529 l = sizeof(buf);
1530 if (l > size)
1531 l = size;
eb6282f2 1532 cpu_physical_memory_read(addr, buf, l);
6d3962bf
LC
1533 if (fwrite(buf, 1, l, f) != l) {
1534 error_set(errp, QERR_IO_ERROR);
1535 goto exit;
1536 }
1537 addr += l;
1538 size -= l;
1539 }
1540
1541exit:
1542 fclose(f);
1543}
ab49ab5c
LC
1544
1545void qmp_inject_nmi(Error **errp)
1546{
1547#if defined(TARGET_I386)
182735ef
AF
1548 CPUState *cs;
1549
bdc44640 1550 CPU_FOREACH(cs) {
182735ef 1551 X86CPU *cpu = X86_CPU(cs);
ab49ab5c 1552
02e51483 1553 if (!cpu->apic_state) {
182735ef 1554 cpu_interrupt(cs, CPU_INTERRUPT_NMI);
02c09195 1555 } else {
02e51483 1556 apic_deliver_nmi(cpu->apic_state);
02c09195 1557 }
ab49ab5c
LC
1558 }
1559#else
9cb805fd 1560 nmi_monitor_handle(monitor_get_cpu_index(), errp);
ab49ab5c
LC
1561#endif
1562}
27498bef
ST
1563
1564void dump_drift_info(FILE *f, fprintf_function cpu_fprintf)
1565{
1566 if (!use_icount) {
1567 return;
1568 }
1569
1570 cpu_fprintf(f, "Host - Guest clock %"PRIi64" ms\n",
1571 (cpu_get_clock() - cpu_get_icount())/SCALE_MS);
1572 if (icount_align_option) {
1573 cpu_fprintf(f, "Max guest delay %"PRIi64" ms\n", -max_delay/SCALE_MS);
1574 cpu_fprintf(f, "Max guest advance %"PRIi64" ms\n", max_advance/SCALE_MS);
1575 } else {
1576 cpu_fprintf(f, "Max guest delay NA\n");
1577 cpu_fprintf(f, "Max guest advance NA\n");
1578 }
1579}