]> git.proxmox.com Git - mirror_qemu.git/blame - cpus.c
block: vmdk - fixed sizeof() error
[mirror_qemu.git] / cpus.c
CommitLineData
296af7c9
BS
1/*
2 * QEMU System Emulator
3 *
4 * Copyright (c) 2003-2008 Fabrice Bellard
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to deal
8 * in the Software without restriction, including without limitation the rights
9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 * copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22 * THE SOFTWARE.
23 */
24
25/* Needed early for CONFIG_BSD etc. */
26#include "config-host.h"
27
83c9089e 28#include "monitor/monitor.h"
a4e15de9 29#include "qapi/qmp/qerror.h"
9c17d615 30#include "sysemu/sysemu.h"
022c62cb 31#include "exec/gdbstub.h"
9c17d615
PB
32#include "sysemu/dma.h"
33#include "sysemu/kvm.h"
de0b36b6 34#include "qmp-commands.h"
296af7c9 35
1de7afc9 36#include "qemu/thread.h"
9c17d615
PB
37#include "sysemu/cpus.h"
38#include "sysemu/qtest.h"
1de7afc9
PB
39#include "qemu/main-loop.h"
40#include "qemu/bitmap.h"
cb365646 41#include "qemu/seqlock.h"
a4e15de9 42#include "qapi-event.h"
9cb805fd 43#include "hw/nmi.h"
0ff0fc19
JK
44
45#ifndef _WIN32
1de7afc9 46#include "qemu/compatfd.h"
0ff0fc19 47#endif
296af7c9 48
6d9cb73c
JK
49#ifdef CONFIG_LINUX
50
51#include <sys/prctl.h>
52
c0532a76
MT
53#ifndef PR_MCE_KILL
54#define PR_MCE_KILL 33
55#endif
56
6d9cb73c
JK
57#ifndef PR_MCE_KILL_SET
58#define PR_MCE_KILL_SET 1
59#endif
60
61#ifndef PR_MCE_KILL_EARLY
62#define PR_MCE_KILL_EARLY 1
63#endif
64
65#endif /* CONFIG_LINUX */
66
182735ef 67static CPUState *next_cpu;
27498bef
ST
68int64_t max_delay;
69int64_t max_advance;
296af7c9 70
321bc0b2
TC
71bool cpu_is_stopped(CPUState *cpu)
72{
73 return cpu->stopped || !runstate_is_running();
74}
75
a98ae1d8 76static bool cpu_thread_is_idle(CPUState *cpu)
ac873f1e 77{
c64ca814 78 if (cpu->stop || cpu->queued_work_first) {
ac873f1e
PM
79 return false;
80 }
321bc0b2 81 if (cpu_is_stopped(cpu)) {
ac873f1e
PM
82 return true;
83 }
8c2e1b00 84 if (!cpu->halted || cpu_has_work(cpu) ||
215e79c0 85 kvm_halt_in_kernel()) {
ac873f1e
PM
86 return false;
87 }
88 return true;
89}
90
91static bool all_cpu_threads_idle(void)
92{
182735ef 93 CPUState *cpu;
ac873f1e 94
bdc44640 95 CPU_FOREACH(cpu) {
182735ef 96 if (!cpu_thread_is_idle(cpu)) {
ac873f1e
PM
97 return false;
98 }
99 }
100 return true;
101}
102
946fb27c
PB
103/***********************************************************/
104/* guest cycle counter */
105
a3270e19
PB
106/* Protected by TimersState seqlock */
107
71468395 108static int64_t vm_clock_warp_start = -1;
946fb27c
PB
109/* Conversion factor from emulated instructions to virtual clock ticks. */
110static int icount_time_shift;
111/* Arbitrarily pick 1MIPS as the minimum allowable speed. */
112#define MAX_ICOUNT_SHIFT 10
a3270e19 113
946fb27c
PB
114static QEMUTimer *icount_rt_timer;
115static QEMUTimer *icount_vm_timer;
116static QEMUTimer *icount_warp_timer;
946fb27c
PB
117
118typedef struct TimersState {
cb365646 119 /* Protected by BQL. */
946fb27c
PB
120 int64_t cpu_ticks_prev;
121 int64_t cpu_ticks_offset;
cb365646
LPF
122
123 /* cpu_clock_offset can be read out of BQL, so protect it with
124 * this lock.
125 */
126 QemuSeqLock vm_clock_seqlock;
946fb27c
PB
127 int64_t cpu_clock_offset;
128 int32_t cpu_ticks_enabled;
129 int64_t dummy;
c96778bb
FK
130
131 /* Compensate for varying guest execution speed. */
132 int64_t qemu_icount_bias;
133 /* Only written by TCG thread */
134 int64_t qemu_icount;
946fb27c
PB
135} TimersState;
136
d9cd4007 137static TimersState timers_state;
946fb27c 138
2a62914b 139int64_t cpu_get_icount_raw(void)
946fb27c
PB
140{
141 int64_t icount;
4917cf44 142 CPUState *cpu = current_cpu;
946fb27c 143
c96778bb 144 icount = timers_state.qemu_icount;
4917cf44 145 if (cpu) {
99df7dce 146 if (!cpu_can_do_io(cpu)) {
2a62914b
PD
147 fprintf(stderr, "Bad icount read\n");
148 exit(1);
946fb27c 149 }
28ecfd7a 150 icount -= (cpu->icount_decr.u16.low + cpu->icount_extra);
946fb27c 151 }
2a62914b
PD
152 return icount;
153}
154
155/* Return the virtual CPU time, based on the instruction counter. */
156static int64_t cpu_get_icount_locked(void)
157{
158 int64_t icount = cpu_get_icount_raw();
3f031313 159 return timers_state.qemu_icount_bias + cpu_icount_to_ns(icount);
946fb27c
PB
160}
161
17a15f1b
PB
162int64_t cpu_get_icount(void)
163{
164 int64_t icount;
165 unsigned start;
166
167 do {
168 start = seqlock_read_begin(&timers_state.vm_clock_seqlock);
169 icount = cpu_get_icount_locked();
170 } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start));
171
172 return icount;
173}
174
3f031313
FK
175int64_t cpu_icount_to_ns(int64_t icount)
176{
177 return icount << icount_time_shift;
178}
179
946fb27c 180/* return the host CPU cycle counter and handle stop/restart */
cb365646 181/* Caller must hold the BQL */
946fb27c
PB
182int64_t cpu_get_ticks(void)
183{
5f3e3101
PB
184 int64_t ticks;
185
946fb27c
PB
186 if (use_icount) {
187 return cpu_get_icount();
188 }
5f3e3101
PB
189
190 ticks = timers_state.cpu_ticks_offset;
191 if (timers_state.cpu_ticks_enabled) {
192 ticks += cpu_get_real_ticks();
193 }
194
195 if (timers_state.cpu_ticks_prev > ticks) {
196 /* Note: non increasing ticks may happen if the host uses
197 software suspend */
198 timers_state.cpu_ticks_offset += timers_state.cpu_ticks_prev - ticks;
199 ticks = timers_state.cpu_ticks_prev;
946fb27c 200 }
5f3e3101
PB
201
202 timers_state.cpu_ticks_prev = ticks;
203 return ticks;
946fb27c
PB
204}
205
cb365646 206static int64_t cpu_get_clock_locked(void)
946fb27c 207{
5f3e3101 208 int64_t ticks;
cb365646 209
5f3e3101
PB
210 ticks = timers_state.cpu_clock_offset;
211 if (timers_state.cpu_ticks_enabled) {
212 ticks += get_clock();
946fb27c 213 }
cb365646 214
5f3e3101 215 return ticks;
cb365646
LPF
216}
217
218/* return the host CPU monotonic timer and handle stop/restart */
219int64_t cpu_get_clock(void)
220{
221 int64_t ti;
222 unsigned start;
223
224 do {
225 start = seqlock_read_begin(&timers_state.vm_clock_seqlock);
226 ti = cpu_get_clock_locked();
227 } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start));
228
229 return ti;
946fb27c
PB
230}
231
cb365646
LPF
232/* enable cpu_get_ticks()
233 * Caller must hold BQL which server as mutex for vm_clock_seqlock.
234 */
946fb27c
PB
235void cpu_enable_ticks(void)
236{
cb365646
LPF
237 /* Here, the really thing protected by seqlock is cpu_clock_offset. */
238 seqlock_write_lock(&timers_state.vm_clock_seqlock);
946fb27c
PB
239 if (!timers_state.cpu_ticks_enabled) {
240 timers_state.cpu_ticks_offset -= cpu_get_real_ticks();
241 timers_state.cpu_clock_offset -= get_clock();
242 timers_state.cpu_ticks_enabled = 1;
243 }
cb365646 244 seqlock_write_unlock(&timers_state.vm_clock_seqlock);
946fb27c
PB
245}
246
247/* disable cpu_get_ticks() : the clock is stopped. You must not call
cb365646
LPF
248 * cpu_get_ticks() after that.
249 * Caller must hold BQL which server as mutex for vm_clock_seqlock.
250 */
946fb27c
PB
251void cpu_disable_ticks(void)
252{
cb365646
LPF
253 /* Here, the really thing protected by seqlock is cpu_clock_offset. */
254 seqlock_write_lock(&timers_state.vm_clock_seqlock);
946fb27c 255 if (timers_state.cpu_ticks_enabled) {
5f3e3101 256 timers_state.cpu_ticks_offset += cpu_get_real_ticks();
cb365646 257 timers_state.cpu_clock_offset = cpu_get_clock_locked();
946fb27c
PB
258 timers_state.cpu_ticks_enabled = 0;
259 }
cb365646 260 seqlock_write_unlock(&timers_state.vm_clock_seqlock);
946fb27c
PB
261}
262
263/* Correlation between real and virtual time is always going to be
264 fairly approximate, so ignore small variation.
265 When the guest is idle real and virtual time will be aligned in
266 the IO wait loop. */
267#define ICOUNT_WOBBLE (get_ticks_per_sec() / 10)
268
269static void icount_adjust(void)
270{
271 int64_t cur_time;
272 int64_t cur_icount;
273 int64_t delta;
a3270e19
PB
274
275 /* Protected by TimersState mutex. */
946fb27c 276 static int64_t last_delta;
468cc7cf 277
946fb27c
PB
278 /* If the VM is not running, then do nothing. */
279 if (!runstate_is_running()) {
280 return;
281 }
468cc7cf 282
17a15f1b
PB
283 seqlock_write_lock(&timers_state.vm_clock_seqlock);
284 cur_time = cpu_get_clock_locked();
285 cur_icount = cpu_get_icount_locked();
468cc7cf 286
946fb27c
PB
287 delta = cur_icount - cur_time;
288 /* FIXME: This is a very crude algorithm, somewhat prone to oscillation. */
289 if (delta > 0
290 && last_delta + ICOUNT_WOBBLE < delta * 2
291 && icount_time_shift > 0) {
292 /* The guest is getting too far ahead. Slow time down. */
293 icount_time_shift--;
294 }
295 if (delta < 0
296 && last_delta - ICOUNT_WOBBLE > delta * 2
297 && icount_time_shift < MAX_ICOUNT_SHIFT) {
298 /* The guest is getting too far behind. Speed time up. */
299 icount_time_shift++;
300 }
301 last_delta = delta;
c96778bb
FK
302 timers_state.qemu_icount_bias = cur_icount
303 - (timers_state.qemu_icount << icount_time_shift);
17a15f1b 304 seqlock_write_unlock(&timers_state.vm_clock_seqlock);
946fb27c
PB
305}
306
307static void icount_adjust_rt(void *opaque)
308{
40daca54 309 timer_mod(icount_rt_timer,
1979b908 310 qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL_RT) + 1000);
946fb27c
PB
311 icount_adjust();
312}
313
314static void icount_adjust_vm(void *opaque)
315{
40daca54
AB
316 timer_mod(icount_vm_timer,
317 qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
318 get_ticks_per_sec() / 10);
946fb27c
PB
319 icount_adjust();
320}
321
322static int64_t qemu_icount_round(int64_t count)
323{
324 return (count + (1 << icount_time_shift) - 1) >> icount_time_shift;
325}
326
327static void icount_warp_rt(void *opaque)
328{
17a15f1b
PB
329 /* The icount_warp_timer is rescheduled soon after vm_clock_warp_start
330 * changes from -1 to another value, so the race here is okay.
331 */
332 if (atomic_read(&vm_clock_warp_start) == -1) {
946fb27c
PB
333 return;
334 }
335
17a15f1b 336 seqlock_write_lock(&timers_state.vm_clock_seqlock);
946fb27c 337 if (runstate_is_running()) {
bf2a7ddb 338 int64_t clock = cpu_get_clock_locked();
8ed961d9
PB
339 int64_t warp_delta;
340
341 warp_delta = clock - vm_clock_warp_start;
342 if (use_icount == 2) {
946fb27c 343 /*
40daca54 344 * In adaptive mode, do not let QEMU_CLOCK_VIRTUAL run too
946fb27c
PB
345 * far ahead of real time.
346 */
17a15f1b 347 int64_t cur_icount = cpu_get_icount_locked();
bf2a7ddb 348 int64_t delta = clock - cur_icount;
8ed961d9 349 warp_delta = MIN(warp_delta, delta);
946fb27c 350 }
c96778bb 351 timers_state.qemu_icount_bias += warp_delta;
946fb27c
PB
352 }
353 vm_clock_warp_start = -1;
17a15f1b 354 seqlock_write_unlock(&timers_state.vm_clock_seqlock);
8ed961d9
PB
355
356 if (qemu_clock_expired(QEMU_CLOCK_VIRTUAL)) {
357 qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
358 }
946fb27c
PB
359}
360
8156be56
PB
361void qtest_clock_warp(int64_t dest)
362{
40daca54 363 int64_t clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
8156be56
PB
364 assert(qtest_enabled());
365 while (clock < dest) {
40daca54 366 int64_t deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
c9299e2f 367 int64_t warp = qemu_soonest_timeout(dest - clock, deadline);
17a15f1b 368 seqlock_write_lock(&timers_state.vm_clock_seqlock);
c96778bb 369 timers_state.qemu_icount_bias += warp;
17a15f1b
PB
370 seqlock_write_unlock(&timers_state.vm_clock_seqlock);
371
40daca54
AB
372 qemu_clock_run_timers(QEMU_CLOCK_VIRTUAL);
373 clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
8156be56 374 }
40daca54 375 qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
8156be56
PB
376}
377
40daca54 378void qemu_clock_warp(QEMUClockType type)
946fb27c 379{
ce78d18c 380 int64_t clock;
946fb27c
PB
381 int64_t deadline;
382
383 /*
384 * There are too many global variables to make the "warp" behavior
385 * applicable to other clocks. But a clock argument removes the
386 * need for if statements all over the place.
387 */
40daca54 388 if (type != QEMU_CLOCK_VIRTUAL || !use_icount) {
946fb27c
PB
389 return;
390 }
391
392 /*
40daca54
AB
393 * If the CPUs have been sleeping, advance QEMU_CLOCK_VIRTUAL timer now.
394 * This ensures that the deadline for the timer is computed correctly below.
946fb27c
PB
395 * This also makes sure that the insn counter is synchronized before the
396 * CPU starts running, in case the CPU is woken by an event other than
40daca54 397 * the earliest QEMU_CLOCK_VIRTUAL timer.
946fb27c
PB
398 */
399 icount_warp_rt(NULL);
ce78d18c
PB
400 timer_del(icount_warp_timer);
401 if (!all_cpu_threads_idle()) {
946fb27c
PB
402 return;
403 }
404
8156be56
PB
405 if (qtest_enabled()) {
406 /* When testing, qtest commands advance icount. */
407 return;
408 }
409
ac70aafc 410 /* We want to use the earliest deadline from ALL vm_clocks */
bf2a7ddb 411 clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT);
40daca54 412 deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
ce78d18c
PB
413 if (deadline < 0) {
414 return;
ac70aafc
AB
415 }
416
946fb27c
PB
417 if (deadline > 0) {
418 /*
40daca54 419 * Ensure QEMU_CLOCK_VIRTUAL proceeds even when the virtual CPU goes to
946fb27c
PB
420 * sleep. Otherwise, the CPU might be waiting for a future timer
421 * interrupt to wake it up, but the interrupt never comes because
422 * the vCPU isn't running any insns and thus doesn't advance the
40daca54 423 * QEMU_CLOCK_VIRTUAL.
946fb27c
PB
424 *
425 * An extreme solution for this problem would be to never let VCPUs
40daca54
AB
426 * sleep in icount mode if there is a pending QEMU_CLOCK_VIRTUAL
427 * timer; rather time could just advance to the next QEMU_CLOCK_VIRTUAL
428 * event. Instead, we do stop VCPUs and only advance QEMU_CLOCK_VIRTUAL
bf2a7ddb
PD
429 * after some "real" time, (related to the time left until the next
430 * event) has passed. The QEMU_CLOCK_VIRTUAL_RT clock will do this.
40daca54
AB
431 * This avoids that the warps are visible externally; for example,
432 * you will not be sending network packets continuously instead of
433 * every 100ms.
946fb27c 434 */
17a15f1b 435 seqlock_write_lock(&timers_state.vm_clock_seqlock);
ce78d18c
PB
436 if (vm_clock_warp_start == -1 || vm_clock_warp_start > clock) {
437 vm_clock_warp_start = clock;
438 }
17a15f1b 439 seqlock_write_unlock(&timers_state.vm_clock_seqlock);
ce78d18c 440 timer_mod_anticipate(icount_warp_timer, clock + deadline);
ac70aafc 441 } else if (deadline == 0) {
40daca54 442 qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
946fb27c
PB
443 }
444}
445
d09eae37
FK
446static bool icount_state_needed(void *opaque)
447{
448 return use_icount;
449}
450
451/*
452 * This is a subsection for icount migration.
453 */
454static const VMStateDescription icount_vmstate_timers = {
455 .name = "timer/icount",
456 .version_id = 1,
457 .minimum_version_id = 1,
458 .fields = (VMStateField[]) {
459 VMSTATE_INT64(qemu_icount_bias, TimersState),
460 VMSTATE_INT64(qemu_icount, TimersState),
461 VMSTATE_END_OF_LIST()
462 }
463};
464
946fb27c
PB
465static const VMStateDescription vmstate_timers = {
466 .name = "timer",
467 .version_id = 2,
468 .minimum_version_id = 1,
35d08458 469 .fields = (VMStateField[]) {
946fb27c
PB
470 VMSTATE_INT64(cpu_ticks_offset, TimersState),
471 VMSTATE_INT64(dummy, TimersState),
472 VMSTATE_INT64_V(cpu_clock_offset, TimersState, 2),
473 VMSTATE_END_OF_LIST()
d09eae37
FK
474 },
475 .subsections = (VMStateSubsection[]) {
476 {
477 .vmsd = &icount_vmstate_timers,
478 .needed = icount_state_needed,
479 }, {
480 /* empty */
481 }
946fb27c
PB
482 }
483};
484
4603ea01
PD
485void cpu_ticks_init(void)
486{
487 seqlock_init(&timers_state.vm_clock_seqlock, NULL);
488 vmstate_register(NULL, 0, &vmstate_timers, &timers_state);
489}
490
1ad9580b 491void configure_icount(QemuOpts *opts, Error **errp)
946fb27c 492{
1ad9580b 493 const char *option;
a8bfac37 494 char *rem_str = NULL;
1ad9580b 495
1ad9580b 496 option = qemu_opt_get(opts, "shift");
946fb27c 497 if (!option) {
a8bfac37
ST
498 if (qemu_opt_get(opts, "align") != NULL) {
499 error_setg(errp, "Please specify shift option when using align");
500 }
946fb27c
PB
501 return;
502 }
a8bfac37 503 icount_align_option = qemu_opt_get_bool(opts, "align", false);
bf2a7ddb
PD
504 icount_warp_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL_RT,
505 icount_warp_rt, NULL);
946fb27c 506 if (strcmp(option, "auto") != 0) {
a8bfac37
ST
507 errno = 0;
508 icount_time_shift = strtol(option, &rem_str, 0);
509 if (errno != 0 || *rem_str != '\0' || !strlen(option)) {
510 error_setg(errp, "icount: Invalid shift value");
511 }
946fb27c
PB
512 use_icount = 1;
513 return;
a8bfac37
ST
514 } else if (icount_align_option) {
515 error_setg(errp, "shift=auto and align=on are incompatible");
946fb27c
PB
516 }
517
518 use_icount = 2;
519
520 /* 125MIPS seems a reasonable initial guess at the guest speed.
521 It will be corrected fairly quickly anyway. */
522 icount_time_shift = 3;
523
524 /* Have both realtime and virtual time triggers for speed adjustment.
525 The realtime trigger catches emulated time passing too slowly,
526 the virtual time trigger catches emulated time passing too fast.
527 Realtime triggers occur even when idle, so use them less frequently
528 than VM triggers. */
bf2a7ddb
PD
529 icount_rt_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL_RT,
530 icount_adjust_rt, NULL);
40daca54 531 timer_mod(icount_rt_timer,
bf2a7ddb 532 qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL_RT) + 1000);
40daca54
AB
533 icount_vm_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
534 icount_adjust_vm, NULL);
535 timer_mod(icount_vm_timer,
536 qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
537 get_ticks_per_sec() / 10);
946fb27c
PB
538}
539
296af7c9
BS
540/***********************************************************/
541void hw_error(const char *fmt, ...)
542{
543 va_list ap;
55e5c285 544 CPUState *cpu;
296af7c9
BS
545
546 va_start(ap, fmt);
547 fprintf(stderr, "qemu: hardware error: ");
548 vfprintf(stderr, fmt, ap);
549 fprintf(stderr, "\n");
bdc44640 550 CPU_FOREACH(cpu) {
55e5c285 551 fprintf(stderr, "CPU #%d:\n", cpu->cpu_index);
878096ee 552 cpu_dump_state(cpu, stderr, fprintf, CPU_DUMP_FPU);
296af7c9
BS
553 }
554 va_end(ap);
555 abort();
556}
557
558void cpu_synchronize_all_states(void)
559{
182735ef 560 CPUState *cpu;
296af7c9 561
bdc44640 562 CPU_FOREACH(cpu) {
182735ef 563 cpu_synchronize_state(cpu);
296af7c9
BS
564 }
565}
566
567void cpu_synchronize_all_post_reset(void)
568{
182735ef 569 CPUState *cpu;
296af7c9 570
bdc44640 571 CPU_FOREACH(cpu) {
182735ef 572 cpu_synchronize_post_reset(cpu);
296af7c9
BS
573 }
574}
575
576void cpu_synchronize_all_post_init(void)
577{
182735ef 578 CPUState *cpu;
296af7c9 579
bdc44640 580 CPU_FOREACH(cpu) {
182735ef 581 cpu_synchronize_post_init(cpu);
296af7c9
BS
582 }
583}
584
de9d61e8
MT
585void cpu_clean_all_dirty(void)
586{
587 CPUState *cpu;
588
589 CPU_FOREACH(cpu) {
590 cpu_clean_state(cpu);
591 }
592}
593
56983463 594static int do_vm_stop(RunState state)
296af7c9 595{
56983463
KW
596 int ret = 0;
597
1354869c 598 if (runstate_is_running()) {
296af7c9 599 cpu_disable_ticks();
296af7c9 600 pause_all_vcpus();
f5bbfba1 601 runstate_set(state);
1dfb4dd9 602 vm_state_notify(0, state);
a4e15de9 603 qapi_event_send_stop(&error_abort);
296af7c9 604 }
56983463 605
594a45ce
KW
606 bdrv_drain_all();
607 ret = bdrv_flush_all();
608
56983463 609 return ret;
296af7c9
BS
610}
611
a1fcaa73 612static bool cpu_can_run(CPUState *cpu)
296af7c9 613{
4fdeee7c 614 if (cpu->stop) {
a1fcaa73 615 return false;
0ab07c62 616 }
321bc0b2 617 if (cpu_is_stopped(cpu)) {
a1fcaa73 618 return false;
0ab07c62 619 }
a1fcaa73 620 return true;
296af7c9
BS
621}
622
91325046 623static void cpu_handle_guest_debug(CPUState *cpu)
83f338f7 624{
64f6b346 625 gdb_set_stop_cpu(cpu);
8cf71710 626 qemu_system_debug_request();
f324e766 627 cpu->stopped = true;
3c638d06
JK
628}
629
714bd040
PB
630static void cpu_signal(int sig)
631{
4917cf44
AF
632 if (current_cpu) {
633 cpu_exit(current_cpu);
714bd040
PB
634 }
635 exit_request = 1;
636}
714bd040 637
6d9cb73c
JK
638#ifdef CONFIG_LINUX
639static void sigbus_reraise(void)
640{
641 sigset_t set;
642 struct sigaction action;
643
644 memset(&action, 0, sizeof(action));
645 action.sa_handler = SIG_DFL;
646 if (!sigaction(SIGBUS, &action, NULL)) {
647 raise(SIGBUS);
648 sigemptyset(&set);
649 sigaddset(&set, SIGBUS);
650 sigprocmask(SIG_UNBLOCK, &set, NULL);
651 }
652 perror("Failed to re-raise SIGBUS!\n");
653 abort();
654}
655
656static void sigbus_handler(int n, struct qemu_signalfd_siginfo *siginfo,
657 void *ctx)
658{
659 if (kvm_on_sigbus(siginfo->ssi_code,
660 (void *)(intptr_t)siginfo->ssi_addr)) {
661 sigbus_reraise();
662 }
663}
664
665static void qemu_init_sigbus(void)
666{
667 struct sigaction action;
668
669 memset(&action, 0, sizeof(action));
670 action.sa_flags = SA_SIGINFO;
671 action.sa_sigaction = (void (*)(int, siginfo_t*, void*))sigbus_handler;
672 sigaction(SIGBUS, &action, NULL);
673
674 prctl(PR_MCE_KILL, PR_MCE_KILL_SET, PR_MCE_KILL_EARLY, 0, 0);
675}
676
290adf38 677static void qemu_kvm_eat_signals(CPUState *cpu)
1ab3c6c0
JK
678{
679 struct timespec ts = { 0, 0 };
680 siginfo_t siginfo;
681 sigset_t waitset;
682 sigset_t chkset;
683 int r;
684
685 sigemptyset(&waitset);
686 sigaddset(&waitset, SIG_IPI);
687 sigaddset(&waitset, SIGBUS);
688
689 do {
690 r = sigtimedwait(&waitset, &siginfo, &ts);
691 if (r == -1 && !(errno == EAGAIN || errno == EINTR)) {
692 perror("sigtimedwait");
693 exit(1);
694 }
695
696 switch (r) {
697 case SIGBUS:
290adf38 698 if (kvm_on_sigbus_vcpu(cpu, siginfo.si_code, siginfo.si_addr)) {
1ab3c6c0
JK
699 sigbus_reraise();
700 }
701 break;
702 default:
703 break;
704 }
705
706 r = sigpending(&chkset);
707 if (r == -1) {
708 perror("sigpending");
709 exit(1);
710 }
711 } while (sigismember(&chkset, SIG_IPI) || sigismember(&chkset, SIGBUS));
1ab3c6c0
JK
712}
713
6d9cb73c
JK
714#else /* !CONFIG_LINUX */
715
/* Built without CONFIG_LINUX: no SIGBUS setup is performed. */
static void qemu_init_sigbus(void)
{
}
1ab3c6c0 719
290adf38 720static void qemu_kvm_eat_signals(CPUState *cpu)
1ab3c6c0
JK
721{
722}
6d9cb73c
JK
723#endif /* !CONFIG_LINUX */
724
296af7c9 725#ifndef _WIN32
55f8d6ac
JK
/* Signal handler that intentionally does nothing; @sig is ignored. */
static void dummy_signal(int sig)
{
}
55f8d6ac 729
13618e05 730static void qemu_kvm_init_cpu_signals(CPUState *cpu)
714bd040
PB
731{
732 int r;
733 sigset_t set;
734 struct sigaction sigact;
735
736 memset(&sigact, 0, sizeof(sigact));
737 sigact.sa_handler = dummy_signal;
738 sigaction(SIG_IPI, &sigact, NULL);
739
714bd040
PB
740 pthread_sigmask(SIG_BLOCK, NULL, &set);
741 sigdelset(&set, SIG_IPI);
714bd040 742 sigdelset(&set, SIGBUS);
491d6e80 743 r = kvm_set_signal_mask(cpu, &set);
714bd040
PB
744 if (r) {
745 fprintf(stderr, "kvm_set_signal_mask: %s\n", strerror(-r));
746 exit(1);
747 }
748}
749
750static void qemu_tcg_init_cpu_signals(void)
751{
714bd040
PB
752 sigset_t set;
753 struct sigaction sigact;
754
755 memset(&sigact, 0, sizeof(sigact));
756 sigact.sa_handler = cpu_signal;
757 sigaction(SIG_IPI, &sigact, NULL);
758
759 sigemptyset(&set);
760 sigaddset(&set, SIG_IPI);
761 pthread_sigmask(SIG_UNBLOCK, &set, NULL);
714bd040
PB
762}
763
55f8d6ac 764#else /* _WIN32 */
13618e05 765static void qemu_kvm_init_cpu_signals(CPUState *cpu)
ff48eb5f 766{
714bd040
PB
767 abort();
768}
ff48eb5f 769
714bd040
PB
/* Win32 build: no signal setup is done for the TCG thread. */
static void qemu_tcg_init_cpu_signals(void)
{
}
714bd040 773#endif /* _WIN32 */
ff48eb5f 774
b2532d88 775static QemuMutex qemu_global_mutex;
46daff13
PB
776static QemuCond qemu_io_proceeded_cond;
777static bool iothread_requesting_mutex;
296af7c9
BS
778
779static QemuThread io_thread;
780
781static QemuThread *tcg_cpu_thread;
782static QemuCond *tcg_halt_cond;
783
296af7c9
BS
784/* cpu creation */
785static QemuCond qemu_cpu_cond;
786/* system init */
296af7c9 787static QemuCond qemu_pause_cond;
e82bcec2 788static QemuCond qemu_work_cond;
296af7c9 789
d3b12f5d 790void qemu_init_cpu_loop(void)
296af7c9 791{
6d9cb73c 792 qemu_init_sigbus();
ed94592b 793 qemu_cond_init(&qemu_cpu_cond);
ed94592b
AL
794 qemu_cond_init(&qemu_pause_cond);
795 qemu_cond_init(&qemu_work_cond);
46daff13 796 qemu_cond_init(&qemu_io_proceeded_cond);
296af7c9 797 qemu_mutex_init(&qemu_global_mutex);
296af7c9 798
b7680cb6 799 qemu_thread_get_self(&io_thread);
296af7c9
BS
800}
801
f100f0b3 802void run_on_cpu(CPUState *cpu, void (*func)(void *data), void *data)
e82bcec2
MT
803{
804 struct qemu_work_item wi;
805
60e82579 806 if (qemu_cpu_is_self(cpu)) {
e82bcec2
MT
807 func(data);
808 return;
809 }
810
811 wi.func = func;
812 wi.data = data;
3c02270d 813 wi.free = false;
c64ca814
AF
814 if (cpu->queued_work_first == NULL) {
815 cpu->queued_work_first = &wi;
0ab07c62 816 } else {
c64ca814 817 cpu->queued_work_last->next = &wi;
0ab07c62 818 }
c64ca814 819 cpu->queued_work_last = &wi;
e82bcec2
MT
820 wi.next = NULL;
821 wi.done = false;
822
c08d7424 823 qemu_cpu_kick(cpu);
e82bcec2 824 while (!wi.done) {
4917cf44 825 CPUState *self_cpu = current_cpu;
e82bcec2
MT
826
827 qemu_cond_wait(&qemu_work_cond, &qemu_global_mutex);
4917cf44 828 current_cpu = self_cpu;
e82bcec2
MT
829 }
830}
831
3c02270d
CV
832void async_run_on_cpu(CPUState *cpu, void (*func)(void *data), void *data)
833{
834 struct qemu_work_item *wi;
835
836 if (qemu_cpu_is_self(cpu)) {
837 func(data);
838 return;
839 }
840
841 wi = g_malloc0(sizeof(struct qemu_work_item));
842 wi->func = func;
843 wi->data = data;
844 wi->free = true;
845 if (cpu->queued_work_first == NULL) {
846 cpu->queued_work_first = wi;
847 } else {
848 cpu->queued_work_last->next = wi;
849 }
850 cpu->queued_work_last = wi;
851 wi->next = NULL;
852 wi->done = false;
853
854 qemu_cpu_kick(cpu);
855}
856
6d45b109 857static void flush_queued_work(CPUState *cpu)
e82bcec2
MT
858{
859 struct qemu_work_item *wi;
860
c64ca814 861 if (cpu->queued_work_first == NULL) {
e82bcec2 862 return;
0ab07c62 863 }
e82bcec2 864
c64ca814
AF
865 while ((wi = cpu->queued_work_first)) {
866 cpu->queued_work_first = wi->next;
e82bcec2
MT
867 wi->func(wi->data);
868 wi->done = true;
3c02270d
CV
869 if (wi->free) {
870 g_free(wi);
871 }
e82bcec2 872 }
c64ca814 873 cpu->queued_work_last = NULL;
e82bcec2
MT
874 qemu_cond_broadcast(&qemu_work_cond);
875}
876
509a0d78 877static void qemu_wait_io_event_common(CPUState *cpu)
296af7c9 878{
4fdeee7c
AF
879 if (cpu->stop) {
880 cpu->stop = false;
f324e766 881 cpu->stopped = true;
296af7c9
BS
882 qemu_cond_signal(&qemu_pause_cond);
883 }
6d45b109 884 flush_queued_work(cpu);
216fc9a4 885 cpu->thread_kicked = false;
296af7c9
BS
886}
887
6cabe1f3 888static void qemu_tcg_wait_io_event(void)
296af7c9 889{
182735ef 890 CPUState *cpu;
6cabe1f3 891
16400322 892 while (all_cpu_threads_idle()) {
ab33fcda
PB
893 /* Start accounting real time to the virtual clock if the CPUs
894 are idle. */
40daca54 895 qemu_clock_warp(QEMU_CLOCK_VIRTUAL);
9705fbb5 896 qemu_cond_wait(tcg_halt_cond, &qemu_global_mutex);
16400322 897 }
296af7c9 898
46daff13
PB
899 while (iothread_requesting_mutex) {
900 qemu_cond_wait(&qemu_io_proceeded_cond, &qemu_global_mutex);
901 }
6cabe1f3 902
bdc44640 903 CPU_FOREACH(cpu) {
182735ef 904 qemu_wait_io_event_common(cpu);
6cabe1f3 905 }
296af7c9
BS
906}
907
fd529e8f 908static void qemu_kvm_wait_io_event(CPUState *cpu)
296af7c9 909{
a98ae1d8 910 while (cpu_thread_is_idle(cpu)) {
f5c121b8 911 qemu_cond_wait(cpu->halt_cond, &qemu_global_mutex);
16400322 912 }
296af7c9 913
290adf38 914 qemu_kvm_eat_signals(cpu);
509a0d78 915 qemu_wait_io_event_common(cpu);
296af7c9
BS
916}
917
7e97cd88 918static void *qemu_kvm_cpu_thread_fn(void *arg)
296af7c9 919{
48a106bd 920 CPUState *cpu = arg;
84b4915d 921 int r;
296af7c9 922
6164e6d6 923 qemu_mutex_lock(&qemu_global_mutex);
814e612e 924 qemu_thread_get_self(cpu->thread);
9f09e18a 925 cpu->thread_id = qemu_get_thread_id();
626cf8f4 926 cpu->can_do_io = 1;
4917cf44 927 current_cpu = cpu;
296af7c9 928
504134d2 929 r = kvm_init_vcpu(cpu);
84b4915d
JK
930 if (r < 0) {
931 fprintf(stderr, "kvm_init_vcpu failed: %s\n", strerror(-r));
932 exit(1);
933 }
296af7c9 934
13618e05 935 qemu_kvm_init_cpu_signals(cpu);
296af7c9
BS
936
937 /* signal CPU creation */
61a46217 938 cpu->created = true;
296af7c9
BS
939 qemu_cond_signal(&qemu_cpu_cond);
940
296af7c9 941 while (1) {
a1fcaa73 942 if (cpu_can_run(cpu)) {
1458c363 943 r = kvm_cpu_exec(cpu);
83f338f7 944 if (r == EXCP_DEBUG) {
91325046 945 cpu_handle_guest_debug(cpu);
83f338f7 946 }
0ab07c62 947 }
fd529e8f 948 qemu_kvm_wait_io_event(cpu);
296af7c9
BS
949 }
950
951 return NULL;
952}
953
c7f0f3b1
AL
954static void *qemu_dummy_cpu_thread_fn(void *arg)
955{
956#ifdef _WIN32
957 fprintf(stderr, "qtest is not supported under Windows\n");
958 exit(1);
959#else
10a9021d 960 CPUState *cpu = arg;
c7f0f3b1
AL
961 sigset_t waitset;
962 int r;
963
964 qemu_mutex_lock_iothread();
814e612e 965 qemu_thread_get_self(cpu->thread);
9f09e18a 966 cpu->thread_id = qemu_get_thread_id();
626cf8f4 967 cpu->can_do_io = 1;
c7f0f3b1
AL
968
969 sigemptyset(&waitset);
970 sigaddset(&waitset, SIG_IPI);
971
972 /* signal CPU creation */
61a46217 973 cpu->created = true;
c7f0f3b1
AL
974 qemu_cond_signal(&qemu_cpu_cond);
975
4917cf44 976 current_cpu = cpu;
c7f0f3b1 977 while (1) {
4917cf44 978 current_cpu = NULL;
c7f0f3b1
AL
979 qemu_mutex_unlock_iothread();
980 do {
981 int sig;
982 r = sigwait(&waitset, &sig);
983 } while (r == -1 && (errno == EAGAIN || errno == EINTR));
984 if (r == -1) {
985 perror("sigwait");
986 exit(1);
987 }
988 qemu_mutex_lock_iothread();
4917cf44 989 current_cpu = cpu;
509a0d78 990 qemu_wait_io_event_common(cpu);
c7f0f3b1
AL
991 }
992
993 return NULL;
994#endif
995}
996
bdb7ca67
JK
997static void tcg_exec_all(void);
998
7e97cd88 999static void *qemu_tcg_cpu_thread_fn(void *arg)
296af7c9 1000{
c3586ba7 1001 CPUState *cpu = arg;
296af7c9 1002
55f8d6ac 1003 qemu_tcg_init_cpu_signals();
814e612e 1004 qemu_thread_get_self(cpu->thread);
296af7c9 1005
296af7c9 1006 qemu_mutex_lock(&qemu_global_mutex);
38fcbd3f
AF
1007 CPU_FOREACH(cpu) {
1008 cpu->thread_id = qemu_get_thread_id();
1009 cpu->created = true;
626cf8f4 1010 cpu->can_do_io = 1;
38fcbd3f 1011 }
296af7c9
BS
1012 qemu_cond_signal(&qemu_cpu_cond);
1013
fa7d1867 1014 /* wait for initial kick-off after machine start */
bdc44640 1015 while (QTAILQ_FIRST(&cpus)->stopped) {
fa7d1867 1016 qemu_cond_wait(tcg_halt_cond, &qemu_global_mutex);
8e564b4e
JK
1017
1018 /* process any pending work */
bdc44640 1019 CPU_FOREACH(cpu) {
182735ef 1020 qemu_wait_io_event_common(cpu);
8e564b4e 1021 }
0ab07c62 1022 }
296af7c9
BS
1023
1024 while (1) {
bdb7ca67 1025 tcg_exec_all();
ac70aafc
AB
1026
1027 if (use_icount) {
40daca54 1028 int64_t deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
ac70aafc
AB
1029
1030 if (deadline == 0) {
40daca54 1031 qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
ac70aafc 1032 }
3b2319a3 1033 }
6cabe1f3 1034 qemu_tcg_wait_io_event();
296af7c9
BS
1035 }
1036
1037 return NULL;
1038}
1039
2ff09a40 1040static void qemu_cpu_kick_thread(CPUState *cpu)
cc015e9a
PB
1041{
1042#ifndef _WIN32
1043 int err;
1044
814e612e 1045 err = pthread_kill(cpu->thread->thread, SIG_IPI);
cc015e9a
PB
1046 if (err) {
1047 fprintf(stderr, "qemu:%s: %s", __func__, strerror(err));
1048 exit(1);
1049 }
1050#else /* _WIN32 */
60e82579 1051 if (!qemu_cpu_is_self(cpu)) {
ed9164a3
OH
1052 CONTEXT tcgContext;
1053
1054 if (SuspendThread(cpu->hThread) == (DWORD)-1) {
7f1721df 1055 fprintf(stderr, "qemu:%s: GetLastError:%lu\n", __func__,
ed9164a3
OH
1056 GetLastError());
1057 exit(1);
1058 }
1059
1060 /* On multi-core systems, we are not sure that the thread is actually
1061 * suspended until we can get the context.
1062 */
1063 tcgContext.ContextFlags = CONTEXT_CONTROL;
1064 while (GetThreadContext(cpu->hThread, &tcgContext) != 0) {
1065 continue;
1066 }
1067
cc015e9a 1068 cpu_signal(0);
ed9164a3
OH
1069
1070 if (ResumeThread(cpu->hThread) == (DWORD)-1) {
7f1721df 1071 fprintf(stderr, "qemu:%s: GetLastError:%lu\n", __func__,
ed9164a3
OH
1072 GetLastError());
1073 exit(1);
1074 }
cc015e9a
PB
1075 }
1076#endif
1077}
1078
c08d7424 1079void qemu_cpu_kick(CPUState *cpu)
296af7c9 1080{
f5c121b8 1081 qemu_cond_broadcast(cpu->halt_cond);
216fc9a4 1082 if (!tcg_enabled() && !cpu->thread_kicked) {
2ff09a40 1083 qemu_cpu_kick_thread(cpu);
216fc9a4 1084 cpu->thread_kicked = true;
aa2c364b 1085 }
296af7c9
BS
1086}
1087
46d62fac 1088void qemu_cpu_kick_self(void)
296af7c9 1089{
b55c22c6 1090#ifndef _WIN32
4917cf44 1091 assert(current_cpu);
296af7c9 1092
4917cf44
AF
1093 if (!current_cpu->thread_kicked) {
1094 qemu_cpu_kick_thread(current_cpu);
1095 current_cpu->thread_kicked = true;
296af7c9 1096 }
b55c22c6
PB
1097#else
1098 abort();
1099#endif
296af7c9
BS
1100}
1101
60e82579 1102bool qemu_cpu_is_self(CPUState *cpu)
296af7c9 1103{
814e612e 1104 return qemu_thread_is_self(cpu->thread);
296af7c9
BS
1105}
1106
aa723c23
JQ
1107static bool qemu_in_vcpu_thread(void)
1108{
4917cf44 1109 return current_cpu && qemu_cpu_is_self(current_cpu);
aa723c23
JQ
1110}
1111
296af7c9
BS
1112void qemu_mutex_lock_iothread(void)
1113{
c7f0f3b1 1114 if (!tcg_enabled()) {
296af7c9 1115 qemu_mutex_lock(&qemu_global_mutex);
1a28cac3 1116 } else {
46daff13 1117 iothread_requesting_mutex = true;
1a28cac3 1118 if (qemu_mutex_trylock(&qemu_global_mutex)) {
182735ef 1119 qemu_cpu_kick_thread(first_cpu);
1a28cac3
MT
1120 qemu_mutex_lock(&qemu_global_mutex);
1121 }
46daff13
PB
1122 iothread_requesting_mutex = false;
1123 qemu_cond_broadcast(&qemu_io_proceeded_cond);
1a28cac3 1124 }
296af7c9
BS
1125}
1126
1127void qemu_mutex_unlock_iothread(void)
1128{
1129 qemu_mutex_unlock(&qemu_global_mutex);
1130}
1131
1132static int all_vcpus_paused(void)
1133{
bdc44640 1134 CPUState *cpu;
296af7c9 1135
bdc44640 1136 CPU_FOREACH(cpu) {
182735ef 1137 if (!cpu->stopped) {
296af7c9 1138 return 0;
0ab07c62 1139 }
296af7c9
BS
1140 }
1141
1142 return 1;
1143}
1144
1145void pause_all_vcpus(void)
1146{
bdc44640 1147 CPUState *cpu;
296af7c9 1148
40daca54 1149 qemu_clock_enable(QEMU_CLOCK_VIRTUAL, false);
bdc44640 1150 CPU_FOREACH(cpu) {
182735ef
AF
1151 cpu->stop = true;
1152 qemu_cpu_kick(cpu);
296af7c9
BS
1153 }
1154
aa723c23 1155 if (qemu_in_vcpu_thread()) {
d798e974
JK
1156 cpu_stop_current();
1157 if (!kvm_enabled()) {
bdc44640 1158 CPU_FOREACH(cpu) {
182735ef
AF
1159 cpu->stop = false;
1160 cpu->stopped = true;
d798e974
JK
1161 }
1162 return;
1163 }
1164 }
1165
296af7c9 1166 while (!all_vcpus_paused()) {
be7d6c57 1167 qemu_cond_wait(&qemu_pause_cond, &qemu_global_mutex);
bdc44640 1168 CPU_FOREACH(cpu) {
182735ef 1169 qemu_cpu_kick(cpu);
296af7c9
BS
1170 }
1171 }
1172}
1173
2993683b
IM
1174void cpu_resume(CPUState *cpu)
1175{
1176 cpu->stop = false;
1177 cpu->stopped = false;
1178 qemu_cpu_kick(cpu);
1179}
1180
296af7c9
BS
1181void resume_all_vcpus(void)
1182{
bdc44640 1183 CPUState *cpu;
296af7c9 1184
40daca54 1185 qemu_clock_enable(QEMU_CLOCK_VIRTUAL, true);
bdc44640 1186 CPU_FOREACH(cpu) {
182735ef 1187 cpu_resume(cpu);
296af7c9
BS
1188 }
1189}
1190
4900116e
DDAG
1191/* For temporary buffers for forming a name */
1192#define VCPU_THREAD_NAME_SIZE 16
1193
e5ab30a2 1194static void qemu_tcg_init_vcpu(CPUState *cpu)
296af7c9 1195{
4900116e
DDAG
1196 char thread_name[VCPU_THREAD_NAME_SIZE];
1197
09daed84
EI
1198 tcg_cpu_address_space_init(cpu, cpu->as);
1199
296af7c9
BS
1200 /* share a single thread for all cpus with TCG */
1201 if (!tcg_cpu_thread) {
814e612e 1202 cpu->thread = g_malloc0(sizeof(QemuThread));
f5c121b8
AF
1203 cpu->halt_cond = g_malloc0(sizeof(QemuCond));
1204 qemu_cond_init(cpu->halt_cond);
1205 tcg_halt_cond = cpu->halt_cond;
4900116e
DDAG
1206 snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/TCG",
1207 cpu->cpu_index);
1208 qemu_thread_create(cpu->thread, thread_name, qemu_tcg_cpu_thread_fn,
1209 cpu, QEMU_THREAD_JOINABLE);
1ecf47bf 1210#ifdef _WIN32
814e612e 1211 cpu->hThread = qemu_thread_get_handle(cpu->thread);
1ecf47bf 1212#endif
61a46217 1213 while (!cpu->created) {
18a85728 1214 qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
0ab07c62 1215 }
814e612e 1216 tcg_cpu_thread = cpu->thread;
296af7c9 1217 } else {
814e612e 1218 cpu->thread = tcg_cpu_thread;
f5c121b8 1219 cpu->halt_cond = tcg_halt_cond;
296af7c9
BS
1220 }
1221}
1222
48a106bd 1223static void qemu_kvm_start_vcpu(CPUState *cpu)
296af7c9 1224{
4900116e
DDAG
1225 char thread_name[VCPU_THREAD_NAME_SIZE];
1226
814e612e 1227 cpu->thread = g_malloc0(sizeof(QemuThread));
f5c121b8
AF
1228 cpu->halt_cond = g_malloc0(sizeof(QemuCond));
1229 qemu_cond_init(cpu->halt_cond);
4900116e
DDAG
1230 snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/KVM",
1231 cpu->cpu_index);
1232 qemu_thread_create(cpu->thread, thread_name, qemu_kvm_cpu_thread_fn,
1233 cpu, QEMU_THREAD_JOINABLE);
61a46217 1234 while (!cpu->created) {
18a85728 1235 qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
0ab07c62 1236 }
296af7c9
BS
1237}
1238
10a9021d 1239static void qemu_dummy_start_vcpu(CPUState *cpu)
c7f0f3b1 1240{
4900116e
DDAG
1241 char thread_name[VCPU_THREAD_NAME_SIZE];
1242
814e612e 1243 cpu->thread = g_malloc0(sizeof(QemuThread));
f5c121b8
AF
1244 cpu->halt_cond = g_malloc0(sizeof(QemuCond));
1245 qemu_cond_init(cpu->halt_cond);
4900116e
DDAG
1246 snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/DUMMY",
1247 cpu->cpu_index);
1248 qemu_thread_create(cpu->thread, thread_name, qemu_dummy_cpu_thread_fn, cpu,
c7f0f3b1 1249 QEMU_THREAD_JOINABLE);
61a46217 1250 while (!cpu->created) {
c7f0f3b1
AL
1251 qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
1252 }
1253}
1254
c643bed9 1255void qemu_init_vcpu(CPUState *cpu)
296af7c9 1256{
ce3960eb
AF
1257 cpu->nr_cores = smp_cores;
1258 cpu->nr_threads = smp_threads;
f324e766 1259 cpu->stopped = true;
0ab07c62 1260 if (kvm_enabled()) {
48a106bd 1261 qemu_kvm_start_vcpu(cpu);
c7f0f3b1 1262 } else if (tcg_enabled()) {
e5ab30a2 1263 qemu_tcg_init_vcpu(cpu);
c7f0f3b1 1264 } else {
10a9021d 1265 qemu_dummy_start_vcpu(cpu);
0ab07c62 1266 }
296af7c9
BS
1267}
1268
b4a3d965 1269void cpu_stop_current(void)
296af7c9 1270{
4917cf44
AF
1271 if (current_cpu) {
1272 current_cpu->stop = false;
1273 current_cpu->stopped = true;
1274 cpu_exit(current_cpu);
67bb172f 1275 qemu_cond_signal(&qemu_pause_cond);
b4a3d965 1276 }
296af7c9
BS
1277}
1278
56983463 1279int vm_stop(RunState state)
296af7c9 1280{
aa723c23 1281 if (qemu_in_vcpu_thread()) {
74892d24 1282 qemu_system_vmstop_request_prepare();
1dfb4dd9 1283 qemu_system_vmstop_request(state);
296af7c9
BS
1284 /*
1285 * FIXME: should not return to device code in case
1286 * vm_stop() has been requested.
1287 */
b4a3d965 1288 cpu_stop_current();
56983463 1289 return 0;
296af7c9 1290 }
56983463
KW
1291
1292 return do_vm_stop(state);
296af7c9
BS
1293}
1294
8a9236f1
LC
1295/* does a state transition even if the VM is already stopped,
1296 current state is forgotten forever */
56983463 1297int vm_stop_force_state(RunState state)
8a9236f1
LC
1298{
1299 if (runstate_is_running()) {
56983463 1300 return vm_stop(state);
8a9236f1
LC
1301 } else {
1302 runstate_set(state);
594a45ce
KW
1303 /* Make sure to return an error if the flush in a previous vm_stop()
1304 * failed. */
1305 return bdrv_flush_all();
8a9236f1
LC
1306 }
1307}
1308
9349b4f9 1309static int tcg_cpu_exec(CPUArchState *env)
296af7c9 1310{
efee7340 1311 CPUState *cpu = ENV_GET_CPU(env);
296af7c9
BS
1312 int ret;
1313#ifdef CONFIG_PROFILER
1314 int64_t ti;
1315#endif
1316
1317#ifdef CONFIG_PROFILER
1318 ti = profile_getclock();
1319#endif
1320 if (use_icount) {
1321 int64_t count;
ac70aafc 1322 int64_t deadline;
296af7c9 1323 int decr;
c96778bb
FK
1324 timers_state.qemu_icount -= (cpu->icount_decr.u16.low
1325 + cpu->icount_extra);
28ecfd7a 1326 cpu->icount_decr.u16.low = 0;
efee7340 1327 cpu->icount_extra = 0;
40daca54 1328 deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
ac70aafc
AB
1329
1330 /* Maintain prior (possibly buggy) behaviour where if no deadline
40daca54 1331 * was set (as there is no QEMU_CLOCK_VIRTUAL timer) or it is more than
ac70aafc
AB
1332 * INT32_MAX nanoseconds ahead, we still use INT32_MAX
1333 * nanoseconds.
1334 */
1335 if ((deadline < 0) || (deadline > INT32_MAX)) {
1336 deadline = INT32_MAX;
1337 }
1338
1339 count = qemu_icount_round(deadline);
c96778bb 1340 timers_state.qemu_icount += count;
296af7c9
BS
1341 decr = (count > 0xffff) ? 0xffff : count;
1342 count -= decr;
28ecfd7a 1343 cpu->icount_decr.u16.low = decr;
efee7340 1344 cpu->icount_extra = count;
296af7c9
BS
1345 }
1346 ret = cpu_exec(env);
1347#ifdef CONFIG_PROFILER
1348 qemu_time += profile_getclock() - ti;
1349#endif
1350 if (use_icount) {
1351 /* Fold pending instructions back into the
1352 instruction counter, and clear the interrupt flag. */
c96778bb
FK
1353 timers_state.qemu_icount -= (cpu->icount_decr.u16.low
1354 + cpu->icount_extra);
28ecfd7a 1355 cpu->icount_decr.u32 = 0;
efee7340 1356 cpu->icount_extra = 0;
296af7c9
BS
1357 }
1358 return ret;
1359}
1360
bdb7ca67 1361static void tcg_exec_all(void)
296af7c9 1362{
9a36085b
JK
1363 int r;
1364
40daca54
AB
1365 /* Account partial waits to QEMU_CLOCK_VIRTUAL. */
1366 qemu_clock_warp(QEMU_CLOCK_VIRTUAL);
ab33fcda 1367
0ab07c62 1368 if (next_cpu == NULL) {
296af7c9 1369 next_cpu = first_cpu;
0ab07c62 1370 }
bdc44640 1371 for (; next_cpu != NULL && !exit_request; next_cpu = CPU_NEXT(next_cpu)) {
182735ef
AF
1372 CPUState *cpu = next_cpu;
1373 CPUArchState *env = cpu->env_ptr;
296af7c9 1374
40daca54 1375 qemu_clock_enable(QEMU_CLOCK_VIRTUAL,
ed2803da 1376 (cpu->singlestep_enabled & SSTEP_NOTIMER) == 0);
296af7c9 1377
a1fcaa73 1378 if (cpu_can_run(cpu)) {
bdb7ca67 1379 r = tcg_cpu_exec(env);
9a36085b 1380 if (r == EXCP_DEBUG) {
91325046 1381 cpu_handle_guest_debug(cpu);
3c638d06
JK
1382 break;
1383 }
f324e766 1384 } else if (cpu->stop || cpu->stopped) {
296af7c9
BS
1385 break;
1386 }
1387 }
c629a4bc 1388 exit_request = 0;
296af7c9
BS
1389}
1390
9a78eead 1391void list_cpus(FILE *f, fprintf_function cpu_fprintf, const char *optarg)
262353cb
BS
1392{
1393 /* XXX: implement xxx_cpu_list for targets that still miss it */
e916cbf8
PM
1394#if defined(cpu_list)
1395 cpu_list(f, cpu_fprintf);
262353cb
BS
1396#endif
1397}
de0b36b6
LC
1398
1399CpuInfoList *qmp_query_cpus(Error **errp)
1400{
1401 CpuInfoList *head = NULL, *cur_item = NULL;
182735ef 1402 CPUState *cpu;
de0b36b6 1403
bdc44640 1404 CPU_FOREACH(cpu) {
de0b36b6 1405 CpuInfoList *info;
182735ef
AF
1406#if defined(TARGET_I386)
1407 X86CPU *x86_cpu = X86_CPU(cpu);
1408 CPUX86State *env = &x86_cpu->env;
1409#elif defined(TARGET_PPC)
1410 PowerPCCPU *ppc_cpu = POWERPC_CPU(cpu);
1411 CPUPPCState *env = &ppc_cpu->env;
1412#elif defined(TARGET_SPARC)
1413 SPARCCPU *sparc_cpu = SPARC_CPU(cpu);
1414 CPUSPARCState *env = &sparc_cpu->env;
1415#elif defined(TARGET_MIPS)
1416 MIPSCPU *mips_cpu = MIPS_CPU(cpu);
1417 CPUMIPSState *env = &mips_cpu->env;
48e06fe0
BK
1418#elif defined(TARGET_TRICORE)
1419 TriCoreCPU *tricore_cpu = TRICORE_CPU(cpu);
1420 CPUTriCoreState *env = &tricore_cpu->env;
182735ef 1421#endif
de0b36b6 1422
cb446eca 1423 cpu_synchronize_state(cpu);
de0b36b6
LC
1424
1425 info = g_malloc0(sizeof(*info));
1426 info->value = g_malloc0(sizeof(*info->value));
55e5c285 1427 info->value->CPU = cpu->cpu_index;
182735ef 1428 info->value->current = (cpu == first_cpu);
259186a7 1429 info->value->halted = cpu->halted;
9f09e18a 1430 info->value->thread_id = cpu->thread_id;
de0b36b6
LC
1431#if defined(TARGET_I386)
1432 info->value->has_pc = true;
1433 info->value->pc = env->eip + env->segs[R_CS].base;
1434#elif defined(TARGET_PPC)
1435 info->value->has_nip = true;
1436 info->value->nip = env->nip;
1437#elif defined(TARGET_SPARC)
1438 info->value->has_pc = true;
1439 info->value->pc = env->pc;
1440 info->value->has_npc = true;
1441 info->value->npc = env->npc;
1442#elif defined(TARGET_MIPS)
1443 info->value->has_PC = true;
1444 info->value->PC = env->active_tc.PC;
48e06fe0
BK
1445#elif defined(TARGET_TRICORE)
1446 info->value->has_PC = true;
1447 info->value->PC = env->PC;
de0b36b6
LC
1448#endif
1449
1450 /* XXX: waiting for the qapi to support GSList */
1451 if (!cur_item) {
1452 head = cur_item = info;
1453 } else {
1454 cur_item->next = info;
1455 cur_item = info;
1456 }
1457 }
1458
1459 return head;
1460}
0cfd6a9a
LC
1461
1462void qmp_memsave(int64_t addr, int64_t size, const char *filename,
1463 bool has_cpu, int64_t cpu_index, Error **errp)
1464{
1465 FILE *f;
1466 uint32_t l;
55e5c285 1467 CPUState *cpu;
0cfd6a9a
LC
1468 uint8_t buf[1024];
1469
1470 if (!has_cpu) {
1471 cpu_index = 0;
1472 }
1473
151d1322
AF
1474 cpu = qemu_get_cpu(cpu_index);
1475 if (cpu == NULL) {
0cfd6a9a
LC
1476 error_set(errp, QERR_INVALID_PARAMETER_VALUE, "cpu-index",
1477 "a CPU number");
1478 return;
1479 }
1480
1481 f = fopen(filename, "wb");
1482 if (!f) {
618da851 1483 error_setg_file_open(errp, errno, filename);
0cfd6a9a
LC
1484 return;
1485 }
1486
1487 while (size != 0) {
1488 l = sizeof(buf);
1489 if (l > size)
1490 l = size;
2f4d0f59
AK
1491 if (cpu_memory_rw_debug(cpu, addr, buf, l, 0) != 0) {
1492 error_setg(errp, "Invalid addr 0x%016" PRIx64 "specified", addr);
1493 goto exit;
1494 }
0cfd6a9a
LC
1495 if (fwrite(buf, 1, l, f) != l) {
1496 error_set(errp, QERR_IO_ERROR);
1497 goto exit;
1498 }
1499 addr += l;
1500 size -= l;
1501 }
1502
1503exit:
1504 fclose(f);
1505}
6d3962bf
LC
1506
1507void qmp_pmemsave(int64_t addr, int64_t size, const char *filename,
1508 Error **errp)
1509{
1510 FILE *f;
1511 uint32_t l;
1512 uint8_t buf[1024];
1513
1514 f = fopen(filename, "wb");
1515 if (!f) {
618da851 1516 error_setg_file_open(errp, errno, filename);
6d3962bf
LC
1517 return;
1518 }
1519
1520 while (size != 0) {
1521 l = sizeof(buf);
1522 if (l > size)
1523 l = size;
eb6282f2 1524 cpu_physical_memory_read(addr, buf, l);
6d3962bf
LC
1525 if (fwrite(buf, 1, l, f) != l) {
1526 error_set(errp, QERR_IO_ERROR);
1527 goto exit;
1528 }
1529 addr += l;
1530 size -= l;
1531 }
1532
1533exit:
1534 fclose(f);
1535}
ab49ab5c
LC
1536
1537void qmp_inject_nmi(Error **errp)
1538{
1539#if defined(TARGET_I386)
182735ef
AF
1540 CPUState *cs;
1541
bdc44640 1542 CPU_FOREACH(cs) {
182735ef 1543 X86CPU *cpu = X86_CPU(cs);
ab49ab5c 1544
02e51483 1545 if (!cpu->apic_state) {
182735ef 1546 cpu_interrupt(cs, CPU_INTERRUPT_NMI);
02c09195 1547 } else {
02e51483 1548 apic_deliver_nmi(cpu->apic_state);
02c09195 1549 }
ab49ab5c
LC
1550 }
1551#else
9cb805fd 1552 nmi_monitor_handle(monitor_get_cpu_index(), errp);
ab49ab5c
LC
1553#endif
1554}
27498bef
ST
1555
1556void dump_drift_info(FILE *f, fprintf_function cpu_fprintf)
1557{
1558 if (!use_icount) {
1559 return;
1560 }
1561
1562 cpu_fprintf(f, "Host - Guest clock %"PRIi64" ms\n",
1563 (cpu_get_clock() - cpu_get_icount())/SCALE_MS);
1564 if (icount_align_option) {
1565 cpu_fprintf(f, "Max guest delay %"PRIi64" ms\n", -max_delay/SCALE_MS);
1566 cpu_fprintf(f, "Max guest advance %"PRIi64" ms\n", max_advance/SCALE_MS);
1567 } else {
1568 cpu_fprintf(f, "Max guest delay NA\n");
1569 cpu_fprintf(f, "Max guest advance NA\n");
1570 }
1571}