/*
 * QEMU System Emulator
 *
 * Copyright (c) 2003-2008 Fabrice Bellard
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

/* Needed early for CONFIG_BSD etc. */
#include "config-host.h"

#include "monitor/monitor.h"
#include "qapi/qmp/qerror.h"
#include "qemu/error-report.h"
#include "sysemu/sysemu.h"
#include "exec/gdbstub.h"
#include "sysemu/dma.h"
#include "sysemu/kvm.h"
#include "qmp-commands.h"

#include "qemu/thread.h"
#include "sysemu/cpus.h"
#include "sysemu/qtest.h"
#include "qemu/main-loop.h"
#include "qemu/bitmap.h"
#include "qemu/seqlock.h"
#include "qapi-event.h"
#include "hw/nmi.h"
#include "sysemu/replay.h"

#ifndef _WIN32
#include "qemu/compatfd.h"
#endif

#ifdef CONFIG_LINUX

#include <sys/prctl.h>

#ifndef PR_MCE_KILL
#define PR_MCE_KILL 33
#endif

#ifndef PR_MCE_KILL_SET
#define PR_MCE_KILL_SET 1
#endif

#ifndef PR_MCE_KILL_EARLY
#define PR_MCE_KILL_EARLY 1
#endif

#endif /* CONFIG_LINUX */

static CPUState *next_cpu;
int64_t max_delay;
int64_t max_advance;

/* vcpu throttling controls */
static QEMUTimer *throttle_timer;
static unsigned int throttle_percentage;

#define CPU_THROTTLE_PCT_MIN 1
#define CPU_THROTTLE_PCT_MAX 99
#define CPU_THROTTLE_TIMESLICE_NS 10000000

bool cpu_is_stopped(CPUState *cpu)
{
    return cpu->stopped || !runstate_is_running();
}

static bool cpu_thread_is_idle(CPUState *cpu)
{
    if (cpu->stop || cpu->queued_work_first) {
        return false;
    }
    if (cpu_is_stopped(cpu)) {
        return true;
    }
    if (!cpu->halted || cpu_has_work(cpu) ||
        kvm_halt_in_kernel()) {
        return false;
    }
    return true;
}

static bool all_cpu_threads_idle(void)
{
    CPUState *cpu;

    CPU_FOREACH(cpu) {
        if (!cpu_thread_is_idle(cpu)) {
            return false;
        }
    }
    return true;
}

/***********************************************************/
/* guest cycle counter */

/* Protected by TimersState seqlock */

static bool icount_sleep = true;
static int64_t vm_clock_warp_start = -1;
/* Conversion factor from emulated instructions to virtual clock ticks.  */
static int icount_time_shift;
/* Arbitrarily pick 1MIPS as the minimum allowable speed.  */
#define MAX_ICOUNT_SHIFT 10

static QEMUTimer *icount_rt_timer;
static QEMUTimer *icount_vm_timer;
static QEMUTimer *icount_warp_timer;

typedef struct TimersState {
    /* Protected by BQL.  */
    int64_t cpu_ticks_prev;
    int64_t cpu_ticks_offset;

    /* cpu_clock_offset can be read out of BQL, so protect it with
     * this lock.
     */
    QemuSeqLock vm_clock_seqlock;
    int64_t cpu_clock_offset;
    int32_t cpu_ticks_enabled;
    int64_t dummy;

    /* Compensate for varying guest execution speed.  */
    int64_t qemu_icount_bias;
    /* Only written by TCG thread */
    int64_t qemu_icount;
} TimersState;

static TimersState timers_state;

int64_t cpu_get_icount_raw(void)
{
    int64_t icount;
    CPUState *cpu = current_cpu;

    icount = timers_state.qemu_icount;
    if (cpu) {
        if (!cpu->can_do_io) {
            fprintf(stderr, "Bad icount read\n");
            exit(1);
        }
        icount -= (cpu->icount_decr.u16.low + cpu->icount_extra);
    }
    return icount;
}

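/* A note on the accounting above: timers_state.qemu_icount holds the
 * instruction budget handed out so far, while icount_decr.u16.low and
 * icount_extra hold the portion of the current budget that has not been
 * executed yet (see tcg_cpu_exec() below).  Subtracting the unexecuted
 * remainder therefore yields the number of instructions actually retired.
 */
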
/* Return the virtual CPU time, based on the instruction counter.  */
static int64_t cpu_get_icount_locked(void)
{
    int64_t icount = cpu_get_icount_raw();
    return timers_state.qemu_icount_bias + cpu_icount_to_ns(icount);
}

int64_t cpu_get_icount(void)
{
    int64_t icount;
    unsigned start;

    do {
        start = seqlock_read_begin(&timers_state.vm_clock_seqlock);
        icount = cpu_get_icount_locked();
    } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start));

    return icount;
}

int64_t cpu_icount_to_ns(int64_t icount)
{
    return icount << icount_time_shift;
}

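/* Worked example of the icount scaling (an illustrative note, not from
 * the original source): each instruction is credited 2^icount_time_shift
 * nanoseconds of virtual time.  With the default shift of 3 chosen in
 * configure_icount() below, one instruction is worth 8 ns, i.e. an
 * implied guest speed of 10^9 / 8 = 125 MIPS.
 */
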
/* return the host CPU cycle counter and handle stop/restart */
/* Caller must hold the BQL */
int64_t cpu_get_ticks(void)
{
    int64_t ticks;

    if (use_icount) {
        return cpu_get_icount();
    }

    ticks = timers_state.cpu_ticks_offset;
    if (timers_state.cpu_ticks_enabled) {
        ticks += cpu_get_host_ticks();
    }

    if (timers_state.cpu_ticks_prev > ticks) {
        /* Note: non-increasing ticks may happen if the host uses
           software suspend.  */
        timers_state.cpu_ticks_offset += timers_state.cpu_ticks_prev - ticks;
        ticks = timers_state.cpu_ticks_prev;
    }

    timers_state.cpu_ticks_prev = ticks;
    return ticks;
}

static int64_t cpu_get_clock_locked(void)
{
    int64_t ticks;

    ticks = timers_state.cpu_clock_offset;
    if (timers_state.cpu_ticks_enabled) {
        ticks += get_clock();
    }

    return ticks;
}

/* return the host CPU monotonic timer and handle stop/restart */
int64_t cpu_get_clock(void)
{
    int64_t ti;
    unsigned start;

    do {
        start = seqlock_read_begin(&timers_state.vm_clock_seqlock);
        ti = cpu_get_clock_locked();
    } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start));

    return ti;
}

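/* The read loops in cpu_get_icount() and cpu_get_clock() follow the usual
 * seqlock pattern: a lock-free reader snapshots the sequence counter,
 * reads the protected fields, and retries if a writer ran concurrently.
 * Writers serialize on the BQL, which is why the write side needs no
 * dedicated mutex here.
 */
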
/* enable cpu_get_ticks()
 * Caller must hold BQL which serves as mutex for vm_clock_seqlock.
 */
void cpu_enable_ticks(void)
{
    /* Here, the only thing actually protected by the seqlock is
     * cpu_clock_offset.  */
    seqlock_write_lock(&timers_state.vm_clock_seqlock);
    if (!timers_state.cpu_ticks_enabled) {
        timers_state.cpu_ticks_offset -= cpu_get_host_ticks();
        timers_state.cpu_clock_offset -= get_clock();
        timers_state.cpu_ticks_enabled = 1;
    }
    seqlock_write_unlock(&timers_state.vm_clock_seqlock);
}

/* disable cpu_get_ticks() : the clock is stopped. You must not call
 * cpu_get_ticks() after that.
 * Caller must hold BQL which serves as mutex for vm_clock_seqlock.
 */
void cpu_disable_ticks(void)
{
    /* Here, the only thing actually protected by the seqlock is
     * cpu_clock_offset.  */
    seqlock_write_lock(&timers_state.vm_clock_seqlock);
    if (timers_state.cpu_ticks_enabled) {
        timers_state.cpu_ticks_offset += cpu_get_host_ticks();
        timers_state.cpu_clock_offset = cpu_get_clock_locked();
        timers_state.cpu_ticks_enabled = 0;
    }
    seqlock_write_unlock(&timers_state.vm_clock_seqlock);
}

/* Correlation between real and virtual time is always going to be
   fairly approximate, so ignore small variation.
   When the guest is idle real and virtual time will be aligned in
   the IO wait loop.  */
#define ICOUNT_WOBBLE (get_ticks_per_sec() / 10)

static void icount_adjust(void)
{
    int64_t cur_time;
    int64_t cur_icount;
    int64_t delta;

    /* Protected by TimersState mutex.  */
    static int64_t last_delta;

    /* If the VM is not running, then do nothing.  */
    if (!runstate_is_running()) {
        return;
    }

    seqlock_write_lock(&timers_state.vm_clock_seqlock);
    cur_time = cpu_get_clock_locked();
    cur_icount = cpu_get_icount_locked();

    delta = cur_icount - cur_time;
    /* FIXME: This is a very crude algorithm, somewhat prone to oscillation.  */
    if (delta > 0
        && last_delta + ICOUNT_WOBBLE < delta * 2
        && icount_time_shift > 0) {
        /* The guest is getting too far ahead.  Slow time down.  */
        icount_time_shift--;
    }
    if (delta < 0
        && last_delta - ICOUNT_WOBBLE > delta * 2
        && icount_time_shift < MAX_ICOUNT_SHIFT) {
        /* The guest is getting too far behind.  Speed time up.  */
        icount_time_shift++;
    }
    last_delta = delta;
    timers_state.qemu_icount_bias = cur_icount
                          - (timers_state.qemu_icount << icount_time_shift);
    seqlock_write_unlock(&timers_state.vm_clock_seqlock);
}

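/* Intuition for the adjustment above (an illustrative note): delta > 0
 * means the virtual clock has run ahead of real time, so the shift is
 * decremented and each instruction is credited fewer nanoseconds; e.g.
 * going from shift 3 to shift 2 halves the charge from 8 ns to 4 ns per
 * instruction, which corresponds to assuming a faster guest (250 rather
 * than 125 MIPS).  The bias is then recomputed so the virtual clock does
 * not jump when the scale changes.
 */
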
static void icount_adjust_rt(void *opaque)
{
    timer_mod(icount_rt_timer,
              qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL_RT) + 1000);
    icount_adjust();
}

static void icount_adjust_vm(void *opaque)
{
    timer_mod(icount_vm_timer,
              qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
              get_ticks_per_sec() / 10);
    icount_adjust();
}

static int64_t qemu_icount_round(int64_t count)
{
    return (count + (1 << icount_time_shift) - 1) >> icount_time_shift;
}

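/* qemu_icount_round() converts a deadline in nanoseconds into an
 * instruction count, rounding up.  For example (illustrative), with
 * icount_time_shift == 3 a 20 ns deadline becomes (20 + 7) >> 3 = 3
 * instructions, i.e. ceil(20 / 8).
 */
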
static void icount_warp_rt(void *opaque)
{
    /* The icount_warp_timer is rescheduled soon after vm_clock_warp_start
     * changes from -1 to another value, so the race here is okay.
     */
    if (atomic_read(&vm_clock_warp_start) == -1) {
        return;
    }

    seqlock_write_lock(&timers_state.vm_clock_seqlock);
    if (runstate_is_running()) {
        int64_t clock = REPLAY_CLOCK(REPLAY_CLOCK_VIRTUAL_RT,
                                     cpu_get_clock_locked());
        int64_t warp_delta;

        warp_delta = clock - vm_clock_warp_start;
        if (use_icount == 2) {
            /*
             * In adaptive mode, do not let QEMU_CLOCK_VIRTUAL run too
             * far ahead of real time.
             */
            int64_t cur_icount = cpu_get_icount_locked();
            int64_t delta = clock - cur_icount;
            warp_delta = MIN(warp_delta, delta);
        }
        timers_state.qemu_icount_bias += warp_delta;
    }
    vm_clock_warp_start = -1;
    seqlock_write_unlock(&timers_state.vm_clock_seqlock);

    if (qemu_clock_expired(QEMU_CLOCK_VIRTUAL)) {
        qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
    }
}

void qtest_clock_warp(int64_t dest)
{
    int64_t clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
    AioContext *aio_context;
    assert(qtest_enabled());
    aio_context = qemu_get_aio_context();
    while (clock < dest) {
        int64_t deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
        int64_t warp = qemu_soonest_timeout(dest - clock, deadline);

        seqlock_write_lock(&timers_state.vm_clock_seqlock);
        timers_state.qemu_icount_bias += warp;
        seqlock_write_unlock(&timers_state.vm_clock_seqlock);

        qemu_clock_run_timers(QEMU_CLOCK_VIRTUAL);
        timerlist_run_timers(aio_context->tlg.tl[QEMU_CLOCK_VIRTUAL]);
        clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
    }
    qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
}

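/* A summary of the warp logic below (an editorial note): when all vCPUs
 * are idle, qemu_clock_warp() looks at the earliest QEMU_CLOCK_VIRTUAL
 * deadline.  A negative deadline means no timer is pending, so there is
 * nothing to warp to; a zero deadline means a timer is already due, so
 * the clock is only notified; a positive deadline either advances the
 * clock immediately (sleep=off) or arms icount_warp_timer so the bias is
 * added after a matching amount of real time has passed (sleep=on).
 */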
void qemu_clock_warp(QEMUClockType type)
{
    int64_t clock;
    int64_t deadline;

    /*
     * There are too many global variables to make the "warp" behavior
     * applicable to other clocks.  But a clock argument removes the
     * need for if statements all over the place.
     */
    if (type != QEMU_CLOCK_VIRTUAL || !use_icount) {
        return;
    }

    if (icount_sleep) {
        /*
         * If the CPUs have been sleeping, advance QEMU_CLOCK_VIRTUAL timer now.
         * This ensures that the deadline for the timer is computed correctly
         * below.
         * This also makes sure that the insn counter is synchronized before
         * the CPU starts running, in case the CPU is woken by an event other
         * than the earliest QEMU_CLOCK_VIRTUAL timer.
         */
        icount_warp_rt(NULL);
        timer_del(icount_warp_timer);
    }
    if (!all_cpu_threads_idle()) {
        return;
    }

    if (qtest_enabled()) {
        /* When testing, qtest commands advance icount.  */
        return;
    }

    /* We want to use the earliest deadline from ALL vm_clocks */
    clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT);
    deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
    if (deadline < 0) {
        static bool notified;
        if (!icount_sleep && !notified) {
            error_report("WARNING: icount sleep disabled and no active timers");
            notified = true;
        }
        return;
    }

    if (deadline > 0) {
        /*
         * Ensure QEMU_CLOCK_VIRTUAL proceeds even when the virtual CPU goes to
         * sleep.  Otherwise, the CPU might be waiting for a future timer
         * interrupt to wake it up, but the interrupt never comes because
         * the vCPU isn't running any insns and thus doesn't advance the
         * QEMU_CLOCK_VIRTUAL.
         */
        if (!icount_sleep) {
            /*
             * We never let VCPUs sleep in no-sleep icount mode.
             * If there is a pending QEMU_CLOCK_VIRTUAL timer we just advance
             * to the next QEMU_CLOCK_VIRTUAL event and notify it.
             * It is useful when we want a deterministic execution time,
             * isolated from host latencies.
             */
            seqlock_write_lock(&timers_state.vm_clock_seqlock);
            timers_state.qemu_icount_bias += deadline;
            seqlock_write_unlock(&timers_state.vm_clock_seqlock);
            qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
        } else {
            /*
             * We do stop VCPUs and only advance QEMU_CLOCK_VIRTUAL after some
             * "real" time (related to the time left until the next event) has
             * passed.  The QEMU_CLOCK_VIRTUAL_RT clock will do this.
             * This prevents the warps from being visible externally; for
             * example, you will not be sending network packets continuously
             * instead of every 100ms.
             */
            seqlock_write_lock(&timers_state.vm_clock_seqlock);
            if (vm_clock_warp_start == -1 || vm_clock_warp_start > clock) {
                vm_clock_warp_start = clock;
            }
            seqlock_write_unlock(&timers_state.vm_clock_seqlock);
            timer_mod_anticipate(icount_warp_timer, clock + deadline);
        }
    } else if (deadline == 0) {
        qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
    }
}

static bool icount_state_needed(void *opaque)
{
    return use_icount;
}

/*
 * This is a subsection for icount migration.
 */
static const VMStateDescription icount_vmstate_timers = {
    .name = "timer/icount",
    .version_id = 1,
    .minimum_version_id = 1,
    .needed = icount_state_needed,
    .fields = (VMStateField[]) {
        VMSTATE_INT64(qemu_icount_bias, TimersState),
        VMSTATE_INT64(qemu_icount, TimersState),
        VMSTATE_END_OF_LIST()
    }
};

static const VMStateDescription vmstate_timers = {
    .name = "timer",
    .version_id = 2,
    .minimum_version_id = 1,
    .fields = (VMStateField[]) {
        VMSTATE_INT64(cpu_ticks_offset, TimersState),
        VMSTATE_INT64(dummy, TimersState),
        VMSTATE_INT64_V(cpu_clock_offset, TimersState, 2),
        VMSTATE_END_OF_LIST()
    },
    .subsections = (const VMStateDescription*[]) {
        &icount_vmstate_timers,
        NULL
    }
};

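/* How the throttling math works (an explanatory note): with throttle
 * percentage pct, each 10 ms timeslice is stretched so that the vCPU is
 * asleep for pct of the total.  sleep = pct / (1 - pct) * timeslice;
 * e.g. at 50% the vCPU sleeps 10 ms for every 10 ms of run time, and at
 * 99% (CPU_THROTTLE_PCT_MAX) it sleeps 99 timeslices for each one it runs.
 */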
static void cpu_throttle_thread(void *opaque)
{
    CPUState *cpu = opaque;
    double pct;
    double throttle_ratio;
    long sleeptime_ns;

    if (!cpu_throttle_get_percentage()) {
        return;
    }

    pct = (double)cpu_throttle_get_percentage()/100;
    throttle_ratio = pct / (1 - pct);
    sleeptime_ns = (long)(throttle_ratio * CPU_THROTTLE_TIMESLICE_NS);

    qemu_mutex_unlock_iothread();
    atomic_set(&cpu->throttle_thread_scheduled, 0);
    g_usleep(sleeptime_ns / 1000); /* Convert ns to us for usleep call */
    qemu_mutex_lock_iothread();
}

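/* The tick below reschedules itself every CPU_THROTTLE_TIMESLICE_NS
 * divided by (1 - pct) of virtual-RT time, so the run+sleep cycle keeps
 * a fixed 10 ms running portion; e.g. (illustrative) at pct = 0.5 the
 * timer fires every 20 ms: 10 ms of execution plus 10 ms of sleep.
 */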
static void cpu_throttle_timer_tick(void *opaque)
{
    CPUState *cpu;
    double pct;

    /* Stop the timer if needed */
    if (!cpu_throttle_get_percentage()) {
        return;
    }
    CPU_FOREACH(cpu) {
        if (!atomic_xchg(&cpu->throttle_thread_scheduled, 1)) {
            async_run_on_cpu(cpu, cpu_throttle_thread, cpu);
        }
    }

    pct = (double)cpu_throttle_get_percentage()/100;
    timer_mod(throttle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT) +
                              CPU_THROTTLE_TIMESLICE_NS / (1-pct));
}

void cpu_throttle_set(int new_throttle_pct)
{
    /* Ensure throttle percentage is within valid range */
    new_throttle_pct = MIN(new_throttle_pct, CPU_THROTTLE_PCT_MAX);
    new_throttle_pct = MAX(new_throttle_pct, CPU_THROTTLE_PCT_MIN);

    atomic_set(&throttle_percentage, new_throttle_pct);

    timer_mod(throttle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT) +
                              CPU_THROTTLE_TIMESLICE_NS);
}

void cpu_throttle_stop(void)
{
    atomic_set(&throttle_percentage, 0);
}

bool cpu_throttle_active(void)
{
    return (cpu_throttle_get_percentage() != 0);
}

int cpu_throttle_get_percentage(void)
{
    return atomic_read(&throttle_percentage);
}

void cpu_ticks_init(void)
{
    seqlock_init(&timers_state.vm_clock_seqlock, NULL);
    vmstate_register(NULL, 0, &vmstate_timers, &timers_state);
    throttle_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL_RT,
                                  cpu_throttle_timer_tick, NULL);
}

void configure_icount(QemuOpts *opts, Error **errp)
{
    const char *option;
    char *rem_str = NULL;

    option = qemu_opt_get(opts, "shift");
    if (!option) {
        if (qemu_opt_get(opts, "align") != NULL) {
            error_setg(errp, "Please specify shift option when using align");
        }
        return;
    }

    icount_sleep = qemu_opt_get_bool(opts, "sleep", true);
    if (icount_sleep) {
        icount_warp_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL_RT,
                                         icount_warp_rt, NULL);
    }

    icount_align_option = qemu_opt_get_bool(opts, "align", false);

    if (icount_align_option && !icount_sleep) {
        error_setg(errp, "align=on and sleep=no are incompatible");
    }
    if (strcmp(option, "auto") != 0) {
        errno = 0;
        icount_time_shift = strtol(option, &rem_str, 0);
        if (errno != 0 || *rem_str != '\0' || !strlen(option)) {
            error_setg(errp, "icount: Invalid shift value");
        }
        use_icount = 1;
        return;
    } else if (icount_align_option) {
        error_setg(errp, "shift=auto and align=on are incompatible");
    } else if (!icount_sleep) {
        error_setg(errp, "shift=auto and sleep=no are incompatible");
    }

    use_icount = 2;

    /* 125MIPS seems a reasonable initial guess at the guest speed.
       It will be corrected fairly quickly anyway.  */
    icount_time_shift = 3;

    /* Have both realtime and virtual time triggers for speed adjustment.
       The realtime trigger catches emulated time passing too slowly,
       the virtual time trigger catches emulated time passing too fast.
       Realtime triggers occur even when idle, so use them less frequently
       than VM triggers.  */
    icount_rt_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL_RT,
                                   icount_adjust_rt, NULL);
    timer_mod(icount_rt_timer,
              qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL_RT) + 1000);
    icount_vm_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
                                   icount_adjust_vm, NULL);
    timer_mod(icount_vm_timer,
              qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
              get_ticks_per_sec() / 10);
}

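/* For reference (an editorial note, not part of the original file): these
 * options come from the -icount command line switch, e.g.
 *
 *     qemu-system-x86_64 -icount shift=7,align=on
 *     qemu-system-x86_64 -icount shift=auto,sleep=off
 *
 * where a fixed shift selects use_icount == 1 and shift=auto selects the
 * adaptive mode (use_icount == 2) tuned by icount_adjust() above.
 */
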
/***********************************************************/
void hw_error(const char *fmt, ...)
{
    va_list ap;
    CPUState *cpu;

    va_start(ap, fmt);
    fprintf(stderr, "qemu: hardware error: ");
    vfprintf(stderr, fmt, ap);
    fprintf(stderr, "\n");
    CPU_FOREACH(cpu) {
        fprintf(stderr, "CPU #%d:\n", cpu->cpu_index);
        cpu_dump_state(cpu, stderr, fprintf, CPU_DUMP_FPU);
    }
    va_end(ap);
    abort();
}

void cpu_synchronize_all_states(void)
{
    CPUState *cpu;

    CPU_FOREACH(cpu) {
        cpu_synchronize_state(cpu);
    }
}

void cpu_synchronize_all_post_reset(void)
{
    CPUState *cpu;

    CPU_FOREACH(cpu) {
        cpu_synchronize_post_reset(cpu);
    }
}

void cpu_synchronize_all_post_init(void)
{
    CPUState *cpu;

    CPU_FOREACH(cpu) {
        cpu_synchronize_post_init(cpu);
    }
}

void cpu_clean_all_dirty(void)
{
    CPUState *cpu;

    CPU_FOREACH(cpu) {
        cpu_clean_state(cpu);
    }
}

static int do_vm_stop(RunState state)
{
    int ret = 0;

    if (runstate_is_running()) {
        cpu_disable_ticks();
        pause_all_vcpus();
        runstate_set(state);
        vm_state_notify(0, state);
        qapi_event_send_stop(&error_abort);
    }

    bdrv_drain_all();
    ret = bdrv_flush_all();

    return ret;
}

static bool cpu_can_run(CPUState *cpu)
{
    if (cpu->stop) {
        return false;
    }
    if (cpu_is_stopped(cpu)) {
        return false;
    }
    return true;
}

static void cpu_handle_guest_debug(CPUState *cpu)
{
    gdb_set_stop_cpu(cpu);
    qemu_system_debug_request();
    cpu->stopped = true;
}

#ifdef CONFIG_LINUX
static void sigbus_reraise(void)
{
    sigset_t set;
    struct sigaction action;

    memset(&action, 0, sizeof(action));
    action.sa_handler = SIG_DFL;
    if (!sigaction(SIGBUS, &action, NULL)) {
        raise(SIGBUS);
        sigemptyset(&set);
        sigaddset(&set, SIGBUS);
        sigprocmask(SIG_UNBLOCK, &set, NULL);
    }
    perror("Failed to re-raise SIGBUS!\n");
    abort();
}

static void sigbus_handler(int n, struct qemu_signalfd_siginfo *siginfo,
                           void *ctx)
{
    if (kvm_on_sigbus(siginfo->ssi_code,
                      (void *)(intptr_t)siginfo->ssi_addr)) {
        sigbus_reraise();
    }
}

static void qemu_init_sigbus(void)
{
    struct sigaction action;

    memset(&action, 0, sizeof(action));
    action.sa_flags = SA_SIGINFO;
    action.sa_sigaction = (void (*)(int, siginfo_t*, void*))sigbus_handler;
    sigaction(SIGBUS, &action, NULL);

    prctl(PR_MCE_KILL, PR_MCE_KILL_SET, PR_MCE_KILL_EARLY, 0, 0);
}

static void qemu_kvm_eat_signals(CPUState *cpu)
{
    struct timespec ts = { 0, 0 };
    siginfo_t siginfo;
    sigset_t waitset;
    sigset_t chkset;
    int r;

    sigemptyset(&waitset);
    sigaddset(&waitset, SIG_IPI);
    sigaddset(&waitset, SIGBUS);

    do {
        r = sigtimedwait(&waitset, &siginfo, &ts);
        if (r == -1 && !(errno == EAGAIN || errno == EINTR)) {
            perror("sigtimedwait");
            exit(1);
        }

        switch (r) {
        case SIGBUS:
            if (kvm_on_sigbus_vcpu(cpu, siginfo.si_code, siginfo.si_addr)) {
                sigbus_reraise();
            }
            break;
        default:
            break;
        }

        r = sigpending(&chkset);
        if (r == -1) {
            perror("sigpending");
            exit(1);
        }
    } while (sigismember(&chkset, SIG_IPI) || sigismember(&chkset, SIGBUS));
}

#else /* !CONFIG_LINUX */

static void qemu_init_sigbus(void)
{
}

static void qemu_kvm_eat_signals(CPUState *cpu)
{
}
#endif /* !CONFIG_LINUX */

#ifndef _WIN32
static void dummy_signal(int sig)
{
}

static void qemu_kvm_init_cpu_signals(CPUState *cpu)
{
    int r;
    sigset_t set;
    struct sigaction sigact;

    memset(&sigact, 0, sizeof(sigact));
    sigact.sa_handler = dummy_signal;
    sigaction(SIG_IPI, &sigact, NULL);

    pthread_sigmask(SIG_BLOCK, NULL, &set);
    sigdelset(&set, SIG_IPI);
    sigdelset(&set, SIGBUS);
    r = kvm_set_signal_mask(cpu, &set);
    if (r) {
        fprintf(stderr, "kvm_set_signal_mask: %s\n", strerror(-r));
        exit(1);
    }
}

#else /* _WIN32 */
static void qemu_kvm_init_cpu_signals(CPUState *cpu)
{
    abort();
}
#endif /* _WIN32 */

static QemuMutex qemu_global_mutex;
static QemuCond qemu_io_proceeded_cond;
static unsigned iothread_requesting_mutex;

static QemuThread io_thread;

/* cpu creation */
static QemuCond qemu_cpu_cond;
/* system init */
static QemuCond qemu_pause_cond;
static QemuCond qemu_work_cond;

void qemu_init_cpu_loop(void)
{
    qemu_init_sigbus();
    qemu_cond_init(&qemu_cpu_cond);
    qemu_cond_init(&qemu_pause_cond);
    qemu_cond_init(&qemu_work_cond);
    qemu_cond_init(&qemu_io_proceeded_cond);
    qemu_mutex_init(&qemu_global_mutex);

    qemu_thread_get_self(&io_thread);
}

void run_on_cpu(CPUState *cpu, void (*func)(void *data), void *data)
{
    struct qemu_work_item wi;

    if (qemu_cpu_is_self(cpu)) {
        func(data);
        return;
    }

    wi.func = func;
    wi.data = data;
    wi.free = false;

    qemu_mutex_lock(&cpu->work_mutex);
    if (cpu->queued_work_first == NULL) {
        cpu->queued_work_first = &wi;
    } else {
        cpu->queued_work_last->next = &wi;
    }
    cpu->queued_work_last = &wi;
    wi.next = NULL;
    wi.done = false;
    qemu_mutex_unlock(&cpu->work_mutex);

    qemu_cpu_kick(cpu);
    while (!atomic_mb_read(&wi.done)) {
        CPUState *self_cpu = current_cpu;

        qemu_cond_wait(&qemu_work_cond, &qemu_global_mutex);
        current_cpu = self_cpu;
    }
}

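/* Usage sketch (a hypothetical caller, not from this file): run_on_cpu()
 * blocks until func has run on the target vCPU thread, so the work item
 * can live on the caller's stack; async_run_on_cpu() below returns
 * immediately and heap-allocates the item instead.
 *
 *     static void do_nothing(void *data)
 *     {
 *     }
 *
 *     run_on_cpu(cpu, do_nothing, NULL);        // synchronous
 *     async_run_on_cpu(cpu, do_nothing, NULL);  // fire and forget
 */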
void async_run_on_cpu(CPUState *cpu, void (*func)(void *data), void *data)
{
    struct qemu_work_item *wi;

    if (qemu_cpu_is_self(cpu)) {
        func(data);
        return;
    }

    wi = g_malloc0(sizeof(struct qemu_work_item));
    wi->func = func;
    wi->data = data;
    wi->free = true;

    qemu_mutex_lock(&cpu->work_mutex);
    if (cpu->queued_work_first == NULL) {
        cpu->queued_work_first = wi;
    } else {
        cpu->queued_work_last->next = wi;
    }
    cpu->queued_work_last = wi;
    wi->next = NULL;
    wi->done = false;
    qemu_mutex_unlock(&cpu->work_mutex);

    qemu_cpu_kick(cpu);
}

static void flush_queued_work(CPUState *cpu)
{
    struct qemu_work_item *wi;

    if (cpu->queued_work_first == NULL) {
        return;
    }

    qemu_mutex_lock(&cpu->work_mutex);
    while (cpu->queued_work_first != NULL) {
        wi = cpu->queued_work_first;
        cpu->queued_work_first = wi->next;
        if (!cpu->queued_work_first) {
            cpu->queued_work_last = NULL;
        }
        qemu_mutex_unlock(&cpu->work_mutex);
        wi->func(wi->data);
        qemu_mutex_lock(&cpu->work_mutex);
        if (wi->free) {
            g_free(wi);
        } else {
            atomic_mb_set(&wi->done, true);
        }
    }
    qemu_mutex_unlock(&cpu->work_mutex);
    qemu_cond_broadcast(&qemu_work_cond);
}

static void qemu_wait_io_event_common(CPUState *cpu)
{
    if (cpu->stop) {
        cpu->stop = false;
        cpu->stopped = true;
        qemu_cond_signal(&qemu_pause_cond);
    }
    flush_queued_work(cpu);
    cpu->thread_kicked = false;
}

static void qemu_tcg_wait_io_event(CPUState *cpu)
{
    while (all_cpu_threads_idle()) {
        /* Start accounting real time to the virtual clock if the CPUs
           are idle.  */
        qemu_clock_warp(QEMU_CLOCK_VIRTUAL);
        qemu_cond_wait(cpu->halt_cond, &qemu_global_mutex);
    }

    while (iothread_requesting_mutex) {
        qemu_cond_wait(&qemu_io_proceeded_cond, &qemu_global_mutex);
    }

    CPU_FOREACH(cpu) {
        qemu_wait_io_event_common(cpu);
    }
}

static void qemu_kvm_wait_io_event(CPUState *cpu)
{
    while (cpu_thread_is_idle(cpu)) {
        qemu_cond_wait(cpu->halt_cond, &qemu_global_mutex);
    }

    qemu_kvm_eat_signals(cpu);
    qemu_wait_io_event_common(cpu);
}

static void *qemu_kvm_cpu_thread_fn(void *arg)
{
    CPUState *cpu = arg;
    int r;

    rcu_register_thread();

    qemu_mutex_lock_iothread();
    qemu_thread_get_self(cpu->thread);
    cpu->thread_id = qemu_get_thread_id();
    cpu->can_do_io = 1;
    current_cpu = cpu;

    r = kvm_init_vcpu(cpu);
    if (r < 0) {
        fprintf(stderr, "kvm_init_vcpu failed: %s\n", strerror(-r));
        exit(1);
    }

    qemu_kvm_init_cpu_signals(cpu);

    /* signal CPU creation */
    cpu->created = true;
    qemu_cond_signal(&qemu_cpu_cond);

    while (1) {
        if (cpu_can_run(cpu)) {
            r = kvm_cpu_exec(cpu);
            if (r == EXCP_DEBUG) {
                cpu_handle_guest_debug(cpu);
            }
        }
        qemu_kvm_wait_io_event(cpu);
    }

    return NULL;
}

static void *qemu_dummy_cpu_thread_fn(void *arg)
{
#ifdef _WIN32
    fprintf(stderr, "qtest is not supported under Windows\n");
    exit(1);
#else
    CPUState *cpu = arg;
    sigset_t waitset;
    int r;

    rcu_register_thread();

    qemu_mutex_lock_iothread();
    qemu_thread_get_self(cpu->thread);
    cpu->thread_id = qemu_get_thread_id();
    cpu->can_do_io = 1;

    sigemptyset(&waitset);
    sigaddset(&waitset, SIG_IPI);

    /* signal CPU creation */
    cpu->created = true;
    qemu_cond_signal(&qemu_cpu_cond);

    current_cpu = cpu;
    while (1) {
        current_cpu = NULL;
        qemu_mutex_unlock_iothread();
        do {
            int sig;
            r = sigwait(&waitset, &sig);
        } while (r == -1 && (errno == EAGAIN || errno == EINTR));
        if (r == -1) {
            perror("sigwait");
            exit(1);
        }
        qemu_mutex_lock_iothread();
        current_cpu = cpu;
        qemu_wait_io_event_common(cpu);
    }

    return NULL;
#endif
}

static void tcg_exec_all(void);

static void *qemu_tcg_cpu_thread_fn(void *arg)
{
    CPUState *cpu = arg;

    rcu_register_thread();

    qemu_mutex_lock_iothread();
    qemu_thread_get_self(cpu->thread);

    CPU_FOREACH(cpu) {
        cpu->thread_id = qemu_get_thread_id();
        cpu->created = true;
        cpu->can_do_io = 1;
    }
    qemu_cond_signal(&qemu_cpu_cond);

    /* wait for initial kick-off after machine start */
    while (first_cpu->stopped) {
        qemu_cond_wait(first_cpu->halt_cond, &qemu_global_mutex);

        /* process any pending work */
        CPU_FOREACH(cpu) {
            qemu_wait_io_event_common(cpu);
        }
    }

    /* process any pending work */
    atomic_mb_set(&exit_request, 1);

    while (1) {
        tcg_exec_all();

        if (use_icount) {
            int64_t deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);

            if (deadline == 0) {
                qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
            }
        }
        qemu_tcg_wait_io_event(QTAILQ_FIRST(&cpus));
    }

    return NULL;
}

static void qemu_cpu_kick_thread(CPUState *cpu)
{
#ifndef _WIN32
    int err;

    if (cpu->thread_kicked) {
        return;
    }
    cpu->thread_kicked = true;
    err = pthread_kill(cpu->thread->thread, SIG_IPI);
    if (err) {
        fprintf(stderr, "qemu:%s: %s", __func__, strerror(err));
        exit(1);
    }
#else /* _WIN32 */
    abort();
#endif
}

static void qemu_cpu_kick_no_halt(void)
{
    CPUState *cpu;
    /* Ensure whatever caused the exit has reached the CPU threads before
     * writing exit_request.
     */
    atomic_mb_set(&exit_request, 1);
    cpu = atomic_mb_read(&tcg_current_cpu);
    if (cpu) {
        cpu_exit(cpu);
    }
}

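/* A note on the two kick paths below: a KVM vCPU blocks in the kernel,
 * so it is nudged with a SIG_IPI signal via qemu_cpu_kick_thread(),
 * while a TCG vCPU runs translated code in user space, so it is enough
 * to set exit_request and call cpu_exit() on the currently executing
 * CPU, as qemu_cpu_kick_no_halt() above does.
 */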
void qemu_cpu_kick(CPUState *cpu)
{
    qemu_cond_broadcast(cpu->halt_cond);
    if (tcg_enabled()) {
        qemu_cpu_kick_no_halt();
    } else {
        qemu_cpu_kick_thread(cpu);
    }
}

void qemu_cpu_kick_self(void)
{
    assert(current_cpu);
    qemu_cpu_kick_thread(current_cpu);
}

bool qemu_cpu_is_self(CPUState *cpu)
{
    return qemu_thread_is_self(cpu->thread);
}

bool qemu_in_vcpu_thread(void)
{
    return current_cpu && qemu_cpu_is_self(current_cpu);
}

static __thread bool iothread_locked = false;

bool qemu_mutex_iothread_locked(void)
{
    return iothread_locked;
}

void qemu_mutex_lock_iothread(void)
{
    atomic_inc(&iothread_requesting_mutex);
    /* In the simple case there is no need to bump the VCPU thread out of
     * TCG code execution.
     */
    if (!tcg_enabled() || qemu_in_vcpu_thread() ||
        !first_cpu || !first_cpu->created) {
        qemu_mutex_lock(&qemu_global_mutex);
        atomic_dec(&iothread_requesting_mutex);
    } else {
        if (qemu_mutex_trylock(&qemu_global_mutex)) {
            qemu_cpu_kick_no_halt();
            qemu_mutex_lock(&qemu_global_mutex);
        }
        atomic_dec(&iothread_requesting_mutex);
        qemu_cond_broadcast(&qemu_io_proceeded_cond);
    }
    iothread_locked = true;
}

void qemu_mutex_unlock_iothread(void)
{
    iothread_locked = false;
    qemu_mutex_unlock(&qemu_global_mutex);
}

static int all_vcpus_paused(void)
{
    CPUState *cpu;

    CPU_FOREACH(cpu) {
        if (!cpu->stopped) {
            return 0;
        }
    }

    return 1;
}

void pause_all_vcpus(void)
{
    CPUState *cpu;

    qemu_clock_enable(QEMU_CLOCK_VIRTUAL, false);
    CPU_FOREACH(cpu) {
        cpu->stop = true;
        qemu_cpu_kick(cpu);
    }

    if (qemu_in_vcpu_thread()) {
        cpu_stop_current();
        if (!kvm_enabled()) {
            CPU_FOREACH(cpu) {
                cpu->stop = false;
                cpu->stopped = true;
            }
            return;
        }
    }

    while (!all_vcpus_paused()) {
        qemu_cond_wait(&qemu_pause_cond, &qemu_global_mutex);
        CPU_FOREACH(cpu) {
            qemu_cpu_kick(cpu);
        }
    }
}

void cpu_resume(CPUState *cpu)
{
    cpu->stop = false;
    cpu->stopped = false;
    qemu_cpu_kick(cpu);
}

void resume_all_vcpus(void)
{
    CPUState *cpu;

    qemu_clock_enable(QEMU_CLOCK_VIRTUAL, true);
    CPU_FOREACH(cpu) {
        cpu_resume(cpu);
    }
}

/* For temporary buffers for forming a name */
#define VCPU_THREAD_NAME_SIZE 16

static void qemu_tcg_init_vcpu(CPUState *cpu)
{
    char thread_name[VCPU_THREAD_NAME_SIZE];
    static QemuCond *tcg_halt_cond;
    static QemuThread *tcg_cpu_thread;

    tcg_cpu_address_space_init(cpu, cpu->as);

    /* share a single thread for all cpus with TCG */
    if (!tcg_cpu_thread) {
        cpu->thread = g_malloc0(sizeof(QemuThread));
        cpu->halt_cond = g_malloc0(sizeof(QemuCond));
        qemu_cond_init(cpu->halt_cond);
        tcg_halt_cond = cpu->halt_cond;
        snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/TCG",
                 cpu->cpu_index);
        qemu_thread_create(cpu->thread, thread_name, qemu_tcg_cpu_thread_fn,
                           cpu, QEMU_THREAD_JOINABLE);
#ifdef _WIN32
        cpu->hThread = qemu_thread_get_handle(cpu->thread);
#endif
        while (!cpu->created) {
            qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
        }
        tcg_cpu_thread = cpu->thread;
    } else {
        cpu->thread = tcg_cpu_thread;
        cpu->halt_cond = tcg_halt_cond;
    }
}

1329
48a106bd 1330static void qemu_kvm_start_vcpu(CPUState *cpu)
296af7c9 1331{
4900116e
DDAG
1332 char thread_name[VCPU_THREAD_NAME_SIZE];
1333
814e612e 1334 cpu->thread = g_malloc0(sizeof(QemuThread));
f5c121b8
AF
1335 cpu->halt_cond = g_malloc0(sizeof(QemuCond));
1336 qemu_cond_init(cpu->halt_cond);
4900116e
DDAG
1337 snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/KVM",
1338 cpu->cpu_index);
1339 qemu_thread_create(cpu->thread, thread_name, qemu_kvm_cpu_thread_fn,
1340 cpu, QEMU_THREAD_JOINABLE);
61a46217 1341 while (!cpu->created) {
18a85728 1342 qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
0ab07c62 1343 }
296af7c9
BS
1344}
1345
static void qemu_dummy_start_vcpu(CPUState *cpu)
{
    char thread_name[VCPU_THREAD_NAME_SIZE];

    cpu->thread = g_malloc0(sizeof(QemuThread));
    cpu->halt_cond = g_malloc0(sizeof(QemuCond));
    qemu_cond_init(cpu->halt_cond);
    snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/DUMMY",
             cpu->cpu_index);
    qemu_thread_create(cpu->thread, thread_name, qemu_dummy_cpu_thread_fn, cpu,
                       QEMU_THREAD_JOINABLE);
    while (!cpu->created) {
        qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
    }
}

void qemu_init_vcpu(CPUState *cpu)
{
    cpu->nr_cores = smp_cores;
    cpu->nr_threads = smp_threads;
    cpu->stopped = true;
    if (kvm_enabled()) {
        qemu_kvm_start_vcpu(cpu);
    } else if (tcg_enabled()) {
        qemu_tcg_init_vcpu(cpu);
    } else {
        qemu_dummy_start_vcpu(cpu);
    }
}

void cpu_stop_current(void)
{
    if (current_cpu) {
        current_cpu->stop = false;
        current_cpu->stopped = true;
        cpu_exit(current_cpu);
        qemu_cond_signal(&qemu_pause_cond);
    }
}

int vm_stop(RunState state)
{
    if (qemu_in_vcpu_thread()) {
        qemu_system_vmstop_request_prepare();
        qemu_system_vmstop_request(state);
        /*
         * FIXME: should not return to device code in case
         * vm_stop() has been requested.
         */
        cpu_stop_current();
        return 0;
    }

    return do_vm_stop(state);
}

/* does a state transition even if the VM is already stopped,
   current state is forgotten forever */
int vm_stop_force_state(RunState state)
{
    if (runstate_is_running()) {
        return vm_stop(state);
    } else {
        runstate_set(state);
        /* Make sure to return an error if the flush in a previous vm_stop()
         * failed. */
        return bdrv_flush_all();
    }
}

static int64_t tcg_get_icount_limit(void)
{
    int64_t deadline;

    if (replay_mode != REPLAY_MODE_PLAY) {
        deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);

        /* Maintain prior (possibly buggy) behaviour where if no deadline
         * was set (as there is no QEMU_CLOCK_VIRTUAL timer) or it is more than
         * INT32_MAX nanoseconds ahead, we still use INT32_MAX
         * nanoseconds.
         */
        if ((deadline < 0) || (deadline > INT32_MAX)) {
            deadline = INT32_MAX;
        }

        return qemu_icount_round(deadline);
    } else {
        return replay_get_instructions();
    }
}

static int tcg_cpu_exec(CPUState *cpu)
{
    int ret;
#ifdef CONFIG_PROFILER
    int64_t ti;
#endif

#ifdef CONFIG_PROFILER
    ti = profile_getclock();
#endif
    if (use_icount) {
        int64_t count;
        int decr;
        timers_state.qemu_icount -= (cpu->icount_decr.u16.low
                                    + cpu->icount_extra);
        cpu->icount_decr.u16.low = 0;
        cpu->icount_extra = 0;
        count = tcg_get_icount_limit();
        timers_state.qemu_icount += count;
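        /* The per-CPU downcounter is only 16 bits wide (icount_decr.u16.low),
         * so a budget larger than 0xffff is split: the low 16 bits go into
         * the downcounter that the translated code decrements, and the rest
         * is parked in icount_extra to be refilled later.
         */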
        decr = (count > 0xffff) ? 0xffff : count;
        count -= decr;
        cpu->icount_decr.u16.low = decr;
        cpu->icount_extra = count;
    }
    ret = cpu_exec(cpu);
#ifdef CONFIG_PROFILER
    tcg_time += profile_getclock() - ti;
#endif
    if (use_icount) {
        /* Fold pending instructions back into the
           instruction counter, and clear the interrupt flag.  */
        timers_state.qemu_icount -= (cpu->icount_decr.u16.low
                                    + cpu->icount_extra);
        cpu->icount_decr.u32 = 0;
        cpu->icount_extra = 0;
        replay_account_executed_instructions();
    }
    return ret;
}

static void tcg_exec_all(void)
{
    int r;

    /* Account partial waits to QEMU_CLOCK_VIRTUAL.  */
    qemu_clock_warp(QEMU_CLOCK_VIRTUAL);

    if (next_cpu == NULL) {
        next_cpu = first_cpu;
    }
    for (; next_cpu != NULL && !exit_request; next_cpu = CPU_NEXT(next_cpu)) {
        CPUState *cpu = next_cpu;

        qemu_clock_enable(QEMU_CLOCK_VIRTUAL,
                          (cpu->singlestep_enabled & SSTEP_NOTIMER) == 0);

        if (cpu_can_run(cpu)) {
            r = tcg_cpu_exec(cpu);
            if (r == EXCP_DEBUG) {
                cpu_handle_guest_debug(cpu);
                break;
            }
        } else if (cpu->stop || cpu->stopped) {
            break;
        }
    }

    /* Pairs with smp_wmb in qemu_cpu_kick.  */
    atomic_mb_set(&exit_request, 0);
}

void list_cpus(FILE *f, fprintf_function cpu_fprintf, const char *optarg)
{
    /* XXX: implement xxx_cpu_list for targets that still miss it */
#if defined(cpu_list)
    cpu_list(f, cpu_fprintf);
#endif
}

CpuInfoList *qmp_query_cpus(Error **errp)
{
    CpuInfoList *head = NULL, *cur_item = NULL;
    CPUState *cpu;

    CPU_FOREACH(cpu) {
        CpuInfoList *info;
#if defined(TARGET_I386)
        X86CPU *x86_cpu = X86_CPU(cpu);
        CPUX86State *env = &x86_cpu->env;
#elif defined(TARGET_PPC)
        PowerPCCPU *ppc_cpu = POWERPC_CPU(cpu);
        CPUPPCState *env = &ppc_cpu->env;
#elif defined(TARGET_SPARC)
        SPARCCPU *sparc_cpu = SPARC_CPU(cpu);
        CPUSPARCState *env = &sparc_cpu->env;
#elif defined(TARGET_MIPS)
        MIPSCPU *mips_cpu = MIPS_CPU(cpu);
        CPUMIPSState *env = &mips_cpu->env;
#elif defined(TARGET_TRICORE)
        TriCoreCPU *tricore_cpu = TRICORE_CPU(cpu);
        CPUTriCoreState *env = &tricore_cpu->env;
#endif

        cpu_synchronize_state(cpu);

        info = g_malloc0(sizeof(*info));
        info->value = g_malloc0(sizeof(*info->value));
        info->value->CPU = cpu->cpu_index;
        info->value->current = (cpu == first_cpu);
        info->value->halted = cpu->halted;
        info->value->qom_path = object_get_canonical_path(OBJECT(cpu));
        info->value->thread_id = cpu->thread_id;
#if defined(TARGET_I386)
        info->value->has_pc = true;
        info->value->pc = env->eip + env->segs[R_CS].base;
#elif defined(TARGET_PPC)
        info->value->has_nip = true;
        info->value->nip = env->nip;
#elif defined(TARGET_SPARC)
        info->value->has_pc = true;
        info->value->pc = env->pc;
        info->value->has_npc = true;
        info->value->npc = env->npc;
#elif defined(TARGET_MIPS)
        info->value->has_PC = true;
        info->value->PC = env->active_tc.PC;
#elif defined(TARGET_TRICORE)
        info->value->has_PC = true;
        info->value->PC = env->PC;
#endif

        /* XXX: waiting for the qapi to support GSList */
        if (!cur_item) {
            head = cur_item = info;
        } else {
            cur_item->next = info;
            cur_item = info;
        }
    }

    return head;
}

void qmp_memsave(int64_t addr, int64_t size, const char *filename,
                 bool has_cpu, int64_t cpu_index, Error **errp)
{
    FILE *f;
    uint32_t l;
    CPUState *cpu;
    uint8_t buf[1024];
    int64_t orig_addr = addr, orig_size = size;

    if (!has_cpu) {
        cpu_index = 0;
    }

    cpu = qemu_get_cpu(cpu_index);
    if (cpu == NULL) {
        error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "cpu-index",
                   "a CPU number");
        return;
    }

    f = fopen(filename, "wb");
    if (!f) {
        error_setg_file_open(errp, errno, filename);
        return;
    }

    while (size != 0) {
        l = sizeof(buf);
        if (l > size)
            l = size;
        if (cpu_memory_rw_debug(cpu, addr, buf, l, 0) != 0) {
            error_setg(errp, "Invalid addr 0x%016" PRIx64 "/size %" PRId64
                             " specified", orig_addr, orig_size);
            goto exit;
        }
        if (fwrite(buf, 1, l, f) != l) {
            error_setg(errp, QERR_IO_ERROR);
            goto exit;
        }
        addr += l;
        size -= l;
    }

exit:
    fclose(f);
}

void qmp_pmemsave(int64_t addr, int64_t size, const char *filename,
                  Error **errp)
{
    FILE *f;
    uint32_t l;
    uint8_t buf[1024];

    f = fopen(filename, "wb");
    if (!f) {
        error_setg_file_open(errp, errno, filename);
        return;
    }

    while (size != 0) {
        l = sizeof(buf);
        if (l > size)
            l = size;
        cpu_physical_memory_read(addr, buf, l);
        if (fwrite(buf, 1, l, f) != l) {
            error_setg(errp, QERR_IO_ERROR);
            goto exit;
        }
        addr += l;
        size -= l;
    }

exit:
    fclose(f);
}

void qmp_inject_nmi(Error **errp)
{
#if defined(TARGET_I386)
    CPUState *cs;

    CPU_FOREACH(cs) {
        X86CPU *cpu = X86_CPU(cs);

        if (!cpu->apic_state) {
            cpu_interrupt(cs, CPU_INTERRUPT_NMI);
        } else {
            apic_deliver_nmi(cpu->apic_state);
        }
    }
#else
    nmi_monitor_handle(monitor_get_cpu_index(), errp);
#endif
}

void dump_drift_info(FILE *f, fprintf_function cpu_fprintf)
{
    if (!use_icount) {
        return;
    }

    cpu_fprintf(f, "Host - Guest clock %"PRIi64" ms\n",
                (cpu_get_clock() - cpu_get_icount())/SCALE_MS);
    if (icount_align_option) {
        cpu_fprintf(f, "Max guest delay %"PRIi64" ms\n", -max_delay/SCALE_MS);
        cpu_fprintf(f, "Max guest advance %"PRIi64" ms\n", max_advance/SCALE_MS);
    } else {
        cpu_fprintf(f, "Max guest delay NA\n");
        cpu_fprintf(f, "Max guest advance NA\n");
    }
}