/*
 * QEMU System Emulator
 *
 * Copyright (c) 2003-2008 Fabrice Bellard
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

/* Needed early for CONFIG_BSD etc. */
#include "qemu/osdep.h"
#include "qemu-common.h"
#include "cpu.h"
#include "monitor/monitor.h"
#include "qapi/qmp/qerror.h"
#include "qemu/error-report.h"
#include "sysemu/sysemu.h"
#include "sysemu/block-backend.h"
#include "exec/gdbstub.h"
#include "sysemu/dma.h"
#include "sysemu/kvm.h"
#include "qmp-commands.h"
#include "exec/exec-all.h"

#include "qemu/thread.h"
#include "sysemu/cpus.h"
#include "sysemu/qtest.h"
#include "qemu/main-loop.h"
#include "qemu/bitmap.h"
#include "qemu/seqlock.h"
#include "qapi-event.h"
#include "hw/nmi.h"
#include "sysemu/replay.h"

#ifndef _WIN32
#include "qemu/compatfd.h"
#endif

#ifdef CONFIG_LINUX

#include <sys/prctl.h>

#ifndef PR_MCE_KILL
#define PR_MCE_KILL 33
#endif

#ifndef PR_MCE_KILL_SET
#define PR_MCE_KILL_SET 1
#endif

#ifndef PR_MCE_KILL_EARLY
#define PR_MCE_KILL_EARLY 1
#endif

#endif /* CONFIG_LINUX */

static CPUState *next_cpu;
int64_t max_delay;
int64_t max_advance;

/* vcpu throttling controls */
static QEMUTimer *throttle_timer;
static unsigned int throttle_percentage;

#define CPU_THROTTLE_PCT_MIN 1
#define CPU_THROTTLE_PCT_MAX 99
#define CPU_THROTTLE_TIMESLICE_NS 10000000

bool cpu_is_stopped(CPUState *cpu)
{
    return cpu->stopped || !runstate_is_running();
}

static bool cpu_thread_is_idle(CPUState *cpu)
{
    if (cpu->stop || cpu->queued_work_first) {
        return false;
    }
    if (cpu_is_stopped(cpu)) {
        return true;
    }
    if (!cpu->halted || cpu_has_work(cpu) ||
        kvm_halt_in_kernel()) {
        return false;
    }
    return true;
}

static bool all_cpu_threads_idle(void)
{
    CPUState *cpu;

    CPU_FOREACH(cpu) {
        if (!cpu_thread_is_idle(cpu)) {
            return false;
        }
    }
    return true;
}

/***********************************************************/
/* guest cycle counter */

/* Protected by TimersState seqlock */

static bool icount_sleep = true;
static int64_t vm_clock_warp_start = -1;
/* Conversion factor from emulated instructions to virtual clock ticks. */
static int icount_time_shift;
/* Arbitrarily pick 1MIPS as the minimum allowable speed. */
#define MAX_ICOUNT_SHIFT 10
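/* With shift S, each completed instruction advances the virtual clock
 * by 2^S ns (see cpu_icount_to_ns() below), so the cap of 10 means at
 * least ~1024 ns per instruction, i.e. roughly 1 MIPS.
 */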

static QEMUTimer *icount_rt_timer;
static QEMUTimer *icount_vm_timer;
static QEMUTimer *icount_warp_timer;

typedef struct TimersState {
    /* Protected by BQL. */
    int64_t cpu_ticks_prev;
    int64_t cpu_ticks_offset;

    /* cpu_clock_offset can be read outside the BQL, so protect it with
     * this lock.
     */
    QemuSeqLock vm_clock_seqlock;
    int64_t cpu_clock_offset;
    int32_t cpu_ticks_enabled;
    int64_t dummy;

    /* Compensate for varying guest execution speed. */
    int64_t qemu_icount_bias;
    /* Only written by TCG thread */
    int64_t qemu_icount;
} TimersState;

static TimersState timers_state;

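/* The current number of executed instructions is what was budgeted so
 * far (timers_state.qemu_icount) minus the part of the current CPU's
 * budget that has not run yet (icount_decr.u16.low + icount_extra).
 */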
int64_t cpu_get_icount_raw(void)
{
    int64_t icount;
    CPUState *cpu = current_cpu;

    icount = timers_state.qemu_icount;
    if (cpu) {
        if (!cpu->can_do_io) {
            fprintf(stderr, "Bad icount read\n");
            exit(1);
        }
        icount -= (cpu->icount_decr.u16.low + cpu->icount_extra);
    }
    return icount;
}

/* Return the virtual CPU time, based on the instruction counter. */
static int64_t cpu_get_icount_locked(void)
{
    int64_t icount = cpu_get_icount_raw();
    return timers_state.qemu_icount_bias + cpu_icount_to_ns(icount);
}

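/* Lock-free reader for cpu_get_icount_locked(): retry the read if a
 * concurrent seqlock writer updated the clock while we were sampling.
 */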
int64_t cpu_get_icount(void)
{
    int64_t icount;
    unsigned start;

    do {
        start = seqlock_read_begin(&timers_state.vm_clock_seqlock);
        icount = cpu_get_icount_locked();
    } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start));

    return icount;
}

int64_t cpu_icount_to_ns(int64_t icount)
{
    return icount << icount_time_shift;
}

/* Return the time elapsed in VM between vm_start and vm_stop.  Unless
 * icount is active, cpu_get_ticks() uses units of the host CPU cycle
 * counter.
 *
 * Caller must hold the BQL
 */
int64_t cpu_get_ticks(void)
{
    int64_t ticks;

    if (use_icount) {
        return cpu_get_icount();
    }

    ticks = timers_state.cpu_ticks_offset;
    if (timers_state.cpu_ticks_enabled) {
        ticks += cpu_get_host_ticks();
    }

    if (timers_state.cpu_ticks_prev > ticks) {
        /* Note: non-increasing ticks may happen if the host uses
           software suspend. */
        timers_state.cpu_ticks_offset += timers_state.cpu_ticks_prev - ticks;
        ticks = timers_state.cpu_ticks_prev;
    }

    timers_state.cpu_ticks_prev = ticks;
    return ticks;
}

static int64_t cpu_get_clock_locked(void)
{
    int64_t time;

    time = timers_state.cpu_clock_offset;
    if (timers_state.cpu_ticks_enabled) {
        time += get_clock();
    }

    return time;
}

/* Return the monotonic time elapsed in VM, i.e.,
 * the time between vm_start and vm_stop
 */
int64_t cpu_get_clock(void)
{
    int64_t ti;
    unsigned start;

    do {
        start = seqlock_read_begin(&timers_state.vm_clock_seqlock);
        ti = cpu_get_clock_locked();
    } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start));

    return ti;
}

/* enable cpu_get_ticks()
 * Caller must hold BQL which serves as mutex for vm_clock_seqlock.
 */
void cpu_enable_ticks(void)
{
    /* Here, the real thing protected by seqlock is cpu_clock_offset. */
    seqlock_write_begin(&timers_state.vm_clock_seqlock);
    if (!timers_state.cpu_ticks_enabled) {
        timers_state.cpu_ticks_offset -= cpu_get_host_ticks();
        timers_state.cpu_clock_offset -= get_clock();
        timers_state.cpu_ticks_enabled = 1;
    }
    seqlock_write_end(&timers_state.vm_clock_seqlock);
}

/* disable cpu_get_ticks(): the clock is stopped.  You must not call
 * cpu_get_ticks() after that.
 * Caller must hold BQL which serves as mutex for vm_clock_seqlock.
 */
void cpu_disable_ticks(void)
{
    /* Here, the real thing protected by seqlock is cpu_clock_offset. */
    seqlock_write_begin(&timers_state.vm_clock_seqlock);
    if (timers_state.cpu_ticks_enabled) {
        timers_state.cpu_ticks_offset += cpu_get_host_ticks();
        timers_state.cpu_clock_offset = cpu_get_clock_locked();
        timers_state.cpu_ticks_enabled = 0;
    }
    seqlock_write_end(&timers_state.vm_clock_seqlock);
}

/* Correlation between real and virtual time is always going to be
   fairly approximate, so ignore small variation.
   When the guest is idle real and virtual time will be aligned in
   the IO wait loop. */
#define ICOUNT_WOBBLE (NANOSECONDS_PER_SECOND / 10)

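/* Adjust icount_time_shift so that the guest's virtual clock tracks
 * real time: shrink the shift when the guest runs ahead, grow it when
 * the guest falls behind.  The bias is then recomputed so that
 * cpu_get_icount_locked() stays continuous across the rate change.
 */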
static void icount_adjust(void)
{
    int64_t cur_time;
    int64_t cur_icount;
    int64_t delta;

    /* Protected by TimersState mutex. */
    static int64_t last_delta;

    /* If the VM is not running, then do nothing. */
    if (!runstate_is_running()) {
        return;
    }

    seqlock_write_begin(&timers_state.vm_clock_seqlock);
    cur_time = cpu_get_clock_locked();
    cur_icount = cpu_get_icount_locked();

    delta = cur_icount - cur_time;
    /* FIXME: This is a very crude algorithm, somewhat prone to oscillation. */
    if (delta > 0
        && last_delta + ICOUNT_WOBBLE < delta * 2
        && icount_time_shift > 0) {
        /* The guest is getting too far ahead.  Slow time down. */
        icount_time_shift--;
    }
    if (delta < 0
        && last_delta - ICOUNT_WOBBLE > delta * 2
        && icount_time_shift < MAX_ICOUNT_SHIFT) {
        /* The guest is getting too far behind.  Speed time up. */
        icount_time_shift++;
    }
    last_delta = delta;
    timers_state.qemu_icount_bias = cur_icount
                              - (timers_state.qemu_icount << icount_time_shift);
    seqlock_write_end(&timers_state.vm_clock_seqlock);
}

static void icount_adjust_rt(void *opaque)
{
    timer_mod(icount_rt_timer,
              qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL_RT) + 1000);
    icount_adjust();
}

static void icount_adjust_vm(void *opaque)
{
    timer_mod(icount_vm_timer,
              qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
              NANOSECONDS_PER_SECOND / 10);
    icount_adjust();
}

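/* Convert a nanosecond deadline into an instruction budget, rounding
 * up so that the granted instructions never undershoot the deadline.
 */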
static int64_t qemu_icount_round(int64_t count)
{
    return (count + (1 << icount_time_shift) - 1) >> icount_time_shift;
}

static void icount_warp_rt(void)
{
    unsigned seq;
    int64_t warp_start;

    /* The icount_warp_timer is rescheduled soon after vm_clock_warp_start
     * changes from -1 to another value, so the race here is okay.
     */
    do {
        seq = seqlock_read_begin(&timers_state.vm_clock_seqlock);
        warp_start = vm_clock_warp_start;
    } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, seq));

    if (warp_start == -1) {
        return;
    }

    seqlock_write_begin(&timers_state.vm_clock_seqlock);
    if (runstate_is_running()) {
        int64_t clock = REPLAY_CLOCK(REPLAY_CLOCK_VIRTUAL_RT,
                                     cpu_get_clock_locked());
        int64_t warp_delta;

        warp_delta = clock - vm_clock_warp_start;
        if (use_icount == 2) {
            /*
             * In adaptive mode, do not let QEMU_CLOCK_VIRTUAL run too
             * far ahead of real time.
             */
            int64_t cur_icount = cpu_get_icount_locked();
            int64_t delta = clock - cur_icount;
            warp_delta = MIN(warp_delta, delta);
        }
        timers_state.qemu_icount_bias += warp_delta;
    }
    vm_clock_warp_start = -1;
    seqlock_write_end(&timers_state.vm_clock_seqlock);

    if (qemu_clock_expired(QEMU_CLOCK_VIRTUAL)) {
        qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
    }
}

static void icount_timer_cb(void *opaque)
{
    /* No need for a checkpoint because the timer already synchronizes
     * with CHECKPOINT_CLOCK_VIRTUAL_RT.
     */
    icount_warp_rt();
}

void qtest_clock_warp(int64_t dest)
{
    int64_t clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
    AioContext *aio_context;
    assert(qtest_enabled());
    aio_context = qemu_get_aio_context();
    while (clock < dest) {
        int64_t deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
        int64_t warp = qemu_soonest_timeout(dest - clock, deadline);

        seqlock_write_begin(&timers_state.vm_clock_seqlock);
        timers_state.qemu_icount_bias += warp;
        seqlock_write_end(&timers_state.vm_clock_seqlock);

        qemu_clock_run_timers(QEMU_CLOCK_VIRTUAL);
        timerlist_run_timers(aio_context->tlg.tl[QEMU_CLOCK_VIRTUAL]);
        clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
    }
    qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
}

void qemu_start_warp_timer(void)
{
    int64_t clock;
    int64_t deadline;

    if (!use_icount) {
        return;
    }

    /* Nothing to do if the VM is stopped: QEMU_CLOCK_VIRTUAL timers
     * do not fire, so computing the deadline does not make sense.
     */
    if (!runstate_is_running()) {
        return;
    }

    /* warp clock deterministically in record/replay mode */
    if (!replay_checkpoint(CHECKPOINT_CLOCK_WARP_START)) {
        return;
    }

    if (!all_cpu_threads_idle()) {
        return;
    }

    if (qtest_enabled()) {
        /* When testing, qtest commands advance icount. */
        return;
    }

    /* We want to use the earliest deadline from ALL vm_clocks */
    clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT);
    deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
    if (deadline < 0) {
        static bool notified;
        if (!icount_sleep && !notified) {
            error_report("WARNING: icount sleep disabled and no active timers");
            notified = true;
        }
        return;
    }

    if (deadline > 0) {
        /*
         * Ensure QEMU_CLOCK_VIRTUAL proceeds even when the virtual CPU goes to
         * sleep.  Otherwise, the CPU might be waiting for a future timer
         * interrupt to wake it up, but the interrupt never comes because
         * the vCPU isn't running any insns and thus doesn't advance the
         * QEMU_CLOCK_VIRTUAL.
         */
        if (!icount_sleep) {
            /*
             * We never let VCPUs sleep in no-sleep icount mode.
             * If there is a pending QEMU_CLOCK_VIRTUAL timer we just advance
             * to the next QEMU_CLOCK_VIRTUAL event and notify it.
             * This is useful when we want a deterministic execution time,
             * isolated from host latencies.
             */
            seqlock_write_begin(&timers_state.vm_clock_seqlock);
            timers_state.qemu_icount_bias += deadline;
            seqlock_write_end(&timers_state.vm_clock_seqlock);
            qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
        } else {
            /*
             * We do stop VCPUs and only advance QEMU_CLOCK_VIRTUAL after some
             * "real" time (related to the time left until the next event) has
             * passed.  The QEMU_CLOCK_VIRTUAL_RT clock will do this.
             * This keeps the warps from being visible externally; for example,
             * you will not be sending network packets continuously instead of
             * every 100ms.
             */
            seqlock_write_begin(&timers_state.vm_clock_seqlock);
            if (vm_clock_warp_start == -1 || vm_clock_warp_start > clock) {
                vm_clock_warp_start = clock;
            }
            seqlock_write_end(&timers_state.vm_clock_seqlock);
            timer_mod_anticipate(icount_warp_timer, clock + deadline);
        }
    } else if (deadline == 0) {
        qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
    }
}

static void qemu_account_warp_timer(void)
{
    if (!use_icount || !icount_sleep) {
        return;
    }

    /* Nothing to do if the VM is stopped: QEMU_CLOCK_VIRTUAL timers
     * do not fire, so computing the deadline does not make sense.
     */
    if (!runstate_is_running()) {
        return;
    }

    /* warp clock deterministically in record/replay mode */
    if (!replay_checkpoint(CHECKPOINT_CLOCK_WARP_ACCOUNT)) {
        return;
    }

    timer_del(icount_warp_timer);
    icount_warp_rt();
}

static bool icount_state_needed(void *opaque)
{
    return use_icount;
}

/*
 * This is a subsection for icount migration.
 */
static const VMStateDescription icount_vmstate_timers = {
    .name = "timer/icount",
    .version_id = 1,
    .minimum_version_id = 1,
    .needed = icount_state_needed,
    .fields = (VMStateField[]) {
        VMSTATE_INT64(qemu_icount_bias, TimersState),
        VMSTATE_INT64(qemu_icount, TimersState),
        VMSTATE_END_OF_LIST()
    }
};

static const VMStateDescription vmstate_timers = {
    .name = "timer",
    .version_id = 2,
    .minimum_version_id = 1,
    .fields = (VMStateField[]) {
        VMSTATE_INT64(cpu_ticks_offset, TimersState),
        VMSTATE_INT64(dummy, TimersState),
        VMSTATE_INT64_V(cpu_clock_offset, TimersState, 2),
        VMSTATE_END_OF_LIST()
    },
    .subsections = (const VMStateDescription*[]) {
        &icount_vmstate_timers,
        NULL
    }
};

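/* Runs in the vCPU thread via async_run_on_cpu().  With throttle
 * percentage pct, the sleep/run ratio is pct / (1 - pct): at 50% the
 * vCPU sleeps one 10 ms timeslice for each one it runs; at 99% it
 * sleeps 99 of them.
 */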
static void cpu_throttle_thread(CPUState *cpu, void *opaque)
{
    double pct;
    double throttle_ratio;
    long sleeptime_ns;

    if (!cpu_throttle_get_percentage()) {
        return;
    }

    pct = (double)cpu_throttle_get_percentage()/100;
    throttle_ratio = pct / (1 - pct);
    sleeptime_ns = (long)(throttle_ratio * CPU_THROTTLE_TIMESLICE_NS);

    qemu_mutex_unlock_iothread();
    atomic_set(&cpu->throttle_thread_scheduled, 0);
    g_usleep(sleeptime_ns / 1000); /* Convert ns to us for usleep call */
    qemu_mutex_lock_iothread();
}

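/* Reschedules itself once per run+sleep period: the next tick fires
 * CPU_THROTTLE_TIMESLICE_NS / (1 - pct) from now, e.g. 20 ms at 50%
 * (10 ms of run time plus 10 ms of sleep).
 */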
static void cpu_throttle_timer_tick(void *opaque)
{
    CPUState *cpu;
    double pct;

    /* Stop the timer if needed */
    if (!cpu_throttle_get_percentage()) {
        return;
    }
    CPU_FOREACH(cpu) {
        if (!atomic_xchg(&cpu->throttle_thread_scheduled, 1)) {
            async_run_on_cpu(cpu, cpu_throttle_thread, NULL);
        }
    }

    pct = (double)cpu_throttle_get_percentage()/100;
    timer_mod(throttle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT) +
                              CPU_THROTTLE_TIMESLICE_NS / (1-pct));
}

void cpu_throttle_set(int new_throttle_pct)
{
    /* Ensure throttle percentage is within valid range */
    new_throttle_pct = MIN(new_throttle_pct, CPU_THROTTLE_PCT_MAX);
    new_throttle_pct = MAX(new_throttle_pct, CPU_THROTTLE_PCT_MIN);

    atomic_set(&throttle_percentage, new_throttle_pct);

    timer_mod(throttle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT) +
                              CPU_THROTTLE_TIMESLICE_NS);
}

void cpu_throttle_stop(void)
{
    atomic_set(&throttle_percentage, 0);
}

bool cpu_throttle_active(void)
{
    return (cpu_throttle_get_percentage() != 0);
}

int cpu_throttle_get_percentage(void)
{
    return atomic_read(&throttle_percentage);
}

void cpu_ticks_init(void)
{
    seqlock_init(&timers_state.vm_clock_seqlock);
    vmstate_register(NULL, 0, &vmstate_timers, &timers_state);
    throttle_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL_RT,
                                  cpu_throttle_timer_tick, NULL);
}

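/* Parse the -icount option.  A numeric shift selects a fixed rate
 * (use_icount == 1); shift=auto selects adaptive mode (use_icount == 2),
 * where icount_time_shift is retuned periodically by icount_adjust().
 */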
void configure_icount(QemuOpts *opts, Error **errp)
{
    const char *option;
    char *rem_str = NULL;

    option = qemu_opt_get(opts, "shift");
    if (!option) {
        if (qemu_opt_get(opts, "align") != NULL) {
            error_setg(errp, "Please specify shift option when using align");
        }
        return;
    }

    icount_sleep = qemu_opt_get_bool(opts, "sleep", true);
    if (icount_sleep) {
        icount_warp_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL_RT,
                                         icount_timer_cb, NULL);
    }

    icount_align_option = qemu_opt_get_bool(opts, "align", false);

    if (icount_align_option && !icount_sleep) {
        error_setg(errp, "align=on and sleep=off are incompatible");
    }
    if (strcmp(option, "auto") != 0) {
        errno = 0;
        icount_time_shift = strtol(option, &rem_str, 0);
        if (errno != 0 || *rem_str != '\0' || !strlen(option)) {
            error_setg(errp, "icount: Invalid shift value");
        }
        use_icount = 1;
        return;
    } else if (icount_align_option) {
        error_setg(errp, "shift=auto and align=on are incompatible");
    } else if (!icount_sleep) {
        error_setg(errp, "shift=auto and sleep=off are incompatible");
    }

    use_icount = 2;

    /* 125MIPS seems a reasonable initial guess at the guest speed.
       It will be corrected fairly quickly anyway. */
    icount_time_shift = 3;

    /* Have both realtime and virtual time triggers for speed adjustment.
       The realtime trigger catches emulated time passing too slowly,
       the virtual time trigger catches emulated time passing too fast.
       Realtime triggers occur even when idle, so use them less frequently
       than VM triggers. */
    icount_rt_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL_RT,
                                   icount_adjust_rt, NULL);
    timer_mod(icount_rt_timer,
              qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL_RT) + 1000);
    icount_vm_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
                                   icount_adjust_vm, NULL);
    timer_mod(icount_vm_timer,
              qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
              NANOSECONDS_PER_SECOND / 10);
}

/***********************************************************/
void hw_error(const char *fmt, ...)
{
    va_list ap;
    CPUState *cpu;

    va_start(ap, fmt);
    fprintf(stderr, "qemu: hardware error: ");
    vfprintf(stderr, fmt, ap);
    fprintf(stderr, "\n");
    CPU_FOREACH(cpu) {
        fprintf(stderr, "CPU #%d:\n", cpu->cpu_index);
        cpu_dump_state(cpu, stderr, fprintf, CPU_DUMP_FPU);
    }
    va_end(ap);
    abort();
}

void cpu_synchronize_all_states(void)
{
    CPUState *cpu;

    CPU_FOREACH(cpu) {
        cpu_synchronize_state(cpu);
    }
}

void cpu_synchronize_all_post_reset(void)
{
    CPUState *cpu;

    CPU_FOREACH(cpu) {
        cpu_synchronize_post_reset(cpu);
    }
}

void cpu_synchronize_all_post_init(void)
{
    CPUState *cpu;

    CPU_FOREACH(cpu) {
        cpu_synchronize_post_init(cpu);
    }
}

static int do_vm_stop(RunState state)
{
    int ret = 0;

    if (runstate_is_running()) {
        cpu_disable_ticks();
        pause_all_vcpus();
        runstate_set(state);
        vm_state_notify(0, state);
        qapi_event_send_stop(&error_abort);
    }

    bdrv_drain_all();
    replay_disable_events();
    ret = bdrv_flush_all();

    return ret;
}

static bool cpu_can_run(CPUState *cpu)
{
    if (cpu->stop) {
        return false;
    }
    if (cpu_is_stopped(cpu)) {
        return false;
    }
    return true;
}

static void cpu_handle_guest_debug(CPUState *cpu)
{
    gdb_set_stop_cpu(cpu);
    qemu_system_debug_request();
    cpu->stopped = true;
}

#ifdef CONFIG_LINUX
static void sigbus_reraise(void)
{
    sigset_t set;
    struct sigaction action;

    memset(&action, 0, sizeof(action));
    action.sa_handler = SIG_DFL;
    if (!sigaction(SIGBUS, &action, NULL)) {
        raise(SIGBUS);
        sigemptyset(&set);
        sigaddset(&set, SIGBUS);
        pthread_sigmask(SIG_UNBLOCK, &set, NULL);
    }
    perror("Failed to re-raise SIGBUS!\n");
    abort();
}

static void sigbus_handler(int n, struct qemu_signalfd_siginfo *siginfo,
                           void *ctx)
{
    if (kvm_on_sigbus(siginfo->ssi_code,
                      (void *)(intptr_t)siginfo->ssi_addr)) {
        sigbus_reraise();
    }
}

static void qemu_init_sigbus(void)
{
    struct sigaction action;

    memset(&action, 0, sizeof(action));
    action.sa_flags = SA_SIGINFO;
    action.sa_sigaction = (void (*)(int, siginfo_t*, void*))sigbus_handler;
    sigaction(SIGBUS, &action, NULL);

    prctl(PR_MCE_KILL, PR_MCE_KILL_SET, PR_MCE_KILL_EARLY, 0, 0);
}

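/* Consume pending SIG_IPI and SIGBUS without blocking: SIG_IPI is
 * simply discarded, while a pending SIGBUS is forwarded to
 * kvm_on_sigbus_vcpu(), re-raising it if that fails.
 */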
static void qemu_kvm_eat_signals(CPUState *cpu)
{
    struct timespec ts = { 0, 0 };
    siginfo_t siginfo;
    sigset_t waitset;
    sigset_t chkset;
    int r;

    sigemptyset(&waitset);
    sigaddset(&waitset, SIG_IPI);
    sigaddset(&waitset, SIGBUS);

    do {
        r = sigtimedwait(&waitset, &siginfo, &ts);
        if (r == -1 && !(errno == EAGAIN || errno == EINTR)) {
            perror("sigtimedwait");
            exit(1);
        }

        switch (r) {
        case SIGBUS:
            if (kvm_on_sigbus_vcpu(cpu, siginfo.si_code, siginfo.si_addr)) {
                sigbus_reraise();
            }
            break;
        default:
            break;
        }

        r = sigpending(&chkset);
        if (r == -1) {
            perror("sigpending");
            exit(1);
        }
    } while (sigismember(&chkset, SIG_IPI) || sigismember(&chkset, SIGBUS));
}

#else /* !CONFIG_LINUX */

static void qemu_init_sigbus(void)
{
}

static void qemu_kvm_eat_signals(CPUState *cpu)
{
}
#endif /* !CONFIG_LINUX */

#ifndef _WIN32
static void dummy_signal(int sig)
{
}

static void qemu_kvm_init_cpu_signals(CPUState *cpu)
{
    int r;
    sigset_t set;
    struct sigaction sigact;

    memset(&sigact, 0, sizeof(sigact));
    sigact.sa_handler = dummy_signal;
    sigaction(SIG_IPI, &sigact, NULL);

    pthread_sigmask(SIG_BLOCK, NULL, &set);
    sigdelset(&set, SIG_IPI);
    sigdelset(&set, SIGBUS);
    r = kvm_set_signal_mask(cpu, &set);
    if (r) {
        fprintf(stderr, "kvm_set_signal_mask: %s\n", strerror(-r));
        exit(1);
    }
}

#else /* _WIN32 */
static void qemu_kvm_init_cpu_signals(CPUState *cpu)
{
    abort();
}
#endif /* _WIN32 */

static QemuMutex qemu_global_mutex;
static QemuCond qemu_io_proceeded_cond;
static unsigned iothread_requesting_mutex;

static QemuThread io_thread;

/* cpu creation */
static QemuCond qemu_cpu_cond;
/* system init */
static QemuCond qemu_pause_cond;

void qemu_init_cpu_loop(void)
{
    qemu_init_sigbus();
    qemu_cond_init(&qemu_cpu_cond);
    qemu_cond_init(&qemu_pause_cond);
    qemu_cond_init(&qemu_io_proceeded_cond);
    qemu_mutex_init(&qemu_global_mutex);

    qemu_thread_get_self(&io_thread);
}

void run_on_cpu(CPUState *cpu, run_on_cpu_func func, void *data)
{
    do_run_on_cpu(cpu, func, data, &qemu_global_mutex);
}

static void qemu_kvm_destroy_vcpu(CPUState *cpu)
{
    if (kvm_destroy_vcpu(cpu) < 0) {
        error_report("kvm_destroy_vcpu failed");
        exit(EXIT_FAILURE);
    }
}

static void qemu_tcg_destroy_vcpu(CPUState *cpu)
{
}

static void qemu_wait_io_event_common(CPUState *cpu)
{
    if (cpu->stop) {
        cpu->stop = false;
        cpu->stopped = true;
        qemu_cond_broadcast(&qemu_pause_cond);
    }
    process_queued_cpu_work(cpu);
    cpu->thread_kicked = false;
}

static void qemu_tcg_wait_io_event(CPUState *cpu)
{
    while (all_cpu_threads_idle()) {
        qemu_cond_wait(cpu->halt_cond, &qemu_global_mutex);
    }

    while (iothread_requesting_mutex) {
        qemu_cond_wait(&qemu_io_proceeded_cond, &qemu_global_mutex);
    }

    CPU_FOREACH(cpu) {
        qemu_wait_io_event_common(cpu);
    }
}

static void qemu_kvm_wait_io_event(CPUState *cpu)
{
    while (cpu_thread_is_idle(cpu)) {
        qemu_cond_wait(cpu->halt_cond, &qemu_global_mutex);
    }

    qemu_kvm_eat_signals(cpu);
    qemu_wait_io_event_common(cpu);
}

static void *qemu_kvm_cpu_thread_fn(void *arg)
{
    CPUState *cpu = arg;
    int r;

    rcu_register_thread();

    qemu_mutex_lock_iothread();
    qemu_thread_get_self(cpu->thread);
    cpu->thread_id = qemu_get_thread_id();
    cpu->can_do_io = 1;
    current_cpu = cpu;

    r = kvm_init_vcpu(cpu);
    if (r < 0) {
        fprintf(stderr, "kvm_init_vcpu failed: %s\n", strerror(-r));
        exit(1);
    }

    qemu_kvm_init_cpu_signals(cpu);

    /* signal CPU creation */
    cpu->created = true;
    qemu_cond_signal(&qemu_cpu_cond);

    do {
        if (cpu_can_run(cpu)) {
            r = kvm_cpu_exec(cpu);
            if (r == EXCP_DEBUG) {
                cpu_handle_guest_debug(cpu);
            }
        }
        qemu_kvm_wait_io_event(cpu);
    } while (!cpu->unplug || cpu_can_run(cpu));

    qemu_kvm_destroy_vcpu(cpu);
    cpu->created = false;
    qemu_cond_signal(&qemu_cpu_cond);
    qemu_mutex_unlock_iothread();
    return NULL;
}

static void *qemu_dummy_cpu_thread_fn(void *arg)
{
#ifdef _WIN32
    fprintf(stderr, "qtest is not supported under Windows\n");
    exit(1);
#else
    CPUState *cpu = arg;
    sigset_t waitset;
    int r;

    rcu_register_thread();

    qemu_mutex_lock_iothread();
    qemu_thread_get_self(cpu->thread);
    cpu->thread_id = qemu_get_thread_id();
    cpu->can_do_io = 1;

    sigemptyset(&waitset);
    sigaddset(&waitset, SIG_IPI);

    /* signal CPU creation */
    cpu->created = true;
    qemu_cond_signal(&qemu_cpu_cond);

    current_cpu = cpu;
    while (1) {
        current_cpu = NULL;
        qemu_mutex_unlock_iothread();
        do {
            int sig;
            r = sigwait(&waitset, &sig);
        } while (r == -1 && (errno == EAGAIN || errno == EINTR));
        if (r == -1) {
            perror("sigwait");
            exit(1);
        }
        qemu_mutex_lock_iothread();
        current_cpu = cpu;
        qemu_wait_io_event_common(cpu);
    }

    return NULL;
#endif
}

static int64_t tcg_get_icount_limit(void)
{
    int64_t deadline;

    if (replay_mode != REPLAY_MODE_PLAY) {
        deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);

        /* Maintain prior (possibly buggy) behaviour where if no deadline
         * was set (as there is no QEMU_CLOCK_VIRTUAL timer) or it is more than
         * INT32_MAX nanoseconds ahead, we still use INT32_MAX
         * nanoseconds.
         */
        if ((deadline < 0) || (deadline > INT32_MAX)) {
            deadline = INT32_MAX;
        }

        return qemu_icount_round(deadline);
    } else {
        return replay_get_instructions();
    }
}

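/* Run one TCG vCPU.  Under icount, the instruction budget for this
 * slice is loaded into the 16-bit icount_decr.u16.low countdown, with
 * any excess parked in icount_extra; whatever remains unexecuted on
 * exit is folded back out of timers_state.qemu_icount below.
 */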
static int tcg_cpu_exec(CPUState *cpu)
{
    int ret;
#ifdef CONFIG_PROFILER
    int64_t ti;
#endif

#ifdef CONFIG_PROFILER
    ti = profile_getclock();
#endif
    if (use_icount) {
        int64_t count;
        int decr;
        timers_state.qemu_icount -= (cpu->icount_decr.u16.low
                                    + cpu->icount_extra);
        cpu->icount_decr.u16.low = 0;
        cpu->icount_extra = 0;
        count = tcg_get_icount_limit();
        timers_state.qemu_icount += count;
        decr = (count > 0xffff) ? 0xffff : count;
        count -= decr;
        cpu->icount_decr.u16.low = decr;
        cpu->icount_extra = count;
    }
    cpu_exec_start(cpu);
    ret = cpu_exec(cpu);
    cpu_exec_end(cpu);
#ifdef CONFIG_PROFILER
    tcg_time += profile_getclock() - ti;
#endif
    if (use_icount) {
        /* Fold pending instructions back into the
           instruction counter, and clear the interrupt flag. */
        timers_state.qemu_icount -= (cpu->icount_decr.u16.low
                                    + cpu->icount_extra);
        cpu->icount_decr.u32 = 0;
        cpu->icount_extra = 0;
        replay_account_executed_instructions();
    }
    return ret;
}

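/* Single-threaded TCG scheduler: run each vCPU in round-robin order
 * until the list is exhausted or exit_request is raised.  next_cpu
 * remembers where to resume on the next call.
 */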
static void tcg_exec_all(void)
{
    int r;

    /* Account partial waits to QEMU_CLOCK_VIRTUAL. */
    qemu_account_warp_timer();

    if (next_cpu == NULL) {
        next_cpu = first_cpu;
    }
    for (; next_cpu != NULL && !exit_request; next_cpu = CPU_NEXT(next_cpu)) {
        CPUState *cpu = next_cpu;

        qemu_clock_enable(QEMU_CLOCK_VIRTUAL,
                          (cpu->singlestep_enabled & SSTEP_NOTIMER) == 0);

        if (cpu_can_run(cpu)) {
            r = tcg_cpu_exec(cpu);
            if (r == EXCP_DEBUG) {
                cpu_handle_guest_debug(cpu);
                break;
            } else if (r == EXCP_ATOMIC) {
                cpu_exec_step_atomic(cpu);
            }
        } else if (cpu->stop || cpu->stopped) {
            if (cpu->unplug) {
                next_cpu = CPU_NEXT(cpu);
            }
            break;
        }
    }

    /* Pairs with smp_wmb in qemu_cpu_kick. */
    atomic_mb_set(&exit_request, 0);
}

static void *qemu_tcg_cpu_thread_fn(void *arg)
{
    CPUState *cpu = arg;
    CPUState *remove_cpu = NULL;

    rcu_register_thread();

    qemu_mutex_lock_iothread();
    qemu_thread_get_self(cpu->thread);

    CPU_FOREACH(cpu) {
        cpu->thread_id = qemu_get_thread_id();
        cpu->created = true;
        cpu->can_do_io = 1;
    }
    qemu_cond_signal(&qemu_cpu_cond);

    /* wait for initial kick-off after machine start */
    while (first_cpu->stopped) {
        qemu_cond_wait(first_cpu->halt_cond, &qemu_global_mutex);

        /* process any pending work */
        CPU_FOREACH(cpu) {
            qemu_wait_io_event_common(cpu);
        }
    }

    /* process any pending work */
    atomic_mb_set(&exit_request, 1);

    while (1) {
        tcg_exec_all();

        if (use_icount) {
            int64_t deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);

            if (deadline == 0) {
                qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
            }
        }
        qemu_tcg_wait_io_event(QTAILQ_FIRST(&cpus));
        CPU_FOREACH(cpu) {
            if (cpu->unplug && !cpu_can_run(cpu)) {
                remove_cpu = cpu;
                break;
            }
        }
        if (remove_cpu) {
            qemu_tcg_destroy_vcpu(remove_cpu);
            remove_cpu->created = false;
            qemu_cond_signal(&qemu_cpu_cond);
            remove_cpu = NULL;
        }
    }

    return NULL;
}

static void qemu_cpu_kick_thread(CPUState *cpu)
{
#ifndef _WIN32
    int err;

    if (cpu->thread_kicked) {
        return;
    }
    cpu->thread_kicked = true;
    err = pthread_kill(cpu->thread->thread, SIG_IPI);
    if (err) {
        fprintf(stderr, "qemu:%s: %s", __func__, strerror(err));
        exit(1);
    }
#else /* _WIN32 */
    abort();
#endif
}

static void qemu_cpu_kick_no_halt(void)
{
    CPUState *cpu;
    /* Ensure whatever caused the exit has reached the CPU threads before
     * writing exit_request.
     */
    atomic_mb_set(&exit_request, 1);
    cpu = atomic_mb_read(&tcg_current_cpu);
    if (cpu) {
        cpu_exit(cpu);
    }
}

void qemu_cpu_kick(CPUState *cpu)
{
    qemu_cond_broadcast(cpu->halt_cond);
    if (tcg_enabled()) {
        qemu_cpu_kick_no_halt();
    } else {
        qemu_cpu_kick_thread(cpu);
    }
}

void qemu_cpu_kick_self(void)
{
    assert(current_cpu);
    qemu_cpu_kick_thread(current_cpu);
}

bool qemu_cpu_is_self(CPUState *cpu)
{
    return qemu_thread_is_self(cpu->thread);
}

bool qemu_in_vcpu_thread(void)
{
    return current_cpu && qemu_cpu_is_self(current_cpu);
}

static __thread bool iothread_locked = false;

bool qemu_mutex_iothread_locked(void)
{
    return iothread_locked;
}

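/* The TCG vCPU thread can hold the BQL for long stretches.  If the
 * trylock below fails, kick the vCPU out of the translated-code loop
 * first so that the mutex is released promptly.
 */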
void qemu_mutex_lock_iothread(void)
{
    atomic_inc(&iothread_requesting_mutex);
    /* In the simple case there is no need to bump the VCPU thread out of
     * TCG code execution.
     */
    if (!tcg_enabled() || qemu_in_vcpu_thread() ||
        !first_cpu || !first_cpu->created) {
        qemu_mutex_lock(&qemu_global_mutex);
        atomic_dec(&iothread_requesting_mutex);
    } else {
        if (qemu_mutex_trylock(&qemu_global_mutex)) {
            qemu_cpu_kick_no_halt();
            qemu_mutex_lock(&qemu_global_mutex);
        }
        atomic_dec(&iothread_requesting_mutex);
        qemu_cond_broadcast(&qemu_io_proceeded_cond);
    }
    iothread_locked = true;
}

void qemu_mutex_unlock_iothread(void)
{
    iothread_locked = false;
    qemu_mutex_unlock(&qemu_global_mutex);
}

static bool all_vcpus_paused(void)
{
    CPUState *cpu;

    CPU_FOREACH(cpu) {
        if (!cpu->stopped) {
            return false;
        }
    }

    return true;
}

void pause_all_vcpus(void)
{
    CPUState *cpu;

    qemu_clock_enable(QEMU_CLOCK_VIRTUAL, false);
    CPU_FOREACH(cpu) {
        cpu->stop = true;
        qemu_cpu_kick(cpu);
    }

    if (qemu_in_vcpu_thread()) {
        cpu_stop_current();
        if (!kvm_enabled()) {
            CPU_FOREACH(cpu) {
                cpu->stop = false;
                cpu->stopped = true;
            }
            return;
        }
    }

    while (!all_vcpus_paused()) {
        qemu_cond_wait(&qemu_pause_cond, &qemu_global_mutex);
        CPU_FOREACH(cpu) {
            qemu_cpu_kick(cpu);
        }
    }
}

void cpu_resume(CPUState *cpu)
{
    cpu->stop = false;
    cpu->stopped = false;
    qemu_cpu_kick(cpu);
}

void resume_all_vcpus(void)
{
    CPUState *cpu;

    qemu_clock_enable(QEMU_CLOCK_VIRTUAL, true);
    CPU_FOREACH(cpu) {
        cpu_resume(cpu);
    }
}

void cpu_remove(CPUState *cpu)
{
    cpu->stop = true;
    cpu->unplug = true;
    qemu_cpu_kick(cpu);
}

void cpu_remove_sync(CPUState *cpu)
{
    cpu_remove(cpu);
    while (cpu->created) {
        qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
    }
}

/* For temporary buffers for forming a name */
#define VCPU_THREAD_NAME_SIZE 16

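/* All TCG vCPUs share a single host thread (created on first use and
 * reused afterwards), whereas KVM and dummy vCPUs below each get a
 * thread of their own.
 */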
static void qemu_tcg_init_vcpu(CPUState *cpu)
{
    char thread_name[VCPU_THREAD_NAME_SIZE];
    static QemuCond *tcg_halt_cond;
    static QemuThread *tcg_cpu_thread;

    /* share a single thread for all cpus with TCG */
    if (!tcg_cpu_thread) {
        cpu->thread = g_malloc0(sizeof(QemuThread));
        cpu->halt_cond = g_malloc0(sizeof(QemuCond));
        qemu_cond_init(cpu->halt_cond);
        tcg_halt_cond = cpu->halt_cond;
        snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/TCG",
                 cpu->cpu_index);
        qemu_thread_create(cpu->thread, thread_name, qemu_tcg_cpu_thread_fn,
                           cpu, QEMU_THREAD_JOINABLE);
#ifdef _WIN32
        cpu->hThread = qemu_thread_get_handle(cpu->thread);
#endif
        while (!cpu->created) {
            qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
        }
        tcg_cpu_thread = cpu->thread;
    } else {
        cpu->thread = tcg_cpu_thread;
        cpu->halt_cond = tcg_halt_cond;
    }
}

static void qemu_kvm_start_vcpu(CPUState *cpu)
{
    char thread_name[VCPU_THREAD_NAME_SIZE];

    cpu->thread = g_malloc0(sizeof(QemuThread));
    cpu->halt_cond = g_malloc0(sizeof(QemuCond));
    qemu_cond_init(cpu->halt_cond);
    snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/KVM",
             cpu->cpu_index);
    qemu_thread_create(cpu->thread, thread_name, qemu_kvm_cpu_thread_fn,
                       cpu, QEMU_THREAD_JOINABLE);
    while (!cpu->created) {
        qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
    }
}

static void qemu_dummy_start_vcpu(CPUState *cpu)
{
    char thread_name[VCPU_THREAD_NAME_SIZE];

    cpu->thread = g_malloc0(sizeof(QemuThread));
    cpu->halt_cond = g_malloc0(sizeof(QemuCond));
    qemu_cond_init(cpu->halt_cond);
    snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/DUMMY",
             cpu->cpu_index);
    qemu_thread_create(cpu->thread, thread_name, qemu_dummy_cpu_thread_fn, cpu,
                       QEMU_THREAD_JOINABLE);
    while (!cpu->created) {
        qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
    }
}

void qemu_init_vcpu(CPUState *cpu)
{
    cpu->nr_cores = smp_cores;
    cpu->nr_threads = smp_threads;
    cpu->stopped = true;

    if (!cpu->as) {
        /* If the target cpu hasn't set up any address spaces itself,
         * give it the default one.
         */
        AddressSpace *as = address_space_init_shareable(cpu->memory,
                                                        "cpu-memory");
        cpu->num_ases = 1;
        cpu_address_space_init(cpu, as, 0);
    }

    if (kvm_enabled()) {
        qemu_kvm_start_vcpu(cpu);
    } else if (tcg_enabled()) {
        qemu_tcg_init_vcpu(cpu);
    } else {
        qemu_dummy_start_vcpu(cpu);
    }
}

void cpu_stop_current(void)
{
    if (current_cpu) {
        current_cpu->stop = false;
        current_cpu->stopped = true;
        cpu_exit(current_cpu);
        qemu_cond_broadcast(&qemu_pause_cond);
    }
}

int vm_stop(RunState state)
{
    if (qemu_in_vcpu_thread()) {
        qemu_system_vmstop_request_prepare();
        qemu_system_vmstop_request(state);
        /*
         * FIXME: should not return to device code in case
         * vm_stop() has been requested.
         */
        cpu_stop_current();
        return 0;
    }

    return do_vm_stop(state);
}

/* Does a state transition even if the VM is already stopped;
   the current state is forgotten forever. */
int vm_stop_force_state(RunState state)
{
    if (runstate_is_running()) {
        return vm_stop(state);
    } else {
        runstate_set(state);

        bdrv_drain_all();
        /* Make sure to return an error if the flush in a previous vm_stop()
         * failed. */
        return bdrv_flush_all();
    }
}

void list_cpus(FILE *f, fprintf_function cpu_fprintf, const char *optarg)
{
    /* XXX: implement xxx_cpu_list for targets that still miss it */
#if defined(cpu_list)
    cpu_list(f, cpu_fprintf);
#endif
}

CpuInfoList *qmp_query_cpus(Error **errp)
{
    CpuInfoList *head = NULL, *cur_item = NULL;
    CPUState *cpu;

    CPU_FOREACH(cpu) {
        CpuInfoList *info;
#if defined(TARGET_I386)
        X86CPU *x86_cpu = X86_CPU(cpu);
        CPUX86State *env = &x86_cpu->env;
#elif defined(TARGET_PPC)
        PowerPCCPU *ppc_cpu = POWERPC_CPU(cpu);
        CPUPPCState *env = &ppc_cpu->env;
#elif defined(TARGET_SPARC)
        SPARCCPU *sparc_cpu = SPARC_CPU(cpu);
        CPUSPARCState *env = &sparc_cpu->env;
#elif defined(TARGET_MIPS)
        MIPSCPU *mips_cpu = MIPS_CPU(cpu);
        CPUMIPSState *env = &mips_cpu->env;
#elif defined(TARGET_TRICORE)
        TriCoreCPU *tricore_cpu = TRICORE_CPU(cpu);
        CPUTriCoreState *env = &tricore_cpu->env;
#endif

        cpu_synchronize_state(cpu);

        info = g_malloc0(sizeof(*info));
        info->value = g_malloc0(sizeof(*info->value));
        info->value->CPU = cpu->cpu_index;
        info->value->current = (cpu == first_cpu);
        info->value->halted = cpu->halted;
        info->value->qom_path = object_get_canonical_path(OBJECT(cpu));
        info->value->thread_id = cpu->thread_id;
#if defined(TARGET_I386)
        info->value->arch = CPU_INFO_ARCH_X86;
        info->value->u.x86.pc = env->eip + env->segs[R_CS].base;
#elif defined(TARGET_PPC)
        info->value->arch = CPU_INFO_ARCH_PPC;
        info->value->u.ppc.nip = env->nip;
#elif defined(TARGET_SPARC)
        info->value->arch = CPU_INFO_ARCH_SPARC;
        info->value->u.q_sparc.pc = env->pc;
        info->value->u.q_sparc.npc = env->npc;
#elif defined(TARGET_MIPS)
        info->value->arch = CPU_INFO_ARCH_MIPS;
        info->value->u.q_mips.PC = env->active_tc.PC;
#elif defined(TARGET_TRICORE)
        info->value->arch = CPU_INFO_ARCH_TRICORE;
        info->value->u.tricore.PC = env->PC;
#else
        info->value->arch = CPU_INFO_ARCH_OTHER;
#endif

        /* XXX: waiting for the qapi to support GSList */
        if (!cur_item) {
            head = cur_item = info;
        } else {
            cur_item->next = info;
            cur_item = info;
        }
    }

    return head;
}

void qmp_memsave(int64_t addr, int64_t size, const char *filename,
                 bool has_cpu, int64_t cpu_index, Error **errp)
{
    FILE *f;
    uint32_t l;
    CPUState *cpu;
    uint8_t buf[1024];
    int64_t orig_addr = addr, orig_size = size;

    if (!has_cpu) {
        cpu_index = 0;
    }

    cpu = qemu_get_cpu(cpu_index);
    if (cpu == NULL) {
        error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "cpu-index",
                   "a CPU number");
        return;
    }

    f = fopen(filename, "wb");
    if (!f) {
        error_setg_file_open(errp, errno, filename);
        return;
    }

    while (size != 0) {
        l = sizeof(buf);
        if (l > size) {
            l = size;
        }
        if (cpu_memory_rw_debug(cpu, addr, buf, l, 0) != 0) {
            error_setg(errp, "Invalid addr 0x%016" PRIx64 "/size %" PRId64
                             " specified", orig_addr, orig_size);
            goto exit;
        }
        if (fwrite(buf, 1, l, f) != l) {
            error_setg(errp, QERR_IO_ERROR);
            goto exit;
        }
        addr += l;
        size -= l;
    }

exit:
    fclose(f);
}

void qmp_pmemsave(int64_t addr, int64_t size, const char *filename,
                  Error **errp)
{
    FILE *f;
    uint32_t l;
    uint8_t buf[1024];

    f = fopen(filename, "wb");
    if (!f) {
        error_setg_file_open(errp, errno, filename);
        return;
    }

    while (size != 0) {
        l = sizeof(buf);
        if (l > size) {
            l = size;
        }
        cpu_physical_memory_read(addr, buf, l);
        if (fwrite(buf, 1, l, f) != l) {
            error_setg(errp, QERR_IO_ERROR);
            goto exit;
        }
        addr += l;
        size -= l;
    }

exit:
    fclose(f);
}

void qmp_inject_nmi(Error **errp)
{
    nmi_monitor_handle(monitor_get_cpu_index(), errp);
}

void dump_drift_info(FILE *f, fprintf_function cpu_fprintf)
{
    if (!use_icount) {
        return;
    }

    cpu_fprintf(f, "Host - Guest clock %"PRIi64" ms\n",
                (cpu_get_clock() - cpu_get_icount())/SCALE_MS);
    if (icount_align_option) {
        cpu_fprintf(f, "Max guest delay %"PRIi64" ms\n", -max_delay/SCALE_MS);
        cpu_fprintf(f, "Max guest advance %"PRIi64" ms\n", max_advance/SCALE_MS);
    } else {
        cpu_fprintf(f, "Max guest delay NA\n");
        cpu_fprintf(f, "Max guest advance NA\n");
    }
}