]> git.proxmox.com Git - mirror_qemu.git/blame - cpus.c
Merge remote-tracking branch 'remotes/mst/tags/for_upstream' into staging
[mirror_qemu.git] / cpus.c
CommitLineData
296af7c9
BS
1/*
2 * QEMU System Emulator
3 *
4 * Copyright (c) 2003-2008 Fabrice Bellard
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to deal
8 * in the Software without restriction, including without limitation the rights
9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 * copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22 * THE SOFTWARE.
23 */
24
25/* Needed early for CONFIG_BSD etc. */
7b31bbc2 26#include "qemu/osdep.h"
33c11879
PB
27#include "qemu-common.h"
28#include "cpu.h"
83c9089e 29#include "monitor/monitor.h"
a4e15de9 30#include "qapi/qmp/qerror.h"
d49b6836 31#include "qemu/error-report.h"
9c17d615 32#include "sysemu/sysemu.h"
da31d594 33#include "sysemu/block-backend.h"
022c62cb 34#include "exec/gdbstub.h"
9c17d615
PB
35#include "sysemu/dma.h"
36#include "sysemu/kvm.h"
de0b36b6 37#include "qmp-commands.h"
63c91552 38#include "exec/exec-all.h"
296af7c9 39
1de7afc9 40#include "qemu/thread.h"
9c17d615
PB
41#include "sysemu/cpus.h"
42#include "sysemu/qtest.h"
1de7afc9
PB
43#include "qemu/main-loop.h"
44#include "qemu/bitmap.h"
cb365646 45#include "qemu/seqlock.h"
a4e15de9 46#include "qapi-event.h"
9cb805fd 47#include "hw/nmi.h"
8b427044 48#include "sysemu/replay.h"
0ff0fc19
JK
49
50#ifndef _WIN32
1de7afc9 51#include "qemu/compatfd.h"
0ff0fc19 52#endif
296af7c9 53
6d9cb73c
JK
54#ifdef CONFIG_LINUX
55
56#include <sys/prctl.h>
57
c0532a76
MT
58#ifndef PR_MCE_KILL
59#define PR_MCE_KILL 33
60#endif
61
6d9cb73c
JK
62#ifndef PR_MCE_KILL_SET
63#define PR_MCE_KILL_SET 1
64#endif
65
66#ifndef PR_MCE_KILL_EARLY
67#define PR_MCE_KILL_EARLY 1
68#endif
69
70#endif /* CONFIG_LINUX */
71
182735ef 72static CPUState *next_cpu;
27498bef
ST
73int64_t max_delay;
74int64_t max_advance;
296af7c9 75
2adcc85d
JH
76/* vcpu throttling controls */
77static QEMUTimer *throttle_timer;
78static unsigned int throttle_percentage;
79
80#define CPU_THROTTLE_PCT_MIN 1
81#define CPU_THROTTLE_PCT_MAX 99
82#define CPU_THROTTLE_TIMESLICE_NS 10000000
83
321bc0b2
TC
84bool cpu_is_stopped(CPUState *cpu)
85{
86 return cpu->stopped || !runstate_is_running();
87}
88
a98ae1d8 89static bool cpu_thread_is_idle(CPUState *cpu)
ac873f1e 90{
c64ca814 91 if (cpu->stop || cpu->queued_work_first) {
ac873f1e
PM
92 return false;
93 }
321bc0b2 94 if (cpu_is_stopped(cpu)) {
ac873f1e
PM
95 return true;
96 }
8c2e1b00 97 if (!cpu->halted || cpu_has_work(cpu) ||
215e79c0 98 kvm_halt_in_kernel()) {
ac873f1e
PM
99 return false;
100 }
101 return true;
102}
103
104static bool all_cpu_threads_idle(void)
105{
182735ef 106 CPUState *cpu;
ac873f1e 107
bdc44640 108 CPU_FOREACH(cpu) {
182735ef 109 if (!cpu_thread_is_idle(cpu)) {
ac873f1e
PM
110 return false;
111 }
112 }
113 return true;
114}
115
946fb27c
PB
116/***********************************************************/
117/* guest cycle counter */
118
a3270e19
PB
119/* Protected by TimersState seqlock */
120
5045e9d9 121static bool icount_sleep = true;
71468395 122static int64_t vm_clock_warp_start = -1;
946fb27c
PB
123/* Conversion factor from emulated instructions to virtual clock ticks. */
124static int icount_time_shift;
125/* Arbitrarily pick 1MIPS as the minimum allowable speed. */
126#define MAX_ICOUNT_SHIFT 10
a3270e19 127
946fb27c
PB
128static QEMUTimer *icount_rt_timer;
129static QEMUTimer *icount_vm_timer;
130static QEMUTimer *icount_warp_timer;
946fb27c
PB
131
132typedef struct TimersState {
cb365646 133 /* Protected by BQL. */
946fb27c
PB
134 int64_t cpu_ticks_prev;
135 int64_t cpu_ticks_offset;
cb365646
LPF
136
137 /* cpu_clock_offset can be read out of BQL, so protect it with
138 * this lock.
139 */
140 QemuSeqLock vm_clock_seqlock;
946fb27c
PB
141 int64_t cpu_clock_offset;
142 int32_t cpu_ticks_enabled;
143 int64_t dummy;
c96778bb
FK
144
145 /* Compensate for varying guest execution speed. */
146 int64_t qemu_icount_bias;
147 /* Only written by TCG thread */
148 int64_t qemu_icount;
946fb27c
PB
149} TimersState;
150
d9cd4007 151static TimersState timers_state;
946fb27c 152
2a62914b 153int64_t cpu_get_icount_raw(void)
946fb27c
PB
154{
155 int64_t icount;
4917cf44 156 CPUState *cpu = current_cpu;
946fb27c 157
c96778bb 158 icount = timers_state.qemu_icount;
4917cf44 159 if (cpu) {
414b15c9 160 if (!cpu->can_do_io) {
2a62914b
PD
161 fprintf(stderr, "Bad icount read\n");
162 exit(1);
946fb27c 163 }
28ecfd7a 164 icount -= (cpu->icount_decr.u16.low + cpu->icount_extra);
946fb27c 165 }
2a62914b
PD
166 return icount;
167}
168
169/* Return the virtual CPU time, based on the instruction counter. */
170static int64_t cpu_get_icount_locked(void)
171{
172 int64_t icount = cpu_get_icount_raw();
3f031313 173 return timers_state.qemu_icount_bias + cpu_icount_to_ns(icount);
946fb27c
PB
174}
175
17a15f1b
PB
176int64_t cpu_get_icount(void)
177{
178 int64_t icount;
179 unsigned start;
180
181 do {
182 start = seqlock_read_begin(&timers_state.vm_clock_seqlock);
183 icount = cpu_get_icount_locked();
184 } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start));
185
186 return icount;
187}
188
3f031313
FK
189int64_t cpu_icount_to_ns(int64_t icount)
190{
191 return icount << icount_time_shift;
192}
193
d90f3cca
C
194/* return the time elapsed in VM between vm_start and vm_stop. Unless
195 * icount is active, cpu_get_ticks() uses units of the host CPU cycle
196 * counter.
197 *
198 * Caller must hold the BQL
199 */
946fb27c
PB
200int64_t cpu_get_ticks(void)
201{
5f3e3101
PB
202 int64_t ticks;
203
946fb27c
PB
204 if (use_icount) {
205 return cpu_get_icount();
206 }
5f3e3101
PB
207
208 ticks = timers_state.cpu_ticks_offset;
209 if (timers_state.cpu_ticks_enabled) {
4a7428c5 210 ticks += cpu_get_host_ticks();
5f3e3101
PB
211 }
212
213 if (timers_state.cpu_ticks_prev > ticks) {
214 /* Note: non increasing ticks may happen if the host uses
215 software suspend */
216 timers_state.cpu_ticks_offset += timers_state.cpu_ticks_prev - ticks;
217 ticks = timers_state.cpu_ticks_prev;
946fb27c 218 }
5f3e3101
PB
219
220 timers_state.cpu_ticks_prev = ticks;
221 return ticks;
946fb27c
PB
222}
223
cb365646 224static int64_t cpu_get_clock_locked(void)
946fb27c 225{
1d45cea5 226 int64_t time;
cb365646 227
1d45cea5 228 time = timers_state.cpu_clock_offset;
5f3e3101 229 if (timers_state.cpu_ticks_enabled) {
1d45cea5 230 time += get_clock();
946fb27c 231 }
cb365646 232
1d45cea5 233 return time;
cb365646
LPF
234}
235
d90f3cca 236/* Return the monotonic time elapsed in VM, i.e.,
8212ff86
PM
237 * the time between vm_start and vm_stop
238 */
cb365646
LPF
239int64_t cpu_get_clock(void)
240{
241 int64_t ti;
242 unsigned start;
243
244 do {
245 start = seqlock_read_begin(&timers_state.vm_clock_seqlock);
246 ti = cpu_get_clock_locked();
247 } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start));
248
249 return ti;
946fb27c
PB
250}
251
cb365646 252/* enable cpu_get_ticks()
3224e878 253 * Caller must hold BQL which serves as mutex for vm_clock_seqlock.
cb365646 254 */
946fb27c
PB
255void cpu_enable_ticks(void)
256{
cb365646 257 /* Here, the really thing protected by seqlock is cpu_clock_offset. */
03719e44 258 seqlock_write_begin(&timers_state.vm_clock_seqlock);
946fb27c 259 if (!timers_state.cpu_ticks_enabled) {
4a7428c5 260 timers_state.cpu_ticks_offset -= cpu_get_host_ticks();
946fb27c
PB
261 timers_state.cpu_clock_offset -= get_clock();
262 timers_state.cpu_ticks_enabled = 1;
263 }
03719e44 264 seqlock_write_end(&timers_state.vm_clock_seqlock);
946fb27c
PB
265}
266
267/* disable cpu_get_ticks() : the clock is stopped. You must not call
cb365646 268 * cpu_get_ticks() after that.
3224e878 269 * Caller must hold BQL which serves as mutex for vm_clock_seqlock.
cb365646 270 */
946fb27c
PB
271void cpu_disable_ticks(void)
272{
cb365646 273 /* Here, the really thing protected by seqlock is cpu_clock_offset. */
03719e44 274 seqlock_write_begin(&timers_state.vm_clock_seqlock);
946fb27c 275 if (timers_state.cpu_ticks_enabled) {
4a7428c5 276 timers_state.cpu_ticks_offset += cpu_get_host_ticks();
cb365646 277 timers_state.cpu_clock_offset = cpu_get_clock_locked();
946fb27c
PB
278 timers_state.cpu_ticks_enabled = 0;
279 }
03719e44 280 seqlock_write_end(&timers_state.vm_clock_seqlock);
946fb27c
PB
281}
282
283/* Correlation between real and virtual time is always going to be
284 fairly approximate, so ignore small variation.
285 When the guest is idle real and virtual time will be aligned in
286 the IO wait loop. */
73bcb24d 287#define ICOUNT_WOBBLE (NANOSECONDS_PER_SECOND / 10)
946fb27c
PB
288
289static void icount_adjust(void)
290{
291 int64_t cur_time;
292 int64_t cur_icount;
293 int64_t delta;
a3270e19
PB
294
295 /* Protected by TimersState mutex. */
946fb27c 296 static int64_t last_delta;
468cc7cf 297
946fb27c
PB
298 /* If the VM is not running, then do nothing. */
299 if (!runstate_is_running()) {
300 return;
301 }
468cc7cf 302
03719e44 303 seqlock_write_begin(&timers_state.vm_clock_seqlock);
17a15f1b
PB
304 cur_time = cpu_get_clock_locked();
305 cur_icount = cpu_get_icount_locked();
468cc7cf 306
946fb27c
PB
307 delta = cur_icount - cur_time;
308 /* FIXME: This is a very crude algorithm, somewhat prone to oscillation. */
309 if (delta > 0
310 && last_delta + ICOUNT_WOBBLE < delta * 2
311 && icount_time_shift > 0) {
312 /* The guest is getting too far ahead. Slow time down. */
313 icount_time_shift--;
314 }
315 if (delta < 0
316 && last_delta - ICOUNT_WOBBLE > delta * 2
317 && icount_time_shift < MAX_ICOUNT_SHIFT) {
318 /* The guest is getting too far behind. Speed time up. */
319 icount_time_shift++;
320 }
321 last_delta = delta;
c96778bb
FK
322 timers_state.qemu_icount_bias = cur_icount
323 - (timers_state.qemu_icount << icount_time_shift);
03719e44 324 seqlock_write_end(&timers_state.vm_clock_seqlock);
946fb27c
PB
325}
326
327static void icount_adjust_rt(void *opaque)
328{
40daca54 329 timer_mod(icount_rt_timer,
1979b908 330 qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL_RT) + 1000);
946fb27c
PB
331 icount_adjust();
332}
333
334static void icount_adjust_vm(void *opaque)
335{
40daca54
AB
336 timer_mod(icount_vm_timer,
337 qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
73bcb24d 338 NANOSECONDS_PER_SECOND / 10);
946fb27c
PB
339 icount_adjust();
340}
341
342static int64_t qemu_icount_round(int64_t count)
343{
344 return (count + (1 << icount_time_shift) - 1) >> icount_time_shift;
345}
346
efab87cf 347static void icount_warp_rt(void)
946fb27c 348{
ccffff48
AB
349 unsigned seq;
350 int64_t warp_start;
351
17a15f1b
PB
352 /* The icount_warp_timer is rescheduled soon after vm_clock_warp_start
353 * changes from -1 to another value, so the race here is okay.
354 */
ccffff48
AB
355 do {
356 seq = seqlock_read_begin(&timers_state.vm_clock_seqlock);
357 warp_start = vm_clock_warp_start;
358 } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, seq));
359
360 if (warp_start == -1) {
946fb27c
PB
361 return;
362 }
363
03719e44 364 seqlock_write_begin(&timers_state.vm_clock_seqlock);
946fb27c 365 if (runstate_is_running()) {
8eda206e
PD
366 int64_t clock = REPLAY_CLOCK(REPLAY_CLOCK_VIRTUAL_RT,
367 cpu_get_clock_locked());
8ed961d9
PB
368 int64_t warp_delta;
369
370 warp_delta = clock - vm_clock_warp_start;
371 if (use_icount == 2) {
946fb27c 372 /*
40daca54 373 * In adaptive mode, do not let QEMU_CLOCK_VIRTUAL run too
946fb27c
PB
374 * far ahead of real time.
375 */
17a15f1b 376 int64_t cur_icount = cpu_get_icount_locked();
bf2a7ddb 377 int64_t delta = clock - cur_icount;
8ed961d9 378 warp_delta = MIN(warp_delta, delta);
946fb27c 379 }
c96778bb 380 timers_state.qemu_icount_bias += warp_delta;
946fb27c
PB
381 }
382 vm_clock_warp_start = -1;
03719e44 383 seqlock_write_end(&timers_state.vm_clock_seqlock);
8ed961d9
PB
384
385 if (qemu_clock_expired(QEMU_CLOCK_VIRTUAL)) {
386 qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
387 }
946fb27c
PB
388}
389
/* Callback of icount_warp_timer: account for the warp that became due. */
static void icount_timer_cb(void *opaque)
{
    /* No need for a checkpoint because the timer already synchronizes
     * with CHECKPOINT_CLOCK_VIRTUAL_RT.
     */
    icount_warp_rt();
}
397
8156be56
PB
398void qtest_clock_warp(int64_t dest)
399{
40daca54 400 int64_t clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
efef88b3 401 AioContext *aio_context;
8156be56 402 assert(qtest_enabled());
efef88b3 403 aio_context = qemu_get_aio_context();
8156be56 404 while (clock < dest) {
40daca54 405 int64_t deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
c9299e2f 406 int64_t warp = qemu_soonest_timeout(dest - clock, deadline);
efef88b3 407
03719e44 408 seqlock_write_begin(&timers_state.vm_clock_seqlock);
c96778bb 409 timers_state.qemu_icount_bias += warp;
03719e44 410 seqlock_write_end(&timers_state.vm_clock_seqlock);
17a15f1b 411
40daca54 412 qemu_clock_run_timers(QEMU_CLOCK_VIRTUAL);
efef88b3 413 timerlist_run_timers(aio_context->tlg.tl[QEMU_CLOCK_VIRTUAL]);
40daca54 414 clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
8156be56 415 }
40daca54 416 qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
8156be56
PB
417}
418
e76d1798 419void qemu_start_warp_timer(void)
946fb27c 420{
ce78d18c 421 int64_t clock;
946fb27c
PB
422 int64_t deadline;
423
e76d1798 424 if (!use_icount) {
946fb27c
PB
425 return;
426 }
427
8bd7f71d
PD
428 /* Nothing to do if the VM is stopped: QEMU_CLOCK_VIRTUAL timers
429 * do not fire, so computing the deadline does not make sense.
430 */
431 if (!runstate_is_running()) {
432 return;
433 }
434
435 /* warp clock deterministically in record/replay mode */
e76d1798 436 if (!replay_checkpoint(CHECKPOINT_CLOCK_WARP_START)) {
8bd7f71d
PD
437 return;
438 }
439
ce78d18c 440 if (!all_cpu_threads_idle()) {
946fb27c
PB
441 return;
442 }
443
8156be56
PB
444 if (qtest_enabled()) {
445 /* When testing, qtest commands advance icount. */
e76d1798 446 return;
8156be56
PB
447 }
448
ac70aafc 449 /* We want to use the earliest deadline from ALL vm_clocks */
bf2a7ddb 450 clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT);
40daca54 451 deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
ce78d18c 452 if (deadline < 0) {
d7a0f71d
VC
453 static bool notified;
454 if (!icount_sleep && !notified) {
455 error_report("WARNING: icount sleep disabled and no active timers");
456 notified = true;
457 }
ce78d18c 458 return;
ac70aafc
AB
459 }
460
946fb27c
PB
461 if (deadline > 0) {
462 /*
40daca54 463 * Ensure QEMU_CLOCK_VIRTUAL proceeds even when the virtual CPU goes to
946fb27c
PB
464 * sleep. Otherwise, the CPU might be waiting for a future timer
465 * interrupt to wake it up, but the interrupt never comes because
466 * the vCPU isn't running any insns and thus doesn't advance the
40daca54 467 * QEMU_CLOCK_VIRTUAL.
946fb27c 468 */
5045e9d9
VC
469 if (!icount_sleep) {
470 /*
471 * We never let VCPUs sleep in no sleep icount mode.
472 * If there is a pending QEMU_CLOCK_VIRTUAL timer we just advance
473 * to the next QEMU_CLOCK_VIRTUAL event and notify it.
474 * It is useful when we want a deterministic execution time,
475 * isolated from host latencies.
476 */
03719e44 477 seqlock_write_begin(&timers_state.vm_clock_seqlock);
5045e9d9 478 timers_state.qemu_icount_bias += deadline;
03719e44 479 seqlock_write_end(&timers_state.vm_clock_seqlock);
5045e9d9
VC
480 qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
481 } else {
482 /*
483 * We do stop VCPUs and only advance QEMU_CLOCK_VIRTUAL after some
484 * "real" time, (related to the time left until the next event) has
485 * passed. The QEMU_CLOCK_VIRTUAL_RT clock will do this.
486 * This avoids that the warps are visible externally; for example,
487 * you will not be sending network packets continuously instead of
488 * every 100ms.
489 */
03719e44 490 seqlock_write_begin(&timers_state.vm_clock_seqlock);
5045e9d9
VC
491 if (vm_clock_warp_start == -1 || vm_clock_warp_start > clock) {
492 vm_clock_warp_start = clock;
493 }
03719e44 494 seqlock_write_end(&timers_state.vm_clock_seqlock);
5045e9d9 495 timer_mod_anticipate(icount_warp_timer, clock + deadline);
ce78d18c 496 }
ac70aafc 497 } else if (deadline == 0) {
40daca54 498 qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
946fb27c
PB
499 }
500}
501
e76d1798
PD
502static void qemu_account_warp_timer(void)
503{
504 if (!use_icount || !icount_sleep) {
505 return;
506 }
507
508 /* Nothing to do if the VM is stopped: QEMU_CLOCK_VIRTUAL timers
509 * do not fire, so computing the deadline does not make sense.
510 */
511 if (!runstate_is_running()) {
512 return;
513 }
514
515 /* warp clock deterministically in record/replay mode */
516 if (!replay_checkpoint(CHECKPOINT_CLOCK_WARP_ACCOUNT)) {
517 return;
518 }
519
520 timer_del(icount_warp_timer);
521 icount_warp_rt();
522}
523
d09eae37
FK
524static bool icount_state_needed(void *opaque)
525{
526 return use_icount;
527}
528
529/*
530 * This is a subsection for icount migration.
531 */
532static const VMStateDescription icount_vmstate_timers = {
533 .name = "timer/icount",
534 .version_id = 1,
535 .minimum_version_id = 1,
5cd8cada 536 .needed = icount_state_needed,
d09eae37
FK
537 .fields = (VMStateField[]) {
538 VMSTATE_INT64(qemu_icount_bias, TimersState),
539 VMSTATE_INT64(qemu_icount, TimersState),
540 VMSTATE_END_OF_LIST()
541 }
542};
543
946fb27c
PB
544static const VMStateDescription vmstate_timers = {
545 .name = "timer",
546 .version_id = 2,
547 .minimum_version_id = 1,
35d08458 548 .fields = (VMStateField[]) {
946fb27c
PB
549 VMSTATE_INT64(cpu_ticks_offset, TimersState),
550 VMSTATE_INT64(dummy, TimersState),
551 VMSTATE_INT64_V(cpu_clock_offset, TimersState, 2),
552 VMSTATE_END_OF_LIST()
d09eae37 553 },
5cd8cada
JQ
554 .subsections = (const VMStateDescription*[]) {
555 &icount_vmstate_timers,
556 NULL
946fb27c
PB
557 }
558};
559
e0eeb4a2 560static void cpu_throttle_thread(CPUState *cpu, void *opaque)
2adcc85d 561{
2adcc85d
JH
562 double pct;
563 double throttle_ratio;
564 long sleeptime_ns;
565
566 if (!cpu_throttle_get_percentage()) {
567 return;
568 }
569
570 pct = (double)cpu_throttle_get_percentage()/100;
571 throttle_ratio = pct / (1 - pct);
572 sleeptime_ns = (long)(throttle_ratio * CPU_THROTTLE_TIMESLICE_NS);
573
574 qemu_mutex_unlock_iothread();
575 atomic_set(&cpu->throttle_thread_scheduled, 0);
576 g_usleep(sleeptime_ns / 1000); /* Convert ns to us for usleep call */
577 qemu_mutex_lock_iothread();
578}
579
580static void cpu_throttle_timer_tick(void *opaque)
581{
582 CPUState *cpu;
583 double pct;
584
585 /* Stop the timer if needed */
586 if (!cpu_throttle_get_percentage()) {
587 return;
588 }
589 CPU_FOREACH(cpu) {
590 if (!atomic_xchg(&cpu->throttle_thread_scheduled, 1)) {
e0eeb4a2 591 async_run_on_cpu(cpu, cpu_throttle_thread, NULL);
2adcc85d
JH
592 }
593 }
594
595 pct = (double)cpu_throttle_get_percentage()/100;
596 timer_mod(throttle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT) +
597 CPU_THROTTLE_TIMESLICE_NS / (1-pct));
598}
599
600void cpu_throttle_set(int new_throttle_pct)
601{
602 /* Ensure throttle percentage is within valid range */
603 new_throttle_pct = MIN(new_throttle_pct, CPU_THROTTLE_PCT_MAX);
604 new_throttle_pct = MAX(new_throttle_pct, CPU_THROTTLE_PCT_MIN);
605
606 atomic_set(&throttle_percentage, new_throttle_pct);
607
608 timer_mod(throttle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT) +
609 CPU_THROTTLE_TIMESLICE_NS);
610}
611
612void cpu_throttle_stop(void)
613{
614 atomic_set(&throttle_percentage, 0);
615}
616
/* Throttling is active whenever the percentage is non-zero. */
bool cpu_throttle_active(void)
{
    int pct = cpu_throttle_get_percentage();

    return pct != 0;
}
621
622int cpu_throttle_get_percentage(void)
623{
624 return atomic_read(&throttle_percentage);
625}
626
4603ea01
PD
627void cpu_ticks_init(void)
628{
ccdb3c1f 629 seqlock_init(&timers_state.vm_clock_seqlock);
4603ea01 630 vmstate_register(NULL, 0, &vmstate_timers, &timers_state);
2adcc85d
JH
631 throttle_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL_RT,
632 cpu_throttle_timer_tick, NULL);
4603ea01
PD
633}
634
1ad9580b 635void configure_icount(QemuOpts *opts, Error **errp)
946fb27c 636{
1ad9580b 637 const char *option;
a8bfac37 638 char *rem_str = NULL;
1ad9580b 639
1ad9580b 640 option = qemu_opt_get(opts, "shift");
946fb27c 641 if (!option) {
a8bfac37
ST
642 if (qemu_opt_get(opts, "align") != NULL) {
643 error_setg(errp, "Please specify shift option when using align");
644 }
946fb27c
PB
645 return;
646 }
f1f4b57e
VC
647
648 icount_sleep = qemu_opt_get_bool(opts, "sleep", true);
5045e9d9
VC
649 if (icount_sleep) {
650 icount_warp_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL_RT,
e76d1798 651 icount_timer_cb, NULL);
5045e9d9 652 }
f1f4b57e 653
a8bfac37 654 icount_align_option = qemu_opt_get_bool(opts, "align", false);
f1f4b57e
VC
655
656 if (icount_align_option && !icount_sleep) {
778d9f9b 657 error_setg(errp, "align=on and sleep=off are incompatible");
f1f4b57e 658 }
946fb27c 659 if (strcmp(option, "auto") != 0) {
a8bfac37
ST
660 errno = 0;
661 icount_time_shift = strtol(option, &rem_str, 0);
662 if (errno != 0 || *rem_str != '\0' || !strlen(option)) {
663 error_setg(errp, "icount: Invalid shift value");
664 }
946fb27c
PB
665 use_icount = 1;
666 return;
a8bfac37
ST
667 } else if (icount_align_option) {
668 error_setg(errp, "shift=auto and align=on are incompatible");
f1f4b57e 669 } else if (!icount_sleep) {
778d9f9b 670 error_setg(errp, "shift=auto and sleep=off are incompatible");
946fb27c
PB
671 }
672
673 use_icount = 2;
674
675 /* 125MIPS seems a reasonable initial guess at the guest speed.
676 It will be corrected fairly quickly anyway. */
677 icount_time_shift = 3;
678
679 /* Have both realtime and virtual time triggers for speed adjustment.
680 The realtime trigger catches emulated time passing too slowly,
681 the virtual time trigger catches emulated time passing too fast.
682 Realtime triggers occur even when idle, so use them less frequently
683 than VM triggers. */
bf2a7ddb
PD
684 icount_rt_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL_RT,
685 icount_adjust_rt, NULL);
40daca54 686 timer_mod(icount_rt_timer,
bf2a7ddb 687 qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL_RT) + 1000);
40daca54
AB
688 icount_vm_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
689 icount_adjust_vm, NULL);
690 timer_mod(icount_vm_timer,
691 qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
73bcb24d 692 NANOSECONDS_PER_SECOND / 10);
946fb27c
PB
693}
694
296af7c9
BS
695/***********************************************************/
696void hw_error(const char *fmt, ...)
697{
698 va_list ap;
55e5c285 699 CPUState *cpu;
296af7c9
BS
700
701 va_start(ap, fmt);
702 fprintf(stderr, "qemu: hardware error: ");
703 vfprintf(stderr, fmt, ap);
704 fprintf(stderr, "\n");
bdc44640 705 CPU_FOREACH(cpu) {
55e5c285 706 fprintf(stderr, "CPU #%d:\n", cpu->cpu_index);
878096ee 707 cpu_dump_state(cpu, stderr, fprintf, CPU_DUMP_FPU);
296af7c9
BS
708 }
709 va_end(ap);
710 abort();
711}
712
713void cpu_synchronize_all_states(void)
714{
182735ef 715 CPUState *cpu;
296af7c9 716
bdc44640 717 CPU_FOREACH(cpu) {
182735ef 718 cpu_synchronize_state(cpu);
296af7c9
BS
719 }
720}
721
722void cpu_synchronize_all_post_reset(void)
723{
182735ef 724 CPUState *cpu;
296af7c9 725
bdc44640 726 CPU_FOREACH(cpu) {
182735ef 727 cpu_synchronize_post_reset(cpu);
296af7c9
BS
728 }
729}
730
731void cpu_synchronize_all_post_init(void)
732{
182735ef 733 CPUState *cpu;
296af7c9 734
bdc44640 735 CPU_FOREACH(cpu) {
182735ef 736 cpu_synchronize_post_init(cpu);
296af7c9
BS
737 }
738}
739
56983463 740static int do_vm_stop(RunState state)
296af7c9 741{
56983463
KW
742 int ret = 0;
743
1354869c 744 if (runstate_is_running()) {
296af7c9 745 cpu_disable_ticks();
296af7c9 746 pause_all_vcpus();
f5bbfba1 747 runstate_set(state);
1dfb4dd9 748 vm_state_notify(0, state);
a4e15de9 749 qapi_event_send_stop(&error_abort);
296af7c9 750 }
56983463 751
594a45ce 752 bdrv_drain_all();
6d0ceb80 753 replay_disable_events();
22af08ea 754 ret = bdrv_flush_all();
594a45ce 755
56983463 756 return ret;
296af7c9
BS
757}
758
a1fcaa73 759static bool cpu_can_run(CPUState *cpu)
296af7c9 760{
4fdeee7c 761 if (cpu->stop) {
a1fcaa73 762 return false;
0ab07c62 763 }
321bc0b2 764 if (cpu_is_stopped(cpu)) {
a1fcaa73 765 return false;
0ab07c62 766 }
a1fcaa73 767 return true;
296af7c9
BS
768}
769
91325046 770static void cpu_handle_guest_debug(CPUState *cpu)
83f338f7 771{
64f6b346 772 gdb_set_stop_cpu(cpu);
8cf71710 773 qemu_system_debug_request();
f324e766 774 cpu->stopped = true;
3c638d06
JK
775}
776
6d9cb73c
JK
777#ifdef CONFIG_LINUX
778static void sigbus_reraise(void)
779{
780 sigset_t set;
781 struct sigaction action;
782
783 memset(&action, 0, sizeof(action));
784 action.sa_handler = SIG_DFL;
785 if (!sigaction(SIGBUS, &action, NULL)) {
786 raise(SIGBUS);
787 sigemptyset(&set);
788 sigaddset(&set, SIGBUS);
a2d1761d 789 pthread_sigmask(SIG_UNBLOCK, &set, NULL);
6d9cb73c
JK
790 }
791 perror("Failed to re-raise SIGBUS!\n");
792 abort();
793}
794
795static void sigbus_handler(int n, struct qemu_signalfd_siginfo *siginfo,
796 void *ctx)
797{
798 if (kvm_on_sigbus(siginfo->ssi_code,
799 (void *)(intptr_t)siginfo->ssi_addr)) {
800 sigbus_reraise();
801 }
802}
803
804static void qemu_init_sigbus(void)
805{
806 struct sigaction action;
807
808 memset(&action, 0, sizeof(action));
809 action.sa_flags = SA_SIGINFO;
810 action.sa_sigaction = (void (*)(int, siginfo_t*, void*))sigbus_handler;
811 sigaction(SIGBUS, &action, NULL);
812
813 prctl(PR_MCE_KILL, PR_MCE_KILL_SET, PR_MCE_KILL_EARLY, 0, 0);
814}
815
290adf38 816static void qemu_kvm_eat_signals(CPUState *cpu)
1ab3c6c0
JK
817{
818 struct timespec ts = { 0, 0 };
819 siginfo_t siginfo;
820 sigset_t waitset;
821 sigset_t chkset;
822 int r;
823
824 sigemptyset(&waitset);
825 sigaddset(&waitset, SIG_IPI);
826 sigaddset(&waitset, SIGBUS);
827
828 do {
829 r = sigtimedwait(&waitset, &siginfo, &ts);
830 if (r == -1 && !(errno == EAGAIN || errno == EINTR)) {
831 perror("sigtimedwait");
832 exit(1);
833 }
834
835 switch (r) {
836 case SIGBUS:
290adf38 837 if (kvm_on_sigbus_vcpu(cpu, siginfo.si_code, siginfo.si_addr)) {
1ab3c6c0
JK
838 sigbus_reraise();
839 }
840 break;
841 default:
842 break;
843 }
844
845 r = sigpending(&chkset);
846 if (r == -1) {
847 perror("sigpending");
848 exit(1);
849 }
850 } while (sigismember(&chkset, SIG_IPI) || sigismember(&chkset, SIGBUS));
1ab3c6c0
JK
851}
852
6d9cb73c
JK
853#else /* !CONFIG_LINUX */
854
/* Without CONFIG_LINUX there is no SIGBUS setup to perform. */
static void qemu_init_sigbus(void)
{
    /* intentionally empty */
}
1ab3c6c0 858
290adf38 859static void qemu_kvm_eat_signals(CPUState *cpu)
1ab3c6c0
JK
860{
861}
6d9cb73c
JK
862#endif /* !CONFIG_LINUX */
863
296af7c9 864#ifndef _WIN32
55f8d6ac
JK
/* No-op handler, installed for SIG_IPI by qemu_kvm_init_cpu_signals(). */
static void dummy_signal(int sig)
{
    /* intentionally empty */
}
55f8d6ac 868
13618e05 869static void qemu_kvm_init_cpu_signals(CPUState *cpu)
714bd040
PB
870{
871 int r;
872 sigset_t set;
873 struct sigaction sigact;
874
875 memset(&sigact, 0, sizeof(sigact));
876 sigact.sa_handler = dummy_signal;
877 sigaction(SIG_IPI, &sigact, NULL);
878
714bd040
PB
879 pthread_sigmask(SIG_BLOCK, NULL, &set);
880 sigdelset(&set, SIG_IPI);
714bd040 881 sigdelset(&set, SIGBUS);
491d6e80 882 r = kvm_set_signal_mask(cpu, &set);
714bd040
PB
883 if (r) {
884 fprintf(stderr, "kvm_set_signal_mask: %s\n", strerror(-r));
885 exit(1);
886 }
887}
888
55f8d6ac 889#else /* _WIN32 */
13618e05 890static void qemu_kvm_init_cpu_signals(CPUState *cpu)
ff48eb5f 891{
714bd040
PB
892 abort();
893}
714bd040 894#endif /* _WIN32 */
ff48eb5f 895
b2532d88 896static QemuMutex qemu_global_mutex;
46daff13 897static QemuCond qemu_io_proceeded_cond;
6b49809c 898static unsigned iothread_requesting_mutex;
296af7c9
BS
899
900static QemuThread io_thread;
901
296af7c9
BS
902/* cpu creation */
903static QemuCond qemu_cpu_cond;
904/* system init */
296af7c9
BS
905static QemuCond qemu_pause_cond;
906
d3b12f5d 907void qemu_init_cpu_loop(void)
296af7c9 908{
6d9cb73c 909 qemu_init_sigbus();
ed94592b 910 qemu_cond_init(&qemu_cpu_cond);
ed94592b 911 qemu_cond_init(&qemu_pause_cond);
46daff13 912 qemu_cond_init(&qemu_io_proceeded_cond);
296af7c9 913 qemu_mutex_init(&qemu_global_mutex);
296af7c9 914
b7680cb6 915 qemu_thread_get_self(&io_thread);
296af7c9
BS
916}
917
e0eeb4a2 918void run_on_cpu(CPUState *cpu, run_on_cpu_func func, void *data)
e82bcec2 919{
d148d90e 920 do_run_on_cpu(cpu, func, data, &qemu_global_mutex);
3c02270d
CV
921}
922
4c055ab5
GZ
923static void qemu_kvm_destroy_vcpu(CPUState *cpu)
924{
925 if (kvm_destroy_vcpu(cpu) < 0) {
926 error_report("kvm_destroy_vcpu failed");
927 exit(EXIT_FAILURE);
928 }
929}
930
931static void qemu_tcg_destroy_vcpu(CPUState *cpu)
932{
933}
934
509a0d78 935static void qemu_wait_io_event_common(CPUState *cpu)
296af7c9 936{
4fdeee7c
AF
937 if (cpu->stop) {
938 cpu->stop = false;
f324e766 939 cpu->stopped = true;
96bce683 940 qemu_cond_broadcast(&qemu_pause_cond);
296af7c9 941 }
a5403c69 942 process_queued_cpu_work(cpu);
216fc9a4 943 cpu->thread_kicked = false;
296af7c9
BS
944}
945
d5f8d613 946static void qemu_tcg_wait_io_event(CPUState *cpu)
296af7c9 947{
16400322 948 while (all_cpu_threads_idle()) {
d5f8d613 949 qemu_cond_wait(cpu->halt_cond, &qemu_global_mutex);
16400322 950 }
296af7c9 951
46daff13
PB
952 while (iothread_requesting_mutex) {
953 qemu_cond_wait(&qemu_io_proceeded_cond, &qemu_global_mutex);
954 }
6cabe1f3 955
bdc44640 956 CPU_FOREACH(cpu) {
182735ef 957 qemu_wait_io_event_common(cpu);
6cabe1f3 958 }
296af7c9
BS
959}
960
fd529e8f 961static void qemu_kvm_wait_io_event(CPUState *cpu)
296af7c9 962{
a98ae1d8 963 while (cpu_thread_is_idle(cpu)) {
f5c121b8 964 qemu_cond_wait(cpu->halt_cond, &qemu_global_mutex);
16400322 965 }
296af7c9 966
290adf38 967 qemu_kvm_eat_signals(cpu);
509a0d78 968 qemu_wait_io_event_common(cpu);
296af7c9
BS
969}
970
7e97cd88 971static void *qemu_kvm_cpu_thread_fn(void *arg)
296af7c9 972{
48a106bd 973 CPUState *cpu = arg;
84b4915d 974 int r;
296af7c9 975
ab28bd23
PB
976 rcu_register_thread();
977
2e7f7a3c 978 qemu_mutex_lock_iothread();
814e612e 979 qemu_thread_get_self(cpu->thread);
9f09e18a 980 cpu->thread_id = qemu_get_thread_id();
626cf8f4 981 cpu->can_do_io = 1;
4917cf44 982 current_cpu = cpu;
296af7c9 983
504134d2 984 r = kvm_init_vcpu(cpu);
84b4915d
JK
985 if (r < 0) {
986 fprintf(stderr, "kvm_init_vcpu failed: %s\n", strerror(-r));
987 exit(1);
988 }
296af7c9 989
13618e05 990 qemu_kvm_init_cpu_signals(cpu);
296af7c9
BS
991
992 /* signal CPU creation */
61a46217 993 cpu->created = true;
296af7c9
BS
994 qemu_cond_signal(&qemu_cpu_cond);
995
4c055ab5 996 do {
a1fcaa73 997 if (cpu_can_run(cpu)) {
1458c363 998 r = kvm_cpu_exec(cpu);
83f338f7 999 if (r == EXCP_DEBUG) {
91325046 1000 cpu_handle_guest_debug(cpu);
83f338f7 1001 }
0ab07c62 1002 }
fd529e8f 1003 qemu_kvm_wait_io_event(cpu);
4c055ab5 1004 } while (!cpu->unplug || cpu_can_run(cpu));
296af7c9 1005
4c055ab5 1006 qemu_kvm_destroy_vcpu(cpu);
2c579042
BR
1007 cpu->created = false;
1008 qemu_cond_signal(&qemu_cpu_cond);
4c055ab5 1009 qemu_mutex_unlock_iothread();
296af7c9
BS
1010 return NULL;
1011}
1012
c7f0f3b1
AL
1013static void *qemu_dummy_cpu_thread_fn(void *arg)
1014{
1015#ifdef _WIN32
1016 fprintf(stderr, "qtest is not supported under Windows\n");
1017 exit(1);
1018#else
10a9021d 1019 CPUState *cpu = arg;
c7f0f3b1
AL
1020 sigset_t waitset;
1021 int r;
1022
ab28bd23
PB
1023 rcu_register_thread();
1024
c7f0f3b1 1025 qemu_mutex_lock_iothread();
814e612e 1026 qemu_thread_get_self(cpu->thread);
9f09e18a 1027 cpu->thread_id = qemu_get_thread_id();
626cf8f4 1028 cpu->can_do_io = 1;
c7f0f3b1
AL
1029
1030 sigemptyset(&waitset);
1031 sigaddset(&waitset, SIG_IPI);
1032
1033 /* signal CPU creation */
61a46217 1034 cpu->created = true;
c7f0f3b1
AL
1035 qemu_cond_signal(&qemu_cpu_cond);
1036
4917cf44 1037 current_cpu = cpu;
c7f0f3b1 1038 while (1) {
4917cf44 1039 current_cpu = NULL;
c7f0f3b1
AL
1040 qemu_mutex_unlock_iothread();
1041 do {
1042 int sig;
1043 r = sigwait(&waitset, &sig);
1044 } while (r == -1 && (errno == EAGAIN || errno == EINTR));
1045 if (r == -1) {
1046 perror("sigwait");
1047 exit(1);
1048 }
1049 qemu_mutex_lock_iothread();
4917cf44 1050 current_cpu = cpu;
509a0d78 1051 qemu_wait_io_event_common(cpu);
c7f0f3b1
AL
1052 }
1053
1054 return NULL;
1055#endif
1056}
1057
bdb7ca67
JK
1058static void tcg_exec_all(void);
1059
7e97cd88 1060static void *qemu_tcg_cpu_thread_fn(void *arg)
296af7c9 1061{
c3586ba7 1062 CPUState *cpu = arg;
4c055ab5 1063 CPUState *remove_cpu = NULL;
296af7c9 1064
ab28bd23
PB
1065 rcu_register_thread();
1066
2e7f7a3c 1067 qemu_mutex_lock_iothread();
814e612e 1068 qemu_thread_get_self(cpu->thread);
296af7c9 1069
38fcbd3f
AF
1070 CPU_FOREACH(cpu) {
1071 cpu->thread_id = qemu_get_thread_id();
1072 cpu->created = true;
626cf8f4 1073 cpu->can_do_io = 1;
38fcbd3f 1074 }
296af7c9
BS
1075 qemu_cond_signal(&qemu_cpu_cond);
1076
fa7d1867 1077 /* wait for initial kick-off after machine start */
c28e399c 1078 while (first_cpu->stopped) {
d5f8d613 1079 qemu_cond_wait(first_cpu->halt_cond, &qemu_global_mutex);
8e564b4e
JK
1080
1081 /* process any pending work */
bdc44640 1082 CPU_FOREACH(cpu) {
182735ef 1083 qemu_wait_io_event_common(cpu);
8e564b4e 1084 }
0ab07c62 1085 }
296af7c9 1086
21618b3e 1087 /* process any pending work */
aed807c8 1088 atomic_mb_set(&exit_request, 1);
21618b3e 1089
296af7c9 1090 while (1) {
bdb7ca67 1091 tcg_exec_all();
ac70aafc
AB
1092
1093 if (use_icount) {
40daca54 1094 int64_t deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
ac70aafc
AB
1095
1096 if (deadline == 0) {
40daca54 1097 qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
ac70aafc 1098 }
3b2319a3 1099 }
d5f8d613 1100 qemu_tcg_wait_io_event(QTAILQ_FIRST(&cpus));
4c055ab5
GZ
1101 CPU_FOREACH(cpu) {
1102 if (cpu->unplug && !cpu_can_run(cpu)) {
1103 remove_cpu = cpu;
1104 break;
1105 }
1106 }
1107 if (remove_cpu) {
1108 qemu_tcg_destroy_vcpu(remove_cpu);
2c579042
BR
1109 cpu->created = false;
1110 qemu_cond_signal(&qemu_cpu_cond);
4c055ab5
GZ
1111 remove_cpu = NULL;
1112 }
296af7c9
BS
1113 }
1114
1115 return NULL;
1116}
1117
2ff09a40 1118static void qemu_cpu_kick_thread(CPUState *cpu)
cc015e9a
PB
1119{
1120#ifndef _WIN32
1121 int err;
1122
e0c38211
PB
1123 if (cpu->thread_kicked) {
1124 return;
9102deda 1125 }
e0c38211 1126 cpu->thread_kicked = true;
814e612e 1127 err = pthread_kill(cpu->thread->thread, SIG_IPI);
cc015e9a
PB
1128 if (err) {
1129 fprintf(stderr, "qemu:%s: %s", __func__, strerror(err));
1130 exit(1);
1131 }
1132#else /* _WIN32 */
e0c38211
PB
1133 abort();
1134#endif
1135}
ed9164a3 1136
e0c38211
PB
1137static void qemu_cpu_kick_no_halt(void)
1138{
1139 CPUState *cpu;
1140 /* Ensure whatever caused the exit has reached the CPU threads before
1141 * writing exit_request.
1142 */
1143 atomic_mb_set(&exit_request, 1);
1144 cpu = atomic_mb_read(&tcg_current_cpu);
1145 if (cpu) {
1146 cpu_exit(cpu);
cc015e9a 1147 }
cc015e9a
PB
1148}
1149
c08d7424 1150void qemu_cpu_kick(CPUState *cpu)
296af7c9 1151{
f5c121b8 1152 qemu_cond_broadcast(cpu->halt_cond);
e0c38211
PB
1153 if (tcg_enabled()) {
1154 qemu_cpu_kick_no_halt();
1155 } else {
1156 qemu_cpu_kick_thread(cpu);
1157 }
296af7c9
BS
1158}
1159
46d62fac 1160void qemu_cpu_kick_self(void)
296af7c9 1161{
4917cf44 1162 assert(current_cpu);
9102deda 1163 qemu_cpu_kick_thread(current_cpu);
296af7c9
BS
1164}
1165
60e82579 1166bool qemu_cpu_is_self(CPUState *cpu)
296af7c9 1167{
814e612e 1168 return qemu_thread_is_self(cpu->thread);
296af7c9
BS
1169}
1170
79e2b9ae 1171bool qemu_in_vcpu_thread(void)
aa723c23 1172{
4917cf44 1173 return current_cpu && qemu_cpu_is_self(current_cpu);
aa723c23
JQ
1174}
1175
afbe7053
PB
/*
 * Per-thread flag: set by qemu_mutex_lock_iothread() and cleared by
 * qemu_mutex_unlock_iothread().
 */
static __thread bool iothread_locked = false;

/* Return the calling thread's iothread_locked flag. */
bool qemu_mutex_iothread_locked(void)
{
    return iothread_locked;
}
1182
296af7c9
BS
1183void qemu_mutex_lock_iothread(void)
1184{
21618b3e 1185 atomic_inc(&iothread_requesting_mutex);
2e7f7a3c
PB
1186 /* In the simple case there is no need to bump the VCPU thread out of
1187 * TCG code execution.
1188 */
1189 if (!tcg_enabled() || qemu_in_vcpu_thread() ||
46036b24 1190 !first_cpu || !first_cpu->created) {
296af7c9 1191 qemu_mutex_lock(&qemu_global_mutex);
21618b3e 1192 atomic_dec(&iothread_requesting_mutex);
1a28cac3 1193 } else {
1a28cac3 1194 if (qemu_mutex_trylock(&qemu_global_mutex)) {
e0c38211 1195 qemu_cpu_kick_no_halt();
1a28cac3
MT
1196 qemu_mutex_lock(&qemu_global_mutex);
1197 }
6b49809c 1198 atomic_dec(&iothread_requesting_mutex);
46daff13 1199 qemu_cond_broadcast(&qemu_io_proceeded_cond);
1a28cac3 1200 }
afbe7053 1201 iothread_locked = true;
296af7c9
BS
1202}
1203
1204void qemu_mutex_unlock_iothread(void)
1205{
afbe7053 1206 iothread_locked = false;
296af7c9
BS
1207 qemu_mutex_unlock(&qemu_global_mutex);
1208}
1209
1210static int all_vcpus_paused(void)
1211{
bdc44640 1212 CPUState *cpu;
296af7c9 1213
bdc44640 1214 CPU_FOREACH(cpu) {
182735ef 1215 if (!cpu->stopped) {
296af7c9 1216 return 0;
0ab07c62 1217 }
296af7c9
BS
1218 }
1219
1220 return 1;
1221}
1222
1223void pause_all_vcpus(void)
1224{
bdc44640 1225 CPUState *cpu;
296af7c9 1226
40daca54 1227 qemu_clock_enable(QEMU_CLOCK_VIRTUAL, false);
bdc44640 1228 CPU_FOREACH(cpu) {
182735ef
AF
1229 cpu->stop = true;
1230 qemu_cpu_kick(cpu);
296af7c9
BS
1231 }
1232
aa723c23 1233 if (qemu_in_vcpu_thread()) {
d798e974
JK
1234 cpu_stop_current();
1235 if (!kvm_enabled()) {
bdc44640 1236 CPU_FOREACH(cpu) {
182735ef
AF
1237 cpu->stop = false;
1238 cpu->stopped = true;
d798e974
JK
1239 }
1240 return;
1241 }
1242 }
1243
296af7c9 1244 while (!all_vcpus_paused()) {
be7d6c57 1245 qemu_cond_wait(&qemu_pause_cond, &qemu_global_mutex);
bdc44640 1246 CPU_FOREACH(cpu) {
182735ef 1247 qemu_cpu_kick(cpu);
296af7c9
BS
1248 }
1249 }
1250}
1251
2993683b
IM
1252void cpu_resume(CPUState *cpu)
1253{
1254 cpu->stop = false;
1255 cpu->stopped = false;
1256 qemu_cpu_kick(cpu);
1257}
1258
296af7c9
BS
1259void resume_all_vcpus(void)
1260{
bdc44640 1261 CPUState *cpu;
296af7c9 1262
40daca54 1263 qemu_clock_enable(QEMU_CLOCK_VIRTUAL, true);
bdc44640 1264 CPU_FOREACH(cpu) {
182735ef 1265 cpu_resume(cpu);
296af7c9
BS
1266 }
1267}
1268
4c055ab5
GZ
1269void cpu_remove(CPUState *cpu)
1270{
1271 cpu->stop = true;
1272 cpu->unplug = true;
1273 qemu_cpu_kick(cpu);
1274}
1275
2c579042
BR
1276void cpu_remove_sync(CPUState *cpu)
1277{
1278 cpu_remove(cpu);
1279 while (cpu->created) {
1280 qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
1281 }
1282}
1283
4900116e
DDAG
/* Size in bytes of the temporary buffers used to format vCPU thread names */
#define VCPU_THREAD_NAME_SIZE 16
1286
e5ab30a2 1287static void qemu_tcg_init_vcpu(CPUState *cpu)
296af7c9 1288{
4900116e 1289 char thread_name[VCPU_THREAD_NAME_SIZE];
d5f8d613
FK
1290 static QemuCond *tcg_halt_cond;
1291 static QemuThread *tcg_cpu_thread;
4900116e 1292
296af7c9
BS
1293 /* share a single thread for all cpus with TCG */
1294 if (!tcg_cpu_thread) {
814e612e 1295 cpu->thread = g_malloc0(sizeof(QemuThread));
f5c121b8
AF
1296 cpu->halt_cond = g_malloc0(sizeof(QemuCond));
1297 qemu_cond_init(cpu->halt_cond);
1298 tcg_halt_cond = cpu->halt_cond;
4900116e
DDAG
1299 snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/TCG",
1300 cpu->cpu_index);
1301 qemu_thread_create(cpu->thread, thread_name, qemu_tcg_cpu_thread_fn,
1302 cpu, QEMU_THREAD_JOINABLE);
1ecf47bf 1303#ifdef _WIN32
814e612e 1304 cpu->hThread = qemu_thread_get_handle(cpu->thread);
1ecf47bf 1305#endif
61a46217 1306 while (!cpu->created) {
18a85728 1307 qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
0ab07c62 1308 }
814e612e 1309 tcg_cpu_thread = cpu->thread;
296af7c9 1310 } else {
814e612e 1311 cpu->thread = tcg_cpu_thread;
f5c121b8 1312 cpu->halt_cond = tcg_halt_cond;
296af7c9
BS
1313 }
1314}
1315
48a106bd 1316static void qemu_kvm_start_vcpu(CPUState *cpu)
296af7c9 1317{
4900116e
DDAG
1318 char thread_name[VCPU_THREAD_NAME_SIZE];
1319
814e612e 1320 cpu->thread = g_malloc0(sizeof(QemuThread));
f5c121b8
AF
1321 cpu->halt_cond = g_malloc0(sizeof(QemuCond));
1322 qemu_cond_init(cpu->halt_cond);
4900116e
DDAG
1323 snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/KVM",
1324 cpu->cpu_index);
1325 qemu_thread_create(cpu->thread, thread_name, qemu_kvm_cpu_thread_fn,
1326 cpu, QEMU_THREAD_JOINABLE);
61a46217 1327 while (!cpu->created) {
18a85728 1328 qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
0ab07c62 1329 }
296af7c9
BS
1330}
1331
10a9021d 1332static void qemu_dummy_start_vcpu(CPUState *cpu)
c7f0f3b1 1333{
4900116e
DDAG
1334 char thread_name[VCPU_THREAD_NAME_SIZE];
1335
814e612e 1336 cpu->thread = g_malloc0(sizeof(QemuThread));
f5c121b8
AF
1337 cpu->halt_cond = g_malloc0(sizeof(QemuCond));
1338 qemu_cond_init(cpu->halt_cond);
4900116e
DDAG
1339 snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/DUMMY",
1340 cpu->cpu_index);
1341 qemu_thread_create(cpu->thread, thread_name, qemu_dummy_cpu_thread_fn, cpu,
c7f0f3b1 1342 QEMU_THREAD_JOINABLE);
61a46217 1343 while (!cpu->created) {
c7f0f3b1
AL
1344 qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
1345 }
1346}
1347
c643bed9 1348void qemu_init_vcpu(CPUState *cpu)
296af7c9 1349{
ce3960eb
AF
1350 cpu->nr_cores = smp_cores;
1351 cpu->nr_threads = smp_threads;
f324e766 1352 cpu->stopped = true;
56943e8c
PM
1353
1354 if (!cpu->as) {
1355 /* If the target cpu hasn't set up any address spaces itself,
1356 * give it the default one.
1357 */
6731d864
PC
1358 AddressSpace *as = address_space_init_shareable(cpu->memory,
1359 "cpu-memory");
12ebc9a7 1360 cpu->num_ases = 1;
6731d864 1361 cpu_address_space_init(cpu, as, 0);
56943e8c
PM
1362 }
1363
0ab07c62 1364 if (kvm_enabled()) {
48a106bd 1365 qemu_kvm_start_vcpu(cpu);
c7f0f3b1 1366 } else if (tcg_enabled()) {
e5ab30a2 1367 qemu_tcg_init_vcpu(cpu);
c7f0f3b1 1368 } else {
10a9021d 1369 qemu_dummy_start_vcpu(cpu);
0ab07c62 1370 }
296af7c9
BS
1371}
1372
b4a3d965 1373void cpu_stop_current(void)
296af7c9 1374{
4917cf44
AF
1375 if (current_cpu) {
1376 current_cpu->stop = false;
1377 current_cpu->stopped = true;
1378 cpu_exit(current_cpu);
96bce683 1379 qemu_cond_broadcast(&qemu_pause_cond);
b4a3d965 1380 }
296af7c9
BS
1381}
1382
56983463 1383int vm_stop(RunState state)
296af7c9 1384{
aa723c23 1385 if (qemu_in_vcpu_thread()) {
74892d24 1386 qemu_system_vmstop_request_prepare();
1dfb4dd9 1387 qemu_system_vmstop_request(state);
296af7c9
BS
1388 /*
1389 * FIXME: should not return to device code in case
1390 * vm_stop() has been requested.
1391 */
b4a3d965 1392 cpu_stop_current();
56983463 1393 return 0;
296af7c9 1394 }
56983463
KW
1395
1396 return do_vm_stop(state);
296af7c9
BS
1397}
1398
8a9236f1
LC
1399/* does a state transition even if the VM is already stopped,
1400 current state is forgotten forever */
56983463 1401int vm_stop_force_state(RunState state)
8a9236f1
LC
1402{
1403 if (runstate_is_running()) {
56983463 1404 return vm_stop(state);
8a9236f1
LC
1405 } else {
1406 runstate_set(state);
b2780d32
WC
1407
1408 bdrv_drain_all();
594a45ce
KW
1409 /* Make sure to return an error if the flush in a previous vm_stop()
1410 * failed. */
22af08ea 1411 return bdrv_flush_all();
8a9236f1
LC
1412 }
1413}
1414
8b427044
PD
1415static int64_t tcg_get_icount_limit(void)
1416{
1417 int64_t deadline;
1418
1419 if (replay_mode != REPLAY_MODE_PLAY) {
1420 deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
1421
1422 /* Maintain prior (possibly buggy) behaviour where if no deadline
1423 * was set (as there is no QEMU_CLOCK_VIRTUAL timer) or it is more than
1424 * INT32_MAX nanoseconds ahead, we still use INT32_MAX
1425 * nanoseconds.
1426 */
1427 if ((deadline < 0) || (deadline > INT32_MAX)) {
1428 deadline = INT32_MAX;
1429 }
1430
1431 return qemu_icount_round(deadline);
1432 } else {
1433 return replay_get_instructions();
1434 }
1435}
1436
3d57f789 1437static int tcg_cpu_exec(CPUState *cpu)
296af7c9
BS
1438{
1439 int ret;
1440#ifdef CONFIG_PROFILER
1441 int64_t ti;
1442#endif
1443
1444#ifdef CONFIG_PROFILER
1445 ti = profile_getclock();
1446#endif
1447 if (use_icount) {
1448 int64_t count;
1449 int decr;
c96778bb
FK
1450 timers_state.qemu_icount -= (cpu->icount_decr.u16.low
1451 + cpu->icount_extra);
28ecfd7a 1452 cpu->icount_decr.u16.low = 0;
efee7340 1453 cpu->icount_extra = 0;
8b427044 1454 count = tcg_get_icount_limit();
c96778bb 1455 timers_state.qemu_icount += count;
296af7c9
BS
1456 decr = (count > 0xffff) ? 0xffff : count;
1457 count -= decr;
28ecfd7a 1458 cpu->icount_decr.u16.low = decr;
efee7340 1459 cpu->icount_extra = count;
296af7c9 1460 }
ab129972 1461 cpu_exec_start(cpu);
ea3e9847 1462 ret = cpu_exec(cpu);
ab129972 1463 cpu_exec_end(cpu);
296af7c9 1464#ifdef CONFIG_PROFILER
89d5cbdd 1465 tcg_time += profile_getclock() - ti;
296af7c9
BS
1466#endif
1467 if (use_icount) {
1468 /* Fold pending instructions back into the
1469 instruction counter, and clear the interrupt flag. */
c96778bb
FK
1470 timers_state.qemu_icount -= (cpu->icount_decr.u16.low
1471 + cpu->icount_extra);
28ecfd7a 1472 cpu->icount_decr.u32 = 0;
efee7340 1473 cpu->icount_extra = 0;
8b427044 1474 replay_account_executed_instructions();
296af7c9
BS
1475 }
1476 return ret;
1477}
1478
bdb7ca67 1479static void tcg_exec_all(void)
296af7c9 1480{
9a36085b
JK
1481 int r;
1482
40daca54 1483 /* Account partial waits to QEMU_CLOCK_VIRTUAL. */
e76d1798 1484 qemu_account_warp_timer();
ab33fcda 1485
0ab07c62 1486 if (next_cpu == NULL) {
296af7c9 1487 next_cpu = first_cpu;
0ab07c62 1488 }
bdc44640 1489 for (; next_cpu != NULL && !exit_request; next_cpu = CPU_NEXT(next_cpu)) {
182735ef 1490 CPUState *cpu = next_cpu;
296af7c9 1491
40daca54 1492 qemu_clock_enable(QEMU_CLOCK_VIRTUAL,
ed2803da 1493 (cpu->singlestep_enabled & SSTEP_NOTIMER) == 0);
296af7c9 1494
a1fcaa73 1495 if (cpu_can_run(cpu)) {
3d57f789 1496 r = tcg_cpu_exec(cpu);
9a36085b 1497 if (r == EXCP_DEBUG) {
91325046 1498 cpu_handle_guest_debug(cpu);
3c638d06
JK
1499 break;
1500 }
f324e766 1501 } else if (cpu->stop || cpu->stopped) {
4c055ab5
GZ
1502 if (cpu->unplug) {
1503 next_cpu = CPU_NEXT(cpu);
1504 }
296af7c9
BS
1505 break;
1506 }
1507 }
aed807c8
PB
1508
1509 /* Pairs with smp_wmb in qemu_cpu_kick. */
1510 atomic_mb_set(&exit_request, 0);
296af7c9
BS
1511}
1512
9a78eead 1513void list_cpus(FILE *f, fprintf_function cpu_fprintf, const char *optarg)
262353cb
BS
1514{
1515 /* XXX: implement xxx_cpu_list for targets that still miss it */
e916cbf8
PM
1516#if defined(cpu_list)
1517 cpu_list(f, cpu_fprintf);
262353cb
BS
1518#endif
1519}
de0b36b6
LC
1520
1521CpuInfoList *qmp_query_cpus(Error **errp)
1522{
1523 CpuInfoList *head = NULL, *cur_item = NULL;
182735ef 1524 CPUState *cpu;
de0b36b6 1525
bdc44640 1526 CPU_FOREACH(cpu) {
de0b36b6 1527 CpuInfoList *info;
182735ef
AF
1528#if defined(TARGET_I386)
1529 X86CPU *x86_cpu = X86_CPU(cpu);
1530 CPUX86State *env = &x86_cpu->env;
1531#elif defined(TARGET_PPC)
1532 PowerPCCPU *ppc_cpu = POWERPC_CPU(cpu);
1533 CPUPPCState *env = &ppc_cpu->env;
1534#elif defined(TARGET_SPARC)
1535 SPARCCPU *sparc_cpu = SPARC_CPU(cpu);
1536 CPUSPARCState *env = &sparc_cpu->env;
1537#elif defined(TARGET_MIPS)
1538 MIPSCPU *mips_cpu = MIPS_CPU(cpu);
1539 CPUMIPSState *env = &mips_cpu->env;
48e06fe0
BK
1540#elif defined(TARGET_TRICORE)
1541 TriCoreCPU *tricore_cpu = TRICORE_CPU(cpu);
1542 CPUTriCoreState *env = &tricore_cpu->env;
182735ef 1543#endif
de0b36b6 1544
cb446eca 1545 cpu_synchronize_state(cpu);
de0b36b6
LC
1546
1547 info = g_malloc0(sizeof(*info));
1548 info->value = g_malloc0(sizeof(*info->value));
55e5c285 1549 info->value->CPU = cpu->cpu_index;
182735ef 1550 info->value->current = (cpu == first_cpu);
259186a7 1551 info->value->halted = cpu->halted;
58f88d4b 1552 info->value->qom_path = object_get_canonical_path(OBJECT(cpu));
9f09e18a 1553 info->value->thread_id = cpu->thread_id;
de0b36b6 1554#if defined(TARGET_I386)
86f4b687 1555 info->value->arch = CPU_INFO_ARCH_X86;
544a3731 1556 info->value->u.x86.pc = env->eip + env->segs[R_CS].base;
de0b36b6 1557#elif defined(TARGET_PPC)
86f4b687 1558 info->value->arch = CPU_INFO_ARCH_PPC;
544a3731 1559 info->value->u.ppc.nip = env->nip;
de0b36b6 1560#elif defined(TARGET_SPARC)
86f4b687 1561 info->value->arch = CPU_INFO_ARCH_SPARC;
544a3731
EB
1562 info->value->u.q_sparc.pc = env->pc;
1563 info->value->u.q_sparc.npc = env->npc;
de0b36b6 1564#elif defined(TARGET_MIPS)
86f4b687 1565 info->value->arch = CPU_INFO_ARCH_MIPS;
544a3731 1566 info->value->u.q_mips.PC = env->active_tc.PC;
48e06fe0 1567#elif defined(TARGET_TRICORE)
86f4b687 1568 info->value->arch = CPU_INFO_ARCH_TRICORE;
544a3731 1569 info->value->u.tricore.PC = env->PC;
86f4b687
EB
1570#else
1571 info->value->arch = CPU_INFO_ARCH_OTHER;
de0b36b6
LC
1572#endif
1573
1574 /* XXX: waiting for the qapi to support GSList */
1575 if (!cur_item) {
1576 head = cur_item = info;
1577 } else {
1578 cur_item->next = info;
1579 cur_item = info;
1580 }
1581 }
1582
1583 return head;
1584}
0cfd6a9a
LC
1585
1586void qmp_memsave(int64_t addr, int64_t size, const char *filename,
1587 bool has_cpu, int64_t cpu_index, Error **errp)
1588{
1589 FILE *f;
1590 uint32_t l;
55e5c285 1591 CPUState *cpu;
0cfd6a9a 1592 uint8_t buf[1024];
0dc9daf0 1593 int64_t orig_addr = addr, orig_size = size;
0cfd6a9a
LC
1594
1595 if (!has_cpu) {
1596 cpu_index = 0;
1597 }
1598
151d1322
AF
1599 cpu = qemu_get_cpu(cpu_index);
1600 if (cpu == NULL) {
c6bd8c70
MA
1601 error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "cpu-index",
1602 "a CPU number");
0cfd6a9a
LC
1603 return;
1604 }
1605
1606 f = fopen(filename, "wb");
1607 if (!f) {
618da851 1608 error_setg_file_open(errp, errno, filename);
0cfd6a9a
LC
1609 return;
1610 }
1611
1612 while (size != 0) {
1613 l = sizeof(buf);
1614 if (l > size)
1615 l = size;
2f4d0f59 1616 if (cpu_memory_rw_debug(cpu, addr, buf, l, 0) != 0) {
0dc9daf0
BP
1617 error_setg(errp, "Invalid addr 0x%016" PRIx64 "/size %" PRId64
1618 " specified", orig_addr, orig_size);
2f4d0f59
AK
1619 goto exit;
1620 }
0cfd6a9a 1621 if (fwrite(buf, 1, l, f) != l) {
c6bd8c70 1622 error_setg(errp, QERR_IO_ERROR);
0cfd6a9a
LC
1623 goto exit;
1624 }
1625 addr += l;
1626 size -= l;
1627 }
1628
1629exit:
1630 fclose(f);
1631}
6d3962bf
LC
1632
1633void qmp_pmemsave(int64_t addr, int64_t size, const char *filename,
1634 Error **errp)
1635{
1636 FILE *f;
1637 uint32_t l;
1638 uint8_t buf[1024];
1639
1640 f = fopen(filename, "wb");
1641 if (!f) {
618da851 1642 error_setg_file_open(errp, errno, filename);
6d3962bf
LC
1643 return;
1644 }
1645
1646 while (size != 0) {
1647 l = sizeof(buf);
1648 if (l > size)
1649 l = size;
eb6282f2 1650 cpu_physical_memory_read(addr, buf, l);
6d3962bf 1651 if (fwrite(buf, 1, l, f) != l) {
c6bd8c70 1652 error_setg(errp, QERR_IO_ERROR);
6d3962bf
LC
1653 goto exit;
1654 }
1655 addr += l;
1656 size -= l;
1657 }
1658
1659exit:
1660 fclose(f);
1661}
ab49ab5c
LC
1662
1663void qmp_inject_nmi(Error **errp)
1664{
9cb805fd 1665 nmi_monitor_handle(monitor_get_cpu_index(), errp);
ab49ab5c 1666}
27498bef
ST
1667
1668void dump_drift_info(FILE *f, fprintf_function cpu_fprintf)
1669{
1670 if (!use_icount) {
1671 return;
1672 }
1673
1674 cpu_fprintf(f, "Host - Guest clock %"PRIi64" ms\n",
1675 (cpu_get_clock() - cpu_get_icount())/SCALE_MS);
1676 if (icount_align_option) {
1677 cpu_fprintf(f, "Max guest delay %"PRIi64" ms\n", -max_delay/SCALE_MS);
1678 cpu_fprintf(f, "Max guest advance %"PRIi64" ms\n", max_advance/SCALE_MS);
1679 } else {
1680 cpu_fprintf(f, "Max guest delay NA\n");
1681 cpu_fprintf(f, "Max guest advance NA\n");
1682 }
1683}