]> git.proxmox.com Git - mirror_qemu.git/blame - cpus.c
cpus: don't use atomic_read for vm_clock_warp_start
[mirror_qemu.git] / cpus.c
CommitLineData
296af7c9
BS
1/*
2 * QEMU System Emulator
3 *
4 * Copyright (c) 2003-2008 Fabrice Bellard
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to deal
8 * in the Software without restriction, including without limitation the rights
9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 * copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22 * THE SOFTWARE.
23 */
24
25/* Needed early for CONFIG_BSD etc. */
7b31bbc2 26#include "qemu/osdep.h"
296af7c9 27
83c9089e 28#include "monitor/monitor.h"
a4e15de9 29#include "qapi/qmp/qerror.h"
d49b6836 30#include "qemu/error-report.h"
9c17d615 31#include "sysemu/sysemu.h"
da31d594 32#include "sysemu/block-backend.h"
022c62cb 33#include "exec/gdbstub.h"
9c17d615
PB
34#include "sysemu/dma.h"
35#include "sysemu/kvm.h"
de0b36b6 36#include "qmp-commands.h"
296af7c9 37
1de7afc9 38#include "qemu/thread.h"
9c17d615
PB
39#include "sysemu/cpus.h"
40#include "sysemu/qtest.h"
1de7afc9
PB
41#include "qemu/main-loop.h"
42#include "qemu/bitmap.h"
cb365646 43#include "qemu/seqlock.h"
a4e15de9 44#include "qapi-event.h"
9cb805fd 45#include "hw/nmi.h"
8b427044 46#include "sysemu/replay.h"
0ff0fc19
JK
47
48#ifndef _WIN32
1de7afc9 49#include "qemu/compatfd.h"
0ff0fc19 50#endif
296af7c9 51
6d9cb73c
JK
52#ifdef CONFIG_LINUX
53
54#include <sys/prctl.h>
55
c0532a76
MT
56#ifndef PR_MCE_KILL
57#define PR_MCE_KILL 33
58#endif
59
6d9cb73c
JK
60#ifndef PR_MCE_KILL_SET
61#define PR_MCE_KILL_SET 1
62#endif
63
64#ifndef PR_MCE_KILL_EARLY
65#define PR_MCE_KILL_EARLY 1
66#endif
67
68#endif /* CONFIG_LINUX */
69
182735ef 70static CPUState *next_cpu;
27498bef
ST
71int64_t max_delay;
72int64_t max_advance;
296af7c9 73
2adcc85d
JH
74/* vcpu throttling controls */
75static QEMUTimer *throttle_timer;
76static unsigned int throttle_percentage;
77
78#define CPU_THROTTLE_PCT_MIN 1
79#define CPU_THROTTLE_PCT_MAX 99
80#define CPU_THROTTLE_TIMESLICE_NS 10000000
81
321bc0b2
TC
82bool cpu_is_stopped(CPUState *cpu)
83{
84 return cpu->stopped || !runstate_is_running();
85}
86
a98ae1d8 87static bool cpu_thread_is_idle(CPUState *cpu)
ac873f1e 88{
c64ca814 89 if (cpu->stop || cpu->queued_work_first) {
ac873f1e
PM
90 return false;
91 }
321bc0b2 92 if (cpu_is_stopped(cpu)) {
ac873f1e
PM
93 return true;
94 }
8c2e1b00 95 if (!cpu->halted || cpu_has_work(cpu) ||
215e79c0 96 kvm_halt_in_kernel()) {
ac873f1e
PM
97 return false;
98 }
99 return true;
100}
101
102static bool all_cpu_threads_idle(void)
103{
182735ef 104 CPUState *cpu;
ac873f1e 105
bdc44640 106 CPU_FOREACH(cpu) {
182735ef 107 if (!cpu_thread_is_idle(cpu)) {
ac873f1e
PM
108 return false;
109 }
110 }
111 return true;
112}
113
946fb27c
PB
114/***********************************************************/
115/* guest cycle counter */
116
a3270e19
PB
117/* Protected by TimersState seqlock */
118
5045e9d9 119static bool icount_sleep = true;
71468395 120static int64_t vm_clock_warp_start = -1;
946fb27c
PB
121/* Conversion factor from emulated instructions to virtual clock ticks. */
122static int icount_time_shift;
123/* Arbitrarily pick 1MIPS as the minimum allowable speed. */
124#define MAX_ICOUNT_SHIFT 10
a3270e19 125
946fb27c
PB
126static QEMUTimer *icount_rt_timer;
127static QEMUTimer *icount_vm_timer;
128static QEMUTimer *icount_warp_timer;
946fb27c
PB
129
130typedef struct TimersState {
cb365646 131 /* Protected by BQL. */
946fb27c
PB
132 int64_t cpu_ticks_prev;
133 int64_t cpu_ticks_offset;
cb365646
LPF
134
135 /* cpu_clock_offset can be read out of BQL, so protect it with
136 * this lock.
137 */
138 QemuSeqLock vm_clock_seqlock;
946fb27c
PB
139 int64_t cpu_clock_offset;
140 int32_t cpu_ticks_enabled;
141 int64_t dummy;
c96778bb
FK
142
143 /* Compensate for varying guest execution speed. */
144 int64_t qemu_icount_bias;
145 /* Only written by TCG thread */
146 int64_t qemu_icount;
946fb27c
PB
147} TimersState;
148
d9cd4007 149static TimersState timers_state;
946fb27c 150
2a62914b 151int64_t cpu_get_icount_raw(void)
946fb27c
PB
152{
153 int64_t icount;
4917cf44 154 CPUState *cpu = current_cpu;
946fb27c 155
c96778bb 156 icount = timers_state.qemu_icount;
4917cf44 157 if (cpu) {
414b15c9 158 if (!cpu->can_do_io) {
2a62914b
PD
159 fprintf(stderr, "Bad icount read\n");
160 exit(1);
946fb27c 161 }
28ecfd7a 162 icount -= (cpu->icount_decr.u16.low + cpu->icount_extra);
946fb27c 163 }
2a62914b
PD
164 return icount;
165}
166
167/* Return the virtual CPU time, based on the instruction counter. */
168static int64_t cpu_get_icount_locked(void)
169{
170 int64_t icount = cpu_get_icount_raw();
3f031313 171 return timers_state.qemu_icount_bias + cpu_icount_to_ns(icount);
946fb27c
PB
172}
173
17a15f1b
PB
174int64_t cpu_get_icount(void)
175{
176 int64_t icount;
177 unsigned start;
178
179 do {
180 start = seqlock_read_begin(&timers_state.vm_clock_seqlock);
181 icount = cpu_get_icount_locked();
182 } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start));
183
184 return icount;
185}
186
3f031313
FK
187int64_t cpu_icount_to_ns(int64_t icount)
188{
189 return icount << icount_time_shift;
190}
191
946fb27c 192/* return the host CPU cycle counter and handle stop/restart */
cb365646 193/* Caller must hold the BQL */
946fb27c
PB
194int64_t cpu_get_ticks(void)
195{
5f3e3101
PB
196 int64_t ticks;
197
946fb27c
PB
198 if (use_icount) {
199 return cpu_get_icount();
200 }
5f3e3101
PB
201
202 ticks = timers_state.cpu_ticks_offset;
203 if (timers_state.cpu_ticks_enabled) {
4a7428c5 204 ticks += cpu_get_host_ticks();
5f3e3101
PB
205 }
206
207 if (timers_state.cpu_ticks_prev > ticks) {
208 /* Note: non increasing ticks may happen if the host uses
209 software suspend */
210 timers_state.cpu_ticks_offset += timers_state.cpu_ticks_prev - ticks;
211 ticks = timers_state.cpu_ticks_prev;
946fb27c 212 }
5f3e3101
PB
213
214 timers_state.cpu_ticks_prev = ticks;
215 return ticks;
946fb27c
PB
216}
217
cb365646 218static int64_t cpu_get_clock_locked(void)
946fb27c 219{
5f3e3101 220 int64_t ticks;
cb365646 221
5f3e3101
PB
222 ticks = timers_state.cpu_clock_offset;
223 if (timers_state.cpu_ticks_enabled) {
224 ticks += get_clock();
946fb27c 225 }
cb365646 226
5f3e3101 227 return ticks;
cb365646
LPF
228}
229
230/* return the host CPU monotonic timer and handle stop/restart */
231int64_t cpu_get_clock(void)
232{
233 int64_t ti;
234 unsigned start;
235
236 do {
237 start = seqlock_read_begin(&timers_state.vm_clock_seqlock);
238 ti = cpu_get_clock_locked();
239 } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start));
240
241 return ti;
946fb27c
PB
242}
243
cb365646
LPF
244/* enable cpu_get_ticks()
245 * Caller must hold BQL which server as mutex for vm_clock_seqlock.
246 */
946fb27c
PB
247void cpu_enable_ticks(void)
248{
cb365646
LPF
249 /* Here, the really thing protected by seqlock is cpu_clock_offset. */
250 seqlock_write_lock(&timers_state.vm_clock_seqlock);
946fb27c 251 if (!timers_state.cpu_ticks_enabled) {
4a7428c5 252 timers_state.cpu_ticks_offset -= cpu_get_host_ticks();
946fb27c
PB
253 timers_state.cpu_clock_offset -= get_clock();
254 timers_state.cpu_ticks_enabled = 1;
255 }
cb365646 256 seqlock_write_unlock(&timers_state.vm_clock_seqlock);
946fb27c
PB
257}
258
259/* disable cpu_get_ticks() : the clock is stopped. You must not call
cb365646
LPF
260 * cpu_get_ticks() after that.
261 * Caller must hold BQL which server as mutex for vm_clock_seqlock.
262 */
946fb27c
PB
263void cpu_disable_ticks(void)
264{
cb365646
LPF
265 /* Here, the really thing protected by seqlock is cpu_clock_offset. */
266 seqlock_write_lock(&timers_state.vm_clock_seqlock);
946fb27c 267 if (timers_state.cpu_ticks_enabled) {
4a7428c5 268 timers_state.cpu_ticks_offset += cpu_get_host_ticks();
cb365646 269 timers_state.cpu_clock_offset = cpu_get_clock_locked();
946fb27c
PB
270 timers_state.cpu_ticks_enabled = 0;
271 }
cb365646 272 seqlock_write_unlock(&timers_state.vm_clock_seqlock);
946fb27c
PB
273}
274
275/* Correlation between real and virtual time is always going to be
276 fairly approximate, so ignore small variation.
277 When the guest is idle real and virtual time will be aligned in
278 the IO wait loop. */
73bcb24d 279#define ICOUNT_WOBBLE (NANOSECONDS_PER_SECOND / 10)
946fb27c
PB
280
281static void icount_adjust(void)
282{
283 int64_t cur_time;
284 int64_t cur_icount;
285 int64_t delta;
a3270e19
PB
286
287 /* Protected by TimersState mutex. */
946fb27c 288 static int64_t last_delta;
468cc7cf 289
946fb27c
PB
290 /* If the VM is not running, then do nothing. */
291 if (!runstate_is_running()) {
292 return;
293 }
468cc7cf 294
17a15f1b
PB
295 seqlock_write_lock(&timers_state.vm_clock_seqlock);
296 cur_time = cpu_get_clock_locked();
297 cur_icount = cpu_get_icount_locked();
468cc7cf 298
946fb27c
PB
299 delta = cur_icount - cur_time;
300 /* FIXME: This is a very crude algorithm, somewhat prone to oscillation. */
301 if (delta > 0
302 && last_delta + ICOUNT_WOBBLE < delta * 2
303 && icount_time_shift > 0) {
304 /* The guest is getting too far ahead. Slow time down. */
305 icount_time_shift--;
306 }
307 if (delta < 0
308 && last_delta - ICOUNT_WOBBLE > delta * 2
309 && icount_time_shift < MAX_ICOUNT_SHIFT) {
310 /* The guest is getting too far behind. Speed time up. */
311 icount_time_shift++;
312 }
313 last_delta = delta;
c96778bb
FK
314 timers_state.qemu_icount_bias = cur_icount
315 - (timers_state.qemu_icount << icount_time_shift);
17a15f1b 316 seqlock_write_unlock(&timers_state.vm_clock_seqlock);
946fb27c
PB
317}
318
319static void icount_adjust_rt(void *opaque)
320{
40daca54 321 timer_mod(icount_rt_timer,
1979b908 322 qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL_RT) + 1000);
946fb27c
PB
323 icount_adjust();
324}
325
326static void icount_adjust_vm(void *opaque)
327{
40daca54
AB
328 timer_mod(icount_vm_timer,
329 qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
73bcb24d 330 NANOSECONDS_PER_SECOND / 10);
946fb27c
PB
331 icount_adjust();
332}
333
334static int64_t qemu_icount_round(int64_t count)
335{
336 return (count + (1 << icount_time_shift) - 1) >> icount_time_shift;
337}
338
efab87cf 339static void icount_warp_rt(void)
946fb27c 340{
ccffff48
AB
341 unsigned seq;
342 int64_t warp_start;
343
17a15f1b
PB
344 /* The icount_warp_timer is rescheduled soon after vm_clock_warp_start
345 * changes from -1 to another value, so the race here is okay.
346 */
ccffff48
AB
347 do {
348 seq = seqlock_read_begin(&timers_state.vm_clock_seqlock);
349 warp_start = vm_clock_warp_start;
350 } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, seq));
351
352 if (warp_start == -1) {
946fb27c
PB
353 return;
354 }
355
17a15f1b 356 seqlock_write_lock(&timers_state.vm_clock_seqlock);
946fb27c 357 if (runstate_is_running()) {
8eda206e
PD
358 int64_t clock = REPLAY_CLOCK(REPLAY_CLOCK_VIRTUAL_RT,
359 cpu_get_clock_locked());
8ed961d9
PB
360 int64_t warp_delta;
361
362 warp_delta = clock - vm_clock_warp_start;
363 if (use_icount == 2) {
946fb27c 364 /*
40daca54 365 * In adaptive mode, do not let QEMU_CLOCK_VIRTUAL run too
946fb27c
PB
366 * far ahead of real time.
367 */
17a15f1b 368 int64_t cur_icount = cpu_get_icount_locked();
bf2a7ddb 369 int64_t delta = clock - cur_icount;
8ed961d9 370 warp_delta = MIN(warp_delta, delta);
946fb27c 371 }
c96778bb 372 timers_state.qemu_icount_bias += warp_delta;
946fb27c
PB
373 }
374 vm_clock_warp_start = -1;
17a15f1b 375 seqlock_write_unlock(&timers_state.vm_clock_seqlock);
8ed961d9
PB
376
377 if (qemu_clock_expired(QEMU_CLOCK_VIRTUAL)) {
378 qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
379 }
946fb27c
PB
380}
381
e76d1798 382static void icount_timer_cb(void *opaque)
efab87cf 383{
e76d1798
PD
384 /* No need for a checkpoint because the timer already synchronizes
385 * with CHECKPOINT_CLOCK_VIRTUAL_RT.
386 */
387 icount_warp_rt();
efab87cf
PD
388}
389
8156be56
PB
390void qtest_clock_warp(int64_t dest)
391{
40daca54 392 int64_t clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
efef88b3 393 AioContext *aio_context;
8156be56 394 assert(qtest_enabled());
efef88b3 395 aio_context = qemu_get_aio_context();
8156be56 396 while (clock < dest) {
40daca54 397 int64_t deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
c9299e2f 398 int64_t warp = qemu_soonest_timeout(dest - clock, deadline);
efef88b3 399
17a15f1b 400 seqlock_write_lock(&timers_state.vm_clock_seqlock);
c96778bb 401 timers_state.qemu_icount_bias += warp;
17a15f1b
PB
402 seqlock_write_unlock(&timers_state.vm_clock_seqlock);
403
40daca54 404 qemu_clock_run_timers(QEMU_CLOCK_VIRTUAL);
efef88b3 405 timerlist_run_timers(aio_context->tlg.tl[QEMU_CLOCK_VIRTUAL]);
40daca54 406 clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
8156be56 407 }
40daca54 408 qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
8156be56
PB
409}
410
e76d1798 411void qemu_start_warp_timer(void)
946fb27c 412{
ce78d18c 413 int64_t clock;
946fb27c
PB
414 int64_t deadline;
415
e76d1798 416 if (!use_icount) {
946fb27c
PB
417 return;
418 }
419
8bd7f71d
PD
420 /* Nothing to do if the VM is stopped: QEMU_CLOCK_VIRTUAL timers
421 * do not fire, so computing the deadline does not make sense.
422 */
423 if (!runstate_is_running()) {
424 return;
425 }
426
427 /* warp clock deterministically in record/replay mode */
e76d1798 428 if (!replay_checkpoint(CHECKPOINT_CLOCK_WARP_START)) {
8bd7f71d
PD
429 return;
430 }
431
ce78d18c 432 if (!all_cpu_threads_idle()) {
946fb27c
PB
433 return;
434 }
435
8156be56
PB
436 if (qtest_enabled()) {
437 /* When testing, qtest commands advance icount. */
e76d1798 438 return;
8156be56
PB
439 }
440
ac70aafc 441 /* We want to use the earliest deadline from ALL vm_clocks */
bf2a7ddb 442 clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT);
40daca54 443 deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
ce78d18c 444 if (deadline < 0) {
d7a0f71d
VC
445 static bool notified;
446 if (!icount_sleep && !notified) {
447 error_report("WARNING: icount sleep disabled and no active timers");
448 notified = true;
449 }
ce78d18c 450 return;
ac70aafc
AB
451 }
452
946fb27c
PB
453 if (deadline > 0) {
454 /*
40daca54 455 * Ensure QEMU_CLOCK_VIRTUAL proceeds even when the virtual CPU goes to
946fb27c
PB
456 * sleep. Otherwise, the CPU might be waiting for a future timer
457 * interrupt to wake it up, but the interrupt never comes because
458 * the vCPU isn't running any insns and thus doesn't advance the
40daca54 459 * QEMU_CLOCK_VIRTUAL.
946fb27c 460 */
5045e9d9
VC
461 if (!icount_sleep) {
462 /*
463 * We never let VCPUs sleep in no sleep icount mode.
464 * If there is a pending QEMU_CLOCK_VIRTUAL timer we just advance
465 * to the next QEMU_CLOCK_VIRTUAL event and notify it.
466 * It is useful when we want a deterministic execution time,
467 * isolated from host latencies.
468 */
469 seqlock_write_lock(&timers_state.vm_clock_seqlock);
470 timers_state.qemu_icount_bias += deadline;
471 seqlock_write_unlock(&timers_state.vm_clock_seqlock);
472 qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
473 } else {
474 /*
475 * We do stop VCPUs and only advance QEMU_CLOCK_VIRTUAL after some
476 * "real" time, (related to the time left until the next event) has
477 * passed. The QEMU_CLOCK_VIRTUAL_RT clock will do this.
478 * This avoids that the warps are visible externally; for example,
479 * you will not be sending network packets continuously instead of
480 * every 100ms.
481 */
482 seqlock_write_lock(&timers_state.vm_clock_seqlock);
483 if (vm_clock_warp_start == -1 || vm_clock_warp_start > clock) {
484 vm_clock_warp_start = clock;
485 }
486 seqlock_write_unlock(&timers_state.vm_clock_seqlock);
487 timer_mod_anticipate(icount_warp_timer, clock + deadline);
ce78d18c 488 }
ac70aafc 489 } else if (deadline == 0) {
40daca54 490 qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
946fb27c
PB
491 }
492}
493
e76d1798
PD
494static void qemu_account_warp_timer(void)
495{
496 if (!use_icount || !icount_sleep) {
497 return;
498 }
499
500 /* Nothing to do if the VM is stopped: QEMU_CLOCK_VIRTUAL timers
501 * do not fire, so computing the deadline does not make sense.
502 */
503 if (!runstate_is_running()) {
504 return;
505 }
506
507 /* warp clock deterministically in record/replay mode */
508 if (!replay_checkpoint(CHECKPOINT_CLOCK_WARP_ACCOUNT)) {
509 return;
510 }
511
512 timer_del(icount_warp_timer);
513 icount_warp_rt();
514}
515
d09eae37
FK
516static bool icount_state_needed(void *opaque)
517{
518 return use_icount;
519}
520
521/*
522 * This is a subsection for icount migration.
523 */
524static const VMStateDescription icount_vmstate_timers = {
525 .name = "timer/icount",
526 .version_id = 1,
527 .minimum_version_id = 1,
5cd8cada 528 .needed = icount_state_needed,
d09eae37
FK
529 .fields = (VMStateField[]) {
530 VMSTATE_INT64(qemu_icount_bias, TimersState),
531 VMSTATE_INT64(qemu_icount, TimersState),
532 VMSTATE_END_OF_LIST()
533 }
534};
535
946fb27c
PB
536static const VMStateDescription vmstate_timers = {
537 .name = "timer",
538 .version_id = 2,
539 .minimum_version_id = 1,
35d08458 540 .fields = (VMStateField[]) {
946fb27c
PB
541 VMSTATE_INT64(cpu_ticks_offset, TimersState),
542 VMSTATE_INT64(dummy, TimersState),
543 VMSTATE_INT64_V(cpu_clock_offset, TimersState, 2),
544 VMSTATE_END_OF_LIST()
d09eae37 545 },
5cd8cada
JQ
546 .subsections = (const VMStateDescription*[]) {
547 &icount_vmstate_timers,
548 NULL
946fb27c
PB
549 }
550};
551
2adcc85d
JH
552static void cpu_throttle_thread(void *opaque)
553{
554 CPUState *cpu = opaque;
555 double pct;
556 double throttle_ratio;
557 long sleeptime_ns;
558
559 if (!cpu_throttle_get_percentage()) {
560 return;
561 }
562
563 pct = (double)cpu_throttle_get_percentage()/100;
564 throttle_ratio = pct / (1 - pct);
565 sleeptime_ns = (long)(throttle_ratio * CPU_THROTTLE_TIMESLICE_NS);
566
567 qemu_mutex_unlock_iothread();
568 atomic_set(&cpu->throttle_thread_scheduled, 0);
569 g_usleep(sleeptime_ns / 1000); /* Convert ns to us for usleep call */
570 qemu_mutex_lock_iothread();
571}
572
573static void cpu_throttle_timer_tick(void *opaque)
574{
575 CPUState *cpu;
576 double pct;
577
578 /* Stop the timer if needed */
579 if (!cpu_throttle_get_percentage()) {
580 return;
581 }
582 CPU_FOREACH(cpu) {
583 if (!atomic_xchg(&cpu->throttle_thread_scheduled, 1)) {
584 async_run_on_cpu(cpu, cpu_throttle_thread, cpu);
585 }
586 }
587
588 pct = (double)cpu_throttle_get_percentage()/100;
589 timer_mod(throttle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT) +
590 CPU_THROTTLE_TIMESLICE_NS / (1-pct));
591}
592
593void cpu_throttle_set(int new_throttle_pct)
594{
595 /* Ensure throttle percentage is within valid range */
596 new_throttle_pct = MIN(new_throttle_pct, CPU_THROTTLE_PCT_MAX);
597 new_throttle_pct = MAX(new_throttle_pct, CPU_THROTTLE_PCT_MIN);
598
599 atomic_set(&throttle_percentage, new_throttle_pct);
600
601 timer_mod(throttle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT) +
602 CPU_THROTTLE_TIMESLICE_NS);
603}
604
605void cpu_throttle_stop(void)
606{
607 atomic_set(&throttle_percentage, 0);
608}
609
/* True while a non-zero throttle percentage is configured. */
bool cpu_throttle_active(void)
{
    return cpu_throttle_get_percentage() != 0;
}
614
615int cpu_throttle_get_percentage(void)
616{
617 return atomic_read(&throttle_percentage);
618}
619
4603ea01
PD
620void cpu_ticks_init(void)
621{
622 seqlock_init(&timers_state.vm_clock_seqlock, NULL);
623 vmstate_register(NULL, 0, &vmstate_timers, &timers_state);
2adcc85d
JH
624 throttle_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL_RT,
625 cpu_throttle_timer_tick, NULL);
4603ea01
PD
626}
627
1ad9580b 628void configure_icount(QemuOpts *opts, Error **errp)
946fb27c 629{
1ad9580b 630 const char *option;
a8bfac37 631 char *rem_str = NULL;
1ad9580b 632
1ad9580b 633 option = qemu_opt_get(opts, "shift");
946fb27c 634 if (!option) {
a8bfac37
ST
635 if (qemu_opt_get(opts, "align") != NULL) {
636 error_setg(errp, "Please specify shift option when using align");
637 }
946fb27c
PB
638 return;
639 }
f1f4b57e
VC
640
641 icount_sleep = qemu_opt_get_bool(opts, "sleep", true);
5045e9d9
VC
642 if (icount_sleep) {
643 icount_warp_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL_RT,
e76d1798 644 icount_timer_cb, NULL);
5045e9d9 645 }
f1f4b57e 646
a8bfac37 647 icount_align_option = qemu_opt_get_bool(opts, "align", false);
f1f4b57e
VC
648
649 if (icount_align_option && !icount_sleep) {
778d9f9b 650 error_setg(errp, "align=on and sleep=off are incompatible");
f1f4b57e 651 }
946fb27c 652 if (strcmp(option, "auto") != 0) {
a8bfac37
ST
653 errno = 0;
654 icount_time_shift = strtol(option, &rem_str, 0);
655 if (errno != 0 || *rem_str != '\0' || !strlen(option)) {
656 error_setg(errp, "icount: Invalid shift value");
657 }
946fb27c
PB
658 use_icount = 1;
659 return;
a8bfac37
ST
660 } else if (icount_align_option) {
661 error_setg(errp, "shift=auto and align=on are incompatible");
f1f4b57e 662 } else if (!icount_sleep) {
778d9f9b 663 error_setg(errp, "shift=auto and sleep=off are incompatible");
946fb27c
PB
664 }
665
666 use_icount = 2;
667
668 /* 125MIPS seems a reasonable initial guess at the guest speed.
669 It will be corrected fairly quickly anyway. */
670 icount_time_shift = 3;
671
672 /* Have both realtime and virtual time triggers for speed adjustment.
673 The realtime trigger catches emulated time passing too slowly,
674 the virtual time trigger catches emulated time passing too fast.
675 Realtime triggers occur even when idle, so use them less frequently
676 than VM triggers. */
bf2a7ddb
PD
677 icount_rt_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL_RT,
678 icount_adjust_rt, NULL);
40daca54 679 timer_mod(icount_rt_timer,
bf2a7ddb 680 qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL_RT) + 1000);
40daca54
AB
681 icount_vm_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
682 icount_adjust_vm, NULL);
683 timer_mod(icount_vm_timer,
684 qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
73bcb24d 685 NANOSECONDS_PER_SECOND / 10);
946fb27c
PB
686}
687
296af7c9
BS
688/***********************************************************/
689void hw_error(const char *fmt, ...)
690{
691 va_list ap;
55e5c285 692 CPUState *cpu;
296af7c9
BS
693
694 va_start(ap, fmt);
695 fprintf(stderr, "qemu: hardware error: ");
696 vfprintf(stderr, fmt, ap);
697 fprintf(stderr, "\n");
bdc44640 698 CPU_FOREACH(cpu) {
55e5c285 699 fprintf(stderr, "CPU #%d:\n", cpu->cpu_index);
878096ee 700 cpu_dump_state(cpu, stderr, fprintf, CPU_DUMP_FPU);
296af7c9
BS
701 }
702 va_end(ap);
703 abort();
704}
705
706void cpu_synchronize_all_states(void)
707{
182735ef 708 CPUState *cpu;
296af7c9 709
bdc44640 710 CPU_FOREACH(cpu) {
182735ef 711 cpu_synchronize_state(cpu);
296af7c9
BS
712 }
713}
714
715void cpu_synchronize_all_post_reset(void)
716{
182735ef 717 CPUState *cpu;
296af7c9 718
bdc44640 719 CPU_FOREACH(cpu) {
182735ef 720 cpu_synchronize_post_reset(cpu);
296af7c9
BS
721 }
722}
723
724void cpu_synchronize_all_post_init(void)
725{
182735ef 726 CPUState *cpu;
296af7c9 727
bdc44640 728 CPU_FOREACH(cpu) {
182735ef 729 cpu_synchronize_post_init(cpu);
296af7c9
BS
730 }
731}
732
56983463 733static int do_vm_stop(RunState state)
296af7c9 734{
56983463
KW
735 int ret = 0;
736
1354869c 737 if (runstate_is_running()) {
296af7c9 738 cpu_disable_ticks();
296af7c9 739 pause_all_vcpus();
f5bbfba1 740 runstate_set(state);
1dfb4dd9 741 vm_state_notify(0, state);
a4e15de9 742 qapi_event_send_stop(&error_abort);
296af7c9 743 }
56983463 744
594a45ce 745 bdrv_drain_all();
da31d594 746 ret = blk_flush_all();
594a45ce 747
56983463 748 return ret;
296af7c9
BS
749}
750
a1fcaa73 751static bool cpu_can_run(CPUState *cpu)
296af7c9 752{
4fdeee7c 753 if (cpu->stop) {
a1fcaa73 754 return false;
0ab07c62 755 }
321bc0b2 756 if (cpu_is_stopped(cpu)) {
a1fcaa73 757 return false;
0ab07c62 758 }
a1fcaa73 759 return true;
296af7c9
BS
760}
761
91325046 762static void cpu_handle_guest_debug(CPUState *cpu)
83f338f7 763{
64f6b346 764 gdb_set_stop_cpu(cpu);
8cf71710 765 qemu_system_debug_request();
f324e766 766 cpu->stopped = true;
3c638d06
JK
767}
768
6d9cb73c
JK
769#ifdef CONFIG_LINUX
770static void sigbus_reraise(void)
771{
772 sigset_t set;
773 struct sigaction action;
774
775 memset(&action, 0, sizeof(action));
776 action.sa_handler = SIG_DFL;
777 if (!sigaction(SIGBUS, &action, NULL)) {
778 raise(SIGBUS);
779 sigemptyset(&set);
780 sigaddset(&set, SIGBUS);
781 sigprocmask(SIG_UNBLOCK, &set, NULL);
782 }
783 perror("Failed to re-raise SIGBUS!\n");
784 abort();
785}
786
787static void sigbus_handler(int n, struct qemu_signalfd_siginfo *siginfo,
788 void *ctx)
789{
790 if (kvm_on_sigbus(siginfo->ssi_code,
791 (void *)(intptr_t)siginfo->ssi_addr)) {
792 sigbus_reraise();
793 }
794}
795
796static void qemu_init_sigbus(void)
797{
798 struct sigaction action;
799
800 memset(&action, 0, sizeof(action));
801 action.sa_flags = SA_SIGINFO;
802 action.sa_sigaction = (void (*)(int, siginfo_t*, void*))sigbus_handler;
803 sigaction(SIGBUS, &action, NULL);
804
805 prctl(PR_MCE_KILL, PR_MCE_KILL_SET, PR_MCE_KILL_EARLY, 0, 0);
806}
807
290adf38 808static void qemu_kvm_eat_signals(CPUState *cpu)
1ab3c6c0
JK
809{
810 struct timespec ts = { 0, 0 };
811 siginfo_t siginfo;
812 sigset_t waitset;
813 sigset_t chkset;
814 int r;
815
816 sigemptyset(&waitset);
817 sigaddset(&waitset, SIG_IPI);
818 sigaddset(&waitset, SIGBUS);
819
820 do {
821 r = sigtimedwait(&waitset, &siginfo, &ts);
822 if (r == -1 && !(errno == EAGAIN || errno == EINTR)) {
823 perror("sigtimedwait");
824 exit(1);
825 }
826
827 switch (r) {
828 case SIGBUS:
290adf38 829 if (kvm_on_sigbus_vcpu(cpu, siginfo.si_code, siginfo.si_addr)) {
1ab3c6c0
JK
830 sigbus_reraise();
831 }
832 break;
833 default:
834 break;
835 }
836
837 r = sigpending(&chkset);
838 if (r == -1) {
839 perror("sigpending");
840 exit(1);
841 }
842 } while (sigismember(&chkset, SIG_IPI) || sigismember(&chkset, SIGBUS));
1ab3c6c0
JK
843}
844
6d9cb73c
JK
845#else /* !CONFIG_LINUX */
846
847static void qemu_init_sigbus(void)
848{
849}
1ab3c6c0 850
290adf38 851static void qemu_kvm_eat_signals(CPUState *cpu)
1ab3c6c0
JK
852{
853}
6d9cb73c
JK
854#endif /* !CONFIG_LINUX */
855
296af7c9 856#ifndef _WIN32
55f8d6ac
JK
857static void dummy_signal(int sig)
858{
859}
55f8d6ac 860
13618e05 861static void qemu_kvm_init_cpu_signals(CPUState *cpu)
714bd040
PB
862{
863 int r;
864 sigset_t set;
865 struct sigaction sigact;
866
867 memset(&sigact, 0, sizeof(sigact));
868 sigact.sa_handler = dummy_signal;
869 sigaction(SIG_IPI, &sigact, NULL);
870
714bd040
PB
871 pthread_sigmask(SIG_BLOCK, NULL, &set);
872 sigdelset(&set, SIG_IPI);
714bd040 873 sigdelset(&set, SIGBUS);
491d6e80 874 r = kvm_set_signal_mask(cpu, &set);
714bd040
PB
875 if (r) {
876 fprintf(stderr, "kvm_set_signal_mask: %s\n", strerror(-r));
877 exit(1);
878 }
879}
880
55f8d6ac 881#else /* _WIN32 */
13618e05 882static void qemu_kvm_init_cpu_signals(CPUState *cpu)
ff48eb5f 883{
714bd040
PB
884 abort();
885}
714bd040 886#endif /* _WIN32 */
ff48eb5f 887
b2532d88 888static QemuMutex qemu_global_mutex;
46daff13 889static QemuCond qemu_io_proceeded_cond;
6b49809c 890static unsigned iothread_requesting_mutex;
296af7c9
BS
891
892static QemuThread io_thread;
893
296af7c9
BS
894/* cpu creation */
895static QemuCond qemu_cpu_cond;
896/* system init */
296af7c9 897static QemuCond qemu_pause_cond;
e82bcec2 898static QemuCond qemu_work_cond;
296af7c9 899
d3b12f5d 900void qemu_init_cpu_loop(void)
296af7c9 901{
6d9cb73c 902 qemu_init_sigbus();
ed94592b 903 qemu_cond_init(&qemu_cpu_cond);
ed94592b
AL
904 qemu_cond_init(&qemu_pause_cond);
905 qemu_cond_init(&qemu_work_cond);
46daff13 906 qemu_cond_init(&qemu_io_proceeded_cond);
296af7c9 907 qemu_mutex_init(&qemu_global_mutex);
296af7c9 908
b7680cb6 909 qemu_thread_get_self(&io_thread);
296af7c9
BS
910}
911
f100f0b3 912void run_on_cpu(CPUState *cpu, void (*func)(void *data), void *data)
e82bcec2
MT
913{
914 struct qemu_work_item wi;
915
60e82579 916 if (qemu_cpu_is_self(cpu)) {
e82bcec2
MT
917 func(data);
918 return;
919 }
920
921 wi.func = func;
922 wi.data = data;
3c02270d 923 wi.free = false;
376692b9
PB
924
925 qemu_mutex_lock(&cpu->work_mutex);
c64ca814
AF
926 if (cpu->queued_work_first == NULL) {
927 cpu->queued_work_first = &wi;
0ab07c62 928 } else {
c64ca814 929 cpu->queued_work_last->next = &wi;
0ab07c62 930 }
c64ca814 931 cpu->queued_work_last = &wi;
e82bcec2
MT
932 wi.next = NULL;
933 wi.done = false;
376692b9 934 qemu_mutex_unlock(&cpu->work_mutex);
e82bcec2 935
c08d7424 936 qemu_cpu_kick(cpu);
376692b9 937 while (!atomic_mb_read(&wi.done)) {
4917cf44 938 CPUState *self_cpu = current_cpu;
e82bcec2
MT
939
940 qemu_cond_wait(&qemu_work_cond, &qemu_global_mutex);
4917cf44 941 current_cpu = self_cpu;
e82bcec2
MT
942 }
943}
944
3c02270d
CV
945void async_run_on_cpu(CPUState *cpu, void (*func)(void *data), void *data)
946{
947 struct qemu_work_item *wi;
948
949 if (qemu_cpu_is_self(cpu)) {
950 func(data);
951 return;
952 }
953
954 wi = g_malloc0(sizeof(struct qemu_work_item));
955 wi->func = func;
956 wi->data = data;
957 wi->free = true;
376692b9
PB
958
959 qemu_mutex_lock(&cpu->work_mutex);
3c02270d
CV
960 if (cpu->queued_work_first == NULL) {
961 cpu->queued_work_first = wi;
962 } else {
963 cpu->queued_work_last->next = wi;
964 }
965 cpu->queued_work_last = wi;
966 wi->next = NULL;
967 wi->done = false;
376692b9 968 qemu_mutex_unlock(&cpu->work_mutex);
3c02270d
CV
969
970 qemu_cpu_kick(cpu);
971}
972
6d45b109 973static void flush_queued_work(CPUState *cpu)
e82bcec2
MT
974{
975 struct qemu_work_item *wi;
976
c64ca814 977 if (cpu->queued_work_first == NULL) {
e82bcec2 978 return;
0ab07c62 979 }
e82bcec2 980
376692b9
PB
981 qemu_mutex_lock(&cpu->work_mutex);
982 while (cpu->queued_work_first != NULL) {
983 wi = cpu->queued_work_first;
c64ca814 984 cpu->queued_work_first = wi->next;
376692b9
PB
985 if (!cpu->queued_work_first) {
986 cpu->queued_work_last = NULL;
987 }
988 qemu_mutex_unlock(&cpu->work_mutex);
e82bcec2 989 wi->func(wi->data);
376692b9 990 qemu_mutex_lock(&cpu->work_mutex);
3c02270d
CV
991 if (wi->free) {
992 g_free(wi);
376692b9
PB
993 } else {
994 atomic_mb_set(&wi->done, true);
3c02270d 995 }
e82bcec2 996 }
376692b9 997 qemu_mutex_unlock(&cpu->work_mutex);
e82bcec2
MT
998 qemu_cond_broadcast(&qemu_work_cond);
999}
1000
509a0d78 1001static void qemu_wait_io_event_common(CPUState *cpu)
296af7c9 1002{
4fdeee7c
AF
1003 if (cpu->stop) {
1004 cpu->stop = false;
f324e766 1005 cpu->stopped = true;
96bce683 1006 qemu_cond_broadcast(&qemu_pause_cond);
296af7c9 1007 }
6d45b109 1008 flush_queued_work(cpu);
216fc9a4 1009 cpu->thread_kicked = false;
296af7c9
BS
1010}
1011
d5f8d613 1012static void qemu_tcg_wait_io_event(CPUState *cpu)
296af7c9 1013{
16400322 1014 while (all_cpu_threads_idle()) {
d5f8d613 1015 qemu_cond_wait(cpu->halt_cond, &qemu_global_mutex);
16400322 1016 }
296af7c9 1017
46daff13
PB
1018 while (iothread_requesting_mutex) {
1019 qemu_cond_wait(&qemu_io_proceeded_cond, &qemu_global_mutex);
1020 }
6cabe1f3 1021
bdc44640 1022 CPU_FOREACH(cpu) {
182735ef 1023 qemu_wait_io_event_common(cpu);
6cabe1f3 1024 }
296af7c9
BS
1025}
1026
fd529e8f 1027static void qemu_kvm_wait_io_event(CPUState *cpu)
296af7c9 1028{
a98ae1d8 1029 while (cpu_thread_is_idle(cpu)) {
f5c121b8 1030 qemu_cond_wait(cpu->halt_cond, &qemu_global_mutex);
16400322 1031 }
296af7c9 1032
290adf38 1033 qemu_kvm_eat_signals(cpu);
509a0d78 1034 qemu_wait_io_event_common(cpu);
296af7c9
BS
1035}
1036
/* Thread function for a KVM vCPU (one thread per CPU).
 *
 * Registers with RCU, takes the BQL, initializes the in-kernel vCPU and
 * its signal handling, signals creation to qemu_kvm_start_vcpu(), then
 * loops forever alternating kvm_cpu_exec() with the idle wait.  Never
 * returns in practice (the loop has no exit).
 */
static void *qemu_kvm_cpu_thread_fn(void *arg)
{
    CPUState *cpu = arg;
    int r;

    rcu_register_thread();

    qemu_mutex_lock_iothread();
    qemu_thread_get_self(cpu->thread);
    cpu->thread_id = qemu_get_thread_id();
    cpu->can_do_io = 1;
    current_cpu = cpu;

    r = kvm_init_vcpu(cpu);
    if (r < 0) {
        fprintf(stderr, "kvm_init_vcpu failed: %s\n", strerror(-r));
        exit(1);
    }

    qemu_kvm_init_cpu_signals(cpu);

    /* signal CPU creation */
    cpu->created = true;
    qemu_cond_signal(&qemu_cpu_cond);

    while (1) {
        if (cpu_can_run(cpu)) {
            r = kvm_cpu_exec(cpu);
            if (r == EXCP_DEBUG) {
                cpu_handle_guest_debug(cpu);
            }
        }
        qemu_kvm_wait_io_event(cpu);
    }

    return NULL;
}
1074
/* Thread function for a "dummy" vCPU, used by qtest: no guest code ever
 * runs, the thread just sleeps in sigwait() and services stop/queued-work
 * requests.  Not available on Windows (no sigwait).
 */
static void *qemu_dummy_cpu_thread_fn(void *arg)
{
#ifdef _WIN32
    fprintf(stderr, "qtest is not supported under Windows\n");
    exit(1);
#else
    CPUState *cpu = arg;
    sigset_t waitset;
    int r;

    rcu_register_thread();

    qemu_mutex_lock_iothread();
    qemu_thread_get_self(cpu->thread);
    cpu->thread_id = qemu_get_thread_id();
    cpu->can_do_io = 1;

    /* Only the kick signal can wake us from sigwait() below. */
    sigemptyset(&waitset);
    sigaddset(&waitset, SIG_IPI);

    /* signal CPU creation */
    cpu->created = true;
    qemu_cond_signal(&qemu_cpu_cond);

    current_cpu = cpu;
    while (1) {
        /* Drop current_cpu and the BQL while blocked in sigwait(). */
        current_cpu = NULL;
        qemu_mutex_unlock_iothread();
        do {
            int sig;
            r = sigwait(&waitset, &sig);
        } while (r == -1 && (errno == EAGAIN || errno == EINTR));
        if (r == -1) {
            perror("sigwait");
            exit(1);
        }
        qemu_mutex_lock_iothread();
        current_cpu = cpu;
        qemu_wait_io_event_common(cpu);
    }

    return NULL;
#endif
}
1119
bdb7ca67
JK
1120static void tcg_exec_all(void);
1121
/* The single TCG thread: round-robins all vCPUs through tcg_exec_all().
 *
 * Marks every CPU created (TCG shares this one thread), waits for the
 * machine to start, then loops forever executing guest code and idling
 * in qemu_tcg_wait_io_event().  Never returns in practice.
 */
static void *qemu_tcg_cpu_thread_fn(void *arg)
{
    CPUState *cpu = arg;

    rcu_register_thread();

    qemu_mutex_lock_iothread();
    qemu_thread_get_self(cpu->thread);

    /* All vCPUs live in this thread; publish that fact for each one. */
    CPU_FOREACH(cpu) {
        cpu->thread_id = qemu_get_thread_id();
        cpu->created = true;
        cpu->can_do_io = 1;
    }
    qemu_cond_signal(&qemu_cpu_cond);

    /* wait for initial kick-off after machine start */
    while (first_cpu->stopped) {
        qemu_cond_wait(first_cpu->halt_cond, &qemu_global_mutex);

        /* process any pending work */
        CPU_FOREACH(cpu) {
            qemu_wait_io_event_common(cpu);
        }
    }

    /* process any pending work */
    /* NOTE(review): starting with exit_request set makes the first
     * tcg_exec_all() pass fall through to the wait-io-event path —
     * presumably so pending work is serviced before running guest code;
     * confirm against commit history before relying on this.
     */
    atomic_mb_set(&exit_request, 1);

    while (1) {
        tcg_exec_all();

        if (use_icount) {
            int64_t deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);

            /* A deadline of 0 means a virtual timer already expired. */
            if (deadline == 0) {
                qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
            }
        }
        qemu_tcg_wait_io_event(QTAILQ_FIRST(&cpus));
    }

    return NULL;
}
1166
2ff09a40 1167static void qemu_cpu_kick_thread(CPUState *cpu)
cc015e9a
PB
1168{
1169#ifndef _WIN32
1170 int err;
1171
e0c38211
PB
1172 if (cpu->thread_kicked) {
1173 return;
9102deda 1174 }
e0c38211 1175 cpu->thread_kicked = true;
814e612e 1176 err = pthread_kill(cpu->thread->thread, SIG_IPI);
cc015e9a
PB
1177 if (err) {
1178 fprintf(stderr, "qemu:%s: %s", __func__, strerror(err));
1179 exit(1);
1180 }
1181#else /* _WIN32 */
e0c38211
PB
1182 abort();
1183#endif
1184}
ed9164a3 1185
/* Request the TCG thread to exit the execution loop without signalling.
 *
 * Sets exit_request (with a full barrier so the cause of the exit is
 * visible first), then forces the currently-running CPU, if any, out of
 * the translated-code loop via cpu_exit().
 */
static void qemu_cpu_kick_no_halt(void)
{
    CPUState *cpu;
    /* Ensure whatever caused the exit has reached the CPU threads before
     * writing exit_request.
     */
    atomic_mb_set(&exit_request, 1);
    cpu = atomic_mb_read(&tcg_current_cpu);
    if (cpu) {
        cpu_exit(cpu);
    }
}
1198
/* Wake @cpu: broadcast its halt condition, then either bounce the TCG
 * thread out of translated code (TCG) or signal the vCPU thread (KVM
 * and dummy).
 */
void qemu_cpu_kick(CPUState *cpu)
{
    qemu_cond_broadcast(cpu->halt_cond);
    if (tcg_enabled()) {
        qemu_cpu_kick_no_halt();
    } else {
        qemu_cpu_kick_thread(cpu);
    }
}
1208
46d62fac 1209void qemu_cpu_kick_self(void)
296af7c9 1210{
4917cf44 1211 assert(current_cpu);
9102deda 1212 qemu_cpu_kick_thread(current_cpu);
296af7c9
BS
1213}
1214
/* Return true if the calling thread is @cpu's vCPU thread. */
bool qemu_cpu_is_self(CPUState *cpu)
{
    return qemu_thread_is_self(cpu->thread);
}
1219
/* Return true if the calling thread is a vCPU thread (current_cpu is
 * only set inside vCPU threads).
 */
bool qemu_in_vcpu_thread(void)
{
    return current_cpu && qemu_cpu_is_self(current_cpu);
}
1224
/* Per-thread flag tracking whether this thread holds the BQL; kept in
 * sync by qemu_mutex_lock_iothread()/qemu_mutex_unlock_iothread().
 */
static __thread bool iothread_locked = false;

/* Return true if the calling thread currently holds the BQL. */
bool qemu_mutex_iothread_locked(void)
{
    return iothread_locked;
}
1231
/* Acquire the Big QEMU Lock (BQL).
 *
 * iothread_requesting_mutex is bumped around the acquisition so that
 * the TCG thread (qemu_tcg_wait_io_event) yields instead of immediately
 * re-taking the lock.  If the TCG thread is currently executing guest
 * code and holds the lock, it is kicked out via qemu_cpu_kick_no_halt()
 * before we block on the mutex.
 */
void qemu_mutex_lock_iothread(void)
{
    atomic_inc(&iothread_requesting_mutex);
    /* In the simple case there is no need to bump the VCPU thread out of
     * TCG code execution.
     */
    if (!tcg_enabled() || qemu_in_vcpu_thread() ||
        !first_cpu || !first_cpu->created) {
        qemu_mutex_lock(&qemu_global_mutex);
        atomic_dec(&iothread_requesting_mutex);
    } else {
        if (qemu_mutex_trylock(&qemu_global_mutex)) {
            /* Lock is contended: force the TCG thread to drop it. */
            qemu_cpu_kick_no_halt();
            qemu_mutex_lock(&qemu_global_mutex);
        }
        atomic_dec(&iothread_requesting_mutex);
        /* Let the TCG thread resume (see qemu_tcg_wait_io_event). */
        qemu_cond_broadcast(&qemu_io_proceeded_cond);
    }
    iothread_locked = true;
}
1252
/* Release the BQL; clear the per-thread flag before unlocking so the
 * flag never claims ownership of a lock we no longer hold.
 */
void qemu_mutex_unlock_iothread(void)
{
    iothread_locked = false;
    qemu_mutex_unlock(&qemu_global_mutex);
}
1258
1259static int all_vcpus_paused(void)
1260{
bdc44640 1261 CPUState *cpu;
296af7c9 1262
bdc44640 1263 CPU_FOREACH(cpu) {
182735ef 1264 if (!cpu->stopped) {
296af7c9 1265 return 0;
0ab07c62 1266 }
296af7c9
BS
1267 }
1268
1269 return 1;
1270}
1271
/* Stop every vCPU and wait until each has acknowledged.
 *
 * Disables the virtual clock, flags every CPU with stop and kicks it,
 * then blocks on qemu_pause_cond until all_vcpus_paused().  When called
 * from a vCPU thread itself, the current CPU is stopped inline; for TCG
 * (single thread runs all CPUs) the remaining CPUs can be marked stopped
 * directly and no waiting is needed.
 */
void pause_all_vcpus(void)
{
    CPUState *cpu;

    qemu_clock_enable(QEMU_CLOCK_VIRTUAL, false);
    CPU_FOREACH(cpu) {
        cpu->stop = true;
        qemu_cpu_kick(cpu);
    }

    if (qemu_in_vcpu_thread()) {
        cpu_stop_current();
        if (!kvm_enabled()) {
            /* TCG: this thread runs all CPUs, so none can be mid-run. */
            CPU_FOREACH(cpu) {
                cpu->stop = false;
                cpu->stopped = true;
            }
            return;
        }
    }

    while (!all_vcpus_paused()) {
        qemu_cond_wait(&qemu_pause_cond, &qemu_global_mutex);
        /* Re-kick in case a CPU missed the first kick. */
        CPU_FOREACH(cpu) {
            qemu_cpu_kick(cpu);
        }
    }
}
1300
/* Clear a single CPU's stop/stopped state and wake its thread. */
void cpu_resume(CPUState *cpu)
{
    cpu->stop = false;
    cpu->stopped = false;
    qemu_cpu_kick(cpu);
}
1307
/* Re-enable the virtual clock and resume every vCPU. */
void resume_all_vcpus(void)
{
    CPUState *cpu;

    qemu_clock_enable(QEMU_CLOCK_VIRTUAL, true);
    CPU_FOREACH(cpu) {
        cpu_resume(cpu);
    }
}
1317
4900116e
DDAG
1318/* For temporary buffers for forming a name */
1319#define VCPU_THREAD_NAME_SIZE 16
1320
/* Attach @cpu to the (single, shared) TCG execution thread.
 *
 * The first call creates the thread and its halt condition and waits
 * for the thread to flag creation; later calls just point the new CPU
 * at the already-running thread.  The static locals persist the shared
 * thread/condition across calls.
 */
static void qemu_tcg_init_vcpu(CPUState *cpu)
{
    char thread_name[VCPU_THREAD_NAME_SIZE];
    static QemuCond *tcg_halt_cond;
    static QemuThread *tcg_cpu_thread;

    /* share a single thread for all cpus with TCG */
    if (!tcg_cpu_thread) {
        cpu->thread = g_malloc0(sizeof(QemuThread));
        cpu->halt_cond = g_malloc0(sizeof(QemuCond));
        qemu_cond_init(cpu->halt_cond);
        tcg_halt_cond = cpu->halt_cond;
        snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/TCG",
                 cpu->cpu_index);
        qemu_thread_create(cpu->thread, thread_name, qemu_tcg_cpu_thread_fn,
                           cpu, QEMU_THREAD_JOINABLE);
#ifdef _WIN32
        cpu->hThread = qemu_thread_get_handle(cpu->thread);
#endif
        /* Wait for the thread to report it is up (BQL dropped in wait). */
        while (!cpu->created) {
            qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
        }
        tcg_cpu_thread = cpu->thread;
    } else {
        cpu->thread = tcg_cpu_thread;
        cpu->halt_cond = tcg_halt_cond;
    }
}
1349
/* Spawn the dedicated KVM thread for @cpu and wait for it to report
 * creation (the wait drops the BQL).
 */
static void qemu_kvm_start_vcpu(CPUState *cpu)
{
    char thread_name[VCPU_THREAD_NAME_SIZE];

    cpu->thread = g_malloc0(sizeof(QemuThread));
    cpu->halt_cond = g_malloc0(sizeof(QemuCond));
    qemu_cond_init(cpu->halt_cond);
    snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/KVM",
             cpu->cpu_index);
    qemu_thread_create(cpu->thread, thread_name, qemu_kvm_cpu_thread_fn,
                       cpu, QEMU_THREAD_JOINABLE);
    while (!cpu->created) {
        qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
    }
}
1365
/* Spawn the dummy (qtest) thread for @cpu and wait for it to report
 * creation (the wait drops the BQL).
 */
static void qemu_dummy_start_vcpu(CPUState *cpu)
{
    char thread_name[VCPU_THREAD_NAME_SIZE];

    cpu->thread = g_malloc0(sizeof(QemuThread));
    cpu->halt_cond = g_malloc0(sizeof(QemuCond));
    qemu_cond_init(cpu->halt_cond);
    snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/DUMMY",
             cpu->cpu_index);
    qemu_thread_create(cpu->thread, thread_name, qemu_dummy_cpu_thread_fn, cpu,
                       QEMU_THREAD_JOINABLE);
    while (!cpu->created) {
        qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
    }
}
1381
/* Common vCPU bring-up: fill in topology, give the CPU a default
 * address space if the target didn't set one, and dispatch to the
 * accelerator-specific start function.  The CPU starts stopped and is
 * released by the machine-start resume.
 */
void qemu_init_vcpu(CPUState *cpu)
{
    cpu->nr_cores = smp_cores;
    cpu->nr_threads = smp_threads;
    cpu->stopped = true;

    if (!cpu->as) {
        /* If the target cpu hasn't set up any address spaces itself,
         * give it the default one.
         */
        AddressSpace *as = address_space_init_shareable(cpu->memory,
                                                        "cpu-memory");
        cpu->num_ases = 1;
        cpu_address_space_init(cpu, as, 0);
    }

    if (kvm_enabled()) {
        qemu_kvm_start_vcpu(cpu);
    } else if (tcg_enabled()) {
        qemu_tcg_init_vcpu(cpu);
    } else {
        qemu_dummy_start_vcpu(cpu);
    }
}
1406
/* Stop the CPU the calling vCPU thread is running: mark it stopped,
 * force it out of the execution loop, and wake pause_all_vcpus()
 * waiters.  No-op outside a vCPU thread.
 */
void cpu_stop_current(void)
{
    if (current_cpu) {
        current_cpu->stop = false;
        current_cpu->stopped = true;
        cpu_exit(current_cpu);
        qemu_cond_broadcast(&qemu_pause_cond);
    }
}
1416
/* Stop the VM, entering run state @state.
 *
 * From a vCPU thread we cannot stop synchronously: queue a vmstop
 * request for the main loop, stop the current CPU, and return 0.
 * From the main loop, do_vm_stop() performs the stop and its return
 * value (e.g. block-flush errors) is propagated.
 */
int vm_stop(RunState state)
{
    if (qemu_in_vcpu_thread()) {
        qemu_system_vmstop_request_prepare();
        qemu_system_vmstop_request(state);
        /*
         * FIXME: should not return to device code in case
         * vm_stop() has been requested.
         */
        cpu_stop_current();
        return 0;
    }

    return do_vm_stop(state);
}
1432
8a9236f1
LC
1433/* does a state transition even if the VM is already stopped,
1434 current state is forgotten forever */
56983463 1435int vm_stop_force_state(RunState state)
8a9236f1
LC
1436{
1437 if (runstate_is_running()) {
56983463 1438 return vm_stop(state);
8a9236f1
LC
1439 } else {
1440 runstate_set(state);
b2780d32
WC
1441
1442 bdrv_drain_all();
594a45ce
KW
1443 /* Make sure to return an error if the flush in a previous vm_stop()
1444 * failed. */
da31d594 1445 return blk_flush_all();
8a9236f1
LC
1446 }
1447}
1448
8b427044
PD
1449static int64_t tcg_get_icount_limit(void)
1450{
1451 int64_t deadline;
1452
1453 if (replay_mode != REPLAY_MODE_PLAY) {
1454 deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
1455
1456 /* Maintain prior (possibly buggy) behaviour where if no deadline
1457 * was set (as there is no QEMU_CLOCK_VIRTUAL timer) or it is more than
1458 * INT32_MAX nanoseconds ahead, we still use INT32_MAX
1459 * nanoseconds.
1460 */
1461 if ((deadline < 0) || (deadline > INT32_MAX)) {
1462 deadline = INT32_MAX;
1463 }
1464
1465 return qemu_icount_round(deadline);
1466 } else {
1467 return replay_get_instructions();
1468 }
1469}
1470
/* Run one TCG execution slice on @cpu; returns cpu_exec()'s result
 * (e.g. EXCP_DEBUG).
 *
 * With icount enabled, the instruction budget from tcg_get_icount_limit()
 * is split between the 16-bit low counter (at most 0xffff) and
 * icount_extra; any unexecuted instructions are folded back into the
 * global counter afterwards.  The exact order of these adjustments
 * matters for timer accuracy — do not reorder.
 */
static int tcg_cpu_exec(CPUState *cpu)
{
    int ret;
#ifdef CONFIG_PROFILER
    int64_t ti;
#endif

#ifdef CONFIG_PROFILER
    ti = profile_getclock();
#endif
    if (use_icount) {
        int64_t count;
        int decr;
        /* Drop the (unexecuted) remainder of the previous budget. */
        timers_state.qemu_icount -= (cpu->icount_decr.u16.low
                                    + cpu->icount_extra);
        cpu->icount_decr.u16.low = 0;
        cpu->icount_extra = 0;
        count = tcg_get_icount_limit();
        timers_state.qemu_icount += count;
        /* The low half of the decrementer holds at most 0xffff. */
        decr = (count > 0xffff) ? 0xffff : count;
        count -= decr;
        cpu->icount_decr.u16.low = decr;
        cpu->icount_extra = count;
    }
    ret = cpu_exec(cpu);
#ifdef CONFIG_PROFILER
    tcg_time += profile_getclock() - ti;
#endif
    if (use_icount) {
        /* Fold pending instructions back into the
           instruction counter, and clear the interrupt flag.  */
        timers_state.qemu_icount -= (cpu->icount_decr.u16.low
                        + cpu->icount_extra);
        cpu->icount_decr.u32 = 0;
        cpu->icount_extra = 0;
        replay_account_executed_instructions();
    }
    return ret;
}
1510
/* One round-robin pass over all vCPUs from the TCG thread.
 *
 * Resumes at next_cpu (so scheduling is fair across calls), runs each
 * runnable CPU for a slice, and stops early on a pending exit_request,
 * a debug exception, or a stopped CPU.  exit_request is cleared with a
 * barrier at the end, pairing with the setter in qemu_cpu_kick.
 */
static void tcg_exec_all(void)
{
    int r;

    /* Account partial waits to QEMU_CLOCK_VIRTUAL.  */
    qemu_account_warp_timer();

    if (next_cpu == NULL) {
        next_cpu = first_cpu;
    }
    for (; next_cpu != NULL && !exit_request; next_cpu = CPU_NEXT(next_cpu)) {
        CPUState *cpu = next_cpu;

        /* Single-stepping with NOTIMER suppresses the virtual clock. */
        qemu_clock_enable(QEMU_CLOCK_VIRTUAL,
                          (cpu->singlestep_enabled & SSTEP_NOTIMER) == 0);

        if (cpu_can_run(cpu)) {
            r = tcg_cpu_exec(cpu);
            if (r == EXCP_DEBUG) {
                cpu_handle_guest_debug(cpu);
                break;
            }
        } else if (cpu->stop || cpu->stopped) {
            break;
        }
    }

    /* Pairs with smp_wmb in qemu_cpu_kick.  */
    atomic_mb_set(&exit_request, 0);
}
1541
/* Print the list of supported CPU models for targets that provide a
 * cpu_list() implementation; otherwise prints nothing.
 */
void list_cpus(FILE *f, fprintf_function cpu_fprintf, const char *optarg)
{
    /* XXX: implement xxx_cpu_list for targets that still miss it */
#if defined(cpu_list)
    cpu_list(f, cpu_fprintf);
#endif
}
de0b36b6
LC
1549
/* QMP 'query-cpus': build a linked list of CpuInfo, one per vCPU.
 *
 * Synchronizes each CPU's register state from the accelerator first,
 * then fills in the common fields plus a per-target program counter
 * (selected by the TARGET_* #ifdefs).  Caller owns the returned list.
 */
CpuInfoList *qmp_query_cpus(Error **errp)
{
    CpuInfoList *head = NULL, *cur_item = NULL;
    CPUState *cpu;

    CPU_FOREACH(cpu) {
        CpuInfoList *info;
#if defined(TARGET_I386)
        X86CPU *x86_cpu = X86_CPU(cpu);
        CPUX86State *env = &x86_cpu->env;
#elif defined(TARGET_PPC)
        PowerPCCPU *ppc_cpu = POWERPC_CPU(cpu);
        CPUPPCState *env = &ppc_cpu->env;
#elif defined(TARGET_SPARC)
        SPARCCPU *sparc_cpu = SPARC_CPU(cpu);
        CPUSPARCState *env = &sparc_cpu->env;
#elif defined(TARGET_MIPS)
        MIPSCPU *mips_cpu = MIPS_CPU(cpu);
        CPUMIPSState *env = &mips_cpu->env;
#elif defined(TARGET_TRICORE)
        TriCoreCPU *tricore_cpu = TRICORE_CPU(cpu);
        CPUTriCoreState *env = &tricore_cpu->env;
#endif

        /* Make the register values below up to date. */
        cpu_synchronize_state(cpu);

        info = g_malloc0(sizeof(*info));
        info->value = g_malloc0(sizeof(*info->value));
        info->value->CPU = cpu->cpu_index;
        info->value->current = (cpu == first_cpu);
        info->value->halted = cpu->halted;
        info->value->qom_path = object_get_canonical_path(OBJECT(cpu));
        info->value->thread_id = cpu->thread_id;
#if defined(TARGET_I386)
        info->value->arch = CPU_INFO_ARCH_X86;
        info->value->u.x86.pc = env->eip + env->segs[R_CS].base;
#elif defined(TARGET_PPC)
        info->value->arch = CPU_INFO_ARCH_PPC;
        info->value->u.ppc.nip = env->nip;
#elif defined(TARGET_SPARC)
        info->value->arch = CPU_INFO_ARCH_SPARC;
        info->value->u.q_sparc.pc = env->pc;
        info->value->u.q_sparc.npc = env->npc;
#elif defined(TARGET_MIPS)
        info->value->arch = CPU_INFO_ARCH_MIPS;
        info->value->u.q_mips.PC = env->active_tc.PC;
#elif defined(TARGET_TRICORE)
        info->value->arch = CPU_INFO_ARCH_TRICORE;
        info->value->u.tricore.PC = env->PC;
#else
        info->value->arch = CPU_INFO_ARCH_OTHER;
#endif

        /* XXX: waiting for the qapi to support GSList */
        if (!cur_item) {
            head = cur_item = info;
        } else {
            cur_item->next = info;
            cur_item = info;
        }
    }

    return head;
}
0cfd6a9a
LC
1614
1615void qmp_memsave(int64_t addr, int64_t size, const char *filename,
1616 bool has_cpu, int64_t cpu_index, Error **errp)
1617{
1618 FILE *f;
1619 uint32_t l;
55e5c285 1620 CPUState *cpu;
0cfd6a9a 1621 uint8_t buf[1024];
0dc9daf0 1622 int64_t orig_addr = addr, orig_size = size;
0cfd6a9a
LC
1623
1624 if (!has_cpu) {
1625 cpu_index = 0;
1626 }
1627
151d1322
AF
1628 cpu = qemu_get_cpu(cpu_index);
1629 if (cpu == NULL) {
c6bd8c70
MA
1630 error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "cpu-index",
1631 "a CPU number");
0cfd6a9a
LC
1632 return;
1633 }
1634
1635 f = fopen(filename, "wb");
1636 if (!f) {
618da851 1637 error_setg_file_open(errp, errno, filename);
0cfd6a9a
LC
1638 return;
1639 }
1640
1641 while (size != 0) {
1642 l = sizeof(buf);
1643 if (l > size)
1644 l = size;
2f4d0f59 1645 if (cpu_memory_rw_debug(cpu, addr, buf, l, 0) != 0) {
0dc9daf0
BP
1646 error_setg(errp, "Invalid addr 0x%016" PRIx64 "/size %" PRId64
1647 " specified", orig_addr, orig_size);
2f4d0f59
AK
1648 goto exit;
1649 }
0cfd6a9a 1650 if (fwrite(buf, 1, l, f) != l) {
c6bd8c70 1651 error_setg(errp, QERR_IO_ERROR);
0cfd6a9a
LC
1652 goto exit;
1653 }
1654 addr += l;
1655 size -= l;
1656 }
1657
1658exit:
1659 fclose(f);
1660}
6d3962bf
LC
1661
1662void qmp_pmemsave(int64_t addr, int64_t size, const char *filename,
1663 Error **errp)
1664{
1665 FILE *f;
1666 uint32_t l;
1667 uint8_t buf[1024];
1668
1669 f = fopen(filename, "wb");
1670 if (!f) {
618da851 1671 error_setg_file_open(errp, errno, filename);
6d3962bf
LC
1672 return;
1673 }
1674
1675 while (size != 0) {
1676 l = sizeof(buf);
1677 if (l > size)
1678 l = size;
eb6282f2 1679 cpu_physical_memory_read(addr, buf, l);
6d3962bf 1680 if (fwrite(buf, 1, l, f) != l) {
c6bd8c70 1681 error_setg(errp, QERR_IO_ERROR);
6d3962bf
LC
1682 goto exit;
1683 }
1684 addr += l;
1685 size -= l;
1686 }
1687
1688exit:
1689 fclose(f);
1690}
ab49ab5c
LC
1691
/* QMP 'inject-nmi': deliver an NMI to the guest.
 *
 * On x86, every CPU gets one — via its APIC when present, otherwise by
 * raising CPU_INTERRUPT_NMI directly.  Other targets delegate to the
 * generic NMI monitor handler for the current monitor CPU.
 */
void qmp_inject_nmi(Error **errp)
{
#if defined(TARGET_I386)
    CPUState *cs;

    CPU_FOREACH(cs) {
        X86CPU *cpu = X86_CPU(cs);

        if (!cpu->apic_state) {
            cpu_interrupt(cs, CPU_INTERRUPT_NMI);
        } else {
            apic_deliver_nmi(cpu->apic_state);
        }
    }
#else
    nmi_monitor_handle(monitor_get_cpu_index(), errp);
#endif
}
27498bef
ST
1710
/* Print icount clock-drift statistics ('info jit' support).  No-op when
 * icount is disabled; delay/advance maxima are only tracked with
 * -icount align, hence the "NA" branch.
 */
void dump_drift_info(FILE *f, fprintf_function cpu_fprintf)
{
    if (!use_icount) {
        return;
    }

    cpu_fprintf(f, "Host - Guest clock %"PRIi64" ms\n",
                (cpu_get_clock() - cpu_get_icount())/SCALE_MS);
    if (icount_align_option) {
        cpu_fprintf(f, "Max guest delay %"PRIi64" ms\n", -max_delay/SCALE_MS);
        cpu_fprintf(f, "Max guest advance %"PRIi64" ms\n", max_advance/SCALE_MS);
    } else {
        cpu_fprintf(f, "Max guest delay NA\n");
        cpu_fprintf(f, "Max guest advance NA\n");
    }
}