]> git.proxmox.com Git - mirror_qemu.git/blame - cpus.c
vfio/pci: Setup BAR quirks after capabilities probing
[mirror_qemu.git] / cpus.c
CommitLineData
296af7c9
BS
1/*
2 * QEMU System Emulator
3 *
4 * Copyright (c) 2003-2008 Fabrice Bellard
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to deal
8 * in the Software without restriction, including without limitation the rights
9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 * copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22 * THE SOFTWARE.
23 */
24
25/* Needed early for CONFIG_BSD etc. */
7b31bbc2 26#include "qemu/osdep.h"
33c11879
PB
27#include "qemu-common.h"
28#include "cpu.h"
83c9089e 29#include "monitor/monitor.h"
a4e15de9 30#include "qapi/qmp/qerror.h"
d49b6836 31#include "qemu/error-report.h"
9c17d615 32#include "sysemu/sysemu.h"
da31d594 33#include "sysemu/block-backend.h"
022c62cb 34#include "exec/gdbstub.h"
9c17d615
PB
35#include "sysemu/dma.h"
36#include "sysemu/kvm.h"
de0b36b6 37#include "qmp-commands.h"
63c91552 38#include "exec/exec-all.h"
296af7c9 39
1de7afc9 40#include "qemu/thread.h"
9c17d615
PB
41#include "sysemu/cpus.h"
42#include "sysemu/qtest.h"
1de7afc9
PB
43#include "qemu/main-loop.h"
44#include "qemu/bitmap.h"
cb365646 45#include "qemu/seqlock.h"
a4e15de9 46#include "qapi-event.h"
9cb805fd 47#include "hw/nmi.h"
8b427044 48#include "sysemu/replay.h"
0ff0fc19
JK
49
50#ifndef _WIN32
1de7afc9 51#include "qemu/compatfd.h"
0ff0fc19 52#endif
296af7c9 53
6d9cb73c
JK
54#ifdef CONFIG_LINUX
55
56#include <sys/prctl.h>
57
c0532a76
MT
58#ifndef PR_MCE_KILL
59#define PR_MCE_KILL 33
60#endif
61
6d9cb73c
JK
62#ifndef PR_MCE_KILL_SET
63#define PR_MCE_KILL_SET 1
64#endif
65
66#ifndef PR_MCE_KILL_EARLY
67#define PR_MCE_KILL_EARLY 1
68#endif
69
70#endif /* CONFIG_LINUX */
71
182735ef 72static CPUState *next_cpu;
27498bef
ST
73int64_t max_delay;
74int64_t max_advance;
296af7c9 75
2adcc85d
JH
76/* vcpu throttling controls */
77static QEMUTimer *throttle_timer;
78static unsigned int throttle_percentage;
79
80#define CPU_THROTTLE_PCT_MIN 1
81#define CPU_THROTTLE_PCT_MAX 99
82#define CPU_THROTTLE_TIMESLICE_NS 10000000
83
321bc0b2
TC
84bool cpu_is_stopped(CPUState *cpu)
85{
86 return cpu->stopped || !runstate_is_running();
87}
88
a98ae1d8 89static bool cpu_thread_is_idle(CPUState *cpu)
ac873f1e 90{
c64ca814 91 if (cpu->stop || cpu->queued_work_first) {
ac873f1e
PM
92 return false;
93 }
321bc0b2 94 if (cpu_is_stopped(cpu)) {
ac873f1e
PM
95 return true;
96 }
8c2e1b00 97 if (!cpu->halted || cpu_has_work(cpu) ||
215e79c0 98 kvm_halt_in_kernel()) {
ac873f1e
PM
99 return false;
100 }
101 return true;
102}
103
104static bool all_cpu_threads_idle(void)
105{
182735ef 106 CPUState *cpu;
ac873f1e 107
bdc44640 108 CPU_FOREACH(cpu) {
182735ef 109 if (!cpu_thread_is_idle(cpu)) {
ac873f1e
PM
110 return false;
111 }
112 }
113 return true;
114}
115
946fb27c
PB
116/***********************************************************/
117/* guest cycle counter */
118
a3270e19
PB
119/* Protected by TimersState seqlock */
120
5045e9d9 121static bool icount_sleep = true;
71468395 122static int64_t vm_clock_warp_start = -1;
946fb27c
PB
123/* Conversion factor from emulated instructions to virtual clock ticks. */
124static int icount_time_shift;
125/* Arbitrarily pick 1MIPS as the minimum allowable speed. */
126#define MAX_ICOUNT_SHIFT 10
a3270e19 127
946fb27c
PB
128static QEMUTimer *icount_rt_timer;
129static QEMUTimer *icount_vm_timer;
130static QEMUTimer *icount_warp_timer;
946fb27c
PB
131
132typedef struct TimersState {
cb365646 133 /* Protected by BQL. */
946fb27c
PB
134 int64_t cpu_ticks_prev;
135 int64_t cpu_ticks_offset;
cb365646
LPF
136
137 /* cpu_clock_offset can be read out of BQL, so protect it with
138 * this lock.
139 */
140 QemuSeqLock vm_clock_seqlock;
946fb27c
PB
141 int64_t cpu_clock_offset;
142 int32_t cpu_ticks_enabled;
143 int64_t dummy;
c96778bb
FK
144
145 /* Compensate for varying guest execution speed. */
146 int64_t qemu_icount_bias;
147 /* Only written by TCG thread */
148 int64_t qemu_icount;
946fb27c
PB
149} TimersState;
150
d9cd4007 151static TimersState timers_state;
946fb27c 152
2a62914b 153int64_t cpu_get_icount_raw(void)
946fb27c
PB
154{
155 int64_t icount;
4917cf44 156 CPUState *cpu = current_cpu;
946fb27c 157
c96778bb 158 icount = timers_state.qemu_icount;
4917cf44 159 if (cpu) {
414b15c9 160 if (!cpu->can_do_io) {
2a62914b
PD
161 fprintf(stderr, "Bad icount read\n");
162 exit(1);
946fb27c 163 }
28ecfd7a 164 icount -= (cpu->icount_decr.u16.low + cpu->icount_extra);
946fb27c 165 }
2a62914b
PD
166 return icount;
167}
168
169/* Return the virtual CPU time, based on the instruction counter. */
170static int64_t cpu_get_icount_locked(void)
171{
172 int64_t icount = cpu_get_icount_raw();
3f031313 173 return timers_state.qemu_icount_bias + cpu_icount_to_ns(icount);
946fb27c
PB
174}
175
17a15f1b
PB
176int64_t cpu_get_icount(void)
177{
178 int64_t icount;
179 unsigned start;
180
181 do {
182 start = seqlock_read_begin(&timers_state.vm_clock_seqlock);
183 icount = cpu_get_icount_locked();
184 } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start));
185
186 return icount;
187}
188
3f031313
FK
189int64_t cpu_icount_to_ns(int64_t icount)
190{
191 return icount << icount_time_shift;
192}
193
946fb27c 194/* return the host CPU cycle counter and handle stop/restart */
cb365646 195/* Caller must hold the BQL */
946fb27c
PB
196int64_t cpu_get_ticks(void)
197{
5f3e3101
PB
198 int64_t ticks;
199
946fb27c
PB
200 if (use_icount) {
201 return cpu_get_icount();
202 }
5f3e3101
PB
203
204 ticks = timers_state.cpu_ticks_offset;
205 if (timers_state.cpu_ticks_enabled) {
4a7428c5 206 ticks += cpu_get_host_ticks();
5f3e3101
PB
207 }
208
209 if (timers_state.cpu_ticks_prev > ticks) {
210 /* Note: non increasing ticks may happen if the host uses
211 software suspend */
212 timers_state.cpu_ticks_offset += timers_state.cpu_ticks_prev - ticks;
213 ticks = timers_state.cpu_ticks_prev;
946fb27c 214 }
5f3e3101
PB
215
216 timers_state.cpu_ticks_prev = ticks;
217 return ticks;
946fb27c
PB
218}
219
cb365646 220static int64_t cpu_get_clock_locked(void)
946fb27c 221{
5f3e3101 222 int64_t ticks;
cb365646 223
5f3e3101
PB
224 ticks = timers_state.cpu_clock_offset;
225 if (timers_state.cpu_ticks_enabled) {
226 ticks += get_clock();
946fb27c 227 }
cb365646 228
5f3e3101 229 return ticks;
cb365646
LPF
230}
231
232/* return the host CPU monotonic timer and handle stop/restart */
233int64_t cpu_get_clock(void)
234{
235 int64_t ti;
236 unsigned start;
237
238 do {
239 start = seqlock_read_begin(&timers_state.vm_clock_seqlock);
240 ti = cpu_get_clock_locked();
241 } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start));
242
243 return ti;
946fb27c
PB
244}
245
cb365646
LPF
246/* enable cpu_get_ticks()
247 * Caller must hold BQL which server as mutex for vm_clock_seqlock.
248 */
946fb27c
PB
249void cpu_enable_ticks(void)
250{
cb365646
LPF
251 /* Here, the really thing protected by seqlock is cpu_clock_offset. */
252 seqlock_write_lock(&timers_state.vm_clock_seqlock);
946fb27c 253 if (!timers_state.cpu_ticks_enabled) {
4a7428c5 254 timers_state.cpu_ticks_offset -= cpu_get_host_ticks();
946fb27c
PB
255 timers_state.cpu_clock_offset -= get_clock();
256 timers_state.cpu_ticks_enabled = 1;
257 }
cb365646 258 seqlock_write_unlock(&timers_state.vm_clock_seqlock);
946fb27c
PB
259}
260
261/* disable cpu_get_ticks() : the clock is stopped. You must not call
cb365646
LPF
262 * cpu_get_ticks() after that.
263 * Caller must hold BQL which server as mutex for vm_clock_seqlock.
264 */
946fb27c
PB
265void cpu_disable_ticks(void)
266{
cb365646
LPF
267 /* Here, the really thing protected by seqlock is cpu_clock_offset. */
268 seqlock_write_lock(&timers_state.vm_clock_seqlock);
946fb27c 269 if (timers_state.cpu_ticks_enabled) {
4a7428c5 270 timers_state.cpu_ticks_offset += cpu_get_host_ticks();
cb365646 271 timers_state.cpu_clock_offset = cpu_get_clock_locked();
946fb27c
PB
272 timers_state.cpu_ticks_enabled = 0;
273 }
cb365646 274 seqlock_write_unlock(&timers_state.vm_clock_seqlock);
946fb27c
PB
275}
276
277/* Correlation between real and virtual time is always going to be
278 fairly approximate, so ignore small variation.
279 When the guest is idle real and virtual time will be aligned in
280 the IO wait loop. */
73bcb24d 281#define ICOUNT_WOBBLE (NANOSECONDS_PER_SECOND / 10)
946fb27c
PB
282
283static void icount_adjust(void)
284{
285 int64_t cur_time;
286 int64_t cur_icount;
287 int64_t delta;
a3270e19
PB
288
289 /* Protected by TimersState mutex. */
946fb27c 290 static int64_t last_delta;
468cc7cf 291
946fb27c
PB
292 /* If the VM is not running, then do nothing. */
293 if (!runstate_is_running()) {
294 return;
295 }
468cc7cf 296
17a15f1b
PB
297 seqlock_write_lock(&timers_state.vm_clock_seqlock);
298 cur_time = cpu_get_clock_locked();
299 cur_icount = cpu_get_icount_locked();
468cc7cf 300
946fb27c
PB
301 delta = cur_icount - cur_time;
302 /* FIXME: This is a very crude algorithm, somewhat prone to oscillation. */
303 if (delta > 0
304 && last_delta + ICOUNT_WOBBLE < delta * 2
305 && icount_time_shift > 0) {
306 /* The guest is getting too far ahead. Slow time down. */
307 icount_time_shift--;
308 }
309 if (delta < 0
310 && last_delta - ICOUNT_WOBBLE > delta * 2
311 && icount_time_shift < MAX_ICOUNT_SHIFT) {
312 /* The guest is getting too far behind. Speed time up. */
313 icount_time_shift++;
314 }
315 last_delta = delta;
c96778bb
FK
316 timers_state.qemu_icount_bias = cur_icount
317 - (timers_state.qemu_icount << icount_time_shift);
17a15f1b 318 seqlock_write_unlock(&timers_state.vm_clock_seqlock);
946fb27c
PB
319}
320
321static void icount_adjust_rt(void *opaque)
322{
40daca54 323 timer_mod(icount_rt_timer,
1979b908 324 qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL_RT) + 1000);
946fb27c
PB
325 icount_adjust();
326}
327
328static void icount_adjust_vm(void *opaque)
329{
40daca54
AB
330 timer_mod(icount_vm_timer,
331 qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
73bcb24d 332 NANOSECONDS_PER_SECOND / 10);
946fb27c
PB
333 icount_adjust();
334}
335
336static int64_t qemu_icount_round(int64_t count)
337{
338 return (count + (1 << icount_time_shift) - 1) >> icount_time_shift;
339}
340
efab87cf 341static void icount_warp_rt(void)
946fb27c 342{
ccffff48
AB
343 unsigned seq;
344 int64_t warp_start;
345
17a15f1b
PB
346 /* The icount_warp_timer is rescheduled soon after vm_clock_warp_start
347 * changes from -1 to another value, so the race here is okay.
348 */
ccffff48
AB
349 do {
350 seq = seqlock_read_begin(&timers_state.vm_clock_seqlock);
351 warp_start = vm_clock_warp_start;
352 } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, seq));
353
354 if (warp_start == -1) {
946fb27c
PB
355 return;
356 }
357
17a15f1b 358 seqlock_write_lock(&timers_state.vm_clock_seqlock);
946fb27c 359 if (runstate_is_running()) {
8eda206e
PD
360 int64_t clock = REPLAY_CLOCK(REPLAY_CLOCK_VIRTUAL_RT,
361 cpu_get_clock_locked());
8ed961d9
PB
362 int64_t warp_delta;
363
364 warp_delta = clock - vm_clock_warp_start;
365 if (use_icount == 2) {
946fb27c 366 /*
40daca54 367 * In adaptive mode, do not let QEMU_CLOCK_VIRTUAL run too
946fb27c
PB
368 * far ahead of real time.
369 */
17a15f1b 370 int64_t cur_icount = cpu_get_icount_locked();
bf2a7ddb 371 int64_t delta = clock - cur_icount;
8ed961d9 372 warp_delta = MIN(warp_delta, delta);
946fb27c 373 }
c96778bb 374 timers_state.qemu_icount_bias += warp_delta;
946fb27c
PB
375 }
376 vm_clock_warp_start = -1;
17a15f1b 377 seqlock_write_unlock(&timers_state.vm_clock_seqlock);
8ed961d9
PB
378
379 if (qemu_clock_expired(QEMU_CLOCK_VIRTUAL)) {
380 qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
381 }
946fb27c
PB
382}
383
/*
 * Callback of icount_warp_timer.  @opaque is unused.
 */
static void icount_timer_cb(void *opaque)
{
    /*
     * No need for a checkpoint because the timer already synchronizes
     * with CHECKPOINT_CLOCK_VIRTUAL_RT.
     */
    icount_warp_rt();
}
391
8156be56
PB
392void qtest_clock_warp(int64_t dest)
393{
40daca54 394 int64_t clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
efef88b3 395 AioContext *aio_context;
8156be56 396 assert(qtest_enabled());
efef88b3 397 aio_context = qemu_get_aio_context();
8156be56 398 while (clock < dest) {
40daca54 399 int64_t deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
c9299e2f 400 int64_t warp = qemu_soonest_timeout(dest - clock, deadline);
efef88b3 401
17a15f1b 402 seqlock_write_lock(&timers_state.vm_clock_seqlock);
c96778bb 403 timers_state.qemu_icount_bias += warp;
17a15f1b
PB
404 seqlock_write_unlock(&timers_state.vm_clock_seqlock);
405
40daca54 406 qemu_clock_run_timers(QEMU_CLOCK_VIRTUAL);
efef88b3 407 timerlist_run_timers(aio_context->tlg.tl[QEMU_CLOCK_VIRTUAL]);
40daca54 408 clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
8156be56 409 }
40daca54 410 qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
8156be56
PB
411}
412
e76d1798 413void qemu_start_warp_timer(void)
946fb27c 414{
ce78d18c 415 int64_t clock;
946fb27c
PB
416 int64_t deadline;
417
e76d1798 418 if (!use_icount) {
946fb27c
PB
419 return;
420 }
421
8bd7f71d
PD
422 /* Nothing to do if the VM is stopped: QEMU_CLOCK_VIRTUAL timers
423 * do not fire, so computing the deadline does not make sense.
424 */
425 if (!runstate_is_running()) {
426 return;
427 }
428
429 /* warp clock deterministically in record/replay mode */
e76d1798 430 if (!replay_checkpoint(CHECKPOINT_CLOCK_WARP_START)) {
8bd7f71d
PD
431 return;
432 }
433
ce78d18c 434 if (!all_cpu_threads_idle()) {
946fb27c
PB
435 return;
436 }
437
8156be56
PB
438 if (qtest_enabled()) {
439 /* When testing, qtest commands advance icount. */
e76d1798 440 return;
8156be56
PB
441 }
442
ac70aafc 443 /* We want to use the earliest deadline from ALL vm_clocks */
bf2a7ddb 444 clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT);
40daca54 445 deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
ce78d18c 446 if (deadline < 0) {
d7a0f71d
VC
447 static bool notified;
448 if (!icount_sleep && !notified) {
449 error_report("WARNING: icount sleep disabled and no active timers");
450 notified = true;
451 }
ce78d18c 452 return;
ac70aafc
AB
453 }
454
946fb27c
PB
455 if (deadline > 0) {
456 /*
40daca54 457 * Ensure QEMU_CLOCK_VIRTUAL proceeds even when the virtual CPU goes to
946fb27c
PB
458 * sleep. Otherwise, the CPU might be waiting for a future timer
459 * interrupt to wake it up, but the interrupt never comes because
460 * the vCPU isn't running any insns and thus doesn't advance the
40daca54 461 * QEMU_CLOCK_VIRTUAL.
946fb27c 462 */
5045e9d9
VC
463 if (!icount_sleep) {
464 /*
465 * We never let VCPUs sleep in no sleep icount mode.
466 * If there is a pending QEMU_CLOCK_VIRTUAL timer we just advance
467 * to the next QEMU_CLOCK_VIRTUAL event and notify it.
468 * It is useful when we want a deterministic execution time,
469 * isolated from host latencies.
470 */
471 seqlock_write_lock(&timers_state.vm_clock_seqlock);
472 timers_state.qemu_icount_bias += deadline;
473 seqlock_write_unlock(&timers_state.vm_clock_seqlock);
474 qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
475 } else {
476 /*
477 * We do stop VCPUs and only advance QEMU_CLOCK_VIRTUAL after some
478 * "real" time, (related to the time left until the next event) has
479 * passed. The QEMU_CLOCK_VIRTUAL_RT clock will do this.
480 * This avoids that the warps are visible externally; for example,
481 * you will not be sending network packets continuously instead of
482 * every 100ms.
483 */
484 seqlock_write_lock(&timers_state.vm_clock_seqlock);
485 if (vm_clock_warp_start == -1 || vm_clock_warp_start > clock) {
486 vm_clock_warp_start = clock;
487 }
488 seqlock_write_unlock(&timers_state.vm_clock_seqlock);
489 timer_mod_anticipate(icount_warp_timer, clock + deadline);
ce78d18c 490 }
ac70aafc 491 } else if (deadline == 0) {
40daca54 492 qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
946fb27c
PB
493 }
494}
495
e76d1798
PD
496static void qemu_account_warp_timer(void)
497{
498 if (!use_icount || !icount_sleep) {
499 return;
500 }
501
502 /* Nothing to do if the VM is stopped: QEMU_CLOCK_VIRTUAL timers
503 * do not fire, so computing the deadline does not make sense.
504 */
505 if (!runstate_is_running()) {
506 return;
507 }
508
509 /* warp clock deterministically in record/replay mode */
510 if (!replay_checkpoint(CHECKPOINT_CLOCK_WARP_ACCOUNT)) {
511 return;
512 }
513
514 timer_del(icount_warp_timer);
515 icount_warp_rt();
516}
517
d09eae37
FK
518static bool icount_state_needed(void *opaque)
519{
520 return use_icount;
521}
522
523/*
524 * This is a subsection for icount migration.
525 */
526static const VMStateDescription icount_vmstate_timers = {
527 .name = "timer/icount",
528 .version_id = 1,
529 .minimum_version_id = 1,
5cd8cada 530 .needed = icount_state_needed,
d09eae37
FK
531 .fields = (VMStateField[]) {
532 VMSTATE_INT64(qemu_icount_bias, TimersState),
533 VMSTATE_INT64(qemu_icount, TimersState),
534 VMSTATE_END_OF_LIST()
535 }
536};
537
946fb27c
PB
538static const VMStateDescription vmstate_timers = {
539 .name = "timer",
540 .version_id = 2,
541 .minimum_version_id = 1,
35d08458 542 .fields = (VMStateField[]) {
946fb27c
PB
543 VMSTATE_INT64(cpu_ticks_offset, TimersState),
544 VMSTATE_INT64(dummy, TimersState),
545 VMSTATE_INT64_V(cpu_clock_offset, TimersState, 2),
546 VMSTATE_END_OF_LIST()
d09eae37 547 },
5cd8cada
JQ
548 .subsections = (const VMStateDescription*[]) {
549 &icount_vmstate_timers,
550 NULL
946fb27c
PB
551 }
552};
553
2adcc85d
JH
554static void cpu_throttle_thread(void *opaque)
555{
556 CPUState *cpu = opaque;
557 double pct;
558 double throttle_ratio;
559 long sleeptime_ns;
560
561 if (!cpu_throttle_get_percentage()) {
562 return;
563 }
564
565 pct = (double)cpu_throttle_get_percentage()/100;
566 throttle_ratio = pct / (1 - pct);
567 sleeptime_ns = (long)(throttle_ratio * CPU_THROTTLE_TIMESLICE_NS);
568
569 qemu_mutex_unlock_iothread();
570 atomic_set(&cpu->throttle_thread_scheduled, 0);
571 g_usleep(sleeptime_ns / 1000); /* Convert ns to us for usleep call */
572 qemu_mutex_lock_iothread();
573}
574
575static void cpu_throttle_timer_tick(void *opaque)
576{
577 CPUState *cpu;
578 double pct;
579
580 /* Stop the timer if needed */
581 if (!cpu_throttle_get_percentage()) {
582 return;
583 }
584 CPU_FOREACH(cpu) {
585 if (!atomic_xchg(&cpu->throttle_thread_scheduled, 1)) {
586 async_run_on_cpu(cpu, cpu_throttle_thread, cpu);
587 }
588 }
589
590 pct = (double)cpu_throttle_get_percentage()/100;
591 timer_mod(throttle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT) +
592 CPU_THROTTLE_TIMESLICE_NS / (1-pct));
593}
594
595void cpu_throttle_set(int new_throttle_pct)
596{
597 /* Ensure throttle percentage is within valid range */
598 new_throttle_pct = MIN(new_throttle_pct, CPU_THROTTLE_PCT_MAX);
599 new_throttle_pct = MAX(new_throttle_pct, CPU_THROTTLE_PCT_MIN);
600
601 atomic_set(&throttle_percentage, new_throttle_pct);
602
603 timer_mod(throttle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT) +
604 CPU_THROTTLE_TIMESLICE_NS);
605}
606
607void cpu_throttle_stop(void)
608{
609 atomic_set(&throttle_percentage, 0);
610}
611
/* Return true while a non-zero throttle percentage is configured. */
bool cpu_throttle_active(void)
{
    int pct = cpu_throttle_get_percentage();

    return pct != 0;
}
616
617int cpu_throttle_get_percentage(void)
618{
619 return atomic_read(&throttle_percentage);
620}
621
4603ea01
PD
622void cpu_ticks_init(void)
623{
624 seqlock_init(&timers_state.vm_clock_seqlock, NULL);
625 vmstate_register(NULL, 0, &vmstate_timers, &timers_state);
2adcc85d
JH
626 throttle_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL_RT,
627 cpu_throttle_timer_tick, NULL);
4603ea01
PD
628}
629
1ad9580b 630void configure_icount(QemuOpts *opts, Error **errp)
946fb27c 631{
1ad9580b 632 const char *option;
a8bfac37 633 char *rem_str = NULL;
1ad9580b 634
1ad9580b 635 option = qemu_opt_get(opts, "shift");
946fb27c 636 if (!option) {
a8bfac37
ST
637 if (qemu_opt_get(opts, "align") != NULL) {
638 error_setg(errp, "Please specify shift option when using align");
639 }
946fb27c
PB
640 return;
641 }
f1f4b57e
VC
642
643 icount_sleep = qemu_opt_get_bool(opts, "sleep", true);
5045e9d9
VC
644 if (icount_sleep) {
645 icount_warp_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL_RT,
e76d1798 646 icount_timer_cb, NULL);
5045e9d9 647 }
f1f4b57e 648
a8bfac37 649 icount_align_option = qemu_opt_get_bool(opts, "align", false);
f1f4b57e
VC
650
651 if (icount_align_option && !icount_sleep) {
778d9f9b 652 error_setg(errp, "align=on and sleep=off are incompatible");
f1f4b57e 653 }
946fb27c 654 if (strcmp(option, "auto") != 0) {
a8bfac37
ST
655 errno = 0;
656 icount_time_shift = strtol(option, &rem_str, 0);
657 if (errno != 0 || *rem_str != '\0' || !strlen(option)) {
658 error_setg(errp, "icount: Invalid shift value");
659 }
946fb27c
PB
660 use_icount = 1;
661 return;
a8bfac37
ST
662 } else if (icount_align_option) {
663 error_setg(errp, "shift=auto and align=on are incompatible");
f1f4b57e 664 } else if (!icount_sleep) {
778d9f9b 665 error_setg(errp, "shift=auto and sleep=off are incompatible");
946fb27c
PB
666 }
667
668 use_icount = 2;
669
670 /* 125MIPS seems a reasonable initial guess at the guest speed.
671 It will be corrected fairly quickly anyway. */
672 icount_time_shift = 3;
673
674 /* Have both realtime and virtual time triggers for speed adjustment.
675 The realtime trigger catches emulated time passing too slowly,
676 the virtual time trigger catches emulated time passing too fast.
677 Realtime triggers occur even when idle, so use them less frequently
678 than VM triggers. */
bf2a7ddb
PD
679 icount_rt_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL_RT,
680 icount_adjust_rt, NULL);
40daca54 681 timer_mod(icount_rt_timer,
bf2a7ddb 682 qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL_RT) + 1000);
40daca54
AB
683 icount_vm_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
684 icount_adjust_vm, NULL);
685 timer_mod(icount_vm_timer,
686 qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
73bcb24d 687 NANOSECONDS_PER_SECOND / 10);
946fb27c
PB
688}
689
296af7c9
BS
690/***********************************************************/
691void hw_error(const char *fmt, ...)
692{
693 va_list ap;
55e5c285 694 CPUState *cpu;
296af7c9
BS
695
696 va_start(ap, fmt);
697 fprintf(stderr, "qemu: hardware error: ");
698 vfprintf(stderr, fmt, ap);
699 fprintf(stderr, "\n");
bdc44640 700 CPU_FOREACH(cpu) {
55e5c285 701 fprintf(stderr, "CPU #%d:\n", cpu->cpu_index);
878096ee 702 cpu_dump_state(cpu, stderr, fprintf, CPU_DUMP_FPU);
296af7c9
BS
703 }
704 va_end(ap);
705 abort();
706}
707
708void cpu_synchronize_all_states(void)
709{
182735ef 710 CPUState *cpu;
296af7c9 711
bdc44640 712 CPU_FOREACH(cpu) {
182735ef 713 cpu_synchronize_state(cpu);
296af7c9
BS
714 }
715}
716
717void cpu_synchronize_all_post_reset(void)
718{
182735ef 719 CPUState *cpu;
296af7c9 720
bdc44640 721 CPU_FOREACH(cpu) {
182735ef 722 cpu_synchronize_post_reset(cpu);
296af7c9
BS
723 }
724}
725
726void cpu_synchronize_all_post_init(void)
727{
182735ef 728 CPUState *cpu;
296af7c9 729
bdc44640 730 CPU_FOREACH(cpu) {
182735ef 731 cpu_synchronize_post_init(cpu);
296af7c9
BS
732 }
733}
734
56983463 735static int do_vm_stop(RunState state)
296af7c9 736{
56983463
KW
737 int ret = 0;
738
1354869c 739 if (runstate_is_running()) {
296af7c9 740 cpu_disable_ticks();
296af7c9 741 pause_all_vcpus();
f5bbfba1 742 runstate_set(state);
1dfb4dd9 743 vm_state_notify(0, state);
a4e15de9 744 qapi_event_send_stop(&error_abort);
296af7c9 745 }
56983463 746
594a45ce 747 bdrv_drain_all();
da31d594 748 ret = blk_flush_all();
594a45ce 749
56983463 750 return ret;
296af7c9
BS
751}
752
a1fcaa73 753static bool cpu_can_run(CPUState *cpu)
296af7c9 754{
4fdeee7c 755 if (cpu->stop) {
a1fcaa73 756 return false;
0ab07c62 757 }
321bc0b2 758 if (cpu_is_stopped(cpu)) {
a1fcaa73 759 return false;
0ab07c62 760 }
a1fcaa73 761 return true;
296af7c9
BS
762}
763
91325046 764static void cpu_handle_guest_debug(CPUState *cpu)
83f338f7 765{
64f6b346 766 gdb_set_stop_cpu(cpu);
8cf71710 767 qemu_system_debug_request();
f324e766 768 cpu->stopped = true;
3c638d06
JK
769}
770
6d9cb73c
JK
771#ifdef CONFIG_LINUX
772static void sigbus_reraise(void)
773{
774 sigset_t set;
775 struct sigaction action;
776
777 memset(&action, 0, sizeof(action));
778 action.sa_handler = SIG_DFL;
779 if (!sigaction(SIGBUS, &action, NULL)) {
780 raise(SIGBUS);
781 sigemptyset(&set);
782 sigaddset(&set, SIGBUS);
a2d1761d 783 pthread_sigmask(SIG_UNBLOCK, &set, NULL);
6d9cb73c
JK
784 }
785 perror("Failed to re-raise SIGBUS!\n");
786 abort();
787}
788
789static void sigbus_handler(int n, struct qemu_signalfd_siginfo *siginfo,
790 void *ctx)
791{
792 if (kvm_on_sigbus(siginfo->ssi_code,
793 (void *)(intptr_t)siginfo->ssi_addr)) {
794 sigbus_reraise();
795 }
796}
797
798static void qemu_init_sigbus(void)
799{
800 struct sigaction action;
801
802 memset(&action, 0, sizeof(action));
803 action.sa_flags = SA_SIGINFO;
804 action.sa_sigaction = (void (*)(int, siginfo_t*, void*))sigbus_handler;
805 sigaction(SIGBUS, &action, NULL);
806
807 prctl(PR_MCE_KILL, PR_MCE_KILL_SET, PR_MCE_KILL_EARLY, 0, 0);
808}
809
290adf38 810static void qemu_kvm_eat_signals(CPUState *cpu)
1ab3c6c0
JK
811{
812 struct timespec ts = { 0, 0 };
813 siginfo_t siginfo;
814 sigset_t waitset;
815 sigset_t chkset;
816 int r;
817
818 sigemptyset(&waitset);
819 sigaddset(&waitset, SIG_IPI);
820 sigaddset(&waitset, SIGBUS);
821
822 do {
823 r = sigtimedwait(&waitset, &siginfo, &ts);
824 if (r == -1 && !(errno == EAGAIN || errno == EINTR)) {
825 perror("sigtimedwait");
826 exit(1);
827 }
828
829 switch (r) {
830 case SIGBUS:
290adf38 831 if (kvm_on_sigbus_vcpu(cpu, siginfo.si_code, siginfo.si_addr)) {
1ab3c6c0
JK
832 sigbus_reraise();
833 }
834 break;
835 default:
836 break;
837 }
838
839 r = sigpending(&chkset);
840 if (r == -1) {
841 perror("sigpending");
842 exit(1);
843 }
844 } while (sigismember(&chkset, SIG_IPI) || sigismember(&chkset, SIGBUS));
1ab3c6c0
JK
845}
846
6d9cb73c
JK
847#else /* !CONFIG_LINUX */
848
/* Non-Linux build: no SIGBUS setup is performed. */
static void qemu_init_sigbus(void)
{
    /* intentionally empty */
}
1ab3c6c0 852
290adf38 853static void qemu_kvm_eat_signals(CPUState *cpu)
1ab3c6c0
JK
854{
855}
6d9cb73c
JK
856#endif /* !CONFIG_LINUX */
857
296af7c9 858#ifndef _WIN32
55f8d6ac
JK
/* Signal handler that does nothing; installed for SIG_IPI. */
static void dummy_signal(int sig)
{
    /* intentionally empty */
}
55f8d6ac 862
13618e05 863static void qemu_kvm_init_cpu_signals(CPUState *cpu)
714bd040
PB
864{
865 int r;
866 sigset_t set;
867 struct sigaction sigact;
868
869 memset(&sigact, 0, sizeof(sigact));
870 sigact.sa_handler = dummy_signal;
871 sigaction(SIG_IPI, &sigact, NULL);
872
714bd040
PB
873 pthread_sigmask(SIG_BLOCK, NULL, &set);
874 sigdelset(&set, SIG_IPI);
714bd040 875 sigdelset(&set, SIGBUS);
491d6e80 876 r = kvm_set_signal_mask(cpu, &set);
714bd040
PB
877 if (r) {
878 fprintf(stderr, "kvm_set_signal_mask: %s\n", strerror(-r));
879 exit(1);
880 }
881}
882
55f8d6ac 883#else /* _WIN32 */
13618e05 884static void qemu_kvm_init_cpu_signals(CPUState *cpu)
ff48eb5f 885{
714bd040
PB
886 abort();
887}
714bd040 888#endif /* _WIN32 */
ff48eb5f 889
b2532d88 890static QemuMutex qemu_global_mutex;
46daff13 891static QemuCond qemu_io_proceeded_cond;
6b49809c 892static unsigned iothread_requesting_mutex;
296af7c9
BS
893
894static QemuThread io_thread;
895
296af7c9
BS
896/* cpu creation */
897static QemuCond qemu_cpu_cond;
898/* system init */
296af7c9 899static QemuCond qemu_pause_cond;
e82bcec2 900static QemuCond qemu_work_cond;
296af7c9 901
d3b12f5d 902void qemu_init_cpu_loop(void)
296af7c9 903{
6d9cb73c 904 qemu_init_sigbus();
ed94592b 905 qemu_cond_init(&qemu_cpu_cond);
ed94592b
AL
906 qemu_cond_init(&qemu_pause_cond);
907 qemu_cond_init(&qemu_work_cond);
46daff13 908 qemu_cond_init(&qemu_io_proceeded_cond);
296af7c9 909 qemu_mutex_init(&qemu_global_mutex);
296af7c9 910
b7680cb6 911 qemu_thread_get_self(&io_thread);
296af7c9
BS
912}
913
f100f0b3 914void run_on_cpu(CPUState *cpu, void (*func)(void *data), void *data)
e82bcec2
MT
915{
916 struct qemu_work_item wi;
917
60e82579 918 if (qemu_cpu_is_self(cpu)) {
e82bcec2
MT
919 func(data);
920 return;
921 }
922
923 wi.func = func;
924 wi.data = data;
3c02270d 925 wi.free = false;
376692b9
PB
926
927 qemu_mutex_lock(&cpu->work_mutex);
c64ca814
AF
928 if (cpu->queued_work_first == NULL) {
929 cpu->queued_work_first = &wi;
0ab07c62 930 } else {
c64ca814 931 cpu->queued_work_last->next = &wi;
0ab07c62 932 }
c64ca814 933 cpu->queued_work_last = &wi;
e82bcec2
MT
934 wi.next = NULL;
935 wi.done = false;
376692b9 936 qemu_mutex_unlock(&cpu->work_mutex);
e82bcec2 937
c08d7424 938 qemu_cpu_kick(cpu);
376692b9 939 while (!atomic_mb_read(&wi.done)) {
4917cf44 940 CPUState *self_cpu = current_cpu;
e82bcec2
MT
941
942 qemu_cond_wait(&qemu_work_cond, &qemu_global_mutex);
4917cf44 943 current_cpu = self_cpu;
e82bcec2
MT
944 }
945}
946
3c02270d
CV
947void async_run_on_cpu(CPUState *cpu, void (*func)(void *data), void *data)
948{
949 struct qemu_work_item *wi;
950
951 if (qemu_cpu_is_self(cpu)) {
952 func(data);
953 return;
954 }
955
956 wi = g_malloc0(sizeof(struct qemu_work_item));
957 wi->func = func;
958 wi->data = data;
959 wi->free = true;
376692b9
PB
960
961 qemu_mutex_lock(&cpu->work_mutex);
3c02270d
CV
962 if (cpu->queued_work_first == NULL) {
963 cpu->queued_work_first = wi;
964 } else {
965 cpu->queued_work_last->next = wi;
966 }
967 cpu->queued_work_last = wi;
968 wi->next = NULL;
969 wi->done = false;
376692b9 970 qemu_mutex_unlock(&cpu->work_mutex);
3c02270d
CV
971
972 qemu_cpu_kick(cpu);
973}
974
6d45b109 975static void flush_queued_work(CPUState *cpu)
e82bcec2
MT
976{
977 struct qemu_work_item *wi;
978
c64ca814 979 if (cpu->queued_work_first == NULL) {
e82bcec2 980 return;
0ab07c62 981 }
e82bcec2 982
376692b9
PB
983 qemu_mutex_lock(&cpu->work_mutex);
984 while (cpu->queued_work_first != NULL) {
985 wi = cpu->queued_work_first;
c64ca814 986 cpu->queued_work_first = wi->next;
376692b9
PB
987 if (!cpu->queued_work_first) {
988 cpu->queued_work_last = NULL;
989 }
990 qemu_mutex_unlock(&cpu->work_mutex);
e82bcec2 991 wi->func(wi->data);
376692b9 992 qemu_mutex_lock(&cpu->work_mutex);
3c02270d
CV
993 if (wi->free) {
994 g_free(wi);
376692b9
PB
995 } else {
996 atomic_mb_set(&wi->done, true);
3c02270d 997 }
e82bcec2 998 }
376692b9 999 qemu_mutex_unlock(&cpu->work_mutex);
e82bcec2
MT
1000 qemu_cond_broadcast(&qemu_work_cond);
1001}
1002
509a0d78 1003static void qemu_wait_io_event_common(CPUState *cpu)
296af7c9 1004{
4fdeee7c
AF
1005 if (cpu->stop) {
1006 cpu->stop = false;
f324e766 1007 cpu->stopped = true;
96bce683 1008 qemu_cond_broadcast(&qemu_pause_cond);
296af7c9 1009 }
6d45b109 1010 flush_queued_work(cpu);
216fc9a4 1011 cpu->thread_kicked = false;
296af7c9
BS
1012}
1013
d5f8d613 1014static void qemu_tcg_wait_io_event(CPUState *cpu)
296af7c9 1015{
16400322 1016 while (all_cpu_threads_idle()) {
d5f8d613 1017 qemu_cond_wait(cpu->halt_cond, &qemu_global_mutex);
16400322 1018 }
296af7c9 1019
46daff13
PB
1020 while (iothread_requesting_mutex) {
1021 qemu_cond_wait(&qemu_io_proceeded_cond, &qemu_global_mutex);
1022 }
6cabe1f3 1023
bdc44640 1024 CPU_FOREACH(cpu) {
182735ef 1025 qemu_wait_io_event_common(cpu);
6cabe1f3 1026 }
296af7c9
BS
1027}
1028
fd529e8f 1029static void qemu_kvm_wait_io_event(CPUState *cpu)
296af7c9 1030{
a98ae1d8 1031 while (cpu_thread_is_idle(cpu)) {
f5c121b8 1032 qemu_cond_wait(cpu->halt_cond, &qemu_global_mutex);
16400322 1033 }
296af7c9 1034
290adf38 1035 qemu_kvm_eat_signals(cpu);
509a0d78 1036 qemu_wait_io_event_common(cpu);
296af7c9
BS
1037}
1038
7e97cd88 1039static void *qemu_kvm_cpu_thread_fn(void *arg)
296af7c9 1040{
48a106bd 1041 CPUState *cpu = arg;
84b4915d 1042 int r;
296af7c9 1043
ab28bd23
PB
1044 rcu_register_thread();
1045
2e7f7a3c 1046 qemu_mutex_lock_iothread();
814e612e 1047 qemu_thread_get_self(cpu->thread);
9f09e18a 1048 cpu->thread_id = qemu_get_thread_id();
626cf8f4 1049 cpu->can_do_io = 1;
4917cf44 1050 current_cpu = cpu;
296af7c9 1051
504134d2 1052 r = kvm_init_vcpu(cpu);
84b4915d
JK
1053 if (r < 0) {
1054 fprintf(stderr, "kvm_init_vcpu failed: %s\n", strerror(-r));
1055 exit(1);
1056 }
296af7c9 1057
13618e05 1058 qemu_kvm_init_cpu_signals(cpu);
296af7c9
BS
1059
1060 /* signal CPU creation */
61a46217 1061 cpu->created = true;
296af7c9
BS
1062 qemu_cond_signal(&qemu_cpu_cond);
1063
296af7c9 1064 while (1) {
a1fcaa73 1065 if (cpu_can_run(cpu)) {
1458c363 1066 r = kvm_cpu_exec(cpu);
83f338f7 1067 if (r == EXCP_DEBUG) {
91325046 1068 cpu_handle_guest_debug(cpu);
83f338f7 1069 }
0ab07c62 1070 }
fd529e8f 1071 qemu_kvm_wait_io_event(cpu);
296af7c9
BS
1072 }
1073
1074 return NULL;
1075}
1076
c7f0f3b1
AL
1077static void *qemu_dummy_cpu_thread_fn(void *arg)
1078{
1079#ifdef _WIN32
1080 fprintf(stderr, "qtest is not supported under Windows\n");
1081 exit(1);
1082#else
10a9021d 1083 CPUState *cpu = arg;
c7f0f3b1
AL
1084 sigset_t waitset;
1085 int r;
1086
ab28bd23
PB
1087 rcu_register_thread();
1088
c7f0f3b1 1089 qemu_mutex_lock_iothread();
814e612e 1090 qemu_thread_get_self(cpu->thread);
9f09e18a 1091 cpu->thread_id = qemu_get_thread_id();
626cf8f4 1092 cpu->can_do_io = 1;
c7f0f3b1
AL
1093
1094 sigemptyset(&waitset);
1095 sigaddset(&waitset, SIG_IPI);
1096
1097 /* signal CPU creation */
61a46217 1098 cpu->created = true;
c7f0f3b1
AL
1099 qemu_cond_signal(&qemu_cpu_cond);
1100
4917cf44 1101 current_cpu = cpu;
c7f0f3b1 1102 while (1) {
4917cf44 1103 current_cpu = NULL;
c7f0f3b1
AL
1104 qemu_mutex_unlock_iothread();
1105 do {
1106 int sig;
1107 r = sigwait(&waitset, &sig);
1108 } while (r == -1 && (errno == EAGAIN || errno == EINTR));
1109 if (r == -1) {
1110 perror("sigwait");
1111 exit(1);
1112 }
1113 qemu_mutex_lock_iothread();
4917cf44 1114 current_cpu = cpu;
509a0d78 1115 qemu_wait_io_event_common(cpu);
c7f0f3b1
AL
1116 }
1117
1118 return NULL;
1119#endif
1120}
1121
bdb7ca67
JK
1122static void tcg_exec_all(void);
1123
7e97cd88 1124static void *qemu_tcg_cpu_thread_fn(void *arg)
296af7c9 1125{
c3586ba7 1126 CPUState *cpu = arg;
296af7c9 1127
ab28bd23
PB
1128 rcu_register_thread();
1129
2e7f7a3c 1130 qemu_mutex_lock_iothread();
814e612e 1131 qemu_thread_get_self(cpu->thread);
296af7c9 1132
38fcbd3f
AF
1133 CPU_FOREACH(cpu) {
1134 cpu->thread_id = qemu_get_thread_id();
1135 cpu->created = true;
626cf8f4 1136 cpu->can_do_io = 1;
38fcbd3f 1137 }
296af7c9
BS
1138 qemu_cond_signal(&qemu_cpu_cond);
1139
fa7d1867 1140 /* wait for initial kick-off after machine start */
c28e399c 1141 while (first_cpu->stopped) {
d5f8d613 1142 qemu_cond_wait(first_cpu->halt_cond, &qemu_global_mutex);
8e564b4e
JK
1143
1144 /* process any pending work */
bdc44640 1145 CPU_FOREACH(cpu) {
182735ef 1146 qemu_wait_io_event_common(cpu);
8e564b4e 1147 }
0ab07c62 1148 }
296af7c9 1149
21618b3e 1150 /* process any pending work */
aed807c8 1151 atomic_mb_set(&exit_request, 1);
21618b3e 1152
296af7c9 1153 while (1) {
bdb7ca67 1154 tcg_exec_all();
ac70aafc
AB
1155
1156 if (use_icount) {
40daca54 1157 int64_t deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
ac70aafc
AB
1158
1159 if (deadline == 0) {
40daca54 1160 qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
ac70aafc 1161 }
3b2319a3 1162 }
d5f8d613 1163 qemu_tcg_wait_io_event(QTAILQ_FIRST(&cpus));
296af7c9
BS
1164 }
1165
1166 return NULL;
1167}
1168
2ff09a40 1169static void qemu_cpu_kick_thread(CPUState *cpu)
cc015e9a
PB
1170{
1171#ifndef _WIN32
1172 int err;
1173
e0c38211
PB
1174 if (cpu->thread_kicked) {
1175 return;
9102deda 1176 }
e0c38211 1177 cpu->thread_kicked = true;
814e612e 1178 err = pthread_kill(cpu->thread->thread, SIG_IPI);
cc015e9a
PB
1179 if (err) {
1180 fprintf(stderr, "qemu:%s: %s", __func__, strerror(err));
1181 exit(1);
1182 }
1183#else /* _WIN32 */
e0c38211
PB
1184 abort();
1185#endif
1186}
ed9164a3 1187
e0c38211
PB
1188static void qemu_cpu_kick_no_halt(void)
1189{
1190 CPUState *cpu;
1191 /* Ensure whatever caused the exit has reached the CPU threads before
1192 * writing exit_request.
1193 */
1194 atomic_mb_set(&exit_request, 1);
1195 cpu = atomic_mb_read(&tcg_current_cpu);
1196 if (cpu) {
1197 cpu_exit(cpu);
cc015e9a 1198 }
cc015e9a
PB
1199}
1200
c08d7424 1201void qemu_cpu_kick(CPUState *cpu)
296af7c9 1202{
f5c121b8 1203 qemu_cond_broadcast(cpu->halt_cond);
e0c38211
PB
1204 if (tcg_enabled()) {
1205 qemu_cpu_kick_no_halt();
1206 } else {
1207 qemu_cpu_kick_thread(cpu);
1208 }
296af7c9
BS
1209}
1210
46d62fac 1211void qemu_cpu_kick_self(void)
296af7c9 1212{
4917cf44 1213 assert(current_cpu);
9102deda 1214 qemu_cpu_kick_thread(current_cpu);
296af7c9
BS
1215}
1216
60e82579 1217bool qemu_cpu_is_self(CPUState *cpu)
296af7c9 1218{
814e612e 1219 return qemu_thread_is_self(cpu->thread);
296af7c9
BS
1220}
1221
79e2b9ae 1222bool qemu_in_vcpu_thread(void)
aa723c23 1223{
4917cf44 1224 return current_cpu && qemu_cpu_is_self(current_cpu);
aa723c23
JQ
1225}
1226
afbe7053
PB
1227static __thread bool iothread_locked = false;
1228
1229bool qemu_mutex_iothread_locked(void)
1230{
1231 return iothread_locked;
1232}
1233
296af7c9
BS
1234void qemu_mutex_lock_iothread(void)
1235{
21618b3e 1236 atomic_inc(&iothread_requesting_mutex);
2e7f7a3c
PB
1237 /* In the simple case there is no need to bump the VCPU thread out of
1238 * TCG code execution.
1239 */
1240 if (!tcg_enabled() || qemu_in_vcpu_thread() ||
46036b24 1241 !first_cpu || !first_cpu->created) {
296af7c9 1242 qemu_mutex_lock(&qemu_global_mutex);
21618b3e 1243 atomic_dec(&iothread_requesting_mutex);
1a28cac3 1244 } else {
1a28cac3 1245 if (qemu_mutex_trylock(&qemu_global_mutex)) {
e0c38211 1246 qemu_cpu_kick_no_halt();
1a28cac3
MT
1247 qemu_mutex_lock(&qemu_global_mutex);
1248 }
6b49809c 1249 atomic_dec(&iothread_requesting_mutex);
46daff13 1250 qemu_cond_broadcast(&qemu_io_proceeded_cond);
1a28cac3 1251 }
afbe7053 1252 iothread_locked = true;
296af7c9
BS
1253}
1254
1255void qemu_mutex_unlock_iothread(void)
1256{
afbe7053 1257 iothread_locked = false;
296af7c9
BS
1258 qemu_mutex_unlock(&qemu_global_mutex);
1259}
1260
1261static int all_vcpus_paused(void)
1262{
bdc44640 1263 CPUState *cpu;
296af7c9 1264
bdc44640 1265 CPU_FOREACH(cpu) {
182735ef 1266 if (!cpu->stopped) {
296af7c9 1267 return 0;
0ab07c62 1268 }
296af7c9
BS
1269 }
1270
1271 return 1;
1272}
1273
1274void pause_all_vcpus(void)
1275{
bdc44640 1276 CPUState *cpu;
296af7c9 1277
40daca54 1278 qemu_clock_enable(QEMU_CLOCK_VIRTUAL, false);
bdc44640 1279 CPU_FOREACH(cpu) {
182735ef
AF
1280 cpu->stop = true;
1281 qemu_cpu_kick(cpu);
296af7c9
BS
1282 }
1283
aa723c23 1284 if (qemu_in_vcpu_thread()) {
d798e974
JK
1285 cpu_stop_current();
1286 if (!kvm_enabled()) {
bdc44640 1287 CPU_FOREACH(cpu) {
182735ef
AF
1288 cpu->stop = false;
1289 cpu->stopped = true;
d798e974
JK
1290 }
1291 return;
1292 }
1293 }
1294
296af7c9 1295 while (!all_vcpus_paused()) {
be7d6c57 1296 qemu_cond_wait(&qemu_pause_cond, &qemu_global_mutex);
bdc44640 1297 CPU_FOREACH(cpu) {
182735ef 1298 qemu_cpu_kick(cpu);
296af7c9
BS
1299 }
1300 }
1301}
1302
2993683b
IM
1303void cpu_resume(CPUState *cpu)
1304{
1305 cpu->stop = false;
1306 cpu->stopped = false;
1307 qemu_cpu_kick(cpu);
1308}
1309
296af7c9
BS
1310void resume_all_vcpus(void)
1311{
bdc44640 1312 CPUState *cpu;
296af7c9 1313
40daca54 1314 qemu_clock_enable(QEMU_CLOCK_VIRTUAL, true);
bdc44640 1315 CPU_FOREACH(cpu) {
182735ef 1316 cpu_resume(cpu);
296af7c9
BS
1317 }
1318}
1319
4900116e
DDAG
1320/* For temporary buffers for forming a name */
1321#define VCPU_THREAD_NAME_SIZE 16
1322
e5ab30a2 1323static void qemu_tcg_init_vcpu(CPUState *cpu)
296af7c9 1324{
4900116e 1325 char thread_name[VCPU_THREAD_NAME_SIZE];
d5f8d613
FK
1326 static QemuCond *tcg_halt_cond;
1327 static QemuThread *tcg_cpu_thread;
4900116e 1328
296af7c9
BS
1329 /* share a single thread for all cpus with TCG */
1330 if (!tcg_cpu_thread) {
814e612e 1331 cpu->thread = g_malloc0(sizeof(QemuThread));
f5c121b8
AF
1332 cpu->halt_cond = g_malloc0(sizeof(QemuCond));
1333 qemu_cond_init(cpu->halt_cond);
1334 tcg_halt_cond = cpu->halt_cond;
4900116e
DDAG
1335 snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/TCG",
1336 cpu->cpu_index);
1337 qemu_thread_create(cpu->thread, thread_name, qemu_tcg_cpu_thread_fn,
1338 cpu, QEMU_THREAD_JOINABLE);
1ecf47bf 1339#ifdef _WIN32
814e612e 1340 cpu->hThread = qemu_thread_get_handle(cpu->thread);
1ecf47bf 1341#endif
61a46217 1342 while (!cpu->created) {
18a85728 1343 qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
0ab07c62 1344 }
814e612e 1345 tcg_cpu_thread = cpu->thread;
296af7c9 1346 } else {
814e612e 1347 cpu->thread = tcg_cpu_thread;
f5c121b8 1348 cpu->halt_cond = tcg_halt_cond;
296af7c9
BS
1349 }
1350}
1351
48a106bd 1352static void qemu_kvm_start_vcpu(CPUState *cpu)
296af7c9 1353{
4900116e
DDAG
1354 char thread_name[VCPU_THREAD_NAME_SIZE];
1355
814e612e 1356 cpu->thread = g_malloc0(sizeof(QemuThread));
f5c121b8
AF
1357 cpu->halt_cond = g_malloc0(sizeof(QemuCond));
1358 qemu_cond_init(cpu->halt_cond);
4900116e
DDAG
1359 snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/KVM",
1360 cpu->cpu_index);
1361 qemu_thread_create(cpu->thread, thread_name, qemu_kvm_cpu_thread_fn,
1362 cpu, QEMU_THREAD_JOINABLE);
61a46217 1363 while (!cpu->created) {
18a85728 1364 qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
0ab07c62 1365 }
296af7c9
BS
1366}
1367
10a9021d 1368static void qemu_dummy_start_vcpu(CPUState *cpu)
c7f0f3b1 1369{
4900116e
DDAG
1370 char thread_name[VCPU_THREAD_NAME_SIZE];
1371
814e612e 1372 cpu->thread = g_malloc0(sizeof(QemuThread));
f5c121b8
AF
1373 cpu->halt_cond = g_malloc0(sizeof(QemuCond));
1374 qemu_cond_init(cpu->halt_cond);
4900116e
DDAG
1375 snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/DUMMY",
1376 cpu->cpu_index);
1377 qemu_thread_create(cpu->thread, thread_name, qemu_dummy_cpu_thread_fn, cpu,
c7f0f3b1 1378 QEMU_THREAD_JOINABLE);
61a46217 1379 while (!cpu->created) {
c7f0f3b1
AL
1380 qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
1381 }
1382}
1383
c643bed9 1384void qemu_init_vcpu(CPUState *cpu)
296af7c9 1385{
ce3960eb
AF
1386 cpu->nr_cores = smp_cores;
1387 cpu->nr_threads = smp_threads;
f324e766 1388 cpu->stopped = true;
56943e8c
PM
1389
1390 if (!cpu->as) {
1391 /* If the target cpu hasn't set up any address spaces itself,
1392 * give it the default one.
1393 */
6731d864
PC
1394 AddressSpace *as = address_space_init_shareable(cpu->memory,
1395 "cpu-memory");
12ebc9a7 1396 cpu->num_ases = 1;
6731d864 1397 cpu_address_space_init(cpu, as, 0);
56943e8c
PM
1398 }
1399
0ab07c62 1400 if (kvm_enabled()) {
48a106bd 1401 qemu_kvm_start_vcpu(cpu);
c7f0f3b1 1402 } else if (tcg_enabled()) {
e5ab30a2 1403 qemu_tcg_init_vcpu(cpu);
c7f0f3b1 1404 } else {
10a9021d 1405 qemu_dummy_start_vcpu(cpu);
0ab07c62 1406 }
296af7c9
BS
1407}
1408
b4a3d965 1409void cpu_stop_current(void)
296af7c9 1410{
4917cf44
AF
1411 if (current_cpu) {
1412 current_cpu->stop = false;
1413 current_cpu->stopped = true;
1414 cpu_exit(current_cpu);
96bce683 1415 qemu_cond_broadcast(&qemu_pause_cond);
b4a3d965 1416 }
296af7c9
BS
1417}
1418
56983463 1419int vm_stop(RunState state)
296af7c9 1420{
aa723c23 1421 if (qemu_in_vcpu_thread()) {
74892d24 1422 qemu_system_vmstop_request_prepare();
1dfb4dd9 1423 qemu_system_vmstop_request(state);
296af7c9
BS
1424 /*
1425 * FIXME: should not return to device code in case
1426 * vm_stop() has been requested.
1427 */
b4a3d965 1428 cpu_stop_current();
56983463 1429 return 0;
296af7c9 1430 }
56983463
KW
1431
1432 return do_vm_stop(state);
296af7c9
BS
1433}
1434
8a9236f1
LC
1435/* does a state transition even if the VM is already stopped,
1436 current state is forgotten forever */
56983463 1437int vm_stop_force_state(RunState state)
8a9236f1
LC
1438{
1439 if (runstate_is_running()) {
56983463 1440 return vm_stop(state);
8a9236f1
LC
1441 } else {
1442 runstate_set(state);
b2780d32
WC
1443
1444 bdrv_drain_all();
594a45ce
KW
1445 /* Make sure to return an error if the flush in a previous vm_stop()
1446 * failed. */
da31d594 1447 return blk_flush_all();
8a9236f1
LC
1448 }
1449}
1450
8b427044
PD
1451static int64_t tcg_get_icount_limit(void)
1452{
1453 int64_t deadline;
1454
1455 if (replay_mode != REPLAY_MODE_PLAY) {
1456 deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
1457
1458 /* Maintain prior (possibly buggy) behaviour where if no deadline
1459 * was set (as there is no QEMU_CLOCK_VIRTUAL timer) or it is more than
1460 * INT32_MAX nanoseconds ahead, we still use INT32_MAX
1461 * nanoseconds.
1462 */
1463 if ((deadline < 0) || (deadline > INT32_MAX)) {
1464 deadline = INT32_MAX;
1465 }
1466
1467 return qemu_icount_round(deadline);
1468 } else {
1469 return replay_get_instructions();
1470 }
1471}
1472
3d57f789 1473static int tcg_cpu_exec(CPUState *cpu)
296af7c9
BS
1474{
1475 int ret;
1476#ifdef CONFIG_PROFILER
1477 int64_t ti;
1478#endif
1479
1480#ifdef CONFIG_PROFILER
1481 ti = profile_getclock();
1482#endif
1483 if (use_icount) {
1484 int64_t count;
1485 int decr;
c96778bb
FK
1486 timers_state.qemu_icount -= (cpu->icount_decr.u16.low
1487 + cpu->icount_extra);
28ecfd7a 1488 cpu->icount_decr.u16.low = 0;
efee7340 1489 cpu->icount_extra = 0;
8b427044 1490 count = tcg_get_icount_limit();
c96778bb 1491 timers_state.qemu_icount += count;
296af7c9
BS
1492 decr = (count > 0xffff) ? 0xffff : count;
1493 count -= decr;
28ecfd7a 1494 cpu->icount_decr.u16.low = decr;
efee7340 1495 cpu->icount_extra = count;
296af7c9 1496 }
ea3e9847 1497 ret = cpu_exec(cpu);
296af7c9 1498#ifdef CONFIG_PROFILER
89d5cbdd 1499 tcg_time += profile_getclock() - ti;
296af7c9
BS
1500#endif
1501 if (use_icount) {
1502 /* Fold pending instructions back into the
1503 instruction counter, and clear the interrupt flag. */
c96778bb
FK
1504 timers_state.qemu_icount -= (cpu->icount_decr.u16.low
1505 + cpu->icount_extra);
28ecfd7a 1506 cpu->icount_decr.u32 = 0;
efee7340 1507 cpu->icount_extra = 0;
8b427044 1508 replay_account_executed_instructions();
296af7c9
BS
1509 }
1510 return ret;
1511}
1512
bdb7ca67 1513static void tcg_exec_all(void)
296af7c9 1514{
9a36085b
JK
1515 int r;
1516
40daca54 1517 /* Account partial waits to QEMU_CLOCK_VIRTUAL. */
e76d1798 1518 qemu_account_warp_timer();
ab33fcda 1519
0ab07c62 1520 if (next_cpu == NULL) {
296af7c9 1521 next_cpu = first_cpu;
0ab07c62 1522 }
bdc44640 1523 for (; next_cpu != NULL && !exit_request; next_cpu = CPU_NEXT(next_cpu)) {
182735ef 1524 CPUState *cpu = next_cpu;
296af7c9 1525
40daca54 1526 qemu_clock_enable(QEMU_CLOCK_VIRTUAL,
ed2803da 1527 (cpu->singlestep_enabled & SSTEP_NOTIMER) == 0);
296af7c9 1528
a1fcaa73 1529 if (cpu_can_run(cpu)) {
3d57f789 1530 r = tcg_cpu_exec(cpu);
9a36085b 1531 if (r == EXCP_DEBUG) {
91325046 1532 cpu_handle_guest_debug(cpu);
3c638d06
JK
1533 break;
1534 }
f324e766 1535 } else if (cpu->stop || cpu->stopped) {
296af7c9
BS
1536 break;
1537 }
1538 }
aed807c8
PB
1539
1540 /* Pairs with smp_wmb in qemu_cpu_kick. */
1541 atomic_mb_set(&exit_request, 0);
296af7c9
BS
1542}
1543
9a78eead 1544void list_cpus(FILE *f, fprintf_function cpu_fprintf, const char *optarg)
262353cb
BS
1545{
1546 /* XXX: implement xxx_cpu_list for targets that still miss it */
e916cbf8
PM
1547#if defined(cpu_list)
1548 cpu_list(f, cpu_fprintf);
262353cb
BS
1549#endif
1550}
de0b36b6
LC
1551
1552CpuInfoList *qmp_query_cpus(Error **errp)
1553{
1554 CpuInfoList *head = NULL, *cur_item = NULL;
182735ef 1555 CPUState *cpu;
de0b36b6 1556
bdc44640 1557 CPU_FOREACH(cpu) {
de0b36b6 1558 CpuInfoList *info;
182735ef
AF
1559#if defined(TARGET_I386)
1560 X86CPU *x86_cpu = X86_CPU(cpu);
1561 CPUX86State *env = &x86_cpu->env;
1562#elif defined(TARGET_PPC)
1563 PowerPCCPU *ppc_cpu = POWERPC_CPU(cpu);
1564 CPUPPCState *env = &ppc_cpu->env;
1565#elif defined(TARGET_SPARC)
1566 SPARCCPU *sparc_cpu = SPARC_CPU(cpu);
1567 CPUSPARCState *env = &sparc_cpu->env;
1568#elif defined(TARGET_MIPS)
1569 MIPSCPU *mips_cpu = MIPS_CPU(cpu);
1570 CPUMIPSState *env = &mips_cpu->env;
48e06fe0
BK
1571#elif defined(TARGET_TRICORE)
1572 TriCoreCPU *tricore_cpu = TRICORE_CPU(cpu);
1573 CPUTriCoreState *env = &tricore_cpu->env;
182735ef 1574#endif
de0b36b6 1575
cb446eca 1576 cpu_synchronize_state(cpu);
de0b36b6
LC
1577
1578 info = g_malloc0(sizeof(*info));
1579 info->value = g_malloc0(sizeof(*info->value));
55e5c285 1580 info->value->CPU = cpu->cpu_index;
182735ef 1581 info->value->current = (cpu == first_cpu);
259186a7 1582 info->value->halted = cpu->halted;
58f88d4b 1583 info->value->qom_path = object_get_canonical_path(OBJECT(cpu));
9f09e18a 1584 info->value->thread_id = cpu->thread_id;
de0b36b6 1585#if defined(TARGET_I386)
86f4b687 1586 info->value->arch = CPU_INFO_ARCH_X86;
544a3731 1587 info->value->u.x86.pc = env->eip + env->segs[R_CS].base;
de0b36b6 1588#elif defined(TARGET_PPC)
86f4b687 1589 info->value->arch = CPU_INFO_ARCH_PPC;
544a3731 1590 info->value->u.ppc.nip = env->nip;
de0b36b6 1591#elif defined(TARGET_SPARC)
86f4b687 1592 info->value->arch = CPU_INFO_ARCH_SPARC;
544a3731
EB
1593 info->value->u.q_sparc.pc = env->pc;
1594 info->value->u.q_sparc.npc = env->npc;
de0b36b6 1595#elif defined(TARGET_MIPS)
86f4b687 1596 info->value->arch = CPU_INFO_ARCH_MIPS;
544a3731 1597 info->value->u.q_mips.PC = env->active_tc.PC;
48e06fe0 1598#elif defined(TARGET_TRICORE)
86f4b687 1599 info->value->arch = CPU_INFO_ARCH_TRICORE;
544a3731 1600 info->value->u.tricore.PC = env->PC;
86f4b687
EB
1601#else
1602 info->value->arch = CPU_INFO_ARCH_OTHER;
de0b36b6
LC
1603#endif
1604
1605 /* XXX: waiting for the qapi to support GSList */
1606 if (!cur_item) {
1607 head = cur_item = info;
1608 } else {
1609 cur_item->next = info;
1610 cur_item = info;
1611 }
1612 }
1613
1614 return head;
1615}
0cfd6a9a
LC
1616
1617void qmp_memsave(int64_t addr, int64_t size, const char *filename,
1618 bool has_cpu, int64_t cpu_index, Error **errp)
1619{
1620 FILE *f;
1621 uint32_t l;
55e5c285 1622 CPUState *cpu;
0cfd6a9a 1623 uint8_t buf[1024];
0dc9daf0 1624 int64_t orig_addr = addr, orig_size = size;
0cfd6a9a
LC
1625
1626 if (!has_cpu) {
1627 cpu_index = 0;
1628 }
1629
151d1322
AF
1630 cpu = qemu_get_cpu(cpu_index);
1631 if (cpu == NULL) {
c6bd8c70
MA
1632 error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "cpu-index",
1633 "a CPU number");
0cfd6a9a
LC
1634 return;
1635 }
1636
1637 f = fopen(filename, "wb");
1638 if (!f) {
618da851 1639 error_setg_file_open(errp, errno, filename);
0cfd6a9a
LC
1640 return;
1641 }
1642
1643 while (size != 0) {
1644 l = sizeof(buf);
1645 if (l > size)
1646 l = size;
2f4d0f59 1647 if (cpu_memory_rw_debug(cpu, addr, buf, l, 0) != 0) {
0dc9daf0
BP
1648 error_setg(errp, "Invalid addr 0x%016" PRIx64 "/size %" PRId64
1649 " specified", orig_addr, orig_size);
2f4d0f59
AK
1650 goto exit;
1651 }
0cfd6a9a 1652 if (fwrite(buf, 1, l, f) != l) {
c6bd8c70 1653 error_setg(errp, QERR_IO_ERROR);
0cfd6a9a
LC
1654 goto exit;
1655 }
1656 addr += l;
1657 size -= l;
1658 }
1659
1660exit:
1661 fclose(f);
1662}
6d3962bf
LC
1663
1664void qmp_pmemsave(int64_t addr, int64_t size, const char *filename,
1665 Error **errp)
1666{
1667 FILE *f;
1668 uint32_t l;
1669 uint8_t buf[1024];
1670
1671 f = fopen(filename, "wb");
1672 if (!f) {
618da851 1673 error_setg_file_open(errp, errno, filename);
6d3962bf
LC
1674 return;
1675 }
1676
1677 while (size != 0) {
1678 l = sizeof(buf);
1679 if (l > size)
1680 l = size;
eb6282f2 1681 cpu_physical_memory_read(addr, buf, l);
6d3962bf 1682 if (fwrite(buf, 1, l, f) != l) {
c6bd8c70 1683 error_setg(errp, QERR_IO_ERROR);
6d3962bf
LC
1684 goto exit;
1685 }
1686 addr += l;
1687 size -= l;
1688 }
1689
1690exit:
1691 fclose(f);
1692}
ab49ab5c
LC
1693
1694void qmp_inject_nmi(Error **errp)
1695{
9cb805fd 1696 nmi_monitor_handle(monitor_get_cpu_index(), errp);
ab49ab5c 1697}
27498bef
ST
1698
1699void dump_drift_info(FILE *f, fprintf_function cpu_fprintf)
1700{
1701 if (!use_icount) {
1702 return;
1703 }
1704
1705 cpu_fprintf(f, "Host - Guest clock %"PRIi64" ms\n",
1706 (cpu_get_clock() - cpu_get_icount())/SCALE_MS);
1707 if (icount_align_option) {
1708 cpu_fprintf(f, "Max guest delay %"PRIi64" ms\n", -max_delay/SCALE_MS);
1709 cpu_fprintf(f, "Max guest advance %"PRIi64" ms\n", max_advance/SCALE_MS);
1710 } else {
1711 cpu_fprintf(f, "Max guest delay NA\n");
1712 cpu_fprintf(f, "Max guest advance NA\n");
1713 }
1714}