]> git.proxmox.com Git - mirror_qemu.git/blame - cpus.c
tcg: cpus rm tcg_exec_all()
[mirror_qemu.git] / cpus.c
CommitLineData
296af7c9
BS
1/*
2 * QEMU System Emulator
3 *
4 * Copyright (c) 2003-2008 Fabrice Bellard
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to deal
8 * in the Software without restriction, including without limitation the rights
9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 * copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22 * THE SOFTWARE.
23 */
24
25/* Needed early for CONFIG_BSD etc. */
7b31bbc2 26#include "qemu/osdep.h"
33c11879
PB
27#include "qemu-common.h"
28#include "cpu.h"
83c9089e 29#include "monitor/monitor.h"
a4e15de9 30#include "qapi/qmp/qerror.h"
d49b6836 31#include "qemu/error-report.h"
9c17d615 32#include "sysemu/sysemu.h"
da31d594 33#include "sysemu/block-backend.h"
022c62cb 34#include "exec/gdbstub.h"
9c17d615
PB
35#include "sysemu/dma.h"
36#include "sysemu/kvm.h"
de0b36b6 37#include "qmp-commands.h"
63c91552 38#include "exec/exec-all.h"
296af7c9 39
1de7afc9 40#include "qemu/thread.h"
9c17d615
PB
41#include "sysemu/cpus.h"
42#include "sysemu/qtest.h"
1de7afc9
PB
43#include "qemu/main-loop.h"
44#include "qemu/bitmap.h"
cb365646 45#include "qemu/seqlock.h"
a4e15de9 46#include "qapi-event.h"
9cb805fd 47#include "hw/nmi.h"
8b427044 48#include "sysemu/replay.h"
0ff0fc19
JK
49
50#ifndef _WIN32
1de7afc9 51#include "qemu/compatfd.h"
0ff0fc19 52#endif
296af7c9 53
6d9cb73c
JK
54#ifdef CONFIG_LINUX
55
56#include <sys/prctl.h>
57
c0532a76
MT
58#ifndef PR_MCE_KILL
59#define PR_MCE_KILL 33
60#endif
61
6d9cb73c
JK
62#ifndef PR_MCE_KILL_SET
63#define PR_MCE_KILL_SET 1
64#endif
65
66#ifndef PR_MCE_KILL_EARLY
67#define PR_MCE_KILL_EARLY 1
68#endif
69
70#endif /* CONFIG_LINUX */
71
27498bef
ST
72int64_t max_delay;
73int64_t max_advance;
296af7c9 74
2adcc85d
JH
75/* vcpu throttling controls */
76static QEMUTimer *throttle_timer;
77static unsigned int throttle_percentage;
78
79#define CPU_THROTTLE_PCT_MIN 1
80#define CPU_THROTTLE_PCT_MAX 99
81#define CPU_THROTTLE_TIMESLICE_NS 10000000
82
321bc0b2
TC
83bool cpu_is_stopped(CPUState *cpu)
84{
85 return cpu->stopped || !runstate_is_running();
86}
87
a98ae1d8 88static bool cpu_thread_is_idle(CPUState *cpu)
ac873f1e 89{
c64ca814 90 if (cpu->stop || cpu->queued_work_first) {
ac873f1e
PM
91 return false;
92 }
321bc0b2 93 if (cpu_is_stopped(cpu)) {
ac873f1e
PM
94 return true;
95 }
8c2e1b00 96 if (!cpu->halted || cpu_has_work(cpu) ||
215e79c0 97 kvm_halt_in_kernel()) {
ac873f1e
PM
98 return false;
99 }
100 return true;
101}
102
103static bool all_cpu_threads_idle(void)
104{
182735ef 105 CPUState *cpu;
ac873f1e 106
bdc44640 107 CPU_FOREACH(cpu) {
182735ef 108 if (!cpu_thread_is_idle(cpu)) {
ac873f1e
PM
109 return false;
110 }
111 }
112 return true;
113}
114
946fb27c
PB
115/***********************************************************/
116/* guest cycle counter */
117
a3270e19
PB
118/* Protected by TimersState seqlock */
119
5045e9d9 120static bool icount_sleep = true;
71468395 121static int64_t vm_clock_warp_start = -1;
946fb27c
PB
122/* Conversion factor from emulated instructions to virtual clock ticks. */
123static int icount_time_shift;
124/* Arbitrarily pick 1MIPS as the minimum allowable speed. */
125#define MAX_ICOUNT_SHIFT 10
a3270e19 126
946fb27c
PB
127static QEMUTimer *icount_rt_timer;
128static QEMUTimer *icount_vm_timer;
129static QEMUTimer *icount_warp_timer;
946fb27c
PB
130
131typedef struct TimersState {
cb365646 132 /* Protected by BQL. */
946fb27c
PB
133 int64_t cpu_ticks_prev;
134 int64_t cpu_ticks_offset;
cb365646
LPF
135
136 /* cpu_clock_offset can be read out of BQL, so protect it with
137 * this lock.
138 */
139 QemuSeqLock vm_clock_seqlock;
946fb27c
PB
140 int64_t cpu_clock_offset;
141 int32_t cpu_ticks_enabled;
142 int64_t dummy;
c96778bb
FK
143
144 /* Compensate for varying guest execution speed. */
145 int64_t qemu_icount_bias;
146 /* Only written by TCG thread */
147 int64_t qemu_icount;
946fb27c
PB
148} TimersState;
149
d9cd4007 150static TimersState timers_state;
946fb27c 151
2a62914b 152int64_t cpu_get_icount_raw(void)
946fb27c
PB
153{
154 int64_t icount;
4917cf44 155 CPUState *cpu = current_cpu;
946fb27c 156
c96778bb 157 icount = timers_state.qemu_icount;
4917cf44 158 if (cpu) {
414b15c9 159 if (!cpu->can_do_io) {
2a62914b
PD
160 fprintf(stderr, "Bad icount read\n");
161 exit(1);
946fb27c 162 }
28ecfd7a 163 icount -= (cpu->icount_decr.u16.low + cpu->icount_extra);
946fb27c 164 }
2a62914b
PD
165 return icount;
166}
167
168/* Return the virtual CPU time, based on the instruction counter. */
169static int64_t cpu_get_icount_locked(void)
170{
171 int64_t icount = cpu_get_icount_raw();
3f031313 172 return timers_state.qemu_icount_bias + cpu_icount_to_ns(icount);
946fb27c
PB
173}
174
17a15f1b
PB
175int64_t cpu_get_icount(void)
176{
177 int64_t icount;
178 unsigned start;
179
180 do {
181 start = seqlock_read_begin(&timers_state.vm_clock_seqlock);
182 icount = cpu_get_icount_locked();
183 } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start));
184
185 return icount;
186}
187
3f031313
FK
/* Convert an instruction count to virtual-time nanoseconds using the
 * current (adaptive) icount_time_shift scale factor. */
int64_t cpu_icount_to_ns(int64_t icount)
{
    return icount << icount_time_shift;
}
192
d90f3cca
C
/* return the time elapsed in VM between vm_start and vm_stop.  Unless
 * icount is active, cpu_get_ticks() uses units of the host CPU cycle
 * counter.
 *
 * Caller must hold the BQL
 */
int64_t cpu_get_ticks(void)
{
    int64_t ticks;

    if (use_icount) {
        return cpu_get_icount();
    }

    ticks = timers_state.cpu_ticks_offset;
    if (timers_state.cpu_ticks_enabled) {
        ticks += cpu_get_host_ticks();
    }

    if (timers_state.cpu_ticks_prev > ticks) {
        /* Note: non increasing ticks may happen if the host uses
           software suspend.  Fold the regression into the offset so
           callers observe a monotonic value. */
        timers_state.cpu_ticks_offset += timers_state.cpu_ticks_prev - ticks;
        ticks = timers_state.cpu_ticks_prev;
    }

    /* Remember the last value returned to enforce monotonicity above.  */
    timers_state.cpu_ticks_prev = ticks;
    return ticks;
}
222
cb365646 223static int64_t cpu_get_clock_locked(void)
946fb27c 224{
1d45cea5 225 int64_t time;
cb365646 226
1d45cea5 227 time = timers_state.cpu_clock_offset;
5f3e3101 228 if (timers_state.cpu_ticks_enabled) {
1d45cea5 229 time += get_clock();
946fb27c 230 }
cb365646 231
1d45cea5 232 return time;
cb365646
LPF
233}
234
d90f3cca 235/* Return the monotonic time elapsed in VM, i.e.,
8212ff86
PM
236 * the time between vm_start and vm_stop
237 */
cb365646
LPF
238int64_t cpu_get_clock(void)
239{
240 int64_t ti;
241 unsigned start;
242
243 do {
244 start = seqlock_read_begin(&timers_state.vm_clock_seqlock);
245 ti = cpu_get_clock_locked();
246 } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start));
247
248 return ti;
946fb27c
PB
249}
250
/* enable cpu_get_ticks()
 * Caller must hold BQL which serves as mutex for vm_clock_seqlock.
 */
void cpu_enable_ticks(void)
{
    /* Here, the thing really protected by the seqlock is cpu_clock_offset;
     * cpu_ticks_offset is BQL-protected anyway. */
    seqlock_write_begin(&timers_state.vm_clock_seqlock);
    if (!timers_state.cpu_ticks_enabled) {
        /* Subtract "now" so that offset + now == 0 at the enable point;
         * elapsed time then accumulates from here. */
        timers_state.cpu_ticks_offset -= cpu_get_host_ticks();
        timers_state.cpu_clock_offset -= get_clock();
        timers_state.cpu_ticks_enabled = 1;
    }
    seqlock_write_end(&timers_state.vm_clock_seqlock);
}
265
/* disable cpu_get_ticks() : the clock is stopped.  You must not call
 * cpu_get_ticks() after that.
 * Caller must hold BQL which serves as mutex for vm_clock_seqlock.
 */
void cpu_disable_ticks(void)
{
    /* Here, the thing really protected by the seqlock is cpu_clock_offset. */
    seqlock_write_begin(&timers_state.vm_clock_seqlock);
    if (timers_state.cpu_ticks_enabled) {
        /* Freeze the offsets at their current values so subsequent reads
         * (of the clock) return the stop-time value. */
        timers_state.cpu_ticks_offset += cpu_get_host_ticks();
        timers_state.cpu_clock_offset = cpu_get_clock_locked();
        timers_state.cpu_ticks_enabled = 0;
    }
    seqlock_write_end(&timers_state.vm_clock_seqlock);
}
281
282/* Correlation between real and virtual time is always going to be
283 fairly approximate, so ignore small variation.
284 When the guest is idle real and virtual time will be aligned in
285 the IO wait loop. */
73bcb24d 286#define ICOUNT_WOBBLE (NANOSECONDS_PER_SECOND / 10)
946fb27c
PB
287
/* Feedback controller for adaptive icount (use_icount == 2): compare
 * virtual time (icount-derived) against the VM clock and nudge
 * icount_time_shift so the two stay roughly aligned. */
static void icount_adjust(void)
{
    int64_t cur_time;
    int64_t cur_icount;
    int64_t delta;

    /* Protected by TimersState mutex.  */
    static int64_t last_delta;

    /* If the VM is not running, then do nothing.  */
    if (!runstate_is_running()) {
        return;
    }

    seqlock_write_begin(&timers_state.vm_clock_seqlock);
    cur_time = cpu_get_clock_locked();
    cur_icount = cpu_get_icount_locked();

    delta = cur_icount - cur_time;
    /* FIXME: This is a very crude algorithm, somewhat prone to oscillation.  */
    if (delta > 0
        && last_delta + ICOUNT_WOBBLE < delta * 2
        && icount_time_shift > 0) {
        /* The guest is getting too far ahead.  Slow time down.  */
        icount_time_shift--;
    }
    if (delta < 0
        && last_delta - ICOUNT_WOBBLE > delta * 2
        && icount_time_shift < MAX_ICOUNT_SHIFT) {
        /* The guest is getting too far behind.  Speed time up.  */
        icount_time_shift++;
    }
    last_delta = delta;
    /* Re-base the bias so virtual time stays continuous across the
     * shift change (no jump visible to cpu_get_icount_locked()). */
    timers_state.qemu_icount_bias = cur_icount
                              - (timers_state.qemu_icount << icount_time_shift);
    seqlock_write_end(&timers_state.vm_clock_seqlock);
}
325
/* Realtime-clock trigger for icount_adjust(): re-arms itself one second
 * ahead on QEMU_CLOCK_VIRTUAL_RT, then runs the adjustment. */
static void icount_adjust_rt(void *opaque)
{
    timer_mod(icount_rt_timer,
              qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL_RT) + 1000);
    icount_adjust();
}
332
/* Virtual-clock trigger for icount_adjust(): re-arms itself 100ms of
 * virtual time ahead, then runs the adjustment. */
static void icount_adjust_vm(void *opaque)
{
    timer_mod(icount_vm_timer,
                   qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
                   NANOSECONDS_PER_SECOND / 10);
    icount_adjust();
}
340
/* Round a nanosecond count up to a whole number of instructions at the
 * current icount_time_shift scale (ceiling division by 2^shift). */
static int64_t qemu_icount_round(int64_t count)
{
    return (count + (1 << icount_time_shift) - 1) >> icount_time_shift;
}
345
/* Apply a pending clock warp: advance qemu_icount_bias by the real time
 * that elapsed since vm_clock_warp_start, so QEMU_CLOCK_VIRTUAL catches
 * up while the vCPUs were idle. */
static void icount_warp_rt(void)
{
    unsigned seq;
    int64_t warp_start;

    /* The icount_warp_timer is rescheduled soon after vm_clock_warp_start
     * changes from -1 to another value, so the race here is okay.
     */
    do {
        seq = seqlock_read_begin(&timers_state.vm_clock_seqlock);
        warp_start = vm_clock_warp_start;
    } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, seq));

    if (warp_start == -1) {
        /* No warp pending.  */
        return;
    }

    seqlock_write_begin(&timers_state.vm_clock_seqlock);
    if (runstate_is_running()) {
        /* Clock source goes through the replay layer so record/replay
         * observes a deterministic value. */
        int64_t clock = REPLAY_CLOCK(REPLAY_CLOCK_VIRTUAL_RT,
                                     cpu_get_clock_locked());
        int64_t warp_delta;

        warp_delta = clock - vm_clock_warp_start;
        if (use_icount == 2) {
            /*
             * In adaptive mode, do not let QEMU_CLOCK_VIRTUAL run too
             * far ahead of real time.
             */
            int64_t cur_icount = cpu_get_icount_locked();
            int64_t delta = clock - cur_icount;
            warp_delta = MIN(warp_delta, delta);
        }
        timers_state.qemu_icount_bias += warp_delta;
    }
    vm_clock_warp_start = -1;
    seqlock_write_end(&timers_state.vm_clock_seqlock);

    /* Warping may have made virtual timers due; wake the clock users.  */
    if (qemu_clock_expired(QEMU_CLOCK_VIRTUAL)) {
        qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
    }
}
388
/* icount_warp_timer callback (QEMU_CLOCK_VIRTUAL_RT). */
static void icount_timer_cb(void *opaque)
{
    /* No need for a checkpoint because the timer already synchronizes
     * with CHECKPOINT_CLOCK_VIRTUAL_RT.
     */
    icount_warp_rt();
}
396
8156be56
PB
/* qtest-only: advance QEMU_CLOCK_VIRTUAL to @dest, running every timer
 * that falls due on the way (in deadline order), then notify waiters. */
void qtest_clock_warp(int64_t dest)
{
    int64_t clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
    AioContext *aio_context;
    assert(qtest_enabled());
    aio_context = qemu_get_aio_context();
    while (clock < dest) {
        int64_t deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
        /* Step to the next timer deadline, or straight to dest if nearer.  */
        int64_t warp = qemu_soonest_timeout(dest - clock, deadline);

        seqlock_write_begin(&timers_state.vm_clock_seqlock);
        timers_state.qemu_icount_bias += warp;
        seqlock_write_end(&timers_state.vm_clock_seqlock);

        qemu_clock_run_timers(QEMU_CLOCK_VIRTUAL);
        timerlist_run_timers(aio_context->tlg.tl[QEMU_CLOCK_VIRTUAL]);
        clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
    }
    qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
}
417
/* If all vCPUs are idle under icount, arrange for QEMU_CLOCK_VIRTUAL to
 * jump to the next timer deadline — either immediately (sleep=off) or
 * after the corresponding amount of real time (via icount_warp_timer). */
void qemu_start_warp_timer(void)
{
    int64_t clock;
    int64_t deadline;

    if (!use_icount) {
        return;
    }

    /* Nothing to do if the VM is stopped: QEMU_CLOCK_VIRTUAL timers
     * do not fire, so computing the deadline does not make sense.
     */
    if (!runstate_is_running()) {
        return;
    }

    /* warp clock deterministically in record/replay mode */
    if (!replay_checkpoint(CHECKPOINT_CLOCK_WARP_START)) {
        return;
    }

    if (!all_cpu_threads_idle()) {
        /* Some vCPU is still executing; it will advance the clock itself.  */
        return;
    }

    if (qtest_enabled()) {
        /* When testing, qtest commands advance icount.  */
        return;
    }

    /* We want to use the earliest deadline from ALL vm_clocks */
    clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT);
    deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
    if (deadline < 0) {
        /* No virtual timer armed at all: with sleep disabled the clock
         * can never advance — warn once. */
        static bool notified;
        if (!icount_sleep && !notified) {
            error_report("WARNING: icount sleep disabled and no active timers");
            notified = true;
        }
        return;
    }

    if (deadline > 0) {
        /*
         * Ensure QEMU_CLOCK_VIRTUAL proceeds even when the virtual CPU goes to
         * sleep.  Otherwise, the CPU might be waiting for a future timer
         * interrupt to wake it up, but the interrupt never comes because
         * the vCPU isn't running any insns and thus doesn't advance the
         * QEMU_CLOCK_VIRTUAL.
         */
        if (!icount_sleep) {
            /*
             * We never let VCPUs sleep in no sleep icount mode.
             * If there is a pending QEMU_CLOCK_VIRTUAL timer we just advance
             * to the next QEMU_CLOCK_VIRTUAL event and notify it.
             * It is useful when we want a deterministic execution time,
             * isolated from host latencies.
             */
            seqlock_write_begin(&timers_state.vm_clock_seqlock);
            timers_state.qemu_icount_bias += deadline;
            seqlock_write_end(&timers_state.vm_clock_seqlock);
            qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
        } else {
            /*
             * We do stop VCPUs and only advance QEMU_CLOCK_VIRTUAL after some
             * "real" time, (related to the time left until the next event) has
             * passed.  The QEMU_CLOCK_VIRTUAL_RT clock will do this.
             * This avoids that the warps are visible externally; for example,
             * you will not be sending network packets continuously instead of
             * every 100ms.
             */
            seqlock_write_begin(&timers_state.vm_clock_seqlock);
            if (vm_clock_warp_start == -1 || vm_clock_warp_start > clock) {
                vm_clock_warp_start = clock;
            }
            seqlock_write_end(&timers_state.vm_clock_seqlock);
            timer_mod_anticipate(icount_warp_timer, clock + deadline);
        }
    } else if (deadline == 0) {
        qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
    }
}
500
e76d1798
PD
/* Cancel the warp timer and account any pending warp into the clock;
 * called when a vCPU is about to resume execution. */
static void qemu_account_warp_timer(void)
{
    if (!use_icount || !icount_sleep) {
        return;
    }

    /* Nothing to do if the VM is stopped: QEMU_CLOCK_VIRTUAL timers
     * do not fire, so computing the deadline does not make sense.
     */
    if (!runstate_is_running()) {
        return;
    }

    /* warp clock deterministically in record/replay mode */
    if (!replay_checkpoint(CHECKPOINT_CLOCK_WARP_ACCOUNT)) {
        return;
    }

    timer_del(icount_warp_timer);
    icount_warp_rt();
}
522
d09eae37
FK
/* Migration subsection predicate: send icount state only when icount is on. */
static bool icount_state_needed(void *opaque)
{
    return use_icount;
}
527
/*
 * This is a subsection for icount migration.  It carries the icount
 * bias and instruction counter so virtual time survives migration.
 */
static const VMStateDescription icount_vmstate_timers = {
    .name = "timer/icount",
    .version_id = 1,
    .minimum_version_id = 1,
    .needed = icount_state_needed,
    .fields = (VMStateField[]) {
        VMSTATE_INT64(qemu_icount_bias, TimersState),
        VMSTATE_INT64(qemu_icount, TimersState),
        VMSTATE_END_OF_LIST()
    }
};
542
946fb27c
PB
/* Top-level migration state for TimersState; "dummy" preserves the wire
 * format of an older field.  icount state travels as a subsection. */
static const VMStateDescription vmstate_timers = {
    .name = "timer",
    .version_id = 2,
    .minimum_version_id = 1,
    .fields = (VMStateField[]) {
        VMSTATE_INT64(cpu_ticks_offset, TimersState),
        VMSTATE_INT64(dummy, TimersState),
        VMSTATE_INT64_V(cpu_clock_offset, TimersState, 2),
        VMSTATE_END_OF_LIST()
    },
    .subsections = (const VMStateDescription*[]) {
        &icount_vmstate_timers,
        NULL
    }
};
558
/* Work item run on a vCPU to enforce throttling: sleep for a slice of
 * host time proportional to the configured throttle percentage.
 * Runs with the BQL held; drops it around the sleep. */
static void cpu_throttle_thread(CPUState *cpu, void *opaque)
{
    double pct;
    double throttle_ratio;
    long sleeptime_ns;

    if (!cpu_throttle_get_percentage()) {
        /* Throttling was disabled after this work was queued.  */
        return;
    }

    pct = (double)cpu_throttle_get_percentage()/100;
    /* sleep : run ratio — e.g. 50%% -> sleep as long as the timeslice.  */
    throttle_ratio = pct / (1 - pct);
    sleeptime_ns = (long)(throttle_ratio * CPU_THROTTLE_TIMESLICE_NS);

    qemu_mutex_unlock_iothread();
    /* Clear the flag before sleeping so the timer can schedule us again.  */
    atomic_set(&cpu->throttle_thread_scheduled, 0);
    g_usleep(sleeptime_ns / 1000); /* Convert ns to us for usleep call */
    qemu_mutex_lock_iothread();
}
578
/* Periodic throttle timer: queue a throttle work item on every vCPU that
 * does not already have one pending, then re-arm for the next timeslice. */
static void cpu_throttle_timer_tick(void *opaque)
{
    CPUState *cpu;
    double pct;

    /* Stop the timer if needed: not re-arming lets it lapse.  */
    if (!cpu_throttle_get_percentage()) {
        return;
    }
    CPU_FOREACH(cpu) {
        /* atomic_xchg returns the old value; only queue if not scheduled.  */
        if (!atomic_xchg(&cpu->throttle_thread_scheduled, 1)) {
            async_run_on_cpu(cpu, cpu_throttle_thread, NULL);
        }
    }

    pct = (double)cpu_throttle_get_percentage()/100;
    /* Stretch the period so that run + sleep time equals one timeslice.  */
    timer_mod(throttle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT) +
                                   CPU_THROTTLE_TIMESLICE_NS / (1-pct));
}
598
/* Set the vCPU throttle percentage (clamped to [1, 99]) and (re)start
 * the throttle timer one timeslice from now. */
void cpu_throttle_set(int new_throttle_pct)
{
    /* Ensure throttle percentage is within valid range */
    new_throttle_pct = MIN(new_throttle_pct, CPU_THROTTLE_PCT_MAX);
    new_throttle_pct = MAX(new_throttle_pct, CPU_THROTTLE_PCT_MIN);

    atomic_set(&throttle_percentage, new_throttle_pct);

    timer_mod(throttle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT) +
                                       CPU_THROTTLE_TIMESLICE_NS);
}
610
/* Disable throttling; the tick callback sees 0%% and stops re-arming. */
void cpu_throttle_stop(void)
{
    atomic_set(&throttle_percentage, 0);
}
615
616bool cpu_throttle_active(void)
617{
618 return (cpu_throttle_get_percentage() != 0);
619}
620
/* Current throttle percentage (0 when throttling is off); atomic read
 * because it is polled from vCPU threads. */
int cpu_throttle_get_percentage(void)
{
    return atomic_read(&throttle_percentage);
}
625
4603ea01
PD
/* One-time init of the tick/clock machinery: seqlock, migration state
 * registration, and the (initially unarmed) throttle timer. */
void cpu_ticks_init(void)
{
    seqlock_init(&timers_state.vm_clock_seqlock);
    vmstate_register(NULL, 0, &vmstate_timers, &timers_state);
    throttle_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL_RT,
                                           cpu_throttle_timer_tick, NULL);
}
633
/* Parse -icount options (shift=N|auto, align, sleep) and initialise
 * icount mode.  use_icount becomes 1 for a fixed shift, 2 for adaptive.
 * NOTE(review): on option errors this sets *errp and falls through in
 * places; with &error_abort callers that terminates — with a plain errp
 * the remaining initialisation still runs, matching historic behavior. */
void configure_icount(QemuOpts *opts, Error **errp)
{
    const char *option;
    char *rem_str = NULL;

    option = qemu_opt_get(opts, "shift");
    if (!option) {
        /* icount disabled; "align" alone is meaningless.  */
        if (qemu_opt_get(opts, "align") != NULL) {
            error_setg(errp, "Please specify shift option when using align");
        }
        return;
    }

    icount_sleep = qemu_opt_get_bool(opts, "sleep", true);
    if (icount_sleep) {
        icount_warp_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL_RT,
                                         icount_timer_cb, NULL);
    }

    icount_align_option = qemu_opt_get_bool(opts, "align", false);

    if (icount_align_option && !icount_sleep) {
        error_setg(errp, "align=on and sleep=off are incompatible");
    }
    if (strcmp(option, "auto") != 0) {
        /* Fixed shift: parse it and use simple (non-adaptive) icount.  */
        errno = 0;
        icount_time_shift = strtol(option, &rem_str, 0);
        if (errno != 0 || *rem_str != '\0' || !strlen(option)) {
            error_setg(errp, "icount: Invalid shift value");
        }
        use_icount = 1;
        return;
    } else if (icount_align_option) {
        error_setg(errp, "shift=auto and align=on are incompatible");
    } else if (!icount_sleep) {
        error_setg(errp, "shift=auto and sleep=off are incompatible");
    }

    use_icount = 2;

    /* 125MIPS seems a reasonable initial guess at the guest speed.
       It will be corrected fairly quickly anyway. */
    icount_time_shift = 3;

    /* Have both realtime and virtual time triggers for speed adjustment.
       The realtime trigger catches emulated time passing too slowly,
       the virtual time trigger catches emulated time passing too fast.
       Realtime triggers occur even when idle, so use them less frequently
       than VM triggers. */
    icount_rt_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL_RT,
                                   icount_adjust_rt, NULL);
    timer_mod(icount_rt_timer,
                   qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL_RT) + 1000);
    icount_vm_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
                                        icount_adjust_vm, NULL);
    timer_mod(icount_vm_timer,
                   qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
                   NANOSECONDS_PER_SECOND / 10);
}
693
296af7c9
BS
694/***********************************************************/
/* Fatal hardware-emulation error: print the message and the register
 * state of every CPU, then abort.  Never returns. */
void hw_error(const char *fmt, ...)
{
    va_list ap;
    CPUState *cpu;

    va_start(ap, fmt);
    fprintf(stderr, "qemu: hardware error: ");
    vfprintf(stderr, fmt, ap);
    fprintf(stderr, "\n");
    CPU_FOREACH(cpu) {
        fprintf(stderr, "CPU #%d:\n", cpu->cpu_index);
        cpu_dump_state(cpu, stderr, fprintf, CPU_DUMP_FPU);
    }
    va_end(ap);
    abort();
}
711
712void cpu_synchronize_all_states(void)
713{
182735ef 714 CPUState *cpu;
296af7c9 715
bdc44640 716 CPU_FOREACH(cpu) {
182735ef 717 cpu_synchronize_state(cpu);
296af7c9
BS
718 }
719}
720
721void cpu_synchronize_all_post_reset(void)
722{
182735ef 723 CPUState *cpu;
296af7c9 724
bdc44640 725 CPU_FOREACH(cpu) {
182735ef 726 cpu_synchronize_post_reset(cpu);
296af7c9
BS
727 }
728}
729
730void cpu_synchronize_all_post_init(void)
731{
182735ef 732 CPUState *cpu;
296af7c9 733
bdc44640 734 CPU_FOREACH(cpu) {
182735ef 735 cpu_synchronize_post_init(cpu);
296af7c9
BS
736 }
737}
738
/* Stop the VM: freeze the clocks, pause vCPUs, switch runstate and
 * notify listeners, then quiesce and flush block devices.
 * Returns the bdrv_flush_all() result (0 on success). */
static int do_vm_stop(RunState state)
{
    int ret = 0;

    if (runstate_is_running()) {
        /* Order matters: stop time before stopping the vCPUs.  */
        cpu_disable_ticks();
        pause_all_vcpus();
        runstate_set(state);
        vm_state_notify(0, state);
        qapi_event_send_stop(&error_abort);
    }

    /* Drain and flush even if we were already stopped.  */
    bdrv_drain_all();
    replay_disable_events();
    ret = bdrv_flush_all();

    return ret;
}
757
a1fcaa73 758static bool cpu_can_run(CPUState *cpu)
296af7c9 759{
4fdeee7c 760 if (cpu->stop) {
a1fcaa73 761 return false;
0ab07c62 762 }
321bc0b2 763 if (cpu_is_stopped(cpu)) {
a1fcaa73 764 return false;
0ab07c62 765 }
a1fcaa73 766 return true;
296af7c9
BS
767}
768
/* A vCPU hit a debug event (breakpoint/watchpoint): route it to the
 * gdbstub, request a debug stop of the VM, and park this CPU. */
static void cpu_handle_guest_debug(CPUState *cpu)
{
    gdb_set_stop_cpu(cpu);
    qemu_system_debug_request();
    cpu->stopped = true;
}
775
6d9cb73c
JK
776#ifdef CONFIG_LINUX
/* Restore the default SIGBUS disposition and re-raise it so the process
 * dies with the original signal (e.g. for a hardware memory error KVM
 * could not recover).  Falls through to abort() only if that fails. */
static void sigbus_reraise(void)
{
    sigset_t set;
    struct sigaction action;

    memset(&action, 0, sizeof(action));
    action.sa_handler = SIG_DFL;
    if (!sigaction(SIGBUS, &action, NULL)) {
        raise(SIGBUS);
        sigemptyset(&set);
        sigaddset(&set, SIGBUS);
        /* Unblock so the pending SIGBUS is delivered immediately.  */
        pthread_sigmask(SIG_UNBLOCK, &set, NULL);
    }
    perror("Failed to re-raise SIGBUS!\n");
    abort();
}
793
/* SIGBUS handler (signalfd-style siginfo): hand the fault to KVM's MCE
 * handling; if KVM cannot consume it, kill ourselves with SIGBUS. */
static void sigbus_handler(int n, struct qemu_signalfd_siginfo *siginfo,
                           void *ctx)
{
    if (kvm_on_sigbus(siginfo->ssi_code,
                      (void *)(intptr_t)siginfo->ssi_addr)) {
        sigbus_reraise();
    }
}
802
/* Install the SIGBUS handler and opt in to early machine-check kill
 * (PR_MCE_KILL_EARLY) so memory errors are delivered synchronously. */
static void qemu_init_sigbus(void)
{
    struct sigaction action;

    memset(&action, 0, sizeof(action));
    action.sa_flags = SA_SIGINFO;
    action.sa_sigaction = (void (*)(int, siginfo_t*, void*))sigbus_handler;
    sigaction(SIGBUS, &action, NULL);

    prctl(PR_MCE_KILL, PR_MCE_KILL_SET, PR_MCE_KILL_EARLY, 0, 0);
}
814
/* Drain pending SIG_IPI/SIGBUS without blocking (zero timeout), handing
 * SIGBUS to KVM.  Loops until neither signal remains pending, so the
 * vCPU thread never sleeps with one of them queued. */
static void qemu_kvm_eat_signals(CPUState *cpu)
{
    struct timespec ts = { 0, 0 };
    siginfo_t siginfo;
    sigset_t waitset;
    sigset_t chkset;
    int r;

    sigemptyset(&waitset);
    sigaddset(&waitset, SIG_IPI);
    sigaddset(&waitset, SIGBUS);

    do {
        /* Non-blocking fetch of one pending signal, if any.  */
        r = sigtimedwait(&waitset, &siginfo, &ts);
        if (r == -1 && !(errno == EAGAIN || errno == EINTR)) {
            perror("sigtimedwait");
            exit(1);
        }

        switch (r) {
        case SIGBUS:
            if (kvm_on_sigbus_vcpu(cpu, siginfo.si_code, siginfo.si_addr)) {
                sigbus_reraise();
            }
            break;
        default:
            /* SIG_IPI only needs to be consumed; -1/EAGAIN means empty.  */
            break;
        }

        r = sigpending(&chkset);
        if (r == -1) {
            perror("sigpending");
            exit(1);
        }
    } while (sigismember(&chkset, SIG_IPI) || sigismember(&chkset, SIGBUS));
}
851
6d9cb73c
JK
852#else /* !CONFIG_LINUX */
853
/* Non-Linux stub: SIGBUS/MCE plumbing is Linux-specific. */
static void qemu_init_sigbus(void)
{
}

/* Non-Linux stub: no KVM signals to consume. */
static void qemu_kvm_eat_signals(CPUState *cpu)
{
}
6d9cb73c
JK
861#endif /* !CONFIG_LINUX */
862
296af7c9 863#ifndef _WIN32
55f8d6ac
JK
/* No-op handler: SIG_IPI only needs to interrupt blocking syscalls. */
static void dummy_signal(int sig)
{
}
55f8d6ac 867
/* Per-vCPU signal setup for KVM: install a dummy SIG_IPI handler, then
 * tell KVM to atomically unblock SIG_IPI/SIGBUS only while the vCPU is
 * inside KVM_RUN (kvm_set_signal_mask takes the mask of signals to keep
 * unblocked during guest execution). */
static void qemu_kvm_init_cpu_signals(CPUState *cpu)
{
    int r;
    sigset_t set;
    struct sigaction sigact;

    memset(&sigact, 0, sizeof(sigact));
    sigact.sa_handler = dummy_signal;
    sigaction(SIG_IPI, &sigact, NULL);

    /* Start from the thread's current mask and carve out the two signals
     * KVM should deliver while running the guest. */
    pthread_sigmask(SIG_BLOCK, NULL, &set);
    sigdelset(&set, SIG_IPI);
    sigdelset(&set, SIGBUS);
    r = kvm_set_signal_mask(cpu, &set);
    if (r) {
        fprintf(stderr, "kvm_set_signal_mask: %s\n", strerror(-r));
        exit(1);
    }
}
887
55f8d6ac 888#else /* _WIN32 */
/* Win32: KVM is unavailable, so reaching this is a programming error. */
static void qemu_kvm_init_cpu_signals(CPUState *cpu)
{
    abort();
}
714bd040 893#endif /* _WIN32 */
ff48eb5f 894
b2532d88 895static QemuMutex qemu_global_mutex;
46daff13 896static QemuCond qemu_io_proceeded_cond;
6b49809c 897static unsigned iothread_requesting_mutex;
296af7c9
BS
898
899static QemuThread io_thread;
900
296af7c9
BS
901/* cpu creation */
902static QemuCond qemu_cpu_cond;
903/* system init */
296af7c9
BS
904static QemuCond qemu_pause_cond;
905
/* One-time init of the vCPU loop infrastructure: SIGBUS handling, the
 * condition variables, the BQL, and recording the iothread identity.
 * Must be called from the main (I/O) thread before CPUs are created. */
void qemu_init_cpu_loop(void)
{
    qemu_init_sigbus();
    qemu_cond_init(&qemu_cpu_cond);
    qemu_cond_init(&qemu_pause_cond);
    qemu_cond_init(&qemu_io_proceeded_cond);
    qemu_mutex_init(&qemu_global_mutex);

    qemu_thread_get_self(&io_thread);
}
916
/* Run func(cpu, data) synchronously on the given vCPU, using the BQL as
 * the lock dropped while waiting for completion. */
void run_on_cpu(CPUState *cpu, run_on_cpu_func func, void *data)
{
    do_run_on_cpu(cpu, func, data, &qemu_global_mutex);
}
921
4c055ab5
GZ
/* Tear down the kernel-side vCPU on unplug; failure is fatal because the
 * CPU cannot be left half-destroyed. */
static void qemu_kvm_destroy_vcpu(CPUState *cpu)
{
    if (kvm_destroy_vcpu(cpu) < 0) {
        error_report("kvm_destroy_vcpu failed");
        exit(EXIT_FAILURE);
    }
}
929
/* TCG vCPUs need no per-CPU teardown; kept for symmetry with KVM. */
static void qemu_tcg_destroy_vcpu(CPUState *cpu)
{
}
933
/* Shared per-wakeup bookkeeping for all vCPU loops: acknowledge a stop
 * request (waking pause_all_vcpus() waiters), run queued work items,
 * and clear the kick flag. */
static void qemu_wait_io_event_common(CPUState *cpu)
{
    if (cpu->stop) {
        cpu->stop = false;
        cpu->stopped = true;
        qemu_cond_broadcast(&qemu_pause_cond);
    }
    process_queued_cpu_work(cpu);
    cpu->thread_kicked = false;
}
944
/* TCG (single-threaded) idle wait: sleep while every CPU is idle, yield
 * to an iothread that wants the BQL, then run the common wakeup work for
 * ALL CPUs (the single TCG thread services them all).
 * Note: CPU_FOREACH deliberately reuses the 'cpu' parameter as its
 * iteration variable — the incoming value is only needed for halt_cond. */
static void qemu_tcg_wait_io_event(CPUState *cpu)
{
    while (all_cpu_threads_idle()) {
        qemu_cond_wait(cpu->halt_cond, &qemu_global_mutex);
    }

    while (iothread_requesting_mutex) {
        qemu_cond_wait(&qemu_io_proceeded_cond, &qemu_global_mutex);
    }

    CPU_FOREACH(cpu) {
        qemu_wait_io_event_common(cpu);
    }
}
959
/* KVM per-vCPU idle wait: sleep while this CPU is idle, then drain any
 * pending SIG_IPI/SIGBUS and run the common wakeup bookkeeping. */
static void qemu_kvm_wait_io_event(CPUState *cpu)
{
    while (cpu_thread_is_idle(cpu)) {
        qemu_cond_wait(cpu->halt_cond, &qemu_global_mutex);
    }

    qemu_kvm_eat_signals(cpu);
    qemu_wait_io_event_common(cpu);
}
969
/* Thread body for a KVM vCPU: initialise the kernel vCPU and signal
 * handling, announce creation, then loop running the guest until the
 * CPU is unplugged.  Runs with the BQL held except inside kvm_cpu_exec. */
static void *qemu_kvm_cpu_thread_fn(void *arg)
{
    CPUState *cpu = arg;
    int r;

    rcu_register_thread();

    qemu_mutex_lock_iothread();
    qemu_thread_get_self(cpu->thread);
    cpu->thread_id = qemu_get_thread_id();
    cpu->can_do_io = 1;
    current_cpu = cpu;

    r = kvm_init_vcpu(cpu);
    if (r < 0) {
        fprintf(stderr, "kvm_init_vcpu failed: %s\n", strerror(-r));
        exit(1);
    }

    qemu_kvm_init_cpu_signals(cpu);

    /* signal CPU creation: qemu_init_vcpu() waits on qemu_cpu_cond.  */
    cpu->created = true;
    qemu_cond_signal(&qemu_cpu_cond);

    do {
        if (cpu_can_run(cpu)) {
            r = kvm_cpu_exec(cpu);
            if (r == EXCP_DEBUG) {
                cpu_handle_guest_debug(cpu);
            }
        }
        qemu_kvm_wait_io_event(cpu);
    } while (!cpu->unplug || cpu_can_run(cpu));

    /* Unplug path: destroy the kernel vCPU and wake whoever waits for
     * created to flip back to false. */
    qemu_kvm_destroy_vcpu(cpu);
    cpu->created = false;
    qemu_cond_signal(&qemu_cpu_cond);
    qemu_mutex_unlock_iothread();
    return NULL;
}
1011
c7f0f3b1
AL
/* Thread body for the qtest "dummy" accelerator: no guest code runs;
 * the thread just parks in sigwait(SIG_IPI) and services queued work
 * when kicked.  Not supported on Windows (no sigwait). */
static void *qemu_dummy_cpu_thread_fn(void *arg)
{
#ifdef _WIN32
    fprintf(stderr, "qtest is not supported under Windows\n");
    exit(1);
#else
    CPUState *cpu = arg;
    sigset_t waitset;
    int r;

    rcu_register_thread();

    qemu_mutex_lock_iothread();
    qemu_thread_get_self(cpu->thread);
    cpu->thread_id = qemu_get_thread_id();
    cpu->can_do_io = 1;

    sigemptyset(&waitset);
    sigaddset(&waitset, SIG_IPI);

    /* signal CPU creation: qemu_init_vcpu() waits on qemu_cpu_cond.  */
    cpu->created = true;
    qemu_cond_signal(&qemu_cpu_cond);

    current_cpu = cpu;
    while (1) {
        /* Drop current_cpu and the BQL while parked in sigwait.  */
        current_cpu = NULL;
        qemu_mutex_unlock_iothread();
        do {
            int sig;
            r = sigwait(&waitset, &sig);
        } while (r == -1 && (errno == EAGAIN || errno == EINTR));
        if (r == -1) {
            perror("sigwait");
            exit(1);
        }
        qemu_mutex_lock_iothread();
        current_cpu = cpu;
        qemu_wait_io_event_common(cpu);
    }

    return NULL;
#endif
}
1056
1be7fcb8
AB
1057static int64_t tcg_get_icount_limit(void)
1058{
1059 int64_t deadline;
1060
1061 if (replay_mode != REPLAY_MODE_PLAY) {
1062 deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
1063
1064 /* Maintain prior (possibly buggy) behaviour where if no deadline
1065 * was set (as there is no QEMU_CLOCK_VIRTUAL timer) or it is more than
1066 * INT32_MAX nanoseconds ahead, we still use INT32_MAX
1067 * nanoseconds.
1068 */
1069 if ((deadline < 0) || (deadline > INT32_MAX)) {
1070 deadline = INT32_MAX;
1071 }
1072
1073 return qemu_icount_round(deadline);
1074 } else {
1075 return replay_get_instructions();
1076 }
1077}
1078
/* Execute one slice of guest code on @cpu under TCG and return the
 * cpu_exec() exit reason (e.g. EXCP_DEBUG).  When icount is enabled,
 * the instruction budget for the slice is set up beforehand and any
 * unexecuted remainder is folded back afterwards.
 */
static int tcg_cpu_exec(CPUState *cpu)
{
    int ret;
#ifdef CONFIG_PROFILER
    int64_t ti;
#endif

#ifdef CONFIG_PROFILER
    ti = profile_getclock();
#endif
    if (use_icount) {
        int64_t count;
        int decr;
        /* Subtract what is still pending from the global counter before
         * handing out a fresh budget. */
        timers_state.qemu_icount -= (cpu->icount_decr.u16.low
                                    + cpu->icount_extra);
        cpu->icount_decr.u16.low = 0;
        cpu->icount_extra = 0;
        count = tcg_get_icount_limit();
        timers_state.qemu_icount += count;
        /* The low 16-bit decrementer holds at most 0xffff instructions;
         * the overflow goes into icount_extra. */
        decr = (count > 0xffff) ? 0xffff : count;
        count -= decr;
        cpu->icount_decr.u16.low = decr;
        cpu->icount_extra = count;
    }
    cpu_exec_start(cpu);
    ret = cpu_exec(cpu);
    cpu_exec_end(cpu);
#ifdef CONFIG_PROFILER
    tcg_time += profile_getclock() - ti;
#endif
    if (use_icount) {
        /* Fold pending instructions back into the
           instruction counter, and clear the interrupt flag.  */
        timers_state.qemu_icount -= (cpu->icount_decr.u16.low
                        + cpu->icount_extra);
        /* Writing u32 clears both the counter half and the interrupt
         * request half of icount_decr at once. */
        cpu->icount_decr.u32 = 0;
        cpu->icount_extra = 0;
        replay_account_executed_instructions();
    }
    return ret;
}
1120
c93bbbef
AB
1121/* Destroy any remaining vCPUs which have been unplugged and have
1122 * finished running
1123 */
1124static void deal_with_unplugged_cpus(void)
1be7fcb8 1125{
c93bbbef 1126 CPUState *cpu;
1be7fcb8 1127
c93bbbef
AB
1128 CPU_FOREACH(cpu) {
1129 if (cpu->unplug && !cpu_can_run(cpu)) {
1130 qemu_tcg_destroy_vcpu(cpu);
1131 cpu->created = false;
1132 qemu_cond_signal(&qemu_cpu_cond);
1be7fcb8
AB
1133 break;
1134 }
1135 }
1be7fcb8 1136}
bdb7ca67 1137
/* Single shared thread body for all TCG vCPUs: round-robins over the
 * CPU list executing one slice per runnable vCPU, interleaved with
 * warp-timer accounting, I/O-thread event handling and unplug cleanup.
 * Runs with the iothread (BQL) mutex held except inside
 * qemu_tcg_wait_io_event().
 */
static void *qemu_tcg_cpu_thread_fn(void *arg)
{
    CPUState *cpu = arg;

    rcu_register_thread();

    qemu_mutex_lock_iothread();
    qemu_thread_get_self(cpu->thread);

    /* NOTE: this loop deliberately reuses 'cpu' to mark every vCPU as
     * created, since this one thread serves them all. */
    CPU_FOREACH(cpu) {
        cpu->thread_id = qemu_get_thread_id();
        cpu->created = true;
        cpu->can_do_io = 1;
    }
    qemu_cond_signal(&qemu_cpu_cond);

    /* wait for initial kick-off after machine start */
    while (first_cpu->stopped) {
        qemu_cond_wait(first_cpu->halt_cond, &qemu_global_mutex);

        /* process any pending work */
        CPU_FOREACH(cpu) {
            qemu_wait_io_event_common(cpu);
        }
    }

    /* process any pending work */
    atomic_mb_set(&exit_request, 1);

    cpu = first_cpu;

    while (1) {
        /* Account partial waits to QEMU_CLOCK_VIRTUAL.  */
        qemu_account_warp_timer();

        /* Wrap around to the start of the list when the previous pass
         * ran off the end. */
        if (!cpu) {
            cpu = first_cpu;
        }

        for (; cpu != NULL && !exit_request; cpu = CPU_NEXT(cpu)) {

            /* Stop virtual time while single-stepping with NOTIMER. */
            qemu_clock_enable(QEMU_CLOCK_VIRTUAL,
                              (cpu->singlestep_enabled & SSTEP_NOTIMER) == 0);

            if (cpu_can_run(cpu)) {
                int r;
                r = tcg_cpu_exec(cpu);
                if (r == EXCP_DEBUG) {
                    cpu_handle_guest_debug(cpu);
                    break;
                }
            } else if (cpu->stop || cpu->stopped) {
                /* Skip past an unplugged vCPU so the next pass resumes
                 * with its successor. */
                if (cpu->unplug) {
                    cpu = CPU_NEXT(cpu);
                }
                break;
            }

        } /* for cpu.. */

        /* Pairs with smp_wmb in qemu_cpu_kick.  */
        atomic_mb_set(&exit_request, 0);

        if (use_icount) {
            int64_t deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);

            /* A zero deadline means a timer is already due: notify so it
             * fires before the next execution slice. */
            if (deadline == 0) {
                qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
            }
        }
        qemu_tcg_wait_io_event(QTAILQ_FIRST(&cpus));
        deal_with_unplugged_cpus();
    }

    return NULL;
}
1214
2ff09a40 1215static void qemu_cpu_kick_thread(CPUState *cpu)
cc015e9a
PB
1216{
1217#ifndef _WIN32
1218 int err;
1219
e0c38211
PB
1220 if (cpu->thread_kicked) {
1221 return;
9102deda 1222 }
e0c38211 1223 cpu->thread_kicked = true;
814e612e 1224 err = pthread_kill(cpu->thread->thread, SIG_IPI);
cc015e9a
PB
1225 if (err) {
1226 fprintf(stderr, "qemu:%s: %s", __func__, strerror(err));
1227 exit(1);
1228 }
1229#else /* _WIN32 */
e0c38211
PB
1230 abort();
1231#endif
1232}
ed9164a3 1233
/* Kick the TCG thread out of the translated-code loop without touching
 * halt conditions: raise exit_request, then cpu_exit() whichever vCPU
 * is currently executing (if any).
 */
static void qemu_cpu_kick_no_halt(void)
{
    CPUState *cpu;
    /* Ensure whatever caused the exit has reached the CPU threads before
     * writing exit_request.
     */
    atomic_mb_set(&exit_request, 1);
    /* NOTE: the barrier pair here is order-critical: exit_request must be
     * visible before tcg_current_cpu is sampled. */
    cpu = atomic_mb_read(&tcg_current_cpu);
    if (cpu) {
        cpu_exit(cpu);
    }
}
1246
c08d7424 1247void qemu_cpu_kick(CPUState *cpu)
296af7c9 1248{
f5c121b8 1249 qemu_cond_broadcast(cpu->halt_cond);
e0c38211
PB
1250 if (tcg_enabled()) {
1251 qemu_cpu_kick_no_halt();
1252 } else {
1253 qemu_cpu_kick_thread(cpu);
1254 }
296af7c9
BS
1255}
1256
46d62fac 1257void qemu_cpu_kick_self(void)
296af7c9 1258{
4917cf44 1259 assert(current_cpu);
9102deda 1260 qemu_cpu_kick_thread(current_cpu);
296af7c9
BS
1261}
1262
60e82579 1263bool qemu_cpu_is_self(CPUState *cpu)
296af7c9 1264{
814e612e 1265 return qemu_thread_is_self(cpu->thread);
296af7c9
BS
1266}
1267
79e2b9ae 1268bool qemu_in_vcpu_thread(void)
aa723c23 1269{
4917cf44 1270 return current_cpu && qemu_cpu_is_self(current_cpu);
aa723c23
JQ
1271}
1272
afbe7053
PB
/* Per-thread record of whether this thread currently holds the BQL;
 * maintained by qemu_mutex_lock/unlock_iothread() below. */
static __thread bool iothread_locked = false;

/* Query whether the calling thread holds the iothread (BQL) mutex. */
bool qemu_mutex_iothread_locked(void)
{
    return iothread_locked;
}
1279
296af7c9
BS
/* Acquire the iothread (BQL) mutex.  Under TCG the mutex may be held by
 * a vCPU thread that is busy executing translated code, so a contending
 * I/O thread first bumps iothread_requesting_mutex and kicks the vCPU
 * out of the execution loop before blocking on the lock.
 */
void qemu_mutex_lock_iothread(void)
{
    atomic_inc(&iothread_requesting_mutex);
    /* In the simple case there is no need to bump the VCPU thread out of
     * TCG code execution.
     */
    if (!tcg_enabled() || qemu_in_vcpu_thread() ||
        !first_cpu || !first_cpu->created) {
        qemu_mutex_lock(&qemu_global_mutex);
        atomic_dec(&iothread_requesting_mutex);
    } else {
        /* Fast path: try the lock; on contention, force the running vCPU
         * out of guest code so it releases the mutex promptly. */
        if (qemu_mutex_trylock(&qemu_global_mutex)) {
            qemu_cpu_kick_no_halt();
            qemu_mutex_lock(&qemu_global_mutex);
        }
        atomic_dec(&iothread_requesting_mutex);
        /* Let vCPU threads waiting in qemu_tcg_wait_io_event() resume. */
        qemu_cond_broadcast(&qemu_io_proceeded_cond);
    }
    iothread_locked = true;
}
1300
/* Release the iothread (BQL) mutex; the per-thread flag is cleared
 * before the unlock, while the lock is still held. */
void qemu_mutex_unlock_iothread(void)
{
    iothread_locked = false;
    qemu_mutex_unlock(&qemu_global_mutex);
}
1306
e8faee06 1307static bool all_vcpus_paused(void)
296af7c9 1308{
bdc44640 1309 CPUState *cpu;
296af7c9 1310
bdc44640 1311 CPU_FOREACH(cpu) {
182735ef 1312 if (!cpu->stopped) {
e8faee06 1313 return false;
0ab07c62 1314 }
296af7c9
BS
1315 }
1316
e8faee06 1317 return true;
296af7c9
BS
1318}
1319
/* Request every vCPU to stop and wait until all have acknowledged.
 * Callable from the I/O thread or (with special handling) from a vCPU
 * thread itself.
 */
void pause_all_vcpus(void)
{
    CPUState *cpu;

    qemu_clock_enable(QEMU_CLOCK_VIRTUAL, false);
    CPU_FOREACH(cpu) {
        cpu->stop = true;
        qemu_cpu_kick(cpu);
    }

    if (qemu_in_vcpu_thread()) {
        cpu_stop_current();
        if (!kvm_enabled()) {
            /* Single TCG thread drives all vCPUs: it cannot wait on
             * itself, so mark everything stopped directly and return. */
            CPU_FOREACH(cpu) {
                cpu->stop = false;
                cpu->stopped = true;
            }
            return;
        }
    }

    while (!all_vcpus_paused()) {
        qemu_cond_wait(&qemu_pause_cond, &qemu_global_mutex);
        /* Re-kick in case a vCPU missed the original request. */
        CPU_FOREACH(cpu) {
            qemu_cpu_kick(cpu);
        }
    }
}
1348
2993683b
IM
/* Clear @cpu's stop state and kick it back into execution. */
void cpu_resume(CPUState *cpu)
{
    cpu->stop = false;
    cpu->stopped = false;
    qemu_cpu_kick(cpu);
}
1355
296af7c9
BS
1356void resume_all_vcpus(void)
1357{
bdc44640 1358 CPUState *cpu;
296af7c9 1359
40daca54 1360 qemu_clock_enable(QEMU_CLOCK_VIRTUAL, true);
bdc44640 1361 CPU_FOREACH(cpu) {
182735ef 1362 cpu_resume(cpu);
296af7c9
BS
1363 }
1364}
1365
4c055ab5
GZ
/* Asynchronously request removal (hot-unplug) of @cpu: flag it to stop
 * and unplug, then kick it so the vCPU thread notices. */
void cpu_remove(CPUState *cpu)
{
    cpu->stop = true;
    cpu->unplug = true;
    qemu_cpu_kick(cpu);
}
1372
2c579042
BR
1373void cpu_remove_sync(CPUState *cpu)
1374{
1375 cpu_remove(cpu);
1376 while (cpu->created) {
1377 qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
1378 }
1379}
1380
4900116e
DDAG
1381/* For temporary buffers for forming a name */
1382#define VCPU_THREAD_NAME_SIZE 16
1383
/* Attach @cpu to the single shared TCG vCPU thread, creating that
 * thread (and its halt condition) on first use; later vCPUs just reuse
 * the cached thread/cond stored in the function-local statics.
 */
static void qemu_tcg_init_vcpu(CPUState *cpu)
{
    char thread_name[VCPU_THREAD_NAME_SIZE];
    static QemuCond *tcg_halt_cond;
    static QemuThread *tcg_cpu_thread;

    /* share a single thread for all cpus with TCG */
    if (!tcg_cpu_thread) {
        cpu->thread = g_malloc0(sizeof(QemuThread));
        cpu->halt_cond = g_malloc0(sizeof(QemuCond));
        qemu_cond_init(cpu->halt_cond);
        tcg_halt_cond = cpu->halt_cond;
        snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/TCG",
                 cpu->cpu_index);
        qemu_thread_create(cpu->thread, thread_name, qemu_tcg_cpu_thread_fn,
                           cpu, QEMU_THREAD_JOINABLE);
#ifdef _WIN32
        cpu->hThread = qemu_thread_get_handle(cpu->thread);
#endif
        /* Wait for the thread to report creation before publishing it. */
        while (!cpu->created) {
            qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
        }
        tcg_cpu_thread = cpu->thread;
    } else {
        cpu->thread = tcg_cpu_thread;
        cpu->halt_cond = tcg_halt_cond;
    }
}
1412
48a106bd 1413static void qemu_kvm_start_vcpu(CPUState *cpu)
296af7c9 1414{
4900116e
DDAG
1415 char thread_name[VCPU_THREAD_NAME_SIZE];
1416
814e612e 1417 cpu->thread = g_malloc0(sizeof(QemuThread));
f5c121b8
AF
1418 cpu->halt_cond = g_malloc0(sizeof(QemuCond));
1419 qemu_cond_init(cpu->halt_cond);
4900116e
DDAG
1420 snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/KVM",
1421 cpu->cpu_index);
1422 qemu_thread_create(cpu->thread, thread_name, qemu_kvm_cpu_thread_fn,
1423 cpu, QEMU_THREAD_JOINABLE);
61a46217 1424 while (!cpu->created) {
18a85728 1425 qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
0ab07c62 1426 }
296af7c9
BS
1427}
1428
10a9021d 1429static void qemu_dummy_start_vcpu(CPUState *cpu)
c7f0f3b1 1430{
4900116e
DDAG
1431 char thread_name[VCPU_THREAD_NAME_SIZE];
1432
814e612e 1433 cpu->thread = g_malloc0(sizeof(QemuThread));
f5c121b8
AF
1434 cpu->halt_cond = g_malloc0(sizeof(QemuCond));
1435 qemu_cond_init(cpu->halt_cond);
4900116e
DDAG
1436 snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/DUMMY",
1437 cpu->cpu_index);
1438 qemu_thread_create(cpu->thread, thread_name, qemu_dummy_cpu_thread_fn, cpu,
c7f0f3b1 1439 QEMU_THREAD_JOINABLE);
61a46217 1440 while (!cpu->created) {
c7f0f3b1
AL
1441 qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
1442 }
1443}
1444
c643bed9 1445void qemu_init_vcpu(CPUState *cpu)
296af7c9 1446{
ce3960eb
AF
1447 cpu->nr_cores = smp_cores;
1448 cpu->nr_threads = smp_threads;
f324e766 1449 cpu->stopped = true;
56943e8c
PM
1450
1451 if (!cpu->as) {
1452 /* If the target cpu hasn't set up any address spaces itself,
1453 * give it the default one.
1454 */
6731d864
PC
1455 AddressSpace *as = address_space_init_shareable(cpu->memory,
1456 "cpu-memory");
12ebc9a7 1457 cpu->num_ases = 1;
6731d864 1458 cpu_address_space_init(cpu, as, 0);
56943e8c
PM
1459 }
1460
0ab07c62 1461 if (kvm_enabled()) {
48a106bd 1462 qemu_kvm_start_vcpu(cpu);
c7f0f3b1 1463 } else if (tcg_enabled()) {
e5ab30a2 1464 qemu_tcg_init_vcpu(cpu);
c7f0f3b1 1465 } else {
10a9021d 1466 qemu_dummy_start_vcpu(cpu);
0ab07c62 1467 }
296af7c9
BS
1468}
1469
b4a3d965 1470void cpu_stop_current(void)
296af7c9 1471{
4917cf44
AF
1472 if (current_cpu) {
1473 current_cpu->stop = false;
1474 current_cpu->stopped = true;
1475 cpu_exit(current_cpu);
96bce683 1476 qemu_cond_broadcast(&qemu_pause_cond);
b4a3d965 1477 }
296af7c9
BS
1478}
1479
56983463 1480int vm_stop(RunState state)
296af7c9 1481{
aa723c23 1482 if (qemu_in_vcpu_thread()) {
74892d24 1483 qemu_system_vmstop_request_prepare();
1dfb4dd9 1484 qemu_system_vmstop_request(state);
296af7c9
BS
1485 /*
1486 * FIXME: should not return to device code in case
1487 * vm_stop() has been requested.
1488 */
b4a3d965 1489 cpu_stop_current();
56983463 1490 return 0;
296af7c9 1491 }
56983463
KW
1492
1493 return do_vm_stop(state);
296af7c9
BS
1494}
1495
8a9236f1
LC
1496/* does a state transition even if the VM is already stopped,
1497 current state is forgotten forever */
56983463 1498int vm_stop_force_state(RunState state)
8a9236f1
LC
1499{
1500 if (runstate_is_running()) {
56983463 1501 return vm_stop(state);
8a9236f1
LC
1502 } else {
1503 runstate_set(state);
b2780d32
WC
1504
1505 bdrv_drain_all();
594a45ce
KW
1506 /* Make sure to return an error if the flush in a previous vm_stop()
1507 * failed. */
22af08ea 1508 return bdrv_flush_all();
8a9236f1
LC
1509 }
1510}
1511
/* Print the list of selectable CPU models to @f, for targets that
 * provide a cpu_list() implementation; silently does nothing otherwise.
 */
void list_cpus(FILE *f, fprintf_function cpu_fprintf, const char *optarg)
{
    /* XXX: implement xxx_cpu_list for targets that still miss it */
#if defined(cpu_list)
    cpu_list(f, cpu_fprintf);
#endif
}
de0b36b6
LC
1519
/* QMP 'query-cpus' handler: build a CpuInfoList describing every vCPU,
 * including per-target architecture details selected at compile time.
 * Caller owns (and must free) the returned list.
 */
CpuInfoList *qmp_query_cpus(Error **errp)
{
    CpuInfoList *head = NULL, *cur_item = NULL;
    CPUState *cpu;

    CPU_FOREACH(cpu) {
        CpuInfoList *info;
        /* Pick the target-specific env for the arch-dependent fields. */
#if defined(TARGET_I386)
        X86CPU *x86_cpu = X86_CPU(cpu);
        CPUX86State *env = &x86_cpu->env;
#elif defined(TARGET_PPC)
        PowerPCCPU *ppc_cpu = POWERPC_CPU(cpu);
        CPUPPCState *env = &ppc_cpu->env;
#elif defined(TARGET_SPARC)
        SPARCCPU *sparc_cpu = SPARC_CPU(cpu);
        CPUSPARCState *env = &sparc_cpu->env;
#elif defined(TARGET_MIPS)
        MIPSCPU *mips_cpu = MIPS_CPU(cpu);
        CPUMIPSState *env = &mips_cpu->env;
#elif defined(TARGET_TRICORE)
        TriCoreCPU *tricore_cpu = TRICORE_CPU(cpu);
        CPUTriCoreState *env = &tricore_cpu->env;
#endif

        /* Ensure the register state reported below is up to date. */
        cpu_synchronize_state(cpu);

        info = g_malloc0(sizeof(*info));
        info->value = g_malloc0(sizeof(*info->value));
        info->value->CPU = cpu->cpu_index;
        info->value->current = (cpu == first_cpu);
        info->value->halted = cpu->halted;
        info->value->qom_path = object_get_canonical_path(OBJECT(cpu));
        info->value->thread_id = cpu->thread_id;
#if defined(TARGET_I386)
        info->value->arch = CPU_INFO_ARCH_X86;
        info->value->u.x86.pc = env->eip + env->segs[R_CS].base;
#elif defined(TARGET_PPC)
        info->value->arch = CPU_INFO_ARCH_PPC;
        info->value->u.ppc.nip = env->nip;
#elif defined(TARGET_SPARC)
        info->value->arch = CPU_INFO_ARCH_SPARC;
        info->value->u.q_sparc.pc = env->pc;
        info->value->u.q_sparc.npc = env->npc;
#elif defined(TARGET_MIPS)
        info->value->arch = CPU_INFO_ARCH_MIPS;
        info->value->u.q_mips.PC = env->active_tc.PC;
#elif defined(TARGET_TRICORE)
        info->value->arch = CPU_INFO_ARCH_TRICORE;
        info->value->u.tricore.PC = env->PC;
#else
        info->value->arch = CPU_INFO_ARCH_OTHER;
#endif

        /* XXX: waiting for the qapi to support GSList */
        if (!cur_item) {
            head = cur_item = info;
        } else {
            cur_item->next = info;
            cur_item = info;
        }
    }

    return head;
}
0cfd6a9a
LC
1584
1585void qmp_memsave(int64_t addr, int64_t size, const char *filename,
1586 bool has_cpu, int64_t cpu_index, Error **errp)
1587{
1588 FILE *f;
1589 uint32_t l;
55e5c285 1590 CPUState *cpu;
0cfd6a9a 1591 uint8_t buf[1024];
0dc9daf0 1592 int64_t orig_addr = addr, orig_size = size;
0cfd6a9a
LC
1593
1594 if (!has_cpu) {
1595 cpu_index = 0;
1596 }
1597
151d1322
AF
1598 cpu = qemu_get_cpu(cpu_index);
1599 if (cpu == NULL) {
c6bd8c70
MA
1600 error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "cpu-index",
1601 "a CPU number");
0cfd6a9a
LC
1602 return;
1603 }
1604
1605 f = fopen(filename, "wb");
1606 if (!f) {
618da851 1607 error_setg_file_open(errp, errno, filename);
0cfd6a9a
LC
1608 return;
1609 }
1610
1611 while (size != 0) {
1612 l = sizeof(buf);
1613 if (l > size)
1614 l = size;
2f4d0f59 1615 if (cpu_memory_rw_debug(cpu, addr, buf, l, 0) != 0) {
0dc9daf0
BP
1616 error_setg(errp, "Invalid addr 0x%016" PRIx64 "/size %" PRId64
1617 " specified", orig_addr, orig_size);
2f4d0f59
AK
1618 goto exit;
1619 }
0cfd6a9a 1620 if (fwrite(buf, 1, l, f) != l) {
c6bd8c70 1621 error_setg(errp, QERR_IO_ERROR);
0cfd6a9a
LC
1622 goto exit;
1623 }
1624 addr += l;
1625 size -= l;
1626 }
1627
1628exit:
1629 fclose(f);
1630}
6d3962bf
LC
1631
1632void qmp_pmemsave(int64_t addr, int64_t size, const char *filename,
1633 Error **errp)
1634{
1635 FILE *f;
1636 uint32_t l;
1637 uint8_t buf[1024];
1638
1639 f = fopen(filename, "wb");
1640 if (!f) {
618da851 1641 error_setg_file_open(errp, errno, filename);
6d3962bf
LC
1642 return;
1643 }
1644
1645 while (size != 0) {
1646 l = sizeof(buf);
1647 if (l > size)
1648 l = size;
eb6282f2 1649 cpu_physical_memory_read(addr, buf, l);
6d3962bf 1650 if (fwrite(buf, 1, l, f) != l) {
c6bd8c70 1651 error_setg(errp, QERR_IO_ERROR);
6d3962bf
LC
1652 goto exit;
1653 }
1654 addr += l;
1655 size -= l;
1656 }
1657
1658exit:
1659 fclose(f);
1660}
ab49ab5c
LC
1661
/* QMP 'inject-nmi' handler: deliver an NMI via the monitor's current
 * CPU; errors are propagated through @errp. */
void qmp_inject_nmi(Error **errp)
{
    nmi_monitor_handle(monitor_get_cpu_index(), errp);
}
27498bef
ST
1666
1667void dump_drift_info(FILE *f, fprintf_function cpu_fprintf)
1668{
1669 if (!use_icount) {
1670 return;
1671 }
1672
1673 cpu_fprintf(f, "Host - Guest clock %"PRIi64" ms\n",
1674 (cpu_get_clock() - cpu_get_icount())/SCALE_MS);
1675 if (icount_align_option) {
1676 cpu_fprintf(f, "Max guest delay %"PRIi64" ms\n", -max_delay/SCALE_MS);
1677 cpu_fprintf(f, "Max guest advance %"PRIi64" ms\n", max_advance/SCALE_MS);
1678 } else {
1679 cpu_fprintf(f, "Max guest delay NA\n");
1680 cpu_fprintf(f, "Max guest advance NA\n");
1681 }
1682}