]> git.proxmox.com Git - mirror_qemu.git/blame - cpus.c
replay: introduce block devices record/replay
[mirror_qemu.git] / cpus.c
CommitLineData
296af7c9
BS
1/*
2 * QEMU System Emulator
3 *
4 * Copyright (c) 2003-2008 Fabrice Bellard
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to deal
8 * in the Software without restriction, including without limitation the rights
9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 * copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22 * THE SOFTWARE.
23 */
24
25/* Needed early for CONFIG_BSD etc. */
7b31bbc2 26#include "qemu/osdep.h"
296af7c9 27
83c9089e 28#include "monitor/monitor.h"
a4e15de9 29#include "qapi/qmp/qerror.h"
d49b6836 30#include "qemu/error-report.h"
9c17d615 31#include "sysemu/sysemu.h"
da31d594 32#include "sysemu/block-backend.h"
022c62cb 33#include "exec/gdbstub.h"
9c17d615
PB
34#include "sysemu/dma.h"
35#include "sysemu/kvm.h"
de0b36b6 36#include "qmp-commands.h"
296af7c9 37
1de7afc9 38#include "qemu/thread.h"
9c17d615
PB
39#include "sysemu/cpus.h"
40#include "sysemu/qtest.h"
1de7afc9
PB
41#include "qemu/main-loop.h"
42#include "qemu/bitmap.h"
cb365646 43#include "qemu/seqlock.h"
a4e15de9 44#include "qapi-event.h"
9cb805fd 45#include "hw/nmi.h"
8b427044 46#include "sysemu/replay.h"
0ff0fc19
JK
47
48#ifndef _WIN32
1de7afc9 49#include "qemu/compatfd.h"
0ff0fc19 50#endif
296af7c9 51
6d9cb73c
JK
52#ifdef CONFIG_LINUX
53
54#include <sys/prctl.h>
55
c0532a76
MT
56#ifndef PR_MCE_KILL
57#define PR_MCE_KILL 33
58#endif
59
6d9cb73c
JK
60#ifndef PR_MCE_KILL_SET
61#define PR_MCE_KILL_SET 1
62#endif
63
64#ifndef PR_MCE_KILL_EARLY
65#define PR_MCE_KILL_EARLY 1
66#endif
67
68#endif /* CONFIG_LINUX */
69
182735ef 70static CPUState *next_cpu;
27498bef
ST
71int64_t max_delay;
72int64_t max_advance;
296af7c9 73
2adcc85d
JH
74/* vcpu throttling controls */
75static QEMUTimer *throttle_timer;
76static unsigned int throttle_percentage;
77
78#define CPU_THROTTLE_PCT_MIN 1
79#define CPU_THROTTLE_PCT_MAX 99
80#define CPU_THROTTLE_TIMESLICE_NS 10000000
81
321bc0b2
TC
82bool cpu_is_stopped(CPUState *cpu)
83{
84 return cpu->stopped || !runstate_is_running();
85}
86
a98ae1d8 87static bool cpu_thread_is_idle(CPUState *cpu)
ac873f1e 88{
c64ca814 89 if (cpu->stop || cpu->queued_work_first) {
ac873f1e
PM
90 return false;
91 }
321bc0b2 92 if (cpu_is_stopped(cpu)) {
ac873f1e
PM
93 return true;
94 }
8c2e1b00 95 if (!cpu->halted || cpu_has_work(cpu) ||
215e79c0 96 kvm_halt_in_kernel()) {
ac873f1e
PM
97 return false;
98 }
99 return true;
100}
101
102static bool all_cpu_threads_idle(void)
103{
182735ef 104 CPUState *cpu;
ac873f1e 105
bdc44640 106 CPU_FOREACH(cpu) {
182735ef 107 if (!cpu_thread_is_idle(cpu)) {
ac873f1e
PM
108 return false;
109 }
110 }
111 return true;
112}
113
946fb27c
PB
114/***********************************************************/
115/* guest cycle counter */
116
a3270e19
PB
117/* Protected by TimersState seqlock */
118
5045e9d9 119static bool icount_sleep = true;
71468395 120static int64_t vm_clock_warp_start = -1;
946fb27c
PB
121/* Conversion factor from emulated instructions to virtual clock ticks. */
122static int icount_time_shift;
123/* Arbitrarily pick 1MIPS as the minimum allowable speed. */
124#define MAX_ICOUNT_SHIFT 10
a3270e19 125
946fb27c
PB
126static QEMUTimer *icount_rt_timer;
127static QEMUTimer *icount_vm_timer;
128static QEMUTimer *icount_warp_timer;
946fb27c
PB
129
130typedef struct TimersState {
cb365646 131 /* Protected by BQL. */
946fb27c
PB
132 int64_t cpu_ticks_prev;
133 int64_t cpu_ticks_offset;
cb365646
LPF
134
135 /* cpu_clock_offset can be read out of BQL, so protect it with
136 * this lock.
137 */
138 QemuSeqLock vm_clock_seqlock;
946fb27c
PB
139 int64_t cpu_clock_offset;
140 int32_t cpu_ticks_enabled;
141 int64_t dummy;
c96778bb
FK
142
143 /* Compensate for varying guest execution speed. */
144 int64_t qemu_icount_bias;
145 /* Only written by TCG thread */
146 int64_t qemu_icount;
946fb27c
PB
147} TimersState;
148
d9cd4007 149static TimersState timers_state;
946fb27c 150
2a62914b 151int64_t cpu_get_icount_raw(void)
946fb27c
PB
152{
153 int64_t icount;
4917cf44 154 CPUState *cpu = current_cpu;
946fb27c 155
c96778bb 156 icount = timers_state.qemu_icount;
4917cf44 157 if (cpu) {
414b15c9 158 if (!cpu->can_do_io) {
2a62914b
PD
159 fprintf(stderr, "Bad icount read\n");
160 exit(1);
946fb27c 161 }
28ecfd7a 162 icount -= (cpu->icount_decr.u16.low + cpu->icount_extra);
946fb27c 163 }
2a62914b
PD
164 return icount;
165}
166
167/* Return the virtual CPU time, based on the instruction counter. */
168static int64_t cpu_get_icount_locked(void)
169{
170 int64_t icount = cpu_get_icount_raw();
3f031313 171 return timers_state.qemu_icount_bias + cpu_icount_to_ns(icount);
946fb27c
PB
172}
173
17a15f1b
PB
174int64_t cpu_get_icount(void)
175{
176 int64_t icount;
177 unsigned start;
178
179 do {
180 start = seqlock_read_begin(&timers_state.vm_clock_seqlock);
181 icount = cpu_get_icount_locked();
182 } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start));
183
184 return icount;
185}
186
3f031313
FK
187int64_t cpu_icount_to_ns(int64_t icount)
188{
189 return icount << icount_time_shift;
190}
191
946fb27c 192/* return the host CPU cycle counter and handle stop/restart */
cb365646 193/* Caller must hold the BQL */
946fb27c
PB
194int64_t cpu_get_ticks(void)
195{
5f3e3101
PB
196 int64_t ticks;
197
946fb27c
PB
198 if (use_icount) {
199 return cpu_get_icount();
200 }
5f3e3101
PB
201
202 ticks = timers_state.cpu_ticks_offset;
203 if (timers_state.cpu_ticks_enabled) {
4a7428c5 204 ticks += cpu_get_host_ticks();
5f3e3101
PB
205 }
206
207 if (timers_state.cpu_ticks_prev > ticks) {
208 /* Note: non increasing ticks may happen if the host uses
209 software suspend */
210 timers_state.cpu_ticks_offset += timers_state.cpu_ticks_prev - ticks;
211 ticks = timers_state.cpu_ticks_prev;
946fb27c 212 }
5f3e3101
PB
213
214 timers_state.cpu_ticks_prev = ticks;
215 return ticks;
946fb27c
PB
216}
217
cb365646 218static int64_t cpu_get_clock_locked(void)
946fb27c 219{
5f3e3101 220 int64_t ticks;
cb365646 221
5f3e3101
PB
222 ticks = timers_state.cpu_clock_offset;
223 if (timers_state.cpu_ticks_enabled) {
224 ticks += get_clock();
946fb27c 225 }
cb365646 226
5f3e3101 227 return ticks;
cb365646
LPF
228}
229
230/* return the host CPU monotonic timer and handle stop/restart */
231int64_t cpu_get_clock(void)
232{
233 int64_t ti;
234 unsigned start;
235
236 do {
237 start = seqlock_read_begin(&timers_state.vm_clock_seqlock);
238 ti = cpu_get_clock_locked();
239 } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start));
240
241 return ti;
946fb27c
PB
242}
243
cb365646
LPF
244/* enable cpu_get_ticks()
245 * Caller must hold BQL which server as mutex for vm_clock_seqlock.
246 */
946fb27c
PB
247void cpu_enable_ticks(void)
248{
cb365646
LPF
249 /* Here, the really thing protected by seqlock is cpu_clock_offset. */
250 seqlock_write_lock(&timers_state.vm_clock_seqlock);
946fb27c 251 if (!timers_state.cpu_ticks_enabled) {
4a7428c5 252 timers_state.cpu_ticks_offset -= cpu_get_host_ticks();
946fb27c
PB
253 timers_state.cpu_clock_offset -= get_clock();
254 timers_state.cpu_ticks_enabled = 1;
255 }
cb365646 256 seqlock_write_unlock(&timers_state.vm_clock_seqlock);
946fb27c
PB
257}
258
259/* disable cpu_get_ticks() : the clock is stopped. You must not call
cb365646
LPF
260 * cpu_get_ticks() after that.
261 * Caller must hold BQL which server as mutex for vm_clock_seqlock.
262 */
946fb27c
PB
263void cpu_disable_ticks(void)
264{
cb365646
LPF
265 /* Here, the really thing protected by seqlock is cpu_clock_offset. */
266 seqlock_write_lock(&timers_state.vm_clock_seqlock);
946fb27c 267 if (timers_state.cpu_ticks_enabled) {
4a7428c5 268 timers_state.cpu_ticks_offset += cpu_get_host_ticks();
cb365646 269 timers_state.cpu_clock_offset = cpu_get_clock_locked();
946fb27c
PB
270 timers_state.cpu_ticks_enabled = 0;
271 }
cb365646 272 seqlock_write_unlock(&timers_state.vm_clock_seqlock);
946fb27c
PB
273}
274
275/* Correlation between real and virtual time is always going to be
276 fairly approximate, so ignore small variation.
277 When the guest is idle real and virtual time will be aligned in
278 the IO wait loop. */
73bcb24d 279#define ICOUNT_WOBBLE (NANOSECONDS_PER_SECOND / 10)
946fb27c
PB
280
281static void icount_adjust(void)
282{
283 int64_t cur_time;
284 int64_t cur_icount;
285 int64_t delta;
a3270e19
PB
286
287 /* Protected by TimersState mutex. */
946fb27c 288 static int64_t last_delta;
468cc7cf 289
946fb27c
PB
290 /* If the VM is not running, then do nothing. */
291 if (!runstate_is_running()) {
292 return;
293 }
468cc7cf 294
17a15f1b
PB
295 seqlock_write_lock(&timers_state.vm_clock_seqlock);
296 cur_time = cpu_get_clock_locked();
297 cur_icount = cpu_get_icount_locked();
468cc7cf 298
946fb27c
PB
299 delta = cur_icount - cur_time;
300 /* FIXME: This is a very crude algorithm, somewhat prone to oscillation. */
301 if (delta > 0
302 && last_delta + ICOUNT_WOBBLE < delta * 2
303 && icount_time_shift > 0) {
304 /* The guest is getting too far ahead. Slow time down. */
305 icount_time_shift--;
306 }
307 if (delta < 0
308 && last_delta - ICOUNT_WOBBLE > delta * 2
309 && icount_time_shift < MAX_ICOUNT_SHIFT) {
310 /* The guest is getting too far behind. Speed time up. */
311 icount_time_shift++;
312 }
313 last_delta = delta;
c96778bb
FK
314 timers_state.qemu_icount_bias = cur_icount
315 - (timers_state.qemu_icount << icount_time_shift);
17a15f1b 316 seqlock_write_unlock(&timers_state.vm_clock_seqlock);
946fb27c
PB
317}
318
319static void icount_adjust_rt(void *opaque)
320{
40daca54 321 timer_mod(icount_rt_timer,
1979b908 322 qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL_RT) + 1000);
946fb27c
PB
323 icount_adjust();
324}
325
326static void icount_adjust_vm(void *opaque)
327{
40daca54
AB
328 timer_mod(icount_vm_timer,
329 qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
73bcb24d 330 NANOSECONDS_PER_SECOND / 10);
946fb27c
PB
331 icount_adjust();
332}
333
334static int64_t qemu_icount_round(int64_t count)
335{
336 return (count + (1 << icount_time_shift) - 1) >> icount_time_shift;
337}
338
efab87cf 339static void icount_warp_rt(void)
946fb27c 340{
17a15f1b
PB
341 /* The icount_warp_timer is rescheduled soon after vm_clock_warp_start
342 * changes from -1 to another value, so the race here is okay.
343 */
344 if (atomic_read(&vm_clock_warp_start) == -1) {
946fb27c
PB
345 return;
346 }
347
17a15f1b 348 seqlock_write_lock(&timers_state.vm_clock_seqlock);
946fb27c 349 if (runstate_is_running()) {
8eda206e
PD
350 int64_t clock = REPLAY_CLOCK(REPLAY_CLOCK_VIRTUAL_RT,
351 cpu_get_clock_locked());
8ed961d9
PB
352 int64_t warp_delta;
353
354 warp_delta = clock - vm_clock_warp_start;
355 if (use_icount == 2) {
946fb27c 356 /*
40daca54 357 * In adaptive mode, do not let QEMU_CLOCK_VIRTUAL run too
946fb27c
PB
358 * far ahead of real time.
359 */
17a15f1b 360 int64_t cur_icount = cpu_get_icount_locked();
bf2a7ddb 361 int64_t delta = clock - cur_icount;
8ed961d9 362 warp_delta = MIN(warp_delta, delta);
946fb27c 363 }
c96778bb 364 timers_state.qemu_icount_bias += warp_delta;
946fb27c
PB
365 }
366 vm_clock_warp_start = -1;
17a15f1b 367 seqlock_write_unlock(&timers_state.vm_clock_seqlock);
8ed961d9
PB
368
369 if (qemu_clock_expired(QEMU_CLOCK_VIRTUAL)) {
370 qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
371 }
946fb27c
PB
372}
373
/* Callback of icount_warp_timer; @opaque is unused. */
static void icount_timer_cb(void *opaque)
{
    /*
     * No need for a checkpoint because the timer already synchronizes
     * with CHECKPOINT_CLOCK_VIRTUAL_RT.
     */
    icount_warp_rt();
}
381
8156be56
PB
382void qtest_clock_warp(int64_t dest)
383{
40daca54 384 int64_t clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
efef88b3 385 AioContext *aio_context;
8156be56 386 assert(qtest_enabled());
efef88b3 387 aio_context = qemu_get_aio_context();
8156be56 388 while (clock < dest) {
40daca54 389 int64_t deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
c9299e2f 390 int64_t warp = qemu_soonest_timeout(dest - clock, deadline);
efef88b3 391
17a15f1b 392 seqlock_write_lock(&timers_state.vm_clock_seqlock);
c96778bb 393 timers_state.qemu_icount_bias += warp;
17a15f1b
PB
394 seqlock_write_unlock(&timers_state.vm_clock_seqlock);
395
40daca54 396 qemu_clock_run_timers(QEMU_CLOCK_VIRTUAL);
efef88b3 397 timerlist_run_timers(aio_context->tlg.tl[QEMU_CLOCK_VIRTUAL]);
40daca54 398 clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
8156be56 399 }
40daca54 400 qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
8156be56
PB
401}
402
e76d1798 403void qemu_start_warp_timer(void)
946fb27c 404{
ce78d18c 405 int64_t clock;
946fb27c
PB
406 int64_t deadline;
407
e76d1798 408 if (!use_icount) {
946fb27c
PB
409 return;
410 }
411
8bd7f71d
PD
412 /* Nothing to do if the VM is stopped: QEMU_CLOCK_VIRTUAL timers
413 * do not fire, so computing the deadline does not make sense.
414 */
415 if (!runstate_is_running()) {
416 return;
417 }
418
419 /* warp clock deterministically in record/replay mode */
e76d1798 420 if (!replay_checkpoint(CHECKPOINT_CLOCK_WARP_START)) {
8bd7f71d
PD
421 return;
422 }
423
ce78d18c 424 if (!all_cpu_threads_idle()) {
946fb27c
PB
425 return;
426 }
427
8156be56
PB
428 if (qtest_enabled()) {
429 /* When testing, qtest commands advance icount. */
e76d1798 430 return;
8156be56
PB
431 }
432
ac70aafc 433 /* We want to use the earliest deadline from ALL vm_clocks */
bf2a7ddb 434 clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT);
40daca54 435 deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
ce78d18c 436 if (deadline < 0) {
d7a0f71d
VC
437 static bool notified;
438 if (!icount_sleep && !notified) {
439 error_report("WARNING: icount sleep disabled and no active timers");
440 notified = true;
441 }
ce78d18c 442 return;
ac70aafc
AB
443 }
444
946fb27c
PB
445 if (deadline > 0) {
446 /*
40daca54 447 * Ensure QEMU_CLOCK_VIRTUAL proceeds even when the virtual CPU goes to
946fb27c
PB
448 * sleep. Otherwise, the CPU might be waiting for a future timer
449 * interrupt to wake it up, but the interrupt never comes because
450 * the vCPU isn't running any insns and thus doesn't advance the
40daca54 451 * QEMU_CLOCK_VIRTUAL.
946fb27c 452 */
5045e9d9
VC
453 if (!icount_sleep) {
454 /*
455 * We never let VCPUs sleep in no sleep icount mode.
456 * If there is a pending QEMU_CLOCK_VIRTUAL timer we just advance
457 * to the next QEMU_CLOCK_VIRTUAL event and notify it.
458 * It is useful when we want a deterministic execution time,
459 * isolated from host latencies.
460 */
461 seqlock_write_lock(&timers_state.vm_clock_seqlock);
462 timers_state.qemu_icount_bias += deadline;
463 seqlock_write_unlock(&timers_state.vm_clock_seqlock);
464 qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
465 } else {
466 /*
467 * We do stop VCPUs and only advance QEMU_CLOCK_VIRTUAL after some
468 * "real" time, (related to the time left until the next event) has
469 * passed. The QEMU_CLOCK_VIRTUAL_RT clock will do this.
470 * This avoids that the warps are visible externally; for example,
471 * you will not be sending network packets continuously instead of
472 * every 100ms.
473 */
474 seqlock_write_lock(&timers_state.vm_clock_seqlock);
475 if (vm_clock_warp_start == -1 || vm_clock_warp_start > clock) {
476 vm_clock_warp_start = clock;
477 }
478 seqlock_write_unlock(&timers_state.vm_clock_seqlock);
479 timer_mod_anticipate(icount_warp_timer, clock + deadline);
ce78d18c 480 }
ac70aafc 481 } else if (deadline == 0) {
40daca54 482 qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
946fb27c
PB
483 }
484}
485
e76d1798
PD
486static void qemu_account_warp_timer(void)
487{
488 if (!use_icount || !icount_sleep) {
489 return;
490 }
491
492 /* Nothing to do if the VM is stopped: QEMU_CLOCK_VIRTUAL timers
493 * do not fire, so computing the deadline does not make sense.
494 */
495 if (!runstate_is_running()) {
496 return;
497 }
498
499 /* warp clock deterministically in record/replay mode */
500 if (!replay_checkpoint(CHECKPOINT_CLOCK_WARP_ACCOUNT)) {
501 return;
502 }
503
504 timer_del(icount_warp_timer);
505 icount_warp_rt();
506}
507
d09eae37
FK
508static bool icount_state_needed(void *opaque)
509{
510 return use_icount;
511}
512
513/*
514 * This is a subsection for icount migration.
515 */
516static const VMStateDescription icount_vmstate_timers = {
517 .name = "timer/icount",
518 .version_id = 1,
519 .minimum_version_id = 1,
5cd8cada 520 .needed = icount_state_needed,
d09eae37
FK
521 .fields = (VMStateField[]) {
522 VMSTATE_INT64(qemu_icount_bias, TimersState),
523 VMSTATE_INT64(qemu_icount, TimersState),
524 VMSTATE_END_OF_LIST()
525 }
526};
527
946fb27c
PB
528static const VMStateDescription vmstate_timers = {
529 .name = "timer",
530 .version_id = 2,
531 .minimum_version_id = 1,
35d08458 532 .fields = (VMStateField[]) {
946fb27c
PB
533 VMSTATE_INT64(cpu_ticks_offset, TimersState),
534 VMSTATE_INT64(dummy, TimersState),
535 VMSTATE_INT64_V(cpu_clock_offset, TimersState, 2),
536 VMSTATE_END_OF_LIST()
d09eae37 537 },
5cd8cada
JQ
538 .subsections = (const VMStateDescription*[]) {
539 &icount_vmstate_timers,
540 NULL
946fb27c
PB
541 }
542};
543
2adcc85d
JH
544static void cpu_throttle_thread(void *opaque)
545{
546 CPUState *cpu = opaque;
547 double pct;
548 double throttle_ratio;
549 long sleeptime_ns;
550
551 if (!cpu_throttle_get_percentage()) {
552 return;
553 }
554
555 pct = (double)cpu_throttle_get_percentage()/100;
556 throttle_ratio = pct / (1 - pct);
557 sleeptime_ns = (long)(throttle_ratio * CPU_THROTTLE_TIMESLICE_NS);
558
559 qemu_mutex_unlock_iothread();
560 atomic_set(&cpu->throttle_thread_scheduled, 0);
561 g_usleep(sleeptime_ns / 1000); /* Convert ns to us for usleep call */
562 qemu_mutex_lock_iothread();
563}
564
565static void cpu_throttle_timer_tick(void *opaque)
566{
567 CPUState *cpu;
568 double pct;
569
570 /* Stop the timer if needed */
571 if (!cpu_throttle_get_percentage()) {
572 return;
573 }
574 CPU_FOREACH(cpu) {
575 if (!atomic_xchg(&cpu->throttle_thread_scheduled, 1)) {
576 async_run_on_cpu(cpu, cpu_throttle_thread, cpu);
577 }
578 }
579
580 pct = (double)cpu_throttle_get_percentage()/100;
581 timer_mod(throttle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT) +
582 CPU_THROTTLE_TIMESLICE_NS / (1-pct));
583}
584
585void cpu_throttle_set(int new_throttle_pct)
586{
587 /* Ensure throttle percentage is within valid range */
588 new_throttle_pct = MIN(new_throttle_pct, CPU_THROTTLE_PCT_MAX);
589 new_throttle_pct = MAX(new_throttle_pct, CPU_THROTTLE_PCT_MIN);
590
591 atomic_set(&throttle_percentage, new_throttle_pct);
592
593 timer_mod(throttle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT) +
594 CPU_THROTTLE_TIMESLICE_NS);
595}
596
597void cpu_throttle_stop(void)
598{
599 atomic_set(&throttle_percentage, 0);
600}
601
/* Return true while a non-zero throttle percentage is set. */
bool cpu_throttle_active(void)
{
    int pct = cpu_throttle_get_percentage();

    return pct != 0;
}
606
607int cpu_throttle_get_percentage(void)
608{
609 return atomic_read(&throttle_percentage);
610}
611
4603ea01
PD
612void cpu_ticks_init(void)
613{
614 seqlock_init(&timers_state.vm_clock_seqlock, NULL);
615 vmstate_register(NULL, 0, &vmstate_timers, &timers_state);
2adcc85d
JH
616 throttle_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL_RT,
617 cpu_throttle_timer_tick, NULL);
4603ea01
PD
618}
619
1ad9580b 620void configure_icount(QemuOpts *opts, Error **errp)
946fb27c 621{
1ad9580b 622 const char *option;
a8bfac37 623 char *rem_str = NULL;
1ad9580b 624
1ad9580b 625 option = qemu_opt_get(opts, "shift");
946fb27c 626 if (!option) {
a8bfac37
ST
627 if (qemu_opt_get(opts, "align") != NULL) {
628 error_setg(errp, "Please specify shift option when using align");
629 }
946fb27c
PB
630 return;
631 }
f1f4b57e
VC
632
633 icount_sleep = qemu_opt_get_bool(opts, "sleep", true);
5045e9d9
VC
634 if (icount_sleep) {
635 icount_warp_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL_RT,
e76d1798 636 icount_timer_cb, NULL);
5045e9d9 637 }
f1f4b57e 638
a8bfac37 639 icount_align_option = qemu_opt_get_bool(opts, "align", false);
f1f4b57e
VC
640
641 if (icount_align_option && !icount_sleep) {
778d9f9b 642 error_setg(errp, "align=on and sleep=off are incompatible");
f1f4b57e 643 }
946fb27c 644 if (strcmp(option, "auto") != 0) {
a8bfac37
ST
645 errno = 0;
646 icount_time_shift = strtol(option, &rem_str, 0);
647 if (errno != 0 || *rem_str != '\0' || !strlen(option)) {
648 error_setg(errp, "icount: Invalid shift value");
649 }
946fb27c
PB
650 use_icount = 1;
651 return;
a8bfac37
ST
652 } else if (icount_align_option) {
653 error_setg(errp, "shift=auto and align=on are incompatible");
f1f4b57e 654 } else if (!icount_sleep) {
778d9f9b 655 error_setg(errp, "shift=auto and sleep=off are incompatible");
946fb27c
PB
656 }
657
658 use_icount = 2;
659
660 /* 125MIPS seems a reasonable initial guess at the guest speed.
661 It will be corrected fairly quickly anyway. */
662 icount_time_shift = 3;
663
664 /* Have both realtime and virtual time triggers for speed adjustment.
665 The realtime trigger catches emulated time passing too slowly,
666 the virtual time trigger catches emulated time passing too fast.
667 Realtime triggers occur even when idle, so use them less frequently
668 than VM triggers. */
bf2a7ddb
PD
669 icount_rt_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL_RT,
670 icount_adjust_rt, NULL);
40daca54 671 timer_mod(icount_rt_timer,
bf2a7ddb 672 qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL_RT) + 1000);
40daca54
AB
673 icount_vm_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
674 icount_adjust_vm, NULL);
675 timer_mod(icount_vm_timer,
676 qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
73bcb24d 677 NANOSECONDS_PER_SECOND / 10);
946fb27c
PB
678}
679
296af7c9
BS
680/***********************************************************/
681void hw_error(const char *fmt, ...)
682{
683 va_list ap;
55e5c285 684 CPUState *cpu;
296af7c9
BS
685
686 va_start(ap, fmt);
687 fprintf(stderr, "qemu: hardware error: ");
688 vfprintf(stderr, fmt, ap);
689 fprintf(stderr, "\n");
bdc44640 690 CPU_FOREACH(cpu) {
55e5c285 691 fprintf(stderr, "CPU #%d:\n", cpu->cpu_index);
878096ee 692 cpu_dump_state(cpu, stderr, fprintf, CPU_DUMP_FPU);
296af7c9
BS
693 }
694 va_end(ap);
695 abort();
696}
697
698void cpu_synchronize_all_states(void)
699{
182735ef 700 CPUState *cpu;
296af7c9 701
bdc44640 702 CPU_FOREACH(cpu) {
182735ef 703 cpu_synchronize_state(cpu);
296af7c9
BS
704 }
705}
706
707void cpu_synchronize_all_post_reset(void)
708{
182735ef 709 CPUState *cpu;
296af7c9 710
bdc44640 711 CPU_FOREACH(cpu) {
182735ef 712 cpu_synchronize_post_reset(cpu);
296af7c9
BS
713 }
714}
715
716void cpu_synchronize_all_post_init(void)
717{
182735ef 718 CPUState *cpu;
296af7c9 719
bdc44640 720 CPU_FOREACH(cpu) {
182735ef 721 cpu_synchronize_post_init(cpu);
296af7c9
BS
722 }
723}
724
56983463 725static int do_vm_stop(RunState state)
296af7c9 726{
56983463
KW
727 int ret = 0;
728
1354869c 729 if (runstate_is_running()) {
296af7c9 730 cpu_disable_ticks();
296af7c9 731 pause_all_vcpus();
f5bbfba1 732 runstate_set(state);
1dfb4dd9 733 vm_state_notify(0, state);
a4e15de9 734 qapi_event_send_stop(&error_abort);
296af7c9 735 }
56983463 736
594a45ce 737 bdrv_drain_all();
da31d594 738 ret = blk_flush_all();
594a45ce 739
56983463 740 return ret;
296af7c9
BS
741}
742
a1fcaa73 743static bool cpu_can_run(CPUState *cpu)
296af7c9 744{
4fdeee7c 745 if (cpu->stop) {
a1fcaa73 746 return false;
0ab07c62 747 }
321bc0b2 748 if (cpu_is_stopped(cpu)) {
a1fcaa73 749 return false;
0ab07c62 750 }
a1fcaa73 751 return true;
296af7c9
BS
752}
753
91325046 754static void cpu_handle_guest_debug(CPUState *cpu)
83f338f7 755{
64f6b346 756 gdb_set_stop_cpu(cpu);
8cf71710 757 qemu_system_debug_request();
f324e766 758 cpu->stopped = true;
3c638d06
JK
759}
760
6d9cb73c
JK
761#ifdef CONFIG_LINUX
762static void sigbus_reraise(void)
763{
764 sigset_t set;
765 struct sigaction action;
766
767 memset(&action, 0, sizeof(action));
768 action.sa_handler = SIG_DFL;
769 if (!sigaction(SIGBUS, &action, NULL)) {
770 raise(SIGBUS);
771 sigemptyset(&set);
772 sigaddset(&set, SIGBUS);
773 sigprocmask(SIG_UNBLOCK, &set, NULL);
774 }
775 perror("Failed to re-raise SIGBUS!\n");
776 abort();
777}
778
779static void sigbus_handler(int n, struct qemu_signalfd_siginfo *siginfo,
780 void *ctx)
781{
782 if (kvm_on_sigbus(siginfo->ssi_code,
783 (void *)(intptr_t)siginfo->ssi_addr)) {
784 sigbus_reraise();
785 }
786}
787
788static void qemu_init_sigbus(void)
789{
790 struct sigaction action;
791
792 memset(&action, 0, sizeof(action));
793 action.sa_flags = SA_SIGINFO;
794 action.sa_sigaction = (void (*)(int, siginfo_t*, void*))sigbus_handler;
795 sigaction(SIGBUS, &action, NULL);
796
797 prctl(PR_MCE_KILL, PR_MCE_KILL_SET, PR_MCE_KILL_EARLY, 0, 0);
798}
799
290adf38 800static void qemu_kvm_eat_signals(CPUState *cpu)
1ab3c6c0
JK
801{
802 struct timespec ts = { 0, 0 };
803 siginfo_t siginfo;
804 sigset_t waitset;
805 sigset_t chkset;
806 int r;
807
808 sigemptyset(&waitset);
809 sigaddset(&waitset, SIG_IPI);
810 sigaddset(&waitset, SIGBUS);
811
812 do {
813 r = sigtimedwait(&waitset, &siginfo, &ts);
814 if (r == -1 && !(errno == EAGAIN || errno == EINTR)) {
815 perror("sigtimedwait");
816 exit(1);
817 }
818
819 switch (r) {
820 case SIGBUS:
290adf38 821 if (kvm_on_sigbus_vcpu(cpu, siginfo.si_code, siginfo.si_addr)) {
1ab3c6c0
JK
822 sigbus_reraise();
823 }
824 break;
825 default:
826 break;
827 }
828
829 r = sigpending(&chkset);
830 if (r == -1) {
831 perror("sigpending");
832 exit(1);
833 }
834 } while (sigismember(&chkset, SIG_IPI) || sigismember(&chkset, SIGBUS));
1ab3c6c0
JK
835}
836
6d9cb73c
JK
837#else /* !CONFIG_LINUX */
838
/* Non-Linux build: no SIGBUS setup is performed. */
static void qemu_init_sigbus(void)
{
    /* intentionally empty */
}
1ab3c6c0 842
290adf38 843static void qemu_kvm_eat_signals(CPUState *cpu)
1ab3c6c0
JK
844{
845}
6d9cb73c
JK
846#endif /* !CONFIG_LINUX */
847
296af7c9 848#ifndef _WIN32
55f8d6ac
JK
/* Signal handler that does nothing; installed for SIG_IPI. */
static void dummy_signal(int sig)
{
    /* intentionally empty */
}
55f8d6ac 852
13618e05 853static void qemu_kvm_init_cpu_signals(CPUState *cpu)
714bd040
PB
854{
855 int r;
856 sigset_t set;
857 struct sigaction sigact;
858
859 memset(&sigact, 0, sizeof(sigact));
860 sigact.sa_handler = dummy_signal;
861 sigaction(SIG_IPI, &sigact, NULL);
862
714bd040
PB
863 pthread_sigmask(SIG_BLOCK, NULL, &set);
864 sigdelset(&set, SIG_IPI);
714bd040 865 sigdelset(&set, SIGBUS);
491d6e80 866 r = kvm_set_signal_mask(cpu, &set);
714bd040
PB
867 if (r) {
868 fprintf(stderr, "kvm_set_signal_mask: %s\n", strerror(-r));
869 exit(1);
870 }
871}
872
55f8d6ac 873#else /* _WIN32 */
13618e05 874static void qemu_kvm_init_cpu_signals(CPUState *cpu)
ff48eb5f 875{
714bd040
PB
876 abort();
877}
714bd040 878#endif /* _WIN32 */
ff48eb5f 879
b2532d88 880static QemuMutex qemu_global_mutex;
46daff13 881static QemuCond qemu_io_proceeded_cond;
6b49809c 882static unsigned iothread_requesting_mutex;
296af7c9
BS
883
884static QemuThread io_thread;
885
296af7c9
BS
886/* cpu creation */
887static QemuCond qemu_cpu_cond;
888/* system init */
296af7c9 889static QemuCond qemu_pause_cond;
e82bcec2 890static QemuCond qemu_work_cond;
296af7c9 891
d3b12f5d 892void qemu_init_cpu_loop(void)
296af7c9 893{
6d9cb73c 894 qemu_init_sigbus();
ed94592b 895 qemu_cond_init(&qemu_cpu_cond);
ed94592b
AL
896 qemu_cond_init(&qemu_pause_cond);
897 qemu_cond_init(&qemu_work_cond);
46daff13 898 qemu_cond_init(&qemu_io_proceeded_cond);
296af7c9 899 qemu_mutex_init(&qemu_global_mutex);
296af7c9 900
b7680cb6 901 qemu_thread_get_self(&io_thread);
296af7c9
BS
902}
903
f100f0b3 904void run_on_cpu(CPUState *cpu, void (*func)(void *data), void *data)
e82bcec2
MT
905{
906 struct qemu_work_item wi;
907
60e82579 908 if (qemu_cpu_is_self(cpu)) {
e82bcec2
MT
909 func(data);
910 return;
911 }
912
913 wi.func = func;
914 wi.data = data;
3c02270d 915 wi.free = false;
376692b9
PB
916
917 qemu_mutex_lock(&cpu->work_mutex);
c64ca814
AF
918 if (cpu->queued_work_first == NULL) {
919 cpu->queued_work_first = &wi;
0ab07c62 920 } else {
c64ca814 921 cpu->queued_work_last->next = &wi;
0ab07c62 922 }
c64ca814 923 cpu->queued_work_last = &wi;
e82bcec2
MT
924 wi.next = NULL;
925 wi.done = false;
376692b9 926 qemu_mutex_unlock(&cpu->work_mutex);
e82bcec2 927
c08d7424 928 qemu_cpu_kick(cpu);
376692b9 929 while (!atomic_mb_read(&wi.done)) {
4917cf44 930 CPUState *self_cpu = current_cpu;
e82bcec2
MT
931
932 qemu_cond_wait(&qemu_work_cond, &qemu_global_mutex);
4917cf44 933 current_cpu = self_cpu;
e82bcec2
MT
934 }
935}
936
3c02270d
CV
937void async_run_on_cpu(CPUState *cpu, void (*func)(void *data), void *data)
938{
939 struct qemu_work_item *wi;
940
941 if (qemu_cpu_is_self(cpu)) {
942 func(data);
943 return;
944 }
945
946 wi = g_malloc0(sizeof(struct qemu_work_item));
947 wi->func = func;
948 wi->data = data;
949 wi->free = true;
376692b9
PB
950
951 qemu_mutex_lock(&cpu->work_mutex);
3c02270d
CV
952 if (cpu->queued_work_first == NULL) {
953 cpu->queued_work_first = wi;
954 } else {
955 cpu->queued_work_last->next = wi;
956 }
957 cpu->queued_work_last = wi;
958 wi->next = NULL;
959 wi->done = false;
376692b9 960 qemu_mutex_unlock(&cpu->work_mutex);
3c02270d
CV
961
962 qemu_cpu_kick(cpu);
963}
964
6d45b109 965static void flush_queued_work(CPUState *cpu)
e82bcec2
MT
966{
967 struct qemu_work_item *wi;
968
c64ca814 969 if (cpu->queued_work_first == NULL) {
e82bcec2 970 return;
0ab07c62 971 }
e82bcec2 972
376692b9
PB
973 qemu_mutex_lock(&cpu->work_mutex);
974 while (cpu->queued_work_first != NULL) {
975 wi = cpu->queued_work_first;
c64ca814 976 cpu->queued_work_first = wi->next;
376692b9
PB
977 if (!cpu->queued_work_first) {
978 cpu->queued_work_last = NULL;
979 }
980 qemu_mutex_unlock(&cpu->work_mutex);
e82bcec2 981 wi->func(wi->data);
376692b9 982 qemu_mutex_lock(&cpu->work_mutex);
3c02270d
CV
983 if (wi->free) {
984 g_free(wi);
376692b9
PB
985 } else {
986 atomic_mb_set(&wi->done, true);
3c02270d 987 }
e82bcec2 988 }
376692b9 989 qemu_mutex_unlock(&cpu->work_mutex);
e82bcec2
MT
990 qemu_cond_broadcast(&qemu_work_cond);
991}
992
509a0d78 993static void qemu_wait_io_event_common(CPUState *cpu)
296af7c9 994{
4fdeee7c
AF
995 if (cpu->stop) {
996 cpu->stop = false;
f324e766 997 cpu->stopped = true;
96bce683 998 qemu_cond_broadcast(&qemu_pause_cond);
296af7c9 999 }
6d45b109 1000 flush_queued_work(cpu);
216fc9a4 1001 cpu->thread_kicked = false;
296af7c9
BS
1002}
1003
d5f8d613 1004static void qemu_tcg_wait_io_event(CPUState *cpu)
296af7c9 1005{
16400322 1006 while (all_cpu_threads_idle()) {
d5f8d613 1007 qemu_cond_wait(cpu->halt_cond, &qemu_global_mutex);
16400322 1008 }
296af7c9 1009
46daff13
PB
1010 while (iothread_requesting_mutex) {
1011 qemu_cond_wait(&qemu_io_proceeded_cond, &qemu_global_mutex);
1012 }
6cabe1f3 1013
bdc44640 1014 CPU_FOREACH(cpu) {
182735ef 1015 qemu_wait_io_event_common(cpu);
6cabe1f3 1016 }
296af7c9
BS
1017}
1018
fd529e8f 1019static void qemu_kvm_wait_io_event(CPUState *cpu)
296af7c9 1020{
a98ae1d8 1021 while (cpu_thread_is_idle(cpu)) {
f5c121b8 1022 qemu_cond_wait(cpu->halt_cond, &qemu_global_mutex);
16400322 1023 }
296af7c9 1024
290adf38 1025 qemu_kvm_eat_signals(cpu);
509a0d78 1026 qemu_wait_io_event_common(cpu);
296af7c9
BS
1027}
1028
7e97cd88 1029static void *qemu_kvm_cpu_thread_fn(void *arg)
296af7c9 1030{
48a106bd 1031 CPUState *cpu = arg;
84b4915d 1032 int r;
296af7c9 1033
ab28bd23
PB
1034 rcu_register_thread();
1035
2e7f7a3c 1036 qemu_mutex_lock_iothread();
814e612e 1037 qemu_thread_get_self(cpu->thread);
9f09e18a 1038 cpu->thread_id = qemu_get_thread_id();
626cf8f4 1039 cpu->can_do_io = 1;
4917cf44 1040 current_cpu = cpu;
296af7c9 1041
504134d2 1042 r = kvm_init_vcpu(cpu);
84b4915d
JK
1043 if (r < 0) {
1044 fprintf(stderr, "kvm_init_vcpu failed: %s\n", strerror(-r));
1045 exit(1);
1046 }
296af7c9 1047
13618e05 1048 qemu_kvm_init_cpu_signals(cpu);
296af7c9
BS
1049
1050 /* signal CPU creation */
61a46217 1051 cpu->created = true;
296af7c9
BS
1052 qemu_cond_signal(&qemu_cpu_cond);
1053
296af7c9 1054 while (1) {
a1fcaa73 1055 if (cpu_can_run(cpu)) {
1458c363 1056 r = kvm_cpu_exec(cpu);
83f338f7 1057 if (r == EXCP_DEBUG) {
91325046 1058 cpu_handle_guest_debug(cpu);
83f338f7 1059 }
0ab07c62 1060 }
fd529e8f 1061 qemu_kvm_wait_io_event(cpu);
296af7c9
BS
1062 }
1063
1064 return NULL;
1065}
1066
c7f0f3b1
AL
1067static void *qemu_dummy_cpu_thread_fn(void *arg)
1068{
1069#ifdef _WIN32
1070 fprintf(stderr, "qtest is not supported under Windows\n");
1071 exit(1);
1072#else
10a9021d 1073 CPUState *cpu = arg;
c7f0f3b1
AL
1074 sigset_t waitset;
1075 int r;
1076
ab28bd23
PB
1077 rcu_register_thread();
1078
c7f0f3b1 1079 qemu_mutex_lock_iothread();
814e612e 1080 qemu_thread_get_self(cpu->thread);
9f09e18a 1081 cpu->thread_id = qemu_get_thread_id();
626cf8f4 1082 cpu->can_do_io = 1;
c7f0f3b1
AL
1083
1084 sigemptyset(&waitset);
1085 sigaddset(&waitset, SIG_IPI);
1086
1087 /* signal CPU creation */
61a46217 1088 cpu->created = true;
c7f0f3b1
AL
1089 qemu_cond_signal(&qemu_cpu_cond);
1090
4917cf44 1091 current_cpu = cpu;
c7f0f3b1 1092 while (1) {
4917cf44 1093 current_cpu = NULL;
c7f0f3b1
AL
1094 qemu_mutex_unlock_iothread();
1095 do {
1096 int sig;
1097 r = sigwait(&waitset, &sig);
1098 } while (r == -1 && (errno == EAGAIN || errno == EINTR));
1099 if (r == -1) {
1100 perror("sigwait");
1101 exit(1);
1102 }
1103 qemu_mutex_lock_iothread();
4917cf44 1104 current_cpu = cpu;
509a0d78 1105 qemu_wait_io_event_common(cpu);
c7f0f3b1
AL
1106 }
1107
1108 return NULL;
1109#endif
1110}
1111
bdb7ca67
JK
1112static void tcg_exec_all(void);
1113
/*
 * Body of the single thread that runs all TCG vCPUs.
 *
 * @arg is the CPUState the thread was created for; the same variable is
 * later reused as the iterator over all CPUs.  After marking every CPU as
 * created the thread waits for the first CPU to leave the stopped state,
 * then loops forever between executing guest code and waiting for events.
 */
static void *qemu_tcg_cpu_thread_fn(void *arg)
{
    CPUState *cpu = arg;

    rcu_register_thread();

    qemu_mutex_lock_iothread();
    qemu_thread_get_self(cpu->thread);

    /* All CPUs share this thread, so all of them get its id. */
    CPU_FOREACH(cpu) {
        cpu->thread_id = qemu_get_thread_id();
        cpu->created = true;
        cpu->can_do_io = 1;
    }
    qemu_cond_signal(&qemu_cpu_cond);

    /* wait for initial kick-off after machine start */
    while (first_cpu->stopped) {
        qemu_cond_wait(first_cpu->halt_cond, &qemu_global_mutex);

        /* process any pending work */
        CPU_FOREACH(cpu) {
            qemu_wait_io_event_common(cpu);
        }
    }

    /* With exit_request set, the first tcg_exec_all() call runs no guest
     * code, so control reaches qemu_tcg_wait_io_event() and any pending
     * work is processed first.
     */
    atomic_mb_set(&exit_request, 1);

    while (1) {
        tcg_exec_all();

        if (use_icount) {
            int64_t deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);

            /* A zero deadline means a virtual-clock timer is already due. */
            if (deadline == 0) {
                qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
            }
        }
        qemu_tcg_wait_io_event(QTAILQ_FIRST(&cpus));
    }

    return NULL;
}
1158
2ff09a40 1159static void qemu_cpu_kick_thread(CPUState *cpu)
cc015e9a
PB
1160{
1161#ifndef _WIN32
1162 int err;
1163
e0c38211
PB
1164 if (cpu->thread_kicked) {
1165 return;
9102deda 1166 }
e0c38211 1167 cpu->thread_kicked = true;
814e612e 1168 err = pthread_kill(cpu->thread->thread, SIG_IPI);
cc015e9a
PB
1169 if (err) {
1170 fprintf(stderr, "qemu:%s: %s", __func__, strerror(err));
1171 exit(1);
1172 }
1173#else /* _WIN32 */
e0c38211
PB
1174 abort();
1175#endif
1176}
ed9164a3 1177
e0c38211
PB
1178static void qemu_cpu_kick_no_halt(void)
1179{
1180 CPUState *cpu;
1181 /* Ensure whatever caused the exit has reached the CPU threads before
1182 * writing exit_request.
1183 */
1184 atomic_mb_set(&exit_request, 1);
1185 cpu = atomic_mb_read(&tcg_current_cpu);
1186 if (cpu) {
1187 cpu_exit(cpu);
cc015e9a 1188 }
cc015e9a
PB
1189}
1190
c08d7424 1191void qemu_cpu_kick(CPUState *cpu)
296af7c9 1192{
f5c121b8 1193 qemu_cond_broadcast(cpu->halt_cond);
e0c38211
PB
1194 if (tcg_enabled()) {
1195 qemu_cpu_kick_no_halt();
1196 } else {
1197 qemu_cpu_kick_thread(cpu);
1198 }
296af7c9
BS
1199}
1200
/*
 * Kick the host thread of the CPU recorded in current_cpu.
 * current_cpu must be set; this is asserted.
 */
void qemu_cpu_kick_self(void)
{
    assert(current_cpu);
    qemu_cpu_kick_thread(current_cpu);
}
1206
/* Return whether the calling thread is the thread recorded for @cpu. */
bool qemu_cpu_is_self(CPUState *cpu)
{
    return qemu_thread_is_self(cpu->thread);
}
1211
79e2b9ae 1212bool qemu_in_vcpu_thread(void)
aa723c23 1213{
4917cf44 1214 return current_cpu && qemu_cpu_is_self(current_cpu);
aa723c23
JQ
1215}
1216
afbe7053
PB
1217static __thread bool iothread_locked = false;
1218
/*
 * Return the calling thread's iothread_locked flag, which is set by
 * qemu_mutex_lock_iothread() and cleared by qemu_mutex_unlock_iothread().
 */
bool qemu_mutex_iothread_locked(void)
{
    return iothread_locked;
}
1223
296af7c9
BS
/*
 * Acquire qemu_global_mutex and record in the thread-local
 * iothread_locked flag that the calling thread holds it.
 *
 * iothread_requesting_mutex is raised while the lock is being acquired;
 * qemu_tcg_wait_io_event() waits for it to drop back to zero.
 */
void qemu_mutex_lock_iothread(void)
{
    atomic_inc(&iothread_requesting_mutex);
    /* In the simple case there is no need to bump the VCPU thread out of
     * TCG code execution.
     */
    if (!tcg_enabled() || qemu_in_vcpu_thread() ||
        !first_cpu || !first_cpu->created) {
        qemu_mutex_lock(&qemu_global_mutex);
        atomic_dec(&iothread_requesting_mutex);
    } else {
        /* Try the lock first; only when that does not succeed kick the
         * TCG loop out of execution and block on the mutex.
         */
        if (qemu_mutex_trylock(&qemu_global_mutex)) {
            qemu_cpu_kick_no_halt();
            qemu_mutex_lock(&qemu_global_mutex);
        }
        atomic_dec(&iothread_requesting_mutex);
        /* Wake waiters in qemu_tcg_wait_io_event(). */
        qemu_cond_broadcast(&qemu_io_proceeded_cond);
    }
    iothread_locked = true;
}
1244
/*
 * Release qemu_global_mutex.  The thread-local iothread_locked flag is
 * cleared before the mutex is dropped.
 */
void qemu_mutex_unlock_iothread(void)
{
    iothread_locked = false;
    qemu_mutex_unlock(&qemu_global_mutex);
}
1250
1251static int all_vcpus_paused(void)
1252{
bdc44640 1253 CPUState *cpu;
296af7c9 1254
bdc44640 1255 CPU_FOREACH(cpu) {
182735ef 1256 if (!cpu->stopped) {
296af7c9 1257 return 0;
0ab07c62 1258 }
296af7c9
BS
1259 }
1260
1261 return 1;
1262}
1263
/*
 * Disable the virtual clock, request every vCPU to stop and wait until
 * all of them report the stopped state.
 *
 * When called from a vCPU thread the current CPU is stopped directly via
 * cpu_stop_current(); if KVM is not in use, all CPUs are then simply
 * marked as stopped and the function returns without waiting.
 */
void pause_all_vcpus(void)
{
    CPUState *cpu;

    qemu_clock_enable(QEMU_CLOCK_VIRTUAL, false);
    /* Flag each vCPU and kick it so that it notices the request. */
    CPU_FOREACH(cpu) {
        cpu->stop = true;
        qemu_cpu_kick(cpu);
    }

    if (qemu_in_vcpu_thread()) {
        cpu_stop_current();
        if (!kvm_enabled()) {
            /* NOTE(review): presumably valid because without KVM no other
             * vCPU thread can be running guest code - confirm.
             */
            CPU_FOREACH(cpu) {
                cpu->stop = false;
                cpu->stopped = true;
            }
            return;
        }
    }

    /* Wait for the acknowledgements, re-kicking after every wakeup. */
    while (!all_vcpus_paused()) {
        qemu_cond_wait(&qemu_pause_cond, &qemu_global_mutex);
        CPU_FOREACH(cpu) {
            qemu_cpu_kick(cpu);
        }
    }
}
1292
2993683b
IM
/*
 * Clear both the stop request and the stopped state of @cpu, then kick it
 * so that it notices the change.
 */
void cpu_resume(CPUState *cpu)
{
    cpu->stop = false;
    cpu->stopped = false;
    qemu_cpu_kick(cpu);
}
1299
296af7c9
BS
/* Re-enable the virtual clock and resume every CPU via cpu_resume(). */
void resume_all_vcpus(void)
{
    CPUState *cpu;

    qemu_clock_enable(QEMU_CLOCK_VIRTUAL, true);
    CPU_FOREACH(cpu) {
        cpu_resume(cpu);
    }
}
1309
4900116e
DDAG
1310/* For temporary buffers for forming a name */
1311#define VCPU_THREAD_NAME_SIZE 16
1312
e5ab30a2 1313static void qemu_tcg_init_vcpu(CPUState *cpu)
296af7c9 1314{
4900116e 1315 char thread_name[VCPU_THREAD_NAME_SIZE];
d5f8d613
FK
1316 static QemuCond *tcg_halt_cond;
1317 static QemuThread *tcg_cpu_thread;
4900116e 1318
296af7c9
BS
1319 /* share a single thread for all cpus with TCG */
1320 if (!tcg_cpu_thread) {
814e612e 1321 cpu->thread = g_malloc0(sizeof(QemuThread));
f5c121b8
AF
1322 cpu->halt_cond = g_malloc0(sizeof(QemuCond));
1323 qemu_cond_init(cpu->halt_cond);
1324 tcg_halt_cond = cpu->halt_cond;
4900116e
DDAG
1325 snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/TCG",
1326 cpu->cpu_index);
1327 qemu_thread_create(cpu->thread, thread_name, qemu_tcg_cpu_thread_fn,
1328 cpu, QEMU_THREAD_JOINABLE);
1ecf47bf 1329#ifdef _WIN32
814e612e 1330 cpu->hThread = qemu_thread_get_handle(cpu->thread);
1ecf47bf 1331#endif
61a46217 1332 while (!cpu->created) {
18a85728 1333 qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
0ab07c62 1334 }
814e612e 1335 tcg_cpu_thread = cpu->thread;
296af7c9 1336 } else {
814e612e 1337 cpu->thread = tcg_cpu_thread;
f5c121b8 1338 cpu->halt_cond = tcg_halt_cond;
296af7c9
BS
1339 }
1340}
1341
48a106bd 1342static void qemu_kvm_start_vcpu(CPUState *cpu)
296af7c9 1343{
4900116e
DDAG
1344 char thread_name[VCPU_THREAD_NAME_SIZE];
1345
814e612e 1346 cpu->thread = g_malloc0(sizeof(QemuThread));
f5c121b8
AF
1347 cpu->halt_cond = g_malloc0(sizeof(QemuCond));
1348 qemu_cond_init(cpu->halt_cond);
4900116e
DDAG
1349 snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/KVM",
1350 cpu->cpu_index);
1351 qemu_thread_create(cpu->thread, thread_name, qemu_kvm_cpu_thread_fn,
1352 cpu, QEMU_THREAD_JOINABLE);
61a46217 1353 while (!cpu->created) {
18a85728 1354 qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
0ab07c62 1355 }
296af7c9
BS
1356}
1357
10a9021d 1358static void qemu_dummy_start_vcpu(CPUState *cpu)
c7f0f3b1 1359{
4900116e
DDAG
1360 char thread_name[VCPU_THREAD_NAME_SIZE];
1361
814e612e 1362 cpu->thread = g_malloc0(sizeof(QemuThread));
f5c121b8
AF
1363 cpu->halt_cond = g_malloc0(sizeof(QemuCond));
1364 qemu_cond_init(cpu->halt_cond);
4900116e
DDAG
1365 snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/DUMMY",
1366 cpu->cpu_index);
1367 qemu_thread_create(cpu->thread, thread_name, qemu_dummy_cpu_thread_fn, cpu,
c7f0f3b1 1368 QEMU_THREAD_JOINABLE);
61a46217 1369 while (!cpu->created) {
c7f0f3b1
AL
1370 qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
1371 }
1372}
1373
c643bed9 1374void qemu_init_vcpu(CPUState *cpu)
296af7c9 1375{
ce3960eb
AF
1376 cpu->nr_cores = smp_cores;
1377 cpu->nr_threads = smp_threads;
f324e766 1378 cpu->stopped = true;
56943e8c
PM
1379
1380 if (!cpu->as) {
1381 /* If the target cpu hasn't set up any address spaces itself,
1382 * give it the default one.
1383 */
6731d864
PC
1384 AddressSpace *as = address_space_init_shareable(cpu->memory,
1385 "cpu-memory");
12ebc9a7 1386 cpu->num_ases = 1;
6731d864 1387 cpu_address_space_init(cpu, as, 0);
56943e8c
PM
1388 }
1389
0ab07c62 1390 if (kvm_enabled()) {
48a106bd 1391 qemu_kvm_start_vcpu(cpu);
c7f0f3b1 1392 } else if (tcg_enabled()) {
e5ab30a2 1393 qemu_tcg_init_vcpu(cpu);
c7f0f3b1 1394 } else {
10a9021d 1395 qemu_dummy_start_vcpu(cpu);
0ab07c62 1396 }
296af7c9
BS
1397}
1398
b4a3d965 1399void cpu_stop_current(void)
296af7c9 1400{
4917cf44
AF
1401 if (current_cpu) {
1402 current_cpu->stop = false;
1403 current_cpu->stopped = true;
1404 cpu_exit(current_cpu);
96bce683 1405 qemu_cond_broadcast(&qemu_pause_cond);
b4a3d965 1406 }
296af7c9
BS
1407}
1408
56983463 1409int vm_stop(RunState state)
296af7c9 1410{
aa723c23 1411 if (qemu_in_vcpu_thread()) {
74892d24 1412 qemu_system_vmstop_request_prepare();
1dfb4dd9 1413 qemu_system_vmstop_request(state);
296af7c9
BS
1414 /*
1415 * FIXME: should not return to device code in case
1416 * vm_stop() has been requested.
1417 */
b4a3d965 1418 cpu_stop_current();
56983463 1419 return 0;
296af7c9 1420 }
56983463
KW
1421
1422 return do_vm_stop(state);
296af7c9
BS
1423}
1424
8a9236f1
LC
1425/* does a state transition even if the VM is already stopped,
1426 current state is forgotten forever */
56983463 1427int vm_stop_force_state(RunState state)
8a9236f1
LC
1428{
1429 if (runstate_is_running()) {
56983463 1430 return vm_stop(state);
8a9236f1
LC
1431 } else {
1432 runstate_set(state);
b2780d32
WC
1433
1434 bdrv_drain_all();
594a45ce
KW
1435 /* Make sure to return an error if the flush in a previous vm_stop()
1436 * failed. */
da31d594 1437 return blk_flush_all();
8a9236f1
LC
1438 }
1439}
1440
8b427044
PD
1441static int64_t tcg_get_icount_limit(void)
1442{
1443 int64_t deadline;
1444
1445 if (replay_mode != REPLAY_MODE_PLAY) {
1446 deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
1447
1448 /* Maintain prior (possibly buggy) behaviour where if no deadline
1449 * was set (as there is no QEMU_CLOCK_VIRTUAL timer) or it is more than
1450 * INT32_MAX nanoseconds ahead, we still use INT32_MAX
1451 * nanoseconds.
1452 */
1453 if ((deadline < 0) || (deadline > INT32_MAX)) {
1454 deadline = INT32_MAX;
1455 }
1456
1457 return qemu_icount_round(deadline);
1458 } else {
1459 return replay_get_instructions();
1460 }
1461}
1462
3d57f789 1463static int tcg_cpu_exec(CPUState *cpu)
296af7c9
BS
1464{
1465 int ret;
1466#ifdef CONFIG_PROFILER
1467 int64_t ti;
1468#endif
1469
1470#ifdef CONFIG_PROFILER
1471 ti = profile_getclock();
1472#endif
1473 if (use_icount) {
1474 int64_t count;
1475 int decr;
c96778bb
FK
1476 timers_state.qemu_icount -= (cpu->icount_decr.u16.low
1477 + cpu->icount_extra);
28ecfd7a 1478 cpu->icount_decr.u16.low = 0;
efee7340 1479 cpu->icount_extra = 0;
8b427044 1480 count = tcg_get_icount_limit();
c96778bb 1481 timers_state.qemu_icount += count;
296af7c9
BS
1482 decr = (count > 0xffff) ? 0xffff : count;
1483 count -= decr;
28ecfd7a 1484 cpu->icount_decr.u16.low = decr;
efee7340 1485 cpu->icount_extra = count;
296af7c9 1486 }
ea3e9847 1487 ret = cpu_exec(cpu);
296af7c9 1488#ifdef CONFIG_PROFILER
89d5cbdd 1489 tcg_time += profile_getclock() - ti;
296af7c9
BS
1490#endif
1491 if (use_icount) {
1492 /* Fold pending instructions back into the
1493 instruction counter, and clear the interrupt flag. */
c96778bb
FK
1494 timers_state.qemu_icount -= (cpu->icount_decr.u16.low
1495 + cpu->icount_extra);
28ecfd7a 1496 cpu->icount_decr.u32 = 0;
efee7340 1497 cpu->icount_extra = 0;
8b427044 1498 replay_account_executed_instructions();
296af7c9
BS
1499 }
1500 return ret;
1501}
1502
/*
 * Run guest code round-robin over the CPU list, resuming at next_cpu.
 *
 * The loop ends when the list is exhausted, when exit_request is set,
 * when a CPU returns EXCP_DEBUG, or when a CPU that cannot run is
 * stopping or stopped.  exit_request is cleared on the way out.
 */
static void tcg_exec_all(void)
{
    int r;

    /* Account partial waits to QEMU_CLOCK_VIRTUAL.  */
    qemu_account_warp_timer();

    if (next_cpu == NULL) {
        next_cpu = first_cpu;
    }
    for (; next_cpu != NULL && !exit_request; next_cpu = CPU_NEXT(next_cpu)) {
        CPUState *cpu = next_cpu;

        /* The virtual clock is disabled while this CPU single-steps with
         * SSTEP_NOTIMER set.
         */
        qemu_clock_enable(QEMU_CLOCK_VIRTUAL,
                          (cpu->singlestep_enabled & SSTEP_NOTIMER) == 0);

        if (cpu_can_run(cpu)) {
            r = tcg_cpu_exec(cpu);
            if (r == EXCP_DEBUG) {
                cpu_handle_guest_debug(cpu);
                break;
            }
        } else if (cpu->stop || cpu->stopped) {
            break;
        }
    }

    /* Pairs with the atomic_mb_set() of exit_request in
     * qemu_cpu_kick_no_halt().
     */
    atomic_mb_set(&exit_request, 0);
}
1533
/*
 * Print the list of supported CPU models to @f through @cpu_fprintf.
 * Nothing is printed when the target does not define cpu_list.
 * @optarg is not used.
 */
void list_cpus(FILE *f, fprintf_function cpu_fprintf, const char *optarg)
{
    /* XXX: implement xxx_cpu_list for targets that still miss it */
#if defined(cpu_list)
    cpu_list(f, cpu_fprintf);
#endif
}
de0b36b6
LC
1541
1542CpuInfoList *qmp_query_cpus(Error **errp)
1543{
1544 CpuInfoList *head = NULL, *cur_item = NULL;
182735ef 1545 CPUState *cpu;
de0b36b6 1546
bdc44640 1547 CPU_FOREACH(cpu) {
de0b36b6 1548 CpuInfoList *info;
182735ef
AF
1549#if defined(TARGET_I386)
1550 X86CPU *x86_cpu = X86_CPU(cpu);
1551 CPUX86State *env = &x86_cpu->env;
1552#elif defined(TARGET_PPC)
1553 PowerPCCPU *ppc_cpu = POWERPC_CPU(cpu);
1554 CPUPPCState *env = &ppc_cpu->env;
1555#elif defined(TARGET_SPARC)
1556 SPARCCPU *sparc_cpu = SPARC_CPU(cpu);
1557 CPUSPARCState *env = &sparc_cpu->env;
1558#elif defined(TARGET_MIPS)
1559 MIPSCPU *mips_cpu = MIPS_CPU(cpu);
1560 CPUMIPSState *env = &mips_cpu->env;
48e06fe0
BK
1561#elif defined(TARGET_TRICORE)
1562 TriCoreCPU *tricore_cpu = TRICORE_CPU(cpu);
1563 CPUTriCoreState *env = &tricore_cpu->env;
182735ef 1564#endif
de0b36b6 1565
cb446eca 1566 cpu_synchronize_state(cpu);
de0b36b6
LC
1567
1568 info = g_malloc0(sizeof(*info));
1569 info->value = g_malloc0(sizeof(*info->value));
55e5c285 1570 info->value->CPU = cpu->cpu_index;
182735ef 1571 info->value->current = (cpu == first_cpu);
259186a7 1572 info->value->halted = cpu->halted;
58f88d4b 1573 info->value->qom_path = object_get_canonical_path(OBJECT(cpu));
9f09e18a 1574 info->value->thread_id = cpu->thread_id;
de0b36b6 1575#if defined(TARGET_I386)
86f4b687 1576 info->value->arch = CPU_INFO_ARCH_X86;
544a3731 1577 info->value->u.x86.pc = env->eip + env->segs[R_CS].base;
de0b36b6 1578#elif defined(TARGET_PPC)
86f4b687 1579 info->value->arch = CPU_INFO_ARCH_PPC;
544a3731 1580 info->value->u.ppc.nip = env->nip;
de0b36b6 1581#elif defined(TARGET_SPARC)
86f4b687 1582 info->value->arch = CPU_INFO_ARCH_SPARC;
544a3731
EB
1583 info->value->u.q_sparc.pc = env->pc;
1584 info->value->u.q_sparc.npc = env->npc;
de0b36b6 1585#elif defined(TARGET_MIPS)
86f4b687 1586 info->value->arch = CPU_INFO_ARCH_MIPS;
544a3731 1587 info->value->u.q_mips.PC = env->active_tc.PC;
48e06fe0 1588#elif defined(TARGET_TRICORE)
86f4b687 1589 info->value->arch = CPU_INFO_ARCH_TRICORE;
544a3731 1590 info->value->u.tricore.PC = env->PC;
86f4b687
EB
1591#else
1592 info->value->arch = CPU_INFO_ARCH_OTHER;
de0b36b6
LC
1593#endif
1594
1595 /* XXX: waiting for the qapi to support GSList */
1596 if (!cur_item) {
1597 head = cur_item = info;
1598 } else {
1599 cur_item->next = info;
1600 cur_item = info;
1601 }
1602 }
1603
1604 return head;
1605}
0cfd6a9a
LC
1606
1607void qmp_memsave(int64_t addr, int64_t size, const char *filename,
1608 bool has_cpu, int64_t cpu_index, Error **errp)
1609{
1610 FILE *f;
1611 uint32_t l;
55e5c285 1612 CPUState *cpu;
0cfd6a9a 1613 uint8_t buf[1024];
0dc9daf0 1614 int64_t orig_addr = addr, orig_size = size;
0cfd6a9a
LC
1615
1616 if (!has_cpu) {
1617 cpu_index = 0;
1618 }
1619
151d1322
AF
1620 cpu = qemu_get_cpu(cpu_index);
1621 if (cpu == NULL) {
c6bd8c70
MA
1622 error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "cpu-index",
1623 "a CPU number");
0cfd6a9a
LC
1624 return;
1625 }
1626
1627 f = fopen(filename, "wb");
1628 if (!f) {
618da851 1629 error_setg_file_open(errp, errno, filename);
0cfd6a9a
LC
1630 return;
1631 }
1632
1633 while (size != 0) {
1634 l = sizeof(buf);
1635 if (l > size)
1636 l = size;
2f4d0f59 1637 if (cpu_memory_rw_debug(cpu, addr, buf, l, 0) != 0) {
0dc9daf0
BP
1638 error_setg(errp, "Invalid addr 0x%016" PRIx64 "/size %" PRId64
1639 " specified", orig_addr, orig_size);
2f4d0f59
AK
1640 goto exit;
1641 }
0cfd6a9a 1642 if (fwrite(buf, 1, l, f) != l) {
c6bd8c70 1643 error_setg(errp, QERR_IO_ERROR);
0cfd6a9a
LC
1644 goto exit;
1645 }
1646 addr += l;
1647 size -= l;
1648 }
1649
1650exit:
1651 fclose(f);
1652}
6d3962bf
LC
1653
1654void qmp_pmemsave(int64_t addr, int64_t size, const char *filename,
1655 Error **errp)
1656{
1657 FILE *f;
1658 uint32_t l;
1659 uint8_t buf[1024];
1660
1661 f = fopen(filename, "wb");
1662 if (!f) {
618da851 1663 error_setg_file_open(errp, errno, filename);
6d3962bf
LC
1664 return;
1665 }
1666
1667 while (size != 0) {
1668 l = sizeof(buf);
1669 if (l > size)
1670 l = size;
eb6282f2 1671 cpu_physical_memory_read(addr, buf, l);
6d3962bf 1672 if (fwrite(buf, 1, l, f) != l) {
c6bd8c70 1673 error_setg(errp, QERR_IO_ERROR);
6d3962bf
LC
1674 goto exit;
1675 }
1676 addr += l;
1677 size -= l;
1678 }
1679
1680exit:
1681 fclose(f);
1682}
ab49ab5c
LC
1683
1684void qmp_inject_nmi(Error **errp)
1685{
1686#if defined(TARGET_I386)
182735ef
AF
1687 CPUState *cs;
1688
bdc44640 1689 CPU_FOREACH(cs) {
182735ef 1690 X86CPU *cpu = X86_CPU(cs);
ab49ab5c 1691
02e51483 1692 if (!cpu->apic_state) {
182735ef 1693 cpu_interrupt(cs, CPU_INTERRUPT_NMI);
02c09195 1694 } else {
02e51483 1695 apic_deliver_nmi(cpu->apic_state);
02c09195 1696 }
ab49ab5c
LC
1697 }
1698#else
9cb805fd 1699 nmi_monitor_handle(monitor_get_cpu_index(), errp);
ab49ab5c
LC
1700#endif
1701}
27498bef
ST
1702
1703void dump_drift_info(FILE *f, fprintf_function cpu_fprintf)
1704{
1705 if (!use_icount) {
1706 return;
1707 }
1708
1709 cpu_fprintf(f, "Host - Guest clock %"PRIi64" ms\n",
1710 (cpu_get_clock() - cpu_get_icount())/SCALE_MS);
1711 if (icount_align_option) {
1712 cpu_fprintf(f, "Max guest delay %"PRIi64" ms\n", -max_delay/SCALE_MS);
1713 cpu_fprintf(f, "Max guest advance %"PRIi64" ms\n", max_advance/SCALE_MS);
1714 } else {
1715 cpu_fprintf(f, "Max guest delay NA\n");
1716 cpu_fprintf(f, "Max guest advance NA\n");
1717 }
1718}