]> git.proxmox.com Git - mirror_qemu.git/blame - cpus.c
memory: Add address_space_init_shareable()
[mirror_qemu.git] / cpus.c
CommitLineData
296af7c9
BS
1/*
2 * QEMU System Emulator
3 *
4 * Copyright (c) 2003-2008 Fabrice Bellard
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to deal
8 * in the Software without restriction, including without limitation the rights
9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 * copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22 * THE SOFTWARE.
23 */
24
25/* Needed early for CONFIG_BSD etc. */
26#include "config-host.h"
27
83c9089e 28#include "monitor/monitor.h"
a4e15de9 29#include "qapi/qmp/qerror.h"
d49b6836 30#include "qemu/error-report.h"
9c17d615 31#include "sysemu/sysemu.h"
022c62cb 32#include "exec/gdbstub.h"
9c17d615
PB
33#include "sysemu/dma.h"
34#include "sysemu/kvm.h"
de0b36b6 35#include "qmp-commands.h"
296af7c9 36
1de7afc9 37#include "qemu/thread.h"
9c17d615
PB
38#include "sysemu/cpus.h"
39#include "sysemu/qtest.h"
1de7afc9
PB
40#include "qemu/main-loop.h"
41#include "qemu/bitmap.h"
cb365646 42#include "qemu/seqlock.h"
a4e15de9 43#include "qapi-event.h"
9cb805fd 44#include "hw/nmi.h"
8b427044 45#include "sysemu/replay.h"
0ff0fc19
JK
46
47#ifndef _WIN32
1de7afc9 48#include "qemu/compatfd.h"
0ff0fc19 49#endif
296af7c9 50
6d9cb73c
JK
51#ifdef CONFIG_LINUX
52
53#include <sys/prctl.h>
54
c0532a76
MT
55#ifndef PR_MCE_KILL
56#define PR_MCE_KILL 33
57#endif
58
6d9cb73c
JK
59#ifndef PR_MCE_KILL_SET
60#define PR_MCE_KILL_SET 1
61#endif
62
63#ifndef PR_MCE_KILL_EARLY
64#define PR_MCE_KILL_EARLY 1
65#endif
66
67#endif /* CONFIG_LINUX */
68
182735ef 69static CPUState *next_cpu;
27498bef
ST
70int64_t max_delay;
71int64_t max_advance;
296af7c9 72
2adcc85d
JH
73/* vcpu throttling controls */
74static QEMUTimer *throttle_timer;
75static unsigned int throttle_percentage;
76
77#define CPU_THROTTLE_PCT_MIN 1
78#define CPU_THROTTLE_PCT_MAX 99
79#define CPU_THROTTLE_TIMESLICE_NS 10000000
80
321bc0b2
TC
81bool cpu_is_stopped(CPUState *cpu)
82{
83 return cpu->stopped || !runstate_is_running();
84}
85
a98ae1d8 86static bool cpu_thread_is_idle(CPUState *cpu)
ac873f1e 87{
c64ca814 88 if (cpu->stop || cpu->queued_work_first) {
ac873f1e
PM
89 return false;
90 }
321bc0b2 91 if (cpu_is_stopped(cpu)) {
ac873f1e
PM
92 return true;
93 }
8c2e1b00 94 if (!cpu->halted || cpu_has_work(cpu) ||
215e79c0 95 kvm_halt_in_kernel()) {
ac873f1e
PM
96 return false;
97 }
98 return true;
99}
100
101static bool all_cpu_threads_idle(void)
102{
182735ef 103 CPUState *cpu;
ac873f1e 104
bdc44640 105 CPU_FOREACH(cpu) {
182735ef 106 if (!cpu_thread_is_idle(cpu)) {
ac873f1e
PM
107 return false;
108 }
109 }
110 return true;
111}
112
946fb27c
PB
113/***********************************************************/
114/* guest cycle counter */
115
a3270e19
PB
116/* Protected by TimersState seqlock */
117
5045e9d9 118static bool icount_sleep = true;
71468395 119static int64_t vm_clock_warp_start = -1;
946fb27c
PB
120/* Conversion factor from emulated instructions to virtual clock ticks. */
121static int icount_time_shift;
122/* Arbitrarily pick 1MIPS as the minimum allowable speed. */
123#define MAX_ICOUNT_SHIFT 10
a3270e19 124
946fb27c
PB
125static QEMUTimer *icount_rt_timer;
126static QEMUTimer *icount_vm_timer;
127static QEMUTimer *icount_warp_timer;
946fb27c
PB
128
129typedef struct TimersState {
cb365646 130 /* Protected by BQL. */
946fb27c
PB
131 int64_t cpu_ticks_prev;
132 int64_t cpu_ticks_offset;
cb365646
LPF
133
134 /* cpu_clock_offset can be read out of BQL, so protect it with
135 * this lock.
136 */
137 QemuSeqLock vm_clock_seqlock;
946fb27c
PB
138 int64_t cpu_clock_offset;
139 int32_t cpu_ticks_enabled;
140 int64_t dummy;
c96778bb
FK
141
142 /* Compensate for varying guest execution speed. */
143 int64_t qemu_icount_bias;
144 /* Only written by TCG thread */
145 int64_t qemu_icount;
946fb27c
PB
146} TimersState;
147
d9cd4007 148static TimersState timers_state;
946fb27c 149
2a62914b 150int64_t cpu_get_icount_raw(void)
946fb27c
PB
151{
152 int64_t icount;
4917cf44 153 CPUState *cpu = current_cpu;
946fb27c 154
c96778bb 155 icount = timers_state.qemu_icount;
4917cf44 156 if (cpu) {
414b15c9 157 if (!cpu->can_do_io) {
2a62914b
PD
158 fprintf(stderr, "Bad icount read\n");
159 exit(1);
946fb27c 160 }
28ecfd7a 161 icount -= (cpu->icount_decr.u16.low + cpu->icount_extra);
946fb27c 162 }
2a62914b
PD
163 return icount;
164}
165
166/* Return the virtual CPU time, based on the instruction counter. */
167static int64_t cpu_get_icount_locked(void)
168{
169 int64_t icount = cpu_get_icount_raw();
3f031313 170 return timers_state.qemu_icount_bias + cpu_icount_to_ns(icount);
946fb27c
PB
171}
172
17a15f1b
PB
173int64_t cpu_get_icount(void)
174{
175 int64_t icount;
176 unsigned start;
177
178 do {
179 start = seqlock_read_begin(&timers_state.vm_clock_seqlock);
180 icount = cpu_get_icount_locked();
181 } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start));
182
183 return icount;
184}
185
3f031313
FK
186int64_t cpu_icount_to_ns(int64_t icount)
187{
188 return icount << icount_time_shift;
189}
190
946fb27c 191/* return the host CPU cycle counter and handle stop/restart */
cb365646 192/* Caller must hold the BQL */
946fb27c
PB
193int64_t cpu_get_ticks(void)
194{
5f3e3101
PB
195 int64_t ticks;
196
946fb27c
PB
197 if (use_icount) {
198 return cpu_get_icount();
199 }
5f3e3101
PB
200
201 ticks = timers_state.cpu_ticks_offset;
202 if (timers_state.cpu_ticks_enabled) {
4a7428c5 203 ticks += cpu_get_host_ticks();
5f3e3101
PB
204 }
205
206 if (timers_state.cpu_ticks_prev > ticks) {
207 /* Note: non increasing ticks may happen if the host uses
208 software suspend */
209 timers_state.cpu_ticks_offset += timers_state.cpu_ticks_prev - ticks;
210 ticks = timers_state.cpu_ticks_prev;
946fb27c 211 }
5f3e3101
PB
212
213 timers_state.cpu_ticks_prev = ticks;
214 return ticks;
946fb27c
PB
215}
216
cb365646 217static int64_t cpu_get_clock_locked(void)
946fb27c 218{
5f3e3101 219 int64_t ticks;
cb365646 220
5f3e3101
PB
221 ticks = timers_state.cpu_clock_offset;
222 if (timers_state.cpu_ticks_enabled) {
223 ticks += get_clock();
946fb27c 224 }
cb365646 225
5f3e3101 226 return ticks;
cb365646
LPF
227}
228
229/* return the host CPU monotonic timer and handle stop/restart */
230int64_t cpu_get_clock(void)
231{
232 int64_t ti;
233 unsigned start;
234
235 do {
236 start = seqlock_read_begin(&timers_state.vm_clock_seqlock);
237 ti = cpu_get_clock_locked();
238 } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start));
239
240 return ti;
946fb27c
PB
241}
242
cb365646
LPF
243/* enable cpu_get_ticks()
244 * Caller must hold BQL which server as mutex for vm_clock_seqlock.
245 */
946fb27c
PB
246void cpu_enable_ticks(void)
247{
cb365646
LPF
248 /* Here, the really thing protected by seqlock is cpu_clock_offset. */
249 seqlock_write_lock(&timers_state.vm_clock_seqlock);
946fb27c 250 if (!timers_state.cpu_ticks_enabled) {
4a7428c5 251 timers_state.cpu_ticks_offset -= cpu_get_host_ticks();
946fb27c
PB
252 timers_state.cpu_clock_offset -= get_clock();
253 timers_state.cpu_ticks_enabled = 1;
254 }
cb365646 255 seqlock_write_unlock(&timers_state.vm_clock_seqlock);
946fb27c
PB
256}
257
258/* disable cpu_get_ticks() : the clock is stopped. You must not call
cb365646
LPF
259 * cpu_get_ticks() after that.
260 * Caller must hold BQL which server as mutex for vm_clock_seqlock.
261 */
946fb27c
PB
262void cpu_disable_ticks(void)
263{
cb365646
LPF
264 /* Here, the really thing protected by seqlock is cpu_clock_offset. */
265 seqlock_write_lock(&timers_state.vm_clock_seqlock);
946fb27c 266 if (timers_state.cpu_ticks_enabled) {
4a7428c5 267 timers_state.cpu_ticks_offset += cpu_get_host_ticks();
cb365646 268 timers_state.cpu_clock_offset = cpu_get_clock_locked();
946fb27c
PB
269 timers_state.cpu_ticks_enabled = 0;
270 }
cb365646 271 seqlock_write_unlock(&timers_state.vm_clock_seqlock);
946fb27c
PB
272}
273
274/* Correlation between real and virtual time is always going to be
275 fairly approximate, so ignore small variation.
276 When the guest is idle real and virtual time will be aligned in
277 the IO wait loop. */
278#define ICOUNT_WOBBLE (get_ticks_per_sec() / 10)
279
280static void icount_adjust(void)
281{
282 int64_t cur_time;
283 int64_t cur_icount;
284 int64_t delta;
a3270e19
PB
285
286 /* Protected by TimersState mutex. */
946fb27c 287 static int64_t last_delta;
468cc7cf 288
946fb27c
PB
289 /* If the VM is not running, then do nothing. */
290 if (!runstate_is_running()) {
291 return;
292 }
468cc7cf 293
17a15f1b
PB
294 seqlock_write_lock(&timers_state.vm_clock_seqlock);
295 cur_time = cpu_get_clock_locked();
296 cur_icount = cpu_get_icount_locked();
468cc7cf 297
946fb27c
PB
298 delta = cur_icount - cur_time;
299 /* FIXME: This is a very crude algorithm, somewhat prone to oscillation. */
300 if (delta > 0
301 && last_delta + ICOUNT_WOBBLE < delta * 2
302 && icount_time_shift > 0) {
303 /* The guest is getting too far ahead. Slow time down. */
304 icount_time_shift--;
305 }
306 if (delta < 0
307 && last_delta - ICOUNT_WOBBLE > delta * 2
308 && icount_time_shift < MAX_ICOUNT_SHIFT) {
309 /* The guest is getting too far behind. Speed time up. */
310 icount_time_shift++;
311 }
312 last_delta = delta;
c96778bb
FK
313 timers_state.qemu_icount_bias = cur_icount
314 - (timers_state.qemu_icount << icount_time_shift);
17a15f1b 315 seqlock_write_unlock(&timers_state.vm_clock_seqlock);
946fb27c
PB
316}
317
318static void icount_adjust_rt(void *opaque)
319{
40daca54 320 timer_mod(icount_rt_timer,
1979b908 321 qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL_RT) + 1000);
946fb27c
PB
322 icount_adjust();
323}
324
325static void icount_adjust_vm(void *opaque)
326{
40daca54
AB
327 timer_mod(icount_vm_timer,
328 qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
329 get_ticks_per_sec() / 10);
946fb27c
PB
330 icount_adjust();
331}
332
333static int64_t qemu_icount_round(int64_t count)
334{
335 return (count + (1 << icount_time_shift) - 1) >> icount_time_shift;
336}
337
efab87cf 338static void icount_warp_rt(void)
946fb27c 339{
17a15f1b
PB
340 /* The icount_warp_timer is rescheduled soon after vm_clock_warp_start
341 * changes from -1 to another value, so the race here is okay.
342 */
343 if (atomic_read(&vm_clock_warp_start) == -1) {
946fb27c
PB
344 return;
345 }
346
17a15f1b 347 seqlock_write_lock(&timers_state.vm_clock_seqlock);
946fb27c 348 if (runstate_is_running()) {
8eda206e
PD
349 int64_t clock = REPLAY_CLOCK(REPLAY_CLOCK_VIRTUAL_RT,
350 cpu_get_clock_locked());
8ed961d9
PB
351 int64_t warp_delta;
352
353 warp_delta = clock - vm_clock_warp_start;
354 if (use_icount == 2) {
946fb27c 355 /*
40daca54 356 * In adaptive mode, do not let QEMU_CLOCK_VIRTUAL run too
946fb27c
PB
357 * far ahead of real time.
358 */
17a15f1b 359 int64_t cur_icount = cpu_get_icount_locked();
bf2a7ddb 360 int64_t delta = clock - cur_icount;
8ed961d9 361 warp_delta = MIN(warp_delta, delta);
946fb27c 362 }
c96778bb 363 timers_state.qemu_icount_bias += warp_delta;
946fb27c
PB
364 }
365 vm_clock_warp_start = -1;
17a15f1b 366 seqlock_write_unlock(&timers_state.vm_clock_seqlock);
8ed961d9
PB
367
368 if (qemu_clock_expired(QEMU_CLOCK_VIRTUAL)) {
369 qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
370 }
946fb27c
PB
371}
372
efab87cf
PD
/* Timer callback that intentionally does nothing; it is the callback
 * given to icount_warp_timer in configure_icount().
 */
static void icount_dummy_timer(void *opaque)
{
    (void)opaque;
}
377
8156be56
PB
378void qtest_clock_warp(int64_t dest)
379{
40daca54 380 int64_t clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
efef88b3 381 AioContext *aio_context;
8156be56 382 assert(qtest_enabled());
efef88b3 383 aio_context = qemu_get_aio_context();
8156be56 384 while (clock < dest) {
40daca54 385 int64_t deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
c9299e2f 386 int64_t warp = qemu_soonest_timeout(dest - clock, deadline);
efef88b3 387
17a15f1b 388 seqlock_write_lock(&timers_state.vm_clock_seqlock);
c96778bb 389 timers_state.qemu_icount_bias += warp;
17a15f1b
PB
390 seqlock_write_unlock(&timers_state.vm_clock_seqlock);
391
40daca54 392 qemu_clock_run_timers(QEMU_CLOCK_VIRTUAL);
efef88b3 393 timerlist_run_timers(aio_context->tlg.tl[QEMU_CLOCK_VIRTUAL]);
40daca54 394 clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
8156be56 395 }
40daca54 396 qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
8156be56
PB
397}
398
40daca54 399void qemu_clock_warp(QEMUClockType type)
946fb27c 400{
ce78d18c 401 int64_t clock;
946fb27c
PB
402 int64_t deadline;
403
404 /*
405 * There are too many global variables to make the "warp" behavior
406 * applicable to other clocks. But a clock argument removes the
407 * need for if statements all over the place.
408 */
40daca54 409 if (type != QEMU_CLOCK_VIRTUAL || !use_icount) {
946fb27c
PB
410 return;
411 }
412
8bd7f71d
PD
413 /* Nothing to do if the VM is stopped: QEMU_CLOCK_VIRTUAL timers
414 * do not fire, so computing the deadline does not make sense.
415 */
416 if (!runstate_is_running()) {
417 return;
418 }
419
420 /* warp clock deterministically in record/replay mode */
421 if (!replay_checkpoint(CHECKPOINT_CLOCK_WARP)) {
422 return;
423 }
424
5045e9d9
VC
425 if (icount_sleep) {
426 /*
427 * If the CPUs have been sleeping, advance QEMU_CLOCK_VIRTUAL timer now.
428 * This ensures that the deadline for the timer is computed correctly
429 * below.
430 * This also makes sure that the insn counter is synchronized before
431 * the CPU starts running, in case the CPU is woken by an event other
432 * than the earliest QEMU_CLOCK_VIRTUAL timer.
433 */
efab87cf 434 icount_warp_rt();
5045e9d9
VC
435 timer_del(icount_warp_timer);
436 }
ce78d18c 437 if (!all_cpu_threads_idle()) {
946fb27c
PB
438 return;
439 }
440
8156be56
PB
441 if (qtest_enabled()) {
442 /* When testing, qtest commands advance icount. */
443 return;
444 }
445
ac70aafc 446 /* We want to use the earliest deadline from ALL vm_clocks */
bf2a7ddb 447 clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT);
40daca54 448 deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
ce78d18c 449 if (deadline < 0) {
d7a0f71d
VC
450 static bool notified;
451 if (!icount_sleep && !notified) {
452 error_report("WARNING: icount sleep disabled and no active timers");
453 notified = true;
454 }
ce78d18c 455 return;
ac70aafc
AB
456 }
457
946fb27c
PB
458 if (deadline > 0) {
459 /*
40daca54 460 * Ensure QEMU_CLOCK_VIRTUAL proceeds even when the virtual CPU goes to
946fb27c
PB
461 * sleep. Otherwise, the CPU might be waiting for a future timer
462 * interrupt to wake it up, but the interrupt never comes because
463 * the vCPU isn't running any insns and thus doesn't advance the
40daca54 464 * QEMU_CLOCK_VIRTUAL.
946fb27c 465 */
5045e9d9
VC
466 if (!icount_sleep) {
467 /*
468 * We never let VCPUs sleep in no sleep icount mode.
469 * If there is a pending QEMU_CLOCK_VIRTUAL timer we just advance
470 * to the next QEMU_CLOCK_VIRTUAL event and notify it.
471 * It is useful when we want a deterministic execution time,
472 * isolated from host latencies.
473 */
474 seqlock_write_lock(&timers_state.vm_clock_seqlock);
475 timers_state.qemu_icount_bias += deadline;
476 seqlock_write_unlock(&timers_state.vm_clock_seqlock);
477 qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
478 } else {
479 /*
480 * We do stop VCPUs and only advance QEMU_CLOCK_VIRTUAL after some
481 * "real" time, (related to the time left until the next event) has
482 * passed. The QEMU_CLOCK_VIRTUAL_RT clock will do this.
483 * This avoids that the warps are visible externally; for example,
484 * you will not be sending network packets continuously instead of
485 * every 100ms.
486 */
487 seqlock_write_lock(&timers_state.vm_clock_seqlock);
488 if (vm_clock_warp_start == -1 || vm_clock_warp_start > clock) {
489 vm_clock_warp_start = clock;
490 }
491 seqlock_write_unlock(&timers_state.vm_clock_seqlock);
492 timer_mod_anticipate(icount_warp_timer, clock + deadline);
ce78d18c 493 }
ac70aafc 494 } else if (deadline == 0) {
40daca54 495 qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
946fb27c
PB
496 }
497}
498
d09eae37
FK
499static bool icount_state_needed(void *opaque)
500{
501 return use_icount;
502}
503
504/*
505 * This is a subsection for icount migration.
506 */
507static const VMStateDescription icount_vmstate_timers = {
508 .name = "timer/icount",
509 .version_id = 1,
510 .minimum_version_id = 1,
5cd8cada 511 .needed = icount_state_needed,
d09eae37
FK
512 .fields = (VMStateField[]) {
513 VMSTATE_INT64(qemu_icount_bias, TimersState),
514 VMSTATE_INT64(qemu_icount, TimersState),
515 VMSTATE_END_OF_LIST()
516 }
517};
518
946fb27c
PB
519static const VMStateDescription vmstate_timers = {
520 .name = "timer",
521 .version_id = 2,
522 .minimum_version_id = 1,
35d08458 523 .fields = (VMStateField[]) {
946fb27c
PB
524 VMSTATE_INT64(cpu_ticks_offset, TimersState),
525 VMSTATE_INT64(dummy, TimersState),
526 VMSTATE_INT64_V(cpu_clock_offset, TimersState, 2),
527 VMSTATE_END_OF_LIST()
d09eae37 528 },
5cd8cada
JQ
529 .subsections = (const VMStateDescription*[]) {
530 &icount_vmstate_timers,
531 NULL
946fb27c
PB
532 }
533};
534
2adcc85d
JH
535static void cpu_throttle_thread(void *opaque)
536{
537 CPUState *cpu = opaque;
538 double pct;
539 double throttle_ratio;
540 long sleeptime_ns;
541
542 if (!cpu_throttle_get_percentage()) {
543 return;
544 }
545
546 pct = (double)cpu_throttle_get_percentage()/100;
547 throttle_ratio = pct / (1 - pct);
548 sleeptime_ns = (long)(throttle_ratio * CPU_THROTTLE_TIMESLICE_NS);
549
550 qemu_mutex_unlock_iothread();
551 atomic_set(&cpu->throttle_thread_scheduled, 0);
552 g_usleep(sleeptime_ns / 1000); /* Convert ns to us for usleep call */
553 qemu_mutex_lock_iothread();
554}
555
556static void cpu_throttle_timer_tick(void *opaque)
557{
558 CPUState *cpu;
559 double pct;
560
561 /* Stop the timer if needed */
562 if (!cpu_throttle_get_percentage()) {
563 return;
564 }
565 CPU_FOREACH(cpu) {
566 if (!atomic_xchg(&cpu->throttle_thread_scheduled, 1)) {
567 async_run_on_cpu(cpu, cpu_throttle_thread, cpu);
568 }
569 }
570
571 pct = (double)cpu_throttle_get_percentage()/100;
572 timer_mod(throttle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT) +
573 CPU_THROTTLE_TIMESLICE_NS / (1-pct));
574}
575
576void cpu_throttle_set(int new_throttle_pct)
577{
578 /* Ensure throttle percentage is within valid range */
579 new_throttle_pct = MIN(new_throttle_pct, CPU_THROTTLE_PCT_MAX);
580 new_throttle_pct = MAX(new_throttle_pct, CPU_THROTTLE_PCT_MIN);
581
582 atomic_set(&throttle_percentage, new_throttle_pct);
583
584 timer_mod(throttle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT) +
585 CPU_THROTTLE_TIMESLICE_NS);
586}
587
588void cpu_throttle_stop(void)
589{
590 atomic_set(&throttle_percentage, 0);
591}
592
/* True while a non-zero throttle percentage is set. */
bool cpu_throttle_active(void)
{
    int percentage = cpu_throttle_get_percentage();

    return percentage != 0;
}
597
598int cpu_throttle_get_percentage(void)
599{
600 return atomic_read(&throttle_percentage);
601}
602
4603ea01
PD
603void cpu_ticks_init(void)
604{
605 seqlock_init(&timers_state.vm_clock_seqlock, NULL);
606 vmstate_register(NULL, 0, &vmstate_timers, &timers_state);
2adcc85d
JH
607 throttle_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL_RT,
608 cpu_throttle_timer_tick, NULL);
4603ea01
PD
609}
610
1ad9580b 611void configure_icount(QemuOpts *opts, Error **errp)
946fb27c 612{
1ad9580b 613 const char *option;
a8bfac37 614 char *rem_str = NULL;
1ad9580b 615
1ad9580b 616 option = qemu_opt_get(opts, "shift");
946fb27c 617 if (!option) {
a8bfac37
ST
618 if (qemu_opt_get(opts, "align") != NULL) {
619 error_setg(errp, "Please specify shift option when using align");
620 }
946fb27c
PB
621 return;
622 }
f1f4b57e
VC
623
624 icount_sleep = qemu_opt_get_bool(opts, "sleep", true);
5045e9d9
VC
625 if (icount_sleep) {
626 icount_warp_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL_RT,
efab87cf 627 icount_dummy_timer, NULL);
5045e9d9 628 }
f1f4b57e 629
a8bfac37 630 icount_align_option = qemu_opt_get_bool(opts, "align", false);
f1f4b57e
VC
631
632 if (icount_align_option && !icount_sleep) {
633 error_setg(errp, "align=on and sleep=no are incompatible");
634 }
946fb27c 635 if (strcmp(option, "auto") != 0) {
a8bfac37
ST
636 errno = 0;
637 icount_time_shift = strtol(option, &rem_str, 0);
638 if (errno != 0 || *rem_str != '\0' || !strlen(option)) {
639 error_setg(errp, "icount: Invalid shift value");
640 }
946fb27c
PB
641 use_icount = 1;
642 return;
a8bfac37
ST
643 } else if (icount_align_option) {
644 error_setg(errp, "shift=auto and align=on are incompatible");
f1f4b57e
VC
645 } else if (!icount_sleep) {
646 error_setg(errp, "shift=auto and sleep=no are incompatible");
946fb27c
PB
647 }
648
649 use_icount = 2;
650
651 /* 125MIPS seems a reasonable initial guess at the guest speed.
652 It will be corrected fairly quickly anyway. */
653 icount_time_shift = 3;
654
655 /* Have both realtime and virtual time triggers for speed adjustment.
656 The realtime trigger catches emulated time passing too slowly,
657 the virtual time trigger catches emulated time passing too fast.
658 Realtime triggers occur even when idle, so use them less frequently
659 than VM triggers. */
bf2a7ddb
PD
660 icount_rt_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL_RT,
661 icount_adjust_rt, NULL);
40daca54 662 timer_mod(icount_rt_timer,
bf2a7ddb 663 qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL_RT) + 1000);
40daca54
AB
664 icount_vm_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
665 icount_adjust_vm, NULL);
666 timer_mod(icount_vm_timer,
667 qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
668 get_ticks_per_sec() / 10);
946fb27c
PB
669}
670
296af7c9
BS
671/***********************************************************/
672void hw_error(const char *fmt, ...)
673{
674 va_list ap;
55e5c285 675 CPUState *cpu;
296af7c9
BS
676
677 va_start(ap, fmt);
678 fprintf(stderr, "qemu: hardware error: ");
679 vfprintf(stderr, fmt, ap);
680 fprintf(stderr, "\n");
bdc44640 681 CPU_FOREACH(cpu) {
55e5c285 682 fprintf(stderr, "CPU #%d:\n", cpu->cpu_index);
878096ee 683 cpu_dump_state(cpu, stderr, fprintf, CPU_DUMP_FPU);
296af7c9
BS
684 }
685 va_end(ap);
686 abort();
687}
688
689void cpu_synchronize_all_states(void)
690{
182735ef 691 CPUState *cpu;
296af7c9 692
bdc44640 693 CPU_FOREACH(cpu) {
182735ef 694 cpu_synchronize_state(cpu);
296af7c9
BS
695 }
696}
697
698void cpu_synchronize_all_post_reset(void)
699{
182735ef 700 CPUState *cpu;
296af7c9 701
bdc44640 702 CPU_FOREACH(cpu) {
182735ef 703 cpu_synchronize_post_reset(cpu);
296af7c9
BS
704 }
705}
706
707void cpu_synchronize_all_post_init(void)
708{
182735ef 709 CPUState *cpu;
296af7c9 710
bdc44640 711 CPU_FOREACH(cpu) {
182735ef 712 cpu_synchronize_post_init(cpu);
296af7c9
BS
713 }
714}
715
56983463 716static int do_vm_stop(RunState state)
296af7c9 717{
56983463
KW
718 int ret = 0;
719
1354869c 720 if (runstate_is_running()) {
296af7c9 721 cpu_disable_ticks();
296af7c9 722 pause_all_vcpus();
f5bbfba1 723 runstate_set(state);
1dfb4dd9 724 vm_state_notify(0, state);
a4e15de9 725 qapi_event_send_stop(&error_abort);
296af7c9 726 }
56983463 727
594a45ce
KW
728 bdrv_drain_all();
729 ret = bdrv_flush_all();
730
56983463 731 return ret;
296af7c9
BS
732}
733
a1fcaa73 734static bool cpu_can_run(CPUState *cpu)
296af7c9 735{
4fdeee7c 736 if (cpu->stop) {
a1fcaa73 737 return false;
0ab07c62 738 }
321bc0b2 739 if (cpu_is_stopped(cpu)) {
a1fcaa73 740 return false;
0ab07c62 741 }
a1fcaa73 742 return true;
296af7c9
BS
743}
744
91325046 745static void cpu_handle_guest_debug(CPUState *cpu)
83f338f7 746{
64f6b346 747 gdb_set_stop_cpu(cpu);
8cf71710 748 qemu_system_debug_request();
f324e766 749 cpu->stopped = true;
3c638d06
JK
750}
751
6d9cb73c
JK
752#ifdef CONFIG_LINUX
753static void sigbus_reraise(void)
754{
755 sigset_t set;
756 struct sigaction action;
757
758 memset(&action, 0, sizeof(action));
759 action.sa_handler = SIG_DFL;
760 if (!sigaction(SIGBUS, &action, NULL)) {
761 raise(SIGBUS);
762 sigemptyset(&set);
763 sigaddset(&set, SIGBUS);
764 sigprocmask(SIG_UNBLOCK, &set, NULL);
765 }
766 perror("Failed to re-raise SIGBUS!\n");
767 abort();
768}
769
770static void sigbus_handler(int n, struct qemu_signalfd_siginfo *siginfo,
771 void *ctx)
772{
773 if (kvm_on_sigbus(siginfo->ssi_code,
774 (void *)(intptr_t)siginfo->ssi_addr)) {
775 sigbus_reraise();
776 }
777}
778
779static void qemu_init_sigbus(void)
780{
781 struct sigaction action;
782
783 memset(&action, 0, sizeof(action));
784 action.sa_flags = SA_SIGINFO;
785 action.sa_sigaction = (void (*)(int, siginfo_t*, void*))sigbus_handler;
786 sigaction(SIGBUS, &action, NULL);
787
788 prctl(PR_MCE_KILL, PR_MCE_KILL_SET, PR_MCE_KILL_EARLY, 0, 0);
789}
790
290adf38 791static void qemu_kvm_eat_signals(CPUState *cpu)
1ab3c6c0
JK
792{
793 struct timespec ts = { 0, 0 };
794 siginfo_t siginfo;
795 sigset_t waitset;
796 sigset_t chkset;
797 int r;
798
799 sigemptyset(&waitset);
800 sigaddset(&waitset, SIG_IPI);
801 sigaddset(&waitset, SIGBUS);
802
803 do {
804 r = sigtimedwait(&waitset, &siginfo, &ts);
805 if (r == -1 && !(errno == EAGAIN || errno == EINTR)) {
806 perror("sigtimedwait");
807 exit(1);
808 }
809
810 switch (r) {
811 case SIGBUS:
290adf38 812 if (kvm_on_sigbus_vcpu(cpu, siginfo.si_code, siginfo.si_addr)) {
1ab3c6c0
JK
813 sigbus_reraise();
814 }
815 break;
816 default:
817 break;
818 }
819
820 r = sigpending(&chkset);
821 if (r == -1) {
822 perror("sigpending");
823 exit(1);
824 }
825 } while (sigismember(&chkset, SIG_IPI) || sigismember(&chkset, SIGBUS));
1ab3c6c0
JK
826}
827
6d9cb73c
JK
828#else /* !CONFIG_LINUX */
829
/* Without CONFIG_LINUX there is no SIGBUS setup to perform. */
static void qemu_init_sigbus(void)
{
}
1ab3c6c0 833
290adf38 834static void qemu_kvm_eat_signals(CPUState *cpu)
1ab3c6c0
JK
835{
836}
6d9cb73c
JK
837#endif /* !CONFIG_LINUX */
838
296af7c9 839#ifndef _WIN32
55f8d6ac
JK
/* Signal handler that does nothing; it is installed for SIG_IPI by
 * qemu_kvm_init_cpu_signals().
 */
static void dummy_signal(int sig)
{
    (void)sig;
}
55f8d6ac 843
13618e05 844static void qemu_kvm_init_cpu_signals(CPUState *cpu)
714bd040
PB
845{
846 int r;
847 sigset_t set;
848 struct sigaction sigact;
849
850 memset(&sigact, 0, sizeof(sigact));
851 sigact.sa_handler = dummy_signal;
852 sigaction(SIG_IPI, &sigact, NULL);
853
714bd040
PB
854 pthread_sigmask(SIG_BLOCK, NULL, &set);
855 sigdelset(&set, SIG_IPI);
714bd040 856 sigdelset(&set, SIGBUS);
491d6e80 857 r = kvm_set_signal_mask(cpu, &set);
714bd040
PB
858 if (r) {
859 fprintf(stderr, "kvm_set_signal_mask: %s\n", strerror(-r));
860 exit(1);
861 }
862}
863
55f8d6ac 864#else /* _WIN32 */
13618e05 865static void qemu_kvm_init_cpu_signals(CPUState *cpu)
ff48eb5f 866{
714bd040
PB
867 abort();
868}
714bd040 869#endif /* _WIN32 */
ff48eb5f 870
b2532d88 871static QemuMutex qemu_global_mutex;
46daff13 872static QemuCond qemu_io_proceeded_cond;
6b49809c 873static unsigned iothread_requesting_mutex;
296af7c9
BS
874
875static QemuThread io_thread;
876
296af7c9
BS
877/* cpu creation */
878static QemuCond qemu_cpu_cond;
879/* system init */
296af7c9 880static QemuCond qemu_pause_cond;
e82bcec2 881static QemuCond qemu_work_cond;
296af7c9 882
d3b12f5d 883void qemu_init_cpu_loop(void)
296af7c9 884{
6d9cb73c 885 qemu_init_sigbus();
ed94592b 886 qemu_cond_init(&qemu_cpu_cond);
ed94592b
AL
887 qemu_cond_init(&qemu_pause_cond);
888 qemu_cond_init(&qemu_work_cond);
46daff13 889 qemu_cond_init(&qemu_io_proceeded_cond);
296af7c9 890 qemu_mutex_init(&qemu_global_mutex);
296af7c9 891
b7680cb6 892 qemu_thread_get_self(&io_thread);
296af7c9
BS
893}
894
f100f0b3 895void run_on_cpu(CPUState *cpu, void (*func)(void *data), void *data)
e82bcec2
MT
896{
897 struct qemu_work_item wi;
898
60e82579 899 if (qemu_cpu_is_self(cpu)) {
e82bcec2
MT
900 func(data);
901 return;
902 }
903
904 wi.func = func;
905 wi.data = data;
3c02270d 906 wi.free = false;
376692b9
PB
907
908 qemu_mutex_lock(&cpu->work_mutex);
c64ca814
AF
909 if (cpu->queued_work_first == NULL) {
910 cpu->queued_work_first = &wi;
0ab07c62 911 } else {
c64ca814 912 cpu->queued_work_last->next = &wi;
0ab07c62 913 }
c64ca814 914 cpu->queued_work_last = &wi;
e82bcec2
MT
915 wi.next = NULL;
916 wi.done = false;
376692b9 917 qemu_mutex_unlock(&cpu->work_mutex);
e82bcec2 918
c08d7424 919 qemu_cpu_kick(cpu);
376692b9 920 while (!atomic_mb_read(&wi.done)) {
4917cf44 921 CPUState *self_cpu = current_cpu;
e82bcec2
MT
922
923 qemu_cond_wait(&qemu_work_cond, &qemu_global_mutex);
4917cf44 924 current_cpu = self_cpu;
e82bcec2
MT
925 }
926}
927
3c02270d
CV
928void async_run_on_cpu(CPUState *cpu, void (*func)(void *data), void *data)
929{
930 struct qemu_work_item *wi;
931
932 if (qemu_cpu_is_self(cpu)) {
933 func(data);
934 return;
935 }
936
937 wi = g_malloc0(sizeof(struct qemu_work_item));
938 wi->func = func;
939 wi->data = data;
940 wi->free = true;
376692b9
PB
941
942 qemu_mutex_lock(&cpu->work_mutex);
3c02270d
CV
943 if (cpu->queued_work_first == NULL) {
944 cpu->queued_work_first = wi;
945 } else {
946 cpu->queued_work_last->next = wi;
947 }
948 cpu->queued_work_last = wi;
949 wi->next = NULL;
950 wi->done = false;
376692b9 951 qemu_mutex_unlock(&cpu->work_mutex);
3c02270d
CV
952
953 qemu_cpu_kick(cpu);
954}
955
6d45b109 956static void flush_queued_work(CPUState *cpu)
e82bcec2
MT
957{
958 struct qemu_work_item *wi;
959
c64ca814 960 if (cpu->queued_work_first == NULL) {
e82bcec2 961 return;
0ab07c62 962 }
e82bcec2 963
376692b9
PB
964 qemu_mutex_lock(&cpu->work_mutex);
965 while (cpu->queued_work_first != NULL) {
966 wi = cpu->queued_work_first;
c64ca814 967 cpu->queued_work_first = wi->next;
376692b9
PB
968 if (!cpu->queued_work_first) {
969 cpu->queued_work_last = NULL;
970 }
971 qemu_mutex_unlock(&cpu->work_mutex);
e82bcec2 972 wi->func(wi->data);
376692b9 973 qemu_mutex_lock(&cpu->work_mutex);
3c02270d
CV
974 if (wi->free) {
975 g_free(wi);
376692b9
PB
976 } else {
977 atomic_mb_set(&wi->done, true);
3c02270d 978 }
e82bcec2 979 }
376692b9 980 qemu_mutex_unlock(&cpu->work_mutex);
e82bcec2
MT
981 qemu_cond_broadcast(&qemu_work_cond);
982}
983
509a0d78 984static void qemu_wait_io_event_common(CPUState *cpu)
296af7c9 985{
4fdeee7c
AF
986 if (cpu->stop) {
987 cpu->stop = false;
f324e766 988 cpu->stopped = true;
296af7c9
BS
989 qemu_cond_signal(&qemu_pause_cond);
990 }
6d45b109 991 flush_queued_work(cpu);
216fc9a4 992 cpu->thread_kicked = false;
296af7c9
BS
993}
994
d5f8d613 995static void qemu_tcg_wait_io_event(CPUState *cpu)
296af7c9 996{
16400322 997 while (all_cpu_threads_idle()) {
ab33fcda
PB
998 /* Start accounting real time to the virtual clock if the CPUs
999 are idle. */
40daca54 1000 qemu_clock_warp(QEMU_CLOCK_VIRTUAL);
d5f8d613 1001 qemu_cond_wait(cpu->halt_cond, &qemu_global_mutex);
16400322 1002 }
296af7c9 1003
46daff13
PB
1004 while (iothread_requesting_mutex) {
1005 qemu_cond_wait(&qemu_io_proceeded_cond, &qemu_global_mutex);
1006 }
6cabe1f3 1007
bdc44640 1008 CPU_FOREACH(cpu) {
182735ef 1009 qemu_wait_io_event_common(cpu);
6cabe1f3 1010 }
296af7c9
BS
1011}
1012
fd529e8f 1013static void qemu_kvm_wait_io_event(CPUState *cpu)
296af7c9 1014{
a98ae1d8 1015 while (cpu_thread_is_idle(cpu)) {
f5c121b8 1016 qemu_cond_wait(cpu->halt_cond, &qemu_global_mutex);
16400322 1017 }
296af7c9 1018
290adf38 1019 qemu_kvm_eat_signals(cpu);
509a0d78 1020 qemu_wait_io_event_common(cpu);
296af7c9
BS
1021}
1022
7e97cd88 1023static void *qemu_kvm_cpu_thread_fn(void *arg)
296af7c9 1024{
48a106bd 1025 CPUState *cpu = arg;
84b4915d 1026 int r;
296af7c9 1027
ab28bd23
PB
1028 rcu_register_thread();
1029
2e7f7a3c 1030 qemu_mutex_lock_iothread();
814e612e 1031 qemu_thread_get_self(cpu->thread);
9f09e18a 1032 cpu->thread_id = qemu_get_thread_id();
626cf8f4 1033 cpu->can_do_io = 1;
4917cf44 1034 current_cpu = cpu;
296af7c9 1035
504134d2 1036 r = kvm_init_vcpu(cpu);
84b4915d
JK
1037 if (r < 0) {
1038 fprintf(stderr, "kvm_init_vcpu failed: %s\n", strerror(-r));
1039 exit(1);
1040 }
296af7c9 1041
13618e05 1042 qemu_kvm_init_cpu_signals(cpu);
296af7c9
BS
1043
1044 /* signal CPU creation */
61a46217 1045 cpu->created = true;
296af7c9
BS
1046 qemu_cond_signal(&qemu_cpu_cond);
1047
296af7c9 1048 while (1) {
a1fcaa73 1049 if (cpu_can_run(cpu)) {
1458c363 1050 r = kvm_cpu_exec(cpu);
83f338f7 1051 if (r == EXCP_DEBUG) {
91325046 1052 cpu_handle_guest_debug(cpu);
83f338f7 1053 }
0ab07c62 1054 }
fd529e8f 1055 qemu_kvm_wait_io_event(cpu);
296af7c9
BS
1056 }
1057
1058 return NULL;
1059}
1060
c7f0f3b1
AL
1061static void *qemu_dummy_cpu_thread_fn(void *arg)
1062{
1063#ifdef _WIN32
1064 fprintf(stderr, "qtest is not supported under Windows\n");
1065 exit(1);
1066#else
10a9021d 1067 CPUState *cpu = arg;
c7f0f3b1
AL
1068 sigset_t waitset;
1069 int r;
1070
ab28bd23
PB
1071 rcu_register_thread();
1072
c7f0f3b1 1073 qemu_mutex_lock_iothread();
814e612e 1074 qemu_thread_get_self(cpu->thread);
9f09e18a 1075 cpu->thread_id = qemu_get_thread_id();
626cf8f4 1076 cpu->can_do_io = 1;
c7f0f3b1
AL
1077
1078 sigemptyset(&waitset);
1079 sigaddset(&waitset, SIG_IPI);
1080
1081 /* signal CPU creation */
61a46217 1082 cpu->created = true;
c7f0f3b1
AL
1083 qemu_cond_signal(&qemu_cpu_cond);
1084
4917cf44 1085 current_cpu = cpu;
c7f0f3b1 1086 while (1) {
4917cf44 1087 current_cpu = NULL;
c7f0f3b1
AL
1088 qemu_mutex_unlock_iothread();
1089 do {
1090 int sig;
1091 r = sigwait(&waitset, &sig);
1092 } while (r == -1 && (errno == EAGAIN || errno == EINTR));
1093 if (r == -1) {
1094 perror("sigwait");
1095 exit(1);
1096 }
1097 qemu_mutex_lock_iothread();
4917cf44 1098 current_cpu = cpu;
509a0d78 1099 qemu_wait_io_event_common(cpu);
c7f0f3b1
AL
1100 }
1101
1102 return NULL;
1103#endif
1104}
1105
bdb7ca67
JK
1106static void tcg_exec_all(void);
1107
7e97cd88 1108static void *qemu_tcg_cpu_thread_fn(void *arg)
296af7c9 1109{
c3586ba7 1110 CPUState *cpu = arg;
296af7c9 1111
ab28bd23
PB
1112 rcu_register_thread();
1113
2e7f7a3c 1114 qemu_mutex_lock_iothread();
814e612e 1115 qemu_thread_get_self(cpu->thread);
296af7c9 1116
38fcbd3f
AF
1117 CPU_FOREACH(cpu) {
1118 cpu->thread_id = qemu_get_thread_id();
1119 cpu->created = true;
626cf8f4 1120 cpu->can_do_io = 1;
38fcbd3f 1121 }
296af7c9
BS
1122 qemu_cond_signal(&qemu_cpu_cond);
1123
fa7d1867 1124 /* wait for initial kick-off after machine start */
c28e399c 1125 while (first_cpu->stopped) {
d5f8d613 1126 qemu_cond_wait(first_cpu->halt_cond, &qemu_global_mutex);
8e564b4e
JK
1127
1128 /* process any pending work */
bdc44640 1129 CPU_FOREACH(cpu) {
182735ef 1130 qemu_wait_io_event_common(cpu);
8e564b4e 1131 }
0ab07c62 1132 }
296af7c9 1133
21618b3e 1134 /* process any pending work */
aed807c8 1135 atomic_mb_set(&exit_request, 1);
21618b3e 1136
296af7c9 1137 while (1) {
bdb7ca67 1138 tcg_exec_all();
ac70aafc
AB
1139
1140 if (use_icount) {
40daca54 1141 int64_t deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
ac70aafc
AB
1142
1143 if (deadline == 0) {
40daca54 1144 qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
ac70aafc 1145 }
3b2319a3 1146 }
d5f8d613 1147 qemu_tcg_wait_io_event(QTAILQ_FIRST(&cpus));
296af7c9
BS
1148 }
1149
1150 return NULL;
1151}
1152
2ff09a40 1153static void qemu_cpu_kick_thread(CPUState *cpu)
cc015e9a
PB
1154{
1155#ifndef _WIN32
1156 int err;
1157
e0c38211
PB
1158 if (cpu->thread_kicked) {
1159 return;
9102deda 1160 }
e0c38211 1161 cpu->thread_kicked = true;
814e612e 1162 err = pthread_kill(cpu->thread->thread, SIG_IPI);
cc015e9a
PB
1163 if (err) {
1164 fprintf(stderr, "qemu:%s: %s", __func__, strerror(err));
1165 exit(1);
1166 }
1167#else /* _WIN32 */
e0c38211
PB
1168 abort();
1169#endif
1170}
ed9164a3 1171
e0c38211
PB
1172static void qemu_cpu_kick_no_halt(void)
1173{
1174 CPUState *cpu;
1175 /* Ensure whatever caused the exit has reached the CPU threads before
1176 * writing exit_request.
1177 */
1178 atomic_mb_set(&exit_request, 1);
1179 cpu = atomic_mb_read(&tcg_current_cpu);
1180 if (cpu) {
1181 cpu_exit(cpu);
cc015e9a 1182 }
cc015e9a
PB
1183}
1184
c08d7424 1185void qemu_cpu_kick(CPUState *cpu)
296af7c9 1186{
f5c121b8 1187 qemu_cond_broadcast(cpu->halt_cond);
e0c38211
PB
1188 if (tcg_enabled()) {
1189 qemu_cpu_kick_no_halt();
1190 } else {
1191 qemu_cpu_kick_thread(cpu);
1192 }
296af7c9
BS
1193}
1194
46d62fac 1195void qemu_cpu_kick_self(void)
296af7c9 1196{
4917cf44 1197 assert(current_cpu);
9102deda 1198 qemu_cpu_kick_thread(current_cpu);
296af7c9
BS
1199}
1200
60e82579 1201bool qemu_cpu_is_self(CPUState *cpu)
296af7c9 1202{
814e612e 1203 return qemu_thread_is_self(cpu->thread);
296af7c9
BS
1204}
1205
79e2b9ae 1206bool qemu_in_vcpu_thread(void)
aa723c23 1207{
4917cf44 1208 return current_cpu && qemu_cpu_is_self(current_cpu);
aa723c23
JQ
1209}
1210
afbe7053
PB
/*
 * Per-thread flag: set by qemu_mutex_lock_iothread() and cleared by
 * qemu_mutex_unlock_iothread() for the calling thread.
 */
static __thread bool iothread_locked = false;

/* Report whether the calling thread has taken the iothread lock. */
bool qemu_mutex_iothread_locked(void)
{
    const bool held = iothread_locked;

    return held;
}
1217
296af7c9
BS
1218void qemu_mutex_lock_iothread(void)
1219{
21618b3e 1220 atomic_inc(&iothread_requesting_mutex);
2e7f7a3c
PB
1221 /* In the simple case there is no need to bump the VCPU thread out of
1222 * TCG code execution.
1223 */
1224 if (!tcg_enabled() || qemu_in_vcpu_thread() ||
46036b24 1225 !first_cpu || !first_cpu->created) {
296af7c9 1226 qemu_mutex_lock(&qemu_global_mutex);
21618b3e 1227 atomic_dec(&iothread_requesting_mutex);
1a28cac3 1228 } else {
1a28cac3 1229 if (qemu_mutex_trylock(&qemu_global_mutex)) {
e0c38211 1230 qemu_cpu_kick_no_halt();
1a28cac3
MT
1231 qemu_mutex_lock(&qemu_global_mutex);
1232 }
6b49809c 1233 atomic_dec(&iothread_requesting_mutex);
46daff13 1234 qemu_cond_broadcast(&qemu_io_proceeded_cond);
1a28cac3 1235 }
afbe7053 1236 iothread_locked = true;
296af7c9
BS
1237}
1238
1239void qemu_mutex_unlock_iothread(void)
1240{
afbe7053 1241 iothread_locked = false;
296af7c9
BS
1242 qemu_mutex_unlock(&qemu_global_mutex);
1243}
1244
1245static int all_vcpus_paused(void)
1246{
bdc44640 1247 CPUState *cpu;
296af7c9 1248
bdc44640 1249 CPU_FOREACH(cpu) {
182735ef 1250 if (!cpu->stopped) {
296af7c9 1251 return 0;
0ab07c62 1252 }
296af7c9
BS
1253 }
1254
1255 return 1;
1256}
1257
1258void pause_all_vcpus(void)
1259{
bdc44640 1260 CPUState *cpu;
296af7c9 1261
40daca54 1262 qemu_clock_enable(QEMU_CLOCK_VIRTUAL, false);
bdc44640 1263 CPU_FOREACH(cpu) {
182735ef
AF
1264 cpu->stop = true;
1265 qemu_cpu_kick(cpu);
296af7c9
BS
1266 }
1267
aa723c23 1268 if (qemu_in_vcpu_thread()) {
d798e974
JK
1269 cpu_stop_current();
1270 if (!kvm_enabled()) {
bdc44640 1271 CPU_FOREACH(cpu) {
182735ef
AF
1272 cpu->stop = false;
1273 cpu->stopped = true;
d798e974
JK
1274 }
1275 return;
1276 }
1277 }
1278
296af7c9 1279 while (!all_vcpus_paused()) {
be7d6c57 1280 qemu_cond_wait(&qemu_pause_cond, &qemu_global_mutex);
bdc44640 1281 CPU_FOREACH(cpu) {
182735ef 1282 qemu_cpu_kick(cpu);
296af7c9
BS
1283 }
1284 }
1285}
1286
2993683b
IM
1287void cpu_resume(CPUState *cpu)
1288{
1289 cpu->stop = false;
1290 cpu->stopped = false;
1291 qemu_cpu_kick(cpu);
1292}
1293
296af7c9
BS
1294void resume_all_vcpus(void)
1295{
bdc44640 1296 CPUState *cpu;
296af7c9 1297
40daca54 1298 qemu_clock_enable(QEMU_CLOCK_VIRTUAL, true);
bdc44640 1299 CPU_FOREACH(cpu) {
182735ef 1300 cpu_resume(cpu);
296af7c9
BS
1301 }
1302}
1303
4900116e
DDAG
1304/* For temporary buffers for forming a name */
1305#define VCPU_THREAD_NAME_SIZE 16
1306
e5ab30a2 1307static void qemu_tcg_init_vcpu(CPUState *cpu)
296af7c9 1308{
4900116e 1309 char thread_name[VCPU_THREAD_NAME_SIZE];
d5f8d613
FK
1310 static QemuCond *tcg_halt_cond;
1311 static QemuThread *tcg_cpu_thread;
4900116e 1312
296af7c9
BS
1313 /* share a single thread for all cpus with TCG */
1314 if (!tcg_cpu_thread) {
814e612e 1315 cpu->thread = g_malloc0(sizeof(QemuThread));
f5c121b8
AF
1316 cpu->halt_cond = g_malloc0(sizeof(QemuCond));
1317 qemu_cond_init(cpu->halt_cond);
1318 tcg_halt_cond = cpu->halt_cond;
4900116e
DDAG
1319 snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/TCG",
1320 cpu->cpu_index);
1321 qemu_thread_create(cpu->thread, thread_name, qemu_tcg_cpu_thread_fn,
1322 cpu, QEMU_THREAD_JOINABLE);
1ecf47bf 1323#ifdef _WIN32
814e612e 1324 cpu->hThread = qemu_thread_get_handle(cpu->thread);
1ecf47bf 1325#endif
61a46217 1326 while (!cpu->created) {
18a85728 1327 qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
0ab07c62 1328 }
814e612e 1329 tcg_cpu_thread = cpu->thread;
296af7c9 1330 } else {
814e612e 1331 cpu->thread = tcg_cpu_thread;
f5c121b8 1332 cpu->halt_cond = tcg_halt_cond;
296af7c9
BS
1333 }
1334}
1335
48a106bd 1336static void qemu_kvm_start_vcpu(CPUState *cpu)
296af7c9 1337{
4900116e
DDAG
1338 char thread_name[VCPU_THREAD_NAME_SIZE];
1339
814e612e 1340 cpu->thread = g_malloc0(sizeof(QemuThread));
f5c121b8
AF
1341 cpu->halt_cond = g_malloc0(sizeof(QemuCond));
1342 qemu_cond_init(cpu->halt_cond);
4900116e
DDAG
1343 snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/KVM",
1344 cpu->cpu_index);
1345 qemu_thread_create(cpu->thread, thread_name, qemu_kvm_cpu_thread_fn,
1346 cpu, QEMU_THREAD_JOINABLE);
61a46217 1347 while (!cpu->created) {
18a85728 1348 qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
0ab07c62 1349 }
296af7c9
BS
1350}
1351
10a9021d 1352static void qemu_dummy_start_vcpu(CPUState *cpu)
c7f0f3b1 1353{
4900116e
DDAG
1354 char thread_name[VCPU_THREAD_NAME_SIZE];
1355
814e612e 1356 cpu->thread = g_malloc0(sizeof(QemuThread));
f5c121b8
AF
1357 cpu->halt_cond = g_malloc0(sizeof(QemuCond));
1358 qemu_cond_init(cpu->halt_cond);
4900116e
DDAG
1359 snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/DUMMY",
1360 cpu->cpu_index);
1361 qemu_thread_create(cpu->thread, thread_name, qemu_dummy_cpu_thread_fn, cpu,
c7f0f3b1 1362 QEMU_THREAD_JOINABLE);
61a46217 1363 while (!cpu->created) {
c7f0f3b1
AL
1364 qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
1365 }
1366}
1367
c643bed9 1368void qemu_init_vcpu(CPUState *cpu)
296af7c9 1369{
ce3960eb
AF
1370 cpu->nr_cores = smp_cores;
1371 cpu->nr_threads = smp_threads;
f324e766 1372 cpu->stopped = true;
56943e8c
PM
1373
1374 if (!cpu->as) {
1375 /* If the target cpu hasn't set up any address spaces itself,
1376 * give it the default one.
1377 */
12ebc9a7 1378 cpu->num_ases = 1;
56943e8c
PM
1379 cpu_address_space_init(cpu, &address_space_memory, 0);
1380 }
1381
0ab07c62 1382 if (kvm_enabled()) {
48a106bd 1383 qemu_kvm_start_vcpu(cpu);
c7f0f3b1 1384 } else if (tcg_enabled()) {
e5ab30a2 1385 qemu_tcg_init_vcpu(cpu);
c7f0f3b1 1386 } else {
10a9021d 1387 qemu_dummy_start_vcpu(cpu);
0ab07c62 1388 }
296af7c9
BS
1389}
1390
b4a3d965 1391void cpu_stop_current(void)
296af7c9 1392{
4917cf44
AF
1393 if (current_cpu) {
1394 current_cpu->stop = false;
1395 current_cpu->stopped = true;
1396 cpu_exit(current_cpu);
67bb172f 1397 qemu_cond_signal(&qemu_pause_cond);
b4a3d965 1398 }
296af7c9
BS
1399}
1400
56983463 1401int vm_stop(RunState state)
296af7c9 1402{
aa723c23 1403 if (qemu_in_vcpu_thread()) {
74892d24 1404 qemu_system_vmstop_request_prepare();
1dfb4dd9 1405 qemu_system_vmstop_request(state);
296af7c9
BS
1406 /*
1407 * FIXME: should not return to device code in case
1408 * vm_stop() has been requested.
1409 */
b4a3d965 1410 cpu_stop_current();
56983463 1411 return 0;
296af7c9 1412 }
56983463
KW
1413
1414 return do_vm_stop(state);
296af7c9
BS
1415}
1416
8a9236f1
LC
1417/* does a state transition even if the VM is already stopped,
1418 current state is forgotten forever */
56983463 1419int vm_stop_force_state(RunState state)
8a9236f1
LC
1420{
1421 if (runstate_is_running()) {
56983463 1422 return vm_stop(state);
8a9236f1
LC
1423 } else {
1424 runstate_set(state);
b2780d32
WC
1425
1426 bdrv_drain_all();
594a45ce
KW
1427 /* Make sure to return an error if the flush in a previous vm_stop()
1428 * failed. */
1429 return bdrv_flush_all();
8a9236f1
LC
1430 }
1431}
1432
8b427044
PD
1433static int64_t tcg_get_icount_limit(void)
1434{
1435 int64_t deadline;
1436
1437 if (replay_mode != REPLAY_MODE_PLAY) {
1438 deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
1439
1440 /* Maintain prior (possibly buggy) behaviour where if no deadline
1441 * was set (as there is no QEMU_CLOCK_VIRTUAL timer) or it is more than
1442 * INT32_MAX nanoseconds ahead, we still use INT32_MAX
1443 * nanoseconds.
1444 */
1445 if ((deadline < 0) || (deadline > INT32_MAX)) {
1446 deadline = INT32_MAX;
1447 }
1448
1449 return qemu_icount_round(deadline);
1450 } else {
1451 return replay_get_instructions();
1452 }
1453}
1454
3d57f789 1455static int tcg_cpu_exec(CPUState *cpu)
296af7c9
BS
1456{
1457 int ret;
1458#ifdef CONFIG_PROFILER
1459 int64_t ti;
1460#endif
1461
1462#ifdef CONFIG_PROFILER
1463 ti = profile_getclock();
1464#endif
1465 if (use_icount) {
1466 int64_t count;
1467 int decr;
c96778bb
FK
1468 timers_state.qemu_icount -= (cpu->icount_decr.u16.low
1469 + cpu->icount_extra);
28ecfd7a 1470 cpu->icount_decr.u16.low = 0;
efee7340 1471 cpu->icount_extra = 0;
8b427044 1472 count = tcg_get_icount_limit();
c96778bb 1473 timers_state.qemu_icount += count;
296af7c9
BS
1474 decr = (count > 0xffff) ? 0xffff : count;
1475 count -= decr;
28ecfd7a 1476 cpu->icount_decr.u16.low = decr;
efee7340 1477 cpu->icount_extra = count;
296af7c9 1478 }
ea3e9847 1479 ret = cpu_exec(cpu);
296af7c9 1480#ifdef CONFIG_PROFILER
89d5cbdd 1481 tcg_time += profile_getclock() - ti;
296af7c9
BS
1482#endif
1483 if (use_icount) {
1484 /* Fold pending instructions back into the
1485 instruction counter, and clear the interrupt flag. */
c96778bb
FK
1486 timers_state.qemu_icount -= (cpu->icount_decr.u16.low
1487 + cpu->icount_extra);
28ecfd7a 1488 cpu->icount_decr.u32 = 0;
efee7340 1489 cpu->icount_extra = 0;
8b427044 1490 replay_account_executed_instructions();
296af7c9
BS
1491 }
1492 return ret;
1493}
1494
bdb7ca67 1495static void tcg_exec_all(void)
296af7c9 1496{
9a36085b
JK
1497 int r;
1498
40daca54
AB
1499 /* Account partial waits to QEMU_CLOCK_VIRTUAL. */
1500 qemu_clock_warp(QEMU_CLOCK_VIRTUAL);
ab33fcda 1501
0ab07c62 1502 if (next_cpu == NULL) {
296af7c9 1503 next_cpu = first_cpu;
0ab07c62 1504 }
bdc44640 1505 for (; next_cpu != NULL && !exit_request; next_cpu = CPU_NEXT(next_cpu)) {
182735ef 1506 CPUState *cpu = next_cpu;
296af7c9 1507
40daca54 1508 qemu_clock_enable(QEMU_CLOCK_VIRTUAL,
ed2803da 1509 (cpu->singlestep_enabled & SSTEP_NOTIMER) == 0);
296af7c9 1510
a1fcaa73 1511 if (cpu_can_run(cpu)) {
3d57f789 1512 r = tcg_cpu_exec(cpu);
9a36085b 1513 if (r == EXCP_DEBUG) {
91325046 1514 cpu_handle_guest_debug(cpu);
3c638d06
JK
1515 break;
1516 }
f324e766 1517 } else if (cpu->stop || cpu->stopped) {
296af7c9
BS
1518 break;
1519 }
1520 }
aed807c8
PB
1521
1522 /* Pairs with smp_wmb in qemu_cpu_kick. */
1523 atomic_mb_set(&exit_request, 0);
296af7c9
BS
1524}
1525
9a78eead 1526void list_cpus(FILE *f, fprintf_function cpu_fprintf, const char *optarg)
262353cb
BS
1527{
1528 /* XXX: implement xxx_cpu_list for targets that still miss it */
e916cbf8
PM
1529#if defined(cpu_list)
1530 cpu_list(f, cpu_fprintf);
262353cb
BS
1531#endif
1532}
de0b36b6
LC
1533
1534CpuInfoList *qmp_query_cpus(Error **errp)
1535{
1536 CpuInfoList *head = NULL, *cur_item = NULL;
182735ef 1537 CPUState *cpu;
de0b36b6 1538
bdc44640 1539 CPU_FOREACH(cpu) {
de0b36b6 1540 CpuInfoList *info;
182735ef
AF
1541#if defined(TARGET_I386)
1542 X86CPU *x86_cpu = X86_CPU(cpu);
1543 CPUX86State *env = &x86_cpu->env;
1544#elif defined(TARGET_PPC)
1545 PowerPCCPU *ppc_cpu = POWERPC_CPU(cpu);
1546 CPUPPCState *env = &ppc_cpu->env;
1547#elif defined(TARGET_SPARC)
1548 SPARCCPU *sparc_cpu = SPARC_CPU(cpu);
1549 CPUSPARCState *env = &sparc_cpu->env;
1550#elif defined(TARGET_MIPS)
1551 MIPSCPU *mips_cpu = MIPS_CPU(cpu);
1552 CPUMIPSState *env = &mips_cpu->env;
48e06fe0
BK
1553#elif defined(TARGET_TRICORE)
1554 TriCoreCPU *tricore_cpu = TRICORE_CPU(cpu);
1555 CPUTriCoreState *env = &tricore_cpu->env;
182735ef 1556#endif
de0b36b6 1557
cb446eca 1558 cpu_synchronize_state(cpu);
de0b36b6
LC
1559
1560 info = g_malloc0(sizeof(*info));
1561 info->value = g_malloc0(sizeof(*info->value));
55e5c285 1562 info->value->CPU = cpu->cpu_index;
182735ef 1563 info->value->current = (cpu == first_cpu);
259186a7 1564 info->value->halted = cpu->halted;
58f88d4b 1565 info->value->qom_path = object_get_canonical_path(OBJECT(cpu));
9f09e18a 1566 info->value->thread_id = cpu->thread_id;
de0b36b6 1567#if defined(TARGET_I386)
86f4b687
EB
1568 info->value->arch = CPU_INFO_ARCH_X86;
1569 info->value->u.x86 = g_new0(CpuInfoX86, 1);
1570 info->value->u.x86->pc = env->eip + env->segs[R_CS].base;
de0b36b6 1571#elif defined(TARGET_PPC)
86f4b687
EB
1572 info->value->arch = CPU_INFO_ARCH_PPC;
1573 info->value->u.ppc = g_new0(CpuInfoPPC, 1);
1574 info->value->u.ppc->nip = env->nip;
de0b36b6 1575#elif defined(TARGET_SPARC)
86f4b687
EB
1576 info->value->arch = CPU_INFO_ARCH_SPARC;
1577 info->value->u.sparc = g_new0(CpuInfoSPARC, 1);
1578 info->value->u.sparc->pc = env->pc;
1579 info->value->u.sparc->npc = env->npc;
de0b36b6 1580#elif defined(TARGET_MIPS)
86f4b687
EB
1581 info->value->arch = CPU_INFO_ARCH_MIPS;
1582 info->value->u.mips = g_new0(CpuInfoMIPS, 1);
1583 info->value->u.mips->PC = env->active_tc.PC;
48e06fe0 1584#elif defined(TARGET_TRICORE)
86f4b687
EB
1585 info->value->arch = CPU_INFO_ARCH_TRICORE;
1586 info->value->u.tricore = g_new0(CpuInfoTricore, 1);
1587 info->value->u.tricore->PC = env->PC;
1588#else
1589 info->value->arch = CPU_INFO_ARCH_OTHER;
1590 info->value->u.other = g_new0(CpuInfoOther, 1);
de0b36b6
LC
1591#endif
1592
1593 /* XXX: waiting for the qapi to support GSList */
1594 if (!cur_item) {
1595 head = cur_item = info;
1596 } else {
1597 cur_item->next = info;
1598 cur_item = info;
1599 }
1600 }
1601
1602 return head;
1603}
0cfd6a9a
LC
1604
1605void qmp_memsave(int64_t addr, int64_t size, const char *filename,
1606 bool has_cpu, int64_t cpu_index, Error **errp)
1607{
1608 FILE *f;
1609 uint32_t l;
55e5c285 1610 CPUState *cpu;
0cfd6a9a 1611 uint8_t buf[1024];
0dc9daf0 1612 int64_t orig_addr = addr, orig_size = size;
0cfd6a9a
LC
1613
1614 if (!has_cpu) {
1615 cpu_index = 0;
1616 }
1617
151d1322
AF
1618 cpu = qemu_get_cpu(cpu_index);
1619 if (cpu == NULL) {
c6bd8c70
MA
1620 error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "cpu-index",
1621 "a CPU number");
0cfd6a9a
LC
1622 return;
1623 }
1624
1625 f = fopen(filename, "wb");
1626 if (!f) {
618da851 1627 error_setg_file_open(errp, errno, filename);
0cfd6a9a
LC
1628 return;
1629 }
1630
1631 while (size != 0) {
1632 l = sizeof(buf);
1633 if (l > size)
1634 l = size;
2f4d0f59 1635 if (cpu_memory_rw_debug(cpu, addr, buf, l, 0) != 0) {
0dc9daf0
BP
1636 error_setg(errp, "Invalid addr 0x%016" PRIx64 "/size %" PRId64
1637 " specified", orig_addr, orig_size);
2f4d0f59
AK
1638 goto exit;
1639 }
0cfd6a9a 1640 if (fwrite(buf, 1, l, f) != l) {
c6bd8c70 1641 error_setg(errp, QERR_IO_ERROR);
0cfd6a9a
LC
1642 goto exit;
1643 }
1644 addr += l;
1645 size -= l;
1646 }
1647
1648exit:
1649 fclose(f);
1650}
6d3962bf
LC
1651
1652void qmp_pmemsave(int64_t addr, int64_t size, const char *filename,
1653 Error **errp)
1654{
1655 FILE *f;
1656 uint32_t l;
1657 uint8_t buf[1024];
1658
1659 f = fopen(filename, "wb");
1660 if (!f) {
618da851 1661 error_setg_file_open(errp, errno, filename);
6d3962bf
LC
1662 return;
1663 }
1664
1665 while (size != 0) {
1666 l = sizeof(buf);
1667 if (l > size)
1668 l = size;
eb6282f2 1669 cpu_physical_memory_read(addr, buf, l);
6d3962bf 1670 if (fwrite(buf, 1, l, f) != l) {
c6bd8c70 1671 error_setg(errp, QERR_IO_ERROR);
6d3962bf
LC
1672 goto exit;
1673 }
1674 addr += l;
1675 size -= l;
1676 }
1677
1678exit:
1679 fclose(f);
1680}
ab49ab5c
LC
1681
1682void qmp_inject_nmi(Error **errp)
1683{
1684#if defined(TARGET_I386)
182735ef
AF
1685 CPUState *cs;
1686
bdc44640 1687 CPU_FOREACH(cs) {
182735ef 1688 X86CPU *cpu = X86_CPU(cs);
ab49ab5c 1689
02e51483 1690 if (!cpu->apic_state) {
182735ef 1691 cpu_interrupt(cs, CPU_INTERRUPT_NMI);
02c09195 1692 } else {
02e51483 1693 apic_deliver_nmi(cpu->apic_state);
02c09195 1694 }
ab49ab5c
LC
1695 }
1696#else
9cb805fd 1697 nmi_monitor_handle(monitor_get_cpu_index(), errp);
ab49ab5c
LC
1698#endif
1699}
27498bef
ST
1700
1701void dump_drift_info(FILE *f, fprintf_function cpu_fprintf)
1702{
1703 if (!use_icount) {
1704 return;
1705 }
1706
1707 cpu_fprintf(f, "Host - Guest clock %"PRIi64" ms\n",
1708 (cpu_get_clock() - cpu_get_icount())/SCALE_MS);
1709 if (icount_align_option) {
1710 cpu_fprintf(f, "Max guest delay %"PRIi64" ms\n", -max_delay/SCALE_MS);
1711 cpu_fprintf(f, "Max guest advance %"PRIi64" ms\n", max_advance/SCALE_MS);
1712 } else {
1713 cpu_fprintf(f, "Max guest delay NA\n");
1714 cpu_fprintf(f, "Max guest advance NA\n");
1715 }
1716}