/*
 * QEMU System Emulator
 *
 * Copyright (c) 2003-2008 Fabrice Bellard
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

/* Needed early for CONFIG_BSD etc. */
#include "qemu/osdep.h"

#include "monitor/monitor.h"
#include "qapi/qmp/qerror.h"
#include "qemu/error-report.h"
#include "sysemu/sysemu.h"
#include "exec/gdbstub.h"
#include "sysemu/dma.h"
#include "sysemu/kvm.h"
#include "qmp-commands.h"

#include "qemu/thread.h"
#include "sysemu/cpus.h"
#include "sysemu/qtest.h"
#include "qemu/main-loop.h"
#include "qemu/bitmap.h"
#include "qemu/seqlock.h"
#include "qapi-event.h"
#include "hw/nmi.h"
#include "sysemu/replay.h"

#ifndef _WIN32
#include "qemu/compatfd.h"
#endif

#ifdef CONFIG_LINUX

#include <sys/prctl.h>

#ifndef PR_MCE_KILL
#define PR_MCE_KILL 33
#endif

#ifndef PR_MCE_KILL_SET
#define PR_MCE_KILL_SET 1
#endif

#ifndef PR_MCE_KILL_EARLY
#define PR_MCE_KILL_EARLY 1
#endif

#endif /* CONFIG_LINUX */

static CPUState *next_cpu;
int64_t max_delay;
int64_t max_advance;

/* vcpu throttling controls */
static QEMUTimer *throttle_timer;
static unsigned int throttle_percentage;

#define CPU_THROTTLE_PCT_MIN 1
#define CPU_THROTTLE_PCT_MAX 99
#define CPU_THROTTLE_TIMESLICE_NS 10000000

bool cpu_is_stopped(CPUState *cpu)
{
    return cpu->stopped || !runstate_is_running();
}

static bool cpu_thread_is_idle(CPUState *cpu)
{
    if (cpu->stop || cpu->queued_work_first) {
        return false;
    }
    if (cpu_is_stopped(cpu)) {
        return true;
    }
    if (!cpu->halted || cpu_has_work(cpu) ||
        kvm_halt_in_kernel()) {
        return false;
    }
    return true;
}

static bool all_cpu_threads_idle(void)
{
    CPUState *cpu;

    CPU_FOREACH(cpu) {
        if (!cpu_thread_is_idle(cpu)) {
            return false;
        }
    }
    return true;
}

/***********************************************************/
/* guest cycle counter */

/* Protected by TimersState seqlock */

static bool icount_sleep = true;
static int64_t vm_clock_warp_start = -1;
/* Conversion factor from emulated instructions to virtual clock ticks. */
static int icount_time_shift;
/* Arbitrarily pick 1MIPS as the minimum allowable speed. */
#define MAX_ICOUNT_SHIFT 10

static QEMUTimer *icount_rt_timer;
static QEMUTimer *icount_vm_timer;
static QEMUTimer *icount_warp_timer;

typedef struct TimersState {
    /* Protected by BQL. */
    int64_t cpu_ticks_prev;
    int64_t cpu_ticks_offset;

    /* cpu_clock_offset can be read out of BQL, so protect it with
     * this lock.
     */
    QemuSeqLock vm_clock_seqlock;
    int64_t cpu_clock_offset;
    int32_t cpu_ticks_enabled;
    int64_t dummy;

    /* Compensate for varying guest execution speed. */
    int64_t qemu_icount_bias;
    /* Only written by TCG thread */
    int64_t qemu_icount;
} TimersState;

static TimersState timers_state;

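/* Concurrency note for TimersState: writers below (icount_adjust(),
 * icount_warp_rt(), cpu_enable_ticks(), ...) hold the BQL and wrap their
 * updates in seqlock_write_lock()/seqlock_write_unlock(), while lock-free
 * readers such as cpu_get_icount() retry with seqlock_read_begin()/
 * seqlock_read_retry() until they observe a consistent snapshot.
 */
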
int64_t cpu_get_icount_raw(void)
{
    int64_t icount;
    CPUState *cpu = current_cpu;

    icount = timers_state.qemu_icount;
    if (cpu) {
        if (!cpu->can_do_io) {
            fprintf(stderr, "Bad icount read\n");
            exit(1);
        }
        icount -= (cpu->icount_decr.u16.low + cpu->icount_extra);
    }
    return icount;
}

/* Return the virtual CPU time, based on the instruction counter. */
static int64_t cpu_get_icount_locked(void)
{
    int64_t icount = cpu_get_icount_raw();
    return timers_state.qemu_icount_bias + cpu_icount_to_ns(icount);
}

int64_t cpu_get_icount(void)
{
    int64_t icount;
    unsigned start;

    do {
        start = seqlock_read_begin(&timers_state.vm_clock_seqlock);
        icount = cpu_get_icount_locked();
    } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start));

    return icount;
}

int64_t cpu_icount_to_ns(int64_t icount)
{
    return icount << icount_time_shift;
}

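/* Worked example of the conversion above: with icount_time_shift == 3 (the
 * initial value chosen in configure_icount() below), each instruction
 * accounts for 2^3 = 8 ns of virtual time, i.e. the guest nominally runs at
 * 1e9 / 8 = 125 million instructions per simulated second.  icount_adjust()
 * retunes the shift at runtime when shift=auto is used.
 */
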
/* return the host CPU cycle counter and handle stop/restart */
/* Caller must hold the BQL */
int64_t cpu_get_ticks(void)
{
    int64_t ticks;

    if (use_icount) {
        return cpu_get_icount();
    }

    ticks = timers_state.cpu_ticks_offset;
    if (timers_state.cpu_ticks_enabled) {
        ticks += cpu_get_host_ticks();
    }

    if (timers_state.cpu_ticks_prev > ticks) {
        /* Note: non-increasing ticks may happen if the host uses
           software suspend */
        timers_state.cpu_ticks_offset += timers_state.cpu_ticks_prev - ticks;
        ticks = timers_state.cpu_ticks_prev;
    }

    timers_state.cpu_ticks_prev = ticks;
    return ticks;
}

static int64_t cpu_get_clock_locked(void)
{
    int64_t ticks;

    ticks = timers_state.cpu_clock_offset;
    if (timers_state.cpu_ticks_enabled) {
        ticks += get_clock();
    }

    return ticks;
}

/* return the host CPU monotonic timer and handle stop/restart */
int64_t cpu_get_clock(void)
{
    int64_t ti;
    unsigned start;

    do {
        start = seqlock_read_begin(&timers_state.vm_clock_seqlock);
        ti = cpu_get_clock_locked();
    } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start));

    return ti;
}

/* enable cpu_get_ticks()
 * Caller must hold BQL which serves as mutex for vm_clock_seqlock.
 */
void cpu_enable_ticks(void)
{
    /* Here, the real thing protected by seqlock is cpu_clock_offset. */
    seqlock_write_lock(&timers_state.vm_clock_seqlock);
    if (!timers_state.cpu_ticks_enabled) {
        timers_state.cpu_ticks_offset -= cpu_get_host_ticks();
        timers_state.cpu_clock_offset -= get_clock();
        timers_state.cpu_ticks_enabled = 1;
    }
    seqlock_write_unlock(&timers_state.vm_clock_seqlock);
}

/* disable cpu_get_ticks() : the clock is stopped. You must not call
 * cpu_get_ticks() after that.
 * Caller must hold BQL which serves as mutex for vm_clock_seqlock.
 */
void cpu_disable_ticks(void)
{
    /* Here, the real thing protected by seqlock is cpu_clock_offset. */
    seqlock_write_lock(&timers_state.vm_clock_seqlock);
    if (timers_state.cpu_ticks_enabled) {
        timers_state.cpu_ticks_offset += cpu_get_host_ticks();
        timers_state.cpu_clock_offset = cpu_get_clock_locked();
        timers_state.cpu_ticks_enabled = 0;
    }
    seqlock_write_unlock(&timers_state.vm_clock_seqlock);
}

274/* Correlation between real and virtual time is always going to be
275 fairly approximate, so ignore small variation.
276 When the guest is idle real and virtual time will be aligned in
277 the IO wait loop. */
278#define ICOUNT_WOBBLE (get_ticks_per_sec() / 10)
279
280static void icount_adjust(void)
281{
282 int64_t cur_time;
283 int64_t cur_icount;
284 int64_t delta;
a3270e19
PB
285
286 /* Protected by TimersState mutex. */
946fb27c 287 static int64_t last_delta;
468cc7cf 288
946fb27c
PB
289 /* If the VM is not running, then do nothing. */
290 if (!runstate_is_running()) {
291 return;
292 }
468cc7cf 293
17a15f1b
PB
294 seqlock_write_lock(&timers_state.vm_clock_seqlock);
295 cur_time = cpu_get_clock_locked();
296 cur_icount = cpu_get_icount_locked();
468cc7cf 297
946fb27c
PB
298 delta = cur_icount - cur_time;
299 /* FIXME: This is a very crude algorithm, somewhat prone to oscillation. */
300 if (delta > 0
301 && last_delta + ICOUNT_WOBBLE < delta * 2
302 && icount_time_shift > 0) {
303 /* The guest is getting too far ahead. Slow time down. */
304 icount_time_shift--;
305 }
306 if (delta < 0
307 && last_delta - ICOUNT_WOBBLE > delta * 2
308 && icount_time_shift < MAX_ICOUNT_SHIFT) {
309 /* The guest is getting too far behind. Speed time up. */
310 icount_time_shift++;
311 }
312 last_delta = delta;
c96778bb
FK
313 timers_state.qemu_icount_bias = cur_icount
314 - (timers_state.qemu_icount << icount_time_shift);
17a15f1b 315 seqlock_write_unlock(&timers_state.vm_clock_seqlock);
946fb27c
PB
316}
317
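/* The bias update at the end of icount_adjust() keeps the virtual clock
 * continuous across a change of icount_time_shift: it re-solves
 * cur_icount == qemu_icount_bias + (qemu_icount << icount_time_shift)
 * for the bias, so only the rate at which future instructions advance
 * QEMU_CLOCK_VIRTUAL changes, never the current reading.
 */
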
static void icount_adjust_rt(void *opaque)
{
    timer_mod(icount_rt_timer,
              qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL_RT) + 1000);
    icount_adjust();
}

static void icount_adjust_vm(void *opaque)
{
    timer_mod(icount_vm_timer,
              qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
              get_ticks_per_sec() / 10);
    icount_adjust();
}

static int64_t qemu_icount_round(int64_t count)
{
    return (count + (1 << icount_time_shift) - 1) >> icount_time_shift;
}

static void icount_warp_rt(void)
{
    /* The icount_warp_timer is rescheduled soon after vm_clock_warp_start
     * changes from -1 to another value, so the race here is okay.
     */
    if (atomic_read(&vm_clock_warp_start) == -1) {
        return;
    }

    seqlock_write_lock(&timers_state.vm_clock_seqlock);
    if (runstate_is_running()) {
        int64_t clock = REPLAY_CLOCK(REPLAY_CLOCK_VIRTUAL_RT,
                                     cpu_get_clock_locked());
        int64_t warp_delta;

        warp_delta = clock - vm_clock_warp_start;
        if (use_icount == 2) {
            /*
             * In adaptive mode, do not let QEMU_CLOCK_VIRTUAL run too
             * far ahead of real time.
             */
            int64_t cur_icount = cpu_get_icount_locked();
            int64_t delta = clock - cur_icount;
            warp_delta = MIN(warp_delta, delta);
        }
        timers_state.qemu_icount_bias += warp_delta;
    }
    vm_clock_warp_start = -1;
    seqlock_write_unlock(&timers_state.vm_clock_seqlock);

    if (qemu_clock_expired(QEMU_CLOCK_VIRTUAL)) {
        qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
    }
}

static void icount_timer_cb(void *opaque)
{
    /* No need for a checkpoint because the timer already synchronizes
     * with CHECKPOINT_CLOCK_VIRTUAL_RT.
     */
    icount_warp_rt();
}

void qtest_clock_warp(int64_t dest)
{
    int64_t clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
    AioContext *aio_context;
    assert(qtest_enabled());
    aio_context = qemu_get_aio_context();
    while (clock < dest) {
        int64_t deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
        int64_t warp = qemu_soonest_timeout(dest - clock, deadline);

        seqlock_write_lock(&timers_state.vm_clock_seqlock);
        timers_state.qemu_icount_bias += warp;
        seqlock_write_unlock(&timers_state.vm_clock_seqlock);

        qemu_clock_run_timers(QEMU_CLOCK_VIRTUAL);
        timerlist_run_timers(aio_context->tlg.tl[QEMU_CLOCK_VIRTUAL]);
        clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
    }
    qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
}

void qemu_start_warp_timer(void)
{
    int64_t clock;
    int64_t deadline;

    if (!use_icount) {
        return;
    }

    /* Nothing to do if the VM is stopped: QEMU_CLOCK_VIRTUAL timers
     * do not fire, so computing the deadline does not make sense.
     */
    if (!runstate_is_running()) {
        return;
    }

    /* warp clock deterministically in record/replay mode */
    if (!replay_checkpoint(CHECKPOINT_CLOCK_WARP_START)) {
        return;
    }

    if (!all_cpu_threads_idle()) {
        return;
    }

    if (qtest_enabled()) {
        /* When testing, qtest commands advance icount. */
        return;
    }

    /* We want to use the earliest deadline from ALL vm_clocks */
    clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT);
    deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
    if (deadline < 0) {
        static bool notified;
        if (!icount_sleep && !notified) {
            error_report("WARNING: icount sleep disabled and no active timers");
            notified = true;
        }
        return;
    }

    if (deadline > 0) {
        /*
         * Ensure QEMU_CLOCK_VIRTUAL proceeds even when the virtual CPU goes to
         * sleep. Otherwise, the CPU might be waiting for a future timer
         * interrupt to wake it up, but the interrupt never comes because
         * the vCPU isn't running any insns and thus doesn't advance the
         * QEMU_CLOCK_VIRTUAL.
         */
        if (!icount_sleep) {
            /*
             * We never let VCPUs sleep in no sleep icount mode.
             * If there is a pending QEMU_CLOCK_VIRTUAL timer we just advance
             * to the next QEMU_CLOCK_VIRTUAL event and notify it.
             * It is useful when we want a deterministic execution time,
             * isolated from host latencies.
             */
            seqlock_write_lock(&timers_state.vm_clock_seqlock);
            timers_state.qemu_icount_bias += deadline;
            seqlock_write_unlock(&timers_state.vm_clock_seqlock);
            qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
        } else {
            /*
             * We do stop VCPUs and only advance QEMU_CLOCK_VIRTUAL after some
             * "real" time (related to the time left until the next event) has
             * passed. The QEMU_CLOCK_VIRTUAL_RT clock will do this.
             * This avoids warps becoming visible externally; for example,
             * you will not be sending network packets continuously instead of
             * every 100ms.
             */
            seqlock_write_lock(&timers_state.vm_clock_seqlock);
            if (vm_clock_warp_start == -1 || vm_clock_warp_start > clock) {
                vm_clock_warp_start = clock;
            }
            seqlock_write_unlock(&timers_state.vm_clock_seqlock);
            timer_mod_anticipate(icount_warp_timer, clock + deadline);
        }
    } else if (deadline == 0) {
        qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
    }
}

static void qemu_account_warp_timer(void)
{
    if (!use_icount || !icount_sleep) {
        return;
    }

    /* Nothing to do if the VM is stopped: QEMU_CLOCK_VIRTUAL timers
     * do not fire, so computing the deadline does not make sense.
     */
    if (!runstate_is_running()) {
        return;
    }

    /* warp clock deterministically in record/replay mode */
    if (!replay_checkpoint(CHECKPOINT_CLOCK_WARP_ACCOUNT)) {
        return;
    }

    timer_del(icount_warp_timer);
    icount_warp_rt();
}

static bool icount_state_needed(void *opaque)
{
    return use_icount;
}

/*
 * This is a subsection for icount migration.
 */
static const VMStateDescription icount_vmstate_timers = {
    .name = "timer/icount",
    .version_id = 1,
    .minimum_version_id = 1,
    .needed = icount_state_needed,
    .fields = (VMStateField[]) {
        VMSTATE_INT64(qemu_icount_bias, TimersState),
        VMSTATE_INT64(qemu_icount, TimersState),
        VMSTATE_END_OF_LIST()
    }
};

static const VMStateDescription vmstate_timers = {
    .name = "timer",
    .version_id = 2,
    .minimum_version_id = 1,
    .fields = (VMStateField[]) {
        VMSTATE_INT64(cpu_ticks_offset, TimersState),
        VMSTATE_INT64(dummy, TimersState),
        VMSTATE_INT64_V(cpu_clock_offset, TimersState, 2),
        VMSTATE_END_OF_LIST()
    },
    .subsections = (const VMStateDescription*[]) {
        &icount_vmstate_timers,
        NULL
    }
};

static void cpu_throttle_thread(void *opaque)
{
    CPUState *cpu = opaque;
    double pct;
    double throttle_ratio;
    long sleeptime_ns;

    if (!cpu_throttle_get_percentage()) {
        return;
    }

    pct = (double)cpu_throttle_get_percentage() / 100;
    throttle_ratio = pct / (1 - pct);
    sleeptime_ns = (long)(throttle_ratio * CPU_THROTTLE_TIMESLICE_NS);

    qemu_mutex_unlock_iothread();
    atomic_set(&cpu->throttle_thread_scheduled, 0);
    g_usleep(sleeptime_ns / 1000); /* Convert ns to us for usleep call */
    qemu_mutex_lock_iothread();
}

static void cpu_throttle_timer_tick(void *opaque)
{
    CPUState *cpu;
    double pct;

    /* Stop the timer if needed */
    if (!cpu_throttle_get_percentage()) {
        return;
    }
    CPU_FOREACH(cpu) {
        if (!atomic_xchg(&cpu->throttle_thread_scheduled, 1)) {
            async_run_on_cpu(cpu, cpu_throttle_thread, cpu);
        }
    }

    pct = (double)cpu_throttle_get_percentage() / 100;
    timer_mod(throttle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT) +
                              CPU_THROTTLE_TIMESLICE_NS / (1 - pct));
}

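/* Throttle arithmetic for the two functions above, e.g. with pct == 0.5:
 * cpu_throttle_thread() sleeps throttle_ratio * 10 ms = 10 ms per tick, and
 * cpu_throttle_timer_tick() re-arms itself every 10 ms / (1 - pct) = 20 ms,
 * so each period is ~10 ms of execution plus ~10 ms of sleep.  In general a
 * vCPU runs for CPU_THROTTLE_TIMESLICE_NS per period and sleeps for the
 * remaining fraction pct of it.
 */
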
void cpu_throttle_set(int new_throttle_pct)
{
    /* Ensure throttle percentage is within valid range */
    new_throttle_pct = MIN(new_throttle_pct, CPU_THROTTLE_PCT_MAX);
    new_throttle_pct = MAX(new_throttle_pct, CPU_THROTTLE_PCT_MIN);

    atomic_set(&throttle_percentage, new_throttle_pct);

    timer_mod(throttle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT) +
                              CPU_THROTTLE_TIMESLICE_NS);
}

void cpu_throttle_stop(void)
{
    atomic_set(&throttle_percentage, 0);
}

bool cpu_throttle_active(void)
{
    return (cpu_throttle_get_percentage() != 0);
}

int cpu_throttle_get_percentage(void)
{
    return atomic_read(&throttle_percentage);
}

void cpu_ticks_init(void)
{
    seqlock_init(&timers_state.vm_clock_seqlock, NULL);
    vmstate_register(NULL, 0, &vmstate_timers, &timers_state);
    throttle_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL_RT,
                                  cpu_throttle_timer_tick, NULL);
}

void configure_icount(QemuOpts *opts, Error **errp)
{
    const char *option;
    char *rem_str = NULL;

    option = qemu_opt_get(opts, "shift");
    if (!option) {
        if (qemu_opt_get(opts, "align") != NULL) {
            error_setg(errp, "Please specify shift option when using align");
        }
        return;
    }

    icount_sleep = qemu_opt_get_bool(opts, "sleep", true);
    if (icount_sleep) {
        icount_warp_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL_RT,
                                         icount_timer_cb, NULL);
    }

    icount_align_option = qemu_opt_get_bool(opts, "align", false);

    if (icount_align_option && !icount_sleep) {
        error_setg(errp, "align=on and sleep=off are incompatible");
    }
    if (strcmp(option, "auto") != 0) {
        errno = 0;
        icount_time_shift = strtol(option, &rem_str, 0);
        if (errno != 0 || *rem_str != '\0' || !strlen(option)) {
            error_setg(errp, "icount: Invalid shift value");
        }
        use_icount = 1;
        return;
    } else if (icount_align_option) {
        error_setg(errp, "shift=auto and align=on are incompatible");
    } else if (!icount_sleep) {
        error_setg(errp, "shift=auto and sleep=off are incompatible");
    }

    use_icount = 2;

    /* 125MIPS seems a reasonable initial guess at the guest speed.
       It will be corrected fairly quickly anyway. */
    icount_time_shift = 3;

    /* Have both realtime and virtual time triggers for speed adjustment.
       The realtime trigger catches emulated time passing too slowly,
       the virtual time trigger catches emulated time passing too fast.
       Realtime triggers occur even when idle, so use them less frequently
       than VM triggers. */
    icount_rt_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL_RT,
                                   icount_adjust_rt, NULL);
    timer_mod(icount_rt_timer,
              qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL_RT) + 1000);
    icount_vm_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
                                   icount_adjust_vm, NULL);
    timer_mod(icount_vm_timer,
              qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
              get_ticks_per_sec() / 10);
}

/***********************************************************/
void hw_error(const char *fmt, ...)
{
    va_list ap;
    CPUState *cpu;

    va_start(ap, fmt);
    fprintf(stderr, "qemu: hardware error: ");
    vfprintf(stderr, fmt, ap);
    fprintf(stderr, "\n");
    CPU_FOREACH(cpu) {
        fprintf(stderr, "CPU #%d:\n", cpu->cpu_index);
        cpu_dump_state(cpu, stderr, fprintf, CPU_DUMP_FPU);
    }
    va_end(ap);
    abort();
}

void cpu_synchronize_all_states(void)
{
    CPUState *cpu;

    CPU_FOREACH(cpu) {
        cpu_synchronize_state(cpu);
    }
}

void cpu_synchronize_all_post_reset(void)
{
    CPUState *cpu;

    CPU_FOREACH(cpu) {
        cpu_synchronize_post_reset(cpu);
    }
}

void cpu_synchronize_all_post_init(void)
{
    CPUState *cpu;

    CPU_FOREACH(cpu) {
        cpu_synchronize_post_init(cpu);
    }
}

static int do_vm_stop(RunState state)
{
    int ret = 0;

    if (runstate_is_running()) {
        cpu_disable_ticks();
        pause_all_vcpus();
        runstate_set(state);
        vm_state_notify(0, state);
        qapi_event_send_stop(&error_abort);
    }

    bdrv_drain_all();
    ret = bdrv_flush_all();

    return ret;
}

static bool cpu_can_run(CPUState *cpu)
{
    if (cpu->stop) {
        return false;
    }
    if (cpu_is_stopped(cpu)) {
        return false;
    }
    return true;
}

static void cpu_handle_guest_debug(CPUState *cpu)
{
    gdb_set_stop_cpu(cpu);
    qemu_system_debug_request();
    cpu->stopped = true;
}

#ifdef CONFIG_LINUX
static void sigbus_reraise(void)
{
    sigset_t set;
    struct sigaction action;

    memset(&action, 0, sizeof(action));
    action.sa_handler = SIG_DFL;
    if (!sigaction(SIGBUS, &action, NULL)) {
        raise(SIGBUS);
        sigemptyset(&set);
        sigaddset(&set, SIGBUS);
        sigprocmask(SIG_UNBLOCK, &set, NULL);
    }
    perror("Failed to re-raise SIGBUS!\n");
    abort();
}

static void sigbus_handler(int n, struct qemu_signalfd_siginfo *siginfo,
                           void *ctx)
{
    if (kvm_on_sigbus(siginfo->ssi_code,
                      (void *)(intptr_t)siginfo->ssi_addr)) {
        sigbus_reraise();
    }
}

static void qemu_init_sigbus(void)
{
    struct sigaction action;

    memset(&action, 0, sizeof(action));
    action.sa_flags = SA_SIGINFO;
    action.sa_sigaction = (void (*)(int, siginfo_t*, void*))sigbus_handler;
    sigaction(SIGBUS, &action, NULL);

    prctl(PR_MCE_KILL, PR_MCE_KILL_SET, PR_MCE_KILL_EARLY, 0, 0);
}

static void qemu_kvm_eat_signals(CPUState *cpu)
{
    struct timespec ts = { 0, 0 };
    siginfo_t siginfo;
    sigset_t waitset;
    sigset_t chkset;
    int r;

    sigemptyset(&waitset);
    sigaddset(&waitset, SIG_IPI);
    sigaddset(&waitset, SIGBUS);

    do {
        r = sigtimedwait(&waitset, &siginfo, &ts);
        if (r == -1 && !(errno == EAGAIN || errno == EINTR)) {
            perror("sigtimedwait");
            exit(1);
        }

        switch (r) {
        case SIGBUS:
            if (kvm_on_sigbus_vcpu(cpu, siginfo.si_code, siginfo.si_addr)) {
                sigbus_reraise();
            }
            break;
        default:
            break;
        }

        r = sigpending(&chkset);
        if (r == -1) {
            perror("sigpending");
            exit(1);
        }
    } while (sigismember(&chkset, SIG_IPI) || sigismember(&chkset, SIGBUS));
}

#else /* !CONFIG_LINUX */

static void qemu_init_sigbus(void)
{
}

static void qemu_kvm_eat_signals(CPUState *cpu)
{
}
#endif /* !CONFIG_LINUX */

#ifndef _WIN32
static void dummy_signal(int sig)
{
}

static void qemu_kvm_init_cpu_signals(CPUState *cpu)
{
    int r;
    sigset_t set;
    struct sigaction sigact;

    memset(&sigact, 0, sizeof(sigact));
    sigact.sa_handler = dummy_signal;
    sigaction(SIG_IPI, &sigact, NULL);

    pthread_sigmask(SIG_BLOCK, NULL, &set);
    sigdelset(&set, SIG_IPI);
    sigdelset(&set, SIGBUS);
    r = kvm_set_signal_mask(cpu, &set);
    if (r) {
        fprintf(stderr, "kvm_set_signal_mask: %s\n", strerror(-r));
        exit(1);
    }
}

#else /* _WIN32 */
static void qemu_kvm_init_cpu_signals(CPUState *cpu)
{
    abort();
}
#endif /* _WIN32 */

static QemuMutex qemu_global_mutex;
static QemuCond qemu_io_proceeded_cond;
static unsigned iothread_requesting_mutex;

static QemuThread io_thread;

/* cpu creation */
static QemuCond qemu_cpu_cond;
/* system init */
static QemuCond qemu_pause_cond;
static QemuCond qemu_work_cond;

void qemu_init_cpu_loop(void)
{
    qemu_init_sigbus();
    qemu_cond_init(&qemu_cpu_cond);
    qemu_cond_init(&qemu_pause_cond);
    qemu_cond_init(&qemu_work_cond);
    qemu_cond_init(&qemu_io_proceeded_cond);
    qemu_mutex_init(&qemu_global_mutex);

    qemu_thread_get_self(&io_thread);
}

void run_on_cpu(CPUState *cpu, void (*func)(void *data), void *data)
{
    struct qemu_work_item wi;

    if (qemu_cpu_is_self(cpu)) {
        func(data);
        return;
    }

    wi.func = func;
    wi.data = data;
    wi.free = false;

    qemu_mutex_lock(&cpu->work_mutex);
    if (cpu->queued_work_first == NULL) {
        cpu->queued_work_first = &wi;
    } else {
        cpu->queued_work_last->next = &wi;
    }
    cpu->queued_work_last = &wi;
    wi.next = NULL;
    wi.done = false;
    qemu_mutex_unlock(&cpu->work_mutex);

    qemu_cpu_kick(cpu);
    while (!atomic_mb_read(&wi.done)) {
        CPUState *self_cpu = current_cpu;

        qemu_cond_wait(&qemu_work_cond, &qemu_global_mutex);
        current_cpu = self_cpu;
    }
}

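/* Note that the qemu_work_item in run_on_cpu() lives on the caller's stack.
 * This is safe because the caller blocks on qemu_work_cond until
 * flush_queued_work() has marked the item done, so the item cannot outlive
 * the frame it sits in; async_run_on_cpu() below heap-allocates its item
 * (wi->free = true) precisely because it returns without waiting.
 */
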
void async_run_on_cpu(CPUState *cpu, void (*func)(void *data), void *data)
{
    struct qemu_work_item *wi;

    if (qemu_cpu_is_self(cpu)) {
        func(data);
        return;
    }

    wi = g_malloc0(sizeof(struct qemu_work_item));
    wi->func = func;
    wi->data = data;
    wi->free = true;

    qemu_mutex_lock(&cpu->work_mutex);
    if (cpu->queued_work_first == NULL) {
        cpu->queued_work_first = wi;
    } else {
        cpu->queued_work_last->next = wi;
    }
    cpu->queued_work_last = wi;
    wi->next = NULL;
    wi->done = false;
    qemu_mutex_unlock(&cpu->work_mutex);

    qemu_cpu_kick(cpu);
}

static void flush_queued_work(CPUState *cpu)
{
    struct qemu_work_item *wi;

    if (cpu->queued_work_first == NULL) {
        return;
    }

    qemu_mutex_lock(&cpu->work_mutex);
    while (cpu->queued_work_first != NULL) {
        wi = cpu->queued_work_first;
        cpu->queued_work_first = wi->next;
        if (!cpu->queued_work_first) {
            cpu->queued_work_last = NULL;
        }
        qemu_mutex_unlock(&cpu->work_mutex);
        wi->func(wi->data);
        qemu_mutex_lock(&cpu->work_mutex);
        if (wi->free) {
            g_free(wi);
        } else {
            atomic_mb_set(&wi->done, true);
        }
    }
    qemu_mutex_unlock(&cpu->work_mutex);
    qemu_cond_broadcast(&qemu_work_cond);
}

static void qemu_wait_io_event_common(CPUState *cpu)
{
    if (cpu->stop) {
        cpu->stop = false;
        cpu->stopped = true;
        qemu_cond_broadcast(&qemu_pause_cond);
    }
    flush_queued_work(cpu);
    cpu->thread_kicked = false;
}

static void qemu_tcg_wait_io_event(CPUState *cpu)
{
    while (all_cpu_threads_idle()) {
        qemu_cond_wait(cpu->halt_cond, &qemu_global_mutex);
    }

    while (iothread_requesting_mutex) {
        qemu_cond_wait(&qemu_io_proceeded_cond, &qemu_global_mutex);
    }

    CPU_FOREACH(cpu) {
        qemu_wait_io_event_common(cpu);
    }
}

static void qemu_kvm_wait_io_event(CPUState *cpu)
{
    while (cpu_thread_is_idle(cpu)) {
        qemu_cond_wait(cpu->halt_cond, &qemu_global_mutex);
    }

    qemu_kvm_eat_signals(cpu);
    qemu_wait_io_event_common(cpu);
}

static void *qemu_kvm_cpu_thread_fn(void *arg)
{
    CPUState *cpu = arg;
    int r;

    rcu_register_thread();

    qemu_mutex_lock_iothread();
    qemu_thread_get_self(cpu->thread);
    cpu->thread_id = qemu_get_thread_id();
    cpu->can_do_io = 1;
    current_cpu = cpu;

    r = kvm_init_vcpu(cpu);
    if (r < 0) {
        fprintf(stderr, "kvm_init_vcpu failed: %s\n", strerror(-r));
        exit(1);
    }

    qemu_kvm_init_cpu_signals(cpu);

    /* signal CPU creation */
    cpu->created = true;
    qemu_cond_signal(&qemu_cpu_cond);

    while (1) {
        if (cpu_can_run(cpu)) {
            r = kvm_cpu_exec(cpu);
            if (r == EXCP_DEBUG) {
                cpu_handle_guest_debug(cpu);
            }
        }
        qemu_kvm_wait_io_event(cpu);
    }

    return NULL;
}

static void *qemu_dummy_cpu_thread_fn(void *arg)
{
#ifdef _WIN32
    fprintf(stderr, "qtest is not supported under Windows\n");
    exit(1);
#else
    CPUState *cpu = arg;
    sigset_t waitset;
    int r;

    rcu_register_thread();

    qemu_mutex_lock_iothread();
    qemu_thread_get_self(cpu->thread);
    cpu->thread_id = qemu_get_thread_id();
    cpu->can_do_io = 1;

    sigemptyset(&waitset);
    sigaddset(&waitset, SIG_IPI);

    /* signal CPU creation */
    cpu->created = true;
    qemu_cond_signal(&qemu_cpu_cond);

    current_cpu = cpu;
    while (1) {
        current_cpu = NULL;
        qemu_mutex_unlock_iothread();
        do {
            int sig;
            r = sigwait(&waitset, &sig);
        } while (r == -1 && (errno == EAGAIN || errno == EINTR));
        if (r == -1) {
            perror("sigwait");
            exit(1);
        }
        qemu_mutex_lock_iothread();
        current_cpu = cpu;
        qemu_wait_io_event_common(cpu);
    }

    return NULL;
#endif
}

static void tcg_exec_all(void);

static void *qemu_tcg_cpu_thread_fn(void *arg)
{
    CPUState *cpu = arg;

    rcu_register_thread();

    qemu_mutex_lock_iothread();
    qemu_thread_get_self(cpu->thread);

    CPU_FOREACH(cpu) {
        cpu->thread_id = qemu_get_thread_id();
        cpu->created = true;
        cpu->can_do_io = 1;
    }
    qemu_cond_signal(&qemu_cpu_cond);

    /* wait for initial kick-off after machine start */
    while (first_cpu->stopped) {
        qemu_cond_wait(first_cpu->halt_cond, &qemu_global_mutex);

        /* process any pending work */
        CPU_FOREACH(cpu) {
            qemu_wait_io_event_common(cpu);
        }
    }

    /* process any pending work */
    atomic_mb_set(&exit_request, 1);

    while (1) {
        tcg_exec_all();

        if (use_icount) {
            int64_t deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);

            if (deadline == 0) {
                qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
            }
        }
        qemu_tcg_wait_io_event(QTAILQ_FIRST(&cpus));
    }

    return NULL;
}

static void qemu_cpu_kick_thread(CPUState *cpu)
{
#ifndef _WIN32
    int err;

    if (cpu->thread_kicked) {
        return;
    }
    cpu->thread_kicked = true;
    err = pthread_kill(cpu->thread->thread, SIG_IPI);
    if (err) {
        fprintf(stderr, "qemu:%s: %s", __func__, strerror(err));
        exit(1);
    }
#else /* _WIN32 */
    abort();
#endif
}

static void qemu_cpu_kick_no_halt(void)
{
    CPUState *cpu;
    /* Ensure whatever caused the exit has reached the CPU threads before
     * writing exit_request.
     */
    atomic_mb_set(&exit_request, 1);
    cpu = atomic_mb_read(&tcg_current_cpu);
    if (cpu) {
        cpu_exit(cpu);
    }
}

void qemu_cpu_kick(CPUState *cpu)
{
    qemu_cond_broadcast(cpu->halt_cond);
    if (tcg_enabled()) {
        qemu_cpu_kick_no_halt();
    } else {
        qemu_cpu_kick_thread(cpu);
    }
}

void qemu_cpu_kick_self(void)
{
    assert(current_cpu);
    qemu_cpu_kick_thread(current_cpu);
}

bool qemu_cpu_is_self(CPUState *cpu)
{
    return qemu_thread_is_self(cpu->thread);
}

bool qemu_in_vcpu_thread(void)
{
    return current_cpu && qemu_cpu_is_self(current_cpu);
}

static __thread bool iothread_locked = false;

bool qemu_mutex_iothread_locked(void)
{
    return iothread_locked;
}

void qemu_mutex_lock_iothread(void)
{
    atomic_inc(&iothread_requesting_mutex);
    /* In the simple case there is no need to bump the VCPU thread out of
     * TCG code execution.
     */
    if (!tcg_enabled() || qemu_in_vcpu_thread() ||
        !first_cpu || !first_cpu->created) {
        qemu_mutex_lock(&qemu_global_mutex);
        atomic_dec(&iothread_requesting_mutex);
    } else {
        if (qemu_mutex_trylock(&qemu_global_mutex)) {
            qemu_cpu_kick_no_halt();
            qemu_mutex_lock(&qemu_global_mutex);
        }
        atomic_dec(&iothread_requesting_mutex);
        qemu_cond_broadcast(&qemu_io_proceeded_cond);
    }
    iothread_locked = true;
}

void qemu_mutex_unlock_iothread(void)
{
    iothread_locked = false;
    qemu_mutex_unlock(&qemu_global_mutex);
}

static int all_vcpus_paused(void)
{
    CPUState *cpu;

    CPU_FOREACH(cpu) {
        if (!cpu->stopped) {
            return 0;
        }
    }

    return 1;
}

void pause_all_vcpus(void)
{
    CPUState *cpu;

    qemu_clock_enable(QEMU_CLOCK_VIRTUAL, false);
    CPU_FOREACH(cpu) {
        cpu->stop = true;
        qemu_cpu_kick(cpu);
    }

    if (qemu_in_vcpu_thread()) {
        cpu_stop_current();
        if (!kvm_enabled()) {
            CPU_FOREACH(cpu) {
                cpu->stop = false;
                cpu->stopped = true;
            }
            return;
        }
    }

    while (!all_vcpus_paused()) {
        qemu_cond_wait(&qemu_pause_cond, &qemu_global_mutex);
        CPU_FOREACH(cpu) {
            qemu_cpu_kick(cpu);
        }
    }
}

void cpu_resume(CPUState *cpu)
{
    cpu->stop = false;
    cpu->stopped = false;
    qemu_cpu_kick(cpu);
}

void resume_all_vcpus(void)
{
    CPUState *cpu;

    qemu_clock_enable(QEMU_CLOCK_VIRTUAL, true);
    CPU_FOREACH(cpu) {
        cpu_resume(cpu);
    }
}

/* For temporary buffers for forming a name */
#define VCPU_THREAD_NAME_SIZE 16

static void qemu_tcg_init_vcpu(CPUState *cpu)
{
    char thread_name[VCPU_THREAD_NAME_SIZE];
    static QemuCond *tcg_halt_cond;
    static QemuThread *tcg_cpu_thread;

    /* share a single thread for all cpus with TCG */
    if (!tcg_cpu_thread) {
        cpu->thread = g_malloc0(sizeof(QemuThread));
        cpu->halt_cond = g_malloc0(sizeof(QemuCond));
        qemu_cond_init(cpu->halt_cond);
        tcg_halt_cond = cpu->halt_cond;
        snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/TCG",
                 cpu->cpu_index);
        qemu_thread_create(cpu->thread, thread_name, qemu_tcg_cpu_thread_fn,
                           cpu, QEMU_THREAD_JOINABLE);
#ifdef _WIN32
        cpu->hThread = qemu_thread_get_handle(cpu->thread);
#endif
        while (!cpu->created) {
            qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
        }
        tcg_cpu_thread = cpu->thread;
    } else {
        cpu->thread = tcg_cpu_thread;
        cpu->halt_cond = tcg_halt_cond;
    }
}

static void qemu_kvm_start_vcpu(CPUState *cpu)
{
    char thread_name[VCPU_THREAD_NAME_SIZE];

    cpu->thread = g_malloc0(sizeof(QemuThread));
    cpu->halt_cond = g_malloc0(sizeof(QemuCond));
    qemu_cond_init(cpu->halt_cond);
    snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/KVM",
             cpu->cpu_index);
    qemu_thread_create(cpu->thread, thread_name, qemu_kvm_cpu_thread_fn,
                       cpu, QEMU_THREAD_JOINABLE);
    while (!cpu->created) {
        qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
    }
}

static void qemu_dummy_start_vcpu(CPUState *cpu)
{
    char thread_name[VCPU_THREAD_NAME_SIZE];

    cpu->thread = g_malloc0(sizeof(QemuThread));
    cpu->halt_cond = g_malloc0(sizeof(QemuCond));
    qemu_cond_init(cpu->halt_cond);
    snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/DUMMY",
             cpu->cpu_index);
    qemu_thread_create(cpu->thread, thread_name, qemu_dummy_cpu_thread_fn, cpu,
                       QEMU_THREAD_JOINABLE);
    while (!cpu->created) {
        qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
    }
}

void qemu_init_vcpu(CPUState *cpu)
{
    cpu->nr_cores = smp_cores;
    cpu->nr_threads = smp_threads;
    cpu->stopped = true;

    if (!cpu->as) {
        /* If the target cpu hasn't set up any address spaces itself,
         * give it the default one.
         */
        AddressSpace *as = address_space_init_shareable(cpu->memory,
                                                        "cpu-memory");
        cpu->num_ases = 1;
        cpu_address_space_init(cpu, as, 0);
    }

    if (kvm_enabled()) {
        qemu_kvm_start_vcpu(cpu);
    } else if (tcg_enabled()) {
        qemu_tcg_init_vcpu(cpu);
    } else {
        qemu_dummy_start_vcpu(cpu);
    }
}

void cpu_stop_current(void)
{
    if (current_cpu) {
        current_cpu->stop = false;
        current_cpu->stopped = true;
        cpu_exit(current_cpu);
        qemu_cond_broadcast(&qemu_pause_cond);
    }
}

int vm_stop(RunState state)
{
    if (qemu_in_vcpu_thread()) {
        qemu_system_vmstop_request_prepare();
        qemu_system_vmstop_request(state);
        /*
         * FIXME: should not return to device code in case
         * vm_stop() has been requested.
         */
        cpu_stop_current();
        return 0;
    }

    return do_vm_stop(state);
}

/* does a state transition even if the VM is already stopped,
   current state is forgotten forever */
int vm_stop_force_state(RunState state)
{
    if (runstate_is_running()) {
        return vm_stop(state);
    } else {
        runstate_set(state);

        bdrv_drain_all();
        /* Make sure to return an error if the flush in a previous vm_stop()
         * failed. */
        return bdrv_flush_all();
    }
}

static int64_t tcg_get_icount_limit(void)
{
    int64_t deadline;

    if (replay_mode != REPLAY_MODE_PLAY) {
        deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);

        /* Maintain prior (possibly buggy) behaviour where if no deadline
         * was set (as there is no QEMU_CLOCK_VIRTUAL timer) or it is more than
         * INT32_MAX nanoseconds ahead, we still use INT32_MAX
         * nanoseconds.
         */
        if ((deadline < 0) || (deadline > INT32_MAX)) {
            deadline = INT32_MAX;
        }

        return qemu_icount_round(deadline);
    } else {
        return replay_get_instructions();
    }
}

static int tcg_cpu_exec(CPUState *cpu)
{
    int ret;
#ifdef CONFIG_PROFILER
    int64_t ti;
#endif

#ifdef CONFIG_PROFILER
    ti = profile_getclock();
#endif
    if (use_icount) {
        int64_t count;
        int decr;
        timers_state.qemu_icount -= (cpu->icount_decr.u16.low
                                    + cpu->icount_extra);
        cpu->icount_decr.u16.low = 0;
        cpu->icount_extra = 0;
        count = tcg_get_icount_limit();
        timers_state.qemu_icount += count;
        decr = (count > 0xffff) ? 0xffff : count;
        count -= decr;
        cpu->icount_decr.u16.low = decr;
        cpu->icount_extra = count;
    }
    ret = cpu_exec(cpu);
#ifdef CONFIG_PROFILER
    tcg_time += profile_getclock() - ti;
#endif
    if (use_icount) {
        /* Fold pending instructions back into the
           instruction counter, and clear the interrupt flag. */
        timers_state.qemu_icount -= (cpu->icount_decr.u16.low
                                    + cpu->icount_extra);
        cpu->icount_decr.u32 = 0;
        cpu->icount_extra = 0;
        replay_account_executed_instructions();
    }
    return ret;
}

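/* Instruction-budget layout used by tcg_cpu_exec(): icount_decr.u16.low
 * holds at most 0xffff instructions, decremented directly by generated
 * code, while the rest of the limit from tcg_get_icount_limit() is parked
 * in icount_extra and handed out in later chunks.  A budget of 100000, for
 * example, is split into low = 65535 and icount_extra = 34465.
 */
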
static void tcg_exec_all(void)
{
    int r;

    /* Account partial waits to QEMU_CLOCK_VIRTUAL. */
    qemu_account_warp_timer();

    if (next_cpu == NULL) {
        next_cpu = first_cpu;
    }
    for (; next_cpu != NULL && !exit_request; next_cpu = CPU_NEXT(next_cpu)) {
        CPUState *cpu = next_cpu;

        qemu_clock_enable(QEMU_CLOCK_VIRTUAL,
                          (cpu->singlestep_enabled & SSTEP_NOTIMER) == 0);

        if (cpu_can_run(cpu)) {
            r = tcg_cpu_exec(cpu);
            if (r == EXCP_DEBUG) {
                cpu_handle_guest_debug(cpu);
                break;
            }
        } else if (cpu->stop || cpu->stopped) {
            break;
        }
    }

    /* Pairs with smp_wmb in qemu_cpu_kick. */
    atomic_mb_set(&exit_request, 0);
}

void list_cpus(FILE *f, fprintf_function cpu_fprintf, const char *optarg)
{
    /* XXX: implement xxx_cpu_list for targets that still miss it */
#if defined(cpu_list)
    cpu_list(f, cpu_fprintf);
#endif
}

CpuInfoList *qmp_query_cpus(Error **errp)
{
    CpuInfoList *head = NULL, *cur_item = NULL;
    CPUState *cpu;

    CPU_FOREACH(cpu) {
        CpuInfoList *info;
#if defined(TARGET_I386)
        X86CPU *x86_cpu = X86_CPU(cpu);
        CPUX86State *env = &x86_cpu->env;
#elif defined(TARGET_PPC)
        PowerPCCPU *ppc_cpu = POWERPC_CPU(cpu);
        CPUPPCState *env = &ppc_cpu->env;
#elif defined(TARGET_SPARC)
        SPARCCPU *sparc_cpu = SPARC_CPU(cpu);
        CPUSPARCState *env = &sparc_cpu->env;
#elif defined(TARGET_MIPS)
        MIPSCPU *mips_cpu = MIPS_CPU(cpu);
        CPUMIPSState *env = &mips_cpu->env;
#elif defined(TARGET_TRICORE)
        TriCoreCPU *tricore_cpu = TRICORE_CPU(cpu);
        CPUTriCoreState *env = &tricore_cpu->env;
#endif

        cpu_synchronize_state(cpu);

        info = g_malloc0(sizeof(*info));
        info->value = g_malloc0(sizeof(*info->value));
        info->value->CPU = cpu->cpu_index;
        info->value->current = (cpu == first_cpu);
        info->value->halted = cpu->halted;
        info->value->qom_path = object_get_canonical_path(OBJECT(cpu));
        info->value->thread_id = cpu->thread_id;
#if defined(TARGET_I386)
        info->value->arch = CPU_INFO_ARCH_X86;
        info->value->u.x86.pc = env->eip + env->segs[R_CS].base;
#elif defined(TARGET_PPC)
        info->value->arch = CPU_INFO_ARCH_PPC;
        info->value->u.ppc.nip = env->nip;
#elif defined(TARGET_SPARC)
        info->value->arch = CPU_INFO_ARCH_SPARC;
        info->value->u.q_sparc.pc = env->pc;
        info->value->u.q_sparc.npc = env->npc;
#elif defined(TARGET_MIPS)
        info->value->arch = CPU_INFO_ARCH_MIPS;
        info->value->u.q_mips.PC = env->active_tc.PC;
#elif defined(TARGET_TRICORE)
        info->value->arch = CPU_INFO_ARCH_TRICORE;
        info->value->u.tricore.PC = env->PC;
#else
        info->value->arch = CPU_INFO_ARCH_OTHER;
#endif

        /* XXX: waiting for the qapi to support GSList */
        if (!cur_item) {
            head = cur_item = info;
        } else {
            cur_item->next = info;
            cur_item = info;
        }
    }

    return head;
}

void qmp_memsave(int64_t addr, int64_t size, const char *filename,
                 bool has_cpu, int64_t cpu_index, Error **errp)
{
    FILE *f;
    uint32_t l;
    CPUState *cpu;
    uint8_t buf[1024];
    int64_t orig_addr = addr, orig_size = size;

    if (!has_cpu) {
        cpu_index = 0;
    }

    cpu = qemu_get_cpu(cpu_index);
    if (cpu == NULL) {
        error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "cpu-index",
                   "a CPU number");
        return;
    }

    f = fopen(filename, "wb");
    if (!f) {
        error_setg_file_open(errp, errno, filename);
        return;
    }

    while (size != 0) {
        l = sizeof(buf);
        if (l > size) {
            l = size;
        }
        if (cpu_memory_rw_debug(cpu, addr, buf, l, 0) != 0) {
            error_setg(errp, "Invalid addr 0x%016" PRIx64 "/size %" PRId64
                             " specified", orig_addr, orig_size);
            goto exit;
        }
        if (fwrite(buf, 1, l, f) != l) {
            error_setg(errp, QERR_IO_ERROR);
            goto exit;
        }
        addr += l;
        size -= l;
    }

exit:
    fclose(f);
}

void qmp_pmemsave(int64_t addr, int64_t size, const char *filename,
                  Error **errp)
{
    FILE *f;
    uint32_t l;
    uint8_t buf[1024];

    f = fopen(filename, "wb");
    if (!f) {
        error_setg_file_open(errp, errno, filename);
        return;
    }

    while (size != 0) {
        l = sizeof(buf);
        if (l > size) {
            l = size;
        }
        cpu_physical_memory_read(addr, buf, l);
        if (fwrite(buf, 1, l, f) != l) {
            error_setg(errp, QERR_IO_ERROR);
            goto exit;
        }
        addr += l;
        size -= l;
    }

exit:
    fclose(f);
}

void qmp_inject_nmi(Error **errp)
{
#if defined(TARGET_I386)
    CPUState *cs;

    CPU_FOREACH(cs) {
        X86CPU *cpu = X86_CPU(cs);

        if (!cpu->apic_state) {
            cpu_interrupt(cs, CPU_INTERRUPT_NMI);
        } else {
            apic_deliver_nmi(cpu->apic_state);
        }
    }
#else
    nmi_monitor_handle(monitor_get_cpu_index(), errp);
#endif
}

void dump_drift_info(FILE *f, fprintf_function cpu_fprintf)
{
    if (!use_icount) {
        return;
    }

    cpu_fprintf(f, "Host - Guest clock %"PRIi64" ms\n",
                (cpu_get_clock() - cpu_get_icount())/SCALE_MS);
    if (icount_align_option) {
        cpu_fprintf(f, "Max guest delay %"PRIi64" ms\n", -max_delay/SCALE_MS);
        cpu_fprintf(f, "Max guest advance %"PRIi64" ms\n", max_advance/SCALE_MS);
    } else {
        cpu_fprintf(f, "Max guest delay NA\n");
        cpu_fprintf(f, "Max guest advance NA\n");
    }
}