]> git.proxmox.com Git - mirror_qemu.git/blame - cpus.c
char: drop data written to a disconnected pty
[mirror_qemu.git] / cpus.c
CommitLineData
296af7c9
BS
1/*
2 * QEMU System Emulator
3 *
4 * Copyright (c) 2003-2008 Fabrice Bellard
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to deal
8 * in the Software without restriction, including without limitation the rights
9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 * copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22 * THE SOFTWARE.
23 */
24
25/* Needed early for CONFIG_BSD etc. */
7b31bbc2 26#include "qemu/osdep.h"
33c11879
PB
27#include "qemu-common.h"
28#include "cpu.h"
83c9089e 29#include "monitor/monitor.h"
a4e15de9 30#include "qapi/qmp/qerror.h"
d49b6836 31#include "qemu/error-report.h"
9c17d615 32#include "sysemu/sysemu.h"
da31d594 33#include "sysemu/block-backend.h"
022c62cb 34#include "exec/gdbstub.h"
9c17d615 35#include "sysemu/dma.h"
b3946626 36#include "sysemu/hw_accel.h"
9c17d615 37#include "sysemu/kvm.h"
b0cb0a66 38#include "sysemu/hax.h"
de0b36b6 39#include "qmp-commands.h"
63c91552 40#include "exec/exec-all.h"
296af7c9 41
1de7afc9 42#include "qemu/thread.h"
9c17d615
PB
43#include "sysemu/cpus.h"
44#include "sysemu/qtest.h"
1de7afc9
PB
45#include "qemu/main-loop.h"
46#include "qemu/bitmap.h"
cb365646 47#include "qemu/seqlock.h"
a4e15de9 48#include "qapi-event.h"
9cb805fd 49#include "hw/nmi.h"
8b427044 50#include "sysemu/replay.h"
0ff0fc19
JK
51
52#ifndef _WIN32
1de7afc9 53#include "qemu/compatfd.h"
0ff0fc19 54#endif
296af7c9 55
6d9cb73c
JK
56#ifdef CONFIG_LINUX
57
58#include <sys/prctl.h>
59
c0532a76
MT
60#ifndef PR_MCE_KILL
61#define PR_MCE_KILL 33
62#endif
63
6d9cb73c
JK
64#ifndef PR_MCE_KILL_SET
65#define PR_MCE_KILL_SET 1
66#endif
67
68#ifndef PR_MCE_KILL_EARLY
69#define PR_MCE_KILL_EARLY 1
70#endif
71
72#endif /* CONFIG_LINUX */
73
27498bef
ST
74int64_t max_delay;
75int64_t max_advance;
296af7c9 76
2adcc85d
JH
77/* vcpu throttling controls */
78static QEMUTimer *throttle_timer;
79static unsigned int throttle_percentage;
80
81#define CPU_THROTTLE_PCT_MIN 1
82#define CPU_THROTTLE_PCT_MAX 99
83#define CPU_THROTTLE_TIMESLICE_NS 10000000
84
321bc0b2
TC
85bool cpu_is_stopped(CPUState *cpu)
86{
87 return cpu->stopped || !runstate_is_running();
88}
89
a98ae1d8 90static bool cpu_thread_is_idle(CPUState *cpu)
ac873f1e 91{
c64ca814 92 if (cpu->stop || cpu->queued_work_first) {
ac873f1e
PM
93 return false;
94 }
321bc0b2 95 if (cpu_is_stopped(cpu)) {
ac873f1e
PM
96 return true;
97 }
8c2e1b00 98 if (!cpu->halted || cpu_has_work(cpu) ||
215e79c0 99 kvm_halt_in_kernel()) {
ac873f1e
PM
100 return false;
101 }
102 return true;
103}
104
105static bool all_cpu_threads_idle(void)
106{
182735ef 107 CPUState *cpu;
ac873f1e 108
bdc44640 109 CPU_FOREACH(cpu) {
182735ef 110 if (!cpu_thread_is_idle(cpu)) {
ac873f1e
PM
111 return false;
112 }
113 }
114 return true;
115}
116
946fb27c
PB
117/***********************************************************/
118/* guest cycle counter */
119
a3270e19
PB
120/* Protected by TimersState seqlock */
121
5045e9d9 122static bool icount_sleep = true;
71468395 123static int64_t vm_clock_warp_start = -1;
946fb27c
PB
124/* Conversion factor from emulated instructions to virtual clock ticks. */
125static int icount_time_shift;
126/* Arbitrarily pick 1MIPS as the minimum allowable speed. */
127#define MAX_ICOUNT_SHIFT 10
a3270e19 128
946fb27c
PB
129static QEMUTimer *icount_rt_timer;
130static QEMUTimer *icount_vm_timer;
131static QEMUTimer *icount_warp_timer;
946fb27c
PB
132
133typedef struct TimersState {
cb365646 134 /* Protected by BQL. */
946fb27c
PB
135 int64_t cpu_ticks_prev;
136 int64_t cpu_ticks_offset;
cb365646
LPF
137
138 /* cpu_clock_offset can be read out of BQL, so protect it with
139 * this lock.
140 */
141 QemuSeqLock vm_clock_seqlock;
946fb27c
PB
142 int64_t cpu_clock_offset;
143 int32_t cpu_ticks_enabled;
144 int64_t dummy;
c96778bb
FK
145
146 /* Compensate for varying guest execution speed. */
147 int64_t qemu_icount_bias;
148 /* Only written by TCG thread */
149 int64_t qemu_icount;
946fb27c
PB
150} TimersState;
151
d9cd4007 152static TimersState timers_state;
946fb27c 153
2a62914b 154int64_t cpu_get_icount_raw(void)
946fb27c
PB
155{
156 int64_t icount;
4917cf44 157 CPUState *cpu = current_cpu;
946fb27c 158
c96778bb 159 icount = timers_state.qemu_icount;
4917cf44 160 if (cpu) {
414b15c9 161 if (!cpu->can_do_io) {
2a62914b
PD
162 fprintf(stderr, "Bad icount read\n");
163 exit(1);
946fb27c 164 }
28ecfd7a 165 icount -= (cpu->icount_decr.u16.low + cpu->icount_extra);
946fb27c 166 }
2a62914b
PD
167 return icount;
168}
169
170/* Return the virtual CPU time, based on the instruction counter. */
171static int64_t cpu_get_icount_locked(void)
172{
173 int64_t icount = cpu_get_icount_raw();
3f031313 174 return timers_state.qemu_icount_bias + cpu_icount_to_ns(icount);
946fb27c
PB
175}
176
17a15f1b
PB
177int64_t cpu_get_icount(void)
178{
179 int64_t icount;
180 unsigned start;
181
182 do {
183 start = seqlock_read_begin(&timers_state.vm_clock_seqlock);
184 icount = cpu_get_icount_locked();
185 } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start));
186
187 return icount;
188}
189
3f031313
FK
190int64_t cpu_icount_to_ns(int64_t icount)
191{
192 return icount << icount_time_shift;
193}
194
d90f3cca
C
195/* return the time elapsed in VM between vm_start and vm_stop. Unless
196 * icount is active, cpu_get_ticks() uses units of the host CPU cycle
197 * counter.
198 *
199 * Caller must hold the BQL
200 */
946fb27c
PB
201int64_t cpu_get_ticks(void)
202{
5f3e3101
PB
203 int64_t ticks;
204
946fb27c
PB
205 if (use_icount) {
206 return cpu_get_icount();
207 }
5f3e3101
PB
208
209 ticks = timers_state.cpu_ticks_offset;
210 if (timers_state.cpu_ticks_enabled) {
4a7428c5 211 ticks += cpu_get_host_ticks();
5f3e3101
PB
212 }
213
214 if (timers_state.cpu_ticks_prev > ticks) {
215 /* Note: non increasing ticks may happen if the host uses
216 software suspend */
217 timers_state.cpu_ticks_offset += timers_state.cpu_ticks_prev - ticks;
218 ticks = timers_state.cpu_ticks_prev;
946fb27c 219 }
5f3e3101
PB
220
221 timers_state.cpu_ticks_prev = ticks;
222 return ticks;
946fb27c
PB
223}
224
cb365646 225static int64_t cpu_get_clock_locked(void)
946fb27c 226{
1d45cea5 227 int64_t time;
cb365646 228
1d45cea5 229 time = timers_state.cpu_clock_offset;
5f3e3101 230 if (timers_state.cpu_ticks_enabled) {
1d45cea5 231 time += get_clock();
946fb27c 232 }
cb365646 233
1d45cea5 234 return time;
cb365646
LPF
235}
236
d90f3cca 237/* Return the monotonic time elapsed in VM, i.e.,
8212ff86
PM
238 * the time between vm_start and vm_stop
239 */
cb365646
LPF
240int64_t cpu_get_clock(void)
241{
242 int64_t ti;
243 unsigned start;
244
245 do {
246 start = seqlock_read_begin(&timers_state.vm_clock_seqlock);
247 ti = cpu_get_clock_locked();
248 } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start));
249
250 return ti;
946fb27c
PB
251}
252
cb365646 253/* enable cpu_get_ticks()
3224e878 254 * Caller must hold BQL which serves as mutex for vm_clock_seqlock.
cb365646 255 */
946fb27c
PB
256void cpu_enable_ticks(void)
257{
cb365646 258 /* Here, the really thing protected by seqlock is cpu_clock_offset. */
03719e44 259 seqlock_write_begin(&timers_state.vm_clock_seqlock);
946fb27c 260 if (!timers_state.cpu_ticks_enabled) {
4a7428c5 261 timers_state.cpu_ticks_offset -= cpu_get_host_ticks();
946fb27c
PB
262 timers_state.cpu_clock_offset -= get_clock();
263 timers_state.cpu_ticks_enabled = 1;
264 }
03719e44 265 seqlock_write_end(&timers_state.vm_clock_seqlock);
946fb27c
PB
266}
267
268/* disable cpu_get_ticks() : the clock is stopped. You must not call
cb365646 269 * cpu_get_ticks() after that.
3224e878 270 * Caller must hold BQL which serves as mutex for vm_clock_seqlock.
cb365646 271 */
946fb27c
PB
272void cpu_disable_ticks(void)
273{
cb365646 274 /* Here, the really thing protected by seqlock is cpu_clock_offset. */
03719e44 275 seqlock_write_begin(&timers_state.vm_clock_seqlock);
946fb27c 276 if (timers_state.cpu_ticks_enabled) {
4a7428c5 277 timers_state.cpu_ticks_offset += cpu_get_host_ticks();
cb365646 278 timers_state.cpu_clock_offset = cpu_get_clock_locked();
946fb27c
PB
279 timers_state.cpu_ticks_enabled = 0;
280 }
03719e44 281 seqlock_write_end(&timers_state.vm_clock_seqlock);
946fb27c
PB
282}
283
284/* Correlation between real and virtual time is always going to be
285 fairly approximate, so ignore small variation.
286 When the guest is idle real and virtual time will be aligned in
287 the IO wait loop. */
73bcb24d 288#define ICOUNT_WOBBLE (NANOSECONDS_PER_SECOND / 10)
946fb27c
PB
289
290static void icount_adjust(void)
291{
292 int64_t cur_time;
293 int64_t cur_icount;
294 int64_t delta;
a3270e19
PB
295
296 /* Protected by TimersState mutex. */
946fb27c 297 static int64_t last_delta;
468cc7cf 298
946fb27c
PB
299 /* If the VM is not running, then do nothing. */
300 if (!runstate_is_running()) {
301 return;
302 }
468cc7cf 303
03719e44 304 seqlock_write_begin(&timers_state.vm_clock_seqlock);
17a15f1b
PB
305 cur_time = cpu_get_clock_locked();
306 cur_icount = cpu_get_icount_locked();
468cc7cf 307
946fb27c
PB
308 delta = cur_icount - cur_time;
309 /* FIXME: This is a very crude algorithm, somewhat prone to oscillation. */
310 if (delta > 0
311 && last_delta + ICOUNT_WOBBLE < delta * 2
312 && icount_time_shift > 0) {
313 /* The guest is getting too far ahead. Slow time down. */
314 icount_time_shift--;
315 }
316 if (delta < 0
317 && last_delta - ICOUNT_WOBBLE > delta * 2
318 && icount_time_shift < MAX_ICOUNT_SHIFT) {
319 /* The guest is getting too far behind. Speed time up. */
320 icount_time_shift++;
321 }
322 last_delta = delta;
c96778bb
FK
323 timers_state.qemu_icount_bias = cur_icount
324 - (timers_state.qemu_icount << icount_time_shift);
03719e44 325 seqlock_write_end(&timers_state.vm_clock_seqlock);
946fb27c
PB
326}
327
328static void icount_adjust_rt(void *opaque)
329{
40daca54 330 timer_mod(icount_rt_timer,
1979b908 331 qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL_RT) + 1000);
946fb27c
PB
332 icount_adjust();
333}
334
335static void icount_adjust_vm(void *opaque)
336{
40daca54
AB
337 timer_mod(icount_vm_timer,
338 qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
73bcb24d 339 NANOSECONDS_PER_SECOND / 10);
946fb27c
PB
340 icount_adjust();
341}
342
343static int64_t qemu_icount_round(int64_t count)
344{
345 return (count + (1 << icount_time_shift) - 1) >> icount_time_shift;
346}
347
efab87cf 348static void icount_warp_rt(void)
946fb27c 349{
ccffff48
AB
350 unsigned seq;
351 int64_t warp_start;
352
17a15f1b
PB
353 /* The icount_warp_timer is rescheduled soon after vm_clock_warp_start
354 * changes from -1 to another value, so the race here is okay.
355 */
ccffff48
AB
356 do {
357 seq = seqlock_read_begin(&timers_state.vm_clock_seqlock);
358 warp_start = vm_clock_warp_start;
359 } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, seq));
360
361 if (warp_start == -1) {
946fb27c
PB
362 return;
363 }
364
03719e44 365 seqlock_write_begin(&timers_state.vm_clock_seqlock);
946fb27c 366 if (runstate_is_running()) {
8eda206e
PD
367 int64_t clock = REPLAY_CLOCK(REPLAY_CLOCK_VIRTUAL_RT,
368 cpu_get_clock_locked());
8ed961d9
PB
369 int64_t warp_delta;
370
371 warp_delta = clock - vm_clock_warp_start;
372 if (use_icount == 2) {
946fb27c 373 /*
40daca54 374 * In adaptive mode, do not let QEMU_CLOCK_VIRTUAL run too
946fb27c
PB
375 * far ahead of real time.
376 */
17a15f1b 377 int64_t cur_icount = cpu_get_icount_locked();
bf2a7ddb 378 int64_t delta = clock - cur_icount;
8ed961d9 379 warp_delta = MIN(warp_delta, delta);
946fb27c 380 }
c96778bb 381 timers_state.qemu_icount_bias += warp_delta;
946fb27c
PB
382 }
383 vm_clock_warp_start = -1;
03719e44 384 seqlock_write_end(&timers_state.vm_clock_seqlock);
8ed961d9
PB
385
386 if (qemu_clock_expired(QEMU_CLOCK_VIRTUAL)) {
387 qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
388 }
946fb27c
PB
389}
390
/* Callback of icount_warp_timer: simply performs the pending warp. */
static void icount_timer_cb(void *opaque)
{
    /*
     * No need for a checkpoint because the timer already synchronizes
     * with CHECKPOINT_CLOCK_VIRTUAL_RT.
     */
    icount_warp_rt();
}
398
8156be56
PB
399void qtest_clock_warp(int64_t dest)
400{
40daca54 401 int64_t clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
efef88b3 402 AioContext *aio_context;
8156be56 403 assert(qtest_enabled());
efef88b3 404 aio_context = qemu_get_aio_context();
8156be56 405 while (clock < dest) {
40daca54 406 int64_t deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
c9299e2f 407 int64_t warp = qemu_soonest_timeout(dest - clock, deadline);
efef88b3 408
03719e44 409 seqlock_write_begin(&timers_state.vm_clock_seqlock);
c96778bb 410 timers_state.qemu_icount_bias += warp;
03719e44 411 seqlock_write_end(&timers_state.vm_clock_seqlock);
17a15f1b 412
40daca54 413 qemu_clock_run_timers(QEMU_CLOCK_VIRTUAL);
efef88b3 414 timerlist_run_timers(aio_context->tlg.tl[QEMU_CLOCK_VIRTUAL]);
40daca54 415 clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
8156be56 416 }
40daca54 417 qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
8156be56
PB
418}
419
e76d1798 420void qemu_start_warp_timer(void)
946fb27c 421{
ce78d18c 422 int64_t clock;
946fb27c
PB
423 int64_t deadline;
424
e76d1798 425 if (!use_icount) {
946fb27c
PB
426 return;
427 }
428
8bd7f71d
PD
429 /* Nothing to do if the VM is stopped: QEMU_CLOCK_VIRTUAL timers
430 * do not fire, so computing the deadline does not make sense.
431 */
432 if (!runstate_is_running()) {
433 return;
434 }
435
436 /* warp clock deterministically in record/replay mode */
e76d1798 437 if (!replay_checkpoint(CHECKPOINT_CLOCK_WARP_START)) {
8bd7f71d
PD
438 return;
439 }
440
ce78d18c 441 if (!all_cpu_threads_idle()) {
946fb27c
PB
442 return;
443 }
444
8156be56
PB
445 if (qtest_enabled()) {
446 /* When testing, qtest commands advance icount. */
e76d1798 447 return;
8156be56
PB
448 }
449
ac70aafc 450 /* We want to use the earliest deadline from ALL vm_clocks */
bf2a7ddb 451 clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT);
40daca54 452 deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
ce78d18c 453 if (deadline < 0) {
d7a0f71d
VC
454 static bool notified;
455 if (!icount_sleep && !notified) {
456 error_report("WARNING: icount sleep disabled and no active timers");
457 notified = true;
458 }
ce78d18c 459 return;
ac70aafc
AB
460 }
461
946fb27c
PB
462 if (deadline > 0) {
463 /*
40daca54 464 * Ensure QEMU_CLOCK_VIRTUAL proceeds even when the virtual CPU goes to
946fb27c
PB
465 * sleep. Otherwise, the CPU might be waiting for a future timer
466 * interrupt to wake it up, but the interrupt never comes because
467 * the vCPU isn't running any insns and thus doesn't advance the
40daca54 468 * QEMU_CLOCK_VIRTUAL.
946fb27c 469 */
5045e9d9
VC
470 if (!icount_sleep) {
471 /*
472 * We never let VCPUs sleep in no sleep icount mode.
473 * If there is a pending QEMU_CLOCK_VIRTUAL timer we just advance
474 * to the next QEMU_CLOCK_VIRTUAL event and notify it.
475 * It is useful when we want a deterministic execution time,
476 * isolated from host latencies.
477 */
03719e44 478 seqlock_write_begin(&timers_state.vm_clock_seqlock);
5045e9d9 479 timers_state.qemu_icount_bias += deadline;
03719e44 480 seqlock_write_end(&timers_state.vm_clock_seqlock);
5045e9d9
VC
481 qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
482 } else {
483 /*
484 * We do stop VCPUs and only advance QEMU_CLOCK_VIRTUAL after some
485 * "real" time, (related to the time left until the next event) has
486 * passed. The QEMU_CLOCK_VIRTUAL_RT clock will do this.
487 * This avoids that the warps are visible externally; for example,
488 * you will not be sending network packets continuously instead of
489 * every 100ms.
490 */
03719e44 491 seqlock_write_begin(&timers_state.vm_clock_seqlock);
5045e9d9
VC
492 if (vm_clock_warp_start == -1 || vm_clock_warp_start > clock) {
493 vm_clock_warp_start = clock;
494 }
03719e44 495 seqlock_write_end(&timers_state.vm_clock_seqlock);
5045e9d9 496 timer_mod_anticipate(icount_warp_timer, clock + deadline);
ce78d18c 497 }
ac70aafc 498 } else if (deadline == 0) {
40daca54 499 qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
946fb27c
PB
500 }
501}
502
e76d1798
PD
503static void qemu_account_warp_timer(void)
504{
505 if (!use_icount || !icount_sleep) {
506 return;
507 }
508
509 /* Nothing to do if the VM is stopped: QEMU_CLOCK_VIRTUAL timers
510 * do not fire, so computing the deadline does not make sense.
511 */
512 if (!runstate_is_running()) {
513 return;
514 }
515
516 /* warp clock deterministically in record/replay mode */
517 if (!replay_checkpoint(CHECKPOINT_CLOCK_WARP_ACCOUNT)) {
518 return;
519 }
520
521 timer_del(icount_warp_timer);
522 icount_warp_rt();
523}
524
d09eae37
FK
525static bool icount_state_needed(void *opaque)
526{
527 return use_icount;
528}
529
530/*
531 * This is a subsection for icount migration.
532 */
533static const VMStateDescription icount_vmstate_timers = {
534 .name = "timer/icount",
535 .version_id = 1,
536 .minimum_version_id = 1,
5cd8cada 537 .needed = icount_state_needed,
d09eae37
FK
538 .fields = (VMStateField[]) {
539 VMSTATE_INT64(qemu_icount_bias, TimersState),
540 VMSTATE_INT64(qemu_icount, TimersState),
541 VMSTATE_END_OF_LIST()
542 }
543};
544
946fb27c
PB
545static const VMStateDescription vmstate_timers = {
546 .name = "timer",
547 .version_id = 2,
548 .minimum_version_id = 1,
35d08458 549 .fields = (VMStateField[]) {
946fb27c
PB
550 VMSTATE_INT64(cpu_ticks_offset, TimersState),
551 VMSTATE_INT64(dummy, TimersState),
552 VMSTATE_INT64_V(cpu_clock_offset, TimersState, 2),
553 VMSTATE_END_OF_LIST()
d09eae37 554 },
5cd8cada
JQ
555 .subsections = (const VMStateDescription*[]) {
556 &icount_vmstate_timers,
557 NULL
946fb27c
PB
558 }
559};
560
14e6fe12 561static void cpu_throttle_thread(CPUState *cpu, run_on_cpu_data opaque)
2adcc85d 562{
2adcc85d
JH
563 double pct;
564 double throttle_ratio;
565 long sleeptime_ns;
566
567 if (!cpu_throttle_get_percentage()) {
568 return;
569 }
570
571 pct = (double)cpu_throttle_get_percentage()/100;
572 throttle_ratio = pct / (1 - pct);
573 sleeptime_ns = (long)(throttle_ratio * CPU_THROTTLE_TIMESLICE_NS);
574
575 qemu_mutex_unlock_iothread();
576 atomic_set(&cpu->throttle_thread_scheduled, 0);
577 g_usleep(sleeptime_ns / 1000); /* Convert ns to us for usleep call */
578 qemu_mutex_lock_iothread();
579}
580
581static void cpu_throttle_timer_tick(void *opaque)
582{
583 CPUState *cpu;
584 double pct;
585
586 /* Stop the timer if needed */
587 if (!cpu_throttle_get_percentage()) {
588 return;
589 }
590 CPU_FOREACH(cpu) {
591 if (!atomic_xchg(&cpu->throttle_thread_scheduled, 1)) {
14e6fe12
PB
592 async_run_on_cpu(cpu, cpu_throttle_thread,
593 RUN_ON_CPU_NULL);
2adcc85d
JH
594 }
595 }
596
597 pct = (double)cpu_throttle_get_percentage()/100;
598 timer_mod(throttle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT) +
599 CPU_THROTTLE_TIMESLICE_NS / (1-pct));
600}
601
602void cpu_throttle_set(int new_throttle_pct)
603{
604 /* Ensure throttle percentage is within valid range */
605 new_throttle_pct = MIN(new_throttle_pct, CPU_THROTTLE_PCT_MAX);
606 new_throttle_pct = MAX(new_throttle_pct, CPU_THROTTLE_PCT_MIN);
607
608 atomic_set(&throttle_percentage, new_throttle_pct);
609
610 timer_mod(throttle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT) +
611 CPU_THROTTLE_TIMESLICE_NS);
612}
613
614void cpu_throttle_stop(void)
615{
616 atomic_set(&throttle_percentage, 0);
617}
618
/* Return true while a non-zero throttle percentage is set. */
bool cpu_throttle_active(void)
{
    int pct = cpu_throttle_get_percentage();

    return pct != 0;
}
623
624int cpu_throttle_get_percentage(void)
625{
626 return atomic_read(&throttle_percentage);
627}
628
4603ea01
PD
629void cpu_ticks_init(void)
630{
ccdb3c1f 631 seqlock_init(&timers_state.vm_clock_seqlock);
4603ea01 632 vmstate_register(NULL, 0, &vmstate_timers, &timers_state);
2adcc85d
JH
633 throttle_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL_RT,
634 cpu_throttle_timer_tick, NULL);
4603ea01
PD
635}
636
1ad9580b 637void configure_icount(QemuOpts *opts, Error **errp)
946fb27c 638{
1ad9580b 639 const char *option;
a8bfac37 640 char *rem_str = NULL;
1ad9580b 641
1ad9580b 642 option = qemu_opt_get(opts, "shift");
946fb27c 643 if (!option) {
a8bfac37
ST
644 if (qemu_opt_get(opts, "align") != NULL) {
645 error_setg(errp, "Please specify shift option when using align");
646 }
946fb27c
PB
647 return;
648 }
f1f4b57e
VC
649
650 icount_sleep = qemu_opt_get_bool(opts, "sleep", true);
5045e9d9
VC
651 if (icount_sleep) {
652 icount_warp_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL_RT,
e76d1798 653 icount_timer_cb, NULL);
5045e9d9 654 }
f1f4b57e 655
a8bfac37 656 icount_align_option = qemu_opt_get_bool(opts, "align", false);
f1f4b57e
VC
657
658 if (icount_align_option && !icount_sleep) {
778d9f9b 659 error_setg(errp, "align=on and sleep=off are incompatible");
f1f4b57e 660 }
946fb27c 661 if (strcmp(option, "auto") != 0) {
a8bfac37
ST
662 errno = 0;
663 icount_time_shift = strtol(option, &rem_str, 0);
664 if (errno != 0 || *rem_str != '\0' || !strlen(option)) {
665 error_setg(errp, "icount: Invalid shift value");
666 }
946fb27c
PB
667 use_icount = 1;
668 return;
a8bfac37
ST
669 } else if (icount_align_option) {
670 error_setg(errp, "shift=auto and align=on are incompatible");
f1f4b57e 671 } else if (!icount_sleep) {
778d9f9b 672 error_setg(errp, "shift=auto and sleep=off are incompatible");
946fb27c
PB
673 }
674
675 use_icount = 2;
676
677 /* 125MIPS seems a reasonable initial guess at the guest speed.
678 It will be corrected fairly quickly anyway. */
679 icount_time_shift = 3;
680
681 /* Have both realtime and virtual time triggers for speed adjustment.
682 The realtime trigger catches emulated time passing too slowly,
683 the virtual time trigger catches emulated time passing too fast.
684 Realtime triggers occur even when idle, so use them less frequently
685 than VM triggers. */
bf2a7ddb
PD
686 icount_rt_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL_RT,
687 icount_adjust_rt, NULL);
40daca54 688 timer_mod(icount_rt_timer,
bf2a7ddb 689 qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL_RT) + 1000);
40daca54
AB
690 icount_vm_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
691 icount_adjust_vm, NULL);
692 timer_mod(icount_vm_timer,
693 qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
73bcb24d 694 NANOSECONDS_PER_SECOND / 10);
946fb27c
PB
695}
696
296af7c9
BS
697/***********************************************************/
698void hw_error(const char *fmt, ...)
699{
700 va_list ap;
55e5c285 701 CPUState *cpu;
296af7c9
BS
702
703 va_start(ap, fmt);
704 fprintf(stderr, "qemu: hardware error: ");
705 vfprintf(stderr, fmt, ap);
706 fprintf(stderr, "\n");
bdc44640 707 CPU_FOREACH(cpu) {
55e5c285 708 fprintf(stderr, "CPU #%d:\n", cpu->cpu_index);
878096ee 709 cpu_dump_state(cpu, stderr, fprintf, CPU_DUMP_FPU);
296af7c9
BS
710 }
711 va_end(ap);
712 abort();
713}
714
715void cpu_synchronize_all_states(void)
716{
182735ef 717 CPUState *cpu;
296af7c9 718
bdc44640 719 CPU_FOREACH(cpu) {
182735ef 720 cpu_synchronize_state(cpu);
296af7c9
BS
721 }
722}
723
724void cpu_synchronize_all_post_reset(void)
725{
182735ef 726 CPUState *cpu;
296af7c9 727
bdc44640 728 CPU_FOREACH(cpu) {
182735ef 729 cpu_synchronize_post_reset(cpu);
296af7c9
BS
730 }
731}
732
733void cpu_synchronize_all_post_init(void)
734{
182735ef 735 CPUState *cpu;
296af7c9 736
bdc44640 737 CPU_FOREACH(cpu) {
182735ef 738 cpu_synchronize_post_init(cpu);
296af7c9
BS
739 }
740}
741
56983463 742static int do_vm_stop(RunState state)
296af7c9 743{
56983463
KW
744 int ret = 0;
745
1354869c 746 if (runstate_is_running()) {
296af7c9 747 cpu_disable_ticks();
296af7c9 748 pause_all_vcpus();
f5bbfba1 749 runstate_set(state);
1dfb4dd9 750 vm_state_notify(0, state);
a4e15de9 751 qapi_event_send_stop(&error_abort);
296af7c9 752 }
56983463 753
594a45ce 754 bdrv_drain_all();
6d0ceb80 755 replay_disable_events();
22af08ea 756 ret = bdrv_flush_all();
594a45ce 757
56983463 758 return ret;
296af7c9
BS
759}
760
a1fcaa73 761static bool cpu_can_run(CPUState *cpu)
296af7c9 762{
4fdeee7c 763 if (cpu->stop) {
a1fcaa73 764 return false;
0ab07c62 765 }
321bc0b2 766 if (cpu_is_stopped(cpu)) {
a1fcaa73 767 return false;
0ab07c62 768 }
a1fcaa73 769 return true;
296af7c9
BS
770}
771
91325046 772static void cpu_handle_guest_debug(CPUState *cpu)
83f338f7 773{
64f6b346 774 gdb_set_stop_cpu(cpu);
8cf71710 775 qemu_system_debug_request();
f324e766 776 cpu->stopped = true;
3c638d06
JK
777}
778
6d9cb73c
JK
779#ifdef CONFIG_LINUX
780static void sigbus_reraise(void)
781{
782 sigset_t set;
783 struct sigaction action;
784
785 memset(&action, 0, sizeof(action));
786 action.sa_handler = SIG_DFL;
787 if (!sigaction(SIGBUS, &action, NULL)) {
788 raise(SIGBUS);
789 sigemptyset(&set);
790 sigaddset(&set, SIGBUS);
a2d1761d 791 pthread_sigmask(SIG_UNBLOCK, &set, NULL);
6d9cb73c
JK
792 }
793 perror("Failed to re-raise SIGBUS!\n");
794 abort();
795}
796
797static void sigbus_handler(int n, struct qemu_signalfd_siginfo *siginfo,
798 void *ctx)
799{
800 if (kvm_on_sigbus(siginfo->ssi_code,
801 (void *)(intptr_t)siginfo->ssi_addr)) {
802 sigbus_reraise();
803 }
804}
805
806static void qemu_init_sigbus(void)
807{
808 struct sigaction action;
809
810 memset(&action, 0, sizeof(action));
811 action.sa_flags = SA_SIGINFO;
812 action.sa_sigaction = (void (*)(int, siginfo_t*, void*))sigbus_handler;
813 sigaction(SIGBUS, &action, NULL);
814
815 prctl(PR_MCE_KILL, PR_MCE_KILL_SET, PR_MCE_KILL_EARLY, 0, 0);
816}
817
290adf38 818static void qemu_kvm_eat_signals(CPUState *cpu)
1ab3c6c0
JK
819{
820 struct timespec ts = { 0, 0 };
821 siginfo_t siginfo;
822 sigset_t waitset;
823 sigset_t chkset;
824 int r;
825
826 sigemptyset(&waitset);
827 sigaddset(&waitset, SIG_IPI);
828 sigaddset(&waitset, SIGBUS);
829
830 do {
831 r = sigtimedwait(&waitset, &siginfo, &ts);
832 if (r == -1 && !(errno == EAGAIN || errno == EINTR)) {
833 perror("sigtimedwait");
834 exit(1);
835 }
836
837 switch (r) {
838 case SIGBUS:
290adf38 839 if (kvm_on_sigbus_vcpu(cpu, siginfo.si_code, siginfo.si_addr)) {
1ab3c6c0
JK
840 sigbus_reraise();
841 }
842 break;
843 default:
844 break;
845 }
846
847 r = sigpending(&chkset);
848 if (r == -1) {
849 perror("sigpending");
850 exit(1);
851 }
852 } while (sigismember(&chkset, SIG_IPI) || sigismember(&chkset, SIGBUS));
1ab3c6c0
JK
853}
854
6d9cb73c
JK
855#else /* !CONFIG_LINUX */
856
/* Variant for builds without CONFIG_LINUX: no SIGBUS setup is done. */
static void qemu_init_sigbus(void)
{
    /* Intentionally empty. */
}
1ab3c6c0 860
290adf38 861static void qemu_kvm_eat_signals(CPUState *cpu)
1ab3c6c0
JK
862{
863}
6d9cb73c
JK
864#endif /* !CONFIG_LINUX */
865
296af7c9 866#ifndef _WIN32
55f8d6ac
JK
/* Signal handler that deliberately does nothing. */
static void dummy_signal(int sig)
{
    /* Intentionally empty. */
}
55f8d6ac 870
13618e05 871static void qemu_kvm_init_cpu_signals(CPUState *cpu)
714bd040
PB
872{
873 int r;
874 sigset_t set;
875 struct sigaction sigact;
876
877 memset(&sigact, 0, sizeof(sigact));
878 sigact.sa_handler = dummy_signal;
879 sigaction(SIG_IPI, &sigact, NULL);
880
714bd040
PB
881 pthread_sigmask(SIG_BLOCK, NULL, &set);
882 sigdelset(&set, SIG_IPI);
714bd040 883 sigdelset(&set, SIGBUS);
491d6e80 884 r = kvm_set_signal_mask(cpu, &set);
714bd040
PB
885 if (r) {
886 fprintf(stderr, "kvm_set_signal_mask: %s\n", strerror(-r));
887 exit(1);
888 }
889}
890
55f8d6ac 891#else /* _WIN32 */
13618e05 892static void qemu_kvm_init_cpu_signals(CPUState *cpu)
ff48eb5f 893{
714bd040
PB
894 abort();
895}
714bd040 896#endif /* _WIN32 */
ff48eb5f 897
b2532d88 898static QemuMutex qemu_global_mutex;
46daff13 899static QemuCond qemu_io_proceeded_cond;
6b49809c 900static unsigned iothread_requesting_mutex;
296af7c9
BS
901
902static QemuThread io_thread;
903
296af7c9
BS
904/* cpu creation */
905static QemuCond qemu_cpu_cond;
906/* system init */
296af7c9
BS
907static QemuCond qemu_pause_cond;
908
d3b12f5d 909void qemu_init_cpu_loop(void)
296af7c9 910{
6d9cb73c 911 qemu_init_sigbus();
ed94592b 912 qemu_cond_init(&qemu_cpu_cond);
ed94592b 913 qemu_cond_init(&qemu_pause_cond);
46daff13 914 qemu_cond_init(&qemu_io_proceeded_cond);
296af7c9 915 qemu_mutex_init(&qemu_global_mutex);
296af7c9 916
b7680cb6 917 qemu_thread_get_self(&io_thread);
296af7c9
BS
918}
919
14e6fe12 920void run_on_cpu(CPUState *cpu, run_on_cpu_func func, run_on_cpu_data data)
e82bcec2 921{
d148d90e 922 do_run_on_cpu(cpu, func, data, &qemu_global_mutex);
3c02270d
CV
923}
924
4c055ab5
GZ
925static void qemu_kvm_destroy_vcpu(CPUState *cpu)
926{
927 if (kvm_destroy_vcpu(cpu) < 0) {
928 error_report("kvm_destroy_vcpu failed");
929 exit(EXIT_FAILURE);
930 }
931}
932
933static void qemu_tcg_destroy_vcpu(CPUState *cpu)
934{
935}
936
509a0d78 937static void qemu_wait_io_event_common(CPUState *cpu)
296af7c9 938{
4fdeee7c
AF
939 if (cpu->stop) {
940 cpu->stop = false;
f324e766 941 cpu->stopped = true;
96bce683 942 qemu_cond_broadcast(&qemu_pause_cond);
296af7c9 943 }
a5403c69 944 process_queued_cpu_work(cpu);
216fc9a4 945 cpu->thread_kicked = false;
296af7c9
BS
946}
947
d5f8d613 948static void qemu_tcg_wait_io_event(CPUState *cpu)
296af7c9 949{
16400322 950 while (all_cpu_threads_idle()) {
d5f8d613 951 qemu_cond_wait(cpu->halt_cond, &qemu_global_mutex);
16400322 952 }
296af7c9 953
46daff13
PB
954 while (iothread_requesting_mutex) {
955 qemu_cond_wait(&qemu_io_proceeded_cond, &qemu_global_mutex);
956 }
6cabe1f3 957
bdc44640 958 CPU_FOREACH(cpu) {
182735ef 959 qemu_wait_io_event_common(cpu);
6cabe1f3 960 }
296af7c9
BS
961}
962
fd529e8f 963static void qemu_kvm_wait_io_event(CPUState *cpu)
296af7c9 964{
a98ae1d8 965 while (cpu_thread_is_idle(cpu)) {
f5c121b8 966 qemu_cond_wait(cpu->halt_cond, &qemu_global_mutex);
16400322 967 }
296af7c9 968
290adf38 969 qemu_kvm_eat_signals(cpu);
509a0d78 970 qemu_wait_io_event_common(cpu);
296af7c9
BS
971}
972
7e97cd88 973static void *qemu_kvm_cpu_thread_fn(void *arg)
296af7c9 974{
48a106bd 975 CPUState *cpu = arg;
84b4915d 976 int r;
296af7c9 977
ab28bd23
PB
978 rcu_register_thread();
979
2e7f7a3c 980 qemu_mutex_lock_iothread();
814e612e 981 qemu_thread_get_self(cpu->thread);
9f09e18a 982 cpu->thread_id = qemu_get_thread_id();
626cf8f4 983 cpu->can_do_io = 1;
4917cf44 984 current_cpu = cpu;
296af7c9 985
504134d2 986 r = kvm_init_vcpu(cpu);
84b4915d
JK
987 if (r < 0) {
988 fprintf(stderr, "kvm_init_vcpu failed: %s\n", strerror(-r));
989 exit(1);
990 }
296af7c9 991
13618e05 992 qemu_kvm_init_cpu_signals(cpu);
296af7c9
BS
993
994 /* signal CPU creation */
61a46217 995 cpu->created = true;
296af7c9
BS
996 qemu_cond_signal(&qemu_cpu_cond);
997
4c055ab5 998 do {
a1fcaa73 999 if (cpu_can_run(cpu)) {
1458c363 1000 r = kvm_cpu_exec(cpu);
83f338f7 1001 if (r == EXCP_DEBUG) {
91325046 1002 cpu_handle_guest_debug(cpu);
83f338f7 1003 }
0ab07c62 1004 }
fd529e8f 1005 qemu_kvm_wait_io_event(cpu);
4c055ab5 1006 } while (!cpu->unplug || cpu_can_run(cpu));
296af7c9 1007
4c055ab5 1008 qemu_kvm_destroy_vcpu(cpu);
2c579042
BR
1009 cpu->created = false;
1010 qemu_cond_signal(&qemu_cpu_cond);
4c055ab5 1011 qemu_mutex_unlock_iothread();
296af7c9
BS
1012 return NULL;
1013}
1014
c7f0f3b1
AL
1015static void *qemu_dummy_cpu_thread_fn(void *arg)
1016{
1017#ifdef _WIN32
1018 fprintf(stderr, "qtest is not supported under Windows\n");
1019 exit(1);
1020#else
10a9021d 1021 CPUState *cpu = arg;
c7f0f3b1
AL
1022 sigset_t waitset;
1023 int r;
1024
ab28bd23
PB
1025 rcu_register_thread();
1026
c7f0f3b1 1027 qemu_mutex_lock_iothread();
814e612e 1028 qemu_thread_get_self(cpu->thread);
9f09e18a 1029 cpu->thread_id = qemu_get_thread_id();
626cf8f4 1030 cpu->can_do_io = 1;
c7f0f3b1
AL
1031
1032 sigemptyset(&waitset);
1033 sigaddset(&waitset, SIG_IPI);
1034
1035 /* signal CPU creation */
61a46217 1036 cpu->created = true;
c7f0f3b1
AL
1037 qemu_cond_signal(&qemu_cpu_cond);
1038
4917cf44 1039 current_cpu = cpu;
c7f0f3b1 1040 while (1) {
4917cf44 1041 current_cpu = NULL;
c7f0f3b1
AL
1042 qemu_mutex_unlock_iothread();
1043 do {
1044 int sig;
1045 r = sigwait(&waitset, &sig);
1046 } while (r == -1 && (errno == EAGAIN || errno == EINTR));
1047 if (r == -1) {
1048 perror("sigwait");
1049 exit(1);
1050 }
1051 qemu_mutex_lock_iothread();
4917cf44 1052 current_cpu = cpu;
509a0d78 1053 qemu_wait_io_event_common(cpu);
c7f0f3b1
AL
1054 }
1055
1056 return NULL;
1057#endif
1058}
1059
1be7fcb8
AB
1060static int64_t tcg_get_icount_limit(void)
1061{
1062 int64_t deadline;
1063
1064 if (replay_mode != REPLAY_MODE_PLAY) {
1065 deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
1066
1067 /* Maintain prior (possibly buggy) behaviour where if no deadline
1068 * was set (as there is no QEMU_CLOCK_VIRTUAL timer) or it is more than
1069 * INT32_MAX nanoseconds ahead, we still use INT32_MAX
1070 * nanoseconds.
1071 */
1072 if ((deadline < 0) || (deadline > INT32_MAX)) {
1073 deadline = INT32_MAX;
1074 }
1075
1076 return qemu_icount_round(deadline);
1077 } else {
1078 return replay_get_instructions();
1079 }
1080}
1081
12e9700d
AB
1082static void handle_icount_deadline(void)
1083{
1084 if (use_icount) {
1085 int64_t deadline =
1086 qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
1087
1088 if (deadline == 0) {
1089 qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
1090 }
1091 }
1092}
1093
1be7fcb8
AB
1094static int tcg_cpu_exec(CPUState *cpu)
1095{
1096 int ret;
1097#ifdef CONFIG_PROFILER
1098 int64_t ti;
1099#endif
1100
1101#ifdef CONFIG_PROFILER
1102 ti = profile_getclock();
1103#endif
1104 if (use_icount) {
1105 int64_t count;
1106 int decr;
1107 timers_state.qemu_icount -= (cpu->icount_decr.u16.low
1108 + cpu->icount_extra);
1109 cpu->icount_decr.u16.low = 0;
1110 cpu->icount_extra = 0;
1111 count = tcg_get_icount_limit();
1112 timers_state.qemu_icount += count;
1113 decr = (count > 0xffff) ? 0xffff : count;
1114 count -= decr;
1115 cpu->icount_decr.u16.low = decr;
1116 cpu->icount_extra = count;
1117 }
1118 cpu_exec_start(cpu);
1119 ret = cpu_exec(cpu);
1120 cpu_exec_end(cpu);
1121#ifdef CONFIG_PROFILER
1122 tcg_time += profile_getclock() - ti;
1123#endif
1124 if (use_icount) {
1125 /* Fold pending instructions back into the
1126 instruction counter, and clear the interrupt flag. */
1127 timers_state.qemu_icount -= (cpu->icount_decr.u16.low
1128 + cpu->icount_extra);
1129 cpu->icount_decr.u32 = 0;
1130 cpu->icount_extra = 0;
1131 replay_account_executed_instructions();
1132 }
1133 return ret;
1134}
1135
c93bbbef
AB
1136/* Destroy any remaining vCPUs which have been unplugged and have
1137 * finished running
1138 */
1139static void deal_with_unplugged_cpus(void)
1be7fcb8 1140{
c93bbbef 1141 CPUState *cpu;
1be7fcb8 1142
c93bbbef
AB
1143 CPU_FOREACH(cpu) {
1144 if (cpu->unplug && !cpu_can_run(cpu)) {
1145 qemu_tcg_destroy_vcpu(cpu);
1146 cpu->created = false;
1147 qemu_cond_signal(&qemu_cpu_cond);
1be7fcb8
AB
1148 break;
1149 }
1150 }
1be7fcb8 1151}
bdb7ca67 1152
7e97cd88 1153static void *qemu_tcg_cpu_thread_fn(void *arg)
296af7c9 1154{
c3586ba7 1155 CPUState *cpu = arg;
296af7c9 1156
ab28bd23
PB
1157 rcu_register_thread();
1158
2e7f7a3c 1159 qemu_mutex_lock_iothread();
814e612e 1160 qemu_thread_get_self(cpu->thread);
296af7c9 1161
38fcbd3f
AF
1162 CPU_FOREACH(cpu) {
1163 cpu->thread_id = qemu_get_thread_id();
1164 cpu->created = true;
626cf8f4 1165 cpu->can_do_io = 1;
38fcbd3f 1166 }
296af7c9
BS
1167 qemu_cond_signal(&qemu_cpu_cond);
1168
fa7d1867 1169 /* wait for initial kick-off after machine start */
c28e399c 1170 while (first_cpu->stopped) {
d5f8d613 1171 qemu_cond_wait(first_cpu->halt_cond, &qemu_global_mutex);
8e564b4e
JK
1172
1173 /* process any pending work */
bdc44640 1174 CPU_FOREACH(cpu) {
182735ef 1175 qemu_wait_io_event_common(cpu);
8e564b4e 1176 }
0ab07c62 1177 }
296af7c9 1178
21618b3e 1179 /* process any pending work */
aed807c8 1180 atomic_mb_set(&exit_request, 1);
21618b3e 1181
c93bbbef
AB
1182 cpu = first_cpu;
1183
296af7c9 1184 while (1) {
c93bbbef
AB
1185 /* Account partial waits to QEMU_CLOCK_VIRTUAL. */
1186 qemu_account_warp_timer();
1187
1188 if (!cpu) {
1189 cpu = first_cpu;
1190 }
1191
1192 for (; cpu != NULL && !exit_request; cpu = CPU_NEXT(cpu)) {
1193
1194 qemu_clock_enable(QEMU_CLOCK_VIRTUAL,
1195 (cpu->singlestep_enabled & SSTEP_NOTIMER) == 0);
1196
1197 if (cpu_can_run(cpu)) {
1198 int r;
1199 r = tcg_cpu_exec(cpu);
1200 if (r == EXCP_DEBUG) {
1201 cpu_handle_guest_debug(cpu);
1202 break;
1203 }
1204 } else if (cpu->stop || cpu->stopped) {
1205 if (cpu->unplug) {
1206 cpu = CPU_NEXT(cpu);
1207 }
1208 break;
1209 }
1210
1211 } /* for cpu.. */
1212
1213 /* Pairs with smp_wmb in qemu_cpu_kick. */
1214 atomic_mb_set(&exit_request, 0);
ac70aafc 1215
12e9700d 1216 handle_icount_deadline();
ac70aafc 1217
d5f8d613 1218 qemu_tcg_wait_io_event(QTAILQ_FIRST(&cpus));
c93bbbef 1219 deal_with_unplugged_cpus();
296af7c9
BS
1220 }
1221
1222 return NULL;
1223}
1224
b0cb0a66
VP
1225static void *qemu_hax_cpu_thread_fn(void *arg)
1226{
1227 CPUState *cpu = arg;
1228 int r;
1229 qemu_thread_get_self(cpu->thread);
1230 qemu_mutex_lock(&qemu_global_mutex);
1231
1232 cpu->thread_id = qemu_get_thread_id();
1233 cpu->created = true;
1234 cpu->halted = 0;
1235 current_cpu = cpu;
1236
1237 hax_init_vcpu(cpu);
1238 qemu_cond_signal(&qemu_cpu_cond);
1239
1240 while (1) {
1241 if (cpu_can_run(cpu)) {
1242 r = hax_smp_cpu_exec(cpu);
1243 if (r == EXCP_DEBUG) {
1244 cpu_handle_guest_debug(cpu);
1245 }
1246 }
1247
1248 while (cpu_thread_is_idle(cpu)) {
1249 qemu_cond_wait(cpu->halt_cond, &qemu_global_mutex);
1250 }
1251#ifdef _WIN32
1252 SleepEx(0, TRUE);
1253#endif
1254 qemu_wait_io_event_common(cpu);
1255 }
1256 return NULL;
1257}
1258
#ifdef _WIN32
/*
 * Empty APC routine, queued by qemu_cpu_kick_thread() via QueueUserAPC().
 * It intentionally does nothing.
 */
static void CALLBACK dummy_apc_func(ULONG_PTR unused)
{
    /* nothing to do */
}
#endif
1264
2ff09a40 1265static void qemu_cpu_kick_thread(CPUState *cpu)
cc015e9a
PB
1266{
1267#ifndef _WIN32
1268 int err;
1269
e0c38211
PB
1270 if (cpu->thread_kicked) {
1271 return;
9102deda 1272 }
e0c38211 1273 cpu->thread_kicked = true;
814e612e 1274 err = pthread_kill(cpu->thread->thread, SIG_IPI);
cc015e9a
PB
1275 if (err) {
1276 fprintf(stderr, "qemu:%s: %s", __func__, strerror(err));
1277 exit(1);
1278 }
1279#else /* _WIN32 */
b0cb0a66
VP
1280 if (!qemu_cpu_is_self(cpu)) {
1281 if (!QueueUserAPC(dummy_apc_func, cpu->hThread, 0)) {
1282 fprintf(stderr, "%s: QueueUserAPC failed with error %lu\n",
1283 __func__, GetLastError());
1284 exit(1);
1285 }
1286 }
e0c38211
PB
1287#endif
1288}
ed9164a3 1289
e0c38211
PB
1290static void qemu_cpu_kick_no_halt(void)
1291{
1292 CPUState *cpu;
1293 /* Ensure whatever caused the exit has reached the CPU threads before
1294 * writing exit_request.
1295 */
1296 atomic_mb_set(&exit_request, 1);
1297 cpu = atomic_mb_read(&tcg_current_cpu);
1298 if (cpu) {
1299 cpu_exit(cpu);
cc015e9a 1300 }
cc015e9a
PB
1301}
1302
c08d7424 1303void qemu_cpu_kick(CPUState *cpu)
296af7c9 1304{
f5c121b8 1305 qemu_cond_broadcast(cpu->halt_cond);
e0c38211
PB
1306 if (tcg_enabled()) {
1307 qemu_cpu_kick_no_halt();
1308 } else {
b0cb0a66
VP
1309 if (hax_enabled()) {
1310 /*
1311 * FIXME: race condition with the exit_request check in
1312 * hax_vcpu_hax_exec
1313 */
1314 cpu->exit_request = 1;
1315 }
e0c38211
PB
1316 qemu_cpu_kick_thread(cpu);
1317 }
296af7c9
BS
1318}
1319
46d62fac 1320void qemu_cpu_kick_self(void)
296af7c9 1321{
4917cf44 1322 assert(current_cpu);
9102deda 1323 qemu_cpu_kick_thread(current_cpu);
296af7c9
BS
1324}
1325
60e82579 1326bool qemu_cpu_is_self(CPUState *cpu)
296af7c9 1327{
814e612e 1328 return qemu_thread_is_self(cpu->thread);
296af7c9
BS
1329}
1330
79e2b9ae 1331bool qemu_in_vcpu_thread(void)
aa723c23 1332{
4917cf44 1333 return current_cpu && qemu_cpu_is_self(current_cpu);
aa723c23
JQ
1334}
1335
afbe7053
PB
/*
 * Per-thread flag: set by qemu_mutex_lock_iothread() and cleared by
 * qemu_mutex_unlock_iothread().  Zero-initialised, i.e. starts as false.
 */
static __thread bool iothread_locked;

/* Report the calling thread's iothread_locked flag. */
bool qemu_mutex_iothread_locked(void)
{
    return iothread_locked;
}
1342
296af7c9
BS
1343void qemu_mutex_lock_iothread(void)
1344{
21618b3e 1345 atomic_inc(&iothread_requesting_mutex);
2e7f7a3c
PB
1346 /* In the simple case there is no need to bump the VCPU thread out of
1347 * TCG code execution.
1348 */
1349 if (!tcg_enabled() || qemu_in_vcpu_thread() ||
46036b24 1350 !first_cpu || !first_cpu->created) {
296af7c9 1351 qemu_mutex_lock(&qemu_global_mutex);
21618b3e 1352 atomic_dec(&iothread_requesting_mutex);
1a28cac3 1353 } else {
1a28cac3 1354 if (qemu_mutex_trylock(&qemu_global_mutex)) {
e0c38211 1355 qemu_cpu_kick_no_halt();
1a28cac3
MT
1356 qemu_mutex_lock(&qemu_global_mutex);
1357 }
6b49809c 1358 atomic_dec(&iothread_requesting_mutex);
46daff13 1359 qemu_cond_broadcast(&qemu_io_proceeded_cond);
1a28cac3 1360 }
afbe7053 1361 iothread_locked = true;
296af7c9
BS
1362}
1363
1364void qemu_mutex_unlock_iothread(void)
1365{
afbe7053 1366 iothread_locked = false;
296af7c9
BS
1367 qemu_mutex_unlock(&qemu_global_mutex);
1368}
1369
e8faee06 1370static bool all_vcpus_paused(void)
296af7c9 1371{
bdc44640 1372 CPUState *cpu;
296af7c9 1373
bdc44640 1374 CPU_FOREACH(cpu) {
182735ef 1375 if (!cpu->stopped) {
e8faee06 1376 return false;
0ab07c62 1377 }
296af7c9
BS
1378 }
1379
e8faee06 1380 return true;
296af7c9
BS
1381}
1382
1383void pause_all_vcpus(void)
1384{
bdc44640 1385 CPUState *cpu;
296af7c9 1386
40daca54 1387 qemu_clock_enable(QEMU_CLOCK_VIRTUAL, false);
bdc44640 1388 CPU_FOREACH(cpu) {
182735ef
AF
1389 cpu->stop = true;
1390 qemu_cpu_kick(cpu);
296af7c9
BS
1391 }
1392
aa723c23 1393 if (qemu_in_vcpu_thread()) {
d798e974
JK
1394 cpu_stop_current();
1395 if (!kvm_enabled()) {
bdc44640 1396 CPU_FOREACH(cpu) {
182735ef
AF
1397 cpu->stop = false;
1398 cpu->stopped = true;
d798e974
JK
1399 }
1400 return;
1401 }
1402 }
1403
296af7c9 1404 while (!all_vcpus_paused()) {
be7d6c57 1405 qemu_cond_wait(&qemu_pause_cond, &qemu_global_mutex);
bdc44640 1406 CPU_FOREACH(cpu) {
182735ef 1407 qemu_cpu_kick(cpu);
296af7c9
BS
1408 }
1409 }
1410}
1411
2993683b
IM
1412void cpu_resume(CPUState *cpu)
1413{
1414 cpu->stop = false;
1415 cpu->stopped = false;
1416 qemu_cpu_kick(cpu);
1417}
1418
296af7c9
BS
1419void resume_all_vcpus(void)
1420{
bdc44640 1421 CPUState *cpu;
296af7c9 1422
40daca54 1423 qemu_clock_enable(QEMU_CLOCK_VIRTUAL, true);
bdc44640 1424 CPU_FOREACH(cpu) {
182735ef 1425 cpu_resume(cpu);
296af7c9
BS
1426 }
1427}
1428
4c055ab5
GZ
1429void cpu_remove(CPUState *cpu)
1430{
1431 cpu->stop = true;
1432 cpu->unplug = true;
1433 qemu_cpu_kick(cpu);
1434}
1435
2c579042
BR
1436void cpu_remove_sync(CPUState *cpu)
1437{
1438 cpu_remove(cpu);
1439 while (cpu->created) {
1440 qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
1441 }
1442}
1443
4900116e
DDAG
1444/* For temporary buffers for forming a name */
1445#define VCPU_THREAD_NAME_SIZE 16
1446
e5ab30a2 1447static void qemu_tcg_init_vcpu(CPUState *cpu)
296af7c9 1448{
4900116e 1449 char thread_name[VCPU_THREAD_NAME_SIZE];
d5f8d613
FK
1450 static QemuCond *tcg_halt_cond;
1451 static QemuThread *tcg_cpu_thread;
4900116e 1452
296af7c9
BS
1453 /* share a single thread for all cpus with TCG */
1454 if (!tcg_cpu_thread) {
814e612e 1455 cpu->thread = g_malloc0(sizeof(QemuThread));
f5c121b8
AF
1456 cpu->halt_cond = g_malloc0(sizeof(QemuCond));
1457 qemu_cond_init(cpu->halt_cond);
1458 tcg_halt_cond = cpu->halt_cond;
4900116e
DDAG
1459 snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/TCG",
1460 cpu->cpu_index);
1461 qemu_thread_create(cpu->thread, thread_name, qemu_tcg_cpu_thread_fn,
1462 cpu, QEMU_THREAD_JOINABLE);
1ecf47bf 1463#ifdef _WIN32
814e612e 1464 cpu->hThread = qemu_thread_get_handle(cpu->thread);
1ecf47bf 1465#endif
61a46217 1466 while (!cpu->created) {
18a85728 1467 qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
0ab07c62 1468 }
814e612e 1469 tcg_cpu_thread = cpu->thread;
296af7c9 1470 } else {
814e612e 1471 cpu->thread = tcg_cpu_thread;
f5c121b8 1472 cpu->halt_cond = tcg_halt_cond;
296af7c9
BS
1473 }
1474}
1475
b0cb0a66
VP
1476static void qemu_hax_start_vcpu(CPUState *cpu)
1477{
1478 char thread_name[VCPU_THREAD_NAME_SIZE];
1479
1480 cpu->thread = g_malloc0(sizeof(QemuThread));
1481 cpu->halt_cond = g_malloc0(sizeof(QemuCond));
1482 qemu_cond_init(cpu->halt_cond);
1483
1484 snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/HAX",
1485 cpu->cpu_index);
1486 qemu_thread_create(cpu->thread, thread_name, qemu_hax_cpu_thread_fn,
1487 cpu, QEMU_THREAD_JOINABLE);
1488#ifdef _WIN32
1489 cpu->hThread = qemu_thread_get_handle(cpu->thread);
1490#endif
1491 while (!cpu->created) {
1492 qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
1493 }
1494}
1495
48a106bd 1496static void qemu_kvm_start_vcpu(CPUState *cpu)
296af7c9 1497{
4900116e
DDAG
1498 char thread_name[VCPU_THREAD_NAME_SIZE];
1499
814e612e 1500 cpu->thread = g_malloc0(sizeof(QemuThread));
f5c121b8
AF
1501 cpu->halt_cond = g_malloc0(sizeof(QemuCond));
1502 qemu_cond_init(cpu->halt_cond);
4900116e
DDAG
1503 snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/KVM",
1504 cpu->cpu_index);
1505 qemu_thread_create(cpu->thread, thread_name, qemu_kvm_cpu_thread_fn,
1506 cpu, QEMU_THREAD_JOINABLE);
61a46217 1507 while (!cpu->created) {
18a85728 1508 qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
0ab07c62 1509 }
296af7c9
BS
1510}
1511
10a9021d 1512static void qemu_dummy_start_vcpu(CPUState *cpu)
c7f0f3b1 1513{
4900116e
DDAG
1514 char thread_name[VCPU_THREAD_NAME_SIZE];
1515
814e612e 1516 cpu->thread = g_malloc0(sizeof(QemuThread));
f5c121b8
AF
1517 cpu->halt_cond = g_malloc0(sizeof(QemuCond));
1518 qemu_cond_init(cpu->halt_cond);
4900116e
DDAG
1519 snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/DUMMY",
1520 cpu->cpu_index);
1521 qemu_thread_create(cpu->thread, thread_name, qemu_dummy_cpu_thread_fn, cpu,
c7f0f3b1 1522 QEMU_THREAD_JOINABLE);
61a46217 1523 while (!cpu->created) {
c7f0f3b1
AL
1524 qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
1525 }
1526}
1527
c643bed9 1528void qemu_init_vcpu(CPUState *cpu)
296af7c9 1529{
ce3960eb
AF
1530 cpu->nr_cores = smp_cores;
1531 cpu->nr_threads = smp_threads;
f324e766 1532 cpu->stopped = true;
56943e8c
PM
1533
1534 if (!cpu->as) {
1535 /* If the target cpu hasn't set up any address spaces itself,
1536 * give it the default one.
1537 */
6731d864
PC
1538 AddressSpace *as = address_space_init_shareable(cpu->memory,
1539 "cpu-memory");
12ebc9a7 1540 cpu->num_ases = 1;
6731d864 1541 cpu_address_space_init(cpu, as, 0);
56943e8c
PM
1542 }
1543
0ab07c62 1544 if (kvm_enabled()) {
48a106bd 1545 qemu_kvm_start_vcpu(cpu);
b0cb0a66
VP
1546 } else if (hax_enabled()) {
1547 qemu_hax_start_vcpu(cpu);
c7f0f3b1 1548 } else if (tcg_enabled()) {
e5ab30a2 1549 qemu_tcg_init_vcpu(cpu);
c7f0f3b1 1550 } else {
10a9021d 1551 qemu_dummy_start_vcpu(cpu);
0ab07c62 1552 }
296af7c9
BS
1553}
1554
b4a3d965 1555void cpu_stop_current(void)
296af7c9 1556{
4917cf44
AF
1557 if (current_cpu) {
1558 current_cpu->stop = false;
1559 current_cpu->stopped = true;
1560 cpu_exit(current_cpu);
96bce683 1561 qemu_cond_broadcast(&qemu_pause_cond);
b4a3d965 1562 }
296af7c9
BS
1563}
1564
56983463 1565int vm_stop(RunState state)
296af7c9 1566{
aa723c23 1567 if (qemu_in_vcpu_thread()) {
74892d24 1568 qemu_system_vmstop_request_prepare();
1dfb4dd9 1569 qemu_system_vmstop_request(state);
296af7c9
BS
1570 /*
1571 * FIXME: should not return to device code in case
1572 * vm_stop() has been requested.
1573 */
b4a3d965 1574 cpu_stop_current();
56983463 1575 return 0;
296af7c9 1576 }
56983463
KW
1577
1578 return do_vm_stop(state);
296af7c9
BS
1579}
1580
8a9236f1
LC
1581/* does a state transition even if the VM is already stopped,
1582 current state is forgotten forever */
56983463 1583int vm_stop_force_state(RunState state)
8a9236f1
LC
1584{
1585 if (runstate_is_running()) {
56983463 1586 return vm_stop(state);
8a9236f1
LC
1587 } else {
1588 runstate_set(state);
b2780d32
WC
1589
1590 bdrv_drain_all();
594a45ce
KW
1591 /* Make sure to return an error if the flush in a previous vm_stop()
1592 * failed. */
22af08ea 1593 return bdrv_flush_all();
8a9236f1
LC
1594 }
1595}
1596
9a78eead 1597void list_cpus(FILE *f, fprintf_function cpu_fprintf, const char *optarg)
262353cb
BS
1598{
1599 /* XXX: implement xxx_cpu_list for targets that still miss it */
e916cbf8
PM
1600#if defined(cpu_list)
1601 cpu_list(f, cpu_fprintf);
262353cb
BS
1602#endif
1603}
de0b36b6
LC
1604
1605CpuInfoList *qmp_query_cpus(Error **errp)
1606{
1607 CpuInfoList *head = NULL, *cur_item = NULL;
182735ef 1608 CPUState *cpu;
de0b36b6 1609
bdc44640 1610 CPU_FOREACH(cpu) {
de0b36b6 1611 CpuInfoList *info;
182735ef
AF
1612#if defined(TARGET_I386)
1613 X86CPU *x86_cpu = X86_CPU(cpu);
1614 CPUX86State *env = &x86_cpu->env;
1615#elif defined(TARGET_PPC)
1616 PowerPCCPU *ppc_cpu = POWERPC_CPU(cpu);
1617 CPUPPCState *env = &ppc_cpu->env;
1618#elif defined(TARGET_SPARC)
1619 SPARCCPU *sparc_cpu = SPARC_CPU(cpu);
1620 CPUSPARCState *env = &sparc_cpu->env;
1621#elif defined(TARGET_MIPS)
1622 MIPSCPU *mips_cpu = MIPS_CPU(cpu);
1623 CPUMIPSState *env = &mips_cpu->env;
48e06fe0
BK
1624#elif defined(TARGET_TRICORE)
1625 TriCoreCPU *tricore_cpu = TRICORE_CPU(cpu);
1626 CPUTriCoreState *env = &tricore_cpu->env;
182735ef 1627#endif
de0b36b6 1628
cb446eca 1629 cpu_synchronize_state(cpu);
de0b36b6
LC
1630
1631 info = g_malloc0(sizeof(*info));
1632 info->value = g_malloc0(sizeof(*info->value));
55e5c285 1633 info->value->CPU = cpu->cpu_index;
182735ef 1634 info->value->current = (cpu == first_cpu);
259186a7 1635 info->value->halted = cpu->halted;
58f88d4b 1636 info->value->qom_path = object_get_canonical_path(OBJECT(cpu));
9f09e18a 1637 info->value->thread_id = cpu->thread_id;
de0b36b6 1638#if defined(TARGET_I386)
86f4b687 1639 info->value->arch = CPU_INFO_ARCH_X86;
544a3731 1640 info->value->u.x86.pc = env->eip + env->segs[R_CS].base;
de0b36b6 1641#elif defined(TARGET_PPC)
86f4b687 1642 info->value->arch = CPU_INFO_ARCH_PPC;
544a3731 1643 info->value->u.ppc.nip = env->nip;
de0b36b6 1644#elif defined(TARGET_SPARC)
86f4b687 1645 info->value->arch = CPU_INFO_ARCH_SPARC;
544a3731
EB
1646 info->value->u.q_sparc.pc = env->pc;
1647 info->value->u.q_sparc.npc = env->npc;
de0b36b6 1648#elif defined(TARGET_MIPS)
86f4b687 1649 info->value->arch = CPU_INFO_ARCH_MIPS;
544a3731 1650 info->value->u.q_mips.PC = env->active_tc.PC;
48e06fe0 1651#elif defined(TARGET_TRICORE)
86f4b687 1652 info->value->arch = CPU_INFO_ARCH_TRICORE;
544a3731 1653 info->value->u.tricore.PC = env->PC;
86f4b687
EB
1654#else
1655 info->value->arch = CPU_INFO_ARCH_OTHER;
de0b36b6
LC
1656#endif
1657
1658 /* XXX: waiting for the qapi to support GSList */
1659 if (!cur_item) {
1660 head = cur_item = info;
1661 } else {
1662 cur_item->next = info;
1663 cur_item = info;
1664 }
1665 }
1666
1667 return head;
1668}
0cfd6a9a
LC
1669
1670void qmp_memsave(int64_t addr, int64_t size, const char *filename,
1671 bool has_cpu, int64_t cpu_index, Error **errp)
1672{
1673 FILE *f;
1674 uint32_t l;
55e5c285 1675 CPUState *cpu;
0cfd6a9a 1676 uint8_t buf[1024];
0dc9daf0 1677 int64_t orig_addr = addr, orig_size = size;
0cfd6a9a
LC
1678
1679 if (!has_cpu) {
1680 cpu_index = 0;
1681 }
1682
151d1322
AF
1683 cpu = qemu_get_cpu(cpu_index);
1684 if (cpu == NULL) {
c6bd8c70
MA
1685 error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "cpu-index",
1686 "a CPU number");
0cfd6a9a
LC
1687 return;
1688 }
1689
1690 f = fopen(filename, "wb");
1691 if (!f) {
618da851 1692 error_setg_file_open(errp, errno, filename);
0cfd6a9a
LC
1693 return;
1694 }
1695
1696 while (size != 0) {
1697 l = sizeof(buf);
1698 if (l > size)
1699 l = size;
2f4d0f59 1700 if (cpu_memory_rw_debug(cpu, addr, buf, l, 0) != 0) {
0dc9daf0
BP
1701 error_setg(errp, "Invalid addr 0x%016" PRIx64 "/size %" PRId64
1702 " specified", orig_addr, orig_size);
2f4d0f59
AK
1703 goto exit;
1704 }
0cfd6a9a 1705 if (fwrite(buf, 1, l, f) != l) {
c6bd8c70 1706 error_setg(errp, QERR_IO_ERROR);
0cfd6a9a
LC
1707 goto exit;
1708 }
1709 addr += l;
1710 size -= l;
1711 }
1712
1713exit:
1714 fclose(f);
1715}
6d3962bf
LC
1716
1717void qmp_pmemsave(int64_t addr, int64_t size, const char *filename,
1718 Error **errp)
1719{
1720 FILE *f;
1721 uint32_t l;
1722 uint8_t buf[1024];
1723
1724 f = fopen(filename, "wb");
1725 if (!f) {
618da851 1726 error_setg_file_open(errp, errno, filename);
6d3962bf
LC
1727 return;
1728 }
1729
1730 while (size != 0) {
1731 l = sizeof(buf);
1732 if (l > size)
1733 l = size;
eb6282f2 1734 cpu_physical_memory_read(addr, buf, l);
6d3962bf 1735 if (fwrite(buf, 1, l, f) != l) {
c6bd8c70 1736 error_setg(errp, QERR_IO_ERROR);
6d3962bf
LC
1737 goto exit;
1738 }
1739 addr += l;
1740 size -= l;
1741 }
1742
1743exit:
1744 fclose(f);
1745}
ab49ab5c
LC
1746
1747void qmp_inject_nmi(Error **errp)
1748{
9cb805fd 1749 nmi_monitor_handle(monitor_get_cpu_index(), errp);
ab49ab5c 1750}
27498bef
ST
1751
1752void dump_drift_info(FILE *f, fprintf_function cpu_fprintf)
1753{
1754 if (!use_icount) {
1755 return;
1756 }
1757
1758 cpu_fprintf(f, "Host - Guest clock %"PRIi64" ms\n",
1759 (cpu_get_clock() - cpu_get_icount())/SCALE_MS);
1760 if (icount_align_option) {
1761 cpu_fprintf(f, "Max guest delay %"PRIi64" ms\n", -max_delay/SCALE_MS);
1762 cpu_fprintf(f, "Max guest advance %"PRIi64" ms\n", max_advance/SCALE_MS);
1763 } else {
1764 cpu_fprintf(f, "Max guest delay NA\n");
1765 cpu_fprintf(f, "Max guest advance NA\n");
1766 }
1767}