/*
 * QEMU System Emulator
 *
 * Copyright (c) 2003-2008 Fabrice Bellard
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

/* Needed early for CONFIG_BSD etc. */
#include "qemu/osdep.h"
#include "qemu-common.h"
#include "cpu.h"
#include "monitor/monitor.h"
#include "qapi/qmp/qerror.h"
#include "qemu/error-report.h"
#include "sysemu/sysemu.h"
#include "sysemu/block-backend.h"
#include "exec/gdbstub.h"
#include "sysemu/dma.h"
#include "sysemu/kvm.h"
#include "qmp-commands.h"

#include "qemu/thread.h"
#include "sysemu/cpus.h"
#include "sysemu/qtest.h"
#include "qemu/main-loop.h"
#include "qemu/bitmap.h"
#include "qemu/seqlock.h"
#include "qapi-event.h"
#include "hw/nmi.h"
#include "sysemu/replay.h"

#ifndef _WIN32
#include "qemu/compatfd.h"
#endif

#ifdef CONFIG_LINUX

#include <sys/prctl.h>

#ifndef PR_MCE_KILL
#define PR_MCE_KILL 33
#endif

#ifndef PR_MCE_KILL_SET
#define PR_MCE_KILL_SET 1
#endif

#ifndef PR_MCE_KILL_EARLY
#define PR_MCE_KILL_EARLY 1
#endif

#endif /* CONFIG_LINUX */

static CPUState *next_cpu;
int64_t max_delay;
int64_t max_advance;

/* vcpu throttling controls */
static QEMUTimer *throttle_timer;
static unsigned int throttle_percentage;

#define CPU_THROTTLE_PCT_MIN 1
#define CPU_THROTTLE_PCT_MAX 99
#define CPU_THROTTLE_TIMESLICE_NS 10000000
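
/* Throttling works by periodically making each vCPU sleep.  At throttle
 * percentage pct, cpu_throttle_thread() below sleeps for
 * throttle_ratio * CPU_THROTTLE_TIMESLICE_NS, where
 * throttle_ratio = pct / (1 - pct).  For example, at 50% the vCPU sleeps
 * 10 ms for every 10 ms timeslice, and at 75% it sleeps 30 ms, so the
 * vCPU runs for roughly (100 - pct)% of wall-clock time.
 */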

bool cpu_is_stopped(CPUState *cpu)
{
    return cpu->stopped || !runstate_is_running();
}

static bool cpu_thread_is_idle(CPUState *cpu)
{
    if (cpu->stop || cpu->queued_work_first) {
        return false;
    }
    if (cpu_is_stopped(cpu)) {
        return true;
    }
    if (!cpu->halted || cpu_has_work(cpu) ||
        kvm_halt_in_kernel()) {
        return false;
    }
    return true;
}

static bool all_cpu_threads_idle(void)
{
    CPUState *cpu;

    CPU_FOREACH(cpu) {
        if (!cpu_thread_is_idle(cpu)) {
            return false;
        }
    }
    return true;
}

/***********************************************************/
/* guest cycle counter */

/* Protected by TimersState seqlock */

static bool icount_sleep = true;
static int64_t vm_clock_warp_start = -1;
/* Conversion factor from emulated instructions to virtual clock ticks. */
static int icount_time_shift;
/* Arbitrarily pick 1MIPS as the minimum allowable speed. */
#define MAX_ICOUNT_SHIFT 10

static QEMUTimer *icount_rt_timer;
static QEMUTimer *icount_vm_timer;
static QEMUTimer *icount_warp_timer;

typedef struct TimersState {
    /* Protected by BQL. */
    int64_t cpu_ticks_prev;
    int64_t cpu_ticks_offset;

    /* cpu_clock_offset can be read out of BQL, so protect it with
     * this lock.
     */
    QemuSeqLock vm_clock_seqlock;
    int64_t cpu_clock_offset;
    int32_t cpu_ticks_enabled;
    int64_t dummy;

    /* Compensate for varying guest execution speed. */
    int64_t qemu_icount_bias;
    /* Only written by TCG thread */
    int64_t qemu_icount;
} TimersState;

static TimersState timers_state;

int64_t cpu_get_icount_raw(void)
{
    int64_t icount;
    CPUState *cpu = current_cpu;

    icount = timers_state.qemu_icount;
    if (cpu) {
        if (!cpu->can_do_io) {
            fprintf(stderr, "Bad icount read\n");
            exit(1);
        }
        icount -= (cpu->icount_decr.u16.low + cpu->icount_extra);
    }
    return icount;
}

/* Return the virtual CPU time, based on the instruction counter. */
static int64_t cpu_get_icount_locked(void)
{
    int64_t icount = cpu_get_icount_raw();
    return timers_state.qemu_icount_bias + cpu_icount_to_ns(icount);
}

int64_t cpu_get_icount(void)
{
    int64_t icount;
    unsigned start;

    do {
        start = seqlock_read_begin(&timers_state.vm_clock_seqlock);
        icount = cpu_get_icount_locked();
    } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start));

    return icount;
}

int64_t cpu_icount_to_ns(int64_t icount)
{
    return icount << icount_time_shift;
}
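
/* With the shift-based conversion above, one executed instruction accounts
 * for 2^icount_time_shift nanoseconds of QEMU_CLOCK_VIRTUAL.  For example,
 * the shift=auto starting value of 3 chosen in configure_icount() charges
 * 8 ns per instruction, i.e. the nominal 125 MIPS mentioned there.
 */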

/* return the host CPU cycle counter and handle stop/restart */
/* Caller must hold the BQL */
int64_t cpu_get_ticks(void)
{
    int64_t ticks;

    if (use_icount) {
        return cpu_get_icount();
    }

    ticks = timers_state.cpu_ticks_offset;
    if (timers_state.cpu_ticks_enabled) {
        ticks += cpu_get_host_ticks();
    }

    if (timers_state.cpu_ticks_prev > ticks) {
        /* Note: non-increasing ticks may happen if the host uses
           software suspend. */
        timers_state.cpu_ticks_offset += timers_state.cpu_ticks_prev - ticks;
        ticks = timers_state.cpu_ticks_prev;
    }

    timers_state.cpu_ticks_prev = ticks;
    return ticks;
}

static int64_t cpu_get_clock_locked(void)
{
    int64_t ticks;

    ticks = timers_state.cpu_clock_offset;
    if (timers_state.cpu_ticks_enabled) {
        ticks += get_clock();
    }

    return ticks;
}

/* return the host CPU monotonic timer and handle stop/restart */
int64_t cpu_get_clock(void)
{
    int64_t ti;
    unsigned start;

    do {
        start = seqlock_read_begin(&timers_state.vm_clock_seqlock);
        ti = cpu_get_clock_locked();
    } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start));

    return ti;
}
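
/* The read loops in cpu_get_icount() and cpu_get_clock() follow the usual
 * seqlock pattern: snapshot the sequence counter, read the protected
 * fields, and retry if a writer raced with us.  Readers never block;
 * writers (which all hold the BQL) serialize through seqlock_write_lock()
 * and seqlock_write_unlock(), as in cpu_enable_ticks() below.
 */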

/* enable cpu_get_ticks()
 * Caller must hold BQL which serves as mutex for vm_clock_seqlock.
 */
void cpu_enable_ticks(void)
{
    /* Here, the thing really protected by the seqlock is cpu_clock_offset. */
    seqlock_write_lock(&timers_state.vm_clock_seqlock);
    if (!timers_state.cpu_ticks_enabled) {
        timers_state.cpu_ticks_offset -= cpu_get_host_ticks();
        timers_state.cpu_clock_offset -= get_clock();
        timers_state.cpu_ticks_enabled = 1;
    }
    seqlock_write_unlock(&timers_state.vm_clock_seqlock);
}

/* disable cpu_get_ticks() : the clock is stopped. You must not call
 * cpu_get_ticks() after that.
 * Caller must hold BQL which serves as mutex for vm_clock_seqlock.
 */
void cpu_disable_ticks(void)
{
    /* Here, the thing really protected by the seqlock is cpu_clock_offset. */
    seqlock_write_lock(&timers_state.vm_clock_seqlock);
    if (timers_state.cpu_ticks_enabled) {
        timers_state.cpu_ticks_offset += cpu_get_host_ticks();
        timers_state.cpu_clock_offset = cpu_get_clock_locked();
        timers_state.cpu_ticks_enabled = 0;
    }
    seqlock_write_unlock(&timers_state.vm_clock_seqlock);
}

/* Correlation between real and virtual time is always going to be
   fairly approximate, so ignore small variation.
   When the guest is idle real and virtual time will be aligned in
   the IO wait loop. */
#define ICOUNT_WOBBLE (NANOSECONDS_PER_SECOND / 10)

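/* icount_adjust() implements a simple feedback loop between real and
 * virtual (instruction-counted) time.  delta = cur_icount - cur_time, so a
 * positive delta means the guest clock has run ahead of real time; halving
 * the per-instruction cost (icount_time_shift--) slows virtual time down.
 * A negative delta speeds it up, bounded by MAX_ICOUNT_SHIFT.
 */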
static void icount_adjust(void)
{
    int64_t cur_time;
    int64_t cur_icount;
    int64_t delta;

    /* Protected by TimersState mutex. */
    static int64_t last_delta;

    /* If the VM is not running, then do nothing. */
    if (!runstate_is_running()) {
        return;
    }

    seqlock_write_lock(&timers_state.vm_clock_seqlock);
    cur_time = cpu_get_clock_locked();
    cur_icount = cpu_get_icount_locked();

    delta = cur_icount - cur_time;
    /* FIXME: This is a very crude algorithm, somewhat prone to oscillation. */
    if (delta > 0
        && last_delta + ICOUNT_WOBBLE < delta * 2
        && icount_time_shift > 0) {
        /* The guest is getting too far ahead. Slow time down. */
        icount_time_shift--;
    }
    if (delta < 0
        && last_delta - ICOUNT_WOBBLE > delta * 2
        && icount_time_shift < MAX_ICOUNT_SHIFT) {
        /* The guest is getting too far behind. Speed time up. */
        icount_time_shift++;
    }
    last_delta = delta;
    timers_state.qemu_icount_bias = cur_icount
                              - (timers_state.qemu_icount << icount_time_shift);
    seqlock_write_unlock(&timers_state.vm_clock_seqlock);
}

static void icount_adjust_rt(void *opaque)
{
    timer_mod(icount_rt_timer,
              qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL_RT) + 1000);
    icount_adjust();
}

static void icount_adjust_vm(void *opaque)
{
    timer_mod(icount_vm_timer,
              qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
              NANOSECONDS_PER_SECOND / 10);
    icount_adjust();
}

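/* Round an instruction count up to a whole number of virtual clock ticks:
 * (count + 2^shift - 1) >> shift is the usual ceiling-division idiom,
 * here computing ceil(count / 2^icount_time_shift).
 */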
static int64_t qemu_icount_round(int64_t count)
{
    return (count + (1 << icount_time_shift) - 1) >> icount_time_shift;
}

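/* Clock warping: while every vCPU is idle, QEMU_CLOCK_VIRTUAL makes no
 * progress on its own.  qemu_start_warp_timer() records when the idle
 * period began (vm_clock_warp_start); when the warp timer fires or the
 * guest wakes, icount_warp_rt() folds the elapsed real time into
 * qemu_icount_bias so the virtual clock jumps forward to the next
 * deadline instead of stalling.
 */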
static void icount_warp_rt(void)
{
    unsigned seq;
    int64_t warp_start;

    /* The icount_warp_timer is rescheduled soon after vm_clock_warp_start
     * changes from -1 to another value, so the race here is okay.
     */
    do {
        seq = seqlock_read_begin(&timers_state.vm_clock_seqlock);
        warp_start = vm_clock_warp_start;
    } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, seq));

    if (warp_start == -1) {
        return;
    }

    seqlock_write_lock(&timers_state.vm_clock_seqlock);
    if (runstate_is_running()) {
        int64_t clock = REPLAY_CLOCK(REPLAY_CLOCK_VIRTUAL_RT,
                                     cpu_get_clock_locked());
        int64_t warp_delta;

        warp_delta = clock - vm_clock_warp_start;
        if (use_icount == 2) {
            /*
             * In adaptive mode, do not let QEMU_CLOCK_VIRTUAL run too
             * far ahead of real time.
             */
            int64_t cur_icount = cpu_get_icount_locked();
            int64_t delta = clock - cur_icount;
            warp_delta = MIN(warp_delta, delta);
        }
        timers_state.qemu_icount_bias += warp_delta;
    }
    vm_clock_warp_start = -1;
    seqlock_write_unlock(&timers_state.vm_clock_seqlock);

    if (qemu_clock_expired(QEMU_CLOCK_VIRTUAL)) {
        qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
    }
}

static void icount_timer_cb(void *opaque)
{
    /* No need for a checkpoint because the timer already synchronizes
     * with CHECKPOINT_CLOCK_VIRTUAL_RT.
     */
    icount_warp_rt();
}

void qtest_clock_warp(int64_t dest)
{
    int64_t clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
    AioContext *aio_context;
    assert(qtest_enabled());
    aio_context = qemu_get_aio_context();
    while (clock < dest) {
        int64_t deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
        int64_t warp = qemu_soonest_timeout(dest - clock, deadline);

        seqlock_write_lock(&timers_state.vm_clock_seqlock);
        timers_state.qemu_icount_bias += warp;
        seqlock_write_unlock(&timers_state.vm_clock_seqlock);

        qemu_clock_run_timers(QEMU_CLOCK_VIRTUAL);
        timerlist_run_timers(aio_context->tlg.tl[QEMU_CLOCK_VIRTUAL]);
        clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
    }
    qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
}

void qemu_start_warp_timer(void)
{
    int64_t clock;
    int64_t deadline;

    if (!use_icount) {
        return;
    }

    /* Nothing to do if the VM is stopped: QEMU_CLOCK_VIRTUAL timers
     * do not fire, so computing the deadline does not make sense.
     */
    if (!runstate_is_running()) {
        return;
    }

    /* warp clock deterministically in record/replay mode */
    if (!replay_checkpoint(CHECKPOINT_CLOCK_WARP_START)) {
        return;
    }

    if (!all_cpu_threads_idle()) {
        return;
    }

    if (qtest_enabled()) {
        /* When testing, qtest commands advance icount. */
        return;
    }

    /* We want to use the earliest deadline from ALL vm_clocks */
    clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT);
    deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
    if (deadline < 0) {
        static bool notified;
        if (!icount_sleep && !notified) {
            error_report("WARNING: icount sleep disabled and no active timers");
            notified = true;
        }
        return;
    }

    if (deadline > 0) {
        /*
         * Ensure QEMU_CLOCK_VIRTUAL proceeds even when the virtual CPU goes to
         * sleep.  Otherwise, the CPU might be waiting for a future timer
         * interrupt to wake it up, but the interrupt never comes because
         * the vCPU isn't running any insns and thus doesn't advance
         * QEMU_CLOCK_VIRTUAL.
         */
        if (!icount_sleep) {
            /*
             * We never let VCPUs sleep in no-sleep icount mode.
             * If there is a pending QEMU_CLOCK_VIRTUAL timer we just advance
             * to the next QEMU_CLOCK_VIRTUAL event and notify it.
             * This is useful when we want a deterministic execution time,
             * isolated from host latencies.
             */
            seqlock_write_lock(&timers_state.vm_clock_seqlock);
            timers_state.qemu_icount_bias += deadline;
            seqlock_write_unlock(&timers_state.vm_clock_seqlock);
            qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
        } else {
            /*
             * We do stop VCPUs and only advance QEMU_CLOCK_VIRTUAL after some
             * "real" time (related to the time left until the next event) has
             * passed.  The QEMU_CLOCK_VIRTUAL_RT clock will do this.
             * This avoids the warps being visible externally; for example,
             * you will not be sending network packets continuously instead
             * of every 100 ms.
             */
            seqlock_write_lock(&timers_state.vm_clock_seqlock);
            if (vm_clock_warp_start == -1 || vm_clock_warp_start > clock) {
                vm_clock_warp_start = clock;
            }
            seqlock_write_unlock(&timers_state.vm_clock_seqlock);
            timer_mod_anticipate(icount_warp_timer, clock + deadline);
        }
    } else if (deadline == 0) {
        qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
    }
}

static void qemu_account_warp_timer(void)
{
    if (!use_icount || !icount_sleep) {
        return;
    }

    /* Nothing to do if the VM is stopped: QEMU_CLOCK_VIRTUAL timers
     * do not fire, so computing the deadline does not make sense.
     */
    if (!runstate_is_running()) {
        return;
    }

    /* warp clock deterministically in record/replay mode */
    if (!replay_checkpoint(CHECKPOINT_CLOCK_WARP_ACCOUNT)) {
        return;
    }

    timer_del(icount_warp_timer);
    icount_warp_rt();
}

static bool icount_state_needed(void *opaque)
{
    return use_icount;
}

/*
 * This is a subsection for icount migration.
 */
static const VMStateDescription icount_vmstate_timers = {
    .name = "timer/icount",
    .version_id = 1,
    .minimum_version_id = 1,
    .needed = icount_state_needed,
    .fields = (VMStateField[]) {
        VMSTATE_INT64(qemu_icount_bias, TimersState),
        VMSTATE_INT64(qemu_icount, TimersState),
        VMSTATE_END_OF_LIST()
    }
};

static const VMStateDescription vmstate_timers = {
    .name = "timer",
    .version_id = 2,
    .minimum_version_id = 1,
    .fields = (VMStateField[]) {
        VMSTATE_INT64(cpu_ticks_offset, TimersState),
        VMSTATE_INT64(dummy, TimersState),
        VMSTATE_INT64_V(cpu_clock_offset, TimersState, 2),
        VMSTATE_END_OF_LIST()
    },
    .subsections = (const VMStateDescription*[]) {
        &icount_vmstate_timers,
        NULL
    }
};

static void cpu_throttle_thread(void *opaque)
{
    CPUState *cpu = opaque;
    double pct;
    double throttle_ratio;
    long sleeptime_ns;

    if (!cpu_throttle_get_percentage()) {
        return;
    }

    pct = (double)cpu_throttle_get_percentage()/100;
    throttle_ratio = pct / (1 - pct);
    sleeptime_ns = (long)(throttle_ratio * CPU_THROTTLE_TIMESLICE_NS);

    qemu_mutex_unlock_iothread();
    atomic_set(&cpu->throttle_thread_scheduled, 0);
    g_usleep(sleeptime_ns / 1000); /* Convert ns to us for usleep call */
    qemu_mutex_lock_iothread();
}

static void cpu_throttle_timer_tick(void *opaque)
{
    CPUState *cpu;
    double pct;

    /* Stop the timer if needed */
    if (!cpu_throttle_get_percentage()) {
        return;
    }
    CPU_FOREACH(cpu) {
        if (!atomic_xchg(&cpu->throttle_thread_scheduled, 1)) {
            async_run_on_cpu(cpu, cpu_throttle_thread, cpu);
        }
    }

    pct = (double)cpu_throttle_get_percentage()/100;
    timer_mod(throttle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT) +
                              CPU_THROTTLE_TIMESLICE_NS / (1-pct));
}

void cpu_throttle_set(int new_throttle_pct)
{
    /* Ensure throttle percentage is within valid range */
    new_throttle_pct = MIN(new_throttle_pct, CPU_THROTTLE_PCT_MAX);
    new_throttle_pct = MAX(new_throttle_pct, CPU_THROTTLE_PCT_MIN);

    atomic_set(&throttle_percentage, new_throttle_pct);

    timer_mod(throttle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT) +
                              CPU_THROTTLE_TIMESLICE_NS);
}

void cpu_throttle_stop(void)
{
    atomic_set(&throttle_percentage, 0);
}

bool cpu_throttle_active(void)
{
    return (cpu_throttle_get_percentage() != 0);
}

int cpu_throttle_get_percentage(void)
{
    return atomic_read(&throttle_percentage);
}

void cpu_ticks_init(void)
{
    seqlock_init(&timers_state.vm_clock_seqlock, NULL);
    vmstate_register(NULL, 0, &vmstate_timers, &timers_state);
    throttle_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL_RT,
                                  cpu_throttle_timer_tick, NULL);
}

void configure_icount(QemuOpts *opts, Error **errp)
{
    const char *option;
    char *rem_str = NULL;

    option = qemu_opt_get(opts, "shift");
    if (!option) {
        if (qemu_opt_get(opts, "align") != NULL) {
            error_setg(errp, "Please specify shift option when using align");
        }
        return;
    }

    icount_sleep = qemu_opt_get_bool(opts, "sleep", true);
    if (icount_sleep) {
        icount_warp_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL_RT,
                                         icount_timer_cb, NULL);
    }

    icount_align_option = qemu_opt_get_bool(opts, "align", false);

    if (icount_align_option && !icount_sleep) {
        error_setg(errp, "align=on and sleep=off are incompatible");
    }
    if (strcmp(option, "auto") != 0) {
        errno = 0;
        icount_time_shift = strtol(option, &rem_str, 0);
        if (errno != 0 || *rem_str != '\0' || !strlen(option)) {
            error_setg(errp, "icount: Invalid shift value");
        }
        use_icount = 1;
        return;
    } else if (icount_align_option) {
        error_setg(errp, "shift=auto and align=on are incompatible");
    } else if (!icount_sleep) {
        error_setg(errp, "shift=auto and sleep=off are incompatible");
    }

    use_icount = 2;

    /* 125MIPS seems a reasonable initial guess at the guest speed.
       It will be corrected fairly quickly anyway. */
    icount_time_shift = 3;

    /* Have both realtime and virtual time triggers for speed adjustment.
       The realtime trigger catches emulated time passing too slowly,
       the virtual time trigger catches emulated time passing too fast.
       Realtime triggers occur even when idle, so use them less frequently
       than VM triggers. */
    icount_rt_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL_RT,
                                   icount_adjust_rt, NULL);
    timer_mod(icount_rt_timer,
              qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL_RT) + 1000);
    icount_vm_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
                                   icount_adjust_vm, NULL);
    timer_mod(icount_vm_timer,
              qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
              NANOSECONDS_PER_SECOND / 10);
}

/***********************************************************/
void hw_error(const char *fmt, ...)
{
    va_list ap;
    CPUState *cpu;

    va_start(ap, fmt);
    fprintf(stderr, "qemu: hardware error: ");
    vfprintf(stderr, fmt, ap);
    fprintf(stderr, "\n");
    CPU_FOREACH(cpu) {
        fprintf(stderr, "CPU #%d:\n", cpu->cpu_index);
        cpu_dump_state(cpu, stderr, fprintf, CPU_DUMP_FPU);
    }
    va_end(ap);
    abort();
}

void cpu_synchronize_all_states(void)
{
    CPUState *cpu;

    CPU_FOREACH(cpu) {
        cpu_synchronize_state(cpu);
    }
}

void cpu_synchronize_all_post_reset(void)
{
    CPUState *cpu;

    CPU_FOREACH(cpu) {
        cpu_synchronize_post_reset(cpu);
    }
}

void cpu_synchronize_all_post_init(void)
{
    CPUState *cpu;

    CPU_FOREACH(cpu) {
        cpu_synchronize_post_init(cpu);
    }
}

static int do_vm_stop(RunState state)
{
    int ret = 0;

    if (runstate_is_running()) {
        cpu_disable_ticks();
        pause_all_vcpus();
        runstate_set(state);
        vm_state_notify(0, state);
        qapi_event_send_stop(&error_abort);
    }

    bdrv_drain_all();
    ret = blk_flush_all();

    return ret;
}

static bool cpu_can_run(CPUState *cpu)
{
    if (cpu->stop) {
        return false;
    }
    if (cpu_is_stopped(cpu)) {
        return false;
    }
    return true;
}

static void cpu_handle_guest_debug(CPUState *cpu)
{
    gdb_set_stop_cpu(cpu);
    qemu_system_debug_request();
    cpu->stopped = true;
}

#ifdef CONFIG_LINUX
static void sigbus_reraise(void)
{
    sigset_t set;
    struct sigaction action;

    memset(&action, 0, sizeof(action));
    action.sa_handler = SIG_DFL;
    if (!sigaction(SIGBUS, &action, NULL)) {
        raise(SIGBUS);
        sigemptyset(&set);
        sigaddset(&set, SIGBUS);
        sigprocmask(SIG_UNBLOCK, &set, NULL);
    }
    perror("Failed to re-raise SIGBUS!\n");
    abort();
}

static void sigbus_handler(int n, struct qemu_signalfd_siginfo *siginfo,
                           void *ctx)
{
    if (kvm_on_sigbus(siginfo->ssi_code,
                      (void *)(intptr_t)siginfo->ssi_addr)) {
        sigbus_reraise();
    }
}

static void qemu_init_sigbus(void)
{
    struct sigaction action;

    memset(&action, 0, sizeof(action));
    action.sa_flags = SA_SIGINFO;
    action.sa_sigaction = (void (*)(int, siginfo_t*, void*))sigbus_handler;
    sigaction(SIGBUS, &action, NULL);

    prctl(PR_MCE_KILL, PR_MCE_KILL_SET, PR_MCE_KILL_EARLY, 0, 0);
}

static void qemu_kvm_eat_signals(CPUState *cpu)
{
    struct timespec ts = { 0, 0 };
    siginfo_t siginfo;
    sigset_t waitset;
    sigset_t chkset;
    int r;

    sigemptyset(&waitset);
    sigaddset(&waitset, SIG_IPI);
    sigaddset(&waitset, SIGBUS);

    do {
        r = sigtimedwait(&waitset, &siginfo, &ts);
        if (r == -1 && !(errno == EAGAIN || errno == EINTR)) {
            perror("sigtimedwait");
            exit(1);
        }

        switch (r) {
        case SIGBUS:
            if (kvm_on_sigbus_vcpu(cpu, siginfo.si_code, siginfo.si_addr)) {
                sigbus_reraise();
            }
            break;
        default:
            break;
        }

        r = sigpending(&chkset);
        if (r == -1) {
            perror("sigpending");
            exit(1);
        }
    } while (sigismember(&chkset, SIG_IPI) || sigismember(&chkset, SIGBUS));
}

#else /* !CONFIG_LINUX */

static void qemu_init_sigbus(void)
{
}

static void qemu_kvm_eat_signals(CPUState *cpu)
{
}
#endif /* !CONFIG_LINUX */

#ifndef _WIN32
static void dummy_signal(int sig)
{
}

static void qemu_kvm_init_cpu_signals(CPUState *cpu)
{
    int r;
    sigset_t set;
    struct sigaction sigact;

    memset(&sigact, 0, sizeof(sigact));
    sigact.sa_handler = dummy_signal;
    sigaction(SIG_IPI, &sigact, NULL);

    pthread_sigmask(SIG_BLOCK, NULL, &set);
    sigdelset(&set, SIG_IPI);
    sigdelset(&set, SIGBUS);
    r = kvm_set_signal_mask(cpu, &set);
    if (r) {
        fprintf(stderr, "kvm_set_signal_mask: %s\n", strerror(-r));
        exit(1);
    }
}

#else /* _WIN32 */
static void qemu_kvm_init_cpu_signals(CPUState *cpu)
{
    abort();
}
#endif /* _WIN32 */

static QemuMutex qemu_global_mutex;
static QemuCond qemu_io_proceeded_cond;
static unsigned iothread_requesting_mutex;

static QemuThread io_thread;

/* cpu creation */
static QemuCond qemu_cpu_cond;
/* system init */
static QemuCond qemu_pause_cond;
static QemuCond qemu_work_cond;

void qemu_init_cpu_loop(void)
{
    qemu_init_sigbus();
    qemu_cond_init(&qemu_cpu_cond);
    qemu_cond_init(&qemu_pause_cond);
    qemu_cond_init(&qemu_work_cond);
    qemu_cond_init(&qemu_io_proceeded_cond);
    qemu_mutex_init(&qemu_global_mutex);

    qemu_thread_get_self(&io_thread);
}

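/* Cross-thread work queue.  run_on_cpu() executes func on the target vCPU's
 * thread and blocks (waiting on qemu_work_cond) until it has run;
 * async_run_on_cpu() queues a heap-allocated item and returns immediately.
 * A hypothetical caller, sketched for illustration only:
 *
 *     static void do_reset(void *data) {  ... runs on cpu's thread ...  }
 *     run_on_cpu(cpu, do_reset, NULL);    // e.g. from the I/O thread
 *
 * Callers are expected to hold the BQL, which the wait loop relies on.
 */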
void run_on_cpu(CPUState *cpu, void (*func)(void *data), void *data)
{
    struct qemu_work_item wi;

    if (qemu_cpu_is_self(cpu)) {
        func(data);
        return;
    }

    wi.func = func;
    wi.data = data;
    wi.free = false;

    qemu_mutex_lock(&cpu->work_mutex);
    if (cpu->queued_work_first == NULL) {
        cpu->queued_work_first = &wi;
    } else {
        cpu->queued_work_last->next = &wi;
    }
    cpu->queued_work_last = &wi;
    wi.next = NULL;
    wi.done = false;
    qemu_mutex_unlock(&cpu->work_mutex);

    qemu_cpu_kick(cpu);
    while (!atomic_mb_read(&wi.done)) {
        CPUState *self_cpu = current_cpu;

        qemu_cond_wait(&qemu_work_cond, &qemu_global_mutex);
        current_cpu = self_cpu;
    }
}

void async_run_on_cpu(CPUState *cpu, void (*func)(void *data), void *data)
{
    struct qemu_work_item *wi;

    if (qemu_cpu_is_self(cpu)) {
        func(data);
        return;
    }

    wi = g_malloc0(sizeof(struct qemu_work_item));
    wi->func = func;
    wi->data = data;
    wi->free = true;

    qemu_mutex_lock(&cpu->work_mutex);
    if (cpu->queued_work_first == NULL) {
        cpu->queued_work_first = wi;
    } else {
        cpu->queued_work_last->next = wi;
    }
    cpu->queued_work_last = wi;
    wi->next = NULL;
    wi->done = false;
    qemu_mutex_unlock(&cpu->work_mutex);

    qemu_cpu_kick(cpu);
}

static void flush_queued_work(CPUState *cpu)
{
    struct qemu_work_item *wi;

    if (cpu->queued_work_first == NULL) {
        return;
    }

    qemu_mutex_lock(&cpu->work_mutex);
    while (cpu->queued_work_first != NULL) {
        wi = cpu->queued_work_first;
        cpu->queued_work_first = wi->next;
        if (!cpu->queued_work_first) {
            cpu->queued_work_last = NULL;
        }
        qemu_mutex_unlock(&cpu->work_mutex);
        wi->func(wi->data);
        qemu_mutex_lock(&cpu->work_mutex);
        if (wi->free) {
            g_free(wi);
        } else {
            atomic_mb_set(&wi->done, true);
        }
    }
    qemu_mutex_unlock(&cpu->work_mutex);
    qemu_cond_broadcast(&qemu_work_cond);
}

static void qemu_wait_io_event_common(CPUState *cpu)
{
    if (cpu->stop) {
        cpu->stop = false;
        cpu->stopped = true;
        qemu_cond_broadcast(&qemu_pause_cond);
    }
    flush_queued_work(cpu);
    cpu->thread_kicked = false;
}

static void qemu_tcg_wait_io_event(CPUState *cpu)
{
    while (all_cpu_threads_idle()) {
        qemu_cond_wait(cpu->halt_cond, &qemu_global_mutex);
    }

    while (iothread_requesting_mutex) {
        qemu_cond_wait(&qemu_io_proceeded_cond, &qemu_global_mutex);
    }

    CPU_FOREACH(cpu) {
        qemu_wait_io_event_common(cpu);
    }
}

static void qemu_kvm_wait_io_event(CPUState *cpu)
{
    while (cpu_thread_is_idle(cpu)) {
        qemu_cond_wait(cpu->halt_cond, &qemu_global_mutex);
    }

    qemu_kvm_eat_signals(cpu);
    qemu_wait_io_event_common(cpu);
}

static void *qemu_kvm_cpu_thread_fn(void *arg)
{
    CPUState *cpu = arg;
    int r;

    rcu_register_thread();

    qemu_mutex_lock_iothread();
    qemu_thread_get_self(cpu->thread);
    cpu->thread_id = qemu_get_thread_id();
    cpu->can_do_io = 1;
    current_cpu = cpu;

    r = kvm_init_vcpu(cpu);
    if (r < 0) {
        fprintf(stderr, "kvm_init_vcpu failed: %s\n", strerror(-r));
        exit(1);
    }

    qemu_kvm_init_cpu_signals(cpu);

    /* signal CPU creation */
    cpu->created = true;
    qemu_cond_signal(&qemu_cpu_cond);

    while (1) {
        if (cpu_can_run(cpu)) {
            r = kvm_cpu_exec(cpu);
            if (r == EXCP_DEBUG) {
                cpu_handle_guest_debug(cpu);
            }
        }
        qemu_kvm_wait_io_event(cpu);
    }

    return NULL;
}

static void *qemu_dummy_cpu_thread_fn(void *arg)
{
#ifdef _WIN32
    fprintf(stderr, "qtest is not supported under Windows\n");
    exit(1);
#else
    CPUState *cpu = arg;
    sigset_t waitset;
    int r;

    rcu_register_thread();

    qemu_mutex_lock_iothread();
    qemu_thread_get_self(cpu->thread);
    cpu->thread_id = qemu_get_thread_id();
    cpu->can_do_io = 1;

    sigemptyset(&waitset);
    sigaddset(&waitset, SIG_IPI);

    /* signal CPU creation */
    cpu->created = true;
    qemu_cond_signal(&qemu_cpu_cond);

    current_cpu = cpu;
    while (1) {
        current_cpu = NULL;
        qemu_mutex_unlock_iothread();
        do {
            int sig;
            r = sigwait(&waitset, &sig);
        } while (r == -1 && (errno == EAGAIN || errno == EINTR));
        if (r == -1) {
            perror("sigwait");
            exit(1);
        }
        qemu_mutex_lock_iothread();
        current_cpu = cpu;
        qemu_wait_io_event_common(cpu);
    }

    return NULL;
#endif
}

static void tcg_exec_all(void);

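/* In this version of QEMU, TCG runs all vCPUs round-robin on one shared
 * thread: qemu_tcg_init_vcpu() creates the thread for the first CPU only,
 * and tcg_exec_all() below rotates through the CPU list, so only one guest
 * CPU executes at a time (unlike KVM, which gets one host thread per vCPU).
 */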
static void *qemu_tcg_cpu_thread_fn(void *arg)
{
    CPUState *cpu = arg;

    rcu_register_thread();

    qemu_mutex_lock_iothread();
    qemu_thread_get_self(cpu->thread);

    CPU_FOREACH(cpu) {
        cpu->thread_id = qemu_get_thread_id();
        cpu->created = true;
        cpu->can_do_io = 1;
    }
    qemu_cond_signal(&qemu_cpu_cond);

    /* wait for initial kick-off after machine start */
    while (first_cpu->stopped) {
        qemu_cond_wait(first_cpu->halt_cond, &qemu_global_mutex);

        /* process any pending work */
        CPU_FOREACH(cpu) {
            qemu_wait_io_event_common(cpu);
        }
    }

    /* process any pending work */
    atomic_mb_set(&exit_request, 1);

    while (1) {
        tcg_exec_all();

        if (use_icount) {
            int64_t deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);

            if (deadline == 0) {
                qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
            }
        }
        qemu_tcg_wait_io_event(QTAILQ_FIRST(&cpus));
    }

    return NULL;
}

static void qemu_cpu_kick_thread(CPUState *cpu)
{
#ifndef _WIN32
    int err;

    if (cpu->thread_kicked) {
        return;
    }
    cpu->thread_kicked = true;
    err = pthread_kill(cpu->thread->thread, SIG_IPI);
    if (err) {
        fprintf(stderr, "qemu:%s: %s", __func__, strerror(err));
        exit(1);
    }
#else /* _WIN32 */
    abort();
#endif
}

static void qemu_cpu_kick_no_halt(void)
{
    CPUState *cpu;
    /* Ensure whatever caused the exit has reached the CPU threads before
     * writing exit_request.
     */
    atomic_mb_set(&exit_request, 1);
    cpu = atomic_mb_read(&tcg_current_cpu);
    if (cpu) {
        cpu_exit(cpu);
    }
}

void qemu_cpu_kick(CPUState *cpu)
{
    qemu_cond_broadcast(cpu->halt_cond);
    if (tcg_enabled()) {
        qemu_cpu_kick_no_halt();
    } else {
        qemu_cpu_kick_thread(cpu);
    }
}

void qemu_cpu_kick_self(void)
{
    assert(current_cpu);
    qemu_cpu_kick_thread(current_cpu);
}

bool qemu_cpu_is_self(CPUState *cpu)
{
    return qemu_thread_is_self(cpu->thread);
}

bool qemu_in_vcpu_thread(void)
{
    return current_cpu && qemu_cpu_is_self(current_cpu);
}

static __thread bool iothread_locked = false;

bool qemu_mutex_iothread_locked(void)
{
    return iothread_locked;
}

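/* Taking the BQL while a TCG vCPU holds it could block for a whole chain
 * of translation blocks, so the contended path below bumps
 * iothread_requesting_mutex and kicks the vCPU out of the execution loop
 * before blocking on the mutex; qemu_tcg_wait_io_event() then parks the
 * vCPU on qemu_io_proceeded_cond until the I/O thread has run.
 */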
void qemu_mutex_lock_iothread(void)
{
    atomic_inc(&iothread_requesting_mutex);
    /* In the simple case there is no need to bump the VCPU thread out of
     * TCG code execution.
     */
    if (!tcg_enabled() || qemu_in_vcpu_thread() ||
        !first_cpu || !first_cpu->created) {
        qemu_mutex_lock(&qemu_global_mutex);
        atomic_dec(&iothread_requesting_mutex);
    } else {
        if (qemu_mutex_trylock(&qemu_global_mutex)) {
            qemu_cpu_kick_no_halt();
            qemu_mutex_lock(&qemu_global_mutex);
        }
        atomic_dec(&iothread_requesting_mutex);
        qemu_cond_broadcast(&qemu_io_proceeded_cond);
    }
    iothread_locked = true;
}

void qemu_mutex_unlock_iothread(void)
{
    iothread_locked = false;
    qemu_mutex_unlock(&qemu_global_mutex);
}

static int all_vcpus_paused(void)
{
    CPUState *cpu;

    CPU_FOREACH(cpu) {
        if (!cpu->stopped) {
            return 0;
        }
    }

    return 1;
}

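/* Pause protocol: set cpu->stop and kick each vCPU; the vCPU threads notice
 * the flag in qemu_wait_io_event_common(), mark themselves stopped, and
 * signal qemu_pause_cond.  When called from a vCPU thread itself we cannot
 * wait for our own thread, hence the cpu_stop_current() special case.
 */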
void pause_all_vcpus(void)
{
    CPUState *cpu;

    qemu_clock_enable(QEMU_CLOCK_VIRTUAL, false);
    CPU_FOREACH(cpu) {
        cpu->stop = true;
        qemu_cpu_kick(cpu);
    }

    if (qemu_in_vcpu_thread()) {
        cpu_stop_current();
        if (!kvm_enabled()) {
            CPU_FOREACH(cpu) {
                cpu->stop = false;
                cpu->stopped = true;
            }
            return;
        }
    }

    while (!all_vcpus_paused()) {
        qemu_cond_wait(&qemu_pause_cond, &qemu_global_mutex);
        CPU_FOREACH(cpu) {
            qemu_cpu_kick(cpu);
        }
    }
}

void cpu_resume(CPUState *cpu)
{
    cpu->stop = false;
    cpu->stopped = false;
    qemu_cpu_kick(cpu);
}

void resume_all_vcpus(void)
{
    CPUState *cpu;

    qemu_clock_enable(QEMU_CLOCK_VIRTUAL, true);
    CPU_FOREACH(cpu) {
        cpu_resume(cpu);
    }
}

/* For temporary buffers for forming a name */
#define VCPU_THREAD_NAME_SIZE 16

static void qemu_tcg_init_vcpu(CPUState *cpu)
{
    char thread_name[VCPU_THREAD_NAME_SIZE];
    static QemuCond *tcg_halt_cond;
    static QemuThread *tcg_cpu_thread;

    /* share a single thread for all cpus with TCG */
    if (!tcg_cpu_thread) {
        cpu->thread = g_malloc0(sizeof(QemuThread));
        cpu->halt_cond = g_malloc0(sizeof(QemuCond));
        qemu_cond_init(cpu->halt_cond);
        tcg_halt_cond = cpu->halt_cond;
        snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/TCG",
                 cpu->cpu_index);
        qemu_thread_create(cpu->thread, thread_name, qemu_tcg_cpu_thread_fn,
                           cpu, QEMU_THREAD_JOINABLE);
#ifdef _WIN32
        cpu->hThread = qemu_thread_get_handle(cpu->thread);
#endif
        while (!cpu->created) {
            qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
        }
        tcg_cpu_thread = cpu->thread;
    } else {
        cpu->thread = tcg_cpu_thread;
        cpu->halt_cond = tcg_halt_cond;
    }
}

static void qemu_kvm_start_vcpu(CPUState *cpu)
{
    char thread_name[VCPU_THREAD_NAME_SIZE];

    cpu->thread = g_malloc0(sizeof(QemuThread));
    cpu->halt_cond = g_malloc0(sizeof(QemuCond));
    qemu_cond_init(cpu->halt_cond);
    snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/KVM",
             cpu->cpu_index);
    qemu_thread_create(cpu->thread, thread_name, qemu_kvm_cpu_thread_fn,
                       cpu, QEMU_THREAD_JOINABLE);
    while (!cpu->created) {
        qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
    }
}

static void qemu_dummy_start_vcpu(CPUState *cpu)
{
    char thread_name[VCPU_THREAD_NAME_SIZE];

    cpu->thread = g_malloc0(sizeof(QemuThread));
    cpu->halt_cond = g_malloc0(sizeof(QemuCond));
    qemu_cond_init(cpu->halt_cond);
    snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/DUMMY",
             cpu->cpu_index);
    qemu_thread_create(cpu->thread, thread_name, qemu_dummy_cpu_thread_fn, cpu,
                       QEMU_THREAD_JOINABLE);
    while (!cpu->created) {
        qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
    }
}

void qemu_init_vcpu(CPUState *cpu)
{
    cpu->nr_cores = smp_cores;
    cpu->nr_threads = smp_threads;
    cpu->stopped = true;

    if (!cpu->as) {
        /* If the target cpu hasn't set up any address spaces itself,
         * give it the default one.
         */
        AddressSpace *as = address_space_init_shareable(cpu->memory,
                                                        "cpu-memory");
        cpu->num_ases = 1;
        cpu_address_space_init(cpu, as, 0);
    }

    if (kvm_enabled()) {
        qemu_kvm_start_vcpu(cpu);
    } else if (tcg_enabled()) {
        qemu_tcg_init_vcpu(cpu);
    } else {
        qemu_dummy_start_vcpu(cpu);
    }
}

void cpu_stop_current(void)
{
    if (current_cpu) {
        current_cpu->stop = false;
        current_cpu->stopped = true;
        cpu_exit(current_cpu);
        qemu_cond_broadcast(&qemu_pause_cond);
    }
}

int vm_stop(RunState state)
{
    if (qemu_in_vcpu_thread()) {
        qemu_system_vmstop_request_prepare();
        qemu_system_vmstop_request(state);
        /*
         * FIXME: should not return to device code in case
         * vm_stop() has been requested.
         */
        cpu_stop_current();
        return 0;
    }

    return do_vm_stop(state);
}

/* Does a state transition even if the VM is already stopped;
   the current state is forgotten forever. */
int vm_stop_force_state(RunState state)
{
    if (runstate_is_running()) {
        return vm_stop(state);
    } else {
        runstate_set(state);

        bdrv_drain_all();
        /* Make sure to return an error if the flush in a previous vm_stop()
         * failed. */
        return blk_flush_all();
    }
}

static int64_t tcg_get_icount_limit(void)
{
    int64_t deadline;

    if (replay_mode != REPLAY_MODE_PLAY) {
        deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);

        /* Maintain prior (possibly buggy) behaviour where if no deadline
         * was set (as there is no QEMU_CLOCK_VIRTUAL timer) or it is more than
         * INT32_MAX nanoseconds ahead, we still use INT32_MAX
         * nanoseconds.
         */
        if ((deadline < 0) || (deadline > INT32_MAX)) {
            deadline = INT32_MAX;
        }

        return qemu_icount_round(deadline);
    } else {
        return replay_get_instructions();
    }
}

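/* The instruction budget for a TCG run is split between a 16-bit countdown
 * (icount_decr.u16.low), which translated code decrements directly, and an
 * overflow field (icount_extra) that refills the countdown when it hits
 * zero.  For example, a limit of 100000 instructions becomes
 * u16.low = 65535 and icount_extra = 34465.
 */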
static int tcg_cpu_exec(CPUState *cpu)
{
    int ret;
#ifdef CONFIG_PROFILER
    int64_t ti;
#endif

#ifdef CONFIG_PROFILER
    ti = profile_getclock();
#endif
    if (use_icount) {
        int64_t count;
        int decr;
        timers_state.qemu_icount -= (cpu->icount_decr.u16.low
                                    + cpu->icount_extra);
        cpu->icount_decr.u16.low = 0;
        cpu->icount_extra = 0;
        count = tcg_get_icount_limit();
        timers_state.qemu_icount += count;
        decr = (count > 0xffff) ? 0xffff : count;
        count -= decr;
        cpu->icount_decr.u16.low = decr;
        cpu->icount_extra = count;
    }
    ret = cpu_exec(cpu);
#ifdef CONFIG_PROFILER
    tcg_time += profile_getclock() - ti;
#endif
    if (use_icount) {
        /* Fold pending instructions back into the
           instruction counter, and clear the interrupt flag. */
        timers_state.qemu_icount -= (cpu->icount_decr.u16.low
                                    + cpu->icount_extra);
        cpu->icount_decr.u32 = 0;
        cpu->icount_extra = 0;
        replay_account_executed_instructions();
    }
    return ret;
}

static void tcg_exec_all(void)
{
    int r;

    /* Account partial waits to QEMU_CLOCK_VIRTUAL. */
    qemu_account_warp_timer();

    if (next_cpu == NULL) {
        next_cpu = first_cpu;
    }
    for (; next_cpu != NULL && !exit_request; next_cpu = CPU_NEXT(next_cpu)) {
        CPUState *cpu = next_cpu;

        qemu_clock_enable(QEMU_CLOCK_VIRTUAL,
                          (cpu->singlestep_enabled & SSTEP_NOTIMER) == 0);

        if (cpu_can_run(cpu)) {
            r = tcg_cpu_exec(cpu);
            if (r == EXCP_DEBUG) {
                cpu_handle_guest_debug(cpu);
                break;
            }
        } else if (cpu->stop || cpu->stopped) {
            break;
        }
    }

    /* Pairs with smp_wmb in qemu_cpu_kick. */
    atomic_mb_set(&exit_request, 0);
}

void list_cpus(FILE *f, fprintf_function cpu_fprintf, const char *optarg)
{
    /* XXX: implement xxx_cpu_list for targets that still miss it */
#if defined(cpu_list)
    cpu_list(f, cpu_fprintf);
#endif
}

CpuInfoList *qmp_query_cpus(Error **errp)
{
    CpuInfoList *head = NULL, *cur_item = NULL;
    CPUState *cpu;

    CPU_FOREACH(cpu) {
        CpuInfoList *info;
#if defined(TARGET_I386)
        X86CPU *x86_cpu = X86_CPU(cpu);
        CPUX86State *env = &x86_cpu->env;
#elif defined(TARGET_PPC)
        PowerPCCPU *ppc_cpu = POWERPC_CPU(cpu);
        CPUPPCState *env = &ppc_cpu->env;
#elif defined(TARGET_SPARC)
        SPARCCPU *sparc_cpu = SPARC_CPU(cpu);
        CPUSPARCState *env = &sparc_cpu->env;
#elif defined(TARGET_MIPS)
        MIPSCPU *mips_cpu = MIPS_CPU(cpu);
        CPUMIPSState *env = &mips_cpu->env;
#elif defined(TARGET_TRICORE)
        TriCoreCPU *tricore_cpu = TRICORE_CPU(cpu);
        CPUTriCoreState *env = &tricore_cpu->env;
#endif

        cpu_synchronize_state(cpu);

        info = g_malloc0(sizeof(*info));
        info->value = g_malloc0(sizeof(*info->value));
        info->value->CPU = cpu->cpu_index;
        info->value->current = (cpu == first_cpu);
        info->value->halted = cpu->halted;
        info->value->qom_path = object_get_canonical_path(OBJECT(cpu));
        info->value->thread_id = cpu->thread_id;
#if defined(TARGET_I386)
        info->value->arch = CPU_INFO_ARCH_X86;
        info->value->u.x86.pc = env->eip + env->segs[R_CS].base;
#elif defined(TARGET_PPC)
        info->value->arch = CPU_INFO_ARCH_PPC;
        info->value->u.ppc.nip = env->nip;
#elif defined(TARGET_SPARC)
        info->value->arch = CPU_INFO_ARCH_SPARC;
        info->value->u.q_sparc.pc = env->pc;
        info->value->u.q_sparc.npc = env->npc;
#elif defined(TARGET_MIPS)
        info->value->arch = CPU_INFO_ARCH_MIPS;
        info->value->u.q_mips.PC = env->active_tc.PC;
#elif defined(TARGET_TRICORE)
        info->value->arch = CPU_INFO_ARCH_TRICORE;
        info->value->u.tricore.PC = env->PC;
#else
        info->value->arch = CPU_INFO_ARCH_OTHER;
#endif

        /* XXX: waiting for the qapi to support GSList */
        if (!cur_item) {
            head = cur_item = info;
        } else {
            cur_item->next = info;
            cur_item = info;
        }
    }

    return head;
}

void qmp_memsave(int64_t addr, int64_t size, const char *filename,
                 bool has_cpu, int64_t cpu_index, Error **errp)
{
    FILE *f;
    uint32_t l;
    CPUState *cpu;
    uint8_t buf[1024];
    int64_t orig_addr = addr, orig_size = size;

    if (!has_cpu) {
        cpu_index = 0;
    }

    cpu = qemu_get_cpu(cpu_index);
    if (cpu == NULL) {
        error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "cpu-index",
                   "a CPU number");
        return;
    }

    f = fopen(filename, "wb");
    if (!f) {
        error_setg_file_open(errp, errno, filename);
        return;
    }

    while (size != 0) {
        l = sizeof(buf);
        if (l > size) {
            l = size;
        }
        if (cpu_memory_rw_debug(cpu, addr, buf, l, 0) != 0) {
            error_setg(errp, "Invalid addr 0x%016" PRIx64 "/size %" PRId64
                             " specified", orig_addr, orig_size);
            goto exit;
        }
        if (fwrite(buf, 1, l, f) != l) {
            error_setg(errp, QERR_IO_ERROR);
            goto exit;
        }
        addr += l;
        size -= l;
    }

exit:
    fclose(f);
}

void qmp_pmemsave(int64_t addr, int64_t size, const char *filename,
                  Error **errp)
{
    FILE *f;
    uint32_t l;
    uint8_t buf[1024];

    f = fopen(filename, "wb");
    if (!f) {
        error_setg_file_open(errp, errno, filename);
        return;
    }

    while (size != 0) {
        l = sizeof(buf);
        if (l > size) {
            l = size;
        }
        cpu_physical_memory_read(addr, buf, l);
        if (fwrite(buf, 1, l, f) != l) {
            error_setg(errp, QERR_IO_ERROR);
            goto exit;
        }
        addr += l;
        size -= l;
    }

exit:
    fclose(f);
}

void qmp_inject_nmi(Error **errp)
{
#if defined(TARGET_I386)
    CPUState *cs;

    CPU_FOREACH(cs) {
        X86CPU *cpu = X86_CPU(cs);

        if (!cpu->apic_state) {
            cpu_interrupt(cs, CPU_INTERRUPT_NMI);
        } else {
            apic_deliver_nmi(cpu->apic_state);
        }
    }
#else
    nmi_monitor_handle(monitor_get_cpu_index(), errp);
#endif
}

void dump_drift_info(FILE *f, fprintf_function cpu_fprintf)
{
    if (!use_icount) {
        return;
    }

    cpu_fprintf(f, "Host - Guest clock %"PRIi64" ms\n",
                (cpu_get_clock() - cpu_get_icount())/SCALE_MS);
    if (icount_align_option) {
        cpu_fprintf(f, "Max guest delay %"PRIi64" ms\n", -max_delay/SCALE_MS);
        cpu_fprintf(f, "Max guest advance %"PRIi64" ms\n",
                    max_advance/SCALE_MS);
    } else {
        cpu_fprintf(f, "Max guest delay NA\n");
        cpu_fprintf(f, "Max guest advance NA\n");
    }
}