]> git.proxmox.com Git - mirror_qemu.git/blob - cpus.c
Merge remote-tracking branch 'remotes/stefanha/tags/tracing-pull-request' into staging
[mirror_qemu.git] / cpus.c
1 /*
2 * QEMU System Emulator
3 *
4 * Copyright (c) 2003-2008 Fabrice Bellard
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to deal
8 * in the Software without restriction, including without limitation the rights
9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 * copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22 * THE SOFTWARE.
23 */
24
25 /* Needed early for CONFIG_BSD etc. */
26 #include "qemu/osdep.h"
27 #include "qemu-common.h"
28 #include "cpu.h"
29 #include "monitor/monitor.h"
30 #include "qapi/qmp/qerror.h"
31 #include "qemu/error-report.h"
32 #include "sysemu/sysemu.h"
33 #include "sysemu/block-backend.h"
34 #include "exec/gdbstub.h"
35 #include "sysemu/dma.h"
36 #include "sysemu/kvm.h"
37 #include "qmp-commands.h"
38 #include "exec/exec-all.h"
39
40 #include "qemu/thread.h"
41 #include "sysemu/cpus.h"
42 #include "sysemu/qtest.h"
43 #include "qemu/main-loop.h"
44 #include "qemu/bitmap.h"
45 #include "qemu/seqlock.h"
46 #include "qapi-event.h"
47 #include "hw/nmi.h"
48 #include "sysemu/replay.h"
49
50 #ifndef _WIN32
51 #include "qemu/compatfd.h"
52 #endif
53
54 #ifdef CONFIG_LINUX
55
56 #include <sys/prctl.h>
57
58 #ifndef PR_MCE_KILL
59 #define PR_MCE_KILL 33
60 #endif
61
62 #ifndef PR_MCE_KILL_SET
63 #define PR_MCE_KILL_SET 1
64 #endif
65
66 #ifndef PR_MCE_KILL_EARLY
67 #define PR_MCE_KILL_EARLY 1
68 #endif
69
70 #endif /* CONFIG_LINUX */
71
/* NOTE(review): next_cpu, max_delay and max_advance are not referenced in the
 * portion of the file visible here; their users are presumably further down
 * or in other translation units - confirm before changing. */
72 static CPUState *next_cpu;
73 int64_t max_delay;
74 int64_t max_advance;
75
76 /* vcpu throttling controls */
/* Timer created in cpu_ticks_init(); its callback is cpu_throttle_timer_tick(). */
77 static QEMUTimer *throttle_timer;
/* Current throttle percentage; 0 means throttling is off (see
 * cpu_throttle_stop() / cpu_throttle_active()).  Accessed through
 * atomic_set() / atomic_read(). */
78 static unsigned int throttle_percentage;
79
/* Clamp bounds applied by cpu_throttle_set(), and the timeslice length in
 * nanoseconds used to derive sleep time and timer period. */
80 #define CPU_THROTTLE_PCT_MIN 1
81 #define CPU_THROTTLE_PCT_MAX 99
82 #define CPU_THROTTLE_TIMESLICE_NS 10000000
83
84 bool cpu_is_stopped(CPUState *cpu)
85 {
86 return cpu->stopped || !runstate_is_running();
87 }
88
89 static bool cpu_thread_is_idle(CPUState *cpu)
90 {
91 if (cpu->stop || cpu->queued_work_first) {
92 return false;
93 }
94 if (cpu_is_stopped(cpu)) {
95 return true;
96 }
97 if (!cpu->halted || cpu_has_work(cpu) ||
98 kvm_halt_in_kernel()) {
99 return false;
100 }
101 return true;
102 }
103
104 static bool all_cpu_threads_idle(void)
105 {
106 CPUState *cpu;
107
108 CPU_FOREACH(cpu) {
109 if (!cpu_thread_is_idle(cpu)) {
110 return false;
111 }
112 }
113 return true;
114 }
115
116 /***********************************************************/
117 /* guest cycle counter */
118
119 /* Protected by TimersState seqlock */
120
/* Set from the "sleep" option in configure_icount().  When false,
 * qemu_start_warp_timer() advances the icount bias straight to the next
 * QEMU_CLOCK_VIRTUAL deadline instead of arming icount_warp_timer. */
121 static bool icount_sleep = true;
/* QEMU_CLOCK_VIRTUAL_RT value recorded by qemu_start_warp_timer() when a warp
 * is started; -1 means no warp is pending (see icount_warp_rt()). */
122 static int64_t vm_clock_warp_start = -1;
123 /* Conversion factor from emulated instructions to virtual clock ticks. */
124 static int icount_time_shift;
125 /* Arbitrarily pick 1MIPS as the minimum allowable speed. */
126 #define MAX_ICOUNT_SHIFT 10
127
/* icount_rt_timer / icount_vm_timer drive icount_adjust() from
 * QEMU_CLOCK_VIRTUAL_RT and QEMU_CLOCK_VIRTUAL respectively; both are created
 * in configure_icount() for shift=auto.  icount_warp_timer is created there
 * when icount_sleep is enabled and fires icount_timer_cb(). */
128 static QEMUTimer *icount_rt_timer;
129 static QEMUTimer *icount_vm_timer;
130 static QEMUTimer *icount_warp_timer;
131
/* Bookkeeping for the guest-visible tick counter, the VM clock and the
 * instruction counter.  A single instance, timers_state, is defined below and
 * registered for migration in cpu_ticks_init(). */
132 typedef struct TimersState {
133 /* Protected by BQL. */
/* Last value returned by cpu_get_ticks(); used there to keep ticks from
 * going backwards. */
134 int64_t cpu_ticks_prev;
/* Offset added to the host tick counter by cpu_get_ticks(). */
135 int64_t cpu_ticks_offset;
136
137 /* cpu_clock_offset can be read out of BQL, so protect it with
138 * this lock.
139 */
140 QemuSeqLock vm_clock_seqlock;
/* Offset added to get_clock() by cpu_get_clock_locked(). */
141 int64_t cpu_clock_offset;
/* Non-zero between cpu_enable_ticks() and cpu_disable_ticks(). */
142 int32_t cpu_ticks_enabled;
/* Not used by the code visible here other than being listed in
 * vmstate_timers - presumably kept for migration stream layout; confirm. */
143 int64_t dummy;
144
145 /* Compensate for varying guest execution speed. */
146 int64_t qemu_icount_bias;
147 /* Only written by TCG thread */
148 int64_t qemu_icount;
149 } TimersState;
150
151 static TimersState timers_state;
152
153 int64_t cpu_get_icount_raw(void)
154 {
155 int64_t icount;
156 CPUState *cpu = current_cpu;
157
158 icount = timers_state.qemu_icount;
159 if (cpu) {
160 if (!cpu->can_do_io) {
161 fprintf(stderr, "Bad icount read\n");
162 exit(1);
163 }
164 icount -= (cpu->icount_decr.u16.low + cpu->icount_extra);
165 }
166 return icount;
167 }
168
169 /* Return the virtual CPU time, based on the instruction counter. */
170 static int64_t cpu_get_icount_locked(void)
171 {
172 int64_t icount = cpu_get_icount_raw();
173 return timers_state.qemu_icount_bias + cpu_icount_to_ns(icount);
174 }
175
176 int64_t cpu_get_icount(void)
177 {
178 int64_t icount;
179 unsigned start;
180
181 do {
182 start = seqlock_read_begin(&timers_state.vm_clock_seqlock);
183 icount = cpu_get_icount_locked();
184 } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start));
185
186 return icount;
187 }
188
189 int64_t cpu_icount_to_ns(int64_t icount)
190 {
191 return icount << icount_time_shift;
192 }
193
194 /* return the time elapsed in VM between vm_start and vm_stop. Unless
195 * icount is active, cpu_get_ticks() uses units of the host CPU cycle
196 * counter.
197 *
198 * Caller must hold the BQL
199 */
200 int64_t cpu_get_ticks(void)
201 {
202 int64_t ticks;
203
204 if (use_icount) {
205 return cpu_get_icount();
206 }
207
208 ticks = timers_state.cpu_ticks_offset;
209 if (timers_state.cpu_ticks_enabled) {
210 ticks += cpu_get_host_ticks();
211 }
212
213 if (timers_state.cpu_ticks_prev > ticks) {
214 /* Note: non increasing ticks may happen if the host uses
215 software suspend */
216 timers_state.cpu_ticks_offset += timers_state.cpu_ticks_prev - ticks;
217 ticks = timers_state.cpu_ticks_prev;
218 }
219
220 timers_state.cpu_ticks_prev = ticks;
221 return ticks;
222 }
223
224 static int64_t cpu_get_clock_locked(void)
225 {
226 int64_t time;
227
228 time = timers_state.cpu_clock_offset;
229 if (timers_state.cpu_ticks_enabled) {
230 time += get_clock();
231 }
232
233 return time;
234 }
235
236 /* Return the monotonic time elapsed in VM, i.e.,
237 * the time between vm_start and vm_stop
238 */
239 int64_t cpu_get_clock(void)
240 {
241 int64_t ti;
242 unsigned start;
243
244 do {
245 start = seqlock_read_begin(&timers_state.vm_clock_seqlock);
246 ti = cpu_get_clock_locked();
247 } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start));
248
249 return ti;
250 }
251
252 /* enable cpu_get_ticks()
253 * Caller must hold BQL which serves as mutex for vm_clock_seqlock.
254 */
255 void cpu_enable_ticks(void)
256 {
257 /* Here, the really thing protected by seqlock is cpu_clock_offset. */
258 seqlock_write_begin(&timers_state.vm_clock_seqlock);
259 if (!timers_state.cpu_ticks_enabled) {
260 timers_state.cpu_ticks_offset -= cpu_get_host_ticks();
261 timers_state.cpu_clock_offset -= get_clock();
262 timers_state.cpu_ticks_enabled = 1;
263 }
264 seqlock_write_end(&timers_state.vm_clock_seqlock);
265 }
266
267 /* disable cpu_get_ticks() : the clock is stopped. You must not call
268 * cpu_get_ticks() after that.
269 * Caller must hold BQL which serves as mutex for vm_clock_seqlock.
270 */
271 void cpu_disable_ticks(void)
272 {
273 /* Here, the really thing protected by seqlock is cpu_clock_offset. */
274 seqlock_write_begin(&timers_state.vm_clock_seqlock);
275 if (timers_state.cpu_ticks_enabled) {
276 timers_state.cpu_ticks_offset += cpu_get_host_ticks();
277 timers_state.cpu_clock_offset = cpu_get_clock_locked();
278 timers_state.cpu_ticks_enabled = 0;
279 }
280 seqlock_write_end(&timers_state.vm_clock_seqlock);
281 }
282
283 /* Correlation between real and virtual time is always going to be
284 fairly approximate, so ignore small variation.
285 When the guest is idle real and virtual time will be aligned in
286 the IO wait loop. */
287 #define ICOUNT_WOBBLE (NANOSECONDS_PER_SECOND / 10)
288
289 static void icount_adjust(void)
290 {
291 int64_t cur_time;
292 int64_t cur_icount;
293 int64_t delta;
294
295 /* Protected by TimersState mutex. */
296 static int64_t last_delta;
297
298 /* If the VM is not running, then do nothing. */
299 if (!runstate_is_running()) {
300 return;
301 }
302
303 seqlock_write_begin(&timers_state.vm_clock_seqlock);
304 cur_time = cpu_get_clock_locked();
305 cur_icount = cpu_get_icount_locked();
306
307 delta = cur_icount - cur_time;
308 /* FIXME: This is a very crude algorithm, somewhat prone to oscillation. */
309 if (delta > 0
310 && last_delta + ICOUNT_WOBBLE < delta * 2
311 && icount_time_shift > 0) {
312 /* The guest is getting too far ahead. Slow time down. */
313 icount_time_shift--;
314 }
315 if (delta < 0
316 && last_delta - ICOUNT_WOBBLE > delta * 2
317 && icount_time_shift < MAX_ICOUNT_SHIFT) {
318 /* The guest is getting too far behind. Speed time up. */
319 icount_time_shift++;
320 }
321 last_delta = delta;
322 timers_state.qemu_icount_bias = cur_icount
323 - (timers_state.qemu_icount << icount_time_shift);
324 seqlock_write_end(&timers_state.vm_clock_seqlock);
325 }
326
327 static void icount_adjust_rt(void *opaque)
328 {
329 timer_mod(icount_rt_timer,
330 qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL_RT) + 1000);
331 icount_adjust();
332 }
333
334 static void icount_adjust_vm(void *opaque)
335 {
336 timer_mod(icount_vm_timer,
337 qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
338 NANOSECONDS_PER_SECOND / 10);
339 icount_adjust();
340 }
341
342 static int64_t qemu_icount_round(int64_t count)
343 {
344 return (count + (1 << icount_time_shift) - 1) >> icount_time_shift;
345 }
346
347 static void icount_warp_rt(void)
348 {
349 unsigned seq;
350 int64_t warp_start;
351
352 /* The icount_warp_timer is rescheduled soon after vm_clock_warp_start
353 * changes from -1 to another value, so the race here is okay.
354 */
355 do {
356 seq = seqlock_read_begin(&timers_state.vm_clock_seqlock);
357 warp_start = vm_clock_warp_start;
358 } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, seq));
359
360 if (warp_start == -1) {
361 return;
362 }
363
364 seqlock_write_begin(&timers_state.vm_clock_seqlock);
365 if (runstate_is_running()) {
366 int64_t clock = REPLAY_CLOCK(REPLAY_CLOCK_VIRTUAL_RT,
367 cpu_get_clock_locked());
368 int64_t warp_delta;
369
370 warp_delta = clock - vm_clock_warp_start;
371 if (use_icount == 2) {
372 /*
373 * In adaptive mode, do not let QEMU_CLOCK_VIRTUAL run too
374 * far ahead of real time.
375 */
376 int64_t cur_icount = cpu_get_icount_locked();
377 int64_t delta = clock - cur_icount;
378 warp_delta = MIN(warp_delta, delta);
379 }
380 timers_state.qemu_icount_bias += warp_delta;
381 }
382 vm_clock_warp_start = -1;
383 seqlock_write_end(&timers_state.vm_clock_seqlock);
384
385 if (qemu_clock_expired(QEMU_CLOCK_VIRTUAL)) {
386 qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
387 }
388 }
389
/*
 * Callback of icount_warp_timer; @opaque is unused.
 *
 * No replay checkpoint is needed here because the timer already
 * synchronizes with CHECKPOINT_CLOCK_VIRTUAL_RT.
 */
static void icount_timer_cb(void *opaque)
{
    (void)opaque;
    icount_warp_rt();
}
397
398 void qtest_clock_warp(int64_t dest)
399 {
400 int64_t clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
401 AioContext *aio_context;
402 assert(qtest_enabled());
403 aio_context = qemu_get_aio_context();
404 while (clock < dest) {
405 int64_t deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
406 int64_t warp = qemu_soonest_timeout(dest - clock, deadline);
407
408 seqlock_write_begin(&timers_state.vm_clock_seqlock);
409 timers_state.qemu_icount_bias += warp;
410 seqlock_write_end(&timers_state.vm_clock_seqlock);
411
412 qemu_clock_run_timers(QEMU_CLOCK_VIRTUAL);
413 timerlist_run_timers(aio_context->tlg.tl[QEMU_CLOCK_VIRTUAL]);
414 clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
415 }
416 qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
417 }
418
419 void qemu_start_warp_timer(void)
420 {
421 int64_t clock;
422 int64_t deadline;
423
424 if (!use_icount) {
425 return;
426 }
427
428 /* Nothing to do if the VM is stopped: QEMU_CLOCK_VIRTUAL timers
429 * do not fire, so computing the deadline does not make sense.
430 */
431 if (!runstate_is_running()) {
432 return;
433 }
434
435 /* warp clock deterministically in record/replay mode */
436 if (!replay_checkpoint(CHECKPOINT_CLOCK_WARP_START)) {
437 return;
438 }
439
440 if (!all_cpu_threads_idle()) {
441 return;
442 }
443
444 if (qtest_enabled()) {
445 /* When testing, qtest commands advance icount. */
446 return;
447 }
448
449 /* We want to use the earliest deadline from ALL vm_clocks */
450 clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT);
451 deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
452 if (deadline < 0) {
453 static bool notified;
454 if (!icount_sleep && !notified) {
455 error_report("WARNING: icount sleep disabled and no active timers");
456 notified = true;
457 }
458 return;
459 }
460
461 if (deadline > 0) {
462 /*
463 * Ensure QEMU_CLOCK_VIRTUAL proceeds even when the virtual CPU goes to
464 * sleep. Otherwise, the CPU might be waiting for a future timer
465 * interrupt to wake it up, but the interrupt never comes because
466 * the vCPU isn't running any insns and thus doesn't advance the
467 * QEMU_CLOCK_VIRTUAL.
468 */
469 if (!icount_sleep) {
470 /*
471 * We never let VCPUs sleep in no sleep icount mode.
472 * If there is a pending QEMU_CLOCK_VIRTUAL timer we just advance
473 * to the next QEMU_CLOCK_VIRTUAL event and notify it.
474 * It is useful when we want a deterministic execution time,
475 * isolated from host latencies.
476 */
477 seqlock_write_begin(&timers_state.vm_clock_seqlock);
478 timers_state.qemu_icount_bias += deadline;
479 seqlock_write_end(&timers_state.vm_clock_seqlock);
480 qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
481 } else {
482 /*
483 * We do stop VCPUs and only advance QEMU_CLOCK_VIRTUAL after some
484 * "real" time, (related to the time left until the next event) has
485 * passed. The QEMU_CLOCK_VIRTUAL_RT clock will do this.
486 * This avoids that the warps are visible externally; for example,
487 * you will not be sending network packets continuously instead of
488 * every 100ms.
489 */
490 seqlock_write_begin(&timers_state.vm_clock_seqlock);
491 if (vm_clock_warp_start == -1 || vm_clock_warp_start > clock) {
492 vm_clock_warp_start = clock;
493 }
494 seqlock_write_end(&timers_state.vm_clock_seqlock);
495 timer_mod_anticipate(icount_warp_timer, clock + deadline);
496 }
497 } else if (deadline == 0) {
498 qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
499 }
500 }
501
502 static void qemu_account_warp_timer(void)
503 {
504 if (!use_icount || !icount_sleep) {
505 return;
506 }
507
508 /* Nothing to do if the VM is stopped: QEMU_CLOCK_VIRTUAL timers
509 * do not fire, so computing the deadline does not make sense.
510 */
511 if (!runstate_is_running()) {
512 return;
513 }
514
515 /* warp clock deterministically in record/replay mode */
516 if (!replay_checkpoint(CHECKPOINT_CLOCK_WARP_ACCOUNT)) {
517 return;
518 }
519
520 timer_del(icount_warp_timer);
521 icount_warp_rt();
522 }
523
524 static bool icount_state_needed(void *opaque)
525 {
526 return use_icount;
527 }
528
529 /*
530 * This is a subsection for icount migration.
531 */
/* Included only when icount_state_needed() returns true; it carries the
 * qemu_icount_bias and qemu_icount fields of TimersState. */
532 static const VMStateDescription icount_vmstate_timers = {
533 .name = "timer/icount",
534 .version_id = 1,
535 .minimum_version_id = 1,
536 .needed = icount_state_needed,
537 .fields = (VMStateField[]) {
538 VMSTATE_INT64(qemu_icount_bias, TimersState),
539 VMSTATE_INT64(qemu_icount, TimersState),
540 VMSTATE_END_OF_LIST()
541 }
542 };
543
/* Migration description for TimersState, registered against timers_state in
 * cpu_ticks_init().  Current version is 2 and version 1 streams are still
 * accepted; cpu_clock_offset is declared as a version-2 field.  The icount
 * state travels in the optional icount_vmstate_timers subsection. */
544 static const VMStateDescription vmstate_timers = {
545 .name = "timer",
546 .version_id = 2,
547 .minimum_version_id = 1,
548 .fields = (VMStateField[]) {
549 VMSTATE_INT64(cpu_ticks_offset, TimersState),
550 VMSTATE_INT64(dummy, TimersState),
551 VMSTATE_INT64_V(cpu_clock_offset, TimersState, 2),
552 VMSTATE_END_OF_LIST()
553 },
554 .subsections = (const VMStateDescription*[]) {
555 &icount_vmstate_timers,
556 NULL
557 }
558 };
559
560 static void cpu_throttle_thread(CPUState *cpu, void *opaque)
561 {
562 double pct;
563 double throttle_ratio;
564 long sleeptime_ns;
565
566 if (!cpu_throttle_get_percentage()) {
567 return;
568 }
569
570 pct = (double)cpu_throttle_get_percentage()/100;
571 throttle_ratio = pct / (1 - pct);
572 sleeptime_ns = (long)(throttle_ratio * CPU_THROTTLE_TIMESLICE_NS);
573
574 qemu_mutex_unlock_iothread();
575 atomic_set(&cpu->throttle_thread_scheduled, 0);
576 g_usleep(sleeptime_ns / 1000); /* Convert ns to us for usleep call */
577 qemu_mutex_lock_iothread();
578 }
579
580 static void cpu_throttle_timer_tick(void *opaque)
581 {
582 CPUState *cpu;
583 double pct;
584
585 /* Stop the timer if needed */
586 if (!cpu_throttle_get_percentage()) {
587 return;
588 }
589 CPU_FOREACH(cpu) {
590 if (!atomic_xchg(&cpu->throttle_thread_scheduled, 1)) {
591 async_run_on_cpu(cpu, cpu_throttle_thread, NULL);
592 }
593 }
594
595 pct = (double)cpu_throttle_get_percentage()/100;
596 timer_mod(throttle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT) +
597 CPU_THROTTLE_TIMESLICE_NS / (1-pct));
598 }
599
600 void cpu_throttle_set(int new_throttle_pct)
601 {
602 /* Ensure throttle percentage is within valid range */
603 new_throttle_pct = MIN(new_throttle_pct, CPU_THROTTLE_PCT_MAX);
604 new_throttle_pct = MAX(new_throttle_pct, CPU_THROTTLE_PCT_MIN);
605
606 atomic_set(&throttle_percentage, new_throttle_pct);
607
608 timer_mod(throttle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT) +
609 CPU_THROTTLE_TIMESLICE_NS);
610 }
611
612 void cpu_throttle_stop(void)
613 {
614 atomic_set(&throttle_percentage, 0);
615 }
616
/* Report whether a non-zero throttle percentage is currently set. */
bool cpu_throttle_active(void)
{
    int pct = cpu_throttle_get_percentage();

    return pct != 0;
}
621
622 int cpu_throttle_get_percentage(void)
623 {
624 return atomic_read(&throttle_percentage);
625 }
626
627 void cpu_ticks_init(void)
628 {
629 seqlock_init(&timers_state.vm_clock_seqlock);
630 vmstate_register(NULL, 0, &vmstate_timers, &timers_state);
631 throttle_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL_RT,
632 cpu_throttle_timer_tick, NULL);
633 }
634
635 void configure_icount(QemuOpts *opts, Error **errp)
636 {
637 const char *option;
638 char *rem_str = NULL;
639
640 option = qemu_opt_get(opts, "shift");
641 if (!option) {
642 if (qemu_opt_get(opts, "align") != NULL) {
643 error_setg(errp, "Please specify shift option when using align");
644 }
645 return;
646 }
647
648 icount_sleep = qemu_opt_get_bool(opts, "sleep", true);
649 if (icount_sleep) {
650 icount_warp_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL_RT,
651 icount_timer_cb, NULL);
652 }
653
654 icount_align_option = qemu_opt_get_bool(opts, "align", false);
655
656 if (icount_align_option && !icount_sleep) {
657 error_setg(errp, "align=on and sleep=off are incompatible");
658 }
659 if (strcmp(option, "auto") != 0) {
660 errno = 0;
661 icount_time_shift = strtol(option, &rem_str, 0);
662 if (errno != 0 || *rem_str != '\0' || !strlen(option)) {
663 error_setg(errp, "icount: Invalid shift value");
664 }
665 use_icount = 1;
666 return;
667 } else if (icount_align_option) {
668 error_setg(errp, "shift=auto and align=on are incompatible");
669 } else if (!icount_sleep) {
670 error_setg(errp, "shift=auto and sleep=off are incompatible");
671 }
672
673 use_icount = 2;
674
675 /* 125MIPS seems a reasonable initial guess at the guest speed.
676 It will be corrected fairly quickly anyway. */
677 icount_time_shift = 3;
678
679 /* Have both realtime and virtual time triggers for speed adjustment.
680 The realtime trigger catches emulated time passing too slowly,
681 the virtual time trigger catches emulated time passing too fast.
682 Realtime triggers occur even when idle, so use them less frequently
683 than VM triggers. */
684 icount_rt_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL_RT,
685 icount_adjust_rt, NULL);
686 timer_mod(icount_rt_timer,
687 qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL_RT) + 1000);
688 icount_vm_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
689 icount_adjust_vm, NULL);
690 timer_mod(icount_vm_timer,
691 qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
692 NANOSECONDS_PER_SECOND / 10);
693 }
694
695 /***********************************************************/
696 void hw_error(const char *fmt, ...)
697 {
698 va_list ap;
699 CPUState *cpu;
700
701 va_start(ap, fmt);
702 fprintf(stderr, "qemu: hardware error: ");
703 vfprintf(stderr, fmt, ap);
704 fprintf(stderr, "\n");
705 CPU_FOREACH(cpu) {
706 fprintf(stderr, "CPU #%d:\n", cpu->cpu_index);
707 cpu_dump_state(cpu, stderr, fprintf, CPU_DUMP_FPU);
708 }
709 va_end(ap);
710 abort();
711 }
712
713 void cpu_synchronize_all_states(void)
714 {
715 CPUState *cpu;
716
717 CPU_FOREACH(cpu) {
718 cpu_synchronize_state(cpu);
719 }
720 }
721
722 void cpu_synchronize_all_post_reset(void)
723 {
724 CPUState *cpu;
725
726 CPU_FOREACH(cpu) {
727 cpu_synchronize_post_reset(cpu);
728 }
729 }
730
731 void cpu_synchronize_all_post_init(void)
732 {
733 CPUState *cpu;
734
735 CPU_FOREACH(cpu) {
736 cpu_synchronize_post_init(cpu);
737 }
738 }
739
740 static int do_vm_stop(RunState state)
741 {
742 int ret = 0;
743
744 if (runstate_is_running()) {
745 cpu_disable_ticks();
746 pause_all_vcpus();
747 runstate_set(state);
748 vm_state_notify(0, state);
749 qapi_event_send_stop(&error_abort);
750 }
751
752 bdrv_drain_all();
753 replay_disable_events();
754 ret = blk_flush_all();
755
756 return ret;
757 }
758
759 static bool cpu_can_run(CPUState *cpu)
760 {
761 if (cpu->stop) {
762 return false;
763 }
764 if (cpu_is_stopped(cpu)) {
765 return false;
766 }
767 return true;
768 }
769
770 static void cpu_handle_guest_debug(CPUState *cpu)
771 {
772 gdb_set_stop_cpu(cpu);
773 qemu_system_debug_request();
774 cpu->stopped = true;
775 }
776
777 #ifdef CONFIG_LINUX
778 static void sigbus_reraise(void)
779 {
780 sigset_t set;
781 struct sigaction action;
782
783 memset(&action, 0, sizeof(action));
784 action.sa_handler = SIG_DFL;
785 if (!sigaction(SIGBUS, &action, NULL)) {
786 raise(SIGBUS);
787 sigemptyset(&set);
788 sigaddset(&set, SIGBUS);
789 pthread_sigmask(SIG_UNBLOCK, &set, NULL);
790 }
791 perror("Failed to re-raise SIGBUS!\n");
792 abort();
793 }
794
795 static void sigbus_handler(int n, struct qemu_signalfd_siginfo *siginfo,
796 void *ctx)
797 {
798 if (kvm_on_sigbus(siginfo->ssi_code,
799 (void *)(intptr_t)siginfo->ssi_addr)) {
800 sigbus_reraise();
801 }
802 }
803
804 static void qemu_init_sigbus(void)
805 {
806 struct sigaction action;
807
808 memset(&action, 0, sizeof(action));
809 action.sa_flags = SA_SIGINFO;
810 action.sa_sigaction = (void (*)(int, siginfo_t*, void*))sigbus_handler;
811 sigaction(SIGBUS, &action, NULL);
812
813 prctl(PR_MCE_KILL, PR_MCE_KILL_SET, PR_MCE_KILL_EARLY, 0, 0);
814 }
815
816 static void qemu_kvm_eat_signals(CPUState *cpu)
817 {
818 struct timespec ts = { 0, 0 };
819 siginfo_t siginfo;
820 sigset_t waitset;
821 sigset_t chkset;
822 int r;
823
824 sigemptyset(&waitset);
825 sigaddset(&waitset, SIG_IPI);
826 sigaddset(&waitset, SIGBUS);
827
828 do {
829 r = sigtimedwait(&waitset, &siginfo, &ts);
830 if (r == -1 && !(errno == EAGAIN || errno == EINTR)) {
831 perror("sigtimedwait");
832 exit(1);
833 }
834
835 switch (r) {
836 case SIGBUS:
837 if (kvm_on_sigbus_vcpu(cpu, siginfo.si_code, siginfo.si_addr)) {
838 sigbus_reraise();
839 }
840 break;
841 default:
842 break;
843 }
844
845 r = sigpending(&chkset);
846 if (r == -1) {
847 perror("sigpending");
848 exit(1);
849 }
850 } while (sigismember(&chkset, SIG_IPI) || sigismember(&chkset, SIGBUS));
851 }
852
853 #else /* !CONFIG_LINUX */
854
/* Non-Linux build: no SIGBUS setup is performed. */
static void qemu_init_sigbus(void)
{
    /* intentionally empty */
}
858
859 static void qemu_kvm_eat_signals(CPUState *cpu)
860 {
861 }
862 #endif /* !CONFIG_LINUX */
863
864 #ifndef _WIN32
/*
 * No-op signal handler.  It is installed for SIG_IPI by
 * qemu_kvm_init_cpu_signals(); @sig is ignored.
 */
static void dummy_signal(int sig)
{
    (void)sig;
}
868
869 static void qemu_kvm_init_cpu_signals(CPUState *cpu)
870 {
871 int r;
872 sigset_t set;
873 struct sigaction sigact;
874
875 memset(&sigact, 0, sizeof(sigact));
876 sigact.sa_handler = dummy_signal;
877 sigaction(SIG_IPI, &sigact, NULL);
878
879 pthread_sigmask(SIG_BLOCK, NULL, &set);
880 sigdelset(&set, SIG_IPI);
881 sigdelset(&set, SIGBUS);
882 r = kvm_set_signal_mask(cpu, &set);
883 if (r) {
884 fprintf(stderr, "kvm_set_signal_mask: %s\n", strerror(-r));
885 exit(1);
886 }
887 }
888
889 #else /* _WIN32 */
890 static void qemu_kvm_init_cpu_signals(CPUState *cpu)
891 {
892 abort();
893 }
894 #endif /* _WIN32 */
895
/* The mutex taken and released by qemu_mutex_lock_iothread() /
 * qemu_mutex_unlock_iothread(); also the mutex paired with the condition
 * variables below in every qemu_cond_wait() in this file. */
896 static QemuMutex qemu_global_mutex;
/* Broadcast by qemu_mutex_lock_iothread() on its non-vCPU TCG path once it
 * holds the mutex; qemu_tcg_wait_io_event() waits on it while
 * iothread_requesting_mutex is non-zero. */
897 static QemuCond qemu_io_proceeded_cond;
/* Incremented on entry to qemu_mutex_lock_iothread() and decremented once the
 * mutex has been obtained. */
898 static unsigned iothread_requesting_mutex;
899
/* Filled in with the calling thread by qemu_init_cpu_loop(). */
900 static QemuThread io_thread;
901
902 /* cpu creation */
/* Signalled by the vCPU thread functions when a CPU's "created" flag changes;
 * cpu_remove_sync() waits on it. */
903 static QemuCond qemu_cpu_cond;
904 /* system init */
/* Broadcast by qemu_wait_io_event_common() when a CPU acknowledges a stop
 * request; pause_all_vcpus() waits on it. */
905 static QemuCond qemu_pause_cond;
906
907 void qemu_init_cpu_loop(void)
908 {
909 qemu_init_sigbus();
910 qemu_cond_init(&qemu_cpu_cond);
911 qemu_cond_init(&qemu_pause_cond);
912 qemu_cond_init(&qemu_io_proceeded_cond);
913 qemu_mutex_init(&qemu_global_mutex);
914
915 qemu_thread_get_self(&io_thread);
916 }
917
918 void run_on_cpu(CPUState *cpu, run_on_cpu_func func, void *data)
919 {
920 do_run_on_cpu(cpu, func, data, &qemu_global_mutex);
921 }
922
923 static void qemu_kvm_destroy_vcpu(CPUState *cpu)
924 {
925 if (kvm_destroy_vcpu(cpu) < 0) {
926 error_report("kvm_destroy_vcpu failed");
927 exit(EXIT_FAILURE);
928 }
929 }
930
931 static void qemu_tcg_destroy_vcpu(CPUState *cpu)
932 {
933 }
934
935 static void qemu_wait_io_event_common(CPUState *cpu)
936 {
937 if (cpu->stop) {
938 cpu->stop = false;
939 cpu->stopped = true;
940 qemu_cond_broadcast(&qemu_pause_cond);
941 }
942 process_queued_cpu_work(cpu);
943 cpu->thread_kicked = false;
944 }
945
946 static void qemu_tcg_wait_io_event(CPUState *cpu)
947 {
948 while (all_cpu_threads_idle()) {
949 qemu_cond_wait(cpu->halt_cond, &qemu_global_mutex);
950 }
951
952 while (iothread_requesting_mutex) {
953 qemu_cond_wait(&qemu_io_proceeded_cond, &qemu_global_mutex);
954 }
955
956 CPU_FOREACH(cpu) {
957 qemu_wait_io_event_common(cpu);
958 }
959 }
960
961 static void qemu_kvm_wait_io_event(CPUState *cpu)
962 {
963 while (cpu_thread_is_idle(cpu)) {
964 qemu_cond_wait(cpu->halt_cond, &qemu_global_mutex);
965 }
966
967 qemu_kvm_eat_signals(cpu);
968 qemu_wait_io_event_common(cpu);
969 }
970
971 static void *qemu_kvm_cpu_thread_fn(void *arg)
972 {
973 CPUState *cpu = arg;
974 int r;
975
976 rcu_register_thread();
977
978 qemu_mutex_lock_iothread();
979 qemu_thread_get_self(cpu->thread);
980 cpu->thread_id = qemu_get_thread_id();
981 cpu->can_do_io = 1;
982 current_cpu = cpu;
983
984 r = kvm_init_vcpu(cpu);
985 if (r < 0) {
986 fprintf(stderr, "kvm_init_vcpu failed: %s\n", strerror(-r));
987 exit(1);
988 }
989
990 qemu_kvm_init_cpu_signals(cpu);
991
992 /* signal CPU creation */
993 cpu->created = true;
994 qemu_cond_signal(&qemu_cpu_cond);
995
996 do {
997 if (cpu_can_run(cpu)) {
998 r = kvm_cpu_exec(cpu);
999 if (r == EXCP_DEBUG) {
1000 cpu_handle_guest_debug(cpu);
1001 }
1002 }
1003 qemu_kvm_wait_io_event(cpu);
1004 } while (!cpu->unplug || cpu_can_run(cpu));
1005
1006 qemu_kvm_destroy_vcpu(cpu);
1007 cpu->created = false;
1008 qemu_cond_signal(&qemu_cpu_cond);
1009 qemu_mutex_unlock_iothread();
1010 return NULL;
1011 }
1012
1013 static void *qemu_dummy_cpu_thread_fn(void *arg)
1014 {
1015 #ifdef _WIN32
1016 fprintf(stderr, "qtest is not supported under Windows\n");
1017 exit(1);
1018 #else
1019 CPUState *cpu = arg;
1020 sigset_t waitset;
1021 int r;
1022
1023 rcu_register_thread();
1024
1025 qemu_mutex_lock_iothread();
1026 qemu_thread_get_self(cpu->thread);
1027 cpu->thread_id = qemu_get_thread_id();
1028 cpu->can_do_io = 1;
1029
1030 sigemptyset(&waitset);
1031 sigaddset(&waitset, SIG_IPI);
1032
1033 /* signal CPU creation */
1034 cpu->created = true;
1035 qemu_cond_signal(&qemu_cpu_cond);
1036
1037 current_cpu = cpu;
1038 while (1) {
1039 current_cpu = NULL;
1040 qemu_mutex_unlock_iothread();
1041 do {
1042 int sig;
1043 r = sigwait(&waitset, &sig);
1044 } while (r == -1 && (errno == EAGAIN || errno == EINTR));
1045 if (r == -1) {
1046 perror("sigwait");
1047 exit(1);
1048 }
1049 qemu_mutex_lock_iothread();
1050 current_cpu = cpu;
1051 qemu_wait_io_event_common(cpu);
1052 }
1053
1054 return NULL;
1055 #endif
1056 }
1057
1058 static void tcg_exec_all(void);
1059
1060 static void *qemu_tcg_cpu_thread_fn(void *arg)
1061 {
1062 CPUState *cpu = arg;
1063 CPUState *remove_cpu = NULL;
1064
1065 rcu_register_thread();
1066
1067 qemu_mutex_lock_iothread();
1068 qemu_thread_get_self(cpu->thread);
1069
1070 CPU_FOREACH(cpu) {
1071 cpu->thread_id = qemu_get_thread_id();
1072 cpu->created = true;
1073 cpu->can_do_io = 1;
1074 }
1075 qemu_cond_signal(&qemu_cpu_cond);
1076
1077 /* wait for initial kick-off after machine start */
1078 while (first_cpu->stopped) {
1079 qemu_cond_wait(first_cpu->halt_cond, &qemu_global_mutex);
1080
1081 /* process any pending work */
1082 CPU_FOREACH(cpu) {
1083 qemu_wait_io_event_common(cpu);
1084 }
1085 }
1086
1087 /* process any pending work */
1088 atomic_mb_set(&exit_request, 1);
1089
1090 while (1) {
1091 tcg_exec_all();
1092
1093 if (use_icount) {
1094 int64_t deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
1095
1096 if (deadline == 0) {
1097 qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
1098 }
1099 }
1100 qemu_tcg_wait_io_event(QTAILQ_FIRST(&cpus));
1101 CPU_FOREACH(cpu) {
1102 if (cpu->unplug && !cpu_can_run(cpu)) {
1103 remove_cpu = cpu;
1104 break;
1105 }
1106 }
1107 if (remove_cpu) {
1108 qemu_tcg_destroy_vcpu(remove_cpu);
1109 cpu->created = false;
1110 qemu_cond_signal(&qemu_cpu_cond);
1111 remove_cpu = NULL;
1112 }
1113 }
1114
1115 return NULL;
1116 }
1117
1118 static void qemu_cpu_kick_thread(CPUState *cpu)
1119 {
1120 #ifndef _WIN32
1121 int err;
1122
1123 if (cpu->thread_kicked) {
1124 return;
1125 }
1126 cpu->thread_kicked = true;
1127 err = pthread_kill(cpu->thread->thread, SIG_IPI);
1128 if (err) {
1129 fprintf(stderr, "qemu:%s: %s", __func__, strerror(err));
1130 exit(1);
1131 }
1132 #else /* _WIN32 */
1133 abort();
1134 #endif
1135 }
1136
1137 static void qemu_cpu_kick_no_halt(void)
1138 {
1139 CPUState *cpu;
1140 /* Ensure whatever caused the exit has reached the CPU threads before
1141 * writing exit_request.
1142 */
1143 atomic_mb_set(&exit_request, 1);
1144 cpu = atomic_mb_read(&tcg_current_cpu);
1145 if (cpu) {
1146 cpu_exit(cpu);
1147 }
1148 }
1149
1150 void qemu_cpu_kick(CPUState *cpu)
1151 {
1152 qemu_cond_broadcast(cpu->halt_cond);
1153 if (tcg_enabled()) {
1154 qemu_cpu_kick_no_halt();
1155 } else {
1156 qemu_cpu_kick_thread(cpu);
1157 }
1158 }
1159
1160 void qemu_cpu_kick_self(void)
1161 {
1162 assert(current_cpu);
1163 qemu_cpu_kick_thread(current_cpu);
1164 }
1165
1166 bool qemu_cpu_is_self(CPUState *cpu)
1167 {
1168 return qemu_thread_is_self(cpu->thread);
1169 }
1170
1171 bool qemu_in_vcpu_thread(void)
1172 {
1173 return current_cpu && qemu_cpu_is_self(current_cpu);
1174 }
1175
/*
 * Per-thread flag: set by qemu_mutex_lock_iothread() and cleared by
 * qemu_mutex_unlock_iothread().  Starts out false.
 */
static __thread bool iothread_locked;

/* Tell whether the calling thread currently holds the iothread lock. */
bool qemu_mutex_iothread_locked(void)
{
    bool held = iothread_locked;

    return held;
}
1182
1183 void qemu_mutex_lock_iothread(void)
1184 {
1185 atomic_inc(&iothread_requesting_mutex);
1186 /* In the simple case there is no need to bump the VCPU thread out of
1187 * TCG code execution.
1188 */
1189 if (!tcg_enabled() || qemu_in_vcpu_thread() ||
1190 !first_cpu || !first_cpu->created) {
1191 qemu_mutex_lock(&qemu_global_mutex);
1192 atomic_dec(&iothread_requesting_mutex);
1193 } else {
1194 if (qemu_mutex_trylock(&qemu_global_mutex)) {
1195 qemu_cpu_kick_no_halt();
1196 qemu_mutex_lock(&qemu_global_mutex);
1197 }
1198 atomic_dec(&iothread_requesting_mutex);
1199 qemu_cond_broadcast(&qemu_io_proceeded_cond);
1200 }
1201 iothread_locked = true;
1202 }
1203
1204 void qemu_mutex_unlock_iothread(void)
1205 {
1206 iothread_locked = false;
1207 qemu_mutex_unlock(&qemu_global_mutex);
1208 }
1209
1210 static int all_vcpus_paused(void)
1211 {
1212 CPUState *cpu;
1213
1214 CPU_FOREACH(cpu) {
1215 if (!cpu->stopped) {
1216 return 0;
1217 }
1218 }
1219
1220 return 1;
1221 }
1222
1223 void pause_all_vcpus(void)
1224 {
1225 CPUState *cpu;
1226
1227 qemu_clock_enable(QEMU_CLOCK_VIRTUAL, false);
1228 CPU_FOREACH(cpu) {
1229 cpu->stop = true;
1230 qemu_cpu_kick(cpu);
1231 }
1232
1233 if (qemu_in_vcpu_thread()) {
1234 cpu_stop_current();
1235 if (!kvm_enabled()) {
1236 CPU_FOREACH(cpu) {
1237 cpu->stop = false;
1238 cpu->stopped = true;
1239 }
1240 return;
1241 }
1242 }
1243
1244 while (!all_vcpus_paused()) {
1245 qemu_cond_wait(&qemu_pause_cond, &qemu_global_mutex);
1246 CPU_FOREACH(cpu) {
1247 qemu_cpu_kick(cpu);
1248 }
1249 }
1250 }
1251
1252 void cpu_resume(CPUState *cpu)
1253 {
1254 cpu->stop = false;
1255 cpu->stopped = false;
1256 qemu_cpu_kick(cpu);
1257 }
1258
1259 void resume_all_vcpus(void)
1260 {
1261 CPUState *cpu;
1262
1263 qemu_clock_enable(QEMU_CLOCK_VIRTUAL, true);
1264 CPU_FOREACH(cpu) {
1265 cpu_resume(cpu);
1266 }
1267 }
1268
1269 void cpu_remove(CPUState *cpu)
1270 {
1271 cpu->stop = true;
1272 cpu->unplug = true;
1273 qemu_cpu_kick(cpu);
1274 }
1275
1276 void cpu_remove_sync(CPUState *cpu)
1277 {
1278 cpu_remove(cpu);
1279 while (cpu->created) {
1280 qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
1281 }
1282 }
1283
1284 /* For temporary buffers for forming a name */
1285 #define VCPU_THREAD_NAME_SIZE 16
1286
1287 static void qemu_tcg_init_vcpu(CPUState *cpu)
1288 {
1289 char thread_name[VCPU_THREAD_NAME_SIZE];
1290 static QemuCond *tcg_halt_cond;
1291 static QemuThread *tcg_cpu_thread;
1292
1293 /* share a single thread for all cpus with TCG */
1294 if (!tcg_cpu_thread) {
1295 cpu->thread = g_malloc0(sizeof(QemuThread));
1296 cpu->halt_cond = g_malloc0(sizeof(QemuCond));
1297 qemu_cond_init(cpu->halt_cond);
1298 tcg_halt_cond = cpu->halt_cond;
1299 snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/TCG",
1300 cpu->cpu_index);
1301 qemu_thread_create(cpu->thread, thread_name, qemu_tcg_cpu_thread_fn,
1302 cpu, QEMU_THREAD_JOINABLE);
1303 #ifdef _WIN32
1304 cpu->hThread = qemu_thread_get_handle(cpu->thread);
1305 #endif
1306 while (!cpu->created) {
1307 qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
1308 }
1309 tcg_cpu_thread = cpu->thread;
1310 } else {
1311 cpu->thread = tcg_cpu_thread;
1312 cpu->halt_cond = tcg_halt_cond;
1313 }
1314 }
1315
1316 static void qemu_kvm_start_vcpu(CPUState *cpu)
1317 {
1318 char thread_name[VCPU_THREAD_NAME_SIZE];
1319
1320 cpu->thread = g_malloc0(sizeof(QemuThread));
1321 cpu->halt_cond = g_malloc0(sizeof(QemuCond));
1322 qemu_cond_init(cpu->halt_cond);
1323 snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/KVM",
1324 cpu->cpu_index);
1325 qemu_thread_create(cpu->thread, thread_name, qemu_kvm_cpu_thread_fn,
1326 cpu, QEMU_THREAD_JOINABLE);
1327 while (!cpu->created) {
1328 qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
1329 }
1330 }
1331
1332 static void qemu_dummy_start_vcpu(CPUState *cpu)
1333 {
1334 char thread_name[VCPU_THREAD_NAME_SIZE];
1335
1336 cpu->thread = g_malloc0(sizeof(QemuThread));
1337 cpu->halt_cond = g_malloc0(sizeof(QemuCond));
1338 qemu_cond_init(cpu->halt_cond);
1339 snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/DUMMY",
1340 cpu->cpu_index);
1341 qemu_thread_create(cpu->thread, thread_name, qemu_dummy_cpu_thread_fn, cpu,
1342 QEMU_THREAD_JOINABLE);
1343 while (!cpu->created) {
1344 qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
1345 }
1346 }
1347
1348 void qemu_init_vcpu(CPUState *cpu)
1349 {
1350 cpu->nr_cores = smp_cores;
1351 cpu->nr_threads = smp_threads;
1352 cpu->stopped = true;
1353
1354 if (!cpu->as) {
1355 /* If the target cpu hasn't set up any address spaces itself,
1356 * give it the default one.
1357 */
1358 AddressSpace *as = address_space_init_shareable(cpu->memory,
1359 "cpu-memory");
1360 cpu->num_ases = 1;
1361 cpu_address_space_init(cpu, as, 0);
1362 }
1363
1364 if (kvm_enabled()) {
1365 qemu_kvm_start_vcpu(cpu);
1366 } else if (tcg_enabled()) {
1367 qemu_tcg_init_vcpu(cpu);
1368 } else {
1369 qemu_dummy_start_vcpu(cpu);
1370 }
1371 }
1372
1373 void cpu_stop_current(void)
1374 {
1375 if (current_cpu) {
1376 current_cpu->stop = false;
1377 current_cpu->stopped = true;
1378 cpu_exit(current_cpu);
1379 qemu_cond_broadcast(&qemu_pause_cond);
1380 }
1381 }
1382
1383 int vm_stop(RunState state)
1384 {
1385 if (qemu_in_vcpu_thread()) {
1386 qemu_system_vmstop_request_prepare();
1387 qemu_system_vmstop_request(state);
1388 /*
1389 * FIXME: should not return to device code in case
1390 * vm_stop() has been requested.
1391 */
1392 cpu_stop_current();
1393 return 0;
1394 }
1395
1396 return do_vm_stop(state);
1397 }
1398
1399 /* does a state transition even if the VM is already stopped,
1400 current state is forgotten forever */
1401 int vm_stop_force_state(RunState state)
1402 {
1403 if (runstate_is_running()) {
1404 return vm_stop(state);
1405 } else {
1406 runstate_set(state);
1407
1408 bdrv_drain_all();
1409 /* Make sure to return an error if the flush in a previous vm_stop()
1410 * failed. */
1411 return blk_flush_all();
1412 }
1413 }
1414
1415 static int64_t tcg_get_icount_limit(void)
1416 {
1417 int64_t deadline;
1418
1419 if (replay_mode != REPLAY_MODE_PLAY) {
1420 deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
1421
1422 /* Maintain prior (possibly buggy) behaviour where if no deadline
1423 * was set (as there is no QEMU_CLOCK_VIRTUAL timer) or it is more than
1424 * INT32_MAX nanoseconds ahead, we still use INT32_MAX
1425 * nanoseconds.
1426 */
1427 if ((deadline < 0) || (deadline > INT32_MAX)) {
1428 deadline = INT32_MAX;
1429 }
1430
1431 return qemu_icount_round(deadline);
1432 } else {
1433 return replay_get_instructions();
1434 }
1435 }
1436
1437 static int tcg_cpu_exec(CPUState *cpu)
1438 {
1439 int ret;
1440 #ifdef CONFIG_PROFILER
1441 int64_t ti;
1442 #endif
1443
1444 #ifdef CONFIG_PROFILER
1445 ti = profile_getclock();
1446 #endif
1447 if (use_icount) {
1448 int64_t count;
1449 int decr;
1450 timers_state.qemu_icount -= (cpu->icount_decr.u16.low
1451 + cpu->icount_extra);
1452 cpu->icount_decr.u16.low = 0;
1453 cpu->icount_extra = 0;
1454 count = tcg_get_icount_limit();
1455 timers_state.qemu_icount += count;
1456 decr = (count > 0xffff) ? 0xffff : count;
1457 count -= decr;
1458 cpu->icount_decr.u16.low = decr;
1459 cpu->icount_extra = count;
1460 }
1461 cpu_exec_start(cpu);
1462 ret = cpu_exec(cpu);
1463 cpu_exec_end(cpu);
1464 #ifdef CONFIG_PROFILER
1465 tcg_time += profile_getclock() - ti;
1466 #endif
1467 if (use_icount) {
1468 /* Fold pending instructions back into the
1469 instruction counter, and clear the interrupt flag. */
1470 timers_state.qemu_icount -= (cpu->icount_decr.u16.low
1471 + cpu->icount_extra);
1472 cpu->icount_decr.u32 = 0;
1473 cpu->icount_extra = 0;
1474 replay_account_executed_instructions();
1475 }
1476 return ret;
1477 }
1478
1479 static void tcg_exec_all(void)
1480 {
1481 int r;
1482
1483 /* Account partial waits to QEMU_CLOCK_VIRTUAL. */
1484 qemu_account_warp_timer();
1485
1486 if (next_cpu == NULL) {
1487 next_cpu = first_cpu;
1488 }
1489 for (; next_cpu != NULL && !exit_request; next_cpu = CPU_NEXT(next_cpu)) {
1490 CPUState *cpu = next_cpu;
1491
1492 qemu_clock_enable(QEMU_CLOCK_VIRTUAL,
1493 (cpu->singlestep_enabled & SSTEP_NOTIMER) == 0);
1494
1495 if (cpu_can_run(cpu)) {
1496 r = tcg_cpu_exec(cpu);
1497 if (r == EXCP_DEBUG) {
1498 cpu_handle_guest_debug(cpu);
1499 break;
1500 }
1501 } else if (cpu->stop || cpu->stopped) {
1502 if (cpu->unplug) {
1503 next_cpu = CPU_NEXT(cpu);
1504 }
1505 break;
1506 }
1507 }
1508
1509 /* Pairs with smp_wmb in qemu_cpu_kick. */
1510 atomic_mb_set(&exit_request, 0);
1511 }
1512
1513 void list_cpus(FILE *f, fprintf_function cpu_fprintf, const char *optarg)
1514 {
1515 /* XXX: implement xxx_cpu_list for targets that still miss it */
1516 #if defined(cpu_list)
1517 cpu_list(f, cpu_fprintf);
1518 #endif
1519 }
1520
1521 CpuInfoList *qmp_query_cpus(Error **errp)
1522 {
1523 CpuInfoList *head = NULL, *cur_item = NULL;
1524 CPUState *cpu;
1525
1526 CPU_FOREACH(cpu) {
1527 CpuInfoList *info;
1528 #if defined(TARGET_I386)
1529 X86CPU *x86_cpu = X86_CPU(cpu);
1530 CPUX86State *env = &x86_cpu->env;
1531 #elif defined(TARGET_PPC)
1532 PowerPCCPU *ppc_cpu = POWERPC_CPU(cpu);
1533 CPUPPCState *env = &ppc_cpu->env;
1534 #elif defined(TARGET_SPARC)
1535 SPARCCPU *sparc_cpu = SPARC_CPU(cpu);
1536 CPUSPARCState *env = &sparc_cpu->env;
1537 #elif defined(TARGET_MIPS)
1538 MIPSCPU *mips_cpu = MIPS_CPU(cpu);
1539 CPUMIPSState *env = &mips_cpu->env;
1540 #elif defined(TARGET_TRICORE)
1541 TriCoreCPU *tricore_cpu = TRICORE_CPU(cpu);
1542 CPUTriCoreState *env = &tricore_cpu->env;
1543 #endif
1544
1545 cpu_synchronize_state(cpu);
1546
1547 info = g_malloc0(sizeof(*info));
1548 info->value = g_malloc0(sizeof(*info->value));
1549 info->value->CPU = cpu->cpu_index;
1550 info->value->current = (cpu == first_cpu);
1551 info->value->halted = cpu->halted;
1552 info->value->qom_path = object_get_canonical_path(OBJECT(cpu));
1553 info->value->thread_id = cpu->thread_id;
1554 #if defined(TARGET_I386)
1555 info->value->arch = CPU_INFO_ARCH_X86;
1556 info->value->u.x86.pc = env->eip + env->segs[R_CS].base;
1557 #elif defined(TARGET_PPC)
1558 info->value->arch = CPU_INFO_ARCH_PPC;
1559 info->value->u.ppc.nip = env->nip;
1560 #elif defined(TARGET_SPARC)
1561 info->value->arch = CPU_INFO_ARCH_SPARC;
1562 info->value->u.q_sparc.pc = env->pc;
1563 info->value->u.q_sparc.npc = env->npc;
1564 #elif defined(TARGET_MIPS)
1565 info->value->arch = CPU_INFO_ARCH_MIPS;
1566 info->value->u.q_mips.PC = env->active_tc.PC;
1567 #elif defined(TARGET_TRICORE)
1568 info->value->arch = CPU_INFO_ARCH_TRICORE;
1569 info->value->u.tricore.PC = env->PC;
1570 #else
1571 info->value->arch = CPU_INFO_ARCH_OTHER;
1572 #endif
1573
1574 /* XXX: waiting for the qapi to support GSList */
1575 if (!cur_item) {
1576 head = cur_item = info;
1577 } else {
1578 cur_item->next = info;
1579 cur_item = info;
1580 }
1581 }
1582
1583 return head;
1584 }
1585
1586 void qmp_memsave(int64_t addr, int64_t size, const char *filename,
1587 bool has_cpu, int64_t cpu_index, Error **errp)
1588 {
1589 FILE *f;
1590 uint32_t l;
1591 CPUState *cpu;
1592 uint8_t buf[1024];
1593 int64_t orig_addr = addr, orig_size = size;
1594
1595 if (!has_cpu) {
1596 cpu_index = 0;
1597 }
1598
1599 cpu = qemu_get_cpu(cpu_index);
1600 if (cpu == NULL) {
1601 error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "cpu-index",
1602 "a CPU number");
1603 return;
1604 }
1605
1606 f = fopen(filename, "wb");
1607 if (!f) {
1608 error_setg_file_open(errp, errno, filename);
1609 return;
1610 }
1611
1612 while (size != 0) {
1613 l = sizeof(buf);
1614 if (l > size)
1615 l = size;
1616 if (cpu_memory_rw_debug(cpu, addr, buf, l, 0) != 0) {
1617 error_setg(errp, "Invalid addr 0x%016" PRIx64 "/size %" PRId64
1618 " specified", orig_addr, orig_size);
1619 goto exit;
1620 }
1621 if (fwrite(buf, 1, l, f) != l) {
1622 error_setg(errp, QERR_IO_ERROR);
1623 goto exit;
1624 }
1625 addr += l;
1626 size -= l;
1627 }
1628
1629 exit:
1630 fclose(f);
1631 }
1632
1633 void qmp_pmemsave(int64_t addr, int64_t size, const char *filename,
1634 Error **errp)
1635 {
1636 FILE *f;
1637 uint32_t l;
1638 uint8_t buf[1024];
1639
1640 f = fopen(filename, "wb");
1641 if (!f) {
1642 error_setg_file_open(errp, errno, filename);
1643 return;
1644 }
1645
1646 while (size != 0) {
1647 l = sizeof(buf);
1648 if (l > size)
1649 l = size;
1650 cpu_physical_memory_read(addr, buf, l);
1651 if (fwrite(buf, 1, l, f) != l) {
1652 error_setg(errp, QERR_IO_ERROR);
1653 goto exit;
1654 }
1655 addr += l;
1656 size -= l;
1657 }
1658
1659 exit:
1660 fclose(f);
1661 }
1662
1663 void qmp_inject_nmi(Error **errp)
1664 {
1665 nmi_monitor_handle(monitor_get_cpu_index(), errp);
1666 }
1667
1668 void dump_drift_info(FILE *f, fprintf_function cpu_fprintf)
1669 {
1670 if (!use_icount) {
1671 return;
1672 }
1673
1674 cpu_fprintf(f, "Host - Guest clock %"PRIi64" ms\n",
1675 (cpu_get_clock() - cpu_get_icount())/SCALE_MS);
1676 if (icount_align_option) {
1677 cpu_fprintf(f, "Max guest delay %"PRIi64" ms\n", -max_delay/SCALE_MS);
1678 cpu_fprintf(f, "Max guest advance %"PRIi64" ms\n", max_advance/SCALE_MS);
1679 } else {
1680 cpu_fprintf(f, "Max guest delay NA\n");
1681 cpu_fprintf(f, "Max guest advance NA\n");
1682 }
1683 }