]> git.proxmox.com Git - mirror_qemu.git/blob - cpus.c
Merge remote-tracking branch 'remotes/juanquintela/tags/migration/20150930' into...
[mirror_qemu.git] / cpus.c
1 /*
2 * QEMU System Emulator
3 *
4 * Copyright (c) 2003-2008 Fabrice Bellard
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to deal
8 * in the Software without restriction, including without limitation the rights
9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 * copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22 * THE SOFTWARE.
23 */
24
25 /* Needed early for CONFIG_BSD etc. */
26 #include "config-host.h"
27
28 #include "monitor/monitor.h"
29 #include "qapi/qmp/qerror.h"
30 #include "qemu/error-report.h"
31 #include "sysemu/sysemu.h"
32 #include "exec/gdbstub.h"
33 #include "sysemu/dma.h"
34 #include "sysemu/kvm.h"
35 #include "qmp-commands.h"
36
37 #include "qemu/thread.h"
38 #include "sysemu/cpus.h"
39 #include "sysemu/qtest.h"
40 #include "qemu/main-loop.h"
41 #include "qemu/bitmap.h"
42 #include "qemu/seqlock.h"
43 #include "qapi-event.h"
44 #include "hw/nmi.h"
45
46 #ifndef _WIN32
47 #include "qemu/compatfd.h"
48 #endif
49
50 #ifdef CONFIG_LINUX
51
52 #include <sys/prctl.h>
53
54 #ifndef PR_MCE_KILL
55 #define PR_MCE_KILL 33
56 #endif
57
58 #ifndef PR_MCE_KILL_SET
59 #define PR_MCE_KILL_SET 1
60 #endif
61
62 #ifndef PR_MCE_KILL_EARLY
63 #define PR_MCE_KILL_EARLY 1
64 #endif
65
66 #endif /* CONFIG_LINUX */
67
/* NOTE(review): not referenced in the visible part of this file; presumably
 * used by code further down -- confirm. */
static CPUState *next_cpu;
/* NOTE(review): non-static and unused in the visible code; presumably
 * consumed by other translation units -- confirm. */
int64_t max_delay;
int64_t max_advance;

/* vcpu throttling controls */
/* Timer created in cpu_ticks_init(); its callback is cpu_throttle_timer_tick(). */
static QEMUTimer *throttle_timer;
/* Current throttle percentage, accessed with atomic_read()/atomic_set().
 * Zero means throttling is inactive (see cpu_throttle_active()). */
static unsigned int throttle_percentage;

/* Bounds that cpu_throttle_set() clamps the requested percentage to. */
#define CPU_THROTTLE_PCT_MIN 1
#define CPU_THROTTLE_PCT_MAX 99
/* Base timeslice in nanoseconds (10 ms) from which the sleep time and the
 * timer period are derived. */
#define CPU_THROTTLE_TIMESLICE_NS 10000000
79
80 bool cpu_is_stopped(CPUState *cpu)
81 {
82 return cpu->stopped || !runstate_is_running();
83 }
84
85 static bool cpu_thread_is_idle(CPUState *cpu)
86 {
87 if (cpu->stop || cpu->queued_work_first) {
88 return false;
89 }
90 if (cpu_is_stopped(cpu)) {
91 return true;
92 }
93 if (!cpu->halted || cpu_has_work(cpu) ||
94 kvm_halt_in_kernel()) {
95 return false;
96 }
97 return true;
98 }
99
100 static bool all_cpu_threads_idle(void)
101 {
102 CPUState *cpu;
103
104 CPU_FOREACH(cpu) {
105 if (!cpu_thread_is_idle(cpu)) {
106 return false;
107 }
108 }
109 return true;
110 }
111
/***********************************************************/
/* guest cycle counter */

/* Protected by TimersState seqlock */

/* Value of the "sleep" option read in configure_icount(); when true,
 * qemu_clock_warp() relies on icount_warp_timer. */
static bool icount_sleep = true;
/* QEMU_CLOCK_VIRTUAL_RT time at which the current warp started, or -1 when
 * no warp is pending (icount_warp_rt() returns early on -1). */
static int64_t vm_clock_warp_start = -1;
/* Conversion factor from emulated instructions to virtual clock ticks. */
static int icount_time_shift;
/* Arbitrarily pick 1MIPS as the minimum allowable speed. */
#define MAX_ICOUNT_SHIFT 10

/* Timers driving icount_adjust_rt(), icount_adjust_vm() and
 * icount_warp_rt() respectively; created in configure_icount(). */
static QEMUTimer *icount_rt_timer;
static QEMUTimer *icount_vm_timer;
static QEMUTimer *icount_warp_timer;
127
/* Bookkeeping for the host tick counter, the host clock and the
 * instruction counter; a single instance (timers_state) exists. */
typedef struct TimersState {
    /* Protected by BQL. */
    /* Last value returned by cpu_get_ticks(); used to keep it monotonic. */
    int64_t cpu_ticks_prev;
    /* Offset added to the host tick counter in cpu_get_ticks(). */
    int64_t cpu_ticks_offset;

    /* cpu_clock_offset can be read out of BQL, so protect it with
     * this lock.
     */
    QemuSeqLock vm_clock_seqlock;
    int64_t cpu_clock_offset;
    /* Non-zero while ticks are enabled (see cpu_enable_ticks()). */
    int32_t cpu_ticks_enabled;
    /* NOTE(review): only referenced by vmstate_timers; presumably a
     * placeholder kept for migration stream layout -- confirm. */
    int64_t dummy;

    /* Compensate for varying guest execution speed. */
    int64_t qemu_icount_bias;
    /* Only written by TCG thread */
    int64_t qemu_icount;
} TimersState;

static TimersState timers_state;
148
149 int64_t cpu_get_icount_raw(void)
150 {
151 int64_t icount;
152 CPUState *cpu = current_cpu;
153
154 icount = timers_state.qemu_icount;
155 if (cpu) {
156 if (!cpu->can_do_io) {
157 fprintf(stderr, "Bad icount read\n");
158 exit(1);
159 }
160 icount -= (cpu->icount_decr.u16.low + cpu->icount_extra);
161 }
162 return icount;
163 }
164
165 /* Return the virtual CPU time, based on the instruction counter. */
166 static int64_t cpu_get_icount_locked(void)
167 {
168 int64_t icount = cpu_get_icount_raw();
169 return timers_state.qemu_icount_bias + cpu_icount_to_ns(icount);
170 }
171
172 int64_t cpu_get_icount(void)
173 {
174 int64_t icount;
175 unsigned start;
176
177 do {
178 start = seqlock_read_begin(&timers_state.vm_clock_seqlock);
179 icount = cpu_get_icount_locked();
180 } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start));
181
182 return icount;
183 }
184
185 int64_t cpu_icount_to_ns(int64_t icount)
186 {
187 return icount << icount_time_shift;
188 }
189
190 /* return the host CPU cycle counter and handle stop/restart */
191 /* Caller must hold the BQL */
192 int64_t cpu_get_ticks(void)
193 {
194 int64_t ticks;
195
196 if (use_icount) {
197 return cpu_get_icount();
198 }
199
200 ticks = timers_state.cpu_ticks_offset;
201 if (timers_state.cpu_ticks_enabled) {
202 ticks += cpu_get_real_ticks();
203 }
204
205 if (timers_state.cpu_ticks_prev > ticks) {
206 /* Note: non increasing ticks may happen if the host uses
207 software suspend */
208 timers_state.cpu_ticks_offset += timers_state.cpu_ticks_prev - ticks;
209 ticks = timers_state.cpu_ticks_prev;
210 }
211
212 timers_state.cpu_ticks_prev = ticks;
213 return ticks;
214 }
215
216 static int64_t cpu_get_clock_locked(void)
217 {
218 int64_t ticks;
219
220 ticks = timers_state.cpu_clock_offset;
221 if (timers_state.cpu_ticks_enabled) {
222 ticks += get_clock();
223 }
224
225 return ticks;
226 }
227
228 /* return the host CPU monotonic timer and handle stop/restart */
229 int64_t cpu_get_clock(void)
230 {
231 int64_t ti;
232 unsigned start;
233
234 do {
235 start = seqlock_read_begin(&timers_state.vm_clock_seqlock);
236 ti = cpu_get_clock_locked();
237 } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start));
238
239 return ti;
240 }
241
242 /* enable cpu_get_ticks()
243 * Caller must hold BQL which server as mutex for vm_clock_seqlock.
244 */
245 void cpu_enable_ticks(void)
246 {
247 /* Here, the really thing protected by seqlock is cpu_clock_offset. */
248 seqlock_write_lock(&timers_state.vm_clock_seqlock);
249 if (!timers_state.cpu_ticks_enabled) {
250 timers_state.cpu_ticks_offset -= cpu_get_real_ticks();
251 timers_state.cpu_clock_offset -= get_clock();
252 timers_state.cpu_ticks_enabled = 1;
253 }
254 seqlock_write_unlock(&timers_state.vm_clock_seqlock);
255 }
256
257 /* disable cpu_get_ticks() : the clock is stopped. You must not call
258 * cpu_get_ticks() after that.
259 * Caller must hold BQL which server as mutex for vm_clock_seqlock.
260 */
261 void cpu_disable_ticks(void)
262 {
263 /* Here, the really thing protected by seqlock is cpu_clock_offset. */
264 seqlock_write_lock(&timers_state.vm_clock_seqlock);
265 if (timers_state.cpu_ticks_enabled) {
266 timers_state.cpu_ticks_offset += cpu_get_real_ticks();
267 timers_state.cpu_clock_offset = cpu_get_clock_locked();
268 timers_state.cpu_ticks_enabled = 0;
269 }
270 seqlock_write_unlock(&timers_state.vm_clock_seqlock);
271 }
272
/* Correlation between real and virtual time is always going to be
   fairly approximate, so ignore small variation.
   When the guest is idle real and virtual time will be aligned in
   the IO wait loop.
   The tolerance is one tenth of get_ticks_per_sec(); it is compared against
   the drift in icount_adjust(). */
#define ICOUNT_WOBBLE (get_ticks_per_sec() / 10)
278
279 static void icount_adjust(void)
280 {
281 int64_t cur_time;
282 int64_t cur_icount;
283 int64_t delta;
284
285 /* Protected by TimersState mutex. */
286 static int64_t last_delta;
287
288 /* If the VM is not running, then do nothing. */
289 if (!runstate_is_running()) {
290 return;
291 }
292
293 seqlock_write_lock(&timers_state.vm_clock_seqlock);
294 cur_time = cpu_get_clock_locked();
295 cur_icount = cpu_get_icount_locked();
296
297 delta = cur_icount - cur_time;
298 /* FIXME: This is a very crude algorithm, somewhat prone to oscillation. */
299 if (delta > 0
300 && last_delta + ICOUNT_WOBBLE < delta * 2
301 && icount_time_shift > 0) {
302 /* The guest is getting too far ahead. Slow time down. */
303 icount_time_shift--;
304 }
305 if (delta < 0
306 && last_delta - ICOUNT_WOBBLE > delta * 2
307 && icount_time_shift < MAX_ICOUNT_SHIFT) {
308 /* The guest is getting too far behind. Speed time up. */
309 icount_time_shift++;
310 }
311 last_delta = delta;
312 timers_state.qemu_icount_bias = cur_icount
313 - (timers_state.qemu_icount << icount_time_shift);
314 seqlock_write_unlock(&timers_state.vm_clock_seqlock);
315 }
316
317 static void icount_adjust_rt(void *opaque)
318 {
319 timer_mod(icount_rt_timer,
320 qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL_RT) + 1000);
321 icount_adjust();
322 }
323
324 static void icount_adjust_vm(void *opaque)
325 {
326 timer_mod(icount_vm_timer,
327 qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
328 get_ticks_per_sec() / 10);
329 icount_adjust();
330 }
331
332 static int64_t qemu_icount_round(int64_t count)
333 {
334 return (count + (1 << icount_time_shift) - 1) >> icount_time_shift;
335 }
336
337 static void icount_warp_rt(void *opaque)
338 {
339 /* The icount_warp_timer is rescheduled soon after vm_clock_warp_start
340 * changes from -1 to another value, so the race here is okay.
341 */
342 if (atomic_read(&vm_clock_warp_start) == -1) {
343 return;
344 }
345
346 seqlock_write_lock(&timers_state.vm_clock_seqlock);
347 if (runstate_is_running()) {
348 int64_t clock = cpu_get_clock_locked();
349 int64_t warp_delta;
350
351 warp_delta = clock - vm_clock_warp_start;
352 if (use_icount == 2) {
353 /*
354 * In adaptive mode, do not let QEMU_CLOCK_VIRTUAL run too
355 * far ahead of real time.
356 */
357 int64_t cur_icount = cpu_get_icount_locked();
358 int64_t delta = clock - cur_icount;
359 warp_delta = MIN(warp_delta, delta);
360 }
361 timers_state.qemu_icount_bias += warp_delta;
362 }
363 vm_clock_warp_start = -1;
364 seqlock_write_unlock(&timers_state.vm_clock_seqlock);
365
366 if (qemu_clock_expired(QEMU_CLOCK_VIRTUAL)) {
367 qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
368 }
369 }
370
371 void qtest_clock_warp(int64_t dest)
372 {
373 int64_t clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
374 AioContext *aio_context;
375 assert(qtest_enabled());
376 aio_context = qemu_get_aio_context();
377 while (clock < dest) {
378 int64_t deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
379 int64_t warp = qemu_soonest_timeout(dest - clock, deadline);
380
381 seqlock_write_lock(&timers_state.vm_clock_seqlock);
382 timers_state.qemu_icount_bias += warp;
383 seqlock_write_unlock(&timers_state.vm_clock_seqlock);
384
385 qemu_clock_run_timers(QEMU_CLOCK_VIRTUAL);
386 timerlist_run_timers(aio_context->tlg.tl[QEMU_CLOCK_VIRTUAL]);
387 clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
388 }
389 qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
390 }
391
392 void qemu_clock_warp(QEMUClockType type)
393 {
394 int64_t clock;
395 int64_t deadline;
396
397 /*
398 * There are too many global variables to make the "warp" behavior
399 * applicable to other clocks. But a clock argument removes the
400 * need for if statements all over the place.
401 */
402 if (type != QEMU_CLOCK_VIRTUAL || !use_icount) {
403 return;
404 }
405
406 if (icount_sleep) {
407 /*
408 * If the CPUs have been sleeping, advance QEMU_CLOCK_VIRTUAL timer now.
409 * This ensures that the deadline for the timer is computed correctly
410 * below.
411 * This also makes sure that the insn counter is synchronized before
412 * the CPU starts running, in case the CPU is woken by an event other
413 * than the earliest QEMU_CLOCK_VIRTUAL timer.
414 */
415 icount_warp_rt(NULL);
416 timer_del(icount_warp_timer);
417 }
418 if (!all_cpu_threads_idle()) {
419 return;
420 }
421
422 if (qtest_enabled()) {
423 /* When testing, qtest commands advance icount. */
424 return;
425 }
426
427 /* We want to use the earliest deadline from ALL vm_clocks */
428 clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT);
429 deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
430 if (deadline < 0) {
431 static bool notified;
432 if (!icount_sleep && !notified) {
433 error_report("WARNING: icount sleep disabled and no active timers");
434 notified = true;
435 }
436 return;
437 }
438
439 if (deadline > 0) {
440 /*
441 * Ensure QEMU_CLOCK_VIRTUAL proceeds even when the virtual CPU goes to
442 * sleep. Otherwise, the CPU might be waiting for a future timer
443 * interrupt to wake it up, but the interrupt never comes because
444 * the vCPU isn't running any insns and thus doesn't advance the
445 * QEMU_CLOCK_VIRTUAL.
446 */
447 if (!icount_sleep) {
448 /*
449 * We never let VCPUs sleep in no sleep icount mode.
450 * If there is a pending QEMU_CLOCK_VIRTUAL timer we just advance
451 * to the next QEMU_CLOCK_VIRTUAL event and notify it.
452 * It is useful when we want a deterministic execution time,
453 * isolated from host latencies.
454 */
455 seqlock_write_lock(&timers_state.vm_clock_seqlock);
456 timers_state.qemu_icount_bias += deadline;
457 seqlock_write_unlock(&timers_state.vm_clock_seqlock);
458 qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
459 } else {
460 /*
461 * We do stop VCPUs and only advance QEMU_CLOCK_VIRTUAL after some
462 * "real" time, (related to the time left until the next event) has
463 * passed. The QEMU_CLOCK_VIRTUAL_RT clock will do this.
464 * This avoids that the warps are visible externally; for example,
465 * you will not be sending network packets continuously instead of
466 * every 100ms.
467 */
468 seqlock_write_lock(&timers_state.vm_clock_seqlock);
469 if (vm_clock_warp_start == -1 || vm_clock_warp_start > clock) {
470 vm_clock_warp_start = clock;
471 }
472 seqlock_write_unlock(&timers_state.vm_clock_seqlock);
473 timer_mod_anticipate(icount_warp_timer, clock + deadline);
474 }
475 } else if (deadline == 0) {
476 qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
477 }
478 }
479
480 static bool icount_state_needed(void *opaque)
481 {
482 return use_icount;
483 }
484
/*
 * This is a subsection for icount migration.
 * It carries the icount bias and the instruction counter, and is only
 * used when icount_state_needed() returns true.
 */
static const VMStateDescription icount_vmstate_timers = {
    .name = "timer/icount",
    .version_id = 1,
    .minimum_version_id = 1,
    .needed = icount_state_needed,
    .fields = (VMStateField[]) {
        VMSTATE_INT64(qemu_icount_bias, TimersState),
        VMSTATE_INT64(qemu_icount, TimersState),
        VMSTATE_END_OF_LIST()
    }
};
499
/*
 * Migration description for TimersState (registered in cpu_ticks_init()).
 * cpu_clock_offset is a version-2 field; icount state travels in the
 * "timer/icount" subsection.
 */
static const VMStateDescription vmstate_timers = {
    .name = "timer",
    .version_id = 2,
    .minimum_version_id = 1,
    .fields = (VMStateField[]) {
        VMSTATE_INT64(cpu_ticks_offset, TimersState),
        VMSTATE_INT64(dummy, TimersState),
        VMSTATE_INT64_V(cpu_clock_offset, TimersState, 2),
        VMSTATE_END_OF_LIST()
    },
    .subsections = (const VMStateDescription*[]) {
        &icount_vmstate_timers,
        NULL
    }
};
515
516 static void cpu_throttle_thread(void *opaque)
517 {
518 CPUState *cpu = opaque;
519 double pct;
520 double throttle_ratio;
521 long sleeptime_ns;
522
523 if (!cpu_throttle_get_percentage()) {
524 return;
525 }
526
527 pct = (double)cpu_throttle_get_percentage()/100;
528 throttle_ratio = pct / (1 - pct);
529 sleeptime_ns = (long)(throttle_ratio * CPU_THROTTLE_TIMESLICE_NS);
530
531 qemu_mutex_unlock_iothread();
532 atomic_set(&cpu->throttle_thread_scheduled, 0);
533 g_usleep(sleeptime_ns / 1000); /* Convert ns to us for usleep call */
534 qemu_mutex_lock_iothread();
535 }
536
537 static void cpu_throttle_timer_tick(void *opaque)
538 {
539 CPUState *cpu;
540 double pct;
541
542 /* Stop the timer if needed */
543 if (!cpu_throttle_get_percentage()) {
544 return;
545 }
546 CPU_FOREACH(cpu) {
547 if (!atomic_xchg(&cpu->throttle_thread_scheduled, 1)) {
548 async_run_on_cpu(cpu, cpu_throttle_thread, cpu);
549 }
550 }
551
552 pct = (double)cpu_throttle_get_percentage()/100;
553 timer_mod(throttle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT) +
554 CPU_THROTTLE_TIMESLICE_NS / (1-pct));
555 }
556
557 void cpu_throttle_set(int new_throttle_pct)
558 {
559 /* Ensure throttle percentage is within valid range */
560 new_throttle_pct = MIN(new_throttle_pct, CPU_THROTTLE_PCT_MAX);
561 new_throttle_pct = MAX(new_throttle_pct, CPU_THROTTLE_PCT_MIN);
562
563 atomic_set(&throttle_percentage, new_throttle_pct);
564
565 timer_mod(throttle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT) +
566 CPU_THROTTLE_TIMESLICE_NS);
567 }
568
/* Turn throttling off by zeroing the percentage; the timer callback and the
 * per-CPU throttle work both return early once they observe zero. */
void cpu_throttle_stop(void)
{
    atomic_set(&throttle_percentage, 0);
}
573
/* Throttling is active whenever the percentage is non-zero. */
bool cpu_throttle_active(void)
{
    int pct = cpu_throttle_get_percentage();

    return pct != 0;
}
578
579 int cpu_throttle_get_percentage(void)
580 {
581 return atomic_read(&throttle_percentage);
582 }
583
/*
 * One-time setup of the timer state: initialise the vm_clock seqlock,
 * register timers_state for migration, and create (but do not arm) the
 * throttle timer.
 */
void cpu_ticks_init(void)
{
    seqlock_init(&timers_state.vm_clock_seqlock, NULL);
    vmstate_register(NULL, 0, &vmstate_timers, &timers_state);
    throttle_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL_RT,
                                  cpu_throttle_timer_tick, NULL);
}
591
592 void configure_icount(QemuOpts *opts, Error **errp)
593 {
594 const char *option;
595 char *rem_str = NULL;
596
597 option = qemu_opt_get(opts, "shift");
598 if (!option) {
599 if (qemu_opt_get(opts, "align") != NULL) {
600 error_setg(errp, "Please specify shift option when using align");
601 }
602 return;
603 }
604
605 icount_sleep = qemu_opt_get_bool(opts, "sleep", true);
606 if (icount_sleep) {
607 icount_warp_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL_RT,
608 icount_warp_rt, NULL);
609 }
610
611 icount_align_option = qemu_opt_get_bool(opts, "align", false);
612
613 if (icount_align_option && !icount_sleep) {
614 error_setg(errp, "align=on and sleep=no are incompatible");
615 }
616 if (strcmp(option, "auto") != 0) {
617 errno = 0;
618 icount_time_shift = strtol(option, &rem_str, 0);
619 if (errno != 0 || *rem_str != '\0' || !strlen(option)) {
620 error_setg(errp, "icount: Invalid shift value");
621 }
622 use_icount = 1;
623 return;
624 } else if (icount_align_option) {
625 error_setg(errp, "shift=auto and align=on are incompatible");
626 } else if (!icount_sleep) {
627 error_setg(errp, "shift=auto and sleep=no are incompatible");
628 }
629
630 use_icount = 2;
631
632 /* 125MIPS seems a reasonable initial guess at the guest speed.
633 It will be corrected fairly quickly anyway. */
634 icount_time_shift = 3;
635
636 /* Have both realtime and virtual time triggers for speed adjustment.
637 The realtime trigger catches emulated time passing too slowly,
638 the virtual time trigger catches emulated time passing too fast.
639 Realtime triggers occur even when idle, so use them less frequently
640 than VM triggers. */
641 icount_rt_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL_RT,
642 icount_adjust_rt, NULL);
643 timer_mod(icount_rt_timer,
644 qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL_RT) + 1000);
645 icount_vm_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
646 icount_adjust_vm, NULL);
647 timer_mod(icount_vm_timer,
648 qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
649 get_ticks_per_sec() / 10);
650 }
651
652 /***********************************************************/
653 void hw_error(const char *fmt, ...)
654 {
655 va_list ap;
656 CPUState *cpu;
657
658 va_start(ap, fmt);
659 fprintf(stderr, "qemu: hardware error: ");
660 vfprintf(stderr, fmt, ap);
661 fprintf(stderr, "\n");
662 CPU_FOREACH(cpu) {
663 fprintf(stderr, "CPU #%d:\n", cpu->cpu_index);
664 cpu_dump_state(cpu, stderr, fprintf, CPU_DUMP_FPU);
665 }
666 va_end(ap);
667 abort();
668 }
669
670 void cpu_synchronize_all_states(void)
671 {
672 CPUState *cpu;
673
674 CPU_FOREACH(cpu) {
675 cpu_synchronize_state(cpu);
676 }
677 }
678
679 void cpu_synchronize_all_post_reset(void)
680 {
681 CPUState *cpu;
682
683 CPU_FOREACH(cpu) {
684 cpu_synchronize_post_reset(cpu);
685 }
686 }
687
688 void cpu_synchronize_all_post_init(void)
689 {
690 CPUState *cpu;
691
692 CPU_FOREACH(cpu) {
693 cpu_synchronize_post_init(cpu);
694 }
695 }
696
697 void cpu_clean_all_dirty(void)
698 {
699 CPUState *cpu;
700
701 CPU_FOREACH(cpu) {
702 cpu_clean_state(cpu);
703 }
704 }
705
706 static int do_vm_stop(RunState state)
707 {
708 int ret = 0;
709
710 if (runstate_is_running()) {
711 cpu_disable_ticks();
712 pause_all_vcpus();
713 runstate_set(state);
714 vm_state_notify(0, state);
715 qapi_event_send_stop(&error_abort);
716 }
717
718 bdrv_drain_all();
719 ret = bdrv_flush_all();
720
721 return ret;
722 }
723
724 static bool cpu_can_run(CPUState *cpu)
725 {
726 if (cpu->stop) {
727 return false;
728 }
729 if (cpu_is_stopped(cpu)) {
730 return false;
731 }
732 return true;
733 }
734
/*
 * Handle a debug exit from guest execution: tell the gdbstub which CPU
 * stopped, raise a system debug request, and mark the CPU as stopped.
 */
static void cpu_handle_guest_debug(CPUState *cpu)
{
    gdb_set_stop_cpu(cpu);
    qemu_system_debug_request();
    cpu->stopped = true;
}
741
742 #ifdef CONFIG_LINUX
743 static void sigbus_reraise(void)
744 {
745 sigset_t set;
746 struct sigaction action;
747
748 memset(&action, 0, sizeof(action));
749 action.sa_handler = SIG_DFL;
750 if (!sigaction(SIGBUS, &action, NULL)) {
751 raise(SIGBUS);
752 sigemptyset(&set);
753 sigaddset(&set, SIGBUS);
754 sigprocmask(SIG_UNBLOCK, &set, NULL);
755 }
756 perror("Failed to re-raise SIGBUS!\n");
757 abort();
758 }
759
760 static void sigbus_handler(int n, struct qemu_signalfd_siginfo *siginfo,
761 void *ctx)
762 {
763 if (kvm_on_sigbus(siginfo->ssi_code,
764 (void *)(intptr_t)siginfo->ssi_addr)) {
765 sigbus_reraise();
766 }
767 }
768
769 static void qemu_init_sigbus(void)
770 {
771 struct sigaction action;
772
773 memset(&action, 0, sizeof(action));
774 action.sa_flags = SA_SIGINFO;
775 action.sa_sigaction = (void (*)(int, siginfo_t*, void*))sigbus_handler;
776 sigaction(SIGBUS, &action, NULL);
777
778 prctl(PR_MCE_KILL, PR_MCE_KILL_SET, PR_MCE_KILL_EARLY, 0, 0);
779 }
780
781 static void qemu_kvm_eat_signals(CPUState *cpu)
782 {
783 struct timespec ts = { 0, 0 };
784 siginfo_t siginfo;
785 sigset_t waitset;
786 sigset_t chkset;
787 int r;
788
789 sigemptyset(&waitset);
790 sigaddset(&waitset, SIG_IPI);
791 sigaddset(&waitset, SIGBUS);
792
793 do {
794 r = sigtimedwait(&waitset, &siginfo, &ts);
795 if (r == -1 && !(errno == EAGAIN || errno == EINTR)) {
796 perror("sigtimedwait");
797 exit(1);
798 }
799
800 switch (r) {
801 case SIGBUS:
802 if (kvm_on_sigbus_vcpu(cpu, siginfo.si_code, siginfo.si_addr)) {
803 sigbus_reraise();
804 }
805 break;
806 default:
807 break;
808 }
809
810 r = sigpending(&chkset);
811 if (r == -1) {
812 perror("sigpending");
813 exit(1);
814 }
815 } while (sigismember(&chkset, SIG_IPI) || sigismember(&chkset, SIGBUS));
816 }
817
818 #else /* !CONFIG_LINUX */
819
/* Non-Linux build: no SIGBUS handling is set up. */
static void qemu_init_sigbus(void)
{
}

/* Non-Linux build: there are no signals to consume. */
static void qemu_kvm_eat_signals(CPUState *cpu)
{
}
827 #endif /* !CONFIG_LINUX */
828
829 #ifndef _WIN32
/* Intentionally empty handler; installed for SIG_IPI by
 * qemu_kvm_init_cpu_signals(). */
static void dummy_signal(int sig)
{
}
833
834 static void qemu_kvm_init_cpu_signals(CPUState *cpu)
835 {
836 int r;
837 sigset_t set;
838 struct sigaction sigact;
839
840 memset(&sigact, 0, sizeof(sigact));
841 sigact.sa_handler = dummy_signal;
842 sigaction(SIG_IPI, &sigact, NULL);
843
844 pthread_sigmask(SIG_BLOCK, NULL, &set);
845 sigdelset(&set, SIG_IPI);
846 sigdelset(&set, SIGBUS);
847 r = kvm_set_signal_mask(cpu, &set);
848 if (r) {
849 fprintf(stderr, "kvm_set_signal_mask: %s\n", strerror(-r));
850 exit(1);
851 }
852 }
853
854 #else /* _WIN32 */
/* Windows build: this path must never be reached, so abort. */
static void qemu_kvm_init_cpu_signals(CPUState *cpu)
{
    abort();
}
859 #endif /* _WIN32 */
860
/* The mutex taken by qemu_mutex_lock_iothread(). */
static QemuMutex qemu_global_mutex;
/* Broadcast by qemu_mutex_lock_iothread() once it holds the mutex; waited
 * on in qemu_tcg_wait_io_event(). */
static QemuCond qemu_io_proceeded_cond;
/* Count of threads currently inside qemu_mutex_lock_iothread() that have
 * not yet acquired the mutex. */
static unsigned iothread_requesting_mutex;

/* Thread that called qemu_init_cpu_loop(). */
static QemuThread io_thread;

/* cpu creation */
/* Signalled by the vCPU thread functions after they set cpu->created. */
static QemuCond qemu_cpu_cond;
/* system init */
/* Signalled when a CPU honours a stop request; waited on in
 * pause_all_vcpus(). */
static QemuCond qemu_pause_cond;
/* Broadcast by flush_queued_work(); waited on in run_on_cpu(). */
static QemuCond qemu_work_cond;
872
873 void qemu_init_cpu_loop(void)
874 {
875 qemu_init_sigbus();
876 qemu_cond_init(&qemu_cpu_cond);
877 qemu_cond_init(&qemu_pause_cond);
878 qemu_cond_init(&qemu_work_cond);
879 qemu_cond_init(&qemu_io_proceeded_cond);
880 qemu_mutex_init(&qemu_global_mutex);
881
882 qemu_thread_get_self(&io_thread);
883 }
884
885 void run_on_cpu(CPUState *cpu, void (*func)(void *data), void *data)
886 {
887 struct qemu_work_item wi;
888
889 if (qemu_cpu_is_self(cpu)) {
890 func(data);
891 return;
892 }
893
894 wi.func = func;
895 wi.data = data;
896 wi.free = false;
897
898 qemu_mutex_lock(&cpu->work_mutex);
899 if (cpu->queued_work_first == NULL) {
900 cpu->queued_work_first = &wi;
901 } else {
902 cpu->queued_work_last->next = &wi;
903 }
904 cpu->queued_work_last = &wi;
905 wi.next = NULL;
906 wi.done = false;
907 qemu_mutex_unlock(&cpu->work_mutex);
908
909 qemu_cpu_kick(cpu);
910 while (!atomic_mb_read(&wi.done)) {
911 CPUState *self_cpu = current_cpu;
912
913 qemu_cond_wait(&qemu_work_cond, &qemu_global_mutex);
914 current_cpu = self_cpu;
915 }
916 }
917
918 void async_run_on_cpu(CPUState *cpu, void (*func)(void *data), void *data)
919 {
920 struct qemu_work_item *wi;
921
922 if (qemu_cpu_is_self(cpu)) {
923 func(data);
924 return;
925 }
926
927 wi = g_malloc0(sizeof(struct qemu_work_item));
928 wi->func = func;
929 wi->data = data;
930 wi->free = true;
931
932 qemu_mutex_lock(&cpu->work_mutex);
933 if (cpu->queued_work_first == NULL) {
934 cpu->queued_work_first = wi;
935 } else {
936 cpu->queued_work_last->next = wi;
937 }
938 cpu->queued_work_last = wi;
939 wi->next = NULL;
940 wi->done = false;
941 qemu_mutex_unlock(&cpu->work_mutex);
942
943 qemu_cpu_kick(cpu);
944 }
945
946 static void flush_queued_work(CPUState *cpu)
947 {
948 struct qemu_work_item *wi;
949
950 if (cpu->queued_work_first == NULL) {
951 return;
952 }
953
954 qemu_mutex_lock(&cpu->work_mutex);
955 while (cpu->queued_work_first != NULL) {
956 wi = cpu->queued_work_first;
957 cpu->queued_work_first = wi->next;
958 if (!cpu->queued_work_first) {
959 cpu->queued_work_last = NULL;
960 }
961 qemu_mutex_unlock(&cpu->work_mutex);
962 wi->func(wi->data);
963 qemu_mutex_lock(&cpu->work_mutex);
964 if (wi->free) {
965 g_free(wi);
966 } else {
967 atomic_mb_set(&wi->done, true);
968 }
969 }
970 qemu_mutex_unlock(&cpu->work_mutex);
971 qemu_cond_broadcast(&qemu_work_cond);
972 }
973
/*
 * Common per-CPU housekeeping after a wait: honour a pending stop request
 * (mark the CPU stopped and signal qemu_pause_cond), run queued work, and
 * clear the kicked flag.
 */
static void qemu_wait_io_event_common(CPUState *cpu)
{
    if (cpu->stop) {
        cpu->stop = false;
        cpu->stopped = true;
        qemu_cond_signal(&qemu_pause_cond);
    }
    flush_queued_work(cpu);
    cpu->thread_kicked = false;
}
984
985 static void qemu_tcg_wait_io_event(CPUState *cpu)
986 {
987 while (all_cpu_threads_idle()) {
988 /* Start accounting real time to the virtual clock if the CPUs
989 are idle. */
990 qemu_clock_warp(QEMU_CLOCK_VIRTUAL);
991 qemu_cond_wait(cpu->halt_cond, &qemu_global_mutex);
992 }
993
994 while (iothread_requesting_mutex) {
995 qemu_cond_wait(&qemu_io_proceeded_cond, &qemu_global_mutex);
996 }
997
998 CPU_FOREACH(cpu) {
999 qemu_wait_io_event_common(cpu);
1000 }
1001 }
1002
/*
 * KVM vCPU thread wait: sleep on the CPU's halt condition while its thread
 * is idle, then consume pending signals and run the common housekeeping.
 */
static void qemu_kvm_wait_io_event(CPUState *cpu)
{
    while (cpu_thread_is_idle(cpu)) {
        qemu_cond_wait(cpu->halt_cond, &qemu_global_mutex);
    }

    qemu_kvm_eat_signals(cpu);
    qemu_wait_io_event_common(cpu);
}
1012
1013 static void *qemu_kvm_cpu_thread_fn(void *arg)
1014 {
1015 CPUState *cpu = arg;
1016 int r;
1017
1018 rcu_register_thread();
1019
1020 qemu_mutex_lock_iothread();
1021 qemu_thread_get_self(cpu->thread);
1022 cpu->thread_id = qemu_get_thread_id();
1023 cpu->can_do_io = 1;
1024 current_cpu = cpu;
1025
1026 r = kvm_init_vcpu(cpu);
1027 if (r < 0) {
1028 fprintf(stderr, "kvm_init_vcpu failed: %s\n", strerror(-r));
1029 exit(1);
1030 }
1031
1032 qemu_kvm_init_cpu_signals(cpu);
1033
1034 /* signal CPU creation */
1035 cpu->created = true;
1036 qemu_cond_signal(&qemu_cpu_cond);
1037
1038 while (1) {
1039 if (cpu_can_run(cpu)) {
1040 r = kvm_cpu_exec(cpu);
1041 if (r == EXCP_DEBUG) {
1042 cpu_handle_guest_debug(cpu);
1043 }
1044 }
1045 qemu_kvm_wait_io_event(cpu);
1046 }
1047
1048 return NULL;
1049 }
1050
1051 static void *qemu_dummy_cpu_thread_fn(void *arg)
1052 {
1053 #ifdef _WIN32
1054 fprintf(stderr, "qtest is not supported under Windows\n");
1055 exit(1);
1056 #else
1057 CPUState *cpu = arg;
1058 sigset_t waitset;
1059 int r;
1060
1061 rcu_register_thread();
1062
1063 qemu_mutex_lock_iothread();
1064 qemu_thread_get_self(cpu->thread);
1065 cpu->thread_id = qemu_get_thread_id();
1066 cpu->can_do_io = 1;
1067
1068 sigemptyset(&waitset);
1069 sigaddset(&waitset, SIG_IPI);
1070
1071 /* signal CPU creation */
1072 cpu->created = true;
1073 qemu_cond_signal(&qemu_cpu_cond);
1074
1075 current_cpu = cpu;
1076 while (1) {
1077 current_cpu = NULL;
1078 qemu_mutex_unlock_iothread();
1079 do {
1080 int sig;
1081 r = sigwait(&waitset, &sig);
1082 } while (r == -1 && (errno == EAGAIN || errno == EINTR));
1083 if (r == -1) {
1084 perror("sigwait");
1085 exit(1);
1086 }
1087 qemu_mutex_lock_iothread();
1088 current_cpu = cpu;
1089 qemu_wait_io_event_common(cpu);
1090 }
1091
1092 return NULL;
1093 #endif
1094 }
1095
1096 static void tcg_exec_all(void);
1097
1098 static void *qemu_tcg_cpu_thread_fn(void *arg)
1099 {
1100 CPUState *cpu = arg;
1101
1102 rcu_register_thread();
1103
1104 qemu_mutex_lock_iothread();
1105 qemu_thread_get_self(cpu->thread);
1106
1107 CPU_FOREACH(cpu) {
1108 cpu->thread_id = qemu_get_thread_id();
1109 cpu->created = true;
1110 cpu->can_do_io = 1;
1111 }
1112 qemu_cond_signal(&qemu_cpu_cond);
1113
1114 /* wait for initial kick-off after machine start */
1115 while (first_cpu->stopped) {
1116 qemu_cond_wait(first_cpu->halt_cond, &qemu_global_mutex);
1117
1118 /* process any pending work */
1119 CPU_FOREACH(cpu) {
1120 qemu_wait_io_event_common(cpu);
1121 }
1122 }
1123
1124 /* process any pending work */
1125 atomic_mb_set(&exit_request, 1);
1126
1127 while (1) {
1128 tcg_exec_all();
1129
1130 if (use_icount) {
1131 int64_t deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
1132
1133 if (deadline == 0) {
1134 qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
1135 }
1136 }
1137 qemu_tcg_wait_io_event(QTAILQ_FIRST(&cpus));
1138 }
1139
1140 return NULL;
1141 }
1142
1143 static void qemu_cpu_kick_thread(CPUState *cpu)
1144 {
1145 #ifndef _WIN32
1146 int err;
1147
1148 if (cpu->thread_kicked) {
1149 return;
1150 }
1151 cpu->thread_kicked = true;
1152 err = pthread_kill(cpu->thread->thread, SIG_IPI);
1153 if (err) {
1154 fprintf(stderr, "qemu:%s: %s", __func__, strerror(err));
1155 exit(1);
1156 }
1157 #else /* _WIN32 */
1158 abort();
1159 #endif
1160 }
1161
/*
 * Ask the TCG thread to leave guest code: raise exit_request and, if a
 * CPU is currently recorded in tcg_current_cpu, call cpu_exit() on it.
 */
static void qemu_cpu_kick_no_halt(void)
{
    CPUState *cpu;
    /* Ensure whatever caused the exit has reached the CPU threads before
     * writing exit_request.
     */
    atomic_mb_set(&exit_request, 1);
    cpu = atomic_mb_read(&tcg_current_cpu);
    if (cpu) {
        cpu_exit(cpu);
    }
}
1174
1175 void qemu_cpu_kick(CPUState *cpu)
1176 {
1177 qemu_cond_broadcast(cpu->halt_cond);
1178 if (tcg_enabled()) {
1179 qemu_cpu_kick_no_halt();
1180 } else {
1181 qemu_cpu_kick_thread(cpu);
1182 }
1183 }
1184
/* Kick the thread of the calling vCPU; current_cpu must be set. */
void qemu_cpu_kick_self(void)
{
    assert(current_cpu);
    qemu_cpu_kick_thread(current_cpu);
}
1190
/* Return true if the calling thread is the thread recorded for @cpu. */
bool qemu_cpu_is_self(CPUState *cpu)
{
    return qemu_thread_is_self(cpu->thread);
}
1195
1196 bool qemu_in_vcpu_thread(void)
1197 {
1198 return current_cpu && qemu_cpu_is_self(current_cpu);
1199 }
1200
/* Per-thread flag: set by qemu_mutex_lock_iothread() and cleared by
 * qemu_mutex_unlock_iothread(). */
static __thread bool iothread_locked = false;

/* Return whether the calling thread took the iothread mutex through
 * qemu_mutex_lock_iothread() and has not released it since. */
bool qemu_mutex_iothread_locked(void)
{
    return iothread_locked;
}
1207
1208 void qemu_mutex_lock_iothread(void)
1209 {
1210 atomic_inc(&iothread_requesting_mutex);
1211 /* In the simple case there is no need to bump the VCPU thread out of
1212 * TCG code execution.
1213 */
1214 if (!tcg_enabled() || qemu_in_vcpu_thread() ||
1215 !first_cpu || !first_cpu->created) {
1216 qemu_mutex_lock(&qemu_global_mutex);
1217 atomic_dec(&iothread_requesting_mutex);
1218 } else {
1219 if (qemu_mutex_trylock(&qemu_global_mutex)) {
1220 qemu_cpu_kick_no_halt();
1221 qemu_mutex_lock(&qemu_global_mutex);
1222 }
1223 atomic_dec(&iothread_requesting_mutex);
1224 qemu_cond_broadcast(&qemu_io_proceeded_cond);
1225 }
1226 iothread_locked = true;
1227 }
1228
1229 void qemu_mutex_unlock_iothread(void)
1230 {
1231 iothread_locked = false;
1232 qemu_mutex_unlock(&qemu_global_mutex);
1233 }
1234
1235 static int all_vcpus_paused(void)
1236 {
1237 CPUState *cpu;
1238
1239 CPU_FOREACH(cpu) {
1240 if (!cpu->stopped) {
1241 return 0;
1242 }
1243 }
1244
1245 return 1;
1246 }
1247
1248 void pause_all_vcpus(void)
1249 {
1250 CPUState *cpu;
1251
1252 qemu_clock_enable(QEMU_CLOCK_VIRTUAL, false);
1253 CPU_FOREACH(cpu) {
1254 cpu->stop = true;
1255 qemu_cpu_kick(cpu);
1256 }
1257
1258 if (qemu_in_vcpu_thread()) {
1259 cpu_stop_current();
1260 if (!kvm_enabled()) {
1261 CPU_FOREACH(cpu) {
1262 cpu->stop = false;
1263 cpu->stopped = true;
1264 }
1265 return;
1266 }
1267 }
1268
1269 while (!all_vcpus_paused()) {
1270 qemu_cond_wait(&qemu_pause_cond, &qemu_global_mutex);
1271 CPU_FOREACH(cpu) {
1272 qemu_cpu_kick(cpu);
1273 }
1274 }
1275 }
1276
1277 void cpu_resume(CPUState *cpu)
1278 {
1279 cpu->stop = false;
1280 cpu->stopped = false;
1281 qemu_cpu_kick(cpu);
1282 }
1283
1284 void resume_all_vcpus(void)
1285 {
1286 CPUState *cpu;
1287
1288 qemu_clock_enable(QEMU_CLOCK_VIRTUAL, true);
1289 CPU_FOREACH(cpu) {
1290 cpu_resume(cpu);
1291 }
1292 }
1293
1294 /* For temporary buffers for forming a name */
1295 #define VCPU_THREAD_NAME_SIZE 16
1296
1297 static void qemu_tcg_init_vcpu(CPUState *cpu)
1298 {
1299 char thread_name[VCPU_THREAD_NAME_SIZE];
1300 static QemuCond *tcg_halt_cond;
1301 static QemuThread *tcg_cpu_thread;
1302
1303 tcg_cpu_address_space_init(cpu, cpu->as);
1304
1305 /* share a single thread for all cpus with TCG */
1306 if (!tcg_cpu_thread) {
1307 cpu->thread = g_malloc0(sizeof(QemuThread));
1308 cpu->halt_cond = g_malloc0(sizeof(QemuCond));
1309 qemu_cond_init(cpu->halt_cond);
1310 tcg_halt_cond = cpu->halt_cond;
1311 snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/TCG",
1312 cpu->cpu_index);
1313 qemu_thread_create(cpu->thread, thread_name, qemu_tcg_cpu_thread_fn,
1314 cpu, QEMU_THREAD_JOINABLE);
1315 #ifdef _WIN32
1316 cpu->hThread = qemu_thread_get_handle(cpu->thread);
1317 #endif
1318 while (!cpu->created) {
1319 qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
1320 }
1321 tcg_cpu_thread = cpu->thread;
1322 } else {
1323 cpu->thread = tcg_cpu_thread;
1324 cpu->halt_cond = tcg_halt_cond;
1325 }
1326 }
1327
1328 static void qemu_kvm_start_vcpu(CPUState *cpu)
1329 {
1330 char thread_name[VCPU_THREAD_NAME_SIZE];
1331
1332 cpu->thread = g_malloc0(sizeof(QemuThread));
1333 cpu->halt_cond = g_malloc0(sizeof(QemuCond));
1334 qemu_cond_init(cpu->halt_cond);
1335 snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/KVM",
1336 cpu->cpu_index);
1337 qemu_thread_create(cpu->thread, thread_name, qemu_kvm_cpu_thread_fn,
1338 cpu, QEMU_THREAD_JOINABLE);
1339 while (!cpu->created) {
1340 qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
1341 }
1342 }
1343
1344 static void qemu_dummy_start_vcpu(CPUState *cpu)
1345 {
1346 char thread_name[VCPU_THREAD_NAME_SIZE];
1347
1348 cpu->thread = g_malloc0(sizeof(QemuThread));
1349 cpu->halt_cond = g_malloc0(sizeof(QemuCond));
1350 qemu_cond_init(cpu->halt_cond);
1351 snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/DUMMY",
1352 cpu->cpu_index);
1353 qemu_thread_create(cpu->thread, thread_name, qemu_dummy_cpu_thread_fn, cpu,
1354 QEMU_THREAD_JOINABLE);
1355 while (!cpu->created) {
1356 qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
1357 }
1358 }
1359
1360 void qemu_init_vcpu(CPUState *cpu)
1361 {
1362 cpu->nr_cores = smp_cores;
1363 cpu->nr_threads = smp_threads;
1364 cpu->stopped = true;
1365 if (kvm_enabled()) {
1366 qemu_kvm_start_vcpu(cpu);
1367 } else if (tcg_enabled()) {
1368 qemu_tcg_init_vcpu(cpu);
1369 } else {
1370 qemu_dummy_start_vcpu(cpu);
1371 }
1372 }
1373
1374 void cpu_stop_current(void)
1375 {
1376 if (current_cpu) {
1377 current_cpu->stop = false;
1378 current_cpu->stopped = true;
1379 cpu_exit(current_cpu);
1380 qemu_cond_signal(&qemu_pause_cond);
1381 }
1382 }
1383
1384 int vm_stop(RunState state)
1385 {
1386 if (qemu_in_vcpu_thread()) {
1387 qemu_system_vmstop_request_prepare();
1388 qemu_system_vmstop_request(state);
1389 /*
1390 * FIXME: should not return to device code in case
1391 * vm_stop() has been requested.
1392 */
1393 cpu_stop_current();
1394 return 0;
1395 }
1396
1397 return do_vm_stop(state);
1398 }
1399
1400 /* does a state transition even if the VM is already stopped,
1401 current state is forgotten forever */
1402 int vm_stop_force_state(RunState state)
1403 {
1404 if (runstate_is_running()) {
1405 return vm_stop(state);
1406 } else {
1407 runstate_set(state);
1408 /* Make sure to return an error if the flush in a previous vm_stop()
1409 * failed. */
1410 return bdrv_flush_all();
1411 }
1412 }
1413
1414 static int tcg_cpu_exec(CPUState *cpu)
1415 {
1416 int ret;
1417 #ifdef CONFIG_PROFILER
1418 int64_t ti;
1419 #endif
1420
1421 #ifdef CONFIG_PROFILER
1422 ti = profile_getclock();
1423 #endif
1424 if (use_icount) {
1425 int64_t count;
1426 int64_t deadline;
1427 int decr;
1428 timers_state.qemu_icount -= (cpu->icount_decr.u16.low
1429 + cpu->icount_extra);
1430 cpu->icount_decr.u16.low = 0;
1431 cpu->icount_extra = 0;
1432 deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
1433
1434 /* Maintain prior (possibly buggy) behaviour where if no deadline
1435 * was set (as there is no QEMU_CLOCK_VIRTUAL timer) or it is more than
1436 * INT32_MAX nanoseconds ahead, we still use INT32_MAX
1437 * nanoseconds.
1438 */
1439 if ((deadline < 0) || (deadline > INT32_MAX)) {
1440 deadline = INT32_MAX;
1441 }
1442
1443 count = qemu_icount_round(deadline);
1444 timers_state.qemu_icount += count;
1445 decr = (count > 0xffff) ? 0xffff : count;
1446 count -= decr;
1447 cpu->icount_decr.u16.low = decr;
1448 cpu->icount_extra = count;
1449 }
1450 ret = cpu_exec(cpu);
1451 #ifdef CONFIG_PROFILER
1452 tcg_time += profile_getclock() - ti;
1453 #endif
1454 if (use_icount) {
1455 /* Fold pending instructions back into the
1456 instruction counter, and clear the interrupt flag. */
1457 timers_state.qemu_icount -= (cpu->icount_decr.u16.low
1458 + cpu->icount_extra);
1459 cpu->icount_decr.u32 = 0;
1460 cpu->icount_extra = 0;
1461 }
1462 return ret;
1463 }
1464
1465 static void tcg_exec_all(void)
1466 {
1467 int r;
1468
1469 /* Account partial waits to QEMU_CLOCK_VIRTUAL. */
1470 qemu_clock_warp(QEMU_CLOCK_VIRTUAL);
1471
1472 if (next_cpu == NULL) {
1473 next_cpu = first_cpu;
1474 }
1475 for (; next_cpu != NULL && !exit_request; next_cpu = CPU_NEXT(next_cpu)) {
1476 CPUState *cpu = next_cpu;
1477
1478 qemu_clock_enable(QEMU_CLOCK_VIRTUAL,
1479 (cpu->singlestep_enabled & SSTEP_NOTIMER) == 0);
1480
1481 if (cpu_can_run(cpu)) {
1482 r = tcg_cpu_exec(cpu);
1483 if (r == EXCP_DEBUG) {
1484 cpu_handle_guest_debug(cpu);
1485 break;
1486 }
1487 } else if (cpu->stop || cpu->stopped) {
1488 break;
1489 }
1490 }
1491
1492 /* Pairs with smp_wmb in qemu_cpu_kick. */
1493 atomic_mb_set(&exit_request, 0);
1494 }
1495
1496 void list_cpus(FILE *f, fprintf_function cpu_fprintf, const char *optarg)
1497 {
1498 /* XXX: implement xxx_cpu_list for targets that still miss it */
1499 #if defined(cpu_list)
1500 cpu_list(f, cpu_fprintf);
1501 #endif
1502 }
1503
1504 CpuInfoList *qmp_query_cpus(Error **errp)
1505 {
1506 CpuInfoList *head = NULL, *cur_item = NULL;
1507 CPUState *cpu;
1508
1509 CPU_FOREACH(cpu) {
1510 CpuInfoList *info;
1511 #if defined(TARGET_I386)
1512 X86CPU *x86_cpu = X86_CPU(cpu);
1513 CPUX86State *env = &x86_cpu->env;
1514 #elif defined(TARGET_PPC)
1515 PowerPCCPU *ppc_cpu = POWERPC_CPU(cpu);
1516 CPUPPCState *env = &ppc_cpu->env;
1517 #elif defined(TARGET_SPARC)
1518 SPARCCPU *sparc_cpu = SPARC_CPU(cpu);
1519 CPUSPARCState *env = &sparc_cpu->env;
1520 #elif defined(TARGET_MIPS)
1521 MIPSCPU *mips_cpu = MIPS_CPU(cpu);
1522 CPUMIPSState *env = &mips_cpu->env;
1523 #elif defined(TARGET_TRICORE)
1524 TriCoreCPU *tricore_cpu = TRICORE_CPU(cpu);
1525 CPUTriCoreState *env = &tricore_cpu->env;
1526 #endif
1527
1528 cpu_synchronize_state(cpu);
1529
1530 info = g_malloc0(sizeof(*info));
1531 info->value = g_malloc0(sizeof(*info->value));
1532 info->value->CPU = cpu->cpu_index;
1533 info->value->current = (cpu == first_cpu);
1534 info->value->halted = cpu->halted;
1535 info->value->qom_path = object_get_canonical_path(OBJECT(cpu));
1536 info->value->thread_id = cpu->thread_id;
1537 #if defined(TARGET_I386)
1538 info->value->has_pc = true;
1539 info->value->pc = env->eip + env->segs[R_CS].base;
1540 #elif defined(TARGET_PPC)
1541 info->value->has_nip = true;
1542 info->value->nip = env->nip;
1543 #elif defined(TARGET_SPARC)
1544 info->value->has_pc = true;
1545 info->value->pc = env->pc;
1546 info->value->has_npc = true;
1547 info->value->npc = env->npc;
1548 #elif defined(TARGET_MIPS)
1549 info->value->has_PC = true;
1550 info->value->PC = env->active_tc.PC;
1551 #elif defined(TARGET_TRICORE)
1552 info->value->has_PC = true;
1553 info->value->PC = env->PC;
1554 #endif
1555
1556 /* XXX: waiting for the qapi to support GSList */
1557 if (!cur_item) {
1558 head = cur_item = info;
1559 } else {
1560 cur_item->next = info;
1561 cur_item = info;
1562 }
1563 }
1564
1565 return head;
1566 }
1567
1568 void qmp_memsave(int64_t addr, int64_t size, const char *filename,
1569 bool has_cpu, int64_t cpu_index, Error **errp)
1570 {
1571 FILE *f;
1572 uint32_t l;
1573 CPUState *cpu;
1574 uint8_t buf[1024];
1575 int64_t orig_addr = addr, orig_size = size;
1576
1577 if (!has_cpu) {
1578 cpu_index = 0;
1579 }
1580
1581 cpu = qemu_get_cpu(cpu_index);
1582 if (cpu == NULL) {
1583 error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "cpu-index",
1584 "a CPU number");
1585 return;
1586 }
1587
1588 f = fopen(filename, "wb");
1589 if (!f) {
1590 error_setg_file_open(errp, errno, filename);
1591 return;
1592 }
1593
1594 while (size != 0) {
1595 l = sizeof(buf);
1596 if (l > size)
1597 l = size;
1598 if (cpu_memory_rw_debug(cpu, addr, buf, l, 0) != 0) {
1599 error_setg(errp, "Invalid addr 0x%016" PRIx64 "/size %" PRId64
1600 " specified", orig_addr, orig_size);
1601 goto exit;
1602 }
1603 if (fwrite(buf, 1, l, f) != l) {
1604 error_setg(errp, QERR_IO_ERROR);
1605 goto exit;
1606 }
1607 addr += l;
1608 size -= l;
1609 }
1610
1611 exit:
1612 fclose(f);
1613 }
1614
1615 void qmp_pmemsave(int64_t addr, int64_t size, const char *filename,
1616 Error **errp)
1617 {
1618 FILE *f;
1619 uint32_t l;
1620 uint8_t buf[1024];
1621
1622 f = fopen(filename, "wb");
1623 if (!f) {
1624 error_setg_file_open(errp, errno, filename);
1625 return;
1626 }
1627
1628 while (size != 0) {
1629 l = sizeof(buf);
1630 if (l > size)
1631 l = size;
1632 cpu_physical_memory_read(addr, buf, l);
1633 if (fwrite(buf, 1, l, f) != l) {
1634 error_setg(errp, QERR_IO_ERROR);
1635 goto exit;
1636 }
1637 addr += l;
1638 size -= l;
1639 }
1640
1641 exit:
1642 fclose(f);
1643 }
1644
1645 void qmp_inject_nmi(Error **errp)
1646 {
1647 #if defined(TARGET_I386)
1648 CPUState *cs;
1649
1650 CPU_FOREACH(cs) {
1651 X86CPU *cpu = X86_CPU(cs);
1652
1653 if (!cpu->apic_state) {
1654 cpu_interrupt(cs, CPU_INTERRUPT_NMI);
1655 } else {
1656 apic_deliver_nmi(cpu->apic_state);
1657 }
1658 }
1659 #else
1660 nmi_monitor_handle(monitor_get_cpu_index(), errp);
1661 #endif
1662 }
1663
1664 void dump_drift_info(FILE *f, fprintf_function cpu_fprintf)
1665 {
1666 if (!use_icount) {
1667 return;
1668 }
1669
1670 cpu_fprintf(f, "Host - Guest clock %"PRIi64" ms\n",
1671 (cpu_get_clock() - cpu_get_icount())/SCALE_MS);
1672 if (icount_align_option) {
1673 cpu_fprintf(f, "Max guest delay %"PRIi64" ms\n", -max_delay/SCALE_MS);
1674 cpu_fprintf(f, "Max guest advance %"PRIi64" ms\n", max_advance/SCALE_MS);
1675 } else {
1676 cpu_fprintf(f, "Max guest delay NA\n");
1677 cpu_fprintf(f, "Max guest advance NA\n");
1678 }
1679 }