1 /*
2 * QEMU System Emulator
3 *
4 * Copyright (c) 2003-2008 Fabrice Bellard
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to deal
8 * in the Software without restriction, including without limitation the rights
9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 * copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22 * THE SOFTWARE.
23 */
24
25 /* Needed early for CONFIG_BSD etc. */
26 #include "qemu/osdep.h"
27
28 #include "monitor/monitor.h"
29 #include "qapi/qmp/qerror.h"
30 #include "qemu/error-report.h"
31 #include "sysemu/sysemu.h"
32 #include "sysemu/block-backend.h"
33 #include "exec/gdbstub.h"
34 #include "sysemu/dma.h"
35 #include "sysemu/kvm.h"
36 #include "qmp-commands.h"
37
38 #include "qemu/thread.h"
39 #include "sysemu/cpus.h"
40 #include "sysemu/qtest.h"
41 #include "qemu/main-loop.h"
42 #include "qemu/bitmap.h"
43 #include "qemu/seqlock.h"
44 #include "qapi-event.h"
45 #include "hw/nmi.h"
46 #include "sysemu/replay.h"
47
48 #ifndef _WIN32
49 #include "qemu/compatfd.h"
50 #endif
51
52 #ifdef CONFIG_LINUX
53
54 #include <sys/prctl.h>
55
56 #ifndef PR_MCE_KILL
57 #define PR_MCE_KILL 33
58 #endif
59
60 #ifndef PR_MCE_KILL_SET
61 #define PR_MCE_KILL_SET 1
62 #endif
63
64 #ifndef PR_MCE_KILL_EARLY
65 #define PR_MCE_KILL_EARLY 1
66 #endif
67
68 #endif /* CONFIG_LINUX */
69
70 static CPUState *next_cpu;
71 int64_t max_delay;
72 int64_t max_advance;
73
74 /* vcpu throttling controls */
75 static QEMUTimer *throttle_timer;
76 static unsigned int throttle_percentage;
77
78 #define CPU_THROTTLE_PCT_MIN 1
79 #define CPU_THROTTLE_PCT_MAX 99
80 #define CPU_THROTTLE_TIMESLICE_NS 10000000
81
82 bool cpu_is_stopped(CPUState *cpu)
83 {
84 return cpu->stopped || !runstate_is_running();
85 }
86
87 static bool cpu_thread_is_idle(CPUState *cpu)
88 {
89 if (cpu->stop || cpu->queued_work_first) {
90 return false;
91 }
92 if (cpu_is_stopped(cpu)) {
93 return true;
94 }
95 if (!cpu->halted || cpu_has_work(cpu) ||
96 kvm_halt_in_kernel()) {
97 return false;
98 }
99 return true;
100 }
101
102 static bool all_cpu_threads_idle(void)
103 {
104 CPUState *cpu;
105
106 CPU_FOREACH(cpu) {
107 if (!cpu_thread_is_idle(cpu)) {
108 return false;
109 }
110 }
111 return true;
112 }
113
114 /***********************************************************/
115 /* guest cycle counter */
116
117 /* Protected by TimersState seqlock */
118
119 static bool icount_sleep = true;
120 static int64_t vm_clock_warp_start = -1;
121 /* Conversion factor from emulated instructions to virtual clock ticks. */
122 static int icount_time_shift;
123 /* Arbitrarily pick 1MIPS as the minimum allowable speed. */
124 #define MAX_ICOUNT_SHIFT 10
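/* (a shift of 10 is 2^10 = 1024 ns per instruction, i.e. roughly 1 MIPS) */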
125
126 static QEMUTimer *icount_rt_timer;
127 static QEMUTimer *icount_vm_timer;
128 static QEMUTimer *icount_warp_timer;
129
130 typedef struct TimersState {
131 /* Protected by BQL. */
132 int64_t cpu_ticks_prev;
133 int64_t cpu_ticks_offset;
134
135 /* cpu_clock_offset can be read out of BQL, so protect it with
136 * this lock.
137 */
138 QemuSeqLock vm_clock_seqlock;
139 int64_t cpu_clock_offset;
140 int32_t cpu_ticks_enabled;
141 int64_t dummy;
142
143 /* Compensate for varying guest execution speed. */
144 int64_t qemu_icount_bias;
145 /* Only written by TCG thread */
146 int64_t qemu_icount;
147 } TimersState;
148
149 static TimersState timers_state;
150
151 int64_t cpu_get_icount_raw(void)
152 {
153 int64_t icount;
154 CPUState *cpu = current_cpu;
155
156 icount = timers_state.qemu_icount;
157 if (cpu) {
158 if (!cpu->can_do_io) {
159 fprintf(stderr, "Bad icount read\n");
160 exit(1);
161 }
162 icount -= (cpu->icount_decr.u16.low + cpu->icount_extra);
163 }
164 return icount;
165 }
166
167 /* Return the virtual CPU time, based on the instruction counter. */
168 static int64_t cpu_get_icount_locked(void)
169 {
170 int64_t icount = cpu_get_icount_raw();
171 return timers_state.qemu_icount_bias + cpu_icount_to_ns(icount);
172 }
173
174 int64_t cpu_get_icount(void)
175 {
176 int64_t icount;
177 unsigned start;
178
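/* Lock-free read: retry if a concurrent writer updated the timer state meanwhile. */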
179 do {
180 start = seqlock_read_begin(&timers_state.vm_clock_seqlock);
181 icount = cpu_get_icount_locked();
182 } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start));
183
184 return icount;
185 }
186
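/* Each emulated instruction accounts for 2^icount_time_shift nanoseconds of virtual time. */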
187 int64_t cpu_icount_to_ns(int64_t icount)
188 {
189 return icount << icount_time_shift;
190 }
191
192 /* return the host CPU cycle counter and handle stop/restart */
193 /* Caller must hold the BQL */
194 int64_t cpu_get_ticks(void)
195 {
196 int64_t ticks;
197
198 if (use_icount) {
199 return cpu_get_icount();
200 }
201
202 ticks = timers_state.cpu_ticks_offset;
203 if (timers_state.cpu_ticks_enabled) {
204 ticks += cpu_get_host_ticks();
205 }
206
207 if (timers_state.cpu_ticks_prev > ticks) {
208 /* Note: non-increasing ticks may happen if the host uses
209 software suspend. */
210 timers_state.cpu_ticks_offset += timers_state.cpu_ticks_prev - ticks;
211 ticks = timers_state.cpu_ticks_prev;
212 }
213
214 timers_state.cpu_ticks_prev = ticks;
215 return ticks;
216 }
217
218 static int64_t cpu_get_clock_locked(void)
219 {
220 int64_t ticks;
221
222 ticks = timers_state.cpu_clock_offset;
223 if (timers_state.cpu_ticks_enabled) {
224 ticks += get_clock();
225 }
226
227 return ticks;
228 }
229
230 /* return the host CPU monotonic timer and handle stop/restart */
231 int64_t cpu_get_clock(void)
232 {
233 int64_t ti;
234 unsigned start;
235
236 do {
237 start = seqlock_read_begin(&timers_state.vm_clock_seqlock);
238 ti = cpu_get_clock_locked();
239 } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start));
240
241 return ti;
242 }
243
244 /* enable cpu_get_ticks()
245 * Caller must hold the BQL, which serves as the mutex for vm_clock_seqlock.
246 */
247 void cpu_enable_ticks(void)
248 {
249 /* Here, the thing really protected by the seqlock is cpu_clock_offset. */
250 seqlock_write_lock(&timers_state.vm_clock_seqlock);
251 if (!timers_state.cpu_ticks_enabled) {
252 timers_state.cpu_ticks_offset -= cpu_get_host_ticks();
253 timers_state.cpu_clock_offset -= get_clock();
254 timers_state.cpu_ticks_enabled = 1;
255 }
256 seqlock_write_unlock(&timers_state.vm_clock_seqlock);
257 }
258
259 /* disable cpu_get_ticks(): the clock is stopped. You must not call
260 * cpu_get_ticks() after that.
261 * Caller must hold the BQL, which serves as the mutex for vm_clock_seqlock.
262 */
263 void cpu_disable_ticks(void)
264 {
265 /* Here, the thing really protected by the seqlock is cpu_clock_offset. */
266 seqlock_write_lock(&timers_state.vm_clock_seqlock);
267 if (timers_state.cpu_ticks_enabled) {
268 timers_state.cpu_ticks_offset += cpu_get_host_ticks();
269 timers_state.cpu_clock_offset = cpu_get_clock_locked();
270 timers_state.cpu_ticks_enabled = 0;
271 }
272 seqlock_write_unlock(&timers_state.vm_clock_seqlock);
273 }
274
275 /* Correlation between real and virtual time is always going to be
276 fairly approximate, so ignore small variation.
277 When the guest is idle, real and virtual time will be aligned in
278 the IO wait loop. */
279 #define ICOUNT_WOBBLE (NANOSECONDS_PER_SECOND / 10)
280
281 static void icount_adjust(void)
282 {
283 int64_t cur_time;
284 int64_t cur_icount;
285 int64_t delta;
286
287 /* Protected by TimersState mutex. */
288 static int64_t last_delta;
289
290 /* If the VM is not running, then do nothing. */
291 if (!runstate_is_running()) {
292 return;
293 }
294
295 seqlock_write_lock(&timers_state.vm_clock_seqlock);
296 cur_time = cpu_get_clock_locked();
297 cur_icount = cpu_get_icount_locked();
298
299 delta = cur_icount - cur_time;
300 /* FIXME: This is a very crude algorithm, somewhat prone to oscillation. */
301 if (delta > 0
302 && last_delta + ICOUNT_WOBBLE < delta * 2
303 && icount_time_shift > 0) {
304 /* The guest is getting too far ahead. Slow time down. */
305 icount_time_shift--;
306 }
307 if (delta < 0
308 && last_delta - ICOUNT_WOBBLE > delta * 2
309 && icount_time_shift < MAX_ICOUNT_SHIFT) {
310 /* The guest is getting too far behind. Speed time up. */
311 icount_time_shift++;
312 }
313 last_delta = delta;
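/* Recompute the bias so the virtual clock stays (approximately) continuous across the shift change. */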
314 timers_state.qemu_icount_bias = cur_icount
315 - (timers_state.qemu_icount << icount_time_shift);
316 seqlock_write_unlock(&timers_state.vm_clock_seqlock);
317 }
318
319 static void icount_adjust_rt(void *opaque)
320 {
321 timer_mod(icount_rt_timer,
322 qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL_RT) + 1000);
323 icount_adjust();
324 }
325
326 static void icount_adjust_vm(void *opaque)
327 {
328 timer_mod(icount_vm_timer,
329 qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
330 NANOSECONDS_PER_SECOND / 10);
331 icount_adjust();
332 }
333
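/* Round a nanosecond deadline up to a whole number of instructions at the current shift. */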
334 static int64_t qemu_icount_round(int64_t count)
335 {
336 return (count + (1 << icount_time_shift) - 1) >> icount_time_shift;
337 }
338
339 static void icount_warp_rt(void)
340 {
341 /* The icount_warp_timer is rescheduled soon after vm_clock_warp_start
342 * changes from -1 to another value, so the race here is okay.
343 */
344 if (atomic_read(&vm_clock_warp_start) == -1) {
345 return;
346 }
347
348 seqlock_write_lock(&timers_state.vm_clock_seqlock);
349 if (runstate_is_running()) {
350 int64_t clock = REPLAY_CLOCK(REPLAY_CLOCK_VIRTUAL_RT,
351 cpu_get_clock_locked());
352 int64_t warp_delta;
353
354 warp_delta = clock - vm_clock_warp_start;
355 if (use_icount == 2) {
356 /*
357 * In adaptive mode, do not let QEMU_CLOCK_VIRTUAL run too
358 * far ahead of real time.
359 */
360 int64_t cur_icount = cpu_get_icount_locked();
361 int64_t delta = clock - cur_icount;
362 warp_delta = MIN(warp_delta, delta);
363 }
364 timers_state.qemu_icount_bias += warp_delta;
365 }
366 vm_clock_warp_start = -1;
367 seqlock_write_unlock(&timers_state.vm_clock_seqlock);
368
369 if (qemu_clock_expired(QEMU_CLOCK_VIRTUAL)) {
370 qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
371 }
372 }
373
374 static void icount_timer_cb(void *opaque)
375 {
376 /* No need for a checkpoint because the timer already synchronizes
377 * with CHECKPOINT_CLOCK_VIRTUAL_RT.
378 */
379 icount_warp_rt();
380 }
381
382 void qtest_clock_warp(int64_t dest)
383 {
384 int64_t clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
385 AioContext *aio_context;
386 assert(qtest_enabled());
387 aio_context = qemu_get_aio_context();
388 while (clock < dest) {
389 int64_t deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
390 int64_t warp = qemu_soonest_timeout(dest - clock, deadline);
391
392 seqlock_write_lock(&timers_state.vm_clock_seqlock);
393 timers_state.qemu_icount_bias += warp;
394 seqlock_write_unlock(&timers_state.vm_clock_seqlock);
395
396 qemu_clock_run_timers(QEMU_CLOCK_VIRTUAL);
397 timerlist_run_timers(aio_context->tlg.tl[QEMU_CLOCK_VIRTUAL]);
398 clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
399 }
400 qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
401 }
402
403 void qemu_start_warp_timer(void)
404 {
405 int64_t clock;
406 int64_t deadline;
407
408 if (!use_icount) {
409 return;
410 }
411
412 /* Nothing to do if the VM is stopped: QEMU_CLOCK_VIRTUAL timers
413 * do not fire, so computing the deadline does not make sense.
414 */
415 if (!runstate_is_running()) {
416 return;
417 }
418
419 /* warp clock deterministically in record/replay mode */
420 if (!replay_checkpoint(CHECKPOINT_CLOCK_WARP_START)) {
421 return;
422 }
423
424 if (!all_cpu_threads_idle()) {
425 return;
426 }
427
428 if (qtest_enabled()) {
429 /* When testing, qtest commands advance icount. */
430 return;
431 }
432
433 /* We want to use the earliest deadline from ALL vm_clocks */
434 clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT);
435 deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
436 if (deadline < 0) {
437 static bool notified;
438 if (!icount_sleep && !notified) {
439 error_report("WARNING: icount sleep disabled and no active timers");
440 notified = true;
441 }
442 return;
443 }
444
445 if (deadline > 0) {
446 /*
447 * Ensure QEMU_CLOCK_VIRTUAL proceeds even when the virtual CPU goes to
448 * sleep. Otherwise, the CPU might be waiting for a future timer
449 * interrupt to wake it up, but the interrupt never comes because
450 * the vCPU isn't running any insns and thus doesn't advance the
451 * QEMU_CLOCK_VIRTUAL.
452 */
453 if (!icount_sleep) {
454 /*
455 * We never let VCPUs sleep in no-sleep icount mode.
456 * If there is a pending QEMU_CLOCK_VIRTUAL timer we just advance
457 * to the next QEMU_CLOCK_VIRTUAL event and notify it.
458 * This is useful when we want a deterministic execution time,
459 * isolated from host latencies.
460 */
461 seqlock_write_lock(&timers_state.vm_clock_seqlock);
462 timers_state.qemu_icount_bias += deadline;
463 seqlock_write_unlock(&timers_state.vm_clock_seqlock);
464 qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
465 } else {
466 /*
467 * We do stop VCPUs and only advance QEMU_CLOCK_VIRTUAL after some
468 * "real" time (related to the time left until the next event) has
469 * passed. The QEMU_CLOCK_VIRTUAL_RT clock will do this.
470 * This keeps the warps from being visible externally; for example,
471 * you will not be sending network packets continuously instead of
472 * every 100ms.
473 */
474 seqlock_write_lock(&timers_state.vm_clock_seqlock);
475 if (vm_clock_warp_start == -1 || vm_clock_warp_start > clock) {
476 vm_clock_warp_start = clock;
477 }
478 seqlock_write_unlock(&timers_state.vm_clock_seqlock);
479 timer_mod_anticipate(icount_warp_timer, clock + deadline);
480 }
481 } else if (deadline == 0) {
482 qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
483 }
484 }
485
486 static void qemu_account_warp_timer(void)
487 {
488 if (!use_icount || !icount_sleep) {
489 return;
490 }
491
492 /* Nothing to do if the VM is stopped: QEMU_CLOCK_VIRTUAL timers
493 * do not fire, so computing the deadline does not make sense.
494 */
495 if (!runstate_is_running()) {
496 return;
497 }
498
499 /* warp clock deterministically in record/replay mode */
500 if (!replay_checkpoint(CHECKPOINT_CLOCK_WARP_ACCOUNT)) {
501 return;
502 }
503
504 timer_del(icount_warp_timer);
505 icount_warp_rt();
506 }
507
508 static bool icount_state_needed(void *opaque)
509 {
510 return use_icount;
511 }
512
513 /*
514 * This is a subsection for icount migration.
515 */
516 static const VMStateDescription icount_vmstate_timers = {
517 .name = "timer/icount",
518 .version_id = 1,
519 .minimum_version_id = 1,
520 .needed = icount_state_needed,
521 .fields = (VMStateField[]) {
522 VMSTATE_INT64(qemu_icount_bias, TimersState),
523 VMSTATE_INT64(qemu_icount, TimersState),
524 VMSTATE_END_OF_LIST()
525 }
526 };
527
528 static const VMStateDescription vmstate_timers = {
529 .name = "timer",
530 .version_id = 2,
531 .minimum_version_id = 1,
532 .fields = (VMStateField[]) {
533 VMSTATE_INT64(cpu_ticks_offset, TimersState),
534 VMSTATE_INT64(dummy, TimersState),
535 VMSTATE_INT64_V(cpu_clock_offset, TimersState, 2),
536 VMSTATE_END_OF_LIST()
537 },
538 .subsections = (const VMStateDescription*[]) {
539 &icount_vmstate_timers,
540 NULL
541 }
542 };
543
544 static void cpu_throttle_thread(void *opaque)
545 {
546 CPUState *cpu = opaque;
547 double pct;
548 double throttle_ratio;
549 long sleeptime_ns;
550
551 if (!cpu_throttle_get_percentage()) {
552 return;
553 }
554
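/* To keep the vCPU busy for only (1 - pct) of the time, sleep pct/(1-pct)
   timeslices for every timeslice of run time, e.g. one extra timeslice
   at 50% throttle and 99 at 99%. */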
555 pct = (double)cpu_throttle_get_percentage()/100;
556 throttle_ratio = pct / (1 - pct);
557 sleeptime_ns = (long)(throttle_ratio * CPU_THROTTLE_TIMESLICE_NS);
558
559 qemu_mutex_unlock_iothread();
560 atomic_set(&cpu->throttle_thread_scheduled, 0);
561 g_usleep(sleeptime_ns / 1000); /* Convert ns to us for usleep call */
562 qemu_mutex_lock_iothread();
563 }
564
565 static void cpu_throttle_timer_tick(void *opaque)
566 {
567 CPUState *cpu;
568 double pct;
569
570 /* Stop the timer if needed */
571 if (!cpu_throttle_get_percentage()) {
572 return;
573 }
574 CPU_FOREACH(cpu) {
575 if (!atomic_xchg(&cpu->throttle_thread_scheduled, 1)) {
576 async_run_on_cpu(cpu, cpu_throttle_thread, cpu);
577 }
578 }
579
580 pct = (double)cpu_throttle_get_percentage()/100;
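/* Re-arm so that one full timeslice of run time elapses per tick: the tick
   period is timeslice/(1-pct), of which the fraction pct is spent sleeping. */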
581 timer_mod(throttle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT) +
582 CPU_THROTTLE_TIMESLICE_NS / (1-pct));
583 }
584
585 void cpu_throttle_set(int new_throttle_pct)
586 {
587 /* Ensure throttle percentage is within valid range */
588 new_throttle_pct = MIN(new_throttle_pct, CPU_THROTTLE_PCT_MAX);
589 new_throttle_pct = MAX(new_throttle_pct, CPU_THROTTLE_PCT_MIN);
590
591 atomic_set(&throttle_percentage, new_throttle_pct);
592
593 timer_mod(throttle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT) +
594 CPU_THROTTLE_TIMESLICE_NS);
595 }
596
597 void cpu_throttle_stop(void)
598 {
599 atomic_set(&throttle_percentage, 0);
600 }
601
602 bool cpu_throttle_active(void)
603 {
604 return (cpu_throttle_get_percentage() != 0);
605 }
606
607 int cpu_throttle_get_percentage(void)
608 {
609 return atomic_read(&throttle_percentage);
610 }
611
612 void cpu_ticks_init(void)
613 {
614 seqlock_init(&timers_state.vm_clock_seqlock, NULL);
615 vmstate_register(NULL, 0, &vmstate_timers, &timers_state);
616 throttle_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL_RT,
617 cpu_throttle_timer_tick, NULL);
618 }
619
620 void configure_icount(QemuOpts *opts, Error **errp)
621 {
622 const char *option;
623 char *rem_str = NULL;
624
625 option = qemu_opt_get(opts, "shift");
626 if (!option) {
627 if (qemu_opt_get(opts, "align") != NULL) {
628 error_setg(errp, "Please specify shift option when using align");
629 }
630 return;
631 }
632
633 icount_sleep = qemu_opt_get_bool(opts, "sleep", true);
634 if (icount_sleep) {
635 icount_warp_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL_RT,
636 icount_timer_cb, NULL);
637 }
638
639 icount_align_option = qemu_opt_get_bool(opts, "align", false);
640
641 if (icount_align_option && !icount_sleep) {
642 error_setg(errp, "align=on and sleep=off are incompatible");
643 }
644 if (strcmp(option, "auto") != 0) {
645 errno = 0;
646 icount_time_shift = strtol(option, &rem_str, 0);
647 if (errno != 0 || *rem_str != '\0' || !strlen(option)) {
648 error_setg(errp, "icount: Invalid shift value");
649 }
650 use_icount = 1;
651 return;
652 } else if (icount_align_option) {
653 error_setg(errp, "shift=auto and align=on are incompatible");
654 } else if (!icount_sleep) {
655 error_setg(errp, "shift=auto and sleep=off are incompatible");
656 }
657
658 use_icount = 2;
659
660 /* 125MIPS seems a reasonable initial guess at the guest speed.
661 It will be corrected fairly quickly anyway. */
662 icount_time_shift = 3;
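/* shift of 3 -> 2^3 = 8 ns per instruction, i.e. 125 million instructions per emulated second */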
663
664 /* Have both realtime and virtual time triggers for speed adjustment.
665 The realtime trigger catches emulated time passing too slowly,
666 the virtual time trigger catches emulated time passing too fast.
667 Realtime triggers occur even when idle, so use them less frequently
668 than VM triggers. */
669 icount_rt_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL_RT,
670 icount_adjust_rt, NULL);
671 timer_mod(icount_rt_timer,
672 qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL_RT) + 1000);
673 icount_vm_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
674 icount_adjust_vm, NULL);
675 timer_mod(icount_vm_timer,
676 qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
677 NANOSECONDS_PER_SECOND / 10);
678 }
679
680 /***********************************************************/
681 void hw_error(const char *fmt, ...)
682 {
683 va_list ap;
684 CPUState *cpu;
685
686 va_start(ap, fmt);
687 fprintf(stderr, "qemu: hardware error: ");
688 vfprintf(stderr, fmt, ap);
689 fprintf(stderr, "\n");
690 CPU_FOREACH(cpu) {
691 fprintf(stderr, "CPU #%d:\n", cpu->cpu_index);
692 cpu_dump_state(cpu, stderr, fprintf, CPU_DUMP_FPU);
693 }
694 va_end(ap);
695 abort();
696 }
697
698 void cpu_synchronize_all_states(void)
699 {
700 CPUState *cpu;
701
702 CPU_FOREACH(cpu) {
703 cpu_synchronize_state(cpu);
704 }
705 }
706
707 void cpu_synchronize_all_post_reset(void)
708 {
709 CPUState *cpu;
710
711 CPU_FOREACH(cpu) {
712 cpu_synchronize_post_reset(cpu);
713 }
714 }
715
716 void cpu_synchronize_all_post_init(void)
717 {
718 CPUState *cpu;
719
720 CPU_FOREACH(cpu) {
721 cpu_synchronize_post_init(cpu);
722 }
723 }
724
725 static int do_vm_stop(RunState state)
726 {
727 int ret = 0;
728
729 if (runstate_is_running()) {
730 cpu_disable_ticks();
731 pause_all_vcpus();
732 runstate_set(state);
733 vm_state_notify(0, state);
734 qapi_event_send_stop(&error_abort);
735 }
736
737 bdrv_drain_all();
738 ret = blk_flush_all();
739
740 return ret;
741 }
742
743 static bool cpu_can_run(CPUState *cpu)
744 {
745 if (cpu->stop) {
746 return false;
747 }
748 if (cpu_is_stopped(cpu)) {
749 return false;
750 }
751 return true;
752 }
753
754 static void cpu_handle_guest_debug(CPUState *cpu)
755 {
756 gdb_set_stop_cpu(cpu);
757 qemu_system_debug_request();
758 cpu->stopped = true;
759 }
760
761 #ifdef CONFIG_LINUX
762 static void sigbus_reraise(void)
763 {
764 sigset_t set;
765 struct sigaction action;
766
767 memset(&action, 0, sizeof(action));
768 action.sa_handler = SIG_DFL;
769 if (!sigaction(SIGBUS, &action, NULL)) {
770 raise(SIGBUS);
771 sigemptyset(&set);
772 sigaddset(&set, SIGBUS);
773 sigprocmask(SIG_UNBLOCK, &set, NULL);
774 }
775 perror("Failed to re-raise SIGBUS!\n");
776 abort();
777 }
778
779 static void sigbus_handler(int n, struct qemu_signalfd_siginfo *siginfo,
780 void *ctx)
781 {
782 if (kvm_on_sigbus(siginfo->ssi_code,
783 (void *)(intptr_t)siginfo->ssi_addr)) {
784 sigbus_reraise();
785 }
786 }
787
788 static void qemu_init_sigbus(void)
789 {
790 struct sigaction action;
791
792 memset(&action, 0, sizeof(action));
793 action.sa_flags = SA_SIGINFO;
794 action.sa_sigaction = (void (*)(int, siginfo_t*, void*))sigbus_handler;
795 sigaction(SIGBUS, &action, NULL);
796
797 prctl(PR_MCE_KILL, PR_MCE_KILL_SET, PR_MCE_KILL_EARLY, 0, 0);
798 }
799
800 static void qemu_kvm_eat_signals(CPUState *cpu)
801 {
802 struct timespec ts = { 0, 0 };
803 siginfo_t siginfo;
804 sigset_t waitset;
805 sigset_t chkset;
806 int r;
807
808 sigemptyset(&waitset);
809 sigaddset(&waitset, SIG_IPI);
810 sigaddset(&waitset, SIGBUS);
811
812 do {
813 r = sigtimedwait(&waitset, &siginfo, &ts);
814 if (r == -1 && !(errno == EAGAIN || errno == EINTR)) {
815 perror("sigtimedwait");
816 exit(1);
817 }
818
819 switch (r) {
820 case SIGBUS:
821 if (kvm_on_sigbus_vcpu(cpu, siginfo.si_code, siginfo.si_addr)) {
822 sigbus_reraise();
823 }
824 break;
825 default:
826 break;
827 }
828
829 r = sigpending(&chkset);
830 if (r == -1) {
831 perror("sigpending");
832 exit(1);
833 }
834 } while (sigismember(&chkset, SIG_IPI) || sigismember(&chkset, SIGBUS));
835 }
836
837 #else /* !CONFIG_LINUX */
838
839 static void qemu_init_sigbus(void)
840 {
841 }
842
843 static void qemu_kvm_eat_signals(CPUState *cpu)
844 {
845 }
846 #endif /* !CONFIG_LINUX */
847
848 #ifndef _WIN32
849 static void dummy_signal(int sig)
850 {
851 }
852
853 static void qemu_kvm_init_cpu_signals(CPUState *cpu)
854 {
855 int r;
856 sigset_t set;
857 struct sigaction sigact;
858
859 memset(&sigact, 0, sizeof(sigact));
860 sigact.sa_handler = dummy_signal;
861 sigaction(SIG_IPI, &sigact, NULL);
862
863 pthread_sigmask(SIG_BLOCK, NULL, &set);
864 sigdelset(&set, SIG_IPI);
865 sigdelset(&set, SIGBUS);
866 r = kvm_set_signal_mask(cpu, &set);
867 if (r) {
868 fprintf(stderr, "kvm_set_signal_mask: %s\n", strerror(-r));
869 exit(1);
870 }
871 }
872
873 #else /* _WIN32 */
874 static void qemu_kvm_init_cpu_signals(CPUState *cpu)
875 {
876 abort();
877 }
878 #endif /* _WIN32 */
879
880 static QemuMutex qemu_global_mutex;
881 static QemuCond qemu_io_proceeded_cond;
882 static unsigned iothread_requesting_mutex;
883
884 static QemuThread io_thread;
885
886 /* cpu creation */
887 static QemuCond qemu_cpu_cond;
888 /* system init */
889 static QemuCond qemu_pause_cond;
890 static QemuCond qemu_work_cond;
891
892 void qemu_init_cpu_loop(void)
893 {
894 qemu_init_sigbus();
895 qemu_cond_init(&qemu_cpu_cond);
896 qemu_cond_init(&qemu_pause_cond);
897 qemu_cond_init(&qemu_work_cond);
898 qemu_cond_init(&qemu_io_proceeded_cond);
899 qemu_mutex_init(&qemu_global_mutex);
900
901 qemu_thread_get_self(&io_thread);
902 }
903
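/* Run func(data) on the vCPU cpu's thread and wait for it to finish; when
 * called from that vCPU's own thread, run it directly instead of queueing. */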
904 void run_on_cpu(CPUState *cpu, void (*func)(void *data), void *data)
905 {
906 struct qemu_work_item wi;
907
908 if (qemu_cpu_is_self(cpu)) {
909 func(data);
910 return;
911 }
912
913 wi.func = func;
914 wi.data = data;
915 wi.free = false;
916
917 qemu_mutex_lock(&cpu->work_mutex);
918 if (cpu->queued_work_first == NULL) {
919 cpu->queued_work_first = &wi;
920 } else {
921 cpu->queued_work_last->next = &wi;
922 }
923 cpu->queued_work_last = &wi;
924 wi.next = NULL;
925 wi.done = false;
926 qemu_mutex_unlock(&cpu->work_mutex);
927
928 qemu_cpu_kick(cpu);
929 while (!atomic_mb_read(&wi.done)) {
930 CPUState *self_cpu = current_cpu;
931
932 qemu_cond_wait(&qemu_work_cond, &qemu_global_mutex);
933 current_cpu = self_cpu;
934 }
935 }
936
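/* Queue func(data) to run on the vCPU cpu's thread without waiting; the
 * heap-allocated work item is freed by flush_queued_work() once it has run. */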
937 void async_run_on_cpu(CPUState *cpu, void (*func)(void *data), void *data)
938 {
939 struct qemu_work_item *wi;
940
941 if (qemu_cpu_is_self(cpu)) {
942 func(data);
943 return;
944 }
945
946 wi = g_malloc0(sizeof(struct qemu_work_item));
947 wi->func = func;
948 wi->data = data;
949 wi->free = true;
950
951 qemu_mutex_lock(&cpu->work_mutex);
952 if (cpu->queued_work_first == NULL) {
953 cpu->queued_work_first = wi;
954 } else {
955 cpu->queued_work_last->next = wi;
956 }
957 cpu->queued_work_last = wi;
958 wi->next = NULL;
959 wi->done = false;
960 qemu_mutex_unlock(&cpu->work_mutex);
961
962 qemu_cpu_kick(cpu);
963 }
964
965 static void flush_queued_work(CPUState *cpu)
966 {
967 struct qemu_work_item *wi;
968
969 if (cpu->queued_work_first == NULL) {
970 return;
971 }
972
973 qemu_mutex_lock(&cpu->work_mutex);
974 while (cpu->queued_work_first != NULL) {
975 wi = cpu->queued_work_first;
976 cpu->queued_work_first = wi->next;
977 if (!cpu->queued_work_first) {
978 cpu->queued_work_last = NULL;
979 }
980 qemu_mutex_unlock(&cpu->work_mutex);
981 wi->func(wi->data);
982 qemu_mutex_lock(&cpu->work_mutex);
983 if (wi->free) {
984 g_free(wi);
985 } else {
986 atomic_mb_set(&wi->done, true);
987 }
988 }
989 qemu_mutex_unlock(&cpu->work_mutex);
990 qemu_cond_broadcast(&qemu_work_cond);
991 }
992
993 static void qemu_wait_io_event_common(CPUState *cpu)
994 {
995 if (cpu->stop) {
996 cpu->stop = false;
997 cpu->stopped = true;
998 qemu_cond_broadcast(&qemu_pause_cond);
999 }
1000 flush_queued_work(cpu);
1001 cpu->thread_kicked = false;
1002 }
1003
1004 static void qemu_tcg_wait_io_event(CPUState *cpu)
1005 {
1006 while (all_cpu_threads_idle()) {
1007 qemu_cond_wait(cpu->halt_cond, &qemu_global_mutex);
1008 }
1009
1010 while (iothread_requesting_mutex) {
1011 qemu_cond_wait(&qemu_io_proceeded_cond, &qemu_global_mutex);
1012 }
1013
1014 CPU_FOREACH(cpu) {
1015 qemu_wait_io_event_common(cpu);
1016 }
1017 }
1018
1019 static void qemu_kvm_wait_io_event(CPUState *cpu)
1020 {
1021 while (cpu_thread_is_idle(cpu)) {
1022 qemu_cond_wait(cpu->halt_cond, &qemu_global_mutex);
1023 }
1024
1025 qemu_kvm_eat_signals(cpu);
1026 qemu_wait_io_event_common(cpu);
1027 }
1028
1029 static void *qemu_kvm_cpu_thread_fn(void *arg)
1030 {
1031 CPUState *cpu = arg;
1032 int r;
1033
1034 rcu_register_thread();
1035
1036 qemu_mutex_lock_iothread();
1037 qemu_thread_get_self(cpu->thread);
1038 cpu->thread_id = qemu_get_thread_id();
1039 cpu->can_do_io = 1;
1040 current_cpu = cpu;
1041
1042 r = kvm_init_vcpu(cpu);
1043 if (r < 0) {
1044 fprintf(stderr, "kvm_init_vcpu failed: %s\n", strerror(-r));
1045 exit(1);
1046 }
1047
1048 qemu_kvm_init_cpu_signals(cpu);
1049
1050 /* signal CPU creation */
1051 cpu->created = true;
1052 qemu_cond_signal(&qemu_cpu_cond);
1053
1054 while (1) {
1055 if (cpu_can_run(cpu)) {
1056 r = kvm_cpu_exec(cpu);
1057 if (r == EXCP_DEBUG) {
1058 cpu_handle_guest_debug(cpu);
1059 }
1060 }
1061 qemu_kvm_wait_io_event(cpu);
1062 }
1063
1064 return NULL;
1065 }
1066
1067 static void *qemu_dummy_cpu_thread_fn(void *arg)
1068 {
1069 #ifdef _WIN32
1070 fprintf(stderr, "qtest is not supported under Windows\n");
1071 exit(1);
1072 #else
1073 CPUState *cpu = arg;
1074 sigset_t waitset;
1075 int r;
1076
1077 rcu_register_thread();
1078
1079 qemu_mutex_lock_iothread();
1080 qemu_thread_get_self(cpu->thread);
1081 cpu->thread_id = qemu_get_thread_id();
1082 cpu->can_do_io = 1;
1083
1084 sigemptyset(&waitset);
1085 sigaddset(&waitset, SIG_IPI);
1086
1087 /* signal CPU creation */
1088 cpu->created = true;
1089 qemu_cond_signal(&qemu_cpu_cond);
1090
1091 current_cpu = cpu;
1092 while (1) {
1093 current_cpu = NULL;
1094 qemu_mutex_unlock_iothread();
1095 do {
1096 int sig;
1097 r = sigwait(&waitset, &sig);
1098 } while (r == -1 && (errno == EAGAIN || errno == EINTR));
1099 if (r == -1) {
1100 perror("sigwait");
1101 exit(1);
1102 }
1103 qemu_mutex_lock_iothread();
1104 current_cpu = cpu;
1105 qemu_wait_io_event_common(cpu);
1106 }
1107
1108 return NULL;
1109 #endif
1110 }
1111
1112 static void tcg_exec_all(void);
1113
1114 static void *qemu_tcg_cpu_thread_fn(void *arg)
1115 {
1116 CPUState *cpu = arg;
1117
1118 rcu_register_thread();
1119
1120 qemu_mutex_lock_iothread();
1121 qemu_thread_get_self(cpu->thread);
1122
1123 CPU_FOREACH(cpu) {
1124 cpu->thread_id = qemu_get_thread_id();
1125 cpu->created = true;
1126 cpu->can_do_io = 1;
1127 }
1128 qemu_cond_signal(&qemu_cpu_cond);
1129
1130 /* wait for initial kick-off after machine start */
1131 while (first_cpu->stopped) {
1132 qemu_cond_wait(first_cpu->halt_cond, &qemu_global_mutex);
1133
1134 /* process any pending work */
1135 CPU_FOREACH(cpu) {
1136 qemu_wait_io_event_common(cpu);
1137 }
1138 }
1139
1140 /* start with exit_request set so the first tcg_exec_all() call skips
1140 execution and any pending work is processed first */
1141 atomic_mb_set(&exit_request, 1);
1142
1143 while (1) {
1144 tcg_exec_all();
1145
1146 if (use_icount) {
1147 int64_t deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
1148
1149 if (deadline == 0) {
1150 qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
1151 }
1152 }
1153 qemu_tcg_wait_io_event(QTAILQ_FIRST(&cpus));
1154 }
1155
1156 return NULL;
1157 }
1158
1159 static void qemu_cpu_kick_thread(CPUState *cpu)
1160 {
1161 #ifndef _WIN32
1162 int err;
1163
1164 if (cpu->thread_kicked) {
1165 return;
1166 }
1167 cpu->thread_kicked = true;
1168 err = pthread_kill(cpu->thread->thread, SIG_IPI);
1169 if (err) {
1170 fprintf(stderr, "qemu:%s: %s", __func__, strerror(err));
1171 exit(1);
1172 }
1173 #else /* _WIN32 */
1174 abort();
1175 #endif
1176 }
1177
1178 static void qemu_cpu_kick_no_halt(void)
1179 {
1180 CPUState *cpu;
1181 /* Ensure whatever caused the exit has reached the CPU threads before
1182 * writing exit_request.
1183 */
1184 atomic_mb_set(&exit_request, 1);
1185 cpu = atomic_mb_read(&tcg_current_cpu);
1186 if (cpu) {
1187 cpu_exit(cpu);
1188 }
1189 }
1190
1191 void qemu_cpu_kick(CPUState *cpu)
1192 {
1193 qemu_cond_broadcast(cpu->halt_cond);
1194 if (tcg_enabled()) {
1195 qemu_cpu_kick_no_halt();
1196 } else {
1197 qemu_cpu_kick_thread(cpu);
1198 }
1199 }
1200
1201 void qemu_cpu_kick_self(void)
1202 {
1203 assert(current_cpu);
1204 qemu_cpu_kick_thread(current_cpu);
1205 }
1206
1207 bool qemu_cpu_is_self(CPUState *cpu)
1208 {
1209 return qemu_thread_is_self(cpu->thread);
1210 }
1211
1212 bool qemu_in_vcpu_thread(void)
1213 {
1214 return current_cpu && qemu_cpu_is_self(current_cpu);
1215 }
1216
1217 static __thread bool iothread_locked = false;
1218
1219 bool qemu_mutex_iothread_locked(void)
1220 {
1221 return iothread_locked;
1222 }
1223
1224 void qemu_mutex_lock_iothread(void)
1225 {
1226 atomic_inc(&iothread_requesting_mutex);
1227 /* In the simple case there is no need to bump the VCPU thread out of
1228 * TCG code execution.
1229 */
1230 if (!tcg_enabled() || qemu_in_vcpu_thread() ||
1231 !first_cpu || !first_cpu->created) {
1232 qemu_mutex_lock(&qemu_global_mutex);
1233 atomic_dec(&iothread_requesting_mutex);
1234 } else {
1235 if (qemu_mutex_trylock(&qemu_global_mutex)) {
1236 qemu_cpu_kick_no_halt();
1237 qemu_mutex_lock(&qemu_global_mutex);
1238 }
1239 atomic_dec(&iothread_requesting_mutex);
1240 qemu_cond_broadcast(&qemu_io_proceeded_cond);
1241 }
1242 iothread_locked = true;
1243 }
1244
1245 void qemu_mutex_unlock_iothread(void)
1246 {
1247 iothread_locked = false;
1248 qemu_mutex_unlock(&qemu_global_mutex);
1249 }
1250
1251 static int all_vcpus_paused(void)
1252 {
1253 CPUState *cpu;
1254
1255 CPU_FOREACH(cpu) {
1256 if (!cpu->stopped) {
1257 return 0;
1258 }
1259 }
1260
1261 return 1;
1262 }
1263
1264 void pause_all_vcpus(void)
1265 {
1266 CPUState *cpu;
1267
1268 qemu_clock_enable(QEMU_CLOCK_VIRTUAL, false);
1269 CPU_FOREACH(cpu) {
1270 cpu->stop = true;
1271 qemu_cpu_kick(cpu);
1272 }
1273
1274 if (qemu_in_vcpu_thread()) {
1275 cpu_stop_current();
1276 if (!kvm_enabled()) {
1277 CPU_FOREACH(cpu) {
1278 cpu->stop = false;
1279 cpu->stopped = true;
1280 }
1281 return;
1282 }
1283 }
1284
1285 while (!all_vcpus_paused()) {
1286 qemu_cond_wait(&qemu_pause_cond, &qemu_global_mutex);
1287 CPU_FOREACH(cpu) {
1288 qemu_cpu_kick(cpu);
1289 }
1290 }
1291 }
1292
1293 void cpu_resume(CPUState *cpu)
1294 {
1295 cpu->stop = false;
1296 cpu->stopped = false;
1297 qemu_cpu_kick(cpu);
1298 }
1299
1300 void resume_all_vcpus(void)
1301 {
1302 CPUState *cpu;
1303
1304 qemu_clock_enable(QEMU_CLOCK_VIRTUAL, true);
1305 CPU_FOREACH(cpu) {
1306 cpu_resume(cpu);
1307 }
1308 }
1309
1310 /* Size of the temporary buffer used to form a vCPU thread name */
1311 #define VCPU_THREAD_NAME_SIZE 16
1312
1313 static void qemu_tcg_init_vcpu(CPUState *cpu)
1314 {
1315 char thread_name[VCPU_THREAD_NAME_SIZE];
1316 static QemuCond *tcg_halt_cond;
1317 static QemuThread *tcg_cpu_thread;
1318
1319 /* share a single thread for all cpus with TCG */
1320 if (!tcg_cpu_thread) {
1321 cpu->thread = g_malloc0(sizeof(QemuThread));
1322 cpu->halt_cond = g_malloc0(sizeof(QemuCond));
1323 qemu_cond_init(cpu->halt_cond);
1324 tcg_halt_cond = cpu->halt_cond;
1325 snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/TCG",
1326 cpu->cpu_index);
1327 qemu_thread_create(cpu->thread, thread_name, qemu_tcg_cpu_thread_fn,
1328 cpu, QEMU_THREAD_JOINABLE);
1329 #ifdef _WIN32
1330 cpu->hThread = qemu_thread_get_handle(cpu->thread);
1331 #endif
1332 while (!cpu->created) {
1333 qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
1334 }
1335 tcg_cpu_thread = cpu->thread;
1336 } else {
1337 cpu->thread = tcg_cpu_thread;
1338 cpu->halt_cond = tcg_halt_cond;
1339 }
1340 }
1341
1342 static void qemu_kvm_start_vcpu(CPUState *cpu)
1343 {
1344 char thread_name[VCPU_THREAD_NAME_SIZE];
1345
1346 cpu->thread = g_malloc0(sizeof(QemuThread));
1347 cpu->halt_cond = g_malloc0(sizeof(QemuCond));
1348 qemu_cond_init(cpu->halt_cond);
1349 snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/KVM",
1350 cpu->cpu_index);
1351 qemu_thread_create(cpu->thread, thread_name, qemu_kvm_cpu_thread_fn,
1352 cpu, QEMU_THREAD_JOINABLE);
1353 while (!cpu->created) {
1354 qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
1355 }
1356 }
1357
1358 static void qemu_dummy_start_vcpu(CPUState *cpu)
1359 {
1360 char thread_name[VCPU_THREAD_NAME_SIZE];
1361
1362 cpu->thread = g_malloc0(sizeof(QemuThread));
1363 cpu->halt_cond = g_malloc0(sizeof(QemuCond));
1364 qemu_cond_init(cpu->halt_cond);
1365 snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/DUMMY",
1366 cpu->cpu_index);
1367 qemu_thread_create(cpu->thread, thread_name, qemu_dummy_cpu_thread_fn, cpu,
1368 QEMU_THREAD_JOINABLE);
1369 while (!cpu->created) {
1370 qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
1371 }
1372 }
1373
1374 void qemu_init_vcpu(CPUState *cpu)
1375 {
1376 cpu->nr_cores = smp_cores;
1377 cpu->nr_threads = smp_threads;
1378 cpu->stopped = true;
1379
1380 if (!cpu->as) {
1381 /* If the target cpu hasn't set up any address spaces itself,
1382 * give it the default one.
1383 */
1384 AddressSpace *as = address_space_init_shareable(cpu->memory,
1385 "cpu-memory");
1386 cpu->num_ases = 1;
1387 cpu_address_space_init(cpu, as, 0);
1388 }
1389
1390 if (kvm_enabled()) {
1391 qemu_kvm_start_vcpu(cpu);
1392 } else if (tcg_enabled()) {
1393 qemu_tcg_init_vcpu(cpu);
1394 } else {
1395 qemu_dummy_start_vcpu(cpu);
1396 }
1397 }
1398
1399 void cpu_stop_current(void)
1400 {
1401 if (current_cpu) {
1402 current_cpu->stop = false;
1403 current_cpu->stopped = true;
1404 cpu_exit(current_cpu);
1405 qemu_cond_broadcast(&qemu_pause_cond);
1406 }
1407 }
1408
1409 int vm_stop(RunState state)
1410 {
1411 if (qemu_in_vcpu_thread()) {
1412 qemu_system_vmstop_request_prepare();
1413 qemu_system_vmstop_request(state);
1414 /*
1415 * FIXME: should not return to device code in case
1416 * vm_stop() has been requested.
1417 */
1418 cpu_stop_current();
1419 return 0;
1420 }
1421
1422 return do_vm_stop(state);
1423 }
1424
1425 /* does a state transition even if the VM is already stopped;
1426 the current state is forgotten forever */
1427 int vm_stop_force_state(RunState state)
1428 {
1429 if (runstate_is_running()) {
1430 return vm_stop(state);
1431 } else {
1432 runstate_set(state);
1433
1434 bdrv_drain_all();
1435 /* Make sure to return an error if the flush in a previous vm_stop()
1436 * failed. */
1437 return blk_flush_all();
1438 }
1439 }
1440
1441 static int64_t tcg_get_icount_limit(void)
1442 {
1443 int64_t deadline;
1444
1445 if (replay_mode != REPLAY_MODE_PLAY) {
1446 deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
1447
1448 /* Maintain prior (possibly buggy) behaviour where if no deadline
1449 * was set (as there is no QEMU_CLOCK_VIRTUAL timer) or it is more than
1450 * INT32_MAX nanoseconds ahead, we still use INT32_MAX
1451 * nanoseconds.
1452 */
1453 if ((deadline < 0) || (deadline > INT32_MAX)) {
1454 deadline = INT32_MAX;
1455 }
1456
1457 return qemu_icount_round(deadline);
1458 } else {
1459 return replay_get_instructions();
1460 }
1461 }
1462
1463 static int tcg_cpu_exec(CPUState *cpu)
1464 {
1465 int ret;
1466 #ifdef CONFIG_PROFILER
1467 int64_t ti;
1468 #endif
1469
1470 #ifdef CONFIG_PROFILER
1471 ti = profile_getclock();
1472 #endif
1473 if (use_icount) {
1474 int64_t count;
1475 int decr;
1476 timers_state.qemu_icount -= (cpu->icount_decr.u16.low
1477 + cpu->icount_extra);
1478 cpu->icount_decr.u16.low = 0;
1479 cpu->icount_extra = 0;
1480 count = tcg_get_icount_limit();
1481 timers_state.qemu_icount += count;
1482 decr = (count > 0xffff) ? 0xffff : count;
1483 count -= decr;
1484 cpu->icount_decr.u16.low = decr;
1485 cpu->icount_extra = count;
1486 }
1487 ret = cpu_exec(cpu);
1488 #ifdef CONFIG_PROFILER
1489 tcg_time += profile_getclock() - ti;
1490 #endif
1491 if (use_icount) {
1492 /* Fold pending instructions back into the
1493 instruction counter, and clear the interrupt flag. */
1494 timers_state.qemu_icount -= (cpu->icount_decr.u16.low
1495 + cpu->icount_extra);
1496 cpu->icount_decr.u32 = 0;
1497 cpu->icount_extra = 0;
1498 replay_account_executed_instructions();
1499 }
1500 return ret;
1501 }
1502
1503 static void tcg_exec_all(void)
1504 {
1505 int r;
1506
1507 /* Account partial waits to QEMU_CLOCK_VIRTUAL. */
1508 qemu_account_warp_timer();
1509
1510 if (next_cpu == NULL) {
1511 next_cpu = first_cpu;
1512 }
1513 for (; next_cpu != NULL && !exit_request; next_cpu = CPU_NEXT(next_cpu)) {
1514 CPUState *cpu = next_cpu;
1515
1516 qemu_clock_enable(QEMU_CLOCK_VIRTUAL,
1517 (cpu->singlestep_enabled & SSTEP_NOTIMER) == 0);
1518
1519 if (cpu_can_run(cpu)) {
1520 r = tcg_cpu_exec(cpu);
1521 if (r == EXCP_DEBUG) {
1522 cpu_handle_guest_debug(cpu);
1523 break;
1524 }
1525 } else if (cpu->stop || cpu->stopped) {
1526 break;
1527 }
1528 }
1529
1530 /* Pairs with smp_wmb in qemu_cpu_kick. */
1531 atomic_mb_set(&exit_request, 0);
1532 }
1533
1534 void list_cpus(FILE *f, fprintf_function cpu_fprintf, const char *optarg)
1535 {
1536 /* XXX: implement xxx_cpu_list for targets that still lack it */
1537 #if defined(cpu_list)
1538 cpu_list(f, cpu_fprintf);
1539 #endif
1540 }
1541
1542 CpuInfoList *qmp_query_cpus(Error **errp)
1543 {
1544 CpuInfoList *head = NULL, *cur_item = NULL;
1545 CPUState *cpu;
1546
1547 CPU_FOREACH(cpu) {
1548 CpuInfoList *info;
1549 #if defined(TARGET_I386)
1550 X86CPU *x86_cpu = X86_CPU(cpu);
1551 CPUX86State *env = &x86_cpu->env;
1552 #elif defined(TARGET_PPC)
1553 PowerPCCPU *ppc_cpu = POWERPC_CPU(cpu);
1554 CPUPPCState *env = &ppc_cpu->env;
1555 #elif defined(TARGET_SPARC)
1556 SPARCCPU *sparc_cpu = SPARC_CPU(cpu);
1557 CPUSPARCState *env = &sparc_cpu->env;
1558 #elif defined(TARGET_MIPS)
1559 MIPSCPU *mips_cpu = MIPS_CPU(cpu);
1560 CPUMIPSState *env = &mips_cpu->env;
1561 #elif defined(TARGET_TRICORE)
1562 TriCoreCPU *tricore_cpu = TRICORE_CPU(cpu);
1563 CPUTriCoreState *env = &tricore_cpu->env;
1564 #endif
1565
1566 cpu_synchronize_state(cpu);
1567
1568 info = g_malloc0(sizeof(*info));
1569 info->value = g_malloc0(sizeof(*info->value));
1570 info->value->CPU = cpu->cpu_index;
1571 info->value->current = (cpu == first_cpu);
1572 info->value->halted = cpu->halted;
1573 info->value->qom_path = object_get_canonical_path(OBJECT(cpu));
1574 info->value->thread_id = cpu->thread_id;
1575 #if defined(TARGET_I386)
1576 info->value->arch = CPU_INFO_ARCH_X86;
1577 info->value->u.x86.pc = env->eip + env->segs[R_CS].base;
1578 #elif defined(TARGET_PPC)
1579 info->value->arch = CPU_INFO_ARCH_PPC;
1580 info->value->u.ppc.nip = env->nip;
1581 #elif defined(TARGET_SPARC)
1582 info->value->arch = CPU_INFO_ARCH_SPARC;
1583 info->value->u.q_sparc.pc = env->pc;
1584 info->value->u.q_sparc.npc = env->npc;
1585 #elif defined(TARGET_MIPS)
1586 info->value->arch = CPU_INFO_ARCH_MIPS;
1587 info->value->u.q_mips.PC = env->active_tc.PC;
1588 #elif defined(TARGET_TRICORE)
1589 info->value->arch = CPU_INFO_ARCH_TRICORE;
1590 info->value->u.tricore.PC = env->PC;
1591 #else
1592 info->value->arch = CPU_INFO_ARCH_OTHER;
1593 #endif
1594
1595 /* XXX: waiting for the qapi to support GSList */
1596 if (!cur_item) {
1597 head = cur_item = info;
1598 } else {
1599 cur_item->next = info;
1600 cur_item = info;
1601 }
1602 }
1603
1604 return head;
1605 }
1606
1607 void qmp_memsave(int64_t addr, int64_t size, const char *filename,
1608 bool has_cpu, int64_t cpu_index, Error **errp)
1609 {
1610 FILE *f;
1611 uint32_t l;
1612 CPUState *cpu;
1613 uint8_t buf[1024];
1614 int64_t orig_addr = addr, orig_size = size;
1615
1616 if (!has_cpu) {
1617 cpu_index = 0;
1618 }
1619
1620 cpu = qemu_get_cpu(cpu_index);
1621 if (cpu == NULL) {
1622 error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "cpu-index",
1623 "a CPU number");
1624 return;
1625 }
1626
1627 f = fopen(filename, "wb");
1628 if (!f) {
1629 error_setg_file_open(errp, errno, filename);
1630 return;
1631 }
1632
1633 while (size != 0) {
1634 l = sizeof(buf);
1635 if (l > size)
1636 l = size;
1637 if (cpu_memory_rw_debug(cpu, addr, buf, l, 0) != 0) {
1638 error_setg(errp, "Invalid addr 0x%016" PRIx64 "/size %" PRId64
1639 " specified", orig_addr, orig_size);
1640 goto exit;
1641 }
1642 if (fwrite(buf, 1, l, f) != l) {
1643 error_setg(errp, QERR_IO_ERROR);
1644 goto exit;
1645 }
1646 addr += l;
1647 size -= l;
1648 }
1649
1650 exit:
1651 fclose(f);
1652 }
1653
1654 void qmp_pmemsave(int64_t addr, int64_t size, const char *filename,
1655 Error **errp)
1656 {
1657 FILE *f;
1658 uint32_t l;
1659 uint8_t buf[1024];
1660
1661 f = fopen(filename, "wb");
1662 if (!f) {
1663 error_setg_file_open(errp, errno, filename);
1664 return;
1665 }
1666
1667 while (size != 0) {
1668 l = sizeof(buf);
1669 if (l > size)
1670 l = size;
1671 cpu_physical_memory_read(addr, buf, l);
1672 if (fwrite(buf, 1, l, f) != l) {
1673 error_setg(errp, QERR_IO_ERROR);
1674 goto exit;
1675 }
1676 addr += l;
1677 size -= l;
1678 }
1679
1680 exit:
1681 fclose(f);
1682 }
1683
1684 void qmp_inject_nmi(Error **errp)
1685 {
1686 #if defined(TARGET_I386)
1687 CPUState *cs;
1688
1689 CPU_FOREACH(cs) {
1690 X86CPU *cpu = X86_CPU(cs);
1691
1692 if (!cpu->apic_state) {
1693 cpu_interrupt(cs, CPU_INTERRUPT_NMI);
1694 } else {
1695 apic_deliver_nmi(cpu->apic_state);
1696 }
1697 }
1698 #else
1699 nmi_monitor_handle(monitor_get_cpu_index(), errp);
1700 #endif
1701 }
1702
1703 void dump_drift_info(FILE *f, fprintf_function cpu_fprintf)
1704 {
1705 if (!use_icount) {
1706 return;
1707 }
1708
1709 cpu_fprintf(f, "Host - Guest clock %"PRIi64" ms\n",
1710 (cpu_get_clock() - cpu_get_icount())/SCALE_MS);
1711 if (icount_align_option) {
1712 cpu_fprintf(f, "Max guest delay %"PRIi64" ms\n", -max_delay/SCALE_MS);
1713 cpu_fprintf(f, "Max guest advance %"PRIi64" ms\n", max_advance/SCALE_MS);
1714 } else {
1715 cpu_fprintf(f, "Max guest delay NA\n");
1716 cpu_fprintf(f, "Max guest advance NA\n");
1717 }
1718 }