1 /*
2 * QEMU System Emulator
3 *
4 * Copyright (c) 2003-2008 Fabrice Bellard
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to deal
8 * in the Software without restriction, including without limitation the rights
9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 * copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22 * THE SOFTWARE.
23 */
24
25 /* Needed early for CONFIG_BSD etc. */
26 #include "qemu/osdep.h"
27 #include "qemu-common.h"
28 #include "qemu/config-file.h"
29 #include "cpu.h"
30 #include "monitor/monitor.h"
31 #include "qapi/qmp/qerror.h"
32 #include "qemu/error-report.h"
33 #include "sysemu/sysemu.h"
34 #include "sysemu/block-backend.h"
35 #include "exec/gdbstub.h"
36 #include "sysemu/dma.h"
37 #include "sysemu/hw_accel.h"
38 #include "sysemu/kvm.h"
39 #include "sysemu/hax.h"
40 #include "sysemu/hvf.h"
41 #include "qmp-commands.h"
42 #include "exec/exec-all.h"
43
44 #include "qemu/thread.h"
45 #include "sysemu/cpus.h"
46 #include "sysemu/qtest.h"
47 #include "qemu/main-loop.h"
48 #include "qemu/bitmap.h"
49 #include "qemu/seqlock.h"
50 #include "tcg.h"
51 #include "qapi-event.h"
52 #include "hw/nmi.h"
53 #include "sysemu/replay.h"
54 #include "hw/boards.h"
55
56 #ifdef CONFIG_LINUX
57
58 #include <sys/prctl.h>
59
60 #ifndef PR_MCE_KILL
61 #define PR_MCE_KILL 33
62 #endif
63
64 #ifndef PR_MCE_KILL_SET
65 #define PR_MCE_KILL_SET 1
66 #endif
67
68 #ifndef PR_MCE_KILL_EARLY
69 #define PR_MCE_KILL_EARLY 1
70 #endif
71
72 #endif /* CONFIG_LINUX */
73
74 int64_t max_delay;
75 int64_t max_advance;
76
77 /* vcpu throttling controls */
78 static QEMUTimer *throttle_timer;
79 static unsigned int throttle_percentage;
80
81 #define CPU_THROTTLE_PCT_MIN 1
82 #define CPU_THROTTLE_PCT_MAX 99
83 #define CPU_THROTTLE_TIMESLICE_NS 10000000
84
85 bool cpu_is_stopped(CPUState *cpu)
86 {
87 return cpu->stopped || !runstate_is_running();
88 }
89
90 static bool cpu_thread_is_idle(CPUState *cpu)
91 {
92 if (cpu->stop || cpu->queued_work_first) {
93 return false;
94 }
95 if (cpu_is_stopped(cpu)) {
96 return true;
97 }
98 if (!cpu->halted || cpu_has_work(cpu) ||
99 kvm_halt_in_kernel()) {
100 return false;
101 }
102 return true;
103 }
104
105 static bool all_cpu_threads_idle(void)
106 {
107 CPUState *cpu;
108
109 CPU_FOREACH(cpu) {
110 if (!cpu_thread_is_idle(cpu)) {
111 return false;
112 }
113 }
114 return true;
115 }
116
117 /***********************************************************/
118 /* guest cycle counter */
119
120 /* Protected by TimersState seqlock */
121
122 static bool icount_sleep = true;
123 static int64_t vm_clock_warp_start = -1;
124 /* Conversion factor from emulated instructions to virtual clock ticks. */
125 static int icount_time_shift;
126 /* Arbitrarily pick 1MIPS as the minimum allowable speed. */
127 #define MAX_ICOUNT_SHIFT 10
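/* A shift of 10 charges 1024 ns per instruction, i.e. roughly 1 MIPS. */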
128
129 static QEMUTimer *icount_rt_timer;
130 static QEMUTimer *icount_vm_timer;
131 static QEMUTimer *icount_warp_timer;
132
133 typedef struct TimersState {
134 /* Protected by BQL. */
135 int64_t cpu_ticks_prev;
136 int64_t cpu_ticks_offset;
137
138 /* cpu_clock_offset can be read out of BQL, so protect it with
139 * this lock.
140 */
141 QemuSeqLock vm_clock_seqlock;
142 int64_t cpu_clock_offset;
143 int32_t cpu_ticks_enabled;
144 int64_t dummy;
145
146 /* Compensate for varying guest execution speed. */
147 int64_t qemu_icount_bias;
148 /* Only written by TCG thread */
149 int64_t qemu_icount;
150 } TimersState;
151
152 static TimersState timers_state;
153 bool mttcg_enabled;
154
155 /*
156 * We default to false if we know other options have been enabled
157 * which are currently incompatible with MTTCG. Otherwise, once each
158 * guest (target) has been updated to support:
159 *   - atomic instructions
160 *   - memory ordering primitives (barriers)
161 * it can set the appropriate CONFIG flags in ${target}-softmmu.mak
162 *
163 * Once a guest architecture has been converted to the new primitives
164 * there are two remaining limitations to check.
165 *
166 * - The guest can't be oversized (e.g. 64 bit guest on 32 bit host)
167 * - The host must have a stronger memory order than the guest
168 *
169 * It may be possible in future to support strong guests on weak hosts
170 * but that will require tagging all load/stores in a guest with their
171 * implicit memory order requirements which would likely slow things
172 * down a lot.
173 */
174
175 static bool check_tcg_memory_orders_compatible(void)
176 {
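/* MTTCG is only safe if every ordering bit the guest requires is also
 * guaranteed by the host TCG backend, i.e. the guest's default memory
 * order must be a subset of the host's. */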
177 #if defined(TCG_GUEST_DEFAULT_MO) && defined(TCG_TARGET_DEFAULT_MO)
178 return (TCG_GUEST_DEFAULT_MO & ~TCG_TARGET_DEFAULT_MO) == 0;
179 #else
180 return false;
181 #endif
182 }
183
184 static bool default_mttcg_enabled(void)
185 {
186 if (use_icount || TCG_OVERSIZED_GUEST) {
187 return false;
188 } else {
189 #ifdef TARGET_SUPPORTS_MTTCG
190 return check_tcg_memory_orders_compatible();
191 #else
192 return false;
193 #endif
194 }
195 }
196
197 void qemu_tcg_configure(QemuOpts *opts, Error **errp)
198 {
199 const char *t = qemu_opt_get(opts, "thread");
200 if (t) {
201 if (strcmp(t, "multi") == 0) {
202 if (TCG_OVERSIZED_GUEST) {
203 error_setg(errp, "No MTTCG when guest word size > host's");
204 } else if (use_icount) {
205 error_setg(errp, "No MTTCG when icount is enabled");
206 } else {
207 #ifndef TARGET_SUPPORTS_MTTCG
208 error_report("Guest not yet converted to MTTCG - "
209 "you may get unexpected results");
210 #endif
211 if (!check_tcg_memory_orders_compatible()) {
212 error_report("Guest expects a stronger memory ordering "
213 "than the host provides");
214 error_printf("This may cause strange/hard to debug errors\n");
215 }
216 mttcg_enabled = true;
217 }
218 } else if (strcmp(t, "single") == 0) {
219 mttcg_enabled = false;
220 } else {
221 error_setg(errp, "Invalid 'thread' setting %s", t);
222 }
223 } else {
224 mttcg_enabled = default_mttcg_enabled();
225 }
226 }
227
228 /* The current number of executed instructions is based on what we
229 * originally budgeted minus the current state of the decrementing
230 * icount counters in extra/u16.low.
231 */
232 static int64_t cpu_get_icount_executed(CPUState *cpu)
233 {
234 return cpu->icount_budget - (cpu->icount_decr.u16.low + cpu->icount_extra);
235 }
236
237 /*
238 * Update the global shared timer_state.qemu_icount to take into
239 * account executed instructions. This is done by the TCG vCPU
240 * thread so the main-loop can see time has moved forward.
241 */
242 void cpu_update_icount(CPUState *cpu)
243 {
244 int64_t executed = cpu_get_icount_executed(cpu);
245 cpu->icount_budget -= executed;
246
247 #ifdef CONFIG_ATOMIC64
248 atomic_set__nocheck(&timers_state.qemu_icount,
249 atomic_read__nocheck(&timers_state.qemu_icount) +
250 executed);
251 #else /* FIXME: we need 64bit atomics to do this safely */
252 timers_state.qemu_icount += executed;
253 #endif
254 }
255
256 int64_t cpu_get_icount_raw(void)
257 {
258 CPUState *cpu = current_cpu;
259
260 if (cpu && cpu->running) {
261 if (!cpu->can_do_io) {
262 fprintf(stderr, "Bad icount read\n");
263 exit(1);
264 }
265 /* Take into account what has run */
266 cpu_update_icount(cpu);
267 }
268 #ifdef CONFIG_ATOMIC64
269 return atomic_read__nocheck(&timers_state.qemu_icount);
270 #else /* FIXME: we need 64bit atomics to do this safely */
271 return timers_state.qemu_icount;
272 #endif
273 }
274
275 /* Return the virtual CPU time, based on the instruction counter. */
276 static int64_t cpu_get_icount_locked(void)
277 {
278 int64_t icount = cpu_get_icount_raw();
279 return timers_state.qemu_icount_bias + cpu_icount_to_ns(icount);
280 }
281
282 int64_t cpu_get_icount(void)
283 {
284 int64_t icount;
285 unsigned start;
286
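/* Seqlock read loop: retry if a concurrent writer updated the timer
 * state while we were reading. */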
287 do {
288 start = seqlock_read_begin(&timers_state.vm_clock_seqlock);
289 icount = cpu_get_icount_locked();
290 } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start));
291
292 return icount;
293 }
294
295 int64_t cpu_icount_to_ns(int64_t icount)
296 {
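/* Each instruction is charged 2^icount_time_shift ns of virtual time;
 * e.g. a shift of 3 gives 8 ns per insn, i.e. 125 MIPS. */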
297 return icount << icount_time_shift;
298 }
299
300 /* return the time elapsed in VM between vm_start and vm_stop. Unless
301 * icount is active, cpu_get_ticks() uses units of the host CPU cycle
302 * counter.
303 *
304 * Caller must hold the BQL
305 */
306 int64_t cpu_get_ticks(void)
307 {
308 int64_t ticks;
309
310 if (use_icount) {
311 return cpu_get_icount();
312 }
313
314 ticks = timers_state.cpu_ticks_offset;
315 if (timers_state.cpu_ticks_enabled) {
316 ticks += cpu_get_host_ticks();
317 }
318
319 if (timers_state.cpu_ticks_prev > ticks) {
320 /* Note: non-increasing ticks may happen if the host uses
321 software suspend */
322 timers_state.cpu_ticks_offset += timers_state.cpu_ticks_prev - ticks;
323 ticks = timers_state.cpu_ticks_prev;
324 }
325
326 timers_state.cpu_ticks_prev = ticks;
327 return ticks;
328 }
329
330 static int64_t cpu_get_clock_locked(void)
331 {
332 int64_t time;
333
334 time = timers_state.cpu_clock_offset;
335 if (timers_state.cpu_ticks_enabled) {
336 time += get_clock();
337 }
338
339 return time;
340 }
341
342 /* Return the monotonic time elapsed in VM, i.e.,
343 * the time between vm_start and vm_stop
344 */
345 int64_t cpu_get_clock(void)
346 {
347 int64_t ti;
348 unsigned start;
349
350 do {
351 start = seqlock_read_begin(&timers_state.vm_clock_seqlock);
352 ti = cpu_get_clock_locked();
353 } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start));
354
355 return ti;
356 }
357
358 /* enable cpu_get_ticks()
359 * Caller must hold BQL which serves as mutex for vm_clock_seqlock.
360 */
361 void cpu_enable_ticks(void)
362 {
363 /* Here, the real thing protected by the seqlock is cpu_clock_offset. */
364 seqlock_write_begin(&timers_state.vm_clock_seqlock);
365 if (!timers_state.cpu_ticks_enabled) {
366 timers_state.cpu_ticks_offset -= cpu_get_host_ticks();
367 timers_state.cpu_clock_offset -= get_clock();
368 timers_state.cpu_ticks_enabled = 1;
369 }
370 seqlock_write_end(&timers_state.vm_clock_seqlock);
371 }
372
373 /* disable cpu_get_ticks(): the clock is stopped. You must not call
374 * cpu_get_ticks() after that.
375 * Caller must hold BQL which serves as mutex for vm_clock_seqlock.
376 */
377 void cpu_disable_ticks(void)
378 {
379 /* Here, the real thing protected by the seqlock is cpu_clock_offset. */
380 seqlock_write_begin(&timers_state.vm_clock_seqlock);
381 if (timers_state.cpu_ticks_enabled) {
382 timers_state.cpu_ticks_offset += cpu_get_host_ticks();
383 timers_state.cpu_clock_offset = cpu_get_clock_locked();
384 timers_state.cpu_ticks_enabled = 0;
385 }
386 seqlock_write_end(&timers_state.vm_clock_seqlock);
387 }
388
389 /* Correlation between real and virtual time is always going to be
390 fairly approximate, so ignore small variation.
391 When the guest is idle, real and virtual time will be aligned in
392 the IO wait loop. */
393 #define ICOUNT_WOBBLE (NANOSECONDS_PER_SECOND / 10)
394
395 static void icount_adjust(void)
396 {
397 int64_t cur_time;
398 int64_t cur_icount;
399 int64_t delta;
400
401 /* Protected by TimersState mutex. */
402 static int64_t last_delta;
403
404 /* If the VM is not running, then do nothing. */
405 if (!runstate_is_running()) {
406 return;
407 }
408
409 seqlock_write_begin(&timers_state.vm_clock_seqlock);
410 cur_time = cpu_get_clock_locked();
411 cur_icount = cpu_get_icount_locked();
412
413 delta = cur_icount - cur_time;
414 /* FIXME: This is a very crude algorithm, somewhat prone to oscillation. */
415 if (delta > 0
416 && last_delta + ICOUNT_WOBBLE < delta * 2
417 && icount_time_shift > 0) {
418 /* The guest is getting too far ahead. Slow time down. */
419 icount_time_shift--;
420 }
421 if (delta < 0
422 && last_delta - ICOUNT_WOBBLE > delta * 2
423 && icount_time_shift < MAX_ICOUNT_SHIFT) {
424 /* The guest is getting too far behind. Speed time up. */
425 icount_time_shift++;
426 }
427 last_delta = delta;
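/* Recompute the bias so the virtual clock value seen by readers does
 * not jump when icount_time_shift changes. */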
428 timers_state.qemu_icount_bias = cur_icount
429 - (timers_state.qemu_icount << icount_time_shift);
430 seqlock_write_end(&timers_state.vm_clock_seqlock);
431 }
432
433 static void icount_adjust_rt(void *opaque)
434 {
435 timer_mod(icount_rt_timer,
436 qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL_RT) + 1000);
437 icount_adjust();
438 }
439
440 static void icount_adjust_vm(void *opaque)
441 {
442 timer_mod(icount_vm_timer,
443 qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
444 NANOSECONDS_PER_SECOND / 10);
445 icount_adjust();
446 }
447
448 static int64_t qemu_icount_round(int64_t count)
449 {
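/* Convert a nanosecond deadline to an instruction count, rounding up
 * (ceil(count / 2^icount_time_shift)). */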
450 return (count + (1 << icount_time_shift) - 1) >> icount_time_shift;
451 }
452
453 static void icount_warp_rt(void)
454 {
455 unsigned seq;
456 int64_t warp_start;
457
458 /* The icount_warp_timer is rescheduled soon after vm_clock_warp_start
459 * changes from -1 to another value, so the race here is okay.
460 */
461 do {
462 seq = seqlock_read_begin(&timers_state.vm_clock_seqlock);
463 warp_start = vm_clock_warp_start;
464 } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, seq));
465
466 if (warp_start == -1) {
467 return;
468 }
469
470 seqlock_write_begin(&timers_state.vm_clock_seqlock);
471 if (runstate_is_running()) {
472 int64_t clock = REPLAY_CLOCK(REPLAY_CLOCK_VIRTUAL_RT,
473 cpu_get_clock_locked());
474 int64_t warp_delta;
475
476 warp_delta = clock - vm_clock_warp_start;
477 if (use_icount == 2) {
478 /*
479 * In adaptive mode, do not let QEMU_CLOCK_VIRTUAL run too
480 * far ahead of real time.
481 */
482 int64_t cur_icount = cpu_get_icount_locked();
483 int64_t delta = clock - cur_icount;
484 warp_delta = MIN(warp_delta, delta);
485 }
486 timers_state.qemu_icount_bias += warp_delta;
487 }
488 vm_clock_warp_start = -1;
489 seqlock_write_end(&timers_state.vm_clock_seqlock);
490
491 if (qemu_clock_expired(QEMU_CLOCK_VIRTUAL)) {
492 qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
493 }
494 }
495
496 static void icount_timer_cb(void *opaque)
497 {
498 /* No need for a checkpoint because the timer already synchronizes
499 * with CHECKPOINT_CLOCK_VIRTUAL_RT.
500 */
501 icount_warp_rt();
502 }
503
504 void qtest_clock_warp(int64_t dest)
505 {
506 int64_t clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
507 AioContext *aio_context;
508 assert(qtest_enabled());
509 aio_context = qemu_get_aio_context();
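/* Advance the clock in steps no larger than the next QEMU_CLOCK_VIRTUAL
 * deadline, so every timer still fires at its scheduled time. */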
510 while (clock < dest) {
511 int64_t deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
512 int64_t warp = qemu_soonest_timeout(dest - clock, deadline);
513
514 seqlock_write_begin(&timers_state.vm_clock_seqlock);
515 timers_state.qemu_icount_bias += warp;
516 seqlock_write_end(&timers_state.vm_clock_seqlock);
517
518 qemu_clock_run_timers(QEMU_CLOCK_VIRTUAL);
519 timerlist_run_timers(aio_context->tlg.tl[QEMU_CLOCK_VIRTUAL]);
520 clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
521 }
522 qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
523 }
524
525 void qemu_start_warp_timer(void)
526 {
527 int64_t clock;
528 int64_t deadline;
529
530 if (!use_icount) {
531 return;
532 }
533
534 /* Nothing to do if the VM is stopped: QEMU_CLOCK_VIRTUAL timers
535 * do not fire, so computing the deadline does not make sense.
536 */
537 if (!runstate_is_running()) {
538 return;
539 }
540
541 /* warp clock deterministically in record/replay mode */
542 if (!replay_checkpoint(CHECKPOINT_CLOCK_WARP_START)) {
543 return;
544 }
545
546 if (!all_cpu_threads_idle()) {
547 return;
548 }
549
550 if (qtest_enabled()) {
551 /* When testing, qtest commands advance icount. */
552 return;
553 }
554
555 /* We want to use the earliest deadline from ALL vm_clocks */
556 clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT);
557 deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
558 if (deadline < 0) {
559 static bool notified;
560 if (!icount_sleep && !notified) {
561 warn_report("icount sleep disabled and no active timers");
562 notified = true;
563 }
564 return;
565 }
566
567 if (deadline > 0) {
568 /*
569 * Ensure QEMU_CLOCK_VIRTUAL proceeds even when the virtual CPU goes to
570 * sleep. Otherwise, the CPU might be waiting for a future timer
571 * interrupt to wake it up, but the interrupt never comes because
572 * the vCPU isn't running any insns and thus doesn't advance the
573 * QEMU_CLOCK_VIRTUAL.
574 */
575 if (!icount_sleep) {
576 /*
577 * We never let VCPUs sleep in no-sleep icount mode.
578 * If there is a pending QEMU_CLOCK_VIRTUAL timer we just advance
579 * to the next QEMU_CLOCK_VIRTUAL event and notify it.
580 * It is useful when we want a deterministic execution time,
581 * isolated from host latencies.
582 */
583 seqlock_write_begin(&timers_state.vm_clock_seqlock);
584 timers_state.qemu_icount_bias += deadline;
585 seqlock_write_end(&timers_state.vm_clock_seqlock);
586 qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
587 } else {
588 /*
589 * "real" time (related to the time left until the next event) has
590 * passed. The QEMU_CLOCK_VIRTUAL_RT clock will do this.
591 * This prevents the warps from being visible externally; for example,
592 * you will not be sending network packets continuously instead of
593 * you will not be sending network packets continuously instead of
594 * every 100ms.
595 */
596 seqlock_write_begin(&timers_state.vm_clock_seqlock);
597 if (vm_clock_warp_start == -1 || vm_clock_warp_start > clock) {
598 vm_clock_warp_start = clock;
599 }
600 seqlock_write_end(&timers_state.vm_clock_seqlock);
601 timer_mod_anticipate(icount_warp_timer, clock + deadline);
602 }
603 } else if (deadline == 0) {
604 qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
605 }
606 }
607
608 static void qemu_account_warp_timer(void)
609 {
610 if (!use_icount || !icount_sleep) {
611 return;
612 }
613
614 /* Nothing to do if the VM is stopped: QEMU_CLOCK_VIRTUAL timers
615 * do not fire, so computing the deadline does not make sense.
616 */
617 if (!runstate_is_running()) {
618 return;
619 }
620
621 /* warp clock deterministically in record/replay mode */
622 if (!replay_checkpoint(CHECKPOINT_CLOCK_WARP_ACCOUNT)) {
623 return;
624 }
625
626 timer_del(icount_warp_timer);
627 icount_warp_rt();
628 }
629
630 static bool icount_state_needed(void *opaque)
631 {
632 return use_icount;
633 }
634
635 /*
636 * This is a subsection for icount migration.
637 */
638 static const VMStateDescription icount_vmstate_timers = {
639 .name = "timer/icount",
640 .version_id = 1,
641 .minimum_version_id = 1,
642 .needed = icount_state_needed,
643 .fields = (VMStateField[]) {
644 VMSTATE_INT64(qemu_icount_bias, TimersState),
645 VMSTATE_INT64(qemu_icount, TimersState),
646 VMSTATE_END_OF_LIST()
647 }
648 };
649
650 static const VMStateDescription vmstate_timers = {
651 .name = "timer",
652 .version_id = 2,
653 .minimum_version_id = 1,
654 .fields = (VMStateField[]) {
655 VMSTATE_INT64(cpu_ticks_offset, TimersState),
656 VMSTATE_INT64(dummy, TimersState),
657 VMSTATE_INT64_V(cpu_clock_offset, TimersState, 2),
658 VMSTATE_END_OF_LIST()
659 },
660 .subsections = (const VMStateDescription*[]) {
661 &icount_vmstate_timers,
662 NULL
663 }
664 };
665
666 static void cpu_throttle_thread(CPUState *cpu, run_on_cpu_data opaque)
667 {
668 double pct;
669 double throttle_ratio;
670 long sleeptime_ns;
671
672 if (!cpu_throttle_get_percentage()) {
673 return;
674 }
675
676 pct = (double)cpu_throttle_get_percentage()/100;
677 throttle_ratio = pct / (1 - pct);
678 sleeptime_ns = (long)(throttle_ratio * CPU_THROTTLE_TIMESLICE_NS);
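/* e.g. at 50% throttle the ratio is 1, so the vCPU sleeps 10ms for every
 * 10ms timeslice it runs; at 99% it sleeps 99 timeslices per slice run. */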
679
680 qemu_mutex_unlock_iothread();
681 g_usleep(sleeptime_ns / 1000); /* Convert ns to us for usleep call */
682 qemu_mutex_lock_iothread();
683 atomic_set(&cpu->throttle_thread_scheduled, 0);
684 }
685
686 static void cpu_throttle_timer_tick(void *opaque)
687 {
688 CPUState *cpu;
689 double pct;
690
691 /* Stop the timer if needed */
692 if (!cpu_throttle_get_percentage()) {
693 return;
694 }
695 CPU_FOREACH(cpu) {
696 if (!atomic_xchg(&cpu->throttle_thread_scheduled, 1)) {
697 async_run_on_cpu(cpu, cpu_throttle_thread,
698 RUN_ON_CPU_NULL);
699 }
700 }
701
702 pct = (double)cpu_throttle_get_percentage()/100;
703 timer_mod(throttle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT) +
704 CPU_THROTTLE_TIMESLICE_NS / (1-pct));
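/* The tick period is stretched to timeslice/(1-pct), so each period is
 * pct of it spent sleeping (requested above) plus one timeslice of run
 * time, giving the desired throttle fraction. */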
705 }
706
707 void cpu_throttle_set(int new_throttle_pct)
708 {
709 /* Ensure throttle percentage is within valid range */
710 new_throttle_pct = MIN(new_throttle_pct, CPU_THROTTLE_PCT_MAX);
711 new_throttle_pct = MAX(new_throttle_pct, CPU_THROTTLE_PCT_MIN);
712
713 atomic_set(&throttle_percentage, new_throttle_pct);
714
715 timer_mod(throttle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT) +
716 CPU_THROTTLE_TIMESLICE_NS);
717 }
718
719 void cpu_throttle_stop(void)
720 {
721 atomic_set(&throttle_percentage, 0);
722 }
723
724 bool cpu_throttle_active(void)
725 {
726 return (cpu_throttle_get_percentage() != 0);
727 }
728
729 int cpu_throttle_get_percentage(void)
730 {
731 return atomic_read(&throttle_percentage);
732 }
733
734 void cpu_ticks_init(void)
735 {
736 seqlock_init(&timers_state.vm_clock_seqlock);
737 vmstate_register(NULL, 0, &vmstate_timers, &timers_state);
738 throttle_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL_RT,
739 cpu_throttle_timer_tick, NULL);
740 }
741
742 void configure_icount(QemuOpts *opts, Error **errp)
743 {
744 const char *option;
745 char *rem_str = NULL;
746
747 option = qemu_opt_get(opts, "shift");
748 if (!option) {
749 if (qemu_opt_get(opts, "align") != NULL) {
750 error_setg(errp, "Please specify shift option when using align");
751 }
752 return;
753 }
754
755 icount_sleep = qemu_opt_get_bool(opts, "sleep", true);
756 if (icount_sleep) {
757 icount_warp_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL_RT,
758 icount_timer_cb, NULL);
759 }
760
761 icount_align_option = qemu_opt_get_bool(opts, "align", false);
762
763 if (icount_align_option && !icount_sleep) {
764 error_setg(errp, "align=on and sleep=off are incompatible");
765 }
766 if (strcmp(option, "auto") != 0) {
767 errno = 0;
768 icount_time_shift = strtol(option, &rem_str, 0);
769 if (errno != 0 || *rem_str != '\0' || !strlen(option)) {
770 error_setg(errp, "icount: Invalid shift value");
771 }
772 use_icount = 1;
773 return;
774 } else if (icount_align_option) {
775 error_setg(errp, "shift=auto and align=on are incompatible");
776 } else if (!icount_sleep) {
777 error_setg(errp, "shift=auto and sleep=off are incompatible");
778 }
779
780 use_icount = 2;
781
782 /* 125MIPS seems a reasonable initial guess at the guest speed.
783 It will be corrected fairly quickly anyway. */
784 icount_time_shift = 3;
785
786 /* Have both realtime and virtual time triggers for speed adjustment.
787 The realtime trigger catches emulated time passing too slowly,
788 the virtual time trigger catches emulated time passing too fast.
789 Realtime triggers occur even when idle, so use them less frequently
790 than VM triggers. */
791 icount_rt_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL_RT,
792 icount_adjust_rt, NULL);
793 timer_mod(icount_rt_timer,
794 qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL_RT) + 1000);
795 icount_vm_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
796 icount_adjust_vm, NULL);
797 timer_mod(icount_vm_timer,
798 qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
799 NANOSECONDS_PER_SECOND / 10);
800 }
801
802 /***********************************************************/
803 /* TCG vCPU kick timer
804 *
805 * The kick timer is responsible for moving single-threaded vCPU
806 * emulation on to the next vCPU. If more than one vCPU is running, a
807 * timer event will force a cpu->exit so the next vCPU can get
808 * scheduled.
809 *
810 * The timer is removed if all vCPUs are idle and restarted once any
811 * of them becomes runnable again.
812 */
813
814 static QEMUTimer *tcg_kick_vcpu_timer;
815 static CPUState *tcg_current_rr_cpu;
816
817 #define TCG_KICK_PERIOD (NANOSECONDS_PER_SECOND / 10)
818
819 static inline int64_t qemu_tcg_next_kick(void)
820 {
821 return qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + TCG_KICK_PERIOD;
822 }
823
824 /* Kick the currently round-robin scheduled vCPU */
825 static void qemu_cpu_kick_rr_cpu(void)
826 {
827 CPUState *cpu;
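/* Retry until the kick has hit whichever vCPU is currently scheduled;
 * if the round-robin scheduler moved on between the read and the kick,
 * kick the new vCPU as well. */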
828 do {
829 cpu = atomic_mb_read(&tcg_current_rr_cpu);
830 if (cpu) {
831 cpu_exit(cpu);
832 }
833 } while (cpu != atomic_mb_read(&tcg_current_rr_cpu));
834 }
835
836 static void do_nothing(CPUState *cpu, run_on_cpu_data unused)
837 {
838 }
839
840 void qemu_timer_notify_cb(void *opaque, QEMUClockType type)
841 {
842 if (!use_icount || type != QEMU_CLOCK_VIRTUAL) {
843 qemu_notify_event();
844 return;
845 }
846
847 if (!qemu_in_vcpu_thread() && first_cpu) {
848 /* qemu_cpu_kick is not enough to kick a halted CPU out of
849 * qemu_tcg_wait_io_event. async_run_on_cpu, instead,
850 * causes cpu_thread_is_idle to return false. This way,
851 * handle_icount_deadline can run.
852 */
853 async_run_on_cpu(first_cpu, do_nothing, RUN_ON_CPU_NULL);
854 }
855 }
856
857 static void kick_tcg_thread(void *opaque)
858 {
859 timer_mod(tcg_kick_vcpu_timer, qemu_tcg_next_kick());
860 qemu_cpu_kick_rr_cpu();
861 }
862
863 static void start_tcg_kick_timer(void)
864 {
865 if (!mttcg_enabled && !tcg_kick_vcpu_timer && CPU_NEXT(first_cpu)) {
866 tcg_kick_vcpu_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
867 kick_tcg_thread, NULL);
868 timer_mod(tcg_kick_vcpu_timer, qemu_tcg_next_kick());
869 }
870 }
871
872 static void stop_tcg_kick_timer(void)
873 {
874 if (tcg_kick_vcpu_timer) {
875 timer_del(tcg_kick_vcpu_timer);
876 tcg_kick_vcpu_timer = NULL;
877 }
878 }
879
880 /***********************************************************/
881 void hw_error(const char *fmt, ...)
882 {
883 va_list ap;
884 CPUState *cpu;
885
886 va_start(ap, fmt);
887 fprintf(stderr, "qemu: hardware error: ");
888 vfprintf(stderr, fmt, ap);
889 fprintf(stderr, "\n");
890 CPU_FOREACH(cpu) {
891 fprintf(stderr, "CPU #%d:\n", cpu->cpu_index);
892 cpu_dump_state(cpu, stderr, fprintf, CPU_DUMP_FPU);
893 }
894 va_end(ap);
895 abort();
896 }
897
898 void cpu_synchronize_all_states(void)
899 {
900 CPUState *cpu;
901
902 CPU_FOREACH(cpu) {
903 cpu_synchronize_state(cpu);
904 /* TODO: move to cpu_synchronize_state() */
905 if (hvf_enabled()) {
906 hvf_cpu_synchronize_state(cpu);
907 }
908 }
909 }
910
911 void cpu_synchronize_all_post_reset(void)
912 {
913 CPUState *cpu;
914
915 CPU_FOREACH(cpu) {
916 cpu_synchronize_post_reset(cpu);
917 /* TODO: move to cpu_synchronize_post_reset() */
918 if (hvf_enabled()) {
919 hvf_cpu_synchronize_post_reset(cpu);
920 }
921 }
922 }
923
924 void cpu_synchronize_all_post_init(void)
925 {
926 CPUState *cpu;
927
928 CPU_FOREACH(cpu) {
929 cpu_synchronize_post_init(cpu);
930 /* TODO: move to cpu_synchronize_post_init() */
931 if (hvf_enabled()) {
932 hvf_cpu_synchronize_post_init(cpu);
933 }
934 }
935 }
936
937 void cpu_synchronize_all_pre_loadvm(void)
938 {
939 CPUState *cpu;
940
941 CPU_FOREACH(cpu) {
942 cpu_synchronize_pre_loadvm(cpu);
943 }
944 }
945
946 static int do_vm_stop(RunState state)
947 {
948 int ret = 0;
949
950 if (runstate_is_running()) {
951 cpu_disable_ticks();
952 pause_all_vcpus();
953 runstate_set(state);
954 vm_state_notify(0, state);
955 qapi_event_send_stop(&error_abort);
956 }
957
958 bdrv_drain_all();
959 replay_disable_events();
960 ret = bdrv_flush_all();
961
962 return ret;
963 }
964
965 static bool cpu_can_run(CPUState *cpu)
966 {
967 if (cpu->stop) {
968 return false;
969 }
970 if (cpu_is_stopped(cpu)) {
971 return false;
972 }
973 return true;
974 }
975
976 static void cpu_handle_guest_debug(CPUState *cpu)
977 {
978 gdb_set_stop_cpu(cpu);
979 qemu_system_debug_request();
980 cpu->stopped = true;
981 }
982
983 #ifdef CONFIG_LINUX
984 static void sigbus_reraise(void)
985 {
986 sigset_t set;
987 struct sigaction action;
988
989 memset(&action, 0, sizeof(action));
990 action.sa_handler = SIG_DFL;
991 if (!sigaction(SIGBUS, &action, NULL)) {
992 raise(SIGBUS);
993 sigemptyset(&set);
994 sigaddset(&set, SIGBUS);
995 pthread_sigmask(SIG_UNBLOCK, &set, NULL);
996 }
997 perror("Failed to re-raise SIGBUS!\n");
998 abort();
999 }
1000
1001 static void sigbus_handler(int n, siginfo_t *siginfo, void *ctx)
1002 {
1003 if (siginfo->si_code != BUS_MCEERR_AO && siginfo->si_code != BUS_MCEERR_AR) {
1004 sigbus_reraise();
1005 }
1006
1007 if (current_cpu) {
1008 /* Called asynchronously in VCPU thread. */
1009 if (kvm_on_sigbus_vcpu(current_cpu, siginfo->si_code, siginfo->si_addr)) {
1010 sigbus_reraise();
1011 }
1012 } else {
1013 /* Called synchronously (via signalfd) in main thread. */
1014 if (kvm_on_sigbus(siginfo->si_code, siginfo->si_addr)) {
1015 sigbus_reraise();
1016 }
1017 }
1018 }
1019
1020 static void qemu_init_sigbus(void)
1021 {
1022 struct sigaction action;
1023
1024 memset(&action, 0, sizeof(action));
1025 action.sa_flags = SA_SIGINFO;
1026 action.sa_sigaction = sigbus_handler;
1027 sigaction(SIGBUS, &action, NULL);
1028
1029 prctl(PR_MCE_KILL, PR_MCE_KILL_SET, PR_MCE_KILL_EARLY, 0, 0);
1030 }
1031 #else /* !CONFIG_LINUX */
1032 static void qemu_init_sigbus(void)
1033 {
1034 }
1035 #endif /* !CONFIG_LINUX */
1036
1037 static QemuMutex qemu_global_mutex;
1038
1039 static QemuThread io_thread;
1040
1041 /* cpu creation */
1042 static QemuCond qemu_cpu_cond;
1043 /* system init */
1044 static QemuCond qemu_pause_cond;
1045
1046 void qemu_init_cpu_loop(void)
1047 {
1048 qemu_init_sigbus();
1049 qemu_cond_init(&qemu_cpu_cond);
1050 qemu_cond_init(&qemu_pause_cond);
1051 qemu_mutex_init(&qemu_global_mutex);
1052
1053 qemu_thread_get_self(&io_thread);
1054 }
1055
1056 void run_on_cpu(CPUState *cpu, run_on_cpu_func func, run_on_cpu_data data)
1057 {
1058 do_run_on_cpu(cpu, func, data, &qemu_global_mutex);
1059 }
1060
1061 static void qemu_kvm_destroy_vcpu(CPUState *cpu)
1062 {
1063 if (kvm_destroy_vcpu(cpu) < 0) {
1064 error_report("kvm_destroy_vcpu failed");
1065 exit(EXIT_FAILURE);
1066 }
1067 }
1068
1069 static void qemu_tcg_destroy_vcpu(CPUState *cpu)
1070 {
1071 }
1072
1073 static void qemu_cpu_stop(CPUState *cpu, bool exit)
1074 {
1075 g_assert(qemu_cpu_is_self(cpu));
1076 cpu->stop = false;
1077 cpu->stopped = true;
1078 if (exit) {
1079 cpu_exit(cpu);
1080 }
1081 qemu_cond_broadcast(&qemu_pause_cond);
1082 }
1083
1084 static void qemu_wait_io_event_common(CPUState *cpu)
1085 {
1086 atomic_mb_set(&cpu->thread_kicked, false);
1087 if (cpu->stop) {
1088 qemu_cpu_stop(cpu, false);
1089 }
1090 process_queued_cpu_work(cpu);
1091 }
1092
1093 static bool qemu_tcg_should_sleep(CPUState *cpu)
1094 {
1095 if (mttcg_enabled) {
1096 return cpu_thread_is_idle(cpu);
1097 } else {
1098 return all_cpu_threads_idle();
1099 }
1100 }
1101
1102 static void qemu_tcg_wait_io_event(CPUState *cpu)
1103 {
1104 while (qemu_tcg_should_sleep(cpu)) {
1105 stop_tcg_kick_timer();
1106 qemu_cond_wait(cpu->halt_cond, &qemu_global_mutex);
1107 }
1108
1109 start_tcg_kick_timer();
1110
1111 qemu_wait_io_event_common(cpu);
1112 }
1113
1114 static void qemu_kvm_wait_io_event(CPUState *cpu)
1115 {
1116 while (cpu_thread_is_idle(cpu)) {
1117 qemu_cond_wait(cpu->halt_cond, &qemu_global_mutex);
1118 }
1119
1120 qemu_wait_io_event_common(cpu);
1121 }
1122
1123 static void qemu_hvf_wait_io_event(CPUState *cpu)
1124 {
1125 while (cpu_thread_is_idle(cpu)) {
1126 qemu_cond_wait(cpu->halt_cond, &qemu_global_mutex);
1127 }
1128 qemu_wait_io_event_common(cpu);
1129 }
1130
1131 static void *qemu_kvm_cpu_thread_fn(void *arg)
1132 {
1133 CPUState *cpu = arg;
1134 int r;
1135
1136 rcu_register_thread();
1137
1138 qemu_mutex_lock_iothread();
1139 qemu_thread_get_self(cpu->thread);
1140 cpu->thread_id = qemu_get_thread_id();
1141 cpu->can_do_io = 1;
1142 current_cpu = cpu;
1143
1144 r = kvm_init_vcpu(cpu);
1145 if (r < 0) {
1146 fprintf(stderr, "kvm_init_vcpu failed: %s\n", strerror(-r));
1147 exit(1);
1148 }
1149
1150 kvm_init_cpu_signals(cpu);
1151
1152 /* signal CPU creation */
1153 cpu->created = true;
1154 qemu_cond_signal(&qemu_cpu_cond);
1155
1156 do {
1157 if (cpu_can_run(cpu)) {
1158 r = kvm_cpu_exec(cpu);
1159 if (r == EXCP_DEBUG) {
1160 cpu_handle_guest_debug(cpu);
1161 }
1162 }
1163 qemu_kvm_wait_io_event(cpu);
1164 } while (!cpu->unplug || cpu_can_run(cpu));
1165
1166 qemu_kvm_destroy_vcpu(cpu);
1167 cpu->created = false;
1168 qemu_cond_signal(&qemu_cpu_cond);
1169 qemu_mutex_unlock_iothread();
1170 return NULL;
1171 }
1172
1173 static void *qemu_dummy_cpu_thread_fn(void *arg)
1174 {
1175 #ifdef _WIN32
1176 fprintf(stderr, "qtest is not supported under Windows\n");
1177 exit(1);
1178 #else
1179 CPUState *cpu = arg;
1180 sigset_t waitset;
1181 int r;
1182
1183 rcu_register_thread();
1184
1185 qemu_mutex_lock_iothread();
1186 qemu_thread_get_self(cpu->thread);
1187 cpu->thread_id = qemu_get_thread_id();
1188 cpu->can_do_io = 1;
1189 current_cpu = cpu;
1190
1191 sigemptyset(&waitset);
1192 sigaddset(&waitset, SIG_IPI);
1193
1194 /* signal CPU creation */
1195 cpu->created = true;
1196 qemu_cond_signal(&qemu_cpu_cond);
1197
1198 while (1) {
1199 qemu_mutex_unlock_iothread();
1200 do {
1201 int sig;
1202 r = sigwait(&waitset, &sig);
1203 } while (r == -1 && (errno == EAGAIN || errno == EINTR));
1204 if (r == -1) {
1205 perror("sigwait");
1206 exit(1);
1207 }
1208 qemu_mutex_lock_iothread();
1209 qemu_wait_io_event_common(cpu);
1210 }
1211
1212 return NULL;
1213 #endif
1214 }
1215
1216 static int64_t tcg_get_icount_limit(void)
1217 {
1218 int64_t deadline;
1219
1220 if (replay_mode != REPLAY_MODE_PLAY) {
1221 deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
1222
1223 /* Maintain prior (possibly buggy) behaviour where if no deadline
1224 * was set (as there is no QEMU_CLOCK_VIRTUAL timer) or it is more than
1225 * INT32_MAX nanoseconds ahead, we still use INT32_MAX
1226 * nanoseconds.
1227 */
1228 if ((deadline < 0) || (deadline > INT32_MAX)) {
1229 deadline = INT32_MAX;
1230 }
1231
1232 return qemu_icount_round(deadline);
1233 } else {
1234 return replay_get_instructions();
1235 }
1236 }
1237
1238 static void handle_icount_deadline(void)
1239 {
1240 assert(qemu_in_vcpu_thread());
1241 if (use_icount) {
1242 int64_t deadline =
1243 qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
1244
1245 if (deadline == 0) {
1246 /* Wake up other AioContexts. */
1247 qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
1248 qemu_clock_run_timers(QEMU_CLOCK_VIRTUAL);
1249 }
1250 }
1251 }
1252
1253 static void prepare_icount_for_run(CPUState *cpu)
1254 {
1255 if (use_icount) {
1256 int insns_left;
1257
1258 /* These should always be cleared by process_icount_data after
1259 * each vCPU execution. However, u16.high can be raised
1260 * asynchronously by cpu_exit/cpu_interrupt/tcg_handle_interrupt
1261 */
1262 g_assert(cpu->icount_decr.u16.low == 0);
1263 g_assert(cpu->icount_extra == 0);
1264
1265 cpu->icount_budget = tcg_get_icount_limit();
1266 insns_left = MIN(0xffff, cpu->icount_budget);
1267 cpu->icount_decr.u16.low = insns_left;
1268 cpu->icount_extra = cpu->icount_budget - insns_left;
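/* The generated code decrements u16.low (at most 0xffff insns per run);
 * whatever is left of the budget is parked in icount_extra. */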
1269 }
1270 }
1271
1272 static void process_icount_data(CPUState *cpu)
1273 {
1274 if (use_icount) {
1275 /* Account for executed instructions */
1276 cpu_update_icount(cpu);
1277
1278 /* Reset the counters */
1279 cpu->icount_decr.u16.low = 0;
1280 cpu->icount_extra = 0;
1281 cpu->icount_budget = 0;
1282
1283 replay_account_executed_instructions();
1284 }
1285 }
1286
1287
1288 static int tcg_cpu_exec(CPUState *cpu)
1289 {
1290 int ret;
1291 #ifdef CONFIG_PROFILER
1292 int64_t ti;
1293 #endif
1294
1295 #ifdef CONFIG_PROFILER
1296 ti = profile_getclock();
1297 #endif
1298 qemu_mutex_unlock_iothread();
1299 cpu_exec_start(cpu);
1300 ret = cpu_exec(cpu);
1301 cpu_exec_end(cpu);
1302 qemu_mutex_lock_iothread();
1303 #ifdef CONFIG_PROFILER
1304 tcg_time += profile_getclock() - ti;
1305 #endif
1306 return ret;
1307 }
1308
1309 /* Destroy any remaining vCPUs which have been unplugged and have
1310 * finished running
1311 */
1312 static void deal_with_unplugged_cpus(void)
1313 {
1314 CPUState *cpu;
1315
1316 CPU_FOREACH(cpu) {
1317 if (cpu->unplug && !cpu_can_run(cpu)) {
1318 qemu_tcg_destroy_vcpu(cpu);
1319 cpu->created = false;
1320 qemu_cond_signal(&qemu_cpu_cond);
1321 break;
1322 }
1323 }
1324 }
1325
1326 /* Single-threaded TCG
1327 *
1328 * In the single-threaded case each vCPU is simulated in turn. If
1329 * there is more than a single vCPU we create a simple timer to kick
1330 * the vCPU and ensure we don't get stuck in a tight loop in one vCPU.
1331 * This is done explicitly rather than relying on side-effects
1332 * elsewhere.
1333 */
1334
1335 static void *qemu_tcg_rr_cpu_thread_fn(void *arg)
1336 {
1337 CPUState *cpu = arg;
1338
1339 rcu_register_thread();
1340 tcg_register_thread();
1341
1342 qemu_mutex_lock_iothread();
1343 qemu_thread_get_self(cpu->thread);
1344
1345 CPU_FOREACH(cpu) {
1346 cpu->thread_id = qemu_get_thread_id();
1347 cpu->created = true;
1348 cpu->can_do_io = 1;
1349 }
1350 qemu_cond_signal(&qemu_cpu_cond);
1351
1352 /* wait for initial kick-off after machine start */
1353 while (first_cpu->stopped) {
1354 qemu_cond_wait(first_cpu->halt_cond, &qemu_global_mutex);
1355
1356 /* process any pending work */
1357 CPU_FOREACH(cpu) {
1358 current_cpu = cpu;
1359 qemu_wait_io_event_common(cpu);
1360 }
1361 }
1362
1363 start_tcg_kick_timer();
1364
1365 cpu = first_cpu;
1366
1367 /* process any pending work */
1368 cpu->exit_request = 1;
1369
1370 while (1) {
1371 /* Account partial waits to QEMU_CLOCK_VIRTUAL. */
1372 qemu_account_warp_timer();
1373
1374 /* Run the timers here. This is much more efficient than
1375 * waking up the I/O thread and waiting for completion.
1376 */
1377 handle_icount_deadline();
1378
1379 if (!cpu) {
1380 cpu = first_cpu;
1381 }
1382
1383 while (cpu && !cpu->queued_work_first && !cpu->exit_request) {
1384
1385 atomic_mb_set(&tcg_current_rr_cpu, cpu);
1386 current_cpu = cpu;
1387
1388 qemu_clock_enable(QEMU_CLOCK_VIRTUAL,
1389 (cpu->singlestep_enabled & SSTEP_NOTIMER) == 0);
1390
1391 if (cpu_can_run(cpu)) {
1392 int r;
1393
1394 prepare_icount_for_run(cpu);
1395
1396 r = tcg_cpu_exec(cpu);
1397
1398 process_icount_data(cpu);
1399
1400 if (r == EXCP_DEBUG) {
1401 cpu_handle_guest_debug(cpu);
1402 break;
1403 } else if (r == EXCP_ATOMIC) {
1404 qemu_mutex_unlock_iothread();
1405 cpu_exec_step_atomic(cpu);
1406 qemu_mutex_lock_iothread();
1407 break;
1408 }
1409 } else if (cpu->stop) {
1410 if (cpu->unplug) {
1411 cpu = CPU_NEXT(cpu);
1412 }
1413 break;
1414 }
1415
1416 cpu = CPU_NEXT(cpu);
1417 } /* while (cpu && !cpu->exit_request).. */
1418
1419 /* Does not need atomic_mb_set because a spurious wakeup is okay. */
1420 atomic_set(&tcg_current_rr_cpu, NULL);
1421
1422 if (cpu && cpu->exit_request) {
1423 atomic_mb_set(&cpu->exit_request, 0);
1424 }
1425
1426 qemu_tcg_wait_io_event(cpu ? cpu : QTAILQ_FIRST(&cpus));
1427 deal_with_unplugged_cpus();
1428 }
1429
1430 return NULL;
1431 }
1432
1433 static void *qemu_hax_cpu_thread_fn(void *arg)
1434 {
1435 CPUState *cpu = arg;
1436 int r;
1437
1438 qemu_mutex_lock_iothread();
1439 qemu_thread_get_self(cpu->thread);
1440
1441 cpu->thread_id = qemu_get_thread_id();
1442 cpu->created = true;
1443 cpu->halted = 0;
1444 current_cpu = cpu;
1445
1446 hax_init_vcpu(cpu);
1447 qemu_cond_signal(&qemu_cpu_cond);
1448
1449 while (1) {
1450 if (cpu_can_run(cpu)) {
1451 r = hax_smp_cpu_exec(cpu);
1452 if (r == EXCP_DEBUG) {
1453 cpu_handle_guest_debug(cpu);
1454 }
1455 }
1456
1457 while (cpu_thread_is_idle(cpu)) {
1458 qemu_cond_wait(cpu->halt_cond, &qemu_global_mutex);
1459 }
1460 #ifdef _WIN32
1461 SleepEx(0, TRUE);
1462 #endif
1463 qemu_wait_io_event_common(cpu);
1464 }
1465 return NULL;
1466 }
1467
1468 /* The HVF-specific vCPU thread function. This one should only run when the host
1469 * CPU supports the VMX "unrestricted guest" feature. */
1470 static void *qemu_hvf_cpu_thread_fn(void *arg)
1471 {
1472 CPUState *cpu = arg;
1473
1474 int r;
1475
1476 assert(hvf_enabled());
1477
1478 rcu_register_thread();
1479
1480 qemu_mutex_lock_iothread();
1481 qemu_thread_get_self(cpu->thread);
1482
1483 cpu->thread_id = qemu_get_thread_id();
1484 cpu->can_do_io = 1;
1485 current_cpu = cpu;
1486
1487 hvf_init_vcpu(cpu);
1488
1489 /* signal CPU creation */
1490 cpu->created = true;
1491 qemu_cond_signal(&qemu_cpu_cond);
1492
1493 do {
1494 if (cpu_can_run(cpu)) {
1495 r = hvf_vcpu_exec(cpu);
1496 if (r == EXCP_DEBUG) {
1497 cpu_handle_guest_debug(cpu);
1498 }
1499 }
1500 qemu_hvf_wait_io_event(cpu);
1501 } while (!cpu->unplug || cpu_can_run(cpu));
1502
1503 hvf_vcpu_destroy(cpu);
1504 cpu->created = false;
1505 qemu_cond_signal(&qemu_cpu_cond);
1506 qemu_mutex_unlock_iothread();
1507 return NULL;
1508 }
1509
1510 #ifdef _WIN32
1511 static void CALLBACK dummy_apc_func(ULONG_PTR unused)
1512 {
1513 }
1514 #endif
1515
1516 /* Multi-threaded TCG
1517 *
1518 * In the multi-threaded case each vCPU has its own thread. The TLS
1519 * variable current_cpu can be used deep in the code to find the
1520 * current CPUState for a given thread.
1521 */
1522
1523 static void *qemu_tcg_cpu_thread_fn(void *arg)
1524 {
1525 CPUState *cpu = arg;
1526
1527 g_assert(!use_icount);
1528
1529 rcu_register_thread();
1530 tcg_register_thread();
1531
1532 qemu_mutex_lock_iothread();
1533 qemu_thread_get_self(cpu->thread);
1534
1535 cpu->thread_id = qemu_get_thread_id();
1536 cpu->created = true;
1537 cpu->can_do_io = 1;
1538 current_cpu = cpu;
1539 qemu_cond_signal(&qemu_cpu_cond);
1540
1541 /* process any pending work */
1542 cpu->exit_request = 1;
1543
1544 while (1) {
1545 if (cpu_can_run(cpu)) {
1546 int r;
1547 r = tcg_cpu_exec(cpu);
1548 switch (r) {
1549 case EXCP_DEBUG:
1550 cpu_handle_guest_debug(cpu);
1551 break;
1552 case EXCP_HALTED:
1553 /* during start-up the vCPU is reset and the thread is
1554 * kicked several times. If we don't ensure we go back
1555 * to sleep in the halted state, we won't cleanly
1556 * start up when the vCPU is enabled.
1557 *
1558 * cpu->halted should ensure we sleep in wait_io_event
1559 */
1560 g_assert(cpu->halted);
1561 break;
1562 case EXCP_ATOMIC:
1563 qemu_mutex_unlock_iothread();
1564 cpu_exec_step_atomic(cpu);
1565 qemu_mutex_lock_iothread();
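/* fall through */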
1566 default:
1567 /* Ignore everything else? */
1568 break;
1569 }
1570 } else if (cpu->unplug) {
1571 qemu_tcg_destroy_vcpu(cpu);
1572 cpu->created = false;
1573 qemu_cond_signal(&qemu_cpu_cond);
1574 qemu_mutex_unlock_iothread();
1575 return NULL;
1576 }
1577
1578 atomic_mb_set(&cpu->exit_request, 0);
1579 qemu_tcg_wait_io_event(cpu);
1580 }
1581
1582 return NULL;
1583 }
1584
1585 static void qemu_cpu_kick_thread(CPUState *cpu)
1586 {
1587 #ifndef _WIN32
1588 int err;
1589
1590 if (cpu->thread_kicked) {
1591 return;
1592 }
1593 cpu->thread_kicked = true;
1594 err = pthread_kill(cpu->thread->thread, SIG_IPI);
1595 if (err) {
1596 fprintf(stderr, "qemu:%s: %s", __func__, strerror(err));
1597 exit(1);
1598 }
1599 #else /* _WIN32 */
1600 if (!qemu_cpu_is_self(cpu)) {
1601 if (!QueueUserAPC(dummy_apc_func, cpu->hThread, 0)) {
1602 fprintf(stderr, "%s: QueueUserAPC failed with error %lu\n",
1603 __func__, GetLastError());
1604 exit(1);
1605 }
1606 }
1607 #endif
1608 }
1609
1610 void qemu_cpu_kick(CPUState *cpu)
1611 {
1612 qemu_cond_broadcast(cpu->halt_cond);
1613 if (tcg_enabled()) {
1614 cpu_exit(cpu);
1615 /* NOP unless doing single-thread RR */
1616 qemu_cpu_kick_rr_cpu();
1617 } else {
1618 if (hax_enabled()) {
1619 /*
1620 * FIXME: race condition with the exit_request check in
1621 * hax_vcpu_hax_exec
1622 */
1623 cpu->exit_request = 1;
1624 }
1625 qemu_cpu_kick_thread(cpu);
1626 }
1627 }
1628
1629 void qemu_cpu_kick_self(void)
1630 {
1631 assert(current_cpu);
1632 qemu_cpu_kick_thread(current_cpu);
1633 }
1634
1635 bool qemu_cpu_is_self(CPUState *cpu)
1636 {
1637 return qemu_thread_is_self(cpu->thread);
1638 }
1639
1640 bool qemu_in_vcpu_thread(void)
1641 {
1642 return current_cpu && qemu_cpu_is_self(current_cpu);
1643 }
1644
1645 static __thread bool iothread_locked = false;
1646
1647 bool qemu_mutex_iothread_locked(void)
1648 {
1649 return iothread_locked;
1650 }
1651
1652 void qemu_mutex_lock_iothread(void)
1653 {
1654 g_assert(!qemu_mutex_iothread_locked());
1655 qemu_mutex_lock(&qemu_global_mutex);
1656 iothread_locked = true;
1657 }
1658
1659 void qemu_mutex_unlock_iothread(void)
1660 {
1661 g_assert(qemu_mutex_iothread_locked());
1662 iothread_locked = false;
1663 qemu_mutex_unlock(&qemu_global_mutex);
1664 }
1665
1666 static bool all_vcpus_paused(void)
1667 {
1668 CPUState *cpu;
1669
1670 CPU_FOREACH(cpu) {
1671 if (!cpu->stopped) {
1672 return false;
1673 }
1674 }
1675
1676 return true;
1677 }
1678
1679 void pause_all_vcpus(void)
1680 {
1681 CPUState *cpu;
1682
1683 qemu_clock_enable(QEMU_CLOCK_VIRTUAL, false);
1684 CPU_FOREACH(cpu) {
1685 if (qemu_cpu_is_self(cpu)) {
1686 qemu_cpu_stop(cpu, true);
1687 } else {
1688 cpu->stop = true;
1689 qemu_cpu_kick(cpu);
1690 }
1691 }
1692
1693 while (!all_vcpus_paused()) {
1694 qemu_cond_wait(&qemu_pause_cond, &qemu_global_mutex);
1695 CPU_FOREACH(cpu) {
1696 qemu_cpu_kick(cpu);
1697 }
1698 }
1699 }
1700
1701 void cpu_resume(CPUState *cpu)
1702 {
1703 cpu->stop = false;
1704 cpu->stopped = false;
1705 qemu_cpu_kick(cpu);
1706 }
1707
1708 void resume_all_vcpus(void)
1709 {
1710 CPUState *cpu;
1711
1712 qemu_clock_enable(QEMU_CLOCK_VIRTUAL, true);
1713 CPU_FOREACH(cpu) {
1714 cpu_resume(cpu);
1715 }
1716 }
1717
1718 void cpu_remove(CPUState *cpu)
1719 {
1720 cpu->stop = true;
1721 cpu->unplug = true;
1722 qemu_cpu_kick(cpu);
1723 }
1724
1725 void cpu_remove_sync(CPUState *cpu)
1726 {
1727 cpu_remove(cpu);
1728 while (cpu->created) {
1729 qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
1730 }
1731 }
1732
1733 /* Size of the temporary buffers used to form vCPU thread names */
1734 #define VCPU_THREAD_NAME_SIZE 16
1735
1736 static void qemu_tcg_init_vcpu(CPUState *cpu)
1737 {
1738 char thread_name[VCPU_THREAD_NAME_SIZE];
1739 static QemuCond *single_tcg_halt_cond;
1740 static QemuThread *single_tcg_cpu_thread;
1741 static int tcg_region_inited;
1742
1743 /*
1744 * Initialize TCG regions--once. Now is a good time, because:
1745 * (1) TCG's init context, prologue and target globals have been set up.
1746 * (2) qemu_tcg_mttcg_enabled() works now (TCG init code runs before the
1747 * -accel flag is processed, so the check doesn't work then).
1748 */
1749 if (!tcg_region_inited) {
1750 tcg_region_inited = 1;
1751 tcg_region_init();
1752 }
1753
1754 if (qemu_tcg_mttcg_enabled() || !single_tcg_cpu_thread) {
1755 cpu->thread = g_malloc0(sizeof(QemuThread));
1756 cpu->halt_cond = g_malloc0(sizeof(QemuCond));
1757 qemu_cond_init(cpu->halt_cond);
1758
1759 if (qemu_tcg_mttcg_enabled()) {
1760 /* create a thread per vCPU with TCG (MTTCG) */
1761 parallel_cpus = true;
1762 snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/TCG",
1763 cpu->cpu_index);
1764
1765 qemu_thread_create(cpu->thread, thread_name, qemu_tcg_cpu_thread_fn,
1766 cpu, QEMU_THREAD_JOINABLE);
1767
1768 } else {
1769 /* share a single thread for all cpus with TCG */
1770 snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "ALL CPUs/TCG");
1771 qemu_thread_create(cpu->thread, thread_name,
1772 qemu_tcg_rr_cpu_thread_fn,
1773 cpu, QEMU_THREAD_JOINABLE);
1774
1775 single_tcg_halt_cond = cpu->halt_cond;
1776 single_tcg_cpu_thread = cpu->thread;
1777 }
1778 #ifdef _WIN32
1779 cpu->hThread = qemu_thread_get_handle(cpu->thread);
1780 #endif
1781 while (!cpu->created) {
1782 qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
1783 }
1784 } else {
1785 /* For non-MTTCG cases we share the thread */
1786 cpu->thread = single_tcg_cpu_thread;
1787 cpu->halt_cond = single_tcg_halt_cond;
1788 }
1789 }
1790
1791 static void qemu_hax_start_vcpu(CPUState *cpu)
1792 {
1793 char thread_name[VCPU_THREAD_NAME_SIZE];
1794
1795 cpu->thread = g_malloc0(sizeof(QemuThread));
1796 cpu->halt_cond = g_malloc0(sizeof(QemuCond));
1797 qemu_cond_init(cpu->halt_cond);
1798
1799 snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/HAX",
1800 cpu->cpu_index);
1801 qemu_thread_create(cpu->thread, thread_name, qemu_hax_cpu_thread_fn,
1802 cpu, QEMU_THREAD_JOINABLE);
1803 #ifdef _WIN32
1804 cpu->hThread = qemu_thread_get_handle(cpu->thread);
1805 #endif
1806 while (!cpu->created) {
1807 qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
1808 }
1809 }
1810
1811 static void qemu_kvm_start_vcpu(CPUState *cpu)
1812 {
1813 char thread_name[VCPU_THREAD_NAME_SIZE];
1814
1815 cpu->thread = g_malloc0(sizeof(QemuThread));
1816 cpu->halt_cond = g_malloc0(sizeof(QemuCond));
1817 qemu_cond_init(cpu->halt_cond);
1818 snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/KVM",
1819 cpu->cpu_index);
1820 qemu_thread_create(cpu->thread, thread_name, qemu_kvm_cpu_thread_fn,
1821 cpu, QEMU_THREAD_JOINABLE);
1822 while (!cpu->created) {
1823 qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
1824 }
1825 }
1826
1827 static void qemu_hvf_start_vcpu(CPUState *cpu)
1828 {
1829 char thread_name[VCPU_THREAD_NAME_SIZE];
1830
1831 /* HVF currently does not support TCG, and only runs in
1832 * unrestricted-guest mode. */
1833 assert(hvf_enabled());
1834
1835 cpu->thread = g_malloc0(sizeof(QemuThread));
1836 cpu->halt_cond = g_malloc0(sizeof(QemuCond));
1837 qemu_cond_init(cpu->halt_cond);
1838
1839 snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/HVF",
1840 cpu->cpu_index);
1841 qemu_thread_create(cpu->thread, thread_name, qemu_hvf_cpu_thread_fn,
1842 cpu, QEMU_THREAD_JOINABLE);
1843 while (!cpu->created) {
1844 qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
1845 }
1846 }
1847
1848 static void qemu_dummy_start_vcpu(CPUState *cpu)
1849 {
1850 char thread_name[VCPU_THREAD_NAME_SIZE];
1851
1852 cpu->thread = g_malloc0(sizeof(QemuThread));
1853 cpu->halt_cond = g_malloc0(sizeof(QemuCond));
1854 qemu_cond_init(cpu->halt_cond);
1855 snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/DUMMY",
1856 cpu->cpu_index);
1857 qemu_thread_create(cpu->thread, thread_name, qemu_dummy_cpu_thread_fn, cpu,
1858 QEMU_THREAD_JOINABLE);
1859 while (!cpu->created) {
1860 qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
1861 }
1862 }
1863
1864 void qemu_init_vcpu(CPUState *cpu)
1865 {
1866 cpu->nr_cores = smp_cores;
1867 cpu->nr_threads = smp_threads;
1868 cpu->stopped = true;
1869
1870 if (!cpu->as) {
1871 /* If the target cpu hasn't set up any address spaces itself,
1872 * give it the default one.
1873 */
1874 cpu->num_ases = 1;
1875 cpu_address_space_init(cpu, 0, "cpu-memory", cpu->memory);
1876 }
1877
1878 if (kvm_enabled()) {
1879 qemu_kvm_start_vcpu(cpu);
1880 } else if (hax_enabled()) {
1881 qemu_hax_start_vcpu(cpu);
1882 } else if (hvf_enabled()) {
1883 qemu_hvf_start_vcpu(cpu);
1884 } else if (tcg_enabled()) {
1885 qemu_tcg_init_vcpu(cpu);
1886 } else {
1887 qemu_dummy_start_vcpu(cpu);
1888 }
1889 }
1890
1891 void cpu_stop_current(void)
1892 {
1893 if (current_cpu) {
1894 qemu_cpu_stop(current_cpu, true);
1895 }
1896 }
1897
1898 int vm_stop(RunState state)
1899 {
1900 if (qemu_in_vcpu_thread()) {
1901 qemu_system_vmstop_request_prepare();
1902 qemu_system_vmstop_request(state);
1903 /*
1904 * FIXME: should not return to device code in case
1905 * vm_stop() has been requested.
1906 */
1907 cpu_stop_current();
1908 return 0;
1909 }
1910
1911 return do_vm_stop(state);
1912 }
1913
1914 /**
1915 * Prepare for (re)starting the VM.
1916 * Returns -1 if the vCPUs are not to be restarted (e.g. if they are already
1917 * running or in case of an error condition), 0 otherwise.
1918 */
1919 int vm_prepare_start(void)
1920 {
1921 RunState requested;
1922 int res = 0;
1923
1924 qemu_vmstop_requested(&requested);
1925 if (runstate_is_running() && requested == RUN_STATE__MAX) {
1926 return -1;
1927 }
1928
1929 /* Ensure that a STOP/RESUME pair of events is emitted if a
1930 * vmstop request was pending. The BLOCK_IO_ERROR event, for
1931 * example, is documented to always be followed by the STOP
1932 * event.
1933 */
1934 if (runstate_is_running()) {
1935 qapi_event_send_stop(&error_abort);
1936 res = -1;
1937 } else {
1938 replay_enable_events();
1939 cpu_enable_ticks();
1940 runstate_set(RUN_STATE_RUNNING);
1941 vm_state_notify(1, RUN_STATE_RUNNING);
1942 }
1943
1944 /* We are sending this now, but the CPUs will be resumed shortly afterwards */
1945 qapi_event_send_resume(&error_abort);
1946 return res;
1947 }
1948
1949 void vm_start(void)
1950 {
1951 if (!vm_prepare_start()) {
1952 resume_all_vcpus();
1953 }
1954 }
1955
1956 /* Does a state transition even if the VM is already stopped;
1957 the current state is forgotten forever. */
1958 int vm_stop_force_state(RunState state)
1959 {
1960 if (runstate_is_running()) {
1961 return vm_stop(state);
1962 } else {
1963 runstate_set(state);
1964
1965 bdrv_drain_all();
1966 /* Make sure to return an error if the flush in a previous vm_stop()
1967 * failed. */
1968 return bdrv_flush_all();
1969 }
1970 }
1971
1972 void list_cpus(FILE *f, fprintf_function cpu_fprintf, const char *optarg)
1973 {
1974 /* XXX: implement xxx_cpu_list for targets that still miss it */
1975 #if defined(cpu_list)
1976 cpu_list(f, cpu_fprintf);
1977 #endif
1978 }
1979
1980 CpuInfoList *qmp_query_cpus(Error **errp)
1981 {
1982 MachineState *ms = MACHINE(qdev_get_machine());
1983 MachineClass *mc = MACHINE_GET_CLASS(ms);
1984 CpuInfoList *head = NULL, *cur_item = NULL;
1985 CPUState *cpu;
1986
1987 CPU_FOREACH(cpu) {
1988 CpuInfoList *info;
1989 #if defined(TARGET_I386)
1990 X86CPU *x86_cpu = X86_CPU(cpu);
1991 CPUX86State *env = &x86_cpu->env;
1992 #elif defined(TARGET_PPC)
1993 PowerPCCPU *ppc_cpu = POWERPC_CPU(cpu);
1994 CPUPPCState *env = &ppc_cpu->env;
1995 #elif defined(TARGET_SPARC)
1996 SPARCCPU *sparc_cpu = SPARC_CPU(cpu);
1997 CPUSPARCState *env = &sparc_cpu->env;
1998 #elif defined(TARGET_MIPS)
1999 MIPSCPU *mips_cpu = MIPS_CPU(cpu);
2000 CPUMIPSState *env = &mips_cpu->env;
2001 #elif defined(TARGET_TRICORE)
2002 TriCoreCPU *tricore_cpu = TRICORE_CPU(cpu);
2003 CPUTriCoreState *env = &tricore_cpu->env;
2004 #endif
2005
2006 cpu_synchronize_state(cpu);
2007
2008 info = g_malloc0(sizeof(*info));
2009 info->value = g_malloc0(sizeof(*info->value));
2010 info->value->CPU = cpu->cpu_index;
2011 info->value->current = (cpu == first_cpu);
2012 info->value->halted = cpu->halted;
2013 info->value->qom_path = object_get_canonical_path(OBJECT(cpu));
2014 info->value->thread_id = cpu->thread_id;
2015 #if defined(TARGET_I386)
2016 info->value->arch = CPU_INFO_ARCH_X86;
2017 info->value->u.x86.pc = env->eip + env->segs[R_CS].base;
2018 #elif defined(TARGET_PPC)
2019 info->value->arch = CPU_INFO_ARCH_PPC;
2020 info->value->u.ppc.nip = env->nip;
2021 #elif defined(TARGET_SPARC)
2022 info->value->arch = CPU_INFO_ARCH_SPARC;
2023 info->value->u.q_sparc.pc = env->pc;
2024 info->value->u.q_sparc.npc = env->npc;
2025 #elif defined(TARGET_MIPS)
2026 info->value->arch = CPU_INFO_ARCH_MIPS;
2027 info->value->u.q_mips.PC = env->active_tc.PC;
2028 #elif defined(TARGET_TRICORE)
2029 info->value->arch = CPU_INFO_ARCH_TRICORE;
2030 info->value->u.tricore.PC = env->PC;
2031 #else
2032 info->value->arch = CPU_INFO_ARCH_OTHER;
2033 #endif
2034 info->value->has_props = !!mc->cpu_index_to_instance_props;
2035 if (info->value->has_props) {
2036 CpuInstanceProperties *props;
2037 props = g_malloc0(sizeof(*props));
2038 *props = mc->cpu_index_to_instance_props(ms, cpu->cpu_index);
2039 info->value->props = props;
2040 }
2041
2042 /* XXX: waiting for the qapi to support GSList */
2043 if (!cur_item) {
2044 head = cur_item = info;
2045 } else {
2046 cur_item->next = info;
2047 cur_item = info;
2048 }
2049 }
2050
2051 return head;
2052 }
2053
2054 void qmp_memsave(int64_t addr, int64_t size, const char *filename,
2055 bool has_cpu, int64_t cpu_index, Error **errp)
2056 {
2057 FILE *f;
2058 uint32_t l;
2059 CPUState *cpu;
2060 uint8_t buf[1024];
2061 int64_t orig_addr = addr, orig_size = size;
2062
2063 if (!has_cpu) {
2064 cpu_index = 0;
2065 }
2066
2067 cpu = qemu_get_cpu(cpu_index);
2068 if (cpu == NULL) {
2069 error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "cpu-index",
2070 "a CPU number");
2071 return;
2072 }
2073
2074 f = fopen(filename, "wb");
2075 if (!f) {
2076 error_setg_file_open(errp, errno, filename);
2077 return;
2078 }
2079
2080 while (size != 0) {
2081 l = sizeof(buf);
2082 if (l > size)
2083 l = size;
2084 if (cpu_memory_rw_debug(cpu, addr, buf, l, 0) != 0) {
2085 error_setg(errp, "Invalid addr 0x%016" PRIx64 "/size %" PRId64
2086 " specified", orig_addr, orig_size);
2087 goto exit;
2088 }
2089 if (fwrite(buf, 1, l, f) != l) {
2090 error_setg(errp, QERR_IO_ERROR);
2091 goto exit;
2092 }
2093 addr += l;
2094 size -= l;
2095 }
2096
2097 exit:
2098 fclose(f);
2099 }
2100
2101 void qmp_pmemsave(int64_t addr, int64_t size, const char *filename,
2102 Error **errp)
2103 {
2104 FILE *f;
2105 uint32_t l;
2106 uint8_t buf[1024];
2107
2108 f = fopen(filename, "wb");
2109 if (!f) {
2110 error_setg_file_open(errp, errno, filename);
2111 return;
2112 }
2113
2114 while (size != 0) {
2115 l = sizeof(buf);
2116 if (l > size)
2117 l = size;
2118 cpu_physical_memory_read(addr, buf, l);
2119 if (fwrite(buf, 1, l, f) != l) {
2120 error_setg(errp, QERR_IO_ERROR);
2121 goto exit;
2122 }
2123 addr += l;
2124 size -= l;
2125 }
2126
2127 exit:
2128 fclose(f);
2129 }
2130
2131 void qmp_inject_nmi(Error **errp)
2132 {
2133 nmi_monitor_handle(monitor_get_cpu_index(), errp);
2134 }
2135
2136 void dump_drift_info(FILE *f, fprintf_function cpu_fprintf)
2137 {
2138 if (!use_icount) {
2139 return;
2140 }
2141
2142 cpu_fprintf(f, "Host - Guest clock %"PRIi64" ms\n",
2143 (cpu_get_clock() - cpu_get_icount())/SCALE_MS);
2144 if (icount_align_option) {
2145 cpu_fprintf(f, "Max guest delay %"PRIi64" ms\n", -max_delay/SCALE_MS);
2146 cpu_fprintf(f, "Max guest advance %"PRIi64" ms\n", max_advance/SCALE_MS);
2147 } else {
2148 cpu_fprintf(f, "Max guest delay NA\n");
2149 cpu_fprintf(f, "Max guest advance NA\n");
2150 }
2151 }