]> git.proxmox.com Git - mirror_qemu.git/blob - cpus.c
s390x/kvm: Handle bpb feature
[mirror_qemu.git] / cpus.c
1 /*
2 * QEMU System Emulator
3 *
4 * Copyright (c) 2003-2008 Fabrice Bellard
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to deal
8 * in the Software without restriction, including without limitation the rights
9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 * copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22 * THE SOFTWARE.
23 */
24
25 /* Needed early for CONFIG_BSD etc. */
26 #include "qemu/osdep.h"
27 #include "qemu-common.h"
28 #include "qemu/config-file.h"
29 #include "cpu.h"
30 #include "monitor/monitor.h"
31 #include "qapi/qmp/qerror.h"
32 #include "qemu/error-report.h"
33 #include "sysemu/sysemu.h"
34 #include "sysemu/block-backend.h"
35 #include "exec/gdbstub.h"
36 #include "sysemu/dma.h"
37 #include "sysemu/hw_accel.h"
38 #include "sysemu/kvm.h"
39 #include "sysemu/hax.h"
40 #include "sysemu/hvf.h"
41 #include "qmp-commands.h"
42 #include "exec/exec-all.h"
43
44 #include "qemu/thread.h"
45 #include "sysemu/cpus.h"
46 #include "sysemu/qtest.h"
47 #include "qemu/main-loop.h"
48 #include "qemu/bitmap.h"
49 #include "qemu/seqlock.h"
50 #include "tcg.h"
51 #include "qapi-event.h"
52 #include "hw/nmi.h"
53 #include "sysemu/replay.h"
54 #include "hw/boards.h"
55
#ifdef CONFIG_LINUX

#include <sys/prctl.h>

/* Fallback definitions for the prctl() machine-check constants, used
 * only when <sys/prctl.h> does not already provide them.
 */
#ifndef PR_MCE_KILL
#define PR_MCE_KILL 33
#endif

#ifndef PR_MCE_KILL_SET
#define PR_MCE_KILL_SET 1
#endif

#ifndef PR_MCE_KILL_EARLY
#define PR_MCE_KILL_EARLY 1
#endif

#endif /* CONFIG_LINUX */
73
/* NOTE(review): not referenced in the visible part of this file;
 * presumably consumed by the icount alignment code elsewhere.
 */
int64_t max_delay;
int64_t max_advance;

/* vcpu throttling controls */
static QEMUTimer *throttle_timer;
/* Current throttle percentage; 0 means throttling is off. */
static unsigned int throttle_percentage;

/* Bounds applied by cpu_throttle_set() and the length of one throttle
 * time slice in nanoseconds.
 */
#define CPU_THROTTLE_PCT_MIN 1
#define CPU_THROTTLE_PCT_MAX 99
#define CPU_THROTTLE_TIMESLICE_NS 10000000
84
85 bool cpu_is_stopped(CPUState *cpu)
86 {
87 return cpu->stopped || !runstate_is_running();
88 }
89
90 static bool cpu_thread_is_idle(CPUState *cpu)
91 {
92 if (cpu->stop || cpu->queued_work_first) {
93 return false;
94 }
95 if (cpu_is_stopped(cpu)) {
96 return true;
97 }
98 if (!cpu->halted || cpu_has_work(cpu) ||
99 kvm_halt_in_kernel()) {
100 return false;
101 }
102 return true;
103 }
104
105 static bool all_cpu_threads_idle(void)
106 {
107 CPUState *cpu;
108
109 CPU_FOREACH(cpu) {
110 if (!cpu_thread_is_idle(cpu)) {
111 return false;
112 }
113 }
114 return true;
115 }
116
117 /***********************************************************/
118 /* guest cycle counter */
119
/* Protected by TimersState seqlock */

/* Set from the "sleep" option in configure_icount(); defaults to true. */
static bool icount_sleep = true;
/* Conversion factor from emulated instructions to virtual clock ticks. */
static int icount_time_shift;
/* Arbitrarily pick 1MIPS as the minimum allowable speed. */
#define MAX_ICOUNT_SHIFT 10
127
/* Global timing state of the guest: host tick/clock offsets plus the
 * instruction-counter (icount) bookkeeping and its helper timers.
 */
typedef struct TimersState {
    /* Protected by BQL. */
    int64_t cpu_ticks_prev;
    int64_t cpu_ticks_offset;

    /* cpu_clock_offset can be read out of BQL, so protect it with
     * this lock.
     */
    QemuSeqLock vm_clock_seqlock;
    int64_t cpu_clock_offset;
    int32_t cpu_ticks_enabled;
    /* Only referenced by vmstate_timers in the visible code; presumably
     * kept for migration stream compatibility -- TODO confirm.
     */
    int64_t dummy;

    /* Compensate for varying guest execution speed. */
    int64_t qemu_icount_bias;
    /* Only written by TCG thread */
    int64_t qemu_icount;
    /* for adjusting icount */
    /* -1 means no warp is in progress (see icount_warp_rt()). */
    int64_t vm_clock_warp_start;
    QEMUTimer *icount_rt_timer;
    QEMUTimer *icount_vm_timer;
    QEMUTimer *icount_warp_timer;
} TimersState;

/* The single instance, registered for migration in cpu_ticks_init(). */
static TimersState timers_state;
/* Set by qemu_tcg_configure() from the "thread" option or its default. */
bool mttcg_enabled;
154
155 /*
156 * We default to false if we know other options have been enabled
157 * which are currently incompatible with MTTCG. Otherwise when each
158 * guest (target) has been updated to support:
159 * - atomic instructions
160 * - memory ordering primitives (barriers)
161 * they can set the appropriate CONFIG flags in ${target}-softmmu.mak
162 *
163 * Once a guest architecture has been converted to the new primitives
164 * there are two remaining limitations to check.
165 *
166 * - The guest can't be oversized (e.g. 64 bit guest on 32 bit host)
167 * - The host must have a stronger memory order than the guest
168 *
169 * It may be possible in future to support strong guests on weak hosts
170 * but that will require tagging all load/stores in a guest with their
171 * implicit memory order requirements which would likely slow things
172 * down a lot.
173 */
174
/* True when every memory ordering the guest requires is also provided
 * by the host.  Without both default-MO macros we cannot tell, so we
 * answer false.
 */
static bool check_tcg_memory_orders_compatible(void)
{
    bool compatible = false;

#if defined(TCG_GUEST_DEFAULT_MO) && defined(TCG_TARGET_DEFAULT_MO)
    compatible = !(TCG_GUEST_DEFAULT_MO & ~TCG_TARGET_DEFAULT_MO);
#endif
    return compatible;
}
183
184 static bool default_mttcg_enabled(void)
185 {
186 if (use_icount || TCG_OVERSIZED_GUEST) {
187 return false;
188 } else {
189 #ifdef TARGET_SUPPORTS_MTTCG
190 return check_tcg_memory_orders_compatible();
191 #else
192 return false;
193 #endif
194 }
195 }
196
197 void qemu_tcg_configure(QemuOpts *opts, Error **errp)
198 {
199 const char *t = qemu_opt_get(opts, "thread");
200 if (t) {
201 if (strcmp(t, "multi") == 0) {
202 if (TCG_OVERSIZED_GUEST) {
203 error_setg(errp, "No MTTCG when guest word size > hosts");
204 } else if (use_icount) {
205 error_setg(errp, "No MTTCG when icount is enabled");
206 } else {
207 #ifndef TARGET_SUPPORTS_MTTCG
208 error_report("Guest not yet converted to MTTCG - "
209 "you may get unexpected results");
210 #endif
211 if (!check_tcg_memory_orders_compatible()) {
212 error_report("Guest expects a stronger memory ordering "
213 "than the host provides");
214 error_printf("This may cause strange/hard to debug errors\n");
215 }
216 mttcg_enabled = true;
217 }
218 } else if (strcmp(t, "single") == 0) {
219 mttcg_enabled = false;
220 } else {
221 error_setg(errp, "Invalid 'thread' setting %s", t);
222 }
223 } else {
224 mttcg_enabled = default_mttcg_enabled();
225 }
226 }
227
228 /* The current number of executed instructions is based on what we
229 * originally budgeted minus the current state of the decrementing
230 * icount counters in extra/u16.low.
231 */
232 static int64_t cpu_get_icount_executed(CPUState *cpu)
233 {
234 return cpu->icount_budget - (cpu->icount_decr.u16.low + cpu->icount_extra);
235 }
236
237 /*
238 * Update the global shared timer_state.qemu_icount to take into
239 * account executed instructions. This is done by the TCG vCPU
240 * thread so the main-loop can see time has moved forward.
241 */
242 void cpu_update_icount(CPUState *cpu)
243 {
244 int64_t executed = cpu_get_icount_executed(cpu);
245 cpu->icount_budget -= executed;
246
247 #ifdef CONFIG_ATOMIC64
248 atomic_set__nocheck(&timers_state.qemu_icount,
249 atomic_read__nocheck(&timers_state.qemu_icount) +
250 executed);
251 #else /* FIXME: we need 64bit atomics to do this safely */
252 timers_state.qemu_icount += executed;
253 #endif
254 }
255
256 int64_t cpu_get_icount_raw(void)
257 {
258 CPUState *cpu = current_cpu;
259
260 if (cpu && cpu->running) {
261 if (!cpu->can_do_io) {
262 fprintf(stderr, "Bad icount read\n");
263 exit(1);
264 }
265 /* Take into account what has run */
266 cpu_update_icount(cpu);
267 }
268 #ifdef CONFIG_ATOMIC64
269 return atomic_read__nocheck(&timers_state.qemu_icount);
270 #else /* FIXME: we need 64bit atomics to do this safely */
271 return timers_state.qemu_icount;
272 #endif
273 }
274
275 /* Return the virtual CPU time, based on the instruction counter. */
276 static int64_t cpu_get_icount_locked(void)
277 {
278 int64_t icount = cpu_get_icount_raw();
279 return timers_state.qemu_icount_bias + cpu_icount_to_ns(icount);
280 }
281
282 int64_t cpu_get_icount(void)
283 {
284 int64_t icount;
285 unsigned start;
286
287 do {
288 start = seqlock_read_begin(&timers_state.vm_clock_seqlock);
289 icount = cpu_get_icount_locked();
290 } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start));
291
292 return icount;
293 }
294
295 int64_t cpu_icount_to_ns(int64_t icount)
296 {
297 return icount << icount_time_shift;
298 }
299
300 /* return the time elapsed in VM between vm_start and vm_stop. Unless
301 * icount is active, cpu_get_ticks() uses units of the host CPU cycle
302 * counter.
303 *
304 * Caller must hold the BQL
305 */
306 int64_t cpu_get_ticks(void)
307 {
308 int64_t ticks;
309
310 if (use_icount) {
311 return cpu_get_icount();
312 }
313
314 ticks = timers_state.cpu_ticks_offset;
315 if (timers_state.cpu_ticks_enabled) {
316 ticks += cpu_get_host_ticks();
317 }
318
319 if (timers_state.cpu_ticks_prev > ticks) {
320 /* Note: non increasing ticks may happen if the host uses
321 software suspend */
322 timers_state.cpu_ticks_offset += timers_state.cpu_ticks_prev - ticks;
323 ticks = timers_state.cpu_ticks_prev;
324 }
325
326 timers_state.cpu_ticks_prev = ticks;
327 return ticks;
328 }
329
330 static int64_t cpu_get_clock_locked(void)
331 {
332 int64_t time;
333
334 time = timers_state.cpu_clock_offset;
335 if (timers_state.cpu_ticks_enabled) {
336 time += get_clock();
337 }
338
339 return time;
340 }
341
342 /* Return the monotonic time elapsed in VM, i.e.,
343 * the time between vm_start and vm_stop
344 */
345 int64_t cpu_get_clock(void)
346 {
347 int64_t ti;
348 unsigned start;
349
350 do {
351 start = seqlock_read_begin(&timers_state.vm_clock_seqlock);
352 ti = cpu_get_clock_locked();
353 } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start));
354
355 return ti;
356 }
357
358 /* enable cpu_get_ticks()
359 * Caller must hold BQL which serves as mutex for vm_clock_seqlock.
360 */
361 void cpu_enable_ticks(void)
362 {
363 /* Here, the really thing protected by seqlock is cpu_clock_offset. */
364 seqlock_write_begin(&timers_state.vm_clock_seqlock);
365 if (!timers_state.cpu_ticks_enabled) {
366 timers_state.cpu_ticks_offset -= cpu_get_host_ticks();
367 timers_state.cpu_clock_offset -= get_clock();
368 timers_state.cpu_ticks_enabled = 1;
369 }
370 seqlock_write_end(&timers_state.vm_clock_seqlock);
371 }
372
373 /* disable cpu_get_ticks() : the clock is stopped. You must not call
374 * cpu_get_ticks() after that.
375 * Caller must hold BQL which serves as mutex for vm_clock_seqlock.
376 */
377 void cpu_disable_ticks(void)
378 {
379 /* Here, the really thing protected by seqlock is cpu_clock_offset. */
380 seqlock_write_begin(&timers_state.vm_clock_seqlock);
381 if (timers_state.cpu_ticks_enabled) {
382 timers_state.cpu_ticks_offset += cpu_get_host_ticks();
383 timers_state.cpu_clock_offset = cpu_get_clock_locked();
384 timers_state.cpu_ticks_enabled = 0;
385 }
386 seqlock_write_end(&timers_state.vm_clock_seqlock);
387 }
388
/* Correlation between real and virtual time is always going to be
   fairly approximate, so ignore small variation.
   When the guest is idle real and virtual time will be aligned in
   the IO wait loop.
   The tolerance is a tenth of a second, expressed in nanoseconds. */
#define ICOUNT_WOBBLE (NANOSECONDS_PER_SECOND / 10)
394
395 static void icount_adjust(void)
396 {
397 int64_t cur_time;
398 int64_t cur_icount;
399 int64_t delta;
400
401 /* Protected by TimersState mutex. */
402 static int64_t last_delta;
403
404 /* If the VM is not running, then do nothing. */
405 if (!runstate_is_running()) {
406 return;
407 }
408
409 seqlock_write_begin(&timers_state.vm_clock_seqlock);
410 cur_time = cpu_get_clock_locked();
411 cur_icount = cpu_get_icount_locked();
412
413 delta = cur_icount - cur_time;
414 /* FIXME: This is a very crude algorithm, somewhat prone to oscillation. */
415 if (delta > 0
416 && last_delta + ICOUNT_WOBBLE < delta * 2
417 && icount_time_shift > 0) {
418 /* The guest is getting too far ahead. Slow time down. */
419 icount_time_shift--;
420 }
421 if (delta < 0
422 && last_delta - ICOUNT_WOBBLE > delta * 2
423 && icount_time_shift < MAX_ICOUNT_SHIFT) {
424 /* The guest is getting too far behind. Speed time up. */
425 icount_time_shift++;
426 }
427 last_delta = delta;
428 timers_state.qemu_icount_bias = cur_icount
429 - (timers_state.qemu_icount << icount_time_shift);
430 seqlock_write_end(&timers_state.vm_clock_seqlock);
431 }
432
433 static void icount_adjust_rt(void *opaque)
434 {
435 timer_mod(timers_state.icount_rt_timer,
436 qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL_RT) + 1000);
437 icount_adjust();
438 }
439
440 static void icount_adjust_vm(void *opaque)
441 {
442 timer_mod(timers_state.icount_vm_timer,
443 qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
444 NANOSECONDS_PER_SECOND / 10);
445 icount_adjust();
446 }
447
448 static int64_t qemu_icount_round(int64_t count)
449 {
450 return (count + (1 << icount_time_shift) - 1) >> icount_time_shift;
451 }
452
453 static void icount_warp_rt(void)
454 {
455 unsigned seq;
456 int64_t warp_start;
457
458 /* The icount_warp_timer is rescheduled soon after vm_clock_warp_start
459 * changes from -1 to another value, so the race here is okay.
460 */
461 do {
462 seq = seqlock_read_begin(&timers_state.vm_clock_seqlock);
463 warp_start = timers_state.vm_clock_warp_start;
464 } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, seq));
465
466 if (warp_start == -1) {
467 return;
468 }
469
470 seqlock_write_begin(&timers_state.vm_clock_seqlock);
471 if (runstate_is_running()) {
472 int64_t clock = REPLAY_CLOCK(REPLAY_CLOCK_VIRTUAL_RT,
473 cpu_get_clock_locked());
474 int64_t warp_delta;
475
476 warp_delta = clock - timers_state.vm_clock_warp_start;
477 if (use_icount == 2) {
478 /*
479 * In adaptive mode, do not let QEMU_CLOCK_VIRTUAL run too
480 * far ahead of real time.
481 */
482 int64_t cur_icount = cpu_get_icount_locked();
483 int64_t delta = clock - cur_icount;
484 warp_delta = MIN(warp_delta, delta);
485 }
486 timers_state.qemu_icount_bias += warp_delta;
487 }
488 timers_state.vm_clock_warp_start = -1;
489 seqlock_write_end(&timers_state.vm_clock_seqlock);
490
491 if (qemu_clock_expired(QEMU_CLOCK_VIRTUAL)) {
492 qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
493 }
494 }
495
/* Callback of icount_warp_timer: simply completes the pending warp. */
static void icount_timer_cb(void *opaque)
{
    /* The timer already synchronizes with CHECKPOINT_CLOCK_VIRTUAL_RT,
     * so no extra checkpoint is needed here.
     */
    icount_warp_rt();
}
503
504 void qtest_clock_warp(int64_t dest)
505 {
506 int64_t clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
507 AioContext *aio_context;
508 assert(qtest_enabled());
509 aio_context = qemu_get_aio_context();
510 while (clock < dest) {
511 int64_t deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
512 int64_t warp = qemu_soonest_timeout(dest - clock, deadline);
513
514 seqlock_write_begin(&timers_state.vm_clock_seqlock);
515 timers_state.qemu_icount_bias += warp;
516 seqlock_write_end(&timers_state.vm_clock_seqlock);
517
518 qemu_clock_run_timers(QEMU_CLOCK_VIRTUAL);
519 timerlist_run_timers(aio_context->tlg.tl[QEMU_CLOCK_VIRTUAL]);
520 clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
521 }
522 qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
523 }
524
525 void qemu_start_warp_timer(void)
526 {
527 int64_t clock;
528 int64_t deadline;
529
530 if (!use_icount) {
531 return;
532 }
533
534 /* Nothing to do if the VM is stopped: QEMU_CLOCK_VIRTUAL timers
535 * do not fire, so computing the deadline does not make sense.
536 */
537 if (!runstate_is_running()) {
538 return;
539 }
540
541 /* warp clock deterministically in record/replay mode */
542 if (!replay_checkpoint(CHECKPOINT_CLOCK_WARP_START)) {
543 return;
544 }
545
546 if (!all_cpu_threads_idle()) {
547 return;
548 }
549
550 if (qtest_enabled()) {
551 /* When testing, qtest commands advance icount. */
552 return;
553 }
554
555 /* We want to use the earliest deadline from ALL vm_clocks */
556 clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT);
557 deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
558 if (deadline < 0) {
559 static bool notified;
560 if (!icount_sleep && !notified) {
561 warn_report("icount sleep disabled and no active timers");
562 notified = true;
563 }
564 return;
565 }
566
567 if (deadline > 0) {
568 /*
569 * Ensure QEMU_CLOCK_VIRTUAL proceeds even when the virtual CPU goes to
570 * sleep. Otherwise, the CPU might be waiting for a future timer
571 * interrupt to wake it up, but the interrupt never comes because
572 * the vCPU isn't running any insns and thus doesn't advance the
573 * QEMU_CLOCK_VIRTUAL.
574 */
575 if (!icount_sleep) {
576 /*
577 * We never let VCPUs sleep in no sleep icount mode.
578 * If there is a pending QEMU_CLOCK_VIRTUAL timer we just advance
579 * to the next QEMU_CLOCK_VIRTUAL event and notify it.
580 * It is useful when we want a deterministic execution time,
581 * isolated from host latencies.
582 */
583 seqlock_write_begin(&timers_state.vm_clock_seqlock);
584 timers_state.qemu_icount_bias += deadline;
585 seqlock_write_end(&timers_state.vm_clock_seqlock);
586 qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
587 } else {
588 /*
589 * We do stop VCPUs and only advance QEMU_CLOCK_VIRTUAL after some
590 * "real" time, (related to the time left until the next event) has
591 * passed. The QEMU_CLOCK_VIRTUAL_RT clock will do this.
592 * This avoids that the warps are visible externally; for example,
593 * you will not be sending network packets continuously instead of
594 * every 100ms.
595 */
596 seqlock_write_begin(&timers_state.vm_clock_seqlock);
597 if (timers_state.vm_clock_warp_start == -1
598 || timers_state.vm_clock_warp_start > clock) {
599 timers_state.vm_clock_warp_start = clock;
600 }
601 seqlock_write_end(&timers_state.vm_clock_seqlock);
602 timer_mod_anticipate(timers_state.icount_warp_timer,
603 clock + deadline);
604 }
605 } else if (deadline == 0) {
606 qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
607 }
608 }
609
610 static void qemu_account_warp_timer(void)
611 {
612 if (!use_icount || !icount_sleep) {
613 return;
614 }
615
616 /* Nothing to do if the VM is stopped: QEMU_CLOCK_VIRTUAL timers
617 * do not fire, so computing the deadline does not make sense.
618 */
619 if (!runstate_is_running()) {
620 return;
621 }
622
623 /* warp clock deterministically in record/replay mode */
624 if (!replay_checkpoint(CHECKPOINT_CLOCK_WARP_ACCOUNT)) {
625 return;
626 }
627
628 timer_del(timers_state.icount_warp_timer);
629 icount_warp_rt();
630 }
631
632 static bool icount_state_needed(void *opaque)
633 {
634 return use_icount;
635 }
636
637 static bool warp_timer_state_needed(void *opaque)
638 {
639 TimersState *s = opaque;
640 return s->icount_warp_timer != NULL;
641 }
642
643 static bool adjust_timers_state_needed(void *opaque)
644 {
645 TimersState *s = opaque;
646 return s->icount_rt_timer != NULL;
647 }
648
/*
 * Subsection for warp timer migration is optional, because the warp
 * timer may not have been created.  It carries the warp start time and
 * the warp timer itself.
 */
static const VMStateDescription icount_vmstate_warp_timer = {
    .name = "timer/icount/warp_timer",
    .version_id = 1,
    .minimum_version_id = 1,
    .needed = warp_timer_state_needed,
    .fields = (VMStateField[]) {
        VMSTATE_INT64(vm_clock_warp_start, TimersState),
        VMSTATE_TIMER_PTR(icount_warp_timer, TimersState),
        VMSTATE_END_OF_LIST()
    }
};
663
/*
 * Optional subsection carrying the two icount adjustment timers; it is
 * included only when adjust_timers_state_needed() says so.
 */
static const VMStateDescription icount_vmstate_adjust_timers = {
    .name = "timer/icount/timers",
    .version_id = 1,
    .minimum_version_id = 1,
    .needed = adjust_timers_state_needed,
    .fields = (VMStateField[]) {
        VMSTATE_TIMER_PTR(icount_rt_timer, TimersState),
        VMSTATE_TIMER_PTR(icount_vm_timer, TimersState),
        VMSTATE_END_OF_LIST()
    }
};
675
/*
 * This is a subsection for icount migration.  It carries the icount
 * bias and counter and nests the warp-timer and adjust-timers
 * subsections.
 */
static const VMStateDescription icount_vmstate_timers = {
    .name = "timer/icount",
    .version_id = 1,
    .minimum_version_id = 1,
    .needed = icount_state_needed,
    .fields = (VMStateField[]) {
        VMSTATE_INT64(qemu_icount_bias, TimersState),
        VMSTATE_INT64(qemu_icount, TimersState),
        VMSTATE_END_OF_LIST()
    },
    .subsections = (const VMStateDescription*[]) {
        &icount_vmstate_warp_timer,
        &icount_vmstate_adjust_timers,
        NULL
    }
};
695
/*
 * Top-level migration description for TimersState, registered in
 * cpu_ticks_init().  cpu_clock_offset is a version 2 field; the icount
 * state travels in a subsection.
 */
static const VMStateDescription vmstate_timers = {
    .name = "timer",
    .version_id = 2,
    .minimum_version_id = 1,
    .fields = (VMStateField[]) {
        VMSTATE_INT64(cpu_ticks_offset, TimersState),
        VMSTATE_INT64(dummy, TimersState),
        VMSTATE_INT64_V(cpu_clock_offset, TimersState, 2),
        VMSTATE_END_OF_LIST()
    },
    .subsections = (const VMStateDescription*[]) {
        &icount_vmstate_timers,
        NULL
    }
};
711
712 static void cpu_throttle_thread(CPUState *cpu, run_on_cpu_data opaque)
713 {
714 double pct;
715 double throttle_ratio;
716 long sleeptime_ns;
717
718 if (!cpu_throttle_get_percentage()) {
719 return;
720 }
721
722 pct = (double)cpu_throttle_get_percentage()/100;
723 throttle_ratio = pct / (1 - pct);
724 sleeptime_ns = (long)(throttle_ratio * CPU_THROTTLE_TIMESLICE_NS);
725
726 qemu_mutex_unlock_iothread();
727 g_usleep(sleeptime_ns / 1000); /* Convert ns to us for usleep call */
728 qemu_mutex_lock_iothread();
729 atomic_set(&cpu->throttle_thread_scheduled, 0);
730 }
731
732 static void cpu_throttle_timer_tick(void *opaque)
733 {
734 CPUState *cpu;
735 double pct;
736
737 /* Stop the timer if needed */
738 if (!cpu_throttle_get_percentage()) {
739 return;
740 }
741 CPU_FOREACH(cpu) {
742 if (!atomic_xchg(&cpu->throttle_thread_scheduled, 1)) {
743 async_run_on_cpu(cpu, cpu_throttle_thread,
744 RUN_ON_CPU_NULL);
745 }
746 }
747
748 pct = (double)cpu_throttle_get_percentage()/100;
749 timer_mod(throttle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT) +
750 CPU_THROTTLE_TIMESLICE_NS / (1-pct));
751 }
752
753 void cpu_throttle_set(int new_throttle_pct)
754 {
755 /* Ensure throttle percentage is within valid range */
756 new_throttle_pct = MIN(new_throttle_pct, CPU_THROTTLE_PCT_MAX);
757 new_throttle_pct = MAX(new_throttle_pct, CPU_THROTTLE_PCT_MIN);
758
759 atomic_set(&throttle_percentage, new_throttle_pct);
760
761 timer_mod(throttle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT) +
762 CPU_THROTTLE_TIMESLICE_NS);
763 }
764
765 void cpu_throttle_stop(void)
766 {
767 atomic_set(&throttle_percentage, 0);
768 }
769
/* Throttling is active whenever the percentage is non-zero. */
bool cpu_throttle_active(void)
{
    if (cpu_throttle_get_percentage() == 0) {
        return false;
    }
    return true;
}
774
775 int cpu_throttle_get_percentage(void)
776 {
777 return atomic_read(&throttle_percentage);
778 }
779
780 void cpu_ticks_init(void)
781 {
782 seqlock_init(&timers_state.vm_clock_seqlock);
783 vmstate_register(NULL, 0, &vmstate_timers, &timers_state);
784 throttle_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL_RT,
785 cpu_throttle_timer_tick, NULL);
786 }
787
788 void configure_icount(QemuOpts *opts, Error **errp)
789 {
790 const char *option;
791 char *rem_str = NULL;
792
793 option = qemu_opt_get(opts, "shift");
794 if (!option) {
795 if (qemu_opt_get(opts, "align") != NULL) {
796 error_setg(errp, "Please specify shift option when using align");
797 }
798 return;
799 }
800
801 icount_sleep = qemu_opt_get_bool(opts, "sleep", true);
802 if (icount_sleep) {
803 timers_state.icount_warp_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL_RT,
804 icount_timer_cb, NULL);
805 }
806
807 icount_align_option = qemu_opt_get_bool(opts, "align", false);
808
809 if (icount_align_option && !icount_sleep) {
810 error_setg(errp, "align=on and sleep=off are incompatible");
811 }
812 if (strcmp(option, "auto") != 0) {
813 errno = 0;
814 icount_time_shift = strtol(option, &rem_str, 0);
815 if (errno != 0 || *rem_str != '\0' || !strlen(option)) {
816 error_setg(errp, "icount: Invalid shift value");
817 }
818 use_icount = 1;
819 return;
820 } else if (icount_align_option) {
821 error_setg(errp, "shift=auto and align=on are incompatible");
822 } else if (!icount_sleep) {
823 error_setg(errp, "shift=auto and sleep=off are incompatible");
824 }
825
826 use_icount = 2;
827
828 /* 125MIPS seems a reasonable initial guess at the guest speed.
829 It will be corrected fairly quickly anyway. */
830 icount_time_shift = 3;
831
832 /* Have both realtime and virtual time triggers for speed adjustment.
833 The realtime trigger catches emulated time passing too slowly,
834 the virtual time trigger catches emulated time passing too fast.
835 Realtime triggers occur even when idle, so use them less frequently
836 than VM triggers. */
837 timers_state.vm_clock_warp_start = -1;
838 timers_state.icount_rt_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL_RT,
839 icount_adjust_rt, NULL);
840 timer_mod(timers_state.icount_rt_timer,
841 qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL_RT) + 1000);
842 timers_state.icount_vm_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
843 icount_adjust_vm, NULL);
844 timer_mod(timers_state.icount_vm_timer,
845 qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
846 NANOSECONDS_PER_SECOND / 10);
847 }
848
849 /***********************************************************/
850 /* TCG vCPU kick timer
851 *
852 * The kick timer is responsible for moving single threaded vCPU
 * emulation on to the next vCPU. If more than one vCPU is running, a
 * timer event will force a cpu->exit so the next vCPU can get
 * scheduled.
856 *
857 * The timer is removed if all vCPUs are idle and restarted again once
858 * idleness is complete.
859 */
860
/* Kick timer; NULL while no timer exists (see start/stop_tcg_kick_timer). */
static QEMUTimer *tcg_kick_vcpu_timer;
/* The vCPU targeted by qemu_cpu_kick_rr_cpu(); read with atomic_mb_read(). */
static CPUState *tcg_current_rr_cpu;

/* Interval between kicks: a tenth of a second, in nanoseconds. */
#define TCG_KICK_PERIOD (NANOSECONDS_PER_SECOND / 10)
865
866 static inline int64_t qemu_tcg_next_kick(void)
867 {
868 return qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + TCG_KICK_PERIOD;
869 }
870
871 /* Kick the currently round-robin scheduled vCPU */
872 static void qemu_cpu_kick_rr_cpu(void)
873 {
874 CPUState *cpu;
875 do {
876 cpu = atomic_mb_read(&tcg_current_rr_cpu);
877 if (cpu) {
878 cpu_exit(cpu);
879 }
880 } while (cpu != atomic_mb_read(&tcg_current_rr_cpu));
881 }
882
/* Intentionally empty work item.  qemu_timer_notify_cb() queues it on a
 * vCPU purely so that the vCPU has queued work and therefore stops
 * being considered idle.
 */
static void do_nothing(CPUState *cpu, run_on_cpu_data unused)
{
}
886
887 void qemu_timer_notify_cb(void *opaque, QEMUClockType type)
888 {
889 if (!use_icount || type != QEMU_CLOCK_VIRTUAL) {
890 qemu_notify_event();
891 return;
892 }
893
894 if (!qemu_in_vcpu_thread() && first_cpu) {
895 /* qemu_cpu_kick is not enough to kick a halted CPU out of
896 * qemu_tcg_wait_io_event. async_run_on_cpu, instead,
897 * causes cpu_thread_is_idle to return false. This way,
898 * handle_icount_deadline can run.
899 */
900 async_run_on_cpu(first_cpu, do_nothing, RUN_ON_CPU_NULL);
901 }
902 }
903
904 static void kick_tcg_thread(void *opaque)
905 {
906 timer_mod(tcg_kick_vcpu_timer, qemu_tcg_next_kick());
907 qemu_cpu_kick_rr_cpu();
908 }
909
910 static void start_tcg_kick_timer(void)
911 {
912 assert(!mttcg_enabled);
913 if (!tcg_kick_vcpu_timer && CPU_NEXT(first_cpu)) {
914 tcg_kick_vcpu_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
915 kick_tcg_thread, NULL);
916 timer_mod(tcg_kick_vcpu_timer, qemu_tcg_next_kick());
917 }
918 }
919
920 static void stop_tcg_kick_timer(void)
921 {
922 assert(!mttcg_enabled);
923 if (tcg_kick_vcpu_timer) {
924 timer_del(tcg_kick_vcpu_timer);
925 tcg_kick_vcpu_timer = NULL;
926 }
927 }
928
929 /***********************************************************/
930 void hw_error(const char *fmt, ...)
931 {
932 va_list ap;
933 CPUState *cpu;
934
935 va_start(ap, fmt);
936 fprintf(stderr, "qemu: hardware error: ");
937 vfprintf(stderr, fmt, ap);
938 fprintf(stderr, "\n");
939 CPU_FOREACH(cpu) {
940 fprintf(stderr, "CPU #%d:\n", cpu->cpu_index);
941 cpu_dump_state(cpu, stderr, fprintf, CPU_DUMP_FPU);
942 }
943 va_end(ap);
944 abort();
945 }
946
947 void cpu_synchronize_all_states(void)
948 {
949 CPUState *cpu;
950
951 CPU_FOREACH(cpu) {
952 cpu_synchronize_state(cpu);
953 /* TODO: move to cpu_synchronize_state() */
954 if (hvf_enabled()) {
955 hvf_cpu_synchronize_state(cpu);
956 }
957 }
958 }
959
960 void cpu_synchronize_all_post_reset(void)
961 {
962 CPUState *cpu;
963
964 CPU_FOREACH(cpu) {
965 cpu_synchronize_post_reset(cpu);
966 /* TODO: move to cpu_synchronize_post_reset() */
967 if (hvf_enabled()) {
968 hvf_cpu_synchronize_post_reset(cpu);
969 }
970 }
971 }
972
973 void cpu_synchronize_all_post_init(void)
974 {
975 CPUState *cpu;
976
977 CPU_FOREACH(cpu) {
978 cpu_synchronize_post_init(cpu);
979 /* TODO: move to cpu_synchronize_post_init() */
980 if (hvf_enabled()) {
981 hvf_cpu_synchronize_post_init(cpu);
982 }
983 }
984 }
985
986 void cpu_synchronize_all_pre_loadvm(void)
987 {
988 CPUState *cpu;
989
990 CPU_FOREACH(cpu) {
991 cpu_synchronize_pre_loadvm(cpu);
992 }
993 }
994
995 static int do_vm_stop(RunState state)
996 {
997 int ret = 0;
998
999 if (runstate_is_running()) {
1000 cpu_disable_ticks();
1001 pause_all_vcpus();
1002 runstate_set(state);
1003 vm_state_notify(0, state);
1004 qapi_event_send_stop(&error_abort);
1005 }
1006
1007 bdrv_drain_all();
1008 replay_disable_events();
1009 ret = bdrv_flush_all();
1010
1011 return ret;
1012 }
1013
1014 static bool cpu_can_run(CPUState *cpu)
1015 {
1016 if (cpu->stop) {
1017 return false;
1018 }
1019 if (cpu_is_stopped(cpu)) {
1020 return false;
1021 }
1022 return true;
1023 }
1024
1025 static void cpu_handle_guest_debug(CPUState *cpu)
1026 {
1027 gdb_set_stop_cpu(cpu);
1028 qemu_system_debug_request();
1029 cpu->stopped = true;
1030 }
1031
1032 #ifdef CONFIG_LINUX
1033 static void sigbus_reraise(void)
1034 {
1035 sigset_t set;
1036 struct sigaction action;
1037
1038 memset(&action, 0, sizeof(action));
1039 action.sa_handler = SIG_DFL;
1040 if (!sigaction(SIGBUS, &action, NULL)) {
1041 raise(SIGBUS);
1042 sigemptyset(&set);
1043 sigaddset(&set, SIGBUS);
1044 pthread_sigmask(SIG_UNBLOCK, &set, NULL);
1045 }
1046 perror("Failed to re-raise SIGBUS!\n");
1047 abort();
1048 }
1049
1050 static void sigbus_handler(int n, siginfo_t *siginfo, void *ctx)
1051 {
1052 if (siginfo->si_code != BUS_MCEERR_AO && siginfo->si_code != BUS_MCEERR_AR) {
1053 sigbus_reraise();
1054 }
1055
1056 if (current_cpu) {
1057 /* Called asynchronously in VCPU thread. */
1058 if (kvm_on_sigbus_vcpu(current_cpu, siginfo->si_code, siginfo->si_addr)) {
1059 sigbus_reraise();
1060 }
1061 } else {
1062 /* Called synchronously (via signalfd) in main thread. */
1063 if (kvm_on_sigbus(siginfo->si_code, siginfo->si_addr)) {
1064 sigbus_reraise();
1065 }
1066 }
1067 }
1068
1069 static void qemu_init_sigbus(void)
1070 {
1071 struct sigaction action;
1072
1073 memset(&action, 0, sizeof(action));
1074 action.sa_flags = SA_SIGINFO;
1075 action.sa_sigaction = sigbus_handler;
1076 sigaction(SIGBUS, &action, NULL);
1077
1078 prctl(PR_MCE_KILL, PR_MCE_KILL_SET, PR_MCE_KILL_EARLY, 0, 0);
1079 }
1080 #else /* !CONFIG_LINUX */
/* Non-Linux builds install no SIGBUS handler; this stub keeps the call
 * in qemu_init_cpu_loop() unconditional.
 */
static void qemu_init_sigbus(void)
{
}
1084 #endif /* !CONFIG_LINUX */
1085
/* Mutex the vCPU threads wait on in the qemu_*wait_io_event() helpers;
 * it is also the mutex handed to do_run_on_cpu().
 */
static QemuMutex qemu_global_mutex;

/* Identity of the thread that called qemu_init_cpu_loop(). */
static QemuThread io_thread;

/* cpu creation */
static QemuCond qemu_cpu_cond;
/* system init */
static QemuCond qemu_pause_cond;
1094
1095 void qemu_init_cpu_loop(void)
1096 {
1097 qemu_init_sigbus();
1098 qemu_cond_init(&qemu_cpu_cond);
1099 qemu_cond_init(&qemu_pause_cond);
1100 qemu_mutex_init(&qemu_global_mutex);
1101
1102 qemu_thread_get_self(&io_thread);
1103 }
1104
1105 void run_on_cpu(CPUState *cpu, run_on_cpu_func func, run_on_cpu_data data)
1106 {
1107 do_run_on_cpu(cpu, func, data, &qemu_global_mutex);
1108 }
1109
1110 static void qemu_kvm_destroy_vcpu(CPUState *cpu)
1111 {
1112 if (kvm_destroy_vcpu(cpu) < 0) {
1113 error_report("kvm_destroy_vcpu failed");
1114 exit(EXIT_FAILURE);
1115 }
1116 }
1117
/* TCG counterpart of qemu_kvm_destroy_vcpu(); currently does nothing. */
static void qemu_tcg_destroy_vcpu(CPUState *cpu)
{
}
1121
/*
 * Mark @cpu as stopped and wake everybody waiting on qemu_pause_cond.
 *
 * Must be called from the thread that owns @cpu (asserted).  When @exit is
 * true, cpu_exit() is also called on @cpu.
 */
static void qemu_cpu_stop(CPUState *cpu, bool exit)
{
    g_assert(qemu_cpu_is_self(cpu));
    /* The stop request has been honoured: clear it and publish the state. */
    cpu->stop = false;
    cpu->stopped = true;
    if (exit) {
        cpu_exit(cpu);
    }
    qemu_cond_broadcast(&qemu_pause_cond);
}
1132
/*
 * Housekeeping shared by the wait-for-event helpers: clear the
 * thread_kicked flag, honour a pending stop request, then run any work
 * queued for @cpu.
 */
static void qemu_wait_io_event_common(CPUState *cpu)
{
    atomic_mb_set(&cpu->thread_kicked, false);
    if (cpu->stop) {
        qemu_cpu_stop(cpu, false);
    }
    process_queued_cpu_work(cpu);
}
1141
/*
 * Round-robin TCG wait: sleep on @cpu's halt condition for as long as all
 * vCPU threads are idle, with the kick timer stopped while sleeping.  The
 * kick timer is restarted before the common housekeeping runs for @cpu.
 */
static void qemu_tcg_rr_wait_io_event(CPUState *cpu)
{
    while (all_cpu_threads_idle()) {
        stop_tcg_kick_timer();
        qemu_cond_wait(cpu->halt_cond, &qemu_global_mutex);
    }

    start_tcg_kick_timer();

    qemu_wait_io_event_common(cpu);
}
1153
/*
 * Per-vCPU wait: sleep on @cpu's halt condition while the vCPU thread is
 * idle, then run the common housekeeping for @cpu.
 */
static void qemu_wait_io_event(CPUState *cpu)
{
    while (cpu_thread_is_idle(cpu)) {
        qemu_cond_wait(cpu->halt_cond, &qemu_global_mutex);
    }

#ifdef _WIN32
    /* Eat dummy APC queued by qemu_cpu_kick_thread. */
    if (!tcg_enabled()) {
        SleepEx(0, TRUE);
    }
#endif
    qemu_wait_io_event_common(cpu);
}
1168
1169 static void *qemu_kvm_cpu_thread_fn(void *arg)
1170 {
1171 CPUState *cpu = arg;
1172 int r;
1173
1174 rcu_register_thread();
1175
1176 qemu_mutex_lock_iothread();
1177 qemu_thread_get_self(cpu->thread);
1178 cpu->thread_id = qemu_get_thread_id();
1179 cpu->can_do_io = 1;
1180 current_cpu = cpu;
1181
1182 r = kvm_init_vcpu(cpu);
1183 if (r < 0) {
1184 fprintf(stderr, "kvm_init_vcpu failed: %s\n", strerror(-r));
1185 exit(1);
1186 }
1187
1188 kvm_init_cpu_signals(cpu);
1189
1190 /* signal CPU creation */
1191 cpu->created = true;
1192 qemu_cond_signal(&qemu_cpu_cond);
1193
1194 do {
1195 if (cpu_can_run(cpu)) {
1196 r = kvm_cpu_exec(cpu);
1197 if (r == EXCP_DEBUG) {
1198 cpu_handle_guest_debug(cpu);
1199 }
1200 }
1201 qemu_wait_io_event(cpu);
1202 } while (!cpu->unplug || cpu_can_run(cpu));
1203
1204 qemu_kvm_destroy_vcpu(cpu);
1205 cpu->created = false;
1206 qemu_cond_signal(&qemu_cpu_cond);
1207 qemu_mutex_unlock_iothread();
1208 return NULL;
1209 }
1210
1211 static void *qemu_dummy_cpu_thread_fn(void *arg)
1212 {
1213 #ifdef _WIN32
1214 fprintf(stderr, "qtest is not supported under Windows\n");
1215 exit(1);
1216 #else
1217 CPUState *cpu = arg;
1218 sigset_t waitset;
1219 int r;
1220
1221 rcu_register_thread();
1222
1223 qemu_mutex_lock_iothread();
1224 qemu_thread_get_self(cpu->thread);
1225 cpu->thread_id = qemu_get_thread_id();
1226 cpu->can_do_io = 1;
1227 current_cpu = cpu;
1228
1229 sigemptyset(&waitset);
1230 sigaddset(&waitset, SIG_IPI);
1231
1232 /* signal CPU creation */
1233 cpu->created = true;
1234 qemu_cond_signal(&qemu_cpu_cond);
1235
1236 while (1) {
1237 qemu_mutex_unlock_iothread();
1238 do {
1239 int sig;
1240 r = sigwait(&waitset, &sig);
1241 } while (r == -1 && (errno == EAGAIN || errno == EINTR));
1242 if (r == -1) {
1243 perror("sigwait");
1244 exit(1);
1245 }
1246 qemu_mutex_lock_iothread();
1247 qemu_wait_io_event(cpu);
1248 }
1249
1250 return NULL;
1251 #endif
1252 }
1253
1254 static int64_t tcg_get_icount_limit(void)
1255 {
1256 int64_t deadline;
1257
1258 if (replay_mode != REPLAY_MODE_PLAY) {
1259 deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
1260
1261 /* Maintain prior (possibly buggy) behaviour where if no deadline
1262 * was set (as there is no QEMU_CLOCK_VIRTUAL timer) or it is more than
1263 * INT32_MAX nanoseconds ahead, we still use INT32_MAX
1264 * nanoseconds.
1265 */
1266 if ((deadline < 0) || (deadline > INT32_MAX)) {
1267 deadline = INT32_MAX;
1268 }
1269
1270 return qemu_icount_round(deadline);
1271 } else {
1272 return replay_get_instructions();
1273 }
1274 }
1275
1276 static void handle_icount_deadline(void)
1277 {
1278 assert(qemu_in_vcpu_thread());
1279 if (use_icount) {
1280 int64_t deadline =
1281 qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
1282
1283 if (deadline == 0) {
1284 /* Wake up other AioContexts. */
1285 qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
1286 qemu_clock_run_timers(QEMU_CLOCK_VIRTUAL);
1287 }
1288 }
1289 }
1290
1291 static void prepare_icount_for_run(CPUState *cpu)
1292 {
1293 if (use_icount) {
1294 int insns_left;
1295
1296 /* These should always be cleared by process_icount_data after
1297 * each vCPU execution. However u16.high can be raised
1298 * asynchronously by cpu_exit/cpu_interrupt/tcg_handle_interrupt
1299 */
1300 g_assert(cpu->icount_decr.u16.low == 0);
1301 g_assert(cpu->icount_extra == 0);
1302
1303 cpu->icount_budget = tcg_get_icount_limit();
1304 insns_left = MIN(0xffff, cpu->icount_budget);
1305 cpu->icount_decr.u16.low = insns_left;
1306 cpu->icount_extra = cpu->icount_budget - insns_left;
1307 }
1308 }
1309
1310 static void process_icount_data(CPUState *cpu)
1311 {
1312 if (use_icount) {
1313 /* Account for executed instructions */
1314 cpu_update_icount(cpu);
1315
1316 /* Reset the counters */
1317 cpu->icount_decr.u16.low = 0;
1318 cpu->icount_extra = 0;
1319 cpu->icount_budget = 0;
1320
1321 replay_account_executed_instructions();
1322 }
1323 }
1324
1325
1326 static int tcg_cpu_exec(CPUState *cpu)
1327 {
1328 int ret;
1329 #ifdef CONFIG_PROFILER
1330 int64_t ti;
1331 #endif
1332
1333 #ifdef CONFIG_PROFILER
1334 ti = profile_getclock();
1335 #endif
1336 qemu_mutex_unlock_iothread();
1337 cpu_exec_start(cpu);
1338 ret = cpu_exec(cpu);
1339 cpu_exec_end(cpu);
1340 qemu_mutex_lock_iothread();
1341 #ifdef CONFIG_PROFILER
1342 tcg_time += profile_getclock() - ti;
1343 #endif
1344 return ret;
1345 }
1346
1347 /* Destroy any remaining vCPUs which have been unplugged and have
1348 * finished running
1349 */
1350 static void deal_with_unplugged_cpus(void)
1351 {
1352 CPUState *cpu;
1353
1354 CPU_FOREACH(cpu) {
1355 if (cpu->unplug && !cpu_can_run(cpu)) {
1356 qemu_tcg_destroy_vcpu(cpu);
1357 cpu->created = false;
1358 qemu_cond_signal(&qemu_cpu_cond);
1359 break;
1360 }
1361 }
1362 }
1363
1364 /* Single-threaded TCG
1365 *
1366 * In the single-threaded case each vCPU is simulated in turn. If
1367 * there is more than a single vCPU we create a simple timer to kick
1368 * the vCPU and ensure we don't get stuck in a tight loop in one vCPU.
1369 * This is done explicitly rather than relying on side-effects
1370 * elsewhere.
1371 */
1372
/*
 * Thread body for round-robin TCG: one thread runs every vCPU in turn.
 *
 * @arg is the CPUState the thread was created for; its thread field is
 * filled in here.  All vCPUs are marked created, the thread waits until
 * first_cpu is no longer stopped, and then loops forever: run each vCPU
 * until one has queued work or an exit request, wait for events, and
 * clean up unplugged vCPUs.
 */
static void *qemu_tcg_rr_cpu_thread_fn(void *arg)
{
    CPUState *cpu = arg;

    rcu_register_thread();
    tcg_register_thread();

    qemu_mutex_lock_iothread();
    qemu_thread_get_self(cpu->thread);

    /* Every vCPU shares this thread, so all are marked created at once. */
    CPU_FOREACH(cpu) {
        cpu->thread_id = qemu_get_thread_id();
        cpu->created = true;
        cpu->can_do_io = 1;
    }
    qemu_cond_signal(&qemu_cpu_cond);

    /* wait for initial kick-off after machine start */
    while (first_cpu->stopped) {
        qemu_cond_wait(first_cpu->halt_cond, &qemu_global_mutex);

        /* process any pending work */
        CPU_FOREACH(cpu) {
            current_cpu = cpu;
            qemu_wait_io_event_common(cpu);
        }
    }

    start_tcg_kick_timer();

    cpu = first_cpu;

    /* process any pending work */
    cpu->exit_request = 1;

    while (1) {
        /* Account partial waits to QEMU_CLOCK_VIRTUAL.  */
        qemu_account_warp_timer();

        /* Run the timers here.  This is much more efficient than
         * waking up the I/O thread and waiting for completion.
         */
        handle_icount_deadline();

        /* Wrap around once the end of the vCPU list has been reached. */
        if (!cpu) {
            cpu = first_cpu;
        }

        while (cpu && !cpu->queued_work_first && !cpu->exit_request) {

            atomic_mb_set(&tcg_current_rr_cpu, cpu);
            current_cpu = cpu;

            qemu_clock_enable(QEMU_CLOCK_VIRTUAL,
                              (cpu->singlestep_enabled & SSTEP_NOTIMER) == 0);

            if (cpu_can_run(cpu)) {
                int r;

                prepare_icount_for_run(cpu);

                r = tcg_cpu_exec(cpu);

                process_icount_data(cpu);

                if (r == EXCP_DEBUG) {
                    cpu_handle_guest_debug(cpu);
                    break;
                } else if (r == EXCP_ATOMIC) {
                    /* The atomic step runs with the global mutex dropped. */
                    qemu_mutex_unlock_iothread();
                    cpu_exec_step_atomic(cpu);
                    qemu_mutex_lock_iothread();
                    break;
                }
            } else if (cpu->stop) {
                /* An unplugged vCPU is skipped before leaving the loop. */
                if (cpu->unplug) {
                    cpu = CPU_NEXT(cpu);
                }
                break;
            }

            cpu = CPU_NEXT(cpu);
        } /* while (cpu && !cpu->exit_request).. */

        /* Does not need atomic_mb_set because a spurious wakeup is okay.  */
        atomic_set(&tcg_current_rr_cpu, NULL);

        if (cpu && cpu->exit_request) {
            atomic_mb_set(&cpu->exit_request, 0);
        }

        /* cpu may be NULL here; fall back to the first vCPU in the list. */
        qemu_tcg_rr_wait_io_event(cpu ? cpu : QTAILQ_FIRST(&cpus));
        deal_with_unplugged_cpus();
    }

    return NULL;
}
1470
1471 static void *qemu_hax_cpu_thread_fn(void *arg)
1472 {
1473 CPUState *cpu = arg;
1474 int r;
1475
1476 qemu_mutex_lock_iothread();
1477 qemu_thread_get_self(cpu->thread);
1478
1479 cpu->thread_id = qemu_get_thread_id();
1480 cpu->created = true;
1481 cpu->halted = 0;
1482 current_cpu = cpu;
1483
1484 hax_init_vcpu(cpu);
1485 qemu_cond_signal(&qemu_cpu_cond);
1486
1487 while (1) {
1488 if (cpu_can_run(cpu)) {
1489 r = hax_smp_cpu_exec(cpu);
1490 if (r == EXCP_DEBUG) {
1491 cpu_handle_guest_debug(cpu);
1492 }
1493 }
1494
1495 qemu_wait_io_event(cpu);
1496 }
1497 return NULL;
1498 }
1499
1500 /* The HVF-specific vCPU thread function. This one should only run when the host
1501 * CPU supports the VMX "unrestricted guest" feature. */
1502 static void *qemu_hvf_cpu_thread_fn(void *arg)
1503 {
1504 CPUState *cpu = arg;
1505
1506 int r;
1507
1508 assert(hvf_enabled());
1509
1510 rcu_register_thread();
1511
1512 qemu_mutex_lock_iothread();
1513 qemu_thread_get_self(cpu->thread);
1514
1515 cpu->thread_id = qemu_get_thread_id();
1516 cpu->can_do_io = 1;
1517 current_cpu = cpu;
1518
1519 hvf_init_vcpu(cpu);
1520
1521 /* signal CPU creation */
1522 cpu->created = true;
1523 qemu_cond_signal(&qemu_cpu_cond);
1524
1525 do {
1526 if (cpu_can_run(cpu)) {
1527 r = hvf_vcpu_exec(cpu);
1528 if (r == EXCP_DEBUG) {
1529 cpu_handle_guest_debug(cpu);
1530 }
1531 }
1532 qemu_wait_io_event(cpu);
1533 } while (!cpu->unplug || cpu_can_run(cpu));
1534
1535 hvf_vcpu_destroy(cpu);
1536 cpu->created = false;
1537 qemu_cond_signal(&qemu_cpu_cond);
1538 qemu_mutex_unlock_iothread();
1539 return NULL;
1540 }
1541
1542 #ifdef _WIN32
/*
 * No-op APC routine.  qemu_cpu_kick_thread() queues it with QueueUserAPC()
 * to kick a vCPU thread on Windows.
 */
static void CALLBACK dummy_apc_func(ULONG_PTR unused)
{
}
1546 #endif
1547
1548 /* Multi-threaded TCG
1549 *
1550 * In the multi-threaded case each vCPU has its own thread. The TLS
1551 * variable current_cpu can be used deep in the code to find the
1552 * current CPUState for a given thread.
1553 */
1554
1555 static void *qemu_tcg_cpu_thread_fn(void *arg)
1556 {
1557 CPUState *cpu = arg;
1558
1559 g_assert(!use_icount);
1560
1561 rcu_register_thread();
1562 tcg_register_thread();
1563
1564 qemu_mutex_lock_iothread();
1565 qemu_thread_get_self(cpu->thread);
1566
1567 cpu->thread_id = qemu_get_thread_id();
1568 cpu->created = true;
1569 cpu->can_do_io = 1;
1570 current_cpu = cpu;
1571 qemu_cond_signal(&qemu_cpu_cond);
1572
1573 /* process any pending work */
1574 cpu->exit_request = 1;
1575
1576 while (1) {
1577 if (cpu_can_run(cpu)) {
1578 int r;
1579 r = tcg_cpu_exec(cpu);
1580 switch (r) {
1581 case EXCP_DEBUG:
1582 cpu_handle_guest_debug(cpu);
1583 break;
1584 case EXCP_HALTED:
1585 /* during start-up the vCPU is reset and the thread is
1586 * kicked several times. If we don't ensure we go back
1587 * to sleep in the halted state we won't cleanly
1588 * start-up when the vCPU is enabled.
1589 *
1590 * cpu->halted should ensure we sleep in wait_io_event
1591 */
1592 g_assert(cpu->halted);
1593 break;
1594 case EXCP_ATOMIC:
1595 qemu_mutex_unlock_iothread();
1596 cpu_exec_step_atomic(cpu);
1597 qemu_mutex_lock_iothread();
1598 default:
1599 /* Ignore everything else? */
1600 break;
1601 }
1602 } else if (cpu->unplug) {
1603 qemu_tcg_destroy_vcpu(cpu);
1604 cpu->created = false;
1605 qemu_cond_signal(&qemu_cpu_cond);
1606 qemu_mutex_unlock_iothread();
1607 return NULL;
1608 }
1609
1610 atomic_mb_set(&cpu->exit_request, 0);
1611 qemu_wait_io_event(cpu);
1612 }
1613
1614 return NULL;
1615 }
1616
1617 static void qemu_cpu_kick_thread(CPUState *cpu)
1618 {
1619 #ifndef _WIN32
1620 int err;
1621
1622 if (cpu->thread_kicked) {
1623 return;
1624 }
1625 cpu->thread_kicked = true;
1626 err = pthread_kill(cpu->thread->thread, SIG_IPI);
1627 if (err) {
1628 fprintf(stderr, "qemu:%s: %s", __func__, strerror(err));
1629 exit(1);
1630 }
1631 #else /* _WIN32 */
1632 if (!qemu_cpu_is_self(cpu)) {
1633 if (!QueueUserAPC(dummy_apc_func, cpu->hThread, 0)) {
1634 fprintf(stderr, "%s: QueueUserAPC failed with error %lu\n",
1635 __func__, GetLastError());
1636 exit(1);
1637 }
1638 }
1639 #endif
1640 }
1641
1642 void qemu_cpu_kick(CPUState *cpu)
1643 {
1644 qemu_cond_broadcast(cpu->halt_cond);
1645 if (tcg_enabled()) {
1646 cpu_exit(cpu);
1647 /* NOP unless doing single-thread RR */
1648 qemu_cpu_kick_rr_cpu();
1649 } else {
1650 if (hax_enabled()) {
1651 /*
1652 * FIXME: race condition with the exit_request check in
1653 * hax_vcpu_hax_exec
1654 */
1655 cpu->exit_request = 1;
1656 }
1657 qemu_cpu_kick_thread(cpu);
1658 }
1659 }
1660
/* Kick the calling thread's own vCPU; current_cpu must be set (asserted). */
void qemu_cpu_kick_self(void)
{
    assert(current_cpu);
    qemu_cpu_kick_thread(current_cpu);
}
1666
/* Return true when the calling thread is the thread recorded for @cpu. */
bool qemu_cpu_is_self(CPUState *cpu)
{
    return qemu_thread_is_self(cpu->thread);
}
1671
1672 bool qemu_in_vcpu_thread(void)
1673 {
1674 return current_cpu && qemu_cpu_is_self(current_cpu);
1675 }
1676
/* Per-thread flag: true while this thread holds the global mutex via
 * qemu_mutex_lock_iothread(). */
static __thread bool iothread_locked = false;
1678
/* Return whether the calling thread has recorded itself as lock holder. */
bool qemu_mutex_iothread_locked(void)
{
    return iothread_locked;
}
1683
/*
 * Take the global mutex and record that this thread holds it.  Asserts
 * that the calling thread does not hold it already.
 */
void qemu_mutex_lock_iothread(void)
{
    g_assert(!qemu_mutex_iothread_locked());
    qemu_mutex_lock(&qemu_global_mutex);
    iothread_locked = true;
}
1690
/*
 * Release the global mutex.  Asserts that the calling thread holds it;
 * the per-thread flag is cleared before the mutex is dropped.
 */
void qemu_mutex_unlock_iothread(void)
{
    g_assert(qemu_mutex_iothread_locked());
    iothread_locked = false;
    qemu_mutex_unlock(&qemu_global_mutex);
}
1697
1698 static bool all_vcpus_paused(void)
1699 {
1700 CPUState *cpu;
1701
1702 CPU_FOREACH(cpu) {
1703 if (!cpu->stopped) {
1704 return false;
1705 }
1706 }
1707
1708 return true;
1709 }
1710
1711 void pause_all_vcpus(void)
1712 {
1713 CPUState *cpu;
1714
1715 qemu_clock_enable(QEMU_CLOCK_VIRTUAL, false);
1716 CPU_FOREACH(cpu) {
1717 if (qemu_cpu_is_self(cpu)) {
1718 qemu_cpu_stop(cpu, true);
1719 } else {
1720 cpu->stop = true;
1721 qemu_cpu_kick(cpu);
1722 }
1723 }
1724
1725 while (!all_vcpus_paused()) {
1726 qemu_cond_wait(&qemu_pause_cond, &qemu_global_mutex);
1727 CPU_FOREACH(cpu) {
1728 qemu_cpu_kick(cpu);
1729 }
1730 }
1731 }
1732
/* Clear @cpu's stop request and stopped state, then kick it awake. */
void cpu_resume(CPUState *cpu)
{
    cpu->stop = false;
    cpu->stopped = false;
    qemu_cpu_kick(cpu);
}
1739
/* Re-enable QEMU_CLOCK_VIRTUAL and resume every vCPU. */
void resume_all_vcpus(void)
{
    CPUState *cpu;

    qemu_clock_enable(QEMU_CLOCK_VIRTUAL, true);
    CPU_FOREACH(cpu) {
        cpu_resume(cpu);
    }
}
1749
/*
 * Request removal of @cpu: set its stop and unplug flags and kick it.
 * Returns without waiting for the vCPU to be torn down.
 */
void cpu_remove(CPUState *cpu)
{
    cpu->stop = true;
    cpu->unplug = true;
    qemu_cpu_kick(cpu);
}
1756
/*
 * Request removal of @cpu and wait on qemu_cpu_cond (with the global
 * mutex) until its created flag has been cleared.
 */
void cpu_remove_sync(CPUState *cpu)
{
    cpu_remove(cpu);
    while (cpu->created) {
        qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
    }
}
1764
/* Size in bytes (terminating NUL included) of the temporary buffers in
 * which vCPU thread names are formatted. */
#define VCPU_THREAD_NAME_SIZE 16
1767
1768 static void qemu_tcg_init_vcpu(CPUState *cpu)
1769 {
1770 char thread_name[VCPU_THREAD_NAME_SIZE];
1771 static QemuCond *single_tcg_halt_cond;
1772 static QemuThread *single_tcg_cpu_thread;
1773 static int tcg_region_inited;
1774
1775 /*
1776 * Initialize TCG regions--once. Now is a good time, because:
1777 * (1) TCG's init context, prologue and target globals have been set up.
1778 * (2) qemu_tcg_mttcg_enabled() works now (TCG init code runs before the
1779 * -accel flag is processed, so the check doesn't work then).
1780 */
1781 if (!tcg_region_inited) {
1782 tcg_region_inited = 1;
1783 tcg_region_init();
1784 }
1785
1786 if (qemu_tcg_mttcg_enabled() || !single_tcg_cpu_thread) {
1787 cpu->thread = g_malloc0(sizeof(QemuThread));
1788 cpu->halt_cond = g_malloc0(sizeof(QemuCond));
1789 qemu_cond_init(cpu->halt_cond);
1790
1791 if (qemu_tcg_mttcg_enabled()) {
1792 /* create a thread per vCPU with TCG (MTTCG) */
1793 parallel_cpus = true;
1794 snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/TCG",
1795 cpu->cpu_index);
1796
1797 qemu_thread_create(cpu->thread, thread_name, qemu_tcg_cpu_thread_fn,
1798 cpu, QEMU_THREAD_JOINABLE);
1799
1800 } else {
1801 /* share a single thread for all cpus with TCG */
1802 snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "ALL CPUs/TCG");
1803 qemu_thread_create(cpu->thread, thread_name,
1804 qemu_tcg_rr_cpu_thread_fn,
1805 cpu, QEMU_THREAD_JOINABLE);
1806
1807 single_tcg_halt_cond = cpu->halt_cond;
1808 single_tcg_cpu_thread = cpu->thread;
1809 }
1810 #ifdef _WIN32
1811 cpu->hThread = qemu_thread_get_handle(cpu->thread);
1812 #endif
1813 while (!cpu->created) {
1814 qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
1815 }
1816 } else {
1817 /* For non-MTTCG cases we share the thread */
1818 cpu->thread = single_tcg_cpu_thread;
1819 cpu->halt_cond = single_tcg_halt_cond;
1820 }
1821 }
1822
1823 static void qemu_hax_start_vcpu(CPUState *cpu)
1824 {
1825 char thread_name[VCPU_THREAD_NAME_SIZE];
1826
1827 cpu->thread = g_malloc0(sizeof(QemuThread));
1828 cpu->halt_cond = g_malloc0(sizeof(QemuCond));
1829 qemu_cond_init(cpu->halt_cond);
1830
1831 snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/HAX",
1832 cpu->cpu_index);
1833 qemu_thread_create(cpu->thread, thread_name, qemu_hax_cpu_thread_fn,
1834 cpu, QEMU_THREAD_JOINABLE);
1835 #ifdef _WIN32
1836 cpu->hThread = qemu_thread_get_handle(cpu->thread);
1837 #endif
1838 while (!cpu->created) {
1839 qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
1840 }
1841 }
1842
1843 static void qemu_kvm_start_vcpu(CPUState *cpu)
1844 {
1845 char thread_name[VCPU_THREAD_NAME_SIZE];
1846
1847 cpu->thread = g_malloc0(sizeof(QemuThread));
1848 cpu->halt_cond = g_malloc0(sizeof(QemuCond));
1849 qemu_cond_init(cpu->halt_cond);
1850 snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/KVM",
1851 cpu->cpu_index);
1852 qemu_thread_create(cpu->thread, thread_name, qemu_kvm_cpu_thread_fn,
1853 cpu, QEMU_THREAD_JOINABLE);
1854 while (!cpu->created) {
1855 qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
1856 }
1857 }
1858
1859 static void qemu_hvf_start_vcpu(CPUState *cpu)
1860 {
1861 char thread_name[VCPU_THREAD_NAME_SIZE];
1862
1863 /* HVF currently does not support TCG, and only runs in
1864 * unrestricted-guest mode. */
1865 assert(hvf_enabled());
1866
1867 cpu->thread = g_malloc0(sizeof(QemuThread));
1868 cpu->halt_cond = g_malloc0(sizeof(QemuCond));
1869 qemu_cond_init(cpu->halt_cond);
1870
1871 snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/HVF",
1872 cpu->cpu_index);
1873 qemu_thread_create(cpu->thread, thread_name, qemu_hvf_cpu_thread_fn,
1874 cpu, QEMU_THREAD_JOINABLE);
1875 while (!cpu->created) {
1876 qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
1877 }
1878 }
1879
1880 static void qemu_dummy_start_vcpu(CPUState *cpu)
1881 {
1882 char thread_name[VCPU_THREAD_NAME_SIZE];
1883
1884 cpu->thread = g_malloc0(sizeof(QemuThread));
1885 cpu->halt_cond = g_malloc0(sizeof(QemuCond));
1886 qemu_cond_init(cpu->halt_cond);
1887 snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/DUMMY",
1888 cpu->cpu_index);
1889 qemu_thread_create(cpu->thread, thread_name, qemu_dummy_cpu_thread_fn, cpu,
1890 QEMU_THREAD_JOINABLE);
1891 while (!cpu->created) {
1892 qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
1893 }
1894 }
1895
/*
 * Common vCPU bring-up: record the SMP topology on @cpu, mark it stopped,
 * give it a default address space if it has none, and start its host
 * thread with the helper that matches the enabled accelerator.
 */
void qemu_init_vcpu(CPUState *cpu)
{
    cpu->nr_cores = smp_cores;
    cpu->nr_threads = smp_threads;
    /* A new vCPU starts out stopped. */
    cpu->stopped = true;

    if (!cpu->as) {
        /* If the target cpu hasn't set up any address spaces itself,
         * give it the default one.
         */
        cpu->num_ases = 1;
        cpu_address_space_init(cpu, 0, "cpu-memory", cpu->memory);
    }

    /* Accelerators are probed in this fixed order; dummy is the fallback. */
    if (kvm_enabled()) {
        qemu_kvm_start_vcpu(cpu);
    } else if (hax_enabled()) {
        qemu_hax_start_vcpu(cpu);
    } else if (hvf_enabled()) {
        qemu_hvf_start_vcpu(cpu);
    } else if (tcg_enabled()) {
        qemu_tcg_init_vcpu(cpu);
    } else {
        qemu_dummy_start_vcpu(cpu);
    }
}
1922
1923 void cpu_stop_current(void)
1924 {
1925 if (current_cpu) {
1926 qemu_cpu_stop(current_cpu, true);
1927 }
1928 }
1929
1930 int vm_stop(RunState state)
1931 {
1932 if (qemu_in_vcpu_thread()) {
1933 qemu_system_vmstop_request_prepare();
1934 qemu_system_vmstop_request(state);
1935 /*
1936 * FIXME: should not return to device code in case
1937 * vm_stop() has been requested.
1938 */
1939 cpu_stop_current();
1940 return 0;
1941 }
1942
1943 return do_vm_stop(state);
1944 }
1945
1946 /**
1947 * Prepare for (re)starting the VM.
1948 * Returns -1 if the vCPUs are not to be restarted (e.g. if they are already
1949 * running or in case of an error condition), 0 otherwise.
1950 */
1951 int vm_prepare_start(void)
1952 {
1953 RunState requested;
1954 int res = 0;
1955
1956 qemu_vmstop_requested(&requested);
1957 if (runstate_is_running() && requested == RUN_STATE__MAX) {
1958 return -1;
1959 }
1960
1961 /* Ensure that a STOP/RESUME pair of events is emitted if a
1962 * vmstop request was pending. The BLOCK_IO_ERROR event, for
1963 * example, according to documentation is always followed by
1964 * the STOP event.
1965 */
1966 if (runstate_is_running()) {
1967 qapi_event_send_stop(&error_abort);
1968 res = -1;
1969 } else {
1970 replay_enable_events();
1971 cpu_enable_ticks();
1972 runstate_set(RUN_STATE_RUNNING);
1973 vm_state_notify(1, RUN_STATE_RUNNING);
1974 }
1975
1976 /* We are sending this now, but the CPUs will be resumed shortly later */
1977 qapi_event_send_resume(&error_abort);
1978 return res;
1979 }
1980
/* Start the VM: resume all vCPUs if vm_prepare_start() returns 0. */
void vm_start(void)
{
    if (vm_prepare_start() == 0) {
        resume_all_vcpus();
    }
}
1987
1988 /* does a state transition even if the VM is already stopped,
1989 current state is forgotten forever */
1990 int vm_stop_force_state(RunState state)
1991 {
1992 if (runstate_is_running()) {
1993 return vm_stop(state);
1994 } else {
1995 runstate_set(state);
1996
1997 bdrv_drain_all();
1998 /* Make sure to return an error if the flush in a previous vm_stop()
1999 * failed. */
2000 return bdrv_flush_all();
2001 }
2002 }
2003
/*
 * Print the list of CPU models to @f using @cpu_fprintf, when the target
 * defines the cpu_list macro; otherwise do nothing.  @optarg is not used.
 */
void list_cpus(FILE *f, fprintf_function cpu_fprintf, const char *optarg)
{
    /* XXX: implement xxx_cpu_list for targets that still miss it */
#if defined(cpu_list)
    cpu_list(f, cpu_fprintf);
#endif
}
2011
/*
 * Build the list of per-vCPU information for the QMP query-cpus command.
 *
 * For each vCPU, in CPU_FOREACH order, the state is synchronised and a
 * CpuInfo entry is filled in with generic fields plus target-specific
 * program counter fields where the target is one of those handled below.
 * @errp is not written by this function.
 *
 * Returns the head of a newly allocated list, or NULL if there are no vCPUs.
 */
CpuInfoList *qmp_query_cpus(Error **errp)
{
    MachineState *ms = MACHINE(qdev_get_machine());
    MachineClass *mc = MACHINE_GET_CLASS(ms);
    CpuInfoList *head = NULL, *cur_item = NULL;
    CPUState *cpu;

    CPU_FOREACH(cpu) {
        CpuInfoList *info;
        /* Target-specific view of the vCPU, used for the PC fields below. */
#if defined(TARGET_I386)
        X86CPU *x86_cpu = X86_CPU(cpu);
        CPUX86State *env = &x86_cpu->env;
#elif defined(TARGET_PPC)
        PowerPCCPU *ppc_cpu = POWERPC_CPU(cpu);
        CPUPPCState *env = &ppc_cpu->env;
#elif defined(TARGET_SPARC)
        SPARCCPU *sparc_cpu = SPARC_CPU(cpu);
        CPUSPARCState *env = &sparc_cpu->env;
#elif defined(TARGET_MIPS)
        MIPSCPU *mips_cpu = MIPS_CPU(cpu);
        CPUMIPSState *env = &mips_cpu->env;
#elif defined(TARGET_TRICORE)
        TriCoreCPU *tricore_cpu = TRICORE_CPU(cpu);
        CPUTriCoreState *env = &tricore_cpu->env;
#endif

        cpu_synchronize_state(cpu);

        info = g_malloc0(sizeof(*info));
        info->value = g_malloc0(sizeof(*info->value));
        info->value->CPU = cpu->cpu_index;
        info->value->current = (cpu == first_cpu);
        info->value->halted = cpu->halted;
        info->value->qom_path = object_get_canonical_path(OBJECT(cpu));
        info->value->thread_id = cpu->thread_id;
#if defined(TARGET_I386)
        info->value->arch = CPU_INFO_ARCH_X86;
        info->value->u.x86.pc = env->eip + env->segs[R_CS].base;
#elif defined(TARGET_PPC)
        info->value->arch = CPU_INFO_ARCH_PPC;
        info->value->u.ppc.nip = env->nip;
#elif defined(TARGET_SPARC)
        info->value->arch = CPU_INFO_ARCH_SPARC;
        info->value->u.q_sparc.pc = env->pc;
        info->value->u.q_sparc.npc = env->npc;
#elif defined(TARGET_MIPS)
        info->value->arch = CPU_INFO_ARCH_MIPS;
        info->value->u.q_mips.PC = env->active_tc.PC;
#elif defined(TARGET_TRICORE)
        info->value->arch = CPU_INFO_ARCH_TRICORE;
        info->value->u.tricore.PC = env->PC;
#else
        info->value->arch = CPU_INFO_ARCH_OTHER;
#endif
        /* Instance properties are reported only if the machine class
         * provides the cpu_index_to_instance_props hook. */
        info->value->has_props = !!mc->cpu_index_to_instance_props;
        if (info->value->has_props) {
            CpuInstanceProperties *props;
            props = g_malloc0(sizeof(*props));
            *props = mc->cpu_index_to_instance_props(ms, cpu->cpu_index);
            info->value->props = props;
        }

        /* XXX: waiting for the qapi to support GSList */
        /* Append to the tail so the list keeps CPU_FOREACH order. */
        if (!cur_item) {
            head = cur_item = info;
        } else {
            cur_item->next = info;
            cur_item = info;
        }
    }

    return head;
}
2085
2086 void qmp_memsave(int64_t addr, int64_t size, const char *filename,
2087 bool has_cpu, int64_t cpu_index, Error **errp)
2088 {
2089 FILE *f;
2090 uint32_t l;
2091 CPUState *cpu;
2092 uint8_t buf[1024];
2093 int64_t orig_addr = addr, orig_size = size;
2094
2095 if (!has_cpu) {
2096 cpu_index = 0;
2097 }
2098
2099 cpu = qemu_get_cpu(cpu_index);
2100 if (cpu == NULL) {
2101 error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "cpu-index",
2102 "a CPU number");
2103 return;
2104 }
2105
2106 f = fopen(filename, "wb");
2107 if (!f) {
2108 error_setg_file_open(errp, errno, filename);
2109 return;
2110 }
2111
2112 while (size != 0) {
2113 l = sizeof(buf);
2114 if (l > size)
2115 l = size;
2116 if (cpu_memory_rw_debug(cpu, addr, buf, l, 0) != 0) {
2117 error_setg(errp, "Invalid addr 0x%016" PRIx64 "/size %" PRId64
2118 " specified", orig_addr, orig_size);
2119 goto exit;
2120 }
2121 if (fwrite(buf, 1, l, f) != l) {
2122 error_setg(errp, QERR_IO_ERROR);
2123 goto exit;
2124 }
2125 addr += l;
2126 size -= l;
2127 }
2128
2129 exit:
2130 fclose(f);
2131 }
2132
2133 void qmp_pmemsave(int64_t addr, int64_t size, const char *filename,
2134 Error **errp)
2135 {
2136 FILE *f;
2137 uint32_t l;
2138 uint8_t buf[1024];
2139
2140 f = fopen(filename, "wb");
2141 if (!f) {
2142 error_setg_file_open(errp, errno, filename);
2143 return;
2144 }
2145
2146 while (size != 0) {
2147 l = sizeof(buf);
2148 if (l > size)
2149 l = size;
2150 cpu_physical_memory_read(addr, buf, l);
2151 if (fwrite(buf, 1, l, f) != l) {
2152 error_setg(errp, QERR_IO_ERROR);
2153 goto exit;
2154 }
2155 addr += l;
2156 size -= l;
2157 }
2158
2159 exit:
2160 fclose(f);
2161 }
2162
/*
 * QMP inject-nmi: forward to nmi_monitor_handle() with the monitor's
 * current CPU index; errors are reported through @errp.
 */
void qmp_inject_nmi(Error **errp)
{
    nmi_monitor_handle(monitor_get_cpu_index(), errp);
}
2167
2168 void dump_drift_info(FILE *f, fprintf_function cpu_fprintf)
2169 {
2170 if (!use_icount) {
2171 return;
2172 }
2173
2174 cpu_fprintf(f, "Host - Guest clock %"PRIi64" ms\n",
2175 (cpu_get_clock() - cpu_get_icount())/SCALE_MS);
2176 if (icount_align_option) {
2177 cpu_fprintf(f, "Max guest delay %"PRIi64" ms\n", -max_delay/SCALE_MS);
2178 cpu_fprintf(f, "Max guest advance %"PRIi64" ms\n", max_advance/SCALE_MS);
2179 } else {
2180 cpu_fprintf(f, "Max guest delay NA\n");
2181 cpu_fprintf(f, "Max guest advance NA\n");
2182 }
2183 }