1 /*
2 * QEMU System Emulator
3 *
4 * Copyright (c) 2003-2008 Fabrice Bellard
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to deal
8 * in the Software without restriction, including without limitation the rights
9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 * copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22 * THE SOFTWARE.
23 */
24
25 #include "qemu/osdep.h"
26 #include "qemu/config-file.h"
27 #include "cpu.h"
28 #include "monitor/monitor.h"
29 #include "qapi/error.h"
30 #include "qapi/qapi-commands-misc.h"
31 #include "qapi/qapi-events-run-state.h"
32 #include "qapi/qmp/qerror.h"
33 #include "qemu/error-report.h"
34 #include "sysemu/sysemu.h"
35 #include "sysemu/block-backend.h"
36 #include "exec/gdbstub.h"
37 #include "sysemu/dma.h"
38 #include "sysemu/hw_accel.h"
39 #include "sysemu/kvm.h"
40 #include "sysemu/hax.h"
41 #include "sysemu/hvf.h"
42 #include "sysemu/whpx.h"
43 #include "exec/exec-all.h"
44
45 #include "qemu/thread.h"
46 #include "sysemu/cpus.h"
47 #include "sysemu/qtest.h"
48 #include "qemu/main-loop.h"
49 #include "qemu/option.h"
50 #include "qemu/bitmap.h"
51 #include "qemu/seqlock.h"
52 #include "tcg.h"
53 #include "hw/nmi.h"
54 #include "sysemu/replay.h"
55 #include "hw/boards.h"
56
57 #ifdef CONFIG_LINUX
58
59 #include <sys/prctl.h>
60
61 #ifndef PR_MCE_KILL
62 #define PR_MCE_KILL 33
63 #endif
64
65 #ifndef PR_MCE_KILL_SET
66 #define PR_MCE_KILL_SET 1
67 #endif
68
69 #ifndef PR_MCE_KILL_EARLY
70 #define PR_MCE_KILL_EARLY 1
71 #endif
72
73 #endif /* CONFIG_LINUX */
74
75 int64_t max_delay;
76 int64_t max_advance;
77
78 /* vcpu throttling controls */
79 static QEMUTimer *throttle_timer;
80 static unsigned int throttle_percentage;
81
82 #define CPU_THROTTLE_PCT_MIN 1
83 #define CPU_THROTTLE_PCT_MAX 99
84 #define CPU_THROTTLE_TIMESLICE_NS 10000000
85
86 bool cpu_is_stopped(CPUState *cpu)
87 {
88 return cpu->stopped || !runstate_is_running();
89 }
90
91 static bool cpu_thread_is_idle(CPUState *cpu)
92 {
93 if (cpu->stop || cpu->queued_work_first) {
94 return false;
95 }
96 if (cpu_is_stopped(cpu)) {
97 return true;
98 }
99 if (!cpu->halted || cpu_has_work(cpu) ||
100 kvm_halt_in_kernel()) {
101 return false;
102 }
103 return true;
104 }
105
106 static bool all_cpu_threads_idle(void)
107 {
108 CPUState *cpu;
109
110 CPU_FOREACH(cpu) {
111 if (!cpu_thread_is_idle(cpu)) {
112 return false;
113 }
114 }
115 return true;
116 }
117
118 /***********************************************************/
119 /* guest cycle counter */
120
121 /* Protected by TimersState seqlock */
122
123 static bool icount_sleep = true;
124 /* Conversion factor from emulated instructions to virtual clock ticks. */
125 static int icount_time_shift;
126 /* Arbitrarily pick 1MIPS as the minimum allowable speed. */
127 #define MAX_ICOUNT_SHIFT 10
128
129 typedef struct TimersState {
130 /* Protected by BQL. */
131 int64_t cpu_ticks_prev;
132 int64_t cpu_ticks_offset;
133
134 /* cpu_clock_offset can be read outside the BQL, so protect it with
135 * this lock.
136 */
137 QemuSeqLock vm_clock_seqlock;
138 int64_t cpu_clock_offset;
139 int32_t cpu_ticks_enabled;
140 int64_t dummy;
141
142 /* Compensate for varying guest execution speed. */
143 int64_t qemu_icount_bias;
144 /* Only written by TCG thread */
145 int64_t qemu_icount;
146 /* for adjusting icount */
147 int64_t vm_clock_warp_start;
148 QEMUTimer *icount_rt_timer;
149 QEMUTimer *icount_vm_timer;
150 QEMUTimer *icount_warp_timer;
151 } TimersState;
152
153 static TimersState timers_state;
154 bool mttcg_enabled;
155
156 /*
157 * We default to false if we know other options have been enabled
158 * which are currently incompatible with MTTCG. Otherwise, once each
159 * guest (target) has been updated to support:
160 * - atomic instructions
161 * - memory ordering primitives (barriers)
162 * it can set the appropriate CONFIG flags in ${target}-softmmu.mak
163 *
164 * Once a guest architecture has been converted to the new primitives
165 * there are two remaining limitations to check.
166 *
167 * - The guest can't be oversized (e.g. 64 bit guest on 32 bit host)
168 * - The host must have a memory model at least as strong as the guest's
169 *
170 * It may be possible in future to support strong guests on weak hosts
171 * but that will require tagging all load/stores in a guest with their
172 * implicit memory order requirements which would likely slow things
173 * down a lot.
174 */
175
176 static bool check_tcg_memory_orders_compatible(void)
177 {
178 #if defined(TCG_GUEST_DEFAULT_MO) && defined(TCG_TARGET_DEFAULT_MO)
179 return (TCG_GUEST_DEFAULT_MO & ~TCG_TARGET_DEFAULT_MO) == 0;
180 #else
181 return false;
182 #endif
183 }
184
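/*
 * For example (illustrative): if the guest's TCG_GUEST_DEFAULT_MO includes
 * TCG_MO_ST_ST (stores may not be reordered with later stores) but the host
 * backend's TCG_TARGET_DEFAULT_MO does not, that bit survives the "& ~"
 * above, the function returns false, and MTTCG stays off by default for
 * that guest/host combination.
 */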
185 static bool default_mttcg_enabled(void)
186 {
187 if (use_icount || TCG_OVERSIZED_GUEST) {
188 return false;
189 } else {
190 #ifdef TARGET_SUPPORTS_MTTCG
191 return check_tcg_memory_orders_compatible();
192 #else
193 return false;
194 #endif
195 }
196 }
197
198 void qemu_tcg_configure(QemuOpts *opts, Error **errp)
199 {
200 const char *t = qemu_opt_get(opts, "thread");
201 if (t) {
202 if (strcmp(t, "multi") == 0) {
203 if (TCG_OVERSIZED_GUEST) {
204 error_setg(errp, "No MTTCG when guest word size > host's");
205 } else if (use_icount) {
206 error_setg(errp, "No MTTCG when icount is enabled");
207 } else {
208 #ifndef TARGET_SUPPORTS_MTTCG
209 error_report("Guest not yet converted to MTTCG - "
210 "you may get unexpected results");
211 #endif
212 if (!check_tcg_memory_orders_compatible()) {
213 error_report("Guest expects a stronger memory ordering "
214 "than the host provides");
215 error_printf("This may cause strange/hard to debug errors\n");
216 }
217 mttcg_enabled = true;
218 }
219 } else if (strcmp(t, "single") == 0) {
220 mttcg_enabled = false;
221 } else {
222 error_setg(errp, "Invalid 'thread' setting %s", t);
223 }
224 } else {
225 mttcg_enabled = default_mttcg_enabled();
226 }
227 }
228
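/*
 * These options come from the command line, e.g. (illustrative invocations):
 *
 *   qemu-system-<target> -accel tcg,thread=multi    # request MTTCG
 *   qemu-system-<target> -accel tcg,thread=single   # force round-robin TCG
 *
 * When no "thread" option is given, the default computed above is used.
 */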
229 /* The current number of executed instructions is based on what we
230 * originally budgeted minus the current state of the decrementing
231 * icount counters in extra/u16.low.
232 */
233 static int64_t cpu_get_icount_executed(CPUState *cpu)
234 {
235 return cpu->icount_budget - (cpu->icount_decr.u16.low + cpu->icount_extra);
236 }
237
238 /*
239 * Update the global shared timers_state.qemu_icount to take into
240 * account executed instructions. This is done by the TCG vCPU
241 * thread so the main-loop can see time has moved forward.
242 */
243 void cpu_update_icount(CPUState *cpu)
244 {
245 int64_t executed = cpu_get_icount_executed(cpu);
246 cpu->icount_budget -= executed;
247
248 #ifdef CONFIG_ATOMIC64
249 atomic_set__nocheck(&timers_state.qemu_icount,
250 atomic_read__nocheck(&timers_state.qemu_icount) +
251 executed);
252 #else /* FIXME: we need 64bit atomics to do this safely */
253 timers_state.qemu_icount += executed;
254 #endif
255 }
256
257 int64_t cpu_get_icount_raw(void)
258 {
259 CPUState *cpu = current_cpu;
260
261 if (cpu && cpu->running) {
262 if (!cpu->can_do_io) {
263 error_report("Bad icount read");
264 exit(1);
265 }
266 /* Take into account what has run */
267 cpu_update_icount(cpu);
268 }
269 #ifdef CONFIG_ATOMIC64
270 return atomic_read__nocheck(&timers_state.qemu_icount);
271 #else /* FIXME: we need 64bit atomics to do this safely */
272 return timers_state.qemu_icount;
273 #endif
274 }
275
276 /* Return the virtual CPU time, based on the instruction counter. */
277 static int64_t cpu_get_icount_locked(void)
278 {
279 int64_t icount = cpu_get_icount_raw();
280 return timers_state.qemu_icount_bias + cpu_icount_to_ns(icount);
281 }
282
283 int64_t cpu_get_icount(void)
284 {
285 int64_t icount;
286 unsigned start;
287
288 do {
289 start = seqlock_read_begin(&timers_state.vm_clock_seqlock);
290 icount = cpu_get_icount_locked();
291 } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start));
292
293 return icount;
294 }
295
296 int64_t cpu_icount_to_ns(int64_t icount)
297 {
298 return icount << icount_time_shift;
299 }
300
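/*
 * Worked example of the shift arithmetic: with icount_time_shift == 3 each
 * instruction accounts for 1 << 3 = 8 ns of virtual time, i.e. 125 MIPS;
 * at MAX_ICOUNT_SHIFT (10) it is 1024 ns per instruction, roughly the
 * 1 MIPS minimum mentioned above.
 */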
301 /* return the time elapsed in VM between vm_start and vm_stop. Unless
302 * icount is active, cpu_get_ticks() uses units of the host CPU cycle
303 * counter.
304 *
305 * Caller must hold the BQL
306 */
307 int64_t cpu_get_ticks(void)
308 {
309 int64_t ticks;
310
311 if (use_icount) {
312 return cpu_get_icount();
313 }
314
315 ticks = timers_state.cpu_ticks_offset;
316 if (timers_state.cpu_ticks_enabled) {
317 ticks += cpu_get_host_ticks();
318 }
319
320 if (timers_state.cpu_ticks_prev > ticks) {
321 /* Note: non-increasing ticks may happen if the host uses
322 software suspend. */
323 timers_state.cpu_ticks_offset += timers_state.cpu_ticks_prev - ticks;
324 ticks = timers_state.cpu_ticks_prev;
325 }
326
327 timers_state.cpu_ticks_prev = ticks;
328 return ticks;
329 }
330
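/*
 * Worked example of the suspend compensation above: if the last value
 * returned was 1000 ticks but the host counter now yields only 900,
 * cpu_ticks_offset is bumped by 100 so this call still returns 1000 and
 * the tick count never moves backwards.
 */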
331 static int64_t cpu_get_clock_locked(void)
332 {
333 int64_t time;
334
335 time = timers_state.cpu_clock_offset;
336 if (timers_state.cpu_ticks_enabled) {
337 time += get_clock();
338 }
339
340 return time;
341 }
342
343 /* Return the monotonic time elapsed in VM, i.e.,
344 * the time between vm_start and vm_stop
345 */
346 int64_t cpu_get_clock(void)
347 {
348 int64_t ti;
349 unsigned start;
350
351 do {
352 start = seqlock_read_begin(&timers_state.vm_clock_seqlock);
353 ti = cpu_get_clock_locked();
354 } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start));
355
356 return ti;
357 }
358
359 /* enable cpu_get_ticks()
360 * Caller must hold BQL which serves as mutex for vm_clock_seqlock.
361 */
362 void cpu_enable_ticks(void)
363 {
364 /* Here, the thing really protected by the seqlock is cpu_clock_offset. */
365 seqlock_write_begin(&timers_state.vm_clock_seqlock);
366 if (!timers_state.cpu_ticks_enabled) {
367 timers_state.cpu_ticks_offset -= cpu_get_host_ticks();
368 timers_state.cpu_clock_offset -= get_clock();
369 timers_state.cpu_ticks_enabled = 1;
370 }
371 seqlock_write_end(&timers_state.vm_clock_seqlock);
372 }
373
374 /* disable cpu_get_ticks() : the clock is stopped. You must not call
375 * cpu_get_ticks() after that.
376 * Caller must hold BQL which serves as mutex for vm_clock_seqlock.
377 */
378 void cpu_disable_ticks(void)
379 {
380 /* Here, the thing really protected by the seqlock is cpu_clock_offset. */
381 seqlock_write_begin(&timers_state.vm_clock_seqlock);
382 if (timers_state.cpu_ticks_enabled) {
383 timers_state.cpu_ticks_offset += cpu_get_host_ticks();
384 timers_state.cpu_clock_offset = cpu_get_clock_locked();
385 timers_state.cpu_ticks_enabled = 0;
386 }
387 seqlock_write_end(&timers_state.vm_clock_seqlock);
388 }
389
390 /* Correlation between real and virtual time is always going to be
391 fairly approximate, so ignore small variation.
392 When the guest is idle real and virtual time will be aligned in
393 the IO wait loop. */
394 #define ICOUNT_WOBBLE (NANOSECONDS_PER_SECOND / 10)
395
396 static void icount_adjust(void)
397 {
398 int64_t cur_time;
399 int64_t cur_icount;
400 int64_t delta;
401
402 /* Protected by TimersState mutex. */
403 static int64_t last_delta;
404
405 /* If the VM is not running, then do nothing. */
406 if (!runstate_is_running()) {
407 return;
408 }
409
410 seqlock_write_begin(&timers_state.vm_clock_seqlock);
411 cur_time = cpu_get_clock_locked();
412 cur_icount = cpu_get_icount_locked();
413
414 delta = cur_icount - cur_time;
415 /* FIXME: This is a very crude algorithm, somewhat prone to oscillation. */
416 if (delta > 0
417 && last_delta + ICOUNT_WOBBLE < delta * 2
418 && icount_time_shift > 0) {
419 /* The guest is getting too far ahead. Slow time down. */
420 icount_time_shift--;
421 }
422 if (delta < 0
423 && last_delta - ICOUNT_WOBBLE > delta * 2
424 && icount_time_shift < MAX_ICOUNT_SHIFT) {
425 /* The guest is getting too far behind. Speed time up. */
426 icount_time_shift++;
427 }
428 last_delta = delta;
429 timers_state.qemu_icount_bias = cur_icount
430 - (timers_state.qemu_icount << icount_time_shift);
431 seqlock_write_end(&timers_state.vm_clock_seqlock);
432 }
433
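/*
 * The final bias assignment keeps virtual time continuous across a shift
 * change: right after it, bias + (qemu_icount << icount_time_shift) still
 * equals cur_icount, so only instructions executed from now on are scaled
 * at the new rate.
 */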
434 static void icount_adjust_rt(void *opaque)
435 {
436 timer_mod(timers_state.icount_rt_timer,
437 qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL_RT) + 1000);
438 icount_adjust();
439 }
440
441 static void icount_adjust_vm(void *opaque)
442 {
443 timer_mod(timers_state.icount_vm_timer,
444 qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
445 NANOSECONDS_PER_SECOND / 10);
446 icount_adjust();
447 }
448
449 static int64_t qemu_icount_round(int64_t count)
450 {
451 return (count + (1 << icount_time_shift) - 1) >> icount_time_shift;
452 }
453
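/*
 * qemu_icount_round() converts a nanosecond deadline into whole
 * instructions, rounding up. For example, with icount_time_shift == 3 a
 * 20 ns deadline becomes (20 + 7) >> 3 = 3 instructions (24 ns), so the
 * instruction budget always covers the deadline.
 */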
454 static void icount_warp_rt(void)
455 {
456 unsigned seq;
457 int64_t warp_start;
458
459 /* The icount_warp_timer is rescheduled soon after vm_clock_warp_start
460 * changes from -1 to another value, so the race here is okay.
461 */
462 do {
463 seq = seqlock_read_begin(&timers_state.vm_clock_seqlock);
464 warp_start = timers_state.vm_clock_warp_start;
465 } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, seq));
466
467 if (warp_start == -1) {
468 return;
469 }
470
471 seqlock_write_begin(&timers_state.vm_clock_seqlock);
472 if (runstate_is_running()) {
473 int64_t clock = REPLAY_CLOCK(REPLAY_CLOCK_VIRTUAL_RT,
474 cpu_get_clock_locked());
475 int64_t warp_delta;
476
477 warp_delta = clock - timers_state.vm_clock_warp_start;
478 if (use_icount == 2) {
479 /*
480 * In adaptive mode, do not let QEMU_CLOCK_VIRTUAL run too
481 * far ahead of real time.
482 */
483 int64_t cur_icount = cpu_get_icount_locked();
484 int64_t delta = clock - cur_icount;
485 warp_delta = MIN(warp_delta, delta);
486 }
487 timers_state.qemu_icount_bias += warp_delta;
488 }
489 timers_state.vm_clock_warp_start = -1;
490 seqlock_write_end(&timers_state.vm_clock_seqlock);
491
492 if (qemu_clock_expired(QEMU_CLOCK_VIRTUAL)) {
493 qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
494 }
495 }
496
497 static void icount_timer_cb(void *opaque)
498 {
499 /* No need for a checkpoint because the timer already synchronizes
500 * with CHECKPOINT_CLOCK_VIRTUAL_RT.
501 */
502 icount_warp_rt();
503 }
504
505 void qtest_clock_warp(int64_t dest)
506 {
507 int64_t clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
508 AioContext *aio_context;
509 assert(qtest_enabled());
510 aio_context = qemu_get_aio_context();
511 while (clock < dest) {
512 int64_t deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
513 int64_t warp = qemu_soonest_timeout(dest - clock, deadline);
514
515 seqlock_write_begin(&timers_state.vm_clock_seqlock);
516 timers_state.qemu_icount_bias += warp;
517 seqlock_write_end(&timers_state.vm_clock_seqlock);
518
519 qemu_clock_run_timers(QEMU_CLOCK_VIRTUAL);
520 timerlist_run_timers(aio_context->tlg.tl[QEMU_CLOCK_VIRTUAL]);
521 clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
522 }
523 qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
524 }
525
526 void qemu_start_warp_timer(void)
527 {
528 int64_t clock;
529 int64_t deadline;
530
531 if (!use_icount) {
532 return;
533 }
534
535 /* Nothing to do if the VM is stopped: QEMU_CLOCK_VIRTUAL timers
536 * do not fire, so computing the deadline does not make sense.
537 */
538 if (!runstate_is_running()) {
539 return;
540 }
541
542 /* warp clock deterministically in record/replay mode */
543 if (!replay_checkpoint(CHECKPOINT_CLOCK_WARP_START)) {
544 return;
545 }
546
547 if (!all_cpu_threads_idle()) {
548 return;
549 }
550
551 if (qtest_enabled()) {
552 /* When testing, qtest commands advance icount. */
553 return;
554 }
555
556 /* We want to use the earliest deadline from ALL vm_clocks */
557 clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT);
558 deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
559 if (deadline < 0) {
560 static bool notified;
561 if (!icount_sleep && !notified) {
562 warn_report("icount sleep disabled and no active timers");
563 notified = true;
564 }
565 return;
566 }
567
568 if (deadline > 0) {
569 /*
570 * Ensure QEMU_CLOCK_VIRTUAL proceeds even when the virtual CPU goes to
571 * sleep. Otherwise, the CPU might be waiting for a future timer
572 * interrupt to wake it up, but the interrupt never comes because
573 * the vCPU isn't running any insns and thus doesn't advance the
574 * QEMU_CLOCK_VIRTUAL.
575 */
576 if (!icount_sleep) {
577 /*
578 * We never let VCPUs sleep in no sleep icount mode.
579 * If there is a pending QEMU_CLOCK_VIRTUAL timer we just advance
580 * to the next QEMU_CLOCK_VIRTUAL event and notify it.
581 * It is useful when we want a deterministic execution time,
582 * isolated from host latencies.
583 */
584 seqlock_write_begin(&timers_state.vm_clock_seqlock);
585 timers_state.qemu_icount_bias += deadline;
586 seqlock_write_end(&timers_state.vm_clock_seqlock);
587 qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
588 } else {
589 /*
590 * We do stop VCPUs and only advance QEMU_CLOCK_VIRTUAL after some
591 * "real" time (related to the time left until the next event) has
592 * passed. The QEMU_CLOCK_VIRTUAL_RT clock will do this.
593 * This keeps the warps from being visible externally; for example,
594 * the guest will not send network packets continuously instead of
595 * every 100ms.
596 */
597 seqlock_write_begin(&timers_state.vm_clock_seqlock);
598 if (timers_state.vm_clock_warp_start == -1
599 || timers_state.vm_clock_warp_start > clock) {
600 timers_state.vm_clock_warp_start = clock;
601 }
602 seqlock_write_end(&timers_state.vm_clock_seqlock);
603 timer_mod_anticipate(timers_state.icount_warp_timer,
604 clock + deadline);
605 }
606 } else if (deadline == 0) {
607 qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
608 }
609 }
610
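/*
 * Example of the two warp modes above, assuming all vCPUs are idle and the
 * next QEMU_CLOCK_VIRTUAL timer is 5 ms away: with icount_sleep off, the
 * 5 ms are added to qemu_icount_bias immediately and the timer fires at
 * once; with icount_sleep on, icount_warp_timer is armed 5 ms of
 * QEMU_CLOCK_VIRTUAL_RT in the future and icount_warp_rt() later adds the
 * real time that actually elapsed (clamped in adaptive shift=auto mode).
 */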
611 static void qemu_account_warp_timer(void)
612 {
613 if (!use_icount || !icount_sleep) {
614 return;
615 }
616
617 /* Nothing to do if the VM is stopped: QEMU_CLOCK_VIRTUAL timers
618 * do not fire, so computing the deadline does not make sense.
619 */
620 if (!runstate_is_running()) {
621 return;
622 }
623
624 /* warp clock deterministically in record/replay mode */
625 if (!replay_checkpoint(CHECKPOINT_CLOCK_WARP_ACCOUNT)) {
626 return;
627 }
628
629 timer_del(timers_state.icount_warp_timer);
630 icount_warp_rt();
631 }
632
633 static bool icount_state_needed(void *opaque)
634 {
635 return use_icount;
636 }
637
638 static bool warp_timer_state_needed(void *opaque)
639 {
640 TimersState *s = opaque;
641 return s->icount_warp_timer != NULL;
642 }
643
644 static bool adjust_timers_state_needed(void *opaque)
645 {
646 TimersState *s = opaque;
647 return s->icount_rt_timer != NULL;
648 }
649
650 /*
651 * The subsection for warp timer migration is optional, because the timer may not be created
652 */
653 static const VMStateDescription icount_vmstate_warp_timer = {
654 .name = "timer/icount/warp_timer",
655 .version_id = 1,
656 .minimum_version_id = 1,
657 .needed = warp_timer_state_needed,
658 .fields = (VMStateField[]) {
659 VMSTATE_INT64(vm_clock_warp_start, TimersState),
660 VMSTATE_TIMER_PTR(icount_warp_timer, TimersState),
661 VMSTATE_END_OF_LIST()
662 }
663 };
664
665 static const VMStateDescription icount_vmstate_adjust_timers = {
666 .name = "timer/icount/timers",
667 .version_id = 1,
668 .minimum_version_id = 1,
669 .needed = adjust_timers_state_needed,
670 .fields = (VMStateField[]) {
671 VMSTATE_TIMER_PTR(icount_rt_timer, TimersState),
672 VMSTATE_TIMER_PTR(icount_vm_timer, TimersState),
673 VMSTATE_END_OF_LIST()
674 }
675 };
676
677 /*
678 * This is a subsection for icount migration.
679 */
680 static const VMStateDescription icount_vmstate_timers = {
681 .name = "timer/icount",
682 .version_id = 1,
683 .minimum_version_id = 1,
684 .needed = icount_state_needed,
685 .fields = (VMStateField[]) {
686 VMSTATE_INT64(qemu_icount_bias, TimersState),
687 VMSTATE_INT64(qemu_icount, TimersState),
688 VMSTATE_END_OF_LIST()
689 },
690 .subsections = (const VMStateDescription*[]) {
691 &icount_vmstate_warp_timer,
692 &icount_vmstate_adjust_timers,
693 NULL
694 }
695 };
696
697 static const VMStateDescription vmstate_timers = {
698 .name = "timer",
699 .version_id = 2,
700 .minimum_version_id = 1,
701 .fields = (VMStateField[]) {
702 VMSTATE_INT64(cpu_ticks_offset, TimersState),
703 VMSTATE_INT64(dummy, TimersState),
704 VMSTATE_INT64_V(cpu_clock_offset, TimersState, 2),
705 VMSTATE_END_OF_LIST()
706 },
707 .subsections = (const VMStateDescription*[]) {
708 &icount_vmstate_timers,
709 NULL
710 }
711 };
712
713 static void cpu_throttle_thread(CPUState *cpu, run_on_cpu_data opaque)
714 {
715 double pct;
716 double throttle_ratio;
717 long sleeptime_ns;
718
719 if (!cpu_throttle_get_percentage()) {
720 return;
721 }
722
723 pct = (double)cpu_throttle_get_percentage()/100;
724 throttle_ratio = pct / (1 - pct);
725 sleeptime_ns = (long)(throttle_ratio * CPU_THROTTLE_TIMESLICE_NS);
726
727 qemu_mutex_unlock_iothread();
728 g_usleep(sleeptime_ns / 1000); /* Convert ns to us for usleep call */
729 qemu_mutex_lock_iothread();
730 atomic_set(&cpu->throttle_thread_scheduled, 0);
731 }
732
733 static void cpu_throttle_timer_tick(void *opaque)
734 {
735 CPUState *cpu;
736 double pct;
737
738 /* Stop the timer if needed */
739 if (!cpu_throttle_get_percentage()) {
740 return;
741 }
742 CPU_FOREACH(cpu) {
743 if (!atomic_xchg(&cpu->throttle_thread_scheduled, 1)) {
744 async_run_on_cpu(cpu, cpu_throttle_thread,
745 RUN_ON_CPU_NULL);
746 }
747 }
748
749 pct = (double)cpu_throttle_get_percentage()/100;
750 timer_mod(throttle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT) +
751 CPU_THROTTLE_TIMESLICE_NS / (1-pct));
752 }
753
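/*
 * Worked example of the throttling arithmetic: at 50%, each tick makes the
 * vCPU sleep (0.5 / 0.5) * 10 ms = 10 ms and the tick period is
 * 10 ms / 0.5 = 20 ms, so the vCPU is asleep half of the time. At the 99%
 * maximum the sleep is 990 ms out of a 1000 ms period.
 */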
754 void cpu_throttle_set(int new_throttle_pct)
755 {
756 /* Ensure throttle percentage is within valid range */
757 new_throttle_pct = MIN(new_throttle_pct, CPU_THROTTLE_PCT_MAX);
758 new_throttle_pct = MAX(new_throttle_pct, CPU_THROTTLE_PCT_MIN);
759
760 atomic_set(&throttle_percentage, new_throttle_pct);
761
762 timer_mod(throttle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT) +
763 CPU_THROTTLE_TIMESLICE_NS);
764 }
765
766 void cpu_throttle_stop(void)
767 {
768 atomic_set(&throttle_percentage, 0);
769 }
770
771 bool cpu_throttle_active(void)
772 {
773 return (cpu_throttle_get_percentage() != 0);
774 }
775
776 int cpu_throttle_get_percentage(void)
777 {
778 return atomic_read(&throttle_percentage);
779 }
780
781 void cpu_ticks_init(void)
782 {
783 seqlock_init(&timers_state.vm_clock_seqlock);
784 vmstate_register(NULL, 0, &vmstate_timers, &timers_state);
785 throttle_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL_RT,
786 cpu_throttle_timer_tick, NULL);
787 }
788
789 void configure_icount(QemuOpts *opts, Error **errp)
790 {
791 const char *option;
792 char *rem_str = NULL;
793
794 option = qemu_opt_get(opts, "shift");
795 if (!option) {
796 if (qemu_opt_get(opts, "align") != NULL) {
797 error_setg(errp, "Please specify shift option when using align");
798 }
799 return;
800 }
801
802 icount_sleep = qemu_opt_get_bool(opts, "sleep", true);
803 if (icount_sleep) {
804 timers_state.icount_warp_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL_RT,
805 icount_timer_cb, NULL);
806 }
807
808 icount_align_option = qemu_opt_get_bool(opts, "align", false);
809
810 if (icount_align_option && !icount_sleep) {
811 error_setg(errp, "align=on and sleep=off are incompatible");
812 }
813 if (strcmp(option, "auto") != 0) {
814 errno = 0;
815 icount_time_shift = strtol(option, &rem_str, 0);
816 if (errno != 0 || *rem_str != '\0' || !strlen(option)) {
817 error_setg(errp, "icount: Invalid shift value");
818 }
819 use_icount = 1;
820 return;
821 } else if (icount_align_option) {
822 error_setg(errp, "shift=auto and align=on are incompatible");
823 } else if (!icount_sleep) {
824 error_setg(errp, "shift=auto and sleep=off are incompatible");
825 }
826
827 use_icount = 2;
828
829 /* 125MIPS seems a reasonable initial guess at the guest speed.
830 It will be corrected fairly quickly anyway. */
831 icount_time_shift = 3;
832
833 /* Have both realtime and virtual time triggers for speed adjustment.
834 The realtime trigger catches emulated time passing too slowly,
835 the virtual time trigger catches emulated time passing too fast.
836 Realtime triggers occur even when idle, so use them less frequently
837 than VM triggers. */
838 timers_state.vm_clock_warp_start = -1;
839 timers_state.icount_rt_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL_RT,
840 icount_adjust_rt, NULL);
841 timer_mod(timers_state.icount_rt_timer,
842 qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL_RT) + 1000);
843 timers_state.icount_vm_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
844 icount_adjust_vm, NULL);
845 timer_mod(timers_state.icount_vm_timer,
846 qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
847 NANOSECONDS_PER_SECOND / 10);
848 }
849
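/*
 * The options above come from -icount on the command line, e.g.
 * (illustrative invocations):
 *
 *   -icount shift=7,sleep=on        # fixed rate: 128 ns per instruction
 *   -icount shift=auto,align=off    # adaptive rate (use_icount == 2)
 *
 * "align" additionally enables the delay/advance statistics reported by
 * dump_drift_info().
 */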
850 /***********************************************************/
851 /* TCG vCPU kick timer
852 *
853 * The kick timer is responsible for moving single-threaded vCPU
854 * emulation on to the next vCPU. If more than one vCPU is running, a
855 * timer event will force a cpu->exit so the next vCPU can get
856 * scheduled.
857 *
858 * The timer is removed while all vCPUs are idle and restarted once a
859 * vCPU becomes runnable again.
860 */
861
862 static QEMUTimer *tcg_kick_vcpu_timer;
863 static CPUState *tcg_current_rr_cpu;
864
865 #define TCG_KICK_PERIOD (NANOSECONDS_PER_SECOND / 10)
866
867 static inline int64_t qemu_tcg_next_kick(void)
868 {
869 return qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + TCG_KICK_PERIOD;
870 }
871
872 /* Kick the currently round-robin scheduled vCPU */
873 static void qemu_cpu_kick_rr_cpu(void)
874 {
875 CPUState *cpu;
876 do {
877 cpu = atomic_mb_read(&tcg_current_rr_cpu);
878 if (cpu) {
879 cpu_exit(cpu);
880 }
881 } while (cpu != atomic_mb_read(&tcg_current_rr_cpu));
882 }
883
884 static void do_nothing(CPUState *cpu, run_on_cpu_data unused)
885 {
886 }
887
888 void qemu_timer_notify_cb(void *opaque, QEMUClockType type)
889 {
890 if (!use_icount || type != QEMU_CLOCK_VIRTUAL) {
891 qemu_notify_event();
892 return;
893 }
894
895 if (!qemu_in_vcpu_thread() && first_cpu) {
896 /* qemu_cpu_kick is not enough to kick a halted CPU out of
897 * qemu_tcg_wait_io_event. async_run_on_cpu, instead,
898 * causes cpu_thread_is_idle to return false. This way,
899 * handle_icount_deadline can run.
900 */
901 async_run_on_cpu(first_cpu, do_nothing, RUN_ON_CPU_NULL);
902 }
903 }
904
905 static void kick_tcg_thread(void *opaque)
906 {
907 timer_mod(tcg_kick_vcpu_timer, qemu_tcg_next_kick());
908 qemu_cpu_kick_rr_cpu();
909 }
910
911 static void start_tcg_kick_timer(void)
912 {
913 assert(!mttcg_enabled);
914 if (!tcg_kick_vcpu_timer && CPU_NEXT(first_cpu)) {
915 tcg_kick_vcpu_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
916 kick_tcg_thread, NULL);
917 timer_mod(tcg_kick_vcpu_timer, qemu_tcg_next_kick());
918 }
919 }
920
921 static void stop_tcg_kick_timer(void)
922 {
923 assert(!mttcg_enabled);
924 if (tcg_kick_vcpu_timer) {
925 timer_del(tcg_kick_vcpu_timer);
926 tcg_kick_vcpu_timer = NULL;
927 }
928 }
929
930 /***********************************************************/
931 void hw_error(const char *fmt, ...)
932 {
933 va_list ap;
934 CPUState *cpu;
935
936 va_start(ap, fmt);
937 fprintf(stderr, "qemu: hardware error: ");
938 vfprintf(stderr, fmt, ap);
939 fprintf(stderr, "\n");
940 CPU_FOREACH(cpu) {
941 fprintf(stderr, "CPU #%d:\n", cpu->cpu_index);
942 cpu_dump_state(cpu, stderr, fprintf, CPU_DUMP_FPU);
943 }
944 va_end(ap);
945 abort();
946 }
947
948 void cpu_synchronize_all_states(void)
949 {
950 CPUState *cpu;
951
952 CPU_FOREACH(cpu) {
953 cpu_synchronize_state(cpu);
954 /* TODO: move to cpu_synchronize_state() */
955 if (hvf_enabled()) {
956 hvf_cpu_synchronize_state(cpu);
957 }
958 }
959 }
960
961 void cpu_synchronize_all_post_reset(void)
962 {
963 CPUState *cpu;
964
965 CPU_FOREACH(cpu) {
966 cpu_synchronize_post_reset(cpu);
967 /* TODO: move to cpu_synchronize_post_reset() */
968 if (hvf_enabled()) {
969 hvf_cpu_synchronize_post_reset(cpu);
970 }
971 }
972 }
973
974 void cpu_synchronize_all_post_init(void)
975 {
976 CPUState *cpu;
977
978 CPU_FOREACH(cpu) {
979 cpu_synchronize_post_init(cpu);
980 /* TODO: move to cpu_synchronize_post_init() */
981 if (hvf_enabled()) {
982 hvf_cpu_synchronize_post_init(cpu);
983 }
984 }
985 }
986
987 void cpu_synchronize_all_pre_loadvm(void)
988 {
989 CPUState *cpu;
990
991 CPU_FOREACH(cpu) {
992 cpu_synchronize_pre_loadvm(cpu);
993 }
994 }
995
996 static int do_vm_stop(RunState state)
997 {
998 int ret = 0;
999
1000 if (runstate_is_running()) {
1001 cpu_disable_ticks();
1002 pause_all_vcpus();
1003 runstate_set(state);
1004 vm_state_notify(0, state);
1005 qapi_event_send_stop(&error_abort);
1006 }
1007
1008 bdrv_drain_all();
1009 replay_disable_events();
1010 ret = bdrv_flush_all();
1011
1012 return ret;
1013 }
1014
1015 static bool cpu_can_run(CPUState *cpu)
1016 {
1017 if (cpu->stop) {
1018 return false;
1019 }
1020 if (cpu_is_stopped(cpu)) {
1021 return false;
1022 }
1023 return true;
1024 }
1025
1026 static void cpu_handle_guest_debug(CPUState *cpu)
1027 {
1028 gdb_set_stop_cpu(cpu);
1029 qemu_system_debug_request();
1030 cpu->stopped = true;
1031 }
1032
1033 #ifdef CONFIG_LINUX
1034 static void sigbus_reraise(void)
1035 {
1036 sigset_t set;
1037 struct sigaction action;
1038
1039 memset(&action, 0, sizeof(action));
1040 action.sa_handler = SIG_DFL;
1041 if (!sigaction(SIGBUS, &action, NULL)) {
1042 raise(SIGBUS);
1043 sigemptyset(&set);
1044 sigaddset(&set, SIGBUS);
1045 pthread_sigmask(SIG_UNBLOCK, &set, NULL);
1046 }
1047 perror("Failed to re-raise SIGBUS!\n");
1048 abort();
1049 }
1050
1051 static void sigbus_handler(int n, siginfo_t *siginfo, void *ctx)
1052 {
1053 if (siginfo->si_code != BUS_MCEERR_AO && siginfo->si_code != BUS_MCEERR_AR) {
1054 sigbus_reraise();
1055 }
1056
1057 if (current_cpu) {
1058 /* Called asynchronously in VCPU thread. */
1059 if (kvm_on_sigbus_vcpu(current_cpu, siginfo->si_code, siginfo->si_addr)) {
1060 sigbus_reraise();
1061 }
1062 } else {
1063 /* Called synchronously (via signalfd) in main thread. */
1064 if (kvm_on_sigbus(siginfo->si_code, siginfo->si_addr)) {
1065 sigbus_reraise();
1066 }
1067 }
1068 }
1069
1070 static void qemu_init_sigbus(void)
1071 {
1072 struct sigaction action;
1073
1074 memset(&action, 0, sizeof(action));
1075 action.sa_flags = SA_SIGINFO;
1076 action.sa_sigaction = sigbus_handler;
1077 sigaction(SIGBUS, &action, NULL);
1078
1079 prctl(PR_MCE_KILL, PR_MCE_KILL_SET, PR_MCE_KILL_EARLY, 0, 0);
1080 }
1081 #else /* !CONFIG_LINUX */
1082 static void qemu_init_sigbus(void)
1083 {
1084 }
1085 #endif /* !CONFIG_LINUX */
1086
1087 static QemuMutex qemu_global_mutex;
1088
1089 static QemuThread io_thread;
1090
1091 /* cpu creation */
1092 static QemuCond qemu_cpu_cond;
1093 /* system init */
1094 static QemuCond qemu_pause_cond;
1095
1096 void qemu_init_cpu_loop(void)
1097 {
1098 qemu_init_sigbus();
1099 qemu_cond_init(&qemu_cpu_cond);
1100 qemu_cond_init(&qemu_pause_cond);
1101 qemu_mutex_init(&qemu_global_mutex);
1102
1103 qemu_thread_get_self(&io_thread);
1104 }
1105
1106 void run_on_cpu(CPUState *cpu, run_on_cpu_func func, run_on_cpu_data data)
1107 {
1108 do_run_on_cpu(cpu, func, data, &qemu_global_mutex);
1109 }
1110
1111 static void qemu_kvm_destroy_vcpu(CPUState *cpu)
1112 {
1113 if (kvm_destroy_vcpu(cpu) < 0) {
1114 error_report("kvm_destroy_vcpu failed");
1115 exit(EXIT_FAILURE);
1116 }
1117 }
1118
1119 static void qemu_tcg_destroy_vcpu(CPUState *cpu)
1120 {
1121 }
1122
1123 static void qemu_cpu_stop(CPUState *cpu, bool exit)
1124 {
1125 g_assert(qemu_cpu_is_self(cpu));
1126 cpu->stop = false;
1127 cpu->stopped = true;
1128 if (exit) {
1129 cpu_exit(cpu);
1130 }
1131 qemu_cond_broadcast(&qemu_pause_cond);
1132 }
1133
1134 static void qemu_wait_io_event_common(CPUState *cpu)
1135 {
1136 atomic_mb_set(&cpu->thread_kicked, false);
1137 if (cpu->stop) {
1138 qemu_cpu_stop(cpu, false);
1139 }
1140 process_queued_cpu_work(cpu);
1141 }
1142
1143 static void qemu_tcg_rr_wait_io_event(CPUState *cpu)
1144 {
1145 while (all_cpu_threads_idle()) {
1146 stop_tcg_kick_timer();
1147 qemu_cond_wait(cpu->halt_cond, &qemu_global_mutex);
1148 }
1149
1150 start_tcg_kick_timer();
1151
1152 qemu_wait_io_event_common(cpu);
1153 }
1154
1155 static void qemu_wait_io_event(CPUState *cpu)
1156 {
1157 while (cpu_thread_is_idle(cpu)) {
1158 qemu_cond_wait(cpu->halt_cond, &qemu_global_mutex);
1159 }
1160
1161 #ifdef _WIN32
1162 /* Eat dummy APC queued by qemu_cpu_kick_thread. */
1163 if (!tcg_enabled()) {
1164 SleepEx(0, TRUE);
1165 }
1166 #endif
1167 qemu_wait_io_event_common(cpu);
1168 }
1169
1170 static void *qemu_kvm_cpu_thread_fn(void *arg)
1171 {
1172 CPUState *cpu = arg;
1173 int r;
1174
1175 rcu_register_thread();
1176
1177 qemu_mutex_lock_iothread();
1178 qemu_thread_get_self(cpu->thread);
1179 cpu->thread_id = qemu_get_thread_id();
1180 cpu->can_do_io = 1;
1181 current_cpu = cpu;
1182
1183 r = kvm_init_vcpu(cpu);
1184 if (r < 0) {
1185 error_report("kvm_init_vcpu failed: %s", strerror(-r));
1186 exit(1);
1187 }
1188
1189 kvm_init_cpu_signals(cpu);
1190
1191 /* signal CPU creation */
1192 cpu->created = true;
1193 qemu_cond_signal(&qemu_cpu_cond);
1194
1195 do {
1196 if (cpu_can_run(cpu)) {
1197 r = kvm_cpu_exec(cpu);
1198 if (r == EXCP_DEBUG) {
1199 cpu_handle_guest_debug(cpu);
1200 }
1201 }
1202 qemu_wait_io_event(cpu);
1203 } while (!cpu->unplug || cpu_can_run(cpu));
1204
1205 qemu_kvm_destroy_vcpu(cpu);
1206 cpu->created = false;
1207 qemu_cond_signal(&qemu_cpu_cond);
1208 qemu_mutex_unlock_iothread();
1209 rcu_unregister_thread();
1210 return NULL;
1211 }
1212
1213 static void *qemu_dummy_cpu_thread_fn(void *arg)
1214 {
1215 #ifdef _WIN32
1216 error_report("qtest is not supported under Windows");
1217 exit(1);
1218 #else
1219 CPUState *cpu = arg;
1220 sigset_t waitset;
1221 int r;
1222
1223 rcu_register_thread();
1224
1225 qemu_mutex_lock_iothread();
1226 qemu_thread_get_self(cpu->thread);
1227 cpu->thread_id = qemu_get_thread_id();
1228 cpu->can_do_io = 1;
1229 current_cpu = cpu;
1230
1231 sigemptyset(&waitset);
1232 sigaddset(&waitset, SIG_IPI);
1233
1234 /* signal CPU creation */
1235 cpu->created = true;
1236 qemu_cond_signal(&qemu_cpu_cond);
1237
1238 do {
1239 qemu_mutex_unlock_iothread();
1240 do {
1241 int sig;
1242 r = sigwait(&waitset, &sig);
1243 } while (r == -1 && (errno == EAGAIN || errno == EINTR));
1244 if (r == -1) {
1245 perror("sigwait");
1246 exit(1);
1247 }
1248 qemu_mutex_lock_iothread();
1249 qemu_wait_io_event(cpu);
1250 } while (!cpu->unplug);
1251
1252 rcu_unregister_thread();
1253 return NULL;
1254 #endif
1255 }
1256
1257 static int64_t tcg_get_icount_limit(void)
1258 {
1259 int64_t deadline;
1260
1261 if (replay_mode != REPLAY_MODE_PLAY) {
1262 deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
1263
1264 /* Maintain prior (possibly buggy) behaviour where if no deadline
1265 * was set (as there is no QEMU_CLOCK_VIRTUAL timer) or it is more than
1266 * INT32_MAX nanoseconds ahead, we still use INT32_MAX
1267 * nanoseconds.
1268 */
1269 if ((deadline < 0) || (deadline > INT32_MAX)) {
1270 deadline = INT32_MAX;
1271 }
1272
1273 return qemu_icount_round(deadline);
1274 } else {
1275 return replay_get_instructions();
1276 }
1277 }
1278
1279 static void handle_icount_deadline(void)
1280 {
1281 assert(qemu_in_vcpu_thread());
1282 if (use_icount) {
1283 int64_t deadline =
1284 qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
1285
1286 if (deadline == 0) {
1287 /* Wake up other AioContexts. */
1288 qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
1289 qemu_clock_run_timers(QEMU_CLOCK_VIRTUAL);
1290 }
1291 }
1292 }
1293
1294 static void prepare_icount_for_run(CPUState *cpu)
1295 {
1296 if (use_icount) {
1297 int insns_left;
1298
1299 /* These should always be cleared by process_icount_data after
1300 * each vCPU execution. However, u16.high can be raised
1301 * asynchronously by cpu_exit/cpu_interrupt/tcg_handle_interrupt.
1302 */
1303 g_assert(cpu->icount_decr.u16.low == 0);
1304 g_assert(cpu->icount_extra == 0);
1305
1306 cpu->icount_budget = tcg_get_icount_limit();
1307 insns_left = MIN(0xffff, cpu->icount_budget);
1308 cpu->icount_decr.u16.low = insns_left;
1309 cpu->icount_extra = cpu->icount_budget - insns_left;
1310 }
1311 }
1312
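/*
 * Worked example of the budget split above: with an icount budget of
 * 100000 instructions, u16.low gets MIN(0xffff, 100000) = 65535 and
 * icount_extra holds the remaining 34465; the translated code decrements
 * only the 16-bit counter and it is refilled from icount_extra by the
 * exec loop as it runs out.
 */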
1313 static void process_icount_data(CPUState *cpu)
1314 {
1315 if (use_icount) {
1316 /* Account for executed instructions */
1317 cpu_update_icount(cpu);
1318
1319 /* Reset the counters */
1320 cpu->icount_decr.u16.low = 0;
1321 cpu->icount_extra = 0;
1322 cpu->icount_budget = 0;
1323
1324 replay_account_executed_instructions();
1325 }
1326 }
1327
1328
1329 static int tcg_cpu_exec(CPUState *cpu)
1330 {
1331 int ret;
1332 #ifdef CONFIG_PROFILER
1333 int64_t ti;
1334 #endif
1335
1336 #ifdef CONFIG_PROFILER
1337 ti = profile_getclock();
1338 #endif
1339 qemu_mutex_unlock_iothread();
1340 cpu_exec_start(cpu);
1341 ret = cpu_exec(cpu);
1342 cpu_exec_end(cpu);
1343 qemu_mutex_lock_iothread();
1344 #ifdef CONFIG_PROFILER
1345 tcg_time += profile_getclock() - ti;
1346 #endif
1347 return ret;
1348 }
1349
1350 /* Destroy any remaining vCPUs which have been unplugged and have
1351 * finished running
1352 */
1353 static void deal_with_unplugged_cpus(void)
1354 {
1355 CPUState *cpu;
1356
1357 CPU_FOREACH(cpu) {
1358 if (cpu->unplug && !cpu_can_run(cpu)) {
1359 qemu_tcg_destroy_vcpu(cpu);
1360 cpu->created = false;
1361 qemu_cond_signal(&qemu_cpu_cond);
1362 break;
1363 }
1364 }
1365 }
1366
1367 /* Single-threaded TCG
1368 *
1369 * In the single-threaded case each vCPU is simulated in turn. If
1370 * there is more than a single vCPU we create a simple timer to kick
1371 * the vCPU and ensure we don't get stuck in a tight loop in one vCPU.
1372 * This is done explicitly rather than relying on side-effects
1373 * elsewhere.
1374 */
1375
1376 static void *qemu_tcg_rr_cpu_thread_fn(void *arg)
1377 {
1378 CPUState *cpu = arg;
1379
1380 rcu_register_thread();
1381 tcg_register_thread();
1382
1383 qemu_mutex_lock_iothread();
1384 qemu_thread_get_self(cpu->thread);
1385
1386 cpu->thread_id = qemu_get_thread_id();
1387 cpu->created = true;
1388 cpu->can_do_io = 1;
1389 qemu_cond_signal(&qemu_cpu_cond);
1390
1391 /* wait for initial kick-off after machine start */
1392 while (first_cpu->stopped) {
1393 qemu_cond_wait(first_cpu->halt_cond, &qemu_global_mutex);
1394
1395 /* process any pending work */
1396 CPU_FOREACH(cpu) {
1397 current_cpu = cpu;
1398 qemu_wait_io_event_common(cpu);
1399 }
1400 }
1401
1402 start_tcg_kick_timer();
1403
1404 cpu = first_cpu;
1405
1406 /* process any pending work */
1407 cpu->exit_request = 1;
1408
1409 while (1) {
1410 /* Account partial waits to QEMU_CLOCK_VIRTUAL. */
1411 qemu_account_warp_timer();
1412
1413 /* Run the timers here. This is much more efficient than
1414 * waking up the I/O thread and waiting for completion.
1415 */
1416 handle_icount_deadline();
1417
1418 if (!cpu) {
1419 cpu = first_cpu;
1420 }
1421
1422 while (cpu && !cpu->queued_work_first && !cpu->exit_request) {
1423
1424 atomic_mb_set(&tcg_current_rr_cpu, cpu);
1425 current_cpu = cpu;
1426
1427 qemu_clock_enable(QEMU_CLOCK_VIRTUAL,
1428 (cpu->singlestep_enabled & SSTEP_NOTIMER) == 0);
1429
1430 if (cpu_can_run(cpu)) {
1431 int r;
1432
1433 prepare_icount_for_run(cpu);
1434
1435 r = tcg_cpu_exec(cpu);
1436
1437 process_icount_data(cpu);
1438
1439 if (r == EXCP_DEBUG) {
1440 cpu_handle_guest_debug(cpu);
1441 break;
1442 } else if (r == EXCP_ATOMIC) {
1443 qemu_mutex_unlock_iothread();
1444 cpu_exec_step_atomic(cpu);
1445 qemu_mutex_lock_iothread();
1446 break;
1447 }
1448 } else if (cpu->stop) {
1449 if (cpu->unplug) {
1450 cpu = CPU_NEXT(cpu);
1451 }
1452 break;
1453 }
1454
1455 cpu = CPU_NEXT(cpu);
1456 } /* while (cpu && !cpu->exit_request).. */
1457
1458 /* Does not need atomic_mb_set because a spurious wakeup is okay. */
1459 atomic_set(&tcg_current_rr_cpu, NULL);
1460
1461 if (cpu && cpu->exit_request) {
1462 atomic_mb_set(&cpu->exit_request, 0);
1463 }
1464
1465 qemu_tcg_rr_wait_io_event(cpu ? cpu : QTAILQ_FIRST(&cpus));
1466 deal_with_unplugged_cpus();
1467 }
1468
1469 rcu_unregister_thread();
1470 return NULL;
1471 }
1472
1473 static void *qemu_hax_cpu_thread_fn(void *arg)
1474 {
1475 CPUState *cpu = arg;
1476 int r;
1477
1478 rcu_register_thread();
1479 qemu_mutex_lock_iothread();
1480 qemu_thread_get_self(cpu->thread);
1481
1482 cpu->thread_id = qemu_get_thread_id();
1483 cpu->created = true;
1484 cpu->halted = 0;
1485 current_cpu = cpu;
1486
1487 hax_init_vcpu(cpu);
1488 qemu_cond_signal(&qemu_cpu_cond);
1489
1490 do {
1491 if (cpu_can_run(cpu)) {
1492 r = hax_smp_cpu_exec(cpu);
1493 if (r == EXCP_DEBUG) {
1494 cpu_handle_guest_debug(cpu);
1495 }
1496 }
1497
1498 qemu_wait_io_event(cpu);
1499 } while (!cpu->unplug || cpu_can_run(cpu));
1500 rcu_unregister_thread();
1501 return NULL;
1502 }
1503
1504 /* The HVF-specific vCPU thread function. This one should only run when the host
1505 * CPU supports the VMX "unrestricted guest" feature. */
1506 static void *qemu_hvf_cpu_thread_fn(void *arg)
1507 {
1508 CPUState *cpu = arg;
1509
1510 int r;
1511
1512 assert(hvf_enabled());
1513
1514 rcu_register_thread();
1515
1516 qemu_mutex_lock_iothread();
1517 qemu_thread_get_self(cpu->thread);
1518
1519 cpu->thread_id = qemu_get_thread_id();
1520 cpu->can_do_io = 1;
1521 current_cpu = cpu;
1522
1523 hvf_init_vcpu(cpu);
1524
1525 /* signal CPU creation */
1526 cpu->created = true;
1527 qemu_cond_signal(&qemu_cpu_cond);
1528
1529 do {
1530 if (cpu_can_run(cpu)) {
1531 r = hvf_vcpu_exec(cpu);
1532 if (r == EXCP_DEBUG) {
1533 cpu_handle_guest_debug(cpu);
1534 }
1535 }
1536 qemu_wait_io_event(cpu);
1537 } while (!cpu->unplug || cpu_can_run(cpu));
1538
1539 hvf_vcpu_destroy(cpu);
1540 cpu->created = false;
1541 qemu_cond_signal(&qemu_cpu_cond);
1542 qemu_mutex_unlock_iothread();
1543 rcu_unregister_thread();
1544 return NULL;
1545 }
1546
1547 static void *qemu_whpx_cpu_thread_fn(void *arg)
1548 {
1549 CPUState *cpu = arg;
1550 int r;
1551
1552 rcu_register_thread();
1553
1554 qemu_mutex_lock_iothread();
1555 qemu_thread_get_self(cpu->thread);
1556 cpu->thread_id = qemu_get_thread_id();
1557 current_cpu = cpu;
1558
1559 r = whpx_init_vcpu(cpu);
1560 if (r < 0) {
1561 fprintf(stderr, "whpx_init_vcpu failed: %s\n", strerror(-r));
1562 exit(1);
1563 }
1564
1565 /* signal CPU creation */
1566 cpu->created = true;
1567 qemu_cond_signal(&qemu_cpu_cond);
1568
1569 do {
1570 if (cpu_can_run(cpu)) {
1571 r = whpx_vcpu_exec(cpu);
1572 if (r == EXCP_DEBUG) {
1573 cpu_handle_guest_debug(cpu);
1574 }
1575 }
1576 while (cpu_thread_is_idle(cpu)) {
1577 qemu_cond_wait(cpu->halt_cond, &qemu_global_mutex);
1578 }
1579 qemu_wait_io_event_common(cpu);
1580 } while (!cpu->unplug || cpu_can_run(cpu));
1581
1582 whpx_destroy_vcpu(cpu);
1583 cpu->created = false;
1584 qemu_cond_signal(&qemu_cpu_cond);
1585 qemu_mutex_unlock_iothread();
1586 rcu_unregister_thread();
1587 return NULL;
1588 }
1589
1590 #ifdef _WIN32
1591 static void CALLBACK dummy_apc_func(ULONG_PTR unused)
1592 {
1593 }
1594 #endif
1595
1596 /* Multi-threaded TCG
1597 *
1598 * In the multi-threaded case each vCPU has its own thread. The TLS
1599 * variable current_cpu can be used deep in the code to find the
1600 * current CPUState for a given thread.
1601 */
1602
1603 static void *qemu_tcg_cpu_thread_fn(void *arg)
1604 {
1605 CPUState *cpu = arg;
1606
1607 g_assert(!use_icount);
1608
1609 rcu_register_thread();
1610 tcg_register_thread();
1611
1612 qemu_mutex_lock_iothread();
1613 qemu_thread_get_self(cpu->thread);
1614
1615 cpu->thread_id = qemu_get_thread_id();
1616 cpu->created = true;
1617 cpu->can_do_io = 1;
1618 current_cpu = cpu;
1619 qemu_cond_signal(&qemu_cpu_cond);
1620
1621 /* process any pending work */
1622 cpu->exit_request = 1;
1623
1624 do {
1625 if (cpu_can_run(cpu)) {
1626 int r;
1627 r = tcg_cpu_exec(cpu);
1628 switch (r) {
1629 case EXCP_DEBUG:
1630 cpu_handle_guest_debug(cpu);
1631 break;
1632 case EXCP_HALTED:
1633 /* During start-up the vCPU is reset and the thread is
1634 * kicked several times. If we don't ensure we go back
1635 * to sleep in the halted state we won't start up cleanly
1636 * when the vCPU is enabled.
1637 *
1638 * cpu->halted should ensure we sleep in wait_io_event
1639 */
1640 g_assert(cpu->halted);
1641 break;
1642 case EXCP_ATOMIC:
1643 qemu_mutex_unlock_iothread();
1644 cpu_exec_step_atomic(cpu);
1645 qemu_mutex_lock_iothread();
1646 default:
1647 /* Ignore everything else? */
1648 break;
1649 }
1650 }
1651
1652 atomic_mb_set(&cpu->exit_request, 0);
1653 qemu_wait_io_event(cpu);
1654 } while (!cpu->unplug || cpu_can_run(cpu));
1655
1656 qemu_tcg_destroy_vcpu(cpu);
1657 cpu->created = false;
1658 qemu_cond_signal(&qemu_cpu_cond);
1659 qemu_mutex_unlock_iothread();
1660 rcu_unregister_thread();
1661 return NULL;
1662 }
1663
1664 static void qemu_cpu_kick_thread(CPUState *cpu)
1665 {
1666 #ifndef _WIN32
1667 int err;
1668
1669 if (cpu->thread_kicked) {
1670 return;
1671 }
1672 cpu->thread_kicked = true;
1673 err = pthread_kill(cpu->thread->thread, SIG_IPI);
1674 if (err) {
1675 fprintf(stderr, "qemu:%s: %s", __func__, strerror(err));
1676 exit(1);
1677 }
1678 #else /* _WIN32 */
1679 if (!qemu_cpu_is_self(cpu)) {
1680 if (whpx_enabled()) {
1681 whpx_vcpu_kick(cpu);
1682 } else if (!QueueUserAPC(dummy_apc_func, cpu->hThread, 0)) {
1683 fprintf(stderr, "%s: QueueUserAPC failed with error %lu\n",
1684 __func__, GetLastError());
1685 exit(1);
1686 }
1687 }
1688 #endif
1689 }
1690
1691 void qemu_cpu_kick(CPUState *cpu)
1692 {
1693 qemu_cond_broadcast(cpu->halt_cond);
1694 if (tcg_enabled()) {
1695 cpu_exit(cpu);
1696 /* NOP unless doing single-thread RR */
1697 qemu_cpu_kick_rr_cpu();
1698 } else {
1699 if (hax_enabled()) {
1700 /*
1701 * FIXME: race condition with the exit_request check in
1702 * hax_vcpu_hax_exec
1703 */
1704 cpu->exit_request = 1;
1705 }
1706 qemu_cpu_kick_thread(cpu);
1707 }
1708 }
1709
1710 void qemu_cpu_kick_self(void)
1711 {
1712 assert(current_cpu);
1713 qemu_cpu_kick_thread(current_cpu);
1714 }
1715
1716 bool qemu_cpu_is_self(CPUState *cpu)
1717 {
1718 return qemu_thread_is_self(cpu->thread);
1719 }
1720
1721 bool qemu_in_vcpu_thread(void)
1722 {
1723 return current_cpu && qemu_cpu_is_self(current_cpu);
1724 }
1725
1726 static __thread bool iothread_locked = false;
1727
1728 bool qemu_mutex_iothread_locked(void)
1729 {
1730 return iothread_locked;
1731 }
1732
1733 void qemu_mutex_lock_iothread(void)
1734 {
1735 g_assert(!qemu_mutex_iothread_locked());
1736 qemu_mutex_lock(&qemu_global_mutex);
1737 iothread_locked = true;
1738 }
1739
1740 void qemu_mutex_unlock_iothread(void)
1741 {
1742 g_assert(qemu_mutex_iothread_locked());
1743 iothread_locked = false;
1744 qemu_mutex_unlock(&qemu_global_mutex);
1745 }
1746
1747 static bool all_vcpus_paused(void)
1748 {
1749 CPUState *cpu;
1750
1751 CPU_FOREACH(cpu) {
1752 if (!cpu->stopped) {
1753 return false;
1754 }
1755 }
1756
1757 return true;
1758 }
1759
1760 void pause_all_vcpus(void)
1761 {
1762 CPUState *cpu;
1763
1764 qemu_clock_enable(QEMU_CLOCK_VIRTUAL, false);
1765 CPU_FOREACH(cpu) {
1766 if (qemu_cpu_is_self(cpu)) {
1767 qemu_cpu_stop(cpu, true);
1768 } else {
1769 cpu->stop = true;
1770 qemu_cpu_kick(cpu);
1771 }
1772 }
1773
1774 while (!all_vcpus_paused()) {
1775 qemu_cond_wait(&qemu_pause_cond, &qemu_global_mutex);
1776 CPU_FOREACH(cpu) {
1777 qemu_cpu_kick(cpu);
1778 }
1779 }
1780 }
1781
1782 void cpu_resume(CPUState *cpu)
1783 {
1784 cpu->stop = false;
1785 cpu->stopped = false;
1786 qemu_cpu_kick(cpu);
1787 }
1788
1789 void resume_all_vcpus(void)
1790 {
1791 CPUState *cpu;
1792
1793 qemu_clock_enable(QEMU_CLOCK_VIRTUAL, true);
1794 CPU_FOREACH(cpu) {
1795 cpu_resume(cpu);
1796 }
1797 }
1798
1799 void cpu_remove_sync(CPUState *cpu)
1800 {
1801 cpu->stop = true;
1802 cpu->unplug = true;
1803 qemu_cpu_kick(cpu);
1804 qemu_mutex_unlock_iothread();
1805 qemu_thread_join(cpu->thread);
1806 qemu_mutex_lock_iothread();
1807 }
1808
1809 /* Size of temporary buffers used to form a vCPU thread name */
1810 #define VCPU_THREAD_NAME_SIZE 16
1811
1812 static void qemu_tcg_init_vcpu(CPUState *cpu)
1813 {
1814 char thread_name[VCPU_THREAD_NAME_SIZE];
1815 static QemuCond *single_tcg_halt_cond;
1816 static QemuThread *single_tcg_cpu_thread;
1817 static int tcg_region_inited;
1818
1819 /*
1820 * Initialize TCG regions--once. Now is a good time, because:
1821 * (1) TCG's init context, prologue and target globals have been set up.
1822 * (2) qemu_tcg_mttcg_enabled() works now (TCG init code runs before the
1823 * -accel flag is processed, so the check doesn't work then).
1824 */
1825 if (!tcg_region_inited) {
1826 tcg_region_inited = 1;
1827 tcg_region_init();
1828 }
1829
1830 if (qemu_tcg_mttcg_enabled() || !single_tcg_cpu_thread) {
1831 cpu->thread = g_malloc0(sizeof(QemuThread));
1832 cpu->halt_cond = g_malloc0(sizeof(QemuCond));
1833 qemu_cond_init(cpu->halt_cond);
1834
1835 if (qemu_tcg_mttcg_enabled()) {
1836 /* create a thread per vCPU with TCG (MTTCG) */
1837 parallel_cpus = true;
1838 snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/TCG",
1839 cpu->cpu_index);
1840
1841 qemu_thread_create(cpu->thread, thread_name, qemu_tcg_cpu_thread_fn,
1842 cpu, QEMU_THREAD_JOINABLE);
1843
1844 } else {
1845 /* share a single thread for all cpus with TCG */
1846 snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "ALL CPUs/TCG");
1847 qemu_thread_create(cpu->thread, thread_name,
1848 qemu_tcg_rr_cpu_thread_fn,
1849 cpu, QEMU_THREAD_JOINABLE);
1850
1851 single_tcg_halt_cond = cpu->halt_cond;
1852 single_tcg_cpu_thread = cpu->thread;
1853 }
1854 #ifdef _WIN32
1855 cpu->hThread = qemu_thread_get_handle(cpu->thread);
1856 #endif
1857 } else {
1858 /* For non-MTTCG cases we share the thread */
1859 cpu->thread = single_tcg_cpu_thread;
1860 cpu->halt_cond = single_tcg_halt_cond;
1861 cpu->thread_id = first_cpu->thread_id;
1862 cpu->can_do_io = 1;
1863 cpu->created = true;
1864 }
1865 }
1866
1867 static void qemu_hax_start_vcpu(CPUState *cpu)
1868 {
1869 char thread_name[VCPU_THREAD_NAME_SIZE];
1870
1871 cpu->thread = g_malloc0(sizeof(QemuThread));
1872 cpu->halt_cond = g_malloc0(sizeof(QemuCond));
1873 qemu_cond_init(cpu->halt_cond);
1874
1875 snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/HAX",
1876 cpu->cpu_index);
1877 qemu_thread_create(cpu->thread, thread_name, qemu_hax_cpu_thread_fn,
1878 cpu, QEMU_THREAD_JOINABLE);
1879 #ifdef _WIN32
1880 cpu->hThread = qemu_thread_get_handle(cpu->thread);
1881 #endif
1882 }
1883
1884 static void qemu_kvm_start_vcpu(CPUState *cpu)
1885 {
1886 char thread_name[VCPU_THREAD_NAME_SIZE];
1887
1888 cpu->thread = g_malloc0(sizeof(QemuThread));
1889 cpu->halt_cond = g_malloc0(sizeof(QemuCond));
1890 qemu_cond_init(cpu->halt_cond);
1891 snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/KVM",
1892 cpu->cpu_index);
1893 qemu_thread_create(cpu->thread, thread_name, qemu_kvm_cpu_thread_fn,
1894 cpu, QEMU_THREAD_JOINABLE);
1895 }
1896
1897 static void qemu_hvf_start_vcpu(CPUState *cpu)
1898 {
1899 char thread_name[VCPU_THREAD_NAME_SIZE];
1900
1901 /* HVF currently does not support TCG, and only runs in
1902 * unrestricted-guest mode. */
1903 assert(hvf_enabled());
1904
1905 cpu->thread = g_malloc0(sizeof(QemuThread));
1906 cpu->halt_cond = g_malloc0(sizeof(QemuCond));
1907 qemu_cond_init(cpu->halt_cond);
1908
1909 snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/HVF",
1910 cpu->cpu_index);
1911 qemu_thread_create(cpu->thread, thread_name, qemu_hvf_cpu_thread_fn,
1912 cpu, QEMU_THREAD_JOINABLE);
1913 }
1914
1915 static void qemu_whpx_start_vcpu(CPUState *cpu)
1916 {
1917 char thread_name[VCPU_THREAD_NAME_SIZE];
1918
1919 cpu->thread = g_malloc0(sizeof(QemuThread));
1920 cpu->halt_cond = g_malloc0(sizeof(QemuCond));
1921 qemu_cond_init(cpu->halt_cond);
1922 snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/WHPX",
1923 cpu->cpu_index);
1924 qemu_thread_create(cpu->thread, thread_name, qemu_whpx_cpu_thread_fn,
1925 cpu, QEMU_THREAD_JOINABLE);
1926 #ifdef _WIN32
1927 cpu->hThread = qemu_thread_get_handle(cpu->thread);
1928 #endif
1929 }
1930
1931 static void qemu_dummy_start_vcpu(CPUState *cpu)
1932 {
1933 char thread_name[VCPU_THREAD_NAME_SIZE];
1934
1935 cpu->thread = g_malloc0(sizeof(QemuThread));
1936 cpu->halt_cond = g_malloc0(sizeof(QemuCond));
1937 qemu_cond_init(cpu->halt_cond);
1938 snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/DUMMY",
1939 cpu->cpu_index);
1940 qemu_thread_create(cpu->thread, thread_name, qemu_dummy_cpu_thread_fn, cpu,
1941 QEMU_THREAD_JOINABLE);
1942 }
1943
1944 void qemu_init_vcpu(CPUState *cpu)
1945 {
1946 cpu->nr_cores = smp_cores;
1947 cpu->nr_threads = smp_threads;
1948 cpu->stopped = true;
1949
1950 if (!cpu->as) {
1951 /* If the target cpu hasn't set up any address spaces itself,
1952 * give it the default one.
1953 */
1954 cpu->num_ases = 1;
1955 cpu_address_space_init(cpu, 0, "cpu-memory", cpu->memory);
1956 }
1957
1958 if (kvm_enabled()) {
1959 qemu_kvm_start_vcpu(cpu);
1960 } else if (hax_enabled()) {
1961 qemu_hax_start_vcpu(cpu);
1962 } else if (hvf_enabled()) {
1963 qemu_hvf_start_vcpu(cpu);
1964 } else if (tcg_enabled()) {
1965 qemu_tcg_init_vcpu(cpu);
1966 } else if (whpx_enabled()) {
1967 qemu_whpx_start_vcpu(cpu);
1968 } else {
1969 qemu_dummy_start_vcpu(cpu);
1970 }
1971
1972 while (!cpu->created) {
1973 qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
1974 }
1975 }
1976
1977 void cpu_stop_current(void)
1978 {
1979 if (current_cpu) {
1980 qemu_cpu_stop(current_cpu, true);
1981 }
1982 }
1983
1984 int vm_stop(RunState state)
1985 {
1986 if (qemu_in_vcpu_thread()) {
1987 qemu_system_vmstop_request_prepare();
1988 qemu_system_vmstop_request(state);
1989 /*
1990 * FIXME: should not return to device code in case
1991 * vm_stop() has been requested.
1992 */
1993 cpu_stop_current();
1994 return 0;
1995 }
1996
1997 return do_vm_stop(state);
1998 }
1999
2000 /**
2001 * Prepare for (re)starting the VM.
2002 * Returns -1 if the vCPUs are not to be restarted (e.g. if they are already
2003 * running or in case of an error condition), 0 otherwise.
2004 */
2005 int vm_prepare_start(void)
2006 {
2007 RunState requested;
2008 int res = 0;
2009
2010 qemu_vmstop_requested(&requested);
2011 if (runstate_is_running() && requested == RUN_STATE__MAX) {
2012 return -1;
2013 }
2014
2015 /* Ensure that a STOP/RESUME pair of events is emitted if a
2016 * vmstop request was pending. The BLOCK_IO_ERROR event, for
2017 * example, is documented to always be followed by
2018 * the STOP event.
2019 */
2020 if (runstate_is_running()) {
2021 qapi_event_send_stop(&error_abort);
2022 res = -1;
2023 } else {
2024 replay_enable_events();
2025 cpu_enable_ticks();
2026 runstate_set(RUN_STATE_RUNNING);
2027 vm_state_notify(1, RUN_STATE_RUNNING);
2028 }
2029
2030 /* We are sending this now, but the CPUs will be resumed shortly afterwards */
2031 qapi_event_send_resume(&error_abort);
2032 return res;
2033 }
2034
2035 void vm_start(void)
2036 {
2037 if (!vm_prepare_start()) {
2038 resume_all_vcpus();
2039 }
2040 }
2041
2042 /* Does a state transition even if the VM is already stopped;
2043 the current state is forgotten forever. */
2044 int vm_stop_force_state(RunState state)
2045 {
2046 if (runstate_is_running()) {
2047 return vm_stop(state);
2048 } else {
2049 runstate_set(state);
2050
2051 bdrv_drain_all();
2052 /* Make sure to return an error if the flush in a previous vm_stop()
2053 * failed. */
2054 return bdrv_flush_all();
2055 }
2056 }
2057
2058 void list_cpus(FILE *f, fprintf_function cpu_fprintf, const char *optarg)
2059 {
2060 /* XXX: implement xxx_cpu_list for targets that still lack it */
2061 #if defined(cpu_list)
2062 cpu_list(f, cpu_fprintf);
2063 #endif
2064 }
2065
2066 CpuInfoList *qmp_query_cpus(Error **errp)
2067 {
2068 MachineState *ms = MACHINE(qdev_get_machine());
2069 MachineClass *mc = MACHINE_GET_CLASS(ms);
2070 CpuInfoList *head = NULL, *cur_item = NULL;
2071 CPUState *cpu;
2072
2073 CPU_FOREACH(cpu) {
2074 CpuInfoList *info;
2075 #if defined(TARGET_I386)
2076 X86CPU *x86_cpu = X86_CPU(cpu);
2077 CPUX86State *env = &x86_cpu->env;
2078 #elif defined(TARGET_PPC)
2079 PowerPCCPU *ppc_cpu = POWERPC_CPU(cpu);
2080 CPUPPCState *env = &ppc_cpu->env;
2081 #elif defined(TARGET_SPARC)
2082 SPARCCPU *sparc_cpu = SPARC_CPU(cpu);
2083 CPUSPARCState *env = &sparc_cpu->env;
2084 #elif defined(TARGET_MIPS)
2085 MIPSCPU *mips_cpu = MIPS_CPU(cpu);
2086 CPUMIPSState *env = &mips_cpu->env;
2087 #elif defined(TARGET_TRICORE)
2088 TriCoreCPU *tricore_cpu = TRICORE_CPU(cpu);
2089 CPUTriCoreState *env = &tricore_cpu->env;
2090 #elif defined(TARGET_S390X)
2091 S390CPU *s390_cpu = S390_CPU(cpu);
2092 CPUS390XState *env = &s390_cpu->env;
2093 #endif
2094
2095 cpu_synchronize_state(cpu);
2096
2097 info = g_malloc0(sizeof(*info));
2098 info->value = g_malloc0(sizeof(*info->value));
2099 info->value->CPU = cpu->cpu_index;
2100 info->value->current = (cpu == first_cpu);
2101 info->value->halted = cpu->halted;
2102 info->value->qom_path = object_get_canonical_path(OBJECT(cpu));
2103 info->value->thread_id = cpu->thread_id;
2104 #if defined(TARGET_I386)
2105 info->value->arch = CPU_INFO_ARCH_X86;
2106 info->value->u.x86.pc = env->eip + env->segs[R_CS].base;
2107 #elif defined(TARGET_PPC)
2108 info->value->arch = CPU_INFO_ARCH_PPC;
2109 info->value->u.ppc.nip = env->nip;
2110 #elif defined(TARGET_SPARC)
2111 info->value->arch = CPU_INFO_ARCH_SPARC;
2112 info->value->u.q_sparc.pc = env->pc;
2113 info->value->u.q_sparc.npc = env->npc;
2114 #elif defined(TARGET_MIPS)
2115 info->value->arch = CPU_INFO_ARCH_MIPS;
2116 info->value->u.q_mips.PC = env->active_tc.PC;
2117 #elif defined(TARGET_TRICORE)
2118 info->value->arch = CPU_INFO_ARCH_TRICORE;
2119 info->value->u.tricore.PC = env->PC;
2120 #elif defined(TARGET_S390X)
2121 info->value->arch = CPU_INFO_ARCH_S390;
2122 info->value->u.s390.cpu_state = env->cpu_state;
2123 #else
2124 info->value->arch = CPU_INFO_ARCH_OTHER;
2125 #endif
2126 info->value->has_props = !!mc->cpu_index_to_instance_props;
2127 if (info->value->has_props) {
2128 CpuInstanceProperties *props;
2129 props = g_malloc0(sizeof(*props));
2130 *props = mc->cpu_index_to_instance_props(ms, cpu->cpu_index);
2131 info->value->props = props;
2132 }
2133
2134 /* XXX: waiting for the qapi to support GSList */
2135 if (!cur_item) {
2136 head = cur_item = info;
2137 } else {
2138 cur_item->next = info;
2139 cur_item = info;
2140 }
2141 }
2142
2143 return head;
2144 }
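/*
 * This implements the QMP "query-cpus" command, e.g. (illustrative):
 *
 *   { "execute": "query-cpus" }
 *
 * Because it calls cpu_synchronize_state(), it may interrupt running
 * vCPUs; query-cpus-fast below avoids that.
 */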
2145
2146 /*
2147 * fast means: we NEVER interrupt vCPU threads to retrieve
2148 * information from KVM.
2149 */
2150 CpuInfoFastList *qmp_query_cpus_fast(Error **errp)
2151 {
2152 MachineState *ms = MACHINE(qdev_get_machine());
2153 MachineClass *mc = MACHINE_GET_CLASS(ms);
2154 CpuInfoFastList *head = NULL, *cur_item = NULL;
2155 CPUState *cpu;
2156 #if defined(TARGET_S390X)
2157 S390CPU *s390_cpu;
2158 CPUS390XState *env;
2159 #endif
2160
2161 CPU_FOREACH(cpu) {
2162 CpuInfoFastList *info = g_malloc0(sizeof(*info));
2163 info->value = g_malloc0(sizeof(*info->value));
2164
2165 info->value->cpu_index = cpu->cpu_index;
2166 info->value->qom_path = object_get_canonical_path(OBJECT(cpu));
2167 info->value->thread_id = cpu->thread_id;
2168
2169 info->value->has_props = !!mc->cpu_index_to_instance_props;
2170 if (info->value->has_props) {
2171 CpuInstanceProperties *props;
2172 props = g_malloc0(sizeof(*props));
2173 *props = mc->cpu_index_to_instance_props(ms, cpu->cpu_index);
2174 info->value->props = props;
2175 }
2176
2177 #if defined(TARGET_S390X)
2178 s390_cpu = S390_CPU(cpu);
2179 env = &s390_cpu->env;
2180 info->value->arch = CPU_INFO_ARCH_S390;
2181 info->value->u.s390.cpu_state = env->cpu_state;
2182 #endif
2183 if (!cur_item) {
2184 head = cur_item = info;
2185 } else {
2186 cur_item->next = info;
2187 cur_item = info;
2188 }
2189 }
2190
2191 return head;
2192 }
2193
2194 void qmp_memsave(int64_t addr, int64_t size, const char *filename,
2195 bool has_cpu, int64_t cpu_index, Error **errp)
2196 {
2197 FILE *f;
2198 uint32_t l;
2199 CPUState *cpu;
2200 uint8_t buf[1024];
2201 int64_t orig_addr = addr, orig_size = size;
2202
2203 if (!has_cpu) {
2204 cpu_index = 0;
2205 }
2206
2207 cpu = qemu_get_cpu(cpu_index);
2208 if (cpu == NULL) {
2209 error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "cpu-index",
2210 "a CPU number");
2211 return;
2212 }
2213
2214 f = fopen(filename, "wb");
2215 if (!f) {
2216 error_setg_file_open(errp, errno, filename);
2217 return;
2218 }
2219
2220 while (size != 0) {
2221 l = sizeof(buf);
2222 if (l > size)
2223 l = size;
2224 if (cpu_memory_rw_debug(cpu, addr, buf, l, 0) != 0) {
2225 error_setg(errp, "Invalid addr 0x%016" PRIx64 "/size %" PRId64
2226 " specified", orig_addr, orig_size);
2227 goto exit;
2228 }
2229 if (fwrite(buf, 1, l, f) != l) {
2230 error_setg(errp, QERR_IO_ERROR);
2231 goto exit;
2232 }
2233 addr += l;
2234 size -= l;
2235 }
2236
2237 exit:
2238 fclose(f);
2239 }
2240
2241 void qmp_pmemsave(int64_t addr, int64_t size, const char *filename,
2242 Error **errp)
2243 {
2244 FILE *f;
2245 uint32_t l;
2246 uint8_t buf[1024];
2247
2248 f = fopen(filename, "wb");
2249 if (!f) {
2250 error_setg_file_open(errp, errno, filename);
2251 return;
2252 }
2253
2254 while (size != 0) {
2255 l = sizeof(buf);
2256 if (l > size)
2257 l = size;
2258 cpu_physical_memory_read(addr, buf, l);
2259 if (fwrite(buf, 1, l, f) != l) {
2260 error_setg(errp, QERR_IO_ERROR);
2261 goto exit;
2262 }
2263 addr += l;
2264 size -= l;
2265 }
2266
2267 exit:
2268 fclose(f);
2269 }
2270
2271 void qmp_inject_nmi(Error **errp)
2272 {
2273 nmi_monitor_handle(monitor_get_cpu_index(), errp);
2274 }
2275
2276 void dump_drift_info(FILE *f, fprintf_function cpu_fprintf)
2277 {
2278 if (!use_icount) {
2279 return;
2280 }
2281
2282 cpu_fprintf(f, "Host - Guest clock %"PRIi64" ms\n",
2283 (cpu_get_clock() - cpu_get_icount())/SCALE_MS);
2284 if (icount_align_option) {
2285 cpu_fprintf(f, "Max guest delay %"PRIi64" ms\n", -max_delay/SCALE_MS);
2286 cpu_fprintf(f, "Max guest advance %"PRIi64" ms\n", max_advance/SCALE_MS);
2287 } else {
2288 cpu_fprintf(f, "Max guest delay NA\n");
2289 cpu_fprintf(f, "Max guest advance NA\n");
2290 }
2291 }