]> git.proxmox.com Git - mirror_qemu.git/blob - cpus.c
kvm: move cpu synchronization code
[mirror_qemu.git] / cpus.c
1 /*
2 * QEMU System Emulator
3 *
4 * Copyright (c) 2003-2008 Fabrice Bellard
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to deal
8 * in the Software without restriction, including without limitation the rights
9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 * copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22 * THE SOFTWARE.
23 */
24
25 /* Needed early for CONFIG_BSD etc. */
26 #include "qemu/osdep.h"
27 #include "qemu-common.h"
28 #include "cpu.h"
29 #include "monitor/monitor.h"
30 #include "qapi/qmp/qerror.h"
31 #include "qemu/error-report.h"
32 #include "sysemu/sysemu.h"
33 #include "sysemu/block-backend.h"
34 #include "exec/gdbstub.h"
35 #include "sysemu/dma.h"
36 #include "sysemu/hw_accel.h"
37 #include "sysemu/kvm.h"
38 #include "qmp-commands.h"
39 #include "exec/exec-all.h"
40
41 #include "qemu/thread.h"
42 #include "sysemu/cpus.h"
43 #include "sysemu/qtest.h"
44 #include "qemu/main-loop.h"
45 #include "qemu/bitmap.h"
46 #include "qemu/seqlock.h"
47 #include "qapi-event.h"
48 #include "hw/nmi.h"
49 #include "sysemu/replay.h"
50
51 #ifndef _WIN32
52 #include "qemu/compatfd.h"
53 #endif
54
55 #ifdef CONFIG_LINUX
56
57 #include <sys/prctl.h>
58
59 #ifndef PR_MCE_KILL
60 #define PR_MCE_KILL 33
61 #endif
62
63 #ifndef PR_MCE_KILL_SET
64 #define PR_MCE_KILL_SET 1
65 #endif
66
67 #ifndef PR_MCE_KILL_EARLY
68 #define PR_MCE_KILL_EARLY 1
69 #endif
70
71 #endif /* CONFIG_LINUX */
72
73 int64_t max_delay;
74 int64_t max_advance;
75
76 /* vcpu throttling controls */
77 static QEMUTimer *throttle_timer;
78 static unsigned int throttle_percentage;
79
80 #define CPU_THROTTLE_PCT_MIN 1
81 #define CPU_THROTTLE_PCT_MAX 99
82 #define CPU_THROTTLE_TIMESLICE_NS 10000000
83
84 bool cpu_is_stopped(CPUState *cpu)
85 {
86 return cpu->stopped || !runstate_is_running();
87 }
88
89 static bool cpu_thread_is_idle(CPUState *cpu)
90 {
91 if (cpu->stop || cpu->queued_work_first) {
92 return false;
93 }
94 if (cpu_is_stopped(cpu)) {
95 return true;
96 }
97 if (!cpu->halted || cpu_has_work(cpu) ||
98 kvm_halt_in_kernel()) {
99 return false;
100 }
101 return true;
102 }
103
104 static bool all_cpu_threads_idle(void)
105 {
106 CPUState *cpu;
107
108 CPU_FOREACH(cpu) {
109 if (!cpu_thread_is_idle(cpu)) {
110 return false;
111 }
112 }
113 return true;
114 }
115
116 /***********************************************************/
117 /* guest cycle counter */
118
119 /* Protected by TimersState seqlock */
120
121 static bool icount_sleep = true;
122 static int64_t vm_clock_warp_start = -1;
123 /* Conversion factor from emulated instructions to virtual clock ticks. */
124 static int icount_time_shift;
125 /* Arbitrarily pick 1MIPS as the minimum allowable speed. */
126 #define MAX_ICOUNT_SHIFT 10
127
128 static QEMUTimer *icount_rt_timer;
129 static QEMUTimer *icount_vm_timer;
130 static QEMUTimer *icount_warp_timer;
131
132 typedef struct TimersState {
133 /* Protected by BQL. */
134 int64_t cpu_ticks_prev;
135 int64_t cpu_ticks_offset;
136
137 /* cpu_clock_offset can be read out of BQL, so protect it with
138 * this lock.
139 */
140 QemuSeqLock vm_clock_seqlock;
141 int64_t cpu_clock_offset;
142 int32_t cpu_ticks_enabled;
143 int64_t dummy;
144
145 /* Compensate for varying guest execution speed. */
146 int64_t qemu_icount_bias;
147 /* Only written by TCG thread */
148 int64_t qemu_icount;
149 } TimersState;
150
151 static TimersState timers_state;
152
153 int64_t cpu_get_icount_raw(void)
154 {
155 int64_t icount;
156 CPUState *cpu = current_cpu;
157
158 icount = timers_state.qemu_icount;
159 if (cpu) {
160 if (!cpu->can_do_io) {
161 fprintf(stderr, "Bad icount read\n");
162 exit(1);
163 }
164 icount -= (cpu->icount_decr.u16.low + cpu->icount_extra);
165 }
166 return icount;
167 }
168
169 /* Return the virtual CPU time, based on the instruction counter. */
170 static int64_t cpu_get_icount_locked(void)
171 {
172 int64_t icount = cpu_get_icount_raw();
173 return timers_state.qemu_icount_bias + cpu_icount_to_ns(icount);
174 }
175
176 int64_t cpu_get_icount(void)
177 {
178 int64_t icount;
179 unsigned start;
180
181 do {
182 start = seqlock_read_begin(&timers_state.vm_clock_seqlock);
183 icount = cpu_get_icount_locked();
184 } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start));
185
186 return icount;
187 }
188
189 int64_t cpu_icount_to_ns(int64_t icount)
190 {
191 return icount << icount_time_shift;
192 }
193
194 /* return the time elapsed in VM between vm_start and vm_stop. Unless
195 * icount is active, cpu_get_ticks() uses units of the host CPU cycle
196 * counter.
197 *
198 * Caller must hold the BQL
199 */
200 int64_t cpu_get_ticks(void)
201 {
202 int64_t ticks;
203
204 if (use_icount) {
205 return cpu_get_icount();
206 }
207
208 ticks = timers_state.cpu_ticks_offset;
209 if (timers_state.cpu_ticks_enabled) {
210 ticks += cpu_get_host_ticks();
211 }
212
213 if (timers_state.cpu_ticks_prev > ticks) {
214 /* Note: non increasing ticks may happen if the host uses
215 software suspend */
216 timers_state.cpu_ticks_offset += timers_state.cpu_ticks_prev - ticks;
217 ticks = timers_state.cpu_ticks_prev;
218 }
219
220 timers_state.cpu_ticks_prev = ticks;
221 return ticks;
222 }
223
224 static int64_t cpu_get_clock_locked(void)
225 {
226 int64_t time;
227
228 time = timers_state.cpu_clock_offset;
229 if (timers_state.cpu_ticks_enabled) {
230 time += get_clock();
231 }
232
233 return time;
234 }
235
236 /* Return the monotonic time elapsed in VM, i.e.,
237 * the time between vm_start and vm_stop
238 */
239 int64_t cpu_get_clock(void)
240 {
241 int64_t ti;
242 unsigned start;
243
244 do {
245 start = seqlock_read_begin(&timers_state.vm_clock_seqlock);
246 ti = cpu_get_clock_locked();
247 } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start));
248
249 return ti;
250 }
251
252 /* enable cpu_get_ticks()
253 * Caller must hold BQL which serves as mutex for vm_clock_seqlock.
254 */
255 void cpu_enable_ticks(void)
256 {
257 /* Here, the really thing protected by seqlock is cpu_clock_offset. */
258 seqlock_write_begin(&timers_state.vm_clock_seqlock);
259 if (!timers_state.cpu_ticks_enabled) {
260 timers_state.cpu_ticks_offset -= cpu_get_host_ticks();
261 timers_state.cpu_clock_offset -= get_clock();
262 timers_state.cpu_ticks_enabled = 1;
263 }
264 seqlock_write_end(&timers_state.vm_clock_seqlock);
265 }
266
267 /* disable cpu_get_ticks() : the clock is stopped. You must not call
268 * cpu_get_ticks() after that.
269 * Caller must hold BQL which serves as mutex for vm_clock_seqlock.
270 */
271 void cpu_disable_ticks(void)
272 {
273 /* Here, the really thing protected by seqlock is cpu_clock_offset. */
274 seqlock_write_begin(&timers_state.vm_clock_seqlock);
275 if (timers_state.cpu_ticks_enabled) {
276 timers_state.cpu_ticks_offset += cpu_get_host_ticks();
277 timers_state.cpu_clock_offset = cpu_get_clock_locked();
278 timers_state.cpu_ticks_enabled = 0;
279 }
280 seqlock_write_end(&timers_state.vm_clock_seqlock);
281 }
282
283 /* Correlation between real and virtual time is always going to be
284 fairly approximate, so ignore small variation.
285 When the guest is idle real and virtual time will be aligned in
286 the IO wait loop. */
287 #define ICOUNT_WOBBLE (NANOSECONDS_PER_SECOND / 10)
288
289 static void icount_adjust(void)
290 {
291 int64_t cur_time;
292 int64_t cur_icount;
293 int64_t delta;
294
295 /* Protected by TimersState mutex. */
296 static int64_t last_delta;
297
298 /* If the VM is not running, then do nothing. */
299 if (!runstate_is_running()) {
300 return;
301 }
302
303 seqlock_write_begin(&timers_state.vm_clock_seqlock);
304 cur_time = cpu_get_clock_locked();
305 cur_icount = cpu_get_icount_locked();
306
307 delta = cur_icount - cur_time;
308 /* FIXME: This is a very crude algorithm, somewhat prone to oscillation. */
309 if (delta > 0
310 && last_delta + ICOUNT_WOBBLE < delta * 2
311 && icount_time_shift > 0) {
312 /* The guest is getting too far ahead. Slow time down. */
313 icount_time_shift--;
314 }
315 if (delta < 0
316 && last_delta - ICOUNT_WOBBLE > delta * 2
317 && icount_time_shift < MAX_ICOUNT_SHIFT) {
318 /* The guest is getting too far behind. Speed time up. */
319 icount_time_shift++;
320 }
321 last_delta = delta;
322 timers_state.qemu_icount_bias = cur_icount
323 - (timers_state.qemu_icount << icount_time_shift);
324 seqlock_write_end(&timers_state.vm_clock_seqlock);
325 }
326
327 static void icount_adjust_rt(void *opaque)
328 {
329 timer_mod(icount_rt_timer,
330 qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL_RT) + 1000);
331 icount_adjust();
332 }
333
334 static void icount_adjust_vm(void *opaque)
335 {
336 timer_mod(icount_vm_timer,
337 qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
338 NANOSECONDS_PER_SECOND / 10);
339 icount_adjust();
340 }
341
342 static int64_t qemu_icount_round(int64_t count)
343 {
344 return (count + (1 << icount_time_shift) - 1) >> icount_time_shift;
345 }
346
347 static void icount_warp_rt(void)
348 {
349 unsigned seq;
350 int64_t warp_start;
351
352 /* The icount_warp_timer is rescheduled soon after vm_clock_warp_start
353 * changes from -1 to another value, so the race here is okay.
354 */
355 do {
356 seq = seqlock_read_begin(&timers_state.vm_clock_seqlock);
357 warp_start = vm_clock_warp_start;
358 } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, seq));
359
360 if (warp_start == -1) {
361 return;
362 }
363
364 seqlock_write_begin(&timers_state.vm_clock_seqlock);
365 if (runstate_is_running()) {
366 int64_t clock = REPLAY_CLOCK(REPLAY_CLOCK_VIRTUAL_RT,
367 cpu_get_clock_locked());
368 int64_t warp_delta;
369
370 warp_delta = clock - vm_clock_warp_start;
371 if (use_icount == 2) {
372 /*
373 * In adaptive mode, do not let QEMU_CLOCK_VIRTUAL run too
374 * far ahead of real time.
375 */
376 int64_t cur_icount = cpu_get_icount_locked();
377 int64_t delta = clock - cur_icount;
378 warp_delta = MIN(warp_delta, delta);
379 }
380 timers_state.qemu_icount_bias += warp_delta;
381 }
382 vm_clock_warp_start = -1;
383 seqlock_write_end(&timers_state.vm_clock_seqlock);
384
385 if (qemu_clock_expired(QEMU_CLOCK_VIRTUAL)) {
386 qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
387 }
388 }
389
/* icount_warp_timer callback; @opaque is unused. */
static void icount_timer_cb(void *opaque)
{
    /* No need for a checkpoint because the timer already synchronizes
     * with CHECKPOINT_CLOCK_VIRTUAL_RT.
     */
    icount_warp_rt();
}
397
398 void qtest_clock_warp(int64_t dest)
399 {
400 int64_t clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
401 AioContext *aio_context;
402 assert(qtest_enabled());
403 aio_context = qemu_get_aio_context();
404 while (clock < dest) {
405 int64_t deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
406 int64_t warp = qemu_soonest_timeout(dest - clock, deadline);
407
408 seqlock_write_begin(&timers_state.vm_clock_seqlock);
409 timers_state.qemu_icount_bias += warp;
410 seqlock_write_end(&timers_state.vm_clock_seqlock);
411
412 qemu_clock_run_timers(QEMU_CLOCK_VIRTUAL);
413 timerlist_run_timers(aio_context->tlg.tl[QEMU_CLOCK_VIRTUAL]);
414 clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
415 }
416 qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
417 }
418
419 void qemu_start_warp_timer(void)
420 {
421 int64_t clock;
422 int64_t deadline;
423
424 if (!use_icount) {
425 return;
426 }
427
428 /* Nothing to do if the VM is stopped: QEMU_CLOCK_VIRTUAL timers
429 * do not fire, so computing the deadline does not make sense.
430 */
431 if (!runstate_is_running()) {
432 return;
433 }
434
435 /* warp clock deterministically in record/replay mode */
436 if (!replay_checkpoint(CHECKPOINT_CLOCK_WARP_START)) {
437 return;
438 }
439
440 if (!all_cpu_threads_idle()) {
441 return;
442 }
443
444 if (qtest_enabled()) {
445 /* When testing, qtest commands advance icount. */
446 return;
447 }
448
449 /* We want to use the earliest deadline from ALL vm_clocks */
450 clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT);
451 deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
452 if (deadline < 0) {
453 static bool notified;
454 if (!icount_sleep && !notified) {
455 error_report("WARNING: icount sleep disabled and no active timers");
456 notified = true;
457 }
458 return;
459 }
460
461 if (deadline > 0) {
462 /*
463 * Ensure QEMU_CLOCK_VIRTUAL proceeds even when the virtual CPU goes to
464 * sleep. Otherwise, the CPU might be waiting for a future timer
465 * interrupt to wake it up, but the interrupt never comes because
466 * the vCPU isn't running any insns and thus doesn't advance the
467 * QEMU_CLOCK_VIRTUAL.
468 */
469 if (!icount_sleep) {
470 /*
471 * We never let VCPUs sleep in no sleep icount mode.
472 * If there is a pending QEMU_CLOCK_VIRTUAL timer we just advance
473 * to the next QEMU_CLOCK_VIRTUAL event and notify it.
474 * It is useful when we want a deterministic execution time,
475 * isolated from host latencies.
476 */
477 seqlock_write_begin(&timers_state.vm_clock_seqlock);
478 timers_state.qemu_icount_bias += deadline;
479 seqlock_write_end(&timers_state.vm_clock_seqlock);
480 qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
481 } else {
482 /*
483 * We do stop VCPUs and only advance QEMU_CLOCK_VIRTUAL after some
484 * "real" time, (related to the time left until the next event) has
485 * passed. The QEMU_CLOCK_VIRTUAL_RT clock will do this.
486 * This avoids that the warps are visible externally; for example,
487 * you will not be sending network packets continuously instead of
488 * every 100ms.
489 */
490 seqlock_write_begin(&timers_state.vm_clock_seqlock);
491 if (vm_clock_warp_start == -1 || vm_clock_warp_start > clock) {
492 vm_clock_warp_start = clock;
493 }
494 seqlock_write_end(&timers_state.vm_clock_seqlock);
495 timer_mod_anticipate(icount_warp_timer, clock + deadline);
496 }
497 } else if (deadline == 0) {
498 qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
499 }
500 }
501
502 static void qemu_account_warp_timer(void)
503 {
504 if (!use_icount || !icount_sleep) {
505 return;
506 }
507
508 /* Nothing to do if the VM is stopped: QEMU_CLOCK_VIRTUAL timers
509 * do not fire, so computing the deadline does not make sense.
510 */
511 if (!runstate_is_running()) {
512 return;
513 }
514
515 /* warp clock deterministically in record/replay mode */
516 if (!replay_checkpoint(CHECKPOINT_CLOCK_WARP_ACCOUNT)) {
517 return;
518 }
519
520 timer_del(icount_warp_timer);
521 icount_warp_rt();
522 }
523
524 static bool icount_state_needed(void *opaque)
525 {
526 return use_icount;
527 }
528
529 /*
530 * This is a subsection for icount migration.
531 */
532 static const VMStateDescription icount_vmstate_timers = {
533 .name = "timer/icount",
534 .version_id = 1,
535 .minimum_version_id = 1,
536 .needed = icount_state_needed,
537 .fields = (VMStateField[]) {
538 VMSTATE_INT64(qemu_icount_bias, TimersState),
539 VMSTATE_INT64(qemu_icount, TimersState),
540 VMSTATE_END_OF_LIST()
541 }
542 };
543
544 static const VMStateDescription vmstate_timers = {
545 .name = "timer",
546 .version_id = 2,
547 .minimum_version_id = 1,
548 .fields = (VMStateField[]) {
549 VMSTATE_INT64(cpu_ticks_offset, TimersState),
550 VMSTATE_INT64(dummy, TimersState),
551 VMSTATE_INT64_V(cpu_clock_offset, TimersState, 2),
552 VMSTATE_END_OF_LIST()
553 },
554 .subsections = (const VMStateDescription*[]) {
555 &icount_vmstate_timers,
556 NULL
557 }
558 };
559
560 static void cpu_throttle_thread(CPUState *cpu, run_on_cpu_data opaque)
561 {
562 double pct;
563 double throttle_ratio;
564 long sleeptime_ns;
565
566 if (!cpu_throttle_get_percentage()) {
567 return;
568 }
569
570 pct = (double)cpu_throttle_get_percentage()/100;
571 throttle_ratio = pct / (1 - pct);
572 sleeptime_ns = (long)(throttle_ratio * CPU_THROTTLE_TIMESLICE_NS);
573
574 qemu_mutex_unlock_iothread();
575 atomic_set(&cpu->throttle_thread_scheduled, 0);
576 g_usleep(sleeptime_ns / 1000); /* Convert ns to us for usleep call */
577 qemu_mutex_lock_iothread();
578 }
579
580 static void cpu_throttle_timer_tick(void *opaque)
581 {
582 CPUState *cpu;
583 double pct;
584
585 /* Stop the timer if needed */
586 if (!cpu_throttle_get_percentage()) {
587 return;
588 }
589 CPU_FOREACH(cpu) {
590 if (!atomic_xchg(&cpu->throttle_thread_scheduled, 1)) {
591 async_run_on_cpu(cpu, cpu_throttle_thread,
592 RUN_ON_CPU_NULL);
593 }
594 }
595
596 pct = (double)cpu_throttle_get_percentage()/100;
597 timer_mod(throttle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT) +
598 CPU_THROTTLE_TIMESLICE_NS / (1-pct));
599 }
600
601 void cpu_throttle_set(int new_throttle_pct)
602 {
603 /* Ensure throttle percentage is within valid range */
604 new_throttle_pct = MIN(new_throttle_pct, CPU_THROTTLE_PCT_MAX);
605 new_throttle_pct = MAX(new_throttle_pct, CPU_THROTTLE_PCT_MIN);
606
607 atomic_set(&throttle_percentage, new_throttle_pct);
608
609 timer_mod(throttle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT) +
610 CPU_THROTTLE_TIMESLICE_NS);
611 }
612
613 void cpu_throttle_stop(void)
614 {
615 atomic_set(&throttle_percentage, 0);
616 }
617
/* Return true while a non-zero throttle percentage is set. */
bool cpu_throttle_active(void)
{
    int pct = cpu_throttle_get_percentage();

    return pct != 0;
}
622
623 int cpu_throttle_get_percentage(void)
624 {
625 return atomic_read(&throttle_percentage);
626 }
627
628 void cpu_ticks_init(void)
629 {
630 seqlock_init(&timers_state.vm_clock_seqlock);
631 vmstate_register(NULL, 0, &vmstate_timers, &timers_state);
632 throttle_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL_RT,
633 cpu_throttle_timer_tick, NULL);
634 }
635
636 void configure_icount(QemuOpts *opts, Error **errp)
637 {
638 const char *option;
639 char *rem_str = NULL;
640
641 option = qemu_opt_get(opts, "shift");
642 if (!option) {
643 if (qemu_opt_get(opts, "align") != NULL) {
644 error_setg(errp, "Please specify shift option when using align");
645 }
646 return;
647 }
648
649 icount_sleep = qemu_opt_get_bool(opts, "sleep", true);
650 if (icount_sleep) {
651 icount_warp_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL_RT,
652 icount_timer_cb, NULL);
653 }
654
655 icount_align_option = qemu_opt_get_bool(opts, "align", false);
656
657 if (icount_align_option && !icount_sleep) {
658 error_setg(errp, "align=on and sleep=off are incompatible");
659 }
660 if (strcmp(option, "auto") != 0) {
661 errno = 0;
662 icount_time_shift = strtol(option, &rem_str, 0);
663 if (errno != 0 || *rem_str != '\0' || !strlen(option)) {
664 error_setg(errp, "icount: Invalid shift value");
665 }
666 use_icount = 1;
667 return;
668 } else if (icount_align_option) {
669 error_setg(errp, "shift=auto and align=on are incompatible");
670 } else if (!icount_sleep) {
671 error_setg(errp, "shift=auto and sleep=off are incompatible");
672 }
673
674 use_icount = 2;
675
676 /* 125MIPS seems a reasonable initial guess at the guest speed.
677 It will be corrected fairly quickly anyway. */
678 icount_time_shift = 3;
679
680 /* Have both realtime and virtual time triggers for speed adjustment.
681 The realtime trigger catches emulated time passing too slowly,
682 the virtual time trigger catches emulated time passing too fast.
683 Realtime triggers occur even when idle, so use them less frequently
684 than VM triggers. */
685 icount_rt_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL_RT,
686 icount_adjust_rt, NULL);
687 timer_mod(icount_rt_timer,
688 qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL_RT) + 1000);
689 icount_vm_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
690 icount_adjust_vm, NULL);
691 timer_mod(icount_vm_timer,
692 qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
693 NANOSECONDS_PER_SECOND / 10);
694 }
695
696 /***********************************************************/
697 void hw_error(const char *fmt, ...)
698 {
699 va_list ap;
700 CPUState *cpu;
701
702 va_start(ap, fmt);
703 fprintf(stderr, "qemu: hardware error: ");
704 vfprintf(stderr, fmt, ap);
705 fprintf(stderr, "\n");
706 CPU_FOREACH(cpu) {
707 fprintf(stderr, "CPU #%d:\n", cpu->cpu_index);
708 cpu_dump_state(cpu, stderr, fprintf, CPU_DUMP_FPU);
709 }
710 va_end(ap);
711 abort();
712 }
713
714 void cpu_synchronize_all_states(void)
715 {
716 CPUState *cpu;
717
718 CPU_FOREACH(cpu) {
719 cpu_synchronize_state(cpu);
720 }
721 }
722
723 void cpu_synchronize_all_post_reset(void)
724 {
725 CPUState *cpu;
726
727 CPU_FOREACH(cpu) {
728 cpu_synchronize_post_reset(cpu);
729 }
730 }
731
732 void cpu_synchronize_all_post_init(void)
733 {
734 CPUState *cpu;
735
736 CPU_FOREACH(cpu) {
737 cpu_synchronize_post_init(cpu);
738 }
739 }
740
741 static int do_vm_stop(RunState state)
742 {
743 int ret = 0;
744
745 if (runstate_is_running()) {
746 cpu_disable_ticks();
747 pause_all_vcpus();
748 runstate_set(state);
749 vm_state_notify(0, state);
750 qapi_event_send_stop(&error_abort);
751 }
752
753 bdrv_drain_all();
754 replay_disable_events();
755 ret = bdrv_flush_all();
756
757 return ret;
758 }
759
760 static bool cpu_can_run(CPUState *cpu)
761 {
762 if (cpu->stop) {
763 return false;
764 }
765 if (cpu_is_stopped(cpu)) {
766 return false;
767 }
768 return true;
769 }
770
771 static void cpu_handle_guest_debug(CPUState *cpu)
772 {
773 gdb_set_stop_cpu(cpu);
774 qemu_system_debug_request();
775 cpu->stopped = true;
776 }
777
778 #ifdef CONFIG_LINUX
779 static void sigbus_reraise(void)
780 {
781 sigset_t set;
782 struct sigaction action;
783
784 memset(&action, 0, sizeof(action));
785 action.sa_handler = SIG_DFL;
786 if (!sigaction(SIGBUS, &action, NULL)) {
787 raise(SIGBUS);
788 sigemptyset(&set);
789 sigaddset(&set, SIGBUS);
790 pthread_sigmask(SIG_UNBLOCK, &set, NULL);
791 }
792 perror("Failed to re-raise SIGBUS!\n");
793 abort();
794 }
795
796 static void sigbus_handler(int n, struct qemu_signalfd_siginfo *siginfo,
797 void *ctx)
798 {
799 if (kvm_on_sigbus(siginfo->ssi_code,
800 (void *)(intptr_t)siginfo->ssi_addr)) {
801 sigbus_reraise();
802 }
803 }
804
805 static void qemu_init_sigbus(void)
806 {
807 struct sigaction action;
808
809 memset(&action, 0, sizeof(action));
810 action.sa_flags = SA_SIGINFO;
811 action.sa_sigaction = (void (*)(int, siginfo_t*, void*))sigbus_handler;
812 sigaction(SIGBUS, &action, NULL);
813
814 prctl(PR_MCE_KILL, PR_MCE_KILL_SET, PR_MCE_KILL_EARLY, 0, 0);
815 }
816
817 static void qemu_kvm_eat_signals(CPUState *cpu)
818 {
819 struct timespec ts = { 0, 0 };
820 siginfo_t siginfo;
821 sigset_t waitset;
822 sigset_t chkset;
823 int r;
824
825 sigemptyset(&waitset);
826 sigaddset(&waitset, SIG_IPI);
827 sigaddset(&waitset, SIGBUS);
828
829 do {
830 r = sigtimedwait(&waitset, &siginfo, &ts);
831 if (r == -1 && !(errno == EAGAIN || errno == EINTR)) {
832 perror("sigtimedwait");
833 exit(1);
834 }
835
836 switch (r) {
837 case SIGBUS:
838 if (kvm_on_sigbus_vcpu(cpu, siginfo.si_code, siginfo.si_addr)) {
839 sigbus_reraise();
840 }
841 break;
842 default:
843 break;
844 }
845
846 r = sigpending(&chkset);
847 if (r == -1) {
848 perror("sigpending");
849 exit(1);
850 }
851 } while (sigismember(&chkset, SIG_IPI) || sigismember(&chkset, SIGBUS));
852 }
853
854 #else /* !CONFIG_LINUX */
855
/* Variant for builds without CONFIG_LINUX: nothing to set up. */
static void qemu_init_sigbus(void)
{
    /* intentionally empty */
}
859
860 static void qemu_kvm_eat_signals(CPUState *cpu)
861 {
862 }
863 #endif /* !CONFIG_LINUX */
864
865 #ifndef _WIN32
/* Do-nothing handler, installed for SIG_IPI by
 * qemu_kvm_init_cpu_signals().
 */
static void dummy_signal(int sig)
{
    /* intentionally empty */
}
869
870 static void qemu_kvm_init_cpu_signals(CPUState *cpu)
871 {
872 int r;
873 sigset_t set;
874 struct sigaction sigact;
875
876 memset(&sigact, 0, sizeof(sigact));
877 sigact.sa_handler = dummy_signal;
878 sigaction(SIG_IPI, &sigact, NULL);
879
880 pthread_sigmask(SIG_BLOCK, NULL, &set);
881 sigdelset(&set, SIG_IPI);
882 sigdelset(&set, SIGBUS);
883 r = kvm_set_signal_mask(cpu, &set);
884 if (r) {
885 fprintf(stderr, "kvm_set_signal_mask: %s\n", strerror(-r));
886 exit(1);
887 }
888 }
889
890 #else /* _WIN32 */
891 static void qemu_kvm_init_cpu_signals(CPUState *cpu)
892 {
893 abort();
894 }
895 #endif /* _WIN32 */
896
897 static QemuMutex qemu_global_mutex;
898 static QemuCond qemu_io_proceeded_cond;
899 static unsigned iothread_requesting_mutex;
900
901 static QemuThread io_thread;
902
903 /* cpu creation */
904 static QemuCond qemu_cpu_cond;
905 /* system init */
906 static QemuCond qemu_pause_cond;
907
908 void qemu_init_cpu_loop(void)
909 {
910 qemu_init_sigbus();
911 qemu_cond_init(&qemu_cpu_cond);
912 qemu_cond_init(&qemu_pause_cond);
913 qemu_cond_init(&qemu_io_proceeded_cond);
914 qemu_mutex_init(&qemu_global_mutex);
915
916 qemu_thread_get_self(&io_thread);
917 }
918
919 void run_on_cpu(CPUState *cpu, run_on_cpu_func func, run_on_cpu_data data)
920 {
921 do_run_on_cpu(cpu, func, data, &qemu_global_mutex);
922 }
923
924 static void qemu_kvm_destroy_vcpu(CPUState *cpu)
925 {
926 if (kvm_destroy_vcpu(cpu) < 0) {
927 error_report("kvm_destroy_vcpu failed");
928 exit(EXIT_FAILURE);
929 }
930 }
931
932 static void qemu_tcg_destroy_vcpu(CPUState *cpu)
933 {
934 }
935
936 static void qemu_wait_io_event_common(CPUState *cpu)
937 {
938 if (cpu->stop) {
939 cpu->stop = false;
940 cpu->stopped = true;
941 qemu_cond_broadcast(&qemu_pause_cond);
942 }
943 process_queued_cpu_work(cpu);
944 cpu->thread_kicked = false;
945 }
946
947 static void qemu_tcg_wait_io_event(CPUState *cpu)
948 {
949 while (all_cpu_threads_idle()) {
950 qemu_cond_wait(cpu->halt_cond, &qemu_global_mutex);
951 }
952
953 while (iothread_requesting_mutex) {
954 qemu_cond_wait(&qemu_io_proceeded_cond, &qemu_global_mutex);
955 }
956
957 CPU_FOREACH(cpu) {
958 qemu_wait_io_event_common(cpu);
959 }
960 }
961
962 static void qemu_kvm_wait_io_event(CPUState *cpu)
963 {
964 while (cpu_thread_is_idle(cpu)) {
965 qemu_cond_wait(cpu->halt_cond, &qemu_global_mutex);
966 }
967
968 qemu_kvm_eat_signals(cpu);
969 qemu_wait_io_event_common(cpu);
970 }
971
972 static void *qemu_kvm_cpu_thread_fn(void *arg)
973 {
974 CPUState *cpu = arg;
975 int r;
976
977 rcu_register_thread();
978
979 qemu_mutex_lock_iothread();
980 qemu_thread_get_self(cpu->thread);
981 cpu->thread_id = qemu_get_thread_id();
982 cpu->can_do_io = 1;
983 current_cpu = cpu;
984
985 r = kvm_init_vcpu(cpu);
986 if (r < 0) {
987 fprintf(stderr, "kvm_init_vcpu failed: %s\n", strerror(-r));
988 exit(1);
989 }
990
991 qemu_kvm_init_cpu_signals(cpu);
992
993 /* signal CPU creation */
994 cpu->created = true;
995 qemu_cond_signal(&qemu_cpu_cond);
996
997 do {
998 if (cpu_can_run(cpu)) {
999 r = kvm_cpu_exec(cpu);
1000 if (r == EXCP_DEBUG) {
1001 cpu_handle_guest_debug(cpu);
1002 }
1003 }
1004 qemu_kvm_wait_io_event(cpu);
1005 } while (!cpu->unplug || cpu_can_run(cpu));
1006
1007 qemu_kvm_destroy_vcpu(cpu);
1008 cpu->created = false;
1009 qemu_cond_signal(&qemu_cpu_cond);
1010 qemu_mutex_unlock_iothread();
1011 return NULL;
1012 }
1013
1014 static void *qemu_dummy_cpu_thread_fn(void *arg)
1015 {
1016 #ifdef _WIN32
1017 fprintf(stderr, "qtest is not supported under Windows\n");
1018 exit(1);
1019 #else
1020 CPUState *cpu = arg;
1021 sigset_t waitset;
1022 int r;
1023
1024 rcu_register_thread();
1025
1026 qemu_mutex_lock_iothread();
1027 qemu_thread_get_self(cpu->thread);
1028 cpu->thread_id = qemu_get_thread_id();
1029 cpu->can_do_io = 1;
1030
1031 sigemptyset(&waitset);
1032 sigaddset(&waitset, SIG_IPI);
1033
1034 /* signal CPU creation */
1035 cpu->created = true;
1036 qemu_cond_signal(&qemu_cpu_cond);
1037
1038 current_cpu = cpu;
1039 while (1) {
1040 current_cpu = NULL;
1041 qemu_mutex_unlock_iothread();
1042 do {
1043 int sig;
1044 r = sigwait(&waitset, &sig);
1045 } while (r == -1 && (errno == EAGAIN || errno == EINTR));
1046 if (r == -1) {
1047 perror("sigwait");
1048 exit(1);
1049 }
1050 qemu_mutex_lock_iothread();
1051 current_cpu = cpu;
1052 qemu_wait_io_event_common(cpu);
1053 }
1054
1055 return NULL;
1056 #endif
1057 }
1058
1059 static int64_t tcg_get_icount_limit(void)
1060 {
1061 int64_t deadline;
1062
1063 if (replay_mode != REPLAY_MODE_PLAY) {
1064 deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
1065
1066 /* Maintain prior (possibly buggy) behaviour where if no deadline
1067 * was set (as there is no QEMU_CLOCK_VIRTUAL timer) or it is more than
1068 * INT32_MAX nanoseconds ahead, we still use INT32_MAX
1069 * nanoseconds.
1070 */
1071 if ((deadline < 0) || (deadline > INT32_MAX)) {
1072 deadline = INT32_MAX;
1073 }
1074
1075 return qemu_icount_round(deadline);
1076 } else {
1077 return replay_get_instructions();
1078 }
1079 }
1080
1081 static void handle_icount_deadline(void)
1082 {
1083 if (use_icount) {
1084 int64_t deadline =
1085 qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
1086
1087 if (deadline == 0) {
1088 qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
1089 }
1090 }
1091 }
1092
1093 static int tcg_cpu_exec(CPUState *cpu)
1094 {
1095 int ret;
1096 #ifdef CONFIG_PROFILER
1097 int64_t ti;
1098 #endif
1099
1100 #ifdef CONFIG_PROFILER
1101 ti = profile_getclock();
1102 #endif
1103 if (use_icount) {
1104 int64_t count;
1105 int decr;
1106 timers_state.qemu_icount -= (cpu->icount_decr.u16.low
1107 + cpu->icount_extra);
1108 cpu->icount_decr.u16.low = 0;
1109 cpu->icount_extra = 0;
1110 count = tcg_get_icount_limit();
1111 timers_state.qemu_icount += count;
1112 decr = (count > 0xffff) ? 0xffff : count;
1113 count -= decr;
1114 cpu->icount_decr.u16.low = decr;
1115 cpu->icount_extra = count;
1116 }
1117 cpu_exec_start(cpu);
1118 ret = cpu_exec(cpu);
1119 cpu_exec_end(cpu);
1120 #ifdef CONFIG_PROFILER
1121 tcg_time += profile_getclock() - ti;
1122 #endif
1123 if (use_icount) {
1124 /* Fold pending instructions back into the
1125 instruction counter, and clear the interrupt flag. */
1126 timers_state.qemu_icount -= (cpu->icount_decr.u16.low
1127 + cpu->icount_extra);
1128 cpu->icount_decr.u32 = 0;
1129 cpu->icount_extra = 0;
1130 replay_account_executed_instructions();
1131 }
1132 return ret;
1133 }
1134
1135 /* Destroy any remaining vCPUs which have been unplugged and have
1136 * finished running
1137 */
1138 static void deal_with_unplugged_cpus(void)
1139 {
1140 CPUState *cpu;
1141
1142 CPU_FOREACH(cpu) {
1143 if (cpu->unplug && !cpu_can_run(cpu)) {
1144 qemu_tcg_destroy_vcpu(cpu);
1145 cpu->created = false;
1146 qemu_cond_signal(&qemu_cpu_cond);
1147 break;
1148 }
1149 }
1150 }
1151
1152 static void *qemu_tcg_cpu_thread_fn(void *arg)
1153 {
1154 CPUState *cpu = arg;
1155
1156 rcu_register_thread();
1157
1158 qemu_mutex_lock_iothread();
1159 qemu_thread_get_self(cpu->thread);
1160
1161 CPU_FOREACH(cpu) {
1162 cpu->thread_id = qemu_get_thread_id();
1163 cpu->created = true;
1164 cpu->can_do_io = 1;
1165 }
1166 qemu_cond_signal(&qemu_cpu_cond);
1167
1168 /* wait for initial kick-off after machine start */
1169 while (first_cpu->stopped) {
1170 qemu_cond_wait(first_cpu->halt_cond, &qemu_global_mutex);
1171
1172 /* process any pending work */
1173 CPU_FOREACH(cpu) {
1174 qemu_wait_io_event_common(cpu);
1175 }
1176 }
1177
1178 /* process any pending work */
1179 atomic_mb_set(&exit_request, 1);
1180
1181 cpu = first_cpu;
1182
1183 while (1) {
1184 /* Account partial waits to QEMU_CLOCK_VIRTUAL. */
1185 qemu_account_warp_timer();
1186
1187 if (!cpu) {
1188 cpu = first_cpu;
1189 }
1190
1191 for (; cpu != NULL && !exit_request; cpu = CPU_NEXT(cpu)) {
1192
1193 qemu_clock_enable(QEMU_CLOCK_VIRTUAL,
1194 (cpu->singlestep_enabled & SSTEP_NOTIMER) == 0);
1195
1196 if (cpu_can_run(cpu)) {
1197 int r;
1198 r = tcg_cpu_exec(cpu);
1199 if (r == EXCP_DEBUG) {
1200 cpu_handle_guest_debug(cpu);
1201 break;
1202 }
1203 } else if (cpu->stop || cpu->stopped) {
1204 if (cpu->unplug) {
1205 cpu = CPU_NEXT(cpu);
1206 }
1207 break;
1208 }
1209
1210 } /* for cpu.. */
1211
1212 /* Pairs with smp_wmb in qemu_cpu_kick. */
1213 atomic_mb_set(&exit_request, 0);
1214
1215 handle_icount_deadline();
1216
1217 qemu_tcg_wait_io_event(QTAILQ_FIRST(&cpus));
1218 deal_with_unplugged_cpus();
1219 }
1220
1221 return NULL;
1222 }
1223
1224 static void qemu_cpu_kick_thread(CPUState *cpu)
1225 {
1226 #ifndef _WIN32
1227 int err;
1228
1229 if (cpu->thread_kicked) {
1230 return;
1231 }
1232 cpu->thread_kicked = true;
1233 err = pthread_kill(cpu->thread->thread, SIG_IPI);
1234 if (err) {
1235 fprintf(stderr, "qemu:%s: %s", __func__, strerror(err));
1236 exit(1);
1237 }
1238 #else /* _WIN32 */
1239 abort();
1240 #endif
1241 }
1242
1243 static void qemu_cpu_kick_no_halt(void)
1244 {
1245 CPUState *cpu;
1246 /* Ensure whatever caused the exit has reached the CPU threads before
1247 * writing exit_request.
1248 */
1249 atomic_mb_set(&exit_request, 1);
1250 cpu = atomic_mb_read(&tcg_current_cpu);
1251 if (cpu) {
1252 cpu_exit(cpu);
1253 }
1254 }
1255
1256 void qemu_cpu_kick(CPUState *cpu)
1257 {
1258 qemu_cond_broadcast(cpu->halt_cond);
1259 if (tcg_enabled()) {
1260 qemu_cpu_kick_no_halt();
1261 } else {
1262 qemu_cpu_kick_thread(cpu);
1263 }
1264 }
1265
1266 void qemu_cpu_kick_self(void)
1267 {
1268 assert(current_cpu);
1269 qemu_cpu_kick_thread(current_cpu);
1270 }
1271
1272 bool qemu_cpu_is_self(CPUState *cpu)
1273 {
1274 return qemu_thread_is_self(cpu->thread);
1275 }
1276
1277 bool qemu_in_vcpu_thread(void)
1278 {
1279 return current_cpu && qemu_cpu_is_self(current_cpu);
1280 }
1281
/* Per-thread flag: set by qemu_mutex_lock_iothread() and cleared by
 * qemu_mutex_unlock_iothread().
 */
static __thread bool iothread_locked = false;

/* Report the calling thread's iothread_locked flag. */
bool qemu_mutex_iothread_locked(void)
{
    bool locked = iothread_locked;

    return locked;
}
1288
1289 void qemu_mutex_lock_iothread(void)
1290 {
1291 atomic_inc(&iothread_requesting_mutex);
1292 /* In the simple case there is no need to bump the VCPU thread out of
1293 * TCG code execution.
1294 */
1295 if (!tcg_enabled() || qemu_in_vcpu_thread() ||
1296 !first_cpu || !first_cpu->created) {
1297 qemu_mutex_lock(&qemu_global_mutex);
1298 atomic_dec(&iothread_requesting_mutex);
1299 } else {
1300 if (qemu_mutex_trylock(&qemu_global_mutex)) {
1301 qemu_cpu_kick_no_halt();
1302 qemu_mutex_lock(&qemu_global_mutex);
1303 }
1304 atomic_dec(&iothread_requesting_mutex);
1305 qemu_cond_broadcast(&qemu_io_proceeded_cond);
1306 }
1307 iothread_locked = true;
1308 }
1309
1310 void qemu_mutex_unlock_iothread(void)
1311 {
1312 iothread_locked = false;
1313 qemu_mutex_unlock(&qemu_global_mutex);
1314 }
1315
1316 static bool all_vcpus_paused(void)
1317 {
1318 CPUState *cpu;
1319
1320 CPU_FOREACH(cpu) {
1321 if (!cpu->stopped) {
1322 return false;
1323 }
1324 }
1325
1326 return true;
1327 }
1328
1329 void pause_all_vcpus(void)
1330 {
1331 CPUState *cpu;
1332
1333 qemu_clock_enable(QEMU_CLOCK_VIRTUAL, false);
1334 CPU_FOREACH(cpu) {
1335 cpu->stop = true;
1336 qemu_cpu_kick(cpu);
1337 }
1338
1339 if (qemu_in_vcpu_thread()) {
1340 cpu_stop_current();
1341 if (!kvm_enabled()) {
1342 CPU_FOREACH(cpu) {
1343 cpu->stop = false;
1344 cpu->stopped = true;
1345 }
1346 return;
1347 }
1348 }
1349
1350 while (!all_vcpus_paused()) {
1351 qemu_cond_wait(&qemu_pause_cond, &qemu_global_mutex);
1352 CPU_FOREACH(cpu) {
1353 qemu_cpu_kick(cpu);
1354 }
1355 }
1356 }
1357
1358 void cpu_resume(CPUState *cpu)
1359 {
1360 cpu->stop = false;
1361 cpu->stopped = false;
1362 qemu_cpu_kick(cpu);
1363 }
1364
1365 void resume_all_vcpus(void)
1366 {
1367 CPUState *cpu;
1368
1369 qemu_clock_enable(QEMU_CLOCK_VIRTUAL, true);
1370 CPU_FOREACH(cpu) {
1371 cpu_resume(cpu);
1372 }
1373 }
1374
1375 void cpu_remove(CPUState *cpu)
1376 {
1377 cpu->stop = true;
1378 cpu->unplug = true;
1379 qemu_cpu_kick(cpu);
1380 }
1381
1382 void cpu_remove_sync(CPUState *cpu)
1383 {
1384 cpu_remove(cpu);
1385 while (cpu->created) {
1386 qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
1387 }
1388 }
1389
1390 /* For temporary buffers for forming a name */
1391 #define VCPU_THREAD_NAME_SIZE 16
1392
1393 static void qemu_tcg_init_vcpu(CPUState *cpu)
1394 {
1395 char thread_name[VCPU_THREAD_NAME_SIZE];
1396 static QemuCond *tcg_halt_cond;
1397 static QemuThread *tcg_cpu_thread;
1398
1399 /* share a single thread for all cpus with TCG */
1400 if (!tcg_cpu_thread) {
1401 cpu->thread = g_malloc0(sizeof(QemuThread));
1402 cpu->halt_cond = g_malloc0(sizeof(QemuCond));
1403 qemu_cond_init(cpu->halt_cond);
1404 tcg_halt_cond = cpu->halt_cond;
1405 snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/TCG",
1406 cpu->cpu_index);
1407 qemu_thread_create(cpu->thread, thread_name, qemu_tcg_cpu_thread_fn,
1408 cpu, QEMU_THREAD_JOINABLE);
1409 #ifdef _WIN32
1410 cpu->hThread = qemu_thread_get_handle(cpu->thread);
1411 #endif
1412 while (!cpu->created) {
1413 qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
1414 }
1415 tcg_cpu_thread = cpu->thread;
1416 } else {
1417 cpu->thread = tcg_cpu_thread;
1418 cpu->halt_cond = tcg_halt_cond;
1419 }
1420 }
1421
1422 static void qemu_kvm_start_vcpu(CPUState *cpu)
1423 {
1424 char thread_name[VCPU_THREAD_NAME_SIZE];
1425
1426 cpu->thread = g_malloc0(sizeof(QemuThread));
1427 cpu->halt_cond = g_malloc0(sizeof(QemuCond));
1428 qemu_cond_init(cpu->halt_cond);
1429 snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/KVM",
1430 cpu->cpu_index);
1431 qemu_thread_create(cpu->thread, thread_name, qemu_kvm_cpu_thread_fn,
1432 cpu, QEMU_THREAD_JOINABLE);
1433 while (!cpu->created) {
1434 qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
1435 }
1436 }
1437
1438 static void qemu_dummy_start_vcpu(CPUState *cpu)
1439 {
1440 char thread_name[VCPU_THREAD_NAME_SIZE];
1441
1442 cpu->thread = g_malloc0(sizeof(QemuThread));
1443 cpu->halt_cond = g_malloc0(sizeof(QemuCond));
1444 qemu_cond_init(cpu->halt_cond);
1445 snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/DUMMY",
1446 cpu->cpu_index);
1447 qemu_thread_create(cpu->thread, thread_name, qemu_dummy_cpu_thread_fn, cpu,
1448 QEMU_THREAD_JOINABLE);
1449 while (!cpu->created) {
1450 qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
1451 }
1452 }
1453
1454 void qemu_init_vcpu(CPUState *cpu)
1455 {
1456 cpu->nr_cores = smp_cores;
1457 cpu->nr_threads = smp_threads;
1458 cpu->stopped = true;
1459
1460 if (!cpu->as) {
1461 /* If the target cpu hasn't set up any address spaces itself,
1462 * give it the default one.
1463 */
1464 AddressSpace *as = address_space_init_shareable(cpu->memory,
1465 "cpu-memory");
1466 cpu->num_ases = 1;
1467 cpu_address_space_init(cpu, as, 0);
1468 }
1469
1470 if (kvm_enabled()) {
1471 qemu_kvm_start_vcpu(cpu);
1472 } else if (tcg_enabled()) {
1473 qemu_tcg_init_vcpu(cpu);
1474 } else {
1475 qemu_dummy_start_vcpu(cpu);
1476 }
1477 }
1478
1479 void cpu_stop_current(void)
1480 {
1481 if (current_cpu) {
1482 current_cpu->stop = false;
1483 current_cpu->stopped = true;
1484 cpu_exit(current_cpu);
1485 qemu_cond_broadcast(&qemu_pause_cond);
1486 }
1487 }
1488
1489 int vm_stop(RunState state)
1490 {
1491 if (qemu_in_vcpu_thread()) {
1492 qemu_system_vmstop_request_prepare();
1493 qemu_system_vmstop_request(state);
1494 /*
1495 * FIXME: should not return to device code in case
1496 * vm_stop() has been requested.
1497 */
1498 cpu_stop_current();
1499 return 0;
1500 }
1501
1502 return do_vm_stop(state);
1503 }
1504
1505 /* does a state transition even if the VM is already stopped,
1506 current state is forgotten forever */
1507 int vm_stop_force_state(RunState state)
1508 {
1509 if (runstate_is_running()) {
1510 return vm_stop(state);
1511 } else {
1512 runstate_set(state);
1513
1514 bdrv_drain_all();
1515 /* Make sure to return an error if the flush in a previous vm_stop()
1516 * failed. */
1517 return bdrv_flush_all();
1518 }
1519 }
1520
1521 void list_cpus(FILE *f, fprintf_function cpu_fprintf, const char *optarg)
1522 {
1523 /* XXX: implement xxx_cpu_list for targets that still miss it */
1524 #if defined(cpu_list)
1525 cpu_list(f, cpu_fprintf);
1526 #endif
1527 }
1528
1529 CpuInfoList *qmp_query_cpus(Error **errp)
1530 {
1531 CpuInfoList *head = NULL, *cur_item = NULL;
1532 CPUState *cpu;
1533
1534 CPU_FOREACH(cpu) {
1535 CpuInfoList *info;
1536 #if defined(TARGET_I386)
1537 X86CPU *x86_cpu = X86_CPU(cpu);
1538 CPUX86State *env = &x86_cpu->env;
1539 #elif defined(TARGET_PPC)
1540 PowerPCCPU *ppc_cpu = POWERPC_CPU(cpu);
1541 CPUPPCState *env = &ppc_cpu->env;
1542 #elif defined(TARGET_SPARC)
1543 SPARCCPU *sparc_cpu = SPARC_CPU(cpu);
1544 CPUSPARCState *env = &sparc_cpu->env;
1545 #elif defined(TARGET_MIPS)
1546 MIPSCPU *mips_cpu = MIPS_CPU(cpu);
1547 CPUMIPSState *env = &mips_cpu->env;
1548 #elif defined(TARGET_TRICORE)
1549 TriCoreCPU *tricore_cpu = TRICORE_CPU(cpu);
1550 CPUTriCoreState *env = &tricore_cpu->env;
1551 #endif
1552
1553 cpu_synchronize_state(cpu);
1554
1555 info = g_malloc0(sizeof(*info));
1556 info->value = g_malloc0(sizeof(*info->value));
1557 info->value->CPU = cpu->cpu_index;
1558 info->value->current = (cpu == first_cpu);
1559 info->value->halted = cpu->halted;
1560 info->value->qom_path = object_get_canonical_path(OBJECT(cpu));
1561 info->value->thread_id = cpu->thread_id;
1562 #if defined(TARGET_I386)
1563 info->value->arch = CPU_INFO_ARCH_X86;
1564 info->value->u.x86.pc = env->eip + env->segs[R_CS].base;
1565 #elif defined(TARGET_PPC)
1566 info->value->arch = CPU_INFO_ARCH_PPC;
1567 info->value->u.ppc.nip = env->nip;
1568 #elif defined(TARGET_SPARC)
1569 info->value->arch = CPU_INFO_ARCH_SPARC;
1570 info->value->u.q_sparc.pc = env->pc;
1571 info->value->u.q_sparc.npc = env->npc;
1572 #elif defined(TARGET_MIPS)
1573 info->value->arch = CPU_INFO_ARCH_MIPS;
1574 info->value->u.q_mips.PC = env->active_tc.PC;
1575 #elif defined(TARGET_TRICORE)
1576 info->value->arch = CPU_INFO_ARCH_TRICORE;
1577 info->value->u.tricore.PC = env->PC;
1578 #else
1579 info->value->arch = CPU_INFO_ARCH_OTHER;
1580 #endif
1581
1582 /* XXX: waiting for the qapi to support GSList */
1583 if (!cur_item) {
1584 head = cur_item = info;
1585 } else {
1586 cur_item->next = info;
1587 cur_item = info;
1588 }
1589 }
1590
1591 return head;
1592 }
1593
1594 void qmp_memsave(int64_t addr, int64_t size, const char *filename,
1595 bool has_cpu, int64_t cpu_index, Error **errp)
1596 {
1597 FILE *f;
1598 uint32_t l;
1599 CPUState *cpu;
1600 uint8_t buf[1024];
1601 int64_t orig_addr = addr, orig_size = size;
1602
1603 if (!has_cpu) {
1604 cpu_index = 0;
1605 }
1606
1607 cpu = qemu_get_cpu(cpu_index);
1608 if (cpu == NULL) {
1609 error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "cpu-index",
1610 "a CPU number");
1611 return;
1612 }
1613
1614 f = fopen(filename, "wb");
1615 if (!f) {
1616 error_setg_file_open(errp, errno, filename);
1617 return;
1618 }
1619
1620 while (size != 0) {
1621 l = sizeof(buf);
1622 if (l > size)
1623 l = size;
1624 if (cpu_memory_rw_debug(cpu, addr, buf, l, 0) != 0) {
1625 error_setg(errp, "Invalid addr 0x%016" PRIx64 "/size %" PRId64
1626 " specified", orig_addr, orig_size);
1627 goto exit;
1628 }
1629 if (fwrite(buf, 1, l, f) != l) {
1630 error_setg(errp, QERR_IO_ERROR);
1631 goto exit;
1632 }
1633 addr += l;
1634 size -= l;
1635 }
1636
1637 exit:
1638 fclose(f);
1639 }
1640
1641 void qmp_pmemsave(int64_t addr, int64_t size, const char *filename,
1642 Error **errp)
1643 {
1644 FILE *f;
1645 uint32_t l;
1646 uint8_t buf[1024];
1647
1648 f = fopen(filename, "wb");
1649 if (!f) {
1650 error_setg_file_open(errp, errno, filename);
1651 return;
1652 }
1653
1654 while (size != 0) {
1655 l = sizeof(buf);
1656 if (l > size)
1657 l = size;
1658 cpu_physical_memory_read(addr, buf, l);
1659 if (fwrite(buf, 1, l, f) != l) {
1660 error_setg(errp, QERR_IO_ERROR);
1661 goto exit;
1662 }
1663 addr += l;
1664 size -= l;
1665 }
1666
1667 exit:
1668 fclose(f);
1669 }
1670
1671 void qmp_inject_nmi(Error **errp)
1672 {
1673 nmi_monitor_handle(monitor_get_cpu_index(), errp);
1674 }
1675
1676 void dump_drift_info(FILE *f, fprintf_function cpu_fprintf)
1677 {
1678 if (!use_icount) {
1679 return;
1680 }
1681
1682 cpu_fprintf(f, "Host - Guest clock %"PRIi64" ms\n",
1683 (cpu_get_clock() - cpu_get_icount())/SCALE_MS);
1684 if (icount_align_option) {
1685 cpu_fprintf(f, "Max guest delay %"PRIi64" ms\n", -max_delay/SCALE_MS);
1686 cpu_fprintf(f, "Max guest advance %"PRIi64" ms\n", max_advance/SCALE_MS);
1687 } else {
1688 cpu_fprintf(f, "Max guest delay NA\n");
1689 cpu_fprintf(f, "Max guest advance NA\n");
1690 }
1691 }