]> git.proxmox.com Git - mirror_qemu.git/blob - cpus.c
ed7d30a6c13b2304ea8201fdefb9d2a059b2c4f7
[mirror_qemu.git] / cpus.c
1 /*
2 * QEMU System Emulator
3 *
4 * Copyright (c) 2003-2008 Fabrice Bellard
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to deal
8 * in the Software without restriction, including without limitation the rights
9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 * copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22 * THE SOFTWARE.
23 */
24
25 /* Needed early for CONFIG_BSD etc. */
26 #include "qemu/osdep.h"
27 #include "qemu-common.h"
28 #include "cpu.h"
29 #include "monitor/monitor.h"
30 #include "qapi/qmp/qerror.h"
31 #include "qemu/error-report.h"
32 #include "sysemu/sysemu.h"
33 #include "sysemu/block-backend.h"
34 #include "exec/gdbstub.h"
35 #include "sysemu/dma.h"
36 #include "sysemu/kvm.h"
37 #include "qmp-commands.h"
38 #include "exec/exec-all.h"
39
40 #include "qemu/thread.h"
41 #include "sysemu/cpus.h"
42 #include "sysemu/qtest.h"
43 #include "qemu/main-loop.h"
44 #include "qemu/bitmap.h"
45 #include "qemu/seqlock.h"
46 #include "qapi-event.h"
47 #include "hw/nmi.h"
48 #include "sysemu/replay.h"
49
50 #ifndef _WIN32
51 #include "qemu/compatfd.h"
52 #endif
53
54 #ifdef CONFIG_LINUX
55
56 #include <sys/prctl.h>
57
58 #ifndef PR_MCE_KILL
59 #define PR_MCE_KILL 33
60 #endif
61
62 #ifndef PR_MCE_KILL_SET
63 #define PR_MCE_KILL_SET 1
64 #endif
65
66 #ifndef PR_MCE_KILL_EARLY
67 #define PR_MCE_KILL_EARLY 1
68 #endif
69
70 #endif /* CONFIG_LINUX */
71
72 static CPUState *next_cpu;
73 int64_t max_delay;
74 int64_t max_advance;
75
76 /* vcpu throttling controls */
77 static QEMUTimer *throttle_timer;
78 static unsigned int throttle_percentage;
79
80 #define CPU_THROTTLE_PCT_MIN 1
81 #define CPU_THROTTLE_PCT_MAX 99
82 #define CPU_THROTTLE_TIMESLICE_NS 10000000
83
84 bool cpu_is_stopped(CPUState *cpu)
85 {
86 return cpu->stopped || !runstate_is_running();
87 }
88
89 static bool cpu_thread_is_idle(CPUState *cpu)
90 {
91 if (cpu->stop || cpu->queued_work_first) {
92 return false;
93 }
94 if (cpu_is_stopped(cpu)) {
95 return true;
96 }
97 if (!cpu->halted || cpu_has_work(cpu) ||
98 kvm_halt_in_kernel()) {
99 return false;
100 }
101 return true;
102 }
103
104 static bool all_cpu_threads_idle(void)
105 {
106 CPUState *cpu;
107
108 CPU_FOREACH(cpu) {
109 if (!cpu_thread_is_idle(cpu)) {
110 return false;
111 }
112 }
113 return true;
114 }
115
116 /***********************************************************/
117 /* guest cycle counter */
118
119 /* Protected by TimersState seqlock */
120
121 static bool icount_sleep = true;
122 static int64_t vm_clock_warp_start = -1;
123 /* Conversion factor from emulated instructions to virtual clock ticks. */
124 static int icount_time_shift;
125 /* Arbitrarily pick 1MIPS as the minimum allowable speed. */
126 #define MAX_ICOUNT_SHIFT 10
127
128 static QEMUTimer *icount_rt_timer;
129 static QEMUTimer *icount_vm_timer;
130 static QEMUTimer *icount_warp_timer;
131
132 typedef struct TimersState {
133 /* Protected by BQL. */
134 int64_t cpu_ticks_prev;
135 int64_t cpu_ticks_offset;
136
137 /* cpu_clock_offset can be read out of BQL, so protect it with
138 * this lock.
139 */
140 QemuSeqLock vm_clock_seqlock;
141 int64_t cpu_clock_offset;
142 int32_t cpu_ticks_enabled;
143 int64_t dummy;
144
145 /* Compensate for varying guest execution speed. */
146 int64_t qemu_icount_bias;
147 /* Only written by TCG thread */
148 int64_t qemu_icount;
149 } TimersState;
150
151 static TimersState timers_state;
152
153 int64_t cpu_get_icount_raw(void)
154 {
155 int64_t icount;
156 CPUState *cpu = current_cpu;
157
158 icount = timers_state.qemu_icount;
159 if (cpu) {
160 if (!cpu->can_do_io) {
161 fprintf(stderr, "Bad icount read\n");
162 exit(1);
163 }
164 icount -= (cpu->icount_decr.u16.low + cpu->icount_extra);
165 }
166 return icount;
167 }
168
169 /* Return the virtual CPU time, based on the instruction counter. */
170 static int64_t cpu_get_icount_locked(void)
171 {
172 int64_t icount = cpu_get_icount_raw();
173 return timers_state.qemu_icount_bias + cpu_icount_to_ns(icount);
174 }
175
176 int64_t cpu_get_icount(void)
177 {
178 int64_t icount;
179 unsigned start;
180
181 do {
182 start = seqlock_read_begin(&timers_state.vm_clock_seqlock);
183 icount = cpu_get_icount_locked();
184 } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start));
185
186 return icount;
187 }
188
189 int64_t cpu_icount_to_ns(int64_t icount)
190 {
191 return icount << icount_time_shift;
192 }
193
194 /* return the time elapsed in VM between vm_start and vm_stop. Unless
195 * icount is active, cpu_get_ticks() uses units of the host CPU cycle
196 * counter.
197 *
198 * Caller must hold the BQL
199 */
200 int64_t cpu_get_ticks(void)
201 {
202 int64_t ticks;
203
204 if (use_icount) {
205 return cpu_get_icount();
206 }
207
208 ticks = timers_state.cpu_ticks_offset;
209 if (timers_state.cpu_ticks_enabled) {
210 ticks += cpu_get_host_ticks();
211 }
212
213 if (timers_state.cpu_ticks_prev > ticks) {
214 /* Note: non increasing ticks may happen if the host uses
215 software suspend */
216 timers_state.cpu_ticks_offset += timers_state.cpu_ticks_prev - ticks;
217 ticks = timers_state.cpu_ticks_prev;
218 }
219
220 timers_state.cpu_ticks_prev = ticks;
221 return ticks;
222 }
223
224 static int64_t cpu_get_clock_locked(void)
225 {
226 int64_t time;
227
228 time = timers_state.cpu_clock_offset;
229 if (timers_state.cpu_ticks_enabled) {
230 time += get_clock();
231 }
232
233 return time;
234 }
235
236 /* Return the monotonic time elapsed in VM, i.e.,
237 * the time between vm_start and vm_stop
238 */
239 int64_t cpu_get_clock(void)
240 {
241 int64_t ti;
242 unsigned start;
243
244 do {
245 start = seqlock_read_begin(&timers_state.vm_clock_seqlock);
246 ti = cpu_get_clock_locked();
247 } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start));
248
249 return ti;
250 }
251
252 /* enable cpu_get_ticks()
253 * Caller must hold BQL which serves as mutex for vm_clock_seqlock.
254 */
255 void cpu_enable_ticks(void)
256 {
257 /* Here, the really thing protected by seqlock is cpu_clock_offset. */
258 seqlock_write_begin(&timers_state.vm_clock_seqlock);
259 if (!timers_state.cpu_ticks_enabled) {
260 timers_state.cpu_ticks_offset -= cpu_get_host_ticks();
261 timers_state.cpu_clock_offset -= get_clock();
262 timers_state.cpu_ticks_enabled = 1;
263 }
264 seqlock_write_end(&timers_state.vm_clock_seqlock);
265 }
266
267 /* disable cpu_get_ticks() : the clock is stopped. You must not call
268 * cpu_get_ticks() after that.
269 * Caller must hold BQL which serves as mutex for vm_clock_seqlock.
270 */
271 void cpu_disable_ticks(void)
272 {
273 /* Here, the really thing protected by seqlock is cpu_clock_offset. */
274 seqlock_write_begin(&timers_state.vm_clock_seqlock);
275 if (timers_state.cpu_ticks_enabled) {
276 timers_state.cpu_ticks_offset += cpu_get_host_ticks();
277 timers_state.cpu_clock_offset = cpu_get_clock_locked();
278 timers_state.cpu_ticks_enabled = 0;
279 }
280 seqlock_write_end(&timers_state.vm_clock_seqlock);
281 }
282
/* The correlation between real and virtual time is always going to be
 * fairly approximate, so small variations (a tenth of a second) are
 * ignored.  When the guest is idle, real and virtual time will be aligned
 * in the IO wait loop.
 */
#define ICOUNT_WOBBLE (NANOSECONDS_PER_SECOND / 10)
288
289 static void icount_adjust(void)
290 {
291 int64_t cur_time;
292 int64_t cur_icount;
293 int64_t delta;
294
295 /* Protected by TimersState mutex. */
296 static int64_t last_delta;
297
298 /* If the VM is not running, then do nothing. */
299 if (!runstate_is_running()) {
300 return;
301 }
302
303 seqlock_write_begin(&timers_state.vm_clock_seqlock);
304 cur_time = cpu_get_clock_locked();
305 cur_icount = cpu_get_icount_locked();
306
307 delta = cur_icount - cur_time;
308 /* FIXME: This is a very crude algorithm, somewhat prone to oscillation. */
309 if (delta > 0
310 && last_delta + ICOUNT_WOBBLE < delta * 2
311 && icount_time_shift > 0) {
312 /* The guest is getting too far ahead. Slow time down. */
313 icount_time_shift--;
314 }
315 if (delta < 0
316 && last_delta - ICOUNT_WOBBLE > delta * 2
317 && icount_time_shift < MAX_ICOUNT_SHIFT) {
318 /* The guest is getting too far behind. Speed time up. */
319 icount_time_shift++;
320 }
321 last_delta = delta;
322 timers_state.qemu_icount_bias = cur_icount
323 - (timers_state.qemu_icount << icount_time_shift);
324 seqlock_write_end(&timers_state.vm_clock_seqlock);
325 }
326
327 static void icount_adjust_rt(void *opaque)
328 {
329 timer_mod(icount_rt_timer,
330 qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL_RT) + 1000);
331 icount_adjust();
332 }
333
334 static void icount_adjust_vm(void *opaque)
335 {
336 timer_mod(icount_vm_timer,
337 qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
338 NANOSECONDS_PER_SECOND / 10);
339 icount_adjust();
340 }
341
342 static int64_t qemu_icount_round(int64_t count)
343 {
344 return (count + (1 << icount_time_shift) - 1) >> icount_time_shift;
345 }
346
347 static void icount_warp_rt(void)
348 {
349 unsigned seq;
350 int64_t warp_start;
351
352 /* The icount_warp_timer is rescheduled soon after vm_clock_warp_start
353 * changes from -1 to another value, so the race here is okay.
354 */
355 do {
356 seq = seqlock_read_begin(&timers_state.vm_clock_seqlock);
357 warp_start = vm_clock_warp_start;
358 } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, seq));
359
360 if (warp_start == -1) {
361 return;
362 }
363
364 seqlock_write_begin(&timers_state.vm_clock_seqlock);
365 if (runstate_is_running()) {
366 int64_t clock = REPLAY_CLOCK(REPLAY_CLOCK_VIRTUAL_RT,
367 cpu_get_clock_locked());
368 int64_t warp_delta;
369
370 warp_delta = clock - vm_clock_warp_start;
371 if (use_icount == 2) {
372 /*
373 * In adaptive mode, do not let QEMU_CLOCK_VIRTUAL run too
374 * far ahead of real time.
375 */
376 int64_t cur_icount = cpu_get_icount_locked();
377 int64_t delta = clock - cur_icount;
378 warp_delta = MIN(warp_delta, delta);
379 }
380 timers_state.qemu_icount_bias += warp_delta;
381 }
382 vm_clock_warp_start = -1;
383 seqlock_write_end(&timers_state.vm_clock_seqlock);
384
385 if (qemu_clock_expired(QEMU_CLOCK_VIRTUAL)) {
386 qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
387 }
388 }
389
/* Callback of icount_warp_timer; @opaque is unused. */
static void icount_timer_cb(void *opaque)
{
    /* No need for a checkpoint because the timer already synchronizes
     * with CHECKPOINT_CLOCK_VIRTUAL_RT.
     */
    icount_warp_rt();
}
397
398 void qtest_clock_warp(int64_t dest)
399 {
400 int64_t clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
401 AioContext *aio_context;
402 assert(qtest_enabled());
403 aio_context = qemu_get_aio_context();
404 while (clock < dest) {
405 int64_t deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
406 int64_t warp = qemu_soonest_timeout(dest - clock, deadline);
407
408 seqlock_write_begin(&timers_state.vm_clock_seqlock);
409 timers_state.qemu_icount_bias += warp;
410 seqlock_write_end(&timers_state.vm_clock_seqlock);
411
412 qemu_clock_run_timers(QEMU_CLOCK_VIRTUAL);
413 timerlist_run_timers(aio_context->tlg.tl[QEMU_CLOCK_VIRTUAL]);
414 clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
415 }
416 qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
417 }
418
419 void qemu_start_warp_timer(void)
420 {
421 int64_t clock;
422 int64_t deadline;
423
424 if (!use_icount) {
425 return;
426 }
427
428 /* Nothing to do if the VM is stopped: QEMU_CLOCK_VIRTUAL timers
429 * do not fire, so computing the deadline does not make sense.
430 */
431 if (!runstate_is_running()) {
432 return;
433 }
434
435 /* warp clock deterministically in record/replay mode */
436 if (!replay_checkpoint(CHECKPOINT_CLOCK_WARP_START)) {
437 return;
438 }
439
440 if (!all_cpu_threads_idle()) {
441 return;
442 }
443
444 if (qtest_enabled()) {
445 /* When testing, qtest commands advance icount. */
446 return;
447 }
448
449 /* We want to use the earliest deadline from ALL vm_clocks */
450 clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT);
451 deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
452 if (deadline < 0) {
453 static bool notified;
454 if (!icount_sleep && !notified) {
455 error_report("WARNING: icount sleep disabled and no active timers");
456 notified = true;
457 }
458 return;
459 }
460
461 if (deadline > 0) {
462 /*
463 * Ensure QEMU_CLOCK_VIRTUAL proceeds even when the virtual CPU goes to
464 * sleep. Otherwise, the CPU might be waiting for a future timer
465 * interrupt to wake it up, but the interrupt never comes because
466 * the vCPU isn't running any insns and thus doesn't advance the
467 * QEMU_CLOCK_VIRTUAL.
468 */
469 if (!icount_sleep) {
470 /*
471 * We never let VCPUs sleep in no sleep icount mode.
472 * If there is a pending QEMU_CLOCK_VIRTUAL timer we just advance
473 * to the next QEMU_CLOCK_VIRTUAL event and notify it.
474 * It is useful when we want a deterministic execution time,
475 * isolated from host latencies.
476 */
477 seqlock_write_begin(&timers_state.vm_clock_seqlock);
478 timers_state.qemu_icount_bias += deadline;
479 seqlock_write_end(&timers_state.vm_clock_seqlock);
480 qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
481 } else {
482 /*
483 * We do stop VCPUs and only advance QEMU_CLOCK_VIRTUAL after some
484 * "real" time, (related to the time left until the next event) has
485 * passed. The QEMU_CLOCK_VIRTUAL_RT clock will do this.
486 * This avoids that the warps are visible externally; for example,
487 * you will not be sending network packets continuously instead of
488 * every 100ms.
489 */
490 seqlock_write_begin(&timers_state.vm_clock_seqlock);
491 if (vm_clock_warp_start == -1 || vm_clock_warp_start > clock) {
492 vm_clock_warp_start = clock;
493 }
494 seqlock_write_end(&timers_state.vm_clock_seqlock);
495 timer_mod_anticipate(icount_warp_timer, clock + deadline);
496 }
497 } else if (deadline == 0) {
498 qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
499 }
500 }
501
502 static void qemu_account_warp_timer(void)
503 {
504 if (!use_icount || !icount_sleep) {
505 return;
506 }
507
508 /* Nothing to do if the VM is stopped: QEMU_CLOCK_VIRTUAL timers
509 * do not fire, so computing the deadline does not make sense.
510 */
511 if (!runstate_is_running()) {
512 return;
513 }
514
515 /* warp clock deterministically in record/replay mode */
516 if (!replay_checkpoint(CHECKPOINT_CLOCK_WARP_ACCOUNT)) {
517 return;
518 }
519
520 timer_del(icount_warp_timer);
521 icount_warp_rt();
522 }
523
524 static bool icount_state_needed(void *opaque)
525 {
526 return use_icount;
527 }
528
529 /*
530 * This is a subsection for icount migration.
531 */
532 static const VMStateDescription icount_vmstate_timers = {
533 .name = "timer/icount",
534 .version_id = 1,
535 .minimum_version_id = 1,
536 .needed = icount_state_needed,
537 .fields = (VMStateField[]) {
538 VMSTATE_INT64(qemu_icount_bias, TimersState),
539 VMSTATE_INT64(qemu_icount, TimersState),
540 VMSTATE_END_OF_LIST()
541 }
542 };
543
544 static const VMStateDescription vmstate_timers = {
545 .name = "timer",
546 .version_id = 2,
547 .minimum_version_id = 1,
548 .fields = (VMStateField[]) {
549 VMSTATE_INT64(cpu_ticks_offset, TimersState),
550 VMSTATE_INT64(dummy, TimersState),
551 VMSTATE_INT64_V(cpu_clock_offset, TimersState, 2),
552 VMSTATE_END_OF_LIST()
553 },
554 .subsections = (const VMStateDescription*[]) {
555 &icount_vmstate_timers,
556 NULL
557 }
558 };
559
560 static void cpu_throttle_thread(CPUState *cpu, void *opaque)
561 {
562 double pct;
563 double throttle_ratio;
564 long sleeptime_ns;
565
566 if (!cpu_throttle_get_percentage()) {
567 return;
568 }
569
570 pct = (double)cpu_throttle_get_percentage()/100;
571 throttle_ratio = pct / (1 - pct);
572 sleeptime_ns = (long)(throttle_ratio * CPU_THROTTLE_TIMESLICE_NS);
573
574 qemu_mutex_unlock_iothread();
575 atomic_set(&cpu->throttle_thread_scheduled, 0);
576 g_usleep(sleeptime_ns / 1000); /* Convert ns to us for usleep call */
577 qemu_mutex_lock_iothread();
578 }
579
580 static void cpu_throttle_timer_tick(void *opaque)
581 {
582 CPUState *cpu;
583 double pct;
584
585 /* Stop the timer if needed */
586 if (!cpu_throttle_get_percentage()) {
587 return;
588 }
589 CPU_FOREACH(cpu) {
590 if (!atomic_xchg(&cpu->throttle_thread_scheduled, 1)) {
591 async_run_on_cpu(cpu, cpu_throttle_thread, NULL);
592 }
593 }
594
595 pct = (double)cpu_throttle_get_percentage()/100;
596 timer_mod(throttle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT) +
597 CPU_THROTTLE_TIMESLICE_NS / (1-pct));
598 }
599
600 void cpu_throttle_set(int new_throttle_pct)
601 {
602 /* Ensure throttle percentage is within valid range */
603 new_throttle_pct = MIN(new_throttle_pct, CPU_THROTTLE_PCT_MAX);
604 new_throttle_pct = MAX(new_throttle_pct, CPU_THROTTLE_PCT_MIN);
605
606 atomic_set(&throttle_percentage, new_throttle_pct);
607
608 timer_mod(throttle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT) +
609 CPU_THROTTLE_TIMESLICE_NS);
610 }
611
612 void cpu_throttle_stop(void)
613 {
614 atomic_set(&throttle_percentage, 0);
615 }
616
/* True while a non-zero throttle percentage is set. */
bool cpu_throttle_active(void)
{
    return cpu_throttle_get_percentage() ? true : false;
}
621
622 int cpu_throttle_get_percentage(void)
623 {
624 return atomic_read(&throttle_percentage);
625 }
626
627 void cpu_ticks_init(void)
628 {
629 seqlock_init(&timers_state.vm_clock_seqlock);
630 vmstate_register(NULL, 0, &vmstate_timers, &timers_state);
631 throttle_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL_RT,
632 cpu_throttle_timer_tick, NULL);
633 }
634
635 void configure_icount(QemuOpts *opts, Error **errp)
636 {
637 const char *option;
638 char *rem_str = NULL;
639
640 option = qemu_opt_get(opts, "shift");
641 if (!option) {
642 if (qemu_opt_get(opts, "align") != NULL) {
643 error_setg(errp, "Please specify shift option when using align");
644 }
645 return;
646 }
647
648 icount_sleep = qemu_opt_get_bool(opts, "sleep", true);
649 if (icount_sleep) {
650 icount_warp_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL_RT,
651 icount_timer_cb, NULL);
652 }
653
654 icount_align_option = qemu_opt_get_bool(opts, "align", false);
655
656 if (icount_align_option && !icount_sleep) {
657 error_setg(errp, "align=on and sleep=off are incompatible");
658 }
659 if (strcmp(option, "auto") != 0) {
660 errno = 0;
661 icount_time_shift = strtol(option, &rem_str, 0);
662 if (errno != 0 || *rem_str != '\0' || !strlen(option)) {
663 error_setg(errp, "icount: Invalid shift value");
664 }
665 use_icount = 1;
666 return;
667 } else if (icount_align_option) {
668 error_setg(errp, "shift=auto and align=on are incompatible");
669 } else if (!icount_sleep) {
670 error_setg(errp, "shift=auto and sleep=off are incompatible");
671 }
672
673 use_icount = 2;
674
675 /* 125MIPS seems a reasonable initial guess at the guest speed.
676 It will be corrected fairly quickly anyway. */
677 icount_time_shift = 3;
678
679 /* Have both realtime and virtual time triggers for speed adjustment.
680 The realtime trigger catches emulated time passing too slowly,
681 the virtual time trigger catches emulated time passing too fast.
682 Realtime triggers occur even when idle, so use them less frequently
683 than VM triggers. */
684 icount_rt_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL_RT,
685 icount_adjust_rt, NULL);
686 timer_mod(icount_rt_timer,
687 qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL_RT) + 1000);
688 icount_vm_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
689 icount_adjust_vm, NULL);
690 timer_mod(icount_vm_timer,
691 qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
692 NANOSECONDS_PER_SECOND / 10);
693 }
694
695 /***********************************************************/
696 void hw_error(const char *fmt, ...)
697 {
698 va_list ap;
699 CPUState *cpu;
700
701 va_start(ap, fmt);
702 fprintf(stderr, "qemu: hardware error: ");
703 vfprintf(stderr, fmt, ap);
704 fprintf(stderr, "\n");
705 CPU_FOREACH(cpu) {
706 fprintf(stderr, "CPU #%d:\n", cpu->cpu_index);
707 cpu_dump_state(cpu, stderr, fprintf, CPU_DUMP_FPU);
708 }
709 va_end(ap);
710 abort();
711 }
712
713 void cpu_synchronize_all_states(void)
714 {
715 CPUState *cpu;
716
717 CPU_FOREACH(cpu) {
718 cpu_synchronize_state(cpu);
719 }
720 }
721
722 void cpu_synchronize_all_post_reset(void)
723 {
724 CPUState *cpu;
725
726 CPU_FOREACH(cpu) {
727 cpu_synchronize_post_reset(cpu);
728 }
729 }
730
731 void cpu_synchronize_all_post_init(void)
732 {
733 CPUState *cpu;
734
735 CPU_FOREACH(cpu) {
736 cpu_synchronize_post_init(cpu);
737 }
738 }
739
740 static int do_vm_stop(RunState state)
741 {
742 int ret = 0;
743
744 if (runstate_is_running()) {
745 cpu_disable_ticks();
746 pause_all_vcpus();
747 runstate_set(state);
748 vm_state_notify(0, state);
749 qapi_event_send_stop(&error_abort);
750 }
751
752 bdrv_drain_all();
753 ret = blk_flush_all();
754
755 return ret;
756 }
757
758 static bool cpu_can_run(CPUState *cpu)
759 {
760 if (cpu->stop) {
761 return false;
762 }
763 if (cpu_is_stopped(cpu)) {
764 return false;
765 }
766 return true;
767 }
768
769 static void cpu_handle_guest_debug(CPUState *cpu)
770 {
771 gdb_set_stop_cpu(cpu);
772 qemu_system_debug_request();
773 cpu->stopped = true;
774 }
775
776 #ifdef CONFIG_LINUX
777 static void sigbus_reraise(void)
778 {
779 sigset_t set;
780 struct sigaction action;
781
782 memset(&action, 0, sizeof(action));
783 action.sa_handler = SIG_DFL;
784 if (!sigaction(SIGBUS, &action, NULL)) {
785 raise(SIGBUS);
786 sigemptyset(&set);
787 sigaddset(&set, SIGBUS);
788 pthread_sigmask(SIG_UNBLOCK, &set, NULL);
789 }
790 perror("Failed to re-raise SIGBUS!\n");
791 abort();
792 }
793
794 static void sigbus_handler(int n, struct qemu_signalfd_siginfo *siginfo,
795 void *ctx)
796 {
797 if (kvm_on_sigbus(siginfo->ssi_code,
798 (void *)(intptr_t)siginfo->ssi_addr)) {
799 sigbus_reraise();
800 }
801 }
802
803 static void qemu_init_sigbus(void)
804 {
805 struct sigaction action;
806
807 memset(&action, 0, sizeof(action));
808 action.sa_flags = SA_SIGINFO;
809 action.sa_sigaction = (void (*)(int, siginfo_t*, void*))sigbus_handler;
810 sigaction(SIGBUS, &action, NULL);
811
812 prctl(PR_MCE_KILL, PR_MCE_KILL_SET, PR_MCE_KILL_EARLY, 0, 0);
813 }
814
815 static void qemu_kvm_eat_signals(CPUState *cpu)
816 {
817 struct timespec ts = { 0, 0 };
818 siginfo_t siginfo;
819 sigset_t waitset;
820 sigset_t chkset;
821 int r;
822
823 sigemptyset(&waitset);
824 sigaddset(&waitset, SIG_IPI);
825 sigaddset(&waitset, SIGBUS);
826
827 do {
828 r = sigtimedwait(&waitset, &siginfo, &ts);
829 if (r == -1 && !(errno == EAGAIN || errno == EINTR)) {
830 perror("sigtimedwait");
831 exit(1);
832 }
833
834 switch (r) {
835 case SIGBUS:
836 if (kvm_on_sigbus_vcpu(cpu, siginfo.si_code, siginfo.si_addr)) {
837 sigbus_reraise();
838 }
839 break;
840 default:
841 break;
842 }
843
844 r = sigpending(&chkset);
845 if (r == -1) {
846 perror("sigpending");
847 exit(1);
848 }
849 } while (sigismember(&chkset, SIG_IPI) || sigismember(&chkset, SIGBUS));
850 }
851
852 #else /* !CONFIG_LINUX */
853
/* Non-Linux build: no SIGBUS setup is done. */
static void qemu_init_sigbus(void)
{
    /* intentionally empty */
}
857
858 static void qemu_kvm_eat_signals(CPUState *cpu)
859 {
860 }
861 #endif /* !CONFIG_LINUX */
862
863 #ifndef _WIN32
/* No-op handler installed for SIG_IPI by qemu_kvm_init_cpu_signals(). */
static void dummy_signal(int sig)
{
    /* intentionally empty */
}
867
868 static void qemu_kvm_init_cpu_signals(CPUState *cpu)
869 {
870 int r;
871 sigset_t set;
872 struct sigaction sigact;
873
874 memset(&sigact, 0, sizeof(sigact));
875 sigact.sa_handler = dummy_signal;
876 sigaction(SIG_IPI, &sigact, NULL);
877
878 pthread_sigmask(SIG_BLOCK, NULL, &set);
879 sigdelset(&set, SIG_IPI);
880 sigdelset(&set, SIGBUS);
881 r = kvm_set_signal_mask(cpu, &set);
882 if (r) {
883 fprintf(stderr, "kvm_set_signal_mask: %s\n", strerror(-r));
884 exit(1);
885 }
886 }
887
888 #else /* _WIN32 */
889 static void qemu_kvm_init_cpu_signals(CPUState *cpu)
890 {
891 abort();
892 }
893 #endif /* _WIN32 */
894
895 static QemuMutex qemu_global_mutex;
896 static QemuCond qemu_io_proceeded_cond;
897 static unsigned iothread_requesting_mutex;
898
899 static QemuThread io_thread;
900
901 /* cpu creation */
902 static QemuCond qemu_cpu_cond;
903 /* system init */
904 static QemuCond qemu_pause_cond;
905 static QemuCond qemu_work_cond;
906
907 void qemu_init_cpu_loop(void)
908 {
909 qemu_init_sigbus();
910 qemu_cond_init(&qemu_cpu_cond);
911 qemu_cond_init(&qemu_pause_cond);
912 qemu_cond_init(&qemu_work_cond);
913 qemu_cond_init(&qemu_io_proceeded_cond);
914 qemu_mutex_init(&qemu_global_mutex);
915
916 qemu_thread_get_self(&io_thread);
917 }
918
919 static void queue_work_on_cpu(CPUState *cpu, struct qemu_work_item *wi)
920 {
921 qemu_mutex_lock(&cpu->work_mutex);
922 if (cpu->queued_work_first == NULL) {
923 cpu->queued_work_first = wi;
924 } else {
925 cpu->queued_work_last->next = wi;
926 }
927 cpu->queued_work_last = wi;
928 wi->next = NULL;
929 wi->done = false;
930 qemu_mutex_unlock(&cpu->work_mutex);
931
932 qemu_cpu_kick(cpu);
933 }
934
935 void run_on_cpu(CPUState *cpu, run_on_cpu_func func, void *data)
936 {
937 struct qemu_work_item wi;
938
939 if (qemu_cpu_is_self(cpu)) {
940 func(cpu, data);
941 return;
942 }
943
944 wi.func = func;
945 wi.data = data;
946 wi.free = false;
947
948 queue_work_on_cpu(cpu, &wi);
949 while (!atomic_mb_read(&wi.done)) {
950 CPUState *self_cpu = current_cpu;
951
952 qemu_cond_wait(&qemu_work_cond, &qemu_global_mutex);
953 current_cpu = self_cpu;
954 }
955 }
956
957 void async_run_on_cpu(CPUState *cpu, run_on_cpu_func func, void *data)
958 {
959 struct qemu_work_item *wi;
960
961 if (qemu_cpu_is_self(cpu)) {
962 func(cpu, data);
963 return;
964 }
965
966 wi = g_malloc0(sizeof(struct qemu_work_item));
967 wi->func = func;
968 wi->data = data;
969 wi->free = true;
970
971 queue_work_on_cpu(cpu, wi);
972 }
973
974 static void qemu_kvm_destroy_vcpu(CPUState *cpu)
975 {
976 if (kvm_destroy_vcpu(cpu) < 0) {
977 error_report("kvm_destroy_vcpu failed");
978 exit(EXIT_FAILURE);
979 }
980 }
981
982 static void qemu_tcg_destroy_vcpu(CPUState *cpu)
983 {
984 }
985
986 static void flush_queued_work(CPUState *cpu)
987 {
988 struct qemu_work_item *wi;
989
990 if (cpu->queued_work_first == NULL) {
991 return;
992 }
993
994 qemu_mutex_lock(&cpu->work_mutex);
995 while (cpu->queued_work_first != NULL) {
996 wi = cpu->queued_work_first;
997 cpu->queued_work_first = wi->next;
998 if (!cpu->queued_work_first) {
999 cpu->queued_work_last = NULL;
1000 }
1001 qemu_mutex_unlock(&cpu->work_mutex);
1002 wi->func(cpu, wi->data);
1003 qemu_mutex_lock(&cpu->work_mutex);
1004 if (wi->free) {
1005 g_free(wi);
1006 } else {
1007 atomic_mb_set(&wi->done, true);
1008 }
1009 }
1010 qemu_mutex_unlock(&cpu->work_mutex);
1011 qemu_cond_broadcast(&qemu_work_cond);
1012 }
1013
1014 static void qemu_wait_io_event_common(CPUState *cpu)
1015 {
1016 if (cpu->stop) {
1017 cpu->stop = false;
1018 cpu->stopped = true;
1019 qemu_cond_broadcast(&qemu_pause_cond);
1020 }
1021 flush_queued_work(cpu);
1022 cpu->thread_kicked = false;
1023 }
1024
1025 static void qemu_tcg_wait_io_event(CPUState *cpu)
1026 {
1027 while (all_cpu_threads_idle()) {
1028 qemu_cond_wait(cpu->halt_cond, &qemu_global_mutex);
1029 }
1030
1031 while (iothread_requesting_mutex) {
1032 qemu_cond_wait(&qemu_io_proceeded_cond, &qemu_global_mutex);
1033 }
1034
1035 CPU_FOREACH(cpu) {
1036 qemu_wait_io_event_common(cpu);
1037 }
1038 }
1039
1040 static void qemu_kvm_wait_io_event(CPUState *cpu)
1041 {
1042 while (cpu_thread_is_idle(cpu)) {
1043 qemu_cond_wait(cpu->halt_cond, &qemu_global_mutex);
1044 }
1045
1046 qemu_kvm_eat_signals(cpu);
1047 qemu_wait_io_event_common(cpu);
1048 }
1049
1050 static void *qemu_kvm_cpu_thread_fn(void *arg)
1051 {
1052 CPUState *cpu = arg;
1053 int r;
1054
1055 rcu_register_thread();
1056
1057 qemu_mutex_lock_iothread();
1058 qemu_thread_get_self(cpu->thread);
1059 cpu->thread_id = qemu_get_thread_id();
1060 cpu->can_do_io = 1;
1061 current_cpu = cpu;
1062
1063 r = kvm_init_vcpu(cpu);
1064 if (r < 0) {
1065 fprintf(stderr, "kvm_init_vcpu failed: %s\n", strerror(-r));
1066 exit(1);
1067 }
1068
1069 qemu_kvm_init_cpu_signals(cpu);
1070
1071 /* signal CPU creation */
1072 cpu->created = true;
1073 qemu_cond_signal(&qemu_cpu_cond);
1074
1075 do {
1076 if (cpu_can_run(cpu)) {
1077 r = kvm_cpu_exec(cpu);
1078 if (r == EXCP_DEBUG) {
1079 cpu_handle_guest_debug(cpu);
1080 }
1081 }
1082 qemu_kvm_wait_io_event(cpu);
1083 } while (!cpu->unplug || cpu_can_run(cpu));
1084
1085 qemu_kvm_destroy_vcpu(cpu);
1086 cpu->created = false;
1087 qemu_cond_signal(&qemu_cpu_cond);
1088 qemu_mutex_unlock_iothread();
1089 return NULL;
1090 }
1091
1092 static void *qemu_dummy_cpu_thread_fn(void *arg)
1093 {
1094 #ifdef _WIN32
1095 fprintf(stderr, "qtest is not supported under Windows\n");
1096 exit(1);
1097 #else
1098 CPUState *cpu = arg;
1099 sigset_t waitset;
1100 int r;
1101
1102 rcu_register_thread();
1103
1104 qemu_mutex_lock_iothread();
1105 qemu_thread_get_self(cpu->thread);
1106 cpu->thread_id = qemu_get_thread_id();
1107 cpu->can_do_io = 1;
1108
1109 sigemptyset(&waitset);
1110 sigaddset(&waitset, SIG_IPI);
1111
1112 /* signal CPU creation */
1113 cpu->created = true;
1114 qemu_cond_signal(&qemu_cpu_cond);
1115
1116 current_cpu = cpu;
1117 while (1) {
1118 current_cpu = NULL;
1119 qemu_mutex_unlock_iothread();
1120 do {
1121 int sig;
1122 r = sigwait(&waitset, &sig);
1123 } while (r == -1 && (errno == EAGAIN || errno == EINTR));
1124 if (r == -1) {
1125 perror("sigwait");
1126 exit(1);
1127 }
1128 qemu_mutex_lock_iothread();
1129 current_cpu = cpu;
1130 qemu_wait_io_event_common(cpu);
1131 }
1132
1133 return NULL;
1134 #endif
1135 }
1136
1137 static void tcg_exec_all(void);
1138
1139 static void *qemu_tcg_cpu_thread_fn(void *arg)
1140 {
1141 CPUState *cpu = arg;
1142 CPUState *remove_cpu = NULL;
1143
1144 rcu_register_thread();
1145
1146 qemu_mutex_lock_iothread();
1147 qemu_thread_get_self(cpu->thread);
1148
1149 CPU_FOREACH(cpu) {
1150 cpu->thread_id = qemu_get_thread_id();
1151 cpu->created = true;
1152 cpu->can_do_io = 1;
1153 }
1154 qemu_cond_signal(&qemu_cpu_cond);
1155
1156 /* wait for initial kick-off after machine start */
1157 while (first_cpu->stopped) {
1158 qemu_cond_wait(first_cpu->halt_cond, &qemu_global_mutex);
1159
1160 /* process any pending work */
1161 CPU_FOREACH(cpu) {
1162 qemu_wait_io_event_common(cpu);
1163 }
1164 }
1165
1166 /* process any pending work */
1167 atomic_mb_set(&exit_request, 1);
1168
1169 while (1) {
1170 tcg_exec_all();
1171
1172 if (use_icount) {
1173 int64_t deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
1174
1175 if (deadline == 0) {
1176 qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
1177 }
1178 }
1179 qemu_tcg_wait_io_event(QTAILQ_FIRST(&cpus));
1180 CPU_FOREACH(cpu) {
1181 if (cpu->unplug && !cpu_can_run(cpu)) {
1182 remove_cpu = cpu;
1183 break;
1184 }
1185 }
1186 if (remove_cpu) {
1187 qemu_tcg_destroy_vcpu(remove_cpu);
1188 cpu->created = false;
1189 qemu_cond_signal(&qemu_cpu_cond);
1190 remove_cpu = NULL;
1191 }
1192 }
1193
1194 return NULL;
1195 }
1196
1197 static void qemu_cpu_kick_thread(CPUState *cpu)
1198 {
1199 #ifndef _WIN32
1200 int err;
1201
1202 if (cpu->thread_kicked) {
1203 return;
1204 }
1205 cpu->thread_kicked = true;
1206 err = pthread_kill(cpu->thread->thread, SIG_IPI);
1207 if (err) {
1208 fprintf(stderr, "qemu:%s: %s", __func__, strerror(err));
1209 exit(1);
1210 }
1211 #else /* _WIN32 */
1212 abort();
1213 #endif
1214 }
1215
1216 static void qemu_cpu_kick_no_halt(void)
1217 {
1218 CPUState *cpu;
1219 /* Ensure whatever caused the exit has reached the CPU threads before
1220 * writing exit_request.
1221 */
1222 atomic_mb_set(&exit_request, 1);
1223 cpu = atomic_mb_read(&tcg_current_cpu);
1224 if (cpu) {
1225 cpu_exit(cpu);
1226 }
1227 }
1228
1229 void qemu_cpu_kick(CPUState *cpu)
1230 {
1231 qemu_cond_broadcast(cpu->halt_cond);
1232 if (tcg_enabled()) {
1233 qemu_cpu_kick_no_halt();
1234 } else {
1235 qemu_cpu_kick_thread(cpu);
1236 }
1237 }
1238
1239 void qemu_cpu_kick_self(void)
1240 {
1241 assert(current_cpu);
1242 qemu_cpu_kick_thread(current_cpu);
1243 }
1244
1245 bool qemu_cpu_is_self(CPUState *cpu)
1246 {
1247 return qemu_thread_is_self(cpu->thread);
1248 }
1249
1250 bool qemu_in_vcpu_thread(void)
1251 {
1252 return current_cpu && qemu_cpu_is_self(current_cpu);
1253 }
1254
/* Per-thread flag: true while this thread holds the iothread lock. */
static __thread bool iothread_locked;

/* Return the calling thread's iothread-lock flag. */
bool qemu_mutex_iothread_locked(void)
{
    bool held = iothread_locked;

    return held;
}
1261
1262 void qemu_mutex_lock_iothread(void)
1263 {
1264 atomic_inc(&iothread_requesting_mutex);
1265 /* In the simple case there is no need to bump the VCPU thread out of
1266 * TCG code execution.
1267 */
1268 if (!tcg_enabled() || qemu_in_vcpu_thread() ||
1269 !first_cpu || !first_cpu->created) {
1270 qemu_mutex_lock(&qemu_global_mutex);
1271 atomic_dec(&iothread_requesting_mutex);
1272 } else {
1273 if (qemu_mutex_trylock(&qemu_global_mutex)) {
1274 qemu_cpu_kick_no_halt();
1275 qemu_mutex_lock(&qemu_global_mutex);
1276 }
1277 atomic_dec(&iothread_requesting_mutex);
1278 qemu_cond_broadcast(&qemu_io_proceeded_cond);
1279 }
1280 iothread_locked = true;
1281 }
1282
1283 void qemu_mutex_unlock_iothread(void)
1284 {
1285 iothread_locked = false;
1286 qemu_mutex_unlock(&qemu_global_mutex);
1287 }
1288
1289 static int all_vcpus_paused(void)
1290 {
1291 CPUState *cpu;
1292
1293 CPU_FOREACH(cpu) {
1294 if (!cpu->stopped) {
1295 return 0;
1296 }
1297 }
1298
1299 return 1;
1300 }
1301
1302 void pause_all_vcpus(void)
1303 {
1304 CPUState *cpu;
1305
1306 qemu_clock_enable(QEMU_CLOCK_VIRTUAL, false);
1307 CPU_FOREACH(cpu) {
1308 cpu->stop = true;
1309 qemu_cpu_kick(cpu);
1310 }
1311
1312 if (qemu_in_vcpu_thread()) {
1313 cpu_stop_current();
1314 if (!kvm_enabled()) {
1315 CPU_FOREACH(cpu) {
1316 cpu->stop = false;
1317 cpu->stopped = true;
1318 }
1319 return;
1320 }
1321 }
1322
1323 while (!all_vcpus_paused()) {
1324 qemu_cond_wait(&qemu_pause_cond, &qemu_global_mutex);
1325 CPU_FOREACH(cpu) {
1326 qemu_cpu_kick(cpu);
1327 }
1328 }
1329 }
1330
1331 void cpu_resume(CPUState *cpu)
1332 {
1333 cpu->stop = false;
1334 cpu->stopped = false;
1335 qemu_cpu_kick(cpu);
1336 }
1337
1338 void resume_all_vcpus(void)
1339 {
1340 CPUState *cpu;
1341
1342 qemu_clock_enable(QEMU_CLOCK_VIRTUAL, true);
1343 CPU_FOREACH(cpu) {
1344 cpu_resume(cpu);
1345 }
1346 }
1347
1348 void cpu_remove(CPUState *cpu)
1349 {
1350 cpu->stop = true;
1351 cpu->unplug = true;
1352 qemu_cpu_kick(cpu);
1353 }
1354
1355 void cpu_remove_sync(CPUState *cpu)
1356 {
1357 cpu_remove(cpu);
1358 while (cpu->created) {
1359 qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
1360 }
1361 }
1362
1363 /* For temporary buffers for forming a name */
1364 #define VCPU_THREAD_NAME_SIZE 16
1365
1366 static void qemu_tcg_init_vcpu(CPUState *cpu)
1367 {
1368 char thread_name[VCPU_THREAD_NAME_SIZE];
1369 static QemuCond *tcg_halt_cond;
1370 static QemuThread *tcg_cpu_thread;
1371
1372 /* share a single thread for all cpus with TCG */
1373 if (!tcg_cpu_thread) {
1374 cpu->thread = g_malloc0(sizeof(QemuThread));
1375 cpu->halt_cond = g_malloc0(sizeof(QemuCond));
1376 qemu_cond_init(cpu->halt_cond);
1377 tcg_halt_cond = cpu->halt_cond;
1378 snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/TCG",
1379 cpu->cpu_index);
1380 qemu_thread_create(cpu->thread, thread_name, qemu_tcg_cpu_thread_fn,
1381 cpu, QEMU_THREAD_JOINABLE);
1382 #ifdef _WIN32
1383 cpu->hThread = qemu_thread_get_handle(cpu->thread);
1384 #endif
1385 while (!cpu->created) {
1386 qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
1387 }
1388 tcg_cpu_thread = cpu->thread;
1389 } else {
1390 cpu->thread = tcg_cpu_thread;
1391 cpu->halt_cond = tcg_halt_cond;
1392 }
1393 }
1394
1395 static void qemu_kvm_start_vcpu(CPUState *cpu)
1396 {
1397 char thread_name[VCPU_THREAD_NAME_SIZE];
1398
1399 cpu->thread = g_malloc0(sizeof(QemuThread));
1400 cpu->halt_cond = g_malloc0(sizeof(QemuCond));
1401 qemu_cond_init(cpu->halt_cond);
1402 snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/KVM",
1403 cpu->cpu_index);
1404 qemu_thread_create(cpu->thread, thread_name, qemu_kvm_cpu_thread_fn,
1405 cpu, QEMU_THREAD_JOINABLE);
1406 while (!cpu->created) {
1407 qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
1408 }
1409 }
1410
1411 static void qemu_dummy_start_vcpu(CPUState *cpu)
1412 {
1413 char thread_name[VCPU_THREAD_NAME_SIZE];
1414
1415 cpu->thread = g_malloc0(sizeof(QemuThread));
1416 cpu->halt_cond = g_malloc0(sizeof(QemuCond));
1417 qemu_cond_init(cpu->halt_cond);
1418 snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/DUMMY",
1419 cpu->cpu_index);
1420 qemu_thread_create(cpu->thread, thread_name, qemu_dummy_cpu_thread_fn, cpu,
1421 QEMU_THREAD_JOINABLE);
1422 while (!cpu->created) {
1423 qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
1424 }
1425 }
1426
1427 void qemu_init_vcpu(CPUState *cpu)
1428 {
1429 cpu->nr_cores = smp_cores;
1430 cpu->nr_threads = smp_threads;
1431 cpu->stopped = true;
1432
1433 if (!cpu->as) {
1434 /* If the target cpu hasn't set up any address spaces itself,
1435 * give it the default one.
1436 */
1437 AddressSpace *as = address_space_init_shareable(cpu->memory,
1438 "cpu-memory");
1439 cpu->num_ases = 1;
1440 cpu_address_space_init(cpu, as, 0);
1441 }
1442
1443 if (kvm_enabled()) {
1444 qemu_kvm_start_vcpu(cpu);
1445 } else if (tcg_enabled()) {
1446 qemu_tcg_init_vcpu(cpu);
1447 } else {
1448 qemu_dummy_start_vcpu(cpu);
1449 }
1450 }
1451
1452 void cpu_stop_current(void)
1453 {
1454 if (current_cpu) {
1455 current_cpu->stop = false;
1456 current_cpu->stopped = true;
1457 cpu_exit(current_cpu);
1458 qemu_cond_broadcast(&qemu_pause_cond);
1459 }
1460 }
1461
1462 int vm_stop(RunState state)
1463 {
1464 if (qemu_in_vcpu_thread()) {
1465 qemu_system_vmstop_request_prepare();
1466 qemu_system_vmstop_request(state);
1467 /*
1468 * FIXME: should not return to device code in case
1469 * vm_stop() has been requested.
1470 */
1471 cpu_stop_current();
1472 return 0;
1473 }
1474
1475 return do_vm_stop(state);
1476 }
1477
1478 /* does a state transition even if the VM is already stopped,
1479 current state is forgotten forever */
1480 int vm_stop_force_state(RunState state)
1481 {
1482 if (runstate_is_running()) {
1483 return vm_stop(state);
1484 } else {
1485 runstate_set(state);
1486
1487 bdrv_drain_all();
1488 /* Make sure to return an error if the flush in a previous vm_stop()
1489 * failed. */
1490 return blk_flush_all();
1491 }
1492 }
1493
1494 static int64_t tcg_get_icount_limit(void)
1495 {
1496 int64_t deadline;
1497
1498 if (replay_mode != REPLAY_MODE_PLAY) {
1499 deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
1500
1501 /* Maintain prior (possibly buggy) behaviour where if no deadline
1502 * was set (as there is no QEMU_CLOCK_VIRTUAL timer) or it is more than
1503 * INT32_MAX nanoseconds ahead, we still use INT32_MAX
1504 * nanoseconds.
1505 */
1506 if ((deadline < 0) || (deadline > INT32_MAX)) {
1507 deadline = INT32_MAX;
1508 }
1509
1510 return qemu_icount_round(deadline);
1511 } else {
1512 return replay_get_instructions();
1513 }
1514 }
1515
1516 static int tcg_cpu_exec(CPUState *cpu)
1517 {
1518 int ret;
1519 #ifdef CONFIG_PROFILER
1520 int64_t ti;
1521 #endif
1522
1523 #ifdef CONFIG_PROFILER
1524 ti = profile_getclock();
1525 #endif
1526 if (use_icount) {
1527 int64_t count;
1528 int decr;
1529 timers_state.qemu_icount -= (cpu->icount_decr.u16.low
1530 + cpu->icount_extra);
1531 cpu->icount_decr.u16.low = 0;
1532 cpu->icount_extra = 0;
1533 count = tcg_get_icount_limit();
1534 timers_state.qemu_icount += count;
1535 decr = (count > 0xffff) ? 0xffff : count;
1536 count -= decr;
1537 cpu->icount_decr.u16.low = decr;
1538 cpu->icount_extra = count;
1539 }
1540 ret = cpu_exec(cpu);
1541 #ifdef CONFIG_PROFILER
1542 tcg_time += profile_getclock() - ti;
1543 #endif
1544 if (use_icount) {
1545 /* Fold pending instructions back into the
1546 instruction counter, and clear the interrupt flag. */
1547 timers_state.qemu_icount -= (cpu->icount_decr.u16.low
1548 + cpu->icount_extra);
1549 cpu->icount_decr.u32 = 0;
1550 cpu->icount_extra = 0;
1551 replay_account_executed_instructions();
1552 }
1553 return ret;
1554 }
1555
1556 static void tcg_exec_all(void)
1557 {
1558 int r;
1559
1560 /* Account partial waits to QEMU_CLOCK_VIRTUAL. */
1561 qemu_account_warp_timer();
1562
1563 if (next_cpu == NULL) {
1564 next_cpu = first_cpu;
1565 }
1566 for (; next_cpu != NULL && !exit_request; next_cpu = CPU_NEXT(next_cpu)) {
1567 CPUState *cpu = next_cpu;
1568
1569 qemu_clock_enable(QEMU_CLOCK_VIRTUAL,
1570 (cpu->singlestep_enabled & SSTEP_NOTIMER) == 0);
1571
1572 if (cpu_can_run(cpu)) {
1573 r = tcg_cpu_exec(cpu);
1574 if (r == EXCP_DEBUG) {
1575 cpu_handle_guest_debug(cpu);
1576 break;
1577 }
1578 } else if (cpu->stop || cpu->stopped) {
1579 if (cpu->unplug) {
1580 next_cpu = CPU_NEXT(cpu);
1581 }
1582 break;
1583 }
1584 }
1585
1586 /* Pairs with smp_wmb in qemu_cpu_kick. */
1587 atomic_mb_set(&exit_request, 0);
1588 }
1589
1590 void list_cpus(FILE *f, fprintf_function cpu_fprintf, const char *optarg)
1591 {
1592 /* XXX: implement xxx_cpu_list for targets that still miss it */
1593 #if defined(cpu_list)
1594 cpu_list(f, cpu_fprintf);
1595 #endif
1596 }
1597
1598 CpuInfoList *qmp_query_cpus(Error **errp)
1599 {
1600 CpuInfoList *head = NULL, *cur_item = NULL;
1601 CPUState *cpu;
1602
1603 CPU_FOREACH(cpu) {
1604 CpuInfoList *info;
1605 #if defined(TARGET_I386)
1606 X86CPU *x86_cpu = X86_CPU(cpu);
1607 CPUX86State *env = &x86_cpu->env;
1608 #elif defined(TARGET_PPC)
1609 PowerPCCPU *ppc_cpu = POWERPC_CPU(cpu);
1610 CPUPPCState *env = &ppc_cpu->env;
1611 #elif defined(TARGET_SPARC)
1612 SPARCCPU *sparc_cpu = SPARC_CPU(cpu);
1613 CPUSPARCState *env = &sparc_cpu->env;
1614 #elif defined(TARGET_MIPS)
1615 MIPSCPU *mips_cpu = MIPS_CPU(cpu);
1616 CPUMIPSState *env = &mips_cpu->env;
1617 #elif defined(TARGET_TRICORE)
1618 TriCoreCPU *tricore_cpu = TRICORE_CPU(cpu);
1619 CPUTriCoreState *env = &tricore_cpu->env;
1620 #endif
1621
1622 cpu_synchronize_state(cpu);
1623
1624 info = g_malloc0(sizeof(*info));
1625 info->value = g_malloc0(sizeof(*info->value));
1626 info->value->CPU = cpu->cpu_index;
1627 info->value->current = (cpu == first_cpu);
1628 info->value->halted = cpu->halted;
1629 info->value->qom_path = object_get_canonical_path(OBJECT(cpu));
1630 info->value->thread_id = cpu->thread_id;
1631 #if defined(TARGET_I386)
1632 info->value->arch = CPU_INFO_ARCH_X86;
1633 info->value->u.x86.pc = env->eip + env->segs[R_CS].base;
1634 #elif defined(TARGET_PPC)
1635 info->value->arch = CPU_INFO_ARCH_PPC;
1636 info->value->u.ppc.nip = env->nip;
1637 #elif defined(TARGET_SPARC)
1638 info->value->arch = CPU_INFO_ARCH_SPARC;
1639 info->value->u.q_sparc.pc = env->pc;
1640 info->value->u.q_sparc.npc = env->npc;
1641 #elif defined(TARGET_MIPS)
1642 info->value->arch = CPU_INFO_ARCH_MIPS;
1643 info->value->u.q_mips.PC = env->active_tc.PC;
1644 #elif defined(TARGET_TRICORE)
1645 info->value->arch = CPU_INFO_ARCH_TRICORE;
1646 info->value->u.tricore.PC = env->PC;
1647 #else
1648 info->value->arch = CPU_INFO_ARCH_OTHER;
1649 #endif
1650
1651 /* XXX: waiting for the qapi to support GSList */
1652 if (!cur_item) {
1653 head = cur_item = info;
1654 } else {
1655 cur_item->next = info;
1656 cur_item = info;
1657 }
1658 }
1659
1660 return head;
1661 }
1662
1663 void qmp_memsave(int64_t addr, int64_t size, const char *filename,
1664 bool has_cpu, int64_t cpu_index, Error **errp)
1665 {
1666 FILE *f;
1667 uint32_t l;
1668 CPUState *cpu;
1669 uint8_t buf[1024];
1670 int64_t orig_addr = addr, orig_size = size;
1671
1672 if (!has_cpu) {
1673 cpu_index = 0;
1674 }
1675
1676 cpu = qemu_get_cpu(cpu_index);
1677 if (cpu == NULL) {
1678 error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "cpu-index",
1679 "a CPU number");
1680 return;
1681 }
1682
1683 f = fopen(filename, "wb");
1684 if (!f) {
1685 error_setg_file_open(errp, errno, filename);
1686 return;
1687 }
1688
1689 while (size != 0) {
1690 l = sizeof(buf);
1691 if (l > size)
1692 l = size;
1693 if (cpu_memory_rw_debug(cpu, addr, buf, l, 0) != 0) {
1694 error_setg(errp, "Invalid addr 0x%016" PRIx64 "/size %" PRId64
1695 " specified", orig_addr, orig_size);
1696 goto exit;
1697 }
1698 if (fwrite(buf, 1, l, f) != l) {
1699 error_setg(errp, QERR_IO_ERROR);
1700 goto exit;
1701 }
1702 addr += l;
1703 size -= l;
1704 }
1705
1706 exit:
1707 fclose(f);
1708 }
1709
1710 void qmp_pmemsave(int64_t addr, int64_t size, const char *filename,
1711 Error **errp)
1712 {
1713 FILE *f;
1714 uint32_t l;
1715 uint8_t buf[1024];
1716
1717 f = fopen(filename, "wb");
1718 if (!f) {
1719 error_setg_file_open(errp, errno, filename);
1720 return;
1721 }
1722
1723 while (size != 0) {
1724 l = sizeof(buf);
1725 if (l > size)
1726 l = size;
1727 cpu_physical_memory_read(addr, buf, l);
1728 if (fwrite(buf, 1, l, f) != l) {
1729 error_setg(errp, QERR_IO_ERROR);
1730 goto exit;
1731 }
1732 addr += l;
1733 size -= l;
1734 }
1735
1736 exit:
1737 fclose(f);
1738 }
1739
1740 void qmp_inject_nmi(Error **errp)
1741 {
1742 nmi_monitor_handle(monitor_get_cpu_index(), errp);
1743 }
1744
1745 void dump_drift_info(FILE *f, fprintf_function cpu_fprintf)
1746 {
1747 if (!use_icount) {
1748 return;
1749 }
1750
1751 cpu_fprintf(f, "Host - Guest clock %"PRIi64" ms\n",
1752 (cpu_get_clock() - cpu_get_icount())/SCALE_MS);
1753 if (icount_align_option) {
1754 cpu_fprintf(f, "Max guest delay %"PRIi64" ms\n", -max_delay/SCALE_MS);
1755 cpu_fprintf(f, "Max guest advance %"PRIi64" ms\n", max_advance/SCALE_MS);
1756 } else {
1757 cpu_fprintf(f, "Max guest delay NA\n");
1758 cpu_fprintf(f, "Max guest advance NA\n");
1759 }
1760 }