/*
 * QEMU System Emulator
 *
 * Copyright (c) 2003-2008 Fabrice Bellard
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

/* Needed early for CONFIG_BSD etc. */
#include "qemu/osdep.h"
#include "qemu-common.h"
#include "cpu.h"
#include "monitor/monitor.h"
#include "qapi/qmp/qerror.h"
#include "qemu/error-report.h"
#include "sysemu/sysemu.h"
#include "sysemu/block-backend.h"
#include "exec/gdbstub.h"
#include "sysemu/dma.h"
#include "sysemu/kvm.h"
#include "qmp-commands.h"
#include "exec/exec-all.h"

#include "qemu/thread.h"
#include "sysemu/cpus.h"
#include "sysemu/qtest.h"
#include "qemu/main-loop.h"
#include "qemu/bitmap.h"
#include "qemu/seqlock.h"
#include "qapi-event.h"
#include "hw/nmi.h"
#include "sysemu/replay.h"

#ifndef _WIN32
#include "qemu/compatfd.h"
#endif

#ifdef CONFIG_LINUX

#include <sys/prctl.h>

#ifndef PR_MCE_KILL
#define PR_MCE_KILL 33
#endif

#ifndef PR_MCE_KILL_SET
#define PR_MCE_KILL_SET 1
#endif

#ifndef PR_MCE_KILL_EARLY
#define PR_MCE_KILL_EARLY 1
#endif

#endif /* CONFIG_LINUX */

int64_t max_delay;
int64_t max_advance;

/* vcpu throttling controls */
static QEMUTimer *throttle_timer;
static unsigned int throttle_percentage;

#define CPU_THROTTLE_PCT_MIN 1
#define CPU_THROTTLE_PCT_MAX 99
#define CPU_THROTTLE_TIMESLICE_NS 10000000
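
/* Each throttle timeslice is 10 ms: a throttled vCPU runs for one
 * timeslice and then sleeps in proportion to the throttle percentage
 * (see cpu_throttle_thread() below).
 */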

bool cpu_is_stopped(CPUState *cpu)
{
    return cpu->stopped || !runstate_is_running();
}

static bool cpu_thread_is_idle(CPUState *cpu)
{
    if (cpu->stop || cpu->queued_work_first) {
        return false;
    }
    if (cpu_is_stopped(cpu)) {
        return true;
    }
    if (!cpu->halted || cpu_has_work(cpu) ||
        kvm_halt_in_kernel()) {
        return false;
    }
    return true;
}

static bool all_cpu_threads_idle(void)
{
    CPUState *cpu;

    CPU_FOREACH(cpu) {
        if (!cpu_thread_is_idle(cpu)) {
            return false;
        }
    }
    return true;
}

/***********************************************************/
/* guest cycle counter */

/* Protected by TimersState seqlock */

static bool icount_sleep = true;
static int64_t vm_clock_warp_start = -1;
/* Conversion factor from emulated instructions to virtual clock ticks.  */
static int icount_time_shift;
/* Arbitrarily pick 1MIPS as the minimum allowable speed.  */
#define MAX_ICOUNT_SHIFT 10
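
/* One emulated instruction accounts for 2^icount_time_shift ns of virtual
 * time (see cpu_icount_to_ns() below), so shift 10 corresponds to roughly
 * 1 MIPS and the initial shift of 3 used in configure_icount() corresponds
 * to 125 MIPS.
 */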

static QEMUTimer *icount_rt_timer;
static QEMUTimer *icount_vm_timer;
static QEMUTimer *icount_warp_timer;

typedef struct TimersState {
    /* Protected by BQL.  */
    int64_t cpu_ticks_prev;
    int64_t cpu_ticks_offset;

    /* cpu_clock_offset can be read outside of the BQL, so protect it with
     * this lock.
     */
    QemuSeqLock vm_clock_seqlock;
    int64_t cpu_clock_offset;
    int32_t cpu_ticks_enabled;
    int64_t dummy;

    /* Compensate for varying guest execution speed.  */
    int64_t qemu_icount_bias;
    /* Only written by the TCG thread.  */
    int64_t qemu_icount;
} TimersState;

static TimersState timers_state;

int64_t cpu_get_icount_raw(void)
{
    int64_t icount;
    CPUState *cpu = current_cpu;

    icount = timers_state.qemu_icount;
    if (cpu) {
        if (!cpu->can_do_io) {
            fprintf(stderr, "Bad icount read\n");
            exit(1);
        }
        icount -= (cpu->icount_decr.u16.low + cpu->icount_extra);
    }
    return icount;
}

/* Return the virtual CPU time, based on the instruction counter.  */
static int64_t cpu_get_icount_locked(void)
{
    int64_t icount = cpu_get_icount_raw();
    return timers_state.qemu_icount_bias + cpu_icount_to_ns(icount);
}

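/* Lock-free reader pattern for vm_clock_seqlock: retry the read whenever a
 * writer was active in between, so readers never block on the writer and
 * never observe a torn bias/offset pair.
 */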
int64_t cpu_get_icount(void)
{
    int64_t icount;
    unsigned start;

    do {
        start = seqlock_read_begin(&timers_state.vm_clock_seqlock);
        icount = cpu_get_icount_locked();
    } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start));

    return icount;
}

int64_t cpu_icount_to_ns(int64_t icount)
{
    return icount << icount_time_shift;
}

/* Return the time elapsed in the VM between vm_start and vm_stop.  Unless
 * icount is active, cpu_get_ticks() uses units of the host CPU cycle
 * counter.
 *
 * Caller must hold the BQL.
 */
int64_t cpu_get_ticks(void)
{
    int64_t ticks;

    if (use_icount) {
        return cpu_get_icount();
    }

    ticks = timers_state.cpu_ticks_offset;
    if (timers_state.cpu_ticks_enabled) {
        ticks += cpu_get_host_ticks();
    }

    if (timers_state.cpu_ticks_prev > ticks) {
        /* Note: non-increasing ticks may happen if the host uses
           software suspend.  */
        timers_state.cpu_ticks_offset += timers_state.cpu_ticks_prev - ticks;
        ticks = timers_state.cpu_ticks_prev;
    }

    timers_state.cpu_ticks_prev = ticks;
    return ticks;
}

static int64_t cpu_get_clock_locked(void)
{
    int64_t time;

    time = timers_state.cpu_clock_offset;
    if (timers_state.cpu_ticks_enabled) {
        time += get_clock();
    }

    return time;
}

/* Return the monotonic time elapsed in the VM, i.e.,
 * the time between vm_start and vm_stop.
 */
int64_t cpu_get_clock(void)
{
    int64_t ti;
    unsigned start;

    do {
        start = seqlock_read_begin(&timers_state.vm_clock_seqlock);
        ti = cpu_get_clock_locked();
    } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start));

    return ti;
}

/* Enable cpu_get_ticks().
 * Caller must hold the BQL, which serves as the mutex for vm_clock_seqlock.
 */
void cpu_enable_ticks(void)
{
    /* Here, the only thing actually protected by the seqlock is
     * cpu_clock_offset.  */
    seqlock_write_begin(&timers_state.vm_clock_seqlock);
    if (!timers_state.cpu_ticks_enabled) {
        timers_state.cpu_ticks_offset -= cpu_get_host_ticks();
        timers_state.cpu_clock_offset -= get_clock();
        timers_state.cpu_ticks_enabled = 1;
    }
    seqlock_write_end(&timers_state.vm_clock_seqlock);
}

/* Disable cpu_get_ticks(): the clock is stopped.  You must not call
 * cpu_get_ticks() after that.
 * Caller must hold the BQL, which serves as the mutex for vm_clock_seqlock.
 */
void cpu_disable_ticks(void)
{
    /* Here, the only thing actually protected by the seqlock is
     * cpu_clock_offset.  */
    seqlock_write_begin(&timers_state.vm_clock_seqlock);
    if (timers_state.cpu_ticks_enabled) {
        timers_state.cpu_ticks_offset += cpu_get_host_ticks();
        timers_state.cpu_clock_offset = cpu_get_clock_locked();
        timers_state.cpu_ticks_enabled = 0;
    }
    seqlock_write_end(&timers_state.vm_clock_seqlock);
}

/* Correlation between real and virtual time is always going to be
   fairly approximate, so ignore small variation.
   When the guest is idle real and virtual time will be aligned in
   the IO wait loop.  */
#define ICOUNT_WOBBLE (NANOSECONDS_PER_SECOND / 10)

static void icount_adjust(void)
{
    int64_t cur_time;
    int64_t cur_icount;
    int64_t delta;

    /* Protected by TimersState mutex.  */
    static int64_t last_delta;

    /* If the VM is not running, then do nothing.  */
    if (!runstate_is_running()) {
        return;
    }

    seqlock_write_begin(&timers_state.vm_clock_seqlock);
    cur_time = cpu_get_clock_locked();
    cur_icount = cpu_get_icount_locked();

    delta = cur_icount - cur_time;
    /* FIXME: This is a very crude algorithm, somewhat prone to oscillation.  */
    if (delta > 0
        && last_delta + ICOUNT_WOBBLE < delta * 2
        && icount_time_shift > 0) {
        /* The guest is getting too far ahead.  Slow time down.  */
        icount_time_shift--;
    }
    if (delta < 0
        && last_delta - ICOUNT_WOBBLE > delta * 2
        && icount_time_shift < MAX_ICOUNT_SHIFT) {
        /* The guest is getting too far behind.  Speed time up.  */
        icount_time_shift++;
    }
    last_delta = delta;
    timers_state.qemu_icount_bias = cur_icount
                              - (timers_state.qemu_icount << icount_time_shift);
    seqlock_write_end(&timers_state.vm_clock_seqlock);
}
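
/* A sketch of the feedback loop above: delta is virtual minus real time.
 * When the guest's virtual clock has drifted ahead by more than the
 * ICOUNT_WOBBLE tolerance (100 ms), halving the ns-per-instruction rate
 * (shift--) slows virtual time down; when it lags, doubling the rate
 * (shift++) speeds virtual time up.  The bias is then recomputed so the
 * virtual clock value does not jump at the moment the rate changes.
 */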

static void icount_adjust_rt(void *opaque)
{
    timer_mod(icount_rt_timer,
              qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL_RT) + 1000);
    icount_adjust();
}

static void icount_adjust_vm(void *opaque)
{
    timer_mod(icount_vm_timer,
              qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
              NANOSECONDS_PER_SECOND / 10);
    icount_adjust();
}

static int64_t qemu_icount_round(int64_t count)
{
    return (count + (1 << icount_time_shift) - 1) >> icount_time_shift;
}
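
/* qemu_icount_round() is a ceiling division by 2^icount_time_shift: with
 * shift == 3, a 9 ns deadline rounds up to (9 + 7) >> 3 == 2 instructions,
 * so the instruction budget never undershoots a timer deadline.
 */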

static void icount_warp_rt(void)
{
    unsigned seq;
    int64_t warp_start;

    /* The icount_warp_timer is rescheduled soon after vm_clock_warp_start
     * changes from -1 to another value, so the race here is okay.
     */
    do {
        seq = seqlock_read_begin(&timers_state.vm_clock_seqlock);
        warp_start = vm_clock_warp_start;
    } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, seq));

    if (warp_start == -1) {
        return;
    }

    seqlock_write_begin(&timers_state.vm_clock_seqlock);
    if (runstate_is_running()) {
        int64_t clock = REPLAY_CLOCK(REPLAY_CLOCK_VIRTUAL_RT,
                                     cpu_get_clock_locked());
        int64_t warp_delta;

        warp_delta = clock - vm_clock_warp_start;
        if (use_icount == 2) {
            /*
             * In adaptive mode, do not let QEMU_CLOCK_VIRTUAL run too
             * far ahead of real time.
             */
            int64_t cur_icount = cpu_get_icount_locked();
            int64_t delta = clock - cur_icount;
            warp_delta = MIN(warp_delta, delta);
        }
        timers_state.qemu_icount_bias += warp_delta;
    }
    vm_clock_warp_start = -1;
    seqlock_write_end(&timers_state.vm_clock_seqlock);

    if (qemu_clock_expired(QEMU_CLOCK_VIRTUAL)) {
        qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
    }
}

static void icount_timer_cb(void *opaque)
{
    /* No need for a checkpoint because the timer already synchronizes
     * with CHECKPOINT_CLOCK_VIRTUAL_RT.
     */
    icount_warp_rt();
}

void qtest_clock_warp(int64_t dest)
{
    int64_t clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
    AioContext *aio_context;
    assert(qtest_enabled());
    aio_context = qemu_get_aio_context();
    while (clock < dest) {
        int64_t deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
        int64_t warp = qemu_soonest_timeout(dest - clock, deadline);

        seqlock_write_begin(&timers_state.vm_clock_seqlock);
        timers_state.qemu_icount_bias += warp;
        seqlock_write_end(&timers_state.vm_clock_seqlock);

        qemu_clock_run_timers(QEMU_CLOCK_VIRTUAL);
        timerlist_run_timers(aio_context->tlg.tl[QEMU_CLOCK_VIRTUAL]);
        clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
    }
    qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
}
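
/* Warping works by adding the skipped-over interval directly to
 * qemu_icount_bias: cpu_get_icount_locked() returns
 * bias + instructions * 2^shift, so bumping the bias advances
 * QEMU_CLOCK_VIRTUAL to the next timer deadline instantly, without
 * executing any guest instructions.
 */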

void qemu_start_warp_timer(void)
{
    int64_t clock;
    int64_t deadline;

    if (!use_icount) {
        return;
    }

    /* Nothing to do if the VM is stopped: QEMU_CLOCK_VIRTUAL timers
     * do not fire, so computing the deadline does not make sense.
     */
    if (!runstate_is_running()) {
        return;
    }

    /* Warp the clock deterministically in record/replay mode.  */
    if (!replay_checkpoint(CHECKPOINT_CLOCK_WARP_START)) {
        return;
    }

    if (!all_cpu_threads_idle()) {
        return;
    }

    if (qtest_enabled()) {
        /* When testing, qtest commands advance icount.  */
        return;
    }

    /* We want to use the earliest deadline from ALL vm_clocks */
    clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT);
    deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
    if (deadline < 0) {
        static bool notified;
        if (!icount_sleep && !notified) {
            error_report("WARNING: icount sleep disabled and no active timers");
            notified = true;
        }
        return;
    }

    if (deadline > 0) {
        /*
         * Ensure QEMU_CLOCK_VIRTUAL proceeds even when the virtual CPU goes to
         * sleep.  Otherwise, the CPU might be waiting for a future timer
         * interrupt to wake it up, but the interrupt never comes because
         * the vCPU isn't running any insns and thus doesn't advance
         * QEMU_CLOCK_VIRTUAL.
         */
        if (!icount_sleep) {
            /*
             * We never let VCPUs sleep in no-sleep icount mode.
             * If there is a pending QEMU_CLOCK_VIRTUAL timer we just advance
             * to the next QEMU_CLOCK_VIRTUAL event and notify it.
             * This is useful when we want a deterministic execution time,
             * isolated from host latencies.
             */
            seqlock_write_begin(&timers_state.vm_clock_seqlock);
            timers_state.qemu_icount_bias += deadline;
            seqlock_write_end(&timers_state.vm_clock_seqlock);
            qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
        } else {
            /*
             * We do stop VCPUs and only advance QEMU_CLOCK_VIRTUAL after some
             * "real" time (related to the time left until the next event) has
             * passed.  The QEMU_CLOCK_VIRTUAL_RT clock will do this.
             * This keeps the warps from being visible externally; for example,
             * you will not be sending network packets continuously instead of
             * every 100ms.
             */
            seqlock_write_begin(&timers_state.vm_clock_seqlock);
            if (vm_clock_warp_start == -1 || vm_clock_warp_start > clock) {
                vm_clock_warp_start = clock;
            }
            seqlock_write_end(&timers_state.vm_clock_seqlock);
            timer_mod_anticipate(icount_warp_timer, clock + deadline);
        }
    } else if (deadline == 0) {
        qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
    }
}

static void qemu_account_warp_timer(void)
{
    if (!use_icount || !icount_sleep) {
        return;
    }

    /* Nothing to do if the VM is stopped: QEMU_CLOCK_VIRTUAL timers
     * do not fire, so computing the deadline does not make sense.
     */
    if (!runstate_is_running()) {
        return;
    }

    /* warp clock deterministically in record/replay mode */
    if (!replay_checkpoint(CHECKPOINT_CLOCK_WARP_ACCOUNT)) {
        return;
    }

    timer_del(icount_warp_timer);
    icount_warp_rt();
}

static bool icount_state_needed(void *opaque)
{
    return use_icount;
}

/*
 * This is a subsection for icount migration.
 */
static const VMStateDescription icount_vmstate_timers = {
    .name = "timer/icount",
    .version_id = 1,
    .minimum_version_id = 1,
    .needed = icount_state_needed,
    .fields = (VMStateField[]) {
        VMSTATE_INT64(qemu_icount_bias, TimersState),
        VMSTATE_INT64(qemu_icount, TimersState),
        VMSTATE_END_OF_LIST()
    }
};

static const VMStateDescription vmstate_timers = {
    .name = "timer",
    .version_id = 2,
    .minimum_version_id = 1,
    .fields = (VMStateField[]) {
        VMSTATE_INT64(cpu_ticks_offset, TimersState),
        VMSTATE_INT64(dummy, TimersState),
        VMSTATE_INT64_V(cpu_clock_offset, TimersState, 2),
        VMSTATE_END_OF_LIST()
    },
    .subsections = (const VMStateDescription*[]) {
        &icount_vmstate_timers,
        NULL
    }
};

static void cpu_throttle_thread(CPUState *cpu, void *opaque)
{
    double pct;
    double throttle_ratio;
    long sleeptime_ns;

    if (!cpu_throttle_get_percentage()) {
        return;
    }

    pct = (double)cpu_throttle_get_percentage() / 100;
    throttle_ratio = pct / (1 - pct);
    sleeptime_ns = (long)(throttle_ratio * CPU_THROTTLE_TIMESLICE_NS);

    qemu_mutex_unlock_iothread();
    atomic_set(&cpu->throttle_thread_scheduled, 0);
    g_usleep(sleeptime_ns / 1000); /* Convert ns to us for usleep call */
    qemu_mutex_lock_iothread();
}

static void cpu_throttle_timer_tick(void *opaque)
{
    CPUState *cpu;
    double pct;

    /* Stop the timer if needed */
    if (!cpu_throttle_get_percentage()) {
        return;
    }
    CPU_FOREACH(cpu) {
        if (!atomic_xchg(&cpu->throttle_thread_scheduled, 1)) {
            async_run_on_cpu(cpu, cpu_throttle_thread, NULL);
        }
    }

    pct = (double)cpu_throttle_get_percentage() / 100;
    timer_mod(throttle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT) +
                              CPU_THROTTLE_TIMESLICE_NS / (1 - pct));
}
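
/* Worked example: at 75% throttle, pct = 0.75, so each scheduled
 * cpu_throttle_thread() sleeps for 0.75 / 0.25 * 10 ms = 30 ms, and the
 * timer re-fires 10 ms / 0.25 = 40 ms later.  That yields 10 ms of run
 * time out of every 40 ms, i.e. the vCPU is off-CPU 75% of the time.
 */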

void cpu_throttle_set(int new_throttle_pct)
{
    /* Ensure throttle percentage is within valid range */
    new_throttle_pct = MIN(new_throttle_pct, CPU_THROTTLE_PCT_MAX);
    new_throttle_pct = MAX(new_throttle_pct, CPU_THROTTLE_PCT_MIN);

    atomic_set(&throttle_percentage, new_throttle_pct);

    timer_mod(throttle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT) +
                              CPU_THROTTLE_TIMESLICE_NS);
}

void cpu_throttle_stop(void)
{
    atomic_set(&throttle_percentage, 0);
}

bool cpu_throttle_active(void)
{
    return (cpu_throttle_get_percentage() != 0);
}

int cpu_throttle_get_percentage(void)
{
    return atomic_read(&throttle_percentage);
}

void cpu_ticks_init(void)
{
    seqlock_init(&timers_state.vm_clock_seqlock);
    vmstate_register(NULL, 0, &vmstate_timers, &timers_state);
    throttle_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL_RT,
                                  cpu_throttle_timer_tick, NULL);
}

void configure_icount(QemuOpts *opts, Error **errp)
{
    const char *option;
    char *rem_str = NULL;

    option = qemu_opt_get(opts, "shift");
    if (!option) {
        if (qemu_opt_get(opts, "align") != NULL) {
            error_setg(errp, "Please specify shift option when using align");
        }
        return;
    }

    icount_sleep = qemu_opt_get_bool(opts, "sleep", true);
    if (icount_sleep) {
        icount_warp_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL_RT,
                                         icount_timer_cb, NULL);
    }

    icount_align_option = qemu_opt_get_bool(opts, "align", false);

    if (icount_align_option && !icount_sleep) {
        error_setg(errp, "align=on and sleep=off are incompatible");
    }
    if (strcmp(option, "auto") != 0) {
        errno = 0;
        icount_time_shift = strtol(option, &rem_str, 0);
        if (errno != 0 || *rem_str != '\0' || !strlen(option)) {
            error_setg(errp, "icount: Invalid shift value");
        }
        use_icount = 1;
        return;
    } else if (icount_align_option) {
        error_setg(errp, "shift=auto and align=on are incompatible");
    } else if (!icount_sleep) {
        error_setg(errp, "shift=auto and sleep=off are incompatible");
    }

    use_icount = 2;

    /* 125MIPS seems a reasonable initial guess at the guest speed.
       It will be corrected fairly quickly anyway.  */
    icount_time_shift = 3;

    /* Have both realtime and virtual time triggers for speed adjustment.
       The realtime trigger catches emulated time passing too slowly,
       the virtual time trigger catches emulated time passing too fast.
       Realtime triggers occur even when idle, so use them less frequently
       than VM triggers.  */
    icount_rt_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL_RT,
                                   icount_adjust_rt, NULL);
    timer_mod(icount_rt_timer,
              qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL_RT) + 1000);
    icount_vm_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
                                   icount_adjust_vm, NULL);
    timer_mod(icount_vm_timer,
              qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
              NANOSECONDS_PER_SECOND / 10);
}
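
/* The two modes selected above correspond to the -icount command line
 * option, e.g. "-icount shift=3" for a fixed rate (use_icount == 1) and
 * "-icount shift=auto" for the adaptive rate (use_icount == 2); the
 * sleep=on|off and align=on|off suboptions are parsed from the same
 * QemuOpts group.
 */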

/***********************************************************/
void hw_error(const char *fmt, ...)
{
    va_list ap;
    CPUState *cpu;

    va_start(ap, fmt);
    fprintf(stderr, "qemu: hardware error: ");
    vfprintf(stderr, fmt, ap);
    fprintf(stderr, "\n");
    CPU_FOREACH(cpu) {
        fprintf(stderr, "CPU #%d:\n", cpu->cpu_index);
        cpu_dump_state(cpu, stderr, fprintf, CPU_DUMP_FPU);
    }
    va_end(ap);
    abort();
}

void cpu_synchronize_all_states(void)
{
    CPUState *cpu;

    CPU_FOREACH(cpu) {
        cpu_synchronize_state(cpu);
    }
}

void cpu_synchronize_all_post_reset(void)
{
    CPUState *cpu;

    CPU_FOREACH(cpu) {
        cpu_synchronize_post_reset(cpu);
    }
}

void cpu_synchronize_all_post_init(void)
{
    CPUState *cpu;

    CPU_FOREACH(cpu) {
        cpu_synchronize_post_init(cpu);
    }
}

static int do_vm_stop(RunState state)
{
    int ret = 0;

    if (runstate_is_running()) {
        cpu_disable_ticks();
        pause_all_vcpus();
        runstate_set(state);
        vm_state_notify(0, state);
        qapi_event_send_stop(&error_abort);
    }

    bdrv_drain_all();
    replay_disable_events();
    ret = bdrv_flush_all();

    return ret;
}

static bool cpu_can_run(CPUState *cpu)
{
    if (cpu->stop) {
        return false;
    }
    if (cpu_is_stopped(cpu)) {
        return false;
    }
    return true;
}

static void cpu_handle_guest_debug(CPUState *cpu)
{
    gdb_set_stop_cpu(cpu);
    qemu_system_debug_request();
    cpu->stopped = true;
}

#ifdef CONFIG_LINUX
static void sigbus_reraise(void)
{
    sigset_t set;
    struct sigaction action;

    memset(&action, 0, sizeof(action));
    action.sa_handler = SIG_DFL;
    if (!sigaction(SIGBUS, &action, NULL)) {
        raise(SIGBUS);
        sigemptyset(&set);
        sigaddset(&set, SIGBUS);
        pthread_sigmask(SIG_UNBLOCK, &set, NULL);
    }
    perror("Failed to re-raise SIGBUS!");
    abort();
}

static void sigbus_handler(int n, struct qemu_signalfd_siginfo *siginfo,
                           void *ctx)
{
    if (kvm_on_sigbus(siginfo->ssi_code,
                      (void *)(intptr_t)siginfo->ssi_addr)) {
        sigbus_reraise();
    }
}

static void qemu_init_sigbus(void)
{
    struct sigaction action;

    memset(&action, 0, sizeof(action));
    action.sa_flags = SA_SIGINFO;
    action.sa_sigaction = (void (*)(int, siginfo_t*, void*))sigbus_handler;
    sigaction(SIGBUS, &action, NULL);

    prctl(PR_MCE_KILL, PR_MCE_KILL_SET, PR_MCE_KILL_EARLY, 0, 0);
}

static void qemu_kvm_eat_signals(CPUState *cpu)
{
    struct timespec ts = { 0, 0 };
    siginfo_t siginfo;
    sigset_t waitset;
    sigset_t chkset;
    int r;

    sigemptyset(&waitset);
    sigaddset(&waitset, SIG_IPI);
    sigaddset(&waitset, SIGBUS);

    do {
        r = sigtimedwait(&waitset, &siginfo, &ts);
        if (r == -1 && !(errno == EAGAIN || errno == EINTR)) {
            perror("sigtimedwait");
            exit(1);
        }

        switch (r) {
        case SIGBUS:
            if (kvm_on_sigbus_vcpu(cpu, siginfo.si_code, siginfo.si_addr)) {
                sigbus_reraise();
            }
            break;
        default:
            break;
        }

        r = sigpending(&chkset);
        if (r == -1) {
            perror("sigpending");
            exit(1);
        }
    } while (sigismember(&chkset, SIG_IPI) || sigismember(&chkset, SIGBUS));
}

#else /* !CONFIG_LINUX */

static void qemu_init_sigbus(void)
{
}

static void qemu_kvm_eat_signals(CPUState *cpu)
{
}
#endif /* !CONFIG_LINUX */

#ifndef _WIN32
static void dummy_signal(int sig)
{
}

static void qemu_kvm_init_cpu_signals(CPUState *cpu)
{
    int r;
    sigset_t set;
    struct sigaction sigact;

    memset(&sigact, 0, sizeof(sigact));
    sigact.sa_handler = dummy_signal;
    sigaction(SIG_IPI, &sigact, NULL);

    pthread_sigmask(SIG_BLOCK, NULL, &set);
    sigdelset(&set, SIG_IPI);
    sigdelset(&set, SIGBUS);
    r = kvm_set_signal_mask(cpu, &set);
    if (r) {
        fprintf(stderr, "kvm_set_signal_mask: %s\n", strerror(-r));
        exit(1);
    }
}

#else /* _WIN32 */
static void qemu_kvm_init_cpu_signals(CPUState *cpu)
{
    abort();
}
#endif /* _WIN32 */

static QemuMutex qemu_global_mutex;
static QemuCond qemu_io_proceeded_cond;
static unsigned iothread_requesting_mutex;

static QemuThread io_thread;

/* cpu creation */
static QemuCond qemu_cpu_cond;
/* system init */
static QemuCond qemu_pause_cond;

void qemu_init_cpu_loop(void)
{
    qemu_init_sigbus();
    qemu_cond_init(&qemu_cpu_cond);
    qemu_cond_init(&qemu_pause_cond);
    qemu_cond_init(&qemu_io_proceeded_cond);
    qemu_mutex_init(&qemu_global_mutex);

    qemu_thread_get_self(&io_thread);
}

void run_on_cpu(CPUState *cpu, run_on_cpu_func func, void *data)
{
    do_run_on_cpu(cpu, func, data, &qemu_global_mutex);
}

static void qemu_kvm_destroy_vcpu(CPUState *cpu)
{
    if (kvm_destroy_vcpu(cpu) < 0) {
        error_report("kvm_destroy_vcpu failed");
        exit(EXIT_FAILURE);
    }
}

static void qemu_tcg_destroy_vcpu(CPUState *cpu)
{
}

static void qemu_wait_io_event_common(CPUState *cpu)
{
    if (cpu->stop) {
        cpu->stop = false;
        cpu->stopped = true;
        qemu_cond_broadcast(&qemu_pause_cond);
    }
    process_queued_cpu_work(cpu);
    cpu->thread_kicked = false;
}

static void qemu_tcg_wait_io_event(CPUState *cpu)
{
    while (all_cpu_threads_idle()) {
        qemu_cond_wait(cpu->halt_cond, &qemu_global_mutex);
    }

    while (iothread_requesting_mutex) {
        qemu_cond_wait(&qemu_io_proceeded_cond, &qemu_global_mutex);
    }

    CPU_FOREACH(cpu) {
        qemu_wait_io_event_common(cpu);
    }
}
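
/* With TCG all vCPUs share one host thread, so qemu_tcg_wait_io_event()
 * sleeps on a single halt condition while the whole machine is idle and
 * then drains pending work for every vCPU; note the CPU_FOREACH reuses the
 * "cpu" parameter as its iterator once the waits are over.
 */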

static void qemu_kvm_wait_io_event(CPUState *cpu)
{
    while (cpu_thread_is_idle(cpu)) {
        qemu_cond_wait(cpu->halt_cond, &qemu_global_mutex);
    }

    qemu_kvm_eat_signals(cpu);
    qemu_wait_io_event_common(cpu);
}

static void *qemu_kvm_cpu_thread_fn(void *arg)
{
    CPUState *cpu = arg;
    int r;

    rcu_register_thread();

    qemu_mutex_lock_iothread();
    qemu_thread_get_self(cpu->thread);
    cpu->thread_id = qemu_get_thread_id();
    cpu->can_do_io = 1;
    current_cpu = cpu;

    r = kvm_init_vcpu(cpu);
    if (r < 0) {
        fprintf(stderr, "kvm_init_vcpu failed: %s\n", strerror(-r));
        exit(1);
    }

    qemu_kvm_init_cpu_signals(cpu);

    /* signal CPU creation */
    cpu->created = true;
    qemu_cond_signal(&qemu_cpu_cond);

    do {
        if (cpu_can_run(cpu)) {
            r = kvm_cpu_exec(cpu);
            if (r == EXCP_DEBUG) {
                cpu_handle_guest_debug(cpu);
            }
        }
        qemu_kvm_wait_io_event(cpu);
    } while (!cpu->unplug || cpu_can_run(cpu));

    qemu_kvm_destroy_vcpu(cpu);
    cpu->created = false;
    qemu_cond_signal(&qemu_cpu_cond);
    qemu_mutex_unlock_iothread();
    return NULL;
}

static void *qemu_dummy_cpu_thread_fn(void *arg)
{
#ifdef _WIN32
    fprintf(stderr, "qtest is not supported under Windows\n");
    exit(1);
#else
    CPUState *cpu = arg;
    sigset_t waitset;
    int r;

    rcu_register_thread();

    qemu_mutex_lock_iothread();
    qemu_thread_get_self(cpu->thread);
    cpu->thread_id = qemu_get_thread_id();
    cpu->can_do_io = 1;

    sigemptyset(&waitset);
    sigaddset(&waitset, SIG_IPI);

    /* signal CPU creation */
    cpu->created = true;
    qemu_cond_signal(&qemu_cpu_cond);

    current_cpu = cpu;
    while (1) {
        current_cpu = NULL;
        qemu_mutex_unlock_iothread();
        do {
            int sig;
            r = sigwait(&waitset, &sig);
        } while (r == -1 && (errno == EAGAIN || errno == EINTR));
        if (r == -1) {
            perror("sigwait");
            exit(1);
        }
        qemu_mutex_lock_iothread();
        current_cpu = cpu;
        qemu_wait_io_event_common(cpu);
    }

    return NULL;
#endif
}

static int64_t tcg_get_icount_limit(void)
{
    int64_t deadline;

    if (replay_mode != REPLAY_MODE_PLAY) {
        deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);

        /* Maintain prior (possibly buggy) behaviour where if no deadline
         * was set (as there is no QEMU_CLOCK_VIRTUAL timer) or it is more than
         * INT32_MAX nanoseconds ahead, we still use INT32_MAX
         * nanoseconds.
         */
        if ((deadline < 0) || (deadline > INT32_MAX)) {
            deadline = INT32_MAX;
        }

        return qemu_icount_round(deadline);
    } else {
        return replay_get_instructions();
    }
}

static int tcg_cpu_exec(CPUState *cpu)
{
    int ret;
#ifdef CONFIG_PROFILER
    int64_t ti;
#endif

#ifdef CONFIG_PROFILER
    ti = profile_getclock();
#endif
    if (use_icount) {
        int64_t count;
        int decr;
        timers_state.qemu_icount -= (cpu->icount_decr.u16.low
                                    + cpu->icount_extra);
        cpu->icount_decr.u16.low = 0;
        cpu->icount_extra = 0;
        count = tcg_get_icount_limit();
        timers_state.qemu_icount += count;
        decr = (count > 0xffff) ? 0xffff : count;
        count -= decr;
        cpu->icount_decr.u16.low = decr;
        cpu->icount_extra = count;
    }
    cpu_exec_start(cpu);
    ret = cpu_exec(cpu);
    cpu_exec_end(cpu);
#ifdef CONFIG_PROFILER
    tcg_time += profile_getclock() - ti;
#endif
    if (use_icount) {
        /* Fold pending instructions back into the
           instruction counter, and clear the interrupt flag.  */
        timers_state.qemu_icount -= (cpu->icount_decr.u16.low
                                    + cpu->icount_extra);
        cpu->icount_decr.u32 = 0;
        cpu->icount_extra = 0;
        replay_account_executed_instructions();
    }
    return ret;
}
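
/* The instruction budget is split because icount_decr.u16.low is only 16
 * bits wide: a limit of, say, 100000 instructions is handed out as
 * low = 0xffff (65535) with icount_extra = 34465, and execution exits back
 * here whenever the low counter is exhausted so the budget can be refilled
 * from icount_extra.
 */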

/* Destroy any remaining vCPUs which have been unplugged and have
 * finished running
 */
static void deal_with_unplugged_cpus(void)
{
    CPUState *cpu;

    CPU_FOREACH(cpu) {
        if (cpu->unplug && !cpu_can_run(cpu)) {
            qemu_tcg_destroy_vcpu(cpu);
            cpu->created = false;
            qemu_cond_signal(&qemu_cpu_cond);
            break;
        }
    }
}

static void *qemu_tcg_cpu_thread_fn(void *arg)
{
    CPUState *cpu = arg;

    rcu_register_thread();

    qemu_mutex_lock_iothread();
    qemu_thread_get_self(cpu->thread);

    CPU_FOREACH(cpu) {
        cpu->thread_id = qemu_get_thread_id();
        cpu->created = true;
        cpu->can_do_io = 1;
    }
    qemu_cond_signal(&qemu_cpu_cond);

    /* wait for initial kick-off after machine start */
    while (first_cpu->stopped) {
        qemu_cond_wait(first_cpu->halt_cond, &qemu_global_mutex);

        /* process any pending work */
        CPU_FOREACH(cpu) {
            qemu_wait_io_event_common(cpu);
        }
    }

    /* Start with exit_request set so that the first iteration of the loop
     * below processes any pending work before running a vCPU.  */
    atomic_mb_set(&exit_request, 1);

    cpu = first_cpu;

    while (1) {
        /* Account partial waits to QEMU_CLOCK_VIRTUAL.  */
        qemu_account_warp_timer();

        if (!cpu) {
            cpu = first_cpu;
        }

        for (; cpu != NULL && !exit_request; cpu = CPU_NEXT(cpu)) {

            qemu_clock_enable(QEMU_CLOCK_VIRTUAL,
                              (cpu->singlestep_enabled & SSTEP_NOTIMER) == 0);

            if (cpu_can_run(cpu)) {
                int r;
                r = tcg_cpu_exec(cpu);
                if (r == EXCP_DEBUG) {
                    cpu_handle_guest_debug(cpu);
                    break;
                }
            } else if (cpu->stop || cpu->stopped) {
                if (cpu->unplug) {
                    cpu = CPU_NEXT(cpu);
                }
                break;
            }

        } /* for cpu.. */

        /* Pairs with smp_wmb in qemu_cpu_kick.  */
        atomic_mb_set(&exit_request, 0);

        if (use_icount) {
            int64_t deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);

            if (deadline == 0) {
                qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
            }
        }
        qemu_tcg_wait_io_event(QTAILQ_FIRST(&cpus));
        deal_with_unplugged_cpus();
    }

    return NULL;
}
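
/* In this single-threaded TCG model one host thread round-robins over all
 * vCPUs: each pass runs every runnable vCPU for at most one instruction
 * budget, then services I/O events before starting the next pass.
 * exit_request is how the main loop (via qemu_cpu_kick_no_halt() below)
 * breaks a pass early so it can take the BQL.
 */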

static void qemu_cpu_kick_thread(CPUState *cpu)
{
#ifndef _WIN32
    int err;

    if (cpu->thread_kicked) {
        return;
    }
    cpu->thread_kicked = true;
    err = pthread_kill(cpu->thread->thread, SIG_IPI);
    if (err) {
        fprintf(stderr, "qemu:%s: %s", __func__, strerror(err));
        exit(1);
    }
#else /* _WIN32 */
    abort();
#endif
}

static void qemu_cpu_kick_no_halt(void)
{
    CPUState *cpu;
    /* Ensure whatever caused the exit has reached the CPU threads before
     * writing exit_request.
     */
    atomic_mb_set(&exit_request, 1);
    cpu = atomic_mb_read(&tcg_current_cpu);
    if (cpu) {
        cpu_exit(cpu);
    }
}

void qemu_cpu_kick(CPUState *cpu)
{
    qemu_cond_broadcast(cpu->halt_cond);
    if (tcg_enabled()) {
        qemu_cpu_kick_no_halt();
    } else {
        qemu_cpu_kick_thread(cpu);
    }
}

void qemu_cpu_kick_self(void)
{
    assert(current_cpu);
    qemu_cpu_kick_thread(current_cpu);
}

bool qemu_cpu_is_self(CPUState *cpu)
{
    return qemu_thread_is_self(cpu->thread);
}

bool qemu_in_vcpu_thread(void)
{
    return current_cpu && qemu_cpu_is_self(current_cpu);
}

static __thread bool iothread_locked = false;

bool qemu_mutex_iothread_locked(void)
{
    return iothread_locked;
}

void qemu_mutex_lock_iothread(void)
{
    atomic_inc(&iothread_requesting_mutex);
    /* In the simple case there is no need to bump the VCPU thread out of
     * TCG code execution.
     */
    if (!tcg_enabled() || qemu_in_vcpu_thread() ||
        !first_cpu || !first_cpu->created) {
        qemu_mutex_lock(&qemu_global_mutex);
        atomic_dec(&iothread_requesting_mutex);
    } else {
        if (qemu_mutex_trylock(&qemu_global_mutex)) {
            qemu_cpu_kick_no_halt();
            qemu_mutex_lock(&qemu_global_mutex);
        }
        atomic_dec(&iothread_requesting_mutex);
        qemu_cond_broadcast(&qemu_io_proceeded_cond);
    }
    iothread_locked = true;
}
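
/* Lock acquisition dance for the TCG case: if the trylock fails, the vCPU
 * thread currently holds the BQL inside its execution loop, so we set
 * exit_request and cpu_exit() it out of translated code, then block on the
 * mutex; iothread_requesting_mutex keeps the vCPU thread parked in
 * qemu_tcg_wait_io_event() until qemu_io_proceeded_cond is broadcast.
 */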

void qemu_mutex_unlock_iothread(void)
{
    iothread_locked = false;
    qemu_mutex_unlock(&qemu_global_mutex);
}

static bool all_vcpus_paused(void)
{
    CPUState *cpu;

    CPU_FOREACH(cpu) {
        if (!cpu->stopped) {
            return false;
        }
    }

    return true;
}

void pause_all_vcpus(void)
{
    CPUState *cpu;

    qemu_clock_enable(QEMU_CLOCK_VIRTUAL, false);
    CPU_FOREACH(cpu) {
        cpu->stop = true;
        qemu_cpu_kick(cpu);
    }

    if (qemu_in_vcpu_thread()) {
        cpu_stop_current();
        if (!kvm_enabled()) {
            CPU_FOREACH(cpu) {
                cpu->stop = false;
                cpu->stopped = true;
            }
            return;
        }
    }

    while (!all_vcpus_paused()) {
        qemu_cond_wait(&qemu_pause_cond, &qemu_global_mutex);
        CPU_FOREACH(cpu) {
            qemu_cpu_kick(cpu);
        }
    }
}

void cpu_resume(CPUState *cpu)
{
    cpu->stop = false;
    cpu->stopped = false;
    qemu_cpu_kick(cpu);
}

void resume_all_vcpus(void)
{
    CPUState *cpu;

    qemu_clock_enable(QEMU_CLOCK_VIRTUAL, true);
    CPU_FOREACH(cpu) {
        cpu_resume(cpu);
    }
}

void cpu_remove(CPUState *cpu)
{
    cpu->stop = true;
    cpu->unplug = true;
    qemu_cpu_kick(cpu);
}

void cpu_remove_sync(CPUState *cpu)
{
    cpu_remove(cpu);
    while (cpu->created) {
        qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
    }
}

/* For temporary buffers for forming a name */
#define VCPU_THREAD_NAME_SIZE 16

static void qemu_tcg_init_vcpu(CPUState *cpu)
{
    char thread_name[VCPU_THREAD_NAME_SIZE];
    static QemuCond *tcg_halt_cond;
    static QemuThread *tcg_cpu_thread;

    /* share a single thread for all cpus with TCG */
    if (!tcg_cpu_thread) {
        cpu->thread = g_malloc0(sizeof(QemuThread));
        cpu->halt_cond = g_malloc0(sizeof(QemuCond));
        qemu_cond_init(cpu->halt_cond);
        tcg_halt_cond = cpu->halt_cond;
        snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/TCG",
                 cpu->cpu_index);
        qemu_thread_create(cpu->thread, thread_name, qemu_tcg_cpu_thread_fn,
                           cpu, QEMU_THREAD_JOINABLE);
#ifdef _WIN32
        cpu->hThread = qemu_thread_get_handle(cpu->thread);
#endif
        while (!cpu->created) {
            qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
        }
        tcg_cpu_thread = cpu->thread;
    } else {
        cpu->thread = tcg_cpu_thread;
        cpu->halt_cond = tcg_halt_cond;
    }
}

static void qemu_kvm_start_vcpu(CPUState *cpu)
{
    char thread_name[VCPU_THREAD_NAME_SIZE];

    cpu->thread = g_malloc0(sizeof(QemuThread));
    cpu->halt_cond = g_malloc0(sizeof(QemuCond));
    qemu_cond_init(cpu->halt_cond);
    snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/KVM",
             cpu->cpu_index);
    qemu_thread_create(cpu->thread, thread_name, qemu_kvm_cpu_thread_fn,
                       cpu, QEMU_THREAD_JOINABLE);
    while (!cpu->created) {
        qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
    }
}

static void qemu_dummy_start_vcpu(CPUState *cpu)
{
    char thread_name[VCPU_THREAD_NAME_SIZE];

    cpu->thread = g_malloc0(sizeof(QemuThread));
    cpu->halt_cond = g_malloc0(sizeof(QemuCond));
    qemu_cond_init(cpu->halt_cond);
    snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/DUMMY",
             cpu->cpu_index);
    qemu_thread_create(cpu->thread, thread_name, qemu_dummy_cpu_thread_fn, cpu,
                       QEMU_THREAD_JOINABLE);
    while (!cpu->created) {
        qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
    }
}

void qemu_init_vcpu(CPUState *cpu)
{
    cpu->nr_cores = smp_cores;
    cpu->nr_threads = smp_threads;
    cpu->stopped = true;

    if (!cpu->as) {
        /* If the target cpu hasn't set up any address spaces itself,
         * give it the default one.
         */
        AddressSpace *as = address_space_init_shareable(cpu->memory,
                                                        "cpu-memory");
        cpu->num_ases = 1;
        cpu_address_space_init(cpu, as, 0);
    }

    if (kvm_enabled()) {
        qemu_kvm_start_vcpu(cpu);
    } else if (tcg_enabled()) {
        qemu_tcg_init_vcpu(cpu);
    } else {
        qemu_dummy_start_vcpu(cpu);
    }
}

void cpu_stop_current(void)
{
    if (current_cpu) {
        current_cpu->stop = false;
        current_cpu->stopped = true;
        cpu_exit(current_cpu);
        qemu_cond_broadcast(&qemu_pause_cond);
    }
}

int vm_stop(RunState state)
{
    if (qemu_in_vcpu_thread()) {
        qemu_system_vmstop_request_prepare();
        qemu_system_vmstop_request(state);
        /*
         * FIXME: should not return to device code in case
         * vm_stop() has been requested.
         */
        cpu_stop_current();
        return 0;
    }

    return do_vm_stop(state);
}

/* Does a state transition even if the VM is already stopped; the
   current state is forgotten forever.  */
int vm_stop_force_state(RunState state)
{
    if (runstate_is_running()) {
        return vm_stop(state);
    } else {
        runstate_set(state);

        bdrv_drain_all();
        /* Make sure to return an error if the flush in a previous vm_stop()
         * failed.  */
        return bdrv_flush_all();
    }
}

void list_cpus(FILE *f, fprintf_function cpu_fprintf, const char *optarg)
{
    /* XXX: implement xxx_cpu_list for targets that still miss it */
#if defined(cpu_list)
    cpu_list(f, cpu_fprintf);
#endif
}

CpuInfoList *qmp_query_cpus(Error **errp)
{
    CpuInfoList *head = NULL, *cur_item = NULL;
    CPUState *cpu;

    CPU_FOREACH(cpu) {
        CpuInfoList *info;
#if defined(TARGET_I386)
        X86CPU *x86_cpu = X86_CPU(cpu);
        CPUX86State *env = &x86_cpu->env;
#elif defined(TARGET_PPC)
        PowerPCCPU *ppc_cpu = POWERPC_CPU(cpu);
        CPUPPCState *env = &ppc_cpu->env;
#elif defined(TARGET_SPARC)
        SPARCCPU *sparc_cpu = SPARC_CPU(cpu);
        CPUSPARCState *env = &sparc_cpu->env;
#elif defined(TARGET_MIPS)
        MIPSCPU *mips_cpu = MIPS_CPU(cpu);
        CPUMIPSState *env = &mips_cpu->env;
#elif defined(TARGET_TRICORE)
        TriCoreCPU *tricore_cpu = TRICORE_CPU(cpu);
        CPUTriCoreState *env = &tricore_cpu->env;
#endif

        cpu_synchronize_state(cpu);

        info = g_malloc0(sizeof(*info));
        info->value = g_malloc0(sizeof(*info->value));
        info->value->CPU = cpu->cpu_index;
        info->value->current = (cpu == first_cpu);
        info->value->halted = cpu->halted;
        info->value->qom_path = object_get_canonical_path(OBJECT(cpu));
        info->value->thread_id = cpu->thread_id;
#if defined(TARGET_I386)
        info->value->arch = CPU_INFO_ARCH_X86;
        info->value->u.x86.pc = env->eip + env->segs[R_CS].base;
#elif defined(TARGET_PPC)
        info->value->arch = CPU_INFO_ARCH_PPC;
        info->value->u.ppc.nip = env->nip;
#elif defined(TARGET_SPARC)
        info->value->arch = CPU_INFO_ARCH_SPARC;
        info->value->u.q_sparc.pc = env->pc;
        info->value->u.q_sparc.npc = env->npc;
#elif defined(TARGET_MIPS)
        info->value->arch = CPU_INFO_ARCH_MIPS;
        info->value->u.q_mips.PC = env->active_tc.PC;
#elif defined(TARGET_TRICORE)
        info->value->arch = CPU_INFO_ARCH_TRICORE;
        info->value->u.tricore.PC = env->PC;
#else
        info->value->arch = CPU_INFO_ARCH_OTHER;
#endif

        /* XXX: waiting for the qapi to support GSList */
        if (!cur_item) {
            head = cur_item = info;
        } else {
            cur_item->next = info;
            cur_item = info;
        }
    }

    return head;
}

void qmp_memsave(int64_t addr, int64_t size, const char *filename,
                 bool has_cpu, int64_t cpu_index, Error **errp)
{
    FILE *f;
    uint32_t l;
    CPUState *cpu;
    uint8_t buf[1024];
    int64_t orig_addr = addr, orig_size = size;

    if (!has_cpu) {
        cpu_index = 0;
    }

    cpu = qemu_get_cpu(cpu_index);
    if (cpu == NULL) {
        error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "cpu-index",
                   "a CPU number");
        return;
    }

    f = fopen(filename, "wb");
    if (!f) {
        error_setg_file_open(errp, errno, filename);
        return;
    }

    while (size != 0) {
        l = sizeof(buf);
        if (l > size) {
            l = size;
        }
        if (cpu_memory_rw_debug(cpu, addr, buf, l, 0) != 0) {
            error_setg(errp, "Invalid addr 0x%016" PRIx64 "/size %" PRId64
                             " specified", orig_addr, orig_size);
            goto exit;
        }
        if (fwrite(buf, 1, l, f) != l) {
            error_setg(errp, QERR_IO_ERROR);
            goto exit;
        }
        addr += l;
        size -= l;
    }

exit:
    fclose(f);
}

void qmp_pmemsave(int64_t addr, int64_t size, const char *filename,
                  Error **errp)
{
    FILE *f;
    uint32_t l;
    uint8_t buf[1024];

    f = fopen(filename, "wb");
    if (!f) {
        error_setg_file_open(errp, errno, filename);
        return;
    }

    while (size != 0) {
        l = sizeof(buf);
        if (l > size) {
            l = size;
        }
        cpu_physical_memory_read(addr, buf, l);
        if (fwrite(buf, 1, l, f) != l) {
            error_setg(errp, QERR_IO_ERROR);
            goto exit;
        }
        addr += l;
        size -= l;
    }

exit:
    fclose(f);
}

void qmp_inject_nmi(Error **errp)
{
    nmi_monitor_handle(monitor_get_cpu_index(), errp);
}

void dump_drift_info(FILE *f, fprintf_function cpu_fprintf)
{
    if (!use_icount) {
        return;
    }

    cpu_fprintf(f, "Host - Guest clock %"PRIi64" ms\n",
                (cpu_get_clock() - cpu_get_icount())/SCALE_MS);
    if (icount_align_option) {
        cpu_fprintf(f, "Max guest delay %"PRIi64" ms\n", -max_delay/SCALE_MS);
        cpu_fprintf(f, "Max guest advance %"PRIi64" ms\n", max_advance/SCALE_MS);
    } else {
        cpu_fprintf(f, "Max guest delay NA\n");
        cpu_fprintf(f, "Max guest advance NA\n");
    }
}