/*
 * QEMU System Emulator
 *
 * Copyright (c) 2003-2008 Fabrice Bellard
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

/* Needed early for CONFIG_BSD etc. */
#include "config-host.h"

#include "monitor/monitor.h"
#include "qapi/qmp/qerror.h"
#include "sysemu/sysemu.h"
#include "exec/gdbstub.h"
#include "sysemu/dma.h"
#include "sysemu/kvm.h"
#include "qmp-commands.h"

#include "qemu/thread.h"
#include "sysemu/cpus.h"
#include "sysemu/qtest.h"
#include "qemu/main-loop.h"
#include "qemu/bitmap.h"
#include "qemu/seqlock.h"
#include "qapi-event.h"
#include "hw/nmi.h"

#ifndef _WIN32
#include "qemu/compatfd.h"
#endif

#ifdef CONFIG_LINUX

#include <sys/prctl.h>

#ifndef PR_MCE_KILL
#define PR_MCE_KILL 33
#endif

#ifndef PR_MCE_KILL_SET
#define PR_MCE_KILL_SET 1
#endif

#ifndef PR_MCE_KILL_EARLY
#define PR_MCE_KILL_EARLY 1
#endif

#endif /* CONFIG_LINUX */

static CPUState *next_cpu;
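/* Drift statistics for -icount align, reported by dump_drift_info() below:
 * the worst observed guest-behind-host lag (max_delay, stored negative,
 * hence the negation when printed) and guest-ahead-of-host advance
 * (max_advance), in nanoseconds. */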
int64_t max_delay;
int64_t max_advance;

bool cpu_is_stopped(CPUState *cpu)
{
    return cpu->stopped || !runstate_is_running();
}

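/* A vCPU thread may go to sleep only when nothing is pending for it: no stop
 * request and no queued work.  Beyond that, the thread is idle if the whole
 * VM is stopped, or if the CPU is halted with no work and the halt is
 * emulated in userspace (with in-kernel KVM halt the thread must keep
 * running so the kernel can service the halt). */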
static bool cpu_thread_is_idle(CPUState *cpu)
{
    if (cpu->stop || cpu->queued_work_first) {
        return false;
    }
    if (cpu_is_stopped(cpu)) {
        return true;
    }
    if (!cpu->halted || cpu_has_work(cpu) ||
        kvm_halt_in_kernel()) {
        return false;
    }
    return true;
}

static bool all_cpu_threads_idle(void)
{
    CPUState *cpu;

    CPU_FOREACH(cpu) {
        if (!cpu_thread_is_idle(cpu)) {
            return false;
        }
    }
    return true;
}

/***********************************************************/
/* guest cycle counter */

/* Protected by TimersState seqlock */

static int64_t vm_clock_warp_start = -1;
/* Conversion factor from emulated instructions to virtual clock ticks. */
static int icount_time_shift;
/* Arbitrarily pick 1MIPS as the minimum allowable speed. */
#define MAX_ICOUNT_SHIFT 10

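/* The three icount timers (armed in configure_icount() below):
 * icount_rt_timer re-runs icount_adjust() once per realtime second,
 * icount_vm_timer re-runs it every 100ms of virtual time, and
 * icount_warp_timer fires icount_warp_rt() to end a pending clock warp. */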
static QEMUTimer *icount_rt_timer;
static QEMUTimer *icount_vm_timer;
static QEMUTimer *icount_warp_timer;

typedef struct TimersState {
    /* Protected by BQL. */
    int64_t cpu_ticks_prev;
    int64_t cpu_ticks_offset;

    /* cpu_clock_offset can be read outside of the BQL, so protect it with
     * this lock.
     */
    QemuSeqLock vm_clock_seqlock;
    int64_t cpu_clock_offset;
    int32_t cpu_ticks_enabled;
    int64_t dummy;

    /* Compensate for varying guest execution speed. */
    int64_t qemu_icount_bias;
    /* Only written by the TCG thread */
    int64_t qemu_icount;
} TimersState;

static TimersState timers_state;

/* Return the virtual CPU time, based on the instruction counter. */
static int64_t cpu_get_icount_locked(void)
{
    int64_t icount;
    CPUState *cpu = current_cpu;

    icount = timers_state.qemu_icount;
    if (cpu) {
        if (!cpu_can_do_io(cpu)) {
            fprintf(stderr, "Bad clock read\n");
        }
        icount -= (cpu->icount_decr.u16.low + cpu->icount_extra);
    }
    return timers_state.qemu_icount_bias + cpu_icount_to_ns(icount);
}

int64_t cpu_get_icount(void)
{
    int64_t icount;
    unsigned start;

    do {
        start = seqlock_read_begin(&timers_state.vm_clock_seqlock);
        icount = cpu_get_icount_locked();
    } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start));

    return icount;
}

int64_t cpu_icount_to_ns(int64_t icount)
{
    return icount << icount_time_shift;
}
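
/* Example: with icount_time_shift == 3, each emulated instruction accounts
 * for 2^3 = 8 ns of virtual time, i.e. a nominal 125 MIPS guest; shift == 10
 * (MAX_ICOUNT_SHIFT) corresponds to the 1 MIPS floor noted above. */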

/* return the host CPU cycle counter and handle stop/restart */
/* Caller must hold the BQL */
int64_t cpu_get_ticks(void)
{
    int64_t ticks;

    if (use_icount) {
        return cpu_get_icount();
    }

    ticks = timers_state.cpu_ticks_offset;
    if (timers_state.cpu_ticks_enabled) {
        ticks += cpu_get_real_ticks();
    }

    if (timers_state.cpu_ticks_prev > ticks) {
        /* Note: non-increasing ticks may happen if the host uses
           software suspend. */
        timers_state.cpu_ticks_offset += timers_state.cpu_ticks_prev - ticks;
        ticks = timers_state.cpu_ticks_prev;
    }

    timers_state.cpu_ticks_prev = ticks;
    return ticks;
}

static int64_t cpu_get_clock_locked(void)
{
    int64_t ticks;

    ticks = timers_state.cpu_clock_offset;
    if (timers_state.cpu_ticks_enabled) {
        ticks += get_clock();
    }

    return ticks;
}

/* return the host CPU monotonic timer and handle stop/restart */
int64_t cpu_get_clock(void)
{
    int64_t ti;
    unsigned start;

    do {
        start = seqlock_read_begin(&timers_state.vm_clock_seqlock);
        ti = cpu_get_clock_locked();
    } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start));

    return ti;
}

/* return the offset between the host clock and virtual CPU clock */
int64_t cpu_get_clock_offset(void)
{
    int64_t ti;
    unsigned start;

    do {
        start = seqlock_read_begin(&timers_state.vm_clock_seqlock);
        ti = timers_state.cpu_clock_offset;
        if (!timers_state.cpu_ticks_enabled) {
            ti -= get_clock();
        }
    } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start));

    return -ti;
}

/* enable cpu_get_ticks()
 * Caller must hold the BQL, which serves as the mutex for vm_clock_seqlock.
 */
void cpu_enable_ticks(void)
{
    /* Here, the thing really protected by the seqlock is cpu_clock_offset. */
    seqlock_write_lock(&timers_state.vm_clock_seqlock);
    if (!timers_state.cpu_ticks_enabled) {
        timers_state.cpu_ticks_offset -= cpu_get_real_ticks();
        timers_state.cpu_clock_offset -= get_clock();
        timers_state.cpu_ticks_enabled = 1;
    }
    seqlock_write_unlock(&timers_state.vm_clock_seqlock);
}

/* disable cpu_get_ticks(): the clock is stopped.  You must not call
 * cpu_get_ticks() after that.
 * Caller must hold the BQL, which serves as the mutex for vm_clock_seqlock.
 */
void cpu_disable_ticks(void)
{
    /* Here, the thing really protected by the seqlock is cpu_clock_offset. */
    seqlock_write_lock(&timers_state.vm_clock_seqlock);
    if (timers_state.cpu_ticks_enabled) {
        timers_state.cpu_ticks_offset += cpu_get_real_ticks();
        timers_state.cpu_clock_offset = cpu_get_clock_locked();
        timers_state.cpu_ticks_enabled = 0;
    }
    seqlock_write_unlock(&timers_state.vm_clock_seqlock);
}
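
/* Pairing in practice: do_vm_stop() below calls cpu_disable_ticks() when the
 * VM stops running; the matching cpu_enable_ticks() call is expected to
 * happen when the VM is (re)started, in vm_start() outside this file. */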

/* Correlation between real and virtual time is always going to be
   fairly approximate, so ignore small variation.
   When the guest is idle, real and virtual time will be aligned in
   the IO wait loop. */
#define ICOUNT_WOBBLE (get_ticks_per_sec() / 10)

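/* icount_adjust() below is a simple feedback loop.  Example: if virtual time
 * (cur_icount) has run more than ICOUNT_WOBBLE ahead of real time (cur_time),
 * icount_time_shift is decremented, so each instruction accounts for fewer
 * nanoseconds and virtual time advances more slowly; falling behind
 * increments the shift, up to MAX_ICOUNT_SHIFT. */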
static void icount_adjust(void)
{
    int64_t cur_time;
    int64_t cur_icount;
    int64_t delta;

    /* Protected by TimersState mutex. */
    static int64_t last_delta;

    /* If the VM is not running, then do nothing. */
    if (!runstate_is_running()) {
        return;
    }

    seqlock_write_lock(&timers_state.vm_clock_seqlock);
    cur_time = cpu_get_clock_locked();
    cur_icount = cpu_get_icount_locked();

    delta = cur_icount - cur_time;
    /* FIXME: This is a very crude algorithm, somewhat prone to oscillation. */
    if (delta > 0
        && last_delta + ICOUNT_WOBBLE < delta * 2
        && icount_time_shift > 0) {
        /* The guest is getting too far ahead.  Slow time down. */
        icount_time_shift--;
    }
    if (delta < 0
        && last_delta - ICOUNT_WOBBLE > delta * 2
        && icount_time_shift < MAX_ICOUNT_SHIFT) {
        /* The guest is getting too far behind.  Speed time up. */
        icount_time_shift++;
    }
    last_delta = delta;
    timers_state.qemu_icount_bias = cur_icount
                              - (timers_state.qemu_icount << icount_time_shift);
    seqlock_write_unlock(&timers_state.vm_clock_seqlock);
}

static void icount_adjust_rt(void *opaque)
{
    timer_mod(icount_rt_timer,
              qemu_clock_get_ms(QEMU_CLOCK_REALTIME) + 1000);
    icount_adjust();
}

static void icount_adjust_vm(void *opaque)
{
    timer_mod(icount_vm_timer,
              qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
              get_ticks_per_sec() / 10);
    icount_adjust();
}

static int64_t qemu_icount_round(int64_t count)
{
    return (count + (1 << icount_time_shift) - 1) >> icount_time_shift;
}
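
/* qemu_icount_round() converts a nanosecond budget into a whole number of
 * instructions, rounding up.  Example: with icount_time_shift == 3, a 20 ns
 * deadline yields (20 + 7) >> 3 = 3 instructions. */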

static void icount_warp_rt(void *opaque)
{
    /* The icount_warp_timer is rescheduled soon after vm_clock_warp_start
     * changes from -1 to another value, so the race here is okay.
     */
    if (atomic_read(&vm_clock_warp_start) == -1) {
        return;
    }

    seqlock_write_lock(&timers_state.vm_clock_seqlock);
    if (runstate_is_running()) {
        int64_t clock = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
        int64_t warp_delta;

        warp_delta = clock - vm_clock_warp_start;
        if (use_icount == 2) {
            /*
             * In adaptive mode, do not let QEMU_CLOCK_VIRTUAL run too
             * far ahead of real time.
             */
            int64_t cur_time = cpu_get_clock_locked();
            int64_t cur_icount = cpu_get_icount_locked();
            int64_t delta = cur_time - cur_icount;
            warp_delta = MIN(warp_delta, delta);
        }
        timers_state.qemu_icount_bias += warp_delta;
    }
    vm_clock_warp_start = -1;
    seqlock_write_unlock(&timers_state.vm_clock_seqlock);

    if (qemu_clock_expired(QEMU_CLOCK_VIRTUAL)) {
        qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
    }
}

void qtest_clock_warp(int64_t dest)
{
    int64_t clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
    assert(qtest_enabled());
    while (clock < dest) {
        int64_t deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
        int64_t warp = qemu_soonest_timeout(dest - clock, deadline);
        seqlock_write_lock(&timers_state.vm_clock_seqlock);
        timers_state.qemu_icount_bias += warp;
        seqlock_write_unlock(&timers_state.vm_clock_seqlock);

        qemu_clock_run_timers(QEMU_CLOCK_VIRTUAL);
        clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
    }
    qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
}

void qemu_clock_warp(QEMUClockType type)
{
    int64_t clock;
    int64_t deadline;

    /*
     * There are too many global variables to make the "warp" behavior
     * applicable to other clocks.  But a clock argument removes the
     * need for if statements all over the place.
     */
    if (type != QEMU_CLOCK_VIRTUAL || !use_icount) {
        return;
    }

    /*
     * If the CPUs have been sleeping, advance QEMU_CLOCK_VIRTUAL timer now.
     * This ensures that the deadline for the timer is computed correctly
     * below.  This also makes sure that the insn counter is synchronized
     * before the CPU starts running, in case the CPU is woken by an event
     * other than the earliest QEMU_CLOCK_VIRTUAL timer.
     */
    icount_warp_rt(NULL);
    timer_del(icount_warp_timer);
    if (!all_cpu_threads_idle()) {
        return;
    }

    if (qtest_enabled()) {
        /* When testing, qtest commands advance icount. */
        return;
    }

    /* We want to use the earliest deadline from ALL vm_clocks */
    clock = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
    deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
    if (deadline < 0) {
        return;
    }

    if (deadline > 0) {
        /*
         * Ensure QEMU_CLOCK_VIRTUAL proceeds even when the virtual CPU goes to
         * sleep.  Otherwise, the CPU might be waiting for a future timer
         * interrupt to wake it up, but the interrupt never comes because
         * the vCPU isn't running any insns and thus doesn't advance
         * QEMU_CLOCK_VIRTUAL.
         *
         * An extreme solution for this problem would be to never let VCPUs
         * sleep in icount mode if there is a pending QEMU_CLOCK_VIRTUAL
         * timer; rather time could just advance to the next QEMU_CLOCK_VIRTUAL
         * event.  Instead, we do stop VCPUs and only advance QEMU_CLOCK_VIRTUAL
         * after some "real" time (related to the time left until the next
         * event) has passed.  The QEMU_CLOCK_REALTIME timer will do this.
         * This avoids making the warps visible externally; for example, you
         * will not be sending network packets continuously instead of every
         * 100ms.
         */
        seqlock_write_lock(&timers_state.vm_clock_seqlock);
        if (vm_clock_warp_start == -1 || vm_clock_warp_start > clock) {
            vm_clock_warp_start = clock;
        }
        seqlock_write_unlock(&timers_state.vm_clock_seqlock);
        timer_mod_anticipate(icount_warp_timer, clock + deadline);
    } else if (deadline == 0) {
        qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
    }
}

static bool icount_state_needed(void *opaque)
{
    return use_icount;
}

/*
 * This is a subsection for icount migration.
 */
static const VMStateDescription icount_vmstate_timers = {
    .name = "timer/icount",
    .version_id = 1,
    .minimum_version_id = 1,
    .fields = (VMStateField[]) {
        VMSTATE_INT64(qemu_icount_bias, TimersState),
        VMSTATE_INT64(qemu_icount, TimersState),
        VMSTATE_END_OF_LIST()
    }
};

static const VMStateDescription vmstate_timers = {
    .name = "timer",
    .version_id = 2,
    .minimum_version_id = 1,
    .fields = (VMStateField[]) {
        VMSTATE_INT64(cpu_ticks_offset, TimersState),
        VMSTATE_INT64(dummy, TimersState),
        VMSTATE_INT64_V(cpu_clock_offset, TimersState, 2),
        VMSTATE_END_OF_LIST()
    },
    .subsections = (VMStateSubsection[]) {
        {
            .vmsd = &icount_vmstate_timers,
            .needed = icount_state_needed,
        }, {
            /* empty */
        }
    }
};

void configure_icount(QemuOpts *opts, Error **errp)
{
    const char *option;
    char *rem_str = NULL;

    seqlock_init(&timers_state.vm_clock_seqlock, NULL);
    vmstate_register(NULL, 0, &vmstate_timers, &timers_state);
    option = qemu_opt_get(opts, "shift");
    if (!option) {
        if (qemu_opt_get(opts, "align") != NULL) {
            error_setg(errp, "Please specify shift option when using align");
        }
        return;
    }
    icount_align_option = qemu_opt_get_bool(opts, "align", false);
    icount_warp_timer = timer_new_ns(QEMU_CLOCK_REALTIME,
                                     icount_warp_rt, NULL);
    if (strcmp(option, "auto") != 0) {
        errno = 0;
        icount_time_shift = strtol(option, &rem_str, 0);
        if (errno != 0 || *rem_str != '\0' || !strlen(option)) {
            error_setg(errp, "icount: Invalid shift value");
        }
        use_icount = 1;
        return;
    } else if (icount_align_option) {
        error_setg(errp, "shift=auto and align=on are incompatible");
    }

    use_icount = 2;

    /* 125MIPS seems a reasonable initial guess at the guest speed.
       It will be corrected fairly quickly anyway. */
    icount_time_shift = 3;

    /* Have both realtime and virtual time triggers for speed adjustment.
       The realtime trigger catches emulated time passing too slowly,
       the virtual time trigger catches emulated time passing too fast.
       Realtime triggers occur even when idle, so use them less frequently
       than VM triggers. */
    icount_rt_timer = timer_new_ms(QEMU_CLOCK_REALTIME,
                                   icount_adjust_rt, NULL);
    timer_mod(icount_rt_timer,
              qemu_clock_get_ms(QEMU_CLOCK_REALTIME) + 1000);
    icount_vm_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
                                   icount_adjust_vm, NULL);
    timer_mod(icount_vm_timer,
              qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
              get_ticks_per_sec() / 10);
}
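
/* Invocation sketch (assuming the usual -icount command-line wiring): a
 * fixed shift such as "-icount 3" sets use_icount = 1 with 8 ns charged per
 * instruction, while "-icount shift=auto" selects the adaptive mode
 * (use_icount == 2) driven by the two adjustment timers armed above. */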

/***********************************************************/
void hw_error(const char *fmt, ...)
{
    va_list ap;
    CPUState *cpu;

    va_start(ap, fmt);
    fprintf(stderr, "qemu: hardware error: ");
    vfprintf(stderr, fmt, ap);
    fprintf(stderr, "\n");
    CPU_FOREACH(cpu) {
        fprintf(stderr, "CPU #%d:\n", cpu->cpu_index);
        cpu_dump_state(cpu, stderr, fprintf, CPU_DUMP_FPU);
    }
    va_end(ap);
    abort();
}

void cpu_synchronize_all_states(void)
{
    CPUState *cpu;

    CPU_FOREACH(cpu) {
        cpu_synchronize_state(cpu);
    }
}

void cpu_synchronize_all_post_reset(void)
{
    CPUState *cpu;

    CPU_FOREACH(cpu) {
        cpu_synchronize_post_reset(cpu);
    }
}

void cpu_synchronize_all_post_init(void)
{
    CPUState *cpu;

    CPU_FOREACH(cpu) {
        cpu_synchronize_post_init(cpu);
    }
}

static int do_vm_stop(RunState state)
{
    int ret = 0;

    if (runstate_is_running()) {
        cpu_disable_ticks();
        pause_all_vcpus();
        runstate_set(state);
        vm_state_notify(0, state);
        qapi_event_send_stop(&error_abort);
    }

    bdrv_drain_all();
    ret = bdrv_flush_all();

    return ret;
}

static bool cpu_can_run(CPUState *cpu)
{
    if (cpu->stop) {
        return false;
    }
    if (cpu_is_stopped(cpu)) {
        return false;
    }
    return true;
}

static void cpu_handle_guest_debug(CPUState *cpu)
{
    gdb_set_stop_cpu(cpu);
    qemu_system_debug_request();
    cpu->stopped = true;
}

static void cpu_signal(int sig)
{
    if (current_cpu) {
        cpu_exit(current_cpu);
    }
    exit_request = 1;
}

#ifdef CONFIG_LINUX
static void sigbus_reraise(void)
{
    sigset_t set;
    struct sigaction action;

    memset(&action, 0, sizeof(action));
    action.sa_handler = SIG_DFL;
    if (!sigaction(SIGBUS, &action, NULL)) {
        raise(SIGBUS);
        sigemptyset(&set);
        sigaddset(&set, SIGBUS);
        sigprocmask(SIG_UNBLOCK, &set, NULL);
    }
    perror("Failed to re-raise SIGBUS!");
    abort();
}

static void sigbus_handler(int n, struct qemu_signalfd_siginfo *siginfo,
                           void *ctx)
{
    if (kvm_on_sigbus(siginfo->ssi_code,
                      (void *)(intptr_t)siginfo->ssi_addr)) {
        sigbus_reraise();
    }
}

static void qemu_init_sigbus(void)
{
    struct sigaction action;

    memset(&action, 0, sizeof(action));
    action.sa_flags = SA_SIGINFO;
    action.sa_sigaction = (void (*)(int, siginfo_t*, void*))sigbus_handler;
    sigaction(SIGBUS, &action, NULL);

    prctl(PR_MCE_KILL, PR_MCE_KILL_SET, PR_MCE_KILL_EARLY, 0, 0);
}

static void qemu_kvm_eat_signals(CPUState *cpu)
{
    struct timespec ts = { 0, 0 };
    siginfo_t siginfo;
    sigset_t waitset;
    sigset_t chkset;
    int r;

    sigemptyset(&waitset);
    sigaddset(&waitset, SIG_IPI);
    sigaddset(&waitset, SIGBUS);

    do {
        r = sigtimedwait(&waitset, &siginfo, &ts);
        if (r == -1 && !(errno == EAGAIN || errno == EINTR)) {
            perror("sigtimedwait");
            exit(1);
        }

        switch (r) {
        case SIGBUS:
            if (kvm_on_sigbus_vcpu(cpu, siginfo.si_code, siginfo.si_addr)) {
                sigbus_reraise();
            }
            break;
        default:
            break;
        }

        r = sigpending(&chkset);
        if (r == -1) {
            perror("sigpending");
            exit(1);
        }
    } while (sigismember(&chkset, SIG_IPI) || sigismember(&chkset, SIGBUS));
}

#else /* !CONFIG_LINUX */

static void qemu_init_sigbus(void)
{
}

static void qemu_kvm_eat_signals(CPUState *cpu)
{
}
#endif /* !CONFIG_LINUX */

#ifndef _WIN32
static void dummy_signal(int sig)
{
}

static void qemu_kvm_init_cpu_signals(CPUState *cpu)
{
    int r;
    sigset_t set;
    struct sigaction sigact;

    memset(&sigact, 0, sizeof(sigact));
    sigact.sa_handler = dummy_signal;
    sigaction(SIG_IPI, &sigact, NULL);

    pthread_sigmask(SIG_BLOCK, NULL, &set);
    sigdelset(&set, SIG_IPI);
    sigdelset(&set, SIGBUS);
    r = kvm_set_signal_mask(cpu, &set);
    if (r) {
        fprintf(stderr, "kvm_set_signal_mask: %s\n", strerror(-r));
        exit(1);
    }
}

static void qemu_tcg_init_cpu_signals(void)
{
    sigset_t set;
    struct sigaction sigact;

    memset(&sigact, 0, sizeof(sigact));
    sigact.sa_handler = cpu_signal;
    sigaction(SIG_IPI, &sigact, NULL);

    sigemptyset(&set);
    sigaddset(&set, SIG_IPI);
    pthread_sigmask(SIG_UNBLOCK, &set, NULL);
}

#else /* _WIN32 */
static void qemu_kvm_init_cpu_signals(CPUState *cpu)
{
    abort();
}

static void qemu_tcg_init_cpu_signals(void)
{
}
#endif /* _WIN32 */

static QemuMutex qemu_global_mutex;
static QemuCond qemu_io_proceeded_cond;
static bool iothread_requesting_mutex;

static QemuThread io_thread;

static QemuThread *tcg_cpu_thread;
static QemuCond *tcg_halt_cond;

/* cpu creation */
static QemuCond qemu_cpu_cond;
/* system init */
static QemuCond qemu_pause_cond;
static QemuCond qemu_work_cond;

void qemu_init_cpu_loop(void)
{
    qemu_init_sigbus();
    qemu_cond_init(&qemu_cpu_cond);
    qemu_cond_init(&qemu_pause_cond);
    qemu_cond_init(&qemu_work_cond);
    qemu_cond_init(&qemu_io_proceeded_cond);
    qemu_mutex_init(&qemu_global_mutex);

    qemu_thread_get_self(&io_thread);
}

void run_on_cpu(CPUState *cpu, void (*func)(void *data), void *data)
{
    struct qemu_work_item wi;

    if (qemu_cpu_is_self(cpu)) {
        func(data);
        return;
    }

    wi.func = func;
    wi.data = data;
    wi.free = false;
    if (cpu->queued_work_first == NULL) {
        cpu->queued_work_first = &wi;
    } else {
        cpu->queued_work_last->next = &wi;
    }
    cpu->queued_work_last = &wi;
    wi.next = NULL;
    wi.done = false;

    qemu_cpu_kick(cpu);
    while (!wi.done) {
        CPUState *self_cpu = current_cpu;

        qemu_cond_wait(&qemu_work_cond, &qemu_global_mutex);
        current_cpu = self_cpu;
    }
}
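
/* Usage sketch (illustrative only; reset_one_cpu is a hypothetical helper,
 * and the caller is assumed to hold the global mutex, as run_on_cpu()'s
 * wait on qemu_work_cond requires):
 *
 *     static void reset_one_cpu(void *data)
 *     {
 *         cpu_reset(CPU(data));
 *     }
 *
 *     run_on_cpu(cpu, reset_one_cpu, cpu);
 *
 * The call blocks until the target vCPU thread has run the function; the
 * work item can live on the caller's stack because run_on_cpu() does not
 * return before wi.done is set. */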

void async_run_on_cpu(CPUState *cpu, void (*func)(void *data), void *data)
{
    struct qemu_work_item *wi;

    if (qemu_cpu_is_self(cpu)) {
        func(data);
        return;
    }

    wi = g_malloc0(sizeof(struct qemu_work_item));
    wi->func = func;
    wi->data = data;
    wi->free = true;
    if (cpu->queued_work_first == NULL) {
        cpu->queued_work_first = wi;
    } else {
        cpu->queued_work_last->next = wi;
    }
    cpu->queued_work_last = wi;
    wi->next = NULL;
    wi->done = false;

    qemu_cpu_kick(cpu);
}
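
/* Design note: the async variant cannot use a stack work item since it does
 * not wait for completion, so the item is heap-allocated and wi->free = true
 * tells flush_queued_work() below to g_free() it after running the
 * function. */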

static void flush_queued_work(CPUState *cpu)
{
    struct qemu_work_item *wi;

    if (cpu->queued_work_first == NULL) {
        return;
    }

    while ((wi = cpu->queued_work_first)) {
        cpu->queued_work_first = wi->next;
        wi->func(wi->data);
        wi->done = true;
        if (wi->free) {
            g_free(wi);
        }
    }
    cpu->queued_work_last = NULL;
    qemu_cond_broadcast(&qemu_work_cond);
}

static void qemu_wait_io_event_common(CPUState *cpu)
{
    if (cpu->stop) {
        cpu->stop = false;
        cpu->stopped = true;
        qemu_cond_signal(&qemu_pause_cond);
    }
    flush_queued_work(cpu);
    cpu->thread_kicked = false;
}

static void qemu_tcg_wait_io_event(void)
{
    CPUState *cpu;

    while (all_cpu_threads_idle()) {
        /* Start accounting real time to the virtual clock if the CPUs
           are idle. */
        qemu_clock_warp(QEMU_CLOCK_VIRTUAL);
        qemu_cond_wait(tcg_halt_cond, &qemu_global_mutex);
    }

    while (iothread_requesting_mutex) {
        qemu_cond_wait(&qemu_io_proceeded_cond, &qemu_global_mutex);
    }

    CPU_FOREACH(cpu) {
        qemu_wait_io_event_common(cpu);
    }
}

static void qemu_kvm_wait_io_event(CPUState *cpu)
{
    while (cpu_thread_is_idle(cpu)) {
        qemu_cond_wait(cpu->halt_cond, &qemu_global_mutex);
    }

    qemu_kvm_eat_signals(cpu);
    qemu_wait_io_event_common(cpu);
}

static void *qemu_kvm_cpu_thread_fn(void *arg)
{
    CPUState *cpu = arg;
    int r;

    qemu_mutex_lock(&qemu_global_mutex);
    qemu_thread_get_self(cpu->thread);
    cpu->thread_id = qemu_get_thread_id();
    current_cpu = cpu;

    r = kvm_init_vcpu(cpu);
    if (r < 0) {
        fprintf(stderr, "kvm_init_vcpu failed: %s\n", strerror(-r));
        exit(1);
    }

    qemu_kvm_init_cpu_signals(cpu);

    /* signal CPU creation */
    cpu->created = true;
    qemu_cond_signal(&qemu_cpu_cond);

    while (1) {
        if (cpu_can_run(cpu)) {
            r = kvm_cpu_exec(cpu);
            if (r == EXCP_DEBUG) {
                cpu_handle_guest_debug(cpu);
            }
        }
        qemu_kvm_wait_io_event(cpu);
    }

    return NULL;
}

static void *qemu_dummy_cpu_thread_fn(void *arg)
{
#ifdef _WIN32
    fprintf(stderr, "qtest is not supported under Windows\n");
    exit(1);
#else
    CPUState *cpu = arg;
    sigset_t waitset;
    int r;

    qemu_mutex_lock_iothread();
    qemu_thread_get_self(cpu->thread);
    cpu->thread_id = qemu_get_thread_id();

    sigemptyset(&waitset);
    sigaddset(&waitset, SIG_IPI);

    /* signal CPU creation */
    cpu->created = true;
    qemu_cond_signal(&qemu_cpu_cond);

    current_cpu = cpu;
    while (1) {
        current_cpu = NULL;
        qemu_mutex_unlock_iothread();
        do {
            int sig;
            r = sigwait(&waitset, &sig);
        } while (r == -1 && (errno == EAGAIN || errno == EINTR));
        if (r == -1) {
            perror("sigwait");
            exit(1);
        }
        qemu_mutex_lock_iothread();
        current_cpu = cpu;
        qemu_wait_io_event_common(cpu);
    }

    return NULL;
#endif
}
static void tcg_exec_all(void);

static void *qemu_tcg_cpu_thread_fn(void *arg)
{
    CPUState *cpu = arg;

    qemu_tcg_init_cpu_signals();
    qemu_thread_get_self(cpu->thread);

    qemu_mutex_lock(&qemu_global_mutex);
    CPU_FOREACH(cpu) {
        cpu->thread_id = qemu_get_thread_id();
        cpu->created = true;
    }
    qemu_cond_signal(&qemu_cpu_cond);

    /* wait for initial kick-off after machine start */
    while (QTAILQ_FIRST(&cpus)->stopped) {
        qemu_cond_wait(tcg_halt_cond, &qemu_global_mutex);

        /* process any pending work */
        CPU_FOREACH(cpu) {
            qemu_wait_io_event_common(cpu);
        }
    }

    while (1) {
        tcg_exec_all();

        if (use_icount) {
            int64_t deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);

            if (deadline == 0) {
                qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
            }
        }
        qemu_tcg_wait_io_event();
    }

    return NULL;
}

static void qemu_cpu_kick_thread(CPUState *cpu)
{
#ifndef _WIN32
    int err;

    err = pthread_kill(cpu->thread->thread, SIG_IPI);
    if (err) {
        fprintf(stderr, "qemu:%s: %s", __func__, strerror(err));
        exit(1);
    }
#else /* _WIN32 */
    if (!qemu_cpu_is_self(cpu)) {
        CONTEXT tcgContext;

        if (SuspendThread(cpu->hThread) == (DWORD)-1) {
            fprintf(stderr, "qemu:%s: GetLastError:%lu\n", __func__,
                    GetLastError());
            exit(1);
        }

        /* On multi-core systems, we are not sure that the thread is actually
         * suspended until we can get the context.
         */
        tcgContext.ContextFlags = CONTEXT_CONTROL;
        while (GetThreadContext(cpu->hThread, &tcgContext) != 0) {
            continue;
        }

        cpu_signal(0);

        if (ResumeThread(cpu->hThread) == (DWORD)-1) {
            fprintf(stderr, "qemu:%s: GetLastError:%lu\n", __func__,
                    GetLastError());
            exit(1);
        }
    }
#endif
}

void qemu_cpu_kick(CPUState *cpu)
{
    qemu_cond_broadcast(cpu->halt_cond);
    if (!tcg_enabled() && !cpu->thread_kicked) {
        qemu_cpu_kick_thread(cpu);
        cpu->thread_kicked = true;
    }
}

void qemu_cpu_kick_self(void)
{
#ifndef _WIN32
    assert(current_cpu);

    if (!current_cpu->thread_kicked) {
        qemu_cpu_kick_thread(current_cpu);
        current_cpu->thread_kicked = true;
    }
#else
    abort();
#endif
}

bool qemu_cpu_is_self(CPUState *cpu)
{
    return qemu_thread_is_self(cpu->thread);
}

static bool qemu_in_vcpu_thread(void)
{
    return current_cpu && qemu_cpu_is_self(current_cpu);
}

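/* With TCG, the vCPU thread can hold the global mutex for a long stretch of
 * translated code.  If the trylock below fails, kick first_cpu out of its
 * execution loop (SIG_IPI -> cpu_signal() -> cpu_exit()) so the I/O thread
 * can acquire the lock promptly; iothread_requesting_mutex keeps the TCG
 * thread from re-taking it first (see qemu_tcg_wait_io_event() above). */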
void qemu_mutex_lock_iothread(void)
{
    if (!tcg_enabled()) {
        qemu_mutex_lock(&qemu_global_mutex);
    } else {
        iothread_requesting_mutex = true;
        if (qemu_mutex_trylock(&qemu_global_mutex)) {
            qemu_cpu_kick_thread(first_cpu);
            qemu_mutex_lock(&qemu_global_mutex);
        }
        iothread_requesting_mutex = false;
        qemu_cond_broadcast(&qemu_io_proceeded_cond);
    }
}

void qemu_mutex_unlock_iothread(void)
{
    qemu_mutex_unlock(&qemu_global_mutex);
}

static int all_vcpus_paused(void)
{
    CPUState *cpu;

    CPU_FOREACH(cpu) {
        if (!cpu->stopped) {
            return 0;
        }
    }

    return 1;
}

void pause_all_vcpus(void)
{
    CPUState *cpu;

    qemu_clock_enable(QEMU_CLOCK_VIRTUAL, false);
    CPU_FOREACH(cpu) {
        cpu->stop = true;
        qemu_cpu_kick(cpu);
    }

    if (qemu_in_vcpu_thread()) {
        cpu_stop_current();
        if (!kvm_enabled()) {
            CPU_FOREACH(cpu) {
                cpu->stop = false;
                cpu->stopped = true;
            }
            return;
        }
    }

    while (!all_vcpus_paused()) {
        qemu_cond_wait(&qemu_pause_cond, &qemu_global_mutex);
        CPU_FOREACH(cpu) {
            qemu_cpu_kick(cpu);
        }
    }
}

void cpu_resume(CPUState *cpu)
{
    cpu->stop = false;
    cpu->stopped = false;
    qemu_cpu_kick(cpu);
}

void resume_all_vcpus(void)
{
    CPUState *cpu;

    qemu_clock_enable(QEMU_CLOCK_VIRTUAL, true);
    CPU_FOREACH(cpu) {
        cpu_resume(cpu);
    }
}

/* For temporary buffers for forming a name */
#define VCPU_THREAD_NAME_SIZE 16

static void qemu_tcg_init_vcpu(CPUState *cpu)
{
    char thread_name[VCPU_THREAD_NAME_SIZE];

    tcg_cpu_address_space_init(cpu, cpu->as);

    /* share a single thread for all cpus with TCG */
    if (!tcg_cpu_thread) {
        cpu->thread = g_malloc0(sizeof(QemuThread));
        cpu->halt_cond = g_malloc0(sizeof(QemuCond));
        qemu_cond_init(cpu->halt_cond);
        tcg_halt_cond = cpu->halt_cond;
        snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/TCG",
                 cpu->cpu_index);
        qemu_thread_create(cpu->thread, thread_name, qemu_tcg_cpu_thread_fn,
                           cpu, QEMU_THREAD_JOINABLE);
#ifdef _WIN32
        cpu->hThread = qemu_thread_get_handle(cpu->thread);
#endif
        while (!cpu->created) {
            qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
        }
        tcg_cpu_thread = cpu->thread;
    } else {
        cpu->thread = tcg_cpu_thread;
        cpu->halt_cond = tcg_halt_cond;
    }
}

static void qemu_kvm_start_vcpu(CPUState *cpu)
{
    char thread_name[VCPU_THREAD_NAME_SIZE];

    cpu->thread = g_malloc0(sizeof(QemuThread));
    cpu->halt_cond = g_malloc0(sizeof(QemuCond));
    qemu_cond_init(cpu->halt_cond);
    snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/KVM",
             cpu->cpu_index);
    qemu_thread_create(cpu->thread, thread_name, qemu_kvm_cpu_thread_fn,
                       cpu, QEMU_THREAD_JOINABLE);
    while (!cpu->created) {
        qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
    }
}

static void qemu_dummy_start_vcpu(CPUState *cpu)
{
    char thread_name[VCPU_THREAD_NAME_SIZE];

    cpu->thread = g_malloc0(sizeof(QemuThread));
    cpu->halt_cond = g_malloc0(sizeof(QemuCond));
    qemu_cond_init(cpu->halt_cond);
    snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/DUMMY",
             cpu->cpu_index);
    qemu_thread_create(cpu->thread, thread_name, qemu_dummy_cpu_thread_fn, cpu,
                       QEMU_THREAD_JOINABLE);
    while (!cpu->created) {
        qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
    }
}

void qemu_init_vcpu(CPUState *cpu)
{
    cpu->nr_cores = smp_cores;
    cpu->nr_threads = smp_threads;
    cpu->stopped = true;
    if (kvm_enabled()) {
        qemu_kvm_start_vcpu(cpu);
    } else if (tcg_enabled()) {
        qemu_tcg_init_vcpu(cpu);
    } else {
        qemu_dummy_start_vcpu(cpu);
    }
}

void cpu_stop_current(void)
{
    if (current_cpu) {
        current_cpu->stop = false;
        current_cpu->stopped = true;
        cpu_exit(current_cpu);
        qemu_cond_signal(&qemu_pause_cond);
    }
}

int vm_stop(RunState state)
{
    if (qemu_in_vcpu_thread()) {
        qemu_system_vmstop_request_prepare();
        qemu_system_vmstop_request(state);
        /*
         * FIXME: should not return to device code in case
         * vm_stop() has been requested.
         */
        cpu_stop_current();
        return 0;
    }

    return do_vm_stop(state);
}

/* does a state transition even if the VM is already stopped,
   current state is forgotten forever */
int vm_stop_force_state(RunState state)
{
    if (runstate_is_running()) {
        return vm_stop(state);
    } else {
        runstate_set(state);
        /* Make sure to return an error if the flush in a previous vm_stop()
         * failed. */
        return bdrv_flush_all();
    }
}

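/* Worked example for the icount budget computed below (shift == 3): a
 * 1000 ns deadline rounds up to 125 instructions; 125 <= 0xffff, so
 * icount_decr.u16.low gets all of it and icount_extra stays 0.  Budgets
 * above 0xffff instructions are split: the first 0xffff go into
 * icount_decr.u16.low and the remainder spills into icount_extra. */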
static int tcg_cpu_exec(CPUArchState *env)
{
    CPUState *cpu = ENV_GET_CPU(env);
    int ret;
#ifdef CONFIG_PROFILER
    int64_t ti;
#endif

#ifdef CONFIG_PROFILER
    ti = profile_getclock();
#endif
    if (use_icount) {
        int64_t count;
        int64_t deadline;
        int decr;
        timers_state.qemu_icount -= (cpu->icount_decr.u16.low
                                     + cpu->icount_extra);
        cpu->icount_decr.u16.low = 0;
        cpu->icount_extra = 0;
        deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);

        /* Maintain prior (possibly buggy) behaviour where if no deadline
         * was set (as there is no QEMU_CLOCK_VIRTUAL timer) or it is more than
         * INT32_MAX nanoseconds ahead, we still use INT32_MAX
         * nanoseconds.
         */
        if ((deadline < 0) || (deadline > INT32_MAX)) {
            deadline = INT32_MAX;
        }

        count = qemu_icount_round(deadline);
        timers_state.qemu_icount += count;
        decr = (count > 0xffff) ? 0xffff : count;
        count -= decr;
        cpu->icount_decr.u16.low = decr;
        cpu->icount_extra = count;
    }
    ret = cpu_exec(env);
#ifdef CONFIG_PROFILER
    qemu_time += profile_getclock() - ti;
#endif
    if (use_icount) {
        /* Fold pending instructions back into the
           instruction counter, and clear the interrupt flag. */
        timers_state.qemu_icount -= (cpu->icount_decr.u16.low
                                     + cpu->icount_extra);
        cpu->icount_decr.u32 = 0;
        cpu->icount_extra = 0;
    }
    return ret;
}

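/* tcg_exec_all() runs the vCPUs round-robin on the single TCG thread,
 * resuming from next_cpu and breaking out on an EXCP_DEBUG, a stop request,
 * or a pending exit_request. */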
static void tcg_exec_all(void)
{
    int r;

    /* Account partial waits to QEMU_CLOCK_VIRTUAL. */
    qemu_clock_warp(QEMU_CLOCK_VIRTUAL);

    if (next_cpu == NULL) {
        next_cpu = first_cpu;
    }
    for (; next_cpu != NULL && !exit_request; next_cpu = CPU_NEXT(next_cpu)) {
        CPUState *cpu = next_cpu;
        CPUArchState *env = cpu->env_ptr;

        qemu_clock_enable(QEMU_CLOCK_VIRTUAL,
                          (cpu->singlestep_enabled & SSTEP_NOTIMER) == 0);

        if (cpu_can_run(cpu)) {
            r = tcg_cpu_exec(env);
            if (r == EXCP_DEBUG) {
                cpu_handle_guest_debug(cpu);
                break;
            }
        } else if (cpu->stop || cpu->stopped) {
            break;
        }
    }
    exit_request = 0;
}

void list_cpus(FILE *f, fprintf_function cpu_fprintf, const char *optarg)
{
    /* XXX: implement xxx_cpu_list for targets that still miss it */
#if defined(cpu_list)
    cpu_list(f, cpu_fprintf);
#endif
}

CpuInfoList *qmp_query_cpus(Error **errp)
{
    CpuInfoList *head = NULL, *cur_item = NULL;
    CPUState *cpu;

    CPU_FOREACH(cpu) {
        CpuInfoList *info;
#if defined(TARGET_I386)
        X86CPU *x86_cpu = X86_CPU(cpu);
        CPUX86State *env = &x86_cpu->env;
#elif defined(TARGET_PPC)
        PowerPCCPU *ppc_cpu = POWERPC_CPU(cpu);
        CPUPPCState *env = &ppc_cpu->env;
#elif defined(TARGET_SPARC)
        SPARCCPU *sparc_cpu = SPARC_CPU(cpu);
        CPUSPARCState *env = &sparc_cpu->env;
#elif defined(TARGET_MIPS)
        MIPSCPU *mips_cpu = MIPS_CPU(cpu);
        CPUMIPSState *env = &mips_cpu->env;
#elif defined(TARGET_TRICORE)
        TriCoreCPU *tricore_cpu = TRICORE_CPU(cpu);
        CPUTriCoreState *env = &tricore_cpu->env;
#endif

        cpu_synchronize_state(cpu);

        info = g_malloc0(sizeof(*info));
        info->value = g_malloc0(sizeof(*info->value));
        info->value->CPU = cpu->cpu_index;
        info->value->current = (cpu == first_cpu);
        info->value->halted = cpu->halted;
        info->value->thread_id = cpu->thread_id;
#if defined(TARGET_I386)
        info->value->has_pc = true;
        info->value->pc = env->eip + env->segs[R_CS].base;
#elif defined(TARGET_PPC)
        info->value->has_nip = true;
        info->value->nip = env->nip;
#elif defined(TARGET_SPARC)
        info->value->has_pc = true;
        info->value->pc = env->pc;
        info->value->has_npc = true;
        info->value->npc = env->npc;
#elif defined(TARGET_MIPS)
        info->value->has_PC = true;
        info->value->PC = env->active_tc.PC;
#elif defined(TARGET_TRICORE)
        info->value->has_PC = true;
        info->value->PC = env->PC;
#endif

        /* XXX: waiting for the qapi to support GSList */
        if (!cur_item) {
            head = cur_item = info;
        } else {
            cur_item->next = info;
            cur_item = info;
        }
    }

    return head;
}

void qmp_memsave(int64_t addr, int64_t size, const char *filename,
                 bool has_cpu, int64_t cpu_index, Error **errp)
{
    FILE *f;
    uint32_t l;
    CPUState *cpu;
    uint8_t buf[1024];

    if (!has_cpu) {
        cpu_index = 0;
    }

    cpu = qemu_get_cpu(cpu_index);
    if (cpu == NULL) {
        error_set(errp, QERR_INVALID_PARAMETER_VALUE, "cpu-index",
                  "a CPU number");
        return;
    }

    f = fopen(filename, "wb");
    if (!f) {
        error_setg_file_open(errp, errno, filename);
        return;
    }

    while (size != 0) {
        l = sizeof(buf);
        if (l > size) {
            l = size;
        }
        if (cpu_memory_rw_debug(cpu, addr, buf, l, 0) != 0) {
            error_setg(errp, "Invalid addr 0x%016" PRIx64 " specified", addr);
            goto exit;
        }
        if (fwrite(buf, 1, l, f) != l) {
            error_set(errp, QERR_IO_ERROR);
            goto exit;
        }
        addr += l;
        size -= l;
    }

exit:
    fclose(f);
}

void qmp_pmemsave(int64_t addr, int64_t size, const char *filename,
                  Error **errp)
{
    FILE *f;
    uint32_t l;
    uint8_t buf[1024];

    f = fopen(filename, "wb");
    if (!f) {
        error_setg_file_open(errp, errno, filename);
        return;
    }

    while (size != 0) {
        l = sizeof(buf);
        if (l > size) {
            l = size;
        }
        cpu_physical_memory_read(addr, buf, l);
        if (fwrite(buf, 1, l, f) != l) {
            error_set(errp, QERR_IO_ERROR);
            goto exit;
        }
        addr += l;
        size -= l;
    }

exit:
    fclose(f);
}

void qmp_inject_nmi(Error **errp)
{
#if defined(TARGET_I386)
    CPUState *cs;

    CPU_FOREACH(cs) {
        X86CPU *cpu = X86_CPU(cs);

        if (!cpu->apic_state) {
            cpu_interrupt(cs, CPU_INTERRUPT_NMI);
        } else {
            apic_deliver_nmi(cpu->apic_state);
        }
    }
#else
    nmi_monitor_handle(monitor_get_cpu_index(), errp);
#endif
}

void dump_drift_info(FILE *f, fprintf_function cpu_fprintf)
{
    if (!use_icount) {
        return;
    }

    cpu_fprintf(f, "Host - Guest clock %"PRIi64" ms\n",
                (cpu_get_clock() - cpu_get_icount())/SCALE_MS);
    if (icount_align_option) {
        cpu_fprintf(f, "Max guest delay %"PRIi64" ms\n", -max_delay/SCALE_MS);
        cpu_fprintf(f, "Max guest advance %"PRIi64" ms\n", max_advance/SCALE_MS);
    } else {
        cpu_fprintf(f, "Max guest delay NA\n");
        cpu_fprintf(f, "Max guest advance NA\n");
    }
}