]> git.proxmox.com Git - qemu.git/blob - cpus.c
kvm: Unconditionally reenter kernel after IO exits
[qemu.git] / cpus.c
1 /*
2 * QEMU System Emulator
3 *
4 * Copyright (c) 2003-2008 Fabrice Bellard
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to deal
8 * in the Software without restriction, including without limitation the rights
9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 * copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22 * THE SOFTWARE.
23 */
24
25 /* Needed early for CONFIG_BSD etc. */
26 #include "config-host.h"
27
28 #include "monitor.h"
29 #include "sysemu.h"
30 #include "gdbstub.h"
31 #include "dma.h"
32 #include "kvm.h"
33 #include "exec-all.h"
34
35 #include "cpus.h"
36 #include "compatfd.h"
37
38 #ifdef SIGRTMIN
39 #define SIG_IPI (SIGRTMIN+4)
40 #else
41 #define SIG_IPI SIGUSR1
42 #endif
43
44 #ifdef CONFIG_LINUX
45
46 #include <sys/prctl.h>
47
48 #ifndef PR_MCE_KILL
49 #define PR_MCE_KILL 33
50 #endif
51
52 #ifndef PR_MCE_KILL_SET
53 #define PR_MCE_KILL_SET 1
54 #endif
55
56 #ifndef PR_MCE_KILL_EARLY
57 #define PR_MCE_KILL_EARLY 1
58 #endif
59
60 #endif /* CONFIG_LINUX */
61
62 static CPUState *next_cpu;
63
64 /***********************************************************/
65 void hw_error(const char *fmt, ...)
66 {
67 va_list ap;
68 CPUState *env;
69
70 va_start(ap, fmt);
71 fprintf(stderr, "qemu: hardware error: ");
72 vfprintf(stderr, fmt, ap);
73 fprintf(stderr, "\n");
74 for(env = first_cpu; env != NULL; env = env->next_cpu) {
75 fprintf(stderr, "CPU #%d:\n", env->cpu_index);
76 #ifdef TARGET_I386
77 cpu_dump_state(env, stderr, fprintf, X86_DUMP_FPU);
78 #else
79 cpu_dump_state(env, stderr, fprintf, 0);
80 #endif
81 }
82 va_end(ap);
83 abort();
84 }
85
86 void cpu_synchronize_all_states(void)
87 {
88 CPUState *cpu;
89
90 for (cpu = first_cpu; cpu; cpu = cpu->next_cpu) {
91 cpu_synchronize_state(cpu);
92 }
93 }
94
95 void cpu_synchronize_all_post_reset(void)
96 {
97 CPUState *cpu;
98
99 for (cpu = first_cpu; cpu; cpu = cpu->next_cpu) {
100 cpu_synchronize_post_reset(cpu);
101 }
102 }
103
104 void cpu_synchronize_all_post_init(void)
105 {
106 CPUState *cpu;
107
108 for (cpu = first_cpu; cpu; cpu = cpu->next_cpu) {
109 cpu_synchronize_post_init(cpu);
110 }
111 }
112
113 int cpu_is_stopped(CPUState *env)
114 {
115 return !vm_running || env->stopped;
116 }
117
118 static void do_vm_stop(int reason)
119 {
120 if (vm_running) {
121 cpu_disable_ticks();
122 vm_running = 0;
123 pause_all_vcpus();
124 vm_state_notify(0, reason);
125 qemu_aio_flush();
126 bdrv_flush_all();
127 monitor_protocol_event(QEVENT_STOP, NULL);
128 }
129 }
130
131 static int cpu_can_run(CPUState *env)
132 {
133 if (env->stop)
134 return 0;
135 if (env->stopped || !vm_running)
136 return 0;
137 return 1;
138 }
139
140 static int cpu_has_work(CPUState *env)
141 {
142 if (env->stop)
143 return 1;
144 if (env->queued_work_first)
145 return 1;
146 if (env->stopped || !vm_running)
147 return 0;
148 if (!env->halted)
149 return 1;
150 if (qemu_cpu_has_work(env))
151 return 1;
152 return 0;
153 }
154
155 static int any_cpu_has_work(void)
156 {
157 CPUState *env;
158
159 for (env = first_cpu; env != NULL; env = env->next_cpu)
160 if (cpu_has_work(env))
161 return 1;
162 return 0;
163 }
164
165 static void cpu_debug_handler(CPUState *env)
166 {
167 gdb_set_stop_cpu(env);
168 debug_requested = EXCP_DEBUG;
169 vm_stop(EXCP_DEBUG);
170 }
171
172 #ifdef CONFIG_LINUX
173 static void sigbus_reraise(void)
174 {
175 sigset_t set;
176 struct sigaction action;
177
178 memset(&action, 0, sizeof(action));
179 action.sa_handler = SIG_DFL;
180 if (!sigaction(SIGBUS, &action, NULL)) {
181 raise(SIGBUS);
182 sigemptyset(&set);
183 sigaddset(&set, SIGBUS);
184 sigprocmask(SIG_UNBLOCK, &set, NULL);
185 }
186 perror("Failed to re-raise SIGBUS!\n");
187 abort();
188 }
189
190 static void sigbus_handler(int n, struct qemu_signalfd_siginfo *siginfo,
191 void *ctx)
192 {
193 if (kvm_on_sigbus(siginfo->ssi_code,
194 (void *)(intptr_t)siginfo->ssi_addr)) {
195 sigbus_reraise();
196 }
197 }
198
199 static void qemu_init_sigbus(void)
200 {
201 struct sigaction action;
202
203 memset(&action, 0, sizeof(action));
204 action.sa_flags = SA_SIGINFO;
205 action.sa_sigaction = (void (*)(int, siginfo_t*, void*))sigbus_handler;
206 sigaction(SIGBUS, &action, NULL);
207
208 prctl(PR_MCE_KILL, PR_MCE_KILL_SET, PR_MCE_KILL_EARLY, 0, 0);
209 }
210
211 #else /* !CONFIG_LINUX */
212
/* Non-Linux build: there is no SIGBUS setup to perform. */
static void qemu_init_sigbus(void)
{
    /* intentionally empty */
}
216 #endif /* !CONFIG_LINUX */
217
218 #ifndef _WIN32
/* Descriptor written to wake the main loop; -1 until qemu_event_init() sets it. */
static int io_thread_fd = -1;

/*
 * Wake the main loop by writing a 64-bit 1 to io_thread_fd.
 *
 * Does nothing while the descriptor is still -1.  The write is retried on
 * EINTR; EAGAIN is tolerated because it means a wake-up is already pending.
 * Any other write error is reported on stderr and terminates the process.
 */
static void qemu_event_increment(void)
{
    /* Write 8 bytes to be compatible with eventfd. */
    static const uint64_t val = 1;
    ssize_t ret;

    if (io_thread_fd == -1) {
        return;
    }

    do {
        ret = write(io_thread_fd, &val, sizeof(val));
    } while (ret < 0 && errno == EINTR);

    /* EAGAIN is fine, a read must be pending. */
    if (ret < 0 && errno != EAGAIN) {
        fprintf(stderr, "qemu_event_increment: write() failed: %s\n",
                strerror(errno));
        exit(1);
    }
}
241
/*
 * fd-handler callback that drains the notification descriptor encoded in
 * @opaque.  Keeps reading while a read is interrupted (EINTR) or fills the
 * whole scratch buffer; stops on the first short read or other error.
 */
static void qemu_event_read(void *opaque)
{
    char scratch[512];
    int notify_fd = (unsigned long)opaque;
    ssize_t got;

    /* Drain the notify pipe.  For eventfd, only 8 bytes will be read. */
    for (;;) {
        got = read(notify_fd, scratch, sizeof(scratch));
        if (got == -1 && errno == EINTR) {
            continue;
        }
        if (got == sizeof(scratch)) {
            continue;
        }
        break;
    }
}
253
254 static int qemu_event_init(void)
255 {
256 int err;
257 int fds[2];
258
259 err = qemu_eventfd(fds);
260 if (err == -1)
261 return -errno;
262
263 err = fcntl_setfl(fds[0], O_NONBLOCK);
264 if (err < 0)
265 goto fail;
266
267 err = fcntl_setfl(fds[1], O_NONBLOCK);
268 if (err < 0)
269 goto fail;
270
271 qemu_set_fd_handler2(fds[0], NULL, qemu_event_read, NULL,
272 (void *)(unsigned long)fds[0]);
273
274 io_thread_fd = fds[1];
275 return 0;
276
277 fail:
278 close(fds[0]);
279 close(fds[1]);
280 return err;
281 }
282
/* Signal handler that deliberately does nothing; @sig is ignored. */
static void dummy_signal(int sig)
{
    (void)sig;
}
286
287 /* If we have signalfd, we mask out the signals we want to handle and then
288 * use signalfd to listen for them. We rely on whatever the current signal
289 * handler is to dispatch the signals when we receive them.
290 */
291 static void sigfd_handler(void *opaque)
292 {
293 int fd = (unsigned long) opaque;
294 struct qemu_signalfd_siginfo info;
295 struct sigaction action;
296 ssize_t len;
297
298 while (1) {
299 do {
300 len = read(fd, &info, sizeof(info));
301 } while (len == -1 && errno == EINTR);
302
303 if (len == -1 && errno == EAGAIN) {
304 break;
305 }
306
307 if (len != sizeof(info)) {
308 printf("read from sigfd returned %zd: %m\n", len);
309 return;
310 }
311
312 sigaction(info.ssi_signo, NULL, &action);
313 if ((action.sa_flags & SA_SIGINFO) && action.sa_sigaction) {
314 action.sa_sigaction(info.ssi_signo,
315 (siginfo_t *)&info, NULL);
316 } else if (action.sa_handler) {
317 action.sa_handler(info.ssi_signo);
318 }
319 }
320 }
321
322 static int qemu_signalfd_init(sigset_t mask)
323 {
324 int sigfd;
325
326 sigfd = qemu_signalfd(&mask);
327 if (sigfd == -1) {
328 fprintf(stderr, "failed to create signalfd\n");
329 return -errno;
330 }
331
332 fcntl_setfl(sigfd, O_NONBLOCK);
333
334 qemu_set_fd_handler2(sigfd, NULL, sigfd_handler, NULL,
335 (void *)(unsigned long) sigfd);
336
337 return 0;
338 }
339
340 static void qemu_kvm_eat_signals(CPUState *env)
341 {
342 struct timespec ts = { 0, 0 };
343 siginfo_t siginfo;
344 sigset_t waitset;
345 sigset_t chkset;
346 int r;
347
348 sigemptyset(&waitset);
349 sigaddset(&waitset, SIG_IPI);
350 sigaddset(&waitset, SIGBUS);
351
352 do {
353 r = sigtimedwait(&waitset, &siginfo, &ts);
354 if (r == -1 && !(errno == EAGAIN || errno == EINTR)) {
355 perror("sigtimedwait");
356 exit(1);
357 }
358
359 switch (r) {
360 case SIGBUS:
361 if (kvm_on_sigbus_vcpu(env, siginfo.si_code, siginfo.si_addr)) {
362 sigbus_reraise();
363 }
364 break;
365 default:
366 break;
367 }
368
369 r = sigpending(&chkset);
370 if (r == -1) {
371 perror("sigpending");
372 exit(1);
373 }
374 } while (sigismember(&chkset, SIG_IPI) || sigismember(&chkset, SIGBUS));
375
376 #ifndef CONFIG_IOTHREAD
377 if (sigismember(&chkset, SIGIO) || sigismember(&chkset, SIGALRM)) {
378 qemu_notify_event();
379 }
380 #endif
381 }
382
383 #else /* _WIN32 */
384
385 HANDLE qemu_event_handle;
386
387 static void dummy_event_handler(void *opaque)
388 {
389 }
390
391 static int qemu_event_init(void)
392 {
393 qemu_event_handle = CreateEvent(NULL, FALSE, FALSE, NULL);
394 if (!qemu_event_handle) {
395 fprintf(stderr, "Failed CreateEvent: %ld\n", GetLastError());
396 return -1;
397 }
398 qemu_add_wait_object(qemu_event_handle, dummy_event_handler, NULL);
399 return 0;
400 }
401
402 static void qemu_event_increment(void)
403 {
404 if (!SetEvent(qemu_event_handle)) {
405 fprintf(stderr, "qemu_event_increment: SetEvent failed: %ld\n",
406 GetLastError());
407 exit (1);
408 }
409 }
410
411 static void qemu_kvm_eat_signals(CPUState *env)
412 {
413 }
414 #endif /* _WIN32 */
415
416 #ifndef CONFIG_IOTHREAD
417 static void qemu_kvm_init_cpu_signals(CPUState *env)
418 {
419 #ifndef _WIN32
420 int r;
421 sigset_t set;
422 struct sigaction sigact;
423
424 memset(&sigact, 0, sizeof(sigact));
425 sigact.sa_handler = dummy_signal;
426 sigaction(SIG_IPI, &sigact, NULL);
427
428 sigemptyset(&set);
429 sigaddset(&set, SIG_IPI);
430 sigaddset(&set, SIGIO);
431 sigaddset(&set, SIGALRM);
432 pthread_sigmask(SIG_BLOCK, &set, NULL);
433
434 pthread_sigmask(SIG_BLOCK, NULL, &set);
435 sigdelset(&set, SIG_IPI);
436 sigdelset(&set, SIGBUS);
437 sigdelset(&set, SIGIO);
438 sigdelset(&set, SIGALRM);
439 r = kvm_set_signal_mask(env, &set);
440 if (r) {
441 fprintf(stderr, "kvm_set_signal_mask: %s\n", strerror(-r));
442 exit(1);
443 }
444 #endif
445 }
446
447 #ifndef _WIN32
448 static sigset_t block_synchronous_signals(void)
449 {
450 sigset_t set;
451
452 sigemptyset(&set);
453 sigaddset(&set, SIGBUS);
454 if (kvm_enabled()) {
455 /*
456 * We need to process timer signals synchronously to avoid a race
457 * between exit_request check and KVM vcpu entry.
458 */
459 sigaddset(&set, SIGIO);
460 sigaddset(&set, SIGALRM);
461 }
462
463 return set;
464 }
465 #endif
466
467 int qemu_init_main_loop(void)
468 {
469 #ifndef _WIN32
470 sigset_t blocked_signals;
471 int ret;
472
473 blocked_signals = block_synchronous_signals();
474
475 ret = qemu_signalfd_init(blocked_signals);
476 if (ret) {
477 return ret;
478 }
479 #endif
480 cpu_set_debug_excp_handler(cpu_debug_handler);
481
482 qemu_init_sigbus();
483
484 return qemu_event_init();
485 }
486
/* Without an I/O thread nothing has to happen when the main loop starts. */
void qemu_main_loop_start(void)
{
    /* intentionally empty */
}
490
491 void qemu_init_vcpu(void *_env)
492 {
493 CPUState *env = _env;
494 int r;
495
496 env->nr_cores = smp_cores;
497 env->nr_threads = smp_threads;
498
499 if (kvm_enabled()) {
500 r = kvm_init_vcpu(env);
501 if (r < 0) {
502 fprintf(stderr, "kvm_init_vcpu failed: %s\n", strerror(-r));
503 exit(1);
504 }
505 qemu_kvm_init_cpu_signals(env);
506 }
507 }
508
/* Build without I/O thread: always reports 1, whatever @env is. */
int qemu_cpu_self(void *env)
{
    (void)env;
    return 1;
}
513
514 void run_on_cpu(CPUState *env, void (*func)(void *data), void *data)
515 {
516 func(data);
517 }
518
/* Build without I/O thread: resuming vCPUs requires no action. */
void resume_all_vcpus(void)
{
    /* intentionally empty */
}
522
/* Build without I/O thread: pausing vCPUs requires no action. */
void pause_all_vcpus(void)
{
    /* intentionally empty */
}
526
/* Build without I/O thread: kicking a CPU is a no-op; @env is ignored. */
void qemu_cpu_kick(void *env)
{
    (void)env;
}
531
532 void qemu_cpu_kick_self(void)
533 {
534 #ifndef _WIN32
535 assert(cpu_single_env);
536
537 raise(SIG_IPI);
538 #else
539 abort();
540 #endif
541 }
542
543 void qemu_notify_event(void)
544 {
545 CPUState *env = cpu_single_env;
546
547 qemu_event_increment ();
548 if (env) {
549 cpu_exit(env);
550 }
551 if (next_cpu && env != next_cpu) {
552 cpu_exit(next_cpu);
553 }
554 exit_request = 1;
555 }
556
/* Build without I/O thread: there is no global lock to take. */
void qemu_mutex_lock_iothread(void)
{
    /* intentionally empty */
}

/* Build without I/O thread: there is no global lock to release. */
void qemu_mutex_unlock_iothread(void)
{
    /* intentionally empty */
}
559
/* Build without I/O thread: stopping the current CPU requires no action. */
void cpu_stop_current(void)
{
    /* intentionally empty */
}
563
/*
 * Stop the VM for @reason (build without I/O thread): the stop is carried
 * out immediately by do_vm_stop().
 */
void vm_stop(int reason)
{
    do_vm_stop(reason);
    return;
}
568
569 #else /* CONFIG_IOTHREAD */
570
571 #include "qemu-thread.h"
572
573 QemuMutex qemu_global_mutex;
574 static QemuMutex qemu_fair_mutex;
575
576 static QemuThread io_thread;
577
578 static QemuThread *tcg_cpu_thread;
579 static QemuCond *tcg_halt_cond;
580
581 static int qemu_system_ready;
582 /* cpu creation */
583 static QemuCond qemu_cpu_cond;
584 /* system init */
585 static QemuCond qemu_system_cond;
586 static QemuCond qemu_pause_cond;
587 static QemuCond qemu_work_cond;
588
589 static void cpu_signal(int sig)
590 {
591 if (cpu_single_env) {
592 cpu_exit(cpu_single_env);
593 }
594 exit_request = 1;
595 }
596
597 static void qemu_kvm_init_cpu_signals(CPUState *env)
598 {
599 int r;
600 sigset_t set;
601 struct sigaction sigact;
602
603 memset(&sigact, 0, sizeof(sigact));
604 sigact.sa_handler = dummy_signal;
605 sigaction(SIG_IPI, &sigact, NULL);
606
607 pthread_sigmask(SIG_BLOCK, NULL, &set);
608 sigdelset(&set, SIG_IPI);
609 sigdelset(&set, SIGBUS);
610 r = kvm_set_signal_mask(env, &set);
611 if (r) {
612 fprintf(stderr, "kvm_set_signal_mask: %s\n", strerror(-r));
613 exit(1);
614 }
615 }
616
617 static void qemu_tcg_init_cpu_signals(void)
618 {
619 sigset_t set;
620 struct sigaction sigact;
621
622 memset(&sigact, 0, sizeof(sigact));
623 sigact.sa_handler = cpu_signal;
624 sigaction(SIG_IPI, &sigact, NULL);
625
626 sigemptyset(&set);
627 sigaddset(&set, SIG_IPI);
628 pthread_sigmask(SIG_UNBLOCK, &set, NULL);
629 }
630
631 static sigset_t block_io_signals(void)
632 {
633 sigset_t set;
634
635 /* SIGUSR2 used by posix-aio-compat.c */
636 sigemptyset(&set);
637 sigaddset(&set, SIGUSR2);
638 pthread_sigmask(SIG_UNBLOCK, &set, NULL);
639
640 sigemptyset(&set);
641 sigaddset(&set, SIGIO);
642 sigaddset(&set, SIGALRM);
643 sigaddset(&set, SIG_IPI);
644 sigaddset(&set, SIGBUS);
645 pthread_sigmask(SIG_BLOCK, &set, NULL);
646
647 return set;
648 }
649
650 int qemu_init_main_loop(void)
651 {
652 int ret;
653 sigset_t blocked_signals;
654
655 cpu_set_debug_excp_handler(cpu_debug_handler);
656
657 qemu_init_sigbus();
658
659 blocked_signals = block_io_signals();
660
661 ret = qemu_signalfd_init(blocked_signals);
662 if (ret)
663 return ret;
664
665 /* Note eventfd must be drained before signalfd handlers run */
666 ret = qemu_event_init();
667 if (ret)
668 return ret;
669
670 qemu_cond_init(&qemu_pause_cond);
671 qemu_cond_init(&qemu_system_cond);
672 qemu_mutex_init(&qemu_fair_mutex);
673 qemu_mutex_init(&qemu_global_mutex);
674 qemu_mutex_lock(&qemu_global_mutex);
675
676 qemu_thread_self(&io_thread);
677
678 return 0;
679 }
680
681 void qemu_main_loop_start(void)
682 {
683 qemu_system_ready = 1;
684 qemu_cond_broadcast(&qemu_system_cond);
685 }
686
687 void run_on_cpu(CPUState *env, void (*func)(void *data), void *data)
688 {
689 struct qemu_work_item wi;
690
691 if (qemu_cpu_self(env)) {
692 func(data);
693 return;
694 }
695
696 wi.func = func;
697 wi.data = data;
698 if (!env->queued_work_first)
699 env->queued_work_first = &wi;
700 else
701 env->queued_work_last->next = &wi;
702 env->queued_work_last = &wi;
703 wi.next = NULL;
704 wi.done = false;
705
706 qemu_cpu_kick(env);
707 while (!wi.done) {
708 CPUState *self_env = cpu_single_env;
709
710 qemu_cond_wait(&qemu_work_cond, &qemu_global_mutex);
711 cpu_single_env = self_env;
712 }
713 }
714
715 static void flush_queued_work(CPUState *env)
716 {
717 struct qemu_work_item *wi;
718
719 if (!env->queued_work_first)
720 return;
721
722 while ((wi = env->queued_work_first)) {
723 env->queued_work_first = wi->next;
724 wi->func(wi->data);
725 wi->done = true;
726 }
727 env->queued_work_last = NULL;
728 qemu_cond_broadcast(&qemu_work_cond);
729 }
730
731 static void qemu_wait_io_event_common(CPUState *env)
732 {
733 if (env->stop) {
734 env->stop = 0;
735 env->stopped = 1;
736 qemu_cond_signal(&qemu_pause_cond);
737 }
738 flush_queued_work(env);
739 env->thread_kicked = false;
740 }
741
742 static void qemu_tcg_wait_io_event(void)
743 {
744 CPUState *env;
745
746 while (!any_cpu_has_work())
747 qemu_cond_timedwait(tcg_halt_cond, &qemu_global_mutex, 1000);
748
749 qemu_mutex_unlock(&qemu_global_mutex);
750
751 /*
752 * Users of qemu_global_mutex can be starved, having no chance
753 * to acquire it since this path will get to it first.
754 * So use another lock to provide fairness.
755 */
756 qemu_mutex_lock(&qemu_fair_mutex);
757 qemu_mutex_unlock(&qemu_fair_mutex);
758
759 qemu_mutex_lock(&qemu_global_mutex);
760
761 for (env = first_cpu; env != NULL; env = env->next_cpu) {
762 qemu_wait_io_event_common(env);
763 }
764 }
765
766 static void qemu_kvm_wait_io_event(CPUState *env)
767 {
768 while (!cpu_has_work(env))
769 qemu_cond_timedwait(env->halt_cond, &qemu_global_mutex, 1000);
770
771 qemu_kvm_eat_signals(env);
772 qemu_wait_io_event_common(env);
773 }
774
775 static int qemu_cpu_exec(CPUState *env);
776
777 static void *kvm_cpu_thread_fn(void *arg)
778 {
779 CPUState *env = arg;
780 int r;
781
782 qemu_mutex_lock(&qemu_global_mutex);
783 qemu_thread_self(env->thread);
784
785 r = kvm_init_vcpu(env);
786 if (r < 0) {
787 fprintf(stderr, "kvm_init_vcpu failed: %s\n", strerror(-r));
788 exit(1);
789 }
790
791 qemu_kvm_init_cpu_signals(env);
792
793 /* signal CPU creation */
794 env->created = 1;
795 qemu_cond_signal(&qemu_cpu_cond);
796
797 /* and wait for machine initialization */
798 while (!qemu_system_ready)
799 qemu_cond_timedwait(&qemu_system_cond, &qemu_global_mutex, 100);
800
801 while (1) {
802 if (cpu_can_run(env))
803 qemu_cpu_exec(env);
804 qemu_kvm_wait_io_event(env);
805 }
806
807 return NULL;
808 }
809
810 static void *tcg_cpu_thread_fn(void *arg)
811 {
812 CPUState *env = arg;
813
814 qemu_tcg_init_cpu_signals();
815 qemu_thread_self(env->thread);
816
817 /* signal CPU creation */
818 qemu_mutex_lock(&qemu_global_mutex);
819 for (env = first_cpu; env != NULL; env = env->next_cpu)
820 env->created = 1;
821 qemu_cond_signal(&qemu_cpu_cond);
822
823 /* and wait for machine initialization */
824 while (!qemu_system_ready)
825 qemu_cond_timedwait(&qemu_system_cond, &qemu_global_mutex, 100);
826
827 while (1) {
828 cpu_exec_all();
829 qemu_tcg_wait_io_event();
830 }
831
832 return NULL;
833 }
834
835 void qemu_cpu_kick(void *_env)
836 {
837 CPUState *env = _env;
838 qemu_cond_broadcast(env->halt_cond);
839 if (!env->thread_kicked) {
840 qemu_thread_signal(env->thread, SIG_IPI);
841 env->thread_kicked = true;
842 }
843 }
844
845 void qemu_cpu_kick_self(void)
846 {
847 assert(cpu_single_env);
848
849 if (!cpu_single_env->thread_kicked) {
850 qemu_thread_signal(cpu_single_env->thread, SIG_IPI);
851 cpu_single_env->thread_kicked = true;
852 }
853 }
854
855 int qemu_cpu_self(void *_env)
856 {
857 CPUState *env = _env;
858 QemuThread this;
859
860 qemu_thread_self(&this);
861
862 return qemu_thread_equal(&this, env->thread);
863 }
864
865 void qemu_mutex_lock_iothread(void)
866 {
867 if (kvm_enabled()) {
868 qemu_mutex_lock(&qemu_global_mutex);
869 } else {
870 qemu_mutex_lock(&qemu_fair_mutex);
871 if (qemu_mutex_trylock(&qemu_global_mutex)) {
872 qemu_thread_signal(tcg_cpu_thread, SIG_IPI);
873 qemu_mutex_lock(&qemu_global_mutex);
874 }
875 qemu_mutex_unlock(&qemu_fair_mutex);
876 }
877 }
878
879 void qemu_mutex_unlock_iothread(void)
880 {
881 qemu_mutex_unlock(&qemu_global_mutex);
882 }
883
884 static int all_vcpus_paused(void)
885 {
886 CPUState *penv = first_cpu;
887
888 while (penv) {
889 if (!penv->stopped)
890 return 0;
891 penv = (CPUState *)penv->next_cpu;
892 }
893
894 return 1;
895 }
896
897 void pause_all_vcpus(void)
898 {
899 CPUState *penv = first_cpu;
900
901 while (penv) {
902 penv->stop = 1;
903 qemu_cpu_kick(penv);
904 penv = (CPUState *)penv->next_cpu;
905 }
906
907 while (!all_vcpus_paused()) {
908 qemu_cond_timedwait(&qemu_pause_cond, &qemu_global_mutex, 100);
909 penv = first_cpu;
910 while (penv) {
911 qemu_cpu_kick(penv);
912 penv = (CPUState *)penv->next_cpu;
913 }
914 }
915 }
916
917 void resume_all_vcpus(void)
918 {
919 CPUState *penv = first_cpu;
920
921 while (penv) {
922 penv->stop = 0;
923 penv->stopped = 0;
924 qemu_cpu_kick(penv);
925 penv = (CPUState *)penv->next_cpu;
926 }
927 }
928
929 static void tcg_init_vcpu(void *_env)
930 {
931 CPUState *env = _env;
932 /* share a single thread for all cpus with TCG */
933 if (!tcg_cpu_thread) {
934 env->thread = qemu_mallocz(sizeof(QemuThread));
935 env->halt_cond = qemu_mallocz(sizeof(QemuCond));
936 qemu_cond_init(env->halt_cond);
937 qemu_thread_create(env->thread, tcg_cpu_thread_fn, env);
938 while (env->created == 0)
939 qemu_cond_timedwait(&qemu_cpu_cond, &qemu_global_mutex, 100);
940 tcg_cpu_thread = env->thread;
941 tcg_halt_cond = env->halt_cond;
942 } else {
943 env->thread = tcg_cpu_thread;
944 env->halt_cond = tcg_halt_cond;
945 }
946 }
947
948 static void kvm_start_vcpu(CPUState *env)
949 {
950 env->thread = qemu_mallocz(sizeof(QemuThread));
951 env->halt_cond = qemu_mallocz(sizeof(QemuCond));
952 qemu_cond_init(env->halt_cond);
953 qemu_thread_create(env->thread, kvm_cpu_thread_fn, env);
954 while (env->created == 0)
955 qemu_cond_timedwait(&qemu_cpu_cond, &qemu_global_mutex, 100);
956 }
957
958 void qemu_init_vcpu(void *_env)
959 {
960 CPUState *env = _env;
961
962 env->nr_cores = smp_cores;
963 env->nr_threads = smp_threads;
964 if (kvm_enabled())
965 kvm_start_vcpu(env);
966 else
967 tcg_init_vcpu(env);
968 }
969
/* Wake the main loop (I/O-thread build): just signal the main-loop event. */
void qemu_notify_event(void)
{
    qemu_event_increment();
    return;
}
974
975 static void qemu_system_vmstop_request(int reason)
976 {
977 vmstop_requested = reason;
978 qemu_notify_event();
979 }
980
981 void cpu_stop_current(void)
982 {
983 if (cpu_single_env) {
984 cpu_single_env->stopped = 1;
985 cpu_exit(cpu_single_env);
986 }
987 }
988
989 void vm_stop(int reason)
990 {
991 QemuThread me;
992 qemu_thread_self(&me);
993
994 if (!qemu_thread_equal(&me, &io_thread)) {
995 qemu_system_vmstop_request(reason);
996 /*
997 * FIXME: should not return to device code in case
998 * vm_stop() has been requested.
999 */
1000 cpu_stop_current();
1001 return;
1002 }
1003 do_vm_stop(reason);
1004 }
1005
1006 #endif
1007
1008 static int qemu_cpu_exec(CPUState *env)
1009 {
1010 int ret;
1011 #ifdef CONFIG_PROFILER
1012 int64_t ti;
1013 #endif
1014
1015 #ifdef CONFIG_PROFILER
1016 ti = profile_getclock();
1017 #endif
1018 if (use_icount) {
1019 int64_t count;
1020 int decr;
1021 qemu_icount -= (env->icount_decr.u16.low + env->icount_extra);
1022 env->icount_decr.u16.low = 0;
1023 env->icount_extra = 0;
1024 count = qemu_icount_round (qemu_next_deadline());
1025 qemu_icount += count;
1026 decr = (count > 0xffff) ? 0xffff : count;
1027 count -= decr;
1028 env->icount_decr.u16.low = decr;
1029 env->icount_extra = count;
1030 }
1031 ret = cpu_exec(env);
1032 #ifdef CONFIG_PROFILER
1033 qemu_time += profile_getclock() - ti;
1034 #endif
1035 if (use_icount) {
1036 /* Fold pending instructions back into the
1037 instruction counter, and clear the interrupt flag. */
1038 qemu_icount -= (env->icount_decr.u16.low
1039 + env->icount_extra);
1040 env->icount_decr.u32 = 0;
1041 env->icount_extra = 0;
1042 }
1043 return ret;
1044 }
1045
1046 bool cpu_exec_all(void)
1047 {
1048 int r;
1049
1050 if (next_cpu == NULL)
1051 next_cpu = first_cpu;
1052 for (; next_cpu != NULL && !exit_request; next_cpu = next_cpu->next_cpu) {
1053 CPUState *env = next_cpu;
1054
1055 qemu_clock_enable(vm_clock,
1056 (env->singlestep_enabled & SSTEP_NOTIMER) == 0);
1057
1058 if (qemu_alarm_pending())
1059 break;
1060 if (cpu_can_run(env)) {
1061 r = qemu_cpu_exec(env);
1062 if (kvm_enabled()) {
1063 qemu_kvm_eat_signals(env);
1064 }
1065 if (r == EXCP_DEBUG) {
1066 break;
1067 }
1068 } else if (env->stop) {
1069 break;
1070 }
1071 }
1072 exit_request = 0;
1073 return any_cpu_has_work();
1074 }
1075
1076 void set_numa_modes(void)
1077 {
1078 CPUState *env;
1079 int i;
1080
1081 for (env = first_cpu; env != NULL; env = env->next_cpu) {
1082 for (i = 0; i < nb_numa_nodes; i++) {
1083 if (node_cpumask[i] & (1 << env->cpu_index)) {
1084 env->numa_node = i;
1085 }
1086 }
1087 }
1088 }
1089
1090 void set_cpu_log(const char *optarg)
1091 {
1092 int mask;
1093 const CPULogItem *item;
1094
1095 mask = cpu_str_to_log_mask(optarg);
1096 if (!mask) {
1097 printf("Log items (comma separated):\n");
1098 for (item = cpu_log_items; item->mask != 0; item++) {
1099 printf("%-10s %s\n", item->name, item->help);
1100 }
1101 exit(1);
1102 }
1103 cpu_set_log(mask);
1104 }
1105
1106 /* Return the virtual CPU time, based on the instruction counter. */
1107 int64_t cpu_get_icount(void)
1108 {
1109 int64_t icount;
1110 CPUState *env = cpu_single_env;;
1111
1112 icount = qemu_icount;
1113 if (env) {
1114 if (!can_do_io(env)) {
1115 fprintf(stderr, "Bad clock read\n");
1116 }
1117 icount -= (env->icount_decr.u16.low + env->icount_extra);
1118 }
1119 return qemu_icount_bias + (icount << icount_time_shift);
1120 }
1121
1122 void list_cpus(FILE *f, fprintf_function cpu_fprintf, const char *optarg)
1123 {
1124 /* XXX: implement xxx_cpu_list for targets that still miss it */
1125 #if defined(cpu_list_id)
1126 cpu_list_id(f, cpu_fprintf, optarg);
1127 #elif defined(cpu_list)
1128 cpu_list(f, cpu_fprintf); /* deprecated */
1129 #endif
1130 }