]>
Commit | Line | Data |
---|---|---|
a57e0c36 PM |
1 | /* |
2 | * safe-syscall.h: prototypes for linux-user signal-race-safe syscalls | |
3 | * | |
4 | * This program is free software; you can redistribute it and/or modify | |
5 | * it under the terms of the GNU General Public License as published by | |
6 | * the Free Software Foundation; either version 2 of the License, or | |
7 | * (at your option) any later version. | |
8 | * | |
9 | * This program is distributed in the hope that it will be useful, | |
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
12 | * GNU General Public License for more details. | |
13 | * | |
14 | * You should have received a copy of the GNU General Public License | |
15 | * along with this program; if not, see <http://www.gnu.org/licenses/>. | |
16 | */ | |
17 | ||
18 | #ifndef LINUX_USER_SAFE_SYSCALL_H | |
19 | #define LINUX_USER_SAFE_SYSCALL_H | |
20 | ||
21 | /** | |
22 | * safe_syscall: | |
23 | * @number: number of system call to make | |
24 | * @...: arguments to the system call | |
25 | * | |
26 | * Make a system call if no guest signal is pending. | |
27 | * This has the same API as the libc syscall() function, except that it | |
28 | * may return -1 with errno == TARGET_ERESTARTSYS if a signal was pending. | |
29 | * | |
30 | * Returns: the system call result, or -1 with an error code in errno | |
31 | * (Errnos are host errnos; we rely on TARGET_ERESTARTSYS not clashing | |
32 | * with any of the host errno values.) | |
33 | */ | |
34 | ||
35 | /* | |
36 | * A guide to using safe_syscall() to handle interactions between guest | |
37 | * syscalls and guest signals: | |
38 | * | |
39 | * Guest syscalls come in two flavours: | |
40 | * | |
41 | * (1) Non-interruptible syscalls | |
42 | * | |
43 | * These are guest syscalls that never get interrupted by signals and | |
44 | * so never return EINTR. They can be implemented straightforwardly in | |
45 | * QEMU: just make sure that if the implementation code has to make any | |
46 | * blocking calls that those calls are retried if they return EINTR. | |
47 | * It's also OK to implement these with safe_syscall, though it will be | |
48 | * a little less efficient if a signal is delivered at the 'wrong' moment. | |
49 | * | |
50 | * Some non-interruptible syscalls need to be handled using block_signals() | |
51 | * to block signals for the duration of the syscall. This mainly applies | |
52 | * to code which needs to modify the data structures used by the | |
53 | * host_signal_handler() function and the functions it calls, including | |
54 | * all syscalls which change the thread's signal mask. | |
55 | * | |
56 | * (2) Interruptible syscalls | |
57 | * | |
58 | * These are guest syscalls that can be interrupted by signals and | |
59 | * for which we need to either return EINTR or arrange for the guest | |
60 | * syscall to be restarted. This category includes both syscalls which | |
61 | * always restart (and in the kernel return -ERESTARTNOINTR), ones | |
62 | * which only restart if there is no handler (kernel returns -ERESTARTNOHAND | |
63 | * or -ERESTART_RESTARTBLOCK), and the most common kind which restart | |
64 | * if the handler was registered with SA_RESTART (kernel returns | |
65 | * -ERESTARTSYS). System calls which are only interruptible in some | |
66 | * situations (like 'open') also need to be handled this way. | |
67 | * | |
68 | * Here it is important that the host syscall is made | |
69 | * via this safe_syscall() function, and *not* via the host libc. | |
70 | * If the host libc is used then the implementation will appear to work | |
71 | * most of the time, but there will be a race condition where a | |
72 | * signal could arrive just before we make the host syscall inside libc, | |
73 | * and then the guest syscall will not correctly be interrupted. | |
74 | * Instead the implementation of the guest syscall can use the safe_syscall | |
75 | * function but otherwise just return the result or errno in the usual | |
76 | * way; the main loop code will take care of restarting the syscall | |
77 | * if appropriate. | |
78 | * | |
79 | * (If the implementation needs to make multiple host syscalls this is | |
80 | * OK; any which might really block must be via safe_syscall(); for those | |
81 | * which are only technically blocking (ie which we know in practice won't | |
82 | * stay in the host kernel indefinitely) it's OK to use libc if necessary. | |
83 | * You must be able to cope with backing out correctly if some safe_syscall | |
84 | * you make in the implementation fails with errno set to either | |
85 | * TARGET_ERESTARTSYS or EINTR though.) | |
86 | * | |
87 | * block_signals() cannot be used for interruptible syscalls. | |
88 | * | |
89 | * | |
90 | * How and why the safe_syscall implementation works: | |
91 | * | |
92 | * The basic setup is that we make the host syscall via a known | |
93 | * section of host native assembly. If a signal occurs, our signal | |
94 | * handler checks the interrupted host PC against the address of that | |
95 | * known section. If the PC is before or at the address of the syscall | |
96 | * instruction then we change the PC to point at a "return | |
97 | * -TARGET_ERESTARTSYS" code path instead, and then exit the signal handler | |
98 | * (causing the safe_syscall() call to immediately return that value). | |
99 | * Then in the main.c loop if we see this magic return value we adjust | |
100 | * the guest PC to wind it back to before the system call, and invoke | |
101 | * the guest signal handler as usual. | |
102 | * | |
103 | * This winding-back will happen in two cases: | |
104 | * (1) signal came in just before we took the host syscall (a race); | |
105 | * in this case we'll take the guest signal and have another go | |
106 | * at the syscall afterwards, and this is indistinguishable for the | |
107 | * guest from the timing having been different such that the guest | |
108 | * signal really did win the race | |
109 | * (2) signal came in while the host syscall was blocking, and the | |
110 | * host kernel decided the syscall should be restarted; | |
111 | * in this case we want to restart the guest syscall also, and so | |
112 | * rewinding is the right thing. (Note that "restart" semantics mean | |
113 | * "first call the signal handler, then reattempt the syscall".) | |
114 | * The other situation to consider is when a signal came in while the | |
115 | * host syscall was blocking, and the host kernel decided that the syscall | |
116 | * should not be restarted; in this case QEMU's host signal handler will | |
117 | * be invoked with the PC pointing just after the syscall instruction, | |
118 | * with registers indicating an EINTR return; the special code in the | |
119 | * handler will not kick in, and we will return EINTR to the guest as | |
120 | * we should. | |
121 | * | |
122 | * Notice that we can leave the host kernel to make the decision for | |
123 | * us about whether to do a restart of the syscall or not; we do not | |
124 | * need to check SA_RESTART flags in QEMU or distinguish the various | |
125 | * kinds of restartability. | |
126 | */ | |
127 | #ifdef HAVE_SAFE_SYSCALL | |
128 | /* The core part of safe_syscall() is implemented in host assembly; the macro below passes the address of the current TaskState's signal_pending field as @pending */ | |
129 | extern long safe_syscall_base(int *pending, long number, ...); | |
130 | ||
131 | #define safe_syscall(...) \ | |
132 | ({ \ | |
133 | long ret_; \ | |
134 | int *psp_ = &((TaskState *)thread_cpu->opaque)->signal_pending; \ | |
135 | ret_ = safe_syscall_base(psp_, __VA_ARGS__); \ | |
136 | if (is_error(ret_)) { \ | |
137 | errno = -ret_; \ | |
138 | ret_ = -1; \ | |
139 | } \ | |
140 | ret_; \ | |
141 | }) | |
142 | ||
143 | #else | |
144 | ||
145 | /* | |
146 | * Fallback for host architectures which don't yet provide a safe-syscall | |
147 | * assembly fragment: alias the libc syscall(). Note that this is racy! | |
148 | * This should go away when all host architectures have been updated. | |
149 | */ | |
150 | #define safe_syscall syscall | |
151 | ||
152 | #endif | |
153 | ||
154 | #endif |