]> git.proxmox.com Git - mirror_qemu.git/blob - linux-user/i386/cpu_loop.c
vhost-vdpa: do not cleanup the vdpa/vhost-net structures if peer nic is present
[mirror_qemu.git] / linux-user / i386 / cpu_loop.c
1 /*
2 * qemu user cpu loop
3 *
4 * Copyright (c) 2003-2008 Fabrice Bellard
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, see <http://www.gnu.org/licenses/>.
18 */
19
20 #include "qemu/osdep.h"
21 #include "qemu.h"
22 #include "qemu/timer.h"
23 #include "user-internals.h"
24 #include "cpu_loop-common.h"
25 #include "signal-common.h"
26 #include "user-mmap.h"
27
28 /***********************************************************/
29 /* CPUX86 core interface */
30
31 uint64_t cpu_get_tsc(CPUX86State *env)
32 {
33 return cpu_get_host_ticks();
34 }
35
/*
 * Encode an 8-byte segment descriptor at @ptr.
 *
 * @addr:  32-bit segment base, split across both descriptor words.
 * @limit: 20-bit segment limit (low 16 bits in word 0, high 4 in word 1).
 * @flags: remaining attribute bits (type, DPL, G, P, ...), pre-shifted
 *         by the caller and OR-ed into the high word verbatim.
 *
 * Both words are stored in guest byte order via tswap32().
 */
static void write_dt(void *ptr, unsigned long addr, unsigned long limit,
                     int flags)
{
    uint32_t *dest = ptr;
    unsigned int word_lo, word_hi;

    word_lo = (addr << 16) | (limit & 0xffff);
    word_hi = ((addr >> 16) & 0xff) | (addr & 0xff000000)
              | (limit & 0x000f0000) | flags;

    dest[0] = tswap32(word_lo);
    dest[1] = tswap32(word_hi);
}
48
/* Host view of the guest IDT; mapped in target_cpu_copy_regs(). */
static uint64_t *idt_table;
#ifdef TARGET_X86_64
/*
 * Fill a 16-byte long-mode interrupt gate descriptor at @ptr.
 * @addr/@sel/@type are stored in the architectural layout, but only
 * @dpl matters here (see set_idt below); 0x8000 is the Present bit.
 * Words are stored in guest byte order via tswap32().
 */
static void set_gate64(void *ptr, unsigned int type, unsigned int dpl,
                       uint64_t addr, unsigned int sel)
{
    uint32_t *p, e1, e2;
    e1 = (addr & 0xffff) | (sel << 16);
    e2 = (addr & 0xffff0000) | 0x8000 | (dpl << 13) | (type << 8);
    p = ptr;
    p[0] = tswap32(e1);
    p[1] = tswap32(e2);
    p[2] = tswap32(addr >> 32);
    p[3] = 0;
}
/* only dpl matters as we do only user space emulation */
static void set_idt(int n, unsigned int dpl)
{
    /* 64-bit gates are 16 bytes, i.e. two uint64_t slots per vector. */
    set_gate64(idt_table + n * 2, 0, dpl, 0, 0);
}
#else
/*
 * Fill an 8-byte protected-mode interrupt gate descriptor at @ptr.
 * Same encoding as the low half of the 64-bit variant above; only
 * @dpl is meaningful for user-mode emulation.
 */
static void set_gate(void *ptr, unsigned int type, unsigned int dpl,
                     uint32_t addr, unsigned int sel)
{
    uint32_t *p, e1, e2;
    e1 = (addr & 0xffff) | (sel << 16);
    e2 = (addr & 0xffff0000) | 0x8000 | (dpl << 13) | (type << 8);
    p = ptr;
    p[0] = tswap32(e1);
    p[1] = tswap32(e2);
}

/* only dpl matters as we do only user space emulation */
static void set_idt(int n, unsigned int dpl)
{
    set_gate(idt_table + n, 0, dpl, 0, 0);
}
#endif
86
87 #ifdef TARGET_X86_64
88 static bool write_ok_or_segv(CPUX86State *env, abi_ptr addr, size_t len)
89 {
90 /*
91 * For all the vsyscalls, NULL means "don't write anything" not
92 * "write it at address 0".
93 */
94 if (addr == 0 || access_ok(env_cpu(env), VERIFY_WRITE, addr, len)) {
95 return true;
96 }
97
98 env->error_code = PG_ERROR_W_MASK | PG_ERROR_U_MASK;
99 force_sig_fault(TARGET_SIGSEGV, TARGET_SEGV_MAPERR, addr);
100 return false;
101 }
102
/*
 * Since v3.1, the kernel traps and emulates the vsyscall page.
 * Entry points other than the official generate SIGSEGV.
 *
 * This mirrors the kernel's emulate_vsyscall(): decode which of the
 * three legacy vsyscalls was entered from the page offset of EIP,
 * validate the return address and all pointer arguments, perform the
 * equivalent syscall, then emulate the "ret" back to the caller.
 */
static void emulate_vsyscall(CPUX86State *env)
{
    int syscall;
    abi_ulong ret;
    uint64_t caller;

    /*
     * Validate the entry point. We have already validated the page
     * during translation to get here; now verify the offset.
     */
    switch (env->eip & ~TARGET_PAGE_MASK) {
    case 0x000:
        syscall = TARGET_NR_gettimeofday;
        break;
    case 0x400:
        syscall = TARGET_NR_time;
        break;
    case 0x800:
        syscall = TARGET_NR_getcpu;
        break;
    default:
        goto sigsegv;
    }

    /*
     * Validate the return address.
     * Note that the kernel treats this the same as an invalid entry point.
     */
    if (get_user_u64(caller, env->regs[R_ESP])) {
        goto sigsegv;
    }

    /*
     * Validate the pointer arguments.  A failed check has already
     * raised SIGSEGV inside write_ok_or_segv(), so just return.
     */
    switch (syscall) {
    case TARGET_NR_gettimeofday:
        if (!write_ok_or_segv(env, env->regs[R_EDI],
                              sizeof(struct target_timeval)) ||
            !write_ok_or_segv(env, env->regs[R_ESI],
                              sizeof(struct target_timezone))) {
            return;
        }
        break;
    case TARGET_NR_time:
        if (!write_ok_or_segv(env, env->regs[R_EDI], sizeof(abi_long))) {
            return;
        }
        break;
    case TARGET_NR_getcpu:
        if (!write_ok_or_segv(env, env->regs[R_EDI], sizeof(uint32_t)) ||
            !write_ok_or_segv(env, env->regs[R_ESI], sizeof(uint32_t))) {
            return;
        }
        break;
    default:
        g_assert_not_reached();
    }

    /*
     * Perform the syscall. None of the vsyscalls should need restarting.
     * Arguments follow the x86-64 syscall convention: rdi, rsi, rdx,
     * r10, r8, r9 (regs[10], regs[8], regs[9] have no R_ enum names).
     */
    ret = do_syscall(env, syscall, env->regs[R_EDI], env->regs[R_ESI],
                     env->regs[R_EDX], env->regs[10], env->regs[8],
                     env->regs[9], 0, 0);
    g_assert(ret != -QEMU_ERESTARTSYS);
    g_assert(ret != -QEMU_ESIGRETURN);
    if (ret == -TARGET_EFAULT) {
        goto sigsegv;
    }
    env->regs[R_EAX] = ret;

    /* Emulate a ret instruction to leave the vsyscall page. */
    env->eip = caller;
    env->regs[R_ESP] += 8;
    return;

 sigsegv:
    force_sig(TARGET_SIGSEGV);
}
187 #endif
188
189 static bool maybe_handle_vm86_trap(CPUX86State *env, int trapnr)
190 {
191 #ifndef TARGET_X86_64
192 if (env->eflags & VM_MASK) {
193 handle_vm86_trap(env, trapnr);
194 return true;
195 }
196 #endif
197 return false;
198 }
199
/*
 * Main user-mode execution loop: run translated guest code until it
 * raises an exception, service the exception (syscall, fault-to-signal
 * mapping, vm86 dispatch), deliver any pending signals, and repeat.
 * Never returns.
 */
void cpu_loop(CPUX86State *env)
{
    CPUState *cs = env_cpu(env);
    int trapnr;
    abi_ulong ret;

    for(;;) {
        cpu_exec_start(cs);
        trapnr = cpu_exec(cs);
        cpu_exec_end(cs);
        process_queued_cpu_work(cs);

        switch(trapnr) {
        case 0x80:
            /* linux syscall from int $0x80 */
            ret = do_syscall(env,
                             env->regs[R_EAX],
                             env->regs[R_EBX],
                             env->regs[R_ECX],
                             env->regs[R_EDX],
                             env->regs[R_ESI],
                             env->regs[R_EDI],
                             env->regs[R_EBP],
                             0, 0);
            if (ret == -QEMU_ERESTARTSYS) {
                /* Back up over the 2-byte "int $0x80" to restart it. */
                env->eip -= 2;
            } else if (ret != -QEMU_ESIGRETURN) {
                /* QEMU_ESIGRETURN: sigreturn already set EAX itself. */
                env->regs[R_EAX] = ret;
            }
            break;
#ifndef TARGET_ABI32
        case EXCP_SYSCALL:
            /* linux syscall from syscall instruction */
            ret = do_syscall(env,
                             env->regs[R_EAX],
                             env->regs[R_EDI],
                             env->regs[R_ESI],
                             env->regs[R_EDX],
                             env->regs[10],
                             env->regs[8],
                             env->regs[9],
                             0, 0);
            if (ret == -QEMU_ERESTARTSYS) {
                /* Back up over the 2-byte "syscall" to restart it. */
                env->eip -= 2;
            } else if (ret != -QEMU_ESIGRETURN) {
                env->regs[R_EAX] = ret;
            }
            break;
#endif
#ifdef TARGET_X86_64
        case EXCP_VSYSCALL:
            emulate_vsyscall(env);
            break;
#endif
        case EXCP0B_NOSEG:
        case EXCP0C_STACK:
            /* Segment-not-present / stack fault -> SIGBUS, as Linux does. */
            force_sig(TARGET_SIGBUS);
            break;
        case EXCP0D_GPF:
            /* XXX: potential problem if ABI32 */
            if (maybe_handle_vm86_trap(env, trapnr)) {
                break;
            }
            force_sig(TARGET_SIGSEGV);
            break;
        case EXCP0E_PAGE:
            /*
             * Page fault: ACCERR if the page was present (protection
             * fault), MAPERR otherwise; CR2 holds the faulting address.
             */
            force_sig_fault(TARGET_SIGSEGV,
                            (env->error_code & PG_ERROR_P_MASK ?
                             TARGET_SEGV_ACCERR : TARGET_SEGV_MAPERR),
                            env->cr[2]);
            break;
        case EXCP00_DIVZ:
            if (maybe_handle_vm86_trap(env, trapnr)) {
                break;
            }
            force_sig_fault(TARGET_SIGFPE, TARGET_FPE_INTDIV, env->eip);
            break;
        case EXCP01_DB:
            if (maybe_handle_vm86_trap(env, trapnr)) {
                break;
            }
            force_sig_fault(TARGET_SIGTRAP, TARGET_TRAP_BRKPT, env->eip);
            break;
        case EXCP03_INT3:
            if (maybe_handle_vm86_trap(env, trapnr)) {
                break;
            }
            force_sig(TARGET_SIGTRAP);
            break;
        case EXCP04_INTO:
        case EXCP05_BOUND:
            if (maybe_handle_vm86_trap(env, trapnr)) {
                break;
            }
            force_sig(TARGET_SIGSEGV);
            break;
        case EXCP06_ILLOP:
            force_sig_fault(TARGET_SIGILL, TARGET_ILL_ILLOPN, env->eip);
            break;
        case EXCP_INTERRUPT:
            /* just indicate that signals should be handled asap */
            break;
        case EXCP_DEBUG:
            /* Host-side debug exception (e.g. gdbstub breakpoint). */
            force_sig_fault(TARGET_SIGTRAP, TARGET_TRAP_BRKPT, env->eip);
            break;
        case EXCP_ATOMIC:
            /* Re-execute the atomic instruction under exclusive lock. */
            cpu_exec_step_atomic(cs);
            break;
        default:
            EXCP_DUMP(env, "qemu: unhandled CPU exception 0x%x - aborting\n",
                      trapnr);
            abort();
        }
        process_pending_signals(env);
    }
}
316
317 static void target_cpu_free(void *obj)
318 {
319 CPUArchState *env = ((CPUState *)obj)->env_ptr;
320 target_munmap(env->gdt.base, sizeof(uint64_t) * TARGET_GDT_ENTRIES);
321 g_free(obj);
322 }
323
324 void target_cpu_copy_regs(CPUArchState *env, struct target_pt_regs *regs)
325 {
326 CPUState *cpu = env_cpu(env);
327 OBJECT(cpu)->free = target_cpu_free;
328 env->cr[0] = CR0_PG_MASK | CR0_WP_MASK | CR0_PE_MASK;
329 env->hflags |= HF_PE_MASK | HF_CPL_MASK;
330 if (env->features[FEAT_1_EDX] & CPUID_SSE) {
331 env->cr[4] |= CR4_OSFXSR_MASK;
332 env->hflags |= HF_OSFXSR_MASK;
333 }
334 #ifndef TARGET_ABI32
335 /* enable 64 bit mode if possible */
336 if (!(env->features[FEAT_8000_0001_EDX] & CPUID_EXT2_LM)) {
337 fprintf(stderr, "The selected x86 CPU does not support 64 bit mode\n");
338 exit(EXIT_FAILURE);
339 }
340 env->cr[4] |= CR4_PAE_MASK;
341 env->efer |= MSR_EFER_LMA | MSR_EFER_LME;
342 env->hflags |= HF_LMA_MASK;
343 #endif
344
345 /* flags setup : we activate the IRQs by default as in user mode */
346 env->eflags |= IF_MASK;
347
348 /* linux register setup */
349 #ifndef TARGET_ABI32
350 env->regs[R_EAX] = regs->rax;
351 env->regs[R_EBX] = regs->rbx;
352 env->regs[R_ECX] = regs->rcx;
353 env->regs[R_EDX] = regs->rdx;
354 env->regs[R_ESI] = regs->rsi;
355 env->regs[R_EDI] = regs->rdi;
356 env->regs[R_EBP] = regs->rbp;
357 env->regs[R_ESP] = regs->rsp;
358 env->eip = regs->rip;
359 #else
360 env->regs[R_EAX] = regs->eax;
361 env->regs[R_EBX] = regs->ebx;
362 env->regs[R_ECX] = regs->ecx;
363 env->regs[R_EDX] = regs->edx;
364 env->regs[R_ESI] = regs->esi;
365 env->regs[R_EDI] = regs->edi;
366 env->regs[R_EBP] = regs->ebp;
367 env->regs[R_ESP] = regs->esp;
368 env->eip = regs->eip;
369 #endif
370
371 /* linux interrupt setup */
372 #ifndef TARGET_ABI32
373 env->idt.limit = 511;
374 #else
375 env->idt.limit = 255;
376 #endif
377 env->idt.base = target_mmap(0, sizeof(uint64_t) * (env->idt.limit + 1),
378 PROT_READ|PROT_WRITE,
379 MAP_ANONYMOUS|MAP_PRIVATE, -1, 0);
380 idt_table = g2h_untagged(env->idt.base);
381 set_idt(0, 0);
382 set_idt(1, 0);
383 set_idt(2, 0);
384 set_idt(3, 3);
385 set_idt(4, 3);
386 set_idt(5, 0);
387 set_idt(6, 0);
388 set_idt(7, 0);
389 set_idt(8, 0);
390 set_idt(9, 0);
391 set_idt(10, 0);
392 set_idt(11, 0);
393 set_idt(12, 0);
394 set_idt(13, 0);
395 set_idt(14, 0);
396 set_idt(15, 0);
397 set_idt(16, 0);
398 set_idt(17, 0);
399 set_idt(18, 0);
400 set_idt(19, 0);
401 set_idt(0x80, 3);
402
403 /* linux segment setup */
404 {
405 uint64_t *gdt_table;
406 env->gdt.base = target_mmap(0, sizeof(uint64_t) * TARGET_GDT_ENTRIES,
407 PROT_READ|PROT_WRITE,
408 MAP_ANONYMOUS|MAP_PRIVATE, -1, 0);
409 env->gdt.limit = sizeof(uint64_t) * TARGET_GDT_ENTRIES - 1;
410 gdt_table = g2h_untagged(env->gdt.base);
411 #ifdef TARGET_ABI32
412 write_dt(&gdt_table[__USER_CS >> 3], 0, 0xfffff,
413 DESC_G_MASK | DESC_B_MASK | DESC_P_MASK | DESC_S_MASK |
414 (3 << DESC_DPL_SHIFT) | (0xa << DESC_TYPE_SHIFT));
415 #else
416 /* 64 bit code segment */
417 write_dt(&gdt_table[__USER_CS >> 3], 0, 0xfffff,
418 DESC_G_MASK | DESC_B_MASK | DESC_P_MASK | DESC_S_MASK |
419 DESC_L_MASK |
420 (3 << DESC_DPL_SHIFT) | (0xa << DESC_TYPE_SHIFT));
421 #endif
422 write_dt(&gdt_table[__USER_DS >> 3], 0, 0xfffff,
423 DESC_G_MASK | DESC_B_MASK | DESC_P_MASK | DESC_S_MASK |
424 (3 << DESC_DPL_SHIFT) | (0x2 << DESC_TYPE_SHIFT));
425 }
426 cpu_x86_load_seg(env, R_CS, __USER_CS);
427 cpu_x86_load_seg(env, R_SS, __USER_DS);
428 #ifdef TARGET_ABI32
429 cpu_x86_load_seg(env, R_DS, __USER_DS);
430 cpu_x86_load_seg(env, R_ES, __USER_DS);
431 cpu_x86_load_seg(env, R_FS, __USER_DS);
432 cpu_x86_load_seg(env, R_GS, __USER_DS);
433 /* This hack makes Wine work... */
434 env->segs[R_FS].selector = 0;
435 #else
436 cpu_x86_load_seg(env, R_DS, 0);
437 cpu_x86_load_seg(env, R_ES, 0);
438 cpu_x86_load_seg(env, R_FS, 0);
439 cpu_x86_load_seg(env, R_GS, 0);
440 #endif
441 }