]> git.proxmox.com Git - mirror_qemu.git/blame - linux-user/i386/cpu_loop.c
Merge tag 'for_upstream' of https://git.kernel.org/pub/scm/virt/kvm/mst/qemu into...
[mirror_qemu.git] / linux-user / i386 / cpu_loop.c
CommitLineData
cd71c089
LV
1/*
2 * qemu user cpu loop
3 *
4 * Copyright (c) 2003-2008 Fabrice Bellard
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, see <http://www.gnu.org/licenses/>.
18 */
19
20#include "qemu/osdep.h"
21#include "qemu.h"
cdf06ce5 22#include "qemu/timer.h"
3b249d26 23#include "user-internals.h"
cd71c089 24#include "cpu_loop-common.h"
2113aed6 25#include "signal-common.h"
5423e6d3 26#include "user-mmap.h"
cd71c089 27
3f8258c1
LV
28/***********************************************************/
29/* CPUX86 core interface */
30
31uint64_t cpu_get_tsc(CPUX86State *env)
32{
33 return cpu_get_host_ticks();
34}
35
/*
 * Encode a descriptor-table entry as two 32-bit words stored at @ptr.
 *
 * @ptr:   destination; two consecutive uint32_t slots are written
 * @addr:  base address, scattered over both words
 * @limit: limit; bits 15..0 go to the first word, bits 19..16 to the second
 * @flags: extra bits OR-ed verbatim into the second word
 *
 * Each word is passed through tswap32() before being stored.
 */
static void write_dt(void *ptr, unsigned long addr, unsigned long limit,
                     int flags)
{
    uint32_t *words = ptr;
    unsigned int low, high;

    /* First word: base bits 15..0 in the upper half, limit bits 15..0 below. */
    low = (addr << 16) | (limit & 0xffff);

    /* Second word: base bits 23..16 and 31..24, limit bits 19..16, flags. */
    high = ((addr >> 16) & 0xff) | (addr & 0xff000000) | (limit & 0x000f0000);
    high |= flags;

    words[0] = tswap32(low);
    words[1] = tswap32(high);
}
48
49static uint64_t *idt_table;
50#ifdef TARGET_X86_64
/*
 * Encode a 16-byte gate descriptor as four 32-bit words stored at @ptr.
 *
 * @ptr:  destination; four consecutive uint32_t slots are written
 * @type: gate type, placed at bit 8 of the second word
 * @dpl:  descriptor privilege level, placed at bit 13 of the second word
 * @addr: 64-bit handler address, split across the first three words
 * @sel:  selector, placed in the upper half of the first word
 *
 * Bit 15 (0x8000) of the second word is always set and the fourth word is
 * always zero.  The first three words go through tswap32() before storing.
 */
static void set_gate64(void *ptr, unsigned int type, unsigned int dpl,
                       uint64_t addr, unsigned int sel)
{
    uint32_t *words = ptr;
    uint32_t low = (addr & 0xffff) | (sel << 16);
    uint32_t mid = (addr & 0xffff0000) | 0x8000 | (dpl << 13) | (type << 8);

    words[0] = tswap32(low);
    words[1] = tswap32(mid);
    words[2] = tswap32(addr >> 32);
    words[3] = 0;
}
63/* only dpl matters as we do only user space emulation */
64static void set_idt(int n, unsigned int dpl)
65{
66 set_gate64(idt_table + n * 2, 0, dpl, 0, 0);
67}
68#else
/*
 * Encode an 8-byte gate descriptor as two 32-bit words stored at @ptr.
 *
 * @ptr:  destination; two consecutive uint32_t slots are written
 * @type: gate type, placed at bit 8 of the second word
 * @dpl:  descriptor privilege level, placed at bit 13 of the second word
 * @addr: 32-bit handler address, split across both words
 * @sel:  selector, placed in the upper half of the first word
 *
 * Bit 15 (0x8000) of the second word is always set.  Both words go through
 * tswap32() before storing.
 */
static void set_gate(void *ptr, unsigned int type, unsigned int dpl,
                     uint32_t addr, unsigned int sel)
{
    uint32_t *words = ptr;
    uint32_t low = (addr & 0xffff) | (sel << 16);
    uint32_t high = (addr & 0xffff0000) | 0x8000 | (dpl << 13) | (type << 8);

    words[0] = tswap32(low);
    words[1] = tswap32(high);
}
79
80/* only dpl matters as we do only user space emulation */
81static void set_idt(int n, unsigned int dpl)
82{
83 set_gate(idt_table + n, 0, dpl, 0, 0);
84}
85#endif
86
b26491b4
RH
87#ifdef TARGET_X86_64
88static bool write_ok_or_segv(CPUX86State *env, abi_ptr addr, size_t len)
89{
90 /*
91 * For all the vsyscalls, NULL means "don't write anything" not
92 * "write it at address 0".
93 */
c7169b02 94 if (addr == 0 || access_ok(env_cpu(env), VERIFY_WRITE, addr, len)) {
b26491b4
RH
95 return true;
96 }
97
98 env->error_code = PG_ERROR_W_MASK | PG_ERROR_U_MASK;
d08d6d66 99 force_sig_fault(TARGET_SIGSEGV, TARGET_SEGV_MAPERR, addr);
b26491b4
RH
100 return false;
101}
102
103/*
104 * Since v3.1, the kernel traps and emulates the vsyscall page.
105 * Entry points other than the official generate SIGSEGV.
106 */
107static void emulate_vsyscall(CPUX86State *env)
108{
109 int syscall;
110 abi_ulong ret;
111 uint64_t caller;
112
113 /*
114 * Validate the entry point. We have already validated the page
115 * during translation to get here; now verify the offset.
116 */
117 switch (env->eip & ~TARGET_PAGE_MASK) {
118 case 0x000:
119 syscall = TARGET_NR_gettimeofday;
120 break;
121 case 0x400:
122 syscall = TARGET_NR_time;
123 break;
124 case 0x800:
125 syscall = TARGET_NR_getcpu;
126 break;
127 default:
128 goto sigsegv;
129 }
130
131 /*
132 * Validate the return address.
133 * Note that the kernel treats this the same as an invalid entry point.
134 */
135 if (get_user_u64(caller, env->regs[R_ESP])) {
136 goto sigsegv;
137 }
138
139 /*
7a21bee2 140 * Validate the pointer arguments.
b26491b4
RH
141 */
142 switch (syscall) {
143 case TARGET_NR_gettimeofday:
144 if (!write_ok_or_segv(env, env->regs[R_EDI],
145 sizeof(struct target_timeval)) ||
146 !write_ok_or_segv(env, env->regs[R_ESI],
147 sizeof(struct target_timezone))) {
148 return;
149 }
150 break;
151 case TARGET_NR_time:
152 if (!write_ok_or_segv(env, env->regs[R_EDI], sizeof(abi_long))) {
153 return;
154 }
155 break;
156 case TARGET_NR_getcpu:
157 if (!write_ok_or_segv(env, env->regs[R_EDI], sizeof(uint32_t)) ||
158 !write_ok_or_segv(env, env->regs[R_ESI], sizeof(uint32_t))) {
159 return;
160 }
161 break;
162 default:
163 g_assert_not_reached();
164 }
165
166 /*
167 * Perform the syscall. None of the vsyscalls should need restarting.
168 */
169 ret = do_syscall(env, syscall, env->regs[R_EDI], env->regs[R_ESI],
170 env->regs[R_EDX], env->regs[10], env->regs[8],
171 env->regs[9], 0, 0);
af254a27 172 g_assert(ret != -QEMU_ERESTARTSYS);
57a0c938 173 g_assert(ret != -QEMU_ESIGRETURN);
b26491b4
RH
174 if (ret == -TARGET_EFAULT) {
175 goto sigsegv;
176 }
177 env->regs[R_EAX] = ret;
178
179 /* Emulate a ret instruction to leave the vsyscall page. */
180 env->eip = caller;
181 env->regs[R_ESP] += 8;
182 return;
183
184 sigsegv:
d08d6d66 185 force_sig(TARGET_SIGSEGV);
b26491b4
RH
186}
187#endif
188
1ade5b2f
RH
189static bool maybe_handle_vm86_trap(CPUX86State *env, int trapnr)
190{
191#ifndef TARGET_X86_64
192 if (env->eflags & VM_MASK) {
193 handle_vm86_trap(env, trapnr);
194 return true;
195 }
196#endif
197 return false;
198}
199
3f8258c1
LV
200void cpu_loop(CPUX86State *env)
201{
6aa9e42f 202 CPUState *cs = env_cpu(env);
3f8258c1 203 int trapnr;
3f8258c1 204 abi_ulong ret;
3f8258c1
LV
205
206 for(;;) {
207 cpu_exec_start(cs);
208 trapnr = cpu_exec(cs);
209 cpu_exec_end(cs);
210 process_queued_cpu_work(cs);
211
212 switch(trapnr) {
213 case 0x80:
63fd8ef0
PB
214#ifndef TARGET_X86_64
215 case EXCP_SYSCALL:
216#endif
3f8258c1
LV
217 /* linux syscall from int $0x80 */
218 ret = do_syscall(env,
219 env->regs[R_EAX],
220 env->regs[R_EBX],
221 env->regs[R_ECX],
222 env->regs[R_EDX],
223 env->regs[R_ESI],
224 env->regs[R_EDI],
225 env->regs[R_EBP],
226 0, 0);
af254a27 227 if (ret == -QEMU_ERESTARTSYS) {
3f8258c1 228 env->eip -= 2;
57a0c938 229 } else if (ret != -QEMU_ESIGRETURN) {
3f8258c1
LV
230 env->regs[R_EAX] = ret;
231 }
232 break;
63fd8ef0 233#ifdef TARGET_X86_64
3f8258c1 234 case EXCP_SYSCALL:
63fd8ef0 235 /* linux syscall from syscall instruction. */
3f8258c1
LV
236 ret = do_syscall(env,
237 env->regs[R_EAX],
238 env->regs[R_EDI],
239 env->regs[R_ESI],
240 env->regs[R_EDX],
241 env->regs[10],
242 env->regs[8],
243 env->regs[9],
244 0, 0);
af254a27 245 if (ret == -QEMU_ERESTARTSYS) {
3f8258c1 246 env->eip -= 2;
57a0c938 247 } else if (ret != -QEMU_ESIGRETURN) {
3f8258c1
LV
248 env->regs[R_EAX] = ret;
249 }
250 break;
b26491b4
RH
251 case EXCP_VSYSCALL:
252 emulate_vsyscall(env);
253 break;
3f8258c1
LV
254#endif
255 case EXCP0B_NOSEG:
256 case EXCP0C_STACK:
d08d6d66 257 force_sig(TARGET_SIGBUS);
3f8258c1
LV
258 break;
259 case EXCP0D_GPF:
260 /* XXX: potential problem if ABI32 */
1ade5b2f 261 if (maybe_handle_vm86_trap(env, trapnr)) {
acf768a9 262 break;
3f8258c1 263 }
d08d6d66 264 force_sig(TARGET_SIGSEGV);
3f8258c1
LV
265 break;
266 case EXCP0E_PAGE:
d08d6d66
RH
267 force_sig_fault(TARGET_SIGSEGV,
268 (env->error_code & PG_ERROR_P_MASK ?
269 TARGET_SEGV_ACCERR : TARGET_SEGV_MAPERR),
270 env->cr[2]);
3f8258c1
LV
271 break;
272 case EXCP00_DIVZ:
1ade5b2f 273 if (maybe_handle_vm86_trap(env, trapnr)) {
acf768a9 274 break;
3f8258c1 275 }
d08d6d66 276 force_sig_fault(TARGET_SIGFPE, TARGET_FPE_INTDIV, env->eip);
3f8258c1
LV
277 break;
278 case EXCP01_DB:
1ade5b2f 279 if (maybe_handle_vm86_trap(env, trapnr)) {
acf768a9
RH
280 break;
281 }
d08d6d66
RH
282 force_sig_fault(TARGET_SIGTRAP, TARGET_TRAP_BRKPT, env->eip);
283 break;
284 case EXCP03_INT3:
285 if (maybe_handle_vm86_trap(env, trapnr)) {
286 break;
3f8258c1 287 }
d08d6d66 288 force_sig(TARGET_SIGTRAP);
3f8258c1
LV
289 break;
290 case EXCP04_INTO:
291 case EXCP05_BOUND:
1ade5b2f 292 if (maybe_handle_vm86_trap(env, trapnr)) {
acf768a9 293 break;
3f8258c1 294 }
d08d6d66 295 force_sig(TARGET_SIGSEGV);
3f8258c1
LV
296 break;
297 case EXCP06_ILLOP:
d08d6d66 298 force_sig_fault(TARGET_SIGILL, TARGET_ILL_ILLOPN, env->eip);
3f8258c1
LV
299 break;
300 case EXCP_INTERRUPT:
301 /* just indicate that signals should be handled asap */
302 break;
303 case EXCP_DEBUG:
d08d6d66 304 force_sig_fault(TARGET_SIGTRAP, TARGET_TRAP_BRKPT, env->eip);
3f8258c1
LV
305 break;
306 case EXCP_ATOMIC:
307 cpu_exec_step_atomic(cs);
308 break;
309 default:
bd5ccd61
HD
310 EXCP_DUMP(env, "qemu: unhandled CPU exception 0x%x - aborting\n",
311 trapnr);
3f8258c1
LV
312 abort();
313 }
314 process_pending_signals(env);
315 }
316}
317
2732c739 318static void target_cpu_free(void *obj)
319{
320 CPUArchState *env = ((CPUState *)obj)->env_ptr;
321 target_munmap(env->gdt.base, sizeof(uint64_t) * TARGET_GDT_ENTRIES);
322 g_free(obj);
323}
324
cd71c089
LV
325void target_cpu_copy_regs(CPUArchState *env, struct target_pt_regs *regs)
326{
2732c739 327 CPUState *cpu = env_cpu(env);
328 OBJECT(cpu)->free = target_cpu_free;
3f8258c1
LV
329 env->cr[0] = CR0_PG_MASK | CR0_WP_MASK | CR0_PE_MASK;
330 env->hflags |= HF_PE_MASK | HF_CPL_MASK;
331 if (env->features[FEAT_1_EDX] & CPUID_SSE) {
332 env->cr[4] |= CR4_OSFXSR_MASK;
333 env->hflags |= HF_OSFXSR_MASK;
334 }
335#ifndef TARGET_ABI32
336 /* enable 64 bit mode if possible */
337 if (!(env->features[FEAT_8000_0001_EDX] & CPUID_EXT2_LM)) {
338 fprintf(stderr, "The selected x86 CPU does not support 64 bit mode\n");
339 exit(EXIT_FAILURE);
340 }
341 env->cr[4] |= CR4_PAE_MASK;
342 env->efer |= MSR_EFER_LMA | MSR_EFER_LME;
343 env->hflags |= HF_LMA_MASK;
344#endif
345
346 /* flags setup : we activate the IRQs by default as in user mode */
347 env->eflags |= IF_MASK;
348
349 /* linux register setup */
350#ifndef TARGET_ABI32
351 env->regs[R_EAX] = regs->rax;
352 env->regs[R_EBX] = regs->rbx;
353 env->regs[R_ECX] = regs->rcx;
354 env->regs[R_EDX] = regs->rdx;
355 env->regs[R_ESI] = regs->rsi;
356 env->regs[R_EDI] = regs->rdi;
357 env->regs[R_EBP] = regs->rbp;
358 env->regs[R_ESP] = regs->rsp;
359 env->eip = regs->rip;
360#else
361 env->regs[R_EAX] = regs->eax;
362 env->regs[R_EBX] = regs->ebx;
363 env->regs[R_ECX] = regs->ecx;
364 env->regs[R_EDX] = regs->edx;
365 env->regs[R_ESI] = regs->esi;
366 env->regs[R_EDI] = regs->edi;
367 env->regs[R_EBP] = regs->ebp;
368 env->regs[R_ESP] = regs->esp;
369 env->eip = regs->eip;
370#endif
371
372 /* linux interrupt setup */
373#ifndef TARGET_ABI32
374 env->idt.limit = 511;
375#else
376 env->idt.limit = 255;
377#endif
378 env->idt.base = target_mmap(0, sizeof(uint64_t) * (env->idt.limit + 1),
379 PROT_READ|PROT_WRITE,
380 MAP_ANONYMOUS|MAP_PRIVATE, -1, 0);
3e8f1628 381 idt_table = g2h_untagged(env->idt.base);
3f8258c1
LV
382 set_idt(0, 0);
383 set_idt(1, 0);
384 set_idt(2, 0);
385 set_idt(3, 3);
386 set_idt(4, 3);
387 set_idt(5, 0);
388 set_idt(6, 0);
389 set_idt(7, 0);
390 set_idt(8, 0);
391 set_idt(9, 0);
392 set_idt(10, 0);
393 set_idt(11, 0);
394 set_idt(12, 0);
395 set_idt(13, 0);
396 set_idt(14, 0);
397 set_idt(15, 0);
398 set_idt(16, 0);
399 set_idt(17, 0);
400 set_idt(18, 0);
401 set_idt(19, 0);
402 set_idt(0x80, 3);
403
404 /* linux segment setup */
405 {
406 uint64_t *gdt_table;
407 env->gdt.base = target_mmap(0, sizeof(uint64_t) * TARGET_GDT_ENTRIES,
408 PROT_READ|PROT_WRITE,
409 MAP_ANONYMOUS|MAP_PRIVATE, -1, 0);
410 env->gdt.limit = sizeof(uint64_t) * TARGET_GDT_ENTRIES - 1;
3e8f1628 411 gdt_table = g2h_untagged(env->gdt.base);
3f8258c1
LV
412#ifdef TARGET_ABI32
413 write_dt(&gdt_table[__USER_CS >> 3], 0, 0xfffff,
414 DESC_G_MASK | DESC_B_MASK | DESC_P_MASK | DESC_S_MASK |
415 (3 << DESC_DPL_SHIFT) | (0xa << DESC_TYPE_SHIFT));
416#else
417 /* 64 bit code segment */
418 write_dt(&gdt_table[__USER_CS >> 3], 0, 0xfffff,
419 DESC_G_MASK | DESC_B_MASK | DESC_P_MASK | DESC_S_MASK |
420 DESC_L_MASK |
421 (3 << DESC_DPL_SHIFT) | (0xa << DESC_TYPE_SHIFT));
422#endif
423 write_dt(&gdt_table[__USER_DS >> 3], 0, 0xfffff,
424 DESC_G_MASK | DESC_B_MASK | DESC_P_MASK | DESC_S_MASK |
425 (3 << DESC_DPL_SHIFT) | (0x2 << DESC_TYPE_SHIFT));
426 }
427 cpu_x86_load_seg(env, R_CS, __USER_CS);
428 cpu_x86_load_seg(env, R_SS, __USER_DS);
429#ifdef TARGET_ABI32
430 cpu_x86_load_seg(env, R_DS, __USER_DS);
431 cpu_x86_load_seg(env, R_ES, __USER_DS);
432 cpu_x86_load_seg(env, R_FS, __USER_DS);
433 cpu_x86_load_seg(env, R_GS, __USER_DS);
434 /* This hack makes Wine work... */
435 env->segs[R_FS].selector = 0;
436#else
437 cpu_x86_load_seg(env, R_DS, 0);
438 cpu_x86_load_seg(env, R_ES, 0);
439 cpu_x86_load_seg(env, R_FS, 0);
440 cpu_x86_load_seg(env, R_GS, 0);
441#endif
cd71c089 442}