]> git.proxmox.com Git - qemu.git/blame - kqemu.c
Avoid compiler warning
[qemu.git] / kqemu.c
CommitLineData
9df217a3
FB
1/*
2 * KQEMU support
5fafdf24 3 *
da260249 4 * Copyright (c) 2005-2008 Fabrice Bellard
9df217a3
FB
5 *
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
10 *
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, write to the Free Software
18 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
19 */
20#include "config.h"
21#ifdef _WIN32
4fddf62a 22#define WIN32_LEAN_AND_MEAN
9df217a3 23#include <windows.h>
6e4255f6 24#include <winioctl.h>
9df217a3
FB
25#else
26#include <sys/types.h>
27#include <sys/mman.h>
6e4255f6 28#include <sys/ioctl.h>
9df217a3 29#endif
605686cd 30#ifdef HOST_SOLARIS
aafd8139 31#include <sys/ioccom.h>
605686cd 32#endif
9df217a3
FB
33#include <stdlib.h>
34#include <stdio.h>
35#include <stdarg.h>
36#include <string.h>
37#include <errno.h>
38#include <unistd.h>
39#include <inttypes.h>
40
41#include "cpu.h"
42#include "exec-all.h"
ca10f867 43#include "qemu-common.h"
9df217a3
FB
44
45#ifdef USE_KQEMU
46
47#define DEBUG
aa062973 48//#define PROFILE
9df217a3
FB
49
50#include <unistd.h>
51#include <fcntl.h>
b88a3832 52#include "kqemu.h"
9df217a3 53
6e4255f6
FB
54#ifdef _WIN32
55#define KQEMU_DEVICE "\\\\.\\kqemu"
56#else
9df217a3 57#define KQEMU_DEVICE "/dev/kqemu"
6e4255f6
FB
58#endif
59
da260249
FB
60static void qpi_init(void);
61
6e4255f6
FB
62#ifdef _WIN32
63#define KQEMU_INVALID_FD INVALID_HANDLE_VALUE
64HANDLE kqemu_fd = KQEMU_INVALID_FD;
65#define kqemu_closefd(x) CloseHandle(x)
66#else
67#define KQEMU_INVALID_FD -1
68int kqemu_fd = KQEMU_INVALID_FD;
69#define kqemu_closefd(x) close(x)
70#endif
9df217a3 71
f32fc648
FB
72/* 0 = not allowed
73 1 = user kqemu
74 2 = kernel kqemu
75*/
9df217a3 76int kqemu_allowed = 1;
da260249 77uint64_t *pages_to_flush;
9df217a3 78unsigned int nb_pages_to_flush;
da260249 79uint64_t *ram_pages_to_update;
aa062973 80unsigned int nb_ram_pages_to_update;
da260249 81uint64_t *modified_ram_pages;
f32fc648
FB
82unsigned int nb_modified_ram_pages;
83uint8_t *modified_ram_pages_table;
da260249
FB
84int qpi_io_memory;
85uint32_t kqemu_comm_base; /* physical address of the QPI communication page */
9df217a3
FB
86
87#define cpuid(index, eax, ebx, ecx, edx) \
88 asm volatile ("cpuid" \
89 : "=a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx) \
90 : "0" (index))
91
c28e951f
FB
#ifdef __x86_64__
/* On x86_64 the CPUID instruction is architecturally guaranteed. */
static int is_cpuid_supported(void)
{
    return 1;
}
#else
/* Probe for CPUID on i386: the instruction exists iff bit 21 (ID) of
   EFLAGS can be toggled by software. */
static int is_cpuid_supported(void)
{
    int toggled, saved;
    asm volatile ("pushf\n"
                  "popl %0\n"
                  "movl %0, %1\n"
                  "xorl $0x00200000, %0\n"
                  "pushl %0\n"
                  "popf\n"
                  "pushf\n"
                  "popl %0\n"
                  : "=a" (toggled), "=d" (saved)
                  :
                  : "cc");
    return (toggled != saved);
}
#endif
9df217a3
FB
115
116static void kqemu_update_cpuid(CPUState *env)
117{
0de6bb73 118 int critical_features_mask, features, ext_features, ext_features_mask;
9df217a3
FB
119 uint32_t eax, ebx, ecx, edx;
120
121 /* the following features are kept identical on the host and
122 target cpus because they are important for user code. Strictly
123 speaking, only SSE really matters because the OS must support
124 it if the user code uses it. */
5fafdf24
TS
125 critical_features_mask =
126 CPUID_CMOV | CPUID_CX8 |
127 CPUID_FXSR | CPUID_MMX | CPUID_SSE |
ca0d1734 128 CPUID_SSE2 | CPUID_SEP;
0de6bb73 129 ext_features_mask = CPUID_EXT_SSE3 | CPUID_EXT_MONITOR;
9df217a3
FB
130 if (!is_cpuid_supported()) {
131 features = 0;
0de6bb73 132 ext_features = 0;
9df217a3
FB
133 } else {
134 cpuid(1, eax, ebx, ecx, edx);
135 features = edx;
0de6bb73 136 ext_features = ecx;
9df217a3 137 }
ca0d1734
FB
138#ifdef __x86_64__
139 /* NOTE: on x86_64 CPUs, SYSENTER is not supported in
140 compatibility mode, so in order to have the best performances
141 it is better not to use it */
142 features &= ~CPUID_SEP;
143#endif
9df217a3
FB
144 env->cpuid_features = (env->cpuid_features & ~critical_features_mask) |
145 (features & critical_features_mask);
0de6bb73
FB
146 env->cpuid_ext_features = (env->cpuid_ext_features & ~ext_features_mask) |
147 (ext_features & ext_features_mask);
9df217a3
FB
148 /* XXX: we could update more of the target CPUID state so that the
149 non accelerated code sees exactly the same CPU features as the
150 accelerated code */
151}
152
153int kqemu_init(CPUState *env)
154{
da260249 155 struct kqemu_init kinit;
9df217a3 156 int ret, version;
6e4255f6
FB
157#ifdef _WIN32
158 DWORD temp;
159#endif
9df217a3
FB
160
161 if (!kqemu_allowed)
162 return -1;
163
6e4255f6
FB
164#ifdef _WIN32
165 kqemu_fd = CreateFile(KQEMU_DEVICE, GENERIC_WRITE | GENERIC_READ,
166 FILE_SHARE_READ | FILE_SHARE_WRITE,
167 NULL, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL,
168 NULL);
7fb2a862 169 if (kqemu_fd == KQEMU_INVALID_FD) {
170 fprintf(stderr, "Could not open '%s' - QEMU acceleration layer not activated: %lu\n",
171 KQEMU_DEVICE, GetLastError());
172 return -1;
173 }
6e4255f6 174#else
9df217a3 175 kqemu_fd = open(KQEMU_DEVICE, O_RDWR);
6e4255f6 176 if (kqemu_fd == KQEMU_INVALID_FD) {
99c19686
TS
177 fprintf(stderr, "Could not open '%s' - QEMU acceleration layer not activated: %s\n",
178 KQEMU_DEVICE, strerror(errno));
9df217a3
FB
179 return -1;
180 }
7fb2a862 181#endif
9df217a3 182 version = 0;
6e4255f6
FB
183#ifdef _WIN32
184 DeviceIoControl(kqemu_fd, KQEMU_GET_VERSION, NULL, 0,
185 &version, sizeof(version), &temp, NULL);
186#else
9df217a3 187 ioctl(kqemu_fd, KQEMU_GET_VERSION, &version);
6e4255f6 188#endif
9df217a3
FB
189 if (version != KQEMU_VERSION) {
190 fprintf(stderr, "Version mismatch between kqemu module and qemu (%08x %08x) - disabling kqemu use\n",
191 version, KQEMU_VERSION);
192 goto fail;
193 }
194
5fafdf24 195 pages_to_flush = qemu_vmalloc(KQEMU_MAX_PAGES_TO_FLUSH *
da260249 196 sizeof(uint64_t));
9df217a3
FB
197 if (!pages_to_flush)
198 goto fail;
199
5fafdf24 200 ram_pages_to_update = qemu_vmalloc(KQEMU_MAX_RAM_PAGES_TO_UPDATE *
da260249 201 sizeof(uint64_t));
aa062973
FB
202 if (!ram_pages_to_update)
203 goto fail;
204
5fafdf24 205 modified_ram_pages = qemu_vmalloc(KQEMU_MAX_MODIFIED_RAM_PAGES *
da260249 206 sizeof(uint64_t));
f32fc648
FB
207 if (!modified_ram_pages)
208 goto fail;
209 modified_ram_pages_table = qemu_mallocz(phys_ram_size >> TARGET_PAGE_BITS);
210 if (!modified_ram_pages_table)
211 goto fail;
212
da260249
FB
213 memset(&kinit, 0, sizeof(kinit)); /* set the paddings to zero */
214 kinit.ram_base = phys_ram_base;
215 kinit.ram_size = phys_ram_size;
216 kinit.ram_dirty = phys_ram_dirty;
217 kinit.pages_to_flush = pages_to_flush;
218 kinit.ram_pages_to_update = ram_pages_to_update;
219 kinit.modified_ram_pages = modified_ram_pages;
6e4255f6 220#ifdef _WIN32
da260249 221 ret = DeviceIoControl(kqemu_fd, KQEMU_INIT, &kinit, sizeof(kinit),
6e4255f6
FB
222 NULL, 0, &temp, NULL) == TRUE ? 0 : -1;
223#else
da260249 224 ret = ioctl(kqemu_fd, KQEMU_INIT, &kinit);
6e4255f6 225#endif
9df217a3
FB
226 if (ret < 0) {
227 fprintf(stderr, "Error %d while initializing QEMU acceleration layer - disabling it for now\n", ret);
228 fail:
6e4255f6
FB
229 kqemu_closefd(kqemu_fd);
230 kqemu_fd = KQEMU_INVALID_FD;
9df217a3
FB
231 return -1;
232 }
233 kqemu_update_cpuid(env);
f32fc648 234 env->kqemu_enabled = kqemu_allowed;
9df217a3 235 nb_pages_to_flush = 0;
aa062973 236 nb_ram_pages_to_update = 0;
da260249
FB
237
238 qpi_init();
9df217a3
FB
239 return 0;
240}
241
242void kqemu_flush_page(CPUState *env, target_ulong addr)
243{
f32fc648 244#if defined(DEBUG)
9df217a3
FB
245 if (loglevel & CPU_LOG_INT) {
246 fprintf(logfile, "kqemu_flush_page: addr=" TARGET_FMT_lx "\n", addr);
247 }
248#endif
249 if (nb_pages_to_flush >= KQEMU_MAX_PAGES_TO_FLUSH)
250 nb_pages_to_flush = KQEMU_FLUSH_ALL;
251 else
252 pages_to_flush[nb_pages_to_flush++] = addr;
253}
254
255void kqemu_flush(CPUState *env, int global)
256{
257#ifdef DEBUG
258 if (loglevel & CPU_LOG_INT) {
259 fprintf(logfile, "kqemu_flush:\n");
260 }
261#endif
262 nb_pages_to_flush = KQEMU_FLUSH_ALL;
263}
264
aa062973
FB
265void kqemu_set_notdirty(CPUState *env, ram_addr_t ram_addr)
266{
267#ifdef DEBUG
268 if (loglevel & CPU_LOG_INT) {
da260249
FB
269 fprintf(logfile, "kqemu_set_notdirty: addr=%08lx\n",
270 (unsigned long)ram_addr);
aa062973
FB
271 }
272#endif
fc8dc060
FB
273 /* we only track transitions to dirty state */
274 if (phys_ram_dirty[ram_addr >> TARGET_PAGE_BITS] != 0xff)
275 return;
aa062973
FB
276 if (nb_ram_pages_to_update >= KQEMU_MAX_RAM_PAGES_TO_UPDATE)
277 nb_ram_pages_to_update = KQEMU_RAM_PAGES_UPDATE_ALL;
278 else
279 ram_pages_to_update[nb_ram_pages_to_update++] = ram_addr;
280}
281
f32fc648
FB
282static void kqemu_reset_modified_ram_pages(void)
283{
284 int i;
285 unsigned long page_index;
3b46e624 286
f32fc648
FB
287 for(i = 0; i < nb_modified_ram_pages; i++) {
288 page_index = modified_ram_pages[i] >> TARGET_PAGE_BITS;
289 modified_ram_pages_table[page_index] = 0;
290 }
291 nb_modified_ram_pages = 0;
292}
293
294void kqemu_modify_page(CPUState *env, ram_addr_t ram_addr)
295{
296 unsigned long page_index;
297 int ret;
298#ifdef _WIN32
299 DWORD temp;
300#endif
301
302 page_index = ram_addr >> TARGET_PAGE_BITS;
303 if (!modified_ram_pages_table[page_index]) {
304#if 0
305 printf("%d: modify_page=%08lx\n", nb_modified_ram_pages, ram_addr);
306#endif
307 modified_ram_pages_table[page_index] = 1;
308 modified_ram_pages[nb_modified_ram_pages++] = ram_addr;
309 if (nb_modified_ram_pages >= KQEMU_MAX_MODIFIED_RAM_PAGES) {
310 /* flush */
311#ifdef _WIN32
5fafdf24
TS
312 ret = DeviceIoControl(kqemu_fd, KQEMU_MODIFY_RAM_PAGES,
313 &nb_modified_ram_pages,
f32fc648
FB
314 sizeof(nb_modified_ram_pages),
315 NULL, 0, &temp, NULL);
316#else
5fafdf24 317 ret = ioctl(kqemu_fd, KQEMU_MODIFY_RAM_PAGES,
f32fc648
FB
318 &nb_modified_ram_pages);
319#endif
320 kqemu_reset_modified_ram_pages();
321 }
322 }
323}
324
da260249
FB
325void kqemu_set_phys_mem(uint64_t start_addr, ram_addr_t size,
326 ram_addr_t phys_offset)
327{
328 struct kqemu_phys_mem kphys_mem1, *kphys_mem = &kphys_mem1;
329 uint64_t end;
330 int ret, io_index;
331
332 end = (start_addr + size + TARGET_PAGE_SIZE - 1) & TARGET_PAGE_MASK;
333 start_addr &= TARGET_PAGE_MASK;
334 kphys_mem->phys_addr = start_addr;
335 kphys_mem->size = end - start_addr;
336 kphys_mem->ram_addr = phys_offset & TARGET_PAGE_MASK;
337 io_index = phys_offset & ~TARGET_PAGE_MASK;
338 switch(io_index) {
339 case IO_MEM_RAM:
340 kphys_mem->io_index = KQEMU_IO_MEM_RAM;
341 break;
342 case IO_MEM_ROM:
343 kphys_mem->io_index = KQEMU_IO_MEM_ROM;
344 break;
345 default:
346 if (qpi_io_memory == io_index) {
347 kphys_mem->io_index = KQEMU_IO_MEM_COMM;
348 } else {
349 kphys_mem->io_index = KQEMU_IO_MEM_UNASSIGNED;
350 }
351 break;
352 }
353#ifdef _WIN32
354 {
355 DWORD temp;
356 ret = DeviceIoControl(kqemu_fd, KQEMU_SET_PHYS_MEM,
357 kphys_mem, sizeof(*kphys_mem),
358 NULL, 0, &temp, NULL) == TRUE ? 0 : -1;
359 }
360#else
361 ret = ioctl(kqemu_fd, KQEMU_SET_PHYS_MEM, kphys_mem);
362#endif
363 if (ret < 0) {
364 fprintf(stderr, "kqemu: KQEMU_SET_PHYS_PAGE error=%d: start_addr=0x%016" PRIx64 " size=0x%08lx phys_offset=0x%08lx\n",
365 ret, start_addr,
366 (unsigned long)size, (unsigned long)phys_offset);
367 }
368}
369
9df217a3
FB
/* Memory image of the legacy x87 fsave/frstor area (108 bytes).
   The dummyN fields are the unused upper halves of the 32-bit
   environment slots. */
struct fpstate {
    uint16_t fpuc;          /* control word */
    uint16_t dummy1;
    uint16_t fpus;          /* status word */
    uint16_t dummy2;
    uint16_t fptag;         /* tag word */
    uint16_t dummy3;

    uint32_t fpip;          /* instruction pointer */
    uint32_t fpcs;          /* instruction pointer selector */
    uint32_t fpoo;          /* operand pointer */
    uint32_t fpos;          /* operand pointer selector */
    uint8_t fpregs1[8 * 10]; /* eight 80-bit stack registers */
};
384
/* Memory image of the fxsave/fxrstor area (512 bytes).  fxrstor
   requires 16-byte alignment, hence the attribute on fpx1. */
struct fpxstate {
    uint16_t fpuc;          /* control word */
    uint16_t fpus;          /* status word */
    uint16_t fptag;         /* abridged tag word (1 bit per register) */
    uint16_t fop;           /* last opcode */
    uint32_t fpuip;         /* instruction pointer */
    uint16_t cs_sel;
    uint16_t dummy0;
    uint32_t fpudp;         /* operand pointer */
    uint16_t ds_sel;
    uint16_t dummy1;
    uint32_t mxcsr;
    uint32_t mxcsr_mask;
    uint8_t fpregs1[8 * 16]; /* x87/MMX registers, 16 bytes apart */
    uint8_t xmm_regs[16 * 16];
    uint8_t dummy2[96];      /* pad up to the 512-byte save area */
};

static struct fpxstate fpx1 __attribute__((aligned(16)));
404
405static void restore_native_fp_frstor(CPUState *env)
406{
407 int fptag, i, j;
408 struct fpstate fp1, *fp = &fp1;
3b46e624 409
9df217a3
FB
410 fp->fpuc = env->fpuc;
411 fp->fpus = (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11;
412 fptag = 0;
413 for (i=7; i>=0; i--) {
414 fptag <<= 2;
415 if (env->fptags[i]) {
416 fptag |= 3;
417 } else {
418 /* the FPU automatically computes it */
419 }
420 }
421 fp->fptag = fptag;
422 j = env->fpstt;
423 for(i = 0;i < 8; i++) {
424 memcpy(&fp->fpregs1[i * 10], &env->fpregs[j].d, 10);
425 j = (j + 1) & 7;
426 }
427 asm volatile ("frstor %0" : "=m" (*fp));
428}
5fafdf24 429
9df217a3
FB
430static void save_native_fp_fsave(CPUState *env)
431{
432 int fptag, i, j;
433 uint16_t fpuc;
434 struct fpstate fp1, *fp = &fp1;
435
436 asm volatile ("fsave %0" : : "m" (*fp));
437 env->fpuc = fp->fpuc;
438 env->fpstt = (fp->fpus >> 11) & 7;
439 env->fpus = fp->fpus & ~0x3800;
440 fptag = fp->fptag;
441 for(i = 0;i < 8; i++) {
442 env->fptags[i] = ((fptag & 3) == 3);
443 fptag >>= 2;
444 }
445 j = env->fpstt;
446 for(i = 0;i < 8; i++) {
447 memcpy(&env->fpregs[j].d, &fp->fpregs1[i * 10], 10);
448 j = (j + 1) & 7;
449 }
450 /* we must restore the default rounding state */
451 fpuc = 0x037f | (env->fpuc & (3 << 10));
452 asm volatile("fldcw %0" : : "m" (fpuc));
453}
454
455static void restore_native_fp_fxrstor(CPUState *env)
456{
457 struct fpxstate *fp = &fpx1;
458 int i, j, fptag;
459
460 fp->fpuc = env->fpuc;
461 fp->fpus = (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11;
462 fptag = 0;
463 for(i = 0; i < 8; i++)
464 fptag |= (env->fptags[i] << i);
465 fp->fptag = fptag ^ 0xff;
466
467 j = env->fpstt;
468 for(i = 0;i < 8; i++) {
469 memcpy(&fp->fpregs1[i * 16], &env->fpregs[j].d, 10);
470 j = (j + 1) & 7;
471 }
472 if (env->cpuid_features & CPUID_SSE) {
473 fp->mxcsr = env->mxcsr;
474 /* XXX: check if DAZ is not available */
475 fp->mxcsr_mask = 0xffff;
c28e951f 476 memcpy(fp->xmm_regs, env->xmm_regs, CPU_NB_REGS * 16);
9df217a3
FB
477 }
478 asm volatile ("fxrstor %0" : "=m" (*fp));
479}
480
481static void save_native_fp_fxsave(CPUState *env)
482{
483 struct fpxstate *fp = &fpx1;
484 int fptag, i, j;
485 uint16_t fpuc;
486
487 asm volatile ("fxsave %0" : : "m" (*fp));
488 env->fpuc = fp->fpuc;
489 env->fpstt = (fp->fpus >> 11) & 7;
490 env->fpus = fp->fpus & ~0x3800;
491 fptag = fp->fptag ^ 0xff;
492 for(i = 0;i < 8; i++) {
493 env->fptags[i] = (fptag >> i) & 1;
494 }
495 j = env->fpstt;
496 for(i = 0;i < 8; i++) {
497 memcpy(&env->fpregs[j].d, &fp->fpregs1[i * 16], 10);
498 j = (j + 1) & 7;
499 }
500 if (env->cpuid_features & CPUID_SSE) {
501 env->mxcsr = fp->mxcsr;
c28e951f 502 memcpy(env->xmm_regs, fp->xmm_regs, CPU_NB_REGS * 16);
9df217a3
FB
503 }
504
505 /* we must restore the default rounding state */
506 asm volatile ("fninit");
507 fpuc = 0x037f | (env->fpuc & (3 << 10));
508 asm volatile("fldcw %0" : : "m" (fpuc));
509}
510
c28e951f
FB
511static int do_syscall(CPUState *env,
512 struct kqemu_cpu_state *kenv)
513{
514 int selector;
3b46e624 515
c28e951f 516 selector = (env->star >> 32) & 0xffff;
da260249 517#ifdef TARGET_X86_64
c28e951f 518 if (env->hflags & HF_LMA_MASK) {
93eac243
FB
519 int code64;
520
c28e951f
FB
521 env->regs[R_ECX] = kenv->next_eip;
522 env->regs[11] = env->eflags;
523
93eac243
FB
524 code64 = env->hflags & HF_CS64_MASK;
525
c28e951f 526 cpu_x86_set_cpl(env, 0);
5fafdf24
TS
527 cpu_x86_load_seg_cache(env, R_CS, selector & 0xfffc,
528 0, 0xffffffff,
c4e27dd4 529 DESC_G_MASK | DESC_P_MASK |
c28e951f
FB
530 DESC_S_MASK |
531 DESC_CS_MASK | DESC_R_MASK | DESC_A_MASK | DESC_L_MASK);
5fafdf24 532 cpu_x86_load_seg_cache(env, R_SS, (selector + 8) & 0xfffc,
c28e951f
FB
533 0, 0xffffffff,
534 DESC_G_MASK | DESC_B_MASK | DESC_P_MASK |
535 DESC_S_MASK |
536 DESC_W_MASK | DESC_A_MASK);
537 env->eflags &= ~env->fmask;
93eac243 538 if (code64)
c28e951f
FB
539 env->eip = env->lstar;
540 else
541 env->eip = env->cstar;
5fafdf24 542 } else
c28e951f
FB
543#endif
544 {
545 env->regs[R_ECX] = (uint32_t)kenv->next_eip;
3b46e624 546
c28e951f 547 cpu_x86_set_cpl(env, 0);
5fafdf24
TS
548 cpu_x86_load_seg_cache(env, R_CS, selector & 0xfffc,
549 0, 0xffffffff,
c28e951f
FB
550 DESC_G_MASK | DESC_B_MASK | DESC_P_MASK |
551 DESC_S_MASK |
552 DESC_CS_MASK | DESC_R_MASK | DESC_A_MASK);
5fafdf24 553 cpu_x86_load_seg_cache(env, R_SS, (selector + 8) & 0xfffc,
c28e951f
FB
554 0, 0xffffffff,
555 DESC_G_MASK | DESC_B_MASK | DESC_P_MASK |
556 DESC_S_MASK |
557 DESC_W_MASK | DESC_A_MASK);
558 env->eflags &= ~(IF_MASK | RF_MASK | VM_MASK);
559 env->eip = (uint32_t)env->star;
560 }
561 return 2;
562}
563
f32fc648 564#ifdef CONFIG_PROFILER
aa062973
FB
565
566#define PC_REC_SIZE 1
567#define PC_REC_HASH_BITS 16
568#define PC_REC_HASH_SIZE (1 << PC_REC_HASH_BITS)
569
570typedef struct PCRecord {
571 unsigned long pc;
572 int64_t count;
573 struct PCRecord *next;
574} PCRecord;
575
f32fc648
FB
576static PCRecord *pc_rec_hash[PC_REC_HASH_SIZE];
577static int nb_pc_records;
aa062973 578
f32fc648 579static void kqemu_record_pc(unsigned long pc)
aa062973
FB
580{
581 unsigned long h;
582 PCRecord **pr, *r;
583
584 h = pc / PC_REC_SIZE;
585 h = h ^ (h >> PC_REC_HASH_BITS);
586 h &= (PC_REC_HASH_SIZE - 1);
587 pr = &pc_rec_hash[h];
588 for(;;) {
589 r = *pr;
590 if (r == NULL)
591 break;
592 if (r->pc == pc) {
593 r->count++;
594 return;
595 }
596 pr = &r->next;
597 }
598 r = malloc(sizeof(PCRecord));
599 r->count = 1;
600 r->pc = pc;
601 r->next = NULL;
602 *pr = r;
603 nb_pc_records++;
604}
605
f32fc648 606static int pc_rec_cmp(const void *p1, const void *p2)
aa062973
FB
607{
608 PCRecord *r1 = *(PCRecord **)p1;
609 PCRecord *r2 = *(PCRecord **)p2;
610 if (r1->count < r2->count)
611 return 1;
612 else if (r1->count == r2->count)
613 return 0;
614 else
615 return -1;
616}
617
f32fc648
FB
618static void kqemu_record_flush(void)
619{
620 PCRecord *r, *r_next;
621 int h;
622
623 for(h = 0; h < PC_REC_HASH_SIZE; h++) {
624 for(r = pc_rec_hash[h]; r != NULL; r = r_next) {
625 r_next = r->next;
626 free(r);
627 }
628 pc_rec_hash[h] = NULL;
629 }
630 nb_pc_records = 0;
631}
632
aa062973
FB
633void kqemu_record_dump(void)
634{
635 PCRecord **pr, *r;
636 int i, h;
637 FILE *f;
638 int64_t total, sum;
639
640 pr = malloc(sizeof(PCRecord *) * nb_pc_records);
641 i = 0;
642 total = 0;
643 for(h = 0; h < PC_REC_HASH_SIZE; h++) {
644 for(r = pc_rec_hash[h]; r != NULL; r = r->next) {
645 pr[i++] = r;
646 total += r->count;
647 }
648 }
649 qsort(pr, nb_pc_records, sizeof(PCRecord *), pc_rec_cmp);
3b46e624 650
aa062973
FB
651 f = fopen("/tmp/kqemu.stats", "w");
652 if (!f) {
653 perror("/tmp/kqemu.stats");
654 exit(1);
655 }
26a76461 656 fprintf(f, "total: %" PRId64 "\n", total);
aa062973
FB
657 sum = 0;
658 for(i = 0; i < nb_pc_records; i++) {
659 r = pr[i];
660 sum += r->count;
5fafdf24
TS
661 fprintf(f, "%08lx: %" PRId64 " %0.2f%% %0.2f%%\n",
662 r->pc,
663 r->count,
aa062973
FB
664 (double)r->count / (double)total * 100.0,
665 (double)sum / (double)total * 100.0);
666 }
667 fclose(f);
668 free(pr);
f32fc648
FB
669
670 kqemu_record_flush();
aa062973
FB
671}
672#endif
673
da260249
FB
674static inline void kqemu_load_seg(struct kqemu_segment_cache *ksc,
675 const SegmentCache *sc)
676{
677 ksc->selector = sc->selector;
678 ksc->flags = sc->flags;
679 ksc->limit = sc->limit;
680 ksc->base = sc->base;
681}
682
683static inline void kqemu_save_seg(SegmentCache *sc,
684 const struct kqemu_segment_cache *ksc)
685{
686 sc->selector = ksc->selector;
687 sc->flags = ksc->flags;
688 sc->limit = ksc->limit;
689 sc->base = ksc->base;
690}
691
9df217a3
FB
692int kqemu_cpu_exec(CPUState *env)
693{
694 struct kqemu_cpu_state kcpu_state, *kenv = &kcpu_state;
f32fc648
FB
695 int ret, cpl, i;
696#ifdef CONFIG_PROFILER
697 int64_t ti;
698#endif
6e4255f6
FB
699#ifdef _WIN32
700 DWORD temp;
701#endif
9df217a3 702
f32fc648
FB
703#ifdef CONFIG_PROFILER
704 ti = profile_getclock();
705#endif
9df217a3
FB
706#ifdef DEBUG
707 if (loglevel & CPU_LOG_INT) {
708 fprintf(logfile, "kqemu: cpu_exec: enter\n");
709 cpu_dump_state(env, logfile, fprintf, 0);
710 }
711#endif
da260249
FB
712 for(i = 0; i < CPU_NB_REGS; i++)
713 kenv->regs[i] = env->regs[i];
9df217a3
FB
714 kenv->eip = env->eip;
715 kenv->eflags = env->eflags;
da260249
FB
716 for(i = 0; i < 6; i++)
717 kqemu_load_seg(&kenv->segs[i], &env->segs[i]);
718 kqemu_load_seg(&kenv->ldt, &env->ldt);
719 kqemu_load_seg(&kenv->tr, &env->tr);
720 kqemu_load_seg(&kenv->gdt, &env->gdt);
721 kqemu_load_seg(&kenv->idt, &env->idt);
9df217a3
FB
722 kenv->cr0 = env->cr[0];
723 kenv->cr2 = env->cr[2];
724 kenv->cr3 = env->cr[3];
725 kenv->cr4 = env->cr[4];
726 kenv->a20_mask = env->a20_mask;
c28e951f 727 kenv->efer = env->efer;
f32fc648
FB
728 kenv->tsc_offset = 0;
729 kenv->star = env->star;
730 kenv->sysenter_cs = env->sysenter_cs;
731 kenv->sysenter_esp = env->sysenter_esp;
732 kenv->sysenter_eip = env->sysenter_eip;
da260249 733#ifdef TARGET_X86_64
f32fc648
FB
734 kenv->lstar = env->lstar;
735 kenv->cstar = env->cstar;
736 kenv->fmask = env->fmask;
737 kenv->kernelgsbase = env->kernelgsbase;
c28e951f 738#endif
9df217a3
FB
739 if (env->dr[7] & 0xff) {
740 kenv->dr7 = env->dr[7];
741 kenv->dr0 = env->dr[0];
742 kenv->dr1 = env->dr[1];
743 kenv->dr2 = env->dr[2];
744 kenv->dr3 = env->dr[3];
745 } else {
746 kenv->dr7 = 0;
747 }
748 kenv->dr6 = env->dr[6];
f32fc648
FB
749 cpl = (env->hflags & HF_CPL_MASK);
750 kenv->cpl = cpl;
9df217a3 751 kenv->nb_pages_to_flush = nb_pages_to_flush;
f32fc648 752 kenv->user_only = (env->kqemu_enabled == 1);
aa062973 753 kenv->nb_ram_pages_to_update = nb_ram_pages_to_update;
aa062973 754 nb_ram_pages_to_update = 0;
f32fc648 755 kenv->nb_modified_ram_pages = nb_modified_ram_pages;
da260249 756
f32fc648
FB
757 kqemu_reset_modified_ram_pages();
758
759 if (env->cpuid_features & CPUID_FXSR)
760 restore_native_fp_fxrstor(env);
761 else
762 restore_native_fp_frstor(env);
9df217a3 763
6e4255f6 764#ifdef _WIN32
a332e112
FB
765 if (DeviceIoControl(kqemu_fd, KQEMU_EXEC,
766 kenv, sizeof(struct kqemu_cpu_state),
767 kenv, sizeof(struct kqemu_cpu_state),
768 &temp, NULL)) {
769 ret = kenv->retval;
770 } else {
771 ret = -1;
772 }
6e4255f6 773#else
6e4255f6
FB
774 ioctl(kqemu_fd, KQEMU_EXEC, kenv);
775 ret = kenv->retval;
6e4255f6 776#endif
f32fc648
FB
777 if (env->cpuid_features & CPUID_FXSR)
778 save_native_fp_fxsave(env);
779 else
780 save_native_fp_fsave(env);
9df217a3 781
da260249
FB
782 for(i = 0; i < CPU_NB_REGS; i++)
783 env->regs[i] = kenv->regs[i];
9df217a3
FB
784 env->eip = kenv->eip;
785 env->eflags = kenv->eflags;
da260249
FB
786 for(i = 0; i < 6; i++)
787 kqemu_save_seg(&env->segs[i], &kenv->segs[i]);
f32fc648 788 cpu_x86_set_cpl(env, kenv->cpl);
da260249 789 kqemu_save_seg(&env->ldt, &kenv->ldt);
f32fc648
FB
790 env->cr[0] = kenv->cr0;
791 env->cr[4] = kenv->cr4;
792 env->cr[3] = kenv->cr3;
9df217a3
FB
793 env->cr[2] = kenv->cr2;
794 env->dr[6] = kenv->dr6;
da260249 795#ifdef TARGET_X86_64
f32fc648 796 env->kernelgsbase = kenv->kernelgsbase;
f32fc648
FB
797#endif
798
799 /* flush pages as indicated by kqemu */
800 if (kenv->nb_pages_to_flush >= KQEMU_FLUSH_ALL) {
801 tlb_flush(env, 1);
802 } else {
803 for(i = 0; i < kenv->nb_pages_to_flush; i++) {
804 tlb_flush_page(env, pages_to_flush[i]);
805 }
806 }
807 nb_pages_to_flush = 0;
808
809#ifdef CONFIG_PROFILER
810 kqemu_time += profile_getclock() - ti;
811 kqemu_exec_count++;
812#endif
9df217a3 813
aa062973
FB
814 if (kenv->nb_ram_pages_to_update > 0) {
815 cpu_tlb_update_dirty(env);
816 }
aa062973 817
f32fc648
FB
818 if (kenv->nb_modified_ram_pages > 0) {
819 for(i = 0; i < kenv->nb_modified_ram_pages; i++) {
820 unsigned long addr;
821 addr = modified_ram_pages[i];
822 tb_invalidate_phys_page_range(addr, addr + TARGET_PAGE_SIZE, 0);
823 }
824 }
f32fc648 825
aa062973
FB
826 /* restore the hidden flags */
827 {
828 unsigned int new_hflags;
829#ifdef TARGET_X86_64
5fafdf24 830 if ((env->hflags & HF_LMA_MASK) &&
aa062973
FB
831 (env->segs[R_CS].flags & DESC_L_MASK)) {
832 /* long mode */
833 new_hflags = HF_CS32_MASK | HF_SS32_MASK | HF_CS64_MASK;
834 } else
835#endif
836 {
837 /* legacy / compatibility case */
838 new_hflags = (env->segs[R_CS].flags & DESC_B_MASK)
839 >> (DESC_B_SHIFT - HF_CS32_SHIFT);
840 new_hflags |= (env->segs[R_SS].flags & DESC_B_MASK)
841 >> (DESC_B_SHIFT - HF_SS32_SHIFT);
5fafdf24 842 if (!(env->cr[0] & CR0_PE_MASK) ||
aa062973
FB
843 (env->eflags & VM_MASK) ||
844 !(env->hflags & HF_CS32_MASK)) {
845 /* XXX: try to avoid this test. The problem comes from the
846 fact that is real mode or vm86 mode we only modify the
847 'base' and 'selector' fields of the segment cache to go
848 faster. A solution may be to force addseg to one in
849 translate-i386.c. */
850 new_hflags |= HF_ADDSEG_MASK;
851 } else {
5fafdf24 852 new_hflags |= ((env->segs[R_DS].base |
aa062973 853 env->segs[R_ES].base |
5fafdf24 854 env->segs[R_SS].base) != 0) <<
aa062973
FB
855 HF_ADDSEG_SHIFT;
856 }
857 }
5fafdf24 858 env->hflags = (env->hflags &
aa062973
FB
859 ~(HF_CS32_MASK | HF_SS32_MASK | HF_CS64_MASK | HF_ADDSEG_MASK)) |
860 new_hflags;
861 }
f32fc648
FB
862 /* update FPU flags */
863 env->hflags = (env->hflags & ~(HF_MP_MASK | HF_EM_MASK | HF_TS_MASK)) |
864 ((env->cr[0] << (HF_MP_SHIFT - 1)) & (HF_MP_MASK | HF_EM_MASK | HF_TS_MASK));
865 if (env->cr[4] & CR4_OSFXSR_MASK)
866 env->hflags |= HF_OSFXSR_MASK;
867 else
868 env->hflags &= ~HF_OSFXSR_MASK;
3b46e624 869
9df217a3
FB
870#ifdef DEBUG
871 if (loglevel & CPU_LOG_INT) {
872 fprintf(logfile, "kqemu: kqemu_cpu_exec: ret=0x%x\n", ret);
873 }
874#endif
c28e951f
FB
875 if (ret == KQEMU_RET_SYSCALL) {
876 /* syscall instruction */
877 return do_syscall(env, kenv);
5fafdf24 878 } else
9df217a3
FB
879 if ((ret & 0xff00) == KQEMU_RET_INT) {
880 env->exception_index = ret & 0xff;
881 env->error_code = 0;
882 env->exception_is_int = 1;
883 env->exception_next_eip = kenv->next_eip;
f32fc648
FB
884#ifdef CONFIG_PROFILER
885 kqemu_ret_int_count++;
886#endif
9df217a3 887#ifdef DEBUG
c28e951f 888 if (loglevel & CPU_LOG_INT) {
5fafdf24 889 fprintf(logfile, "kqemu: interrupt v=%02x:\n",
c28e951f
FB
890 env->exception_index);
891 cpu_dump_state(env, logfile, fprintf, 0);
892 }
9df217a3
FB
893#endif
894 return 1;
895 } else if ((ret & 0xff00) == KQEMU_RET_EXCEPTION) {
896 env->exception_index = ret & 0xff;
897 env->error_code = kenv->error_code;
898 env->exception_is_int = 0;
899 env->exception_next_eip = 0;
f32fc648
FB
900#ifdef CONFIG_PROFILER
901 kqemu_ret_excp_count++;
902#endif
9df217a3
FB
903#ifdef DEBUG
904 if (loglevel & CPU_LOG_INT) {
905 fprintf(logfile, "kqemu: exception v=%02x e=%04x:\n",
906 env->exception_index, env->error_code);
907 cpu_dump_state(env, logfile, fprintf, 0);
908 }
909#endif
910 return 1;
911 } else if (ret == KQEMU_RET_INTR) {
f32fc648
FB
912#ifdef CONFIG_PROFILER
913 kqemu_ret_intr_count++;
914#endif
c45b3c0e
FB
915#ifdef DEBUG
916 if (loglevel & CPU_LOG_INT) {
917 cpu_dump_state(env, logfile, fprintf, 0);
918 }
919#endif
9df217a3 920 return 0;
5fafdf24 921 } else if (ret == KQEMU_RET_SOFTMMU) {
f32fc648
FB
922#ifdef CONFIG_PROFILER
923 {
924 unsigned long pc = env->eip + env->segs[R_CS].base;
925 kqemu_record_pc(pc);
926 }
aa062973
FB
927#endif
928#ifdef DEBUG
929 if (loglevel & CPU_LOG_INT) {
930 cpu_dump_state(env, logfile, fprintf, 0);
931 }
932#endif
9df217a3
FB
933 return 2;
934 } else {
935 cpu_dump_state(env, stderr, fprintf, 0);
936 fprintf(stderr, "Unsupported return value: 0x%x\n", ret);
937 exit(1);
938 }
939 return 0;
940}
941
a332e112
FB
942void kqemu_cpu_interrupt(CPUState *env)
943{
da260249 944#if defined(_WIN32)
5fafdf24 945 /* cancelling the I/O request causes KQEMU to finish executing the
a332e112
FB
946 current block and successfully returning. */
947 CancelIo(kqemu_fd);
948#endif
949}
950
da260249
FB
951/*
952 QEMU paravirtualization interface. The current interface only
953 allows to modify the IF and IOPL flags when running in
954 kqemu.
955
956 At this point it is not very satisfactory. I leave it for reference
957 as it adds little complexity.
958*/
959
960#define QPI_COMM_PAGE_PHYS_ADDR 0xff000000
961
962static uint32_t qpi_mem_readb(void *opaque, target_phys_addr_t addr)
963{
964 return 0;
965}
966
967static uint32_t qpi_mem_readw(void *opaque, target_phys_addr_t addr)
968{
969 return 0;
970}
971
972static void qpi_mem_writeb(void *opaque, target_phys_addr_t addr, uint32_t val)
973{
974}
975
976static void qpi_mem_writew(void *opaque, target_phys_addr_t addr, uint32_t val)
977{
978}
979
980static uint32_t qpi_mem_readl(void *opaque, target_phys_addr_t addr)
981{
982 CPUState *env;
983
984 env = cpu_single_env;
985 if (!env)
986 return 0;
987 return env->eflags & (IF_MASK | IOPL_MASK);
988}
989
990/* Note: after writing to this address, the guest code must make sure
991 it is exiting the current TB. pushf/popf can be used for that
992 purpose. */
993static void qpi_mem_writel(void *opaque, target_phys_addr_t addr, uint32_t val)
994{
995 CPUState *env;
996
997 env = cpu_single_env;
998 if (!env)
999 return;
1000 env->eflags = (env->eflags & ~(IF_MASK | IOPL_MASK)) |
1001 (val & (IF_MASK | IOPL_MASK));
1002}
1003
1004static CPUReadMemoryFunc *qpi_mem_read[3] = {
1005 qpi_mem_readb,
1006 qpi_mem_readw,
1007 qpi_mem_readl,
1008};
1009
1010static CPUWriteMemoryFunc *qpi_mem_write[3] = {
1011 qpi_mem_writeb,
1012 qpi_mem_writew,
1013 qpi_mem_writel,
1014};
1015
1016static void qpi_init(void)
1017{
1018 kqemu_comm_base = 0xff000000 | 1;
1019 qpi_io_memory = cpu_register_io_memory(0,
1020 qpi_mem_read,
1021 qpi_mem_write, NULL);
1022 cpu_register_physical_memory(kqemu_comm_base & ~0xfff,
1023 0x1000, qpi_io_memory);
1024}
9df217a3 1025#endif