]> git.proxmox.com Git - qemu.git/blame - kqemu.c
Clean up debugging code #ifdefs (Eduardo Habkost)
[qemu.git] / kqemu.c
CommitLineData
9df217a3
FB
1/*
2 * KQEMU support
5fafdf24 3 *
da260249 4 * Copyright (c) 2005-2008 Fabrice Bellard
9df217a3
FB
5 *
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
10 *
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, write to the Free Software
fad6cb1a 18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301 USA
9df217a3
FB
19 */
20#include "config.h"
21#ifdef _WIN32
4fddf62a 22#define WIN32_LEAN_AND_MEAN
9df217a3 23#include <windows.h>
6e4255f6 24#include <winioctl.h>
9df217a3
FB
25#else
26#include <sys/types.h>
27#include <sys/mman.h>
6e4255f6 28#include <sys/ioctl.h>
9df217a3 29#endif
605686cd 30#ifdef HOST_SOLARIS
aafd8139 31#include <sys/ioccom.h>
605686cd 32#endif
9df217a3
FB
33#include <stdlib.h>
34#include <stdio.h>
35#include <stdarg.h>
36#include <string.h>
37#include <errno.h>
38#include <unistd.h>
39#include <inttypes.h>
40
41#include "cpu.h"
42#include "exec-all.h"
ca10f867 43#include "qemu-common.h"
9df217a3
FB
44
45#ifdef USE_KQEMU
46
47#define DEBUG
aa062973 48//#define PROFILE
9df217a3 49
d12d51d5
AL
50
/*
 * Logging helpers. When DEBUG is defined they write to logfile if the
 * CPU_LOG_INT bit is set in loglevel; otherwise they expand to nothing.
 */
#ifdef DEBUG
#define LOG_INT(...)                                        \
    do {                                                    \
        if (loglevel & CPU_LOG_INT) {                       \
            fprintf(logfile, ## __VA_ARGS__);               \
        }                                                   \
    } while (0)
#define LOG_INT_STATE(env)                                  \
    do {                                                    \
        if (loglevel & CPU_LOG_INT) {                       \
            cpu_dump_state(env, logfile, fprintf, 0);       \
        }                                                   \
    } while (0)
#else
#define LOG_INT(...)        do { } while (0)
#define LOG_INT_STATE(env)  do { } while (0)
#endif
65
9df217a3
FB
66#include <unistd.h>
67#include <fcntl.h>
b88a3832 68#include "kqemu.h"
9df217a3 69
6e4255f6
FB
/* Host path of the kqemu control device. */
#ifdef _WIN32
#define KQEMU_DEVICE "\\\\.\\kqemu"
#else
#define KQEMU_DEVICE "/dev/kqemu"
#endif

static void qpi_init(void);

/* Handle to the kqemu device, plus the platform's "not open" value and
   close primitive. */
#ifdef _WIN32
#define KQEMU_INVALID_FD INVALID_HANDLE_VALUE
HANDLE kqemu_fd = KQEMU_INVALID_FD;
#define kqemu_closefd(x) CloseHandle(x)
#else
#define KQEMU_INVALID_FD -1
int kqemu_fd = KQEMU_INVALID_FD;
#define kqemu_closefd(x) close(x)
#endif

/* 0 = not allowed
   1 = user kqemu
   2 = kernel kqemu
*/
int kqemu_allowed = 1;

/* Page lists shared with the kqemu module; each array is paired with the
   number of valid entries it currently holds. */
uint64_t *pages_to_flush;
unsigned int nb_pages_to_flush;
uint64_t *ram_pages_to_update;
unsigned int nb_ram_pages_to_update;
uint64_t *modified_ram_pages;
unsigned int nb_modified_ram_pages;
/* One byte per RAM page: non-zero when the page is already queued in
   modified_ram_pages. */
uint8_t *modified_ram_pages_table;

/* I/O memory index returned when the QPI handlers are registered. */
int qpi_io_memory;
uint32_t kqemu_comm_base; /* physical address of the QPI communication page */
9df217a3
FB
102
103#define cpuid(index, eax, ebx, ecx, edx) \
104 asm volatile ("cpuid" \
105 : "=a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx) \
106 : "0" (index))
107
c28e951f
FB
/*
 * Report whether the host CPU implements the CPUID instruction.
 * Returns 1 when it does, 0 otherwise.
 */
#ifdef __x86_64__
static int is_cpuid_supported(void)
{
    /* always reported as available on x86_64 hosts */
    return 1;
}
#else
static int is_cpuid_supported(void)
{
    int flags_after, flags_before;

    /* Read EFLAGS, toggle bit 21 (0x00200000), write it back and read it
       again: if the bit kept its new value the two copies differ. */
    asm volatile ("pushf\n"
                  "popl %0\n"
                  "movl %0, %1\n"
                  "xorl $0x00200000, %0\n"
                  "pushl %0\n"
                  "popf\n"
                  "pushf\n"
                  "popl %0\n"
                  : "=a" (flags_after), "=d" (flags_before)
                  :
                  : "cc");
    return flags_after != flags_before;
}
#endif
9df217a3
FB
131
132static void kqemu_update_cpuid(CPUState *env)
133{
0de6bb73 134 int critical_features_mask, features, ext_features, ext_features_mask;
9df217a3
FB
135 uint32_t eax, ebx, ecx, edx;
136
137 /* the following features are kept identical on the host and
138 target cpus because they are important for user code. Strictly
139 speaking, only SSE really matters because the OS must support
140 it if the user code uses it. */
5fafdf24
TS
141 critical_features_mask =
142 CPUID_CMOV | CPUID_CX8 |
143 CPUID_FXSR | CPUID_MMX | CPUID_SSE |
ca0d1734 144 CPUID_SSE2 | CPUID_SEP;
0de6bb73 145 ext_features_mask = CPUID_EXT_SSE3 | CPUID_EXT_MONITOR;
9df217a3
FB
146 if (!is_cpuid_supported()) {
147 features = 0;
0de6bb73 148 ext_features = 0;
9df217a3
FB
149 } else {
150 cpuid(1, eax, ebx, ecx, edx);
151 features = edx;
0de6bb73 152 ext_features = ecx;
9df217a3 153 }
ca0d1734
FB
154#ifdef __x86_64__
155 /* NOTE: on x86_64 CPUs, SYSENTER is not supported in
156 compatibility mode, so in order to have the best performances
157 it is better not to use it */
158 features &= ~CPUID_SEP;
159#endif
9df217a3
FB
160 env->cpuid_features = (env->cpuid_features & ~critical_features_mask) |
161 (features & critical_features_mask);
0de6bb73
FB
162 env->cpuid_ext_features = (env->cpuid_ext_features & ~ext_features_mask) |
163 (ext_features & ext_features_mask);
9df217a3
FB
164 /* XXX: we could update more of the target CPUID state so that the
165 non accelerated code sees exactly the same CPU features as the
166 accelerated code */
167}
168
169int kqemu_init(CPUState *env)
170{
da260249 171 struct kqemu_init kinit;
9df217a3 172 int ret, version;
6e4255f6
FB
173#ifdef _WIN32
174 DWORD temp;
175#endif
9df217a3
FB
176
177 if (!kqemu_allowed)
178 return -1;
179
6e4255f6
FB
180#ifdef _WIN32
181 kqemu_fd = CreateFile(KQEMU_DEVICE, GENERIC_WRITE | GENERIC_READ,
182 FILE_SHARE_READ | FILE_SHARE_WRITE,
183 NULL, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL,
184 NULL);
7fb2a862 185 if (kqemu_fd == KQEMU_INVALID_FD) {
186 fprintf(stderr, "Could not open '%s' - QEMU acceleration layer not activated: %lu\n",
187 KQEMU_DEVICE, GetLastError());
188 return -1;
189 }
6e4255f6 190#else
9df217a3 191 kqemu_fd = open(KQEMU_DEVICE, O_RDWR);
6e4255f6 192 if (kqemu_fd == KQEMU_INVALID_FD) {
99c19686
TS
193 fprintf(stderr, "Could not open '%s' - QEMU acceleration layer not activated: %s\n",
194 KQEMU_DEVICE, strerror(errno));
9df217a3
FB
195 return -1;
196 }
7fb2a862 197#endif
9df217a3 198 version = 0;
6e4255f6
FB
199#ifdef _WIN32
200 DeviceIoControl(kqemu_fd, KQEMU_GET_VERSION, NULL, 0,
201 &version, sizeof(version), &temp, NULL);
202#else
9df217a3 203 ioctl(kqemu_fd, KQEMU_GET_VERSION, &version);
6e4255f6 204#endif
9df217a3
FB
205 if (version != KQEMU_VERSION) {
206 fprintf(stderr, "Version mismatch between kqemu module and qemu (%08x %08x) - disabling kqemu use\n",
207 version, KQEMU_VERSION);
208 goto fail;
209 }
210
5fafdf24 211 pages_to_flush = qemu_vmalloc(KQEMU_MAX_PAGES_TO_FLUSH *
da260249 212 sizeof(uint64_t));
9df217a3
FB
213 if (!pages_to_flush)
214 goto fail;
215
5fafdf24 216 ram_pages_to_update = qemu_vmalloc(KQEMU_MAX_RAM_PAGES_TO_UPDATE *
da260249 217 sizeof(uint64_t));
aa062973
FB
218 if (!ram_pages_to_update)
219 goto fail;
220
5fafdf24 221 modified_ram_pages = qemu_vmalloc(KQEMU_MAX_MODIFIED_RAM_PAGES *
da260249 222 sizeof(uint64_t));
f32fc648
FB
223 if (!modified_ram_pages)
224 goto fail;
225 modified_ram_pages_table = qemu_mallocz(phys_ram_size >> TARGET_PAGE_BITS);
226 if (!modified_ram_pages_table)
227 goto fail;
228
da260249
FB
229 memset(&kinit, 0, sizeof(kinit)); /* set the paddings to zero */
230 kinit.ram_base = phys_ram_base;
231 kinit.ram_size = phys_ram_size;
232 kinit.ram_dirty = phys_ram_dirty;
233 kinit.pages_to_flush = pages_to_flush;
234 kinit.ram_pages_to_update = ram_pages_to_update;
235 kinit.modified_ram_pages = modified_ram_pages;
6e4255f6 236#ifdef _WIN32
da260249 237 ret = DeviceIoControl(kqemu_fd, KQEMU_INIT, &kinit, sizeof(kinit),
6e4255f6
FB
238 NULL, 0, &temp, NULL) == TRUE ? 0 : -1;
239#else
da260249 240 ret = ioctl(kqemu_fd, KQEMU_INIT, &kinit);
6e4255f6 241#endif
9df217a3
FB
242 if (ret < 0) {
243 fprintf(stderr, "Error %d while initializing QEMU acceleration layer - disabling it for now\n", ret);
244 fail:
6e4255f6
FB
245 kqemu_closefd(kqemu_fd);
246 kqemu_fd = KQEMU_INVALID_FD;
9df217a3
FB
247 return -1;
248 }
249 kqemu_update_cpuid(env);
f32fc648 250 env->kqemu_enabled = kqemu_allowed;
9df217a3 251 nb_pages_to_flush = 0;
aa062973 252 nb_ram_pages_to_update = 0;
da260249
FB
253
254 qpi_init();
9df217a3
FB
255 return 0;
256}
257
258void kqemu_flush_page(CPUState *env, target_ulong addr)
259{
d12d51d5 260 LOG_INT("kqemu_flush_page: addr=" TARGET_FMT_lx "\n", addr);
9df217a3
FB
261 if (nb_pages_to_flush >= KQEMU_MAX_PAGES_TO_FLUSH)
262 nb_pages_to_flush = KQEMU_FLUSH_ALL;
263 else
264 pages_to_flush[nb_pages_to_flush++] = addr;
265}
266
267void kqemu_flush(CPUState *env, int global)
268{
d12d51d5 269 LOG_INT("kqemu_flush:\n");
9df217a3
FB
270 nb_pages_to_flush = KQEMU_FLUSH_ALL;
271}
272
aa062973
FB
273void kqemu_set_notdirty(CPUState *env, ram_addr_t ram_addr)
274{
d12d51d5 275 LOG_INT("kqemu_set_notdirty: addr=%08lx\n",
da260249 276 (unsigned long)ram_addr);
fc8dc060
FB
277 /* we only track transitions to dirty state */
278 if (phys_ram_dirty[ram_addr >> TARGET_PAGE_BITS] != 0xff)
279 return;
aa062973
FB
280 if (nb_ram_pages_to_update >= KQEMU_MAX_RAM_PAGES_TO_UPDATE)
281 nb_ram_pages_to_update = KQEMU_RAM_PAGES_UPDATE_ALL;
282 else
283 ram_pages_to_update[nb_ram_pages_to_update++] = ram_addr;
284}
285
f32fc648
FB
286static void kqemu_reset_modified_ram_pages(void)
287{
288 int i;
289 unsigned long page_index;
3b46e624 290
f32fc648
FB
291 for(i = 0; i < nb_modified_ram_pages; i++) {
292 page_index = modified_ram_pages[i] >> TARGET_PAGE_BITS;
293 modified_ram_pages_table[page_index] = 0;
294 }
295 nb_modified_ram_pages = 0;
296}
297
298void kqemu_modify_page(CPUState *env, ram_addr_t ram_addr)
299{
300 unsigned long page_index;
301 int ret;
302#ifdef _WIN32
303 DWORD temp;
304#endif
305
306 page_index = ram_addr >> TARGET_PAGE_BITS;
307 if (!modified_ram_pages_table[page_index]) {
308#if 0
309 printf("%d: modify_page=%08lx\n", nb_modified_ram_pages, ram_addr);
310#endif
311 modified_ram_pages_table[page_index] = 1;
312 modified_ram_pages[nb_modified_ram_pages++] = ram_addr;
313 if (nb_modified_ram_pages >= KQEMU_MAX_MODIFIED_RAM_PAGES) {
314 /* flush */
315#ifdef _WIN32
5fafdf24
TS
316 ret = DeviceIoControl(kqemu_fd, KQEMU_MODIFY_RAM_PAGES,
317 &nb_modified_ram_pages,
f32fc648
FB
318 sizeof(nb_modified_ram_pages),
319 NULL, 0, &temp, NULL);
320#else
5fafdf24 321 ret = ioctl(kqemu_fd, KQEMU_MODIFY_RAM_PAGES,
f32fc648
FB
322 &nb_modified_ram_pages);
323#endif
324 kqemu_reset_modified_ram_pages();
325 }
326 }
327}
328
da260249
FB
329void kqemu_set_phys_mem(uint64_t start_addr, ram_addr_t size,
330 ram_addr_t phys_offset)
331{
332 struct kqemu_phys_mem kphys_mem1, *kphys_mem = &kphys_mem1;
333 uint64_t end;
334 int ret, io_index;
335
336 end = (start_addr + size + TARGET_PAGE_SIZE - 1) & TARGET_PAGE_MASK;
337 start_addr &= TARGET_PAGE_MASK;
338 kphys_mem->phys_addr = start_addr;
339 kphys_mem->size = end - start_addr;
340 kphys_mem->ram_addr = phys_offset & TARGET_PAGE_MASK;
341 io_index = phys_offset & ~TARGET_PAGE_MASK;
342 switch(io_index) {
343 case IO_MEM_RAM:
344 kphys_mem->io_index = KQEMU_IO_MEM_RAM;
345 break;
346 case IO_MEM_ROM:
347 kphys_mem->io_index = KQEMU_IO_MEM_ROM;
348 break;
349 default:
350 if (qpi_io_memory == io_index) {
351 kphys_mem->io_index = KQEMU_IO_MEM_COMM;
352 } else {
353 kphys_mem->io_index = KQEMU_IO_MEM_UNASSIGNED;
354 }
355 break;
356 }
357#ifdef _WIN32
358 {
359 DWORD temp;
360 ret = DeviceIoControl(kqemu_fd, KQEMU_SET_PHYS_MEM,
361 kphys_mem, sizeof(*kphys_mem),
362 NULL, 0, &temp, NULL) == TRUE ? 0 : -1;
363 }
364#else
365 ret = ioctl(kqemu_fd, KQEMU_SET_PHYS_MEM, kphys_mem);
366#endif
367 if (ret < 0) {
368 fprintf(stderr, "kqemu: KQEMU_SET_PHYS_PAGE error=%d: start_addr=0x%016" PRIx64 " size=0x%08lx phys_offset=0x%08lx\n",
369 ret, start_addr,
370 (unsigned long)size, (unsigned long)phys_offset);
371 }
372}
373
9df217a3
FB
/* Memory image exchanged with the fsave / frstor instructions. */
struct fpstate {
    uint16_t fpuc;              /* control word */
    uint16_t dummy1;
    uint16_t fpus;              /* status word */
    uint16_t dummy2;
    uint16_t fptag;             /* tag word, two bits per register */
    uint16_t dummy3;

    uint32_t fpip;
    uint32_t fpcs;
    uint32_t fpoo;
    uint32_t fpos;
    uint8_t fpregs1[8 * 10];    /* eight registers, 10 bytes each */
};

/* Memory image exchanged with the fxsave / fxrstor instructions. */
struct fpxstate {
    uint16_t fpuc;              /* control word */
    uint16_t fpus;              /* status word */
    uint16_t fptag;             /* tag word, one bit per register */
    uint16_t fop;
    uint32_t fpuip;
    uint16_t cs_sel;
    uint16_t dummy0;
    uint32_t fpudp;
    uint16_t ds_sel;
    uint16_t dummy1;
    uint32_t mxcsr;
    uint32_t mxcsr_mask;
    uint8_t fpregs1[8 * 16];    /* eight registers, 16-byte slots */
    uint8_t xmm_regs[16 * 16];
    uint8_t dummy2[96];
};

/* Shared scratch image for the fxsave / fxrstor helpers, 16-byte aligned. */
static struct fpxstate fpx1 __attribute__((aligned(16)));
408
409static void restore_native_fp_frstor(CPUState *env)
410{
411 int fptag, i, j;
412 struct fpstate fp1, *fp = &fp1;
3b46e624 413
9df217a3
FB
414 fp->fpuc = env->fpuc;
415 fp->fpus = (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11;
416 fptag = 0;
417 for (i=7; i>=0; i--) {
418 fptag <<= 2;
419 if (env->fptags[i]) {
420 fptag |= 3;
421 } else {
422 /* the FPU automatically computes it */
423 }
424 }
425 fp->fptag = fptag;
426 j = env->fpstt;
427 for(i = 0;i < 8; i++) {
428 memcpy(&fp->fpregs1[i * 10], &env->fpregs[j].d, 10);
429 j = (j + 1) & 7;
430 }
431 asm volatile ("frstor %0" : "=m" (*fp));
432}
5fafdf24 433
9df217a3
FB
434static void save_native_fp_fsave(CPUState *env)
435{
436 int fptag, i, j;
437 uint16_t fpuc;
438 struct fpstate fp1, *fp = &fp1;
439
440 asm volatile ("fsave %0" : : "m" (*fp));
441 env->fpuc = fp->fpuc;
442 env->fpstt = (fp->fpus >> 11) & 7;
443 env->fpus = fp->fpus & ~0x3800;
444 fptag = fp->fptag;
445 for(i = 0;i < 8; i++) {
446 env->fptags[i] = ((fptag & 3) == 3);
447 fptag >>= 2;
448 }
449 j = env->fpstt;
450 for(i = 0;i < 8; i++) {
451 memcpy(&env->fpregs[j].d, &fp->fpregs1[i * 10], 10);
452 j = (j + 1) & 7;
453 }
454 /* we must restore the default rounding state */
455 fpuc = 0x037f | (env->fpuc & (3 << 10));
456 asm volatile("fldcw %0" : : "m" (fpuc));
457}
458
459static void restore_native_fp_fxrstor(CPUState *env)
460{
461 struct fpxstate *fp = &fpx1;
462 int i, j, fptag;
463
464 fp->fpuc = env->fpuc;
465 fp->fpus = (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11;
466 fptag = 0;
467 for(i = 0; i < 8; i++)
468 fptag |= (env->fptags[i] << i);
469 fp->fptag = fptag ^ 0xff;
470
471 j = env->fpstt;
472 for(i = 0;i < 8; i++) {
473 memcpy(&fp->fpregs1[i * 16], &env->fpregs[j].d, 10);
474 j = (j + 1) & 7;
475 }
476 if (env->cpuid_features & CPUID_SSE) {
477 fp->mxcsr = env->mxcsr;
478 /* XXX: check if DAZ is not available */
479 fp->mxcsr_mask = 0xffff;
c28e951f 480 memcpy(fp->xmm_regs, env->xmm_regs, CPU_NB_REGS * 16);
9df217a3
FB
481 }
482 asm volatile ("fxrstor %0" : "=m" (*fp));
483}
484
485static void save_native_fp_fxsave(CPUState *env)
486{
487 struct fpxstate *fp = &fpx1;
488 int fptag, i, j;
489 uint16_t fpuc;
490
491 asm volatile ("fxsave %0" : : "m" (*fp));
492 env->fpuc = fp->fpuc;
493 env->fpstt = (fp->fpus >> 11) & 7;
494 env->fpus = fp->fpus & ~0x3800;
495 fptag = fp->fptag ^ 0xff;
496 for(i = 0;i < 8; i++) {
497 env->fptags[i] = (fptag >> i) & 1;
498 }
499 j = env->fpstt;
500 for(i = 0;i < 8; i++) {
501 memcpy(&env->fpregs[j].d, &fp->fpregs1[i * 16], 10);
502 j = (j + 1) & 7;
503 }
504 if (env->cpuid_features & CPUID_SSE) {
505 env->mxcsr = fp->mxcsr;
c28e951f 506 memcpy(env->xmm_regs, fp->xmm_regs, CPU_NB_REGS * 16);
9df217a3
FB
507 }
508
509 /* we must restore the default rounding state */
510 asm volatile ("fninit");
511 fpuc = 0x037f | (env->fpuc & (3 << 10));
512 asm volatile("fldcw %0" : : "m" (fpuc));
513}
514
c28e951f
FB
515static int do_syscall(CPUState *env,
516 struct kqemu_cpu_state *kenv)
517{
518 int selector;
3b46e624 519
c28e951f 520 selector = (env->star >> 32) & 0xffff;
da260249 521#ifdef TARGET_X86_64
c28e951f 522 if (env->hflags & HF_LMA_MASK) {
93eac243
FB
523 int code64;
524
c28e951f
FB
525 env->regs[R_ECX] = kenv->next_eip;
526 env->regs[11] = env->eflags;
527
93eac243
FB
528 code64 = env->hflags & HF_CS64_MASK;
529
c28e951f 530 cpu_x86_set_cpl(env, 0);
5fafdf24
TS
531 cpu_x86_load_seg_cache(env, R_CS, selector & 0xfffc,
532 0, 0xffffffff,
c4e27dd4 533 DESC_G_MASK | DESC_P_MASK |
c28e951f
FB
534 DESC_S_MASK |
535 DESC_CS_MASK | DESC_R_MASK | DESC_A_MASK | DESC_L_MASK);
5fafdf24 536 cpu_x86_load_seg_cache(env, R_SS, (selector + 8) & 0xfffc,
c28e951f
FB
537 0, 0xffffffff,
538 DESC_G_MASK | DESC_B_MASK | DESC_P_MASK |
539 DESC_S_MASK |
540 DESC_W_MASK | DESC_A_MASK);
541 env->eflags &= ~env->fmask;
93eac243 542 if (code64)
c28e951f
FB
543 env->eip = env->lstar;
544 else
545 env->eip = env->cstar;
5fafdf24 546 } else
c28e951f
FB
547#endif
548 {
549 env->regs[R_ECX] = (uint32_t)kenv->next_eip;
3b46e624 550
c28e951f 551 cpu_x86_set_cpl(env, 0);
5fafdf24
TS
552 cpu_x86_load_seg_cache(env, R_CS, selector & 0xfffc,
553 0, 0xffffffff,
c28e951f
FB
554 DESC_G_MASK | DESC_B_MASK | DESC_P_MASK |
555 DESC_S_MASK |
556 DESC_CS_MASK | DESC_R_MASK | DESC_A_MASK);
5fafdf24 557 cpu_x86_load_seg_cache(env, R_SS, (selector + 8) & 0xfffc,
c28e951f
FB
558 0, 0xffffffff,
559 DESC_G_MASK | DESC_B_MASK | DESC_P_MASK |
560 DESC_S_MASK |
561 DESC_W_MASK | DESC_A_MASK);
562 env->eflags &= ~(IF_MASK | RF_MASK | VM_MASK);
563 env->eip = (uint32_t)env->star;
564 }
565 return 2;
566}
567
f32fc648 568#ifdef CONFIG_PROFILER
aa062973
FB
569
/* Granularity of a recorded program counter and size of the hash table
   that indexes the records. */
#define PC_REC_SIZE 1
#define PC_REC_HASH_BITS 16
#define PC_REC_HASH_SIZE (1 << PC_REC_HASH_BITS)

/* One profiled program counter and the number of times it was recorded;
   records sharing a hash bucket are chained through 'next'. */
typedef struct PCRecord {
    unsigned long pc;
    int64_t count;
    struct PCRecord *next;
} PCRecord;

/* Bucket heads and the total number of records held in all buckets. */
static PCRecord *pc_rec_hash[PC_REC_HASH_SIZE];
static int nb_pc_records;
aa062973 582
f32fc648 583static void kqemu_record_pc(unsigned long pc)
aa062973
FB
584{
585 unsigned long h;
586 PCRecord **pr, *r;
587
588 h = pc / PC_REC_SIZE;
589 h = h ^ (h >> PC_REC_HASH_BITS);
590 h &= (PC_REC_HASH_SIZE - 1);
591 pr = &pc_rec_hash[h];
592 for(;;) {
593 r = *pr;
594 if (r == NULL)
595 break;
596 if (r->pc == pc) {
597 r->count++;
598 return;
599 }
600 pr = &r->next;
601 }
602 r = malloc(sizeof(PCRecord));
603 r->count = 1;
604 r->pc = pc;
605 r->next = NULL;
606 *pr = r;
607 nb_pc_records++;
608}
609
f32fc648 610static int pc_rec_cmp(const void *p1, const void *p2)
aa062973
FB
611{
612 PCRecord *r1 = *(PCRecord **)p1;
613 PCRecord *r2 = *(PCRecord **)p2;
614 if (r1->count < r2->count)
615 return 1;
616 else if (r1->count == r2->count)
617 return 0;
618 else
619 return -1;
620}
621
f32fc648
FB
622static void kqemu_record_flush(void)
623{
624 PCRecord *r, *r_next;
625 int h;
626
627 for(h = 0; h < PC_REC_HASH_SIZE; h++) {
628 for(r = pc_rec_hash[h]; r != NULL; r = r_next) {
629 r_next = r->next;
630 free(r);
631 }
632 pc_rec_hash[h] = NULL;
633 }
634 nb_pc_records = 0;
635}
636
aa062973
FB
637void kqemu_record_dump(void)
638{
639 PCRecord **pr, *r;
640 int i, h;
641 FILE *f;
642 int64_t total, sum;
643
644 pr = malloc(sizeof(PCRecord *) * nb_pc_records);
645 i = 0;
646 total = 0;
647 for(h = 0; h < PC_REC_HASH_SIZE; h++) {
648 for(r = pc_rec_hash[h]; r != NULL; r = r->next) {
649 pr[i++] = r;
650 total += r->count;
651 }
652 }
653 qsort(pr, nb_pc_records, sizeof(PCRecord *), pc_rec_cmp);
3b46e624 654
aa062973
FB
655 f = fopen("/tmp/kqemu.stats", "w");
656 if (!f) {
657 perror("/tmp/kqemu.stats");
658 exit(1);
659 }
26a76461 660 fprintf(f, "total: %" PRId64 "\n", total);
aa062973
FB
661 sum = 0;
662 for(i = 0; i < nb_pc_records; i++) {
663 r = pr[i];
664 sum += r->count;
5fafdf24
TS
665 fprintf(f, "%08lx: %" PRId64 " %0.2f%% %0.2f%%\n",
666 r->pc,
667 r->count,
aa062973
FB
668 (double)r->count / (double)total * 100.0,
669 (double)sum / (double)total * 100.0);
670 }
671 fclose(f);
672 free(pr);
f32fc648
FB
673
674 kqemu_record_flush();
aa062973
FB
675}
676#endif
677
da260249
FB
678static inline void kqemu_load_seg(struct kqemu_segment_cache *ksc,
679 const SegmentCache *sc)
680{
681 ksc->selector = sc->selector;
682 ksc->flags = sc->flags;
683 ksc->limit = sc->limit;
684 ksc->base = sc->base;
685}
686
687static inline void kqemu_save_seg(SegmentCache *sc,
688 const struct kqemu_segment_cache *ksc)
689{
690 sc->selector = ksc->selector;
691 sc->flags = ksc->flags;
692 sc->limit = ksc->limit;
693 sc->base = ksc->base;
694}
695
9df217a3
FB
696int kqemu_cpu_exec(CPUState *env)
697{
698 struct kqemu_cpu_state kcpu_state, *kenv = &kcpu_state;
f32fc648
FB
699 int ret, cpl, i;
700#ifdef CONFIG_PROFILER
701 int64_t ti;
702#endif
6e4255f6
FB
703#ifdef _WIN32
704 DWORD temp;
705#endif
9df217a3 706
f32fc648
FB
707#ifdef CONFIG_PROFILER
708 ti = profile_getclock();
709#endif
d12d51d5
AL
710 LOG_INT("kqemu: cpu_exec: enter\n");
711 LOG_INT_STATE(env);
da260249
FB
712 for(i = 0; i < CPU_NB_REGS; i++)
713 kenv->regs[i] = env->regs[i];
9df217a3
FB
714 kenv->eip = env->eip;
715 kenv->eflags = env->eflags;
da260249
FB
716 for(i = 0; i < 6; i++)
717 kqemu_load_seg(&kenv->segs[i], &env->segs[i]);
718 kqemu_load_seg(&kenv->ldt, &env->ldt);
719 kqemu_load_seg(&kenv->tr, &env->tr);
720 kqemu_load_seg(&kenv->gdt, &env->gdt);
721 kqemu_load_seg(&kenv->idt, &env->idt);
9df217a3
FB
722 kenv->cr0 = env->cr[0];
723 kenv->cr2 = env->cr[2];
724 kenv->cr3 = env->cr[3];
725 kenv->cr4 = env->cr[4];
726 kenv->a20_mask = env->a20_mask;
c28e951f 727 kenv->efer = env->efer;
f32fc648
FB
728 kenv->tsc_offset = 0;
729 kenv->star = env->star;
730 kenv->sysenter_cs = env->sysenter_cs;
731 kenv->sysenter_esp = env->sysenter_esp;
732 kenv->sysenter_eip = env->sysenter_eip;
da260249 733#ifdef TARGET_X86_64
f32fc648
FB
734 kenv->lstar = env->lstar;
735 kenv->cstar = env->cstar;
736 kenv->fmask = env->fmask;
737 kenv->kernelgsbase = env->kernelgsbase;
c28e951f 738#endif
9df217a3
FB
739 if (env->dr[7] & 0xff) {
740 kenv->dr7 = env->dr[7];
741 kenv->dr0 = env->dr[0];
742 kenv->dr1 = env->dr[1];
743 kenv->dr2 = env->dr[2];
744 kenv->dr3 = env->dr[3];
745 } else {
746 kenv->dr7 = 0;
747 }
748 kenv->dr6 = env->dr[6];
f32fc648
FB
749 cpl = (env->hflags & HF_CPL_MASK);
750 kenv->cpl = cpl;
9df217a3 751 kenv->nb_pages_to_flush = nb_pages_to_flush;
f32fc648 752 kenv->user_only = (env->kqemu_enabled == 1);
aa062973 753 kenv->nb_ram_pages_to_update = nb_ram_pages_to_update;
aa062973 754 nb_ram_pages_to_update = 0;
f32fc648 755 kenv->nb_modified_ram_pages = nb_modified_ram_pages;
da260249 756
f32fc648
FB
757 kqemu_reset_modified_ram_pages();
758
759 if (env->cpuid_features & CPUID_FXSR)
760 restore_native_fp_fxrstor(env);
761 else
762 restore_native_fp_frstor(env);
9df217a3 763
6e4255f6 764#ifdef _WIN32
a332e112
FB
765 if (DeviceIoControl(kqemu_fd, KQEMU_EXEC,
766 kenv, sizeof(struct kqemu_cpu_state),
767 kenv, sizeof(struct kqemu_cpu_state),
768 &temp, NULL)) {
769 ret = kenv->retval;
770 } else {
771 ret = -1;
772 }
6e4255f6 773#else
6e4255f6
FB
774 ioctl(kqemu_fd, KQEMU_EXEC, kenv);
775 ret = kenv->retval;
6e4255f6 776#endif
f32fc648
FB
777 if (env->cpuid_features & CPUID_FXSR)
778 save_native_fp_fxsave(env);
779 else
780 save_native_fp_fsave(env);
9df217a3 781
da260249
FB
782 for(i = 0; i < CPU_NB_REGS; i++)
783 env->regs[i] = kenv->regs[i];
9df217a3
FB
784 env->eip = kenv->eip;
785 env->eflags = kenv->eflags;
da260249
FB
786 for(i = 0; i < 6; i++)
787 kqemu_save_seg(&env->segs[i], &kenv->segs[i]);
f32fc648 788 cpu_x86_set_cpl(env, kenv->cpl);
da260249 789 kqemu_save_seg(&env->ldt, &kenv->ldt);
f32fc648
FB
790 env->cr[0] = kenv->cr0;
791 env->cr[4] = kenv->cr4;
792 env->cr[3] = kenv->cr3;
9df217a3
FB
793 env->cr[2] = kenv->cr2;
794 env->dr[6] = kenv->dr6;
da260249 795#ifdef TARGET_X86_64
f32fc648 796 env->kernelgsbase = kenv->kernelgsbase;
f32fc648
FB
797#endif
798
799 /* flush pages as indicated by kqemu */
800 if (kenv->nb_pages_to_flush >= KQEMU_FLUSH_ALL) {
801 tlb_flush(env, 1);
802 } else {
803 for(i = 0; i < kenv->nb_pages_to_flush; i++) {
804 tlb_flush_page(env, pages_to_flush[i]);
805 }
806 }
807 nb_pages_to_flush = 0;
808
809#ifdef CONFIG_PROFILER
810 kqemu_time += profile_getclock() - ti;
811 kqemu_exec_count++;
812#endif
9df217a3 813
aa062973
FB
814 if (kenv->nb_ram_pages_to_update > 0) {
815 cpu_tlb_update_dirty(env);
816 }
aa062973 817
f32fc648
FB
818 if (kenv->nb_modified_ram_pages > 0) {
819 for(i = 0; i < kenv->nb_modified_ram_pages; i++) {
820 unsigned long addr;
821 addr = modified_ram_pages[i];
822 tb_invalidate_phys_page_range(addr, addr + TARGET_PAGE_SIZE, 0);
823 }
824 }
f32fc648 825
aa062973
FB
826 /* restore the hidden flags */
827 {
828 unsigned int new_hflags;
829#ifdef TARGET_X86_64
5fafdf24 830 if ((env->hflags & HF_LMA_MASK) &&
aa062973
FB
831 (env->segs[R_CS].flags & DESC_L_MASK)) {
832 /* long mode */
833 new_hflags = HF_CS32_MASK | HF_SS32_MASK | HF_CS64_MASK;
834 } else
835#endif
836 {
837 /* legacy / compatibility case */
838 new_hflags = (env->segs[R_CS].flags & DESC_B_MASK)
839 >> (DESC_B_SHIFT - HF_CS32_SHIFT);
840 new_hflags |= (env->segs[R_SS].flags & DESC_B_MASK)
841 >> (DESC_B_SHIFT - HF_SS32_SHIFT);
5fafdf24 842 if (!(env->cr[0] & CR0_PE_MASK) ||
aa062973
FB
843 (env->eflags & VM_MASK) ||
844 !(env->hflags & HF_CS32_MASK)) {
845 /* XXX: try to avoid this test. The problem comes from the
846 fact that is real mode or vm86 mode we only modify the
847 'base' and 'selector' fields of the segment cache to go
848 faster. A solution may be to force addseg to one in
849 translate-i386.c. */
850 new_hflags |= HF_ADDSEG_MASK;
851 } else {
5fafdf24 852 new_hflags |= ((env->segs[R_DS].base |
aa062973 853 env->segs[R_ES].base |
5fafdf24 854 env->segs[R_SS].base) != 0) <<
aa062973
FB
855 HF_ADDSEG_SHIFT;
856 }
857 }
5fafdf24 858 env->hflags = (env->hflags &
aa062973
FB
859 ~(HF_CS32_MASK | HF_SS32_MASK | HF_CS64_MASK | HF_ADDSEG_MASK)) |
860 new_hflags;
861 }
f32fc648
FB
862 /* update FPU flags */
863 env->hflags = (env->hflags & ~(HF_MP_MASK | HF_EM_MASK | HF_TS_MASK)) |
864 ((env->cr[0] << (HF_MP_SHIFT - 1)) & (HF_MP_MASK | HF_EM_MASK | HF_TS_MASK));
865 if (env->cr[4] & CR4_OSFXSR_MASK)
866 env->hflags |= HF_OSFXSR_MASK;
867 else
868 env->hflags &= ~HF_OSFXSR_MASK;
3b46e624 869
d12d51d5 870 LOG_INT("kqemu: kqemu_cpu_exec: ret=0x%x\n", ret);
c28e951f
FB
871 if (ret == KQEMU_RET_SYSCALL) {
872 /* syscall instruction */
873 return do_syscall(env, kenv);
5fafdf24 874 } else
9df217a3
FB
875 if ((ret & 0xff00) == KQEMU_RET_INT) {
876 env->exception_index = ret & 0xff;
877 env->error_code = 0;
878 env->exception_is_int = 1;
879 env->exception_next_eip = kenv->next_eip;
f32fc648
FB
880#ifdef CONFIG_PROFILER
881 kqemu_ret_int_count++;
882#endif
d12d51d5
AL
883 LOG_INT("kqemu: interrupt v=%02x:\n", env->exception_index);
884 LOG_INT_STATE(env);
9df217a3
FB
885 return 1;
886 } else if ((ret & 0xff00) == KQEMU_RET_EXCEPTION) {
887 env->exception_index = ret & 0xff;
888 env->error_code = kenv->error_code;
889 env->exception_is_int = 0;
890 env->exception_next_eip = 0;
f32fc648
FB
891#ifdef CONFIG_PROFILER
892 kqemu_ret_excp_count++;
893#endif
d12d51d5 894 LOG_INT("kqemu: exception v=%02x e=%04x:\n",
9df217a3 895 env->exception_index, env->error_code);
d12d51d5 896 LOG_INT_STATE(env);
9df217a3
FB
897 return 1;
898 } else if (ret == KQEMU_RET_INTR) {
f32fc648
FB
899#ifdef CONFIG_PROFILER
900 kqemu_ret_intr_count++;
901#endif
d12d51d5 902 LOG_INT_STATE(env);
9df217a3 903 return 0;
5fafdf24 904 } else if (ret == KQEMU_RET_SOFTMMU) {
f32fc648
FB
905#ifdef CONFIG_PROFILER
906 {
907 unsigned long pc = env->eip + env->segs[R_CS].base;
908 kqemu_record_pc(pc);
909 }
aa062973 910#endif
d12d51d5 911 LOG_INT_STATE(env);
9df217a3
FB
912 return 2;
913 } else {
914 cpu_dump_state(env, stderr, fprintf, 0);
915 fprintf(stderr, "Unsupported return value: 0x%x\n", ret);
916 exit(1);
917 }
918 return 0;
919}
920
a332e112
FB
921void kqemu_cpu_interrupt(CPUState *env)
922{
da260249 923#if defined(_WIN32)
5fafdf24 924 /* cancelling the I/O request causes KQEMU to finish executing the
a332e112
FB
925 current block and successfully returning. */
926 CancelIo(kqemu_fd);
927#endif
928}
929
da260249
FB
930/*
931 QEMU paravirtualization interface. The current interface only
932 allows to modify the IF and IOPL flags when running in
933 kqemu.
934
935 At this point it is not very satisfactory. I leave it for reference
936 as it adds little complexity.
937*/
938
939#define QPI_COMM_PAGE_PHYS_ADDR 0xff000000
940
941static uint32_t qpi_mem_readb(void *opaque, target_phys_addr_t addr)
942{
943 return 0;
944}
945
946static uint32_t qpi_mem_readw(void *opaque, target_phys_addr_t addr)
947{
948 return 0;
949}
950
951static void qpi_mem_writeb(void *opaque, target_phys_addr_t addr, uint32_t val)
952{
953}
954
955static void qpi_mem_writew(void *opaque, target_phys_addr_t addr, uint32_t val)
956{
957}
958
959static uint32_t qpi_mem_readl(void *opaque, target_phys_addr_t addr)
960{
961 CPUState *env;
962
963 env = cpu_single_env;
964 if (!env)
965 return 0;
966 return env->eflags & (IF_MASK | IOPL_MASK);
967}
968
969/* Note: after writing to this address, the guest code must make sure
970 it is exiting the current TB. pushf/popf can be used for that
971 purpose. */
972static void qpi_mem_writel(void *opaque, target_phys_addr_t addr, uint32_t val)
973{
974 CPUState *env;
975
976 env = cpu_single_env;
977 if (!env)
978 return;
979 env->eflags = (env->eflags & ~(IF_MASK | IOPL_MASK)) |
980 (val & (IF_MASK | IOPL_MASK));
981}
982
983static CPUReadMemoryFunc *qpi_mem_read[3] = {
984 qpi_mem_readb,
985 qpi_mem_readw,
986 qpi_mem_readl,
987};
988
989static CPUWriteMemoryFunc *qpi_mem_write[3] = {
990 qpi_mem_writeb,
991 qpi_mem_writew,
992 qpi_mem_writel,
993};
994
995static void qpi_init(void)
996{
997 kqemu_comm_base = 0xff000000 | 1;
998 qpi_io_memory = cpu_register_io_memory(0,
999 qpi_mem_read,
1000 qpi_mem_write, NULL);
1001 cpu_register_physical_memory(kqemu_comm_base & ~0xfff,
1002 0x1000, qpi_io_memory);
1003}
9df217a3 1004#endif