]> git.proxmox.com Git - qemu.git/blame - kqemu.c
Don't leak VLANClientState on PCI hot remove
[qemu.git] / kqemu.c
CommitLineData
9df217a3
FB
1/*
2 * KQEMU support
5fafdf24 3 *
da260249 4 * Copyright (c) 2005-2008 Fabrice Bellard
9df217a3
FB
5 *
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
10 *
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, write to the Free Software
fad6cb1a 18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301 USA
9df217a3
FB
19 */
20#include "config.h"
21#ifdef _WIN32
4fddf62a 22#define WIN32_LEAN_AND_MEAN
9df217a3 23#include <windows.h>
6e4255f6 24#include <winioctl.h>
9df217a3
FB
25#else
26#include <sys/types.h>
27#include <sys/mman.h>
6e4255f6 28#include <sys/ioctl.h>
9df217a3 29#endif
605686cd 30#ifdef HOST_SOLARIS
aafd8139 31#include <sys/ioccom.h>
605686cd 32#endif
9df217a3
FB
33#include <stdlib.h>
34#include <stdio.h>
35#include <stdarg.h>
36#include <string.h>
37#include <errno.h>
38#include <unistd.h>
39#include <inttypes.h>
40
41#include "cpu.h"
42#include "exec-all.h"
ca10f867 43#include "qemu-common.h"
9df217a3
FB
44
45#ifdef USE_KQEMU
46
47#define DEBUG
aa062973 48//#define PROFILE
9df217a3 49
d12d51d5
AL
50
/* Interrupt/exception tracing helpers.  They forward to the CPU_LOG_INT
   log mask when DEBUG is defined and expand to empty statements (their
   arguments are not evaluated) otherwise. */
#ifndef DEBUG
#  define LOG_INT(...) do { } while (0)
#  define LOG_INT_STATE(env) do { } while (0)
#else
#  define LOG_INT(...) qemu_log_mask(CPU_LOG_INT, ## __VA_ARGS__)
#  define LOG_INT_STATE(env) log_cpu_state_mask(CPU_LOG_INT, (env), 0)
#endif
58
9df217a3
FB
59#include <unistd.h>
60#include <fcntl.h>
b88a3832 61#include "kqemu.h"
9df217a3 62
6e4255f6
FB
/* Path of the kqemu control device opened by kqemu_init(). */
#ifndef _WIN32
#define KQEMU_DEVICE "/dev/kqemu"
#else
#define KQEMU_DEVICE "\\\\.\\kqemu"
#endif
68
da260249
FB
69static void qpi_init(void);
70
6e4255f6
FB
/* Handle on the kqemu device plus the host-specific "invalid" value and
   close primitive.  kqemu_fd holds KQEMU_INVALID_FD while the device is
   not open. */
#ifndef _WIN32
#define KQEMU_INVALID_FD (-1)
#define kqemu_closefd(x) close(x)
int kqemu_fd = KQEMU_INVALID_FD;
#else
#define KQEMU_INVALID_FD INVALID_HANDLE_VALUE
#define kqemu_closefd(x) CloseHandle(x)
HANDLE kqemu_fd = KQEMU_INVALID_FD;
#endif
9df217a3 80
f32fc648
FB
/* kqemu usage policy:
     0 = not allowed
     1 = user kqemu
     2 = kernel kqemu
*/
int kqemu_allowed = 1;

/* Virtual addresses queued by kqemu_flush_page(); the array is
   allocated in kqemu_init() and its fill level is nb_pages_to_flush. */
uint64_t *pages_to_flush;
unsigned int nb_pages_to_flush;

/* RAM addresses queued by kqemu_set_notdirty(). */
uint64_t *ram_pages_to_update;
unsigned int nb_ram_pages_to_update;

/* RAM addresses queued by kqemu_modify_page().  modified_ram_pages_table
   holds one byte per RAM page and is non-zero while that page is already
   present in modified_ram_pages. */
uint64_t *modified_ram_pages;
unsigned int nb_modified_ram_pages;
uint8_t *modified_ram_pages_table;

/* I/O memory index returned by cpu_register_io_memory() in qpi_init(). */
int qpi_io_memory;
uint32_t kqemu_comm_base; /* physical address of the QPI communication page */
9df217a3
FB
95
/* Execute CPUID for leaf 'index' and store the four result registers in
   the lvalues eax, ebx, ecx and edx. */
#define cpuid(index, eax, ebx, ecx, edx)                                  \
    __asm__ __volatile__ ("cpuid"                                         \
                          : "=a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx) \
                          : "0" (index))
100
c28e951f
FB
/* Return non-zero when the host CPU implements the CPUID instruction.
   x86_64 builds answer 1 unconditionally; 32-bit builds try to toggle
   bit 21 (0x00200000) of EFLAGS and report whether the change stuck. */
static int is_cpuid_supported(void)
{
#ifdef __x86_64__
    return 1;
#else
    int toggled, initial;

    __asm__ __volatile__ ("pushf\n"
                          "popl %0\n"
                          "movl %0, %1\n"
                          "xorl $0x00200000, %0\n"
                          "pushl %0\n"
                          "popf\n"
                          "pushf\n"
                          "popl %0\n"
                          : "=a" (toggled), "=d" (initial)
                          :
                          : "cc");
    return toggled != initial;
#endif
}
9df217a3
FB
124
125static void kqemu_update_cpuid(CPUState *env)
126{
0de6bb73 127 int critical_features_mask, features, ext_features, ext_features_mask;
9df217a3
FB
128 uint32_t eax, ebx, ecx, edx;
129
130 /* the following features are kept identical on the host and
131 target cpus because they are important for user code. Strictly
132 speaking, only SSE really matters because the OS must support
133 it if the user code uses it. */
5fafdf24
TS
134 critical_features_mask =
135 CPUID_CMOV | CPUID_CX8 |
136 CPUID_FXSR | CPUID_MMX | CPUID_SSE |
ca0d1734 137 CPUID_SSE2 | CPUID_SEP;
0de6bb73 138 ext_features_mask = CPUID_EXT_SSE3 | CPUID_EXT_MONITOR;
9df217a3
FB
139 if (!is_cpuid_supported()) {
140 features = 0;
0de6bb73 141 ext_features = 0;
9df217a3
FB
142 } else {
143 cpuid(1, eax, ebx, ecx, edx);
144 features = edx;
0de6bb73 145 ext_features = ecx;
9df217a3 146 }
ca0d1734
FB
147#ifdef __x86_64__
148 /* NOTE: on x86_64 CPUs, SYSENTER is not supported in
149 compatibility mode, so in order to have the best performances
150 it is better not to use it */
151 features &= ~CPUID_SEP;
152#endif
9df217a3
FB
153 env->cpuid_features = (env->cpuid_features & ~critical_features_mask) |
154 (features & critical_features_mask);
0de6bb73
FB
155 env->cpuid_ext_features = (env->cpuid_ext_features & ~ext_features_mask) |
156 (ext_features & ext_features_mask);
9df217a3
FB
157 /* XXX: we could update more of the target CPUID state so that the
158 non accelerated code sees exactly the same CPU features as the
159 accelerated code */
160}
161
162int kqemu_init(CPUState *env)
163{
da260249 164 struct kqemu_init kinit;
9df217a3 165 int ret, version;
6e4255f6
FB
166#ifdef _WIN32
167 DWORD temp;
168#endif
9df217a3
FB
169
170 if (!kqemu_allowed)
171 return -1;
172
6e4255f6
FB
173#ifdef _WIN32
174 kqemu_fd = CreateFile(KQEMU_DEVICE, GENERIC_WRITE | GENERIC_READ,
175 FILE_SHARE_READ | FILE_SHARE_WRITE,
176 NULL, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL,
177 NULL);
7fb2a862 178 if (kqemu_fd == KQEMU_INVALID_FD) {
179 fprintf(stderr, "Could not open '%s' - QEMU acceleration layer not activated: %lu\n",
180 KQEMU_DEVICE, GetLastError());
181 return -1;
182 }
6e4255f6 183#else
9df217a3 184 kqemu_fd = open(KQEMU_DEVICE, O_RDWR);
6e4255f6 185 if (kqemu_fd == KQEMU_INVALID_FD) {
99c19686
TS
186 fprintf(stderr, "Could not open '%s' - QEMU acceleration layer not activated: %s\n",
187 KQEMU_DEVICE, strerror(errno));
9df217a3
FB
188 return -1;
189 }
7fb2a862 190#endif
9df217a3 191 version = 0;
6e4255f6
FB
192#ifdef _WIN32
193 DeviceIoControl(kqemu_fd, KQEMU_GET_VERSION, NULL, 0,
194 &version, sizeof(version), &temp, NULL);
195#else
9df217a3 196 ioctl(kqemu_fd, KQEMU_GET_VERSION, &version);
6e4255f6 197#endif
9df217a3
FB
198 if (version != KQEMU_VERSION) {
199 fprintf(stderr, "Version mismatch between kqemu module and qemu (%08x %08x) - disabling kqemu use\n",
200 version, KQEMU_VERSION);
201 goto fail;
202 }
203
5fafdf24 204 pages_to_flush = qemu_vmalloc(KQEMU_MAX_PAGES_TO_FLUSH *
da260249 205 sizeof(uint64_t));
9df217a3
FB
206 if (!pages_to_flush)
207 goto fail;
208
5fafdf24 209 ram_pages_to_update = qemu_vmalloc(KQEMU_MAX_RAM_PAGES_TO_UPDATE *
da260249 210 sizeof(uint64_t));
aa062973
FB
211 if (!ram_pages_to_update)
212 goto fail;
213
5fafdf24 214 modified_ram_pages = qemu_vmalloc(KQEMU_MAX_MODIFIED_RAM_PAGES *
da260249 215 sizeof(uint64_t));
f32fc648
FB
216 if (!modified_ram_pages)
217 goto fail;
218 modified_ram_pages_table = qemu_mallocz(phys_ram_size >> TARGET_PAGE_BITS);
219 if (!modified_ram_pages_table)
220 goto fail;
221
da260249
FB
222 memset(&kinit, 0, sizeof(kinit)); /* set the paddings to zero */
223 kinit.ram_base = phys_ram_base;
224 kinit.ram_size = phys_ram_size;
225 kinit.ram_dirty = phys_ram_dirty;
226 kinit.pages_to_flush = pages_to_flush;
227 kinit.ram_pages_to_update = ram_pages_to_update;
228 kinit.modified_ram_pages = modified_ram_pages;
6e4255f6 229#ifdef _WIN32
da260249 230 ret = DeviceIoControl(kqemu_fd, KQEMU_INIT, &kinit, sizeof(kinit),
6e4255f6
FB
231 NULL, 0, &temp, NULL) == TRUE ? 0 : -1;
232#else
da260249 233 ret = ioctl(kqemu_fd, KQEMU_INIT, &kinit);
6e4255f6 234#endif
9df217a3
FB
235 if (ret < 0) {
236 fprintf(stderr, "Error %d while initializing QEMU acceleration layer - disabling it for now\n", ret);
237 fail:
6e4255f6
FB
238 kqemu_closefd(kqemu_fd);
239 kqemu_fd = KQEMU_INVALID_FD;
9df217a3
FB
240 return -1;
241 }
242 kqemu_update_cpuid(env);
f32fc648 243 env->kqemu_enabled = kqemu_allowed;
9df217a3 244 nb_pages_to_flush = 0;
aa062973 245 nb_ram_pages_to_update = 0;
da260249
FB
246
247 qpi_init();
9df217a3
FB
248 return 0;
249}
250
251void kqemu_flush_page(CPUState *env, target_ulong addr)
252{
d12d51d5 253 LOG_INT("kqemu_flush_page: addr=" TARGET_FMT_lx "\n", addr);
9df217a3
FB
254 if (nb_pages_to_flush >= KQEMU_MAX_PAGES_TO_FLUSH)
255 nb_pages_to_flush = KQEMU_FLUSH_ALL;
256 else
257 pages_to_flush[nb_pages_to_flush++] = addr;
258}
259
260void kqemu_flush(CPUState *env, int global)
261{
d12d51d5 262 LOG_INT("kqemu_flush:\n");
9df217a3
FB
263 nb_pages_to_flush = KQEMU_FLUSH_ALL;
264}
265
aa062973
FB
266void kqemu_set_notdirty(CPUState *env, ram_addr_t ram_addr)
267{
d12d51d5 268 LOG_INT("kqemu_set_notdirty: addr=%08lx\n",
da260249 269 (unsigned long)ram_addr);
fc8dc060
FB
270 /* we only track transitions to dirty state */
271 if (phys_ram_dirty[ram_addr >> TARGET_PAGE_BITS] != 0xff)
272 return;
aa062973
FB
273 if (nb_ram_pages_to_update >= KQEMU_MAX_RAM_PAGES_TO_UPDATE)
274 nb_ram_pages_to_update = KQEMU_RAM_PAGES_UPDATE_ALL;
275 else
276 ram_pages_to_update[nb_ram_pages_to_update++] = ram_addr;
277}
278
f32fc648
FB
279static void kqemu_reset_modified_ram_pages(void)
280{
281 int i;
282 unsigned long page_index;
3b46e624 283
f32fc648
FB
284 for(i = 0; i < nb_modified_ram_pages; i++) {
285 page_index = modified_ram_pages[i] >> TARGET_PAGE_BITS;
286 modified_ram_pages_table[page_index] = 0;
287 }
288 nb_modified_ram_pages = 0;
289}
290
291void kqemu_modify_page(CPUState *env, ram_addr_t ram_addr)
292{
293 unsigned long page_index;
294 int ret;
295#ifdef _WIN32
296 DWORD temp;
297#endif
298
299 page_index = ram_addr >> TARGET_PAGE_BITS;
300 if (!modified_ram_pages_table[page_index]) {
301#if 0
302 printf("%d: modify_page=%08lx\n", nb_modified_ram_pages, ram_addr);
303#endif
304 modified_ram_pages_table[page_index] = 1;
305 modified_ram_pages[nb_modified_ram_pages++] = ram_addr;
306 if (nb_modified_ram_pages >= KQEMU_MAX_MODIFIED_RAM_PAGES) {
307 /* flush */
308#ifdef _WIN32
5fafdf24
TS
309 ret = DeviceIoControl(kqemu_fd, KQEMU_MODIFY_RAM_PAGES,
310 &nb_modified_ram_pages,
f32fc648
FB
311 sizeof(nb_modified_ram_pages),
312 NULL, 0, &temp, NULL);
313#else
5fafdf24 314 ret = ioctl(kqemu_fd, KQEMU_MODIFY_RAM_PAGES,
f32fc648
FB
315 &nb_modified_ram_pages);
316#endif
317 kqemu_reset_modified_ram_pages();
318 }
319 }
320}
321
da260249
FB
322void kqemu_set_phys_mem(uint64_t start_addr, ram_addr_t size,
323 ram_addr_t phys_offset)
324{
325 struct kqemu_phys_mem kphys_mem1, *kphys_mem = &kphys_mem1;
326 uint64_t end;
327 int ret, io_index;
328
329 end = (start_addr + size + TARGET_PAGE_SIZE - 1) & TARGET_PAGE_MASK;
330 start_addr &= TARGET_PAGE_MASK;
331 kphys_mem->phys_addr = start_addr;
332 kphys_mem->size = end - start_addr;
333 kphys_mem->ram_addr = phys_offset & TARGET_PAGE_MASK;
334 io_index = phys_offset & ~TARGET_PAGE_MASK;
335 switch(io_index) {
336 case IO_MEM_RAM:
337 kphys_mem->io_index = KQEMU_IO_MEM_RAM;
338 break;
339 case IO_MEM_ROM:
340 kphys_mem->io_index = KQEMU_IO_MEM_ROM;
341 break;
342 default:
343 if (qpi_io_memory == io_index) {
344 kphys_mem->io_index = KQEMU_IO_MEM_COMM;
345 } else {
346 kphys_mem->io_index = KQEMU_IO_MEM_UNASSIGNED;
347 }
348 break;
349 }
350#ifdef _WIN32
351 {
352 DWORD temp;
353 ret = DeviceIoControl(kqemu_fd, KQEMU_SET_PHYS_MEM,
354 kphys_mem, sizeof(*kphys_mem),
355 NULL, 0, &temp, NULL) == TRUE ? 0 : -1;
356 }
357#else
358 ret = ioctl(kqemu_fd, KQEMU_SET_PHYS_MEM, kphys_mem);
359#endif
360 if (ret < 0) {
361 fprintf(stderr, "kqemu: KQEMU_SET_PHYS_PAGE error=%d: start_addr=0x%016" PRIx64 " size=0x%08lx phys_offset=0x%08lx\n",
362 ret, start_addr,
363 (unsigned long)size, (unsigned long)phys_offset);
364 }
365}
366
9df217a3
FB
/* Memory image used with the frstor/fsave instructions in this file:
   a 28-byte header followed by eight 10-byte registers (108 bytes). */
struct fpstate {
    uint16_t fpuc, dummy1;          /* control word + padding */
    uint16_t fpus, dummy2;          /* status word + padding */
    uint16_t fptag, dummy3;         /* tag word (2 bits/register) + padding */

    uint32_t fpip, fpcs;
    uint32_t fpoo, fpos;
    uint8_t fpregs1[8 * 10];        /* registers, 10 bytes each */
};
381
/* Memory image used with the fxrstor/fxsave instructions in this file
   (512 bytes): a 32-byte header, eight FPU registers in 16-byte slots,
   the XMM register area and trailing padding. */
struct fpxstate {
    uint16_t fpuc, fpus;            /* control and status words */
    uint16_t fptag, fop;            /* tag bits (1 bit/register), opcode */
    uint32_t fpuip;
    uint16_t cs_sel, dummy0;
    uint32_t fpudp;
    uint16_t ds_sel, dummy1;
    uint32_t mxcsr, mxcsr_mask;
    uint8_t fpregs1[8 * 16];        /* 10 significant bytes per slot */
    uint8_t xmm_regs[16 * 16];
    uint8_t dummy2[96];
};

/* single shared, 16-byte aligned scratch image for fxrstor/fxsave */
static struct fpxstate fpx1 __attribute__((aligned(16)));
401
402static void restore_native_fp_frstor(CPUState *env)
403{
404 int fptag, i, j;
405 struct fpstate fp1, *fp = &fp1;
3b46e624 406
9df217a3
FB
407 fp->fpuc = env->fpuc;
408 fp->fpus = (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11;
409 fptag = 0;
410 for (i=7; i>=0; i--) {
411 fptag <<= 2;
412 if (env->fptags[i]) {
413 fptag |= 3;
414 } else {
415 /* the FPU automatically computes it */
416 }
417 }
418 fp->fptag = fptag;
419 j = env->fpstt;
420 for(i = 0;i < 8; i++) {
421 memcpy(&fp->fpregs1[i * 10], &env->fpregs[j].d, 10);
422 j = (j + 1) & 7;
423 }
424 asm volatile ("frstor %0" : "=m" (*fp));
425}
5fafdf24 426
9df217a3
FB
427static void save_native_fp_fsave(CPUState *env)
428{
429 int fptag, i, j;
430 uint16_t fpuc;
431 struct fpstate fp1, *fp = &fp1;
432
433 asm volatile ("fsave %0" : : "m" (*fp));
434 env->fpuc = fp->fpuc;
435 env->fpstt = (fp->fpus >> 11) & 7;
436 env->fpus = fp->fpus & ~0x3800;
437 fptag = fp->fptag;
438 for(i = 0;i < 8; i++) {
439 env->fptags[i] = ((fptag & 3) == 3);
440 fptag >>= 2;
441 }
442 j = env->fpstt;
443 for(i = 0;i < 8; i++) {
444 memcpy(&env->fpregs[j].d, &fp->fpregs1[i * 10], 10);
445 j = (j + 1) & 7;
446 }
447 /* we must restore the default rounding state */
448 fpuc = 0x037f | (env->fpuc & (3 << 10));
449 asm volatile("fldcw %0" : : "m" (fpuc));
450}
451
452static void restore_native_fp_fxrstor(CPUState *env)
453{
454 struct fpxstate *fp = &fpx1;
455 int i, j, fptag;
456
457 fp->fpuc = env->fpuc;
458 fp->fpus = (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11;
459 fptag = 0;
460 for(i = 0; i < 8; i++)
461 fptag |= (env->fptags[i] << i);
462 fp->fptag = fptag ^ 0xff;
463
464 j = env->fpstt;
465 for(i = 0;i < 8; i++) {
466 memcpy(&fp->fpregs1[i * 16], &env->fpregs[j].d, 10);
467 j = (j + 1) & 7;
468 }
469 if (env->cpuid_features & CPUID_SSE) {
470 fp->mxcsr = env->mxcsr;
471 /* XXX: check if DAZ is not available */
472 fp->mxcsr_mask = 0xffff;
c28e951f 473 memcpy(fp->xmm_regs, env->xmm_regs, CPU_NB_REGS * 16);
9df217a3
FB
474 }
475 asm volatile ("fxrstor %0" : "=m" (*fp));
476}
477
478static void save_native_fp_fxsave(CPUState *env)
479{
480 struct fpxstate *fp = &fpx1;
481 int fptag, i, j;
482 uint16_t fpuc;
483
484 asm volatile ("fxsave %0" : : "m" (*fp));
485 env->fpuc = fp->fpuc;
486 env->fpstt = (fp->fpus >> 11) & 7;
487 env->fpus = fp->fpus & ~0x3800;
488 fptag = fp->fptag ^ 0xff;
489 for(i = 0;i < 8; i++) {
490 env->fptags[i] = (fptag >> i) & 1;
491 }
492 j = env->fpstt;
493 for(i = 0;i < 8; i++) {
494 memcpy(&env->fpregs[j].d, &fp->fpregs1[i * 16], 10);
495 j = (j + 1) & 7;
496 }
497 if (env->cpuid_features & CPUID_SSE) {
498 env->mxcsr = fp->mxcsr;
c28e951f 499 memcpy(env->xmm_regs, fp->xmm_regs, CPU_NB_REGS * 16);
9df217a3
FB
500 }
501
502 /* we must restore the default rounding state */
503 asm volatile ("fninit");
504 fpuc = 0x037f | (env->fpuc & (3 << 10));
505 asm volatile("fldcw %0" : : "m" (fpuc));
506}
507
c28e951f
FB
508static int do_syscall(CPUState *env,
509 struct kqemu_cpu_state *kenv)
510{
511 int selector;
3b46e624 512
c28e951f 513 selector = (env->star >> 32) & 0xffff;
da260249 514#ifdef TARGET_X86_64
c28e951f 515 if (env->hflags & HF_LMA_MASK) {
93eac243
FB
516 int code64;
517
c28e951f
FB
518 env->regs[R_ECX] = kenv->next_eip;
519 env->regs[11] = env->eflags;
520
93eac243
FB
521 code64 = env->hflags & HF_CS64_MASK;
522
c28e951f 523 cpu_x86_set_cpl(env, 0);
5fafdf24
TS
524 cpu_x86_load_seg_cache(env, R_CS, selector & 0xfffc,
525 0, 0xffffffff,
c4e27dd4 526 DESC_G_MASK | DESC_P_MASK |
c28e951f
FB
527 DESC_S_MASK |
528 DESC_CS_MASK | DESC_R_MASK | DESC_A_MASK | DESC_L_MASK);
5fafdf24 529 cpu_x86_load_seg_cache(env, R_SS, (selector + 8) & 0xfffc,
c28e951f
FB
530 0, 0xffffffff,
531 DESC_G_MASK | DESC_B_MASK | DESC_P_MASK |
532 DESC_S_MASK |
533 DESC_W_MASK | DESC_A_MASK);
534 env->eflags &= ~env->fmask;
93eac243 535 if (code64)
c28e951f
FB
536 env->eip = env->lstar;
537 else
538 env->eip = env->cstar;
5fafdf24 539 } else
c28e951f
FB
540#endif
541 {
542 env->regs[R_ECX] = (uint32_t)kenv->next_eip;
3b46e624 543
c28e951f 544 cpu_x86_set_cpl(env, 0);
5fafdf24
TS
545 cpu_x86_load_seg_cache(env, R_CS, selector & 0xfffc,
546 0, 0xffffffff,
c28e951f
FB
547 DESC_G_MASK | DESC_B_MASK | DESC_P_MASK |
548 DESC_S_MASK |
549 DESC_CS_MASK | DESC_R_MASK | DESC_A_MASK);
5fafdf24 550 cpu_x86_load_seg_cache(env, R_SS, (selector + 8) & 0xfffc,
c28e951f
FB
551 0, 0xffffffff,
552 DESC_G_MASK | DESC_B_MASK | DESC_P_MASK |
553 DESC_S_MASK |
554 DESC_W_MASK | DESC_A_MASK);
555 env->eflags &= ~(IF_MASK | RF_MASK | VM_MASK);
556 env->eip = (uint32_t)env->star;
557 }
558 return 2;
559}
560
f32fc648 561#ifdef CONFIG_PROFILER
aa062973
FB
562
/* Profiler: hash table counting how often each PC was recorded. */
#define PC_REC_SIZE 1
#define PC_REC_HASH_BITS 16
#define PC_REC_HASH_SIZE (1 << PC_REC_HASH_BITS)

typedef struct PCRecord {
    unsigned long pc;
    int64_t count;
    struct PCRecord *next;      /* next record in the same bucket */
} PCRecord;

static PCRecord *pc_rec_hash[PC_REC_HASH_SIZE];
static int nb_pc_records;

/* Count one occurrence of 'pc': bump the existing record, or append a
   new record with count 1 to the end of its bucket.  If no memory is
   available for a new record the sample is dropped. */
static void kqemu_record_pc(unsigned long pc)
{
    unsigned long h;
    PCRecord **pr, *r;

    h = pc / PC_REC_SIZE;
    h = h ^ (h >> PC_REC_HASH_BITS);
    h &= (PC_REC_HASH_SIZE - 1);

    for (pr = &pc_rec_hash[h]; (r = *pr) != NULL; pr = &r->next) {
        if (r->pc == pc) {
            r->count++;
            return;
        }
    }

    r = malloc(sizeof(*r));
    if (r == NULL)
        return; /* out of memory: losing a profiling sample is harmless */
    r->count = 1;
    r->pc = pc;
    r->next = NULL;
    *pr = r;
    nb_pc_records++;
}
602
f32fc648 603static int pc_rec_cmp(const void *p1, const void *p2)
aa062973
FB
604{
605 PCRecord *r1 = *(PCRecord **)p1;
606 PCRecord *r2 = *(PCRecord **)p2;
607 if (r1->count < r2->count)
608 return 1;
609 else if (r1->count == r2->count)
610 return 0;
611 else
612 return -1;
613}
614
f32fc648
FB
615static void kqemu_record_flush(void)
616{
617 PCRecord *r, *r_next;
618 int h;
619
620 for(h = 0; h < PC_REC_HASH_SIZE; h++) {
621 for(r = pc_rec_hash[h]; r != NULL; r = r_next) {
622 r_next = r->next;
623 free(r);
624 }
625 pc_rec_hash[h] = NULL;
626 }
627 nb_pc_records = 0;
628}
629
aa062973
FB
630void kqemu_record_dump(void)
631{
632 PCRecord **pr, *r;
633 int i, h;
634 FILE *f;
635 int64_t total, sum;
636
637 pr = malloc(sizeof(PCRecord *) * nb_pc_records);
638 i = 0;
639 total = 0;
640 for(h = 0; h < PC_REC_HASH_SIZE; h++) {
641 for(r = pc_rec_hash[h]; r != NULL; r = r->next) {
642 pr[i++] = r;
643 total += r->count;
644 }
645 }
646 qsort(pr, nb_pc_records, sizeof(PCRecord *), pc_rec_cmp);
3b46e624 647
aa062973
FB
648 f = fopen("/tmp/kqemu.stats", "w");
649 if (!f) {
650 perror("/tmp/kqemu.stats");
651 exit(1);
652 }
26a76461 653 fprintf(f, "total: %" PRId64 "\n", total);
aa062973
FB
654 sum = 0;
655 for(i = 0; i < nb_pc_records; i++) {
656 r = pr[i];
657 sum += r->count;
5fafdf24
TS
658 fprintf(f, "%08lx: %" PRId64 " %0.2f%% %0.2f%%\n",
659 r->pc,
660 r->count,
aa062973
FB
661 (double)r->count / (double)total * 100.0,
662 (double)sum / (double)total * 100.0);
663 }
664 fclose(f);
665 free(pr);
f32fc648
FB
666
667 kqemu_record_flush();
aa062973
FB
668}
669#endif
670
da260249
FB
671static inline void kqemu_load_seg(struct kqemu_segment_cache *ksc,
672 const SegmentCache *sc)
673{
674 ksc->selector = sc->selector;
675 ksc->flags = sc->flags;
676 ksc->limit = sc->limit;
677 ksc->base = sc->base;
678}
679
680static inline void kqemu_save_seg(SegmentCache *sc,
681 const struct kqemu_segment_cache *ksc)
682{
683 sc->selector = ksc->selector;
684 sc->flags = ksc->flags;
685 sc->limit = ksc->limit;
686 sc->base = ksc->base;
687}
688
9df217a3
FB
689int kqemu_cpu_exec(CPUState *env)
690{
691 struct kqemu_cpu_state kcpu_state, *kenv = &kcpu_state;
f32fc648
FB
692 int ret, cpl, i;
693#ifdef CONFIG_PROFILER
694 int64_t ti;
695#endif
6e4255f6
FB
696#ifdef _WIN32
697 DWORD temp;
698#endif
9df217a3 699
f32fc648
FB
700#ifdef CONFIG_PROFILER
701 ti = profile_getclock();
702#endif
d12d51d5
AL
703 LOG_INT("kqemu: cpu_exec: enter\n");
704 LOG_INT_STATE(env);
da260249
FB
705 for(i = 0; i < CPU_NB_REGS; i++)
706 kenv->regs[i] = env->regs[i];
9df217a3
FB
707 kenv->eip = env->eip;
708 kenv->eflags = env->eflags;
da260249
FB
709 for(i = 0; i < 6; i++)
710 kqemu_load_seg(&kenv->segs[i], &env->segs[i]);
711 kqemu_load_seg(&kenv->ldt, &env->ldt);
712 kqemu_load_seg(&kenv->tr, &env->tr);
713 kqemu_load_seg(&kenv->gdt, &env->gdt);
714 kqemu_load_seg(&kenv->idt, &env->idt);
9df217a3
FB
715 kenv->cr0 = env->cr[0];
716 kenv->cr2 = env->cr[2];
717 kenv->cr3 = env->cr[3];
718 kenv->cr4 = env->cr[4];
719 kenv->a20_mask = env->a20_mask;
c28e951f 720 kenv->efer = env->efer;
f32fc648
FB
721 kenv->tsc_offset = 0;
722 kenv->star = env->star;
723 kenv->sysenter_cs = env->sysenter_cs;
724 kenv->sysenter_esp = env->sysenter_esp;
725 kenv->sysenter_eip = env->sysenter_eip;
da260249 726#ifdef TARGET_X86_64
f32fc648
FB
727 kenv->lstar = env->lstar;
728 kenv->cstar = env->cstar;
729 kenv->fmask = env->fmask;
730 kenv->kernelgsbase = env->kernelgsbase;
c28e951f 731#endif
9df217a3
FB
732 if (env->dr[7] & 0xff) {
733 kenv->dr7 = env->dr[7];
734 kenv->dr0 = env->dr[0];
735 kenv->dr1 = env->dr[1];
736 kenv->dr2 = env->dr[2];
737 kenv->dr3 = env->dr[3];
738 } else {
739 kenv->dr7 = 0;
740 }
741 kenv->dr6 = env->dr[6];
f32fc648
FB
742 cpl = (env->hflags & HF_CPL_MASK);
743 kenv->cpl = cpl;
9df217a3 744 kenv->nb_pages_to_flush = nb_pages_to_flush;
f32fc648 745 kenv->user_only = (env->kqemu_enabled == 1);
aa062973 746 kenv->nb_ram_pages_to_update = nb_ram_pages_to_update;
aa062973 747 nb_ram_pages_to_update = 0;
f32fc648 748 kenv->nb_modified_ram_pages = nb_modified_ram_pages;
da260249 749
f32fc648
FB
750 kqemu_reset_modified_ram_pages();
751
752 if (env->cpuid_features & CPUID_FXSR)
753 restore_native_fp_fxrstor(env);
754 else
755 restore_native_fp_frstor(env);
9df217a3 756
6e4255f6 757#ifdef _WIN32
a332e112
FB
758 if (DeviceIoControl(kqemu_fd, KQEMU_EXEC,
759 kenv, sizeof(struct kqemu_cpu_state),
760 kenv, sizeof(struct kqemu_cpu_state),
761 &temp, NULL)) {
762 ret = kenv->retval;
763 } else {
764 ret = -1;
765 }
6e4255f6 766#else
6e4255f6
FB
767 ioctl(kqemu_fd, KQEMU_EXEC, kenv);
768 ret = kenv->retval;
6e4255f6 769#endif
f32fc648
FB
770 if (env->cpuid_features & CPUID_FXSR)
771 save_native_fp_fxsave(env);
772 else
773 save_native_fp_fsave(env);
9df217a3 774
da260249
FB
775 for(i = 0; i < CPU_NB_REGS; i++)
776 env->regs[i] = kenv->regs[i];
9df217a3
FB
777 env->eip = kenv->eip;
778 env->eflags = kenv->eflags;
da260249
FB
779 for(i = 0; i < 6; i++)
780 kqemu_save_seg(&env->segs[i], &kenv->segs[i]);
f32fc648 781 cpu_x86_set_cpl(env, kenv->cpl);
da260249 782 kqemu_save_seg(&env->ldt, &kenv->ldt);
f32fc648
FB
783 env->cr[0] = kenv->cr0;
784 env->cr[4] = kenv->cr4;
785 env->cr[3] = kenv->cr3;
9df217a3
FB
786 env->cr[2] = kenv->cr2;
787 env->dr[6] = kenv->dr6;
da260249 788#ifdef TARGET_X86_64
f32fc648 789 env->kernelgsbase = kenv->kernelgsbase;
f32fc648
FB
790#endif
791
792 /* flush pages as indicated by kqemu */
793 if (kenv->nb_pages_to_flush >= KQEMU_FLUSH_ALL) {
794 tlb_flush(env, 1);
795 } else {
796 for(i = 0; i < kenv->nb_pages_to_flush; i++) {
797 tlb_flush_page(env, pages_to_flush[i]);
798 }
799 }
800 nb_pages_to_flush = 0;
801
802#ifdef CONFIG_PROFILER
803 kqemu_time += profile_getclock() - ti;
804 kqemu_exec_count++;
805#endif
9df217a3 806
aa062973
FB
807 if (kenv->nb_ram_pages_to_update > 0) {
808 cpu_tlb_update_dirty(env);
809 }
aa062973 810
f32fc648
FB
811 if (kenv->nb_modified_ram_pages > 0) {
812 for(i = 0; i < kenv->nb_modified_ram_pages; i++) {
813 unsigned long addr;
814 addr = modified_ram_pages[i];
815 tb_invalidate_phys_page_range(addr, addr + TARGET_PAGE_SIZE, 0);
816 }
817 }
f32fc648 818
aa062973
FB
819 /* restore the hidden flags */
820 {
821 unsigned int new_hflags;
822#ifdef TARGET_X86_64
5fafdf24 823 if ((env->hflags & HF_LMA_MASK) &&
aa062973
FB
824 (env->segs[R_CS].flags & DESC_L_MASK)) {
825 /* long mode */
826 new_hflags = HF_CS32_MASK | HF_SS32_MASK | HF_CS64_MASK;
827 } else
828#endif
829 {
830 /* legacy / compatibility case */
831 new_hflags = (env->segs[R_CS].flags & DESC_B_MASK)
832 >> (DESC_B_SHIFT - HF_CS32_SHIFT);
833 new_hflags |= (env->segs[R_SS].flags & DESC_B_MASK)
834 >> (DESC_B_SHIFT - HF_SS32_SHIFT);
5fafdf24 835 if (!(env->cr[0] & CR0_PE_MASK) ||
aa062973
FB
836 (env->eflags & VM_MASK) ||
837 !(env->hflags & HF_CS32_MASK)) {
838 /* XXX: try to avoid this test. The problem comes from the
839 fact that is real mode or vm86 mode we only modify the
840 'base' and 'selector' fields of the segment cache to go
841 faster. A solution may be to force addseg to one in
842 translate-i386.c. */
843 new_hflags |= HF_ADDSEG_MASK;
844 } else {
5fafdf24 845 new_hflags |= ((env->segs[R_DS].base |
aa062973 846 env->segs[R_ES].base |
5fafdf24 847 env->segs[R_SS].base) != 0) <<
aa062973
FB
848 HF_ADDSEG_SHIFT;
849 }
850 }
5fafdf24 851 env->hflags = (env->hflags &
aa062973
FB
852 ~(HF_CS32_MASK | HF_SS32_MASK | HF_CS64_MASK | HF_ADDSEG_MASK)) |
853 new_hflags;
854 }
f32fc648
FB
855 /* update FPU flags */
856 env->hflags = (env->hflags & ~(HF_MP_MASK | HF_EM_MASK | HF_TS_MASK)) |
857 ((env->cr[0] << (HF_MP_SHIFT - 1)) & (HF_MP_MASK | HF_EM_MASK | HF_TS_MASK));
858 if (env->cr[4] & CR4_OSFXSR_MASK)
859 env->hflags |= HF_OSFXSR_MASK;
860 else
861 env->hflags &= ~HF_OSFXSR_MASK;
3b46e624 862
d12d51d5 863 LOG_INT("kqemu: kqemu_cpu_exec: ret=0x%x\n", ret);
c28e951f
FB
864 if (ret == KQEMU_RET_SYSCALL) {
865 /* syscall instruction */
866 return do_syscall(env, kenv);
5fafdf24 867 } else
9df217a3
FB
868 if ((ret & 0xff00) == KQEMU_RET_INT) {
869 env->exception_index = ret & 0xff;
870 env->error_code = 0;
871 env->exception_is_int = 1;
872 env->exception_next_eip = kenv->next_eip;
f32fc648
FB
873#ifdef CONFIG_PROFILER
874 kqemu_ret_int_count++;
875#endif
d12d51d5
AL
876 LOG_INT("kqemu: interrupt v=%02x:\n", env->exception_index);
877 LOG_INT_STATE(env);
9df217a3
FB
878 return 1;
879 } else if ((ret & 0xff00) == KQEMU_RET_EXCEPTION) {
880 env->exception_index = ret & 0xff;
881 env->error_code = kenv->error_code;
882 env->exception_is_int = 0;
883 env->exception_next_eip = 0;
f32fc648
FB
884#ifdef CONFIG_PROFILER
885 kqemu_ret_excp_count++;
886#endif
d12d51d5 887 LOG_INT("kqemu: exception v=%02x e=%04x:\n",
9df217a3 888 env->exception_index, env->error_code);
d12d51d5 889 LOG_INT_STATE(env);
9df217a3
FB
890 return 1;
891 } else if (ret == KQEMU_RET_INTR) {
f32fc648
FB
892#ifdef CONFIG_PROFILER
893 kqemu_ret_intr_count++;
894#endif
d12d51d5 895 LOG_INT_STATE(env);
9df217a3 896 return 0;
5fafdf24 897 } else if (ret == KQEMU_RET_SOFTMMU) {
f32fc648
FB
898#ifdef CONFIG_PROFILER
899 {
900 unsigned long pc = env->eip + env->segs[R_CS].base;
901 kqemu_record_pc(pc);
902 }
aa062973 903#endif
d12d51d5 904 LOG_INT_STATE(env);
9df217a3
FB
905 return 2;
906 } else {
907 cpu_dump_state(env, stderr, fprintf, 0);
908 fprintf(stderr, "Unsupported return value: 0x%x\n", ret);
909 exit(1);
910 }
911 return 0;
912}
913
a332e112
FB
914void kqemu_cpu_interrupt(CPUState *env)
915{
da260249 916#if defined(_WIN32)
5fafdf24 917 /* cancelling the I/O request causes KQEMU to finish executing the
a332e112
FB
918 current block and successfully returning. */
919 CancelIo(kqemu_fd);
920#endif
921}
922
da260249
FB
/*
   QEMU paravirtualization interface. The current interface only
   allows modifying the IF and IOPL flags when running in
   kqemu.

   At this point it is not very satisfactory. I leave it for reference
   as it adds little complexity.
*/
931
932#define QPI_COMM_PAGE_PHYS_ADDR 0xff000000
933
934static uint32_t qpi_mem_readb(void *opaque, target_phys_addr_t addr)
935{
936 return 0;
937}
938
939static uint32_t qpi_mem_readw(void *opaque, target_phys_addr_t addr)
940{
941 return 0;
942}
943
944static void qpi_mem_writeb(void *opaque, target_phys_addr_t addr, uint32_t val)
945{
946}
947
948static void qpi_mem_writew(void *opaque, target_phys_addr_t addr, uint32_t val)
949{
950}
951
952static uint32_t qpi_mem_readl(void *opaque, target_phys_addr_t addr)
953{
954 CPUState *env;
955
956 env = cpu_single_env;
957 if (!env)
958 return 0;
959 return env->eflags & (IF_MASK | IOPL_MASK);
960}
961
962/* Note: after writing to this address, the guest code must make sure
963 it is exiting the current TB. pushf/popf can be used for that
964 purpose. */
965static void qpi_mem_writel(void *opaque, target_phys_addr_t addr, uint32_t val)
966{
967 CPUState *env;
968
969 env = cpu_single_env;
970 if (!env)
971 return;
972 env->eflags = (env->eflags & ~(IF_MASK | IOPL_MASK)) |
973 (val & (IF_MASK | IOPL_MASK));
974}
975
976static CPUReadMemoryFunc *qpi_mem_read[3] = {
977 qpi_mem_readb,
978 qpi_mem_readw,
979 qpi_mem_readl,
980};
981
982static CPUWriteMemoryFunc *qpi_mem_write[3] = {
983 qpi_mem_writeb,
984 qpi_mem_writew,
985 qpi_mem_writel,
986};
987
988static void qpi_init(void)
989{
990 kqemu_comm_base = 0xff000000 | 1;
991 qpi_io_memory = cpu_register_io_memory(0,
992 qpi_mem_read,
993 qpi_mem_write, NULL);
994 cpu_register_physical_memory(kqemu_comm_base & ~0xfff,
995 0x1000, qpi_io_memory);
996}
9df217a3 997#endif