]> git.proxmox.com Git - qemu.git/blame - kqemu.c
Implement dynamic guest ram allocation.
[qemu.git] / kqemu.c
CommitLineData
9df217a3
FB
1/*
2 * KQEMU support
5fafdf24 3 *
da260249 4 * Copyright (c) 2005-2008 Fabrice Bellard
9df217a3
FB
5 *
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
10 *
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, write to the Free Software
fad6cb1a 18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301 USA
9df217a3
FB
19 */
20#include "config.h"
21#ifdef _WIN32
22#include <windows.h>
6e4255f6 23#include <winioctl.h>
9df217a3
FB
24#else
25#include <sys/types.h>
26#include <sys/mman.h>
6e4255f6 27#include <sys/ioctl.h>
9df217a3 28#endif
605686cd 29#ifdef HOST_SOLARIS
aafd8139 30#include <sys/ioccom.h>
605686cd 31#endif
9df217a3
FB
32#include <stdlib.h>
33#include <stdio.h>
34#include <stdarg.h>
35#include <string.h>
36#include <errno.h>
37#include <unistd.h>
38#include <inttypes.h>
39
40#include "cpu.h"
41#include "exec-all.h"
ca10f867 42#include "qemu-common.h"
9df217a3
FB
43
44#ifdef USE_KQEMU
45
46#define DEBUG
aa062973 47//#define PROFILE
9df217a3 48
d12d51d5
AL
49
50#ifdef DEBUG
93fcfe39
AL
51# define LOG_INT(...) qemu_log_mask(CPU_LOG_INT, ## __VA_ARGS__)
52# define LOG_INT_STATE(env) log_cpu_state_mask(CPU_LOG_INT, (env), 0)
d12d51d5
AL
53#else
54# define LOG_INT(...) do { } while (0)
55# define LOG_INT_STATE(env) do { } while (0)
56#endif
57
9df217a3
FB
58#include <unistd.h>
59#include <fcntl.h>
b88a3832 60#include "kqemu.h"
9df217a3 61
6e4255f6
FB
62#ifdef _WIN32
63#define KQEMU_DEVICE "\\\\.\\kqemu"
64#else
9df217a3 65#define KQEMU_DEVICE "/dev/kqemu"
6e4255f6
FB
66#endif
67
da260249
FB
68static void qpi_init(void);
69
6e4255f6
FB
70#ifdef _WIN32
71#define KQEMU_INVALID_FD INVALID_HANDLE_VALUE
72HANDLE kqemu_fd = KQEMU_INVALID_FD;
73#define kqemu_closefd(x) CloseHandle(x)
74#else
75#define KQEMU_INVALID_FD -1
76int kqemu_fd = KQEMU_INVALID_FD;
77#define kqemu_closefd(x) close(x)
78#endif
9df217a3 79
f32fc648
FB
80/* 0 = not allowed
81 1 = user kqemu
82 2 = kernel kqemu
83*/
9df217a3 84int kqemu_allowed = 1;
da260249 85uint64_t *pages_to_flush;
9df217a3 86unsigned int nb_pages_to_flush;
da260249 87uint64_t *ram_pages_to_update;
aa062973 88unsigned int nb_ram_pages_to_update;
da260249 89uint64_t *modified_ram_pages;
f32fc648
FB
90unsigned int nb_modified_ram_pages;
91uint8_t *modified_ram_pages_table;
da260249
FB
92int qpi_io_memory;
93uint32_t kqemu_comm_base; /* physical address of the QPI communication page */
94a6b54f
PB
94ram_addr_t kqemu_phys_ram_size;
95uint8_t *kqemu_phys_ram_base;
9df217a3
FB
96
97#define cpuid(index, eax, ebx, ecx, edx) \
98 asm volatile ("cpuid" \
99 : "=a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx) \
100 : "0" (index))
101
c28e951f
FB
#ifdef __x86_64__
/* CPUID is architecturally always available in 64-bit mode. */
static int is_cpuid_supported(void)
{
    return 1;
}
#else
/* CPUID exists iff bit 21 (ID) of EFLAGS can be toggled. */
static int is_cpuid_supported(void)
{
    int v0, v1;
    asm volatile ("pushf\n"
                  "popl %0\n"
                  "movl %0, %1\n"
                  "xorl $0x00200000, %0\n"
                  "pushl %0\n"
                  "popf\n"
                  "pushf\n"
                  "popl %0\n"
                  : "=a" (v0), "=d" (v1)
                  :
                  : "cc");
    return (v0 != v1);
}
#endif
9df217a3
FB
125
126static void kqemu_update_cpuid(CPUState *env)
127{
0de6bb73 128 int critical_features_mask, features, ext_features, ext_features_mask;
9df217a3
FB
129 uint32_t eax, ebx, ecx, edx;
130
131 /* the following features are kept identical on the host and
132 target cpus because they are important for user code. Strictly
133 speaking, only SSE really matters because the OS must support
134 it if the user code uses it. */
5fafdf24
TS
135 critical_features_mask =
136 CPUID_CMOV | CPUID_CX8 |
137 CPUID_FXSR | CPUID_MMX | CPUID_SSE |
ca0d1734 138 CPUID_SSE2 | CPUID_SEP;
0de6bb73 139 ext_features_mask = CPUID_EXT_SSE3 | CPUID_EXT_MONITOR;
9df217a3
FB
140 if (!is_cpuid_supported()) {
141 features = 0;
0de6bb73 142 ext_features = 0;
9df217a3
FB
143 } else {
144 cpuid(1, eax, ebx, ecx, edx);
145 features = edx;
0de6bb73 146 ext_features = ecx;
9df217a3 147 }
ca0d1734
FB
148#ifdef __x86_64__
149 /* NOTE: on x86_64 CPUs, SYSENTER is not supported in
150 compatibility mode, so in order to have the best performances
151 it is better not to use it */
152 features &= ~CPUID_SEP;
153#endif
9df217a3
FB
154 env->cpuid_features = (env->cpuid_features & ~critical_features_mask) |
155 (features & critical_features_mask);
0de6bb73
FB
156 env->cpuid_ext_features = (env->cpuid_ext_features & ~ext_features_mask) |
157 (ext_features & ext_features_mask);
9df217a3
FB
158 /* XXX: we could update more of the target CPUID state so that the
159 non accelerated code sees exactly the same CPU features as the
160 accelerated code */
161}
162
163int kqemu_init(CPUState *env)
164{
da260249 165 struct kqemu_init kinit;
9df217a3 166 int ret, version;
6e4255f6
FB
167#ifdef _WIN32
168 DWORD temp;
169#endif
9df217a3
FB
170
171 if (!kqemu_allowed)
172 return -1;
173
6e4255f6
FB
174#ifdef _WIN32
175 kqemu_fd = CreateFile(KQEMU_DEVICE, GENERIC_WRITE | GENERIC_READ,
176 FILE_SHARE_READ | FILE_SHARE_WRITE,
177 NULL, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL,
178 NULL);
7fb2a862 179 if (kqemu_fd == KQEMU_INVALID_FD) {
180 fprintf(stderr, "Could not open '%s' - QEMU acceleration layer not activated: %lu\n",
181 KQEMU_DEVICE, GetLastError());
182 return -1;
183 }
6e4255f6 184#else
9df217a3 185 kqemu_fd = open(KQEMU_DEVICE, O_RDWR);
6e4255f6 186 if (kqemu_fd == KQEMU_INVALID_FD) {
99c19686
TS
187 fprintf(stderr, "Could not open '%s' - QEMU acceleration layer not activated: %s\n",
188 KQEMU_DEVICE, strerror(errno));
9df217a3
FB
189 return -1;
190 }
7fb2a862 191#endif
9df217a3 192 version = 0;
6e4255f6
FB
193#ifdef _WIN32
194 DeviceIoControl(kqemu_fd, KQEMU_GET_VERSION, NULL, 0,
195 &version, sizeof(version), &temp, NULL);
196#else
9df217a3 197 ioctl(kqemu_fd, KQEMU_GET_VERSION, &version);
6e4255f6 198#endif
9df217a3
FB
199 if (version != KQEMU_VERSION) {
200 fprintf(stderr, "Version mismatch between kqemu module and qemu (%08x %08x) - disabling kqemu use\n",
201 version, KQEMU_VERSION);
202 goto fail;
203 }
204
5fafdf24 205 pages_to_flush = qemu_vmalloc(KQEMU_MAX_PAGES_TO_FLUSH *
da260249 206 sizeof(uint64_t));
9df217a3
FB
207 if (!pages_to_flush)
208 goto fail;
209
5fafdf24 210 ram_pages_to_update = qemu_vmalloc(KQEMU_MAX_RAM_PAGES_TO_UPDATE *
da260249 211 sizeof(uint64_t));
aa062973
FB
212 if (!ram_pages_to_update)
213 goto fail;
214
5fafdf24 215 modified_ram_pages = qemu_vmalloc(KQEMU_MAX_MODIFIED_RAM_PAGES *
da260249 216 sizeof(uint64_t));
f32fc648
FB
217 if (!modified_ram_pages)
218 goto fail;
94a6b54f
PB
219 modified_ram_pages_table =
220 qemu_mallocz(kqemu_phys_ram_size >> TARGET_PAGE_BITS);
f32fc648
FB
221 if (!modified_ram_pages_table)
222 goto fail;
223
da260249 224 memset(&kinit, 0, sizeof(kinit)); /* set the paddings to zero */
94a6b54f
PB
225 kinit.ram_base = kqemu_phys_ram_base;
226 kinit.ram_size = kqemu_phys_ram_size;
da260249
FB
227 kinit.ram_dirty = phys_ram_dirty;
228 kinit.pages_to_flush = pages_to_flush;
229 kinit.ram_pages_to_update = ram_pages_to_update;
230 kinit.modified_ram_pages = modified_ram_pages;
6e4255f6 231#ifdef _WIN32
da260249 232 ret = DeviceIoControl(kqemu_fd, KQEMU_INIT, &kinit, sizeof(kinit),
6e4255f6
FB
233 NULL, 0, &temp, NULL) == TRUE ? 0 : -1;
234#else
da260249 235 ret = ioctl(kqemu_fd, KQEMU_INIT, &kinit);
6e4255f6 236#endif
9df217a3
FB
237 if (ret < 0) {
238 fprintf(stderr, "Error %d while initializing QEMU acceleration layer - disabling it for now\n", ret);
239 fail:
6e4255f6
FB
240 kqemu_closefd(kqemu_fd);
241 kqemu_fd = KQEMU_INVALID_FD;
9df217a3
FB
242 return -1;
243 }
244 kqemu_update_cpuid(env);
f32fc648 245 env->kqemu_enabled = kqemu_allowed;
9df217a3 246 nb_pages_to_flush = 0;
aa062973 247 nb_ram_pages_to_update = 0;
da260249
FB
248
249 qpi_init();
9df217a3
FB
250 return 0;
251}
252
253void kqemu_flush_page(CPUState *env, target_ulong addr)
254{
d12d51d5 255 LOG_INT("kqemu_flush_page: addr=" TARGET_FMT_lx "\n", addr);
9df217a3
FB
256 if (nb_pages_to_flush >= KQEMU_MAX_PAGES_TO_FLUSH)
257 nb_pages_to_flush = KQEMU_FLUSH_ALL;
258 else
259 pages_to_flush[nb_pages_to_flush++] = addr;
260}
261
262void kqemu_flush(CPUState *env, int global)
263{
d12d51d5 264 LOG_INT("kqemu_flush:\n");
9df217a3
FB
265 nb_pages_to_flush = KQEMU_FLUSH_ALL;
266}
267
aa062973
FB
268void kqemu_set_notdirty(CPUState *env, ram_addr_t ram_addr)
269{
d12d51d5 270 LOG_INT("kqemu_set_notdirty: addr=%08lx\n",
da260249 271 (unsigned long)ram_addr);
fc8dc060
FB
272 /* we only track transitions to dirty state */
273 if (phys_ram_dirty[ram_addr >> TARGET_PAGE_BITS] != 0xff)
274 return;
aa062973
FB
275 if (nb_ram_pages_to_update >= KQEMU_MAX_RAM_PAGES_TO_UPDATE)
276 nb_ram_pages_to_update = KQEMU_RAM_PAGES_UPDATE_ALL;
277 else
278 ram_pages_to_update[nb_ram_pages_to_update++] = ram_addr;
279}
280
f32fc648
FB
281static void kqemu_reset_modified_ram_pages(void)
282{
283 int i;
284 unsigned long page_index;
3b46e624 285
f32fc648
FB
286 for(i = 0; i < nb_modified_ram_pages; i++) {
287 page_index = modified_ram_pages[i] >> TARGET_PAGE_BITS;
288 modified_ram_pages_table[page_index] = 0;
289 }
290 nb_modified_ram_pages = 0;
291}
292
293void kqemu_modify_page(CPUState *env, ram_addr_t ram_addr)
294{
295 unsigned long page_index;
296 int ret;
297#ifdef _WIN32
298 DWORD temp;
299#endif
300
301 page_index = ram_addr >> TARGET_PAGE_BITS;
302 if (!modified_ram_pages_table[page_index]) {
303#if 0
304 printf("%d: modify_page=%08lx\n", nb_modified_ram_pages, ram_addr);
305#endif
306 modified_ram_pages_table[page_index] = 1;
307 modified_ram_pages[nb_modified_ram_pages++] = ram_addr;
308 if (nb_modified_ram_pages >= KQEMU_MAX_MODIFIED_RAM_PAGES) {
309 /* flush */
310#ifdef _WIN32
5fafdf24
TS
311 ret = DeviceIoControl(kqemu_fd, KQEMU_MODIFY_RAM_PAGES,
312 &nb_modified_ram_pages,
f32fc648
FB
313 sizeof(nb_modified_ram_pages),
314 NULL, 0, &temp, NULL);
315#else
5fafdf24 316 ret = ioctl(kqemu_fd, KQEMU_MODIFY_RAM_PAGES,
f32fc648
FB
317 &nb_modified_ram_pages);
318#endif
319 kqemu_reset_modified_ram_pages();
320 }
321 }
322}
323
da260249
FB
324void kqemu_set_phys_mem(uint64_t start_addr, ram_addr_t size,
325 ram_addr_t phys_offset)
326{
327 struct kqemu_phys_mem kphys_mem1, *kphys_mem = &kphys_mem1;
328 uint64_t end;
329 int ret, io_index;
330
331 end = (start_addr + size + TARGET_PAGE_SIZE - 1) & TARGET_PAGE_MASK;
332 start_addr &= TARGET_PAGE_MASK;
333 kphys_mem->phys_addr = start_addr;
334 kphys_mem->size = end - start_addr;
335 kphys_mem->ram_addr = phys_offset & TARGET_PAGE_MASK;
336 io_index = phys_offset & ~TARGET_PAGE_MASK;
337 switch(io_index) {
338 case IO_MEM_RAM:
339 kphys_mem->io_index = KQEMU_IO_MEM_RAM;
340 break;
341 case IO_MEM_ROM:
342 kphys_mem->io_index = KQEMU_IO_MEM_ROM;
343 break;
344 default:
345 if (qpi_io_memory == io_index) {
346 kphys_mem->io_index = KQEMU_IO_MEM_COMM;
347 } else {
348 kphys_mem->io_index = KQEMU_IO_MEM_UNASSIGNED;
349 }
350 break;
351 }
352#ifdef _WIN32
353 {
354 DWORD temp;
355 ret = DeviceIoControl(kqemu_fd, KQEMU_SET_PHYS_MEM,
356 kphys_mem, sizeof(*kphys_mem),
357 NULL, 0, &temp, NULL) == TRUE ? 0 : -1;
358 }
359#else
360 ret = ioctl(kqemu_fd, KQEMU_SET_PHYS_MEM, kphys_mem);
361#endif
362 if (ret < 0) {
363 fprintf(stderr, "kqemu: KQEMU_SET_PHYS_PAGE error=%d: start_addr=0x%016" PRIx64 " size=0x%08lx phys_offset=0x%08lx\n",
364 ret, start_addr,
365 (unsigned long)size, (unsigned long)phys_offset);
366 }
367}
368
9df217a3
FB
/* Memory image used by the legacy FSAVE/FRSTOR instructions (108 bytes).
   The dummy fields are the unused upper halves of the 32-bit slots. */
struct fpstate {
    uint16_t fpuc;      /* control word */
    uint16_t dummy1;
    uint16_t fpus;      /* status word */
    uint16_t dummy2;
    uint16_t fptag;     /* tag word */
    uint16_t dummy3;

    uint32_t fpip;      /* instruction pointer */
    uint32_t fpcs;
    uint32_t fpoo;      /* operand pointer */
    uint32_t fpos;
    uint8_t fpregs1[8 * 10];  /* eight 80-bit FP registers */
};
383
/* Memory image used by FXSAVE/FXRSTOR (512 bytes, must be 16-byte
   aligned when passed to the instructions). */
struct fpxstate {
    uint16_t fpuc;        /* control word */
    uint16_t fpus;        /* status word */
    uint16_t fptag;       /* abridged tag word (1 bit per register) */
    uint16_t fop;
    uint32_t fpuip;
    uint16_t cs_sel;
    uint16_t dummy0;
    uint32_t fpudp;
    uint16_t ds_sel;
    uint16_t dummy1;
    uint32_t mxcsr;
    uint32_t mxcsr_mask;
    uint8_t fpregs1[8 * 16];   /* ST0-ST7, 16 bytes each */
    uint8_t xmm_regs[16 * 16]; /* XMM0-XMM15 */
    uint8_t dummy2[96];
};

/* single static save area, aligned as FXSAVE/FXRSTOR require */
static struct fpxstate fpx1 __attribute__((aligned(16)));
403
404static void restore_native_fp_frstor(CPUState *env)
405{
406 int fptag, i, j;
407 struct fpstate fp1, *fp = &fp1;
3b46e624 408
9df217a3
FB
409 fp->fpuc = env->fpuc;
410 fp->fpus = (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11;
411 fptag = 0;
412 for (i=7; i>=0; i--) {
413 fptag <<= 2;
414 if (env->fptags[i]) {
415 fptag |= 3;
416 } else {
417 /* the FPU automatically computes it */
418 }
419 }
420 fp->fptag = fptag;
421 j = env->fpstt;
422 for(i = 0;i < 8; i++) {
423 memcpy(&fp->fpregs1[i * 10], &env->fpregs[j].d, 10);
424 j = (j + 1) & 7;
425 }
426 asm volatile ("frstor %0" : "=m" (*fp));
427}
5fafdf24 428
9df217a3
FB
429static void save_native_fp_fsave(CPUState *env)
430{
431 int fptag, i, j;
432 uint16_t fpuc;
433 struct fpstate fp1, *fp = &fp1;
434
435 asm volatile ("fsave %0" : : "m" (*fp));
436 env->fpuc = fp->fpuc;
437 env->fpstt = (fp->fpus >> 11) & 7;
438 env->fpus = fp->fpus & ~0x3800;
439 fptag = fp->fptag;
440 for(i = 0;i < 8; i++) {
441 env->fptags[i] = ((fptag & 3) == 3);
442 fptag >>= 2;
443 }
444 j = env->fpstt;
445 for(i = 0;i < 8; i++) {
446 memcpy(&env->fpregs[j].d, &fp->fpregs1[i * 10], 10);
447 j = (j + 1) & 7;
448 }
449 /* we must restore the default rounding state */
450 fpuc = 0x037f | (env->fpuc & (3 << 10));
451 asm volatile("fldcw %0" : : "m" (fpuc));
452}
453
454static void restore_native_fp_fxrstor(CPUState *env)
455{
456 struct fpxstate *fp = &fpx1;
457 int i, j, fptag;
458
459 fp->fpuc = env->fpuc;
460 fp->fpus = (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11;
461 fptag = 0;
462 for(i = 0; i < 8; i++)
463 fptag |= (env->fptags[i] << i);
464 fp->fptag = fptag ^ 0xff;
465
466 j = env->fpstt;
467 for(i = 0;i < 8; i++) {
468 memcpy(&fp->fpregs1[i * 16], &env->fpregs[j].d, 10);
469 j = (j + 1) & 7;
470 }
471 if (env->cpuid_features & CPUID_SSE) {
472 fp->mxcsr = env->mxcsr;
473 /* XXX: check if DAZ is not available */
474 fp->mxcsr_mask = 0xffff;
c28e951f 475 memcpy(fp->xmm_regs, env->xmm_regs, CPU_NB_REGS * 16);
9df217a3
FB
476 }
477 asm volatile ("fxrstor %0" : "=m" (*fp));
478}
479
480static void save_native_fp_fxsave(CPUState *env)
481{
482 struct fpxstate *fp = &fpx1;
483 int fptag, i, j;
484 uint16_t fpuc;
485
486 asm volatile ("fxsave %0" : : "m" (*fp));
487 env->fpuc = fp->fpuc;
488 env->fpstt = (fp->fpus >> 11) & 7;
489 env->fpus = fp->fpus & ~0x3800;
490 fptag = fp->fptag ^ 0xff;
491 for(i = 0;i < 8; i++) {
492 env->fptags[i] = (fptag >> i) & 1;
493 }
494 j = env->fpstt;
495 for(i = 0;i < 8; i++) {
496 memcpy(&env->fpregs[j].d, &fp->fpregs1[i * 16], 10);
497 j = (j + 1) & 7;
498 }
499 if (env->cpuid_features & CPUID_SSE) {
500 env->mxcsr = fp->mxcsr;
c28e951f 501 memcpy(env->xmm_regs, fp->xmm_regs, CPU_NB_REGS * 16);
9df217a3
FB
502 }
503
504 /* we must restore the default rounding state */
505 asm volatile ("fninit");
506 fpuc = 0x037f | (env->fpuc & (3 << 10));
507 asm volatile("fldcw %0" : : "m" (fpuc));
508}
509
c28e951f
FB
510static int do_syscall(CPUState *env,
511 struct kqemu_cpu_state *kenv)
512{
513 int selector;
3b46e624 514
c28e951f 515 selector = (env->star >> 32) & 0xffff;
da260249 516#ifdef TARGET_X86_64
c28e951f 517 if (env->hflags & HF_LMA_MASK) {
93eac243
FB
518 int code64;
519
c28e951f
FB
520 env->regs[R_ECX] = kenv->next_eip;
521 env->regs[11] = env->eflags;
522
93eac243
FB
523 code64 = env->hflags & HF_CS64_MASK;
524
c28e951f 525 cpu_x86_set_cpl(env, 0);
5fafdf24
TS
526 cpu_x86_load_seg_cache(env, R_CS, selector & 0xfffc,
527 0, 0xffffffff,
c4e27dd4 528 DESC_G_MASK | DESC_P_MASK |
c28e951f
FB
529 DESC_S_MASK |
530 DESC_CS_MASK | DESC_R_MASK | DESC_A_MASK | DESC_L_MASK);
5fafdf24 531 cpu_x86_load_seg_cache(env, R_SS, (selector + 8) & 0xfffc,
c28e951f
FB
532 0, 0xffffffff,
533 DESC_G_MASK | DESC_B_MASK | DESC_P_MASK |
534 DESC_S_MASK |
535 DESC_W_MASK | DESC_A_MASK);
536 env->eflags &= ~env->fmask;
93eac243 537 if (code64)
c28e951f
FB
538 env->eip = env->lstar;
539 else
540 env->eip = env->cstar;
5fafdf24 541 } else
c28e951f
FB
542#endif
543 {
544 env->regs[R_ECX] = (uint32_t)kenv->next_eip;
3b46e624 545
c28e951f 546 cpu_x86_set_cpl(env, 0);
5fafdf24
TS
547 cpu_x86_load_seg_cache(env, R_CS, selector & 0xfffc,
548 0, 0xffffffff,
c28e951f
FB
549 DESC_G_MASK | DESC_B_MASK | DESC_P_MASK |
550 DESC_S_MASK |
551 DESC_CS_MASK | DESC_R_MASK | DESC_A_MASK);
5fafdf24 552 cpu_x86_load_seg_cache(env, R_SS, (selector + 8) & 0xfffc,
c28e951f
FB
553 0, 0xffffffff,
554 DESC_G_MASK | DESC_B_MASK | DESC_P_MASK |
555 DESC_S_MASK |
556 DESC_W_MASK | DESC_A_MASK);
557 env->eflags &= ~(IF_MASK | RF_MASK | VM_MASK);
558 env->eip = (uint32_t)env->star;
559 }
560 return 2;
561}
562
f32fc648 563#ifdef CONFIG_PROFILER
aa062973
FB
564
#define PC_REC_SIZE 1
#define PC_REC_HASH_BITS 16
#define PC_REC_HASH_SIZE (1 << PC_REC_HASH_BITS)

/* one record per sampled guest PC, chained per hash bucket */
typedef struct PCRecord {
    unsigned long pc;
    int64_t count;
    struct PCRecord *next;
} PCRecord;

static PCRecord *pc_rec_hash[PC_REC_HASH_SIZE];
static int nb_pc_records;
aa062973 577
f32fc648 578static void kqemu_record_pc(unsigned long pc)
aa062973
FB
579{
580 unsigned long h;
581 PCRecord **pr, *r;
582
583 h = pc / PC_REC_SIZE;
584 h = h ^ (h >> PC_REC_HASH_BITS);
585 h &= (PC_REC_HASH_SIZE - 1);
586 pr = &pc_rec_hash[h];
587 for(;;) {
588 r = *pr;
589 if (r == NULL)
590 break;
591 if (r->pc == pc) {
592 r->count++;
593 return;
594 }
595 pr = &r->next;
596 }
597 r = malloc(sizeof(PCRecord));
598 r->count = 1;
599 r->pc = pc;
600 r->next = NULL;
601 *pr = r;
602 nb_pc_records++;
603}
604
f32fc648 605static int pc_rec_cmp(const void *p1, const void *p2)
aa062973
FB
606{
607 PCRecord *r1 = *(PCRecord **)p1;
608 PCRecord *r2 = *(PCRecord **)p2;
609 if (r1->count < r2->count)
610 return 1;
611 else if (r1->count == r2->count)
612 return 0;
613 else
614 return -1;
615}
616
f32fc648
FB
617static void kqemu_record_flush(void)
618{
619 PCRecord *r, *r_next;
620 int h;
621
622 for(h = 0; h < PC_REC_HASH_SIZE; h++) {
623 for(r = pc_rec_hash[h]; r != NULL; r = r_next) {
624 r_next = r->next;
625 free(r);
626 }
627 pc_rec_hash[h] = NULL;
628 }
629 nb_pc_records = 0;
630}
631
aa062973
FB
632void kqemu_record_dump(void)
633{
634 PCRecord **pr, *r;
635 int i, h;
636 FILE *f;
637 int64_t total, sum;
638
639 pr = malloc(sizeof(PCRecord *) * nb_pc_records);
640 i = 0;
641 total = 0;
642 for(h = 0; h < PC_REC_HASH_SIZE; h++) {
643 for(r = pc_rec_hash[h]; r != NULL; r = r->next) {
644 pr[i++] = r;
645 total += r->count;
646 }
647 }
648 qsort(pr, nb_pc_records, sizeof(PCRecord *), pc_rec_cmp);
3b46e624 649
aa062973
FB
650 f = fopen("/tmp/kqemu.stats", "w");
651 if (!f) {
652 perror("/tmp/kqemu.stats");
653 exit(1);
654 }
26a76461 655 fprintf(f, "total: %" PRId64 "\n", total);
aa062973
FB
656 sum = 0;
657 for(i = 0; i < nb_pc_records; i++) {
658 r = pr[i];
659 sum += r->count;
5fafdf24
TS
660 fprintf(f, "%08lx: %" PRId64 " %0.2f%% %0.2f%%\n",
661 r->pc,
662 r->count,
aa062973
FB
663 (double)r->count / (double)total * 100.0,
664 (double)sum / (double)total * 100.0);
665 }
666 fclose(f);
667 free(pr);
f32fc648
FB
668
669 kqemu_record_flush();
aa062973
FB
670}
671#endif
672
da260249
FB
673static inline void kqemu_load_seg(struct kqemu_segment_cache *ksc,
674 const SegmentCache *sc)
675{
676 ksc->selector = sc->selector;
677 ksc->flags = sc->flags;
678 ksc->limit = sc->limit;
679 ksc->base = sc->base;
680}
681
682static inline void kqemu_save_seg(SegmentCache *sc,
683 const struct kqemu_segment_cache *ksc)
684{
685 sc->selector = ksc->selector;
686 sc->flags = ksc->flags;
687 sc->limit = ksc->limit;
688 sc->base = ksc->base;
689}
690
9df217a3
FB
691int kqemu_cpu_exec(CPUState *env)
692{
693 struct kqemu_cpu_state kcpu_state, *kenv = &kcpu_state;
f32fc648
FB
694 int ret, cpl, i;
695#ifdef CONFIG_PROFILER
696 int64_t ti;
697#endif
6e4255f6
FB
698#ifdef _WIN32
699 DWORD temp;
700#endif
9df217a3 701
f32fc648
FB
702#ifdef CONFIG_PROFILER
703 ti = profile_getclock();
704#endif
d12d51d5
AL
705 LOG_INT("kqemu: cpu_exec: enter\n");
706 LOG_INT_STATE(env);
da260249
FB
707 for(i = 0; i < CPU_NB_REGS; i++)
708 kenv->regs[i] = env->regs[i];
9df217a3
FB
709 kenv->eip = env->eip;
710 kenv->eflags = env->eflags;
da260249
FB
711 for(i = 0; i < 6; i++)
712 kqemu_load_seg(&kenv->segs[i], &env->segs[i]);
713 kqemu_load_seg(&kenv->ldt, &env->ldt);
714 kqemu_load_seg(&kenv->tr, &env->tr);
715 kqemu_load_seg(&kenv->gdt, &env->gdt);
716 kqemu_load_seg(&kenv->idt, &env->idt);
9df217a3
FB
717 kenv->cr0 = env->cr[0];
718 kenv->cr2 = env->cr[2];
719 kenv->cr3 = env->cr[3];
720 kenv->cr4 = env->cr[4];
721 kenv->a20_mask = env->a20_mask;
c28e951f 722 kenv->efer = env->efer;
f32fc648
FB
723 kenv->tsc_offset = 0;
724 kenv->star = env->star;
725 kenv->sysenter_cs = env->sysenter_cs;
726 kenv->sysenter_esp = env->sysenter_esp;
727 kenv->sysenter_eip = env->sysenter_eip;
da260249 728#ifdef TARGET_X86_64
f32fc648
FB
729 kenv->lstar = env->lstar;
730 kenv->cstar = env->cstar;
731 kenv->fmask = env->fmask;
732 kenv->kernelgsbase = env->kernelgsbase;
c28e951f 733#endif
9df217a3
FB
734 if (env->dr[7] & 0xff) {
735 kenv->dr7 = env->dr[7];
736 kenv->dr0 = env->dr[0];
737 kenv->dr1 = env->dr[1];
738 kenv->dr2 = env->dr[2];
739 kenv->dr3 = env->dr[3];
740 } else {
741 kenv->dr7 = 0;
742 }
743 kenv->dr6 = env->dr[6];
f32fc648
FB
744 cpl = (env->hflags & HF_CPL_MASK);
745 kenv->cpl = cpl;
9df217a3 746 kenv->nb_pages_to_flush = nb_pages_to_flush;
f32fc648 747 kenv->user_only = (env->kqemu_enabled == 1);
aa062973 748 kenv->nb_ram_pages_to_update = nb_ram_pages_to_update;
aa062973 749 nb_ram_pages_to_update = 0;
f32fc648 750 kenv->nb_modified_ram_pages = nb_modified_ram_pages;
da260249 751
f32fc648
FB
752 kqemu_reset_modified_ram_pages();
753
754 if (env->cpuid_features & CPUID_FXSR)
755 restore_native_fp_fxrstor(env);
756 else
757 restore_native_fp_frstor(env);
9df217a3 758
6e4255f6 759#ifdef _WIN32
a332e112
FB
760 if (DeviceIoControl(kqemu_fd, KQEMU_EXEC,
761 kenv, sizeof(struct kqemu_cpu_state),
762 kenv, sizeof(struct kqemu_cpu_state),
763 &temp, NULL)) {
764 ret = kenv->retval;
765 } else {
766 ret = -1;
767 }
6e4255f6 768#else
6e4255f6
FB
769 ioctl(kqemu_fd, KQEMU_EXEC, kenv);
770 ret = kenv->retval;
6e4255f6 771#endif
f32fc648
FB
772 if (env->cpuid_features & CPUID_FXSR)
773 save_native_fp_fxsave(env);
774 else
775 save_native_fp_fsave(env);
9df217a3 776
da260249
FB
777 for(i = 0; i < CPU_NB_REGS; i++)
778 env->regs[i] = kenv->regs[i];
9df217a3
FB
779 env->eip = kenv->eip;
780 env->eflags = kenv->eflags;
da260249
FB
781 for(i = 0; i < 6; i++)
782 kqemu_save_seg(&env->segs[i], &kenv->segs[i]);
f32fc648 783 cpu_x86_set_cpl(env, kenv->cpl);
da260249 784 kqemu_save_seg(&env->ldt, &kenv->ldt);
f32fc648
FB
785 env->cr[0] = kenv->cr0;
786 env->cr[4] = kenv->cr4;
787 env->cr[3] = kenv->cr3;
9df217a3
FB
788 env->cr[2] = kenv->cr2;
789 env->dr[6] = kenv->dr6;
da260249 790#ifdef TARGET_X86_64
f32fc648 791 env->kernelgsbase = kenv->kernelgsbase;
f32fc648
FB
792#endif
793
794 /* flush pages as indicated by kqemu */
795 if (kenv->nb_pages_to_flush >= KQEMU_FLUSH_ALL) {
796 tlb_flush(env, 1);
797 } else {
798 for(i = 0; i < kenv->nb_pages_to_flush; i++) {
799 tlb_flush_page(env, pages_to_flush[i]);
800 }
801 }
802 nb_pages_to_flush = 0;
803
804#ifdef CONFIG_PROFILER
805 kqemu_time += profile_getclock() - ti;
806 kqemu_exec_count++;
807#endif
9df217a3 808
aa062973
FB
809 if (kenv->nb_ram_pages_to_update > 0) {
810 cpu_tlb_update_dirty(env);
811 }
aa062973 812
f32fc648
FB
813 if (kenv->nb_modified_ram_pages > 0) {
814 for(i = 0; i < kenv->nb_modified_ram_pages; i++) {
815 unsigned long addr;
816 addr = modified_ram_pages[i];
817 tb_invalidate_phys_page_range(addr, addr + TARGET_PAGE_SIZE, 0);
818 }
819 }
f32fc648 820
aa062973
FB
821 /* restore the hidden flags */
822 {
823 unsigned int new_hflags;
824#ifdef TARGET_X86_64
5fafdf24 825 if ((env->hflags & HF_LMA_MASK) &&
aa062973
FB
826 (env->segs[R_CS].flags & DESC_L_MASK)) {
827 /* long mode */
828 new_hflags = HF_CS32_MASK | HF_SS32_MASK | HF_CS64_MASK;
829 } else
830#endif
831 {
832 /* legacy / compatibility case */
833 new_hflags = (env->segs[R_CS].flags & DESC_B_MASK)
834 >> (DESC_B_SHIFT - HF_CS32_SHIFT);
835 new_hflags |= (env->segs[R_SS].flags & DESC_B_MASK)
836 >> (DESC_B_SHIFT - HF_SS32_SHIFT);
5fafdf24 837 if (!(env->cr[0] & CR0_PE_MASK) ||
aa062973
FB
838 (env->eflags & VM_MASK) ||
839 !(env->hflags & HF_CS32_MASK)) {
840 /* XXX: try to avoid this test. The problem comes from the
841 fact that is real mode or vm86 mode we only modify the
842 'base' and 'selector' fields of the segment cache to go
843 faster. A solution may be to force addseg to one in
844 translate-i386.c. */
845 new_hflags |= HF_ADDSEG_MASK;
846 } else {
5fafdf24 847 new_hflags |= ((env->segs[R_DS].base |
aa062973 848 env->segs[R_ES].base |
5fafdf24 849 env->segs[R_SS].base) != 0) <<
aa062973
FB
850 HF_ADDSEG_SHIFT;
851 }
852 }
5fafdf24 853 env->hflags = (env->hflags &
aa062973
FB
854 ~(HF_CS32_MASK | HF_SS32_MASK | HF_CS64_MASK | HF_ADDSEG_MASK)) |
855 new_hflags;
856 }
f32fc648
FB
857 /* update FPU flags */
858 env->hflags = (env->hflags & ~(HF_MP_MASK | HF_EM_MASK | HF_TS_MASK)) |
859 ((env->cr[0] << (HF_MP_SHIFT - 1)) & (HF_MP_MASK | HF_EM_MASK | HF_TS_MASK));
860 if (env->cr[4] & CR4_OSFXSR_MASK)
861 env->hflags |= HF_OSFXSR_MASK;
862 else
863 env->hflags &= ~HF_OSFXSR_MASK;
3b46e624 864
d12d51d5 865 LOG_INT("kqemu: kqemu_cpu_exec: ret=0x%x\n", ret);
c28e951f
FB
866 if (ret == KQEMU_RET_SYSCALL) {
867 /* syscall instruction */
868 return do_syscall(env, kenv);
5fafdf24 869 } else
9df217a3
FB
870 if ((ret & 0xff00) == KQEMU_RET_INT) {
871 env->exception_index = ret & 0xff;
872 env->error_code = 0;
873 env->exception_is_int = 1;
874 env->exception_next_eip = kenv->next_eip;
f32fc648
FB
875#ifdef CONFIG_PROFILER
876 kqemu_ret_int_count++;
877#endif
d12d51d5
AL
878 LOG_INT("kqemu: interrupt v=%02x:\n", env->exception_index);
879 LOG_INT_STATE(env);
9df217a3
FB
880 return 1;
881 } else if ((ret & 0xff00) == KQEMU_RET_EXCEPTION) {
882 env->exception_index = ret & 0xff;
883 env->error_code = kenv->error_code;
884 env->exception_is_int = 0;
885 env->exception_next_eip = 0;
f32fc648
FB
886#ifdef CONFIG_PROFILER
887 kqemu_ret_excp_count++;
888#endif
d12d51d5 889 LOG_INT("kqemu: exception v=%02x e=%04x:\n",
9df217a3 890 env->exception_index, env->error_code);
d12d51d5 891 LOG_INT_STATE(env);
9df217a3
FB
892 return 1;
893 } else if (ret == KQEMU_RET_INTR) {
f32fc648
FB
894#ifdef CONFIG_PROFILER
895 kqemu_ret_intr_count++;
896#endif
d12d51d5 897 LOG_INT_STATE(env);
9df217a3 898 return 0;
5fafdf24 899 } else if (ret == KQEMU_RET_SOFTMMU) {
f32fc648
FB
900#ifdef CONFIG_PROFILER
901 {
902 unsigned long pc = env->eip + env->segs[R_CS].base;
903 kqemu_record_pc(pc);
904 }
aa062973 905#endif
d12d51d5 906 LOG_INT_STATE(env);
9df217a3
FB
907 return 2;
908 } else {
909 cpu_dump_state(env, stderr, fprintf, 0);
910 fprintf(stderr, "Unsupported return value: 0x%x\n", ret);
911 exit(1);
912 }
913 return 0;
914}
915
a332e112
FB
916void kqemu_cpu_interrupt(CPUState *env)
917{
da260249 918#if defined(_WIN32)
5fafdf24 919 /* cancelling the I/O request causes KQEMU to finish executing the
a332e112
FB
920 current block and successfully returning. */
921 CancelIo(kqemu_fd);
922#endif
923}
924
da260249
FB
925/*
926 QEMU paravirtualization interface. The current interface only
927 allows to modify the IF and IOPL flags when running in
928 kqemu.
929
930 At this point it is not very satisfactory. I leave it for reference
931 as it adds little complexity.
932*/
933
934#define QPI_COMM_PAGE_PHYS_ADDR 0xff000000
935
936static uint32_t qpi_mem_readb(void *opaque, target_phys_addr_t addr)
937{
938 return 0;
939}
940
941static uint32_t qpi_mem_readw(void *opaque, target_phys_addr_t addr)
942{
943 return 0;
944}
945
946static void qpi_mem_writeb(void *opaque, target_phys_addr_t addr, uint32_t val)
947{
948}
949
950static void qpi_mem_writew(void *opaque, target_phys_addr_t addr, uint32_t val)
951{
952}
953
954static uint32_t qpi_mem_readl(void *opaque, target_phys_addr_t addr)
955{
956 CPUState *env;
957
958 env = cpu_single_env;
959 if (!env)
960 return 0;
961 return env->eflags & (IF_MASK | IOPL_MASK);
962}
963
964/* Note: after writing to this address, the guest code must make sure
965 it is exiting the current TB. pushf/popf can be used for that
966 purpose. */
967static void qpi_mem_writel(void *opaque, target_phys_addr_t addr, uint32_t val)
968{
969 CPUState *env;
970
971 env = cpu_single_env;
972 if (!env)
973 return;
974 env->eflags = (env->eflags & ~(IF_MASK | IOPL_MASK)) |
975 (val & (IF_MASK | IOPL_MASK));
976}
977
978static CPUReadMemoryFunc *qpi_mem_read[3] = {
979 qpi_mem_readb,
980 qpi_mem_readw,
981 qpi_mem_readl,
982};
983
984static CPUWriteMemoryFunc *qpi_mem_write[3] = {
985 qpi_mem_writeb,
986 qpi_mem_writew,
987 qpi_mem_writel,
988};
989
990static void qpi_init(void)
991{
992 kqemu_comm_base = 0xff000000 | 1;
993 qpi_io_memory = cpu_register_io_memory(0,
994 qpi_mem_read,
995 qpi_mem_write, NULL);
996 cpu_register_physical_memory(kqemu_comm_base & ~0xfff,
997 0x1000, qpi_io_memory);
998}
9df217a3 999#endif