git.proxmox.com Git - qemu.git/blob - exec.c
suppressed ring 0 hacks
[qemu.git] / exec.c
1 /*
2 * virtual page mapping and translated block handling
3 *
4 * Copyright (c) 2003 Fabrice Bellard
5 *
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
10 *
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, write to the Free Software
18 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
19 */
20 #include <stdlib.h>
21 #include <stdio.h>
22 #include <stdarg.h>
23 #include <string.h>
24 #include <errno.h>
25 #include <unistd.h>
26 #include <inttypes.h>
27 #include <sys/mman.h>
28
29 #include "cpu-i386.h"
30 #include "exec.h"
31
32 //#define DEBUG_TB_INVALIDATE
33 //#define DEBUG_FLUSH
34
35 /* make various TB consistency checks */
36 //#define DEBUG_TB_CHECK
37
38 /* threshold to flush the translated code buffer */
39 #define CODE_GEN_BUFFER_MAX_SIZE (CODE_GEN_BUFFER_SIZE - CODE_GEN_MAX_SIZE)
40
41 #define CODE_GEN_MAX_BLOCKS (CODE_GEN_BUFFER_SIZE / 64)
42
43 TranslationBlock tbs[CODE_GEN_MAX_BLOCKS];
44 TranslationBlock *tb_hash[CODE_GEN_HASH_SIZE];
45 int nb_tbs;
46 /* any access to the tbs or the page table must use this lock */
47 spinlock_t tb_lock = SPIN_LOCK_UNLOCKED;
48
49 uint8_t code_gen_buffer[CODE_GEN_BUFFER_SIZE];
50 uint8_t *code_gen_ptr;
51
52 /* XXX: pack the flags in the low bits of the pointer ? */
53 typedef struct PageDesc {
54 unsigned long flags;
55 TranslationBlock *first_tb;
56 } PageDesc;
57
58 #define L2_BITS 10
59 #define L1_BITS (32 - L2_BITS - TARGET_PAGE_BITS)
60
61 #define L1_SIZE (1 << L1_BITS)
62 #define L2_SIZE (1 << L2_BITS)
63
64 static void tb_invalidate_page(unsigned long address);
65
66 unsigned long real_host_page_size;
67 unsigned long host_page_bits;
68 unsigned long host_page_size;
69 unsigned long host_page_mask;
70
71 static PageDesc *l1_map[L1_SIZE];
72
73 static void page_init(void)
74 {
75 /* NOTE: we can always suppose that host_page_size >=
76 TARGET_PAGE_SIZE */
77 real_host_page_size = getpagesize();
78 if (host_page_size == 0)
79 host_page_size = real_host_page_size;
80 if (host_page_size < TARGET_PAGE_SIZE)
81 host_page_size = TARGET_PAGE_SIZE;
82 host_page_bits = 0;
83 while ((1 << host_page_bits) < host_page_size)
84 host_page_bits++;
85 host_page_mask = ~(host_page_size - 1);
86 }
87
88 /* dump memory mappings */
89 void page_dump(FILE *f)
90 {
91 unsigned long start, end;
92 int i, j, prot, prot1;
93 PageDesc *p;
94
95 fprintf(f, "%-8s %-8s %-8s %s\n",
96 "start", "end", "size", "prot");
97 start = -1;
98 end = -1;
99 prot = 0;
100 for(i = 0; i <= L1_SIZE; i++) {
101 if (i < L1_SIZE)
102 p = l1_map[i];
103 else
104 p = NULL;
105 for(j = 0;j < L2_SIZE; j++) {
106 if (!p)
107 prot1 = 0;
108 else
109 prot1 = p[j].flags;
110 if (prot1 != prot) {
111 end = (i << (32 - L1_BITS)) | (j << TARGET_PAGE_BITS);
112 if (start != -1) {
113 fprintf(f, "%08lx-%08lx %08lx %c%c%c\n",
114 start, end, end - start,
115 prot & PAGE_READ ? 'r' : '-',
116 prot & PAGE_WRITE ? 'w' : '-',
117 prot & PAGE_EXEC ? 'x' : '-');
118 }
119 if (prot1 != 0)
120 start = end;
121 else
122 start = -1;
123 prot = prot1;
124 }
125 if (!p)
126 break;
127 }
128 }
129 }
130
131 static inline PageDesc *page_find_alloc(unsigned int index)
132 {
133 PageDesc **lp, *p;
134
135 lp = &l1_map[index >> L2_BITS];
136 p = *lp;
137 if (!p) {
138 /* allocate if not found */
139 p = malloc(sizeof(PageDesc) * L2_SIZE);
140 memset(p, 0, sizeof(PageDesc) * L2_SIZE);
141 *lp = p;
142 }
143 return p + (index & (L2_SIZE - 1));
144 }
145
146 static inline PageDesc *page_find(unsigned int index)
147 {
148 PageDesc *p;
149
150 p = l1_map[index >> L2_BITS];
151 if (!p)
152 return 0;
153 return p + (index & (L2_SIZE - 1));
154 }
155
156 int page_get_flags(unsigned long address)
157 {
158 PageDesc *p;
159
160 p = page_find(address >> TARGET_PAGE_BITS);
161 if (!p)
162 return 0;
163 return p->flags;
164 }
165
166 /* modify the flags of a page and invalidate the code if
167 necessary. The flag PAGE_WRITE_ORG is positionned automatically
168 depending on PAGE_WRITE */
169 void page_set_flags(unsigned long start, unsigned long end, int flags)
170 {
171 PageDesc *p;
172 unsigned long addr;
173
174 start = start & TARGET_PAGE_MASK;
175 end = TARGET_PAGE_ALIGN(end);
176 if (flags & PAGE_WRITE)
177 flags |= PAGE_WRITE_ORG;
178 spin_lock(&tb_lock);
179 for(addr = start; addr < end; addr += TARGET_PAGE_SIZE) {
180 p = page_find_alloc(addr >> TARGET_PAGE_BITS);
181 /* if the write protection is set, then we invalidate the code
182 inside */
183 if (!(p->flags & PAGE_WRITE) &&
184 (flags & PAGE_WRITE) &&
185 p->first_tb) {
186 tb_invalidate_page(addr);
187 }
188 p->flags = flags;
189 }
190 spin_unlock(&tb_lock);
191 }
192
193 void cpu_exec_init(void)
194 {
195 if (!code_gen_ptr) {
196 code_gen_ptr = code_gen_buffer;
197 page_init();
198 }
199 }
200
201 /* set to NULL all the 'first_tb' fields in all PageDescs */
202 static void page_flush_tb(void)
203 {
204 int i, j;
205 PageDesc *p;
206
207 for(i = 0; i < L1_SIZE; i++) {
208 p = l1_map[i];
209 if (p) {
210 for(j = 0; j < L2_SIZE; j++)
211 p[j].first_tb = NULL;
212 }
213 }
214 }
215
216 /* flush all the translation blocks */
217 /* XXX: tb_flush is currently not thread safe */
218 void tb_flush(void)
219 {
220 int i;
221 #ifdef DEBUG_FLUSH
222 printf("qemu: flush code_size=%d nb_tbs=%d avg_tb_size=%d\n",
223 code_gen_ptr - code_gen_buffer,
224 nb_tbs,
225 (code_gen_ptr - code_gen_buffer) / nb_tbs);
226 #endif
227 nb_tbs = 0;
228 for(i = 0;i < CODE_GEN_HASH_SIZE; i++)
229 tb_hash[i] = NULL;
230 page_flush_tb();
231 code_gen_ptr = code_gen_buffer;
232 /* XXX: flush processor icache at this point if cache flush is
233 expensive */
234 }
235
236 #ifdef DEBUG_TB_CHECK
237
238 static void tb_invalidate_check(unsigned long address)
239 {
240 TranslationBlock *tb;
241 int i;
242 address &= TARGET_PAGE_MASK;
243 for(i = 0;i < CODE_GEN_HASH_SIZE; i++) {
244 for(tb = tb_hash[i]; tb != NULL; tb = tb->hash_next) {
245 if (!(address + TARGET_PAGE_SIZE <= tb->pc ||
246 address >= tb->pc + tb->size)) {
247 printf("ERROR invalidate: address=%08lx PC=%08lx size=%04x\n",
248 address, tb->pc, tb->size);
249 }
250 }
251 }
252 }
253
254 /* verify that all the pages have correct rights for code */
255 static void tb_page_check(void)
256 {
257 TranslationBlock *tb;
258 int i, flags1, flags2;
259
260 for(i = 0;i < CODE_GEN_HASH_SIZE; i++) {
261 for(tb = tb_hash[i]; tb != NULL; tb = tb->hash_next) {
262 flags1 = page_get_flags(tb->pc);
263 flags2 = page_get_flags(tb->pc + tb->size - 1);
264 if ((flags1 & PAGE_WRITE) || (flags2 & PAGE_WRITE)) {
265 printf("ERROR page flags: PC=%08lx size=%04x f1=%x f2=%x\n",
266 tb->pc, tb->size, flags1, flags2);
267 }
268 }
269 }
270 }
271
272 void tb_jmp_check(TranslationBlock *tb)
273 {
274 TranslationBlock *tb1;
275 unsigned int n1;
276
277 /* suppress any remaining jumps to this TB */
278 tb1 = tb->jmp_first;
279 for(;;) {
280 n1 = (long)tb1 & 3;
281 tb1 = (TranslationBlock *)((long)tb1 & ~3);
282 if (n1 == 2)
283 break;
284 tb1 = tb1->jmp_next[n1];
285 }
286 /* check end of list */
287 if (tb1 != tb) {
288 printf("ERROR: jmp_list from 0x%08lx\n", (long)tb);
289 }
290 }
291
292 #endif
293
294 /* invalidate one TB */
295 static inline void tb_remove(TranslationBlock **ptb, TranslationBlock *tb,
296 int next_offset)
297 {
298 TranslationBlock *tb1;
299 for(;;) {
300 tb1 = *ptb;
301 if (tb1 == tb) {
302 *ptb = *(TranslationBlock **)((char *)tb1 + next_offset);
303 break;
304 }
305 ptb = (TranslationBlock **)((char *)tb1 + next_offset);
306 }
307 }
308
309 static inline void tb_jmp_remove(TranslationBlock *tb, int n)
310 {
311 TranslationBlock *tb1, **ptb;
312 unsigned int n1;
313
314 ptb = &tb->jmp_next[n];
315 tb1 = *ptb;
316 if (tb1) {
317 /* find tb(n) in circular list */
318 for(;;) {
319 tb1 = *ptb;
320 n1 = (long)tb1 & 3;
321 tb1 = (TranslationBlock *)((long)tb1 & ~3);
322 if (n1 == n && tb1 == tb)
323 break;
324 if (n1 == 2) {
325 ptb = &tb1->jmp_first;
326 } else {
327 ptb = &tb1->jmp_next[n1];
328 }
329 }
330 /* now we can suppress tb(n) from the list */
331 *ptb = tb->jmp_next[n];
332
333 tb->jmp_next[n] = NULL;
334 }
335 }
336
337 /* reset the jump entry 'n' of a TB so that it is not chained to
338 another TB */
339 static inline void tb_reset_jump(TranslationBlock *tb, int n)
340 {
341 tb_set_jmp_target(tb, n, (unsigned long)(tb->tc_ptr + tb->tb_next_offset[n]));
342 }
343
344 static inline void tb_invalidate(TranslationBlock *tb, int parity)
345 {
346 PageDesc *p;
347 unsigned int page_index1, page_index2;
348 unsigned int h, n1;
349 TranslationBlock *tb1, *tb2;
350
351 /* remove the TB from the hash list */
352 h = tb_hash_func(tb->pc);
353 tb_remove(&tb_hash[h], tb,
354 offsetof(TranslationBlock, hash_next));
355 /* remove the TB from the page list */
356 page_index1 = tb->pc >> TARGET_PAGE_BITS;
357 if ((page_index1 & 1) == parity) {
358 p = page_find(page_index1);
359 tb_remove(&p->first_tb, tb,
360 offsetof(TranslationBlock, page_next[page_index1 & 1]));
361 }
362 page_index2 = (tb->pc + tb->size - 1) >> TARGET_PAGE_BITS;
363 if ((page_index2 & 1) == parity) {
364 p = page_find(page_index2);
365 tb_remove(&p->first_tb, tb,
366 offsetof(TranslationBlock, page_next[page_index2 & 1]));
367 }
368
369 /* suppress this TB from the two jump lists */
370 tb_jmp_remove(tb, 0);
371 tb_jmp_remove(tb, 1);
372
373 /* suppress any remaining jumps to this TB */
374 tb1 = tb->jmp_first;
375 for(;;) {
376 n1 = (long)tb1 & 3;
377 if (n1 == 2)
378 break;
379 tb1 = (TranslationBlock *)((long)tb1 & ~3);
380 tb2 = tb1->jmp_next[n1];
381 tb_reset_jump(tb1, n1);
382 tb1->jmp_next[n1] = NULL;
383 tb1 = tb2;
384 }
385 tb->jmp_first = (TranslationBlock *)((long)tb | 2); /* fail safe */
386 }
387
388 /* invalidate all TBs which intersect with the target page starting at addr */
389 static void tb_invalidate_page(unsigned long address)
390 {
391 TranslationBlock *tb_next, *tb;
392 unsigned int page_index;
393 int parity1, parity2;
394 PageDesc *p;
395 #ifdef DEBUG_TB_INVALIDATE
396 printf("tb_invalidate_page: %lx\n", address);
397 #endif
398
399 page_index = address >> TARGET_PAGE_BITS;
400 p = page_find(page_index);
401 if (!p)
402 return;
403 tb = p->first_tb;
404 parity1 = page_index & 1;
405 parity2 = parity1 ^ 1;
406 while (tb != NULL) {
407 tb_next = tb->page_next[parity1];
408 tb_invalidate(tb, parity2);
409 tb = tb_next;
410 }
411 p->first_tb = NULL;
412 }
413
414 /* add the tb in the target page and protect it if necessary */
415 static inline void tb_alloc_page(TranslationBlock *tb, unsigned int page_index)
416 {
417 PageDesc *p;
418 unsigned long host_start, host_end, addr, page_addr;
419 int prot;
420
421 p = page_find_alloc(page_index);
422 tb->page_next[page_index & 1] = p->first_tb;
423 p->first_tb = tb;
424 if (p->flags & PAGE_WRITE) {
425 /* force the host page as non writable (writes will have a
426 page fault + mprotect overhead) */
427 page_addr = (page_index << TARGET_PAGE_BITS);
428 host_start = page_addr & host_page_mask;
429 host_end = host_start + host_page_size;
430 prot = 0;
431 for(addr = host_start; addr < host_end; addr += TARGET_PAGE_SIZE)
432 prot |= page_get_flags(addr);
433 mprotect((void *)host_start, host_page_size,
434 (prot & PAGE_BITS) & ~PAGE_WRITE);
435 #ifdef DEBUG_TB_INVALIDATE
436 printf("protecting code page: 0x%08lx\n",
437 host_start);
438 #endif
439 p->flags &= ~PAGE_WRITE;
440 #ifdef DEBUG_TB_CHECK
441 tb_page_check();
442 #endif
443 }
444 }
445
446 /* Allocate a new translation block. Flush the translation buffer if
447 too many translation blocks or too much generated code. */
448 TranslationBlock *tb_alloc(unsigned long pc)
449 {
450 TranslationBlock *tb;
451
452 if (nb_tbs >= CODE_GEN_MAX_BLOCKS ||
453 (code_gen_ptr - code_gen_buffer) >= CODE_GEN_BUFFER_MAX_SIZE)
454 return NULL;
455 tb = &tbs[nb_tbs++];
456 tb->pc = pc;
457 return tb;
458 }
459
460 /* link the tb with the other TBs */
461 void tb_link(TranslationBlock *tb)
462 {
463 unsigned int page_index1, page_index2;
464
465 /* add in the page list */
466 page_index1 = tb->pc >> TARGET_PAGE_BITS;
467 tb_alloc_page(tb, page_index1);
468 page_index2 = (tb->pc + tb->size - 1) >> TARGET_PAGE_BITS;
469 if (page_index2 != page_index1) {
470 tb_alloc_page(tb, page_index2);
471 }
472 tb->jmp_first = (TranslationBlock *)((long)tb | 2);
473 tb->jmp_next[0] = NULL;
474 tb->jmp_next[1] = NULL;
475
476 /* init original jump addresses */
477 if (tb->tb_next_offset[0] != 0xffff)
478 tb_reset_jump(tb, 0);
479 if (tb->tb_next_offset[1] != 0xffff)
480 tb_reset_jump(tb, 1);
481 }
482
483 /* called from signal handler: invalidate the code and unprotect the
484 page. Return TRUE if the fault was succesfully handled. */
485 int page_unprotect(unsigned long address)
486 {
487 unsigned int page_index, prot, pindex;
488 PageDesc *p, *p1;
489 unsigned long host_start, host_end, addr;
490
491 host_start = address & host_page_mask;
492 page_index = host_start >> TARGET_PAGE_BITS;
493 p1 = page_find(page_index);
494 if (!p1)
495 return 0;
496 host_end = host_start + host_page_size;
497 p = p1;
498 prot = 0;
499 for(addr = host_start;addr < host_end; addr += TARGET_PAGE_SIZE) {
500 prot |= p->flags;
501 p++;
502 }
503 /* if the page was really writable, then we change its
504 protection back to writable */
505 if (prot & PAGE_WRITE_ORG) {
506 mprotect((void *)host_start, host_page_size,
507 (prot & PAGE_BITS) | PAGE_WRITE);
508 pindex = (address - host_start) >> TARGET_PAGE_BITS;
509 p1[pindex].flags |= PAGE_WRITE;
510 /* and since the content will be modified, we must invalidate
511 the corresponding translated code. */
512 tb_invalidate_page(address);
513 #ifdef DEBUG_TB_CHECK
514 tb_invalidate_check(address);
515 #endif
516 return 1;
517 } else {
518 return 0;
519 }
520 }
521
522 /* call this function when system calls directly modify a memory area */
523 void page_unprotect_range(uint8_t *data, unsigned long data_size)
524 {
525 unsigned long start, end, addr;
526
527 start = (unsigned long)data;
528 end = start + data_size;
529 start &= TARGET_PAGE_MASK;
530 end = TARGET_PAGE_ALIGN(end);
531 for(addr = start; addr < end; addr += TARGET_PAGE_SIZE) {
532 page_unprotect(addr);
533 }
534 }
535
536 /* find the TB 'tb' such that tb[0].tc_ptr <= tc_ptr <
537 tb[1].tc_ptr. Return NULL if not found */
538 TranslationBlock *tb_find_pc(unsigned long tc_ptr)
539 {
540 int m_min, m_max, m;
541 unsigned long v;
542 TranslationBlock *tb;
543
544 if (nb_tbs <= 0)
545 return NULL;
546 if (tc_ptr < (unsigned long)code_gen_buffer ||
547 tc_ptr >= (unsigned long)code_gen_ptr)
548 return NULL;
549 /* binary search (cf Knuth) */
550 m_min = 0;
551 m_max = nb_tbs - 1;
552 while (m_min <= m_max) {
553 m = (m_min + m_max) >> 1;
554 tb = &tbs[m];
555 v = (unsigned long)tb->tc_ptr;
556 if (v == tc_ptr)
557 return tb;
558 else if (tc_ptr < v) {
559 m_max = m - 1;
560 } else {
561 m_min = m + 1;
562 }
563 }
564 return &tbs[m_max];
565 }
566
567 void cpu_abort(CPUState *env, const char *fmt, ...)
568 {
569 va_list ap;
570
571 va_start(ap, fmt);
572 fprintf(stderr, "qemu: fatal: ");
573 vfprintf(stderr, fmt, ap);
574 fprintf(stderr, "\n");
575 #ifdef TARGET_I386
576 cpu_x86_dump_state(env, stderr, X86_DUMP_FPU | X86_DUMP_CCOP);
577 #endif
578 va_end(ap);
579 abort();
580 }
581
582 #ifdef TARGET_I386
583 /* unmap all maped pages and flush all associated code */
584 void page_unmap(void)
585 {
586 PageDesc *p, *pmap;
587 unsigned long addr;
588 int i, j, ret, j1;
589
590 for(i = 0; i < L1_SIZE; i++) {
591 pmap = l1_map[i];
592 if (pmap) {
593 p = pmap;
594 for(j = 0;j < L2_SIZE;) {
595 if (p->flags & PAGE_VALID) {
596 addr = (i << (32 - L1_BITS)) | (j << TARGET_PAGE_BITS);
597 /* we try to find a range to make less syscalls */
598 j1 = j;
599 p++;
600 j++;
601 while (j < L2_SIZE && (p->flags & PAGE_VALID)) {
602 p++;
603 j++;
604 }
605 ret = munmap((void *)addr, (j - j1) << TARGET_PAGE_BITS);
606 if (ret != 0) {
607 fprintf(stderr, "Could not unmap page 0x%08lx\n", addr);
608 exit(1);
609 }
610 } else {
611 p++;
612 j++;
613 }
614 }
615 free(pmap);
616 l1_map[i] = NULL;
617 }
618 }
619 tb_flush();
620 }
621 #endif