/* Shadow page table operations.
 * Copyright (C) Rusty Russell IBM Corporation 2006.
 * GPL v2 and any later version */
#include <linux/mm.h>
#include <linux/types.h>
#include <linux/spinlock.h>
#include <linux/random.h>
#include <linux/percpu.h>
#include <asm/tlbflush.h>
#include "lg.h"
#define PTES_PER_PAGE_SHIFT 10
#define PTES_PER_PAGE (1 << PTES_PER_PAGE_SHIFT)
#define SWITCHER_PGD_INDEX (PTES_PER_PAGE - 1)
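/*
 * Layout note: with 4k pages and 1024 ptes per page, a 32-bit virtual
 * address splits into a 10-bit top-level (pgd) index, a 10-bit pte index
 * and a 12-bit offset within the page.  The very top pgd slot is reserved
 * for the Switcher: the small stretch of code which flips the CPU between
 * Host and Guest.
 */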
static DEFINE_PER_CPU(spte_t *, switcher_pte_pages);
#define switcher_pte_page(cpu) per_cpu(switcher_pte_pages, cpu)
static unsigned vaddr_to_pgd_index(unsigned long vaddr)
{
        return vaddr >> (PAGE_SHIFT + PTES_PER_PAGE_SHIFT);
}
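/*
 * For example, with PAGE_SHIFT == 12 a vaddr of 0xC0000000 (a typical
 * page_offset) shifts right by 22 and lands on pgd index 768 of 1024.
 */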
/* These access the shadow versions (ie. the ones used by the CPU). */
static spgd_t *spgd_addr(struct lguest *lg, u32 i, unsigned long vaddr)
{
        unsigned int index = vaddr_to_pgd_index(vaddr);

        if (index >= SWITCHER_PGD_INDEX) {
                kill_guest(lg, "attempt to access switcher pages");
                index = 0;
        }
        return &lg->pgdirs[i].pgdir[index];
}
static spte_t *spte_addr(struct lguest *lg, spgd_t spgd, unsigned long vaddr)
{
        spte_t *page = __va(spgd.pfn << PAGE_SHIFT);
        BUG_ON(!(spgd.flags & _PAGE_PRESENT));
        return &page[(vaddr >> PAGE_SHIFT) % PTES_PER_PAGE];
}
/* These access the guest versions. */
static unsigned long gpgd_addr(struct lguest *lg, unsigned long vaddr)
{
        unsigned int index = vaddr >> (PAGE_SHIFT + PTES_PER_PAGE_SHIFT);
        return lg->pgdirs[lg->pgdidx].cr3 + index * sizeof(gpgd_t);
}
static unsigned long gpte_addr(struct lguest *lg,
                               gpgd_t gpgd, unsigned long vaddr)
{
        unsigned long gpage = gpgd.pfn << PAGE_SHIFT;
        BUG_ON(!(gpgd.flags & _PAGE_PRESENT));
        return gpage + ((vaddr >> PAGE_SHIFT) % PTES_PER_PAGE) * sizeof(gpte_t);
}
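/*
 * Note the asymmetry above: spgd_addr()/spte_addr() return pointers the
 * Host can dereference directly, while gpgd_addr()/gpte_addr() return
 * *Guest-physical* addresses, which must go through lgread_u32() and
 * lgwrite_u32() to be accessed safely.
 */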
/* Do a virtual -> physical mapping on a user page. */
static unsigned long get_pfn(unsigned long virtpfn, int write)
{
        struct page *page;
        unsigned long ret = -1UL;

        down_read(&current->mm->mmap_sem);
        if (get_user_pages(current, current->mm, virtpfn << PAGE_SHIFT,
                           1, write, 1, &page, NULL) == 1)
                ret = page_to_pfn(page);
        up_read(&current->mm->mmap_sem);
        return ret;
}
static spte_t gpte_to_spte(struct lguest *lg, gpte_t gpte, int write)
{
        spte_t spte;
        unsigned long pfn;

        /* We ignore the global flag. */
        spte.flags = (gpte.flags & ~_PAGE_GLOBAL);
        pfn = get_pfn(gpte.pfn, write);
        if (pfn == -1UL) {
                kill_guest(lg, "failed to get page %u", gpte.pfn);
                /* Must not put_page() bogus page on cleanup. */
                spte.flags = 0;
        }
        spte.pfn = pfn;
        return spte;
}
static void release_pte(spte_t pte)
{
        if (pte.flags & _PAGE_PRESENT)
                put_page(pfn_to_page(pte.pfn));
}
static void check_gpte(struct lguest *lg, gpte_t gpte)
{
        if ((gpte.flags & (_PAGE_PWT|_PAGE_PSE)) || gpte.pfn >= lg->pfn_limit)
                kill_guest(lg, "bad page table entry");
}
static void check_gpgd(struct lguest *lg, gpgd_t gpgd)
{
        if ((gpgd.flags & ~_PAGE_TABLE) || gpgd.pfn >= lg->pfn_limit)
                kill_guest(lg, "bad page directory entry");
}
/* FIXME: We hold a reference to pages, which prevents them from being
   swapped.  It'd be nice to have a callback when Linux wants to swap out. */

/* We fault pages in, which allows us to update accessed/dirty bits.
 * Return true if we got the page. */
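/*
 * The errcode follows the x86 page fault error code format: bit 0 set
 * means the fault was on a present page, bit 1 means it was a write, and
 * bit 2 means the Guest was in usermode.  Bits 1 and 2 are checked
 * against _PAGE_RW and _PAGE_USER below.
 */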
int demand_page(struct lguest *lg, unsigned long vaddr, int errcode)
{
        gpgd_t gpgd;
        spgd_t *spgd;
        unsigned long gpte_ptr;
        gpte_t gpte;
        spte_t *spte;

        gpgd = mkgpgd(lgread_u32(lg, gpgd_addr(lg, vaddr)));
        if (!(gpgd.flags & _PAGE_PRESENT))
                return 0;

        spgd = spgd_addr(lg, lg->pgdidx, vaddr);
        if (!(spgd->flags & _PAGE_PRESENT)) {
                /* Get a page of PTEs for them. */
                unsigned long ptepage = get_zeroed_page(GFP_KERNEL);
                /* FIXME: Steal from self in this case? */
                if (!ptepage) {
                        kill_guest(lg, "out of memory allocating pte page");
                        return 0;
                }
                check_gpgd(lg, gpgd);
                spgd->raw.val = (__pa(ptepage) | gpgd.flags);
        }

        gpte_ptr = gpte_addr(lg, gpgd, vaddr);
        gpte = mkgpte(lgread_u32(lg, gpte_ptr));

        /* No page? */
        if (!(gpte.flags & _PAGE_PRESENT))
                return 0;

        /* Write to read-only page? */
        if ((errcode & 2) && !(gpte.flags & _PAGE_RW))
                return 0;

        /* User access to a non-user page? */
        if ((errcode & 4) && !(gpte.flags & _PAGE_USER))
                return 0;

        check_gpte(lg, gpte);
        gpte.flags |= _PAGE_ACCESSED;
        if (errcode & 2)
                gpte.flags |= _PAGE_DIRTY;

        /* We're done with the old pte. */
        spte = spte_addr(lg, *spgd, vaddr);
        release_pte(*spte);

        /* We don't make it writable if this isn't a write: later
         * write will fault so we can set dirty bit in guest. */
        if (gpte.flags & _PAGE_DIRTY)
                *spte = gpte_to_spte(lg, gpte, 1);
        else {
                gpte_t ro_gpte = gpte;
                ro_gpte.flags &= ~_PAGE_RW;
                *spte = gpte_to_spte(lg, ro_gpte, 0);
        }

        /* Now we update dirty/accessed on guest. */
        lgwrite_u32(lg, gpte_ptr, gpte.raw.val);
        return 1;
}
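/*
 * The read-only trick above is how the shadow dirty bit stays honest: a
 * clean guest pte is shadowed read-only, so the Guest's first real write
 * faults back into demand_page() and we set _PAGE_DIRTY before granting
 * write access.
 */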
/* This is much faster than the full demand_page logic. */
static int page_writable(struct lguest *lg, unsigned long vaddr)
{
        spgd_t *spgd;
        unsigned long flags;

        spgd = spgd_addr(lg, lg->pgdidx, vaddr);
        if (!(spgd->flags & _PAGE_PRESENT))
                return 0;

        flags = spte_addr(lg, *spgd, vaddr)->flags;
        return (flags & (_PAGE_PRESENT|_PAGE_RW)) == (_PAGE_PRESENT|_PAGE_RW);
}
void pin_page(struct lguest *lg, unsigned long vaddr)
{
        if (!page_writable(lg, vaddr) && !demand_page(lg, vaddr, 2))
                kill_guest(lg, "bad stack page %#lx", vaddr);
}
static void release_pgd(struct lguest *lg, spgd_t *spgd)
{
        if (spgd->flags & _PAGE_PRESENT) {
                unsigned int i;
                spte_t *ptepage = __va(spgd->pfn << PAGE_SHIFT);
                for (i = 0; i < PTES_PER_PAGE; i++)
                        release_pte(ptepage[i]);
                free_page((long)ptepage);
                /* Mark the shadow entry not present. */
                spgd->raw.val = 0;
        }
}
static void flush_user_mappings(struct lguest *lg, int idx)
{
        unsigned int i;
        /* Everything below page_offset is a user mapping. */
        for (i = 0; i < vaddr_to_pgd_index(lg->page_offset); i++)
                release_pgd(lg, lg->pgdirs[idx].pgdir + i);
}
void guest_pagetable_flush_user(struct lguest *lg)
{
        flush_user_mappings(lg, lg->pgdidx);
}
static unsigned int find_pgdir(struct lguest *lg, unsigned long pgtable)
{
        unsigned int i;
        for (i = 0; i < ARRAY_SIZE(lg->pgdirs); i++)
                if (lg->pgdirs[i].cr3 == pgtable)
                        break;
        /* Returns ARRAY_SIZE(lg->pgdirs) if not found. */
        return i;
}
static unsigned int new_pgdir(struct lguest *lg,
                              unsigned long cr3,
                              int *blank_pgdir)
{
        unsigned int next;

        next = random32() % ARRAY_SIZE(lg->pgdirs);
        if (!lg->pgdirs[next].pgdir) {
                lg->pgdirs[next].pgdir = (spgd_t *)get_zeroed_page(GFP_KERNEL);
                /* If allocation fails, keep using the current pgdir. */
                if (!lg->pgdirs[next].pgdir)
                        next = lg->pgdidx;
                else
                        /* There are no mappings: you'll need to re-pin */
                        *blank_pgdir = 1;
        }
        lg->pgdirs[next].cr3 = cr3;
        /* Release all the non-kernel mappings. */
        flush_user_mappings(lg, next);

        return next;
}
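/*
 * Design note: rather than maintain LRU over this handful of cached
 * top-level shadows, new_pgdir() simply evicts a slot at random; for a
 * cache this small, random replacement is cheap and close enough.
 */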
void guest_new_pagetable(struct lguest *lg, unsigned long pgtable)
{
        int newpgdir, repin = 0;

        newpgdir = find_pgdir(lg, pgtable);
        if (newpgdir == ARRAY_SIZE(lg->pgdirs))
                newpgdir = new_pgdir(lg, pgtable, &repin);
        lg->pgdidx = newpgdir;
        /* A blank pgdir means the Guest stack pages must be pinned again. */
        if (repin)
                pin_stack_pages(lg);
}
static void release_all_pagetables(struct lguest *lg)
{
        unsigned int i, j;

        for (i = 0; i < ARRAY_SIZE(lg->pgdirs); i++)
                if (lg->pgdirs[i].pgdir)
                        for (j = 0; j < SWITCHER_PGD_INDEX; j++)
                                release_pgd(lg, lg->pgdirs[i].pgdir + j);
}
void guest_pagetable_clear_all(struct lguest *lg)
{
        release_all_pagetables(lg);
        /* We need the Guest stack pages mapped again. */
        pin_stack_pages(lg);
}
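/*
 * This throws away *all* the shadow state, so every mapping will be
 * rebuilt by demand_page() as the Guest touches it; the stack pages are
 * re-pinned immediately since the Guest cannot fault those in itself.
 */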
static void do_set_pte(struct lguest *lg, int idx,
                       unsigned long vaddr, gpte_t gpte)
{
        spgd_t *spgd = spgd_addr(lg, idx, vaddr);
        if (spgd->flags & _PAGE_PRESENT) {
                spte_t *spte = spte_addr(lg, *spgd, vaddr);
                release_pte(*spte);
                /* Only bother shadowing entries the Guest has used. */
                if (gpte.flags & (_PAGE_DIRTY | _PAGE_ACCESSED)) {
                        check_gpte(lg, gpte);
                        *spte = gpte_to_spte(lg, gpte,
                                             gpte.flags & _PAGE_DIRTY);
                } else
                        spte->raw.val = 0;
        }
}
void guest_set_pte(struct lguest *lg,
                   unsigned long cr3, unsigned long vaddr, gpte_t gpte)
{
        /* Kernel mappings must be changed on all top levels. */
        if (vaddr >= lg->page_offset) {
                unsigned int i;
                for (i = 0; i < ARRAY_SIZE(lg->pgdirs); i++)
                        if (lg->pgdirs[i].pgdir)
                                do_set_pte(lg, i, vaddr, gpte);
        } else {
                int pgdir = find_pgdir(lg, cr3);
                if (pgdir != ARRAY_SIZE(lg->pgdirs))
                        do_set_pte(lg, pgdir, vaddr, gpte);
        }
}
void guest_set_pmd(struct lguest *lg, unsigned long cr3, u32 idx)
{
        int pgdir;

        if (idx >= SWITCHER_PGD_INDEX)
                return;

        pgdir = find_pgdir(lg, cr3);
        if (pgdir < ARRAY_SIZE(lg->pgdirs))
                release_pgd(lg, lg->pgdirs[pgdir].pgdir + idx);
}
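/*
 * Note we don't shadow the new pmd value: releasing the whole shadow pgd
 * entry is the lazy approach, and demand_page() will repopulate the ptes
 * one fault at a time.
 */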
int init_guest_pagetable(struct lguest *lg, unsigned long pgtable)
{
        /* We assume this in flush_user_mappings, so check now */
        if (vaddr_to_pgd_index(lg->page_offset) >= SWITCHER_PGD_INDEX)
                return -EINVAL;
        lg->pgdidx = 0;
        lg->pgdirs[lg->pgdidx].cr3 = pgtable;
        lg->pgdirs[lg->pgdidx].pgdir = (spgd_t *)get_zeroed_page(GFP_KERNEL);
        if (!lg->pgdirs[lg->pgdidx].pgdir)
                return -ENOMEM;
        return 0;
}
void free_guest_pagetable(struct lguest *lg)
{
        unsigned int i;

        release_all_pagetables(lg);
        for (i = 0; i < ARRAY_SIZE(lg->pgdirs); i++)
                free_page((long)lg->pgdirs[i].pgdir);
}
/* Caller must be preempt-safe */
void map_switcher_in_guest(struct lguest *lg, struct lguest_pages *pages)
{
        spte_t *switcher_pte_page = __get_cpu_var(switcher_pte_pages);
        spgd_t switcher_pgd;
        spte_t regs_pte;

        /* Since the Switcher is less than 4MB, we simply mug the top pte page. */
        switcher_pgd.pfn = __pa(switcher_pte_page) >> PAGE_SHIFT;
        switcher_pgd.flags = _PAGE_KERNEL;
        lg->pgdirs[lg->pgdidx].pgdir[SWITCHER_PGD_INDEX] = switcher_pgd;

        /* Map our regs page over stack page. */
        regs_pte.pfn = __pa(lg->regs_page) >> PAGE_SHIFT;
        regs_pte.flags = _PAGE_KERNEL;
        switcher_pte_page[(unsigned long)pages/PAGE_SIZE%PTES_PER_PAGE]
                = regs_pte;
}
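/*
 * The trick above: the Switcher text and per-cpu pages all live in the
 * top 4MB, covered by the one pte page we mugged, and this Guest's
 * register page is mapped over the Switcher's stack slot so the right
 * registers appear at the fixed Switcher addresses.
 */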
static void free_switcher_pte_pages(void)
{
        unsigned int i;

        for_each_possible_cpu(i)
                free_page((long)switcher_pte_page(i));
}
static __init void populate_switcher_pte_page(unsigned int cpu,
                                              struct page *switcher_page[],
                                              unsigned int pages)
{
        unsigned int i;
        spte_t *pte = switcher_pte_page(cpu);

        for (i = 0; i < pages; i++) {
                pte[i].pfn = page_to_pfn(switcher_page[i]);
                pte[i].flags = _PAGE_PRESENT|_PAGE_ACCESSED;
        }

        /* We only map this CPU's pages, so guest can't see others. */
        i = pages + cpu*2;

        /* First page (regs) is rw, second (state) is ro. */
        pte[i].pfn = page_to_pfn(switcher_page[i]);
        pte[i].flags = _PAGE_PRESENT|_PAGE_ACCESSED|_PAGE_RW;
        pte[i+1].pfn = page_to_pfn(switcher_page[i+1]);
        pte[i+1].flags = _PAGE_PRESENT|_PAGE_ACCESSED;
}
__init int init_pagetables(struct page **switcher_page, unsigned int pages)
{
        unsigned int i;

        for_each_possible_cpu(i) {
                switcher_pte_page(i) = (spte_t *)get_zeroed_page(GFP_KERNEL);
                if (!switcher_pte_page(i)) {
                        free_switcher_pte_pages();
                        return -ENOMEM;
                }
                populate_switcher_pte_page(i, switcher_page, pages);
        }
        return 0;
}
void free_pagetables(void)
{
        free_switcher_pte_pages();
}