arch/x86/mm/pgtable_32.c
/*
 * linux/arch/i386/mm/pgtable.c
 */

#include <linux/sched.h>
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/mm.h>
#include <linux/nmi.h>
#include <linux/swap.h>
#include <linux/smp.h>
#include <linux/highmem.h>
#include <linux/slab.h>
#include <linux/pagemap.h>
#include <linux/spinlock.h>
#include <linux/module.h>
#include <linux/quicklist.h>

#include <asm/system.h>
#include <asm/pgtable.h>
#include <asm/pgalloc.h>
#include <asm/fixmap.h>
#include <asm/e820.h>
#include <asm/tlb.h>
#include <asm/tlbflush.h>

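/*
 * Dump a summary of memory usage to the kernel log: free areas, free swap,
 * and per-page counts (RAM, highmem, reserved, shared, swap-cached) gathered
 * by walking every online node's spanned page range.
 */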
void show_mem(void)
{
	int total = 0, reserved = 0;
	int shared = 0, cached = 0;
	int highmem = 0;
	struct page *page;
	pg_data_t *pgdat;
	unsigned long i;
	unsigned long flags;

	printk(KERN_INFO "Mem-info:\n");
	show_free_areas();
	printk(KERN_INFO "Free swap: %6ldkB\n", nr_swap_pages<<(PAGE_SHIFT-10));
	for_each_online_pgdat(pgdat) {
		pgdat_resize_lock(pgdat, &flags);
		for (i = 0; i < pgdat->node_spanned_pages; ++i) {
			if (unlikely(i % MAX_ORDER_NR_PAGES == 0))
				touch_nmi_watchdog();
			page = pgdat_page_nr(pgdat, i);
			total++;
			if (PageHighMem(page))
				highmem++;
			if (PageReserved(page))
				reserved++;
			else if (PageSwapCache(page))
				cached++;
			else if (page_count(page))
				shared += page_count(page) - 1;
		}
		pgdat_resize_unlock(pgdat, &flags);
	}
	printk(KERN_INFO "%d pages of RAM\n", total);
	printk(KERN_INFO "%d pages of HIGHMEM\n", highmem);
	printk(KERN_INFO "%d reserved pages\n", reserved);
	printk(KERN_INFO "%d pages shared\n", shared);
	printk(KERN_INFO "%d pages swap cached\n", cached);

	printk(KERN_INFO "%lu pages dirty\n", global_page_state(NR_FILE_DIRTY));
	printk(KERN_INFO "%lu pages writeback\n",
		global_page_state(NR_WRITEBACK));
	printk(KERN_INFO "%lu pages mapped\n", global_page_state(NR_FILE_MAPPED));
	printk(KERN_INFO "%lu pages slab\n",
		global_page_state(NR_SLAB_RECLAIMABLE) +
		global_page_state(NR_SLAB_UNRECLAIMABLE));
	printk(KERN_INFO "%lu pages pagetables\n",
		global_page_state(NR_PAGETABLE));
}

/*
 * Associate a virtual page frame with a given physical page frame
 * and protection flags for that frame.
 */
static void set_pte_pfn(unsigned long vaddr, unsigned long pfn, pgprot_t flags)
{
	pgd_t *pgd;
	pud_t *pud;
	pmd_t *pmd;
	pte_t *pte;

	pgd = swapper_pg_dir + pgd_index(vaddr);
	if (pgd_none(*pgd)) {
		BUG();
		return;
	}
	pud = pud_offset(pgd, vaddr);
	if (pud_none(*pud)) {
		BUG();
		return;
	}
	pmd = pmd_offset(pud, vaddr);
	if (pmd_none(*pmd)) {
		BUG();
		return;
	}
	pte = pte_offset_kernel(pmd, vaddr);
	if (pgprot_val(flags))
		set_pte_present(&init_mm, vaddr, pte, pfn_pte(pfn, flags));
	else
		pte_clear(&init_mm, vaddr, pte);

	/*
	 * It's enough to flush this one mapping.
	 * (PGE mappings get flushed as well)
	 */
	__flush_tlb_one(vaddr);
}

/*
 * Associate a large virtual page frame with a given physical page frame
 * and protection flags for that frame. pfn is for the base of the page,
 * vaddr is what the page gets mapped to - both must be properly aligned.
 * The pmd must already be instantiated. Assumes PAE mode.
 */
void set_pmd_pfn(unsigned long vaddr, unsigned long pfn, pgprot_t flags)
{
	pgd_t *pgd;
	pud_t *pud;
	pmd_t *pmd;

	if (vaddr & (PMD_SIZE-1)) {		/* vaddr is misaligned */
		printk(KERN_WARNING "set_pmd_pfn: vaddr misaligned\n");
		return; /* BUG(); */
	}
	if (pfn & (PTRS_PER_PTE-1)) {		/* pfn is misaligned */
		printk(KERN_WARNING "set_pmd_pfn: pfn misaligned\n");
		return; /* BUG(); */
	}
	pgd = swapper_pg_dir + pgd_index(vaddr);
	if (pgd_none(*pgd)) {
		printk(KERN_WARNING "set_pmd_pfn: pgd_none\n");
		return; /* BUG(); */
	}
	pud = pud_offset(pgd, vaddr);
	pmd = pmd_offset(pud, vaddr);
	set_pmd(pmd, pfn_pmd(pfn, flags));
	/*
	 * It's enough to flush this one mapping.
	 * (PGE mappings get flushed as well)
	 */
	__flush_tlb_one(vaddr);
}

static int fixmaps;
unsigned long __FIXADDR_TOP = 0xfffff000;
EXPORT_SYMBOL(__FIXADDR_TOP);

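/*
 * Install a fixmap entry: map the fixed virtual address for @idx to the
 * physical address @phys with protection @flags.
 */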
void __set_fixmap(enum fixed_addresses idx, unsigned long phys, pgprot_t flags)
{
	unsigned long address = __fix_to_virt(idx);

	if (idx >= __end_of_fixed_addresses) {
		BUG();
		return;
	}
	set_pte_pfn(address, phys >> PAGE_SHIFT, flags);
	fixmaps++;
}

/**
 * reserve_top_address - reserves a hole in the top of kernel address space
 * @reserve: size of hole to reserve
 *
 * Can be used to relocate the fixmap area and poke a hole in the top
 * of kernel address space to make room for a hypervisor.
 */
void reserve_top_address(unsigned long reserve)
{
	BUG_ON(fixmaps > 0);
	printk(KERN_INFO "Reserving virtual address space above 0x%08x\n",
	       (int)-reserve);
	__FIXADDR_TOP = -reserve - PAGE_SIZE;
	__VMALLOC_RESERVE += reserve;
}

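/*
 * Allocate zeroed pages to hold ptes. Kernel pte pages always come from
 * lowmem; user pte pages may come from highmem when CONFIG_HIGHPTE is set.
 */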
pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address)
{
	return (pte_t *)__get_free_page(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO);
}

struct page *pte_alloc_one(struct mm_struct *mm, unsigned long address)
{
	struct page *pte;

#ifdef CONFIG_HIGHPTE
	pte = alloc_pages(GFP_KERNEL|__GFP_HIGHMEM|__GFP_REPEAT|__GFP_ZERO, 0);
#else
	pte = alloc_pages(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO, 0);
#endif
	return pte;
}

/*
 * List of all pgd's needed for non-PAE so it can invalidate entries
 * in both cached and uncached pgd's; not needed for PAE since the
 * kernel pmd is shared. If PAE were not to share the pmd a similar
 * tactic would be needed. This is essentially codepath-based locking
 * against pageattr.c; it is the unique case in which a valid change
 * of kernel pagetables can't be lazily synchronized by vmalloc faults.
 * vmalloc faults work because attached pagetables are never freed.
 * -- wli
 */
static inline void pgd_list_add(pgd_t *pgd)
{
	struct page *page = virt_to_page(pgd);

	list_add(&page->lru, &pgd_list);
}

static inline void pgd_list_del(pgd_t *pgd)
{
	struct page *page = virt_to_page(pgd);

	list_del(&page->lru);
}

#if (PTRS_PER_PMD == 1)
/* Non-PAE pgd constructor */
static void pgd_ctor(void *pgd)
{
	unsigned long flags;

	/* !PAE, no pagetable sharing */
	memset(pgd, 0, USER_PTRS_PER_PGD*sizeof(pgd_t));

	spin_lock_irqsave(&pgd_lock, flags);

	/* must happen under lock */
	clone_pgd_range((pgd_t *)pgd + USER_PTRS_PER_PGD,
			swapper_pg_dir + USER_PTRS_PER_PGD,
			KERNEL_PGD_PTRS);
	paravirt_alloc_pd_clone(__pa(pgd) >> PAGE_SHIFT,
				__pa(swapper_pg_dir) >> PAGE_SHIFT,
				USER_PTRS_PER_PGD,
				KERNEL_PGD_PTRS);
	pgd_list_add(pgd);
	spin_unlock_irqrestore(&pgd_lock, flags);
}
#else  /* PTRS_PER_PMD > 1 */
/* PAE pgd constructor */
static void pgd_ctor(void *pgd)
{
	/* PAE, kernel PMD may be shared */

	if (SHARED_KERNEL_PMD) {
		clone_pgd_range((pgd_t *)pgd + USER_PTRS_PER_PGD,
				swapper_pg_dir + USER_PTRS_PER_PGD,
				KERNEL_PGD_PTRS);
	} else {
		unsigned long flags;

		memset(pgd, 0, USER_PTRS_PER_PGD*sizeof(pgd_t));
		spin_lock_irqsave(&pgd_lock, flags);
		pgd_list_add(pgd);
		spin_unlock_irqrestore(&pgd_lock, flags);
	}
}
#endif	/* PTRS_PER_PMD */

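/*
 * Quicklist destructor for pgds: when the kernel pmd is not shared, take
 * the pgd back off pgd_list before it is freed or recycled.
 */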
static void pgd_dtor(void *pgd)
{
	unsigned long flags; /* can be called from interrupt context */

	if (SHARED_KERNEL_PMD)
		return;

	spin_lock_irqsave(&pgd_lock, flags);
	pgd_list_del(pgd);
	spin_unlock_irqrestore(&pgd_lock, flags);
}

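/*
 * Number of pgd entries that are private to each pagetable rather than
 * shared with the kernel reference pagetable.
 */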
#define UNSHARED_PTRS_PER_PGD				\
	(SHARED_KERNEL_PMD ? USER_PTRS_PER_PGD : PTRS_PER_PGD)

#ifdef CONFIG_X86_PAE
/*
 * Mop up any pmd pages which may still be attached to the pgd.
 * Normally they will be freed by munmap/exit_mmap, but any pmd we
 * preallocate which never got a corresponding vma will need to be
 * freed manually.
 */
static void pgd_mop_up_pmds(pgd_t *pgdp)
{
	int i;

	for (i = 0; i < UNSHARED_PTRS_PER_PGD; i++) {
		pgd_t pgd = pgdp[i];

		if (pgd_val(pgd) != 0) {
			pmd_t *pmd = (pmd_t *)pgd_page_vaddr(pgd);

			pgdp[i] = native_make_pgd(0);

			paravirt_release_pd(pgd_val(pgd) >> PAGE_SHIFT);
			pmd_free(pmd);
		}
	}
}

/*
 * In PAE mode, we need to do a cr3 reload (=tlb flush) when
 * updating the top-level pagetable entries to guarantee the
 * processor notices the update. Since this is expensive, and
 * all 4 top-level entries are used almost immediately in a
 * new process's life, we just pre-populate them here.
 *
 * Also, if we're in a paravirt environment where the kernel pmd is
 * not shared between pagetables (!SHARED_KERNEL_PMD), we allocate
 * and initialize the kernel pmds here.
 */
static int pgd_prepopulate_pmd(struct mm_struct *mm, pgd_t *pgd)
{
	pud_t *pud;
	unsigned long addr;
	int i;

	pud = pud_offset(pgd, 0);
	for (addr = i = 0; i < UNSHARED_PTRS_PER_PGD;
	     i++, pud++, addr += PUD_SIZE) {
		pmd_t *pmd = pmd_alloc_one(mm, addr);

		if (!pmd) {
			pgd_mop_up_pmds(pgd);
			return 0;
		}

		if (i >= USER_PTRS_PER_PGD)
			memcpy(pmd, (pmd_t *)pgd_page_vaddr(swapper_pg_dir[i]),
			       sizeof(pmd_t) * PTRS_PER_PMD);

		pud_populate(mm, pud, pmd);
	}

	return 1;
}
#else  /* !CONFIG_X86_PAE */
/* No need to prepopulate any pagetable entries in non-PAE modes. */
static int pgd_prepopulate_pmd(struct mm_struct *mm, pgd_t *pgd)
{
	return 1;
}

static void pgd_mop_up_pmds(pgd_t *pgd)
{
}
#endif	/* CONFIG_X86_PAE */

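/*
 * Allocate a pgd via the quicklist cache (pgd_ctor sets up the kernel
 * mappings) and, on PAE, pre-populate its pmds; if pmd allocation fails,
 * the pgd goes back to the quicklist and NULL is returned.
 */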
pgd_t *pgd_alloc(struct mm_struct *mm)
{
	pgd_t *pgd = quicklist_alloc(0, GFP_KERNEL, pgd_ctor);

	mm->pgd = pgd;		/* so that alloc_pd can use it */

	if (pgd && !pgd_prepopulate_pmd(mm, pgd)) {
		quicklist_free(0, pgd_dtor, pgd);
		pgd = NULL;
	}

	return pgd;
}

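/*
 * Free a pgd: release any preallocated pmds still attached (PAE only),
 * then return the pgd page to the quicklist.
 */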
void pgd_free(pgd_t *pgd)
{
	pgd_mop_up_pmds(pgd);
	quicklist_free(0, pgd_dtor, pgd);
}

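/*
 * Trim the quicklist of cached pgds, applying pgd_dtor to any pgd that is
 * actually freed.
 */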
void check_pgt_cache(void)
{
	quicklist_trim(0, pgd_dtor, 25, 16);
}

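/*
 * Release a pte page: tell the paravirt layer the page no longer holds a
 * pagetable, then hand it to the tlb gathering machinery so it is freed
 * only after the TLB has been flushed.
 */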
void __pte_free_tlb(struct mmu_gather *tlb, struct page *pte)
{
	paravirt_release_pt(page_to_pfn(pte));
	tlb_remove_page(tlb, pte);
}

#ifdef CONFIG_X86_PAE

void __pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd)
{
	/* This is called just after the pmd has been detached from
	   the pgd, which requires a full tlb flush to be recognized
	   by the CPU. Rather than incurring multiple tlb flushes
	   while the address space is being pulled down, make the tlb
	   gathering machinery do a full flush when we're done. */
	tlb->fullmm = 1;

	paravirt_release_pd(__pa(pmd) >> PAGE_SHIFT);
	tlb_remove_page(tlb, virt_to_page(pmd));
}

#endif