/*
 *  linux/arch/x86_64/mm/init.c
 *
 *  Copyright (C) 1995  Linus Torvalds
 *  Copyright (C) 2000  Pavel Machek <pavel@suse.cz>
 *  Copyright (C) 2002,2003 Andi Kleen <ak@suse.de>
 */

#include <linux/signal.h>
#include <linux/sched.h>
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/string.h>
#include <linux/types.h>
#include <linux/ptrace.h>
#include <linux/mman.h>
#include <linux/mm.h>
#include <linux/swap.h>
#include <linux/smp.h>
#include <linux/init.h>
#include <linux/pagemap.h>
#include <linux/bootmem.h>
#include <linux/proc_fs.h>
#include <linux/pci.h>
#include <linux/pfn.h>
#include <linux/poison.h>
#include <linux/dma-mapping.h>
#include <linux/module.h>
#include <linux/memory_hotplug.h>
#include <linux/nmi.h>

#include <asm/processor.h>
#include <asm/system.h>
#include <asm/uaccess.h>
#include <asm/pgtable.h>
#include <asm/pgalloc.h>
#include <asm/dma.h>
#include <asm/fixmap.h>
#include <asm/e820.h>
#include <asm/apic.h>
#include <asm/tlb.h>
#include <asm/mmu_context.h>
#include <asm/proto.h>
#include <asm/smp.h>
#include <asm/sections.h>

#ifndef Dprintk
#define Dprintk(x...)
#endif

const struct dma_mapping_ops *dma_ops;
EXPORT_SYMBOL(dma_ops);

static unsigned long dma_reserve __initdata;

DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);

/*
 * NOTE: pagetable_init allocates all the fixmap pagetables contiguously in
 * physical space so we can cache the place of the first one and move
 * around without checking the pgd every time.
 */

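/*
 * Walk every online node and classify each page as reserved, swap-cached
 * or shared, then print the totals.  The scan can take a while on large
 * machines, so the NMI watchdog is touched periodically.
 */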
void show_mem(void)
{
        long i, total = 0, reserved = 0;
        long shared = 0, cached = 0;
        pg_data_t *pgdat;
        struct page *page;

        printk(KERN_INFO "Mem-info:\n");
        show_free_areas();
        printk(KERN_INFO "Free swap: %6ldkB\n",
               nr_swap_pages << (PAGE_SHIFT-10));

        for_each_online_pgdat(pgdat) {
                for (i = 0; i < pgdat->node_spanned_pages; ++i) {
                        /* this loop can take a while with 256 GB and 4k pages
                           so update the NMI watchdog */
                        if (unlikely(i % MAX_ORDER_NR_PAGES == 0))
                                touch_nmi_watchdog();
                        if (!pfn_valid(pgdat->node_start_pfn + i))
                                continue;
                        page = pfn_to_page(pgdat->node_start_pfn + i);
                        total++;
                        if (PageReserved(page))
                                reserved++;
                        else if (PageSwapCache(page))
                                cached++;
                        else if (page_count(page))
                                shared += page_count(page) - 1;
                }
        }
        printk(KERN_INFO "%ld pages of RAM\n", total);
        printk(KERN_INFO "%ld reserved pages\n", reserved);
        printk(KERN_INFO "%ld pages shared\n", shared);
        printk(KERN_INFO "%ld pages swap cached\n", cached);
}

int after_bootmem;

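/*
 * Allocate a zeroed page for a new kernel page table: from the page
 * allocator once bootmem has been retired, from the bootmem allocator
 * before that.  Panics if no suitably aligned page can be obtained.
 */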
static __init void *spp_getpage(void)
{
        void *ptr;

        if (after_bootmem)
                ptr = (void *) get_zeroed_page(GFP_ATOMIC);
        else
                ptr = alloc_bootmem_pages(PAGE_SIZE);
        if (!ptr || ((unsigned long)ptr & ~PAGE_MASK))
                panic("set_pte_phys: cannot allocate page data %s\n",
                      after_bootmem ? "after bootmem" : "");

        Dprintk("spp_getpage %p\n", ptr);
        return ptr;
}

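/*
 * Map a single kernel page at @vaddr to the physical address @phys with
 * protection @prot, allocating intermediate pmd/pte tables through
 * spp_getpage() as needed.  Only this one mapping is flushed from the TLB.
 */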
static __init void set_pte_phys(unsigned long vaddr,
                                unsigned long phys, pgprot_t prot)
{
        pgd_t *pgd;
        pud_t *pud;
        pmd_t *pmd;
        pte_t *pte, new_pte;

        Dprintk("set_pte_phys %lx to %lx\n", vaddr, phys);

        pgd = pgd_offset_k(vaddr);
        if (pgd_none(*pgd)) {
                printk("PGD FIXMAP MISSING, it should be setup in head.S!\n");
                return;
        }
        pud = pud_offset(pgd, vaddr);
        if (pud_none(*pud)) {
                pmd = (pmd_t *) spp_getpage();
                set_pud(pud, __pud(__pa(pmd) | _KERNPG_TABLE | _PAGE_USER));
                if (pmd != pmd_offset(pud, 0)) {
                        printk("PAGETABLE BUG #01! %p <-> %p\n",
                               pmd, pmd_offset(pud, 0));
                        return;
                }
        }
        pmd = pmd_offset(pud, vaddr);
        if (pmd_none(*pmd)) {
                pte = (pte_t *) spp_getpage();
                set_pmd(pmd, __pmd(__pa(pte) | _KERNPG_TABLE | _PAGE_USER));
                if (pte != pte_offset_kernel(pmd, 0)) {
                        printk("PAGETABLE BUG #02!\n");
                        return;
                }
        }
        new_pte = pfn_pte(phys >> PAGE_SHIFT, prot);

        pte = pte_offset_kernel(pmd, vaddr);
        if (!pte_none(*pte) &&
            pte_val(*pte) != (pte_val(new_pte) & __supported_pte_mask))
                pte_ERROR(*pte);
        set_pte(pte, new_pte);

        /*
         * It's enough to flush this one mapping.
         * (PGE mappings get flushed as well)
         */
        __flush_tlb_one(vaddr);
}

/* NOTE: this is meant to be run only at boot */
void __init
__set_fixmap(enum fixed_addresses idx, unsigned long phys, pgprot_t prot)
{
        unsigned long address = __fix_to_virt(idx);

        if (idx >= __end_of_fixed_addresses) {
                printk("Invalid __set_fixmap\n");
                return;
        }
        set_pte_phys(address, phys, prot);
}

unsigned long __meminitdata table_start, table_end;

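/*
 * Allocate a zeroed page for the early direct-mapping page tables.  Before
 * bootmem is up the page is taken from the window reserved by
 * find_early_table_space() and temporarily mapped with early_ioremap();
 * afterwards it simply comes from get_zeroed_page() and unmap_low_page()
 * becomes a no-op.
 */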
static __meminit void *alloc_low_page(unsigned long *phys)
{
        unsigned long pfn = table_end++;
        void *adr;

        if (after_bootmem) {
                adr = (void *)get_zeroed_page(GFP_ATOMIC);
                *phys = __pa(adr);
                return adr;
        }

        if (pfn >= end_pfn)
                panic("alloc_low_page: ran out of memory");

        adr = early_ioremap(pfn * PAGE_SIZE, PAGE_SIZE);
        memset(adr, 0, PAGE_SIZE);
        *phys = pfn * PAGE_SIZE;
        return adr;
}

static __meminit void unmap_low_page(void *adr)
{
        if (after_bootmem)
                return;

        early_iounmap(adr, PAGE_SIZE);
}

/* Must run before zap_low_mappings */
__meminit void *early_ioremap(unsigned long addr, unsigned long size)
{
        unsigned long vaddr;
        pmd_t *pmd, *last_pmd;
        int i, pmds;

        pmds = ((addr & ~PMD_MASK) + size + ~PMD_MASK) / PMD_SIZE;
        vaddr = __START_KERNEL_map;
        pmd = level2_kernel_pgt;
        last_pmd = level2_kernel_pgt + PTRS_PER_PMD - 1;
        for (; pmd <= last_pmd; pmd++, vaddr += PMD_SIZE) {
                for (i = 0; i < pmds; i++) {
                        if (pmd_present(pmd[i]))
                                goto next;
                }
                vaddr += addr & ~PMD_MASK;
                addr &= PMD_MASK;
                for (i = 0; i < pmds; i++, addr += PMD_SIZE)
                        set_pmd(pmd + i, __pmd(addr | _KERNPG_TABLE | _PAGE_PSE));
                __flush_tlb();
                return (void *)vaddr;
        next:
                ;
        }
        printk("early_ioremap(0x%lx, %lu) failed\n", addr, size);
        return NULL;
}

/* To avoid virtual aliases later */
__meminit void early_iounmap(void *addr, unsigned long size)
{
        unsigned long vaddr;
        pmd_t *pmd;
        int i, pmds;

        vaddr = (unsigned long)addr;
        pmds = ((vaddr & ~PMD_MASK) + size + ~PMD_MASK) / PMD_SIZE;
        pmd = level2_kernel_pgt + pmd_index(vaddr);
        for (i = 0; i < pmds; i++)
                pmd_clear(pmd + i);
        __flush_tlb();
}

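/*
 * Create 2MB (PSE) kernel mappings in one pmd page for the physical range
 * [address, end).  Entries that are already present are left untouched;
 * on the boot path, entries beyond 'end' are cleared.
 */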
static void __meminit
phys_pmd_init(pmd_t *pmd_page, unsigned long address, unsigned long end)
{
        int i = pmd_index(address);

        for (; i < PTRS_PER_PMD; i++, address += PMD_SIZE) {
                unsigned long entry;
                pmd_t *pmd = pmd_page + pmd_index(address);

                if (address >= end) {
                        if (!after_bootmem)
                                for (; i < PTRS_PER_PMD; i++, pmd++)
                                        set_pmd(pmd, __pmd(0));
                        break;
                }

                if (pmd_val(*pmd))
                        continue;

                entry = _PAGE_NX|_PAGE_PSE|_KERNPG_TABLE|_PAGE_GLOBAL|address;
                entry &= __supported_pte_mask;
                set_pmd(pmd, __pmd(entry));
        }
}

static void __meminit
phys_pmd_update(pud_t *pud, unsigned long address, unsigned long end)
{
        pmd_t *pmd = pmd_offset(pud, 0);

        spin_lock(&init_mm.page_table_lock);
        phys_pmd_init(pmd, address, end);
        spin_unlock(&init_mm.page_table_lock);
        __flush_tlb_all();
}

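/*
 * Populate one pud page for the physical range [addr, end).  At boot,
 * regions with no RAM in the e820 map are cleared; already populated
 * puds are updated in place, otherwise a fresh pmd page is allocated
 * with alloc_low_page() and filled by phys_pmd_init().
 */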
static void __meminit
phys_pud_init(pud_t *pud_page, unsigned long addr, unsigned long end)
{
        int i = pud_index(addr);

        for (; i < PTRS_PER_PUD; i++, addr = (addr & PUD_MASK) + PUD_SIZE) {
                unsigned long pmd_phys;
                pud_t *pud = pud_page + pud_index(addr);
                pmd_t *pmd;

                if (addr >= end)
                        break;

                if (!after_bootmem && !e820_any_mapped(addr, addr+PUD_SIZE, 0)) {
                        set_pud(pud, __pud(0));
                        continue;
                }

                if (pud_val(*pud)) {
                        phys_pmd_update(pud, addr, end);
                        continue;
                }

                pmd = alloc_low_page(&pmd_phys);
                spin_lock(&init_mm.page_table_lock);
                set_pud(pud, __pud(pmd_phys | _KERNPG_TABLE));
                phys_pmd_init(pmd, addr, end);
                spin_unlock(&init_mm.page_table_lock);
                unmap_low_page(pmd);
        }
        __flush_tlb();
}

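/*
 * Estimate the worst-case size of the direct-mapping page tables for
 * memory up to 'end' and reserve a block for them from the e820 map,
 * starting the search at 0x8000.
 */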
static void __init find_early_table_space(unsigned long end)
{
        unsigned long puds, pmds, tables, start;

        puds = (end + PUD_SIZE - 1) >> PUD_SHIFT;
        pmds = (end + PMD_SIZE - 1) >> PMD_SHIFT;
        tables = round_up(puds * sizeof(pud_t), PAGE_SIZE) +
                 round_up(pmds * sizeof(pmd_t), PAGE_SIZE);

        /* RED-PEN putting page tables only on node 0 could
           cause a hotspot and fill up ZONE_DMA. The page tables
           need roughly 0.5KB per GB. */
        start = 0x8000;
        table_start = find_e820_area(start, end, tables);
        if (table_start == -1UL)
                panic("Cannot find space for the kernel page tables");

        table_start >>= PAGE_SHIFT;
        table_end = table_start;

        early_printk("kernel direct mapping tables up to %lx @ %lx-%lx\n",
                     end, table_start << PAGE_SHIFT,
                     (table_start << PAGE_SHIFT) + tables);
}

/*
 * Set up the direct mapping of the physical memory at PAGE_OFFSET.
 * This runs before bootmem is initialized and gets pages directly from
 * the physical memory.  To access them they are temporarily mapped.
 */
void __init_refok init_memory_mapping(unsigned long start, unsigned long end)
{
        unsigned long next;

        Dprintk("init_memory_mapping\n");

        /*
         * Find space for the kernel direct mapping tables.
         * Later we should allocate these tables in the local node of the
         * memory mapped.  Unfortunately this is done currently before the
         * nodes are discovered.
         */
        if (!after_bootmem)
                find_early_table_space(end);

        start = (unsigned long)__va(start);
        end = (unsigned long)__va(end);

        for (; start < end; start = next) {
                unsigned long pud_phys;
                pgd_t *pgd = pgd_offset_k(start);
                pud_t *pud;

                if (after_bootmem)
                        pud = pud_offset(pgd, start & PGDIR_MASK);
                else
                        pud = alloc_low_page(&pud_phys);

                next = start + PGDIR_SIZE;
                if (next > end)
                        next = end;
                phys_pud_init(pud, __pa(start), __pa(next));
                if (!after_bootmem)
                        set_pgd(pgd_offset_k(start), mk_kernel_pgd(pud_phys));
                unmap_low_page(pud);
        }

        if (!after_bootmem)
                mmu_cr4_features = read_cr4();
        __flush_tlb_all();
}

#ifndef CONFIG_NUMA
void __init paging_init(void)
{
        unsigned long max_zone_pfns[MAX_NR_ZONES];

        memset(max_zone_pfns, 0, sizeof(max_zone_pfns));
        max_zone_pfns[ZONE_DMA] = MAX_DMA_PFN;
        max_zone_pfns[ZONE_DMA32] = MAX_DMA32_PFN;
        max_zone_pfns[ZONE_NORMAL] = end_pfn;

        memory_present(0, 0, end_pfn);
        sparse_init();
        free_area_init_nodes(max_zone_pfns);
}
#endif

/*
 * Unmap a kernel mapping if it exists.  This is useful to avoid
 * prefetches from the CPU leading to inconsistent cache lines.
 * address and size must be aligned to 2MB boundaries.
 * Does nothing when the mapping doesn't exist.
 */
void __init clear_kernel_mapping(unsigned long address, unsigned long size)
{
        unsigned long end = address + size;

        BUG_ON(address & ~LARGE_PAGE_MASK);
        BUG_ON(size & ~LARGE_PAGE_MASK);

        for (; address < end; address += LARGE_PAGE_SIZE) {
                pgd_t *pgd = pgd_offset_k(address);
                pud_t *pud;
                pmd_t *pmd;

                if (pgd_none(*pgd))
                        continue;
                pud = pud_offset(pgd, address);
                if (pud_none(*pud))
                        continue;
                pmd = pmd_offset(pud, address);
                if (!pmd || pmd_none(*pmd))
                        continue;
                if (!(pmd_val(*pmd) & _PAGE_PSE)) {
                        /* Could handle this, but it should not happen
                           currently. */
                        printk(KERN_ERR "clear_kernel_mapping: "
                               "mapping has been split. will leak memory\n");
                        pmd_ERROR(*pmd);
                }
                set_pmd(pmd, __pmd(0));
        }
        __flush_tlb_all();
}

/*
 * Memory hotplug specific functions
 */
void online_page(struct page *page)
{
        ClearPageReserved(page);
        init_page_count(page);
        __free_page(page);
        totalram_pages++;
        num_physpages++;
}

#ifdef CONFIG_MEMORY_HOTPLUG
/*
 * Memory is always added to the NORMAL zone. This means you will never get
 * additional DMA/DMA32 memory.
 */
int arch_add_memory(int nid, u64 start, u64 size)
{
        struct pglist_data *pgdat = NODE_DATA(nid);
        struct zone *zone = pgdat->node_zones + ZONE_NORMAL;
        unsigned long start_pfn = start >> PAGE_SHIFT;
        unsigned long nr_pages = size >> PAGE_SHIFT;
        int ret;

        init_memory_mapping(start, start + size - 1);

        ret = __add_pages(zone, start_pfn, nr_pages);
        if (ret)
                goto error;

        return ret;
error:
        printk("%s: Problem encountered in __add_pages!\n", __func__);
        return ret;
}
EXPORT_SYMBOL_GPL(arch_add_memory);

#if !defined(CONFIG_ACPI_NUMA) && defined(CONFIG_NUMA)
int memory_add_physaddr_to_nid(u64 start)
{
        return 0;
}
EXPORT_SYMBOL_GPL(memory_add_physaddr_to_nid);
#endif

#endif /* CONFIG_MEMORY_HOTPLUG */

#ifdef CONFIG_MEMORY_HOTPLUG_RESERVE
/*
 * Memory hotadd without sparsemem. The mem_maps have been allocated in
 * advance, just online the pages.
 */
int __add_pages(struct zone *z, unsigned long start_pfn, unsigned long nr_pages)
{
        int err = -EIO;
        unsigned long pfn;
        unsigned long total = 0, mem = 0;

        for (pfn = start_pfn; pfn < start_pfn + nr_pages; pfn++) {
                if (pfn_valid(pfn)) {
                        online_page(pfn_to_page(pfn));
                        err = 0;
                        mem++;
                }
                total++;
        }
        if (!err) {
                z->spanned_pages += total;
                z->present_pages += mem;
                z->zone_pgdat->node_spanned_pages += total;
                z->zone_pgdat->node_present_pages += mem;
        }
        return err;
}
#endif

static struct kcore_list kcore_mem, kcore_vmalloc, kcore_kernel, kcore_modules,
                         kcore_vsyscall;

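/*
 * Late memory init: allocate the IOMMU resources, hand all bootmem pages
 * over to the page allocator, account the reserved pages, register the
 * /proc/kcore regions and print the usual "Memory: ..." summary.
 */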
void __init mem_init(void)
{
        long codesize, reservedpages, datasize, initsize;

        pci_iommu_alloc();

        /* clear the zero-page */
        memset(empty_zero_page, 0, PAGE_SIZE);

        reservedpages = 0;

        /* this will put all low memory onto the freelists */
#ifdef CONFIG_NUMA
        totalram_pages = numa_free_all_bootmem();
#else
        totalram_pages = free_all_bootmem();
#endif
        reservedpages = end_pfn - totalram_pages -
                        absent_pages_in_range(0, end_pfn);

        after_bootmem = 1;

        codesize = (unsigned long) &_etext - (unsigned long) &_text;
        datasize = (unsigned long) &_edata - (unsigned long) &_etext;
        initsize = (unsigned long) &__init_end - (unsigned long) &__init_begin;

        /* Register memory areas for /proc/kcore */
        kclist_add(&kcore_mem, __va(0), max_low_pfn << PAGE_SHIFT);
        kclist_add(&kcore_vmalloc, (void *)VMALLOC_START,
                   VMALLOC_END - VMALLOC_START);
        kclist_add(&kcore_kernel, &_stext, _end - _stext);
        kclist_add(&kcore_modules, (void *)MODULES_VADDR, MODULES_LEN);
        kclist_add(&kcore_vsyscall, (void *)VSYSCALL_START,
                   VSYSCALL_END - VSYSCALL_START);

        printk("Memory: %luk/%luk available (%ldk kernel code, "
               "%ldk reserved, %ldk data, %ldk init)\n",
               (unsigned long) nr_free_pages() << (PAGE_SHIFT-10),
               end_pfn << (PAGE_SHIFT-10),
               codesize >> 10,
               reservedpages << (PAGE_SHIFT-10),
               datasize >> 10,
               initsize >> 10);
}

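/*
 * Release the pages in [begin, end) to the page allocator: poison them,
 * clear the page attributes of any alias above __START_KERNEL_map and
 * bump totalram_pages accordingly.
 */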
void free_init_pages(char *what, unsigned long begin, unsigned long end)
{
        unsigned long addr;

        if (begin >= end)
                return;

        printk(KERN_INFO "Freeing %s: %luk freed\n", what, (end - begin) >> 10);
        for (addr = begin; addr < end; addr += PAGE_SIZE) {
                ClearPageReserved(virt_to_page(addr));
                init_page_count(virt_to_page(addr));
                memset((void *)(addr & ~(PAGE_SIZE-1)),
                       POISON_FREE_INITMEM, PAGE_SIZE);
                if (addr >= __START_KERNEL_map)
                        change_page_attr_addr(addr, 1, __pgprot(0));
                free_page(addr);
                totalram_pages++;
        }
        if (addr > __START_KERNEL_map)
                global_flush_tlb();
}

void free_initmem(void)
{
        free_init_pages("unused kernel memory",
                        (unsigned long)(&__init_begin),
                        (unsigned long)(&__init_end));
}

#ifdef CONFIG_DEBUG_RODATA

void mark_rodata_ro(void)
{
        unsigned long start = (unsigned long)_stext, end;

#ifdef CONFIG_HOTPLUG_CPU
        /* It must still be possible to apply SMP alternatives. */
        if (num_possible_cpus() > 1)
                start = (unsigned long)_etext;
#endif

#ifdef CONFIG_KPROBES
        start = (unsigned long)__start_rodata;
#endif

        end = (unsigned long)__end_rodata;
        start = (start + PAGE_SIZE - 1) & PAGE_MASK;
        end &= PAGE_MASK;
        if (end <= start)
                return;

        change_page_attr_addr(start, (end - start) >> PAGE_SHIFT, PAGE_KERNEL_RO);

        printk(KERN_INFO "Write protecting the kernel read-only data: %luk\n",
               (end - start) >> 10);

        /*
         * change_page_attr_addr() requires a global_flush_tlb() call after it.
         * We do this after the printk so that if something went wrong in the
         * change, the printk gets out at least to give a better debug hint
         * of who is the culprit.
         */
        global_flush_tlb();
}
#endif

#ifdef CONFIG_BLK_DEV_INITRD
void free_initrd_mem(unsigned long start, unsigned long end)
{
        free_init_pages("initrd memory", start, end);
}
#endif

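/*
 * Reserve a physical range with the bootmem allocator (per-node when NUMA
 * is enabled).  Ranges above end_pfn are tolerated silently for kdump
 * firmware tables; reservations inside the DMA zone are tracked in
 * dma_reserve and passed to set_dma_reserve().
 */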
void __init reserve_bootmem_generic(unsigned long phys, unsigned len)
{
#ifdef CONFIG_NUMA
        int nid = phys_to_nid(phys);
#endif
        unsigned long pfn = phys >> PAGE_SHIFT;

        if (pfn >= end_pfn) {
                /* This can happen with kdump kernels when accessing firmware
                   tables. */
                if (pfn < end_pfn_map)
                        return;
                printk(KERN_ERR "reserve_bootmem: illegal reserve %lx %u\n",
                       phys, len);
                return;
        }

        /* Should check here against the e820 map to avoid double free */
#ifdef CONFIG_NUMA
        reserve_bootmem_node(NODE_DATA(nid), phys, len);
#else
        reserve_bootmem(phys, len);
#endif
        if (phys + len <= MAX_DMA_PFN*PAGE_SIZE) {
                dma_reserve += len / PAGE_SIZE;
                set_dma_reserve(dma_reserve);
        }
}

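/*
 * Check whether a kernel virtual address is backed by a valid mapping by
 * walking the page tables by hand; 2MB mappings are handled at the pmd
 * level.
 */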
int kern_addr_valid(unsigned long addr)
{
        unsigned long above = ((long)addr) >> __VIRTUAL_MASK_SHIFT;
        pgd_t *pgd;
        pud_t *pud;
        pmd_t *pmd;
        pte_t *pte;

        if (above != 0 && above != -1UL)
                return 0;

        pgd = pgd_offset_k(addr);
        if (pgd_none(*pgd))
                return 0;

        pud = pud_offset(pgd, addr);
        if (pud_none(*pud))
                return 0;

        pmd = pmd_offset(pud, addr);
        if (pmd_none(*pmd))
                return 0;
        if (pmd_large(*pmd))
                return pfn_valid(pmd_pfn(*pmd));

        pte = pte_offset_kernel(pmd, addr);
        if (pte_none(*pte))
                return 0;
        return pfn_valid(pte_pfn(*pte));
}

/*
 * A pseudo VMA to allow ptrace access to the vsyscall page.  This only
 * covers the 64-bit vsyscall page now; 32-bit has a real VMA and does
 * not need special handling anymore.
 */
static struct vm_area_struct gate_vma = {
        .vm_start = VSYSCALL_START,
        .vm_end = VSYSCALL_START + (VSYSCALL_MAPPED_PAGES << PAGE_SHIFT),
        .vm_page_prot = PAGE_READONLY_EXEC,
        .vm_flags = VM_READ | VM_EXEC
};

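/* Return the gate (vsyscall) VMA for @tsk; 32-bit compat tasks have none. */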
struct vm_area_struct *get_gate_vma(struct task_struct *tsk)
{
#ifdef CONFIG_IA32_EMULATION
        if (test_tsk_thread_flag(tsk, TIF_IA32))
                return NULL;
#endif
        return &gate_vma;
}

int in_gate_area(struct task_struct *task, unsigned long addr)
{
        struct vm_area_struct *vma = get_gate_vma(task);

        if (!vma)
                return 0;
        return (addr >= vma->vm_start) && (addr < vma->vm_end);
}

/*
 * Use this when you have no reliable task/vma, typically from interrupt
 * context.  It is less reliable than using the task's vma and may give
 * false positives.
 */
int in_gate_area_no_task(unsigned long addr)
{
        return (addr >= VSYSCALL_START) && (addr < VSYSCALL_END);
}

const char *arch_vma_name(struct vm_area_struct *vma)
{
        if (vma->vm_mm && vma->vm_start == (long)vma->vm_mm->context.vdso)
                return "[vdso]";
        if (vma == &gate_vma)
                return "[vsyscall]";
        return NULL;
}

#ifdef CONFIG_SPARSEMEM_VMEMMAP
/*
 * Initialise the sparsemem vmemmap using huge-pages at the PMD level.
 */
int __meminit vmemmap_populate(struct page *start_page,
                               unsigned long size, int node)
{
        unsigned long addr = (unsigned long)start_page;
        unsigned long end = (unsigned long)(start_page + size);
        unsigned long next;
        pgd_t *pgd;
        pud_t *pud;
        pmd_t *pmd;

        for (; addr < end; addr = next) {
                next = pmd_addr_end(addr, end);

                pgd = vmemmap_pgd_populate(addr, node);
                if (!pgd)
                        return -ENOMEM;
                pud = vmemmap_pud_populate(pgd, addr, node);
                if (!pud)
                        return -ENOMEM;

                pmd = pmd_offset(pud, addr);
                if (pmd_none(*pmd)) {
                        pte_t entry;
                        void *p = vmemmap_alloc_block(PMD_SIZE, node);

                        if (!p)
                                return -ENOMEM;

                        entry = pfn_pte(__pa(p) >> PAGE_SHIFT, PAGE_KERNEL);
                        mk_pte_huge(entry);
                        set_pmd(pmd, __pmd(pte_val(entry)));

                        printk(KERN_DEBUG " [%lx-%lx] PMD ->%p on node %d\n",
                               addr, addr + PMD_SIZE - 1, p, node);
                } else
                        vmemmap_verify((pte_t *)pmd, node, addr, next);
        }

        return 0;
}
#endif