/*
 * PowerPC version
 *   Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
 *
 * Modifications by Paul Mackerras (PowerMac) (paulus@cs.anu.edu.au)
 * and Cort Dougan (PReP) (cort@cs.nmt.edu)
 *   Copyright (C) 1996 Paul Mackerras
 * Amiga/APUS changes by Jesper Skov (jskov@cygnus.co.uk).
 *
 * Derived from "arch/i386/mm/init.c"
 *   Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds
 *
 * Dave Engebretsen <engebret@us.ibm.com>
 *     Rework for PPC64 port.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 *
 */

#include <linux/config.h>
#include <linux/signal.h>
#include <linux/sched.h>
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/string.h>
#include <linux/types.h>
#include <linux/mman.h>
#include <linux/mm.h>
#include <linux/swap.h>
#include <linux/stddef.h>
#include <linux/vmalloc.h>
#include <linux/init.h>
#include <linux/delay.h>
#include <linux/bootmem.h>
#include <linux/highmem.h>
#include <linux/idr.h>
#include <linux/nodemask.h>
#include <linux/module.h>

#include <asm/pgalloc.h>
#include <asm/page.h>
#include <asm/abs_addr.h>
#include <asm/prom.h>
#include <asm/lmb.h>
#include <asm/rtas.h>
#include <asm/io.h>
#include <asm/mmu_context.h>
#include <asm/pgtable.h>
#include <asm/mmu.h>
#include <asm/uaccess.h>
#include <asm/smp.h>
#include <asm/machdep.h>
#include <asm/tlb.h>
#include <asm/eeh.h>
#include <asm/processor.h>
#include <asm/mmzone.h>
#include <asm/cputable.h>
#include <asm/ppcdebug.h>
#include <asm/sections.h>
#include <asm/system.h>
#include <asm/iommu.h>
#include <asm/vdso.h>
#include <asm/imalloc.h>

int mem_init_done;
unsigned long ioremap_bot = IMALLOC_BASE;
static unsigned long phbs_io_bot = PHBS_IO_BASE;

extern pgd_t swapper_pg_dir[];
extern struct task_struct *current_set[NR_CPUS];

unsigned long klimit = (unsigned long)_end;

unsigned long _SDR1 = 0;
unsigned long _ASR = 0;

/* max amount of RAM to use */
unsigned long __max_memory;

/* info on what we think the IO hole is */
unsigned long io_hole_start;
unsigned long io_hole_size;

void show_mem(void)
{
        unsigned long total = 0, reserved = 0;
        unsigned long shared = 0, cached = 0;
        struct page *page;
        pg_data_t *pgdat;
        unsigned long i;

        printk("Mem-info:\n");
        show_free_areas();
        printk("Free swap: %6ldkB\n", nr_swap_pages << (PAGE_SHIFT - 10));
        for_each_pgdat(pgdat) {
                for (i = 0; i < pgdat->node_spanned_pages; i++) {
                        page = pgdat_page_nr(pgdat, i);
                        total++;
                        if (PageReserved(page))
                                reserved++;
                        else if (PageSwapCache(page))
                                cached++;
                        else if (page_count(page))
                                shared += page_count(page) - 1;
                }
        }
        printk("%ld pages of RAM\n", total);
        printk("%ld reserved pages\n", reserved);
        printk("%ld pages shared\n", shared);
        printk("%ld pages swap cached\n", cached);
}

#ifdef CONFIG_PPC_ISERIES

void __iomem *ioremap(unsigned long addr, unsigned long size)
{
        return (void __iomem *)addr;
}

void __iomem *__ioremap(unsigned long addr, unsigned long size,
                        unsigned long flags)
{
        return (void __iomem *)addr;
}

void iounmap(volatile void __iomem *addr)
{
        return;
}

#else

/*
 * map_io_page() is currently only called by __ioremap().  It adds an
 * entry to the ioremap page table and an entry to the HPT, possibly
 * bolting it.
 */
static int map_io_page(unsigned long ea, unsigned long pa, int flags)
{
        pgd_t *pgdp;
        pud_t *pudp;
        pmd_t *pmdp;
        pte_t *ptep;
        unsigned long vsid;

        if (mem_init_done) {
                spin_lock(&init_mm.page_table_lock);
                pgdp = pgd_offset_k(ea);
                pudp = pud_alloc(&init_mm, pgdp, ea);
                if (!pudp) {
                        spin_unlock(&init_mm.page_table_lock);
                        return -ENOMEM;
                }
                pmdp = pmd_alloc(&init_mm, pudp, ea);
                if (!pmdp) {
                        spin_unlock(&init_mm.page_table_lock);
                        return -ENOMEM;
                }
                ptep = pte_alloc_kernel(&init_mm, pmdp, ea);
                if (!ptep) {
                        spin_unlock(&init_mm.page_table_lock);
                        return -ENOMEM;
                }
                pa = abs_to_phys(pa);
                set_pte_at(&init_mm, ea, ptep, pfn_pte(pa >> PAGE_SHIFT,
                                                       __pgprot(flags)));
                spin_unlock(&init_mm.page_table_lock);
        } else {
                unsigned long va, vpn, hash, hpteg;

                /*
                 * If the mm subsystem is not fully up, we cannot create a
                 * linux page table entry for this mapping.  Simply bolt an
                 * entry in the hardware page table.
                 */
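                /*
                 * Note: on the hash MMU the kernel effective address is
                 * split into a 256MB segment, translated via its VSID,
                 * plus a 28-bit segment offset; the VPN is then just the
                 * page index within the resulting virtual address.
                 */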
                vsid = get_kernel_vsid(ea);
                va = (vsid << 28) | (ea & 0xFFFFFFF);
                vpn = va >> PAGE_SHIFT;

                hash = hpt_hash(vpn, 0);

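                /*
                 * Masking the hash with htab_hash_mask selects a PTE
                 * group; multiplying by HPTES_PER_GROUP (8 slots per
                 * group) gives the index of the group's first slot.
                 */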
                hpteg = ((hash & htab_hash_mask) * HPTES_PER_GROUP);

                /* Panic if a pte group is full */
                if (ppc_md.hpte_insert(hpteg, va, pa >> PAGE_SHIFT, 0,
                                       _PAGE_NO_CACHE|_PAGE_GUARDED|PP_RWXX,
                                       1, 0) == -1) {
                        panic("map_io_page: could not insert mapping");
                }
        }
        return 0;
}


static void __iomem * __ioremap_com(unsigned long addr, unsigned long pa,
                                    unsigned long ea, unsigned long size,
                                    unsigned long flags)
{
        unsigned long i;

        if ((flags & _PAGE_PRESENT) == 0)
                flags |= pgprot_val(PAGE_KERNEL);

        for (i = 0; i < size; i += PAGE_SIZE)
                if (map_io_page(ea + i, pa + i, flags))
                        return NULL;

        return (void __iomem *) (ea + (addr & ~PAGE_MASK));
}


void __iomem *
ioremap(unsigned long addr, unsigned long size)
{
        return __ioremap(addr, size, _PAGE_NO_CACHE | _PAGE_GUARDED);
}

void __iomem * __ioremap(unsigned long addr, unsigned long size,
                         unsigned long flags)
{
        unsigned long pa, ea;
        void __iomem *ret;

        /*
         * Choose an address to map it to.  Once the imalloc system is
         * running, we use it.  Before that, we map using addresses going
         * up from ioremap_bot.  imalloc will use the addresses from
         * ioremap_bot through IMALLOC_END (0xE000001fffffffff).
         */
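        /*
         * Example: ioremap(0x1004, 0x10) with 4K pages yields pa = 0x1000
         * and size = 0x1000 (one page); the 0x4 offset within the page is
         * added back to the token returned by __ioremap_com().
         */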
        pa = addr & PAGE_MASK;
        size = PAGE_ALIGN(addr + size) - pa;

        if (size == 0)
                return NULL;

        if (mem_init_done) {
                struct vm_struct *area;
                area = im_get_free_area(size);
                if (area == NULL)
                        return NULL;
                ea = (unsigned long)(area->addr);
                ret = __ioremap_com(addr, pa, ea, size, flags);
                if (!ret)
                        im_free(area->addr);
        } else {
                ea = ioremap_bot;
                ret = __ioremap_com(addr, pa, ea, size, flags);
                if (ret)
                        ioremap_bot += size;
        }
        return ret;
}

#define IS_PAGE_ALIGNED(_val) ((_val) == ((_val) & PAGE_MASK))
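/* e.g. with 4K pages, IS_PAGE_ALIGNED(0x2000) holds but IS_PAGE_ALIGNED(0x2004) does not */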

int __ioremap_explicit(unsigned long pa, unsigned long ea,
                       unsigned long size, unsigned long flags)
{
        struct vm_struct *area;
        void __iomem *ret;

        /* For now, require page-aligned values for pa, ea, and size */
        if (!IS_PAGE_ALIGNED(pa) || !IS_PAGE_ALIGNED(ea) ||
            !IS_PAGE_ALIGNED(size)) {
                printk(KERN_ERR "unaligned value in %s\n", __FUNCTION__);
                return 1;
        }

        if (!mem_init_done) {
                /* Two things to consider in this case:
                 * 1) No records will be kept (imalloc, etc) that the region
                 *    has been remapped
                 * 2) It won't be easy to iounmap() the region later (because
                 *    of 1)
                 */
                ;
        } else {
                area = im_get_area(ea, size,
                        IM_REGION_UNUSED|IM_REGION_SUBSET|IM_REGION_EXISTS);
                if (area == NULL) {
                        /* Expected when PHB-dlpar is in play */
                        return 1;
                }
                if (ea != (unsigned long) area->addr) {
                        printk(KERN_ERR "unexpected addr return from "
                               "im_get_area\n");
                        return 1;
                }
        }

        ret = __ioremap_com(pa, pa, ea, size, flags);
        if (ret == NULL) {
                printk(KERN_ERR "ioremap_explicit() allocation failure !\n");
                return 1;
        }
        if (ret != (void *) ea) {
                printk(KERN_ERR "__ioremap_com() returned unexpected addr\n");
                return 1;
        }

        return 0;
}

/*
 * Unmap an IO region and remove it from imalloc'd list.
 * Access to IO memory should be serialized by the driver.
 * This code is modeled after vmalloc code - unmap_vm_area()
 *
 * XXX what about calls before mem_init_done (ie python_countermeasures())
 */
void iounmap(volatile void __iomem *token)
{
        void *addr;

        if (!mem_init_done)
                return;

        addr = (void *) ((unsigned long __force) token & PAGE_MASK);

        im_free(addr);
}

static int iounmap_subset_regions(unsigned long addr, unsigned long size)
{
        struct vm_struct *area;

        /* Check whether subsets of this region exist */
        area = im_get_area(addr, size, IM_REGION_SUPERSET);
        if (area == NULL)
                return 1;

        while (area) {
                iounmap((void __iomem *) area->addr);
                area = im_get_area(addr, size, IM_REGION_SUPERSET);
        }

        return 0;
}

int iounmap_explicit(volatile void __iomem *start, unsigned long size)
{
        struct vm_struct *area;
        unsigned long addr;
        int rc;

        addr = (unsigned long __force) start & PAGE_MASK;

        /* Verify that the region either exists or is a subset of an existing
         * region.  In the latter case, split the parent region to create
         * the exact region.
         */
        area = im_get_area(addr, size, IM_REGION_EXISTS | IM_REGION_SUBSET);
        if (area == NULL) {
                /* Determine whether subset regions exist.  If so, unmap */
                rc = iounmap_subset_regions(addr, size);
                if (rc) {
                        printk(KERN_ERR
                               "%s() cannot unmap nonexistent range 0x%lx\n",
                               __FUNCTION__, addr);
                        return 1;
                }
        } else {
                iounmap((void __iomem *) area->addr);
        }
        /*
         * FIXME! This can't be right:
        iounmap(area->addr);
         * Maybe it should be "iounmap(area);"
         */
        return 0;
}

#endif

EXPORT_SYMBOL(ioremap);
EXPORT_SYMBOL(__ioremap);
EXPORT_SYMBOL(iounmap);

void free_initmem(void)
{
        unsigned long addr;

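        /*
         * Init pages were marked reserved at boot: clear the reserved bit
         * and give each page a refcount of one so that free_page() hands
         * it back to the page allocator.
         */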
        addr = (unsigned long)__init_begin;
        for (; addr < (unsigned long)__init_end; addr += PAGE_SIZE) {
                ClearPageReserved(virt_to_page(addr));
                set_page_count(virt_to_page(addr), 1);
                free_page(addr);
                totalram_pages++;
        }
        printk("Freeing unused kernel memory: %luk freed\n",
               ((unsigned long)__init_end - (unsigned long)__init_begin) >> 10);
}

#ifdef CONFIG_BLK_DEV_INITRD
void free_initrd_mem(unsigned long start, unsigned long end)
{
        if (start < end)
                printk("Freeing initrd memory: %ldk freed\n", (end - start) >> 10);
        for (; start < end; start += PAGE_SIZE) {
                ClearPageReserved(virt_to_page(start));
                set_page_count(virt_to_page(start), 1);
                free_page(start);
                totalram_pages++;
        }
}
#endif

static DEFINE_SPINLOCK(mmu_context_lock);
static DEFINE_IDR(mmu_context_idr);

int init_new_context(struct task_struct *tsk, struct mm_struct *mm)
{
        int index;
        int err;

#ifdef CONFIG_HUGETLB_PAGE
        /* We leave htlb_segs as it was, but for a fork, we need to
         * clear the huge_pgdir. */
        mm->context.huge_pgdir = NULL;
#endif

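        /*
         * idr_pre_get() only preallocates memory; the allocation under
         * the lock can still fail with -EAGAIN if another CPU races in
         * and consumes the preallocated node, hence the retry loop.
         */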
again:
        if (!idr_pre_get(&mmu_context_idr, GFP_KERNEL))
                return -ENOMEM;

        spin_lock(&mmu_context_lock);
        err = idr_get_new_above(&mmu_context_idr, NULL, 1, &index);
        spin_unlock(&mmu_context_lock);

        if (err == -EAGAIN)
                goto again;
        else if (err)
                return err;

        if (index > MAX_CONTEXT) {
                idr_remove(&mmu_context_idr, index);
                return -ENOMEM;
        }

        mm->context.id = index;

        return 0;
}

void destroy_context(struct mm_struct *mm)
{
        spin_lock(&mmu_context_lock);
        idr_remove(&mmu_context_idr, mm->context.id);
        spin_unlock(&mmu_context_lock);

        mm->context.id = NO_CONTEXT;

        hugetlb_mm_free_pgd(mm);
}

/*
 * Do very early mm setup.
 */
void __init mm_init_ppc64(void)
{
#ifndef CONFIG_PPC_ISERIES
        unsigned long i;
#endif

        ppc64_boot_msg(0x100, "MM Init");

        /* This is the story of the IO hole... please, keep seated,
         * unfortunately, we are out of oxygen masks at the moment.
         * So we need some rough way to tell where your big IO hole
         * is.  On pmac, it's between 2G and 4G, on POWER3, it's around
         * that area as well, on POWER4 we don't have one, etc...
         * We need that as a "hint" when sizing the TCE table on POWER3.
         * So far, the simplest way that seems to work well enough for
         * us is to just assume that the first discontinuity in our
         * physical RAM layout is the IO hole.  That may not be correct
         * in the future (and isn't on iSeries, but then we don't care ;)
         */

#ifndef CONFIG_PPC_ISERIES
        for (i = 1; i < lmb.memory.cnt; i++) {
                unsigned long base, prevbase, prevsize;

                prevbase = lmb.memory.region[i-1].physbase;
                prevsize = lmb.memory.region[i-1].size;
                base = lmb.memory.region[i].physbase;
                if (base > (prevbase + prevsize)) {
                        io_hole_start = prevbase + prevsize;
                        io_hole_size = base - (prevbase + prevsize);
                        break;
                }
        }
#endif /* CONFIG_PPC_ISERIES */
        if (io_hole_start)
                printk("IO Hole assumed to be %lx -> %lx\n",
                       io_hole_start, io_hole_start + io_hole_size - 1);

        ppc64_boot_msg(0x100, "MM Init Done");
}

/*
 * This is called by /dev/mem to know if a given address has to
 * be mapped non-cacheable or not.
 */
int page_is_ram(unsigned long pfn)
{
        int i;
        unsigned long paddr = (pfn << PAGE_SHIFT);

        for (i = 0; i < lmb.memory.cnt; i++) {
                unsigned long base;

#ifdef CONFIG_MSCHUNKS
                base = lmb.memory.region[i].physbase;
#else
                base = lmb.memory.region[i].base;
#endif
                if ((paddr >= base) &&
                    (paddr < (base + lmb.memory.region[i].size))) {
                        return 1;
                }
        }

        return 0;
}
EXPORT_SYMBOL(page_is_ram);

/*
 * Initialize the bootmem system and give it all the memory we
 * have available.
 */
#ifndef CONFIG_DISCONTIGMEM
void __init do_init_bootmem(void)
{
        unsigned long i;
        unsigned long start, bootmap_pages;
        unsigned long total_pages = lmb_end_of_DRAM() >> PAGE_SHIFT;
        int boot_mapsize;

        /*
         * Find an area to use for the bootmem bitmap.  Calculate the size
         * of the bitmap required as (Total Memory) / PAGE_SIZE / BITS_PER_BYTE.
         * Add 1 additional page in case the address isn't page-aligned.
         */
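        /*
         * Example: with 1GB of RAM and 4K pages that is 262144 pages,
         * needing a 32KB bitmap, i.e. 8 bitmap pages.
         */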
        bootmap_pages = bootmem_bootmap_pages(total_pages);

        start = abs_to_phys(lmb_alloc(bootmap_pages << PAGE_SHIFT, PAGE_SIZE));
        BUG_ON(!start);

        boot_mapsize = init_bootmem(start >> PAGE_SHIFT, total_pages);

        max_pfn = max_low_pfn;

        /* Add all physical memory to the bootmem map */
        for (i = 0; i < lmb.memory.cnt; i++) {
                unsigned long physbase, size;

                physbase = lmb.memory.region[i].physbase;
                size = lmb.memory.region[i].size;
                free_bootmem(physbase, size);
        }

        /* reserve the sections we're already using */
        for (i = 0; i < lmb.reserved.cnt; i++) {
                unsigned long physbase = lmb.reserved.region[i].physbase;
                unsigned long size = lmb.reserved.region[i].size;

                reserve_bootmem(physbase, size);
        }
}

/*
 * paging_init() sets up the page tables - in fact we've already done this.
 */
void __init paging_init(void)
{
        unsigned long zones_size[MAX_NR_ZONES];
        unsigned long zholes_size[MAX_NR_ZONES];
        unsigned long total_ram = lmb_phys_mem_size();
        unsigned long top_of_ram = lmb_end_of_DRAM();

        printk(KERN_INFO "Top of RAM: 0x%lx, Total RAM: 0x%lx\n",
               top_of_ram, total_ram);
        printk(KERN_INFO "Memory hole size: %ldMB\n",
               (top_of_ram - total_ram) >> 20);
        /*
         * All pages are DMA-able so we put them all in the DMA zone.
         */
        memset(zones_size, 0, sizeof(zones_size));
        memset(zholes_size, 0, sizeof(zholes_size));

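        /*
         * Example: 2GB of RAM with a 1GB IO hole in the middle gives
         * top_of_ram = 3GB but total_ram = 2GB, so the DMA zone spans
         * 3GB worth of pfns and zholes_size accounts for the missing 1GB.
         */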
        zones_size[ZONE_DMA] = top_of_ram >> PAGE_SHIFT;
        zholes_size[ZONE_DMA] = (top_of_ram - total_ram) >> PAGE_SHIFT;

        free_area_init_node(0, NODE_DATA(0), zones_size,
                            __pa(PAGE_OFFSET) >> PAGE_SHIFT, zholes_size);
}
#endif /* CONFIG_DISCONTIGMEM */

static struct kcore_list kcore_vmem;

static int __init setup_kcore(void)
{
        int i;

        for (i = 0; i < lmb.memory.cnt; i++) {
                unsigned long physbase, size;
                struct kcore_list *kcore_mem;

                physbase = lmb.memory.region[i].physbase;
                size = lmb.memory.region[i].size;

                /* GFP_ATOMIC to avoid might_sleep warnings during boot */
                kcore_mem = kmalloc(sizeof(struct kcore_list), GFP_ATOMIC);
                if (!kcore_mem)
                        panic("mem_init: kmalloc failed\n");

                kclist_add(kcore_mem, __va(physbase), size);
        }

        kclist_add(&kcore_vmem, (void *)VMALLOC_START, VMALLOC_END - VMALLOC_START);

        return 0;
}
module_init(setup_kcore);

void __init mem_init(void)
{
#ifdef CONFIG_DISCONTIGMEM
        int nid;
#endif
        pg_data_t *pgdat;
        unsigned long i;
        struct page *page;
        unsigned long reservedpages = 0, codesize, initsize, datasize, bsssize;

        num_physpages = max_low_pfn;    /* RAM is assumed contiguous */
        high_memory = (void *) __va(max_low_pfn * PAGE_SIZE);

#ifdef CONFIG_DISCONTIGMEM
        for_each_online_node(nid) {
                if (NODE_DATA(nid)->node_spanned_pages != 0) {
                        printk("freeing bootmem node %x\n", nid);
                        totalram_pages +=
                                free_all_bootmem_node(NODE_DATA(nid));
                }
        }
#else
        max_mapnr = num_physpages;
        totalram_pages += free_all_bootmem();
#endif

        for_each_pgdat(pgdat) {
                for (i = 0; i < pgdat->node_spanned_pages; i++) {
                        page = pgdat_page_nr(pgdat, i);
                        if (PageReserved(page))
                                reservedpages++;
                }
        }

        codesize = (unsigned long)&_etext - (unsigned long)&_stext;
        initsize = (unsigned long)&__init_end - (unsigned long)&__init_begin;
        datasize = (unsigned long)&_edata - (unsigned long)&__init_end;
        bsssize = (unsigned long)&__bss_stop - (unsigned long)&__bss_start;

        printk(KERN_INFO "Memory: %luk/%luk available (%luk kernel code, "
               "%luk reserved, %luk data, %luk bss, %luk init)\n",
               (unsigned long)nr_free_pages() << (PAGE_SHIFT - 10),
               num_physpages << (PAGE_SHIFT - 10),
               codesize >> 10,
               reservedpages << (PAGE_SHIFT - 10),
               datasize >> 10,
               bsssize >> 10,
               initsize >> 10);

        mem_init_done = 1;

#ifdef CONFIG_PPC_ISERIES
        iommu_vio_init();
#endif
        /* Initialize the vDSO */
        vdso_init();
}

/*
 * This is called when a page has been modified by the kernel.
 * It just marks the page as not i-cache clean.  We do the i-cache
 * flush later when the page is given to a user process, if necessary.
 */
void flush_dcache_page(struct page *page)
{
        if (cpu_has_feature(CPU_FTR_COHERENT_ICACHE))
                return;
        /* avoid an atomic op if possible */
        if (test_bit(PG_arch_1, &page->flags))
                clear_bit(PG_arch_1, &page->flags);
}
EXPORT_SYMBOL(flush_dcache_page);

void clear_user_page(void *page, unsigned long vaddr, struct page *pg)
{
        clear_page(page);

        if (cpu_has_feature(CPU_FTR_COHERENT_ICACHE))
                return;
        /*
         * We shouldn't have to do this, but some versions of glibc
         * require it (ld.so assumes zero-filled pages are icache clean)
         * - Anton
         */

        /* avoid an atomic op if possible */
        if (test_bit(PG_arch_1, &pg->flags))
                clear_bit(PG_arch_1, &pg->flags);
}
EXPORT_SYMBOL(clear_user_page);

void copy_user_page(void *vto, void *vfrom, unsigned long vaddr,
                    struct page *pg)
{
        copy_page(vto, vfrom);

        /*
         * We should be able to use the following optimisation, however
         * there are two problems.
         * Firstly a bug in some versions of binutils meant PLT sections
         * were not marked executable.
         * Secondly the first word in the GOT section is blrl, used
         * to establish the GOT address.  Until recently the GOT was
         * not marked executable.
         * - Anton
         */
#if 0
        if (!vma->vm_file && ((vma->vm_flags & VM_EXEC) == 0))
                return;
#endif

        if (cpu_has_feature(CPU_FTR_COHERENT_ICACHE))
                return;

        /* avoid an atomic op if possible */
        if (test_bit(PG_arch_1, &pg->flags))
                clear_bit(PG_arch_1, &pg->flags);
}

void flush_icache_user_range(struct vm_area_struct *vma, struct page *page,
                             unsigned long addr, int len)
{
        unsigned long maddr;

        maddr = (unsigned long)page_address(page) + (addr & ~PAGE_MASK);
        flush_icache_range(maddr, maddr + len);
}
EXPORT_SYMBOL(flush_icache_user_range);

/*
 * This is called at the end of handling a user page fault, when the
 * fault has been handled by updating a PTE in the linux page tables.
 * We use it to preload an HPTE into the hash table corresponding to
 * the updated linux PTE.
 *
 * This must always be called with the mm->page_table_lock held.
 */
void update_mmu_cache(struct vm_area_struct *vma, unsigned long ea,
                      pte_t pte)
{
        unsigned long vsid;
        void *pgdir;
        pte_t *ptep;
        int local = 0;
        cpumask_t tmp;
        unsigned long flags;

        /* handle i-cache coherency */
        if (!cpu_has_feature(CPU_FTR_COHERENT_ICACHE) &&
            !cpu_has_feature(CPU_FTR_NOEXECUTE)) {
                unsigned long pfn = pte_pfn(pte);
                if (pfn_valid(pfn)) {
                        struct page *page = pfn_to_page(pfn);
                        if (!PageReserved(page)
                            && !test_bit(PG_arch_1, &page->flags)) {
                                __flush_dcache_icache(page_address(page));
                                set_bit(PG_arch_1, &page->flags);
                        }
                }
        }

        /* We only want HPTEs for linux PTEs that have _PAGE_ACCESSED set */
        if (!pte_young(pte))
                return;

        pgdir = vma->vm_mm->pgd;
        if (pgdir == NULL)
                return;

        ptep = find_linux_pte(pgdir, ea);
        if (!ptep)
                return;

        vsid = get_vsid(vma->vm_mm->context.id, ea);

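        /*
         * If this mm has only ever been active on the current CPU, the
         * hash code can use a cheaper, CPU-local TLB invalidation rather
         * than a broadcast one.
         */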
        local_irq_save(flags);
        tmp = cpumask_of_cpu(smp_processor_id());
        if (cpus_equal(vma->vm_mm->cpu_vm_mask, tmp))
                local = 1;

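        /* 0x300 is the data storage interrupt vector, so the preload is
         * treated like an ordinary data access fault. */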
        __hash_page(ea, pte_val(pte) & (_PAGE_USER|_PAGE_RW), vsid, ptep,
                    0x300, local);
        local_irq_restore(flags);
}

void __iomem * reserve_phb_iospace(unsigned long size)
{
        void __iomem *virt_addr;

        if (phbs_io_bot >= IMALLOC_BASE)
                panic("reserve_phb_iospace(): phb io space overflow\n");

        virt_addr = (void __iomem *) phbs_io_bot;
        phbs_io_bot += size;

        return virt_addr;
}

kmem_cache_t *zero_cache;

static void zero_ctor(void *pte, kmem_cache_t *cache, unsigned long flags)
{
        memset(pte, 0, PAGE_SIZE);
}

void pgtable_cache_init(void)
{
        zero_cache = kmem_cache_create("zero",
                                       PAGE_SIZE,
                                       0,
                                       SLAB_HWCACHE_ALIGN | SLAB_MUST_HWCACHE_ALIGN,
                                       zero_ctor,
                                       NULL);
        if (!zero_cache)
                panic("pgtable_cache_init(): could not create zero_cache!\n");
}

pgprot_t phys_mem_access_prot(struct file *file, unsigned long addr,
                              unsigned long size, pgprot_t vma_prot)
{
        if (ppc_md.phys_mem_access_prot)
                return ppc_md.phys_mem_access_prot(file, addr, size, vma_prot);

        if (!page_is_ram(addr >> PAGE_SHIFT))
                vma_prot = __pgprot(pgprot_val(vma_prot)
                                    | _PAGE_GUARDED | _PAGE_NO_CACHE);
        return vma_prot;
}
EXPORT_SYMBOL(phys_mem_access_prot);