/*
 * This file contains ioremap and related functions for 64-bit machines.
 *
 * Derived from arch/ppc64/mm/init.c
 *   Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
 *
 * Modifications by Paul Mackerras (PowerMac) (paulus@samba.org)
 * and Cort Dougan (PReP) (cort@cs.nmt.edu)
 *   Copyright (C) 1996 Paul Mackerras
 *
 * Derived from "arch/i386/mm/init.c"
 *   Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds
 *
 * Dave Engebretsen <engebret@us.ibm.com>
 *   Rework for PPC64 port.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 */
#include <linux/signal.h>
#include <linux/sched.h>
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/string.h>
#include <linux/export.h>
#include <linux/types.h>
#include <linux/mman.h>
#include <linux/mm.h>
#include <linux/swap.h>
#include <linux/stddef.h>
#include <linux/vmalloc.h>
#include <linux/memblock.h>
#include <linux/slab.h>
#include <linux/hugetlb.h>

#include <asm/pgalloc.h>
#include <asm/io.h>
#include <asm/mmu_context.h>
#include <asm/pgtable.h>
#include <asm/mmu.h>
#include <asm/machdep.h>
#include <asm/tlb.h>
#include <asm/trace.h>
#include <asm/processor.h>
#include <asm/cputable.h>
#include <asm/sections.h>
#include <asm/firmware.h>
#include <asm/powernv.h>
#ifdef CONFIG_PPC_STD_MMU_64
#if TASK_SIZE_USER64 > (1UL << (ESID_BITS + SID_SHIFT))
#error TASK_SIZE_USER64 exceeds user VSID range
#endif
#endif
#ifdef CONFIG_PPC_BOOK3S_64
/*
 * partition table and process table for ISA 3.0
 */
struct prtb_entry *process_tb;
struct patb_entry *partition_tb;
unsigned long __pte_index_size;
EXPORT_SYMBOL(__pte_index_size);
unsigned long __pmd_index_size;
EXPORT_SYMBOL(__pmd_index_size);
unsigned long __pud_index_size;
EXPORT_SYMBOL(__pud_index_size);
unsigned long __pgd_index_size;
EXPORT_SYMBOL(__pgd_index_size);
unsigned long __pmd_cache_index;
EXPORT_SYMBOL(__pmd_cache_index);
unsigned long __pte_table_size;
EXPORT_SYMBOL(__pte_table_size);
unsigned long __pmd_table_size;
EXPORT_SYMBOL(__pmd_table_size);
unsigned long __pud_table_size;
EXPORT_SYMBOL(__pud_table_size);
unsigned long __pgd_table_size;
EXPORT_SYMBOL(__pgd_table_size);
unsigned long __pmd_val_bits;
EXPORT_SYMBOL(__pmd_val_bits);
unsigned long __pud_val_bits;
EXPORT_SYMBOL(__pud_val_bits);
unsigned long __pgd_val_bits;
EXPORT_SYMBOL(__pgd_val_bits);
unsigned long __kernel_virt_start;
EXPORT_SYMBOL(__kernel_virt_start);
unsigned long __kernel_virt_size;
EXPORT_SYMBOL(__kernel_virt_size);
unsigned long __vmalloc_start;
EXPORT_SYMBOL(__vmalloc_start);
unsigned long __vmalloc_end;
EXPORT_SYMBOL(__vmalloc_end);
struct page *vmemmap;
EXPORT_SYMBOL(vmemmap);
unsigned long __pte_frag_nr;
EXPORT_SYMBOL(__pte_frag_nr);
unsigned long __pte_frag_size_shift;
EXPORT_SYMBOL(__pte_frag_size_shift);
unsigned long ioremap_bot;
#else /* !CONFIG_PPC_BOOK3S_64 */
unsigned long ioremap_bot = IOREMAP_BASE;
#endif
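
/*
 * On Book3S 64 the page table geometry variables above are not known at
 * compile time: they are filled in during early MMU setup by the hash or
 * radix init code, depending on which translation mode the machine boots
 * with, so that the generic page table accessors work for either layout.
 */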
/**
 * __ioremap_at - Low level function to establish the page tables
 *                for an IO mapping
 */
void __iomem * __ioremap_at(phys_addr_t pa, void *ea, unsigned long size,
			    unsigned long flags)
{
	unsigned long i;

	/* Make sure we have the base flags */
	if ((flags & _PAGE_PRESENT) == 0)
		flags |= pgprot_val(PAGE_KERNEL);

	/* We don't support the 4K PFN hack with ioremap */
	if (flags & H_PAGE_4K_PFN)
		return NULL;

	WARN_ON(pa & ~PAGE_MASK);
	WARN_ON(((unsigned long)ea) & ~PAGE_MASK);
	WARN_ON(size & ~PAGE_MASK);

	for (i = 0; i < size; i += PAGE_SIZE)
		if (map_kernel_page((unsigned long)ea + i, pa + i, flags))
			return NULL;

	return (void __iomem *)ea;
}
/**
 * __iounmap_at - Low level function to tear down the page tables
 *                for an IO mapping. This is used for mappings that
 *                are manipulated manually, like partial unmapping of
 *                PCI IOs or ISA space.
 */
void __iounmap_at(void *ea, unsigned long size)
{
	WARN_ON(((unsigned long)ea) & ~PAGE_MASK);
	WARN_ON(size & ~PAGE_MASK);

	unmap_kernel_range((unsigned long)ea, size);
}
void __iomem * __ioremap_caller(phys_addr_t addr, unsigned long size,
				unsigned long flags, void *caller)
{
	phys_addr_t paligned;
	void __iomem *ret;

	/*
	 * Choose an address to map it to. Once the vmalloc system is
	 * running, we use it. Before that, we map using addresses going
	 * up from ioremap_bot; the vmalloc code then hands out addresses
	 * from ioremap_bot through IOREMAP_END.
	 */
	paligned = addr & PAGE_MASK;
	size = PAGE_ALIGN(addr + size) - paligned;

	if ((size == 0) || (paligned == 0))
		return NULL;

	if (slab_is_available()) {
		struct vm_struct *area;

		area = __get_vm_area_caller(size, VM_IOREMAP,
					    ioremap_bot, IOREMAP_END,
					    caller);
		if (area == NULL)
			return NULL;

		area->phys_addr = paligned;
		ret = __ioremap_at(paligned, area->addr, size, flags);
		if (!ret)
			vunmap(area->addr);
	} else {
		ret = __ioremap_at(paligned, (void *)ioremap_bot, size, flags);
		if (ret)
			ioremap_bot += size;
	}

	if (ret)
		ret += addr & ~PAGE_MASK;
	return ret;
}
void __iomem * __ioremap(phys_addr_t addr, unsigned long size,
			 unsigned long flags)
{
	return __ioremap_caller(addr, size, flags, __builtin_return_address(0));
}
void __iomem * ioremap(phys_addr_t addr, unsigned long size)
{
	unsigned long flags = pgprot_val(pgprot_noncached(__pgprot(0)));
	void *caller = __builtin_return_address(0);

	if (ppc_md.ioremap)
		return ppc_md.ioremap(addr, size, flags, caller);
	return __ioremap_caller(addr, size, flags, caller);
}
void __iomem * ioremap_wc(phys_addr_t addr, unsigned long size)
{
	unsigned long flags = pgprot_val(pgprot_noncached_wc(__pgprot(0)));
	void *caller = __builtin_return_address(0);

	if (ppc_md.ioremap)
		return ppc_md.ioremap(addr, size, flags, caller);
	return __ioremap_caller(addr, size, flags, caller);
}
void __iomem * ioremap_prot(phys_addr_t addr, unsigned long size,
			    unsigned long flags)
{
	void *caller = __builtin_return_address(0);

	/* writeable implies dirty for kernel addresses */
	if (flags & _PAGE_WRITE)
		flags |= _PAGE_DIRTY;

	/* we don't want to let _PAGE_EXEC leak out */
	flags &= ~_PAGE_EXEC;
	/*
	 * Force kernel mapping.
	 */
#if defined(CONFIG_PPC_BOOK3S_64)
	flags |= _PAGE_PRIVILEGED;
#else
	flags &= ~_PAGE_USER;
#endif

#ifdef _PAGE_BAP_SR
	/* _PAGE_USER contains _PAGE_BAP_SR on BookE using the new PTE format
	 * which means that we just cleared supervisor access... oops ;-) This
	 * restores it.
	 */
	flags |= _PAGE_BAP_SR;
#endif

	if (ppc_md.ioremap)
		return ppc_md.ioremap(addr, size, flags, caller);
	return __ioremap_caller(addr, size, flags, caller);
}
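
/*
 * Unlike ioremap()/ioremap_wc(), ioremap_prot() takes raw PTE flags from
 * the caller, hence the fixups above: kernel mappings never take a fault
 * to set the dirty bit, so writeable must imply dirty; execute permission
 * is stripped; and the mapping is forced to be a privileged (kernel-only)
 * one whatever the caller passed in.
 */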
/*
 * Unmap an IO region and remove it from the vmalloc'd mappings.
 * Access to IO memory should be serialized by the driver.
 */
void __iounmap(volatile void __iomem *token)
{
	void *addr;

	if (!slab_is_available())
		return;

	addr = (void *) ((unsigned long __force)
			 PCI_FIX_ADDR(token) & PAGE_MASK);
	if ((unsigned long)addr < ioremap_bot) {
		printk(KERN_WARNING "Attempt to iounmap early bolted mapping"
		       " at 0x%p\n", addr);
		return;
	}
	vunmap(addr);
}

void iounmap(volatile void __iomem *token)
{
	if (ppc_md.iounmap)
		ppc_md.iounmap(token);
	else
		__iounmap(token);
}
EXPORT_SYMBOL(ioremap);
EXPORT_SYMBOL(ioremap_wc);
EXPORT_SYMBOL(ioremap_prot);
EXPORT_SYMBOL(__ioremap);
EXPORT_SYMBOL(__ioremap_at);
EXPORT_SYMBOL(iounmap);
EXPORT_SYMBOL(__iounmap);
EXPORT_SYMBOL(__iounmap_at);
#ifndef __PAGETABLE_PUD_FOLDED
/* 4 level page table */
struct page *pgd_page(pgd_t pgd)
{
	if (pgd_huge(pgd))
		return pte_page(pgd_pte(pgd));
	return virt_to_page(pgd_page_vaddr(pgd));
}
#endif

struct page *pud_page(pud_t pud)
{
	if (pud_huge(pud))
		return pte_page(pud_pte(pud));
	return virt_to_page(pud_page_vaddr(pud));
}

/*
 * For a hugepage we have the pfn in the pmd itself, using PTE_RPN_SHIFT
 * bits for flags.  For a PTE page, we have a PTE_FRAG_SIZE (4K) aligned
 * virtual address instead.
 */
struct page *pmd_page(pmd_t pmd)
{
	if (pmd_trans_huge(pmd) || pmd_huge(pmd) || pmd_devmap(pmd))
		return pte_page(pmd_pte(pmd));
	return virt_to_page(pmd_page_vaddr(pmd));
}
#ifdef CONFIG_PPC_64K_PAGES
static pte_t *get_from_cache(struct mm_struct *mm)
{
	void *pte_frag, *ret;

	spin_lock(&mm->page_table_lock);
	ret = mm->context.pte_frag;
	if (ret) {
		pte_frag = ret + PTE_FRAG_SIZE;
		/*
		 * If we have taken up all the fragments mark the PTE page NULL
		 */
		if (((unsigned long)pte_frag & ~PAGE_MASK) == 0)
			pte_frag = NULL;
		mm->context.pte_frag = pte_frag;
	}
	spin_unlock(&mm->page_table_lock);
	return (pte_t *)ret;
}

static pte_t *__alloc_for_cache(struct mm_struct *mm, int kernel)
{
	void *ret = NULL;
	struct page *page;

	if (!kernel) {
		page = alloc_page(PGALLOC_GFP | __GFP_ACCOUNT);
		if (!page)
			return NULL;
		if (!pgtable_page_ctor(page)) {
			__free_page(page);
			return NULL;
		}
	} else {
		page = alloc_page(PGALLOC_GFP);
		if (!page)
			return NULL;
	}

	ret = page_address(page);
	spin_lock(&mm->page_table_lock);
	/*
	 * If pte_frag is already set (another thread populated it while we
	 * were allocating), return the freshly allocated page with a single
	 * fragment count and leave the existing cache alone.
	 */
	if (likely(!mm->context.pte_frag)) {
		set_page_count(page, PTE_FRAG_NR);
		mm->context.pte_frag = ret + PTE_FRAG_SIZE;
	}
	spin_unlock(&mm->page_table_lock);

	return (pte_t *)ret;
}

pte_t *pte_fragment_alloc(struct mm_struct *mm, unsigned long vmaddr, int kernel)
{
	pte_t *pte;

	pte = get_from_cache(mm);
	if (pte)
		return pte;

	return __alloc_for_cache(mm, kernel);
}
#endif /* CONFIG_PPC_64K_PAGES */
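
/*
 * The fragment allocator above (only built for CONFIG_PPC_64K_PAGES)
 * exists because with 64K pages a page is much larger than a single PTE
 * table, so each page is carved into PTE_FRAG_NR fragments of
 * PTE_FRAG_SIZE bytes and handed out one at a time from
 * mm->context.pte_frag.  The struct page refcount is set to PTE_FRAG_NR
 * when the page is first split up, so pte_fragment_free() below only
 * returns the page to the allocator once every fragment has been freed.
 */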
void pte_fragment_free(unsigned long *table, int kernel)
{
	struct page *page = virt_to_page(table);

	if (put_page_testzero(page)) {
		if (!kernel)
			pgtable_page_dtor(page);
		free_hot_cold_page(page, 0);
	}
}
#ifdef CONFIG_SMP
void pgtable_free_tlb(struct mmu_gather *tlb, void *table, int shift)
{
	unsigned long pgf = (unsigned long)table;

	BUG_ON(shift > MAX_PGTABLE_INDEX_SIZE);
	pgf |= shift;
	tlb_remove_table(tlb, (void *)pgf);
}
void __tlb_remove_table(void *_table)
{
	void *table = (void *)((unsigned long)_table & ~MAX_PGTABLE_INDEX_SIZE);
	unsigned int shift = (unsigned long)_table & MAX_PGTABLE_INDEX_SIZE;

	if (!shift)
		/* PTE page needs special handling */
		pte_fragment_free(table, 0);
	else {
		BUG_ON(shift > MAX_PGTABLE_INDEX_SIZE);
		kmem_cache_free(PGT_CACHE(shift), table);
	}
}
#else
void pgtable_free_tlb(struct mmu_gather *tlb, void *table, int shift)
{
	if (!shift) {
		/* PTE page needs special handling */
		pte_fragment_free(table, 0);
	} else {
		BUG_ON(shift > MAX_PGTABLE_INDEX_SIZE);
		kmem_cache_free(PGT_CACHE(shift), table);
	}
}
#endif /* CONFIG_SMP */
#ifdef CONFIG_PPC_BOOK3S_64
void __init mmu_partition_table_init(void)
{
	unsigned long patb_size = 1UL << PATB_SIZE_SHIFT;
	unsigned long ptcr;

	BUILD_BUG_ON_MSG((PATB_SIZE_SHIFT > 36), "Partition table size too large.");
	partition_tb = __va(memblock_alloc_base(patb_size, patb_size,
						MEMBLOCK_ALLOC_ANYWHERE));

	/* Initialize the Partition Table with no entries */
	memset((void *)partition_tb, 0, patb_size);

	/*
	 * Update the partition table control register: the physical base
	 * of the table, with the table size encoded in the low bits as
	 * (PATB_SIZE_SHIFT - 12).
	 */
	ptcr = __pa(partition_tb) | (PATB_SIZE_SHIFT - 12);
	mtspr(SPRN_PTCR, ptcr);
	powernv_set_nmmu_ptcr(ptcr);
}
void mmu_partition_table_set_entry(unsigned int lpid, unsigned long dw0,
				   unsigned long dw1)
{
	unsigned long old = be64_to_cpu(partition_tb[lpid].patb0);

	partition_tb[lpid].patb0 = cpu_to_be64(dw0);
	partition_tb[lpid].patb1 = cpu_to_be64(dw1);

	/*
	 * Global flush of TLBs and partition table caches for this lpid.
	 * The type of flush (hash or radix) depends on what the previous
	 * use of this partition ID was, not the new use.
	 */
	asm volatile("ptesync" : : : "memory");
	if (old & PATB_HR) {
		asm volatile(PPC_TLBIE_5(%0,%1,2,0,1) : :
			     "r" (TLBIEL_INVAL_SET_LPID), "r" (lpid));
		trace_tlbie(lpid, 0, TLBIEL_INVAL_SET_LPID, lpid, 2, 0, 1);
	} else {
		asm volatile(PPC_TLBIE_5(%0,%1,2,0,0) : :
			     "r" (TLBIEL_INVAL_SET_LPID), "r" (lpid));
		trace_tlbie(lpid, 0, TLBIEL_INVAL_SET_LPID, lpid, 2, 0, 0);
	}
	asm volatile("eieio; tlbsync; ptesync" : : : "memory");
}
EXPORT_SYMBOL_GPL(mmu_partition_table_set_entry);
#endif /* CONFIG_PPC_BOOK3S_64 */
#ifdef CONFIG_STRICT_KERNEL_RWX
void mark_rodata_ro(void)
{
	if (!mmu_has_feature(MMU_FTR_KERNEL_RO)) {
		pr_warn("Warning: Unable to mark rodata read only on this CPU.\n");
		return;
	}

	if (radix_enabled())
		radix__mark_rodata_ro();
	else
		hash__mark_rodata_ro();
}

void mark_initmem_nx(void)
{
	if (radix_enabled())
		radix__mark_initmem_nx();
	else
		hash__mark_initmem_nx();
}
#endif