/*
 * This file contains the routines for flushing entries from the
 * TLB and MMU hash table.
 *
 * Derived from arch/ppc64/mm/init.c:
 *   Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
 *
 * Modifications by Paul Mackerras (PowerMac) (paulus@cs.anu.edu.au)
 * and Cort Dougan (PReP) (cort@cs.nmt.edu)
 *   Copyright (C) 1996 Paul Mackerras
 *
 * Derived from "arch/i386/mm/init.c"
 *   Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds
 *
 * Dave Engebretsen <engebret@us.ibm.com>
 *   Rework for PPC64 port.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 */

#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/init.h>
#include <linux/percpu.h>
#include <linux/hardirq.h>
#include <asm/pgalloc.h>
#include <asm/tlbflush.h>
#include <asm/tlb.h>
#include <asm/bug.h>

DEFINE_PER_CPU(struct ppc64_tlb_batch, ppc64_tlb_batch);

/* This is declared as we are using the more or less generic
 * include/asm-powerpc/tlb.h file -- tgall
 */
DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);
DEFINE_PER_CPU(struct pte_freelist_batch *, pte_freelist_cur);
unsigned long pte_freelist_forced_free;

struct pte_freelist_batch
{
        struct rcu_head rcu;
        unsigned int index;
        pgtable_free_t tables[0];
};
#define PTE_FREELIST_SIZE \
        ((PAGE_SIZE - sizeof(struct pte_freelist_batch)) \
          / sizeof(pgtable_free_t))
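/*
 * A pte_freelist_batch occupies exactly one page: the rcu/index header
 * sits at the front and the zero-length tables[] array takes up the
 * remainder, so PTE_FREELIST_SIZE is simply how many pgtable_free_t
 * entries fit in the rest of the page.  As a rough illustration only
 * (the exact figure depends on the configuration): with a 4K PAGE_SIZE
 * and an 8-byte pgtable_free_t, that works out to roughly 500 entries
 * per batch.
 */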
#ifdef CONFIG_SMP
static void pte_free_smp_sync(void *arg)
{
        /* Do nothing, just ensure we sync with all CPUs */
}
#endif

/* This is only called when we are critically out of memory
 * (and fail to get a page in pte_free_tlb).
 */
static void pgtable_free_now(pgtable_free_t pgf)
{
        pte_freelist_forced_free++;

        smp_call_function(pte_free_smp_sync, NULL, 0, 1);

        pgtable_free(pgf);
}

static void pte_free_rcu_callback(struct rcu_head *head)
{
        struct pte_freelist_batch *batch =
                container_of(head, struct pte_freelist_batch, rcu);
        unsigned int i;

        for (i = 0; i < batch->index; i++)
                pgtable_free(batch->tables[i]);

        free_page((unsigned long)batch);
}

static void pte_free_submit(struct pte_freelist_batch *batch)
{
        INIT_RCU_HEAD(&batch->rcu);
        call_rcu(&batch->rcu, pte_free_rcu_callback);
}
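/*
 * Queue a page-table page for freeing at the end of the current
 * mmu_gather.  The freeing has to be deferred because other CPUs may
 * still be walking the page tables without holding locks (e.g. from the
 * hash fault path), so the page must not be reused until they are known
 * to be done; the RCU grace period (or, in the out-of-memory fallback,
 * the explicit IPI sync in pgtable_free_now() above) provides that
 * guarantee.  When the mm is only in use on the local CPU there is
 * nobody to wait for and the page is freed immediately.
 */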
void pgtable_free_tlb(struct mmu_gather *tlb, pgtable_free_t pgf)
{
        /* This is safe since tlb_gather_mmu has disabled preemption */
        cpumask_t local_cpumask = cpumask_of_cpu(smp_processor_id());
        struct pte_freelist_batch **batchp = &__get_cpu_var(pte_freelist_cur);

        if (atomic_read(&tlb->mm->mm_users) < 2 ||
            cpus_equal(tlb->mm->cpu_vm_mask, local_cpumask)) {
                pgtable_free(pgf);
                return;
        }

        if (*batchp == NULL) {
                *batchp = (struct pte_freelist_batch *)__get_free_page(GFP_ATOMIC);
                if (*batchp == NULL) {
                        pgtable_free_now(pgf);
                        return;
                }
                (*batchp)->index = 0;
        }
        (*batchp)->tables[(*batchp)->index++] = pgf;
        if ((*batchp)->index == PTE_FREELIST_SIZE) {
                pte_free_submit(*batchp);
                *batchp = NULL;
        }
}

/*
 * A Linux PTE was changed and the corresponding hash table entry
 * needs to be flushed. This function will either perform the flush
 * immediately or will batch it up if the current CPU has an active
 * batch on it.
 *
 * Must be called from within some kind of spinlock/non-preempt region...
 */
void hpte_need_flush(struct mm_struct *mm, unsigned long addr,
                     pte_t *ptep, unsigned long pte, int huge)
{
        struct ppc64_tlb_batch *batch = &__get_cpu_var(ppc64_tlb_batch);
        unsigned long vsid, vaddr;
        unsigned int psize;
        real_pte_t rpte;
        int i;

        i = batch->index;

        /* We mask the address for the base page size. Huge pages will
         * have applied their own masking already
         */
        addr &= PAGE_MASK;

        /* Get page size (maybe move back to caller).
         *
         * NOTE: when using special 64K mappings in 4K environment like
         * for SPEs, we obtain the page size from the slice, which thus
         * must still exist (and thus the VMA not reused) at the time
         * of this call
         */
        if (huge) {
#ifdef CONFIG_HUGETLB_PAGE
                psize = mmu_huge_psize;
#else
                BUG();
                psize = pte_pagesize_index(mm, addr, pte); /* shut up gcc */
#endif
        } else
                psize = pte_pagesize_index(mm, addr, pte);

        /* Build full vaddr */
        if (!is_kernel_addr(addr)) {
                vsid = get_vsid(mm->context.id, addr);
                WARN_ON(vsid == 0);
        } else
                vsid = get_kernel_vsid(addr);
        vaddr = (vsid << 28) | (addr & 0x0fffffff);
        rpte = __real_pte(__pte(pte), ptep);

        /*
         * Check if we have an active batch on this CPU. If not, just
         * flush now and return. For now, we do global invalidates
         * in that case, though it might be worth testing the mm cpu mask
         * and deciding to use local invalidates instead...
         */
        if (!batch->active) {
                flush_hash_page(vaddr, rpte, psize, 0);
                return;
        }

        /*
         * This can happen when we are in the middle of a TLB batch and
         * we encounter memory pressure (eg copy_page_range when it tries
         * to allocate a new pte). If we have to reclaim memory and end
         * up scanning and resetting referenced bits then our batch context
         * will change mid stream.
         *
         * We also need to ensure only one page size is present in a given
         * batch
         */
        if (i != 0 && (mm != batch->mm || batch->psize != psize)) {
                __flush_tlb_pending(batch);
                i = 0;
        }
        if (i == 0) {
                batch->mm = mm;
                batch->psize = psize;
        }
        batch->pte[i] = rpte;
        batch->vaddr[i] = vaddr;
        batch->index = ++i;
        if (i >= PPC64_TLB_BATCH_NR)
                __flush_tlb_pending(batch);
}

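/*
 * Typical batching discipline (sketch of how the pieces above and below
 * fit together): a caller enters lazy MMU mode, which marks the per-cpu
 * batch active, performs its PTE updates (each of which lands in
 * hpte_need_flush() above), and then leaves lazy MMU mode, which drains
 * whatever is still pending through __flush_tlb_pending() below.
 * __flush_hash_table_range() at the bottom of this file follows exactly
 * that pattern.
 */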
/*
 * This function is called when terminating an mmu batch or when a batch
 * is full. It will perform the flush of all the entries currently stored
 * in a batch.
 *
 * Must be called from within some kind of spinlock/non-preempt region...
 */
void __flush_tlb_pending(struct ppc64_tlb_batch *batch)
{
        cpumask_t tmp;
        int i, local = 0;

        i = batch->index;
        tmp = cpumask_of_cpu(smp_processor_id());
        if (cpus_equal(batch->mm->cpu_vm_mask, tmp))
                local = 1;
        if (i == 1)
                flush_hash_page(batch->vaddr[0], batch->pte[0],
                                batch->psize, local);
        else
                flush_hash_range(i, local);
        batch->index = 0;
}

void pte_free_finish(void)
{
        /* This is safe since tlb_gather_mmu has disabled preemption */
        struct pte_freelist_batch **batchp = &__get_cpu_var(pte_freelist_cur);

        if (*batchp == NULL)
                return;
        pte_free_submit(*batchp);
        *batchp = NULL;
}

/**
 * __flush_hash_table_range - Flush all HPTEs for a given address range
 *                            from the hash table (and the TLB). But keeps
 *                            the Linux PTEs intact.
 *
 * @mm    : mm_struct of the target address space (generally init_mm)
 * @start : starting address
 * @end   : ending address (not included in the flush)
 *
 * This function is mostly to be used by some IO hotplug code in order
 * to remove all hash entries from a given address range used to map IO
 * space on a removed PCI-PCI bridge without tearing down the full mapping
 * since 64K pages may overlap with other bridges when using 64K pages
 * with 4K HW pages on IO space.
 *
 * Because of that usage pattern, it's only available with CONFIG_HOTPLUG
 * and is implemented for small size rather than speed.
 */
#ifdef CONFIG_HOTPLUG

void __flush_hash_table_range(struct mm_struct *mm, unsigned long start,
                              unsigned long end)
{
        unsigned long flags;

        start = _ALIGN_DOWN(start, PAGE_SIZE);
        end = _ALIGN_UP(end, PAGE_SIZE);

        BUG_ON(!mm->pgd);

        /* Note: Normally, we should only ever use a batch within a
         * PTE locked section. This violates the rule, but will work
         * since we don't actually modify the PTEs, we just flush the
         * hash while leaving the PTEs intact (including their reference
         * to being hashed). This is not the most performance oriented
         * way to do things but is fine for our needs here.
         */
        local_irq_save(flags);
        arch_enter_lazy_mmu_mode();
        for (; start < end; start += PAGE_SIZE) {
                pte_t *ptep = find_linux_pte(mm->pgd, start);
                unsigned long pte;

                if (ptep == NULL)
                        continue;
                pte = pte_val(*ptep);
                if (!(pte & _PAGE_HASHPTE))
                        continue;
                hpte_need_flush(mm, start, ptep, pte, 0);
        }
        arch_leave_lazy_mmu_mode();
        local_irq_restore(flags);
}

#endif /* CONFIG_HOTPLUG */