/*
 * PPC64 (POWER4) Huge TLB Page Support for Kernel.
 *
 * Copyright (C) 2003 David Gibson, IBM Corporation.
 *
 * Based on the IA-32 version:
 * Copyright (C) 2002, Rohit Seth <rohit.seth@intel.com>
 */

#include <linux/init.h>
#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/hugetlb.h>
#include <linux/pagemap.h>
#include <linux/smp_lock.h>
#include <linux/slab.h>
#include <linux/err.h>
#include <linux/sysctl.h>
#include <asm/mman.h>
#include <asm/pgalloc.h>
#include <asm/tlb.h>
#include <asm/tlbflush.h>
#include <asm/mmu_context.h>
#include <asm/machdep.h>
#include <asm/cputable.h>

#define NUM_LOW_AREAS	(0x100000000UL >> SID_SHIFT)
#define NUM_HIGH_AREAS	(PGTABLE_RANGE >> HTLB_AREA_SHIFT)
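/* With 256MB segments (SID_SHIFT == 28) there are 16 low areas below
 * 4GB.  Assuming 1TB high areas (HTLB_AREA_SHIFT == 40) over a 44-bit
 * pagetable range, there are likewise 16 high areas, so each set fits
 * the u16 bitmap kept in mm->context. */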

#ifdef CONFIG_PPC_64K_PAGES
#define HUGEPTE_INDEX_SIZE	(PMD_SHIFT-HPAGE_SHIFT)
#else
#define HUGEPTE_INDEX_SIZE	(PUD_SHIFT-HPAGE_SHIFT)
#endif
#define PTRS_PER_HUGEPTE	(1 << HUGEPTE_INDEX_SIZE)
#define HUGEPTE_TABLE_SIZE	(sizeof(pte_t) << HUGEPTE_INDEX_SIZE)

#define HUGEPD_SHIFT		(HPAGE_SHIFT + HUGEPTE_INDEX_SIZE)
#define HUGEPD_SIZE		(1UL << HUGEPD_SHIFT)
#define HUGEPD_MASK		(~(HUGEPD_SIZE-1))
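/* A worked example, assuming the usual 16MB huge pages (HPAGE_SHIFT
 * == 24) on a 4K base-page kernel with PUD_SHIFT == 28:
 * HUGEPTE_INDEX_SIZE is 4, so each hugepte table holds 16 ptes,
 * occupies 128 bytes, and maps one HUGEPD_SIZE == 256MB region. */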

#define huge_pgtable_cache	(pgtable_cache[HUGEPTE_CACHE_NUM])

/* Flag to mark huge PD pointers.  This means pmd_bad() and pud_bad()
 * will choke on pointers to hugepte tables, which is handy for
 * catching screwups early. */
#define HUGEPD_OK	0x1

typedef struct { unsigned long pd; } hugepd_t;

#define hugepd_none(hpd)	((hpd).pd == 0)

static inline pte_t *hugepd_page(hugepd_t hpd)
{
	BUG_ON(!(hpd.pd & HUGEPD_OK));
	return (pte_t *)(hpd.pd & ~HUGEPD_OK);
}

static inline pte_t *hugepte_offset(hugepd_t *hpdp, unsigned long addr)
{
	unsigned long idx = ((addr >> HPAGE_SHIFT) & (PTRS_PER_HUGEPTE-1));
	pte_t *dir = hugepd_page(*hpdp);

	return dir + idx;
}
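/* e.g. with 16MB pages and PTRS_PER_HUGEPTE == 16, addr == 0x13000000
 * gives idx = (0x13000000 >> 24) & 15 = 19 & 15 = 3, i.e. the fourth
 * pte in the table. */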

static int __hugepte_alloc(struct mm_struct *mm, hugepd_t *hpdp,
			   unsigned long address)
{
	pte_t *new = kmem_cache_alloc(huge_pgtable_cache,
				      GFP_KERNEL|__GFP_REPEAT);

	if (! new)
		return -ENOMEM;

	spin_lock(&mm->page_table_lock);
	if (!hugepd_none(*hpdp))
		kmem_cache_free(huge_pgtable_cache, new);
	else
		hpdp->pd = (unsigned long)new | HUGEPD_OK;
	spin_unlock(&mm->page_table_lock);
	return 0;
}
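/* Same allocate-outside-the-lock, recheck-under-the-lock pattern as
 * the generic pte allocation paths: if another thread installed a
 * hugepte table first, the loser simply frees its copy. */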

/* Modelled after find_linux_pte() */
pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
{
	pgd_t *pg;
	pud_t *pu;

	BUG_ON(! in_hugepage_area(mm->context, addr));

	addr &= HPAGE_MASK;

	pg = pgd_offset(mm, addr);
	if (!pgd_none(*pg)) {
		pu = pud_offset(pg, addr);
		if (!pud_none(*pu)) {
#ifdef CONFIG_PPC_64K_PAGES
			pmd_t *pm;
			pm = pmd_offset(pu, addr);
			if (!pmd_none(*pm))
				return hugepte_offset((hugepd_t *)pm, addr);
#else
			return hugepte_offset((hugepd_t *)pu, addr);
#endif
		}
	}

	return NULL;
}
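/* Note the walk stops one level early: the hugepte table hangs off a
 * PUD entry on 4K base-page kernels and off a PMD entry on 64K ones,
 * matching the HUGEPTE_INDEX_SIZE definitions above. */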

pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr)
{
	pgd_t *pg;
	pud_t *pu;
	hugepd_t *hpdp = NULL;

	BUG_ON(! in_hugepage_area(mm->context, addr));

	addr &= HPAGE_MASK;

	pg = pgd_offset(mm, addr);
	pu = pud_alloc(mm, pg, addr);

	if (pu) {
#ifdef CONFIG_PPC_64K_PAGES
		pmd_t *pm;
		pm = pmd_alloc(mm, pu, addr);
		if (pm)
			hpdp = (hugepd_t *)pm;
#else
		hpdp = (hugepd_t *)pu;
#endif
	}

	if (! hpdp)
		return NULL;

	if (hugepd_none(*hpdp) && __hugepte_alloc(mm, hpdp, addr))
		return NULL;

	return hugepte_offset(hpdp, addr);
}

int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep)
{
	return 0;
}

static void free_hugepte_range(struct mmu_gather *tlb, hugepd_t *hpdp)
{
	pte_t *hugepte = hugepd_page(*hpdp);

	hpdp->pd = 0;
	tlb->need_flush = 1;
	pgtable_free_tlb(tlb, pgtable_free_cache(hugepte, HUGEPTE_CACHE_NUM,
						 PGF_CACHENUM_MASK));
}
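/* The table is not freed immediately: pgtable_free_tlb() queues it on
 * the mmu_gather so the memory is only returned once the TLB and hash
 * invalidations for this unmap have been done, keeping concurrent
 * walkers safe. */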

#ifdef CONFIG_PPC_64K_PAGES
static void hugetlb_free_pmd_range(struct mmu_gather *tlb, pud_t *pud,
				   unsigned long addr, unsigned long end,
				   unsigned long floor, unsigned long ceiling)
{
	pmd_t *pmd;
	unsigned long next;
	unsigned long start;

	start = addr;
	pmd = pmd_offset(pud, addr);
	do {
		next = pmd_addr_end(addr, end);
		if (pmd_none(*pmd))
			continue;
		free_hugepte_range(tlb, (hugepd_t *)pmd);
	} while (pmd++, addr = next, addr != end);

	start &= PUD_MASK;
	if (start < floor)
		return;
	if (ceiling) {
		ceiling &= PUD_MASK;
		if (!ceiling)
			return;
	}
	if (end - 1 > ceiling - 1)
		return;

	pmd = pmd_offset(pud, start);
	pud_clear(pud);
	pmd_free_tlb(tlb, pmd);
}
#endif

static void hugetlb_free_pud_range(struct mmu_gather *tlb, pgd_t *pgd,
				   unsigned long addr, unsigned long end,
				   unsigned long floor, unsigned long ceiling)
{
	pud_t *pud;
	unsigned long next;
	unsigned long start;

	start = addr;
	pud = pud_offset(pgd, addr);
	do {
		next = pud_addr_end(addr, end);
#ifdef CONFIG_PPC_64K_PAGES
		if (pud_none_or_clear_bad(pud))
			continue;
		hugetlb_free_pmd_range(tlb, pud, addr, next, floor, ceiling);
#else
		if (pud_none(*pud))
			continue;
		free_hugepte_range(tlb, (hugepd_t *)pud);
#endif
	} while (pud++, addr = next, addr != end);

	start &= PGDIR_MASK;
	if (start < floor)
		return;
	if (ceiling) {
		ceiling &= PGDIR_MASK;
		if (!ceiling)
			return;
	}
	if (end - 1 > ceiling - 1)
		return;

	pud = pud_offset(pgd, start);
	pgd_clear(pgd);
	pud_free_tlb(tlb, pud);
}

/*
 * This function frees user-level page tables of a process.
 *
 * Must be called with pagetable lock held.
 */
void hugetlb_free_pgd_range(struct mmu_gather **tlb,
			    unsigned long addr, unsigned long end,
			    unsigned long floor, unsigned long ceiling)
{
	pgd_t *pgd;
	unsigned long next;
	unsigned long start;

	/*
	 * Comments below taken from the normal free_pgd_range().  They
	 * apply here too.  The tests against HUGEPD_MASK below are
	 * essential, because we *don't* test for this at the bottom
	 * level.  Without them we'll attempt to free a hugepte table
	 * when we unmap just part of it, even if there are other
	 * active mappings using it.
	 *
	 * The next few lines have given us lots of grief...
	 *
	 * Why are we testing HUGEPD* at this top level?  Because
	 * often there will be no work to do at all, and we'd prefer
	 * not to go all the way down to the bottom just to discover
	 * that.
	 *
	 * Why all these "- 1"s?  Because 0 represents both the bottom
	 * of the address space and the top of it (using -1 for the
	 * top wouldn't help much: the masks would do the wrong thing).
	 * The rule is that addr 0 and floor 0 refer to the bottom of
	 * the address space, but end 0 and ceiling 0 refer to the top.
	 * Comparisons need to use "end - 1" and "ceiling - 1" (though
	 * that end 0 case should be mythical).
	 *
	 * Wherever addr is brought up or ceiling brought down, we
	 * must be careful to reject "the opposite 0" before it
	 * confuses the subsequent tests.  But what about where end is
	 * brought down by HUGEPD_SIZE below?  no, end can't go down to
	 * 0 there.
	 *
	 * Whereas we round start (addr) and ceiling down, by different
	 * masks at different levels, in order to test whether a table
	 * now has no other vmas using it, so can be freed, we don't
	 * bother to round floor or end up - the tests don't need that.
	 */

	addr &= HUGEPD_MASK;
	if (addr < floor) {
		addr += HUGEPD_SIZE;
		if (!addr)
			return;
	}
	if (ceiling) {
		ceiling &= HUGEPD_MASK;
		if (!ceiling)
			return;
	}
	if (end - 1 > ceiling - 1)
		end -= HUGEPD_SIZE;
	if (addr > end - 1)
		return;

	start = addr;
	pgd = pgd_offset((*tlb)->mm, addr);
	do {
		BUG_ON(! in_hugepage_area((*tlb)->mm->context, addr));
		next = pgd_addr_end(addr, end);
		if (pgd_none_or_clear_bad(pgd))
			continue;
		hugetlb_free_pud_range(*tlb, pgd, addr, next, floor, ceiling);
	} while (pgd++, addr = next, addr != end);
}
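/* Worked example, assuming HUGEPD_SIZE == 256MB (0x10000000): when
 * unmapping [0x14000000, 0x18000000) with floor == 0x12000000 (another
 * mapping below still uses the same hugepte table), addr rounds down
 * to 0x10000000, which is below floor, so it is bumped to 0x20000000;
 * that trips the addr > end - 1 check and we return without freeing
 * the still-shared table. */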

void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
		     pte_t *ptep, pte_t pte)
{
	if (pte_present(*ptep)) {
		/* We open-code pte_clear because we need to pass the right
		 * argument to hpte_update (huge / !huge)
		 */
		unsigned long old = pte_update(ptep, ~0UL);
		if (old & _PAGE_HASHPTE)
			hpte_update(mm, addr & HPAGE_MASK, ptep, old, 1);
		flush_tlb_pending();
	}
	*ptep = __pte(pte_val(pte) & ~_PAGE_HPTEFLAGS);
}

pte_t huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr,
			      pte_t *ptep)
{
	unsigned long old = pte_update(ptep, ~0UL);

	if (old & _PAGE_HASHPTE)
		hpte_update(mm, addr & HPAGE_MASK, ptep, old, 1);
	*ptep = __pte(0);

	return __pte(old);
}
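/* In both paths pte_update(ptep, ~0UL) atomically clears the pte and
 * returns its old value; the final '1' argument tells hpte_update()
 * this is a huge mapping, so the hash invalidation is done with the
 * huge page size rather than the base page size. */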

struct slb_flush_info {
	struct mm_struct *mm;
	u16 newareas;
};

static void flush_low_segments(void *parm)
{
	struct slb_flush_info *fi = parm;
	unsigned long i;

	BUILD_BUG_ON((sizeof(fi->newareas)*8) != NUM_LOW_AREAS);

	/* Only need to do anything if this CPU is working in the same
	 * mm as the one which has changed */
	if (current->active_mm != fi->mm)
		return;

	/* update the paca copy of the context struct */
	get_paca()->context = current->active_mm->context;

	asm volatile("isync" : : : "memory");
	for (i = 0; i < NUM_LOW_AREAS; i++) {
		if (! (fi->newareas & (1U << i)))
			continue;
		asm volatile("slbie %0"
			     : : "r" ((i << SID_SHIFT) | SLBIE_C));
	}
	asm volatile("isync" : : : "memory");
}
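/* e.g. if bit 5 of newareas is set, the slbie operand is
 * (5 << SID_SHIFT) | SLBIE_C: the ESID of the 256MB segment at
 * 0x50000000, plus the class bit, which has to match the class the
 * segment's SLB entries were created with. */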

static void flush_high_segments(void *parm)
{
	struct slb_flush_info *fi = parm;
	unsigned long i, j;

	BUILD_BUG_ON((sizeof(fi->newareas)*8) != NUM_HIGH_AREAS);

	/* Only need to do anything if this CPU is working in the same
	 * mm as the one which has changed */
	if (current->active_mm != fi->mm)
		return;

	/* update the paca copy of the context struct */
	get_paca()->context = current->active_mm->context;

	asm volatile("isync" : : : "memory");
	for (i = 0; i < NUM_HIGH_AREAS; i++) {
		if (! (fi->newareas & (1U << i)))
			continue;
		for (j = 0; j < (1UL << (HTLB_AREA_SHIFT-SID_SHIFT)); j++)
			asm volatile("slbie %0"
				     : : "r" (((i << HTLB_AREA_SHIFT)
					       + (j << SID_SHIFT)) | SLBIE_C));
	}
	asm volatile("isync" : : : "memory");
}
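/* A high area spans many 256MB segments and each must be invalidated
 * individually: assuming HTLB_AREA_SHIFT == 40 and SID_SHIFT == 28,
 * that is 4096 slbie instructions per newly opened area. */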

static int prepare_low_area_for_htlb(struct mm_struct *mm, unsigned long area)
{
	unsigned long start = area << SID_SHIFT;
	unsigned long end = (area+1) << SID_SHIFT;
	struct vm_area_struct *vma;

	BUG_ON(area >= NUM_LOW_AREAS);

	/* Check no VMAs are in the region */
	vma = find_vma(mm, start);
	if (vma && (vma->vm_start < end))
		return -EBUSY;

	return 0;
}

static int prepare_high_area_for_htlb(struct mm_struct *mm, unsigned long area)
{
	unsigned long start = area << HTLB_AREA_SHIFT;
	unsigned long end = (area+1) << HTLB_AREA_SHIFT;
	struct vm_area_struct *vma;

	BUG_ON(area >= NUM_HIGH_AREAS);

	/* Hack, so that each address is controlled by exactly one
	 * of the high or low area bitmaps, the first high area starts
	 * at 4GB, not 0 */
	if (start == 0)
		start = 0x100000000UL;

	/* Check no VMAs are in the region */
	vma = find_vma(mm, start);
	if (vma && (vma->vm_start < end))
		return -EBUSY;

	return 0;
}

static int open_low_hpage_areas(struct mm_struct *mm, u16 newareas)
{
	unsigned long i;
	struct slb_flush_info fi;

	BUILD_BUG_ON((sizeof(newareas)*8) != NUM_LOW_AREAS);
	BUILD_BUG_ON((sizeof(mm->context.low_htlb_areas)*8) != NUM_LOW_AREAS);

	newareas &= ~(mm->context.low_htlb_areas);
	if (! newareas)
		return 0; /* The segments we want are already open */

	for (i = 0; i < NUM_LOW_AREAS; i++)
		if ((1 << i) & newareas)
			if (prepare_low_area_for_htlb(mm, i) != 0)
				return -EBUSY;

	mm->context.low_htlb_areas |= newareas;

	/* the context change must make it to memory before the flush,
	 * so that further SLB misses do the right thing. */
	mb();

	fi.mm = mm;
	fi.newareas = newareas;
	on_each_cpu(flush_low_segments, &fi, 0, 1);

	return 0;
}

static int open_high_hpage_areas(struct mm_struct *mm, u16 newareas)
{
	struct slb_flush_info fi;
	unsigned long i;

	BUILD_BUG_ON((sizeof(newareas)*8) != NUM_HIGH_AREAS);
	BUILD_BUG_ON((sizeof(mm->context.high_htlb_areas)*8)
		     != NUM_HIGH_AREAS);

	newareas &= ~(mm->context.high_htlb_areas);
	if (! newareas)
		return 0; /* The areas we want are already open */

	for (i = 0; i < NUM_HIGH_AREAS; i++)
		if ((1 << i) & newareas)
			if (prepare_high_area_for_htlb(mm, i) != 0)
				return -EBUSY;

	mm->context.high_htlb_areas |= newareas;

	/* the context change must make it to memory before the flush,
	 * so that further SLB misses do the right thing. */
	mb();

	fi.mm = mm;
	fi.newareas = newareas;
	on_each_cpu(flush_high_segments, &fi, 0, 1);

	return 0;
}

int prepare_hugepage_range(unsigned long addr, unsigned long len, pgoff_t pgoff)
{
	int err = 0;

	if (pgoff & (~HPAGE_MASK >> PAGE_SHIFT))
		return -EINVAL;
	if (len & ~HPAGE_MASK)
		return -EINVAL;
	if (addr & ~HPAGE_MASK)
		return -EINVAL;

	if (addr < 0x100000000UL)
		err = open_low_hpage_areas(current->mm,
					   LOW_ESID_MASK(addr, len));
	if ((addr + len) > 0x100000000UL)
		err = open_high_hpage_areas(current->mm,
					    HTLB_AREA_MASK(addr, len));
	if (err) {
		printk(KERN_DEBUG "prepare_hugepage_range(%lx, %lx)"
		       " failed (lowmask: 0x%04hx, highmask: 0x%04hx)\n",
		       addr, len,
		       LOW_ESID_MASK(addr, len), HTLB_AREA_MASK(addr, len));
		return err;
	}

	return 0;
}
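/* e.g. a 32MB mapping at 0x30000000 lies entirely within low segment
 * 3: LOW_ESID_MASK(0x30000000, 0x2000000) == 1 << 3 == 0x0008, so
 * only that one 256MB area needs to be opened (and its stale SLB
 * entries flushed) before the hugepage mapping can be created. */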

struct page *
follow_huge_addr(struct mm_struct *mm, unsigned long address, int write)
{
	pte_t *ptep;
	struct page *page;

	if (! in_hugepage_area(mm->context, address))
		return ERR_PTR(-EINVAL);

	ptep = huge_pte_offset(mm, address);
	page = pte_page(*ptep);
	if (page)
		page += (address % HPAGE_SIZE) / PAGE_SIZE;

	return page;
}

int pmd_huge(pmd_t pmd)
{
	return 0;
}

struct page *
follow_huge_pmd(struct mm_struct *mm, unsigned long address,
		pmd_t *pmd, int write)
{
	BUG();
	return NULL;
}
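/* pmd_huge() is always 0 because ppc64 does not mark hugepages in the
 * pmd itself; follow_huge_addr() recognises them purely by address
 * range, so the generic follow_page() path never legitimately reaches
 * follow_huge_pmd(), hence the BUG(). */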

/* Because we have an exclusive hugepage region which lies within the
 * normal user address space, we have to take special measures to make
 * non-huge mmap()s evade the hugepage reserved regions. */
unsigned long arch_get_unmapped_area(struct file *filp, unsigned long addr,
				     unsigned long len, unsigned long pgoff,
				     unsigned long flags)
{
	struct mm_struct *mm = current->mm;
	struct vm_area_struct *vma;
	unsigned long start_addr;

	if (len > TASK_SIZE)
		return -ENOMEM;

	if (addr) {
		addr = PAGE_ALIGN(addr);
		vma = find_vma(mm, addr);
		if (((TASK_SIZE - len) >= addr)
		    && (!vma || (addr + len) <= vma->vm_start)
		    && !is_hugepage_only_range(mm, addr, len))
			return addr;
	}
	if (len > mm->cached_hole_size) {
		start_addr = addr = mm->free_area_cache;
	} else {
		start_addr = addr = TASK_UNMAPPED_BASE;
		mm->cached_hole_size = 0;
	}

full_search:
	vma = find_vma(mm, addr);
	while (TASK_SIZE - len >= addr) {
		BUG_ON(vma && (addr >= vma->vm_end));

		if (touches_hugepage_low_range(mm, addr, len)) {
			addr = ALIGN(addr+1, 1<<SID_SHIFT);
			vma = find_vma(mm, addr);
			continue;
		}
		if (touches_hugepage_high_range(mm, addr, len)) {
			addr = ALIGN(addr+1, 1UL<<HTLB_AREA_SHIFT);
			vma = find_vma(mm, addr);
			continue;
		}
		if (!vma || addr + len <= vma->vm_start) {
			/*
			 * Remember the place where we stopped the search:
			 */
			mm->free_area_cache = addr + len;
			return addr;
		}
		if (addr + mm->cached_hole_size < vma->vm_start)
			mm->cached_hole_size = vma->vm_start - addr;
		addr = vma->vm_end;
		vma = vma->vm_next;
	}

	/* Make sure we didn't miss any holes */
	if (start_addr != TASK_UNMAPPED_BASE) {
		start_addr = addr = TASK_UNMAPPED_BASE;
		mm->cached_hole_size = 0;
		goto full_search;
	}
	return -ENOMEM;
}

/*
 * This mmap-allocator allocates new areas top-down from below the
 * stack's low limit (the base):
 *
 * Because we have an exclusive hugepage region which lies within the
 * normal user address space, we have to take special measures to make
 * non-huge mmap()s evade the hugepage reserved regions.
 */
unsigned long
arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
			       const unsigned long len, const unsigned long pgoff,
			       const unsigned long flags)
{
	struct vm_area_struct *vma, *prev_vma;
	struct mm_struct *mm = current->mm;
	unsigned long base = mm->mmap_base, addr = addr0;
	unsigned long largest_hole = mm->cached_hole_size;
	int first_time = 1;

	/* requested length too big for entire address space */
	if (len > TASK_SIZE)
		return -ENOMEM;

	/* don't allow allocations above current base */
	if (mm->free_area_cache > base)
		mm->free_area_cache = base;

	/* requesting a specific address */
	if (addr) {
		addr = PAGE_ALIGN(addr);
		vma = find_vma(mm, addr);
		if (TASK_SIZE - len >= addr &&
		    (!vma || addr + len <= vma->vm_start)
		    && !is_hugepage_only_range(mm, addr, len))
			return addr;
	}

	if (len <= largest_hole) {
		largest_hole = 0;
		mm->free_area_cache = base;
	}
try_again:
	/* make sure it can fit in the remaining address space */
	if (mm->free_area_cache < len)
		goto fail;

	/* either no address requested or can't fit in requested address hole */
	addr = (mm->free_area_cache - len) & PAGE_MASK;
	do {
hugepage_recheck:
		if (touches_hugepage_low_range(mm, addr, len)) {
			addr = (addr & ((~0) << SID_SHIFT)) - len;
			goto hugepage_recheck;
		} else if (touches_hugepage_high_range(mm, addr, len)) {
			addr = (addr & ((~0UL) << HTLB_AREA_SHIFT)) - len;
			goto hugepage_recheck;
		}

		/*
		 * Lookup failure means no vma is above this address,
		 * i.e. return with success:
		 */
		if (!(vma = find_vma_prev(mm, addr, &prev_vma)))
			return addr;

		/*
		 * new region fits between prev_vma->vm_end and
		 * vma->vm_start, use it:
		 */
		if (addr+len <= vma->vm_start &&
		    (!prev_vma || (addr >= prev_vma->vm_end))) {
			/* remember the address as a hint for next time */
			mm->cached_hole_size = largest_hole;
			return (mm->free_area_cache = addr);
		} else {
			/* pull free_area_cache down to the first hole */
			if (mm->free_area_cache == vma->vm_end) {
				mm->free_area_cache = vma->vm_start;
				mm->cached_hole_size = largest_hole;
			}
		}

		/* remember the largest hole we saw so far */
		if (addr + largest_hole < vma->vm_start)
			largest_hole = vma->vm_start - addr;

		/* try just below the current vma->vm_start */
		addr = vma->vm_start-len;
	} while (len <= vma->vm_start);

fail:
	/*
	 * if hint left us with no space for the requested
	 * mapping then try again:
	 */
	if (first_time) {
		mm->free_area_cache = base;
		largest_hole = 0;
		first_time = 0;
		goto try_again;
	}
	/*
	 * A failed mmap() very likely causes application failure,
	 * so fall back to the bottom-up function here.  This scenario
	 * can happen with large stack limits and large mmap()
	 * allocations.
	 */
	mm->free_area_cache = TASK_UNMAPPED_BASE;
	mm->cached_hole_size = ~0UL;
	addr = arch_get_unmapped_area(filp, addr0, len, pgoff, flags);
	/*
	 * Restore the topdown base:
	 */
	mm->free_area_cache = base;
	mm->cached_hole_size = ~0UL;

	return addr;
}

static int htlb_check_hinted_area(unsigned long addr, unsigned long len)
{
	struct vm_area_struct *vma;

	vma = find_vma(current->mm, addr);
	if (TASK_SIZE - len >= addr &&
	    (!vma || ((addr + len) <= vma->vm_start)))
		return 0;

	return -ENOMEM;
}

static unsigned long htlb_get_low_area(unsigned long len, u16 segmask)
{
	unsigned long addr = 0;
	struct vm_area_struct *vma;

	vma = find_vma(current->mm, addr);
	while (addr + len <= 0x100000000UL) {
		BUG_ON(vma && (addr >= vma->vm_end)); /* invariant */

		if (! __within_hugepage_low_range(addr, len, segmask)) {
			addr = ALIGN(addr+1, 1<<SID_SHIFT);
			vma = find_vma(current->mm, addr);
			continue;
		}

		if (!vma || (addr + len) <= vma->vm_start)
			return addr;
		addr = ALIGN(vma->vm_end, HPAGE_SIZE);
		/* Depending on segmask this might not be a confirmed
		 * hugepage region, so the ALIGN could have skipped
		 * some VMAs */
		vma = find_vma(current->mm, addr);
	}

	return -ENOMEM;
}

static unsigned long htlb_get_high_area(unsigned long len, u16 areamask)
{
	unsigned long addr = 0x100000000UL;
	struct vm_area_struct *vma;

	vma = find_vma(current->mm, addr);
	while (addr + len <= TASK_SIZE_USER64) {
		BUG_ON(vma && (addr >= vma->vm_end)); /* invariant */

		if (! __within_hugepage_high_range(addr, len, areamask)) {
			addr = ALIGN(addr+1, 1UL<<HTLB_AREA_SHIFT);
			vma = find_vma(current->mm, addr);
			continue;
		}

		if (!vma || (addr + len) <= vma->vm_start)
			return addr;
		addr = ALIGN(vma->vm_end, HPAGE_SIZE);
		/* Depending on areamask this might not be a confirmed
		 * hugepage region, so the ALIGN could have skipped
		 * some VMAs */
		vma = find_vma(current->mm, addr);
	}

	return -ENOMEM;
}
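/* Both scanners walk upward from the bottom of their range looking
 * for a gap that (a) lies entirely within segments/areas enabled in
 * the given mask and (b) overlaps no vma; aligning to HPAGE_SIZE
 * after each busy vma keeps every candidate hugepage-aligned. */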

unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
					unsigned long len, unsigned long pgoff,
					unsigned long flags)
{
	int lastshift;
	u16 areamask, curareas;

	if (HPAGE_SHIFT == 0)
		return -EINVAL;
	if (len & ~HPAGE_MASK)
		return -EINVAL;
	if (len > TASK_SIZE)
		return -ENOMEM;

	if (!cpu_has_feature(CPU_FTR_16M_PAGE))
		return -EINVAL;

	/* Paranoia, caller should have dealt with this */
	BUG_ON((addr + len) < addr);

	if (test_thread_flag(TIF_32BIT)) {
		curareas = current->mm->context.low_htlb_areas;

		/* First see if we can use the hint address */
		if (addr && (htlb_check_hinted_area(addr, len) == 0)) {
			areamask = LOW_ESID_MASK(addr, len);
			if (open_low_hpage_areas(current->mm, areamask) == 0)
				return addr;
		}

		/* Next see if we can map in the existing low areas */
		addr = htlb_get_low_area(len, curareas);
		if (addr != -ENOMEM)
			return addr;

		/* Finally go looking for areas to open */
		lastshift = 0;
		for (areamask = LOW_ESID_MASK(0x100000000UL-len, len);
		     ! lastshift; areamask >>=1) {
			if (areamask & 1)
				lastshift = 1;

			addr = htlb_get_low_area(len, curareas | areamask);
			if ((addr != -ENOMEM)
			    && open_low_hpage_areas(current->mm, areamask) == 0)
				return addr;
		}
	} else {
		curareas = current->mm->context.high_htlb_areas;

		/* First see if we can use the hint address */
		/* We discourage 64-bit processes from doing hugepage
		 * mappings below 4GB (must use MAP_FIXED) */
		if ((addr >= 0x100000000UL)
		    && (htlb_check_hinted_area(addr, len) == 0)) {
			areamask = HTLB_AREA_MASK(addr, len);
			if (open_high_hpage_areas(current->mm, areamask) == 0)
				return addr;
		}

		/* Next see if we can map in the existing high areas */
		addr = htlb_get_high_area(len, curareas);
		if (addr != -ENOMEM)
			return addr;

		/* Finally go looking for areas to open */
		lastshift = 0;
		for (areamask = HTLB_AREA_MASK(TASK_SIZE_USER64-len, len);
		     ! lastshift; areamask >>=1) {
			if (areamask & 1)
				lastshift = 1;

			addr = htlb_get_high_area(len, curareas | areamask);
			if ((addr != -ENOMEM)
			    && open_high_hpage_areas(current->mm, areamask) == 0)
				return addr;
		}
	}
	printk(KERN_DEBUG "hugetlb_get_unmapped_area() unable to open"
	       " enough areas\n");
	return -ENOMEM;
}

/*
 * Called by asm hashtable.S for doing lazy icache flush
 */
static unsigned int hash_huge_page_do_lazy_icache(unsigned long rflags,
						  pte_t pte, int trap)
{
	struct page *page;
	int i;

	if (!pfn_valid(pte_pfn(pte)))
		return rflags;

	page = pte_page(pte);

	/* page is dirty */
	if (!test_bit(PG_arch_1, &page->flags) && !PageReserved(page)) {
		if (trap == 0x400) {
			for (i = 0; i < (HPAGE_SIZE / PAGE_SIZE); i++)
				__flush_dcache_icache(page_address(page+i));
			set_bit(PG_arch_1, &page->flags);
		} else {
			rflags |= HPTE_R_N;
		}
	}
	return rflags;
}
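/* trap == 0x400 is the instruction storage interrupt: the page is
 * about to be executed, so we pay for the full flush (with 4K base
 * pages that is HPAGE_SIZE / PAGE_SIZE == 4096 subpage flushes).  On
 * a data access we instead map the page no-execute (HPTE_R_N) and
 * defer the flush until the page really is executed from. */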

int hash_huge_page(struct mm_struct *mm, unsigned long access,
		   unsigned long ea, unsigned long vsid, int local,
		   unsigned long trap)
{
	pte_t *ptep;
	unsigned long old_pte, new_pte;
	unsigned long va, rflags, pa;
	long slot;
	int err = 1;

	ptep = huge_pte_offset(mm, ea);

	/* Search the Linux page table for a match with va */
	va = (vsid << 28) | (ea & 0x0fffffff);

	/*
	 * If no pte found or not present, send the problem up to
	 * do_page_fault
	 */
	if (unlikely(!ptep || pte_none(*ptep)))
		goto out;

	/*
	 * Check the user's access rights to the page.  If access should be
	 * prevented then send the problem up to do_page_fault.
	 */
	if (unlikely(access & ~pte_val(*ptep)))
		goto out;
	/*
	 * At this point, we have a pte (old_pte) which can be used to build
	 * or update an HPTE.  There are 2 cases:
	 *
	 * 1. There is a valid (present) pte with no associated HPTE (this is
	 *	the most common case)
	 * 2. There is a valid (present) pte with an associated HPTE.  The
	 *	current values of the pp bits in the HPTE prevent access
	 *	because we are doing software DIRTY bit management and the
	 *	page is currently not DIRTY.
	 */

	do {
		old_pte = pte_val(*ptep);
		if (old_pte & _PAGE_BUSY)
			goto out;
		new_pte = old_pte | _PAGE_BUSY |
			_PAGE_ACCESSED | _PAGE_HASHPTE;
	} while (old_pte != __cmpxchg_u64((unsigned long *)ptep,
					  old_pte, new_pte));

	rflags = 0x2 | (!(new_pte & _PAGE_RW));
	/* _PAGE_EXEC -> HW_NO_EXEC since it's inverted */
	rflags |= ((new_pte & _PAGE_EXEC) ? 0 : HPTE_R_N);
	if (!cpu_has_feature(CPU_FTR_COHERENT_ICACHE))
		/* No CPU has hugepages but lacks no-execute, so we
		 * don't need to worry about that case */
		rflags = hash_huge_page_do_lazy_icache(rflags, __pte(old_pte),
						       trap);

	/* Check if pte already has an hpte (case 2) */
	if (unlikely(old_pte & _PAGE_HASHPTE)) {
		/* There MIGHT be an HPTE for this pte */
		unsigned long hash, slot;

		hash = hpt_hash(va, HPAGE_SHIFT);
		if (old_pte & _PAGE_F_SECOND)
			hash = ~hash;
		slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
		slot += (old_pte & _PAGE_F_GIX) >> 12;

		if (ppc_md.hpte_updatepp(slot, rflags, va, mmu_huge_psize,
					 local) == -1)
			old_pte &= ~_PAGE_HPTEFLAGS;
	}

	if (likely(!(old_pte & _PAGE_HASHPTE))) {
		unsigned long hash = hpt_hash(va, HPAGE_SHIFT);
		unsigned long hpte_group;

		pa = pte_pfn(__pte(old_pte)) << PAGE_SHIFT;

repeat:
		hpte_group = ((hash & htab_hash_mask) *
			      HPTES_PER_GROUP) & ~0x7UL;

		/* clear HPTE slot information in new PTE */
		new_pte = (new_pte & ~_PAGE_HPTEFLAGS) | _PAGE_HASHPTE;

		/* Add in WIMG bits */
		/* XXX We should store these in the pte */
		/* --BenH: I think they are ... */
		rflags |= _PAGE_COHERENT;

		/* Insert into the hash table, primary slot */
		slot = ppc_md.hpte_insert(hpte_group, va, pa, rflags, 0,
					  mmu_huge_psize);

		/* Primary is full, try the secondary */
		if (unlikely(slot == -1)) {
			new_pte |= _PAGE_F_SECOND;
			hpte_group = ((~hash & htab_hash_mask) *
				      HPTES_PER_GROUP) & ~0x7UL;
			slot = ppc_md.hpte_insert(hpte_group, va, pa, rflags,
						  HPTE_V_SECONDARY,
						  mmu_huge_psize);
			if (slot == -1) {
				if (mftb() & 0x1)
					hpte_group = ((hash & htab_hash_mask) *
						      HPTES_PER_GROUP) & ~0x7UL;

				ppc_md.hpte_remove(hpte_group);
				goto repeat;
			}
		}

		if (unlikely(slot == -2))
			panic("hash_huge_page: pte_insert failed\n");

		new_pte |= (slot << 12) & _PAGE_F_GIX;
	}

	/*
	 * No need to use ldarx/stdcx here
	 */
	*ptep = __pte(new_pte & ~_PAGE_BUSY);

	err = 0;

out:
	return err;
}
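/* If both the primary and secondary hash groups are full, the code
 * above uses the low timebase bit to pick one of the two groups
 * pseudo-randomly, evicts an entry from it with hpte_remove() and
 * retries, so the insertion loop always terminates. */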

static void zero_ctor(void *addr, struct kmem_cache *cache, unsigned long flags)
{
	memset(addr, 0, kmem_cache_size(cache));
}

static int __init hugetlbpage_init(void)
{
	if (!cpu_has_feature(CPU_FTR_16M_PAGE))
		return -ENODEV;

	huge_pgtable_cache = kmem_cache_create("hugepte_cache",
					       HUGEPTE_TABLE_SIZE,
					       HUGEPTE_TABLE_SIZE,
					       SLAB_HWCACHE_ALIGN |
					       SLAB_MUST_HWCACHE_ALIGN,
					       zero_ctor, NULL);
	if (! huge_pgtable_cache)
		panic("hugetlbpage_init(): could not create hugepte cache\n");

	return 0;
}

module_init(hugetlbpage_init);