]> git.proxmox.com Git - mirror_ubuntu-hirsute-kernel.git/blame - arch/x86/mm/pageattr.c
FRV: Change the timerfd syscalls to be the same as i386
[mirror_ubuntu-hirsute-kernel.git] / arch / x86 / mm / pageattr.c
CommitLineData
9f4c815c
IM
1/*
2 * Copyright 2002 Andi Kleen, SuSE Labs.
1da177e4 3 * Thanks to Ben LaHaise for precious feedback.
9f4c815c 4 */
1da177e4 5#include <linux/highmem.h>
8192206d 6#include <linux/bootmem.h>
1da177e4 7#include <linux/module.h>
9f4c815c 8#include <linux/sched.h>
1da177e4 9#include <linux/slab.h>
9f4c815c 10#include <linux/mm.h>
76ebd054 11#include <linux/interrupt.h>
9f4c815c 12
950f9d95 13#include <asm/e820.h>
1da177e4
LT
14#include <asm/processor.h>
15#include <asm/tlbflush.h>
f8af095d 16#include <asm/sections.h>
9f4c815c
IM
17#include <asm/uaccess.h>
18#include <asm/pgalloc.h>
c31c7d48 19#include <asm/proto.h>
1da177e4 20
9df84993
IM
21/*
22 * The current flushing context - we pass it instead of 5 arguments:
23 */
72e458df
TG
24struct cpa_data {
25 unsigned long vaddr;
72e458df
TG
26 pgprot_t mask_set;
27 pgprot_t mask_clr;
65e074df 28 int numpages;
f4ae5da0 29 int flushtlb;
c31c7d48 30 unsigned long pfn;
72e458df
TG
31};
32
c31c7d48
TG
33#ifdef CONFIG_X86_64
34
35static inline unsigned long highmap_start_pfn(void)
36{
37 return __pa(_text) >> PAGE_SHIFT;
38}
39
40static inline unsigned long highmap_end_pfn(void)
41{
42 return __pa(round_up((unsigned long)_end, PMD_SIZE)) >> PAGE_SHIFT;
43}
44
45#endif
46
ed724be6
AV
/*
 * Return 1 when @addr lies in the half-open interval [@start, @end),
 * 0 otherwise.
 */
static inline int
within(unsigned long addr, unsigned long start, unsigned long end)
{
	if (addr < start)
		return 0;

	return addr < end;
}
52
d7c8f21a
TG
53/*
54 * Flushing functions
55 */
cd8ddf1a 56
cd8ddf1a
TG
57/**
58 * clflush_cache_range - flush a cache range with clflush
59 * @addr: virtual start address
60 * @size: number of bytes to flush
61 *
62 * clflush is an unordered instruction which needs fencing with mfence
63 * to avoid ordering issues.
64 */
4c61afcd 65void clflush_cache_range(void *vaddr, unsigned int size)
d7c8f21a 66{
4c61afcd 67 void *vend = vaddr + size - 1;
d7c8f21a 68
cd8ddf1a 69 mb();
4c61afcd
IM
70
71 for (; vaddr < vend; vaddr += boot_cpu_data.x86_clflush_size)
72 clflush(vaddr);
73 /*
74 * Flush any possible final partial cacheline:
75 */
76 clflush(vend);
77
cd8ddf1a 78 mb();
d7c8f21a
TG
79}
80
af1e6844 81static void __cpa_flush_all(void *arg)
d7c8f21a 82{
6bb8383b
AK
83 unsigned long cache = (unsigned long)arg;
84
d7c8f21a
TG
85 /*
86 * Flush all to work around Errata in early athlons regarding
87 * large page flushing.
88 */
89 __flush_tlb_all();
90
6bb8383b 91 if (cache && boot_cpu_data.x86_model >= 4)
d7c8f21a
TG
92 wbinvd();
93}
94
6bb8383b 95static void cpa_flush_all(unsigned long cache)
d7c8f21a
TG
96{
97 BUG_ON(irqs_disabled());
98
6bb8383b 99 on_each_cpu(__cpa_flush_all, (void *) cache, 1, 1);
d7c8f21a
TG
100}
101
57a6a46a
TG
/*
 * Per-CPU callback used by cpa_flush_range(); @arg is unused.
 */
static void __cpa_flush_range(void *arg)
{
	/*
	 * A full TLB flush is done here. Invalidating single pages
	 * would be a possible optimization for small page counts, but
	 * on 64bit the high aliases would have to be flushed too.
	 */
	__flush_tlb_all();
}
111
6bb8383b 112static void cpa_flush_range(unsigned long start, int numpages, int cache)
57a6a46a 113{
4c61afcd
IM
114 unsigned int i, level;
115 unsigned long addr;
116
57a6a46a 117 BUG_ON(irqs_disabled());
4c61afcd 118 WARN_ON(PAGE_ALIGN(start) != start);
57a6a46a 119
3b233e52 120 on_each_cpu(__cpa_flush_range, NULL, 1, 1);
57a6a46a 121
6bb8383b
AK
122 if (!cache)
123 return;
124
3b233e52
TG
125 /*
126 * We only need to flush on one CPU,
127 * clflush is a MESI-coherent instruction that
128 * will cause all other CPUs to flush the same
129 * cachelines:
130 */
4c61afcd
IM
131 for (i = 0, addr = start; i < numpages; i++, addr += PAGE_SIZE) {
132 pte_t *pte = lookup_address(addr, &level);
133
134 /*
135 * Only flush present addresses:
136 */
7bfb72e8 137 if (pte && (pte_val(*pte) & _PAGE_PRESENT))
4c61afcd
IM
138 clflush_cache_range((void *) addr, PAGE_SIZE);
139 }
57a6a46a
TG
140}
141
ed724be6
AV
142/*
143 * Certain areas of memory on x86 require very specific protection flags,
144 * for example the BIOS area or kernel text. Callers don't always get this
145 * right (again, ioremap() on BIOS memory is not uncommon) so this function
146 * checks and fixes these known static required protection bits.
147 */
c31c7d48
TG
148static inline pgprot_t static_protections(pgprot_t prot, unsigned long address,
149 unsigned long pfn)
ed724be6
AV
150{
151 pgprot_t forbidden = __pgprot(0);
152
687c4825 153 /*
ed724be6
AV
154 * The BIOS area between 640k and 1Mb needs to be executable for
155 * PCI BIOS based config access (CONFIG_PCI_GOBIOS) support.
687c4825 156 */
c31c7d48 157 if (within(pfn, BIOS_BEGIN >> PAGE_SHIFT, BIOS_END >> PAGE_SHIFT))
ed724be6
AV
158 pgprot_val(forbidden) |= _PAGE_NX;
159
160 /*
161 * The kernel text needs to be executable for obvious reasons
c31c7d48
TG
162 * Does not cover __inittext since that is gone later on. On
163 * 64bit we do not enforce !NX on the low mapping
ed724be6
AV
164 */
165 if (within(address, (unsigned long)_text, (unsigned long)_etext))
166 pgprot_val(forbidden) |= _PAGE_NX;
cc0f21bb 167
cc0f21bb 168 /*
c31c7d48
TG
169 * The .rodata section needs to be read-only. Using the pfn
170 * catches all aliases.
cc0f21bb 171 */
c31c7d48
TG
172 if (within(pfn, __pa((unsigned long)__start_rodata) >> PAGE_SHIFT,
173 __pa((unsigned long)__end_rodata) >> PAGE_SHIFT))
cc0f21bb 174 pgprot_val(forbidden) |= _PAGE_RW;
ed724be6
AV
175
176 prot = __pgprot(pgprot_val(prot) & ~pgprot_val(forbidden));
687c4825
IM
177
178 return prot;
179}
180
9a14aefc
TG
181/*
182 * Lookup the page table entry for a virtual address. Return a pointer
183 * to the entry and the level of the mapping.
184 *
185 * Note: We return pud and pmd either when the entry is marked large
186 * or when the present bit is not set. Otherwise we would return a
187 * pointer to a nonexisting mapping.
188 */
da7bfc50 189pte_t *lookup_address(unsigned long address, unsigned int *level)
9f4c815c 190{
1da177e4
LT
191 pgd_t *pgd = pgd_offset_k(address);
192 pud_t *pud;
193 pmd_t *pmd;
9f4c815c 194
30551bb3
TG
195 *level = PG_LEVEL_NONE;
196
1da177e4
LT
197 if (pgd_none(*pgd))
198 return NULL;
9df84993 199
1da177e4
LT
200 pud = pud_offset(pgd, address);
201 if (pud_none(*pud))
202 return NULL;
c2f71ee2
AK
203
204 *level = PG_LEVEL_1G;
205 if (pud_large(*pud) || !pud_present(*pud))
206 return (pte_t *)pud;
207
1da177e4
LT
208 pmd = pmd_offset(pud, address);
209 if (pmd_none(*pmd))
210 return NULL;
30551bb3
TG
211
212 *level = PG_LEVEL_2M;
9a14aefc 213 if (pmd_large(*pmd) || !pmd_present(*pmd))
1da177e4 214 return (pte_t *)pmd;
1da177e4 215
30551bb3 216 *level = PG_LEVEL_4K;
9df84993 217
9f4c815c
IM
218 return pte_offset_kernel(pmd, address);
219}
220
9df84993
IM
221/*
222 * Set the new pmd in all the pgds we know about:
223 */
9a3dc780 224static void __set_pmd_pte(pte_t *kpte, unsigned long address, pte_t pte)
9f4c815c 225{
9f4c815c
IM
226 /* change init_mm */
227 set_pte_atomic(kpte, pte);
44af6c41 228#ifdef CONFIG_X86_32
e4b71dcf 229 if (!SHARED_KERNEL_PMD) {
44af6c41
IM
230 struct page *page;
231
e3ed910d 232 list_for_each_entry(page, &pgd_list, lru) {
44af6c41
IM
233 pgd_t *pgd;
234 pud_t *pud;
235 pmd_t *pmd;
236
237 pgd = (pgd_t *)page_address(page) + pgd_index(address);
238 pud = pud_offset(pgd, address);
239 pmd = pmd_offset(pud, address);
240 set_pte_atomic((pte_t *)pmd, pte);
241 }
1da177e4 242 }
44af6c41 243#endif
1da177e4
LT
244}
245
9df84993
IM
246static int
247try_preserve_large_page(pte_t *kpte, unsigned long address,
248 struct cpa_data *cpa)
65e074df 249{
c31c7d48 250 unsigned long nextpage_addr, numpages, pmask, psize, flags, addr, pfn;
65e074df
TG
251 pte_t new_pte, old_pte, *tmp;
252 pgprot_t old_prot, new_prot;
fac84939 253 int i, do_split = 1;
da7bfc50 254 unsigned int level;
65e074df
TG
255
256 spin_lock_irqsave(&pgd_lock, flags);
257 /*
258 * Check for races, another CPU might have split this page
259 * up already:
260 */
261 tmp = lookup_address(address, &level);
262 if (tmp != kpte)
263 goto out_unlock;
264
265 switch (level) {
266 case PG_LEVEL_2M:
31422c51
AK
267 psize = PMD_PAGE_SIZE;
268 pmask = PMD_PAGE_MASK;
65e074df 269 break;
f07333fd 270#ifdef CONFIG_X86_64
65e074df 271 case PG_LEVEL_1G:
5d3c8b21
AK
272 psize = PUD_PAGE_SIZE;
273 pmask = PUD_PAGE_MASK;
f07333fd
AK
274 break;
275#endif
65e074df 276 default:
beaff633 277 do_split = -EINVAL;
65e074df
TG
278 goto out_unlock;
279 }
280
281 /*
282 * Calculate the number of pages, which fit into this large
283 * page starting at address:
284 */
285 nextpage_addr = (address + psize) & pmask;
286 numpages = (nextpage_addr - address) >> PAGE_SHIFT;
287 if (numpages < cpa->numpages)
288 cpa->numpages = numpages;
289
290 /*
291 * We are safe now. Check whether the new pgprot is the same:
292 */
293 old_pte = *kpte;
294 old_prot = new_prot = pte_pgprot(old_pte);
295
296 pgprot_val(new_prot) &= ~pgprot_val(cpa->mask_clr);
297 pgprot_val(new_prot) |= pgprot_val(cpa->mask_set);
c31c7d48
TG
298
299 /*
300 * old_pte points to the large page base address. So we need
301 * to add the offset of the virtual address:
302 */
303 pfn = pte_pfn(old_pte) + ((address & (psize - 1)) >> PAGE_SHIFT);
304 cpa->pfn = pfn;
305
306 new_prot = static_protections(new_prot, address, pfn);
65e074df 307
fac84939
TG
308 /*
309 * We need to check the full range, whether
310 * static_protection() requires a different pgprot for one of
311 * the pages in the range we try to preserve:
312 */
313 addr = address + PAGE_SIZE;
c31c7d48
TG
314 pfn++;
315 for (i = 1; i < cpa->numpages; i++, addr += PAGE_SIZE, pfn++) {
316 pgprot_t chk_prot = static_protections(new_prot, addr, pfn);
fac84939
TG
317
318 if (pgprot_val(chk_prot) != pgprot_val(new_prot))
319 goto out_unlock;
320 }
321
65e074df
TG
322 /*
323 * If there are no changes, return. maxpages has been updated
324 * above:
325 */
326 if (pgprot_val(new_prot) == pgprot_val(old_prot)) {
beaff633 327 do_split = 0;
65e074df
TG
328 goto out_unlock;
329 }
330
331 /*
332 * We need to change the attributes. Check, whether we can
333 * change the large page in one go. We request a split, when
334 * the address is not aligned and the number of pages is
335 * smaller than the number of pages in the large page. Note
336 * that we limited the number of possible pages already to
337 * the number of pages in the large page.
338 */
339 if (address == (nextpage_addr - psize) && cpa->numpages == numpages) {
340 /*
341 * The address is aligned and the number of pages
342 * covers the full page.
343 */
344 new_pte = pfn_pte(pte_pfn(old_pte), canon_pgprot(new_prot));
345 __set_pmd_pte(kpte, address, new_pte);
346 cpa->flushtlb = 1;
beaff633 347 do_split = 0;
65e074df
TG
348 }
349
350out_unlock:
351 spin_unlock_irqrestore(&pgd_lock, flags);
9df84993 352
beaff633 353 return do_split;
65e074df
TG
354}
355
76ebd054
TG
/* Pages kept in reserve for splitting large pages */
static LIST_HEAD(page_pool);
static unsigned long pool_size;		/* target number of pool pages */
static unsigned long pool_pages;	/* pages currently on page_pool */
static unsigned long pool_low;		/* lowest pool_pages value seen */
static unsigned long pool_used;		/* pool pages consumed by splits */
static unsigned long pool_failed;	/* failed refill allocations */
static unsigned long pool_refill;	/* bit 0: refill in progress */
359
360static void cpa_fill_pool(void)
361{
362 struct page *p;
363 gfp_t gfp = GFP_KERNEL;
364
365 /* Do not allocate from interrupt context */
366 if (in_irq() || irqs_disabled())
367 return;
368 /*
369 * Check unlocked. I does not matter when we have one more
370 * page in the pool. The bit lock avoids recursive pool
371 * allocations:
372 */
373 if (pool_pages >= pool_size || test_and_set_bit_lock(0, &pool_refill))
374 return;
375
376#ifdef CONFIG_DEBUG_PAGEALLOC
377 /*
378 * We could do:
379 * gfp = in_atomic() ? GFP_ATOMIC : GFP_KERNEL;
380 * but this fails on !PREEMPT kernels
381 */
382 gfp = GFP_ATOMIC | __GFP_NORETRY | __GFP_NOWARN;
383#endif
384
385 while (pool_pages < pool_size) {
386 p = alloc_pages(gfp, 0);
387 if (!p) {
388 pool_failed++;
389 break;
390 }
391 spin_lock_irq(&pgd_lock);
392 list_add(&p->lru, &page_pool);
393 pool_pages++;
394 spin_unlock_irq(&pgd_lock);
395 }
396 clear_bit_unlock(0, &pool_refill);
397}
398
/* Shift which converts a number of pages into MiB */
#define SHIFT_MB		(20 - PAGE_SHIFT)
/* Shift which converts MiB into GiB */
#define SHIFT_MB_GB		10
/* Added to a MiB value before shifting, so partial GiB round up */
#define ROUND_MB_GB		((1 << 10) - 1)
/* Pool pages provided per GiB of RAM */
#define POOL_PAGES_PER_GB	16
403
404void __init cpa_init(void)
405{
406 struct sysinfo si;
407 unsigned long gb;
408
409 si_meminfo(&si);
410 /*
411 * Calculate the number of pool pages:
412 *
413 * Convert totalram (nr of pages) to MiB and round to the next
414 * GiB. Shift MiB to Gib and multiply the result by
415 * POOL_PAGES_PER_GB:
416 */
417 gb = ((si.totalram >> SHIFT_MB) + ROUND_MB_GB) >> SHIFT_MB_GB;
418 pool_size = POOL_PAGES_PER_GB * gb;
419 pool_low = pool_size;
420
421 cpa_fill_pool();
422 printk(KERN_DEBUG
423 "CPA: page pool initialized %lu of %lu pages preallocated\n",
424 pool_pages, pool_size);
425}
426
7afe15b9 427static int split_large_page(pte_t *kpte, unsigned long address)
bb5c2dbd 428{
7b610eec 429 unsigned long flags, pfn, pfninc = 1;
9df84993 430 unsigned int i, level;
bb5c2dbd 431 pte_t *pbase, *tmp;
9df84993 432 pgprot_t ref_prot;
bb5c2dbd
IM
433 struct page *base;
434
eb5b5f02
TG
435 /*
436 * Get a page from the pool. The pool list is protected by the
437 * pgd_lock, which we have to take anyway for the split
438 * operation:
439 */
440 spin_lock_irqsave(&pgd_lock, flags);
441 if (list_empty(&page_pool)) {
442 spin_unlock_irqrestore(&pgd_lock, flags);
bb5c2dbd 443 return -ENOMEM;
eb5b5f02
TG
444 }
445
446 base = list_first_entry(&page_pool, struct page, lru);
447 list_del(&base->lru);
448 pool_pages--;
449
450 if (pool_pages < pool_low)
451 pool_low = pool_pages;
bb5c2dbd 452
bb5c2dbd
IM
453 /*
454 * Check for races, another CPU might have split this page
455 * up for us already:
456 */
457 tmp = lookup_address(address, &level);
6ce9fc17 458 if (tmp != kpte)
bb5c2dbd
IM
459 goto out_unlock;
460
bb5c2dbd 461 pbase = (pte_t *)page_address(base);
44af6c41 462#ifdef CONFIG_X86_32
bb5c2dbd 463 paravirt_alloc_pt(&init_mm, page_to_pfn(base));
44af6c41 464#endif
07cf89c0 465 ref_prot = pte_pgprot(pte_clrhuge(*kpte));
bb5c2dbd 466
f07333fd
AK
467#ifdef CONFIG_X86_64
468 if (level == PG_LEVEL_1G) {
469 pfninc = PMD_PAGE_SIZE >> PAGE_SHIFT;
470 pgprot_val(ref_prot) |= _PAGE_PSE;
f07333fd
AK
471 }
472#endif
473
63c1dcf4
TG
474 /*
475 * Get the target pfn from the original entry:
476 */
477 pfn = pte_pfn(*kpte);
f07333fd 478 for (i = 0; i < PTRS_PER_PTE; i++, pfn += pfninc)
63c1dcf4 479 set_pte(&pbase[i], pfn_pte(pfn, ref_prot));
bb5c2dbd
IM
480
481 /*
07cf89c0 482 * Install the new, split up pagetable. Important details here:
4c881ca1
HY
483 *
484 * On Intel the NX bit of all levels must be cleared to make a
485 * page executable. See section 4.13.2 of Intel 64 and IA-32
486 * Architectures Software Developer's Manual).
07cf89c0
TG
487 *
488 * Mark the entry present. The current mapping might be
489 * set to not present, which we preserved above.
bb5c2dbd 490 */
4c881ca1 491 ref_prot = pte_pgprot(pte_mkexec(pte_clrhuge(*kpte)));
07cf89c0 492 pgprot_val(ref_prot) |= _PAGE_PRESENT;
9a3dc780 493 __set_pmd_pte(kpte, address, mk_pte(base, ref_prot));
bb5c2dbd
IM
494 base = NULL;
495
496out_unlock:
eb5b5f02
TG
497 /*
498 * If we dropped out via the lookup_address check under
499 * pgd_lock then stick the page back into the pool:
500 */
501 if (base) {
502 list_add(&base->lru, &page_pool);
503 pool_pages++;
504 } else
505 pool_used++;
9a3dc780 506 spin_unlock_irqrestore(&pgd_lock, flags);
bb5c2dbd 507
bb5c2dbd
IM
508 return 0;
509}
510
c31c7d48 511static int __change_page_attr(struct cpa_data *cpa, int primary)
9f4c815c 512{
c31c7d48 513 unsigned long address = cpa->vaddr;
da7bfc50
HH
514 int do_split, err;
515 unsigned int level;
c31c7d48 516 pte_t *kpte, old_pte;
1da177e4 517
97f99fed 518repeat:
f0646e43 519 kpte = lookup_address(address, &level);
1da177e4 520 if (!kpte)
c31c7d48
TG
521 return primary ? -EINVAL : 0;
522
523 old_pte = *kpte;
524 if (!pte_val(old_pte)) {
525 if (!primary)
526 return 0;
527 printk(KERN_WARNING "CPA: called for zero pte. "
528 "vaddr = %lx cpa->vaddr = %lx\n", address,
529 cpa->vaddr);
530 WARN_ON(1);
1da177e4 531 return -EINVAL;
c31c7d48 532 }
9f4c815c 533
30551bb3 534 if (level == PG_LEVEL_4K) {
c31c7d48 535 pte_t new_pte;
626c2c9d 536 pgprot_t new_prot = pte_pgprot(old_pte);
c31c7d48 537 unsigned long pfn = pte_pfn(old_pte);
86f03989 538
72e458df
TG
539 pgprot_val(new_prot) &= ~pgprot_val(cpa->mask_clr);
540 pgprot_val(new_prot) |= pgprot_val(cpa->mask_set);
86f03989 541
c31c7d48 542 new_prot = static_protections(new_prot, address, pfn);
86f03989 543
626c2c9d
AV
544 /*
545 * We need to keep the pfn from the existing PTE,
546 * after all we're only going to change it's attributes
547 * not the memory it points to
548 */
c31c7d48
TG
549 new_pte = pfn_pte(pfn, canon_pgprot(new_prot));
550 cpa->pfn = pfn;
f4ae5da0
TG
551 /*
552 * Do we really change anything ?
553 */
554 if (pte_val(old_pte) != pte_val(new_pte)) {
555 set_pte_atomic(kpte, new_pte);
556 cpa->flushtlb = 1;
557 }
65e074df
TG
558 cpa->numpages = 1;
559 return 0;
1da177e4 560 }
65e074df
TG
561
562 /*
563 * Check, whether we can keep the large page intact
564 * and just change the pte:
565 */
beaff633 566 do_split = try_preserve_large_page(kpte, address, cpa);
65e074df
TG
567 /*
568 * When the range fits into the existing large page,
569 * return. cp->numpages and cpa->tlbflush have been updated in
570 * try_large_page:
571 */
87f7f8fe
IM
572 if (do_split <= 0)
573 return do_split;
65e074df
TG
574
575 /*
576 * We have to split the large page:
577 */
87f7f8fe
IM
578 err = split_large_page(kpte, address);
579 if (!err) {
580 cpa->flushtlb = 1;
581 goto repeat;
582 }
beaff633 583
87f7f8fe 584 return err;
9f4c815c 585}
1da177e4 586
c31c7d48
TG
587static int __change_page_attr_set_clr(struct cpa_data *cpa, int checkalias);
588
589static int cpa_process_alias(struct cpa_data *cpa)
1da177e4 590{
c31c7d48 591 struct cpa_data alias_cpa;
f34b439f 592 int ret = 0;
44af6c41 593
c31c7d48
TG
594 if (cpa->pfn > max_pfn_mapped)
595 return 0;
626c2c9d 596
f34b439f
TG
597 /*
598 * No need to redo, when the primary call touched the direct
599 * mapping already:
600 */
601 if (!within(cpa->vaddr, PAGE_OFFSET,
602 PAGE_OFFSET + (max_pfn_mapped << PAGE_SHIFT))) {
44af6c41 603
f34b439f
TG
604 alias_cpa = *cpa;
605 alias_cpa.vaddr = (unsigned long) __va(cpa->pfn << PAGE_SHIFT);
606
607 ret = __change_page_attr_set_clr(&alias_cpa, 0);
608 }
44af6c41 609
44af6c41 610#ifdef CONFIG_X86_64
c31c7d48
TG
611 if (ret)
612 return ret;
f34b439f
TG
613 /*
614 * No need to redo, when the primary call touched the high
615 * mapping already:
616 */
617 if (within(cpa->vaddr, (unsigned long) _text, (unsigned long) _end))
618 return 0;
619
488fd995 620 /*
0879750f
TG
621 * If the physical address is inside the kernel map, we need
622 * to touch the high mapped kernel as well:
488fd995 623 */
c31c7d48
TG
624 if (!within(cpa->pfn, highmap_start_pfn(), highmap_end_pfn()))
625 return 0;
0879750f 626
c31c7d48
TG
627 alias_cpa = *cpa;
628 alias_cpa.vaddr =
629 (cpa->pfn << PAGE_SHIFT) + __START_KERNEL_map - phys_base;
630
631 /*
632 * The high mapping range is imprecise, so ignore the return value.
633 */
634 __change_page_attr_set_clr(&alias_cpa, 0);
488fd995 635#endif
c31c7d48 636 return ret;
1da177e4
LT
637}
638
c31c7d48 639static int __change_page_attr_set_clr(struct cpa_data *cpa, int checkalias)
ff31452b 640{
65e074df 641 int ret, numpages = cpa->numpages;
ff31452b 642
65e074df
TG
643 while (numpages) {
644 /*
645 * Store the remaining nr of pages for the large page
646 * preservation check.
647 */
648 cpa->numpages = numpages;
c31c7d48
TG
649
650 ret = __change_page_attr(cpa, checkalias);
ff31452b
TG
651 if (ret)
652 return ret;
ff31452b 653
c31c7d48
TG
654 if (checkalias) {
655 ret = cpa_process_alias(cpa);
656 if (ret)
657 return ret;
658 }
659
65e074df
TG
660 /*
661 * Adjust the number of pages with the result of the
662 * CPA operation. Either a large page has been
663 * preserved or a single page update happened.
664 */
665 BUG_ON(cpa->numpages > numpages);
666 numpages -= cpa->numpages;
667 cpa->vaddr += cpa->numpages * PAGE_SIZE;
668 }
ff31452b
TG
669 return 0;
670}
671
6bb8383b
AK
672static inline int cache_attr(pgprot_t attr)
673{
674 return pgprot_val(attr) &
675 (_PAGE_PAT | _PAGE_PAT_LARGE | _PAGE_PWT | _PAGE_PCD);
676}
677
ff31452b
TG
678static int change_page_attr_set_clr(unsigned long addr, int numpages,
679 pgprot_t mask_set, pgprot_t mask_clr)
680{
72e458df 681 struct cpa_data cpa;
af96e443 682 int ret, cache, checkalias;
331e4065
TG
683
684 /*
685 * Check, if we are requested to change a not supported
686 * feature:
687 */
688 mask_set = canon_pgprot(mask_set);
689 mask_clr = canon_pgprot(mask_clr);
690 if (!pgprot_val(mask_set) && !pgprot_val(mask_clr))
691 return 0;
692
69b1415e
TG
693 /* Ensure we are PAGE_SIZE aligned */
694 if (addr & ~PAGE_MASK) {
695 addr &= PAGE_MASK;
696 /*
697 * People should not be passing in unaligned addresses:
698 */
699 WARN_ON_ONCE(1);
700 }
701
72e458df
TG
702 cpa.vaddr = addr;
703 cpa.numpages = numpages;
704 cpa.mask_set = mask_set;
705 cpa.mask_clr = mask_clr;
f4ae5da0 706 cpa.flushtlb = 0;
72e458df 707
af96e443
TG
708 /* No alias checking for _NX bit modifications */
709 checkalias = (pgprot_val(mask_set) | pgprot_val(mask_clr)) != _PAGE_NX;
710
711 ret = __change_page_attr_set_clr(&cpa, checkalias);
ff31452b 712
f4ae5da0
TG
713 /*
714 * Check whether we really changed something:
715 */
716 if (!cpa.flushtlb)
76ebd054 717 goto out;
f4ae5da0 718
6bb8383b
AK
719 /*
720 * No need to flush, when we did not set any of the caching
721 * attributes:
722 */
723 cache = cache_attr(mask_set);
724
57a6a46a
TG
725 /*
726 * On success we use clflush, when the CPU supports it to
727 * avoid the wbindv. If the CPU does not support it and in the
af1e6844 728 * error case we fall back to cpa_flush_all (which uses
57a6a46a
TG
729 * wbindv):
730 */
731 if (!ret && cpu_has_clflush)
6bb8383b 732 cpa_flush_range(addr, numpages, cache);
57a6a46a 733 else
6bb8383b 734 cpa_flush_all(cache);
ff31452b 735
76ebd054
TG
736out:
737 cpa_fill_pool();
ff31452b
TG
738 return ret;
739}
740
56744546
TG
741static inline int change_page_attr_set(unsigned long addr, int numpages,
742 pgprot_t mask)
75cbade8 743{
56744546 744 return change_page_attr_set_clr(addr, numpages, mask, __pgprot(0));
75cbade8
AV
745}
746
56744546
TG
747static inline int change_page_attr_clear(unsigned long addr, int numpages,
748 pgprot_t mask)
72932c7a 749{
5827040d 750 return change_page_attr_set_clr(addr, numpages, __pgprot(0), mask);
72932c7a
TG
751}
752
753int set_memory_uc(unsigned long addr, int numpages)
754{
755 return change_page_attr_set(addr, numpages,
756 __pgprot(_PAGE_PCD | _PAGE_PWT));
75cbade8
AV
757}
758EXPORT_SYMBOL(set_memory_uc);
759
760int set_memory_wb(unsigned long addr, int numpages)
761{
72932c7a
TG
762 return change_page_attr_clear(addr, numpages,
763 __pgprot(_PAGE_PCD | _PAGE_PWT));
75cbade8
AV
764}
765EXPORT_SYMBOL(set_memory_wb);
766
767int set_memory_x(unsigned long addr, int numpages)
768{
72932c7a 769 return change_page_attr_clear(addr, numpages, __pgprot(_PAGE_NX));
75cbade8
AV
770}
771EXPORT_SYMBOL(set_memory_x);
772
773int set_memory_nx(unsigned long addr, int numpages)
774{
72932c7a 775 return change_page_attr_set(addr, numpages, __pgprot(_PAGE_NX));
75cbade8
AV
776}
777EXPORT_SYMBOL(set_memory_nx);
778
779int set_memory_ro(unsigned long addr, int numpages)
780{
72932c7a 781 return change_page_attr_clear(addr, numpages, __pgprot(_PAGE_RW));
75cbade8 782}
75cbade8
AV
783
784int set_memory_rw(unsigned long addr, int numpages)
785{
72932c7a 786 return change_page_attr_set(addr, numpages, __pgprot(_PAGE_RW));
75cbade8 787}
f62d0f00
IM
788
789int set_memory_np(unsigned long addr, int numpages)
790{
72932c7a 791 return change_page_attr_clear(addr, numpages, __pgprot(_PAGE_PRESENT));
f62d0f00 792}
75cbade8
AV
793
/* struct page front end for set_memory_uc(), using page_address(@page). */
int set_pages_uc(struct page *page, int numpages)
{
	return set_memory_uc((unsigned long)page_address(page), numpages);
}
EXPORT_SYMBOL(set_pages_uc);
801
/* struct page front end for set_memory_wb(), using page_address(@page). */
int set_pages_wb(struct page *page, int numpages)
{
	return set_memory_wb((unsigned long)page_address(page), numpages);
}
EXPORT_SYMBOL(set_pages_wb);
809
/* struct page front end for set_memory_x(), using page_address(@page). */
int set_pages_x(struct page *page, int numpages)
{
	return set_memory_x((unsigned long)page_address(page), numpages);
}
EXPORT_SYMBOL(set_pages_x);
817
/* struct page front end for set_memory_nx(), using page_address(@page). */
int set_pages_nx(struct page *page, int numpages)
{
	return set_memory_nx((unsigned long)page_address(page), numpages);
}
EXPORT_SYMBOL(set_pages_nx);
825
/* struct page front end for set_memory_ro(), using page_address(@page). */
int set_pages_ro(struct page *page, int numpages)
{
	return set_memory_ro((unsigned long)page_address(page), numpages);
}
75cbade8
AV
832
/* struct page front end for set_memory_rw(), using page_address(@page). */
int set_pages_rw(struct page *page, int numpages)
{
	return set_memory_rw((unsigned long)page_address(page), numpages);
}
839
1da177e4 840#ifdef CONFIG_DEBUG_PAGEALLOC
f62d0f00
IM
841
842static int __set_pages_p(struct page *page, int numpages)
843{
72e458df
TG
844 struct cpa_data cpa = { .vaddr = (unsigned long) page_address(page),
845 .numpages = numpages,
846 .mask_set = __pgprot(_PAGE_PRESENT | _PAGE_RW),
847 .mask_clr = __pgprot(0)};
72932c7a 848
c31c7d48 849 return __change_page_attr_set_clr(&cpa, 1);
f62d0f00
IM
850}
851
852static int __set_pages_np(struct page *page, int numpages)
853{
72e458df
TG
854 struct cpa_data cpa = { .vaddr = (unsigned long) page_address(page),
855 .numpages = numpages,
856 .mask_set = __pgprot(0),
857 .mask_clr = __pgprot(_PAGE_PRESENT | _PAGE_RW)};
72932c7a 858
c31c7d48 859 return __change_page_attr_set_clr(&cpa, 1);
f62d0f00
IM
860}
861
1da177e4
LT
862void kernel_map_pages(struct page *page, int numpages, int enable)
863{
864 if (PageHighMem(page))
865 return;
9f4c815c 866 if (!enable) {
f9b8404c
IM
867 debug_check_no_locks_freed(page_address(page),
868 numpages * PAGE_SIZE);
9f4c815c 869 }
de5097c2 870
12d6f21e
IM
871 /*
872 * If page allocator is not up yet then do not call c_p_a():
873 */
874 if (!debug_pagealloc_enabled)
875 return;
876
9f4c815c 877 /*
f8d8406b
IM
878 * The return value is ignored as the calls cannot fail.
879 * Large pages are kept enabled at boot time, and are
880 * split up quickly with DEBUG_PAGEALLOC. If a splitup
881 * fails here (due to temporary memory shortage) no damage
882 * is done because we just keep the largepage intact up
883 * to the next attempt when it will likely be split up:
1da177e4 884 */
f62d0f00
IM
885 if (enable)
886 __set_pages_p(page, numpages);
887 else
888 __set_pages_np(page, numpages);
9f4c815c
IM
889
890 /*
e4b71dcf
IM
891 * We should perform an IPI and flush all tlbs,
892 * but that can deadlock->flush only current cpu:
1da177e4
LT
893 */
894 __flush_tlb_all();
76ebd054
TG
895
896 /*
897 * Try to refill the page pool here. We can do this only after
898 * the tlb flush.
899 */
900 cpa_fill_pool();
1da177e4
LT
901}
902#endif
d1028a15
AV
903
904/*
905 * The testcases use internal knowledge of the implementation that shouldn't
906 * be exposed to the rest of the kernel. Include these directly here.
907 */
908#ifdef CONFIG_CPA_DEBUG
909#include "pageattr-test.c"
910#endif