]> git.proxmox.com Git - mirror_ubuntu-hirsute-kernel.git/blame - arch/x86/mm/pageattr.c
x86: CPA no alias checking for _NX
[mirror_ubuntu-hirsute-kernel.git] / arch / x86 / mm / pageattr.c
CommitLineData
9f4c815c
IM
1/*
2 * Copyright 2002 Andi Kleen, SuSE Labs.
1da177e4 3 * Thanks to Ben LaHaise for precious feedback.
9f4c815c 4 */
1da177e4 5#include <linux/highmem.h>
8192206d 6#include <linux/bootmem.h>
1da177e4 7#include <linux/module.h>
9f4c815c 8#include <linux/sched.h>
1da177e4 9#include <linux/slab.h>
9f4c815c 10#include <linux/mm.h>
76ebd054 11#include <linux/interrupt.h>
9f4c815c 12
950f9d95 13#include <asm/e820.h>
1da177e4
LT
14#include <asm/processor.h>
15#include <asm/tlbflush.h>
f8af095d 16#include <asm/sections.h>
9f4c815c
IM
17#include <asm/uaccess.h>
18#include <asm/pgalloc.h>
c31c7d48 19#include <asm/proto.h>
1da177e4 20
9df84993
IM
21/*
22 * The current flushing context - we pass it instead of 5 arguments:
23 */
72e458df
TG
24struct cpa_data {
25 unsigned long vaddr;
72e458df
TG
26 pgprot_t mask_set;
27 pgprot_t mask_clr;
65e074df 28 int numpages;
f4ae5da0 29 int flushtlb;
c31c7d48 30 unsigned long pfn;
72e458df
TG
31};
32
c31c7d48
TG
33#ifdef CONFIG_X86_64
34
35static inline unsigned long highmap_start_pfn(void)
36{
37 return __pa(_text) >> PAGE_SHIFT;
38}
39
40static inline unsigned long highmap_end_pfn(void)
41{
42 return __pa(round_up((unsigned long)_end, PMD_SIZE)) >> PAGE_SHIFT;
43}
44
45#endif
46
ed724be6
AV
/*
 * Return 1 when addr lies in the half-open interval [start, end),
 * 0 otherwise.
 */
static inline int
within(unsigned long addr, unsigned long start, unsigned long end)
{
	if (addr < start)
		return 0;

	return addr < end;
}
52
d7c8f21a
TG
53/*
54 * Flushing functions
55 */
cd8ddf1a 56
cd8ddf1a
TG
57/**
58 * clflush_cache_range - flush a cache range with clflush
59 * @addr: virtual start address
60 * @size: number of bytes to flush
61 *
62 * clflush is an unordered instruction which needs fencing with mfence
63 * to avoid ordering issues.
64 */
4c61afcd 65void clflush_cache_range(void *vaddr, unsigned int size)
d7c8f21a 66{
4c61afcd 67 void *vend = vaddr + size - 1;
d7c8f21a 68
cd8ddf1a 69 mb();
4c61afcd
IM
70
71 for (; vaddr < vend; vaddr += boot_cpu_data.x86_clflush_size)
72 clflush(vaddr);
73 /*
74 * Flush any possible final partial cacheline:
75 */
76 clflush(vend);
77
cd8ddf1a 78 mb();
d7c8f21a
TG
79}
80
af1e6844 81static void __cpa_flush_all(void *arg)
d7c8f21a 82{
6bb8383b
AK
83 unsigned long cache = (unsigned long)arg;
84
d7c8f21a
TG
85 /*
86 * Flush all to work around Errata in early athlons regarding
87 * large page flushing.
88 */
89 __flush_tlb_all();
90
6bb8383b 91 if (cache && boot_cpu_data.x86_model >= 4)
d7c8f21a
TG
92 wbinvd();
93}
94
6bb8383b 95static void cpa_flush_all(unsigned long cache)
d7c8f21a
TG
96{
97 BUG_ON(irqs_disabled());
98
6bb8383b 99 on_each_cpu(__cpa_flush_all, (void *) cache, 1, 1);
d7c8f21a
TG
100}
101
57a6a46a
TG
/*
 * Per-CPU callback used by cpa_flush_range(). @arg is unused.
 */
static void __cpa_flush_range(void *arg)
{
	/*
	 * A full TLB flush is done for now. Invalidating single pages
	 * for small ranges would be a possible optimization, but on
	 * 64bit the high aliases would have to be flushed too.
	 */
	__flush_tlb_all();
}
111
6bb8383b 112static void cpa_flush_range(unsigned long start, int numpages, int cache)
57a6a46a 113{
4c61afcd
IM
114 unsigned int i, level;
115 unsigned long addr;
116
57a6a46a 117 BUG_ON(irqs_disabled());
4c61afcd 118 WARN_ON(PAGE_ALIGN(start) != start);
57a6a46a 119
3b233e52 120 on_each_cpu(__cpa_flush_range, NULL, 1, 1);
57a6a46a 121
6bb8383b
AK
122 if (!cache)
123 return;
124
3b233e52
TG
125 /*
126 * We only need to flush on one CPU,
127 * clflush is a MESI-coherent instruction that
128 * will cause all other CPUs to flush the same
129 * cachelines:
130 */
4c61afcd
IM
131 for (i = 0, addr = start; i < numpages; i++, addr += PAGE_SIZE) {
132 pte_t *pte = lookup_address(addr, &level);
133
134 /*
135 * Only flush present addresses:
136 */
7bfb72e8 137 if (pte && (pte_val(*pte) & _PAGE_PRESENT))
4c61afcd
IM
138 clflush_cache_range((void *) addr, PAGE_SIZE);
139 }
57a6a46a
TG
140}
141
ed724be6
AV
142/*
143 * Certain areas of memory on x86 require very specific protection flags,
144 * for example the BIOS area or kernel text. Callers don't always get this
145 * right (again, ioremap() on BIOS memory is not uncommon) so this function
146 * checks and fixes these known static required protection bits.
147 */
c31c7d48
TG
148static inline pgprot_t static_protections(pgprot_t prot, unsigned long address,
149 unsigned long pfn)
ed724be6
AV
150{
151 pgprot_t forbidden = __pgprot(0);
152
687c4825 153 /*
ed724be6
AV
154 * The BIOS area between 640k and 1Mb needs to be executable for
155 * PCI BIOS based config access (CONFIG_PCI_GOBIOS) support.
687c4825 156 */
c31c7d48 157 if (within(pfn, BIOS_BEGIN >> PAGE_SHIFT, BIOS_END >> PAGE_SHIFT))
ed724be6
AV
158 pgprot_val(forbidden) |= _PAGE_NX;
159
160 /*
161 * The kernel text needs to be executable for obvious reasons
c31c7d48
TG
162 * Does not cover __inittext since that is gone later on. On
163 * 64bit we do not enforce !NX on the low mapping
ed724be6
AV
164 */
165 if (within(address, (unsigned long)_text, (unsigned long)_etext))
166 pgprot_val(forbidden) |= _PAGE_NX;
cc0f21bb 167
cc0f21bb 168 /*
c31c7d48
TG
169 * The .rodata section needs to be read-only. Using the pfn
170 * catches all aliases.
cc0f21bb 171 */
c31c7d48
TG
172 if (within(pfn, __pa((unsigned long)__start_rodata) >> PAGE_SHIFT,
173 __pa((unsigned long)__end_rodata) >> PAGE_SHIFT))
cc0f21bb 174 pgprot_val(forbidden) |= _PAGE_RW;
ed724be6
AV
175
176 prot = __pgprot(pgprot_val(prot) & ~pgprot_val(forbidden));
687c4825
IM
177
178 return prot;
179}
180
9a14aefc
TG
181/*
182 * Lookup the page table entry for a virtual address. Return a pointer
183 * to the entry and the level of the mapping.
184 *
185 * Note: We return pud and pmd either when the entry is marked large
186 * or when the present bit is not set. Otherwise we would return a
187 * pointer to a nonexisting mapping.
188 */
da7bfc50 189pte_t *lookup_address(unsigned long address, unsigned int *level)
9f4c815c 190{
1da177e4
LT
191 pgd_t *pgd = pgd_offset_k(address);
192 pud_t *pud;
193 pmd_t *pmd;
9f4c815c 194
30551bb3
TG
195 *level = PG_LEVEL_NONE;
196
1da177e4
LT
197 if (pgd_none(*pgd))
198 return NULL;
9df84993 199
1da177e4
LT
200 pud = pud_offset(pgd, address);
201 if (pud_none(*pud))
202 return NULL;
c2f71ee2
AK
203
204 *level = PG_LEVEL_1G;
205 if (pud_large(*pud) || !pud_present(*pud))
206 return (pte_t *)pud;
207
1da177e4
LT
208 pmd = pmd_offset(pud, address);
209 if (pmd_none(*pmd))
210 return NULL;
30551bb3
TG
211
212 *level = PG_LEVEL_2M;
9a14aefc 213 if (pmd_large(*pmd) || !pmd_present(*pmd))
1da177e4 214 return (pte_t *)pmd;
1da177e4 215
30551bb3 216 *level = PG_LEVEL_4K;
9df84993 217
9f4c815c
IM
218 return pte_offset_kernel(pmd, address);
219}
220
9df84993
IM
221/*
222 * Set the new pmd in all the pgds we know about:
223 */
9a3dc780 224static void __set_pmd_pte(pte_t *kpte, unsigned long address, pte_t pte)
9f4c815c 225{
9f4c815c
IM
226 /* change init_mm */
227 set_pte_atomic(kpte, pte);
44af6c41 228#ifdef CONFIG_X86_32
e4b71dcf 229 if (!SHARED_KERNEL_PMD) {
44af6c41
IM
230 struct page *page;
231
e3ed910d 232 list_for_each_entry(page, &pgd_list, lru) {
44af6c41
IM
233 pgd_t *pgd;
234 pud_t *pud;
235 pmd_t *pmd;
236
237 pgd = (pgd_t *)page_address(page) + pgd_index(address);
238 pud = pud_offset(pgd, address);
239 pmd = pmd_offset(pud, address);
240 set_pte_atomic((pte_t *)pmd, pte);
241 }
1da177e4 242 }
44af6c41 243#endif
1da177e4
LT
244}
245
9df84993
IM
246static int
247try_preserve_large_page(pte_t *kpte, unsigned long address,
248 struct cpa_data *cpa)
65e074df 249{
c31c7d48 250 unsigned long nextpage_addr, numpages, pmask, psize, flags, addr, pfn;
65e074df
TG
251 pte_t new_pte, old_pte, *tmp;
252 pgprot_t old_prot, new_prot;
fac84939 253 int i, do_split = 1;
da7bfc50 254 unsigned int level;
65e074df
TG
255
256 spin_lock_irqsave(&pgd_lock, flags);
257 /*
258 * Check for races, another CPU might have split this page
259 * up already:
260 */
261 tmp = lookup_address(address, &level);
262 if (tmp != kpte)
263 goto out_unlock;
264
265 switch (level) {
266 case PG_LEVEL_2M:
31422c51
AK
267 psize = PMD_PAGE_SIZE;
268 pmask = PMD_PAGE_MASK;
65e074df 269 break;
f07333fd 270#ifdef CONFIG_X86_64
65e074df 271 case PG_LEVEL_1G:
5d3c8b21
AK
272 psize = PUD_PAGE_SIZE;
273 pmask = PUD_PAGE_MASK;
f07333fd
AK
274 break;
275#endif
65e074df 276 default:
beaff633 277 do_split = -EINVAL;
65e074df
TG
278 goto out_unlock;
279 }
280
281 /*
282 * Calculate the number of pages, which fit into this large
283 * page starting at address:
284 */
285 nextpage_addr = (address + psize) & pmask;
286 numpages = (nextpage_addr - address) >> PAGE_SHIFT;
287 if (numpages < cpa->numpages)
288 cpa->numpages = numpages;
289
290 /*
291 * We are safe now. Check whether the new pgprot is the same:
292 */
293 old_pte = *kpte;
294 old_prot = new_prot = pte_pgprot(old_pte);
295
296 pgprot_val(new_prot) &= ~pgprot_val(cpa->mask_clr);
297 pgprot_val(new_prot) |= pgprot_val(cpa->mask_set);
c31c7d48
TG
298
299 /*
300 * old_pte points to the large page base address. So we need
301 * to add the offset of the virtual address:
302 */
303 pfn = pte_pfn(old_pte) + ((address & (psize - 1)) >> PAGE_SHIFT);
304 cpa->pfn = pfn;
305
306 new_prot = static_protections(new_prot, address, pfn);
65e074df 307
fac84939
TG
308 /*
309 * We need to check the full range, whether
310 * static_protection() requires a different pgprot for one of
311 * the pages in the range we try to preserve:
312 */
313 addr = address + PAGE_SIZE;
c31c7d48
TG
314 pfn++;
315 for (i = 1; i < cpa->numpages; i++, addr += PAGE_SIZE, pfn++) {
316 pgprot_t chk_prot = static_protections(new_prot, addr, pfn);
fac84939
TG
317
318 if (pgprot_val(chk_prot) != pgprot_val(new_prot))
319 goto out_unlock;
320 }
321
65e074df
TG
322 /*
323 * If there are no changes, return. maxpages has been updated
324 * above:
325 */
326 if (pgprot_val(new_prot) == pgprot_val(old_prot)) {
beaff633 327 do_split = 0;
65e074df
TG
328 goto out_unlock;
329 }
330
331 /*
332 * We need to change the attributes. Check, whether we can
333 * change the large page in one go. We request a split, when
334 * the address is not aligned and the number of pages is
335 * smaller than the number of pages in the large page. Note
336 * that we limited the number of possible pages already to
337 * the number of pages in the large page.
338 */
339 if (address == (nextpage_addr - psize) && cpa->numpages == numpages) {
340 /*
341 * The address is aligned and the number of pages
342 * covers the full page.
343 */
344 new_pte = pfn_pte(pte_pfn(old_pte), canon_pgprot(new_prot));
345 __set_pmd_pte(kpte, address, new_pte);
346 cpa->flushtlb = 1;
beaff633 347 do_split = 0;
65e074df
TG
348 }
349
350out_unlock:
351 spin_unlock_irqrestore(&pgd_lock, flags);
9df84993 352
beaff633 353 return do_split;
65e074df
TG
354}
355
76ebd054
TG
356static LIST_HEAD(page_pool);
357static unsigned long pool_size, pool_pages, pool_low;
358static unsigned long pool_used, pool_failed, pool_refill;
359
360static void cpa_fill_pool(void)
361{
362 struct page *p;
363 gfp_t gfp = GFP_KERNEL;
364
365 /* Do not allocate from interrupt context */
366 if (in_irq() || irqs_disabled())
367 return;
368 /*
369 * Check unlocked. I does not matter when we have one more
370 * page in the pool. The bit lock avoids recursive pool
371 * allocations:
372 */
373 if (pool_pages >= pool_size || test_and_set_bit_lock(0, &pool_refill))
374 return;
375
376#ifdef CONFIG_DEBUG_PAGEALLOC
377 /*
378 * We could do:
379 * gfp = in_atomic() ? GFP_ATOMIC : GFP_KERNEL;
380 * but this fails on !PREEMPT kernels
381 */
382 gfp = GFP_ATOMIC | __GFP_NORETRY | __GFP_NOWARN;
383#endif
384
385 while (pool_pages < pool_size) {
386 p = alloc_pages(gfp, 0);
387 if (!p) {
388 pool_failed++;
389 break;
390 }
391 spin_lock_irq(&pgd_lock);
392 list_add(&p->lru, &page_pool);
393 pool_pages++;
394 spin_unlock_irq(&pgd_lock);
395 }
396 clear_bit_unlock(0, &pool_refill);
397}
398
399#define SHIFT_MB (20 - PAGE_SHIFT)
400#define ROUND_MB_GB ((1 << 10) - 1)
401#define SHIFT_MB_GB 10
402#define POOL_PAGES_PER_GB 16
403
404void __init cpa_init(void)
405{
406 struct sysinfo si;
407 unsigned long gb;
408
409 si_meminfo(&si);
410 /*
411 * Calculate the number of pool pages:
412 *
413 * Convert totalram (nr of pages) to MiB and round to the next
414 * GiB. Shift MiB to Gib and multiply the result by
415 * POOL_PAGES_PER_GB:
416 */
417 gb = ((si.totalram >> SHIFT_MB) + ROUND_MB_GB) >> SHIFT_MB_GB;
418 pool_size = POOL_PAGES_PER_GB * gb;
419 pool_low = pool_size;
420
421 cpa_fill_pool();
422 printk(KERN_DEBUG
423 "CPA: page pool initialized %lu of %lu pages preallocated\n",
424 pool_pages, pool_size);
425}
426
7afe15b9 427static int split_large_page(pte_t *kpte, unsigned long address)
bb5c2dbd 428{
7b610eec 429 unsigned long flags, pfn, pfninc = 1;
9df84993 430 unsigned int i, level;
bb5c2dbd 431 pte_t *pbase, *tmp;
9df84993 432 pgprot_t ref_prot;
bb5c2dbd
IM
433 struct page *base;
434
eb5b5f02
TG
435 /*
436 * Get a page from the pool. The pool list is protected by the
437 * pgd_lock, which we have to take anyway for the split
438 * operation:
439 */
440 spin_lock_irqsave(&pgd_lock, flags);
441 if (list_empty(&page_pool)) {
442 spin_unlock_irqrestore(&pgd_lock, flags);
bb5c2dbd 443 return -ENOMEM;
eb5b5f02
TG
444 }
445
446 base = list_first_entry(&page_pool, struct page, lru);
447 list_del(&base->lru);
448 pool_pages--;
449
450 if (pool_pages < pool_low)
451 pool_low = pool_pages;
bb5c2dbd 452
bb5c2dbd
IM
453 /*
454 * Check for races, another CPU might have split this page
455 * up for us already:
456 */
457 tmp = lookup_address(address, &level);
6ce9fc17 458 if (tmp != kpte)
bb5c2dbd
IM
459 goto out_unlock;
460
bb5c2dbd 461 pbase = (pte_t *)page_address(base);
44af6c41 462#ifdef CONFIG_X86_32
bb5c2dbd 463 paravirt_alloc_pt(&init_mm, page_to_pfn(base));
44af6c41 464#endif
07cf89c0 465 ref_prot = pte_pgprot(pte_clrhuge(*kpte));
bb5c2dbd 466
f07333fd
AK
467#ifdef CONFIG_X86_64
468 if (level == PG_LEVEL_1G) {
469 pfninc = PMD_PAGE_SIZE >> PAGE_SHIFT;
470 pgprot_val(ref_prot) |= _PAGE_PSE;
f07333fd
AK
471 }
472#endif
473
63c1dcf4
TG
474 /*
475 * Get the target pfn from the original entry:
476 */
477 pfn = pte_pfn(*kpte);
f07333fd 478 for (i = 0; i < PTRS_PER_PTE; i++, pfn += pfninc)
63c1dcf4 479 set_pte(&pbase[i], pfn_pte(pfn, ref_prot));
bb5c2dbd
IM
480
481 /*
07cf89c0 482 * Install the new, split up pagetable. Important details here:
4c881ca1
HY
483 *
484 * On Intel the NX bit of all levels must be cleared to make a
485 * page executable. See section 4.13.2 of Intel 64 and IA-32
486 * Architectures Software Developer's Manual).
07cf89c0
TG
487 *
488 * Mark the entry present. The current mapping might be
489 * set to not present, which we preserved above.
bb5c2dbd 490 */
4c881ca1 491 ref_prot = pte_pgprot(pte_mkexec(pte_clrhuge(*kpte)));
07cf89c0 492 pgprot_val(ref_prot) |= _PAGE_PRESENT;
9a3dc780 493 __set_pmd_pte(kpte, address, mk_pte(base, ref_prot));
bb5c2dbd
IM
494 base = NULL;
495
496out_unlock:
eb5b5f02
TG
497 /*
498 * If we dropped out via the lookup_address check under
499 * pgd_lock then stick the page back into the pool:
500 */
501 if (base) {
502 list_add(&base->lru, &page_pool);
503 pool_pages++;
504 } else
505 pool_used++;
9a3dc780 506 spin_unlock_irqrestore(&pgd_lock, flags);
bb5c2dbd 507
bb5c2dbd
IM
508 return 0;
509}
510
c31c7d48 511static int __change_page_attr(struct cpa_data *cpa, int primary)
9f4c815c 512{
c31c7d48 513 unsigned long address = cpa->vaddr;
da7bfc50
HH
514 int do_split, err;
515 unsigned int level;
1da177e4 516 struct page *kpte_page;
c31c7d48 517 pte_t *kpte, old_pte;
1da177e4 518
97f99fed 519repeat:
f0646e43 520 kpte = lookup_address(address, &level);
1da177e4 521 if (!kpte)
c31c7d48
TG
522 return primary ? -EINVAL : 0;
523
524 old_pte = *kpte;
525 if (!pte_val(old_pte)) {
526 if (!primary)
527 return 0;
528 printk(KERN_WARNING "CPA: called for zero pte. "
529 "vaddr = %lx cpa->vaddr = %lx\n", address,
530 cpa->vaddr);
531 WARN_ON(1);
1da177e4 532 return -EINVAL;
c31c7d48 533 }
9f4c815c 534
1da177e4 535 kpte_page = virt_to_page(kpte);
65d2f0bc
AK
536 BUG_ON(PageLRU(kpte_page));
537 BUG_ON(PageCompound(kpte_page));
538
30551bb3 539 if (level == PG_LEVEL_4K) {
c31c7d48 540 pte_t new_pte;
626c2c9d 541 pgprot_t new_prot = pte_pgprot(old_pte);
c31c7d48 542 unsigned long pfn = pte_pfn(old_pte);
86f03989 543
72e458df
TG
544 pgprot_val(new_prot) &= ~pgprot_val(cpa->mask_clr);
545 pgprot_val(new_prot) |= pgprot_val(cpa->mask_set);
86f03989 546
c31c7d48 547 new_prot = static_protections(new_prot, address, pfn);
86f03989 548
626c2c9d
AV
549 /*
550 * We need to keep the pfn from the existing PTE,
551 * after all we're only going to change it's attributes
552 * not the memory it points to
553 */
c31c7d48
TG
554 new_pte = pfn_pte(pfn, canon_pgprot(new_prot));
555 cpa->pfn = pfn;
f4ae5da0
TG
556 /*
557 * Do we really change anything ?
558 */
559 if (pte_val(old_pte) != pte_val(new_pte)) {
560 set_pte_atomic(kpte, new_pte);
561 cpa->flushtlb = 1;
562 }
65e074df
TG
563 cpa->numpages = 1;
564 return 0;
1da177e4 565 }
65e074df
TG
566
567 /*
568 * Check, whether we can keep the large page intact
569 * and just change the pte:
570 */
beaff633 571 do_split = try_preserve_large_page(kpte, address, cpa);
65e074df
TG
572 /*
573 * When the range fits into the existing large page,
574 * return. cp->numpages and cpa->tlbflush have been updated in
575 * try_large_page:
576 */
87f7f8fe
IM
577 if (do_split <= 0)
578 return do_split;
65e074df
TG
579
580 /*
581 * We have to split the large page:
582 */
87f7f8fe
IM
583 err = split_large_page(kpte, address);
584 if (!err) {
585 cpa->flushtlb = 1;
586 goto repeat;
587 }
beaff633 588
87f7f8fe 589 return err;
9f4c815c 590}
1da177e4 591
c31c7d48
TG
592static int __change_page_attr_set_clr(struct cpa_data *cpa, int checkalias);
593
594static int cpa_process_alias(struct cpa_data *cpa)
1da177e4 595{
c31c7d48
TG
596 struct cpa_data alias_cpa;
597 int ret;
44af6c41 598
c31c7d48
TG
599 if (cpa->pfn > max_pfn_mapped)
600 return 0;
626c2c9d 601
c31c7d48
TG
602 alias_cpa = *cpa;
603 alias_cpa.vaddr = (unsigned long) __va(cpa->pfn << PAGE_SHIFT);
44af6c41 604
c31c7d48 605 ret = __change_page_attr_set_clr(&alias_cpa, 0);
44af6c41 606
44af6c41 607#ifdef CONFIG_X86_64
c31c7d48
TG
608 if (ret)
609 return ret;
488fd995 610 /*
0879750f
TG
611 * If the physical address is inside the kernel map, we need
612 * to touch the high mapped kernel as well:
488fd995 613 */
c31c7d48
TG
614 if (!within(cpa->pfn, highmap_start_pfn(), highmap_end_pfn()))
615 return 0;
0879750f 616
c31c7d48
TG
617 alias_cpa = *cpa;
618 alias_cpa.vaddr =
619 (cpa->pfn << PAGE_SHIFT) + __START_KERNEL_map - phys_base;
620
621 /*
622 * The high mapping range is imprecise, so ignore the return value.
623 */
624 __change_page_attr_set_clr(&alias_cpa, 0);
488fd995 625#endif
c31c7d48 626 return ret;
1da177e4
LT
627}
628
c31c7d48 629static int __change_page_attr_set_clr(struct cpa_data *cpa, int checkalias)
ff31452b 630{
65e074df 631 int ret, numpages = cpa->numpages;
ff31452b 632
65e074df
TG
633 while (numpages) {
634 /*
635 * Store the remaining nr of pages for the large page
636 * preservation check.
637 */
638 cpa->numpages = numpages;
c31c7d48
TG
639
640 ret = __change_page_attr(cpa, checkalias);
ff31452b
TG
641 if (ret)
642 return ret;
ff31452b 643
c31c7d48
TG
644 if (checkalias) {
645 ret = cpa_process_alias(cpa);
646 if (ret)
647 return ret;
648 }
649
65e074df
TG
650 /*
651 * Adjust the number of pages with the result of the
652 * CPA operation. Either a large page has been
653 * preserved or a single page update happened.
654 */
655 BUG_ON(cpa->numpages > numpages);
656 numpages -= cpa->numpages;
657 cpa->vaddr += cpa->numpages * PAGE_SIZE;
658 }
ff31452b
TG
659 return 0;
660}
661
6bb8383b
AK
662static inline int cache_attr(pgprot_t attr)
663{
664 return pgprot_val(attr) &
665 (_PAGE_PAT | _PAGE_PAT_LARGE | _PAGE_PWT | _PAGE_PCD);
666}
667
ff31452b
TG
668static int change_page_attr_set_clr(unsigned long addr, int numpages,
669 pgprot_t mask_set, pgprot_t mask_clr)
670{
72e458df 671 struct cpa_data cpa;
af96e443 672 int ret, cache, checkalias;
331e4065
TG
673
674 /*
675 * Check, if we are requested to change a not supported
676 * feature:
677 */
678 mask_set = canon_pgprot(mask_set);
679 mask_clr = canon_pgprot(mask_clr);
680 if (!pgprot_val(mask_set) && !pgprot_val(mask_clr))
681 return 0;
682
69b1415e
TG
683 /* Ensure we are PAGE_SIZE aligned */
684 if (addr & ~PAGE_MASK) {
685 addr &= PAGE_MASK;
686 /*
687 * People should not be passing in unaligned addresses:
688 */
689 WARN_ON_ONCE(1);
690 }
691
72e458df
TG
692 cpa.vaddr = addr;
693 cpa.numpages = numpages;
694 cpa.mask_set = mask_set;
695 cpa.mask_clr = mask_clr;
f4ae5da0 696 cpa.flushtlb = 0;
72e458df 697
af96e443
TG
698 /* No alias checking for _NX bit modifications */
699 checkalias = (pgprot_val(mask_set) | pgprot_val(mask_clr)) != _PAGE_NX;
700
701 ret = __change_page_attr_set_clr(&cpa, checkalias);
ff31452b 702
f4ae5da0
TG
703 /*
704 * Check whether we really changed something:
705 */
706 if (!cpa.flushtlb)
76ebd054 707 goto out;
f4ae5da0 708
6bb8383b
AK
709 /*
710 * No need to flush, when we did not set any of the caching
711 * attributes:
712 */
713 cache = cache_attr(mask_set);
714
57a6a46a
TG
715 /*
716 * On success we use clflush, when the CPU supports it to
717 * avoid the wbindv. If the CPU does not support it and in the
af1e6844 718 * error case we fall back to cpa_flush_all (which uses
57a6a46a
TG
719 * wbindv):
720 */
721 if (!ret && cpu_has_clflush)
6bb8383b 722 cpa_flush_range(addr, numpages, cache);
57a6a46a 723 else
6bb8383b 724 cpa_flush_all(cache);
ff31452b 725
76ebd054
TG
726out:
727 cpa_fill_pool();
ff31452b
TG
728 return ret;
729}
730
56744546
TG
731static inline int change_page_attr_set(unsigned long addr, int numpages,
732 pgprot_t mask)
75cbade8 733{
56744546 734 return change_page_attr_set_clr(addr, numpages, mask, __pgprot(0));
75cbade8
AV
735}
736
56744546
TG
737static inline int change_page_attr_clear(unsigned long addr, int numpages,
738 pgprot_t mask)
72932c7a 739{
5827040d 740 return change_page_attr_set_clr(addr, numpages, __pgprot(0), mask);
72932c7a
TG
741}
742
743int set_memory_uc(unsigned long addr, int numpages)
744{
745 return change_page_attr_set(addr, numpages,
746 __pgprot(_PAGE_PCD | _PAGE_PWT));
75cbade8
AV
747}
748EXPORT_SYMBOL(set_memory_uc);
749
750int set_memory_wb(unsigned long addr, int numpages)
751{
72932c7a
TG
752 return change_page_attr_clear(addr, numpages,
753 __pgprot(_PAGE_PCD | _PAGE_PWT));
75cbade8
AV
754}
755EXPORT_SYMBOL(set_memory_wb);
756
757int set_memory_x(unsigned long addr, int numpages)
758{
72932c7a 759 return change_page_attr_clear(addr, numpages, __pgprot(_PAGE_NX));
75cbade8
AV
760}
761EXPORT_SYMBOL(set_memory_x);
762
763int set_memory_nx(unsigned long addr, int numpages)
764{
72932c7a 765 return change_page_attr_set(addr, numpages, __pgprot(_PAGE_NX));
75cbade8
AV
766}
767EXPORT_SYMBOL(set_memory_nx);
768
769int set_memory_ro(unsigned long addr, int numpages)
770{
72932c7a 771 return change_page_attr_clear(addr, numpages, __pgprot(_PAGE_RW));
75cbade8 772}
75cbade8
AV
773
774int set_memory_rw(unsigned long addr, int numpages)
775{
72932c7a 776 return change_page_attr_set(addr, numpages, __pgprot(_PAGE_RW));
75cbade8 777}
f62d0f00
IM
778
779int set_memory_np(unsigned long addr, int numpages)
780{
72932c7a 781 return change_page_attr_clear(addr, numpages, __pgprot(_PAGE_PRESENT));
f62d0f00 782}
75cbade8
AV
783
/* set_memory_uc() for @numpages pages at the kernel address of @page. */
int set_pages_uc(struct page *page, int numpages)
{
	return set_memory_uc((unsigned long)page_address(page), numpages);
}
EXPORT_SYMBOL(set_pages_uc);
791
/* set_memory_wb() for @numpages pages at the kernel address of @page. */
int set_pages_wb(struct page *page, int numpages)
{
	return set_memory_wb((unsigned long)page_address(page), numpages);
}
EXPORT_SYMBOL(set_pages_wb);
799
/* set_memory_x() for @numpages pages at the kernel address of @page. */
int set_pages_x(struct page *page, int numpages)
{
	return set_memory_x((unsigned long)page_address(page), numpages);
}
EXPORT_SYMBOL(set_pages_x);
807
/* set_memory_nx() for @numpages pages at the kernel address of @page. */
int set_pages_nx(struct page *page, int numpages)
{
	return set_memory_nx((unsigned long)page_address(page), numpages);
}
EXPORT_SYMBOL(set_pages_nx);
815
/* set_memory_ro() for @numpages pages at the kernel address of @page. */
int set_pages_ro(struct page *page, int numpages)
{
	return set_memory_ro((unsigned long)page_address(page), numpages);
}
75cbade8
AV
822
/* set_memory_rw() for @numpages pages at the kernel address of @page. */
int set_pages_rw(struct page *page, int numpages)
{
	return set_memory_rw((unsigned long)page_address(page), numpages);
}
829
1da177e4 830#ifdef CONFIG_DEBUG_PAGEALLOC
f62d0f00
IM
831
832static int __set_pages_p(struct page *page, int numpages)
833{
72e458df
TG
834 struct cpa_data cpa = { .vaddr = (unsigned long) page_address(page),
835 .numpages = numpages,
836 .mask_set = __pgprot(_PAGE_PRESENT | _PAGE_RW),
837 .mask_clr = __pgprot(0)};
72932c7a 838
c31c7d48 839 return __change_page_attr_set_clr(&cpa, 1);
f62d0f00
IM
840}
841
842static int __set_pages_np(struct page *page, int numpages)
843{
72e458df
TG
844 struct cpa_data cpa = { .vaddr = (unsigned long) page_address(page),
845 .numpages = numpages,
846 .mask_set = __pgprot(0),
847 .mask_clr = __pgprot(_PAGE_PRESENT | _PAGE_RW)};
72932c7a 848
c31c7d48 849 return __change_page_attr_set_clr(&cpa, 1);
f62d0f00
IM
850}
851
1da177e4
LT
852void kernel_map_pages(struct page *page, int numpages, int enable)
853{
854 if (PageHighMem(page))
855 return;
9f4c815c 856 if (!enable) {
f9b8404c
IM
857 debug_check_no_locks_freed(page_address(page),
858 numpages * PAGE_SIZE);
9f4c815c 859 }
de5097c2 860
12d6f21e
IM
861 /*
862 * If page allocator is not up yet then do not call c_p_a():
863 */
864 if (!debug_pagealloc_enabled)
865 return;
866
9f4c815c 867 /*
f8d8406b
IM
868 * The return value is ignored as the calls cannot fail.
869 * Large pages are kept enabled at boot time, and are
870 * split up quickly with DEBUG_PAGEALLOC. If a splitup
871 * fails here (due to temporary memory shortage) no damage
872 * is done because we just keep the largepage intact up
873 * to the next attempt when it will likely be split up:
1da177e4 874 */
f62d0f00
IM
875 if (enable)
876 __set_pages_p(page, numpages);
877 else
878 __set_pages_np(page, numpages);
9f4c815c
IM
879
880 /*
e4b71dcf
IM
881 * We should perform an IPI and flush all tlbs,
882 * but that can deadlock->flush only current cpu:
1da177e4
LT
883 */
884 __flush_tlb_all();
76ebd054
TG
885
886 /*
887 * Try to refill the page pool here. We can do this only after
888 * the tlb flush.
889 */
890 cpa_fill_pool();
1da177e4
LT
891}
892#endif
d1028a15
AV
893
894/*
895 * The testcases use internal knowledge of the implementation that shouldn't
896 * be exposed to the rest of the kernel. Include these directly here.
897 */
898#ifdef CONFIG_CPA_DEBUG
899#include "pageattr-test.c"
900#endif