/*
 *  Copyright IBM Corp. 2007, 2011
 *  Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
 */

#include <linux/sched.h>
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/gfp.h>
#include <linux/mm.h>
#include <linux/swap.h>
#include <linux/smp.h>
#include <linux/spinlock.h>
#include <linux/rcupdate.h>
#include <linux/slab.h>
#include <linux/swapops.h>
#include <linux/sysctl.h>
#include <linux/ksm.h>
#include <linux/mman.h>

#include <asm/pgtable.h>
#include <asm/pgalloc.h>
#include <asm/tlb.h>
#include <asm/tlbflush.h>
#include <asm/mmu_context.h>
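
/*
 * Invalidate a pte and flush the corresponding TLB entry on all CPUs
 * right away. If this CPU is the only one in the mm's cpumask and the
 * machine has local TLB clearing, the cheaper local IPTE suffices.
 */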
static inline pte_t ptep_flush_direct(struct mm_struct *mm,
                                      unsigned long addr, pte_t *ptep)
{
        pte_t old;

        old = *ptep;
        if (unlikely(pte_val(old) & _PAGE_INVALID))
                return old;
        atomic_inc(&mm->context.flush_count);
        if (MACHINE_HAS_TLB_LC &&
            cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id())))
                __ptep_ipte_local(addr, ptep);
        else
                __ptep_ipte(addr, ptep);
        atomic_dec(&mm->context.flush_count);
        return old;
}
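
/*
 * Lazy variant: when only this CPU has the mm attached, merely mark the
 * pte invalid and note a pending flush in mm->context.flush_mm; the
 * hardware IPTE is deferred to the next global TLB flush.
 */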
static inline pte_t ptep_flush_lazy(struct mm_struct *mm,
                                    unsigned long addr, pte_t *ptep)
{
        pte_t old;

        old = *ptep;
        if (unlikely(pte_val(old) & _PAGE_INVALID))
                return old;
        atomic_inc(&mm->context.flush_count);
        if (cpumask_equal(&mm->context.cpu_attach_mask,
                          cpumask_of(smp_processor_id()))) {
                pte_val(*ptep) |= _PAGE_INVALID;
                mm->context.flush_mm = 1;
        } else
                __ptep_ipte(addr, ptep);
        atomic_dec(&mm->context.flush_count);
        return old;
}
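
/*
 * The PGSTE (page status table entry) of a pte lives at offset
 * PTRS_PER_PTE from the pte in the same page table. Its PCL bit serves
 * as a lock: pgste_get_lock() takes it with a compare-and-swap loop,
 * pgste_set_unlock() stores the PGSTE back with the bit cleared.
 */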
static inline pgste_t pgste_get_lock(pte_t *ptep)
{
        unsigned long new = 0;
#ifdef CONFIG_PGSTE
        unsigned long old;

        asm(
                "       lg      %0,%2\n"
                "0:     lgr     %1,%0\n"
                "       nihh    %0,0xff7f\n"    /* clear PCL bit in old */
                "       oihh    %1,0x0080\n"    /* set PCL bit in new */
                "       csg     %0,%1,%2\n"
                "       jl      0b\n"
                : "=&d" (old), "=&d" (new), "=Q" (ptep[PTRS_PER_PTE])
                : "Q" (ptep[PTRS_PER_PTE]) : "cc", "memory");
#endif
        return __pgste(new);
}

static inline void pgste_set_unlock(pte_t *ptep, pgste_t pgste)
{
#ifdef CONFIG_PGSTE
        asm(
                "       nihh    %1,0xff7f\n"    /* clear PCL bit */
                "       stg     %1,%0\n"
                : "=Q" (ptep[PTRS_PER_PTE])
                : "d" (pgste_val(pgste)), "Q" (ptep[PTRS_PER_PTE])
                : "cc", "memory");
#endif
}

static inline pgste_t pgste_get(pte_t *ptep)
{
        unsigned long pgste = 0;
#ifdef CONFIG_PGSTE
        pgste = *(unsigned long *)(ptep + PTRS_PER_PTE);
#endif
        return __pgste(pgste);
}

static inline void pgste_set(pte_t *ptep, pgste_t pgste)
{
#ifdef CONFIG_PGSTE
        *(pgste_t *)(ptep + PTRS_PER_PTE) = pgste;
#endif
}
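
/*
 * Fold the referenced/changed state of the real storage key into the
 * guest GR/GC bits of the PGSTE and copy the access key and fetch
 * protection bit, so the guest view of the key is preserved while the
 * page is invalid.
 */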
static inline pgste_t pgste_update_all(pte_t pte, pgste_t pgste,
                                       struct mm_struct *mm)
{
#ifdef CONFIG_PGSTE
        unsigned long address, bits, skey;

        if (!mm_use_skey(mm) || pte_val(pte) & _PAGE_INVALID)
                return pgste;
        address = pte_val(pte) & PAGE_MASK;
        skey = (unsigned long) page_get_storage_key(address);
        bits = skey & (_PAGE_CHANGED | _PAGE_REFERENCED);
        /* Transfer page changed & referenced bit to guest bits in pgste */
        pgste_val(pgste) |= bits << 48;         /* GR bit & GC bit */
        /* Copy page access key and fetch protection bit to pgste */
        pgste_val(pgste) &= ~(PGSTE_ACC_BITS | PGSTE_FP_BIT);
        pgste_val(pgste) |= (skey & (_PAGE_ACC_BITS | _PAGE_FP_BIT)) << 56;
#endif
        return pgste;
}

static inline void pgste_set_key(pte_t *ptep, pgste_t pgste, pte_t entry,
                                 struct mm_struct *mm)
{
#ifdef CONFIG_PGSTE
        unsigned long address;
        unsigned long nkey;

        if (!mm_use_skey(mm) || pte_val(entry) & _PAGE_INVALID)
                return;
        VM_BUG_ON(!(pte_val(*ptep) & _PAGE_INVALID));
        address = pte_val(entry) & PAGE_MASK;
        /*
         * Set page access key and fetch protection bit from pgste.
         * The guest C/R information is still in the PGSTE, set real
         * key C/R to 0.
         */
        nkey = (pgste_val(pgste) & (PGSTE_ACC_BITS | PGSTE_FP_BIT)) >> 56;
        nkey |= (pgste_val(pgste) & (PGSTE_GR_BIT | PGSTE_GC_BIT)) >> 48;
        page_set_storage_key(address, nkey, 0);
#endif
}
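
/*
 * Install a pte and keep the user-dirty state in the PGSTE up to date.
 * Machines without enhanced suppression-on-protection (ESOP) get the
 * dirty bit forced on for writable ptes, see the comment below.
 */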
static inline pgste_t pgste_set_pte(pte_t *ptep, pgste_t pgste, pte_t entry)
{
#ifdef CONFIG_PGSTE
        if ((pte_val(entry) & _PAGE_PRESENT) &&
            (pte_val(entry) & _PAGE_WRITE) &&
            !(pte_val(entry) & _PAGE_INVALID)) {
                if (!MACHINE_HAS_ESOP) {
                        /*
                         * Without enhanced suppression-on-protection force
                         * the dirty bit on for all writable ptes.
                         */
                        pte_val(entry) |= _PAGE_DIRTY;
                        pte_val(entry) &= ~_PAGE_PROTECT;
                }
                if (!(pte_val(entry) & _PAGE_PROTECT))
                        /* This pte allows write access, set user-dirty */
                        pgste_val(pgste) |= PGSTE_UC_BIT;
        }
#endif
        *ptep = entry;
        return pgste;
}

static inline pgste_t pgste_ipte_notify(struct mm_struct *mm,
                                        unsigned long addr,
                                        pte_t *ptep, pgste_t pgste)
{
#ifdef CONFIG_PGSTE
        if (pgste_val(pgste) & PGSTE_IN_BIT) {
                pgste_val(pgste) &= ~PGSTE_IN_BIT;
                ptep_notify(mm, addr, ptep);
        }
#endif
        return pgste;
}
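
/*
 * A pte exchange runs in three steps: ptep_xchg_start() takes the PGSTE
 * lock and delivers a pending invalidation notification, the caller
 * flushes the old pte (directly or lazily), and ptep_xchg_commit()
 * transfers the storage key state and installs the new pte before
 * dropping the lock.
 */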
static inline pgste_t ptep_xchg_start(struct mm_struct *mm,
                                      unsigned long addr, pte_t *ptep)
{
        pgste_t pgste = __pgste(0);

        if (mm_has_pgste(mm)) {
                pgste = pgste_get_lock(ptep);
                pgste = pgste_ipte_notify(mm, addr, ptep, pgste);
        }
        return pgste;
}

static inline void ptep_xchg_commit(struct mm_struct *mm,
                                    unsigned long addr, pte_t *ptep,
                                    pgste_t pgste, pte_t old, pte_t new)
{
        if (mm_has_pgste(mm)) {
                if (pte_val(old) & _PAGE_INVALID)
                        pgste_set_key(ptep, pgste, new, mm);
                if (pte_val(new) & _PAGE_INVALID) {
                        pgste = pgste_update_all(old, pgste, mm);
                        if ((pgste_val(pgste) & _PGSTE_GPS_USAGE_MASK) ==
                            _PGSTE_GPS_USAGE_UNUSED)
                                pte_val(old) |= _PAGE_UNUSED;
                }
                pgste = pgste_set_pte(ptep, pgste, new);
                pgste_set_unlock(ptep, pgste);
        } else {
                *ptep = new;
        }
}

pte_t ptep_xchg_direct(struct mm_struct *mm, unsigned long addr,
                       pte_t *ptep, pte_t new)
{
        pgste_t pgste;
        pte_t old;

        preempt_disable();
        pgste = ptep_xchg_start(mm, addr, ptep);
        old = ptep_flush_direct(mm, addr, ptep);
        ptep_xchg_commit(mm, addr, ptep, pgste, old, new);
        preempt_enable();
        return old;
}
EXPORT_SYMBOL(ptep_xchg_direct);

pte_t ptep_xchg_lazy(struct mm_struct *mm, unsigned long addr,
                     pte_t *ptep, pte_t new)
{
        pgste_t pgste;
        pte_t old;

        preempt_disable();
        pgste = ptep_xchg_start(mm, addr, ptep);
        old = ptep_flush_lazy(mm, addr, ptep);
        ptep_xchg_commit(mm, addr, ptep, pgste, old, new);
        preempt_enable();
        return old;
}
EXPORT_SYMBOL(ptep_xchg_lazy);
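
/*
 * ptep_modify_prot_start()/ptep_modify_prot_commit() bracket a pte
 * protection update: start flushes the old pte lazily and parks the
 * updated PGSTE; commit installs the new pte under the still-held
 * PGSTE lock. Preemption is expected to stay disabled from start to
 * commit.
 */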
pte_t ptep_modify_prot_start(struct mm_struct *mm, unsigned long addr,
                             pte_t *ptep)
{
        pgste_t pgste;
        pte_t old;

        preempt_disable();
        pgste = ptep_xchg_start(mm, addr, ptep);
        old = ptep_flush_lazy(mm, addr, ptep);
        if (mm_has_pgste(mm)) {
                pgste = pgste_update_all(old, pgste, mm);
                pgste_set(ptep, pgste);
        }
        return old;
}
EXPORT_SYMBOL(ptep_modify_prot_start);

void ptep_modify_prot_commit(struct mm_struct *mm, unsigned long addr,
                             pte_t *ptep, pte_t pte)
{
        pgste_t pgste;

        if (mm_has_pgste(mm)) {
                pgste = pgste_get(ptep);
                pgste_set_key(ptep, pgste, pte, mm);
                pgste = pgste_set_pte(ptep, pgste, pte);
                pgste_set_unlock(ptep, pgste);
        } else {
                *ptep = pte;
        }
        preempt_enable();
}
EXPORT_SYMBOL(ptep_modify_prot_commit);
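
/*
 * Segment table (pmd) flush helpers, analogous to the pte variants
 * above. Machines without IDTE fall back to CSP to invalidate the
 * entry.
 */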
static inline pmd_t pmdp_flush_direct(struct mm_struct *mm,
                                      unsigned long addr, pmd_t *pmdp)
{
        pmd_t old;

        old = *pmdp;
        if (pmd_val(old) & _SEGMENT_ENTRY_INVALID)
                return old;
        if (!MACHINE_HAS_IDTE) {
                __pmdp_csp(pmdp);
                return old;
        }
        atomic_inc(&mm->context.flush_count);
        if (MACHINE_HAS_TLB_LC &&
            cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id())))
                __pmdp_idte_local(addr, pmdp);
        else
                __pmdp_idte(addr, pmdp);
        atomic_dec(&mm->context.flush_count);
        return old;
}

static inline pmd_t pmdp_flush_lazy(struct mm_struct *mm,
                                    unsigned long addr, pmd_t *pmdp)
{
        pmd_t old;

        old = *pmdp;
        if (pmd_val(old) & _SEGMENT_ENTRY_INVALID)
                return old;
        atomic_inc(&mm->context.flush_count);
        if (cpumask_equal(&mm->context.cpu_attach_mask,
                          cpumask_of(smp_processor_id()))) {
                pmd_val(*pmdp) |= _SEGMENT_ENTRY_INVALID;
                mm->context.flush_mm = 1;
        } else if (MACHINE_HAS_IDTE)
                __pmdp_idte(addr, pmdp);
        else
                __pmdp_csp(pmdp);
        atomic_dec(&mm->context.flush_count);
        return old;
}

pmd_t pmdp_xchg_direct(struct mm_struct *mm, unsigned long addr,
                       pmd_t *pmdp, pmd_t new)
{
        pmd_t old;

        preempt_disable();
        old = pmdp_flush_direct(mm, addr, pmdp);
        *pmdp = new;
        preempt_enable();
        return old;
}
EXPORT_SYMBOL(pmdp_xchg_direct);

pmd_t pmdp_xchg_lazy(struct mm_struct *mm, unsigned long addr,
                     pmd_t *pmdp, pmd_t new)
{
        pmd_t old;

        preempt_disable();
        old = pmdp_flush_lazy(mm, addr, pmdp);
        *pmdp = new;
        preempt_enable();
        return old;
}
EXPORT_SYMBOL(pmdp_xchg_lazy);

static inline pud_t pudp_flush_direct(struct mm_struct *mm,
                                      unsigned long addr, pud_t *pudp)
{
        pud_t old;

        old = *pudp;
        if (pud_val(old) & _REGION_ENTRY_INVALID)
                return old;
        if (!MACHINE_HAS_IDTE) {
                /*
                 * Invalid bit position is the same for pmd and pud, so we
                 * can re-use __pmdp_csp() here.
                 */
                __pmdp_csp((pmd_t *) pudp);
                return old;
        }
        atomic_inc(&mm->context.flush_count);
        if (MACHINE_HAS_TLB_LC &&
            cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id())))
                __pudp_idte_local(addr, pudp);
        else
                __pudp_idte(addr, pudp);
        atomic_dec(&mm->context.flush_count);
        return old;
}

pud_t pudp_xchg_direct(struct mm_struct *mm, unsigned long addr,
                       pud_t *pudp, pud_t new)
{
        pud_t old;

        preempt_disable();
        old = pudp_flush_direct(mm, addr, pudp);
        *pudp = new;
        preempt_enable();
        return old;
}
EXPORT_SYMBOL(pudp_xchg_direct);

#ifdef CONFIG_TRANSPARENT_HUGEPAGE
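/*
 * Deposit/withdraw keep the page tables preallocated for a huge pmd on
 * a FIFO list rooted at pmd_huge_pte(), so that a later split of the
 * huge page can be served without allocating.
 */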
void pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp,
                                pgtable_t pgtable)
{
        struct list_head *lh = (struct list_head *) pgtable;

        assert_spin_locked(pmd_lockptr(mm, pmdp));

        /* FIFO */
        if (!pmd_huge_pte(mm, pmdp))
                INIT_LIST_HEAD(lh);
        else
                list_add(lh, (struct list_head *) pmd_huge_pte(mm, pmdp));
        pmd_huge_pte(mm, pmdp) = pgtable;
}

pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp)
{
        struct list_head *lh;
        pgtable_t pgtable;
        pte_t *ptep;

        assert_spin_locked(pmd_lockptr(mm, pmdp));

        /* FIFO */
        pgtable = pmd_huge_pte(mm, pmdp);
        lh = (struct list_head *) pgtable;
        if (list_empty(lh))
                pmd_huge_pte(mm, pmdp) = NULL;
        else {
                pmd_huge_pte(mm, pmdp) = (pgtable_t) lh->next;
                list_del(lh);
        }
        ptep = (pte_t *) pgtable;
        pte_val(*ptep) = _PAGE_INVALID;
        ptep++;
        pte_val(*ptep) = _PAGE_INVALID;
        return pgtable;
}
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
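
/*
 * The helpers below operate on the PGSTEs of a KVM guest mapping:
 * storage key handling, invalidation notification and user-dirty
 * tracking.
 */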
#ifdef CONFIG_PGSTE
void ptep_set_pte_at(struct mm_struct *mm, unsigned long addr,
                     pte_t *ptep, pte_t entry)
{
        pgste_t pgste;

        /* the mm_has_pgste() check is done in set_pte_at() */
        preempt_disable();
        pgste = pgste_get_lock(ptep);
        pgste_val(pgste) &= ~_PGSTE_GPS_ZERO;
        pgste_set_key(ptep, pgste, entry, mm);
        pgste = pgste_set_pte(ptep, pgste, entry);
        pgste_set_unlock(ptep, pgste);
        preempt_enable();
}

void ptep_set_notify(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
{
        pgste_t pgste;

        preempt_disable();
        pgste = pgste_get_lock(ptep);
        pgste_val(pgste) |= PGSTE_IN_BIT;
        pgste_set_unlock(ptep, pgste);
        preempt_enable();
}

static void ptep_zap_swap_entry(struct mm_struct *mm, swp_entry_t entry)
{
        if (!non_swap_entry(entry))
                dec_mm_counter(mm, MM_SWAPENTS);
        else if (is_migration_entry(entry)) {
                struct page *page = migration_entry_to_page(entry);

                dec_mm_counter(mm, mm_counter(page));
        }
        free_swap_and_cache(entry);
}
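
/*
 * Clear a swap pte whose guest usage state marks the page unused or
 * logically zero, dropping the swap entry together with its mm
 * counters. With reset set, the guest usage state itself is cleared.
 */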
void ptep_zap_unused(struct mm_struct *mm, unsigned long addr,
                     pte_t *ptep, int reset)
{
        unsigned long pgstev;
        pgste_t pgste;
        pte_t pte;

        /* Zap unused and logically-zero pages */
        preempt_disable();
        pgste = pgste_get_lock(ptep);
        pgstev = pgste_val(pgste);
        pte = *ptep;
        if (!reset && pte_swap(pte) &&
            ((pgstev & _PGSTE_GPS_USAGE_MASK) == _PGSTE_GPS_USAGE_UNUSED ||
             (pgstev & _PGSTE_GPS_ZERO))) {
                ptep_zap_swap_entry(mm, pte_to_swp_entry(pte));
                pte_clear(mm, addr, ptep);
        }
        if (reset)
                pgste_val(pgste) &= ~_PGSTE_GPS_USAGE_MASK;
        pgste_set_unlock(ptep, pgste);
        preempt_enable();
}

void ptep_zap_key(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
{
        unsigned long ptev;
        pgste_t pgste;

        /* Clear storage key */
        preempt_disable();
        pgste = pgste_get_lock(ptep);
        pgste_val(pgste) &= ~(PGSTE_ACC_BITS | PGSTE_FP_BIT |
                              PGSTE_GR_BIT | PGSTE_GC_BIT);
        ptev = pte_val(*ptep);
        if (!(ptev & _PAGE_INVALID) && (ptev & _PAGE_WRITE))
                page_set_storage_key(ptev & PAGE_MASK, PAGE_DEFAULT_KEY, 1);
        pgste_set_unlock(ptep, pgste);
        preempt_enable();
}

/*
 * Test and reset if a guest page is dirty
 */
bool test_and_clear_guest_dirty(struct mm_struct *mm, unsigned long addr)
{
        spinlock_t *ptl;
        pgste_t pgste;
        pte_t *ptep;
        pte_t pte;
        bool dirty;

        ptep = get_locked_pte(mm, addr, &ptl);
        if (unlikely(!ptep))
                return false;

        pgste = pgste_get_lock(ptep);
        dirty = !!(pgste_val(pgste) & PGSTE_UC_BIT);
        pgste_val(pgste) &= ~PGSTE_UC_BIT;
        pte = *ptep;
        if (dirty && (pte_val(pte) & _PAGE_PRESENT)) {
                pgste = pgste_ipte_notify(mm, addr, ptep, pgste);
                __ptep_ipte(addr, ptep);
                if (MACHINE_HAS_ESOP || !(pte_val(pte) & _PAGE_WRITE))
                        pte_val(pte) |= _PAGE_PROTECT;
                else
                        pte_val(pte) |= _PAGE_INVALID;
                *ptep = pte;
        }
        pgste_set_unlock(ptep, pgste);

        spin_unlock(ptl);
        return dirty;
}
EXPORT_SYMBOL_GPL(test_and_clear_guest_dirty);
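
/*
 * Set or query the guest storage key of a page: the PGSTE always holds
 * the guest view of the key; the real storage key is only touched when
 * the pte is valid, i.e. while the page is mapped.
 */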
int set_guest_storage_key(struct mm_struct *mm, unsigned long addr,
                          unsigned char key, bool nq)
{
        unsigned long keyul;
        spinlock_t *ptl;
        pgste_t old, new;
        pte_t *ptep;

        down_read(&mm->mmap_sem);
        ptep = get_locked_pte(mm, addr, &ptl);
        if (unlikely(!ptep)) {
                up_read(&mm->mmap_sem);
                return -EFAULT;
        }

        new = old = pgste_get_lock(ptep);
        pgste_val(new) &= ~(PGSTE_GR_BIT | PGSTE_GC_BIT |
                            PGSTE_ACC_BITS | PGSTE_FP_BIT);
        keyul = (unsigned long) key;
        pgste_val(new) |= (keyul & (_PAGE_CHANGED | _PAGE_REFERENCED)) << 48;
        pgste_val(new) |= (keyul & (_PAGE_ACC_BITS | _PAGE_FP_BIT)) << 56;
        if (!(pte_val(*ptep) & _PAGE_INVALID)) {
                unsigned long address, bits, skey;

                address = pte_val(*ptep) & PAGE_MASK;
                skey = (unsigned long) page_get_storage_key(address);
                bits = skey & (_PAGE_CHANGED | _PAGE_REFERENCED);
                skey = key & (_PAGE_ACC_BITS | _PAGE_FP_BIT);
                /* Set storage key ACC and FP */
                page_set_storage_key(address, skey, !nq);
                /* Merge host changed & referenced into pgste */
                pgste_val(new) |= bits << 52;
        }
        /* changing the guest storage key is considered a change of the page */
        if ((pgste_val(new) ^ pgste_val(old)) &
            (PGSTE_ACC_BITS | PGSTE_FP_BIT | PGSTE_GR_BIT | PGSTE_GC_BIT))
                pgste_val(new) |= PGSTE_UC_BIT;

        pgste_set_unlock(ptep, new);
        pte_unmap_unlock(ptep, ptl);
        up_read(&mm->mmap_sem);
        return 0;
}
EXPORT_SYMBOL(set_guest_storage_key);

unsigned char get_guest_storage_key(struct mm_struct *mm, unsigned long addr)
{
        unsigned char key;
        spinlock_t *ptl;
        pgste_t pgste;
        pte_t *ptep;

        down_read(&mm->mmap_sem);
        ptep = get_locked_pte(mm, addr, &ptl);
        if (unlikely(!ptep)) {
                up_read(&mm->mmap_sem);
                return 0;
        }
        pgste = pgste_get_lock(ptep);

        if (pte_val(*ptep) & _PAGE_INVALID) {
                key  = (pgste_val(pgste) & PGSTE_ACC_BITS) >> 56;
                key |= (pgste_val(pgste) & PGSTE_FP_BIT) >> 56;
                key |= (pgste_val(pgste) & PGSTE_GR_BIT) >> 48;
                key |= (pgste_val(pgste) & PGSTE_GC_BIT) >> 48;
        } else {
                key = page_get_storage_key(pte_val(*ptep) & PAGE_MASK);

                /* Reflect guest's logical view, not physical */
                if (pgste_val(pgste) & PGSTE_GR_BIT)
                        key |= _PAGE_REFERENCED;
                if (pgste_val(pgste) & PGSTE_GC_BIT)
                        key |= _PAGE_CHANGED;
        }

        pgste_set_unlock(ptep, pgste);
        pte_unmap_unlock(ptep, ptl);
        up_read(&mm->mmap_sem);
        return key;
}
EXPORT_SYMBOL(get_guest_storage_key);