1 From 1cb923a3733ac738f0d96fe4738bdf159db86cfd Mon Sep 17 00:00:00 2001
2 From: Dave Hansen <dave.hansen@linux.intel.com>
3 Date: Mon, 4 Dec 2017 15:07:37 +0100
4 Subject: [PATCH 192/241] x86/mm/pti: Add mapping helper functions
6 Content-Type: text/plain; charset=UTF-8
7 Content-Transfer-Encoding: 8bit
11 Add the pagetable helper functions to manage the separate user space page
14 [ tglx: Split out from the big combo kaiser patch. Folded Andy's
15 simplification and made it out of line as Boris suggested ]
17 Signed-off-by: Dave Hansen <dave.hansen@linux.intel.com>
18 Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
19 Cc: Andy Lutomirski <luto@kernel.org>
20 Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
21 Cc: Borislav Petkov <bp@alien8.de>
22 Cc: Brian Gerst <brgerst@gmail.com>
23 Cc: David Laight <David.Laight@aculab.com>
24 Cc: Denys Vlasenko <dvlasenk@redhat.com>
25 Cc: Eduardo Valentin <eduval@amazon.com>
26 Cc: Greg KH <gregkh@linuxfoundation.org>
27 Cc: H. Peter Anvin <hpa@zytor.com>
28 Cc: Josh Poimboeuf <jpoimboe@redhat.com>
29 Cc: Juergen Gross <jgross@suse.com>
30 Cc: Linus Torvalds <torvalds@linux-foundation.org>
31 Cc: Peter Zijlstra <peterz@infradead.org>
32 Cc: Will Deacon <will.deacon@arm.com>
33 Cc: aliguori@amazon.com
34 Cc: daniel.gruss@iaik.tugraz.at
36 Cc: keescook@google.com
37 Cc: linux-kernel@vger.kernel.org
38 Signed-off-by: Ingo Molnar <mingo@kernel.org>
39 (cherry picked from commit 61e9b3671007a5da8127955a1a3bda7e0d5f42e8)
40 Signed-off-by: Andy Whitcroft <apw@canonical.com>
41 Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
42 (cherry picked from commit fb45c59197f3134db6e223bb4fec0529774c07e1)
43 Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
45 arch/x86/include/asm/pgtable.h | 6 ++-
46 arch/x86/include/asm/pgtable_64.h | 92 +++++++++++++++++++++++++++++++++++++++
47 arch/x86/mm/pti.c | 41 +++++++++++++++++
48 3 files changed, 138 insertions(+), 1 deletion(-)
50 diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
51 index bb8e9ea7deb4..abbb47c75467 100644
52 --- a/arch/x86/include/asm/pgtable.h
53 +++ b/arch/x86/include/asm/pgtable.h
54 @@ -894,7 +894,11 @@ static inline int pgd_none(pgd_t pgd)
55 * pgd_offset() returns a (pgd_t *)
56 * pgd_index() is used get the offset into the pgd page's array of pgd_t's;
58 -#define pgd_offset(mm, address) ((mm)->pgd + pgd_index((address)))
59 +#define pgd_offset_pgd(pgd, address) (pgd + pgd_index((address)))
61 + * a shortcut to get a pgd_t in a given mm
63 +#define pgd_offset(mm, address) pgd_offset_pgd((mm)->pgd, (address))
65 * a shortcut which implies the use of the kernel's pgd, instead
67 diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h
68 index 2160c1fee920..1ac15b03cf30 100644
69 --- a/arch/x86/include/asm/pgtable_64.h
70 +++ b/arch/x86/include/asm/pgtable_64.h
71 @@ -130,9 +130,97 @@ static inline pud_t native_pudp_get_and_clear(pud_t *xp)
75 +#ifdef CONFIG_PAGE_TABLE_ISOLATION
77 + * All top-level PAGE_TABLE_ISOLATION page tables are order-1 pages
78 + * (8k-aligned and 8k in size). The kernel one is in the first 4k and
79 + * the user one is in the last 4k. To switch between them, you
80 + * just need to flip bit 12 (PTI_PGTABLE_SWITCH_BIT) in their addresses.
82 +#define PTI_PGTABLE_SWITCH_BIT PAGE_SHIFT
85 + * This generates better code than the inline assembly in
88 +static inline void *ptr_set_bit(void *ptr, int bit)
90 + unsigned long __ptr = (unsigned long)ptr;
93 + return (void *)__ptr;
95 +static inline void *ptr_clear_bit(void *ptr, int bit)
97 + unsigned long __ptr = (unsigned long)ptr;
100 + return (void *)__ptr;
103 +static inline pgd_t *kernel_to_user_pgdp(pgd_t *pgdp)
105 + return ptr_set_bit(pgdp, PTI_PGTABLE_SWITCH_BIT);
108 +static inline pgd_t *user_to_kernel_pgdp(pgd_t *pgdp)
110 + return ptr_clear_bit(pgdp, PTI_PGTABLE_SWITCH_BIT);
113 +static inline p4d_t *kernel_to_user_p4dp(p4d_t *p4dp)
115 + return ptr_set_bit(p4dp, PTI_PGTABLE_SWITCH_BIT);
118 +static inline p4d_t *user_to_kernel_p4dp(p4d_t *p4dp)
120 + return ptr_clear_bit(p4dp, PTI_PGTABLE_SWITCH_BIT);
122 +#endif /* CONFIG_PAGE_TABLE_ISOLATION */
125 + * Page table pages are page-aligned. The lower half of the top
126 + * level is used for userspace and the top half for the kernel.
128 + * Returns true for parts of the PGD that map userspace and
129 + * false for the parts that map the kernel.
131 +static inline bool pgdp_maps_userspace(void *__ptr)
133 + unsigned long ptr = (unsigned long)__ptr;
135 + return (ptr & ~PAGE_MASK) < (PAGE_SIZE / 2);
138 +#ifdef CONFIG_PAGE_TABLE_ISOLATION
139 +pgd_t __pti_set_user_pgd(pgd_t *pgdp, pgd_t pgd);
142 + * Take a PGD location (pgdp) and a pgd value that needs to be set there.
143 + * Populates the user copy and returns the resulting PGD that must be set in
144 + * the kernel copy of the page tables.
146 +static inline pgd_t pti_set_user_pgd(pgd_t *pgdp, pgd_t pgd)
148 + if (!static_cpu_has(X86_FEATURE_PTI))
150 + return __pti_set_user_pgd(pgdp, pgd);
153 +static inline pgd_t pti_set_user_pgd(pgd_t *pgdp, pgd_t pgd)
159 static inline void native_set_p4d(p4d_t *p4dp, p4d_t p4d)
161 +#if defined(CONFIG_PAGE_TABLE_ISOLATION) && !defined(CONFIG_X86_5LEVEL)
162 + p4dp->pgd = pti_set_user_pgd(&p4dp->pgd, p4d.pgd);
168 static inline void native_p4d_clear(p4d_t *p4d)
169 @@ -146,7 +234,11 @@ static inline void native_p4d_clear(p4d_t *p4d)
171 static inline void native_set_pgd(pgd_t *pgdp, pgd_t pgd)
173 +#ifdef CONFIG_PAGE_TABLE_ISOLATION
174 + *pgdp = pti_set_user_pgd(pgdp, pgd);
180 static inline void native_pgd_clear(pgd_t *pgd)
181 diff --git a/arch/x86/mm/pti.c b/arch/x86/mm/pti.c
182 index a13f6b109865..69a983365392 100644
183 --- a/arch/x86/mm/pti.c
184 +++ b/arch/x86/mm/pti.c
185 @@ -96,6 +96,47 @@ void __init pti_check_boottime_disable(void)
186 setup_force_cpu_cap(X86_FEATURE_PTI);
189 +pgd_t __pti_set_user_pgd(pgd_t *pgdp, pgd_t pgd)
192 + * Changes to the high (kernel) portion of the kernelmode page
193 + * tables are not automatically propagated to the usermode tables.
195 + * Users should keep in mind that, unlike the kernelmode tables,
196 + * there is no vmalloc_fault equivalent for the usermode tables.
197 + * Top-level entries added to init_mm's usermode pgd after boot
198 + * will not be automatically propagated to other mms.
200 + if (!pgdp_maps_userspace(pgdp))
204 + * The user page tables get the full PGD, accessible from
207 + kernel_to_user_pgdp(pgdp)->pgd = pgd.pgd;
210 + * If this is normal user memory, make it NX in the kernel
211 + * pagetables so that, if we somehow screw up and return to
212 + * usermode with the kernel CR3 loaded, we'll get a page fault
213 + * instead of allowing user code to execute with the wrong CR3.
215 + * As exceptions, we don't set NX if:
216 + * - _PAGE_USER is not set. This could be an executable
217 + * EFI runtime mapping or something similar, and the kernel
218 + * may execute from it
219 + * - we don't have NX support
220 + * - we're clearing the PGD (i.e. the new pgd is not present).
222 + if ((pgd.pgd & (_PAGE_USER|_PAGE_PRESENT)) == (_PAGE_USER|_PAGE_PRESENT) &&
223 + (__supported_pte_mask & _PAGE_NX))
224 + pgd.pgd |= _PAGE_NX;
226 + /* return the copy of the PGD we want the kernel to use: */
231 * Initialize kernel page table isolation