From ddb5e7b381d37d0f8bca61f0b761ae5c3a2f5ee0 Mon Sep 17 00:00:00 2001
From: Andy Lutomirski <luto@kernel.org>
Date: Sun, 17 Sep 2017 09:03:48 -0700
Subject: [PATCH 043/231] x86/mm: Factor out CR3-building code
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

CVE-2017-5754

Currently, the code that assembles a value to load into CR3 is
open-coded everywhere. Factor it out into helpers build_cr3() and
build_cr3_noflush().

This makes one semantic change: __get_current_cr3_fast() was wrong
on SME systems. No one noticed because the only caller is in the
VMX code, and there are no CPUs with both SME and VMX.

Signed-off-by: Andy Lutomirski <luto@kernel.org>
Cc: Borislav Petkov <bpetkov@suse.de>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Tom Lendacky <Thomas.Lendacky@amd.com>
Link: http://lkml.kernel.org/r/ce350cf11e93e2842d14d0b95b0199c7d881f527.1505663533.git.luto@kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
(backported from commit 47061a24e2ee5bd8a40d473d47a5bd823fa0081f)
Signed-off-by: Andy Whitcroft <apw@canonical.com>
Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
(cherry picked from commit 72be211bac7be521f128d419d63cae38ba60ace8)
Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
---
 arch/x86/include/asm/mmu_context.h | 15 ++++++---
 arch/x86/mm/tlb.c                  | 68 +++++++++++++++++++++++++++++++++++---
 2 files changed, 75 insertions(+), 8 deletions(-)

diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h
index 7ae318c340d9..a999ba6b721f 100644
--- a/arch/x86/include/asm/mmu_context.h
+++ b/arch/x86/include/asm/mmu_context.h
@@ -286,6 +286,15 @@ static inline bool arch_vma_access_permitted(struct vm_area_struct *vma,
 	return __pkru_allows_pkey(vma_pkey(vma), write);
 }
 
+static inline unsigned long build_cr3(struct mm_struct *mm, u16 asid)
+{
+	return __sme_pa(mm->pgd) | asid;
+}
+
+static inline unsigned long build_cr3_noflush(struct mm_struct *mm, u16 asid)
+{
+	return __sme_pa(mm->pgd) | asid | CR3_NOFLUSH;
+}
 
 /*
  * This can be used from process context to figure out what the value of
@@ -296,10 +305,8 @@ static inline bool arch_vma_access_permitted(struct vm_area_struct *vma,
  */
 static inline unsigned long __get_current_cr3_fast(void)
 {
-	unsigned long cr3 = __pa(this_cpu_read(cpu_tlbstate.loaded_mm)->pgd);
-
-	if (static_cpu_has(X86_FEATURE_PCID))
-		cr3 |= this_cpu_read(cpu_tlbstate.loaded_mm_asid);
+	unsigned long cr3 = build_cr3(this_cpu_read(cpu_tlbstate.loaded_mm),
+				      this_cpu_read(cpu_tlbstate.loaded_mm_asid));
 
 	/* For now, be very restrictive about when this can be called. */
 	VM_WARN_ON(in_nmi() || preemptible());
diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
index 57943b4d8f2e..440400316c8a 100644
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -123,7 +123,23 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
 	 * without going through leave_mm() / switch_mm_irqs_off() or that
 	 * does something like write_cr3(read_cr3_pa()).
 	 */
-	VM_BUG_ON(__read_cr3() != (__sme_pa(real_prev->pgd) | prev_asid));
+#ifdef CONFIG_DEBUG_VM
+	if (WARN_ON_ONCE(__read_cr3() != build_cr3(real_prev, prev_asid))) {
+		/*
+		 * If we were to BUG here, we'd be very likely to kill
+		 * the system so hard that we don't see the call trace.
+		 * Try to recover instead by ignoring the error and doing
+		 * a global flush to minimize the chance of corruption.
+		 *
+		 * (This is far from being a fully correct recovery.
+		 *  Architecturally, the CPU could prefetch something
+		 *  back into an incorrect ASID slot and leave it there
+		 *  to cause trouble down the road. It's better than
+		 *  nothing, though.)
+		 */
+		__flush_tlb_all();
+	}
+#endif
 
 	if (real_prev == next) {
 		VM_BUG_ON(this_cpu_read(cpu_tlbstate.ctxs[prev_asid].ctx_id) !=
@@ -153,7 +169,7 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
 		 */
 		this_cpu_write(cpu_tlbstate.ctxs[prev_asid].tlb_gen,
 			       next_tlb_gen);
-		write_cr3(__pa(next->pgd) | prev_asid);
+		write_cr3(build_cr3(next, prev_asid));
 
 		/*
 		 * This gets called via leave_mm() in the idle path
@@ -204,12 +220,12 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
 	if (need_flush) {
 		this_cpu_write(cpu_tlbstate.ctxs[new_asid].ctx_id, next->context.ctx_id);
 		this_cpu_write(cpu_tlbstate.ctxs[new_asid].tlb_gen, next_tlb_gen);
-		write_cr3(__pa(next->pgd) | new_asid);
+		write_cr3(build_cr3(next, new_asid));
 		trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH,
 				TLB_FLUSH_ALL);
 	} else {
 		/* The new ASID is already up to date. */
-		write_cr3(__sme_pa(next->pgd) | new_asid | CR3_NOFLUSH);
+		write_cr3(build_cr3_noflush(next, new_asid));
 		trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH, 0);
 	}
 
@@ -221,6 +237,50 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
 	switch_ldt(real_prev, next);
 }
 
+/*
+ * Call this when reinitializing a CPU. It fixes the following potential
+ * problems:
+ *
+ * - The ASID changed from what cpu_tlbstate thinks it is (most likely
+ *   because the CPU was taken down and came back up with CR3's PCID
+ *   bits clear. CPU hotplug can do this.
+ *
+ * - The TLB contains junk in slots corresponding to inactive ASIDs.
+ *
+ * - The CPU went so far out to lunch that it may have missed a TLB
+ *   flush.
+ */
+void initialize_tlbstate_and_flush(void)
+{
+	int i;
+	struct mm_struct *mm = this_cpu_read(cpu_tlbstate.loaded_mm);
+	u64 tlb_gen = atomic64_read(&init_mm.context.tlb_gen);
+	unsigned long cr3 = __read_cr3();
+
+	/* Assert that CR3 already references the right mm. */
+	WARN_ON((cr3 & CR3_ADDR_MASK) != __pa(mm->pgd));
+
+	/*
+	 * Assert that CR4.PCIDE is set if needed. (CR4.PCIDE initialization
+	 * doesn't work like other CR4 bits because it can only be set from
+	 * long mode.)
+	 */
+	WARN_ON(boot_cpu_has(X86_FEATURE_PCID) &&
+		!(cr4_read_shadow() & X86_CR4_PCIDE));
+
+	/* Force ASID 0 and force a TLB flush. */
+	write_cr3(build_cr3(mm, 0));
+
+	/* Reinitialize tlbstate. */
+	this_cpu_write(cpu_tlbstate.loaded_mm_asid, 0);
+	this_cpu_write(cpu_tlbstate.next_asid, 1);
+	this_cpu_write(cpu_tlbstate.ctxs[0].ctx_id, mm->context.ctx_id);
+	this_cpu_write(cpu_tlbstate.ctxs[0].tlb_gen, tlb_gen);
+
+	for (i = 1; i < TLB_NR_DYN_ASIDS; i++)
+		this_cpu_write(cpu_tlbstate.ctxs[i].ctx_id, 0);
+}
+
 /*
  * flush_tlb_func_common()'s memory ordering requirement is that any
  * TLB fills that happen after we flush the TLB are ordered after we
-- 
2.14.2
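
Editor's illustrative note (not part of the patch): the helpers introduced
above compose a CR3 value from the PGD's physical address, the ASID/PCID in
the low 12 bits, and (for build_cr3_noflush) the "no flush" hint in bit 63.
The standalone userspace sketch below mirrors that layout under stated
assumptions; the sketch_* names and the example PGD address and ASID are
made up for illustration, and the SME C-bit applied by __sme_pa() in the
real helpers is omitted for simplicity.

/*
 * Illustrative sketch only -- not kernel code. Mirrors the CR3 layout
 * relied on by build_cr3()/build_cr3_noflush().
 */
#include <stdint.h>
#include <stdio.h>

#define CR3_ADDR_MASK_SKETCH   0x7ffffffffffff000ULL /* physical address of the PGD */
#define CR3_PCID_MASK_SKETCH   0x0000000000000fffULL /* low 12 bits: PCID/ASID */
#define CR3_NOFLUSH_BIT_SKETCH (1ULL << 63)          /* set: keep this PCID's TLB entries */

static uint64_t sketch_build_cr3(uint64_t pgd_pa, uint16_t asid)
{
	return (pgd_pa & CR3_ADDR_MASK_SKETCH) | (asid & CR3_PCID_MASK_SKETCH);
}

static uint64_t sketch_build_cr3_noflush(uint64_t pgd_pa, uint16_t asid)
{
	return sketch_build_cr3(pgd_pa, asid) | CR3_NOFLUSH_BIT_SKETCH;
}

int main(void)
{
	uint64_t pgd_pa = 0x1234567000ULL; /* made-up PGD physical address */
	uint16_t asid = 5;                 /* made-up ASID */

	printf("cr3 (flush):   %#llx\n",
	       (unsigned long long)sketch_build_cr3(pgd_pa, asid));
	printf("cr3 (noflush): %#llx\n",
	       (unsigned long long)sketch_build_cr3_noflush(pgd_pa, asid));
	return 0;
}

In the patch itself, the write_cr3() call sites simply pass the mm and ASID
to the new helpers instead of open-coding the OR, and switching
__get_current_cr3_fast() to build_cr3() is what fixes its missing SME bit.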