From ddb5e7b381d37d0f8bca61f0b761ae5c3a2f5ee0 Mon Sep 17 00:00:00 2001
From: Andy Lutomirski <luto@kernel.org>
Date: Sun, 17 Sep 2017 09:03:48 -0700
Subject: [PATCH 043/241] x86/mm: Factor out CR3-building code
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

CVE-2017-5754

Currently, the code that assembles a value to load into CR3 is
open-coded everywhere. Factor it out into helpers build_cr3() and
build_cr3_noflush().

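As a rough illustration of what the new helpers compute, here is a
minimal user-space model (the constants, types and printing are
stand-ins, not kernel code; only the bit layout mirrors the patch:
the ASID/PCID in bits 11:0, the page-table root's physical address
above it, and bit 63 as the no-flush flag):

    #include <stdint.h>
    #include <stdio.h>

    /* Stand-in for the kernel's no-flush bit (bit 63 of CR3). */
    #define CR3_NOFLUSH (1ULL << 63)

    /* CR3 = physical address of the 4 KiB-aligned page-table root
     * OR'd with the 12-bit ASID (PCID). */
    static uint64_t build_cr3(uint64_t pgd_pa, uint16_t asid)
    {
            return pgd_pa | asid;
    }

    static uint64_t build_cr3_noflush(uint64_t pgd_pa, uint16_t asid)
    {
            return build_cr3(pgd_pa, asid) | CR3_NOFLUSH;
    }

    int main(void)
    {
            uint64_t pgd_pa = 0x1234000;    /* example root address */

            printf("cr3         = %#llx\n",
                   (unsigned long long)build_cr3(pgd_pa, 1));
            printf("cr3_noflush = %#llx\n",
                   (unsigned long long)build_cr3_noflush(pgd_pa, 1));
            return 0;
    }
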
This makes one semantic change: __get_current_cr3_fast() was wrong
on SME systems. No one noticed because the only caller is in the
VMX code, and there are no CPUs with both SME and VMX.

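The SME breakage comes down to a missing encryption mask: __sme_pa()
is (roughly) __pa() with sme_me_mask OR'd in, and the open-coded
__pa() in __get_current_cr3_fast() dropped that bit. A schematic
user-space sketch (the mask value is illustrative, not authoritative):

    #include <stdint.h>
    #include <stdio.h>

    /* Illustrative: on SME parts sme_me_mask holds the C-bit
     * (e.g. bit 47); it is zero when SME is disabled. */
    static const uint64_t sme_me_mask = 1ULL << 47;

    static uint64_t pa(uint64_t x)     { return x; }  /* __pa() stand-in */
    static uint64_t sme_pa(uint64_t x) { return pa(x) | sme_me_mask; }

    int main(void)
    {
            uint64_t pgd = 0x1234000;

            /* The open-coded path lost the C-bit; the helper keeps it. */
            printf("__pa:     %#llx\n", (unsigned long long)pa(pgd));
            printf("__sme_pa: %#llx\n", (unsigned long long)sme_pa(pgd));
            return 0;
    }
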
Signed-off-by: Andy Lutomirski <luto@kernel.org>
Cc: Borislav Petkov <bpetkov@suse.de>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Tom Lendacky <Thomas.Lendacky@amd.com>
Link: http://lkml.kernel.org/r/ce350cf11e93e2842d14d0b95b0199c7d881f527.1505663533.git.luto@kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
(backported from commit 47061a24e2ee5bd8a40d473d47a5bd823fa0081f)
Signed-off-by: Andy Whitcroft <apw@canonical.com>
Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
(cherry picked from commit 72be211bac7be521f128d419d63cae38ba60ace8)
Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
---
 arch/x86/include/asm/mmu_context.h | 15 ++++++---
 arch/x86/mm/tlb.c                  | 68 +++++++++++++++++++++++++++++++++++---
 2 files changed, 75 insertions(+), 8 deletions(-)

diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h
index 7ae318c340d9..a999ba6b721f 100644
--- a/arch/x86/include/asm/mmu_context.h
+++ b/arch/x86/include/asm/mmu_context.h
@@ -286,6 +286,15 @@ static inline bool arch_vma_access_permitted(struct vm_area_struct *vma,
 	return __pkru_allows_pkey(vma_pkey(vma), write);
 }

+static inline unsigned long build_cr3(struct mm_struct *mm, u16 asid)
+{
+	return __sme_pa(mm->pgd) | asid;
+}
+
+static inline unsigned long build_cr3_noflush(struct mm_struct *mm, u16 asid)
+{
+	return __sme_pa(mm->pgd) | asid | CR3_NOFLUSH;
+}

 /*
  * This can be used from process context to figure out what the value of
@@ -296,10 +305,8 @@ static inline bool arch_vma_access_permitted(struct vm_area_struct *vma,
  */
 static inline unsigned long __get_current_cr3_fast(void)
 {
-	unsigned long cr3 = __pa(this_cpu_read(cpu_tlbstate.loaded_mm)->pgd);
-
-	if (static_cpu_has(X86_FEATURE_PCID))
-		cr3 |= this_cpu_read(cpu_tlbstate.loaded_mm_asid);
+	unsigned long cr3 = build_cr3(this_cpu_read(cpu_tlbstate.loaded_mm),
+				      this_cpu_read(cpu_tlbstate.loaded_mm_asid));

 	/* For now, be very restrictive about when this can be called. */
 	VM_WARN_ON(in_nmi() || preemptible());
diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
index 57943b4d8f2e..440400316c8a 100644
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -123,7 +123,23 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
 	 * without going through leave_mm() / switch_mm_irqs_off() or that
 	 * does something like write_cr3(read_cr3_pa()).
 	 */
-	VM_BUG_ON(__read_cr3() != (__sme_pa(real_prev->pgd) | prev_asid));
+#ifdef CONFIG_DEBUG_VM
+	if (WARN_ON_ONCE(__read_cr3() != build_cr3(real_prev, prev_asid))) {
+		/*
+		 * If we were to BUG here, we'd be very likely to kill
+		 * the system so hard that we don't see the call trace.
+		 * Try to recover instead by ignoring the error and doing
+		 * a global flush to minimize the chance of corruption.
+		 *
+		 * (This is far from being a fully correct recovery.
+		 *  Architecturally, the CPU could prefetch something
+		 *  back into an incorrect ASID slot and leave it there
+		 *  to cause trouble down the road.  It's better than
+		 *  nothing, though.)
+		 */
+		__flush_tlb_all();
+	}
+#endif

 	if (real_prev == next) {
 		VM_BUG_ON(this_cpu_read(cpu_tlbstate.ctxs[prev_asid].ctx_id) !=
@@ -153,7 +169,7 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
 		 */
 		this_cpu_write(cpu_tlbstate.ctxs[prev_asid].tlb_gen,
 			       next_tlb_gen);
-		write_cr3(__pa(next->pgd) | prev_asid);
+		write_cr3(build_cr3(next, prev_asid));

 		/*
 		 * This gets called via leave_mm() in the idle path
@@ -204,12 +220,12 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
 	if (need_flush) {
 		this_cpu_write(cpu_tlbstate.ctxs[new_asid].ctx_id, next->context.ctx_id);
 		this_cpu_write(cpu_tlbstate.ctxs[new_asid].tlb_gen, next_tlb_gen);
-		write_cr3(__pa(next->pgd) | new_asid);
+		write_cr3(build_cr3(next, new_asid));
 		trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH,
 				TLB_FLUSH_ALL);
 	} else {
 		/* The new ASID is already up to date. */
-		write_cr3(__sme_pa(next->pgd) | new_asid | CR3_NOFLUSH);
+		write_cr3(build_cr3_noflush(next, new_asid));
 		trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH, 0);
 	}

@@ -221,6 +237,50 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
 	switch_ldt(real_prev, next);
 }

+/*
+ * Call this when reinitializing a CPU.  It fixes the following potential
+ * problems:
+ *
+ * - The ASID changed from what cpu_tlbstate thinks it is (most likely
+ *   because the CPU was taken down and came back up with CR3's PCID
+ *   bits clear.  CPU hotplug can do this.
+ *
+ * - The TLB contains junk in slots corresponding to inactive ASIDs.
+ *
+ * - The CPU went so far out to lunch that it may have missed a TLB
+ *   flush.
+ */
+void initialize_tlbstate_and_flush(void)
+{
+	int i;
+	struct mm_struct *mm = this_cpu_read(cpu_tlbstate.loaded_mm);
+	u64 tlb_gen = atomic64_read(&init_mm.context.tlb_gen);
+	unsigned long cr3 = __read_cr3();
+
+	/* Assert that CR3 already references the right mm. */
+	WARN_ON((cr3 & CR3_ADDR_MASK) != __pa(mm->pgd));
+
+	/*
+	 * Assert that CR4.PCIDE is set if needed.  (CR4.PCIDE initialization
+	 * doesn't work like other CR4 bits because it can only be set from
+	 * long mode.)
+	 */
+	WARN_ON(boot_cpu_has(X86_FEATURE_PCID) &&
+		!(cr4_read_shadow() & X86_CR4_PCIDE));
+
+	/* Force ASID 0 and force a TLB flush. */
+	write_cr3(build_cr3(mm, 0));
+
+	/* Reinitialize tlbstate. */
+	this_cpu_write(cpu_tlbstate.loaded_mm_asid, 0);
+	this_cpu_write(cpu_tlbstate.next_asid, 1);
+	this_cpu_write(cpu_tlbstate.ctxs[0].ctx_id, mm->context.ctx_id);
+	this_cpu_write(cpu_tlbstate.ctxs[0].tlb_gen, tlb_gen);
+
+	for (i = 1; i < TLB_NR_DYN_ASIDS; i++)
+		this_cpu_write(cpu_tlbstate.ctxs[i].ctx_id, 0);
+}
+
 /*
  * flush_tlb_func_common()'s memory ordering requirement is that any
  * TLB fills that happen after we flush the TLB are ordered after we
--
2.14.2