From 71b3c126e61177eb693423f2e18a1914205b165e Mon Sep 17 00:00:00 2001
From: Andy Lutomirski <luto@kernel.org>
Date: Wed, 6 Jan 2016 12:21:01 -0800
Subject: [PATCH] x86/mm: Add barriers and document switch_mm()-vs-flush
 synchronization

When switch_mm() activates a new PGD, it also sets a bit that
tells other CPUs that the PGD is in use so that TLB flush IPIs
will be sent. In order for that to work correctly, the bit
needs to be visible prior to loading the PGD and therefore
starting to fill the local TLB.

Document all the barriers that make this work correctly and add
a couple that were missing.

Signed-off-by: Andy Lutomirski <luto@kernel.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: Denys Vlasenko <dvlasenk@redhat.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Rik van Riel <riel@redhat.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-mm@kvack.org
Cc: stable@vger.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/include/asm/mmu_context.h | 33 ++++++++++++++++++++++++++++++++-
 arch/x86/mm/tlb.c                  | 29 ++++++++++++++++++++++++++---
 2 files changed, 58 insertions(+), 4 deletions(-)

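The constraint described above is the classic store-buffering pattern: each
CPU stores to one location and then loads from the other, and the outcome in
which both loads see the old values must be forbidden. The following
stand-alone sketch (illustrative only, not kernel code: pte, cpu_bit,
cpu0_flusher and cpu1_switcher are made-up names) models the pairing with C11
atomics, where the seq_cst fences stand in for smp_mb() on the flush side and
for the serializing load_cr3() in switch_mm():

#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>

static atomic_int pte;     /* stands in for a PTE of 'next'           */
static atomic_int cpu_bit; /* stands in for CPU 1's bit in mm_cpumask */

static int ipi_needed;     /* what the flushing CPU decided   */
static int pte_seen;       /* what the switching CPU observed */

/* CPU 0: write the PTE, then look at mm_cpumask to decide on an IPI. */
static void *cpu0_flusher(void *arg)
{
	(void)arg;
	atomic_store_explicit(&pte, 1, memory_order_relaxed);
	atomic_thread_fence(memory_order_seq_cst);  /* plays the role of smp_mb() */
	ipi_needed = atomic_load_explicit(&cpu_bit, memory_order_relaxed);
	return NULL;
}

/* CPU 1: advertise itself in mm_cpumask, then start filling the TLB. */
static void *cpu1_switcher(void *arg)
{
	(void)arg;
	atomic_store_explicit(&cpu_bit, 1, memory_order_relaxed);
	atomic_thread_fence(memory_order_seq_cst);  /* plays the role of load_cr3() */
	pte_seen = atomic_load_explicit(&pte, memory_order_relaxed);
	return NULL;
}

int main(void)
{
	pthread_t t0, t1;

	pthread_create(&t0, NULL, cpu0_flusher, NULL);
	pthread_create(&t1, NULL, cpu1_switcher, NULL);
	pthread_join(t0, NULL);
	pthread_join(t1, NULL);

	/* With both fences, ipi_needed == 0 && pte_seen == 0 cannot happen. */
	printf("ipi_needed=%d pte_seen=%d\n", ipi_needed, pte_seen);
	return 0;
}

With both fences present, at least one side observes the other's store, so
either the IPI is sent or the switching CPU already walks the updated page
tables. Dropping either fence allows both to read zero, which is exactly the
stale-TLB window this patch closes.
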
diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h
index 379cd36..1edc9cd 100644
--- a/arch/x86/include/asm/mmu_context.h
+++ b/arch/x86/include/asm/mmu_context.h
@@ -116,8 +116,34 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
 #endif
 		cpumask_set_cpu(cpu, mm_cpumask(next));
 
-		/* Re-load page tables */
+		/*
+		 * Re-load page tables.
+		 *
+		 * This logic has an ordering constraint:
+		 *
+		 *  CPU 0: Write to a PTE for 'next'
+		 *  CPU 0: load bit 1 in mm_cpumask. if nonzero, send IPI.
+		 *  CPU 1: set bit 1 in next's mm_cpumask
+		 *  CPU 1: load from the PTE that CPU 0 writes (implicit)
+		 *
+		 * We need to prevent an outcome in which CPU 1 observes
+		 * the new PTE value and CPU 0 observes bit 1 clear in
+		 * mm_cpumask. (If that occurs, then the IPI will never
+		 * be sent, and CPU 0's TLB will contain a stale entry.)
+		 *
+		 * The bad outcome can occur if either CPU's load is
+		 * reordered before that CPU's store, so both CPUs must
+		 * execute full barriers to prevent this from happening.
+		 *
+		 * Thus, switch_mm needs a full barrier between the
+		 * store to mm_cpumask and any operation that could load
+		 * from next->pgd. This barrier synchronizes with
+		 * remote TLB flushers. Fortunately, load_cr3 is
+		 * serializing and thus acts as a full barrier.
+		 *
+		 */
 		load_cr3(next->pgd);
+
 		trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH, TLB_FLUSH_ALL);
 
 		/* Stop flush ipis for the previous mm */
@@ -156,10 +182,15 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
 			 * schedule, protecting us from simultaneous changes.
 			 */
 			cpumask_set_cpu(cpu, mm_cpumask(next));
+
 			/*
 			 * We were in lazy tlb mode and leave_mm disabled
 			 * tlb flush IPI delivery. We must reload CR3
 			 * to make sure to use no freed page tables.
+			 *
+			 * As above, this is a barrier that forces
+			 * TLB repopulation to be ordered after the
+			 * store to mm_cpumask.
 			 */
 			load_cr3(next->pgd);
 			trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH, TLB_FLUSH_ALL);
diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
index 8ddb5d0..8f4cc3d 100644
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -161,7 +161,10 @@ void flush_tlb_current_task(void)
 	preempt_disable();
 
 	count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ALL);
+
+	/* This is an implicit full barrier that synchronizes with switch_mm. */
 	local_flush_tlb();
+
 	trace_tlb_flush(TLB_LOCAL_SHOOTDOWN, TLB_FLUSH_ALL);
 	if (cpumask_any_but(mm_cpumask(mm), smp_processor_id()) < nr_cpu_ids)
 		flush_tlb_others(mm_cpumask(mm), mm, 0UL, TLB_FLUSH_ALL);
@@ -188,17 +191,29 @@ void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start,
 	unsigned long base_pages_to_flush = TLB_FLUSH_ALL;
 
 	preempt_disable();
-	if (current->active_mm != mm)
+	if (current->active_mm != mm) {
+		/* Synchronize with switch_mm. */
+		smp_mb();
+
 		goto out;
+	}
 
 	if (!current->mm) {
 		leave_mm(smp_processor_id());
+
+		/* Synchronize with switch_mm. */
+		smp_mb();
+
 		goto out;
 	}
 
 	if ((end != TLB_FLUSH_ALL) && !(vmflag & VM_HUGETLB))
 		base_pages_to_flush = (end - start) >> PAGE_SHIFT;
 
+	/*
+	 * Both branches below are implicit full barriers (MOV to CR or
+	 * INVLPG) that synchronize with switch_mm.
+	 */
 	if (base_pages_to_flush > tlb_single_page_flush_ceiling) {
 		base_pages_to_flush = TLB_FLUSH_ALL;
 		count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ALL);
@@ -228,10 +243,18 @@ void flush_tlb_page(struct vm_area_struct *vma, unsigned long start)
 	preempt_disable();
 
 	if (current->active_mm == mm) {
-		if (current->mm)
+		if (current->mm) {
+			/*
+			 * Implicit full barrier (INVLPG) that synchronizes
+			 * with switch_mm.
+			 */
 			__flush_tlb_one(start);
-		else
+		} else {
 			leave_mm(smp_processor_id());
+
+			/* Synchronize with switch_mm. */
+			smp_mb();
+		}
 	}
 
 	if (cpumask_any_but(mm_cpumask(mm), smp_processor_id()) < nr_cpu_ids)
--
2.1.4

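On the flusher side, the tlb.c hunks above rely on the flush instructions
themselves for ordering and only add an explicit smp_mb() on the paths that
return without flushing: between the caller's page-table update and the
mm_cpumask read that gates flush_tlb_others(), the flushing CPU must execute
a full barrier, and MOV to CR3 / INVLPG already provide one because they are
serializing. A simplified sketch of that rule in kernel style (not the
complete flush_tlb_mm_range(); the page-range computation, accounting and the
lazy-mm path are omitted):

/* Simplified sketch only -- not the real flush_tlb_mm_range(). */
static void flush_sketch(struct mm_struct *mm)
{
	preempt_disable();

	if (current->active_mm != mm) {
		/* No local flush below, so pair with switch_mm explicitly. */
		smp_mb();
		goto out;
	}

	/* MOV to CR3: serializing, hence an implicit full barrier. */
	local_flush_tlb();
out:
	/* Only after the barrier is it safe to inspect mm_cpumask. */
	if (cpumask_any_but(mm_cpumask(mm), smp_processor_id()) < nr_cpu_ids)
		flush_tlb_others(mm_cpumask(mm), mm, 0UL, TLB_FLUSH_ALL);

	preempt_enable();
}

Either way a full barrier separates the PTE write from the mm_cpumask read,
pairing with the store-then-load_cr3() sequence that switch_mm() performs on
the other side.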