From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Andy Lutomirski <luto@kernel.org>
Date: Thu, 29 Jun 2017 08:53:17 -0700
Subject: [PATCH] x86/mm: Rework lazy TLB mode and TLB freshness tracking
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

CVE-2017-5754

x86's lazy TLB mode used to be fairly weak -- it would switch to
init_mm the first time it tried to flush a lazy TLB.  This meant an
unnecessary CR3 write and, if the flush was remote, an unnecessary
IPI.

Rewrite it entirely.  When we enter lazy mode, we simply remove the
CPU from mm_cpumask.  This means that we need a way to figure out
whether we've missed a flush when we switch back out of lazy mode.
I use the tlb_gen machinery to track whether a context is up to
date.

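For the unfamiliar: the tlb_gen machinery boils down to the model
below.  This is an illustrative sketch with simplified, made-up names
(mm_model, cpu_tlb_model, request_flush, catch_up), not the kernel's
actual code -- the real counters are mm->context.tlb_gen and the
per-CPU cpu_tlbstate.ctxs[].tlb_gen touched in the diff:

	/* Each mm publishes a generation number, bumped per flush request. */
	struct mm_model {
		unsigned long tlb_gen;
	};

	/* Each CPU remembers the generation it last applied for that mm. */
	struct cpu_tlb_model {
		unsigned long last_applied_tlb_gen;
	};

	/* A flusher bumps the generation before telling CPUs to flush. */
	static void request_flush(struct mm_model *mm)
	{
		mm->tlb_gen++;
	}

	/* On leaving lazy mode, flush only if this CPU missed a flush. */
	static void catch_up(struct mm_model *mm, struct cpu_tlb_model *ct)
	{
		if (ct->last_applied_tlb_gen < mm->tlb_gen) {
			/* a local TLB flush would go here */
			ct->last_applied_tlb_gen = mm->tlb_gen;
		}
	}
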
Note to reviewers: this patch, by itself, looks a bit odd.  I'm
using an array of length 1 containing (ctx_id, tlb_gen) rather than
just storing tlb_gen, and making it an array isn't necessary yet.
I'm doing this because the next few patches add PCID support, and,
with PCID, we need ctx_id, and the array will end up with a length
greater than 1.  Making it an array now means that there will be
less churn and therefore less stress on your eyeballs.

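Concretely, the per-CPU state referenced below is shaped roughly like
this sketch (field names follow the diff, but this is an illustration,
not the complete structure):

	struct tlb_context {
		u64 ctx_id;	/* which mm this slot's tlb_gen describes */
		u64 tlb_gen;	/* last mm generation this CPU caught up to */
	};

	struct tlb_state {
		struct mm_struct *loaded_mm;
		/* ... */
		struct tlb_context ctxs[1];	/* grows once PCID lands */
	};
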
NB: This is dubious but, AFAICT, still correct on Xen and UV.
xen_exit_mmap() uses mm_cpumask() for nefarious purposes and this
patch changes the way that mm_cpumask() works.  This should be okay,
since Xen *also* iterates all online CPUs to find all the CPUs it
needs to twiddle.

The UV tlbflush code is rather dated and should be changed.

Here are some benchmark results, done on a Skylake laptop at 2.3 GHz
(turbo off, intel_pstate requesting max performance) under KVM with
the guest using idle=poll (to avoid artifacts when bouncing between
CPUs).  I haven't done any real statistics here -- I just ran them
in a loop and picked the fastest results that didn't look like
outliers.  Unpatched means commit a4eb8b993554, so all the
bookkeeping overhead is gone.

MADV_DONTNEED; touch the page; switch CPUs using sched_setaffinity.  In
an unpatched kernel, MADV_DONTNEED will send an IPI to the previous CPU.
This is intended to be a nearly worst-case test.

  patched:   13.4µs
  unpatched: 21.6µs

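The test itself isn't included here; a rough reconstruction of the
loop described above might look like the following (hypothetical code,
not the actual benchmark source; error handling and timing omitted):

	/* Hypothetical MADV_DONTNEED ping-pong test, per the description
	 * above: zap a page, refault it, and bounce between two CPUs. */
	#define _GNU_SOURCE
	#include <sched.h>
	#include <sys/mman.h>

	int main(void)
	{
		char *p = mmap(NULL, 4096, PROT_READ | PROT_WRITE,
			       MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
		cpu_set_t set;
		int i;

		for (i = 0; i < 100000; i++) {
			/* Zap the page; unpatched, this IPIs the CPU we
			 * just migrated away from. */
			madvise(p, 4096, MADV_DONTNEED);
			p[0] = 1;	/* touch: refault the page */

			CPU_ZERO(&set);	/* bounce between CPUs 0 and 1 */
			CPU_SET(i & 1, &set);
			sched_setaffinity(0, sizeof(set), &set);
		}
		return 0;
	}
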
Vitaly's pthread_mmap microbenchmark with 8 threads (on four cores),
nrounds = 100, 256M data

  patched:   1.1 seconds or so
  unpatched: 1.9 seconds or so

The speedup on Vitaly's test appears to be because it spends a lot
of time blocked on mmap_sem, and this patch avoids sending IPIs to
blocked CPUs.

Signed-off-by: Andy Lutomirski <luto@kernel.org>
Reviewed-by: Nadav Amit <nadav.amit@gmail.com>
Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Andrew Banman <abanman@sgi.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Arjan van de Ven <arjan@linux.intel.com>
Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Dave Hansen <dave.hansen@intel.com>
Cc: Dimitri Sivanich <sivanich@sgi.com>
Cc: Juergen Gross <jgross@suse.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Mike Travis <travis@sgi.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Rik van Riel <riel@redhat.com>
Cc: linux-mm@kvack.org
Link: http://lkml.kernel.org/r/ddf2c92962339f4ba39d8fc41b853936ec0b44f1.1498751203.git.luto@kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
(cherry picked from commit 94b1b03b519b81c494900cb112aa00ed205cc2d9)
Signed-off-by: Andy Whitcroft <apw@canonical.com>
Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
(cherry picked from commit b381b7ae452f2bc6384507a897247be7c93a71cc)
Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
---
 arch/x86/include/asm/mmu_context.h |   6 +-
 arch/x86/include/asm/tlbflush.h    |   4 -
 arch/x86/mm/init.c                 |   1 -
 arch/x86/mm/tlb.c                  | 197 ++++++++++++++++++++++---------------
 arch/x86/xen/mmu_pv.c              |   5 +-
 5 files changed, 124 insertions(+), 89 deletions(-)

diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h
index 6c05679c715b..d6b055b328f2 100644
--- a/arch/x86/include/asm/mmu_context.h
+++ b/arch/x86/include/asm/mmu_context.h
@@ -128,8 +128,10 @@ static inline void switch_ldt(struct mm_struct *prev, struct mm_struct *next)
 
 static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk)
 {
-	if (this_cpu_read(cpu_tlbstate.state) == TLBSTATE_OK)
-		this_cpu_write(cpu_tlbstate.state, TLBSTATE_LAZY);
+	int cpu = smp_processor_id();
+
+	if (cpumask_test_cpu(cpu, mm_cpumask(mm)))
+		cpumask_clear_cpu(cpu, mm_cpumask(mm));
 }
 
 static inline int init_new_context(struct task_struct *tsk,
diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h
index 3a167c214560..6397275008db 100644
--- a/arch/x86/include/asm/tlbflush.h
+++ b/arch/x86/include/asm/tlbflush.h
@@ -95,7 +95,6 @@ struct tlb_state {
 	 * mode even if we've already switched back to swapper_pg_dir.
 	 */
 	struct mm_struct *loaded_mm;
-	int state;
 
 	/*
 	 * Access to this CR4 shadow and to H/W CR4 is protected by
@@ -318,9 +317,6 @@ static inline void flush_tlb_page(struct vm_area_struct *vma, unsigned long a)
 void native_flush_tlb_others(const struct cpumask *cpumask,
 			     const struct flush_tlb_info *info);
 
-#define TLBSTATE_OK	1
-#define TLBSTATE_LAZY	2
-
 static inline void arch_tlbbatch_add_mm(struct arch_tlbflush_unmap_batch *batch,
 					struct mm_struct *mm)
 {
diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
index df2624b091a7..c86dc071bb10 100644
--- a/arch/x86/mm/init.c
+++ b/arch/x86/mm/init.c
@@ -849,7 +849,6 @@ void __init zone_sizes_init(void)
 
 DEFINE_PER_CPU_SHARED_ALIGNED(struct tlb_state, cpu_tlbstate) = {
 	.loaded_mm = &init_mm,
-	.state = 0,
 	.cr4 = ~0UL,	/* fail hard if we screw up cr4 shadow initialization */
 };
 EXPORT_SYMBOL_GPL(cpu_tlbstate);
diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
index 4e5a5ddb9e4d..0982c997d36f 100644
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -45,8 +45,8 @@ void leave_mm(int cpu)
 	if (loaded_mm == &init_mm)
 		return;
 
-	if (this_cpu_read(cpu_tlbstate.state) == TLBSTATE_OK)
-		BUG();
+	/* Warn if we're not lazy. */
+	WARN_ON(cpumask_test_cpu(smp_processor_id(), mm_cpumask(loaded_mm)));
 
 	switch_mm(NULL, &init_mm, NULL);
 }
@@ -65,94 +65,117 @@ void switch_mm(struct mm_struct *prev, struct mm_struct *next,
 void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
 			struct task_struct *tsk)
 {
-	unsigned cpu = smp_processor_id();
 	struct mm_struct *real_prev = this_cpu_read(cpu_tlbstate.loaded_mm);
+	unsigned cpu = smp_processor_id();
+	u64 next_tlb_gen;
 
 	/*
-	 * NB: The scheduler will call us with prev == next when
-	 * switching from lazy TLB mode to normal mode if active_mm
-	 * isn't changing.  When this happens, there is no guarantee
-	 * that CR3 (and hence cpu_tlbstate.loaded_mm) matches next.
+	 * NB: The scheduler will call us with prev == next when switching
+	 * from lazy TLB mode to normal mode if active_mm isn't changing.
+	 * When this happens, we don't assume that CR3 (and hence
+	 * cpu_tlbstate.loaded_mm) matches next.
 	 *
 	 * NB: leave_mm() calls us with prev == NULL and tsk == NULL.
 	 */
 
-	this_cpu_write(cpu_tlbstate.state, TLBSTATE_OK);
+	/* We don't want flush_tlb_func_* to run concurrently with us. */
+	if (IS_ENABLED(CONFIG_PROVE_LOCKING))
+		WARN_ON_ONCE(!irqs_disabled());
+
+	/*
+	 * Verify that CR3 is what we think it is.  This will catch
+	 * hypothetical buggy code that directly switches to swapper_pg_dir
+	 * without going through leave_mm() / switch_mm_irqs_off().
+	 */
+	VM_BUG_ON(read_cr3_pa() != __pa(real_prev->pgd));
 
 	if (real_prev == next) {
-		/*
-		 * There's nothing to do: we always keep the per-mm control
-		 * regs in sync with cpu_tlbstate.loaded_mm.  Just
-		 * sanity-check mm_cpumask.
-		 */
-		if (WARN_ON_ONCE(!cpumask_test_cpu(cpu, mm_cpumask(next))))
-			cpumask_set_cpu(cpu, mm_cpumask(next));
-		return;
-	}
+		VM_BUG_ON(this_cpu_read(cpu_tlbstate.ctxs[0].ctx_id) !=
+			  next->context.ctx_id);
+
+		if (cpumask_test_cpu(cpu, mm_cpumask(next))) {
+			/*
+			 * There's nothing to do: we weren't lazy, and we
+			 * aren't changing our mm.  We don't need to flush
+			 * anything, nor do we need to update CR3, CR4, or
+			 * LDTR.
+			 */
+			return;
+		}
+
+		/* Resume remote flushes and then read tlb_gen. */
+		cpumask_set_cpu(cpu, mm_cpumask(next));
+		next_tlb_gen = atomic64_read(&next->context.tlb_gen);
+
+		if (this_cpu_read(cpu_tlbstate.ctxs[0].tlb_gen) < next_tlb_gen) {
+			/*
+			 * Ideally, we'd have a flush_tlb() variant that
+			 * takes the known CR3 value as input.  This would
+			 * be faster on Xen PV and on hypothetical CPUs
+			 * on which INVPCID is fast.
+			 */
+			this_cpu_write(cpu_tlbstate.ctxs[0].tlb_gen,
+				       next_tlb_gen);
+			write_cr3(__pa(next->pgd));
+
+			/*
+			 * This gets called via leave_mm() in the idle path
+			 * where RCU functions differently.  Tracing normally
+			 * uses RCU, so we have to call the tracepoint
+			 * specially here.
+			 */
+			trace_tlb_flush_rcuidle(TLB_FLUSH_ON_TASK_SWITCH,
+						TLB_FLUSH_ALL);
+		}
 
-	if (IS_ENABLED(CONFIG_VMAP_STACK)) {
 		/*
-		 * If our current stack is in vmalloc space and isn't
-		 * mapped in the new pgd, we'll double-fault.  Forcibly
-		 * map it.
+		 * We just exited lazy mode, which means that CR4 and/or LDTR
+		 * may be stale.  (Changes to the required CR4 and LDTR states
+		 * are not reflected in tlb_gen.)
 		 */
-		unsigned int stack_pgd_index = pgd_index(current_stack_pointer());
-
-		pgd_t *pgd = next->pgd + stack_pgd_index;
-
-		if (unlikely(pgd_none(*pgd)))
-			set_pgd(pgd, init_mm.pgd[stack_pgd_index]);
-	}
+	} else {
+		VM_BUG_ON(this_cpu_read(cpu_tlbstate.ctxs[0].ctx_id) ==
+			  next->context.ctx_id);
+
+		if (IS_ENABLED(CONFIG_VMAP_STACK)) {
+			/*
+			 * If our current stack is in vmalloc space and isn't
+			 * mapped in the new pgd, we'll double-fault.  Forcibly
+			 * map it.
+			 */
+			unsigned int index = pgd_index(current_stack_pointer());
+			pgd_t *pgd = next->pgd + index;
+
+			if (unlikely(pgd_none(*pgd)))
+				set_pgd(pgd, init_mm.pgd[index]);
+		}
 
-	this_cpu_write(cpu_tlbstate.loaded_mm, next);
-	this_cpu_write(cpu_tlbstate.ctxs[0].ctx_id, next->context.ctx_id);
-	this_cpu_write(cpu_tlbstate.ctxs[0].tlb_gen, atomic64_read(&next->context.tlb_gen));
+		/* Stop remote flushes for the previous mm */
+		if (cpumask_test_cpu(cpu, mm_cpumask(real_prev)))
+			cpumask_clear_cpu(cpu, mm_cpumask(real_prev));
 
-	WARN_ON_ONCE(cpumask_test_cpu(cpu, mm_cpumask(next)));
-	cpumask_set_cpu(cpu, mm_cpumask(next));
+		VM_WARN_ON_ONCE(cpumask_test_cpu(cpu, mm_cpumask(next)));
 
-	/*
-	 * Re-load page tables.
-	 *
-	 * This logic has an ordering constraint:
-	 *
-	 *  CPU 0: Write to a PTE for 'next'
-	 *  CPU 0: load bit 1 in mm_cpumask.  if nonzero, send IPI.
-	 *  CPU 1: set bit 1 in next's mm_cpumask
-	 *  CPU 1: load from the PTE that CPU 0 writes (implicit)
-	 *
-	 * We need to prevent an outcome in which CPU 1 observes
-	 * the new PTE value and CPU 0 observes bit 1 clear in
-	 * mm_cpumask.  (If that occurs, then the IPI will never
-	 * be sent, and CPU 0's TLB will contain a stale entry.)
-	 *
-	 * The bad outcome can occur if either CPU's load is
-	 * reordered before that CPU's store, so both CPUs must
-	 * execute full barriers to prevent this from happening.
-	 *
-	 * Thus, switch_mm needs a full barrier between the
-	 * store to mm_cpumask and any operation that could load
-	 * from next->pgd.  TLB fills are special and can happen
-	 * due to instruction fetches or for no reason at all,
-	 * and neither LOCK nor MFENCE orders them.
-	 * Fortunately, load_cr3() is serializing and gives the
-	 * ordering guarantee we need.
-	 */
-	load_cr3(next->pgd);
+		/*
+		 * Start remote flushes and then read tlb_gen.
+		 */
+		cpumask_set_cpu(cpu, mm_cpumask(next));
+		next_tlb_gen = atomic64_read(&next->context.tlb_gen);
 
-	/*
-	 * This gets called via leave_mm() in the idle path where RCU
-	 * functions differently.  Tracing normally uses RCU, so we have to
-	 * call the tracepoint specially here.
-	 */
-	trace_tlb_flush_rcuidle(TLB_FLUSH_ON_TASK_SWITCH, TLB_FLUSH_ALL);
+		this_cpu_write(cpu_tlbstate.ctxs[0].ctx_id, next->context.ctx_id);
+		this_cpu_write(cpu_tlbstate.ctxs[0].tlb_gen, next_tlb_gen);
+		this_cpu_write(cpu_tlbstate.loaded_mm, next);
+		write_cr3(__pa(next->pgd));
 
-	/* Stop flush ipis for the previous mm */
-	WARN_ON_ONCE(!cpumask_test_cpu(cpu, mm_cpumask(real_prev)) &&
-		     real_prev != &init_mm);
-	cpumask_clear_cpu(cpu, mm_cpumask(real_prev));
+		/*
+		 * This gets called via leave_mm() in the idle path where RCU
+		 * functions differently.  Tracing normally uses RCU, so we
+		 * have to call the tracepoint specially here.
+		 */
+		trace_tlb_flush_rcuidle(TLB_FLUSH_ON_TASK_SWITCH,
+					TLB_FLUSH_ALL);
+	}
 
-	/* Load per-mm CR4 and LDTR state */
 	load_mm_cr4(next);
 	switch_ldt(real_prev, next);
 }
@@ -186,13 +209,13 @@ static void flush_tlb_func_common(const struct flush_tlb_info *f,
 	VM_WARN_ON(this_cpu_read(cpu_tlbstate.ctxs[0].ctx_id) !=
 		   loaded_mm->context.ctx_id);
 
-	if (this_cpu_read(cpu_tlbstate.state) != TLBSTATE_OK) {
+	if (!cpumask_test_cpu(smp_processor_id(), mm_cpumask(loaded_mm))) {
 		/*
-		 * leave_mm() is adequate to handle any type of flush, and
-		 * we would prefer not to receive further IPIs.  leave_mm()
-		 * clears this CPU's bit in mm_cpumask().
+		 * We're in lazy mode -- don't flush.  We can get here on
+		 * remote flushes due to races and on local flushes if a
+		 * kernel thread coincidentally flushes the mm it's lazily
+		 * still using.
 		 */
-		leave_mm(smp_processor_id());
 		return;
 	}
 
@@ -203,6 +226,7 @@ static void flush_tlb_func_common(const struct flush_tlb_info *f,
 		 * be handled can catch us all the way up, leaving no work for
 		 * the second flush.
 		 */
+		trace_tlb_flush(reason, 0);
 		return;
 	}
 
@@ -304,6 +328,21 @@ void native_flush_tlb_others(const struct cpumask *cpumask,
 				(info->end - info->start) >> PAGE_SHIFT);
 
 	if (is_uv_system()) {
+		/*
+		 * This whole special case is confused.  UV has a "Broadcast
+		 * Assist Unit", which seems to be a fancy way to send IPIs.
+		 * Back when x86 used an explicit TLB flush IPI, UV was
+		 * optimized to use its own mechanism.  These days, x86 uses
+		 * smp_call_function_many(), but UV still uses a manual IPI,
+		 * and that IPI's action is out of date -- it does a manual
+		 * flush instead of calling flush_tlb_func_remote().  This
+		 * means that the percpu tlb_gen variables won't be updated
+		 * and we'll do pointless flushes on future context switches.
+		 *
+		 * Rather than hooking native_flush_tlb_others() here, I think
+		 * that UV should be updated so that smp_call_function_many(),
+		 * etc, are optimal on UV.
+		 */
 		unsigned int cpu;
 
 		cpu = smp_processor_id();
@@ -363,6 +402,7 @@ void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start,
 
 	if (cpumask_any_but(mm_cpumask(mm), cpu) < nr_cpu_ids)
 		flush_tlb_others(mm_cpumask(mm), &info);
+
 	put_cpu();
 }
 
@@ -371,8 +411,6 @@ static void do_flush_tlb_all(void *info)
 {
 	count_vm_tlb_event(NR_TLB_REMOTE_FLUSH_RECEIVED);
 	__flush_tlb_all();
-	if (this_cpu_read(cpu_tlbstate.state) == TLBSTATE_LAZY)
-		leave_mm(smp_processor_id());
 }
 
 void flush_tlb_all(void)
@@ -425,6 +463,7 @@ void arch_tlbbatch_flush(struct arch_tlbflush_unmap_batch *batch)
 
 	if (cpumask_any_but(&batch->cpumask, cpu) < nr_cpu_ids)
 		flush_tlb_others(&batch->cpumask, &info);
+
 	cpumask_clear(&batch->cpumask);
 
 	put_cpu();
diff --git a/arch/x86/xen/mmu_pv.c b/arch/x86/xen/mmu_pv.c
index 5f61b7e2e6b2..ba76f3ce997f 100644
--- a/arch/x86/xen/mmu_pv.c
+++ b/arch/x86/xen/mmu_pv.c
@@ -1005,14 +1005,12 @@ static void xen_drop_mm_ref(struct mm_struct *mm)
 	/* Get the "official" set of cpus referring to our pagetable. */
 	if (!alloc_cpumask_var(&mask, GFP_ATOMIC)) {
 		for_each_online_cpu(cpu) {
-			if (!cpumask_test_cpu(cpu, mm_cpumask(mm))
-			    && per_cpu(xen_current_cr3, cpu) != __pa(mm->pgd))
+			if (per_cpu(xen_current_cr3, cpu) != __pa(mm->pgd))
 				continue;
 			smp_call_function_single(cpu, drop_mm_ref_this_cpu, mm, 1);
 		}
 		return;
 	}
-	cpumask_copy(mask, mm_cpumask(mm));
 
 	/*
 	 * It's possible that a vcpu may have a stale reference to our
@@ -1021,6 +1019,7 @@ static void xen_drop_mm_ref(struct mm_struct *mm)
 	 * look at its actual current cr3 value, and force it to flush
 	 * if needed.
 	 */
+	cpumask_clear(mask);
 	for_each_online_cpu(cpu) {
 		if (per_cpu(xen_current_cr3, cpu) == __pa(mm->pgd))
 			cpumask_set_cpu(cpu, mask);
-- 
2.14.2