]>
Commit | Line | Data |
---|---|---|
59d5af67 | 1 | From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 |
321d628a FG |
2 | From: Andy Lutomirski <luto@kernel.org> |
3 | Date: Sat, 4 Nov 2017 04:16:12 -0700 | |
59d5af67 | 4 | Subject: [PATCH] Revert "x86/mm: Stop calling leave_mm() in idle code" |
321d628a FG |
5 | MIME-Version: 1.0 |
6 | Content-Type: text/plain; charset=UTF-8 | |
7 | Content-Transfer-Encoding: 8bit | |
8 | ||
9 | CVE-2017-5754 | |
10 | ||
11 | This reverts commit 43858b4f25cf0adc5c2ca9cf5ce5fdf2532941e5. | |
12 | ||
13 | The reason I removed the leave_mm() calls in question is because the | |
14 | heuristic wasn't needed after that patch. With the original version | |
15 | of my PCID series, we never flushed a "lazy cpu" (i.e. a CPU running | |
16 | kernel thread) due to a flush on the loaded mm. |
17 | ||
18 | Unfortunately, that caused architectural issues, so now I've | |
19 | reinstated these flushes on non-PCID systems in: | |
20 | ||
21 | commit b956575bed91 ("x86/mm: Flush more aggressively in lazy TLB mode"). | |
22 | ||
23 | That, in turn, gives us a power management and occasionally | |
24 | performance regression as compared to old kernels: a process that | |
25 | goes into a deep idle state on a given CPU and gets its mm flushed | |
26 | due to activity on a different CPU will wake the idle CPU. | |
27 | ||
28 | Reinstate the old ugly heuristic: if a CPU goes into ACPI C3 or an | |
29 | intel_idle state that is likely to cause a TLB flush, its mm gets | |
30 | switched to init_mm before going idle. | |
31 | ||
32 | FWIW, this heuristic is lousy. Whether we should change CR3 before | |
33 | idle isn't a good hint except insofar as the performance hit is a bit | |
34 | lower if the TLB is getting flushed by the idle code anyway. What we | |
35 | really want to know is whether we anticipate being idle long enough | |
36 | that the mm is likely to be flushed before we wake up. This is more a | |
37 | matter of the expected latency than the idle state that gets chosen. | |
38 | This heuristic also completely fails on systems that don't know | |
39 | whether the TLB will be flushed (e.g. AMD systems?). OTOH it may be a | |
40 | bit obsolete anyway -- PCID systems don't presently benefit from this | |
41 | heuristic at all. | |
42 | ||
43 | We also shouldn't do this callback from innermost bit of the idle code | |
44 | due to the RCU nastiness it causes. All the information need is | |
45 | available before rcu_idle_enter() needs to happen. | |
46 | ||
47 | Signed-off-by: Andy Lutomirski <luto@kernel.org> | |
48 | Cc: Borislav Petkov <bp@alien8.de> | |
49 | Cc: Borislav Petkov <bpetkov@suse.de> | |
50 | Cc: Brian Gerst <brgerst@gmail.com> | |
51 | Cc: Denys Vlasenko <dvlasenk@redhat.com> | |
52 | Cc: H. Peter Anvin <hpa@zytor.com> | |
53 | Cc: Josh Poimboeuf <jpoimboe@redhat.com> | |
54 | Cc: Linus Torvalds <torvalds@linux-foundation.org> | |
55 | Cc: Peter Zijlstra <peterz@infradead.org> | |
56 | Cc: Thomas Gleixner <tglx@linutronix.de> | |
57 | Fixes: 43858b4f25cf ("x86/mm: Stop calling leave_mm() in idle code") | |
58 | Link: http://lkml.kernel.org/r/c513bbd4e653747213e05bc7062de000bf0202a5.1509793738.git.luto@kernel.org | |
59 | Signed-off-by: Ingo Molnar <mingo@kernel.org> | |
60 | (cherry picked from commit 675357362aeba19688440eb1aaa7991067f73b12) | |
61 | Signed-off-by: Andy Whitcroft <apw@canonical.com> | |
62 | Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com> | |
63 | (cherry picked from commit b607843145fd0593fcd87e2596d1dc5a1d5f79a5) | |
64 | Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com> | |
65 | --- | |
66 | arch/x86/mm/tlb.c | 16 +++++++++++++--- | |
67 | 1 file changed, 13 insertions(+), 3 deletions(-) | |
68 | ||
69 | diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c | |
70 | index b27aceaf7ed1..ed06f1593390 100644 | |
71 | --- a/arch/x86/mm/tlb.c | |
72 | +++ b/arch/x86/mm/tlb.c | |
73 | @@ -194,12 +194,22 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next, | |
74 | this_cpu_write(cpu_tlbstate.ctxs[new_asid].ctx_id, next->context.ctx_id); | |
75 | this_cpu_write(cpu_tlbstate.ctxs[new_asid].tlb_gen, next_tlb_gen); | |
76 | write_cr3(build_cr3(next, new_asid)); | |
77 | - trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH, | |
78 | - TLB_FLUSH_ALL); | |
79 | + | |
80 | + /* | |
81 | + * NB: This gets called via leave_mm() in the idle path | |
82 | + * where RCU functions differently. Tracing normally | |
83 | + * uses RCU, so we need to use the _rcuidle variant. | |
84 | + * | |
85 | + * (There is no good reason for this. The idle code should | |
86 | + * be rearranged to call this before rcu_idle_enter().) | |
87 | + */ | |
88 | + trace_tlb_flush_rcuidle(TLB_FLUSH_ON_TASK_SWITCH, TLB_FLUSH_ALL); | |
89 | } else { | |
90 | /* The new ASID is already up to date. */ | |
91 | write_cr3(build_cr3_noflush(next, new_asid)); | |
92 | - trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH, 0); | |
93 | + | |
94 | + /* See above wrt _rcuidle. */ | |
95 | + trace_tlb_flush_rcuidle(TLB_FLUSH_ON_TASK_SWITCH, 0); | |
96 | } | |
97 | ||
98 | this_cpu_write(cpu_tlbstate.loaded_mm, next); | |
99 | -- | |
100 | 2.14.2 | |
101 |