]>
Commit | Line | Data |
---|---|---|
321d628a FG |
1 | From 2a767692d6140051e569ab59a1440b3760839e03 Mon Sep 17 00:00:00 2001 |
2 | From: Andy Lutomirski <luto@kernel.org> | |
3 | Date: Tue, 11 Jul 2017 10:33:38 -0500 | |
633c5ed1 | 4 | Subject: [PATCH 018/242] x86/entry/64: Refactor IRQ stacks and make them |
321d628a FG |
5 | NMI-safe |
6 | MIME-Version: 1.0 | |
7 | Content-Type: text/plain; charset=UTF-8 | |
8 | Content-Transfer-Encoding: 8bit | |
9 | ||
10 | CVE-2017-5754 | |
11 | ||
12 | This will allow IRQ stacks to nest inside NMIs or similar entries | |
13 | that can happen during IRQ stack setup or teardown. | |
14 | ||
15 | The new macros won't work correctly if they're invoked with IRQs on. | |
16 | Add a check under CONFIG_DEBUG_ENTRY to detect that. | |
17 | ||
18 | Signed-off-by: Andy Lutomirski <luto@kernel.org> | |
19 | [ Use %r10 instead of %r11 in xen_do_hypervisor_callback to make objtool's | |
20 | and the ORC unwinder's lives a little easier. ] | |
21 | Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com> | |
22 | Cc: Borislav Petkov <bp@alien8.de> | |
23 | Cc: Brian Gerst <brgerst@gmail.com> | |
24 | Cc: Denys Vlasenko <dvlasenk@redhat.com> | |
25 | Cc: H. Peter Anvin <hpa@zytor.com> | |
26 | Cc: Jiri Slaby <jslaby@suse.cz> | |
27 | Cc: Linus Torvalds <torvalds@linux-foundation.org> | |
28 | Cc: Mike Galbraith <efault@gmx.de> | |
29 | Cc: Peter Zijlstra <peterz@infradead.org> | |
30 | Cc: Thomas Gleixner <tglx@linutronix.de> | |
31 | Cc: live-patching@vger.kernel.org | |
32 | Link: http://lkml.kernel.org/r/b0b2ff5fb97d2da2e1d7e1f380190c92545c8bb5.1499786555.git.jpoimboe@redhat.com | |
33 | Signed-off-by: Ingo Molnar <mingo@kernel.org> | |
34 | (cherry picked from commit 1d3e53e8624a3ec85f4041ca6d973da7c1575938) | |
35 | Signed-off-by: Andy Whitcroft <apw@canonical.com> | |
36 | Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com> | |
37 | (cherry picked from commit be58b042e135d0ee777a54798f33015857d7e2e0) | |
38 | Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com> | |
39 | --- | |
40 | arch/x86/kernel/process_64.c | 3 ++ | |
41 | arch/x86/Kconfig.debug | 2 -- | |
42 | arch/x86/entry/entry_64.S | 85 +++++++++++++++++++++++++++++++------------- | |
43 | 3 files changed, 64 insertions(+), 26 deletions(-) | |
44 | ||
45 | diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c | |
46 | index fe56e6f93cbb..1e7701c4cd80 100644 | |
47 | --- a/arch/x86/kernel/process_64.c | |
48 | +++ b/arch/x86/kernel/process_64.c | |
49 | @@ -404,6 +404,9 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) | |
50 | int cpu = smp_processor_id(); | |
51 | struct tss_struct *tss = &per_cpu(cpu_tss, cpu); | |
52 | ||
53 | + WARN_ON_ONCE(IS_ENABLED(CONFIG_DEBUG_ENTRY) && | |
54 | + this_cpu_read(irq_count) != -1); | |
55 | + | |
56 | switch_fpu_prepare(prev_fpu, cpu); | |
57 | ||
58 | /* We must save %fs and %gs before load_TLS() because | |
59 | diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug | |
60 | index cd20ca0b4043..1fc519f3c49e 100644 | |
61 | --- a/arch/x86/Kconfig.debug | |
62 | +++ b/arch/x86/Kconfig.debug | |
63 | @@ -305,8 +305,6 @@ config DEBUG_ENTRY | |
64 | Some of these sanity checks may slow down kernel entries and | |
65 | exits or otherwise impact performance. | |
66 | ||
67 | - This is currently used to help test NMI code. | |
68 | - | |
69 | If unsure, say N. | |
70 | ||
71 | config DEBUG_NMI_SELFTEST | |
72 | diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S | |
73 | index 6d078b89a5e8..07b4056af8a8 100644 | |
74 | --- a/arch/x86/entry/entry_64.S | |
75 | +++ b/arch/x86/entry/entry_64.S | |
76 | @@ -447,6 +447,59 @@ ENTRY(irq_entries_start) | |
77 | .endr | |
78 | END(irq_entries_start) | |
79 | ||
80 | +.macro DEBUG_ENTRY_ASSERT_IRQS_OFF | |
81 | +#ifdef CONFIG_DEBUG_ENTRY | |
82 | + pushfq | |
83 | + testl $X86_EFLAGS_IF, (%rsp) | |
84 | + jz .Lokay_\@ | |
85 | + ud2 | |
86 | +.Lokay_\@: | |
87 | + addq $8, %rsp | |
88 | +#endif | |
89 | +.endm | |
90 | + | |
91 | +/* | |
92 | + * Enters the IRQ stack if we're not already using it. NMI-safe. Clobbers | |
93 | + * flags and puts old RSP into old_rsp, and leaves all other GPRs alone. | |
94 | + * Requires kernel GSBASE. | |
95 | + * | |
96 | + * The invariant is that, if irq_count != -1, then the IRQ stack is in use. | |
97 | + */ | |
98 | +.macro ENTER_IRQ_STACK old_rsp | |
99 | + DEBUG_ENTRY_ASSERT_IRQS_OFF | |
100 | + movq %rsp, \old_rsp | |
101 | + incl PER_CPU_VAR(irq_count) | |
102 | + | |
103 | + /* | |
104 | + * Right now, if we just incremented irq_count to zero, we've | |
105 | + * claimed the IRQ stack but we haven't switched to it yet. | |
106 | + * | |
107 | + * If anything is added that can interrupt us here without using IST, | |
108 | + * it must be *extremely* careful to limit its stack usage. This | |
109 | + * could include kprobes and a hypothetical future IST-less #DB | |
110 | + * handler. | |
111 | + */ | |
112 | + | |
113 | + cmovzq PER_CPU_VAR(irq_stack_ptr), %rsp | |
114 | + pushq \old_rsp | |
115 | +.endm | |
116 | + | |
117 | +/* | |
118 | + * Undoes ENTER_IRQ_STACK. | |
119 | + */ | |
120 | +.macro LEAVE_IRQ_STACK | |
121 | + DEBUG_ENTRY_ASSERT_IRQS_OFF | |
122 | + /* We need to be off the IRQ stack before decrementing irq_count. */ | |
123 | + popq %rsp | |
124 | + | |
125 | + /* | |
126 | + * As in ENTER_IRQ_STACK, if irq_count == 0 at this point, we are | |
127 | + * still claiming the IRQ stack but we are no longer on it. | |
128 | + */ | |
129 | + | |
130 | + decl PER_CPU_VAR(irq_count) | |
131 | +.endm | |
132 | + | |
133 | /* | |
134 | * Interrupt entry/exit. | |
135 | * | |
136 | @@ -485,17 +538,7 @@ END(irq_entries_start) | |
137 | CALL_enter_from_user_mode | |
138 | ||
139 | 1: | |
140 | - /* | |
141 | - * Save previous stack pointer, optionally switch to interrupt stack. | |
142 | - * irq_count is used to check if a CPU is already on an interrupt stack | |
143 | - * or not. While this is essentially redundant with preempt_count it is | |
144 | - * a little cheaper to use a separate counter in the PDA (short of | |
145 | - * moving irq_enter into assembly, which would be too much work) | |
146 | - */ | |
147 | - movq %rsp, %rdi | |
148 | - incl PER_CPU_VAR(irq_count) | |
149 | - cmovzq PER_CPU_VAR(irq_stack_ptr), %rsp | |
150 | - pushq %rdi | |
151 | + ENTER_IRQ_STACK old_rsp=%rdi | |
152 | /* We entered an interrupt context - irqs are off: */ | |
153 | TRACE_IRQS_OFF | |
154 | ||
155 | @@ -515,10 +558,8 @@ common_interrupt: | |
156 | ret_from_intr: | |
157 | DISABLE_INTERRUPTS(CLBR_ANY) | |
158 | TRACE_IRQS_OFF | |
159 | - decl PER_CPU_VAR(irq_count) | |
160 | ||
161 | - /* Restore saved previous stack */ | |
162 | - popq %rsp | |
163 | + LEAVE_IRQ_STACK | |
164 | ||
165 | testb $3, CS(%rsp) | |
166 | jz retint_kernel | |
167 | @@ -892,12 +933,10 @@ bad_gs: | |
168 | ENTRY(do_softirq_own_stack) | |
169 | pushq %rbp | |
170 | mov %rsp, %rbp | |
171 | - incl PER_CPU_VAR(irq_count) | |
172 | - cmove PER_CPU_VAR(irq_stack_ptr), %rsp | |
173 | - push %rbp /* frame pointer backlink */ | |
174 | + ENTER_IRQ_STACK old_rsp=%r11 | |
175 | call __do_softirq | |
176 | + LEAVE_IRQ_STACK | |
177 | leaveq | |
178 | - decl PER_CPU_VAR(irq_count) | |
179 | ret | |
180 | END(do_softirq_own_stack) | |
181 | ||
182 | @@ -924,13 +963,11 @@ ENTRY(xen_do_hypervisor_callback) /* do_hypervisor_callback(struct *pt_regs) */ | |
183 | * see the correct pointer to the pt_regs | |
184 | */ | |
185 | movq %rdi, %rsp /* we don't return, adjust the stack frame */ | |
186 | -11: incl PER_CPU_VAR(irq_count) | |
187 | - movq %rsp, %rbp | |
188 | - cmovzq PER_CPU_VAR(irq_stack_ptr), %rsp | |
189 | - pushq %rbp /* frame pointer backlink */ | |
190 | + | |
191 | + ENTER_IRQ_STACK old_rsp=%r10 | |
192 | call xen_evtchn_do_upcall | |
193 | - popq %rsp | |
194 | - decl PER_CPU_VAR(irq_count) | |
195 | + LEAVE_IRQ_STACK | |
196 | + | |
197 | #ifndef CONFIG_PREEMPT | |
198 | call xen_maybe_preempt_hcall | |
199 | #endif | |
200 | -- | |
201 | 2.14.2 | |
202 |