1 From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
2 From: Tim Chen <tim@otc-grantley-02.jf.intel.com>
3 Date: Thu, 16 Nov 2017 04:47:48 -0800
4 Subject: [PATCH] x86/spec_ctrl: Add sysctl knobs to enable/disable SPEC_CTRL
7 Content-Type: text/plain; charset=UTF-8
8 Content-Transfer-Encoding: 8bit
13 There are 2 ways to control IBPB and IBRS
16 noibrs kernel boot parameter will disable IBRS usage
17 noibpb kernel boot parameter will disable IBPB usage
18 Otherwise if the above parameters are not specified, the system
19 will enable ibrs and ibpb usage if the cpu supports it.
22 echo 0 > /proc/sys/kernel/ibrs_enabled will turn off IBRS
23 echo 1 > /proc/sys/kernel/ibrs_enabled will turn on IBRS in kernel
24 echo 2 > /proc/sys/kernel/ibrs_enabled will turn on IBRS in both userspace and kernel
26 Signed-off-by: Tim Chen <tim.c.chen@linux.intel.com>
27 Signed-off-by: Andy Whitcroft <apw@canonical.com>
28 Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
29 [marcelo.cerri@canonical.com: add x86 guards to kernel/smp.c]
30 [marcelo.cerri@canonical.com: include asm/msr.h under x86 guard in kernel/sysctl.c]
31 Signed-off-by: Marcelo Henrique Cerri <marcelo.cerri@canonical.com>
32 (cherry picked from commit 23225db7b02c7f8b94e5d5050987430089e6f7cc)
33 Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
35 Documentation/admin-guide/kernel-parameters.txt | 10 ++
36 arch/x86/include/asm/mwait.h | 4 +-
37 arch/x86/include/asm/spec_ctrl.h | 24 ++++-
38 include/linux/smp.h | 87 +++++++++++++++++
39 arch/x86/kernel/cpu/intel.c | 11 ++-
40 arch/x86/kernel/cpu/microcode/core.c | 11 +++
41 arch/x86/kernel/process.c | 6 +-
42 arch/x86/kernel/smpboot.c | 4 +-
43 arch/x86/kvm/vmx.c | 4 +-
44 arch/x86/lib/delay.c | 6 +-
45 arch/x86/mm/tlb.c | 2 +-
46 kernel/smp.c | 41 ++++++++
47 kernel/sysctl.c | 125 ++++++++++++++++++++++++
48 13 files changed, 316 insertions(+), 19 deletions(-)
50 diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
51 index 1a6ebc6cdf26..e7216bc05b3b 100644
52 --- a/Documentation/admin-guide/kernel-parameters.txt
53 +++ b/Documentation/admin-guide/kernel-parameters.txt
54 @@ -2566,6 +2566,16 @@
55 noexec=on: enable non-executable mappings (default)
56 noexec=off: disable non-executable mappings
59 + Don't use indirect branch restricted speculation (IBRS)
60 + feature when running in secure environment,
61 + to avoid performance overhead.
64 + Don't use indirect branch prediction barrier (IBPB)
65 + feature when running in secure environment,
66 + to avoid performance overhead.
69 Disable SMAP (Supervisor Mode Access Prevention)
70 even if it is supported by processor.
71 diff --git a/arch/x86/include/asm/mwait.h b/arch/x86/include/asm/mwait.h
72 index f15120ada161..d665daab3f84 100644
73 --- a/arch/x86/include/asm/mwait.h
74 +++ b/arch/x86/include/asm/mwait.h
75 @@ -107,14 +107,14 @@ static inline void mwait_idle_with_hints(unsigned long eax, unsigned long ecx)
79 - if (boot_cpu_has(X86_FEATURE_SPEC_CTRL))
81 native_wrmsrl(MSR_IA32_SPEC_CTRL, 0);
83 __monitor((void *)&current_thread_info()->flags, 0, 0);
87 - if (boot_cpu_has(X86_FEATURE_SPEC_CTRL))
89 native_wrmsrl(MSR_IA32_SPEC_CTRL, FEATURE_ENABLE_IBRS);
91 current_clr_polling();
92 diff --git a/arch/x86/include/asm/spec_ctrl.h b/arch/x86/include/asm/spec_ctrl.h
93 index 55ee1f36bda2..4c69e51261cc 100644
94 --- a/arch/x86/include/asm/spec_ctrl.h
95 +++ b/arch/x86/include/asm/spec_ctrl.h
103 #define __ASM_ENABLE_IBRS \
106 @@ -104,15 +107,30 @@
110 -ALTERNATIVE "", __stringify(__ASM_ENABLE_IBRS), X86_FEATURE_SPEC_CTRL
120 .macro ENABLE_IBRS_CLOBBER
121 -ALTERNATIVE "", __stringify(__ASM_ENABLE_IBRS_CLOBBER), X86_FEATURE_SPEC_CTRL
124 + __ASM_ENABLE_IBRS_CLOBBER
132 -ALTERNATIVE "", __stringify(__ASM_DISABLE_IBRS), X86_FEATURE_SPEC_CTRL
140 diff --git a/include/linux/smp.h b/include/linux/smp.h
141 index 68123c1fe549..e2935c0a1bb4 100644
142 --- a/include/linux/smp.h
143 +++ b/include/linux/smp.h
144 @@ -50,6 +50,93 @@ void on_each_cpu_cond(bool (*cond_func)(int cpu, void *info),
146 int smp_call_function_single_async(int cpu, struct call_single_data *csd);
149 +/* indicate usage of IBRS to control execution speculation */
150 +extern int use_ibrs;
151 +extern u32 sysctl_ibrs_enabled;
152 +extern struct mutex spec_ctrl_mutex;
153 +#define ibrs_supported (use_ibrs & 0x2)
154 +#define ibrs_disabled (use_ibrs & 0x4)
155 +static inline void set_ibrs_inuse(void)
157 + if (ibrs_supported)
160 +static inline void clear_ibrs_inuse(void)
164 +static inline int check_ibrs_inuse(void)
166 + if (use_ibrs & 0x1)
169 + /* rmb to prevent wrong speculation for security */
173 +static inline void set_ibrs_supported(void)
176 + if (!ibrs_disabled)
179 +static inline void set_ibrs_disabled(void)
182 + if (check_ibrs_inuse())
183 + clear_ibrs_inuse();
185 +static inline void clear_ibrs_disabled(void)
190 +#define ibrs_inuse (check_ibrs_inuse())
192 +/* indicate usage of IBPB to control execution speculation */
193 +extern int use_ibpb;
194 +extern u32 sysctl_ibpb_enabled;
195 +#define ibpb_supported (use_ibpb & 0x2)
196 +#define ibpb_disabled (use_ibpb & 0x4)
197 +static inline void set_ibpb_inuse(void)
199 + if (ibpb_supported)
202 +static inline void clear_ibpb_inuse(void)
206 +static inline int check_ibpb_inuse(void)
208 + if (use_ibpb & 0x1)
211 + /* rmb to prevent wrong speculation for security */
215 +static inline void set_ibpb_supported(void)
218 + if (!ibpb_disabled)
221 +static inline void set_ibpb_disabled(void)
224 + if (check_ibpb_inuse())
225 + clear_ibpb_inuse();
227 +static inline void clear_ibpb_disabled(void)
232 +#define ibpb_inuse (check_ibpb_inuse())
237 #include <linux/preempt.h>
238 diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
239 index f1d94c73625a..c69ea2efbed1 100644
240 --- a/arch/x86/kernel/cpu/intel.c
241 +++ b/arch/x86/kernel/cpu/intel.c
242 @@ -628,10 +628,17 @@ static void init_intel(struct cpuinfo_x86 *c)
244 init_intel_misc_features(c);
246 - if (boot_cpu_has(X86_FEATURE_SPEC_CTRL))
247 + if (boot_cpu_has(X86_FEATURE_SPEC_CTRL)) {
248 printk_once(KERN_INFO "FEATURE SPEC_CTRL Present\n");
250 + set_ibrs_supported();
251 + set_ibpb_supported();
253 + sysctl_ibrs_enabled = 1;
255 + sysctl_ibpb_enabled = 1;
257 printk_once(KERN_INFO "FEATURE SPEC_CTRL Not Present\n");
262 diff --git a/arch/x86/kernel/cpu/microcode/core.c b/arch/x86/kernel/cpu/microcode/core.c
263 index c4fa4a85d4cb..6450aeda72fc 100644
264 --- a/arch/x86/kernel/cpu/microcode/core.c
265 +++ b/arch/x86/kernel/cpu/microcode/core.c
266 @@ -535,6 +535,17 @@ static ssize_t reload_store(struct device *dev,
269 perf_check_microcode();
271 + if (boot_cpu_has(X86_FEATURE_SPEC_CTRL)) {
272 + printk_once(KERN_INFO "FEATURE SPEC_CTRL Present\n");
273 + set_ibrs_supported();
274 + set_ibpb_supported();
276 + sysctl_ibrs_enabled = 1;
278 + sysctl_ibpb_enabled = 1;
281 mutex_unlock(&microcode_mutex);
284 diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
285 index 3adb3806a284..3fdf5358998e 100644
286 --- a/arch/x86/kernel/process.c
287 +++ b/arch/x86/kernel/process.c
288 @@ -447,16 +447,16 @@ static __cpuidle void mwait_idle(void)
292 - if (boot_cpu_has(X86_FEATURE_SPEC_CTRL))
294 native_wrmsrl(MSR_IA32_SPEC_CTRL, 0);
296 __monitor((void *)&current_thread_info()->flags, 0, 0);
297 if (!need_resched()) {
299 - if (boot_cpu_has(X86_FEATURE_SPEC_CTRL))
301 native_wrmsrl(MSR_IA32_SPEC_CTRL, FEATURE_ENABLE_IBRS);
303 - if (boot_cpu_has(X86_FEATURE_SPEC_CTRL))
305 native_wrmsrl(MSR_IA32_SPEC_CTRL, FEATURE_ENABLE_IBRS);
308 diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
309 index a652bff7add4..9317aa4a7446 100644
310 --- a/arch/x86/kernel/smpboot.c
311 +++ b/arch/x86/kernel/smpboot.c
312 @@ -1693,14 +1693,14 @@ void native_play_dead(void)
314 tboot_shutdown(TB_SHUTDOWN_WFS);
316 - if (boot_cpu_has(X86_FEATURE_SPEC_CTRL))
318 native_wrmsrl(MSR_IA32_SPEC_CTRL, 0);
320 mwait_play_dead(); /* Only returns on failure */
321 if (cpuidle_play_dead())
324 - if (boot_cpu_has(X86_FEATURE_SPEC_CTRL))
326 native_wrmsrl(MSR_IA32_SPEC_CTRL, FEATURE_ENABLE_IBRS);
329 diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
330 index 496884b6467f..d2168203bddc 100644
331 --- a/arch/x86/kvm/vmx.c
332 +++ b/arch/x86/kvm/vmx.c
333 @@ -2269,7 +2269,7 @@ static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
334 if (per_cpu(current_vmcs, cpu) != vmx->loaded_vmcs->vmcs) {
335 per_cpu(current_vmcs, cpu) = vmx->loaded_vmcs->vmcs;
336 vmcs_load(vmx->loaded_vmcs->vmcs);
337 - if (boot_cpu_has(X86_FEATURE_SPEC_CTRL))
339 native_wrmsrl(MSR_IA32_PRED_CMD, FEATURE_SET_IBPB);
342 @@ -9102,7 +9102,7 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
344 atomic_switch_perf_msrs(vmx);
346 - if (boot_cpu_has(X86_FEATURE_SPEC_CTRL))
348 add_atomic_switch_msr(vmx, MSR_IA32_SPEC_CTRL,
349 vcpu->arch.spec_ctrl, FEATURE_ENABLE_IBRS);
351 diff --git a/arch/x86/lib/delay.c b/arch/x86/lib/delay.c
352 index b088463973e4..72a174642550 100644
353 --- a/arch/x86/lib/delay.c
354 +++ b/arch/x86/lib/delay.c
355 @@ -107,8 +107,7 @@ static void delay_mwaitx(unsigned long __loops)
357 delay = min_t(u64, MWAITX_MAX_LOOPS, loops);
359 - if (boot_cpu_has(X86_FEATURE_SPEC_CTRL) &&
360 - (delay > IBRS_DISABLE_THRESHOLD))
361 + if (ibrs_inuse && (delay > IBRS_DISABLE_THRESHOLD))
362 native_wrmsrl(MSR_IA32_SPEC_CTRL, 0);
365 @@ -124,8 +123,7 @@ static void delay_mwaitx(unsigned long __loops)
367 __mwaitx(MWAITX_DISABLE_CSTATES, delay, MWAITX_ECX_TIMER_ENABLE);
369 - if (boot_cpu_has(X86_FEATURE_SPEC_CTRL) &&
370 - (delay > IBRS_DISABLE_THRESHOLD))
371 + if (ibrs_inuse && (delay > IBRS_DISABLE_THRESHOLD))
372 native_wrmsrl(MSR_IA32_SPEC_CTRL, FEATURE_ENABLE_IBRS);
374 end = rdtsc_ordered();
375 diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
376 index 301e6efbc514..6365f769de3d 100644
377 --- a/arch/x86/mm/tlb.c
378 +++ b/arch/x86/mm/tlb.c
379 @@ -221,7 +221,7 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
382 /* Null tsk means switching to kernel, so that's safe */
383 - if (boot_cpu_has(X86_FEATURE_SPEC_CTRL) && tsk &&
384 + if (ibpb_inuse && tsk &&
385 ___ptrace_may_access(tsk, current, PTRACE_MODE_IBPB))
386 native_wrmsrl(MSR_IA32_PRED_CMD, FEATURE_SET_IBPB);
388 diff --git a/kernel/smp.c b/kernel/smp.c
389 index 3061483cb3ad..3bece045f4a4 100644
392 @@ -498,6 +498,26 @@ EXPORT_SYMBOL(smp_call_function);
393 unsigned int setup_max_cpus = NR_CPUS;
394 EXPORT_SYMBOL(setup_max_cpus);
399 + * bit 0 = indicate if ibrs is currently in use
400 + * bit 1 = indicate if system supports ibrs
401 + * bit 2 = indicate if admin disables ibrs
405 +EXPORT_SYMBOL(use_ibrs);
409 + * bit 0 = indicate if ibpb is currently in use
410 + * bit 1 = indicate if system supports ibpb
411 + * bit 2 = indicate if admin disables ibpb
414 +EXPORT_SYMBOL(use_ibpb);
418 * Setup routine for controlling SMP activation
419 @@ -522,6 +542,27 @@ static int __init nosmp(char *str)
421 early_param("nosmp", nosmp);
424 +static int __init noibrs(char *str)
426 + set_ibrs_disabled();
431 +early_param("noibrs", noibrs);
433 +static int __init noibpb(char *str)
435 + set_ibpb_disabled();
440 +early_param("noibpb", noibpb);
444 /* this is hard limit */
445 static int __init nrcpus(char *str)
447 diff --git a/kernel/sysctl.c b/kernel/sysctl.c
448 index 7ab08d5728e6..69c37bd6251a 100644
449 --- a/kernel/sysctl.c
450 +++ b/kernel/sysctl.c
452 #include <asm/processor.h>
455 +#include <asm/msr.h>
457 #include <asm/stacktrace.h>
459 @@ -222,6 +223,15 @@ static int proc_dostring_coredump(struct ctl_table *table, int write,
460 void __user *buffer, size_t *lenp, loff_t *ppos);
464 +int proc_dointvec_ibrs_ctrl(struct ctl_table *table, int write,
465 + void __user *buffer, size_t *lenp, loff_t *ppos);
466 +int proc_dointvec_ibpb_ctrl(struct ctl_table *table, int write,
467 + void __user *buffer, size_t *lenp, loff_t *ppos);
468 +int proc_dointvec_ibrs_dump(struct ctl_table *table, int write,
469 + void __user *buffer, size_t *lenp, loff_t *ppos);
472 #ifdef CONFIG_MAGIC_SYSRQ
473 /* Note: sysrq code uses it's own private copy */
474 static int __sysrq_enabled = CONFIG_MAGIC_SYSRQ_DEFAULT_ENABLE;
475 @@ -258,6 +268,12 @@ extern struct ctl_table epoll_table[];
476 int sysctl_legacy_va_layout;
479 +u32 sysctl_ibrs_dump = 0;
480 +u32 sysctl_ibrs_enabled = 0;
481 +EXPORT_SYMBOL(sysctl_ibrs_enabled);
482 +u32 sysctl_ibpb_enabled = 0;
483 +EXPORT_SYMBOL(sysctl_ibpb_enabled);
485 /* The default sysctl tables: */
487 static struct ctl_table sysctl_base_table[] = {
488 @@ -1241,6 +1257,35 @@ static struct ctl_table kern_table[] = {
495 + .procname = "ibrs_enabled",
496 + .data = &sysctl_ibrs_enabled,
497 + .maxlen = sizeof(unsigned int),
499 + .proc_handler = proc_dointvec_ibrs_ctrl,
504 + .procname = "ibpb_enabled",
505 + .data = &sysctl_ibpb_enabled,
506 + .maxlen = sizeof(unsigned int),
508 + .proc_handler = proc_dointvec_ibpb_ctrl,
513 + .procname = "ibrs_dump",
514 + .data = &sysctl_ibrs_dump,
515 + .maxlen = sizeof(unsigned int),
517 + .proc_handler = proc_dointvec_ibrs_dump,
524 @@ -2585,6 +2630,86 @@ int proc_dointvec_minmax(struct ctl_table *table, int write,
525 do_proc_dointvec_minmax_conv, &param);
529 +int proc_dointvec_ibrs_dump(struct ctl_table *table, int write,
530 + void __user *buffer, size_t *lenp, loff_t *ppos)
535 + ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
536 + printk("sysctl_ibrs_enabled = %u, sysctl_ibpb_enabled = %u\n", sysctl_ibrs_enabled, sysctl_ibpb_enabled);
537 + printk("use_ibrs = %d, use_ibpb = %d\n", use_ibrs, use_ibpb);
538 + for_each_online_cpu(cpu) {
541 + if (boot_cpu_has(X86_FEATURE_SPEC_CTRL))
542 + rdmsrl_on_cpu(cpu, MSR_IA32_SPEC_CTRL, &val);
545 + printk("read cpu %d ibrs val %lu\n", cpu, (unsigned long) val);
550 +int proc_dointvec_ibrs_ctrl(struct ctl_table *table, int write,
551 + void __user *buffer, size_t *lenp, loff_t *ppos)
556 + ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
557 + pr_debug("sysctl_ibrs_enabled = %u, sysctl_ibpb_enabled = %u\n", sysctl_ibrs_enabled, sysctl_ibpb_enabled);
558 + pr_debug("before:use_ibrs = %d, use_ibpb = %d\n", use_ibrs, use_ibpb);
559 + if (sysctl_ibrs_enabled == 0) {
560 + /* always set IBRS off */
561 + set_ibrs_disabled();
562 + if (ibrs_supported) {
563 + for_each_online_cpu(cpu)
564 + wrmsrl_on_cpu(cpu, MSR_IA32_SPEC_CTRL, 0x0);
566 + } else if (sysctl_ibrs_enabled == 2) {
567 + /* always set IBRS on, even in user space */
568 + clear_ibrs_disabled();
569 + if (ibrs_supported) {
570 + for_each_online_cpu(cpu)
571 + wrmsrl_on_cpu(cpu, MSR_IA32_SPEC_CTRL, FEATURE_ENABLE_IBRS);
573 + sysctl_ibrs_enabled = 0;
575 + } else if (sysctl_ibrs_enabled == 1) {
576 + /* use IBRS in kernel */
577 + clear_ibrs_disabled();
579 + /* platform don't support ibrs */
580 + sysctl_ibrs_enabled = 0;
582 + pr_debug("after:use_ibrs = %d, use_ibpb = %d\n", use_ibrs, use_ibpb);
586 +int proc_dointvec_ibpb_ctrl(struct ctl_table *table, int write,
587 + void __user *buffer, size_t *lenp, loff_t *ppos)
591 + ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
592 + pr_debug("sysctl_ibrs_enabled = %u, sysctl_ibpb_enabled = %u\n", sysctl_ibrs_enabled, sysctl_ibpb_enabled);
593 + pr_debug("before:use_ibrs = %d, use_ibpb = %d\n", use_ibrs, use_ibpb);
594 + if (sysctl_ibpb_enabled == 0)
595 + set_ibpb_disabled();
596 + else if (sysctl_ibpb_enabled == 1) {
597 + clear_ibpb_disabled();
599 + /* platform don't support ibpb */
600 + sysctl_ibpb_enabled = 0;
602 + pr_debug("after:use_ibrs = %d, use_ibpb = %d\n", use_ibrs, use_ibpb);
608 struct do_proc_douintvec_minmax_conv_param {