From 357b57d7357d94c89bd1b8d5c15641e549c8e4c2 Mon Sep 17 00:00:00 2001 From: Tim Chen Date: Thu, 16 Nov 2017 04:47:48 -0800 Subject: [PATCH] x86/spec_ctrl: Add sysctl knobs to enable/disable SPEC_CTRL feature CVE-2017-5715 (Spectre v2 Intel) There are 2 ways to control IBPB and IBRS 1. At boot time noibrs kernel boot parameter will disable IBRS usage noibpb kernel boot parameter will disable IBPB usage Otherwise if the above parameters are not specified, the system will enable ibrs and ibpb usage if the cpu supports it. 2. At run time echo 0 > /proc/sys/kernel/ibrs_enabled will turn off IBRS echo 1 > /proc/sys/kernel/ibrs_enabled will turn on IBRS in kernel echo 2 > /proc/sys/kernel/ibrs_enabled will turn on IBRS in both userspace and kernel Signed-off-by: Tim Chen Signed-off-by: Andy Whitcroft Signed-off-by: Kleber Sacilotto de Souza [marcelo.cerri@canonical.com: add x86 guards to kernel/smp.c] [marcelo.cerri@canonical.com: include asm/msr.h under x86 guard in kernel/sysctl.c] Signed-off-by: Marcelo Henrique Cerri Signed-off-by: Andy Whitcroft Acked-by: Colin Ian King Acked-by: Kamal Mostafa Signed-off-by: Khalid Elmously --- .../admin-guide/kernel-parameters.txt | 10 ++ arch/x86/include/asm/mwait.h | 4 +- arch/x86/include/asm/spec_ctrl.h | 24 +++- arch/x86/kernel/cpu/intel.c | 11 +- arch/x86/kernel/cpu/microcode/core.c | 11 ++ arch/x86/kernel/process.c | 6 +- arch/x86/kernel/smpboot.c | 4 +- arch/x86/kvm/vmx.c | 4 +- arch/x86/lib/delay.c | 6 +- arch/x86/mm/tlb.c | 2 +- include/linux/smp.h | 87 ++++++++++++ kernel/smp.c | 41 ++++++ kernel/sysctl.c | 125 ++++++++++++++++++ 13 files changed, 316 insertions(+), 19 deletions(-) diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index ee29f470f0b4..1bbfe73fcd6c 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -2566,6 +2566,16 @@ noexec=on: enable non-executable mappings (default) noexec=off: 
disable non-executable mappings + noibrs [X86] + Don't use indirect branch restricted speculation (IBRS) + feature when running in secure environment, + to avoid performance overhead. + + noibpb [X86] + Don't use indirect branch prediction barrier (IBPB) + feature when running in secure environment, + to avoid performance overhead. + nosmap [X86] Disable SMAP (Supervisor Mode Access Prevention) even if it is supported by processor. diff --git a/arch/x86/include/asm/mwait.h b/arch/x86/include/asm/mwait.h index f15120ada161..d665daab3f84 100644 --- a/arch/x86/include/asm/mwait.h +++ b/arch/x86/include/asm/mwait.h @@ -107,14 +107,14 @@ static inline void mwait_idle_with_hints(unsigned long eax, unsigned long ecx) mb(); } - if (boot_cpu_has(X86_FEATURE_SPEC_CTRL)) + if (ibrs_inuse) native_wrmsrl(MSR_IA32_SPEC_CTRL, 0); __monitor((void *)&current_thread_info()->flags, 0, 0); if (!need_resched()) __mwait(eax, ecx); - if (boot_cpu_has(X86_FEATURE_SPEC_CTRL)) + if (ibrs_inuse) native_wrmsrl(MSR_IA32_SPEC_CTRL, FEATURE_ENABLE_IBRS); } current_clr_polling(); diff --git a/arch/x86/include/asm/spec_ctrl.h b/arch/x86/include/asm/spec_ctrl.h index 55ee1f36bda2..4c69e51261cc 100644 --- a/arch/x86/include/asm/spec_ctrl.h +++ b/arch/x86/include/asm/spec_ctrl.h @@ -8,6 +8,9 @@ #ifdef __ASSEMBLY__ +.extern use_ibrs +.extern use_ibpb + #define __ASM_ENABLE_IBRS \ pushq %rax; \ pushq %rcx; \ @@ -104,15 +107,30 @@ add $(32*8), %rsp; .macro ENABLE_IBRS -ALTERNATIVE "", __stringify(__ASM_ENABLE_IBRS), X86_FEATURE_SPEC_CTRL + testl $1, use_ibrs + jz 10f + __ASM_ENABLE_IBRS + jmp 20f +10: + lfence +20: .endm .macro ENABLE_IBRS_CLOBBER -ALTERNATIVE "", __stringify(__ASM_ENABLE_IBRS_CLOBBER), X86_FEATURE_SPEC_CTRL + testl $1, use_ibrs + jz 11f + __ASM_ENABLE_IBRS_CLOBBER + jmp 21f +11: + lfence +21: .endm .macro DISABLE_IBRS -ALTERNATIVE "", __stringify(__ASM_DISABLE_IBRS), X86_FEATURE_SPEC_CTRL + testl $1, use_ibrs + jz 9f + __ASM_DISABLE_IBRS +9: .endm .macro STUFF_RSB diff --git 
a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index f1d94c73625a..c69ea2efbed1 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -628,10 +628,17 @@ static void init_intel(struct cpuinfo_x86 *c) init_intel_misc_features(c); - if (boot_cpu_has(X86_FEATURE_SPEC_CTRL)) + if (boot_cpu_has(X86_FEATURE_SPEC_CTRL)) { printk_once(KERN_INFO "FEATURE SPEC_CTRL Present\n"); - else + set_ibrs_supported(); + set_ibpb_supported(); + if (ibrs_inuse) + sysctl_ibrs_enabled = 1; + if (ibpb_inuse) + sysctl_ibpb_enabled = 1; + } else { printk_once(KERN_INFO "FEATURE SPEC_CTRL Not Present\n"); + } } #ifdef CONFIG_X86_32 diff --git a/arch/x86/kernel/cpu/microcode/core.c b/arch/x86/kernel/cpu/microcode/core.c index c4fa4a85d4cb..6450aeda72fc 100644 --- a/arch/x86/kernel/cpu/microcode/core.c +++ b/arch/x86/kernel/cpu/microcode/core.c @@ -535,6 +535,17 @@ static ssize_t reload_store(struct device *dev, } if (!ret) perf_check_microcode(); + + if (boot_cpu_has(X86_FEATURE_SPEC_CTRL)) { + printk_once(KERN_INFO "FEATURE SPEC_CTRL Present\n"); + set_ibrs_supported(); + set_ibpb_supported(); + if (ibrs_inuse) + sysctl_ibrs_enabled = 1; + if (ibpb_inuse) + sysctl_ibpb_enabled = 1; + } + mutex_unlock(&microcode_mutex); put_online_cpus(); diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 3adb3806a284..3fdf5358998e 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -447,16 +447,16 @@ static __cpuidle void mwait_idle(void) mb(); /* quirk */ } - if (boot_cpu_has(X86_FEATURE_SPEC_CTRL)) + if (ibrs_inuse) native_wrmsrl(MSR_IA32_SPEC_CTRL, 0); __monitor((void *)&current_thread_info()->flags, 0, 0); if (!need_resched()) { __sti_mwait(0, 0); - if (boot_cpu_has(X86_FEATURE_SPEC_CTRL)) + if (ibrs_inuse) native_wrmsrl(MSR_IA32_SPEC_CTRL, FEATURE_ENABLE_IBRS); } else { - if (boot_cpu_has(X86_FEATURE_SPEC_CTRL)) + if (ibrs_inuse) native_wrmsrl(MSR_IA32_SPEC_CTRL, FEATURE_ENABLE_IBRS); local_irq_enable(); } diff --git 
a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index a652bff7add4..9317aa4a7446 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -1693,14 +1693,14 @@ void native_play_dead(void) play_dead_common(); tboot_shutdown(TB_SHUTDOWN_WFS); - if (boot_cpu_has(X86_FEATURE_SPEC_CTRL)) + if (ibrs_inuse) native_wrmsrl(MSR_IA32_SPEC_CTRL, 0); mwait_play_dead(); /* Only returns on failure */ if (cpuidle_play_dead()) hlt_play_dead(); - if (boot_cpu_has(X86_FEATURE_SPEC_CTRL)) + if (ibrs_inuse) native_wrmsrl(MSR_IA32_SPEC_CTRL, FEATURE_ENABLE_IBRS); } diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 8832fad6eda6..5edf05ce45de 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -2266,7 +2266,7 @@ static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu) if (per_cpu(current_vmcs, cpu) != vmx->loaded_vmcs->vmcs) { per_cpu(current_vmcs, cpu) = vmx->loaded_vmcs->vmcs; vmcs_load(vmx->loaded_vmcs->vmcs); - if (boot_cpu_has(X86_FEATURE_SPEC_CTRL)) + if (ibpb_inuse) native_wrmsrl(MSR_IA32_PRED_CMD, FEATURE_SET_IBPB); } @@ -9040,7 +9040,7 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu) atomic_switch_perf_msrs(vmx); - if (boot_cpu_has(X86_FEATURE_SPEC_CTRL)) + if (ibrs_inuse) add_atomic_switch_msr(vmx, MSR_IA32_SPEC_CTRL, vcpu->arch.spec_ctrl, FEATURE_ENABLE_IBRS); diff --git a/arch/x86/lib/delay.c b/arch/x86/lib/delay.c index b088463973e4..72a174642550 100644 --- a/arch/x86/lib/delay.c +++ b/arch/x86/lib/delay.c @@ -107,8 +107,7 @@ static void delay_mwaitx(unsigned long __loops) for (;;) { delay = min_t(u64, MWAITX_MAX_LOOPS, loops); - if (boot_cpu_has(X86_FEATURE_SPEC_CTRL) && - (delay > IBRS_DISABLE_THRESHOLD)) + if (ibrs_inuse && (delay > IBRS_DISABLE_THRESHOLD)) native_wrmsrl(MSR_IA32_SPEC_CTRL, 0); /* @@ -124,8 +123,7 @@ static void delay_mwaitx(unsigned long __loops) */ __mwaitx(MWAITX_DISABLE_CSTATES, delay, MWAITX_ECX_TIMER_ENABLE); - if (boot_cpu_has(X86_FEATURE_SPEC_CTRL) && - (delay > IBRS_DISABLE_THRESHOLD)) + if 
(ibrs_inuse && (delay > IBRS_DISABLE_THRESHOLD)) native_wrmsrl(MSR_IA32_SPEC_CTRL, FEATURE_ENABLE_IBRS); end = rdtsc_ordered(); diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c index 301e6efbc514..6365f769de3d 100644 --- a/arch/x86/mm/tlb.c +++ b/arch/x86/mm/tlb.c @@ -221,7 +221,7 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next, bool need_flush; /* Null tsk means switching to kernel, so that's safe */ - if (boot_cpu_has(X86_FEATURE_SPEC_CTRL) && tsk && + if (ibpb_inuse && tsk && ___ptrace_may_access(tsk, current, PTRACE_MODE_IBPB)) native_wrmsrl(MSR_IA32_PRED_CMD, FEATURE_SET_IBPB); diff --git a/include/linux/smp.h b/include/linux/smp.h index 68123c1fe549..e2935c0a1bb4 100644 --- a/include/linux/smp.h +++ b/include/linux/smp.h @@ -50,6 +50,93 @@ void on_each_cpu_cond(bool (*cond_func)(int cpu, void *info), int smp_call_function_single_async(int cpu, struct call_single_data *csd); +#ifdef CONFIG_X86 +/* indicate usage of IBRS to control execution speculation */ +extern int use_ibrs; +extern u32 sysctl_ibrs_enabled; +extern struct mutex spec_ctrl_mutex; +#define ibrs_supported (use_ibrs & 0x2) +#define ibrs_disabled (use_ibrs & 0x4) +static inline void set_ibrs_inuse(void) +{ + if (ibrs_supported) + use_ibrs |= 0x1; +} +static inline void clear_ibrs_inuse(void) +{ + use_ibrs &= ~0x1; +} +static inline int check_ibrs_inuse(void) +{ + if (use_ibrs & 0x1) + return 1; + else + /* rmb to prevent wrong speculation for security */ + rmb(); + return 0; +} +static inline void set_ibrs_supported(void) +{ + use_ibrs |= 0x2; + if (!ibrs_disabled) + set_ibrs_inuse(); +} +static inline void set_ibrs_disabled(void) +{ + use_ibrs |= 0x4; + if (check_ibrs_inuse()) + clear_ibrs_inuse(); +} +static inline void clear_ibrs_disabled(void) +{ + use_ibrs &= ~0x4; + set_ibrs_inuse(); +} +#define ibrs_inuse (check_ibrs_inuse()) + +/* indicate usage of IBPB to control execution speculation */ +extern int use_ibpb; +extern u32 sysctl_ibpb_enabled; +#define 
ibpb_supported (use_ibpb & 0x2) +#define ibpb_disabled (use_ibpb & 0x4) +static inline void set_ibpb_inuse(void) +{ + if (ibpb_supported) + use_ibpb |= 0x1; +} +static inline void clear_ibpb_inuse(void) +{ + use_ibpb &= ~0x1; +} +static inline int check_ibpb_inuse(void) +{ + if (use_ibpb & 0x1) + return 1; + else + /* rmb to prevent wrong speculation for security */ + rmb(); + return 0; +} +static inline void set_ibpb_supported(void) +{ + use_ibpb |= 0x2; + if (!ibpb_disabled) + set_ibpb_inuse(); +} +static inline void set_ibpb_disabled(void) +{ + use_ibpb |= 0x4; + if (check_ibpb_inuse()) + clear_ibpb_inuse(); +} +static inline void clear_ibpb_disabled(void) +{ + use_ibpb &= ~0x4; + set_ibpb_inuse(); +} +#define ibpb_inuse (check_ibpb_inuse()) +#endif + #ifdef CONFIG_SMP #include diff --git a/kernel/smp.c b/kernel/smp.c index 3061483cb3ad..3bece045f4a4 100644 --- a/kernel/smp.c +++ b/kernel/smp.c @@ -498,6 +498,26 @@ EXPORT_SYMBOL(smp_call_function); unsigned int setup_max_cpus = NR_CPUS; EXPORT_SYMBOL(setup_max_cpus); +#ifdef CONFIG_X86 +/* + * use IBRS + * bit 0 = indicate if ibrs is currently in use + * bit 1 = indicate if system supports ibrs + * bit 2 = indicate if admin disables ibrs +*/ + +int use_ibrs; +EXPORT_SYMBOL(use_ibrs); + +/* + * use IBPB + * bit 0 = indicate if ibpb is currently in use + * bit 1 = indicate if system supports ibpb + * bit 2 = indicate if admin disables ibpb +*/ +int use_ibpb; +EXPORT_SYMBOL(use_ibpb); +#endif /* * Setup routine for controlling SMP activation @@ -522,6 +542,27 @@ static int __init nosmp(char *str) early_param("nosmp", nosmp); +#ifdef CONFIG_X86 +static int __init noibrs(char *str) +{ + set_ibrs_disabled(); + + return 0; +} + +early_param("noibrs", noibrs); + +static int __init noibpb(char *str) +{ + set_ibpb_disabled(); + + return 0; +} + +early_param("noibpb", noibpb); +#endif + + /* this is hard limit */ static int __init nrcpus(char *str) { diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 
7ab08d5728e6..69c37bd6251a 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -72,6 +72,7 @@ #include #ifdef CONFIG_X86 +#include <asm/msr.h> #include #include #include @@ -222,6 +223,15 @@ static int proc_dostring_coredump(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos); #endif +#ifdef CONFIG_X86 +int proc_dointvec_ibrs_ctrl(struct ctl_table *table, int write, + void __user *buffer, size_t *lenp, loff_t *ppos); +int proc_dointvec_ibpb_ctrl(struct ctl_table *table, int write, + void __user *buffer, size_t *lenp, loff_t *ppos); +int proc_dointvec_ibrs_dump(struct ctl_table *table, int write, + void __user *buffer, size_t *lenp, loff_t *ppos); +#endif + #ifdef CONFIG_MAGIC_SYSRQ /* Note: sysrq code uses it's own private copy */ static int __sysrq_enabled = CONFIG_MAGIC_SYSRQ_DEFAULT_ENABLE; @@ -258,6 +268,12 @@ extern struct ctl_table epoll_table[]; int sysctl_legacy_va_layout; #endif +u32 sysctl_ibrs_dump = 0; +u32 sysctl_ibrs_enabled = 0; +EXPORT_SYMBOL(sysctl_ibrs_enabled); +u32 sysctl_ibpb_enabled = 0; +EXPORT_SYMBOL(sysctl_ibpb_enabled); + /* The default sysctl tables: */ static struct ctl_table sysctl_base_table[] = { @@ -1241,6 +1257,35 @@ static struct ctl_table kern_table[] = { .extra1 = &zero, .extra2 = &one, }, +#endif +#ifdef CONFIG_X86 + { + .procname = "ibrs_enabled", + .data = &sysctl_ibrs_enabled, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = proc_dointvec_ibrs_ctrl, + .extra1 = &zero, + .extra2 = &two, + }, + { + .procname = "ibpb_enabled", + .data = &sysctl_ibpb_enabled, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = proc_dointvec_ibpb_ctrl, + .extra1 = &zero, + .extra2 = &one, + }, + { + .procname = "ibrs_dump", + .data = &sysctl_ibrs_dump, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = proc_dointvec_ibrs_dump, + .extra1 = &zero, + .extra2 = &one, + }, #endif { } }; @@ -2585,6 +2630,86 @@ int proc_dointvec_minmax(struct ctl_table *table, int write, 
do_proc_dointvec_minmax_conv, &param); } +#ifdef CONFIG_X86 +int proc_dointvec_ibrs_dump(struct ctl_table *table, int write, + void __user *buffer, size_t *lenp, loff_t *ppos) +{ + int ret; + unsigned int cpu; + + ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos); + printk("sysctl_ibrs_enabled = %u, sysctl_ibpb_enabled = %u\n", sysctl_ibrs_enabled, sysctl_ibpb_enabled); + printk("use_ibrs = %d, use_ibpb = %d\n", use_ibrs, use_ibpb); + for_each_online_cpu(cpu) { + u64 val; + + if (boot_cpu_has(X86_FEATURE_SPEC_CTRL)) + rdmsrl_on_cpu(cpu, MSR_IA32_SPEC_CTRL, &val); + else + val = 0; + printk("read cpu %d ibrs val %lu\n", cpu, (unsigned long) val); + } + return ret; +} + +int proc_dointvec_ibrs_ctrl(struct ctl_table *table, int write, + void __user *buffer, size_t *lenp, loff_t *ppos) +{ + int ret; + unsigned int cpu; + + ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos); + pr_debug("sysctl_ibrs_enabled = %u, sysctl_ibpb_enabled = %u\n", sysctl_ibrs_enabled, sysctl_ibpb_enabled); + pr_debug("before:use_ibrs = %d, use_ibpb = %d\n", use_ibrs, use_ibpb); + if (sysctl_ibrs_enabled == 0) { + /* always set IBRS off */ + set_ibrs_disabled(); + if (ibrs_supported) { + for_each_online_cpu(cpu) + wrmsrl_on_cpu(cpu, MSR_IA32_SPEC_CTRL, 0x0); + } + } else if (sysctl_ibrs_enabled == 2) { + /* always set IBRS on, even in user space */ + clear_ibrs_disabled(); + if (ibrs_supported) { + for_each_online_cpu(cpu) + wrmsrl_on_cpu(cpu, MSR_IA32_SPEC_CTRL, FEATURE_ENABLE_IBRS); + } else { + sysctl_ibrs_enabled = 0; + } + } else if (sysctl_ibrs_enabled == 1) { + /* use IBRS in kernel */ + clear_ibrs_disabled(); + if (!ibrs_inuse) + /* platform doesn't support ibrs */ + sysctl_ibrs_enabled = 0; + } + pr_debug("after:use_ibrs = %d, use_ibpb = %d\n", use_ibrs, use_ibpb); + return ret; +} + +int proc_dointvec_ibpb_ctrl(struct ctl_table *table, int write, + void __user *buffer, size_t *lenp, loff_t *ppos) +{ + int ret; + + ret = proc_dointvec_minmax(table, write, buffer, lenp, 
ppos); + pr_debug("sysctl_ibrs_enabled = %u, sysctl_ibpb_enabled = %u\n", sysctl_ibrs_enabled, sysctl_ibpb_enabled); + pr_debug("before:use_ibrs = %d, use_ibpb = %d\n", use_ibrs, use_ibpb); + if (sysctl_ibpb_enabled == 0) + set_ibpb_disabled(); + else if (sysctl_ibpb_enabled == 1) { + clear_ibpb_disabled(); + if (!ibpb_inuse) + /* platform doesn't support ibpb */ + sysctl_ibpb_enabled = 0; + } + pr_debug("after:use_ibrs = %d, use_ibpb = %d\n", use_ibrs, use_ibpb); + return ret; +} +#endif + + struct do_proc_douintvec_minmax_conv_param { unsigned int *min; unsigned int *max; -- 2.39.2