futex: Replace PF_EXITPIDONE with a state

[mirror_ubuntu-bionic-kernel.git] / kernel / cpu.c
diff --git a/kernel/cpu.c b/kernel/cpu.c

index 53f7dc65f9a3b917c1c347834257cf26521eff95..c408d0a735ed1b907f916517e15590913a56af32 100644 (file)
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -10,6 +10,7 @@
  #include <linux/sched/signal.h>
  #include <linux/sched/hotplug.h>
  #include <linux/sched/task.h>
+#include <linux/sched/smt.h>
  #include <linux/unistd.h>
  #include <linux/cpu.h>
  #include <linux/oom.h>
@@ -60,6 +61,7 @@ struct cpuhp_cpu_state {
         bool                    rollback;
         bool                    single;
         bool                    bringup;
+       bool                    booted_once;
         struct hlist_node       *node;
         struct hlist_node       *last;
         enum cpuhp_state        cb_state;
@@ -312,6 +314,15 @@ void cpus_write_unlock(void)
  
  void lockdep_assert_cpus_held(void)
  {
+       /*
+        * We can't have hotplug operations before userspace starts running,
+        * and some init codepaths will knowingly not take the hotplug lock.
+        * This is all valid, so mute lockdep until it makes sense to report
+        * unheld locks.
+        */
+       if (system_state < SYSTEM_RUNNING)
+               return;
+
         percpu_rwsem_assert_held(&cpu_hotplug_lock);
  }
  
@@ -346,6 +357,67 @@ void cpu_hotplug_enable(void)
  EXPORT_SYMBOL_GPL(cpu_hotplug_enable);
  #endif /* CONFIG_HOTPLUG_CPU */
  
+/*
+ * Architectures that need SMT-specific errata handling during SMT hotplug
+ * should override this.
+ */
+void __weak arch_smt_update(void) { }
+
+#ifdef CONFIG_HOTPLUG_SMT
+enum cpuhp_smt_control cpu_smt_control __read_mostly = CPU_SMT_ENABLED;
+
+void __init cpu_smt_disable(bool force)
+{
+       if (cpu_smt_control == CPU_SMT_FORCE_DISABLED ||
+               cpu_smt_control == CPU_SMT_NOT_SUPPORTED)
+               return;
+
+       if (force) {
+               pr_info("SMT: Force disabled\n");
+               cpu_smt_control = CPU_SMT_FORCE_DISABLED;
+       } else {
+               pr_info("SMT: disabled\n");
+               cpu_smt_control = CPU_SMT_DISABLED;
+       }
+}
+
+/*
+ * The decision whether SMT is supported can only be done after the full
+ * CPU identification. Called from architecture code.
+ */
+void __init cpu_smt_check_topology(void)
+{
+       if (!topology_smt_supported())
+               cpu_smt_control = CPU_SMT_NOT_SUPPORTED;
+}
+
+static int __init smt_cmdline_disable(char *str)
+{
+       cpu_smt_disable(str && !strcmp(str, "force"));
+       return 0;
+}
+early_param("nosmt", smt_cmdline_disable);
+
+static inline bool cpu_smt_allowed(unsigned int cpu)
+{
+       if (cpu_smt_control == CPU_SMT_ENABLED)
+               return true;
+
+       if (topology_is_primary_thread(cpu))
+               return true;
+
+       /*
+        * On x86 it's required to boot all logical CPUs at least once so
+        * that the init code can get a chance to set CR4.MCE on each
+        * CPU. Otherwise, a broadcasted MCE observing CR4.MCE=0b on any
+        * core will shut down the machine.
+        */
+       return !per_cpu(cpuhp_state, cpu).booted_once;
+}
+#else
+static inline bool cpu_smt_allowed(unsigned int cpu) { return true; }
+#endif
+
  static inline enum cpuhp_state
  cpuhp_set_state(struct cpuhp_cpu_state *st, enum cpuhp_state target)
  {
@@ -426,6 +498,16 @@ static int bringup_wait_for_ap(unsigned int cpu)
         stop_machine_unpark(cpu);
         kthread_unpark(st->thread);
  
+       /*
+        * SMT soft disabling on X86 requires to bring the CPU out of the
+        * BIOS 'wait for SIPI' state in order to set the CR4.MCE bit.  The
+        * CPU marked itself as booted_once in notify_cpu_starting() so the
+        * cpu_smt_allowed() check will now return false if this is not the
+        * primary sibling.
+        */
+       if (!cpu_smt_allowed(cpu))
+               return -ECANCELED;
+
         if (st->target <= CPUHP_AP_ONLINE_IDLE)
                 return 0;
  
@@ -466,6 +548,20 @@ static void undo_cpu_up(unsigned int cpu, struct cpuhp_cpu_state *st)
         }
  }
  
+static inline bool can_rollback_cpu(struct cpuhp_cpu_state *st)
+{
+       if (IS_ENABLED(CONFIG_HOTPLUG_CPU))
+               return true;
+       /*
+        * When CPU hotplug is disabled, then taking the CPU down is not
+        * possible because takedown_cpu() and the architecture and
+        * subsystem specific mechanisms are not available. So the CPU
+        * which would be completely unplugged again needs to stay around
+        * in the current state.
+        */
+       return st->state <= CPUHP_BRINGUP_CPU;
+}
+
  static int cpuhp_up_callbacks(unsigned int cpu, struct cpuhp_cpu_state *st,
                               enum cpuhp_state target)
  {
@@ -476,8 +572,10 @@ static int cpuhp_up_callbacks(unsigned int cpu, struct cpuhp_cpu_state *st,
                 st->state++;
                 ret = cpuhp_invoke_callback(cpu, st->state, true, NULL, NULL);
                 if (ret) {
-                       st->target = prev_state;
-                       undo_cpu_up(cpu, st);
+                       if (can_rollback_cpu(st)) {
+                               st->target = prev_state;
+                               undo_cpu_up(cpu, st);
+                       }
                         break;
                 }
         }
@@ -522,15 +620,15 @@ static void cpuhp_thread_fun(unsigned int cpu)
         bool bringup = st->bringup;
         enum cpuhp_state state;
  
+       if (WARN_ON_ONCE(!st->should_run))
+               return;
+
         /*
          * ACQUIRE for the cpuhp_should_run() load of ->should_run. Ensures
          * that if we see ->should_run we also see the rest of the state.
          */
         smp_mb();
  
-       if (WARN_ON_ONCE(!st->should_run))
-               return;
-
         cpuhp_lock_acquire(bringup);
  
         if (st->single) {
@@ -758,7 +856,6 @@ static int takedown_cpu(unsigned int cpu)
  
         /* Park the smpboot threads */
         kthread_park(per_cpu_ptr(&cpuhp_state, cpu)->thread);
-       smpboot_park_threads(cpu);
  
         /*
          * Prevent irq alloc/free while the dying cpu reorganizes the
@@ -843,7 +940,8 @@ static int cpuhp_down_callbacks(unsigned int cpu, struct cpuhp_cpu_state *st,
                 ret = cpuhp_invoke_callback(cpu, st->state, false, NULL, NULL);
                 if (ret) {
                         st->target = prev_state;
-                       undo_cpu_down(cpu, st);
+                       if (st->state < prev_state)
+                               undo_cpu_down(cpu, st);
                         break;
                 }
         }
@@ -896,7 +994,7 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen,
          * to do the further cleanups.
          */
         ret = cpuhp_down_callbacks(cpu, st, target);
-       if (ret && st->state > CPUHP_TEARDOWN_CPU && st->state < prev_state) {
+       if (ret && st->state == CPUHP_TEARDOWN_CPU && st->state < prev_state) {
                 cpuhp_reset_state(st, prev_state);
                 __cpuhp_kick_ap(st);
         }
@@ -908,23 +1006,23 @@ out:
          * concurrent CPU hotplug via cpu_add_remove_lock.
          */
         lockup_detector_cleanup();
+       arch_smt_update();
         return ret;
  }
  
+static int cpu_down_maps_locked(unsigned int cpu, enum cpuhp_state target)
+{
+       if (cpu_hotplug_disabled)
+               return -EBUSY;
+       return _cpu_down(cpu, 0, target);
+}
+
  static int do_cpu_down(unsigned int cpu, enum cpuhp_state target)
  {
         int err;
  
         cpu_maps_update_begin();
-
-       if (cpu_hotplug_disabled) {
-               err = -EBUSY;
-               goto out;
-       }
-
-       err = _cpu_down(cpu, 0, target);
-
-out:
+       err = cpu_down_maps_locked(cpu, target);
         cpu_maps_update_done();
         return err;
  }
@@ -953,6 +1051,7 @@ void notify_cpu_starting(unsigned int cpu)
         int ret;
  
         rcu_cpu_starting(cpu);  /* Enables RCU usage on this CPU. */
+       st->booted_once = true;
         while (st->state < target) {
                 st->state++;
                 ret = cpuhp_invoke_callback(cpu, st->state, true, NULL, NULL);
@@ -1036,6 +1135,7 @@ static int _cpu_up(unsigned int cpu, int tasks_frozen, enum cpuhp_state target)
         ret = cpuhp_up_callbacks(cpu, st, target);
  out:
         cpus_write_unlock();
+       arch_smt_update();
         return ret;
  }
  
@@ -1062,6 +1162,10 @@ static int do_cpu_up(unsigned int cpu, enum cpuhp_state target)
                 err = -EBUSY;
                 goto out;
         }
+       if (!cpu_smt_allowed(cpu)) {
+               err = -EPERM;
+               goto out;
+       }
  
         err = _cpu_up(cpu, 0, target);
  out:
@@ -1344,7 +1448,7 @@ static struct cpuhp_step cpuhp_ap_states[] = {
         [CPUHP_AP_SMPBOOT_THREADS] = {
                 .name                   = "smpboot/threads:online",
                 .startup.single         = smpboot_unpark_threads,
-               .teardown.single        = NULL,
+               .teardown.single        = smpboot_park_threads,
         },
         [CPUHP_AP_IRQ_AFFINITY_ONLINE] = {
                 .name                   = "irq/affinity:online",
@@ -1841,6 +1945,9 @@ static ssize_t write_cpuhp_fail(struct device *dev,
         if (ret)
                 return ret;
  
+       if (fail < CPUHP_OFFLINE || fail > CPUHP_ONLINE)
+               return -EINVAL;
+
         /*
          * Cannot fail STARTING/DYING callbacks.
          */
@@ -1918,10 +2025,175 @@ static const struct attribute_group cpuhp_cpu_root_attr_group = {
         NULL
  };
  
+#ifdef CONFIG_HOTPLUG_SMT
+
+static const char *smt_states[] = {
+       [CPU_SMT_ENABLED]               = "on",
+       [CPU_SMT_DISABLED]              = "off",
+       [CPU_SMT_FORCE_DISABLED]        = "forceoff",
+       [CPU_SMT_NOT_SUPPORTED]         = "notsupported",
+};
+
+static ssize_t
+show_smt_control(struct device *dev, struct device_attribute *attr, char *buf)
+{
+       return snprintf(buf, PAGE_SIZE - 2, "%s\n", smt_states[cpu_smt_control]);
+}
+
+static void cpuhp_offline_cpu_device(unsigned int cpu)
+{
+       struct device *dev = get_cpu_device(cpu);
+
+       dev->offline = true;
+       /* Tell user space about the state change */
+       kobject_uevent(&dev->kobj, KOBJ_OFFLINE);
+}
+
+static void cpuhp_online_cpu_device(unsigned int cpu)
+{
+       struct device *dev = get_cpu_device(cpu);
+
+       dev->offline = false;
+       /* Tell user space about the state change */
+       kobject_uevent(&dev->kobj, KOBJ_ONLINE);
+}
+
+int cpuhp_smt_disable(enum cpuhp_smt_control ctrlval)
+{
+       int cpu, ret = 0;
+
+       cpu_maps_update_begin();
+       for_each_online_cpu(cpu) {
+               if (topology_is_primary_thread(cpu))
+                       continue;
+               ret = cpu_down_maps_locked(cpu, CPUHP_OFFLINE);
+               if (ret)
+                       break;
+               /*
+                * As this needs to hold the cpu maps lock it's impossible
+                * to call device_offline() because that ends up calling
+                * cpu_down() which takes cpu maps lock. cpu maps lock
+                * needs to be held as this might race against in-kernel
+                * abusers of the hotplug machinery (thermal management).
+                *
+                * So nothing would update device:offline state. That would
+                * leave the sysfs entry stale and prevent onlining after
+                * smt control has been changed to 'off' again. This is
+                * called under the sysfs hotplug lock, so it is properly
+                * serialized against the regular offline usage.
+                */
+               cpuhp_offline_cpu_device(cpu);
+       }
+       if (!ret) {
+               cpu_smt_control = ctrlval;
+               arch_smt_update();
+       }
+       cpu_maps_update_done();
+       return ret;
+}
+
+int cpuhp_smt_enable(void)
+{
+       int cpu, ret = 0;
+
+       cpu_maps_update_begin();
+       cpu_smt_control = CPU_SMT_ENABLED;
+       arch_smt_update();
+       for_each_present_cpu(cpu) {
+               /* Skip online CPUs and CPUs on offline nodes */
+               if (cpu_online(cpu) || !node_online(cpu_to_node(cpu)))
+                       continue;
+               ret = _cpu_up(cpu, 0, CPUHP_ONLINE);
+               if (ret)
+                       break;
+               /* See comment in cpuhp_smt_disable() */
+               cpuhp_online_cpu_device(cpu);
+       }
+       cpu_maps_update_done();
+       return ret;
+}
+
+static ssize_t
+store_smt_control(struct device *dev, struct device_attribute *attr,
+                 const char *buf, size_t count)
+{
+       int ctrlval, ret;
+
+       if (sysfs_streq(buf, "on"))
+               ctrlval = CPU_SMT_ENABLED;
+       else if (sysfs_streq(buf, "off"))
+               ctrlval = CPU_SMT_DISABLED;
+       else if (sysfs_streq(buf, "forceoff"))
+               ctrlval = CPU_SMT_FORCE_DISABLED;
+       else
+               return -EINVAL;
+
+       if (cpu_smt_control == CPU_SMT_FORCE_DISABLED)
+               return -EPERM;
+
+       if (cpu_smt_control == CPU_SMT_NOT_SUPPORTED)
+               return -ENODEV;
+
+       ret = lock_device_hotplug_sysfs();
+       if (ret)
+               return ret;
+
+       if (ctrlval != cpu_smt_control) {
+               switch (ctrlval) {
+               case CPU_SMT_ENABLED:
+                       ret = cpuhp_smt_enable();
+                       break;
+               case CPU_SMT_DISABLED:
+               case CPU_SMT_FORCE_DISABLED:
+                       ret = cpuhp_smt_disable(ctrlval);
+                       break;
+               }
+       }
+
+       unlock_device_hotplug();
+       return ret ? ret : count;
+}
+static DEVICE_ATTR(control, 0644, show_smt_control, store_smt_control);
+
+static ssize_t
+show_smt_active(struct device *dev, struct device_attribute *attr, char *buf)
+{
+       bool active = topology_max_smt_threads() > 1;
+
+       return snprintf(buf, PAGE_SIZE - 2, "%d\n", active);
+}
+static DEVICE_ATTR(active, 0444, show_smt_active, NULL);
+
+static struct attribute *cpuhp_smt_attrs[] = {
+       &dev_attr_control.attr,
+       &dev_attr_active.attr,
+       NULL
+};
+
+static const struct attribute_group cpuhp_smt_attr_group = {
+       .attrs = cpuhp_smt_attrs,
+       .name = "smt",
+       NULL
+};
+
+static int __init cpu_smt_state_init(void)
+{
+       return sysfs_create_group(&cpu_subsys.dev_root->kobj,
+                                 &cpuhp_smt_attr_group);
+}
+
+#else
+static inline int cpu_smt_state_init(void) { return 0; }
+#endif
+
  static int __init cpuhp_sysfs_init(void)
  {
         int cpu, ret;
  
+       ret = cpu_smt_state_init();
+       if (ret)
+               return ret;
+
         ret = sysfs_create_group(&cpu_subsys.dev_root->kobj,
                                  &cpuhp_cpu_root_attr_group);
         if (ret)
@@ -2022,7 +2294,53 @@ void __init boot_cpu_init(void)
  /*
   * Must be called _AFTER_ setting up the per_cpu areas
   */
-void __init boot_cpu_state_init(void)
+void __init boot_cpu_hotplug_init(void)
  {
+#ifdef CONFIG_SMP
+       per_cpu_ptr(&cpuhp_state, smp_processor_id())->booted_once = true;
+#endif
         per_cpu_ptr(&cpuhp_state, smp_processor_id())->state = CPUHP_ONLINE;
  }
+
+/*
+ * These are used for a global "mitigations=" cmdline option for toggling
+ * optional CPU mitigations.
+ */
+enum cpu_mitigations {
+       CPU_MITIGATIONS_OFF,
+       CPU_MITIGATIONS_AUTO,
+       CPU_MITIGATIONS_AUTO_NOSMT,
+};
+
+static enum cpu_mitigations cpu_mitigations __ro_after_init =
+       CPU_MITIGATIONS_AUTO;
+
+static int __init mitigations_parse_cmdline(char *arg)
+{
+       if (!strcmp(arg, "off"))
+               cpu_mitigations = CPU_MITIGATIONS_OFF;
+       else if (!strcmp(arg, "auto"))
+               cpu_mitigations = CPU_MITIGATIONS_AUTO;
+       else if (!strcmp(arg, "auto,nosmt"))
+               cpu_mitigations = CPU_MITIGATIONS_AUTO_NOSMT;
+       else
+               pr_crit("Unsupported mitigations=%s, system may still be vulnerable\n",
+                       arg);
+
+       return 0;
+}
+early_param("mitigations", mitigations_parse_cmdline);
+
+/* mitigations=off */
+bool cpu_mitigations_off(void)
+{
+       return cpu_mitigations == CPU_MITIGATIONS_OFF;
+}
+EXPORT_SYMBOL_GPL(cpu_mitigations_off);
+
+/* mitigations=auto,nosmt */
+bool cpu_mitigations_auto_nosmt(void)
+{
+       return cpu_mitigations == CPU_MITIGATIONS_AUTO_NOSMT;
+}
+EXPORT_SYMBOL_GPL(cpu_mitigations_auto_nosmt);