git.proxmox.com Git - mirror_ubuntu-zesty-kernel.git/commitdiff
Merge branch 'smp-hotplug-for-linus' of git://git.kernel.org/pub/scm/linux/kernel...
author    Linus Torvalds <torvalds@linux-foundation.org>
          Fri, 29 Jul 2016 20:55:30 +0000 (13:55 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
          Fri, 29 Jul 2016 20:55:30 +0000 (13:55 -0700)
Pull smp hotplug updates from Thomas Gleixner:
 "This is the next part of the hotplug rework.

   - Convert all notifiers with a priority assigned

   - Convert all CPU_STARTING/DYING notifiers

     The final removal of the STARTING/DYING infrastructure will happen
     when the merge window closes.

  Another 700 lines of impenetrable maze gone :)"

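The conversion pattern behind most of these commits looks roughly like the sketch below. This is a hedged illustration, not code from the series: the "foo" driver, foo_init_cpu()/foo_exit_cpu() and CPUHP_AP_FOO_STARTING are placeholders. The idea is that a multiplexed notifier switching on CPU_STARTING/CPU_DYING action codes is replaced by two plain callbacks registered once with cpuhp_setup_state().

    /* Old style: one notifier_block, action codes, implicit priority ordering. */
    #include <linux/cpu.h>
    #include <linux/notifier.h>

    static void foo_init_cpu(unsigned int cpu);   /* placeholder per-CPU setup */
    static void foo_exit_cpu(unsigned int cpu);   /* placeholder per-CPU teardown */

    static int foo_cpu_notify(struct notifier_block *nb,
                              unsigned long action, void *hcpu)
    {
            unsigned int cpu = (unsigned long)hcpu;

            switch (action & ~CPU_TASKS_FROZEN) {
            case CPU_STARTING:
                    foo_init_cpu(cpu);            /* runs on the incoming CPU */
                    break;
            case CPU_DYING:
                    foo_exit_cpu(cpu);            /* runs on the outgoing CPU */
                    break;
            }
            return NOTIFY_OK;
    }

    static struct notifier_block foo_cpu_nb = {
            .notifier_call  = foo_cpu_notify,
            .priority       = 100,                /* ordering vs. other notifiers */
    };
    /* register_cpu_notifier(&foo_cpu_nb); */

    /* New style: one explicit hotplug state, ordering fixed by enum cpuhp_state. */
    static int foo_starting_cpu(unsigned int cpu)
    {
            foo_init_cpu(cpu);
            return 0;
    }

    static int foo_dying_cpu(unsigned int cpu)
    {
            foo_exit_cpu(cpu);
            return 0;
    }

    /*
     * cpuhp_setup_state(CPUHP_AP_FOO_STARTING, "AP_FOO_STARTING",
     *                   foo_starting_cpu, foo_dying_cpu);
     */

The cpuhp_setup_state_nocalls() variant seen in several of the diffs below registers the callbacks without invoking the startup callback for CPUs that are already online, which suits callers that have just initialized the boot CPU by hand.
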
* 'smp-hotplug-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (70 commits)
  timers/core: Correct callback order during CPU hot plug
  leds/trigger/cpu: Move from CPU_STARTING to ONLINE level
  powerpc/numa: Convert to hotplug state machine
  arm/perf: Fix hotplug state machine conversion
  irqchip/armada: Avoid unused function warnings
  ARC/time: Convert to hotplug state machine
  clocksource/atlas7: Convert to hotplug state machine
  clocksource/armada-370-xp: Convert to hotplug state machine
  clocksource/exynos_mct: Convert to hotplug state machine
  clocksource/arm_global_timer: Convert to hotplug state machine
  rcu: Convert rcutree to hotplug state machine
  KVM/arm/arm64/vgic-new: Convert to hotplug state machine
  smp/cfd: Convert core to hotplug state machine
  x86/x2apic: Convert to CPU hotplug state machine
  profile: Convert to hotplug state machine
  timers/core: Convert to hotplug state machine
  hrtimer: Convert to hotplug state machine
  x86/tboot: Convert to hotplug state machine
  arm64/armv8 deprecated: Convert to hotplug state machine
  hwtracing/coresight-etm4x: Convert to hotplug state machine
  ...

23 files changed:
arch/arm/mach-mvebu/coherency.c
arch/arm/xen/enlighten.c
arch/arm64/kernel/armv8_deprecated.c
arch/s390/kernel/perf_cpum_sf.c
arch/x86/entry/vdso/vma.c
arch/x86/events/amd/ibs.c
arch/x86/events/core.c
arch/x86/events/intel/core.c
arch/x86/events/intel/cstate.c
arch/x86/events/intel/rapl.c
arch/x86/events/intel/uncore.c
arch/x86/kernel/apic/x2apic_cluster.c
arch/x86/kvm/x86.c
drivers/acpi/processor_driver.c
drivers/irqchip/irq-armada-370-xp.c
drivers/irqchip/irq-bcm2836.c
drivers/irqchip/irq-gic.c
drivers/perf/arm_pmu.c
include/linux/perf_event.h
kernel/events/core.c
kernel/smp.c
kernel/workqueue.c
virt/kvm/kvm_main.c

index e80f0dde218919dab8d7a2b5873a4962755f3ee2,77aaa5243a2075675c59fedbc140421a56c34956..ae2a018b93050fa8171d2164124d85796e801bb6
@@@ -111,20 -111,12 +111,12 @@@ static struct notifier_block mvebu_hwcc
        .notifier_call = mvebu_hwcc_notifier,
  };
  
- static int armada_xp_clear_shared_l2_notifier_func(struct notifier_block *nfb,
-                                       unsigned long action, void *hcpu)
+ static int armada_xp_clear_l2_starting(unsigned int cpu)
  {
-       if (action == CPU_STARTING || action == CPU_STARTING_FROZEN)
-               armada_xp_clear_shared_l2();
-       return NOTIFY_OK;
+       armada_xp_clear_shared_l2();
+       return 0;
  }
  
- static struct notifier_block armada_xp_clear_shared_l2_notifier = {
-       .notifier_call = armada_xp_clear_shared_l2_notifier_func,
-       .priority = 100,
- };
  static void __init armada_370_coherency_init(struct device_node *np)
  {
        struct resource res;
  
        of_node_put(cpu_config_np);
  
-       register_cpu_notifier(&armada_xp_clear_shared_l2_notifier);
+       cpuhp_setup_state_nocalls(CPUHP_AP_ARM_MVEBU_COHERENCY,
+                                 "AP_ARM_MVEBU_COHERENCY",
+                                 armada_xp_clear_l2_starting, NULL);
  exit:
        set_cpu_coherent();
  }
  
  /*
 - * This ioremap hook is used on Armada 375/38x to ensure that PCIe
 - * memory areas are mapped as MT_UNCACHED instead of MT_DEVICE. This
 - * is needed as a workaround for a deadlock issue between the PCIe
 - * interface and the cache controller.
 + * This ioremap hook is used on Armada 375/38x to ensure that all MMIO
 + * areas are mapped as MT_UNCACHED instead of MT_DEVICE. This is
 + * needed for the HW I/O coherency mechanism to work properly without
 + * deadlock.
   */
  static void __iomem *
 -armada_pcie_wa_ioremap_caller(phys_addr_t phys_addr, size_t size,
 -                            unsigned int mtype, void *caller)
 +armada_wa_ioremap_caller(phys_addr_t phys_addr, size_t size,
 +                       unsigned int mtype, void *caller)
  {
 -      struct resource pcie_mem;
 -
 -      mvebu_mbus_get_pcie_mem_aperture(&pcie_mem);
 -
 -      if (pcie_mem.start <= phys_addr && (phys_addr + size) <= pcie_mem.end)
 -              mtype = MT_UNCACHED;
 -
 +      mtype = MT_UNCACHED;
        return __arm_ioremap_caller(phys_addr, size, mtype, caller);
  }
  
@@@ -180,8 -179,7 +173,8 @@@ static void __init armada_375_380_coher
        struct device_node *cache_dn;
  
        coherency_cpu_base = of_iomap(np, 0);
 -      arch_ioremap_caller = armada_pcie_wa_ioremap_caller;
 +      arch_ioremap_caller = armada_wa_ioremap_caller;
 +      pci_ioremap_set_mem_type(MT_UNCACHED);
  
        /*
         * We should switch the PL310 to I/O coherency mode only if
diff --combined arch/arm/xen/enlighten.c
index 0bea3d271f6efd28c9b8efb225ca20e68c3c2889,d822e2313950bd2f03b837c7f6ae574d944c3a58..b0b82f5ea33825943fb75066cf1af0d52f17b3c2
  #include <xen/page.h>
  #include <xen/interface/sched.h>
  #include <xen/xen-ops.h>
 -#include <asm/paravirt.h>
  #include <asm/xen/hypervisor.h>
  #include <asm/xen/hypercall.h>
 +#include <asm/xen/xen-ops.h>
  #include <asm/system_misc.h>
 +#include <asm/efi.h>
  #include <linux/interrupt.h>
  #include <linux/irqreturn.h>
  #include <linux/module.h>
  #include <linux/of.h>
 +#include <linux/of_fdt.h>
  #include <linux/of_irq.h>
  #include <linux/of_address.h>
  #include <linux/cpuidle.h>
@@@ -32,7 -30,6 +32,7 @@@
  #include <linux/time64.h>
  #include <linux/timekeeping.h>
  #include <linux/timekeeper_internal.h>
 +#include <linux/acpi.h>
  
  #include <linux/mm.h>
  
@@@ -49,16 -46,14 +49,16 @@@ struct shared_info *HYPERVISOR_shared_i
  DEFINE_PER_CPU(struct vcpu_info *, xen_vcpu);
  static struct vcpu_info __percpu *xen_vcpu_info;
  
 +/* Linux <-> Xen vCPU id mapping */
 +DEFINE_PER_CPU(int, xen_vcpu_id) = -1;
 +EXPORT_PER_CPU_SYMBOL(xen_vcpu_id);
 +
  /* These are unused until we support booting "pre-ballooned" */
  unsigned long xen_released_pages;
  struct xen_memory_region xen_extra_mem[XEN_EXTRA_MEM_MAX_REGIONS] __initdata;
  
  static __read_mostly unsigned int xen_events_irq;
  
 -static __initdata struct device_node *xen_node;
 -
  int xen_remap_domain_gfn_array(struct vm_area_struct *vma,
                               unsigned long addr,
                               xen_pfn_t *gfn, int nr,
@@@ -89,6 -84,19 +89,6 @@@ int xen_unmap_domain_gfn_range(struct v
  }
  EXPORT_SYMBOL_GPL(xen_unmap_domain_gfn_range);
  
 -static unsigned long long xen_stolen_accounting(int cpu)
 -{
 -      struct vcpu_runstate_info state;
 -
 -      BUG_ON(cpu != smp_processor_id());
 -
 -      xen_get_runstate_snapshot(&state);
 -
 -      WARN_ON(state.state != RUNSTATE_running);
 -
 -      return state.time[RUNSTATE_runnable] + state.time[RUNSTATE_offline];
 -}
 -
  static void xen_read_wallclock(struct timespec64 *ts)
  {
        u32 version;
@@@ -153,12 -161,11 +153,11 @@@ static struct notifier_block xen_pvcloc
        .notifier_call = xen_pvclock_gtod_notify,
  };
  
- static void xen_percpu_init(void)
+ static int xen_starting_cpu(unsigned int cpu)
  {
        struct vcpu_register_vcpu_info info;
        struct vcpu_info *vcpup;
        int err;
-       int cpu = get_cpu();
  
        /* 
         * VCPUOP_register_vcpu_info cannot be called twice for the same
        pr_info("Xen: initializing cpu%d\n", cpu);
        vcpup = per_cpu_ptr(xen_vcpu_info, cpu);
  
 +      /* Direct vCPU id mapping for ARM guests. */
 +      per_cpu(xen_vcpu_id, cpu) = cpu;
 +
        info.mfn = virt_to_gfn(vcpup);
        info.offset = xen_offset_in_page(vcpup);
  
 -      err = HYPERVISOR_vcpu_op(VCPUOP_register_vcpu_info, cpu, &info);
 +      err = HYPERVISOR_vcpu_op(VCPUOP_register_vcpu_info, xen_vcpu_nr(cpu),
 +                               &info);
        BUG_ON(err);
        per_cpu(xen_vcpu, cpu) = vcpup;
  
  
  after_register_vcpu_info:
        enable_percpu_irq(xen_events_irq, 0);
-       put_cpu();
+       return 0;
+ }
+ static int xen_dying_cpu(unsigned int cpu)
+ {
+       disable_percpu_irq(xen_events_irq);
+       return 0;
  }
  
  static void xen_restart(enum reboot_mode reboot_mode, const char *cmd)
@@@ -205,74 -214,12 +210,52 @@@ static void xen_power_off(void
        BUG_ON(rc);
  }
  
- static int xen_cpu_notification(struct notifier_block *self,
-                               unsigned long action,
-                               void *hcpu)
- {
-       switch (action) {
-       case CPU_STARTING:
-               xen_percpu_init();
-               break;
-       case CPU_DYING:
-               disable_percpu_irq(xen_events_irq);
-               break;
-       default:
-               break;
-       }
-       return NOTIFY_OK;
- }
- static struct notifier_block xen_cpu_notifier = {
-       .notifier_call = xen_cpu_notification,
- };
  static irqreturn_t xen_arm_callback(int irq, void *arg)
  {
        xen_hvm_evtchn_do_upcall();
        return IRQ_HANDLED;
  }
  
 +static __initdata struct {
 +      const char *compat;
 +      const char *prefix;
 +      const char *version;
 +      bool found;
 +} hyper_node = {"xen,xen", "xen,xen-", NULL, false};
 +
 +static int __init fdt_find_hyper_node(unsigned long node, const char *uname,
 +                                    int depth, void *data)
 +{
 +      const void *s = NULL;
 +      int len;
 +
 +      if (depth != 1 || strcmp(uname, "hypervisor") != 0)
 +              return 0;
 +
 +      if (of_flat_dt_is_compatible(node, hyper_node.compat))
 +              hyper_node.found = true;
 +
 +      s = of_get_flat_dt_prop(node, "compatible", &len);
 +      if (strlen(hyper_node.prefix) + 3  < len &&
 +          !strncmp(hyper_node.prefix, s, strlen(hyper_node.prefix)))
 +              hyper_node.version = s + strlen(hyper_node.prefix);
 +
 +      /*
 +       * Check if Xen supports EFI by checking whether there is the
 +       * "/hypervisor/uefi" node in DT. If so, runtime services are available
 +       * through proxy functions (e.g. in case of Xen dom0 EFI implementation
 +       * they call special hypercall which executes relevant EFI functions)
 +       * and that is why they are always enabled.
 +       */
 +      if (IS_ENABLED(CONFIG_XEN_EFI)) {
 +              if ((of_get_flat_dt_subnode_by_name(node, "uefi") > 0) &&
 +                  !efi_runtime_disabled())
 +                      set_bit(EFI_RUNTIME_SERVICES, &efi.flags);
 +      }
 +
 +      return 0;
 +}
 +
  /*
   * see Documentation/devicetree/bindings/arm/xen.txt for the
   * documentation of the Xen Device Tree format.
  #define GRANT_TABLE_PHYSADDR 0
  void __init xen_early_init(void)
  {
 -      int len;
 -      const char *s = NULL;
 -      const char *version = NULL;
 -      const char *xen_prefix = "xen,xen-";
 -
 -      xen_node = of_find_compatible_node(NULL, NULL, "xen,xen");
 -      if (!xen_node) {
 +      of_scan_flat_dt(fdt_find_hyper_node, NULL);
 +      if (!hyper_node.found) {
                pr_debug("No Xen support\n");
                return;
        }
 -      s = of_get_property(xen_node, "compatible", &len);
 -      if (strlen(xen_prefix) + 3  < len &&
 -                      !strncmp(xen_prefix, s, strlen(xen_prefix)))
 -              version = s + strlen(xen_prefix);
 -      if (version == NULL) {
 +
 +      if (hyper_node.version == NULL) {
                pr_debug("Xen version not found\n");
                return;
        }
  
 -      pr_info("Xen %s support found\n", version);
 +      pr_info("Xen %s support found\n", hyper_node.version);
  
        xen_domain_type = XEN_HVM_DOMAIN;
  
                add_preferred_console("hvc", 0, NULL);
  }
  
 +static void __init xen_acpi_guest_init(void)
 +{
 +#ifdef CONFIG_ACPI
 +      struct xen_hvm_param a;
 +      int interrupt, trigger, polarity;
 +
 +      a.domid = DOMID_SELF;
 +      a.index = HVM_PARAM_CALLBACK_IRQ;
 +
 +      if (HYPERVISOR_hvm_op(HVMOP_get_param, &a)
 +          || (a.value >> 56) != HVM_PARAM_CALLBACK_TYPE_PPI) {
 +              xen_events_irq = 0;
 +              return;
 +      }
 +
 +      interrupt = a.value & 0xff;
 +      trigger = ((a.value >> 8) & 0x1) ? ACPI_EDGE_SENSITIVE
 +                                       : ACPI_LEVEL_SENSITIVE;
 +      polarity = ((a.value >> 8) & 0x2) ? ACPI_ACTIVE_LOW
 +                                        : ACPI_ACTIVE_HIGH;
 +      xen_events_irq = acpi_register_gsi(NULL, interrupt, trigger, polarity);
 +#endif
 +}
 +
 +static void __init xen_dt_guest_init(void)
 +{
 +      struct device_node *xen_node;
 +
 +      xen_node = of_find_compatible_node(NULL, NULL, "xen,xen");
 +      if (!xen_node) {
 +              pr_err("Xen support was detected before, but it has disappeared\n");
 +              return;
 +      }
 +
 +      xen_events_irq = irq_of_parse_and_map(xen_node, 0);
 +}
 +
  static int __init xen_guest_init(void)
  {
        struct xen_add_to_physmap xatp;
        struct shared_info *shared_info_page = NULL;
 -      struct resource res;
 -      phys_addr_t grant_frames;
  
        if (!xen_domain())
                return 0;
  
 -      if (of_address_to_resource(xen_node, GRANT_TABLE_PHYSADDR, &res)) {
 -              pr_err("Xen grant table base address not found\n");
 -              return -ENODEV;
 -      }
 -      grant_frames = res.start;
 +      if (!acpi_disabled)
 +              xen_acpi_guest_init();
 +      else
 +              xen_dt_guest_init();
  
 -      xen_events_irq = irq_of_parse_and_map(xen_node, 0);
        if (!xen_events_irq) {
                pr_err("Xen event channel interrupt not found\n");
                return -ENODEV;
        }
  
 +      /*
 +       * The fdt parsing codes have set EFI_RUNTIME_SERVICES if Xen EFI
 +       * parameters are found. Force enable runtime services.
 +       */
 +      if (efi_enabled(EFI_RUNTIME_SERVICES))
 +              xen_efi_runtime_setup();
 +
        shared_info_page = (struct shared_info *)get_zeroed_page(GFP_KERNEL);
  
        if (!shared_info_page) {
        if (xen_vcpu_info == NULL)
                return -ENOMEM;
  
 -      if (gnttab_setup_auto_xlat_frames(grant_frames)) {
 +      /* Direct vCPU id mapping for ARM guests. */
 +      per_cpu(xen_vcpu_id, 0) = 0;
 +
 +      xen_auto_xlat_grant_frames.count = gnttab_max_grant_frames();
 +      if (xen_xlate_map_ballooned_pages(&xen_auto_xlat_grant_frames.pfn,
 +                                        &xen_auto_xlat_grant_frames.vaddr,
 +                                        xen_auto_xlat_grant_frames.count)) {
                free_percpu(xen_vcpu_info);
                return -ENOMEM;
        }
                return -EINVAL;
        }
  
-       xen_percpu_init();
-       register_cpu_notifier(&xen_cpu_notifier);
 -      pv_time_ops.steal_clock = xen_stolen_accounting;
 -      static_key_slow_inc(&paravirt_steal_enabled);
 +      xen_time_setup_guest();
 +
        if (xen_initial_domain())
                pvclock_gtod_register_notifier(&xen_pvclock_gtod_notifier);
  
-       return 0;
+       return cpuhp_setup_state(CPUHP_AP_ARM_XEN_STARTING,
+                                "AP_ARM_XEN_STARTING", xen_starting_cpu,
+                                xen_dying_cpu);
  }
  early_initcall(xen_guest_init);
  
@@@ -477,5 -384,4 +458,5 @@@ EXPORT_SYMBOL_GPL(HYPERVISOR_vcpu_op)
  EXPORT_SYMBOL_GPL(HYPERVISOR_tmem_op);
  EXPORT_SYMBOL_GPL(HYPERVISOR_platform_op);
  EXPORT_SYMBOL_GPL(HYPERVISOR_multicall);
 +EXPORT_SYMBOL_GPL(HYPERVISOR_vm_assist);
  EXPORT_SYMBOL_GPL(privcmd_call);
index 5f72475e2e3b2f6ab213e81d2b123424d9323014,9668c230674a330db7563b72e3e1389497f30536..42ffdb54e162d64164ab9f515d1ce21a379fb3d7
@@@ -121,7 -121,7 +121,7 @@@ static int run_all_cpu_set_hw_mode(stru
   *  0                 - If all the hooks ran successfully.
   * -EINVAL    - At least one hook is not supported by the CPU.
   */
- static int run_all_insn_set_hw_mode(unsigned long cpu)
+ static int run_all_insn_set_hw_mode(unsigned int cpu)
  {
        int rc = 0;
        unsigned long flags;
        list_for_each_entry(insn, &insn_emulation, node) {
                bool enable = (insn->current_mode == INSN_HW);
                if (insn->ops->set_hw_mode && insn->ops->set_hw_mode(enable)) {
-                       pr_warn("CPU[%ld] cannot support the emulation of %s",
+                       pr_warn("CPU[%u] cannot support the emulation of %s",
                                cpu, insn->ops->name);
                        rc = -EINVAL;
                }
@@@ -316,6 -316,28 +316,6 @@@ static void __init register_insn_emulat
   */
  #define TYPE_SWPB (1 << 22)
  
 -/*
 - * Set up process info to signal segmentation fault - called on access error.
 - */
 -static void set_segfault(struct pt_regs *regs, unsigned long addr)
 -{
 -      siginfo_t info;
 -
 -      down_read(&current->mm->mmap_sem);
 -      if (find_vma(current->mm, addr) == NULL)
 -              info.si_code = SEGV_MAPERR;
 -      else
 -              info.si_code = SEGV_ACCERR;
 -      up_read(&current->mm->mmap_sem);
 -
 -      info.si_signo = SIGSEGV;
 -      info.si_errno = 0;
 -      info.si_addr  = (void *) instruction_pointer(regs);
 -
 -      pr_debug("SWP{B} emulation: access caused memory abort!\n");
 -      arm64_notify_die("Illegal memory access", regs, &info, 0);
 -}
 -
  static int emulate_swpX(unsigned int address, unsigned int *data,
                        unsigned int type)
  {
        return res;
  }
  
 +#define       ARM_OPCODE_CONDITION_UNCOND     0xf
 +
 +static unsigned int __kprobes aarch32_check_condition(u32 opcode, u32 psr)
 +{
 +      u32 cc_bits  = opcode >> 28;
 +
 +      if (cc_bits != ARM_OPCODE_CONDITION_UNCOND) {
 +              if ((*aarch32_opcode_cond_checks[cc_bits])(psr))
 +                      return ARM_OPCODE_CONDTEST_PASS;
 +              else
 +                      return ARM_OPCODE_CONDTEST_FAIL;
 +      }
 +      return ARM_OPCODE_CONDTEST_UNCOND;
 +}
 +
  /*
   * swp_handler logs the id of calling process, dissects the instruction, sanity
   * checks the memory location, calls emulate_swpX for the actual operation and
@@@ -373,7 -380,7 +373,7 @@@ static int swp_handler(struct pt_regs *
  
        type = instr & TYPE_SWPB;
  
 -      switch (arm_check_condition(instr, regs->pstate)) {
 +      switch (aarch32_check_condition(instr, regs->pstate)) {
        case ARM_OPCODE_CONDTEST_PASS:
                break;
        case ARM_OPCODE_CONDTEST_FAIL:
@@@ -423,8 -430,7 +423,8 @@@ ret
        return 0;
  
  fault:
 -      set_segfault(regs, address);
 +      pr_debug("SWP{B} emulation: access caused memory abort!\n");
 +      arm64_notify_segfault(regs, address);
  
        return 0;
  }
@@@ -455,7 -461,7 +455,7 @@@ static int cp15barrier_handler(struct p
  {
        perf_sw_event(PERF_COUNT_SW_EMULATION_FAULTS, 1, regs, regs->pc);
  
 -      switch (arm_check_condition(instr, regs->pstate)) {
 +      switch (aarch32_check_condition(instr, regs->pstate)) {
        case ARM_OPCODE_CONDTEST_PASS:
                break;
        case ARM_OPCODE_CONDTEST_FAIL:
@@@ -611,20 -617,6 +611,6 @@@ static struct insn_emulation_ops setend
        .set_hw_mode = setend_set_hw_mode,
  };
  
- static int insn_cpu_hotplug_notify(struct notifier_block *b,
-                             unsigned long action, void *hcpu)
- {
-       int rc = 0;
-       if ((action & ~CPU_TASKS_FROZEN) == CPU_STARTING)
-               rc = run_all_insn_set_hw_mode((unsigned long)hcpu);
-       return notifier_from_errno(rc);
- }
- static struct notifier_block insn_cpu_hotplug_notifier = {
-       .notifier_call = insn_cpu_hotplug_notify,
- };
  /*
   * Invoked as late_initcall, since not needed before init spawned.
   */
@@@ -643,7 -635,9 +629,9 @@@ static int __init armv8_deprecated_init
                        pr_info("setend instruction emulation is not supported on the system");
        }
  
-       register_cpu_notifier(&insn_cpu_hotplug_notifier);
+       cpuhp_setup_state_nocalls(CPUHP_AP_ARM64_ISNDEP_STARTING,
+                                 "AP_ARM64_ISNDEP_STARTING",
+                                 run_all_insn_set_hw_mode, NULL);
        register_insn_emulation_sysctl(ctl_abi);
  
        return 0;
index 53acf2d76fa9809527e9d0869e4a5db8b7d48c1f,f4a4c118f8b403f7f3ece68f95750bf52ec7e891..fcc634c1479a1ed1d7cd05592d2b8c0ae59a408d
@@@ -601,12 -601,17 +601,12 @@@ static void release_pmc_hardware(void
  
        irq_subclass_unregister(IRQ_SUBCLASS_MEASUREMENT_ALERT);
        on_each_cpu(setup_pmc_cpu, &flags, 1);
 -      perf_release_sampling();
  }
  
  static int reserve_pmc_hardware(void)
  {
        int flags = PMC_INIT;
 -      int err;
  
 -      err = perf_reserve_sampling();
 -      if (err)
 -              return err;
        on_each_cpu(setup_pmc_cpu, &flags, 1);
        if (flags & PMC_FAILURE) {
                release_pmc_hardware();
@@@ -974,15 -979,12 +974,15 @@@ static int perf_push_sample(struct perf
        struct pt_regs regs;
        struct perf_sf_sde_regs *sde_regs;
        struct perf_sample_data data;
 -      struct perf_raw_record raw;
 +      struct perf_raw_record raw = {
 +              .frag = {
 +                      .size = sfr->size,
 +                      .data = sfr,
 +              },
 +      };
  
        /* Setup perf sample */
        perf_sample_data_init(&data, 0, event->hw.last_period);
 -      raw.size = sfr->size;
 -      raw.data = sfr;
        data.raw = &raw;
  
        /* Setup pt_regs to look like an CPU-measurement external interrupt
@@@ -1504,37 -1506,28 +1504,28 @@@ static void cpumf_measurement_alert(str
                sf_disable();
        }
  }
- static int cpumf_pmu_notifier(struct notifier_block *self,
-                             unsigned long action, void *hcpu)
+ static int cpusf_pmu_setup(unsigned int cpu, int flags)
  {
-       int flags;
        /* Ignore the notification if no events are scheduled on the PMU.
         * This might be racy...
         */
        if (!atomic_read(&num_events))
-               return NOTIFY_OK;
-       switch (action & ~CPU_TASKS_FROZEN) {
-       case CPU_ONLINE:
-       case CPU_DOWN_FAILED:
-               flags = PMC_INIT;
-               local_irq_disable();
-               setup_pmc_cpu(&flags);
-               local_irq_enable();
-               break;
-       case CPU_DOWN_PREPARE:
-               flags = PMC_RELEASE;
-               local_irq_disable();
-               setup_pmc_cpu(&flags);
-               local_irq_enable();
-               break;
-       default:
-               break;
-       }
+               return 0;
  
-       return NOTIFY_OK;
+       local_irq_disable();
+       setup_pmc_cpu(&flags);
+       local_irq_enable();
+       return 0;
+ }
+ static int s390_pmu_sf_online_cpu(unsigned int cpu)
+ {
+       return cpusf_pmu_setup(cpu, PMC_INIT);
+ }
+ static int s390_pmu_sf_offline_cpu(unsigned int cpu)
+ {
+       return cpusf_pmu_setup(cpu, PMC_RELEASE);
  }
  
  static int param_get_sfb_size(char *buffer, const struct kernel_param *kp)
@@@ -1634,7 -1627,9 +1625,9 @@@ static int __init init_cpum_sampling_pm
                                        cpumf_measurement_alert);
                goto out;
        }
-       perf_cpu_notifier(cpumf_pmu_notifier);
+       cpuhp_setup_state(CPUHP_AP_PERF_S390_SF_ONLINE, "AP_PERF_S390_SF_ONLINE",
+                         s390_pmu_sf_online_cpu, s390_pmu_sf_offline_cpu);
  out:
        return err;
  }
index 3329844e3c43913d278af48b4a93f5d1e1ce2ccc,25b0368de7f6752e4758f664417d8c7b8e52259e..f840766659a8f4fc2c68d778c7b495ad9b40cf98
@@@ -12,7 -12,6 +12,7 @@@
  #include <linux/random.h>
  #include <linux/elf.h>
  #include <linux/cpu.h>
 +#include <linux/ptrace.h>
  #include <asm/pvclock.h>
  #include <asm/vgtod.h>
  #include <asm/proto.h>
@@@ -98,40 -97,10 +98,40 @@@ static int vdso_fault(const struct vm_s
        return 0;
  }
  
 -static const struct vm_special_mapping text_mapping = {
 -      .name = "[vdso]",
 -      .fault = vdso_fault,
 -};
 +static void vdso_fix_landing(const struct vdso_image *image,
 +              struct vm_area_struct *new_vma)
 +{
 +#if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION
 +      if (in_ia32_syscall() && image == &vdso_image_32) {
 +              struct pt_regs *regs = current_pt_regs();
 +              unsigned long vdso_land = image->sym_int80_landing_pad;
 +              unsigned long old_land_addr = vdso_land +
 +                      (unsigned long)current->mm->context.vdso;
 +
 +              /* Fixing userspace landing - look at do_fast_syscall_32 */
 +              if (regs->ip == old_land_addr)
 +                      regs->ip = new_vma->vm_start + vdso_land;
 +      }
 +#endif
 +}
 +
 +static int vdso_mremap(const struct vm_special_mapping *sm,
 +              struct vm_area_struct *new_vma)
 +{
 +      unsigned long new_size = new_vma->vm_end - new_vma->vm_start;
 +      const struct vdso_image *image = current->mm->context.vdso_image;
 +
 +      if (image->size != new_size)
 +              return -EINVAL;
 +
 +      if (WARN_ON_ONCE(current->mm != new_vma->vm_mm))
 +              return -EFAULT;
 +
 +      vdso_fix_landing(image, new_vma);
 +      current->mm->context.vdso = (void __user *)new_vma->vm_start;
 +
 +      return 0;
 +}
  
  static int vvar_fault(const struct vm_special_mapping *sm,
                      struct vm_area_struct *vma, struct vm_fault *vmf)
@@@ -182,12 -151,6 +182,12 @@@ static int map_vdso(const struct vdso_i
        struct vm_area_struct *vma;
        unsigned long addr, text_start;
        int ret = 0;
 +
 +      static const struct vm_special_mapping vdso_mapping = {
 +              .name = "[vdso]",
 +              .fault = vdso_fault,
 +              .mremap = vdso_mremap,
 +      };
        static const struct vm_special_mapping vvar_mapping = {
                .name = "[vvar]",
                .fault = vvar_fault,
                                       image->size,
                                       VM_READ|VM_EXEC|
                                       VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC,
 -                                     &text_mapping);
 +                                     &vdso_mapping);
  
        if (IS_ERR(vma)) {
                ret = PTR_ERR(vma);
@@@ -331,15 -294,9 +331,9 @@@ static void vgetcpu_cpu_init(void *arg
        write_gdt_entry(get_cpu_gdt_table(cpu), GDT_ENTRY_PER_CPU, &d, DESCTYPE_S);
  }
  
- static int
- vgetcpu_cpu_notifier(struct notifier_block *n, unsigned long action, void *arg)
+ static int vgetcpu_online(unsigned int cpu)
  {
-       long cpu = (long)arg;
-       if (action == CPU_ONLINE || action == CPU_ONLINE_FROZEN)
-               smp_call_function_single(cpu, vgetcpu_cpu_init, NULL, 1);
-       return NOTIFY_DONE;
+       return smp_call_function_single(cpu, vgetcpu_cpu_init, NULL, 1);
  }
  
  static int __init init_vdso(void)
        init_vdso_image(&vdso_image_x32);
  #endif
  
-       cpu_notifier_register_begin();
-       on_each_cpu(vgetcpu_cpu_init, NULL, 1);
        /* notifier priority > KVM */
-       __hotcpu_notifier(vgetcpu_cpu_notifier, 30);
-       cpu_notifier_register_done();
-       return 0;
+       return cpuhp_setup_state(CPUHP_AP_X86_VDSO_VMA_ONLINE,
+                                "AP_X86_VDSO_VMA_ONLINE", vgetcpu_online, NULL);
  }
  subsys_initcall(init_vdso);
  #endif /* CONFIG_X86_64 */
index 72dea2f40fc4d43bb61b486f8f784c5a02aa6fe7,1a59a181582b2638b70d36a64ad383f3e302563c..155ea5324ae03ffd2f451fe1a92e464ae939486f
@@@ -655,12 -655,8 +655,12 @@@ fail
        }
  
        if (event->attr.sample_type & PERF_SAMPLE_RAW) {
 -              raw.size = sizeof(u32) + ibs_data.size;
 -              raw.data = ibs_data.data;
 +              raw = (struct perf_raw_record){
 +                      .frag = {
 +                              .size = sizeof(u32) + ibs_data.size,
 +                              .data = ibs_data.data,
 +                      },
 +              };
                data.raw = &raw;
        }
  
@@@ -725,13 -721,10 +725,10 @@@ static __init int perf_ibs_pmu_init(str
        return ret;
  }
  
- static __init int perf_event_ibs_init(void)
+ static __init void perf_event_ibs_init(void)
  {
        struct attribute **attr = ibs_op_format_attrs;
  
-       if (!ibs_caps)
-               return -ENODEV; /* ibs not supported by the cpu */
        perf_ibs_pmu_init(&perf_ibs_fetch, "ibs_fetch");
  
        if (ibs_caps & IBS_CAPS_OPCNT) {
  
        register_nmi_handler(NMI_LOCAL, perf_ibs_nmi_handler, 0, "perf_ibs");
        pr_info("perf: AMD IBS detected (0x%08x)\n", ibs_caps);
-       return 0;
  }
  
  #else /* defined(CONFIG_PERF_EVENTS) && defined(CONFIG_CPU_SUP_AMD) */
  
- static __init int perf_event_ibs_init(void) { return 0; }
+ static __init void perf_event_ibs_init(void) { }
  
  #endif
  
@@@ -925,7 -916,7 +920,7 @@@ static inline int get_ibs_lvt_offset(vo
        return val & IBSCTL_LVT_OFFSET_MASK;
  }
  
- static void setup_APIC_ibs(void *dummy)
+ static void setup_APIC_ibs(void)
  {
        int offset;
  
@@@ -940,7 -931,7 +935,7 @@@ failed
                smp_processor_id());
  }
  
- static void clear_APIC_ibs(void *dummy)
+ static void clear_APIC_ibs(void)
  {
        int offset;
  
                setup_APIC_eilvt(offset, 0, APIC_EILVT_MSG_FIX, 1);
  }
  
+ static int x86_pmu_amd_ibs_starting_cpu(unsigned int cpu)
+ {
+       setup_APIC_ibs();
+       return 0;
+ }
  #ifdef CONFIG_PM
  
  static int perf_ibs_suspend(void)
  {
-       clear_APIC_ibs(NULL);
+       clear_APIC_ibs();
        return 0;
  }
  
  static void perf_ibs_resume(void)
  {
        ibs_eilvt_setup();
-       setup_APIC_ibs(NULL);
+       setup_APIC_ibs();
  }
  
  static struct syscore_ops perf_ibs_syscore_ops = {
@@@ -979,27 -976,15 +980,15 @@@ static inline void perf_ibs_pm_init(voi
  
  #endif
  
- static int
- perf_ibs_cpu_notifier(struct notifier_block *self, unsigned long action, void *hcpu)
+ static int x86_pmu_amd_ibs_dying_cpu(unsigned int cpu)
  {
-       switch (action & ~CPU_TASKS_FROZEN) {
-       case CPU_STARTING:
-               setup_APIC_ibs(NULL);
-               break;
-       case CPU_DYING:
-               clear_APIC_ibs(NULL);
-               break;
-       default:
-               break;
-       }
-       return NOTIFY_OK;
+       clear_APIC_ibs();
+       return 0;
  }
  
  static __init int amd_ibs_init(void)
  {
        u32 caps;
-       int ret = -EINVAL;
  
        caps = __get_ibs_caps();
        if (!caps)
        ibs_eilvt_setup();
  
        if (!ibs_eilvt_valid())
-               goto out;
+               return -EINVAL;
  
        perf_ibs_pm_init();
-       cpu_notifier_register_begin();
        ibs_caps = caps;
        /* make ibs_caps visible to other cpus: */
        smp_mb();
-       smp_call_function(setup_APIC_ibs, NULL, 1);
-       __perf_cpu_notifier(perf_ibs_cpu_notifier);
-       cpu_notifier_register_done();
+       /*
+        * x86_pmu_amd_ibs_starting_cpu will be called from core on
+        * all online cpus.
+        */
+       cpuhp_setup_state(CPUHP_AP_PERF_X86_AMD_IBS_STARTING,
+                         "AP_PERF_X86_AMD_IBS_STARTING",
+                         x86_pmu_amd_ibs_starting_cpu,
+                         x86_pmu_amd_ibs_dying_cpu);
  
-       ret = perf_event_ibs_init();
- out:
-       if (ret)
-               pr_err("Failed to setup IBS, %d\n", ret);
-       return ret;
+       perf_event_ibs_init();
+       return 0;
  }
  
  /* Since we need the pci subsystem to init ibs we can't do this earlier: */
diff --combined arch/x86/events/core.c
index dfebbde2a4cc252a7bc8ab2cd5f84c679df9336c,4ce3745f26f5dd370666a703f36f1db37cb123f2..c17f0de5fd391c3ce000df6428802521384d179f
@@@ -263,7 -263,7 +263,7 @@@ static bool check_hw_exists(void
  
  msr_fail:
        pr_cont("Broken PMU hardware detected, using software events only.\n");
 -      pr_info("%sFailed to access perfctr msr (MSR %x is %Lx)\n",
 +      printk("%sFailed to access perfctr msr (MSR %x is %Lx)\n",
                boot_cpu_has(X86_FEATURE_HYPERVISOR) ? KERN_INFO : KERN_ERR,
                reg, val_new);
  
@@@ -1477,49 -1477,49 +1477,49 @@@ NOKPROBE_SYMBOL(perf_event_nmi_handler)
  struct event_constraint emptyconstraint;
  struct event_constraint unconstrained;
  
- static int
- x86_pmu_notifier(struct notifier_block *self, unsigned long action, void *hcpu)
+ static int x86_pmu_prepare_cpu(unsigned int cpu)
  {
-       unsigned int cpu = (long)hcpu;
        struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);
-       int i, ret = NOTIFY_OK;
-       switch (action & ~CPU_TASKS_FROZEN) {
-       case CPU_UP_PREPARE:
-               for (i = 0 ; i < X86_PERF_KFREE_MAX; i++)
-                       cpuc->kfree_on_online[i] = NULL;
-               if (x86_pmu.cpu_prepare)
-                       ret = x86_pmu.cpu_prepare(cpu);
-               break;
-       case CPU_STARTING:
-               if (x86_pmu.cpu_starting)
-                       x86_pmu.cpu_starting(cpu);
-               break;
+       int i;
  
-       case CPU_ONLINE:
-               for (i = 0 ; i < X86_PERF_KFREE_MAX; i++) {
-                       kfree(cpuc->kfree_on_online[i]);
-                       cpuc->kfree_on_online[i] = NULL;
-               }
-               break;
+       for (i = 0 ; i < X86_PERF_KFREE_MAX; i++)
+               cpuc->kfree_on_online[i] = NULL;
+       if (x86_pmu.cpu_prepare)
+               return x86_pmu.cpu_prepare(cpu);
+       return 0;
+ }
  
-       case CPU_DYING:
-               if (x86_pmu.cpu_dying)
-                       x86_pmu.cpu_dying(cpu);
-               break;
+ static int x86_pmu_dead_cpu(unsigned int cpu)
+ {
+       if (x86_pmu.cpu_dead)
+               x86_pmu.cpu_dead(cpu);
+       return 0;
+ }
  
-       case CPU_UP_CANCELED:
-       case CPU_DEAD:
-               if (x86_pmu.cpu_dead)
-                       x86_pmu.cpu_dead(cpu);
-               break;
+ static int x86_pmu_online_cpu(unsigned int cpu)
+ {
+       struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);
+       int i;
  
-       default:
-               break;
+       for (i = 0 ; i < X86_PERF_KFREE_MAX; i++) {
+               kfree(cpuc->kfree_on_online[i]);
+               cpuc->kfree_on_online[i] = NULL;
        }
+       return 0;
+ }
  
-       return ret;
+ static int x86_pmu_starting_cpu(unsigned int cpu)
+ {
+       if (x86_pmu.cpu_starting)
+               x86_pmu.cpu_starting(cpu);
+       return 0;
+ }
+ static int x86_pmu_dying_cpu(unsigned int cpu)
+ {
+       if (x86_pmu.cpu_dying)
+               x86_pmu.cpu_dying(cpu);
+       return 0;
  }
  
  static void __init pmu_check_apic(void)
@@@ -1622,29 -1622,6 +1622,29 @@@ ssize_t events_sysfs_show(struct devic
  }
  EXPORT_SYMBOL_GPL(events_sysfs_show);
  
 +ssize_t events_ht_sysfs_show(struct device *dev, struct device_attribute *attr,
 +                        char *page)
 +{
 +      struct perf_pmu_events_ht_attr *pmu_attr =
 +              container_of(attr, struct perf_pmu_events_ht_attr, attr);
 +
 +      /*
 +       * Report conditional events depending on Hyper-Threading.
 +       *
 +       * This is overly conservative as usually the HT special
 +       * handling is not needed if the other CPU thread is idle.
 +       *
 +       * Note this does not (and cannot) handle the case when thread
 +       * siblings are invisible, for example with virtualization
 +       * if they are owned by some other guest.  The user tool
 +       * has to re-read when a thread sibling gets onlined later.
 +       */
 +      return sprintf(page, "%s",
 +                      topology_max_smt_threads() > 1 ?
 +                      pmu_attr->event_str_ht :
 +                      pmu_attr->event_str_noht);
 +}
 +
  EVENT_ATTR(cpu-cycles,                        CPU_CYCLES              );
  EVENT_ATTR(instructions,              INSTRUCTIONS            );
  EVENT_ATTR(cache-references,          CACHE_REFERENCES        );
@@@ -1787,10 -1764,39 +1787,39 @@@ static int __init init_hw_perf_events(v
        pr_info("... fixed-purpose events:   %d\n",     x86_pmu.num_counters_fixed);
        pr_info("... event mask:             %016Lx\n", x86_pmu.intel_ctrl);
  
-       perf_pmu_register(&pmu, "cpu", PERF_TYPE_RAW);
-       perf_cpu_notifier(x86_pmu_notifier);
+       /*
+        * Install callbacks. Core will call them for each online
+        * cpu.
+        */
+       err = cpuhp_setup_state(CPUHP_PERF_X86_PREPARE, "PERF_X86_PREPARE",
+                               x86_pmu_prepare_cpu, x86_pmu_dead_cpu);
+       if (err)
+               return err;
+       err = cpuhp_setup_state(CPUHP_AP_PERF_X86_STARTING,
+                               "AP_PERF_X86_STARTING", x86_pmu_starting_cpu,
+                               x86_pmu_dying_cpu);
+       if (err)
+               goto out;
+       err = cpuhp_setup_state(CPUHP_AP_PERF_X86_ONLINE, "AP_PERF_X86_ONLINE",
+                               x86_pmu_online_cpu, NULL);
+       if (err)
+               goto out1;
+       err = perf_pmu_register(&pmu, "cpu", PERF_TYPE_RAW);
+       if (err)
+               goto out2;
  
        return 0;
+ out2:
+       cpuhp_remove_state(CPUHP_AP_PERF_X86_ONLINE);
+ out1:
+       cpuhp_remove_state(CPUHP_AP_PERF_X86_STARTING);
+ out:
+       cpuhp_remove_state(CPUHP_PERF_X86_PREPARE);
+       return err;
  }
  early_initcall(init_hw_perf_events);
  
index 0974ba11e9544a169a5071806f409aed9682291b,6a1441be485b5a21e5c8168283adc25c5d8254fe..2cbde2f449aa8ced63adf14b14f9ceb3d464068c
@@@ -16,7 -16,6 +16,7 @@@
  
  #include <asm/cpufeature.h>
  #include <asm/hardirq.h>
 +#include <asm/intel-family.h>
  #include <asm/apic.h>
  
  #include "../perf_event.h"
@@@ -186,7 -185,7 +186,7 @@@ static struct event_constraint intel_sl
        EVENT_CONSTRAINT_END
  };
  
 -struct event_constraint intel_skl_event_constraints[] = {
 +static struct event_constraint intel_skl_event_constraints[] = {
        FIXED_EVENT_CONSTRAINT(0x00c0, 0),      /* INST_RETIRED.ANY */
        FIXED_EVENT_CONSTRAINT(0x003c, 1),      /* CPU_CLK_UNHALTED.CORE */
        FIXED_EVENT_CONSTRAINT(0x0300, 2),      /* CPU_CLK_UNHALTED.REF */
  };
  
  static struct extra_reg intel_knl_extra_regs[] __read_mostly = {
 -      INTEL_UEVENT_EXTRA_REG(0x01b7,
 -                             MSR_OFFCORE_RSP_0, 0x7f9ffbffffull, RSP_0),
 -      INTEL_UEVENT_EXTRA_REG(0x02b7,
 -                             MSR_OFFCORE_RSP_1, 0x3f9ffbffffull, RSP_1),
 +      INTEL_UEVENT_EXTRA_REG(0x01b7, MSR_OFFCORE_RSP_0, 0x799ffbb6e7ull, RSP_0),
 +      INTEL_UEVENT_EXTRA_REG(0x02b7, MSR_OFFCORE_RSP_1, 0x399ffbffe7ull, RSP_1),
        EVENT_EXTRA_END
  };
  
@@@ -242,51 -243,14 +242,51 @@@ EVENT_ATTR_STR(mem-loads,       mem_ld_nhm,     "
  EVENT_ATTR_STR(mem-loads,     mem_ld_snb,     "event=0xcd,umask=0x1,ldlat=3");
  EVENT_ATTR_STR(mem-stores,    mem_st_snb,     "event=0xcd,umask=0x2");
  
 -struct attribute *nhm_events_attrs[] = {
 +static struct attribute *nhm_events_attrs[] = {
        EVENT_PTR(mem_ld_nhm),
        NULL,
  };
  
 -struct attribute *snb_events_attrs[] = {
 +/*
 + * topdown events for Intel Core CPUs.
 + *
 + * The events are all in slots, which is a free slot in a 4 wide
 + * pipeline. Some events are already reported in slots, for cycle
 + * events we multiply by the pipeline width (4).
 + *
 + * With Hyper Threading on, topdown metrics are either summed or averaged
 + * between the threads of a core: (count_t0 + count_t1).
 + *
 + * For the average case the metric is always scaled to pipeline width,
 + * so we use factor 2 ((count_t0 + count_t1) / 2 * 4)
 + */
 +
 +EVENT_ATTR_STR_HT(topdown-total-slots, td_total_slots,
 +      "event=0x3c,umask=0x0",                 /* cpu_clk_unhalted.thread */
 +      "event=0x3c,umask=0x0,any=1");          /* cpu_clk_unhalted.thread_any */
 +EVENT_ATTR_STR_HT(topdown-total-slots.scale, td_total_slots_scale, "4", "2");
 +EVENT_ATTR_STR(topdown-slots-issued, td_slots_issued,
 +      "event=0xe,umask=0x1");                 /* uops_issued.any */
 +EVENT_ATTR_STR(topdown-slots-retired, td_slots_retired,
 +      "event=0xc2,umask=0x2");                /* uops_retired.retire_slots */
 +EVENT_ATTR_STR(topdown-fetch-bubbles, td_fetch_bubbles,
 +      "event=0x9c,umask=0x1");                /* idq_uops_not_delivered_core */
 +EVENT_ATTR_STR_HT(topdown-recovery-bubbles, td_recovery_bubbles,
 +      "event=0xd,umask=0x3,cmask=1",          /* int_misc.recovery_cycles */
 +      "event=0xd,umask=0x3,cmask=1,any=1");   /* int_misc.recovery_cycles_any */
 +EVENT_ATTR_STR_HT(topdown-recovery-bubbles.scale, td_recovery_bubbles_scale,
 +      "4", "2");
 +
 +static struct attribute *snb_events_attrs[] = {
        EVENT_PTR(mem_ld_snb),
        EVENT_PTR(mem_st_snb),
 +      EVENT_PTR(td_slots_issued),
 +      EVENT_PTR(td_slots_retired),
 +      EVENT_PTR(td_fetch_bubbles),
 +      EVENT_PTR(td_total_slots),
 +      EVENT_PTR(td_total_slots_scale),
 +      EVENT_PTR(td_recovery_bubbles),
 +      EVENT_PTR(td_recovery_bubbles_scale),
        NULL,
  };
  
@@@ -316,7 -280,7 +316,7 @@@ static struct event_constraint intel_hs
        EVENT_CONSTRAINT_END
  };
  
 -struct event_constraint intel_bdw_event_constraints[] = {
 +static struct event_constraint intel_bdw_event_constraints[] = {
        FIXED_EVENT_CONSTRAINT(0x00c0, 0),      /* INST_RETIRED.ANY */
        FIXED_EVENT_CONSTRAINT(0x003c, 1),      /* CPU_CLK_UNHALTED.CORE */
        FIXED_EVENT_CONSTRAINT(0x0300, 2),      /* CPU_CLK_UNHALTED.REF */
@@@ -1397,29 -1361,6 +1397,29 @@@ static __initconst const u64 atom_hw_ca
   },
  };
  
 +EVENT_ATTR_STR(topdown-total-slots, td_total_slots_slm, "event=0x3c");
 +EVENT_ATTR_STR(topdown-total-slots.scale, td_total_slots_scale_slm, "2");
 +/* no_alloc_cycles.not_delivered */
 +EVENT_ATTR_STR(topdown-fetch-bubbles, td_fetch_bubbles_slm,
 +             "event=0xca,umask=0x50");
 +EVENT_ATTR_STR(topdown-fetch-bubbles.scale, td_fetch_bubbles_scale_slm, "2");
 +/* uops_retired.all */
 +EVENT_ATTR_STR(topdown-slots-issued, td_slots_issued_slm,
 +             "event=0xc2,umask=0x10");
 +/* uops_retired.all */
 +EVENT_ATTR_STR(topdown-slots-retired, td_slots_retired_slm,
 +             "event=0xc2,umask=0x10");
 +
 +static struct attribute *slm_events_attrs[] = {
 +      EVENT_PTR(td_total_slots_slm),
 +      EVENT_PTR(td_total_slots_scale_slm),
 +      EVENT_PTR(td_fetch_bubbles_slm),
 +      EVENT_PTR(td_fetch_bubbles_scale_slm),
 +      EVENT_PTR(td_slots_issued_slm),
 +      EVENT_PTR(td_slots_retired_slm),
 +      NULL
 +};
 +
  static struct extra_reg intel_slm_extra_regs[] __read_mostly =
  {
        /* must define OFFCORE_RSP_X first, see intel_fixup_er() */
@@@ -3109,7 -3050,7 +3109,7 @@@ static int intel_pmu_cpu_prepare(int cp
                cpuc->excl_thread_id = 0;
        }
  
-       return NOTIFY_OK;
+       return 0;
  
  err_constraint_list:
        kfree(cpuc->constraint_list);
@@@ -3120,7 -3061,7 +3120,7 @@@ err_shared_regs
        cpuc->shared_regs = NULL;
  
  err:
-       return NOTIFY_BAD;
+       return -ENOMEM;
  }
  
  static void intel_pmu_cpu_starting(int cpu)
@@@ -3349,11 -3290,11 +3349,11 @@@ static int intel_snb_pebs_broken(int cp
        u32 rev = UINT_MAX; /* default to broken for unknown models */
  
        switch (cpu_data(cpu).x86_model) {
 -      case 42: /* SNB */
 +      case INTEL_FAM6_SANDYBRIDGE:
                rev = 0x28;
                break;
  
 -      case 45: /* SNB-EP */
 +      case INTEL_FAM6_SANDYBRIDGE_X:
                switch (cpu_data(cpu).x86_mask) {
                case 6: rev = 0x618; break;
                case 7: rev = 0x70c; break;
@@@ -3390,13 -3331,6 +3390,13 @@@ static void intel_snb_check_microcode(v
        }
  }
  
 +static bool is_lbr_from(unsigned long msr)
 +{
 +      unsigned long lbr_from_nr = x86_pmu.lbr_from + x86_pmu.lbr_nr;
 +
 +      return x86_pmu.lbr_from <= msr && msr < lbr_from_nr;
 +}
 +
  /*
   * Under certain circumstances, access certain MSR may cause #GP.
   * The function tests if the input MSR can be safely accessed.
@@@ -3417,24 -3351,13 +3417,24 @@@ static bool check_msr(unsigned long msr
         * Only change the bits which can be updated by wrmsrl.
         */
        val_tmp = val_old ^ mask;
 +
 +      if (is_lbr_from(msr))
 +              val_tmp = lbr_from_signext_quirk_wr(val_tmp);
 +
        if (wrmsrl_safe(msr, val_tmp) ||
            rdmsrl_safe(msr, &val_new))
                return false;
  
 +      /*
 +       * Quirk only affects validation in wrmsr(), so wrmsrl()'s value
 +       * should equal rdmsrl()'s even with the quirk.
 +       */
        if (val_new != val_tmp)
                return false;
  
 +      if (is_lbr_from(msr))
 +              val_old = lbr_from_signext_quirk_wr(val_old);
 +
        /* Here it's sure that the MSR can be safely accessed.
         * Restore the old value and return.
         */
@@@ -3543,13 -3466,6 +3543,13 @@@ static struct attribute *hsw_events_att
        EVENT_PTR(cycles_ct),
        EVENT_PTR(mem_ld_hsw),
        EVENT_PTR(mem_st_hsw),
 +      EVENT_PTR(td_slots_issued),
 +      EVENT_PTR(td_slots_retired),
 +      EVENT_PTR(td_fetch_bubbles),
 +      EVENT_PTR(td_total_slots),
 +      EVENT_PTR(td_total_slots_scale),
 +      EVENT_PTR(td_recovery_bubbles),
 +      EVENT_PTR(td_recovery_bubbles_scale),
        NULL
  };
  
@@@ -3621,15 -3537,15 +3621,15 @@@ __init int intel_pmu_init(void
         * Install the hw-cache-events table:
         */
        switch (boot_cpu_data.x86_model) {
 -      case 14: /* 65nm Core "Yonah" */
 +      case INTEL_FAM6_CORE_YONAH:
                pr_cont("Core events, ");
                break;
  
 -      case 15: /* 65nm Core2 "Merom"          */
 +      case INTEL_FAM6_CORE2_MEROM:
                x86_add_quirk(intel_clovertown_quirk);
 -      case 22: /* 65nm Core2 "Merom-L"        */
 -      case 23: /* 45nm Core2 "Penryn"         */
 -      case 29: /* 45nm Core2 "Dunnington (MP) */
 +      case INTEL_FAM6_CORE2_MEROM_L:
 +      case INTEL_FAM6_CORE2_PENRYN:
 +      case INTEL_FAM6_CORE2_DUNNINGTON:
                memcpy(hw_cache_event_ids, core2_hw_cache_event_ids,
                       sizeof(hw_cache_event_ids));
  
                pr_cont("Core2 events, ");
                break;
  
 -      case 30: /* 45nm Nehalem    */
 -      case 26: /* 45nm Nehalem-EP */
 -      case 46: /* 45nm Nehalem-EX */
 +      case INTEL_FAM6_NEHALEM:
 +      case INTEL_FAM6_NEHALEM_EP:
 +      case INTEL_FAM6_NEHALEM_EX:
                memcpy(hw_cache_event_ids, nehalem_hw_cache_event_ids,
                       sizeof(hw_cache_event_ids));
                memcpy(hw_cache_extra_regs, nehalem_hw_cache_extra_regs,
                pr_cont("Nehalem events, ");
                break;
  
 -      case 28: /* 45nm Atom "Pineview"   */
 -      case 38: /* 45nm Atom "Lincroft"   */
 -      case 39: /* 32nm Atom "Penwell"    */
 -      case 53: /* 32nm Atom "Cloverview" */
 -      case 54: /* 32nm Atom "Cedarview"  */
 +      case INTEL_FAM6_ATOM_PINEVIEW:
 +      case INTEL_FAM6_ATOM_LINCROFT:
 +      case INTEL_FAM6_ATOM_PENWELL:
 +      case INTEL_FAM6_ATOM_CLOVERVIEW:
 +      case INTEL_FAM6_ATOM_CEDARVIEW:
                memcpy(hw_cache_event_ids, atom_hw_cache_event_ids,
                       sizeof(hw_cache_event_ids));
  
                pr_cont("Atom events, ");
                break;
  
 -      case 55: /* 22nm Atom "Silvermont"                */
 -      case 76: /* 14nm Atom "Airmont"                   */
 -      case 77: /* 22nm Atom "Silvermont Avoton/Rangely" */
 +      case INTEL_FAM6_ATOM_SILVERMONT1:
 +      case INTEL_FAM6_ATOM_SILVERMONT2:
 +      case INTEL_FAM6_ATOM_AIRMONT:
                memcpy(hw_cache_event_ids, slm_hw_cache_event_ids,
                        sizeof(hw_cache_event_ids));
                memcpy(hw_cache_extra_regs, slm_hw_cache_extra_regs,
                x86_pmu.pebs_constraints = intel_slm_pebs_event_constraints;
                x86_pmu.extra_regs = intel_slm_extra_regs;
                x86_pmu.flags |= PMU_FL_HAS_RSP_1;
 +              x86_pmu.cpu_events = slm_events_attrs;
                pr_cont("Silvermont events, ");
                break;
  
 -      case 92: /* 14nm Atom "Goldmont" */
 -      case 95: /* 14nm Atom "Goldmont Denverton" */
 +      case INTEL_FAM6_ATOM_GOLDMONT:
 +      case INTEL_FAM6_ATOM_DENVERTON:
                memcpy(hw_cache_event_ids, glm_hw_cache_event_ids,
                       sizeof(hw_cache_event_ids));
                memcpy(hw_cache_extra_regs, glm_hw_cache_extra_regs,
                pr_cont("Goldmont events, ");
                break;
  
 -      case 37: /* 32nm Westmere    */
 -      case 44: /* 32nm Westmere-EP */
 -      case 47: /* 32nm Westmere-EX */
 +      case INTEL_FAM6_WESTMERE:
 +      case INTEL_FAM6_WESTMERE_EP:
 +      case INTEL_FAM6_WESTMERE_EX:
                memcpy(hw_cache_event_ids, westmere_hw_cache_event_ids,
                       sizeof(hw_cache_event_ids));
                memcpy(hw_cache_extra_regs, nehalem_hw_cache_extra_regs,
                pr_cont("Westmere events, ");
                break;
  
 -      case 42: /* 32nm SandyBridge         */
 -      case 45: /* 32nm SandyBridge-E/EN/EP */
 +      case INTEL_FAM6_SANDYBRIDGE:
 +      case INTEL_FAM6_SANDYBRIDGE_X:
                x86_add_quirk(intel_sandybridge_quirk);
                x86_add_quirk(intel_ht_bug);
                memcpy(hw_cache_event_ids, snb_hw_cache_event_ids,
                x86_pmu.event_constraints = intel_snb_event_constraints;
                x86_pmu.pebs_constraints = intel_snb_pebs_event_constraints;
                x86_pmu.pebs_aliases = intel_pebs_aliases_snb;
 -              if (boot_cpu_data.x86_model == 45)
 +              if (boot_cpu_data.x86_model == INTEL_FAM6_SANDYBRIDGE_X)
                        x86_pmu.extra_regs = intel_snbep_extra_regs;
                else
                        x86_pmu.extra_regs = intel_snb_extra_regs;
                pr_cont("SandyBridge events, ");
                break;
  
 -      case 58: /* 22nm IvyBridge       */
 -      case 62: /* 22nm IvyBridge-EP/EX */
 +      case INTEL_FAM6_IVYBRIDGE:
 +      case INTEL_FAM6_IVYBRIDGE_X:
                x86_add_quirk(intel_ht_bug);
                memcpy(hw_cache_event_ids, snb_hw_cache_event_ids,
                       sizeof(hw_cache_event_ids));
                x86_pmu.pebs_constraints = intel_ivb_pebs_event_constraints;
                x86_pmu.pebs_aliases = intel_pebs_aliases_ivb;
                x86_pmu.pebs_prec_dist = true;
 -              if (boot_cpu_data.x86_model == 62)
 +              if (boot_cpu_data.x86_model == INTEL_FAM6_IVYBRIDGE_X)
                        x86_pmu.extra_regs = intel_snbep_extra_regs;
                else
                        x86_pmu.extra_regs = intel_snb_extra_regs;
                break;
  
  
 -      case 60: /* 22nm Haswell Core */
 -      case 63: /* 22nm Haswell Server */
 -      case 69: /* 22nm Haswell ULT */
 -      case 70: /* 22nm Haswell + GT3e (Intel Iris Pro graphics) */
 +      case INTEL_FAM6_HASWELL_CORE:
 +      case INTEL_FAM6_HASWELL_X:
 +      case INTEL_FAM6_HASWELL_ULT:
 +      case INTEL_FAM6_HASWELL_GT3E:
                x86_add_quirk(intel_ht_bug);
                x86_pmu.late_ack = true;
                memcpy(hw_cache_event_ids, hsw_hw_cache_event_ids, sizeof(hw_cache_event_ids));
                pr_cont("Haswell events, ");
                break;
  
 -      case 61: /* 14nm Broadwell Core-M */
 -      case 86: /* 14nm Broadwell Xeon D */
 -      case 71: /* 14nm Broadwell + GT3e (Intel Iris Pro graphics) */
 -      case 79: /* 14nm Broadwell Server */
 +      case INTEL_FAM6_BROADWELL_CORE:
 +      case INTEL_FAM6_BROADWELL_XEON_D:
 +      case INTEL_FAM6_BROADWELL_GT3E:
 +      case INTEL_FAM6_BROADWELL_X:
                x86_pmu.late_ack = true;
                memcpy(hw_cache_event_ids, hsw_hw_cache_event_ids, sizeof(hw_cache_event_ids));
                memcpy(hw_cache_extra_regs, hsw_hw_cache_extra_regs, sizeof(hw_cache_extra_regs));
                pr_cont("Broadwell events, ");
                break;
  
 -      case 87: /* Knights Landing Xeon Phi */
 +      case INTEL_FAM6_XEON_PHI_KNL:
                memcpy(hw_cache_event_ids,
                       slm_hw_cache_event_ids, sizeof(hw_cache_event_ids));
                memcpy(hw_cache_extra_regs,
                pr_cont("Knights Landing events, ");
                break;
  
 -      case 142: /* 14nm Kabylake Mobile */
 -      case 158: /* 14nm Kabylake Desktop */
 -      case 78: /* 14nm Skylake Mobile */
 -      case 94: /* 14nm Skylake Desktop */
 -      case 85: /* 14nm Skylake Server */
 +      case INTEL_FAM6_SKYLAKE_MOBILE:
 +      case INTEL_FAM6_SKYLAKE_DESKTOP:
 +      case INTEL_FAM6_SKYLAKE_X:
 +      case INTEL_FAM6_KABYLAKE_MOBILE:
 +      case INTEL_FAM6_KABYLAKE_DESKTOP:
                x86_pmu.late_ack = true;
                memcpy(hw_cache_event_ids, skl_hw_cache_event_ids, sizeof(hw_cache_event_ids));
                memcpy(hw_cache_extra_regs, skl_hw_cache_extra_regs, sizeof(hw_cache_extra_regs));
                intel_pmu_lbr_init_skl();
  
 +              /* INT_MISC.RECOVERY_CYCLES has umask 1 in Skylake */
 +              event_attr_td_recovery_bubbles.event_str_noht =
 +                      "event=0xd,umask=0x1,cmask=1";
 +              event_attr_td_recovery_bubbles.event_str_ht =
 +                      "event=0xd,umask=0x1,cmask=1,any=1";
 +
                x86_pmu.event_constraints = intel_skl_event_constraints;
                x86_pmu.pebs_constraints = intel_skl_pebs_event_constraints;
                x86_pmu.extra_regs = intel_skl_extra_regs;
                        x86_pmu.lbr_nr = 0;
        }
  
 +      if (x86_pmu.lbr_nr)
 +              pr_cont("%d-deep LBR, ", x86_pmu.lbr_nr);
        /*
         * Access extra MSR may cause #GP under certain circumstances.
         * E.g. KVM doesn't support offcore event
   */
  static __init int fixup_ht_bug(void)
  {
 -      int cpu = smp_processor_id();
 -      int w, c;
 +      int c;
        /*
         * problem not present on this CPU model, nothing to do
         */
        if (!(x86_pmu.flags & PMU_FL_EXCL_ENABLED))
                return 0;
  
 -      w = cpumask_weight(topology_sibling_cpumask(cpu));
 -      if (w > 1) {
 +      if (topology_max_smt_threads() > 1) {
                pr_info("PMU erratum BJ122, BV98, HSD29 worked around, HT is on\n");
                return 0;
        }
index 4c7638b91fa56ea2dde92d50a818a3d8d3b7d1da,d6d7be0b34951cfcf7ae915ae1dcb6545e396a81..3ca87b5a8677608c86ac8d748b59ead0d160f580
@@@ -89,7 -89,6 +89,7 @@@
  #include <linux/slab.h>
  #include <linux/perf_event.h>
  #include <asm/cpu_device_id.h>
 +#include <asm/intel-family.h>
  #include "../perf_event.h"
  
  MODULE_LICENSE("GPL");
@@@ -366,7 -365,7 +366,7 @@@ static int cstate_pmu_event_add(struct 
   * Check if exiting cpu is the designated reader. If so migrate the
   * events when there is a valid target available
   */
- static void cstate_cpu_exit(int cpu)
+ static int cstate_cpu_exit(unsigned int cpu)
  {
        unsigned int target;
  
                        perf_pmu_migrate_context(&cstate_pkg_pmu, cpu, target);
                }
        }
+       return 0;
  }
  
- static void cstate_cpu_init(int cpu)
+ static int cstate_cpu_init(unsigned int cpu)
  {
        unsigned int target;
  
                                 topology_core_cpumask(cpu));
        if (has_cstate_pkg && target >= nr_cpu_ids)
                cpumask_set_cpu(cpu, &cstate_pkg_cpu_mask);
- }
  
- static int cstate_cpu_notifier(struct notifier_block *self,
-                              unsigned long action, void *hcpu)
- {
-       unsigned int cpu = (long)hcpu;
-       switch (action & ~CPU_TASKS_FROZEN) {
-       case CPU_STARTING:
-               cstate_cpu_init(cpu);
-               break;
-       case CPU_DOWN_PREPARE:
-               cstate_cpu_exit(cpu);
-               break;
-       default:
-               break;
-       }
-       return NOTIFY_OK;
+       return 0;
  }
  
- static struct notifier_block cstate_cpu_nb = {
-       .notifier_call  = cstate_cpu_notifier,
-       .priority       = CPU_PRI_PERF + 1,
- };
  static struct pmu cstate_core_pmu = {
        .attr_groups    = core_attr_groups,
        .name           = "cstate_core",
@@@ -512,37 -491,37 +492,37 @@@ static const struct cstate_model slm_cs
        { X86_VENDOR_INTEL, 6, model, X86_FEATURE_ANY, (unsigned long) &(states) }
  
  static const struct x86_cpu_id intel_cstates_match[] __initconst = {
 -      X86_CSTATES_MODEL(30, nhm_cstates),    /* 45nm Nehalem              */
 -      X86_CSTATES_MODEL(26, nhm_cstates),    /* 45nm Nehalem-EP           */
 -      X86_CSTATES_MODEL(46, nhm_cstates),    /* 45nm Nehalem-EX           */
 +      X86_CSTATES_MODEL(INTEL_FAM6_NEHALEM,    nhm_cstates),
 +      X86_CSTATES_MODEL(INTEL_FAM6_NEHALEM_EP, nhm_cstates),
 +      X86_CSTATES_MODEL(INTEL_FAM6_NEHALEM_EX, nhm_cstates),
  
 -      X86_CSTATES_MODEL(37, nhm_cstates),    /* 32nm Westmere             */
 -      X86_CSTATES_MODEL(44, nhm_cstates),    /* 32nm Westmere-EP          */
 -      X86_CSTATES_MODEL(47, nhm_cstates),    /* 32nm Westmere-EX          */
 +      X86_CSTATES_MODEL(INTEL_FAM6_WESTMERE,    nhm_cstates),
 +      X86_CSTATES_MODEL(INTEL_FAM6_WESTMERE_EP, nhm_cstates),
 +      X86_CSTATES_MODEL(INTEL_FAM6_WESTMERE_EX, nhm_cstates),
  
 -      X86_CSTATES_MODEL(42, snb_cstates),    /* 32nm SandyBridge          */
 -      X86_CSTATES_MODEL(45, snb_cstates),    /* 32nm SandyBridge-E/EN/EP  */
 +      X86_CSTATES_MODEL(INTEL_FAM6_SANDYBRIDGE,   snb_cstates),
 +      X86_CSTATES_MODEL(INTEL_FAM6_SANDYBRIDGE_X, snb_cstates),
  
 -      X86_CSTATES_MODEL(58, snb_cstates),    /* 22nm IvyBridge            */
 -      X86_CSTATES_MODEL(62, snb_cstates),    /* 22nm IvyBridge-EP/EX      */
 +      X86_CSTATES_MODEL(INTEL_FAM6_IVYBRIDGE,   snb_cstates),
 +      X86_CSTATES_MODEL(INTEL_FAM6_IVYBRIDGE_X, snb_cstates),
  
 -      X86_CSTATES_MODEL(60, snb_cstates),    /* 22nm Haswell Core         */
 -      X86_CSTATES_MODEL(63, snb_cstates),    /* 22nm Haswell Server       */
 -      X86_CSTATES_MODEL(70, snb_cstates),    /* 22nm Haswell + GT3e       */
 +      X86_CSTATES_MODEL(INTEL_FAM6_HASWELL_CORE, snb_cstates),
 +      X86_CSTATES_MODEL(INTEL_FAM6_HASWELL_X,    snb_cstates),
 +      X86_CSTATES_MODEL(INTEL_FAM6_HASWELL_GT3E, snb_cstates),
  
 -      X86_CSTATES_MODEL(69, hswult_cstates), /* 22nm Haswell ULT          */
 +      X86_CSTATES_MODEL(INTEL_FAM6_HASWELL_ULT, hswult_cstates),
  
 -      X86_CSTATES_MODEL(55, slm_cstates),    /* 22nm Atom Silvermont      */
 -      X86_CSTATES_MODEL(77, slm_cstates),    /* 22nm Atom Avoton/Rangely  */
 -      X86_CSTATES_MODEL(76, slm_cstates),    /* 22nm Atom Airmont         */
 +      X86_CSTATES_MODEL(INTEL_FAM6_ATOM_SILVERMONT1, slm_cstates),
 +      X86_CSTATES_MODEL(INTEL_FAM6_ATOM_SILVERMONT2, slm_cstates),
 +      X86_CSTATES_MODEL(INTEL_FAM6_ATOM_AIRMONT,     slm_cstates),
  
 -      X86_CSTATES_MODEL(61, snb_cstates),    /* 14nm Broadwell Core-M     */
 -      X86_CSTATES_MODEL(86, snb_cstates),    /* 14nm Broadwell Xeon D     */
 -      X86_CSTATES_MODEL(71, snb_cstates),    /* 14nm Broadwell + GT3e     */
 -      X86_CSTATES_MODEL(79, snb_cstates),    /* 14nm Broadwell Server     */
 +      X86_CSTATES_MODEL(INTEL_FAM6_BROADWELL_CORE,   snb_cstates),
 +      X86_CSTATES_MODEL(INTEL_FAM6_BROADWELL_XEON_D, snb_cstates),
 +      X86_CSTATES_MODEL(INTEL_FAM6_BROADWELL_GT3E,   snb_cstates),
 +      X86_CSTATES_MODEL(INTEL_FAM6_BROADWELL_X,      snb_cstates),
  
 -      X86_CSTATES_MODEL(78, snb_cstates),    /* 14nm Skylake Mobile       */
 -      X86_CSTATES_MODEL(94, snb_cstates),    /* 14nm Skylake Desktop      */
 +      X86_CSTATES_MODEL(INTEL_FAM6_SKYLAKE_MOBILE,  snb_cstates),
 +      X86_CSTATES_MODEL(INTEL_FAM6_SKYLAKE_DESKTOP, snb_cstates),
        { },
  };
  MODULE_DEVICE_TABLE(x86cpu, intel_cstates_match);
@@@ -600,18 -579,20 +580,20 @@@ static inline void cstate_cleanup(void
  
  static int __init cstate_init(void)
  {
-       int cpu, err;
+       int err;
  
-       cpu_notifier_register_begin();
-       for_each_online_cpu(cpu)
-               cstate_cpu_init(cpu);
+       cpuhp_setup_state(CPUHP_AP_PERF_X86_CSTATE_STARTING,
+                         "AP_PERF_X86_CSTATE_STARTING", cstate_cpu_init,
+                         NULL);
+       cpuhp_setup_state(CPUHP_AP_PERF_X86_CSTATE_ONLINE,
+                         "AP_PERF_X86_CSTATE_ONLINE", NULL, cstate_cpu_exit);
  
        if (has_cstate_core) {
                err = perf_pmu_register(&cstate_core_pmu, cstate_core_pmu.name, -1);
                if (err) {
                        has_cstate_core = false;
                        pr_info("Failed to register cstate core pmu\n");
-                       goto out;
+                       return err;
                }
        }
  
                        has_cstate_pkg = false;
                        pr_info("Failed to register cstate pkg pmu\n");
                        cstate_cleanup();
-                       goto out;
+                       return err;
                }
        }
-       __register_cpu_notifier(&cstate_cpu_nb);
- out:
-       cpu_notifier_register_done();
        return err;
  }
  
@@@ -652,9 -631,8 +632,8 @@@ module_init(cstate_pmu_init)
  
  static void __exit cstate_pmu_exit(void)
  {
-       cpu_notifier_register_begin();
-       __unregister_cpu_notifier(&cstate_cpu_nb);
+       cpuhp_remove_state_nocalls(CPUHP_AP_PERF_X86_CSTATE_ONLINE);
+       cpuhp_remove_state_nocalls(CPUHP_AP_PERF_X86_CSTATE_STARTING);
        cstate_cleanup();
-       cpu_notifier_register_done();
  }
  module_exit(cstate_pmu_exit);
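The cstate hunks above show the conversion pattern repeated throughout this merge: the multiplexed notifier callback, its notifier_block and its priority are dropped, each former notifier action becomes its own hotplug state with a plain int (*)(unsigned int cpu) callback, and cpuhp_setup_state() takes over what the old for_each_online_cpu() loop under cpu_notifier_register_begin/done() used to do. The sketch below is not part of the commit; it is a minimal out-of-tree illustration with hypothetical foo_* names, using the dynamic CPUHP_AP_ONLINE_DYN range instead of the fixed states this series adds.

#include <linux/cpu.h>
#include <linux/cpuhotplug.h>
#include <linux/module.h>

static enum cpuhp_state foo_hp_state;

static int foo_cpu_online(unsigned int cpu)
{
        /* per-CPU setup that used to run from CPU_ONLINE */
        return 0;
}

static int foo_cpu_offline(unsigned int cpu)
{
        /* per-CPU teardown that used to run from CPU_DOWN_PREPARE */
        return 0;
}

static int __init foo_init(void)
{
        int ret;

        /*
         * The core calls foo_cpu_online() for every CPU that is already
         * online, so no explicit for_each_online_cpu() loop and no
         * cpu_notifier_register_begin/done() bracketing is needed.
         */
        ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "foo:online",
                                foo_cpu_online, foo_cpu_offline);
        if (ret < 0)
                return ret;
        foo_hp_state = ret;     /* the dynamic range returns the state id */
        return 0;
}

static void __exit foo_exit(void)
{
        /* runs foo_cpu_offline() on all online CPUs, then frees the state */
        cpuhp_remove_state(foo_hp_state);
}

module_init(foo_init);
module_exit(foo_exit);
MODULE_LICENSE("GPL");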
index d0c58b35155f1721b59f6b935a65df9dfe814a65,6255ede56174e53ce4889426c2a879598e3a74d8..28865938aadf267e42829c3393cc405b0bf0672e
@@@ -55,7 -55,6 +55,7 @@@
  #include <linux/slab.h>
  #include <linux/perf_event.h>
  #include <asm/cpu_device_id.h>
 +#include <asm/intel-family.h>
  #include "../perf_event.h"
  
  MODULE_LICENSE("GPL");
@@@ -556,14 -555,14 +556,14 @@@ const struct attribute_group *rapl_attr
        NULL,
  };
  
- static void rapl_cpu_exit(int cpu)
+ static int rapl_cpu_offline(unsigned int cpu)
  {
        struct rapl_pmu *pmu = cpu_to_rapl_pmu(cpu);
        int target;
  
        /* Check if exiting cpu is used for collecting rapl events */
        if (!cpumask_test_and_clear_cpu(cpu, &rapl_cpu_mask))
-               return;
+               return 0;
  
        pmu->cpu = -1;
        /* Find a new cpu to collect rapl events */
                pmu->cpu = target;
                perf_pmu_migrate_context(pmu->pmu, cpu, target);
        }
+       return 0;
  }
  
- static void rapl_cpu_init(int cpu)
+ static int rapl_cpu_online(unsigned int cpu)
  {
        struct rapl_pmu *pmu = cpu_to_rapl_pmu(cpu);
        int target;
         */
        target = cpumask_any_and(&rapl_cpu_mask, topology_core_cpumask(cpu));
        if (target < nr_cpu_ids)
-               return;
+               return 0;
  
        cpumask_set_cpu(cpu, &rapl_cpu_mask);
        pmu->cpu = cpu;
+       return 0;
  }
  
- static int rapl_cpu_prepare(int cpu)
+ static int rapl_cpu_prepare(unsigned int cpu)
  {
        struct rapl_pmu *pmu = cpu_to_rapl_pmu(cpu);
  
        return 0;
  }
  
- static int rapl_cpu_notifier(struct notifier_block *self,
-                            unsigned long action, void *hcpu)
- {
-       unsigned int cpu = (long)hcpu;
-       switch (action & ~CPU_TASKS_FROZEN) {
-       case CPU_UP_PREPARE:
-               rapl_cpu_prepare(cpu);
-               break;
-       case CPU_DOWN_FAILED:
-       case CPU_ONLINE:
-               rapl_cpu_init(cpu);
-               break;
-       case CPU_DOWN_PREPARE:
-               rapl_cpu_exit(cpu);
-               break;
-       }
-       return NOTIFY_OK;
- }
- static struct notifier_block rapl_cpu_nb = {
-       .notifier_call  = rapl_cpu_notifier,
-       .priority       = CPU_PRI_PERF + 1,
- };
  static int rapl_check_hw_unit(bool apply_quirk)
  {
        u64 msr_rapl_power_unit_bits;
@@@ -692,24 -666,6 +667,6 @@@ static void __init rapl_advertise(void
        }
  }
  
- static int __init rapl_prepare_cpus(void)
- {
-       unsigned int cpu, pkg;
-       int ret;
-       for_each_online_cpu(cpu) {
-               pkg = topology_logical_package_id(cpu);
-               if (rapl_pmus->pmus[pkg])
-                       continue;
-               ret = rapl_cpu_prepare(cpu);
-               if (ret)
-                       return ret;
-               rapl_cpu_init(cpu);
-       }
-       return 0;
- }
  static void cleanup_rapl_pmus(void)
  {
        int i;
@@@ -787,27 -743,26 +744,27 @@@ static const struct intel_rapl_init_fu
  };
  
  static const struct x86_cpu_id rapl_cpu_match[] __initconst = {
 -      X86_RAPL_MODEL_MATCH(42, snb_rapl_init),        /* Sandy Bridge */
 -      X86_RAPL_MODEL_MATCH(45, snbep_rapl_init),      /* Sandy Bridge-EP */
 +      X86_RAPL_MODEL_MATCH(INTEL_FAM6_SANDYBRIDGE,   snb_rapl_init),
 +      X86_RAPL_MODEL_MATCH(INTEL_FAM6_SANDYBRIDGE_X, snbep_rapl_init),
  
 -      X86_RAPL_MODEL_MATCH(58, snb_rapl_init),        /* Ivy Bridge */
 -      X86_RAPL_MODEL_MATCH(62, snbep_rapl_init),      /* IvyTown */
 +      X86_RAPL_MODEL_MATCH(INTEL_FAM6_IVYBRIDGE,   snb_rapl_init),
 +      X86_RAPL_MODEL_MATCH(INTEL_FAM6_IVYBRIDGE_X, snbep_rapl_init),
  
 -      X86_RAPL_MODEL_MATCH(60, hsw_rapl_init),        /* Haswell */
 -      X86_RAPL_MODEL_MATCH(63, hsx_rapl_init),        /* Haswell-Server */
 -      X86_RAPL_MODEL_MATCH(69, hsw_rapl_init),        /* Haswell-Celeron */
 -      X86_RAPL_MODEL_MATCH(70, hsw_rapl_init),        /* Haswell GT3e */
 +      X86_RAPL_MODEL_MATCH(INTEL_FAM6_HASWELL_CORE, hsw_rapl_init),
 +      X86_RAPL_MODEL_MATCH(INTEL_FAM6_HASWELL_X,    hsw_rapl_init),
 +      X86_RAPL_MODEL_MATCH(INTEL_FAM6_HASWELL_ULT,  hsw_rapl_init),
 +      X86_RAPL_MODEL_MATCH(INTEL_FAM6_HASWELL_GT3E, hsw_rapl_init),
  
 -      X86_RAPL_MODEL_MATCH(61, hsw_rapl_init),        /* Broadwell */
 -      X86_RAPL_MODEL_MATCH(71, hsw_rapl_init),        /* Broadwell-H */
 -      X86_RAPL_MODEL_MATCH(79, hsx_rapl_init),        /* Broadwell-Server */
 -      X86_RAPL_MODEL_MATCH(86, hsx_rapl_init),        /* Broadwell Xeon D */
 +      X86_RAPL_MODEL_MATCH(INTEL_FAM6_BROADWELL_CORE,   hsw_rapl_init),
 +      X86_RAPL_MODEL_MATCH(INTEL_FAM6_BROADWELL_GT3E,   hsw_rapl_init),
 +      X86_RAPL_MODEL_MATCH(INTEL_FAM6_BROADWELL_X,      hsw_rapl_init),
 +      X86_RAPL_MODEL_MATCH(INTEL_FAM6_BROADWELL_XEON_D, hsw_rapl_init),
  
 -      X86_RAPL_MODEL_MATCH(87, knl_rapl_init),        /* Knights Landing */
 +      X86_RAPL_MODEL_MATCH(INTEL_FAM6_XEON_PHI_KNL, knl_rapl_init),
  
 -      X86_RAPL_MODEL_MATCH(78, skl_rapl_init),        /* Skylake */
 -      X86_RAPL_MODEL_MATCH(94, skl_rapl_init),        /* Skylake H/S */
 +      X86_RAPL_MODEL_MATCH(INTEL_FAM6_SKYLAKE_MOBILE,  skl_rapl_init),
 +      X86_RAPL_MODEL_MATCH(INTEL_FAM6_SKYLAKE_DESKTOP, skl_rapl_init),
 +      X86_RAPL_MODEL_MATCH(INTEL_FAM6_SKYLAKE_X,       hsx_rapl_init),
        {},
  };
  
@@@ -837,35 -792,44 +794,44 @@@ static int __init rapl_pmu_init(void
        if (ret)
                return ret;
  
-       cpu_notifier_register_begin();
+       /*
+        * Install callbacks. Core will call them for each online cpu.
+        */
  
-       ret = rapl_prepare_cpus();
+       ret = cpuhp_setup_state(CPUHP_PERF_X86_RAPL_PREP, "PERF_X86_RAPL_PREP",
+                               rapl_cpu_prepare, NULL);
        if (ret)
                goto out;
  
+       ret = cpuhp_setup_state(CPUHP_AP_PERF_X86_RAPL_ONLINE,
+                               "AP_PERF_X86_RAPL_ONLINE",
+                               rapl_cpu_online, rapl_cpu_offline);
+       if (ret)
+               goto out1;
        ret = perf_pmu_register(&rapl_pmus->pmu, "power", -1);
        if (ret)
-               goto out;
+               goto out2;
  
-       __register_cpu_notifier(&rapl_cpu_nb);
-       cpu_notifier_register_done();
        rapl_advertise();
        return 0;
  
+ out2:
+       cpuhp_remove_state(CPUHP_AP_PERF_X86_RAPL_ONLINE);
+ out1:
+       cpuhp_remove_state(CPUHP_PERF_X86_RAPL_PREP);
  out:
        pr_warn("Initialization failed (%d), disabled\n", ret);
        cleanup_rapl_pmus();
-       cpu_notifier_register_done();
        return ret;
  }
  module_init(rapl_pmu_init);
  
  static void __exit intel_rapl_exit(void)
  {
-       cpu_notifier_register_begin();
-       __unregister_cpu_notifier(&rapl_cpu_nb);
+       cpuhp_remove_state_nocalls(CPUHP_AP_PERF_X86_RAPL_ONLINE);
+       cpuhp_remove_state_nocalls(CPUHP_PERF_X86_RAPL_PREP);
        perf_pmu_unregister(&rapl_pmus->pmu);
        cleanup_rapl_pmus();
-       cpu_notifier_register_done();
  }
  module_exit(intel_rapl_exit);
index 59b4974c697fbc169e1c74929949258a4e9f7503,8e280a7cd0cc0c8386db524dc0d94c0fb236cf46..3f3d0d67749b634226286975d66cf45f9ebd63cb
@@@ -1,5 -1,4 +1,5 @@@
  #include <asm/cpu_device_id.h>
 +#include <asm/intel-family.h>
  #include "uncore.h"
  
  static struct intel_uncore_type *empty_uncore[] = { NULL, };
@@@ -883,7 -882,7 +883,7 @@@ uncore_types_init(struct intel_uncore_t
  static int uncore_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
  {
        struct intel_uncore_type *type;
 -      struct intel_uncore_pmu *pmu;
 +      struct intel_uncore_pmu *pmu = NULL;
        struct intel_uncore_box *box;
        int phys_id, pkg, ret;
  
        }
  
        type = uncore_pci_uncores[UNCORE_PCI_DEV_TYPE(id->driver_data)];
 +
        /*
 -       * for performance monitoring unit with multiple boxes,
 -       * each box has a different function id.
 -       */
 -      pmu = &type->pmus[UNCORE_PCI_DEV_IDX(id->driver_data)];
 -      /* Knights Landing uses a common PCI device ID for multiple instances of
 -       * an uncore PMU device type. There is only one entry per device type in
 -       * the knl_uncore_pci_ids table inspite of multiple devices present for
 -       * some device types. Hence PCI device idx would be 0 for all devices.
 -       * So increment pmu pointer to point to an unused array element.
 +       * Some platforms, e.g.  Knights Landing, use a common PCI device ID
 +       * for multiple instances of an uncore PMU device type. We should check
 +       * PCI slot and func to indicate the uncore box.
         */
 -      if (boot_cpu_data.x86_model == 87) {
 -              while (pmu->func_id >= 0)
 -                      pmu++;
 +      if (id->driver_data & ~0xffff) {
 +              struct pci_driver *pci_drv = pdev->driver;
 +              const struct pci_device_id *ids = pci_drv->id_table;
 +              unsigned int devfn;
 +
 +              while (ids && ids->vendor) {
 +                      if ((ids->vendor == pdev->vendor) &&
 +                          (ids->device == pdev->device)) {
 +                              devfn = PCI_DEVFN(UNCORE_PCI_DEV_DEV(ids->driver_data),
 +                                                UNCORE_PCI_DEV_FUNC(ids->driver_data));
 +                              if (devfn == pdev->devfn) {
 +                                      pmu = &type->pmus[UNCORE_PCI_DEV_IDX(ids->driver_data)];
 +                                      break;
 +                              }
 +                      }
 +                      ids++;
 +              }
 +              if (pmu == NULL)
 +                      return -ENODEV;
 +      } else {
 +              /*
 +               * for performance monitoring unit with multiple boxes,
 +               * each box has a different function id.
 +               */
 +              pmu = &type->pmus[UNCORE_PCI_DEV_IDX(id->driver_data)];
        }
  
        if (WARN_ON_ONCE(pmu->boxes[pkg] != NULL))
  
  static void uncore_pci_remove(struct pci_dev *pdev)
  {
 -      struct intel_uncore_box *box = pci_get_drvdata(pdev);
 +      struct intel_uncore_box *box;
        struct intel_uncore_pmu *pmu;
        int i, phys_id, pkg;
  
@@@ -1052,7 -1034,7 +1052,7 @@@ static void uncore_pci_exit(void
        }
  }
  
- static void uncore_cpu_dying(int cpu)
+ static int uncore_cpu_dying(unsigned int cpu)
  {
        struct intel_uncore_type *type, **types = uncore_msr_uncores;
        struct intel_uncore_pmu *pmu;
                                uncore_box_exit(box);
                }
        }
+       return 0;
  }
  
- static void uncore_cpu_starting(int cpu, bool init)
+ static int first_init;
+ static int uncore_cpu_starting(unsigned int cpu)
  {
        struct intel_uncore_type *type, **types = uncore_msr_uncores;
        struct intel_uncore_pmu *pmu;
        struct intel_uncore_box *box;
        int i, pkg, ncpus = 1;
  
-       if (init) {
+       if (first_init) {
                /*
                 * On init we get the number of online cpus in the package
                 * and set refcount for all of them.
                                uncore_box_init(box);
                }
        }
+       return 0;
  }
  
- static int uncore_cpu_prepare(int cpu)
+ static int uncore_cpu_prepare(unsigned int cpu)
  {
        struct intel_uncore_type *type, **types = uncore_msr_uncores;
        struct intel_uncore_pmu *pmu;
@@@ -1164,13 -1151,13 +1169,13 @@@ static void uncore_change_context(struc
                uncore_change_type_ctx(*uncores, old_cpu, new_cpu);
  }
  
- static void uncore_event_exit_cpu(int cpu)
+ static int uncore_event_cpu_offline(unsigned int cpu)
  {
        int target;
  
        /* Check if exiting cpu is used for collecting uncore events */
        if (!cpumask_test_and_clear_cpu(cpu, &uncore_cpu_mask))
-               return;
+               return 0;
  
        /* Find a new cpu to collect uncore events */
        target = cpumask_any_but(topology_core_cpumask(cpu), cpu);
  
        uncore_change_context(uncore_msr_uncores, cpu, target);
        uncore_change_context(uncore_pci_uncores, cpu, target);
+       return 0;
  }
  
- static void uncore_event_init_cpu(int cpu)
+ static int uncore_event_cpu_online(unsigned int cpu)
  {
        int target;
  
         */
        target = cpumask_any_and(&uncore_cpu_mask, topology_core_cpumask(cpu));
        if (target < nr_cpu_ids)
-               return;
+               return 0;
  
        cpumask_set_cpu(cpu, &uncore_cpu_mask);
  
        uncore_change_context(uncore_msr_uncores, -1, cpu);
        uncore_change_context(uncore_pci_uncores, -1, cpu);
+       return 0;
  }
  
- static int uncore_cpu_notifier(struct notifier_block *self,
-                              unsigned long action, void *hcpu)
- {
-       unsigned int cpu = (long)hcpu;
-       switch (action & ~CPU_TASKS_FROZEN) {
-       case CPU_UP_PREPARE:
-               return notifier_from_errno(uncore_cpu_prepare(cpu));
-       case CPU_STARTING:
-               uncore_cpu_starting(cpu, false);
-       case CPU_DOWN_FAILED:
-               uncore_event_init_cpu(cpu);
-               break;
-       case CPU_UP_CANCELED:
-       case CPU_DYING:
-               uncore_cpu_dying(cpu);
-               break;
-       case CPU_DOWN_PREPARE:
-               uncore_event_exit_cpu(cpu);
-               break;
-       }
-       return NOTIFY_OK;
- }
- static struct notifier_block uncore_cpu_nb = {
-       .notifier_call  = uncore_cpu_notifier,
-       /*
-        * to migrate uncore events, our notifier should be executed
-        * before perf core's notifier.
-        */
-       .priority       = CPU_PRI_PERF + 1,
- };
  static int __init type_pmu_register(struct intel_uncore_type *type)
  {
        int i, ret;
@@@ -1282,41 -1235,6 +1253,6 @@@ err
        return ret;
  }
  
- static void __init uncore_cpu_setup(void *dummy)
- {
-       uncore_cpu_starting(smp_processor_id(), true);
- }
- /* Lazy to avoid allocation of a few bytes for the normal case */
- static __initdata DECLARE_BITMAP(packages, MAX_LOCAL_APIC);
- static int __init uncore_cpumask_init(bool msr)
- {
-       unsigned int cpu;
-       for_each_online_cpu(cpu) {
-               unsigned int pkg = topology_logical_package_id(cpu);
-               int ret;
-               if (test_and_set_bit(pkg, packages))
-                       continue;
-               /*
-                * The first online cpu of each package allocates and takes
-                * the refcounts for all other online cpus in that package.
-                * If msrs are not enabled no allocation is required.
-                */
-               if (msr) {
-                       ret = uncore_cpu_prepare(cpu);
-                       if (ret)
-                               return ret;
-               }
-               uncore_event_init_cpu(cpu);
-               smp_call_function_single(cpu, uncore_cpu_setup, NULL, 1);
-       }
-       __register_cpu_notifier(&uncore_cpu_nb);
-       return 0;
- }
  #define X86_UNCORE_MODEL_MATCH(model, init)   \
        { X86_VENDOR_INTEL, 6, model, X86_FEATURE_ANY, (unsigned long)&init }
  
@@@ -1379,32 -1297,30 +1315,32 @@@ static const struct intel_uncore_init_f
  };
  
  static const struct intel_uncore_init_fun skl_uncore_init __initconst = {
 +      .cpu_init = skl_uncore_cpu_init,
        .pci_init = skl_uncore_pci_init,
  };
  
  static const struct x86_cpu_id intel_uncore_match[] __initconst = {
 -      X86_UNCORE_MODEL_MATCH(26, nhm_uncore_init),    /* Nehalem */
 -      X86_UNCORE_MODEL_MATCH(30, nhm_uncore_init),
 -      X86_UNCORE_MODEL_MATCH(37, nhm_uncore_init),    /* Westmere */
 -      X86_UNCORE_MODEL_MATCH(44, nhm_uncore_init),
 -      X86_UNCORE_MODEL_MATCH(42, snb_uncore_init),    /* Sandy Bridge */
 -      X86_UNCORE_MODEL_MATCH(58, ivb_uncore_init),    /* Ivy Bridge */
 -      X86_UNCORE_MODEL_MATCH(60, hsw_uncore_init),    /* Haswell */
 -      X86_UNCORE_MODEL_MATCH(69, hsw_uncore_init),    /* Haswell Celeron */
 -      X86_UNCORE_MODEL_MATCH(70, hsw_uncore_init),    /* Haswell */
 -      X86_UNCORE_MODEL_MATCH(61, bdw_uncore_init),    /* Broadwell */
 -      X86_UNCORE_MODEL_MATCH(71, bdw_uncore_init),    /* Broadwell */
 -      X86_UNCORE_MODEL_MATCH(45, snbep_uncore_init),  /* Sandy Bridge-EP */
 -      X86_UNCORE_MODEL_MATCH(46, nhmex_uncore_init),  /* Nehalem-EX */
 -      X86_UNCORE_MODEL_MATCH(47, nhmex_uncore_init),  /* Westmere-EX aka. Xeon E7 */
 -      X86_UNCORE_MODEL_MATCH(62, ivbep_uncore_init),  /* Ivy Bridge-EP */
 -      X86_UNCORE_MODEL_MATCH(63, hswep_uncore_init),  /* Haswell-EP */
 -      X86_UNCORE_MODEL_MATCH(79, bdx_uncore_init),    /* BDX-EP */
 -      X86_UNCORE_MODEL_MATCH(86, bdx_uncore_init),    /* BDX-DE */
 -      X86_UNCORE_MODEL_MATCH(87, knl_uncore_init),    /* Knights Landing */
 -      X86_UNCORE_MODEL_MATCH(94, skl_uncore_init),    /* SkyLake */
 +      X86_UNCORE_MODEL_MATCH(INTEL_FAM6_NEHALEM_EP,     nhm_uncore_init),
 +      X86_UNCORE_MODEL_MATCH(INTEL_FAM6_NEHALEM,        nhm_uncore_init),
 +      X86_UNCORE_MODEL_MATCH(INTEL_FAM6_WESTMERE,       nhm_uncore_init),
 +      X86_UNCORE_MODEL_MATCH(INTEL_FAM6_WESTMERE_EP,    nhm_uncore_init),
 +      X86_UNCORE_MODEL_MATCH(INTEL_FAM6_SANDYBRIDGE,    snb_uncore_init),
 +      X86_UNCORE_MODEL_MATCH(INTEL_FAM6_IVYBRIDGE,      ivb_uncore_init),
 +      X86_UNCORE_MODEL_MATCH(INTEL_FAM6_HASWELL_CORE,   hsw_uncore_init),
 +      X86_UNCORE_MODEL_MATCH(INTEL_FAM6_HASWELL_ULT,    hsw_uncore_init),
 +      X86_UNCORE_MODEL_MATCH(INTEL_FAM6_HASWELL_GT3E,   hsw_uncore_init),
 +      X86_UNCORE_MODEL_MATCH(INTEL_FAM6_BROADWELL_CORE, bdw_uncore_init),
 +      X86_UNCORE_MODEL_MATCH(INTEL_FAM6_BROADWELL_GT3E, bdw_uncore_init),
 +      X86_UNCORE_MODEL_MATCH(INTEL_FAM6_SANDYBRIDGE_X,  snbep_uncore_init),
 +      X86_UNCORE_MODEL_MATCH(INTEL_FAM6_NEHALEM_EX,     nhmex_uncore_init),
 +      X86_UNCORE_MODEL_MATCH(INTEL_FAM6_WESTMERE_EX,    nhmex_uncore_init),
 +      X86_UNCORE_MODEL_MATCH(INTEL_FAM6_IVYBRIDGE_X,    ivbep_uncore_init),
 +      X86_UNCORE_MODEL_MATCH(INTEL_FAM6_HASWELL_X,      hswep_uncore_init),
 +      X86_UNCORE_MODEL_MATCH(INTEL_FAM6_BROADWELL_X,    bdx_uncore_init),
 +      X86_UNCORE_MODEL_MATCH(INTEL_FAM6_BROADWELL_XEON_D, bdx_uncore_init),
 +      X86_UNCORE_MODEL_MATCH(INTEL_FAM6_XEON_PHI_KNL,   knl_uncore_init),
 +      X86_UNCORE_MODEL_MATCH(INTEL_FAM6_SKYLAKE_DESKTOP,skl_uncore_init),
 +      X86_UNCORE_MODEL_MATCH(INTEL_FAM6_SKYLAKE_MOBILE, skl_uncore_init),
        {},
  };
  
@@@ -1440,11 -1356,33 +1376,33 @@@ static int __init intel_uncore_init(voi
        if (cret && pret)
                return -ENODEV;
  
-       cpu_notifier_register_begin();
-       ret = uncore_cpumask_init(!cret);
-       if (ret)
-               goto err;
-       cpu_notifier_register_done();
+       /*
+        * Install callbacks. Core will call them for each online cpu.
+        *
+        * The first online cpu of each package allocates and takes
+        * the refcounts for all other online cpus in that package.
+        * If msrs are not enabled no allocation is required and
+        * uncore_cpu_prepare() is not called for each online cpu.
+        */
+       if (!cret) {
+              ret = cpuhp_setup_state(CPUHP_PERF_X86_UNCORE_PREP,
+                                       "PERF_X86_UNCORE_PREP",
+                                       uncore_cpu_prepare, NULL);
+               if (ret)
+                       goto err;
+       } else {
+               cpuhp_setup_state_nocalls(CPUHP_PERF_X86_UNCORE_PREP,
+                                         "PERF_X86_UNCORE_PREP",
+                                         uncore_cpu_prepare, NULL);
+       }
+       first_init = 1;
+       cpuhp_setup_state(CPUHP_AP_PERF_X86_UNCORE_STARTING,
+                         "AP_PERF_X86_UNCORE_STARTING",
+                         uncore_cpu_starting, uncore_cpu_dying);
+       first_init = 0;
+       cpuhp_setup_state(CPUHP_AP_PERF_X86_UNCORE_ONLINE,
+                         "AP_PERF_X86_UNCORE_ONLINE",
+                         uncore_event_cpu_online, uncore_event_cpu_offline);
        return 0;
  
  err:
        on_each_cpu_mask(&uncore_cpu_mask, uncore_exit_boxes, NULL, 1);
        uncore_types_exit(uncore_msr_uncores);
        uncore_pci_exit();
-       cpu_notifier_register_done();
        return ret;
  }
  module_init(intel_uncore_init);
  
  static void __exit intel_uncore_exit(void)
  {
-       cpu_notifier_register_begin();
-       __unregister_cpu_notifier(&uncore_cpu_nb);
+       cpuhp_remove_state_nocalls(CPUHP_AP_PERF_X86_UNCORE_ONLINE);
+       cpuhp_remove_state_nocalls(CPUHP_AP_PERF_X86_UNCORE_STARTING);
+       cpuhp_remove_state_nocalls(CPUHP_PERF_X86_UNCORE_PREP);
        uncore_types_exit(uncore_msr_uncores);
        uncore_pci_exit();
-       cpu_notifier_register_done();
  }
  module_exit(intel_uncore_exit);
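A detail worth noting in the uncore initialization above: cpuhp_setup_state() also invokes the startup callback for every CPU that is already online, while cpuhp_setup_state_nocalls() merely installs the callbacks for later hotplug operations, which is why the cret (no MSR uncores) branch registers the prepare state without ever running uncore_cpu_prepare(). A hedged sketch of that distinction, with hypothetical bar_* names and a dynamic state standing in for CPUHP_PERF_X86_UNCORE_PREP:

#include <linux/cpuhotplug.h>
#include <linux/init.h>
#include <linux/types.h>

/* Illustrative only; bar_prepare() and the have_hw flag are stand-ins. */
static int bar_prepare(unsigned int cpu)
{
        /* per-CPU setup; runs in process context and may sleep */
        return 0;
}

static int __init bar_install(bool have_hw)
{
        int ret;

        if (have_hw)
                /* runs bar_prepare() on every CPU that is already online */
                ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "bar:prepare",
                                        bar_prepare, NULL);
        else
                /* only arms the callback for CPUs that come online later */
                ret = cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN,
                                                "bar:prepare",
                                                bar_prepare, NULL);

        /* dynamic states return a positive state id on success */
        return ret < 0 ? ret : 0;
}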
index 24170d0809ba9e45eb88fce04c0a7be4fe708b8b,b5da5a8e5e45052c223e8886fd4b74636e8251af..6368fa69d2afa0eb44c5e90fe5293c4ceeafa93c
@@@ -152,68 -152,48 +152,48 @@@ static void init_x2apic_ldr(void
        }
  }
  
-  /*
-   * At CPU state changes, update the x2apic cluster sibling info.
-   */
- static int
- update_clusterinfo(struct notifier_block *nfb, unsigned long action, void *hcpu)
+ /*
+  * At CPU state changes, update the x2apic cluster sibling info.
+  */
+ int x2apic_prepare_cpu(unsigned int cpu)
  {
-       unsigned int this_cpu = (unsigned long)hcpu;
-       unsigned int cpu;
-       int err = 0;
-       switch (action) {
-       case CPU_UP_PREPARE:
-               if (!zalloc_cpumask_var(&per_cpu(cpus_in_cluster, this_cpu),
-                                       GFP_KERNEL)) {
-                       err = -ENOMEM;
-               } else if (!zalloc_cpumask_var(&per_cpu(ipi_mask, this_cpu),
-                                              GFP_KERNEL)) {
-                       free_cpumask_var(per_cpu(cpus_in_cluster, this_cpu));
-                       err = -ENOMEM;
-               }
-               break;
-       case CPU_UP_CANCELED:
-       case CPU_UP_CANCELED_FROZEN:
-       case CPU_DEAD:
-               for_each_online_cpu(cpu) {
-                       if (x2apic_cluster(this_cpu) != x2apic_cluster(cpu))
-                               continue;
-                       cpumask_clear_cpu(this_cpu, per_cpu(cpus_in_cluster, cpu));
-                       cpumask_clear_cpu(cpu, per_cpu(cpus_in_cluster, this_cpu));
-               }
-               free_cpumask_var(per_cpu(cpus_in_cluster, this_cpu));
-               free_cpumask_var(per_cpu(ipi_mask, this_cpu));
-               break;
+       if (!zalloc_cpumask_var(&per_cpu(cpus_in_cluster, cpu), GFP_KERNEL))
+               return -ENOMEM;
+       if (!zalloc_cpumask_var(&per_cpu(ipi_mask, cpu), GFP_KERNEL)) {
+               free_cpumask_var(per_cpu(cpus_in_cluster, cpu));
+               return -ENOMEM;
        }
  
-       return notifier_from_errno(err);
+       return 0;
  }
  
- static struct notifier_block x2apic_cpu_notifier = {
-       .notifier_call = update_clusterinfo,
- };
- static int x2apic_init_cpu_notifier(void)
+ int x2apic_dead_cpu(unsigned int this_cpu)
  {
-       int cpu = smp_processor_id();
-       zalloc_cpumask_var(&per_cpu(cpus_in_cluster, cpu), GFP_KERNEL);
-       zalloc_cpumask_var(&per_cpu(ipi_mask, cpu), GFP_KERNEL);
+       int cpu;
  
-       BUG_ON(!per_cpu(cpus_in_cluster, cpu) || !per_cpu(ipi_mask, cpu));
-       cpumask_set_cpu(cpu, per_cpu(cpus_in_cluster, cpu));
-       register_hotcpu_notifier(&x2apic_cpu_notifier);
-       return 1;
+       for_each_online_cpu(cpu) {
+               if (x2apic_cluster(this_cpu) != x2apic_cluster(cpu))
+                       continue;
+               cpumask_clear_cpu(this_cpu, per_cpu(cpus_in_cluster, cpu));
+               cpumask_clear_cpu(cpu, per_cpu(cpus_in_cluster, this_cpu));
+       }
+       free_cpumask_var(per_cpu(cpus_in_cluster, this_cpu));
+       free_cpumask_var(per_cpu(ipi_mask, this_cpu));
+       return 0;
  }
  
  static int x2apic_cluster_probe(void)
  {
-       if (x2apic_mode)
-               return x2apic_init_cpu_notifier();
-       else
+       int cpu = smp_processor_id();
+       if (!x2apic_mode)
                return 0;
+       cpumask_set_cpu(cpu, per_cpu(cpus_in_cluster, cpu));
+       cpuhp_setup_state(CPUHP_X2APIC_PREPARE, "X2APIC_PREPARE",
+                         x2apic_prepare_cpu, x2apic_dead_cpu);
+       return 1;
  }
  
  static const struct cpumask *x2apic_cluster_target_cpus(void)
@@@ -270,6 -250,7 +250,6 @@@ static struct apic apic_x2apic_cluster 
  
        .get_apic_id                    = x2apic_get_apic_id,
        .set_apic_id                    = x2apic_set_apic_id,
 -      .apic_id_mask                   = 0xFFFFFFFFu,
  
        .cpu_mask_to_apicid_and         = x2apic_cpu_mask_to_apicid_and,
  
diff --combined arch/x86/kvm/x86.c
index b2766723c951e967a992a9730c6b283151d41076,f899127b4832884209b5d239279eeea8aad07068..45608a7da9b3406b0a93bfc3620be089bc564e5e
@@@ -55,6 -55,9 +55,6 @@@
  #include <linux/irqbypass.h>
  #include <trace/events/kvm.h>
  
 -#define CREATE_TRACE_POINTS
 -#include "trace.h"
 -
  #include <asm/debugreg.h>
  #include <asm/msr.h>
  #include <asm/desc.h>
@@@ -65,9 -68,6 +65,9 @@@
  #include <asm/div64.h>
  #include <asm/irq_remapping.h>
  
 +#define CREATE_TRACE_POINTS
 +#include "trace.h"
 +
  #define MAX_IO_MSRS 256
  #define KVM_MAX_MCE_BANKS 32
  #define KVM_MCE_CAP_SUPPORTED (MCG_CTL_P | MCG_SER_P)
@@@ -5552,9 -5552,10 +5552,10 @@@ int kvm_fast_pio_out(struct kvm_vcpu *v
  }
  EXPORT_SYMBOL_GPL(kvm_fast_pio_out);
  
- static void tsc_bad(void *info)
+ static int kvmclock_cpu_down_prep(unsigned int cpu)
  {
        __this_cpu_write(cpu_tsc_khz, 0);
+       return 0;
  }
  
  static void tsc_khz_changed(void *data)
@@@ -5659,35 -5660,18 +5660,18 @@@ static struct notifier_block kvmclock_c
        .notifier_call  = kvmclock_cpufreq_notifier
  };
  
- static int kvmclock_cpu_notifier(struct notifier_block *nfb,
-                                       unsigned long action, void *hcpu)
+ static int kvmclock_cpu_online(unsigned int cpu)
  {
-       unsigned int cpu = (unsigned long)hcpu;
-       switch (action) {
-               case CPU_ONLINE:
-               case CPU_DOWN_FAILED:
-                       smp_call_function_single(cpu, tsc_khz_changed, NULL, 1);
-                       break;
-               case CPU_DOWN_PREPARE:
-                       smp_call_function_single(cpu, tsc_bad, NULL, 1);
-                       break;
-       }
-       return NOTIFY_OK;
+       tsc_khz_changed(NULL);
+       return 0;
  }
  
- static struct notifier_block kvmclock_cpu_notifier_block = {
-       .notifier_call  = kvmclock_cpu_notifier,
-       .priority = -INT_MAX
- };
  static void kvm_timer_init(void)
  {
        int cpu;
  
        max_tsc_khz = tsc_khz;
  
-       cpu_notifier_register_begin();
        if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) {
  #ifdef CONFIG_CPU_FREQ
                struct cpufreq_policy policy;
                                          CPUFREQ_TRANSITION_NOTIFIER);
        }
        pr_debug("kvm: max_tsc_khz = %ld\n", max_tsc_khz);
-       for_each_online_cpu(cpu)
-               smp_call_function_single(cpu, tsc_khz_changed, NULL, 1);
-       __register_hotcpu_notifier(&kvmclock_cpu_notifier_block);
-       cpu_notifier_register_done();
  
+       cpuhp_setup_state(CPUHP_AP_X86_KVM_CLK_ONLINE, "AP_X86_KVM_CLK_ONLINE",
+                         kvmclock_cpu_online, kvmclock_cpu_down_prep);
  }
  
  static DEFINE_PER_CPU(struct kvm_vcpu *, current_vcpu);
@@@ -5896,7 -5877,7 +5877,7 @@@ void kvm_arch_exit(void
        if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC))
                cpufreq_unregister_notifier(&kvmclock_cpufreq_notifier_block,
                                            CPUFREQ_TRANSITION_NOTIFIER);
-       unregister_hotcpu_notifier(&kvmclock_cpu_notifier_block);
+       cpuhp_remove_state_nocalls(CPUHP_AP_X86_KVM_CLK_ONLINE);
  #ifdef CONFIG_X86_64
        pvclock_gtod_unregister_notifier(&pvclock_gtod_notifier);
  #endif
index 0ca14ac7bb28826d01654f652c7028125f07823f,eecdb196b2d16f3ffd22fc52fa0eda1252fabb49..0553aeebb2288098e07f1e5c4203110cede48f99
@@@ -90,7 -90,7 +90,7 @@@ static void acpi_processor_notify(acpi_
                                                  pr->performance_platform_limit);
                break;
        case ACPI_PROCESSOR_NOTIFY_POWER:
 -              acpi_processor_cst_has_changed(pr);
 +              acpi_processor_power_state_has_changed(pr);
                acpi_bus_generate_netlink_event(device->pnp.device_class,
                                                  dev_name(&device->dev), event, 0);
                break;
@@@ -118,12 -118,13 +118,13 @@@ static int acpi_cpu_soft_notify(struct 
        struct acpi_device *device;
        action &= ~CPU_TASKS_FROZEN;
  
-       /*
-        * CPU_STARTING and CPU_DYING must not sleep. Return here since
-        * acpi_bus_get_device() may sleep.
-        */
-       if (action == CPU_STARTING || action == CPU_DYING)
+       switch (action) {
+       case CPU_ONLINE:
+       case CPU_DEAD:
+               break;
+       default:
                return NOTIFY_DONE;
+       }
  
        if (!pr || acpi_bus_get_device(pr->handle, &device))
                return NOTIFY_DONE;
index 7c42b1d13faf035ed571a8142ad52a76835d8ae0,cdef4405ca50766a238cdee66a8b2044c7486a89..8bcee65a0b8c92a22c8d49ac362656a19233e04f
@@@ -345,38 -345,20 +345,20 @@@ static void armada_mpic_send_doorbell(c
                ARMADA_370_XP_SW_TRIG_INT_OFFS);
  }
  
- static int armada_xp_mpic_secondary_init(struct notifier_block *nfb,
-                                        unsigned long action, void *hcpu)
+ static int armada_xp_mpic_starting_cpu(unsigned int cpu)
  {
-       if (action == CPU_STARTING || action == CPU_STARTING_FROZEN) {
-               armada_xp_mpic_perf_init();
-               armada_xp_mpic_smp_cpu_init();
-       }
-       return NOTIFY_OK;
+       armada_xp_mpic_perf_init();
+       armada_xp_mpic_smp_cpu_init();
+       return 0;
  }
  
- static struct notifier_block armada_370_xp_mpic_cpu_notifier = {
-       .notifier_call = armada_xp_mpic_secondary_init,
-       .priority = 100,
- };
- static int mpic_cascaded_secondary_init(struct notifier_block *nfb,
-                                       unsigned long action, void *hcpu)
+ static int mpic_cascaded_starting_cpu(unsigned int cpu)
  {
-       if (action == CPU_STARTING || action == CPU_STARTING_FROZEN) {
-               armada_xp_mpic_perf_init();
-               enable_percpu_irq(parent_irq, IRQ_TYPE_NONE);
-       }
-       return NOTIFY_OK;
+       armada_xp_mpic_perf_init();
+       enable_percpu_irq(parent_irq, IRQ_TYPE_NONE);
+       return 0;
  }
- static struct notifier_block mpic_cascaded_cpu_notifier = {
-       .notifier_call = mpic_cascaded_secondary_init,
-       .priority = 100,
- };
- #endif /* CONFIG_SMP */
+ #endif
  
  static const struct irq_domain_ops armada_370_xp_mpic_irq_ops = {
        .map = armada_370_xp_mpic_irq_map,
@@@ -541,7 -523,7 +523,7 @@@ static void armada_370_xp_mpic_resume(v
                writel(1, per_cpu_int_base + ARMADA_370_XP_INT_CLEAR_MASK_OFFS);
  }
  
 -struct syscore_ops armada_370_xp_mpic_syscore_ops = {
 +static struct syscore_ops armada_370_xp_mpic_syscore_ops = {
        .suspend        = armada_370_xp_mpic_suspend,
        .resume         = armada_370_xp_mpic_resume,
  };
@@@ -595,11 -577,15 +577,15 @@@ static int __init armada_370_xp_mpic_of
                set_handle_irq(armada_370_xp_handle_irq);
  #ifdef CONFIG_SMP
                set_smp_cross_call(armada_mpic_send_doorbell);
-               register_cpu_notifier(&armada_370_xp_mpic_cpu_notifier);
+               cpuhp_setup_state_nocalls(CPUHP_AP_IRQ_ARMADA_XP_STARTING,
+                                         "AP_IRQ_ARMADA_XP_STARTING",
+                                         armada_xp_mpic_starting_cpu, NULL);
  #endif
        } else {
  #ifdef CONFIG_SMP
-               register_cpu_notifier(&mpic_cascaded_cpu_notifier);
+               cpuhp_setup_state_nocalls(CPUHP_AP_IRQ_ARMADA_CASC_STARTING,
+                                         "AP_IRQ_ARMADA_CASC_STARTING",
+                                         mpic_cascaded_starting_cpu, NULL);
  #endif
                irq_set_chained_handler(parent_irq,
                                        armada_370_xp_mpic_handle_cascade_irq);
index df1949c0aa23ad927a58f898f0739dc9e35ca46d,f2575cb2b013f8120817bdf3a63886e2d12b6caf..d96b2c947e74e3edab3917551c64fbd1ced0f34c
@@@ -180,7 -180,7 +180,7 @@@ __exception_irq_entry bcm2836_arm_irqch
        } else if (stat) {
                u32 hwirq = ffs(stat) - 1;
  
 -              handle_IRQ(irq_linear_revmap(intc.domain, hwirq), regs);
 +              handle_domain_irq(intc.domain, hwirq, regs);
        }
  }
  
@@@ -202,30 -202,23 +202,23 @@@ static void bcm2836_arm_irqchip_send_ip
        }
  }
  
- /* Unmasks the IPI on the CPU when it's online. */
- static int bcm2836_arm_irqchip_cpu_notify(struct notifier_block *nfb,
-                                         unsigned long action, void *hcpu)
+ static int bcm2836_cpu_starting(unsigned int cpu)
  {
-       unsigned int cpu = (unsigned long)hcpu;
-       unsigned int int_reg = LOCAL_MAILBOX_INT_CONTROL0;
-       unsigned int mailbox = 0;
-       if (action == CPU_STARTING || action == CPU_STARTING_FROZEN)
-               bcm2836_arm_irqchip_unmask_per_cpu_irq(int_reg, mailbox, cpu);
-       else if (action == CPU_DYING)
-               bcm2836_arm_irqchip_mask_per_cpu_irq(int_reg, mailbox, cpu);
-       return NOTIFY_OK;
+       bcm2836_arm_irqchip_unmask_per_cpu_irq(LOCAL_MAILBOX_INT_CONTROL0, 0,
+                                              cpu);
+       return 0;
  }
  
- static struct notifier_block bcm2836_arm_irqchip_cpu_notifier = {
-       .notifier_call = bcm2836_arm_irqchip_cpu_notify,
-       .priority = 100,
- };
+ static int bcm2836_cpu_dying(unsigned int cpu)
+ {
+       bcm2836_arm_irqchip_mask_per_cpu_irq(LOCAL_MAILBOX_INT_CONTROL0, 0,
+                                            cpu);
+       return 0;
+ }
  
  #ifdef CONFIG_ARM
 -int __init bcm2836_smp_boot_secondary(unsigned int cpu,
 -                                    struct task_struct *idle)
 +static int __init bcm2836_smp_boot_secondary(unsigned int cpu,
 +                                           struct task_struct *idle)
  {
        unsigned long secondary_startup_phys =
                (unsigned long)virt_to_phys((void *)secondary_startup);
@@@ -251,10 -244,9 +244,9 @@@ bcm2836_arm_irqchip_smp_init(void
  {
  #ifdef CONFIG_SMP
        /* Unmask IPIs to the boot CPU. */
-       bcm2836_arm_irqchip_cpu_notify(&bcm2836_arm_irqchip_cpu_notifier,
-                                      CPU_STARTING,
-                                      (void *)(uintptr_t)smp_processor_id());
-       register_cpu_notifier(&bcm2836_arm_irqchip_cpu_notifier);
+       cpuhp_setup_state(CPUHP_AP_IRQ_BCM2836_STARTING,
+                         "AP_IRQ_BCM2836_STARTING", bcm2836_cpu_starting,
+                         bcm2836_cpu_dying);
  
        set_smp_cross_call(bcm2836_arm_irqchip_send_ipi);
  
index 1de07eb5839c7b522bf6e81382a7584afc02f217,606f114166a163b5f37927e2324cd1dd011cc5eb..c2cab572c5111c392c076d62232da4828cfa00a1
@@@ -75,7 -75,7 +75,7 @@@ struct gic_chip_data 
        void __iomem *raw_dist_base;
        void __iomem *raw_cpu_base;
        u32 percpu_offset;
 -#ifdef CONFIG_CPU_PM
 +#if defined(CONFIG_CPU_PM) || defined(CONFIG_ARM_GIC_PM)
        u32 saved_spi_enable[DIV_ROUND_UP(1020, 32)];
        u32 saved_spi_active[DIV_ROUND_UP(1020, 32)];
        u32 saved_spi_conf[DIV_ROUND_UP(1020, 16)];
@@@ -449,7 -449,7 +449,7 @@@ static void gic_cpu_if_up(struct gic_ch
  }
  
  
 -static void __init gic_dist_init(struct gic_chip_data *gic)
 +static void gic_dist_init(struct gic_chip_data *gic)
  {
        unsigned int i;
        u32 cpumask;
@@@ -528,14 -528,14 +528,14 @@@ int gic_cpu_if_down(unsigned int gic_nr
        return 0;
  }
  
 -#ifdef CONFIG_CPU_PM
 +#if defined(CONFIG_CPU_PM) || defined(CONFIG_ARM_GIC_PM)
  /*
   * Saves the GIC distributor registers during suspend or idle.  Must be called
   * with interrupts disabled but before powering down the GIC.  After calling
   * this function, no interrupts will be delivered by the GIC, and another
   * platform-specific wakeup source must be enabled.
   */
 -static void gic_dist_save(struct gic_chip_data *gic)
 +void gic_dist_save(struct gic_chip_data *gic)
  {
        unsigned int gic_irqs;
        void __iomem *dist_base;
   * handled normally, but any edge interrupts that occurred will not be seen by
   * the GIC and need to be handled by the platform-specific wakeup source.
   */
 -static void gic_dist_restore(struct gic_chip_data *gic)
 +void gic_dist_restore(struct gic_chip_data *gic)
  {
        unsigned int gic_irqs;
        unsigned int i;
        writel_relaxed(GICD_ENABLE, dist_base + GIC_DIST_CTRL);
  }
  
 -static void gic_cpu_save(struct gic_chip_data *gic)
 +void gic_cpu_save(struct gic_chip_data *gic)
  {
        int i;
        u32 *ptr;
  
  }
  
 -static void gic_cpu_restore(struct gic_chip_data *gic)
 +void gic_cpu_restore(struct gic_chip_data *gic)
  {
        int i;
        u32 *ptr;
@@@ -727,7 -727,7 +727,7 @@@ static struct notifier_block gic_notifi
        .notifier_call = gic_notifier,
  };
  
 -static int __init gic_pm_init(struct gic_chip_data *gic)
 +static int gic_pm_init(struct gic_chip_data *gic)
  {
        gic->saved_ppi_enable = __alloc_percpu(DIV_ROUND_UP(32, 32) * 4,
                sizeof(u32));
@@@ -757,7 -757,7 +757,7 @@@ free_ppi_enable
        return -ENOMEM;
  }
  #else
 -static int __init gic_pm_init(struct gic_chip_data *gic)
 +static int gic_pm_init(struct gic_chip_data *gic)
  {
        return 0;
  }
@@@ -984,25 -984,12 +984,12 @@@ static int gic_irq_domain_translate(str
        return -EINVAL;
  }
  
- #ifdef CONFIG_SMP
- static int gic_secondary_init(struct notifier_block *nfb, unsigned long action,
-                             void *hcpu)
+ static int gic_starting_cpu(unsigned int cpu)
  {
-       if (action == CPU_STARTING || action == CPU_STARTING_FROZEN)
-               gic_cpu_init(&gic_data[0]);
-       return NOTIFY_OK;
+       gic_cpu_init(&gic_data[0]);
+       return 0;
  }
  
- /*
-  * Notifier for enabling the GIC CPU interface. Set an arbitrarily high
-  * priority because the GIC needs to be up before the ARM generic timers.
-  */
- static struct notifier_block gic_cpu_notifier = {
-       .notifier_call = gic_secondary_init,
-       .priority = 100,
- };
- #endif
  static int gic_irq_domain_alloc(struct irq_domain *domain, unsigned int virq,
                                unsigned int nr_irqs, void *arg)
  {
@@@ -1032,31 -1019,32 +1019,31 @@@ static const struct irq_domain_ops gic_
        .unmap = gic_irq_domain_unmap,
  };
  
 -static int __init __gic_init_bases(struct gic_chip_data *gic, int irq_start,
 -                                 struct fwnode_handle *handle)
 +static void gic_init_chip(struct gic_chip_data *gic, struct device *dev,
 +                        const char *name, bool use_eoimode1)
  {
 -      irq_hw_number_t hwirq_base;
 -      int gic_irqs, irq_base, i, ret;
 -
 -      if (WARN_ON(!gic || gic->domain))
 -              return -EINVAL;
 -
        /* Initialize irq_chip */
        gic->chip = gic_chip;
 +      gic->chip.name = name;
 +      gic->chip.parent_device = dev;
  
 -      if (static_key_true(&supports_deactivate) && gic == &gic_data[0]) {
 +      if (use_eoimode1) {
                gic->chip.irq_mask = gic_eoimode1_mask_irq;
                gic->chip.irq_eoi = gic_eoimode1_eoi_irq;
                gic->chip.irq_set_vcpu_affinity = gic_irq_set_vcpu_affinity;
 -              gic->chip.name = kasprintf(GFP_KERNEL, "GICv2");
 -      } else {
 -              gic->chip.name = kasprintf(GFP_KERNEL, "GIC-%d",
 -                                         (int)(gic - &gic_data[0]));
        }
  
  #ifdef CONFIG_SMP
        if (gic == &gic_data[0])
                gic->chip.irq_set_affinity = gic_set_affinity;
  #endif
 +}
 +
 +static int gic_init_bases(struct gic_chip_data *gic, int irq_start,
 +                        struct fwnode_handle *handle)
 +{
 +      irq_hw_number_t hwirq_base;
 +      int gic_irqs, irq_base, ret;
  
        if (IS_ENABLED(CONFIG_GIC_NON_BANKED) && gic->percpu_offset) {
                /* Franken-GIC without banked registers... */
                goto error;
        }
  
 +      gic_dist_init(gic);
 +      ret = gic_cpu_init(gic);
 +      if (ret)
 +              goto error;
 +
 +      ret = gic_pm_init(gic);
 +      if (ret)
 +              goto error;
 +
 +      return 0;
 +
 +error:
 +      if (IS_ENABLED(CONFIG_GIC_NON_BANKED) && gic->percpu_offset) {
 +              free_percpu(gic->dist_base.percpu_base);
 +              free_percpu(gic->cpu_base.percpu_base);
 +      }
 +
 +      return ret;
 +}
 +
 +static int __init __gic_init_bases(struct gic_chip_data *gic,
 +                                 int irq_start,
 +                                 struct fwnode_handle *handle)
 +{
 +      char *name;
 +      int i, ret;
 +
 +      if (WARN_ON(!gic || gic->domain))
 +              return -EINVAL;
 +
        if (gic == &gic_data[0]) {
                /*
                 * Initialize the CPU interface map to all CPUs.
                        gic_cpu_map[i] = 0xff;
  #ifdef CONFIG_SMP
                set_smp_cross_call(gic_raise_softirq);
-               register_cpu_notifier(&gic_cpu_notifier);
  #endif
+               cpuhp_setup_state_nocalls(CPUHP_AP_IRQ_GIC_STARTING,
+                                         "AP_IRQ_GIC_STARTING",
+                                         gic_starting_cpu, NULL);
                set_handle_irq(gic_handle_irq);
                if (static_key_true(&supports_deactivate))
                        pr_info("GIC: Using split EOI/Deactivate mode\n");
        }
  
 -      gic_dist_init(gic);
 -      ret = gic_cpu_init(gic);
 -      if (ret)
 -              goto error;
 -
 -      ret = gic_pm_init(gic);
 -      if (ret)
 -              goto error;
 -
 -      return 0;
 -
 -error:
 -      if (IS_ENABLED(CONFIG_GIC_NON_BANKED) && gic->percpu_offset) {
 -              free_percpu(gic->dist_base.percpu_base);
 -              free_percpu(gic->cpu_base.percpu_base);
 +      if (static_key_true(&supports_deactivate) && gic == &gic_data[0]) {
 +              name = kasprintf(GFP_KERNEL, "GICv2");
 +              gic_init_chip(gic, NULL, name, true);
 +      } else {
 +              name = kasprintf(GFP_KERNEL, "GIC-%d", (int)(gic-&gic_data[0]));
 +              gic_init_chip(gic, NULL, name, false);
        }
  
 -      kfree(gic->chip.name);
 +      ret = gic_init_bases(gic, irq_start, handle);
 +      if (ret)
 +              kfree(name);
  
        return ret;
  }
@@@ -1272,7 -1239,7 +1261,7 @@@ static bool gic_check_eoimode(struct de
        return true;
  }
  
 -static int __init gic_of_setup(struct gic_chip_data *gic, struct device_node *node)
 +static int gic_of_setup(struct gic_chip_data *gic, struct device_node *node)
  {
        if (!gic || !node)
                return -EINVAL;
@@@ -1296,34 -1263,6 +1285,34 @@@ error
        return -ENOMEM;
  }
  
 +int gic_of_init_child(struct device *dev, struct gic_chip_data **gic, int irq)
 +{
 +      int ret;
 +
 +      if (!dev || !dev->of_node || !gic || !irq)
 +              return -EINVAL;
 +
 +      *gic = devm_kzalloc(dev, sizeof(**gic), GFP_KERNEL);
 +      if (!*gic)
 +              return -ENOMEM;
 +
 +      gic_init_chip(*gic, dev, dev->of_node->name, false);
 +
 +      ret = gic_of_setup(*gic, dev->of_node);
 +      if (ret)
 +              return ret;
 +
 +      ret = gic_init_bases(*gic, -1, &dev->of_node->fwnode);
 +      if (ret) {
 +              gic_teardown(*gic);
 +              return ret;
 +      }
 +
 +      irq_set_chained_handler_and_data(irq, gic_handle_cascade_irq, *gic);
 +
 +      return 0;
 +}
 +
  static void __init gic_of_setup_kvm_info(struct device_node *node)
  {
        int ret;
@@@ -1403,11 -1342,7 +1392,11 @@@ IRQCHIP_DECLARE(cortex_a7_gic, "arm,cor
  IRQCHIP_DECLARE(msm_8660_qgic, "qcom,msm-8660-qgic", gic_of_init);
  IRQCHIP_DECLARE(msm_qgic2, "qcom,msm-qgic2", gic_of_init);
  IRQCHIP_DECLARE(pl390, "arm,pl390", gic_of_init);
 -
 +#else
 +int gic_of_init_child(struct device *dev, struct gic_chip_data **gic, int irq)
 +{
 +      return -ENOTSUPP;
 +}
  #endif
  
  #ifdef CONFIG_ACPI
diff --combined drivers/perf/arm_pmu.c
index 8e4d7f590b064f597393b9c02d1e32bafc0ed4ba,f6ab4f7f75bf97973227023e6d6933c6ca46ad98..6ccb994bdfcbd160148c535f18ea656ea7ee13e6
@@@ -603,8 -603,7 +603,8 @@@ static void cpu_pmu_free_irq(struct arm
  
        irq = platform_get_irq(pmu_device, 0);
        if (irq >= 0 && irq_is_percpu(irq)) {
 -              on_each_cpu(cpu_pmu_disable_percpu_irq, &irq, 1);
 +              on_each_cpu_mask(&cpu_pmu->supported_cpus,
 +                               cpu_pmu_disable_percpu_irq, &irq, 1);
                free_percpu_irq(irq, &hw_events->percpu_pmu);
        } else {
                for (i = 0; i < irqs; ++i) {
@@@ -646,9 -645,7 +646,9 @@@ static int cpu_pmu_request_irq(struct a
                                irq);
                        return err;
                }
 -              on_each_cpu(cpu_pmu_enable_percpu_irq, &irq, 1);
 +
 +              on_each_cpu_mask(&cpu_pmu->supported_cpus,
 +                               cpu_pmu_enable_percpu_irq, &irq, 1);
        } else {
                for (i = 0; i < irqs; ++i) {
                        int cpu = i;
        return 0;
  }
  
+ static DEFINE_MUTEX(arm_pmu_mutex);
+ static LIST_HEAD(arm_pmu_list);
  /*
   * PMU hardware loses all context when a CPU goes offline.
   * When a CPU is hotplugged back in, since some hardware registers are
   * UNKNOWN at reset, the PMU must be explicitly reset to avoid reading
   * junk values out of them.
   */
- static int cpu_pmu_notify(struct notifier_block *b, unsigned long action,
-                         void *hcpu)
+ static int arm_perf_starting_cpu(unsigned int cpu)
  {
-       int cpu = (unsigned long)hcpu;
-       struct arm_pmu *pmu = container_of(b, struct arm_pmu, hotplug_nb);
-       if ((action & ~CPU_TASKS_FROZEN) != CPU_STARTING)
-               return NOTIFY_DONE;
-       if (!cpumask_test_cpu(cpu, &pmu->supported_cpus))
-               return NOTIFY_DONE;
+       struct arm_pmu *pmu;
  
-       if (pmu->reset)
-               pmu->reset(pmu);
-       else
-               return NOTIFY_DONE;
+       mutex_lock(&arm_pmu_mutex);
+       list_for_each_entry(pmu, &arm_pmu_list, entry) {
  
-       return NOTIFY_OK;
+               if (!cpumask_test_cpu(cpu, &pmu->supported_cpus))
+                       continue;
+               if (pmu->reset)
+                       pmu->reset(pmu);
+       }
+       mutex_unlock(&arm_pmu_mutex);
+       return 0;
  }
  
  #ifdef CONFIG_CPU_PM
@@@ -822,10 -818,9 +821,9 @@@ static int cpu_pmu_init(struct arm_pmu 
        if (!cpu_hw_events)
                return -ENOMEM;
  
-       cpu_pmu->hotplug_nb.notifier_call = cpu_pmu_notify;
-       err = register_cpu_notifier(&cpu_pmu->hotplug_nb);
-       if (err)
-               goto out_hw_events;
+       mutex_lock(&arm_pmu_mutex);
+       list_add_tail(&cpu_pmu->entry, &arm_pmu_list);
+       mutex_unlock(&arm_pmu_mutex);
  
        err = cpu_pm_pmu_register(cpu_pmu);
        if (err)
        return 0;
  
  out_unregister:
-       unregister_cpu_notifier(&cpu_pmu->hotplug_nb);
- out_hw_events:
+       mutex_lock(&arm_pmu_mutex);
+       list_del(&cpu_pmu->entry);
+       mutex_unlock(&arm_pmu_mutex);
        free_percpu(cpu_hw_events);
        return err;
  }
  static void cpu_pmu_destroy(struct arm_pmu *cpu_pmu)
  {
        cpu_pm_pmu_unregister(cpu_pmu);
-       unregister_cpu_notifier(&cpu_pmu->hotplug_nb);
+       mutex_lock(&arm_pmu_mutex);
+       list_del(&cpu_pmu->entry);
+       mutex_unlock(&arm_pmu_mutex);
        free_percpu(cpu_pmu->hw_events);
  }
  
@@@ -964,23 -962,9 +965,23 @@@ static int of_pmu_irq_cfg(struct arm_pm
                i++;
        } while (1);
  
 -      /* If we didn't manage to parse anything, claim to support all CPUs */
 -      if (cpumask_weight(&pmu->supported_cpus) == 0)
 -              cpumask_setall(&pmu->supported_cpus);
 +      /* If we didn't manage to parse anything, try the interrupt affinity */
 +      if (cpumask_weight(&pmu->supported_cpus) == 0) {
 +              if (!using_spi) {
 +                      /* If using PPIs, check the affinity of the partition */
 +                      int ret, irq;
 +
 +                      irq = platform_get_irq(pdev, 0);
 +                      ret = irq_get_percpu_devid_partition(irq, &pmu->supported_cpus);
 +                      if (ret) {
 +                              kfree(irqs);
 +                              return ret;
 +                      }
 +              } else {
 +                      /* Otherwise default to all CPUs */
 +                      cpumask_setall(&pmu->supported_cpus);
 +              }
 +      }
  
        /* If we matched up the IRQ affinities, use them to route the SPIs */
        if (using_spi && i == pdev->num_resources)
@@@ -1061,3 -1045,17 +1062,17 @@@ out_free
        kfree(pmu);
        return ret;
  }
+ static int arm_pmu_hp_init(void)
+ {
+       int ret;
+       ret = cpuhp_setup_state_nocalls(CPUHP_AP_PERF_ARM_STARTING,
+                                       "AP_PERF_ARM_STARTING",
+                                       arm_perf_starting_cpu, NULL);
+       if (ret)
+               pr_err("CPU hotplug notifier for ARM PMU could not be registered: %d\n",
+                      ret);
+       return ret;
+ }
+ subsys_initcall(arm_pmu_hp_init);
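The arm_pmu conversion above swaps one notifier per PMU instance for a single hotplug state whose callback walks a global, mutex-protected list of registered PMUs. The sketch below illustrates that one-state-many-instances shape with hypothetical quux_* names; it uses a dynamic online state for simplicity, whereas the driver itself registers the fixed CPUHP_AP_PERF_ARM_STARTING state from a subsys_initcall:

#include <linux/cpuhotplug.h>
#include <linux/init.h>
#include <linux/list.h>
#include <linux/mutex.h>

struct quux_pmu {
        struct list_head entry;
        void (*reset)(struct quux_pmu *pmu);
};

static DEFINE_MUTEX(quux_mutex);
static LIST_HEAD(quux_list);    /* instances add/remove themselves here */

static int quux_cpu_online(unsigned int cpu)
{
        struct quux_pmu *pmu;

        mutex_lock(&quux_mutex);
        list_for_each_entry(pmu, &quux_list, entry)
                if (pmu->reset)
                        pmu->reset(pmu);
        mutex_unlock(&quux_mutex);
        return 0;
}

static int __init quux_hp_init(void)
{
        int ret;

        ret = cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN, "quux:online",
                                        quux_cpu_online, NULL);
        return ret < 0 ? ret : 0;
}
subsys_initcall(quux_hp_init);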
index e1f921c2e4e05764509bf1887789d4c863b39677,ddd3dab0f39e457446185a84e895f3b87d00effc..8ed4326164cc843b41da6fbfe69d85cee2d61232
@@@ -69,22 -69,9 +69,22 @@@ struct perf_callchain_entry_ctx 
        bool                        contexts_maxed;
  };
  
 +typedef unsigned long (*perf_copy_f)(void *dst, const void *src,
 +                                   unsigned long off, unsigned long len);
 +
 +struct perf_raw_frag {
 +      union {
 +              struct perf_raw_frag    *next;
 +              unsigned long           pad;
 +      };
 +      perf_copy_f                     copy;
 +      void                            *data;
 +      u32                             size;
 +} __packed;
 +
  struct perf_raw_record {
 +      struct perf_raw_frag            frag;
        u32                             size;
 -      void                            *data;
  };
  
  /*
@@@ -530,11 -517,6 +530,11 @@@ struct swevent_hlist 
  struct perf_cgroup;
  struct ring_buffer;
  
 +struct pmu_event_list {
 +      raw_spinlock_t          lock;
 +      struct list_head        list;
 +};
 +
  /**
   * struct perf_event - performance event kernel representation:
   */
@@@ -693,7 -675,6 +693,7 @@@ struct perf_event 
        int                             cgrp_defer_enabled;
  #endif
  
 +      struct list_head                sb_list;
  #endif /* CONFIG_PERF_EVENTS */
  };
  
@@@ -1093,7 -1074,7 +1093,7 @@@ extern void perf_callchain_kernel(struc
  extern struct perf_callchain_entry *
  get_perf_callchain(struct pt_regs *regs, u32 init_nr, bool kernel, bool user,
                   u32 max_stack, bool crosstask, bool add_mark);
 -extern int get_callchain_buffers(void);
 +extern int get_callchain_buffers(int max_stack);
  extern void put_callchain_buffers(void);
  
  extern int sysctl_perf_event_max_stack;
@@@ -1302,61 -1283,14 +1302,26 @@@ extern void perf_restore_debug_store(vo
  static inline void perf_restore_debug_store(void)                     { }
  #endif
  
 +static __always_inline bool perf_raw_frag_last(const struct perf_raw_frag *frag)
 +{
 +      return frag->pad < sizeof(u64);
 +}
 +
  #define perf_output_put(handle, x) perf_output_copy((handle), &(x), sizeof(x))
  
- /*
-  * This has to have a higher priority than migration_notifier in sched/core.c.
-  */
- #define perf_cpu_notifier(fn)                                         \
- do {                                                                  \
-       static struct notifier_block fn##_nb =                          \
-               { .notifier_call = fn, .priority = CPU_PRI_PERF };      \
-       unsigned long cpu = smp_processor_id();                         \
-       unsigned long flags;                                            \
-                                                                       \
-       cpu_notifier_register_begin();                                  \
-       fn(&fn##_nb, (unsigned long)CPU_UP_PREPARE,                     \
-               (void *)(unsigned long)cpu);                            \
-       local_irq_save(flags);                                          \
-       fn(&fn##_nb, (unsigned long)CPU_STARTING,                       \
-               (void *)(unsigned long)cpu);                            \
-       local_irq_restore(flags);                                       \
-       fn(&fn##_nb, (unsigned long)CPU_ONLINE,                         \
-               (void *)(unsigned long)cpu);                            \
-       __register_cpu_notifier(&fn##_nb);                              \
-       cpu_notifier_register_done();                                   \
- } while (0)
- /*
-  * Bare-bones version of perf_cpu_notifier(), which doesn't invoke the
-  * callback for already online CPUs.
-  */
- #define __perf_cpu_notifier(fn)                                               \
- do {                                                                  \
-       static struct notifier_block fn##_nb =                          \
-               { .notifier_call = fn, .priority = CPU_PRI_PERF };      \
-                                                                       \
-       __register_cpu_notifier(&fn##_nb);                              \
- } while (0)
  struct perf_pmu_events_attr {
        struct device_attribute attr;
        u64 id;
        const char *event_str;
  };
  
 +struct perf_pmu_events_ht_attr {
 +      struct device_attribute                 attr;
 +      u64                                     id;
 +      const char                              *event_str_ht;
 +      const char                              *event_str_noht;
 +};
 +
  ssize_t perf_event_sysfs_show(struct device *dev, struct device_attribute *attr,
                              char *page);
  
@@@ -1385,4 -1319,13 +1350,13 @@@ _name##_show(struct device *dev,                                      
                                                                        \
  static struct device_attribute format_attr_##_name = __ATTR_RO(_name)
  
+ /* Performance counter hotplug functions */
+ #ifdef CONFIG_PERF_EVENTS
+ int perf_event_init_cpu(unsigned int cpu);
+ int perf_event_exit_cpu(unsigned int cpu);
+ #else
+ #define perf_event_init_cpu   NULL
+ #define perf_event_exit_cpu   NULL
+ #endif
  #endif /* _LINUX_PERF_EVENT_H */
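The fragmented perf_raw_record introduced above lets a raw sample be stitched together from several buffers without first copying them into one. The sketch below is illustrative only: build_two_frag_record(), the meta/payload buffers and my_copy_cb() are invented, and the copy-callback convention (return the number of bytes not copied, as with copy_from_user()) is my reading of the output path, not something spelled out in this header.

#include <linux/perf_event.h>
#include <linux/string.h>

/* hypothetical copy callback, invoked via frag->copy / __output_custom() */
static unsigned long my_copy_cb(void *dst, const void *src,
                                unsigned long off, unsigned long len)
{
        memcpy(dst, (const char *)src + off, len);
        return 0;               /* 0 bytes left uncopied */
}

static void build_two_frag_record(void *meta, u32 meta_len,
                                  void *payload, u32 payload_len)
{
        struct perf_raw_frag payload_frag = {
                .copy = my_copy_cb,
                .data = payload,
                .size = payload_len,
        };
        struct perf_raw_record raw = {
                .frag = {
                        .next = &payload_frag,  /* chaining overlays .pad */
                        .data = meta,
                        .size = meta_len,
                },
        };

        /*
         * raw.size and the final frag->pad are filled in later by
         * perf_prepare_sample(); perf_raw_frag_last() recognises the end
         * of the chain because a real pointer is never < sizeof(u64).
         */
        (void)raw;
}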
diff --combined kernel/events/core.c
index 09ae27b353c1e31a021c0602ce1d33a19dccf083,f3ef1c29a7c958e669b53b729a1af62291604c43..356a6c7cb52a08819739c8ea712c86fcc520448c
@@@ -335,7 -335,6 +335,7 @@@ static atomic_t perf_sched_count
  
  static DEFINE_PER_CPU(atomic_t, perf_cgroup_events);
  static DEFINE_PER_CPU(int, perf_sched_cb_usages);
 +static DEFINE_PER_CPU(struct pmu_event_list, pmu_sb_events);
  
  static atomic_t nr_mmap_events __read_mostly;
  static atomic_t nr_comm_events __read_mostly;
@@@ -397,13 -396,6 +397,13 @@@ int perf_proc_update_handler(struct ctl
        if (ret || !write)
                return ret;
  
 +      /*
 +       * If throttling is disabled don't allow the write:
 +       */
 +      if (sysctl_perf_cpu_time_max_percent == 100 ||
 +          sysctl_perf_cpu_time_max_percent == 0)
 +              return -EINVAL;
 +
        max_samples_per_tick = DIV_ROUND_UP(sysctl_perf_event_sample_rate, HZ);
        perf_sample_period_ns = NSEC_PER_SEC / sysctl_perf_event_sample_rate;
        update_perf_cpu_limits();
@@@ -3694,39 -3686,6 +3694,39 @@@ static void free_event_rcu(struct rcu_h
  static void ring_buffer_attach(struct perf_event *event,
                               struct ring_buffer *rb);
  
 +static void detach_sb_event(struct perf_event *event)
 +{
 +      struct pmu_event_list *pel = per_cpu_ptr(&pmu_sb_events, event->cpu);
 +
 +      raw_spin_lock(&pel->lock);
 +      list_del_rcu(&event->sb_list);
 +      raw_spin_unlock(&pel->lock);
 +}
 +
 +static bool is_sb_event(struct perf_event *event)
 +{
 +      struct perf_event_attr *attr = &event->attr;
 +
 +      if (event->parent)
 +              return false;
 +
 +      if (event->attach_state & PERF_ATTACH_TASK)
 +              return false;
 +
 +      if (attr->mmap || attr->mmap_data || attr->mmap2 ||
 +          attr->comm || attr->comm_exec ||
 +          attr->task ||
 +          attr->context_switch)
 +              return true;
 +      return false;
 +}
 +
 +static void unaccount_pmu_sb_event(struct perf_event *event)
 +{
 +      if (is_sb_event(event))
 +              detach_sb_event(event);
 +}
 +
  static void unaccount_event_cpu(struct perf_event *event, int cpu)
  {
        if (event->parent)
@@@ -3790,8 -3749,6 +3790,8 @@@ static void unaccount_event(struct perf
        }
  
        unaccount_event_cpu(event, event->cpu);
 +
 +      unaccount_pmu_sb_event(event);
  }
  
  static void perf_sched_delayed(struct work_struct *work)
@@@ -5617,26 -5574,16 +5617,26 @@@ void perf_output_sample(struct perf_out
        }
  
        if (sample_type & PERF_SAMPLE_RAW) {
 -              if (data->raw) {
 -                      u32 raw_size = data->raw->size;
 -                      u32 real_size = round_up(raw_size + sizeof(u32),
 -                                               sizeof(u64)) - sizeof(u32);
 -                      u64 zero = 0;
 -
 -                      perf_output_put(handle, real_size);
 -                      __output_copy(handle, data->raw->data, raw_size);
 -                      if (real_size - raw_size)
 -                              __output_copy(handle, &zero, real_size - raw_size);
 +              struct perf_raw_record *raw = data->raw;
 +
 +              if (raw) {
 +                      struct perf_raw_frag *frag = &raw->frag;
 +
 +                      perf_output_put(handle, raw->size);
 +                      do {
 +                              if (frag->copy) {
 +                                      __output_custom(handle, frag->copy,
 +                                                      frag->data, frag->size);
 +                              } else {
 +                                      __output_copy(handle, frag->data,
 +                                                    frag->size);
 +                              }
 +                              if (perf_raw_frag_last(frag))
 +                                      break;
 +                              frag = frag->next;
 +                      } while (1);
 +                      if (frag->pad)
 +                              __output_skip(handle, NULL, frag->pad);
                } else {
                        struct {
                                u32     size;
@@@ -5761,28 -5708,14 +5761,28 @@@ void perf_prepare_sample(struct perf_ev
        }
  
        if (sample_type & PERF_SAMPLE_RAW) {
 -              int size = sizeof(u32);
 -
 -              if (data->raw)
 -                      size += data->raw->size;
 -              else
 -                      size += sizeof(u32);
 +              struct perf_raw_record *raw = data->raw;
 +              int size;
 +
 +              if (raw) {
 +                      struct perf_raw_frag *frag = &raw->frag;
 +                      u32 sum = 0;
 +
 +                      do {
 +                              sum += frag->size;
 +                              if (perf_raw_frag_last(frag))
 +                                      break;
 +                              frag = frag->next;
 +                      } while (1);
 +
 +                      size = round_up(sum + sizeof(u32), sizeof(u64));
 +                      raw->size = size - sizeof(u32);
 +                      frag->pad = raw->size - sum;
 +              } else {
 +                      size = sizeof(u64);
 +              }
  
 -              header->size += round_up(size, sizeof(u64));
 +              header->size += size;
        }
  
        if (sample_type & PERF_SAMPLE_BRANCH_STACK) {
@@@ -5942,11 -5875,11 +5942,11 @@@ perf_event_read_event(struct perf_even
        perf_output_end(&handle);
  }
  
 -typedef void (perf_event_aux_output_cb)(struct perf_event *event, void *data);
 +typedef void (perf_iterate_f)(struct perf_event *event, void *data);
  
  static void
 -perf_event_aux_ctx(struct perf_event_context *ctx,
 -                 perf_event_aux_output_cb output,
 +perf_iterate_ctx(struct perf_event_context *ctx,
 +                 perf_iterate_f output,
                   void *data, bool all)
  {
        struct perf_event *event;
        }
  }
  
 -static void
 -perf_event_aux_task_ctx(perf_event_aux_output_cb output, void *data,
 -                      struct perf_event_context *task_ctx)
 +static void perf_iterate_sb_cpu(perf_iterate_f output, void *data)
  {
 -      rcu_read_lock();
 -      preempt_disable();
 -      perf_event_aux_ctx(task_ctx, output, data, false);
 -      preempt_enable();
 -      rcu_read_unlock();
 +      struct pmu_event_list *pel = this_cpu_ptr(&pmu_sb_events);
 +      struct perf_event *event;
 +
 +      list_for_each_entry_rcu(event, &pel->list, sb_list) {
 +              if (event->state < PERF_EVENT_STATE_INACTIVE)
 +                      continue;
 +              if (!event_filter_match(event))
 +                      continue;
 +              output(event, data);
 +      }
  }
  
 +/*
 + * Iterate all events that need to receive side-band events.
 + *
 + * For new callers; ensure that account_pmu_sb_event() includes
 + * your event, otherwise it might not get delivered.
 + */
  static void
 -perf_event_aux(perf_event_aux_output_cb output, void *data,
 +perf_iterate_sb(perf_iterate_f output, void *data,
               struct perf_event_context *task_ctx)
  {
 -      struct perf_cpu_context *cpuctx;
        struct perf_event_context *ctx;
 -      struct pmu *pmu;
        int ctxn;
  
 +      rcu_read_lock();
 +      preempt_disable();
 +
        /*
 -       * If we have task_ctx != NULL we only notify
 -       * the task context itself. The task_ctx is set
 -       * only for EXIT events before releasing task
 +       * If we have task_ctx != NULL we only notify the task context itself.
 +       * The task_ctx is set only for EXIT events before releasing task
         * context.
         */
        if (task_ctx) {
 -              perf_event_aux_task_ctx(output, data, task_ctx);
 -              return;
 +              perf_iterate_ctx(task_ctx, output, data, false);
 +              goto done;
        }
  
 -      rcu_read_lock();
 -      list_for_each_entry_rcu(pmu, &pmus, entry) {
 -              cpuctx = get_cpu_ptr(pmu->pmu_cpu_context);
 -              if (cpuctx->unique_pmu != pmu)
 -                      goto next;
 -              perf_event_aux_ctx(&cpuctx->ctx, output, data, false);
 -              ctxn = pmu->task_ctx_nr;
 -              if (ctxn < 0)
 -                      goto next;
 +      perf_iterate_sb_cpu(output, data);
 +
 +      for_each_task_context_nr(ctxn) {
                ctx = rcu_dereference(current->perf_event_ctxp[ctxn]);
                if (ctx)
 -                      perf_event_aux_ctx(ctx, output, data, false);
 -next:
 -              put_cpu_ptr(pmu->pmu_cpu_context);
 +                      perf_iterate_ctx(ctx, output, data, false);
        }
 +done:
 +      preempt_enable();
        rcu_read_unlock();
  }
  
@@@ -6060,7 -5990,7 +6060,7 @@@ void perf_event_exec(void
  
                perf_event_enable_on_exec(ctxn);
  
 -              perf_event_aux_ctx(ctx, perf_event_addr_filters_exec, NULL,
 +              perf_iterate_ctx(ctx, perf_event_addr_filters_exec, NULL,
                                   true);
        }
        rcu_read_unlock();
@@@ -6104,9 -6034,9 +6104,9 @@@ static int __perf_pmu_output_stop(void 
        };
  
        rcu_read_lock();
 -      perf_event_aux_ctx(&cpuctx->ctx, __perf_event_output_stop, &ro, false);
 +      perf_iterate_ctx(&cpuctx->ctx, __perf_event_output_stop, &ro, false);
        if (cpuctx->task_ctx)
 -              perf_event_aux_ctx(cpuctx->task_ctx, __perf_event_output_stop,
 +              perf_iterate_ctx(cpuctx->task_ctx, __perf_event_output_stop,
                                   &ro, false);
        rcu_read_unlock();
  
@@@ -6235,7 -6165,7 +6235,7 @@@ static void perf_event_task(struct task
                },
        };
  
 -      perf_event_aux(perf_event_task_output,
 +      perf_iterate_sb(perf_event_task_output,
                       &task_event,
                       task_ctx);
  }
@@@ -6314,7 -6244,7 +6314,7 @@@ static void perf_event_comm_event(struc
  
        comm_event->event_id.header.size = sizeof(comm_event->event_id) + size;
  
 -      perf_event_aux(perf_event_comm_output,
 +      perf_iterate_sb(perf_event_comm_output,
                       comm_event,
                       NULL);
  }
@@@ -6545,7 -6475,7 +6545,7 @@@ got_name
  
        mmap_event->event_id.header.size = sizeof(mmap_event->event_id) + size;
  
 -      perf_event_aux(perf_event_mmap_output,
 +      perf_iterate_sb(perf_event_mmap_output,
                       mmap_event,
                       NULL);
  
@@@ -6628,7 -6558,7 +6628,7 @@@ static void perf_addr_filters_adjust(st
                if (!ctx)
                        continue;
  
 -              perf_event_aux_ctx(ctx, __perf_addr_filters_adjust, vma, true);
 +              perf_iterate_ctx(ctx, __perf_addr_filters_adjust, vma, true);
        }
        rcu_read_unlock();
  }
@@@ -6815,7 -6745,7 +6815,7 @@@ static void perf_event_switch(struct ta
                },
        };
  
 -      perf_event_aux(perf_event_switch_output,
 +      perf_iterate_sb(perf_event_switch_output,
                       &switch_event,
                       NULL);
  }
@@@ -7422,7 -7352,7 +7422,7 @@@ static struct pmu perf_swevent = 
  static int perf_tp_filter_match(struct perf_event *event,
                                struct perf_sample_data *data)
  {
 -      void *record = data->raw->data;
 +      void *record = data->raw->frag.data;
  
        /* only top level events have filters set */
        if (event->parent)
@@@ -7478,10 -7408,8 +7478,10 @@@ void perf_tp_event(u16 event_type, u64 
        struct perf_event *event;
  
        struct perf_raw_record raw = {
 -              .size = entry_size,
 -              .data = record,
 +              .frag = {
 +                      .size = entry_size,
 +                      .data = record,
 +              },
        };
  
        perf_sample_data_init(&data, 0, 0);
@@@ -7622,7 -7550,7 +7622,7 @@@ static void perf_event_free_bpf_prog(st
        prog = event->tp_event->prog;
        if (prog) {
                event->tp_event->prog = NULL;
 -              bpf_prog_put_rcu(prog);
 +              bpf_prog_put(prog);
        }
  }
  
@@@ -8739,28 -8667,6 +8739,28 @@@ unlock
        return pmu;
  }
  
 +static void attach_sb_event(struct perf_event *event)
 +{
 +      struct pmu_event_list *pel = per_cpu_ptr(&pmu_sb_events, event->cpu);
 +
 +      raw_spin_lock(&pel->lock);
 +      list_add_rcu(&event->sb_list, &pel->list);
 +      raw_spin_unlock(&pel->lock);
 +}
 +
 +/*
 + * We keep a list of all !task (and therefore per-cpu) events
 + * that need to receive side-band records.
 + *
 + * This avoids having to scan all the various PMU per-cpu contexts
 + * looking for them.
 + */
 +static void account_pmu_sb_event(struct perf_event *event)
 +{
 +      if (is_sb_event(event))
 +              attach_sb_event(event);
 +}
 +
  static void account_event_cpu(struct perf_event *event, int cpu)
  {
        if (event->parent)
@@@ -8841,8 -8747,6 +8841,8 @@@ static void account_event(struct perf_e
  enabled:
  
        account_event_cpu(event, event->cpu);
 +
 +      account_pmu_sb_event(event);
  }
  
  /*
@@@ -8991,7 -8895,7 +8991,7 @@@ perf_event_alloc(struct perf_event_att
  
        if (!event->parent) {
                if (event->attr.sample_type & PERF_SAMPLE_CALLCHAIN) {
 -                      err = get_callchain_buffers();
 +                      err = get_callchain_buffers(attr->sample_max_stack);
                        if (err)
                                goto err_addr_filters;
                }
@@@ -9313,9 -9217,6 +9313,9 @@@ SYSCALL_DEFINE5(perf_event_open
                        return -EINVAL;
        }
  
 +      if (!attr.sample_max_stack)
 +              attr.sample_max_stack = sysctl_perf_event_max_stack;
 +
        /*
         * In cgroup mode, the pid argument is used to pass the fd
         * opened to the cgroup directory in cgroupfs. The cpu argument
  
        if (is_sampling_event(event)) {
                if (event->pmu->capabilities & PERF_PMU_CAP_NO_INTERRUPT) {
 -                      err = -ENOTSUPP;
 +                      err = -EOPNOTSUPP;
                        goto err_alloc;
                }
        }
@@@ -10351,13 -10252,10 +10351,13 @@@ static void __init perf_event_init_all_
                swhash = &per_cpu(swevent_htable, cpu);
                mutex_init(&swhash->hlist_mutex);
                INIT_LIST_HEAD(&per_cpu(active_ctx_list, cpu));
 +
 +              INIT_LIST_HEAD(&per_cpu(pmu_sb_events.list, cpu));
 +              raw_spin_lock_init(&per_cpu(pmu_sb_events.lock, cpu));
        }
  }
  
- static void perf_event_init_cpu(int cpu)
+ int perf_event_init_cpu(unsigned int cpu)
  {
        struct swevent_htable *swhash = &per_cpu(swevent_htable, cpu);
  
                rcu_assign_pointer(swhash->swevent_hlist, hlist);
        }
        mutex_unlock(&swhash->hlist_mutex);
+       return 0;
  }
  
  #if defined CONFIG_HOTPLUG_CPU || defined CONFIG_KEXEC_CORE
@@@ -10401,14 -10300,17 +10402,17 @@@ static void perf_event_exit_cpu_context
        }
        srcu_read_unlock(&pmus_srcu, idx);
  }
+ #else
+ static void perf_event_exit_cpu_context(int cpu) { }
+ #endif
  
- static void perf_event_exit_cpu(int cpu)
+ int perf_event_exit_cpu(unsigned int cpu)
  {
        perf_event_exit_cpu_context(cpu);
+       return 0;
  }
- #else
- static inline void perf_event_exit_cpu(int cpu) { }
- #endif
  
  static int
  perf_reboot(struct notifier_block *notifier, unsigned long val, void *v)
@@@ -10430,46 -10332,6 +10434,6 @@@ static struct notifier_block perf_reboo
        .priority = INT_MIN,
  };
  
- static int
- perf_cpu_notify(struct notifier_block *self, unsigned long action, void *hcpu)
- {
-       unsigned int cpu = (long)hcpu;
-       switch (action & ~CPU_TASKS_FROZEN) {
-       case CPU_UP_PREPARE:
-               /*
-                * This must be done before the CPU comes alive, because the
-                * moment we can run tasks we can encounter (software) events.
-                *
-                * Specifically, someone can have inherited events on kthreadd
-                * or a pre-existing worker thread that gets re-bound.
-                */
-               perf_event_init_cpu(cpu);
-               break;
-       case CPU_DOWN_PREPARE:
-               /*
-                * This must be done before the CPU dies because after that an
-                * active event might want to IPI the CPU and that'll not work
-                * so great for dead CPUs.
-                *
-                * XXX smp_call_function_single() return -ENXIO without a warn
-                * so we could possibly deal with this.
-                *
-                * This is safe against new events arriving because
-                * sys_perf_event_open() serializes against hotplug using
-                * get_online_cpus().
-                */
-               perf_event_exit_cpu(cpu);
-               break;
-       default:
-               break;
-       }
-       return NOTIFY_OK;
- }
  void __init perf_event_init(void)
  {
        int ret;
        perf_pmu_register(&perf_cpu_clock, NULL, -1);
        perf_pmu_register(&perf_task_clock, NULL, -1);
        perf_tp_register();
-       perf_cpu_notifier(perf_cpu_notify);
+       perf_event_init_cpu(smp_processor_id());
        register_reboot_notifier(&perf_reboot_notifier);
  
        ret = init_hw_breakpoint();
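As a concrete check of the PERF_SAMPLE_RAW sizing added to perf_prepare_sample() and consumed by perf_output_sample() above, the comment block below (not part of the commit) walks the arithmetic for an assumed chain of two fragments of 5 and 12 bytes.

/*
 * sum        = 5 + 12               = 17 bytes of fragment data
 * size       = round_up(17 + 4, 8)  = 24 (u32 size header included)
 * raw->size  = 24 - 4               = 20 (the value perf_output_put() writes)
 * frag->pad  = 20 - 17              = 3  (emitted via __output_skip())
 *
 * On output: 4 (size) + 5 + 12 (fragments) + 3 (pad) = 24 bytes, so the
 * next field in the sample stays u64-aligned, matching the
 * header->size += size accounting in perf_prepare_sample().
 */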
diff --combined kernel/smp.c
index 36552beed39713526aa384a9cf9f1878630834fb,7180491c9678d775989e2090c15629fbb47af22a..3aa642d39c0370849372cab0c95f9a5d7760e794
@@@ -33,69 -33,54 +33,54 @@@ static DEFINE_PER_CPU_SHARED_ALIGNED(st
  
  static void flush_smp_call_function_queue(bool warn_cpu_offline);
  
- static int
- hotplug_cfd(struct notifier_block *nfb, unsigned long action, void *hcpu)
+ int smpcfd_prepare_cpu(unsigned int cpu)
  {
-       long cpu = (long)hcpu;
        struct call_function_data *cfd = &per_cpu(cfd_data, cpu);
  
-       switch (action) {
-       case CPU_UP_PREPARE:
-       case CPU_UP_PREPARE_FROZEN:
-               if (!zalloc_cpumask_var_node(&cfd->cpumask, GFP_KERNEL,
-                               cpu_to_node(cpu)))
-                       return notifier_from_errno(-ENOMEM);
-               cfd->csd = alloc_percpu(struct call_single_data);
-               if (!cfd->csd) {
-                       free_cpumask_var(cfd->cpumask);
-                       return notifier_from_errno(-ENOMEM);
-               }
-               break;
- #ifdef CONFIG_HOTPLUG_CPU
-       case CPU_UP_CANCELED:
-       case CPU_UP_CANCELED_FROZEN:
-               /* Fall-through to the CPU_DEAD[_FROZEN] case. */
-       case CPU_DEAD:
-       case CPU_DEAD_FROZEN:
+       if (!zalloc_cpumask_var_node(&cfd->cpumask, GFP_KERNEL,
+                                    cpu_to_node(cpu)))
+               return -ENOMEM;
+       cfd->csd = alloc_percpu(struct call_single_data);
+       if (!cfd->csd) {
                free_cpumask_var(cfd->cpumask);
-               free_percpu(cfd->csd);
-               break;
-       case CPU_DYING:
-       case CPU_DYING_FROZEN:
-               /*
-                * The IPIs for the smp-call-function callbacks queued by other
-                * CPUs might arrive late, either due to hardware latencies or
-                * because this CPU disabled interrupts (inside stop-machine)
-                * before the IPIs were sent. So flush out any pending callbacks
-                * explicitly (without waiting for the IPIs to arrive), to
-                * ensure that the outgoing CPU doesn't go offline with work
-                * still pending.
-                */
-               flush_smp_call_function_queue(false);
-               break;
- #endif
-       };
-       return NOTIFY_OK;
+               return -ENOMEM;
+       }
+       return 0;
  }
  
- static struct notifier_block hotplug_cfd_notifier = {
-       .notifier_call          = hotplug_cfd,
- };
+ int smpcfd_dead_cpu(unsigned int cpu)
+ {
+       struct call_function_data *cfd = &per_cpu(cfd_data, cpu);
+       free_cpumask_var(cfd->cpumask);
+       free_percpu(cfd->csd);
+       return 0;
+ }
+ int smpcfd_dying_cpu(unsigned int cpu)
+ {
+       /*
+        * The IPIs for the smp-call-function callbacks queued by other
+        * CPUs might arrive late, either due to hardware latencies or
+        * because this CPU disabled interrupts (inside stop-machine)
+        * before the IPIs were sent. So flush out any pending callbacks
+        * explicitly (without waiting for the IPIs to arrive), to
+        * ensure that the outgoing CPU doesn't go offline with work
+        * still pending.
+        */
+       flush_smp_call_function_queue(false);
+       return 0;
+ }
  
  void __init call_function_init(void)
  {
-       void *cpu = (void *)(long)smp_processor_id();
        int i;
  
        for_each_possible_cpu(i)
                init_llist_head(&per_cpu(call_single_queue, i));
  
-       hotplug_cfd(&hotplug_cfd_notifier, CPU_UP_PREPARE, cpu);
-       register_cpu_notifier(&hotplug_cfd_notifier);
+       smpcfd_prepare_cpu(smp_processor_id());
  }
  
  /*
   */
  static __always_inline void csd_lock_wait(struct call_single_data *csd)
  {
 -      smp_cond_acquire(!(csd->flags & CSD_FLAG_LOCK));
 +      smp_cond_load_acquire(&csd->flags, !(VAL & CSD_FLAG_LOCK));
  }
  
  static __always_inline void csd_lock(struct call_single_data *csd)
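A side note on the csd_lock_wait() change above: smp_cond_load_acquire(ptr, cond) spins, reloading *ptr on each iteration, until the condition (written in terms of the token VAL, which holds the freshly loaded value) becomes true, and the final load has acquire ordering. A minimal, illustrative use with an invented flag follows; the exact header needed for the helper may vary by architecture.

#include <linux/atomic.h>       /* atomic/barrier helpers; header choice approximate */

static unsigned int mydrv_ready;

static void mydrv_wait_for_ready(void)
{
        /* spin until another CPU stores a non-zero value to mydrv_ready */
        smp_cond_load_acquire(&mydrv_ready, VAL != 0);
}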
diff --combined kernel/workqueue.c
index d12bd958077e80a8fd36f1162c0cea2881692eaa,c9dd5fbdbf333a785218ed1fe61bcdb732a2189e..ef071ca73fc325e69adb599e7637358c49cd215b
@@@ -4369,8 -4369,8 +4369,8 @@@ static void show_pwq(struct pool_workqu
  /**
   * show_workqueue_state - dump workqueue state
   *
 - * Called from a sysrq handler and prints out all busy workqueues and
 - * pools.
 + * Called from a sysrq handler or try_to_freeze_tasks() and prints out
 + * all busy workqueues and pools.
   */
  void show_workqueue_state(void)
  {
@@@ -4600,91 -4600,76 +4600,72 @@@ static void restore_unbound_workers_cpu
        if (!cpumask_test_cpu(cpu, pool->attrs->cpumask))
                return;
  
 -      /* is @cpu the only online CPU? */
        cpumask_and(&cpumask, pool->attrs->cpumask, cpu_online_mask);
 -      if (cpumask_weight(&cpumask) != 1)
 -              return;
  
        /* as we're called from CPU_ONLINE, the following shouldn't fail */
        for_each_pool_worker(worker, pool)
 -              WARN_ON_ONCE(set_cpus_allowed_ptr(worker->task,
 -                                                pool->attrs->cpumask) < 0);
 +              WARN_ON_ONCE(set_cpus_allowed_ptr(worker->task, &cpumask) < 0);
  }
  
- /*
-  * Workqueues should be brought up before normal priority CPU notifiers.
-  * This will be registered high priority CPU notifier.
-  */
- static int workqueue_cpu_up_callback(struct notifier_block *nfb,
-                                              unsigned long action,
-                                              void *hcpu)
+ int workqueue_prepare_cpu(unsigned int cpu)
+ {
+       struct worker_pool *pool;
+       for_each_cpu_worker_pool(pool, cpu) {
+               if (pool->nr_workers)
+                       continue;
+               if (!create_worker(pool))
+                       return -ENOMEM;
+       }
+       return 0;
+ }
+ int workqueue_online_cpu(unsigned int cpu)
  {
-       int cpu = (unsigned long)hcpu;
        struct worker_pool *pool;
        struct workqueue_struct *wq;
        int pi;
  
-       switch (action & ~CPU_TASKS_FROZEN) {
-       case CPU_UP_PREPARE:
-               for_each_cpu_worker_pool(pool, cpu) {
-                       if (pool->nr_workers)
-                               continue;
-                       if (!create_worker(pool))
-                               return NOTIFY_BAD;
-               }
-               break;
-       case CPU_DOWN_FAILED:
-       case CPU_ONLINE:
-               mutex_lock(&wq_pool_mutex);
+       mutex_lock(&wq_pool_mutex);
  
-               for_each_pool(pool, pi) {
-                       mutex_lock(&pool->attach_mutex);
+       for_each_pool(pool, pi) {
+               mutex_lock(&pool->attach_mutex);
  
-                       if (pool->cpu == cpu)
-                               rebind_workers(pool);
-                       else if (pool->cpu < 0)
-                               restore_unbound_workers_cpumask(pool, cpu);
+               if (pool->cpu == cpu)
+                       rebind_workers(pool);
+               else if (pool->cpu < 0)
+                       restore_unbound_workers_cpumask(pool, cpu);
  
-                       mutex_unlock(&pool->attach_mutex);
-               }
+               mutex_unlock(&pool->attach_mutex);
+       }
  
-               /* update NUMA affinity of unbound workqueues */
-               list_for_each_entry(wq, &workqueues, list)
-                       wq_update_unbound_numa(wq, cpu, true);
+       /* update NUMA affinity of unbound workqueues */
+       list_for_each_entry(wq, &workqueues, list)
+               wq_update_unbound_numa(wq, cpu, true);
  
-               mutex_unlock(&wq_pool_mutex);
-               break;
-       }
-       return NOTIFY_OK;
+       mutex_unlock(&wq_pool_mutex);
+       return 0;
  }
  
- /*
-  * Workqueues should be brought down after normal priority CPU notifiers.
-  * This will be registered as low priority CPU notifier.
-  */
- static int workqueue_cpu_down_callback(struct notifier_block *nfb,
-                                                unsigned long action,
-                                                void *hcpu)
+ int workqueue_offline_cpu(unsigned int cpu)
  {
-       int cpu = (unsigned long)hcpu;
        struct work_struct unbind_work;
        struct workqueue_struct *wq;
  
-       switch (action & ~CPU_TASKS_FROZEN) {
-       case CPU_DOWN_PREPARE:
-               /* unbinding per-cpu workers should happen on the local CPU */
-               INIT_WORK_ONSTACK(&unbind_work, wq_unbind_fn);
-               queue_work_on(cpu, system_highpri_wq, &unbind_work);
-               /* update NUMA affinity of unbound workqueues */
-               mutex_lock(&wq_pool_mutex);
-               list_for_each_entry(wq, &workqueues, list)
-                       wq_update_unbound_numa(wq, cpu, false);
-               mutex_unlock(&wq_pool_mutex);
-               /* wait for per-cpu unbinding to finish */
-               flush_work(&unbind_work);
-               destroy_work_on_stack(&unbind_work);
-               break;
-       }
-       return NOTIFY_OK;
+       /* unbinding per-cpu workers should happen on the local CPU */
+       INIT_WORK_ONSTACK(&unbind_work, wq_unbind_fn);
+       queue_work_on(cpu, system_highpri_wq, &unbind_work);
+       /* update NUMA affinity of unbound workqueues */
+       mutex_lock(&wq_pool_mutex);
+       list_for_each_entry(wq, &workqueues, list)
+               wq_update_unbound_numa(wq, cpu, false);
+       mutex_unlock(&wq_pool_mutex);
+       /* wait for per-cpu unbinding to finish */
+       flush_work(&unbind_work);
+       destroy_work_on_stack(&unbind_work);
+       return 0;
  }
  
  #ifdef CONFIG_SMP
@@@ -5486,9 -5471,6 +5467,6 @@@ static int __init init_workqueues(void
  
        pwq_cache = KMEM_CACHE(pool_workqueue, SLAB_PANIC);
  
-       cpu_notifier(workqueue_cpu_up_callback, CPU_PRI_WORKQUEUE_UP);
-       hotcpu_notifier(workqueue_cpu_down_callback, CPU_PRI_WORKQUEUE_DOWN);
        wq_numa_init();
  
        /* initialize CPU pools */
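The unbind_work handling in workqueue_offline_cpu() above is an instance of a more general "run a function on a given CPU and wait for it" idiom. A stripped-down sketch, illustrative only (my_on_cpu_fn() and run_on_cpu_and_wait() are invented; the target CPU must still be online for the per-cpu queueing to stay on that CPU):

#include <linux/workqueue.h>

static void my_on_cpu_fn(struct work_struct *work)
{
        /* executes in a highpri kworker bound to the chosen CPU */
}

static void run_on_cpu_and_wait(int cpu)
{
        struct work_struct w;

        INIT_WORK_ONSTACK(&w, my_on_cpu_fn);
        queue_work_on(cpu, system_highpri_wq, &w);
        flush_work(&w);                 /* wait for it to finish */
        destroy_work_on_stack(&w);      /* pairs with INIT_WORK_ONSTACK */
}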
diff --combined virt/kvm/kvm_main.c
index ce3d8e5be73e38f54d4b2fab2bdb9874abaccf0d,c1d6cf5a74a1609c104202a6f95eaf4055efec7c..2e791367c576c9b2fb7dc6ee30f49f61356d3a05
@@@ -148,7 -148,6 +148,7 @@@ int vcpu_load(struct kvm_vcpu *vcpu
        put_cpu();
        return 0;
  }
 +EXPORT_SYMBOL_GPL(vcpu_load);
  
  void vcpu_put(struct kvm_vcpu *vcpu)
  {
        preempt_enable();
        mutex_unlock(&vcpu->mutex);
  }
 +EXPORT_SYMBOL_GPL(vcpu_put);
  
  static void ack_flush(void *_completed)
  {
@@@ -3050,7 -3048,6 +3050,7 @@@ static int kvm_dev_ioctl_create_vm(unsi
  {
        int r;
        struct kvm *kvm;
 +      struct file *file;
  
        kvm = kvm_create_vm(type);
        if (IS_ERR(kvm))
                return r;
        }
  #endif
 -      r = anon_inode_getfd("kvm-vm", &kvm_vm_fops, kvm, O_RDWR | O_CLOEXEC);
 +      r = get_unused_fd_flags(O_CLOEXEC);
        if (r < 0) {
                kvm_put_kvm(kvm);
                return r;
        }
 +      file = anon_inode_getfile("kvm-vm", &kvm_vm_fops, kvm, O_RDWR);
 +      if (IS_ERR(file)) {
 +              put_unused_fd(r);
 +              kvm_put_kvm(kvm);
 +              return PTR_ERR(file);
 +      }
  
        if (kvm_create_vm_debugfs(kvm, r) < 0) {
 -              kvm_put_kvm(kvm);
 +              put_unused_fd(r);
 +              fput(file);
                return -ENOMEM;
        }
  
 +      fd_install(r, file);
        return r;
  }
  
@@@ -3155,12 -3144,13 +3155,13 @@@ static void hardware_enable_nolock(voi
        }
  }
  
- static void hardware_enable(void)
+ static int kvm_starting_cpu(unsigned int cpu)
  {
        raw_spin_lock(&kvm_count_lock);
        if (kvm_usage_count)
                hardware_enable_nolock(NULL);
        raw_spin_unlock(&kvm_count_lock);
+       return 0;
  }
  
  static void hardware_disable_nolock(void *junk)
        kvm_arch_hardware_disable();
  }
  
- static void hardware_disable(void)
+ static int kvm_dying_cpu(unsigned int cpu)
  {
        raw_spin_lock(&kvm_count_lock);
        if (kvm_usage_count)
                hardware_disable_nolock(NULL);
        raw_spin_unlock(&kvm_count_lock);
+       return 0;
  }
  
  static void hardware_disable_all_nolock(void)
@@@ -3219,21 -3210,6 +3221,6 @@@ static int hardware_enable_all(void
        return r;
  }
  
- static int kvm_cpu_hotplug(struct notifier_block *notifier, unsigned long val,
-                          void *v)
- {
-       val &= ~CPU_TASKS_FROZEN;
-       switch (val) {
-       case CPU_DYING:
-               hardware_disable();
-               break;
-       case CPU_STARTING:
-               hardware_enable();
-               break;
-       }
-       return NOTIFY_OK;
- }
  static int kvm_reboot(struct notifier_block *notifier, unsigned long val,
                      void *v)
  {
@@@ -3500,10 -3476,6 +3487,6 @@@ int kvm_io_bus_unregister_dev(struct kv
        return r;
  }
  
- static struct notifier_block kvm_cpu_notifier = {
-       .notifier_call = kvm_cpu_hotplug,
- };
  static int kvm_debugfs_open(struct inode *inode, struct file *file,
                           int (*get)(void *, u64 *), int (*set)(void *, u64),
                           const char *fmt)
@@@ -3754,7 -3726,8 +3737,8 @@@ int kvm_init(void *opaque, unsigned vcp
                        goto out_free_1;
        }
  
-       r = register_cpu_notifier(&kvm_cpu_notifier);
+       r = cpuhp_setup_state_nocalls(CPUHP_AP_KVM_STARTING, "AP_KVM_STARTING",
+                                     kvm_starting_cpu, kvm_dying_cpu);
        if (r)
                goto out_free_2;
        register_reboot_notifier(&kvm_reboot_notifier);
@@@ -3808,7 -3781,7 +3792,7 @@@ out_free
        kmem_cache_destroy(kvm_vcpu_cache);
  out_free_3:
        unregister_reboot_notifier(&kvm_reboot_notifier);
-       unregister_cpu_notifier(&kvm_cpu_notifier);
+       cpuhp_remove_state_nocalls(CPUHP_AP_KVM_STARTING);
  out_free_2:
  out_free_1:
        kvm_arch_hardware_unsetup();
@@@ -3831,7 -3804,7 +3815,7 @@@ void kvm_exit(void
        kvm_async_pf_deinit();
        unregister_syscore_ops(&kvm_syscore_ops);
        unregister_reboot_notifier(&kvm_reboot_notifier);
-       unregister_cpu_notifier(&kvm_cpu_notifier);
+       cpuhp_remove_state_nocalls(CPUHP_AP_KVM_STARTING);
        on_each_cpu(hardware_disable_nolock, NULL, 1);
        kvm_arch_hardware_unsetup();
        kvm_arch_exit();
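One detail of the KVM conversion above worth spelling out (my reading of the cpuhp API, not stated in the diff): the *_nocalls() variants install or remove the state without invoking the callbacks on CPUs that are already online, which fits KVM enabling virtualization only when the first VM is created and explains the explicit on_each_cpu(hardware_disable_nolock, ...) in kvm_exit() right after the state is removed. For a caller that does want the callbacks run during (un)registration, the plain variants do so automatically; a small illustrative contrast with invented callbacks:

#include <linux/cpuhotplug.h>

static int my_online(unsigned int cpu)  { return 0; }
static int my_offline(unsigned int cpu) { return 0; }

static int my_register(void)
{
        /* installs the callbacks AND runs my_online() on each online CPU;
         * for a dynamic state the allocated slot (> 0) is returned */
        return cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "mydrv:online",
                                 my_online, my_offline);
}

static int my_register_nocalls(void)
{
        /* only installs the callbacks; already-online CPUs are untouched */
        return cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN, "mydrv:online",
                                         my_online, my_offline);
}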