git.proxmox.com Git - mirror_ubuntu-zesty-kernel.git/commitdiff
Merge branch 'smp-hotplug-for-linus' of git://git.kernel.org/pub/scm/linux/kernel...
author    Linus Torvalds <torvalds@linux-foundation.org>
          Fri, 29 Jul 2016 20:55:30 +0000 (13:55 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
          Fri, 29 Jul 2016 20:55:30 +0000 (13:55 -0700)
Pull smp hotplug updates from Thomas Gleixner:
 "This is the next part of the hotplug rework.

   - Convert all notifiers with a priority assigned

   - Convert all CPU_STARTING/DYING notifiers

     The final removal of the STARTING/DYING infrastructure will happen
     when the merge window closes.

  Another 700 lines of impenetrable maze gone :)"

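The conversion pattern behind most of these commits looks roughly like the sketch below. This is a hedged illustration, not code from the series: the "foo" driver, foo_init_cpu()/foo_exit_cpu() and CPUHP_AP_FOO_STARTING are placeholders. The idea is that a multiplexed notifier switching on CPU_STARTING/CPU_DYING action codes is replaced by two plain callbacks registered once with cpuhp_setup_state().

    /* Old style: one notifier_block, action codes, implicit priority ordering. */
    #include <linux/cpu.h>
    #include <linux/notifier.h>

    static void foo_init_cpu(unsigned int cpu);   /* placeholder per-CPU setup */
    static void foo_exit_cpu(unsigned int cpu);   /* placeholder per-CPU teardown */

    static int foo_cpu_notify(struct notifier_block *nb,
                              unsigned long action, void *hcpu)
    {
            unsigned int cpu = (unsigned long)hcpu;

            switch (action & ~CPU_TASKS_FROZEN) {
            case CPU_STARTING:
                    foo_init_cpu(cpu);            /* runs on the incoming CPU */
                    break;
            case CPU_DYING:
                    foo_exit_cpu(cpu);            /* runs on the outgoing CPU */
                    break;
            }
            return NOTIFY_OK;
    }

    static struct notifier_block foo_cpu_nb = {
            .notifier_call  = foo_cpu_notify,
            .priority       = 100,                /* ordering vs. other notifiers */
    };
    /* register_cpu_notifier(&foo_cpu_nb); */

    /* New style: one explicit hotplug state, ordering fixed by enum cpuhp_state. */
    static int foo_starting_cpu(unsigned int cpu)
    {
            foo_init_cpu(cpu);
            return 0;
    }

    static int foo_dying_cpu(unsigned int cpu)
    {
            foo_exit_cpu(cpu);
            return 0;
    }

    /*
     * cpuhp_setup_state(CPUHP_AP_FOO_STARTING, "AP_FOO_STARTING",
     *                   foo_starting_cpu, foo_dying_cpu);
     */

The cpuhp_setup_state_nocalls() variant seen in several of the diffs below registers the callbacks without invoking the startup callback for CPUs that are already online, which suits callers that have just initialized the boot CPU by hand.
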
* 'smp-hotplug-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (70 commits)
  timers/core: Correct callback order during CPU hot plug
  leds/trigger/cpu: Move from CPU_STARTING to ONLINE level
  powerpc/numa: Convert to hotplug state machine
  arm/perf: Fix hotplug state machine conversion
  irqchip/armada: Avoid unused function warnings
  ARC/time: Convert to hotplug state machine
  clocksource/atlas7: Convert to hotplug state machine
  clocksource/armada-370-xp: Convert to hotplug state machine
  clocksource/exynos_mct: Convert to hotplug state machine
  clocksource/arm_global_timer: Convert to hotplug state machine
  rcu: Convert rcutree to hotplug state machine
  KVM/arm/arm64/vgic-new: Convert to hotplug state machine
  smp/cfd: Convert core to hotplug state machine
  x86/x2apic: Convert to CPU hotplug state machine
  profile: Convert to hotplug state machine
  timers/core: Convert to hotplug state machine
  hrtimer: Convert to hotplug state machine
  x86/tboot: Convert to hotplug state machine
  arm64/armv8 deprecated: Convert to hotplug state machine
  hwtracing/coresight-etm4x: Convert to hotplug state machine
  ...

23 files changed:
arch/arm/mach-mvebu/coherency.c
arch/arm/xen/enlighten.c
arch/arm64/kernel/armv8_deprecated.c
arch/s390/kernel/perf_cpum_sf.c
arch/x86/entry/vdso/vma.c
arch/x86/events/amd/ibs.c
arch/x86/events/core.c
arch/x86/events/intel/core.c
arch/x86/events/intel/cstate.c
arch/x86/events/intel/rapl.c
arch/x86/events/intel/uncore.c
arch/x86/kernel/apic/x2apic_cluster.c
arch/x86/kvm/x86.c
drivers/acpi/processor_driver.c
drivers/irqchip/irq-armada-370-xp.c
drivers/irqchip/irq-bcm2836.c
drivers/irqchip/irq-gic.c
drivers/perf/arm_pmu.c
include/linux/perf_event.h
kernel/events/core.c
kernel/smp.c
kernel/workqueue.c
virt/kvm/kvm_main.c

index e80f0dde218919dab8d7a2b5873a4962755f3ee2,77aaa5243a2075675c59fedbc140421a56c34956..ae2a018b93050fa8171d2164124d85796e801bb6
@@@ -111,20 -111,12 +111,12 @@@ static struct notifier_block mvebu_hwcc
        .notifier_call = mvebu_hwcc_notifier,
  };
  
- static int armada_xp_clear_shared_l2_notifier_func(struct notifier_block *nfb,
-                                       unsigned long action, void *hcpu)
+ static int armada_xp_clear_l2_starting(unsigned int cpu)
  {
-       if (action == CPU_STARTING || action == CPU_STARTING_FROZEN)
-               armada_xp_clear_shared_l2();
-       return NOTIFY_OK;
+       armada_xp_clear_shared_l2();
+       return 0;
  }
  
- static struct notifier_block armada_xp_clear_shared_l2_notifier = {
-       .notifier_call = armada_xp_clear_shared_l2_notifier_func,
-       .priority = 100,
- };
  static void __init armada_370_coherency_init(struct device_node *np)
  {
        struct resource res;
  
        of_node_put(cpu_config_np);
  
-       register_cpu_notifier(&armada_xp_clear_shared_l2_notifier);
+       cpuhp_setup_state_nocalls(CPUHP_AP_ARM_MVEBU_COHERENCY,
+                                 "AP_ARM_MVEBU_COHERENCY",
+                                 armada_xp_clear_l2_starting, NULL);
  exit:
        set_cpu_coherent();
  }
  
  /*
 - * This ioremap hook is used on Armada 375/38x to ensure that PCIe
 - * memory areas are mapped as MT_UNCACHED instead of MT_DEVICE. This
 - * is needed as a workaround for a deadlock issue between the PCIe
 - * interface and the cache controller.
 + * This ioremap hook is used on Armada 375/38x to ensure that all MMIO
 + * areas are mapped as MT_UNCACHED instead of MT_DEVICE. This is
 + * needed for the HW I/O coherency mechanism to work properly without
 + * deadlock.
   */
  static void __iomem *
 -armada_pcie_wa_ioremap_caller(phys_addr_t phys_addr, size_t size,
 -                            unsigned int mtype, void *caller)
 +armada_wa_ioremap_caller(phys_addr_t phys_addr, size_t size,
 +                       unsigned int mtype, void *caller)
  {
 -      struct resource pcie_mem;
 -
 -      mvebu_mbus_get_pcie_mem_aperture(&pcie_mem);
 -
 -      if (pcie_mem.start <= phys_addr && (phys_addr + size) <= pcie_mem.end)
 -              mtype = MT_UNCACHED;
 -
 +      mtype = MT_UNCACHED;
        return __arm_ioremap_caller(phys_addr, size, mtype, caller);
  }
  
@@@ -180,8 -179,7 +173,8 @@@ static void __init armada_375_380_coher
        struct device_node *cache_dn;
  
        coherency_cpu_base = of_iomap(np, 0);
 -      arch_ioremap_caller = armada_pcie_wa_ioremap_caller;
 +      arch_ioremap_caller = armada_wa_ioremap_caller;
 +      pci_ioremap_set_mem_type(MT_UNCACHED);
  
        /*
         * We should switch the PL310 to I/O coherency mode only if
diff --combined arch/arm/xen/enlighten.c
index 0bea3d271f6efd28c9b8efb225ca20e68c3c2889,d822e2313950bd2f03b837c7f6ae574d944c3a58..b0b82f5ea33825943fb75066cf1af0d52f17b3c2
  #include <xen/page.h>
  #include <xen/interface/sched.h>
  #include <xen/xen-ops.h>
 -#include <asm/paravirt.h>
  #include <asm/xen/hypervisor.h>
  #include <asm/xen/hypercall.h>
 +#include <asm/xen/xen-ops.h>
  #include <asm/system_misc.h>
 +#include <asm/efi.h>
  #include <linux/interrupt.h>
  #include <linux/irqreturn.h>
  #include <linux/module.h>
  #include <linux/of.h>
 +#include <linux/of_fdt.h>
  #include <linux/of_irq.h>
  #include <linux/of_address.h>
  #include <linux/cpuidle.h>
@@@ -32,7 -30,6 +32,7 @@@
  #include <linux/time64.h>
  #include <linux/timekeeping.h>
  #include <linux/timekeeper_internal.h>
 +#include <linux/acpi.h>
  
  #include <linux/mm.h>
  
@@@ -49,16 -46,14 +49,16 @@@ struct shared_info *HYPERVISOR_shared_i
  DEFINE_PER_CPU(struct vcpu_info *, xen_vcpu);
  static struct vcpu_info __percpu *xen_vcpu_info;
  
 +/* Linux <-> Xen vCPU id mapping */
 +DEFINE_PER_CPU(int, xen_vcpu_id) = -1;
 +EXPORT_PER_CPU_SYMBOL(xen_vcpu_id);
 +
  /* These are unused until we support booting "pre-ballooned" */
  unsigned long xen_released_pages;
  struct xen_memory_region xen_extra_mem[XEN_EXTRA_MEM_MAX_REGIONS] __initdata;
  
  static __read_mostly unsigned int xen_events_irq;
  
 -static __initdata struct device_node *xen_node;
 -
  int xen_remap_domain_gfn_array(struct vm_area_struct *vma,
                               unsigned long addr,
                               xen_pfn_t *gfn, int nr,
@@@ -89,6 -84,19 +89,6 @@@ int xen_unmap_domain_gfn_range(struct v
  }
  EXPORT_SYMBOL_GPL(xen_unmap_domain_gfn_range);
  
 -static unsigned long long xen_stolen_accounting(int cpu)
 -{
 -      struct vcpu_runstate_info state;
 -
 -      BUG_ON(cpu != smp_processor_id());
 -
 -      xen_get_runstate_snapshot(&state);
 -
 -      WARN_ON(state.state != RUNSTATE_running);
 -
 -      return state.time[RUNSTATE_runnable] + state.time[RUNSTATE_offline];
 -}
 -
  static void xen_read_wallclock(struct timespec64 *ts)
  {
        u32 version;
@@@ -153,12 -161,11 +153,11 @@@ static struct notifier_block xen_pvcloc
        .notifier_call = xen_pvclock_gtod_notify,
  };
  
- static void xen_percpu_init(void)
+ static int xen_starting_cpu(unsigned int cpu)
  {
        struct vcpu_register_vcpu_info info;
        struct vcpu_info *vcpup;
        int err;
-       int cpu = get_cpu();
  
        /* 
         * VCPUOP_register_vcpu_info cannot be called twice for the same
        pr_info("Xen: initializing cpu%d\n", cpu);
        vcpup = per_cpu_ptr(xen_vcpu_info, cpu);
  
 +      /* Direct vCPU id mapping for ARM guests. */
 +      per_cpu(xen_vcpu_id, cpu) = cpu;
 +
        info.mfn = virt_to_gfn(vcpup);
        info.offset = xen_offset_in_page(vcpup);
  
 -      err = HYPERVISOR_vcpu_op(VCPUOP_register_vcpu_info, cpu, &info);
 +      err = HYPERVISOR_vcpu_op(VCPUOP_register_vcpu_info, xen_vcpu_nr(cpu),
 +                               &info);
        BUG_ON(err);
        per_cpu(xen_vcpu, cpu) = vcpup;
  
  
  after_register_vcpu_info:
        enable_percpu_irq(xen_events_irq, 0);
-       put_cpu();
+       return 0;
+ }
+ static int xen_dying_cpu(unsigned int cpu)
+ {
+       disable_percpu_irq(xen_events_irq);
+       return 0;
  }
  
  static void xen_restart(enum reboot_mode reboot_mode, const char *cmd)
@@@ -205,74 -214,12 +210,52 @@@ static void xen_power_off(void
        BUG_ON(rc);
  }
  
- static int xen_cpu_notification(struct notifier_block *self,
-                               unsigned long action,
-                               void *hcpu)
- {
-       switch (action) {
-       case CPU_STARTING:
-               xen_percpu_init();
-               break;
-       case CPU_DYING:
-               disable_percpu_irq(xen_events_irq);
-               break;
-       default:
-               break;
-       }
-       return NOTIFY_OK;
- }
- static struct notifier_block xen_cpu_notifier = {
-       .notifier_call = xen_cpu_notification,
- };
  static irqreturn_t xen_arm_callback(int irq, void *arg)
  {
        xen_hvm_evtchn_do_upcall();
        return IRQ_HANDLED;
  }
  
 +static __initdata struct {
 +      const char *compat;
 +      const char *prefix;
 +      const char *version;
 +      bool found;
 +} hyper_node = {"xen,xen", "xen,xen-", NULL, false};
 +
 +static int __init fdt_find_hyper_node(unsigned long node, const char *uname,
 +                                    int depth, void *data)
 +{
 +      const void *s = NULL;
 +      int len;
 +
 +      if (depth != 1 || strcmp(uname, "hypervisor") != 0)
 +              return 0;
 +
 +      if (of_flat_dt_is_compatible(node, hyper_node.compat))
 +              hyper_node.found = true;
 +
 +      s = of_get_flat_dt_prop(node, "compatible", &len);
 +      if (strlen(hyper_node.prefix) + 3  < len &&
 +          !strncmp(hyper_node.prefix, s, strlen(hyper_node.prefix)))
 +              hyper_node.version = s + strlen(hyper_node.prefix);
 +
 +      /*
 +       * Check if Xen supports EFI by checking whether there is the
 +       * "/hypervisor/uefi" node in DT. If so, runtime services are available
 +       * through proxy functions (e.g. in case of Xen dom0 EFI implementation
 +       * they call special hypercall which executes relevant EFI functions)
 +       * and that is why they are always enabled.
 +       */
 +      if (IS_ENABLED(CONFIG_XEN_EFI)) {
 +              if ((of_get_flat_dt_subnode_by_name(node, "uefi") > 0) &&
 +                  !efi_runtime_disabled())
 +                      set_bit(EFI_RUNTIME_SERVICES, &efi.flags);
 +      }
 +
 +      return 0;
 +}
 +
  /*
   * see Documentation/devicetree/bindings/arm/xen.txt for the
   * documentation of the Xen Device Tree format.
  #define GRANT_TABLE_PHYSADDR 0
  void __init xen_early_init(void)
  {
 -      int len;
 -      const char *s = NULL;
 -      const char *version = NULL;
 -      const char *xen_prefix = "xen,xen-";
 -
 -      xen_node = of_find_compatible_node(NULL, NULL, "xen,xen");
 -      if (!xen_node) {
 +      of_scan_flat_dt(fdt_find_hyper_node, NULL);
 +      if (!hyper_node.found) {
                pr_debug("No Xen support\n");
                return;
        }
 -      s = of_get_property(xen_node, "compatible", &len);
 -      if (strlen(xen_prefix) + 3  < len &&
 -                      !strncmp(xen_prefix, s, strlen(xen_prefix)))
 -              version = s + strlen(xen_prefix);
 -      if (version == NULL) {
 +
 +      if (hyper_node.version == NULL) {
                pr_debug("Xen version not found\n");
                return;
        }
  
 -      pr_info("Xen %s support found\n", version);
 +      pr_info("Xen %s support found\n", hyper_node.version);
  
        xen_domain_type = XEN_HVM_DOMAIN;
  
                add_preferred_console("hvc", 0, NULL);
  }
  
 +static void __init xen_acpi_guest_init(void)
 +{
 +#ifdef CONFIG_ACPI
 +      struct xen_hvm_param a;
 +      int interrupt, trigger, polarity;
 +
 +      a.domid = DOMID_SELF;
 +      a.index = HVM_PARAM_CALLBACK_IRQ;
 +
 +      if (HYPERVISOR_hvm_op(HVMOP_get_param, &a)
 +          || (a.value >> 56) != HVM_PARAM_CALLBACK_TYPE_PPI) {
 +              xen_events_irq = 0;
 +              return;
 +      }
 +
 +      interrupt = a.value & 0xff;
 +      trigger = ((a.value >> 8) & 0x1) ? ACPI_EDGE_SENSITIVE
 +                                       : ACPI_LEVEL_SENSITIVE;
 +      polarity = ((a.value >> 8) & 0x2) ? ACPI_ACTIVE_LOW
 +                                        : ACPI_ACTIVE_HIGH;
 +      xen_events_irq = acpi_register_gsi(NULL, interrupt, trigger, polarity);
 +#endif
 +}
 +
 +static void __init xen_dt_guest_init(void)
 +{
 +      struct device_node *xen_node;
 +
 +      xen_node = of_find_compatible_node(NULL, NULL, "xen,xen");
 +      if (!xen_node) {
 +              pr_err("Xen support was detected before, but it has disappeared\n");
 +              return;
 +      }
 +
 +      xen_events_irq = irq_of_parse_and_map(xen_node, 0);
 +}
 +
  static int __init xen_guest_init(void)
  {
        struct xen_add_to_physmap xatp;
        struct shared_info *shared_info_page = NULL;
 -      struct resource res;
 -      phys_addr_t grant_frames;
  
        if (!xen_domain())
                return 0;
  
 -      if (of_address_to_resource(xen_node, GRANT_TABLE_PHYSADDR, &res)) {
 -              pr_err("Xen grant table base address not found\n");
 -              return -ENODEV;
 -      }
 -      grant_frames = res.start;
 +      if (!acpi_disabled)
 +              xen_acpi_guest_init();
 +      else
 +              xen_dt_guest_init();
  
 -      xen_events_irq = irq_of_parse_and_map(xen_node, 0);
        if (!xen_events_irq) {
                pr_err("Xen event channel interrupt not found\n");
                return -ENODEV;
        }
  
 +      /*
 +       * The fdt parsing codes have set EFI_RUNTIME_SERVICES if Xen EFI
 +       * parameters are found. Force enable runtime services.
 +       */
 +      if (efi_enabled(EFI_RUNTIME_SERVICES))
 +              xen_efi_runtime_setup();
 +
        shared_info_page = (struct shared_info *)get_zeroed_page(GFP_KERNEL);
  
        if (!shared_info_page) {
        if (xen_vcpu_info == NULL)
                return -ENOMEM;
  
 -      if (gnttab_setup_auto_xlat_frames(grant_frames)) {
 +      /* Direct vCPU id mapping for ARM guests. */
 +      per_cpu(xen_vcpu_id, 0) = 0;
 +
 +      xen_auto_xlat_grant_frames.count = gnttab_max_grant_frames();
 +      if (xen_xlate_map_ballooned_pages(&xen_auto_xlat_grant_frames.pfn,
 +                                        &xen_auto_xlat_grant_frames.vaddr,
 +                                        xen_auto_xlat_grant_frames.count)) {
                free_percpu(xen_vcpu_info);
                return -ENOMEM;
        }
                return -EINVAL;
        }
  
-       xen_percpu_init();
-       register_cpu_notifier(&xen_cpu_notifier);
 -      pv_time_ops.steal_clock = xen_stolen_accounting;
 -      static_key_slow_inc(&paravirt_steal_enabled);
 +      xen_time_setup_guest();
 +
        if (xen_initial_domain())
                pvclock_gtod_register_notifier(&xen_pvclock_gtod_notifier);
  
-       return 0;
+       return cpuhp_setup_state(CPUHP_AP_ARM_XEN_STARTING,
+                                "AP_ARM_XEN_STARTING", xen_starting_cpu,
+                                xen_dying_cpu);
  }
  early_initcall(xen_guest_init);
  
@@@ -477,5 -384,4 +458,5 @@@ EXPORT_SYMBOL_GPL(HYPERVISOR_vcpu_op)
  EXPORT_SYMBOL_GPL(HYPERVISOR_tmem_op);
  EXPORT_SYMBOL_GPL(HYPERVISOR_platform_op);
  EXPORT_SYMBOL_GPL(HYPERVISOR_multicall);
 +EXPORT_SYMBOL_GPL(HYPERVISOR_vm_assist);
  EXPORT_SYMBOL_GPL(privcmd_call);
index 5f72475e2e3b2f6ab213e81d2b123424d9323014,9668c230674a330db7563b72e3e1389497f30536..42ffdb54e162d64164ab9f515d1ce21a379fb3d7
@@@ -121,7 -121,7 +121,7 @@@ static int run_all_cpu_set_hw_mode(stru
   *  0                 - If all the hooks ran successfully.
   * -EINVAL    - At least one hook is not supported by the CPU.
   */
- static int run_all_insn_set_hw_mode(unsigned long cpu)
+ static int run_all_insn_set_hw_mode(unsigned int cpu)
  {
        int rc = 0;
        unsigned long flags;
        list_for_each_entry(insn, &insn_emulation, node) {
                bool enable = (insn->current_mode == INSN_HW);
                if (insn->ops->set_hw_mode && insn->ops->set_hw_mode(enable)) {
-                       pr_warn("CPU[%ld] cannot support the emulation of %s",
+                       pr_warn("CPU[%u] cannot support the emulation of %s",
                                cpu, insn->ops->name);
                        rc = -EINVAL;
                }
@@@ -316,6 -316,28 +316,6 @@@ static void __init register_insn_emulat
   */
  #define TYPE_SWPB (1 << 22)
  
 -/*
 - * Set up process info to signal segmentation fault - called on access error.
 - */
 -static void set_segfault(struct pt_regs *regs, unsigned long addr)
 -{
 -      siginfo_t info;
 -
 -      down_read(&current->mm->mmap_sem);
 -      if (find_vma(current->mm, addr) == NULL)
 -              info.si_code = SEGV_MAPERR;
 -      else
 -              info.si_code = SEGV_ACCERR;
 -      up_read(&current->mm->mmap_sem);
 -
 -      info.si_signo = SIGSEGV;
 -      info.si_errno = 0;
 -      info.si_addr  = (void *) instruction_pointer(regs);
 -
 -      pr_debug("SWP{B} emulation: access caused memory abort!\n");
 -      arm64_notify_die("Illegal memory access", regs, &info, 0);
 -}
 -
  static int emulate_swpX(unsigned int address, unsigned int *data,
                        unsigned int type)
  {
        return res;
  }
  
 +#define       ARM_OPCODE_CONDITION_UNCOND     0xf
 +
 +static unsigned int __kprobes aarch32_check_condition(u32 opcode, u32 psr)
 +{
 +      u32 cc_bits  = opcode >> 28;
 +
 +      if (cc_bits != ARM_OPCODE_CONDITION_UNCOND) {
 +              if ((*aarch32_opcode_cond_checks[cc_bits])(psr))
 +                      return ARM_OPCODE_CONDTEST_PASS;
 +              else
 +                      return ARM_OPCODE_CONDTEST_FAIL;
 +      }
 +      return ARM_OPCODE_CONDTEST_UNCOND;
 +}
 +
  /*
   * swp_handler logs the id of calling process, dissects the instruction, sanity
   * checks the memory location, calls emulate_swpX for the actual operation and
@@@ -373,7 -380,7 +373,7 @@@ static int swp_handler(struct pt_regs *
  
        type = instr & TYPE_SWPB;
  
 -      switch (arm_check_condition(instr, regs->pstate)) {
 +      switch (aarch32_check_condition(instr, regs->pstate)) {
        case ARM_OPCODE_CONDTEST_PASS:
                break;
        case ARM_OPCODE_CONDTEST_FAIL:
@@@ -423,8 -430,7 +423,8 @@@ ret
        return 0;
  
  fault:
 -      set_segfault(regs, address);
 +      pr_debug("SWP{B} emulation: access caused memory abort!\n");
 +      arm64_notify_segfault(regs, address);
  
        return 0;
  }
@@@ -455,7 -461,7 +455,7 @@@ static int cp15barrier_handler(struct p
  {
        perf_sw_event(PERF_COUNT_SW_EMULATION_FAULTS, 1, regs, regs->pc);
  
 -      switch (arm_check_condition(instr, regs->pstate)) {
 +      switch (aarch32_check_condition(instr, regs->pstate)) {
        case ARM_OPCODE_CONDTEST_PASS:
                break;
        case ARM_OPCODE_CONDTEST_FAIL:
@@@ -611,20 -617,6 +611,6 @@@ static struct insn_emulation_ops setend
        .set_hw_mode = setend_set_hw_mode,
  };
  
- static int insn_cpu_hotplug_notify(struct notifier_block *b,
-                             unsigned long action, void *hcpu)
- {
-       int rc = 0;
-       if ((action & ~CPU_TASKS_FROZEN) == CPU_STARTING)
-               rc = run_all_insn_set_hw_mode((unsigned long)hcpu);
-       return notifier_from_errno(rc);
- }
- static struct notifier_block insn_cpu_hotplug_notifier = {
-       .notifier_call = insn_cpu_hotplug_notify,
- };
  /*
   * Invoked as late_initcall, since not needed before init spawned.
   */
@@@ -643,7 -635,9 +629,9 @@@ static int __init armv8_deprecated_init
                        pr_info("setend instruction emulation is not supported on the system");
        }
  
-       register_cpu_notifier(&insn_cpu_hotplug_notifier);
+       cpuhp_setup_state_nocalls(CPUHP_AP_ARM64_ISNDEP_STARTING,
+                                 "AP_ARM64_ISNDEP_STARTING",
+                                 run_all_insn_set_hw_mode, NULL);
        register_insn_emulation_sysctl(ctl_abi);
  
        return 0;
index 53acf2d76fa9809527e9d0869e4a5db8b7d48c1f,f4a4c118f8b403f7f3ece68f95750bf52ec7e891..fcc634c1479a1ed1d7cd05592d2b8c0ae59a408d
@@@ -601,12 -601,17 +601,12 @@@ static void release_pmc_hardware(void
  
        irq_subclass_unregister(IRQ_SUBCLASS_MEASUREMENT_ALERT);
        on_each_cpu(setup_pmc_cpu, &flags, 1);
 -      perf_release_sampling();
  }
  
  static int reserve_pmc_hardware(void)
  {
        int flags = PMC_INIT;
 -      int err;
  
 -      err = perf_reserve_sampling();
 -      if (err)
 -              return err;
        on_each_cpu(setup_pmc_cpu, &flags, 1);
        if (flags & PMC_FAILURE) {
                release_pmc_hardware();
@@@ -974,15 -979,12 +974,15 @@@ static int perf_push_sample(struct perf
        struct pt_regs regs;
        struct perf_sf_sde_regs *sde_regs;
        struct perf_sample_data data;
 -      struct perf_raw_record raw;
 +      struct perf_raw_record raw = {
 +              .frag = {
 +                      .size = sfr->size,
 +                      .data = sfr,
 +              },
 +      };
  
        /* Setup perf sample */
        perf_sample_data_init(&data, 0, event->hw.last_period);
 -      raw.size = sfr->size;
 -      raw.data = sfr;
        data.raw = &raw;
  
        /* Setup pt_regs to look like an CPU-measurement external interrupt
@@@ -1504,37 -1506,28 +1504,28 @@@ static void cpumf_measurement_alert(str
                sf_disable();
        }
  }
- static int cpumf_pmu_notifier(struct notifier_block *self,
-                             unsigned long action, void *hcpu)
+ static int cpusf_pmu_setup(unsigned int cpu, int flags)
  {
-       int flags;
        /* Ignore the notification if no events are scheduled on the PMU.
         * This might be racy...
         */
        if (!atomic_read(&num_events))
-               return NOTIFY_OK;
-       switch (action & ~CPU_TASKS_FROZEN) {
-       case CPU_ONLINE:
-       case CPU_DOWN_FAILED:
-               flags = PMC_INIT;
-               local_irq_disable();
-               setup_pmc_cpu(&flags);
-               local_irq_enable();
-               break;
-       case CPU_DOWN_PREPARE:
-               flags = PMC_RELEASE;
-               local_irq_disable();
-               setup_pmc_cpu(&flags);
-               local_irq_enable();
-               break;
-       default:
-               break;
-       }
+               return 0;
  
-       return NOTIFY_OK;
+       local_irq_disable();
+       setup_pmc_cpu(&flags);
+       local_irq_enable();
+       return 0;
+ }
+ static int s390_pmu_sf_online_cpu(unsigned int cpu)
+ {
+       return cpusf_pmu_setup(cpu, PMC_INIT);
+ }
+ static int s390_pmu_sf_offline_cpu(unsigned int cpu)
+ {
+       return cpusf_pmu_setup(cpu, PMC_RELEASE);
  }
  
  static int param_get_sfb_size(char *buffer, const struct kernel_param *kp)
@@@ -1634,7 -1627,9 +1625,9 @@@ static int __init init_cpum_sampling_pm
                                        cpumf_measurement_alert);
                goto out;
        }
-       perf_cpu_notifier(cpumf_pmu_notifier);
+       cpuhp_setup_state(CPUHP_AP_PERF_S390_SF_ONLINE, "AP_PERF_S390_SF_ONLINE",
+                         s390_pmu_sf_online_cpu, s390_pmu_sf_offline_cpu);
  out:
        return err;
  }
index 3329844e3c43913d278af48b4a93f5d1e1ce2ccc,25b0368de7f6752e4758f664417d8c7b8e52259e..f840766659a8f4fc2c68d778c7b495ad9b40cf98
@@@ -12,7 -12,6 +12,7 @@@
  #include <linux/random.h>
  #include <linux/elf.h>
  #include <linux/cpu.h>
 +#include <linux/ptrace.h>
  #include <asm/pvclock.h>
  #include <asm/vgtod.h>
  #include <asm/proto.h>
@@@ -98,40 -97,10 +98,40 @@@ static int vdso_fault(const struct vm_s
        return 0;
  }
  
 -static const struct vm_special_mapping text_mapping = {
 -      .name = "[vdso]",
 -      .fault = vdso_fault,
 -};
 +static void vdso_fix_landing(const struct vdso_image *image,
 +              struct vm_area_struct *new_vma)
 +{
 +#if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION
 +      if (in_ia32_syscall() && image == &vdso_image_32) {
 +              struct pt_regs *regs = current_pt_regs();
 +              unsigned long vdso_land = image->sym_int80_landing_pad;
 +              unsigned long old_land_addr = vdso_land +
 +                      (unsigned long)current->mm->context.vdso;
 +
 +              /* Fixing userspace landing - look at do_fast_syscall_32 */
 +              if (regs->ip == old_land_addr)
 +                      regs->ip = new_vma->vm_start + vdso_land;
 +      }
 +#endif
 +}
 +
 +static int vdso_mremap(const struct vm_special_mapping *sm,
 +              struct vm_area_struct *new_vma)
 +{
 +      unsigned long new_size = new_vma->vm_end - new_vma->vm_start;
 +      const struct vdso_image *image = current->mm->context.vdso_image;
 +
 +      if (image->size != new_size)
 +              return -EINVAL;
 +
 +      if (WARN_ON_ONCE(current->mm != new_vma->vm_mm))
 +              return -EFAULT;
 +
 +      vdso_fix_landing(image, new_vma);
 +      current->mm->context.vdso = (void __user *)new_vma->vm_start;
 +
 +      return 0;
 +}
  
  static int vvar_fault(const struct vm_special_mapping *sm,
                      struct vm_area_struct *vma, struct vm_fault *vmf)
@@@ -182,12 -151,6 +182,12 @@@ static int map_vdso(const struct vdso_i
        struct vm_area_struct *vma;
        unsigned long addr, text_start;
        int ret = 0;
 +
 +      static const struct vm_special_mapping vdso_mapping = {
 +              .name = "[vdso]",
 +              .fault = vdso_fault,
 +              .mremap = vdso_mremap,
 +      };
        static const struct vm_special_mapping vvar_mapping = {
                .name = "[vvar]",
                .fault = vvar_fault,
                                       image->size,
                                       VM_READ|VM_EXEC|
                                       VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC,
 -                                     &text_mapping);
 +                                     &vdso_mapping);
  
        if (IS_ERR(vma)) {
                ret = PTR_ERR(vma);
@@@ -331,15 -294,9 +331,9 @@@ static void vgetcpu_cpu_init(void *arg
        write_gdt_entry(get_cpu_gdt_table(cpu), GDT_ENTRY_PER_CPU, &d, DESCTYPE_S);
  }
  
- static int
- vgetcpu_cpu_notifier(struct notifier_block *n, unsigned long action, void *arg)
+ static int vgetcpu_online(unsigned int cpu)
  {
-       long cpu = (long)arg;
-       if (action == CPU_ONLINE || action == CPU_ONLINE_FROZEN)
-               smp_call_function_single(cpu, vgetcpu_cpu_init, NULL, 1);
-       return NOTIFY_DONE;
+       return smp_call_function_single(cpu, vgetcpu_cpu_init, NULL, 1);
  }
  
  static int __init init_vdso(void)
        init_vdso_image(&vdso_image_x32);
  #endif
  
-       cpu_notifier_register_begin();
-       on_each_cpu(vgetcpu_cpu_init, NULL, 1);
        /* notifier priority > KVM */
-       __hotcpu_notifier(vgetcpu_cpu_notifier, 30);
-       cpu_notifier_register_done();
-       return 0;
+       return cpuhp_setup_state(CPUHP_AP_X86_VDSO_VMA_ONLINE,
+                                "AP_X86_VDSO_VMA_ONLINE", vgetcpu_online, NULL);
  }
  subsys_initcall(init_vdso);
  #endif /* CONFIG_X86_64 */
index 72dea2f40fc4d43bb61b486f8f784c5a02aa6fe7,1a59a181582b2638b70d36a64ad383f3e302563c..155ea5324ae03ffd2f451fe1a92e464ae939486f
@@@ -655,12 -655,8 +655,12 @@@ fail
        }
  
        if (event->attr.sample_type & PERF_SAMPLE_RAW) {
 -              raw.size = sizeof(u32) + ibs_data.size;
 -              raw.data = ibs_data.data;
 +              raw = (struct perf_raw_record){
 +                      .frag = {
 +                              .size = sizeof(u32) + ibs_data.size,
 +                              .data = ibs_data.data,
 +                      },
 +              };
                data.raw = &raw;
        }
  
@@@ -725,13 -721,10 +725,10 @@@ static __init int perf_ibs_pmu_init(str
        return ret;
  }
  
- static __init int perf_event_ibs_init(void)
+ static __init void perf_event_ibs_init(void)
  {
        struct attribute **attr = ibs_op_format_attrs;
  
-       if (!ibs_caps)
-               return -ENODEV; /* ibs not supported by the cpu */
        perf_ibs_pmu_init(&perf_ibs_fetch, "ibs_fetch");
  
        if (ibs_caps & IBS_CAPS_OPCNT) {
  
        register_nmi_handler(NMI_LOCAL, perf_ibs_nmi_handler, 0, "perf_ibs");
        pr_info("perf: AMD IBS detected (0x%08x)\n", ibs_caps);
-       return 0;
  }
  
  #else /* defined(CONFIG_PERF_EVENTS) && defined(CONFIG_CPU_SUP_AMD) */
  
- static __init int perf_event_ibs_init(void) { return 0; }
+ static __init void perf_event_ibs_init(void) { }
  
  #endif
  
@@@ -925,7 -916,7 +920,7 @@@ static inline int get_ibs_lvt_offset(vo
        return val & IBSCTL_LVT_OFFSET_MASK;
  }
  
- static void setup_APIC_ibs(void *dummy)
+ static void setup_APIC_ibs(void)
  {
        int offset;
  
@@@ -940,7 -931,7 +935,7 @@@ failed
                smp_processor_id());
  }
  
- static void clear_APIC_ibs(void *dummy)
+ static void clear_APIC_ibs(void)
  {
        int offset;
  
                setup_APIC_eilvt(offset, 0, APIC_EILVT_MSG_FIX, 1);
  }
  
+ static int x86_pmu_amd_ibs_starting_cpu(unsigned int cpu)
+ {
+       setup_APIC_ibs();
+       return 0;
+ }
  #ifdef CONFIG_PM
  
  static int perf_ibs_suspend(void)
  {
-       clear_APIC_ibs(NULL);
+       clear_APIC_ibs();
        return 0;
  }
  
  static void perf_ibs_resume(void)
  {
        ibs_eilvt_setup();
-       setup_APIC_ibs(NULL);
+       setup_APIC_ibs();
  }
  
  static struct syscore_ops perf_ibs_syscore_ops = {
@@@ -979,27 -976,15 +980,15 @@@ static inline void perf_ibs_pm_init(voi
  
  #endif
  
- static int
- perf_ibs_cpu_notifier(struct notifier_block *self, unsigned long action, void *hcpu)
+ static int x86_pmu_amd_ibs_dying_cpu(unsigned int cpu)
  {
-       switch (action & ~CPU_TASKS_FROZEN) {
-       case CPU_STARTING:
-               setup_APIC_ibs(NULL);
-               break;
-       case CPU_DYING:
-               clear_APIC_ibs(NULL);
-               break;
-       default:
-               break;
-       }
-       return NOTIFY_OK;
+       clear_APIC_ibs();
+       return 0;
  }
  
  static __init int amd_ibs_init(void)
  {
        u32 caps;
-       int ret = -EINVAL;
  
        caps = __get_ibs_caps();
        if (!caps)
        ibs_eilvt_setup();
  
        if (!ibs_eilvt_valid())
-               goto out;
+               return -EINVAL;
  
        perf_ibs_pm_init();
-       cpu_notifier_register_begin();
        ibs_caps = caps;
        /* make ibs_caps visible to other cpus: */
        smp_mb();
-       smp_call_function(setup_APIC_ibs, NULL, 1);
-       __perf_cpu_notifier(perf_ibs_cpu_notifier);
-       cpu_notifier_register_done();
+       /*
+        * x86_pmu_amd_ibs_starting_cpu will be called from core on
+        * all online cpus.
+        */
+       cpuhp_setup_state(CPUHP_AP_PERF_X86_AMD_IBS_STARTING,
+                         "AP_PERF_X86_AMD_IBS_STARTING",
+                         x86_pmu_amd_ibs_starting_cpu,
+                         x86_pmu_amd_ibs_dying_cpu);
  
-       ret = perf_event_ibs_init();
- out:
-       if (ret)
-               pr_err("Failed to setup IBS, %d\n", ret);
-       return ret;
+       perf_event_ibs_init();
+       return 0;
  }
  
  /* Since we need the pci subsystem to init ibs we can't do this earlier: */
diff --combined arch/x86/events/core.c
index dfebbde2a4cc252a7bc8ab2cd5f84c679df9336c,4ce3745f26f5dd370666a703f36f1db37cb123f2..c17f0de5fd391c3ce000df6428802521384d179f
@@@ -263,7 -263,7 +263,7 @@@ static bool check_hw_exists(void
  
  msr_fail:
        pr_cont("Broken PMU hardware detected, using software events only.\n");
 -      pr_info("%sFailed to access perfctr msr (MSR %x is %Lx)\n",
 +      printk("%sFailed to access perfctr msr (MSR %x is %Lx)\n",
                boot_cpu_has(X86_FEATURE_HYPERVISOR) ? KERN_INFO : KERN_ERR,
                reg, val_new);
  
@@@ -1477,49 -1477,49 +1477,49 @@@ NOKPROBE_SYMBOL(perf_event_nmi_handler)
  struct event_constraint emptyconstraint;
  struct event_constraint unconstrained;
  
- static int
- x86_pmu_notifier(struct notifier_block *self, unsigned long action, void *hcpu)
+ static int x86_pmu_prepare_cpu(unsigned int cpu)
  {
-       unsigned int cpu = (long)hcpu;
        struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);
-       int i, ret = NOTIFY_OK;
-       switch (action & ~CPU_TASKS_FROZEN) {
-       case CPU_UP_PREPARE:
-               for (i = 0 ; i < X86_PERF_KFREE_MAX; i++)
-                       cpuc->kfree_on_online[i] = NULL;
-               if (x86_pmu.cpu_prepare)
-                       ret = x86_pmu.cpu_prepare(cpu);
-               break;
-       case CPU_STARTING:
-               if (x86_pmu.cpu_starting)
-                       x86_pmu.cpu_starting(cpu);
-               break;
+       int i;
  
-       case CPU_ONLINE:
-               for (i = 0 ; i < X86_PERF_KFREE_MAX; i++) {
-                       kfree(cpuc->kfree_on_online[i]);
-                       cpuc->kfree_on_online[i] = NULL;
-               }
-               break;
+       for (i = 0 ; i < X86_PERF_KFREE_MAX; i++)
+               cpuc->kfree_on_online[i] = NULL;
+       if (x86_pmu.cpu_prepare)
+               return x86_pmu.cpu_prepare(cpu);
+       return 0;
+ }
  
-       case CPU_DYING:
-               if (x86_pmu.cpu_dying)
-                       x86_pmu.cpu_dying(cpu);
-               break;
+ static int x86_pmu_dead_cpu(unsigned int cpu)
+ {
+       if (x86_pmu.cpu_dead)
+               x86_pmu.cpu_dead(cpu);
+       return 0;
+ }
  
-       case CPU_UP_CANCELED:
-       case CPU_DEAD:
-               if (x86_pmu.cpu_dead)
-                       x86_pmu.cpu_dead(cpu);
-               break;
+ static int x86_pmu_online_cpu(unsigned int cpu)
+ {
+       struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);
+       int i;
  
-       default:
-               break;
+       for (i = 0 ; i < X86_PERF_KFREE_MAX; i++) {
+               kfree(cpuc->kfree_on_online[i]);
+               cpuc->kfree_on_online[i] = NULL;
        }
+       return 0;
+ }
  
-       return ret;
+ static int x86_pmu_starting_cpu(unsigned int cpu)
+ {
+       if (x86_pmu.cpu_starting)
+               x86_pmu.cpu_starting(cpu);
+       return 0;
+ }
+ static int x86_pmu_dying_cpu(unsigned int cpu)
+ {
+       if (x86_pmu.cpu_dying)
+               x86_pmu.cpu_dying(cpu);
+       return 0;
  }
  
  static void __init pmu_check_apic(void)
@@@ -1622,29 -1622,6 +1622,29 @@@ ssize_t events_sysfs_show(struct devic
  }
  EXPORT_SYMBOL_GPL(events_sysfs_show);
  
 +ssize_t events_ht_sysfs_show(struct device *dev, struct device_attribute *attr,
 +                        char *page)
 +{
 +      struct perf_pmu_events_ht_attr *pmu_attr =
 +              container_of(attr, struct perf_pmu_events_ht_attr, attr);
 +
 +      /*
 +       * Report conditional events depending on Hyper-Threading.
 +       *
 +       * This is overly conservative as usually the HT special
 +       * handling is not needed if the other CPU thread is idle.
 +       *
 +       * Note this does not (and cannot) handle the case when thread
 +       * siblings are invisible, for example with virtualization
 +       * if they are owned by some other guest.  The user tool
 +       * has to re-read when a thread sibling gets onlined later.
 +       */
 +      return sprintf(page, "%s",
 +                      topology_max_smt_threads() > 1 ?
 +                      pmu_attr->event_str_ht :
 +                      pmu_attr->event_str_noht);
 +}
 +
  EVENT_ATTR(cpu-cycles,                        CPU_CYCLES              );
  EVENT_ATTR(instructions,              INSTRUCTIONS            );
  EVENT_ATTR(cache-references,          CACHE_REFERENCES        );
@@@ -1787,10 -1764,39 +1787,39 @@@ static int __init init_hw_perf_events(v
        pr_info("... fixed-purpose events:   %d\n",     x86_pmu.num_counters_fixed);
        pr_info("... event mask:             %016Lx\n", x86_pmu.intel_ctrl);
  
-       perf_pmu_register(&pmu, "cpu", PERF_TYPE_RAW);
-       perf_cpu_notifier(x86_pmu_notifier);
+       /*
+        * Install callbacks. Core will call them for each online
+        * cpu.
+        */
+       err = cpuhp_setup_state(CPUHP_PERF_X86_PREPARE, "PERF_X86_PREPARE",
+                               x86_pmu_prepare_cpu, x86_pmu_dead_cpu);
+       if (err)
+               return err;
+       err = cpuhp_setup_state(CPUHP_AP_PERF_X86_STARTING,
+                               "AP_PERF_X86_STARTING", x86_pmu_starting_cpu,
+                               x86_pmu_dying_cpu);
+       if (err)
+               goto out;
+       err = cpuhp_setup_state(CPUHP_AP_PERF_X86_ONLINE, "AP_PERF_X86_ONLINE",
+                               x86_pmu_online_cpu, NULL);
+       if (err)
+               goto out1;
+       err = perf_pmu_register(&pmu, "cpu", PERF_TYPE_RAW);
+       if (err)
+               goto out2;
  
        return 0;
+ out2:
+       cpuhp_remove_state(CPUHP_AP_PERF_X86_ONLINE);
+ out1:
+       cpuhp_remove_state(CPUHP_AP_PERF_X86_STARTING);
+ out:
+       cpuhp_remove_state(CPUHP_PERF_X86_PREPARE);
+       return err;
  }
  early_initcall(init_hw_perf_events);
  
index 0974ba11e9544a169a5071806f409aed9682291b,6a1441be485b5a21e5c8168283adc25c5d8254fe..2cbde2f449aa8ced63adf14b14f9ceb3d464068c
@@@ -16,7 -16,6 +16,7 @@@
  
  #include <asm/cpufeature.h>
  #include <asm/hardirq.h>
 +#include <asm/intel-family.h>
  #include <asm/apic.h>
  
  #include "../perf_event.h"
@@@ -186,7 -185,7 +186,7 @@@ static struct event_constraint intel_sl
        EVENT_CONSTRAINT_END
  };
  
 -struct event_constraint intel_skl_event_constraints[] = {
 +static struct event_constraint intel_skl_event_constraints[] = {
        FIXED_EVENT_CONSTRAINT(0x00c0, 0),      /* INST_RETIRED.ANY */
        FIXED_EVENT_CONSTRAINT(0x003c, 1),      /* CPU_CLK_UNHALTED.CORE */
        FIXED_EVENT_CONSTRAINT(0x0300, 2),      /* CPU_CLK_UNHALTED.REF */
  };
  
  static struct extra_reg intel_knl_extra_regs[] __read_mostly = {
 -      INTEL_UEVENT_EXTRA_REG(0x01b7,
 -                             MSR_OFFCORE_RSP_0, 0x7f9ffbffffull, RSP_0),
 -      INTEL_UEVENT_EXTRA_REG(0x02b7,
 -                             MSR_OFFCORE_RSP_1, 0x3f9ffbffffull, RSP_1),
 +      INTEL_UEVENT_EXTRA_REG(0x01b7, MSR_OFFCORE_RSP_0, 0x799ffbb6e7ull, RSP_0),
 +      INTEL_UEVENT_EXTRA_REG(0x02b7, MSR_OFFCORE_RSP_1, 0x399ffbffe7ull, RSP_1),
        EVENT_EXTRA_END
  };
  
@@@ -242,51 -243,14 +242,51 @@@ EVENT_ATTR_STR(mem-loads,       mem_ld_nhm,     "
  EVENT_ATTR_STR(mem-loads,     mem_ld_snb,     "event=0xcd,umask=0x1,ldlat=3");
  EVENT_ATTR_STR(mem-stores,    mem_st_snb,     "event=0xcd,umask=0x2");
  
 -struct attribute *nhm_events_attrs[] = {
 +static struct attribute *nhm_events_attrs[] = {
        EVENT_PTR(mem_ld_nhm),
        NULL,
  };
  
 -struct attribute *snb_events_attrs[] = {
 +/*
 + * topdown events for Intel Core CPUs.
 + *
 + * The events are all in slots, which is a free slot in a 4 wide
 + * pipeline. Some events are already reported in slots, for cycle
 + * events we multiply by the pipeline width (4).
 + *
 + * With Hyper Threading on, topdown metrics are either summed or averaged
 + * between the threads of a core: (count_t0 + count_t1).
 + *
 + * For the average case the metric is always scaled to pipeline width,
 + * so we use factor 2 ((count_t0 + count_t1) / 2 * 4)
 + */
 +
 +EVENT_ATTR_STR_HT(topdown-total-slots, td_total_slots,
 +      "event=0x3c,umask=0x0",                 /* cpu_clk_unhalted.thread */
 +      "event=0x3c,umask=0x0,any=1");          /* cpu_clk_unhalted.thread_any */
 +EVENT_ATTR_STR_HT(topdown-total-slots.scale, td_total_slots_scale, "4", "2");
 +EVENT_ATTR_STR(topdown-slots-issued, td_slots_issued,
 +      "event=0xe,umask=0x1");                 /* uops_issued.any */
 +EVENT_ATTR_STR(topdown-slots-retired, td_slots_retired,
 +      "event=0xc2,umask=0x2");                /* uops_retired.retire_slots */
 +EVENT_ATTR_STR(topdown-fetch-bubbles, td_fetch_bubbles,
 +      "event=0x9c,umask=0x1");                /* idq_uops_not_delivered_core */
 +EVENT_ATTR_STR_HT(topdown-recovery-bubbles, td_recovery_bubbles,
 +      "event=0xd,umask=0x3,cmask=1",          /* int_misc.recovery_cycles */
 +      "event=0xd,umask=0x3,cmask=1,any=1");   /* int_misc.recovery_cycles_any */
 +EVENT_ATTR_STR_HT(topdown-recovery-bubbles.scale, td_recovery_bubbles_scale,
 +      "4", "2");
 +
 +static struct attribute *snb_events_attrs[] = {
        EVENT_PTR(mem_ld_snb),
        EVENT_PTR(mem_st_snb),
 +      EVENT_PTR(td_slots_issued),
 +      EVENT_PTR(td_slots_retired),
 +      EVENT_PTR(td_fetch_bubbles),
 +      EVENT_PTR(td_total_slots),
 +      EVENT_PTR(td_total_slots_scale),
 +      EVENT_PTR(td_recovery_bubbles),
 +      EVENT_PTR(td_recovery_bubbles_scale),
        NULL,
  };
  
@@@ -316,7 -280,7 +316,7 @@@ static struct event_constraint intel_hs
        EVENT_CONSTRAINT_END
  };
  
 -struct event_constraint intel_bdw_event_constraints[] = {
 +static struct event_constraint intel_bdw_event_constraints[] = {
        FIXED_EVENT_CONSTRAINT(0x00c0, 0),      /* INST_RETIRED.ANY */
        FIXED_EVENT_CONSTRAINT(0x003c, 1),      /* CPU_CLK_UNHALTED.CORE */
        FIXED_EVENT_CONSTRAINT(0x0300, 2),      /* CPU_CLK_UNHALTED.REF */
@@@ -1397,29 -1361,6 +1397,29 @@@ static __initconst const u64 atom_hw_ca
   },
  };
  
 +EVENT_ATTR_STR(topdown-total-slots, td_total_slots_slm, "event=0x3c");
 +EVENT_ATTR_STR(topdown-total-slots.scale, td_total_slots_scale_slm, "2");
 +/* no_alloc_cycles.not_delivered */
 +EVENT_ATTR_STR(topdown-fetch-bubbles, td_fetch_bubbles_slm,
 +             "event=0xca,umask=0x50");
 +EVENT_ATTR_STR(topdown-fetch-bubbles.scale, td_fetch_bubbles_scale_slm, "2");
 +/* uops_retired.all */
 +EVENT_ATTR_STR(topdown-slots-issued, td_slots_issued_slm,
 +             "event=0xc2,umask=0x10");
 +/* uops_retired.all */
 +EVENT_ATTR_STR(topdown-slots-retired, td_slots_retired_slm,
 +             "event=0xc2,umask=0x10");
 +
 +static struct attribute *slm_events_attrs[] = {
 +      EVENT_PTR(td_total_slots_slm),
 +      EVENT_PTR(td_total_slots_scale_slm),
 +      EVENT_PTR(td_fetch_bubbles_slm),
 +      EVENT_PTR(td_fetch_bubbles_scale_slm),
 +      EVENT_PTR(td_slots_issued_slm),
 +      EVENT_PTR(td_slots_retired_slm),
 +      NULL
 +};
 +
  static struct extra_reg intel_slm_extra_regs[] __read_mostly =
  {
        /* must define OFFCORE_RSP_X first, see intel_fixup_er() */
@@@ -3109,7 -3050,7 +3109,7 @@@ static int intel_pmu_cpu_prepare(int cp
                cpuc->excl_thread_id = 0;
        }
  
-       return NOTIFY_OK;
+       return 0;
  
  err_constraint_list:
        kfree(cpuc->constraint_list);
@@@ -3120,7 -3061,7 +3120,7 @@@ err_shared_regs
        cpuc->shared_regs = NULL;
  
  err:
-       return NOTIFY_BAD;
+       return -ENOMEM;
  }
  
  static void intel_pmu_cpu_starting(int cpu)
@@@ -3349,11 -3290,11 +3349,11 @@@ static int intel_snb_pebs_broken(int cp
        u32 rev = UINT_MAX; /* default to broken for unknown models */
  
        switch (cpu_data(cpu).x86_model) {
 -      case 42: /* SNB */
 +      case INTEL_FAM6_SANDYBRIDGE:
                rev = 0x28;
                break;
  
 -      case 45: /* SNB-EP */
 +      case INTEL_FAM6_SANDYBRIDGE_X:
                switch (cpu_data(cpu).x86_mask) {
                case 6: rev = 0x618; break;
                case 7: rev = 0x70c; break;
@@@ -3390,13 -3331,6 +3390,13 @@@ static void intel_snb_check_microcode(v
        }
  }
  
 +static bool is_lbr_from(unsigned long msr)
 +{
 +      unsigned long lbr_from_nr = x86_pmu.lbr_from + x86_pmu.lbr_nr;
 +
 +      return x86_pmu.lbr_from <= msr && msr < lbr_from_nr;
 +}
 +
  /*
   * Under certain circumstances, access certain MSR may cause #GP.
   * The function tests if the input MSR can be safely accessed.
@@@ -3417,24 -3351,13 +3417,24 @@@ static bool check_msr(unsigned long msr
         * Only change the bits which can be updated by wrmsrl.
         */
        val_tmp = val_old ^ mask;
 +
 +      if (is_lbr_from(msr))
 +              val_tmp = lbr_from_signext_quirk_wr(val_tmp);
 +
        if (wrmsrl_safe(msr, val_tmp) ||
            rdmsrl_safe(msr, &val_new))
                return false;
  
 +      /*
 +       * Quirk only affects validation in wrmsr(), so wrmsrl()'s value
 +       * should equal rdmsrl()'s even with the quirk.
 +       */
        if (val_new != val_tmp)
                return false;
  
 +      if (is_lbr_from(msr))
 +              val_old = lbr_from_signext_quirk_wr(val_old);
 +
        /* Here it's sure that the MSR can be safely accessed.
         * Restore the old value and return.
         */
@@@ -3543,13 -3466,6 +3543,13 @@@ static struct attribute *hsw_events_att
        EVENT_PTR(cycles_ct),
        EVENT_PTR(mem_ld_hsw),
        EVENT_PTR(mem_st_hsw),
 +      EVENT_PTR(td_slots_issued),
 +      EVENT_PTR(td_slots_retired),
 +      EVENT_PTR(td_fetch_bubbles),
 +      EVENT_PTR(td_total_slots),
 +      EVENT_PTR(td_total_slots_scale),
 +      EVENT_PTR(td_recovery_bubbles),
 +      EVENT_PTR(td_recovery_bubbles_scale),
        NULL
  };
  
@@@ -3621,15 -3537,15 +3621,15 @@@ __init int intel_pmu_init(void
         * Install the hw-cache-events table:
         */
        switch (boot_cpu_data.x86_model) {
 -      case 14: /* 65nm Core "Yonah" */
 +      case INTEL_FAM6_CORE_YONAH:
                pr_cont("Core events, ");
                break;
  
 -      case 15: /* 65nm Core2 "Merom"          */
 +      case INTEL_FAM6_CORE2_MEROM:
                x86_add_quirk(intel_clovertown_quirk);
 -      case 22: /* 65nm Core2 "Merom-L"        */
 -      case 23: /* 45nm Core2 "Penryn"         */
 -      case 29: /* 45nm Core2 "Dunnington (MP) */
 +      case INTEL_FAM6_CORE2_MEROM_L:
 +      case INTEL_FAM6_CORE2_PENRYN:
 +      case INTEL_FAM6_CORE2_DUNNINGTON:
                memcpy(hw_cache_event_ids, core2_hw_cache_event_ids,
                       sizeof(hw_cache_event_ids));
  
                pr_cont("Core2 events, ");
                break;
  
 -      case 30: /* 45nm Nehalem    */
 -      case 26: /* 45nm Nehalem-EP */
 -      case 46: /* 45nm Nehalem-EX */
 +      case INTEL_FAM6_NEHALEM:
 +      case INTEL_FAM6_NEHALEM_EP:
 +      case INTEL_FAM6_NEHALEM_EX:
                memcpy(hw_cache_event_ids, nehalem_hw_cache_event_ids,
                       sizeof(hw_cache_event_ids));
                memcpy(hw_cache_extra_regs, nehalem_hw_cache_extra_regs,
                pr_cont("Nehalem events, ");
                break;
  
 -      case 28: /* 45nm Atom "Pineview"   */
 -      case 38: /* 45nm Atom "Lincroft"   */
 -      case 39: /* 32nm Atom "Penwell"    */
 -      case 53: /* 32nm Atom "Cloverview" */
 -      case 54: /* 32nm Atom "Cedarview"  */
 +      case INTEL_FAM6_ATOM_PINEVIEW:
 +      case INTEL_FAM6_ATOM_LINCROFT:
 +      case INTEL_FAM6_ATOM_PENWELL:
 +      case INTEL_FAM6_ATOM_CLOVERVIEW:
 +      case INTEL_FAM6_ATOM_CEDARVIEW:
                memcpy(hw_cache_event_ids, atom_hw_cache_event_ids,
                       sizeof(hw_cache_event_ids));
  
                pr_cont("Atom events, ");
                break;
  
 -      case 55: /* 22nm Atom "Silvermont"                */
 -      case 76: /* 14nm Atom "Airmont"                   */
 -      case 77: /* 22nm Atom "Silvermont Avoton/Rangely" */
 +      case INTEL_FAM6_ATOM_SILVERMONT1:
 +      case INTEL_FAM6_ATOM_SILVERMONT2:
 +      case INTEL_FAM6_ATOM_AIRMONT:
                memcpy(hw_cache_event_ids, slm_hw_cache_event_ids,
                        sizeof(hw_cache_event_ids));
                memcpy(hw_cache_extra_regs, slm_hw_cache_extra_regs,
                x86_pmu.pebs_constraints = intel_slm_pebs_event_constraints;
                x86_pmu.extra_regs = intel_slm_extra_regs;
                x86_pmu.flags |= PMU_FL_HAS_RSP_1;
 +              x86_pmu.cpu_events = slm_events_attrs;
                pr_cont("Silvermont events, ");
                break;
  
 -      case 92: /* 14nm Atom "Goldmont" */
 -      case 95: /* 14nm Atom "Goldmont Denverton" */
 +      case INTEL_FAM6_ATOM_GOLDMONT:
 +      case INTEL_FAM6_ATOM_DENVERTON:
                memcpy(hw_cache_event_ids, glm_hw_cache_event_ids,
                       sizeof(hw_cache_event_ids));
                memcpy(hw_cache_extra_regs, glm_hw_cache_extra_regs,
                pr_cont("Goldmont events, ");
                break;
  
 -      case 37: /* 32nm Westmere    */
 -      case 44: /* 32nm Westmere-EP */
 -      case 47: /* 32nm Westmere-EX */
 +      case INTEL_FAM6_WESTMERE:
 +      case INTEL_FAM6_WESTMERE_EP:
 +      case INTEL_FAM6_WESTMERE_EX:
                memcpy(hw_cache_event_ids, westmere_hw_cache_event_ids,
                       sizeof(hw_cache_event_ids));
                memcpy(hw_cache_extra_regs, nehalem_hw_cache_extra_regs,
                pr_cont("Westmere events, ");
                break;
  
 -      case 42: /* 32nm SandyBridge         */
 -      case 45: /* 32nm SandyBridge-E/EN/EP */
 +      case INTEL_FAM6_SANDYBRIDGE:
 +      case INTEL_FAM6_SANDYBRIDGE_X:
                x86_add_quirk(intel_sandybridge_quirk);
                x86_add_quirk(intel_ht_bug);
                memcpy(hw_cache_event_ids, snb_hw_cache_event_ids,
                x86_pmu.event_constraints = intel_snb_event_constraints;
                x86_pmu.pebs_constraints = intel_snb_pebs_event_constraints;
                x86_pmu.pebs_aliases = intel_pebs_aliases_snb;
 -              if (boot_cpu_data.x86_model == 45)
 +              if (boot_cpu_data.x86_model == INTEL_FAM6_SANDYBRIDGE_X)
                        x86_pmu.extra_regs = intel_snbep_extra_regs;
                else
                        x86_pmu.extra_regs = intel_snb_extra_regs;
                pr_cont("SandyBridge events, ");
                break;
  
 -      case 58: /* 22nm IvyBridge       */
 -      case 62: /* 22nm IvyBridge-EP/EX */
 +      case INTEL_FAM6_IVYBRIDGE:
 +      case INTEL_FAM6_IVYBRIDGE_X:
                x86_add_quirk(intel_ht_bug);
                memcpy(hw_cache_event_ids, snb_hw_cache_event_ids,
                       sizeof(hw_cache_event_ids));
                x86_pmu.pebs_constraints = intel_ivb_pebs_event_constraints;
                x86_pmu.pebs_aliases = intel_pebs_aliases_ivb;
                x86_pmu.pebs_prec_dist = true;
 -              if (boot_cpu_data.x86_model == 62)
 +              if (boot_cpu_data.x86_model == INTEL_FAM6_IVYBRIDGE_X)
                        x86_pmu.extra_regs = intel_snbep_extra_regs;
                else
                        x86_pmu.extra_regs = intel_snb_extra_regs;
                break;
  
  
 -      case 60: /* 22nm Haswell Core */
 -      case 63: /* 22nm Haswell Server */
 -      case 69: /* 22nm Haswell ULT */
 -      case 70: /* 22nm Haswell + GT3e (Intel Iris Pro graphics) */
 +      case INTEL_FAM6_HASWELL_CORE:
 +      case INTEL_FAM6_HASWELL_X:
 +      case INTEL_FAM6_HASWELL_ULT:
 +      case INTEL_FAM6_HASWELL_GT3E:
                x86_add_quirk(intel_ht_bug);
                x86_pmu.late_ack = true;
                memcpy(hw_cache_event_ids, hsw_hw_cache_event_ids, sizeof(hw_cache_event_ids));
                pr_cont("Haswell events, ");
                break;
  
 -      case 61: /* 14nm Broadwell Core-M */
 -      case 86: /* 14nm Broadwell Xeon D */
 -      case 71: /* 14nm Broadwell + GT3e (Intel Iris Pro graphics) */
 -      case 79: /* 14nm Broadwell Server */
 +      case INTEL_FAM6_BROADWELL_CORE:
 +      case INTEL_FAM6_BROADWELL_XEON_D:
 +      case INTEL_FAM6_BROADWELL_GT3E:
 +      case INTEL_FAM6_BROADWELL_X:
                x86_pmu.late_ack = true;
                memcpy(hw_cache_event_ids, hsw_hw_cache_event_ids, sizeof(hw_cache_event_ids));
                memcpy(hw_cache_extra_regs, hsw_hw_cache_extra_regs, sizeof(hw_cache_extra_regs));
                pr_cont("Broadwell events, ");
                break;
  
 -      case 87: /* Knights Landing Xeon Phi */
 +      case INTEL_FAM6_XEON_PHI_KNL:
                memcpy(hw_cache_event_ids,
                       slm_hw_cache_event_ids, sizeof(hw_cache_event_ids));
                memcpy(hw_cache_extra_regs,
                pr_cont("Knights Landing events, ");
                break;
  
 -      case 142: /* 14nm Kabylake Mobile */
 -      case 158: /* 14nm Kabylake Desktop */
 -      case 78: /* 14nm Skylake Mobile */
 -      case 94: /* 14nm Skylake Desktop */
 -      case 85: /* 14nm Skylake Server */
 +      case INTEL_FAM6_SKYLAKE_MOBILE:
 +      case INTEL_FAM6_SKYLAKE_DESKTOP:
 +      case INTEL_FAM6_SKYLAKE_X:
 +      case INTEL_FAM6_KABYLAKE_MOBILE:
 +      case INTEL_FAM6_KABYLAKE_DESKTOP:
                x86_pmu.late_ack = true;
                memcpy(hw_cache_event_ids, skl_hw_cache_event_ids, sizeof(hw_cache_event_ids));
                memcpy(hw_cache_extra_regs, skl_hw_cache_extra_regs, sizeof(hw_cache_extra_regs));
                intel_pmu_lbr_init_skl();
  
 +              /* INT_MISC.RECOVERY_CYCLES has umask 1 in Skylake */
 +              event_attr_td_recovery_bubbles.event_str_noht =
 +                      "event=0xd,umask=0x1,cmask=1";
 +              event_attr_td_recovery_bubbles.event_str_ht =
 +                      "event=0xd,umask=0x1,cmask=1,any=1";
 +
                x86_pmu.event_constraints = intel_skl_event_constraints;
                x86_pmu.pebs_constraints = intel_skl_pebs_event_constraints;
                x86_pmu.extra_regs = intel_skl_extra_regs;
                        x86_pmu.lbr_nr = 0;
        }
  
 +      if (x86_pmu.lbr_nr)
 +              pr_cont("%d-deep LBR, ", x86_pmu.lbr_nr);
        /*
         * Access extra MSR may cause #GP under certain circumstances.
         * E.g. KVM doesn't support offcore event
   */
  static __init int fixup_ht_bug(void)
  {
 -      int cpu = smp_processor_id();
 -      int w, c;
 +      int c;
        /*
         * problem not present on this CPU model, nothing to do
         */
        if (!(x86_pmu.flags & PMU_FL_EXCL_ENABLED))
                return 0;
  
 -      w = cpumask_weight(topology_sibling_cpumask(cpu));
 -      if (w > 1) {
 +      if (topology_max_smt_threads() > 1) {
                pr_info("PMU erratum BJ122, BV98, HSD29 worked around, HT is on\n");
                return 0;
        }
index 4c7638b91fa56ea2dde92d50a818a3d8d3b7d1da,d6d7be0b34951cfcf7ae915ae1dcb6545e396a81..3ca87b5a8677608c86ac8d748b59ead0d160f580
@@@ -89,7 -89,6 +89,7 @@@
  #include <linux/slab.h>
  #include <linux/perf_event.h>
  #include <asm/cpu_device_id.h>
 +#include <asm/intel-family.h>
  #include "../perf_event.h"
  
  MODULE_LICENSE("GPL");
@@@ -366,7 -365,7 +366,7 @@@ static int cstate_pmu_event_add(struct 
   * Check if exiting cpu is the designated reader. If so migrate the
   * events when there is a valid target available
   */
- static void cstate_cpu_exit(int cpu)
+ static int cstate_cpu_exit(unsigned int cpu)
  {
        unsigned int target;
  
                        perf_pmu_migrate_context(&cstate_pkg_pmu, cpu, target);
                }
        }
+       return 0;
  }
  
- static void cstate_cpu_init(int cpu)
+ static int cstate_cpu_init(unsigned int cpu)
  {
        unsigned int target;
  
                                 topology_core_cpumask(cpu));
        if (has_cstate_pkg && target >= nr_cpu_ids)
                cpumask_set_cpu(cpu, &cstate_pkg_cpu_mask);
- }
  
- static int cstate_cpu_notifier(struct notifier_block *self,
-                              unsigned long action, void *hcpu)
- {
-       unsigned int cpu = (long)hcpu;
-       switch (action & ~CPU_TASKS_FROZEN) {
-       case CPU_STARTING:
-               cstate_cpu_init(cpu);
-               break;
-       case CPU_DOWN_PREPARE:
-               cstate_cpu_exit(cpu);
-               break;
-       default:
-               break;
-       }
-       return NOTIFY_OK;
+       return 0;
  }
  
- static struct notifier_block cstate_cpu_nb = {
-       .notifier_call  = cstate_cpu_notifier,
-       .priority       = CPU_PRI_PERF + 1,
- };
  static struct pmu cstate_core_pmu = {
        .attr_groups    = core_attr_groups,
        .name           = "cstate_core",
@@@ -512,37 -491,37 +492,37 @@@ static const struct cstate_model slm_cs
        { X86_VENDOR_INTEL, 6, model, X86_FEATURE_ANY, (unsigned long) &(states) }
  
  static const struct x86_cpu_id intel_cstates_match[] __initconst = {
 -      X86_CSTATES_MODEL(30, nhm_cstates),    /* 45nm Nehalem              */
 -      X86_CSTATES_MODEL(26, nhm_cstates),    /* 45nm Nehalem-EP           */
 -      X86_CSTATES_MODEL(46, nhm_cstates),    /* 45nm Nehalem-EX           */
 +      X86_CSTATES_MODEL(INTEL_FAM6_NEHALEM,    nhm_cstates),
 +      X86_CSTATES_MODEL(INTEL_FAM6_NEHALEM_EP, nhm_cstates),
 +      X86_CSTATES_MODEL(INTEL_FAM6_NEHALEM_EX, nhm_cstates),
  
 -      X86_CSTATES_MODEL(37, nhm_cstates),    /* 32nm Westmere             */
 -      X86_CSTATES_MODEL(44, nhm_cstates),    /* 32nm Westmere-EP          */
 -      X86_CSTATES_MODEL(47, nhm_cstates),    /* 32nm Westmere-EX          */
 +      X86_CSTATES_MODEL(INTEL_FAM6_WESTMERE,    nhm_cstates),
 +      X86_CSTATES_MODEL(INTEL_FAM6_WESTMERE_EP, nhm_cstates),
 +      X86_CSTATES_MODEL(INTEL_FAM6_WESTMERE_EX, nhm_cstates),
  
 -      X86_CSTATES_MODEL(42, snb_cstates),    /* 32nm SandyBridge          */
 -      X86_CSTATES_MODEL(45, snb_cstates),    /* 32nm SandyBridge-E/EN/EP  */
 +      X86_CSTATES_MODEL(INTEL_FAM6_SANDYBRIDGE,   snb_cstates),
 +      X86_CSTATES_MODEL(INTEL_FAM6_SANDYBRIDGE_X, snb_cstates),
  
 -      X86_CSTATES_MODEL(58, snb_cstates),    /* 22nm IvyBridge            */
 -      X86_CSTATES_MODEL(62, snb_cstates),    /* 22nm IvyBridge-EP/EX      */
 +      X86_CSTATES_MODEL(INTEL_FAM6_IVYBRIDGE,   snb_cstates),
 +      X86_CSTATES_MODEL(INTEL_FAM6_IVYBRIDGE_X, snb_cstates),
  
 -      X86_CSTATES_MODEL(60, snb_cstates),    /* 22nm Haswell Core         */
 -      X86_CSTATES_MODEL(63, snb_cstates),    /* 22nm Haswell Server       */
 -      X86_CSTATES_MODEL(70, snb_cstates),    /* 22nm Haswell + GT3e       */
 +      X86_CSTATES_MODEL(INTEL_FAM6_HASWELL_CORE, snb_cstates),
 +      X86_CSTATES_MODEL(INTEL_FAM6_HASWELL_X,    snb_cstates),
 +      X86_CSTATES_MODEL(INTEL_FAM6_HASWELL_GT3E, snb_cstates),
  
 -      X86_CSTATES_MODEL(69, hswult_cstates), /* 22nm Haswell ULT          */
 +      X86_CSTATES_MODEL(INTEL_FAM6_HASWELL_ULT, hswult_cstates),
  
 -      X86_CSTATES_MODEL(55, slm_cstates),    /* 22nm Atom Silvermont      */
 -      X86_CSTATES_MODEL(77, slm_cstates),    /* 22nm Atom Avoton/Rangely  */
 -      X86_CSTATES_MODEL(76, slm_cstates),    /* 22nm Atom Airmont         */
 +      X86_CSTATES_MODEL(INTEL_FAM6_ATOM_SILVERMONT1, slm_cstates),
 +      X86_CSTATES_MODEL(INTEL_FAM6_ATOM_SILVERMONT2, slm_cstates),
 +      X86_CSTATES_MODEL(INTEL_FAM6_ATOM_AIRMONT,     slm_cstates),
  
 -      X86_CSTATES_MODEL(61, snb_cstates),    /* 14nm Broadwell Core-M     */
 -      X86_CSTATES_MODEL(86, snb_cstates),    /* 14nm Broadwell Xeon D     */
 -      X86_CSTATES_MODEL(71, snb_cstates),    /* 14nm Broadwell + GT3e     */
 -      X86_CSTATES_MODEL(79, snb_cstates),    /* 14nm Broadwell Server     */
 +      X86_CSTATES_MODEL(INTEL_FAM6_BROADWELL_CORE,   snb_cstates),
 +      X86_CSTATES_MODEL(INTEL_FAM6_BROADWELL_XEON_D, snb_cstates),
 +      X86_CSTATES_MODEL(INTEL_FAM6_BROADWELL_GT3E,   snb_cstates),
 +      X86_CSTATES_MODEL(INTEL_FAM6_BROADWELL_X,      snb_cstates),
  
 -      X86_CSTATES_MODEL(78, snb_cstates),    /* 14nm Skylake Mobile       */
 -      X86_CSTATES_MODEL(94, snb_cstates),    /* 14nm Skylake Desktop      */
 +      X86_CSTATES_MODEL(INTEL_FAM6_SKYLAKE_MOBILE,  snb_cstates),
 +      X86_CSTATES_MODEL(INTEL_FAM6_SKYLAKE_DESKTOP, snb_cstates),
        { },
  };
  MODULE_DEVICE_TABLE(x86cpu, intel_cstates_match);
@@@ -600,18 -579,20 +580,20 @@@ static inline void cstate_cleanup(void
  
  static int __init cstate_init(void)
  {
-       int cpu, err;
+       int err;
  
-       cpu_notifier_register_begin();
-       for_each_online_cpu(cpu)
-               cstate_cpu_init(cpu);
+       cpuhp_setup_state(CPUHP_AP_PERF_X86_CSTATE_STARTING,
+                         "AP_PERF_X86_CSTATE_STARTING", cstate_cpu_init,
+                         NULL);
+       cpuhp_setup_state(CPUHP_AP_PERF_X86_CSTATE_ONLINE,
+                         "AP_PERF_X86_CSTATE_ONLINE", NULL, cstate_cpu_exit);
  
        if (has_cstate_core) {
                err = perf_pmu_register(&cstate_core_pmu, cstate_core_pmu.name, -1);
                if (err) {
                        has_cstate_core = false;
                        pr_info("Failed to register cstate core pmu\n");
-                       goto out;
+                       return err;
                }
        }
  
                        has_cstate_pkg = false;
                        pr_info("Failed to register cstate pkg pmu\n");
                        cstate_cleanup();
-                       goto out;
+                       return err;
                }
        }
-       __register_cpu_notifier(&cstate_cpu_nb);
- out:
-       cpu_notifier_register_done();
        return err;
  }
  
@@@ -652,9 -631,8 +632,8 @@@ module_init(cstate_pmu_init)
  
  static void __exit cstate_pmu_exit(void)
  {
-       cpu_notifier_register_begin();
-       __unregister_cpu_notifier(&cstate_cpu_nb);
+       cpuhp_remove_state_nocalls(CPUHP_AP_PERF_X86_CSTATE_ONLINE);
+       cpuhp_remove_state_nocalls(CPUHP_AP_PERF_X86_CSTATE_STARTING);
        cstate_cleanup();
-       cpu_notifier_register_done();
  }
  module_exit(cstate_pmu_exit);
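The cstate hunks above show the conversion pattern repeated throughout this merge: the multiplexed notifier callback, its notifier_block and its priority are dropped, each former notifier action becomes its own hotplug state with a plain int (*)(unsigned int cpu) callback, and cpuhp_setup_state() takes over what the old for_each_online_cpu() loop under cpu_notifier_register_begin/done() used to do. The sketch below is not part of the commit; it is a minimal out-of-tree illustration with hypothetical foo_* names, using the dynamic CPUHP_AP_ONLINE_DYN range instead of the fixed states this series adds.

#include <linux/cpu.h>
#include <linux/cpuhotplug.h>
#include <linux/module.h>

static enum cpuhp_state foo_hp_state;

static int foo_cpu_online(unsigned int cpu)
{
        /* per-CPU setup that used to run from CPU_ONLINE */
        return 0;
}

static int foo_cpu_offline(unsigned int cpu)
{
        /* per-CPU teardown that used to run from CPU_DOWN_PREPARE */
        return 0;
}

static int __init foo_init(void)
{
        int ret;

        /*
         * The core calls foo_cpu_online() for every CPU that is already
         * online, so no explicit for_each_online_cpu() loop and no
         * cpu_notifier_register_begin/done() bracketing is needed.
         */
        ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "foo:online",
                                foo_cpu_online, foo_cpu_offline);
        if (ret < 0)
                return ret;
        foo_hp_state = ret;     /* the dynamic range returns the state id */
        return 0;
}

static void __exit foo_exit(void)
{
        /* runs foo_cpu_offline() on all online CPUs, then frees the state */
        cpuhp_remove_state(foo_hp_state);
}

module_init(foo_init);
module_exit(foo_exit);
MODULE_LICENSE("GPL");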
index d0c58b35155f1721b59f6b935a65df9dfe814a65,6255ede56174e53ce4889426c2a879598e3a74d8..28865938aadf267e42829c3393cc405b0bf0672e
@@@ -55,7 -55,6 +55,7 @@@
  #include <linux/slab.h>
  #include <linux/perf_event.h>
  #include <asm/cpu_device_id.h>
 +#include <asm/intel-family.h>
  #include "../perf_event.h"
  
  MODULE_LICENSE("GPL");
@@@ -556,14 -555,14 +556,14 @@@ const struct attribute_group *rapl_attr
        NULL,
  };
  
- static void rapl_cpu_exit(int cpu)
+ static int rapl_cpu_offline(unsigned int cpu)
  {
        struct rapl_pmu *pmu = cpu_to_rapl_pmu(cpu);
        int target;
  
        /* Check if exiting cpu is used for collecting rapl events */
        if (!cpumask_test_and_clear_cpu(cpu, &rapl_cpu_mask))
-               return;
+               return 0;
  
        pmu->cpu = -1;
        /* Find a new cpu to collect rapl events */
                pmu->cpu = target;
                perf_pmu_migrate_context(pmu->pmu, cpu, target);
        }
+       return 0;
  }
  
- static void rapl_cpu_init(int cpu)
+ static int rapl_cpu_online(unsigned int cpu)
  {
        struct rapl_pmu *pmu = cpu_to_rapl_pmu(cpu);
        int target;
         */
        target = cpumask_any_and(&rapl_cpu_mask, topology_core_cpumask(cpu));
        if (target < nr_cpu_ids)
-               return;
+               return 0;
  
        cpumask_set_cpu(cpu, &rapl_cpu_mask);
        pmu->cpu = cpu;
+       return 0;
  }
  
- static int rapl_cpu_prepare(int cpu)
+ static int rapl_cpu_prepare(unsigned int cpu)
  {
        struct rapl_pmu *pmu = cpu_to_rapl_pmu(cpu);
  
        return 0;
  }
  
- static int rapl_cpu_notifier(struct notifier_block *self,
-                            unsigned long action, void *hcpu)
- {
-       unsigned int cpu = (long)hcpu;
-       switch (action & ~CPU_TASKS_FROZEN) {
-       case CPU_UP_PREPARE:
-               rapl_cpu_prepare(cpu);
-               break;
-       case CPU_DOWN_FAILED:
-       case CPU_ONLINE:
-               rapl_cpu_init(cpu);
-               break;
-       case CPU_DOWN_PREPARE:
-               rapl_cpu_exit(cpu);
-               break;
-       }
-       return NOTIFY_OK;
- }
- static struct notifier_block rapl_cpu_nb = {
-       .notifier_call  = rapl_cpu_notifier,
-       .priority       = CPU_PRI_PERF + 1,
- };
  static int rapl_check_hw_unit(bool apply_quirk)
  {
        u64 msr_rapl_power_unit_bits;
@@@ -692,24 -666,6 +667,6 @@@ static void __init rapl_advertise(void
        }
  }
  
- static int __init rapl_prepare_cpus(void)
- {
-       unsigned int cpu, pkg;
-       int ret;
-       for_each_online_cpu(cpu) {
-               pkg = topology_logical_package_id(cpu);
-               if (rapl_pmus->pmus[pkg])
-                       continue;
-               ret = rapl_cpu_prepare(cpu);
-               if (ret)
-                       return ret;
-               rapl_cpu_init(cpu);
-       }
-       return 0;
- }
  static void cleanup_rapl_pmus(void)
  {
        int i;
@@@ -787,27 -743,26 +744,27 @@@ static const struct intel_rapl_init_fu
  };
  
  static const struct x86_cpu_id rapl_cpu_match[] __initconst = {
 -      X86_RAPL_MODEL_MATCH(42, snb_rapl_init),        /* Sandy Bridge */
 -      X86_RAPL_MODEL_MATCH(45, snbep_rapl_init),      /* Sandy Bridge-EP */
 +      X86_RAPL_MODEL_MATCH(INTEL_FAM6_SANDYBRIDGE,   snb_rapl_init),
 +      X86_RAPL_MODEL_MATCH(INTEL_FAM6_SANDYBRIDGE_X, snbep_rapl_init),
  
 -      X86_RAPL_MODEL_MATCH(58, snb_rapl_init),        /* Ivy Bridge */
 -      X86_RAPL_MODEL_MATCH(62, snbep_rapl_init),      /* IvyTown */
 +      X86_RAPL_MODEL_MATCH(INTEL_FAM6_IVYBRIDGE,   snb_rapl_init),
 +      X86_RAPL_MODEL_MATCH(INTEL_FAM6_IVYBRIDGE_X, snbep_rapl_init),
  
 -      X86_RAPL_MODEL_MATCH(60, hsw_rapl_init),        /* Haswell */
 -      X86_RAPL_MODEL_MATCH(63, hsx_rapl_init),        /* Haswell-Server */
 -      X86_RAPL_MODEL_MATCH(69, hsw_rapl_init),        /* Haswell-Celeron */
 -      X86_RAPL_MODEL_MATCH(70, hsw_rapl_init),        /* Haswell GT3e */
 +      X86_RAPL_MODEL_MATCH(INTEL_FAM6_HASWELL_CORE, hsw_rapl_init),
 +      X86_RAPL_MODEL_MATCH(INTEL_FAM6_HASWELL_X,    hsw_rapl_init),
 +      X86_RAPL_MODEL_MATCH(INTEL_FAM6_HASWELL_ULT,  hsw_rapl_init),
 +      X86_RAPL_MODEL_MATCH(INTEL_FAM6_HASWELL_GT3E, hsw_rapl_init),
  
 -      X86_RAPL_MODEL_MATCH(61, hsw_rapl_init),        /* Broadwell */
 -      X86_RAPL_MODEL_MATCH(71, hsw_rapl_init),        /* Broadwell-H */
 -      X86_RAPL_MODEL_MATCH(79, hsx_rapl_init),        /* Broadwell-Server */
 -      X86_RAPL_MODEL_MATCH(86, hsx_rapl_init),        /* Broadwell Xeon D */
 +      X86_RAPL_MODEL_MATCH(INTEL_FAM6_BROADWELL_CORE,   hsw_rapl_init),
 +      X86_RAPL_MODEL_MATCH(INTEL_FAM6_BROADWELL_GT3E,   hsw_rapl_init),
 +      X86_RAPL_MODEL_MATCH(INTEL_FAM6_BROADWELL_X,      hsw_rapl_init),
 +      X86_RAPL_MODEL_MATCH(INTEL_FAM6_BROADWELL_XEON_D, hsw_rapl_init),
  
 -      X86_RAPL_MODEL_MATCH(87, knl_rapl_init),        /* Knights Landing */
 +      X86_RAPL_MODEL_MATCH(INTEL_FAM6_XEON_PHI_KNL, knl_rapl_init),
  
 -      X86_RAPL_MODEL_MATCH(78, skl_rapl_init),        /* Skylake */
 -      X86_RAPL_MODEL_MATCH(94, skl_rapl_init),        /* Skylake H/S */
 +      X86_RAPL_MODEL_MATCH(INTEL_FAM6_SKYLAKE_MOBILE,  skl_rapl_init),
 +      X86_RAPL_MODEL_MATCH(INTEL_FAM6_SKYLAKE_DESKTOP, skl_rapl_init),
 +      X86_RAPL_MODEL_MATCH(INTEL_FAM6_SKYLAKE_X,       hsx_rapl_init),
        {},
  };
  
@@@ -837,35 -792,44 +794,44 @@@ static int __init rapl_pmu_init(void
        if (ret)
                return ret;
  
-       cpu_notifier_register_begin();
+       /*
+        * Install callbacks. Core will call them for each online cpu.
+        */
  
-       ret = rapl_prepare_cpus();
+       ret = cpuhp_setup_state(CPUHP_PERF_X86_RAPL_PREP, "PERF_X86_RAPL_PREP",
+                               rapl_cpu_prepare, NULL);
        if (ret)
                goto out;
  
+       ret = cpuhp_setup_state(CPUHP_AP_PERF_X86_RAPL_ONLINE,
+                               "AP_PERF_X86_RAPL_ONLINE",
+                               rapl_cpu_online, rapl_cpu_offline);
+       if (ret)
+               goto out1;
        ret = perf_pmu_register(&rapl_pmus->pmu, "power", -1);
        if (ret)
-               goto out;
+               goto out2;
  
-       __register_cpu_notifier(&rapl_cpu_nb);
-       cpu_notifier_register_done();
        rapl_advertise();
        return 0;
  
+ out2:
+       cpuhp_remove_state(CPUHP_AP_PERF_X86_RAPL_ONLINE);
+ out1:
+       cpuhp_remove_state(CPUHP_PERF_X86_RAPL_PREP);
  out:
        pr_warn("Initialization failed (%d), disabled\n", ret);
        cleanup_rapl_pmus();
-       cpu_notifier_register_done();
        return ret;
  }
  module_init(rapl_pmu_init);
  
  static void __exit intel_rapl_exit(void)
  {
-       cpu_notifier_register_begin();
-       __unregister_cpu_notifier(&rapl_cpu_nb);
+       cpuhp_remove_state_nocalls(CPUHP_AP_PERF_X86_RAPL_ONLINE);
+       cpuhp_remove_state_nocalls(CPUHP_PERF_X86_RAPL_PREP);
        perf_pmu_unregister(&rapl_pmus->pmu);
        cleanup_rapl_pmus();
-       cpu_notifier_register_done();
  }
  module_exit(intel_rapl_exit);
index 59b4974c697fbc169e1c74929949258a4e9f7503,8e280a7cd0cc0c8386db524dc0d94c0fb236cf46..3f3d0d67749b634226286975d66cf45f9ebd63cb
@@@ -1,5 -1,4 +1,5 @@@
  #include <asm/cpu_device_id.h>
 +#include <asm/intel-family.h>
  #include "uncore.h"
  
  static struct intel_uncore_type *empty_uncore[] = { NULL, };
@@@ -883,7 -882,7 +883,7 @@@ uncore_types_init(struct intel_uncore_t
  static int uncore_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
  {
        struct intel_uncore_type *type;
 -      struct intel_uncore_pmu *pmu;
 +      struct intel_uncore_pmu *pmu = NULL;
        struct intel_uncore_box *box;
        int phys_id, pkg, ret;
  
        }
  
        type = uncore_pci_uncores[UNCORE_PCI_DEV_TYPE(id->driver_data)];
 +
        /*
 -       * for performance monitoring unit with multiple boxes,
 -       * each box has a different function id.
 -       */
 -      pmu = &type->pmus[UNCORE_PCI_DEV_IDX(id->driver_data)];
 -      /* Knights Landing uses a common PCI device ID for multiple instances of
 -       * an uncore PMU device type. There is only one entry per device type in
 -       * the knl_uncore_pci_ids table inspite of multiple devices present for
 -       * some device types. Hence PCI device idx would be 0 for all devices.
 -       * So increment pmu pointer to point to an unused array element.
 +       * Some platforms, e.g.  Knights Landing, use a common PCI device ID
 +       * for multiple instances of an uncore PMU device type. We should check
 +       * PCI slot and func to indicate the uncore box.
         */
 -      if (boot_cpu_data.x86_model == 87) {
 -              while (pmu->func_id >= 0)
 -                      pmu++;
 +      if (id->driver_data & ~0xffff) {
 +              struct pci_driver *pci_drv = pdev->driver;
 +              const struct pci_device_id *ids = pci_drv->id_table;
 +              unsigned int devfn;
 +
 +              while (ids && ids->vendor) {
 +                      if ((ids->vendor == pdev->vendor) &&
 +                          (ids->device == pdev->device)) {
 +                              devfn = PCI_DEVFN(UNCORE_PCI_DEV_DEV(ids->driver_data),
 +                                                UNCORE_PCI_DEV_FUNC(ids->driver_data));
 +                              if (devfn == pdev->devfn) {
 +                                      pmu = &type->pmus[UNCORE_PCI_DEV_IDX(ids->driver_data)];
 +                                      break;
 +                              }
 +                      }
 +                      ids++;
 +              }
 +              if (pmu == NULL)
 +                      return -ENODEV;
 +      } else {
 +              /*
 +               * for performance monitoring unit with multiple boxes,
 +               * each box has a different function id.
 +               */
 +              pmu = &type->pmus[UNCORE_PCI_DEV_IDX(id->driver_data)];
        }
  
        if (WARN_ON_ONCE(pmu->boxes[pkg] != NULL))
  
  static void uncore_pci_remove(struct pci_dev *pdev)
  {
 -      struct intel_uncore_box *box = pci_get_drvdata(pdev);
 +      struct intel_uncore_box *box;
        struct intel_uncore_pmu *pmu;
        int i, phys_id, pkg;
  
@@@ -1052,7 -1034,7 +1052,7 @@@ static void uncore_pci_exit(void
        }
  }
  
- static void uncore_cpu_dying(int cpu)
+ static int uncore_cpu_dying(unsigned int cpu)
  {
        struct intel_uncore_type *type, **types = uncore_msr_uncores;
        struct intel_uncore_pmu *pmu;
                                uncore_box_exit(box);
                }
        }
+       return 0;
  }
  
- static void uncore_cpu_starting(int cpu, bool init)
+ static int first_init;
+ static int uncore_cpu_starting(unsigned int cpu)
  {
        struct intel_uncore_type *type, **types = uncore_msr_uncores;
        struct intel_uncore_pmu *pmu;
        struct intel_uncore_box *box;
        int i, pkg, ncpus = 1;
  
-       if (init) {
+       if (first_init) {
                /*
                 * On init we get the number of online cpus in the package
                 * and set refcount for all of them.
                                uncore_box_init(box);
                }
        }
+       return 0;
  }
  
- static int uncore_cpu_prepare(int cpu)
+ static int uncore_cpu_prepare(unsigned int cpu)
  {
        struct intel_uncore_type *type, **types = uncore_msr_uncores;
        struct intel_uncore_pmu *pmu;
@@@ -1164,13 -1151,13 +1169,13 @@@ static void uncore_change_context(struc
                uncore_change_type_ctx(*uncores, old_cpu, new_cpu);
  }
  
- static void uncore_event_exit_cpu(int cpu)
+ static int uncore_event_cpu_offline(unsigned int cpu)
  {
        int target;
  
        /* Check if exiting cpu is used for collecting uncore events */
        if (!cpumask_test_and_clear_cpu(cpu, &uncore_cpu_mask))
-               return;
+               return 0;
  
        /* Find a new cpu to collect uncore events */
        target = cpumask_any_but(topology_core_cpumask(cpu), cpu);
  
        uncore_change_context(uncore_msr_uncores, cpu, target);
        uncore_change_context(uncore_pci_uncores, cpu, target);
+       return 0;
  }
  
- static void uncore_event_init_cpu(int cpu)
+ static int uncore_event_cpu_online(unsigned int cpu)
  {
        int target;
  
         */
        target = cpumask_any_and(&uncore_cpu_mask, topology_core_cpumask(cpu));
        if (target < nr_cpu_ids)
-               return;
+               return 0;
  
        cpumask_set_cpu(cpu, &uncore_cpu_mask);
  
        uncore_change_context(uncore_msr_uncores, -1, cpu);
        uncore_change_context(uncore_pci_uncores, -1, cpu);
+       return 0;
  }
  
- static int uncore_cpu_notifier(struct notifier_block *self,
-                              unsigned long action, void *hcpu)
- {
-       unsigned int cpu = (long)hcpu;
-       switch (action & ~CPU_TASKS_FROZEN) {
-       case CPU_UP_PREPARE:
-               return notifier_from_errno(uncore_cpu_prepare(cpu));
-       case CPU_STARTING:
-               uncore_cpu_starting(cpu, false);
-       case CPU_DOWN_FAILED:
-               uncore_event_init_cpu(cpu);
-               break;
-       case CPU_UP_CANCELED:
-       case CPU_DYING:
-               uncore_cpu_dying(cpu);
-               break;
-       case CPU_DOWN_PREPARE:
-               uncore_event_exit_cpu(cpu);
-               break;
-       }
-       return NOTIFY_OK;
- }
- static struct notifier_block uncore_cpu_nb = {
-       .notifier_call  = uncore_cpu_notifier,
-       /*
-        * to migrate uncore events, our notifier should be executed
-        * before perf core's notifier.
-        */
-       .priority       = CPU_PRI_PERF + 1,
- };
  static int __init type_pmu_register(struct intel_uncore_type *type)
  {
        int i, ret;
@@@ -1282,41 -1235,6 +1253,6 @@@ err
        return ret;
  }
  
- static void __init uncore_cpu_setup(void *dummy)
- {
-       uncore_cpu_starting(smp_processor_id(), true);
- }
- /* Lazy to avoid allocation of a few bytes for the normal case */
- static __initdata DECLARE_BITMAP(packages, MAX_LOCAL_APIC);
- static int __init uncore_cpumask_init(bool msr)
- {
-       unsigned int cpu;
-       for_each_online_cpu(cpu) {
-               unsigned int pkg = topology_logical_package_id(cpu);
-               int ret;
-               if (test_and_set_bit(pkg, packages))
-                       continue;
-               /*
-                * The first online cpu of each package allocates and takes
-                * the refcounts for all other online cpus in that package.
-                * If msrs are not enabled no allocation is required.
-                */
-               if (msr) {
-                       ret = uncore_cpu_prepare(cpu);
-                       if (ret)
-                               return ret;
-               }
-               uncore_event_init_cpu(cpu);
-               smp_call_function_single(cpu, uncore_cpu_setup, NULL, 1);
-       }
-       __register_cpu_notifier(&uncore_cpu_nb);
-       return 0;
- }
  #define X86_UNCORE_MODEL_MATCH(model, init)   \
        { X86_VENDOR_INTEL, 6, model, X86_FEATURE_ANY, (unsigned long)&init }
  
@@@ -1379,32 -1297,30 +1315,32 @@@ static const struct intel_uncore_init_f
  };
  
  static const struct intel_uncore_init_fun skl_uncore_init __initconst = {
 +      .cpu_init = skl_uncore_cpu_init,
        .pci_init = skl_uncore_pci_init,
  };
  
  static const struct x86_cpu_id intel_uncore_match[] __initconst = {
 -      X86_UNCORE_MODEL_MATCH(26, nhm_uncore_init),    /* Nehalem */
 -      X86_UNCORE_MODEL_MATCH(30, nhm_uncore_init),
 -      X86_UNCORE_MODEL_MATCH(37, nhm_uncore_init),    /* Westmere */
 -      X86_UNCORE_MODEL_MATCH(44, nhm_uncore_init),
 -      X86_UNCORE_MODEL_MATCH(42, snb_uncore_init),    /* Sandy Bridge */
 -      X86_UNCORE_MODEL_MATCH(58, ivb_uncore_init),    /* Ivy Bridge */
 -      X86_UNCORE_MODEL_MATCH(60, hsw_uncore_init),    /* Haswell */
 -      X86_UNCORE_MODEL_MATCH(69, hsw_uncore_init),    /* Haswell Celeron */
 -      X86_UNCORE_MODEL_MATCH(70, hsw_uncore_init),    /* Haswell */
 -      X86_UNCORE_MODEL_MATCH(61, bdw_uncore_init),    /* Broadwell */
 -      X86_UNCORE_MODEL_MATCH(71, bdw_uncore_init),    /* Broadwell */
 -      X86_UNCORE_MODEL_MATCH(45, snbep_uncore_init),  /* Sandy Bridge-EP */
 -      X86_UNCORE_MODEL_MATCH(46, nhmex_uncore_init),  /* Nehalem-EX */
 -      X86_UNCORE_MODEL_MATCH(47, nhmex_uncore_init),  /* Westmere-EX aka. Xeon E7 */
 -      X86_UNCORE_MODEL_MATCH(62, ivbep_uncore_init),  /* Ivy Bridge-EP */
 -      X86_UNCORE_MODEL_MATCH(63, hswep_uncore_init),  /* Haswell-EP */
 -      X86_UNCORE_MODEL_MATCH(79, bdx_uncore_init),    /* BDX-EP */
 -      X86_UNCORE_MODEL_MATCH(86, bdx_uncore_init),    /* BDX-DE */
 -      X86_UNCORE_MODEL_MATCH(87, knl_uncore_init),    /* Knights Landing */
 -      X86_UNCORE_MODEL_MATCH(94, skl_uncore_init),    /* SkyLake */
 +      X86_UNCORE_MODEL_MATCH(INTEL_FAM6_NEHALEM_EP,     nhm_uncore_init),
 +      X86_UNCORE_MODEL_MATCH(INTEL_FAM6_NEHALEM,        nhm_uncore_init),
 +      X86_UNCORE_MODEL_MATCH(INTEL_FAM6_WESTMERE,       nhm_uncore_init),
 +      X86_UNCORE_MODEL_MATCH(INTEL_FAM6_WESTMERE_EP,    nhm_uncore_init),
 +      X86_UNCORE_MODEL_MATCH(INTEL_FAM6_SANDYBRIDGE,    snb_uncore_init),
 +      X86_UNCORE_MODEL_MATCH(INTEL_FAM6_IVYBRIDGE,      ivb_uncore_init),
 +      X86_UNCORE_MODEL_MATCH(INTEL_FAM6_HASWELL_CORE,   hsw_uncore_init),
 +      X86_UNCORE_MODEL_MATCH(INTEL_FAM6_HASWELL_ULT,    hsw_uncore_init),
 +      X86_UNCORE_MODEL_MATCH(INTEL_FAM6_HASWELL_GT3E,   hsw_uncore_init),
 +      X86_UNCORE_MODEL_MATCH(INTEL_FAM6_BROADWELL_CORE, bdw_uncore_init),
 +      X86_UNCORE_MODEL_MATCH(INTEL_FAM6_BROADWELL_GT3E, bdw_uncore_init),
 +      X86_UNCORE_MODEL_MATCH(INTEL_FAM6_SANDYBRIDGE_X,  snbep_uncore_init),
 +      X86_UNCORE_MODEL_MATCH(INTEL_FAM6_NEHALEM_EX,     nhmex_uncore_init),
 +      X86_UNCORE_MODEL_MATCH(INTEL_FAM6_WESTMERE_EX,    nhmex_uncore_init),
 +      X86_UNCORE_MODEL_MATCH(INTEL_FAM6_IVYBRIDGE_X,    ivbep_uncore_init),
 +      X86_UNCORE_MODEL_MATCH(INTEL_FAM6_HASWELL_X,      hswep_uncore_init),
 +      X86_UNCORE_MODEL_MATCH(INTEL_FAM6_BROADWELL_X,    bdx_uncore_init),
 +      X86_UNCORE_MODEL_MATCH(INTEL_FAM6_BROADWELL_XEON_D, bdx_uncore_init),
 +      X86_UNCORE_MODEL_MATCH(INTEL_FAM6_XEON_PHI_KNL,   knl_uncore_init),
 +      X86_UNCORE_MODEL_MATCH(INTEL_FAM6_SKYLAKE_DESKTOP,skl_uncore_init),
 +      X86_UNCORE_MODEL_MATCH(INTEL_FAM6_SKYLAKE_MOBILE, skl_uncore_init),
        {},
  };
  
@@@ -1440,11 -1356,33 +1376,33 @@@ static int __init intel_uncore_init(voi
        if (cret && pret)
                return -ENODEV;
  
-       cpu_notifier_register_begin();
-       ret = uncore_cpumask_init(!cret);
-       if (ret)
-               goto err;
-       cpu_notifier_register_done();
+       /*
+        * Install callbacks. Core will call them for each online cpu.
+        *
+        * The first online cpu of each package allocates and takes
+        * the refcounts for all other online cpus in that package.
+        * If msrs are not enabled no allocation is required and
+        * uncore_cpu_prepare() is not called for each online cpu.
+        */
+       if (!cret) {
+              ret = cpuhp_setup_state(CPUHP_PERF_X86_UNCORE_PREP,
+                                       "PERF_X86_UNCORE_PREP",
+                                       uncore_cpu_prepare, NULL);
+               if (ret)
+                       goto err;
+       } else {
+               cpuhp_setup_state_nocalls(CPUHP_PERF_X86_UNCORE_PREP,
+                                         "PERF_X86_UNCORE_PREP",
+                                         uncore_cpu_prepare, NULL);
+       }
+       first_init = 1;
+       cpuhp_setup_state(CPUHP_AP_PERF_X86_UNCORE_STARTING,
+                         "AP_PERF_X86_UNCORE_STARTING",
+                         uncore_cpu_starting, uncore_cpu_dying);
+       first_init = 0;
+       cpuhp_setup_state(CPUHP_AP_PERF_X86_UNCORE_ONLINE,
+                         "AP_PERF_X86_UNCORE_ONLINE",
+                         uncore_event_cpu_online, uncore_event_cpu_offline);
        return 0;
  
  err:
        on_each_cpu_mask(&uncore_cpu_mask, uncore_exit_boxes, NULL, 1);
        uncore_types_exit(uncore_msr_uncores);
        uncore_pci_exit();
-       cpu_notifier_register_done();
        return ret;
  }
  module_init(intel_uncore_init);
  
  static void __exit intel_uncore_exit(void)
  {
-       cpu_notifier_register_begin();
-       __unregister_cpu_notifier(&uncore_cpu_nb);
+       cpuhp_remove_state_nocalls(CPUHP_AP_PERF_X86_UNCORE_ONLINE);
+       cpuhp_remove_state_nocalls(CPUHP_AP_PERF_X86_UNCORE_STARTING);
+       cpuhp_remove_state_nocalls(CPUHP_PERF_X86_UNCORE_PREP);
        uncore_types_exit(uncore_msr_uncores);
        uncore_pci_exit();
-       cpu_notifier_register_done();
  }
  module_exit(intel_uncore_exit);
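A detail worth noting in the uncore initialization above: cpuhp_setup_state() also invokes the startup callback for every CPU that is already online, while cpuhp_setup_state_nocalls() merely installs the callbacks for later hotplug operations, which is why the cret (no MSR uncores) branch registers the prepare state without ever running uncore_cpu_prepare(). A hedged sketch of that distinction, with hypothetical bar_* names and a dynamic state standing in for CPUHP_PERF_X86_UNCORE_PREP:

#include <linux/cpuhotplug.h>
#include <linux/init.h>
#include <linux/types.h>

/* Illustrative only; bar_prepare() and the have_hw flag are stand-ins. */
static int bar_prepare(unsigned int cpu)
{
        /* per-CPU setup; runs in process context and may sleep */
        return 0;
}

static int __init bar_install(bool have_hw)
{
        int ret;

        if (have_hw)
                /* runs bar_prepare() on every CPU that is already online */
                ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "bar:prepare",
                                        bar_prepare, NULL);
        else
                /* only arms the callback for CPUs that come online later */
                ret = cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN,
                                                "bar:prepare",
                                                bar_prepare, NULL);

        /* dynamic states return a positive state id on success */
        return ret < 0 ? ret : 0;
}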
index 24170d0809ba9e45eb88fce04c0a7be4fe708b8b,b5da5a8e5e45052c223e8886fd4b74636e8251af..6368fa69d2afa0eb44c5e90fe5293c4ceeafa93c
@@@ -152,68 -152,48 +152,48 @@@ static void init_x2apic_ldr(void
        }
  }
  
-  /*
-   * At CPU state changes, update the x2apic cluster sibling info.
-   */
- static int
- update_clusterinfo(struct notifier_block *nfb, unsigned long action, void *hcpu)
+ /*
+  * At CPU state changes, update the x2apic cluster sibling info.
+  */
+ int x2apic_prepare_cpu(unsigned int cpu)
  {
-       unsigned int this_cpu = (unsigned long)hcpu;
-       unsigned int cpu;
-       int err = 0;
-       switch (action) {
-       case CPU_UP_PREPARE:
-               if (!zalloc_cpumask_var(&per_cpu(cpus_in_cluster, this_cpu),
-                                       GFP_KERNEL)) {
-                       err = -ENOMEM;
-               } else if (!zalloc_cpumask_var(&per_cpu(ipi_mask, this_cpu),
-                                              GFP_KERNEL)) {
-                       free_cpumask_var(per_cpu(cpus_in_cluster, this_cpu));
-                       err = -ENOMEM;
-               }
-               break;
-       case CPU_UP_CANCELED:
-       case CPU_UP_CANCELED_FROZEN:
-       case CPU_DEAD:
-               for_each_online_cpu(cpu) {
-                       if (x2apic_cluster(this_cpu) != x2apic_cluster(cpu))
-                               continue;
-                       cpumask_clear_cpu(this_cpu, per_cpu(cpus_in_cluster, cpu));
-                       cpumask_clear_cpu(cpu, per_cpu(cpus_in_cluster, this_cpu));
-               }
-               free_cpumask_var(per_cpu(cpus_in_cluster, this_cpu));
-               free_cpumask_var(per_cpu(ipi_mask, this_cpu));
-               break;
+       if (!zalloc_cpumask_var(&per_cpu(cpus_in_cluster, cpu), GFP_KERNEL))
+               return -ENOMEM;
+       if (!zalloc_cpumask_var(&per_cpu(ipi_mask, cpu), GFP_KERNEL)) {
+               free_cpumask_var(per_cpu(cpus_in_cluster, cpu));
+               return -ENOMEM;
        }
  
-       return notifier_from_errno(err);
+       return 0;
  }
  
- static struct notifier_block x2apic_cpu_notifier = {
-       .notifier_call = update_clusterinfo,
- };
- static int x2apic_init_cpu_notifier(void)
+ int x2apic_dead_cpu(unsigned int this_cpu)
  {
-       int cpu = smp_processor_id();
-       zalloc_cpumask_var(&per_cpu(cpus_in_cluster, cpu), GFP_KERNEL);
-       zalloc_cpumask_var(&per_cpu(ipi_mask, cpu), GFP_KERNEL);
+       int cpu;
  
-       BUG_ON(!per_cpu(cpus_in_cluster, cpu) || !per_cpu(ipi_mask, cpu));
-       cpumask_set_cpu(cpu, per_cpu(cpus_in_cluster, cpu));
-       register_hotcpu_notifier(&x2apic_cpu_notifier);
-       return 1;
+       for_each_online_cpu(cpu) {
+               if (x2apic_cluster(this_cpu) != x2apic_cluster(cpu))
+                       continue;
+               cpumask_clear_cpu(this_cpu, per_cpu(cpus_in_cluster, cpu));
+               cpumask_clear_cpu(cpu, per_cpu(cpus_in_cluster, this_cpu));
+       }
+       free_cpumask_var(per_cpu(cpus_in_cluster, this_cpu));
+       free_cpumask_var(per_cpu(ipi_mask, this_cpu));
+       return 0;
  }
  
  static int x2apic_cluster_probe(void)
  {
-       if (x2apic_mode)
-               return x2apic_init_cpu_notifier();
-       else
+       int cpu = smp_processor_id();
+       if (!x2apic_mode)
                return 0;
+       cpumask_set_cpu(cpu, per_cpu(cpus_in_cluster, cpu));
+       cpuhp_setup_state(CPUHP_X2APIC_PREPARE, "X2APIC_PREPARE",
+                         x2apic_prepare_cpu, x2apic_dead_cpu);
+       return 1;
  }
  
  static const struct cpumask *x2apic_cluster_target_cpus(void)
@@@ -270,6 -250,7 +250,6 @@@ static struct apic apic_x2apic_cluster 
  
        .get_apic_id                    = x2apic_get_apic_id,
        .set_apic_id                    = x2apic_set_apic_id,
 -      .apic_id_mask                   = 0xFFFFFFFFu,
  
        .cpu_mask_to_apicid_and         = x2apic_cpu_mask_to_apicid_and,
  
diff --combined arch/x86/kvm/x86.c
index b2766723c951e967a992a9730c6b283151d41076,f899127b4832884209b5d239279eeea8aad07068..45608a7da9b3406b0a93bfc3620be089bc564e5e
@@@ -55,6 -55,9 +55,6 @@@
  #include <linux/irqbypass.h>
  #include <trace/events/kvm.h>
  
 -#define CREATE_TRACE_POINTS
 -#include "trace.h"
 -
  #include <asm/debugreg.h>
  #include <asm/msr.h>
  #include <asm/desc.h>
@@@ -65,9 -68,6 +65,9 @@@
  #include <asm/div64.h>
  #include <asm/irq_remapping.h>
  
 +#define CREATE_TRACE_POINTS
 +#include "trace.h"
 +
  #define MAX_IO_MSRS 256
  #define KVM_MAX_MCE_BANKS 32
  #define KVM_MCE_CAP_SUPPORTED (MCG_CTL_P | MCG_SER_P)
@@@ -5552,9 -5552,10 +5552,10 @@@ int kvm_fast_pio_out(struct kvm_vcpu *v
  }
  EXPORT_SYMBOL_GPL(kvm_fast_pio_out);
  
- static void tsc_bad(void *info)
+ static int kvmclock_cpu_down_prep(unsigned int cpu)
  {
        __this_cpu_write(cpu_tsc_khz, 0);
+       return 0;
  }
  
  static void tsc_khz_changed(void *data)
@@@ -5659,35 -5660,18 +5660,18 @@@ static struct notifier_block kvmclock_c
        .notifier_call  = kvmclock_cpufreq_notifier
  };
  
- static int kvmclock_cpu_notifier(struct notifier_block *nfb,
-                                       unsigned long action, void *hcpu)
+ static int kvmclock_cpu_online(unsigned int cpu)
  {
-       unsigned int cpu = (unsigned long)hcpu;
-       switch (action) {
-               case CPU_ONLINE:
-               case CPU_DOWN_FAILED:
-                       smp_call_function_single(cpu, tsc_khz_changed, NULL, 1);
-                       break;
-               case CPU_DOWN_PREPARE:
-                       smp_call_function_single(cpu, tsc_bad, NULL, 1);
-                       break;
-       }
-       return NOTIFY_OK;
+       tsc_khz_changed(NULL);
+       return 0;
  }
  
- static struct notifier_block kvmclock_cpu_notifier_block = {
-       .notifier_call  = kvmclock_cpu_notifier,
-       .priority = -INT_MAX
- };
  static void kvm_timer_init(void)
  {
        int cpu;
  
        max_tsc_khz = tsc_khz;
  
-       cpu_notifier_register_begin();
        if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) {
  #ifdef CONFIG_CPU_FREQ
                struct cpufreq_policy policy;
                                          CPUFREQ_TRANSITION_NOTIFIER);
        }
        pr_debug("kvm: max_tsc_khz = %ld\n", max_tsc_khz);
-       for_each_online_cpu(cpu)
-               smp_call_function_single(cpu, tsc_khz_changed, NULL, 1);
-       __register_hotcpu_notifier(&kvmclock_cpu_notifier_block);
-       cpu_notifier_register_done();
  
+       cpuhp_setup_state(CPUHP_AP_X86_KVM_CLK_ONLINE, "AP_X86_KVM_CLK_ONLINE",
+                         kvmclock_cpu_online, kvmclock_cpu_down_prep);
  }
  
  static DEFINE_PER_CPU(struct kvm_vcpu *, current_vcpu);
@@@ -5896,7 -5877,7 +5877,7 @@@ void kvm_arch_exit(void
        if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC))
                cpufreq_unregister_notifier(&kvmclock_cpufreq_notifier_block,
                                            CPUFREQ_TRANSITION_NOTIFIER);
-       unregister_hotcpu_notifier(&kvmclock_cpu_notifier_block);
+       cpuhp_remove_state_nocalls(CPUHP_AP_X86_KVM_CLK_ONLINE);
  #ifdef CONFIG_X86_64
        pvclock_gtod_unregister_notifier(&pvclock_gtod_notifier);
  #endif
index 0ca14ac7bb28826d01654f652c7028125f07823f,eecdb196b2d16f3ffd22fc52fa0eda1252fabb49..0553aeebb2288098e07f1e5c4203110cede48f99
@@@ -90,7 -90,7 +90,7 @@@ static void acpi_processor_notify(acpi_
                                                  pr->performance_platform_limit);
                break;
        case ACPI_PROCESSOR_NOTIFY_POWER:
 -              acpi_processor_cst_has_changed(pr);
 +              acpi_processor_power_state_has_changed(pr);
                acpi_bus_generate_netlink_event(device->pnp.device_class,
                                                  dev_name(&device->dev), event, 0);
                break;
@@@ -118,12 -118,13 +118,13 @@@ static int acpi_cpu_soft_notify(struct 
        struct acpi_device *device;
        action &= ~CPU_TASKS_FROZEN;
  
-       /*
-        * CPU_STARTING and CPU_DYING must not sleep. Return here since
-        * acpi_bus_get_device() may sleep.
-        */
-       if (action == CPU_STARTING || action == CPU_DYING)
+       switch (action) {
+       case CPU_ONLINE:
+       case CPU_DEAD:
+               break;
+       default:
                return NOTIFY_DONE;
+       }
  
        if (!pr || acpi_bus_get_device(pr->handle, &device))
                return NOTIFY_DONE;
index 7c42b1d13faf035ed571a8142ad52a76835d8ae0,cdef4405ca50766a238cdee66a8b2044c7486a89..8bcee65a0b8c92a22c8d49ac362656a19233e04f
@@@ -345,38 -345,20 +345,20 @@@ static void armada_mpic_send_doorbell(c
                ARMADA_370_XP_SW_TRIG_INT_OFFS);
  }
  
- static int armada_xp_mpic_secondary_init(struct notifier_block *nfb,
-                                        unsigned long action, void *hcpu)
+ static int armada_xp_mpic_starting_cpu(unsigned int cpu)
  {
-       if (action == CPU_STARTING || action == CPU_STARTING_FROZEN) {
-               armada_xp_mpic_perf_init();
-               armada_xp_mpic_smp_cpu_init();
-       }
-       return NOTIFY_OK;
+       armada_xp_mpic_perf_init();
+       armada_xp_mpic_smp_cpu_init();
+       return 0;
  }
  
- static struct notifier_block armada_370_xp_mpic_cpu_notifier = {
-       .notifier_call = armada_xp_mpic_secondary_init,
-       .priority = 100,
- };
- static int mpic_cascaded_secondary_init(struct notifier_block *nfb,
-                                       unsigned long action, void *hcpu)
+ static int mpic_cascaded_starting_cpu(unsigned int cpu)
  {
-       if (action == CPU_STARTING || action == CPU_STARTING_FROZEN) {
-               armada_xp_mpic_perf_init();
-               enable_percpu_irq(parent_irq, IRQ_TYPE_NONE);
-       }
-       return NOTIFY_OK;
+       armada_xp_mpic_perf_init();
+       enable_percpu_irq(parent_irq, IRQ_TYPE_NONE);
+       return 0;
  }
- static struct notifier_block mpic_cascaded_cpu_notifier = {
-       .notifier_call = mpic_cascaded_secondary_init,
-       .priority = 100,
- };
- #endif /* CONFIG_SMP */
+ #endif
  
  static const struct irq_domain_ops armada_370_xp_mpic_irq_ops = {
        .map = armada_370_xp_mpic_irq_map,
@@@ -541,7 -523,7 +523,7 @@@ static void armada_370_xp_mpic_resume(v
                writel(1, per_cpu_int_base + ARMADA_370_XP_INT_CLEAR_MASK_OFFS);
  }
  
 -struct syscore_ops armada_370_xp_mpic_syscore_ops = {
 +static struct syscore_ops armada_370_xp_mpic_syscore_ops = {
        .suspend        = armada_370_xp_mpic_suspend,
        .resume         = armada_370_xp_mpic_resume,
  };
@@@ -595,11 -577,15 +577,15 @@@ static int __init armada_370_xp_mpic_of
                set_handle_irq(armada_370_xp_handle_irq);
  #ifdef CONFIG_SMP
                set_smp_cross_call(armada_mpic_send_doorbell);
-               register_cpu_notifier(&armada_370_xp_mpic_cpu_notifier);
+               cpuhp_setup_state_nocalls(CPUHP_AP_IRQ_ARMADA_XP_STARTING,
+                                         "AP_IRQ_ARMADA_XP_STARTING",
+                                         armada_xp_mpic_starting_cpu, NULL);
  #endif
        } else {
  #ifdef CONFIG_SMP
-               register_cpu_notifier(&mpic_cascaded_cpu_notifier);
+               cpuhp_setup_state_nocalls(CPUHP_AP_IRQ_ARMADA_CASC_STARTING,
+                                         "AP_IRQ_ARMADA_CASC_STARTING",
+                                         mpic_cascaded_starting_cpu, NULL);
  #endif
                irq_set_chained_handler(parent_irq,
                                        armada_370_xp_mpic_handle_cascade_irq);
index df1949c0aa23ad927a58f898f0739dc9e35ca46d,f2575cb2b013f8120817bdf3a63886e2d12b6caf..d96b2c947e74e3edab3917551c64fbd1ced0f34c
@@@ -180,7 -180,7 +180,7 @@@ __exception_irq_entry bcm2836_arm_irqch
        } else if (stat) {
                u32 hwirq = ffs(stat) - 1;
  
 -              handle_IRQ(irq_linear_revmap(intc.domain, hwirq), regs);
 +              handle_domain_irq(intc.domain, hwirq, regs);
        }
  }
  
@@@ -202,30 -202,23 +202,23 @@@ static void bcm2836_arm_irqchip_send_ip
        }
  }
  
- /* Unmasks the IPI on the CPU when it's online. */
- static int bcm2836_arm_irqchip_cpu_notify(struct notifier_block *nfb,
-                                         unsigned long action, void *hcpu)
+ static int bcm2836_cpu_starting(unsigned int cpu)
  {
-       unsigned int cpu = (unsigned long)hcpu;
-       unsigned int int_reg = LOCAL_MAILBOX_INT_CONTROL0;
-       unsigned int mailbox = 0;
-       if (action == CPU_STARTING || action == CPU_STARTING_FROZEN)
-               bcm2836_arm_irqchip_unmask_per_cpu_irq(int_reg, mailbox, cpu);
-       else if (action == CPU_DYING)
-               bcm2836_arm_irqchip_mask_per_cpu_irq(int_reg, mailbox, cpu);
-       return NOTIFY_OK;
+       bcm2836_arm_irqchip_unmask_per_cpu_irq(LOCAL_MAILBOX_INT_CONTROL0, 0,
+                                              cpu);
+       return 0;
  }
  
- static struct notifier_block bcm2836_arm_irqchip_cpu_notifier = {
-       .notifier_call = bcm2836_arm_irqchip_cpu_notify,
-       .priority = 100,
- };
+ static int bcm2836_cpu_dying(unsigned int cpu)
+ {
+       bcm2836_arm_irqchip_mask_per_cpu_irq(LOCAL_MAILBOX_INT_CONTROL0, 0,
+                                            cpu);
+       return 0;
+ }
  
  #ifdef CONFIG_ARM
 -int __init bcm2836_smp_boot_secondary(unsigned int cpu,
 -                                    struct task_struct *idle)
 +static int __init bcm2836_smp_boot_secondary(unsigned int cpu,
 +                                           struct task_struct *idle)
  {
        unsigned long secondary_startup_phys =
                (unsigned long)virt_to_phys((void *)secondary_startup);
@@@ -251,10 -244,9 +244,9 @@@ bcm2836_arm_irqchip_smp_init(void
  {
  #ifdef CONFIG_SMP
        /* Unmask IPIs to the boot CPU. */
-       bcm2836_arm_irqchip_cpu_notify(&bcm2836_arm_irqchip_cpu_notifier,
-                                      CPU_STARTING,
-                                      (void *)(uintptr_t)smp_processor_id());
-       register_cpu_notifier(&bcm2836_arm_irqchip_cpu_notifier);
+       cpuhp_setup_state(CPUHP_AP_IRQ_BCM2836_STARTING,
+                         "AP_IRQ_BCM2836_STARTING", bcm2836_cpu_starting,
+                         bcm2836_cpu_dying);
  
        set_smp_cross_call(bcm2836_arm_irqchip_send_ipi);
  
index 1de07eb5839c7b522bf6e81382a7584afc02f217,606f114166a163b5f37927e2324cd1dd011cc5eb..c2cab572c5111c392c076d62232da4828cfa00a1
@@@ -75,7 -75,7 +75,7 @@@ struct gic_chip_data 
        void __iomem *raw_dist_base;
        void __iomem *raw_cpu_base;
        u32 percpu_offset;
 -#ifdef CONFIG_CPU_PM
 +#if defined(CONFIG_CPU_PM) || defined(CONFIG_ARM_GIC_PM)
        u32 saved_spi_enable[DIV_ROUND_UP(1020, 32)];
        u32 saved_spi_active[DIV_ROUND_UP(1020, 32)];
        u32 saved_spi_conf[DIV_ROUND_UP(1020, 16)];
@@@ -449,7 -449,7 +449,7 @@@ static void gic_cpu_if_up(struct gic_ch
  }
  
  
 -static void __init gic_dist_init(struct gic_chip_data *gic)
 +static void gic_dist_init(struct gic_chip_data *gic)
  {
        unsigned int i;
        u32 cpumask;
@@@ -528,14 -528,14 +528,14 @@@ int gic_cpu_if_down(unsigned int gic_nr
        return 0;
  }
  
 -#ifdef CONFIG_CPU_PM
 +#if defined(CONFIG_CPU_PM) || defined(CONFIG_ARM_GIC_PM)
  /*
   * Saves the GIC distributor registers during suspend or idle.  Must be called
   * with interrupts disabled but before powering down the GIC.  After calling
   * this function, no interrupts will be delivered by the GIC, and another
   * platform-specific wakeup source must be enabled.
   */
 -static void gic_dist_save(struct gic_chip_data *gic)
 +void gic_dist_save(struct gic_chip_data *gic)
  {
        unsigned int gic_irqs;
        void __iomem *dist_base;
   * handled normally, but any edge interrupts that occurred will not be seen by
   * the GIC and need to be handled by the platform-specific wakeup source.
   */
 -static void gic_dist_restore(struct gic_chip_data *gic)
 +void gic_dist_restore(struct gic_chip_data *gic)
  {
        unsigned int gic_irqs;
        unsigned int i;
        writel_relaxed(GICD_ENABLE, dist_base + GIC_DIST_CTRL);
  }
  
 -static void gic_cpu_save(struct gic_chip_data *gic)
 +void gic_cpu_save(struct gic_chip_data *gic)
  {
        int i;
        u32 *ptr;
  
  }
  
 -static void gic_cpu_restore(struct gic_chip_data *gic)
 +void gic_cpu_restore(struct gic_chip_data *gic)
  {
        int i;
        u32 *ptr;
@@@ -727,7 -727,7 +727,7 @@@ static struct notifier_block gic_notifi
        .notifier_call = gic_notifier,
  };
  
 -static int __init gic_pm_init(struct gic_chip_data *gic)
 +static int gic_pm_init(struct gic_chip_data *gic)
  {
        gic->saved_ppi_enable = __alloc_percpu(DIV_ROUND_UP(32, 32) * 4,
                sizeof(u32));
@@@ -757,7 -757,7 +757,7 @@@ free_ppi_enable
        return -ENOMEM;
  }
  #else
 -static int __init gic_pm_init(struct gic_chip_data *gic)
 +static int gic_pm_init(struct gic_chip_data *gic)
  {
        return 0;
  }
@@@ -984,25 -984,12 +984,12 @@@ static int gic_irq_domain_translate(str
        return -EINVAL;
  }
  
- #ifdef CONFIG_SMP
- static int gic_secondary_init(struct notifier_block *nfb, unsigned long action,
-                             void *hcpu)
+ static int gic_starting_cpu(unsigned int cpu)
  {
-       if (action == CPU_STARTING || action == CPU_STARTING_FROZEN)
-               gic_cpu_init(&gic_data[0]);
-       return NOTIFY_OK;
+       gic_cpu_init(&gic_data[0]);
+       return 0;
  }
  
- /*
-  * Notifier for enabling the GIC CPU interface. Set an arbitrarily high
-  * priority because the GIC needs to be up before the ARM generic timers.
-  */
- static struct notifier_block gic_cpu_notifier = {
-       .notifier_call = gic_secondary_init,
-       .priority = 100,
- };
- #endif
  static int gic_irq_domain_alloc(struct irq_domain *domain, unsigned int virq,
                                unsigned int nr_irqs, void *arg)
  {
@@@ -1032,31 -1019,32 +1019,31 @@@ static const struct irq_domain_ops gic_
        .unmap = gic_irq_domain_unmap,
  };
  
 -static int __init __gic_init_bases(struct gic_chip_data *gic, int irq_start,
 -                                 struct fwnode_handle *handle)
 +static void gic_init_chip(struct gic_chip_data *gic, struct device *dev,
 +                        const char *name, bool use_eoimode1)
  {
 -      irq_hw_number_t hwirq_base;
 -      int gic_irqs, irq_base, i, ret;
 -
 -      if (WARN_ON(!gic || gic->domain))
 -              return -EINVAL;
 -
        /* Initialize irq_chip */
        gic->chip = gic_chip;
 +      gic->chip.name = name;
 +      gic->chip.parent_device = dev;
  
 -      if (static_key_true(&supports_deactivate) && gic == &gic_data[0]) {
 +      if (use_eoimode1) {
                gic->chip.irq_mask = gic_eoimode1_mask_irq;
                gic->chip.irq_eoi = gic_eoimode1_eoi_irq;
                gic->chip.irq_set_vcpu_affinity = gic_irq_set_vcpu_affinity;
 -              gic->chip.name = kasprintf(GFP_KERNEL, "GICv2");
 -      } else {
 -              gic->chip.name = kasprintf(GFP_KERNEL, "GIC-%d",
 -                                         (int)(gic - &gic_data[0]));
        }
  
  #ifdef CONFIG_SMP
        if (gic == &gic_data[0])
                gic->chip.irq_set_affinity = gic_set_affinity;
  #endif
 +}
 +
 +static int gic_init_bases(struct gic_chip_data *gic, int irq_start,
 +                        struct fwnode_handle *handle)
 +{
 +      irq_hw_number_t hwirq_base;
 +      int gic_irqs, irq_base, ret;
  
        if (IS_ENABLED(CONFIG_GIC_NON_BANKED) && gic->percpu_offset) {
                /* Franken-GIC without banked registers... */
                goto error;
        }
  
 +      gic_dist_init(gic);
 +      ret = gic_cpu_init(gic);
 +      if (ret)
 +              goto error;
 +
 +      ret = gic_pm_init(gic);
 +      if (ret)
 +              goto error;
 +
 +      return 0;
 +
 +error:
 +      if (IS_ENABLED(CONFIG_GIC_NON_BANKED) && gic->percpu_offset) {
 +              free_percpu(gic->dist_base.percpu_base);
 +              free_percpu(gic->cpu_base.percpu_base);
 +      }
 +
 +      return ret;
 +}
 +
 +static int __init __gic_init_bases(struct gic_chip_data *gic,
 +                                 int irq_start,
 +                                 struct fwnode_handle *handle)
 +{
 +      char *name;
 +      int i, ret;
 +
 +      if (WARN_ON(!gic || gic->domain))
 +              return -EINVAL;
 +
        if (gic == &gic_data[0]) {
                /*
                 * Initialize the CPU interface map to all CPUs.
                        gic_cpu_map[i] = 0xff;
  #ifdef CONFIG_SMP
                set_smp_cross_call(gic_raise_softirq);
-               register_cpu_notifier(&gic_cpu_notifier);
  #endif
+               cpuhp_setup_state_nocalls(CPUHP_AP_IRQ_GIC_STARTING,
+                                         "AP_IRQ_GIC_STARTING",
+                                         gic_starting_cpu, NULL);
                set_handle_irq(gic_handle_irq);
                if (static_key_true(&supports_deactivate))
                        pr_info("GIC: Using split EOI/Deactivate mode\n");
        }
  
 -      gic_dist_init(gic);
 -      ret = gic_cpu_init(gic);
 -      if (ret)
 -              goto error;
 -
 -      ret = gic_pm_init(gic);
 -      if (ret)
 -              goto error;
 -
 -      return 0;
 -
 -error:
 -      if (IS_ENABLED(CONFIG_GIC_NON_BANKED) && gic->percpu_offset) {
 -              free_percpu(gic->dist_base.percpu_base);
 -              free_percpu(gic->cpu_base.percpu_base);
 +      if (static_key_true(&supports_deactivate) && gic == &gic_data[0]) {
 +              name = kasprintf(GFP_KERNEL, "GICv2");
 +              gic_init_chip(gic, NULL, name, true);
 +      } else {
 +              name = kasprintf(GFP_KERNEL, "GIC-%d", (int)(gic-&gic_data[0]));
 +              gic_init_chip(gic, NULL, name, false);
        }
  
 -      kfree(gic->chip.name);
 +      ret = gic_init_bases(gic, irq_start, handle);
 +      if (ret)
 +              kfree(name);
  
        return ret;
  }
@@@ -1272,7 -1239,7 +1261,7 @@@ static bool gic_check_eoimode(struct de
        return true;
  }
  
 -static int __init gic_of_setup(struct gic_chip_data *gic, struct device_node *node)
 +static int gic_of_setup(struct gic_chip_data *gic, struct device_node *node)
  {
        if (!gic || !node)
                return -EINVAL;
@@@ -1296,34 -1263,6 +1285,34 @@@ error
        return -ENOMEM;
  }
  
 +int gic_of_init_child(struct device *dev, struct gic_chip_data **gic, int irq)
 +{
 +      int ret;
 +
 +      if (!dev || !dev->of_node || !gic || !irq)
 +              return -EINVAL;
 +
 +      *gic = devm_kzalloc(dev, sizeof(**gic), GFP_KERNEL);
 +      if (!*gic)
 +              return -ENOMEM;
 +
 +      gic_init_chip(*gic, dev, dev->of_node->name, false);
 +
 +      ret = gic_of_setup(*gic, dev->of_node);
 +      if (ret)
 +              return ret;
 +
 +      ret = gic_init_bases(*gic, -1, &dev->of_node->fwnode);
 +      if (ret) {
 +              gic_teardown(*gic);
 +              return ret;
 +      }
 +
 +      irq_set_chained_handler_and_data(irq, gic_handle_cascade_irq, *gic);
 +
 +      return 0;
 +}
 +
  static void __init gic_of_setup_kvm_info(struct device_node *node)
  {
        int ret;
@@@ -1403,11 -1342,7 +1392,11 @@@ IRQCHIP_DECLARE(cortex_a7_gic, "arm,cor
  IRQCHIP_DECLARE(msm_8660_qgic, "qcom,msm-8660-qgic", gic_of_init);
  IRQCHIP_DECLARE(msm_qgic2, "qcom,msm-qgic2", gic_of_init);
  IRQCHIP_DECLARE(pl390, "arm,pl390", gic_of_init);
 -
 +#else
 +int gic_of_init_child(struct device *dev, struct gic_chip_data **gic, int irq)
 +{
 +      return -ENOTSUPP;
 +}
  #endif
  
  #ifdef CONFIG_ACPI
diff --combined drivers/perf/arm_pmu.c
index 8e4d7f590b064f597393b9c02d1e32bafc0ed4ba,f6ab4f7f75bf97973227023e6d6933c6ca46ad98..6ccb994bdfcbd160148c535f18ea656ea7ee13e6
@@@ -603,8 -603,7 +603,8 @@@ static void cpu_pmu_free_irq(struct arm
  
        irq = platform_get_irq(pmu_device, 0);
        if (irq >= 0 && irq_is_percpu(irq)) {
 -              on_each_cpu(cpu_pmu_disable_percpu_irq, &irq, 1);
 +              on_each_cpu_mask(&cpu_pmu->supported_cpus,
 +                               cpu_pmu_disable_percpu_irq, &irq, 1);
                free_percpu_irq(irq, &hw_events->percpu_pmu);
        } else {
                for (i = 0; i < irqs; ++i) {
@@@ -646,9 -645,7 +646,9 @@@ static int cpu_pmu_request_irq(struct a
                                irq);
                        return err;
                }
 -              on_each_cpu(cpu_pmu_enable_percpu_irq, &irq, 1);
 +
 +              on_each_cpu_mask(&cpu_pmu->supported_cpus,
 +                               cpu_pmu_enable_percpu_irq, &irq, 1);
        } else {
                for (i = 0; i < irqs; ++i) {
                        int cpu = i;
        return 0;
  }
  
+ static DEFINE_MUTEX(arm_pmu_mutex);
+ static LIST_HEAD(arm_pmu_list);
  /*
   * PMU hardware loses all context when a CPU goes offline.
   * When a CPU is hotplugged back in, since some hardware registers are
   * UNKNOWN at reset, the PMU must be explicitly reset to avoid reading
   * junk values out of them.
   */
- static int cpu_pmu_notify(struct notifier_block *b, unsigned long action,
-                         void *hcpu)
+ static int arm_perf_starting_cpu(unsigned int cpu)
  {
-       int cpu = (unsigned long)hcpu;
-       struct arm_pmu *pmu = container_of(b, struct arm_pmu, hotplug_nb);
-       if ((action & ~CPU_TASKS_FROZEN) != CPU_STARTING)
-               return NOTIFY_DONE;
-       if (!cpumask_test_cpu(cpu, &pmu->supported_cpus))
-               return NOTIFY_DONE;
+       struct arm_pmu *pmu;
  
-       if (pmu->reset)
-               pmu->reset(pmu);
-       else
-               return NOTIFY_DONE;
+       mutex_lock(&arm_pmu_mutex);
+       list_for_each_entry(pmu, &arm_pmu_list, entry) {
  
-       return NOTIFY_OK;
+               if (!cpumask_test_cpu(cpu, &pmu->supported_cpus))
+                       continue;
+               if (pmu->reset)
+                       pmu->reset(pmu);
+       }
+       mutex_unlock(&arm_pmu_mutex);
+       return 0;
  }
  
  #ifdef CONFIG_CPU_PM
@@@ -822,10 -818,9 +821,9 @@@ static int cpu_pmu_init(struct arm_pmu 
        if (!cpu_hw_events)
                return -ENOMEM;
  
-       cpu_pmu->hotplug_nb.notifier_call = cpu_pmu_notify;
-       err = register_cpu_notifier(&cpu_pmu->hotplug_nb);
-       if (err)
-               goto out_hw_events;
+       mutex_lock(&arm_pmu_mutex);
+       list_add_tail(&cpu_pmu->entry, &arm_pmu_list);
+       mutex_unlock(&arm_pmu_mutex);
  
        err = cpu_pm_pmu_register(cpu_pmu);
        if (err)
        return 0;
  
  out_unregister:
-       unregister_cpu_notifier(&cpu_pmu->hotplug_nb);
- out_hw_events:
+       mutex_lock(&arm_pmu_mutex);
+       list_del(&cpu_pmu->entry);
+       mutex_unlock(&arm_pmu_mutex);
        free_percpu(cpu_hw_events);
        return err;
  }
  static void cpu_pmu_destroy(struct arm_pmu *cpu_pmu)
  {
        cpu_pm_pmu_unregister(cpu_pmu);
-       unregister_cpu_notifier(&cpu_pmu->hotplug_nb);
+       mutex_lock(&arm_pmu_mutex);
+       list_del(&cpu_pmu->entry);
+       mutex_unlock(&arm_pmu_mutex);
        free_percpu(cpu_pmu->hw_events);
  }
  
@@@ -964,23 -962,9 +965,23 @@@ static int of_pmu_irq_cfg(struct arm_pm
                i++;
        } while (1);
  
 -      /* If we didn't manage to parse anything, claim to support all CPUs */
 -      if (cpumask_weight(&pmu->supported_cpus) == 0)
 -              cpumask_setall(&pmu->supported_cpus);
 +      /* If we didn't manage to parse anything, try the interrupt affinity */
 +      if (cpumask_weight(&pmu->supported_cpus) == 0) {
 +              if (!using_spi) {
 +                      /* If using PPIs, check the affinity of the partition */
 +                      int ret, irq;
 +
 +                      irq = platform_get_irq(pdev, 0);
 +                      ret = irq_get_percpu_devid_partition(irq, &pmu->supported_cpus);
 +                      if (ret) {
 +                              kfree(irqs);
 +                              return ret;
 +                      }
 +              } else {
 +                      /* Otherwise default to all CPUs */
 +                      cpumask_setall(&pmu->supported_cpus);
 +              }
 +      }
  
        /* If we matched up the IRQ affinities, use them to route the SPIs */
        if (using_spi && i == pdev->num_resources)
@@@ -1061,3 -1045,17 +1062,17 @@@ out_free
        kfree(pmu);
        return ret;
  }
+ static int arm_pmu_hp_init(void)
+ {
+       int ret;
+       ret = cpuhp_setup_state_nocalls(CPUHP_AP_PERF_ARM_STARTING,
+                                       "AP_PERF_ARM_STARTING",
+                                       arm_perf_starting_cpu, NULL);
+       if (ret)
+               pr_err("CPU hotplug notifier for ARM PMU could not be registered: %d\n",
+                      ret);
+       return ret;
+ }
+ subsys_initcall(arm_pmu_hp_init);
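The arm_pmu conversion above swaps one notifier per PMU instance for a single hotplug state whose callback walks a global, mutex-protected list of registered PMUs. The sketch below illustrates that one-state-many-instances shape with hypothetical quux_* names; it uses a dynamic online state for simplicity, whereas the driver itself registers the fixed CPUHP_AP_PERF_ARM_STARTING state from a subsys_initcall:

#include <linux/cpuhotplug.h>
#include <linux/init.h>
#include <linux/list.h>
#include <linux/mutex.h>

struct quux_pmu {
        struct list_head entry;
        void (*reset)(struct quux_pmu *pmu);
};

static DEFINE_MUTEX(quux_mutex);
static LIST_HEAD(quux_list);    /* instances add/remove themselves here */

static int quux_cpu_online(unsigned int cpu)
{
        struct quux_pmu *pmu;

        mutex_lock(&quux_mutex);
        list_for_each_entry(pmu, &quux_list, entry)
                if (pmu->reset)
                        pmu->reset(pmu);
        mutex_unlock(&quux_mutex);
        return 0;
}

static int __init quux_hp_init(void)
{
        int ret;

        ret = cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN, "quux:online",
                                        quux_cpu_online, NULL);
        return ret < 0 ? ret : 0;
}
subsys_initcall(quux_hp_init);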
index e1f921c2e4e05764509bf1887789d4c863b39677,ddd3dab0f39e457446185a84e895f3b87d00effc..8ed4326164cc843b41da6fbfe69d85cee2d61232
@@@ -69,22 -69,9 +69,22 @@@ struct perf_callchain_entry_ctx 
        bool                        contexts_maxed;
  };
  
 +typedef unsigned long (*perf_copy_f)(void *dst, const void *src,
 +                                   unsigned long off, unsigned long len);
 +
 +struct perf_raw_frag {
 +      union {
 +              struct perf_raw_frag    *next;
 +              unsigned long           pad;
 +      };
 +      perf_copy_f                     copy;
 +      void                            *data;
 +      u32                             size;
 +} __packed;
 +
  struct perf_raw_record {
 +      struct perf_raw_frag            frag;
        u32                             size;
 -      void                            *data;
  };
  
  /*
@@@ -530,11 -517,6 +530,11 @@@ struct swevent_hlist 
  struct perf_cgroup;
  struct ring_buffer;
  
 +struct pmu_event_list {
 +      raw_spinlock_t          lock;
 +      struct list_head        list;
 +};
 +
  /**
   * struct perf_event - performance event kernel representation:
   */
@@@ -693,7 -675,6 +693,7 @@@ struct perf_event 
        int                             cgrp_defer_enabled;
  #endif
  
 +      struct list_head                sb_list;
  #endif /* CONFIG_PERF_EVENTS */
  };
  
@@@ -1093,7 -1074,7 +1093,7 @@@ extern void perf_callchain_kernel(struc
  extern struct perf_callchain_entry *
  get_perf_callchain(struct pt_regs *regs, u32 init_nr, bool kernel, bool user,
                   u32 max_stack, bool crosstask, bool add_mark);
 -extern int get_callchain_buffers(void);
 +extern int get_callchain_buffers(int max_stack);
  extern void put_callchain_buffers(void);
  
  extern int sysctl_perf_event_max_stack;
@@@ -1302,61 -1283,14 +1302,26 @@@ extern void perf_restore_debug_store(vo
  static inline void perf_restore_debug_store(void)                     { }
  #endif
  
 +static __always_inline bool perf_raw_frag_last(const struct perf_raw_frag *frag)
 +{
 +      return frag->pad < sizeof(u64);
 +}
 +
  #define perf_output_put(handle, x) perf_output_copy((handle), &(x), sizeof(x))
  
- /*
-  * This has to have a higher priority than migration_notifier in sched/core.c.
-  */
- #define perf_cpu_notifier(fn)                                         \
- do {                                                                  \
-       static struct notifier_block fn##_nb =                          \
-               { .notifier_call = fn, .priority = CPU_PRI_PERF };      \
-       unsigned long cpu = smp_processor_id();                         \
-       unsigned long flags;                                            \
-                                                                       \
-       cpu_notifier_register_begin();                                  \
-       fn(&fn##_nb, (unsigned long)CPU_UP_PREPARE,                     \
-               (void *)(unsigned long)cpu);                            \
-       local_irq_save(flags);                                          \
-       fn(&fn##_nb, (unsigned long)CPU_STARTING,                       \
-               (void *)(unsigned long)cpu);                            \
-       local_irq_restore(flags);                                       \
-       fn(&fn##_nb, (unsigned long)CPU_ONLINE,                         \
-               (void *)(unsigned long)cpu);                            \
-       __register_cpu_notifier(&fn##_nb);                              \
-       cpu_notifier_register_done();                                   \
- } while (0)
- /*
-  * Bare-bones version of perf_cpu_notifier(), which doesn't invoke the
-  * callback for already online CPUs.
-  */
- #define __perf_cpu_notifier(fn)                                               \
- do {                                                                  \
-       static struct notifier_block fn##_nb =                          \
-               { .notifier_call = fn, .priority = CPU_PRI_PERF };      \
-                                                                       \
-       __register_cpu_notifier(&fn##_nb);                              \
- } while (0)
  struct perf_pmu_events_attr {
        struct device_attribute attr;
        u64 id;
        const char *event_str;
  };
  
 +struct perf_pmu_events_ht_attr {
 +      struct device_attribute                 attr;
 +      u64                                     id;
 +      const char                              *event_str_ht;
 +      const char                              *event_str_noht;
 +};
 +
  ssize_t perf_event_sysfs_show(struct device *dev, struct device_attribute *attr,
                              char *page);
  
@@@ -1385,4 -1319,13 +1350,13 @@@ _name##_show(struct device *dev,                                      
                                                                        \
  static struct device_attribute format_attr_##_name = __ATTR_RO(_name)
  
+ /* Performance counter hotplug functions */
+ #ifdef CONFIG_PERF_EVENTS
+ int perf_event_init_cpu(unsigned int cpu);
+ int perf_event_exit_cpu(unsigned int cpu);
+ #else
+ #define perf_event_init_cpu   NULL
+ #define perf_event_exit_cpu   NULL
+ #endif
  #endif /* _LINUX_PERF_EVENT_H */
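The fragmented perf_raw_record introduced above lets a raw sample be stitched together from several buffers without first copying them into one. The sketch below is illustrative only: build_two_frag_record(), the meta/payload buffers and my_copy_cb() are invented, and the copy-callback convention (return the number of bytes not copied, as with copy_from_user()) is my reading of the output path, not something spelled out in this header.

#include <linux/perf_event.h>
#include <linux/string.h>

/* hypothetical copy callback, invoked via frag->copy / __output_custom() */
static unsigned long my_copy_cb(void *dst, const void *src,
                                unsigned long off, unsigned long len)
{
        memcpy(dst, (const char *)src + off, len);
        return 0;               /* 0 bytes left uncopied */
}

static void build_two_frag_record(void *meta, u32 meta_len,
                                  void *payload, u32 payload_len)
{
        struct perf_raw_frag payload_frag = {
                .copy = my_copy_cb,
                .data = payload,
                .size = payload_len,
        };
        struct perf_raw_record raw = {
                .frag = {
                        .next = &payload_frag,  /* chaining overlays .pad */
                        .data = meta,
                        .size = meta_len,
                },
        };

        /*
         * raw.size and the final frag->pad are filled in later by
         * perf_prepare_sample(); perf_raw_frag_last() recognises the end
         * of the chain because a real pointer is never < sizeof(u64).
         */
        (void)raw;
}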
diff --combined kernel/events/core.c
index 09ae27b353c1e31a021c0602ce1d33a19dccf083,f3ef1c29a7c958e669b53b729a1af62291604c43..356a6c7cb52a08819739c8ea712c86fcc520448c
@@@ -335,7 -335,6 +335,7 @@@ static atomic_t perf_sched_count
  
  static DEFINE_PER_CPU(atomic_t, perf_cgroup_events);
  static DEFINE_PER_CPU(int, perf_sched_cb_usages);
 +static DEFINE_PER_CPU(struct pmu_event_list, pmu_sb_events);
  
  static atomic_t nr_mmap_events __read_mostly;
  static atomic_t nr_comm_events __read_mostly;
@@@ -397,13 -396,6 +397,13 @@@ int perf_proc_update_handler(struct ctl
        if (ret || !write)
                return ret;
  
 +      /*
 +       * If throttling is disabled don't allow the write:
 +       */
 +      if (sysctl_perf_cpu_time_max_percent == 100 ||
 +          sysctl_perf_cpu_time_max_percent == 0)
 +              return -EINVAL;
 +
        max_samples_per_tick = DIV_ROUND_UP(sysctl_perf_event_sample_rate, HZ);
        perf_sample_period_ns = NSEC_PER_SEC / sysctl_perf_event_sample_rate;
        update_perf_cpu_limits();
@@@ -3694,39 -3686,6 +3694,39 @@@ static void free_event_rcu(struct rcu_h
  static void ring_buffer_attach(struct perf_event *event,
                               struct ring_buffer *rb);
  
 +static void detach_sb_event(struct perf_event *event)
 +{
 +      struct pmu_event_list *pel = per_cpu_ptr(&pmu_sb_events, event->cpu);
 +
 +      raw_spin_lock(&pel->lock);
 +      list_del_rcu(&event->sb_list);
 +      raw_spin_unlock(&pel->lock);
 +}
 +
 +static bool is_sb_event(struct perf_event *event)
 +{
 +      struct perf_event_attr *attr = &event->attr;
 +
 +      if (event->parent)
 +              return false;
 +
 +      if (event->attach_state & PERF_ATTACH_TASK)
 +              return false;
 +
 +      if (attr->mmap || attr->mmap_data || attr->mmap2 ||
 +          attr->comm || attr->comm_exec ||
 +          attr->task ||
 +          attr->context_switch)
 +              return true;
 +      return false;
 +}
 +
 +static void unaccount_pmu_sb_event(struct perf_event *event)
 +{
 +      if (is_sb_event(event))
 +              detach_sb_event(event);
 +}
 +
  static void unaccount_event_cpu(struct perf_event *event, int cpu)
  {
        if (event->parent)
@@@ -3790,8 -3749,6 +3790,8 @@@ static void unaccount_event(struct perf
        }
  
        unaccount_event_cpu(event, event->cpu);
 +
 +      unaccount_pmu_sb_event(event);
  }
  
  static void perf_sched_delayed(struct work_struct *work)
@@@ -5617,26 -5574,16 +5617,26 @@@ void perf_output_sample(struct perf_out
        }
  
        if (sample_type & PERF_SAMPLE_RAW) {
 -              if (data->raw) {
 -                      u32 raw_size = data->raw->size;
 -                      u32 real_size = round_up(raw_size + sizeof(u32),
 -                                               sizeof(u64)) - sizeof(u32);
 -                      u64 zero = 0;
 -
 -                      perf_output_put(handle, real_size);
 -                      __output_copy(handle, data->raw->data, raw_size);
 -                      if (real_size - raw_size)
 -                              __output_copy(handle, &zero, real_size - raw_size);
 +              struct perf_raw_record *raw = data->raw;
 +
 +              if (raw) {
 +                      struct perf_raw_frag *frag = &raw->frag;
 +
 +                      perf_output_put(handle, raw->size);
 +                      do {
 +                              if (frag->copy) {
 +                                      __output_custom(handle, frag->copy,
 +                                                      frag->data, frag->size);
 +                              } else {
 +                                      __output_copy(handle, frag->data,
 +                                                    frag->size);
 +                              }
 +                              if (perf_raw_frag_last(frag))
 +                                      break;
 +                              frag = frag->next;
 +                      } while (1);
 +                      if (frag->pad)
 +                              __output_skip(handle, NULL, frag->pad);
                } else {
                        struct {
                                u32     size;
@@@ -5761,28 -5708,14 +5761,28 @@@ void perf_prepare_sample(struct perf_ev
        }
  
        if (sample_type & PERF_SAMPLE_RAW) {
 -              int size = sizeof(u32);
 -
 -              if (data->raw)
 -                      size += data->raw->size;
 -              else
 -                      size += sizeof(u32);
 +              struct perf_raw_record *raw = data->raw;
 +              int size;
 +
 +              if (raw) {
 +                      struct perf_raw_frag *frag = &raw->frag;
 +                      u32 sum = 0;
 +
 +                      do {
 +                              sum += frag->size;
 +                              if (perf_raw_frag_last(frag))
 +                                      break;
 +                              frag = frag->next;
 +                      } while (1);
 +
 +                      size = round_up(sum + sizeof(u32), sizeof(u64));
 +                      raw->size = size - sizeof(u32);
 +                      frag->pad = raw->size - sum;
 +              } else {
 +                      size = sizeof(u64);
 +              }
  
 -              header->size += round_up(size, sizeof(u64));
 +              header->size += size;
        }
  
        if (sample_type & PERF_SAMPLE_BRANCH_STACK) {
@@@ -5942,11 -5875,11 +5942,11 @@@ perf_event_read_event(struct perf_even
        perf_output_end(&handle);
  }
  
 -typedef void (perf_event_aux_output_cb)(struct perf_event *event, void *data);
 +typedef void (perf_iterate_f)(struct perf_event *event, void *data);
  
  static void
 -perf_event_aux_ctx(struct perf_event_context *ctx,
 -                 perf_event_aux_output_cb output,
 +perf_iterate_ctx(struct perf_event_context *ctx,
 +                 perf_iterate_f output,
                   void *data, bool all)
  {
        struct perf_event *event;
        }
  }
  
 -static void
 -perf_event_aux_task_ctx(perf_event_aux_output_cb output, void *data,
 -                      struct perf_event_context *task_ctx)
 +static void perf_iterate_sb_cpu(perf_iterate_f output, void *data)
  {
 -      rcu_read_lock();
 -      preempt_disable();
 -      perf_event_aux_ctx(task_ctx, output, data, false);
 -      preempt_enable();
 -      rcu_read_unlock();
 +      struct pmu_event_list *pel = this_cpu_ptr(&pmu_sb_events);
 +      struct perf_event *event;
 +
 +      list_for_each_entry_rcu(event, &pel->list, sb_list) {
 +              if (event->state < PERF_EVENT_STATE_INACTIVE)
 +                      continue;
 +              if (!event_filter_match(event))
 +                      continue;
 +              output(event, data);
 +      }
  }
  
 +/*
 + * Iterate all events that need to receive side-band events.
 + *
 + * For new callers; ensure that account_pmu_sb_event() includes
 + * your event, otherwise it might not get delivered.
 + */
  static void
 -perf_event_aux(perf_event_aux_output_cb output, void *data,
 +perf_iterate_sb(perf_iterate_f output, void *data,
               struct perf_event_context *task_ctx)
  {
 -      struct perf_cpu_context *cpuctx;
        struct perf_event_context *ctx;
 -      struct pmu *pmu;
        int ctxn;
  
 +      rcu_read_lock();
 +      preempt_disable();
 +
        /*
 -       * If we have task_ctx != NULL we only notify
 -       * the task context itself. The task_ctx is set
 -       * only for EXIT events before releasing task
 +       * If we have task_ctx != NULL we only notify the task context itself.
 +       * The task_ctx is set only for EXIT events before releasing task
         * context.
         */
        if (task_ctx) {
 -              perf_event_aux_task_ctx(output, data, task_ctx);
 -              return;
 +              perf_iterate_ctx(task_ctx, output, data, false);
 +              goto done;
        }
  
 -      rcu_read_lock();
 -      list_for_each_entry_rcu(pmu, &pmus, entry) {
 -              cpuctx = get_cpu_ptr(pmu->pmu_cpu_context);
 -              if (cpuctx->unique_pmu != pmu)
 -                      goto next;
 -              perf_event_aux_ctx(&cpuctx->ctx, output, data, false);
 -              ctxn = pmu->task_ctx_nr;
 -              if (ctxn < 0)
 -                      goto next;
 +      perf_iterate_sb_cpu(output, data);
 +
 +      for_each_task_context_nr(ctxn) {
                ctx = rcu_dereference(current->perf_event_ctxp[ctxn]);
                if (ctx)
 -                      perf_event_aux_ctx(ctx, output, data, false);
 -next:
 -              put_cpu_ptr(pmu->pmu_cpu_context);
 +                      perf_iterate_ctx(ctx, output, data, false);
        }
 +done:
 +      preempt_enable();
        rcu_read_unlock();
  }
  
@@@ -6060,7 -5990,7 +6060,7 @@@ void perf_event_exec(void
  
                perf_event_enable_on_exec(ctxn);
  
 -              perf_event_aux_ctx(ctx, perf_event_addr_filters_exec, NULL,
 +              perf_iterate_ctx(ctx, perf_event_addr_filters_exec, NULL,
                                   true);
        }
        rcu_read_unlock();
@@@ -6104,9 -6034,9 +6104,9 @@@ static int __perf_pmu_output_stop(void 
        };
  
        rcu_read_lock();
 -      perf_event_aux_ctx(&cpuctx->ctx, __perf_event_output_stop, &ro, false);
 +      perf_iterate_ctx(&cpuctx->ctx, __perf_event_output_stop, &ro, false);
        if (cpuctx->task_ctx)
 -              perf_event_aux_ctx(cpuctx->task_ctx, __perf_event_output_stop,
 +              perf_iterate_ctx(cpuctx->task_ctx, __perf_event_output_stop,
                                   &ro, false);
        rcu_read_unlock();
  
@@@ -6235,7 -6165,7 +6235,7 @@@ static void perf_event_task(struct task
                },
        };
  
 -      perf_event_aux(perf_event_task_output,
 +      perf_iterate_sb(perf_event_task_output,
                       &task_event,
                       task_ctx);
  }
@@@ -6314,7 -6244,7 +6314,7 @@@ static void perf_event_comm_event(struc
  
        comm_event->event_id.header.size = sizeof(comm_event->event_id) + size;
  
 -      perf_event_aux(perf_event_comm_output,
 +      perf_iterate_sb(perf_event_comm_output,
                       comm_event,
                       NULL);
  }
@@@ -6545,7 -6475,7 +6545,7 @@@ got_name
  
        mmap_event->event_id.header.size = sizeof(mmap_event->event_id) + size;
  
 -      perf_event_aux(perf_event_mmap_output,
 +      perf_iterate_sb(perf_event_mmap_output,
                       mmap_event,
                       NULL);
  
@@@ -6628,7 -6558,7 +6628,7 @@@ static void perf_addr_filters_adjust(st
                if (!ctx)
                        continue;
  
 -              perf_event_aux_ctx(ctx, __perf_addr_filters_adjust, vma, true);
 +              perf_iterate_ctx(ctx, __perf_addr_filters_adjust, vma, true);
        }
        rcu_read_unlock();
  }
@@@ -6815,7 -6745,7 +6815,7 @@@ static void perf_event_switch(struct ta
                },
        };
  
 -      perf_event_aux(perf_event_switch_output,
 +      perf_iterate_sb(perf_event_switch_output,
                       &switch_event,
                       NULL);
  }
@@@ -7422,7 -7352,7 +7422,7 @@@ static struct pmu perf_swevent = 
  static int perf_tp_filter_match(struct perf_event *event,
                                struct perf_sample_data *data)
  {
 -      void *record = data->raw->data;
 +      void *record = data->raw->frag.data;
  
        /* only top level events have filters set */
        if (event->parent)
@@@ -7478,10 -7408,8 +7478,10 @@@ void perf_tp_event(u16 event_type, u64 
        struct perf_event *event;
  
        struct perf_raw_record raw = {
 -              .size = entry_size,
 -              .data = record,
 +              .frag = {
 +                      .size = entry_size,
 +                      .data = record,
 +              },
        };
  
        perf_sample_data_init(&data, 0, 0);
@@@ -7622,7 -7550,7 +7622,7 @@@ static void perf_event_free_bpf_prog(st
        prog = event->tp_event->prog;
        if (prog) {
                event->tp_event->prog = NULL;
 -              bpf_prog_put_rcu(prog);
 +              bpf_prog_put(prog);
        }
  }
  
@@@ -8739,28 -8667,6 +8739,28 @@@ unlock
        return pmu;
  }
  
 +static void attach_sb_event(struct perf_event *event)
 +{
 +      struct pmu_event_list *pel = per_cpu_ptr(&pmu_sb_events, event->cpu);
 +
 +      raw_spin_lock(&pel->lock);
 +      list_add_rcu(&event->sb_list, &pel->list);
 +      raw_spin_unlock(&pel->lock);
 +}
 +
 +/*
 + * We keep a list of all !task (and therefore per-cpu) events
 + * that need to receive side-band records.
 + *
 + * This avoids having to scan all the various PMU per-cpu contexts
 + * looking for them.
 + */
 +static void account_pmu_sb_event(struct perf_event *event)
 +{
 +      if (is_sb_event(event))
 +              attach_sb_event(event);
 +}
 +
  static void account_event_cpu(struct perf_event *event, int cpu)
  {
        if (event->parent)
@@@ -8841,8 -8747,6 +8841,8 @@@ static void account_event(struct perf_e
  enabled:
  
        account_event_cpu(event, event->cpu);
 +
 +      account_pmu_sb_event(event);
  }
  
  /*
@@@ -8991,7 -8895,7 +8991,7 @@@ perf_event_alloc(struct perf_event_att
  
        if (!event->parent) {
                if (event->attr.sample_type & PERF_SAMPLE_CALLCHAIN) {
 -                      err = get_callchain_buffers();
 +                      err = get_callchain_buffers(attr->sample_max_stack);
                        if (err)
                                goto err_addr_filters;
                }
@@@ -9313,9 -9217,6 +9313,9 @@@ SYSCALL_DEFINE5(perf_event_open
                        return -EINVAL;
        }
  
 +      if (!attr.sample_max_stack)
 +              attr.sample_max_stack = sysctl_perf_event_max_stack;
 +
        /*
         * In cgroup mode, the pid argument is used to pass the fd
         * opened to the cgroup directory in cgroupfs. The cpu argument
  
        if (is_sampling_event(event)) {
                if (event->pmu->capabilities & PERF_PMU_CAP_NO_INTERRUPT) {
 -                      err = -ENOTSUPP;
 +                      err = -EOPNOTSUPP;
                        goto err_alloc;
                }
        }
@@@ -10351,13 -10252,10 +10351,13 @@@ static void __init perf_event_init_all_
                swhash = &per_cpu(swevent_htable, cpu);
                mutex_init(&swhash->hlist_mutex);
                INIT_LIST_HEAD(&per_cpu(active_ctx_list, cpu));
 +
 +              INIT_LIST_HEAD(&per_cpu(pmu_sb_events.list, cpu));
 +              raw_spin_lock_init(&per_cpu(pmu_sb_events.lock, cpu));
        }
  }
  
- static void perf_event_init_cpu(int cpu)
+ int perf_event_init_cpu(unsigned int cpu)
  {
        struct swevent_htable *swhash = &per_cpu(swevent_htable, cpu);
  
                rcu_assign_pointer(swhash->swevent_hlist, hlist);
        }
        mutex_unlock(&swhash->hlist_mutex);
+       return 0;
  }
  
  #if defined CONFIG_HOTPLUG_CPU || defined CONFIG_KEXEC_CORE
@@@ -10401,14 -10300,17 +10402,17 @@@ static void perf_event_exit_cpu_context
        }
        srcu_read_unlock(&pmus_srcu, idx);
  }
+ #else
+ static void perf_event_exit_cpu_context(int cpu) { }
+ #endif
  
- static void perf_event_exit_cpu(int cpu)
+ int perf_event_exit_cpu(unsigned int cpu)
  {
        perf_event_exit_cpu_context(cpu);
+       return 0;
  }
- #else
- static inline void perf_event_exit_cpu(int cpu) { }
- #endif
  
  static int
  perf_reboot(struct notifier_block *notifier, unsigned long val, void *v)
@@@ -10430,46 -10332,6 +10434,6 @@@ static struct notifier_block perf_reboo
        .priority = INT_MIN,
  };
  
- static int
- perf_cpu_notify(struct notifier_block *self, unsigned long action, void *hcpu)
- {
-       unsigned int cpu = (long)hcpu;
-       switch (action & ~CPU_TASKS_FROZEN) {
-       case CPU_UP_PREPARE:
-               /*
-                * This must be done before the CPU comes alive, because the
-                * moment we can run tasks we can encounter (software) events.
-                *
-                * Specifically, someone can have inherited events on kthreadd
-                * or a pre-existing worker thread that gets re-bound.
-                */
-               perf_event_init_cpu(cpu);
-               break;
-       case CPU_DOWN_PREPARE:
-               /*
-                * This must be done before the CPU dies because after that an
-                * active event might want to IPI the CPU and that'll not work
-                * so great for dead CPUs.
-                *
-                * XXX smp_call_function_single() return -ENXIO without a warn
-                * so we could possibly deal with this.
-                *
-                * This is safe against new events arriving because
-                * sys_perf_event_open() serializes against hotplug using
-                * get_online_cpus().
-                */
-               perf_event_exit_cpu(cpu);
-               break;
-       default:
-               break;
-       }
-       return NOTIFY_OK;
- }
  void __init perf_event_init(void)
  {
        int ret;
        perf_pmu_register(&perf_cpu_clock, NULL, -1);
        perf_pmu_register(&perf_task_clock, NULL, -1);
        perf_tp_register();
-       perf_cpu_notifier(perf_cpu_notify);
+       perf_event_init_cpu(smp_processor_id());
        register_reboot_notifier(&perf_reboot_notifier);
  
        ret = init_hw_breakpoint();
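As a concrete check of the PERF_SAMPLE_RAW sizing added to perf_prepare_sample() and consumed by perf_output_sample() above, the comment block below (not part of the commit) walks the arithmetic for an assumed chain of two fragments of 5 and 12 bytes.

/*
 * sum        = 5 + 12               = 17 bytes of fragment data
 * size       = round_up(17 + 4, 8)  = 24 (u32 size header included)
 * raw->size  = 24 - 4               = 20 (the value perf_output_put() writes)
 * frag->pad  = 20 - 17              = 3  (emitted via __output_skip())
 *
 * On output: 4 (size) + 5 + 12 (fragments) + 3 (pad) = 24 bytes, so the
 * next field in the sample stays u64-aligned, matching the
 * header->size += size accounting in perf_prepare_sample().
 */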
diff --combined kernel/smp.c
index 36552beed39713526aa384a9cf9f1878630834fb,7180491c9678d775989e2090c15629fbb47af22a..3aa642d39c0370849372cab0c95f9a5d7760e794
@@@ -33,69 -33,54 +33,54 @@@ static DEFINE_PER_CPU_SHARED_ALIGNED(st
  
  static void flush_smp_call_function_queue(bool warn_cpu_offline);
  
- static int
- hotplug_cfd(struct notifier_block *nfb, unsigned long action, void *hcpu)
+ int smpcfd_prepare_cpu(unsigned int cpu)
  {
-       long cpu = (long)hcpu;
        struct call_function_data *cfd = &per_cpu(cfd_data, cpu);
  
-       switch (action) {
-       case CPU_UP_PREPARE:
-       case CPU_UP_PREPARE_FROZEN:
-               if (!zalloc_cpumask_var_node(&cfd->cpumask, GFP_KERNEL,
-                               cpu_to_node(cpu)))
-                       return notifier_from_errno(-ENOMEM);
-               cfd->csd = alloc_percpu(struct call_single_data);
-               if (!cfd->csd) {
-                       free_cpumask_var(cfd->cpumask);
-                       return notifier_from_errno(-ENOMEM);
-               }
-               break;
- #ifdef CONFIG_HOTPLUG_CPU
-       case CPU_UP_CANCELED:
-       case CPU_UP_CANCELED_FROZEN:
-               /* Fall-through to the CPU_DEAD[_FROZEN] case. */
-       case CPU_DEAD:
-       case CPU_DEAD_FROZEN:
+       if (!zalloc_cpumask_var_node(&cfd->cpumask, GFP_KERNEL,
+                                    cpu_to_node(cpu)))
+               return -ENOMEM;
+       cfd->csd = alloc_percpu(struct call_single_data);
+       if (!cfd->csd) {
                free_cpumask_var(cfd->cpumask);
-               free_percpu(cfd->csd);
-               break;
-       case CPU_DYING:
-       case CPU_DYING_FROZEN:
-               /*
-                * The IPIs for the smp-call-function callbacks queued by other
-                * CPUs might arrive late, either due to hardware latencies or
-                * because this CPU disabled interrupts (inside stop-machine)
-                * before the IPIs were sent. So flush out any pending callbacks
-                * explicitly (without waiting for the IPIs to arrive), to
-                * ensure that the outgoing CPU doesn't go offline with work
-                * still pending.
-                */
-               flush_smp_call_function_queue(false);
-               break;
- #endif
-       };
-       return NOTIFY_OK;
+               return -ENOMEM;
+       }
+       return 0;
  }
  
- static struct notifier_block hotplug_cfd_notifier = {
-       .notifier_call          = hotplug_cfd,
- };
+ int smpcfd_dead_cpu(unsigned int cpu)
+ {
+       struct call_function_data *cfd = &per_cpu(cfd_data, cpu);
+       free_cpumask_var(cfd->cpumask);
+       free_percpu(cfd->csd);
+       return 0;
+ }
+ int smpcfd_dying_cpu(unsigned int cpu)
+ {
+       /*
+        * The IPIs for the smp-call-function callbacks queued by other
+        * CPUs might arrive late, either due to hardware latencies or
+        * because this CPU disabled interrupts (inside stop-machine)
+        * before the IPIs were sent. So flush out any pending callbacks
+        * explicitly (without waiting for the IPIs to arrive), to
+        * ensure that the outgoing CPU doesn't go offline with work
+        * still pending.
+        */
+       flush_smp_call_function_queue(false);
+       return 0;
+ }
  
  void __init call_function_init(void)
  {
-       void *cpu = (void *)(long)smp_processor_id();
        int i;
  
        for_each_possible_cpu(i)
                init_llist_head(&per_cpu(call_single_queue, i));
  
-       hotplug_cfd(&hotplug_cfd_notifier, CPU_UP_PREPARE, cpu);
-       register_cpu_notifier(&hotplug_cfd_notifier);
+       smpcfd_prepare_cpu(smp_processor_id());
  }
  
  /*
   */
  static __always_inline void csd_lock_wait(struct call_single_data *csd)
  {
 -      smp_cond_acquire(!(csd->flags & CSD_FLAG_LOCK));
 +      smp_cond_load_acquire(&csd->flags, !(VAL & CSD_FLAG_LOCK));
  }
  
  static __always_inline void csd_lock(struct call_single_data *csd)
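A side note on the csd_lock_wait() change above: smp_cond_load_acquire(ptr, cond) spins, reloading *ptr on each iteration, until the condition (written in terms of the token VAL, which holds the freshly loaded value) becomes true, and the final load has acquire ordering. A minimal, illustrative use with an invented flag follows; the exact header needed for the helper may vary by architecture.

#include <linux/atomic.h>       /* atomic/barrier helpers; header choice approximate */

static unsigned int mydrv_ready;

static void mydrv_wait_for_ready(void)
{
        /* spin until another CPU stores a non-zero value to mydrv_ready */
        smp_cond_load_acquire(&mydrv_ready, VAL != 0);
}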
diff --combined kernel/workqueue.c
index d12bd958077e80a8fd36f1162c0cea2881692eaa,c9dd5fbdbf333a785218ed1fe61bcdb732a2189e..ef071ca73fc325e69adb599e7637358c49cd215b
@@@ -4369,8 -4369,8 +4369,8 @@@ static void show_pwq(struct pool_workqu
  /**
   * show_workqueue_state - dump workqueue state
   *
 - * Called from a sysrq handler and prints out all busy workqueues and
 - * pools.
 + * Called from a sysrq handler or try_to_freeze_tasks() and prints out
 + * all busy workqueues and pools.
   */
  void show_workqueue_state(void)
  {
@@@ -4600,91 -4600,76 +4600,72 @@@ static void restore_unbound_workers_cpu
        if (!cpumask_test_cpu(cpu, pool->attrs->cpumask))
                return;
  
 -      /* is @cpu the only online CPU? */
        cpumask_and(&cpumask, pool->attrs->cpumask, cpu_online_mask);
 -      if (cpumask_weight(&cpumask) != 1)
 -              return;
  
        /* as we're called from CPU_ONLINE, the following shouldn't fail */
        for_each_pool_worker(worker, pool)
 -              WARN_ON_ONCE(set_cpus_allowed_ptr(worker->task,
 -                                                pool->attrs->cpumask) < 0);
 +              WARN_ON_ONCE(set_cpus_allowed_ptr(worker->task, &cpumask) < 0);
  }
  
- /*
-  * Workqueues should be brought up before normal priority CPU notifiers.
-  * This will be registered high priority CPU notifier.
-  */
- static int workqueue_cpu_up_callback(struct notifier_block *nfb,
-                                              unsigned long action,
-                                              void *hcpu)
+ int workqueue_prepare_cpu(unsigned int cpu)
+ {
+       struct worker_pool *pool;
+       for_each_cpu_worker_pool(pool, cpu) {
+               if (pool->nr_workers)
+                       continue;
+               if (!create_worker(pool))
+                       return -ENOMEM;
+       }
+       return 0;
+ }
+ int workqueue_online_cpu(unsigned int cpu)
  {
-       int cpu = (unsigned long)hcpu;
        struct worker_pool *pool;
        struct workqueue_struct *wq;
        int pi;
  
-       switch (action & ~CPU_TASKS_FROZEN) {
-       case CPU_UP_PREPARE:
-               for_each_cpu_worker_pool(pool, cpu) {
-                       if (pool->nr_workers)
-                               continue;
-                       if (!create_worker(pool))
-                               return NOTIFY_BAD;
-               }
-               break;
-       case CPU_DOWN_FAILED:
-       case CPU_ONLINE:
-               mutex_lock(&wq_pool_mutex);
+       mutex_lock(&wq_pool_mutex);
  
-               for_each_pool(pool, pi) {
-                       mutex_lock(&pool->attach_mutex);
+       for_each_pool(pool, pi) {
+               mutex_lock(&pool->attach_mutex);
  
-                       if (pool->cpu == cpu)
-                               rebind_workers(pool);
-                       else if (pool->cpu < 0)
-                               restore_unbound_workers_cpumask(pool, cpu);
+               if (pool->cpu == cpu)
+                       rebind_workers(pool);
+               else if (pool->cpu < 0)
+                       restore_unbound_workers_cpumask(pool, cpu);
  
-                       mutex_unlock(&pool->attach_mutex);
-               }
+               mutex_unlock(&pool->attach_mutex);
+       }
  
-               /* update NUMA affinity of unbound workqueues */
-               list_for_each_entry(wq, &workqueues, list)
-                       wq_update_unbound_numa(wq, cpu, true);
+       /* update NUMA affinity of unbound workqueues */
+       list_for_each_entry(wq, &workqueues, list)
+               wq_update_unbound_numa(wq, cpu, true);
  
-               mutex_unlock(&wq_pool_mutex);
-               break;
-       }
-       return NOTIFY_OK;
+       mutex_unlock(&wq_pool_mutex);
+       return 0;
  }
  
- /*
-  * Workqueues should be brought down after normal priority CPU notifiers.
-  * This will be registered as low priority CPU notifier.
-  */
- static int workqueue_cpu_down_callback(struct notifier_block *nfb,
-                                                unsigned long action,
-                                                void *hcpu)
+ int workqueue_offline_cpu(unsigned int cpu)
  {
-       int cpu = (unsigned long)hcpu;
        struct work_struct unbind_work;
        struct workqueue_struct *wq;
  
-       switch (action & ~CPU_TASKS_FROZEN) {
-       case CPU_DOWN_PREPARE:
-               /* unbinding per-cpu workers should happen on the local CPU */
-               INIT_WORK_ONSTACK(&unbind_work, wq_unbind_fn);
-               queue_work_on(cpu, system_highpri_wq, &unbind_work);
-               /* update NUMA affinity of unbound workqueues */
-               mutex_lock(&wq_pool_mutex);
-               list_for_each_entry(wq, &workqueues, list)
-                       wq_update_unbound_numa(wq, cpu, false);
-               mutex_unlock(&wq_pool_mutex);
-               /* wait for per-cpu unbinding to finish */
-               flush_work(&unbind_work);
-               destroy_work_on_stack(&unbind_work);
-               break;
-       }
-       return NOTIFY_OK;
+       /* unbinding per-cpu workers should happen on the local CPU */
+       INIT_WORK_ONSTACK(&unbind_work, wq_unbind_fn);
+       queue_work_on(cpu, system_highpri_wq, &unbind_work);
+       /* update NUMA affinity of unbound workqueues */
+       mutex_lock(&wq_pool_mutex);
+       list_for_each_entry(wq, &workqueues, list)
+               wq_update_unbound_numa(wq, cpu, false);
+       mutex_unlock(&wq_pool_mutex);
+       /* wait for per-cpu unbinding to finish */
+       flush_work(&unbind_work);
+       destroy_work_on_stack(&unbind_work);
+       return 0;
  }
  
  #ifdef CONFIG_SMP
@@@ -5486,9 -5471,6 +5467,6 @@@ static int __init init_workqueues(void
  
        pwq_cache = KMEM_CACHE(pool_workqueue, SLAB_PANIC);
  
-       cpu_notifier(workqueue_cpu_up_callback, CPU_PRI_WORKQUEUE_UP);
-       hotcpu_notifier(workqueue_cpu_down_callback, CPU_PRI_WORKQUEUE_DOWN);
        wq_numa_init();
  
        /* initialize CPU pools */
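The unbind_work handling in workqueue_offline_cpu() above is an instance of a more general "run a function on a given CPU and wait for it" idiom. A stripped-down sketch, illustrative only (my_on_cpu_fn() and run_on_cpu_and_wait() are invented; the target CPU must still be online for the per-cpu queueing to stay on that CPU):

#include <linux/workqueue.h>

static void my_on_cpu_fn(struct work_struct *work)
{
        /* executes in a highpri kworker bound to the chosen CPU */
}

static void run_on_cpu_and_wait(int cpu)
{
        struct work_struct w;

        INIT_WORK_ONSTACK(&w, my_on_cpu_fn);
        queue_work_on(cpu, system_highpri_wq, &w);
        flush_work(&w);                 /* wait for it to finish */
        destroy_work_on_stack(&w);      /* pairs with INIT_WORK_ONSTACK */
}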
diff --combined virt/kvm/kvm_main.c
index ce3d8e5be73e38f54d4b2fab2bdb9874abaccf0d,c1d6cf5a74a1609c104202a6f95eaf4055efec7c..2e791367c576c9b2fb7dc6ee30f49f61356d3a05
@@@ -148,7 -148,6 +148,7 @@@ int vcpu_load(struct kvm_vcpu *vcpu
        put_cpu();
        return 0;
  }
 +EXPORT_SYMBOL_GPL(vcpu_load);
  
  void vcpu_put(struct kvm_vcpu *vcpu)
  {
        preempt_enable();
        mutex_unlock(&vcpu->mutex);
  }
 +EXPORT_SYMBOL_GPL(vcpu_put);
  
  static void ack_flush(void *_completed)
  {
@@@ -3050,7 -3048,6 +3050,7 @@@ static int kvm_dev_ioctl_create_vm(unsi
  {
        int r;
        struct kvm *kvm;
 +      struct file *file;
  
        kvm = kvm_create_vm(type);
        if (IS_ERR(kvm))
                return r;
        }
  #endif
 -      r = anon_inode_getfd("kvm-vm", &kvm_vm_fops, kvm, O_RDWR | O_CLOEXEC);
 +      r = get_unused_fd_flags(O_CLOEXEC);
        if (r < 0) {
                kvm_put_kvm(kvm);
                return r;
        }
 +      file = anon_inode_getfile("kvm-vm", &kvm_vm_fops, kvm, O_RDWR);
 +      if (IS_ERR(file)) {
 +              put_unused_fd(r);
 +              kvm_put_kvm(kvm);
 +              return PTR_ERR(file);
 +      }
  
        if (kvm_create_vm_debugfs(kvm, r) < 0) {
 -              kvm_put_kvm(kvm);
 +              put_unused_fd(r);
 +              fput(file);
                return -ENOMEM;
        }
  
 +      fd_install(r, file);
        return r;
  }
  
@@@ -3155,12 -3144,13 +3155,13 @@@ static void hardware_enable_nolock(voi
        }
  }
  
- static void hardware_enable(void)
+ static int kvm_starting_cpu(unsigned int cpu)
  {
        raw_spin_lock(&kvm_count_lock);
        if (kvm_usage_count)
                hardware_enable_nolock(NULL);
        raw_spin_unlock(&kvm_count_lock);
+       return 0;
  }
  
  static void hardware_disable_nolock(void *junk)
        kvm_arch_hardware_disable();
  }
  
- static void hardware_disable(void)
+ static int kvm_dying_cpu(unsigned int cpu)
  {
        raw_spin_lock(&kvm_count_lock);
        if (kvm_usage_count)
                hardware_disable_nolock(NULL);
        raw_spin_unlock(&kvm_count_lock);
+       return 0;
  }
  
  static void hardware_disable_all_nolock(void)
@@@ -3219,21 -3210,6 +3221,6 @@@ static int hardware_enable_all(void
        return r;
  }
  
- static int kvm_cpu_hotplug(struct notifier_block *notifier, unsigned long val,
-                          void *v)
- {
-       val &= ~CPU_TASKS_FROZEN;
-       switch (val) {
-       case CPU_DYING:
-               hardware_disable();
-               break;
-       case CPU_STARTING:
-               hardware_enable();
-               break;
-       }
-       return NOTIFY_OK;
- }
  static int kvm_reboot(struct notifier_block *notifier, unsigned long val,
                      void *v)
  {
@@@ -3500,10 -3476,6 +3487,6 @@@ int kvm_io_bus_unregister_dev(struct kv
        return r;
  }
  
- static struct notifier_block kvm_cpu_notifier = {
-       .notifier_call = kvm_cpu_hotplug,
- };
  static int kvm_debugfs_open(struct inode *inode, struct file *file,
                           int (*get)(void *, u64 *), int (*set)(void *, u64),
                           const char *fmt)
@@@ -3754,7 -3726,8 +3737,8 @@@ int kvm_init(void *opaque, unsigned vcp
                        goto out_free_1;
        }
  
-       r = register_cpu_notifier(&kvm_cpu_notifier);
+       r = cpuhp_setup_state_nocalls(CPUHP_AP_KVM_STARTING, "AP_KVM_STARTING",
+                                     kvm_starting_cpu, kvm_dying_cpu);
        if (r)
                goto out_free_2;
        register_reboot_notifier(&kvm_reboot_notifier);
@@@ -3808,7 -3781,7 +3792,7 @@@ out_free
        kmem_cache_destroy(kvm_vcpu_cache);
  out_free_3:
        unregister_reboot_notifier(&kvm_reboot_notifier);
-       unregister_cpu_notifier(&kvm_cpu_notifier);
+       cpuhp_remove_state_nocalls(CPUHP_AP_KVM_STARTING);
  out_free_2:
  out_free_1:
        kvm_arch_hardware_unsetup();
@@@ -3831,7 -3804,7 +3815,7 @@@ void kvm_exit(void
        kvm_async_pf_deinit();
        unregister_syscore_ops(&kvm_syscore_ops);
        unregister_reboot_notifier(&kvm_reboot_notifier);
-       unregister_cpu_notifier(&kvm_cpu_notifier);
+       cpuhp_remove_state_nocalls(CPUHP_AP_KVM_STARTING);
        on_each_cpu(hardware_disable_nolock, NULL, 1);
        kvm_arch_hardware_unsetup();
        kvm_arch_exit();
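One detail of the KVM conversion above worth spelling out (my reading of the cpuhp API, not stated in the diff): the *_nocalls() variants install or remove the state without invoking the callbacks on CPUs that are already online, which fits KVM enabling virtualization only when the first VM is created and explains the explicit on_each_cpu(hardware_disable_nolock, ...) in kvm_exit() right after the state is removed. For a caller that does want the callbacks run during (un)registration, the plain variants do so automatically; a small illustrative contrast with invented callbacks:

#include <linux/cpuhotplug.h>

static int my_online(unsigned int cpu)  { return 0; }
static int my_offline(unsigned int cpu) { return 0; }

static int my_register(void)
{
        /* installs the callbacks AND runs my_online() on each online CPU;
         * for a dynamic state the allocated slot (> 0) is returned */
        return cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "mydrv:online",
                                 my_online, my_offline);
}

static int my_register_nocalls(void)
{
        /* only installs the callbacks; already-online CPUs are untouched */
        return cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN, "mydrv:online",
                                         my_online, my_offline);
}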