.notifier_call = mvebu_hwcc_notifier,
};
- static int armada_xp_clear_shared_l2_notifier_func(struct notifier_block *nfb,
- unsigned long action, void *hcpu)
+ static int armada_xp_clear_l2_starting(unsigned int cpu)
{
- if (action == CPU_STARTING || action == CPU_STARTING_FROZEN)
- armada_xp_clear_shared_l2();
-
- return NOTIFY_OK;
+ armada_xp_clear_shared_l2();
+ return 0;
}
- static struct notifier_block armada_xp_clear_shared_l2_notifier = {
- .notifier_call = armada_xp_clear_shared_l2_notifier_func,
- .priority = 100,
- };
-
static void __init armada_370_coherency_init(struct device_node *np)
{
struct resource res;
of_node_put(cpu_config_np);
- register_cpu_notifier(&armada_xp_clear_shared_l2_notifier);
-
+ cpuhp_setup_state_nocalls(CPUHP_AP_ARM_MVEBU_COHERENCY,
+ "AP_ARM_MVEBU_COHERENCY",
+ armada_xp_clear_l2_starting, NULL);
exit:
set_cpu_coherent();
}
/*
- * This ioremap hook is used on Armada 375/38x to ensure that PCIe
- * memory areas are mapped as MT_UNCACHED instead of MT_DEVICE. This
- * is needed as a workaround for a deadlock issue between the PCIe
- * interface and the cache controller.
+ * This ioremap hook is used on Armada 375/38x to ensure that all MMIO
+ * areas are mapped as MT_UNCACHED instead of MT_DEVICE. This is
+ * needed for the HW I/O coherency mechanism to work properly without
+ * deadlock.
*/
static void __iomem *
-armada_pcie_wa_ioremap_caller(phys_addr_t phys_addr, size_t size,
- unsigned int mtype, void *caller)
+armada_wa_ioremap_caller(phys_addr_t phys_addr, size_t size,
+ unsigned int mtype, void *caller)
{
- struct resource pcie_mem;
-
- mvebu_mbus_get_pcie_mem_aperture(&pcie_mem);
-
- if (pcie_mem.start <= phys_addr && (phys_addr + size) <= pcie_mem.end)
- mtype = MT_UNCACHED;
-
+ mtype = MT_UNCACHED;
return __arm_ioremap_caller(phys_addr, size, mtype, caller);
}
struct device_node *cache_dn;
coherency_cpu_base = of_iomap(np, 0);
- arch_ioremap_caller = armada_pcie_wa_ioremap_caller;
+ arch_ioremap_caller = armada_wa_ioremap_caller;
+ pci_ioremap_set_mem_type(MT_UNCACHED);
/*
* We should switch the PL310 to I/O coherency mode only if
#include <xen/page.h>
#include <xen/interface/sched.h>
#include <xen/xen-ops.h>
-#include <asm/paravirt.h>
#include <asm/xen/hypervisor.h>
#include <asm/xen/hypercall.h>
+#include <asm/xen/xen-ops.h>
#include <asm/system_misc.h>
+#include <asm/efi.h>
#include <linux/interrupt.h>
#include <linux/irqreturn.h>
#include <linux/module.h>
#include <linux/of.h>
+#include <linux/of_fdt.h>
#include <linux/of_irq.h>
#include <linux/of_address.h>
#include <linux/cpuidle.h>
#include <linux/time64.h>
#include <linux/timekeeping.h>
#include <linux/timekeeper_internal.h>
+#include <linux/acpi.h>
#include <linux/mm.h>
DEFINE_PER_CPU(struct vcpu_info *, xen_vcpu);
static struct vcpu_info __percpu *xen_vcpu_info;
+/* Linux <-> Xen vCPU id mapping */
+DEFINE_PER_CPU(int, xen_vcpu_id) = -1;
+EXPORT_PER_CPU_SYMBOL(xen_vcpu_id);
+
/* These are unused until we support booting "pre-ballooned" */
unsigned long xen_released_pages;
struct xen_memory_region xen_extra_mem[XEN_EXTRA_MEM_MAX_REGIONS] __initdata;
static __read_mostly unsigned int xen_events_irq;
-static __initdata struct device_node *xen_node;
-
int xen_remap_domain_gfn_array(struct vm_area_struct *vma,
unsigned long addr,
xen_pfn_t *gfn, int nr,
}
EXPORT_SYMBOL_GPL(xen_unmap_domain_gfn_range);
-static unsigned long long xen_stolen_accounting(int cpu)
-{
- struct vcpu_runstate_info state;
-
- BUG_ON(cpu != smp_processor_id());
-
- xen_get_runstate_snapshot(&state);
-
- WARN_ON(state.state != RUNSTATE_running);
-
- return state.time[RUNSTATE_runnable] + state.time[RUNSTATE_offline];
-}
-
static void xen_read_wallclock(struct timespec64 *ts)
{
u32 version;
.notifier_call = xen_pvclock_gtod_notify,
};
- static void xen_percpu_init(void)
+ static int xen_starting_cpu(unsigned int cpu)
{
struct vcpu_register_vcpu_info info;
struct vcpu_info *vcpup;
int err;
- int cpu = get_cpu();
/*
* VCPUOP_register_vcpu_info cannot be called twice for the same
pr_info("Xen: initializing cpu%d\n", cpu);
vcpup = per_cpu_ptr(xen_vcpu_info, cpu);
+ /* Direct vCPU id mapping for ARM guests. */
+ per_cpu(xen_vcpu_id, cpu) = cpu;
+
info.mfn = virt_to_gfn(vcpup);
info.offset = xen_offset_in_page(vcpup);
- err = HYPERVISOR_vcpu_op(VCPUOP_register_vcpu_info, cpu, &info);
+ err = HYPERVISOR_vcpu_op(VCPUOP_register_vcpu_info, xen_vcpu_nr(cpu),
+ &info);
BUG_ON(err);
per_cpu(xen_vcpu, cpu) = vcpup;
after_register_vcpu_info:
enable_percpu_irq(xen_events_irq, 0);
- put_cpu();
+ return 0;
+ }
+
+ static int xen_dying_cpu(unsigned int cpu)
+ {
+ disable_percpu_irq(xen_events_irq);
+ return 0;
}
static void xen_restart(enum reboot_mode reboot_mode, const char *cmd)
BUG_ON(rc);
}
- static int xen_cpu_notification(struct notifier_block *self,
- unsigned long action,
- void *hcpu)
- {
- switch (action) {
- case CPU_STARTING:
- xen_percpu_init();
- break;
- case CPU_DYING:
- disable_percpu_irq(xen_events_irq);
- break;
- default:
- break;
- }
-
- return NOTIFY_OK;
- }
-
- static struct notifier_block xen_cpu_notifier = {
- .notifier_call = xen_cpu_notification,
- };
-
static irqreturn_t xen_arm_callback(int irq, void *arg)
{
xen_hvm_evtchn_do_upcall();
return IRQ_HANDLED;
}
+static __initdata struct {
+ const char *compat;
+ const char *prefix;
+ const char *version;
+ bool found;
+} hyper_node = {"xen,xen", "xen,xen-", NULL, false};
+
+static int __init fdt_find_hyper_node(unsigned long node, const char *uname,
+ int depth, void *data)
+{
+ const void *s = NULL;
+ int len;
+
+ if (depth != 1 || strcmp(uname, "hypervisor") != 0)
+ return 0;
+
+ if (of_flat_dt_is_compatible(node, hyper_node.compat))
+ hyper_node.found = true;
+
+ s = of_get_flat_dt_prop(node, "compatible", &len);
+ if (strlen(hyper_node.prefix) + 3 < len &&
+ !strncmp(hyper_node.prefix, s, strlen(hyper_node.prefix)))
+ hyper_node.version = s + strlen(hyper_node.prefix);
+
+ /*
+ * Check if Xen supports EFI by checking whether there is the
+ * "/hypervisor/uefi" node in DT. If so, runtime services are available
+ * through proxy functions (e.g. in case of Xen dom0 EFI implementation
+ * they call special hypercall which executes relevant EFI functions)
+ * and that is why they are always enabled.
+ */
+ if (IS_ENABLED(CONFIG_XEN_EFI)) {
+ if ((of_get_flat_dt_subnode_by_name(node, "uefi") > 0) &&
+ !efi_runtime_disabled())
+ set_bit(EFI_RUNTIME_SERVICES, &efi.flags);
+ }
+
+ return 0;
+}
+
/*
* see Documentation/devicetree/bindings/arm/xen.txt for the
* documentation of the Xen Device Tree format.
#define GRANT_TABLE_PHYSADDR 0
void __init xen_early_init(void)
{
- int len;
- const char *s = NULL;
- const char *version = NULL;
- const char *xen_prefix = "xen,xen-";
-
- xen_node = of_find_compatible_node(NULL, NULL, "xen,xen");
- if (!xen_node) {
+ of_scan_flat_dt(fdt_find_hyper_node, NULL);
+ if (!hyper_node.found) {
pr_debug("No Xen support\n");
return;
}
- s = of_get_property(xen_node, "compatible", &len);
- if (strlen(xen_prefix) + 3 < len &&
- !strncmp(xen_prefix, s, strlen(xen_prefix)))
- version = s + strlen(xen_prefix);
- if (version == NULL) {
+
+ if (hyper_node.version == NULL) {
pr_debug("Xen version not found\n");
return;
}
- pr_info("Xen %s support found\n", version);
+ pr_info("Xen %s support found\n", hyper_node.version);
xen_domain_type = XEN_HVM_DOMAIN;
add_preferred_console("hvc", 0, NULL);
}
+static void __init xen_acpi_guest_init(void)
+{
+#ifdef CONFIG_ACPI
+ struct xen_hvm_param a;
+ int interrupt, trigger, polarity;
+
+ a.domid = DOMID_SELF;
+ a.index = HVM_PARAM_CALLBACK_IRQ;
+
+ if (HYPERVISOR_hvm_op(HVMOP_get_param, &a)
+ || (a.value >> 56) != HVM_PARAM_CALLBACK_TYPE_PPI) {
+ xen_events_irq = 0;
+ return;
+ }
+
+ interrupt = a.value & 0xff;
+ trigger = ((a.value >> 8) & 0x1) ? ACPI_EDGE_SENSITIVE
+ : ACPI_LEVEL_SENSITIVE;
+ polarity = ((a.value >> 8) & 0x2) ? ACPI_ACTIVE_LOW
+ : ACPI_ACTIVE_HIGH;
+ xen_events_irq = acpi_register_gsi(NULL, interrupt, trigger, polarity);
+#endif
+}
+
+static void __init xen_dt_guest_init(void)
+{
+ struct device_node *xen_node;
+
+ xen_node = of_find_compatible_node(NULL, NULL, "xen,xen");
+ if (!xen_node) {
+ pr_err("Xen support was detected before, but it has disappeared\n");
+ return;
+ }
+
+ xen_events_irq = irq_of_parse_and_map(xen_node, 0);
+}
+
static int __init xen_guest_init(void)
{
struct xen_add_to_physmap xatp;
struct shared_info *shared_info_page = NULL;
- struct resource res;
- phys_addr_t grant_frames;
if (!xen_domain())
return 0;
- if (of_address_to_resource(xen_node, GRANT_TABLE_PHYSADDR, &res)) {
- pr_err("Xen grant table base address not found\n");
- return -ENODEV;
- }
- grant_frames = res.start;
+ if (!acpi_disabled)
+ xen_acpi_guest_init();
+ else
+ xen_dt_guest_init();
- xen_events_irq = irq_of_parse_and_map(xen_node, 0);
if (!xen_events_irq) {
pr_err("Xen event channel interrupt not found\n");
return -ENODEV;
}
+ /*
+ * The fdt parsing codes have set EFI_RUNTIME_SERVICES if Xen EFI
+ * parameters are found. Force enable runtime services.
+ */
+ if (efi_enabled(EFI_RUNTIME_SERVICES))
+ xen_efi_runtime_setup();
+
shared_info_page = (struct shared_info *)get_zeroed_page(GFP_KERNEL);
if (!shared_info_page) {
if (xen_vcpu_info == NULL)
return -ENOMEM;
- if (gnttab_setup_auto_xlat_frames(grant_frames)) {
+ /* Direct vCPU id mapping for ARM guests. */
+ per_cpu(xen_vcpu_id, 0) = 0;
+
+ xen_auto_xlat_grant_frames.count = gnttab_max_grant_frames();
+ if (xen_xlate_map_ballooned_pages(&xen_auto_xlat_grant_frames.pfn,
+ &xen_auto_xlat_grant_frames.vaddr,
+ xen_auto_xlat_grant_frames.count)) {
free_percpu(xen_vcpu_info);
return -ENOMEM;
}
return -EINVAL;
}
- xen_percpu_init();
-
- register_cpu_notifier(&xen_cpu_notifier);
-
- pv_time_ops.steal_clock = xen_stolen_accounting;
- static_key_slow_inc(¶virt_steal_enabled);
+ xen_time_setup_guest();
+
if (xen_initial_domain())
pvclock_gtod_register_notifier(&xen_pvclock_gtod_notifier);
- return 0;
+ return cpuhp_setup_state(CPUHP_AP_ARM_XEN_STARTING,
+ "AP_ARM_XEN_STARTING", xen_starting_cpu,
+ xen_dying_cpu);
}
early_initcall(xen_guest_init);
EXPORT_SYMBOL_GPL(HYPERVISOR_tmem_op);
EXPORT_SYMBOL_GPL(HYPERVISOR_platform_op);
EXPORT_SYMBOL_GPL(HYPERVISOR_multicall);
+EXPORT_SYMBOL_GPL(HYPERVISOR_vm_assist);
EXPORT_SYMBOL_GPL(privcmd_call);
* 0 - If all the hooks ran successfully.
* -EINVAL - At least one hook is not supported by the CPU.
*/
- static int run_all_insn_set_hw_mode(unsigned long cpu)
+ static int run_all_insn_set_hw_mode(unsigned int cpu)
{
int rc = 0;
unsigned long flags;
list_for_each_entry(insn, &insn_emulation, node) {
bool enable = (insn->current_mode == INSN_HW);
if (insn->ops->set_hw_mode && insn->ops->set_hw_mode(enable)) {
- pr_warn("CPU[%ld] cannot support the emulation of %s",
+ pr_warn("CPU[%u] cannot support the emulation of %s",
cpu, insn->ops->name);
rc = -EINVAL;
}
*/
#define TYPE_SWPB (1 << 22)
-/*
- * Set up process info to signal segmentation fault - called on access error.
- */
-static void set_segfault(struct pt_regs *regs, unsigned long addr)
-{
- siginfo_t info;
-
- down_read(¤t->mm->mmap_sem);
- if (find_vma(current->mm, addr) == NULL)
- info.si_code = SEGV_MAPERR;
- else
- info.si_code = SEGV_ACCERR;
- up_read(¤t->mm->mmap_sem);
-
- info.si_signo = SIGSEGV;
- info.si_errno = 0;
- info.si_addr = (void *) instruction_pointer(regs);
-
- pr_debug("SWP{B} emulation: access caused memory abort!\n");
- arm64_notify_die("Illegal memory access", regs, &info, 0);
-}
-
static int emulate_swpX(unsigned int address, unsigned int *data,
unsigned int type)
{
return res;
}
+#define ARM_OPCODE_CONDITION_UNCOND 0xf
+
+static unsigned int __kprobes aarch32_check_condition(u32 opcode, u32 psr)
+{
+ u32 cc_bits = opcode >> 28;
+
+ if (cc_bits != ARM_OPCODE_CONDITION_UNCOND) {
+ if ((*aarch32_opcode_cond_checks[cc_bits])(psr))
+ return ARM_OPCODE_CONDTEST_PASS;
+ else
+ return ARM_OPCODE_CONDTEST_FAIL;
+ }
+ return ARM_OPCODE_CONDTEST_UNCOND;
+}
+
/*
* swp_handler logs the id of calling process, dissects the instruction, sanity
* checks the memory location, calls emulate_swpX for the actual operation and
type = instr & TYPE_SWPB;
- switch (arm_check_condition(instr, regs->pstate)) {
+ switch (aarch32_check_condition(instr, regs->pstate)) {
case ARM_OPCODE_CONDTEST_PASS:
break;
case ARM_OPCODE_CONDTEST_FAIL:
return 0;
fault:
- set_segfault(regs, address);
+ pr_debug("SWP{B} emulation: access caused memory abort!\n");
+ arm64_notify_segfault(regs, address);
return 0;
}
{
perf_sw_event(PERF_COUNT_SW_EMULATION_FAULTS, 1, regs, regs->pc);
- switch (arm_check_condition(instr, regs->pstate)) {
+ switch (aarch32_check_condition(instr, regs->pstate)) {
case ARM_OPCODE_CONDTEST_PASS:
break;
case ARM_OPCODE_CONDTEST_FAIL:
.set_hw_mode = setend_set_hw_mode,
};
- static int insn_cpu_hotplug_notify(struct notifier_block *b,
- unsigned long action, void *hcpu)
- {
- int rc = 0;
- if ((action & ~CPU_TASKS_FROZEN) == CPU_STARTING)
- rc = run_all_insn_set_hw_mode((unsigned long)hcpu);
-
- return notifier_from_errno(rc);
- }
-
- static struct notifier_block insn_cpu_hotplug_notifier = {
- .notifier_call = insn_cpu_hotplug_notify,
- };
-
/*
* Invoked as late_initcall, since not needed before init spawned.
*/
pr_info("setend instruction emulation is not supported on the system");
}
- register_cpu_notifier(&insn_cpu_hotplug_notifier);
+ cpuhp_setup_state_nocalls(CPUHP_AP_ARM64_ISNDEP_STARTING,
+ "AP_ARM64_ISNDEP_STARTING",
+ run_all_insn_set_hw_mode, NULL);
register_insn_emulation_sysctl(ctl_abi);
return 0;
irq_subclass_unregister(IRQ_SUBCLASS_MEASUREMENT_ALERT);
on_each_cpu(setup_pmc_cpu, &flags, 1);
- perf_release_sampling();
}
static int reserve_pmc_hardware(void)
{
int flags = PMC_INIT;
- int err;
- err = perf_reserve_sampling();
- if (err)
- return err;
on_each_cpu(setup_pmc_cpu, &flags, 1);
if (flags & PMC_FAILURE) {
release_pmc_hardware();
struct pt_regs regs;
struct perf_sf_sde_regs *sde_regs;
struct perf_sample_data data;
- struct perf_raw_record raw;
+ struct perf_raw_record raw = {
+ .frag = {
+ .size = sfr->size,
+ .data = sfr,
+ },
+ };
/* Setup perf sample */
perf_sample_data_init(&data, 0, event->hw.last_period);
- raw.size = sfr->size;
- raw.data = sfr;
data.raw = &raw;
/* Setup pt_regs to look like an CPU-measurement external interrupt
sf_disable();
}
}
-
- static int cpumf_pmu_notifier(struct notifier_block *self,
- unsigned long action, void *hcpu)
+ static int cpusf_pmu_setup(unsigned int cpu, int flags)
{
- int flags;
-
/* Ignore the notification if no events are scheduled on the PMU.
* This might be racy...
*/
if (!atomic_read(&num_events))
- return NOTIFY_OK;
-
- switch (action & ~CPU_TASKS_FROZEN) {
- case CPU_ONLINE:
- case CPU_DOWN_FAILED:
- flags = PMC_INIT;
- local_irq_disable();
- setup_pmc_cpu(&flags);
- local_irq_enable();
- break;
- case CPU_DOWN_PREPARE:
- flags = PMC_RELEASE;
- local_irq_disable();
- setup_pmc_cpu(&flags);
- local_irq_enable();
- break;
- default:
- break;
- }
+ return 0;
- return NOTIFY_OK;
+ local_irq_disable();
+ setup_pmc_cpu(&flags);
+ local_irq_enable();
+ return 0;
+ }
+
+ static int s390_pmu_sf_online_cpu(unsigned int cpu)
+ {
+ return cpusf_pmu_setup(cpu, PMC_INIT);
+ }
+
+ static int s390_pmu_sf_offline_cpu(unsigned int cpu)
+ {
+ return cpusf_pmu_setup(cpu, PMC_RELEASE);
}
static int param_get_sfb_size(char *buffer, const struct kernel_param *kp)
cpumf_measurement_alert);
goto out;
}
- perf_cpu_notifier(cpumf_pmu_notifier);
+
+ cpuhp_setup_state(CPUHP_AP_PERF_S390_SF_ONLINE, "AP_PERF_S390_SF_ONLINE",
+ s390_pmu_sf_online_cpu, s390_pmu_sf_offline_cpu);
out:
return err;
}
#include <linux/random.h>
#include <linux/elf.h>
#include <linux/cpu.h>
+#include <linux/ptrace.h>
#include <asm/pvclock.h>
#include <asm/vgtod.h>
#include <asm/proto.h>
return 0;
}
-static const struct vm_special_mapping text_mapping = {
- .name = "[vdso]",
- .fault = vdso_fault,
-};
+static void vdso_fix_landing(const struct vdso_image *image,
+ struct vm_area_struct *new_vma)
+{
+#if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION
+ if (in_ia32_syscall() && image == &vdso_image_32) {
+ struct pt_regs *regs = current_pt_regs();
+ unsigned long vdso_land = image->sym_int80_landing_pad;
+ unsigned long old_land_addr = vdso_land +
+ (unsigned long)current->mm->context.vdso;
+
+ /* Fixing userspace landing - look at do_fast_syscall_32 */
+ if (regs->ip == old_land_addr)
+ regs->ip = new_vma->vm_start + vdso_land;
+ }
+#endif
+}
+
+static int vdso_mremap(const struct vm_special_mapping *sm,
+ struct vm_area_struct *new_vma)
+{
+ unsigned long new_size = new_vma->vm_end - new_vma->vm_start;
+ const struct vdso_image *image = current->mm->context.vdso_image;
+
+ if (image->size != new_size)
+ return -EINVAL;
+
+ if (WARN_ON_ONCE(current->mm != new_vma->vm_mm))
+ return -EFAULT;
+
+ vdso_fix_landing(image, new_vma);
+ current->mm->context.vdso = (void __user *)new_vma->vm_start;
+
+ return 0;
+}
static int vvar_fault(const struct vm_special_mapping *sm,
struct vm_area_struct *vma, struct vm_fault *vmf)
struct vm_area_struct *vma;
unsigned long addr, text_start;
int ret = 0;
+
+ static const struct vm_special_mapping vdso_mapping = {
+ .name = "[vdso]",
+ .fault = vdso_fault,
+ .mremap = vdso_mremap,
+ };
static const struct vm_special_mapping vvar_mapping = {
.name = "[vvar]",
.fault = vvar_fault,
image->size,
VM_READ|VM_EXEC|
VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC,
- &text_mapping);
+ &vdso_mapping);
if (IS_ERR(vma)) {
ret = PTR_ERR(vma);
write_gdt_entry(get_cpu_gdt_table(cpu), GDT_ENTRY_PER_CPU, &d, DESCTYPE_S);
}
- static int
- vgetcpu_cpu_notifier(struct notifier_block *n, unsigned long action, void *arg)
+ static int vgetcpu_online(unsigned int cpu)
{
- long cpu = (long)arg;
-
- if (action == CPU_ONLINE || action == CPU_ONLINE_FROZEN)
- smp_call_function_single(cpu, vgetcpu_cpu_init, NULL, 1);
-
- return NOTIFY_DONE;
+ return smp_call_function_single(cpu, vgetcpu_cpu_init, NULL, 1);
}
static int __init init_vdso(void)
init_vdso_image(&vdso_image_x32);
#endif
- cpu_notifier_register_begin();
-
- on_each_cpu(vgetcpu_cpu_init, NULL, 1);
/* notifier priority > KVM */
- __hotcpu_notifier(vgetcpu_cpu_notifier, 30);
-
- cpu_notifier_register_done();
-
- return 0;
+ return cpuhp_setup_state(CPUHP_AP_X86_VDSO_VMA_ONLINE,
+ "AP_X86_VDSO_VMA_ONLINE", vgetcpu_online, NULL);
}
subsys_initcall(init_vdso);
#endif /* CONFIG_X86_64 */
}
if (event->attr.sample_type & PERF_SAMPLE_RAW) {
- raw.size = sizeof(u32) + ibs_data.size;
- raw.data = ibs_data.data;
+ raw = (struct perf_raw_record){
+ .frag = {
+ .size = sizeof(u32) + ibs_data.size,
+ .data = ibs_data.data,
+ },
+ };
data.raw = &raw;
}
return ret;
}
- static __init int perf_event_ibs_init(void)
+ static __init void perf_event_ibs_init(void)
{
struct attribute **attr = ibs_op_format_attrs;
- if (!ibs_caps)
- return -ENODEV; /* ibs not supported by the cpu */
-
perf_ibs_pmu_init(&perf_ibs_fetch, "ibs_fetch");
if (ibs_caps & IBS_CAPS_OPCNT) {
register_nmi_handler(NMI_LOCAL, perf_ibs_nmi_handler, 0, "perf_ibs");
pr_info("perf: AMD IBS detected (0x%08x)\n", ibs_caps);
-
- return 0;
}
#else /* defined(CONFIG_PERF_EVENTS) && defined(CONFIG_CPU_SUP_AMD) */
- static __init int perf_event_ibs_init(void) { return 0; }
+ static __init void perf_event_ibs_init(void) { }
#endif
return val & IBSCTL_LVT_OFFSET_MASK;
}
- static void setup_APIC_ibs(void *dummy)
+ static void setup_APIC_ibs(void)
{
int offset;
smp_processor_id());
}
- static void clear_APIC_ibs(void *dummy)
+ static void clear_APIC_ibs(void)
{
int offset;
setup_APIC_eilvt(offset, 0, APIC_EILVT_MSG_FIX, 1);
}
+ static int x86_pmu_amd_ibs_starting_cpu(unsigned int cpu)
+ {
+ setup_APIC_ibs();
+ return 0;
+ }
+
#ifdef CONFIG_PM
static int perf_ibs_suspend(void)
{
- clear_APIC_ibs(NULL);
+ clear_APIC_ibs();
return 0;
}
static void perf_ibs_resume(void)
{
ibs_eilvt_setup();
- setup_APIC_ibs(NULL);
+ setup_APIC_ibs();
}
static struct syscore_ops perf_ibs_syscore_ops = {
#endif
- static int
- perf_ibs_cpu_notifier(struct notifier_block *self, unsigned long action, void *hcpu)
+ static int x86_pmu_amd_ibs_dying_cpu(unsigned int cpu)
{
- switch (action & ~CPU_TASKS_FROZEN) {
- case CPU_STARTING:
- setup_APIC_ibs(NULL);
- break;
- case CPU_DYING:
- clear_APIC_ibs(NULL);
- break;
- default:
- break;
- }
-
- return NOTIFY_OK;
+ clear_APIC_ibs();
+ return 0;
}
static __init int amd_ibs_init(void)
{
u32 caps;
- int ret = -EINVAL;
caps = __get_ibs_caps();
if (!caps)
ibs_eilvt_setup();
if (!ibs_eilvt_valid())
- goto out;
+ return -EINVAL;
perf_ibs_pm_init();
- cpu_notifier_register_begin();
+
ibs_caps = caps;
/* make ibs_caps visible to other cpus: */
smp_mb();
- smp_call_function(setup_APIC_ibs, NULL, 1);
- __perf_cpu_notifier(perf_ibs_cpu_notifier);
- cpu_notifier_register_done();
+ /*
+ * x86_pmu_amd_ibs_starting_cpu will be called from core on
+ * all online cpus.
+ */
+ cpuhp_setup_state(CPUHP_AP_PERF_X86_AMD_IBS_STARTING,
+ "AP_PERF_X86_AMD_IBS_STARTING",
+ x86_pmu_amd_ibs_starting_cpu,
+ x86_pmu_amd_ibs_dying_cpu);
- ret = perf_event_ibs_init();
- out:
- if (ret)
- pr_err("Failed to setup IBS, %d\n", ret);
- return ret;
+ perf_event_ibs_init();
+
+ return 0;
}
/* Since we need the pci subsystem to init ibs we can't do this earlier: */
msr_fail:
pr_cont("Broken PMU hardware detected, using software events only.\n");
- pr_info("%sFailed to access perfctr msr (MSR %x is %Lx)\n",
+ printk("%sFailed to access perfctr msr (MSR %x is %Lx)\n",
boot_cpu_has(X86_FEATURE_HYPERVISOR) ? KERN_INFO : KERN_ERR,
reg, val_new);
struct event_constraint emptyconstraint;
struct event_constraint unconstrained;
- static int
- x86_pmu_notifier(struct notifier_block *self, unsigned long action, void *hcpu)
+ static int x86_pmu_prepare_cpu(unsigned int cpu)
{
- unsigned int cpu = (long)hcpu;
struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);
- int i, ret = NOTIFY_OK;
-
- switch (action & ~CPU_TASKS_FROZEN) {
- case CPU_UP_PREPARE:
- for (i = 0 ; i < X86_PERF_KFREE_MAX; i++)
- cpuc->kfree_on_online[i] = NULL;
- if (x86_pmu.cpu_prepare)
- ret = x86_pmu.cpu_prepare(cpu);
- break;
-
- case CPU_STARTING:
- if (x86_pmu.cpu_starting)
- x86_pmu.cpu_starting(cpu);
- break;
+ int i;
- case CPU_ONLINE:
- for (i = 0 ; i < X86_PERF_KFREE_MAX; i++) {
- kfree(cpuc->kfree_on_online[i]);
- cpuc->kfree_on_online[i] = NULL;
- }
- break;
+ for (i = 0 ; i < X86_PERF_KFREE_MAX; i++)
+ cpuc->kfree_on_online[i] = NULL;
+ if (x86_pmu.cpu_prepare)
+ return x86_pmu.cpu_prepare(cpu);
+ return 0;
+ }
- case CPU_DYING:
- if (x86_pmu.cpu_dying)
- x86_pmu.cpu_dying(cpu);
- break;
+ static int x86_pmu_dead_cpu(unsigned int cpu)
+ {
+ if (x86_pmu.cpu_dead)
+ x86_pmu.cpu_dead(cpu);
+ return 0;
+ }
- case CPU_UP_CANCELED:
- case CPU_DEAD:
- if (x86_pmu.cpu_dead)
- x86_pmu.cpu_dead(cpu);
- break;
+ static int x86_pmu_online_cpu(unsigned int cpu)
+ {
+ struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);
+ int i;
- default:
- break;
+ for (i = 0 ; i < X86_PERF_KFREE_MAX; i++) {
+ kfree(cpuc->kfree_on_online[i]);
+ cpuc->kfree_on_online[i] = NULL;
}
+ return 0;
+ }
- return ret;
+ static int x86_pmu_starting_cpu(unsigned int cpu)
+ {
+ if (x86_pmu.cpu_starting)
+ x86_pmu.cpu_starting(cpu);
+ return 0;
+ }
+
+ static int x86_pmu_dying_cpu(unsigned int cpu)
+ {
+ if (x86_pmu.cpu_dying)
+ x86_pmu.cpu_dying(cpu);
+ return 0;
}
static void __init pmu_check_apic(void)
}
EXPORT_SYMBOL_GPL(events_sysfs_show);
+ssize_t events_ht_sysfs_show(struct device *dev, struct device_attribute *attr,
+ char *page)
+{
+ struct perf_pmu_events_ht_attr *pmu_attr =
+ container_of(attr, struct perf_pmu_events_ht_attr, attr);
+
+ /*
+ * Report conditional events depending on Hyper-Threading.
+ *
+ * This is overly conservative as usually the HT special
+ * handling is not needed if the other CPU thread is idle.
+ *
+ * Note this does not (and cannot) handle the case when thread
+ * siblings are invisible, for example with virtualization
+ * if they are owned by some other guest. The user tool
+ * has to re-read when a thread sibling gets onlined later.
+ */
+ return sprintf(page, "%s",
+ topology_max_smt_threads() > 1 ?
+ pmu_attr->event_str_ht :
+ pmu_attr->event_str_noht);
+}
+
EVENT_ATTR(cpu-cycles, CPU_CYCLES );
EVENT_ATTR(instructions, INSTRUCTIONS );
EVENT_ATTR(cache-references, CACHE_REFERENCES );
pr_info("... fixed-purpose events: %d\n", x86_pmu.num_counters_fixed);
pr_info("... event mask: %016Lx\n", x86_pmu.intel_ctrl);
- perf_pmu_register(&pmu, "cpu", PERF_TYPE_RAW);
- perf_cpu_notifier(x86_pmu_notifier);
+ /*
+ * Install callbacks. Core will call them for each online
+ * cpu.
+ */
+ err = cpuhp_setup_state(CPUHP_PERF_X86_PREPARE, "PERF_X86_PREPARE",
+ x86_pmu_prepare_cpu, x86_pmu_dead_cpu);
+ if (err)
+ return err;
+
+ err = cpuhp_setup_state(CPUHP_AP_PERF_X86_STARTING,
+ "AP_PERF_X86_STARTING", x86_pmu_starting_cpu,
+ x86_pmu_dying_cpu);
+ if (err)
+ goto out;
+
+ err = cpuhp_setup_state(CPUHP_AP_PERF_X86_ONLINE, "AP_PERF_X86_ONLINE",
+ x86_pmu_online_cpu, NULL);
+ if (err)
+ goto out1;
+
+ err = perf_pmu_register(&pmu, "cpu", PERF_TYPE_RAW);
+ if (err)
+ goto out2;
return 0;
+
+ out2:
+ cpuhp_remove_state(CPUHP_AP_PERF_X86_ONLINE);
+ out1:
+ cpuhp_remove_state(CPUHP_AP_PERF_X86_STARTING);
+ out:
+ cpuhp_remove_state(CPUHP_PERF_X86_PREPARE);
+ return err;
}
early_initcall(init_hw_perf_events);
#include <asm/cpufeature.h>
#include <asm/hardirq.h>
+#include <asm/intel-family.h>
#include <asm/apic.h>
#include "../perf_event.h"
EVENT_CONSTRAINT_END
};
-struct event_constraint intel_skl_event_constraints[] = {
+static struct event_constraint intel_skl_event_constraints[] = {
FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */
};
static struct extra_reg intel_knl_extra_regs[] __read_mostly = {
- INTEL_UEVENT_EXTRA_REG(0x01b7,
- MSR_OFFCORE_RSP_0, 0x7f9ffbffffull, RSP_0),
- INTEL_UEVENT_EXTRA_REG(0x02b7,
- MSR_OFFCORE_RSP_1, 0x3f9ffbffffull, RSP_1),
+ INTEL_UEVENT_EXTRA_REG(0x01b7, MSR_OFFCORE_RSP_0, 0x799ffbb6e7ull, RSP_0),
+ INTEL_UEVENT_EXTRA_REG(0x02b7, MSR_OFFCORE_RSP_1, 0x399ffbffe7ull, RSP_1),
EVENT_EXTRA_END
};
EVENT_ATTR_STR(mem-loads, mem_ld_snb, "event=0xcd,umask=0x1,ldlat=3");
EVENT_ATTR_STR(mem-stores, mem_st_snb, "event=0xcd,umask=0x2");
-struct attribute *nhm_events_attrs[] = {
+static struct attribute *nhm_events_attrs[] = {
EVENT_PTR(mem_ld_nhm),
NULL,
};
-struct attribute *snb_events_attrs[] = {
+/*
+ * topdown events for Intel Core CPUs.
+ *
+ * The events are all in slots, which is a free slot in a 4 wide
+ * pipeline. Some events are already reported in slots, for cycle
+ * events we multiply by the pipeline width (4).
+ *
+ * With Hyper Threading on, topdown metrics are either summed or averaged
+ * between the threads of a core: (count_t0 + count_t1).
+ *
+ * For the average case the metric is always scaled to pipeline width,
+ * so we use factor 2 ((count_t0 + count_t1) / 2 * 4)
+ */
+
+EVENT_ATTR_STR_HT(topdown-total-slots, td_total_slots,
+ "event=0x3c,umask=0x0", /* cpu_clk_unhalted.thread */
+ "event=0x3c,umask=0x0,any=1"); /* cpu_clk_unhalted.thread_any */
+EVENT_ATTR_STR_HT(topdown-total-slots.scale, td_total_slots_scale, "4", "2");
+EVENT_ATTR_STR(topdown-slots-issued, td_slots_issued,
+ "event=0xe,umask=0x1"); /* uops_issued.any */
+EVENT_ATTR_STR(topdown-slots-retired, td_slots_retired,
+ "event=0xc2,umask=0x2"); /* uops_retired.retire_slots */
+EVENT_ATTR_STR(topdown-fetch-bubbles, td_fetch_bubbles,
+ "event=0x9c,umask=0x1"); /* idq_uops_not_delivered_core */
+EVENT_ATTR_STR_HT(topdown-recovery-bubbles, td_recovery_bubbles,
+ "event=0xd,umask=0x3,cmask=1", /* int_misc.recovery_cycles */
+ "event=0xd,umask=0x3,cmask=1,any=1"); /* int_misc.recovery_cycles_any */
+EVENT_ATTR_STR_HT(topdown-recovery-bubbles.scale, td_recovery_bubbles_scale,
+ "4", "2");
+
+static struct attribute *snb_events_attrs[] = {
EVENT_PTR(mem_ld_snb),
EVENT_PTR(mem_st_snb),
+ EVENT_PTR(td_slots_issued),
+ EVENT_PTR(td_slots_retired),
+ EVENT_PTR(td_fetch_bubbles),
+ EVENT_PTR(td_total_slots),
+ EVENT_PTR(td_total_slots_scale),
+ EVENT_PTR(td_recovery_bubbles),
+ EVENT_PTR(td_recovery_bubbles_scale),
NULL,
};
EVENT_CONSTRAINT_END
};
-struct event_constraint intel_bdw_event_constraints[] = {
+static struct event_constraint intel_bdw_event_constraints[] = {
FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */
},
};
+EVENT_ATTR_STR(topdown-total-slots, td_total_slots_slm, "event=0x3c");
+EVENT_ATTR_STR(topdown-total-slots.scale, td_total_slots_scale_slm, "2");
+/* no_alloc_cycles.not_delivered */
+EVENT_ATTR_STR(topdown-fetch-bubbles, td_fetch_bubbles_slm,
+ "event=0xca,umask=0x50");
+EVENT_ATTR_STR(topdown-fetch-bubbles.scale, td_fetch_bubbles_scale_slm, "2");
+/* uops_retired.all */
+EVENT_ATTR_STR(topdown-slots-issued, td_slots_issued_slm,
+ "event=0xc2,umask=0x10");
+/* uops_retired.all */
+EVENT_ATTR_STR(topdown-slots-retired, td_slots_retired_slm,
+ "event=0xc2,umask=0x10");
+
+static struct attribute *slm_events_attrs[] = {
+ EVENT_PTR(td_total_slots_slm),
+ EVENT_PTR(td_total_slots_scale_slm),
+ EVENT_PTR(td_fetch_bubbles_slm),
+ EVENT_PTR(td_fetch_bubbles_scale_slm),
+ EVENT_PTR(td_slots_issued_slm),
+ EVENT_PTR(td_slots_retired_slm),
+ NULL
+};
+
static struct extra_reg intel_slm_extra_regs[] __read_mostly =
{
/* must define OFFCORE_RSP_X first, see intel_fixup_er() */
cpuc->excl_thread_id = 0;
}
- return NOTIFY_OK;
+ return 0;
err_constraint_list:
kfree(cpuc->constraint_list);
cpuc->shared_regs = NULL;
err:
- return NOTIFY_BAD;
+ return -ENOMEM;
}
static void intel_pmu_cpu_starting(int cpu)
u32 rev = UINT_MAX; /* default to broken for unknown models */
switch (cpu_data(cpu).x86_model) {
- case 42: /* SNB */
+ case INTEL_FAM6_SANDYBRIDGE:
rev = 0x28;
break;
- case 45: /* SNB-EP */
+ case INTEL_FAM6_SANDYBRIDGE_X:
switch (cpu_data(cpu).x86_mask) {
case 6: rev = 0x618; break;
case 7: rev = 0x70c; break;
}
}
+static bool is_lbr_from(unsigned long msr)
+{
+ unsigned long lbr_from_nr = x86_pmu.lbr_from + x86_pmu.lbr_nr;
+
+ return x86_pmu.lbr_from <= msr && msr < lbr_from_nr;
+}
+
/*
* Under certain circumstances, access certain MSR may cause #GP.
* The function tests if the input MSR can be safely accessed.
* Only change the bits which can be updated by wrmsrl.
*/
val_tmp = val_old ^ mask;
+
+ if (is_lbr_from(msr))
+ val_tmp = lbr_from_signext_quirk_wr(val_tmp);
+
if (wrmsrl_safe(msr, val_tmp) ||
rdmsrl_safe(msr, &val_new))
return false;
+ /*
+ * Quirk only affects validation in wrmsr(), so wrmsrl()'s value
+ * should equal rdmsrl()'s even with the quirk.
+ */
if (val_new != val_tmp)
return false;
+ if (is_lbr_from(msr))
+ val_old = lbr_from_signext_quirk_wr(val_old);
+
/* Here it's sure that the MSR can be safely accessed.
* Restore the old value and return.
*/
EVENT_PTR(cycles_ct),
EVENT_PTR(mem_ld_hsw),
EVENT_PTR(mem_st_hsw),
+ EVENT_PTR(td_slots_issued),
+ EVENT_PTR(td_slots_retired),
+ EVENT_PTR(td_fetch_bubbles),
+ EVENT_PTR(td_total_slots),
+ EVENT_PTR(td_total_slots_scale),
+ EVENT_PTR(td_recovery_bubbles),
+ EVENT_PTR(td_recovery_bubbles_scale),
NULL
};
* Install the hw-cache-events table:
*/
switch (boot_cpu_data.x86_model) {
- case 14: /* 65nm Core "Yonah" */
+ case INTEL_FAM6_CORE_YONAH:
pr_cont("Core events, ");
break;
- case 15: /* 65nm Core2 "Merom" */
+ case INTEL_FAM6_CORE2_MEROM:
x86_add_quirk(intel_clovertown_quirk);
- case 22: /* 65nm Core2 "Merom-L" */
- case 23: /* 45nm Core2 "Penryn" */
- case 29: /* 45nm Core2 "Dunnington (MP) */
+ case INTEL_FAM6_CORE2_MEROM_L:
+ case INTEL_FAM6_CORE2_PENRYN:
+ case INTEL_FAM6_CORE2_DUNNINGTON:
memcpy(hw_cache_event_ids, core2_hw_cache_event_ids,
sizeof(hw_cache_event_ids));
pr_cont("Core2 events, ");
break;
- case 30: /* 45nm Nehalem */
- case 26: /* 45nm Nehalem-EP */
- case 46: /* 45nm Nehalem-EX */
+ case INTEL_FAM6_NEHALEM:
+ case INTEL_FAM6_NEHALEM_EP:
+ case INTEL_FAM6_NEHALEM_EX:
memcpy(hw_cache_event_ids, nehalem_hw_cache_event_ids,
sizeof(hw_cache_event_ids));
memcpy(hw_cache_extra_regs, nehalem_hw_cache_extra_regs,
pr_cont("Nehalem events, ");
break;
- case 28: /* 45nm Atom "Pineview" */
- case 38: /* 45nm Atom "Lincroft" */
- case 39: /* 32nm Atom "Penwell" */
- case 53: /* 32nm Atom "Cloverview" */
- case 54: /* 32nm Atom "Cedarview" */
+ case INTEL_FAM6_ATOM_PINEVIEW:
+ case INTEL_FAM6_ATOM_LINCROFT:
+ case INTEL_FAM6_ATOM_PENWELL:
+ case INTEL_FAM6_ATOM_CLOVERVIEW:
+ case INTEL_FAM6_ATOM_CEDARVIEW:
memcpy(hw_cache_event_ids, atom_hw_cache_event_ids,
sizeof(hw_cache_event_ids));
pr_cont("Atom events, ");
break;
- case 55: /* 22nm Atom "Silvermont" */
- case 76: /* 14nm Atom "Airmont" */
- case 77: /* 22nm Atom "Silvermont Avoton/Rangely" */
+ case INTEL_FAM6_ATOM_SILVERMONT1:
+ case INTEL_FAM6_ATOM_SILVERMONT2:
+ case INTEL_FAM6_ATOM_AIRMONT:
memcpy(hw_cache_event_ids, slm_hw_cache_event_ids,
sizeof(hw_cache_event_ids));
memcpy(hw_cache_extra_regs, slm_hw_cache_extra_regs,
x86_pmu.pebs_constraints = intel_slm_pebs_event_constraints;
x86_pmu.extra_regs = intel_slm_extra_regs;
x86_pmu.flags |= PMU_FL_HAS_RSP_1;
+ x86_pmu.cpu_events = slm_events_attrs;
pr_cont("Silvermont events, ");
break;
- case 92: /* 14nm Atom "Goldmont" */
- case 95: /* 14nm Atom "Goldmont Denverton" */
+ case INTEL_FAM6_ATOM_GOLDMONT:
+ case INTEL_FAM6_ATOM_DENVERTON:
memcpy(hw_cache_event_ids, glm_hw_cache_event_ids,
sizeof(hw_cache_event_ids));
memcpy(hw_cache_extra_regs, glm_hw_cache_extra_regs,
pr_cont("Goldmont events, ");
break;
- case 37: /* 32nm Westmere */
- case 44: /* 32nm Westmere-EP */
- case 47: /* 32nm Westmere-EX */
+ case INTEL_FAM6_WESTMERE:
+ case INTEL_FAM6_WESTMERE_EP:
+ case INTEL_FAM6_WESTMERE_EX:
memcpy(hw_cache_event_ids, westmere_hw_cache_event_ids,
sizeof(hw_cache_event_ids));
memcpy(hw_cache_extra_regs, nehalem_hw_cache_extra_regs,
pr_cont("Westmere events, ");
break;
- case 42: /* 32nm SandyBridge */
- case 45: /* 32nm SandyBridge-E/EN/EP */
+ case INTEL_FAM6_SANDYBRIDGE:
+ case INTEL_FAM6_SANDYBRIDGE_X:
x86_add_quirk(intel_sandybridge_quirk);
x86_add_quirk(intel_ht_bug);
memcpy(hw_cache_event_ids, snb_hw_cache_event_ids,
x86_pmu.event_constraints = intel_snb_event_constraints;
x86_pmu.pebs_constraints = intel_snb_pebs_event_constraints;
x86_pmu.pebs_aliases = intel_pebs_aliases_snb;
- if (boot_cpu_data.x86_model == 45)
+ if (boot_cpu_data.x86_model == INTEL_FAM6_SANDYBRIDGE_X)
x86_pmu.extra_regs = intel_snbep_extra_regs;
else
x86_pmu.extra_regs = intel_snb_extra_regs;
pr_cont("SandyBridge events, ");
break;
- case 58: /* 22nm IvyBridge */
- case 62: /* 22nm IvyBridge-EP/EX */
+ case INTEL_FAM6_IVYBRIDGE:
+ case INTEL_FAM6_IVYBRIDGE_X:
x86_add_quirk(intel_ht_bug);
memcpy(hw_cache_event_ids, snb_hw_cache_event_ids,
sizeof(hw_cache_event_ids));
x86_pmu.pebs_constraints = intel_ivb_pebs_event_constraints;
x86_pmu.pebs_aliases = intel_pebs_aliases_ivb;
x86_pmu.pebs_prec_dist = true;
- if (boot_cpu_data.x86_model == 62)
+ if (boot_cpu_data.x86_model == INTEL_FAM6_IVYBRIDGE_X)
x86_pmu.extra_regs = intel_snbep_extra_regs;
else
x86_pmu.extra_regs = intel_snb_extra_regs;
break;
- case 60: /* 22nm Haswell Core */
- case 63: /* 22nm Haswell Server */
- case 69: /* 22nm Haswell ULT */
- case 70: /* 22nm Haswell + GT3e (Intel Iris Pro graphics) */
+ case INTEL_FAM6_HASWELL_CORE:
+ case INTEL_FAM6_HASWELL_X:
+ case INTEL_FAM6_HASWELL_ULT:
+ case INTEL_FAM6_HASWELL_GT3E:
x86_add_quirk(intel_ht_bug);
x86_pmu.late_ack = true;
memcpy(hw_cache_event_ids, hsw_hw_cache_event_ids, sizeof(hw_cache_event_ids));
pr_cont("Haswell events, ");
break;
- case 61: /* 14nm Broadwell Core-M */
- case 86: /* 14nm Broadwell Xeon D */
- case 71: /* 14nm Broadwell + GT3e (Intel Iris Pro graphics) */
- case 79: /* 14nm Broadwell Server */
+ case INTEL_FAM6_BROADWELL_CORE:
+ case INTEL_FAM6_BROADWELL_XEON_D:
+ case INTEL_FAM6_BROADWELL_GT3E:
+ case INTEL_FAM6_BROADWELL_X:
x86_pmu.late_ack = true;
memcpy(hw_cache_event_ids, hsw_hw_cache_event_ids, sizeof(hw_cache_event_ids));
memcpy(hw_cache_extra_regs, hsw_hw_cache_extra_regs, sizeof(hw_cache_extra_regs));
pr_cont("Broadwell events, ");
break;
- case 87: /* Knights Landing Xeon Phi */
+ case INTEL_FAM6_XEON_PHI_KNL:
memcpy(hw_cache_event_ids,
slm_hw_cache_event_ids, sizeof(hw_cache_event_ids));
memcpy(hw_cache_extra_regs,
pr_cont("Knights Landing events, ");
break;
- case 142: /* 14nm Kabylake Mobile */
- case 158: /* 14nm Kabylake Desktop */
- case 78: /* 14nm Skylake Mobile */
- case 94: /* 14nm Skylake Desktop */
- case 85: /* 14nm Skylake Server */
+ case INTEL_FAM6_SKYLAKE_MOBILE:
+ case INTEL_FAM6_SKYLAKE_DESKTOP:
+ case INTEL_FAM6_SKYLAKE_X:
+ case INTEL_FAM6_KABYLAKE_MOBILE:
+ case INTEL_FAM6_KABYLAKE_DESKTOP:
x86_pmu.late_ack = true;
memcpy(hw_cache_event_ids, skl_hw_cache_event_ids, sizeof(hw_cache_event_ids));
memcpy(hw_cache_extra_regs, skl_hw_cache_extra_regs, sizeof(hw_cache_extra_regs));
intel_pmu_lbr_init_skl();
+ /* INT_MISC.RECOVERY_CYCLES has umask 1 in Skylake */
+ event_attr_td_recovery_bubbles.event_str_noht =
+ "event=0xd,umask=0x1,cmask=1";
+ event_attr_td_recovery_bubbles.event_str_ht =
+ "event=0xd,umask=0x1,cmask=1,any=1";
+
x86_pmu.event_constraints = intel_skl_event_constraints;
x86_pmu.pebs_constraints = intel_skl_pebs_event_constraints;
x86_pmu.extra_regs = intel_skl_extra_regs;
x86_pmu.lbr_nr = 0;
}
+ if (x86_pmu.lbr_nr)
+ pr_cont("%d-deep LBR, ", x86_pmu.lbr_nr);
/*
* Access extra MSR may cause #GP under certain circumstances.
* E.g. KVM doesn't support offcore event
*/
static __init int fixup_ht_bug(void)
{
- int cpu = smp_processor_id();
- int w, c;
+ int c;
/*
* problem not present on this CPU model, nothing to do
*/
if (!(x86_pmu.flags & PMU_FL_EXCL_ENABLED))
return 0;
- w = cpumask_weight(topology_sibling_cpumask(cpu));
- if (w > 1) {
+ if (topology_max_smt_threads() > 1) {
pr_info("PMU erratum BJ122, BV98, HSD29 worked around, HT is on\n");
return 0;
}
#include <linux/slab.h>
#include <linux/perf_event.h>
#include <asm/cpu_device_id.h>
+#include <asm/intel-family.h>
#include "../perf_event.h"
MODULE_LICENSE("GPL");
* Check if exiting cpu is the designated reader. If so migrate the
* events when there is a valid target available
*/
- static void cstate_cpu_exit(int cpu)
+ static int cstate_cpu_exit(unsigned int cpu)
{
unsigned int target;
perf_pmu_migrate_context(&cstate_pkg_pmu, cpu, target);
}
}
+ return 0;
}
- static void cstate_cpu_init(int cpu)
+ static int cstate_cpu_init(unsigned int cpu)
{
unsigned int target;
topology_core_cpumask(cpu));
if (has_cstate_pkg && target >= nr_cpu_ids)
cpumask_set_cpu(cpu, &cstate_pkg_cpu_mask);
- }
- static int cstate_cpu_notifier(struct notifier_block *self,
- unsigned long action, void *hcpu)
- {
- unsigned int cpu = (long)hcpu;
-
- switch (action & ~CPU_TASKS_FROZEN) {
- case CPU_STARTING:
- cstate_cpu_init(cpu);
- break;
- case CPU_DOWN_PREPARE:
- cstate_cpu_exit(cpu);
- break;
- default:
- break;
- }
- return NOTIFY_OK;
+ return 0;
}
- static struct notifier_block cstate_cpu_nb = {
- .notifier_call = cstate_cpu_notifier,
- .priority = CPU_PRI_PERF + 1,
- };
-
static struct pmu cstate_core_pmu = {
.attr_groups = core_attr_groups,
.name = "cstate_core",
{ X86_VENDOR_INTEL, 6, model, X86_FEATURE_ANY, (unsigned long) &(states) }
static const struct x86_cpu_id intel_cstates_match[] __initconst = {
- X86_CSTATES_MODEL(30, nhm_cstates), /* 45nm Nehalem */
- X86_CSTATES_MODEL(26, nhm_cstates), /* 45nm Nehalem-EP */
- X86_CSTATES_MODEL(46, nhm_cstates), /* 45nm Nehalem-EX */
+ X86_CSTATES_MODEL(INTEL_FAM6_NEHALEM, nhm_cstates),
+ X86_CSTATES_MODEL(INTEL_FAM6_NEHALEM_EP, nhm_cstates),
+ X86_CSTATES_MODEL(INTEL_FAM6_NEHALEM_EX, nhm_cstates),
- X86_CSTATES_MODEL(37, nhm_cstates), /* 32nm Westmere */
- X86_CSTATES_MODEL(44, nhm_cstates), /* 32nm Westmere-EP */
- X86_CSTATES_MODEL(47, nhm_cstates), /* 32nm Westmere-EX */
+ X86_CSTATES_MODEL(INTEL_FAM6_WESTMERE, nhm_cstates),
+ X86_CSTATES_MODEL(INTEL_FAM6_WESTMERE_EP, nhm_cstates),
+ X86_CSTATES_MODEL(INTEL_FAM6_WESTMERE_EX, nhm_cstates),
- X86_CSTATES_MODEL(42, snb_cstates), /* 32nm SandyBridge */
- X86_CSTATES_MODEL(45, snb_cstates), /* 32nm SandyBridge-E/EN/EP */
+ X86_CSTATES_MODEL(INTEL_FAM6_SANDYBRIDGE, snb_cstates),
+ X86_CSTATES_MODEL(INTEL_FAM6_SANDYBRIDGE_X, snb_cstates),
- X86_CSTATES_MODEL(58, snb_cstates), /* 22nm IvyBridge */
- X86_CSTATES_MODEL(62, snb_cstates), /* 22nm IvyBridge-EP/EX */
+ X86_CSTATES_MODEL(INTEL_FAM6_IVYBRIDGE, snb_cstates),
+ X86_CSTATES_MODEL(INTEL_FAM6_IVYBRIDGE_X, snb_cstates),
- X86_CSTATES_MODEL(60, snb_cstates), /* 22nm Haswell Core */
- X86_CSTATES_MODEL(63, snb_cstates), /* 22nm Haswell Server */
- X86_CSTATES_MODEL(70, snb_cstates), /* 22nm Haswell + GT3e */
+ X86_CSTATES_MODEL(INTEL_FAM6_HASWELL_CORE, snb_cstates),
+ X86_CSTATES_MODEL(INTEL_FAM6_HASWELL_X, snb_cstates),
+ X86_CSTATES_MODEL(INTEL_FAM6_HASWELL_GT3E, snb_cstates),
- X86_CSTATES_MODEL(69, hswult_cstates), /* 22nm Haswell ULT */
+ X86_CSTATES_MODEL(INTEL_FAM6_HASWELL_ULT, hswult_cstates),
- X86_CSTATES_MODEL(55, slm_cstates), /* 22nm Atom Silvermont */
- X86_CSTATES_MODEL(77, slm_cstates), /* 22nm Atom Avoton/Rangely */
- X86_CSTATES_MODEL(76, slm_cstates), /* 22nm Atom Airmont */
+ X86_CSTATES_MODEL(INTEL_FAM6_ATOM_SILVERMONT1, slm_cstates),
+ X86_CSTATES_MODEL(INTEL_FAM6_ATOM_SILVERMONT2, slm_cstates),
+ X86_CSTATES_MODEL(INTEL_FAM6_ATOM_AIRMONT, slm_cstates),
- X86_CSTATES_MODEL(61, snb_cstates), /* 14nm Broadwell Core-M */
- X86_CSTATES_MODEL(86, snb_cstates), /* 14nm Broadwell Xeon D */
- X86_CSTATES_MODEL(71, snb_cstates), /* 14nm Broadwell + GT3e */
- X86_CSTATES_MODEL(79, snb_cstates), /* 14nm Broadwell Server */
+ X86_CSTATES_MODEL(INTEL_FAM6_BROADWELL_CORE, snb_cstates),
+ X86_CSTATES_MODEL(INTEL_FAM6_BROADWELL_XEON_D, snb_cstates),
+ X86_CSTATES_MODEL(INTEL_FAM6_BROADWELL_GT3E, snb_cstates),
+ X86_CSTATES_MODEL(INTEL_FAM6_BROADWELL_X, snb_cstates),
- X86_CSTATES_MODEL(78, snb_cstates), /* 14nm Skylake Mobile */
- X86_CSTATES_MODEL(94, snb_cstates), /* 14nm Skylake Desktop */
+ X86_CSTATES_MODEL(INTEL_FAM6_SKYLAKE_MOBILE, snb_cstates),
+ X86_CSTATES_MODEL(INTEL_FAM6_SKYLAKE_DESKTOP, snb_cstates),
{ },
};
MODULE_DEVICE_TABLE(x86cpu, intel_cstates_match);
static int __init cstate_init(void)
{
- int cpu, err;
+ int err;
- cpu_notifier_register_begin();
- for_each_online_cpu(cpu)
- cstate_cpu_init(cpu);
+ cpuhp_setup_state(CPUHP_AP_PERF_X86_CSTATE_STARTING,
+ "AP_PERF_X86_CSTATE_STARTING", cstate_cpu_init,
+ NULL);
+ cpuhp_setup_state(CPUHP_AP_PERF_X86_CSTATE_ONLINE,
+ "AP_PERF_X86_CSTATE_ONLINE", NULL, cstate_cpu_exit);
if (has_cstate_core) {
err = perf_pmu_register(&cstate_core_pmu, cstate_core_pmu.name, -1);
if (err) {
has_cstate_core = false;
pr_info("Failed to register cstate core pmu\n");
- goto out;
+ return err;
}
}
has_cstate_pkg = false;
pr_info("Failed to register cstate pkg pmu\n");
cstate_cleanup();
- goto out;
+ return err;
}
}
- __register_cpu_notifier(&cstate_cpu_nb);
- out:
- cpu_notifier_register_done();
+
return err;
}
static void __exit cstate_pmu_exit(void)
{
- cpu_notifier_register_begin();
- __unregister_cpu_notifier(&cstate_cpu_nb);
+ cpuhp_remove_state_nocalls(CPUHP_AP_PERF_X86_CSTATE_ONLINE);
+ cpuhp_remove_state_nocalls(CPUHP_AP_PERF_X86_CSTATE_STARTING);
cstate_cleanup();
- cpu_notifier_register_done();
}
module_exit(cstate_pmu_exit);
#include <linux/slab.h>
#include <linux/perf_event.h>
#include <asm/cpu_device_id.h>
+#include <asm/intel-family.h>
#include "../perf_event.h"
MODULE_LICENSE("GPL");
NULL,
};
- static void rapl_cpu_exit(int cpu)
+ static int rapl_cpu_offline(unsigned int cpu)
{
struct rapl_pmu *pmu = cpu_to_rapl_pmu(cpu);
int target;
/* Check if exiting cpu is used for collecting rapl events */
if (!cpumask_test_and_clear_cpu(cpu, &rapl_cpu_mask))
- return;
+ return 0;
pmu->cpu = -1;
/* Find a new cpu to collect rapl events */
pmu->cpu = target;
perf_pmu_migrate_context(pmu->pmu, cpu, target);
}
+ return 0;
}
- static void rapl_cpu_init(int cpu)
+ static int rapl_cpu_online(unsigned int cpu)
{
struct rapl_pmu *pmu = cpu_to_rapl_pmu(cpu);
int target;
*/
target = cpumask_any_and(&rapl_cpu_mask, topology_core_cpumask(cpu));
if (target < nr_cpu_ids)
- return;
+ return 0;
cpumask_set_cpu(cpu, &rapl_cpu_mask);
pmu->cpu = cpu;
+ return 0;
}
- static int rapl_cpu_prepare(int cpu)
+ static int rapl_cpu_prepare(unsigned int cpu)
{
struct rapl_pmu *pmu = cpu_to_rapl_pmu(cpu);
return 0;
}
- static int rapl_cpu_notifier(struct notifier_block *self,
- unsigned long action, void *hcpu)
- {
- unsigned int cpu = (long)hcpu;
-
- switch (action & ~CPU_TASKS_FROZEN) {
- case CPU_UP_PREPARE:
- rapl_cpu_prepare(cpu);
- break;
-
- case CPU_DOWN_FAILED:
- case CPU_ONLINE:
- rapl_cpu_init(cpu);
- break;
-
- case CPU_DOWN_PREPARE:
- rapl_cpu_exit(cpu);
- break;
- }
- return NOTIFY_OK;
- }
-
- static struct notifier_block rapl_cpu_nb = {
- .notifier_call = rapl_cpu_notifier,
- .priority = CPU_PRI_PERF + 1,
- };
-
static int rapl_check_hw_unit(bool apply_quirk)
{
u64 msr_rapl_power_unit_bits;
}
}
- static int __init rapl_prepare_cpus(void)
- {
- unsigned int cpu, pkg;
- int ret;
-
- for_each_online_cpu(cpu) {
- pkg = topology_logical_package_id(cpu);
- if (rapl_pmus->pmus[pkg])
- continue;
-
- ret = rapl_cpu_prepare(cpu);
- if (ret)
- return ret;
- rapl_cpu_init(cpu);
- }
- return 0;
- }
-
static void cleanup_rapl_pmus(void)
{
int i;
};
static const struct x86_cpu_id rapl_cpu_match[] __initconst = {
- X86_RAPL_MODEL_MATCH(42, snb_rapl_init), /* Sandy Bridge */
- X86_RAPL_MODEL_MATCH(45, snbep_rapl_init), /* Sandy Bridge-EP */
+ X86_RAPL_MODEL_MATCH(INTEL_FAM6_SANDYBRIDGE, snb_rapl_init),
+ X86_RAPL_MODEL_MATCH(INTEL_FAM6_SANDYBRIDGE_X, snbep_rapl_init),
- X86_RAPL_MODEL_MATCH(58, snb_rapl_init), /* Ivy Bridge */
- X86_RAPL_MODEL_MATCH(62, snbep_rapl_init), /* IvyTown */
+ X86_RAPL_MODEL_MATCH(INTEL_FAM6_IVYBRIDGE, snb_rapl_init),
+ X86_RAPL_MODEL_MATCH(INTEL_FAM6_IVYBRIDGE_X, snbep_rapl_init),
- X86_RAPL_MODEL_MATCH(60, hsw_rapl_init), /* Haswell */
- X86_RAPL_MODEL_MATCH(63, hsx_rapl_init), /* Haswell-Server */
- X86_RAPL_MODEL_MATCH(69, hsw_rapl_init), /* Haswell-Celeron */
- X86_RAPL_MODEL_MATCH(70, hsw_rapl_init), /* Haswell GT3e */
+ X86_RAPL_MODEL_MATCH(INTEL_FAM6_HASWELL_CORE, hsw_rapl_init),
+ X86_RAPL_MODEL_MATCH(INTEL_FAM6_HASWELL_X, hsw_rapl_init),
+ X86_RAPL_MODEL_MATCH(INTEL_FAM6_HASWELL_ULT, hsw_rapl_init),
+ X86_RAPL_MODEL_MATCH(INTEL_FAM6_HASWELL_GT3E, hsw_rapl_init),
- X86_RAPL_MODEL_MATCH(61, hsw_rapl_init), /* Broadwell */
- X86_RAPL_MODEL_MATCH(71, hsw_rapl_init), /* Broadwell-H */
- X86_RAPL_MODEL_MATCH(79, hsx_rapl_init), /* Broadwell-Server */
- X86_RAPL_MODEL_MATCH(86, hsx_rapl_init), /* Broadwell Xeon D */
+ X86_RAPL_MODEL_MATCH(INTEL_FAM6_BROADWELL_CORE, hsw_rapl_init),
+ X86_RAPL_MODEL_MATCH(INTEL_FAM6_BROADWELL_GT3E, hsw_rapl_init),
+ X86_RAPL_MODEL_MATCH(INTEL_FAM6_BROADWELL_X, hsw_rapl_init),
+ X86_RAPL_MODEL_MATCH(INTEL_FAM6_BROADWELL_XEON_D, hsw_rapl_init),
- X86_RAPL_MODEL_MATCH(87, knl_rapl_init), /* Knights Landing */
+ X86_RAPL_MODEL_MATCH(INTEL_FAM6_XEON_PHI_KNL, knl_rapl_init),
- X86_RAPL_MODEL_MATCH(78, skl_rapl_init), /* Skylake */
- X86_RAPL_MODEL_MATCH(94, skl_rapl_init), /* Skylake H/S */
+ X86_RAPL_MODEL_MATCH(INTEL_FAM6_SKYLAKE_MOBILE, skl_rapl_init),
+ X86_RAPL_MODEL_MATCH(INTEL_FAM6_SKYLAKE_DESKTOP, skl_rapl_init),
+ X86_RAPL_MODEL_MATCH(INTEL_FAM6_SKYLAKE_X, hsx_rapl_init),
{},
};
if (ret)
return ret;
- cpu_notifier_register_begin();
+ /*
+ * Install callbacks. Core will call them for each online cpu.
+ */
- ret = rapl_prepare_cpus();
+ ret = cpuhp_setup_state(CPUHP_PERF_X86_RAPL_PREP, "PERF_X86_RAPL_PREP",
+ rapl_cpu_prepare, NULL);
if (ret)
goto out;
+ ret = cpuhp_setup_state(CPUHP_AP_PERF_X86_RAPL_ONLINE,
+ "AP_PERF_X86_RAPL_ONLINE",
+ rapl_cpu_online, rapl_cpu_offline);
+ if (ret)
+ goto out1;
+
ret = perf_pmu_register(&rapl_pmus->pmu, "power", -1);
if (ret)
- goto out;
+ goto out2;
- __register_cpu_notifier(&rapl_cpu_nb);
- cpu_notifier_register_done();
rapl_advertise();
return 0;
+ out2:
+ cpuhp_remove_state(CPUHP_AP_PERF_X86_RAPL_ONLINE);
+ out1:
+ cpuhp_remove_state(CPUHP_PERF_X86_RAPL_PREP);
out:
pr_warn("Initialization failed (%d), disabled\n", ret);
cleanup_rapl_pmus();
- cpu_notifier_register_done();
return ret;
}
module_init(rapl_pmu_init);
static void __exit intel_rapl_exit(void)
{
- cpu_notifier_register_begin();
- __unregister_cpu_notifier(&rapl_cpu_nb);
+ cpuhp_remove_state_nocalls(CPUHP_AP_PERF_X86_RAPL_ONLINE);
+ cpuhp_remove_state_nocalls(CPUHP_PERF_X86_RAPL_PREP);
perf_pmu_unregister(&rapl_pmus->pmu);
cleanup_rapl_pmus();
- cpu_notifier_register_done();
}
module_exit(intel_rapl_exit);
#include <asm/cpu_device_id.h>
+#include <asm/intel-family.h>
#include "uncore.h"
static struct intel_uncore_type *empty_uncore[] = { NULL, };
static int uncore_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
{
struct intel_uncore_type *type;
- struct intel_uncore_pmu *pmu;
+ struct intel_uncore_pmu *pmu = NULL;
struct intel_uncore_box *box;
int phys_id, pkg, ret;
}
type = uncore_pci_uncores[UNCORE_PCI_DEV_TYPE(id->driver_data)];
+
/*
- * for performance monitoring unit with multiple boxes,
- * each box has a different function id.
- */
- pmu = &type->pmus[UNCORE_PCI_DEV_IDX(id->driver_data)];
- /* Knights Landing uses a common PCI device ID for multiple instances of
- * an uncore PMU device type. There is only one entry per device type in
- * the knl_uncore_pci_ids table inspite of multiple devices present for
- * some device types. Hence PCI device idx would be 0 for all devices.
- * So increment pmu pointer to point to an unused array element.
+ * Some platforms, e.g. Knights Landing, use a common PCI device ID
+ * for multiple instances of an uncore PMU device type. We should check
+ * PCI slot and func to indicate the uncore box.
*/
- if (boot_cpu_data.x86_model == 87) {
- while (pmu->func_id >= 0)
- pmu++;
+ if (id->driver_data & ~0xffff) {
+ struct pci_driver *pci_drv = pdev->driver;
+ const struct pci_device_id *ids = pci_drv->id_table;
+ unsigned int devfn;
+
+ while (ids && ids->vendor) {
+ if ((ids->vendor == pdev->vendor) &&
+ (ids->device == pdev->device)) {
+ devfn = PCI_DEVFN(UNCORE_PCI_DEV_DEV(ids->driver_data),
+ UNCORE_PCI_DEV_FUNC(ids->driver_data));
+ if (devfn == pdev->devfn) {
+ pmu = &type->pmus[UNCORE_PCI_DEV_IDX(ids->driver_data)];
+ break;
+ }
+ }
+ ids++;
+ }
+ if (pmu == NULL)
+ return -ENODEV;
+ } else {
+ /*
+ * for performance monitoring unit with multiple boxes,
+ * each box has a different function id.
+ */
+ pmu = &type->pmus[UNCORE_PCI_DEV_IDX(id->driver_data)];
}
if (WARN_ON_ONCE(pmu->boxes[pkg] != NULL))
static void uncore_pci_remove(struct pci_dev *pdev)
{
- struct intel_uncore_box *box = pci_get_drvdata(pdev);
+ struct intel_uncore_box *box;
struct intel_uncore_pmu *pmu;
int i, phys_id, pkg;
}
}
- static void uncore_cpu_dying(int cpu)
+ static int uncore_cpu_dying(unsigned int cpu)
{
struct intel_uncore_type *type, **types = uncore_msr_uncores;
struct intel_uncore_pmu *pmu;
uncore_box_exit(box);
}
}
+ return 0;
}
- static void uncore_cpu_starting(int cpu, bool init)
+ static int first_init;
+
+ static int uncore_cpu_starting(unsigned int cpu)
{
struct intel_uncore_type *type, **types = uncore_msr_uncores;
struct intel_uncore_pmu *pmu;
struct intel_uncore_box *box;
int i, pkg, ncpus = 1;
- if (init) {
+ if (first_init) {
/*
* On init we get the number of online cpus in the package
* and set refcount for all of them.
uncore_box_init(box);
}
}
+
+ return 0;
}
- static int uncore_cpu_prepare(int cpu)
+ static int uncore_cpu_prepare(unsigned int cpu)
{
struct intel_uncore_type *type, **types = uncore_msr_uncores;
struct intel_uncore_pmu *pmu;
uncore_change_type_ctx(*uncores, old_cpu, new_cpu);
}
- static void uncore_event_exit_cpu(int cpu)
+ static int uncore_event_cpu_offline(unsigned int cpu)
{
int target;
/* Check if exiting cpu is used for collecting uncore events */
if (!cpumask_test_and_clear_cpu(cpu, &uncore_cpu_mask))
- return;
+ return 0;
/* Find a new cpu to collect uncore events */
target = cpumask_any_but(topology_core_cpumask(cpu), cpu);
uncore_change_context(uncore_msr_uncores, cpu, target);
uncore_change_context(uncore_pci_uncores, cpu, target);
+ return 0;
}
- static void uncore_event_init_cpu(int cpu)
+ static int uncore_event_cpu_online(unsigned int cpu)
{
int target;
*/
target = cpumask_any_and(&uncore_cpu_mask, topology_core_cpumask(cpu));
if (target < nr_cpu_ids)
- return;
+ return 0;
cpumask_set_cpu(cpu, &uncore_cpu_mask);
uncore_change_context(uncore_msr_uncores, -1, cpu);
uncore_change_context(uncore_pci_uncores, -1, cpu);
+ return 0;
}
- static int uncore_cpu_notifier(struct notifier_block *self,
- unsigned long action, void *hcpu)
- {
- unsigned int cpu = (long)hcpu;
-
- switch (action & ~CPU_TASKS_FROZEN) {
- case CPU_UP_PREPARE:
- return notifier_from_errno(uncore_cpu_prepare(cpu));
-
- case CPU_STARTING:
- uncore_cpu_starting(cpu, false);
- case CPU_DOWN_FAILED:
- uncore_event_init_cpu(cpu);
- break;
-
- case CPU_UP_CANCELED:
- case CPU_DYING:
- uncore_cpu_dying(cpu);
- break;
-
- case CPU_DOWN_PREPARE:
- uncore_event_exit_cpu(cpu);
- break;
- }
- return NOTIFY_OK;
- }
-
- static struct notifier_block uncore_cpu_nb = {
- .notifier_call = uncore_cpu_notifier,
- /*
- * to migrate uncore events, our notifier should be executed
- * before perf core's notifier.
- */
- .priority = CPU_PRI_PERF + 1,
- };
-
static int __init type_pmu_register(struct intel_uncore_type *type)
{
int i, ret;
return ret;
}
- static void __init uncore_cpu_setup(void *dummy)
- {
- uncore_cpu_starting(smp_processor_id(), true);
- }
-
- /* Lazy to avoid allocation of a few bytes for the normal case */
- static __initdata DECLARE_BITMAP(packages, MAX_LOCAL_APIC);
-
- static int __init uncore_cpumask_init(bool msr)
- {
- unsigned int cpu;
-
- for_each_online_cpu(cpu) {
- unsigned int pkg = topology_logical_package_id(cpu);
- int ret;
-
- if (test_and_set_bit(pkg, packages))
- continue;
- /*
- * The first online cpu of each package allocates and takes
- * the refcounts for all other online cpus in that package.
- * If msrs are not enabled no allocation is required.
- */
- if (msr) {
- ret = uncore_cpu_prepare(cpu);
- if (ret)
- return ret;
- }
- uncore_event_init_cpu(cpu);
- smp_call_function_single(cpu, uncore_cpu_setup, NULL, 1);
- }
- __register_cpu_notifier(&uncore_cpu_nb);
- return 0;
- }
-
#define X86_UNCORE_MODEL_MATCH(model, init) \
{ X86_VENDOR_INTEL, 6, model, X86_FEATURE_ANY, (unsigned long)&init }
};
static const struct intel_uncore_init_fun skl_uncore_init __initconst = {
+ .cpu_init = skl_uncore_cpu_init,
.pci_init = skl_uncore_pci_init,
};
static const struct x86_cpu_id intel_uncore_match[] __initconst = {
- X86_UNCORE_MODEL_MATCH(26, nhm_uncore_init), /* Nehalem */
- X86_UNCORE_MODEL_MATCH(30, nhm_uncore_init),
- X86_UNCORE_MODEL_MATCH(37, nhm_uncore_init), /* Westmere */
- X86_UNCORE_MODEL_MATCH(44, nhm_uncore_init),
- X86_UNCORE_MODEL_MATCH(42, snb_uncore_init), /* Sandy Bridge */
- X86_UNCORE_MODEL_MATCH(58, ivb_uncore_init), /* Ivy Bridge */
- X86_UNCORE_MODEL_MATCH(60, hsw_uncore_init), /* Haswell */
- X86_UNCORE_MODEL_MATCH(69, hsw_uncore_init), /* Haswell Celeron */
- X86_UNCORE_MODEL_MATCH(70, hsw_uncore_init), /* Haswell */
- X86_UNCORE_MODEL_MATCH(61, bdw_uncore_init), /* Broadwell */
- X86_UNCORE_MODEL_MATCH(71, bdw_uncore_init), /* Broadwell */
- X86_UNCORE_MODEL_MATCH(45, snbep_uncore_init), /* Sandy Bridge-EP */
- X86_UNCORE_MODEL_MATCH(46, nhmex_uncore_init), /* Nehalem-EX */
- X86_UNCORE_MODEL_MATCH(47, nhmex_uncore_init), /* Westmere-EX aka. Xeon E7 */
- X86_UNCORE_MODEL_MATCH(62, ivbep_uncore_init), /* Ivy Bridge-EP */
- X86_UNCORE_MODEL_MATCH(63, hswep_uncore_init), /* Haswell-EP */
- X86_UNCORE_MODEL_MATCH(79, bdx_uncore_init), /* BDX-EP */
- X86_UNCORE_MODEL_MATCH(86, bdx_uncore_init), /* BDX-DE */
- X86_UNCORE_MODEL_MATCH(87, knl_uncore_init), /* Knights Landing */
- X86_UNCORE_MODEL_MATCH(94, skl_uncore_init), /* SkyLake */
+ X86_UNCORE_MODEL_MATCH(INTEL_FAM6_NEHALEM_EP, nhm_uncore_init),
+ X86_UNCORE_MODEL_MATCH(INTEL_FAM6_NEHALEM, nhm_uncore_init),
+ X86_UNCORE_MODEL_MATCH(INTEL_FAM6_WESTMERE, nhm_uncore_init),
+ X86_UNCORE_MODEL_MATCH(INTEL_FAM6_WESTMERE_EP, nhm_uncore_init),
+ X86_UNCORE_MODEL_MATCH(INTEL_FAM6_SANDYBRIDGE, snb_uncore_init),
+ X86_UNCORE_MODEL_MATCH(INTEL_FAM6_IVYBRIDGE, ivb_uncore_init),
+ X86_UNCORE_MODEL_MATCH(INTEL_FAM6_HASWELL_CORE, hsw_uncore_init),
+ X86_UNCORE_MODEL_MATCH(INTEL_FAM6_HASWELL_ULT, hsw_uncore_init),
+ X86_UNCORE_MODEL_MATCH(INTEL_FAM6_HASWELL_GT3E, hsw_uncore_init),
+ X86_UNCORE_MODEL_MATCH(INTEL_FAM6_BROADWELL_CORE, bdw_uncore_init),
+ X86_UNCORE_MODEL_MATCH(INTEL_FAM6_BROADWELL_GT3E, bdw_uncore_init),
+ X86_UNCORE_MODEL_MATCH(INTEL_FAM6_SANDYBRIDGE_X, snbep_uncore_init),
+ X86_UNCORE_MODEL_MATCH(INTEL_FAM6_NEHALEM_EX, nhmex_uncore_init),
+ X86_UNCORE_MODEL_MATCH(INTEL_FAM6_WESTMERE_EX, nhmex_uncore_init),
+ X86_UNCORE_MODEL_MATCH(INTEL_FAM6_IVYBRIDGE_X, ivbep_uncore_init),
+ X86_UNCORE_MODEL_MATCH(INTEL_FAM6_HASWELL_X, hswep_uncore_init),
+ X86_UNCORE_MODEL_MATCH(INTEL_FAM6_BROADWELL_X, bdx_uncore_init),
+ X86_UNCORE_MODEL_MATCH(INTEL_FAM6_BROADWELL_XEON_D, bdx_uncore_init),
+ X86_UNCORE_MODEL_MATCH(INTEL_FAM6_XEON_PHI_KNL, knl_uncore_init),
+ X86_UNCORE_MODEL_MATCH(INTEL_FAM6_SKYLAKE_DESKTOP,skl_uncore_init),
+ X86_UNCORE_MODEL_MATCH(INTEL_FAM6_SKYLAKE_MOBILE, skl_uncore_init),
{},
};
if (cret && pret)
return -ENODEV;
- cpu_notifier_register_begin();
- ret = uncore_cpumask_init(!cret);
- if (ret)
- goto err;
- cpu_notifier_register_done();
+ /*
+ * Install callbacks. Core will call them for each online cpu.
+ *
+ * The first online cpu of each package allocates and takes
+ * the refcounts for all other online cpus in that package.
+ * If msrs are not enabled no allocation is required and
+ * uncore_cpu_prepare() is not called for each online cpu.
+ */
+ if (!cret) {
+ ret = cpuhp_setup_state(CPUHP_PERF_X86_UNCORE_PREP,
+ "PERF_X86_UNCORE_PREP",
+ uncore_cpu_prepare, NULL);
+ if (ret)
+ goto err;
+ } else {
+ cpuhp_setup_state_nocalls(CPUHP_PERF_X86_UNCORE_PREP,
+ "PERF_X86_UNCORE_PREP",
+ uncore_cpu_prepare, NULL);
+ }
+ first_init = 1;
+ cpuhp_setup_state(CPUHP_AP_PERF_X86_UNCORE_STARTING,
+ "AP_PERF_X86_UNCORE_STARTING",
+ uncore_cpu_starting, uncore_cpu_dying);
+ first_init = 0;
+ cpuhp_setup_state(CPUHP_AP_PERF_X86_UNCORE_ONLINE,
+ "AP_PERF_X86_UNCORE_ONLINE",
+ uncore_event_cpu_online, uncore_event_cpu_offline);
return 0;
err:
on_each_cpu_mask(&uncore_cpu_mask, uncore_exit_boxes, NULL, 1);
uncore_types_exit(uncore_msr_uncores);
uncore_pci_exit();
- cpu_notifier_register_done();
return ret;
}
module_init(intel_uncore_init);
static void __exit intel_uncore_exit(void)
{
- cpu_notifier_register_begin();
- __unregister_cpu_notifier(&uncore_cpu_nb);
+ cpuhp_remove_state_nocalls(CPUHP_AP_PERF_X86_UNCORE_ONLINE);
+ cpuhp_remove_state_nocalls(CPUHP_AP_PERF_X86_UNCORE_STARTING);
+ cpuhp_remove_state_nocalls(CPUHP_PERF_X86_UNCORE_PREP);
uncore_types_exit(uncore_msr_uncores);
uncore_pci_exit();
- cpu_notifier_register_done();
}
module_exit(intel_uncore_exit);
}
}
- /*
- * At CPU state changes, update the x2apic cluster sibling info.
- */
- static int
- update_clusterinfo(struct notifier_block *nfb, unsigned long action, void *hcpu)
+ /*
+ * At CPU state changes, update the x2apic cluster sibling info.
+ */
+ int x2apic_prepare_cpu(unsigned int cpu)
{
- unsigned int this_cpu = (unsigned long)hcpu;
- unsigned int cpu;
- int err = 0;
-
- switch (action) {
- case CPU_UP_PREPARE:
- if (!zalloc_cpumask_var(&per_cpu(cpus_in_cluster, this_cpu),
- GFP_KERNEL)) {
- err = -ENOMEM;
- } else if (!zalloc_cpumask_var(&per_cpu(ipi_mask, this_cpu),
- GFP_KERNEL)) {
- free_cpumask_var(per_cpu(cpus_in_cluster, this_cpu));
- err = -ENOMEM;
- }
- break;
- case CPU_UP_CANCELED:
- case CPU_UP_CANCELED_FROZEN:
- case CPU_DEAD:
- for_each_online_cpu(cpu) {
- if (x2apic_cluster(this_cpu) != x2apic_cluster(cpu))
- continue;
- cpumask_clear_cpu(this_cpu, per_cpu(cpus_in_cluster, cpu));
- cpumask_clear_cpu(cpu, per_cpu(cpus_in_cluster, this_cpu));
- }
- free_cpumask_var(per_cpu(cpus_in_cluster, this_cpu));
- free_cpumask_var(per_cpu(ipi_mask, this_cpu));
- break;
+ if (!zalloc_cpumask_var(&per_cpu(cpus_in_cluster, cpu), GFP_KERNEL))
+ return -ENOMEM;
+
+ if (!zalloc_cpumask_var(&per_cpu(ipi_mask, cpu), GFP_KERNEL)) {
+ free_cpumask_var(per_cpu(cpus_in_cluster, cpu));
+ return -ENOMEM;
}
- return notifier_from_errno(err);
+ return 0;
}
- static struct notifier_block x2apic_cpu_notifier = {
- .notifier_call = update_clusterinfo,
- };
-
- static int x2apic_init_cpu_notifier(void)
+ int x2apic_dead_cpu(unsigned int this_cpu)
{
- int cpu = smp_processor_id();
-
- zalloc_cpumask_var(&per_cpu(cpus_in_cluster, cpu), GFP_KERNEL);
- zalloc_cpumask_var(&per_cpu(ipi_mask, cpu), GFP_KERNEL);
+ int cpu;
- BUG_ON(!per_cpu(cpus_in_cluster, cpu) || !per_cpu(ipi_mask, cpu));
-
- cpumask_set_cpu(cpu, per_cpu(cpus_in_cluster, cpu));
- register_hotcpu_notifier(&x2apic_cpu_notifier);
- return 1;
+ for_each_online_cpu(cpu) {
+ if (x2apic_cluster(this_cpu) != x2apic_cluster(cpu))
+ continue;
+ cpumask_clear_cpu(this_cpu, per_cpu(cpus_in_cluster, cpu));
+ cpumask_clear_cpu(cpu, per_cpu(cpus_in_cluster, this_cpu));
+ }
+ free_cpumask_var(per_cpu(cpus_in_cluster, this_cpu));
+ free_cpumask_var(per_cpu(ipi_mask, this_cpu));
+ return 0;
}
static int x2apic_cluster_probe(void)
{
- if (x2apic_mode)
- return x2apic_init_cpu_notifier();
- else
+ int cpu = smp_processor_id();
+
+ if (!x2apic_mode)
return 0;
+
+ cpumask_set_cpu(cpu, per_cpu(cpus_in_cluster, cpu));
+ cpuhp_setup_state(CPUHP_X2APIC_PREPARE, "X2APIC_PREPARE",
+ x2apic_prepare_cpu, x2apic_dead_cpu);
+ return 1;
}
static const struct cpumask *x2apic_cluster_target_cpus(void)
.get_apic_id = x2apic_get_apic_id,
.set_apic_id = x2apic_set_apic_id,
- .apic_id_mask = 0xFFFFFFFFu,
.cpu_mask_to_apicid_and = x2apic_cpu_mask_to_apicid_and,
#include <linux/irqbypass.h>
#include <trace/events/kvm.h>
-#define CREATE_TRACE_POINTS
-#include "trace.h"
-
#include <asm/debugreg.h>
#include <asm/msr.h>
#include <asm/desc.h>
#include <asm/div64.h>
#include <asm/irq_remapping.h>
+#define CREATE_TRACE_POINTS
+#include "trace.h"
+
#define MAX_IO_MSRS 256
#define KVM_MAX_MCE_BANKS 32
#define KVM_MCE_CAP_SUPPORTED (MCG_CTL_P | MCG_SER_P)
}
EXPORT_SYMBOL_GPL(kvm_fast_pio_out);
- static void tsc_bad(void *info)
+ static int kvmclock_cpu_down_prep(unsigned int cpu)
{
__this_cpu_write(cpu_tsc_khz, 0);
+ return 0;
}
static void tsc_khz_changed(void *data)
.notifier_call = kvmclock_cpufreq_notifier
};
- static int kvmclock_cpu_notifier(struct notifier_block *nfb,
- unsigned long action, void *hcpu)
+ static int kvmclock_cpu_online(unsigned int cpu)
{
- unsigned int cpu = (unsigned long)hcpu;
-
- switch (action) {
- case CPU_ONLINE:
- case CPU_DOWN_FAILED:
- smp_call_function_single(cpu, tsc_khz_changed, NULL, 1);
- break;
- case CPU_DOWN_PREPARE:
- smp_call_function_single(cpu, tsc_bad, NULL, 1);
- break;
- }
- return NOTIFY_OK;
+ tsc_khz_changed(NULL);
+ return 0;
}
- static struct notifier_block kvmclock_cpu_notifier_block = {
- .notifier_call = kvmclock_cpu_notifier,
- .priority = -INT_MAX
- };
-
static void kvm_timer_init(void)
{
int cpu;
max_tsc_khz = tsc_khz;
- cpu_notifier_register_begin();
if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) {
#ifdef CONFIG_CPU_FREQ
struct cpufreq_policy policy;
CPUFREQ_TRANSITION_NOTIFIER);
}
pr_debug("kvm: max_tsc_khz = %ld\n", max_tsc_khz);
- for_each_online_cpu(cpu)
- smp_call_function_single(cpu, tsc_khz_changed, NULL, 1);
-
- __register_hotcpu_notifier(&kvmclock_cpu_notifier_block);
- cpu_notifier_register_done();
+ cpuhp_setup_state(CPUHP_AP_X86_KVM_CLK_ONLINE, "AP_X86_KVM_CLK_ONLINE",
+ kvmclock_cpu_online, kvmclock_cpu_down_prep);
}
static DEFINE_PER_CPU(struct kvm_vcpu *, current_vcpu);
if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC))
cpufreq_unregister_notifier(&kvmclock_cpufreq_notifier_block,
CPUFREQ_TRANSITION_NOTIFIER);
- unregister_hotcpu_notifier(&kvmclock_cpu_notifier_block);
+ cpuhp_remove_state_nocalls(CPUHP_AP_X86_KVM_CLK_ONLINE);
#ifdef CONFIG_X86_64
pvclock_gtod_unregister_notifier(&pvclock_gtod_notifier);
#endif
pr->performance_platform_limit);
break;
case ACPI_PROCESSOR_NOTIFY_POWER:
- acpi_processor_cst_has_changed(pr);
+ acpi_processor_power_state_has_changed(pr);
acpi_bus_generate_netlink_event(device->pnp.device_class,
dev_name(&device->dev), event, 0);
break;
struct acpi_device *device;
action &= ~CPU_TASKS_FROZEN;
- /*
- * CPU_STARTING and CPU_DYING must not sleep. Return here since
- * acpi_bus_get_device() may sleep.
- */
- if (action == CPU_STARTING || action == CPU_DYING)
+ switch (action) {
+ case CPU_ONLINE:
+ case CPU_DEAD:
+ break;
+ default:
return NOTIFY_DONE;
+ }
if (!pr || acpi_bus_get_device(pr->handle, &device))
return NOTIFY_DONE;
ARMADA_370_XP_SW_TRIG_INT_OFFS);
}
- static int armada_xp_mpic_secondary_init(struct notifier_block *nfb,
- unsigned long action, void *hcpu)
+ static int armada_xp_mpic_starting_cpu(unsigned int cpu)
{
- if (action == CPU_STARTING || action == CPU_STARTING_FROZEN) {
- armada_xp_mpic_perf_init();
- armada_xp_mpic_smp_cpu_init();
- }
-
- return NOTIFY_OK;
+ armada_xp_mpic_perf_init();
+ armada_xp_mpic_smp_cpu_init();
+ return 0;
}
- static struct notifier_block armada_370_xp_mpic_cpu_notifier = {
- .notifier_call = armada_xp_mpic_secondary_init,
- .priority = 100,
- };
-
- static int mpic_cascaded_secondary_init(struct notifier_block *nfb,
- unsigned long action, void *hcpu)
+ static int mpic_cascaded_starting_cpu(unsigned int cpu)
{
- if (action == CPU_STARTING || action == CPU_STARTING_FROZEN) {
- armada_xp_mpic_perf_init();
- enable_percpu_irq(parent_irq, IRQ_TYPE_NONE);
- }
-
- return NOTIFY_OK;
+ armada_xp_mpic_perf_init();
+ enable_percpu_irq(parent_irq, IRQ_TYPE_NONE);
+ return 0;
}
-
- static struct notifier_block mpic_cascaded_cpu_notifier = {
- .notifier_call = mpic_cascaded_secondary_init,
- .priority = 100,
- };
- #endif /* CONFIG_SMP */
+ #endif
static const struct irq_domain_ops armada_370_xp_mpic_irq_ops = {
.map = armada_370_xp_mpic_irq_map,
writel(1, per_cpu_int_base + ARMADA_370_XP_INT_CLEAR_MASK_OFFS);
}
-struct syscore_ops armada_370_xp_mpic_syscore_ops = {
+static struct syscore_ops armada_370_xp_mpic_syscore_ops = {
.suspend = armada_370_xp_mpic_suspend,
.resume = armada_370_xp_mpic_resume,
};
set_handle_irq(armada_370_xp_handle_irq);
#ifdef CONFIG_SMP
set_smp_cross_call(armada_mpic_send_doorbell);
- register_cpu_notifier(&armada_370_xp_mpic_cpu_notifier);
+ cpuhp_setup_state_nocalls(CPUHP_AP_IRQ_ARMADA_XP_STARTING,
+ "AP_IRQ_ARMADA_XP_STARTING",
+ armada_xp_mpic_starting_cpu, NULL);
#endif
} else {
#ifdef CONFIG_SMP
- register_cpu_notifier(&mpic_cascaded_cpu_notifier);
+ cpuhp_setup_state_nocalls(CPUHP_AP_IRQ_ARMADA_CASC_STARTING,
+ "AP_IRQ_ARMADA_CASC_STARTING",
+ mpic_cascaded_starting_cpu, NULL);
#endif
irq_set_chained_handler(parent_irq,
armada_370_xp_mpic_handle_cascade_irq);
} else if (stat) {
u32 hwirq = ffs(stat) - 1;
- handle_IRQ(irq_linear_revmap(intc.domain, hwirq), regs);
+ handle_domain_irq(intc.domain, hwirq, regs);
}
}
}
}
- /* Unmasks the IPI on the CPU when it's online. */
- static int bcm2836_arm_irqchip_cpu_notify(struct notifier_block *nfb,
- unsigned long action, void *hcpu)
+ static int bcm2836_cpu_starting(unsigned int cpu)
{
- unsigned int cpu = (unsigned long)hcpu;
- unsigned int int_reg = LOCAL_MAILBOX_INT_CONTROL0;
- unsigned int mailbox = 0;
-
- if (action == CPU_STARTING || action == CPU_STARTING_FROZEN)
- bcm2836_arm_irqchip_unmask_per_cpu_irq(int_reg, mailbox, cpu);
- else if (action == CPU_DYING)
- bcm2836_arm_irqchip_mask_per_cpu_irq(int_reg, mailbox, cpu);
-
- return NOTIFY_OK;
+ bcm2836_arm_irqchip_unmask_per_cpu_irq(LOCAL_MAILBOX_INT_CONTROL0, 0,
+ cpu);
+ return 0;
}
- static struct notifier_block bcm2836_arm_irqchip_cpu_notifier = {
- .notifier_call = bcm2836_arm_irqchip_cpu_notify,
- .priority = 100,
- };
+ static int bcm2836_cpu_dying(unsigned int cpu)
+ {
+ bcm2836_arm_irqchip_mask_per_cpu_irq(LOCAL_MAILBOX_INT_CONTROL0, 0,
+ cpu);
+ return 0;
+ }
#ifdef CONFIG_ARM
-int __init bcm2836_smp_boot_secondary(unsigned int cpu,
- struct task_struct *idle)
+static int __init bcm2836_smp_boot_secondary(unsigned int cpu,
+ struct task_struct *idle)
{
unsigned long secondary_startup_phys =
(unsigned long)virt_to_phys((void *)secondary_startup);
{
#ifdef CONFIG_SMP
/* Unmask IPIs to the boot CPU. */
- bcm2836_arm_irqchip_cpu_notify(&bcm2836_arm_irqchip_cpu_notifier,
- CPU_STARTING,
- (void *)(uintptr_t)smp_processor_id());
- register_cpu_notifier(&bcm2836_arm_irqchip_cpu_notifier);
+ cpuhp_setup_state(CPUHP_AP_IRQ_BCM2836_STARTING,
+ "AP_IRQ_BCM2836_STARTING", bcm2836_cpu_starting,
+ bcm2836_cpu_dying);
set_smp_cross_call(bcm2836_arm_irqchip_send_ipi);
void __iomem *raw_dist_base;
void __iomem *raw_cpu_base;
u32 percpu_offset;
-#ifdef CONFIG_CPU_PM
+#if defined(CONFIG_CPU_PM) || defined(CONFIG_ARM_GIC_PM)
u32 saved_spi_enable[DIV_ROUND_UP(1020, 32)];
u32 saved_spi_active[DIV_ROUND_UP(1020, 32)];
u32 saved_spi_conf[DIV_ROUND_UP(1020, 16)];
}
-static void __init gic_dist_init(struct gic_chip_data *gic)
+static void gic_dist_init(struct gic_chip_data *gic)
{
unsigned int i;
u32 cpumask;
return 0;
}
-#ifdef CONFIG_CPU_PM
+#if defined(CONFIG_CPU_PM) || defined(CONFIG_ARM_GIC_PM)
/*
* Saves the GIC distributor registers during suspend or idle. Must be called
* with interrupts disabled but before powering down the GIC. After calling
* this function, no interrupts will be delivered by the GIC, and another
* platform-specific wakeup source must be enabled.
*/
-static void gic_dist_save(struct gic_chip_data *gic)
+void gic_dist_save(struct gic_chip_data *gic)
{
unsigned int gic_irqs;
void __iomem *dist_base;
* handled normally, but any edge interrupts that occured will not be seen by
* the GIC and need to be handled by the platform-specific wakeup source.
*/
-static void gic_dist_restore(struct gic_chip_data *gic)
+void gic_dist_restore(struct gic_chip_data *gic)
{
unsigned int gic_irqs;
unsigned int i;
writel_relaxed(GICD_ENABLE, dist_base + GIC_DIST_CTRL);
}
-static void gic_cpu_save(struct gic_chip_data *gic)
+void gic_cpu_save(struct gic_chip_data *gic)
{
int i;
u32 *ptr;
}
-static void gic_cpu_restore(struct gic_chip_data *gic)
+void gic_cpu_restore(struct gic_chip_data *gic)
{
int i;
u32 *ptr;
.notifier_call = gic_notifier,
};
-static int __init gic_pm_init(struct gic_chip_data *gic)
+static int gic_pm_init(struct gic_chip_data *gic)
{
gic->saved_ppi_enable = __alloc_percpu(DIV_ROUND_UP(32, 32) * 4,
sizeof(u32));
return -ENOMEM;
}
#else
-static int __init gic_pm_init(struct gic_chip_data *gic)
+static int gic_pm_init(struct gic_chip_data *gic)
{
return 0;
}
return -EINVAL;
}
- #ifdef CONFIG_SMP
- static int gic_secondary_init(struct notifier_block *nfb, unsigned long action,
- void *hcpu)
+ static int gic_starting_cpu(unsigned int cpu)
{
- if (action == CPU_STARTING || action == CPU_STARTING_FROZEN)
- gic_cpu_init(&gic_data[0]);
- return NOTIFY_OK;
+ gic_cpu_init(&gic_data[0]);
+ return 0;
}
- /*
- * Notifier for enabling the GIC CPU interface. Set an arbitrarily high
- * priority because the GIC needs to be up before the ARM generic timers.
- */
- static struct notifier_block gic_cpu_notifier = {
- .notifier_call = gic_secondary_init,
- .priority = 100,
- };
- #endif
-
static int gic_irq_domain_alloc(struct irq_domain *domain, unsigned int virq,
unsigned int nr_irqs, void *arg)
{
.unmap = gic_irq_domain_unmap,
};
-static int __init __gic_init_bases(struct gic_chip_data *gic, int irq_start,
- struct fwnode_handle *handle)
+static void gic_init_chip(struct gic_chip_data *gic, struct device *dev,
+ const char *name, bool use_eoimode1)
{
- irq_hw_number_t hwirq_base;
- int gic_irqs, irq_base, i, ret;
-
- if (WARN_ON(!gic || gic->domain))
- return -EINVAL;
-
/* Initialize irq_chip */
gic->chip = gic_chip;
+ gic->chip.name = name;
+ gic->chip.parent_device = dev;
- if (static_key_true(&supports_deactivate) && gic == &gic_data[0]) {
+ if (use_eoimode1) {
gic->chip.irq_mask = gic_eoimode1_mask_irq;
gic->chip.irq_eoi = gic_eoimode1_eoi_irq;
gic->chip.irq_set_vcpu_affinity = gic_irq_set_vcpu_affinity;
- gic->chip.name = kasprintf(GFP_KERNEL, "GICv2");
- } else {
- gic->chip.name = kasprintf(GFP_KERNEL, "GIC-%d",
- (int)(gic - &gic_data[0]));
}
#ifdef CONFIG_SMP
if (gic == &gic_data[0])
gic->chip.irq_set_affinity = gic_set_affinity;
#endif
+}
+
+static int gic_init_bases(struct gic_chip_data *gic, int irq_start,
+ struct fwnode_handle *handle)
+{
+ irq_hw_number_t hwirq_base;
+ int gic_irqs, irq_base, ret;
if (IS_ENABLED(CONFIG_GIC_NON_BANKED) && gic->percpu_offset) {
/* Frankein-GIC without banked registers... */
goto error;
}
+ gic_dist_init(gic);
+ ret = gic_cpu_init(gic);
+ if (ret)
+ goto error;
+
+ ret = gic_pm_init(gic);
+ if (ret)
+ goto error;
+
+ return 0;
+
+error:
+ if (IS_ENABLED(CONFIG_GIC_NON_BANKED) && gic->percpu_offset) {
+ free_percpu(gic->dist_base.percpu_base);
+ free_percpu(gic->cpu_base.percpu_base);
+ }
+
+ return ret;
+}
+
+static int __init __gic_init_bases(struct gic_chip_data *gic,
+ int irq_start,
+ struct fwnode_handle *handle)
+{
+ char *name;
+ int i, ret;
+
+ if (WARN_ON(!gic || gic->domain))
+ return -EINVAL;
+
if (gic == &gic_data[0]) {
/*
* Initialize the CPU interface map to all CPUs.
gic_cpu_map[i] = 0xff;
#ifdef CONFIG_SMP
set_smp_cross_call(gic_raise_softirq);
- register_cpu_notifier(&gic_cpu_notifier);
#endif
+ cpuhp_setup_state_nocalls(CPUHP_AP_IRQ_GIC_STARTING,
+ "AP_IRQ_GIC_STARTING",
+ gic_starting_cpu, NULL);
set_handle_irq(gic_handle_irq);
if (static_key_true(&supports_deactivate))
pr_info("GIC: Using split EOI/Deactivate mode\n");
}
- gic_dist_init(gic);
- ret = gic_cpu_init(gic);
- if (ret)
- goto error;
-
- ret = gic_pm_init(gic);
- if (ret)
- goto error;
-
- return 0;
-
-error:
- if (IS_ENABLED(CONFIG_GIC_NON_BANKED) && gic->percpu_offset) {
- free_percpu(gic->dist_base.percpu_base);
- free_percpu(gic->cpu_base.percpu_base);
+ if (static_key_true(&supports_deactivate) && gic == &gic_data[0]) {
+ name = kasprintf(GFP_KERNEL, "GICv2");
+ gic_init_chip(gic, NULL, name, true);
+ } else {
+ name = kasprintf(GFP_KERNEL, "GIC-%d", (int)(gic-&gic_data[0]));
+ gic_init_chip(gic, NULL, name, false);
}
- kfree(gic->chip.name);
+ ret = gic_init_bases(gic, irq_start, handle);
+ if (ret)
+ kfree(name);
return ret;
}
return true;
}
-static int __init gic_of_setup(struct gic_chip_data *gic, struct device_node *node)
+static int gic_of_setup(struct gic_chip_data *gic, struct device_node *node)
{
if (!gic || !node)
return -EINVAL;
return -ENOMEM;
}
+int gic_of_init_child(struct device *dev, struct gic_chip_data **gic, int irq)
+{
+ int ret;
+
+ if (!dev || !dev->of_node || !gic || !irq)
+ return -EINVAL;
+
+ *gic = devm_kzalloc(dev, sizeof(**gic), GFP_KERNEL);
+ if (!*gic)
+ return -ENOMEM;
+
+ gic_init_chip(*gic, dev, dev->of_node->name, false);
+
+ ret = gic_of_setup(*gic, dev->of_node);
+ if (ret)
+ return ret;
+
+ ret = gic_init_bases(*gic, -1, &dev->of_node->fwnode);
+ if (ret) {
+ gic_teardown(*gic);
+ return ret;
+ }
+
+ irq_set_chained_handler_and_data(irq, gic_handle_cascade_irq, *gic);
+
+ return 0;
+}
+
static void __init gic_of_setup_kvm_info(struct device_node *node)
{
int ret;
IRQCHIP_DECLARE(msm_8660_qgic, "qcom,msm-8660-qgic", gic_of_init);
IRQCHIP_DECLARE(msm_qgic2, "qcom,msm-qgic2", gic_of_init);
IRQCHIP_DECLARE(pl390, "arm,pl390", gic_of_init);
-
+#else
+int gic_of_init_child(struct device *dev, struct gic_chip_data **gic, int irq)
+{
+ return -ENOTSUPP;
+}
#endif
#ifdef CONFIG_ACPI
irq = platform_get_irq(pmu_device, 0);
if (irq >= 0 && irq_is_percpu(irq)) {
- on_each_cpu(cpu_pmu_disable_percpu_irq, &irq, 1);
+ on_each_cpu_mask(&cpu_pmu->supported_cpus,
+ cpu_pmu_disable_percpu_irq, &irq, 1);
free_percpu_irq(irq, &hw_events->percpu_pmu);
} else {
for (i = 0; i < irqs; ++i) {
irq);
return err;
}
- on_each_cpu(cpu_pmu_enable_percpu_irq, &irq, 1);
+
+ on_each_cpu_mask(&cpu_pmu->supported_cpus,
+ cpu_pmu_enable_percpu_irq, &irq, 1);
} else {
for (i = 0; i < irqs; ++i) {
int cpu = i;
return 0;
}
+ static DEFINE_MUTEX(arm_pmu_mutex);
+ static LIST_HEAD(arm_pmu_list);
+
/*
* PMU hardware loses all context when a CPU goes offline.
* When a CPU is hotplugged back in, since some hardware registers are
* UNKNOWN at reset, the PMU must be explicitly reset to avoid reading
* junk values out of them.
*/
- static int cpu_pmu_notify(struct notifier_block *b, unsigned long action,
- void *hcpu)
+ static int arm_perf_starting_cpu(unsigned int cpu)
{
- int cpu = (unsigned long)hcpu;
- struct arm_pmu *pmu = container_of(b, struct arm_pmu, hotplug_nb);
-
- if ((action & ~CPU_TASKS_FROZEN) != CPU_STARTING)
- return NOTIFY_DONE;
-
- if (!cpumask_test_cpu(cpu, &pmu->supported_cpus))
- return NOTIFY_DONE;
+ struct arm_pmu *pmu;
- if (pmu->reset)
- pmu->reset(pmu);
- else
- return NOTIFY_DONE;
+ mutex_lock(&arm_pmu_mutex);
+ list_for_each_entry(pmu, &arm_pmu_list, entry) {
- return NOTIFY_OK;
+ if (!cpumask_test_cpu(cpu, &pmu->supported_cpus))
+ continue;
+ if (pmu->reset)
+ pmu->reset(pmu);
+ }
+ mutex_unlock(&arm_pmu_mutex);
+ return 0;
}
#ifdef CONFIG_CPU_PM
if (!cpu_hw_events)
return -ENOMEM;
- cpu_pmu->hotplug_nb.notifier_call = cpu_pmu_notify;
- err = register_cpu_notifier(&cpu_pmu->hotplug_nb);
- if (err)
- goto out_hw_events;
+ mutex_lock(&arm_pmu_mutex);
+ list_add_tail(&cpu_pmu->entry, &arm_pmu_list);
+ mutex_unlock(&arm_pmu_mutex);
err = cpu_pm_pmu_register(cpu_pmu);
if (err)
return 0;
out_unregister:
- unregister_cpu_notifier(&cpu_pmu->hotplug_nb);
- out_hw_events:
+ mutex_lock(&arm_pmu_mutex);
+ list_del(&cpu_pmu->entry);
+ mutex_unlock(&arm_pmu_mutex);
free_percpu(cpu_hw_events);
return err;
}
static void cpu_pmu_destroy(struct arm_pmu *cpu_pmu)
{
cpu_pm_pmu_unregister(cpu_pmu);
- unregister_cpu_notifier(&cpu_pmu->hotplug_nb);
+ mutex_lock(&arm_pmu_mutex);
+ list_del(&cpu_pmu->entry);
+ mutex_unlock(&arm_pmu_mutex);
free_percpu(cpu_pmu->hw_events);
}
i++;
} while (1);
- /* If we didn't manage to parse anything, claim to support all CPUs */
- if (cpumask_weight(&pmu->supported_cpus) == 0)
- cpumask_setall(&pmu->supported_cpus);
+ /* If we didn't manage to parse anything, try the interrupt affinity */
+ if (cpumask_weight(&pmu->supported_cpus) == 0) {
+ if (!using_spi) {
+ /* If using PPIs, check the affinity of the partition */
+ int ret, irq;
+
+ irq = platform_get_irq(pdev, 0);
+ ret = irq_get_percpu_devid_partition(irq, &pmu->supported_cpus);
+ if (ret) {
+ kfree(irqs);
+ return ret;
+ }
+ } else {
+ /* Otherwise default to all CPUs */
+ cpumask_setall(&pmu->supported_cpus);
+ }
+ }
/* If we matched up the IRQ affinities, use them to route the SPIs */
if (using_spi && i == pdev->num_resources)
kfree(pmu);
return ret;
}
+
+ static int arm_pmu_hp_init(void)
+ {
+ int ret;
+
+ ret = cpuhp_setup_state_nocalls(CPUHP_AP_PERF_ARM_STARTING,
+ "AP_PERF_ARM_STARTING",
+ arm_perf_starting_cpu, NULL);
+ if (ret)
+ pr_err("CPU hotplug notifier for ARM PMU could not be registered: %d\n",
+ ret);
+ return ret;
+ }
+ subsys_initcall(arm_pmu_hp_init);
bool contexts_maxed;
};
+typedef unsigned long (*perf_copy_f)(void *dst, const void *src,
+ unsigned long off, unsigned long len);
+
+struct perf_raw_frag {
+ union {
+ struct perf_raw_frag *next;
+ unsigned long pad;
+ };
+ perf_copy_f copy;
+ void *data;
+ u32 size;
+} __packed;
+
struct perf_raw_record {
+ struct perf_raw_frag frag;
u32 size;
- void *data;
};
/*
struct perf_cgroup;
struct ring_buffer;
+struct pmu_event_list {
+ raw_spinlock_t lock;
+ struct list_head list;
+};
+
/**
* struct perf_event - performance event kernel representation:
*/
int cgrp_defer_enabled;
#endif
+ struct list_head sb_list;
#endif /* CONFIG_PERF_EVENTS */
};
extern struct perf_callchain_entry *
get_perf_callchain(struct pt_regs *regs, u32 init_nr, bool kernel, bool user,
u32 max_stack, bool crosstask, bool add_mark);
-extern int get_callchain_buffers(void);
+extern int get_callchain_buffers(int max_stack);
extern void put_callchain_buffers(void);
extern int sysctl_perf_event_max_stack;
static inline void perf_restore_debug_store(void) { }
#endif
+static __always_inline bool perf_raw_frag_last(const struct perf_raw_frag *frag)
+{
+ return frag->pad < sizeof(u64);
+}
+
#define perf_output_put(handle, x) perf_output_copy((handle), &(x), sizeof(x))
- /*
- * This has to have a higher priority than migration_notifier in sched/core.c.
- */
- #define perf_cpu_notifier(fn) \
- do { \
- static struct notifier_block fn##_nb = \
- { .notifier_call = fn, .priority = CPU_PRI_PERF }; \
- unsigned long cpu = smp_processor_id(); \
- unsigned long flags; \
- \
- cpu_notifier_register_begin(); \
- fn(&fn##_nb, (unsigned long)CPU_UP_PREPARE, \
- (void *)(unsigned long)cpu); \
- local_irq_save(flags); \
- fn(&fn##_nb, (unsigned long)CPU_STARTING, \
- (void *)(unsigned long)cpu); \
- local_irq_restore(flags); \
- fn(&fn##_nb, (unsigned long)CPU_ONLINE, \
- (void *)(unsigned long)cpu); \
- __register_cpu_notifier(&fn##_nb); \
- cpu_notifier_register_done(); \
- } while (0)
-
- /*
- * Bare-bones version of perf_cpu_notifier(), which doesn't invoke the
- * callback for already online CPUs.
- */
- #define __perf_cpu_notifier(fn) \
- do { \
- static struct notifier_block fn##_nb = \
- { .notifier_call = fn, .priority = CPU_PRI_PERF }; \
- \
- __register_cpu_notifier(&fn##_nb); \
- } while (0)
-
struct perf_pmu_events_attr {
struct device_attribute attr;
u64 id;
const char *event_str;
};
+struct perf_pmu_events_ht_attr {
+ struct device_attribute attr;
+ u64 id;
+ const char *event_str_ht;
+ const char *event_str_noht;
+};
+
ssize_t perf_event_sysfs_show(struct device *dev, struct device_attribute *attr,
char *page);
\
static struct device_attribute format_attr_##_name = __ATTR_RO(_name)
+ /* Performance counter hotplug functions */
+ #ifdef CONFIG_PERF_EVENTS
+ int perf_event_init_cpu(unsigned int cpu);
+ int perf_event_exit_cpu(unsigned int cpu);
+ #else
+ #define perf_event_init_cpu NULL
+ #define perf_event_exit_cpu NULL
+ #endif
+
#endif /* _LINUX_PERF_EVENT_H */
static DEFINE_PER_CPU(atomic_t, perf_cgroup_events);
static DEFINE_PER_CPU(int, perf_sched_cb_usages);
+static DEFINE_PER_CPU(struct pmu_event_list, pmu_sb_events);
static atomic_t nr_mmap_events __read_mostly;
static atomic_t nr_comm_events __read_mostly;
if (ret || !write)
return ret;
+ /*
+ * If throttling is disabled don't allow the write:
+ */
+ if (sysctl_perf_cpu_time_max_percent == 100 ||
+ sysctl_perf_cpu_time_max_percent == 0)
+ return -EINVAL;
+
max_samples_per_tick = DIV_ROUND_UP(sysctl_perf_event_sample_rate, HZ);
perf_sample_period_ns = NSEC_PER_SEC / sysctl_perf_event_sample_rate;
update_perf_cpu_limits();
static void ring_buffer_attach(struct perf_event *event,
struct ring_buffer *rb);
+static void detach_sb_event(struct perf_event *event)
+{
+ struct pmu_event_list *pel = per_cpu_ptr(&pmu_sb_events, event->cpu);
+
+ raw_spin_lock(&pel->lock);
+ list_del_rcu(&event->sb_list);
+ raw_spin_unlock(&pel->lock);
+}
+
+static bool is_sb_event(struct perf_event *event)
+{
+ struct perf_event_attr *attr = &event->attr;
+
+ if (event->parent)
+ return false;
+
+ if (event->attach_state & PERF_ATTACH_TASK)
+ return false;
+
+ if (attr->mmap || attr->mmap_data || attr->mmap2 ||
+ attr->comm || attr->comm_exec ||
+ attr->task ||
+ attr->context_switch)
+ return true;
+ return false;
+}
+
+static void unaccount_pmu_sb_event(struct perf_event *event)
+{
+ if (is_sb_event(event))
+ detach_sb_event(event);
+}
+
static void unaccount_event_cpu(struct perf_event *event, int cpu)
{
if (event->parent)
}
unaccount_event_cpu(event, event->cpu);
+
+ unaccount_pmu_sb_event(event);
}
static void perf_sched_delayed(struct work_struct *work)
}
if (sample_type & PERF_SAMPLE_RAW) {
- if (data->raw) {
- u32 raw_size = data->raw->size;
- u32 real_size = round_up(raw_size + sizeof(u32),
- sizeof(u64)) - sizeof(u32);
- u64 zero = 0;
-
- perf_output_put(handle, real_size);
- __output_copy(handle, data->raw->data, raw_size);
- if (real_size - raw_size)
- __output_copy(handle, &zero, real_size - raw_size);
+ struct perf_raw_record *raw = data->raw;
+
+ if (raw) {
+ struct perf_raw_frag *frag = &raw->frag;
+
+ perf_output_put(handle, raw->size);
+ do {
+ if (frag->copy) {
+ __output_custom(handle, frag->copy,
+ frag->data, frag->size);
+ } else {
+ __output_copy(handle, frag->data,
+ frag->size);
+ }
+ if (perf_raw_frag_last(frag))
+ break;
+ frag = frag->next;
+ } while (1);
+ if (frag->pad)
+ __output_skip(handle, NULL, frag->pad);
} else {
struct {
u32 size;
}
if (sample_type & PERF_SAMPLE_RAW) {
- int size = sizeof(u32);
-
- if (data->raw)
- size += data->raw->size;
- else
- size += sizeof(u32);
+ struct perf_raw_record *raw = data->raw;
+ int size;
+
+ if (raw) {
+ struct perf_raw_frag *frag = &raw->frag;
+ u32 sum = 0;
+
+ do {
+ sum += frag->size;
+ if (perf_raw_frag_last(frag))
+ break;
+ frag = frag->next;
+ } while (1);
+
+ size = round_up(sum + sizeof(u32), sizeof(u64));
+ raw->size = size - sizeof(u32);
+ frag->pad = raw->size - sum;
+ } else {
+ size = sizeof(u64);
+ }
- header->size += round_up(size, sizeof(u64));
+ header->size += size;
}
if (sample_type & PERF_SAMPLE_BRANCH_STACK) {
perf_output_end(&handle);
}
-typedef void (perf_event_aux_output_cb)(struct perf_event *event, void *data);
+typedef void (perf_iterate_f)(struct perf_event *event, void *data);
static void
-perf_event_aux_ctx(struct perf_event_context *ctx,
- perf_event_aux_output_cb output,
+perf_iterate_ctx(struct perf_event_context *ctx,
+ perf_iterate_f output,
void *data, bool all)
{
struct perf_event *event;
}
}
-static void
-perf_event_aux_task_ctx(perf_event_aux_output_cb output, void *data,
- struct perf_event_context *task_ctx)
+static void perf_iterate_sb_cpu(perf_iterate_f output, void *data)
{
- rcu_read_lock();
- preempt_disable();
- perf_event_aux_ctx(task_ctx, output, data, false);
- preempt_enable();
- rcu_read_unlock();
+ struct pmu_event_list *pel = this_cpu_ptr(&pmu_sb_events);
+ struct perf_event *event;
+
+ list_for_each_entry_rcu(event, &pel->list, sb_list) {
+ if (event->state < PERF_EVENT_STATE_INACTIVE)
+ continue;
+ if (!event_filter_match(event))
+ continue;
+ output(event, data);
+ }
}
+/*
+ * Iterate all events that need to receive side-band events.
+ *
+ * For new callers; ensure that account_pmu_sb_event() includes
+ * your event, otherwise it might not get delivered.
+ */
static void
-perf_event_aux(perf_event_aux_output_cb output, void *data,
+perf_iterate_sb(perf_iterate_f output, void *data,
struct perf_event_context *task_ctx)
{
- struct perf_cpu_context *cpuctx;
struct perf_event_context *ctx;
- struct pmu *pmu;
int ctxn;
+ rcu_read_lock();
+ preempt_disable();
+
/*
- * If we have task_ctx != NULL we only notify
- * the task context itself. The task_ctx is set
- * only for EXIT events before releasing task
+ * If we have task_ctx != NULL we only notify the task context itself.
+ * The task_ctx is set only for EXIT events before releasing task
* context.
*/
if (task_ctx) {
- perf_event_aux_task_ctx(output, data, task_ctx);
- return;
+ perf_iterate_ctx(task_ctx, output, data, false);
+ goto done;
}
- rcu_read_lock();
- list_for_each_entry_rcu(pmu, &pmus, entry) {
- cpuctx = get_cpu_ptr(pmu->pmu_cpu_context);
- if (cpuctx->unique_pmu != pmu)
- goto next;
- perf_event_aux_ctx(&cpuctx->ctx, output, data, false);
- ctxn = pmu->task_ctx_nr;
- if (ctxn < 0)
- goto next;
+ perf_iterate_sb_cpu(output, data);
+
+ for_each_task_context_nr(ctxn) {
ctx = rcu_dereference(current->perf_event_ctxp[ctxn]);
if (ctx)
- perf_event_aux_ctx(ctx, output, data, false);
-next:
- put_cpu_ptr(pmu->pmu_cpu_context);
+ perf_iterate_ctx(ctx, output, data, false);
}
+done:
+ preempt_enable();
rcu_read_unlock();
}
perf_event_enable_on_exec(ctxn);
- perf_event_aux_ctx(ctx, perf_event_addr_filters_exec, NULL,
+ perf_iterate_ctx(ctx, perf_event_addr_filters_exec, NULL,
true);
}
rcu_read_unlock();
};
rcu_read_lock();
- perf_event_aux_ctx(&cpuctx->ctx, __perf_event_output_stop, &ro, false);
+ perf_iterate_ctx(&cpuctx->ctx, __perf_event_output_stop, &ro, false);
if (cpuctx->task_ctx)
- perf_event_aux_ctx(cpuctx->task_ctx, __perf_event_output_stop,
+ perf_iterate_ctx(cpuctx->task_ctx, __perf_event_output_stop,
&ro, false);
rcu_read_unlock();
},
};
- perf_event_aux(perf_event_task_output,
+ perf_iterate_sb(perf_event_task_output,
&task_event,
task_ctx);
}
comm_event->event_id.header.size = sizeof(comm_event->event_id) + size;
- perf_event_aux(perf_event_comm_output,
+ perf_iterate_sb(perf_event_comm_output,
comm_event,
NULL);
}
mmap_event->event_id.header.size = sizeof(mmap_event->event_id) + size;
- perf_event_aux(perf_event_mmap_output,
+ perf_iterate_sb(perf_event_mmap_output,
mmap_event,
NULL);
if (!ctx)
continue;
- perf_event_aux_ctx(ctx, __perf_addr_filters_adjust, vma, true);
+ perf_iterate_ctx(ctx, __perf_addr_filters_adjust, vma, true);
}
rcu_read_unlock();
}
},
};
- perf_event_aux(perf_event_switch_output,
+ perf_iterate_sb(perf_event_switch_output,
&switch_event,
NULL);
}
static int perf_tp_filter_match(struct perf_event *event,
struct perf_sample_data *data)
{
- void *record = data->raw->data;
+ void *record = data->raw->frag.data;
/* only top level events have filters set */
if (event->parent)
struct perf_event *event;
struct perf_raw_record raw = {
- .size = entry_size,
- .data = record,
+ .frag = {
+ .size = entry_size,
+ .data = record,
+ },
};
perf_sample_data_init(&data, 0, 0);
prog = event->tp_event->prog;
if (prog) {
event->tp_event->prog = NULL;
- bpf_prog_put_rcu(prog);
+ bpf_prog_put(prog);
}
}
return pmu;
}
+static void attach_sb_event(struct perf_event *event)
+{
+ struct pmu_event_list *pel = per_cpu_ptr(&pmu_sb_events, event->cpu);
+
+ raw_spin_lock(&pel->lock);
+ list_add_rcu(&event->sb_list, &pel->list);
+ raw_spin_unlock(&pel->lock);
+}
+
+/*
+ * We keep a list of all !task (and therefore per-cpu) events
+ * that need to receive side-band records.
+ *
+ * This avoids having to scan all the various PMU per-cpu contexts
+ * looking for them.
+ */
+static void account_pmu_sb_event(struct perf_event *event)
+{
+ if (is_sb_event(event))
+ attach_sb_event(event);
+}
+
static void account_event_cpu(struct perf_event *event, int cpu)
{
if (event->parent)
enabled:
account_event_cpu(event, event->cpu);
+
+ account_pmu_sb_event(event);
}
/*
if (!event->parent) {
if (event->attr.sample_type & PERF_SAMPLE_CALLCHAIN) {
- err = get_callchain_buffers();
+ err = get_callchain_buffers(attr->sample_max_stack);
if (err)
goto err_addr_filters;
}
return -EINVAL;
}
+ if (!attr.sample_max_stack)
+ attr.sample_max_stack = sysctl_perf_event_max_stack;
+
/*
* In cgroup mode, the pid argument is used to pass the fd
* opened to the cgroup directory in cgroupfs. The cpu argument
if (is_sampling_event(event)) {
if (event->pmu->capabilities & PERF_PMU_CAP_NO_INTERRUPT) {
- err = -ENOTSUPP;
+ err = -EOPNOTSUPP;
goto err_alloc;
}
}
swhash = &per_cpu(swevent_htable, cpu);
mutex_init(&swhash->hlist_mutex);
INIT_LIST_HEAD(&per_cpu(active_ctx_list, cpu));
+
+ INIT_LIST_HEAD(&per_cpu(pmu_sb_events.list, cpu));
+ raw_spin_lock_init(&per_cpu(pmu_sb_events.lock, cpu));
}
}
- static void perf_event_init_cpu(int cpu)
+ int perf_event_init_cpu(unsigned int cpu)
{
struct swevent_htable *swhash = &per_cpu(swevent_htable, cpu);
rcu_assign_pointer(swhash->swevent_hlist, hlist);
}
mutex_unlock(&swhash->hlist_mutex);
+ return 0;
}
#if defined CONFIG_HOTPLUG_CPU || defined CONFIG_KEXEC_CORE
}
srcu_read_unlock(&pmus_srcu, idx);
}
+ #else
+
+ static void perf_event_exit_cpu_context(int cpu) { }
+
+ #endif
- static void perf_event_exit_cpu(int cpu)
+ int perf_event_exit_cpu(unsigned int cpu)
{
perf_event_exit_cpu_context(cpu);
+ return 0;
}
- #else
- static inline void perf_event_exit_cpu(int cpu) { }
- #endif
static int
perf_reboot(struct notifier_block *notifier, unsigned long val, void *v)
.priority = INT_MIN,
};
- static int
- perf_cpu_notify(struct notifier_block *self, unsigned long action, void *hcpu)
- {
- unsigned int cpu = (long)hcpu;
-
- switch (action & ~CPU_TASKS_FROZEN) {
-
- case CPU_UP_PREPARE:
- /*
- * This must be done before the CPU comes alive, because the
- * moment we can run tasks we can encounter (software) events.
- *
- * Specifically, someone can have inherited events on kthreadd
- * or a pre-existing worker thread that gets re-bound.
- */
- perf_event_init_cpu(cpu);
- break;
-
- case CPU_DOWN_PREPARE:
- /*
- * This must be done before the CPU dies because after that an
- * active event might want to IPI the CPU and that'll not work
- * so great for dead CPUs.
- *
- * XXX smp_call_function_single() return -ENXIO without a warn
- * so we could possibly deal with this.
- *
- * This is safe against new events arriving because
- * sys_perf_event_open() serializes against hotplug using
- * get_online_cpus().
- */
- perf_event_exit_cpu(cpu);
- break;
- default:
- break;
- }
-
- return NOTIFY_OK;
- }
-
void __init perf_event_init(void)
{
int ret;
perf_pmu_register(&perf_cpu_clock, NULL, -1);
perf_pmu_register(&perf_task_clock, NULL, -1);
perf_tp_register();
- perf_cpu_notifier(perf_cpu_notify);
+ perf_event_init_cpu(smp_processor_id());
register_reboot_notifier(&perf_reboot_notifier);
ret = init_hw_breakpoint();
static void flush_smp_call_function_queue(bool warn_cpu_offline);
- static int
- hotplug_cfd(struct notifier_block *nfb, unsigned long action, void *hcpu)
+ int smpcfd_prepare_cpu(unsigned int cpu)
{
- long cpu = (long)hcpu;
struct call_function_data *cfd = &per_cpu(cfd_data, cpu);
- switch (action) {
- case CPU_UP_PREPARE:
- case CPU_UP_PREPARE_FROZEN:
- if (!zalloc_cpumask_var_node(&cfd->cpumask, GFP_KERNEL,
- cpu_to_node(cpu)))
- return notifier_from_errno(-ENOMEM);
- cfd->csd = alloc_percpu(struct call_single_data);
- if (!cfd->csd) {
- free_cpumask_var(cfd->cpumask);
- return notifier_from_errno(-ENOMEM);
- }
- break;
-
- #ifdef CONFIG_HOTPLUG_CPU
- case CPU_UP_CANCELED:
- case CPU_UP_CANCELED_FROZEN:
- /* Fall-through to the CPU_DEAD[_FROZEN] case. */
-
- case CPU_DEAD:
- case CPU_DEAD_FROZEN:
+ if (!zalloc_cpumask_var_node(&cfd->cpumask, GFP_KERNEL,
+ cpu_to_node(cpu)))
+ return -ENOMEM;
+ cfd->csd = alloc_percpu(struct call_single_data);
+ if (!cfd->csd) {
free_cpumask_var(cfd->cpumask);
- free_percpu(cfd->csd);
- break;
-
- case CPU_DYING:
- case CPU_DYING_FROZEN:
- /*
- * The IPIs for the smp-call-function callbacks queued by other
- * CPUs might arrive late, either due to hardware latencies or
- * because this CPU disabled interrupts (inside stop-machine)
- * before the IPIs were sent. So flush out any pending callbacks
- * explicitly (without waiting for the IPIs to arrive), to
- * ensure that the outgoing CPU doesn't go offline with work
- * still pending.
- */
- flush_smp_call_function_queue(false);
- break;
- #endif
- };
-
- return NOTIFY_OK;
+ return -ENOMEM;
+ }
+
+ return 0;
}
- static struct notifier_block hotplug_cfd_notifier = {
- .notifier_call = hotplug_cfd,
- };
+ int smpcfd_dead_cpu(unsigned int cpu)
+ {
+ struct call_function_data *cfd = &per_cpu(cfd_data, cpu);
+
+ free_cpumask_var(cfd->cpumask);
+ free_percpu(cfd->csd);
+ return 0;
+ }
+
+ int smpcfd_dying_cpu(unsigned int cpu)
+ {
+ /*
+ * The IPIs for the smp-call-function callbacks queued by other
+ * CPUs might arrive late, either due to hardware latencies or
+ * because this CPU disabled interrupts (inside stop-machine)
+ * before the IPIs were sent. So flush out any pending callbacks
+ * explicitly (without waiting for the IPIs to arrive), to
+ * ensure that the outgoing CPU doesn't go offline with work
+ * still pending.
+ */
+ flush_smp_call_function_queue(false);
+ return 0;
+ }
void __init call_function_init(void)
{
- void *cpu = (void *)(long)smp_processor_id();
int i;
for_each_possible_cpu(i)
init_llist_head(&per_cpu(call_single_queue, i));
- hotplug_cfd(&hotplug_cfd_notifier, CPU_UP_PREPARE, cpu);
- register_cpu_notifier(&hotplug_cfd_notifier);
+ smpcfd_prepare_cpu(smp_processor_id());
}
/*
*/
static __always_inline void csd_lock_wait(struct call_single_data *csd)
{
- smp_cond_acquire(!(csd->flags & CSD_FLAG_LOCK));
+ smp_cond_load_acquire(&csd->flags, !(VAL & CSD_FLAG_LOCK));
}
static __always_inline void csd_lock(struct call_single_data *csd)
/**
* show_workqueue_state - dump workqueue state
*
- * Called from a sysrq handler and prints out all busy workqueues and
- * pools.
+ * Called from a sysrq handler or try_to_freeze_tasks() and prints out
+ * all busy workqueues and pools.
*/
void show_workqueue_state(void)
{
if (!cpumask_test_cpu(cpu, pool->attrs->cpumask))
return;
- /* is @cpu the only online CPU? */
cpumask_and(&cpumask, pool->attrs->cpumask, cpu_online_mask);
- if (cpumask_weight(&cpumask) != 1)
- return;
/* as we're called from CPU_ONLINE, the following shouldn't fail */
for_each_pool_worker(worker, pool)
- WARN_ON_ONCE(set_cpus_allowed_ptr(worker->task,
- pool->attrs->cpumask) < 0);
+ WARN_ON_ONCE(set_cpus_allowed_ptr(worker->task, &cpumask) < 0);
}
- /*
- * Workqueues should be brought up before normal priority CPU notifiers.
- * This will be registered high priority CPU notifier.
- */
- static int workqueue_cpu_up_callback(struct notifier_block *nfb,
- unsigned long action,
- void *hcpu)
+ int workqueue_prepare_cpu(unsigned int cpu)
+ {
+ struct worker_pool *pool;
+
+ for_each_cpu_worker_pool(pool, cpu) {
+ if (pool->nr_workers)
+ continue;
+ if (!create_worker(pool))
+ return -ENOMEM;
+ }
+ return 0;
+ }
+
+ int workqueue_online_cpu(unsigned int cpu)
{
- int cpu = (unsigned long)hcpu;
struct worker_pool *pool;
struct workqueue_struct *wq;
int pi;
- switch (action & ~CPU_TASKS_FROZEN) {
- case CPU_UP_PREPARE:
- for_each_cpu_worker_pool(pool, cpu) {
- if (pool->nr_workers)
- continue;
- if (!create_worker(pool))
- return NOTIFY_BAD;
- }
- break;
-
- case CPU_DOWN_FAILED:
- case CPU_ONLINE:
- mutex_lock(&wq_pool_mutex);
+ mutex_lock(&wq_pool_mutex);
- for_each_pool(pool, pi) {
- mutex_lock(&pool->attach_mutex);
+ for_each_pool(pool, pi) {
+ mutex_lock(&pool->attach_mutex);
- if (pool->cpu == cpu)
- rebind_workers(pool);
- else if (pool->cpu < 0)
- restore_unbound_workers_cpumask(pool, cpu);
+ if (pool->cpu == cpu)
+ rebind_workers(pool);
+ else if (pool->cpu < 0)
+ restore_unbound_workers_cpumask(pool, cpu);
- mutex_unlock(&pool->attach_mutex);
- }
+ mutex_unlock(&pool->attach_mutex);
+ }
- /* update NUMA affinity of unbound workqueues */
- list_for_each_entry(wq, &workqueues, list)
- wq_update_unbound_numa(wq, cpu, true);
+ /* update NUMA affinity of unbound workqueues */
+ list_for_each_entry(wq, &workqueues, list)
+ wq_update_unbound_numa(wq, cpu, true);
- mutex_unlock(&wq_pool_mutex);
- break;
- }
- return NOTIFY_OK;
+ mutex_unlock(&wq_pool_mutex);
+ return 0;
}
- /*
- * Workqueues should be brought down after normal priority CPU notifiers.
- * This will be registered as low priority CPU notifier.
- */
- static int workqueue_cpu_down_callback(struct notifier_block *nfb,
- unsigned long action,
- void *hcpu)
+ int workqueue_offline_cpu(unsigned int cpu)
{
- int cpu = (unsigned long)hcpu;
struct work_struct unbind_work;
struct workqueue_struct *wq;
- switch (action & ~CPU_TASKS_FROZEN) {
- case CPU_DOWN_PREPARE:
- /* unbinding per-cpu workers should happen on the local CPU */
- INIT_WORK_ONSTACK(&unbind_work, wq_unbind_fn);
- queue_work_on(cpu, system_highpri_wq, &unbind_work);
-
- /* update NUMA affinity of unbound workqueues */
- mutex_lock(&wq_pool_mutex);
- list_for_each_entry(wq, &workqueues, list)
- wq_update_unbound_numa(wq, cpu, false);
- mutex_unlock(&wq_pool_mutex);
-
- /* wait for per-cpu unbinding to finish */
- flush_work(&unbind_work);
- destroy_work_on_stack(&unbind_work);
- break;
- }
- return NOTIFY_OK;
+ /* unbinding per-cpu workers should happen on the local CPU */
+ INIT_WORK_ONSTACK(&unbind_work, wq_unbind_fn);
+ queue_work_on(cpu, system_highpri_wq, &unbind_work);
+
+ /* update NUMA affinity of unbound workqueues */
+ mutex_lock(&wq_pool_mutex);
+ list_for_each_entry(wq, &workqueues, list)
+ wq_update_unbound_numa(wq, cpu, false);
+ mutex_unlock(&wq_pool_mutex);
+
+ /* wait for per-cpu unbinding to finish */
+ flush_work(&unbind_work);
+ destroy_work_on_stack(&unbind_work);
+ return 0;
}
#ifdef CONFIG_SMP
pwq_cache = KMEM_CACHE(pool_workqueue, SLAB_PANIC);
- cpu_notifier(workqueue_cpu_up_callback, CPU_PRI_WORKQUEUE_UP);
- hotcpu_notifier(workqueue_cpu_down_callback, CPU_PRI_WORKQUEUE_DOWN);
-
wq_numa_init();
/* initialize CPU pools */
put_cpu();
return 0;
}
+EXPORT_SYMBOL_GPL(vcpu_load);
void vcpu_put(struct kvm_vcpu *vcpu)
{
preempt_enable();
mutex_unlock(&vcpu->mutex);
}
+EXPORT_SYMBOL_GPL(vcpu_put);
static void ack_flush(void *_completed)
{
{
int r;
struct kvm *kvm;
+ struct file *file;
kvm = kvm_create_vm(type);
if (IS_ERR(kvm))
return r;
}
#endif
- r = anon_inode_getfd("kvm-vm", &kvm_vm_fops, kvm, O_RDWR | O_CLOEXEC);
+ r = get_unused_fd_flags(O_CLOEXEC);
if (r < 0) {
kvm_put_kvm(kvm);
return r;
}
+ file = anon_inode_getfile("kvm-vm", &kvm_vm_fops, kvm, O_RDWR);
+ if (IS_ERR(file)) {
+ put_unused_fd(r);
+ kvm_put_kvm(kvm);
+ return PTR_ERR(file);
+ }
if (kvm_create_vm_debugfs(kvm, r) < 0) {
- kvm_put_kvm(kvm);
+ put_unused_fd(r);
+ fput(file);
return -ENOMEM;
}
+ fd_install(r, file);
return r;
}
}
}
- static void hardware_enable(void)
+ static int kvm_starting_cpu(unsigned int cpu)
{
raw_spin_lock(&kvm_count_lock);
if (kvm_usage_count)
hardware_enable_nolock(NULL);
raw_spin_unlock(&kvm_count_lock);
+ return 0;
}
static void hardware_disable_nolock(void *junk)
kvm_arch_hardware_disable();
}
- static void hardware_disable(void)
+ static int kvm_dying_cpu(unsigned int cpu)
{
raw_spin_lock(&kvm_count_lock);
if (kvm_usage_count)
hardware_disable_nolock(NULL);
raw_spin_unlock(&kvm_count_lock);
+ return 0;
}
static void hardware_disable_all_nolock(void)
return r;
}
- static int kvm_cpu_hotplug(struct notifier_block *notifier, unsigned long val,
- void *v)
- {
- val &= ~CPU_TASKS_FROZEN;
- switch (val) {
- case CPU_DYING:
- hardware_disable();
- break;
- case CPU_STARTING:
- hardware_enable();
- break;
- }
- return NOTIFY_OK;
- }
-
static int kvm_reboot(struct notifier_block *notifier, unsigned long val,
void *v)
{
return r;
}
- static struct notifier_block kvm_cpu_notifier = {
- .notifier_call = kvm_cpu_hotplug,
- };
-
static int kvm_debugfs_open(struct inode *inode, struct file *file,
int (*get)(void *, u64 *), int (*set)(void *, u64),
const char *fmt)
goto out_free_1;
}
- r = register_cpu_notifier(&kvm_cpu_notifier);
+ r = cpuhp_setup_state_nocalls(CPUHP_AP_KVM_STARTING, "AP_KVM_STARTING",
+ kvm_starting_cpu, kvm_dying_cpu);
if (r)
goto out_free_2;
register_reboot_notifier(&kvm_reboot_notifier);
kmem_cache_destroy(kvm_vcpu_cache);
out_free_3:
unregister_reboot_notifier(&kvm_reboot_notifier);
- unregister_cpu_notifier(&kvm_cpu_notifier);
+ cpuhp_remove_state_nocalls(CPUHP_AP_KVM_STARTING);
out_free_2:
out_free_1:
kvm_arch_hardware_unsetup();
kvm_async_pf_deinit();
unregister_syscore_ops(&kvm_syscore_ops);
unregister_reboot_notifier(&kvm_reboot_notifier);
- unregister_cpu_notifier(&kvm_cpu_notifier);
+ cpuhp_remove_state_nocalls(CPUHP_AP_KVM_STARTING);
on_each_cpu(hardware_disable_nolock, NULL, 1);
kvm_arch_hardware_unsetup();
kvm_arch_exit();