Merge branch 'perf/urgent' into perf/core
author    Ingo Molnar <mingo@elte.hu>
          Fri, 26 Nov 2010 14:07:02 +0000 (15:07 +0100)
committer Ingo Molnar <mingo@elte.hu>
          Fri, 26 Nov 2010 14:07:02 +0000 (15:07 +0100)
Conflicts:
arch/x86/kernel/apic/hw_nmi.c

Merge reason: Resolve conflict, queue up dependent patch.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
arch/x86/kernel/apic/hw_nmi.c
arch/x86/kernel/cpu/perf_event.c
kernel/perf_event.c
tools/perf/builtin-record.c

diff --combined arch/x86/kernel/apic/hw_nmi.c
index 3e25afe9a62a85d222aad06c84eb93a9aed25e6b,62f6e1e55b90d7f9a2bc460e73ba8895da23c4ff..a0e71cb4fa9ccf6fc790ff1a0d98743fcdbae1fc
  #include <linux/nmi.h>
  #include <linux/module.h>
  
- /* For reliability, we're prepared to waste bits here. */
- static DECLARE_BITMAP(backtrace_mask, NR_CPUS) __read_mostly;
 +#ifdef CONFIG_HARDLOCKUP_DETECTOR
  u64 hw_nmi_get_sample_period(void)
  {
        return (u64)(cpu_khz) * 1000 * 60;
  }
 +#endif
  
 -#ifdef ARCH_HAS_NMI_WATCHDOG
+ /* For reliability, we're prepared to waste bits here. */
+ static DECLARE_BITMAP(backtrace_mask, NR_CPUS) __read_mostly;
 +#ifdef arch_trigger_all_cpu_backtrace
  void arch_trigger_all_cpu_backtrace(void)
  {
        int i;
@@@ -94,4 -93,16 +95,4 @@@ early_initcall(register_trigger_all_cpu
  #endif
  
  /* STUB calls to mimic old nmi_watchdog behaviour */
 -#if defined(CONFIG_X86_LOCAL_APIC)
 -unsigned int nmi_watchdog = NMI_NONE;
 -EXPORT_SYMBOL(nmi_watchdog);
 -void acpi_nmi_enable(void) { return; }
 -void acpi_nmi_disable(void) { return; }
 -#endif
 -atomic_t nmi_active = ATOMIC_INIT(0);           /* oprofile uses this */
 -EXPORT_SYMBOL(nmi_active);
  int unknown_nmi_panic;
 -void cpu_nmi_set_wd_enabled(void) { return; }
 -void stop_apic_nmi_watchdog(void *unused) { return; }
 -void setup_apic_nmi_watchdog(void *unused) { return; }
 -int __init check_nmi_watchdog(void) { return 0; }
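A quick worked number for the sample period kept above (illustrative arithmetic, not part of the commit): hw_nmi_get_sample_period() returns cpu_khz * 1000 * 60 cycles, so on a 2.5 GHz CPU (cpu_khz of roughly 2,500,000) that is 2,500,000 * 1000 * 60 = 1.5e11 cycles. Used as the hardlockup detector's cycles-event sample period, that works out to roughly one watchdog NMI per minute of busy execution at the rated clock.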
diff --combined arch/x86/kernel/cpu/perf_event.c
index 5273c7b90b8b82126ebccad4722f74696df3da68,6d75b9145b13f0e68a106acd76b0d458c827d099..7c1a4c35fd419fa18b6d7409622de1f3bbddd791
@@@ -330,6 -330,9 +330,6 @@@ static bool reserve_pmc_hardware(void
  {
        int i;
  
 -      if (nmi_watchdog == NMI_LOCAL_APIC)
 -              disable_lapic_nmi_watchdog();
 -
        for (i = 0; i < x86_pmu.num_counters; i++) {
                if (!reserve_perfctr_nmi(x86_pmu.perfctr + i))
                        goto perfctr_fail;
@@@ -352,6 -355,9 +352,6 @@@ perfctr_fail
        for (i--; i >= 0; i--)
                release_perfctr_nmi(x86_pmu.perfctr + i);
  
 -      if (nmi_watchdog == NMI_LOCAL_APIC)
 -              enable_lapic_nmi_watchdog();
 -
        return false;
  }
  
@@@ -363,6 -369,9 +363,6 @@@ static void release_pmc_hardware(void
                release_perfctr_nmi(x86_pmu.perfctr + i);
                release_evntsel_nmi(x86_pmu.eventsel + i);
        }
 -
 -      if (nmi_watchdog == NMI_LOCAL_APIC)
 -              enable_lapic_nmi_watchdog();
  }
  
  #else
@@@ -372,6 -381,20 +372,20 @@@ static void release_pmc_hardware(void) 
  
  #endif
  
+ static bool check_hw_exists(void)
+ {
+       u64 val, val_new = 0;
+       int ret = 0;
+       val = 0xabcdUL;
+       ret |= checking_wrmsrl(x86_pmu.perfctr, val);
+       ret |= rdmsrl_safe(x86_pmu.perfctr, &val_new);
+       if (ret || val != val_new)
+               return false;
+       return true;
+ }
  static void reserve_ds_buffers(void);
  static void release_ds_buffers(void);
  
@@@ -1363,6 -1386,12 +1377,12 @@@ void __init init_hw_perf_events(void
  
        pmu_check_apic();
  
+       /* sanity check that the hardware exists or is emulated */
+       if (!check_hw_exists()) {
+               pr_cont("Broken PMU hardware detected, software events only.\n");
+               return;
+       }
        pr_cont("%s PMU driver.\n", x86_pmu.name);
  
        if (x86_pmu.quirks)
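The new check_hw_exists() above writes the pattern 0xabcd to the first performance-counter MSR (x86_pmu.perfctr) with checking_wrmsrl(), reads it back with rdmsrl_safe(), and treats any fault or mismatch as "no usable PMU", for instance when firmware has claimed the counter or a hypervisor does not implement the MSRs. In that case init_hw_perf_events() prints "Broken PMU hardware detected, software events only." and returns before registering the hardware PMU. The shape of the probe as a stand-alone sketch (hypothetical wr()/rd() callbacks stand in for the MSR accessors; this is not the kernel API):

    #include <stdbool.h>
    #include <stdint.h>

    /* Return true only if a known pattern survives a write/read round trip. */
    static bool roundtrip_ok(int (*wr)(uint64_t), int (*rd)(uint64_t *))
    {
            uint64_t probe = 0xabcdUL, got = 0;
            int err = 0;

            err |= wr(probe);       /* may fault if the counter is unimplemented */
            err |= rd(&got);
            return !err && got == probe;
    }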
@@@ -1657,7 -1686,7 +1677,7 @@@ perf_callchain_kernel(struct perf_callc
  
        perf_callchain_store(entry, regs->ip);
  
 -      dump_trace(NULL, regs, NULL, regs->bp, &backtrace_ops, entry);
 +      dump_trace(NULL, regs, NULL, &backtrace_ops, entry);
  }
  
  #ifdef CONFIG_COMPAT
diff --combined kernel/perf_event.c
index 40c3aab648a1cd489aa25cb5313a1a5ce6a5e549,eac7e3364335a7a3f94d902e69b9ed4eac4df74a..43f757ccf831a04414bcea3d500f2090259fa09b
@@@ -31,6 -31,7 +31,7 @@@
  #include <linux/kernel_stat.h>
  #include <linux/perf_event.h>
  #include <linux/ftrace_event.h>
+ #include <linux/hw_breakpoint.h>
  
  #include <asm/irq_regs.h>
  
@@@ -1286,8 -1287,6 +1287,6 @@@ void __perf_event_task_sched_out(struc
  {
        int ctxn;
  
-       perf_sw_event(PERF_COUNT_SW_CONTEXT_SWITCHES, 1, 1, NULL, 0);
        for_each_task_context_nr(ctxn)
                perf_event_context_sched_out(task, ctxn, next);
  }
@@@ -1621,8 -1620,12 +1620,12 @@@ static void rotate_ctx(struct perf_even
  {
        raw_spin_lock(&ctx->lock);
  
-       /* Rotate the first entry last of non-pinned groups */
-       list_rotate_left(&ctx->flexible_groups);
+       /*
+        * Rotate the first entry last of non-pinned groups. Rotation might be
+        * disabled by the inheritance code.
+        */
+       if (!ctx->rotate_disable)
+               list_rotate_left(&ctx->flexible_groups);
  
        raw_spin_unlock(&ctx->lock);
  }
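The ctx->rotate_disable check added above pairs with a bracket added further down in perf_event_init_context(): the inheritance code cannot hold ctx->lock while it walks flexible_groups (the walk can allocate), so it sets the flag under the lock, drops the lock for the walk, and clears the flag afterwards, while rotate_ctx(), running from interrupt context, skips list_rotate_left() whenever the flag is set. A compressed sketch of that bracket, using the same identifiers as the hunks below:

    raw_spin_lock_irqsave(&parent_ctx->lock, flags);
    parent_ctx->rotate_disable = 1;          /* rotate_ctx() now leaves the list alone */
    raw_spin_unlock_irqrestore(&parent_ctx->lock, flags);

    /* ... walk parent_ctx->flexible_groups; may sleep or allocate ... */

    raw_spin_lock_irqsave(&parent_ctx->lock, flags);
    parent_ctx->rotate_disable = 0;          /* rotation resumes */
    raw_spin_unlock_irqrestore(&parent_ctx->lock, flags);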
@@@ -2234,11 -2237,6 +2237,6 @@@ int perf_event_release_kernel(struct pe
        raw_spin_unlock_irq(&ctx->lock);
        mutex_unlock(&ctx->mutex);
  
-       mutex_lock(&event->owner->perf_event_mutex);
-       list_del_init(&event->owner_entry);
-       mutex_unlock(&event->owner->perf_event_mutex);
-       put_task_struct(event->owner);
        free_event(event);
  
        return 0;
@@@ -2251,9 -2249,43 +2249,43 @@@ EXPORT_SYMBOL_GPL(perf_event_release_ke
  static int perf_release(struct inode *inode, struct file *file)
  {
        struct perf_event *event = file->private_data;
+       struct task_struct *owner;
  
        file->private_data = NULL;
  
+       rcu_read_lock();
+       owner = ACCESS_ONCE(event->owner);
+       /*
+        * Matches the smp_wmb() in perf_event_exit_task(). If we observe
+        * !owner it means the list deletion is complete and we can indeed
+        * free this event, otherwise we need to serialize on
+        * owner->perf_event_mutex.
+        */
+       smp_read_barrier_depends();
+       if (owner) {
+               /*
+                * Since delayed_put_task_struct() also drops the last
+                * task reference we can safely take a new reference
+                * while holding the rcu_read_lock().
+                */
+               get_task_struct(owner);
+       }
+       rcu_read_unlock();
+       if (owner) {
+               mutex_lock(&owner->perf_event_mutex);
+               /*
+                * We have to re-check the event->owner field, if it is cleared
+                * we raced with perf_event_exit_task(), acquiring the mutex
+                * ensured they're done, and we can proceed with freeing the
+                * event.
+                */
+               if (event->owner)
+                       list_del_init(&event->owner_entry);
+               mutex_unlock(&owner->perf_event_mutex);
+               put_task_struct(owner);
+       }
        return perf_event_release_kernel(event);
  }
  
@@@ -4773,6 -4805,15 +4805,6 @@@ static int perf_tp_event_init(struct pe
        if (event->attr.type != PERF_TYPE_TRACEPOINT)
                return -ENOENT;
  
 -      /*
 -       * Raw tracepoint data is a severe data leak, only allow root to
 -       * have these.
 -       */
 -      if ((event->attr.sample_type & PERF_SAMPLE_RAW) &&
 -                      perf_paranoid_tracepoint_raw() &&
 -                      !capable(CAP_SYS_ADMIN))
 -              return -EPERM;
 -
        err = perf_trace_init(event);
        if (err)
                return err;
@@@ -5668,7 -5709,7 +5700,7 @@@ SYSCALL_DEFINE5(perf_event_open
        mutex_unlock(&ctx->mutex);
  
        event->owner = current;
-       get_task_struct(current);
        mutex_lock(&current->perf_event_mutex);
        list_add_tail(&event->owner_entry, &current->perf_event_list);
        mutex_unlock(&current->perf_event_mutex);
@@@ -5736,12 -5777,6 +5768,6 @@@ perf_event_create_kernel_counter(struc
        ++ctx->generation;
        mutex_unlock(&ctx->mutex);
  
-       event->owner = current;
-       get_task_struct(current);
-       mutex_lock(&current->perf_event_mutex);
-       list_add_tail(&event->owner_entry, &current->perf_event_list);
-       mutex_unlock(&current->perf_event_mutex);
        return event;
  
  err_free:
@@@ -5892,8 -5927,24 +5918,24 @@@ again
   */
  void perf_event_exit_task(struct task_struct *child)
  {
+       struct perf_event *event, *tmp;
        int ctxn;
  
+       mutex_lock(&child->perf_event_mutex);
+       list_for_each_entry_safe(event, tmp, &child->perf_event_list,
+                                owner_entry) {
+               list_del_init(&event->owner_entry);
+               /*
+                * Ensure the list deletion is visible before we clear
+                * the owner, closes a race against perf_release() where
+                * we need to serialize on the owner->perf_event_mutex.
+                */
+               smp_wmb();
+               event->owner = NULL;
+       }
+       mutex_unlock(&child->perf_event_mutex);
        for_each_task_context_nr(ctxn)
                perf_event_exit_task_context(child, ctxn);
  }
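Taken together, the perf_release() and perf_event_exit_task() hunks replace the old get_task_struct(current)/put_task_struct(owner) scheme with a handshake: the exiting owner unlinks each event from its owner list and only then clears event->owner (the smp_wmb() orders the two stores), while perf_release() snapshots the owner under rcu_read_lock(), pins it with get_task_struct(), and re-checks event->owner after taking owner->perf_event_mutex, so it never touches a list entry the exit path already removed. A rough user-space analogue of the ordering follows; C11 release/acquire atomics stand in for smp_wmb()/ACCESS_ONCE() + smp_read_barrier_depends(), the RCU-based task pinning is elided, and struct owner, on_owner_list, owner_exit() and event_release() are illustrative names, not kernel APIs:

    #include <pthread.h>
    #include <stdatomic.h>
    #include <stdbool.h>
    #include <stddef.h>

    struct owner {
            pthread_mutex_t lock;           /* plays the role of perf_event_mutex */
    };

    struct event {
            _Atomic(struct owner *) owner;
            bool on_owner_list;             /* stands in for owner_entry being linked */
    };

    /* Exit side: unlink first, then publish "no owner"; the release store pairs
     * with the acquire load below, like list_del_init(); smp_wmb(); owner = NULL. */
    static void owner_exit(struct owner *o, struct event *e)
    {
            pthread_mutex_lock(&o->lock);
            e->on_owner_list = false;
            atomic_store_explicit(&e->owner, NULL, memory_order_release);
            pthread_mutex_unlock(&o->lock);
    }

    /* Release side: if an owner is still visible, serialize on its mutex and
     * re-check, because the owner may have exited while we waited for the lock. */
    static void event_release(struct event *e)
    {
            struct owner *o = atomic_load_explicit(&e->owner, memory_order_acquire);

            if (o) {
                    pthread_mutex_lock(&o->lock);
                    if (atomic_load_explicit(&e->owner, memory_order_relaxed))
                            e->on_owner_list = false;   /* list_del_init() */
                    pthread_mutex_unlock(&o->lock);
            }
            /* ... free the event ... */
    }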
@@@ -6113,6 -6164,7 +6155,7 @@@ int perf_event_init_context(struct task
        struct perf_event *event;
        struct task_struct *parent = current;
        int inherited_all = 1;
+       unsigned long flags;
        int ret = 0;
  
        child->perf_event_ctxp[ctxn] = NULL;
                        break;
        }
  
+       /*
+        * We can't hold ctx->lock when iterating the ->flexible_group list due
+        * to allocations, but we need to prevent rotation because
+        * rotate_ctx() will change the list from interrupt context.
+        */
+       raw_spin_lock_irqsave(&parent_ctx->lock, flags);
+       parent_ctx->rotate_disable = 1;
+       raw_spin_unlock_irqrestore(&parent_ctx->lock, flags);
        list_for_each_entry(event, &parent_ctx->flexible_groups, group_entry) {
                ret = inherit_task_group(event, parent, parent_ctx,
                                         child, ctxn, &inherited_all);
                if (ret)
                        break;
        }
  
+       raw_spin_lock_irqsave(&parent_ctx->lock, flags);
+       parent_ctx->rotate_disable = 0;
+       raw_spin_unlock_irqrestore(&parent_ctx->lock, flags);
        child_ctx = child->perf_event_ctxp[ctxn];
  
        if (child_ctx && inherited_all) {
@@@ -6312,6 -6377,8 +6368,8 @@@ perf_cpu_notify(struct notifier_block *
  
  void __init perf_event_init(void)
  {
+       int ret;
        perf_event_init_all_cpus();
        init_srcu_struct(&pmus_srcu);
        perf_pmu_register(&perf_swevent);
        perf_pmu_register(&perf_task_clock);
        perf_tp_register();
        perf_cpu_notifier(perf_cpu_notify);
+       ret = init_hw_breakpoint();
+       WARN(ret, "hw_breakpoint initialization failed with: %d", ret);
  }
diff --combined tools/perf/builtin-record.c
index d9dd47885218b0dffc11460776153b3b7bb7a031,e2c2de201eecafc5490caba7637f0eefbcbc0b46..3d2cb4899807cf5bdadaba70a65167f16cdd4eef
@@@ -326,7 -326,7 +326,7 @@@ try_again
                                goto try_again;
                        }
                        printf("\n");
 -                      error("perfcounter syscall returned with %d (%s)\n",
 +                      error("sys_perf_event_open() syscall returned with %d (%s).  /bin/dmesg may provide additional information.\n",
                                        fd[nr_cpu][counter][thread_index], strerror(err));
  
  #if defined(__i386__) || defined(__x86_64__)
@@@ -697,17 -697,18 +697,18 @@@ static int __cmd_record(int argc, cons
        if (err < 0)
                err = event__synthesize_kernel_mmap(process_synthesized_event,
                                                    session, machine, "_stext");
-       if (err < 0) {
-               pr_err("Couldn't record kernel reference relocation symbol.\n");
-               return err;
-       }
+       if (err < 0)
+               pr_err("Couldn't record kernel reference relocation symbol\n"
+                      "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
+                      "Check /proc/kallsyms permission or run as root.\n");
  
        err = event__synthesize_modules(process_synthesized_event,
                                        session, machine);
-       if (err < 0) {
-               pr_err("Couldn't record kernel reference relocation symbol.\n");
-               return err;
-       }
+       if (err < 0)
+               pr_err("Couldn't record kernel module information.\n"
+                      "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
+                      "Check /proc/modules permission or run as root.\n");
        if (perf_guest)
                perf_session__process_machines(session, event__synthesize_guest_os);