Merge branch 'perf/urgent' into perf/core

author Ingo Molnar <mingo@elte.hu>

Fri, 29 Jan 2010 08:24:57 +0000 (09:24 +0100)

committer Ingo Molnar <mingo@elte.hu>

Fri, 29 Jan 2010 09:36:22 +0000 (10:36 +0100)
author Ingo Molnar <mingo@elte.hu>
Fri, 29 Jan 2010 08:24:57 +0000 (09:24 +0100)
committer Ingo Molnar <mingo@elte.hu>
Fri, 29 Jan 2010 09:36:22 +0000 (10:36 +0100)
diff --combined arch/x86/include/asm/perf_event.h

index ff5ede128bae20898d807e96d4a82ad65c52dce5,1380367dabd96375eb1f0d3518fa7cd97750de01..befd172c82ada4c1c4c4e24b539e79689ddb3054
--- 1/arch/x86/include/asm/perf_event.h
--- 2/arch/x86/include/asm/perf_event.h
+++ b/arch/x86/include/asm/perf_event.h
@@@ -19,6 -19,7 +19,7 @@@
   #define MSR_ARCH_PERFMON_EVENTSEL1                         0x187
   
   #define ARCH_PERFMON_EVENTSEL0_ENABLE                   (1 << 22)
+ #define ARCH_PERFMON_EVENTSEL_ANY                       (1 << 21)
   #define ARCH_PERFMON_EVENTSEL_INT                       (1 << 20)
   #define ARCH_PERFMON_EVENTSEL_OS                        (1 << 17)
   #define ARCH_PERFMON_EVENTSEL_USR                       (1 << 16)
@@@ -26,14 -27,7 +27,14 @@@
   /*
    * Includes eventsel and unit mask as well:
    */
- -#define ARCH_PERFMON_EVENT_MASK                                   0xffff
+ +
+ +
+ +#define INTEL_ARCH_EVTSEL_MASK                0x000000FFULL
+ +#define INTEL_ARCH_UNIT_MASK          0x0000FF00ULL
+ +#define INTEL_ARCH_EDGE_MASK          0x00040000ULL
+ +#define INTEL_ARCH_INV_MASK           0x00800000ULL
+ +#define INTEL_ARCH_CNT_MASK           0xFF000000ULL
+ +#define INTEL_ARCH_EVENT_MASK (INTEL_ARCH_UNIT_MASK|INTEL_ARCH_EVTSEL_MASK)
   
   /*
    * filter mask to validate fixed counter events.
@@@ -44,12 -38,7 +45,12 @@@
    *  The other filters are supported by fixed counters.
    *  The any-thread option is supported starting with v3.
    */
- -#define ARCH_PERFMON_EVENT_FILTER_MASK                        0xff840000
+ +#define INTEL_ARCH_FIXED_MASK \
+ +      (INTEL_ARCH_CNT_MASK| \
+ +       INTEL_ARCH_INV_MASK| \
+ +       INTEL_ARCH_EDGE_MASK|\
+ +       INTEL_ARCH_UNIT_MASK|\
+ +       INTEL_ARCH_EVTSEL_MASK)
   
   #define ARCH_PERFMON_UNHALTED_CORE_CYCLES_SEL               0x3c
   #define ARCH_PERFMON_UNHALTED_CORE_CYCLES_UMASK               (0x00 << 8)
diff --combined arch/x86/kernel/cpu/perf_event.c

index 518eb3e395778a3d0009f256c0b663a8816e31f0,8c1c07073cccf822c882283c641965f174f3b769..1846ead0576b933806c1c83ef30855c648427bf8
--- 1/arch/x86/kernel/cpu/perf_event.c
--- 2/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@@ -7,7 -7,6 +7,7 @@@
    *  Copyright (C) 2009 Advanced Micro Devices, Inc., Robert Richter
    *  Copyright (C) 2008-2009 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com>
    *  Copyright (C) 2009 Intel Corporation, <markus.t.metzger@intel.com>
+ + *  Copyright (C) 2009 Google, Inc., Stephane Eranian
    *
    *  For licencing details see kernel-base/COPYING
    */
@@@ -23,7 -22,6 +23,7 @@@
   #include <linux/uaccess.h>
   #include <linux/highmem.h>
   #include <linux/cpu.h>
+ +#include <linux/bitops.h>
   
   #include <asm/apic.h>
   #include <asm/stacktrace.h>
@@@ -70,47 -68,26 +70,47 @@@ struct debug_store 
         u64     pebs_event_reset[MAX_PEBS_EVENTS];
   };
   
+ +struct event_constraint {
+ +      union {
+ +              unsigned long   idxmsk[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
+ +              u64             idxmsk64[1];
+ +      };
+ +      int     code;
+ +      int     cmask;
+ +      int     weight;
+ +};
+ +
   struct cpu_hw_events {
- -      struct perf_event       *events[X86_PMC_IDX_MAX];
- -      unsigned long           used_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
+ +      struct perf_event       *events[X86_PMC_IDX_MAX]; /* in counter order */
         unsigned long           active_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
         unsigned long           interrupts;
         int                     enabled;
         struct debug_store      *ds;
- -};
   
- -struct event_constraint {
- -      unsigned long   idxmsk[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
- -      int             code;
+ +      int                     n_events;
+ +      int                     n_added;
+ +      int                     assign[X86_PMC_IDX_MAX]; /* event to counter assignment */
+ +      struct perf_event       *event_list[X86_PMC_IDX_MAX]; /* in enabled order */
   };
   
- -#define EVENT_CONSTRAINT(c, m) { .code = (c), .idxmsk[0] = (m) }
- -#define EVENT_CONSTRAINT_END  { .code = 0, .idxmsk[0] = 0 }
+ +#define EVENT_CONSTRAINT(c, n, m) {   \
+ +      { .idxmsk64[0] = (n) },         \
+ +      .code = (c),                    \
+ +      .cmask = (m),                   \
+ +      .weight = HWEIGHT64((u64)(n)),  \
+ +}
+ +
+ +#define INTEL_EVENT_CONSTRAINT(c, n)  \
+ +      EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVTSEL_MASK)
+ +
+ +#define FIXED_EVENT_CONSTRAINT(c, n)  \
+ +      EVENT_CONSTRAINT(c, n, INTEL_ARCH_FIXED_MASK)
   
- -#define for_each_event_constraint(e, c) \
- -      for ((e) = (c); (e)->idxmsk[0]; (e)++)
+ +#define EVENT_CONSTRAINT_END          \
+ +      EVENT_CONSTRAINT(0, 0, 0)
   
+ +#define for_each_event_constraint(e, c)       \
+ +      for ((e) = (c); (e)->cmask; (e)++)
   
   /*
    * struct x86_pmu - generic x86 pmu
@@@ -137,14 -114,8 +137,14 @@@ struct x86_pmu 
         u64             intel_ctrl;
         void            (*enable_bts)(u64 config);
         void            (*disable_bts)(void);
- -      int             (*get_event_idx)(struct cpu_hw_events *cpuc,
- -                                       struct hw_perf_event *hwc);
+ +
+ +      struct event_constraint *
+ +                      (*get_event_constraints)(struct cpu_hw_events *cpuc,
+ +                                               struct perf_event *event);
+ +
+ +      void            (*put_event_constraints)(struct cpu_hw_events *cpuc,
+ +                                               struct perf_event *event);
+ +      struct event_constraint *event_constraints;
   };
   
   static struct x86_pmu x86_pmu __read_mostly;
@@@ -153,8 -124,7 +153,8 @@@ static DEFINE_PER_CPU(struct cpu_hw_eve
         .enabled = 1,
   };
   
- -static const struct event_constraint *event_constraints;
+ +static int x86_perf_event_set_period(struct perf_event *event,
+ +                           struct hw_perf_event *hwc, int idx);
   
   /*
    * Not sure about some of these
@@@ -201,14 -171,14 +201,14 @@@ static u64 p6_pmu_raw_event(u64 hw_even
         return hw_event & P6_EVNTSEL_MASK;
   }
   
- -static const struct event_constraint intel_p6_event_constraints[] =
+ +static struct event_constraint intel_p6_event_constraints[] =
   {
- -      EVENT_CONSTRAINT(0xc1, 0x1),    /* FLOPS */
- -      EVENT_CONSTRAINT(0x10, 0x1),    /* FP_COMP_OPS_EXE */
- -      EVENT_CONSTRAINT(0x11, 0x1),    /* FP_ASSIST */
- -      EVENT_CONSTRAINT(0x12, 0x2),    /* MUL */
- -      EVENT_CONSTRAINT(0x13, 0x2),    /* DIV */
- -      EVENT_CONSTRAINT(0x14, 0x1),    /* CYCLES_DIV_BUSY */
+ +      INTEL_EVENT_CONSTRAINT(0xc1, 0x1),      /* FLOPS */
+ +      INTEL_EVENT_CONSTRAINT(0x10, 0x1),      /* FP_COMP_OPS_EXE */
+ +      INTEL_EVENT_CONSTRAINT(0x11, 0x1),      /* FP_ASSIST */
+ +      INTEL_EVENT_CONSTRAINT(0x12, 0x2),      /* MUL */
+ +      INTEL_EVENT_CONSTRAINT(0x13, 0x2),      /* DIV */
+ +      INTEL_EVENT_CONSTRAINT(0x14, 0x1),      /* CYCLES_DIV_BUSY */
         EVENT_CONSTRAINT_END
   };
   
@@@ -226,51 -196,32 +226,51 @@@ static const u64 intel_perfmon_event_ma
     [PERF_COUNT_HW_BUS_CYCLES]          = 0x013c,
   };
   
- -static const struct event_constraint intel_core_event_constraints[] =
- -{
- -      EVENT_CONSTRAINT(0x10, 0x1),    /* FP_COMP_OPS_EXE */
- -      EVENT_CONSTRAINT(0x11, 0x2),    /* FP_ASSIST */
- -      EVENT_CONSTRAINT(0x12, 0x2),    /* MUL */
- -      EVENT_CONSTRAINT(0x13, 0x2),    /* DIV */
- -      EVENT_CONSTRAINT(0x14, 0x1),    /* CYCLES_DIV_BUSY */
- -      EVENT_CONSTRAINT(0x18, 0x1),    /* IDLE_DURING_DIV */
- -      EVENT_CONSTRAINT(0x19, 0x2),    /* DELAYED_BYPASS */
- -      EVENT_CONSTRAINT(0xa1, 0x1),    /* RS_UOPS_DISPATCH_CYCLES */
- -      EVENT_CONSTRAINT(0xcb, 0x1),    /* MEM_LOAD_RETIRED */
+ +static struct event_constraint intel_core_event_constraints[] =
+ +{
+ +      FIXED_EVENT_CONSTRAINT(0xc0, (0x3|(1ULL<<32))), /* INSTRUCTIONS_RETIRED */
+ +      FIXED_EVENT_CONSTRAINT(0x3c, (0x3|(1ULL<<33))), /* UNHALTED_CORE_CYCLES */
+ +      INTEL_EVENT_CONSTRAINT(0x10, 0x1), /* FP_COMP_OPS_EXE */
+ +      INTEL_EVENT_CONSTRAINT(0x11, 0x2), /* FP_ASSIST */
+ +      INTEL_EVENT_CONSTRAINT(0x12, 0x2), /* MUL */
+ +      INTEL_EVENT_CONSTRAINT(0x13, 0x2), /* DIV */
+ +      INTEL_EVENT_CONSTRAINT(0x14, 0x1), /* CYCLES_DIV_BUSY */
+ +      INTEL_EVENT_CONSTRAINT(0x18, 0x1), /* IDLE_DURING_DIV */
+ +      INTEL_EVENT_CONSTRAINT(0x19, 0x2), /* DELAYED_BYPASS */
+ +      INTEL_EVENT_CONSTRAINT(0xa1, 0x1), /* RS_UOPS_DISPATCH_CYCLES */
+ +      INTEL_EVENT_CONSTRAINT(0xcb, 0x1), /* MEM_LOAD_RETIRED */
+ +      EVENT_CONSTRAINT_END
+ +};
+ +
+ +static struct event_constraint intel_nehalem_event_constraints[] =
+ +{
+ +      FIXED_EVENT_CONSTRAINT(0xc0, (0xf|(1ULL<<32))), /* INSTRUCTIONS_RETIRED */
+ +      FIXED_EVENT_CONSTRAINT(0x3c, (0xf|(1ULL<<33))), /* UNHALTED_CORE_CYCLES */
+ +      INTEL_EVENT_CONSTRAINT(0x40, 0x3), /* L1D_CACHE_LD */
+ +      INTEL_EVENT_CONSTRAINT(0x41, 0x3), /* L1D_CACHE_ST */
+ +      INTEL_EVENT_CONSTRAINT(0x42, 0x3), /* L1D_CACHE_LOCK */
+ +      INTEL_EVENT_CONSTRAINT(0x43, 0x3), /* L1D_ALL_REF */
+ +      INTEL_EVENT_CONSTRAINT(0x48, 0x3), /* L1D_PEND_MISS */
+ +      INTEL_EVENT_CONSTRAINT(0x4e, 0x3), /* L1D_PREFETCH */
+ +      INTEL_EVENT_CONSTRAINT(0x51, 0x3), /* L1D */
+ +      INTEL_EVENT_CONSTRAINT(0x63, 0x3), /* CACHE_LOCK_CYCLES */
         EVENT_CONSTRAINT_END
   };
   
- -static const struct event_constraint intel_nehalem_event_constraints[] =
- -{
- -      EVENT_CONSTRAINT(0x40, 0x3),    /* L1D_CACHE_LD */
- -      EVENT_CONSTRAINT(0x41, 0x3),    /* L1D_CACHE_ST */
- -      EVENT_CONSTRAINT(0x42, 0x3),    /* L1D_CACHE_LOCK */
- -      EVENT_CONSTRAINT(0x43, 0x3),    /* L1D_ALL_REF */
- -      EVENT_CONSTRAINT(0x4e, 0x3),    /* L1D_PREFETCH */
- -      EVENT_CONSTRAINT(0x4c, 0x3),    /* LOAD_HIT_PRE */
- -      EVENT_CONSTRAINT(0x51, 0x3),    /* L1D */
- -      EVENT_CONSTRAINT(0x52, 0x3),    /* L1D_CACHE_PREFETCH_LOCK_FB_HIT */
- -      EVENT_CONSTRAINT(0x53, 0x3),    /* L1D_CACHE_LOCK_FB_HIT */
- -      EVENT_CONSTRAINT(0xc5, 0x3),    /* CACHE_LOCK_CYCLES */
+ +static struct event_constraint intel_westmere_event_constraints[] =
+ +{
+ +      FIXED_EVENT_CONSTRAINT(0xc0, (0xf|(1ULL<<32))), /* INSTRUCTIONS_RETIRED */
+ +      FIXED_EVENT_CONSTRAINT(0x3c, (0xf|(1ULL<<33))), /* UNHALTED_CORE_CYCLES */
+ +      INTEL_EVENT_CONSTRAINT(0x51, 0x3), /* L1D */
+ +      INTEL_EVENT_CONSTRAINT(0x60, 0x1), /* OFFCORE_REQUESTS_OUTSTANDING */
+ +      INTEL_EVENT_CONSTRAINT(0x63, 0x3), /* CACHE_LOCK_CYCLES */
+ +      EVENT_CONSTRAINT_END
+ +};
+ +
+ +static struct event_constraint intel_gen_event_constraints[] =
+ +{
+ +      FIXED_EVENT_CONSTRAINT(0xc0, (0x3|(1ULL<<32))), /* INSTRUCTIONS_RETIRED */
+ +      FIXED_EVENT_CONSTRAINT(0x3c, (0x3|(1ULL<<33))), /* UNHALTED_CORE_CYCLES */
         EVENT_CONSTRAINT_END
   };
   
@@@ -294,97 -245,6 +294,97 @@@ static u64 __read_mostly hw_cache_event
                                 [PERF_COUNT_HW_CACHE_OP_MAX]
                                 [PERF_COUNT_HW_CACHE_RESULT_MAX];
   
+ +static __initconst u64 westmere_hw_cache_event_ids
+ +                              [PERF_COUNT_HW_CACHE_MAX]
+ +                              [PERF_COUNT_HW_CACHE_OP_MAX]
+ +                              [PERF_COUNT_HW_CACHE_RESULT_MAX] =
+ +{
+ + [ C(L1D) ] = {
+ +      [ C(OP_READ) ] = {
+ +              [ C(RESULT_ACCESS) ] = 0x010b, /* MEM_INST_RETIRED.LOADS       */
+ +              [ C(RESULT_MISS)   ] = 0x0151, /* L1D.REPL                     */
+ +      },
+ +      [ C(OP_WRITE) ] = {
+ +              [ C(RESULT_ACCESS) ] = 0x020b, /* MEM_INST_RETIRED.STORES      */
+ +              [ C(RESULT_MISS)   ] = 0x0251, /* L1D.M_REPL                   */
+ +      },
+ +      [ C(OP_PREFETCH) ] = {
+ +              [ C(RESULT_ACCESS) ] = 0x014e, /* L1D_PREFETCH.REQUESTS        */
+ +              [ C(RESULT_MISS)   ] = 0x024e, /* L1D_PREFETCH.MISS            */
+ +      },
+ + },
+ + [ C(L1I ) ] = {
+ +      [ C(OP_READ) ] = {
+ +              [ C(RESULT_ACCESS) ] = 0x0380, /* L1I.READS                    */
+ +              [ C(RESULT_MISS)   ] = 0x0280, /* L1I.MISSES                   */
+ +      },
+ +      [ C(OP_WRITE) ] = {
+ +              [ C(RESULT_ACCESS) ] = -1,
+ +              [ C(RESULT_MISS)   ] = -1,
+ +      },
+ +      [ C(OP_PREFETCH) ] = {
+ +              [ C(RESULT_ACCESS) ] = 0x0,
+ +              [ C(RESULT_MISS)   ] = 0x0,
+ +      },
+ + },
+ + [ C(LL  ) ] = {
+ +      [ C(OP_READ) ] = {
+ +              [ C(RESULT_ACCESS) ] = 0x0324, /* L2_RQSTS.LOADS               */
+ +              [ C(RESULT_MISS)   ] = 0x0224, /* L2_RQSTS.LD_MISS             */
+ +      },
+ +      [ C(OP_WRITE) ] = {
+ +              [ C(RESULT_ACCESS) ] = 0x0c24, /* L2_RQSTS.RFOS                */
+ +              [ C(RESULT_MISS)   ] = 0x0824, /* L2_RQSTS.RFO_MISS            */
+ +      },
+ +      [ C(OP_PREFETCH) ] = {
+ +              [ C(RESULT_ACCESS) ] = 0x4f2e, /* LLC Reference                */
+ +              [ C(RESULT_MISS)   ] = 0x412e, /* LLC Misses                   */
+ +      },
+ + },
+ + [ C(DTLB) ] = {
+ +      [ C(OP_READ) ] = {
+ +              [ C(RESULT_ACCESS) ] = 0x010b, /* MEM_INST_RETIRED.LOADS       */
+ +              [ C(RESULT_MISS)   ] = 0x0108, /* DTLB_LOAD_MISSES.ANY         */
+ +      },
+ +      [ C(OP_WRITE) ] = {
+ +              [ C(RESULT_ACCESS) ] = 0x020b, /* MEM_INST_RETIRED.STORES      */
+ +              [ C(RESULT_MISS)   ] = 0x010c, /* MEM_STORE_RETIRED.DTLB_MISS  */
+ +      },
+ +      [ C(OP_PREFETCH) ] = {
+ +              [ C(RESULT_ACCESS) ] = 0x0,
+ +              [ C(RESULT_MISS)   ] = 0x0,
+ +      },
+ + },
+ + [ C(ITLB) ] = {
+ +      [ C(OP_READ) ] = {
+ +              [ C(RESULT_ACCESS) ] = 0x01c0, /* INST_RETIRED.ANY_P           */
+ +              [ C(RESULT_MISS)   ] = 0x0185, /* ITLB_MISSES.ANY              */
+ +      },
+ +      [ C(OP_WRITE) ] = {
+ +              [ C(RESULT_ACCESS) ] = -1,
+ +              [ C(RESULT_MISS)   ] = -1,
+ +      },
+ +      [ C(OP_PREFETCH) ] = {
+ +              [ C(RESULT_ACCESS) ] = -1,
+ +              [ C(RESULT_MISS)   ] = -1,
+ +      },
+ + },
+ + [ C(BPU ) ] = {
+ +      [ C(OP_READ) ] = {
+ +              [ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ALL_BRANCHES */
+ +              [ C(RESULT_MISS)   ] = 0x03e8, /* BPU_CLEARS.ANY               */
+ +      },
+ +      [ C(OP_WRITE) ] = {
+ +              [ C(RESULT_ACCESS) ] = -1,
+ +              [ C(RESULT_MISS)   ] = -1,
+ +      },
+ +      [ C(OP_PREFETCH) ] = {
+ +              [ C(RESULT_ACCESS) ] = -1,
+ +              [ C(RESULT_MISS)   ] = -1,
+ +      },
+ + },
+ +};
+ +
   static __initconst u64 nehalem_hw_cache_event_ids
                                 [PERF_COUNT_HW_CACHE_MAX]
                                 [PERF_COUNT_HW_CACHE_OP_MAX]
@@@ -667,11 -527,11 +667,11 @@@ static u64 intel_pmu_raw_event(u64 hw_e
   #define CORE_EVNTSEL_REG_MASK         0xFF000000ULL
   
   #define CORE_EVNTSEL_MASK             \
- -      (CORE_EVNTSEL_EVENT_MASK |      \
- -       CORE_EVNTSEL_UNIT_MASK  |      \
- -       CORE_EVNTSEL_EDGE_MASK  |      \
- -       CORE_EVNTSEL_INV_MASK  |       \
- -       CORE_EVNTSEL_REG_MASK)
+ +      (INTEL_ARCH_EVTSEL_MASK |       \
+ +       INTEL_ARCH_UNIT_MASK   |       \
+ +       INTEL_ARCH_EDGE_MASK   |       \
+ +       INTEL_ARCH_INV_MASK    |       \
+ +       INTEL_ARCH_CNT_MASK)
   
         return hw_event & CORE_EVNTSEL_MASK;
   }
@@@ -1198,8 -1058,15 +1198,8 @@@ static int __hw_perf_event_init(struct 
   
   static void p6_pmu_disable_all(void)
   {
- -      struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
         u64 val;
   
- -      if (!cpuc->enabled)
- -              return;
- -
- -      cpuc->enabled = 0;
- -      barrier();
- -
         /* p6 only has one enable register */
         rdmsrl(MSR_P6_EVNTSEL0, val);
         val &= ~ARCH_PERFMON_EVENTSEL0_ENABLE;
@@@ -1210,6 -1077,12 +1210,6 @@@ static void intel_pmu_disable_all(void
   {
         struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
   
- -      if (!cpuc->enabled)
- -              return;
- -
- -      cpuc->enabled = 0;
- -      barrier();
- -
         wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0);
   
         if (test_bit(X86_PMC_IDX_FIXED_BTS, cpuc->active_mask))
@@@ -1221,6 -1094,17 +1221,6 @@@ static void amd_pmu_disable_all(void
         struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
         int idx;
   
- -      if (!cpuc->enabled)
- -              return;
- -
- -      cpuc->enabled = 0;
- -      /*
- -       * ensure we write the disable before we start disabling the
- -       * events proper, so that amd_pmu_enable_event() does the
- -       * right thing.
- -       */
- -      barrier();
- -
         for (idx = 0; idx < x86_pmu.num_events; idx++) {
                 u64 val;
   
@@@ -1236,25 -1120,22 +1236,25 @@@
   
   void hw_perf_disable(void)
   {
+ +      struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+ +
         if (!x86_pmu_initialized())
                 return;
- -      return x86_pmu.disable_all();
+ +
+ +      if (!cpuc->enabled)
+ +              return;
+ +
+ +      cpuc->n_added = 0;
+ +      cpuc->enabled = 0;
+ +      barrier();
+ +
+ +      x86_pmu.disable_all();
   }
   
   static void p6_pmu_enable_all(void)
   {
- -      struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
         unsigned long val;
   
- -      if (cpuc->enabled)
- -              return;
- -
- -      cpuc->enabled = 1;
- -      barrier();
- -
         /* p6 only has one enable register */
         rdmsrl(MSR_P6_EVNTSEL0, val);
         val |= ARCH_PERFMON_EVENTSEL0_ENABLE;
@@@ -1265,6 -1146,12 +1265,6 @@@ static void intel_pmu_enable_all(void
   {
         struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
   
- -      if (cpuc->enabled)
- -              return;
- -
- -      cpuc->enabled = 1;
- -      barrier();
- -
         wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, x86_pmu.intel_ctrl);
   
         if (test_bit(X86_PMC_IDX_FIXED_BTS, cpuc->active_mask)) {
@@@ -1283,6 -1170,12 +1283,6 @@@ static void amd_pmu_enable_all(void
         struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
         int idx;
   
- -      if (cpuc->enabled)
- -              return;
- -
- -      cpuc->enabled = 1;
- -      barrier();
- -
         for (idx = 0; idx < x86_pmu.num_events; idx++) {
                 struct perf_event *event = cpuc->events[idx];
                 u64 val;
@@@ -1296,239 -1189,10 +1296,239 @@@
         }
   }
   
+ +static const struct pmu pmu;
+ +
+ +static inline int is_x86_event(struct perf_event *event)
+ +{
+ +      return event->pmu == &pmu;
+ +}
+ +
+ +static int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign)
+ +{
+ +      struct event_constraint *c, *constraints[X86_PMC_IDX_MAX];
+ +      unsigned long used_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
+ +      int i, j, w, wmax, num = 0;
+ +      struct hw_perf_event *hwc;
+ +
+ +      bitmap_zero(used_mask, X86_PMC_IDX_MAX);
+ +
+ +      for (i = 0; i < n; i++) {
+ +              constraints[i] =
+ +                x86_pmu.get_event_constraints(cpuc, cpuc->event_list[i]);
+ +      }
+ +
+ +      /*
+ +       * fastpath, try to reuse previous register
+ +       */
+ +      for (i = 0; i < n; i++) {
+ +              hwc = &cpuc->event_list[i]->hw;
+ +              c = constraints[i];
+ +
+ +              /* never assigned */
+ +              if (hwc->idx == -1)
+ +                      break;
+ +
+ +              /* constraint still honored */
+ +              if (!test_bit(hwc->idx, c->idxmsk))
+ +                      break;
+ +
+ +              /* not already used */
+ +              if (test_bit(hwc->idx, used_mask))
+ +                      break;
+ +
+ +              set_bit(hwc->idx, used_mask);
+ +              if (assign)
+ +                      assign[i] = hwc->idx;
+ +      }
+ +      if (i == n)
+ +              goto done;
+ +
+ +      /*
+ +       * begin slow path
+ +       */
+ +
+ +      bitmap_zero(used_mask, X86_PMC_IDX_MAX);
+ +
+ +      /*
+ +       * weight = number of possible counters
+ +       *
+ +       * 1    = most constrained, only works on one counter
+ +       * wmax = least constrained, works on any counter
+ +       *
+ +       * assign events to counters starting with most
+ +       * constrained events.
+ +       */
+ +      wmax = x86_pmu.num_events;
+ +
+ +      /*
+ +       * when fixed event counters are present,
+ +       * wmax is incremented by 1 to account
+ +       * for one more choice
+ +       */
+ +      if (x86_pmu.num_events_fixed)
+ +              wmax++;
+ +
+ +      for (w = 1, num = n; num && w <= wmax; w++) {
+ +              /* for each event */
+ +              for (i = 0; num && i < n; i++) {
+ +                      c = constraints[i];
+ +                      hwc = &cpuc->event_list[i]->hw;
+ +
+ +                      if (c->weight != w)
+ +                              continue;
+ +
+ +                      for_each_bit(j, c->idxmsk, X86_PMC_IDX_MAX) {
+ +                              if (!test_bit(j, used_mask))
+ +                                      break;
+ +                      }
+ +
+ +                      if (j == X86_PMC_IDX_MAX)
+ +                              break;
+ +
+ +                      set_bit(j, used_mask);
+ +
+ +                      if (assign)
+ +                              assign[i] = j;
+ +                      num--;
+ +              }
+ +      }
+ +done:
+ +      /*
+ +       * scheduling failed or is just a simulation,
+ +       * free resources if necessary
+ +       */
+ +      if (!assign || num) {
+ +              for (i = 0; i < n; i++) {
+ +                      if (x86_pmu.put_event_constraints)
+ +                              x86_pmu.put_event_constraints(cpuc, cpuc->event_list[i]);
+ +              }
+ +      }
+ +      return num ? -ENOSPC : 0;
+ +}
+ +
+ +/*
+ + * dogrp: true if the leader's sibling events (group) must be collected too
+ + * returns total number of events, or a negative error code
+ + */
+ +static int collect_events(struct cpu_hw_events *cpuc, struct perf_event *leader, bool dogrp)
+ +{
+ +      struct perf_event *event;
+ +      int n, max_count;
+ +
+ +      max_count = x86_pmu.num_events + x86_pmu.num_events_fixed;
+ +
+ +      /* current number of events already accepted */
+ +      n = cpuc->n_events;
+ +
+ +      if (is_x86_event(leader)) {
+ +              if (n >= max_count)
+ +                      return -ENOSPC;
+ +              cpuc->event_list[n] = leader;
+ +              n++;
+ +      }
+ +      if (!dogrp)
+ +              return n;
+ +
+ +      list_for_each_entry(event, &leader->sibling_list, group_entry) {
+ +              if (!is_x86_event(event) ||
+ +                  event->state <= PERF_EVENT_STATE_OFF)
+ +                      continue;
+ +
+ +              if (n >= max_count)
+ +                      return -ENOSPC;
+ +
+ +              cpuc->event_list[n] = event;
+ +              n++;
+ +      }
+ +      return n;
+ +}
+ +
+ +
+ +static inline void x86_assign_hw_event(struct perf_event *event,
+ +                              struct hw_perf_event *hwc, int idx)
+ +{
+ +      hwc->idx = idx;
+ +
+ +      if (hwc->idx == X86_PMC_IDX_FIXED_BTS) {
+ +              hwc->config_base = 0;
+ +              hwc->event_base = 0;
+ +      } else if (hwc->idx >= X86_PMC_IDX_FIXED) {
+ +              hwc->config_base = MSR_ARCH_PERFMON_FIXED_CTR_CTRL;
+ +              /*
+ +               * We set it so that event_base + idx in wrmsr/rdmsr maps to
+ +               * MSR_ARCH_PERFMON_FIXED_CTR0 ... CTR2:
+ +               */
+ +              hwc->event_base =
+ +                      MSR_ARCH_PERFMON_FIXED_CTR0 - X86_PMC_IDX_FIXED;
+ +      } else {
+ +              hwc->config_base = x86_pmu.eventsel;
+ +              hwc->event_base  = x86_pmu.perfctr;
+ +      }
+ +}
+ +
+ +static void __x86_pmu_disable(struct perf_event *event, struct cpu_hw_events *cpuc);
+ +
   void hw_perf_enable(void)
   {
+ +      struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+ +      struct perf_event *event;
+ +      struct hw_perf_event *hwc;
+ +      int i;
+ +
         if (!x86_pmu_initialized())
                 return;
+ +
+ +      if (cpuc->enabled)
+ +              return;
+ +
+ +      if (cpuc->n_added) {
+ +              /*
+ +               * apply assignment obtained either from
+ +               * hw_perf_group_sched_in() or x86_pmu_enable()
+ +               *
+ +               * step1: save events moving to new counters
+ +               * step2: reprogram moved events into new counters
+ +               */
+ +              for (i = 0; i < cpuc->n_events; i++) {
+ +
+ +                      event = cpuc->event_list[i];
+ +                      hwc = &event->hw;
+ +
+ +                      if (hwc->idx == -1 || hwc->idx == cpuc->assign[i])
+ +                              continue;
+ +
+ +                      __x86_pmu_disable(event, cpuc);
+ +
+ +                      hwc->idx = -1;
+ +              }
+ +
+ +              for (i = 0; i < cpuc->n_events; i++) {
+ +
+ +                      event = cpuc->event_list[i];
+ +                      hwc = &event->hw;
+ +
+ +                      if (hwc->idx == -1) {
+ +                              x86_assign_hw_event(event, hwc, cpuc->assign[i]);
+ +                              x86_perf_event_set_period(event, hwc, hwc->idx);
+ +                      }
+ +                      /*
+ +                       * need to mark as active because x86_pmu_disable()
+ +                       * clears active_mask and events[] yet it preserves
+ +                       * idx
+ +                       */
+ +                      set_bit(hwc->idx, cpuc->active_mask);
+ +                      cpuc->events[hwc->idx] = event;
+ +
+ +                      x86_pmu.enable(hwc, hwc->idx);
+ +                      perf_event_update_userpage(event);
+ +              }
+ +              cpuc->n_added = 0;
+ +              perf_events_lapic_init();
+ +      }
+ +
+ +      cpuc->enabled = 1;
+ +      barrier();
+ +
         x86_pmu.enable_all();
   }
   
@@@ -1679,6 -1343,13 +1679,13 @@@ intel_pmu_enable_fixed(struct hw_perf_e
                 bits |= 0x2;
         if (hwc->config & ARCH_PERFMON_EVENTSEL_OS)
                 bits |= 0x1;
+ 
+       /*
+        * ANY bit is supported in v3 and up
+        */
+       if (x86_pmu.version > 2 && hwc->config & ARCH_PERFMON_EVENTSEL_ANY)
+               bits |= 0x4;
+ 
         bits <<= (idx * 4);
         mask = 0xfULL << (idx * 4);
   
@@@ -1727,40 -1398,148 +1734,40 @@@ static void amd_pmu_enable_event(struc
                 x86_pmu_enable_event(hwc, idx);
   }
   
- -static int fixed_mode_idx(struct hw_perf_event *hwc)
- -{
- -      unsigned int hw_event;
- -
- -      hw_event = hwc->config & ARCH_PERFMON_EVENT_MASK;
- -
- -      if (unlikely((hw_event ==
- -                    x86_pmu.event_map(PERF_COUNT_HW_BRANCH_INSTRUCTIONS)) &&
- -                   (hwc->sample_period == 1)))
- -              return X86_PMC_IDX_FIXED_BTS;
- -
- -      if (!x86_pmu.num_events_fixed)
- -              return -1;
- -
- -      /*
- -       * fixed counters do not take all possible filters
- -       */
- -      if (hwc->config & ARCH_PERFMON_EVENT_FILTER_MASK)
- -              return -1;
- -
- -      if (unlikely(hw_event == x86_pmu.event_map(PERF_COUNT_HW_INSTRUCTIONS)))
- -              return X86_PMC_IDX_FIXED_INSTRUCTIONS;
- -      if (unlikely(hw_event == x86_pmu.event_map(PERF_COUNT_HW_CPU_CYCLES)))
- -              return X86_PMC_IDX_FIXED_CPU_CYCLES;
- -      if (unlikely(hw_event == x86_pmu.event_map(PERF_COUNT_HW_BUS_CYCLES)))
- -              return X86_PMC_IDX_FIXED_BUS_CYCLES;
- -
- -      return -1;
- -}
- -
- -/*
- - * generic counter allocator: get next free counter
- - */
- -static int
- -gen_get_event_idx(struct cpu_hw_events *cpuc, struct hw_perf_event *hwc)
- -{
- -      int idx;
- -
- -      idx = find_first_zero_bit(cpuc->used_mask, x86_pmu.num_events);
- -      return idx == x86_pmu.num_events ? -1 : idx;
- -}
- -
   /*
- - * intel-specific counter allocator: check event constraints
- - */
- -static int
- -intel_get_event_idx(struct cpu_hw_events *cpuc, struct hw_perf_event *hwc)
- -{
- -      const struct event_constraint *event_constraint;
- -      int i, code;
- -
- -      if (!event_constraints)
- -              goto skip;
- -
- -      code = hwc->config & CORE_EVNTSEL_EVENT_MASK;
- -
- -      for_each_event_constraint(event_constraint, event_constraints) {
- -              if (code == event_constraint->code) {
- -                      for_each_bit(i, event_constraint->idxmsk, X86_PMC_IDX_MAX) {
- -                              if (!test_and_set_bit(i, cpuc->used_mask))
- -                                      return i;
- -                      }
- -                      return -1;
- -              }
- -      }
- -skip:
- -      return gen_get_event_idx(cpuc, hwc);
- -}
- -
- -static int
- -x86_schedule_event(struct cpu_hw_events *cpuc, struct hw_perf_event *hwc)
- -{
- -      int idx;
- -
- -      idx = fixed_mode_idx(hwc);
- -      if (idx == X86_PMC_IDX_FIXED_BTS) {
- -              /* BTS is already occupied. */
- -              if (test_and_set_bit(idx, cpuc->used_mask))
- -                      return -EAGAIN;
- -
- -              hwc->config_base        = 0;
- -              hwc->event_base         = 0;
- -              hwc->idx                = idx;
- -      } else if (idx >= 0) {
- -              /*
- -               * Try to get the fixed event, if that is already taken
- -               * then try to get a generic event:
- -               */
- -              if (test_and_set_bit(idx, cpuc->used_mask))
- -                      goto try_generic;
- -
- -              hwc->config_base = MSR_ARCH_PERFMON_FIXED_CTR_CTRL;
- -              /*
- -               * We set it so that event_base + idx in wrmsr/rdmsr maps to
- -               * MSR_ARCH_PERFMON_FIXED_CTR0 ... CTR2:
- -               */
- -              hwc->event_base =
- -                      MSR_ARCH_PERFMON_FIXED_CTR0 - X86_PMC_IDX_FIXED;
- -              hwc->idx = idx;
- -      } else {
- -              idx = hwc->idx;
- -              /* Try to get the previous generic event again */
- -              if (idx == -1 || test_and_set_bit(idx, cpuc->used_mask)) {
- -try_generic:
- -                      idx = x86_pmu.get_event_idx(cpuc, hwc);
- -                      if (idx == -1)
- -                              return -EAGAIN;
- -
- -                      set_bit(idx, cpuc->used_mask);
- -                      hwc->idx = idx;
- -              }
- -              hwc->config_base = x86_pmu.eventsel;
- -              hwc->event_base  = x86_pmu.perfctr;
- -      }
- -
- -      return idx;
- -}
- -
- -/*
- - * Find a PMC slot for the freshly enabled / scheduled in event:
+ + * activate a single event
+ + *
+ + * The event is added to the group of enabled events
+ + * but only if it can be scheduled with existing events.
+ + *
+ + * Called with PMU disabled. If successful and return value 1,
+ + * then guaranteed to call perf_enable() and hw_perf_enable()
    */
   static int x86_pmu_enable(struct perf_event *event)
   {
         struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
- -      struct hw_perf_event *hwc = &event->hw;
- -      int idx;
- -
- -      idx = x86_schedule_event(cpuc, hwc);
- -      if (idx < 0)
- -              return idx;
- -
- -      perf_events_lapic_init();
+ +      struct hw_perf_event *hwc;
+ +      int assign[X86_PMC_IDX_MAX];
+ +      int n, n0, ret;
   
- -      x86_pmu.disable(hwc, idx);
+ +      hwc = &event->hw;
   
- -      cpuc->events[idx] = event;
- -      set_bit(idx, cpuc->active_mask);
+ +      n0 = cpuc->n_events;
+ +      n = collect_events(cpuc, event, false);
+ +      if (n < 0)
+ +              return n;
   
- -      x86_perf_event_set_period(event, hwc, idx);
- -      x86_pmu.enable(hwc, idx);
+ +      ret = x86_schedule_events(cpuc, n, assign);
+ +      if (ret)
+ +              return ret;
+ +      /*
+ +       * copy new assignment, now we know it is possible
+ +       * will be used by hw_perf_enable()
+ +       */
+ +      memcpy(cpuc->assign, assign, n*sizeof(int));
   
- -      perf_event_update_userpage(event);
+ +      cpuc->n_events = n;
+ +      cpuc->n_added  = n - n0;
   
         return 0;
   }
@@@ -1804,7 -1583,7 +1811,7 @@@ void perf_event_print_debug(void
                 pr_info("CPU#%d: overflow:   %016llx\n", cpu, overflow);
                 pr_info("CPU#%d: fixed:      %016llx\n", cpu, fixed);
         }
- -      pr_info("CPU#%d: used:       %016llx\n", cpu, *(u64 *)cpuc->used_mask);
+ +      pr_info("CPU#%d: active:       %016llx\n", cpu, *(u64 *)cpuc->active_mask);
   
         for (idx = 0; idx < x86_pmu.num_events; idx++) {
                 rdmsrl(x86_pmu.eventsel + idx, pmc_ctrl);
@@@ -1888,8 -1667,9 +1895,8 @@@ static void intel_pmu_drain_bts_buffer(
         event->pending_kill = POLL_IN;
   }
   
- -static void x86_pmu_disable(struct perf_event *event)
+ +static void __x86_pmu_disable(struct perf_event *event, struct cpu_hw_events *cpuc)
   {
- -      struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
         struct hw_perf_event *hwc = &event->hw;
         int idx = hwc->idx;
   
@@@ -1900,6 -1680,12 +1907,6 @@@
         clear_bit(idx, cpuc->active_mask);
         x86_pmu.disable(hwc, idx);
   
- -      /*
- -       * Make sure the cleared pointer becomes visible before we
- -       * (potentially) free the event:
- -       */
- -      barrier();
- -
         /*
          * Drain the remaining delta count out of a event
          * that we are disabling:
@@@ -1911,28 -1697,8 +1918,28 @@@
                 intel_pmu_drain_bts_buffer(cpuc);
   
         cpuc->events[idx] = NULL;
- -      clear_bit(idx, cpuc->used_mask);
+ +}
+ +
+ +static void x86_pmu_disable(struct perf_event *event)
+ +{
+ +      struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+ +      int i;
+ +
+ +      __x86_pmu_disable(event, cpuc);
+ +
+ +      for (i = 0; i < cpuc->n_events; i++) {
+ +              if (event == cpuc->event_list[i]) {
   
+ +                      if (x86_pmu.put_event_constraints)
+ +                              x86_pmu.put_event_constraints(cpuc, event);
+ +
+ +                      while (++i < cpuc->n_events)
+ +                              cpuc->event_list[i-1] = cpuc->event_list[i];
+ +
+ +                      --cpuc->n_events;
+ +                      break;
+ +              }
+ +      }
         perf_event_update_userpage(event);
   }
   
@@@ -2203,162 -1969,6 +2210,162 @@@ perf_event_nmi_handler(struct notifier_
         return NOTIFY_STOP;
   }
   
+ +static struct event_constraint unconstrained;
+ +
+ +static struct event_constraint bts_constraint =
+ +      EVENT_CONSTRAINT(0, 1ULL << X86_PMC_IDX_FIXED_BTS, 0);
+ +
+ +static struct event_constraint *
+ +intel_special_constraints(struct perf_event *event)
+ +{
+ +      unsigned int hw_event;
+ +
+ +      hw_event = event->hw.config & INTEL_ARCH_EVENT_MASK;
+ +
+ +      if (unlikely((hw_event ==
+ +                    x86_pmu.event_map(PERF_COUNT_HW_BRANCH_INSTRUCTIONS)) &&
+ +                   (event->hw.sample_period == 1))) {
+ +
+ +              return &bts_constraint;
+ +      }
+ +      return NULL;
+ +}
+ +
+ +static struct event_constraint *
+ +intel_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event)
+ +{
+ +      struct event_constraint *c;
+ +
+ +      c = intel_special_constraints(event);
+ +      if (c)
+ +              return c;
+ +
+ +      if (x86_pmu.event_constraints) {
+ +              for_each_event_constraint(c, x86_pmu.event_constraints) {
+ +                      if ((event->hw.config & c->cmask) == c->code)
+ +                              return c;
+ +              }
+ +      }
+ +
+ +      return &unconstrained;
+ +}
+ +
+ +static struct event_constraint *
+ +amd_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event)
+ +{
+ +      return &unconstrained;
+ +}
+ +
+ +static int x86_event_sched_in(struct perf_event *event,
+ +                        struct perf_cpu_context *cpuctx, int cpu)
+ +{
+ +      int ret = 0;
+ +
+ +      event->state = PERF_EVENT_STATE_ACTIVE;
+ +      event->oncpu = cpu;
+ +      event->tstamp_running += event->ctx->time - event->tstamp_stopped;
+ +
+ +      if (!is_x86_event(event))
+ +              ret = event->pmu->enable(event);
+ +
+ +      if (!ret && !is_software_event(event))
+ +              cpuctx->active_oncpu++;
+ +
+ +      if (!ret && event->attr.exclusive)
+ +              cpuctx->exclusive = 1;
+ +
+ +      return ret;
+ +}
+ +
+ +static void x86_event_sched_out(struct perf_event *event,
+ +                          struct perf_cpu_context *cpuctx, int cpu)
+ +{
+ +      event->state = PERF_EVENT_STATE_INACTIVE;
+ +      event->oncpu = -1;
+ +
+ +      if (!is_x86_event(event))
+ +              event->pmu->disable(event);
+ +
+ +      event->tstamp_running -= event->ctx->time - event->tstamp_stopped;
+ +
+ +      if (!is_software_event(event))
+ +              cpuctx->active_oncpu--;
+ +
+ +      if (event->attr.exclusive || !cpuctx->active_oncpu)
+ +              cpuctx->exclusive = 0;
+ +}
+ +
+ +/*
+ + * Called to enable a whole group of events.
+ + * Returns 1 if the group was enabled, or -EAGAIN if it could not be.
+ + * Assumes the caller has disabled interrupts and has
+ + * frozen the PMU with hw_perf_save_disable.
+ + *
+ + * called with PMU disabled. If successful and return value 1,
+ + * then guaranteed to call perf_enable() and hw_perf_enable()
+ + */
+ +int hw_perf_group_sched_in(struct perf_event *leader,
+ +             struct perf_cpu_context *cpuctx,
+ +             struct perf_event_context *ctx, int cpu)
+ +{
+ +      struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);
+ +      struct perf_event *sub;
+ +      int assign[X86_PMC_IDX_MAX];
+ +      int n0, n1, ret;
+ +
+ +      /* n0 = total number of events */
+ +      n0 = collect_events(cpuc, leader, true);
+ +      if (n0 < 0)
+ +              return n0;
+ +
+ +      ret = x86_schedule_events(cpuc, n0, assign);
+ +      if (ret)
+ +              return ret;
+ +
+ +      ret = x86_event_sched_in(leader, cpuctx, cpu);
+ +      if (ret)
+ +              return ret;
+ +
+ +      n1 = 1;
+ +      list_for_each_entry(sub, &leader->sibling_list, group_entry) {
+ +              if (sub->state > PERF_EVENT_STATE_OFF) {
+ +                      ret = x86_event_sched_in(sub, cpuctx, cpu);
+ +                      if (ret)
+ +                              goto undo;
+ +                      ++n1;
+ +              }
+ +      }
+ +      /*
+ +       * copy new assignment, now we know it is possible
+ +       * will be used by hw_perf_enable()
+ +       */
+ +      memcpy(cpuc->assign, assign, n0*sizeof(int));
+ +
+ +      cpuc->n_events  = n0;
+ +      cpuc->n_added   = n1;
+ +      ctx->nr_active += n1;
+ +
+ +      /*
+ +       * 1 means successful and events are active
+ +       * This is not quite true because we defer
+ +       * actual activation until hw_perf_enable() but
+ +       * this way we ensure the caller won't try to enable
+ +       * individual events
+ +       */
+ +      return 1;
+ +undo:
+ +      x86_event_sched_out(leader, cpuctx, cpu);
+ +      n0  = 1;
+ +      list_for_each_entry(sub, &leader->sibling_list, group_entry) {
+ +              if (sub->state == PERF_EVENT_STATE_ACTIVE) {
+ +                      x86_event_sched_out(sub, cpuctx, cpu);
+ +                      if (++n0 == n1)
+ +                              break;
+ +              }
+ +      }
+ +      return ret;
+ +}
+ +
   static __read_mostly struct notifier_block perf_event_nmi_notifier = {
         .notifier_call          = perf_event_nmi_handler,
         .next                   = NULL,
@@@ -2390,8 -2000,7 +2397,8 @@@ static __initconst struct x86_pmu p6_pm
          */
         .event_bits             = 32,
         .event_mask             = (1ULL << 32) - 1,
- -      .get_event_idx          = intel_get_event_idx,
+ +      .get_event_constraints  = intel_get_event_constraints,
+ +      .event_constraints      = intel_p6_event_constraints
   };
   
   static __initconst struct x86_pmu intel_pmu = {
@@@ -2415,7 -2024,7 +2422,7 @@@
         .max_period             = (1ULL << 31) - 1,
         .enable_bts             = intel_pmu_enable_bts,
         .disable_bts            = intel_pmu_disable_bts,
- -      .get_event_idx          = intel_get_event_idx,
+ +      .get_event_constraints  = intel_get_event_constraints
   };
   
   static __initconst struct x86_pmu amd_pmu = {
@@@ -2436,7 -2045,7 +2443,7 @@@
         .apic                   = 1,
         /* use highest bit to detect overflow */
         .max_period             = (1ULL << 47) - 1,
- -      .get_event_idx          = gen_get_event_idx,
+ +      .get_event_constraints  = amd_get_event_constraints
   };
   
   static __init int p6_pmu_init(void)
@@@ -2449,9 -2058,12 +2456,9 @@@
         case 7:
         case 8:
         case 11: /* Pentium III */
- -              event_constraints = intel_p6_event_constraints;
- -              break;
         case 9:
         case 13:
                 /* Pentium M */
- -              event_constraints = intel_p6_event_constraints;
                 break;
         default:
                 pr_cont("unsupported p6 CPU model %d ",
@@@ -2516,40 -2128,23 +2523,40 @@@ static __init int intel_pmu_init(void
                 memcpy(hw_cache_event_ids, core2_hw_cache_event_ids,
                        sizeof(hw_cache_event_ids));
   
+ +              x86_pmu.event_constraints = intel_core_event_constraints;
                 pr_cont("Core2 events, ");
- -              event_constraints = intel_core_event_constraints;
                 break;
- -      default:
- -      case 26:
+ +
+ +      case 26: /* 45 nm nehalem, "Bloomfield" */
+ +      case 30: /* 45 nm nehalem, "Lynnfield" */
                 memcpy(hw_cache_event_ids, nehalem_hw_cache_event_ids,
                        sizeof(hw_cache_event_ids));
   
- -              event_constraints = intel_nehalem_event_constraints;
+ +              x86_pmu.event_constraints = intel_nehalem_event_constraints;
                 pr_cont("Nehalem/Corei7 events, ");
                 break;
         case 28:
                 memcpy(hw_cache_event_ids, atom_hw_cache_event_ids,
                        sizeof(hw_cache_event_ids));
   
+ +              x86_pmu.event_constraints = intel_gen_event_constraints;
                 pr_cont("Atom events, ");
                 break;
+ +
+ +      case 37: /* 32 nm nehalem, "Clarkdale" */
+ +      case 44: /* 32 nm nehalem, "Gulftown" */
+ +              memcpy(hw_cache_event_ids, westmere_hw_cache_event_ids,
+ +                     sizeof(hw_cache_event_ids));
+ +
+ +              x86_pmu.event_constraints = intel_westmere_event_constraints;
+ +              pr_cont("Westmere events, ");
+ +              break;
+ +      default:
+ +              /*
+ +               * default constraints for v2 and up
+ +               */
+ +              x86_pmu.event_constraints = intel_gen_event_constraints;
+ +              pr_cont("generic architected perfmon, ");
         }
         return 0;
   }
@@@ -2625,9 -2220,6 +2632,9 @@@ void __init init_hw_perf_events(void
         perf_events_lapic_init();
         register_die_notifier(&perf_event_nmi_notifier);
   
+ +      unconstrained = (struct event_constraint)
+ +              EVENT_CONSTRAINT(0, (1ULL << x86_pmu.num_events) - 1, 0);
+ +
         pr_info("... version:                %d\n",     x86_pmu.version);
         pr_info("... bit width:              %d\n",     x86_pmu.event_bits);
         pr_info("... generic registers:      %d\n",     x86_pmu.num_events);
@@@ -2649,73 -2241,46 +2656,73 @@@ static const struct pmu pmu = 
         .unthrottle     = x86_pmu_unthrottle,
   };
   
- -static int
- -validate_event(struct cpu_hw_events *cpuc, struct perf_event *event)
- -{
- -      struct hw_perf_event fake_event = event->hw;
- -
- -      if (event->pmu && event->pmu != &pmu)
- -              return 0;
- -
- -      return x86_schedule_event(cpuc, &fake_event) >= 0;
- -}
- -
+ +/*
+ + * validate a single event group
+ + *
+ + * validation includes:
+ + *    - check events are compatible with each other
+ + *    - events do not compete for the same counter
+ + *    - number of events <= number of counters
+ + *
+ + * validation ensures the group can be loaded onto the
+ + * PMU if it was the only group available.
+ + */
   static int validate_group(struct perf_event *event)
   {
- -      struct perf_event *sibling, *leader = event->group_leader;
- -      struct cpu_hw_events fake_pmu;
+ +      struct perf_event *leader = event->group_leader;
+ +      struct cpu_hw_events *fake_cpuc;
+ +      int ret, n;
   
- -      memset(&fake_pmu, 0, sizeof(fake_pmu));
+ +      ret = -ENOMEM;
+ +      fake_cpuc = kmalloc(sizeof(*fake_cpuc), GFP_KERNEL | __GFP_ZERO);
+ +      if (!fake_cpuc)
+ +              goto out;
   
- -      if (!validate_event(&fake_pmu, leader))
- -              return -ENOSPC;
+ +      /*
+ +       * the event is not yet connected with its
+ +       * siblings therefore we must first collect
+ +       * existing siblings, then add the new event
+ +       * before we can simulate the scheduling
+ +       */
+ +      ret = -ENOSPC;
+ +      n = collect_events(fake_cpuc, leader, true);
+ +      if (n < 0)
+ +              goto out_free;
   
- -      list_for_each_entry(sibling, &leader->sibling_list, group_entry) {
- -              if (!validate_event(&fake_pmu, sibling))
- -                      return -ENOSPC;
- -      }
+ +      fake_cpuc->n_events = n;
+ +      n = collect_events(fake_cpuc, event, false);
+ +      if (n < 0)
+ +              goto out_free;
   
- -      if (!validate_event(&fake_pmu, event))
- -              return -ENOSPC;
+ +      fake_cpuc->n_events = n;
   
- -      return 0;
+ +      ret = x86_schedule_events(fake_cpuc, n, NULL);
+ +
+ +out_free:
+ +      kfree(fake_cpuc);
+ +out:
+ +      return ret;
   }
   
   const struct pmu *hw_perf_event_init(struct perf_event *event)
   {
+ +      const struct pmu *tmp;
         int err;
   
         err = __hw_perf_event_init(event);
         if (!err) {
+ +              /*
+ +               * we temporarily connect event to its pmu
+ +               * such that validate_group() can classify
+ +               * it as an x86 event using is_x86_event()
+ +               */
+ +              tmp = event->pmu;
+ +              event->pmu = &pmu;
+ +
                 if (event->group_leader != event)
                         err = validate_group(event);
+ +
+ +              event->pmu = tmp;
         }
         if (err) {
                 if (event->destroy)
@@@ -2739,6 -2304,7 +2746,6 @@@ void callchain_store(struct perf_callch
   
   static DEFINE_PER_CPU(struct perf_callchain_entry, pmc_irq_entry);
   static DEFINE_PER_CPU(struct perf_callchain_entry, pmc_nmi_entry);
- -static DEFINE_PER_CPU(int, in_ignored_frame);
   
   
   static void
@@@ -2754,6 -2320,10 +2761,6 @@@ static void backtrace_warning(void *dat
   
   static int backtrace_stack(void *data, char *name)
   {
- -      per_cpu(in_ignored_frame, smp_processor_id()) =
- -                      x86_is_stack_id(NMI_STACK, name) ||
- -                      x86_is_stack_id(DEBUG_STACK, name);
- -
         return 0;
   }
   
@@@ -2761,6 -2331,9 +2768,6 @@@ static void backtrace_address(void *dat
   {
         struct perf_callchain_entry *entry = data;
   
- -      if (per_cpu(in_ignored_frame, smp_processor_id()))
- -              return;
- -
         if (reliable)
                 callchain_store(entry, addr);
   }
@@@ -2867,6 -2440,9 +2874,6 @@@ perf_do_callchain(struct pt_regs *regs
   
         is_user = user_mode(regs);
   
- -      if (!current || current->pid == 0)
- -              return;
- -
         if (is_user && current->state != TASK_RUNNING)
                 return;
   
diff --combined include/linux/perf_event.h

index 953c17731e0d05b2cd2d75ace3d3a2616b1d5c5f,8fa71874113f326106bc79ea699910f75b6c3d9e..556b0f4a668ea0f838d0f86e912bf692ba3788dd
--- 1/include/linux/perf_event.h
--- 2/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@@ -290,7 -290,7 +290,7 @@@ struct perf_event_mmap_page 
   };
   
   #define PERF_RECORD_MISC_CPUMODE_MASK         (3 << 0)
- -#define PERF_RECORD_MISC_CPUMODE_UNKNOWN              (0 << 0)
+ +#define PERF_RECORD_MISC_CPUMODE_UNKNOWN      (0 << 0)
   #define PERF_RECORD_MISC_KERNEL                       (1 << 0)
   #define PERF_RECORD_MISC_USER                 (2 << 0)
   #define PERF_RECORD_MISC_HYPERVISOR           (3 << 0)
@@@ -356,8 -356,8 +356,8 @@@ enum perf_event_type 
          *      u64                             stream_id;
          * };
          */
- -      PERF_RECORD_THROTTLE            = 5,
- -      PERF_RECORD_UNTHROTTLE          = 6,
+ +      PERF_RECORD_THROTTLE                    = 5,
+ +      PERF_RECORD_UNTHROTTLE                  = 6,
   
         /*
          * struct {
@@@ -371,10 -371,10 +371,10 @@@
   
         /*
          * struct {
- -       *      struct perf_event_header        header;
- -       *      u32                             pid, tid;
+ +       *      struct perf_event_header        header;
+ +       *      u32                             pid, tid;
          *
- -       *      struct read_format              values;
+ +       *      struct read_format              values;
          * };
          */
         PERF_RECORD_READ                        = 8,
@@@ -412,7 -412,7 +412,7 @@@
          *        char                  data[size];}&& PERF_SAMPLE_RAW
          * };
          */
- -      PERF_RECORD_SAMPLE              = 9,
+ +      PERF_RECORD_SAMPLE                      = 9,
   
         PERF_RECORD_MAX,                        /* non-ABI */
   };
@@@ -498,8 -498,9 +498,8 @@@ struct hw_perf_event 
         atomic64_t                      period_left;
         u64                             interrupts;
   
- -      u64                             freq_count;
- -      u64                             freq_interrupts;
- -      u64                             freq_stamp;
+ +      u64                             freq_time_stamp;
+ +      u64                             freq_count_stamp;
   #endif
   };
   
@@@ -564,10 -565,6 +564,10 @@@ typedef void (*perf_overflow_handler_t)
                                         struct perf_sample_data *,
                                         struct pt_regs *regs);
   
+ +enum perf_group_flag {
+ +      PERF_GROUP_SOFTWARE = 0x1,
+ +};
+ +
   /**
    * struct perf_event - performance event kernel representation:
    */
@@@ -577,7 -574,6 +577,7 @@@ struct perf_event 
         struct list_head                event_entry;
         struct list_head                sibling_list;
         int                             nr_siblings;
+ +      int                             group_flags;
         struct perf_event               *group_leader;
         struct perf_event               *output;
         const struct pmu                *pmu;
@@@ -662,7 -658,7 +662,7 @@@
   
         perf_overflow_handler_t         overflow_handler;
   
- -#ifdef CONFIG_EVENT_PROFILE
+ +#ifdef CONFIG_EVENT_TRACING
         struct event_filter             *filter;
   #endif
   
@@@ -687,8 -683,7 +687,8 @@@ struct perf_event_context 
          */
         struct mutex                    mutex;
   
- -      struct list_head                group_list;
+ +      struct list_head                pinned_groups;
+ +      struct list_head                flexible_groups;
         struct list_head                event_list;
         int                             nr_events;
         int                             nr_active;
@@@ -751,9 -746,10 +751,9 @@@ extern int perf_max_events
   
   extern const struct pmu *hw_perf_event_init(struct perf_event *event);
   
- -extern void perf_event_task_sched_in(struct task_struct *task, int cpu);
- -extern void perf_event_task_sched_out(struct task_struct *task,
- -                                      struct task_struct *next, int cpu);
- -extern void perf_event_task_tick(struct task_struct *task, int cpu);
+ +extern void perf_event_task_sched_in(struct task_struct *task);
+ +extern void perf_event_task_sched_out(struct task_struct *task, struct task_struct *next);
+ +extern void perf_event_task_tick(struct task_struct *task);
   extern int perf_event_init_task(struct task_struct *child);
   extern void perf_event_exit_task(struct task_struct *child);
   extern void perf_event_free_task(struct task_struct *task);
@@@ -818,9 -814,14 +818,14 @@@ extern int perf_event_overflow(struct p
    */
   static inline int is_software_event(struct perf_event *event)
   {
-       return (event->attr.type != PERF_TYPE_RAW) &&
-               (event->attr.type != PERF_TYPE_HARDWARE) &&
-               (event->attr.type != PERF_TYPE_HW_CACHE);
+       switch (event->attr.type) {
+       case PERF_TYPE_SOFTWARE:
+       case PERF_TYPE_TRACEPOINT:
+       /* for now the breakpoint stuff also works as software event */
+       case PERF_TYPE_BREAKPOINT:
+               return 1;
+       }
+       return 0;
   }
   
   extern atomic_t perf_swevent_enabled[PERF_COUNT_SW_MAX];
@@@ -852,7 -853,8 +857,7 @@@ extern int sysctl_perf_event_mlock
   extern int sysctl_perf_event_sample_rate;
   
   extern void perf_event_init(void);
- -extern void perf_tp_event(int event_id, u64 addr, u64 count,
- -                               void *record, int entry_size);
+ +extern void perf_tp_event(int event_id, u64 addr, u64 count, void *record, int entry_size);
   extern void perf_bp_event(struct perf_event *event, void *data);
   
   #ifndef perf_misc_flags
@@@ -873,12 -875,12 +878,12 @@@ extern void perf_event_enable(struct pe
   extern void perf_event_disable(struct perf_event *event);
   #else
   static inline void
- -perf_event_task_sched_in(struct task_struct *task, int cpu)           { }
+ +perf_event_task_sched_in(struct task_struct *task)                    { }
   static inline void
   perf_event_task_sched_out(struct task_struct *task,
- -                          struct task_struct *next, int cpu)          { }
+ +                          struct task_struct *next)                   { }
   static inline void
- -perf_event_task_tick(struct task_struct *task, int cpu)                       { }
+ +perf_event_task_tick(struct task_struct *task)                                { }
   static inline int perf_event_init_task(struct task_struct *child)     { return 0; }
   static inline void perf_event_exit_task(struct task_struct *child)    { }
   static inline void perf_event_free_task(struct task_struct *task)     { }
@@@ -893,13 -895,13 +898,13 @@@ static inline voi
   perf_sw_event(u32 event_id, u64 nr, int nmi,
                      struct pt_regs *regs, u64 addr)                    { }
   static inline void
- -perf_bp_event(struct perf_event *event, void *data)           { }
+ +perf_bp_event(struct perf_event *event, void *data)                   { }
   
   static inline void perf_event_mmap(struct vm_area_struct *vma)                { }
   static inline void perf_event_comm(struct task_struct *tsk)           { }
   static inline void perf_event_fork(struct task_struct *tsk)           { }
   static inline void perf_event_init(void)                              { }
- -static inline int  perf_swevent_get_recursion_context(void)  { return -1; }
+ +static inline int  perf_swevent_get_recursion_context(void)           { return -1; }
   static inline void perf_swevent_put_recursion_context(int rctx)               { }
   static inline void perf_event_enable(struct perf_event *event)                { }
   static inline void perf_event_disable(struct perf_event *event)               { }
diff --combined kernel/perf_event.c

index 53dc2a362111d4bc82d98045dae12163e7d18320,d27746bd3a06097af5635539b690fe272b78cf2c..40f8b07c56011811e49c0535d4e1db126afd4b0d
--- 1/kernel/perf_event.c
--- 2/kernel/perf_event.c
+++ b/kernel/perf_event.c
@@@ -289,15 -289,6 +289,15 @@@ static void update_event_times(struct p
         event->total_time_running = run_end - event->tstamp_running;
   }
   
+ +static struct list_head *
+ +ctx_group_list(struct perf_event *event, struct perf_event_context *ctx)
+ +{
+ +      if (event->attr.pinned)
+ +              return &ctx->pinned_groups;
+ +      else
+ +              return &ctx->flexible_groups;
+ +}
+ +
   /*
    * Add a event from the lists for its context.
    * Must be called with ctx->mutex and ctx->lock held.
@@@ -312,19 -303,9 +312,19 @@@ list_add_event(struct perf_event *event
          * add it straight to the context's event list, or to the group
          * leader's sibling list:
          */
- -      if (group_leader == event)
- -              list_add_tail(&event->group_entry, &ctx->group_list);
- -      else {
+ +      if (group_leader == event) {
+ +              struct list_head *list;
+ +
+ +              if (is_software_event(event))
+ +                      event->group_flags |= PERF_GROUP_SOFTWARE;
+ +
+ +              list = ctx_group_list(event, ctx);
+ +              list_add_tail(&event->group_entry, list);
+ +      } else {
+ +              if (group_leader->group_flags & PERF_GROUP_SOFTWARE &&
+ +                  !is_software_event(event))
+ +                      group_leader->group_flags &= ~PERF_GROUP_SOFTWARE;
+ +
                 list_add_tail(&event->group_entry, &group_leader->sibling_list);
                 group_leader->nr_siblings++;
         }
@@@ -374,14 -355,9 +374,14 @@@ list_del_event(struct perf_event *event
          * to the context list directly:
          */
         list_for_each_entry_safe(sibling, tmp, &event->sibling_list, group_entry) {
+ +              struct list_head *list;
   
- -              list_move_tail(&sibling->group_entry, &ctx->group_list);
+ +              list = ctx_group_list(event, ctx);
+ +              list_move_tail(&sibling->group_entry, list);
                 sibling->group_leader = sibling;
+ +
+ +              /* Inherit group flags from the previous leader */
+ +              sibling->group_flags = event->group_flags;
         }
   }
   
@@@ -709,6 -685,24 +709,6 @@@ group_error
         return -EAGAIN;
   }
   
- -/*
- - * Return 1 for a group consisting entirely of software events,
- - * 0 if the group contains any hardware events.
- - */
- -static int is_software_only_group(struct perf_event *leader)
- -{
- -      struct perf_event *event;
- -
- -      if (!is_software_event(leader))
- -              return 0;
- -
- -      list_for_each_entry(event, &leader->sibling_list, group_entry)
- -              if (!is_software_event(event))
- -                      return 0;
- -
- -      return 1;
- -}
- -
   /*
    * Work out whether we can put this event group on the CPU now.
    */
@@@ -719,7 -713,7 +719,7 @@@ static int group_can_go_on(struct perf_
         /*
          * Groups consisting entirely of software events can always go on.
          */
- -      if (is_software_only_group(event))
+ +      if (event->group_flags & PERF_GROUP_SOFTWARE)
                 return 1;
         /*
          * If an exclusive group is already on, no other hardware
@@@ -1049,15 -1043,8 +1049,15 @@@ static int perf_event_refresh(struct pe
         return 0;
   }
   
- -void __perf_event_sched_out(struct perf_event_context *ctx,
- -                            struct perf_cpu_context *cpuctx)
+ +enum event_type_t {
+ +      EVENT_FLEXIBLE = 0x1,
+ +      EVENT_PINNED = 0x2,
+ +      EVENT_ALL = EVENT_FLEXIBLE | EVENT_PINNED,
+ +};
+ +
+ +static void ctx_sched_out(struct perf_event_context *ctx,
+ +                        struct perf_cpu_context *cpuctx,
+ +                        enum event_type_t event_type)
   {
         struct perf_event *event;
   
@@@ -1068,18 -1055,10 +1068,18 @@@
         update_context_time(ctx);
   
         perf_disable();
- -      if (ctx->nr_active) {
- -              list_for_each_entry(event, &ctx->group_list, group_entry)
+ +      if (!ctx->nr_active)
+ +              goto out_enable;
+ +
+ +      if (event_type & EVENT_PINNED)
+ +              list_for_each_entry(event, &ctx->pinned_groups, group_entry)
                         group_sched_out(event, cpuctx, ctx);
- -      }
+ +
+ +      if (event_type & EVENT_FLEXIBLE)
+ +              list_for_each_entry(event, &ctx->flexible_groups, group_entry)
+ +                      group_sched_out(event, cpuctx, ctx);
+ +
+ + out_enable:
         perf_enable();
    out:
         raw_spin_unlock(&ctx->lock);
@@@ -1191,9 -1170,9 +1191,9 @@@ static void perf_event_sync_stat(struc
    * not restart the event.
    */
   void perf_event_task_sched_out(struct task_struct *task,
- -                               struct task_struct *next, int cpu)
+ +                               struct task_struct *next)
   {
- -      struct perf_cpu_context *cpuctx = &per_cpu(perf_cpu_context, cpu);
+ +      struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context);
         struct perf_event_context *ctx = task->perf_event_ctxp;
         struct perf_event_context *next_ctx;
         struct perf_event_context *parent;
@@@ -1241,13 -1220,15 +1241,13 @@@
         rcu_read_unlock();
   
         if (do_switch) {
- -              __perf_event_sched_out(ctx, cpuctx);
+ +              ctx_sched_out(ctx, cpuctx, EVENT_ALL);
                 cpuctx->task_ctx = NULL;
         }
   }
   
- -/*
- - * Called with IRQs disabled
- - */
- -static void __perf_event_task_sched_out(struct perf_event_context *ctx)
+ +static void task_ctx_sched_out(struct perf_event_context *ctx,
+ +                             enum event_type_t event_type)
   {
         struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context);
   
@@@ -1257,36 -1238,41 +1257,36 @@@
         if (WARN_ON_ONCE(ctx != cpuctx->task_ctx))
                 return;
   
- -      __perf_event_sched_out(ctx, cpuctx);
+ +      ctx_sched_out(ctx, cpuctx, event_type);
         cpuctx->task_ctx = NULL;
   }
   
   /*
    * Called with IRQs disabled
    */
- -static void perf_event_cpu_sched_out(struct perf_cpu_context *cpuctx)
+ +static void __perf_event_task_sched_out(struct perf_event_context *ctx)
   {
- -      __perf_event_sched_out(&cpuctx->ctx, cpuctx);
+ +      task_ctx_sched_out(ctx, EVENT_ALL);
+ +}
+ +
+ +/*
+ + * Called with IRQs disabled
+ + */
+ +static void cpu_ctx_sched_out(struct perf_cpu_context *cpuctx,
+ +                            enum event_type_t event_type)
+ +{
+ +      ctx_sched_out(&cpuctx->ctx, cpuctx, event_type);
   }
   
   static void
- -__perf_event_sched_in(struct perf_event_context *ctx,
- -                      struct perf_cpu_context *cpuctx, int cpu)
+ +ctx_pinned_sched_in(struct perf_event_context *ctx,
+ +                  struct perf_cpu_context *cpuctx,
+ +                  int cpu)
   {
         struct perf_event *event;
- -      int can_add_hw = 1;
- -
- -      raw_spin_lock(&ctx->lock);
- -      ctx->is_active = 1;
- -      if (likely(!ctx->nr_events))
- -              goto out;
- -
- -      ctx->timestamp = perf_clock();
   
- -      perf_disable();
- -
- -      /*
- -       * First go through the list and put on any pinned groups
- -       * in order to give them the best chance of going on.
- -       */
- -      list_for_each_entry(event, &ctx->group_list, group_entry) {
- -              if (event->state <= PERF_EVENT_STATE_OFF ||
- -                  !event->attr.pinned)
+ +      list_for_each_entry(event, &ctx->pinned_groups, group_entry) {
+ +              if (event->state <= PERF_EVENT_STATE_OFF)
                         continue;
                 if (event->cpu != -1 && event->cpu != cpu)
                         continue;
@@@ -1303,20 -1289,16 +1303,20 @@@
                         event->state = PERF_EVENT_STATE_ERROR;
                 }
         }
+ +}
   
- -      list_for_each_entry(event, &ctx->group_list, group_entry) {
- -              /*
- -               * Ignore events in OFF or ERROR state, and
- -               * ignore pinned events since we did them already.
- -               */
- -              if (event->state <= PERF_EVENT_STATE_OFF ||
- -                  event->attr.pinned)
- -                      continue;
+ +static void
+ +ctx_flexible_sched_in(struct perf_event_context *ctx,
+ +                    struct perf_cpu_context *cpuctx,
+ +                    int cpu)
+ +{
+ +      struct perf_event *event;
+ +      int can_add_hw = 1;
   
+ +      list_for_each_entry(event, &ctx->flexible_groups, group_entry) {
+ +              /* Ignore events in OFF or ERROR state */
+ +              if (event->state <= PERF_EVENT_STATE_OFF)
+ +                      continue;
                 /*
                  * Listen to the 'cpu' scheduling filter constraint
                  * of events:
@@@ -1328,61 -1310,11 +1328,61 @@@
                         if (group_sched_in(event, cpuctx, ctx, cpu))
                                 can_add_hw = 0;
         }
+ +}
+ +
+ +static void
+ +ctx_sched_in(struct perf_event_context *ctx,
+ +           struct perf_cpu_context *cpuctx,
+ +           enum event_type_t event_type)
+ +{
+ +      int cpu = smp_processor_id();
+ +
+ +      raw_spin_lock(&ctx->lock);
+ +      ctx->is_active = 1;
+ +      if (likely(!ctx->nr_events))
+ +              goto out;
+ +
+ +      ctx->timestamp = perf_clock();
+ +
+ +      perf_disable();
+ +
+ +      /*
+ +       * First go through the list and put on any pinned groups
+ +       * in order to give them the best chance of going on.
+ +       */
+ +      if (event_type & EVENT_PINNED)
+ +              ctx_pinned_sched_in(ctx, cpuctx, cpu);
+ +
+ +      /* Then walk through the lower prio flexible groups */
+ +      if (event_type & EVENT_FLEXIBLE)
+ +              ctx_flexible_sched_in(ctx, cpuctx, cpu);
+ +
         perf_enable();
    out:
         raw_spin_unlock(&ctx->lock);
   }
   
+ +static void cpu_ctx_sched_in(struct perf_cpu_context *cpuctx,
+ +                           enum event_type_t event_type)
+ +{
+ +      struct perf_event_context *ctx = &cpuctx->ctx;
+ +
+ +      ctx_sched_in(ctx, cpuctx, event_type);
+ +}
+ +
+ +static void task_ctx_sched_in(struct task_struct *task,
+ +                            enum event_type_t event_type)
+ +{
+ +      struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context);
+ +      struct perf_event_context *ctx = task->perf_event_ctxp;
+ +
+ +      if (likely(!ctx))
+ +              return;
+ +      if (cpuctx->task_ctx == ctx)
+ +              return;
+ +      ctx_sched_in(ctx, cpuctx, event_type);
+ +      cpuctx->task_ctx = ctx;
+ +}
   /*
    * Called from scheduler to add the events of the current task
    * with interrupts disabled.
@@@ -1394,112 -1326,38 +1394,112 @@@
    * accessing the event control register. If a NMI hits, then it will
    * keep the event running.
    */
- -void perf_event_task_sched_in(struct task_struct *task, int cpu)
+ +void perf_event_task_sched_in(struct task_struct *task)
   {
- -      struct perf_cpu_context *cpuctx = &per_cpu(perf_cpu_context, cpu);
+ +      struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context);
         struct perf_event_context *ctx = task->perf_event_ctxp;
   
         if (likely(!ctx))
                 return;
+ +
         if (cpuctx->task_ctx == ctx)
                 return;
- -      __perf_event_sched_in(ctx, cpuctx, cpu);
- -      cpuctx->task_ctx = ctx;
- -}
   
- -static void perf_event_cpu_sched_in(struct perf_cpu_context *cpuctx, int cpu)
- -{
- -      struct perf_event_context *ctx = &cpuctx->ctx;
+ +      /*
+ +       * We want to keep the following priority order:
+ +       * cpu pinned (that don't need to move), task pinned,
+ +       * cpu flexible, task flexible.
+ +       */
+ +      cpu_ctx_sched_out(cpuctx, EVENT_FLEXIBLE);
   
- -      __perf_event_sched_in(ctx, cpuctx, cpu);
+ +      ctx_sched_in(ctx, cpuctx, EVENT_PINNED);
+ +      cpu_ctx_sched_in(cpuctx, EVENT_FLEXIBLE);
+ +      ctx_sched_in(ctx, cpuctx, EVENT_FLEXIBLE);
+ +
+ +      cpuctx->task_ctx = ctx;
   }
   
   #define MAX_INTERRUPTS (~0ULL)
   
   static void perf_log_throttle(struct perf_event *event, int enable);
   
- -static void perf_adjust_period(struct perf_event *event, u64 events)
+ +static u64 perf_calculate_period(struct perf_event *event, u64 nsec, u64 count)
+ +{
+ +      u64 frequency = event->attr.sample_freq;
+ +      u64 sec = NSEC_PER_SEC;
+ +      u64 divisor, dividend;
+ +
+ +      int count_fls, nsec_fls, frequency_fls, sec_fls;
+ +
+ +      count_fls = fls64(count);
+ +      nsec_fls = fls64(nsec);
+ +      frequency_fls = fls64(frequency);
+ +      sec_fls = 30;
+ +
+ +      /*
+ +       * We got @count in @nsec, with a target of sample_freq HZ
+ +       * the target period becomes:
+ +       *
+ +       *             @count * 10^9
+ +       * period = -------------------
+ +       *          @nsec * sample_freq
+ +       *
+ +       */
+ +
+ +      /*
+ +       * Reduce accuracy by one bit such that @a and @b converge
+ +       * to a similar magnitude.
+ +       */
+ +#define REDUCE_FLS(a, b)              \
+ +do {                                  \
+ +      if (a##_fls > b##_fls) {        \
+ +              a >>= 1;                \
+ +              a##_fls--;              \
+ +      } else {                        \
+ +              b >>= 1;                \
+ +              b##_fls--;              \
+ +      }                               \
+ +} while (0)
+ +
+ +      /*
+ +       * Reduce accuracy until either term fits in a u64, then proceed with
+ +       * the other, so that finally we can do a u64/u64 division.
+ +       */
+ +      while (count_fls + sec_fls > 64 && nsec_fls + frequency_fls > 64) {
+ +              REDUCE_FLS(nsec, frequency);
+ +              REDUCE_FLS(sec, count);
+ +      }
+ +
+ +      if (count_fls + sec_fls > 64) {
+ +              divisor = nsec * frequency;
+ +
+ +              while (count_fls + sec_fls > 64) {
+ +                      REDUCE_FLS(count, sec);
+ +                      divisor >>= 1;
+ +              }
+ +
+ +              dividend = count * sec;
+ +      } else {
+ +              dividend = count * sec;
+ +
+ +              while (nsec_fls + frequency_fls > 64) {
+ +                      REDUCE_FLS(nsec, frequency);
+ +                      dividend >>= 1;
+ +              }
+ +
+ +              divisor = nsec * frequency;
+ +      }
+ +
+ +      return div64_u64(dividend, divisor);
+ +}
+ +
+ +static void perf_adjust_period(struct perf_event *event, u64 nsec, u64 count)
   {
         struct hw_perf_event *hwc = &event->hw;
         u64 period, sample_period;
         s64 delta;
   
- -      events *= hwc->sample_period;
- -      period = div64_u64(events, event->attr.sample_freq);
+ +      period = perf_calculate_period(event, nsec, count);
   
         delta = (s64)(period - hwc->sample_period);
         delta = (delta + 7) / 8; /* low pass filter */
@@@ -1510,22 -1368,13 +1510,22 @@@
                 sample_period = 1;
   
         hwc->sample_period = sample_period;
+ +
+ +      if (atomic64_read(&hwc->period_left) > 8*sample_period) {
+ +              perf_disable();
+ +              event->pmu->disable(event);
+ +              atomic64_set(&hwc->period_left, 0);
+ +              event->pmu->enable(event);
+ +              perf_enable();
+ +      }
   }
   
   static void perf_ctx_adjust_freq(struct perf_event_context *ctx)
   {
         struct perf_event *event;
         struct hw_perf_event *hwc;
- -      u64 interrupts, freq;
+ +      u64 interrupts, now;
+ +      s64 delta;
   
         raw_spin_lock(&ctx->lock);
         list_for_each_entry_rcu(event, &ctx->event_list, event_entry) {
@@@ -1546,18 -1395,44 +1546,18 @@@
                 if (interrupts == MAX_INTERRUPTS) {
                         perf_log_throttle(event, 1);
                         event->pmu->unthrottle(event);
- -                      interrupts = 2*sysctl_perf_event_sample_rate/HZ;
                 }
   
                 if (!event->attr.freq || !event->attr.sample_freq)
                         continue;
   
- -              /*
- -               * if the specified freq < HZ then we need to skip ticks
- -               */
- -              if (event->attr.sample_freq < HZ) {
- -                      freq = event->attr.sample_freq;
- -
- -                      hwc->freq_count += freq;
- -                      hwc->freq_interrupts += interrupts;
- -
- -                      if (hwc->freq_count < HZ)
- -                              continue;
- -
- -                      interrupts = hwc->freq_interrupts;
- -                      hwc->freq_interrupts = 0;
- -                      hwc->freq_count -= HZ;
- -              } else
- -                      freq = HZ;
- -
- -              perf_adjust_period(event, freq * interrupts);
+ +              event->pmu->read(event);
+ +              now = atomic64_read(&event->count);
+ +              delta = now - hwc->freq_count_stamp;
+ +              hwc->freq_count_stamp = now;
   
- -              /*
- -               * In order to avoid being stalled by an (accidental) huge
- -               * sample period, force reset the sample period if we didn't
- -               * get any events in this freq period.
- -               */
- -              if (!interrupts) {
- -                      perf_disable();
- -                      event->pmu->disable(event);
- -                      atomic64_set(&hwc->period_left, 0);
- -                      event->pmu->enable(event);
- -                      perf_enable();
- -              }
+ +              if (delta > 0)
+ +                      perf_adjust_period(event, TICK_NSEC, delta);
         }
         raw_spin_unlock(&ctx->lock);
   }
@@@ -1567,22 -1442,26 +1567,22 @@@
    */
   static void rotate_ctx(struct perf_event_context *ctx)
   {
- -      struct perf_event *event;
- -
         if (!ctx->nr_events)
                 return;
   
         raw_spin_lock(&ctx->lock);
- -      /*
- -       * Rotate the first entry last (works just fine for group events too):
- -       */
+ +
+ +      /* Rotate the first entry last of non-pinned groups */
         perf_disable();
- -      list_for_each_entry(event, &ctx->group_list, group_entry) {
- -              list_move_tail(&event->group_entry, &ctx->group_list);
- -              break;
- -      }
+ +
+ +      list_rotate_left(&ctx->flexible_groups);
+ +
         perf_enable();
   
         raw_spin_unlock(&ctx->lock);
   }
   
- -void perf_event_task_tick(struct task_struct *curr, int cpu)
+ +void perf_event_task_tick(struct task_struct *curr)
   {
         struct perf_cpu_context *cpuctx;
         struct perf_event_context *ctx;
@@@ -1590,39 -1469,24 +1590,39 @@@
         if (!atomic_read(&nr_events))
                 return;
   
- -      cpuctx = &per_cpu(perf_cpu_context, cpu);
+ +      cpuctx = &__get_cpu_var(perf_cpu_context);
         ctx = curr->perf_event_ctxp;
   
         perf_ctx_adjust_freq(&cpuctx->ctx);
         if (ctx)
                 perf_ctx_adjust_freq(ctx);
   
- -      perf_event_cpu_sched_out(cpuctx);
+ +      cpu_ctx_sched_out(cpuctx, EVENT_FLEXIBLE);
         if (ctx)
- -              __perf_event_task_sched_out(ctx);
+ +              task_ctx_sched_out(ctx, EVENT_FLEXIBLE);
   
         rotate_ctx(&cpuctx->ctx);
         if (ctx)
                 rotate_ctx(ctx);
   
- -      perf_event_cpu_sched_in(cpuctx, cpu);
+ +      cpu_ctx_sched_in(cpuctx, EVENT_FLEXIBLE);
         if (ctx)
- -              perf_event_task_sched_in(curr, cpu);
+ +              task_ctx_sched_in(curr, EVENT_FLEXIBLE);
+ +}
+ +
+ +static int event_enable_on_exec(struct perf_event *event,
+ +                              struct perf_event_context *ctx)
+ +{
+ +      if (!event->attr.enable_on_exec)
+ +              return 0;
+ +
+ +      event->attr.enable_on_exec = 0;
+ +      if (event->state >= PERF_EVENT_STATE_INACTIVE)
+ +              return 0;
+ +
+ +      __perf_event_mark_enabled(event, ctx);
+ +
+ +      return 1;
   }
   
   /*
@@@ -1635,7 -1499,6 +1635,7 @@@ static void perf_event_enable_on_exec(s
         struct perf_event *event;
         unsigned long flags;
         int enabled = 0;
+ +      int ret;
   
         local_irq_save(flags);
         ctx = task->perf_event_ctxp;
@@@ -1646,16 -1509,14 +1646,16 @@@
   
         raw_spin_lock(&ctx->lock);
   
- -      list_for_each_entry(event, &ctx->group_list, group_entry) {
- -              if (!event->attr.enable_on_exec)
- -                      continue;
- -              event->attr.enable_on_exec = 0;
- -              if (event->state >= PERF_EVENT_STATE_INACTIVE)
- -                      continue;
- -              __perf_event_mark_enabled(event, ctx);
- -              enabled = 1;
+ +      list_for_each_entry(event, &ctx->pinned_groups, group_entry) {
+ +              ret = event_enable_on_exec(event, ctx);
+ +              if (ret)
+ +                      enabled = 1;
+ +      }
+ +
+ +      list_for_each_entry(event, &ctx->flexible_groups, group_entry) {
+ +              ret = event_enable_on_exec(event, ctx);
+ +              if (ret)
+ +                      enabled = 1;
         }
   
         /*
@@@ -1666,7 -1527,7 +1666,7 @@@
   
         raw_spin_unlock(&ctx->lock);
   
- -      perf_event_task_sched_in(task, smp_processor_id());
+ +      perf_event_task_sched_in(task);
    out:
         local_irq_restore(flags);
   }
@@@ -1729,8 -1590,7 +1729,8 @@@ __perf_event_init_context(struct perf_e
   {
         raw_spin_lock_init(&ctx->lock);
         mutex_init(&ctx->mutex);
- -      INIT_LIST_HEAD(&ctx->group_list);
+ +      INIT_LIST_HEAD(&ctx->pinned_groups);
+ +      INIT_LIST_HEAD(&ctx->flexible_groups);
         INIT_LIST_HEAD(&ctx->event_list);
         atomic_set(&ctx->refcount, 1);
         ctx->task = task;
@@@ -3408,6 -3268,9 +3408,9 @@@ static void perf_event_task_output(stru
   
   static int perf_event_task_match(struct perf_event *event)
   {
+       if (event->state != PERF_EVENT_STATE_ACTIVE)
+               return 0;
+ 
         if (event->cpu != -1 && event->cpu != smp_processor_id())
                 return 0;
   
@@@ -3517,6 -3380,9 +3520,9 @@@ static void perf_event_comm_output(stru
   
   static int perf_event_comm_match(struct perf_event *event)
   {
+       if (event->state != PERF_EVENT_STATE_ACTIVE)
+               return 0;
+ 
         if (event->cpu != -1 && event->cpu != smp_processor_id())
                 return 0;
   
@@@ -3634,6 -3500,9 +3640,9 @@@ static void perf_event_mmap_output(stru
   static int perf_event_mmap_match(struct perf_event *event,
                                    struct perf_mmap_event *mmap_event)
   {
+       if (event->state != PERF_EVENT_STATE_ACTIVE)
+               return 0;
+ 
         if (event->cpu != -1 && event->cpu != smp_processor_id())
                 return 0;
   
@@@ -3820,12 -3689,12 +3829,12 @@@ static int __perf_event_overflow(struc
   
         if (event->attr.freq) {
                 u64 now = perf_clock();
- -              s64 delta = now - hwc->freq_stamp;
+ +              s64 delta = now - hwc->freq_time_stamp;
   
- -              hwc->freq_stamp = now;
+ +              hwc->freq_time_stamp = now;
   
- -              if (delta > 0 && delta < TICK_NSEC)
- -                      perf_adjust_period(event, NSEC_PER_SEC / (int)delta);
+ +              if (delta > 0 && delta < 2*TICK_NSEC)
+ +                      perf_adjust_period(event, delta, hwc->last_period);
         }
   
         /*
@@@ -4316,7 -4185,7 +4325,7 @@@ static const struct pmu perf_ops_task_c
         .read           = task_clock_perf_event_read,
   };
   
- -#ifdef CONFIG_EVENT_PROFILE
+ +#ifdef CONFIG_EVENT_TRACING
   
   void perf_tp_event(int event_id, u64 addr, u64 count, void *record,
                           int entry_size)
@@@ -4421,7 -4290,7 +4430,7 @@@ static void perf_event_free_filter(stru
   {
   }
   
- -#endif /* CONFIG_EVENT_PROFILE */
+ +#endif /* CONFIG_EVENT_TRACING */
   
   #ifdef CONFIG_HAVE_HW_BREAKPOINT
   static void bp_perf_event_destroy(struct perf_event *event)
@@@ -5002,15 -4871,8 +5011,15 @@@ inherit_event(struct perf_event *parent
         else
                 child_event->state = PERF_EVENT_STATE_OFF;
   
- -      if (parent_event->attr.freq)
- -              child_event->hw.sample_period = parent_event->hw.sample_period;
+ +      if (parent_event->attr.freq) {
+ +              u64 sample_period = parent_event->hw.sample_period;
+ +              struct hw_perf_event *hwc = &child_event->hw;
+ +
+ +              hwc->sample_period = sample_period;
+ +              hwc->last_period   = sample_period;
+ +
+ +              atomic64_set(&hwc->period_left, sample_period);
+ +      }
   
         child_event->overflow_handler = parent_event->overflow_handler;
   
@@@ -5178,11 -5040,7 +5187,11 @@@ void perf_event_exit_task(struct task_s
         mutex_lock_nested(&child_ctx->mutex, SINGLE_DEPTH_NESTING);
   
   again:
- -      list_for_each_entry_safe(child_event, tmp, &child_ctx->group_list,
+ +      list_for_each_entry_safe(child_event, tmp, &child_ctx->pinned_groups,
+ +                               group_entry)
+ +              __perf_event_exit_task(child_event, child_ctx, child);
+ +
+ +      list_for_each_entry_safe(child_event, tmp, &child_ctx->flexible_groups,
                                  group_entry)
                 __perf_event_exit_task(child_event, child_ctx, child);
   
@@@ -5191,8 -5049,7 +5200,8 @@@
          * its siblings to the list, but we obtained 'tmp' before that which
          * will still point to the list head terminating the iteration.
          */
- -      if (!list_empty(&child_ctx->group_list))
+ +      if (!list_empty(&child_ctx->pinned_groups) ||
+ +          !list_empty(&child_ctx->flexible_groups))
                 goto again;
   
         mutex_unlock(&child_ctx->mutex);
@@@ -5200,24 -5057,6 +5209,24 @@@
         put_ctx(child_ctx);
   }
   
+ +static void perf_free_event(struct perf_event *event,
+ +                          struct perf_event_context *ctx)
+ +{
+ +      struct perf_event *parent = event->parent;
+ +
+ +      if (WARN_ON_ONCE(!parent))
+ +              return;
+ +
+ +      mutex_lock(&parent->child_mutex);
+ +      list_del_init(&event->child_list);
+ +      mutex_unlock(&parent->child_mutex);
+ +
+ +      fput(parent->filp);
+ +
+ +      list_del_event(event, ctx);
+ +      free_event(event);
+ +}
+ +
   /*
    * free an unexposed, unused context as created by inheritance by
    * init_task below, used by fork() in case of fail.
@@@ -5232,70 -5071,36 +5241,70 @@@ void perf_event_free_task(struct task_s
   
         mutex_lock(&ctx->mutex);
   again:
- -      list_for_each_entry_safe(event, tmp, &ctx->group_list, group_entry) {
- -              struct perf_event *parent = event->parent;
+ +      list_for_each_entry_safe(event, tmp, &ctx->pinned_groups, group_entry)
+ +              perf_free_event(event, ctx);
   
- -              if (WARN_ON_ONCE(!parent))
- -                      continue;
+ +      list_for_each_entry_safe(event, tmp, &ctx->flexible_groups,
+ +                               group_entry)
+ +              perf_free_event(event, ctx);
   
- -              mutex_lock(&parent->child_mutex);
- -              list_del_init(&event->child_list);
- -              mutex_unlock(&parent->child_mutex);
+ +      if (!list_empty(&ctx->pinned_groups) ||
+ +          !list_empty(&ctx->flexible_groups))
+ +              goto again;
   
- -              fput(parent->filp);
+ +      mutex_unlock(&ctx->mutex);
   
- -              list_del_event(event, ctx);
- -              free_event(event);
+ +      put_ctx(ctx);
+ +}
+ +
+ +static int
+ +inherit_task_group(struct perf_event *event, struct task_struct *parent,
+ +                 struct perf_event_context *parent_ctx,
+ +                 struct task_struct *child,
+ +                 int *inherited_all)
+ +{
+ +      int ret;
+ +      struct perf_event_context *child_ctx = child->perf_event_ctxp;
+ +
+ +      if (!event->attr.inherit) {
+ +              *inherited_all = 0;
+ +              return 0;
         }
   
- -      if (!list_empty(&ctx->group_list))
- -              goto again;
+ +      if (!child_ctx) {
+ +              /*
+ +               * This is executed from the parent task context, so
+ +               * inherit events that have been marked for cloning.
+ +               * First allocate and initialize a context for the
+ +               * child.
+ +               */
   
- -      mutex_unlock(&ctx->mutex);
+ +              child_ctx = kzalloc(sizeof(struct perf_event_context),
+ +                                  GFP_KERNEL);
+ +              if (!child_ctx)
+ +                      return -ENOMEM;
   
- -      put_ctx(ctx);
+ +              __perf_event_init_context(child_ctx, child);
+ +              child->perf_event_ctxp = child_ctx;
+ +              get_task_struct(child);
+ +      }
+ +
+ +      ret = inherit_group(event, parent, parent_ctx,
+ +                          child, child_ctx);
+ +
+ +      if (ret)
+ +              *inherited_all = 0;
+ +
+ +      return ret;
   }
   
+ +
   /*
    * Initialize the perf_event context in task_struct
    */
   int perf_event_init_task(struct task_struct *child)
   {
- -      struct perf_event_context *child_ctx = NULL, *parent_ctx;
+ +      struct perf_event_context *child_ctx, *parent_ctx;
         struct perf_event_context *cloned_ctx;
         struct perf_event *event;
         struct task_struct *parent = current;
@@@ -5333,22 -5138,41 +5342,22 @@@
          * We dont have to disable NMIs - we are only looking at
          * the list, not manipulating it:
          */
- -      list_for_each_entry(event, &parent_ctx->group_list, group_entry) {
- -
- -              if (!event->attr.inherit) {
- -                      inherited_all = 0;
- -                      continue;
- -              }
- -
- -              if (!child->perf_event_ctxp) {
- -                      /*
- -                       * This is executed from the parent task context, so
- -                       * inherit events that have been marked for cloning.
- -                       * First allocate and initialize a context for the
- -                       * child.
- -                       */
- -
- -                      child_ctx = kzalloc(sizeof(struct perf_event_context),
- -                                          GFP_KERNEL);
- -                      if (!child_ctx) {
- -                              ret = -ENOMEM;
- -                              break;
- -                      }
- -
- -                      __perf_event_init_context(child_ctx, child);
- -                      child->perf_event_ctxp = child_ctx;
- -                      get_task_struct(child);
- -              }
+ +      list_for_each_entry(event, &parent_ctx->pinned_groups, group_entry) {
+ +              ret = inherit_task_group(event, parent, parent_ctx, child,
+ +                                       &inherited_all);
+ +              if (ret)
+ +                      break;
+ +      }
   
- -              ret = inherit_group(event, parent, parent_ctx,
- -                                           child, child_ctx);
- -              if (ret) {
- -                      inherited_all = 0;
+ +      list_for_each_entry(event, &parent_ctx->flexible_groups, group_entry) {
+ +              ret = inherit_task_group(event, parent, parent_ctx, child,
+ +                                       &inherited_all);
+ +              if (ret)
                         break;
- -              }
         }
   
+ +      child_ctx = child->perf_event_ctxp;
+ +
         if (child_ctx && inherited_all) {
                 /*
                  * Mark the child context as a clone of the parent
@@@ -5397,9 -5221,7 +5406,9 @@@ static void __perf_event_exit_cpu(void 
         struct perf_event_context *ctx = &cpuctx->ctx;
         struct perf_event *event, *tmp;
   
- -      list_for_each_entry_safe(event, tmp, &ctx->group_list, group_entry)
+ +      list_for_each_entry_safe(event, tmp, &ctx->pinned_groups, group_entry)
+ +              __perf_event_remove_from_context(event);
+ +      list_for_each_entry_safe(event, tmp, &ctx->flexible_groups, group_entry)
                 __perf_event_remove_from_context(event);
   }
   static void perf_event_exit_cpu(int cpu)
diff --combined kernel/sched.c

index c3ad3427a2a554e9b3043385fc6e15f9b2122296,4508fe7048be5d91ee5a01daef3fe9de8d37b909..7266b912139fa955a164af796426800a733d8c32
--- 1/kernel/sched.c
--- 2/kernel/sched.c
+++ b/kernel/sched.c
@@@ -2783,13 -2783,7 +2783,13 @@@ static void finish_task_switch(struct r
          */
         prev_state = prev->state;
         finish_arch_switch(prev);
- -      perf_event_task_sched_in(current, cpu_of(rq));
+ +#ifdef __ARCH_WANT_INTERRUPTS_ON_CTXSW
+ +      local_irq_disable();
+ +#endif /* __ARCH_WANT_INTERRUPTS_ON_CTXSW */
+ +      perf_event_task_sched_in(current);
+ +#ifdef __ARCH_WANT_INTERRUPTS_ON_CTXSW
+ +      local_irq_enable();
+ +#endif /* __ARCH_WANT_INTERRUPTS_ON_CTXSW */
         finish_lock_switch(rq, prev);
   
         fire_sched_in_preempt_notifiers(current);
@@@ -5304,7 -5298,7 +5304,7 @@@ void scheduler_tick(void
         curr->sched_class->task_tick(rq, curr, 0);
         raw_spin_unlock(&rq->lock);
   
- -      perf_event_task_tick(curr, cpu);
+ +      perf_event_task_tick(curr);
   
   #ifdef CONFIG_SMP
         rq->idle_at_tick = idle_cpu(cpu);
@@@ -5518,7 -5512,7 +5518,7 @@@ need_resched_nonpreemptible
   
         if (likely(prev != next)) {
                 sched_info_switch(prev, next);
- -              perf_event_task_sched_out(prev, next, cpu);
+ +              perf_event_task_sched_out(prev, next);
   
                 rq->nr_switches++;
                 rq->curr = next;
@@@ -5536,8 -5530,11 +5536,11 @@@
   
         post_schedule(rq);
   
-       if (unlikely(reacquire_kernel_lock(current) < 0))
+       if (unlikely(reacquire_kernel_lock(current) < 0)) {
+               prev = rq->curr;
+               switch_count = &prev->nivcsw;
                 goto need_resched_nonpreemptible;
+       }
   
         preempt_enable_no_resched();
         if (need_resched())
diff --combined kernel/trace/trace_events_filter.c

index 74563d7e102e5b3beb5d1feae9e86f2ddc510dc0,e42af9aad69fc3b321ded8531d8792c80cb9b33b..4615f62a04f185bf5d38b89162ca223884954385
--- 1/kernel/trace/trace_events_filter.c
--- 2/kernel/trace/trace_events_filter.c
+++ b/kernel/trace/trace_events_filter.c
@@@ -211,8 -211,9 +211,9 @@@ static int filter_pred_pchar(struct fil
   {
         char **addr = (char **)(event + pred->offset);
         int cmp, match;
+       int len = strlen(*addr) + 1;    /* including tailing '\0' */
   
-       cmp = pred->regex.match(*addr, &pred->regex, pred->regex.field_len);
+       cmp = pred->regex.match(*addr, &pred->regex, len);
   
         match = cmp ^ pred->not;
   
@@@ -251,7 -252,18 +252,18 @@@ static int filter_pred_none(struct filt
         return 0;
   }
   
- /* Basic regex callbacks */
+ /*
+  * regex_match_foo - Basic regex callbacks
+  *
+  * @str: the string to be searched
+  * @r:   the regex structure containing the pattern string
+  * @len: the length of the string to be searched (including '\0')
+  *
+  * Note:
+  * - @str might not be NULL-terminated if it's of type DYN_STRING
+  *   or STATIC_STRING
+  */
+ 
   static int regex_match_full(char *str, struct regex *r, int len)
   {
         if (strncmp(str, r->pattern, len) == 0)
@@@ -261,23 -273,24 +273,24 @@@
   
   static int regex_match_front(char *str, struct regex *r, int len)
   {
-       if (strncmp(str, r->pattern, len) == 0)
+       if (strncmp(str, r->pattern, r->len) == 0)
                 return 1;
         return 0;
   }
   
   static int regex_match_middle(char *str, struct regex *r, int len)
   {
-       if (strstr(str, r->pattern))
+       if (strnstr(str, r->pattern, len))
                 return 1;
         return 0;
   }
   
   static int regex_match_end(char *str, struct regex *r, int len)
   {
-       char *ptr = strstr(str, r->pattern);
+       int strlen = len - 1;
   
-       if (ptr && (ptr[r->len] == 0))
+       if (strlen >= r->len &&
+           memcmp(str + strlen - r->len, r->pattern, r->len) == 0)
                 return 1;
         return 0;
   }
@@@ -781,10 -794,8 +794,8 @@@ static int filter_add_pred(struct filte
                         pred->regex.field_len = field->size;
                 } else if (field->filter_type == FILTER_DYN_STRING)
                         fn = filter_pred_strloc;
-               else {
+               else
                         fn = filter_pred_pchar;
-                       pred->regex.field_len = strlen(pred->regex.pattern);
-               }
         } else {
                 if (field->is_signed)
                         ret = strict_strtoll(pred->regex.pattern, 0, &val);
@@@ -1360,7 -1371,7 +1371,7 @@@ out_unlock
         return err;
   }
   
- -#ifdef CONFIG_EVENT_PROFILE
+ +#ifdef CONFIG_PERF_EVENTS
   
   void ftrace_profile_free_filter(struct perf_event *event)
   {
@@@ -1428,5 -1439,5 +1439,5 @@@ out_unlock
         return err;
   }
   
- -#endif /* CONFIG_EVENT_PROFILE */
+ +#endif /* CONFIG_PERF_EVENTS */
   
diff --combined tools/perf/builtin-kmem.c

index 38b8ca900eda1522e769ec31dbf0c25c1ab3f283,93c67bf53d2c96e391a649989ab6597728063b1c..5d5dc6b096172eedc07baf0db7f8c7a36ee2887f
--- 1/tools/perf/builtin-kmem.c
--- 2/tools/perf/builtin-kmem.c
+++ b/tools/perf/builtin-kmem.c
@@@ -92,18 -92,23 +92,18 @@@ static void setup_cpunode_map(void
         if (!dir1)
                 return;
   
- -      while (true) {
- -              dent1 = readdir(dir1);
- -              if (!dent1)
- -                      break;
- -
- -              if (sscanf(dent1->d_name, "node%u", &mem) < 1)
+ +      while ((dent1 = readdir(dir1)) != NULL) {
+ +              if (dent1->d_type != DT_DIR ||
+ +                  sscanf(dent1->d_name, "node%u", &mem) < 1)
                         continue;
   
                 snprintf(buf, PATH_MAX, "%s/%s", PATH_SYS_NODE, dent1->d_name);
                 dir2 = opendir(buf);
                 if (!dir2)
                         continue;
- -              while (true) {
- -                      dent2 = readdir(dir2);
- -                      if (!dent2)
- -                              break;
- -                      if (sscanf(dent2->d_name, "cpu%u", &cpu) < 1)
+ +              while ((dent2 = readdir(dir2)) != NULL) {
+ +                      if (dent2->d_type != DT_LNK ||
+ +                          sscanf(dent2->d_name, "cpu%u", &cpu) < 1)
                                 continue;
                         cpunode_map[cpu] = mem;
                 }
@@@ -316,8 -321,11 +316,8 @@@ static int process_sample_event(event_
   
         event__parse_sample(event, session->sample_type, &data);
   
- -      dump_printf("(IP, %d): %d/%d: %p period: %Ld\n",
- -              event->header.misc,
- -              data.pid, data.tid,
- -              (void *)(long)data.ip,
- -              (long long)data.period);
+ +      dump_printf("(IP, %d): %d/%d: %#Lx period: %Ld\n", event->header.misc,
+ +                  data.pid, data.tid, data.ip, data.period);
   
         thread = perf_session__findnew(session, event->ip.pid);
         if (thread == NULL) {
@@@ -334,9 -342,22 +334,9 @@@
         return 0;
   }
   
- -static int sample_type_check(struct perf_session *session)
- -{
- -      if (!(session->sample_type & PERF_SAMPLE_RAW)) {
- -              fprintf(stderr,
- -                      "No trace sample to read. Did you call perf record "
- -                      "without -R?");
- -              return -1;
- -      }
- -
- -      return 0;
- -}
- -
   static struct perf_event_ops event_ops = {
- -      .process_sample_event   = process_sample_event,
- -      .process_comm_event     = event__process_comm,
- -      .sample_type_check      = sample_type_check,
+ +      .sample = process_sample_event,
+ +      .comm   = event__process_comm,
   };
   
   static double fragmentation(unsigned long n_req, unsigned long n_alloc)
@@@ -354,7 -375,7 +354,7 @@@ static void __print_result(struct rb_ro
   
         printf("%.102s\n", graph_dotted_line);
         printf(" %-34s |",  is_caller ? "Callsite": "Alloc Ptr");
-       printf(" Total_alloc/Per | Total_req/Per   | Hit   | Ping-pong | Frag\n");
+       printf(" Total_alloc/Per | Total_req/Per   | Hit      | Ping-pong | Frag\n");
         printf("%.102s\n", graph_dotted_line);
   
         next = rb_first(root);
@@@ -380,7 -401,7 +380,7 @@@
                         snprintf(buf, sizeof(buf), "%#Lx", addr);
                 printf(" %-34s |", buf);
   
-               printf(" %9llu/%-5lu | %9llu/%-5lu | %6lu | %8lu | %6.3f%%\n",
+               printf(" %9llu/%-5lu | %9llu/%-5lu | %8lu | %8lu | %6.3f%%\n",
                        (unsigned long long)data->bytes_alloc,
                        (unsigned long)data->bytes_alloc / data->hit,
                        (unsigned long long)data->bytes_req,
@@@ -483,14 -504,11 +483,14 @@@ static void sort_result(void
   
   static int __cmd_kmem(void)
   {
- -      int err;
+ +      int err = -EINVAL;
         struct perf_session *session = perf_session__new(input_name, O_RDONLY, 0);
         if (session == NULL)
                 return -ENOMEM;
   
+ +      if (!perf_session__has_traces(session, "kmem record"))
+ +              goto out_delete;
+ +
         setup_pager();
         err = perf_session__process_events(session, &event_ops);
         if (err != 0)
@@@ -766,7 -784,8 +766,8 @@@ int cmd_kmem(int argc, const char **arg
                         setup_sorting(&alloc_sort, default_sort_order);
   
                 return __cmd_kmem();
-       }
+       } else
+               usage_with_options(kmem_usage, kmem_options);
   
         return 0;
   }
diff --combined tools/perf/builtin-report.c

index 4c3d6997995b8a535b9c30cec063080d1d2ceb3f,860f1eeeea7dbf8e43779308eaaffb1dbcf79d10..cfc655d40bb7b4a88f37c068aef908e76e408a4c
--- 1/tools/perf/builtin-report.c
--- 2/tools/perf/builtin-report.c
+++ b/tools/perf/builtin-report.c
@@@ -34,8 -34,6 +34,8 @@@
   static char           const *input_name = "perf.data";
   
   static int            force;
+ +static bool           hide_unresolved;
+ +static bool           dont_use_callchains;
   
   static int            show_threads;
   static struct perf_read_values        show_threads_values;
@@@ -93,8 -91,11 +93,8 @@@ static int process_sample_event(event_
   
         event__parse_sample(event, session->sample_type, &data);
   
- -      dump_printf("(IP, %d): %d/%d: %p period: %Ld\n",
- -              event->header.misc,
- -              data.pid, data.tid,
- -              (void *)(long)data.ip,
- -              (long long)data.period);
+ +      dump_printf("(IP, %d): %d/%d: %#Lx period: %Ld\n", event->header.misc,
+ +                  data.pid, data.tid, data.ip, data.period);
   
         if (session->sample_type & PERF_SAMPLE_CALLCHAIN) {
                 unsigned int i;
@@@ -120,7 -121,7 +120,7 @@@
                 return -1;
         }
   
- -      if (al.filtered)
+ +      if (al.filtered || (hide_unresolved && al.sym == NULL))
                 return 0;
   
         if (perf_session__add_hist_entry(session, &al, data.callchain, data.period)) {
@@@ -155,14 -156,14 +155,14 @@@ static int process_read_event(event_t *
         return 0;
   }
   
- -static int sample_type_check(struct perf_session *session)
+ +static int perf_session__setup_sample_type(struct perf_session *self)
   {
- -      if (!(session->sample_type & PERF_SAMPLE_CALLCHAIN)) {
+ +      if (!(self->sample_type & PERF_SAMPLE_CALLCHAIN)) {
                 if (sort__has_parent) {
                         fprintf(stderr, "selected --sort parent, but no"
                                         " callchain data. Did you call"
                                         " perf record without -g?\n");
- -                      return -1;
+ +                      return -EINVAL;
                 }
                 if (symbol_conf.use_callchain) {
                         fprintf(stderr, "selected -g but no callchain data."
@@@ -170,13 -171,12 +170,13 @@@
                                         " -g?\n");
                         return -1;
                 }
- -      } else if (callchain_param.mode != CHAIN_NONE && !symbol_conf.use_callchain) {
+ +      } else if (!dont_use_callchains && callchain_param.mode != CHAIN_NONE &&
+ +                 !symbol_conf.use_callchain) {
                         symbol_conf.use_callchain = true;
                         if (register_callchain_param(&callchain_param) < 0) {
                                 fprintf(stderr, "Can't register callchain"
                                                 " params\n");
- -                              return -1;
+ +                              return -EINVAL;
                         }
         }
   
@@@ -184,18 -184,20 +184,18 @@@
   }
   
   static struct perf_event_ops event_ops = {
- -      .process_sample_event   = process_sample_event,
- -      .process_mmap_event     = event__process_mmap,
- -      .process_comm_event     = event__process_comm,
- -      .process_exit_event     = event__process_task,
- -      .process_fork_event     = event__process_task,
- -      .process_lost_event     = event__process_lost,
- -      .process_read_event     = process_read_event,
- -      .sample_type_check      = sample_type_check,
+ +      .sample = process_sample_event,
+ +      .mmap   = event__process_mmap,
+ +      .comm   = event__process_comm,
+ +      .exit   = event__process_task,
+ +      .fork   = event__process_task,
+ +      .lost   = event__process_lost,
+ +      .read   = process_read_event,
   };
   
- -
   static int __cmd_report(void)
   {
- -      int ret;
+ +      int ret = -EINVAL;
         struct perf_session *session;
   
         session = perf_session__new(input_name, O_RDONLY, force);
@@@ -205,10 -207,6 +205,10 @@@
         if (show_threads)
                 perf_read_values_init(&show_threads_values);
   
+ +      ret = perf_session__setup_sample_type(session);
+ +      if (ret)
+ +              goto out_delete;
+ +
         ret = perf_session__process_events(session, &event_ops);
         if (ret)
                 goto out_delete;
@@@ -245,19 -243,11 +245,19 @@@ out_delete
   
   static int
   parse_callchain_opt(const struct option *opt __used, const char *arg,
- -                  int unset __used)
+ +                  int unset)
   {
         char *tok;
         char *endptr;
   
+ +      /*
+ +       * --no-call-graph
+ +       */
+ +      if (unset) {
+ +              dont_use_callchains = true;
+ +              return 0;
+ +      }
+ +
         symbol_conf.use_callchain = true;
   
         if (!arg)
@@@ -279,7 -269,7 +279,7 @@@
   
         else if (!strncmp(tok, "none", strlen(arg))) {
                 callchain_param.mode = CHAIN_NONE;
-               symbol_conf.use_callchain = true;
+               symbol_conf.use_callchain = false;
   
                 return 0;
         }
@@@ -329,7 -319,7 +329,7 @@@ static const struct option options[] = 
                    "pretty printing style key: normal raw"),
         OPT_STRING('s', "sort", &sort_order, "key[,key2...]",
                    "sort by key(s): pid, comm, dso, symbol, parent"),
- -      OPT_BOOLEAN('P', "full-paths", &event_ops.full_paths,
+ +      OPT_BOOLEAN('P', "full-paths", &symbol_conf.full_paths,
                     "Don't shorten the pathnames taking into account the cwd"),
         OPT_STRING('p', "parent", &parent_pattern, "regex",
                    "regex filter to identify parent, see: '--sort parent'"),
@@@ -350,8 -340,6 +350,8 @@@
         OPT_STRING('t', "field-separator", &symbol_conf.field_sep, "separator",
                    "separator for columns, no spaces will be added between "
                    "columns '.' is reserved."),
+ +      OPT_BOOLEAN('U', "hide-unresolved", &hide_unresolved,
+ +                  "Only display entries resolved to a symbol"),
         OPT_END()
   };
   
diff --combined tools/perf/builtin-timechart.c

index 5b68d81d93a11f591bd0e0e947b6d79b2372e4e0,3f8bbcfb1e9bcd0fed97470a38e918f9b7677230..0d4d8ff7914b029423dba4e742c7343925519043
--- 1/tools/perf/builtin-timechart.c
--- 2/tools/perf/builtin-timechart.c
+++ b/tools/perf/builtin-timechart.c
@@@ -280,7 -280,7 +280,7 @@@ static u64 cpus_pstate_state[MAX_CPUS]
   
   static int process_comm_event(event_t *event, struct perf_session *session __used)
   {
-       pid_set_comm(event->comm.pid, event->comm.comm);
+       pid_set_comm(event->comm.tid, event->comm.comm);
         return 0;
   }
   
@@@ -1029,24 -1029,33 +1029,24 @@@ static void process_samples(struct perf
         }
   }
   
- -static int sample_type_check(struct perf_session *session)
- -{
- -      if (!(session->sample_type & PERF_SAMPLE_RAW)) {
- -              fprintf(stderr, "No trace samples found in the file.\n"
- -                              "Have you used 'perf timechart record' to record it?\n");
- -              return -1;
- -      }
- -
- -      return 0;
- -}
- -
   static struct perf_event_ops event_ops = {
- -      .process_comm_event     = process_comm_event,
- -      .process_fork_event     = process_fork_event,
- -      .process_exit_event     = process_exit_event,
- -      .process_sample_event   = queue_sample_event,
- -      .sample_type_check      = sample_type_check,
+ +      .comm   = process_comm_event,
+ +      .fork   = process_fork_event,
+ +      .exit   = process_exit_event,
+ +      .sample = queue_sample_event,
   };
   
   static int __cmd_timechart(void)
   {
         struct perf_session *session = perf_session__new(input_name, O_RDONLY, 0);
- -      int ret;
+ +      int ret = -EINVAL;
   
         if (session == NULL)
                 return -ENOMEM;
   
+ +      if (!perf_session__has_traces(session, "timechart record"))
+ +              goto out_delete;
+ +
         ret = perf_session__process_events(session, &event_ops);
         if (ret)
                 goto out_delete;
author	Ingo Molnar <mingo@elte.hu>
	Fri, 29 Jan 2010 08:24:57 +0000 (09:24 +0100)
committer	Ingo Molnar <mingo@elte.hu>
	Fri, 29 Jan 2010 09:36:22 +0000 (10:36 +0100)
		1	2
arch/x86/include/asm/perf_event.h	patch \|	diff1 \|	diff2 \|	blob \| history
arch/x86/kernel/cpu/perf_event.c	patch \|	diff1 \|	diff2 \|	blob \| history
include/linux/perf_event.h	patch \|	diff1 \|	diff2 \|	blob \| history
kernel/perf_event.c	patch \|	diff1 \|	diff2 \|	blob \| history
kernel/sched.c	patch \|	diff1 \|	diff2 \|	blob \| history
kernel/trace/trace_events_filter.c	patch \|	diff1 \|	diff2 \|	blob \| history
tools/perf/builtin-kmem.c	patch \|	diff1 \|	diff2 \|	blob \| history
tools/perf/builtin-report.c	patch \|	diff1 \|	diff2 \|	blob \| history
tools/perf/builtin-timechart.c	patch \|	diff1 \|	diff2 \|	blob \| history