#define MSR_ARCH_PERFMON_EVENTSEL1 0x187
#define ARCH_PERFMON_EVENTSEL0_ENABLE (1 << 22)
+ #define ARCH_PERFMON_EVENTSEL_ANY (1 << 21)
#define ARCH_PERFMON_EVENTSEL_INT (1 << 20)
#define ARCH_PERFMON_EVENTSEL_OS (1 << 17)
#define ARCH_PERFMON_EVENTSEL_USR (1 << 16)
/*
* Includes eventsel and unit mask as well:
*/
-#define ARCH_PERFMON_EVENT_MASK 0xffff
+
+
+#define INTEL_ARCH_EVTSEL_MASK 0x000000FFULL
+#define INTEL_ARCH_UNIT_MASK 0x0000FF00ULL
+#define INTEL_ARCH_EDGE_MASK 0x00040000ULL
+#define INTEL_ARCH_INV_MASK 0x00800000ULL
+#define INTEL_ARCH_CNT_MASK 0xFF000000ULL
+#define INTEL_ARCH_EVENT_MASK (INTEL_ARCH_UNIT_MASK|INTEL_ARCH_EVTSEL_MASK)
/*
* filter mask to validate fixed counter events.
* The other filters are supported by fixed counters.
* The any-thread option is supported starting with v3.
*/
-#define ARCH_PERFMON_EVENT_FILTER_MASK 0xff840000
+#define INTEL_ARCH_FIXED_MASK \
+ (INTEL_ARCH_CNT_MASK| \
+ INTEL_ARCH_INV_MASK| \
+ INTEL_ARCH_EDGE_MASK|\
+ INTEL_ARCH_UNIT_MASK|\
+ INTEL_ARCH_EVTSEL_MASK)
#define ARCH_PERFMON_UNHALTED_CORE_CYCLES_SEL 0x3c
#define ARCH_PERFMON_UNHALTED_CORE_CYCLES_UMASK (0x00 << 8)
* Copyright (C) 2009 Advanced Micro Devices, Inc., Robert Richter
* Copyright (C) 2008-2009 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com>
* Copyright (C) 2009 Intel Corporation, <markus.t.metzger@intel.com>
+ * Copyright (C) 2009 Google, Inc., Stephane Eranian
*
* For licencing details see kernel-base/COPYING
*/
#include <linux/uaccess.h>
#include <linux/highmem.h>
#include <linux/cpu.h>
+#include <linux/bitops.h>
#include <asm/apic.h>
#include <asm/stacktrace.h>
u64 pebs_event_reset[MAX_PEBS_EVENTS];
};
+struct event_constraint {
+ union {
+ unsigned long idxmsk[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
+ u64 idxmsk64[1];
+ };
+ int code;
+ int cmask;
+ int weight;
+};
+
struct cpu_hw_events {
- struct perf_event *events[X86_PMC_IDX_MAX];
- unsigned long used_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
+ struct perf_event *events[X86_PMC_IDX_MAX]; /* in counter order */
unsigned long active_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
unsigned long interrupts;
int enabled;
struct debug_store *ds;
-};
-struct event_constraint {
- unsigned long idxmsk[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
- int code;
+ int n_events;
+ int n_added;
+ int assign[X86_PMC_IDX_MAX]; /* event to counter assignment */
+ struct perf_event *event_list[X86_PMC_IDX_MAX]; /* in enabled order */
};
-#define EVENT_CONSTRAINT(c, m) { .code = (c), .idxmsk[0] = (m) }
-#define EVENT_CONSTRAINT_END { .code = 0, .idxmsk[0] = 0 }
+#define EVENT_CONSTRAINT(c, n, m) { \
+ { .idxmsk64[0] = (n) }, \
+ .code = (c), \
+ .cmask = (m), \
+ .weight = HWEIGHT64((u64)(n)), \
+}
+
+#define INTEL_EVENT_CONSTRAINT(c, n) \
+ EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVTSEL_MASK)
+
+#define FIXED_EVENT_CONSTRAINT(c, n) \
+ EVENT_CONSTRAINT(c, n, INTEL_ARCH_FIXED_MASK)
-#define for_each_event_constraint(e, c) \
- for ((e) = (c); (e)->idxmsk[0]; (e)++)
+#define EVENT_CONSTRAINT_END \
+ EVENT_CONSTRAINT(0, 0, 0)
+#define for_each_event_constraint(e, c) \
+ for ((e) = (c); (e)->cmask; (e)++)
/*
* struct x86_pmu - generic x86 pmu
u64 intel_ctrl;
void (*enable_bts)(u64 config);
void (*disable_bts)(void);
- int (*get_event_idx)(struct cpu_hw_events *cpuc,
- struct hw_perf_event *hwc);
+
+ struct event_constraint *
+ (*get_event_constraints)(struct cpu_hw_events *cpuc,
+ struct perf_event *event);
+
+ void (*put_event_constraints)(struct cpu_hw_events *cpuc,
+ struct perf_event *event);
+ struct event_constraint *event_constraints;
};
static struct x86_pmu x86_pmu __read_mostly;
.enabled = 1,
};
-static const struct event_constraint *event_constraints;
+static int x86_perf_event_set_period(struct perf_event *event,
+ struct hw_perf_event *hwc, int idx);
/*
* Not sure about some of these
return hw_event & P6_EVNTSEL_MASK;
}
-static const struct event_constraint intel_p6_event_constraints[] =
+static struct event_constraint intel_p6_event_constraints[] =
{
- EVENT_CONSTRAINT(0xc1, 0x1), /* FLOPS */
- EVENT_CONSTRAINT(0x10, 0x1), /* FP_COMP_OPS_EXE */
- EVENT_CONSTRAINT(0x11, 0x1), /* FP_ASSIST */
- EVENT_CONSTRAINT(0x12, 0x2), /* MUL */
- EVENT_CONSTRAINT(0x13, 0x2), /* DIV */
- EVENT_CONSTRAINT(0x14, 0x1), /* CYCLES_DIV_BUSY */
+ INTEL_EVENT_CONSTRAINT(0xc1, 0x1), /* FLOPS */
+ INTEL_EVENT_CONSTRAINT(0x10, 0x1), /* FP_COMP_OPS_EXE */
+ INTEL_EVENT_CONSTRAINT(0x11, 0x1), /* FP_ASSIST */
+ INTEL_EVENT_CONSTRAINT(0x12, 0x2), /* MUL */
+ INTEL_EVENT_CONSTRAINT(0x13, 0x2), /* DIV */
+ INTEL_EVENT_CONSTRAINT(0x14, 0x1), /* CYCLES_DIV_BUSY */
EVENT_CONSTRAINT_END
};
[PERF_COUNT_HW_BUS_CYCLES] = 0x013c,
};
-static const struct event_constraint intel_core_event_constraints[] =
-{
- EVENT_CONSTRAINT(0x10, 0x1), /* FP_COMP_OPS_EXE */
- EVENT_CONSTRAINT(0x11, 0x2), /* FP_ASSIST */
- EVENT_CONSTRAINT(0x12, 0x2), /* MUL */
- EVENT_CONSTRAINT(0x13, 0x2), /* DIV */
- EVENT_CONSTRAINT(0x14, 0x1), /* CYCLES_DIV_BUSY */
- EVENT_CONSTRAINT(0x18, 0x1), /* IDLE_DURING_DIV */
- EVENT_CONSTRAINT(0x19, 0x2), /* DELAYED_BYPASS */
- EVENT_CONSTRAINT(0xa1, 0x1), /* RS_UOPS_DISPATCH_CYCLES */
- EVENT_CONSTRAINT(0xcb, 0x1), /* MEM_LOAD_RETIRED */
+static struct event_constraint intel_core_event_constraints[] =
+{
+ FIXED_EVENT_CONSTRAINT(0xc0, (0x3|(1ULL<<32))), /* INSTRUCTIONS_RETIRED */
+ FIXED_EVENT_CONSTRAINT(0x3c, (0x3|(1ULL<<33))), /* UNHALTED_CORE_CYCLES */
+ INTEL_EVENT_CONSTRAINT(0x10, 0x1), /* FP_COMP_OPS_EXE */
+ INTEL_EVENT_CONSTRAINT(0x11, 0x2), /* FP_ASSIST */
+ INTEL_EVENT_CONSTRAINT(0x12, 0x2), /* MUL */
+ INTEL_EVENT_CONSTRAINT(0x13, 0x2), /* DIV */
+ INTEL_EVENT_CONSTRAINT(0x14, 0x1), /* CYCLES_DIV_BUSY */
+ INTEL_EVENT_CONSTRAINT(0x18, 0x1), /* IDLE_DURING_DIV */
+ INTEL_EVENT_CONSTRAINT(0x19, 0x2), /* DELAYED_BYPASS */
+ INTEL_EVENT_CONSTRAINT(0xa1, 0x1), /* RS_UOPS_DISPATCH_CYCLES */
+ INTEL_EVENT_CONSTRAINT(0xcb, 0x1), /* MEM_LOAD_RETIRED */
+ EVENT_CONSTRAINT_END
+};
+
+static struct event_constraint intel_nehalem_event_constraints[] =
+{
+ FIXED_EVENT_CONSTRAINT(0xc0, (0xf|(1ULL<<32))), /* INSTRUCTIONS_RETIRED */
+ FIXED_EVENT_CONSTRAINT(0x3c, (0xf|(1ULL<<33))), /* UNHALTED_CORE_CYCLES */
+ INTEL_EVENT_CONSTRAINT(0x40, 0x3), /* L1D_CACHE_LD */
+ INTEL_EVENT_CONSTRAINT(0x41, 0x3), /* L1D_CACHE_ST */
+ INTEL_EVENT_CONSTRAINT(0x42, 0x3), /* L1D_CACHE_LOCK */
+ INTEL_EVENT_CONSTRAINT(0x43, 0x3), /* L1D_ALL_REF */
+ INTEL_EVENT_CONSTRAINT(0x48, 0x3), /* L1D_PEND_MISS */
+ INTEL_EVENT_CONSTRAINT(0x4e, 0x3), /* L1D_PREFETCH */
+ INTEL_EVENT_CONSTRAINT(0x51, 0x3), /* L1D */
+ INTEL_EVENT_CONSTRAINT(0x63, 0x3), /* CACHE_LOCK_CYCLES */
EVENT_CONSTRAINT_END
};
-static const struct event_constraint intel_nehalem_event_constraints[] =
-{
- EVENT_CONSTRAINT(0x40, 0x3), /* L1D_CACHE_LD */
- EVENT_CONSTRAINT(0x41, 0x3), /* L1D_CACHE_ST */
- EVENT_CONSTRAINT(0x42, 0x3), /* L1D_CACHE_LOCK */
- EVENT_CONSTRAINT(0x43, 0x3), /* L1D_ALL_REF */
- EVENT_CONSTRAINT(0x4e, 0x3), /* L1D_PREFETCH */
- EVENT_CONSTRAINT(0x4c, 0x3), /* LOAD_HIT_PRE */
- EVENT_CONSTRAINT(0x51, 0x3), /* L1D */
- EVENT_CONSTRAINT(0x52, 0x3), /* L1D_CACHE_PREFETCH_LOCK_FB_HIT */
- EVENT_CONSTRAINT(0x53, 0x3), /* L1D_CACHE_LOCK_FB_HIT */
- EVENT_CONSTRAINT(0xc5, 0x3), /* CACHE_LOCK_CYCLES */
+static struct event_constraint intel_westmere_event_constraints[] =
+{
+ FIXED_EVENT_CONSTRAINT(0xc0, (0xf|(1ULL<<32))), /* INSTRUCTIONS_RETIRED */
+ FIXED_EVENT_CONSTRAINT(0x3c, (0xf|(1ULL<<33))), /* UNHALTED_CORE_CYCLES */
+ INTEL_EVENT_CONSTRAINT(0x51, 0x3), /* L1D */
+ INTEL_EVENT_CONSTRAINT(0x60, 0x1), /* OFFCORE_REQUESTS_OUTSTANDING */
+ INTEL_EVENT_CONSTRAINT(0x63, 0x3), /* CACHE_LOCK_CYCLES */
+ EVENT_CONSTRAINT_END
+};
+
+static struct event_constraint intel_gen_event_constraints[] =
+{
+ FIXED_EVENT_CONSTRAINT(0xc0, (0x3|(1ULL<<32))), /* INSTRUCTIONS_RETIRED */
+ FIXED_EVENT_CONSTRAINT(0x3c, (0x3|(1ULL<<33))), /* UNHALTED_CORE_CYCLES */
EVENT_CONSTRAINT_END
};
[PERF_COUNT_HW_CACHE_OP_MAX]
[PERF_COUNT_HW_CACHE_RESULT_MAX];
+static __initconst u64 westmere_hw_cache_event_ids
+ [PERF_COUNT_HW_CACHE_MAX]
+ [PERF_COUNT_HW_CACHE_OP_MAX]
+ [PERF_COUNT_HW_CACHE_RESULT_MAX] =
+{
+ [ C(L1D) ] = {
+ [ C(OP_READ) ] = {
+ [ C(RESULT_ACCESS) ] = 0x010b, /* MEM_INST_RETIRED.LOADS */
+ [ C(RESULT_MISS) ] = 0x0151, /* L1D.REPL */
+ },
+ [ C(OP_WRITE) ] = {
+ [ C(RESULT_ACCESS) ] = 0x020b, /* MEM_INST_RETURED.STORES */
+ [ C(RESULT_MISS) ] = 0x0251, /* L1D.M_REPL */
+ },
+ [ C(OP_PREFETCH) ] = {
+ [ C(RESULT_ACCESS) ] = 0x014e, /* L1D_PREFETCH.REQUESTS */
+ [ C(RESULT_MISS) ] = 0x024e, /* L1D_PREFETCH.MISS */
+ },
+ },
+ [ C(L1I ) ] = {
+ [ C(OP_READ) ] = {
+ [ C(RESULT_ACCESS) ] = 0x0380, /* L1I.READS */
+ [ C(RESULT_MISS) ] = 0x0280, /* L1I.MISSES */
+ },
+ [ C(OP_WRITE) ] = {
+ [ C(RESULT_ACCESS) ] = -1,
+ [ C(RESULT_MISS) ] = -1,
+ },
+ [ C(OP_PREFETCH) ] = {
+ [ C(RESULT_ACCESS) ] = 0x0,
+ [ C(RESULT_MISS) ] = 0x0,
+ },
+ },
+ [ C(LL ) ] = {
+ [ C(OP_READ) ] = {
+ [ C(RESULT_ACCESS) ] = 0x0324, /* L2_RQSTS.LOADS */
+ [ C(RESULT_MISS) ] = 0x0224, /* L2_RQSTS.LD_MISS */
+ },
+ [ C(OP_WRITE) ] = {
+ [ C(RESULT_ACCESS) ] = 0x0c24, /* L2_RQSTS.RFOS */
+ [ C(RESULT_MISS) ] = 0x0824, /* L2_RQSTS.RFO_MISS */
+ },
+ [ C(OP_PREFETCH) ] = {
+ [ C(RESULT_ACCESS) ] = 0x4f2e, /* LLC Reference */
+ [ C(RESULT_MISS) ] = 0x412e, /* LLC Misses */
+ },
+ },
+ [ C(DTLB) ] = {
+ [ C(OP_READ) ] = {
+ [ C(RESULT_ACCESS) ] = 0x010b, /* MEM_INST_RETIRED.LOADS */
+ [ C(RESULT_MISS) ] = 0x0108, /* DTLB_LOAD_MISSES.ANY */
+ },
+ [ C(OP_WRITE) ] = {
+ [ C(RESULT_ACCESS) ] = 0x020b, /* MEM_INST_RETURED.STORES */
+ [ C(RESULT_MISS) ] = 0x010c, /* MEM_STORE_RETIRED.DTLB_MISS */
+ },
+ [ C(OP_PREFETCH) ] = {
+ [ C(RESULT_ACCESS) ] = 0x0,
+ [ C(RESULT_MISS) ] = 0x0,
+ },
+ },
+ [ C(ITLB) ] = {
+ [ C(OP_READ) ] = {
+ [ C(RESULT_ACCESS) ] = 0x01c0, /* INST_RETIRED.ANY_P */
+ [ C(RESULT_MISS) ] = 0x0185, /* ITLB_MISSES.ANY */
+ },
+ [ C(OP_WRITE) ] = {
+ [ C(RESULT_ACCESS) ] = -1,
+ [ C(RESULT_MISS) ] = -1,
+ },
+ [ C(OP_PREFETCH) ] = {
+ [ C(RESULT_ACCESS) ] = -1,
+ [ C(RESULT_MISS) ] = -1,
+ },
+ },
+ [ C(BPU ) ] = {
+ [ C(OP_READ) ] = {
+ [ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ALL_BRANCHES */
+ [ C(RESULT_MISS) ] = 0x03e8, /* BPU_CLEARS.ANY */
+ },
+ [ C(OP_WRITE) ] = {
+ [ C(RESULT_ACCESS) ] = -1,
+ [ C(RESULT_MISS) ] = -1,
+ },
+ [ C(OP_PREFETCH) ] = {
+ [ C(RESULT_ACCESS) ] = -1,
+ [ C(RESULT_MISS) ] = -1,
+ },
+ },
+};
+
static __initconst u64 nehalem_hw_cache_event_ids
[PERF_COUNT_HW_CACHE_MAX]
[PERF_COUNT_HW_CACHE_OP_MAX]
#define CORE_EVNTSEL_REG_MASK 0xFF000000ULL
#define CORE_EVNTSEL_MASK \
- (CORE_EVNTSEL_EVENT_MASK | \
- CORE_EVNTSEL_UNIT_MASK | \
- CORE_EVNTSEL_EDGE_MASK | \
- CORE_EVNTSEL_INV_MASK | \
- CORE_EVNTSEL_REG_MASK)
+ (INTEL_ARCH_EVTSEL_MASK | \
+ INTEL_ARCH_UNIT_MASK | \
+ INTEL_ARCH_EDGE_MASK | \
+ INTEL_ARCH_INV_MASK | \
+ INTEL_ARCH_CNT_MASK)
return hw_event & CORE_EVNTSEL_MASK;
}
static void p6_pmu_disable_all(void)
{
- struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
u64 val;
- if (!cpuc->enabled)
- return;
-
- cpuc->enabled = 0;
- barrier();
-
/* p6 only has one enable register */
rdmsrl(MSR_P6_EVNTSEL0, val);
val &= ~ARCH_PERFMON_EVENTSEL0_ENABLE;
{
struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
- if (!cpuc->enabled)
- return;
-
- cpuc->enabled = 0;
- barrier();
-
wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0);
if (test_bit(X86_PMC_IDX_FIXED_BTS, cpuc->active_mask))
struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
int idx;
- if (!cpuc->enabled)
- return;
-
- cpuc->enabled = 0;
- /*
- * ensure we write the disable before we start disabling the
- * events proper, so that amd_pmu_enable_event() does the
- * right thing.
- */
- barrier();
-
for (idx = 0; idx < x86_pmu.num_events; idx++) {
u64 val;
void hw_perf_disable(void)
{
+ struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+
if (!x86_pmu_initialized())
return;
- return x86_pmu.disable_all();
+
+ if (!cpuc->enabled)
+ return;
+
+ cpuc->n_added = 0;
+ cpuc->enabled = 0;
+ barrier();
+
+ x86_pmu.disable_all();
}
static void p6_pmu_enable_all(void)
{
- struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
unsigned long val;
- if (cpuc->enabled)
- return;
-
- cpuc->enabled = 1;
- barrier();
-
/* p6 only has one enable register */
rdmsrl(MSR_P6_EVNTSEL0, val);
val |= ARCH_PERFMON_EVENTSEL0_ENABLE;
{
struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
- if (cpuc->enabled)
- return;
-
- cpuc->enabled = 1;
- barrier();
-
wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, x86_pmu.intel_ctrl);
if (test_bit(X86_PMC_IDX_FIXED_BTS, cpuc->active_mask)) {
struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
int idx;
- if (cpuc->enabled)
- return;
-
- cpuc->enabled = 1;
- barrier();
-
for (idx = 0; idx < x86_pmu.num_events; idx++) {
struct perf_event *event = cpuc->events[idx];
u64 val;
}
}
+static const struct pmu pmu;
+
+static inline int is_x86_event(struct perf_event *event)
+{
+ return event->pmu == &pmu;
+}
+
+static int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign)
+{
+ struct event_constraint *c, *constraints[X86_PMC_IDX_MAX];
+ unsigned long used_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
+ int i, j, w, wmax, num = 0;
+ struct hw_perf_event *hwc;
+
+ bitmap_zero(used_mask, X86_PMC_IDX_MAX);
+
+ for (i = 0; i < n; i++) {
+ constraints[i] =
+ x86_pmu.get_event_constraints(cpuc, cpuc->event_list[i]);
+ }
+
+ /*
+ * fastpath, try to reuse previous register
+ */
+ for (i = 0; i < n; i++) {
+ hwc = &cpuc->event_list[i]->hw;
+ c = constraints[i];
+
+ /* never assigned */
+ if (hwc->idx == -1)
+ break;
+
+ /* constraint still honored */
+ if (!test_bit(hwc->idx, c->idxmsk))
+ break;
+
+ /* not already used */
+ if (test_bit(hwc->idx, used_mask))
+ break;
+
+ set_bit(hwc->idx, used_mask);
+ if (assign)
+ assign[i] = hwc->idx;
+ }
+ if (i == n)
+ goto done;
+
+ /*
+ * begin slow path
+ */
+
+ bitmap_zero(used_mask, X86_PMC_IDX_MAX);
+
+ /*
+ * weight = number of possible counters
+ *
+ * 1 = most constrained, only works on one counter
+ * wmax = least constrained, works on any counter
+ *
+ * assign events to counters starting with most
+ * constrained events.
+ */
+ wmax = x86_pmu.num_events;
+
+ /*
+ * when fixed event counters are present,
+ * wmax is incremented by 1 to account
+ * for one more choice
+ */
+ if (x86_pmu.num_events_fixed)
+ wmax++;
+
+ for (w = 1, num = n; num && w <= wmax; w++) {
+ /* for each event */
+ for (i = 0; num && i < n; i++) {
+ c = constraints[i];
+ hwc = &cpuc->event_list[i]->hw;
+
+ if (c->weight != w)
+ continue;
+
+ for_each_bit(j, c->idxmsk, X86_PMC_IDX_MAX) {
+ if (!test_bit(j, used_mask))
+ break;
+ }
+
+ if (j == X86_PMC_IDX_MAX)
+ break;
+
+ set_bit(j, used_mask);
+
+ if (assign)
+ assign[i] = j;
+ num--;
+ }
+ }
+done:
+ /*
+ * scheduling failed or is just a simulation,
+ * free resources if necessary
+ */
+ if (!assign || num) {
+ for (i = 0; i < n; i++) {
+ if (x86_pmu.put_event_constraints)
+ x86_pmu.put_event_constraints(cpuc, cpuc->event_list[i]);
+ }
+ }
+ return num ? -ENOSPC : 0;
+}
+
+/*
+ * dogrp: true if must collect siblings events (group)
+ * returns total number of events and error code
+ */
+static int collect_events(struct cpu_hw_events *cpuc, struct perf_event *leader, bool dogrp)
+{
+ struct perf_event *event;
+ int n, max_count;
+
+ max_count = x86_pmu.num_events + x86_pmu.num_events_fixed;
+
+ /* current number of events already accepted */
+ n = cpuc->n_events;
+
+ if (is_x86_event(leader)) {
+ if (n >= max_count)
+ return -ENOSPC;
+ cpuc->event_list[n] = leader;
+ n++;
+ }
+ if (!dogrp)
+ return n;
+
+ list_for_each_entry(event, &leader->sibling_list, group_entry) {
+ if (!is_x86_event(event) ||
+ event->state <= PERF_EVENT_STATE_OFF)
+ continue;
+
+ if (n >= max_count)
+ return -ENOSPC;
+
+ cpuc->event_list[n] = event;
+ n++;
+ }
+ return n;
+}
+
+
+static inline void x86_assign_hw_event(struct perf_event *event,
+ struct hw_perf_event *hwc, int idx)
+{
+ hwc->idx = idx;
+
+ if (hwc->idx == X86_PMC_IDX_FIXED_BTS) {
+ hwc->config_base = 0;
+ hwc->event_base = 0;
+ } else if (hwc->idx >= X86_PMC_IDX_FIXED) {
+ hwc->config_base = MSR_ARCH_PERFMON_FIXED_CTR_CTRL;
+ /*
+ * We set it so that event_base + idx in wrmsr/rdmsr maps to
+ * MSR_ARCH_PERFMON_FIXED_CTR0 ... CTR2:
+ */
+ hwc->event_base =
+ MSR_ARCH_PERFMON_FIXED_CTR0 - X86_PMC_IDX_FIXED;
+ } else {
+ hwc->config_base = x86_pmu.eventsel;
+ hwc->event_base = x86_pmu.perfctr;
+ }
+}
+
+static void __x86_pmu_disable(struct perf_event *event, struct cpu_hw_events *cpuc);
+
void hw_perf_enable(void)
{
+ struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+ struct perf_event *event;
+ struct hw_perf_event *hwc;
+ int i;
+
if (!x86_pmu_initialized())
return;
+
+ if (cpuc->enabled)
+ return;
+
+ if (cpuc->n_added) {
+ /*
+ * apply assignment obtained either from
+ * hw_perf_group_sched_in() or x86_pmu_enable()
+ *
+ * step1: save events moving to new counters
+ * step2: reprogram moved events into new counters
+ */
+ for (i = 0; i < cpuc->n_events; i++) {
+
+ event = cpuc->event_list[i];
+ hwc = &event->hw;
+
+ if (hwc->idx == -1 || hwc->idx == cpuc->assign[i])
+ continue;
+
+ __x86_pmu_disable(event, cpuc);
+
+ hwc->idx = -1;
+ }
+
+ for (i = 0; i < cpuc->n_events; i++) {
+
+ event = cpuc->event_list[i];
+ hwc = &event->hw;
+
+ if (hwc->idx == -1) {
+ x86_assign_hw_event(event, hwc, cpuc->assign[i]);
+ x86_perf_event_set_period(event, hwc, hwc->idx);
+ }
+ /*
+ * need to mark as active because x86_pmu_disable()
+ * clear active_mask and eventsp[] yet it preserves
+ * idx
+ */
+ set_bit(hwc->idx, cpuc->active_mask);
+ cpuc->events[hwc->idx] = event;
+
+ x86_pmu.enable(hwc, hwc->idx);
+ perf_event_update_userpage(event);
+ }
+ cpuc->n_added = 0;
+ perf_events_lapic_init();
+ }
+
+ cpuc->enabled = 1;
+ barrier();
+
x86_pmu.enable_all();
}
bits |= 0x2;
if (hwc->config & ARCH_PERFMON_EVENTSEL_OS)
bits |= 0x1;
+
+ /*
+ * ANY bit is supported in v3 and up
+ */
+ if (x86_pmu.version > 2 && hwc->config & ARCH_PERFMON_EVENTSEL_ANY)
+ bits |= 0x4;
+
bits <<= (idx * 4);
mask = 0xfULL << (idx * 4);
x86_pmu_enable_event(hwc, idx);
}
-static int fixed_mode_idx(struct hw_perf_event *hwc)
-{
- unsigned int hw_event;
-
- hw_event = hwc->config & ARCH_PERFMON_EVENT_MASK;
-
- if (unlikely((hw_event ==
- x86_pmu.event_map(PERF_COUNT_HW_BRANCH_INSTRUCTIONS)) &&
- (hwc->sample_period == 1)))
- return X86_PMC_IDX_FIXED_BTS;
-
- if (!x86_pmu.num_events_fixed)
- return -1;
-
- /*
- * fixed counters do not take all possible filters
- */
- if (hwc->config & ARCH_PERFMON_EVENT_FILTER_MASK)
- return -1;
-
- if (unlikely(hw_event == x86_pmu.event_map(PERF_COUNT_HW_INSTRUCTIONS)))
- return X86_PMC_IDX_FIXED_INSTRUCTIONS;
- if (unlikely(hw_event == x86_pmu.event_map(PERF_COUNT_HW_CPU_CYCLES)))
- return X86_PMC_IDX_FIXED_CPU_CYCLES;
- if (unlikely(hw_event == x86_pmu.event_map(PERF_COUNT_HW_BUS_CYCLES)))
- return X86_PMC_IDX_FIXED_BUS_CYCLES;
-
- return -1;
-}
-
-/*
- * generic counter allocator: get next free counter
- */
-static int
-gen_get_event_idx(struct cpu_hw_events *cpuc, struct hw_perf_event *hwc)
-{
- int idx;
-
- idx = find_first_zero_bit(cpuc->used_mask, x86_pmu.num_events);
- return idx == x86_pmu.num_events ? -1 : idx;
-}
-
/*
- * intel-specific counter allocator: check event constraints
- */
-static int
-intel_get_event_idx(struct cpu_hw_events *cpuc, struct hw_perf_event *hwc)
-{
- const struct event_constraint *event_constraint;
- int i, code;
-
- if (!event_constraints)
- goto skip;
-
- code = hwc->config & CORE_EVNTSEL_EVENT_MASK;
-
- for_each_event_constraint(event_constraint, event_constraints) {
- if (code == event_constraint->code) {
- for_each_bit(i, event_constraint->idxmsk, X86_PMC_IDX_MAX) {
- if (!test_and_set_bit(i, cpuc->used_mask))
- return i;
- }
- return -1;
- }
- }
-skip:
- return gen_get_event_idx(cpuc, hwc);
-}
-
-static int
-x86_schedule_event(struct cpu_hw_events *cpuc, struct hw_perf_event *hwc)
-{
- int idx;
-
- idx = fixed_mode_idx(hwc);
- if (idx == X86_PMC_IDX_FIXED_BTS) {
- /* BTS is already occupied. */
- if (test_and_set_bit(idx, cpuc->used_mask))
- return -EAGAIN;
-
- hwc->config_base = 0;
- hwc->event_base = 0;
- hwc->idx = idx;
- } else if (idx >= 0) {
- /*
- * Try to get the fixed event, if that is already taken
- * then try to get a generic event:
- */
- if (test_and_set_bit(idx, cpuc->used_mask))
- goto try_generic;
-
- hwc->config_base = MSR_ARCH_PERFMON_FIXED_CTR_CTRL;
- /*
- * We set it so that event_base + idx in wrmsr/rdmsr maps to
- * MSR_ARCH_PERFMON_FIXED_CTR0 ... CTR2:
- */
- hwc->event_base =
- MSR_ARCH_PERFMON_FIXED_CTR0 - X86_PMC_IDX_FIXED;
- hwc->idx = idx;
- } else {
- idx = hwc->idx;
- /* Try to get the previous generic event again */
- if (idx == -1 || test_and_set_bit(idx, cpuc->used_mask)) {
-try_generic:
- idx = x86_pmu.get_event_idx(cpuc, hwc);
- if (idx == -1)
- return -EAGAIN;
-
- set_bit(idx, cpuc->used_mask);
- hwc->idx = idx;
- }
- hwc->config_base = x86_pmu.eventsel;
- hwc->event_base = x86_pmu.perfctr;
- }
-
- return idx;
-}
-
-/*
- * Find a PMC slot for the freshly enabled / scheduled in event:
+ * activate a single event
+ *
+ * The event is added to the group of enabled events
+ * but only if it can be scehduled with existing events.
+ *
+ * Called with PMU disabled. If successful and return value 1,
+ * then guaranteed to call perf_enable() and hw_perf_enable()
*/
static int x86_pmu_enable(struct perf_event *event)
{
struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
- struct hw_perf_event *hwc = &event->hw;
- int idx;
-
- idx = x86_schedule_event(cpuc, hwc);
- if (idx < 0)
- return idx;
-
- perf_events_lapic_init();
+ struct hw_perf_event *hwc;
+ int assign[X86_PMC_IDX_MAX];
+ int n, n0, ret;
- x86_pmu.disable(hwc, idx);
+ hwc = &event->hw;
- cpuc->events[idx] = event;
- set_bit(idx, cpuc->active_mask);
+ n0 = cpuc->n_events;
+ n = collect_events(cpuc, event, false);
+ if (n < 0)
+ return n;
- x86_perf_event_set_period(event, hwc, idx);
- x86_pmu.enable(hwc, idx);
+ ret = x86_schedule_events(cpuc, n, assign);
+ if (ret)
+ return ret;
+ /*
+ * copy new assignment, now we know it is possible
+ * will be used by hw_perf_enable()
+ */
+ memcpy(cpuc->assign, assign, n*sizeof(int));
- perf_event_update_userpage(event);
+ cpuc->n_events = n;
+ cpuc->n_added = n - n0;
return 0;
}
pr_info("CPU#%d: overflow: %016llx\n", cpu, overflow);
pr_info("CPU#%d: fixed: %016llx\n", cpu, fixed);
}
- pr_info("CPU#%d: used: %016llx\n", cpu, *(u64 *)cpuc->used_mask);
+ pr_info("CPU#%d: active: %016llx\n", cpu, *(u64 *)cpuc->active_mask);
for (idx = 0; idx < x86_pmu.num_events; idx++) {
rdmsrl(x86_pmu.eventsel + idx, pmc_ctrl);
event->pending_kill = POLL_IN;
}
-static void x86_pmu_disable(struct perf_event *event)
+static void __x86_pmu_disable(struct perf_event *event, struct cpu_hw_events *cpuc)
{
- struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
struct hw_perf_event *hwc = &event->hw;
int idx = hwc->idx;
clear_bit(idx, cpuc->active_mask);
x86_pmu.disable(hwc, idx);
- /*
- * Make sure the cleared pointer becomes visible before we
- * (potentially) free the event:
- */
- barrier();
-
/*
* Drain the remaining delta count out of a event
* that we are disabling:
intel_pmu_drain_bts_buffer(cpuc);
cpuc->events[idx] = NULL;
- clear_bit(idx, cpuc->used_mask);
+}
+
+static void x86_pmu_disable(struct perf_event *event)
+{
+ struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+ int i;
+
+ __x86_pmu_disable(event, cpuc);
+
+ for (i = 0; i < cpuc->n_events; i++) {
+ if (event == cpuc->event_list[i]) {
+ if (x86_pmu.put_event_constraints)
+ x86_pmu.put_event_constraints(cpuc, event);
+
+ while (++i < cpuc->n_events)
+ cpuc->event_list[i-1] = cpuc->event_list[i];
+
+ --cpuc->n_events;
+ break;
+ }
+ }
perf_event_update_userpage(event);
}
return NOTIFY_STOP;
}
+static struct event_constraint unconstrained;
+
+static struct event_constraint bts_constraint =
+ EVENT_CONSTRAINT(0, 1ULL << X86_PMC_IDX_FIXED_BTS, 0);
+
+static struct event_constraint *
+intel_special_constraints(struct perf_event *event)
+{
+ unsigned int hw_event;
+
+ hw_event = event->hw.config & INTEL_ARCH_EVENT_MASK;
+
+ if (unlikely((hw_event ==
+ x86_pmu.event_map(PERF_COUNT_HW_BRANCH_INSTRUCTIONS)) &&
+ (event->hw.sample_period == 1))) {
+
+ return &bts_constraint;
+ }
+ return NULL;
+}
+
+static struct event_constraint *
+intel_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event)
+{
+ struct event_constraint *c;
+
+ c = intel_special_constraints(event);
+ if (c)
+ return c;
+
+ if (x86_pmu.event_constraints) {
+ for_each_event_constraint(c, x86_pmu.event_constraints) {
+ if ((event->hw.config & c->cmask) == c->code)
+ return c;
+ }
+ }
+
+ return &unconstrained;
+}
+
+static struct event_constraint *
+amd_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event)
+{
+ return &unconstrained;
+}
+
+static int x86_event_sched_in(struct perf_event *event,
+ struct perf_cpu_context *cpuctx, int cpu)
+{
+ int ret = 0;
+
+ event->state = PERF_EVENT_STATE_ACTIVE;
+ event->oncpu = cpu;
+ event->tstamp_running += event->ctx->time - event->tstamp_stopped;
+
+ if (!is_x86_event(event))
+ ret = event->pmu->enable(event);
+
+ if (!ret && !is_software_event(event))
+ cpuctx->active_oncpu++;
+
+ if (!ret && event->attr.exclusive)
+ cpuctx->exclusive = 1;
+
+ return ret;
+}
+
+static void x86_event_sched_out(struct perf_event *event,
+ struct perf_cpu_context *cpuctx, int cpu)
+{
+ event->state = PERF_EVENT_STATE_INACTIVE;
+ event->oncpu = -1;
+
+ if (!is_x86_event(event))
+ event->pmu->disable(event);
+
+ event->tstamp_running -= event->ctx->time - event->tstamp_stopped;
+
+ if (!is_software_event(event))
+ cpuctx->active_oncpu--;
+
+ if (event->attr.exclusive || !cpuctx->active_oncpu)
+ cpuctx->exclusive = 0;
+}
+
+/*
+ * Called to enable a whole group of events.
+ * Returns 1 if the group was enabled, or -EAGAIN if it could not be.
+ * Assumes the caller has disabled interrupts and has
+ * frozen the PMU with hw_perf_save_disable.
+ *
+ * called with PMU disabled. If successful and return value 1,
+ * then guaranteed to call perf_enable() and hw_perf_enable()
+ */
+int hw_perf_group_sched_in(struct perf_event *leader,
+ struct perf_cpu_context *cpuctx,
+ struct perf_event_context *ctx, int cpu)
+{
+ struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);
+ struct perf_event *sub;
+ int assign[X86_PMC_IDX_MAX];
+ int n0, n1, ret;
+
+ /* n0 = total number of events */
+ n0 = collect_events(cpuc, leader, true);
+ if (n0 < 0)
+ return n0;
+
+ ret = x86_schedule_events(cpuc, n0, assign);
+ if (ret)
+ return ret;
+
+ ret = x86_event_sched_in(leader, cpuctx, cpu);
+ if (ret)
+ return ret;
+
+ n1 = 1;
+ list_for_each_entry(sub, &leader->sibling_list, group_entry) {
+ if (sub->state > PERF_EVENT_STATE_OFF) {
+ ret = x86_event_sched_in(sub, cpuctx, cpu);
+ if (ret)
+ goto undo;
+ ++n1;
+ }
+ }
+ /*
+ * copy new assignment, now we know it is possible
+ * will be used by hw_perf_enable()
+ */
+ memcpy(cpuc->assign, assign, n0*sizeof(int));
+
+ cpuc->n_events = n0;
+ cpuc->n_added = n1;
+ ctx->nr_active += n1;
+
+ /*
+ * 1 means successful and events are active
+ * This is not quite true because we defer
+ * actual activation until hw_perf_enable() but
+ * this way we* ensure caller won't try to enable
+ * individual events
+ */
+ return 1;
+undo:
+ x86_event_sched_out(leader, cpuctx, cpu);
+ n0 = 1;
+ list_for_each_entry(sub, &leader->sibling_list, group_entry) {
+ if (sub->state == PERF_EVENT_STATE_ACTIVE) {
+ x86_event_sched_out(sub, cpuctx, cpu);
+ if (++n0 == n1)
+ break;
+ }
+ }
+ return ret;
+}
+
static __read_mostly struct notifier_block perf_event_nmi_notifier = {
.notifier_call = perf_event_nmi_handler,
.next = NULL,
*/
.event_bits = 32,
.event_mask = (1ULL << 32) - 1,
- .get_event_idx = intel_get_event_idx,
+ .get_event_constraints = intel_get_event_constraints,
+ .event_constraints = intel_p6_event_constraints
};
static __initconst struct x86_pmu intel_pmu = {
.max_period = (1ULL << 31) - 1,
.enable_bts = intel_pmu_enable_bts,
.disable_bts = intel_pmu_disable_bts,
- .get_event_idx = intel_get_event_idx,
+ .get_event_constraints = intel_get_event_constraints
};
static __initconst struct x86_pmu amd_pmu = {
.apic = 1,
/* use highest bit to detect overflow */
.max_period = (1ULL << 47) - 1,
- .get_event_idx = gen_get_event_idx,
+ .get_event_constraints = amd_get_event_constraints
};
static __init int p6_pmu_init(void)
case 7:
case 8:
case 11: /* Pentium III */
- event_constraints = intel_p6_event_constraints;
- break;
case 9:
case 13:
/* Pentium M */
- event_constraints = intel_p6_event_constraints;
break;
default:
pr_cont("unsupported p6 CPU model %d ",
memcpy(hw_cache_event_ids, core2_hw_cache_event_ids,
sizeof(hw_cache_event_ids));
+ x86_pmu.event_constraints = intel_core_event_constraints;
pr_cont("Core2 events, ");
- event_constraints = intel_core_event_constraints;
break;
- default:
- case 26:
+
+ case 26: /* 45 nm nehalem, "Bloomfield" */
+ case 30: /* 45 nm nehalem, "Lynnfield" */
memcpy(hw_cache_event_ids, nehalem_hw_cache_event_ids,
sizeof(hw_cache_event_ids));
- event_constraints = intel_nehalem_event_constraints;
+ x86_pmu.event_constraints = intel_nehalem_event_constraints;
pr_cont("Nehalem/Corei7 events, ");
break;
case 28:
memcpy(hw_cache_event_ids, atom_hw_cache_event_ids,
sizeof(hw_cache_event_ids));
+ x86_pmu.event_constraints = intel_gen_event_constraints;
pr_cont("Atom events, ");
break;
+
+ case 37: /* 32 nm nehalem, "Clarkdale" */
+ case 44: /* 32 nm nehalem, "Gulftown" */
+ memcpy(hw_cache_event_ids, westmere_hw_cache_event_ids,
+ sizeof(hw_cache_event_ids));
+
+ x86_pmu.event_constraints = intel_westmere_event_constraints;
+ pr_cont("Westmere events, ");
+ break;
+ default:
+ /*
+ * default constraints for v2 and up
+ */
+ x86_pmu.event_constraints = intel_gen_event_constraints;
+ pr_cont("generic architected perfmon, ");
}
return 0;
}
perf_events_lapic_init();
register_die_notifier(&perf_event_nmi_notifier);
+ unconstrained = (struct event_constraint)
+ EVENT_CONSTRAINT(0, (1ULL << x86_pmu.num_events) - 1, 0);
+
pr_info("... version: %d\n", x86_pmu.version);
pr_info("... bit width: %d\n", x86_pmu.event_bits);
pr_info("... generic registers: %d\n", x86_pmu.num_events);
.unthrottle = x86_pmu_unthrottle,
};
-static int
-validate_event(struct cpu_hw_events *cpuc, struct perf_event *event)
-{
- struct hw_perf_event fake_event = event->hw;
-
- if (event->pmu && event->pmu != &pmu)
- return 0;
-
- return x86_schedule_event(cpuc, &fake_event) >= 0;
-}
-
+/*
+ * validate a single event group
+ *
+ * validation include:
+ * - check events are compatible which each other
+ * - events do not compete for the same counter
+ * - number of events <= number of counters
+ *
+ * validation ensures the group can be loaded onto the
+ * PMU if it was the only group available.
+ */
static int validate_group(struct perf_event *event)
{
- struct perf_event *sibling, *leader = event->group_leader;
- struct cpu_hw_events fake_pmu;
+ struct perf_event *leader = event->group_leader;
+ struct cpu_hw_events *fake_cpuc;
+ int ret, n;
- memset(&fake_pmu, 0, sizeof(fake_pmu));
+ ret = -ENOMEM;
+ fake_cpuc = kmalloc(sizeof(*fake_cpuc), GFP_KERNEL | __GFP_ZERO);
+ if (!fake_cpuc)
+ goto out;
- if (!validate_event(&fake_pmu, leader))
- return -ENOSPC;
+ /*
+ * the event is not yet connected with its
+ * siblings therefore we must first collect
+ * existing siblings, then add the new event
+ * before we can simulate the scheduling
+ */
+ ret = -ENOSPC;
+ n = collect_events(fake_cpuc, leader, true);
+ if (n < 0)
+ goto out_free;
- list_for_each_entry(sibling, &leader->sibling_list, group_entry) {
- if (!validate_event(&fake_pmu, sibling))
- return -ENOSPC;
- }
+ fake_cpuc->n_events = n;
+ n = collect_events(fake_cpuc, event, false);
+ if (n < 0)
+ goto out_free;
- if (!validate_event(&fake_pmu, event))
- return -ENOSPC;
+ fake_cpuc->n_events = n;
- return 0;
+ ret = x86_schedule_events(fake_cpuc, n, NULL);
+
+out_free:
+ kfree(fake_cpuc);
+out:
+ return ret;
}
const struct pmu *hw_perf_event_init(struct perf_event *event)
{
+ const struct pmu *tmp;
int err;
err = __hw_perf_event_init(event);
if (!err) {
+ /*
+ * we temporarily connect event to its pmu
+ * such that validate_group() can classify
+ * it as an x86 event using is_x86_event()
+ */
+ tmp = event->pmu;
+ event->pmu = &pmu;
+
if (event->group_leader != event)
err = validate_group(event);
+
+ event->pmu = tmp;
}
if (err) {
if (event->destroy)
static DEFINE_PER_CPU(struct perf_callchain_entry, pmc_irq_entry);
static DEFINE_PER_CPU(struct perf_callchain_entry, pmc_nmi_entry);
-static DEFINE_PER_CPU(int, in_ignored_frame);
static void
static int backtrace_stack(void *data, char *name)
{
- per_cpu(in_ignored_frame, smp_processor_id()) =
- x86_is_stack_id(NMI_STACK, name) ||
- x86_is_stack_id(DEBUG_STACK, name);
-
return 0;
}
{
struct perf_callchain_entry *entry = data;
- if (per_cpu(in_ignored_frame, smp_processor_id()))
- return;
-
if (reliable)
callchain_store(entry, addr);
}
is_user = user_mode(regs);
- if (!current || current->pid == 0)
- return;
-
if (is_user && current->state != TASK_RUNNING)
return;
};
#define PERF_RECORD_MISC_CPUMODE_MASK (3 << 0)
-#define PERF_RECORD_MISC_CPUMODE_UNKNOWN (0 << 0)
+#define PERF_RECORD_MISC_CPUMODE_UNKNOWN (0 << 0)
#define PERF_RECORD_MISC_KERNEL (1 << 0)
#define PERF_RECORD_MISC_USER (2 << 0)
#define PERF_RECORD_MISC_HYPERVISOR (3 << 0)
* u64 stream_id;
* };
*/
- PERF_RECORD_THROTTLE = 5,
- PERF_RECORD_UNTHROTTLE = 6,
+ PERF_RECORD_THROTTLE = 5,
+ PERF_RECORD_UNTHROTTLE = 6,
/*
* struct {
/*
* struct {
- * struct perf_event_header header;
- * u32 pid, tid;
+ * struct perf_event_header header;
+ * u32 pid, tid;
*
- * struct read_format values;
+ * struct read_format values;
* };
*/
PERF_RECORD_READ = 8,
* char data[size];}&& PERF_SAMPLE_RAW
* };
*/
- PERF_RECORD_SAMPLE = 9,
+ PERF_RECORD_SAMPLE = 9,
PERF_RECORD_MAX, /* non-ABI */
};
atomic64_t period_left;
u64 interrupts;
- u64 freq_count;
- u64 freq_interrupts;
- u64 freq_stamp;
+ u64 freq_time_stamp;
+ u64 freq_count_stamp;
#endif
};
struct perf_sample_data *,
struct pt_regs *regs);
+enum perf_group_flag {
+ PERF_GROUP_SOFTWARE = 0x1,
+};
+
/**
* struct perf_event - performance event kernel representation:
*/
struct list_head event_entry;
struct list_head sibling_list;
int nr_siblings;
+ int group_flags;
struct perf_event *group_leader;
struct perf_event *output;
const struct pmu *pmu;
perf_overflow_handler_t overflow_handler;
-#ifdef CONFIG_EVENT_PROFILE
+#ifdef CONFIG_EVENT_TRACING
struct event_filter *filter;
#endif
*/
struct mutex mutex;
- struct list_head group_list;
+ struct list_head pinned_groups;
+ struct list_head flexible_groups;
struct list_head event_list;
int nr_events;
int nr_active;
extern const struct pmu *hw_perf_event_init(struct perf_event *event);
-extern void perf_event_task_sched_in(struct task_struct *task, int cpu);
-extern void perf_event_task_sched_out(struct task_struct *task,
- struct task_struct *next, int cpu);
-extern void perf_event_task_tick(struct task_struct *task, int cpu);
+extern void perf_event_task_sched_in(struct task_struct *task);
+extern void perf_event_task_sched_out(struct task_struct *task, struct task_struct *next);
+extern void perf_event_task_tick(struct task_struct *task);
extern int perf_event_init_task(struct task_struct *child);
extern void perf_event_exit_task(struct task_struct *child);
extern void perf_event_free_task(struct task_struct *task);
*/
static inline int is_software_event(struct perf_event *event)
{
- return (event->attr.type != PERF_TYPE_RAW) &&
- (event->attr.type != PERF_TYPE_HARDWARE) &&
- (event->attr.type != PERF_TYPE_HW_CACHE);
+ switch (event->attr.type) {
+ case PERF_TYPE_SOFTWARE:
+ case PERF_TYPE_TRACEPOINT:
+ /* for now the breakpoint stuff also works as software event */
+ case PERF_TYPE_BREAKPOINT:
+ return 1;
+ }
+ return 0;
}
extern atomic_t perf_swevent_enabled[PERF_COUNT_SW_MAX];
extern int sysctl_perf_event_sample_rate;
extern void perf_event_init(void);
-extern void perf_tp_event(int event_id, u64 addr, u64 count,
- void *record, int entry_size);
+extern void perf_tp_event(int event_id, u64 addr, u64 count, void *record, int entry_size);
extern void perf_bp_event(struct perf_event *event, void *data);
#ifndef perf_misc_flags
extern void perf_event_disable(struct perf_event *event);
#else
static inline void
-perf_event_task_sched_in(struct task_struct *task, int cpu) { }
+perf_event_task_sched_in(struct task_struct *task) { }
static inline void
perf_event_task_sched_out(struct task_struct *task,
- struct task_struct *next, int cpu) { }
+ struct task_struct *next) { }
static inline void
-perf_event_task_tick(struct task_struct *task, int cpu) { }
+perf_event_task_tick(struct task_struct *task) { }
static inline int perf_event_init_task(struct task_struct *child) { return 0; }
static inline void perf_event_exit_task(struct task_struct *child) { }
static inline void perf_event_free_task(struct task_struct *task) { }
perf_sw_event(u32 event_id, u64 nr, int nmi,
struct pt_regs *regs, u64 addr) { }
static inline void
-perf_bp_event(struct perf_event *event, void *data) { }
+perf_bp_event(struct perf_event *event, void *data) { }
static inline void perf_event_mmap(struct vm_area_struct *vma) { }
static inline void perf_event_comm(struct task_struct *tsk) { }
static inline void perf_event_fork(struct task_struct *tsk) { }
static inline void perf_event_init(void) { }
-static inline int perf_swevent_get_recursion_context(void) { return -1; }
+static inline int perf_swevent_get_recursion_context(void) { return -1; }
static inline void perf_swevent_put_recursion_context(int rctx) { }
static inline void perf_event_enable(struct perf_event *event) { }
static inline void perf_event_disable(struct perf_event *event) { }
event->total_time_running = run_end - event->tstamp_running;
}
+static struct list_head *
+ctx_group_list(struct perf_event *event, struct perf_event_context *ctx)
+{
+ if (event->attr.pinned)
+ return &ctx->pinned_groups;
+ else
+ return &ctx->flexible_groups;
+}
+
/*
* Add a event from the lists for its context.
* Must be called with ctx->mutex and ctx->lock held.
* add it straight to the context's event list, or to the group
* leader's sibling list:
*/
- if (group_leader == event)
- list_add_tail(&event->group_entry, &ctx->group_list);
- else {
+ if (group_leader == event) {
+ struct list_head *list;
+
+ if (is_software_event(event))
+ event->group_flags |= PERF_GROUP_SOFTWARE;
+
+ list = ctx_group_list(event, ctx);
+ list_add_tail(&event->group_entry, list);
+ } else {
+ if (group_leader->group_flags & PERF_GROUP_SOFTWARE &&
+ !is_software_event(event))
+ group_leader->group_flags &= ~PERF_GROUP_SOFTWARE;
+
list_add_tail(&event->group_entry, &group_leader->sibling_list);
group_leader->nr_siblings++;
}
* to the context list directly:
*/
list_for_each_entry_safe(sibling, tmp, &event->sibling_list, group_entry) {
+ struct list_head *list;
- list_move_tail(&sibling->group_entry, &ctx->group_list);
+ list = ctx_group_list(event, ctx);
+ list_move_tail(&sibling->group_entry, list);
sibling->group_leader = sibling;
+
+ /* Inherit group flags from the previous leader */
+ sibling->group_flags = event->group_flags;
}
}
return -EAGAIN;
}
-/*
- * Return 1 for a group consisting entirely of software events,
- * 0 if the group contains any hardware events.
- */
-static int is_software_only_group(struct perf_event *leader)
-{
- struct perf_event *event;
-
- if (!is_software_event(leader))
- return 0;
-
- list_for_each_entry(event, &leader->sibling_list, group_entry)
- if (!is_software_event(event))
- return 0;
-
- return 1;
-}
-
/*
* Work out whether we can put this event group on the CPU now.
*/
/*
* Groups consisting entirely of software events can always go on.
*/
- if (is_software_only_group(event))
+ if (event->group_flags & PERF_GROUP_SOFTWARE)
return 1;
/*
* If an exclusive group is already on, no other hardware
return 0;
}
-void __perf_event_sched_out(struct perf_event_context *ctx,
- struct perf_cpu_context *cpuctx)
+enum event_type_t {
+ EVENT_FLEXIBLE = 0x1,
+ EVENT_PINNED = 0x2,
+ EVENT_ALL = EVENT_FLEXIBLE | EVENT_PINNED,
+};
+
+static void ctx_sched_out(struct perf_event_context *ctx,
+ struct perf_cpu_context *cpuctx,
+ enum event_type_t event_type)
{
struct perf_event *event;
update_context_time(ctx);
perf_disable();
- if (ctx->nr_active) {
- list_for_each_entry(event, &ctx->group_list, group_entry)
+ if (!ctx->nr_active)
+ goto out_enable;
+
+ if (event_type & EVENT_PINNED)
+ list_for_each_entry(event, &ctx->pinned_groups, group_entry)
group_sched_out(event, cpuctx, ctx);
- }
+
+ if (event_type & EVENT_FLEXIBLE)
+ list_for_each_entry(event, &ctx->flexible_groups, group_entry)
+ group_sched_out(event, cpuctx, ctx);
+
+ out_enable:
perf_enable();
out:
raw_spin_unlock(&ctx->lock);
* not restart the event.
*/
void perf_event_task_sched_out(struct task_struct *task,
- struct task_struct *next, int cpu)
+ struct task_struct *next)
{
- struct perf_cpu_context *cpuctx = &per_cpu(perf_cpu_context, cpu);
+ struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context);
struct perf_event_context *ctx = task->perf_event_ctxp;
struct perf_event_context *next_ctx;
struct perf_event_context *parent;
rcu_read_unlock();
if (do_switch) {
- __perf_event_sched_out(ctx, cpuctx);
+ ctx_sched_out(ctx, cpuctx, EVENT_ALL);
cpuctx->task_ctx = NULL;
}
}
-/*
- * Called with IRQs disabled
- */
-static void __perf_event_task_sched_out(struct perf_event_context *ctx)
+static void task_ctx_sched_out(struct perf_event_context *ctx,
+ enum event_type_t event_type)
{
struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context);
if (WARN_ON_ONCE(ctx != cpuctx->task_ctx))
return;
- __perf_event_sched_out(ctx, cpuctx);
+ ctx_sched_out(ctx, cpuctx, event_type);
cpuctx->task_ctx = NULL;
}
/*
* Called with IRQs disabled
*/
-static void perf_event_cpu_sched_out(struct perf_cpu_context *cpuctx)
+static void __perf_event_task_sched_out(struct perf_event_context *ctx)
{
- __perf_event_sched_out(&cpuctx->ctx, cpuctx);
+ task_ctx_sched_out(ctx, EVENT_ALL);
+}
+
+/*
+ * Called with IRQs disabled
+ */
+static void cpu_ctx_sched_out(struct perf_cpu_context *cpuctx,
+ enum event_type_t event_type)
+{
+ ctx_sched_out(&cpuctx->ctx, cpuctx, event_type);
}
static void
-__perf_event_sched_in(struct perf_event_context *ctx,
- struct perf_cpu_context *cpuctx, int cpu)
+ctx_pinned_sched_in(struct perf_event_context *ctx,
+ struct perf_cpu_context *cpuctx,
+ int cpu)
{
struct perf_event *event;
- int can_add_hw = 1;
-
- raw_spin_lock(&ctx->lock);
- ctx->is_active = 1;
- if (likely(!ctx->nr_events))
- goto out;
-
- ctx->timestamp = perf_clock();
- perf_disable();
-
- /*
- * First go through the list and put on any pinned groups
- * in order to give them the best chance of going on.
- */
- list_for_each_entry(event, &ctx->group_list, group_entry) {
- if (event->state <= PERF_EVENT_STATE_OFF ||
- !event->attr.pinned)
+ list_for_each_entry(event, &ctx->pinned_groups, group_entry) {
+ if (event->state <= PERF_EVENT_STATE_OFF)
continue;
if (event->cpu != -1 && event->cpu != cpu)
continue;
event->state = PERF_EVENT_STATE_ERROR;
}
}
+}
- list_for_each_entry(event, &ctx->group_list, group_entry) {
- /*
- * Ignore events in OFF or ERROR state, and
- * ignore pinned events since we did them already.
- */
- if (event->state <= PERF_EVENT_STATE_OFF ||
- event->attr.pinned)
- continue;
+static void
+ctx_flexible_sched_in(struct perf_event_context *ctx,
+ struct perf_cpu_context *cpuctx,
+ int cpu)
+{
+ struct perf_event *event;
+ int can_add_hw = 1;
+ list_for_each_entry(event, &ctx->flexible_groups, group_entry) {
+ /* Ignore events in OFF or ERROR state */
+ if (event->state <= PERF_EVENT_STATE_OFF)
+ continue;
/*
* Listen to the 'cpu' scheduling filter constraint
* of events:
if (group_sched_in(event, cpuctx, ctx, cpu))
can_add_hw = 0;
}
+}
+
+static void
+ctx_sched_in(struct perf_event_context *ctx,
+ struct perf_cpu_context *cpuctx,
+ enum event_type_t event_type)
+{
+ int cpu = smp_processor_id();
+
+ raw_spin_lock(&ctx->lock);
+ ctx->is_active = 1;
+ if (likely(!ctx->nr_events))
+ goto out;
+
+ ctx->timestamp = perf_clock();
+
+ perf_disable();
+
+ /*
+ * First go through the list and put on any pinned groups
+ * in order to give them the best chance of going on.
+ */
+ if (event_type & EVENT_PINNED)
+ ctx_pinned_sched_in(ctx, cpuctx, cpu);
+
+ /* Then walk through the lower prio flexible groups */
+ if (event_type & EVENT_FLEXIBLE)
+ ctx_flexible_sched_in(ctx, cpuctx, cpu);
+
perf_enable();
out:
raw_spin_unlock(&ctx->lock);
}
+static void cpu_ctx_sched_in(struct perf_cpu_context *cpuctx,
+ enum event_type_t event_type)
+{
+ struct perf_event_context *ctx = &cpuctx->ctx;
+
+ ctx_sched_in(ctx, cpuctx, event_type);
+}
+
+static void task_ctx_sched_in(struct task_struct *task,
+ enum event_type_t event_type)
+{
+ struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context);
+ struct perf_event_context *ctx = task->perf_event_ctxp;
+
+ if (likely(!ctx))
+ return;
+ if (cpuctx->task_ctx == ctx)
+ return;
+ ctx_sched_in(ctx, cpuctx, event_type);
+ cpuctx->task_ctx = ctx;
+}
/*
* Called from scheduler to add the events of the current task
* with interrupts disabled.
* accessing the event control register. If a NMI hits, then it will
* keep the event running.
*/
-void perf_event_task_sched_in(struct task_struct *task, int cpu)
+void perf_event_task_sched_in(struct task_struct *task)
{
- struct perf_cpu_context *cpuctx = &per_cpu(perf_cpu_context, cpu);
+ struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context);
struct perf_event_context *ctx = task->perf_event_ctxp;
if (likely(!ctx))
return;
+
if (cpuctx->task_ctx == ctx)
return;
- __perf_event_sched_in(ctx, cpuctx, cpu);
- cpuctx->task_ctx = ctx;
-}
-static void perf_event_cpu_sched_in(struct perf_cpu_context *cpuctx, int cpu)
-{
- struct perf_event_context *ctx = &cpuctx->ctx;
+ /*
+ * We want to keep the following priority order:
+ * cpu pinned (that don't need to move), task pinned,
+ * cpu flexible, task flexible.
+ */
+ cpu_ctx_sched_out(cpuctx, EVENT_FLEXIBLE);
- __perf_event_sched_in(ctx, cpuctx, cpu);
+ ctx_sched_in(ctx, cpuctx, EVENT_PINNED);
+ cpu_ctx_sched_in(cpuctx, EVENT_FLEXIBLE);
+ ctx_sched_in(ctx, cpuctx, EVENT_FLEXIBLE);
+
+ cpuctx->task_ctx = ctx;
}
#define MAX_INTERRUPTS (~0ULL)
static void perf_log_throttle(struct perf_event *event, int enable);
-static void perf_adjust_period(struct perf_event *event, u64 events)
+static u64 perf_calculate_period(struct perf_event *event, u64 nsec, u64 count)
+{
+ u64 frequency = event->attr.sample_freq;
+ u64 sec = NSEC_PER_SEC;
+ u64 divisor, dividend;
+
+ int count_fls, nsec_fls, frequency_fls, sec_fls;
+
+ count_fls = fls64(count);
+ nsec_fls = fls64(nsec);
+ frequency_fls = fls64(frequency);
+ sec_fls = 30;
+
+ /*
+ * We got @count in @nsec, with a target of sample_freq HZ
+ * the target period becomes:
+ *
+ * @count * 10^9
+ * period = -------------------
+ * @nsec * sample_freq
+ *
+ */
+
+ /*
+ * Reduce accuracy by one bit such that @a and @b converge
+ * to a similar magnitude.
+ */
+#define REDUCE_FLS(a, b) \
+do { \
+ if (a##_fls > b##_fls) { \
+ a >>= 1; \
+ a##_fls--; \
+ } else { \
+ b >>= 1; \
+ b##_fls--; \
+ } \
+} while (0)
+
+ /*
+ * Reduce accuracy until either term fits in a u64, then proceed with
+ * the other, so that finally we can do a u64/u64 division.
+ */
+ while (count_fls + sec_fls > 64 && nsec_fls + frequency_fls > 64) {
+ REDUCE_FLS(nsec, frequency);
+ REDUCE_FLS(sec, count);
+ }
+
+ if (count_fls + sec_fls > 64) {
+ divisor = nsec * frequency;
+
+ while (count_fls + sec_fls > 64) {
+ REDUCE_FLS(count, sec);
+ divisor >>= 1;
+ }
+
+ dividend = count * sec;
+ } else {
+ dividend = count * sec;
+
+ while (nsec_fls + frequency_fls > 64) {
+ REDUCE_FLS(nsec, frequency);
+ dividend >>= 1;
+ }
+
+ divisor = nsec * frequency;
+ }
+
+ return div64_u64(dividend, divisor);
+}
+
+static void perf_adjust_period(struct perf_event *event, u64 nsec, u64 count)
{
struct hw_perf_event *hwc = &event->hw;
u64 period, sample_period;
s64 delta;
- events *= hwc->sample_period;
- period = div64_u64(events, event->attr.sample_freq);
+ period = perf_calculate_period(event, nsec, count);
delta = (s64)(period - hwc->sample_period);
delta = (delta + 7) / 8; /* low pass filter */
sample_period = 1;
hwc->sample_period = sample_period;
+
+ if (atomic64_read(&hwc->period_left) > 8*sample_period) {
+ perf_disable();
+ event->pmu->disable(event);
+ atomic64_set(&hwc->period_left, 0);
+ event->pmu->enable(event);
+ perf_enable();
+ }
}
static void perf_ctx_adjust_freq(struct perf_event_context *ctx)
{
struct perf_event *event;
struct hw_perf_event *hwc;
- u64 interrupts, freq;
+ u64 interrupts, now;
+ s64 delta;
raw_spin_lock(&ctx->lock);
list_for_each_entry_rcu(event, &ctx->event_list, event_entry) {
if (interrupts == MAX_INTERRUPTS) {
perf_log_throttle(event, 1);
event->pmu->unthrottle(event);
- interrupts = 2*sysctl_perf_event_sample_rate/HZ;
}
if (!event->attr.freq || !event->attr.sample_freq)
continue;
- /*
- * if the specified freq < HZ then we need to skip ticks
- */
- if (event->attr.sample_freq < HZ) {
- freq = event->attr.sample_freq;
-
- hwc->freq_count += freq;
- hwc->freq_interrupts += interrupts;
-
- if (hwc->freq_count < HZ)
- continue;
-
- interrupts = hwc->freq_interrupts;
- hwc->freq_interrupts = 0;
- hwc->freq_count -= HZ;
- } else
- freq = HZ;
-
- perf_adjust_period(event, freq * interrupts);
+ event->pmu->read(event);
+ now = atomic64_read(&event->count);
+ delta = now - hwc->freq_count_stamp;
+ hwc->freq_count_stamp = now;
- /*
- * In order to avoid being stalled by an (accidental) huge
- * sample period, force reset the sample period if we didn't
- * get any events in this freq period.
- */
- if (!interrupts) {
- perf_disable();
- event->pmu->disable(event);
- atomic64_set(&hwc->period_left, 0);
- event->pmu->enable(event);
- perf_enable();
- }
+ if (delta > 0)
+ perf_adjust_period(event, TICK_NSEC, delta);
}
raw_spin_unlock(&ctx->lock);
}
*/
static void rotate_ctx(struct perf_event_context *ctx)
{
- struct perf_event *event;
-
if (!ctx->nr_events)
return;
raw_spin_lock(&ctx->lock);
- /*
- * Rotate the first entry last (works just fine for group events too):
- */
+
+ /* Rotate the first entry last of non-pinned groups */
perf_disable();
- list_for_each_entry(event, &ctx->group_list, group_entry) {
- list_move_tail(&event->group_entry, &ctx->group_list);
- break;
- }
+
+ list_rotate_left(&ctx->flexible_groups);
+
perf_enable();
raw_spin_unlock(&ctx->lock);
}
-void perf_event_task_tick(struct task_struct *curr, int cpu)
+void perf_event_task_tick(struct task_struct *curr)
{
struct perf_cpu_context *cpuctx;
struct perf_event_context *ctx;
if (!atomic_read(&nr_events))
return;
- cpuctx = &per_cpu(perf_cpu_context, cpu);
+ cpuctx = &__get_cpu_var(perf_cpu_context);
ctx = curr->perf_event_ctxp;
perf_ctx_adjust_freq(&cpuctx->ctx);
if (ctx)
perf_ctx_adjust_freq(ctx);
- perf_event_cpu_sched_out(cpuctx);
+ cpu_ctx_sched_out(cpuctx, EVENT_FLEXIBLE);
if (ctx)
- __perf_event_task_sched_out(ctx);
+ task_ctx_sched_out(ctx, EVENT_FLEXIBLE);
rotate_ctx(&cpuctx->ctx);
if (ctx)
rotate_ctx(ctx);
- perf_event_cpu_sched_in(cpuctx, cpu);
+ cpu_ctx_sched_in(cpuctx, EVENT_FLEXIBLE);
if (ctx)
- perf_event_task_sched_in(curr, cpu);
+ task_ctx_sched_in(curr, EVENT_FLEXIBLE);
+}
+
+static int event_enable_on_exec(struct perf_event *event,
+ struct perf_event_context *ctx)
+{
+ if (!event->attr.enable_on_exec)
+ return 0;
+
+ event->attr.enable_on_exec = 0;
+ if (event->state >= PERF_EVENT_STATE_INACTIVE)
+ return 0;
+
+ __perf_event_mark_enabled(event, ctx);
+
+ return 1;
}
/*
struct perf_event *event;
unsigned long flags;
int enabled = 0;
+ int ret;
local_irq_save(flags);
ctx = task->perf_event_ctxp;
raw_spin_lock(&ctx->lock);
- list_for_each_entry(event, &ctx->group_list, group_entry) {
- if (!event->attr.enable_on_exec)
- continue;
- event->attr.enable_on_exec = 0;
- if (event->state >= PERF_EVENT_STATE_INACTIVE)
- continue;
- __perf_event_mark_enabled(event, ctx);
- enabled = 1;
+ list_for_each_entry(event, &ctx->pinned_groups, group_entry) {
+ ret = event_enable_on_exec(event, ctx);
+ if (ret)
+ enabled = 1;
+ }
+
+ list_for_each_entry(event, &ctx->flexible_groups, group_entry) {
+ ret = event_enable_on_exec(event, ctx);
+ if (ret)
+ enabled = 1;
}
/*
raw_spin_unlock(&ctx->lock);
- perf_event_task_sched_in(task, smp_processor_id());
+ perf_event_task_sched_in(task);
out:
local_irq_restore(flags);
}
{
raw_spin_lock_init(&ctx->lock);
mutex_init(&ctx->mutex);
- INIT_LIST_HEAD(&ctx->group_list);
+ INIT_LIST_HEAD(&ctx->pinned_groups);
+ INIT_LIST_HEAD(&ctx->flexible_groups);
INIT_LIST_HEAD(&ctx->event_list);
atomic_set(&ctx->refcount, 1);
ctx->task = task;
static int perf_event_task_match(struct perf_event *event)
{
+ if (event->state != PERF_EVENT_STATE_ACTIVE)
+ return 0;
+
if (event->cpu != -1 && event->cpu != smp_processor_id())
return 0;
static int perf_event_comm_match(struct perf_event *event)
{
+ if (event->state != PERF_EVENT_STATE_ACTIVE)
+ return 0;
+
if (event->cpu != -1 && event->cpu != smp_processor_id())
return 0;
static int perf_event_mmap_match(struct perf_event *event,
struct perf_mmap_event *mmap_event)
{
+ if (event->state != PERF_EVENT_STATE_ACTIVE)
+ return 0;
+
if (event->cpu != -1 && event->cpu != smp_processor_id())
return 0;
if (event->attr.freq) {
u64 now = perf_clock();
- s64 delta = now - hwc->freq_stamp;
+ s64 delta = now - hwc->freq_time_stamp;
- hwc->freq_stamp = now;
+ hwc->freq_time_stamp = now;
- if (delta > 0 && delta < TICK_NSEC)
- perf_adjust_period(event, NSEC_PER_SEC / (int)delta);
+ if (delta > 0 && delta < 2*TICK_NSEC)
+ perf_adjust_period(event, delta, hwc->last_period);
}
/*
.read = task_clock_perf_event_read,
};
-#ifdef CONFIG_EVENT_PROFILE
+#ifdef CONFIG_EVENT_TRACING
void perf_tp_event(int event_id, u64 addr, u64 count, void *record,
int entry_size)
{
}
-#endif /* CONFIG_EVENT_PROFILE */
+#endif /* CONFIG_EVENT_TRACING */
#ifdef CONFIG_HAVE_HW_BREAKPOINT
static void bp_perf_event_destroy(struct perf_event *event)
else
child_event->state = PERF_EVENT_STATE_OFF;
- if (parent_event->attr.freq)
- child_event->hw.sample_period = parent_event->hw.sample_period;
+ if (parent_event->attr.freq) {
+ u64 sample_period = parent_event->hw.sample_period;
+ struct hw_perf_event *hwc = &child_event->hw;
+
+ hwc->sample_period = sample_period;
+ hwc->last_period = sample_period;
+
+ atomic64_set(&hwc->period_left, sample_period);
+ }
child_event->overflow_handler = parent_event->overflow_handler;
mutex_lock_nested(&child_ctx->mutex, SINGLE_DEPTH_NESTING);
again:
- list_for_each_entry_safe(child_event, tmp, &child_ctx->group_list,
+ list_for_each_entry_safe(child_event, tmp, &child_ctx->pinned_groups,
+ group_entry)
+ __perf_event_exit_task(child_event, child_ctx, child);
+
+ list_for_each_entry_safe(child_event, tmp, &child_ctx->flexible_groups,
group_entry)
__perf_event_exit_task(child_event, child_ctx, child);
* its siblings to the list, but we obtained 'tmp' before that which
* will still point to the list head terminating the iteration.
*/
- if (!list_empty(&child_ctx->group_list))
+ if (!list_empty(&child_ctx->pinned_groups) ||
+ !list_empty(&child_ctx->flexible_groups))
goto again;
mutex_unlock(&child_ctx->mutex);
put_ctx(child_ctx);
}
+static void perf_free_event(struct perf_event *event,
+ struct perf_event_context *ctx)
+{
+ struct perf_event *parent = event->parent;
+
+ if (WARN_ON_ONCE(!parent))
+ return;
+
+ mutex_lock(&parent->child_mutex);
+ list_del_init(&event->child_list);
+ mutex_unlock(&parent->child_mutex);
+
+ fput(parent->filp);
+
+ list_del_event(event, ctx);
+ free_event(event);
+}
+
/*
* free an unexposed, unused context as created by inheritance by
* init_task below, used by fork() in case of fail.
mutex_lock(&ctx->mutex);
again:
- list_for_each_entry_safe(event, tmp, &ctx->group_list, group_entry) {
- struct perf_event *parent = event->parent;
+ list_for_each_entry_safe(event, tmp, &ctx->pinned_groups, group_entry)
+ perf_free_event(event, ctx);
- if (WARN_ON_ONCE(!parent))
- continue;
+ list_for_each_entry_safe(event, tmp, &ctx->flexible_groups,
+ group_entry)
+ perf_free_event(event, ctx);
- mutex_lock(&parent->child_mutex);
- list_del_init(&event->child_list);
- mutex_unlock(&parent->child_mutex);
+ if (!list_empty(&ctx->pinned_groups) ||
+ !list_empty(&ctx->flexible_groups))
+ goto again;
- fput(parent->filp);
+ mutex_unlock(&ctx->mutex);
- list_del_event(event, ctx);
- free_event(event);
+ put_ctx(ctx);
+}
+
+static int
+inherit_task_group(struct perf_event *event, struct task_struct *parent,
+ struct perf_event_context *parent_ctx,
+ struct task_struct *child,
+ int *inherited_all)
+{
+ int ret;
+ struct perf_event_context *child_ctx = child->perf_event_ctxp;
+
+ if (!event->attr.inherit) {
+ *inherited_all = 0;
+ return 0;
}
- if (!list_empty(&ctx->group_list))
- goto again;
+ if (!child_ctx) {
+ /*
+ * This is executed from the parent task context, so
+ * inherit events that have been marked for cloning.
+ * First allocate and initialize a context for the
+ * child.
+ */
- mutex_unlock(&ctx->mutex);
+ child_ctx = kzalloc(sizeof(struct perf_event_context),
+ GFP_KERNEL);
+ if (!child_ctx)
+ return -ENOMEM;
- put_ctx(ctx);
+ __perf_event_init_context(child_ctx, child);
+ child->perf_event_ctxp = child_ctx;
+ get_task_struct(child);
+ }
+
+ ret = inherit_group(event, parent, parent_ctx,
+ child, child_ctx);
+
+ if (ret)
+ *inherited_all = 0;
+
+ return ret;
}
+
/*
* Initialize the perf_event context in task_struct
*/
int perf_event_init_task(struct task_struct *child)
{
- struct perf_event_context *child_ctx = NULL, *parent_ctx;
+ struct perf_event_context *child_ctx, *parent_ctx;
struct perf_event_context *cloned_ctx;
struct perf_event *event;
struct task_struct *parent = current;
* We dont have to disable NMIs - we are only looking at
* the list, not manipulating it:
*/
- list_for_each_entry(event, &parent_ctx->group_list, group_entry) {
-
- if (!event->attr.inherit) {
- inherited_all = 0;
- continue;
- }
-
- if (!child->perf_event_ctxp) {
- /*
- * This is executed from the parent task context, so
- * inherit events that have been marked for cloning.
- * First allocate and initialize a context for the
- * child.
- */
-
- child_ctx = kzalloc(sizeof(struct perf_event_context),
- GFP_KERNEL);
- if (!child_ctx) {
- ret = -ENOMEM;
- break;
- }
-
- __perf_event_init_context(child_ctx, child);
- child->perf_event_ctxp = child_ctx;
- get_task_struct(child);
- }
+ list_for_each_entry(event, &parent_ctx->pinned_groups, group_entry) {
+ ret = inherit_task_group(event, parent, parent_ctx, child,
+ &inherited_all);
+ if (ret)
+ break;
+ }
- ret = inherit_group(event, parent, parent_ctx,
- child, child_ctx);
- if (ret) {
- inherited_all = 0;
+ list_for_each_entry(event, &parent_ctx->flexible_groups, group_entry) {
+ ret = inherit_task_group(event, parent, parent_ctx, child,
+ &inherited_all);
+ if (ret)
break;
- }
}
+ child_ctx = child->perf_event_ctxp;
+
if (child_ctx && inherited_all) {
/*
* Mark the child context as a clone of the parent
struct perf_event_context *ctx = &cpuctx->ctx;
struct perf_event *event, *tmp;
- list_for_each_entry_safe(event, tmp, &ctx->group_list, group_entry)
+ list_for_each_entry_safe(event, tmp, &ctx->pinned_groups, group_entry)
+ __perf_event_remove_from_context(event);
+ list_for_each_entry_safe(event, tmp, &ctx->flexible_groups, group_entry)
__perf_event_remove_from_context(event);
}
static void perf_event_exit_cpu(int cpu)
*/
prev_state = prev->state;
finish_arch_switch(prev);
- perf_event_task_sched_in(current, cpu_of(rq));
+#ifdef __ARCH_WANT_INTERRUPTS_ON_CTXSW
+ local_irq_disable();
+#endif /* __ARCH_WANT_INTERRUPTS_ON_CTXSW */
+ perf_event_task_sched_in(current);
+#ifdef __ARCH_WANT_INTERRUPTS_ON_CTXSW
+ local_irq_enable();
+#endif /* __ARCH_WANT_INTERRUPTS_ON_CTXSW */
finish_lock_switch(rq, prev);
fire_sched_in_preempt_notifiers(current);
curr->sched_class->task_tick(rq, curr, 0);
raw_spin_unlock(&rq->lock);
- perf_event_task_tick(curr, cpu);
+ perf_event_task_tick(curr);
#ifdef CONFIG_SMP
rq->idle_at_tick = idle_cpu(cpu);
if (likely(prev != next)) {
sched_info_switch(prev, next);
- perf_event_task_sched_out(prev, next, cpu);
+ perf_event_task_sched_out(prev, next);
rq->nr_switches++;
rq->curr = next;
post_schedule(rq);
- if (unlikely(reacquire_kernel_lock(current) < 0))
+ if (unlikely(reacquire_kernel_lock(current) < 0)) {
+ prev = rq->curr;
+ switch_count = &prev->nivcsw;
goto need_resched_nonpreemptible;
+ }
preempt_enable_no_resched();
if (need_resched())
{
char **addr = (char **)(event + pred->offset);
int cmp, match;
+ int len = strlen(*addr) + 1; /* including tailing '\0' */
- cmp = pred->regex.match(*addr, &pred->regex, pred->regex.field_len);
+ cmp = pred->regex.match(*addr, &pred->regex, len);
match = cmp ^ pred->not;
return 0;
}
- /* Basic regex callbacks */
+ /*
+ * regex_match_foo - Basic regex callbacks
+ *
+ * @str: the string to be searched
+ * @r: the regex structure containing the pattern string
+ * @len: the length of the string to be searched (including '\0')
+ *
+ * Note:
+ * - @str might not be NULL-terminated if it's of type DYN_STRING
+ * or STATIC_STRING
+ */
+
static int regex_match_full(char *str, struct regex *r, int len)
{
if (strncmp(str, r->pattern, len) == 0)
static int regex_match_front(char *str, struct regex *r, int len)
{
- if (strncmp(str, r->pattern, len) == 0)
+ if (strncmp(str, r->pattern, r->len) == 0)
return 1;
return 0;
}
static int regex_match_middle(char *str, struct regex *r, int len)
{
- if (strstr(str, r->pattern))
+ if (strnstr(str, r->pattern, len))
return 1;
return 0;
}
static int regex_match_end(char *str, struct regex *r, int len)
{
- char *ptr = strstr(str, r->pattern);
+ int strlen = len - 1;
- if (ptr && (ptr[r->len] == 0))
+ if (strlen >= r->len &&
+ memcmp(str + strlen - r->len, r->pattern, r->len) == 0)
return 1;
return 0;
}
pred->regex.field_len = field->size;
} else if (field->filter_type == FILTER_DYN_STRING)
fn = filter_pred_strloc;
- else {
+ else
fn = filter_pred_pchar;
- pred->regex.field_len = strlen(pred->regex.pattern);
- }
} else {
if (field->is_signed)
ret = strict_strtoll(pred->regex.pattern, 0, &val);
return err;
}
-#ifdef CONFIG_EVENT_PROFILE
+#ifdef CONFIG_PERF_EVENTS
void ftrace_profile_free_filter(struct perf_event *event)
{
return err;
}
-#endif /* CONFIG_EVENT_PROFILE */
+#endif /* CONFIG_PERF_EVENTS */
if (!dir1)
return;
- while (true) {
- dent1 = readdir(dir1);
- if (!dent1)
- break;
-
- if (sscanf(dent1->d_name, "node%u", &mem) < 1)
+ while ((dent1 = readdir(dir1)) != NULL) {
+ if (dent1->d_type != DT_DIR ||
+ sscanf(dent1->d_name, "node%u", &mem) < 1)
continue;
snprintf(buf, PATH_MAX, "%s/%s", PATH_SYS_NODE, dent1->d_name);
dir2 = opendir(buf);
if (!dir2)
continue;
- while (true) {
- dent2 = readdir(dir2);
- if (!dent2)
- break;
- if (sscanf(dent2->d_name, "cpu%u", &cpu) < 1)
+ while ((dent2 = readdir(dir2)) != NULL) {
+ if (dent2->d_type != DT_LNK ||
+ sscanf(dent2->d_name, "cpu%u", &cpu) < 1)
continue;
cpunode_map[cpu] = mem;
}
event__parse_sample(event, session->sample_type, &data);
- dump_printf("(IP, %d): %d/%d: %p period: %Ld\n",
- event->header.misc,
- data.pid, data.tid,
- (void *)(long)data.ip,
- (long long)data.period);
+ dump_printf("(IP, %d): %d/%d: %#Lx period: %Ld\n", event->header.misc,
+ data.pid, data.tid, data.ip, data.period);
thread = perf_session__findnew(session, event->ip.pid);
if (thread == NULL) {
return 0;
}
-static int sample_type_check(struct perf_session *session)
-{
- if (!(session->sample_type & PERF_SAMPLE_RAW)) {
- fprintf(stderr,
- "No trace sample to read. Did you call perf record "
- "without -R?");
- return -1;
- }
-
- return 0;
-}
-
static struct perf_event_ops event_ops = {
- .process_sample_event = process_sample_event,
- .process_comm_event = event__process_comm,
- .sample_type_check = sample_type_check,
+ .sample = process_sample_event,
+ .comm = event__process_comm,
};
static double fragmentation(unsigned long n_req, unsigned long n_alloc)
printf("%.102s\n", graph_dotted_line);
printf(" %-34s |", is_caller ? "Callsite": "Alloc Ptr");
- printf(" Total_alloc/Per | Total_req/Per | Hit | Ping-pong | Frag\n");
+ printf(" Total_alloc/Per | Total_req/Per | Hit | Ping-pong | Frag\n");
printf("%.102s\n", graph_dotted_line);
next = rb_first(root);
snprintf(buf, sizeof(buf), "%#Lx", addr);
printf(" %-34s |", buf);
- printf(" %9llu/%-5lu | %9llu/%-5lu | %6lu | %8lu | %6.3f%%\n",
+ printf(" %9llu/%-5lu | %9llu/%-5lu | %8lu | %8lu | %6.3f%%\n",
(unsigned long long)data->bytes_alloc,
(unsigned long)data->bytes_alloc / data->hit,
(unsigned long long)data->bytes_req,
static int __cmd_kmem(void)
{
- int err;
+ int err = -EINVAL;
struct perf_session *session = perf_session__new(input_name, O_RDONLY, 0);
if (session == NULL)
return -ENOMEM;
+ if (!perf_session__has_traces(session, "kmem record"))
+ goto out_delete;
+
setup_pager();
err = perf_session__process_events(session, &event_ops);
if (err != 0)
setup_sorting(&alloc_sort, default_sort_order);
return __cmd_kmem();
- }
+ } else
+ usage_with_options(kmem_usage, kmem_options);
return 0;
}
static char const *input_name = "perf.data";
static int force;
+static bool hide_unresolved;
+static bool dont_use_callchains;
static int show_threads;
static struct perf_read_values show_threads_values;
event__parse_sample(event, session->sample_type, &data);
- dump_printf("(IP, %d): %d/%d: %p period: %Ld\n",
- event->header.misc,
- data.pid, data.tid,
- (void *)(long)data.ip,
- (long long)data.period);
+ dump_printf("(IP, %d): %d/%d: %#Lx period: %Ld\n", event->header.misc,
+ data.pid, data.tid, data.ip, data.period);
if (session->sample_type & PERF_SAMPLE_CALLCHAIN) {
unsigned int i;
return -1;
}
- if (al.filtered)
+ if (al.filtered || (hide_unresolved && al.sym == NULL))
return 0;
if (perf_session__add_hist_entry(session, &al, data.callchain, data.period)) {
return 0;
}
-static int sample_type_check(struct perf_session *session)
+static int perf_session__setup_sample_type(struct perf_session *self)
{
- if (!(session->sample_type & PERF_SAMPLE_CALLCHAIN)) {
+ if (!(self->sample_type & PERF_SAMPLE_CALLCHAIN)) {
if (sort__has_parent) {
fprintf(stderr, "selected --sort parent, but no"
" callchain data. Did you call"
" perf record without -g?\n");
- return -1;
+ return -EINVAL;
}
if (symbol_conf.use_callchain) {
fprintf(stderr, "selected -g but no callchain data."
" -g?\n");
return -1;
}
- } else if (callchain_param.mode != CHAIN_NONE && !symbol_conf.use_callchain) {
+ } else if (!dont_use_callchains && callchain_param.mode != CHAIN_NONE &&
+ !symbol_conf.use_callchain) {
symbol_conf.use_callchain = true;
if (register_callchain_param(&callchain_param) < 0) {
fprintf(stderr, "Can't register callchain"
" params\n");
- return -1;
+ return -EINVAL;
}
}
}
static struct perf_event_ops event_ops = {
- .process_sample_event = process_sample_event,
- .process_mmap_event = event__process_mmap,
- .process_comm_event = event__process_comm,
- .process_exit_event = event__process_task,
- .process_fork_event = event__process_task,
- .process_lost_event = event__process_lost,
- .process_read_event = process_read_event,
- .sample_type_check = sample_type_check,
+ .sample = process_sample_event,
+ .mmap = event__process_mmap,
+ .comm = event__process_comm,
+ .exit = event__process_task,
+ .fork = event__process_task,
+ .lost = event__process_lost,
+ .read = process_read_event,
};
-
static int __cmd_report(void)
{
- int ret;
+ int ret = -EINVAL;
struct perf_session *session;
session = perf_session__new(input_name, O_RDONLY, force);
if (show_threads)
perf_read_values_init(&show_threads_values);
+ ret = perf_session__setup_sample_type(session);
+ if (ret)
+ goto out_delete;
+
ret = perf_session__process_events(session, &event_ops);
if (ret)
goto out_delete;
static int
parse_callchain_opt(const struct option *opt __used, const char *arg,
- int unset __used)
+ int unset)
{
char *tok;
char *endptr;
+ /*
+ * --no-call-graph
+ */
+ if (unset) {
+ dont_use_callchains = true;
+ return 0;
+ }
+
symbol_conf.use_callchain = true;
if (!arg)
else if (!strncmp(tok, "none", strlen(arg))) {
callchain_param.mode = CHAIN_NONE;
- symbol_conf.use_callchain = true;
+ symbol_conf.use_callchain = false;
return 0;
}
"pretty printing style key: normal raw"),
OPT_STRING('s', "sort", &sort_order, "key[,key2...]",
"sort by key(s): pid, comm, dso, symbol, parent"),
- OPT_BOOLEAN('P', "full-paths", &event_ops.full_paths,
+ OPT_BOOLEAN('P', "full-paths", &symbol_conf.full_paths,
"Don't shorten the pathnames taking into account the cwd"),
OPT_STRING('p', "parent", &parent_pattern, "regex",
"regex filter to identify parent, see: '--sort parent'"),
OPT_STRING('t', "field-separator", &symbol_conf.field_sep, "separator",
"separator for columns, no spaces will be added between "
"columns '.' is reserved."),
+ OPT_BOOLEAN('U', "hide-unresolved", &hide_unresolved,
+ "Only display entries resolved to a symbol"),
OPT_END()
};
static int process_comm_event(event_t *event, struct perf_session *session __used)
{
- pid_set_comm(event->comm.pid, event->comm.comm);
+ pid_set_comm(event->comm.tid, event->comm.comm);
return 0;
}
}
}
-static int sample_type_check(struct perf_session *session)
-{
- if (!(session->sample_type & PERF_SAMPLE_RAW)) {
- fprintf(stderr, "No trace samples found in the file.\n"
- "Have you used 'perf timechart record' to record it?\n");
- return -1;
- }
-
- return 0;
-}
-
static struct perf_event_ops event_ops = {
- .process_comm_event = process_comm_event,
- .process_fork_event = process_fork_event,
- .process_exit_event = process_exit_event,
- .process_sample_event = queue_sample_event,
- .sample_type_check = sample_type_check,
+ .comm = process_comm_event,
+ .fork = process_fork_event,
+ .exit = process_exit_event,
+ .sample = queue_sample_event,
};
static int __cmd_timechart(void)
{
struct perf_session *session = perf_session__new(input_name, O_RDONLY, 0);
- int ret;
+ int ret = -EINVAL;
if (session == NULL)
return -ENOMEM;
+ if (!perf_session__has_traces(session, "timechart record"))
+ goto out_delete;
+
ret = perf_session__process_events(session, &event_ops);
if (ret)
goto out_delete;