#include <asm/reg.h>
#include <asm/pmc.h>
#include <asm/machdep.h>
+#include <asm/firmware.h>
struct cpu_hw_counters {
int n_counters;
return 0;
}
+/*
+ * Check if newly-added counters have consistent settings for
+ * exclude_{user,kernel,hv} with each other and any previously
+ * added counters.
+ */
+static int check_excludes(struct perf_counter **ctrs, int n_prev, int n_new)
+{
+ int eu, ek, eh;
+ int i, n;
+ struct perf_counter *counter;
+
+ n = n_prev + n_new;
+ if (n <= 1)
+ return 0;
+
+ eu = ctrs[0]->hw_event.exclude_user;
+ ek = ctrs[0]->hw_event.exclude_kernel;
+ eh = ctrs[0]->hw_event.exclude_hv;
+ if (n_prev == 0)
+ n_prev = 1;
+ for (i = n_prev; i < n; ++i) {
+ counter = ctrs[i];
+ if (counter->hw_event.exclude_user != eu ||
+ counter->hw_event.exclude_kernel != ek ||
+ counter->hw_event.exclude_hv != eh)
+ return -EAGAIN;
+ }
+ return 0;
+}
+
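For illustration, a minimal stand-alone sketch of the rule check_excludes() enforces (the struct below is a hypothetical stand-in, not the kernel type): every counter scheduled together must carry the same exclude_user/exclude_kernel/exclude_hv settings, since they end up applied through shared MMCR0 freeze bits.

    #include <stdio.h>

    /* Hypothetical stand-in for the three hw_event.exclude_* bits. */
    struct excl { int user, kernel, hv; };

    /* Same rule as check_excludes(): every counter must match counter 0. */
    static int excludes_consistent(const struct excl *e, int n)
    {
            int i;

            for (i = 1; i < n; ++i)
                    if (e[i].user != e[0].user ||
                        e[i].kernel != e[0].kernel ||
                        e[i].hv != e[0].hv)
                            return 0;
            return 1;
    }

    int main(void)
    {
            /* One counter excludes the kernel, its group sibling does not. */
            struct excl group[2] = { { 0, 1, 1 }, { 0, 0, 1 } };

            /* Prints "inconsistent"; in the same situation the real code
             * returns -EAGAIN and the group is never put onto the PMU. */
            printf("%s\n", excludes_consistent(group, 2) ?
                   "consistent" : "inconsistent");
            return 0;
    }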
static void power_perf_read(struct perf_counter *counter)
{
long val, delta, prev;
goto out;
}
+ /*
+ * Add in MMCR0 freeze bits corresponding to the
+ * hw_event.exclude_* bits for the first counter.
+ * We have already checked that all counters have the
+ * same values for these bits as the first counter.
+ */
+ counter = cpuhw->counter[0];
+ if (counter->hw_event.exclude_user)
+ cpuhw->mmcr[0] |= MMCR0_FCP;
+ if (counter->hw_event.exclude_kernel)
+ cpuhw->mmcr[0] |= MMCR0_FCS;
+ if (counter->hw_event.exclude_hv)
+ cpuhw->mmcr[0] |= MMCR0_FCHV;
+
/*
* Write the new configuration to MMCR* with the freeze
* bit set and set the hardware counters to their initial values.
&cpuhw->counter[n0], &cpuhw->events[n0]);
if (n < 0)
return -EAGAIN;
+ if (check_excludes(cpuhw->counter, n0, n))
+ return -EAGAIN;
if (power_check_constraints(cpuhw->events, n + n0))
return -EAGAIN;
cpuhw->n_counters = n0 + n;
goto out;
cpuhw->counter[n0] = counter;
cpuhw->events[n0] = counter->hw.config;
+ if (check_excludes(cpuhw->counter, n0, 1))
+ goto out;
if (power_check_constraints(cpuhw->events, n0 + 1))
goto out;
counter->hw.config_base = ev;
counter->hw.idx = 0;
+ /*
+ * If we are not running on a hypervisor, force the
+ * exclude_hv bit to 0 so that we don't care what
+ * the user set it to. This also means that we don't
+ * set the MMCR0_FCHV bit, which unconditionally freezes
+ * the counters on the PPC970 variants used in Apple G5
+ * machines (since MSR.HV is always 1 on those machines).
+ */
+ if (!firmware_has_feature(FW_FEATURE_LPAR))
+ counter->hw_event.exclude_hv = 0;
+
/*
* If this is in a group, check if it can go on with all the
* other hardware counters in the group. We assume the counter
if (n < 0)
return NULL;
}
- events[n++] = ev;
- if (power_check_constraints(events, n))
+ events[n] = ev;
+ ctrs[n] = counter;
+ if (check_excludes(ctrs, n, 1))
+ return NULL;
+ if (power_check_constraints(events, n + 1))
return NULL;
- counter->hw.config = events[n - 1];
+ counter->hw.config = events[n];
atomic64_set(&counter->hw.period_left, counter->hw_event.irq_period);
return &power_perf_ops;
}
return -EINVAL;
/*
- * Count user events, and generate PMC IRQs:
+ * Generate PMC IRQs:
* (keep 'enabled' bit clear for now)
*/
- hwc->config = ARCH_PERFMON_EVENTSEL_USR | ARCH_PERFMON_EVENTSEL_INT;
+ hwc->config = ARCH_PERFMON_EVENTSEL_INT;
/*
- * If privileged enough, count OS events too, and allow
- * NMI events as well:
+ * Count user and OS events unless requested not to.
*/
- hwc->nmi = 0;
- if (capable(CAP_SYS_ADMIN)) {
+ if (!hw_event->exclude_user)
+ hwc->config |= ARCH_PERFMON_EVENTSEL_USR;
+ if (!hw_event->exclude_kernel)
hwc->config |= ARCH_PERFMON_EVENTSEL_OS;
- if (hw_event->nmi)
- hwc->nmi = 1;
- }
+
+ /*
+ * If privileged enough, allow NMI events:
+ */
+ hwc->nmi = 0;
+ if (capable(CAP_SYS_ADMIN) && hw_event->nmi)
+ hwc->nmi = 1;
hwc->irq_period = hw_event->irq_period;
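For illustration, roughly what hwc->config becomes for a counter created with exclude_kernel set. The defines below follow the architectural PERFEVTSEL bit layout (USR = bit 16, OS = bit 17, INT = bit 20) and are a sketch, not the kernel's definitions:

    #include <stdint.h>
    #include <stdio.h>

    /* Architectural PERFEVTSEL bits (sketch; see the kernel headers). */
    #define EVENTSEL_USR (1ULL << 16)   /* count ring 3 (user) */
    #define EVENTSEL_OS  (1ULL << 17)   /* count ring 0 (kernel) */
    #define EVENTSEL_INT (1ULL << 20)   /* interrupt on overflow */

    /* Mirrors the hunk above: start from INT only, then add back the
     * privilege levels that were not excluded. */
    static uint64_t build_config(int exclude_user, int exclude_kernel)
    {
            uint64_t config = EVENTSEL_INT;

            if (!exclude_user)
                    config |= EVENTSEL_USR;
            if (!exclude_kernel)
                    config |= EVENTSEL_OS;
            return config;
    }

    int main(void)
    {
            /* exclude_kernel = 1 gives INT | USR = 0x110000. */
            printf("%#llx\n", (unsigned long long)build_config(0, 1));
            return 0;
    }

Leaving both bits clear gives INT | USR | OS, i.e. what a CAP_SYS_ADMIN caller got before this change.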
/*
int err;
/*
- * Enable IRQ generation (0x8) and ring-3 counting (0x2),
- * and enable ring-0 counting if allowed:
+ * Enable IRQ generation (0x8),
+ * and enable ring-3 counting (0x2) and ring-0 counting (0x1)
+ * if requested:
*/
- bits = 0x8ULL | 0x2ULL;
+ bits = 0x8ULL;
+ if (hwc->config & ARCH_PERFMON_EVENTSEL_USR)
+ bits |= 0x2;
if (hwc->config & ARCH_PERFMON_EVENTSEL_OS)
bits |= 0x1;
bits <<= (idx * 4);
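For illustration, the fixed-counter control MSR carries one 4-bit field per fixed counter, which is why the bits assembled above (0x8 = PMI on overflow, 0x2 = ring 3, 0x1 = ring 0) are shifted by idx * 4. A sketch of the resulting mask for a user-only fixed counter 1:

    #include <stdint.h>
    #include <stdio.h>

    /* Per-fixed-counter control bits (sketch): */
    #define FIXED_PMI   0x8ULL  /* interrupt on overflow */
    #define FIXED_RING3 0x2ULL  /* count user (ring 3) */
    #define FIXED_RING0 0x1ULL  /* count kernel (ring 0) */

    int main(void)
    {
            int idx = 1;                              /* second fixed counter */
            uint64_t bits = FIXED_PMI | FIXED_RING3;  /* user-only counting */

            /* Each fixed counter owns a 4-bit field in the control MSR. */
            printf("%#llx\n", (unsigned long long)(bits << (idx * 4)));
            /* prints 0xa0 */
            return 0;
    }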
u64 irq_period;
u32 record_type;
- u32 disabled : 1, /* off by default */
- nmi : 1, /* NMI sampling */
- raw : 1, /* raw event type */
- inherit : 1, /* children inherit it */
- pinned : 1, /* must always be on PMU */
- exclusive : 1, /* only counter on PMU */
-
- __reserved_1 : 26;
+ u32 disabled : 1, /* off by default */
+ nmi : 1, /* NMI sampling */
+ raw : 1, /* raw event type */
+ inherit : 1, /* children inherit it */
+ pinned : 1, /* must always be on PMU */
+ exclusive : 1, /* only group on PMU */
+ exclude_user : 1, /* don't count user */
+ exclude_kernel : 1, /* ditto kernel */
+ exclude_hv : 1, /* ditto hypervisor */
+
+ __reserved_1 : 23;
u64 __reserved_2;
};
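For illustration, how a caller would use the new bits; the event type below is only an example. Requesting user-only counting now just means setting the exclude bits, and leaving all three clear counts user, kernel and hypervisor events alike:

    struct perf_counter_hw_event hw_event = {
            .type           = PERF_COUNT_CPU_CYCLES,  /* example event */
            .exclude_kernel = 1,    /* don't count supervisor/ring-0 */
            .exclude_hv     = 1,    /* don't count hypervisor state */
    };
    /* hw_event is then handed to the perf counter open syscall as before. */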
{
const struct hw_perf_counter_ops *hw_ops = NULL;
+ /*
+ * Software counters (currently) can't in general distinguish
+ * between user, kernel and hypervisor events.
+ * However, context switches and cpu migrations are considered
+ * to be kernel events, and page faults are never hypervisor
+ * events.
+ */
switch (counter->hw_event.type) {
case PERF_COUNT_CPU_CLOCK:
- hw_ops = &perf_ops_cpu_clock;
+ if (!(counter->hw_event.exclude_user ||
+ counter->hw_event.exclude_kernel ||
+ counter->hw_event.exclude_hv))
+ hw_ops = &perf_ops_cpu_clock;
break;
case PERF_COUNT_TASK_CLOCK:
+ if (counter->hw_event.exclude_user ||
+ counter->hw_event.exclude_kernel ||
+ counter->hw_event.exclude_hv)
+ break;
/*
* If the user instantiates this as a per-cpu counter,
* use the cpu_clock counter instead.
hw_ops = &perf_ops_cpu_clock;
break;
case PERF_COUNT_PAGE_FAULTS:
- hw_ops = &perf_ops_page_faults;
+ if (!(counter->hw_event.exclude_user ||
+ counter->hw_event.exclude_kernel))
+ hw_ops = &perf_ops_page_faults;
break;
case PERF_COUNT_CONTEXT_SWITCHES:
- hw_ops = &perf_ops_context_switches;
+ if (!counter->hw_event.exclude_kernel)
+ hw_ops = &perf_ops_context_switches;
break;
case PERF_COUNT_CPU_MIGRATIONS:
- hw_ops = &perf_ops_cpu_migrations;
+ if (!counter->hw_event.exclude_kernel)
+ hw_ops = &perf_ops_cpu_migrations;
break;
default:
break;
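For illustration, the switch above reduced to a predicate (a sketch with a stand-in enum, not kernel code): the clock counters cannot honour any exclusion, page faults satisfy exclude_hv for free but cannot split user from kernel, and the context-switch and migration counters only require exclude_kernel to be clear since they are kernel events.

    /* Stand-in enum for the sketch; the real values live in
     * <linux/perf_counter.h>. */
    enum sw_event { CPU_CLOCK, TASK_CLOCK, PAGE_FAULTS,
                    CONTEXT_SWITCHES, CPU_MIGRATIONS };

    /* Non-zero if a software counter can honour the requested exclusions. */
    static int sw_counter_usable(enum sw_event ev, int excl_user,
                                 int excl_kernel, int excl_hv)
    {
            switch (ev) {
            case CPU_CLOCK:
            case TASK_CLOCK:
                    /* time cannot be attributed to a privilege level */
                    return !(excl_user || excl_kernel || excl_hv);
            case PAGE_FAULTS:
                    /* never hypervisor events, but user/kernel mixed */
                    return !(excl_user || excl_kernel);
            case CONTEXT_SWITCHES:
            case CPU_MIGRATIONS:
                    /* counted as kernel events */
                    return !excl_kernel;
            }
            return 0;
    }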