Thresholding, a performance monitoring unit feature, can be
used to identify marked instructions which take more than
expected cycles between start event and end event.
Threshold compare (thresh_cmp) bits are programmed in MMCRA
register. In Power9, thresh_cmp bits were part of the
event code. But in case of P10, thresh_cmp are not part of
event code due to inclusion of MMCR3 bits.
Patch here adds an option to use attr.config1 variable
to be used to pass thresh_cmp value to be programmed in
MMCRA register. A new ppmu flag called PPMU_HAS_ATTR_CONFIG1
has been added and this flag is used to notify the use of
attr.config1 variable.
Patch has extended the parameter list of 'compute_mmcr',
to include power_pmu's 'flags' element and parameter list of
get_constraint to include attr.config1 value. It also extend
parameter list of power_check_constraints inorder to pass
perf_event list.
As stated by commit
ef0e3b650f8d ("powerpc/perf: Fix Threshold
Event Counter Multiplier width for P10"), constraint bits for
thresh_cmp is also needed to be increased to 11 bits, which is
handled as part of this patch. We added bit number 53 as part
of constraint bits of thresh_cmp for power10 to make it an
11 bit field.
Updated layout for p10:
/*
* Layout of constraint bits:
*
* 60 56 52 48 44 40 36 32
* | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - |
* [ fab_match ] [ thresh_cmp ] [ thresh_ctl ] [ ]
* | |
* [ thresh_cmp bits for p10] thresh_sel -*
*
* 28 24 20 16 12 8 4 0
* | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - |
* [ ] | [ ] | [ sample ] [ ] [6] [5] [4] [3] [2] [1]
* | | | | |
* BHRB IFM -* | | |*radix_scope | Count of events for each PMC.
* EBB -* | | p1, p2, p3, p4, p5, p6.
* L1 I/D qualifier -* |
* nc - number of counters -*
*
* The PMC fields P1..P6, and NC, are adder fields. As we accumulate constraints
* we want the low bit of each field to be added to any existing value.
*
* Everything else is a value field.
*/
Result:
command#: cat /sys/devices/cpu/format/thresh_cmp
config1:0-17
ex. usage:
command#: perf record -I --weight -d -e
cpu/event=0x67340101EC,thresh_cmp=500/ ./ebizzy -S 2 -t 1 -s 4096
1826636 records/s
real 2.00 s
user 2.00 s
sys 0.00 s
[ perf record: Woken up 1 times to write data ]
[ perf record: Captured and wrote 0.038 MB perf.data (61 samples) ]
Signed-off-by: Kajol Jain <kjain@linux.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20210209095234.837356-1-kjain@linux.ibm.com
unsigned long test_adder;
int (*compute_mmcr)(u64 events[], int n_ev,
unsigned int hwc[], struct mmcr_regs *mmcr,
- struct perf_event *pevents[]);
+ struct perf_event *pevents[], u32 flags);
int (*get_constraint)(u64 event_id, unsigned long *mskp,
- unsigned long *valp);
+ unsigned long *valp, u64 event_config1);
int (*get_alternatives)(u64 event_id, unsigned int flags,
u64 alt[]);
void (*get_mem_data_src)(union perf_mem_data_src *dsrc,
#define PPMU_NO_SIAR 0x00000100 /* Do not use SIAR */
#define PPMU_ARCH_31 0x00000200 /* Has MMCR3, SIER2 and SIER3 */
#define PPMU_P10_DD1 0x00000400 /* Is power10 DD1 processor version */
+#define PPMU_HAS_ATTR_CONFIG1 0x00000800 /* Using config1 attribute */
/*
* Values for flags to get_alternatives()
*/
static int power_check_constraints(struct cpu_hw_events *cpuhw,
u64 event_id[], unsigned int cflags[],
- int n_ev)
+ int n_ev, struct perf_event **event)
{
unsigned long mask, value, nv;
unsigned long smasks[MAX_HWEVENTS], svalues[MAX_HWEVENTS];
event_id[i] = cpuhw->alternatives[i][0];
}
if (ppmu->get_constraint(event_id[i], &cpuhw->amasks[i][0],
- &cpuhw->avalues[i][0]))
+ &cpuhw->avalues[i][0], event[i]->attr.config1))
return -1;
}
value = mask = 0;
for (j = 1; j < n_alt[i]; ++j)
ppmu->get_constraint(cpuhw->alternatives[i][j],
&cpuhw->amasks[i][j],
- &cpuhw->avalues[i][j]);
+ &cpuhw->avalues[i][j],
+ event[i]->attr.config1);
}
/* enumerate all possibilities and see if any will work */
memset(&cpuhw->mmcr, 0, sizeof(cpuhw->mmcr));
if (ppmu->compute_mmcr(cpuhw->events, cpuhw->n_events, hwc_index,
- &cpuhw->mmcr, cpuhw->event)) {
+ &cpuhw->mmcr, cpuhw->event, ppmu->flags)) {
/* shouldn't ever get here */
printk(KERN_ERR "oops compute_mmcr failed\n");
goto out;
if (check_excludes(cpuhw->event, cpuhw->flags, n0, 1))
goto out;
- if (power_check_constraints(cpuhw, cpuhw->events, cpuhw->flags, n0 + 1))
+ if (power_check_constraints(cpuhw, cpuhw->events, cpuhw->flags, n0 + 1, cpuhw->event))
goto out;
event->hw.config = cpuhw->events[n0];
n = cpuhw->n_events;
if (check_excludes(cpuhw->event, cpuhw->flags, 0, n))
return -EAGAIN;
- i = power_check_constraints(cpuhw, cpuhw->events, cpuhw->flags, n);
+ i = power_check_constraints(cpuhw, cpuhw->events, cpuhw->flags, n, cpuhw->event);
if (i < 0)
return -EAGAIN;
local_irq_save(irq_flags);
cpuhw = this_cpu_ptr(&cpu_hw_events);
- err = power_check_constraints(cpuhw, events, cflags, n + 1);
+ err = power_check_constraints(cpuhw, events, cflags, n + 1, ctrs);
if (has_branch_stack(event)) {
u64 bhrb_filter = -1;
*mmcra |= MMCRA_SDAR_MODE_TLB;
}
+static u64 p10_thresh_cmp_val(u64 value)
+{
+ int exp = 0;
+ u64 result = value;
+
+ if (!value)
+ return value;
+
+ /*
+ * Incase of P10, thresh_cmp value is not part of raw event code
+ * and provided via attr.config1 parameter. To program threshold in MMCRA,
+ * take a 18 bit number N and shift right 2 places and increment
+ * the exponent E by 1 until the upper 10 bits of N are zero.
+ * Write E to the threshold exponent and write the lower 8 bits of N
+ * to the threshold mantissa.
+ * The max threshold that can be written is 261120.
+ */
+ if (cpu_has_feature(CPU_FTR_ARCH_31)) {
+ if (value > 261120)
+ value = 261120;
+ while ((64 - __builtin_clzl(value)) > 8) {
+ exp++;
+ value >>= 2;
+ }
+
+ /*
+ * Note that it is invalid to write a mantissa with the
+ * upper 2 bits of mantissa being zero, unless the
+ * exponent is also zero.
+ */
+ if (!(value & 0xC0) && exp)
+ result = 0;
+ else
+ result = (exp << 8) | value;
+ }
+ return result;
+}
+
static u64 thresh_cmp_val(u64 value)
{
+ if (cpu_has_feature(CPU_FTR_ARCH_31))
+ value = p10_thresh_cmp_val(value);
+
+ /*
+ * Since location of threshold compare bits in MMCRA
+ * is different for p8, using different shift value.
+ */
if (cpu_has_feature(CPU_FTR_ARCH_300))
return value << p9_MMCRA_THR_CMP_SHIFT;
-
- return value << MMCRA_THR_CMP_SHIFT;
+ else
+ return value << MMCRA_THR_CMP_SHIFT;
}
static unsigned long combine_from_event(u64 event)
{
unsigned int cmp, exp;
+ if (cpu_has_feature(CPU_FTR_ARCH_31))
+ return p10_thresh_cmp_val(event) != 0;
+
/*
* Check the mantissa upper two bits are not zero, unless the
* exponent is also zero. See the THRESH_CMP_MANTISSA doc.
- * Power10: thresh_cmp is replaced by l2_l3 event select.
*/
- if (cpu_has_feature(CPU_FTR_ARCH_31))
- return false;
cmp = (event >> EVENT_THR_CMP_SHIFT) & EVENT_THR_CMP_MASK;
exp = cmp >> 7;
*weight = mantissa << (2 * exp);
}
-int isa207_get_constraint(u64 event, unsigned long *maskp, unsigned long *valp)
+int isa207_get_constraint(u64 event, unsigned long *maskp, unsigned long *valp, u64 event_config1)
{
unsigned int unit, pmc, cache, ebb;
unsigned long mask, value;
}
if (cpu_has_feature(CPU_FTR_ARCH_31)) {
- if (event_is_threshold(event)) {
+ if (event_is_threshold(event) && is_thresh_cmp_valid(event_config1)) {
mask |= CNST_THRESH_CTL_SEL_MASK;
value |= CNST_THRESH_CTL_SEL_VAL(event >> EVENT_THRESH_SHIFT);
+ mask |= p10_CNST_THRESH_CMP_MASK;
+ value |= p10_CNST_THRESH_CMP_VAL(p10_thresh_cmp_val(event_config1));
}
} else if (cpu_has_feature(CPU_FTR_ARCH_300)) {
if (event_is_threshold(event) && is_thresh_cmp_valid(event)) {
int isa207_compute_mmcr(u64 event[], int n_ev,
unsigned int hwc[], struct mmcr_regs *mmcr,
- struct perf_event *pevents[])
+ struct perf_event *pevents[], u32 flags)
{
unsigned long mmcra, mmcr1, mmcr2, unit, combine, psel, cache, val;
unsigned long mmcr3;
val = (event[i] >> EVENT_THR_CMP_SHIFT) &
EVENT_THR_CMP_MASK;
mmcra |= thresh_cmp_val(val);
+ } else if (flags & PPMU_HAS_ATTR_CONFIG1) {
+ val = (pevents[i]->attr.config1 >> p10_EVENT_THR_CMP_SHIFT) &
+ p10_EVENT_THR_CMP_MASK;
+ mmcra |= thresh_cmp_val(val);
}
}
#define p10_EVENT_RADIX_SCOPE_QUAL_MASK 0x1
#define p10_MMCR1_RADIX_SCOPE_QUAL_SHIFT 45
+/* Event Threshold Compare bit constant for power10 in config1 attribute */
+#define p10_EVENT_THR_CMP_SHIFT 0
+#define p10_EVENT_THR_CMP_MASK 0x3FFFFull
+
#define p10_EVENT_VALID_MASK \
((p10_SDAR_MODE_MASK << p10_SDAR_MODE_SHIFT | \
(p10_EVENT_THRESH_MASK << EVENT_THRESH_SHIFT) | \
* 60 56 52 48 44 40 36 32
* | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - |
* [ fab_match ] [ thresh_cmp ] [ thresh_ctl ] [ ]
- * |
- * thresh_sel -*
+ * | |
+ * [ thresh_cmp bits for p10] thresh_sel -*
*
* 28 24 20 16 12 8 4 0
* | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - |
#define CNST_THRESH_CTL_SEL_VAL(v) (((v) & 0x7ffull) << 32)
#define CNST_THRESH_CTL_SEL_MASK CNST_THRESH_CTL_SEL_VAL(0x7ff)
+#define p10_CNST_THRESH_CMP_VAL(v) (((v) & 0x7ffull) << 43)
+#define p10_CNST_THRESH_CMP_MASK p10_CNST_THRESH_CMP_VAL(0x7ff)
+
#define CNST_EBB_VAL(v) (((v) & EVENT_EBB_MASK) << 24)
#define CNST_EBB_MASK CNST_EBB_VAL(EVENT_EBB_MASK)
#define PH(a, b) (P(LVL, HIT) | P(a, b))
#define PM(a, b) (P(LVL, MISS) | P(a, b))
-int isa207_get_constraint(u64 event, unsigned long *maskp, unsigned long *valp);
+int isa207_get_constraint(u64 event, unsigned long *maskp, unsigned long *valp, u64 event_config1);
int isa207_compute_mmcr(u64 event[], int n_ev,
unsigned int hwc[], struct mmcr_regs *mmcr,
- struct perf_event *pevents[]);
+ struct perf_event *pevents[], u32 flags);
void isa207_disable_pmc(unsigned int pmc, struct mmcr_regs *mmcr);
int isa207_get_alternatives(u64 event, u64 alt[], int size, unsigned int flags,
const unsigned int ev_alt[][MAX_ALT]);
};
static int mpc7450_get_constraint(u64 event, unsigned long *maskp,
- unsigned long *valp)
+ unsigned long *valp, u64 event_config1 __maybe_unused)
{
int pmc, class;
u32 mask, value;
*/
static int mpc7450_compute_mmcr(u64 event[], int n_ev, unsigned int hwc[],
struct mmcr_regs *mmcr,
- struct perf_event *pevents[])
+ struct perf_event *pevents[],
+ u32 flags __maybe_unused)
{
u8 event_index[N_CLASSES][N_COUNTER];
int n_classevent[N_CLASSES];
PMU_FORMAT_ATTR(src_mask, "config:48-53");
PMU_FORMAT_ATTR(src_match, "config:54-59");
PMU_FORMAT_ATTR(radix_scope, "config:9");
+PMU_FORMAT_ATTR(thresh_cmp, "config1:0-17");
static struct attribute *power10_pmu_format_attr[] = {
&format_attr_event.attr,
&format_attr_src_mask.attr,
&format_attr_src_match.attr,
&format_attr_radix_scope.attr,
+ &format_attr_thresh_cmp.attr,
NULL,
};
.get_mem_weight = isa207_get_mem_weight,
.disable_pmc = isa207_disable_pmc,
.flags = PPMU_HAS_SIER | PPMU_ARCH_207S |
- PPMU_ARCH_31,
+ PPMU_ARCH_31 | PPMU_HAS_ATTR_CONFIG1,
.n_generic = ARRAY_SIZE(power10_generic_events),
.generic_events = power10_generic_events,
.cache_events = &power10_cache_events,
};
static int power5p_get_constraint(u64 event, unsigned long *maskp,
- unsigned long *valp)
+ unsigned long *valp, u64 event_config1 __maybe_unused)
{
int pmc, byte, unit, sh;
int bit, fmask;
static int power5p_compute_mmcr(u64 event[], int n_ev,
unsigned int hwc[], struct mmcr_regs *mmcr,
- struct perf_event *pevents[])
+ struct perf_event *pevents[],
+ u32 flags __maybe_unused)
{
unsigned long mmcr1 = 0;
unsigned long mmcra = 0;
};
static int power5_get_constraint(u64 event, unsigned long *maskp,
- unsigned long *valp)
+ unsigned long *valp, u64 event_config1 __maybe_unused)
{
int pmc, byte, unit, sh;
int bit, fmask;
static int power5_compute_mmcr(u64 event[], int n_ev,
unsigned int hwc[], struct mmcr_regs *mmcr,
- struct perf_event *pevents[])
+ struct perf_event *pevents[],
+ u32 flags __maybe_unused)
{
unsigned long mmcr1 = 0;
unsigned long mmcra = MMCRA_SDAR_DCACHE_MISS | MMCRA_SDAR_ERAT_MISS;
* Assign PMC numbers and compute MMCR1 value for a set of events
*/
static int p6_compute_mmcr(u64 event[], int n_ev,
- unsigned int hwc[], struct mmcr_regs *mmcr, struct perf_event *pevents[])
+ unsigned int hwc[], struct mmcr_regs *mmcr, struct perf_event *pevents[],
+ u32 flags __maybe_unused)
{
unsigned long mmcr1 = 0;
unsigned long mmcra = MMCRA_SDAR_DCACHE_MISS | MMCRA_SDAR_ERAT_MISS;
* 32-34 select field: nest (subunit) event selector
*/
static int p6_get_constraint(u64 event, unsigned long *maskp,
- unsigned long *valp)
+ unsigned long *valp, u64 event_config1 __maybe_unused)
{
int pmc, byte, sh, subunit;
unsigned long mask = 0, value = 0;
*/
static int power7_get_constraint(u64 event, unsigned long *maskp,
- unsigned long *valp)
+ unsigned long *valp, u64 event_config1 __maybe_unused)
{
int pmc, sh, unit;
unsigned long mask = 0, value = 0;
static int power7_compute_mmcr(u64 event[], int n_ev,
unsigned int hwc[], struct mmcr_regs *mmcr,
- struct perf_event *pevents[])
+ struct perf_event *pevents[],
+ u32 flags __maybe_unused)
{
unsigned long mmcr1 = 0;
unsigned long mmcra = MMCRA_SDAR_DCACHE_MISS | MMCRA_SDAR_ERAT_MISS;
};
static int p970_get_constraint(u64 event, unsigned long *maskp,
- unsigned long *valp)
+ unsigned long *valp, u64 event_config1 __maybe_unused)
{
int pmc, byte, unit, sh, spcsel;
unsigned long mask = 0, value = 0;
static int p970_compute_mmcr(u64 event[], int n_ev,
unsigned int hwc[], struct mmcr_regs *mmcr,
- struct perf_event *pevents[])
+ struct perf_event *pevents[],
+ u32 flags __maybe_unused)
{
unsigned long mmcr0 = 0, mmcr1 = 0, mmcra = 0;
unsigned int pmc, unit, byte, psel;