From afb5e9e47faf53e0f557e22979076dc1a94ef3d7 Mon Sep 17 00:00:00 2001 From: Leo Yan Date: Wed, 19 May 2021 15:19:38 +0800 Subject: [PATCH] perf arm-spe: Bail out if the trace is later than perf event It's possible that record in Arm SPE trace is later than perf event and vice versa. This asks to correlate the perf events and Arm SPE synthesized events to be processed in the manner of correct timing. To achieve the time ordering, this patch reverses the flow, it firstly calls arm_spe_sample() and then calls arm_spe_decode(). By comparing the timestamp value and detect the perf event is coming earlier than Arm SPE trace data, it bails out from the decoding loop, the last record is pushed into auxtrace stack and is deferred to generate sample. To track the timestamp, everytime it updates timestamp for the latest record. Signed-off-by: Leo Yan Reviewed-by: James Clark Tested-by: James Clark Cc: Alexander Shishkin Cc: Al Grant Cc: Dave Martin Cc: Ingo Molnar Cc: Jiri Olsa Cc: John Garry Cc: Mark Rutland Cc: Mathieu Poirier Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Will Deacon Cc: linux-arm-kernel@lists.infradead.org Link: https://lore.kernel.org/r/20210519071939.1598923-5-leo.yan@linaro.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/arm-spe.c | 37 ++++++++++++++++++++++++++++++++++--- 1 file changed, 34 insertions(+), 3 deletions(-) diff --git a/tools/perf/util/arm-spe.c b/tools/perf/util/arm-spe.c index da379328442c..5c5b438584c4 100644 --- a/tools/perf/util/arm-spe.c +++ b/tools/perf/util/arm-spe.c @@ -434,12 +434,36 @@ static int arm_spe_sample(struct arm_spe_queue *speq) static int arm_spe_run_decoder(struct arm_spe_queue *speq, u64 *timestamp) { struct arm_spe *spe = speq->spe; + struct arm_spe_record *record; int ret; if (!spe->kernel_start) spe->kernel_start = machine__kernel_start(spe->machine); while (1) { + /* + * The usual logic is firstly to decode the packets, and then + * based the record to synthesize sample; but here the flow is + * reversed: it calls arm_spe_sample() for synthesizing samples + * prior to arm_spe_decode(). + * + * Two reasons for this code logic: + * 1. Firstly, when setup queue in arm_spe__setup_queue(), it + * has decoded trace data and generated a record, but the record + * is left to generate sample until run to here, so it's correct + * to synthesize sample for the left record. + * 2. After decoding trace data, it needs to compare the record + * timestamp with the coming perf event, if the record timestamp + * is later than the perf event, it needs bail out and pushs the + * record into auxtrace heap, thus the record can be deferred to + * synthesize sample until run to here at the next time; so this + * can correlate samples between Arm SPE trace data and other + * perf events with correct time ordering. + */ + ret = arm_spe_sample(speq); + if (ret) + return ret; + ret = arm_spe_decode(speq->decoder); if (!ret) { pr_debug("No data or all data has been processed.\n"); @@ -453,10 +477,17 @@ static int arm_spe_run_decoder(struct arm_spe_queue *speq, u64 *timestamp) if (ret < 0) continue; - ret = arm_spe_sample(speq); - if (ret) - return ret; + record = &speq->decoder->record; + /* Update timestamp for the last record */ + if (record->timestamp > speq->timestamp) + speq->timestamp = record->timestamp; + + /* + * If the timestamp of the queue is later than timestamp of the + * coming perf event, bail out so can allow the perf event to + * be processed ahead. + */ if (!spe->timeless_decoding && speq->timestamp >= *timestamp) { *timestamp = speq->timestamp; return 0; -- 2.39.5