]> git.proxmox.com Git - mirror_ubuntu-jammy-kernel.git/commitdiff
perf tools: Support PERF_SAMPLE_WEIGHT_STRUCT
authorKan Liang <kan.liang@linux.intel.com>
Tue, 2 Feb 2021 20:09:09 +0000 (12:09 -0800)
committerArnaldo Carvalho de Melo <acme@redhat.com>
Mon, 8 Feb 2021 19:25:00 +0000 (16:25 -0300)
The new sample type, PERF_SAMPLE_WEIGHT_STRUCT, is an alternative of the
PERF_SAMPLE_WEIGHT sample type. Users can apply either the
PERF_SAMPLE_WEIGHT sample type or the PERF_SAMPLE_WEIGHT_STRUCT sample
type to retrieve the sample weight, but they cannot apply both sample
types simultaneously.

The new sample type shares the same space as the PERF_SAMPLE_WEIGHT
sample type. The lower 32 bits are exactly the same for both sample
types. The higher 32 bits may differ between architectures.

Add an arch-specific arch_evsel__set_sample_weight() to set the new sample
type for X86. Only store the lower 32 bits for the sample->weight if the
new sample type is applied. In practice, no memory access could last
longer than 4G cycles. No data will be lost.

If the kernel doesn't support the new sample type, fall back to the
PERF_SAMPLE_WEIGHT sample type.

There is no impact for other architectures.

Committer notes:

Fixup related to PERF_SAMPLE_CODE_PAGE_SIZE, present in acme/perf/core
but not upstream yet.

Signed-off-by: Kan Liang <kan.liang@linux.intel.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Jin Yao <yao.jin@linux.intel.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Madhavan Srinivasan <maddy@linux.vnet.ibm.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lore.kernel.org/lkml/1612296553-21962-6-git-send-email-kan.liang@linux.intel.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
tools/perf/arch/x86/util/Build
tools/perf/arch/x86/util/evsel.c [new file with mode: 0644]
tools/perf/util/evsel.c
tools/perf/util/evsel.h
tools/perf/util/intel-pt.c
tools/perf/util/perf_event_attr_fprintf.c
tools/perf/util/session.c
tools/perf/util/synthetic-events.c

index 98014613b622d8a5c9cd0301553a3e7b757c35dd..0c72d418932eab24c7fd6cd3ec17583f2a176736 100644 (file)
@@ -8,6 +8,7 @@ perf-y += machine.o
 perf-y += event.o
 perf-y += evlist.o
 perf-y += mem-events.o
+perf-y += evsel.o
 
 perf-$(CONFIG_DWARF) += dwarf-regs.o
 perf-$(CONFIG_BPF_PROLOGUE) += dwarf-regs.o
diff --git a/tools/perf/arch/x86/util/evsel.c b/tools/perf/arch/x86/util/evsel.c
new file mode 100644 (file)
index 0000000..2f733cd
--- /dev/null
@@ -0,0 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <stdio.h>
+#include "util/evsel.h"
+
+void arch_evsel__set_sample_weight(struct evsel *evsel)
+{
+       evsel__set_sample_bit(evsel, WEIGHT_STRUCT);
+}
index a8ce6b2ebd61ef7ffce541802fc5cc6620189848..fa49d15edc35add8ccc0f20e6a2123d90f9a1744 100644 (file)
@@ -1014,6 +1014,11 @@ struct evsel_config_term *__evsel__get_config_term(struct evsel *evsel, enum evs
        return found_term;
 }
 
+void __weak arch_evsel__set_sample_weight(struct evsel *evsel)
+{
+       evsel__set_sample_bit(evsel, WEIGHT);
+}
+
 /*
  * The enable_on_exec/disabled value strategy:
  *
@@ -1168,7 +1173,7 @@ void evsel__config(struct evsel *evsel, struct record_opts *opts,
        }
 
        if (opts->sample_weight)
-               evsel__set_sample_bit(evsel, WEIGHT);
+               arch_evsel__set_sample_weight(evsel);
 
        attr->task     = track;
        attr->mmap     = track;
@@ -1743,6 +1748,10 @@ static int evsel__open_cpu(struct evsel *evsel, struct perf_cpu_map *cpus,
        }
 
 fallback_missing_features:
+       if (perf_missing_features.weight_struct) {
+               evsel__set_sample_bit(evsel, WEIGHT);
+               evsel__reset_sample_bit(evsel, WEIGHT_STRUCT);
+       }
        if (perf_missing_features.clockid_wrong)
                evsel->core.attr.clockid = CLOCK_MONOTONIC; /* should always work */
        if (perf_missing_features.clockid) {
@@ -1883,7 +1892,12 @@ try_fallback:
         * Must probe features in the order they were added to the
         * perf_event_attr interface.
         */
-       if (!perf_missing_features.code_page_size &&
+       if (!perf_missing_features.weight_struct &&
+           (evsel->core.attr.sample_type & PERF_SAMPLE_WEIGHT_STRUCT)) {
+               perf_missing_features.weight_struct = true;
+               pr_debug2("switching off weight struct support\n");
+               goto fallback_missing_features;
+       } else if (!perf_missing_features.code_page_size &&
            (evsel->core.attr.sample_type & PERF_SAMPLE_CODE_PAGE_SIZE)) {
                perf_missing_features.code_page_size = true;
                pr_debug2_peo("Kernel has no PERF_SAMPLE_CODE_PAGE_SIZE support, bailing out\n");
@@ -2331,9 +2345,15 @@ int evsel__parse_sample(struct evsel *evsel, union perf_event *event,
                }
        }
 
-       if (type & PERF_SAMPLE_WEIGHT) {
+       if (type & PERF_SAMPLE_WEIGHT_TYPE) {
+               union perf_sample_weight weight;
+
                OVERFLOW_CHECK_u64(array);
-               data->weight = *array;
+               weight.full = *array;
+               if (type & PERF_SAMPLE_WEIGHT)
+                       data->weight = weight.full;
+               else
+                       data->weight = weight.var1_dw;
                array++;
        }
 
index e25594494cf628677a34807121f8480318196a0c..4e8e49fb7e9de5f3a742e1ddf2743b95cd2ea0c7 100644 (file)
@@ -150,6 +150,7 @@ struct perf_missing_features {
        bool cgroup;
        bool data_page_size;
        bool code_page_size;
+       bool weight_struct;
 };
 
 extern struct perf_missing_features perf_missing_features;
@@ -244,6 +245,8 @@ void __evsel__reset_sample_bit(struct evsel *evsel, enum perf_event_sample_forma
 
 void evsel__set_sample_id(struct evsel *evsel, bool use_sample_identifier);
 
+void arch_evsel__set_sample_weight(struct evsel *evsel);
+
 int evsel__set_filter(struct evsel *evsel, const char *filter);
 int evsel__append_tp_filter(struct evsel *evsel, const char *filter);
 int evsel__append_addr_filter(struct evsel *evsel, const char *filter);
index 60214de42f31bd380269940068d5bfd7be0fadae..a929f6dbdf433eecfc5b0e0bf20a5c717d6192ce 100644 (file)
@@ -1853,13 +1853,29 @@ static int intel_pt_synth_pebs_sample(struct intel_pt_queue *ptq)
        if (sample_type & PERF_SAMPLE_ADDR && items->has_mem_access_address)
                sample.addr = items->mem_access_address;
 
-       if (sample_type & PERF_SAMPLE_WEIGHT) {
+       if (sample_type & PERF_SAMPLE_WEIGHT_TYPE) {
                /*
                 * Refer kernel's setup_pebs_adaptive_sample_data() and
                 * intel_hsw_weight().
                 */
-               if (items->has_mem_access_latency)
-                       sample.weight = items->mem_access_latency;
+               if (items->has_mem_access_latency) {
+                       u64 weight = items->mem_access_latency >> 32;
+
+                       /*
+                        * Starts from SPR, the mem access latency field
+                        * contains both cache latency [47:32] and instruction
+                        * latency [15:0]. The cache latency is the same as the
+                        * mem access latency on previous platforms.
+                        *
+                        * In practice, no memory access could last than 4G
+                        * cycles. Use latency >> 32 to distinguish the
+                        * different format of the mem access latency field.
+                        */
+                       if (weight > 0)
+                               sample.weight = weight & 0xffff;
+                       else
+                               sample.weight = items->mem_access_latency;
+               }
                if (!sample.weight && items->has_tsx_aux_info) {
                        /* Cycles last block */
                        sample.weight = (u32)items->tsx_aux_info;
index 1bd6cfd74257d102c56f563932f985523ec8ff07..30481825515bdca68db852eb004514912e453bc9 100644 (file)
@@ -36,6 +36,7 @@ static void __p_sample_type(char *buf, size_t size, u64 value)
                bit_name(IDENTIFIER), bit_name(REGS_INTR), bit_name(DATA_SRC),
                bit_name(WEIGHT), bit_name(PHYS_ADDR), bit_name(AUX),
                bit_name(CGROUP), bit_name(DATA_PAGE_SIZE), bit_name(CODE_PAGE_SIZE),
+               bit_name(WEIGHT_STRUCT),
                { .name = NULL, }
        };
 #undef bit_name
index 5b1a31cae0472d66566ec9d4dc98e02a77626e0a..053c08c8c850d54f1cd14f24fde6f85e49b4b038 100644 (file)
@@ -1300,7 +1300,7 @@ static void dump_sample(struct evsel *evsel, union perf_event *event,
        if (sample_type & PERF_SAMPLE_STACK_USER)
                stack_user__printf(&sample->user_stack);
 
-       if (sample_type & PERF_SAMPLE_WEIGHT)
+       if (sample_type & PERF_SAMPLE_WEIGHT_TYPE)
                printf("... weight: %" PRIu64 "\n", sample->weight);
 
        if (sample_type & PERF_SAMPLE_DATA_SRC)
index fcec775636ac61cc3ebdf95cac7798e90d7742d4..4e9266f751754af0141a24d64ec90ecfdcab5e6e 100644 (file)
@@ -1468,7 +1468,7 @@ size_t perf_event__sample_event_size(const struct perf_sample *sample, u64 type,
                }
        }
 
-       if (type & PERF_SAMPLE_WEIGHT)
+       if (type & PERF_SAMPLE_WEIGHT_TYPE)
                result += sizeof(u64);
 
        if (type & PERF_SAMPLE_DATA_SRC)
@@ -1642,8 +1642,10 @@ int perf_event__synthesize_sample(union perf_event *event, u64 type, u64 read_fo
                }
        }
 
-       if (type & PERF_SAMPLE_WEIGHT) {
+       if (type & PERF_SAMPLE_WEIGHT_TYPE) {
                *array = sample->weight;
+               if (type & PERF_SAMPLE_WEIGHT_STRUCT)
+                       *array &= 0xffffffff;
                array++;
        }