]>
Commit | Line | Data |
---|---|---|
b96e6615 TR |
1 | // SPDX-License-Identifier: GPL-2.0 |
2 | /* | |
3 | * Copyright IBM Corp. 2018 | |
4 | * Auxtrace support for s390 CPU-Measurement Sampling Facility | |
5 | * | |
6 | * Author(s): Thomas Richter <tmricht@linux.ibm.com> | |
33d9e183 TR |
7 | * |
8 | * Auxiliary traces are collected during 'perf record' using rbd000 event. | |
9 | * Several PERF_RECORD_XXX are generated during recording: | |
10 | * | |
11 | * PERF_RECORD_AUX: | |
12 | * Records that new data landed in the AUX buffer part. | |
13 | * PERF_RECORD_AUXTRACE: | |
14 | * Defines auxtrace data. Followed by the actual data. The contents of | |
15 | * the auxtrace data is dependent on the event and the CPU. | |
16 | * This record is generated by perf record command. For details | |
17 | * see Documentation/perf.data-file-format.txt. | |
18 | * PERF_RECORD_AUXTRACE_INFO: | |
19 | * Defines a table of contains for PERF_RECORD_AUXTRACE records. This | |
20 | * record is generated during 'perf record' command. Each record contains up | |
21 | * to 256 entries describing offset and size of the AUXTRACE data in the | |
22 | * perf.data file. | |
23 | * PERF_RECORD_AUXTRACE_ERROR: | |
24 | * Indicates an error during AUXTRACE collection such as buffer overflow. | |
25 | * PERF_RECORD_FINISHED_ROUND: | |
26 | * Perf events are not necessarily in time stamp order, as they can be | |
27 | * collected in parallel on different CPUs. If the events should be | |
28 | * processed in time order they need to be sorted first. | |
29 | * Perf report guarantees that there is no reordering over a | |
30 | * PERF_RECORD_FINISHED_ROUND boundary event. All perf records with a | |
31 | * time stamp lower than this record are processed (and displayed) before | |
32 | * the succeeding perf record are processed. | |
33 | * | |
34 | * These records are evaluated during perf report command. | |
35 | * | |
36 | * 1. PERF_RECORD_AUXTRACE_INFO is used to set up the infrastructure for | |
37 | * auxiliary trace data processing. See s390_cpumsf_process_auxtrace_info() | |
38 | * below. | |
39 | * Auxiliary trace data is collected per CPU. To merge the data into the report | |
40 | * an auxtrace_queue is created for each CPU. It is assumed that the auxtrace | |
41 | * data is in ascending order. | |
42 | * | |
43 | * Each queue has a double linked list of auxtrace_buffers. This list contains | |
44 | * the offset and size of a CPU's auxtrace data. During auxtrace processing | |
45 | * the data portion is mmap()'ed. | |
46 | * | |
47 | * To sort the queues in chronological order, all queue access is controlled | |
48 | * by the auxtrace_heap. This is basicly a stack, each stack element has two | |
49 | * entries, the queue number and a time stamp. However the stack is sorted by | |
50 | * the time stamps. The highest time stamp is at the bottom the lowest | |
51 | * (nearest) time stamp is at the top. That sort order is maintained at all | |
52 | * times! | |
53 | * | |
54 | * After the auxtrace infrastructure has been setup, the auxtrace queues are | |
55 | * filled with data (offset/size pairs) and the auxtrace_heap is populated. | |
56 | * | |
57 | * 2. PERF_RECORD_XXX processing triggers access to the auxtrace_queues. | |
58 | * Each record is handled by s390_cpumsf_process_event(). The time stamp of | |
59 | * the perf record is compared with the time stamp located on the auxtrace_heap | |
60 | * top element. If that time stamp is lower than the time stamp from the | |
61 | * record sample, the auxtrace queues will be processed. As auxtrace queues | |
62 | * control many auxtrace_buffers and each buffer can be quite large, the | |
63 | * auxtrace buffer might be processed only partially. In this case the | |
64 | * position in the auxtrace_buffer of that queue is remembered and the time | |
65 | * stamp of the last processed entry of the auxtrace_buffer replaces the | |
66 | * current auxtrace_heap top. | |
67 | * | |
68 | * 3. Auxtrace_queues might run of out data and are feeded by the | |
69 | * PERF_RECORD_AUXTRACE handling, see s390_cpumsf_process_auxtrace_event(). | |
70 | * | |
71 | * Event Generation | |
72 | * Each sampling-data entry in the auxilary trace data generates a perf sample. | |
73 | * This sample is filled | |
74 | * with data from the auxtrace such as PID/TID, instruction address, CPU state, | |
75 | * etc. This sample is processed with perf_session__deliver_synth_event() to | |
76 | * be included into the GUI. | |
77 | * | |
78 | * 4. PERF_RECORD_FINISHED_ROUND event is used to process all the remaining | |
79 | * auxiliary traces entries until the time stamp of this record is reached | |
80 | * auxtrace_heap top. This is triggered by ordered_event->deliver(). | |
81 | * | |
82 | * | |
83 | * Perf event processing. | |
84 | * Event processing of PERF_RECORD_XXX entries relies on time stamp entries. | |
85 | * This is the function call sequence: | |
86 | * | |
87 | * __cmd_report() | |
88 | * | | |
89 | * perf_session__process_events() | |
90 | * | | |
91 | * __perf_session__process_events() | |
92 | * | | |
93 | * perf_session__process_event() | |
94 | * | This functions splits the PERF_RECORD_XXX records. | |
95 | * | - Those generated by perf record command (type number equal or higher | |
96 | * | than PERF_RECORD_USER_TYPE_START) are handled by | |
97 | * | perf_session__process_user_event(see below) | |
98 | * | - Those generated by the kernel are handled by | |
99 | * | perf_evlist__parse_sample_timestamp() | |
100 | * | | |
101 | * perf_evlist__parse_sample_timestamp() | |
102 | * | Extract time stamp from sample data. | |
103 | * | | |
104 | * perf_session__queue_event() | |
105 | * | If timestamp is positive the sample is entered into an ordered_event | |
106 | * | list, sort order is the timestamp. The event processing is deferred until | |
107 | * | later (see perf_session__process_user_event()). | |
108 | * | Other timestamps (0 or -1) are handled immediately by | |
109 | * | perf_session__deliver_event(). These are events generated at start up | |
110 | * | of command perf record. They create PERF_RECORD_COMM and PERF_RECORD_MMAP* | |
111 | * | records. They are needed to create a list of running processes and its | |
112 | * | memory mappings and layout. They are needed at the beginning to enable | |
113 | * | command perf report to create process trees and memory mappings. | |
114 | * | | |
115 | * perf_session__deliver_event() | |
116 | * | Delivers a PERF_RECORD_XXX entry for handling. | |
117 | * | | |
118 | * auxtrace__process_event() | |
119 | * | The timestamp of the PERF_RECORD_XXX entry is taken to correlate with | |
120 | * | time stamps from the auxiliary trace buffers. This enables | |
121 | * | synchronization between auxiliary trace data and the events on the | |
122 | * | perf.data file. | |
123 | * | | |
124 | * machine__deliver_event() | |
125 | * | Handles the PERF_RECORD_XXX event. This depends on the record type. | |
126 | * It might update the process tree, update a process memory map or enter | |
127 | * a sample with IP and call back chain data into GUI data pool. | |
128 | * | |
129 | * | |
130 | * Deferred processing determined by perf_session__process_user_event() is | |
131 | * finally processed when a PERF_RECORD_FINISHED_ROUND is encountered. These | |
132 | * are generated during command perf record. | |
133 | * The timestamp of PERF_RECORD_FINISHED_ROUND event is taken to process all | |
134 | * PERF_RECORD_XXX entries stored in the ordered_event list. This list was | |
135 | * built up while reading the perf.data file. | |
136 | * Each event is now processed by calling perf_session__deliver_event(). | |
137 | * This enables time synchronization between the data in the perf.data file and | |
138 | * the data in the auxiliary trace buffers. | |
b96e6615 TR |
139 | */ |
140 | ||
141 | #include <endian.h> | |
142 | #include <errno.h> | |
143 | #include <byteswap.h> | |
144 | #include <inttypes.h> | |
145 | #include <linux/kernel.h> | |
146 | #include <linux/types.h> | |
147 | #include <linux/bitops.h> | |
148 | #include <linux/log2.h> | |
149 | ||
766e0618 TR |
150 | #include <sys/stat.h> |
151 | #include <sys/types.h> | |
152 | ||
b96e6615 TR |
153 | #include "cpumap.h" |
154 | #include "color.h" | |
155 | #include "evsel.h" | |
156 | #include "evlist.h" | |
157 | #include "machine.h" | |
158 | #include "session.h" | |
159 | #include "util.h" | |
160 | #include "thread.h" | |
161 | #include "debug.h" | |
162 | #include "auxtrace.h" | |
163 | #include "s390-cpumsf.h" | |
2b1444f2 | 164 | #include "s390-cpumsf-kernel.h" |
766e0618 | 165 | #include "config.h" |
b96e6615 TR |
166 | |
167 | struct s390_cpumsf { | |
168 | struct auxtrace auxtrace; | |
169 | struct auxtrace_queues queues; | |
170 | struct auxtrace_heap heap; | |
171 | struct perf_session *session; | |
172 | struct machine *machine; | |
173 | u32 auxtrace_type; | |
174 | u32 pmu_type; | |
2b1444f2 | 175 | u16 machine_type; |
33d9e183 | 176 | bool data_queued; |
766e0618 TR |
177 | bool use_logfile; |
178 | char *logdir; | |
33d9e183 TR |
179 | }; |
180 | ||
181 | struct s390_cpumsf_queue { | |
182 | struct s390_cpumsf *sf; | |
183 | unsigned int queue_nr; | |
184 | struct auxtrace_buffer *buffer; | |
185 | int cpu; | |
766e0618 | 186 | FILE *logfile; |
b96e6615 TR |
187 | }; |
188 | ||
2b1444f2 TR |
189 | /* Display s390 CPU measurement facility basic-sampling data entry */ |
190 | static bool s390_cpumsf_basic_show(const char *color, size_t pos, | |
191 | struct hws_basic_entry *basic) | |
192 | { | |
193 | if (basic->def != 1) { | |
194 | pr_err("Invalid AUX trace basic entry [%#08zx]\n", pos); | |
195 | return false; | |
196 | } | |
197 | color_fprintf(stdout, color, " [%#08zx] Basic Def:%04x Inst:%#04x" | |
198 | " %c%c%c%c AS:%d ASN:%#04x IA:%#018llx\n" | |
199 | "\t\tCL:%d HPP:%#018llx GPP:%#018llx\n", | |
200 | pos, basic->def, basic->U, | |
201 | basic->T ? 'T' : ' ', | |
202 | basic->W ? 'W' : ' ', | |
203 | basic->P ? 'P' : ' ', | |
204 | basic->I ? 'I' : ' ', | |
205 | basic->AS, basic->prim_asn, basic->ia, basic->CL, | |
206 | basic->hpp, basic->gpp); | |
207 | return true; | |
208 | } | |
209 | ||
210 | /* Display s390 CPU measurement facility diagnostic-sampling data entry */ | |
211 | static bool s390_cpumsf_diag_show(const char *color, size_t pos, | |
212 | struct hws_diag_entry *diag) | |
213 | { | |
214 | if (diag->def < S390_CPUMSF_DIAG_DEF_FIRST) { | |
215 | pr_err("Invalid AUX trace diagnostic entry [%#08zx]\n", pos); | |
216 | return false; | |
217 | } | |
218 | color_fprintf(stdout, color, " [%#08zx] Diag Def:%04x %c\n", | |
219 | pos, diag->def, diag->I ? 'I' : ' '); | |
220 | return true; | |
221 | } | |
222 | ||
223 | /* Return TOD timestamp contained in an trailer entry */ | |
224 | static unsigned long long trailer_timestamp(struct hws_trailer_entry *te) | |
225 | { | |
226 | /* te->t set: TOD in STCKE format, bytes 8-15 | |
227 | * to->t not set: TOD in STCK format, bytes 0-7 | |
228 | */ | |
229 | unsigned long long ts; | |
230 | ||
231 | memcpy(&ts, &te->timestamp[te->t], sizeof(ts)); | |
232 | return ts; | |
233 | } | |
234 | ||
235 | /* Display s390 CPU measurement facility trailer entry */ | |
236 | static bool s390_cpumsf_trailer_show(const char *color, size_t pos, | |
237 | struct hws_trailer_entry *te) | |
238 | { | |
239 | if (te->bsdes != sizeof(struct hws_basic_entry)) { | |
240 | pr_err("Invalid AUX trace trailer entry [%#08zx]\n", pos); | |
241 | return false; | |
242 | } | |
243 | color_fprintf(stdout, color, " [%#08zx] Trailer %c%c%c bsdes:%d" | |
244 | " dsdes:%d Overflow:%lld Time:%#llx\n" | |
245 | "\t\tC:%d TOD:%#lx 1:%#llx 2:%#llx\n", | |
246 | pos, | |
247 | te->f ? 'F' : ' ', | |
248 | te->a ? 'A' : ' ', | |
249 | te->t ? 'T' : ' ', | |
250 | te->bsdes, te->dsdes, te->overflow, | |
251 | trailer_timestamp(te), te->clock_base, te->progusage2, | |
252 | te->progusage[0], te->progusage[1]); | |
253 | return true; | |
254 | } | |
255 | ||
256 | /* Test a sample data block. It must be 4KB or a multiple thereof in size and | |
257 | * 4KB page aligned. Each sample data page has a trailer entry at the | |
258 | * end which contains the sample entry data sizes. | |
259 | * | |
260 | * Return true if the sample data block passes the checks and set the | |
261 | * basic set entry size and diagnostic set entry size. | |
262 | * | |
263 | * Return false on failure. | |
264 | * | |
265 | * Note: Old hardware does not set the basic or diagnostic entry sizes | |
266 | * in the trailer entry. Use the type number instead. | |
267 | */ | |
268 | static bool s390_cpumsf_validate(int machine_type, | |
269 | unsigned char *buf, size_t len, | |
270 | unsigned short *bsdes, | |
271 | unsigned short *dsdes) | |
272 | { | |
273 | struct hws_basic_entry *basic = (struct hws_basic_entry *)buf; | |
274 | struct hws_trailer_entry *te; | |
275 | ||
276 | *dsdes = *bsdes = 0; | |
277 | if (len & (S390_CPUMSF_PAGESZ - 1)) /* Illegal size */ | |
278 | return false; | |
279 | if (basic->def != 1) /* No basic set entry, must be first */ | |
280 | return false; | |
281 | /* Check for trailer entry at end of SDB */ | |
282 | te = (struct hws_trailer_entry *)(buf + S390_CPUMSF_PAGESZ | |
283 | - sizeof(*te)); | |
284 | *bsdes = te->bsdes; | |
285 | *dsdes = te->dsdes; | |
286 | if (!te->bsdes && !te->dsdes) { | |
287 | /* Very old hardware, use CPUID */ | |
288 | switch (machine_type) { | |
289 | case 2097: | |
290 | case 2098: | |
291 | *dsdes = 64; | |
292 | *bsdes = 32; | |
293 | break; | |
294 | case 2817: | |
295 | case 2818: | |
296 | *dsdes = 74; | |
297 | *bsdes = 32; | |
298 | break; | |
299 | case 2827: | |
300 | case 2828: | |
301 | *dsdes = 85; | |
302 | *bsdes = 32; | |
303 | break; | |
304 | default: | |
305 | /* Illegal trailer entry */ | |
306 | return false; | |
307 | } | |
308 | } | |
309 | return true; | |
310 | } | |
311 | ||
312 | /* Return true if there is room for another entry */ | |
313 | static bool s390_cpumsf_reached_trailer(size_t entry_sz, size_t pos) | |
314 | { | |
315 | size_t payload = S390_CPUMSF_PAGESZ - sizeof(struct hws_trailer_entry); | |
316 | ||
317 | if (payload - (pos & (S390_CPUMSF_PAGESZ - 1)) < entry_sz) | |
318 | return false; | |
319 | return true; | |
320 | } | |
321 | ||
322 | /* Dump an auxiliary buffer. These buffers are multiple of | |
323 | * 4KB SDB pages. | |
324 | */ | |
325 | static void s390_cpumsf_dump(struct s390_cpumsf *sf, | |
326 | unsigned char *buf, size_t len) | |
327 | { | |
328 | const char *color = PERF_COLOR_BLUE; | |
329 | struct hws_basic_entry *basic; | |
330 | struct hws_diag_entry *diag; | |
2b1444f2 | 331 | unsigned short bsdes, dsdes; |
33d9e183 | 332 | size_t pos = 0; |
2b1444f2 TR |
333 | |
334 | color_fprintf(stdout, color, | |
335 | ". ... s390 AUX data: size %zu bytes\n", | |
336 | len); | |
337 | ||
338 | if (!s390_cpumsf_validate(sf->machine_type, buf, len, &bsdes, | |
339 | &dsdes)) { | |
340 | pr_err("Invalid AUX trace data block size:%zu" | |
341 | " (type:%d bsdes:%hd dsdes:%hd)\n", | |
342 | len, sf->machine_type, bsdes, dsdes); | |
343 | return; | |
344 | } | |
345 | ||
346 | /* s390 kernel always returns 4KB blocks fully occupied, | |
347 | * no partially filled SDBs. | |
348 | */ | |
349 | while (pos < len) { | |
350 | /* Handle Basic entry */ | |
351 | basic = (struct hws_basic_entry *)(buf + pos); | |
352 | if (s390_cpumsf_basic_show(color, pos, basic)) | |
353 | pos += bsdes; | |
354 | else | |
355 | return; | |
356 | ||
357 | /* Handle Diagnostic entry */ | |
358 | diag = (struct hws_diag_entry *)(buf + pos); | |
359 | if (s390_cpumsf_diag_show(color, pos, diag)) | |
360 | pos += dsdes; | |
361 | else | |
362 | return; | |
363 | ||
364 | /* Check for trailer entry */ | |
365 | if (!s390_cpumsf_reached_trailer(bsdes + dsdes, pos)) { | |
366 | /* Show trailer entry */ | |
367 | struct hws_trailer_entry te; | |
368 | ||
369 | pos = (pos + S390_CPUMSF_PAGESZ) | |
370 | & ~(S390_CPUMSF_PAGESZ - 1); | |
371 | pos -= sizeof(te); | |
372 | memcpy(&te, buf + pos, sizeof(te)); | |
373 | /* Set descriptor sizes in case of old hardware | |
374 | * where these values are not set. | |
375 | */ | |
376 | te.bsdes = bsdes; | |
377 | te.dsdes = dsdes; | |
378 | if (s390_cpumsf_trailer_show(color, pos, &te)) | |
379 | pos += sizeof(te); | |
380 | else | |
381 | return; | |
382 | } | |
383 | } | |
384 | } | |
385 | ||
386 | static void s390_cpumsf_dump_event(struct s390_cpumsf *sf, unsigned char *buf, | |
387 | size_t len) | |
388 | { | |
389 | printf(".\n"); | |
390 | s390_cpumsf_dump(sf, buf, len); | |
391 | } | |
392 | ||
33d9e183 TR |
393 | #define S390_LPP_PID_MASK 0xffffffff |
394 | ||
395 | static bool s390_cpumsf_make_event(size_t pos, | |
396 | struct hws_basic_entry *basic, | |
397 | struct s390_cpumsf_queue *sfq) | |
398 | { | |
399 | struct perf_sample sample = { | |
400 | .ip = basic->ia, | |
401 | .pid = basic->hpp & S390_LPP_PID_MASK, | |
402 | .tid = basic->hpp & S390_LPP_PID_MASK, | |
403 | .cpumode = PERF_RECORD_MISC_CPUMODE_UNKNOWN, | |
404 | .cpu = sfq->cpu, | |
405 | .period = 1 | |
406 | }; | |
407 | union perf_event event; | |
408 | ||
409 | memset(&event, 0, sizeof(event)); | |
410 | if (basic->CL == 1) /* Native LPAR mode */ | |
411 | sample.cpumode = basic->P ? PERF_RECORD_MISC_USER | |
412 | : PERF_RECORD_MISC_KERNEL; | |
413 | else if (basic->CL == 2) /* Guest kernel/user space */ | |
414 | sample.cpumode = basic->P ? PERF_RECORD_MISC_GUEST_USER | |
415 | : PERF_RECORD_MISC_GUEST_KERNEL; | |
416 | else if (basic->gpp || basic->prim_asn != 0xffff) | |
417 | /* Use heuristics on old hardware */ | |
418 | sample.cpumode = basic->P ? PERF_RECORD_MISC_GUEST_USER | |
419 | : PERF_RECORD_MISC_GUEST_KERNEL; | |
420 | else | |
421 | sample.cpumode = basic->P ? PERF_RECORD_MISC_USER | |
422 | : PERF_RECORD_MISC_KERNEL; | |
423 | ||
424 | event.sample.header.type = PERF_RECORD_SAMPLE; | |
425 | event.sample.header.misc = sample.cpumode; | |
426 | event.sample.header.size = sizeof(struct perf_event_header); | |
427 | ||
428 | pr_debug4("%s pos:%#zx ip:%#" PRIx64 " P:%d CL:%d pid:%d.%d cpumode:%d cpu:%d\n", | |
429 | __func__, pos, sample.ip, basic->P, basic->CL, sample.pid, | |
430 | sample.tid, sample.cpumode, sample.cpu); | |
431 | if (perf_session__deliver_synth_event(sfq->sf->session, &event, | |
432 | &sample)) { | |
433 | pr_err("s390 Auxiliary Trace: failed to deliver event\n"); | |
434 | return false; | |
435 | } | |
436 | return true; | |
437 | } | |
438 | ||
439 | static unsigned long long get_trailer_time(const unsigned char *buf) | |
440 | { | |
441 | struct hws_trailer_entry *te; | |
442 | unsigned long long aux_time; | |
443 | ||
444 | te = (struct hws_trailer_entry *)(buf + S390_CPUMSF_PAGESZ | |
445 | - sizeof(*te)); | |
446 | ||
447 | if (!te->clock_base) /* TOD_CLOCK_BASE value missing */ | |
448 | return 0; | |
449 | ||
450 | /* Correct calculation to convert time stamp in trailer entry to | |
451 | * nano seconds (taken from arch/s390 function tod_to_ns()). | |
452 | * TOD_CLOCK_BASE is stored in trailer entry member progusage2. | |
453 | */ | |
454 | aux_time = trailer_timestamp(te) - te->progusage2; | |
455 | aux_time = (aux_time >> 9) * 125 + (((aux_time & 0x1ff) * 125) >> 9); | |
456 | return aux_time; | |
457 | } | |
458 | ||
459 | /* Process the data samples of a single queue. The first parameter is a | |
460 | * pointer to the queue, the second parameter is the time stamp. This | |
461 | * is the time stamp: | |
462 | * - of the event that triggered this processing. | |
463 | * - or the time stamp when the last proccesing of this queue stopped. | |
464 | * In this case it stopped at a 4KB page boundary and record the | |
465 | * position on where to continue processing on the next invocation | |
466 | * (see buffer->use_data and buffer->use_size). | |
467 | * | |
468 | * When this function returns the second parameter is updated to | |
469 | * reflect the time stamp of the last processed auxiliary data entry | |
470 | * (taken from the trailer entry of that page). The caller uses this | |
471 | * returned time stamp to record the last processed entry in this | |
472 | * queue. | |
473 | * | |
474 | * The function returns: | |
475 | * 0: Processing successful. The second parameter returns the | |
476 | * time stamp from the trailer entry until which position | |
477 | * processing took place. Subsequent calls resume from this | |
478 | * position. | |
479 | * <0: An error occurred during processing. The second parameter | |
480 | * returns the maximum time stamp. | |
481 | * >0: Done on this queue. The second parameter returns the | |
482 | * maximum time stamp. | |
483 | */ | |
484 | static int s390_cpumsf_samples(struct s390_cpumsf_queue *sfq, u64 *ts) | |
485 | { | |
486 | struct s390_cpumsf *sf = sfq->sf; | |
487 | unsigned char *buf = sfq->buffer->use_data; | |
488 | size_t len = sfq->buffer->use_size; | |
489 | struct hws_basic_entry *basic; | |
490 | unsigned short bsdes, dsdes; | |
491 | size_t pos = 0; | |
492 | int err = 1; | |
493 | u64 aux_ts; | |
494 | ||
495 | if (!s390_cpumsf_validate(sf->machine_type, buf, len, &bsdes, | |
496 | &dsdes)) { | |
497 | *ts = ~0ULL; | |
498 | return -1; | |
499 | } | |
500 | ||
501 | /* Get trailer entry time stamp and check if entries in | |
502 | * this auxiliary page are ready for processing. If the | |
503 | * time stamp of the first entry is too high, whole buffer | |
504 | * can be skipped. In this case return time stamp. | |
505 | */ | |
506 | aux_ts = get_trailer_time(buf); | |
507 | if (!aux_ts) { | |
508 | pr_err("[%#08" PRIx64 "] Invalid AUX trailer entry TOD clock base\n", | |
509 | sfq->buffer->data_offset); | |
510 | aux_ts = ~0ULL; | |
511 | goto out; | |
512 | } | |
513 | if (aux_ts > *ts) { | |
514 | *ts = aux_ts; | |
515 | return 0; | |
516 | } | |
517 | ||
518 | while (pos < len) { | |
519 | /* Handle Basic entry */ | |
520 | basic = (struct hws_basic_entry *)(buf + pos); | |
521 | if (s390_cpumsf_make_event(pos, basic, sfq)) | |
522 | pos += bsdes; | |
523 | else { | |
524 | err = -EBADF; | |
525 | goto out; | |
526 | } | |
527 | ||
528 | pos += dsdes; /* Skip diagnositic entry */ | |
529 | ||
530 | /* Check for trailer entry */ | |
531 | if (!s390_cpumsf_reached_trailer(bsdes + dsdes, pos)) { | |
532 | pos = (pos + S390_CPUMSF_PAGESZ) | |
533 | & ~(S390_CPUMSF_PAGESZ - 1); | |
534 | /* Check existence of next page */ | |
535 | if (pos >= len) | |
536 | break; | |
537 | aux_ts = get_trailer_time(buf + pos); | |
538 | if (!aux_ts) { | |
539 | aux_ts = ~0ULL; | |
540 | goto out; | |
541 | } | |
542 | if (aux_ts > *ts) { | |
543 | *ts = aux_ts; | |
544 | sfq->buffer->use_data += pos; | |
545 | sfq->buffer->use_size -= pos; | |
546 | return 0; | |
547 | } | |
548 | } | |
549 | } | |
550 | out: | |
551 | *ts = aux_ts; | |
552 | sfq->buffer->use_size = 0; | |
553 | sfq->buffer->use_data = NULL; | |
554 | return err; /* Buffer completely scanned or error */ | |
555 | } | |
556 | ||
557 | /* Run the s390 auxiliary trace decoder. | |
558 | * Select the queue buffer to operate on, the caller already selected | |
559 | * the proper queue, depending on second parameter 'ts'. | |
560 | * This is the time stamp until which the auxiliary entries should | |
561 | * be processed. This value is updated by called functions and | |
562 | * returned to the caller. | |
563 | * | |
564 | * Resume processing in the current buffer. If there is no buffer | |
565 | * get a new buffer from the queue and setup start position for | |
566 | * processing. | |
567 | * When a buffer is completely processed remove it from the queue | |
568 | * before returning. | |
569 | * | |
570 | * This function returns | |
571 | * 1: When the queue is empty. Second parameter will be set to | |
572 | * maximum time stamp. | |
573 | * 0: Normal processing done. | |
574 | * <0: Error during queue buffer setup. This causes the caller | |
575 | * to stop processing completely. | |
576 | */ | |
577 | static int s390_cpumsf_run_decoder(struct s390_cpumsf_queue *sfq, | |
578 | u64 *ts) | |
579 | { | |
580 | ||
581 | struct auxtrace_buffer *buffer; | |
582 | struct auxtrace_queue *queue; | |
583 | int err; | |
584 | ||
585 | queue = &sfq->sf->queues.queue_array[sfq->queue_nr]; | |
586 | ||
587 | /* Get buffer and last position in buffer to resume | |
588 | * decoding the auxiliary entries. One buffer might be large | |
589 | * and decoding might stop in between. This depends on the time | |
590 | * stamp of the trailer entry in each page of the auxiliary | |
591 | * data and the time stamp of the event triggering the decoding. | |
592 | */ | |
593 | if (sfq->buffer == NULL) { | |
594 | sfq->buffer = buffer = auxtrace_buffer__next(queue, | |
595 | sfq->buffer); | |
596 | if (!buffer) { | |
597 | *ts = ~0ULL; | |
598 | return 1; /* Processing done on this queue */ | |
599 | } | |
600 | /* Start with a new buffer on this queue */ | |
601 | if (buffer->data) { | |
602 | buffer->use_size = buffer->size; | |
603 | buffer->use_data = buffer->data; | |
604 | } | |
766e0618 TR |
605 | if (sfq->logfile) { /* Write into log file */ |
606 | size_t rc = fwrite(buffer->data, buffer->size, 1, | |
607 | sfq->logfile); | |
608 | if (rc != 1) | |
609 | pr_err("Failed to write auxiliary data\n"); | |
610 | } | |
33d9e183 TR |
611 | } else |
612 | buffer = sfq->buffer; | |
613 | ||
614 | if (!buffer->data) { | |
615 | int fd = perf_data__fd(sfq->sf->session->data); | |
616 | ||
617 | buffer->data = auxtrace_buffer__get_data(buffer, fd); | |
618 | if (!buffer->data) | |
619 | return -ENOMEM; | |
620 | buffer->use_size = buffer->size; | |
621 | buffer->use_data = buffer->data; | |
766e0618 TR |
622 | |
623 | if (sfq->logfile) { /* Write into log file */ | |
624 | size_t rc = fwrite(buffer->data, buffer->size, 1, | |
625 | sfq->logfile); | |
626 | if (rc != 1) | |
627 | pr_err("Failed to write auxiliary data\n"); | |
628 | } | |
33d9e183 TR |
629 | } |
630 | pr_debug4("%s queue_nr:%d buffer:%" PRId64 " offset:%#" PRIx64 " size:%#zx rest:%#zx\n", | |
631 | __func__, sfq->queue_nr, buffer->buffer_nr, buffer->offset, | |
632 | buffer->size, buffer->use_size); | |
633 | err = s390_cpumsf_samples(sfq, ts); | |
634 | ||
635 | /* If non-zero, there is either an error (err < 0) or the buffer is | |
636 | * completely done (err > 0). The error is unrecoverable, usually | |
637 | * some descriptors could not be read successfully, so continue with | |
638 | * the next buffer. | |
639 | * In both cases the parameter 'ts' has been updated. | |
640 | */ | |
641 | if (err) { | |
642 | sfq->buffer = NULL; | |
643 | list_del(&buffer->list); | |
644 | auxtrace_buffer__free(buffer); | |
645 | if (err > 0) /* Buffer done, no error */ | |
646 | err = 0; | |
647 | } | |
648 | return err; | |
649 | } | |
650 | ||
651 | static struct s390_cpumsf_queue * | |
652 | s390_cpumsf_alloc_queue(struct s390_cpumsf *sf, unsigned int queue_nr) | |
653 | { | |
654 | struct s390_cpumsf_queue *sfq; | |
655 | ||
656 | sfq = zalloc(sizeof(struct s390_cpumsf_queue)); | |
657 | if (sfq == NULL) | |
658 | return NULL; | |
659 | ||
660 | sfq->sf = sf; | |
661 | sfq->queue_nr = queue_nr; | |
662 | sfq->cpu = -1; | |
766e0618 TR |
663 | if (sf->use_logfile) { |
664 | char *name; | |
665 | int rc; | |
666 | ||
667 | rc = (sf->logdir) | |
668 | ? asprintf(&name, "%s/aux.smp.%02x", | |
669 | sf->logdir, queue_nr) | |
670 | : asprintf(&name, "aux.smp.%02x", queue_nr); | |
671 | if (rc > 0) | |
672 | sfq->logfile = fopen(name, "w"); | |
673 | if (sfq->logfile == NULL) { | |
674 | pr_err("Failed to open auxiliary log file %s," | |
675 | "continue...\n", name); | |
676 | sf->use_logfile = false; | |
677 | } | |
678 | free(name); | |
679 | } | |
33d9e183 TR |
680 | return sfq; |
681 | } | |
682 | ||
683 | static int s390_cpumsf_setup_queue(struct s390_cpumsf *sf, | |
684 | struct auxtrace_queue *queue, | |
685 | unsigned int queue_nr, u64 ts) | |
686 | { | |
687 | struct s390_cpumsf_queue *sfq = queue->priv; | |
688 | ||
689 | if (list_empty(&queue->head)) | |
690 | return 0; | |
691 | ||
692 | if (sfq == NULL) { | |
693 | sfq = s390_cpumsf_alloc_queue(sf, queue_nr); | |
694 | if (!sfq) | |
695 | return -ENOMEM; | |
696 | queue->priv = sfq; | |
697 | ||
698 | if (queue->cpu != -1) | |
699 | sfq->cpu = queue->cpu; | |
700 | } | |
701 | return auxtrace_heap__add(&sf->heap, queue_nr, ts); | |
702 | } | |
703 | ||
704 | static int s390_cpumsf_setup_queues(struct s390_cpumsf *sf, u64 ts) | |
705 | { | |
706 | unsigned int i; | |
707 | int ret = 0; | |
708 | ||
709 | for (i = 0; i < sf->queues.nr_queues; i++) { | |
710 | ret = s390_cpumsf_setup_queue(sf, &sf->queues.queue_array[i], | |
711 | i, ts); | |
712 | if (ret) | |
713 | break; | |
714 | } | |
715 | return ret; | |
716 | } | |
717 | ||
718 | static int s390_cpumsf_update_queues(struct s390_cpumsf *sf, u64 ts) | |
719 | { | |
720 | if (!sf->queues.new_data) | |
721 | return 0; | |
722 | ||
723 | sf->queues.new_data = false; | |
724 | return s390_cpumsf_setup_queues(sf, ts); | |
725 | } | |
726 | ||
727 | static int s390_cpumsf_process_queues(struct s390_cpumsf *sf, u64 timestamp) | |
728 | { | |
729 | unsigned int queue_nr; | |
730 | u64 ts; | |
731 | int ret; | |
732 | ||
733 | while (1) { | |
734 | struct auxtrace_queue *queue; | |
735 | struct s390_cpumsf_queue *sfq; | |
736 | ||
737 | if (!sf->heap.heap_cnt) | |
738 | return 0; | |
739 | ||
740 | if (sf->heap.heap_array[0].ordinal >= timestamp) | |
741 | return 0; | |
742 | ||
743 | queue_nr = sf->heap.heap_array[0].queue_nr; | |
744 | queue = &sf->queues.queue_array[queue_nr]; | |
745 | sfq = queue->priv; | |
746 | ||
747 | auxtrace_heap__pop(&sf->heap); | |
748 | if (sf->heap.heap_cnt) { | |
749 | ts = sf->heap.heap_array[0].ordinal + 1; | |
750 | if (ts > timestamp) | |
751 | ts = timestamp; | |
752 | } else { | |
753 | ts = timestamp; | |
754 | } | |
755 | ||
756 | ret = s390_cpumsf_run_decoder(sfq, &ts); | |
757 | if (ret < 0) { | |
758 | auxtrace_heap__add(&sf->heap, queue_nr, ts); | |
759 | return ret; | |
760 | } | |
761 | if (!ret) { | |
762 | ret = auxtrace_heap__add(&sf->heap, queue_nr, ts); | |
763 | if (ret < 0) | |
764 | return ret; | |
765 | } | |
766 | } | |
767 | return 0; | |
768 | } | |
769 | ||
770 | static int s390_cpumsf_synth_error(struct s390_cpumsf *sf, int code, int cpu, | |
771 | pid_t pid, pid_t tid, u64 ip) | |
772 | { | |
773 | char msg[MAX_AUXTRACE_ERROR_MSG]; | |
774 | union perf_event event; | |
775 | int err; | |
776 | ||
777 | strncpy(msg, "Lost Auxiliary Trace Buffer", sizeof(msg) - 1); | |
778 | auxtrace_synth_error(&event.auxtrace_error, PERF_AUXTRACE_ERROR_ITRACE, | |
779 | code, cpu, pid, tid, ip, msg); | |
780 | ||
781 | err = perf_session__deliver_synth_event(sf->session, &event, NULL); | |
782 | if (err) | |
783 | pr_err("s390 Auxiliary Trace: failed to deliver error event," | |
784 | "error %d\n", err); | |
785 | return err; | |
786 | } | |
787 | ||
788 | static int s390_cpumsf_lost(struct s390_cpumsf *sf, struct perf_sample *sample) | |
789 | { | |
790 | return s390_cpumsf_synth_error(sf, 1, sample->cpu, | |
791 | sample->pid, sample->tid, 0); | |
792 | } | |
793 | ||
b96e6615 TR |
794 | static int |
795 | s390_cpumsf_process_event(struct perf_session *session __maybe_unused, | |
33d9e183 TR |
796 | union perf_event *event, |
797 | struct perf_sample *sample, | |
798 | struct perf_tool *tool) | |
b96e6615 | 799 | { |
33d9e183 TR |
800 | struct s390_cpumsf *sf = container_of(session->auxtrace, |
801 | struct s390_cpumsf, | |
802 | auxtrace); | |
803 | u64 timestamp = sample->time; | |
804 | int err = 0; | |
805 | ||
806 | if (dump_trace) | |
807 | return 0; | |
808 | ||
809 | if (!tool->ordered_events) { | |
810 | pr_err("s390 Auxiliary Trace requires ordered events\n"); | |
811 | return -EINVAL; | |
812 | } | |
813 | ||
814 | if (event->header.type == PERF_RECORD_AUX && | |
815 | event->aux.flags & PERF_AUX_FLAG_TRUNCATED) | |
816 | return s390_cpumsf_lost(sf, sample); | |
817 | ||
818 | if (timestamp) { | |
819 | err = s390_cpumsf_update_queues(sf, timestamp); | |
820 | if (!err) | |
821 | err = s390_cpumsf_process_queues(sf, timestamp); | |
822 | } | |
823 | return err; | |
b96e6615 TR |
824 | } |
825 | ||
33d9e183 TR |
826 | struct s390_cpumsf_synth { |
827 | struct perf_tool cpumsf_tool; | |
828 | struct perf_session *session; | |
829 | }; | |
830 | ||
b96e6615 | 831 | static int |
2b1444f2 | 832 | s390_cpumsf_process_auxtrace_event(struct perf_session *session, |
b96e6615 TR |
833 | union perf_event *event __maybe_unused, |
834 | struct perf_tool *tool __maybe_unused) | |
835 | { | |
2b1444f2 TR |
836 | struct s390_cpumsf *sf = container_of(session->auxtrace, |
837 | struct s390_cpumsf, | |
838 | auxtrace); | |
839 | ||
840 | int fd = perf_data__fd(session->data); | |
841 | struct auxtrace_buffer *buffer; | |
842 | off_t data_offset; | |
843 | int err; | |
844 | ||
33d9e183 TR |
845 | if (sf->data_queued) |
846 | return 0; | |
847 | ||
2b1444f2 TR |
848 | if (perf_data__is_pipe(session->data)) { |
849 | data_offset = 0; | |
850 | } else { | |
851 | data_offset = lseek(fd, 0, SEEK_CUR); | |
852 | if (data_offset == -1) | |
853 | return -errno; | |
854 | } | |
855 | ||
856 | err = auxtrace_queues__add_event(&sf->queues, session, event, | |
857 | data_offset, &buffer); | |
858 | if (err) | |
859 | return err; | |
860 | ||
861 | /* Dump here after copying piped trace out of the pipe */ | |
862 | if (dump_trace) { | |
863 | if (auxtrace_buffer__get_data(buffer, fd)) { | |
864 | s390_cpumsf_dump_event(sf, buffer->data, | |
865 | buffer->size); | |
866 | auxtrace_buffer__put_data(buffer); | |
867 | } | |
868 | } | |
b96e6615 TR |
869 | return 0; |
870 | } | |
871 | ||
33d9e183 TR |
872 | static void s390_cpumsf_free_events(struct perf_session *session __maybe_unused) |
873 | { | |
874 | } | |
875 | ||
b96e6615 TR |
876 | static int s390_cpumsf_flush(struct perf_session *session __maybe_unused, |
877 | struct perf_tool *tool __maybe_unused) | |
878 | { | |
879 | return 0; | |
880 | } | |
881 | ||
33d9e183 | 882 | static void s390_cpumsf_free_queues(struct perf_session *session) |
b96e6615 TR |
883 | { |
884 | struct s390_cpumsf *sf = container_of(session->auxtrace, | |
885 | struct s390_cpumsf, | |
33d9e183 | 886 | auxtrace); |
b96e6615 TR |
887 | struct auxtrace_queues *queues = &sf->queues; |
888 | unsigned int i; | |
889 | ||
766e0618 TR |
890 | for (i = 0; i < queues->nr_queues; i++) { |
891 | struct s390_cpumsf_queue *sfq = (struct s390_cpumsf_queue *) | |
892 | queues->queue_array[i].priv; | |
893 | ||
894 | if (sfq != NULL && sfq->logfile) { | |
895 | fclose(sfq->logfile); | |
896 | sfq->logfile = NULL; | |
897 | } | |
b96e6615 | 898 | zfree(&queues->queue_array[i].priv); |
766e0618 | 899 | } |
b96e6615 TR |
900 | auxtrace_queues__free(queues); |
901 | } | |
902 | ||
903 | static void s390_cpumsf_free(struct perf_session *session) | |
904 | { | |
905 | struct s390_cpumsf *sf = container_of(session->auxtrace, | |
906 | struct s390_cpumsf, | |
907 | auxtrace); | |
908 | ||
909 | auxtrace_heap__free(&sf->heap); | |
33d9e183 | 910 | s390_cpumsf_free_queues(session); |
b96e6615 | 911 | session->auxtrace = NULL; |
766e0618 | 912 | free(sf->logdir); |
b96e6615 TR |
913 | free(sf); |
914 | } | |
915 | ||
2b1444f2 TR |
916 | static int s390_cpumsf_get_type(const char *cpuid) |
917 | { | |
918 | int ret, family = 0; | |
919 | ||
920 | ret = sscanf(cpuid, "%*[^,],%u", &family); | |
921 | return (ret == 1) ? family : 0; | |
922 | } | |
923 | ||
33d9e183 TR |
924 | /* Check itrace options set on perf report command. |
925 | * Return true, if none are set or all options specified can be | |
766e0618 | 926 | * handled on s390 (currently only option 'd' for logging. |
33d9e183 TR |
927 | * Return false otherwise. |
928 | */ | |
929 | static bool check_auxtrace_itrace(struct itrace_synth_opts *itops) | |
930 | { | |
766e0618 TR |
931 | bool ison = false; |
932 | ||
33d9e183 TR |
933 | if (!itops || !itops->set) |
934 | return true; | |
766e0618 TR |
935 | ison = itops->inject || itops->instructions || itops->branches || |
936 | itops->transactions || itops->ptwrites || | |
937 | itops->pwr_events || itops->errors || | |
938 | itops->dont_decode || itops->calls || itops->returns || | |
939 | itops->callchain || itops->thread_stack || | |
940 | itops->last_branch; | |
941 | if (!ison) | |
942 | return true; | |
943 | pr_err("Unsupported --itrace options specified\n"); | |
33d9e183 TR |
944 | return false; |
945 | } | |
946 | ||
766e0618 TR |
947 | /* Check for AUXTRACE dump directory if it is needed. |
948 | * On failure print an error message but continue. | |
949 | * Return 0 on wrong keyword in config file and 1 otherwise. | |
950 | */ | |
951 | static int s390_cpumsf__config(const char *var, const char *value, void *cb) | |
952 | { | |
953 | struct s390_cpumsf *sf = cb; | |
954 | struct stat stbuf; | |
955 | int rc; | |
956 | ||
957 | if (strcmp(var, "auxtrace.dumpdir")) | |
958 | return 0; | |
959 | sf->logdir = strdup(value); | |
960 | if (sf->logdir == NULL) { | |
961 | pr_err("Failed to find auxtrace log directory %s," | |
962 | " continue with current directory...\n", value); | |
963 | return 1; | |
964 | } | |
965 | rc = stat(sf->logdir, &stbuf); | |
966 | if (rc == -1 || !S_ISDIR(stbuf.st_mode)) { | |
967 | pr_err("Missing auxtrace log directory %s," | |
968 | " continue with current directory...\n", value); | |
969 | free(sf->logdir); | |
970 | sf->logdir = NULL; | |
971 | } | |
972 | return 1; | |
973 | } | |
974 | ||
b96e6615 TR |
975 | int s390_cpumsf_process_auxtrace_info(union perf_event *event, |
976 | struct perf_session *session) | |
977 | { | |
978 | struct auxtrace_info_event *auxtrace_info = &event->auxtrace_info; | |
979 | struct s390_cpumsf *sf; | |
980 | int err; | |
981 | ||
982 | if (auxtrace_info->header.size < sizeof(struct auxtrace_info_event)) | |
983 | return -EINVAL; | |
984 | ||
985 | sf = zalloc(sizeof(struct s390_cpumsf)); | |
986 | if (sf == NULL) | |
987 | return -ENOMEM; | |
988 | ||
33d9e183 TR |
989 | if (!check_auxtrace_itrace(session->itrace_synth_opts)) { |
990 | err = -EINVAL; | |
991 | goto err_free; | |
992 | } | |
766e0618 TR |
993 | sf->use_logfile = session->itrace_synth_opts->log; |
994 | if (sf->use_logfile) | |
995 | perf_config(s390_cpumsf__config, sf); | |
33d9e183 | 996 | |
b96e6615 TR |
997 | err = auxtrace_queues__init(&sf->queues); |
998 | if (err) | |
999 | goto err_free; | |
1000 | ||
1001 | sf->session = session; | |
1002 | sf->machine = &session->machines.host; /* No kvm support */ | |
1003 | sf->auxtrace_type = auxtrace_info->type; | |
1004 | sf->pmu_type = PERF_TYPE_RAW; | |
2b1444f2 | 1005 | sf->machine_type = s390_cpumsf_get_type(session->evlist->env->cpuid); |
b96e6615 TR |
1006 | |
1007 | sf->auxtrace.process_event = s390_cpumsf_process_event; | |
1008 | sf->auxtrace.process_auxtrace_event = s390_cpumsf_process_auxtrace_event; | |
1009 | sf->auxtrace.flush_events = s390_cpumsf_flush; | |
1010 | sf->auxtrace.free_events = s390_cpumsf_free_events; | |
1011 | sf->auxtrace.free = s390_cpumsf_free; | |
1012 | session->auxtrace = &sf->auxtrace; | |
1013 | ||
33d9e183 TR |
1014 | if (dump_trace) |
1015 | return 0; | |
1016 | ||
1017 | err = auxtrace_queues__process_index(&sf->queues, session); | |
1018 | if (err) | |
1019 | goto err_free_queues; | |
1020 | ||
1021 | if (sf->queues.populated) | |
1022 | sf->data_queued = true; | |
1023 | ||
b96e6615 TR |
1024 | return 0; |
1025 | ||
33d9e183 TR |
1026 | err_free_queues: |
1027 | auxtrace_queues__free(&sf->queues); | |
1028 | session->auxtrace = NULL; | |
b96e6615 | 1029 | err_free: |
766e0618 | 1030 | free(sf->logdir); |
b96e6615 TR |
1031 | free(sf); |
1032 | return err; | |
1033 | } |