/*
 * kernel/trace/trace_syscalls.c
 *
 * Trace syscall entry/exit events through the ftrace ring buffer and,
 * when CONFIG_PERF_EVENTS is set, through the perf trace buffers.
 */
1#include <trace/syscall.h>
2#include <trace/events/syscalls.h>
3#include <linux/slab.h>
4#include <linux/kernel.h>
5#include <linux/ftrace.h>
6#include <linux/perf_event.h>
7#include <asm/syscall.h>
8
9#include "trace_output.h"
10#include "trace.h"
11
/* Serializes updates to the refcounts and enabled-syscall bitmaps below. */
static DEFINE_MUTEX(syscall_trace_lock);
/* Number of trace events currently registered for syscall entry/exit. */
static int sys_refcount_enter;
static int sys_refcount_exit;
/* One bit per syscall nr: ftrace tracing enabled for that syscall. */
static DECLARE_BITMAP(enabled_enter_syscalls, NR_syscalls);
static DECLARE_BITMAP(enabled_exit_syscalls, NR_syscalls);

static int syscall_enter_register(struct ftrace_event_call *event,
				 enum trace_reg type);
static int syscall_exit_register(struct ftrace_event_call *event,
				 enum trace_reg type);

static int syscall_enter_define_fields(struct ftrace_event_call *call);
static int syscall_exit_define_fields(struct ftrace_event_call *call);
25
26static struct list_head *
27syscall_get_enter_fields(struct ftrace_event_call *call)
28{
29 struct syscall_metadata *entry = call->data;
30
31 return &entry->enter_fields;
32}
33
34static struct list_head *
35syscall_get_exit_fields(struct ftrace_event_call *call)
36{
37 struct syscall_metadata *entry = call->data;
38
39 return &entry->exit_fields;
40}
41
/*
 * Event class shared by every syscall-entry trace event: common
 * registration, field-definition and field-list callbacks.
 */
struct ftrace_event_class event_class_syscall_enter = {
	.system = "syscalls",
	.reg = syscall_enter_register,
	.define_fields = syscall_enter_define_fields,
	.get_fields = syscall_get_enter_fields,
};
48
/*
 * Event class shared by every syscall-exit trace event: common
 * registration, field-definition and field-list callbacks.
 */
struct ftrace_event_class event_class_syscall_exit = {
	.system = "syscalls",
	.reg = syscall_exit_register,
	.define_fields = syscall_exit_define_fields,
	.get_fields = syscall_get_exit_fields,
};
55
/* Linker-provided bounds of the syscall metadata section. */
extern unsigned long __start_syscalls_metadata[];
extern unsigned long __stop_syscalls_metadata[];

/* Table indexed by syscall number; NULL where no metadata was found. */
static struct syscall_metadata **syscalls_metadata;
60
61static struct syscall_metadata *find_syscall_meta(unsigned long syscall)
62{
63 struct syscall_metadata *start;
64 struct syscall_metadata *stop;
65 char str[KSYM_SYMBOL_LEN];
66
67
68 start = (struct syscall_metadata *)__start_syscalls_metadata;
69 stop = (struct syscall_metadata *)__stop_syscalls_metadata;
70 kallsyms_lookup(syscall, NULL, NULL, NULL, str);
71
72 for ( ; start < stop; start++) {
73 /*
74 * Only compare after the "sys" prefix. Archs that use
75 * syscall wrappers may have syscalls symbols aliases prefixed
76 * with "SyS" instead of "sys", leading to an unwanted
77 * mismatch.
78 */
79 if (start->name && !strcmp(start->name + 3, str + 3))
80 return start;
81 }
82 return NULL;
83}
84
85static struct syscall_metadata *syscall_nr_to_meta(int nr)
86{
87 if (!syscalls_metadata || nr >= NR_syscalls || nr < 0)
88 return NULL;
89
90 return syscalls_metadata[nr];
91}
92
/*
 * Render a syscall-entry event as "name(arg: val, ...)".  Returns
 * TRACE_TYPE_PARTIAL_LINE as soon as the seq buffer fills mid-line,
 * so a retry can be attempted with a larger buffer.
 */
enum print_line_t
print_syscall_enter(struct trace_iterator *iter, int flags)
{
	struct trace_seq *s = &iter->seq;
	struct trace_entry *ent = iter->ent;
	struct syscall_trace_enter *trace;
	struct syscall_metadata *entry;
	int i, ret, syscall;

	trace = (typeof(trace))ent;
	syscall = trace->nr;
	entry = syscall_nr_to_meta(syscall);

	/* No metadata: emit just the trailing newline. */
	if (!entry)
		goto end;

	/* The event type recorded must match the registered entry event. */
	if (entry->enter_event->id != ent->type) {
		WARN_ON_ONCE(1);
		goto end;
	}

	ret = trace_seq_printf(s, "%s(", entry->name);
	if (!ret)
		return TRACE_TYPE_PARTIAL_LINE;

	for (i = 0; i < entry->nb_args; i++) {
		/* parameter types, only in verbose mode */
		if (trace_flags & TRACE_ITER_VERBOSE) {
			ret = trace_seq_printf(s, "%s ", entry->types[i]);
			if (!ret)
				return TRACE_TYPE_PARTIAL_LINE;
		}
		/* parameter values */
		ret = trace_seq_printf(s, "%s: %lx%s", entry->args[i],
				       trace->args[i],
				       i == entry->nb_args - 1 ? "" : ", ");
		if (!ret)
			return TRACE_TYPE_PARTIAL_LINE;
	}

	ret = trace_seq_putc(s, ')');
	if (!ret)
		return TRACE_TYPE_PARTIAL_LINE;

end:
	ret = trace_seq_putc(s, '\n');
	if (!ret)
		return TRACE_TYPE_PARTIAL_LINE;

	return TRACE_TYPE_HANDLED;
}
144
/*
 * Render a syscall-exit event as "name -> 0xRET".  Returns
 * TRACE_TYPE_PARTIAL_LINE when the seq buffer fills.
 */
enum print_line_t
print_syscall_exit(struct trace_iterator *iter, int flags)
{
	struct trace_seq *s = &iter->seq;
	struct trace_entry *ent = iter->ent;
	struct syscall_trace_exit *trace;
	int syscall;
	struct syscall_metadata *entry;
	int ret;

	trace = (typeof(trace))ent;
	syscall = trace->nr;
	entry = syscall_nr_to_meta(syscall);

	/* No metadata: emit an empty line and call it handled. */
	if (!entry) {
		trace_seq_printf(s, "\n");
		return TRACE_TYPE_HANDLED;
	}

	/* The event type recorded must match the registered exit event. */
	if (entry->exit_event->id != ent->type) {
		WARN_ON_ONCE(1);
		return TRACE_TYPE_UNHANDLED;
	}

	ret = trace_seq_printf(s, "%s -> 0x%lx\n", entry->name,
				trace->ret);
	if (!ret)
		return TRACE_TYPE_PARTIAL_LINE;

	return TRACE_TYPE_HANDLED;
}
176
/* Never defined: referencing it is a link-time size-mismatch assertion. */
extern char *__bad_type_size(void);

/*
 * Expand to the type-name/field-name/offset/size/signedness argument
 * pack for trace_define_field(), after checking at compile time that
 * sizeof(@type) matches the corresponding member of the local 'trace'.
 */
#define SYSCALL_FIELD(type, name)					\
	sizeof(type) != sizeof(trace.name) ?				\
		__bad_type_size() :					\
		#type, #name, offsetof(typeof(trace), name),		\
		sizeof(trace.name), is_signed_type(type)
184
/*
 * Write the print_fmt string for a syscall-entry event into @buf:
 * a quoted format ("arg: 0x%016lx, ...") followed by one REC->arg
 * accessor per argument.  With @len == 0 nothing is written and only
 * the needed length is computed (snprintf contract).
 *
 * Returns the length of the resulting print_fmt (excluding the NUL).
 */
static
int __set_enter_print_fmt(struct syscall_metadata *entry, char *buf, int len)
{
	int i;
	int pos = 0;

	/* When len=0, we just calculate the needed length */
#define LEN_OR_ZERO (len ? len - pos : 0)

	pos += snprintf(buf + pos, LEN_OR_ZERO, "\"");
	for (i = 0; i < entry->nb_args; i++) {
		/* zero-padded hex, width = bytes of unsigned long */
		pos += snprintf(buf + pos, LEN_OR_ZERO, "%s: 0x%%0%zulx%s",
				entry->args[i], sizeof(unsigned long),
				i == entry->nb_args - 1 ? "" : ", ");
	}
	pos += snprintf(buf + pos, LEN_OR_ZERO, "\"");

	for (i = 0; i < entry->nb_args; i++) {
		pos += snprintf(buf + pos, LEN_OR_ZERO,
				", ((unsigned long)(REC->%s))", entry->args[i]);
	}

#undef LEN_OR_ZERO

	/* return the length of print_fmt */
	return pos;
}
212
/*
 * Install @call->print_fmt.  Exit events share a static literal (never
 * freed); entry events get a kmalloc'd string built in two passes:
 * first a length computation, then the actual write.
 *
 * Returns 0 on success, -ENOMEM if the buffer allocation fails.
 */
static int set_syscall_print_fmt(struct ftrace_event_call *call)
{
	char *print_fmt;
	int len;
	struct syscall_metadata *entry = call->data;

	if (entry->enter_event != call) {
		/* exit event: fixed format, no allocation needed */
		call->print_fmt = "\"0x%lx\", REC->ret";
		return 0;
	}

	/* First: called with 0 length to calculate the needed length */
	len = __set_enter_print_fmt(entry, NULL, 0);

	print_fmt = kmalloc(len + 1, GFP_KERNEL);
	if (!print_fmt)
		return -ENOMEM;

	/* Second: actually write the @print_fmt */
	__set_enter_print_fmt(entry, print_fmt, len + 1);
	call->print_fmt = print_fmt;

	return 0;
}
237
238static void free_syscall_print_fmt(struct ftrace_event_call *call)
239{
240 struct syscall_metadata *entry = call->data;
241
242 if (entry->enter_event == call)
243 kfree(call->print_fmt);
244}
245
246static int syscall_enter_define_fields(struct ftrace_event_call *call)
247{
248 struct syscall_trace_enter trace;
249 struct syscall_metadata *meta = call->data;
250 int ret;
251 int i;
252 int offset = offsetof(typeof(trace), args);
253
254 ret = trace_define_field(call, SYSCALL_FIELD(int, nr), FILTER_OTHER);
255 if (ret)
256 return ret;
257
258 for (i = 0; i < meta->nb_args; i++) {
259 ret = trace_define_field(call, meta->types[i],
260 meta->args[i], offset,
261 sizeof(unsigned long), 0,
262 FILTER_OTHER);
263 offset += sizeof(unsigned long);
264 }
265
266 return ret;
267}
268
269static int syscall_exit_define_fields(struct ftrace_event_call *call)
270{
271 struct syscall_trace_exit trace;
272 int ret;
273
274 ret = trace_define_field(call, SYSCALL_FIELD(int, nr), FILTER_OTHER);
275 if (ret)
276 return ret;
277
278 ret = trace_define_field(call, SYSCALL_FIELD(long, ret),
279 FILTER_OTHER);
280
281 return ret;
282}
283
/*
 * sys_enter tracepoint probe: record the syscall number and arguments
 * of the current task into the ftrace ring buffer, if tracing is
 * enabled for that syscall and the event passes the filter.
 */
void ftrace_syscall_enter(void *ignore, struct pt_regs *regs, long id)
{
	struct syscall_trace_enter *entry;
	struct syscall_metadata *sys_data;
	struct ring_buffer_event *event;
	struct ring_buffer *buffer;
	int size;
	int syscall_nr;

	syscall_nr = syscall_get_nr(current, regs);
	/* syscall_get_nr() can return -1 for non-syscall entry */
	if (syscall_nr < 0)
		return;
	if (!test_bit(syscall_nr, enabled_enter_syscalls))
		return;

	sys_data = syscall_nr_to_meta(syscall_nr);
	if (!sys_data)
		return;

	/* header plus one unsigned long per syscall argument */
	size = sizeof(*entry) + sizeof(unsigned long) * sys_data->nb_args;

	event = trace_current_buffer_lock_reserve(&buffer,
			sys_data->enter_event->id, size, 0, 0);
	if (!event)
		return;

	entry = ring_buffer_event_data(event);
	entry->nr = syscall_nr;
	syscall_get_arguments(current, regs, 0, sys_data->nb_args, entry->args);

	/* commit only if the event is not discarded by the filter */
	if (!filter_current_check_discard(buffer, sys_data->enter_event,
					  entry, event))
		trace_current_buffer_unlock_commit(buffer, event, 0, 0);
}
318
/*
 * sys_exit tracepoint probe: record the syscall number and return
 * value of the current task into the ftrace ring buffer, if tracing
 * is enabled for that syscall and the event passes the filter.
 */
void ftrace_syscall_exit(void *ignore, struct pt_regs *regs, long ret)
{
	struct syscall_trace_exit *entry;
	struct syscall_metadata *sys_data;
	struct ring_buffer_event *event;
	struct ring_buffer *buffer;
	int syscall_nr;

	syscall_nr = syscall_get_nr(current, regs);
	/* syscall_get_nr() can return -1 for non-syscall entry */
	if (syscall_nr < 0)
		return;
	if (!test_bit(syscall_nr, enabled_exit_syscalls))
		return;

	sys_data = syscall_nr_to_meta(syscall_nr);
	if (!sys_data)
		return;

	event = trace_current_buffer_lock_reserve(&buffer,
			sys_data->exit_event->id, sizeof(*entry), 0, 0);
	if (!event)
		return;

	entry = ring_buffer_event_data(event);
	entry->nr = syscall_nr;
	entry->ret = syscall_get_return_value(current, regs);

	/* commit only if the event is not discarded by the filter */
	if (!filter_current_check_discard(buffer, sys_data->exit_event,
					  entry, event))
		trace_current_buffer_unlock_commit(buffer, event, 0, 0);
}
350
351int reg_event_syscall_enter(struct ftrace_event_call *call)
352{
353 int ret = 0;
354 int num;
355
356 num = ((struct syscall_metadata *)call->data)->syscall_nr;
357 if (num < 0 || num >= NR_syscalls)
358 return -ENOSYS;
359 mutex_lock(&syscall_trace_lock);
360 if (!sys_refcount_enter)
361 ret = register_trace_sys_enter(ftrace_syscall_enter, NULL);
362 if (!ret) {
363 set_bit(num, enabled_enter_syscalls);
364 sys_refcount_enter++;
365 }
366 mutex_unlock(&syscall_trace_lock);
367 return ret;
368}
369
370void unreg_event_syscall_enter(struct ftrace_event_call *call)
371{
372 int num;
373
374 num = ((struct syscall_metadata *)call->data)->syscall_nr;
375 if (num < 0 || num >= NR_syscalls)
376 return;
377 mutex_lock(&syscall_trace_lock);
378 sys_refcount_enter--;
379 clear_bit(num, enabled_enter_syscalls);
380 if (!sys_refcount_enter)
381 unregister_trace_sys_enter(ftrace_syscall_enter, NULL);
382 mutex_unlock(&syscall_trace_lock);
383}
384
385int reg_event_syscall_exit(struct ftrace_event_call *call)
386{
387 int ret = 0;
388 int num;
389
390 num = ((struct syscall_metadata *)call->data)->syscall_nr;
391 if (num < 0 || num >= NR_syscalls)
392 return -ENOSYS;
393 mutex_lock(&syscall_trace_lock);
394 if (!sys_refcount_exit)
395 ret = register_trace_sys_exit(ftrace_syscall_exit, NULL);
396 if (!ret) {
397 set_bit(num, enabled_exit_syscalls);
398 sys_refcount_exit++;
399 }
400 mutex_unlock(&syscall_trace_lock);
401 return ret;
402}
403
404void unreg_event_syscall_exit(struct ftrace_event_call *call)
405{
406 int num;
407
408 num = ((struct syscall_metadata *)call->data)->syscall_nr;
409 if (num < 0 || num >= NR_syscalls)
410 return;
411 mutex_lock(&syscall_trace_lock);
412 sys_refcount_exit--;
413 clear_bit(num, enabled_exit_syscalls);
414 if (!sys_refcount_exit)
415 unregister_trace_sys_exit(ftrace_syscall_exit, NULL);
416 mutex_unlock(&syscall_trace_lock);
417}
418
419int init_syscall_trace(struct ftrace_event_call *call)
420{
421 int id;
422
423 if (set_syscall_print_fmt(call) < 0)
424 return -ENOMEM;
425
426 id = trace_event_raw_init(call);
427
428 if (id < 0) {
429 free_syscall_print_fmt(call);
430 return id;
431 }
432
433 return id;
434}
435
/* Resolve a syscall number to the address of its handler. */
unsigned long __init arch_syscall_addr(int nr)
{
	return (unsigned long)sys_call_table[nr];
}
440
441int __init init_ftrace_syscalls(void)
442{
443 struct syscall_metadata *meta;
444 unsigned long addr;
445 int i;
446
447 syscalls_metadata = kzalloc(sizeof(*syscalls_metadata) *
448 NR_syscalls, GFP_KERNEL);
449 if (!syscalls_metadata) {
450 WARN_ON(1);
451 return -ENOMEM;
452 }
453
454 for (i = 0; i < NR_syscalls; i++) {
455 addr = arch_syscall_addr(i);
456 meta = find_syscall_meta(addr);
457 if (!meta)
458 continue;
459
460 meta->syscall_nr = i;
461 syscalls_metadata[i] = meta;
462 }
463
464 return 0;
465}
466core_initcall(init_ftrace_syscalls);
467
468#ifdef CONFIG_PERF_EVENTS
469
/* One bit per syscall nr: perf tracing enabled for that syscall. */
static DECLARE_BITMAP(enabled_perf_enter_syscalls, NR_syscalls);
static DECLARE_BITMAP(enabled_perf_exit_syscalls, NR_syscalls);
/* Number of perf events currently registered for syscall entry/exit. */
static int sys_perf_refcount_enter;
static int sys_perf_refcount_exit;
474
475static void perf_syscall_enter(void *ignore, struct pt_regs *regs, long id)
476{
477 struct syscall_metadata *sys_data;
478 struct syscall_trace_enter *rec;
479 unsigned long flags;
480 int syscall_nr;
481 int rctx;
482 int size;
483
484 syscall_nr = syscall_get_nr(current, regs);
485 if (!test_bit(syscall_nr, enabled_perf_enter_syscalls))
486 return;
487
488 sys_data = syscall_nr_to_meta(syscall_nr);
489 if (!sys_data)
490 return;
491
492 /* get the size after alignment with the u32 buffer size field */
493 size = sizeof(unsigned long) * sys_data->nb_args + sizeof(*rec);
494 size = ALIGN(size + sizeof(u32), sizeof(u64));
495 size -= sizeof(u32);
496
497 if (WARN_ONCE(size > PERF_MAX_TRACE_SIZE,
498 "perf buffer not large enough"))
499 return;
500
501 rec = (struct syscall_trace_enter *)perf_trace_buf_prepare(size,
502 sys_data->enter_event->id, &rctx, &flags);
503 if (!rec)
504 return;
505
506 rec->nr = syscall_nr;
507 syscall_get_arguments(current, regs, 0, sys_data->nb_args,
508 (unsigned long *)&rec->args);
509 perf_trace_buf_submit(rec, size, rctx, 0, 1, flags, regs);
510}
511
512int perf_sysenter_enable(struct ftrace_event_call *call)
513{
514 int ret = 0;
515 int num;
516
517 num = ((struct syscall_metadata *)call->data)->syscall_nr;
518
519 mutex_lock(&syscall_trace_lock);
520 if (!sys_perf_refcount_enter)
521 ret = register_trace_sys_enter(perf_syscall_enter, NULL);
522 if (ret) {
523 pr_info("event trace: Could not activate"
524 "syscall entry trace point");
525 } else {
526 set_bit(num, enabled_perf_enter_syscalls);
527 sys_perf_refcount_enter++;
528 }
529 mutex_unlock(&syscall_trace_lock);
530 return ret;
531}
532
533void perf_sysenter_disable(struct ftrace_event_call *call)
534{
535 int num;
536
537 num = ((struct syscall_metadata *)call->data)->syscall_nr;
538
539 mutex_lock(&syscall_trace_lock);
540 sys_perf_refcount_enter--;
541 clear_bit(num, enabled_perf_enter_syscalls);
542 if (!sys_perf_refcount_enter)
543 unregister_trace_sys_enter(perf_syscall_enter, NULL);
544 mutex_unlock(&syscall_trace_lock);
545}
546
547static void perf_syscall_exit(void *ignore, struct pt_regs *regs, long ret)
548{
549 struct syscall_metadata *sys_data;
550 struct syscall_trace_exit *rec;
551 unsigned long flags;
552 int syscall_nr;
553 int rctx;
554 int size;
555
556 syscall_nr = syscall_get_nr(current, regs);
557 if (!test_bit(syscall_nr, enabled_perf_exit_syscalls))
558 return;
559
560 sys_data = syscall_nr_to_meta(syscall_nr);
561 if (!sys_data)
562 return;
563
564 /* We can probably do that at build time */
565 size = ALIGN(sizeof(*rec) + sizeof(u32), sizeof(u64));
566 size -= sizeof(u32);
567
568 /*
569 * Impossible, but be paranoid with the future
570 * How to put this check outside runtime?
571 */
572 if (WARN_ONCE(size > PERF_MAX_TRACE_SIZE,
573 "exit event has grown above perf buffer size"))
574 return;
575
576 rec = (struct syscall_trace_exit *)perf_trace_buf_prepare(size,
577 sys_data->exit_event->id, &rctx, &flags);
578 if (!rec)
579 return;
580
581 rec->nr = syscall_nr;
582 rec->ret = syscall_get_return_value(current, regs);
583
584 perf_trace_buf_submit(rec, size, rctx, 0, 1, flags, regs);
585}
586
587int perf_sysexit_enable(struct ftrace_event_call *call)
588{
589 int ret = 0;
590 int num;
591
592 num = ((struct syscall_metadata *)call->data)->syscall_nr;
593
594 mutex_lock(&syscall_trace_lock);
595 if (!sys_perf_refcount_exit)
596 ret = register_trace_sys_exit(perf_syscall_exit, NULL);
597 if (ret) {
598 pr_info("event trace: Could not activate"
599 "syscall exit trace point");
600 } else {
601 set_bit(num, enabled_perf_exit_syscalls);
602 sys_perf_refcount_exit++;
603 }
604 mutex_unlock(&syscall_trace_lock);
605 return ret;
606}
607
608void perf_sysexit_disable(struct ftrace_event_call *call)
609{
610 int num;
611
612 num = ((struct syscall_metadata *)call->data)->syscall_nr;
613
614 mutex_lock(&syscall_trace_lock);
615 sys_perf_refcount_exit--;
616 clear_bit(num, enabled_perf_exit_syscalls);
617 if (!sys_perf_refcount_exit)
618 unregister_trace_sys_exit(perf_syscall_exit, NULL);
619 mutex_unlock(&syscall_trace_lock);
620}
621
622#endif /* CONFIG_PERF_EVENTS */
623
/*
 * Dispatch .reg callbacks for syscall-entry events: route register/
 * unregister requests from the trace and perf subsystems to the
 * matching enable/disable helpers.  Unknown types are ignored (0).
 */
static int syscall_enter_register(struct ftrace_event_call *event,
				 enum trace_reg type)
{
	switch (type) {
	case TRACE_REG_REGISTER:
		return reg_event_syscall_enter(event);
	case TRACE_REG_UNREGISTER:
		unreg_event_syscall_enter(event);
		return 0;

#ifdef CONFIG_PERF_EVENTS
	case TRACE_REG_PERF_REGISTER:
		return perf_sysenter_enable(event);
	case TRACE_REG_PERF_UNREGISTER:
		perf_sysenter_disable(event);
		return 0;
#endif
	}
	return 0;
}
644
/*
 * Dispatch .reg callbacks for syscall-exit events: route register/
 * unregister requests from the trace and perf subsystems to the
 * matching enable/disable helpers.  Unknown types are ignored (0).
 */
static int syscall_exit_register(struct ftrace_event_call *event,
				 enum trace_reg type)
{
	switch (type) {
	case TRACE_REG_REGISTER:
		return reg_event_syscall_exit(event);
	case TRACE_REG_UNREGISTER:
		unreg_event_syscall_exit(event);
		return 0;

#ifdef CONFIG_PERF_EVENTS
	case TRACE_REG_PERF_REGISTER:
		return perf_sysexit_enable(event);
	case TRACE_REG_PERF_UNREGISTER:
		perf_sysexit_disable(event);
		return 0;
#endif
	}
	return 0;
}