]> git.proxmox.com Git - mirror_ubuntu-jammy-kernel.git/blob - kernel/trace/trace.c
tracing: Get trace_array reference for available_tracers files
[mirror_ubuntu-jammy-kernel.git] / kernel / trace / trace.c
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3 * ring buffer based function tracer
4 *
5 * Copyright (C) 2007-2012 Steven Rostedt <srostedt@redhat.com>
6 * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
7 *
8 * Originally taken from the RT patch by:
9 * Arnaldo Carvalho de Melo <acme@redhat.com>
10 *
11 * Based on code from the latency_tracer, that is:
12 * Copyright (C) 2004-2006 Ingo Molnar
13 * Copyright (C) 2004 Nadia Yvette Chambers
14 */
15 #include <linux/ring_buffer.h>
16 #include <generated/utsrelease.h>
17 #include <linux/stacktrace.h>
18 #include <linux/writeback.h>
19 #include <linux/kallsyms.h>
20 #include <linux/seq_file.h>
21 #include <linux/notifier.h>
22 #include <linux/irqflags.h>
23 #include <linux/debugfs.h>
24 #include <linux/tracefs.h>
25 #include <linux/pagemap.h>
26 #include <linux/hardirq.h>
27 #include <linux/linkage.h>
28 #include <linux/uaccess.h>
29 #include <linux/vmalloc.h>
30 #include <linux/ftrace.h>
31 #include <linux/module.h>
32 #include <linux/percpu.h>
33 #include <linux/splice.h>
34 #include <linux/kdebug.h>
35 #include <linux/string.h>
36 #include <linux/mount.h>
37 #include <linux/rwsem.h>
38 #include <linux/slab.h>
39 #include <linux/ctype.h>
40 #include <linux/init.h>
41 #include <linux/poll.h>
42 #include <linux/nmi.h>
43 #include <linux/fs.h>
44 #include <linux/trace.h>
45 #include <linux/sched/clock.h>
46 #include <linux/sched/rt.h>
47
48 #include "trace.h"
49 #include "trace_output.h"
50
51 /*
52 * On boot up, the ring buffer is set to the minimum size, so that
53 * we do not waste memory on systems that are not using tracing.
54 */
55 bool ring_buffer_expanded;
56
57 /*
58 * We need to change this state when a selftest is running.
59 * A selftest will lurk into the ring-buffer to count the
60 * entries inserted during the selftest although some concurrent
61 * insertions into the ring-buffer such as trace_printk could occurred
62 * at the same time, giving false positive or negative results.
63 */
64 static bool __read_mostly tracing_selftest_running;
65
66 /*
67 * If a tracer is running, we do not want to run SELFTEST.
68 */
69 bool __read_mostly tracing_selftest_disabled;
70
71 /* Pipe tracepoints to printk */
72 struct trace_iterator *tracepoint_print_iter;
73 int tracepoint_printk;
74 static DEFINE_STATIC_KEY_FALSE(tracepoint_printk_key);
75
76 /* For tracers that don't implement custom flags */
77 static struct tracer_opt dummy_tracer_opt[] = {
78 { }
79 };
80
81 static int
82 dummy_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
83 {
84 return 0;
85 }
86
87 /*
88 * To prevent the comm cache from being overwritten when no
89 * tracing is active, only save the comm when a trace event
90 * occurred.
91 */
92 static DEFINE_PER_CPU(bool, trace_taskinfo_save);
93
94 /*
95 * Kill all tracing for good (never come back).
96 * It is initialized to 1 but will turn to zero if the initialization
97 * of the tracer is successful. But that is the only place that sets
98 * this back to zero.
99 */
100 static int tracing_disabled = 1;
101
102 cpumask_var_t __read_mostly tracing_buffer_mask;
103
104 /*
105 * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
106 *
107 * If there is an oops (or kernel panic) and the ftrace_dump_on_oops
108 * is set, then ftrace_dump is called. This will output the contents
109 * of the ftrace buffers to the console. This is very useful for
110 * capturing traces that lead to crashes and outputing it to a
111 * serial console.
112 *
113 * It is default off, but you can enable it with either specifying
114 * "ftrace_dump_on_oops" in the kernel command line, or setting
115 * /proc/sys/kernel/ftrace_dump_on_oops
116 * Set 1 if you want to dump buffers of all CPUs
117 * Set 2 if you want to dump the buffer of the CPU that triggered oops
118 */
119
120 enum ftrace_dump_mode ftrace_dump_on_oops;
121
122 /* When set, tracing will stop when a WARN*() is hit */
123 int __disable_trace_on_warning;
124
125 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
126 /* Map of enums to their values, for "eval_map" file */
127 struct trace_eval_map_head {
128 struct module *mod;
129 unsigned long length;
130 };
131
132 union trace_eval_map_item;
133
134 struct trace_eval_map_tail {
135 /*
136 * "end" is first and points to NULL as it must be different
137 * than "mod" or "eval_string"
138 */
139 union trace_eval_map_item *next;
140 const char *end; /* points to NULL */
141 };
142
143 static DEFINE_MUTEX(trace_eval_mutex);
144
145 /*
146 * The trace_eval_maps are saved in an array with two extra elements,
147 * one at the beginning, and one at the end. The beginning item contains
148 * the count of the saved maps (head.length), and the module they
149 * belong to if not built in (head.mod). The ending item contains a
150 * pointer to the next array of saved eval_map items.
151 */
152 union trace_eval_map_item {
153 struct trace_eval_map map;
154 struct trace_eval_map_head head;
155 struct trace_eval_map_tail tail;
156 };
157
158 static union trace_eval_map_item *trace_eval_maps;
159 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
160
161 static int tracing_set_tracer(struct trace_array *tr, const char *buf);
162 static void ftrace_trace_userstack(struct ring_buffer *buffer,
163 unsigned long flags, int pc);
164
165 #define MAX_TRACER_SIZE 100
166 static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
167 static char *default_bootup_tracer;
168
169 static bool allocate_snapshot;
170
171 static int __init set_cmdline_ftrace(char *str)
172 {
173 strlcpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
174 default_bootup_tracer = bootup_tracer_buf;
175 /* We are using ftrace early, expand it */
176 ring_buffer_expanded = true;
177 return 1;
178 }
179 __setup("ftrace=", set_cmdline_ftrace);
180
181 static int __init set_ftrace_dump_on_oops(char *str)
182 {
183 if (*str++ != '=' || !*str) {
184 ftrace_dump_on_oops = DUMP_ALL;
185 return 1;
186 }
187
188 if (!strcmp("orig_cpu", str)) {
189 ftrace_dump_on_oops = DUMP_ORIG;
190 return 1;
191 }
192
193 return 0;
194 }
195 __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
196
197 static int __init stop_trace_on_warning(char *str)
198 {
199 if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
200 __disable_trace_on_warning = 1;
201 return 1;
202 }
203 __setup("traceoff_on_warning", stop_trace_on_warning);
204
205 static int __init boot_alloc_snapshot(char *str)
206 {
207 allocate_snapshot = true;
208 /* We also need the main ring buffer expanded */
209 ring_buffer_expanded = true;
210 return 1;
211 }
212 __setup("alloc_snapshot", boot_alloc_snapshot);
213
214
215 static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;
216
217 static int __init set_trace_boot_options(char *str)
218 {
219 strlcpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
220 return 0;
221 }
222 __setup("trace_options=", set_trace_boot_options);
223
224 static char trace_boot_clock_buf[MAX_TRACER_SIZE] __initdata;
225 static char *trace_boot_clock __initdata;
226
227 static int __init set_trace_boot_clock(char *str)
228 {
229 strlcpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE);
230 trace_boot_clock = trace_boot_clock_buf;
231 return 0;
232 }
233 __setup("trace_clock=", set_trace_boot_clock);
234
235 static int __init set_tracepoint_printk(char *str)
236 {
237 if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
238 tracepoint_printk = 1;
239 return 1;
240 }
241 __setup("tp_printk", set_tracepoint_printk);
242
243 unsigned long long ns2usecs(u64 nsec)
244 {
245 nsec += 500;
246 do_div(nsec, 1000);
247 return nsec;
248 }
249
250 /* trace_flags holds trace_options default values */
251 #define TRACE_DEFAULT_FLAGS \
252 (FUNCTION_DEFAULT_FLAGS | \
253 TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK | \
254 TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO | \
255 TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE | \
256 TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS)
257
258 /* trace_options that are only supported by global_trace */
259 #define TOP_LEVEL_TRACE_FLAGS (TRACE_ITER_PRINTK | \
260 TRACE_ITER_PRINTK_MSGONLY | TRACE_ITER_RECORD_CMD)
261
262 /* trace_flags that are default zero for instances */
263 #define ZEROED_TRACE_FLAGS \
264 (TRACE_ITER_EVENT_FORK | TRACE_ITER_FUNC_FORK)
265
266 /*
267 * The global_trace is the descriptor that holds the top-level tracing
268 * buffers for the live tracing.
269 */
270 static struct trace_array global_trace = {
271 .trace_flags = TRACE_DEFAULT_FLAGS,
272 };
273
274 LIST_HEAD(ftrace_trace_arrays);
275
276 int trace_array_get(struct trace_array *this_tr)
277 {
278 struct trace_array *tr;
279 int ret = -ENODEV;
280
281 mutex_lock(&trace_types_lock);
282 list_for_each_entry(tr, &ftrace_trace_arrays, list) {
283 if (tr == this_tr) {
284 tr->ref++;
285 ret = 0;
286 break;
287 }
288 }
289 mutex_unlock(&trace_types_lock);
290
291 return ret;
292 }
293
294 static void __trace_array_put(struct trace_array *this_tr)
295 {
296 WARN_ON(!this_tr->ref);
297 this_tr->ref--;
298 }
299
300 void trace_array_put(struct trace_array *this_tr)
301 {
302 mutex_lock(&trace_types_lock);
303 __trace_array_put(this_tr);
304 mutex_unlock(&trace_types_lock);
305 }
306
307 int call_filter_check_discard(struct trace_event_call *call, void *rec,
308 struct ring_buffer *buffer,
309 struct ring_buffer_event *event)
310 {
311 if (unlikely(call->flags & TRACE_EVENT_FL_FILTERED) &&
312 !filter_match_preds(call->filter, rec)) {
313 __trace_event_discard_commit(buffer, event);
314 return 1;
315 }
316
317 return 0;
318 }
319
320 void trace_free_pid_list(struct trace_pid_list *pid_list)
321 {
322 vfree(pid_list->pids);
323 kfree(pid_list);
324 }
325
326 /**
327 * trace_find_filtered_pid - check if a pid exists in a filtered_pid list
328 * @filtered_pids: The list of pids to check
329 * @search_pid: The PID to find in @filtered_pids
330 *
331 * Returns true if @search_pid is fonud in @filtered_pids, and false otherwis.
332 */
333 bool
334 trace_find_filtered_pid(struct trace_pid_list *filtered_pids, pid_t search_pid)
335 {
336 /*
337 * If pid_max changed after filtered_pids was created, we
338 * by default ignore all pids greater than the previous pid_max.
339 */
340 if (search_pid >= filtered_pids->pid_max)
341 return false;
342
343 return test_bit(search_pid, filtered_pids->pids);
344 }
345
346 /**
347 * trace_ignore_this_task - should a task be ignored for tracing
348 * @filtered_pids: The list of pids to check
349 * @task: The task that should be ignored if not filtered
350 *
351 * Checks if @task should be traced or not from @filtered_pids.
352 * Returns true if @task should *NOT* be traced.
353 * Returns false if @task should be traced.
354 */
355 bool
356 trace_ignore_this_task(struct trace_pid_list *filtered_pids, struct task_struct *task)
357 {
358 /*
359 * Return false, because if filtered_pids does not exist,
360 * all pids are good to trace.
361 */
362 if (!filtered_pids)
363 return false;
364
365 return !trace_find_filtered_pid(filtered_pids, task->pid);
366 }
367
368 /**
369 * trace_filter_add_remove_task - Add or remove a task from a pid_list
370 * @pid_list: The list to modify
371 * @self: The current task for fork or NULL for exit
372 * @task: The task to add or remove
373 *
374 * If adding a task, if @self is defined, the task is only added if @self
375 * is also included in @pid_list. This happens on fork and tasks should
376 * only be added when the parent is listed. If @self is NULL, then the
377 * @task pid will be removed from the list, which would happen on exit
378 * of a task.
379 */
380 void trace_filter_add_remove_task(struct trace_pid_list *pid_list,
381 struct task_struct *self,
382 struct task_struct *task)
383 {
384 if (!pid_list)
385 return;
386
387 /* For forks, we only add if the forking task is listed */
388 if (self) {
389 if (!trace_find_filtered_pid(pid_list, self->pid))
390 return;
391 }
392
393 /* Sorry, but we don't support pid_max changing after setting */
394 if (task->pid >= pid_list->pid_max)
395 return;
396
397 /* "self" is set for forks, and NULL for exits */
398 if (self)
399 set_bit(task->pid, pid_list->pids);
400 else
401 clear_bit(task->pid, pid_list->pids);
402 }
403
404 /**
405 * trace_pid_next - Used for seq_file to get to the next pid of a pid_list
406 * @pid_list: The pid list to show
407 * @v: The last pid that was shown (+1 the actual pid to let zero be displayed)
408 * @pos: The position of the file
409 *
410 * This is used by the seq_file "next" operation to iterate the pids
411 * listed in a trace_pid_list structure.
412 *
413 * Returns the pid+1 as we want to display pid of zero, but NULL would
414 * stop the iteration.
415 */
416 void *trace_pid_next(struct trace_pid_list *pid_list, void *v, loff_t *pos)
417 {
418 unsigned long pid = (unsigned long)v;
419
420 (*pos)++;
421
422 /* pid already is +1 of the actual prevous bit */
423 pid = find_next_bit(pid_list->pids, pid_list->pid_max, pid);
424
425 /* Return pid + 1 to allow zero to be represented */
426 if (pid < pid_list->pid_max)
427 return (void *)(pid + 1);
428
429 return NULL;
430 }
431
432 /**
433 * trace_pid_start - Used for seq_file to start reading pid lists
434 * @pid_list: The pid list to show
435 * @pos: The position of the file
436 *
437 * This is used by seq_file "start" operation to start the iteration
438 * of listing pids.
439 *
440 * Returns the pid+1 as we want to display pid of zero, but NULL would
441 * stop the iteration.
442 */
443 void *trace_pid_start(struct trace_pid_list *pid_list, loff_t *pos)
444 {
445 unsigned long pid;
446 loff_t l = 0;
447
448 pid = find_first_bit(pid_list->pids, pid_list->pid_max);
449 if (pid >= pid_list->pid_max)
450 return NULL;
451
452 /* Return pid + 1 so that zero can be the exit value */
453 for (pid++; pid && l < *pos;
454 pid = (unsigned long)trace_pid_next(pid_list, (void *)pid, &l))
455 ;
456 return (void *)pid;
457 }
458
459 /**
460 * trace_pid_show - show the current pid in seq_file processing
461 * @m: The seq_file structure to write into
462 * @v: A void pointer of the pid (+1) value to display
463 *
464 * Can be directly used by seq_file operations to display the current
465 * pid value.
466 */
467 int trace_pid_show(struct seq_file *m, void *v)
468 {
469 unsigned long pid = (unsigned long)v - 1;
470
471 seq_printf(m, "%lu\n", pid);
472 return 0;
473 }
474
475 /* 128 should be much more than enough */
476 #define PID_BUF_SIZE 127
477
478 int trace_pid_write(struct trace_pid_list *filtered_pids,
479 struct trace_pid_list **new_pid_list,
480 const char __user *ubuf, size_t cnt)
481 {
482 struct trace_pid_list *pid_list;
483 struct trace_parser parser;
484 unsigned long val;
485 int nr_pids = 0;
486 ssize_t read = 0;
487 ssize_t ret = 0;
488 loff_t pos;
489 pid_t pid;
490
491 if (trace_parser_get_init(&parser, PID_BUF_SIZE + 1))
492 return -ENOMEM;
493
494 /*
495 * Always recreate a new array. The write is an all or nothing
496 * operation. Always create a new array when adding new pids by
497 * the user. If the operation fails, then the current list is
498 * not modified.
499 */
500 pid_list = kmalloc(sizeof(*pid_list), GFP_KERNEL);
501 if (!pid_list) {
502 trace_parser_put(&parser);
503 return -ENOMEM;
504 }
505
506 pid_list->pid_max = READ_ONCE(pid_max);
507
508 /* Only truncating will shrink pid_max */
509 if (filtered_pids && filtered_pids->pid_max > pid_list->pid_max)
510 pid_list->pid_max = filtered_pids->pid_max;
511
512 pid_list->pids = vzalloc((pid_list->pid_max + 7) >> 3);
513 if (!pid_list->pids) {
514 trace_parser_put(&parser);
515 kfree(pid_list);
516 return -ENOMEM;
517 }
518
519 if (filtered_pids) {
520 /* copy the current bits to the new max */
521 for_each_set_bit(pid, filtered_pids->pids,
522 filtered_pids->pid_max) {
523 set_bit(pid, pid_list->pids);
524 nr_pids++;
525 }
526 }
527
528 while (cnt > 0) {
529
530 pos = 0;
531
532 ret = trace_get_user(&parser, ubuf, cnt, &pos);
533 if (ret < 0 || !trace_parser_loaded(&parser))
534 break;
535
536 read += ret;
537 ubuf += ret;
538 cnt -= ret;
539
540 ret = -EINVAL;
541 if (kstrtoul(parser.buffer, 0, &val))
542 break;
543 if (val >= pid_list->pid_max)
544 break;
545
546 pid = (pid_t)val;
547
548 set_bit(pid, pid_list->pids);
549 nr_pids++;
550
551 trace_parser_clear(&parser);
552 ret = 0;
553 }
554 trace_parser_put(&parser);
555
556 if (ret < 0) {
557 trace_free_pid_list(pid_list);
558 return ret;
559 }
560
561 if (!nr_pids) {
562 /* Cleared the list of pids */
563 trace_free_pid_list(pid_list);
564 read = ret;
565 pid_list = NULL;
566 }
567
568 *new_pid_list = pid_list;
569
570 return read;
571 }
572
573 static u64 buffer_ftrace_now(struct trace_buffer *buf, int cpu)
574 {
575 u64 ts;
576
577 /* Early boot up does not have a buffer yet */
578 if (!buf->buffer)
579 return trace_clock_local();
580
581 ts = ring_buffer_time_stamp(buf->buffer, cpu);
582 ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts);
583
584 return ts;
585 }
586
587 u64 ftrace_now(int cpu)
588 {
589 return buffer_ftrace_now(&global_trace.trace_buffer, cpu);
590 }
591
592 /**
593 * tracing_is_enabled - Show if global_trace has been disabled
594 *
595 * Shows if the global trace has been enabled or not. It uses the
596 * mirror flag "buffer_disabled" to be used in fast paths such as for
597 * the irqsoff tracer. But it may be inaccurate due to races. If you
598 * need to know the accurate state, use tracing_is_on() which is a little
599 * slower, but accurate.
600 */
601 int tracing_is_enabled(void)
602 {
603 /*
604 * For quick access (irqsoff uses this in fast path), just
605 * return the mirror variable of the state of the ring buffer.
606 * It's a little racy, but we don't really care.
607 */
608 smp_rmb();
609 return !global_trace.buffer_disabled;
610 }
611
612 /*
613 * trace_buf_size is the size in bytes that is allocated
614 * for a buffer. Note, the number of bytes is always rounded
615 * to page size.
616 *
617 * This number is purposely set to a low number of 16384.
618 * If the dump on oops happens, it will be much appreciated
619 * to not have to wait for all that output. Anyway this can be
620 * boot time and run time configurable.
621 */
622 #define TRACE_BUF_SIZE_DEFAULT 1441792UL /* 16384 * 88 (sizeof(entry)) */
623
624 static unsigned long trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
625
626 /* trace_types holds a link list of available tracers. */
627 static struct tracer *trace_types __read_mostly;
628
629 /*
630 * trace_types_lock is used to protect the trace_types list.
631 */
632 DEFINE_MUTEX(trace_types_lock);
633
634 /*
635 * serialize the access of the ring buffer
636 *
637 * ring buffer serializes readers, but it is low level protection.
638 * The validity of the events (which returns by ring_buffer_peek() ..etc)
639 * are not protected by ring buffer.
640 *
641 * The content of events may become garbage if we allow other process consumes
642 * these events concurrently:
643 * A) the page of the consumed events may become a normal page
644 * (not reader page) in ring buffer, and this page will be rewrited
645 * by events producer.
646 * B) The page of the consumed events may become a page for splice_read,
647 * and this page will be returned to system.
648 *
649 * These primitives allow multi process access to different cpu ring buffer
650 * concurrently.
651 *
652 * These primitives don't distinguish read-only and read-consume access.
653 * Multi read-only access are also serialized.
654 */
655
656 #ifdef CONFIG_SMP
657 static DECLARE_RWSEM(all_cpu_access_lock);
658 static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
659
660 static inline void trace_access_lock(int cpu)
661 {
662 if (cpu == RING_BUFFER_ALL_CPUS) {
663 /* gain it for accessing the whole ring buffer. */
664 down_write(&all_cpu_access_lock);
665 } else {
666 /* gain it for accessing a cpu ring buffer. */
667
668 /* Firstly block other trace_access_lock(RING_BUFFER_ALL_CPUS). */
669 down_read(&all_cpu_access_lock);
670
671 /* Secondly block other access to this @cpu ring buffer. */
672 mutex_lock(&per_cpu(cpu_access_lock, cpu));
673 }
674 }
675
676 static inline void trace_access_unlock(int cpu)
677 {
678 if (cpu == RING_BUFFER_ALL_CPUS) {
679 up_write(&all_cpu_access_lock);
680 } else {
681 mutex_unlock(&per_cpu(cpu_access_lock, cpu));
682 up_read(&all_cpu_access_lock);
683 }
684 }
685
686 static inline void trace_access_lock_init(void)
687 {
688 int cpu;
689
690 for_each_possible_cpu(cpu)
691 mutex_init(&per_cpu(cpu_access_lock, cpu));
692 }
693
694 #else
695
696 static DEFINE_MUTEX(access_lock);
697
698 static inline void trace_access_lock(int cpu)
699 {
700 (void)cpu;
701 mutex_lock(&access_lock);
702 }
703
704 static inline void trace_access_unlock(int cpu)
705 {
706 (void)cpu;
707 mutex_unlock(&access_lock);
708 }
709
710 static inline void trace_access_lock_init(void)
711 {
712 }
713
714 #endif
715
716 #ifdef CONFIG_STACKTRACE
717 static void __ftrace_trace_stack(struct ring_buffer *buffer,
718 unsigned long flags,
719 int skip, int pc, struct pt_regs *regs);
720 static inline void ftrace_trace_stack(struct trace_array *tr,
721 struct ring_buffer *buffer,
722 unsigned long flags,
723 int skip, int pc, struct pt_regs *regs);
724
725 #else
726 static inline void __ftrace_trace_stack(struct ring_buffer *buffer,
727 unsigned long flags,
728 int skip, int pc, struct pt_regs *regs)
729 {
730 }
731 static inline void ftrace_trace_stack(struct trace_array *tr,
732 struct ring_buffer *buffer,
733 unsigned long flags,
734 int skip, int pc, struct pt_regs *regs)
735 {
736 }
737
738 #endif
739
740 static __always_inline void
741 trace_event_setup(struct ring_buffer_event *event,
742 int type, unsigned long flags, int pc)
743 {
744 struct trace_entry *ent = ring_buffer_event_data(event);
745
746 tracing_generic_entry_update(ent, type, flags, pc);
747 }
748
749 static __always_inline struct ring_buffer_event *
750 __trace_buffer_lock_reserve(struct ring_buffer *buffer,
751 int type,
752 unsigned long len,
753 unsigned long flags, int pc)
754 {
755 struct ring_buffer_event *event;
756
757 event = ring_buffer_lock_reserve(buffer, len);
758 if (event != NULL)
759 trace_event_setup(event, type, flags, pc);
760
761 return event;
762 }
763
764 void tracer_tracing_on(struct trace_array *tr)
765 {
766 if (tr->trace_buffer.buffer)
767 ring_buffer_record_on(tr->trace_buffer.buffer);
768 /*
769 * This flag is looked at when buffers haven't been allocated
770 * yet, or by some tracers (like irqsoff), that just want to
771 * know if the ring buffer has been disabled, but it can handle
772 * races of where it gets disabled but we still do a record.
773 * As the check is in the fast path of the tracers, it is more
774 * important to be fast than accurate.
775 */
776 tr->buffer_disabled = 0;
777 /* Make the flag seen by readers */
778 smp_wmb();
779 }
780
781 /**
782 * tracing_on - enable tracing buffers
783 *
784 * This function enables tracing buffers that may have been
785 * disabled with tracing_off.
786 */
787 void tracing_on(void)
788 {
789 tracer_tracing_on(&global_trace);
790 }
791 EXPORT_SYMBOL_GPL(tracing_on);
792
793
794 static __always_inline void
795 __buffer_unlock_commit(struct ring_buffer *buffer, struct ring_buffer_event *event)
796 {
797 __this_cpu_write(trace_taskinfo_save, true);
798
799 /* If this is the temp buffer, we need to commit fully */
800 if (this_cpu_read(trace_buffered_event) == event) {
801 /* Length is in event->array[0] */
802 ring_buffer_write(buffer, event->array[0], &event->array[1]);
803 /* Release the temp buffer */
804 this_cpu_dec(trace_buffered_event_cnt);
805 } else
806 ring_buffer_unlock_commit(buffer, event);
807 }
808
809 /**
810 * __trace_puts - write a constant string into the trace buffer.
811 * @ip: The address of the caller
812 * @str: The constant string to write
813 * @size: The size of the string.
814 */
815 int __trace_puts(unsigned long ip, const char *str, int size)
816 {
817 struct ring_buffer_event *event;
818 struct ring_buffer *buffer;
819 struct print_entry *entry;
820 unsigned long irq_flags;
821 int alloc;
822 int pc;
823
824 if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
825 return 0;
826
827 pc = preempt_count();
828
829 if (unlikely(tracing_selftest_running || tracing_disabled))
830 return 0;
831
832 alloc = sizeof(*entry) + size + 2; /* possible \n added */
833
834 local_save_flags(irq_flags);
835 buffer = global_trace.trace_buffer.buffer;
836 event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc,
837 irq_flags, pc);
838 if (!event)
839 return 0;
840
841 entry = ring_buffer_event_data(event);
842 entry->ip = ip;
843
844 memcpy(&entry->buf, str, size);
845
846 /* Add a newline if necessary */
847 if (entry->buf[size - 1] != '\n') {
848 entry->buf[size] = '\n';
849 entry->buf[size + 1] = '\0';
850 } else
851 entry->buf[size] = '\0';
852
853 __buffer_unlock_commit(buffer, event);
854 ftrace_trace_stack(&global_trace, buffer, irq_flags, 4, pc, NULL);
855
856 return size;
857 }
858 EXPORT_SYMBOL_GPL(__trace_puts);
859
860 /**
861 * __trace_bputs - write the pointer to a constant string into trace buffer
862 * @ip: The address of the caller
863 * @str: The constant string to write to the buffer to
864 */
865 int __trace_bputs(unsigned long ip, const char *str)
866 {
867 struct ring_buffer_event *event;
868 struct ring_buffer *buffer;
869 struct bputs_entry *entry;
870 unsigned long irq_flags;
871 int size = sizeof(struct bputs_entry);
872 int pc;
873
874 if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
875 return 0;
876
877 pc = preempt_count();
878
879 if (unlikely(tracing_selftest_running || tracing_disabled))
880 return 0;
881
882 local_save_flags(irq_flags);
883 buffer = global_trace.trace_buffer.buffer;
884 event = __trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size,
885 irq_flags, pc);
886 if (!event)
887 return 0;
888
889 entry = ring_buffer_event_data(event);
890 entry->ip = ip;
891 entry->str = str;
892
893 __buffer_unlock_commit(buffer, event);
894 ftrace_trace_stack(&global_trace, buffer, irq_flags, 4, pc, NULL);
895
896 return 1;
897 }
898 EXPORT_SYMBOL_GPL(__trace_bputs);
899
900 #ifdef CONFIG_TRACER_SNAPSHOT
901 void tracing_snapshot_instance_cond(struct trace_array *tr, void *cond_data)
902 {
903 struct tracer *tracer = tr->current_trace;
904 unsigned long flags;
905
906 if (in_nmi()) {
907 internal_trace_puts("*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
908 internal_trace_puts("*** snapshot is being ignored ***\n");
909 return;
910 }
911
912 if (!tr->allocated_snapshot) {
913 internal_trace_puts("*** SNAPSHOT NOT ALLOCATED ***\n");
914 internal_trace_puts("*** stopping trace here! ***\n");
915 tracing_off();
916 return;
917 }
918
919 /* Note, snapshot can not be used when the tracer uses it */
920 if (tracer->use_max_tr) {
921 internal_trace_puts("*** LATENCY TRACER ACTIVE ***\n");
922 internal_trace_puts("*** Can not use snapshot (sorry) ***\n");
923 return;
924 }
925
926 local_irq_save(flags);
927 update_max_tr(tr, current, smp_processor_id(), cond_data);
928 local_irq_restore(flags);
929 }
930
931 void tracing_snapshot_instance(struct trace_array *tr)
932 {
933 tracing_snapshot_instance_cond(tr, NULL);
934 }
935
936 /**
937 * tracing_snapshot - take a snapshot of the current buffer.
938 *
939 * This causes a swap between the snapshot buffer and the current live
940 * tracing buffer. You can use this to take snapshots of the live
941 * trace when some condition is triggered, but continue to trace.
942 *
943 * Note, make sure to allocate the snapshot with either
944 * a tracing_snapshot_alloc(), or by doing it manually
945 * with: echo 1 > /sys/kernel/debug/tracing/snapshot
946 *
947 * If the snapshot buffer is not allocated, it will stop tracing.
948 * Basically making a permanent snapshot.
949 */
950 void tracing_snapshot(void)
951 {
952 struct trace_array *tr = &global_trace;
953
954 tracing_snapshot_instance(tr);
955 }
956 EXPORT_SYMBOL_GPL(tracing_snapshot);
957
958 /**
959 * tracing_snapshot_cond - conditionally take a snapshot of the current buffer.
960 * @tr: The tracing instance to snapshot
961 * @cond_data: The data to be tested conditionally, and possibly saved
962 *
963 * This is the same as tracing_snapshot() except that the snapshot is
964 * conditional - the snapshot will only happen if the
965 * cond_snapshot.update() implementation receiving the cond_data
966 * returns true, which means that the trace array's cond_snapshot
967 * update() operation used the cond_data to determine whether the
968 * snapshot should be taken, and if it was, presumably saved it along
969 * with the snapshot.
970 */
971 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
972 {
973 tracing_snapshot_instance_cond(tr, cond_data);
974 }
975 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
976
977 /**
978 * tracing_snapshot_cond_data - get the user data associated with a snapshot
979 * @tr: The tracing instance
980 *
981 * When the user enables a conditional snapshot using
982 * tracing_snapshot_cond_enable(), the user-defined cond_data is saved
983 * with the snapshot. This accessor is used to retrieve it.
984 *
985 * Should not be called from cond_snapshot.update(), since it takes
986 * the tr->max_lock lock, which the code calling
987 * cond_snapshot.update() has already done.
988 *
989 * Returns the cond_data associated with the trace array's snapshot.
990 */
991 void *tracing_cond_snapshot_data(struct trace_array *tr)
992 {
993 void *cond_data = NULL;
994
995 arch_spin_lock(&tr->max_lock);
996
997 if (tr->cond_snapshot)
998 cond_data = tr->cond_snapshot->cond_data;
999
1000 arch_spin_unlock(&tr->max_lock);
1001
1002 return cond_data;
1003 }
1004 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1005
1006 static int resize_buffer_duplicate_size(struct trace_buffer *trace_buf,
1007 struct trace_buffer *size_buf, int cpu_id);
1008 static void set_buffer_entries(struct trace_buffer *buf, unsigned long val);
1009
1010 int tracing_alloc_snapshot_instance(struct trace_array *tr)
1011 {
1012 int ret;
1013
1014 if (!tr->allocated_snapshot) {
1015
1016 /* allocate spare buffer */
1017 ret = resize_buffer_duplicate_size(&tr->max_buffer,
1018 &tr->trace_buffer, RING_BUFFER_ALL_CPUS);
1019 if (ret < 0)
1020 return ret;
1021
1022 tr->allocated_snapshot = true;
1023 }
1024
1025 return 0;
1026 }
1027
1028 static void free_snapshot(struct trace_array *tr)
1029 {
1030 /*
1031 * We don't free the ring buffer. instead, resize it because
1032 * The max_tr ring buffer has some state (e.g. ring->clock) and
1033 * we want preserve it.
1034 */
1035 ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
1036 set_buffer_entries(&tr->max_buffer, 1);
1037 tracing_reset_online_cpus(&tr->max_buffer);
1038 tr->allocated_snapshot = false;
1039 }
1040
1041 /**
1042 * tracing_alloc_snapshot - allocate snapshot buffer.
1043 *
1044 * This only allocates the snapshot buffer if it isn't already
1045 * allocated - it doesn't also take a snapshot.
1046 *
1047 * This is meant to be used in cases where the snapshot buffer needs
1048 * to be set up for events that can't sleep but need to be able to
1049 * trigger a snapshot.
1050 */
1051 int tracing_alloc_snapshot(void)
1052 {
1053 struct trace_array *tr = &global_trace;
1054 int ret;
1055
1056 ret = tracing_alloc_snapshot_instance(tr);
1057 WARN_ON(ret < 0);
1058
1059 return ret;
1060 }
1061 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1062
1063 /**
1064 * tracing_snapshot_alloc - allocate and take a snapshot of the current buffer.
1065 *
1066 * This is similar to tracing_snapshot(), but it will allocate the
1067 * snapshot buffer if it isn't already allocated. Use this only
1068 * where it is safe to sleep, as the allocation may sleep.
1069 *
1070 * This causes a swap between the snapshot buffer and the current live
1071 * tracing buffer. You can use this to take snapshots of the live
1072 * trace when some condition is triggered, but continue to trace.
1073 */
1074 void tracing_snapshot_alloc(void)
1075 {
1076 int ret;
1077
1078 ret = tracing_alloc_snapshot();
1079 if (ret < 0)
1080 return;
1081
1082 tracing_snapshot();
1083 }
1084 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1085
1086 /**
1087 * tracing_snapshot_cond_enable - enable conditional snapshot for an instance
1088 * @tr: The tracing instance
1089 * @cond_data: User data to associate with the snapshot
1090 * @update: Implementation of the cond_snapshot update function
1091 *
1092 * Check whether the conditional snapshot for the given instance has
1093 * already been enabled, or if the current tracer is already using a
1094 * snapshot; if so, return -EBUSY, else create a cond_snapshot and
1095 * save the cond_data and update function inside.
1096 *
1097 * Returns 0 if successful, error otherwise.
1098 */
1099 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data,
1100 cond_update_fn_t update)
1101 {
1102 struct cond_snapshot *cond_snapshot;
1103 int ret = 0;
1104
1105 cond_snapshot = kzalloc(sizeof(*cond_snapshot), GFP_KERNEL);
1106 if (!cond_snapshot)
1107 return -ENOMEM;
1108
1109 cond_snapshot->cond_data = cond_data;
1110 cond_snapshot->update = update;
1111
1112 mutex_lock(&trace_types_lock);
1113
1114 ret = tracing_alloc_snapshot_instance(tr);
1115 if (ret)
1116 goto fail_unlock;
1117
1118 if (tr->current_trace->use_max_tr) {
1119 ret = -EBUSY;
1120 goto fail_unlock;
1121 }
1122
1123 /*
1124 * The cond_snapshot can only change to NULL without the
1125 * trace_types_lock. We don't care if we race with it going
1126 * to NULL, but we want to make sure that it's not set to
1127 * something other than NULL when we get here, which we can
1128 * do safely with only holding the trace_types_lock and not
1129 * having to take the max_lock.
1130 */
1131 if (tr->cond_snapshot) {
1132 ret = -EBUSY;
1133 goto fail_unlock;
1134 }
1135
1136 arch_spin_lock(&tr->max_lock);
1137 tr->cond_snapshot = cond_snapshot;
1138 arch_spin_unlock(&tr->max_lock);
1139
1140 mutex_unlock(&trace_types_lock);
1141
1142 return ret;
1143
1144 fail_unlock:
1145 mutex_unlock(&trace_types_lock);
1146 kfree(cond_snapshot);
1147 return ret;
1148 }
1149 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
1150
1151 /**
1152 * tracing_snapshot_cond_disable - disable conditional snapshot for an instance
1153 * @tr: The tracing instance
1154 *
1155 * Check whether the conditional snapshot for the given instance is
1156 * enabled; if so, free the cond_snapshot associated with it,
1157 * otherwise return -EINVAL.
1158 *
1159 * Returns 0 if successful, error otherwise.
1160 */
1161 int tracing_snapshot_cond_disable(struct trace_array *tr)
1162 {
1163 int ret = 0;
1164
1165 arch_spin_lock(&tr->max_lock);
1166
1167 if (!tr->cond_snapshot)
1168 ret = -EINVAL;
1169 else {
1170 kfree(tr->cond_snapshot);
1171 tr->cond_snapshot = NULL;
1172 }
1173
1174 arch_spin_unlock(&tr->max_lock);
1175
1176 return ret;
1177 }
1178 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1179 #else
1180 void tracing_snapshot(void)
1181 {
1182 WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
1183 }
1184 EXPORT_SYMBOL_GPL(tracing_snapshot);
1185 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1186 {
1187 WARN_ONCE(1, "Snapshot feature not enabled, but internal conditional snapshot used");
1188 }
1189 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1190 int tracing_alloc_snapshot(void)
1191 {
1192 WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used");
1193 return -ENODEV;
1194 }
1195 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1196 void tracing_snapshot_alloc(void)
1197 {
1198 /* Give warning */
1199 tracing_snapshot();
1200 }
1201 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1202 void *tracing_cond_snapshot_data(struct trace_array *tr)
1203 {
1204 return NULL;
1205 }
1206 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1207 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data, cond_update_fn_t update)
1208 {
1209 return -ENODEV;
1210 }
1211 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
1212 int tracing_snapshot_cond_disable(struct trace_array *tr)
1213 {
1214 return false;
1215 }
1216 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1217 #endif /* CONFIG_TRACER_SNAPSHOT */
1218
1219 void tracer_tracing_off(struct trace_array *tr)
1220 {
1221 if (tr->trace_buffer.buffer)
1222 ring_buffer_record_off(tr->trace_buffer.buffer);
1223 /*
1224 * This flag is looked at when buffers haven't been allocated
1225 * yet, or by some tracers (like irqsoff), that just want to
1226 * know if the ring buffer has been disabled, but it can handle
1227 * races of where it gets disabled but we still do a record.
1228 * As the check is in the fast path of the tracers, it is more
1229 * important to be fast than accurate.
1230 */
1231 tr->buffer_disabled = 1;
1232 /* Make the flag seen by readers */
1233 smp_wmb();
1234 }
1235
1236 /**
1237 * tracing_off - turn off tracing buffers
1238 *
1239 * This function stops the tracing buffers from recording data.
1240 * It does not disable any overhead the tracers themselves may
1241 * be causing. This function simply causes all recording to
1242 * the ring buffers to fail.
1243 */
1244 void tracing_off(void)
1245 {
1246 tracer_tracing_off(&global_trace);
1247 }
1248 EXPORT_SYMBOL_GPL(tracing_off);
1249
1250 void disable_trace_on_warning(void)
1251 {
1252 if (__disable_trace_on_warning)
1253 tracing_off();
1254 }
1255
1256 /**
1257 * tracer_tracing_is_on - show real state of ring buffer enabled
1258 * @tr : the trace array to know if ring buffer is enabled
1259 *
1260 * Shows real state of the ring buffer if it is enabled or not.
1261 */
1262 bool tracer_tracing_is_on(struct trace_array *tr)
1263 {
1264 if (tr->trace_buffer.buffer)
1265 return ring_buffer_record_is_on(tr->trace_buffer.buffer);
1266 return !tr->buffer_disabled;
1267 }
1268
1269 /**
1270 * tracing_is_on - show state of ring buffers enabled
1271 */
1272 int tracing_is_on(void)
1273 {
1274 return tracer_tracing_is_on(&global_trace);
1275 }
1276 EXPORT_SYMBOL_GPL(tracing_is_on);
1277
1278 static int __init set_buf_size(char *str)
1279 {
1280 unsigned long buf_size;
1281
1282 if (!str)
1283 return 0;
1284 buf_size = memparse(str, &str);
1285 /* nr_entries can not be zero */
1286 if (buf_size == 0)
1287 return 0;
1288 trace_buf_size = buf_size;
1289 return 1;
1290 }
1291 __setup("trace_buf_size=", set_buf_size);
1292
1293 static int __init set_tracing_thresh(char *str)
1294 {
1295 unsigned long threshold;
1296 int ret;
1297
1298 if (!str)
1299 return 0;
1300 ret = kstrtoul(str, 0, &threshold);
1301 if (ret < 0)
1302 return 0;
1303 tracing_thresh = threshold * 1000;
1304 return 1;
1305 }
1306 __setup("tracing_thresh=", set_tracing_thresh);
1307
1308 unsigned long nsecs_to_usecs(unsigned long nsecs)
1309 {
1310 return nsecs / 1000;
1311 }
1312
1313 /*
1314 * TRACE_FLAGS is defined as a tuple matching bit masks with strings.
1315 * It uses C(a, b) where 'a' is the eval (enum) name and 'b' is the string that
1316 * matches it. By defining "C(a, b) b", TRACE_FLAGS becomes a list
1317 * of strings in the order that the evals (enum) were defined.
1318 */
1319 #undef C
1320 #define C(a, b) b
1321
1322 /* These must match the bit postions in trace_iterator_flags */
1323 static const char *trace_options[] = {
1324 TRACE_FLAGS
1325 NULL
1326 };
1327
1328 static struct {
1329 u64 (*func)(void);
1330 const char *name;
1331 int in_ns; /* is this clock in nanoseconds? */
1332 } trace_clocks[] = {
1333 { trace_clock_local, "local", 1 },
1334 { trace_clock_global, "global", 1 },
1335 { trace_clock_counter, "counter", 0 },
1336 { trace_clock_jiffies, "uptime", 0 },
1337 { trace_clock, "perf", 1 },
1338 { ktime_get_mono_fast_ns, "mono", 1 },
1339 { ktime_get_raw_fast_ns, "mono_raw", 1 },
1340 { ktime_get_boot_fast_ns, "boot", 1 },
1341 ARCH_TRACE_CLOCKS
1342 };
1343
1344 bool trace_clock_in_ns(struct trace_array *tr)
1345 {
1346 if (trace_clocks[tr->clock_id].in_ns)
1347 return true;
1348
1349 return false;
1350 }
1351
1352 /*
1353 * trace_parser_get_init - gets the buffer for trace parser
1354 */
1355 int trace_parser_get_init(struct trace_parser *parser, int size)
1356 {
1357 memset(parser, 0, sizeof(*parser));
1358
1359 parser->buffer = kmalloc(size, GFP_KERNEL);
1360 if (!parser->buffer)
1361 return 1;
1362
1363 parser->size = size;
1364 return 0;
1365 }
1366
1367 /*
1368 * trace_parser_put - frees the buffer for trace parser
1369 */
1370 void trace_parser_put(struct trace_parser *parser)
1371 {
1372 kfree(parser->buffer);
1373 parser->buffer = NULL;
1374 }
1375
1376 /*
1377 * trace_get_user - reads the user input string separated by space
1378 * (matched by isspace(ch))
1379 *
1380 * For each string found the 'struct trace_parser' is updated,
1381 * and the function returns.
1382 *
1383 * Returns number of bytes read.
1384 *
1385 * See kernel/trace/trace.h for 'struct trace_parser' details.
1386 */
1387 int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
1388 size_t cnt, loff_t *ppos)
1389 {
1390 char ch;
1391 size_t read = 0;
1392 ssize_t ret;
1393
1394 if (!*ppos)
1395 trace_parser_clear(parser);
1396
1397 ret = get_user(ch, ubuf++);
1398 if (ret)
1399 goto out;
1400
1401 read++;
1402 cnt--;
1403
1404 /*
1405 * The parser is not finished with the last write,
1406 * continue reading the user input without skipping spaces.
1407 */
1408 if (!parser->cont) {
1409 /* skip white space */
1410 while (cnt && isspace(ch)) {
1411 ret = get_user(ch, ubuf++);
1412 if (ret)
1413 goto out;
1414 read++;
1415 cnt--;
1416 }
1417
1418 parser->idx = 0;
1419
1420 /* only spaces were written */
1421 if (isspace(ch) || !ch) {
1422 *ppos += read;
1423 ret = read;
1424 goto out;
1425 }
1426 }
1427
1428 /* read the non-space input */
1429 while (cnt && !isspace(ch) && ch) {
1430 if (parser->idx < parser->size - 1)
1431 parser->buffer[parser->idx++] = ch;
1432 else {
1433 ret = -EINVAL;
1434 goto out;
1435 }
1436 ret = get_user(ch, ubuf++);
1437 if (ret)
1438 goto out;
1439 read++;
1440 cnt--;
1441 }
1442
1443 /* We either got finished input or we have to wait for another call. */
1444 if (isspace(ch) || !ch) {
1445 parser->buffer[parser->idx] = 0;
1446 parser->cont = false;
1447 } else if (parser->idx < parser->size - 1) {
1448 parser->cont = true;
1449 parser->buffer[parser->idx++] = ch;
1450 /* Make sure the parsed string always terminates with '\0'. */
1451 parser->buffer[parser->idx] = 0;
1452 } else {
1453 ret = -EINVAL;
1454 goto out;
1455 }
1456
1457 *ppos += read;
1458 ret = read;
1459
1460 out:
1461 return ret;
1462 }
1463
1464 /* TODO add a seq_buf_to_buffer() */
1465 static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
1466 {
1467 int len;
1468
1469 if (trace_seq_used(s) <= s->seq.readpos)
1470 return -EBUSY;
1471
1472 len = trace_seq_used(s) - s->seq.readpos;
1473 if (cnt > len)
1474 cnt = len;
1475 memcpy(buf, s->buffer + s->seq.readpos, cnt);
1476
1477 s->seq.readpos += cnt;
1478 return cnt;
1479 }
1480
1481 unsigned long __read_mostly tracing_thresh;
1482
1483 #ifdef CONFIG_TRACER_MAX_TRACE
1484 /*
1485 * Copy the new maximum trace into the separate maximum-trace
1486 * structure. (this way the maximum trace is permanently saved,
1487 * for later retrieval via /sys/kernel/tracing/tracing_max_latency)
1488 */
1489 static void
1490 __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1491 {
1492 struct trace_buffer *trace_buf = &tr->trace_buffer;
1493 struct trace_buffer *max_buf = &tr->max_buffer;
1494 struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu);
1495 struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);
1496
1497 max_buf->cpu = cpu;
1498 max_buf->time_start = data->preempt_timestamp;
1499
1500 max_data->saved_latency = tr->max_latency;
1501 max_data->critical_start = data->critical_start;
1502 max_data->critical_end = data->critical_end;
1503
1504 strncpy(max_data->comm, tsk->comm, TASK_COMM_LEN);
1505 max_data->pid = tsk->pid;
1506 /*
1507 * If tsk == current, then use current_uid(), as that does not use
1508 * RCU. The irq tracer can be called out of RCU scope.
1509 */
1510 if (tsk == current)
1511 max_data->uid = current_uid();
1512 else
1513 max_data->uid = task_uid(tsk);
1514
1515 max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
1516 max_data->policy = tsk->policy;
1517 max_data->rt_priority = tsk->rt_priority;
1518
1519 /* record this tasks comm */
1520 tracing_record_cmdline(tsk);
1521 }
1522
1523 /**
1524 * update_max_tr - snapshot all trace buffers from global_trace to max_tr
1525 * @tr: tracer
1526 * @tsk: the task with the latency
1527 * @cpu: The cpu that initiated the trace.
1528 * @cond_data: User data associated with a conditional snapshot
1529 *
1530 * Flip the buffers between the @tr and the max_tr and record information
1531 * about which task was the cause of this latency.
1532 */
1533 void
1534 update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu,
1535 void *cond_data)
1536 {
1537 if (tr->stop_count)
1538 return;
1539
1540 WARN_ON_ONCE(!irqs_disabled());
1541
1542 if (!tr->allocated_snapshot) {
1543 /* Only the nop tracer should hit this when disabling */
1544 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1545 return;
1546 }
1547
1548 arch_spin_lock(&tr->max_lock);
1549
1550 /* Inherit the recordable setting from trace_buffer */
1551 if (ring_buffer_record_is_set_on(tr->trace_buffer.buffer))
1552 ring_buffer_record_on(tr->max_buffer.buffer);
1553 else
1554 ring_buffer_record_off(tr->max_buffer.buffer);
1555
1556 #ifdef CONFIG_TRACER_SNAPSHOT
1557 if (tr->cond_snapshot && !tr->cond_snapshot->update(tr, cond_data))
1558 goto out_unlock;
1559 #endif
1560 swap(tr->trace_buffer.buffer, tr->max_buffer.buffer);
1561
1562 __update_max_tr(tr, tsk, cpu);
1563
1564 out_unlock:
1565 arch_spin_unlock(&tr->max_lock);
1566 }
1567
1568 /**
1569 * update_max_tr_single - only copy one trace over, and reset the rest
1570 * @tr: tracer
1571 * @tsk: task with the latency
1572 * @cpu: the cpu of the buffer to copy.
1573 *
1574 * Flip the trace of a single CPU buffer between the @tr and the max_tr.
1575 */
1576 void
1577 update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
1578 {
1579 int ret;
1580
1581 if (tr->stop_count)
1582 return;
1583
1584 WARN_ON_ONCE(!irqs_disabled());
1585 if (!tr->allocated_snapshot) {
1586 /* Only the nop tracer should hit this when disabling */
1587 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1588 return;
1589 }
1590
1591 arch_spin_lock(&tr->max_lock);
1592
1593 ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->trace_buffer.buffer, cpu);
1594
1595 if (ret == -EBUSY) {
1596 /*
1597 * We failed to swap the buffer due to a commit taking
1598 * place on this CPU. We fail to record, but we reset
1599 * the max trace buffer (no one writes directly to it)
1600 * and flag that it failed.
1601 */
1602 trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_,
1603 "Failed to swap buffers due to commit in progress\n");
1604 }
1605
1606 WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
1607
1608 __update_max_tr(tr, tsk, cpu);
1609 arch_spin_unlock(&tr->max_lock);
1610 }
1611 #endif /* CONFIG_TRACER_MAX_TRACE */
1612
1613 static int wait_on_pipe(struct trace_iterator *iter, int full)
1614 {
1615 /* Iterators are static, they should be filled or empty */
1616 if (trace_buffer_iter(iter, iter->cpu_file))
1617 return 0;
1618
1619 return ring_buffer_wait(iter->trace_buffer->buffer, iter->cpu_file,
1620 full);
1621 }
1622
1623 #ifdef CONFIG_FTRACE_STARTUP_TEST
1624 static bool selftests_can_run;
1625
1626 struct trace_selftests {
1627 struct list_head list;
1628 struct tracer *type;
1629 };
1630
1631 static LIST_HEAD(postponed_selftests);
1632
1633 static int save_selftest(struct tracer *type)
1634 {
1635 struct trace_selftests *selftest;
1636
1637 selftest = kmalloc(sizeof(*selftest), GFP_KERNEL);
1638 if (!selftest)
1639 return -ENOMEM;
1640
1641 selftest->type = type;
1642 list_add(&selftest->list, &postponed_selftests);
1643 return 0;
1644 }
1645
1646 static int run_tracer_selftest(struct tracer *type)
1647 {
1648 struct trace_array *tr = &global_trace;
1649 struct tracer *saved_tracer = tr->current_trace;
1650 int ret;
1651
1652 if (!type->selftest || tracing_selftest_disabled)
1653 return 0;
1654
1655 /*
1656 * If a tracer registers early in boot up (before scheduling is
1657 * initialized and such), then do not run its selftests yet.
1658 * Instead, run it a little later in the boot process.
1659 */
1660 if (!selftests_can_run)
1661 return save_selftest(type);
1662
1663 /*
1664 * Run a selftest on this tracer.
1665 * Here we reset the trace buffer, and set the current
1666 * tracer to be this tracer. The tracer can then run some
1667 * internal tracing to verify that everything is in order.
1668 * If we fail, we do not register this tracer.
1669 */
1670 tracing_reset_online_cpus(&tr->trace_buffer);
1671
1672 tr->current_trace = type;
1673
1674 #ifdef CONFIG_TRACER_MAX_TRACE
1675 if (type->use_max_tr) {
1676 /* If we expanded the buffers, make sure the max is expanded too */
1677 if (ring_buffer_expanded)
1678 ring_buffer_resize(tr->max_buffer.buffer, trace_buf_size,
1679 RING_BUFFER_ALL_CPUS);
1680 tr->allocated_snapshot = true;
1681 }
1682 #endif
1683
1684 /* the test is responsible for initializing and enabling */
1685 pr_info("Testing tracer %s: ", type->name);
1686 ret = type->selftest(type, tr);
1687 /* the test is responsible for resetting too */
1688 tr->current_trace = saved_tracer;
1689 if (ret) {
1690 printk(KERN_CONT "FAILED!\n");
1691 /* Add the warning after printing 'FAILED' */
1692 WARN_ON(1);
1693 return -1;
1694 }
1695 /* Only reset on passing, to avoid touching corrupted buffers */
1696 tracing_reset_online_cpus(&tr->trace_buffer);
1697
1698 #ifdef CONFIG_TRACER_MAX_TRACE
1699 if (type->use_max_tr) {
1700 tr->allocated_snapshot = false;
1701
1702 /* Shrink the max buffer again */
1703 if (ring_buffer_expanded)
1704 ring_buffer_resize(tr->max_buffer.buffer, 1,
1705 RING_BUFFER_ALL_CPUS);
1706 }
1707 #endif
1708
1709 printk(KERN_CONT "PASSED\n");
1710 return 0;
1711 }
1712
1713 static __init int init_trace_selftests(void)
1714 {
1715 struct trace_selftests *p, *n;
1716 struct tracer *t, **last;
1717 int ret;
1718
1719 selftests_can_run = true;
1720
1721 mutex_lock(&trace_types_lock);
1722
1723 if (list_empty(&postponed_selftests))
1724 goto out;
1725
1726 pr_info("Running postponed tracer tests:\n");
1727
1728 list_for_each_entry_safe(p, n, &postponed_selftests, list) {
1729 /* This loop can take minutes when sanitizers are enabled, so
1730 * lets make sure we allow RCU processing.
1731 */
1732 cond_resched();
1733 ret = run_tracer_selftest(p->type);
1734 /* If the test fails, then warn and remove from available_tracers */
1735 if (ret < 0) {
1736 WARN(1, "tracer: %s failed selftest, disabling\n",
1737 p->type->name);
1738 last = &trace_types;
1739 for (t = trace_types; t; t = t->next) {
1740 if (t == p->type) {
1741 *last = t->next;
1742 break;
1743 }
1744 last = &t->next;
1745 }
1746 }
1747 list_del(&p->list);
1748 kfree(p);
1749 }
1750
1751 out:
1752 mutex_unlock(&trace_types_lock);
1753
1754 return 0;
1755 }
1756 core_initcall(init_trace_selftests);
1757 #else
1758 static inline int run_tracer_selftest(struct tracer *type)
1759 {
1760 return 0;
1761 }
1762 #endif /* CONFIG_FTRACE_STARTUP_TEST */
1763
1764 static void add_tracer_options(struct trace_array *tr, struct tracer *t);
1765
1766 static void __init apply_trace_boot_options(void);
1767
1768 /**
1769 * register_tracer - register a tracer with the ftrace system.
1770 * @type: the plugin for the tracer
1771 *
1772 * Register a new plugin tracer.
1773 */
1774 int __init register_tracer(struct tracer *type)
1775 {
1776 struct tracer *t;
1777 int ret = 0;
1778
1779 if (!type->name) {
1780 pr_info("Tracer must have a name\n");
1781 return -1;
1782 }
1783
1784 if (strlen(type->name) >= MAX_TRACER_SIZE) {
1785 pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
1786 return -1;
1787 }
1788
1789 mutex_lock(&trace_types_lock);
1790
1791 tracing_selftest_running = true;
1792
1793 for (t = trace_types; t; t = t->next) {
1794 if (strcmp(type->name, t->name) == 0) {
1795 /* already found */
1796 pr_info("Tracer %s already registered\n",
1797 type->name);
1798 ret = -1;
1799 goto out;
1800 }
1801 }
1802
1803 if (!type->set_flag)
1804 type->set_flag = &dummy_set_flag;
1805 if (!type->flags) {
1806 /*allocate a dummy tracer_flags*/
1807 type->flags = kmalloc(sizeof(*type->flags), GFP_KERNEL);
1808 if (!type->flags) {
1809 ret = -ENOMEM;
1810 goto out;
1811 }
1812 type->flags->val = 0;
1813 type->flags->opts = dummy_tracer_opt;
1814 } else
1815 if (!type->flags->opts)
1816 type->flags->opts = dummy_tracer_opt;
1817
1818 /* store the tracer for __set_tracer_option */
1819 type->flags->trace = type;
1820
1821 ret = run_tracer_selftest(type);
1822 if (ret < 0)
1823 goto out;
1824
1825 type->next = trace_types;
1826 trace_types = type;
1827 add_tracer_options(&global_trace, type);
1828
1829 out:
1830 tracing_selftest_running = false;
1831 mutex_unlock(&trace_types_lock);
1832
1833 if (ret || !default_bootup_tracer)
1834 goto out_unlock;
1835
1836 if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
1837 goto out_unlock;
1838
1839 printk(KERN_INFO "Starting tracer '%s'\n", type->name);
1840 /* Do we want this tracer to start on bootup? */
1841 tracing_set_tracer(&global_trace, type->name);
1842 default_bootup_tracer = NULL;
1843
1844 apply_trace_boot_options();
1845
1846 /* disable other selftests, since this will break it. */
1847 tracing_selftest_disabled = true;
1848 #ifdef CONFIG_FTRACE_STARTUP_TEST
1849 printk(KERN_INFO "Disabling FTRACE selftests due to running tracer '%s'\n",
1850 type->name);
1851 #endif
1852
1853 out_unlock:
1854 return ret;
1855 }
1856
1857 static void tracing_reset_cpu(struct trace_buffer *buf, int cpu)
1858 {
1859 struct ring_buffer *buffer = buf->buffer;
1860
1861 if (!buffer)
1862 return;
1863
1864 ring_buffer_record_disable(buffer);
1865
1866 /* Make sure all commits have finished */
1867 synchronize_rcu();
1868 ring_buffer_reset_cpu(buffer, cpu);
1869
1870 ring_buffer_record_enable(buffer);
1871 }
1872
1873 void tracing_reset_online_cpus(struct trace_buffer *buf)
1874 {
1875 struct ring_buffer *buffer = buf->buffer;
1876 int cpu;
1877
1878 if (!buffer)
1879 return;
1880
1881 ring_buffer_record_disable(buffer);
1882
1883 /* Make sure all commits have finished */
1884 synchronize_rcu();
1885
1886 buf->time_start = buffer_ftrace_now(buf, buf->cpu);
1887
1888 for_each_online_cpu(cpu)
1889 ring_buffer_reset_cpu(buffer, cpu);
1890
1891 ring_buffer_record_enable(buffer);
1892 }
1893
1894 /* Must have trace_types_lock held */
1895 void tracing_reset_all_online_cpus(void)
1896 {
1897 struct trace_array *tr;
1898
1899 list_for_each_entry(tr, &ftrace_trace_arrays, list) {
1900 if (!tr->clear_trace)
1901 continue;
1902 tr->clear_trace = false;
1903 tracing_reset_online_cpus(&tr->trace_buffer);
1904 #ifdef CONFIG_TRACER_MAX_TRACE
1905 tracing_reset_online_cpus(&tr->max_buffer);
1906 #endif
1907 }
1908 }
1909
1910 static int *tgid_map;
1911
1912 #define SAVED_CMDLINES_DEFAULT 128
1913 #define NO_CMDLINE_MAP UINT_MAX
1914 static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
1915 struct saved_cmdlines_buffer {
1916 unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
1917 unsigned *map_cmdline_to_pid;
1918 unsigned cmdline_num;
1919 int cmdline_idx;
1920 char *saved_cmdlines;
1921 };
1922 static struct saved_cmdlines_buffer *savedcmd;
1923
1924 /* temporary disable recording */
1925 static atomic_t trace_record_taskinfo_disabled __read_mostly;
1926
1927 static inline char *get_saved_cmdlines(int idx)
1928 {
1929 return &savedcmd->saved_cmdlines[idx * TASK_COMM_LEN];
1930 }
1931
1932 static inline void set_cmdline(int idx, const char *cmdline)
1933 {
1934 strncpy(get_saved_cmdlines(idx), cmdline, TASK_COMM_LEN);
1935 }
1936
1937 static int allocate_cmdlines_buffer(unsigned int val,
1938 struct saved_cmdlines_buffer *s)
1939 {
1940 s->map_cmdline_to_pid = kmalloc_array(val,
1941 sizeof(*s->map_cmdline_to_pid),
1942 GFP_KERNEL);
1943 if (!s->map_cmdline_to_pid)
1944 return -ENOMEM;
1945
1946 s->saved_cmdlines = kmalloc_array(TASK_COMM_LEN, val, GFP_KERNEL);
1947 if (!s->saved_cmdlines) {
1948 kfree(s->map_cmdline_to_pid);
1949 return -ENOMEM;
1950 }
1951
1952 s->cmdline_idx = 0;
1953 s->cmdline_num = val;
1954 memset(&s->map_pid_to_cmdline, NO_CMDLINE_MAP,
1955 sizeof(s->map_pid_to_cmdline));
1956 memset(s->map_cmdline_to_pid, NO_CMDLINE_MAP,
1957 val * sizeof(*s->map_cmdline_to_pid));
1958
1959 return 0;
1960 }
1961
1962 static int trace_create_savedcmd(void)
1963 {
1964 int ret;
1965
1966 savedcmd = kmalloc(sizeof(*savedcmd), GFP_KERNEL);
1967 if (!savedcmd)
1968 return -ENOMEM;
1969
1970 ret = allocate_cmdlines_buffer(SAVED_CMDLINES_DEFAULT, savedcmd);
1971 if (ret < 0) {
1972 kfree(savedcmd);
1973 savedcmd = NULL;
1974 return -ENOMEM;
1975 }
1976
1977 return 0;
1978 }
1979
1980 int is_tracing_stopped(void)
1981 {
1982 return global_trace.stop_count;
1983 }
1984
1985 /**
1986 * tracing_start - quick start of the tracer
1987 *
1988 * If tracing is enabled but was stopped by tracing_stop,
1989 * this will start the tracer back up.
1990 */
1991 void tracing_start(void)
1992 {
1993 struct ring_buffer *buffer;
1994 unsigned long flags;
1995
1996 if (tracing_disabled)
1997 return;
1998
1999 raw_spin_lock_irqsave(&global_trace.start_lock, flags);
2000 if (--global_trace.stop_count) {
2001 if (global_trace.stop_count < 0) {
2002 /* Someone screwed up their debugging */
2003 WARN_ON_ONCE(1);
2004 global_trace.stop_count = 0;
2005 }
2006 goto out;
2007 }
2008
2009 /* Prevent the buffers from switching */
2010 arch_spin_lock(&global_trace.max_lock);
2011
2012 buffer = global_trace.trace_buffer.buffer;
2013 if (buffer)
2014 ring_buffer_record_enable(buffer);
2015
2016 #ifdef CONFIG_TRACER_MAX_TRACE
2017 buffer = global_trace.max_buffer.buffer;
2018 if (buffer)
2019 ring_buffer_record_enable(buffer);
2020 #endif
2021
2022 arch_spin_unlock(&global_trace.max_lock);
2023
2024 out:
2025 raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
2026 }
2027
2028 static void tracing_start_tr(struct trace_array *tr)
2029 {
2030 struct ring_buffer *buffer;
2031 unsigned long flags;
2032
2033 if (tracing_disabled)
2034 return;
2035
2036 /* If global, we need to also start the max tracer */
2037 if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
2038 return tracing_start();
2039
2040 raw_spin_lock_irqsave(&tr->start_lock, flags);
2041
2042 if (--tr->stop_count) {
2043 if (tr->stop_count < 0) {
2044 /* Someone screwed up their debugging */
2045 WARN_ON_ONCE(1);
2046 tr->stop_count = 0;
2047 }
2048 goto out;
2049 }
2050
2051 buffer = tr->trace_buffer.buffer;
2052 if (buffer)
2053 ring_buffer_record_enable(buffer);
2054
2055 out:
2056 raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2057 }
2058
2059 /**
2060 * tracing_stop - quick stop of the tracer
2061 *
2062 * Light weight way to stop tracing. Use in conjunction with
2063 * tracing_start.
2064 */
2065 void tracing_stop(void)
2066 {
2067 struct ring_buffer *buffer;
2068 unsigned long flags;
2069
2070 raw_spin_lock_irqsave(&global_trace.start_lock, flags);
2071 if (global_trace.stop_count++)
2072 goto out;
2073
2074 /* Prevent the buffers from switching */
2075 arch_spin_lock(&global_trace.max_lock);
2076
2077 buffer = global_trace.trace_buffer.buffer;
2078 if (buffer)
2079 ring_buffer_record_disable(buffer);
2080
2081 #ifdef CONFIG_TRACER_MAX_TRACE
2082 buffer = global_trace.max_buffer.buffer;
2083 if (buffer)
2084 ring_buffer_record_disable(buffer);
2085 #endif
2086
2087 arch_spin_unlock(&global_trace.max_lock);
2088
2089 out:
2090 raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
2091 }
2092
2093 static void tracing_stop_tr(struct trace_array *tr)
2094 {
2095 struct ring_buffer *buffer;
2096 unsigned long flags;
2097
2098 /* If global, we need to also stop the max tracer */
2099 if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
2100 return tracing_stop();
2101
2102 raw_spin_lock_irqsave(&tr->start_lock, flags);
2103 if (tr->stop_count++)
2104 goto out;
2105
2106 buffer = tr->trace_buffer.buffer;
2107 if (buffer)
2108 ring_buffer_record_disable(buffer);
2109
2110 out:
2111 raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2112 }
2113
2114 static int trace_save_cmdline(struct task_struct *tsk)
2115 {
2116 unsigned pid, idx;
2117
2118 /* treat recording of idle task as a success */
2119 if (!tsk->pid)
2120 return 1;
2121
2122 if (unlikely(tsk->pid > PID_MAX_DEFAULT))
2123 return 0;
2124
2125 /*
2126 * It's not the end of the world if we don't get
2127 * the lock, but we also don't want to spin
2128 * nor do we want to disable interrupts,
2129 * so if we miss here, then better luck next time.
2130 */
2131 if (!arch_spin_trylock(&trace_cmdline_lock))
2132 return 0;
2133
2134 idx = savedcmd->map_pid_to_cmdline[tsk->pid];
2135 if (idx == NO_CMDLINE_MAP) {
2136 idx = (savedcmd->cmdline_idx + 1) % savedcmd->cmdline_num;
2137
2138 /*
2139 * Check whether the cmdline buffer at idx has a pid
2140 * mapped. We are going to overwrite that entry so we
2141 * need to clear the map_pid_to_cmdline. Otherwise we
2142 * would read the new comm for the old pid.
2143 */
2144 pid = savedcmd->map_cmdline_to_pid[idx];
2145 if (pid != NO_CMDLINE_MAP)
2146 savedcmd->map_pid_to_cmdline[pid] = NO_CMDLINE_MAP;
2147
2148 savedcmd->map_cmdline_to_pid[idx] = tsk->pid;
2149 savedcmd->map_pid_to_cmdline[tsk->pid] = idx;
2150
2151 savedcmd->cmdline_idx = idx;
2152 }
2153
2154 set_cmdline(idx, tsk->comm);
2155
2156 arch_spin_unlock(&trace_cmdline_lock);
2157
2158 return 1;
2159 }
2160
2161 static void __trace_find_cmdline(int pid, char comm[])
2162 {
2163 unsigned map;
2164
2165 if (!pid) {
2166 strcpy(comm, "<idle>");
2167 return;
2168 }
2169
2170 if (WARN_ON_ONCE(pid < 0)) {
2171 strcpy(comm, "<XXX>");
2172 return;
2173 }
2174
2175 if (pid > PID_MAX_DEFAULT) {
2176 strcpy(comm, "<...>");
2177 return;
2178 }
2179
2180 map = savedcmd->map_pid_to_cmdline[pid];
2181 if (map != NO_CMDLINE_MAP)
2182 strlcpy(comm, get_saved_cmdlines(map), TASK_COMM_LEN);
2183 else
2184 strcpy(comm, "<...>");
2185 }
2186
2187 void trace_find_cmdline(int pid, char comm[])
2188 {
2189 preempt_disable();
2190 arch_spin_lock(&trace_cmdline_lock);
2191
2192 __trace_find_cmdline(pid, comm);
2193
2194 arch_spin_unlock(&trace_cmdline_lock);
2195 preempt_enable();
2196 }
2197
2198 int trace_find_tgid(int pid)
2199 {
2200 if (unlikely(!tgid_map || !pid || pid > PID_MAX_DEFAULT))
2201 return 0;
2202
2203 return tgid_map[pid];
2204 }
2205
2206 static int trace_save_tgid(struct task_struct *tsk)
2207 {
2208 /* treat recording of idle task as a success */
2209 if (!tsk->pid)
2210 return 1;
2211
2212 if (unlikely(!tgid_map || tsk->pid > PID_MAX_DEFAULT))
2213 return 0;
2214
2215 tgid_map[tsk->pid] = tsk->tgid;
2216 return 1;
2217 }
2218
2219 static bool tracing_record_taskinfo_skip(int flags)
2220 {
2221 if (unlikely(!(flags & (TRACE_RECORD_CMDLINE | TRACE_RECORD_TGID))))
2222 return true;
2223 if (atomic_read(&trace_record_taskinfo_disabled) || !tracing_is_on())
2224 return true;
2225 if (!__this_cpu_read(trace_taskinfo_save))
2226 return true;
2227 return false;
2228 }
2229
2230 /**
2231 * tracing_record_taskinfo - record the task info of a task
2232 *
2233 * @task: task to record
2234 * @flags: TRACE_RECORD_CMDLINE for recording comm
2235 * TRACE_RECORD_TGID for recording tgid
2236 */
2237 void tracing_record_taskinfo(struct task_struct *task, int flags)
2238 {
2239 bool done;
2240
2241 if (tracing_record_taskinfo_skip(flags))
2242 return;
2243
2244 /*
2245 * Record as much task information as possible. If some fail, continue
2246 * to try to record the others.
2247 */
2248 done = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(task);
2249 done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(task);
2250
2251 /* If recording any information failed, retry again soon. */
2252 if (!done)
2253 return;
2254
2255 __this_cpu_write(trace_taskinfo_save, false);
2256 }
2257
2258 /**
2259 * tracing_record_taskinfo_sched_switch - record task info for sched_switch
2260 *
2261 * @prev: previous task during sched_switch
2262 * @next: next task during sched_switch
2263 * @flags: TRACE_RECORD_CMDLINE for recording comm
2264 * TRACE_RECORD_TGID for recording tgid
2265 */
2266 void tracing_record_taskinfo_sched_switch(struct task_struct *prev,
2267 struct task_struct *next, int flags)
2268 {
2269 bool done;
2270
2271 if (tracing_record_taskinfo_skip(flags))
2272 return;
2273
2274 /*
2275 * Record as much task information as possible. If some fail, continue
2276 * to try to record the others.
2277 */
2278 done = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(prev);
2279 done &= !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(next);
2280 done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(prev);
2281 done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(next);
2282
2283 /* If recording any information failed, retry again soon. */
2284 if (!done)
2285 return;
2286
2287 __this_cpu_write(trace_taskinfo_save, false);
2288 }
2289
2290 /* Helpers to record a specific task information */
2291 void tracing_record_cmdline(struct task_struct *task)
2292 {
2293 tracing_record_taskinfo(task, TRACE_RECORD_CMDLINE);
2294 }
2295
2296 void tracing_record_tgid(struct task_struct *task)
2297 {
2298 tracing_record_taskinfo(task, TRACE_RECORD_TGID);
2299 }
2300
2301 /*
2302 * Several functions return TRACE_TYPE_PARTIAL_LINE if the trace_seq
2303 * overflowed, and TRACE_TYPE_HANDLED otherwise. This helper function
2304 * simplifies those functions and keeps them in sync.
2305 */
2306 enum print_line_t trace_handle_return(struct trace_seq *s)
2307 {
2308 return trace_seq_has_overflowed(s) ?
2309 TRACE_TYPE_PARTIAL_LINE : TRACE_TYPE_HANDLED;
2310 }
2311 EXPORT_SYMBOL_GPL(trace_handle_return);
2312
2313 void
2314 tracing_generic_entry_update(struct trace_entry *entry, unsigned short type,
2315 unsigned long flags, int pc)
2316 {
2317 struct task_struct *tsk = current;
2318
2319 entry->preempt_count = pc & 0xff;
2320 entry->pid = (tsk) ? tsk->pid : 0;
2321 entry->type = type;
2322 entry->flags =
2323 #ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT
2324 (irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) |
2325 #else
2326 TRACE_FLAG_IRQS_NOSUPPORT |
2327 #endif
2328 ((pc & NMI_MASK ) ? TRACE_FLAG_NMI : 0) |
2329 ((pc & HARDIRQ_MASK) ? TRACE_FLAG_HARDIRQ : 0) |
2330 ((pc & SOFTIRQ_OFFSET) ? TRACE_FLAG_SOFTIRQ : 0) |
2331 (tif_need_resched() ? TRACE_FLAG_NEED_RESCHED : 0) |
2332 (test_preempt_need_resched() ? TRACE_FLAG_PREEMPT_RESCHED : 0);
2333 }
2334 EXPORT_SYMBOL_GPL(tracing_generic_entry_update);
2335
2336 struct ring_buffer_event *
2337 trace_buffer_lock_reserve(struct ring_buffer *buffer,
2338 int type,
2339 unsigned long len,
2340 unsigned long flags, int pc)
2341 {
2342 return __trace_buffer_lock_reserve(buffer, type, len, flags, pc);
2343 }
2344
2345 DEFINE_PER_CPU(struct ring_buffer_event *, trace_buffered_event);
2346 DEFINE_PER_CPU(int, trace_buffered_event_cnt);
2347 static int trace_buffered_event_ref;
2348
2349 /**
2350 * trace_buffered_event_enable - enable buffering events
2351 *
2352 * When events are being filtered, it is quicker to use a temporary
2353 * buffer to write the event data into if there's a likely chance
2354 * that it will not be committed. The discard of the ring buffer
2355 * is not as fast as committing, and is much slower than copying
2356 * a commit.
2357 *
2358 * When an event is to be filtered, allocate per cpu buffers to
2359 * write the event data into, and if the event is filtered and discarded
2360 * it is simply dropped, otherwise, the entire data is to be committed
2361 * in one shot.
2362 */
2363 void trace_buffered_event_enable(void)
2364 {
2365 struct ring_buffer_event *event;
2366 struct page *page;
2367 int cpu;
2368
2369 WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2370
2371 if (trace_buffered_event_ref++)
2372 return;
2373
2374 for_each_tracing_cpu(cpu) {
2375 page = alloc_pages_node(cpu_to_node(cpu),
2376 GFP_KERNEL | __GFP_NORETRY, 0);
2377 if (!page)
2378 goto failed;
2379
2380 event = page_address(page);
2381 memset(event, 0, sizeof(*event));
2382
2383 per_cpu(trace_buffered_event, cpu) = event;
2384
2385 preempt_disable();
2386 if (cpu == smp_processor_id() &&
2387 this_cpu_read(trace_buffered_event) !=
2388 per_cpu(trace_buffered_event, cpu))
2389 WARN_ON_ONCE(1);
2390 preempt_enable();
2391 }
2392
2393 return;
2394 failed:
2395 trace_buffered_event_disable();
2396 }
2397
2398 static void enable_trace_buffered_event(void *data)
2399 {
2400 /* Probably not needed, but do it anyway */
2401 smp_rmb();
2402 this_cpu_dec(trace_buffered_event_cnt);
2403 }
2404
2405 static void disable_trace_buffered_event(void *data)
2406 {
2407 this_cpu_inc(trace_buffered_event_cnt);
2408 }
2409
2410 /**
2411 * trace_buffered_event_disable - disable buffering events
2412 *
2413 * When a filter is removed, it is faster to not use the buffered
2414 * events, and to commit directly into the ring buffer. Free up
2415 * the temp buffers when there are no more users. This requires
2416 * special synchronization with current events.
2417 */
2418 void trace_buffered_event_disable(void)
2419 {
2420 int cpu;
2421
2422 WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2423
2424 if (WARN_ON_ONCE(!trace_buffered_event_ref))
2425 return;
2426
2427 if (--trace_buffered_event_ref)
2428 return;
2429
2430 preempt_disable();
2431 /* For each CPU, set the buffer as used. */
2432 smp_call_function_many(tracing_buffer_mask,
2433 disable_trace_buffered_event, NULL, 1);
2434 preempt_enable();
2435
2436 /* Wait for all current users to finish */
2437 synchronize_rcu();
2438
2439 for_each_tracing_cpu(cpu) {
2440 free_page((unsigned long)per_cpu(trace_buffered_event, cpu));
2441 per_cpu(trace_buffered_event, cpu) = NULL;
2442 }
2443 /*
2444 * Make sure trace_buffered_event is NULL before clearing
2445 * trace_buffered_event_cnt.
2446 */
2447 smp_wmb();
2448
2449 preempt_disable();
2450 /* Do the work on each cpu */
2451 smp_call_function_many(tracing_buffer_mask,
2452 enable_trace_buffered_event, NULL, 1);
2453 preempt_enable();
2454 }
2455
2456 static struct ring_buffer *temp_buffer;
2457
2458 struct ring_buffer_event *
2459 trace_event_buffer_lock_reserve(struct ring_buffer **current_rb,
2460 struct trace_event_file *trace_file,
2461 int type, unsigned long len,
2462 unsigned long flags, int pc)
2463 {
2464 struct ring_buffer_event *entry;
2465 int val;
2466
2467 *current_rb = trace_file->tr->trace_buffer.buffer;
2468
2469 if (!ring_buffer_time_stamp_abs(*current_rb) && (trace_file->flags &
2470 (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED)) &&
2471 (entry = this_cpu_read(trace_buffered_event))) {
2472 /* Try to use the per cpu buffer first */
2473 val = this_cpu_inc_return(trace_buffered_event_cnt);
2474 if (val == 1) {
2475 trace_event_setup(entry, type, flags, pc);
2476 entry->array[0] = len;
2477 return entry;
2478 }
2479 this_cpu_dec(trace_buffered_event_cnt);
2480 }
2481
2482 entry = __trace_buffer_lock_reserve(*current_rb,
2483 type, len, flags, pc);
2484 /*
2485 * If tracing is off, but we have triggers enabled
2486 * we still need to look at the event data. Use the temp_buffer
2487 * to store the trace event for the tigger to use. It's recusive
2488 * safe and will not be recorded anywhere.
2489 */
2490 if (!entry && trace_file->flags & EVENT_FILE_FL_TRIGGER_COND) {
2491 *current_rb = temp_buffer;
2492 entry = __trace_buffer_lock_reserve(*current_rb,
2493 type, len, flags, pc);
2494 }
2495 return entry;
2496 }
2497 EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
2498
2499 static DEFINE_SPINLOCK(tracepoint_iter_lock);
2500 static DEFINE_MUTEX(tracepoint_printk_mutex);
2501
2502 static void output_printk(struct trace_event_buffer *fbuffer)
2503 {
2504 struct trace_event_call *event_call;
2505 struct trace_event *event;
2506 unsigned long flags;
2507 struct trace_iterator *iter = tracepoint_print_iter;
2508
2509 /* We should never get here if iter is NULL */
2510 if (WARN_ON_ONCE(!iter))
2511 return;
2512
2513 event_call = fbuffer->trace_file->event_call;
2514 if (!event_call || !event_call->event.funcs ||
2515 !event_call->event.funcs->trace)
2516 return;
2517
2518 event = &fbuffer->trace_file->event_call->event;
2519
2520 spin_lock_irqsave(&tracepoint_iter_lock, flags);
2521 trace_seq_init(&iter->seq);
2522 iter->ent = fbuffer->entry;
2523 event_call->event.funcs->trace(iter, 0, event);
2524 trace_seq_putc(&iter->seq, 0);
2525 printk("%s", iter->seq.buffer);
2526
2527 spin_unlock_irqrestore(&tracepoint_iter_lock, flags);
2528 }
2529
2530 int tracepoint_printk_sysctl(struct ctl_table *table, int write,
2531 void __user *buffer, size_t *lenp,
2532 loff_t *ppos)
2533 {
2534 int save_tracepoint_printk;
2535 int ret;
2536
2537 mutex_lock(&tracepoint_printk_mutex);
2538 save_tracepoint_printk = tracepoint_printk;
2539
2540 ret = proc_dointvec(table, write, buffer, lenp, ppos);
2541
2542 /*
2543 * This will force exiting early, as tracepoint_printk
2544 * is always zero when tracepoint_printk_iter is not allocated
2545 */
2546 if (!tracepoint_print_iter)
2547 tracepoint_printk = 0;
2548
2549 if (save_tracepoint_printk == tracepoint_printk)
2550 goto out;
2551
2552 if (tracepoint_printk)
2553 static_key_enable(&tracepoint_printk_key.key);
2554 else
2555 static_key_disable(&tracepoint_printk_key.key);
2556
2557 out:
2558 mutex_unlock(&tracepoint_printk_mutex);
2559
2560 return ret;
2561 }
2562
2563 void trace_event_buffer_commit(struct trace_event_buffer *fbuffer)
2564 {
2565 if (static_key_false(&tracepoint_printk_key.key))
2566 output_printk(fbuffer);
2567
2568 event_trigger_unlock_commit(fbuffer->trace_file, fbuffer->buffer,
2569 fbuffer->event, fbuffer->entry,
2570 fbuffer->flags, fbuffer->pc);
2571 }
2572 EXPORT_SYMBOL_GPL(trace_event_buffer_commit);
2573
2574 /*
2575 * Skip 3:
2576 *
2577 * trace_buffer_unlock_commit_regs()
2578 * trace_event_buffer_commit()
2579 * trace_event_raw_event_xxx()
2580 */
2581 # define STACK_SKIP 3
2582
2583 void trace_buffer_unlock_commit_regs(struct trace_array *tr,
2584 struct ring_buffer *buffer,
2585 struct ring_buffer_event *event,
2586 unsigned long flags, int pc,
2587 struct pt_regs *regs)
2588 {
2589 __buffer_unlock_commit(buffer, event);
2590
2591 /*
2592 * If regs is not set, then skip the necessary functions.
2593 * Note, we can still get here via blktrace, wakeup tracer
2594 * and mmiotrace, but that's ok if they lose a function or
2595 * two. They are not that meaningful.
2596 */
2597 ftrace_trace_stack(tr, buffer, flags, regs ? 0 : STACK_SKIP, pc, regs);
2598 ftrace_trace_userstack(buffer, flags, pc);
2599 }
2600
2601 /*
2602 * Similar to trace_buffer_unlock_commit_regs() but do not dump stack.
2603 */
2604 void
2605 trace_buffer_unlock_commit_nostack(struct ring_buffer *buffer,
2606 struct ring_buffer_event *event)
2607 {
2608 __buffer_unlock_commit(buffer, event);
2609 }
2610
2611 static void
2612 trace_process_export(struct trace_export *export,
2613 struct ring_buffer_event *event)
2614 {
2615 struct trace_entry *entry;
2616 unsigned int size = 0;
2617
2618 entry = ring_buffer_event_data(event);
2619 size = ring_buffer_event_length(event);
2620 export->write(export, entry, size);
2621 }
2622
2623 static DEFINE_MUTEX(ftrace_export_lock);
2624
2625 static struct trace_export __rcu *ftrace_exports_list __read_mostly;
2626
2627 static DEFINE_STATIC_KEY_FALSE(ftrace_exports_enabled);
2628
2629 static inline void ftrace_exports_enable(void)
2630 {
2631 static_branch_enable(&ftrace_exports_enabled);
2632 }
2633
2634 static inline void ftrace_exports_disable(void)
2635 {
2636 static_branch_disable(&ftrace_exports_enabled);
2637 }
2638
2639 static void ftrace_exports(struct ring_buffer_event *event)
2640 {
2641 struct trace_export *export;
2642
2643 preempt_disable_notrace();
2644
2645 export = rcu_dereference_raw_check(ftrace_exports_list);
2646 while (export) {
2647 trace_process_export(export, event);
2648 export = rcu_dereference_raw_check(export->next);
2649 }
2650
2651 preempt_enable_notrace();
2652 }
2653
2654 static inline void
2655 add_trace_export(struct trace_export **list, struct trace_export *export)
2656 {
2657 rcu_assign_pointer(export->next, *list);
2658 /*
2659 * We are entering export into the list but another
2660 * CPU might be walking that list. We need to make sure
2661 * the export->next pointer is valid before another CPU sees
2662 * the export pointer included into the list.
2663 */
2664 rcu_assign_pointer(*list, export);
2665 }
2666
2667 static inline int
2668 rm_trace_export(struct trace_export **list, struct trace_export *export)
2669 {
2670 struct trace_export **p;
2671
2672 for (p = list; *p != NULL; p = &(*p)->next)
2673 if (*p == export)
2674 break;
2675
2676 if (*p != export)
2677 return -1;
2678
2679 rcu_assign_pointer(*p, (*p)->next);
2680
2681 return 0;
2682 }
2683
2684 static inline void
2685 add_ftrace_export(struct trace_export **list, struct trace_export *export)
2686 {
2687 if (*list == NULL)
2688 ftrace_exports_enable();
2689
2690 add_trace_export(list, export);
2691 }
2692
2693 static inline int
2694 rm_ftrace_export(struct trace_export **list, struct trace_export *export)
2695 {
2696 int ret;
2697
2698 ret = rm_trace_export(list, export);
2699 if (*list == NULL)
2700 ftrace_exports_disable();
2701
2702 return ret;
2703 }
2704
2705 int register_ftrace_export(struct trace_export *export)
2706 {
2707 if (WARN_ON_ONCE(!export->write))
2708 return -1;
2709
2710 mutex_lock(&ftrace_export_lock);
2711
2712 add_ftrace_export(&ftrace_exports_list, export);
2713
2714 mutex_unlock(&ftrace_export_lock);
2715
2716 return 0;
2717 }
2718 EXPORT_SYMBOL_GPL(register_ftrace_export);
2719
2720 int unregister_ftrace_export(struct trace_export *export)
2721 {
2722 int ret;
2723
2724 mutex_lock(&ftrace_export_lock);
2725
2726 ret = rm_ftrace_export(&ftrace_exports_list, export);
2727
2728 mutex_unlock(&ftrace_export_lock);
2729
2730 return ret;
2731 }
2732 EXPORT_SYMBOL_GPL(unregister_ftrace_export);
2733
2734 void
2735 trace_function(struct trace_array *tr,
2736 unsigned long ip, unsigned long parent_ip, unsigned long flags,
2737 int pc)
2738 {
2739 struct trace_event_call *call = &event_function;
2740 struct ring_buffer *buffer = tr->trace_buffer.buffer;
2741 struct ring_buffer_event *event;
2742 struct ftrace_entry *entry;
2743
2744 event = __trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
2745 flags, pc);
2746 if (!event)
2747 return;
2748 entry = ring_buffer_event_data(event);
2749 entry->ip = ip;
2750 entry->parent_ip = parent_ip;
2751
2752 if (!call_filter_check_discard(call, entry, buffer, event)) {
2753 if (static_branch_unlikely(&ftrace_exports_enabled))
2754 ftrace_exports(event);
2755 __buffer_unlock_commit(buffer, event);
2756 }
2757 }
2758
2759 #ifdef CONFIG_STACKTRACE
2760
2761 /* Allow 4 levels of nesting: normal, softirq, irq, NMI */
2762 #define FTRACE_KSTACK_NESTING 4
2763
2764 #define FTRACE_KSTACK_ENTRIES (PAGE_SIZE / FTRACE_KSTACK_NESTING)
2765
2766 struct ftrace_stack {
2767 unsigned long calls[FTRACE_KSTACK_ENTRIES];
2768 };
2769
2770
2771 struct ftrace_stacks {
2772 struct ftrace_stack stacks[FTRACE_KSTACK_NESTING];
2773 };
2774
2775 static DEFINE_PER_CPU(struct ftrace_stacks, ftrace_stacks);
2776 static DEFINE_PER_CPU(int, ftrace_stack_reserve);
2777
2778 static void __ftrace_trace_stack(struct ring_buffer *buffer,
2779 unsigned long flags,
2780 int skip, int pc, struct pt_regs *regs)
2781 {
2782 struct trace_event_call *call = &event_kernel_stack;
2783 struct ring_buffer_event *event;
2784 unsigned int size, nr_entries;
2785 struct ftrace_stack *fstack;
2786 struct stack_entry *entry;
2787 int stackidx;
2788
2789 /*
2790 * Add one, for this function and the call to save_stack_trace()
2791 * If regs is set, then these functions will not be in the way.
2792 */
2793 #ifndef CONFIG_UNWINDER_ORC
2794 if (!regs)
2795 skip++;
2796 #endif
2797
2798 /*
2799 * Since events can happen in NMIs there's no safe way to
2800 * use the per cpu ftrace_stacks. We reserve it and if an interrupt
2801 * or NMI comes in, it will just have to use the default
2802 * FTRACE_STACK_SIZE.
2803 */
2804 preempt_disable_notrace();
2805
2806 stackidx = __this_cpu_inc_return(ftrace_stack_reserve) - 1;
2807
2808 /* This should never happen. If it does, yell once and skip */
2809 if (WARN_ON_ONCE(stackidx > FTRACE_KSTACK_NESTING))
2810 goto out;
2811
2812 /*
2813 * The above __this_cpu_inc_return() is 'atomic' cpu local. An
2814 * interrupt will either see the value pre increment or post
2815 * increment. If the interrupt happens pre increment it will have
2816 * restored the counter when it returns. We just need a barrier to
2817 * keep gcc from moving things around.
2818 */
2819 barrier();
2820
2821 fstack = this_cpu_ptr(ftrace_stacks.stacks) + stackidx;
2822 size = ARRAY_SIZE(fstack->calls);
2823
2824 if (regs) {
2825 nr_entries = stack_trace_save_regs(regs, fstack->calls,
2826 size, skip);
2827 } else {
2828 nr_entries = stack_trace_save(fstack->calls, size, skip);
2829 }
2830
2831 size = nr_entries * sizeof(unsigned long);
2832 event = __trace_buffer_lock_reserve(buffer, TRACE_STACK,
2833 sizeof(*entry) + size, flags, pc);
2834 if (!event)
2835 goto out;
2836 entry = ring_buffer_event_data(event);
2837
2838 memcpy(&entry->caller, fstack->calls, size);
2839 entry->size = nr_entries;
2840
2841 if (!call_filter_check_discard(call, entry, buffer, event))
2842 __buffer_unlock_commit(buffer, event);
2843
2844 out:
2845 /* Again, don't let gcc optimize things here */
2846 barrier();
2847 __this_cpu_dec(ftrace_stack_reserve);
2848 preempt_enable_notrace();
2849
2850 }
2851
2852 static inline void ftrace_trace_stack(struct trace_array *tr,
2853 struct ring_buffer *buffer,
2854 unsigned long flags,
2855 int skip, int pc, struct pt_regs *regs)
2856 {
2857 if (!(tr->trace_flags & TRACE_ITER_STACKTRACE))
2858 return;
2859
2860 __ftrace_trace_stack(buffer, flags, skip, pc, regs);
2861 }
2862
2863 void __trace_stack(struct trace_array *tr, unsigned long flags, int skip,
2864 int pc)
2865 {
2866 struct ring_buffer *buffer = tr->trace_buffer.buffer;
2867
2868 if (rcu_is_watching()) {
2869 __ftrace_trace_stack(buffer, flags, skip, pc, NULL);
2870 return;
2871 }
2872
2873 /*
2874 * When an NMI triggers, RCU is enabled via rcu_nmi_enter(),
2875 * but if the above rcu_is_watching() failed, then the NMI
2876 * triggered someplace critical, and rcu_irq_enter() should
2877 * not be called from NMI.
2878 */
2879 if (unlikely(in_nmi()))
2880 return;
2881
2882 rcu_irq_enter_irqson();
2883 __ftrace_trace_stack(buffer, flags, skip, pc, NULL);
2884 rcu_irq_exit_irqson();
2885 }
2886
2887 /**
2888 * trace_dump_stack - record a stack back trace in the trace buffer
2889 * @skip: Number of functions to skip (helper handlers)
2890 */
2891 void trace_dump_stack(int skip)
2892 {
2893 unsigned long flags;
2894
2895 if (tracing_disabled || tracing_selftest_running)
2896 return;
2897
2898 local_save_flags(flags);
2899
2900 #ifndef CONFIG_UNWINDER_ORC
2901 /* Skip 1 to skip this function. */
2902 skip++;
2903 #endif
2904 __ftrace_trace_stack(global_trace.trace_buffer.buffer,
2905 flags, skip, preempt_count(), NULL);
2906 }
2907 EXPORT_SYMBOL_GPL(trace_dump_stack);
2908
2909 #ifdef CONFIG_USER_STACKTRACE_SUPPORT
2910 static DEFINE_PER_CPU(int, user_stack_count);
2911
2912 static void
2913 ftrace_trace_userstack(struct ring_buffer *buffer, unsigned long flags, int pc)
2914 {
2915 struct trace_event_call *call = &event_user_stack;
2916 struct ring_buffer_event *event;
2917 struct userstack_entry *entry;
2918
2919 if (!(global_trace.trace_flags & TRACE_ITER_USERSTACKTRACE))
2920 return;
2921
2922 /*
2923 * NMIs can not handle page faults, even with fix ups.
2924 * The save user stack can (and often does) fault.
2925 */
2926 if (unlikely(in_nmi()))
2927 return;
2928
2929 /*
2930 * prevent recursion, since the user stack tracing may
2931 * trigger other kernel events.
2932 */
2933 preempt_disable();
2934 if (__this_cpu_read(user_stack_count))
2935 goto out;
2936
2937 __this_cpu_inc(user_stack_count);
2938
2939 event = __trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
2940 sizeof(*entry), flags, pc);
2941 if (!event)
2942 goto out_drop_count;
2943 entry = ring_buffer_event_data(event);
2944
2945 entry->tgid = current->tgid;
2946 memset(&entry->caller, 0, sizeof(entry->caller));
2947
2948 stack_trace_save_user(entry->caller, FTRACE_STACK_ENTRIES);
2949 if (!call_filter_check_discard(call, entry, buffer, event))
2950 __buffer_unlock_commit(buffer, event);
2951
2952 out_drop_count:
2953 __this_cpu_dec(user_stack_count);
2954 out:
2955 preempt_enable();
2956 }
2957 #else /* CONFIG_USER_STACKTRACE_SUPPORT */
2958 static void ftrace_trace_userstack(struct ring_buffer *buffer,
2959 unsigned long flags, int pc)
2960 {
2961 }
2962 #endif /* !CONFIG_USER_STACKTRACE_SUPPORT */
2963
2964 #endif /* CONFIG_STACKTRACE */
2965
2966 /* created for use with alloc_percpu */
2967 struct trace_buffer_struct {
2968 int nesting;
2969 char buffer[4][TRACE_BUF_SIZE];
2970 };
2971
2972 static struct trace_buffer_struct *trace_percpu_buffer;
2973
2974 /*
2975 * Thise allows for lockless recording. If we're nested too deeply, then
2976 * this returns NULL.
2977 */
2978 static char *get_trace_buf(void)
2979 {
2980 struct trace_buffer_struct *buffer = this_cpu_ptr(trace_percpu_buffer);
2981
2982 if (!buffer || buffer->nesting >= 4)
2983 return NULL;
2984
2985 buffer->nesting++;
2986
2987 /* Interrupts must see nesting incremented before we use the buffer */
2988 barrier();
2989 return &buffer->buffer[buffer->nesting][0];
2990 }
2991
2992 static void put_trace_buf(void)
2993 {
2994 /* Don't let the decrement of nesting leak before this */
2995 barrier();
2996 this_cpu_dec(trace_percpu_buffer->nesting);
2997 }
2998
2999 static int alloc_percpu_trace_buffer(void)
3000 {
3001 struct trace_buffer_struct *buffers;
3002
3003 buffers = alloc_percpu(struct trace_buffer_struct);
3004 if (WARN(!buffers, "Could not allocate percpu trace_printk buffer"))
3005 return -ENOMEM;
3006
3007 trace_percpu_buffer = buffers;
3008 return 0;
3009 }
3010
3011 static int buffers_allocated;
3012
3013 void trace_printk_init_buffers(void)
3014 {
3015 if (buffers_allocated)
3016 return;
3017
3018 if (alloc_percpu_trace_buffer())
3019 return;
3020
3021 /* trace_printk() is for debug use only. Don't use it in production. */
3022
3023 pr_warn("\n");
3024 pr_warn("**********************************************************\n");
3025 pr_warn("** NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE **\n");
3026 pr_warn("** **\n");
3027 pr_warn("** trace_printk() being used. Allocating extra memory. **\n");
3028 pr_warn("** **\n");
3029 pr_warn("** This means that this is a DEBUG kernel and it is **\n");
3030 pr_warn("** unsafe for production use. **\n");
3031 pr_warn("** **\n");
3032 pr_warn("** If you see this message and you are not debugging **\n");
3033 pr_warn("** the kernel, report this immediately to your vendor! **\n");
3034 pr_warn("** **\n");
3035 pr_warn("** NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE **\n");
3036 pr_warn("**********************************************************\n");
3037
3038 /* Expand the buffers to set size */
3039 tracing_update_buffers();
3040
3041 buffers_allocated = 1;
3042
3043 /*
3044 * trace_printk_init_buffers() can be called by modules.
3045 * If that happens, then we need to start cmdline recording
3046 * directly here. If the global_trace.buffer is already
3047 * allocated here, then this was called by module code.
3048 */
3049 if (global_trace.trace_buffer.buffer)
3050 tracing_start_cmdline_record();
3051 }
3052 EXPORT_SYMBOL_GPL(trace_printk_init_buffers);
3053
3054 void trace_printk_start_comm(void)
3055 {
3056 /* Start tracing comms if trace printk is set */
3057 if (!buffers_allocated)
3058 return;
3059 tracing_start_cmdline_record();
3060 }
3061
3062 static void trace_printk_start_stop_comm(int enabled)
3063 {
3064 if (!buffers_allocated)
3065 return;
3066
3067 if (enabled)
3068 tracing_start_cmdline_record();
3069 else
3070 tracing_stop_cmdline_record();
3071 }
3072
3073 /**
3074 * trace_vbprintk - write binary msg to tracing buffer
3075 * @ip: The address of the caller
3076 * @fmt: The string format to write to the buffer
3077 * @args: Arguments for @fmt
3078 */
3079 int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
3080 {
3081 struct trace_event_call *call = &event_bprint;
3082 struct ring_buffer_event *event;
3083 struct ring_buffer *buffer;
3084 struct trace_array *tr = &global_trace;
3085 struct bprint_entry *entry;
3086 unsigned long flags;
3087 char *tbuffer;
3088 int len = 0, size, pc;
3089
3090 if (unlikely(tracing_selftest_running || tracing_disabled))
3091 return 0;
3092
3093 /* Don't pollute graph traces with trace_vprintk internals */
3094 pause_graph_tracing();
3095
3096 pc = preempt_count();
3097 preempt_disable_notrace();
3098
3099 tbuffer = get_trace_buf();
3100 if (!tbuffer) {
3101 len = 0;
3102 goto out_nobuffer;
3103 }
3104
3105 len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args);
3106
3107 if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0)
3108 goto out;
3109
3110 local_save_flags(flags);
3111 size = sizeof(*entry) + sizeof(u32) * len;
3112 buffer = tr->trace_buffer.buffer;
3113 event = __trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
3114 flags, pc);
3115 if (!event)
3116 goto out;
3117 entry = ring_buffer_event_data(event);
3118 entry->ip = ip;
3119 entry->fmt = fmt;
3120
3121 memcpy(entry->buf, tbuffer, sizeof(u32) * len);
3122 if (!call_filter_check_discard(call, entry, buffer, event)) {
3123 __buffer_unlock_commit(buffer, event);
3124 ftrace_trace_stack(tr, buffer, flags, 6, pc, NULL);
3125 }
3126
3127 out:
3128 put_trace_buf();
3129
3130 out_nobuffer:
3131 preempt_enable_notrace();
3132 unpause_graph_tracing();
3133
3134 return len;
3135 }
3136 EXPORT_SYMBOL_GPL(trace_vbprintk);
3137
3138 __printf(3, 0)
3139 static int
3140 __trace_array_vprintk(struct ring_buffer *buffer,
3141 unsigned long ip, const char *fmt, va_list args)
3142 {
3143 struct trace_event_call *call = &event_print;
3144 struct ring_buffer_event *event;
3145 int len = 0, size, pc;
3146 struct print_entry *entry;
3147 unsigned long flags;
3148 char *tbuffer;
3149
3150 if (tracing_disabled || tracing_selftest_running)
3151 return 0;
3152
3153 /* Don't pollute graph traces with trace_vprintk internals */
3154 pause_graph_tracing();
3155
3156 pc = preempt_count();
3157 preempt_disable_notrace();
3158
3159
3160 tbuffer = get_trace_buf();
3161 if (!tbuffer) {
3162 len = 0;
3163 goto out_nobuffer;
3164 }
3165
3166 len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
3167
3168 local_save_flags(flags);
3169 size = sizeof(*entry) + len + 1;
3170 event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
3171 flags, pc);
3172 if (!event)
3173 goto out;
3174 entry = ring_buffer_event_data(event);
3175 entry->ip = ip;
3176
3177 memcpy(&entry->buf, tbuffer, len + 1);
3178 if (!call_filter_check_discard(call, entry, buffer, event)) {
3179 __buffer_unlock_commit(buffer, event);
3180 ftrace_trace_stack(&global_trace, buffer, flags, 6, pc, NULL);
3181 }
3182
3183 out:
3184 put_trace_buf();
3185
3186 out_nobuffer:
3187 preempt_enable_notrace();
3188 unpause_graph_tracing();
3189
3190 return len;
3191 }
3192
3193 __printf(3, 0)
3194 int trace_array_vprintk(struct trace_array *tr,
3195 unsigned long ip, const char *fmt, va_list args)
3196 {
3197 return __trace_array_vprintk(tr->trace_buffer.buffer, ip, fmt, args);
3198 }
3199
3200 __printf(3, 0)
3201 int trace_array_printk(struct trace_array *tr,
3202 unsigned long ip, const char *fmt, ...)
3203 {
3204 int ret;
3205 va_list ap;
3206
3207 if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
3208 return 0;
3209
3210 va_start(ap, fmt);
3211 ret = trace_array_vprintk(tr, ip, fmt, ap);
3212 va_end(ap);
3213 return ret;
3214 }
3215 EXPORT_SYMBOL_GPL(trace_array_printk);
3216
3217 __printf(3, 4)
3218 int trace_array_printk_buf(struct ring_buffer *buffer,
3219 unsigned long ip, const char *fmt, ...)
3220 {
3221 int ret;
3222 va_list ap;
3223
3224 if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
3225 return 0;
3226
3227 va_start(ap, fmt);
3228 ret = __trace_array_vprintk(buffer, ip, fmt, ap);
3229 va_end(ap);
3230 return ret;
3231 }
3232
3233 __printf(2, 0)
3234 int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
3235 {
3236 return trace_array_vprintk(&global_trace, ip, fmt, args);
3237 }
3238 EXPORT_SYMBOL_GPL(trace_vprintk);
3239
3240 static void trace_iterator_increment(struct trace_iterator *iter)
3241 {
3242 struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu);
3243
3244 iter->idx++;
3245 if (buf_iter)
3246 ring_buffer_read(buf_iter, NULL);
3247 }
3248
3249 static struct trace_entry *
3250 peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
3251 unsigned long *lost_events)
3252 {
3253 struct ring_buffer_event *event;
3254 struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);
3255
3256 if (buf_iter)
3257 event = ring_buffer_iter_peek(buf_iter, ts);
3258 else
3259 event = ring_buffer_peek(iter->trace_buffer->buffer, cpu, ts,
3260 lost_events);
3261
3262 if (event) {
3263 iter->ent_size = ring_buffer_event_length(event);
3264 return ring_buffer_event_data(event);
3265 }
3266 iter->ent_size = 0;
3267 return NULL;
3268 }
3269
3270 static struct trace_entry *
3271 __find_next_entry(struct trace_iterator *iter, int *ent_cpu,
3272 unsigned long *missing_events, u64 *ent_ts)
3273 {
3274 struct ring_buffer *buffer = iter->trace_buffer->buffer;
3275 struct trace_entry *ent, *next = NULL;
3276 unsigned long lost_events = 0, next_lost = 0;
3277 int cpu_file = iter->cpu_file;
3278 u64 next_ts = 0, ts;
3279 int next_cpu = -1;
3280 int next_size = 0;
3281 int cpu;
3282
3283 /*
3284 * If we are in a per_cpu trace file, don't bother by iterating over
3285 * all cpu and peek directly.
3286 */
3287 if (cpu_file > RING_BUFFER_ALL_CPUS) {
3288 if (ring_buffer_empty_cpu(buffer, cpu_file))
3289 return NULL;
3290 ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
3291 if (ent_cpu)
3292 *ent_cpu = cpu_file;
3293
3294 return ent;
3295 }
3296
3297 for_each_tracing_cpu(cpu) {
3298
3299 if (ring_buffer_empty_cpu(buffer, cpu))
3300 continue;
3301
3302 ent = peek_next_entry(iter, cpu, &ts, &lost_events);
3303
3304 /*
3305 * Pick the entry with the smallest timestamp:
3306 */
3307 if (ent && (!next || ts < next_ts)) {
3308 next = ent;
3309 next_cpu = cpu;
3310 next_ts = ts;
3311 next_lost = lost_events;
3312 next_size = iter->ent_size;
3313 }
3314 }
3315
3316 iter->ent_size = next_size;
3317
3318 if (ent_cpu)
3319 *ent_cpu = next_cpu;
3320
3321 if (ent_ts)
3322 *ent_ts = next_ts;
3323
3324 if (missing_events)
3325 *missing_events = next_lost;
3326
3327 return next;
3328 }
3329
3330 /* Find the next real entry, without updating the iterator itself */
3331 struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
3332 int *ent_cpu, u64 *ent_ts)
3333 {
3334 return __find_next_entry(iter, ent_cpu, NULL, ent_ts);
3335 }
3336
3337 /* Find the next real entry, and increment the iterator to the next entry */
3338 void *trace_find_next_entry_inc(struct trace_iterator *iter)
3339 {
3340 iter->ent = __find_next_entry(iter, &iter->cpu,
3341 &iter->lost_events, &iter->ts);
3342
3343 if (iter->ent)
3344 trace_iterator_increment(iter);
3345
3346 return iter->ent ? iter : NULL;
3347 }
3348
3349 static void trace_consume(struct trace_iterator *iter)
3350 {
3351 ring_buffer_consume(iter->trace_buffer->buffer, iter->cpu, &iter->ts,
3352 &iter->lost_events);
3353 }
3354
3355 static void *s_next(struct seq_file *m, void *v, loff_t *pos)
3356 {
3357 struct trace_iterator *iter = m->private;
3358 int i = (int)*pos;
3359 void *ent;
3360
3361 WARN_ON_ONCE(iter->leftover);
3362
3363 (*pos)++;
3364
3365 /* can't go backwards */
3366 if (iter->idx > i)
3367 return NULL;
3368
3369 if (iter->idx < 0)
3370 ent = trace_find_next_entry_inc(iter);
3371 else
3372 ent = iter;
3373
3374 while (ent && iter->idx < i)
3375 ent = trace_find_next_entry_inc(iter);
3376
3377 iter->pos = *pos;
3378
3379 return ent;
3380 }
3381
3382 void tracing_iter_reset(struct trace_iterator *iter, int cpu)
3383 {
3384 struct ring_buffer_event *event;
3385 struct ring_buffer_iter *buf_iter;
3386 unsigned long entries = 0;
3387 u64 ts;
3388
3389 per_cpu_ptr(iter->trace_buffer->data, cpu)->skipped_entries = 0;
3390
3391 buf_iter = trace_buffer_iter(iter, cpu);
3392 if (!buf_iter)
3393 return;
3394
3395 ring_buffer_iter_reset(buf_iter);
3396
3397 /*
3398 * We could have the case with the max latency tracers
3399 * that a reset never took place on a cpu. This is evident
3400 * by the timestamp being before the start of the buffer.
3401 */
3402 while ((event = ring_buffer_iter_peek(buf_iter, &ts))) {
3403 if (ts >= iter->trace_buffer->time_start)
3404 break;
3405 entries++;
3406 ring_buffer_read(buf_iter, NULL);
3407 }
3408
3409 per_cpu_ptr(iter->trace_buffer->data, cpu)->skipped_entries = entries;
3410 }
3411
3412 /*
3413 * The current tracer is copied to avoid a global locking
3414 * all around.
3415 */
3416 static void *s_start(struct seq_file *m, loff_t *pos)
3417 {
3418 struct trace_iterator *iter = m->private;
3419 struct trace_array *tr = iter->tr;
3420 int cpu_file = iter->cpu_file;
3421 void *p = NULL;
3422 loff_t l = 0;
3423 int cpu;
3424
3425 /*
3426 * copy the tracer to avoid using a global lock all around.
3427 * iter->trace is a copy of current_trace, the pointer to the
3428 * name may be used instead of a strcmp(), as iter->trace->name
3429 * will point to the same string as current_trace->name.
3430 */
3431 mutex_lock(&trace_types_lock);
3432 if (unlikely(tr->current_trace && iter->trace->name != tr->current_trace->name))
3433 *iter->trace = *tr->current_trace;
3434 mutex_unlock(&trace_types_lock);
3435
3436 #ifdef CONFIG_TRACER_MAX_TRACE
3437 if (iter->snapshot && iter->trace->use_max_tr)
3438 return ERR_PTR(-EBUSY);
3439 #endif
3440
3441 if (!iter->snapshot)
3442 atomic_inc(&trace_record_taskinfo_disabled);
3443
3444 if (*pos != iter->pos) {
3445 iter->ent = NULL;
3446 iter->cpu = 0;
3447 iter->idx = -1;
3448
3449 if (cpu_file == RING_BUFFER_ALL_CPUS) {
3450 for_each_tracing_cpu(cpu)
3451 tracing_iter_reset(iter, cpu);
3452 } else
3453 tracing_iter_reset(iter, cpu_file);
3454
3455 iter->leftover = 0;
3456 for (p = iter; p && l < *pos; p = s_next(m, p, &l))
3457 ;
3458
3459 } else {
3460 /*
3461 * If we overflowed the seq_file before, then we want
3462 * to just reuse the trace_seq buffer again.
3463 */
3464 if (iter->leftover)
3465 p = iter;
3466 else {
3467 l = *pos - 1;
3468 p = s_next(m, p, &l);
3469 }
3470 }
3471
3472 trace_event_read_lock();
3473 trace_access_lock(cpu_file);
3474 return p;
3475 }
3476
3477 static void s_stop(struct seq_file *m, void *p)
3478 {
3479 struct trace_iterator *iter = m->private;
3480
3481 #ifdef CONFIG_TRACER_MAX_TRACE
3482 if (iter->snapshot && iter->trace->use_max_tr)
3483 return;
3484 #endif
3485
3486 if (!iter->snapshot)
3487 atomic_dec(&trace_record_taskinfo_disabled);
3488
3489 trace_access_unlock(iter->cpu_file);
3490 trace_event_read_unlock();
3491 }
3492
3493 static void
3494 get_total_entries_cpu(struct trace_buffer *buf, unsigned long *total,
3495 unsigned long *entries, int cpu)
3496 {
3497 unsigned long count;
3498
3499 count = ring_buffer_entries_cpu(buf->buffer, cpu);
3500 /*
3501 * If this buffer has skipped entries, then we hold all
3502 * entries for the trace and we need to ignore the
3503 * ones before the time stamp.
3504 */
3505 if (per_cpu_ptr(buf->data, cpu)->skipped_entries) {
3506 count -= per_cpu_ptr(buf->data, cpu)->skipped_entries;
3507 /* total is the same as the entries */
3508 *total = count;
3509 } else
3510 *total = count +
3511 ring_buffer_overrun_cpu(buf->buffer, cpu);
3512 *entries = count;
3513 }
3514
3515 static void
3516 get_total_entries(struct trace_buffer *buf,
3517 unsigned long *total, unsigned long *entries)
3518 {
3519 unsigned long t, e;
3520 int cpu;
3521
3522 *total = 0;
3523 *entries = 0;
3524
3525 for_each_tracing_cpu(cpu) {
3526 get_total_entries_cpu(buf, &t, &e, cpu);
3527 *total += t;
3528 *entries += e;
3529 }
3530 }
3531
3532 unsigned long trace_total_entries_cpu(struct trace_array *tr, int cpu)
3533 {
3534 unsigned long total, entries;
3535
3536 if (!tr)
3537 tr = &global_trace;
3538
3539 get_total_entries_cpu(&tr->trace_buffer, &total, &entries, cpu);
3540
3541 return entries;
3542 }
3543
3544 unsigned long trace_total_entries(struct trace_array *tr)
3545 {
3546 unsigned long total, entries;
3547
3548 if (!tr)
3549 tr = &global_trace;
3550
3551 get_total_entries(&tr->trace_buffer, &total, &entries);
3552
3553 return entries;
3554 }
3555
3556 static void print_lat_help_header(struct seq_file *m)
3557 {
3558 seq_puts(m, "# _------=> CPU# \n"
3559 "# / _-----=> irqs-off \n"
3560 "# | / _----=> need-resched \n"
3561 "# || / _---=> hardirq/softirq \n"
3562 "# ||| / _--=> preempt-depth \n"
3563 "# |||| / delay \n"
3564 "# cmd pid ||||| time | caller \n"
3565 "# \\ / ||||| \\ | / \n");
3566 }
3567
3568 static void print_event_info(struct trace_buffer *buf, struct seq_file *m)
3569 {
3570 unsigned long total;
3571 unsigned long entries;
3572
3573 get_total_entries(buf, &total, &entries);
3574 seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu #P:%d\n",
3575 entries, total, num_online_cpus());
3576 seq_puts(m, "#\n");
3577 }
3578
3579 static void print_func_help_header(struct trace_buffer *buf, struct seq_file *m,
3580 unsigned int flags)
3581 {
3582 bool tgid = flags & TRACE_ITER_RECORD_TGID;
3583
3584 print_event_info(buf, m);
3585
3586 seq_printf(m, "# TASK-PID %s CPU# TIMESTAMP FUNCTION\n", tgid ? "TGID " : "");
3587 seq_printf(m, "# | | %s | | |\n", tgid ? " | " : "");
3588 }
3589
3590 static void print_func_help_header_irq(struct trace_buffer *buf, struct seq_file *m,
3591 unsigned int flags)
3592 {
3593 bool tgid = flags & TRACE_ITER_RECORD_TGID;
3594 const char *space = " ";
3595 int prec = tgid ? 10 : 2;
3596
3597 print_event_info(buf, m);
3598
3599 seq_printf(m, "# %.*s _-----=> irqs-off\n", prec, space);
3600 seq_printf(m, "# %.*s / _----=> need-resched\n", prec, space);
3601 seq_printf(m, "# %.*s| / _---=> hardirq/softirq\n", prec, space);
3602 seq_printf(m, "# %.*s|| / _--=> preempt-depth\n", prec, space);
3603 seq_printf(m, "# %.*s||| / delay\n", prec, space);
3604 seq_printf(m, "# TASK-PID %.*sCPU# |||| TIMESTAMP FUNCTION\n", prec, " TGID ");
3605 seq_printf(m, "# | | %.*s | |||| | |\n", prec, " | ");
3606 }
3607
3608 void
3609 print_trace_header(struct seq_file *m, struct trace_iterator *iter)
3610 {
3611 unsigned long sym_flags = (global_trace.trace_flags & TRACE_ITER_SYM_MASK);
3612 struct trace_buffer *buf = iter->trace_buffer;
3613 struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu);
3614 struct tracer *type = iter->trace;
3615 unsigned long entries;
3616 unsigned long total;
3617 const char *name = "preemption";
3618
3619 name = type->name;
3620
3621 get_total_entries(buf, &total, &entries);
3622
3623 seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
3624 name, UTS_RELEASE);
3625 seq_puts(m, "# -----------------------------------"
3626 "---------------------------------\n");
3627 seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
3628 " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
3629 nsecs_to_usecs(data->saved_latency),
3630 entries,
3631 total,
3632 buf->cpu,
3633 #if defined(CONFIG_PREEMPT_NONE)
3634 "server",
3635 #elif defined(CONFIG_PREEMPT_VOLUNTARY)
3636 "desktop",
3637 #elif defined(CONFIG_PREEMPT)
3638 "preempt",
3639 #else
3640 "unknown",
3641 #endif
3642 /* These are reserved for later use */
3643 0, 0, 0, 0);
3644 #ifdef CONFIG_SMP
3645 seq_printf(m, " #P:%d)\n", num_online_cpus());
3646 #else
3647 seq_puts(m, ")\n");
3648 #endif
3649 seq_puts(m, "# -----------------\n");
3650 seq_printf(m, "# | task: %.16s-%d "
3651 "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
3652 data->comm, data->pid,
3653 from_kuid_munged(seq_user_ns(m), data->uid), data->nice,
3654 data->policy, data->rt_priority);
3655 seq_puts(m, "# -----------------\n");
3656
3657 if (data->critical_start) {
3658 seq_puts(m, "# => started at: ");
3659 seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
3660 trace_print_seq(m, &iter->seq);
3661 seq_puts(m, "\n# => ended at: ");
3662 seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
3663 trace_print_seq(m, &iter->seq);
3664 seq_puts(m, "\n#\n");
3665 }
3666
3667 seq_puts(m, "#\n");
3668 }
3669
3670 static void test_cpu_buff_start(struct trace_iterator *iter)
3671 {
3672 struct trace_seq *s = &iter->seq;
3673 struct trace_array *tr = iter->tr;
3674
3675 if (!(tr->trace_flags & TRACE_ITER_ANNOTATE))
3676 return;
3677
3678 if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
3679 return;
3680
3681 if (cpumask_available(iter->started) &&
3682 cpumask_test_cpu(iter->cpu, iter->started))
3683 return;
3684
3685 if (per_cpu_ptr(iter->trace_buffer->data, iter->cpu)->skipped_entries)
3686 return;
3687
3688 if (cpumask_available(iter->started))
3689 cpumask_set_cpu(iter->cpu, iter->started);
3690
3691 /* Don't print started cpu buffer for the first entry of the trace */
3692 if (iter->idx > 1)
3693 trace_seq_printf(s, "##### CPU %u buffer started ####\n",
3694 iter->cpu);
3695 }
3696
3697 static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
3698 {
3699 struct trace_array *tr = iter->tr;
3700 struct trace_seq *s = &iter->seq;
3701 unsigned long sym_flags = (tr->trace_flags & TRACE_ITER_SYM_MASK);
3702 struct trace_entry *entry;
3703 struct trace_event *event;
3704
3705 entry = iter->ent;
3706
3707 test_cpu_buff_start(iter);
3708
3709 event = ftrace_find_event(entry->type);
3710
3711 if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3712 if (iter->iter_flags & TRACE_FILE_LAT_FMT)
3713 trace_print_lat_context(iter);
3714 else
3715 trace_print_context(iter);
3716 }
3717
3718 if (trace_seq_has_overflowed(s))
3719 return TRACE_TYPE_PARTIAL_LINE;
3720
3721 if (event)
3722 return event->funcs->trace(iter, sym_flags, event);
3723
3724 trace_seq_printf(s, "Unknown type %d\n", entry->type);
3725
3726 return trace_handle_return(s);
3727 }
3728
3729 static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
3730 {
3731 struct trace_array *tr = iter->tr;
3732 struct trace_seq *s = &iter->seq;
3733 struct trace_entry *entry;
3734 struct trace_event *event;
3735
3736 entry = iter->ent;
3737
3738 if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO)
3739 trace_seq_printf(s, "%d %d %llu ",
3740 entry->pid, iter->cpu, iter->ts);
3741
3742 if (trace_seq_has_overflowed(s))
3743 return TRACE_TYPE_PARTIAL_LINE;
3744
3745 event = ftrace_find_event(entry->type);
3746 if (event)
3747 return event->funcs->raw(iter, 0, event);
3748
3749 trace_seq_printf(s, "%d ?\n", entry->type);
3750
3751 return trace_handle_return(s);
3752 }
3753
3754 static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
3755 {
3756 struct trace_array *tr = iter->tr;
3757 struct trace_seq *s = &iter->seq;
3758 unsigned char newline = '\n';
3759 struct trace_entry *entry;
3760 struct trace_event *event;
3761
3762 entry = iter->ent;
3763
3764 if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3765 SEQ_PUT_HEX_FIELD(s, entry->pid);
3766 SEQ_PUT_HEX_FIELD(s, iter->cpu);
3767 SEQ_PUT_HEX_FIELD(s, iter->ts);
3768 if (trace_seq_has_overflowed(s))
3769 return TRACE_TYPE_PARTIAL_LINE;
3770 }
3771
3772 event = ftrace_find_event(entry->type);
3773 if (event) {
3774 enum print_line_t ret = event->funcs->hex(iter, 0, event);
3775 if (ret != TRACE_TYPE_HANDLED)
3776 return ret;
3777 }
3778
3779 SEQ_PUT_FIELD(s, newline);
3780
3781 return trace_handle_return(s);
3782 }
3783
3784 static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
3785 {
3786 struct trace_array *tr = iter->tr;
3787 struct trace_seq *s = &iter->seq;
3788 struct trace_entry *entry;
3789 struct trace_event *event;
3790
3791 entry = iter->ent;
3792
3793 if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3794 SEQ_PUT_FIELD(s, entry->pid);
3795 SEQ_PUT_FIELD(s, iter->cpu);
3796 SEQ_PUT_FIELD(s, iter->ts);
3797 if (trace_seq_has_overflowed(s))
3798 return TRACE_TYPE_PARTIAL_LINE;
3799 }
3800
3801 event = ftrace_find_event(entry->type);
3802 return event ? event->funcs->binary(iter, 0, event) :
3803 TRACE_TYPE_HANDLED;
3804 }
3805
3806 int trace_empty(struct trace_iterator *iter)
3807 {
3808 struct ring_buffer_iter *buf_iter;
3809 int cpu;
3810
3811 /* If we are looking at one CPU buffer, only check that one */
3812 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
3813 cpu = iter->cpu_file;
3814 buf_iter = trace_buffer_iter(iter, cpu);
3815 if (buf_iter) {
3816 if (!ring_buffer_iter_empty(buf_iter))
3817 return 0;
3818 } else {
3819 if (!ring_buffer_empty_cpu(iter->trace_buffer->buffer, cpu))
3820 return 0;
3821 }
3822 return 1;
3823 }
3824
3825 for_each_tracing_cpu(cpu) {
3826 buf_iter = trace_buffer_iter(iter, cpu);
3827 if (buf_iter) {
3828 if (!ring_buffer_iter_empty(buf_iter))
3829 return 0;
3830 } else {
3831 if (!ring_buffer_empty_cpu(iter->trace_buffer->buffer, cpu))
3832 return 0;
3833 }
3834 }
3835
3836 return 1;
3837 }
3838
3839 /* Called with trace_event_read_lock() held. */
3840 enum print_line_t print_trace_line(struct trace_iterator *iter)
3841 {
3842 struct trace_array *tr = iter->tr;
3843 unsigned long trace_flags = tr->trace_flags;
3844 enum print_line_t ret;
3845
3846 if (iter->lost_events) {
3847 trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
3848 iter->cpu, iter->lost_events);
3849 if (trace_seq_has_overflowed(&iter->seq))
3850 return TRACE_TYPE_PARTIAL_LINE;
3851 }
3852
3853 if (iter->trace && iter->trace->print_line) {
3854 ret = iter->trace->print_line(iter);
3855 if (ret != TRACE_TYPE_UNHANDLED)
3856 return ret;
3857 }
3858
3859 if (iter->ent->type == TRACE_BPUTS &&
3860 trace_flags & TRACE_ITER_PRINTK &&
3861 trace_flags & TRACE_ITER_PRINTK_MSGONLY)
3862 return trace_print_bputs_msg_only(iter);
3863
3864 if (iter->ent->type == TRACE_BPRINT &&
3865 trace_flags & TRACE_ITER_PRINTK &&
3866 trace_flags & TRACE_ITER_PRINTK_MSGONLY)
3867 return trace_print_bprintk_msg_only(iter);
3868
3869 if (iter->ent->type == TRACE_PRINT &&
3870 trace_flags & TRACE_ITER_PRINTK &&
3871 trace_flags & TRACE_ITER_PRINTK_MSGONLY)
3872 return trace_print_printk_msg_only(iter);
3873
3874 if (trace_flags & TRACE_ITER_BIN)
3875 return print_bin_fmt(iter);
3876
3877 if (trace_flags & TRACE_ITER_HEX)
3878 return print_hex_fmt(iter);
3879
3880 if (trace_flags & TRACE_ITER_RAW)
3881 return print_raw_fmt(iter);
3882
3883 return print_trace_fmt(iter);
3884 }
3885
3886 void trace_latency_header(struct seq_file *m)
3887 {
3888 struct trace_iterator *iter = m->private;
3889 struct trace_array *tr = iter->tr;
3890
3891 /* print nothing if the buffers are empty */
3892 if (trace_empty(iter))
3893 return;
3894
3895 if (iter->iter_flags & TRACE_FILE_LAT_FMT)
3896 print_trace_header(m, iter);
3897
3898 if (!(tr->trace_flags & TRACE_ITER_VERBOSE))
3899 print_lat_help_header(m);
3900 }
3901
3902 void trace_default_header(struct seq_file *m)
3903 {
3904 struct trace_iterator *iter = m->private;
3905 struct trace_array *tr = iter->tr;
3906 unsigned long trace_flags = tr->trace_flags;
3907
3908 if (!(trace_flags & TRACE_ITER_CONTEXT_INFO))
3909 return;
3910
3911 if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
3912 /* print nothing if the buffers are empty */
3913 if (trace_empty(iter))
3914 return;
3915 print_trace_header(m, iter);
3916 if (!(trace_flags & TRACE_ITER_VERBOSE))
3917 print_lat_help_header(m);
3918 } else {
3919 if (!(trace_flags & TRACE_ITER_VERBOSE)) {
3920 if (trace_flags & TRACE_ITER_IRQ_INFO)
3921 print_func_help_header_irq(iter->trace_buffer,
3922 m, trace_flags);
3923 else
3924 print_func_help_header(iter->trace_buffer, m,
3925 trace_flags);
3926 }
3927 }
3928 }
3929
3930 static void test_ftrace_alive(struct seq_file *m)
3931 {
3932 if (!ftrace_is_dead())
3933 return;
3934 seq_puts(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n"
3935 "# MAY BE MISSING FUNCTION EVENTS\n");
3936 }
3937
3938 #ifdef CONFIG_TRACER_MAX_TRACE
3939 static void show_snapshot_main_help(struct seq_file *m)
3940 {
3941 seq_puts(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n"
3942 "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
3943 "# Takes a snapshot of the main buffer.\n"
3944 "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n"
3945 "# (Doesn't have to be '2' works with any number that\n"
3946 "# is not a '0' or '1')\n");
3947 }
3948
3949 static void show_snapshot_percpu_help(struct seq_file *m)
3950 {
3951 seq_puts(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n");
3952 #ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
3953 seq_puts(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
3954 "# Takes a snapshot of the main buffer for this cpu.\n");
3955 #else
3956 seq_puts(m, "# echo 1 > snapshot : Not supported with this kernel.\n"
3957 "# Must use main snapshot file to allocate.\n");
3958 #endif
3959 seq_puts(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n"
3960 "# (Doesn't have to be '2' works with any number that\n"
3961 "# is not a '0' or '1')\n");
3962 }
3963
3964 static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
3965 {
3966 if (iter->tr->allocated_snapshot)
3967 seq_puts(m, "#\n# * Snapshot is allocated *\n#\n");
3968 else
3969 seq_puts(m, "#\n# * Snapshot is freed *\n#\n");
3970
3971 seq_puts(m, "# Snapshot commands:\n");
3972 if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
3973 show_snapshot_main_help(m);
3974 else
3975 show_snapshot_percpu_help(m);
3976 }
3977 #else
3978 /* Should never be called */
3979 static inline void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) { }
3980 #endif
3981
3982 static int s_show(struct seq_file *m, void *v)
3983 {
3984 struct trace_iterator *iter = v;
3985 int ret;
3986
3987 if (iter->ent == NULL) {
3988 if (iter->tr) {
3989 seq_printf(m, "# tracer: %s\n", iter->trace->name);
3990 seq_puts(m, "#\n");
3991 test_ftrace_alive(m);
3992 }
3993 if (iter->snapshot && trace_empty(iter))
3994 print_snapshot_help(m, iter);
3995 else if (iter->trace && iter->trace->print_header)
3996 iter->trace->print_header(m);
3997 else
3998 trace_default_header(m);
3999
4000 } else if (iter->leftover) {
4001 /*
4002 * If we filled the seq_file buffer earlier, we
4003 * want to just show it now.
4004 */
4005 ret = trace_print_seq(m, &iter->seq);
4006
4007 /* ret should this time be zero, but you never know */
4008 iter->leftover = ret;
4009
4010 } else {
4011 print_trace_line(iter);
4012 ret = trace_print_seq(m, &iter->seq);
4013 /*
4014 * If we overflow the seq_file buffer, then it will
4015 * ask us for this data again at start up.
4016 * Use that instead.
4017 * ret is 0 if seq_file write succeeded.
4018 * -1 otherwise.
4019 */
4020 iter->leftover = ret;
4021 }
4022
4023 return 0;
4024 }
4025
4026 /*
4027 * Should be used after trace_array_get(), trace_types_lock
4028 * ensures that i_cdev was already initialized.
4029 */
4030 static inline int tracing_get_cpu(struct inode *inode)
4031 {
4032 if (inode->i_cdev) /* See trace_create_cpu_file() */
4033 return (long)inode->i_cdev - 1;
4034 return RING_BUFFER_ALL_CPUS;
4035 }
4036
4037 static const struct seq_operations tracer_seq_ops = {
4038 .start = s_start,
4039 .next = s_next,
4040 .stop = s_stop,
4041 .show = s_show,
4042 };
4043
4044 static struct trace_iterator *
4045 __tracing_open(struct inode *inode, struct file *file, bool snapshot)
4046 {
4047 struct trace_array *tr = inode->i_private;
4048 struct trace_iterator *iter;
4049 int cpu;
4050
4051 if (tracing_disabled)
4052 return ERR_PTR(-ENODEV);
4053
4054 iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter));
4055 if (!iter)
4056 return ERR_PTR(-ENOMEM);
4057
4058 iter->buffer_iter = kcalloc(nr_cpu_ids, sizeof(*iter->buffer_iter),
4059 GFP_KERNEL);
4060 if (!iter->buffer_iter)
4061 goto release;
4062
4063 /*
4064 * We make a copy of the current tracer to avoid concurrent
4065 * changes on it while we are reading.
4066 */
4067 mutex_lock(&trace_types_lock);
4068 iter->trace = kzalloc(sizeof(*iter->trace), GFP_KERNEL);
4069 if (!iter->trace)
4070 goto fail;
4071
4072 *iter->trace = *tr->current_trace;
4073
4074 if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
4075 goto fail;
4076
4077 iter->tr = tr;
4078
4079 #ifdef CONFIG_TRACER_MAX_TRACE
4080 /* Currently only the top directory has a snapshot */
4081 if (tr->current_trace->print_max || snapshot)
4082 iter->trace_buffer = &tr->max_buffer;
4083 else
4084 #endif
4085 iter->trace_buffer = &tr->trace_buffer;
4086 iter->snapshot = snapshot;
4087 iter->pos = -1;
4088 iter->cpu_file = tracing_get_cpu(inode);
4089 mutex_init(&iter->mutex);
4090
4091 /* Notify the tracer early; before we stop tracing. */
4092 if (iter->trace && iter->trace->open)
4093 iter->trace->open(iter);
4094
4095 /* Annotate start of buffers if we had overruns */
4096 if (ring_buffer_overruns(iter->trace_buffer->buffer))
4097 iter->iter_flags |= TRACE_FILE_ANNOTATE;
4098
4099 /* Output in nanoseconds only if we are using a clock in nanoseconds. */
4100 if (trace_clocks[tr->clock_id].in_ns)
4101 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
4102
4103 /* stop the trace while dumping if we are not opening "snapshot" */
4104 if (!iter->snapshot)
4105 tracing_stop_tr(tr);
4106
4107 if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
4108 for_each_tracing_cpu(cpu) {
4109 iter->buffer_iter[cpu] =
4110 ring_buffer_read_prepare(iter->trace_buffer->buffer,
4111 cpu, GFP_KERNEL);
4112 }
4113 ring_buffer_read_prepare_sync();
4114 for_each_tracing_cpu(cpu) {
4115 ring_buffer_read_start(iter->buffer_iter[cpu]);
4116 tracing_iter_reset(iter, cpu);
4117 }
4118 } else {
4119 cpu = iter->cpu_file;
4120 iter->buffer_iter[cpu] =
4121 ring_buffer_read_prepare(iter->trace_buffer->buffer,
4122 cpu, GFP_KERNEL);
4123 ring_buffer_read_prepare_sync();
4124 ring_buffer_read_start(iter->buffer_iter[cpu]);
4125 tracing_iter_reset(iter, cpu);
4126 }
4127
4128 mutex_unlock(&trace_types_lock);
4129
4130 return iter;
4131
4132 fail:
4133 mutex_unlock(&trace_types_lock);
4134 kfree(iter->trace);
4135 kfree(iter->buffer_iter);
4136 release:
4137 seq_release_private(inode, file);
4138 return ERR_PTR(-ENOMEM);
4139 }
4140
4141 int tracing_open_generic(struct inode *inode, struct file *filp)
4142 {
4143 if (tracing_disabled)
4144 return -ENODEV;
4145
4146 filp->private_data = inode->i_private;
4147 return 0;
4148 }
4149
4150 bool tracing_is_disabled(void)
4151 {
4152 return (tracing_disabled) ? true: false;
4153 }
4154
4155 /*
4156 * Open and update trace_array ref count.
4157 * Must have the current trace_array passed to it.
4158 */
4159 static int tracing_open_generic_tr(struct inode *inode, struct file *filp)
4160 {
4161 struct trace_array *tr = inode->i_private;
4162
4163 if (tracing_disabled)
4164 return -ENODEV;
4165
4166 if (trace_array_get(tr) < 0)
4167 return -ENODEV;
4168
4169 filp->private_data = inode->i_private;
4170
4171 return 0;
4172 }
4173
4174 static int tracing_release(struct inode *inode, struct file *file)
4175 {
4176 struct trace_array *tr = inode->i_private;
4177 struct seq_file *m = file->private_data;
4178 struct trace_iterator *iter;
4179 int cpu;
4180
4181 if (!(file->f_mode & FMODE_READ)) {
4182 trace_array_put(tr);
4183 return 0;
4184 }
4185
4186 /* Writes do not use seq_file */
4187 iter = m->private;
4188 mutex_lock(&trace_types_lock);
4189
4190 for_each_tracing_cpu(cpu) {
4191 if (iter->buffer_iter[cpu])
4192 ring_buffer_read_finish(iter->buffer_iter[cpu]);
4193 }
4194
4195 if (iter->trace && iter->trace->close)
4196 iter->trace->close(iter);
4197
4198 if (!iter->snapshot)
4199 /* reenable tracing if it was previously enabled */
4200 tracing_start_tr(tr);
4201
4202 __trace_array_put(tr);
4203
4204 mutex_unlock(&trace_types_lock);
4205
4206 mutex_destroy(&iter->mutex);
4207 free_cpumask_var(iter->started);
4208 kfree(iter->trace);
4209 kfree(iter->buffer_iter);
4210 seq_release_private(inode, file);
4211
4212 return 0;
4213 }
4214
4215 static int tracing_release_generic_tr(struct inode *inode, struct file *file)
4216 {
4217 struct trace_array *tr = inode->i_private;
4218
4219 trace_array_put(tr);
4220 return 0;
4221 }
4222
4223 static int tracing_single_release_tr(struct inode *inode, struct file *file)
4224 {
4225 struct trace_array *tr = inode->i_private;
4226
4227 trace_array_put(tr);
4228
4229 return single_release(inode, file);
4230 }
4231
4232 static int tracing_open(struct inode *inode, struct file *file)
4233 {
4234 struct trace_array *tr = inode->i_private;
4235 struct trace_iterator *iter;
4236 int ret = 0;
4237
4238 if (trace_array_get(tr) < 0)
4239 return -ENODEV;
4240
4241 /* If this file was open for write, then erase contents */
4242 if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
4243 int cpu = tracing_get_cpu(inode);
4244 struct trace_buffer *trace_buf = &tr->trace_buffer;
4245
4246 #ifdef CONFIG_TRACER_MAX_TRACE
4247 if (tr->current_trace->print_max)
4248 trace_buf = &tr->max_buffer;
4249 #endif
4250
4251 if (cpu == RING_BUFFER_ALL_CPUS)
4252 tracing_reset_online_cpus(trace_buf);
4253 else
4254 tracing_reset_cpu(trace_buf, cpu);
4255 }
4256
4257 if (file->f_mode & FMODE_READ) {
4258 iter = __tracing_open(inode, file, false);
4259 if (IS_ERR(iter))
4260 ret = PTR_ERR(iter);
4261 else if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
4262 iter->iter_flags |= TRACE_FILE_LAT_FMT;
4263 }
4264
4265 if (ret < 0)
4266 trace_array_put(tr);
4267
4268 return ret;
4269 }
4270
4271 /*
4272 * Some tracers are not suitable for instance buffers.
4273 * A tracer is always available for the global array (toplevel)
4274 * or if it explicitly states that it is.
4275 */
4276 static bool
4277 trace_ok_for_array(struct tracer *t, struct trace_array *tr)
4278 {
4279 return (tr->flags & TRACE_ARRAY_FL_GLOBAL) || t->allow_instances;
4280 }
4281
4282 /* Find the next tracer that this trace array may use */
4283 static struct tracer *
4284 get_tracer_for_array(struct trace_array *tr, struct tracer *t)
4285 {
4286 while (t && !trace_ok_for_array(t, tr))
4287 t = t->next;
4288
4289 return t;
4290 }
4291
4292 static void *
4293 t_next(struct seq_file *m, void *v, loff_t *pos)
4294 {
4295 struct trace_array *tr = m->private;
4296 struct tracer *t = v;
4297
4298 (*pos)++;
4299
4300 if (t)
4301 t = get_tracer_for_array(tr, t->next);
4302
4303 return t;
4304 }
4305
4306 static void *t_start(struct seq_file *m, loff_t *pos)
4307 {
4308 struct trace_array *tr = m->private;
4309 struct tracer *t;
4310 loff_t l = 0;
4311
4312 mutex_lock(&trace_types_lock);
4313
4314 t = get_tracer_for_array(tr, trace_types);
4315 for (; t && l < *pos; t = t_next(m, t, &l))
4316 ;
4317
4318 return t;
4319 }
4320
4321 static void t_stop(struct seq_file *m, void *p)
4322 {
4323 mutex_unlock(&trace_types_lock);
4324 }
4325
4326 static int t_show(struct seq_file *m, void *v)
4327 {
4328 struct tracer *t = v;
4329
4330 if (!t)
4331 return 0;
4332
4333 seq_puts(m, t->name);
4334 if (t->next)
4335 seq_putc(m, ' ');
4336 else
4337 seq_putc(m, '\n');
4338
4339 return 0;
4340 }
4341
4342 static const struct seq_operations show_traces_seq_ops = {
4343 .start = t_start,
4344 .next = t_next,
4345 .stop = t_stop,
4346 .show = t_show,
4347 };
4348
4349 static int show_traces_open(struct inode *inode, struct file *file)
4350 {
4351 struct trace_array *tr = inode->i_private;
4352 struct seq_file *m;
4353 int ret;
4354
4355 if (tracing_disabled)
4356 return -ENODEV;
4357
4358 if (trace_array_get(tr) < 0)
4359 return -ENODEV;
4360
4361 ret = seq_open(file, &show_traces_seq_ops);
4362 if (ret) {
4363 trace_array_put(tr);
4364 return ret;
4365 }
4366
4367 m = file->private_data;
4368 m->private = tr;
4369
4370 return 0;
4371 }
4372
4373 static int show_traces_release(struct inode *inode, struct file *file)
4374 {
4375 struct trace_array *tr = inode->i_private;
4376
4377 trace_array_put(tr);
4378 return seq_release(inode, file);
4379 }
4380
4381 static ssize_t
4382 tracing_write_stub(struct file *filp, const char __user *ubuf,
4383 size_t count, loff_t *ppos)
4384 {
4385 return count;
4386 }
4387
4388 loff_t tracing_lseek(struct file *file, loff_t offset, int whence)
4389 {
4390 int ret;
4391
4392 if (file->f_mode & FMODE_READ)
4393 ret = seq_lseek(file, offset, whence);
4394 else
4395 file->f_pos = ret = 0;
4396
4397 return ret;
4398 }
4399
4400 static const struct file_operations tracing_fops = {
4401 .open = tracing_open,
4402 .read = seq_read,
4403 .write = tracing_write_stub,
4404 .llseek = tracing_lseek,
4405 .release = tracing_release,
4406 };
4407
4408 static const struct file_operations show_traces_fops = {
4409 .open = show_traces_open,
4410 .read = seq_read,
4411 .llseek = seq_lseek,
4412 .release = show_traces_release,
4413 };
4414
4415 static ssize_t
4416 tracing_cpumask_read(struct file *filp, char __user *ubuf,
4417 size_t count, loff_t *ppos)
4418 {
4419 struct trace_array *tr = file_inode(filp)->i_private;
4420 char *mask_str;
4421 int len;
4422
4423 len = snprintf(NULL, 0, "%*pb\n",
4424 cpumask_pr_args(tr->tracing_cpumask)) + 1;
4425 mask_str = kmalloc(len, GFP_KERNEL);
4426 if (!mask_str)
4427 return -ENOMEM;
4428
4429 len = snprintf(mask_str, len, "%*pb\n",
4430 cpumask_pr_args(tr->tracing_cpumask));
4431 if (len >= count) {
4432 count = -EINVAL;
4433 goto out_err;
4434 }
4435 count = simple_read_from_buffer(ubuf, count, ppos, mask_str, len);
4436
4437 out_err:
4438 kfree(mask_str);
4439
4440 return count;
4441 }
4442
4443 static ssize_t
4444 tracing_cpumask_write(struct file *filp, const char __user *ubuf,
4445 size_t count, loff_t *ppos)
4446 {
4447 struct trace_array *tr = file_inode(filp)->i_private;
4448 cpumask_var_t tracing_cpumask_new;
4449 int err, cpu;
4450
4451 if (!alloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
4452 return -ENOMEM;
4453
4454 err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
4455 if (err)
4456 goto err_unlock;
4457
4458 local_irq_disable();
4459 arch_spin_lock(&tr->max_lock);
4460 for_each_tracing_cpu(cpu) {
4461 /*
4462 * Increase/decrease the disabled counter if we are
4463 * about to flip a bit in the cpumask:
4464 */
4465 if (cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
4466 !cpumask_test_cpu(cpu, tracing_cpumask_new)) {
4467 atomic_inc(&per_cpu_ptr(tr->trace_buffer.data, cpu)->disabled);
4468 ring_buffer_record_disable_cpu(tr->trace_buffer.buffer, cpu);
4469 }
4470 if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
4471 cpumask_test_cpu(cpu, tracing_cpumask_new)) {
4472 atomic_dec(&per_cpu_ptr(tr->trace_buffer.data, cpu)->disabled);
4473 ring_buffer_record_enable_cpu(tr->trace_buffer.buffer, cpu);
4474 }
4475 }
4476 arch_spin_unlock(&tr->max_lock);
4477 local_irq_enable();
4478
4479 cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new);
4480 free_cpumask_var(tracing_cpumask_new);
4481
4482 return count;
4483
4484 err_unlock:
4485 free_cpumask_var(tracing_cpumask_new);
4486
4487 return err;
4488 }
4489
4490 static const struct file_operations tracing_cpumask_fops = {
4491 .open = tracing_open_generic_tr,
4492 .read = tracing_cpumask_read,
4493 .write = tracing_cpumask_write,
4494 .release = tracing_release_generic_tr,
4495 .llseek = generic_file_llseek,
4496 };
4497
4498 static int tracing_trace_options_show(struct seq_file *m, void *v)
4499 {
4500 struct tracer_opt *trace_opts;
4501 struct trace_array *tr = m->private;
4502 u32 tracer_flags;
4503 int i;
4504
4505 mutex_lock(&trace_types_lock);
4506 tracer_flags = tr->current_trace->flags->val;
4507 trace_opts = tr->current_trace->flags->opts;
4508
4509 for (i = 0; trace_options[i]; i++) {
4510 if (tr->trace_flags & (1 << i))
4511 seq_printf(m, "%s\n", trace_options[i]);
4512 else
4513 seq_printf(m, "no%s\n", trace_options[i]);
4514 }
4515
4516 for (i = 0; trace_opts[i].name; i++) {
4517 if (tracer_flags & trace_opts[i].bit)
4518 seq_printf(m, "%s\n", trace_opts[i].name);
4519 else
4520 seq_printf(m, "no%s\n", trace_opts[i].name);
4521 }
4522 mutex_unlock(&trace_types_lock);
4523
4524 return 0;
4525 }
4526
4527 static int __set_tracer_option(struct trace_array *tr,
4528 struct tracer_flags *tracer_flags,
4529 struct tracer_opt *opts, int neg)
4530 {
4531 struct tracer *trace = tracer_flags->trace;
4532 int ret;
4533
4534 ret = trace->set_flag(tr, tracer_flags->val, opts->bit, !neg);
4535 if (ret)
4536 return ret;
4537
4538 if (neg)
4539 tracer_flags->val &= ~opts->bit;
4540 else
4541 tracer_flags->val |= opts->bit;
4542 return 0;
4543 }
4544
4545 /* Try to assign a tracer specific option */
4546 static int set_tracer_option(struct trace_array *tr, char *cmp, int neg)
4547 {
4548 struct tracer *trace = tr->current_trace;
4549 struct tracer_flags *tracer_flags = trace->flags;
4550 struct tracer_opt *opts = NULL;
4551 int i;
4552
4553 for (i = 0; tracer_flags->opts[i].name; i++) {
4554 opts = &tracer_flags->opts[i];
4555
4556 if (strcmp(cmp, opts->name) == 0)
4557 return __set_tracer_option(tr, trace->flags, opts, neg);
4558 }
4559
4560 return -EINVAL;
4561 }
4562
4563 /* Some tracers require overwrite to stay enabled */
4564 int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set)
4565 {
4566 if (tracer->enabled && (mask & TRACE_ITER_OVERWRITE) && !set)
4567 return -1;
4568
4569 return 0;
4570 }
4571
4572 int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
4573 {
4574 /* do nothing if flag is already set */
4575 if (!!(tr->trace_flags & mask) == !!enabled)
4576 return 0;
4577
4578 /* Give the tracer a chance to approve the change */
4579 if (tr->current_trace->flag_changed)
4580 if (tr->current_trace->flag_changed(tr, mask, !!enabled))
4581 return -EINVAL;
4582
4583 if (enabled)
4584 tr->trace_flags |= mask;
4585 else
4586 tr->trace_flags &= ~mask;
4587
4588 if (mask == TRACE_ITER_RECORD_CMD)
4589 trace_event_enable_cmd_record(enabled);
4590
4591 if (mask == TRACE_ITER_RECORD_TGID) {
4592 if (!tgid_map)
4593 tgid_map = kcalloc(PID_MAX_DEFAULT + 1,
4594 sizeof(*tgid_map),
4595 GFP_KERNEL);
4596 if (!tgid_map) {
4597 tr->trace_flags &= ~TRACE_ITER_RECORD_TGID;
4598 return -ENOMEM;
4599 }
4600
4601 trace_event_enable_tgid_record(enabled);
4602 }
4603
4604 if (mask == TRACE_ITER_EVENT_FORK)
4605 trace_event_follow_fork(tr, enabled);
4606
4607 if (mask == TRACE_ITER_FUNC_FORK)
4608 ftrace_pid_follow_fork(tr, enabled);
4609
4610 if (mask == TRACE_ITER_OVERWRITE) {
4611 ring_buffer_change_overwrite(tr->trace_buffer.buffer, enabled);
4612 #ifdef CONFIG_TRACER_MAX_TRACE
4613 ring_buffer_change_overwrite(tr->max_buffer.buffer, enabled);
4614 #endif
4615 }
4616
4617 if (mask == TRACE_ITER_PRINTK) {
4618 trace_printk_start_stop_comm(enabled);
4619 trace_printk_control(enabled);
4620 }
4621
4622 return 0;
4623 }
4624
4625 static int trace_set_options(struct trace_array *tr, char *option)
4626 {
4627 char *cmp;
4628 int neg = 0;
4629 int ret;
4630 size_t orig_len = strlen(option);
4631 int len;
4632
4633 cmp = strstrip(option);
4634
4635 len = str_has_prefix(cmp, "no");
4636 if (len)
4637 neg = 1;
4638
4639 cmp += len;
4640
4641 mutex_lock(&trace_types_lock);
4642
4643 ret = match_string(trace_options, -1, cmp);
4644 /* If no option could be set, test the specific tracer options */
4645 if (ret < 0)
4646 ret = set_tracer_option(tr, cmp, neg);
4647 else
4648 ret = set_tracer_flag(tr, 1 << ret, !neg);
4649
4650 mutex_unlock(&trace_types_lock);
4651
4652 /*
4653 * If the first trailing whitespace is replaced with '\0' by strstrip,
4654 * turn it back into a space.
4655 */
4656 if (orig_len > strlen(option))
4657 option[strlen(option)] = ' ';
4658
4659 return ret;
4660 }
4661
4662 static void __init apply_trace_boot_options(void)
4663 {
4664 char *buf = trace_boot_options_buf;
4665 char *option;
4666
4667 while (true) {
4668 option = strsep(&buf, ",");
4669
4670 if (!option)
4671 break;
4672
4673 if (*option)
4674 trace_set_options(&global_trace, option);
4675
4676 /* Put back the comma to allow this to be called again */
4677 if (buf)
4678 *(buf - 1) = ',';
4679 }
4680 }
4681
4682 static ssize_t
4683 tracing_trace_options_write(struct file *filp, const char __user *ubuf,
4684 size_t cnt, loff_t *ppos)
4685 {
4686 struct seq_file *m = filp->private_data;
4687 struct trace_array *tr = m->private;
4688 char buf[64];
4689 int ret;
4690
4691 if (cnt >= sizeof(buf))
4692 return -EINVAL;
4693
4694 if (copy_from_user(buf, ubuf, cnt))
4695 return -EFAULT;
4696
4697 buf[cnt] = 0;
4698
4699 ret = trace_set_options(tr, buf);
4700 if (ret < 0)
4701 return ret;
4702
4703 *ppos += cnt;
4704
4705 return cnt;
4706 }
4707
4708 static int tracing_trace_options_open(struct inode *inode, struct file *file)
4709 {
4710 struct trace_array *tr = inode->i_private;
4711 int ret;
4712
4713 if (tracing_disabled)
4714 return -ENODEV;
4715
4716 if (trace_array_get(tr) < 0)
4717 return -ENODEV;
4718
4719 ret = single_open(file, tracing_trace_options_show, inode->i_private);
4720 if (ret < 0)
4721 trace_array_put(tr);
4722
4723 return ret;
4724 }
4725
4726 static const struct file_operations tracing_iter_fops = {
4727 .open = tracing_trace_options_open,
4728 .read = seq_read,
4729 .llseek = seq_lseek,
4730 .release = tracing_single_release_tr,
4731 .write = tracing_trace_options_write,
4732 };
4733
4734 static const char readme_msg[] =
4735 "tracing mini-HOWTO:\n\n"
4736 "# echo 0 > tracing_on : quick way to disable tracing\n"
4737 "# echo 1 > tracing_on : quick way to re-enable tracing\n\n"
4738 " Important files:\n"
4739 " trace\t\t\t- The static contents of the buffer\n"
4740 "\t\t\t To clear the buffer write into this file: echo > trace\n"
4741 " trace_pipe\t\t- A consuming read to see the contents of the buffer\n"
4742 " current_tracer\t- function and latency tracers\n"
4743 " available_tracers\t- list of configured tracers for current_tracer\n"
4744 " error_log\t- error log for failed commands (that support it)\n"
4745 " buffer_size_kb\t- view and modify size of per cpu buffer\n"
4746 " buffer_total_size_kb - view total size of all cpu buffers\n\n"
4747 " trace_clock\t\t-change the clock used to order events\n"
4748 " local: Per cpu clock but may not be synced across CPUs\n"
4749 " global: Synced across CPUs but slows tracing down.\n"
4750 " counter: Not a clock, but just an increment\n"
4751 " uptime: Jiffy counter from time of boot\n"
4752 " perf: Same clock that perf events use\n"
4753 #ifdef CONFIG_X86_64
4754 " x86-tsc: TSC cycle counter\n"
4755 #endif
4756 "\n timestamp_mode\t-view the mode used to timestamp events\n"
4757 " delta: Delta difference against a buffer-wide timestamp\n"
4758 " absolute: Absolute (standalone) timestamp\n"
4759 "\n trace_marker\t\t- Writes into this file writes into the kernel buffer\n"
4760 "\n trace_marker_raw\t\t- Writes into this file writes binary data into the kernel buffer\n"
4761 " tracing_cpumask\t- Limit which CPUs to trace\n"
4762 " instances\t\t- Make sub-buffers with: mkdir instances/foo\n"
4763 "\t\t\t Remove sub-buffer with rmdir\n"
4764 " trace_options\t\t- Set format or modify how tracing happens\n"
4765 "\t\t\t Disable an option by prefixing 'no' to the\n"
4766 "\t\t\t option name\n"
4767 " saved_cmdlines_size\t- echo command number in here to store comm-pid list\n"
4768 #ifdef CONFIG_DYNAMIC_FTRACE
4769 "\n available_filter_functions - list of functions that can be filtered on\n"
4770 " set_ftrace_filter\t- echo function name in here to only trace these\n"
4771 "\t\t\t functions\n"
4772 "\t accepts: func_full_name or glob-matching-pattern\n"
4773 "\t modules: Can select a group via module\n"
4774 "\t Format: :mod:<module-name>\n"
4775 "\t example: echo :mod:ext3 > set_ftrace_filter\n"
4776 "\t triggers: a command to perform when function is hit\n"
4777 "\t Format: <function>:<trigger>[:count]\n"
4778 "\t trigger: traceon, traceoff\n"
4779 "\t\t enable_event:<system>:<event>\n"
4780 "\t\t disable_event:<system>:<event>\n"
4781 #ifdef CONFIG_STACKTRACE
4782 "\t\t stacktrace\n"
4783 #endif
4784 #ifdef CONFIG_TRACER_SNAPSHOT
4785 "\t\t snapshot\n"
4786 #endif
4787 "\t\t dump\n"
4788 "\t\t cpudump\n"
4789 "\t example: echo do_fault:traceoff > set_ftrace_filter\n"
4790 "\t echo do_trap:traceoff:3 > set_ftrace_filter\n"
4791 "\t The first one will disable tracing every time do_fault is hit\n"
4792 "\t The second will disable tracing at most 3 times when do_trap is hit\n"
4793 "\t The first time do trap is hit and it disables tracing, the\n"
4794 "\t counter will decrement to 2. If tracing is already disabled,\n"
4795 "\t the counter will not decrement. It only decrements when the\n"
4796 "\t trigger did work\n"
4797 "\t To remove trigger without count:\n"
4798 "\t echo '!<function>:<trigger> > set_ftrace_filter\n"
4799 "\t To remove trigger with a count:\n"
4800 "\t echo '!<function>:<trigger>:0 > set_ftrace_filter\n"
4801 " set_ftrace_notrace\t- echo function name in here to never trace.\n"
4802 "\t accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
4803 "\t modules: Can select a group via module command :mod:\n"
4804 "\t Does not accept triggers\n"
4805 #endif /* CONFIG_DYNAMIC_FTRACE */
4806 #ifdef CONFIG_FUNCTION_TRACER
4807 " set_ftrace_pid\t- Write pid(s) to only function trace those pids\n"
4808 "\t\t (function)\n"
4809 #endif
4810 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
4811 " set_graph_function\t- Trace the nested calls of a function (function_graph)\n"
4812 " set_graph_notrace\t- Do not trace the nested calls of a function (function_graph)\n"
4813 " max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n"
4814 #endif
4815 #ifdef CONFIG_TRACER_SNAPSHOT
4816 "\n snapshot\t\t- Like 'trace' but shows the content of the static\n"
4817 "\t\t\t snapshot buffer. Read the contents for more\n"
4818 "\t\t\t information\n"
4819 #endif
4820 #ifdef CONFIG_STACK_TRACER
4821 " stack_trace\t\t- Shows the max stack trace when active\n"
4822 " stack_max_size\t- Shows current max stack size that was traced\n"
4823 "\t\t\t Write into this file to reset the max size (trigger a\n"
4824 "\t\t\t new trace)\n"
4825 #ifdef CONFIG_DYNAMIC_FTRACE
4826 " stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace\n"
4827 "\t\t\t traces\n"
4828 #endif
4829 #endif /* CONFIG_STACK_TRACER */
4830 #ifdef CONFIG_DYNAMIC_EVENTS
4831 " dynamic_events\t\t- Create/append/remove/show the generic dynamic events\n"
4832 "\t\t\t Write into this file to define/undefine new trace events.\n"
4833 #endif
4834 #ifdef CONFIG_KPROBE_EVENTS
4835 " kprobe_events\t\t- Create/append/remove/show the kernel dynamic events\n"
4836 "\t\t\t Write into this file to define/undefine new trace events.\n"
4837 #endif
4838 #ifdef CONFIG_UPROBE_EVENTS
4839 " uprobe_events\t\t- Create/append/remove/show the userspace dynamic events\n"
4840 "\t\t\t Write into this file to define/undefine new trace events.\n"
4841 #endif
4842 #if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS)
4843 "\t accepts: event-definitions (one definition per line)\n"
4844 "\t Format: p[:[<group>/]<event>] <place> [<args>]\n"
4845 "\t r[maxactive][:[<group>/]<event>] <place> [<args>]\n"
4846 #ifdef CONFIG_HIST_TRIGGERS
4847 "\t s:[synthetic/]<event> <field> [<field>]\n"
4848 #endif
4849 "\t -:[<group>/]<event>\n"
4850 #ifdef CONFIG_KPROBE_EVENTS
4851 "\t place: [<module>:]<symbol>[+<offset>]|<memaddr>\n"
4852 "place (kretprobe): [<module>:]<symbol>[+<offset>]|<memaddr>\n"
4853 #endif
4854 #ifdef CONFIG_UPROBE_EVENTS
4855 " place (uprobe): <path>:<offset>[(ref_ctr_offset)]\n"
4856 #endif
4857 "\t args: <name>=fetcharg[:type]\n"
4858 "\t fetcharg: %<register>, @<address>, @<symbol>[+|-<offset>],\n"
4859 #ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API
4860 "\t $stack<index>, $stack, $retval, $comm, $arg<N>,\n"
4861 #else
4862 "\t $stack<index>, $stack, $retval, $comm,\n"
4863 #endif
4864 "\t +|-[u]<offset>(<fetcharg>), \\imm-value, \\\"imm-string\"\n"
4865 "\t type: s8/16/32/64, u8/16/32/64, x8/16/32/64, string, symbol,\n"
4866 "\t b<bit-width>@<bit-offset>/<container-size>, ustring,\n"
4867 "\t <type>\\[<array-size>\\]\n"
4868 #ifdef CONFIG_HIST_TRIGGERS
4869 "\t field: <stype> <name>;\n"
4870 "\t stype: u8/u16/u32/u64, s8/s16/s32/s64, pid_t,\n"
4871 "\t [unsigned] char/int/long\n"
4872 #endif
4873 #endif
4874 " events/\t\t- Directory containing all trace event subsystems:\n"
4875 " enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
4876 " events/<system>/\t- Directory containing all trace events for <system>:\n"
4877 " enable\t\t- Write 0/1 to enable/disable tracing of all <system>\n"
4878 "\t\t\t events\n"
4879 " filter\t\t- If set, only events passing filter are traced\n"
4880 " events/<system>/<event>/\t- Directory containing control files for\n"
4881 "\t\t\t <event>:\n"
4882 " enable\t\t- Write 0/1 to enable/disable tracing of <event>\n"
4883 " filter\t\t- If set, only events passing filter are traced\n"
4884 " trigger\t\t- If set, a command to perform when event is hit\n"
4885 "\t Format: <trigger>[:count][if <filter>]\n"
4886 "\t trigger: traceon, traceoff\n"
4887 "\t enable_event:<system>:<event>\n"
4888 "\t disable_event:<system>:<event>\n"
4889 #ifdef CONFIG_HIST_TRIGGERS
4890 "\t enable_hist:<system>:<event>\n"
4891 "\t disable_hist:<system>:<event>\n"
4892 #endif
4893 #ifdef CONFIG_STACKTRACE
4894 "\t\t stacktrace\n"
4895 #endif
4896 #ifdef CONFIG_TRACER_SNAPSHOT
4897 "\t\t snapshot\n"
4898 #endif
4899 #ifdef CONFIG_HIST_TRIGGERS
4900 "\t\t hist (see below)\n"
4901 #endif
4902 "\t example: echo traceoff > events/block/block_unplug/trigger\n"
4903 "\t echo traceoff:3 > events/block/block_unplug/trigger\n"
4904 "\t echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n"
4905 "\t events/block/block_unplug/trigger\n"
4906 "\t The first disables tracing every time block_unplug is hit.\n"
4907 "\t The second disables tracing the first 3 times block_unplug is hit.\n"
4908 "\t The third enables the kmalloc event the first 3 times block_unplug\n"
4909 "\t is hit and has value of greater than 1 for the 'nr_rq' event field.\n"
4910 "\t Like function triggers, the counter is only decremented if it\n"
4911 "\t enabled or disabled tracing.\n"
4912 "\t To remove a trigger without a count:\n"
4913 "\t echo '!<trigger> > <system>/<event>/trigger\n"
4914 "\t To remove a trigger with a count:\n"
4915 "\t echo '!<trigger>:0 > <system>/<event>/trigger\n"
4916 "\t Filters can be ignored when removing a trigger.\n"
4917 #ifdef CONFIG_HIST_TRIGGERS
4918 " hist trigger\t- If set, event hits are aggregated into a hash table\n"
4919 "\t Format: hist:keys=<field1[,field2,...]>\n"
4920 "\t [:values=<field1[,field2,...]>]\n"
4921 "\t [:sort=<field1[,field2,...]>]\n"
4922 "\t [:size=#entries]\n"
4923 "\t [:pause][:continue][:clear]\n"
4924 "\t [:name=histname1]\n"
4925 "\t [:<handler>.<action>]\n"
4926 "\t [if <filter>]\n\n"
4927 "\t When a matching event is hit, an entry is added to a hash\n"
4928 "\t table using the key(s) and value(s) named, and the value of a\n"
4929 "\t sum called 'hitcount' is incremented. Keys and values\n"
4930 "\t correspond to fields in the event's format description. Keys\n"
4931 "\t can be any field, or the special string 'stacktrace'.\n"
4932 "\t Compound keys consisting of up to two fields can be specified\n"
4933 "\t by the 'keys' keyword. Values must correspond to numeric\n"
4934 "\t fields. Sort keys consisting of up to two fields can be\n"
4935 "\t specified using the 'sort' keyword. The sort direction can\n"
4936 "\t be modified by appending '.descending' or '.ascending' to a\n"
4937 "\t sort field. The 'size' parameter can be used to specify more\n"
4938 "\t or fewer than the default 2048 entries for the hashtable size.\n"
4939 "\t If a hist trigger is given a name using the 'name' parameter,\n"
4940 "\t its histogram data will be shared with other triggers of the\n"
4941 "\t same name, and trigger hits will update this common data.\n\n"
4942 "\t Reading the 'hist' file for the event will dump the hash\n"
4943 "\t table in its entirety to stdout. If there are multiple hist\n"
4944 "\t triggers attached to an event, there will be a table for each\n"
4945 "\t trigger in the output. The table displayed for a named\n"
4946 "\t trigger will be the same as any other instance having the\n"
4947 "\t same name. The default format used to display a given field\n"
4948 "\t can be modified by appending any of the following modifiers\n"
4949 "\t to the field name, as applicable:\n\n"
4950 "\t .hex display a number as a hex value\n"
4951 "\t .sym display an address as a symbol\n"
4952 "\t .sym-offset display an address as a symbol and offset\n"
4953 "\t .execname display a common_pid as a program name\n"
4954 "\t .syscall display a syscall id as a syscall name\n"
4955 "\t .log2 display log2 value rather than raw number\n"
4956 "\t .usecs display a common_timestamp in microseconds\n\n"
4957 "\t The 'pause' parameter can be used to pause an existing hist\n"
4958 "\t trigger or to start a hist trigger but not log any events\n"
4959 "\t until told to do so. 'continue' can be used to start or\n"
4960 "\t restart a paused hist trigger.\n\n"
4961 "\t The 'clear' parameter will clear the contents of a running\n"
4962 "\t hist trigger and leave its current paused/active state\n"
4963 "\t unchanged.\n\n"
4964 "\t The enable_hist and disable_hist triggers can be used to\n"
4965 "\t have one event conditionally start and stop another event's\n"
4966 "\t already-attached hist trigger. The syntax is analogous to\n"
4967 "\t the enable_event and disable_event triggers.\n\n"
4968 "\t Hist trigger handlers and actions are executed whenever a\n"
4969 "\t a histogram entry is added or updated. They take the form:\n\n"
4970 "\t <handler>.<action>\n\n"
4971 "\t The available handlers are:\n\n"
4972 "\t onmatch(matching.event) - invoke on addition or update\n"
4973 "\t onmax(var) - invoke if var exceeds current max\n"
4974 "\t onchange(var) - invoke action if var changes\n\n"
4975 "\t The available actions are:\n\n"
4976 "\t trace(<synthetic_event>,param list) - generate synthetic event\n"
4977 "\t save(field,...) - save current event fields\n"
4978 #ifdef CONFIG_TRACER_SNAPSHOT
4979 "\t snapshot() - snapshot the trace buffer\n"
4980 #endif
4981 #endif
4982 ;
4983
4984 static ssize_t
4985 tracing_readme_read(struct file *filp, char __user *ubuf,
4986 size_t cnt, loff_t *ppos)
4987 {
4988 return simple_read_from_buffer(ubuf, cnt, ppos,
4989 readme_msg, strlen(readme_msg));
4990 }
4991
4992 static const struct file_operations tracing_readme_fops = {
4993 .open = tracing_open_generic,
4994 .read = tracing_readme_read,
4995 .llseek = generic_file_llseek,
4996 };
4997
4998 static void *saved_tgids_next(struct seq_file *m, void *v, loff_t *pos)
4999 {
5000 int *ptr = v;
5001
5002 if (*pos || m->count)
5003 ptr++;
5004
5005 (*pos)++;
5006
5007 for (; ptr <= &tgid_map[PID_MAX_DEFAULT]; ptr++) {
5008 if (trace_find_tgid(*ptr))
5009 return ptr;
5010 }
5011
5012 return NULL;
5013 }
5014
5015 static void *saved_tgids_start(struct seq_file *m, loff_t *pos)
5016 {
5017 void *v;
5018 loff_t l = 0;
5019
5020 if (!tgid_map)
5021 return NULL;
5022
5023 v = &tgid_map[0];
5024 while (l <= *pos) {
5025 v = saved_tgids_next(m, v, &l);
5026 if (!v)
5027 return NULL;
5028 }
5029
5030 return v;
5031 }
5032
5033 static void saved_tgids_stop(struct seq_file *m, void *v)
5034 {
5035 }
5036
5037 static int saved_tgids_show(struct seq_file *m, void *v)
5038 {
5039 int pid = (int *)v - tgid_map;
5040
5041 seq_printf(m, "%d %d\n", pid, trace_find_tgid(pid));
5042 return 0;
5043 }
5044
5045 static const struct seq_operations tracing_saved_tgids_seq_ops = {
5046 .start = saved_tgids_start,
5047 .stop = saved_tgids_stop,
5048 .next = saved_tgids_next,
5049 .show = saved_tgids_show,
5050 };
5051
5052 static int tracing_saved_tgids_open(struct inode *inode, struct file *filp)
5053 {
5054 if (tracing_disabled)
5055 return -ENODEV;
5056
5057 return seq_open(filp, &tracing_saved_tgids_seq_ops);
5058 }
5059
5060
5061 static const struct file_operations tracing_saved_tgids_fops = {
5062 .open = tracing_saved_tgids_open,
5063 .read = seq_read,
5064 .llseek = seq_lseek,
5065 .release = seq_release,
5066 };
5067
5068 static void *saved_cmdlines_next(struct seq_file *m, void *v, loff_t *pos)
5069 {
5070 unsigned int *ptr = v;
5071
5072 if (*pos || m->count)
5073 ptr++;
5074
5075 (*pos)++;
5076
5077 for (; ptr < &savedcmd->map_cmdline_to_pid[savedcmd->cmdline_num];
5078 ptr++) {
5079 if (*ptr == -1 || *ptr == NO_CMDLINE_MAP)
5080 continue;
5081
5082 return ptr;
5083 }
5084
5085 return NULL;
5086 }
5087
5088 static void *saved_cmdlines_start(struct seq_file *m, loff_t *pos)
5089 {
5090 void *v;
5091 loff_t l = 0;
5092
5093 preempt_disable();
5094 arch_spin_lock(&trace_cmdline_lock);
5095
5096 v = &savedcmd->map_cmdline_to_pid[0];
5097 while (l <= *pos) {
5098 v = saved_cmdlines_next(m, v, &l);
5099 if (!v)
5100 return NULL;
5101 }
5102
5103 return v;
5104 }
5105
5106 static void saved_cmdlines_stop(struct seq_file *m, void *v)
5107 {
5108 arch_spin_unlock(&trace_cmdline_lock);
5109 preempt_enable();
5110 }
5111
5112 static int saved_cmdlines_show(struct seq_file *m, void *v)
5113 {
5114 char buf[TASK_COMM_LEN];
5115 unsigned int *pid = v;
5116
5117 __trace_find_cmdline(*pid, buf);
5118 seq_printf(m, "%d %s\n", *pid, buf);
5119 return 0;
5120 }
5121
5122 static const struct seq_operations tracing_saved_cmdlines_seq_ops = {
5123 .start = saved_cmdlines_start,
5124 .next = saved_cmdlines_next,
5125 .stop = saved_cmdlines_stop,
5126 .show = saved_cmdlines_show,
5127 };
5128
5129 static int tracing_saved_cmdlines_open(struct inode *inode, struct file *filp)
5130 {
5131 if (tracing_disabled)
5132 return -ENODEV;
5133
5134 return seq_open(filp, &tracing_saved_cmdlines_seq_ops);
5135 }
5136
5137 static const struct file_operations tracing_saved_cmdlines_fops = {
5138 .open = tracing_saved_cmdlines_open,
5139 .read = seq_read,
5140 .llseek = seq_lseek,
5141 .release = seq_release,
5142 };
5143
5144 static ssize_t
5145 tracing_saved_cmdlines_size_read(struct file *filp, char __user *ubuf,
5146 size_t cnt, loff_t *ppos)
5147 {
5148 char buf[64];
5149 int r;
5150
5151 arch_spin_lock(&trace_cmdline_lock);
5152 r = scnprintf(buf, sizeof(buf), "%u\n", savedcmd->cmdline_num);
5153 arch_spin_unlock(&trace_cmdline_lock);
5154
5155 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5156 }
5157
5158 static void free_saved_cmdlines_buffer(struct saved_cmdlines_buffer *s)
5159 {
5160 kfree(s->saved_cmdlines);
5161 kfree(s->map_cmdline_to_pid);
5162 kfree(s);
5163 }
5164
5165 static int tracing_resize_saved_cmdlines(unsigned int val)
5166 {
5167 struct saved_cmdlines_buffer *s, *savedcmd_temp;
5168
5169 s = kmalloc(sizeof(*s), GFP_KERNEL);
5170 if (!s)
5171 return -ENOMEM;
5172
5173 if (allocate_cmdlines_buffer(val, s) < 0) {
5174 kfree(s);
5175 return -ENOMEM;
5176 }
5177
5178 arch_spin_lock(&trace_cmdline_lock);
5179 savedcmd_temp = savedcmd;
5180 savedcmd = s;
5181 arch_spin_unlock(&trace_cmdline_lock);
5182 free_saved_cmdlines_buffer(savedcmd_temp);
5183
5184 return 0;
5185 }
5186
5187 static ssize_t
5188 tracing_saved_cmdlines_size_write(struct file *filp, const char __user *ubuf,
5189 size_t cnt, loff_t *ppos)
5190 {
5191 unsigned long val;
5192 int ret;
5193
5194 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5195 if (ret)
5196 return ret;
5197
5198 /* must have at least 1 entry or less than PID_MAX_DEFAULT */
5199 if (!val || val > PID_MAX_DEFAULT)
5200 return -EINVAL;
5201
5202 ret = tracing_resize_saved_cmdlines((unsigned int)val);
5203 if (ret < 0)
5204 return ret;
5205
5206 *ppos += cnt;
5207
5208 return cnt;
5209 }
5210
5211 static const struct file_operations tracing_saved_cmdlines_size_fops = {
5212 .open = tracing_open_generic,
5213 .read = tracing_saved_cmdlines_size_read,
5214 .write = tracing_saved_cmdlines_size_write,
5215 };
5216
5217 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
5218 static union trace_eval_map_item *
5219 update_eval_map(union trace_eval_map_item *ptr)
5220 {
5221 if (!ptr->map.eval_string) {
5222 if (ptr->tail.next) {
5223 ptr = ptr->tail.next;
5224 /* Set ptr to the next real item (skip head) */
5225 ptr++;
5226 } else
5227 return NULL;
5228 }
5229 return ptr;
5230 }
5231
5232 static void *eval_map_next(struct seq_file *m, void *v, loff_t *pos)
5233 {
5234 union trace_eval_map_item *ptr = v;
5235
5236 /*
5237 * Paranoid! If ptr points to end, we don't want to increment past it.
5238 * This really should never happen.
5239 */
5240 ptr = update_eval_map(ptr);
5241 if (WARN_ON_ONCE(!ptr))
5242 return NULL;
5243
5244 ptr++;
5245
5246 (*pos)++;
5247
5248 ptr = update_eval_map(ptr);
5249
5250 return ptr;
5251 }
5252
5253 static void *eval_map_start(struct seq_file *m, loff_t *pos)
5254 {
5255 union trace_eval_map_item *v;
5256 loff_t l = 0;
5257
5258 mutex_lock(&trace_eval_mutex);
5259
5260 v = trace_eval_maps;
5261 if (v)
5262 v++;
5263
5264 while (v && l < *pos) {
5265 v = eval_map_next(m, v, &l);
5266 }
5267
5268 return v;
5269 }
5270
5271 static void eval_map_stop(struct seq_file *m, void *v)
5272 {
5273 mutex_unlock(&trace_eval_mutex);
5274 }
5275
5276 static int eval_map_show(struct seq_file *m, void *v)
5277 {
5278 union trace_eval_map_item *ptr = v;
5279
5280 seq_printf(m, "%s %ld (%s)\n",
5281 ptr->map.eval_string, ptr->map.eval_value,
5282 ptr->map.system);
5283
5284 return 0;
5285 }
5286
5287 static const struct seq_operations tracing_eval_map_seq_ops = {
5288 .start = eval_map_start,
5289 .next = eval_map_next,
5290 .stop = eval_map_stop,
5291 .show = eval_map_show,
5292 };
5293
5294 static int tracing_eval_map_open(struct inode *inode, struct file *filp)
5295 {
5296 if (tracing_disabled)
5297 return -ENODEV;
5298
5299 return seq_open(filp, &tracing_eval_map_seq_ops);
5300 }
5301
5302 static const struct file_operations tracing_eval_map_fops = {
5303 .open = tracing_eval_map_open,
5304 .read = seq_read,
5305 .llseek = seq_lseek,
5306 .release = seq_release,
5307 };
5308
5309 static inline union trace_eval_map_item *
5310 trace_eval_jmp_to_tail(union trace_eval_map_item *ptr)
5311 {
5312 /* Return tail of array given the head */
5313 return ptr + ptr->head.length + 1;
5314 }
5315
5316 static void
5317 trace_insert_eval_map_file(struct module *mod, struct trace_eval_map **start,
5318 int len)
5319 {
5320 struct trace_eval_map **stop;
5321 struct trace_eval_map **map;
5322 union trace_eval_map_item *map_array;
5323 union trace_eval_map_item *ptr;
5324
5325 stop = start + len;
5326
5327 /*
5328 * The trace_eval_maps contains the map plus a head and tail item,
5329 * where the head holds the module and length of array, and the
5330 * tail holds a pointer to the next list.
5331 */
5332 map_array = kmalloc_array(len + 2, sizeof(*map_array), GFP_KERNEL);
5333 if (!map_array) {
5334 pr_warn("Unable to allocate trace eval mapping\n");
5335 return;
5336 }
5337
5338 mutex_lock(&trace_eval_mutex);
5339
5340 if (!trace_eval_maps)
5341 trace_eval_maps = map_array;
5342 else {
5343 ptr = trace_eval_maps;
5344 for (;;) {
5345 ptr = trace_eval_jmp_to_tail(ptr);
5346 if (!ptr->tail.next)
5347 break;
5348 ptr = ptr->tail.next;
5349
5350 }
5351 ptr->tail.next = map_array;
5352 }
5353 map_array->head.mod = mod;
5354 map_array->head.length = len;
5355 map_array++;
5356
5357 for (map = start; (unsigned long)map < (unsigned long)stop; map++) {
5358 map_array->map = **map;
5359 map_array++;
5360 }
5361 memset(map_array, 0, sizeof(*map_array));
5362
5363 mutex_unlock(&trace_eval_mutex);
5364 }
5365
5366 static void trace_create_eval_file(struct dentry *d_tracer)
5367 {
5368 trace_create_file("eval_map", 0444, d_tracer,
5369 NULL, &tracing_eval_map_fops);
5370 }
5371
5372 #else /* CONFIG_TRACE_EVAL_MAP_FILE */
5373 static inline void trace_create_eval_file(struct dentry *d_tracer) { }
5374 static inline void trace_insert_eval_map_file(struct module *mod,
5375 struct trace_eval_map **start, int len) { }
5376 #endif /* !CONFIG_TRACE_EVAL_MAP_FILE */
5377
5378 static void trace_insert_eval_map(struct module *mod,
5379 struct trace_eval_map **start, int len)
5380 {
5381 struct trace_eval_map **map;
5382
5383 if (len <= 0)
5384 return;
5385
5386 map = start;
5387
5388 trace_event_eval_update(map, len);
5389
5390 trace_insert_eval_map_file(mod, start, len);
5391 }
5392
5393 static ssize_t
5394 tracing_set_trace_read(struct file *filp, char __user *ubuf,
5395 size_t cnt, loff_t *ppos)
5396 {
5397 struct trace_array *tr = filp->private_data;
5398 char buf[MAX_TRACER_SIZE+2];
5399 int r;
5400
5401 mutex_lock(&trace_types_lock);
5402 r = sprintf(buf, "%s\n", tr->current_trace->name);
5403 mutex_unlock(&trace_types_lock);
5404
5405 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5406 }
5407
5408 int tracer_init(struct tracer *t, struct trace_array *tr)
5409 {
5410 tracing_reset_online_cpus(&tr->trace_buffer);
5411 return t->init(tr);
5412 }
5413
5414 static void set_buffer_entries(struct trace_buffer *buf, unsigned long val)
5415 {
5416 int cpu;
5417
5418 for_each_tracing_cpu(cpu)
5419 per_cpu_ptr(buf->data, cpu)->entries = val;
5420 }
5421
5422 #ifdef CONFIG_TRACER_MAX_TRACE
5423 /* resize @tr's buffer to the size of @size_tr's entries */
5424 static int resize_buffer_duplicate_size(struct trace_buffer *trace_buf,
5425 struct trace_buffer *size_buf, int cpu_id)
5426 {
5427 int cpu, ret = 0;
5428
5429 if (cpu_id == RING_BUFFER_ALL_CPUS) {
5430 for_each_tracing_cpu(cpu) {
5431 ret = ring_buffer_resize(trace_buf->buffer,
5432 per_cpu_ptr(size_buf->data, cpu)->entries, cpu);
5433 if (ret < 0)
5434 break;
5435 per_cpu_ptr(trace_buf->data, cpu)->entries =
5436 per_cpu_ptr(size_buf->data, cpu)->entries;
5437 }
5438 } else {
5439 ret = ring_buffer_resize(trace_buf->buffer,
5440 per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id);
5441 if (ret == 0)
5442 per_cpu_ptr(trace_buf->data, cpu_id)->entries =
5443 per_cpu_ptr(size_buf->data, cpu_id)->entries;
5444 }
5445
5446 return ret;
5447 }
5448 #endif /* CONFIG_TRACER_MAX_TRACE */
5449
5450 static int __tracing_resize_ring_buffer(struct trace_array *tr,
5451 unsigned long size, int cpu)
5452 {
5453 int ret;
5454
5455 /*
5456 * If kernel or user changes the size of the ring buffer
5457 * we use the size that was given, and we can forget about
5458 * expanding it later.
5459 */
5460 ring_buffer_expanded = true;
5461
5462 /* May be called before buffers are initialized */
5463 if (!tr->trace_buffer.buffer)
5464 return 0;
5465
5466 ret = ring_buffer_resize(tr->trace_buffer.buffer, size, cpu);
5467 if (ret < 0)
5468 return ret;
5469
5470 #ifdef CONFIG_TRACER_MAX_TRACE
5471 if (!(tr->flags & TRACE_ARRAY_FL_GLOBAL) ||
5472 !tr->current_trace->use_max_tr)
5473 goto out;
5474
5475 ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu);
5476 if (ret < 0) {
5477 int r = resize_buffer_duplicate_size(&tr->trace_buffer,
5478 &tr->trace_buffer, cpu);
5479 if (r < 0) {
5480 /*
5481 * AARGH! We are left with different
5482 * size max buffer!!!!
5483 * The max buffer is our "snapshot" buffer.
5484 * When a tracer needs a snapshot (one of the
5485 * latency tracers), it swaps the max buffer
5486 * with the saved snap shot. We succeeded to
5487 * update the size of the main buffer, but failed to
5488 * update the size of the max buffer. But when we tried
5489 * to reset the main buffer to the original size, we
5490 * failed there too. This is very unlikely to
5491 * happen, but if it does, warn and kill all
5492 * tracing.
5493 */
5494 WARN_ON(1);
5495 tracing_disabled = 1;
5496 }
5497 return ret;
5498 }
5499
5500 if (cpu == RING_BUFFER_ALL_CPUS)
5501 set_buffer_entries(&tr->max_buffer, size);
5502 else
5503 per_cpu_ptr(tr->max_buffer.data, cpu)->entries = size;
5504
5505 out:
5506 #endif /* CONFIG_TRACER_MAX_TRACE */
5507
5508 if (cpu == RING_BUFFER_ALL_CPUS)
5509 set_buffer_entries(&tr->trace_buffer, size);
5510 else
5511 per_cpu_ptr(tr->trace_buffer.data, cpu)->entries = size;
5512
5513 return ret;
5514 }
5515
5516 static ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
5517 unsigned long size, int cpu_id)
5518 {
5519 int ret = size;
5520
5521 mutex_lock(&trace_types_lock);
5522
5523 if (cpu_id != RING_BUFFER_ALL_CPUS) {
5524 /* make sure, this cpu is enabled in the mask */
5525 if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask)) {
5526 ret = -EINVAL;
5527 goto out;
5528 }
5529 }
5530
5531 ret = __tracing_resize_ring_buffer(tr, size, cpu_id);
5532 if (ret < 0)
5533 ret = -ENOMEM;
5534
5535 out:
5536 mutex_unlock(&trace_types_lock);
5537
5538 return ret;
5539 }
5540
5541
5542 /**
5543 * tracing_update_buffers - used by tracing facility to expand ring buffers
5544 *
5545 * To save on memory when the tracing is never used on a system with it
5546 * configured in. The ring buffers are set to a minimum size. But once
5547 * a user starts to use the tracing facility, then they need to grow
5548 * to their default size.
5549 *
5550 * This function is to be called when a tracer is about to be used.
5551 */
5552 int tracing_update_buffers(void)
5553 {
5554 int ret = 0;
5555
5556 mutex_lock(&trace_types_lock);
5557 if (!ring_buffer_expanded)
5558 ret = __tracing_resize_ring_buffer(&global_trace, trace_buf_size,
5559 RING_BUFFER_ALL_CPUS);
5560 mutex_unlock(&trace_types_lock);
5561
5562 return ret;
5563 }
5564
5565 struct trace_option_dentry;
5566
5567 static void
5568 create_trace_option_files(struct trace_array *tr, struct tracer *tracer);
5569
5570 /*
5571 * Used to clear out the tracer before deletion of an instance.
5572 * Must have trace_types_lock held.
5573 */
5574 static void tracing_set_nop(struct trace_array *tr)
5575 {
5576 if (tr->current_trace == &nop_trace)
5577 return;
5578
5579 tr->current_trace->enabled--;
5580
5581 if (tr->current_trace->reset)
5582 tr->current_trace->reset(tr);
5583
5584 tr->current_trace = &nop_trace;
5585 }
5586
5587 static void add_tracer_options(struct trace_array *tr, struct tracer *t)
5588 {
5589 /* Only enable if the directory has been created already. */
5590 if (!tr->dir)
5591 return;
5592
5593 create_trace_option_files(tr, t);
5594 }
5595
5596 static int tracing_set_tracer(struct trace_array *tr, const char *buf)
5597 {
5598 struct tracer *t;
5599 #ifdef CONFIG_TRACER_MAX_TRACE
5600 bool had_max_tr;
5601 #endif
5602 int ret = 0;
5603
5604 mutex_lock(&trace_types_lock);
5605
5606 if (!ring_buffer_expanded) {
5607 ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
5608 RING_BUFFER_ALL_CPUS);
5609 if (ret < 0)
5610 goto out;
5611 ret = 0;
5612 }
5613
5614 for (t = trace_types; t; t = t->next) {
5615 if (strcmp(t->name, buf) == 0)
5616 break;
5617 }
5618 if (!t) {
5619 ret = -EINVAL;
5620 goto out;
5621 }
5622 if (t == tr->current_trace)
5623 goto out;
5624
5625 #ifdef CONFIG_TRACER_SNAPSHOT
5626 if (t->use_max_tr) {
5627 arch_spin_lock(&tr->max_lock);
5628 if (tr->cond_snapshot)
5629 ret = -EBUSY;
5630 arch_spin_unlock(&tr->max_lock);
5631 if (ret)
5632 goto out;
5633 }
5634 #endif
5635 /* Some tracers won't work on kernel command line */
5636 if (system_state < SYSTEM_RUNNING && t->noboot) {
5637 pr_warn("Tracer '%s' is not allowed on command line, ignored\n",
5638 t->name);
5639 goto out;
5640 }
5641
5642 /* Some tracers are only allowed for the top level buffer */
5643 if (!trace_ok_for_array(t, tr)) {
5644 ret = -EINVAL;
5645 goto out;
5646 }
5647
5648 /* If trace pipe files are being read, we can't change the tracer */
5649 if (tr->current_trace->ref) {
5650 ret = -EBUSY;
5651 goto out;
5652 }
5653
5654 trace_branch_disable();
5655
5656 tr->current_trace->enabled--;
5657
5658 if (tr->current_trace->reset)
5659 tr->current_trace->reset(tr);
5660
5661 /* Current trace needs to be nop_trace before synchronize_rcu */
5662 tr->current_trace = &nop_trace;
5663
5664 #ifdef CONFIG_TRACER_MAX_TRACE
5665 had_max_tr = tr->allocated_snapshot;
5666
5667 if (had_max_tr && !t->use_max_tr) {
5668 /*
5669 * We need to make sure that the update_max_tr sees that
5670 * current_trace changed to nop_trace to keep it from
5671 * swapping the buffers after we resize it.
5672 * The update_max_tr is called from interrupts disabled
5673 * so a synchronized_sched() is sufficient.
5674 */
5675 synchronize_rcu();
5676 free_snapshot(tr);
5677 }
5678 #endif
5679
5680 #ifdef CONFIG_TRACER_MAX_TRACE
5681 if (t->use_max_tr && !had_max_tr) {
5682 ret = tracing_alloc_snapshot_instance(tr);
5683 if (ret < 0)
5684 goto out;
5685 }
5686 #endif
5687
5688 if (t->init) {
5689 ret = tracer_init(t, tr);
5690 if (ret)
5691 goto out;
5692 }
5693
5694 tr->current_trace = t;
5695 tr->current_trace->enabled++;
5696 trace_branch_enable(tr);
5697 out:
5698 mutex_unlock(&trace_types_lock);
5699
5700 return ret;
5701 }
5702
5703 static ssize_t
5704 tracing_set_trace_write(struct file *filp, const char __user *ubuf,
5705 size_t cnt, loff_t *ppos)
5706 {
5707 struct trace_array *tr = filp->private_data;
5708 char buf[MAX_TRACER_SIZE+1];
5709 int i;
5710 size_t ret;
5711 int err;
5712
5713 ret = cnt;
5714
5715 if (cnt > MAX_TRACER_SIZE)
5716 cnt = MAX_TRACER_SIZE;
5717
5718 if (copy_from_user(buf, ubuf, cnt))
5719 return -EFAULT;
5720
5721 buf[cnt] = 0;
5722
5723 /* strip ending whitespace. */
5724 for (i = cnt - 1; i > 0 && isspace(buf[i]); i--)
5725 buf[i] = 0;
5726
5727 err = tracing_set_tracer(tr, buf);
5728 if (err)
5729 return err;
5730
5731 *ppos += ret;
5732
5733 return ret;
5734 }
5735
5736 static ssize_t
5737 tracing_nsecs_read(unsigned long *ptr, char __user *ubuf,
5738 size_t cnt, loff_t *ppos)
5739 {
5740 char buf[64];
5741 int r;
5742
5743 r = snprintf(buf, sizeof(buf), "%ld\n",
5744 *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
5745 if (r > sizeof(buf))
5746 r = sizeof(buf);
5747 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5748 }
5749
5750 static ssize_t
5751 tracing_nsecs_write(unsigned long *ptr, const char __user *ubuf,
5752 size_t cnt, loff_t *ppos)
5753 {
5754 unsigned long val;
5755 int ret;
5756
5757 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5758 if (ret)
5759 return ret;
5760
5761 *ptr = val * 1000;
5762
5763 return cnt;
5764 }
5765
5766 static ssize_t
5767 tracing_thresh_read(struct file *filp, char __user *ubuf,
5768 size_t cnt, loff_t *ppos)
5769 {
5770 return tracing_nsecs_read(&tracing_thresh, ubuf, cnt, ppos);
5771 }
5772
5773 static ssize_t
5774 tracing_thresh_write(struct file *filp, const char __user *ubuf,
5775 size_t cnt, loff_t *ppos)
5776 {
5777 struct trace_array *tr = filp->private_data;
5778 int ret;
5779
5780 mutex_lock(&trace_types_lock);
5781 ret = tracing_nsecs_write(&tracing_thresh, ubuf, cnt, ppos);
5782 if (ret < 0)
5783 goto out;
5784
5785 if (tr->current_trace->update_thresh) {
5786 ret = tr->current_trace->update_thresh(tr);
5787 if (ret < 0)
5788 goto out;
5789 }
5790
5791 ret = cnt;
5792 out:
5793 mutex_unlock(&trace_types_lock);
5794
5795 return ret;
5796 }
5797
5798 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
5799
5800 static ssize_t
5801 tracing_max_lat_read(struct file *filp, char __user *ubuf,
5802 size_t cnt, loff_t *ppos)
5803 {
5804 return tracing_nsecs_read(filp->private_data, ubuf, cnt, ppos);
5805 }
5806
5807 static ssize_t
5808 tracing_max_lat_write(struct file *filp, const char __user *ubuf,
5809 size_t cnt, loff_t *ppos)
5810 {
5811 return tracing_nsecs_write(filp->private_data, ubuf, cnt, ppos);
5812 }
5813
5814 #endif
5815
5816 static int tracing_open_pipe(struct inode *inode, struct file *filp)
5817 {
5818 struct trace_array *tr = inode->i_private;
5819 struct trace_iterator *iter;
5820 int ret = 0;
5821
5822 if (tracing_disabled)
5823 return -ENODEV;
5824
5825 if (trace_array_get(tr) < 0)
5826 return -ENODEV;
5827
5828 mutex_lock(&trace_types_lock);
5829
5830 /* create a buffer to store the information to pass to userspace */
5831 iter = kzalloc(sizeof(*iter), GFP_KERNEL);
5832 if (!iter) {
5833 ret = -ENOMEM;
5834 __trace_array_put(tr);
5835 goto out;
5836 }
5837
5838 trace_seq_init(&iter->seq);
5839 iter->trace = tr->current_trace;
5840
5841 if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
5842 ret = -ENOMEM;
5843 goto fail;
5844 }
5845
5846 /* trace pipe does not show start of buffer */
5847 cpumask_setall(iter->started);
5848
5849 if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
5850 iter->iter_flags |= TRACE_FILE_LAT_FMT;
5851
5852 /* Output in nanoseconds only if we are using a clock in nanoseconds. */
5853 if (trace_clocks[tr->clock_id].in_ns)
5854 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
5855
5856 iter->tr = tr;
5857 iter->trace_buffer = &tr->trace_buffer;
5858 iter->cpu_file = tracing_get_cpu(inode);
5859 mutex_init(&iter->mutex);
5860 filp->private_data = iter;
5861
5862 if (iter->trace->pipe_open)
5863 iter->trace->pipe_open(iter);
5864
5865 nonseekable_open(inode, filp);
5866
5867 tr->current_trace->ref++;
5868 out:
5869 mutex_unlock(&trace_types_lock);
5870 return ret;
5871
5872 fail:
5873 kfree(iter);
5874 __trace_array_put(tr);
5875 mutex_unlock(&trace_types_lock);
5876 return ret;
5877 }
5878
5879 static int tracing_release_pipe(struct inode *inode, struct file *file)
5880 {
5881 struct trace_iterator *iter = file->private_data;
5882 struct trace_array *tr = inode->i_private;
5883
5884 mutex_lock(&trace_types_lock);
5885
5886 tr->current_trace->ref--;
5887
5888 if (iter->trace->pipe_close)
5889 iter->trace->pipe_close(iter);
5890
5891 mutex_unlock(&trace_types_lock);
5892
5893 free_cpumask_var(iter->started);
5894 mutex_destroy(&iter->mutex);
5895 kfree(iter);
5896
5897 trace_array_put(tr);
5898
5899 return 0;
5900 }
5901
5902 static __poll_t
5903 trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table)
5904 {
5905 struct trace_array *tr = iter->tr;
5906
5907 /* Iterators are static, they should be filled or empty */
5908 if (trace_buffer_iter(iter, iter->cpu_file))
5909 return EPOLLIN | EPOLLRDNORM;
5910
5911 if (tr->trace_flags & TRACE_ITER_BLOCK)
5912 /*
5913 * Always select as readable when in blocking mode
5914 */
5915 return EPOLLIN | EPOLLRDNORM;
5916 else
5917 return ring_buffer_poll_wait(iter->trace_buffer->buffer, iter->cpu_file,
5918 filp, poll_table);
5919 }
5920
5921 static __poll_t
5922 tracing_poll_pipe(struct file *filp, poll_table *poll_table)
5923 {
5924 struct trace_iterator *iter = filp->private_data;
5925
5926 return trace_poll(iter, filp, poll_table);
5927 }
5928
5929 /* Must be called with iter->mutex held. */
5930 static int tracing_wait_pipe(struct file *filp)
5931 {
5932 struct trace_iterator *iter = filp->private_data;
5933 int ret;
5934
5935 while (trace_empty(iter)) {
5936
5937 if ((filp->f_flags & O_NONBLOCK)) {
5938 return -EAGAIN;
5939 }
5940
5941 /*
5942 * We block until we read something and tracing is disabled.
5943 * We still block if tracing is disabled, but we have never
5944 * read anything. This allows a user to cat this file, and
5945 * then enable tracing. But after we have read something,
5946 * we give an EOF when tracing is again disabled.
5947 *
5948 * iter->pos will be 0 if we haven't read anything.
5949 */
5950 if (!tracer_tracing_is_on(iter->tr) && iter->pos)
5951 break;
5952
5953 mutex_unlock(&iter->mutex);
5954
5955 ret = wait_on_pipe(iter, 0);
5956
5957 mutex_lock(&iter->mutex);
5958
5959 if (ret)
5960 return ret;
5961 }
5962
5963 return 1;
5964 }
5965
5966 /*
5967 * Consumer reader.
5968 */
5969 static ssize_t
5970 tracing_read_pipe(struct file *filp, char __user *ubuf,
5971 size_t cnt, loff_t *ppos)
5972 {
5973 struct trace_iterator *iter = filp->private_data;
5974 ssize_t sret;
5975
5976 /*
5977 * Avoid more than one consumer on a single file descriptor
5978 * This is just a matter of traces coherency, the ring buffer itself
5979 * is protected.
5980 */
5981 mutex_lock(&iter->mutex);
5982
5983 /* return any leftover data */
5984 sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
5985 if (sret != -EBUSY)
5986 goto out;
5987
5988 trace_seq_init(&iter->seq);
5989
5990 if (iter->trace->read) {
5991 sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
5992 if (sret)
5993 goto out;
5994 }
5995
5996 waitagain:
5997 sret = tracing_wait_pipe(filp);
5998 if (sret <= 0)
5999 goto out;
6000
6001 /* stop when tracing is finished */
6002 if (trace_empty(iter)) {
6003 sret = 0;
6004 goto out;
6005 }
6006
6007 if (cnt >= PAGE_SIZE)
6008 cnt = PAGE_SIZE - 1;
6009
6010 /* reset all but tr, trace, and overruns */
6011 memset(&iter->seq, 0,
6012 sizeof(struct trace_iterator) -
6013 offsetof(struct trace_iterator, seq));
6014 cpumask_clear(iter->started);
6015 iter->pos = -1;
6016
6017 trace_event_read_lock();
6018 trace_access_lock(iter->cpu_file);
6019 while (trace_find_next_entry_inc(iter) != NULL) {
6020 enum print_line_t ret;
6021 int save_len = iter->seq.seq.len;
6022
6023 ret = print_trace_line(iter);
6024 if (ret == TRACE_TYPE_PARTIAL_LINE) {
6025 /* don't print partial lines */
6026 iter->seq.seq.len = save_len;
6027 break;
6028 }
6029 if (ret != TRACE_TYPE_NO_CONSUME)
6030 trace_consume(iter);
6031
6032 if (trace_seq_used(&iter->seq) >= cnt)
6033 break;
6034
6035 /*
6036 * Setting the full flag means we reached the trace_seq buffer
6037 * size and we should leave by partial output condition above.
6038 * One of the trace_seq_* functions is not used properly.
6039 */
6040 WARN_ONCE(iter->seq.full, "full flag set for trace type %d",
6041 iter->ent->type);
6042 }
6043 trace_access_unlock(iter->cpu_file);
6044 trace_event_read_unlock();
6045
6046 /* Now copy what we have to the user */
6047 sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6048 if (iter->seq.seq.readpos >= trace_seq_used(&iter->seq))
6049 trace_seq_init(&iter->seq);
6050
6051 /*
6052 * If there was nothing to send to user, in spite of consuming trace
6053 * entries, go back to wait for more entries.
6054 */
6055 if (sret == -EBUSY)
6056 goto waitagain;
6057
6058 out:
6059 mutex_unlock(&iter->mutex);
6060
6061 return sret;
6062 }
6063
6064 static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
6065 unsigned int idx)
6066 {
6067 __free_page(spd->pages[idx]);
6068 }
6069
6070 static const struct pipe_buf_operations tracing_pipe_buf_ops = {
6071 .confirm = generic_pipe_buf_confirm,
6072 .release = generic_pipe_buf_release,
6073 .steal = generic_pipe_buf_steal,
6074 .get = generic_pipe_buf_get,
6075 };
6076
6077 static size_t
6078 tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
6079 {
6080 size_t count;
6081 int save_len;
6082 int ret;
6083
6084 /* Seq buffer is page-sized, exactly what we need. */
6085 for (;;) {
6086 save_len = iter->seq.seq.len;
6087 ret = print_trace_line(iter);
6088
6089 if (trace_seq_has_overflowed(&iter->seq)) {
6090 iter->seq.seq.len = save_len;
6091 break;
6092 }
6093
6094 /*
6095 * This should not be hit, because it should only
6096 * be set if the iter->seq overflowed. But check it
6097 * anyway to be safe.
6098 */
6099 if (ret == TRACE_TYPE_PARTIAL_LINE) {
6100 iter->seq.seq.len = save_len;
6101 break;
6102 }
6103
6104 count = trace_seq_used(&iter->seq) - save_len;
6105 if (rem < count) {
6106 rem = 0;
6107 iter->seq.seq.len = save_len;
6108 break;
6109 }
6110
6111 if (ret != TRACE_TYPE_NO_CONSUME)
6112 trace_consume(iter);
6113 rem -= count;
6114 if (!trace_find_next_entry_inc(iter)) {
6115 rem = 0;
6116 iter->ent = NULL;
6117 break;
6118 }
6119 }
6120
6121 return rem;
6122 }
6123
6124 static ssize_t tracing_splice_read_pipe(struct file *filp,
6125 loff_t *ppos,
6126 struct pipe_inode_info *pipe,
6127 size_t len,
6128 unsigned int flags)
6129 {
6130 struct page *pages_def[PIPE_DEF_BUFFERS];
6131 struct partial_page partial_def[PIPE_DEF_BUFFERS];
6132 struct trace_iterator *iter = filp->private_data;
6133 struct splice_pipe_desc spd = {
6134 .pages = pages_def,
6135 .partial = partial_def,
6136 .nr_pages = 0, /* This gets updated below. */
6137 .nr_pages_max = PIPE_DEF_BUFFERS,
6138 .ops = &tracing_pipe_buf_ops,
6139 .spd_release = tracing_spd_release_pipe,
6140 };
6141 ssize_t ret;
6142 size_t rem;
6143 unsigned int i;
6144
6145 if (splice_grow_spd(pipe, &spd))
6146 return -ENOMEM;
6147
6148 mutex_lock(&iter->mutex);
6149
6150 if (iter->trace->splice_read) {
6151 ret = iter->trace->splice_read(iter, filp,
6152 ppos, pipe, len, flags);
6153 if (ret)
6154 goto out_err;
6155 }
6156
6157 ret = tracing_wait_pipe(filp);
6158 if (ret <= 0)
6159 goto out_err;
6160
6161 if (!iter->ent && !trace_find_next_entry_inc(iter)) {
6162 ret = -EFAULT;
6163 goto out_err;
6164 }
6165
6166 trace_event_read_lock();
6167 trace_access_lock(iter->cpu_file);
6168
6169 /* Fill as many pages as possible. */
6170 for (i = 0, rem = len; i < spd.nr_pages_max && rem; i++) {
6171 spd.pages[i] = alloc_page(GFP_KERNEL);
6172 if (!spd.pages[i])
6173 break;
6174
6175 rem = tracing_fill_pipe_page(rem, iter);
6176
6177 /* Copy the data into the page, so we can start over. */
6178 ret = trace_seq_to_buffer(&iter->seq,
6179 page_address(spd.pages[i]),
6180 trace_seq_used(&iter->seq));
6181 if (ret < 0) {
6182 __free_page(spd.pages[i]);
6183 break;
6184 }
6185 spd.partial[i].offset = 0;
6186 spd.partial[i].len = trace_seq_used(&iter->seq);
6187
6188 trace_seq_init(&iter->seq);
6189 }
6190
6191 trace_access_unlock(iter->cpu_file);
6192 trace_event_read_unlock();
6193 mutex_unlock(&iter->mutex);
6194
6195 spd.nr_pages = i;
6196
6197 if (i)
6198 ret = splice_to_pipe(pipe, &spd);
6199 else
6200 ret = 0;
6201 out:
6202 splice_shrink_spd(&spd);
6203 return ret;
6204
6205 out_err:
6206 mutex_unlock(&iter->mutex);
6207 goto out;
6208 }
6209
6210 static ssize_t
6211 tracing_entries_read(struct file *filp, char __user *ubuf,
6212 size_t cnt, loff_t *ppos)
6213 {
6214 struct inode *inode = file_inode(filp);
6215 struct trace_array *tr = inode->i_private;
6216 int cpu = tracing_get_cpu(inode);
6217 char buf[64];
6218 int r = 0;
6219 ssize_t ret;
6220
6221 mutex_lock(&trace_types_lock);
6222
6223 if (cpu == RING_BUFFER_ALL_CPUS) {
6224 int cpu, buf_size_same;
6225 unsigned long size;
6226
6227 size = 0;
6228 buf_size_same = 1;
6229 /* check if all cpu sizes are same */
6230 for_each_tracing_cpu(cpu) {
6231 /* fill in the size from first enabled cpu */
6232 if (size == 0)
6233 size = per_cpu_ptr(tr->trace_buffer.data, cpu)->entries;
6234 if (size != per_cpu_ptr(tr->trace_buffer.data, cpu)->entries) {
6235 buf_size_same = 0;
6236 break;
6237 }
6238 }
6239
6240 if (buf_size_same) {
6241 if (!ring_buffer_expanded)
6242 r = sprintf(buf, "%lu (expanded: %lu)\n",
6243 size >> 10,
6244 trace_buf_size >> 10);
6245 else
6246 r = sprintf(buf, "%lu\n", size >> 10);
6247 } else
6248 r = sprintf(buf, "X\n");
6249 } else
6250 r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->trace_buffer.data, cpu)->entries >> 10);
6251
6252 mutex_unlock(&trace_types_lock);
6253
6254 ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6255 return ret;
6256 }
6257
6258 static ssize_t
6259 tracing_entries_write(struct file *filp, const char __user *ubuf,
6260 size_t cnt, loff_t *ppos)
6261 {
6262 struct inode *inode = file_inode(filp);
6263 struct trace_array *tr = inode->i_private;
6264 unsigned long val;
6265 int ret;
6266
6267 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6268 if (ret)
6269 return ret;
6270
6271 /* must have at least 1 entry */
6272 if (!val)
6273 return -EINVAL;
6274
6275 /* value is in KB */
6276 val <<= 10;
6277 ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode));
6278 if (ret < 0)
6279 return ret;
6280
6281 *ppos += cnt;
6282
6283 return cnt;
6284 }
6285
6286 static ssize_t
6287 tracing_total_entries_read(struct file *filp, char __user *ubuf,
6288 size_t cnt, loff_t *ppos)
6289 {
6290 struct trace_array *tr = filp->private_data;
6291 char buf[64];
6292 int r, cpu;
6293 unsigned long size = 0, expanded_size = 0;
6294
6295 mutex_lock(&trace_types_lock);
6296 for_each_tracing_cpu(cpu) {
6297 size += per_cpu_ptr(tr->trace_buffer.data, cpu)->entries >> 10;
6298 if (!ring_buffer_expanded)
6299 expanded_size += trace_buf_size >> 10;
6300 }
6301 if (ring_buffer_expanded)
6302 r = sprintf(buf, "%lu\n", size);
6303 else
6304 r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size);
6305 mutex_unlock(&trace_types_lock);
6306
6307 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6308 }
6309
6310 static ssize_t
6311 tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
6312 size_t cnt, loff_t *ppos)
6313 {
6314 /*
6315 * There is no need to read what the user has written, this function
6316 * is just to make sure that there is no error when "echo" is used
6317 */
6318
6319 *ppos += cnt;
6320
6321 return cnt;
6322 }
6323
6324 static int
6325 tracing_free_buffer_release(struct inode *inode, struct file *filp)
6326 {
6327 struct trace_array *tr = inode->i_private;
6328
6329 /* disable tracing ? */
6330 if (tr->trace_flags & TRACE_ITER_STOP_ON_FREE)
6331 tracer_tracing_off(tr);
6332 /* resize the ring buffer to 0 */
6333 tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS);
6334
6335 trace_array_put(tr);
6336
6337 return 0;
6338 }
6339
6340 static ssize_t
6341 tracing_mark_write(struct file *filp, const char __user *ubuf,
6342 size_t cnt, loff_t *fpos)
6343 {
6344 struct trace_array *tr = filp->private_data;
6345 struct ring_buffer_event *event;
6346 enum event_trigger_type tt = ETT_NONE;
6347 struct ring_buffer *buffer;
6348 struct print_entry *entry;
6349 unsigned long irq_flags;
6350 ssize_t written;
6351 int size;
6352 int len;
6353
6354 /* Used in tracing_mark_raw_write() as well */
6355 #define FAULTED_STR "<faulted>"
6356 #define FAULTED_SIZE (sizeof(FAULTED_STR) - 1) /* '\0' is already accounted for */
6357
6358 if (tracing_disabled)
6359 return -EINVAL;
6360
6361 if (!(tr->trace_flags & TRACE_ITER_MARKERS))
6362 return -EINVAL;
6363
6364 if (cnt > TRACE_BUF_SIZE)
6365 cnt = TRACE_BUF_SIZE;
6366
6367 BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
6368
6369 local_save_flags(irq_flags);
6370 size = sizeof(*entry) + cnt + 2; /* add '\0' and possible '\n' */
6371
6372 /* If less than "<faulted>", then make sure we can still add that */
6373 if (cnt < FAULTED_SIZE)
6374 size += FAULTED_SIZE - cnt;
6375
6376 buffer = tr->trace_buffer.buffer;
6377 event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
6378 irq_flags, preempt_count());
6379 if (unlikely(!event))
6380 /* Ring buffer disabled, return as if not open for write */
6381 return -EBADF;
6382
6383 entry = ring_buffer_event_data(event);
6384 entry->ip = _THIS_IP_;
6385
6386 len = __copy_from_user_inatomic(&entry->buf, ubuf, cnt);
6387 if (len) {
6388 memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
6389 cnt = FAULTED_SIZE;
6390 written = -EFAULT;
6391 } else
6392 written = cnt;
6393 len = cnt;
6394
6395 if (tr->trace_marker_file && !list_empty(&tr->trace_marker_file->triggers)) {
6396 /* do not add \n before testing triggers, but add \0 */
6397 entry->buf[cnt] = '\0';
6398 tt = event_triggers_call(tr->trace_marker_file, entry, event);
6399 }
6400
6401 if (entry->buf[cnt - 1] != '\n') {
6402 entry->buf[cnt] = '\n';
6403 entry->buf[cnt + 1] = '\0';
6404 } else
6405 entry->buf[cnt] = '\0';
6406
6407 __buffer_unlock_commit(buffer, event);
6408
6409 if (tt)
6410 event_triggers_post_call(tr->trace_marker_file, tt);
6411
6412 if (written > 0)
6413 *fpos += written;
6414
6415 return written;
6416 }
6417
6418 /* Limit it for now to 3K (including tag) */
6419 #define RAW_DATA_MAX_SIZE (1024*3)
6420
6421 static ssize_t
6422 tracing_mark_raw_write(struct file *filp, const char __user *ubuf,
6423 size_t cnt, loff_t *fpos)
6424 {
6425 struct trace_array *tr = filp->private_data;
6426 struct ring_buffer_event *event;
6427 struct ring_buffer *buffer;
6428 struct raw_data_entry *entry;
6429 unsigned long irq_flags;
6430 ssize_t written;
6431 int size;
6432 int len;
6433
6434 #define FAULT_SIZE_ID (FAULTED_SIZE + sizeof(int))
6435
6436 if (tracing_disabled)
6437 return -EINVAL;
6438
6439 if (!(tr->trace_flags & TRACE_ITER_MARKERS))
6440 return -EINVAL;
6441
6442 /* The marker must at least have a tag id */
6443 if (cnt < sizeof(unsigned int) || cnt > RAW_DATA_MAX_SIZE)
6444 return -EINVAL;
6445
6446 if (cnt > TRACE_BUF_SIZE)
6447 cnt = TRACE_BUF_SIZE;
6448
6449 BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
6450
6451 local_save_flags(irq_flags);
6452 size = sizeof(*entry) + cnt;
6453 if (cnt < FAULT_SIZE_ID)
6454 size += FAULT_SIZE_ID - cnt;
6455
6456 buffer = tr->trace_buffer.buffer;
6457 event = __trace_buffer_lock_reserve(buffer, TRACE_RAW_DATA, size,
6458 irq_flags, preempt_count());
6459 if (!event)
6460 /* Ring buffer disabled, return as if not open for write */
6461 return -EBADF;
6462
6463 entry = ring_buffer_event_data(event);
6464
6465 len = __copy_from_user_inatomic(&entry->id, ubuf, cnt);
6466 if (len) {
6467 entry->id = -1;
6468 memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
6469 written = -EFAULT;
6470 } else
6471 written = cnt;
6472
6473 __buffer_unlock_commit(buffer, event);
6474
6475 if (written > 0)
6476 *fpos += written;
6477
6478 return written;
6479 }
6480
6481 static int tracing_clock_show(struct seq_file *m, void *v)
6482 {
6483 struct trace_array *tr = m->private;
6484 int i;
6485
6486 for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
6487 seq_printf(m,
6488 "%s%s%s%s", i ? " " : "",
6489 i == tr->clock_id ? "[" : "", trace_clocks[i].name,
6490 i == tr->clock_id ? "]" : "");
6491 seq_putc(m, '\n');
6492
6493 return 0;
6494 }
6495
6496 int tracing_set_clock(struct trace_array *tr, const char *clockstr)
6497 {
6498 int i;
6499
6500 for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
6501 if (strcmp(trace_clocks[i].name, clockstr) == 0)
6502 break;
6503 }
6504 if (i == ARRAY_SIZE(trace_clocks))
6505 return -EINVAL;
6506
6507 mutex_lock(&trace_types_lock);
6508
6509 tr->clock_id = i;
6510
6511 ring_buffer_set_clock(tr->trace_buffer.buffer, trace_clocks[i].func);
6512
6513 /*
6514 * New clock may not be consistent with the previous clock.
6515 * Reset the buffer so that it doesn't have incomparable timestamps.
6516 */
6517 tracing_reset_online_cpus(&tr->trace_buffer);
6518
6519 #ifdef CONFIG_TRACER_MAX_TRACE
6520 if (tr->max_buffer.buffer)
6521 ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func);
6522 tracing_reset_online_cpus(&tr->max_buffer);
6523 #endif
6524
6525 mutex_unlock(&trace_types_lock);
6526
6527 return 0;
6528 }
6529
6530 static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
6531 size_t cnt, loff_t *fpos)
6532 {
6533 struct seq_file *m = filp->private_data;
6534 struct trace_array *tr = m->private;
6535 char buf[64];
6536 const char *clockstr;
6537 int ret;
6538
6539 if (cnt >= sizeof(buf))
6540 return -EINVAL;
6541
6542 if (copy_from_user(buf, ubuf, cnt))
6543 return -EFAULT;
6544
6545 buf[cnt] = 0;
6546
6547 clockstr = strstrip(buf);
6548
6549 ret = tracing_set_clock(tr, clockstr);
6550 if (ret)
6551 return ret;
6552
6553 *fpos += cnt;
6554
6555 return cnt;
6556 }
6557
6558 static int tracing_clock_open(struct inode *inode, struct file *file)
6559 {
6560 struct trace_array *tr = inode->i_private;
6561 int ret;
6562
6563 if (tracing_disabled)
6564 return -ENODEV;
6565
6566 if (trace_array_get(tr))
6567 return -ENODEV;
6568
6569 ret = single_open(file, tracing_clock_show, inode->i_private);
6570 if (ret < 0)
6571 trace_array_put(tr);
6572
6573 return ret;
6574 }
6575
6576 static int tracing_time_stamp_mode_show(struct seq_file *m, void *v)
6577 {
6578 struct trace_array *tr = m->private;
6579
6580 mutex_lock(&trace_types_lock);
6581
6582 if (ring_buffer_time_stamp_abs(tr->trace_buffer.buffer))
6583 seq_puts(m, "delta [absolute]\n");
6584 else
6585 seq_puts(m, "[delta] absolute\n");
6586
6587 mutex_unlock(&trace_types_lock);
6588
6589 return 0;
6590 }
6591
6592 static int tracing_time_stamp_mode_open(struct inode *inode, struct file *file)
6593 {
6594 struct trace_array *tr = inode->i_private;
6595 int ret;
6596
6597 if (tracing_disabled)
6598 return -ENODEV;
6599
6600 if (trace_array_get(tr))
6601 return -ENODEV;
6602
6603 ret = single_open(file, tracing_time_stamp_mode_show, inode->i_private);
6604 if (ret < 0)
6605 trace_array_put(tr);
6606
6607 return ret;
6608 }
6609
6610 int tracing_set_time_stamp_abs(struct trace_array *tr, bool abs)
6611 {
6612 int ret = 0;
6613
6614 mutex_lock(&trace_types_lock);
6615
6616 if (abs && tr->time_stamp_abs_ref++)
6617 goto out;
6618
6619 if (!abs) {
6620 if (WARN_ON_ONCE(!tr->time_stamp_abs_ref)) {
6621 ret = -EINVAL;
6622 goto out;
6623 }
6624
6625 if (--tr->time_stamp_abs_ref)
6626 goto out;
6627 }
6628
6629 ring_buffer_set_time_stamp_abs(tr->trace_buffer.buffer, abs);
6630
6631 #ifdef CONFIG_TRACER_MAX_TRACE
6632 if (tr->max_buffer.buffer)
6633 ring_buffer_set_time_stamp_abs(tr->max_buffer.buffer, abs);
6634 #endif
6635 out:
6636 mutex_unlock(&trace_types_lock);
6637
6638 return ret;
6639 }
6640
6641 struct ftrace_buffer_info {
6642 struct trace_iterator iter;
6643 void *spare;
6644 unsigned int spare_cpu;
6645 unsigned int read;
6646 };
6647
6648 #ifdef CONFIG_TRACER_SNAPSHOT
6649 static int tracing_snapshot_open(struct inode *inode, struct file *file)
6650 {
6651 struct trace_array *tr = inode->i_private;
6652 struct trace_iterator *iter;
6653 struct seq_file *m;
6654 int ret = 0;
6655
6656 if (trace_array_get(tr) < 0)
6657 return -ENODEV;
6658
6659 if (file->f_mode & FMODE_READ) {
6660 iter = __tracing_open(inode, file, true);
6661 if (IS_ERR(iter))
6662 ret = PTR_ERR(iter);
6663 } else {
6664 /* Writes still need the seq_file to hold the private data */
6665 ret = -ENOMEM;
6666 m = kzalloc(sizeof(*m), GFP_KERNEL);
6667 if (!m)
6668 goto out;
6669 iter = kzalloc(sizeof(*iter), GFP_KERNEL);
6670 if (!iter) {
6671 kfree(m);
6672 goto out;
6673 }
6674 ret = 0;
6675
6676 iter->tr = tr;
6677 iter->trace_buffer = &tr->max_buffer;
6678 iter->cpu_file = tracing_get_cpu(inode);
6679 m->private = iter;
6680 file->private_data = m;
6681 }
6682 out:
6683 if (ret < 0)
6684 trace_array_put(tr);
6685
6686 return ret;
6687 }
6688
6689 static ssize_t
6690 tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
6691 loff_t *ppos)
6692 {
6693 struct seq_file *m = filp->private_data;
6694 struct trace_iterator *iter = m->private;
6695 struct trace_array *tr = iter->tr;
6696 unsigned long val;
6697 int ret;
6698
6699 ret = tracing_update_buffers();
6700 if (ret < 0)
6701 return ret;
6702
6703 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6704 if (ret)
6705 return ret;
6706
6707 mutex_lock(&trace_types_lock);
6708
6709 if (tr->current_trace->use_max_tr) {
6710 ret = -EBUSY;
6711 goto out;
6712 }
6713
6714 arch_spin_lock(&tr->max_lock);
6715 if (tr->cond_snapshot)
6716 ret = -EBUSY;
6717 arch_spin_unlock(&tr->max_lock);
6718 if (ret)
6719 goto out;
6720
6721 switch (val) {
6722 case 0:
6723 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
6724 ret = -EINVAL;
6725 break;
6726 }
6727 if (tr->allocated_snapshot)
6728 free_snapshot(tr);
6729 break;
6730 case 1:
6731 /* Only allow per-cpu swap if the ring buffer supports it */
6732 #ifndef CONFIG_RING_BUFFER_ALLOW_SWAP
6733 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
6734 ret = -EINVAL;
6735 break;
6736 }
6737 #endif
6738 if (tr->allocated_snapshot)
6739 ret = resize_buffer_duplicate_size(&tr->max_buffer,
6740 &tr->trace_buffer, iter->cpu_file);
6741 else
6742 ret = tracing_alloc_snapshot_instance(tr);
6743 if (ret < 0)
6744 break;
6745 local_irq_disable();
6746 /* Now, we're going to swap */
6747 if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
6748 update_max_tr(tr, current, smp_processor_id(), NULL);
6749 else
6750 update_max_tr_single(tr, current, iter->cpu_file);
6751 local_irq_enable();
6752 break;
6753 default:
6754 if (tr->allocated_snapshot) {
6755 if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
6756 tracing_reset_online_cpus(&tr->max_buffer);
6757 else
6758 tracing_reset_cpu(&tr->max_buffer, iter->cpu_file);
6759 }
6760 break;
6761 }
6762
6763 if (ret >= 0) {
6764 *ppos += cnt;
6765 ret = cnt;
6766 }
6767 out:
6768 mutex_unlock(&trace_types_lock);
6769 return ret;
6770 }
6771
6772 static int tracing_snapshot_release(struct inode *inode, struct file *file)
6773 {
6774 struct seq_file *m = file->private_data;
6775 int ret;
6776
6777 ret = tracing_release(inode, file);
6778
6779 if (file->f_mode & FMODE_READ)
6780 return ret;
6781
6782 /* If write only, the seq_file is just a stub */
6783 if (m)
6784 kfree(m->private);
6785 kfree(m);
6786
6787 return 0;
6788 }
6789
6790 static int tracing_buffers_open(struct inode *inode, struct file *filp);
6791 static ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf,
6792 size_t count, loff_t *ppos);
6793 static int tracing_buffers_release(struct inode *inode, struct file *file);
6794 static ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos,
6795 struct pipe_inode_info *pipe, size_t len, unsigned int flags);
6796
6797 static int snapshot_raw_open(struct inode *inode, struct file *filp)
6798 {
6799 struct ftrace_buffer_info *info;
6800 int ret;
6801
6802 ret = tracing_buffers_open(inode, filp);
6803 if (ret < 0)
6804 return ret;
6805
6806 info = filp->private_data;
6807
6808 if (info->iter.trace->use_max_tr) {
6809 tracing_buffers_release(inode, filp);
6810 return -EBUSY;
6811 }
6812
6813 info->iter.snapshot = true;
6814 info->iter.trace_buffer = &info->iter.tr->max_buffer;
6815
6816 return ret;
6817 }
6818
6819 #endif /* CONFIG_TRACER_SNAPSHOT */
6820
6821
6822 static const struct file_operations tracing_thresh_fops = {
6823 .open = tracing_open_generic,
6824 .read = tracing_thresh_read,
6825 .write = tracing_thresh_write,
6826 .llseek = generic_file_llseek,
6827 };
6828
6829 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
6830 static const struct file_operations tracing_max_lat_fops = {
6831 .open = tracing_open_generic,
6832 .read = tracing_max_lat_read,
6833 .write = tracing_max_lat_write,
6834 .llseek = generic_file_llseek,
6835 };
6836 #endif
6837
6838 static const struct file_operations set_tracer_fops = {
6839 .open = tracing_open_generic,
6840 .read = tracing_set_trace_read,
6841 .write = tracing_set_trace_write,
6842 .llseek = generic_file_llseek,
6843 };
6844
6845 static const struct file_operations tracing_pipe_fops = {
6846 .open = tracing_open_pipe,
6847 .poll = tracing_poll_pipe,
6848 .read = tracing_read_pipe,
6849 .splice_read = tracing_splice_read_pipe,
6850 .release = tracing_release_pipe,
6851 .llseek = no_llseek,
6852 };
6853
6854 static const struct file_operations tracing_entries_fops = {
6855 .open = tracing_open_generic_tr,
6856 .read = tracing_entries_read,
6857 .write = tracing_entries_write,
6858 .llseek = generic_file_llseek,
6859 .release = tracing_release_generic_tr,
6860 };
6861
6862 static const struct file_operations tracing_total_entries_fops = {
6863 .open = tracing_open_generic_tr,
6864 .read = tracing_total_entries_read,
6865 .llseek = generic_file_llseek,
6866 .release = tracing_release_generic_tr,
6867 };
6868
6869 static const struct file_operations tracing_free_buffer_fops = {
6870 .open = tracing_open_generic_tr,
6871 .write = tracing_free_buffer_write,
6872 .release = tracing_free_buffer_release,
6873 };
6874
6875 static const struct file_operations tracing_mark_fops = {
6876 .open = tracing_open_generic_tr,
6877 .write = tracing_mark_write,
6878 .llseek = generic_file_llseek,
6879 .release = tracing_release_generic_tr,
6880 };
6881
6882 static const struct file_operations tracing_mark_raw_fops = {
6883 .open = tracing_open_generic_tr,
6884 .write = tracing_mark_raw_write,
6885 .llseek = generic_file_llseek,
6886 .release = tracing_release_generic_tr,
6887 };
6888
6889 static const struct file_operations trace_clock_fops = {
6890 .open = tracing_clock_open,
6891 .read = seq_read,
6892 .llseek = seq_lseek,
6893 .release = tracing_single_release_tr,
6894 .write = tracing_clock_write,
6895 };
6896
6897 static const struct file_operations trace_time_stamp_mode_fops = {
6898 .open = tracing_time_stamp_mode_open,
6899 .read = seq_read,
6900 .llseek = seq_lseek,
6901 .release = tracing_single_release_tr,
6902 };
6903
6904 #ifdef CONFIG_TRACER_SNAPSHOT
6905 static const struct file_operations snapshot_fops = {
6906 .open = tracing_snapshot_open,
6907 .read = seq_read,
6908 .write = tracing_snapshot_write,
6909 .llseek = tracing_lseek,
6910 .release = tracing_snapshot_release,
6911 };
6912
6913 static const struct file_operations snapshot_raw_fops = {
6914 .open = snapshot_raw_open,
6915 .read = tracing_buffers_read,
6916 .release = tracing_buffers_release,
6917 .splice_read = tracing_buffers_splice_read,
6918 .llseek = no_llseek,
6919 };
6920
6921 #endif /* CONFIG_TRACER_SNAPSHOT */
6922
6923 #define TRACING_LOG_ERRS_MAX 8
6924 #define TRACING_LOG_LOC_MAX 128
6925
6926 #define CMD_PREFIX " Command: "
6927
6928 struct err_info {
6929 const char **errs; /* ptr to loc-specific array of err strings */
6930 u8 type; /* index into errs -> specific err string */
6931 u8 pos; /* MAX_FILTER_STR_VAL = 256 */
6932 u64 ts;
6933 };
6934
6935 struct tracing_log_err {
6936 struct list_head list;
6937 struct err_info info;
6938 char loc[TRACING_LOG_LOC_MAX]; /* err location */
6939 char cmd[MAX_FILTER_STR_VAL]; /* what caused err */
6940 };
6941
6942 static DEFINE_MUTEX(tracing_err_log_lock);
6943
6944 static struct tracing_log_err *get_tracing_log_err(struct trace_array *tr)
6945 {
6946 struct tracing_log_err *err;
6947
6948 if (tr->n_err_log_entries < TRACING_LOG_ERRS_MAX) {
6949 err = kzalloc(sizeof(*err), GFP_KERNEL);
6950 if (!err)
6951 err = ERR_PTR(-ENOMEM);
6952 tr->n_err_log_entries++;
6953
6954 return err;
6955 }
6956
6957 err = list_first_entry(&tr->err_log, struct tracing_log_err, list);
6958 list_del(&err->list);
6959
6960 return err;
6961 }
6962
6963 /**
6964 * err_pos - find the position of a string within a command for error careting
6965 * @cmd: The tracing command that caused the error
6966 * @str: The string to position the caret at within @cmd
6967 *
6968 * Finds the position of the first occurence of @str within @cmd. The
6969 * return value can be passed to tracing_log_err() for caret placement
6970 * within @cmd.
6971 *
6972 * Returns the index within @cmd of the first occurence of @str or 0
6973 * if @str was not found.
6974 */
6975 unsigned int err_pos(char *cmd, const char *str)
6976 {
6977 char *found;
6978
6979 if (WARN_ON(!strlen(cmd)))
6980 return 0;
6981
6982 found = strstr(cmd, str);
6983 if (found)
6984 return found - cmd;
6985
6986 return 0;
6987 }
6988
6989 /**
6990 * tracing_log_err - write an error to the tracing error log
6991 * @tr: The associated trace array for the error (NULL for top level array)
6992 * @loc: A string describing where the error occurred
6993 * @cmd: The tracing command that caused the error
6994 * @errs: The array of loc-specific static error strings
6995 * @type: The index into errs[], which produces the specific static err string
6996 * @pos: The position the caret should be placed in the cmd
6997 *
6998 * Writes an error into tracing/error_log of the form:
6999 *
7000 * <loc>: error: <text>
7001 * Command: <cmd>
7002 * ^
7003 *
7004 * tracing/error_log is a small log file containing the last
7005 * TRACING_LOG_ERRS_MAX errors (8). Memory for errors isn't allocated
7006 * unless there has been a tracing error, and the error log can be
7007 * cleared and have its memory freed by writing the empty string in
7008 * truncation mode to it i.e. echo > tracing/error_log.
7009 *
7010 * NOTE: the @errs array along with the @type param are used to
7011 * produce a static error string - this string is not copied and saved
7012 * when the error is logged - only a pointer to it is saved. See
7013 * existing callers for examples of how static strings are typically
7014 * defined for use with tracing_log_err().
7015 */
7016 void tracing_log_err(struct trace_array *tr,
7017 const char *loc, const char *cmd,
7018 const char **errs, u8 type, u8 pos)
7019 {
7020 struct tracing_log_err *err;
7021
7022 if (!tr)
7023 tr = &global_trace;
7024
7025 mutex_lock(&tracing_err_log_lock);
7026 err = get_tracing_log_err(tr);
7027 if (PTR_ERR(err) == -ENOMEM) {
7028 mutex_unlock(&tracing_err_log_lock);
7029 return;
7030 }
7031
7032 snprintf(err->loc, TRACING_LOG_LOC_MAX, "%s: error: ", loc);
7033 snprintf(err->cmd, MAX_FILTER_STR_VAL,"\n" CMD_PREFIX "%s\n", cmd);
7034
7035 err->info.errs = errs;
7036 err->info.type = type;
7037 err->info.pos = pos;
7038 err->info.ts = local_clock();
7039
7040 list_add_tail(&err->list, &tr->err_log);
7041 mutex_unlock(&tracing_err_log_lock);
7042 }
7043
7044 static void clear_tracing_err_log(struct trace_array *tr)
7045 {
7046 struct tracing_log_err *err, *next;
7047
7048 mutex_lock(&tracing_err_log_lock);
7049 list_for_each_entry_safe(err, next, &tr->err_log, list) {
7050 list_del(&err->list);
7051 kfree(err);
7052 }
7053
7054 tr->n_err_log_entries = 0;
7055 mutex_unlock(&tracing_err_log_lock);
7056 }
7057
7058 static void *tracing_err_log_seq_start(struct seq_file *m, loff_t *pos)
7059 {
7060 struct trace_array *tr = m->private;
7061
7062 mutex_lock(&tracing_err_log_lock);
7063
7064 return seq_list_start(&tr->err_log, *pos);
7065 }
7066
7067 static void *tracing_err_log_seq_next(struct seq_file *m, void *v, loff_t *pos)
7068 {
7069 struct trace_array *tr = m->private;
7070
7071 return seq_list_next(v, &tr->err_log, pos);
7072 }
7073
7074 static void tracing_err_log_seq_stop(struct seq_file *m, void *v)
7075 {
7076 mutex_unlock(&tracing_err_log_lock);
7077 }
7078
7079 static void tracing_err_log_show_pos(struct seq_file *m, u8 pos)
7080 {
7081 u8 i;
7082
7083 for (i = 0; i < sizeof(CMD_PREFIX) - 1; i++)
7084 seq_putc(m, ' ');
7085 for (i = 0; i < pos; i++)
7086 seq_putc(m, ' ');
7087 seq_puts(m, "^\n");
7088 }
7089
7090 static int tracing_err_log_seq_show(struct seq_file *m, void *v)
7091 {
7092 struct tracing_log_err *err = v;
7093
7094 if (err) {
7095 const char *err_text = err->info.errs[err->info.type];
7096 u64 sec = err->info.ts;
7097 u32 nsec;
7098
7099 nsec = do_div(sec, NSEC_PER_SEC);
7100 seq_printf(m, "[%5llu.%06u] %s%s", sec, nsec / 1000,
7101 err->loc, err_text);
7102 seq_printf(m, "%s", err->cmd);
7103 tracing_err_log_show_pos(m, err->info.pos);
7104 }
7105
7106 return 0;
7107 }
7108
7109 static const struct seq_operations tracing_err_log_seq_ops = {
7110 .start = tracing_err_log_seq_start,
7111 .next = tracing_err_log_seq_next,
7112 .stop = tracing_err_log_seq_stop,
7113 .show = tracing_err_log_seq_show
7114 };
7115
7116 static int tracing_err_log_open(struct inode *inode, struct file *file)
7117 {
7118 struct trace_array *tr = inode->i_private;
7119 int ret = 0;
7120
7121 if (trace_array_get(tr) < 0)
7122 return -ENODEV;
7123
7124 /* If this file was opened for write, then erase contents */
7125 if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC))
7126 clear_tracing_err_log(tr);
7127
7128 if (file->f_mode & FMODE_READ) {
7129 ret = seq_open(file, &tracing_err_log_seq_ops);
7130 if (!ret) {
7131 struct seq_file *m = file->private_data;
7132 m->private = tr;
7133 } else {
7134 trace_array_put(tr);
7135 }
7136 }
7137 return ret;
7138 }
7139
7140 static ssize_t tracing_err_log_write(struct file *file,
7141 const char __user *buffer,
7142 size_t count, loff_t *ppos)
7143 {
7144 return count;
7145 }
7146
7147 static int tracing_err_log_release(struct inode *inode, struct file *file)
7148 {
7149 struct trace_array *tr = inode->i_private;
7150
7151 trace_array_put(tr);
7152
7153 if (file->f_mode & FMODE_READ)
7154 seq_release(inode, file);
7155
7156 return 0;
7157 }
7158
7159 static const struct file_operations tracing_err_log_fops = {
7160 .open = tracing_err_log_open,
7161 .write = tracing_err_log_write,
7162 .read = seq_read,
7163 .llseek = seq_lseek,
7164 .release = tracing_err_log_release,
7165 };
7166
7167 static int tracing_buffers_open(struct inode *inode, struct file *filp)
7168 {
7169 struct trace_array *tr = inode->i_private;
7170 struct ftrace_buffer_info *info;
7171 int ret;
7172
7173 if (tracing_disabled)
7174 return -ENODEV;
7175
7176 if (trace_array_get(tr) < 0)
7177 return -ENODEV;
7178
7179 info = kzalloc(sizeof(*info), GFP_KERNEL);
7180 if (!info) {
7181 trace_array_put(tr);
7182 return -ENOMEM;
7183 }
7184
7185 mutex_lock(&trace_types_lock);
7186
7187 info->iter.tr = tr;
7188 info->iter.cpu_file = tracing_get_cpu(inode);
7189 info->iter.trace = tr->current_trace;
7190 info->iter.trace_buffer = &tr->trace_buffer;
7191 info->spare = NULL;
7192 /* Force reading ring buffer for first read */
7193 info->read = (unsigned int)-1;
7194
7195 filp->private_data = info;
7196
7197 tr->current_trace->ref++;
7198
7199 mutex_unlock(&trace_types_lock);
7200
7201 ret = nonseekable_open(inode, filp);
7202 if (ret < 0)
7203 trace_array_put(tr);
7204
7205 return ret;
7206 }
7207
7208 static __poll_t
7209 tracing_buffers_poll(struct file *filp, poll_table *poll_table)
7210 {
7211 struct ftrace_buffer_info *info = filp->private_data;
7212 struct trace_iterator *iter = &info->iter;
7213
7214 return trace_poll(iter, filp, poll_table);
7215 }
7216
7217 static ssize_t
7218 tracing_buffers_read(struct file *filp, char __user *ubuf,
7219 size_t count, loff_t *ppos)
7220 {
7221 struct ftrace_buffer_info *info = filp->private_data;
7222 struct trace_iterator *iter = &info->iter;
7223 ssize_t ret = 0;
7224 ssize_t size;
7225
7226 if (!count)
7227 return 0;
7228
7229 #ifdef CONFIG_TRACER_MAX_TRACE
7230 if (iter->snapshot && iter->tr->current_trace->use_max_tr)
7231 return -EBUSY;
7232 #endif
7233
7234 if (!info->spare) {
7235 info->spare = ring_buffer_alloc_read_page(iter->trace_buffer->buffer,
7236 iter->cpu_file);
7237 if (IS_ERR(info->spare)) {
7238 ret = PTR_ERR(info->spare);
7239 info->spare = NULL;
7240 } else {
7241 info->spare_cpu = iter->cpu_file;
7242 }
7243 }
7244 if (!info->spare)
7245 return ret;
7246
7247 /* Do we have previous read data to read? */
7248 if (info->read < PAGE_SIZE)
7249 goto read;
7250
7251 again:
7252 trace_access_lock(iter->cpu_file);
7253 ret = ring_buffer_read_page(iter->trace_buffer->buffer,
7254 &info->spare,
7255 count,
7256 iter->cpu_file, 0);
7257 trace_access_unlock(iter->cpu_file);
7258
7259 if (ret < 0) {
7260 if (trace_empty(iter)) {
7261 if ((filp->f_flags & O_NONBLOCK))
7262 return -EAGAIN;
7263
7264 ret = wait_on_pipe(iter, 0);
7265 if (ret)
7266 return ret;
7267
7268 goto again;
7269 }
7270 return 0;
7271 }
7272
7273 info->read = 0;
7274 read:
7275 size = PAGE_SIZE - info->read;
7276 if (size > count)
7277 size = count;
7278
7279 ret = copy_to_user(ubuf, info->spare + info->read, size);
7280 if (ret == size)
7281 return -EFAULT;
7282
7283 size -= ret;
7284
7285 *ppos += size;
7286 info->read += size;
7287
7288 return size;
7289 }
7290
7291 static int tracing_buffers_release(struct inode *inode, struct file *file)
7292 {
7293 struct ftrace_buffer_info *info = file->private_data;
7294 struct trace_iterator *iter = &info->iter;
7295
7296 mutex_lock(&trace_types_lock);
7297
7298 iter->tr->current_trace->ref--;
7299
7300 __trace_array_put(iter->tr);
7301
7302 if (info->spare)
7303 ring_buffer_free_read_page(iter->trace_buffer->buffer,
7304 info->spare_cpu, info->spare);
7305 kfree(info);
7306
7307 mutex_unlock(&trace_types_lock);
7308
7309 return 0;
7310 }
7311
7312 struct buffer_ref {
7313 struct ring_buffer *buffer;
7314 void *page;
7315 int cpu;
7316 refcount_t refcount;
7317 };
7318
7319 static void buffer_ref_release(struct buffer_ref *ref)
7320 {
7321 if (!refcount_dec_and_test(&ref->refcount))
7322 return;
7323 ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page);
7324 kfree(ref);
7325 }
7326
7327 static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
7328 struct pipe_buffer *buf)
7329 {
7330 struct buffer_ref *ref = (struct buffer_ref *)buf->private;
7331
7332 buffer_ref_release(ref);
7333 buf->private = 0;
7334 }
7335
7336 static bool buffer_pipe_buf_get(struct pipe_inode_info *pipe,
7337 struct pipe_buffer *buf)
7338 {
7339 struct buffer_ref *ref = (struct buffer_ref *)buf->private;
7340
7341 if (refcount_read(&ref->refcount) > INT_MAX/2)
7342 return false;
7343
7344 refcount_inc(&ref->refcount);
7345 return true;
7346 }
7347
7348 /* Pipe buffer operations for a buffer. */
7349 static const struct pipe_buf_operations buffer_pipe_buf_ops = {
7350 .confirm = generic_pipe_buf_confirm,
7351 .release = buffer_pipe_buf_release,
7352 .steal = generic_pipe_buf_nosteal,
7353 .get = buffer_pipe_buf_get,
7354 };
7355
7356 /*
7357 * Callback from splice_to_pipe(), if we need to release some pages
7358 * at the end of the spd in case we error'ed out in filling the pipe.
7359 */
7360 static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
7361 {
7362 struct buffer_ref *ref =
7363 (struct buffer_ref *)spd->partial[i].private;
7364
7365 buffer_ref_release(ref);
7366 spd->partial[i].private = 0;
7367 }
7368
7369 static ssize_t
7370 tracing_buffers_splice_read(struct file *file, loff_t *ppos,
7371 struct pipe_inode_info *pipe, size_t len,
7372 unsigned int flags)
7373 {
7374 struct ftrace_buffer_info *info = file->private_data;
7375 struct trace_iterator *iter = &info->iter;
7376 struct partial_page partial_def[PIPE_DEF_BUFFERS];
7377 struct page *pages_def[PIPE_DEF_BUFFERS];
7378 struct splice_pipe_desc spd = {
7379 .pages = pages_def,
7380 .partial = partial_def,
7381 .nr_pages_max = PIPE_DEF_BUFFERS,
7382 .ops = &buffer_pipe_buf_ops,
7383 .spd_release = buffer_spd_release,
7384 };
7385 struct buffer_ref *ref;
7386 int entries, i;
7387 ssize_t ret = 0;
7388
7389 #ifdef CONFIG_TRACER_MAX_TRACE
7390 if (iter->snapshot && iter->tr->current_trace->use_max_tr)
7391 return -EBUSY;
7392 #endif
7393
7394 if (*ppos & (PAGE_SIZE - 1))
7395 return -EINVAL;
7396
7397 if (len & (PAGE_SIZE - 1)) {
7398 if (len < PAGE_SIZE)
7399 return -EINVAL;
7400 len &= PAGE_MASK;
7401 }
7402
7403 if (splice_grow_spd(pipe, &spd))
7404 return -ENOMEM;
7405
7406 again:
7407 trace_access_lock(iter->cpu_file);
7408 entries = ring_buffer_entries_cpu(iter->trace_buffer->buffer, iter->cpu_file);
7409
7410 for (i = 0; i < spd.nr_pages_max && len && entries; i++, len -= PAGE_SIZE) {
7411 struct page *page;
7412 int r;
7413
7414 ref = kzalloc(sizeof(*ref), GFP_KERNEL);
7415 if (!ref) {
7416 ret = -ENOMEM;
7417 break;
7418 }
7419
7420 refcount_set(&ref->refcount, 1);
7421 ref->buffer = iter->trace_buffer->buffer;
7422 ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file);
7423 if (IS_ERR(ref->page)) {
7424 ret = PTR_ERR(ref->page);
7425 ref->page = NULL;
7426 kfree(ref);
7427 break;
7428 }
7429 ref->cpu = iter->cpu_file;
7430
7431 r = ring_buffer_read_page(ref->buffer, &ref->page,
7432 len, iter->cpu_file, 1);
7433 if (r < 0) {
7434 ring_buffer_free_read_page(ref->buffer, ref->cpu,
7435 ref->page);
7436 kfree(ref);
7437 break;
7438 }
7439
7440 page = virt_to_page(ref->page);
7441
7442 spd.pages[i] = page;
7443 spd.partial[i].len = PAGE_SIZE;
7444 spd.partial[i].offset = 0;
7445 spd.partial[i].private = (unsigned long)ref;
7446 spd.nr_pages++;
7447 *ppos += PAGE_SIZE;
7448
7449 entries = ring_buffer_entries_cpu(iter->trace_buffer->buffer, iter->cpu_file);
7450 }
7451
7452 trace_access_unlock(iter->cpu_file);
7453 spd.nr_pages = i;
7454
7455 /* did we read anything? */
7456 if (!spd.nr_pages) {
7457 if (ret)
7458 goto out;
7459
7460 ret = -EAGAIN;
7461 if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK))
7462 goto out;
7463
7464 ret = wait_on_pipe(iter, iter->tr->buffer_percent);
7465 if (ret)
7466 goto out;
7467
7468 goto again;
7469 }
7470
7471 ret = splice_to_pipe(pipe, &spd);
7472 out:
7473 splice_shrink_spd(&spd);
7474
7475 return ret;
7476 }
7477
7478 static const struct file_operations tracing_buffers_fops = {
7479 .open = tracing_buffers_open,
7480 .read = tracing_buffers_read,
7481 .poll = tracing_buffers_poll,
7482 .release = tracing_buffers_release,
7483 .splice_read = tracing_buffers_splice_read,
7484 .llseek = no_llseek,
7485 };
7486
7487 static ssize_t
7488 tracing_stats_read(struct file *filp, char __user *ubuf,
7489 size_t count, loff_t *ppos)
7490 {
7491 struct inode *inode = file_inode(filp);
7492 struct trace_array *tr = inode->i_private;
7493 struct trace_buffer *trace_buf = &tr->trace_buffer;
7494 int cpu = tracing_get_cpu(inode);
7495 struct trace_seq *s;
7496 unsigned long cnt;
7497 unsigned long long t;
7498 unsigned long usec_rem;
7499
7500 s = kmalloc(sizeof(*s), GFP_KERNEL);
7501 if (!s)
7502 return -ENOMEM;
7503
7504 trace_seq_init(s);
7505
7506 cnt = ring_buffer_entries_cpu(trace_buf->buffer, cpu);
7507 trace_seq_printf(s, "entries: %ld\n", cnt);
7508
7509 cnt = ring_buffer_overrun_cpu(trace_buf->buffer, cpu);
7510 trace_seq_printf(s, "overrun: %ld\n", cnt);
7511
7512 cnt = ring_buffer_commit_overrun_cpu(trace_buf->buffer, cpu);
7513 trace_seq_printf(s, "commit overrun: %ld\n", cnt);
7514
7515 cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu);
7516 trace_seq_printf(s, "bytes: %ld\n", cnt);
7517
7518 if (trace_clocks[tr->clock_id].in_ns) {
7519 /* local or global for trace_clock */
7520 t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
7521 usec_rem = do_div(t, USEC_PER_SEC);
7522 trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n",
7523 t, usec_rem);
7524
7525 t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer, cpu));
7526 usec_rem = do_div(t, USEC_PER_SEC);
7527 trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
7528 } else {
7529 /* counter or tsc mode for trace_clock */
7530 trace_seq_printf(s, "oldest event ts: %llu\n",
7531 ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
7532
7533 trace_seq_printf(s, "now ts: %llu\n",
7534 ring_buffer_time_stamp(trace_buf->buffer, cpu));
7535 }
7536
7537 cnt = ring_buffer_dropped_events_cpu(trace_buf->buffer, cpu);
7538 trace_seq_printf(s, "dropped events: %ld\n", cnt);
7539
7540 cnt = ring_buffer_read_events_cpu(trace_buf->buffer, cpu);
7541 trace_seq_printf(s, "read events: %ld\n", cnt);
7542
7543 count = simple_read_from_buffer(ubuf, count, ppos,
7544 s->buffer, trace_seq_used(s));
7545
7546 kfree(s);
7547
7548 return count;
7549 }
7550
7551 static const struct file_operations tracing_stats_fops = {
7552 .open = tracing_open_generic_tr,
7553 .read = tracing_stats_read,
7554 .llseek = generic_file_llseek,
7555 .release = tracing_release_generic_tr,
7556 };
7557
7558 #ifdef CONFIG_DYNAMIC_FTRACE
7559
7560 static ssize_t
7561 tracing_read_dyn_info(struct file *filp, char __user *ubuf,
7562 size_t cnt, loff_t *ppos)
7563 {
7564 unsigned long *p = filp->private_data;
7565 char buf[64]; /* Not too big for a shallow stack */
7566 int r;
7567
7568 r = scnprintf(buf, 63, "%ld", *p);
7569 buf[r++] = '\n';
7570
7571 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
7572 }
7573
7574 static const struct file_operations tracing_dyn_info_fops = {
7575 .open = tracing_open_generic,
7576 .read = tracing_read_dyn_info,
7577 .llseek = generic_file_llseek,
7578 };
7579 #endif /* CONFIG_DYNAMIC_FTRACE */
7580
7581 #if defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE)
7582 static void
7583 ftrace_snapshot(unsigned long ip, unsigned long parent_ip,
7584 struct trace_array *tr, struct ftrace_probe_ops *ops,
7585 void *data)
7586 {
7587 tracing_snapshot_instance(tr);
7588 }
7589
7590 static void
7591 ftrace_count_snapshot(unsigned long ip, unsigned long parent_ip,
7592 struct trace_array *tr, struct ftrace_probe_ops *ops,
7593 void *data)
7594 {
7595 struct ftrace_func_mapper *mapper = data;
7596 long *count = NULL;
7597
7598 if (mapper)
7599 count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
7600
7601 if (count) {
7602
7603 if (*count <= 0)
7604 return;
7605
7606 (*count)--;
7607 }
7608
7609 tracing_snapshot_instance(tr);
7610 }
7611
7612 static int
7613 ftrace_snapshot_print(struct seq_file *m, unsigned long ip,
7614 struct ftrace_probe_ops *ops, void *data)
7615 {
7616 struct ftrace_func_mapper *mapper = data;
7617 long *count = NULL;
7618
7619 seq_printf(m, "%ps:", (void *)ip);
7620
7621 seq_puts(m, "snapshot");
7622
7623 if (mapper)
7624 count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
7625
7626 if (count)
7627 seq_printf(m, ":count=%ld\n", *count);
7628 else
7629 seq_puts(m, ":unlimited\n");
7630
7631 return 0;
7632 }
7633
7634 static int
7635 ftrace_snapshot_init(struct ftrace_probe_ops *ops, struct trace_array *tr,
7636 unsigned long ip, void *init_data, void **data)
7637 {
7638 struct ftrace_func_mapper *mapper = *data;
7639
7640 if (!mapper) {
7641 mapper = allocate_ftrace_func_mapper();
7642 if (!mapper)
7643 return -ENOMEM;
7644 *data = mapper;
7645 }
7646
7647 return ftrace_func_mapper_add_ip(mapper, ip, init_data);
7648 }
7649
7650 static void
7651 ftrace_snapshot_free(struct ftrace_probe_ops *ops, struct trace_array *tr,
7652 unsigned long ip, void *data)
7653 {
7654 struct ftrace_func_mapper *mapper = data;
7655
7656 if (!ip) {
7657 if (!mapper)
7658 return;
7659 free_ftrace_func_mapper(mapper, NULL);
7660 return;
7661 }
7662
7663 ftrace_func_mapper_remove_ip(mapper, ip);
7664 }
7665
7666 static struct ftrace_probe_ops snapshot_probe_ops = {
7667 .func = ftrace_snapshot,
7668 .print = ftrace_snapshot_print,
7669 };
7670
7671 static struct ftrace_probe_ops snapshot_count_probe_ops = {
7672 .func = ftrace_count_snapshot,
7673 .print = ftrace_snapshot_print,
7674 .init = ftrace_snapshot_init,
7675 .free = ftrace_snapshot_free,
7676 };
7677
7678 static int
7679 ftrace_trace_snapshot_callback(struct trace_array *tr, struct ftrace_hash *hash,
7680 char *glob, char *cmd, char *param, int enable)
7681 {
7682 struct ftrace_probe_ops *ops;
7683 void *count = (void *)-1;
7684 char *number;
7685 int ret;
7686
7687 if (!tr)
7688 return -ENODEV;
7689
7690 /* hash funcs only work with set_ftrace_filter */
7691 if (!enable)
7692 return -EINVAL;
7693
7694 ops = param ? &snapshot_count_probe_ops : &snapshot_probe_ops;
7695
7696 if (glob[0] == '!')
7697 return unregister_ftrace_function_probe_func(glob+1, tr, ops);
7698
7699 if (!param)
7700 goto out_reg;
7701
7702 number = strsep(&param, ":");
7703
7704 if (!strlen(number))
7705 goto out_reg;
7706
7707 /*
7708 * We use the callback data field (which is a pointer)
7709 * as our counter.
7710 */
7711 ret = kstrtoul(number, 0, (unsigned long *)&count);
7712 if (ret)
7713 return ret;
7714
7715 out_reg:
7716 ret = tracing_alloc_snapshot_instance(tr);
7717 if (ret < 0)
7718 goto out;
7719
7720 ret = register_ftrace_function_probe(glob, tr, ops, count);
7721
7722 out:
7723 return ret < 0 ? ret : 0;
7724 }
7725
7726 static struct ftrace_func_command ftrace_snapshot_cmd = {
7727 .name = "snapshot",
7728 .func = ftrace_trace_snapshot_callback,
7729 };
7730
7731 static __init int register_snapshot_cmd(void)
7732 {
7733 return register_ftrace_command(&ftrace_snapshot_cmd);
7734 }
7735 #else
7736 static inline __init int register_snapshot_cmd(void) { return 0; }
7737 #endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */
7738
7739 static struct dentry *tracing_get_dentry(struct trace_array *tr)
7740 {
7741 if (WARN_ON(!tr->dir))
7742 return ERR_PTR(-ENODEV);
7743
7744 /* Top directory uses NULL as the parent */
7745 if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
7746 return NULL;
7747
7748 /* All sub buffers have a descriptor */
7749 return tr->dir;
7750 }
7751
7752 static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu)
7753 {
7754 struct dentry *d_tracer;
7755
7756 if (tr->percpu_dir)
7757 return tr->percpu_dir;
7758
7759 d_tracer = tracing_get_dentry(tr);
7760 if (IS_ERR(d_tracer))
7761 return NULL;
7762
7763 tr->percpu_dir = tracefs_create_dir("per_cpu", d_tracer);
7764
7765 WARN_ONCE(!tr->percpu_dir,
7766 "Could not create tracefs directory 'per_cpu/%d'\n", cpu);
7767
7768 return tr->percpu_dir;
7769 }
7770
7771 static struct dentry *
7772 trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent,
7773 void *data, long cpu, const struct file_operations *fops)
7774 {
7775 struct dentry *ret = trace_create_file(name, mode, parent, data, fops);
7776
7777 if (ret) /* See tracing_get_cpu() */
7778 d_inode(ret)->i_cdev = (void *)(cpu + 1);
7779 return ret;
7780 }
7781
7782 static void
7783 tracing_init_tracefs_percpu(struct trace_array *tr, long cpu)
7784 {
7785 struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu);
7786 struct dentry *d_cpu;
7787 char cpu_dir[30]; /* 30 characters should be more than enough */
7788
7789 if (!d_percpu)
7790 return;
7791
7792 snprintf(cpu_dir, 30, "cpu%ld", cpu);
7793 d_cpu = tracefs_create_dir(cpu_dir, d_percpu);
7794 if (!d_cpu) {
7795 pr_warn("Could not create tracefs '%s' entry\n", cpu_dir);
7796 return;
7797 }
7798
7799 /* per cpu trace_pipe */
7800 trace_create_cpu_file("trace_pipe", 0444, d_cpu,
7801 tr, cpu, &tracing_pipe_fops);
7802
7803 /* per cpu trace */
7804 trace_create_cpu_file("trace", 0644, d_cpu,
7805 tr, cpu, &tracing_fops);
7806
7807 trace_create_cpu_file("trace_pipe_raw", 0444, d_cpu,
7808 tr, cpu, &tracing_buffers_fops);
7809
7810 trace_create_cpu_file("stats", 0444, d_cpu,
7811 tr, cpu, &tracing_stats_fops);
7812
7813 trace_create_cpu_file("buffer_size_kb", 0444, d_cpu,
7814 tr, cpu, &tracing_entries_fops);
7815
7816 #ifdef CONFIG_TRACER_SNAPSHOT
7817 trace_create_cpu_file("snapshot", 0644, d_cpu,
7818 tr, cpu, &snapshot_fops);
7819
7820 trace_create_cpu_file("snapshot_raw", 0444, d_cpu,
7821 tr, cpu, &snapshot_raw_fops);
7822 #endif
7823 }
7824
7825 #ifdef CONFIG_FTRACE_SELFTEST
7826 /* Let selftest have access to static functions in this file */
7827 #include "trace_selftest.c"
7828 #endif
7829
7830 static ssize_t
7831 trace_options_read(struct file *filp, char __user *ubuf, size_t cnt,
7832 loff_t *ppos)
7833 {
7834 struct trace_option_dentry *topt = filp->private_data;
7835 char *buf;
7836
7837 if (topt->flags->val & topt->opt->bit)
7838 buf = "1\n";
7839 else
7840 buf = "0\n";
7841
7842 return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
7843 }
7844
7845 static ssize_t
7846 trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
7847 loff_t *ppos)
7848 {
7849 struct trace_option_dentry *topt = filp->private_data;
7850 unsigned long val;
7851 int ret;
7852
7853 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7854 if (ret)
7855 return ret;
7856
7857 if (val != 0 && val != 1)
7858 return -EINVAL;
7859
7860 if (!!(topt->flags->val & topt->opt->bit) != val) {
7861 mutex_lock(&trace_types_lock);
7862 ret = __set_tracer_option(topt->tr, topt->flags,
7863 topt->opt, !val);
7864 mutex_unlock(&trace_types_lock);
7865 if (ret)
7866 return ret;
7867 }
7868
7869 *ppos += cnt;
7870
7871 return cnt;
7872 }
7873
7874
7875 static const struct file_operations trace_options_fops = {
7876 .open = tracing_open_generic,
7877 .read = trace_options_read,
7878 .write = trace_options_write,
7879 .llseek = generic_file_llseek,
7880 };
7881
7882 /*
7883 * In order to pass in both the trace_array descriptor as well as the index
7884 * to the flag that the trace option file represents, the trace_array
7885 * has a character array of trace_flags_index[], which holds the index
7886 * of the bit for the flag it represents. index[0] == 0, index[1] == 1, etc.
7887 * The address of this character array is passed to the flag option file
7888 * read/write callbacks.
7889 *
7890 * In order to extract both the index and the trace_array descriptor,
7891 * get_tr_index() uses the following algorithm.
7892 *
7893 * idx = *ptr;
7894 *
7895 * As the pointer itself contains the address of the index (remember
7896 * index[1] == 1).
7897 *
7898 * Then to get the trace_array descriptor, by subtracting that index
7899 * from the ptr, we get to the start of the index itself.
7900 *
7901 * ptr - idx == &index[0]
7902 *
7903 * Then a simple container_of() from that pointer gets us to the
7904 * trace_array descriptor.
7905 */
7906 static void get_tr_index(void *data, struct trace_array **ptr,
7907 unsigned int *pindex)
7908 {
7909 *pindex = *(unsigned char *)data;
7910
7911 *ptr = container_of(data - *pindex, struct trace_array,
7912 trace_flags_index);
7913 }
7914
7915 static ssize_t
7916 trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt,
7917 loff_t *ppos)
7918 {
7919 void *tr_index = filp->private_data;
7920 struct trace_array *tr;
7921 unsigned int index;
7922 char *buf;
7923
7924 get_tr_index(tr_index, &tr, &index);
7925
7926 if (tr->trace_flags & (1 << index))
7927 buf = "1\n";
7928 else
7929 buf = "0\n";
7930
7931 return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
7932 }
7933
7934 static ssize_t
7935 trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
7936 loff_t *ppos)
7937 {
7938 void *tr_index = filp->private_data;
7939 struct trace_array *tr;
7940 unsigned int index;
7941 unsigned long val;
7942 int ret;
7943
7944 get_tr_index(tr_index, &tr, &index);
7945
7946 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7947 if (ret)
7948 return ret;
7949
7950 if (val != 0 && val != 1)
7951 return -EINVAL;
7952
7953 mutex_lock(&trace_types_lock);
7954 ret = set_tracer_flag(tr, 1 << index, val);
7955 mutex_unlock(&trace_types_lock);
7956
7957 if (ret < 0)
7958 return ret;
7959
7960 *ppos += cnt;
7961
7962 return cnt;
7963 }
7964
7965 static const struct file_operations trace_options_core_fops = {
7966 .open = tracing_open_generic,
7967 .read = trace_options_core_read,
7968 .write = trace_options_core_write,
7969 .llseek = generic_file_llseek,
7970 };
7971
7972 struct dentry *trace_create_file(const char *name,
7973 umode_t mode,
7974 struct dentry *parent,
7975 void *data,
7976 const struct file_operations *fops)
7977 {
7978 struct dentry *ret;
7979
7980 ret = tracefs_create_file(name, mode, parent, data, fops);
7981 if (!ret)
7982 pr_warn("Could not create tracefs '%s' entry\n", name);
7983
7984 return ret;
7985 }
7986
7987
7988 static struct dentry *trace_options_init_dentry(struct trace_array *tr)
7989 {
7990 struct dentry *d_tracer;
7991
7992 if (tr->options)
7993 return tr->options;
7994
7995 d_tracer = tracing_get_dentry(tr);
7996 if (IS_ERR(d_tracer))
7997 return NULL;
7998
7999 tr->options = tracefs_create_dir("options", d_tracer);
8000 if (!tr->options) {
8001 pr_warn("Could not create tracefs directory 'options'\n");
8002 return NULL;
8003 }
8004
8005 return tr->options;
8006 }
8007
8008 static void
8009 create_trace_option_file(struct trace_array *tr,
8010 struct trace_option_dentry *topt,
8011 struct tracer_flags *flags,
8012 struct tracer_opt *opt)
8013 {
8014 struct dentry *t_options;
8015
8016 t_options = trace_options_init_dentry(tr);
8017 if (!t_options)
8018 return;
8019
8020 topt->flags = flags;
8021 topt->opt = opt;
8022 topt->tr = tr;
8023
8024 topt->entry = trace_create_file(opt->name, 0644, t_options, topt,
8025 &trace_options_fops);
8026
8027 }
8028
8029 static void
8030 create_trace_option_files(struct trace_array *tr, struct tracer *tracer)
8031 {
8032 struct trace_option_dentry *topts;
8033 struct trace_options *tr_topts;
8034 struct tracer_flags *flags;
8035 struct tracer_opt *opts;
8036 int cnt;
8037 int i;
8038
8039 if (!tracer)
8040 return;
8041
8042 flags = tracer->flags;
8043
8044 if (!flags || !flags->opts)
8045 return;
8046
8047 /*
8048 * If this is an instance, only create flags for tracers
8049 * the instance may have.
8050 */
8051 if (!trace_ok_for_array(tracer, tr))
8052 return;
8053
8054 for (i = 0; i < tr->nr_topts; i++) {
8055 /* Make sure there's no duplicate flags. */
8056 if (WARN_ON_ONCE(tr->topts[i].tracer->flags == tracer->flags))
8057 return;
8058 }
8059
8060 opts = flags->opts;
8061
8062 for (cnt = 0; opts[cnt].name; cnt++)
8063 ;
8064
8065 topts = kcalloc(cnt + 1, sizeof(*topts), GFP_KERNEL);
8066 if (!topts)
8067 return;
8068
8069 tr_topts = krealloc(tr->topts, sizeof(*tr->topts) * (tr->nr_topts + 1),
8070 GFP_KERNEL);
8071 if (!tr_topts) {
8072 kfree(topts);
8073 return;
8074 }
8075
8076 tr->topts = tr_topts;
8077 tr->topts[tr->nr_topts].tracer = tracer;
8078 tr->topts[tr->nr_topts].topts = topts;
8079 tr->nr_topts++;
8080
8081 for (cnt = 0; opts[cnt].name; cnt++) {
8082 create_trace_option_file(tr, &topts[cnt], flags,
8083 &opts[cnt]);
8084 WARN_ONCE(topts[cnt].entry == NULL,
8085 "Failed to create trace option: %s",
8086 opts[cnt].name);
8087 }
8088 }
8089
8090 static struct dentry *
8091 create_trace_option_core_file(struct trace_array *tr,
8092 const char *option, long index)
8093 {
8094 struct dentry *t_options;
8095
8096 t_options = trace_options_init_dentry(tr);
8097 if (!t_options)
8098 return NULL;
8099
8100 return trace_create_file(option, 0644, t_options,
8101 (void *)&tr->trace_flags_index[index],
8102 &trace_options_core_fops);
8103 }
8104
8105 static void create_trace_options_dir(struct trace_array *tr)
8106 {
8107 struct dentry *t_options;
8108 bool top_level = tr == &global_trace;
8109 int i;
8110
8111 t_options = trace_options_init_dentry(tr);
8112 if (!t_options)
8113 return;
8114
8115 for (i = 0; trace_options[i]; i++) {
8116 if (top_level ||
8117 !((1 << i) & TOP_LEVEL_TRACE_FLAGS))
8118 create_trace_option_core_file(tr, trace_options[i], i);
8119 }
8120 }
8121
8122 static ssize_t
8123 rb_simple_read(struct file *filp, char __user *ubuf,
8124 size_t cnt, loff_t *ppos)
8125 {
8126 struct trace_array *tr = filp->private_data;
8127 char buf[64];
8128 int r;
8129
8130 r = tracer_tracing_is_on(tr);
8131 r = sprintf(buf, "%d\n", r);
8132
8133 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8134 }
8135
8136 static ssize_t
8137 rb_simple_write(struct file *filp, const char __user *ubuf,
8138 size_t cnt, loff_t *ppos)
8139 {
8140 struct trace_array *tr = filp->private_data;
8141 struct ring_buffer *buffer = tr->trace_buffer.buffer;
8142 unsigned long val;
8143 int ret;
8144
8145 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8146 if (ret)
8147 return ret;
8148
8149 if (buffer) {
8150 mutex_lock(&trace_types_lock);
8151 if (!!val == tracer_tracing_is_on(tr)) {
8152 val = 0; /* do nothing */
8153 } else if (val) {
8154 tracer_tracing_on(tr);
8155 if (tr->current_trace->start)
8156 tr->current_trace->start(tr);
8157 } else {
8158 tracer_tracing_off(tr);
8159 if (tr->current_trace->stop)
8160 tr->current_trace->stop(tr);
8161 }
8162 mutex_unlock(&trace_types_lock);
8163 }
8164
8165 (*ppos)++;
8166
8167 return cnt;
8168 }
8169
8170 static const struct file_operations rb_simple_fops = {
8171 .open = tracing_open_generic_tr,
8172 .read = rb_simple_read,
8173 .write = rb_simple_write,
8174 .release = tracing_release_generic_tr,
8175 .llseek = default_llseek,
8176 };
8177
8178 static ssize_t
8179 buffer_percent_read(struct file *filp, char __user *ubuf,
8180 size_t cnt, loff_t *ppos)
8181 {
8182 struct trace_array *tr = filp->private_data;
8183 char buf[64];
8184 int r;
8185
8186 r = tr->buffer_percent;
8187 r = sprintf(buf, "%d\n", r);
8188
8189 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8190 }
8191
8192 static ssize_t
8193 buffer_percent_write(struct file *filp, const char __user *ubuf,
8194 size_t cnt, loff_t *ppos)
8195 {
8196 struct trace_array *tr = filp->private_data;
8197 unsigned long val;
8198 int ret;
8199
8200 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8201 if (ret)
8202 return ret;
8203
8204 if (val > 100)
8205 return -EINVAL;
8206
8207 if (!val)
8208 val = 1;
8209
8210 tr->buffer_percent = val;
8211
8212 (*ppos)++;
8213
8214 return cnt;
8215 }
8216
8217 static const struct file_operations buffer_percent_fops = {
8218 .open = tracing_open_generic_tr,
8219 .read = buffer_percent_read,
8220 .write = buffer_percent_write,
8221 .release = tracing_release_generic_tr,
8222 .llseek = default_llseek,
8223 };
8224
8225 static struct dentry *trace_instance_dir;
8226
8227 static void
8228 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer);
8229
8230 static int
8231 allocate_trace_buffer(struct trace_array *tr, struct trace_buffer *buf, int size)
8232 {
8233 enum ring_buffer_flags rb_flags;
8234
8235 rb_flags = tr->trace_flags & TRACE_ITER_OVERWRITE ? RB_FL_OVERWRITE : 0;
8236
8237 buf->tr = tr;
8238
8239 buf->buffer = ring_buffer_alloc(size, rb_flags);
8240 if (!buf->buffer)
8241 return -ENOMEM;
8242
8243 buf->data = alloc_percpu(struct trace_array_cpu);
8244 if (!buf->data) {
8245 ring_buffer_free(buf->buffer);
8246 buf->buffer = NULL;
8247 return -ENOMEM;
8248 }
8249
8250 /* Allocate the first page for all buffers */
8251 set_buffer_entries(&tr->trace_buffer,
8252 ring_buffer_size(tr->trace_buffer.buffer, 0));
8253
8254 return 0;
8255 }
8256
8257 static int allocate_trace_buffers(struct trace_array *tr, int size)
8258 {
8259 int ret;
8260
8261 ret = allocate_trace_buffer(tr, &tr->trace_buffer, size);
8262 if (ret)
8263 return ret;
8264
8265 #ifdef CONFIG_TRACER_MAX_TRACE
8266 ret = allocate_trace_buffer(tr, &tr->max_buffer,
8267 allocate_snapshot ? size : 1);
8268 if (WARN_ON(ret)) {
8269 ring_buffer_free(tr->trace_buffer.buffer);
8270 tr->trace_buffer.buffer = NULL;
8271 free_percpu(tr->trace_buffer.data);
8272 tr->trace_buffer.data = NULL;
8273 return -ENOMEM;
8274 }
8275 tr->allocated_snapshot = allocate_snapshot;
8276
8277 /*
8278 * Only the top level trace array gets its snapshot allocated
8279 * from the kernel command line.
8280 */
8281 allocate_snapshot = false;
8282 #endif
8283 return 0;
8284 }
8285
8286 static void free_trace_buffer(struct trace_buffer *buf)
8287 {
8288 if (buf->buffer) {
8289 ring_buffer_free(buf->buffer);
8290 buf->buffer = NULL;
8291 free_percpu(buf->data);
8292 buf->data = NULL;
8293 }
8294 }
8295
8296 static void free_trace_buffers(struct trace_array *tr)
8297 {
8298 if (!tr)
8299 return;
8300
8301 free_trace_buffer(&tr->trace_buffer);
8302
8303 #ifdef CONFIG_TRACER_MAX_TRACE
8304 free_trace_buffer(&tr->max_buffer);
8305 #endif
8306 }
8307
8308 static void init_trace_flags_index(struct trace_array *tr)
8309 {
8310 int i;
8311
8312 /* Used by the trace options files */
8313 for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++)
8314 tr->trace_flags_index[i] = i;
8315 }
8316
8317 static void __update_tracer_options(struct trace_array *tr)
8318 {
8319 struct tracer *t;
8320
8321 for (t = trace_types; t; t = t->next)
8322 add_tracer_options(tr, t);
8323 }
8324
8325 static void update_tracer_options(struct trace_array *tr)
8326 {
8327 mutex_lock(&trace_types_lock);
8328 __update_tracer_options(tr);
8329 mutex_unlock(&trace_types_lock);
8330 }
8331
8332 struct trace_array *trace_array_create(const char *name)
8333 {
8334 struct trace_array *tr;
8335 int ret;
8336
8337 mutex_lock(&event_mutex);
8338 mutex_lock(&trace_types_lock);
8339
8340 ret = -EEXIST;
8341 list_for_each_entry(tr, &ftrace_trace_arrays, list) {
8342 if (tr->name && strcmp(tr->name, name) == 0)
8343 goto out_unlock;
8344 }
8345
8346 ret = -ENOMEM;
8347 tr = kzalloc(sizeof(*tr), GFP_KERNEL);
8348 if (!tr)
8349 goto out_unlock;
8350
8351 tr->name = kstrdup(name, GFP_KERNEL);
8352 if (!tr->name)
8353 goto out_free_tr;
8354
8355 if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL))
8356 goto out_free_tr;
8357
8358 tr->trace_flags = global_trace.trace_flags & ~ZEROED_TRACE_FLAGS;
8359
8360 cpumask_copy(tr->tracing_cpumask, cpu_all_mask);
8361
8362 raw_spin_lock_init(&tr->start_lock);
8363
8364 tr->max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
8365
8366 tr->current_trace = &nop_trace;
8367
8368 INIT_LIST_HEAD(&tr->systems);
8369 INIT_LIST_HEAD(&tr->events);
8370 INIT_LIST_HEAD(&tr->hist_vars);
8371 INIT_LIST_HEAD(&tr->err_log);
8372
8373 if (allocate_trace_buffers(tr, trace_buf_size) < 0)
8374 goto out_free_tr;
8375
8376 tr->dir = tracefs_create_dir(name, trace_instance_dir);
8377 if (!tr->dir)
8378 goto out_free_tr;
8379
8380 ret = event_trace_add_tracer(tr->dir, tr);
8381 if (ret) {
8382 tracefs_remove_recursive(tr->dir);
8383 goto out_free_tr;
8384 }
8385
8386 ftrace_init_trace_array(tr);
8387
8388 init_tracer_tracefs(tr, tr->dir);
8389 init_trace_flags_index(tr);
8390 __update_tracer_options(tr);
8391
8392 list_add(&tr->list, &ftrace_trace_arrays);
8393
8394 mutex_unlock(&trace_types_lock);
8395 mutex_unlock(&event_mutex);
8396
8397 return tr;
8398
8399 out_free_tr:
8400 free_trace_buffers(tr);
8401 free_cpumask_var(tr->tracing_cpumask);
8402 kfree(tr->name);
8403 kfree(tr);
8404
8405 out_unlock:
8406 mutex_unlock(&trace_types_lock);
8407 mutex_unlock(&event_mutex);
8408
8409 return ERR_PTR(ret);
8410 }
8411 EXPORT_SYMBOL_GPL(trace_array_create);
8412
8413 static int instance_mkdir(const char *name)
8414 {
8415 return PTR_ERR_OR_ZERO(trace_array_create(name));
8416 }
8417
8418 static int __remove_instance(struct trace_array *tr)
8419 {
8420 int i;
8421
8422 if (tr->ref || (tr->current_trace && tr->current_trace->ref))
8423 return -EBUSY;
8424
8425 list_del(&tr->list);
8426
8427 /* Disable all the flags that were enabled coming in */
8428 for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++) {
8429 if ((1 << i) & ZEROED_TRACE_FLAGS)
8430 set_tracer_flag(tr, 1 << i, 0);
8431 }
8432
8433 tracing_set_nop(tr);
8434 clear_ftrace_function_probes(tr);
8435 event_trace_del_tracer(tr);
8436 ftrace_clear_pids(tr);
8437 ftrace_destroy_function_files(tr);
8438 tracefs_remove_recursive(tr->dir);
8439 free_trace_buffers(tr);
8440
8441 for (i = 0; i < tr->nr_topts; i++) {
8442 kfree(tr->topts[i].topts);
8443 }
8444 kfree(tr->topts);
8445
8446 free_cpumask_var(tr->tracing_cpumask);
8447 kfree(tr->name);
8448 kfree(tr);
8449 tr = NULL;
8450
8451 return 0;
8452 }
8453
8454 int trace_array_destroy(struct trace_array *tr)
8455 {
8456 int ret;
8457
8458 if (!tr)
8459 return -EINVAL;
8460
8461 mutex_lock(&event_mutex);
8462 mutex_lock(&trace_types_lock);
8463
8464 ret = __remove_instance(tr);
8465
8466 mutex_unlock(&trace_types_lock);
8467 mutex_unlock(&event_mutex);
8468
8469 return ret;
8470 }
8471 EXPORT_SYMBOL_GPL(trace_array_destroy);
8472
8473 static int instance_rmdir(const char *name)
8474 {
8475 struct trace_array *tr;
8476 int ret;
8477
8478 mutex_lock(&event_mutex);
8479 mutex_lock(&trace_types_lock);
8480
8481 ret = -ENODEV;
8482 list_for_each_entry(tr, &ftrace_trace_arrays, list) {
8483 if (tr->name && strcmp(tr->name, name) == 0) {
8484 ret = __remove_instance(tr);
8485 break;
8486 }
8487 }
8488
8489 mutex_unlock(&trace_types_lock);
8490 mutex_unlock(&event_mutex);
8491
8492 return ret;
8493 }
8494
8495 static __init void create_trace_instances(struct dentry *d_tracer)
8496 {
8497 trace_instance_dir = tracefs_create_instance_dir("instances", d_tracer,
8498 instance_mkdir,
8499 instance_rmdir);
8500 if (WARN_ON(!trace_instance_dir))
8501 return;
8502 }
8503
8504 static void
8505 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer)
8506 {
8507 struct trace_event_file *file;
8508 int cpu;
8509
8510 trace_create_file("available_tracers", 0444, d_tracer,
8511 tr, &show_traces_fops);
8512
8513 trace_create_file("current_tracer", 0644, d_tracer,
8514 tr, &set_tracer_fops);
8515
8516 trace_create_file("tracing_cpumask", 0644, d_tracer,
8517 tr, &tracing_cpumask_fops);
8518
8519 trace_create_file("trace_options", 0644, d_tracer,
8520 tr, &tracing_iter_fops);
8521
8522 trace_create_file("trace", 0644, d_tracer,
8523 tr, &tracing_fops);
8524
8525 trace_create_file("trace_pipe", 0444, d_tracer,
8526 tr, &tracing_pipe_fops);
8527
8528 trace_create_file("buffer_size_kb", 0644, d_tracer,
8529 tr, &tracing_entries_fops);
8530
8531 trace_create_file("buffer_total_size_kb", 0444, d_tracer,
8532 tr, &tracing_total_entries_fops);
8533
8534 trace_create_file("free_buffer", 0200, d_tracer,
8535 tr, &tracing_free_buffer_fops);
8536
8537 trace_create_file("trace_marker", 0220, d_tracer,
8538 tr, &tracing_mark_fops);
8539
8540 file = __find_event_file(tr, "ftrace", "print");
8541 if (file && file->dir)
8542 trace_create_file("trigger", 0644, file->dir, file,
8543 &event_trigger_fops);
8544 tr->trace_marker_file = file;
8545
8546 trace_create_file("trace_marker_raw", 0220, d_tracer,
8547 tr, &tracing_mark_raw_fops);
8548
8549 trace_create_file("trace_clock", 0644, d_tracer, tr,
8550 &trace_clock_fops);
8551
8552 trace_create_file("tracing_on", 0644, d_tracer,
8553 tr, &rb_simple_fops);
8554
8555 trace_create_file("timestamp_mode", 0444, d_tracer, tr,
8556 &trace_time_stamp_mode_fops);
8557
8558 tr->buffer_percent = 50;
8559
8560 trace_create_file("buffer_percent", 0444, d_tracer,
8561 tr, &buffer_percent_fops);
8562
8563 create_trace_options_dir(tr);
8564
8565 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
8566 trace_create_file("tracing_max_latency", 0644, d_tracer,
8567 &tr->max_latency, &tracing_max_lat_fops);
8568 #endif
8569
8570 if (ftrace_create_function_files(tr, d_tracer))
8571 WARN(1, "Could not allocate function filter files");
8572
8573 #ifdef CONFIG_TRACER_SNAPSHOT
8574 trace_create_file("snapshot", 0644, d_tracer,
8575 tr, &snapshot_fops);
8576 #endif
8577
8578 trace_create_file("error_log", 0644, d_tracer,
8579 tr, &tracing_err_log_fops);
8580
8581 for_each_tracing_cpu(cpu)
8582 tracing_init_tracefs_percpu(tr, cpu);
8583
8584 ftrace_init_tracefs(tr, d_tracer);
8585 }
8586
8587 static struct vfsmount *trace_automount(struct dentry *mntpt, void *ingore)
8588 {
8589 struct vfsmount *mnt;
8590 struct file_system_type *type;
8591
8592 /*
8593 * To maintain backward compatibility for tools that mount
8594 * debugfs to get to the tracing facility, tracefs is automatically
8595 * mounted to the debugfs/tracing directory.
8596 */
8597 type = get_fs_type("tracefs");
8598 if (!type)
8599 return NULL;
8600 mnt = vfs_submount(mntpt, type, "tracefs", NULL);
8601 put_filesystem(type);
8602 if (IS_ERR(mnt))
8603 return NULL;
8604 mntget(mnt);
8605
8606 return mnt;
8607 }
8608
8609 /**
8610 * tracing_init_dentry - initialize top level trace array
8611 *
8612 * This is called when creating files or directories in the tracing
8613 * directory. It is called via fs_initcall() by any of the boot up code
8614 * and expects to return the dentry of the top level tracing directory.
8615 */
8616 struct dentry *tracing_init_dentry(void)
8617 {
8618 struct trace_array *tr = &global_trace;
8619
8620 /* The top level trace array uses NULL as parent */
8621 if (tr->dir)
8622 return NULL;
8623
8624 if (WARN_ON(!tracefs_initialized()) ||
8625 (IS_ENABLED(CONFIG_DEBUG_FS) &&
8626 WARN_ON(!debugfs_initialized())))
8627 return ERR_PTR(-ENODEV);
8628
8629 /*
8630 * As there may still be users that expect the tracing
8631 * files to exist in debugfs/tracing, we must automount
8632 * the tracefs file system there, so older tools still
8633 * work with the newer kerenl.
8634 */
8635 tr->dir = debugfs_create_automount("tracing", NULL,
8636 trace_automount, NULL);
8637
8638 return NULL;
8639 }
8640
8641 extern struct trace_eval_map *__start_ftrace_eval_maps[];
8642 extern struct trace_eval_map *__stop_ftrace_eval_maps[];
8643
8644 static void __init trace_eval_init(void)
8645 {
8646 int len;
8647
8648 len = __stop_ftrace_eval_maps - __start_ftrace_eval_maps;
8649 trace_insert_eval_map(NULL, __start_ftrace_eval_maps, len);
8650 }
8651
8652 #ifdef CONFIG_MODULES
8653 static void trace_module_add_evals(struct module *mod)
8654 {
8655 if (!mod->num_trace_evals)
8656 return;
8657
8658 /*
8659 * Modules with bad taint do not have events created, do
8660 * not bother with enums either.
8661 */
8662 if (trace_module_has_bad_taint(mod))
8663 return;
8664
8665 trace_insert_eval_map(mod, mod->trace_evals, mod->num_trace_evals);
8666 }
8667
8668 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
8669 static void trace_module_remove_evals(struct module *mod)
8670 {
8671 union trace_eval_map_item *map;
8672 union trace_eval_map_item **last = &trace_eval_maps;
8673
8674 if (!mod->num_trace_evals)
8675 return;
8676
8677 mutex_lock(&trace_eval_mutex);
8678
8679 map = trace_eval_maps;
8680
8681 while (map) {
8682 if (map->head.mod == mod)
8683 break;
8684 map = trace_eval_jmp_to_tail(map);
8685 last = &map->tail.next;
8686 map = map->tail.next;
8687 }
8688 if (!map)
8689 goto out;
8690
8691 *last = trace_eval_jmp_to_tail(map)->tail.next;
8692 kfree(map);
8693 out:
8694 mutex_unlock(&trace_eval_mutex);
8695 }
8696 #else
8697 static inline void trace_module_remove_evals(struct module *mod) { }
8698 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
8699
8700 static int trace_module_notify(struct notifier_block *self,
8701 unsigned long val, void *data)
8702 {
8703 struct module *mod = data;
8704
8705 switch (val) {
8706 case MODULE_STATE_COMING:
8707 trace_module_add_evals(mod);
8708 break;
8709 case MODULE_STATE_GOING:
8710 trace_module_remove_evals(mod);
8711 break;
8712 }
8713
8714 return 0;
8715 }
8716
8717 static struct notifier_block trace_module_nb = {
8718 .notifier_call = trace_module_notify,
8719 .priority = 0,
8720 };
8721 #endif /* CONFIG_MODULES */
8722
8723 static __init int tracer_init_tracefs(void)
8724 {
8725 struct dentry *d_tracer;
8726
8727 trace_access_lock_init();
8728
8729 d_tracer = tracing_init_dentry();
8730 if (IS_ERR(d_tracer))
8731 return 0;
8732
8733 event_trace_init();
8734
8735 init_tracer_tracefs(&global_trace, d_tracer);
8736 ftrace_init_tracefs_toplevel(&global_trace, d_tracer);
8737
8738 trace_create_file("tracing_thresh", 0644, d_tracer,
8739 &global_trace, &tracing_thresh_fops);
8740
8741 trace_create_file("README", 0444, d_tracer,
8742 NULL, &tracing_readme_fops);
8743
8744 trace_create_file("saved_cmdlines", 0444, d_tracer,
8745 NULL, &tracing_saved_cmdlines_fops);
8746
8747 trace_create_file("saved_cmdlines_size", 0644, d_tracer,
8748 NULL, &tracing_saved_cmdlines_size_fops);
8749
8750 trace_create_file("saved_tgids", 0444, d_tracer,
8751 NULL, &tracing_saved_tgids_fops);
8752
8753 trace_eval_init();
8754
8755 trace_create_eval_file(d_tracer);
8756
8757 #ifdef CONFIG_MODULES
8758 register_module_notifier(&trace_module_nb);
8759 #endif
8760
8761 #ifdef CONFIG_DYNAMIC_FTRACE
8762 trace_create_file("dyn_ftrace_total_info", 0444, d_tracer,
8763 &ftrace_update_tot_cnt, &tracing_dyn_info_fops);
8764 #endif
8765
8766 create_trace_instances(d_tracer);
8767
8768 update_tracer_options(&global_trace);
8769
8770 return 0;
8771 }
8772
8773 static int trace_panic_handler(struct notifier_block *this,
8774 unsigned long event, void *unused)
8775 {
8776 if (ftrace_dump_on_oops)
8777 ftrace_dump(ftrace_dump_on_oops);
8778 return NOTIFY_OK;
8779 }
8780
8781 static struct notifier_block trace_panic_notifier = {
8782 .notifier_call = trace_panic_handler,
8783 .next = NULL,
8784 .priority = 150 /* priority: INT_MAX >= x >= 0 */
8785 };
8786
8787 static int trace_die_handler(struct notifier_block *self,
8788 unsigned long val,
8789 void *data)
8790 {
8791 switch (val) {
8792 case DIE_OOPS:
8793 if (ftrace_dump_on_oops)
8794 ftrace_dump(ftrace_dump_on_oops);
8795 break;
8796 default:
8797 break;
8798 }
8799 return NOTIFY_OK;
8800 }
8801
8802 static struct notifier_block trace_die_notifier = {
8803 .notifier_call = trace_die_handler,
8804 .priority = 200
8805 };
8806
8807 /*
8808 * printk is set to max of 1024, we really don't need it that big.
8809 * Nothing should be printing 1000 characters anyway.
8810 */
8811 #define TRACE_MAX_PRINT 1000
8812
8813 /*
8814 * Define here KERN_TRACE so that we have one place to modify
8815 * it if we decide to change what log level the ftrace dump
8816 * should be at.
8817 */
8818 #define KERN_TRACE KERN_EMERG
8819
8820 void
8821 trace_printk_seq(struct trace_seq *s)
8822 {
8823 /* Probably should print a warning here. */
8824 if (s->seq.len >= TRACE_MAX_PRINT)
8825 s->seq.len = TRACE_MAX_PRINT;
8826
8827 /*
8828 * More paranoid code. Although the buffer size is set to
8829 * PAGE_SIZE, and TRACE_MAX_PRINT is 1000, this is just
8830 * an extra layer of protection.
8831 */
8832 if (WARN_ON_ONCE(s->seq.len >= s->seq.size))
8833 s->seq.len = s->seq.size - 1;
8834
8835 /* should be zero ended, but we are paranoid. */
8836 s->buffer[s->seq.len] = 0;
8837
8838 printk(KERN_TRACE "%s", s->buffer);
8839
8840 trace_seq_init(s);
8841 }
8842
8843 void trace_init_global_iter(struct trace_iterator *iter)
8844 {
8845 iter->tr = &global_trace;
8846 iter->trace = iter->tr->current_trace;
8847 iter->cpu_file = RING_BUFFER_ALL_CPUS;
8848 iter->trace_buffer = &global_trace.trace_buffer;
8849
8850 if (iter->trace && iter->trace->open)
8851 iter->trace->open(iter);
8852
8853 /* Annotate start of buffers if we had overruns */
8854 if (ring_buffer_overruns(iter->trace_buffer->buffer))
8855 iter->iter_flags |= TRACE_FILE_ANNOTATE;
8856
8857 /* Output in nanoseconds only if we are using a clock in nanoseconds. */
8858 if (trace_clocks[iter->tr->clock_id].in_ns)
8859 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
8860 }
8861
8862 void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
8863 {
8864 /* use static because iter can be a bit big for the stack */
8865 static struct trace_iterator iter;
8866 static atomic_t dump_running;
8867 struct trace_array *tr = &global_trace;
8868 unsigned int old_userobj;
8869 unsigned long flags;
8870 int cnt = 0, cpu;
8871
8872 /* Only allow one dump user at a time. */
8873 if (atomic_inc_return(&dump_running) != 1) {
8874 atomic_dec(&dump_running);
8875 return;
8876 }
8877
8878 /*
8879 * Always turn off tracing when we dump.
8880 * We don't need to show trace output of what happens
8881 * between multiple crashes.
8882 *
8883 * If the user does a sysrq-z, then they can re-enable
8884 * tracing with echo 1 > tracing_on.
8885 */
8886 tracing_off();
8887
8888 local_irq_save(flags);
8889 printk_nmi_direct_enter();
8890
8891 /* Simulate the iterator */
8892 trace_init_global_iter(&iter);
8893
8894 for_each_tracing_cpu(cpu) {
8895 atomic_inc(&per_cpu_ptr(iter.trace_buffer->data, cpu)->disabled);
8896 }
8897
8898 old_userobj = tr->trace_flags & TRACE_ITER_SYM_USEROBJ;
8899
8900 /* don't look at user memory in panic mode */
8901 tr->trace_flags &= ~TRACE_ITER_SYM_USEROBJ;
8902
8903 switch (oops_dump_mode) {
8904 case DUMP_ALL:
8905 iter.cpu_file = RING_BUFFER_ALL_CPUS;
8906 break;
8907 case DUMP_ORIG:
8908 iter.cpu_file = raw_smp_processor_id();
8909 break;
8910 case DUMP_NONE:
8911 goto out_enable;
8912 default:
8913 printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n");
8914 iter.cpu_file = RING_BUFFER_ALL_CPUS;
8915 }
8916
8917 printk(KERN_TRACE "Dumping ftrace buffer:\n");
8918
8919 /* Did function tracer already get disabled? */
8920 if (ftrace_is_dead()) {
8921 printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n");
8922 printk("# MAY BE MISSING FUNCTION EVENTS\n");
8923 }
8924
8925 /*
8926 * We need to stop all tracing on all CPUS to read the
8927 * the next buffer. This is a bit expensive, but is
8928 * not done often. We fill all what we can read,
8929 * and then release the locks again.
8930 */
8931
8932 while (!trace_empty(&iter)) {
8933
8934 if (!cnt)
8935 printk(KERN_TRACE "---------------------------------\n");
8936
8937 cnt++;
8938
8939 trace_iterator_reset(&iter);
8940 iter.iter_flags |= TRACE_FILE_LAT_FMT;
8941
8942 if (trace_find_next_entry_inc(&iter) != NULL) {
8943 int ret;
8944
8945 ret = print_trace_line(&iter);
8946 if (ret != TRACE_TYPE_NO_CONSUME)
8947 trace_consume(&iter);
8948 }
8949 touch_nmi_watchdog();
8950
8951 trace_printk_seq(&iter.seq);
8952 }
8953
8954 if (!cnt)
8955 printk(KERN_TRACE " (ftrace buffer empty)\n");
8956 else
8957 printk(KERN_TRACE "---------------------------------\n");
8958
8959 out_enable:
8960 tr->trace_flags |= old_userobj;
8961
8962 for_each_tracing_cpu(cpu) {
8963 atomic_dec(&per_cpu_ptr(iter.trace_buffer->data, cpu)->disabled);
8964 }
8965 atomic_dec(&dump_running);
8966 printk_nmi_direct_exit();
8967 local_irq_restore(flags);
8968 }
8969 EXPORT_SYMBOL_GPL(ftrace_dump);
8970
8971 int trace_run_command(const char *buf, int (*createfn)(int, char **))
8972 {
8973 char **argv;
8974 int argc, ret;
8975
8976 argc = 0;
8977 ret = 0;
8978 argv = argv_split(GFP_KERNEL, buf, &argc);
8979 if (!argv)
8980 return -ENOMEM;
8981
8982 if (argc)
8983 ret = createfn(argc, argv);
8984
8985 argv_free(argv);
8986
8987 return ret;
8988 }
8989
8990 #define WRITE_BUFSIZE 4096
8991
8992 ssize_t trace_parse_run_command(struct file *file, const char __user *buffer,
8993 size_t count, loff_t *ppos,
8994 int (*createfn)(int, char **))
8995 {
8996 char *kbuf, *buf, *tmp;
8997 int ret = 0;
8998 size_t done = 0;
8999 size_t size;
9000
9001 kbuf = kmalloc(WRITE_BUFSIZE, GFP_KERNEL);
9002 if (!kbuf)
9003 return -ENOMEM;
9004
9005 while (done < count) {
9006 size = count - done;
9007
9008 if (size >= WRITE_BUFSIZE)
9009 size = WRITE_BUFSIZE - 1;
9010
9011 if (copy_from_user(kbuf, buffer + done, size)) {
9012 ret = -EFAULT;
9013 goto out;
9014 }
9015 kbuf[size] = '\0';
9016 buf = kbuf;
9017 do {
9018 tmp = strchr(buf, '\n');
9019 if (tmp) {
9020 *tmp = '\0';
9021 size = tmp - buf + 1;
9022 } else {
9023 size = strlen(buf);
9024 if (done + size < count) {
9025 if (buf != kbuf)
9026 break;
9027 /* This can accept WRITE_BUFSIZE - 2 ('\n' + '\0') */
9028 pr_warn("Line length is too long: Should be less than %d\n",
9029 WRITE_BUFSIZE - 2);
9030 ret = -EINVAL;
9031 goto out;
9032 }
9033 }
9034 done += size;
9035
9036 /* Remove comments */
9037 tmp = strchr(buf, '#');
9038
9039 if (tmp)
9040 *tmp = '\0';
9041
9042 ret = trace_run_command(buf, createfn);
9043 if (ret)
9044 goto out;
9045 buf += size;
9046
9047 } while (done < count);
9048 }
9049 ret = done;
9050
9051 out:
9052 kfree(kbuf);
9053
9054 return ret;
9055 }
9056
9057 __init static int tracer_alloc_buffers(void)
9058 {
9059 int ring_buf_size;
9060 int ret = -ENOMEM;
9061
9062 /*
9063 * Make sure we don't accidently add more trace options
9064 * than we have bits for.
9065 */
9066 BUILD_BUG_ON(TRACE_ITER_LAST_BIT > TRACE_FLAGS_MAX_SIZE);
9067
9068 if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
9069 goto out;
9070
9071 if (!alloc_cpumask_var(&global_trace.tracing_cpumask, GFP_KERNEL))
9072 goto out_free_buffer_mask;
9073
9074 /* Only allocate trace_printk buffers if a trace_printk exists */
9075 if (__stop___trace_bprintk_fmt != __start___trace_bprintk_fmt)
9076 /* Must be called before global_trace.buffer is allocated */
9077 trace_printk_init_buffers();
9078
9079 /* To save memory, keep the ring buffer size to its minimum */
9080 if (ring_buffer_expanded)
9081 ring_buf_size = trace_buf_size;
9082 else
9083 ring_buf_size = 1;
9084
9085 cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
9086 cpumask_copy(global_trace.tracing_cpumask, cpu_all_mask);
9087
9088 raw_spin_lock_init(&global_trace.start_lock);
9089
9090 /*
9091 * The prepare callbacks allocates some memory for the ring buffer. We
9092 * don't free the buffer if the if the CPU goes down. If we were to free
9093 * the buffer, then the user would lose any trace that was in the
9094 * buffer. The memory will be removed once the "instance" is removed.
9095 */
9096 ret = cpuhp_setup_state_multi(CPUHP_TRACE_RB_PREPARE,
9097 "trace/RB:preapre", trace_rb_cpu_prepare,
9098 NULL);
9099 if (ret < 0)
9100 goto out_free_cpumask;
9101 /* Used for event triggers */
9102 ret = -ENOMEM;
9103 temp_buffer = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE);
9104 if (!temp_buffer)
9105 goto out_rm_hp_state;
9106
9107 if (trace_create_savedcmd() < 0)
9108 goto out_free_temp_buffer;
9109
9110 /* TODO: make the number of buffers hot pluggable with CPUS */
9111 if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
9112 printk(KERN_ERR "tracer: failed to allocate ring buffer!\n");
9113 WARN_ON(1);
9114 goto out_free_savedcmd;
9115 }
9116
9117 if (global_trace.buffer_disabled)
9118 tracing_off();
9119
9120 if (trace_boot_clock) {
9121 ret = tracing_set_clock(&global_trace, trace_boot_clock);
9122 if (ret < 0)
9123 pr_warn("Trace clock %s not defined, going back to default\n",
9124 trace_boot_clock);
9125 }
9126
9127 /*
9128 * register_tracer() might reference current_trace, so it
9129 * needs to be set before we register anything. This is
9130 * just a bootstrap of current_trace anyway.
9131 */
9132 global_trace.current_trace = &nop_trace;
9133
9134 global_trace.max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
9135
9136 ftrace_init_global_array_ops(&global_trace);
9137
9138 init_trace_flags_index(&global_trace);
9139
9140 register_tracer(&nop_trace);
9141
9142 /* Function tracing may start here (via kernel command line) */
9143 init_function_trace();
9144
9145 /* All seems OK, enable tracing */
9146 tracing_disabled = 0;
9147
9148 atomic_notifier_chain_register(&panic_notifier_list,
9149 &trace_panic_notifier);
9150
9151 register_die_notifier(&trace_die_notifier);
9152
9153 global_trace.flags = TRACE_ARRAY_FL_GLOBAL;
9154
9155 INIT_LIST_HEAD(&global_trace.systems);
9156 INIT_LIST_HEAD(&global_trace.events);
9157 INIT_LIST_HEAD(&global_trace.hist_vars);
9158 INIT_LIST_HEAD(&global_trace.err_log);
9159 list_add(&global_trace.list, &ftrace_trace_arrays);
9160
9161 apply_trace_boot_options();
9162
9163 register_snapshot_cmd();
9164
9165 return 0;
9166
9167 out_free_savedcmd:
9168 free_saved_cmdlines_buffer(savedcmd);
9169 out_free_temp_buffer:
9170 ring_buffer_free(temp_buffer);
9171 out_rm_hp_state:
9172 cpuhp_remove_multi_state(CPUHP_TRACE_RB_PREPARE);
9173 out_free_cpumask:
9174 free_cpumask_var(global_trace.tracing_cpumask);
9175 out_free_buffer_mask:
9176 free_cpumask_var(tracing_buffer_mask);
9177 out:
9178 return ret;
9179 }
9180
9181 void __init early_trace_init(void)
9182 {
9183 if (tracepoint_printk) {
9184 tracepoint_print_iter =
9185 kmalloc(sizeof(*tracepoint_print_iter), GFP_KERNEL);
9186 if (WARN_ON(!tracepoint_print_iter))
9187 tracepoint_printk = 0;
9188 else
9189 static_key_enable(&tracepoint_printk_key.key);
9190 }
9191 tracer_alloc_buffers();
9192 }
9193
9194 void __init trace_init(void)
9195 {
9196 trace_event_init();
9197 }
9198
9199 __init static int clear_boot_tracer(void)
9200 {
9201 /*
9202 * The default tracer at boot buffer is an init section.
9203 * This function is called in lateinit. If we did not
9204 * find the boot tracer, then clear it out, to prevent
9205 * later registration from accessing the buffer that is
9206 * about to be freed.
9207 */
9208 if (!default_bootup_tracer)
9209 return 0;
9210
9211 printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n",
9212 default_bootup_tracer);
9213 default_bootup_tracer = NULL;
9214
9215 return 0;
9216 }
9217
9218 fs_initcall(tracer_init_tracefs);
9219 late_initcall_sync(clear_boot_tracer);
9220
9221 #ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
9222 __init static int tracing_set_default_clock(void)
9223 {
9224 /* sched_clock_stable() is determined in late_initcall */
9225 if (!trace_boot_clock && !sched_clock_stable()) {
9226 printk(KERN_WARNING
9227 "Unstable clock detected, switching default tracing clock to \"global\"\n"
9228 "If you want to keep using the local clock, then add:\n"
9229 " \"trace_clock=local\"\n"
9230 "on the kernel command line\n");
9231 tracing_set_clock(&global_trace, "global");
9232 }
9233
9234 return 0;
9235 }
9236 late_initcall_sync(tracing_set_default_clock);
9237 #endif