1 /*
2 * ring buffer based function tracer
3 *
4 * Copyright (C) 2007-2012 Steven Rostedt <srostedt@redhat.com>
5 * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
6 *
7 * Originally taken from the RT patch by:
8 * Arnaldo Carvalho de Melo <acme@redhat.com>
9 *
10 * Based on code from the latency_tracer, that is:
11 * Copyright (C) 2004-2006 Ingo Molnar
12 * Copyright (C) 2004 Nadia Yvette Chambers
13 */
14 #include <linux/ring_buffer.h>
15 #include <generated/utsrelease.h>
16 #include <linux/stacktrace.h>
17 #include <linux/writeback.h>
18 #include <linux/kallsyms.h>
19 #include <linux/seq_file.h>
20 #include <linux/notifier.h>
21 #include <linux/irqflags.h>
22 #include <linux/debugfs.h>
23 #include <linux/tracefs.h>
24 #include <linux/pagemap.h>
25 #include <linux/hardirq.h>
26 #include <linux/linkage.h>
27 #include <linux/uaccess.h>
28 #include <linux/vmalloc.h>
29 #include <linux/ftrace.h>
30 #include <linux/module.h>
31 #include <linux/percpu.h>
32 #include <linux/splice.h>
33 #include <linux/kdebug.h>
34 #include <linux/string.h>
35 #include <linux/mount.h>
36 #include <linux/rwsem.h>
37 #include <linux/slab.h>
38 #include <linux/ctype.h>
39 #include <linux/init.h>
40 #include <linux/poll.h>
41 #include <linux/nmi.h>
42 #include <linux/fs.h>
43 #include <linux/sched/rt.h>
44
45 #include "trace.h"
46 #include "trace_output.h"
47
48 /*
49 * On boot up, the ring buffer is set to the minimum size, so that
50 * we do not waste memory on systems that are not using tracing.
51 */
52 bool ring_buffer_expanded;
53
54 /*
55 * We need to change this state when a selftest is running.
56 * A selftest will look into the ring-buffer to count the
57 * entries inserted during the selftest, although concurrent
58 * insertions into the ring-buffer, such as trace_printk, could occur
59 * at the same time, giving false positive or negative results.
60 */
61 static bool __read_mostly tracing_selftest_running;
62
63 /*
64 * If a tracer is running, we do not want to run SELFTEST.
65 */
66 bool __read_mostly tracing_selftest_disabled;
67
68 /* Pipe tracepoints to printk */
69 struct trace_iterator *tracepoint_print_iter;
70 int tracepoint_printk;
71
72 /* For tracers that don't implement custom flags */
73 static struct tracer_opt dummy_tracer_opt[] = {
74 { }
75 };
76
77 static int
78 dummy_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
79 {
80 return 0;
81 }
82
83 /*
84 * To prevent the comm cache from being overwritten when no
85 * tracing is active, only save the comm when a trace event
86 * occurred.
87 */
88 static DEFINE_PER_CPU(bool, trace_cmdline_save);
89
90 /*
91 * Kill all tracing for good (never come back).
92 * It is initialized to 1 but will turn to zero if the initialization
93 * of the tracer is successful. But that is the only place that sets
94 * this back to zero.
95 */
96 static int tracing_disabled = 1;
97
98 cpumask_var_t __read_mostly tracing_buffer_mask;
99
100 /*
101 * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
102 *
103 * If there is an oops (or kernel panic) and the ftrace_dump_on_oops
104 * is set, then ftrace_dump is called. This will output the contents
105 * of the ftrace buffers to the console. This is very useful for
106 * capturing traces that lead to crashes and outputing it to a
107 * serial console.
108 *
109 * It is default off, but you can enable it with either specifying
110 * "ftrace_dump_on_oops" in the kernel command line, or setting
111 * /proc/sys/kernel/ftrace_dump_on_oops
112 * Set 1 if you want to dump buffers of all CPUs
113 * Set 2 if you want to dump the buffer of the CPU that triggered oops
114 */
115
116 enum ftrace_dump_mode ftrace_dump_on_oops;
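/*
 * Illustrative sketch (hedged, not a quote of the oops code): the
 * oops/panic path is expected to consult this variable and dump the
 * buffers roughly as follows, assuming ftrace_dump() takes the dump
 * mode as its argument:
 *
 *	if (ftrace_dump_on_oops)
 *		ftrace_dump(ftrace_dump_on_oops);
 */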
117
118 /* When set, tracing will stop when a WARN*() is hit */
119 int __disable_trace_on_warning;
120
121 #ifdef CONFIG_TRACE_ENUM_MAP_FILE
122 /* Map of enums to their values, for "enum_map" file */
123 struct trace_enum_map_head {
124 struct module *mod;
125 unsigned long length;
126 };
127
128 union trace_enum_map_item;
129
130 struct trace_enum_map_tail {
131 /*
132 * "end" is first and points to NULL as it must be different
133 * than "mod" or "enum_string"
134 */
135 union trace_enum_map_item *next;
136 const char *end; /* points to NULL */
137 };
138
139 static DEFINE_MUTEX(trace_enum_mutex);
140
141 /*
142 * The trace_enum_maps are saved in an array with two extra elements,
143 * one at the beginning, and one at the end. The beginning item contains
144 * the count of the saved maps (head.length), and the module they
145 * belong to if not built in (head.mod). The ending item contains a
146 * pointer to the next array of saved enum_map items.
147 */
148 union trace_enum_map_item {
149 struct trace_enum_map map;
150 struct trace_enum_map_head head;
151 struct trace_enum_map_tail tail;
152 };
153
154 static union trace_enum_map_item *trace_enum_maps;
155 #endif /* CONFIG_TRACE_ENUM_MAP_FILE */
156
157 static int tracing_set_tracer(struct trace_array *tr, const char *buf);
158
159 #define MAX_TRACER_SIZE 100
160 static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
161 static char *default_bootup_tracer;
162
163 static bool allocate_snapshot;
164
165 static int __init set_cmdline_ftrace(char *str)
166 {
167 strlcpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
168 default_bootup_tracer = bootup_tracer_buf;
169 /* We are using ftrace early, expand it */
170 ring_buffer_expanded = true;
171 return 1;
172 }
173 __setup("ftrace=", set_cmdline_ftrace);
174
175 static int __init set_ftrace_dump_on_oops(char *str)
176 {
177 if (*str++ != '=' || !*str) {
178 ftrace_dump_on_oops = DUMP_ALL;
179 return 1;
180 }
181
182 if (!strcmp("orig_cpu", str)) {
183 ftrace_dump_on_oops = DUMP_ORIG;
184 return 1;
185 }
186
187 return 0;
188 }
189 __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
190
191 static int __init stop_trace_on_warning(char *str)
192 {
193 if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
194 __disable_trace_on_warning = 1;
195 return 1;
196 }
197 __setup("traceoff_on_warning", stop_trace_on_warning);
198
199 static int __init boot_alloc_snapshot(char *str)
200 {
201 allocate_snapshot = true;
202 /* We also need the main ring buffer expanded */
203 ring_buffer_expanded = true;
204 return 1;
205 }
206 __setup("alloc_snapshot", boot_alloc_snapshot);
207
208
209 static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;
210
211 static int __init set_trace_boot_options(char *str)
212 {
213 strlcpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
214 return 0;
215 }
216 __setup("trace_options=", set_trace_boot_options);
217
218 static char trace_boot_clock_buf[MAX_TRACER_SIZE] __initdata;
219 static char *trace_boot_clock __initdata;
220
221 static int __init set_trace_boot_clock(char *str)
222 {
223 strlcpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE);
224 trace_boot_clock = trace_boot_clock_buf;
225 return 0;
226 }
227 __setup("trace_clock=", set_trace_boot_clock);
228
229 static int __init set_tracepoint_printk(char *str)
230 {
231 if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
232 tracepoint_printk = 1;
233 return 1;
234 }
235 __setup("tp_printk", set_tracepoint_printk);
236
237 unsigned long long ns2usecs(cycle_t nsec)
238 {
239 nsec += 500;
240 do_div(nsec, 1000);
241 return nsec;
242 }
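/*
 * Worked example: the "+ 500" rounds to the nearest microsecond
 * instead of truncating, e.g.
 *
 *	ns2usecs(1499) == 1	(1999 / 1000)
 *	ns2usecs(1500) == 2	(2000 / 1000)
 */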
243
244 /* trace_flags holds trace_options default values */
245 #define TRACE_DEFAULT_FLAGS \
246 (FUNCTION_DEFAULT_FLAGS | \
247 TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK | \
248 TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO | \
249 TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE | \
250 TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS)
251
252 /* trace_options that are only supported by global_trace */
253 #define TOP_LEVEL_TRACE_FLAGS (TRACE_ITER_PRINTK | \
254 TRACE_ITER_PRINTK_MSGONLY | TRACE_ITER_RECORD_CMD)
255
256 /* trace_flags that are default zero for instances */
257 #define ZEROED_TRACE_FLAGS \
258 TRACE_ITER_EVENT_FORK
259
260 /*
261 * The global_trace is the descriptor that holds the tracing
262 * buffers for the live tracing. For each CPU, it contains
263 * a linked list of pages that will store trace entries. The
264 * page descriptor of the pages in the memory is used to hold
265 * the linked list by linking the lru item in the page descriptor
266 * to each of the pages in the buffer per CPU.
267 *
268 * For each active CPU there is a data field that holds the
269 * pages for the buffer for that CPU. Each CPU has the same number
270 * of pages allocated for its buffer.
271 */
272 static struct trace_array global_trace = {
273 .trace_flags = TRACE_DEFAULT_FLAGS,
274 };
275
276 LIST_HEAD(ftrace_trace_arrays);
277
278 int trace_array_get(struct trace_array *this_tr)
279 {
280 struct trace_array *tr;
281 int ret = -ENODEV;
282
283 mutex_lock(&trace_types_lock);
284 list_for_each_entry(tr, &ftrace_trace_arrays, list) {
285 if (tr == this_tr) {
286 tr->ref++;
287 ret = 0;
288 break;
289 }
290 }
291 mutex_unlock(&trace_types_lock);
292
293 return ret;
294 }
295
296 static void __trace_array_put(struct trace_array *this_tr)
297 {
298 WARN_ON(!this_tr->ref);
299 this_tr->ref--;
300 }
301
302 void trace_array_put(struct trace_array *this_tr)
303 {
304 mutex_lock(&trace_types_lock);
305 __trace_array_put(this_tr);
306 mutex_unlock(&trace_types_lock);
307 }
308
309 int call_filter_check_discard(struct trace_event_call *call, void *rec,
310 struct ring_buffer *buffer,
311 struct ring_buffer_event *event)
312 {
313 if (unlikely(call->flags & TRACE_EVENT_FL_FILTERED) &&
314 !filter_match_preds(call->filter, rec)) {
315 __trace_event_discard_commit(buffer, event);
316 return 1;
317 }
318
319 return 0;
320 }
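/*
 * Illustrative sketch (not a quote of a specific caller): a typical
 * commit path is expected to use the return value to decide whether
 * the reserved event still needs to be committed, roughly:
 *
 *	entry = ring_buffer_event_data(event);
 *	...
 *	if (!call_filter_check_discard(call, entry, buffer, event))
 *		__buffer_unlock_commit(buffer, event);
 */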
321
322 void trace_free_pid_list(struct trace_pid_list *pid_list)
323 {
324 vfree(pid_list->pids);
325 kfree(pid_list);
326 }
327
328 /**
329 * trace_find_filtered_pid - check if a pid exists in a filtered_pid list
330 * @filtered_pids: The list of pids to check
331 * @search_pid: The PID to find in @filtered_pids
332 *
333 * Returns true if @search_pid is found in @filtered_pids, and false otherwise.
334 */
335 bool
336 trace_find_filtered_pid(struct trace_pid_list *filtered_pids, pid_t search_pid)
337 {
338 /*
339 * If pid_max changed after filtered_pids was created, we
340 * by default ignore all pids greater than the previous pid_max.
341 */
342 if (search_pid >= filtered_pids->pid_max)
343 return false;
344
345 return test_bit(search_pid, filtered_pids->pids);
346 }
347
348 /**
349 * trace_ignore_this_task - should a task be ignored for tracing
350 * @filtered_pids: The list of pids to check
351 * @task: The task that should be ignored if not filtered
352 *
353 * Checks if @task should be traced or not from @filtered_pids.
354 * Returns true if @task should *NOT* be traced.
355 * Returns false if @task should be traced.
356 */
357 bool
358 trace_ignore_this_task(struct trace_pid_list *filtered_pids, struct task_struct *task)
359 {
360 /*
361 * Return false, because if filtered_pids does not exist,
362 * all pids are good to trace.
363 */
364 if (!filtered_pids)
365 return false;
366
367 return !trace_find_filtered_pid(filtered_pids, task->pid);
368 }
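/*
 * Illustrative sketch: a probe that honours pid filtering is expected
 * to bail out early, along the lines of:
 *
 *	if (trace_ignore_this_task(pid_list, task))
 *		return;
 */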
369
370 /**
371 * trace_pid_filter_add_remove - Add or remove a task from a pid_list
372 * @pid_list: The list to modify
373 * @self: The current task for fork or NULL for exit
374 * @task: The task to add or remove
375 *
376 * If adding a task, if @self is defined, the task is only added if @self
377 * is also included in @pid_list. This happens on fork and tasks should
378 * only be added when the parent is listed. If @self is NULL, then the
379 * @task pid will be removed from the list, which would happen on exit
380 * of a task.
381 */
382 void trace_filter_add_remove_task(struct trace_pid_list *pid_list,
383 struct task_struct *self,
384 struct task_struct *task)
385 {
386 if (!pid_list)
387 return;
388
389 /* For forks, we only add if the forking task is listed */
390 if (self) {
391 if (!trace_find_filtered_pid(pid_list, self->pid))
392 return;
393 }
394
395 /* Sorry, but we don't support pid_max changing after setting */
396 if (task->pid >= pid_list->pid_max)
397 return;
398
399 /* "self" is set for forks, and NULL for exits */
400 if (self)
401 set_bit(task->pid, pid_list->pids);
402 else
403 clear_bit(task->pid, pid_list->pids);
404 }
405
406 /**
407 * trace_pid_next - Used for seq_file to get to the next pid of a pid_list
408 * @pid_list: The pid list to show
409 * @v: The last pid that was shown (+1 of the actual pid, so that zero can be displayed)
410 * @pos: The position of the file
411 *
412 * This is used by the seq_file "next" operation to iterate the pids
413 * listed in a trace_pid_list structure.
414 *
415 * Returns the pid+1 as we want to display pid of zero, but NULL would
416 * stop the iteration.
417 */
418 void *trace_pid_next(struct trace_pid_list *pid_list, void *v, loff_t *pos)
419 {
420 unsigned long pid = (unsigned long)v;
421
422 (*pos)++;
423
424 /* pid already is +1 of the actual previous bit */
425 pid = find_next_bit(pid_list->pids, pid_list->pid_max, pid);
426
427 /* Return pid + 1 to allow zero to be represented */
428 if (pid < pid_list->pid_max)
429 return (void *)(pid + 1);
430
431 return NULL;
432 }
433
434 /**
435 * trace_pid_start - Used for seq_file to start reading pid lists
436 * @pid_list: The pid list to show
437 * @pos: The position of the file
438 *
439 * This is used by seq_file "start" operation to start the iteration
440 * of listing pids.
441 *
442 * Returns the pid+1 as we want to display pid of zero, but NULL would
443 * stop the iteration.
444 */
445 void *trace_pid_start(struct trace_pid_list *pid_list, loff_t *pos)
446 {
447 unsigned long pid;
448 loff_t l = 0;
449
450 pid = find_first_bit(pid_list->pids, pid_list->pid_max);
451 if (pid >= pid_list->pid_max)
452 return NULL;
453
454 /* Return pid + 1 so that zero can be the exit value */
455 for (pid++; pid && l < *pos;
456 pid = (unsigned long)trace_pid_next(pid_list, (void *)pid, &l))
457 ;
458 return (void *)pid;
459 }
460
461 /**
462 * trace_pid_show - show the current pid in seq_file processing
463 * @m: The seq_file structure to write into
464 * @v: A void pointer of the pid (+1) value to display
465 *
466 * Can be directly used by seq_file operations to display the current
467 * pid value.
468 */
469 int trace_pid_show(struct seq_file *m, void *v)
470 {
471 unsigned long pid = (unsigned long)v - 1;
472
473 seq_printf(m, "%lu\n", pid);
474 return 0;
475 }
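/*
 * Illustrative sketch: the three helpers above slot directly into a
 * seq_file iterator. The example_* names below are invented for the
 * example, and locking of the hypothetical example_pid_list is omitted:
 *
 *	static void *example_pids_start(struct seq_file *m, loff_t *pos)
 *	{
 *		return trace_pid_start(example_pid_list, pos);
 *	}
 *
 *	static void *example_pids_next(struct seq_file *m, void *v, loff_t *pos)
 *	{
 *		return trace_pid_next(example_pid_list, v, pos);
 *	}
 *
 *	static void example_pids_stop(struct seq_file *m, void *v)
 *	{
 *	}
 *
 *	static const struct seq_operations example_pids_seq_ops = {
 *		.start	= example_pids_start,
 *		.next	= example_pids_next,
 *		.stop	= example_pids_stop,
 *		.show	= trace_pid_show,
 *	};
 */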
476
477 /* 128 should be much more than enough */
478 #define PID_BUF_SIZE 127
479
480 int trace_pid_write(struct trace_pid_list *filtered_pids,
481 struct trace_pid_list **new_pid_list,
482 const char __user *ubuf, size_t cnt)
483 {
484 struct trace_pid_list *pid_list;
485 struct trace_parser parser;
486 unsigned long val;
487 int nr_pids = 0;
488 ssize_t read = 0;
489 ssize_t ret = 0;
490 loff_t pos;
491 pid_t pid;
492
493 if (trace_parser_get_init(&parser, PID_BUF_SIZE + 1))
494 return -ENOMEM;
495
496 /*
497 * Always recreate a new array. The write is an all-or-nothing
498 * operation: when the user adds new pids, build a fresh array so
499 * that if the operation fails, the current list is left
500 * unmodified.
501 */
502 pid_list = kmalloc(sizeof(*pid_list), GFP_KERNEL);
503 if (!pid_list)
504 return -ENOMEM;
505
506 pid_list->pid_max = READ_ONCE(pid_max);
507
508 /* Only truncating will shrink pid_max */
509 if (filtered_pids && filtered_pids->pid_max > pid_list->pid_max)
510 pid_list->pid_max = filtered_pids->pid_max;
511
512 pid_list->pids = vzalloc((pid_list->pid_max + 7) >> 3);
513 if (!pid_list->pids) {
514 kfree(pid_list);
515 return -ENOMEM;
516 }
517
518 if (filtered_pids) {
519 /* copy the current bits to the new max */
520 for_each_set_bit(pid, filtered_pids->pids,
521 filtered_pids->pid_max) {
522 set_bit(pid, pid_list->pids);
523 nr_pids++;
524 }
525 }
526
527 while (cnt > 0) {
528
529 pos = 0;
530
531 ret = trace_get_user(&parser, ubuf, cnt, &pos);
532 if (ret < 0 || !trace_parser_loaded(&parser))
533 break;
534
535 read += ret;
536 ubuf += ret;
537 cnt -= ret;
538
539 parser.buffer[parser.idx] = 0;
540
541 ret = -EINVAL;
542 if (kstrtoul(parser.buffer, 0, &val))
543 break;
544 if (val >= pid_list->pid_max)
545 break;
546
547 pid = (pid_t)val;
548
549 set_bit(pid, pid_list->pids);
550 nr_pids++;
551
552 trace_parser_clear(&parser);
553 ret = 0;
554 }
555 trace_parser_put(&parser);
556
557 if (ret < 0) {
558 trace_free_pid_list(pid_list);
559 return ret;
560 }
561
562 if (!nr_pids) {
563 /* Cleared the list of pids */
564 trace_free_pid_list(pid_list);
565 read = ret;
566 pid_list = NULL;
567 }
568
569 *new_pid_list = pid_list;
570
571 return read;
572 }
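/*
 * Illustrative sketch (the RCU publication details are an assumption,
 * not taken from this file): a tracefs write handler is expected to
 * build the new list with trace_pid_write(), publish it, and free the
 * old list only after readers are done. "some_filtered_pids_field"
 * stands in for whatever RCU-protected pointer the caller owns:
 *
 *	ret = trace_pid_write(filtered_pids, &pid_list, ubuf, cnt);
 *	if (ret < 0)
 *		return ret;
 *
 *	rcu_assign_pointer(some_filtered_pids_field, pid_list);
 *
 *	if (filtered_pids) {
 *		synchronize_sched();
 *		trace_free_pid_list(filtered_pids);
 *	}
 */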
573
574 static cycle_t buffer_ftrace_now(struct trace_buffer *buf, int cpu)
575 {
576 u64 ts;
577
578 /* Early boot up does not have a buffer yet */
579 if (!buf->buffer)
580 return trace_clock_local();
581
582 ts = ring_buffer_time_stamp(buf->buffer, cpu);
583 ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts);
584
585 return ts;
586 }
587
588 cycle_t ftrace_now(int cpu)
589 {
590 return buffer_ftrace_now(&global_trace.trace_buffer, cpu);
591 }
592
593 /**
594 * tracing_is_enabled - Show if global_trace has been disabled
595 *
596 * Shows if the global trace has been enabled or not. It uses the
597 * mirror flag "buffer_disabled" to be used in fast paths such as for
598 * the irqsoff tracer. But it may be inaccurate due to races. If you
599 * need to know the accurate state, use tracing_is_on() which is a little
600 * slower, but accurate.
601 */
602 int tracing_is_enabled(void)
603 {
604 /*
605 * For quick access (irqsoff uses this in fast path), just
606 * return the mirror variable of the state of the ring buffer.
607 * It's a little racy, but we don't really care.
608 */
609 smp_rmb();
610 return !global_trace.buffer_disabled;
611 }
612
613 /*
614 * trace_buf_size is the size in bytes that is allocated
615 * for a buffer. Note, the number of bytes is always rounded
616 * to page size.
617 *
618 * This number is purposely set to a low number of 16384.
619 * If the dump on oops happens, it is much appreciated not to
620 * have to wait for all that output. In any case, this is
621 * configurable at both boot time and run time.
622 */
623 #define TRACE_BUF_SIZE_DEFAULT 1441792UL /* 16384 * 88 (sizeof(entry)) */
624
625 static unsigned long trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
626
627 /* trace_types holds a link list of available tracers. */
628 static struct tracer *trace_types __read_mostly;
629
630 /*
631 * trace_types_lock is used to protect the trace_types list.
632 */
633 DEFINE_MUTEX(trace_types_lock);
634
635 /*
636 * serialize the access of the ring buffer
637 *
638 * The ring buffer serializes readers, but that is only low level protection.
639 * The validity of the events (returned by ring_buffer_peek() etc.)
640 * is not protected by the ring buffer.
641 *
642 * The content of events may become garbage if we allow another process to
643 * consume these events concurrently:
644 * A) the page of the consumed events may become a normal page
645 * (not a reader page) in the ring buffer, and this page will be rewritten
646 * by the event producer.
647 * B) The page of the consumed events may become a page for splice_read,
648 * and this page will be returned to the system.
649 *
650 * These primitives allow multiple processes to access different per-cpu
651 * ring buffers concurrently.
652 *
653 * These primitives don't distinguish read-only and read-consume access.
654 * Multiple read-only accesses are also serialized.
655 */
656
657 #ifdef CONFIG_SMP
658 static DECLARE_RWSEM(all_cpu_access_lock);
659 static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
660
661 static inline void trace_access_lock(int cpu)
662 {
663 if (cpu == RING_BUFFER_ALL_CPUS) {
664 /* gain it for accessing the whole ring buffer. */
665 down_write(&all_cpu_access_lock);
666 } else {
667 /* gain it for accessing a cpu ring buffer. */
668
669 * First, block other trace_access_lock(RING_BUFFER_ALL_CPUS) callers.
670 down_read(&all_cpu_access_lock);
671
672 * Second, block other access to this @cpu ring buffer.
673 mutex_lock(&per_cpu(cpu_access_lock, cpu));
674 }
675 }
676
677 static inline void trace_access_unlock(int cpu)
678 {
679 if (cpu == RING_BUFFER_ALL_CPUS) {
680 up_write(&all_cpu_access_lock);
681 } else {
682 mutex_unlock(&per_cpu(cpu_access_lock, cpu));
683 up_read(&all_cpu_access_lock);
684 }
685 }
686
687 static inline void trace_access_lock_init(void)
688 {
689 int cpu;
690
691 for_each_possible_cpu(cpu)
692 mutex_init(&per_cpu(cpu_access_lock, cpu));
693 }
694
695 #else
696
697 static DEFINE_MUTEX(access_lock);
698
699 static inline void trace_access_lock(int cpu)
700 {
701 (void)cpu;
702 mutex_lock(&access_lock);
703 }
704
705 static inline void trace_access_unlock(int cpu)
706 {
707 (void)cpu;
708 mutex_unlock(&access_lock);
709 }
710
711 static inline void trace_access_lock_init(void)
712 {
713 }
714
715 #endif
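/*
 * Illustrative sketch: a reader of a single cpu buffer brackets its
 * accesses with the primitives above, while a reader of all buffers
 * passes RING_BUFFER_ALL_CPUS:
 *
 *	trace_access_lock(cpu);
 *	... peek at or consume events of this cpu's buffer ...
 *	trace_access_unlock(cpu);
 */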
716
717 #ifdef CONFIG_STACKTRACE
718 static void __ftrace_trace_stack(struct ring_buffer *buffer,
719 unsigned long flags,
720 int skip, int pc, struct pt_regs *regs);
721 static inline void ftrace_trace_stack(struct trace_array *tr,
722 struct ring_buffer *buffer,
723 unsigned long flags,
724 int skip, int pc, struct pt_regs *regs);
725
726 #else
727 static inline void __ftrace_trace_stack(struct ring_buffer *buffer,
728 unsigned long flags,
729 int skip, int pc, struct pt_regs *regs)
730 {
731 }
732 static inline void ftrace_trace_stack(struct trace_array *tr,
733 struct ring_buffer *buffer,
734 unsigned long flags,
735 int skip, int pc, struct pt_regs *regs)
736 {
737 }
738
739 #endif
740
741 static void tracer_tracing_on(struct trace_array *tr)
742 {
743 if (tr->trace_buffer.buffer)
744 ring_buffer_record_on(tr->trace_buffer.buffer);
745 /*
746 * This flag is looked at when buffers haven't been allocated
747 * yet, or by some tracers (like irqsoff), that just want to
748 * know if the ring buffer has been disabled, but it can handle
749 * races where it gets disabled but we still do a record.
750 * As the check is in the fast path of the tracers, it is more
751 * important to be fast than accurate.
752 */
753 tr->buffer_disabled = 0;
754 /* Make the flag seen by readers */
755 smp_wmb();
756 }
757
758 /**
759 * tracing_on - enable tracing buffers
760 *
761 * This function enables tracing buffers that may have been
762 * disabled with tracing_off.
763 */
764 void tracing_on(void)
765 {
766 tracer_tracing_on(&global_trace);
767 }
768 EXPORT_SYMBOL_GPL(tracing_on);
769
770 /**
771 * __trace_puts - write a constant string into the trace buffer.
772 * @ip: The address of the caller
773 * @str: The constant string to write
774 * @size: The size of the string.
775 */
776 int __trace_puts(unsigned long ip, const char *str, int size)
777 {
778 struct ring_buffer_event *event;
779 struct ring_buffer *buffer;
780 struct print_entry *entry;
781 unsigned long irq_flags;
782 int alloc;
783 int pc;
784
785 if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
786 return 0;
787
788 pc = preempt_count();
789
790 if (unlikely(tracing_selftest_running || tracing_disabled))
791 return 0;
792
793 alloc = sizeof(*entry) + size + 2; /* possible \n added */
794
795 local_save_flags(irq_flags);
796 buffer = global_trace.trace_buffer.buffer;
797 event = trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc,
798 irq_flags, pc);
799 if (!event)
800 return 0;
801
802 entry = ring_buffer_event_data(event);
803 entry->ip = ip;
804
805 memcpy(&entry->buf, str, size);
806
807 /* Add a newline if necessary */
808 if (entry->buf[size - 1] != '\n') {
809 entry->buf[size] = '\n';
810 entry->buf[size + 1] = '\0';
811 } else
812 entry->buf[size] = '\0';
813
814 __buffer_unlock_commit(buffer, event);
815 ftrace_trace_stack(&global_trace, buffer, irq_flags, 4, pc, NULL);
816
817 return size;
818 }
819 EXPORT_SYMBOL_GPL(__trace_puts);
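/*
 * Usage note (hedged): callers normally do not invoke __trace_puts()
 * directly; the trace_puts() macro in the tracing headers is expected
 * to supply the caller's address and the string, roughly:
 *
 *	trace_puts("reached the slow path\n");
 */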
820
821 /**
822 * __trace_bputs - write the pointer to a constant string into trace buffer
823 * @ip: The address of the caller
824 * @str: The constant string to write to the buffer to
825 */
826 int __trace_bputs(unsigned long ip, const char *str)
827 {
828 struct ring_buffer_event *event;
829 struct ring_buffer *buffer;
830 struct bputs_entry *entry;
831 unsigned long irq_flags;
832 int size = sizeof(struct bputs_entry);
833 int pc;
834
835 if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
836 return 0;
837
838 pc = preempt_count();
839
840 if (unlikely(tracing_selftest_running || tracing_disabled))
841 return 0;
842
843 local_save_flags(irq_flags);
844 buffer = global_trace.trace_buffer.buffer;
845 event = trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size,
846 irq_flags, pc);
847 if (!event)
848 return 0;
849
850 entry = ring_buffer_event_data(event);
851 entry->ip = ip;
852 entry->str = str;
853
854 __buffer_unlock_commit(buffer, event);
855 ftrace_trace_stack(&global_trace, buffer, irq_flags, 4, pc, NULL);
856
857 return 1;
858 }
859 EXPORT_SYMBOL_GPL(__trace_bputs);
860
861 #ifdef CONFIG_TRACER_SNAPSHOT
862 /**
863 * tracing_snapshot - take a snapshot of the current buffer.
864 *
865 * This causes a swap between the snapshot buffer and the current live
866 * tracing buffer. You can use this to take snapshots of the live
867 * trace when some condition is triggered, but continue to trace.
868 *
869 * Note, make sure to allocate the snapshot either with
870 * tracing_snapshot_alloc(), or manually with:
871 * echo 1 > /sys/kernel/debug/tracing/snapshot
872 *
873 * If the snapshot buffer is not allocated, this will stop tracing,
874 * basically making a permanent snapshot.
875 */
876 void tracing_snapshot(void)
877 {
878 struct trace_array *tr = &global_trace;
879 struct tracer *tracer = tr->current_trace;
880 unsigned long flags;
881
882 if (in_nmi()) {
883 internal_trace_puts("*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
884 internal_trace_puts("*** snapshot is being ignored ***\n");
885 return;
886 }
887
888 if (!tr->allocated_snapshot) {
889 internal_trace_puts("*** SNAPSHOT NOT ALLOCATED ***\n");
890 internal_trace_puts("*** stopping trace here! ***\n");
891 tracing_off();
892 return;
893 }
894
895 /* Note, snapshot can not be used when the tracer uses it */
896 if (tracer->use_max_tr) {
897 internal_trace_puts("*** LATENCY TRACER ACTIVE ***\n");
898 internal_trace_puts("*** Can not use snapshot (sorry) ***\n");
899 return;
900 }
901
902 local_irq_save(flags);
903 update_max_tr(tr, current, smp_processor_id());
904 local_irq_restore(flags);
905 }
906 EXPORT_SYMBOL_GPL(tracing_snapshot);
907
908 static int resize_buffer_duplicate_size(struct trace_buffer *trace_buf,
909 struct trace_buffer *size_buf, int cpu_id);
910 static void set_buffer_entries(struct trace_buffer *buf, unsigned long val);
911
912 static int alloc_snapshot(struct trace_array *tr)
913 {
914 int ret;
915
916 if (!tr->allocated_snapshot) {
917
918 /* allocate spare buffer */
919 ret = resize_buffer_duplicate_size(&tr->max_buffer,
920 &tr->trace_buffer, RING_BUFFER_ALL_CPUS);
921 if (ret < 0)
922 return ret;
923
924 tr->allocated_snapshot = true;
925 }
926
927 return 0;
928 }
929
930 static void free_snapshot(struct trace_array *tr)
931 {
932 /*
933 * We don't free the ring buffer; instead, we resize it because
934 * the max_tr ring buffer has some state (e.g. ring->clock) and
935 * we want to preserve it.
936 */
937 ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
938 set_buffer_entries(&tr->max_buffer, 1);
939 tracing_reset_online_cpus(&tr->max_buffer);
940 tr->allocated_snapshot = false;
941 }
942
943 /**
944 * tracing_alloc_snapshot - allocate snapshot buffer.
945 *
946 * This only allocates the snapshot buffer if it isn't already
947 * allocated - it doesn't also take a snapshot.
948 *
949 * This is meant to be used in cases where the snapshot buffer needs
950 * to be set up for events that can't sleep but need to be able to
951 * trigger a snapshot.
952 */
953 int tracing_alloc_snapshot(void)
954 {
955 struct trace_array *tr = &global_trace;
956 int ret;
957
958 ret = alloc_snapshot(tr);
959 WARN_ON(ret < 0);
960
961 return ret;
962 }
963 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
964
965 /**
966 * tracing_snapshot_alloc - allocate and take a snapshot of the current buffer.
967 *
968 * This is similar to tracing_snapshot(), but it will allocate the
969 * snapshot buffer if it isn't already allocated. Use this only
970 * where it is safe to sleep, as the allocation may sleep.
971 *
972 * This causes a swap between the snapshot buffer and the current live
973 * tracing buffer. You can use this to take snapshots of the live
974 * trace when some condition is triggered, but continue to trace.
975 */
976 void tracing_snapshot_alloc(void)
977 {
978 int ret;
979
980 ret = tracing_alloc_snapshot();
981 if (ret < 0)
982 return;
983
984 tracing_snapshot();
985 }
986 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
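/*
 * Illustrative sketch (saw_the_bug is an invented condition): a
 * debugging hook might allocate the snapshot buffer up front from
 * sleepable context and then take snapshots from atomic context when
 * a condition of interest fires:
 *
 *	at init time (may sleep):
 *		if (tracing_alloc_snapshot() < 0)
 *			return;
 *
 *	later, e.g. from an interrupt handler:
 *		if (saw_the_bug)
 *			tracing_snapshot();
 */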
987 #else
988 void tracing_snapshot(void)
989 {
990 WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
991 }
992 EXPORT_SYMBOL_GPL(tracing_snapshot);
993 int tracing_alloc_snapshot(void)
994 {
995 WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used");
996 return -ENODEV;
997 }
998 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
999 void tracing_snapshot_alloc(void)
1000 {
1001 /* Give warning */
1002 tracing_snapshot();
1003 }
1004 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1005 #endif /* CONFIG_TRACER_SNAPSHOT */
1006
1007 static void tracer_tracing_off(struct trace_array *tr)
1008 {
1009 if (tr->trace_buffer.buffer)
1010 ring_buffer_record_off(tr->trace_buffer.buffer);
1011 /*
1012 * This flag is looked at when buffers haven't been allocated
1013 * yet, or by some tracers (like irqsoff), that just want to
1014 * know if the ring buffer has been disabled, but it can handle
1015 * races where it gets disabled but we still do a record.
1016 * As the check is in the fast path of the tracers, it is more
1017 * important to be fast than accurate.
1018 */
1019 tr->buffer_disabled = 1;
1020 /* Make the flag seen by readers */
1021 smp_wmb();
1022 }
1023
1024 /**
1025 * tracing_off - turn off tracing buffers
1026 *
1027 * This function stops the tracing buffers from recording data.
1028 * It does not disable any overhead the tracers themselves may
1029 * be causing. This function simply causes all recording to
1030 * the ring buffers to fail.
1031 */
1032 void tracing_off(void)
1033 {
1034 tracer_tracing_off(&global_trace);
1035 }
1036 EXPORT_SYMBOL_GPL(tracing_off);
1037
1038 void disable_trace_on_warning(void)
1039 {
1040 if (__disable_trace_on_warning)
1041 tracing_off();
1042 }
1043
1044 /**
1045 * tracer_tracing_is_on - show real state of ring buffer enabled
1046 * @tr : the trace array to know if ring buffer is enabled
1047 *
1048 * Shows real state of the ring buffer if it is enabled or not.
1049 */
1050 int tracer_tracing_is_on(struct trace_array *tr)
1051 {
1052 if (tr->trace_buffer.buffer)
1053 return ring_buffer_record_is_on(tr->trace_buffer.buffer);
1054 return !tr->buffer_disabled;
1055 }
1056
1057 /**
1058 * tracing_is_on - show state of ring buffers enabled
1059 */
1060 int tracing_is_on(void)
1061 {
1062 return tracer_tracing_is_on(&global_trace);
1063 }
1064 EXPORT_SYMBOL_GPL(tracing_is_on);
1065
1066 static int __init set_buf_size(char *str)
1067 {
1068 unsigned long buf_size;
1069
1070 if (!str)
1071 return 0;
1072 buf_size = memparse(str, &str);
1073 /* nr_entries can not be zero */
1074 if (buf_size == 0)
1075 return 0;
1076 trace_buf_size = buf_size;
1077 return 1;
1078 }
1079 __setup("trace_buf_size=", set_buf_size);
1080
1081 static int __init set_tracing_thresh(char *str)
1082 {
1083 unsigned long threshold;
1084 int ret;
1085
1086 if (!str)
1087 return 0;
1088 ret = kstrtoul(str, 0, &threshold);
1089 if (ret < 0)
1090 return 0;
1091 tracing_thresh = threshold * 1000;
1092 return 1;
1093 }
1094 __setup("tracing_thresh=", set_tracing_thresh);
1095
1096 unsigned long nsecs_to_usecs(unsigned long nsecs)
1097 {
1098 return nsecs / 1000;
1099 }
1100
1101 /*
1102 * TRACE_FLAGS is defined as a tuple matching bit masks with strings.
1103 * It uses C(a, b) where 'a' is the enum name and 'b' is the string that
1104 * matches it. By defining "C(a, b) b", TRACE_FLAGS becomes a list
1105 * of strings in the order that the enums were defined.
1106 */
1107 #undef C
1108 #define C(a, b) b
1109
1110 /* These must match the bit positions in trace_iterator_flags */
1111 static const char *trace_options[] = {
1112 TRACE_FLAGS
1113 NULL
1114 };
1115
1116 static struct {
1117 u64 (*func)(void);
1118 const char *name;
1119 int in_ns; /* is this clock in nanoseconds? */
1120 } trace_clocks[] = {
1121 { trace_clock_local, "local", 1 },
1122 { trace_clock_global, "global", 1 },
1123 { trace_clock_counter, "counter", 0 },
1124 { trace_clock_jiffies, "uptime", 0 },
1125 { trace_clock, "perf", 1 },
1126 { ktime_get_mono_fast_ns, "mono", 1 },
1127 { ktime_get_raw_fast_ns, "mono_raw", 1 },
1128 { ktime_get_boot_fast_ns, "boot", 1 },
1129 ARCH_TRACE_CLOCKS
1130 };
1131
1132 /*
1133 * trace_parser_get_init - gets the buffer for trace parser
1134 */
1135 int trace_parser_get_init(struct trace_parser *parser, int size)
1136 {
1137 memset(parser, 0, sizeof(*parser));
1138
1139 parser->buffer = kmalloc(size, GFP_KERNEL);
1140 if (!parser->buffer)
1141 return 1;
1142
1143 parser->size = size;
1144 return 0;
1145 }
1146
1147 /*
1148 * trace_parser_put - frees the buffer for trace parser
1149 */
1150 void trace_parser_put(struct trace_parser *parser)
1151 {
1152 kfree(parser->buffer);
1153 }
1154
1155 /*
1156 * trace_get_user - reads the user input string separated by space
1157 * (matched by isspace(ch))
1158 *
1159 * For each string found the 'struct trace_parser' is updated,
1160 * and the function returns.
1161 *
1162 * Returns number of bytes read.
1163 *
1164 * See kernel/trace/trace.h for 'struct trace_parser' details.
1165 */
1166 int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
1167 size_t cnt, loff_t *ppos)
1168 {
1169 char ch;
1170 size_t read = 0;
1171 ssize_t ret;
1172
1173 if (!*ppos)
1174 trace_parser_clear(parser);
1175
1176 ret = get_user(ch, ubuf++);
1177 if (ret)
1178 goto out;
1179
1180 read++;
1181 cnt--;
1182
1183 /*
1184 * The parser is not finished with the last write,
1185 * continue reading the user input without skipping spaces.
1186 */
1187 if (!parser->cont) {
1188 /* skip white space */
1189 while (cnt && isspace(ch)) {
1190 ret = get_user(ch, ubuf++);
1191 if (ret)
1192 goto out;
1193 read++;
1194 cnt--;
1195 }
1196
1197 /* only spaces were written */
1198 if (isspace(ch)) {
1199 *ppos += read;
1200 ret = read;
1201 goto out;
1202 }
1203
1204 parser->idx = 0;
1205 }
1206
1207 /* read the non-space input */
1208 while (cnt && !isspace(ch)) {
1209 if (parser->idx < parser->size - 1)
1210 parser->buffer[parser->idx++] = ch;
1211 else {
1212 ret = -EINVAL;
1213 goto out;
1214 }
1215 ret = get_user(ch, ubuf++);
1216 if (ret)
1217 goto out;
1218 read++;
1219 cnt--;
1220 }
1221
1222 /* We either got finished input or we have to wait for another call. */
1223 if (isspace(ch)) {
1224 parser->buffer[parser->idx] = 0;
1225 parser->cont = false;
1226 } else if (parser->idx < parser->size - 1) {
1227 parser->cont = true;
1228 parser->buffer[parser->idx++] = ch;
1229 } else {
1230 ret = -EINVAL;
1231 goto out;
1232 }
1233
1234 *ppos += read;
1235 ret = read;
1236
1237 out:
1238 return ret;
1239 }
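/*
 * Illustrative sketch (SOME_BUF_SIZE stands in for a caller-chosen
 * size): trace_pid_write() above is the canonical consumer of this
 * parser; a write handler generally loops along these lines:
 *
 *	if (trace_parser_get_init(&parser, SOME_BUF_SIZE))
 *		return -ENOMEM;
 *
 *	while (cnt > 0) {
 *		pos = 0;
 *		ret = trace_get_user(&parser, ubuf, cnt, &pos);
 *		if (ret < 0 || !trace_parser_loaded(&parser))
 *			break;
 *		read += ret;
 *		ubuf += ret;
 *		cnt -= ret;
 *		... consume parser.buffer ...
 *		trace_parser_clear(&parser);
 *	}
 *	trace_parser_put(&parser);
 */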
1240
1241 /* TODO add a seq_buf_to_buffer() */
1242 static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
1243 {
1244 int len;
1245
1246 if (trace_seq_used(s) <= s->seq.readpos)
1247 return -EBUSY;
1248
1249 len = trace_seq_used(s) - s->seq.readpos;
1250 if (cnt > len)
1251 cnt = len;
1252 memcpy(buf, s->buffer + s->seq.readpos, cnt);
1253
1254 s->seq.readpos += cnt;
1255 return cnt;
1256 }
1257
1258 unsigned long __read_mostly tracing_thresh;
1259
1260 #ifdef CONFIG_TRACER_MAX_TRACE
1261 /*
1262 * Copy the new maximum trace into the separate maximum-trace
1263 * structure. (this way the maximum trace is permanently saved,
1264 * for later retrieval via /sys/kernel/debug/tracing/latency_trace)
1265 */
1266 static void
1267 __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1268 {
1269 struct trace_buffer *trace_buf = &tr->trace_buffer;
1270 struct trace_buffer *max_buf = &tr->max_buffer;
1271 struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu);
1272 struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);
1273
1274 max_buf->cpu = cpu;
1275 max_buf->time_start = data->preempt_timestamp;
1276
1277 max_data->saved_latency = tr->max_latency;
1278 max_data->critical_start = data->critical_start;
1279 max_data->critical_end = data->critical_end;
1280
1281 memcpy(max_data->comm, tsk->comm, TASK_COMM_LEN);
1282 max_data->pid = tsk->pid;
1283 /*
1284 * If tsk == current, then use current_uid(), as that does not use
1285 * RCU. The irq tracer can be called out of RCU scope.
1286 */
1287 if (tsk == current)
1288 max_data->uid = current_uid();
1289 else
1290 max_data->uid = task_uid(tsk);
1291
1292 max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
1293 max_data->policy = tsk->policy;
1294 max_data->rt_priority = tsk->rt_priority;
1295
1296 /* record this tasks comm */
1297 tracing_record_cmdline(tsk);
1298 }
1299
1300 /**
1301 * update_max_tr - snapshot all trace buffers from global_trace to max_tr
1302 * @tr: tracer
1303 * @tsk: the task with the latency
1304 * @cpu: The cpu that initiated the trace.
1305 *
1306 * Flip the buffers between the @tr and the max_tr and record information
1307 * about which task was the cause of this latency.
1308 */
1309 void
1310 update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1311 {
1312 struct ring_buffer *buf;
1313
1314 if (tr->stop_count)
1315 return;
1316
1317 WARN_ON_ONCE(!irqs_disabled());
1318
1319 if (!tr->allocated_snapshot) {
1320 /* Only the nop tracer should hit this when disabling */
1321 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1322 return;
1323 }
1324
1325 arch_spin_lock(&tr->max_lock);
1326
1327 buf = tr->trace_buffer.buffer;
1328 tr->trace_buffer.buffer = tr->max_buffer.buffer;
1329 tr->max_buffer.buffer = buf;
1330
1331 __update_max_tr(tr, tsk, cpu);
1332 arch_spin_unlock(&tr->max_lock);
1333 }
1334
1335 /**
1336 * update_max_tr_single - only copy one trace over, and reset the rest
1337 * @tr: tracer
1338 * @tsk: task with the latency
1339 * @cpu: the cpu of the buffer to copy.
1340 *
1341 * Flip the trace of a single CPU buffer between the @tr and the max_tr.
1342 */
1343 void
1344 update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
1345 {
1346 int ret;
1347
1348 if (tr->stop_count)
1349 return;
1350
1351 WARN_ON_ONCE(!irqs_disabled());
1352 if (!tr->allocated_snapshot) {
1353 /* Only the nop tracer should hit this when disabling */
1354 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1355 return;
1356 }
1357
1358 arch_spin_lock(&tr->max_lock);
1359
1360 ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->trace_buffer.buffer, cpu);
1361
1362 if (ret == -EBUSY) {
1363 /*
1364 * We failed to swap the buffer due to a commit taking
1365 * place on this CPU. We fail to record, but we reset
1366 * the max trace buffer (no one writes directly to it)
1367 * and flag that it failed.
1368 */
1369 trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_,
1370 "Failed to swap buffers due to commit in progress\n");
1371 }
1372
1373 WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
1374
1375 __update_max_tr(tr, tsk, cpu);
1376 arch_spin_unlock(&tr->max_lock);
1377 }
1378 #endif /* CONFIG_TRACER_MAX_TRACE */
1379
1380 static int wait_on_pipe(struct trace_iterator *iter, bool full)
1381 {
1382 /* Iterators are static, they should be filled or empty */
1383 if (trace_buffer_iter(iter, iter->cpu_file))
1384 return 0;
1385
1386 return ring_buffer_wait(iter->trace_buffer->buffer, iter->cpu_file,
1387 full);
1388 }
1389
1390 #ifdef CONFIG_FTRACE_STARTUP_TEST
1391 static int run_tracer_selftest(struct tracer *type)
1392 {
1393 struct trace_array *tr = &global_trace;
1394 struct tracer *saved_tracer = tr->current_trace;
1395 int ret;
1396
1397 if (!type->selftest || tracing_selftest_disabled)
1398 return 0;
1399
1400 /*
1401 * Run a selftest on this tracer.
1402 * Here we reset the trace buffer, and set the current
1403 * tracer to be this tracer. The tracer can then run some
1404 * internal tracing to verify that everything is in order.
1405 * If we fail, we do not register this tracer.
1406 */
1407 tracing_reset_online_cpus(&tr->trace_buffer);
1408
1409 tr->current_trace = type;
1410
1411 #ifdef CONFIG_TRACER_MAX_TRACE
1412 if (type->use_max_tr) {
1413 /* If we expanded the buffers, make sure the max is expanded too */
1414 if (ring_buffer_expanded)
1415 ring_buffer_resize(tr->max_buffer.buffer, trace_buf_size,
1416 RING_BUFFER_ALL_CPUS);
1417 tr->allocated_snapshot = true;
1418 }
1419 #endif
1420
1421 /* the test is responsible for initializing and enabling */
1422 pr_info("Testing tracer %s: ", type->name);
1423 ret = type->selftest(type, tr);
1424 /* the test is responsible for resetting too */
1425 tr->current_trace = saved_tracer;
1426 if (ret) {
1427 printk(KERN_CONT "FAILED!\n");
1428 /* Add the warning after printing 'FAILED' */
1429 WARN_ON(1);
1430 return -1;
1431 }
1432 /* Only reset on passing, to avoid touching corrupted buffers */
1433 tracing_reset_online_cpus(&tr->trace_buffer);
1434
1435 #ifdef CONFIG_TRACER_MAX_TRACE
1436 if (type->use_max_tr) {
1437 tr->allocated_snapshot = false;
1438
1439 /* Shrink the max buffer again */
1440 if (ring_buffer_expanded)
1441 ring_buffer_resize(tr->max_buffer.buffer, 1,
1442 RING_BUFFER_ALL_CPUS);
1443 }
1444 #endif
1445
1446 printk(KERN_CONT "PASSED\n");
1447 return 0;
1448 }
1449 #else
1450 static inline int run_tracer_selftest(struct tracer *type)
1451 {
1452 return 0;
1453 }
1454 #endif /* CONFIG_FTRACE_STARTUP_TEST */
1455
1456 static void add_tracer_options(struct trace_array *tr, struct tracer *t);
1457
1458 static void __init apply_trace_boot_options(void);
1459
1460 /**
1461 * register_tracer - register a tracer with the ftrace system.
1462 * @type: the plugin for the tracer
1463 *
1464 * Register a new plugin tracer.
1465 */
1466 int __init register_tracer(struct tracer *type)
1467 {
1468 struct tracer *t;
1469 int ret = 0;
1470
1471 if (!type->name) {
1472 pr_info("Tracer must have a name\n");
1473 return -1;
1474 }
1475
1476 if (strlen(type->name) >= MAX_TRACER_SIZE) {
1477 pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
1478 return -1;
1479 }
1480
1481 mutex_lock(&trace_types_lock);
1482
1483 tracing_selftest_running = true;
1484
1485 for (t = trace_types; t; t = t->next) {
1486 if (strcmp(type->name, t->name) == 0) {
1487 /* already found */
1488 pr_info("Tracer %s already registered\n",
1489 type->name);
1490 ret = -1;
1491 goto out;
1492 }
1493 }
1494
1495 if (!type->set_flag)
1496 type->set_flag = &dummy_set_flag;
1497 if (!type->flags) {
1498 /* allocate a dummy tracer_flags */
1499 type->flags = kmalloc(sizeof(*type->flags), GFP_KERNEL);
1500 if (!type->flags) {
1501 ret = -ENOMEM;
1502 goto out;
1503 }
1504 type->flags->val = 0;
1505 type->flags->opts = dummy_tracer_opt;
1506 } else
1507 if (!type->flags->opts)
1508 type->flags->opts = dummy_tracer_opt;
1509
1510 /* store the tracer for __set_tracer_option */
1511 type->flags->trace = type;
1512
1513 ret = run_tracer_selftest(type);
1514 if (ret < 0)
1515 goto out;
1516
1517 type->next = trace_types;
1518 trace_types = type;
1519 add_tracer_options(&global_trace, type);
1520
1521 out:
1522 tracing_selftest_running = false;
1523 mutex_unlock(&trace_types_lock);
1524
1525 if (ret || !default_bootup_tracer)
1526 goto out_unlock;
1527
1528 if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
1529 goto out_unlock;
1530
1531 printk(KERN_INFO "Starting tracer '%s'\n", type->name);
1532 /* Do we want this tracer to start on bootup? */
1533 tracing_set_tracer(&global_trace, type->name);
1534 default_bootup_tracer = NULL;
1535
1536 apply_trace_boot_options();
1537
1538 /* disable other selftests, since this will break them. */
1539 tracing_selftest_disabled = true;
1540 #ifdef CONFIG_FTRACE_STARTUP_TEST
1541 printk(KERN_INFO "Disabling FTRACE selftests due to running tracer '%s'\n",
1542 type->name);
1543 #endif
1544
1545 out_unlock:
1546 return ret;
1547 }
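/*
 * Illustrative sketch (the example_* names are invented): a minimal
 * tracer registers itself from an __init function; only the fields a
 * bare-bones tracer needs are shown:
 *
 *	static int example_trace_init(struct trace_array *tr)
 *	{
 *		return 0;
 *	}
 *
 *	static void example_trace_reset(struct trace_array *tr)
 *	{
 *	}
 *
 *	static struct tracer example_tracer __read_mostly = {
 *		.name	= "example",
 *		.init	= example_trace_init,
 *		.reset	= example_trace_reset,
 *	};
 *
 *	static __init int init_example_tracer(void)
 *	{
 *		return register_tracer(&example_tracer);
 *	}
 *	core_initcall(init_example_tracer);
 */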
1548
1549 void tracing_reset(struct trace_buffer *buf, int cpu)
1550 {
1551 struct ring_buffer *buffer = buf->buffer;
1552
1553 if (!buffer)
1554 return;
1555
1556 ring_buffer_record_disable(buffer);
1557
1558 /* Make sure all commits have finished */
1559 synchronize_sched();
1560 ring_buffer_reset_cpu(buffer, cpu);
1561
1562 ring_buffer_record_enable(buffer);
1563 }
1564
1565 void tracing_reset_online_cpus(struct trace_buffer *buf)
1566 {
1567 struct ring_buffer *buffer = buf->buffer;
1568 int cpu;
1569
1570 if (!buffer)
1571 return;
1572
1573 ring_buffer_record_disable(buffer);
1574
1575 /* Make sure all commits have finished */
1576 synchronize_sched();
1577
1578 buf->time_start = buffer_ftrace_now(buf, buf->cpu);
1579
1580 for_each_online_cpu(cpu)
1581 ring_buffer_reset_cpu(buffer, cpu);
1582
1583 ring_buffer_record_enable(buffer);
1584 }
1585
1586 /* Must have trace_types_lock held */
1587 void tracing_reset_all_online_cpus(void)
1588 {
1589 struct trace_array *tr;
1590
1591 list_for_each_entry(tr, &ftrace_trace_arrays, list) {
1592 tracing_reset_online_cpus(&tr->trace_buffer);
1593 #ifdef CONFIG_TRACER_MAX_TRACE
1594 tracing_reset_online_cpus(&tr->max_buffer);
1595 #endif
1596 }
1597 }
1598
1599 #define SAVED_CMDLINES_DEFAULT 128
1600 #define NO_CMDLINE_MAP UINT_MAX
1601 static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
1602 struct saved_cmdlines_buffer {
1603 unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
1604 unsigned *map_cmdline_to_pid;
1605 unsigned cmdline_num;
1606 int cmdline_idx;
1607 char *saved_cmdlines;
1608 };
1609 static struct saved_cmdlines_buffer *savedcmd;
1610
1611 /* temporary disable recording */
1612 static atomic_t trace_record_cmdline_disabled __read_mostly;
1613
1614 static inline char *get_saved_cmdlines(int idx)
1615 {
1616 return &savedcmd->saved_cmdlines[idx * TASK_COMM_LEN];
1617 }
1618
1619 static inline void set_cmdline(int idx, const char *cmdline)
1620 {
1621 memcpy(get_saved_cmdlines(idx), cmdline, TASK_COMM_LEN);
1622 }
1623
1624 static int allocate_cmdlines_buffer(unsigned int val,
1625 struct saved_cmdlines_buffer *s)
1626 {
1627 s->map_cmdline_to_pid = kmalloc(val * sizeof(*s->map_cmdline_to_pid),
1628 GFP_KERNEL);
1629 if (!s->map_cmdline_to_pid)
1630 return -ENOMEM;
1631
1632 s->saved_cmdlines = kmalloc(val * TASK_COMM_LEN, GFP_KERNEL);
1633 if (!s->saved_cmdlines) {
1634 kfree(s->map_cmdline_to_pid);
1635 return -ENOMEM;
1636 }
1637
1638 s->cmdline_idx = 0;
1639 s->cmdline_num = val;
1640 memset(&s->map_pid_to_cmdline, NO_CMDLINE_MAP,
1641 sizeof(s->map_pid_to_cmdline));
1642 memset(s->map_cmdline_to_pid, NO_CMDLINE_MAP,
1643 val * sizeof(*s->map_cmdline_to_pid));
1644
1645 return 0;
1646 }
1647
1648 static int trace_create_savedcmd(void)
1649 {
1650 int ret;
1651
1652 savedcmd = kmalloc(sizeof(*savedcmd), GFP_KERNEL);
1653 if (!savedcmd)
1654 return -ENOMEM;
1655
1656 ret = allocate_cmdlines_buffer(SAVED_CMDLINES_DEFAULT, savedcmd);
1657 if (ret < 0) {
1658 kfree(savedcmd);
1659 savedcmd = NULL;
1660 return -ENOMEM;
1661 }
1662
1663 return 0;
1664 }
1665
1666 int is_tracing_stopped(void)
1667 {
1668 return global_trace.stop_count;
1669 }
1670
1671 /**
1672 * tracing_start - quick start of the tracer
1673 *
1674 * If tracing is enabled but was stopped by tracing_stop,
1675 * this will start the tracer back up.
1676 */
1677 void tracing_start(void)
1678 {
1679 struct ring_buffer *buffer;
1680 unsigned long flags;
1681
1682 if (tracing_disabled)
1683 return;
1684
1685 raw_spin_lock_irqsave(&global_trace.start_lock, flags);
1686 if (--global_trace.stop_count) {
1687 if (global_trace.stop_count < 0) {
1688 /* Someone screwed up their debugging */
1689 WARN_ON_ONCE(1);
1690 global_trace.stop_count = 0;
1691 }
1692 goto out;
1693 }
1694
1695 /* Prevent the buffers from switching */
1696 arch_spin_lock(&global_trace.max_lock);
1697
1698 buffer = global_trace.trace_buffer.buffer;
1699 if (buffer)
1700 ring_buffer_record_enable(buffer);
1701
1702 #ifdef CONFIG_TRACER_MAX_TRACE
1703 buffer = global_trace.max_buffer.buffer;
1704 if (buffer)
1705 ring_buffer_record_enable(buffer);
1706 #endif
1707
1708 arch_spin_unlock(&global_trace.max_lock);
1709
1710 out:
1711 raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
1712 }
1713
1714 static void tracing_start_tr(struct trace_array *tr)
1715 {
1716 struct ring_buffer *buffer;
1717 unsigned long flags;
1718
1719 if (tracing_disabled)
1720 return;
1721
1722 /* If global, we need to also start the max tracer */
1723 if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
1724 return tracing_start();
1725
1726 raw_spin_lock_irqsave(&tr->start_lock, flags);
1727
1728 if (--tr->stop_count) {
1729 if (tr->stop_count < 0) {
1730 /* Someone screwed up their debugging */
1731 WARN_ON_ONCE(1);
1732 tr->stop_count = 0;
1733 }
1734 goto out;
1735 }
1736
1737 buffer = tr->trace_buffer.buffer;
1738 if (buffer)
1739 ring_buffer_record_enable(buffer);
1740
1741 out:
1742 raw_spin_unlock_irqrestore(&tr->start_lock, flags);
1743 }
1744
1745 /**
1746 * tracing_stop - quick stop of the tracer
1747 *
1748 * Light weight way to stop tracing. Use in conjunction with
1749 * tracing_start.
1750 */
1751 void tracing_stop(void)
1752 {
1753 struct ring_buffer *buffer;
1754 unsigned long flags;
1755
1756 raw_spin_lock_irqsave(&global_trace.start_lock, flags);
1757 if (global_trace.stop_count++)
1758 goto out;
1759
1760 /* Prevent the buffers from switching */
1761 arch_spin_lock(&global_trace.max_lock);
1762
1763 buffer = global_trace.trace_buffer.buffer;
1764 if (buffer)
1765 ring_buffer_record_disable(buffer);
1766
1767 #ifdef CONFIG_TRACER_MAX_TRACE
1768 buffer = global_trace.max_buffer.buffer;
1769 if (buffer)
1770 ring_buffer_record_disable(buffer);
1771 #endif
1772
1773 arch_spin_unlock(&global_trace.max_lock);
1774
1775 out:
1776 raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
1777 }
1778
1779 static void tracing_stop_tr(struct trace_array *tr)
1780 {
1781 struct ring_buffer *buffer;
1782 unsigned long flags;
1783
1784 /* If global, we need to also stop the max tracer */
1785 if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
1786 return tracing_stop();
1787
1788 raw_spin_lock_irqsave(&tr->start_lock, flags);
1789 if (tr->stop_count++)
1790 goto out;
1791
1792 buffer = tr->trace_buffer.buffer;
1793 if (buffer)
1794 ring_buffer_record_disable(buffer);
1795
1796 out:
1797 raw_spin_unlock_irqrestore(&tr->start_lock, flags);
1798 }
1799
1800 void trace_stop_cmdline_recording(void);
1801
1802 static int trace_save_cmdline(struct task_struct *tsk)
1803 {
1804 unsigned pid, idx;
1805
1806 if (!tsk->pid || unlikely(tsk->pid > PID_MAX_DEFAULT))
1807 return 0;
1808
1809 /*
1810 * It's not the end of the world if we don't get
1811 * the lock, but we also don't want to spin
1812 * nor do we want to disable interrupts,
1813 * so if we miss here, then better luck next time.
1814 */
1815 if (!arch_spin_trylock(&trace_cmdline_lock))
1816 return 0;
1817
1818 idx = savedcmd->map_pid_to_cmdline[tsk->pid];
1819 if (idx == NO_CMDLINE_MAP) {
1820 idx = (savedcmd->cmdline_idx + 1) % savedcmd->cmdline_num;
1821
1822 /*
1823 * Check whether the cmdline buffer at idx has a pid
1824 * mapped. We are going to overwrite that entry so we
1825 * need to clear the map_pid_to_cmdline. Otherwise we
1826 * would read the new comm for the old pid.
1827 */
1828 pid = savedcmd->map_cmdline_to_pid[idx];
1829 if (pid != NO_CMDLINE_MAP)
1830 savedcmd->map_pid_to_cmdline[pid] = NO_CMDLINE_MAP;
1831
1832 savedcmd->map_cmdline_to_pid[idx] = tsk->pid;
1833 savedcmd->map_pid_to_cmdline[tsk->pid] = idx;
1834
1835 savedcmd->cmdline_idx = idx;
1836 }
1837
1838 set_cmdline(idx, tsk->comm);
1839
1840 arch_spin_unlock(&trace_cmdline_lock);
1841
1842 return 1;
1843 }
1844
1845 static void __trace_find_cmdline(int pid, char comm[])
1846 {
1847 unsigned map;
1848
1849 if (!pid) {
1850 strcpy(comm, "<idle>");
1851 return;
1852 }
1853
1854 if (WARN_ON_ONCE(pid < 0)) {
1855 strcpy(comm, "<XXX>");
1856 return;
1857 }
1858
1859 if (pid > PID_MAX_DEFAULT) {
1860 strcpy(comm, "<...>");
1861 return;
1862 }
1863
1864 map = savedcmd->map_pid_to_cmdline[pid];
1865 if (map != NO_CMDLINE_MAP)
1866 strcpy(comm, get_saved_cmdlines(map));
1867 else
1868 strcpy(comm, "<...>");
1869 }
1870
1871 void trace_find_cmdline(int pid, char comm[])
1872 {
1873 preempt_disable();
1874 arch_spin_lock(&trace_cmdline_lock);
1875
1876 __trace_find_cmdline(pid, comm);
1877
1878 arch_spin_unlock(&trace_cmdline_lock);
1879 preempt_enable();
1880 }
1881
1882 void tracing_record_cmdline(struct task_struct *tsk)
1883 {
1884 if (atomic_read(&trace_record_cmdline_disabled) || !tracing_is_on())
1885 return;
1886
1887 if (!__this_cpu_read(trace_cmdline_save))
1888 return;
1889
1890 if (trace_save_cmdline(tsk))
1891 __this_cpu_write(trace_cmdline_save, false);
1892 }
1893
1894 void
1895 tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags,
1896 int pc)
1897 {
1898 struct task_struct *tsk = current;
1899
1900 entry->preempt_count = pc & 0xff;
1901 entry->pid = (tsk) ? tsk->pid : 0;
1902 entry->flags =
1903 #ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT
1904 (irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) |
1905 #else
1906 TRACE_FLAG_IRQS_NOSUPPORT |
1907 #endif
1908 ((pc & NMI_MASK ) ? TRACE_FLAG_NMI : 0) |
1909 ((pc & HARDIRQ_MASK) ? TRACE_FLAG_HARDIRQ : 0) |
1910 ((pc & SOFTIRQ_MASK) ? TRACE_FLAG_SOFTIRQ : 0) |
1911 (tif_need_resched() ? TRACE_FLAG_NEED_RESCHED : 0) |
1912 (test_preempt_need_resched() ? TRACE_FLAG_PREEMPT_RESCHED : 0);
1913 }
1914 EXPORT_SYMBOL_GPL(tracing_generic_entry_update);
1915
1916 static __always_inline void
1917 trace_event_setup(struct ring_buffer_event *event,
1918 int type, unsigned long flags, int pc)
1919 {
1920 struct trace_entry *ent = ring_buffer_event_data(event);
1921
1922 tracing_generic_entry_update(ent, flags, pc);
1923 ent->type = type;
1924 }
1925
1926 struct ring_buffer_event *
1927 trace_buffer_lock_reserve(struct ring_buffer *buffer,
1928 int type,
1929 unsigned long len,
1930 unsigned long flags, int pc)
1931 {
1932 struct ring_buffer_event *event;
1933
1934 event = ring_buffer_lock_reserve(buffer, len);
1935 if (event != NULL)
1936 trace_event_setup(event, type, flags, pc);
1937
1938 return event;
1939 }
1940
1941 DEFINE_PER_CPU(struct ring_buffer_event *, trace_buffered_event);
1942 DEFINE_PER_CPU(int, trace_buffered_event_cnt);
1943 static int trace_buffered_event_ref;
1944
1945 /**
1946 * trace_buffered_event_enable - enable buffering events
1947 *
1948 * When events are being filtered, it is quicker to use a temporary
1949 * buffer to write the event data into if there's a likely chance
1950 * that it will not be committed. Discarding from the ring buffer
1951 * is not as fast as committing, and is much slower than copying
1952 * into the temporary buffer and committing that.
1953 *
1954 * When an event is to be filtered, allocate per cpu buffers to
1955 * write the event data into, and if the event is filtered and discarded
1956 * it is simply dropped, otherwise, the entire data is to be committed
1957 * in one shot.
1958 */
1959 void trace_buffered_event_enable(void)
1960 {
1961 struct ring_buffer_event *event;
1962 struct page *page;
1963 int cpu;
1964
1965 WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
1966
1967 if (trace_buffered_event_ref++)
1968 return;
1969
1970 for_each_tracing_cpu(cpu) {
1971 page = alloc_pages_node(cpu_to_node(cpu),
1972 GFP_KERNEL | __GFP_NORETRY, 0);
1973 if (!page)
1974 goto failed;
1975
1976 event = page_address(page);
1977 memset(event, 0, sizeof(*event));
1978
1979 per_cpu(trace_buffered_event, cpu) = event;
1980
1981 preempt_disable();
1982 if (cpu == smp_processor_id() &&
1983 this_cpu_read(trace_buffered_event) !=
1984 per_cpu(trace_buffered_event, cpu))
1985 WARN_ON_ONCE(1);
1986 preempt_enable();
1987 }
1988
1989 return;
1990 failed:
1991 trace_buffered_event_disable();
1992 }
1993
1994 static void enable_trace_buffered_event(void *data)
1995 {
1996 /* Probably not needed, but do it anyway */
1997 smp_rmb();
1998 this_cpu_dec(trace_buffered_event_cnt);
1999 }
2000
2001 static void disable_trace_buffered_event(void *data)
2002 {
2003 this_cpu_inc(trace_buffered_event_cnt);
2004 }
2005
2006 /**
2007 * trace_buffered_event_disable - disable buffering events
2008 *
2009 * When a filter is removed, it is faster to not use the buffered
2010 * events, and to commit directly into the ring buffer. Free up
2011 * the temp buffers when there are no more users. This requires
2012 * special synchronization with current events.
2013 */
2014 void trace_buffered_event_disable(void)
2015 {
2016 int cpu;
2017
2018 WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2019
2020 if (WARN_ON_ONCE(!trace_buffered_event_ref))
2021 return;
2022
2023 if (--trace_buffered_event_ref)
2024 return;
2025
2026 preempt_disable();
2027 /* For each CPU, set the buffer as used. */
2028 smp_call_function_many(tracing_buffer_mask,
2029 disable_trace_buffered_event, NULL, 1);
2030 preempt_enable();
2031
2032 /* Wait for all current users to finish */
2033 synchronize_sched();
2034
2035 for_each_tracing_cpu(cpu) {
2036 free_page((unsigned long)per_cpu(trace_buffered_event, cpu));
2037 per_cpu(trace_buffered_event, cpu) = NULL;
2038 }
2039 /*
2040 * Make sure trace_buffered_event is NULL before clearing
2041 * trace_buffered_event_cnt.
2042 */
2043 smp_wmb();
2044
2045 preempt_disable();
2046 	/* Drop the per-CPU counts taken above, now that the buffers are freed */
2047 smp_call_function_many(tracing_buffer_mask,
2048 enable_trace_buffered_event, NULL, 1);
2049 preempt_enable();
2050 }
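
/*
 * Editor's sketch (not part of the original source): the expected usage
 * pattern for the two helpers above.  Both WARN unless event_mutex is
 * held, so a hypothetical filter attach/detach path would look like:
 */
#if 0	/* illustration only */
static void example_filter_attach(struct trace_event_file *file)
{
	mutex_lock(&event_mutex);
	trace_buffered_event_enable();	/* take a ref, allocate per-cpu pages */
	/* ... install the filter on @file ... */
	mutex_unlock(&event_mutex);
}

static void example_filter_detach(struct trace_event_file *file)
{
	mutex_lock(&event_mutex);
	/* ... remove the filter from @file ... */
	trace_buffered_event_disable();	/* drop the ref, free pages at zero */
	mutex_unlock(&event_mutex);
}
#endif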
2051
2052 void
2053 __buffer_unlock_commit(struct ring_buffer *buffer, struct ring_buffer_event *event)
2054 {
2055 __this_cpu_write(trace_cmdline_save, true);
2056
2057 /* If this is the temp buffer, we need to commit fully */
2058 if (this_cpu_read(trace_buffered_event) == event) {
2059 /* Length is in event->array[0] */
2060 ring_buffer_write(buffer, event->array[0], &event->array[1]);
2061 /* Release the temp buffer */
2062 this_cpu_dec(trace_buffered_event_cnt);
2063 } else
2064 ring_buffer_unlock_commit(buffer, event);
2065 }
2066
2067 static struct ring_buffer *temp_buffer;
2068
2069 struct ring_buffer_event *
2070 trace_event_buffer_lock_reserve(struct ring_buffer **current_rb,
2071 struct trace_event_file *trace_file,
2072 int type, unsigned long len,
2073 unsigned long flags, int pc)
2074 {
2075 struct ring_buffer_event *entry;
2076 int val;
2077
2078 *current_rb = trace_file->tr->trace_buffer.buffer;
2079
2080 if ((trace_file->flags &
2081 (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED)) &&
2082 (entry = this_cpu_read(trace_buffered_event))) {
2083 /* Try to use the per cpu buffer first */
2084 val = this_cpu_inc_return(trace_buffered_event_cnt);
2085 if (val == 1) {
2086 trace_event_setup(entry, type, flags, pc);
2087 entry->array[0] = len;
2088 return entry;
2089 }
2090 this_cpu_dec(trace_buffered_event_cnt);
2091 }
2092
2093 entry = trace_buffer_lock_reserve(*current_rb,
2094 type, len, flags, pc);
2095 /*
2096 	 * If tracing is off, but we have triggers enabled,
2097 	 * we still need to look at the event data. Use the temp_buffer
2098 	 * to store the trace event for the trigger to use. It's recursion
2099 	 * safe and will not be recorded anywhere.
2100 */
2101 if (!entry && trace_file->flags & EVENT_FILE_FL_TRIGGER_COND) {
2102 *current_rb = temp_buffer;
2103 entry = trace_buffer_lock_reserve(*current_rb,
2104 type, len, flags, pc);
2105 }
2106 return entry;
2107 }
2108 EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
2109
2110 void trace_buffer_unlock_commit_regs(struct trace_array *tr,
2111 struct ring_buffer *buffer,
2112 struct ring_buffer_event *event,
2113 unsigned long flags, int pc,
2114 struct pt_regs *regs)
2115 {
2116 __buffer_unlock_commit(buffer, event);
2117
2118 /*
2119 * If regs is not set, then skip the following callers:
2120 * trace_buffer_unlock_commit_regs
2121 * event_trigger_unlock_commit
2122 * trace_event_buffer_commit
2123 * trace_event_raw_event_sched_switch
2124 * Note, we can still get here via blktrace, wakeup tracer
2125 * and mmiotrace, but that's ok if they lose a function or
2126 	 * two. They are not that meaningful.
2127 */
2128 ftrace_trace_stack(tr, buffer, flags, regs ? 0 : 4, pc, regs);
2129 ftrace_trace_userstack(buffer, flags, pc);
2130 }
2131
2132 void
2133 trace_function(struct trace_array *tr,
2134 unsigned long ip, unsigned long parent_ip, unsigned long flags,
2135 int pc)
2136 {
2137 struct trace_event_call *call = &event_function;
2138 struct ring_buffer *buffer = tr->trace_buffer.buffer;
2139 struct ring_buffer_event *event;
2140 struct ftrace_entry *entry;
2141
2142 event = trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
2143 flags, pc);
2144 if (!event)
2145 return;
2146 entry = ring_buffer_event_data(event);
2147 entry->ip = ip;
2148 entry->parent_ip = parent_ip;
2149
2150 if (!call_filter_check_discard(call, entry, buffer, event))
2151 __buffer_unlock_commit(buffer, event);
2152 }
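
/*
 * Editor's sketch (not in the original file): a minimal caller that
 * records a function entry via trace_function() above, in the spirit
 * of the function tracer's callback.  The trace_array argument is a
 * placeholder for a tracer-owned instance.
 */
#if 0	/* illustration only */
static void example_func_entry(struct trace_array *example_tr,
			       unsigned long ip, unsigned long parent_ip)
{
	unsigned long flags;
	int pc;

	pc = preempt_count();
	local_save_flags(flags);
	trace_function(example_tr, ip, parent_ip, flags, pc);
}
#endif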
2153
2154 #ifdef CONFIG_STACKTRACE
2155
2156 #define FTRACE_STACK_MAX_ENTRIES (PAGE_SIZE / sizeof(unsigned long))
2157 struct ftrace_stack {
2158 unsigned long calls[FTRACE_STACK_MAX_ENTRIES];
2159 };
2160
2161 static DEFINE_PER_CPU(struct ftrace_stack, ftrace_stack);
2162 static DEFINE_PER_CPU(int, ftrace_stack_reserve);
2163
2164 static void __ftrace_trace_stack(struct ring_buffer *buffer,
2165 unsigned long flags,
2166 int skip, int pc, struct pt_regs *regs)
2167 {
2168 struct trace_event_call *call = &event_kernel_stack;
2169 struct ring_buffer_event *event;
2170 struct stack_entry *entry;
2171 struct stack_trace trace;
2172 int use_stack;
2173 int size = FTRACE_STACK_ENTRIES;
2174
2175 trace.nr_entries = 0;
2176 trace.skip = skip;
2177
2178 /*
2179 	 * Add two, for this function and the call to save_stack_trace().
2180 	 * If regs is set, then these functions will not be in the way.
2181 */
2182 if (!regs)
2183 trace.skip += 2;
2184
2185 /*
2186 	 * Since events can happen in NMIs, there's no safe way to
2187 	 * use the per-cpu ftrace_stacks. We reserve it and if an interrupt
2188 	 * or NMI comes in, it will just have to use the default
2189 	 * FTRACE_STACK_ENTRIES.
2190 */
2191 preempt_disable_notrace();
2192
2193 use_stack = __this_cpu_inc_return(ftrace_stack_reserve);
2194 /*
2195 * We don't need any atomic variables, just a barrier.
2196 * If an interrupt comes in, we don't care, because it would
2197 * have exited and put the counter back to what we want.
2198 * We just need a barrier to keep gcc from moving things
2199 * around.
2200 */
2201 barrier();
2202 if (use_stack == 1) {
2203 trace.entries = this_cpu_ptr(ftrace_stack.calls);
2204 trace.max_entries = FTRACE_STACK_MAX_ENTRIES;
2205
2206 if (regs)
2207 save_stack_trace_regs(regs, &trace);
2208 else
2209 save_stack_trace(&trace);
2210
2211 if (trace.nr_entries > size)
2212 size = trace.nr_entries;
2213 } else
2214 /* From now on, use_stack is a boolean */
2215 use_stack = 0;
2216
2217 size *= sizeof(unsigned long);
2218
2219 event = trace_buffer_lock_reserve(buffer, TRACE_STACK,
2220 sizeof(*entry) + size, flags, pc);
2221 if (!event)
2222 goto out;
2223 entry = ring_buffer_event_data(event);
2224
2225 memset(&entry->caller, 0, size);
2226
2227 if (use_stack)
2228 memcpy(&entry->caller, trace.entries,
2229 trace.nr_entries * sizeof(unsigned long));
2230 else {
2231 trace.max_entries = FTRACE_STACK_ENTRIES;
2232 trace.entries = entry->caller;
2233 if (regs)
2234 save_stack_trace_regs(regs, &trace);
2235 else
2236 save_stack_trace(&trace);
2237 }
2238
2239 entry->size = trace.nr_entries;
2240
2241 if (!call_filter_check_discard(call, entry, buffer, event))
2242 __buffer_unlock_commit(buffer, event);
2243
2244 out:
2245 /* Again, don't let gcc optimize things here */
2246 barrier();
2247 __this_cpu_dec(ftrace_stack_reserve);
2248 preempt_enable_notrace();
2249
2250 }
2251
2252 static inline void ftrace_trace_stack(struct trace_array *tr,
2253 struct ring_buffer *buffer,
2254 unsigned long flags,
2255 int skip, int pc, struct pt_regs *regs)
2256 {
2257 if (!(tr->trace_flags & TRACE_ITER_STACKTRACE))
2258 return;
2259
2260 __ftrace_trace_stack(buffer, flags, skip, pc, regs);
2261 }
2262
2263 void __trace_stack(struct trace_array *tr, unsigned long flags, int skip,
2264 int pc)
2265 {
2266 __ftrace_trace_stack(tr->trace_buffer.buffer, flags, skip, pc, NULL);
2267 }
2268
2269 /**
2270 * trace_dump_stack - record a stack back trace in the trace buffer
2271 * @skip: Number of functions to skip (helper handlers)
2272 */
2273 void trace_dump_stack(int skip)
2274 {
2275 unsigned long flags;
2276
2277 if (tracing_disabled || tracing_selftest_running)
2278 return;
2279
2280 local_save_flags(flags);
2281
2282 /*
2283 	 * Skip three more frames; that seems to get us to the caller
2284 	 * of this function.
2285 */
2286 skip += 3;
2287 __ftrace_trace_stack(global_trace.trace_buffer.buffer,
2288 flags, skip, preempt_count(), NULL);
2289 }
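
/*
 * Editor's sketch (not in the original file): trace_dump_stack() can be
 * dropped into any suspect code path to record the current kernel
 * backtrace in the global trace buffer; the argument skips that many
 * extra helper frames on top of the ones skipped internally.
 */
#if 0	/* illustration only */
static void example_record_backtrace(void)
{
	/* Record how we got here, skipping no extra frames */
	trace_dump_stack(0);
}
#endif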
2290
2291 static DEFINE_PER_CPU(int, user_stack_count);
2292
2293 void
2294 ftrace_trace_userstack(struct ring_buffer *buffer, unsigned long flags, int pc)
2295 {
2296 struct trace_event_call *call = &event_user_stack;
2297 struct ring_buffer_event *event;
2298 struct userstack_entry *entry;
2299 struct stack_trace trace;
2300
2301 if (!(global_trace.trace_flags & TRACE_ITER_USERSTACKTRACE))
2302 return;
2303
2304 /*
2305 	 * NMIs can not handle page faults, even with fixups.
2306 	 * Saving the user stack can (and often does) fault.
2307 */
2308 if (unlikely(in_nmi()))
2309 return;
2310
2311 /*
2312 * prevent recursion, since the user stack tracing may
2313 * trigger other kernel events.
2314 */
2315 preempt_disable();
2316 if (__this_cpu_read(user_stack_count))
2317 goto out;
2318
2319 __this_cpu_inc(user_stack_count);
2320
2321 event = trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
2322 sizeof(*entry), flags, pc);
2323 if (!event)
2324 goto out_drop_count;
2325 entry = ring_buffer_event_data(event);
2326
2327 entry->tgid = current->tgid;
2328 memset(&entry->caller, 0, sizeof(entry->caller));
2329
2330 trace.nr_entries = 0;
2331 trace.max_entries = FTRACE_STACK_ENTRIES;
2332 trace.skip = 0;
2333 trace.entries = entry->caller;
2334
2335 save_stack_trace_user(&trace);
2336 if (!call_filter_check_discard(call, entry, buffer, event))
2337 __buffer_unlock_commit(buffer, event);
2338
2339 out_drop_count:
2340 __this_cpu_dec(user_stack_count);
2341 out:
2342 preempt_enable();
2343 }
2344
2345 #ifdef UNUSED
2346 static void __trace_userstack(struct trace_array *tr, unsigned long flags)
2347 {
2348 ftrace_trace_userstack(tr, flags, preempt_count());
2349 }
2350 #endif /* UNUSED */
2351
2352 #endif /* CONFIG_STACKTRACE */
2353
2354 /* created for use with alloc_percpu */
2355 struct trace_buffer_struct {
2356 int nesting;
2357 char buffer[4][TRACE_BUF_SIZE];
2358 };
2359
2360 static struct trace_buffer_struct *trace_percpu_buffer;
2361
2362 /*
2363  * This allows for lockless recording. If we're nested too deeply, then
2364 * this returns NULL.
2365 */
2366 static char *get_trace_buf(void)
2367 {
2368 struct trace_buffer_struct *buffer = this_cpu_ptr(trace_percpu_buffer);
2369
2370 if (!buffer || buffer->nesting >= 4)
2371 return NULL;
2372
2373 return &buffer->buffer[buffer->nesting++][0];
2374 }
2375
2376 static void put_trace_buf(void)
2377 {
2378 this_cpu_dec(trace_percpu_buffer->nesting);
2379 }
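
/*
 * Editor's sketch (not in the original file): callers must pair
 * get_trace_buf() with put_trace_buf() on the same CPU and keep
 * preemption disabled across the pair, exactly as trace_vbprintk()
 * and __trace_array_vprintk() do below.
 */
#if 0	/* illustration only */
static int example_use_trace_buf(const char *msg)
{
	char *tbuffer;
	int len = 0;

	preempt_disable_notrace();
	tbuffer = get_trace_buf();
	if (tbuffer) {
		len = snprintf(tbuffer, TRACE_BUF_SIZE, "%s", msg);
		/* ... hand tbuffer to the ring buffer here ... */
		put_trace_buf();
	}
	preempt_enable_notrace();

	return len;
}
#endif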
2380
2381 static int alloc_percpu_trace_buffer(void)
2382 {
2383 struct trace_buffer_struct *buffers;
2384
2385 buffers = alloc_percpu(struct trace_buffer_struct);
2386 if (WARN(!buffers, "Could not allocate percpu trace_printk buffer"))
2387 return -ENOMEM;
2388
2389 trace_percpu_buffer = buffers;
2390 return 0;
2391 }
2392
2393 static int buffers_allocated;
2394
2395 void trace_printk_init_buffers(void)
2396 {
2397 if (buffers_allocated)
2398 return;
2399
2400 if (alloc_percpu_trace_buffer())
2401 return;
2402
2403 /* trace_printk() is for debug use only. Don't use it in production. */
2404
2405 pr_warn("\n");
2406 pr_warn("**********************************************************\n");
2407 pr_warn("** NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE **\n");
2408 pr_warn("** **\n");
2409 pr_warn("** trace_printk() being used. Allocating extra memory. **\n");
2410 pr_warn("** **\n");
2411 pr_warn("** This means that this is a DEBUG kernel and it is **\n");
2412 pr_warn("** unsafe for production use. **\n");
2413 pr_warn("** **\n");
2414 pr_warn("** If you see this message and you are not debugging **\n");
2415 pr_warn("** the kernel, report this immediately to your vendor! **\n");
2416 pr_warn("** **\n");
2417 pr_warn("** NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE **\n");
2418 pr_warn("**********************************************************\n");
2419
2420 /* Expand the buffers to set size */
2421 tracing_update_buffers();
2422
2423 buffers_allocated = 1;
2424
2425 /*
2426 * trace_printk_init_buffers() can be called by modules.
2427 * If that happens, then we need to start cmdline recording
2428 * directly here. If the global_trace.buffer is already
2429 	 * allocated, then we know this was called by module code.
2430 */
2431 if (global_trace.trace_buffer.buffer)
2432 tracing_start_cmdline_record();
2433 }
2434
2435 void trace_printk_start_comm(void)
2436 {
2437 /* Start tracing comms if trace printk is set */
2438 if (!buffers_allocated)
2439 return;
2440 tracing_start_cmdline_record();
2441 }
2442
2443 static void trace_printk_start_stop_comm(int enabled)
2444 {
2445 if (!buffers_allocated)
2446 return;
2447
2448 if (enabled)
2449 tracing_start_cmdline_record();
2450 else
2451 tracing_stop_cmdline_record();
2452 }
2453
2454 /**
2455  * trace_vbprintk - write a binary printk-style message into the tracing buffer
2456  * @ip: address of the caller, @fmt: format string, @args: va_list for @fmt
2457 */
2458 int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
2459 {
2460 struct trace_event_call *call = &event_bprint;
2461 struct ring_buffer_event *event;
2462 struct ring_buffer *buffer;
2463 struct trace_array *tr = &global_trace;
2464 struct bprint_entry *entry;
2465 unsigned long flags;
2466 char *tbuffer;
2467 int len = 0, size, pc;
2468
2469 if (unlikely(tracing_selftest_running || tracing_disabled))
2470 return 0;
2471
2472 /* Don't pollute graph traces with trace_vprintk internals */
2473 pause_graph_tracing();
2474
2475 pc = preempt_count();
2476 preempt_disable_notrace();
2477
2478 tbuffer = get_trace_buf();
2479 if (!tbuffer) {
2480 len = 0;
2481 goto out_nobuffer;
2482 }
2483
2484 len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args);
2485
2486 if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0)
2487 goto out;
2488
2489 local_save_flags(flags);
2490 size = sizeof(*entry) + sizeof(u32) * len;
2491 buffer = tr->trace_buffer.buffer;
2492 event = trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
2493 flags, pc);
2494 if (!event)
2495 goto out;
2496 entry = ring_buffer_event_data(event);
2497 entry->ip = ip;
2498 entry->fmt = fmt;
2499
2500 memcpy(entry->buf, tbuffer, sizeof(u32) * len);
2501 if (!call_filter_check_discard(call, entry, buffer, event)) {
2502 __buffer_unlock_commit(buffer, event);
2503 ftrace_trace_stack(tr, buffer, flags, 6, pc, NULL);
2504 }
2505
2506 out:
2507 put_trace_buf();
2508
2509 out_nobuffer:
2510 preempt_enable_notrace();
2511 unpause_graph_tracing();
2512
2513 return len;
2514 }
2515 EXPORT_SYMBOL_GPL(trace_vbprintk);
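
/*
 * Editor's sketch (not in the original file): trace_printk(), declared
 * in <linux/kernel.h>, is the usual entry point that ends up in
 * trace_vbprintk()/trace_vprintk().  Debug use only; see the boot-time
 * banner printed by trace_printk_init_buffers() above.
 */
#if 0	/* illustration only */
static void example_debug_value(int val)
{
	trace_printk("value is now %d\n", val);
}
#endif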
2516
2517 static int
2518 __trace_array_vprintk(struct ring_buffer *buffer,
2519 unsigned long ip, const char *fmt, va_list args)
2520 {
2521 struct trace_event_call *call = &event_print;
2522 struct ring_buffer_event *event;
2523 int len = 0, size, pc;
2524 struct print_entry *entry;
2525 unsigned long flags;
2526 char *tbuffer;
2527
2528 if (tracing_disabled || tracing_selftest_running)
2529 return 0;
2530
2531 /* Don't pollute graph traces with trace_vprintk internals */
2532 pause_graph_tracing();
2533
2534 pc = preempt_count();
2535 preempt_disable_notrace();
2536
2537
2538 tbuffer = get_trace_buf();
2539 if (!tbuffer) {
2540 len = 0;
2541 goto out_nobuffer;
2542 }
2543
2544 len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
2545
2546 local_save_flags(flags);
2547 size = sizeof(*entry) + len + 1;
2548 event = trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
2549 flags, pc);
2550 if (!event)
2551 goto out;
2552 entry = ring_buffer_event_data(event);
2553 entry->ip = ip;
2554
2555 memcpy(&entry->buf, tbuffer, len + 1);
2556 if (!call_filter_check_discard(call, entry, buffer, event)) {
2557 __buffer_unlock_commit(buffer, event);
2558 ftrace_trace_stack(&global_trace, buffer, flags, 6, pc, NULL);
2559 }
2560
2561 out:
2562 put_trace_buf();
2563
2564 out_nobuffer:
2565 preempt_enable_notrace();
2566 unpause_graph_tracing();
2567
2568 return len;
2569 }
2570
2571 int trace_array_vprintk(struct trace_array *tr,
2572 unsigned long ip, const char *fmt, va_list args)
2573 {
2574 return __trace_array_vprintk(tr->trace_buffer.buffer, ip, fmt, args);
2575 }
2576
2577 int trace_array_printk(struct trace_array *tr,
2578 unsigned long ip, const char *fmt, ...)
2579 {
2580 int ret;
2581 va_list ap;
2582
2583 if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
2584 return 0;
2585
2586 va_start(ap, fmt);
2587 ret = trace_array_vprintk(tr, ip, fmt, ap);
2588 va_end(ap);
2589 return ret;
2590 }
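
/*
 * Editor's sketch (not in the original file): writing into a specific
 * trace_array (e.g. an instance buffer) rather than the global one.
 * "example_tr" stands in for a trace_array obtained elsewhere, and
 * _THIS_IP_ records the address of the call site.
 */
#if 0	/* illustration only */
static void example_instance_msg(struct trace_array *example_tr, int err)
{
	trace_array_printk(example_tr, _THIS_IP_, "operation failed: %d\n", err);
}
#endif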
2591
2592 int trace_array_printk_buf(struct ring_buffer *buffer,
2593 unsigned long ip, const char *fmt, ...)
2594 {
2595 int ret;
2596 va_list ap;
2597
2598 if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
2599 return 0;
2600
2601 va_start(ap, fmt);
2602 ret = __trace_array_vprintk(buffer, ip, fmt, ap);
2603 va_end(ap);
2604 return ret;
2605 }
2606
2607 int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
2608 {
2609 return trace_array_vprintk(&global_trace, ip, fmt, args);
2610 }
2611 EXPORT_SYMBOL_GPL(trace_vprintk);
2612
2613 static void trace_iterator_increment(struct trace_iterator *iter)
2614 {
2615 struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu);
2616
2617 iter->idx++;
2618 if (buf_iter)
2619 ring_buffer_read(buf_iter, NULL);
2620 }
2621
2622 static struct trace_entry *
2623 peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
2624 unsigned long *lost_events)
2625 {
2626 struct ring_buffer_event *event;
2627 struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);
2628
2629 if (buf_iter)
2630 event = ring_buffer_iter_peek(buf_iter, ts);
2631 else
2632 event = ring_buffer_peek(iter->trace_buffer->buffer, cpu, ts,
2633 lost_events);
2634
2635 if (event) {
2636 iter->ent_size = ring_buffer_event_length(event);
2637 return ring_buffer_event_data(event);
2638 }
2639 iter->ent_size = 0;
2640 return NULL;
2641 }
2642
2643 static struct trace_entry *
2644 __find_next_entry(struct trace_iterator *iter, int *ent_cpu,
2645 unsigned long *missing_events, u64 *ent_ts)
2646 {
2647 struct ring_buffer *buffer = iter->trace_buffer->buffer;
2648 struct trace_entry *ent, *next = NULL;
2649 unsigned long lost_events = 0, next_lost = 0;
2650 int cpu_file = iter->cpu_file;
2651 u64 next_ts = 0, ts;
2652 int next_cpu = -1;
2653 int next_size = 0;
2654 int cpu;
2655
2656 /*
2657 	 * If we are in a per_cpu trace file, don't bother iterating
2658 	 * over all CPUs; peek at that CPU directly.
2659 */
2660 if (cpu_file > RING_BUFFER_ALL_CPUS) {
2661 if (ring_buffer_empty_cpu(buffer, cpu_file))
2662 return NULL;
2663 ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
2664 if (ent_cpu)
2665 *ent_cpu = cpu_file;
2666
2667 return ent;
2668 }
2669
2670 for_each_tracing_cpu(cpu) {
2671
2672 if (ring_buffer_empty_cpu(buffer, cpu))
2673 continue;
2674
2675 ent = peek_next_entry(iter, cpu, &ts, &lost_events);
2676
2677 /*
2678 * Pick the entry with the smallest timestamp:
2679 */
2680 if (ent && (!next || ts < next_ts)) {
2681 next = ent;
2682 next_cpu = cpu;
2683 next_ts = ts;
2684 next_lost = lost_events;
2685 next_size = iter->ent_size;
2686 }
2687 }
2688
2689 iter->ent_size = next_size;
2690
2691 if (ent_cpu)
2692 *ent_cpu = next_cpu;
2693
2694 if (ent_ts)
2695 *ent_ts = next_ts;
2696
2697 if (missing_events)
2698 *missing_events = next_lost;
2699
2700 return next;
2701 }
2702
2703 /* Find the next real entry, without updating the iterator itself */
2704 struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
2705 int *ent_cpu, u64 *ent_ts)
2706 {
2707 return __find_next_entry(iter, ent_cpu, NULL, ent_ts);
2708 }
2709
2710 /* Find the next real entry, and increment the iterator to the next entry */
2711 void *trace_find_next_entry_inc(struct trace_iterator *iter)
2712 {
2713 iter->ent = __find_next_entry(iter, &iter->cpu,
2714 &iter->lost_events, &iter->ts);
2715
2716 if (iter->ent)
2717 trace_iterator_increment(iter);
2718
2719 return iter->ent ? iter : NULL;
2720 }
2721
2722 static void trace_consume(struct trace_iterator *iter)
2723 {
2724 ring_buffer_consume(iter->trace_buffer->buffer, iter->cpu, &iter->ts,
2725 &iter->lost_events);
2726 }
2727
2728 static void *s_next(struct seq_file *m, void *v, loff_t *pos)
2729 {
2730 struct trace_iterator *iter = m->private;
2731 int i = (int)*pos;
2732 void *ent;
2733
2734 WARN_ON_ONCE(iter->leftover);
2735
2736 (*pos)++;
2737
2738 /* can't go backwards */
2739 if (iter->idx > i)
2740 return NULL;
2741
2742 if (iter->idx < 0)
2743 ent = trace_find_next_entry_inc(iter);
2744 else
2745 ent = iter;
2746
2747 while (ent && iter->idx < i)
2748 ent = trace_find_next_entry_inc(iter);
2749
2750 iter->pos = *pos;
2751
2752 return ent;
2753 }
2754
2755 void tracing_iter_reset(struct trace_iterator *iter, int cpu)
2756 {
2757 struct ring_buffer_event *event;
2758 struct ring_buffer_iter *buf_iter;
2759 unsigned long entries = 0;
2760 u64 ts;
2761
2762 per_cpu_ptr(iter->trace_buffer->data, cpu)->skipped_entries = 0;
2763
2764 buf_iter = trace_buffer_iter(iter, cpu);
2765 if (!buf_iter)
2766 return;
2767
2768 ring_buffer_iter_reset(buf_iter);
2769
2770 /*
2771 	 * With the max latency tracers, a reset may never have taken
2772 	 * place on a cpu. This shows up as timestamps that are earlier
2773 	 * than the start of the buffer.
2774 */
2775 while ((event = ring_buffer_iter_peek(buf_iter, &ts))) {
2776 if (ts >= iter->trace_buffer->time_start)
2777 break;
2778 entries++;
2779 ring_buffer_read(buf_iter, NULL);
2780 }
2781
2782 per_cpu_ptr(iter->trace_buffer->data, cpu)->skipped_entries = entries;
2783 }
2784
2785 /*
2786  * The current tracer is copied to avoid holding a global lock
2787  * all around.
2788 */
2789 static void *s_start(struct seq_file *m, loff_t *pos)
2790 {
2791 struct trace_iterator *iter = m->private;
2792 struct trace_array *tr = iter->tr;
2793 int cpu_file = iter->cpu_file;
2794 void *p = NULL;
2795 loff_t l = 0;
2796 int cpu;
2797
2798 /*
2799 * copy the tracer to avoid using a global lock all around.
2800 	 * iter->trace is a copy of current_trace; the pointer to the
2801 * name may be used instead of a strcmp(), as iter->trace->name
2802 * will point to the same string as current_trace->name.
2803 */
2804 mutex_lock(&trace_types_lock);
2805 if (unlikely(tr->current_trace && iter->trace->name != tr->current_trace->name))
2806 *iter->trace = *tr->current_trace;
2807 mutex_unlock(&trace_types_lock);
2808
2809 #ifdef CONFIG_TRACER_MAX_TRACE
2810 if (iter->snapshot && iter->trace->use_max_tr)
2811 return ERR_PTR(-EBUSY);
2812 #endif
2813
2814 if (!iter->snapshot)
2815 atomic_inc(&trace_record_cmdline_disabled);
2816
2817 if (*pos != iter->pos) {
2818 iter->ent = NULL;
2819 iter->cpu = 0;
2820 iter->idx = -1;
2821
2822 if (cpu_file == RING_BUFFER_ALL_CPUS) {
2823 for_each_tracing_cpu(cpu)
2824 tracing_iter_reset(iter, cpu);
2825 } else
2826 tracing_iter_reset(iter, cpu_file);
2827
2828 iter->leftover = 0;
2829 for (p = iter; p && l < *pos; p = s_next(m, p, &l))
2830 ;
2831
2832 } else {
2833 /*
2834 * If we overflowed the seq_file before, then we want
2835 * to just reuse the trace_seq buffer again.
2836 */
2837 if (iter->leftover)
2838 p = iter;
2839 else {
2840 l = *pos - 1;
2841 p = s_next(m, p, &l);
2842 }
2843 }
2844
2845 trace_event_read_lock();
2846 trace_access_lock(cpu_file);
2847 return p;
2848 }
2849
2850 static void s_stop(struct seq_file *m, void *p)
2851 {
2852 struct trace_iterator *iter = m->private;
2853
2854 #ifdef CONFIG_TRACER_MAX_TRACE
2855 if (iter->snapshot && iter->trace->use_max_tr)
2856 return;
2857 #endif
2858
2859 if (!iter->snapshot)
2860 atomic_dec(&trace_record_cmdline_disabled);
2861
2862 trace_access_unlock(iter->cpu_file);
2863 trace_event_read_unlock();
2864 }
2865
2866 static void
2867 get_total_entries(struct trace_buffer *buf,
2868 unsigned long *total, unsigned long *entries)
2869 {
2870 unsigned long count;
2871 int cpu;
2872
2873 *total = 0;
2874 *entries = 0;
2875
2876 for_each_tracing_cpu(cpu) {
2877 count = ring_buffer_entries_cpu(buf->buffer, cpu);
2878 /*
2879 * If this buffer has skipped entries, then we hold all
2880 * entries for the trace and we need to ignore the
2881 * ones before the time stamp.
2882 */
2883 if (per_cpu_ptr(buf->data, cpu)->skipped_entries) {
2884 count -= per_cpu_ptr(buf->data, cpu)->skipped_entries;
2885 /* total is the same as the entries */
2886 *total += count;
2887 } else
2888 *total += count +
2889 ring_buffer_overrun_cpu(buf->buffer, cpu);
2890 *entries += count;
2891 }
2892 }
2893
2894 static void print_lat_help_header(struct seq_file *m)
2895 {
2896 seq_puts(m, "# _------=> CPU# \n"
2897 "# / _-----=> irqs-off \n"
2898 "# | / _----=> need-resched \n"
2899 "# || / _---=> hardirq/softirq \n"
2900 "# ||| / _--=> preempt-depth \n"
2901 "# |||| / delay \n"
2902 "# cmd pid ||||| time | caller \n"
2903 "# \\ / ||||| \\ | / \n");
2904 }
2905
2906 static void print_event_info(struct trace_buffer *buf, struct seq_file *m)
2907 {
2908 unsigned long total;
2909 unsigned long entries;
2910
2911 get_total_entries(buf, &total, &entries);
2912 seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu #P:%d\n",
2913 entries, total, num_online_cpus());
2914 seq_puts(m, "#\n");
2915 }
2916
2917 static void print_func_help_header(struct trace_buffer *buf, struct seq_file *m)
2918 {
2919 print_event_info(buf, m);
2920 seq_puts(m, "# TASK-PID CPU# TIMESTAMP FUNCTION\n"
2921 "# | | | | |\n");
2922 }
2923
2924 static void print_func_help_header_irq(struct trace_buffer *buf, struct seq_file *m)
2925 {
2926 print_event_info(buf, m);
2927 seq_puts(m, "# _-----=> irqs-off\n"
2928 "# / _----=> need-resched\n"
2929 "# | / _---=> hardirq/softirq\n"
2930 "# || / _--=> preempt-depth\n"
2931 "# ||| / delay\n"
2932 "# TASK-PID CPU# |||| TIMESTAMP FUNCTION\n"
2933 "# | | | |||| | |\n");
2934 }
2935
2936 void
2937 print_trace_header(struct seq_file *m, struct trace_iterator *iter)
2938 {
2939 unsigned long sym_flags = (global_trace.trace_flags & TRACE_ITER_SYM_MASK);
2940 struct trace_buffer *buf = iter->trace_buffer;
2941 struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu);
2942 struct tracer *type = iter->trace;
2943 unsigned long entries;
2944 unsigned long total;
2945 	const char *name = type->name;
2948
2949 get_total_entries(buf, &total, &entries);
2950
2951 seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
2952 name, UTS_RELEASE);
2953 seq_puts(m, "# -----------------------------------"
2954 "---------------------------------\n");
2955 seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
2956 " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
2957 nsecs_to_usecs(data->saved_latency),
2958 entries,
2959 total,
2960 buf->cpu,
2961 #if defined(CONFIG_PREEMPT_NONE)
2962 "server",
2963 #elif defined(CONFIG_PREEMPT_VOLUNTARY)
2964 "desktop",
2965 #elif defined(CONFIG_PREEMPT)
2966 "preempt",
2967 #else
2968 "unknown",
2969 #endif
2970 /* These are reserved for later use */
2971 0, 0, 0, 0);
2972 #ifdef CONFIG_SMP
2973 seq_printf(m, " #P:%d)\n", num_online_cpus());
2974 #else
2975 seq_puts(m, ")\n");
2976 #endif
2977 seq_puts(m, "# -----------------\n");
2978 seq_printf(m, "# | task: %.16s-%d "
2979 "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
2980 data->comm, data->pid,
2981 from_kuid_munged(seq_user_ns(m), data->uid), data->nice,
2982 data->policy, data->rt_priority);
2983 seq_puts(m, "# -----------------\n");
2984
2985 if (data->critical_start) {
2986 seq_puts(m, "# => started at: ");
2987 seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
2988 trace_print_seq(m, &iter->seq);
2989 seq_puts(m, "\n# => ended at: ");
2990 seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
2991 trace_print_seq(m, &iter->seq);
2992 seq_puts(m, "\n#\n");
2993 }
2994
2995 seq_puts(m, "#\n");
2996 }
2997
2998 static void test_cpu_buff_start(struct trace_iterator *iter)
2999 {
3000 struct trace_seq *s = &iter->seq;
3001 struct trace_array *tr = iter->tr;
3002
3003 if (!(tr->trace_flags & TRACE_ITER_ANNOTATE))
3004 return;
3005
3006 if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
3007 return;
3008
3009 if (iter->started && cpumask_test_cpu(iter->cpu, iter->started))
3010 return;
3011
3012 if (per_cpu_ptr(iter->trace_buffer->data, iter->cpu)->skipped_entries)
3013 return;
3014
3015 if (iter->started)
3016 cpumask_set_cpu(iter->cpu, iter->started);
3017
3018 /* Don't print started cpu buffer for the first entry of the trace */
3019 if (iter->idx > 1)
3020 trace_seq_printf(s, "##### CPU %u buffer started ####\n",
3021 iter->cpu);
3022 }
3023
3024 static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
3025 {
3026 struct trace_array *tr = iter->tr;
3027 struct trace_seq *s = &iter->seq;
3028 unsigned long sym_flags = (tr->trace_flags & TRACE_ITER_SYM_MASK);
3029 struct trace_entry *entry;
3030 struct trace_event *event;
3031
3032 entry = iter->ent;
3033
3034 test_cpu_buff_start(iter);
3035
3036 event = ftrace_find_event(entry->type);
3037
3038 if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3039 if (iter->iter_flags & TRACE_FILE_LAT_FMT)
3040 trace_print_lat_context(iter);
3041 else
3042 trace_print_context(iter);
3043 }
3044
3045 if (trace_seq_has_overflowed(s))
3046 return TRACE_TYPE_PARTIAL_LINE;
3047
3048 if (event)
3049 return event->funcs->trace(iter, sym_flags, event);
3050
3051 trace_seq_printf(s, "Unknown type %d\n", entry->type);
3052
3053 return trace_handle_return(s);
3054 }
3055
3056 static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
3057 {
3058 struct trace_array *tr = iter->tr;
3059 struct trace_seq *s = &iter->seq;
3060 struct trace_entry *entry;
3061 struct trace_event *event;
3062
3063 entry = iter->ent;
3064
3065 if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO)
3066 trace_seq_printf(s, "%d %d %llu ",
3067 entry->pid, iter->cpu, iter->ts);
3068
3069 if (trace_seq_has_overflowed(s))
3070 return TRACE_TYPE_PARTIAL_LINE;
3071
3072 event = ftrace_find_event(entry->type);
3073 if (event)
3074 return event->funcs->raw(iter, 0, event);
3075
3076 trace_seq_printf(s, "%d ?\n", entry->type);
3077
3078 return trace_handle_return(s);
3079 }
3080
3081 static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
3082 {
3083 struct trace_array *tr = iter->tr;
3084 struct trace_seq *s = &iter->seq;
3085 unsigned char newline = '\n';
3086 struct trace_entry *entry;
3087 struct trace_event *event;
3088
3089 entry = iter->ent;
3090
3091 if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3092 SEQ_PUT_HEX_FIELD(s, entry->pid);
3093 SEQ_PUT_HEX_FIELD(s, iter->cpu);
3094 SEQ_PUT_HEX_FIELD(s, iter->ts);
3095 if (trace_seq_has_overflowed(s))
3096 return TRACE_TYPE_PARTIAL_LINE;
3097 }
3098
3099 event = ftrace_find_event(entry->type);
3100 if (event) {
3101 enum print_line_t ret = event->funcs->hex(iter, 0, event);
3102 if (ret != TRACE_TYPE_HANDLED)
3103 return ret;
3104 }
3105
3106 SEQ_PUT_FIELD(s, newline);
3107
3108 return trace_handle_return(s);
3109 }
3110
3111 static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
3112 {
3113 struct trace_array *tr = iter->tr;
3114 struct trace_seq *s = &iter->seq;
3115 struct trace_entry *entry;
3116 struct trace_event *event;
3117
3118 entry = iter->ent;
3119
3120 if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3121 SEQ_PUT_FIELD(s, entry->pid);
3122 SEQ_PUT_FIELD(s, iter->cpu);
3123 SEQ_PUT_FIELD(s, iter->ts);
3124 if (trace_seq_has_overflowed(s))
3125 return TRACE_TYPE_PARTIAL_LINE;
3126 }
3127
3128 event = ftrace_find_event(entry->type);
3129 return event ? event->funcs->binary(iter, 0, event) :
3130 TRACE_TYPE_HANDLED;
3131 }
3132
3133 int trace_empty(struct trace_iterator *iter)
3134 {
3135 struct ring_buffer_iter *buf_iter;
3136 int cpu;
3137
3138 /* If we are looking at one CPU buffer, only check that one */
3139 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
3140 cpu = iter->cpu_file;
3141 buf_iter = trace_buffer_iter(iter, cpu);
3142 if (buf_iter) {
3143 if (!ring_buffer_iter_empty(buf_iter))
3144 return 0;
3145 } else {
3146 if (!ring_buffer_empty_cpu(iter->trace_buffer->buffer, cpu))
3147 return 0;
3148 }
3149 return 1;
3150 }
3151
3152 for_each_tracing_cpu(cpu) {
3153 buf_iter = trace_buffer_iter(iter, cpu);
3154 if (buf_iter) {
3155 if (!ring_buffer_iter_empty(buf_iter))
3156 return 0;
3157 } else {
3158 if (!ring_buffer_empty_cpu(iter->trace_buffer->buffer, cpu))
3159 return 0;
3160 }
3161 }
3162
3163 return 1;
3164 }
3165
3166 /* Called with trace_event_read_lock() held. */
3167 enum print_line_t print_trace_line(struct trace_iterator *iter)
3168 {
3169 struct trace_array *tr = iter->tr;
3170 unsigned long trace_flags = tr->trace_flags;
3171 enum print_line_t ret;
3172
3173 if (iter->lost_events) {
3174 trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
3175 iter->cpu, iter->lost_events);
3176 if (trace_seq_has_overflowed(&iter->seq))
3177 return TRACE_TYPE_PARTIAL_LINE;
3178 }
3179
3180 if (iter->trace && iter->trace->print_line) {
3181 ret = iter->trace->print_line(iter);
3182 if (ret != TRACE_TYPE_UNHANDLED)
3183 return ret;
3184 }
3185
3186 if (iter->ent->type == TRACE_BPUTS &&
3187 trace_flags & TRACE_ITER_PRINTK &&
3188 trace_flags & TRACE_ITER_PRINTK_MSGONLY)
3189 return trace_print_bputs_msg_only(iter);
3190
3191 if (iter->ent->type == TRACE_BPRINT &&
3192 trace_flags & TRACE_ITER_PRINTK &&
3193 trace_flags & TRACE_ITER_PRINTK_MSGONLY)
3194 return trace_print_bprintk_msg_only(iter);
3195
3196 if (iter->ent->type == TRACE_PRINT &&
3197 trace_flags & TRACE_ITER_PRINTK &&
3198 trace_flags & TRACE_ITER_PRINTK_MSGONLY)
3199 return trace_print_printk_msg_only(iter);
3200
3201 if (trace_flags & TRACE_ITER_BIN)
3202 return print_bin_fmt(iter);
3203
3204 if (trace_flags & TRACE_ITER_HEX)
3205 return print_hex_fmt(iter);
3206
3207 if (trace_flags & TRACE_ITER_RAW)
3208 return print_raw_fmt(iter);
3209
3210 return print_trace_fmt(iter);
3211 }
3212
3213 void trace_latency_header(struct seq_file *m)
3214 {
3215 struct trace_iterator *iter = m->private;
3216 struct trace_array *tr = iter->tr;
3217
3218 /* print nothing if the buffers are empty */
3219 if (trace_empty(iter))
3220 return;
3221
3222 if (iter->iter_flags & TRACE_FILE_LAT_FMT)
3223 print_trace_header(m, iter);
3224
3225 if (!(tr->trace_flags & TRACE_ITER_VERBOSE))
3226 print_lat_help_header(m);
3227 }
3228
3229 void trace_default_header(struct seq_file *m)
3230 {
3231 struct trace_iterator *iter = m->private;
3232 struct trace_array *tr = iter->tr;
3233 unsigned long trace_flags = tr->trace_flags;
3234
3235 if (!(trace_flags & TRACE_ITER_CONTEXT_INFO))
3236 return;
3237
3238 if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
3239 /* print nothing if the buffers are empty */
3240 if (trace_empty(iter))
3241 return;
3242 print_trace_header(m, iter);
3243 if (!(trace_flags & TRACE_ITER_VERBOSE))
3244 print_lat_help_header(m);
3245 } else {
3246 if (!(trace_flags & TRACE_ITER_VERBOSE)) {
3247 if (trace_flags & TRACE_ITER_IRQ_INFO)
3248 print_func_help_header_irq(iter->trace_buffer, m);
3249 else
3250 print_func_help_header(iter->trace_buffer, m);
3251 }
3252 }
3253 }
3254
3255 static void test_ftrace_alive(struct seq_file *m)
3256 {
3257 if (!ftrace_is_dead())
3258 return;
3259 seq_puts(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n"
3260 "# MAY BE MISSING FUNCTION EVENTS\n");
3261 }
3262
3263 #ifdef CONFIG_TRACER_MAX_TRACE
3264 static void show_snapshot_main_help(struct seq_file *m)
3265 {
3266 seq_puts(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n"
3267 "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
3268 "# Takes a snapshot of the main buffer.\n"
3269 "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n"
3270 "# (Doesn't have to be '2'; works with any number that\n"
3271 "# is not a '0' or '1')\n");
3272 }
3273
3274 static void show_snapshot_percpu_help(struct seq_file *m)
3275 {
3276 seq_puts(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n");
3277 #ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
3278 seq_puts(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
3279 "# Takes a snapshot of the main buffer for this cpu.\n");
3280 #else
3281 seq_puts(m, "# echo 1 > snapshot : Not supported with this kernel.\n"
3282 "# Must use main snapshot file to allocate.\n");
3283 #endif
3284 seq_puts(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n"
3285 "# (Doesn't have to be '2'; works with any number that\n"
3286 "# is not a '0' or '1')\n");
3287 }
3288
3289 static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
3290 {
3291 if (iter->tr->allocated_snapshot)
3292 seq_puts(m, "#\n# * Snapshot is allocated *\n#\n");
3293 else
3294 seq_puts(m, "#\n# * Snapshot is freed *\n#\n");
3295
3296 seq_puts(m, "# Snapshot commands:\n");
3297 if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
3298 show_snapshot_main_help(m);
3299 else
3300 show_snapshot_percpu_help(m);
3301 }
3302 #else
3303 /* Should never be called */
3304 static inline void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) { }
3305 #endif
3306
3307 static int s_show(struct seq_file *m, void *v)
3308 {
3309 struct trace_iterator *iter = v;
3310 int ret;
3311
3312 if (iter->ent == NULL) {
3313 if (iter->tr) {
3314 seq_printf(m, "# tracer: %s\n", iter->trace->name);
3315 seq_puts(m, "#\n");
3316 test_ftrace_alive(m);
3317 }
3318 if (iter->snapshot && trace_empty(iter))
3319 print_snapshot_help(m, iter);
3320 else if (iter->trace && iter->trace->print_header)
3321 iter->trace->print_header(m);
3322 else
3323 trace_default_header(m);
3324
3325 } else if (iter->leftover) {
3326 /*
3327 * If we filled the seq_file buffer earlier, we
3328 * want to just show it now.
3329 */
3330 ret = trace_print_seq(m, &iter->seq);
3331
3332 /* ret should this time be zero, but you never know */
3333 iter->leftover = ret;
3334
3335 } else {
3336 print_trace_line(iter);
3337 ret = trace_print_seq(m, &iter->seq);
3338 /*
3339 * If we overflow the seq_file buffer, then it will
3340 * ask us for this data again at start up.
3341 * Use that instead.
3342 * ret is 0 if seq_file write succeeded.
3343 * -1 otherwise.
3344 */
3345 iter->leftover = ret;
3346 }
3347
3348 return 0;
3349 }
3350
3351 /*
3352  * Should be used after trace_array_get(); trace_types_lock
3353  * ensures that i_cdev was already initialized.
3354 */
3355 static inline int tracing_get_cpu(struct inode *inode)
3356 {
3357 if (inode->i_cdev) /* See trace_create_cpu_file() */
3358 return (long)inode->i_cdev - 1;
3359 return RING_BUFFER_ALL_CPUS;
3360 }
3361
3362 static const struct seq_operations tracer_seq_ops = {
3363 .start = s_start,
3364 .next = s_next,
3365 .stop = s_stop,
3366 .show = s_show,
3367 };
3368
3369 static struct trace_iterator *
3370 __tracing_open(struct inode *inode, struct file *file, bool snapshot)
3371 {
3372 struct trace_array *tr = inode->i_private;
3373 struct trace_iterator *iter;
3374 int cpu;
3375
3376 if (tracing_disabled)
3377 return ERR_PTR(-ENODEV);
3378
3379 iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter));
3380 if (!iter)
3381 return ERR_PTR(-ENOMEM);
3382
3383 iter->buffer_iter = kcalloc(nr_cpu_ids, sizeof(*iter->buffer_iter),
3384 GFP_KERNEL);
3385 if (!iter->buffer_iter)
3386 goto release;
3387
3388 /*
3389 * We make a copy of the current tracer to avoid concurrent
3390 * changes on it while we are reading.
3391 */
3392 mutex_lock(&trace_types_lock);
3393 iter->trace = kzalloc(sizeof(*iter->trace), GFP_KERNEL);
3394 if (!iter->trace)
3395 goto fail;
3396
3397 *iter->trace = *tr->current_trace;
3398
3399 if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
3400 goto fail;
3401
3402 iter->tr = tr;
3403
3404 #ifdef CONFIG_TRACER_MAX_TRACE
3405 /* Currently only the top directory has a snapshot */
3406 if (tr->current_trace->print_max || snapshot)
3407 iter->trace_buffer = &tr->max_buffer;
3408 else
3409 #endif
3410 iter->trace_buffer = &tr->trace_buffer;
3411 iter->snapshot = snapshot;
3412 iter->pos = -1;
3413 iter->cpu_file = tracing_get_cpu(inode);
3414 mutex_init(&iter->mutex);
3415
3416 	/* Notify the tracer early, before we stop tracing. */
3417 if (iter->trace && iter->trace->open)
3418 iter->trace->open(iter);
3419
3420 /* Annotate start of buffers if we had overruns */
3421 if (ring_buffer_overruns(iter->trace_buffer->buffer))
3422 iter->iter_flags |= TRACE_FILE_ANNOTATE;
3423
3424 /* Output in nanoseconds only if we are using a clock in nanoseconds. */
3425 if (trace_clocks[tr->clock_id].in_ns)
3426 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
3427
3428 /* stop the trace while dumping if we are not opening "snapshot" */
3429 if (!iter->snapshot)
3430 tracing_stop_tr(tr);
3431
3432 if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
3433 for_each_tracing_cpu(cpu) {
3434 iter->buffer_iter[cpu] =
3435 ring_buffer_read_prepare(iter->trace_buffer->buffer, cpu);
3436 }
3437 ring_buffer_read_prepare_sync();
3438 for_each_tracing_cpu(cpu) {
3439 ring_buffer_read_start(iter->buffer_iter[cpu]);
3440 tracing_iter_reset(iter, cpu);
3441 }
3442 } else {
3443 cpu = iter->cpu_file;
3444 iter->buffer_iter[cpu] =
3445 ring_buffer_read_prepare(iter->trace_buffer->buffer, cpu);
3446 ring_buffer_read_prepare_sync();
3447 ring_buffer_read_start(iter->buffer_iter[cpu]);
3448 tracing_iter_reset(iter, cpu);
3449 }
3450
3451 mutex_unlock(&trace_types_lock);
3452
3453 return iter;
3454
3455 fail:
3456 mutex_unlock(&trace_types_lock);
3457 kfree(iter->trace);
3458 kfree(iter->buffer_iter);
3459 release:
3460 seq_release_private(inode, file);
3461 return ERR_PTR(-ENOMEM);
3462 }
3463
3464 int tracing_open_generic(struct inode *inode, struct file *filp)
3465 {
3466 if (tracing_disabled)
3467 return -ENODEV;
3468
3469 filp->private_data = inode->i_private;
3470 return 0;
3471 }
3472
3473 bool tracing_is_disabled(void)
3474 {
3475 	return (tracing_disabled) ? true : false;
3476 }
3477
3478 /*
3479 * Open and update trace_array ref count.
3480 * Must have the current trace_array passed to it.
3481 */
3482 static int tracing_open_generic_tr(struct inode *inode, struct file *filp)
3483 {
3484 struct trace_array *tr = inode->i_private;
3485
3486 if (tracing_disabled)
3487 return -ENODEV;
3488
3489 if (trace_array_get(tr) < 0)
3490 return -ENODEV;
3491
3492 filp->private_data = inode->i_private;
3493
3494 return 0;
3495 }
3496
3497 static int tracing_release(struct inode *inode, struct file *file)
3498 {
3499 struct trace_array *tr = inode->i_private;
3500 struct seq_file *m = file->private_data;
3501 struct trace_iterator *iter;
3502 int cpu;
3503
3504 if (!(file->f_mode & FMODE_READ)) {
3505 trace_array_put(tr);
3506 return 0;
3507 }
3508
3509 /* Writes do not use seq_file */
3510 iter = m->private;
3511 mutex_lock(&trace_types_lock);
3512
3513 for_each_tracing_cpu(cpu) {
3514 if (iter->buffer_iter[cpu])
3515 ring_buffer_read_finish(iter->buffer_iter[cpu]);
3516 }
3517
3518 if (iter->trace && iter->trace->close)
3519 iter->trace->close(iter);
3520
3521 if (!iter->snapshot)
3522 /* reenable tracing if it was previously enabled */
3523 tracing_start_tr(tr);
3524
3525 __trace_array_put(tr);
3526
3527 mutex_unlock(&trace_types_lock);
3528
3529 mutex_destroy(&iter->mutex);
3530 free_cpumask_var(iter->started);
3531 kfree(iter->trace);
3532 kfree(iter->buffer_iter);
3533 seq_release_private(inode, file);
3534
3535 return 0;
3536 }
3537
3538 static int tracing_release_generic_tr(struct inode *inode, struct file *file)
3539 {
3540 struct trace_array *tr = inode->i_private;
3541
3542 trace_array_put(tr);
3543 return 0;
3544 }
3545
3546 static int tracing_single_release_tr(struct inode *inode, struct file *file)
3547 {
3548 struct trace_array *tr = inode->i_private;
3549
3550 trace_array_put(tr);
3551
3552 return single_release(inode, file);
3553 }
3554
3555 static int tracing_open(struct inode *inode, struct file *file)
3556 {
3557 struct trace_array *tr = inode->i_private;
3558 struct trace_iterator *iter;
3559 int ret = 0;
3560
3561 if (trace_array_get(tr) < 0)
3562 return -ENODEV;
3563
3564 	/* If this file was opened for write, then erase its contents */
3565 if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
3566 int cpu = tracing_get_cpu(inode);
3567
3568 if (cpu == RING_BUFFER_ALL_CPUS)
3569 tracing_reset_online_cpus(&tr->trace_buffer);
3570 else
3571 tracing_reset(&tr->trace_buffer, cpu);
3572 }
3573
3574 if (file->f_mode & FMODE_READ) {
3575 iter = __tracing_open(inode, file, false);
3576 if (IS_ERR(iter))
3577 ret = PTR_ERR(iter);
3578 else if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
3579 iter->iter_flags |= TRACE_FILE_LAT_FMT;
3580 }
3581
3582 if (ret < 0)
3583 trace_array_put(tr);
3584
3585 return ret;
3586 }
3587
3588 /*
3589 * Some tracers are not suitable for instance buffers.
3590 * A tracer is always available for the global array (toplevel)
3591 * or if it explicitly states that it is.
3592 */
3593 static bool
3594 trace_ok_for_array(struct tracer *t, struct trace_array *tr)
3595 {
3596 return (tr->flags & TRACE_ARRAY_FL_GLOBAL) || t->allow_instances;
3597 }
3598
3599 /* Find the next tracer that this trace array may use */
3600 static struct tracer *
3601 get_tracer_for_array(struct trace_array *tr, struct tracer *t)
3602 {
3603 while (t && !trace_ok_for_array(t, tr))
3604 t = t->next;
3605
3606 return t;
3607 }
3608
3609 static void *
3610 t_next(struct seq_file *m, void *v, loff_t *pos)
3611 {
3612 struct trace_array *tr = m->private;
3613 struct tracer *t = v;
3614
3615 (*pos)++;
3616
3617 if (t)
3618 t = get_tracer_for_array(tr, t->next);
3619
3620 return t;
3621 }
3622
3623 static void *t_start(struct seq_file *m, loff_t *pos)
3624 {
3625 struct trace_array *tr = m->private;
3626 struct tracer *t;
3627 loff_t l = 0;
3628
3629 mutex_lock(&trace_types_lock);
3630
3631 t = get_tracer_for_array(tr, trace_types);
3632 for (; t && l < *pos; t = t_next(m, t, &l))
3633 ;
3634
3635 return t;
3636 }
3637
3638 static void t_stop(struct seq_file *m, void *p)
3639 {
3640 mutex_unlock(&trace_types_lock);
3641 }
3642
3643 static int t_show(struct seq_file *m, void *v)
3644 {
3645 struct tracer *t = v;
3646
3647 if (!t)
3648 return 0;
3649
3650 seq_puts(m, t->name);
3651 if (t->next)
3652 seq_putc(m, ' ');
3653 else
3654 seq_putc(m, '\n');
3655
3656 return 0;
3657 }
3658
3659 static const struct seq_operations show_traces_seq_ops = {
3660 .start = t_start,
3661 .next = t_next,
3662 .stop = t_stop,
3663 .show = t_show,
3664 };
3665
3666 static int show_traces_open(struct inode *inode, struct file *file)
3667 {
3668 struct trace_array *tr = inode->i_private;
3669 struct seq_file *m;
3670 int ret;
3671
3672 if (tracing_disabled)
3673 return -ENODEV;
3674
3675 ret = seq_open(file, &show_traces_seq_ops);
3676 if (ret)
3677 return ret;
3678
3679 m = file->private_data;
3680 m->private = tr;
3681
3682 return 0;
3683 }
3684
3685 static ssize_t
3686 tracing_write_stub(struct file *filp, const char __user *ubuf,
3687 size_t count, loff_t *ppos)
3688 {
3689 return count;
3690 }
3691
3692 loff_t tracing_lseek(struct file *file, loff_t offset, int whence)
3693 {
3694 int ret;
3695
3696 if (file->f_mode & FMODE_READ)
3697 ret = seq_lseek(file, offset, whence);
3698 else
3699 file->f_pos = ret = 0;
3700
3701 return ret;
3702 }
3703
3704 static const struct file_operations tracing_fops = {
3705 .open = tracing_open,
3706 .read = seq_read,
3707 .write = tracing_write_stub,
3708 .llseek = tracing_lseek,
3709 .release = tracing_release,
3710 };
3711
3712 static const struct file_operations show_traces_fops = {
3713 .open = show_traces_open,
3714 .read = seq_read,
3715 .release = seq_release,
3716 .llseek = seq_lseek,
3717 };
3718
3719 /*
3720  * The tracer itself will not take this lock, but we still want
3721  * to provide a consistent cpumask to user-space:
3722 */
3723 static DEFINE_MUTEX(tracing_cpumask_update_lock);
3724
3725 /*
3726 * Temporary storage for the character representation of the
3727 * CPU bitmask (and one more byte for the newline):
3728 */
3729 static char mask_str[NR_CPUS + 1];
3730
3731 static ssize_t
3732 tracing_cpumask_read(struct file *filp, char __user *ubuf,
3733 size_t count, loff_t *ppos)
3734 {
3735 struct trace_array *tr = file_inode(filp)->i_private;
3736 int len;
3737
3738 mutex_lock(&tracing_cpumask_update_lock);
3739
3740 len = snprintf(mask_str, count, "%*pb\n",
3741 cpumask_pr_args(tr->tracing_cpumask));
3742 if (len >= count) {
3743 count = -EINVAL;
3744 goto out_err;
3745 }
3746 count = simple_read_from_buffer(ubuf, count, ppos, mask_str, NR_CPUS+1);
3747
3748 out_err:
3749 mutex_unlock(&tracing_cpumask_update_lock);
3750
3751 return count;
3752 }
3753
3754 static ssize_t
3755 tracing_cpumask_write(struct file *filp, const char __user *ubuf,
3756 size_t count, loff_t *ppos)
3757 {
3758 struct trace_array *tr = file_inode(filp)->i_private;
3759 cpumask_var_t tracing_cpumask_new;
3760 int err, cpu;
3761
3762 if (!alloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
3763 return -ENOMEM;
3764
3765 err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
3766 if (err)
3767 goto err_unlock;
3768
3769 mutex_lock(&tracing_cpumask_update_lock);
3770
3771 local_irq_disable();
3772 arch_spin_lock(&tr->max_lock);
3773 for_each_tracing_cpu(cpu) {
3774 /*
3775 * Increase/decrease the disabled counter if we are
3776 * about to flip a bit in the cpumask:
3777 */
3778 if (cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
3779 !cpumask_test_cpu(cpu, tracing_cpumask_new)) {
3780 atomic_inc(&per_cpu_ptr(tr->trace_buffer.data, cpu)->disabled);
3781 ring_buffer_record_disable_cpu(tr->trace_buffer.buffer, cpu);
3782 }
3783 if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
3784 cpumask_test_cpu(cpu, tracing_cpumask_new)) {
3785 atomic_dec(&per_cpu_ptr(tr->trace_buffer.data, cpu)->disabled);
3786 ring_buffer_record_enable_cpu(tr->trace_buffer.buffer, cpu);
3787 }
3788 }
3789 arch_spin_unlock(&tr->max_lock);
3790 local_irq_enable();
3791
3792 cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new);
3793
3794 mutex_unlock(&tracing_cpumask_update_lock);
3795 free_cpumask_var(tracing_cpumask_new);
3796
3797 return count;
3798
3799 err_unlock:
3800 free_cpumask_var(tracing_cpumask_new);
3801
3802 return err;
3803 }
3804
3805 static const struct file_operations tracing_cpumask_fops = {
3806 .open = tracing_open_generic_tr,
3807 .read = tracing_cpumask_read,
3808 .write = tracing_cpumask_write,
3809 .release = tracing_release_generic_tr,
3810 .llseek = generic_file_llseek,
3811 };
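
/*
 * Editor's note (usage sketch, not part of the original source): from
 * user space the cpumask is written as a hex mask, e.g.
 *
 *	# echo 3 > /sys/kernel/debug/tracing/tracing_cpumask
 *
 * (or the equivalent path under /sys/kernel/tracing), which restricts
 * tracing to CPUs 0 and 1 via tracing_cpumask_write() above.
 */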
3812
3813 static int tracing_trace_options_show(struct seq_file *m, void *v)
3814 {
3815 struct tracer_opt *trace_opts;
3816 struct trace_array *tr = m->private;
3817 u32 tracer_flags;
3818 int i;
3819
3820 mutex_lock(&trace_types_lock);
3821 tracer_flags = tr->current_trace->flags->val;
3822 trace_opts = tr->current_trace->flags->opts;
3823
3824 for (i = 0; trace_options[i]; i++) {
3825 if (tr->trace_flags & (1 << i))
3826 seq_printf(m, "%s\n", trace_options[i]);
3827 else
3828 seq_printf(m, "no%s\n", trace_options[i]);
3829 }
3830
3831 for (i = 0; trace_opts[i].name; i++) {
3832 if (tracer_flags & trace_opts[i].bit)
3833 seq_printf(m, "%s\n", trace_opts[i].name);
3834 else
3835 seq_printf(m, "no%s\n", trace_opts[i].name);
3836 }
3837 mutex_unlock(&trace_types_lock);
3838
3839 return 0;
3840 }
3841
3842 static int __set_tracer_option(struct trace_array *tr,
3843 struct tracer_flags *tracer_flags,
3844 struct tracer_opt *opts, int neg)
3845 {
3846 struct tracer *trace = tracer_flags->trace;
3847 int ret;
3848
3849 ret = trace->set_flag(tr, tracer_flags->val, opts->bit, !neg);
3850 if (ret)
3851 return ret;
3852
3853 if (neg)
3854 tracer_flags->val &= ~opts->bit;
3855 else
3856 tracer_flags->val |= opts->bit;
3857 return 0;
3858 }
3859
3860 /* Try to assign a tracer specific option */
3861 static int set_tracer_option(struct trace_array *tr, char *cmp, int neg)
3862 {
3863 struct tracer *trace = tr->current_trace;
3864 struct tracer_flags *tracer_flags = trace->flags;
3865 struct tracer_opt *opts = NULL;
3866 int i;
3867
3868 for (i = 0; tracer_flags->opts[i].name; i++) {
3869 opts = &tracer_flags->opts[i];
3870
3871 if (strcmp(cmp, opts->name) == 0)
3872 return __set_tracer_option(tr, trace->flags, opts, neg);
3873 }
3874
3875 return -EINVAL;
3876 }
3877
3878 /* Some tracers require overwrite to stay enabled */
3879 int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set)
3880 {
3881 if (tracer->enabled && (mask & TRACE_ITER_OVERWRITE) && !set)
3882 return -1;
3883
3884 return 0;
3885 }
3886
3887 int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
3888 {
3889 /* do nothing if flag is already set */
3890 if (!!(tr->trace_flags & mask) == !!enabled)
3891 return 0;
3892
3893 /* Give the tracer a chance to approve the change */
3894 if (tr->current_trace->flag_changed)
3895 if (tr->current_trace->flag_changed(tr, mask, !!enabled))
3896 return -EINVAL;
3897
3898 if (enabled)
3899 tr->trace_flags |= mask;
3900 else
3901 tr->trace_flags &= ~mask;
3902
3903 if (mask == TRACE_ITER_RECORD_CMD)
3904 trace_event_enable_cmd_record(enabled);
3905
3906 if (mask == TRACE_ITER_EVENT_FORK)
3907 trace_event_follow_fork(tr, enabled);
3908
3909 if (mask == TRACE_ITER_OVERWRITE) {
3910 ring_buffer_change_overwrite(tr->trace_buffer.buffer, enabled);
3911 #ifdef CONFIG_TRACER_MAX_TRACE
3912 ring_buffer_change_overwrite(tr->max_buffer.buffer, enabled);
3913 #endif
3914 }
3915
3916 if (mask == TRACE_ITER_PRINTK) {
3917 trace_printk_start_stop_comm(enabled);
3918 trace_printk_control(enabled);
3919 }
3920
3921 return 0;
3922 }
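
/*
 * Editor's sketch (not in the original file): kernel code can toggle a
 * core option directly with set_tracer_flag(); the mask values are the
 * TRACE_ITER_* bits used throughout this file, and trace_types_lock is
 * assumed to be required, as in trace_set_options() below.
 */
#if 0	/* illustration only */
static int example_disable_overwrite(struct trace_array *tr)
{
	int ret;

	/* Roughly the effect of "echo nooverwrite > trace_options" for @tr */
	mutex_lock(&trace_types_lock);
	ret = set_tracer_flag(tr, TRACE_ITER_OVERWRITE, 0);
	mutex_unlock(&trace_types_lock);

	return ret;
}
#endif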
3923
3924 static int trace_set_options(struct trace_array *tr, char *option)
3925 {
3926 char *cmp;
3927 int neg = 0;
3928 int ret = -ENODEV;
3929 int i;
3930 size_t orig_len = strlen(option);
3931
3932 cmp = strstrip(option);
3933
3934 if (strncmp(cmp, "no", 2) == 0) {
3935 neg = 1;
3936 cmp += 2;
3937 }
3938
3939 mutex_lock(&trace_types_lock);
3940
3941 for (i = 0; trace_options[i]; i++) {
3942 if (strcmp(cmp, trace_options[i]) == 0) {
3943 ret = set_tracer_flag(tr, 1 << i, !neg);
3944 break;
3945 }
3946 }
3947
3948 /* If no option could be set, test the specific tracer options */
3949 if (!trace_options[i])
3950 ret = set_tracer_option(tr, cmp, neg);
3951
3952 mutex_unlock(&trace_types_lock);
3953
3954 /*
3955 * If the first trailing whitespace is replaced with '\0' by strstrip,
3956 * turn it back into a space.
3957 */
3958 if (orig_len > strlen(option))
3959 option[strlen(option)] = ' ';
3960
3961 return ret;
3962 }
3963
3964 static void __init apply_trace_boot_options(void)
3965 {
3966 char *buf = trace_boot_options_buf;
3967 char *option;
3968
3969 while (true) {
3970 option = strsep(&buf, ",");
3971
3972 if (!option)
3973 break;
3974
3975 if (*option)
3976 trace_set_options(&global_trace, option);
3977
3978 /* Put back the comma to allow this to be called again */
3979 if (buf)
3980 *(buf - 1) = ',';
3981 }
3982 }
3983
3984 static ssize_t
3985 tracing_trace_options_write(struct file *filp, const char __user *ubuf,
3986 size_t cnt, loff_t *ppos)
3987 {
3988 struct seq_file *m = filp->private_data;
3989 struct trace_array *tr = m->private;
3990 char buf[64];
3991 int ret;
3992
3993 if (cnt >= sizeof(buf))
3994 return -EINVAL;
3995
3996 if (copy_from_user(buf, ubuf, cnt))
3997 return -EFAULT;
3998
3999 buf[cnt] = 0;
4000
4001 ret = trace_set_options(tr, buf);
4002 if (ret < 0)
4003 return ret;
4004
4005 *ppos += cnt;
4006
4007 return cnt;
4008 }
4009
4010 static int tracing_trace_options_open(struct inode *inode, struct file *file)
4011 {
4012 struct trace_array *tr = inode->i_private;
4013 int ret;
4014
4015 if (tracing_disabled)
4016 return -ENODEV;
4017
4018 if (trace_array_get(tr) < 0)
4019 return -ENODEV;
4020
4021 ret = single_open(file, tracing_trace_options_show, inode->i_private);
4022 if (ret < 0)
4023 trace_array_put(tr);
4024
4025 return ret;
4026 }
4027
4028 static const struct file_operations tracing_iter_fops = {
4029 .open = tracing_trace_options_open,
4030 .read = seq_read,
4031 .llseek = seq_lseek,
4032 .release = tracing_single_release_tr,
4033 .write = tracing_trace_options_write,
4034 };
4035
4036 static const char readme_msg[] =
4037 "tracing mini-HOWTO:\n\n"
4038 "# echo 0 > tracing_on : quick way to disable tracing\n"
4039 "# echo 1 > tracing_on : quick way to re-enable tracing\n\n"
4040 " Important files:\n"
4041 " trace\t\t\t- The static contents of the buffer\n"
4042 "\t\t\t To clear the buffer write into this file: echo > trace\n"
4043 " trace_pipe\t\t- A consuming read to see the contents of the buffer\n"
4044 " current_tracer\t- function and latency tracers\n"
4045 " available_tracers\t- list of configured tracers for current_tracer\n"
4046 " buffer_size_kb\t- view and modify size of per cpu buffer\n"
4047 " buffer_total_size_kb - view total size of all cpu buffers\n\n"
4048 " trace_clock\t\t-change the clock used to order events\n"
4049 " local: Per cpu clock but may not be synced across CPUs\n"
4050 " global: Synced across CPUs but slows tracing down.\n"
4051 " counter: Not a clock, but just an increment\n"
4052 " uptime: Jiffy counter from time of boot\n"
4053 " perf: Same clock that perf events use\n"
4054 #ifdef CONFIG_X86_64
4055 " x86-tsc: TSC cycle counter\n"
4056 #endif
4057 "\n trace_marker\t\t- Writes into this file writes into the kernel buffer\n"
4058 " tracing_cpumask\t- Limit which CPUs to trace\n"
4059 " instances\t\t- Make sub-buffers with: mkdir instances/foo\n"
4060 "\t\t\t Remove sub-buffer with rmdir\n"
4061 " trace_options\t\t- Set format or modify how tracing happens\n"
4062 "\t\t\t Disable an option by adding a suffix 'no' to the\n"
4063 "\t\t\t option name\n"
4064 " saved_cmdlines_size\t- echo command number in here to store comm-pid list\n"
4065 #ifdef CONFIG_DYNAMIC_FTRACE
4066 "\n available_filter_functions - list of functions that can be filtered on\n"
4067 " set_ftrace_filter\t- echo function name in here to only trace these\n"
4068 "\t\t\t functions\n"
4069 "\t accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
4070 "\t modules: Can select a group via module\n"
4071 "\t Format: :mod:<module-name>\n"
4072 "\t example: echo :mod:ext3 > set_ftrace_filter\n"
4073 "\t triggers: a command to perform when function is hit\n"
4074 "\t Format: <function>:<trigger>[:count]\n"
4075 "\t trigger: traceon, traceoff\n"
4076 "\t\t enable_event:<system>:<event>\n"
4077 "\t\t disable_event:<system>:<event>\n"
4078 #ifdef CONFIG_STACKTRACE
4079 "\t\t stacktrace\n"
4080 #endif
4081 #ifdef CONFIG_TRACER_SNAPSHOT
4082 "\t\t snapshot\n"
4083 #endif
4084 "\t\t dump\n"
4085 "\t\t cpudump\n"
4086 "\t example: echo do_fault:traceoff > set_ftrace_filter\n"
4087 "\t echo do_trap:traceoff:3 > set_ftrace_filter\n"
4088 "\t The first one will disable tracing every time do_fault is hit\n"
4089 "\t The second will disable tracing at most 3 times when do_trap is hit\n"
4090 "\t The first time do trap is hit and it disables tracing, the\n"
4091 "\t counter will decrement to 2. If tracing is already disabled,\n"
4092 "\t the counter will not decrement. It only decrements when the\n"
4093 "\t trigger did work\n"
4094 "\t To remove trigger without count:\n"
4095 "\t echo '!<function>:<trigger> > set_ftrace_filter\n"
4096 "\t To remove trigger with a count:\n"
4097 "\t echo '!<function>:<trigger>:0 > set_ftrace_filter\n"
4098 " set_ftrace_notrace\t- echo function name in here to never trace.\n"
4099 "\t accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
4100 "\t modules: Can select a group via module command :mod:\n"
4101 "\t Does not accept triggers\n"
4102 #endif /* CONFIG_DYNAMIC_FTRACE */
4103 #ifdef CONFIG_FUNCTION_TRACER
4104 " set_ftrace_pid\t- Write pid(s) to only function trace those pids\n"
4105 "\t\t (function)\n"
4106 #endif
4107 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
4108 " set_graph_function\t- Trace the nested calls of a function (function_graph)\n"
4109 " set_graph_notrace\t- Do not trace the nested calls of a function (function_graph)\n"
4110 " max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n"
4111 #endif
4112 #ifdef CONFIG_TRACER_SNAPSHOT
4113 "\n snapshot\t\t- Like 'trace' but shows the content of the static\n"
4114 "\t\t\t snapshot buffer. Read the contents for more\n"
4115 "\t\t\t information\n"
4116 #endif
4117 #ifdef CONFIG_STACK_TRACER
4118 " stack_trace\t\t- Shows the max stack trace when active\n"
4119 " stack_max_size\t- Shows current max stack size that was traced\n"
4120 "\t\t\t Write into this file to reset the max size (trigger a\n"
4121 "\t\t\t new trace)\n"
4122 #ifdef CONFIG_DYNAMIC_FTRACE
4123 " stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace\n"
4124 "\t\t\t traces\n"
4125 #endif
4126 #endif /* CONFIG_STACK_TRACER */
4127 #ifdef CONFIG_KPROBE_EVENT
4128 " kprobe_events\t\t- Add/remove/show the kernel dynamic events\n"
4129 "\t\t\t Write into this file to define/undefine new trace events.\n"
4130 #endif
4131 #ifdef CONFIG_UPROBE_EVENT
4132 " uprobe_events\t\t- Add/remove/show the userspace dynamic events\n"
4133 "\t\t\t Write into this file to define/undefine new trace events.\n"
4134 #endif
4135 #if defined(CONFIG_KPROBE_EVENT) || defined(CONFIG_UPROBE_EVENT)
4136 "\t accepts: event-definitions (one definition per line)\n"
4137 "\t Format: p|r[:[<group>/]<event>] <place> [<args>]\n"
4138 "\t -:[<group>/]<event>\n"
4139 #ifdef CONFIG_KPROBE_EVENT
4140 "\t place: [<module>:]<symbol>[+<offset>]|<memaddr>\n"
4141 #endif
4142 #ifdef CONFIG_UPROBE_EVENT
4143 "\t place: <path>:<offset>\n"
4144 #endif
4145 "\t args: <name>=fetcharg[:type]\n"
4146 "\t fetcharg: %<register>, @<address>, @<symbol>[+|-<offset>],\n"
4147 "\t $stack<index>, $stack, $retval, $comm\n"
4148 "\t type: s8/16/32/64, u8/16/32/64, x8/16/32/64, string,\n"
4149 "\t b<bit-width>@<bit-offset>/<container-size>\n"
4150 #endif
4151 " events/\t\t- Directory containing all trace event subsystems:\n"
4152 " enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
4153 " events/<system>/\t- Directory containing all trace events for <system>:\n"
4154 " enable\t\t- Write 0/1 to enable/disable tracing of all <system>\n"
4155 "\t\t\t events\n"
4156 " filter\t\t- If set, only events passing filter are traced\n"
4157 " events/<system>/<event>/\t- Directory containing control files for\n"
4158 "\t\t\t <event>:\n"
4159 " enable\t\t- Write 0/1 to enable/disable tracing of <event>\n"
4160 " filter\t\t- If set, only events passing filter are traced\n"
4161 " trigger\t\t- If set, a command to perform when event is hit\n"
4162 "\t Format: <trigger>[:count][if <filter>]\n"
4163 "\t trigger: traceon, traceoff\n"
4164 "\t enable_event:<system>:<event>\n"
4165 "\t disable_event:<system>:<event>\n"
4166 #ifdef CONFIG_HIST_TRIGGERS
4167 "\t enable_hist:<system>:<event>\n"
4168 "\t disable_hist:<system>:<event>\n"
4169 #endif
4170 #ifdef CONFIG_STACKTRACE
4171 "\t\t stacktrace\n"
4172 #endif
4173 #ifdef CONFIG_TRACER_SNAPSHOT
4174 "\t\t snapshot\n"
4175 #endif
4176 #ifdef CONFIG_HIST_TRIGGERS
4177 "\t\t hist (see below)\n"
4178 #endif
4179 "\t example: echo traceoff > events/block/block_unplug/trigger\n"
4180 "\t echo traceoff:3 > events/block/block_unplug/trigger\n"
4181 "\t echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n"
4182 "\t events/block/block_unplug/trigger\n"
4183 "\t The first disables tracing every time block_unplug is hit.\n"
4184 "\t The second disables tracing the first 3 times block_unplug is hit.\n"
4185 "\t The third enables the kmalloc event the first 3 times block_unplug\n"
4186 "\t is hit and has value of greater than 1 for the 'nr_rq' event field.\n"
4187 "\t Like function triggers, the counter is only decremented if it\n"
4188 "\t enabled or disabled tracing.\n"
4189 "\t To remove a trigger without a count:\n"
4190 "\t echo '!<trigger> > <system>/<event>/trigger\n"
4191 "\t To remove a trigger with a count:\n"
4192 "\t echo '!<trigger>:0 > <system>/<event>/trigger\n"
4193 "\t Filters can be ignored when removing a trigger.\n"
4194 #ifdef CONFIG_HIST_TRIGGERS
4195 " hist trigger\t- If set, event hits are aggregated into a hash table\n"
4196 "\t Format: hist:keys=<field1[,field2,...]>\n"
4197 "\t [:values=<field1[,field2,...]>]\n"
4198 "\t [:sort=<field1[,field2,...]>]\n"
4199 "\t [:size=#entries]\n"
4200 "\t [:pause][:continue][:clear]\n"
4201 "\t [:name=histname1]\n"
4202 "\t [if <filter>]\n\n"
4203 "\t When a matching event is hit, an entry is added to a hash\n"
4204 "\t table using the key(s) and value(s) named, and the value of a\n"
4205 "\t sum called 'hitcount' is incremented. Keys and values\n"
4206 "\t correspond to fields in the event's format description. Keys\n"
4207 "\t can be any field, or the special string 'stacktrace'.\n"
4208 "\t Compound keys consisting of up to two fields can be specified\n"
4209 "\t by the 'keys' keyword. Values must correspond to numeric\n"
4210 "\t fields. Sort keys consisting of up to two fields can be\n"
4211 "\t specified using the 'sort' keyword. The sort direction can\n"
4212 "\t be modified by appending '.descending' or '.ascending' to a\n"
4213 "\t sort field. The 'size' parameter can be used to specify more\n"
4214 "\t or fewer than the default 2048 entries for the hashtable size.\n"
4215 "\t If a hist trigger is given a name using the 'name' parameter,\n"
4216 "\t its histogram data will be shared with other triggers of the\n"
4217 "\t same name, and trigger hits will update this common data.\n\n"
4218 "\t Reading the 'hist' file for the event will dump the hash\n"
4219 "\t table in its entirety to stdout. If there are multiple hist\n"
4220 "\t triggers attached to an event, there will be a table for each\n"
4221 "\t trigger in the output. The table displayed for a named\n"
4222 "\t trigger will be the same as any other instance having the\n"
4223 "\t same name. The default format used to display a given field\n"
4224 "\t can be modified by appending any of the following modifiers\n"
4225 "\t to the field name, as applicable:\n\n"
4226 "\t .hex display a number as a hex value\n"
4227 "\t .sym display an address as a symbol\n"
4228 "\t .sym-offset display an address as a symbol and offset\n"
4229 "\t .execname display a common_pid as a program name\n"
4230 "\t .syscall display a syscall id as a syscall name\n\n"
4231 "\t .log2 display log2 value rather than raw number\n\n"
4232 "\t The 'pause' parameter can be used to pause an existing hist\n"
4233 "\t trigger or to start a hist trigger but not log any events\n"
4234 "\t until told to do so. 'continue' can be used to start or\n"
4235 "\t restart a paused hist trigger.\n\n"
4236 "\t The 'clear' parameter will clear the contents of a running\n"
4237 "\t hist trigger and leave its current paused/active state\n"
4238 "\t unchanged.\n\n"
4239 "\t The enable_hist and disable_hist triggers can be used to\n"
4240 "\t have one event conditionally start and stop another event's\n"
4241 "\t already-attached hist trigger. The syntax is analagous to\n"
4242 "\t the enable_event and disable_event triggers.\n"
4243 #endif
4244 ;
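/*
 * Illustrative hist-trigger example following the format documented in
 * readme_msg above; the kmem:kmalloc event and its 'bytes_req' field are
 * used only as an example:
 *
 *   # echo 'hist:keys=common_pid.execname:values=bytes_req:sort=bytes_req.descending' \
 *         > events/kmem/kmalloc/trigger
 *   # cat events/kmem/kmalloc/hist
 */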
4245
4246 static ssize_t
4247 tracing_readme_read(struct file *filp, char __user *ubuf,
4248 size_t cnt, loff_t *ppos)
4249 {
4250 return simple_read_from_buffer(ubuf, cnt, ppos,
4251 readme_msg, strlen(readme_msg));
4252 }
4253
4254 static const struct file_operations tracing_readme_fops = {
4255 .open = tracing_open_generic,
4256 .read = tracing_readme_read,
4257 .llseek = generic_file_llseek,
4258 };
4259
4260 static void *saved_cmdlines_next(struct seq_file *m, void *v, loff_t *pos)
4261 {
4262 unsigned int *ptr = v;
4263
4264 if (*pos || m->count)
4265 ptr++;
4266
4267 (*pos)++;
4268
4269 for (; ptr < &savedcmd->map_cmdline_to_pid[savedcmd->cmdline_num];
4270 ptr++) {
4271 if (*ptr == -1 || *ptr == NO_CMDLINE_MAP)
4272 continue;
4273
4274 return ptr;
4275 }
4276
4277 return NULL;
4278 }
4279
4280 static void *saved_cmdlines_start(struct seq_file *m, loff_t *pos)
4281 {
4282 void *v;
4283 loff_t l = 0;
4284
4285 preempt_disable();
4286 arch_spin_lock(&trace_cmdline_lock);
4287
4288 v = &savedcmd->map_cmdline_to_pid[0];
4289 while (l <= *pos) {
4290 v = saved_cmdlines_next(m, v, &l);
4291 if (!v)
4292 return NULL;
4293 }
4294
4295 return v;
4296 }
4297
4298 static void saved_cmdlines_stop(struct seq_file *m, void *v)
4299 {
4300 arch_spin_unlock(&trace_cmdline_lock);
4301 preempt_enable();
4302 }
4303
4304 static int saved_cmdlines_show(struct seq_file *m, void *v)
4305 {
4306 char buf[TASK_COMM_LEN];
4307 unsigned int *pid = v;
4308
4309 __trace_find_cmdline(*pid, buf);
4310 seq_printf(m, "%d %s\n", *pid, buf);
4311 return 0;
4312 }
4313
4314 static const struct seq_operations tracing_saved_cmdlines_seq_ops = {
4315 .start = saved_cmdlines_start,
4316 .next = saved_cmdlines_next,
4317 .stop = saved_cmdlines_stop,
4318 .show = saved_cmdlines_show,
4319 };
4320
4321 static int tracing_saved_cmdlines_open(struct inode *inode, struct file *filp)
4322 {
4323 if (tracing_disabled)
4324 return -ENODEV;
4325
4326 return seq_open(filp, &tracing_saved_cmdlines_seq_ops);
4327 }
4328
4329 static const struct file_operations tracing_saved_cmdlines_fops = {
4330 .open = tracing_saved_cmdlines_open,
4331 .read = seq_read,
4332 .llseek = seq_lseek,
4333 .release = seq_release,
4334 };
4335
4336 static ssize_t
4337 tracing_saved_cmdlines_size_read(struct file *filp, char __user *ubuf,
4338 size_t cnt, loff_t *ppos)
4339 {
4340 char buf[64];
4341 int r;
4342
4343 arch_spin_lock(&trace_cmdline_lock);
4344 r = scnprintf(buf, sizeof(buf), "%u\n", savedcmd->cmdline_num);
4345 arch_spin_unlock(&trace_cmdline_lock);
4346
4347 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
4348 }
4349
4350 static void free_saved_cmdlines_buffer(struct saved_cmdlines_buffer *s)
4351 {
4352 kfree(s->saved_cmdlines);
4353 kfree(s->map_cmdline_to_pid);
4354 kfree(s);
4355 }
4356
4357 static int tracing_resize_saved_cmdlines(unsigned int val)
4358 {
4359 struct saved_cmdlines_buffer *s, *savedcmd_temp;
4360
4361 s = kmalloc(sizeof(*s), GFP_KERNEL);
4362 if (!s)
4363 return -ENOMEM;
4364
4365 if (allocate_cmdlines_buffer(val, s) < 0) {
4366 kfree(s);
4367 return -ENOMEM;
4368 }
4369
4370 arch_spin_lock(&trace_cmdline_lock);
4371 savedcmd_temp = savedcmd;
4372 savedcmd = s;
4373 arch_spin_unlock(&trace_cmdline_lock);
4374 free_saved_cmdlines_buffer(savedcmd_temp);
4375
4376 return 0;
4377 }
4378
4379 static ssize_t
4380 tracing_saved_cmdlines_size_write(struct file *filp, const char __user *ubuf,
4381 size_t cnt, loff_t *ppos)
4382 {
4383 unsigned long val;
4384 int ret;
4385
4386 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
4387 if (ret)
4388 return ret;
4389
4390 /* must have at least 1 entry and at most PID_MAX_DEFAULT entries */
4391 if (!val || val > PID_MAX_DEFAULT)
4392 return -EINVAL;
4393
4394 ret = tracing_resize_saved_cmdlines((unsigned int)val);
4395 if (ret < 0)
4396 return ret;
4397
4398 *ppos += cnt;
4399
4400 return cnt;
4401 }
4402
4403 static const struct file_operations tracing_saved_cmdlines_size_fops = {
4404 .open = tracing_open_generic,
4405 .read = tracing_saved_cmdlines_size_read,
4406 .write = tracing_saved_cmdlines_size_write,
4407 };
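/*
 * Example (sketch): growing the saved comm-pid cache so that more task
 * names survive in the saved_cmdlines listing:
 *
 *   # echo 4096 > saved_cmdlines_size
 *   # cat saved_cmdlines_size
 *   4096
 */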
4408
4409 #ifdef CONFIG_TRACE_ENUM_MAP_FILE
4410 static union trace_enum_map_item *
4411 update_enum_map(union trace_enum_map_item *ptr)
4412 {
4413 if (!ptr->map.enum_string) {
4414 if (ptr->tail.next) {
4415 ptr = ptr->tail.next;
4416 /* Set ptr to the next real item (skip head) */
4417 ptr++;
4418 } else
4419 return NULL;
4420 }
4421 return ptr;
4422 }
4423
4424 static void *enum_map_next(struct seq_file *m, void *v, loff_t *pos)
4425 {
4426 union trace_enum_map_item *ptr = v;
4427
4428 /*
4429 * Paranoid! If ptr points to end, we don't want to increment past it.
4430 * This really should never happen.
4431 */
4432 ptr = update_enum_map(ptr);
4433 if (WARN_ON_ONCE(!ptr))
4434 return NULL;
4435
4436 ptr++;
4437
4438 (*pos)++;
4439
4440 ptr = update_enum_map(ptr);
4441
4442 return ptr;
4443 }
4444
4445 static void *enum_map_start(struct seq_file *m, loff_t *pos)
4446 {
4447 union trace_enum_map_item *v;
4448 loff_t l = 0;
4449
4450 mutex_lock(&trace_enum_mutex);
4451
4452 v = trace_enum_maps;
4453 if (v)
4454 v++;
4455
4456 while (v && l < *pos) {
4457 v = enum_map_next(m, v, &l);
4458 }
4459
4460 return v;
4461 }
4462
4463 static void enum_map_stop(struct seq_file *m, void *v)
4464 {
4465 mutex_unlock(&trace_enum_mutex);
4466 }
4467
4468 static int enum_map_show(struct seq_file *m, void *v)
4469 {
4470 union trace_enum_map_item *ptr = v;
4471
4472 seq_printf(m, "%s %ld (%s)\n",
4473 ptr->map.enum_string, ptr->map.enum_value,
4474 ptr->map.system);
4475
4476 return 0;
4477 }
4478
4479 static const struct seq_operations tracing_enum_map_seq_ops = {
4480 .start = enum_map_start,
4481 .next = enum_map_next,
4482 .stop = enum_map_stop,
4483 .show = enum_map_show,
4484 };
4485
4486 static int tracing_enum_map_open(struct inode *inode, struct file *filp)
4487 {
4488 if (tracing_disabled)
4489 return -ENODEV;
4490
4491 return seq_open(filp, &tracing_enum_map_seq_ops);
4492 }
4493
4494 static const struct file_operations tracing_enum_map_fops = {
4495 .open = tracing_enum_map_open,
4496 .read = seq_read,
4497 .llseek = seq_lseek,
4498 .release = seq_release,
4499 };
4500
4501 static inline union trace_enum_map_item *
4502 trace_enum_jmp_to_tail(union trace_enum_map_item *ptr)
4503 {
4504 /* Return tail of array given the head */
4505 return ptr + ptr->head.length + 1;
4506 }
4507
4508 static void
4509 trace_insert_enum_map_file(struct module *mod, struct trace_enum_map **start,
4510 int len)
4511 {
4512 struct trace_enum_map **stop;
4513 struct trace_enum_map **map;
4514 union trace_enum_map_item *map_array;
4515 union trace_enum_map_item *ptr;
4516
4517 stop = start + len;
4518
4519 /*
4520 * The trace_enum_maps list contains the map plus a head and tail item,
4521 * where the head holds the module and the length of the array, and the
4522 * tail holds a pointer to the next list.
4523 */
4524 map_array = kmalloc(sizeof(*map_array) * (len + 2), GFP_KERNEL);
4525 if (!map_array) {
4526 pr_warn("Unable to allocate trace enum mapping\n");
4527 return;
4528 }
4529
4530 mutex_lock(&trace_enum_mutex);
4531
4532 if (!trace_enum_maps)
4533 trace_enum_maps = map_array;
4534 else {
4535 ptr = trace_enum_maps;
4536 for (;;) {
4537 ptr = trace_enum_jmp_to_tail(ptr);
4538 if (!ptr->tail.next)
4539 break;
4540 ptr = ptr->tail.next;
4541
4542 }
4543 ptr->tail.next = map_array;
4544 }
4545 map_array->head.mod = mod;
4546 map_array->head.length = len;
4547 map_array++;
4548
4549 for (map = start; (unsigned long)map < (unsigned long)stop; map++) {
4550 map_array->map = **map;
4551 map_array++;
4552 }
4553 memset(map_array, 0, sizeof(*map_array));
4554
4555 mutex_unlock(&trace_enum_mutex);
4556 }
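/*
 * Resulting layout of one map_array allocation (descriptive sketch):
 *
 *   [ head: mod, length=len ][ map[0] ][ map[1] ] ... [ map[len-1] ][ tail: next ]
 *
 * trace_enum_jmp_to_tail() hops from a head item straight to its tail item,
 * and the tail's 'next' pointer chains the per-module arrays into the single
 * list that the enum_map seq_file walks.
 */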
4557
4558 static void trace_create_enum_file(struct dentry *d_tracer)
4559 {
4560 trace_create_file("enum_map", 0444, d_tracer,
4561 NULL, &tracing_enum_map_fops);
4562 }
4563
4564 #else /* CONFIG_TRACE_ENUM_MAP_FILE */
4565 static inline void trace_create_enum_file(struct dentry *d_tracer) { }
4566 static inline void trace_insert_enum_map_file(struct module *mod,
4567 struct trace_enum_map **start, int len) { }
4568 #endif /* !CONFIG_TRACE_ENUM_MAP_FILE */
4569
4570 static void trace_insert_enum_map(struct module *mod,
4571 struct trace_enum_map **start, int len)
4572 {
4573 struct trace_enum_map **map;
4574
4575 if (len <= 0)
4576 return;
4577
4578 map = start;
4579
4580 trace_event_enum_update(map, len);
4581
4582 trace_insert_enum_map_file(mod, start, len);
4583 }
4584
4585 static ssize_t
4586 tracing_set_trace_read(struct file *filp, char __user *ubuf,
4587 size_t cnt, loff_t *ppos)
4588 {
4589 struct trace_array *tr = filp->private_data;
4590 char buf[MAX_TRACER_SIZE+2];
4591 int r;
4592
4593 mutex_lock(&trace_types_lock);
4594 r = sprintf(buf, "%s\n", tr->current_trace->name);
4595 mutex_unlock(&trace_types_lock);
4596
4597 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
4598 }
4599
4600 int tracer_init(struct tracer *t, struct trace_array *tr)
4601 {
4602 tracing_reset_online_cpus(&tr->trace_buffer);
4603 return t->init(tr);
4604 }
4605
4606 static void set_buffer_entries(struct trace_buffer *buf, unsigned long val)
4607 {
4608 int cpu;
4609
4610 for_each_tracing_cpu(cpu)
4611 per_cpu_ptr(buf->data, cpu)->entries = val;
4612 }
4613
4614 #ifdef CONFIG_TRACER_MAX_TRACE
4615 /* resize @trace_buf's buffer to the size of @size_buf's entries */
4616 static int resize_buffer_duplicate_size(struct trace_buffer *trace_buf,
4617 struct trace_buffer *size_buf, int cpu_id)
4618 {
4619 int cpu, ret = 0;
4620
4621 if (cpu_id == RING_BUFFER_ALL_CPUS) {
4622 for_each_tracing_cpu(cpu) {
4623 ret = ring_buffer_resize(trace_buf->buffer,
4624 per_cpu_ptr(size_buf->data, cpu)->entries, cpu);
4625 if (ret < 0)
4626 break;
4627 per_cpu_ptr(trace_buf->data, cpu)->entries =
4628 per_cpu_ptr(size_buf->data, cpu)->entries;
4629 }
4630 } else {
4631 ret = ring_buffer_resize(trace_buf->buffer,
4632 per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id);
4633 if (ret == 0)
4634 per_cpu_ptr(trace_buf->data, cpu_id)->entries =
4635 per_cpu_ptr(size_buf->data, cpu_id)->entries;
4636 }
4637
4638 return ret;
4639 }
4640 #endif /* CONFIG_TRACER_MAX_TRACE */
4641
4642 static int __tracing_resize_ring_buffer(struct trace_array *tr,
4643 unsigned long size, int cpu)
4644 {
4645 int ret;
4646
4647 /*
4648 * If the kernel or a user changes the size of the ring buffer,
4649 * we use the size that was given, and we can forget about
4650 * expanding it later.
4651 */
4652 ring_buffer_expanded = true;
4653
4654 /* May be called before buffers are initialized */
4655 if (!tr->trace_buffer.buffer)
4656 return 0;
4657
4658 ret = ring_buffer_resize(tr->trace_buffer.buffer, size, cpu);
4659 if (ret < 0)
4660 return ret;
4661
4662 #ifdef CONFIG_TRACER_MAX_TRACE
4663 if (!(tr->flags & TRACE_ARRAY_FL_GLOBAL) ||
4664 !tr->current_trace->use_max_tr)
4665 goto out;
4666
4667 ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu);
4668 if (ret < 0) {
4669 int r = resize_buffer_duplicate_size(&tr->trace_buffer,
4670 &tr->trace_buffer, cpu);
4671 if (r < 0) {
4672 /*
4673 * AARGH! We are left with different
4674 * size max buffer!!!!
4675 * The max buffer is our "snapshot" buffer.
4676 * When a tracer needs a snapshot (one of the
4677 * latency tracers), it swaps the max buffer
4678 * with the saved snapshot. We succeeded in
4679 * updating the size of the main buffer, but failed to
4680 * update the size of the max buffer. But when we tried
4681 * to reset the main buffer to the original size, we
4682 * failed there too. This is very unlikely to
4683 * happen, but if it does, warn and kill all
4684 * tracing.
4685 */
4686 WARN_ON(1);
4687 tracing_disabled = 1;
4688 }
4689 return ret;
4690 }
4691
4692 if (cpu == RING_BUFFER_ALL_CPUS)
4693 set_buffer_entries(&tr->max_buffer, size);
4694 else
4695 per_cpu_ptr(tr->max_buffer.data, cpu)->entries = size;
4696
4697 out:
4698 #endif /* CONFIG_TRACER_MAX_TRACE */
4699
4700 if (cpu == RING_BUFFER_ALL_CPUS)
4701 set_buffer_entries(&tr->trace_buffer, size);
4702 else
4703 per_cpu_ptr(tr->trace_buffer.data, cpu)->entries = size;
4704
4705 return ret;
4706 }
4707
4708 static ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
4709 unsigned long size, int cpu_id)
4710 {
4711 int ret = size;
4712
4713 mutex_lock(&trace_types_lock);
4714
4715 if (cpu_id != RING_BUFFER_ALL_CPUS) {
4716 /* make sure this CPU is enabled in the mask */
4717 if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask)) {
4718 ret = -EINVAL;
4719 goto out;
4720 }
4721 }
4722
4723 ret = __tracing_resize_ring_buffer(tr, size, cpu_id);
4724 if (ret < 0)
4725 ret = -ENOMEM;
4726
4727 out:
4728 mutex_unlock(&trace_types_lock);
4729
4730 return ret;
4731 }
4732
4733
4734 /**
4735 * tracing_update_buffers - used by tracing facility to expand ring buffers
4736 *
4737 * To save memory when tracing is configured in but never used, the
4738 * ring buffers are set to a minimum size. Once a user starts to use
4739 * the tracing facility, they need to grow to their default size.
4741 *
4742 * This function is to be called when a tracer is about to be used.
4743 */
4744 int tracing_update_buffers(void)
4745 {
4746 int ret = 0;
4747
4748 mutex_lock(&trace_types_lock);
4749 if (!ring_buffer_expanded)
4750 ret = __tracing_resize_ring_buffer(&global_trace, trace_buf_size,
4751 RING_BUFFER_ALL_CPUS);
4752 mutex_unlock(&trace_types_lock);
4753
4754 return ret;
4755 }
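/*
 * Typical call pattern (sketch): callers that are about to start producing
 * trace data invoke this first, so the minimally-sized boot-time buffers are
 * expanded exactly once:
 *
 *   ret = tracing_update_buffers();
 *   if (ret < 0)
 *           return ret;
 *   (then enable the tracer or event)
 */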
4756
4757 struct trace_option_dentry;
4758
4759 static void
4760 create_trace_option_files(struct trace_array *tr, struct tracer *tracer);
4761
4762 /*
4763 * Used to clear out the tracer before deletion of an instance.
4764 * Must have trace_types_lock held.
4765 */
4766 static void tracing_set_nop(struct trace_array *tr)
4767 {
4768 if (tr->current_trace == &nop_trace)
4769 return;
4770
4771 tr->current_trace->enabled--;
4772
4773 if (tr->current_trace->reset)
4774 tr->current_trace->reset(tr);
4775
4776 tr->current_trace = &nop_trace;
4777 }
4778
4779 static void add_tracer_options(struct trace_array *tr, struct tracer *t)
4780 {
4781 /* Only enable if the directory has been created already. */
4782 if (!tr->dir)
4783 return;
4784
4785 create_trace_option_files(tr, t);
4786 }
4787
4788 static int tracing_set_tracer(struct trace_array *tr, const char *buf)
4789 {
4790 struct tracer *t;
4791 #ifdef CONFIG_TRACER_MAX_TRACE
4792 bool had_max_tr;
4793 #endif
4794 int ret = 0;
4795
4796 mutex_lock(&trace_types_lock);
4797
4798 if (!ring_buffer_expanded) {
4799 ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
4800 RING_BUFFER_ALL_CPUS);
4801 if (ret < 0)
4802 goto out;
4803 ret = 0;
4804 }
4805
4806 for (t = trace_types; t; t = t->next) {
4807 if (strcmp(t->name, buf) == 0)
4808 break;
4809 }
4810 if (!t) {
4811 ret = -EINVAL;
4812 goto out;
4813 }
4814 if (t == tr->current_trace)
4815 goto out;
4816
4817 /* Some tracers are only allowed for the top level buffer */
4818 if (!trace_ok_for_array(t, tr)) {
4819 ret = -EINVAL;
4820 goto out;
4821 }
4822
4823 /* If trace pipe files are being read, we can't change the tracer */
4824 if (tr->current_trace->ref) {
4825 ret = -EBUSY;
4826 goto out;
4827 }
4828
4829 trace_branch_disable();
4830
4831 tr->current_trace->enabled--;
4832
4833 if (tr->current_trace->reset)
4834 tr->current_trace->reset(tr);
4835
4836 /* Current trace needs to be nop_trace before synchronize_sched */
4837 tr->current_trace = &nop_trace;
4838
4839 #ifdef CONFIG_TRACER_MAX_TRACE
4840 had_max_tr = tr->allocated_snapshot;
4841
4842 if (had_max_tr && !t->use_max_tr) {
4843 /*
4844 * We need to make sure that the update_max_tr sees that
4845 * current_trace changed to nop_trace to keep it from
4846 * swapping the buffers after we resize it.
4847 * The update_max_tr is called with interrupts disabled,
4848 * so a synchronize_sched() is sufficient.
4849 */
4850 synchronize_sched();
4851 free_snapshot(tr);
4852 }
4853 #endif
4854
4855 #ifdef CONFIG_TRACER_MAX_TRACE
4856 if (t->use_max_tr && !had_max_tr) {
4857 ret = alloc_snapshot(tr);
4858 if (ret < 0)
4859 goto out;
4860 }
4861 #endif
4862
4863 if (t->init) {
4864 ret = tracer_init(t, tr);
4865 if (ret)
4866 goto out;
4867 }
4868
4869 tr->current_trace = t;
4870 tr->current_trace->enabled++;
4871 trace_branch_enable(tr);
4872 out:
4873 mutex_unlock(&trace_types_lock);
4874
4875 return ret;
4876 }
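/*
 * User-visible effect (illustrative): tracing_set_tracer() backs the
 * current_tracer file, so switching tracers from user space looks like:
 *
 *   # cat available_tracers
 *   # echo function_graph > current_tracer
 *
 * A name that is not listed in available_tracers is rejected with -EINVAL
 * by the lookup loop above.
 */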
4877
4878 static ssize_t
4879 tracing_set_trace_write(struct file *filp, const char __user *ubuf,
4880 size_t cnt, loff_t *ppos)
4881 {
4882 struct trace_array *tr = filp->private_data;
4883 char buf[MAX_TRACER_SIZE+1];
4884 int i;
4885 size_t ret;
4886 int err;
4887
4888 ret = cnt;
4889
4890 if (cnt > MAX_TRACER_SIZE)
4891 cnt = MAX_TRACER_SIZE;
4892
4893 if (copy_from_user(buf, ubuf, cnt))
4894 return -EFAULT;
4895
4896 buf[cnt] = 0;
4897
4898 /* strip ending whitespace. */
4899 for (i = cnt - 1; i > 0 && isspace(buf[i]); i--)
4900 buf[i] = 0;
4901
4902 err = tracing_set_tracer(tr, buf);
4903 if (err)
4904 return err;
4905
4906 *ppos += ret;
4907
4908 return ret;
4909 }
4910
4911 static ssize_t
4912 tracing_nsecs_read(unsigned long *ptr, char __user *ubuf,
4913 size_t cnt, loff_t *ppos)
4914 {
4915 char buf[64];
4916 int r;
4917
4918 r = snprintf(buf, sizeof(buf), "%ld\n",
4919 *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
4920 if (r > sizeof(buf))
4921 r = sizeof(buf);
4922 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
4923 }
4924
4925 static ssize_t
4926 tracing_nsecs_write(unsigned long *ptr, const char __user *ubuf,
4927 size_t cnt, loff_t *ppos)
4928 {
4929 unsigned long val;
4930 int ret;
4931
4932 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
4933 if (ret)
4934 return ret;
4935
4936 *ptr = val * 1000;
4937
4938 return cnt;
4939 }
4940
4941 static ssize_t
4942 tracing_thresh_read(struct file *filp, char __user *ubuf,
4943 size_t cnt, loff_t *ppos)
4944 {
4945 return tracing_nsecs_read(&tracing_thresh, ubuf, cnt, ppos);
4946 }
4947
4948 static ssize_t
4949 tracing_thresh_write(struct file *filp, const char __user *ubuf,
4950 size_t cnt, loff_t *ppos)
4951 {
4952 struct trace_array *tr = filp->private_data;
4953 int ret;
4954
4955 mutex_lock(&trace_types_lock);
4956 ret = tracing_nsecs_write(&tracing_thresh, ubuf, cnt, ppos);
4957 if (ret < 0)
4958 goto out;
4959
4960 if (tr->current_trace->update_thresh) {
4961 ret = tr->current_trace->update_thresh(tr);
4962 if (ret < 0)
4963 goto out;
4964 }
4965
4966 ret = cnt;
4967 out:
4968 mutex_unlock(&trace_types_lock);
4969
4970 return ret;
4971 }
4972
4973 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
4974
4975 static ssize_t
4976 tracing_max_lat_read(struct file *filp, char __user *ubuf,
4977 size_t cnt, loff_t *ppos)
4978 {
4979 return tracing_nsecs_read(filp->private_data, ubuf, cnt, ppos);
4980 }
4981
4982 static ssize_t
4983 tracing_max_lat_write(struct file *filp, const char __user *ubuf,
4984 size_t cnt, loff_t *ppos)
4985 {
4986 return tracing_nsecs_write(filp->private_data, ubuf, cnt, ppos);
4987 }
4988
4989 #endif
4990
4991 static int tracing_open_pipe(struct inode *inode, struct file *filp)
4992 {
4993 struct trace_array *tr = inode->i_private;
4994 struct trace_iterator *iter;
4995 int ret = 0;
4996
4997 if (tracing_disabled)
4998 return -ENODEV;
4999
5000 if (trace_array_get(tr) < 0)
5001 return -ENODEV;
5002
5003 mutex_lock(&trace_types_lock);
5004
5005 /* create a buffer to store the information to pass to userspace */
5006 iter = kzalloc(sizeof(*iter), GFP_KERNEL);
5007 if (!iter) {
5008 ret = -ENOMEM;
5009 __trace_array_put(tr);
5010 goto out;
5011 }
5012
5013 trace_seq_init(&iter->seq);
5014 iter->trace = tr->current_trace;
5015
5016 if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
5017 ret = -ENOMEM;
5018 goto fail;
5019 }
5020
5021 /* trace pipe does not show start of buffer */
5022 cpumask_setall(iter->started);
5023
5024 if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
5025 iter->iter_flags |= TRACE_FILE_LAT_FMT;
5026
5027 /* Output in nanoseconds only if we are using a clock in nanoseconds. */
5028 if (trace_clocks[tr->clock_id].in_ns)
5029 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
5030
5031 iter->tr = tr;
5032 iter->trace_buffer = &tr->trace_buffer;
5033 iter->cpu_file = tracing_get_cpu(inode);
5034 mutex_init(&iter->mutex);
5035 filp->private_data = iter;
5036
5037 if (iter->trace->pipe_open)
5038 iter->trace->pipe_open(iter);
5039
5040 nonseekable_open(inode, filp);
5041
5042 tr->current_trace->ref++;
5043 out:
5044 mutex_unlock(&trace_types_lock);
5045 return ret;
5046
5047 fail:
5048 kfree(iter->trace);
5049 kfree(iter);
5050 __trace_array_put(tr);
5051 mutex_unlock(&trace_types_lock);
5052 return ret;
5053 }
5054
5055 static int tracing_release_pipe(struct inode *inode, struct file *file)
5056 {
5057 struct trace_iterator *iter = file->private_data;
5058 struct trace_array *tr = inode->i_private;
5059
5060 mutex_lock(&trace_types_lock);
5061
5062 tr->current_trace->ref--;
5063
5064 if (iter->trace->pipe_close)
5065 iter->trace->pipe_close(iter);
5066
5067 mutex_unlock(&trace_types_lock);
5068
5069 free_cpumask_var(iter->started);
5070 mutex_destroy(&iter->mutex);
5071 kfree(iter);
5072
5073 trace_array_put(tr);
5074
5075 return 0;
5076 }
5077
5078 static unsigned int
5079 trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table)
5080 {
5081 struct trace_array *tr = iter->tr;
5082
5083 /* Iterators are static, they should be filled or empty */
5084 if (trace_buffer_iter(iter, iter->cpu_file))
5085 return POLLIN | POLLRDNORM;
5086
5087 if (tr->trace_flags & TRACE_ITER_BLOCK)
5088 /*
5089 * Always select as readable when in blocking mode
5090 */
5091 return POLLIN | POLLRDNORM;
5092 else
5093 return ring_buffer_poll_wait(iter->trace_buffer->buffer, iter->cpu_file,
5094 filp, poll_table);
5095 }
5096
5097 static unsigned int
5098 tracing_poll_pipe(struct file *filp, poll_table *poll_table)
5099 {
5100 struct trace_iterator *iter = filp->private_data;
5101
5102 return trace_poll(iter, filp, poll_table);
5103 }
5104
5105 /* Must be called with iter->mutex held. */
5106 static int tracing_wait_pipe(struct file *filp)
5107 {
5108 struct trace_iterator *iter = filp->private_data;
5109 int ret;
5110
5111 while (trace_empty(iter)) {
5112
5113 if ((filp->f_flags & O_NONBLOCK)) {
5114 return -EAGAIN;
5115 }
5116
5117 /*
5118 * We block until we read something and tracing is disabled.
5119 * We still block if tracing is disabled, but we have never
5120 * read anything. This allows a user to cat this file, and
5121 * then enable tracing. But after we have read something,
5122 * we give an EOF when tracing is again disabled.
5123 *
5124 * iter->pos will be 0 if we haven't read anything.
5125 */
5126 if (!tracing_is_on() && iter->pos)
5127 break;
5128
5129 mutex_unlock(&iter->mutex);
5130
5131 ret = wait_on_pipe(iter, false);
5132
5133 mutex_lock(&iter->mutex);
5134
5135 if (ret)
5136 return ret;
5137 }
5138
5139 return 1;
5140 }
5141
5142 /*
5143 * Consumer reader.
5144 */
5145 static ssize_t
5146 tracing_read_pipe(struct file *filp, char __user *ubuf,
5147 size_t cnt, loff_t *ppos)
5148 {
5149 struct trace_iterator *iter = filp->private_data;
5150 ssize_t sret;
5151
5152 /*
5153 * Avoid more than one consumer on a single file descriptor.
5154 * This is just a matter of trace coherency; the ring buffer itself
5155 * is protected.
5156 */
5157 mutex_lock(&iter->mutex);
5158
5159 /* return any leftover data */
5160 sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
5161 if (sret != -EBUSY)
5162 goto out;
5163
5164 trace_seq_init(&iter->seq);
5165
5166 if (iter->trace->read) {
5167 sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
5168 if (sret)
5169 goto out;
5170 }
5171
5172 waitagain:
5173 sret = tracing_wait_pipe(filp);
5174 if (sret <= 0)
5175 goto out;
5176
5177 /* stop when tracing is finished */
5178 if (trace_empty(iter)) {
5179 sret = 0;
5180 goto out;
5181 }
5182
5183 if (cnt >= PAGE_SIZE)
5184 cnt = PAGE_SIZE - 1;
5185
5186 /* reset all but tr, trace, and overruns */
5187 memset(&iter->seq, 0,
5188 sizeof(struct trace_iterator) -
5189 offsetof(struct trace_iterator, seq));
5190 cpumask_clear(iter->started);
5191 iter->pos = -1;
5192
5193 trace_event_read_lock();
5194 trace_access_lock(iter->cpu_file);
5195 while (trace_find_next_entry_inc(iter) != NULL) {
5196 enum print_line_t ret;
5197 int save_len = iter->seq.seq.len;
5198
5199 ret = print_trace_line(iter);
5200 if (ret == TRACE_TYPE_PARTIAL_LINE) {
5201 /* don't print partial lines */
5202 iter->seq.seq.len = save_len;
5203 break;
5204 }
5205 if (ret != TRACE_TYPE_NO_CONSUME)
5206 trace_consume(iter);
5207
5208 if (trace_seq_used(&iter->seq) >= cnt)
5209 break;
5210
5211 /*
5212 * Setting the full flag means we reached the trace_seq buffer
5213 * size and we should have left via the partial-output condition above.
5214 * If we get here, one of the trace_seq_* functions is not being used properly.
5215 */
5216 WARN_ONCE(iter->seq.full, "full flag set for trace type %d",
5217 iter->ent->type);
5218 }
5219 trace_access_unlock(iter->cpu_file);
5220 trace_event_read_unlock();
5221
5222 /* Now copy what we have to the user */
5223 sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
5224 if (iter->seq.seq.readpos >= trace_seq_used(&iter->seq))
5225 trace_seq_init(&iter->seq);
5226
5227 /*
5228 * If there was nothing to send to user, in spite of consuming trace
5229 * entries, go back to wait for more entries.
5230 */
5231 if (sret == -EBUSY)
5232 goto waitagain;
5233
5234 out:
5235 mutex_unlock(&iter->mutex);
5236
5237 return sret;
5238 }
5239
5240 static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
5241 unsigned int idx)
5242 {
5243 __free_page(spd->pages[idx]);
5244 }
5245
5246 static const struct pipe_buf_operations tracing_pipe_buf_ops = {
5247 .can_merge = 0,
5248 .confirm = generic_pipe_buf_confirm,
5249 .release = generic_pipe_buf_release,
5250 .steal = generic_pipe_buf_steal,
5251 .get = generic_pipe_buf_get,
5252 };
5253
5254 static size_t
5255 tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
5256 {
5257 size_t count;
5258 int save_len;
5259 int ret;
5260
5261 /* Seq buffer is page-sized, exactly what we need. */
5262 for (;;) {
5263 save_len = iter->seq.seq.len;
5264 ret = print_trace_line(iter);
5265
5266 if (trace_seq_has_overflowed(&iter->seq)) {
5267 iter->seq.seq.len = save_len;
5268 break;
5269 }
5270
5271 /*
5272 * This should not be hit, because it should only
5273 * be set if the iter->seq overflowed. But check it
5274 * anyway to be safe.
5275 */
5276 if (ret == TRACE_TYPE_PARTIAL_LINE) {
5277 iter->seq.seq.len = save_len;
5278 break;
5279 }
5280
5281 count = trace_seq_used(&iter->seq) - save_len;
5282 if (rem < count) {
5283 rem = 0;
5284 iter->seq.seq.len = save_len;
5285 break;
5286 }
5287
5288 if (ret != TRACE_TYPE_NO_CONSUME)
5289 trace_consume(iter);
5290 rem -= count;
5291 if (!trace_find_next_entry_inc(iter)) {
5292 rem = 0;
5293 iter->ent = NULL;
5294 break;
5295 }
5296 }
5297
5298 return rem;
5299 }
5300
5301 static ssize_t tracing_splice_read_pipe(struct file *filp,
5302 loff_t *ppos,
5303 struct pipe_inode_info *pipe,
5304 size_t len,
5305 unsigned int flags)
5306 {
5307 struct page *pages_def[PIPE_DEF_BUFFERS];
5308 struct partial_page partial_def[PIPE_DEF_BUFFERS];
5309 struct trace_iterator *iter = filp->private_data;
5310 struct splice_pipe_desc spd = {
5311 .pages = pages_def,
5312 .partial = partial_def,
5313 .nr_pages = 0, /* This gets updated below. */
5314 .nr_pages_max = PIPE_DEF_BUFFERS,
5315 .flags = flags,
5316 .ops = &tracing_pipe_buf_ops,
5317 .spd_release = tracing_spd_release_pipe,
5318 };
5319 ssize_t ret;
5320 size_t rem;
5321 unsigned int i;
5322
5323 if (splice_grow_spd(pipe, &spd))
5324 return -ENOMEM;
5325
5326 mutex_lock(&iter->mutex);
5327
5328 if (iter->trace->splice_read) {
5329 ret = iter->trace->splice_read(iter, filp,
5330 ppos, pipe, len, flags);
5331 if (ret)
5332 goto out_err;
5333 }
5334
5335 ret = tracing_wait_pipe(filp);
5336 if (ret <= 0)
5337 goto out_err;
5338
5339 if (!iter->ent && !trace_find_next_entry_inc(iter)) {
5340 ret = -EFAULT;
5341 goto out_err;
5342 }
5343
5344 trace_event_read_lock();
5345 trace_access_lock(iter->cpu_file);
5346
5347 /* Fill as many pages as possible. */
5348 for (i = 0, rem = len; i < spd.nr_pages_max && rem; i++) {
5349 spd.pages[i] = alloc_page(GFP_KERNEL);
5350 if (!spd.pages[i])
5351 break;
5352
5353 rem = tracing_fill_pipe_page(rem, iter);
5354
5355 /* Copy the data into the page, so we can start over. */
5356 ret = trace_seq_to_buffer(&iter->seq,
5357 page_address(spd.pages[i]),
5358 trace_seq_used(&iter->seq));
5359 if (ret < 0) {
5360 __free_page(spd.pages[i]);
5361 break;
5362 }
5363 spd.partial[i].offset = 0;
5364 spd.partial[i].len = trace_seq_used(&iter->seq);
5365
5366 trace_seq_init(&iter->seq);
5367 }
5368
5369 trace_access_unlock(iter->cpu_file);
5370 trace_event_read_unlock();
5371 mutex_unlock(&iter->mutex);
5372
5373 spd.nr_pages = i;
5374
5375 if (i)
5376 ret = splice_to_pipe(pipe, &spd);
5377 else
5378 ret = 0;
5379 out:
5380 splice_shrink_spd(&spd);
5381 return ret;
5382
5383 out_err:
5384 mutex_unlock(&iter->mutex);
5385 goto out;
5386 }
5387
5388 static ssize_t
5389 tracing_entries_read(struct file *filp, char __user *ubuf,
5390 size_t cnt, loff_t *ppos)
5391 {
5392 struct inode *inode = file_inode(filp);
5393 struct trace_array *tr = inode->i_private;
5394 int cpu = tracing_get_cpu(inode);
5395 char buf[64];
5396 int r = 0;
5397 ssize_t ret;
5398
5399 mutex_lock(&trace_types_lock);
5400
5401 if (cpu == RING_BUFFER_ALL_CPUS) {
5402 int cpu, buf_size_same;
5403 unsigned long size;
5404
5405 size = 0;
5406 buf_size_same = 1;
5407 /* check if all per-cpu buffer sizes are the same */
5408 for_each_tracing_cpu(cpu) {
5409 /* fill in the size from the first enabled cpu */
5410 if (size == 0)
5411 size = per_cpu_ptr(tr->trace_buffer.data, cpu)->entries;
5412 if (size != per_cpu_ptr(tr->trace_buffer.data, cpu)->entries) {
5413 buf_size_same = 0;
5414 break;
5415 }
5416 }
5417
5418 if (buf_size_same) {
5419 if (!ring_buffer_expanded)
5420 r = sprintf(buf, "%lu (expanded: %lu)\n",
5421 size >> 10,
5422 trace_buf_size >> 10);
5423 else
5424 r = sprintf(buf, "%lu\n", size >> 10);
5425 } else
5426 r = sprintf(buf, "X\n");
5427 } else
5428 r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->trace_buffer.data, cpu)->entries >> 10);
5429
5430 mutex_unlock(&trace_types_lock);
5431
5432 ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5433 return ret;
5434 }
5435
5436 static ssize_t
5437 tracing_entries_write(struct file *filp, const char __user *ubuf,
5438 size_t cnt, loff_t *ppos)
5439 {
5440 struct inode *inode = file_inode(filp);
5441 struct trace_array *tr = inode->i_private;
5442 unsigned long val;
5443 int ret;
5444
5445 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5446 if (ret)
5447 return ret;
5448
5449 /* must have at least 1 entry */
5450 if (!val)
5451 return -EINVAL;
5452
5453 /* value is in KB */
5454 val <<= 10;
5455 ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode));
5456 if (ret < 0)
5457 return ret;
5458
5459 *ppos += cnt;
5460
5461 return cnt;
5462 }
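/*
 * Example (sketch): the value written here is interpreted in KiB per CPU,
 * matching the "value is in KB" conversion above (the per_cpu path is the
 * usual tracefs layout and is assumed here):
 *
 *   # echo 4096 > buffer_size_kb                  resize every per-cpu buffer
 *   # echo 1408 > per_cpu/cpu0/buffer_size_kb     resize only CPU 0
 */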
5463
5464 static ssize_t
5465 tracing_total_entries_read(struct file *filp, char __user *ubuf,
5466 size_t cnt, loff_t *ppos)
5467 {
5468 struct trace_array *tr = filp->private_data;
5469 char buf[64];
5470 int r, cpu;
5471 unsigned long size = 0, expanded_size = 0;
5472
5473 mutex_lock(&trace_types_lock);
5474 for_each_tracing_cpu(cpu) {
5475 size += per_cpu_ptr(tr->trace_buffer.data, cpu)->entries >> 10;
5476 if (!ring_buffer_expanded)
5477 expanded_size += trace_buf_size >> 10;
5478 }
5479 if (ring_buffer_expanded)
5480 r = sprintf(buf, "%lu\n", size);
5481 else
5482 r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size);
5483 mutex_unlock(&trace_types_lock);
5484
5485 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5486 }
5487
5488 static ssize_t
5489 tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
5490 size_t cnt, loff_t *ppos)
5491 {
5492 /*
5493 * There is no need to read what the user has written; this function
5494 * just makes sure that there is no error when "echo" is used.
5495 */
5496
5497 *ppos += cnt;
5498
5499 return cnt;
5500 }
5501
5502 static int
5503 tracing_free_buffer_release(struct inode *inode, struct file *filp)
5504 {
5505 struct trace_array *tr = inode->i_private;
5506
5507 /* disable tracing? */
5508 if (tr->trace_flags & TRACE_ITER_STOP_ON_FREE)
5509 tracer_tracing_off(tr);
5510 /* resize the ring buffer to 0 */
5511 tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS);
5512
5513 trace_array_put(tr);
5514
5515 return 0;
5516 }
5517
5518 static ssize_t
5519 tracing_mark_write(struct file *filp, const char __user *ubuf,
5520 size_t cnt, loff_t *fpos)
5521 {
5522 unsigned long addr = (unsigned long)ubuf;
5523 struct trace_array *tr = filp->private_data;
5524 struct ring_buffer_event *event;
5525 struct ring_buffer *buffer;
5526 struct print_entry *entry;
5527 unsigned long irq_flags;
5528 struct page *pages[2];
5529 void *map_page[2];
5530 int nr_pages = 1;
5531 ssize_t written;
5532 int offset;
5533 int size;
5534 int len;
5535 int ret;
5536 int i;
5537
5538 if (tracing_disabled)
5539 return -EINVAL;
5540
5541 if (!(tr->trace_flags & TRACE_ITER_MARKERS))
5542 return -EINVAL;
5543
5544 if (cnt > TRACE_BUF_SIZE)
5545 cnt = TRACE_BUF_SIZE;
5546
5547 /*
5548 * Userspace is injecting traces into the kernel trace buffer.
5549 * We want to be as non-intrusive as possible.
5550 * To do so, we do not want to allocate any special buffers
5551 * or take any locks, but instead write the userspace data
5552 * straight into the ring buffer.
5553 *
5554 * First we need to pin the userspace buffer into memory,
5555 * which it most likely already is, because userspace just referenced it,
5556 * but there's no guarantee of that. By using get_user_pages_fast()
5557 * and kmap_atomic/kunmap_atomic() we can get access to the
5558 * pages directly. We then write the data directly into the
5559 * ring buffer.
5560 */
5561 BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
5562
5563 /* check if we cross pages */
5564 if ((addr & PAGE_MASK) != ((addr + cnt) & PAGE_MASK))
5565 nr_pages = 2;
5566
5567 offset = addr & (PAGE_SIZE - 1);
5568 addr &= PAGE_MASK;
5569
5570 ret = get_user_pages_fast(addr, nr_pages, 0, pages);
5571 if (ret < nr_pages) {
5572 while (--ret >= 0)
5573 put_page(pages[ret]);
5574 written = -EFAULT;
5575 goto out;
5576 }
5577
5578 for (i = 0; i < nr_pages; i++)
5579 map_page[i] = kmap_atomic(pages[i]);
5580
5581 local_save_flags(irq_flags);
5582 size = sizeof(*entry) + cnt + 2; /* possible \n added */
5583 buffer = tr->trace_buffer.buffer;
5584 event = trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
5585 irq_flags, preempt_count());
5586 if (!event) {
5587 /* Ring buffer disabled, return as if not open for write */
5588 written = -EBADF;
5589 goto out_unlock;
5590 }
5591
5592 entry = ring_buffer_event_data(event);
5593 entry->ip = _THIS_IP_;
5594
5595 if (nr_pages == 2) {
5596 len = PAGE_SIZE - offset;
5597 memcpy(&entry->buf, map_page[0] + offset, len);
5598 memcpy(&entry->buf[len], map_page[1], cnt - len);
5599 } else
5600 memcpy(&entry->buf, map_page[0] + offset, cnt);
5601
5602 if (entry->buf[cnt - 1] != '\n') {
5603 entry->buf[cnt] = '\n';
5604 entry->buf[cnt + 1] = '\0';
5605 } else
5606 entry->buf[cnt] = '\0';
5607
5608 __buffer_unlock_commit(buffer, event);
5609
5610 written = cnt;
5611
5612 *fpos += written;
5613
5614 out_unlock:
5615 for (i = nr_pages - 1; i >= 0; i--) {
5616 kunmap_atomic(map_page[i]);
5617 put_page(pages[i]);
5618 }
5619 out:
5620 return written;
5621 }
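/*
 * Minimal user-space sketch for the trace_marker interface handled above
 * (the tracefs mount point is an assumption; it may also live under
 * /sys/kernel/debug/tracing):
 *
 *   int fd = open("/sys/kernel/tracing/trace_marker", O_WRONLY);
 *   if (fd >= 0) {
 *           write(fd, "hello from user space\n", 22);
 *           close(fd);
 *   }
 *
 * Each write() becomes one TRACE_PRINT entry, truncated to TRACE_BUF_SIZE
 * and newline-terminated as done above.
 */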
5622
5623 static int tracing_clock_show(struct seq_file *m, void *v)
5624 {
5625 struct trace_array *tr = m->private;
5626 int i;
5627
5628 for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
5629 seq_printf(m,
5630 "%s%s%s%s", i ? " " : "",
5631 i == tr->clock_id ? "[" : "", trace_clocks[i].name,
5632 i == tr->clock_id ? "]" : "");
5633 seq_putc(m, '\n');
5634
5635 return 0;
5636 }
5637
5638 static int tracing_set_clock(struct trace_array *tr, const char *clockstr)
5639 {
5640 int i;
5641
5642 for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
5643 if (strcmp(trace_clocks[i].name, clockstr) == 0)
5644 break;
5645 }
5646 if (i == ARRAY_SIZE(trace_clocks))
5647 return -EINVAL;
5648
5649 mutex_lock(&trace_types_lock);
5650
5651 tr->clock_id = i;
5652
5653 ring_buffer_set_clock(tr->trace_buffer.buffer, trace_clocks[i].func);
5654
5655 /*
5656 * The new clock may not be consistent with the previous clock.
5657 * Reset the buffer so that it doesn't have incomparable timestamps.
5658 */
5659 tracing_reset_online_cpus(&tr->trace_buffer);
5660
5661 #ifdef CONFIG_TRACER_MAX_TRACE
5662 if (tr->flags & TRACE_ARRAY_FL_GLOBAL && tr->max_buffer.buffer)
5663 ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func);
5664 tracing_reset_online_cpus(&tr->max_buffer);
5665 #endif
5666
5667 mutex_unlock(&trace_types_lock);
5668
5669 return 0;
5670 }
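/*
 * Example (sketch): selecting a different trace clock, which also resets the
 * buffers as noted above so old and new timestamps never mix (cat output
 * abbreviated; the bracketed entry is the current clock):
 *
 *   # cat trace_clock
 *   [local] global counter uptime perf
 *   # echo global > trace_clock
 */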
5671
5672 static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
5673 size_t cnt, loff_t *fpos)
5674 {
5675 struct seq_file *m = filp->private_data;
5676 struct trace_array *tr = m->private;
5677 char buf[64];
5678 const char *clockstr;
5679 int ret;
5680
5681 if (cnt >= sizeof(buf))
5682 return -EINVAL;
5683
5684 if (copy_from_user(buf, ubuf, cnt))
5685 return -EFAULT;
5686
5687 buf[cnt] = 0;
5688
5689 clockstr = strstrip(buf);
5690
5691 ret = tracing_set_clock(tr, clockstr);
5692 if (ret)
5693 return ret;
5694
5695 *fpos += cnt;
5696
5697 return cnt;
5698 }
5699
5700 static int tracing_clock_open(struct inode *inode, struct file *file)
5701 {
5702 struct trace_array *tr = inode->i_private;
5703 int ret;
5704
5705 if (tracing_disabled)
5706 return -ENODEV;
5707
5708 if (trace_array_get(tr))
5709 return -ENODEV;
5710
5711 ret = single_open(file, tracing_clock_show, inode->i_private);
5712 if (ret < 0)
5713 trace_array_put(tr);
5714
5715 return ret;
5716 }
5717
5718 struct ftrace_buffer_info {
5719 struct trace_iterator iter;
5720 void *spare;
5721 unsigned int read;
5722 };
5723
5724 #ifdef CONFIG_TRACER_SNAPSHOT
5725 static int tracing_snapshot_open(struct inode *inode, struct file *file)
5726 {
5727 struct trace_array *tr = inode->i_private;
5728 struct trace_iterator *iter;
5729 struct seq_file *m;
5730 int ret = 0;
5731
5732 if (trace_array_get(tr) < 0)
5733 return -ENODEV;
5734
5735 if (file->f_mode & FMODE_READ) {
5736 iter = __tracing_open(inode, file, true);
5737 if (IS_ERR(iter))
5738 ret = PTR_ERR(iter);
5739 } else {
5740 /* Writes still need the seq_file to hold the private data */
5741 ret = -ENOMEM;
5742 m = kzalloc(sizeof(*m), GFP_KERNEL);
5743 if (!m)
5744 goto out;
5745 iter = kzalloc(sizeof(*iter), GFP_KERNEL);
5746 if (!iter) {
5747 kfree(m);
5748 goto out;
5749 }
5750 ret = 0;
5751
5752 iter->tr = tr;
5753 iter->trace_buffer = &tr->max_buffer;
5754 iter->cpu_file = tracing_get_cpu(inode);
5755 m->private = iter;
5756 file->private_data = m;
5757 }
5758 out:
5759 if (ret < 0)
5760 trace_array_put(tr);
5761
5762 return ret;
5763 }
5764
5765 static ssize_t
5766 tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
5767 loff_t *ppos)
5768 {
5769 struct seq_file *m = filp->private_data;
5770 struct trace_iterator *iter = m->private;
5771 struct trace_array *tr = iter->tr;
5772 unsigned long val;
5773 int ret;
5774
5775 ret = tracing_update_buffers();
5776 if (ret < 0)
5777 return ret;
5778
5779 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5780 if (ret)
5781 return ret;
5782
5783 mutex_lock(&trace_types_lock);
5784
5785 if (tr->current_trace->use_max_tr) {
5786 ret = -EBUSY;
5787 goto out;
5788 }
5789
5790 switch (val) {
5791 case 0:
5792 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
5793 ret = -EINVAL;
5794 break;
5795 }
5796 if (tr->allocated_snapshot)
5797 free_snapshot(tr);
5798 break;
5799 case 1:
5800 /* Only allow per-cpu swap if the ring buffer supports it */
5801 #ifndef CONFIG_RING_BUFFER_ALLOW_SWAP
5802 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
5803 ret = -EINVAL;
5804 break;
5805 }
5806 #endif
5807 if (!tr->allocated_snapshot) {
5808 ret = alloc_snapshot(tr);
5809 if (ret < 0)
5810 break;
5811 }
5812 local_irq_disable();
5813 /* Now, we're going to swap */
5814 if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
5815 update_max_tr(tr, current, smp_processor_id());
5816 else
5817 update_max_tr_single(tr, current, iter->cpu_file);
5818 local_irq_enable();
5819 break;
5820 default:
5821 if (tr->allocated_snapshot) {
5822 if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
5823 tracing_reset_online_cpus(&tr->max_buffer);
5824 else
5825 tracing_reset(&tr->max_buffer, iter->cpu_file);
5826 }
5827 break;
5828 }
5829
5830 if (ret >= 0) {
5831 *ppos += cnt;
5832 ret = cnt;
5833 }
5834 out:
5835 mutex_unlock(&trace_types_lock);
5836 return ret;
5837 }
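/*
 * Illustrative mapping of the switch above onto the snapshot file semantics:
 *
 *   # echo 0 > snapshot      free the snapshot buffer (if allocated)
 *   # echo 1 > snapshot      allocate if needed and take a snapshot (swap)
 *   # echo 2 > snapshot      any other value: clear the snapshot contents
 *   # cat snapshot           read the snapshotted trace
 */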
5838
5839 static int tracing_snapshot_release(struct inode *inode, struct file *file)
5840 {
5841 struct seq_file *m = file->private_data;
5842 int ret;
5843
5844 ret = tracing_release(inode, file);
5845
5846 if (file->f_mode & FMODE_READ)
5847 return ret;
5848
5849 /* If write only, the seq_file is just a stub */
5850 if (m)
5851 kfree(m->private);
5852 kfree(m);
5853
5854 return 0;
5855 }
5856
5857 static int tracing_buffers_open(struct inode *inode, struct file *filp);
5858 static ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf,
5859 size_t count, loff_t *ppos);
5860 static int tracing_buffers_release(struct inode *inode, struct file *file);
5861 static ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos,
5862 struct pipe_inode_info *pipe, size_t len, unsigned int flags);
5863
5864 static int snapshot_raw_open(struct inode *inode, struct file *filp)
5865 {
5866 struct ftrace_buffer_info *info;
5867 int ret;
5868
5869 ret = tracing_buffers_open(inode, filp);
5870 if (ret < 0)
5871 return ret;
5872
5873 info = filp->private_data;
5874
5875 if (info->iter.trace->use_max_tr) {
5876 tracing_buffers_release(inode, filp);
5877 return -EBUSY;
5878 }
5879
5880 info->iter.snapshot = true;
5881 info->iter.trace_buffer = &info->iter.tr->max_buffer;
5882
5883 return ret;
5884 }
5885
5886 #endif /* CONFIG_TRACER_SNAPSHOT */
5887
5888
5889 static const struct file_operations tracing_thresh_fops = {
5890 .open = tracing_open_generic,
5891 .read = tracing_thresh_read,
5892 .write = tracing_thresh_write,
5893 .llseek = generic_file_llseek,
5894 };
5895
5896 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
5897 static const struct file_operations tracing_max_lat_fops = {
5898 .open = tracing_open_generic,
5899 .read = tracing_max_lat_read,
5900 .write = tracing_max_lat_write,
5901 .llseek = generic_file_llseek,
5902 };
5903 #endif
5904
5905 static const struct file_operations set_tracer_fops = {
5906 .open = tracing_open_generic,
5907 .read = tracing_set_trace_read,
5908 .write = tracing_set_trace_write,
5909 .llseek = generic_file_llseek,
5910 };
5911
5912 static const struct file_operations tracing_pipe_fops = {
5913 .open = tracing_open_pipe,
5914 .poll = tracing_poll_pipe,
5915 .read = tracing_read_pipe,
5916 .splice_read = tracing_splice_read_pipe,
5917 .release = tracing_release_pipe,
5918 .llseek = no_llseek,
5919 };
5920
5921 static const struct file_operations tracing_entries_fops = {
5922 .open = tracing_open_generic_tr,
5923 .read = tracing_entries_read,
5924 .write = tracing_entries_write,
5925 .llseek = generic_file_llseek,
5926 .release = tracing_release_generic_tr,
5927 };
5928
5929 static const struct file_operations tracing_total_entries_fops = {
5930 .open = tracing_open_generic_tr,
5931 .read = tracing_total_entries_read,
5932 .llseek = generic_file_llseek,
5933 .release = tracing_release_generic_tr,
5934 };
5935
5936 static const struct file_operations tracing_free_buffer_fops = {
5937 .open = tracing_open_generic_tr,
5938 .write = tracing_free_buffer_write,
5939 .release = tracing_free_buffer_release,
5940 };
5941
5942 static const struct file_operations tracing_mark_fops = {
5943 .open = tracing_open_generic_tr,
5944 .write = tracing_mark_write,
5945 .llseek = generic_file_llseek,
5946 .release = tracing_release_generic_tr,
5947 };
5948
5949 static const struct file_operations trace_clock_fops = {
5950 .open = tracing_clock_open,
5951 .read = seq_read,
5952 .llseek = seq_lseek,
5953 .release = tracing_single_release_tr,
5954 .write = tracing_clock_write,
5955 };
5956
5957 #ifdef CONFIG_TRACER_SNAPSHOT
5958 static const struct file_operations snapshot_fops = {
5959 .open = tracing_snapshot_open,
5960 .read = seq_read,
5961 .write = tracing_snapshot_write,
5962 .llseek = tracing_lseek,
5963 .release = tracing_snapshot_release,
5964 };
5965
5966 static const struct file_operations snapshot_raw_fops = {
5967 .open = snapshot_raw_open,
5968 .read = tracing_buffers_read,
5969 .release = tracing_buffers_release,
5970 .splice_read = tracing_buffers_splice_read,
5971 .llseek = no_llseek,
5972 };
5973
5974 #endif /* CONFIG_TRACER_SNAPSHOT */
5975
5976 static int tracing_buffers_open(struct inode *inode, struct file *filp)
5977 {
5978 struct trace_array *tr = inode->i_private;
5979 struct ftrace_buffer_info *info;
5980 int ret;
5981
5982 if (tracing_disabled)
5983 return -ENODEV;
5984
5985 if (trace_array_get(tr) < 0)
5986 return -ENODEV;
5987
5988 info = kzalloc(sizeof(*info), GFP_KERNEL);
5989 if (!info) {
5990 trace_array_put(tr);
5991 return -ENOMEM;
5992 }
5993
5994 mutex_lock(&trace_types_lock);
5995
5996 info->iter.tr = tr;
5997 info->iter.cpu_file = tracing_get_cpu(inode);
5998 info->iter.trace = tr->current_trace;
5999 info->iter.trace_buffer = &tr->trace_buffer;
6000 info->spare = NULL;
6001 /* Force reading ring buffer for first read */
6002 info->read = (unsigned int)-1;
6003
6004 filp->private_data = info;
6005
6006 tr->current_trace->ref++;
6007
6008 mutex_unlock(&trace_types_lock);
6009
6010 ret = nonseekable_open(inode, filp);
6011 if (ret < 0)
6012 trace_array_put(tr);
6013
6014 return ret;
6015 }
6016
6017 static unsigned int
6018 tracing_buffers_poll(struct file *filp, poll_table *poll_table)
6019 {
6020 struct ftrace_buffer_info *info = filp->private_data;
6021 struct trace_iterator *iter = &info->iter;
6022
6023 return trace_poll(iter, filp, poll_table);
6024 }
6025
6026 static ssize_t
6027 tracing_buffers_read(struct file *filp, char __user *ubuf,
6028 size_t count, loff_t *ppos)
6029 {
6030 struct ftrace_buffer_info *info = filp->private_data;
6031 struct trace_iterator *iter = &info->iter;
6032 ssize_t ret;
6033 ssize_t size;
6034
6035 if (!count)
6036 return 0;
6037
6038 #ifdef CONFIG_TRACER_MAX_TRACE
6039 if (iter->snapshot && iter->tr->current_trace->use_max_tr)
6040 return -EBUSY;
6041 #endif
6042
6043 if (!info->spare)
6044 info->spare = ring_buffer_alloc_read_page(iter->trace_buffer->buffer,
6045 iter->cpu_file);
6046 if (!info->spare)
6047 return -ENOMEM;
6048
6049 /* Do we have previous read data to read? */
6050 if (info->read < PAGE_SIZE)
6051 goto read;
6052
6053 again:
6054 trace_access_lock(iter->cpu_file);
6055 ret = ring_buffer_read_page(iter->trace_buffer->buffer,
6056 &info->spare,
6057 count,
6058 iter->cpu_file, 0);
6059 trace_access_unlock(iter->cpu_file);
6060
6061 if (ret < 0) {
6062 if (trace_empty(iter)) {
6063 if ((filp->f_flags & O_NONBLOCK))
6064 return -EAGAIN;
6065
6066 ret = wait_on_pipe(iter, false);
6067 if (ret)
6068 return ret;
6069
6070 goto again;
6071 }
6072 return 0;
6073 }
6074
6075 info->read = 0;
6076 read:
6077 size = PAGE_SIZE - info->read;
6078 if (size > count)
6079 size = count;
6080
6081 ret = copy_to_user(ubuf, info->spare + info->read, size);
6082 if (ret == size)
6083 return -EFAULT;
6084
6085 size -= ret;
6086
6087 *ppos += size;
6088 info->read += size;
6089
6090 return size;
6091 }
6092
6093 static int tracing_buffers_release(struct inode *inode, struct file *file)
6094 {
6095 struct ftrace_buffer_info *info = file->private_data;
6096 struct trace_iterator *iter = &info->iter;
6097
6098 mutex_lock(&trace_types_lock);
6099
6100 iter->tr->current_trace->ref--;
6101
6102 __trace_array_put(iter->tr);
6103
6104 if (info->spare)
6105 ring_buffer_free_read_page(iter->trace_buffer->buffer, info->spare);
6106 kfree(info);
6107
6108 mutex_unlock(&trace_types_lock);
6109
6110 return 0;
6111 }
6112
6113 struct buffer_ref {
6114 struct ring_buffer *buffer;
6115 void *page;
6116 int ref;
6117 };
6118
6119 static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
6120 struct pipe_buffer *buf)
6121 {
6122 struct buffer_ref *ref = (struct buffer_ref *)buf->private;
6123
6124 if (--ref->ref)
6125 return;
6126
6127 ring_buffer_free_read_page(ref->buffer, ref->page);
6128 kfree(ref);
6129 buf->private = 0;
6130 }
6131
6132 static void buffer_pipe_buf_get(struct pipe_inode_info *pipe,
6133 struct pipe_buffer *buf)
6134 {
6135 struct buffer_ref *ref = (struct buffer_ref *)buf->private;
6136
6137 ref->ref++;
6138 }
6139
6140 /* Pipe buffer operations for a buffer. */
6141 static const struct pipe_buf_operations buffer_pipe_buf_ops = {
6142 .can_merge = 0,
6143 .confirm = generic_pipe_buf_confirm,
6144 .release = buffer_pipe_buf_release,
6145 .steal = generic_pipe_buf_steal,
6146 .get = buffer_pipe_buf_get,
6147 };
6148
6149 /*
6150 * Callback from splice_to_pipe(), used to release pages at the end
6151 * of the spd in case we errored out while filling the pipe.
6152 */
6153 static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
6154 {
6155 struct buffer_ref *ref =
6156 (struct buffer_ref *)spd->partial[i].private;
6157
6158 if (--ref->ref)
6159 return;
6160
6161 ring_buffer_free_read_page(ref->buffer, ref->page);
6162 kfree(ref);
6163 spd->partial[i].private = 0;
6164 }
6165
6166 static ssize_t
6167 tracing_buffers_splice_read(struct file *file, loff_t *ppos,
6168 struct pipe_inode_info *pipe, size_t len,
6169 unsigned int flags)
6170 {
6171 struct ftrace_buffer_info *info = file->private_data;
6172 struct trace_iterator *iter = &info->iter;
6173 struct partial_page partial_def[PIPE_DEF_BUFFERS];
6174 struct page *pages_def[PIPE_DEF_BUFFERS];
6175 struct splice_pipe_desc spd = {
6176 .pages = pages_def,
6177 .partial = partial_def,
6178 .nr_pages_max = PIPE_DEF_BUFFERS,
6179 .flags = flags,
6180 .ops = &buffer_pipe_buf_ops,
6181 .spd_release = buffer_spd_release,
6182 };
6183 struct buffer_ref *ref;
6184 int entries, size, i;
6185 ssize_t ret = 0;
6186
6187 #ifdef CONFIG_TRACER_MAX_TRACE
6188 if (iter->snapshot && iter->tr->current_trace->use_max_tr)
6189 return -EBUSY;
6190 #endif
6191
6192 if (*ppos & (PAGE_SIZE - 1))
6193 return -EINVAL;
6194
6195 if (len & (PAGE_SIZE - 1)) {
6196 if (len < PAGE_SIZE)
6197 return -EINVAL;
6198 len &= PAGE_MASK;
6199 }
6200
6201 if (splice_grow_spd(pipe, &spd))
6202 return -ENOMEM;
6203
6204 again:
6205 trace_access_lock(iter->cpu_file);
6206 entries = ring_buffer_entries_cpu(iter->trace_buffer->buffer, iter->cpu_file);
6207
6208 for (i = 0; i < spd.nr_pages_max && len && entries; i++, len -= PAGE_SIZE) {
6209 struct page *page;
6210 int r;
6211
6212 ref = kzalloc(sizeof(*ref), GFP_KERNEL);
6213 if (!ref) {
6214 ret = -ENOMEM;
6215 break;
6216 }
6217
6218 ref->ref = 1;
6219 ref->buffer = iter->trace_buffer->buffer;
6220 ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file);
6221 if (!ref->page) {
6222 ret = -ENOMEM;
6223 kfree(ref);
6224 break;
6225 }
6226
6227 r = ring_buffer_read_page(ref->buffer, &ref->page,
6228 len, iter->cpu_file, 1);
6229 if (r < 0) {
6230 ring_buffer_free_read_page(ref->buffer, ref->page);
6231 kfree(ref);
6232 break;
6233 }
6234
6235 /*
6236 * zero out any leftover data, since this page is
6237 * going to user land.
6238 */
6239 size = ring_buffer_page_len(ref->page);
6240 if (size < PAGE_SIZE)
6241 memset(ref->page + size, 0, PAGE_SIZE - size);
6242
6243 page = virt_to_page(ref->page);
6244
6245 spd.pages[i] = page;
6246 spd.partial[i].len = PAGE_SIZE;
6247 spd.partial[i].offset = 0;
6248 spd.partial[i].private = (unsigned long)ref;
6249 spd.nr_pages++;
6250 *ppos += PAGE_SIZE;
6251
6252 entries = ring_buffer_entries_cpu(iter->trace_buffer->buffer, iter->cpu_file);
6253 }
6254
6255 trace_access_unlock(iter->cpu_file);
6256 spd.nr_pages = i;
6257
6258 /* did we read anything? */
6259 if (!spd.nr_pages) {
6260 if (ret)
6261 goto out;
6262
6263 ret = -EAGAIN;
6264 if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK))
6265 goto out;
6266
6267 ret = wait_on_pipe(iter, true);
6268 if (ret)
6269 goto out;
6270
6271 goto again;
6272 }
6273
6274 ret = splice_to_pipe(pipe, &spd);
6275 out:
6276 splice_shrink_spd(&spd);
6277
6278 return ret;
6279 }
6280
6281 static const struct file_operations tracing_buffers_fops = {
6282 .open = tracing_buffers_open,
6283 .read = tracing_buffers_read,
6284 .poll = tracing_buffers_poll,
6285 .release = tracing_buffers_release,
6286 .splice_read = tracing_buffers_splice_read,
6287 .llseek = no_llseek,
6288 };
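/*
 * A minimal user-space sketch of consuming this interface.  The tracefs
 * path, the 4096-byte page size and the error handling are assumptions;
 * the page-at-a-time read() model is what tracing_buffers_read() above
 * implements (splice() goes through tracing_buffers_splice_read() instead):
 *
 *   #include <fcntl.h>
 *   #include <stdio.h>
 *   #include <unistd.h>
 *
 *   int main(void)
 *   {
 *           // per-CPU binary ring-buffer pages (path is an assumption)
 *           int fd = open("/sys/kernel/tracing/per_cpu/cpu0/trace_pipe_raw",
 *                         O_RDONLY | O_NONBLOCK);
 *           char page[4096];              // one ring-buffer page per read()
 *           ssize_t n;
 *
 *           if (fd < 0)
 *                   return 1;
 *           while ((n = read(fd, page, sizeof(page))) > 0)
 *                   fwrite(page, 1, n, stdout);     // dump raw pages
 *           close(fd);
 *           return 0;
 *   }
 */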
6289
6290 static ssize_t
6291 tracing_stats_read(struct file *filp, char __user *ubuf,
6292 size_t count, loff_t *ppos)
6293 {
6294 struct inode *inode = file_inode(filp);
6295 struct trace_array *tr = inode->i_private;
6296 struct trace_buffer *trace_buf = &tr->trace_buffer;
6297 int cpu = tracing_get_cpu(inode);
6298 struct trace_seq *s;
6299 unsigned long cnt;
6300 unsigned long long t;
6301 unsigned long usec_rem;
6302
6303 s = kmalloc(sizeof(*s), GFP_KERNEL);
6304 if (!s)
6305 return -ENOMEM;
6306
6307 trace_seq_init(s);
6308
6309 cnt = ring_buffer_entries_cpu(trace_buf->buffer, cpu);
6310 trace_seq_printf(s, "entries: %ld\n", cnt);
6311
6312 cnt = ring_buffer_overrun_cpu(trace_buf->buffer, cpu);
6313 trace_seq_printf(s, "overrun: %ld\n", cnt);
6314
6315 cnt = ring_buffer_commit_overrun_cpu(trace_buf->buffer, cpu);
6316 trace_seq_printf(s, "commit overrun: %ld\n", cnt);
6317
6318 cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu);
6319 trace_seq_printf(s, "bytes: %ld\n", cnt);
6320
6321 if (trace_clocks[tr->clock_id].in_ns) {
6322 /* local or global for trace_clock */
6323 t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
6324 usec_rem = do_div(t, USEC_PER_SEC);
6325 trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n",
6326 t, usec_rem);
6327
6328 t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer, cpu));
6329 usec_rem = do_div(t, USEC_PER_SEC);
6330 trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
6331 } else {
6332 /* counter or tsc mode for trace_clock */
6333 trace_seq_printf(s, "oldest event ts: %llu\n",
6334 ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
6335
6336 trace_seq_printf(s, "now ts: %llu\n",
6337 ring_buffer_time_stamp(trace_buf->buffer, cpu));
6338 }
6339
6340 cnt = ring_buffer_dropped_events_cpu(trace_buf->buffer, cpu);
6341 trace_seq_printf(s, "dropped events: %ld\n", cnt);
6342
6343 cnt = ring_buffer_read_events_cpu(trace_buf->buffer, cpu);
6344 trace_seq_printf(s, "read events: %ld\n", cnt);
6345
6346 count = simple_read_from_buffer(ubuf, count, ppos,
6347 s->buffer, trace_seq_used(s));
6348
6349 kfree(s);
6350
6351 return count;
6352 }
6353
6354 static const struct file_operations tracing_stats_fops = {
6355 .open = tracing_open_generic_tr,
6356 .read = tracing_stats_read,
6357 .llseek = generic_file_llseek,
6358 .release = tracing_release_generic_tr,
6359 };
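/*
 * The per_cpu/cpuN/stats file produced by tracing_stats_read() looks
 * roughly like the following (the values are only an example; the two
 * "ts" lines take the plain-integer form when a counter/TSC clock is
 * selected):
 *
 *   entries: 1024
 *   overrun: 0
 *   commit overrun: 0
 *   bytes: 46080
 *   oldest event ts:  5213.412345
 *   now ts:  5220.098765
 *   dropped events: 0
 *   read events: 512
 */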
6360
6361 #ifdef CONFIG_DYNAMIC_FTRACE
6362
6363 int __weak ftrace_arch_read_dyn_info(char *buf, int size)
6364 {
6365 return 0;
6366 }
6367
6368 static ssize_t
6369 tracing_read_dyn_info(struct file *filp, char __user *ubuf,
6370 size_t cnt, loff_t *ppos)
6371 {
6372 static char ftrace_dyn_info_buffer[1024];
6373 static DEFINE_MUTEX(dyn_info_mutex);
6374 unsigned long *p = filp->private_data;
6375 char *buf = ftrace_dyn_info_buffer;
6376 int size = ARRAY_SIZE(ftrace_dyn_info_buffer);
6377 int r;
6378
6379 mutex_lock(&dyn_info_mutex);
6380 r = sprintf(buf, "%ld ", *p);
6381
6382 r += ftrace_arch_read_dyn_info(buf+r, (size-1)-r);
6383 buf[r++] = '\n';
6384
6385 r = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6386
6387 mutex_unlock(&dyn_info_mutex);
6388
6389 return r;
6390 }
6391
6392 static const struct file_operations tracing_dyn_info_fops = {
6393 .open = tracing_open_generic,
6394 .read = tracing_read_dyn_info,
6395 .llseek = generic_file_llseek,
6396 };
6397 #endif /* CONFIG_DYNAMIC_FTRACE */
6398
6399 #if defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE)
6400 static void
6401 ftrace_snapshot(unsigned long ip, unsigned long parent_ip, void **data)
6402 {
6403 tracing_snapshot();
6404 }
6405
6406 static void
6407 ftrace_count_snapshot(unsigned long ip, unsigned long parent_ip, void **data)
6408 {
6409 unsigned long *count = (unsigned long *)data;
6410
6411 if (!*count)
6412 return;
6413
6414 if (*count != -1)
6415 (*count)--;
6416
6417 tracing_snapshot();
6418 }
6419
6420 static int
6421 ftrace_snapshot_print(struct seq_file *m, unsigned long ip,
6422 struct ftrace_probe_ops *ops, void *data)
6423 {
6424 long count = (long)data;
6425
6426 seq_printf(m, "%ps:", (void *)ip);
6427
6428 seq_puts(m, "snapshot");
6429
6430 if (count == -1)
6431 seq_puts(m, ":unlimited\n");
6432 else
6433 seq_printf(m, ":count=%ld\n", count);
6434
6435 return 0;
6436 }
6437
6438 static struct ftrace_probe_ops snapshot_probe_ops = {
6439 .func = ftrace_snapshot,
6440 .print = ftrace_snapshot_print,
6441 };
6442
6443 static struct ftrace_probe_ops snapshot_count_probe_ops = {
6444 .func = ftrace_count_snapshot,
6445 .print = ftrace_snapshot_print,
6446 };
6447
6448 static int
6449 ftrace_trace_snapshot_callback(struct ftrace_hash *hash,
6450 char *glob, char *cmd, char *param, int enable)
6451 {
6452 struct ftrace_probe_ops *ops;
6453 void *count = (void *)-1;
6454 char *number;
6455 int ret;
6456
6457 /* hash funcs only work with set_ftrace_filter */
6458 if (!enable)
6459 return -EINVAL;
6460
6461 ops = param ? &snapshot_count_probe_ops : &snapshot_probe_ops;
6462
6463 if (glob[0] == '!') {
6464 unregister_ftrace_function_probe_func(glob+1, ops);
6465 return 0;
6466 }
6467
6468 if (!param)
6469 goto out_reg;
6470
6471 number = strsep(&param, ":");
6472
6473 if (!strlen(number))
6474 goto out_reg;
6475
6476 /*
6477 * We use the callback data field (which is a pointer)
6478 * as our counter.
6479 */
6480 ret = kstrtoul(number, 0, (unsigned long *)&count);
6481 if (ret)
6482 return ret;
6483
6484 out_reg:
6485 ret = register_ftrace_function_probe(glob, ops, count);
6486
6487 if (ret >= 0)
6488 alloc_snapshot(&global_trace);
6489
6490 return ret < 0 ? ret : 0;
6491 }
6492
6493 static struct ftrace_func_command ftrace_snapshot_cmd = {
6494 .name = "snapshot",
6495 .func = ftrace_trace_snapshot_callback,
6496 };
6497
6498 static __init int register_snapshot_cmd(void)
6499 {
6500 return register_ftrace_command(&ftrace_snapshot_cmd);
6501 }
6502 #else
6503 static inline __init int register_snapshot_cmd(void) { return 0; }
6504 #endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */
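/*
 * The "snapshot" command registered above hangs off set_ftrace_filter.
 * A sketch of its use, with an assumed tracefs mount and function name:
 *
 *   echo 'schedule:snapshot'   > /sys/kernel/tracing/set_ftrace_filter
 *   echo 'schedule:snapshot:3' > /sys/kernel/tracing/set_ftrace_filter
 *   echo '!schedule:snapshot'  > /sys/kernel/tracing/set_ftrace_filter
 *
 * The first form snapshots on every hit of the function, the second only
 * for the first three hits (the count parsed by strsep()/kstrtoul() in
 * ftrace_trace_snapshot_callback()), and the '!' form removes the probe,
 * matching the glob[0] == '!' branch above.
 */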
6505
6506 static struct dentry *tracing_get_dentry(struct trace_array *tr)
6507 {
6508 if (WARN_ON(!tr->dir))
6509 return ERR_PTR(-ENODEV);
6510
6511 /* Top directory uses NULL as the parent */
6512 if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
6513 return NULL;
6514
6515 /* All sub buffers have a descriptor */
6516 return tr->dir;
6517 }
6518
6519 static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu)
6520 {
6521 struct dentry *d_tracer;
6522
6523 if (tr->percpu_dir)
6524 return tr->percpu_dir;
6525
6526 d_tracer = tracing_get_dentry(tr);
6527 if (IS_ERR(d_tracer))
6528 return NULL;
6529
6530 tr->percpu_dir = tracefs_create_dir("per_cpu", d_tracer);
6531
6532 WARN_ONCE(!tr->percpu_dir,
6533 "Could not create tracefs directory 'per_cpu/%d'\n", cpu);
6534
6535 return tr->percpu_dir;
6536 }
6537
6538 static struct dentry *
6539 trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent,
6540 void *data, long cpu, const struct file_operations *fops)
6541 {
6542 struct dentry *ret = trace_create_file(name, mode, parent, data, fops);
6543
6544 if (ret) /* See tracing_get_cpu() */
6545 d_inode(ret)->i_cdev = (void *)(cpu + 1);
6546 return ret;
6547 }
6548
6549 static void
6550 tracing_init_tracefs_percpu(struct trace_array *tr, long cpu)
6551 {
6552 struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu);
6553 struct dentry *d_cpu;
6554 char cpu_dir[30]; /* 30 characters should be more than enough */
6555
6556 if (!d_percpu)
6557 return;
6558
6559 snprintf(cpu_dir, 30, "cpu%ld", cpu);
6560 d_cpu = tracefs_create_dir(cpu_dir, d_percpu);
6561 if (!d_cpu) {
6562 pr_warn("Could not create tracefs '%s' entry\n", cpu_dir);
6563 return;
6564 }
6565
6566 /* per cpu trace_pipe */
6567 trace_create_cpu_file("trace_pipe", 0444, d_cpu,
6568 tr, cpu, &tracing_pipe_fops);
6569
6570 /* per cpu trace */
6571 trace_create_cpu_file("trace", 0644, d_cpu,
6572 tr, cpu, &tracing_fops);
6573
6574 trace_create_cpu_file("trace_pipe_raw", 0444, d_cpu,
6575 tr, cpu, &tracing_buffers_fops);
6576
6577 trace_create_cpu_file("stats", 0444, d_cpu,
6578 tr, cpu, &tracing_stats_fops);
6579
6580 trace_create_cpu_file("buffer_size_kb", 0444, d_cpu,
6581 tr, cpu, &tracing_entries_fops);
6582
6583 #ifdef CONFIG_TRACER_SNAPSHOT
6584 trace_create_cpu_file("snapshot", 0644, d_cpu,
6585 tr, cpu, &snapshot_fops);
6586
6587 trace_create_cpu_file("snapshot_raw", 0444, d_cpu,
6588 tr, cpu, &snapshot_raw_fops);
6589 #endif
6590 }
6591
6592 #ifdef CONFIG_FTRACE_SELFTEST
6593 /* Let selftest have access to static functions in this file */
6594 #include "trace_selftest.c"
6595 #endif
6596
6597 static ssize_t
6598 trace_options_read(struct file *filp, char __user *ubuf, size_t cnt,
6599 loff_t *ppos)
6600 {
6601 struct trace_option_dentry *topt = filp->private_data;
6602 char *buf;
6603
6604 if (topt->flags->val & topt->opt->bit)
6605 buf = "1\n";
6606 else
6607 buf = "0\n";
6608
6609 return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
6610 }
6611
6612 static ssize_t
6613 trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
6614 loff_t *ppos)
6615 {
6616 struct trace_option_dentry *topt = filp->private_data;
6617 unsigned long val;
6618 int ret;
6619
6620 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6621 if (ret)
6622 return ret;
6623
6624 if (val != 0 && val != 1)
6625 return -EINVAL;
6626
6627 if (!!(topt->flags->val & topt->opt->bit) != val) {
6628 mutex_lock(&trace_types_lock);
6629 ret = __set_tracer_option(topt->tr, topt->flags,
6630 topt->opt, !val);
6631 mutex_unlock(&trace_types_lock);
6632 if (ret)
6633 return ret;
6634 }
6635
6636 *ppos += cnt;
6637
6638 return cnt;
6639 }
6640
6641
6642 static const struct file_operations trace_options_fops = {
6643 .open = tracing_open_generic,
6644 .read = trace_options_read,
6645 .write = trace_options_write,
6646 .llseek = generic_file_llseek,
6647 };
6648
6649 /*
6650 * In order to pass in both the trace_array descriptor as well as the index
6651 * to the flag that the trace option file represents, the trace_array
6652 * has a character array of trace_flags_index[], which holds the index
6653 * of the bit for the flag it represents. index[0] == 0, index[1] == 1, etc.
6654 * The address of this character array is passed to the flag option file
6655 * read/write callbacks.
6656 *
6657 * In order to extract both the index and the trace_array descriptor,
6658 * get_tr_index() uses the following algorithm.
6659 *
6660 * idx = *ptr;
6661 *
6662 * As the value stored at the pointer is the index itself (remember
6663 * index[1] == 1, each entry's value equals its position).
6664 *
6665 * Then, to get the trace_array descriptor, subtracting that index
6666 * from the pointer gets us back to the start of the index array.
6667 *
6668 * ptr - idx == &index[0]
6669 *
6670 * Then a simple container_of() from that pointer gets us to the
6671 * trace_array descriptor.
6672 */
6673 static void get_tr_index(void *data, struct trace_array **ptr,
6674 unsigned int *pindex)
6675 {
6676 *pindex = *(unsigned char *)data;
6677
6678 *ptr = container_of(data - *pindex, struct trace_array,
6679 trace_flags_index);
6680 }
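/*
 * A concrete walk-through of the trick above (index value 3 chosen purely
 * for illustration): the option file for bit 3 gets
 * &tr->trace_flags_index[3] as its private data, and that slot holds the
 * value 3.  get_tr_index() then computes:
 *
 *   *pindex = *(unsigned char *)data;     == 3
 *   data - *pindex                        == &tr->trace_flags_index[0]
 *   container_of(..., struct trace_array,
 *                trace_flags_index)       == tr
 */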
6681
6682 static ssize_t
6683 trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt,
6684 loff_t *ppos)
6685 {
6686 void *tr_index = filp->private_data;
6687 struct trace_array *tr;
6688 unsigned int index;
6689 char *buf;
6690
6691 get_tr_index(tr_index, &tr, &index);
6692
6693 if (tr->trace_flags & (1 << index))
6694 buf = "1\n";
6695 else
6696 buf = "0\n";
6697
6698 return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
6699 }
6700
6701 static ssize_t
6702 trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
6703 loff_t *ppos)
6704 {
6705 void *tr_index = filp->private_data;
6706 struct trace_array *tr;
6707 unsigned int index;
6708 unsigned long val;
6709 int ret;
6710
6711 get_tr_index(tr_index, &tr, &index);
6712
6713 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6714 if (ret)
6715 return ret;
6716
6717 if (val != 0 && val != 1)
6718 return -EINVAL;
6719
6720 mutex_lock(&trace_types_lock);
6721 ret = set_tracer_flag(tr, 1 << index, val);
6722 mutex_unlock(&trace_types_lock);
6723
6724 if (ret < 0)
6725 return ret;
6726
6727 *ppos += cnt;
6728
6729 return cnt;
6730 }
6731
6732 static const struct file_operations trace_options_core_fops = {
6733 .open = tracing_open_generic,
6734 .read = trace_options_core_read,
6735 .write = trace_options_core_write,
6736 .llseek = generic_file_llseek,
6737 };
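/*
 * These core option files end up under tracefs as options/<flag-name>.
 * For illustration (mount point and flag name assumed):
 *
 *   cat  /sys/kernel/tracing/options/overwrite
 *   echo 0 > /sys/kernel/tracing/options/overwrite
 *
 * The write path above only accepts 0 or 1 and routes the change through
 * set_tracer_flag() under trace_types_lock.
 */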
6738
6739 struct dentry *trace_create_file(const char *name,
6740 umode_t mode,
6741 struct dentry *parent,
6742 void *data,
6743 const struct file_operations *fops)
6744 {
6745 struct dentry *ret;
6746
6747 ret = tracefs_create_file(name, mode, parent, data, fops);
6748 if (!ret)
6749 pr_warn("Could not create tracefs '%s' entry\n", name);
6750
6751 return ret;
6752 }
6753
6754
6755 static struct dentry *trace_options_init_dentry(struct trace_array *tr)
6756 {
6757 struct dentry *d_tracer;
6758
6759 if (tr->options)
6760 return tr->options;
6761
6762 d_tracer = tracing_get_dentry(tr);
6763 if (IS_ERR(d_tracer))
6764 return NULL;
6765
6766 tr->options = tracefs_create_dir("options", d_tracer);
6767 if (!tr->options) {
6768 pr_warn("Could not create tracefs directory 'options'\n");
6769 return NULL;
6770 }
6771
6772 return tr->options;
6773 }
6774
6775 static void
6776 create_trace_option_file(struct trace_array *tr,
6777 struct trace_option_dentry *topt,
6778 struct tracer_flags *flags,
6779 struct tracer_opt *opt)
6780 {
6781 struct dentry *t_options;
6782
6783 t_options = trace_options_init_dentry(tr);
6784 if (!t_options)
6785 return;
6786
6787 topt->flags = flags;
6788 topt->opt = opt;
6789 topt->tr = tr;
6790
6791 topt->entry = trace_create_file(opt->name, 0644, t_options, topt,
6792 &trace_options_fops);
6793
6794 }
6795
6796 static void
6797 create_trace_option_files(struct trace_array *tr, struct tracer *tracer)
6798 {
6799 struct trace_option_dentry *topts;
6800 struct trace_options *tr_topts;
6801 struct tracer_flags *flags;
6802 struct tracer_opt *opts;
6803 int cnt;
6804 int i;
6805
6806 if (!tracer)
6807 return;
6808
6809 flags = tracer->flags;
6810
6811 if (!flags || !flags->opts)
6812 return;
6813
6814 /*
6815 * If this is an instance, only create flags for tracers
6816 * the instance may have.
6817 */
6818 if (!trace_ok_for_array(tracer, tr))
6819 return;
6820
6821 for (i = 0; i < tr->nr_topts; i++) {
6822 /* Make sure there are no duplicate flags. */
6823 if (WARN_ON_ONCE(tr->topts[i].tracer->flags == tracer->flags))
6824 return;
6825 }
6826
6827 opts = flags->opts;
6828
6829 for (cnt = 0; opts[cnt].name; cnt++)
6830 ;
6831
6832 topts = kcalloc(cnt + 1, sizeof(*topts), GFP_KERNEL);
6833 if (!topts)
6834 return;
6835
6836 tr_topts = krealloc(tr->topts, sizeof(*tr->topts) * (tr->nr_topts + 1),
6837 GFP_KERNEL);
6838 if (!tr_topts) {
6839 kfree(topts);
6840 return;
6841 }
6842
6843 tr->topts = tr_topts;
6844 tr->topts[tr->nr_topts].tracer = tracer;
6845 tr->topts[tr->nr_topts].topts = topts;
6846 tr->nr_topts++;
6847
6848 for (cnt = 0; opts[cnt].name; cnt++) {
6849 create_trace_option_file(tr, &topts[cnt], flags,
6850 &opts[cnt]);
6851 WARN_ONCE(topts[cnt].entry == NULL,
6852 "Failed to create trace option: %s",
6853 opts[cnt].name);
6854 }
6855 }
6856
6857 static struct dentry *
6858 create_trace_option_core_file(struct trace_array *tr,
6859 const char *option, long index)
6860 {
6861 struct dentry *t_options;
6862
6863 t_options = trace_options_init_dentry(tr);
6864 if (!t_options)
6865 return NULL;
6866
6867 return trace_create_file(option, 0644, t_options,
6868 (void *)&tr->trace_flags_index[index],
6869 &trace_options_core_fops);
6870 }
6871
6872 static void create_trace_options_dir(struct trace_array *tr)
6873 {
6874 struct dentry *t_options;
6875 bool top_level = tr == &global_trace;
6876 int i;
6877
6878 t_options = trace_options_init_dentry(tr);
6879 if (!t_options)
6880 return;
6881
6882 for (i = 0; trace_options[i]; i++) {
6883 if (top_level ||
6884 !((1 << i) & TOP_LEVEL_TRACE_FLAGS))
6885 create_trace_option_core_file(tr, trace_options[i], i);
6886 }
6887 }
6888
6889 static ssize_t
6890 rb_simple_read(struct file *filp, char __user *ubuf,
6891 size_t cnt, loff_t *ppos)
6892 {
6893 struct trace_array *tr = filp->private_data;
6894 char buf[64];
6895 int r;
6896
6897 r = tracer_tracing_is_on(tr);
6898 r = sprintf(buf, "%d\n", r);
6899
6900 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6901 }
6902
6903 static ssize_t
6904 rb_simple_write(struct file *filp, const char __user *ubuf,
6905 size_t cnt, loff_t *ppos)
6906 {
6907 struct trace_array *tr = filp->private_data;
6908 struct ring_buffer *buffer = tr->trace_buffer.buffer;
6909 unsigned long val;
6910 int ret;
6911
6912 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6913 if (ret)
6914 return ret;
6915
6916 if (buffer) {
6917 mutex_lock(&trace_types_lock);
6918 if (val) {
6919 tracer_tracing_on(tr);
6920 if (tr->current_trace->start)
6921 tr->current_trace->start(tr);
6922 } else {
6923 tracer_tracing_off(tr);
6924 if (tr->current_trace->stop)
6925 tr->current_trace->stop(tr);
6926 }
6927 mutex_unlock(&trace_types_lock);
6928 }
6929
6930 (*ppos)++;
6931
6932 return cnt;
6933 }
6934
6935 static const struct file_operations rb_simple_fops = {
6936 .open = tracing_open_generic_tr,
6937 .read = rb_simple_read,
6938 .write = rb_simple_write,
6939 .release = tracing_release_generic_tr,
6940 .llseek = default_llseek,
6941 };
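/*
 * rb_simple_fops backs the "tracing_on" file created in
 * init_tracer_tracefs() below.  For illustration (mount point assumed):
 *
 *   echo 0 > /sys/kernel/tracing/tracing_on   # stop recording to the buffer
 *   echo 1 > /sys/kernel/tracing/tracing_on   # resume recording
 *   cat  /sys/kernel/tracing/tracing_on       # 0 or 1, from rb_simple_read()
 */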
6942
6943 struct dentry *trace_instance_dir;
6944
6945 static void
6946 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer);
6947
6948 static int
6949 allocate_trace_buffer(struct trace_array *tr, struct trace_buffer *buf, int size)
6950 {
6951 enum ring_buffer_flags rb_flags;
6952
6953 rb_flags = tr->trace_flags & TRACE_ITER_OVERWRITE ? RB_FL_OVERWRITE : 0;
6954
6955 buf->tr = tr;
6956
6957 buf->buffer = ring_buffer_alloc(size, rb_flags);
6958 if (!buf->buffer)
6959 return -ENOMEM;
6960
6961 buf->data = alloc_percpu(struct trace_array_cpu);
6962 if (!buf->data) {
6963 ring_buffer_free(buf->buffer);
6964 return -ENOMEM;
6965 }
6966
6967 /* Allocate the first page for all buffers */
6968 set_buffer_entries(&tr->trace_buffer,
6969 ring_buffer_size(tr->trace_buffer.buffer, 0));
6970
6971 return 0;
6972 }
6973
6974 static int allocate_trace_buffers(struct trace_array *tr, int size)
6975 {
6976 int ret;
6977
6978 ret = allocate_trace_buffer(tr, &tr->trace_buffer, size);
6979 if (ret)
6980 return ret;
6981
6982 #ifdef CONFIG_TRACER_MAX_TRACE
6983 ret = allocate_trace_buffer(tr, &tr->max_buffer,
6984 allocate_snapshot ? size : 1);
6985 if (WARN_ON(ret)) {
6986 ring_buffer_free(tr->trace_buffer.buffer);
6987 free_percpu(tr->trace_buffer.data);
6988 return -ENOMEM;
6989 }
6990 tr->allocated_snapshot = allocate_snapshot;
6991
6992 /*
6993 * Only the top level trace array gets its snapshot allocated
6994 * from the kernel command line.
6995 */
6996 allocate_snapshot = false;
6997 #endif
6998 return 0;
6999 }
7000
7001 static void free_trace_buffer(struct trace_buffer *buf)
7002 {
7003 if (buf->buffer) {
7004 ring_buffer_free(buf->buffer);
7005 buf->buffer = NULL;
7006 free_percpu(buf->data);
7007 buf->data = NULL;
7008 }
7009 }
7010
7011 static void free_trace_buffers(struct trace_array *tr)
7012 {
7013 if (!tr)
7014 return;
7015
7016 free_trace_buffer(&tr->trace_buffer);
7017
7018 #ifdef CONFIG_TRACER_MAX_TRACE
7019 free_trace_buffer(&tr->max_buffer);
7020 #endif
7021 }
7022
7023 static void init_trace_flags_index(struct trace_array *tr)
7024 {
7025 int i;
7026
7027 /* Used by the trace options files */
7028 for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++)
7029 tr->trace_flags_index[i] = i;
7030 }
7031
7032 static void __update_tracer_options(struct trace_array *tr)
7033 {
7034 struct tracer *t;
7035
7036 for (t = trace_types; t; t = t->next)
7037 add_tracer_options(tr, t);
7038 }
7039
7040 static void update_tracer_options(struct trace_array *tr)
7041 {
7042 mutex_lock(&trace_types_lock);
7043 __update_tracer_options(tr);
7044 mutex_unlock(&trace_types_lock);
7045 }
7046
7047 static int instance_mkdir(const char *name)
7048 {
7049 struct trace_array *tr;
7050 int ret;
7051
7052 mutex_lock(&trace_types_lock);
7053
7054 ret = -EEXIST;
7055 list_for_each_entry(tr, &ftrace_trace_arrays, list) {
7056 if (tr->name && strcmp(tr->name, name) == 0)
7057 goto out_unlock;
7058 }
7059
7060 ret = -ENOMEM;
7061 tr = kzalloc(sizeof(*tr), GFP_KERNEL);
7062 if (!tr)
7063 goto out_unlock;
7064
7065 tr->name = kstrdup(name, GFP_KERNEL);
7066 if (!tr->name)
7067 goto out_free_tr;
7068
7069 if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL))
7070 goto out_free_tr;
7071
7072 tr->trace_flags = global_trace.trace_flags & ~ZEROED_TRACE_FLAGS;
7073
7074 cpumask_copy(tr->tracing_cpumask, cpu_all_mask);
7075
7076 raw_spin_lock_init(&tr->start_lock);
7077
7078 tr->max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
7079
7080 tr->current_trace = &nop_trace;
7081
7082 INIT_LIST_HEAD(&tr->systems);
7083 INIT_LIST_HEAD(&tr->events);
7084
7085 if (allocate_trace_buffers(tr, trace_buf_size) < 0)
7086 goto out_free_tr;
7087
7088 tr->dir = tracefs_create_dir(name, trace_instance_dir);
7089 if (!tr->dir)
7090 goto out_free_tr;
7091
7092 ret = event_trace_add_tracer(tr->dir, tr);
7093 if (ret) {
7094 tracefs_remove_recursive(tr->dir);
7095 goto out_free_tr;
7096 }
7097
7098 init_tracer_tracefs(tr, tr->dir);
7099 init_trace_flags_index(tr);
7100 __update_tracer_options(tr);
7101
7102 list_add(&tr->list, &ftrace_trace_arrays);
7103
7104 mutex_unlock(&trace_types_lock);
7105
7106 return 0;
7107
7108 out_free_tr:
7109 free_trace_buffers(tr);
7110 free_cpumask_var(tr->tracing_cpumask);
7111 kfree(tr->name);
7112 kfree(tr);
7113
7114 out_unlock:
7115 mutex_unlock(&trace_types_lock);
7116
7117 return ret;
7118
7119 }
7120
7121 static int instance_rmdir(const char *name)
7122 {
7123 struct trace_array *tr;
7124 int found = 0;
7125 int ret;
7126 int i;
7127
7128 mutex_lock(&trace_types_lock);
7129
7130 ret = -ENODEV;
7131 list_for_each_entry(tr, &ftrace_trace_arrays, list) {
7132 if (tr->name && strcmp(tr->name, name) == 0) {
7133 found = 1;
7134 break;
7135 }
7136 }
7137 if (!found)
7138 goto out_unlock;
7139
7140 ret = -EBUSY;
7141 if (tr->ref || (tr->current_trace && tr->current_trace->ref))
7142 goto out_unlock;
7143
7144 list_del(&tr->list);
7145
7146 /* Disable all the flags that were enabled coming in */
7147 for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++) {
7148 if ((1 << i) & ZEROED_TRACE_FLAGS)
7149 set_tracer_flag(tr, 1 << i, 0);
7150 }
7151
7152 tracing_set_nop(tr);
7153 event_trace_del_tracer(tr);
7154 ftrace_destroy_function_files(tr);
7155 tracefs_remove_recursive(tr->dir);
7156 free_trace_buffers(tr);
7157
7158 for (i = 0; i < tr->nr_topts; i++) {
7159 kfree(tr->topts[i].topts);
7160 }
7161 kfree(tr->topts);
7162
7163 kfree(tr->name);
7164 kfree(tr);
7165
7166 ret = 0;
7167
7168 out_unlock:
7169 mutex_unlock(&trace_types_lock);
7170
7171 return ret;
7172 }
7173
7174 static __init void create_trace_instances(struct dentry *d_tracer)
7175 {
7176 trace_instance_dir = tracefs_create_instance_dir("instances", d_tracer,
7177 instance_mkdir,
7178 instance_rmdir);
7179 if (WARN_ON(!trace_instance_dir))
7180 return;
7181 }
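/*
 * With the instances directory in place, new trace arrays are created and
 * destroyed from user space with plain mkdir/rmdir, which tracefs routes
 * to instance_mkdir()/instance_rmdir() above.  For illustration (mount
 * point and instance name assumed):
 *
 *   mkdir /sys/kernel/tracing/instances/foo
 *   echo function > /sys/kernel/tracing/instances/foo/current_tracer
 *   rmdir /sys/kernel/tracing/instances/foo   # -EBUSY while still in use
 */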
7182
7183 static void
7184 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer)
7185 {
7186 int cpu;
7187
7188 trace_create_file("available_tracers", 0444, d_tracer,
7189 tr, &show_traces_fops);
7190
7191 trace_create_file("current_tracer", 0644, d_tracer,
7192 tr, &set_tracer_fops);
7193
7194 trace_create_file("tracing_cpumask", 0644, d_tracer,
7195 tr, &tracing_cpumask_fops);
7196
7197 trace_create_file("trace_options", 0644, d_tracer,
7198 tr, &tracing_iter_fops);
7199
7200 trace_create_file("trace", 0644, d_tracer,
7201 tr, &tracing_fops);
7202
7203 trace_create_file("trace_pipe", 0444, d_tracer,
7204 tr, &tracing_pipe_fops);
7205
7206 trace_create_file("buffer_size_kb", 0644, d_tracer,
7207 tr, &tracing_entries_fops);
7208
7209 trace_create_file("buffer_total_size_kb", 0444, d_tracer,
7210 tr, &tracing_total_entries_fops);
7211
7212 trace_create_file("free_buffer", 0200, d_tracer,
7213 tr, &tracing_free_buffer_fops);
7214
7215 trace_create_file("trace_marker", 0220, d_tracer,
7216 tr, &tracing_mark_fops);
7217
7218 trace_create_file("trace_clock", 0644, d_tracer, tr,
7219 &trace_clock_fops);
7220
7221 trace_create_file("tracing_on", 0644, d_tracer,
7222 tr, &rb_simple_fops);
7223
7224 create_trace_options_dir(tr);
7225
7226 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
7227 trace_create_file("tracing_max_latency", 0644, d_tracer,
7228 &tr->max_latency, &tracing_max_lat_fops);
7229 #endif
7230
7231 if (ftrace_create_function_files(tr, d_tracer))
7232 WARN(1, "Could not allocate function filter files");
7233
7234 #ifdef CONFIG_TRACER_SNAPSHOT
7235 trace_create_file("snapshot", 0644, d_tracer,
7236 tr, &snapshot_fops);
7237 #endif
7238
7239 for_each_tracing_cpu(cpu)
7240 tracing_init_tracefs_percpu(tr, cpu);
7241
7242 ftrace_init_tracefs(tr, d_tracer);
7243 }
7244
7245 static struct vfsmount *trace_automount(void *ignore)
7246 {
7247 struct vfsmount *mnt;
7248 struct file_system_type *type;
7249
7250 /*
7251 * To maintain backward compatibility for tools that mount
7252 * debugfs to get to the tracing facility, tracefs is automatically
7253 * mounted to the debugfs/tracing directory.
7254 */
7255 type = get_fs_type("tracefs");
7256 if (!type)
7257 return NULL;
7258 mnt = vfs_kern_mount(type, 0, "tracefs", NULL);
7259 put_filesystem(type);
7260 if (IS_ERR(mnt))
7261 return NULL;
7262 mntget(mnt);
7263
7264 return mnt;
7265 }
7266
7267 /**
7268 * tracing_init_dentry - initialize top level trace array
7269 *
7270 * This is called when creating files or directories in the tracing
7271 * directory. It is called via fs_initcall() and by any of the boot-up
7272 * code, and returns the dentry of the top level tracing directory.
7273 */
7274 struct dentry *tracing_init_dentry(void)
7275 {
7276 struct trace_array *tr = &global_trace;
7277
7278 /* The top level trace array uses NULL as parent */
7279 if (tr->dir)
7280 return NULL;
7281
7282 if (WARN_ON(!tracefs_initialized()) ||
7283 (IS_ENABLED(CONFIG_DEBUG_FS) &&
7284 WARN_ON(!debugfs_initialized())))
7285 return ERR_PTR(-ENODEV);
7286
7287 /*
7288 * As there may still be users that expect the tracing
7289 * files to exist in debugfs/tracing, we must automount
7290 * the tracefs file system there, so older tools still
7291 * work with the newer kernel.
7292 */
7293 tr->dir = debugfs_create_automount("tracing", NULL,
7294 trace_automount, NULL);
7295 if (!tr->dir) {
7296 pr_warn_once("Could not create debugfs directory 'tracing'\n");
7297 return ERR_PTR(-ENOMEM);
7298 }
7299
7300 return NULL;
7301 }
7302
7303 extern struct trace_enum_map *__start_ftrace_enum_maps[];
7304 extern struct trace_enum_map *__stop_ftrace_enum_maps[];
7305
7306 static void __init trace_enum_init(void)
7307 {
7308 int len;
7309
7310 len = __stop_ftrace_enum_maps - __start_ftrace_enum_maps;
7311 trace_insert_enum_map(NULL, __start_ftrace_enum_maps, len);
7312 }
7313
7314 #ifdef CONFIG_MODULES
7315 static void trace_module_add_enums(struct module *mod)
7316 {
7317 if (!mod->num_trace_enums)
7318 return;
7319
7320 /*
7321 * Modules with bad taint do not have events created, so do
7322 * not bother with enums either.
7323 */
7324 if (trace_module_has_bad_taint(mod))
7325 return;
7326
7327 trace_insert_enum_map(mod, mod->trace_enums, mod->num_trace_enums);
7328 }
7329
7330 #ifdef CONFIG_TRACE_ENUM_MAP_FILE
7331 static void trace_module_remove_enums(struct module *mod)
7332 {
7333 union trace_enum_map_item *map;
7334 union trace_enum_map_item **last = &trace_enum_maps;
7335
7336 if (!mod->num_trace_enums)
7337 return;
7338
7339 mutex_lock(&trace_enum_mutex);
7340
7341 map = trace_enum_maps;
7342
7343 while (map) {
7344 if (map->head.mod == mod)
7345 break;
7346 map = trace_enum_jmp_to_tail(map);
7347 last = &map->tail.next;
7348 map = map->tail.next;
7349 }
7350 if (!map)
7351 goto out;
7352
7353 *last = trace_enum_jmp_to_tail(map)->tail.next;
7354 kfree(map);
7355 out:
7356 mutex_unlock(&trace_enum_mutex);
7357 }
7358 #else
7359 static inline void trace_module_remove_enums(struct module *mod) { }
7360 #endif /* CONFIG_TRACE_ENUM_MAP_FILE */
7361
7362 static int trace_module_notify(struct notifier_block *self,
7363 unsigned long val, void *data)
7364 {
7365 struct module *mod = data;
7366
7367 switch (val) {
7368 case MODULE_STATE_COMING:
7369 trace_module_add_enums(mod);
7370 break;
7371 case MODULE_STATE_GOING:
7372 trace_module_remove_enums(mod);
7373 break;
7374 }
7375
7376 return 0;
7377 }
7378
7379 static struct notifier_block trace_module_nb = {
7380 .notifier_call = trace_module_notify,
7381 .priority = 0,
7382 };
7383 #endif /* CONFIG_MODULES */
7384
7385 static __init int tracer_init_tracefs(void)
7386 {
7387 struct dentry *d_tracer;
7388
7389 trace_access_lock_init();
7390
7391 d_tracer = tracing_init_dentry();
7392 if (IS_ERR(d_tracer))
7393 return 0;
7394
7395 init_tracer_tracefs(&global_trace, d_tracer);
7396 ftrace_init_tracefs_toplevel(&global_trace, d_tracer);
7397
7398 trace_create_file("tracing_thresh", 0644, d_tracer,
7399 &global_trace, &tracing_thresh_fops);
7400
7401 trace_create_file("README", 0444, d_tracer,
7402 NULL, &tracing_readme_fops);
7403
7404 trace_create_file("saved_cmdlines", 0444, d_tracer,
7405 NULL, &tracing_saved_cmdlines_fops);
7406
7407 trace_create_file("saved_cmdlines_size", 0644, d_tracer,
7408 NULL, &tracing_saved_cmdlines_size_fops);
7409
7410 trace_enum_init();
7411
7412 trace_create_enum_file(d_tracer);
7413
7414 #ifdef CONFIG_MODULES
7415 register_module_notifier(&trace_module_nb);
7416 #endif
7417
7418 #ifdef CONFIG_DYNAMIC_FTRACE
7419 trace_create_file("dyn_ftrace_total_info", 0444, d_tracer,
7420 &ftrace_update_tot_cnt, &tracing_dyn_info_fops);
7421 #endif
7422
7423 create_trace_instances(d_tracer);
7424
7425 update_tracer_options(&global_trace);
7426
7427 return 0;
7428 }
7429
7430 static int trace_panic_handler(struct notifier_block *this,
7431 unsigned long event, void *unused)
7432 {
7433 if (ftrace_dump_on_oops)
7434 ftrace_dump(ftrace_dump_on_oops);
7435 return NOTIFY_OK;
7436 }
7437
7438 static struct notifier_block trace_panic_notifier = {
7439 .notifier_call = trace_panic_handler,
7440 .next = NULL,
7441 .priority = 150 /* priority: INT_MAX >= x >= 0 */
7442 };
7443
7444 static int trace_die_handler(struct notifier_block *self,
7445 unsigned long val,
7446 void *data)
7447 {
7448 switch (val) {
7449 case DIE_OOPS:
7450 if (ftrace_dump_on_oops)
7451 ftrace_dump(ftrace_dump_on_oops);
7452 break;
7453 default:
7454 break;
7455 }
7456 return NOTIFY_OK;
7457 }
7458
7459 static struct notifier_block trace_die_notifier = {
7460 .notifier_call = trace_die_handler,
7461 .priority = 200
7462 };
7463
7464 /*
7465 * printk is limited to a max of 1024 characters; we really don't need it that big.
7466 * Nothing should be printing 1000 characters anyway.
7467 */
7468 #define TRACE_MAX_PRINT 1000
7469
7470 /*
7471 * Define here KERN_TRACE so that we have one place to modify
7472 * it if we decide to change what log level the ftrace dump
7473 * should be at.
7474 */
7475 #define KERN_TRACE KERN_EMERG
7476
7477 void
7478 trace_printk_seq(struct trace_seq *s)
7479 {
7480 /* Probably should print a warning here. */
7481 if (s->seq.len >= TRACE_MAX_PRINT)
7482 s->seq.len = TRACE_MAX_PRINT;
7483
7484 /*
7485 * More paranoid code. Although the buffer size is set to
7486 * PAGE_SIZE, and TRACE_MAX_PRINT is 1000, this is just
7487 * an extra layer of protection.
7488 */
7489 if (WARN_ON_ONCE(s->seq.len >= s->seq.size))
7490 s->seq.len = s->seq.size - 1;
7491
7492 /* should be NUL terminated, but we are paranoid. */
7493 s->buffer[s->seq.len] = 0;
7494
7495 printk(KERN_TRACE "%s", s->buffer);
7496
7497 trace_seq_init(s);
7498 }
7499
7500 void trace_init_global_iter(struct trace_iterator *iter)
7501 {
7502 iter->tr = &global_trace;
7503 iter->trace = iter->tr->current_trace;
7504 iter->cpu_file = RING_BUFFER_ALL_CPUS;
7505 iter->trace_buffer = &global_trace.trace_buffer;
7506
7507 if (iter->trace && iter->trace->open)
7508 iter->trace->open(iter);
7509
7510 /* Annotate start of buffers if we had overruns */
7511 if (ring_buffer_overruns(iter->trace_buffer->buffer))
7512 iter->iter_flags |= TRACE_FILE_ANNOTATE;
7513
7514 /* Output in nanoseconds only if we are using a clock in nanoseconds. */
7515 if (trace_clocks[iter->tr->clock_id].in_ns)
7516 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
7517 }
7518
7519 void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
7520 {
7521 /* use static because iter can be a bit big for the stack */
7522 static struct trace_iterator iter;
7523 static atomic_t dump_running;
7524 struct trace_array *tr = &global_trace;
7525 unsigned int old_userobj;
7526 unsigned long flags;
7527 int cnt = 0, cpu;
7528
7529 /* Only allow one dump user at a time. */
7530 if (atomic_inc_return(&dump_running) != 1) {
7531 atomic_dec(&dump_running);
7532 return;
7533 }
7534
7535 /*
7536 * Always turn off tracing when we dump.
7537 * We don't need to show trace output of what happens
7538 * between multiple crashes.
7539 *
7540 * If the user does a sysrq-z, then they can re-enable
7541 * tracing with echo 1 > tracing_on.
7542 */
7543 tracing_off();
7544
7545 local_irq_save(flags);
7546
7547 /* Simulate the iterator */
7548 trace_init_global_iter(&iter);
7549
7550 for_each_tracing_cpu(cpu) {
7551 atomic_inc(&per_cpu_ptr(iter.trace_buffer->data, cpu)->disabled);
7552 }
7553
7554 old_userobj = tr->trace_flags & TRACE_ITER_SYM_USEROBJ;
7555
7556 /* don't look at user memory in panic mode */
7557 tr->trace_flags &= ~TRACE_ITER_SYM_USEROBJ;
7558
7559 switch (oops_dump_mode) {
7560 case DUMP_ALL:
7561 iter.cpu_file = RING_BUFFER_ALL_CPUS;
7562 break;
7563 case DUMP_ORIG:
7564 iter.cpu_file = raw_smp_processor_id();
7565 break;
7566 case DUMP_NONE:
7567 goto out_enable;
7568 default:
7569 printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n");
7570 iter.cpu_file = RING_BUFFER_ALL_CPUS;
7571 }
7572
7573 printk(KERN_TRACE "Dumping ftrace buffer:\n");
7574
7575 /* Did function tracer already get disabled? */
7576 if (ftrace_is_dead()) {
7577 printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n");
7578 printk("# MAY BE MISSING FUNCTION EVENTS\n");
7579 }
7580
7581 /*
7582 * We need to stop all tracing on all CPUs to read
7583 * the next buffer. This is a bit expensive, but is
7584 * not done often. We fill in all that we can read,
7585 * and then release the locks again.
7586 */
7587
7588 while (!trace_empty(&iter)) {
7589
7590 if (!cnt)
7591 printk(KERN_TRACE "---------------------------------\n");
7592
7593 cnt++;
7594
7595 /* reset all but tr, trace, and overruns */
7596 memset(&iter.seq, 0,
7597 sizeof(struct trace_iterator) -
7598 offsetof(struct trace_iterator, seq));
7599 iter.iter_flags |= TRACE_FILE_LAT_FMT;
7600 iter.pos = -1;
7601
7602 if (trace_find_next_entry_inc(&iter) != NULL) {
7603 int ret;
7604
7605 ret = print_trace_line(&iter);
7606 if (ret != TRACE_TYPE_NO_CONSUME)
7607 trace_consume(&iter);
7608 }
7609 touch_nmi_watchdog();
7610
7611 trace_printk_seq(&iter.seq);
7612 }
7613
7614 if (!cnt)
7615 printk(KERN_TRACE " (ftrace buffer empty)\n");
7616 else
7617 printk(KERN_TRACE "---------------------------------\n");
7618
7619 out_enable:
7620 tr->trace_flags |= old_userobj;
7621
7622 for_each_tracing_cpu(cpu) {
7623 atomic_dec(&per_cpu_ptr(iter.trace_buffer->data, cpu)->disabled);
7624 }
7625 atomic_dec(&dump_running);
7626 local_irq_restore(flags);
7627 }
7628 EXPORT_SYMBOL_GPL(ftrace_dump);
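/*
 * Since ftrace_dump() is exported (GPL), other kernel code can dump the
 * buffers directly from an error path, for example ftrace_dump(DUMP_ALL)
 * for every CPU or ftrace_dump(DUMP_ORIG) for just the calling CPU, the
 * same modes handled by the switch statement above.
 */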
7629
7630 __init static int tracer_alloc_buffers(void)
7631 {
7632 int ring_buf_size;
7633 int ret = -ENOMEM;
7634
7635 /*
7636 * Make sure we don't accidentally add more trace options
7637 * than we have bits for.
7638 */
7639 BUILD_BUG_ON(TRACE_ITER_LAST_BIT > TRACE_FLAGS_MAX_SIZE);
7640
7641 if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
7642 goto out;
7643
7644 if (!alloc_cpumask_var(&global_trace.tracing_cpumask, GFP_KERNEL))
7645 goto out_free_buffer_mask;
7646
7647 /* Only allocate trace_printk buffers if a trace_printk exists */
7648 if (__stop___trace_bprintk_fmt != __start___trace_bprintk_fmt)
7649 /* Must be called before global_trace.buffer is allocated */
7650 trace_printk_init_buffers();
7651
7652 /* To save memory, keep the ring buffer size to its minimum */
7653 if (ring_buffer_expanded)
7654 ring_buf_size = trace_buf_size;
7655 else
7656 ring_buf_size = 1;
7657
7658 cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
7659 cpumask_copy(global_trace.tracing_cpumask, cpu_all_mask);
7660
7661 raw_spin_lock_init(&global_trace.start_lock);
7662
7663 /*
7664 * The prepare callback allocates some memory for the ring buffer. We
7665 * don't free the buffer if the CPU goes down. If we were to free
7666 * the buffer, then the user would lose any trace that was in the
7667 * buffer. The memory will be removed once the "instance" is removed.
7668 */
7669 ret = cpuhp_setup_state_multi(CPUHP_TRACE_RB_PREPARE,
7670 "trace/RB:preapre", trace_rb_cpu_prepare,
7671 NULL);
7672 if (ret < 0)
7673 goto out_free_cpumask;
7674 /* Used for event triggers */
7675 temp_buffer = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE);
7676 if (!temp_buffer)
7677 goto out_rm_hp_state;
7678
7679 if (trace_create_savedcmd() < 0)
7680 goto out_free_temp_buffer;
7681
7682 /* TODO: make the number of buffers hot pluggable with CPUS */
7683 if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
7684 printk(KERN_ERR "tracer: failed to allocate ring buffer!\n");
7685 WARN_ON(1);
7686 goto out_free_savedcmd;
7687 }
7688
7689 if (global_trace.buffer_disabled)
7690 tracing_off();
7691
7692 if (trace_boot_clock) {
7693 ret = tracing_set_clock(&global_trace, trace_boot_clock);
7694 if (ret < 0)
7695 pr_warn("Trace clock %s not defined, going back to default\n",
7696 trace_boot_clock);
7697 }
7698
7699 /*
7700 * register_tracer() might reference current_trace, so it
7701 * needs to be set before we register anything. This is
7702 * just a bootstrap of current_trace anyway.
7703 */
7704 global_trace.current_trace = &nop_trace;
7705
7706 global_trace.max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
7707
7708 ftrace_init_global_array_ops(&global_trace);
7709
7710 init_trace_flags_index(&global_trace);
7711
7712 register_tracer(&nop_trace);
7713
7714 /* All seems OK, enable tracing */
7715 tracing_disabled = 0;
7716
7717 atomic_notifier_chain_register(&panic_notifier_list,
7718 &trace_panic_notifier);
7719
7720 register_die_notifier(&trace_die_notifier);
7721
7722 global_trace.flags = TRACE_ARRAY_FL_GLOBAL;
7723
7724 INIT_LIST_HEAD(&global_trace.systems);
7725 INIT_LIST_HEAD(&global_trace.events);
7726 list_add(&global_trace.list, &ftrace_trace_arrays);
7727
7728 apply_trace_boot_options();
7729
7730 register_snapshot_cmd();
7731
7732 return 0;
7733
7734 out_free_savedcmd:
7735 free_saved_cmdlines_buffer(savedcmd);
7736 out_free_temp_buffer:
7737 ring_buffer_free(temp_buffer);
7738 out_rm_hp_state:
7739 cpuhp_remove_multi_state(CPUHP_TRACE_RB_PREPARE);
7740 out_free_cpumask:
7741 free_cpumask_var(global_trace.tracing_cpumask);
7742 out_free_buffer_mask:
7743 free_cpumask_var(tracing_buffer_mask);
7744 out:
7745 return ret;
7746 }
7747
7748 void __init trace_init(void)
7749 {
7750 if (tracepoint_printk) {
7751 tracepoint_print_iter =
7752 kmalloc(sizeof(*tracepoint_print_iter), GFP_KERNEL);
7753 if (WARN_ON(!tracepoint_print_iter))
7754 tracepoint_printk = 0;
7755 }
7756 tracer_alloc_buffers();
7757 trace_event_init();
7758 }
7759
7760 __init static int clear_boot_tracer(void)
7761 {
7762 /*
7763 * The default bootup tracer points into a boot buffer that lives in
7764 * an init section. This function is called at late_initcall time. If
7765 * the boot tracer was never found, clear the pointer out, to prevent
7766 * a later registration from accessing the buffer that is about to be
7767 * freed.
7768 */
7769 if (!default_bootup_tracer)
7770 return 0;
7771
7772 printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n",
7773 default_bootup_tracer);
7774 default_bootup_tracer = NULL;
7775
7776 return 0;
7777 }
7778
7779 fs_initcall(tracer_init_tracefs);
7780 late_initcall(clear_boot_tracer);